From c0bdef74fb7a4e99f39f7ca543085f370ec8adfa Mon Sep 17 00:00:00 2001
From: qinwf <mail@qinwenfeng.com>
Date: Fri, 6 Feb 2015 10:19:43 +0800
Subject: [PATCH] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E8=8B=B1=E6=96=87+=E6=95=B0?=
 =?UTF-8?q?=E5=AD=97=E5=88=86=E8=AF=8D=E8=A7=84=E5=88=99=20qinwf/jiebaR#7?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/HMMSegment.hpp                  | 12 ++++++++++--
 test/unittest/TKeywordExtractor.cpp |  2 +-
 test/unittest/TSegments.cpp         |  4 ++--
 3 files changed, 13 insertions(+), 5 deletions(-)
diff --git a/src/HMMSegment.hpp b/src/HMMSegment.hpp
index 838eeef..d7c8c89 100644
--- a/src/HMMSegment.hpp
+++ b/src/HMMSegment.hpp
@@ -120,11 +120,19 @@ namespace CppJieba
             // sequential letters rule
             Unicode::const_iterator _sequentialLetterRule(Unicode::const_iterator begin, Unicode::const_iterator end) const
             {
-                Unicode::value_type x;
+                Unicode::value_type x = *begin;
+                if (('a' <= x && x <= 'z') || ('A' <= x && x <= 'Z'))
+                {
+                    begin ++;
+                }
+                else
+                {
+                    return begin;
+                }
                 while(begin != end)
                 {
                     x = *begin;
-                    if(('a' <= x && x <= 'z') || ('A' <= x && x <= 'Z'))
+                    if(('a' <= x && x <= 'z') || ('A' <= x && x <= 'Z') || ('0' <= x && x <= '9'))
                     {
                         begin ++;
                     }
diff --git a/test/unittest/TKeywordExtractor.cpp b/test/unittest/TKeywordExtractor.cpp
index 2392260..9ea203c 100644
--- a/test/unittest/TKeywordExtractor.cpp
+++ b/test/unittest/TKeywordExtractor.cpp
@@ -26,7 +26,7 @@ TEST(KeywordExtractorTest, Test1)
         size_t topN = 5;
         extractor.extract(s, wordweights, topN);
         res << wordweights;
-        ASSERT_EQ(res, "[\"iPhone:11.7392\", \"一部:6.47592\"]");
+        ASSERT_EQ(res, "[\"iPhone6:11.7392\", \"一部:6.47592\"]");
     }
 }
 
diff --git a/test/unittest/TSegments.cpp b/test/unittest/TSegments.cpp
index dcdc5c1..54e5f64 100644
--- a/test/unittest/TSegments.cpp
+++ b/test/unittest/TSegments.cpp
@@ -12,8 +12,8 @@ using namespace CppJieba;
 TEST(MixSegmentTest, Test1)
 {
     MixSegment segment("../dict/jieba.dict.utf8", "../dict/hmm_model.utf8");;
-    const char* str = "我来自北京邮电大学。。。学号123456";
-    const char* res[] = {"我", "来自", "北京邮电大学", "。","。","。", "学号", "123456"};
+    const char* str = "我来自北京邮电大学。。。学号123456，用AK47";
+    const char* res[] = {"我", "来自", "北京邮电大学", "。","。","。", "学号", "123456","，","用","AK47"};
     const char* str2 = "B超 T恤";
     const char* res2[] = {"B超"," ", "T恤"};
     vector<string> words;