diff --git a/pom.xml b/pom.xml index af3cce9..f50ffb7 100644 --- a/pom.xml +++ b/pom.xml @@ -31,7 +31,7 @@ - 1.1.1 + 1.0.0 diff --git a/src/main/java/org/wltea/analyzer/core/CJKSegmenter.java b/src/main/java/org/wltea/analyzer/core/CJKSegmenter.java index d72eaa7..a31a5d4 100644 --- a/src/main/java/org/wltea/analyzer/core/CJKSegmenter.java +++ b/src/main/java/org/wltea/analyzer/core/CJKSegmenter.java @@ -58,7 +58,7 @@ class CJKSegmenter implements ISegmenter { //处理词段队列 Hit[] tmpArray = this.tmpHits.toArray(new Hit[this.tmpHits.size()]); for(Hit hit : tmpArray){ - hit = Dictionary.getSingleton().matchWithHit(String.valueOf(context.getSegmentBuff()).toLowerCase().toCharArray(), context.getCursor() , hit); + hit = Dictionary.getSingleton().matchWithHit(context.getSegmentBuff(), context.getCursor() , hit); if(hit.isMatch()){ //输出当前的词 Lexeme newLexeme = new Lexeme(context.getBufferOffset() , hit.getBegin() , context.getCursor() - hit.getBegin() + 1 , Lexeme.TYPE_CNWORD); @@ -77,7 +77,7 @@ class CJKSegmenter implements ISegmenter { //********************************* //再对当前指针位置的字符进行单字匹配 - Hit singleCharHit = Dictionary.getSingleton().matchInMainDict(String.valueOf(context.getSegmentBuff()).toLowerCase().toCharArray(), context.getCursor(), 1); + Hit singleCharHit = Dictionary.getSingleton().matchInMainDict(context.getSegmentBuff(), context.getCursor(), 1); if(singleCharHit.isMatch()){//首字成词 //输出当前的词 Lexeme newLexeme = new Lexeme(context.getBufferOffset() , context.getCursor() , 1 , Lexeme.TYPE_CNWORD); diff --git a/src/main/java/org/wltea/analyzer/dic/Dictionary.java b/src/main/java/org/wltea/analyzer/dic/Dictionary.java index 2e0f33f..6a76f2f 100644 --- a/src/main/java/org/wltea/analyzer/dic/Dictionary.java +++ b/src/main/java/org/wltea/analyzer/dic/Dictionary.java @@ -119,7 +119,7 @@ public class Dictionary { for(String word : words){ if (word != null) { //批量加载词条到主内存词典中 - singleton._MainDict.fillSegment(word.trim().toLowerCase().toCharArray()); + singleton._MainDict.fillSegment(word.trim().toCharArray()); } } } @@ -133,7 +133,7 @@ public class Dictionary { for(String word : words){ if (word != null) { //批量屏蔽词条 - singleton._MainDict.disableSegment(word.trim().toLowerCase().toCharArray()); + singleton._MainDict.disableSegment(word.trim().toCharArray()); } } } @@ -152,7 +152,7 @@ public class Dictionary { * @return Hit 匹配结果描述 */ public Hit matchInMainDict(char[] charArray , int begin, int length){ - return singleton._MainDict.match(String.valueOf(charArray).toLowerCase().toCharArray(), begin, length); + return singleton._MainDict.match(charArray, begin, length); } /** @@ -160,7 +160,7 @@ public class Dictionary { * @return Hit 匹配结果描述 */ public Hit matchInQuantifierDict(char[] charArray , int begin, int length){ - return singleton._QuantifierDict.match(String.valueOf(charArray).toLowerCase().toCharArray(), begin, length); + return singleton._QuantifierDict.match(charArray, begin, length); } @@ -179,7 +179,7 @@ public class Dictionary { * @return boolean */ public boolean isStopWord(char[] charArray , int begin, int length){ - return singleton._StopWords.match(String.valueOf(charArray).toLowerCase().toCharArray(), begin, length).isMatch(); + return singleton._StopWords.match(charArray, begin, length).isMatch(); } /** @@ -205,7 +205,7 @@ public class Dictionary { do { theWord = br.readLine(); if (theWord != null && !"".equals(theWord.trim())) { - _MainDict.fillSegment(theWord.trim().toLowerCase().toCharArray()); + _MainDict.fillSegment(theWord.trim().toCharArray()); } } while (theWord != null); @@ -255,7 +255,7 @@ public class Dictionary { theWord = br.readLine(); if (theWord != null && !"".equals(theWord.trim())) { //加载扩展词典数据到主内存词典中 - _MainDict.fillSegment(theWord.trim().toLowerCase().toCharArray()); + _MainDict.fillSegment(theWord.trim().toCharArray()); } } while (theWord != null); @@ -298,7 +298,7 @@ public class Dictionary { do { theWord = br.readLine(); if (theWord != null && !"".equals(theWord.trim())) { - _StopWords.fillSegment(theWord.trim().toLowerCase().toCharArray()); + _StopWords.fillSegment(theWord.trim().toCharArray()); } } while (theWord != null); @@ -342,7 +342,7 @@ public class Dictionary { theWord = br.readLine(); if (theWord != null && !"".equals(theWord.trim())) { //加载扩展停止词典数据到内存中 - _StopWords.fillSegment(theWord.trim().toLowerCase().toCharArray()); + _StopWords.fillSegment(theWord.trim().toCharArray()); } } while (theWord != null); @@ -383,7 +383,7 @@ public class Dictionary { do { theWord = br.readLine(); if (theWord != null && !"".equals(theWord.trim())) { - _QuantifierDict.fillSegment(theWord.trim().toLowerCase().toCharArray()); + _QuantifierDict.fillSegment(theWord.trim().toCharArray()); } } while (theWord != null); @@ -440,7 +440,6 @@ public class Dictionary { } - private void loadSuffixDict(){ _SuffixDict = new DictSegment((char)0); diff --git a/src/main/java/org/wltea/analyzer/lucene/IKTokenizer.java b/src/main/java/org/wltea/analyzer/lucene/IKTokenizer.java index befe4ee..bffd984 100644 --- a/src/main/java/org/wltea/analyzer/lucene/IKTokenizer.java +++ b/src/main/java/org/wltea/analyzer/lucene/IKTokenizer.java @@ -80,7 +80,7 @@ public final class IKTokenizer extends Tokenizer { if(nextLexeme != null){ //将Lexeme转成Attributes //设置词元文本 - termAtt.append(nextLexeme.getLexemeText().toLowerCase()); + termAtt.append(nextLexeme.getLexemeText()); //设置词元长度 termAtt.setLength(nextLexeme.getLength()); //设置词元位移