diff --git a/pom.xml b/pom.xml
index af3cce9..f50ffb7 100644
--- a/pom.xml
+++ b/pom.xml
@@ -31,7 +31,7 @@
-		<version>1.1.1</version>
+		<version>1.0.0</version>
diff --git a/src/main/java/org/wltea/analyzer/core/CJKSegmenter.java b/src/main/java/org/wltea/analyzer/core/CJKSegmenter.java
index d72eaa7..a31a5d4 100644
--- a/src/main/java/org/wltea/analyzer/core/CJKSegmenter.java
+++ b/src/main/java/org/wltea/analyzer/core/CJKSegmenter.java
@@ -58,7 +58,7 @@ class CJKSegmenter implements ISegmenter {
//处理词段队列
Hit[] tmpArray = this.tmpHits.toArray(new Hit[this.tmpHits.size()]);
for(Hit hit : tmpArray){
- hit = Dictionary.getSingleton().matchWithHit(String.valueOf(context.getSegmentBuff()).toLowerCase().toCharArray(), context.getCursor() , hit);
+ hit = Dictionary.getSingleton().matchWithHit(context.getSegmentBuff(), context.getCursor() , hit);
if(hit.isMatch()){
//输出当前的词
Lexeme newLexeme = new Lexeme(context.getBufferOffset() , hit.getBegin() , context.getCursor() - hit.getBegin() + 1 , Lexeme.TYPE_CNWORD);
@@ -77,7 +77,7 @@ class CJKSegmenter implements ISegmenter {
//*********************************
//再对当前指针位置的字符进行单字匹配
- Hit singleCharHit = Dictionary.getSingleton().matchInMainDict(String.valueOf(context.getSegmentBuff()).toLowerCase().toCharArray(), context.getCursor(), 1);
+ Hit singleCharHit = Dictionary.getSingleton().matchInMainDict(context.getSegmentBuff(), context.getCursor(), 1);
if(singleCharHit.isMatch()){//首字成词
//输出当前的词
Lexeme newLexeme = new Lexeme(context.getBufferOffset() , context.getCursor() , 1 , Lexeme.TYPE_CNWORD);
diff --git a/src/main/java/org/wltea/analyzer/dic/Dictionary.java b/src/main/java/org/wltea/analyzer/dic/Dictionary.java
index 2e0f33f..6a76f2f 100644
--- a/src/main/java/org/wltea/analyzer/dic/Dictionary.java
+++ b/src/main/java/org/wltea/analyzer/dic/Dictionary.java
@@ -119,7 +119,7 @@ public class Dictionary {
for(String word : words){
if (word != null) {
//批量加载词条到主内存词典中
- singleton._MainDict.fillSegment(word.trim().toLowerCase().toCharArray());
+ singleton._MainDict.fillSegment(word.trim().toCharArray());
}
}
}
@@ -133,7 +133,7 @@ public class Dictionary {
for(String word : words){
if (word != null) {
//批量屏蔽词条
- singleton._MainDict.disableSegment(word.trim().toLowerCase().toCharArray());
+ singleton._MainDict.disableSegment(word.trim().toCharArray());
}
}
}
@@ -152,7 +152,7 @@ public class Dictionary {
* @return Hit 匹配结果描述
*/
public Hit matchInMainDict(char[] charArray , int begin, int length){
- return singleton._MainDict.match(String.valueOf(charArray).toLowerCase().toCharArray(), begin, length);
+ return singleton._MainDict.match(charArray, begin, length);
}
/**
@@ -160,7 +160,7 @@ public class Dictionary {
* @return Hit 匹配结果描述
*/
public Hit matchInQuantifierDict(char[] charArray , int begin, int length){
- return singleton._QuantifierDict.match(String.valueOf(charArray).toLowerCase().toCharArray(), begin, length);
+ return singleton._QuantifierDict.match(charArray, begin, length);
}
@@ -179,7 +179,7 @@ public class Dictionary {
* @return boolean
*/
public boolean isStopWord(char[] charArray , int begin, int length){
- return singleton._StopWords.match(String.valueOf(charArray).toLowerCase().toCharArray(), begin, length).isMatch();
+ return singleton._StopWords.match(charArray, begin, length).isMatch();
}
/**
@@ -205,7 +205,7 @@ public class Dictionary {
do {
theWord = br.readLine();
if (theWord != null && !"".equals(theWord.trim())) {
- _MainDict.fillSegment(theWord.trim().toLowerCase().toCharArray());
+ _MainDict.fillSegment(theWord.trim().toCharArray());
}
} while (theWord != null);
@@ -255,7 +255,7 @@ public class Dictionary {
theWord = br.readLine();
if (theWord != null && !"".equals(theWord.trim())) {
//加载扩展词典数据到主内存词典中
- _MainDict.fillSegment(theWord.trim().toLowerCase().toCharArray());
+ _MainDict.fillSegment(theWord.trim().toCharArray());
}
} while (theWord != null);
@@ -298,7 +298,7 @@ public class Dictionary {
do {
theWord = br.readLine();
if (theWord != null && !"".equals(theWord.trim())) {
- _StopWords.fillSegment(theWord.trim().toLowerCase().toCharArray());
+ _StopWords.fillSegment(theWord.trim().toCharArray());
}
} while (theWord != null);
@@ -342,7 +342,7 @@ public class Dictionary {
theWord = br.readLine();
if (theWord != null && !"".equals(theWord.trim())) {
//加载扩展停止词典数据到内存中
- _StopWords.fillSegment(theWord.trim().toLowerCase().toCharArray());
+ _StopWords.fillSegment(theWord.trim().toCharArray());
}
} while (theWord != null);
@@ -383,7 +383,7 @@ public class Dictionary {
do {
theWord = br.readLine();
if (theWord != null && !"".equals(theWord.trim())) {
- _QuantifierDict.fillSegment(theWord.trim().toLowerCase().toCharArray());
+ _QuantifierDict.fillSegment(theWord.trim().toCharArray());
}
} while (theWord != null);
@@ -440,7 +440,6 @@ public class Dictionary {
}
-
private void loadSuffixDict(){
_SuffixDict = new DictSegment((char)0);
diff --git a/src/main/java/org/wltea/analyzer/lucene/IKTokenizer.java b/src/main/java/org/wltea/analyzer/lucene/IKTokenizer.java
index befe4ee..bffd984 100644
--- a/src/main/java/org/wltea/analyzer/lucene/IKTokenizer.java
+++ b/src/main/java/org/wltea/analyzer/lucene/IKTokenizer.java
@@ -80,7 +80,7 @@ public final class IKTokenizer extends Tokenizer {
if(nextLexeme != null){
//将Lexeme转成Attributes
//设置词元文本
- termAtt.append(nextLexeme.getLexemeText().toLowerCase());
+ termAtt.append(nextLexeme.getLexemeText());
//设置词元长度
termAtt.setLength(nextLexeme.getLength());
//设置词元位移