#33 fix performance issue
This commit is contained in:
parent 54fd970591
commit bafb724c89

pom.xml (2 lines changed)
@@ -31,7 +31,7 @@
     </parent>

     <properties>
-        <elasticsearch.version>1.1.1</elasticsearch.version>
+        <elasticsearch.version>1.0.0</elasticsearch.version>
     </properties>

     <repositories>
CJKSegmenter.java

@@ -58,7 +58,7 @@ class CJKSegmenter implements ISegmenter {
             // process the queue of pending word-segment hits
             Hit[] tmpArray = this.tmpHits.toArray(new Hit[this.tmpHits.size()]);
             for(Hit hit : tmpArray){
-                hit = Dictionary.getSingleton().matchWithHit(String.valueOf(context.getSegmentBuff()).toLowerCase().toCharArray(), context.getCursor() , hit);
+                hit = Dictionary.getSingleton().matchWithHit(context.getSegmentBuff(), context.getCursor() , hit);
                 if(hit.isMatch()){
                     // emit the matched word
                     Lexeme newLexeme = new Lexeme(context.getBufferOffset() , hit.getBegin() , context.getCursor() - hit.getBegin() + 1 , Lexeme.TYPE_CNWORD);
@@ -77,7 +77,7 @@ class CJKSegmenter implements ISegmenter {

             //*********************************
             // also try a single-character match at the current cursor position
-            Hit singleCharHit = Dictionary.getSingleton().matchInMainDict(String.valueOf(context.getSegmentBuff()).toLowerCase().toCharArray(), context.getCursor(), 1);
+            Hit singleCharHit = Dictionary.getSingleton().matchInMainDict(context.getSegmentBuff(), context.getCursor(), 1);
             if(singleCharHit.isMatch()){ // the first character itself forms a word
                 // emit the matched word
                 Lexeme newLexeme = new Lexeme(context.getBufferOffset() , context.getCursor() , 1 , Lexeme.TYPE_CNWORD);
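Why this is the performance fix: the old hot path rebuilt a lowercased copy of the entire segment buffer for every hit it checked, i.e. one String plus two char[] allocations per character position, which makes analysing a buffer quadratic in its length. Below is a hypothetical micro-benchmark sketching the difference; the buffer size and loop shape are assumptions for illustration, not the plugin's code.

import java.util.Arrays;

// Hypothetical micro-benchmark: per-call lowercasing vs. lowercasing once.
public final class LowercasePerCallCost {
    public static void main(String[] args) {
        char[] segmentBuff = new char[4096];          // size assumed, for illustration
        Arrays.fill(segmentBuff, 'A');

        long t0 = System.nanoTime();
        long sink = 0;
        for (int cursor = 0; cursor < segmentBuff.length; cursor++) {
            // Old hot path: a String plus two char[] copies on every call.
            char[] lowered = String.valueOf(segmentBuff).toLowerCase().toCharArray();
            sink += lowered[cursor];                  // keep the JIT from eliding the work
        }
        System.out.printf("per-call lowercase: %d ms (sink=%d)%n",
                (System.nanoTime() - t0) / 1_000_000, sink);

        t0 = System.nanoTime();
        sink = 0;
        // New shape: the buffer is normalized once, then matched in place.
        char[] loweredOnce = String.valueOf(segmentBuff).toLowerCase().toCharArray();
        for (int cursor = 0; cursor < loweredOnce.length; cursor++) {
            sink += loweredOnce[cursor];
        }
        System.out.printf("lowercase once:     %d ms (sink=%d)%n",
                (System.nanoTime() - t0) / 1_000_000, sink);
    }
}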
|
Dictionary.java

@@ -119,7 +119,7 @@ public class Dictionary {
         for(String word : words){
             if (word != null) {
                 // batch-load the entries into the in-memory main dictionary
-                singleton._MainDict.fillSegment(word.trim().toLowerCase().toCharArray());
+                singleton._MainDict.fillSegment(word.trim().toCharArray());
             }
         }
     }
@@ -133,7 +133,7 @@ public class Dictionary {
         for(String word : words){
             if (word != null) {
                 // batch-disable the entries
-                singleton._MainDict.disableSegment(word.trim().toLowerCase().toCharArray());
+                singleton._MainDict.disableSegment(word.trim().toCharArray());
             }
         }
     }
@@ -152,7 +152,7 @@ public class Dictionary {
      * @return Hit describing the match result
      */
     public Hit matchInMainDict(char[] charArray , int begin, int length){
-        return singleton._MainDict.match(String.valueOf(charArray).toLowerCase().toCharArray(), begin, length);
+        return singleton._MainDict.match(charArray, begin, length);
     }

     /**
@@ -160,7 +160,7 @@ public class Dictionary {
      * @return Hit describing the match result
      */
     public Hit matchInQuantifierDict(char[] charArray , int begin, int length){
-        return singleton._QuantifierDict.match(String.valueOf(charArray).toLowerCase().toCharArray(), begin, length);
+        return singleton._QuantifierDict.match(charArray, begin, length);
     }

@@ -179,7 +179,7 @@ public class Dictionary {
      * @return boolean
      */
     public boolean isStopWord(char[] charArray , int begin, int length){
-        return singleton._StopWords.match(String.valueOf(charArray).toLowerCase().toCharArray(), begin, length).isMatch();
+        return singleton._StopWords.match(charArray, begin, length).isMatch();
     }

     /**
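A note on where case folding goes: after this change none of the Dictionary match methods lower-case their input, and (per the hunks above) dictionary entries are stored verbatim, so buffer and dictionary must already agree in case by the time match() runs. A sketch of the one-pass, allocation-free normalization this presumably relies on follows; it is a hypothetical helper, not the plugin's code, and Character.toLowerCase per char skips the locale-sensitive multi-char mappings that String.toLowerCase would handle.

// Hypothetical helper: fold case once, in place, while the segment buffer is filled.
final class CharNormalizer {
    private CharNormalizer() {}

    // Lower-cases count chars starting at from; no String or char[] allocation.
    static void toLowerInPlace(char[] segmentBuff, int from, int count) {
        for (int i = from; i < from + count; i++) {
            segmentBuff[i] = Character.toLowerCase(segmentBuff[i]);
        }
    }
}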
@@ -205,7 +205,7 @@ public class Dictionary {
         do {
             theWord = br.readLine();
             if (theWord != null && !"".equals(theWord.trim())) {
-                _MainDict.fillSegment(theWord.trim().toLowerCase().toCharArray());
+                _MainDict.fillSegment(theWord.trim().toCharArray());
             }
         } while (theWord != null);

@@ -255,7 +255,7 @@ public class Dictionary {
             theWord = br.readLine();
             if (theWord != null && !"".equals(theWord.trim())) {
                 // load the extension dictionary entries into the in-memory main dictionary
-                _MainDict.fillSegment(theWord.trim().toLowerCase().toCharArray());
+                _MainDict.fillSegment(theWord.trim().toCharArray());
             }
         } while (theWord != null);

@@ -298,7 +298,7 @@ public class Dictionary {
         do {
             theWord = br.readLine();
             if (theWord != null && !"".equals(theWord.trim())) {
-                _StopWords.fillSegment(theWord.trim().toLowerCase().toCharArray());
+                _StopWords.fillSegment(theWord.trim().toCharArray());
             }
         } while (theWord != null);

@@ -342,7 +342,7 @@ public class Dictionary {
             theWord = br.readLine();
             if (theWord != null && !"".equals(theWord.trim())) {
                 // load the extension stopword entries into memory
-                _StopWords.fillSegment(theWord.trim().toLowerCase().toCharArray());
+                _StopWords.fillSegment(theWord.trim().toCharArray());
             }
         } while (theWord != null);

@@ -383,7 +383,7 @@ public class Dictionary {
         do {
             theWord = br.readLine();
             if (theWord != null && !"".equals(theWord.trim())) {
-                _QuantifierDict.fillSegment(theWord.trim().toLowerCase().toCharArray());
+                _QuantifierDict.fillSegment(theWord.trim().toCharArray());
             }
         } while (theWord != null);

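The five loading loops changed above all share one shape: read a line, trim it, skip blanks, insert verbatim. For reference, a self-contained sketch of that pattern using try-with-resources; the charset and the DictSegment type are taken from context, so treat this as illustrative rather than the plugin's exact code. Since entries are no longer lower-cased at load time, any case folding must happen consistently on the lookup side as well.

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;

// Illustrative loader mirroring the do/while pattern in the hunks above.
final class DictLoader {
    static void loadWords(DictSegment dict, InputStream in) throws IOException {
        try (BufferedReader br = new BufferedReader(new InputStreamReader(in, "UTF-8"), 512)) {
            String theWord;
            while ((theWord = br.readLine()) != null) {
                if (!theWord.trim().isEmpty()) {
                    dict.fillSegment(theWord.trim().toCharArray()); // stored verbatim, no toLowerCase
                }
            }
        }
    }
}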
@@ -440,7 +440,6 @@ public class Dictionary {
     }


-
     private void loadSuffixDict(){

         _SuffixDict = new DictSegment((char)0);
IKTokenizer.java

@@ -80,7 +80,7 @@ public final class IKTokenizer extends Tokenizer {
         if(nextLexeme != null){
             // convert the Lexeme into token Attributes
             // set the term text
-            termAtt.append(nextLexeme.getLexemeText().toLowerCase());
+            termAtt.append(nextLexeme.getLexemeText());
             // set the term length
             termAtt.setLength(nextLexeme.getLength());
             // set the term offset
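With the tokenizer no longer lower-casing term text, users who still want case-insensitive search can fold case once in the analyzer chain instead of per lexeme. A hedged sketch against the Lucene 4.x API of that era; the IKTokenizer(Reader, boolean) constructor and the version constant are assumptions, not confirmed by this diff.

import java.io.Reader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.util.Version;

// Illustrative analyzer: IK segmentation followed by a single LowerCaseFilter pass.
public final class CaseFoldingIKAnalyzer extends Analyzer {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer source = new IKTokenizer(reader, true); // useSmart=true; ctor assumed
        return new TokenStreamComponents(source, new LowerCaseFilter(Version.LUCENE_47, source));
    }
}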