#33 fix performance issue
This commit is contained in:
parent 54fd970591
commit bafb724c89

pom.xml (2 lines changed)
@@ -31,7 +31,7 @@
     </parent>

     <properties>
-        <elasticsearch.version>1.1.1</elasticsearch.version>
+        <elasticsearch.version>1.0.0</elasticsearch.version>
     </properties>

     <repositories>
CJKSegmenter.java

@@ -58,7 +58,7 @@ class CJKSegmenter implements ISegmenter {
             // process the queue of pending word-segment hits
             Hit[] tmpArray = this.tmpHits.toArray(new Hit[this.tmpHits.size()]);
             for(Hit hit : tmpArray){
-                hit = Dictionary.getSingleton().matchWithHit(String.valueOf(context.getSegmentBuff()).toLowerCase().toCharArray(), context.getCursor() , hit);
+                hit = Dictionary.getSingleton().matchWithHit(context.getSegmentBuff(), context.getCursor() , hit);
                 if(hit.isMatch()){
                     // emit the matched word
                     Lexeme newLexeme = new Lexeme(context.getBufferOffset() , hit.getBegin() , context.getCursor() - hit.getBegin() + 1 , Lexeme.TYPE_CNWORD);
@@ -77,7 +77,7 @@ class CJKSegmenter implements ISegmenter {

             //*********************************
             // also try a single-character match at the current cursor position
-            Hit singleCharHit = Dictionary.getSingleton().matchInMainDict(String.valueOf(context.getSegmentBuff()).toLowerCase().toCharArray(), context.getCursor(), 1);
+            Hit singleCharHit = Dictionary.getSingleton().matchInMainDict(context.getSegmentBuff(), context.getCursor(), 1);
             if(singleCharHit.isMatch()){ // the first character itself forms a word
                 // emit the matched word
                 Lexeme newLexeme = new Lexeme(context.getBufferOffset() , context.getCursor() , 1 , Lexeme.TYPE_CNWORD);
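Why this is the performance fix: the old hot path rebuilt a lowercased copy of the entire segment buffer for every hit it checked, i.e. one String plus two char[] allocations per character position, which makes analysing a buffer quadratic in its length. Below is a hypothetical micro-benchmark sketching the difference; the buffer size and loop shape are assumptions for illustration, not the plugin's code.

import java.util.Arrays;

// Hypothetical micro-benchmark: per-call lowercasing vs. lowercasing once.
public final class LowercasePerCallCost {
    public static void main(String[] args) {
        char[] segmentBuff = new char[4096];          // size assumed, for illustration
        Arrays.fill(segmentBuff, 'A');

        long t0 = System.nanoTime();
        long sink = 0;
        for (int cursor = 0; cursor < segmentBuff.length; cursor++) {
            // Old hot path: a String plus two char[] copies on every call.
            char[] lowered = String.valueOf(segmentBuff).toLowerCase().toCharArray();
            sink += lowered[cursor];                  // keep the JIT from eliding the work
        }
        System.out.printf("per-call lowercase: %d ms (sink=%d)%n",
                (System.nanoTime() - t0) / 1_000_000, sink);

        t0 = System.nanoTime();
        sink = 0;
        // New shape: the buffer is normalized once, then matched in place.
        char[] loweredOnce = String.valueOf(segmentBuff).toLowerCase().toCharArray();
        for (int cursor = 0; cursor < loweredOnce.length; cursor++) {
            sink += loweredOnce[cursor];
        }
        System.out.printf("lowercase once:     %d ms (sink=%d)%n",
                (System.nanoTime() - t0) / 1_000_000, sink);
    }
}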
|
Dictionary.java

@@ -119,7 +119,7 @@ public class Dictionary {
         for(String word : words){
             if (word != null) {
                 // batch-load the entries into the in-memory main dictionary
-                singleton._MainDict.fillSegment(word.trim().toLowerCase().toCharArray());
+                singleton._MainDict.fillSegment(word.trim().toCharArray());
             }
         }
     }
@@ -133,7 +133,7 @@ public class Dictionary {
         for(String word : words){
             if (word != null) {
                 // batch-disable the entries
-                singleton._MainDict.disableSegment(word.trim().toLowerCase().toCharArray());
+                singleton._MainDict.disableSegment(word.trim().toCharArray());
             }
         }
     }
@@ -152,7 +152,7 @@ public class Dictionary {
      * @return Hit describing the match result
      */
     public Hit matchInMainDict(char[] charArray , int begin, int length){
-        return singleton._MainDict.match(String.valueOf(charArray).toLowerCase().toCharArray(), begin, length);
+        return singleton._MainDict.match(charArray, begin, length);
     }

     /**
@@ -160,7 +160,7 @@ public class Dictionary {
      * @return Hit describing the match result
      */
     public Hit matchInQuantifierDict(char[] charArray , int begin, int length){
-        return singleton._QuantifierDict.match(String.valueOf(charArray).toLowerCase().toCharArray(), begin, length);
+        return singleton._QuantifierDict.match(charArray, begin, length);
     }

@@ -179,7 +179,7 @@ public class Dictionary {
      * @return boolean
      */
     public boolean isStopWord(char[] charArray , int begin, int length){
-        return singleton._StopWords.match(String.valueOf(charArray).toLowerCase().toCharArray(), begin, length).isMatch();
+        return singleton._StopWords.match(charArray, begin, length).isMatch();
     }

     /**
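A note on where case folding goes: after this change none of the Dictionary match methods lower-case their input, and (per the hunks above) dictionary entries are stored verbatim, so buffer and dictionary must already agree in case by the time match() runs. A sketch of the one-pass, allocation-free normalization this presumably relies on follows; it is a hypothetical helper, not the plugin's code, and Character.toLowerCase per char skips the locale-sensitive multi-char mappings that String.toLowerCase would handle.

// Hypothetical helper: fold case once, in place, while the segment buffer is filled.
final class CharNormalizer {
    private CharNormalizer() {}

    // Lower-cases count chars starting at from; no String or char[] allocation.
    static void toLowerInPlace(char[] segmentBuff, int from, int count) {
        for (int i = from; i < from + count; i++) {
            segmentBuff[i] = Character.toLowerCase(segmentBuff[i]);
        }
    }
}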
@@ -205,7 +205,7 @@ public class Dictionary {
         do {
             theWord = br.readLine();
             if (theWord != null && !"".equals(theWord.trim())) {
-                _MainDict.fillSegment(theWord.trim().toLowerCase().toCharArray());
+                _MainDict.fillSegment(theWord.trim().toCharArray());
             }
         } while (theWord != null);

@@ -255,7 +255,7 @@ public class Dictionary {
             theWord = br.readLine();
             if (theWord != null && !"".equals(theWord.trim())) {
                 // load the extension dictionary entries into the in-memory main dictionary
-                _MainDict.fillSegment(theWord.trim().toLowerCase().toCharArray());
+                _MainDict.fillSegment(theWord.trim().toCharArray());
             }
         } while (theWord != null);

@@ -298,7 +298,7 @@ public class Dictionary {
         do {
             theWord = br.readLine();
             if (theWord != null && !"".equals(theWord.trim())) {
-                _StopWords.fillSegment(theWord.trim().toLowerCase().toCharArray());
+                _StopWords.fillSegment(theWord.trim().toCharArray());
             }
         } while (theWord != null);

@@ -342,7 +342,7 @@ public class Dictionary {
             theWord = br.readLine();
             if (theWord != null && !"".equals(theWord.trim())) {
                 // load the extension stopword entries into memory
-                _StopWords.fillSegment(theWord.trim().toLowerCase().toCharArray());
+                _StopWords.fillSegment(theWord.trim().toCharArray());
             }
         } while (theWord != null);

@@ -383,7 +383,7 @@ public class Dictionary {
         do {
             theWord = br.readLine();
             if (theWord != null && !"".equals(theWord.trim())) {
-                _QuantifierDict.fillSegment(theWord.trim().toLowerCase().toCharArray());
+                _QuantifierDict.fillSegment(theWord.trim().toCharArray());
             }
         } while (theWord != null);

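The five loading loops changed above all share one shape: read a line, trim it, skip blanks, insert verbatim. For reference, a self-contained sketch of that pattern using try-with-resources; the charset and the DictSegment type are taken from context, so treat this as illustrative rather than the plugin's exact code. Since entries are no longer lower-cased at load time, any case folding must happen consistently on the lookup side as well.

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;

// Illustrative loader mirroring the do/while pattern in the hunks above.
final class DictLoader {
    static void loadWords(DictSegment dict, InputStream in) throws IOException {
        try (BufferedReader br = new BufferedReader(new InputStreamReader(in, "UTF-8"), 512)) {
            String theWord;
            while ((theWord = br.readLine()) != null) {
                if (!theWord.trim().isEmpty()) {
                    dict.fillSegment(theWord.trim().toCharArray()); // stored verbatim, no toLowerCase
                }
            }
        }
    }
}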
@@ -440,7 +440,6 @@ public class Dictionary {
     }


-
     private void loadSuffixDict(){

         _SuffixDict = new DictSegment((char)0);
IKTokenizer.java

@@ -80,7 +80,7 @@ public final class IKTokenizer extends Tokenizer {
         if(nextLexeme != null){
             // convert the Lexeme into token Attributes
             // set the term text
-            termAtt.append(nextLexeme.getLexemeText().toLowerCase());
+            termAtt.append(nextLexeme.getLexemeText());
             // set the term length
             termAtt.setLength(nextLexeme.getLength());
             // set the term offset
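With the tokenizer no longer lower-casing term text, users who still want case-insensitive search can fold case once in the analyzer chain instead of per lexeme. A hedged sketch against the Lucene 4.x API of that era; the IKTokenizer(Reader, boolean) constructor and the version constant are assumptions, not confirmed by this diff.

import java.io.Reader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.util.Version;

// Illustrative analyzer: IK segmentation followed by a single LowerCaseFilter pass.
public final class CaseFoldingIKAnalyzer extends Analyzer {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer source = new IKTokenizer(reader, true); // useSmart=true; ctor assumed
        return new TokenStreamComponents(source, new LowerCaseFilter(Version.LUCENE_47, source));
    }
}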