Bug fix: a leading blank character caused an ArrayIndexOutOfBoundsException
parent 35700686c9
commit e2fb31a55e
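For context on the failure mode: the Dictionary helpers formerly called String.valueOf(charArray).trim() before matching, so a segment buffer with a leading blank became shorter than the cursor positions that were computed against the untrimmed buffer. A minimal sketch of that mismatch (hypothetical buffer contents, not code from this repository):

    // Hypothetical illustration of the pre-fix failure, not code from this commit.
    public class TrimIndexDemo {
        public static void main(String[] args) {
            char[] segmentBuff = " 中文分词".toCharArray();   // leading blank, length 5
            int cursor = segmentBuff.length - 1;              // valid index into the untrimmed buffer

            // Before this fix, the dictionary match helpers trimmed the buffer first:
            char[] trimmed = String.valueOf(segmentBuff).trim().toLowerCase().toCharArray(); // length 4

            // Index-based matching then reuses the original cursor against the shorter array:
            System.out.println(trimmed[cursor]);              // ArrayIndexOutOfBoundsException: index 4, length 4
        }
    }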
pom.xml
@@ -6,7 +6,7 @@
 <modelVersion>4.0.0</modelVersion>
 <groupId>org.elasticsearch</groupId>
 <artifactId>elasticsearch-analysis-ik</artifactId>
-<version>1.2.4</version>
+<version>1.2.5</version>
 <packaging>jar</packaging>
 <description>IK Analyzer for ElasticSearch</description>
 <inceptionYear>2009</inceptionYear>
@@ -31,7 +31,7 @@
 </parent>

 <properties>
-<elasticsearch.version>0.90.6</elasticsearch.version>
+<elasticsearch.version>0.90.2</elasticsearch.version>
 </properties>

 <repositories>
CJKSegmenter.java
@@ -25,12 +25,12 @@
 */
 package org.wltea.analyzer.core;

-import java.util.LinkedList;
-import java.util.List;

 import org.wltea.analyzer.dic.Dictionary;
 import org.wltea.analyzer.dic.Hit;

+import java.util.LinkedList;
+import java.util.List;


 /**
 * 中文-日韩文子分词器
@@ -58,7 +58,7 @@ class CJKSegmenter implements ISegmenter {
 //处理词段队列
 Hit[] tmpArray = this.tmpHits.toArray(new Hit[this.tmpHits.size()]);
 for(Hit hit : tmpArray){
-hit = Dictionary.getSingleton().matchWithHit(context.getSegmentBuff(), context.getCursor() , hit);
+hit = Dictionary.getSingleton().matchWithHit(String.valueOf(context.getSegmentBuff()).toLowerCase().toCharArray(), context.getCursor() , hit);
 if(hit.isMatch()){
 //输出当前的词
 Lexeme newLexeme = new Lexeme(context.getBufferOffset() , hit.getBegin() , context.getCursor() - hit.getBegin() + 1 , Lexeme.TYPE_CNWORD);
@@ -77,7 +77,7 @@ class CJKSegmenter implements ISegmenter {

 //*********************************
 //再对当前指针位置的字符进行单字匹配
-Hit singleCharHit = Dictionary.getSingleton().matchInMainDict(context.getSegmentBuff(), context.getCursor(), 1);
+Hit singleCharHit = Dictionary.getSingleton().matchInMainDict(String.valueOf(context.getSegmentBuff()).toLowerCase().toCharArray(), context.getCursor(), 1);
 if(singleCharHit.isMatch()){//首字成词
 //输出当前的词
 Lexeme newLexeme = new Lexeme(context.getBufferOffset() , context.getCursor() , 1 , Lexeme.TYPE_CNWORD);
IKSegmenter.java
@@ -23,16 +23,16 @@
 */
 package org.wltea.analyzer.core;

-import java.io.IOException;
-import java.io.Reader;
-import java.util.ArrayList;
-import java.util.List;

 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.wltea.analyzer.cfg.Configuration;
 import org.wltea.analyzer.dic.Dictionary;

+import java.io.IOException;
+import java.io.Reader;
+import java.util.ArrayList;
+import java.util.List;

 /**
 * IK分词器主类
 *
DictSegment.java
@@ -114,8 +114,8 @@ class DictSegment implements Comparable<DictSegment>{
 }
 //设置hit的当前处理位置
 searchHit.setEnd(begin);

 Character keyChar = new Character(charArray[begin]);
 DictSegment ds = null;

 //引用实例变量为本地变量,避免查询时遇到更新的同步问题
Dictionary.java
@@ -152,7 +152,7 @@ public class Dictionary {
 * @return Hit 匹配结果描述
 */
 public Hit matchInMainDict(char[] charArray , int begin, int length){
-return singleton._MainDict.match(String.valueOf(charArray).trim().toLowerCase().toCharArray(), begin, length);
+return singleton._MainDict.match(String.valueOf(charArray).toLowerCase().toCharArray(), begin, length);
 }

 /**
@@ -160,7 +160,7 @@ public class Dictionary {
 * @return Hit 匹配结果描述
 */
 public Hit matchInQuantifierDict(char[] charArray , int begin, int length){
-return singleton._QuantifierDict.match(String.valueOf(charArray).trim().toLowerCase().toCharArray(), begin, length);
+return singleton._QuantifierDict.match(String.valueOf(charArray).toLowerCase().toCharArray(), begin, length);
 }


@@ -179,7 +179,7 @@ public class Dictionary {
 * @return boolean
 */
 public boolean isStopWord(char[] charArray , int begin, int length){
-return singleton._StopWords.match(String.valueOf(charArray).trim().toLowerCase().toCharArray(), begin, length).isMatch();
+return singleton._StopWords.match(String.valueOf(charArray).toLowerCase().toCharArray(), begin, length).isMatch();
 }

 /**
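The three Dictionary hunks above are the heart of the fix: the match helpers no longer trim() the buffer, so the begin/length arguments keep lining up with the caller's cursor, while the call sites in CJKSegmenter and the tokenizer take over the lower-casing. A small sketch of why that swap is safe (hypothetical input, not repository code):

    // Hypothetical input, illustrating why trim() broke indexing but toLowerCase() does not.
    public class LengthDemo {
        public static void main(String[] args) {
            char[] buff = " IK分词".toCharArray();                                     // length 5, leading blank

            int trimmedLen = String.valueOf(buff).trim().toCharArray().length;        // 4: indices shift
            int loweredLen = String.valueOf(buff).toLowerCase().toCharArray().length; // 5: indices preserved for this input

            System.out.println(trimmedLen + " vs " + loweredLen);
        }
    }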
IKTokenizer.java
@@ -26,9 +26,6 @@
 */
 package org.wltea.analyzer.lucene;

-import java.io.IOException;
-import java.io.Reader;

 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
@@ -38,6 +35,9 @@ import org.elasticsearch.env.Environment;
 import org.wltea.analyzer.core.IKSegmenter;
 import org.wltea.analyzer.core.Lexeme;

+import java.io.IOException;
+import java.io.Reader;

 /**
 * IK分词器 Lucene Tokenizer适配器类
 * 兼容Lucene 4.0版本
@@ -80,7 +80,7 @@ public final class IKTokenizer extends Tokenizer {
 if(nextLexeme != null){
 //将Lexeme转成Attributes
 //设置词元文本
-termAtt.append(nextLexeme.getLexemeText());
+termAtt.append(nextLexeme.getLexemeText().toLowerCase());
 //设置词元长度
 termAtt.setLength(nextLexeme.getLength());
 //设置词元位移
IKQueryExpressionParser.java
@@ -24,20 +24,16 @@
 */
 package org.wltea.analyzer.query;

+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.*;
+import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.util.BytesRef;

 import java.util.ArrayList;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Stack;

-import org.apache.lucene.index.Term;
-import org.apache.lucene.search.BooleanClause;
-import org.apache.lucene.search.BooleanQuery;
-import org.apache.lucene.search.Query;
-import org.apache.lucene.search.TermQuery;
-import org.apache.lucene.search.TermRangeQuery;
-import org.apache.lucene.search.BooleanClause.Occur;
-import org.apache.lucene.util.BytesRef;

 /**
 * IK简易查询表达式解析
 * 结合SWMCQuery算法
@@ -66,7 +62,7 @@ public class IKQueryExpressionParser {
 */
 public Query parseExp(String expression , boolean quickMode){
 Query lucenceQuery = null;
-if(expression != null && !"".equals(expression.trim())){
+if(expression != null && !"".equals(expression)){
 try{
 //文法解析
 this.splitElements(expression);
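Dropping trim() from the empty-expression guard also changes what counts as blank: a whitespace-only expression now reaches splitElements instead of short-circuiting to a null query. A sketch of the guard alone (hypothetical input, not repository code):

    // Illustrative only: how the old and new guards evaluate a whitespace-only expression.
    public class GuardDemo {
        public static void main(String[] args) {
            String expression = "   ";  // whitespace-only query expression
            boolean oldGuard = expression != null && !"".equals(expression.trim()); // false: parseExp returned null
            boolean newGuard = expression != null && !"".equals(expression);        // true: now falls through to splitElements()
            System.out.println(oldGuard + " -> " + newGuard);
        }
    }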
|