diff --git a/pom.xml b/pom.xml
index 1be1ae0..1a0bbe0 100644
--- a/pom.xml
+++ b/pom.xml
@@ -6,7 +6,7 @@
4.0.0
org.elasticsearch
elasticsearch-analysis-ik
- 1.2.4
+ 1.2.5
jar
IK Analyzer for ElasticSearch
2009
@@ -31,7 +31,7 @@
- 0.90.6
+ 0.90.2
diff --git a/src/main/java/org/wltea/analyzer/core/CJKSegmenter.java b/src/main/java/org/wltea/analyzer/core/CJKSegmenter.java
index 5867ff5..d72eaa7 100644
--- a/src/main/java/org/wltea/analyzer/core/CJKSegmenter.java
+++ b/src/main/java/org/wltea/analyzer/core/CJKSegmenter.java
@@ -25,12 +25,12 @@
*/
package org.wltea.analyzer.core;
-import java.util.LinkedList;
-import java.util.List;
-
import org.wltea.analyzer.dic.Dictionary;
import org.wltea.analyzer.dic.Hit;
+import java.util.LinkedList;
+import java.util.List;
+
/**
* 中文-日韩文子分词器
@@ -58,7 +58,7 @@ class CJKSegmenter implements ISegmenter {
//处理词段队列
Hit[] tmpArray = this.tmpHits.toArray(new Hit[this.tmpHits.size()]);
for(Hit hit : tmpArray){
- hit = Dictionary.getSingleton().matchWithHit(context.getSegmentBuff(), context.getCursor() , hit);
+ hit = Dictionary.getSingleton().matchWithHit(String.valueOf(context.getSegmentBuff()).toLowerCase().toCharArray(), context.getCursor() , hit);
if(hit.isMatch()){
//输出当前的词
Lexeme newLexeme = new Lexeme(context.getBufferOffset() , hit.getBegin() , context.getCursor() - hit.getBegin() + 1 , Lexeme.TYPE_CNWORD);
@@ -77,7 +77,7 @@ class CJKSegmenter implements ISegmenter {
//*********************************
//再对当前指针位置的字符进行单字匹配
- Hit singleCharHit = Dictionary.getSingleton().matchInMainDict(context.getSegmentBuff(), context.getCursor(), 1);
+ Hit singleCharHit = Dictionary.getSingleton().matchInMainDict(String.valueOf(context.getSegmentBuff()).toLowerCase().toCharArray(), context.getCursor(), 1);
if(singleCharHit.isMatch()){//首字成词
//输出当前的词
Lexeme newLexeme = new Lexeme(context.getBufferOffset() , context.getCursor() , 1 , Lexeme.TYPE_CNWORD);
diff --git a/src/main/java/org/wltea/analyzer/core/IKSegmenter.java b/src/main/java/org/wltea/analyzer/core/IKSegmenter.java
index 4275923..9a16eea 100644
--- a/src/main/java/org/wltea/analyzer/core/IKSegmenter.java
+++ b/src/main/java/org/wltea/analyzer/core/IKSegmenter.java
@@ -23,16 +23,16 @@
*/
package org.wltea.analyzer.core;
-import java.io.IOException;
-import java.io.Reader;
-import java.util.ArrayList;
-import java.util.List;
-
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.wltea.analyzer.cfg.Configuration;
import org.wltea.analyzer.dic.Dictionary;
+import java.io.IOException;
+import java.io.Reader;
+import java.util.ArrayList;
+import java.util.List;
+
/**
* IK分词器主类
*
diff --git a/src/main/java/org/wltea/analyzer/dic/DictSegment.java b/src/main/java/org/wltea/analyzer/dic/DictSegment.java
index 7e2f420..26d1993 100644
--- a/src/main/java/org/wltea/analyzer/dic/DictSegment.java
+++ b/src/main/java/org/wltea/analyzer/dic/DictSegment.java
@@ -114,8 +114,8 @@ class DictSegment implements Comparable{
}
//设置hit的当前处理位置
searchHit.setEnd(begin);
-
- Character keyChar = new Character(charArray[begin]);
+
+ Character keyChar = new Character(charArray[begin]);
DictSegment ds = null;
//引用实例变量为本地变量,避免查询时遇到更新的同步问题
diff --git a/src/main/java/org/wltea/analyzer/dic/Dictionary.java b/src/main/java/org/wltea/analyzer/dic/Dictionary.java
index 206ffad..449ee3c 100644
--- a/src/main/java/org/wltea/analyzer/dic/Dictionary.java
+++ b/src/main/java/org/wltea/analyzer/dic/Dictionary.java
@@ -152,7 +152,7 @@ public class Dictionary {
* @return Hit 匹配结果描述
*/
public Hit matchInMainDict(char[] charArray , int begin, int length){
- return singleton._MainDict.match(String.valueOf(charArray).trim().toLowerCase().toCharArray(), begin, length);
+ return singleton._MainDict.match(String.valueOf(charArray).toLowerCase(java.util.Locale.ROOT).toCharArray(), begin, length); // Locale.ROOT: lowercasing must not depend on the JVM default locale (e.g. Turkish maps 'I' to dotless 'ı')
}
/**
@@ -160,7 +160,7 @@ public class Dictionary {
* @return Hit 匹配结果描述
*/
public Hit matchInQuantifierDict(char[] charArray , int begin, int length){
- return singleton._QuantifierDict.match(String.valueOf(charArray).trim().toLowerCase().toCharArray(), begin, length);
+ return singleton._QuantifierDict.match(String.valueOf(charArray).toLowerCase(java.util.Locale.ROOT).toCharArray(), begin, length); // Locale.ROOT: lowercasing must not depend on the JVM default locale (e.g. Turkish maps 'I' to dotless 'ı')
}
@@ -179,7 +179,7 @@ public class Dictionary {
* @return boolean
*/
public boolean isStopWord(char[] charArray , int begin, int length){
- return singleton._StopWords.match(String.valueOf(charArray).trim().toLowerCase().toCharArray(), begin, length).isMatch();
+ return singleton._StopWords.match(String.valueOf(charArray).toLowerCase(java.util.Locale.ROOT).toCharArray(), begin, length).isMatch(); // Locale.ROOT: lowercasing must not depend on the JVM default locale (e.g. Turkish maps 'I' to dotless 'ı')
}
/**
diff --git a/src/main/java/org/wltea/analyzer/lucene/IKTokenizer.java b/src/main/java/org/wltea/analyzer/lucene/IKTokenizer.java
index 70b382b..5083e6a 100644
--- a/src/main/java/org/wltea/analyzer/lucene/IKTokenizer.java
+++ b/src/main/java/org/wltea/analyzer/lucene/IKTokenizer.java
@@ -26,9 +26,6 @@
*/
package org.wltea.analyzer.lucene;
-import java.io.IOException;
-import java.io.Reader;
-
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
@@ -38,6 +35,9 @@ import org.elasticsearch.env.Environment;
import org.wltea.analyzer.core.IKSegmenter;
import org.wltea.analyzer.core.Lexeme;
+import java.io.IOException;
+import java.io.Reader;
+
/**
* IK分词器 Lucene Tokenizer适配器类
* 兼容Lucene 4.0版本
@@ -80,7 +80,7 @@ public final class IKTokenizer extends Tokenizer {
if(nextLexeme != null){
//将Lexeme转成Attributes
//设置词元文本
- termAtt.append(nextLexeme.getLexemeText());
+ termAtt.append(nextLexeme.getLexemeText().toLowerCase());
//设置词元长度
termAtt.setLength(nextLexeme.getLength());
//设置词元位移
diff --git a/src/main/java/org/wltea/analyzer/query/IKQueryExpressionParser.java b/src/main/java/org/wltea/analyzer/query/IKQueryExpressionParser.java
index 1b86a35..679ec12 100644
--- a/src/main/java/org/wltea/analyzer/query/IKQueryExpressionParser.java
+++ b/src/main/java/org/wltea/analyzer/query/IKQueryExpressionParser.java
@@ -24,20 +24,16 @@
*/
package org.wltea.analyzer.query;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.*;
+import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.util.BytesRef;
+
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import java.util.Stack;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.search.BooleanClause;
-import org.apache.lucene.search.BooleanQuery;
-import org.apache.lucene.search.Query;
-import org.apache.lucene.search.TermQuery;
-import org.apache.lucene.search.TermRangeQuery;
-import org.apache.lucene.search.BooleanClause.Occur;
-import org.apache.lucene.util.BytesRef;
-
/**
* IK简易查询表达式解析
* 结合SWMCQuery算法
@@ -66,7 +62,7 @@ public class IKQueryExpressionParser {
*/
public Query parseExp(String expression , boolean quickMode){
Query lucenceQuery = null;
- if(expression != null && !"".equals(expression.trim())){
+ if(expression != null && !"".equals(expression)){
try{
//文法解析
this.splitElements(expression);