From a542bbc22917ae03294c417b0e7ddebb48b7823e Mon Sep 17 00:00:00 2001 From: medcl Date: Tue, 12 Nov 2013 15:42:42 +0800 Subject: [PATCH] lowercased before check the some dict --- README.textile | 9 +++++++-- pom.xml | 4 ++-- .../java/org/wltea/analyzer/dic/Dictionary.java | 16 ++++++++-------- 3 files changed, 17 insertions(+), 12 deletions(-) diff --git a/README.textile b/README.textile index 3ac5cc7..3fe92b2 100644 --- a/README.textile +++ b/README.textile @@ -35,7 +35,7 @@ https://github.com/medcl/elasticsearch-analysis-ik/blob/master/config/ik/IKAnaly IK Analyzer 扩展配置 - + custom/mydict.dic;custom/single_word_low_freq.dic custom/ext_stopword.dic @@ -197,4 +197,9 @@ here is the query result -have fun. \ No newline at end of file +have fun. + +常见问题: + +1.自定义词典为什么没有生效? +请确保你的扩展词典的文本格式为UTF8编码 \ No newline at end of file diff --git a/pom.xml b/pom.xml index 39d9aa1..49a2139 100644 --- a/pom.xml +++ b/pom.xml @@ -6,7 +6,7 @@ 4.0.0 org.elasticsearch elasticsearch-analysis-ik - 1.2.3 + 1.2.4 jar IK Analyzer for ElasticSearch 2009 @@ -31,7 +31,7 @@ - 0.90.6 + 0.90.2 diff --git a/src/main/java/org/wltea/analyzer/dic/Dictionary.java b/src/main/java/org/wltea/analyzer/dic/Dictionary.java index 17c51f7..206ffad 100644 --- a/src/main/java/org/wltea/analyzer/dic/Dictionary.java +++ b/src/main/java/org/wltea/analyzer/dic/Dictionary.java @@ -25,14 +25,14 @@ */ package org.wltea.analyzer.dic; -import java.io.*; -import java.util.Collection; -import java.util.List; - import org.elasticsearch.common.logging.ESLogger; import org.elasticsearch.common.logging.Loggers; import org.wltea.analyzer.cfg.Configuration; +import java.io.*; +import java.util.Collection; +import java.util.List; + /** * 词典管理类,单子模式 */ @@ -152,7 +152,7 @@ public class Dictionary { * @return Hit 匹配结果描述 */ public Hit matchInMainDict(char[] charArray , int begin, int length){ - return singleton._MainDict.match(charArray, begin, length); + return
singleton._MainDict.match(String.valueOf(charArray).toLowerCase().toCharArray(), begin, length); /* lowercase only, no trim(): begin/length are offsets into the caller's buffer, so the copy must keep every position */ } /** @@ -160,7 +160,7 @@ public class Dictionary { * @return Hit 匹配结果描述 */ public Hit matchInQuantifierDict(char[] charArray , int begin, int length){ - return singleton._QuantifierDict.match(charArray, begin, length); + return singleton._QuantifierDict.match(String.valueOf(charArray).toLowerCase().toCharArray(), begin, length); /* lowercase only, no trim(): begin/length are offsets into the caller's buffer, so the copy must keep every position */ } @@ -179,7 +179,7 @@ public class Dictionary { * @return boolean */ public boolean isStopWord(char[] charArray , int begin, int length){ - return singleton._StopWords.match(charArray, begin, length).isMatch(); + return singleton._StopWords.match(String.valueOf(charArray).toLowerCase().toCharArray(), begin, length).isMatch(); /* lowercase only, no trim(): begin/length are offsets into the caller's buffer, so the copy must keep every position */ } /** @@ -253,7 +253,7 @@ public class Dictionary { String theWord = null; do { theWord = br.readLine(); - if (theWord != null && !"".equals(theWord.trim())) { + if (theWord != null && !"".equals(theWord.trim())) { //加载扩展词典数据到主内存词典中 _MainDict.fillSegment(theWord.trim().toLowerCase().toCharArray()); }