Lowercase the input before checking the dictionaries
This commit is contained in:
parent
6480991bbb
commit
a542bbc229
@ -35,7 +35,7 @@ https://github.com/medcl/elasticsearch-analysis-ik/blob/master/config/ik/IKAnaly
|
|||||||
<!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">
|
<!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">
|
||||||
<properties>
|
<properties>
|
||||||
<comment>IK Analyzer 扩展配置</comment>
|
<comment>IK Analyzer 扩展配置</comment>
|
||||||
<!--用户可以在这里配置自己的扩展字典 -->
|
<!--用户可以在这里配置自己的扩展字典 -->
|
||||||
<entry key="ext_dict">custom/mydict.dic;custom/single_word_low_freq.dic</entry>
|
<entry key="ext_dict">custom/mydict.dic;custom/single_word_low_freq.dic</entry>
|
||||||
<!--用户可以在这里配置自己的扩展停止词字典-->
|
<!--用户可以在这里配置自己的扩展停止词字典-->
|
||||||
<entry key="ext_stopwords">custom/ext_stopword.dic</entry>
|
<entry key="ext_stopwords">custom/ext_stopword.dic</entry>
|
||||||
@ -197,4 +197,9 @@ here is the query result
|
|||||||
</pre>
|
</pre>
|
||||||
|
|
||||||
|
|
||||||
have fun.
|
have fun.
|
||||||
|
|
||||||
|
常见问题:
|
||||||
|
|
||||||
|
1.自定义词典为什么没有生效?
|
||||||
|
请确保你的扩展词典的文本格式为UTF8编码
|
4
pom.xml
4
pom.xml
@ -6,7 +6,7 @@
|
|||||||
<modelVersion>4.0.0</modelVersion>
|
<modelVersion>4.0.0</modelVersion>
|
||||||
<groupId>org.elasticsearch</groupId>
|
<groupId>org.elasticsearch</groupId>
|
||||||
<artifactId>elasticsearch-analysis-ik</artifactId>
|
<artifactId>elasticsearch-analysis-ik</artifactId>
|
||||||
<version>1.2.3</version>
|
<version>1.2.4</version>
|
||||||
<packaging>jar</packaging>
|
<packaging>jar</packaging>
|
||||||
<description>IK Analyzer for ElasticSearch</description>
|
<description>IK Analyzer for ElasticSearch</description>
|
||||||
<inceptionYear>2009</inceptionYear>
|
<inceptionYear>2009</inceptionYear>
|
||||||
@ -31,7 +31,7 @@
|
|||||||
</parent>
|
</parent>
|
||||||
|
|
||||||
<properties>
|
<properties>
|
||||||
<elasticsearch.version>0.90.6</elasticsearch.version>
|
<elasticsearch.version>0.90.2</elasticsearch.version>
|
||||||
</properties>
|
</properties>
|
||||||
|
|
||||||
<repositories>
|
<repositories>
|
||||||
|
@ -25,14 +25,14 @@
|
|||||||
*/
|
*/
|
||||||
package org.wltea.analyzer.dic;
|
package org.wltea.analyzer.dic;
|
||||||
|
|
||||||
import java.io.*;
|
|
||||||
import java.util.Collection;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
import org.elasticsearch.common.logging.ESLogger;
|
import org.elasticsearch.common.logging.ESLogger;
|
||||||
import org.elasticsearch.common.logging.Loggers;
|
import org.elasticsearch.common.logging.Loggers;
|
||||||
import org.wltea.analyzer.cfg.Configuration;
|
import org.wltea.analyzer.cfg.Configuration;
|
||||||
|
|
||||||
|
import java.io.*;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 词典管理类,单例模式
|
* 词典管理类,单例模式
|
||||||
*/
|
*/
|
||||||
@ -152,7 +152,7 @@ public class Dictionary {
|
|||||||
* @return Hit 匹配结果描述
|
* @return Hit 匹配结果描述
|
||||||
*/
|
*/
|
||||||
public Hit matchInMainDict(char[] charArray , int begin, int length){
|
public Hit matchInMainDict(char[] charArray , int begin, int length){
|
||||||
return singleton._MainDict.match(charArray, begin, length);
|
return singleton._MainDict.match(String.valueOf(charArray).trim().toLowerCase().toCharArray(), begin, length);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -160,7 +160,7 @@ public class Dictionary {
|
|||||||
* @return Hit 匹配结果描述
|
* @return Hit 匹配结果描述
|
||||||
*/
|
*/
|
||||||
public Hit matchInQuantifierDict(char[] charArray , int begin, int length){
|
public Hit matchInQuantifierDict(char[] charArray , int begin, int length){
|
||||||
return singleton._QuantifierDict.match(charArray, begin, length);
|
return singleton._QuantifierDict.match(String.valueOf(charArray).trim().toLowerCase().toCharArray(), begin, length);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -179,7 +179,7 @@ public class Dictionary {
|
|||||||
* @return boolean
|
* @return boolean
|
||||||
*/
|
*/
|
||||||
public boolean isStopWord(char[] charArray , int begin, int length){
|
public boolean isStopWord(char[] charArray , int begin, int length){
|
||||||
return singleton._StopWords.match(charArray, begin, length).isMatch();
|
return singleton._StopWords.match(String.valueOf(charArray).trim().toLowerCase().toCharArray(), begin, length).isMatch();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -253,7 +253,7 @@ public class Dictionary {
|
|||||||
String theWord = null;
|
String theWord = null;
|
||||||
do {
|
do {
|
||||||
theWord = br.readLine();
|
theWord = br.readLine();
|
||||||
if (theWord != null && !"".equals(theWord.trim())) {
|
if (theWord != null && !"".equals(theWord.trim())) {
|
||||||
//加载扩展词典数据到主内存词典中
|
//加载扩展词典数据到主内存词典中
|
||||||
_MainDict.fillSegment(theWord.trim().toLowerCase().toCharArray());
|
_MainDict.fillSegment(theWord.trim().toLowerCase().toCharArray());
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user