update to 1.2.3
parent 72718510e4
commit ce6e7fd070
@@ -6,7 +6,8 @@ The IK Analysis plugin integrates Lucene IK analyzer into elasticsearch, support
 Version
 -------------
-master | 0.90.0 -> master
+master | 0.90.2 -> master
+1.2.3 | 0.90.2
 1.2.0 | 0.90.0
 1.1.3 | 0.20.2
 1.1.2 | 0.19.x
@@ -35,7 +36,7 @@ https://github.com/medcl/elasticsearch-analysis-ik/blob/master/config/ik/IKAnaly
 <properties>
     <comment>IK Analyzer extended configuration</comment>
     <!-- users can configure their own extension dictionaries here -->
-    <entry key="ext_dict">custom/mydict.dic;custom/sougou.dict</entry>
+    <entry key="ext_dict">custom/mydict.dic;custom/single_word_low_freq.dic</entry>
     <!-- users can configure their own extension stopword dictionaries here -->
     <entry key="ext_stopwords">custom/ext_stopword.dic</entry>
 </properties>
@@ -3,7 +3,7 @@
 <properties>
     <comment>IK Analyzer extended configuration</comment>
     <!-- users can configure their own extension dictionaries here -->
-    <entry key="ext_dict">custom/mydict.dic</entry>
+    <entry key="ext_dict">custom/mydict.dic;custom/single_word_low_freq.dic</entry>
     <!-- users can configure their own extension stopword dictionaries here -->
     <entry key="ext_stopwords">custom/ext_stopword.dic</entry>
 </properties>

@@ -1 +1 @@
-medcl
+medcl
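The ext_dict value is a semicolon-separated list of dictionary paths, resolved relative to the Elasticsearch config directory. As a rough illustration of how such a value becomes file paths (this sketch is an assumption, not code from the commit; the real logic lives in org.wltea.analyzer.cfg.Configuration.getExtDictionarys() and assumes java.util.Properties/List/ArrayList imports):

    // Hedged sketch: turn "custom/mydict.dic;custom/single_word_low_freq.dic"
    // into individual dictionary paths. Splitting on ';' is assumed.
    static List<String> parseExtDict(Properties props) {
        String extDictCfg = props.getProperty("ext_dict");
        List<String> files = new ArrayList<String>(2);
        if (extDictCfg != null) {
            for (String path : extDictCfg.split(";")) {      // entries are ';'-separated
                if (path.trim().length() > 0) {
                    files.add(path.trim());                  // later resolved against the config dir
                }
            }
        }
        return files;
    }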
New files (diffs suppressed because they are too large):

  12638  config/ik/custom/single_word.dic
  12638  config/ik/custom/single_word_full.dic
   2714  config/ik/custom/single_word_low_freq.dic
 398716  config/ik/custom/sougou.dic
pom.xml (4 changed lines)

@@ -6,7 +6,7 @@
 <modelVersion>4.0.0</modelVersion>
 <groupId>org.elasticsearch</groupId>
 <artifactId>elasticsearch-analysis-ik</artifactId>
-<version>1.2.2</version>
+<version>1.2.3</version>
 <packaging>jar</packaging>
 <description>IK Analyzer for ElasticSearch</description>
 <inceptionYear>2009</inceptionYear>

@@ -31,7 +31,7 @@
 </parent>

 <properties>
-    <elasticsearch.version>0.90.0</elasticsearch.version>
+    <elasticsearch.version>0.90.2</elasticsearch.version>
 </properties>

 <repositories>
@@ -6,6 +6,8 @@ import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.Index;
 import org.elasticsearch.index.settings.IndexSettings;
+import org.wltea.analyzer.cfg.Configuration;
+import org.wltea.analyzer.dic.Dictionary;
 import org.wltea.analyzer.lucene.IKAnalyzer;

 public class IkAnalyzerProvider extends AbstractIndexAnalyzerProvider<IKAnalyzer> {
@@ -13,18 +15,21 @@ public class IkAnalyzerProvider extends AbstractIndexAnalyzerProvider<IKAnalyzer
     @Inject
     public IkAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) {
         super(index, indexSettings, name, settings);
+        Dictionary.initial(new Configuration(settings));
         analyzer=new IKAnalyzer(indexSettings,settings);
     }

     public IkAnalyzerProvider(Index index, Settings indexSettings, String name,
             Settings settings) {
         super(index, indexSettings, name, settings);
+        Dictionary.initial(new Configuration(settings));
         analyzer=new IKAnalyzer(indexSettings,settings);
     }

     public IkAnalyzerProvider(Index index, Settings indexSettings,
             String prefixSettings, String name, Settings settings) {
         super(index, indexSettings, prefixSettings, name, settings);
+        Dictionary.initial(new Configuration(settings));
         analyzer=new IKAnalyzer(indexSettings,settings);
     }
@@ -6,27 +6,25 @@ import org.elasticsearch.common.inject.assistedinject.Assisted;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.index.Index;
 import org.elasticsearch.index.settings.IndexSettings;
 import org.wltea.analyzer.cfg.Configuration;
 import org.wltea.analyzer.dic.Dictionary;
 import org.wltea.analyzer.lucene.IKTokenizer;

 import java.io.Reader;

 public class IkTokenizerFactory extends AbstractTokenizerFactory {
-    private boolean useSmart = false;
+    private Settings settings;

     @Inject
     public IkTokenizerFactory(Index index,@IndexSettings Settings indexSettings,@Assisted String name, @Assisted Settings settings) {
         super(index, indexSettings, name, settings);
-        Dictionary.getInstance().Init(indexSettings);
-
-        if (settings.get("use_smart", "true").equals("true")) {
-            useSmart = true;
-        }
+        this.settings=settings;
+        Dictionary.initial(new Configuration(settings));
     }

     @Override
     public Tokenizer create(Reader reader) {
-        return new IKTokenizer(reader, useSmart);
+        return new IKTokenizer(reader, settings);
     }

 }
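The factory no longer computes a smart/non-smart boolean itself; it hands the analyzer settings straight to IKTokenizer. A minimal usage sketch of the same call pattern (illustrative only; ImmutableSettings is the ES 0.90 settings builder imported in IKTokenizer further down, and the sample text is a placeholder):

    // Build Settings by hand and feed them to the new IKTokenizer(Reader, Settings)
    // constructor, mirroring what IkTokenizerFactory.create() now does.
    Settings settings = ImmutableSettings.settingsBuilder()
            .put("use_smart", "false")                       // fine-grained segmentation
            .build();
    Tokenizer tokenizer = new IKTokenizer(new StringReader("中华人民共和国"), settings);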
@@ -21,16 +21,13 @@ public class Configuration {
     private static final String EXT_STOP = "ext_stopwords";
     private static ESLogger logger = null;
     private Properties props;
-    /*
-     * whether to use the smart segmentation mode
-     */
-    private boolean useSmart=true;
+    private Environment environment;

     public Configuration(Settings settings){
         logger = Loggers.getLogger("ik-analyzer");
         props = new Properties();
-        Environment environment=new Environment(settings);
+        environment=new Environment(settings);

         File fileConfig= new File(environment.configFile(), FILE_NAME);

         InputStream input = null;
@@ -42,7 +39,6 @@ public class Configuration {
         if(input != null){
             try {
                 props.loadFromXML(input);
-                logger.info("[Dict Loading] {}",FILE_NAME);
             } catch (InvalidPropertiesFormatException e) {
                 e.printStackTrace();
             } catch (IOException e) {
@@ -51,7 +47,6 @@ public class Configuration {
         }
     }

     public List<String> getExtDictionarys(){
         List<String> extDictFiles = new ArrayList<String>(2);
         String extDictCfg = props.getProperty(EXT_DICT);
@@ -89,4 +84,8 @@ public class Configuration {
         }
         return extStopWordDictFiles;
     }

+    public File getDictRoot() {
+        return environment.configFile();
+    }
 }
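With the new getDictRoot() accessor, dictionary files are resolved against the config directory held inside Configuration instead of a separately constructed Environment. A short sketch of the resulting call pattern (illustrative; the Settings instance is assumed to be whatever the plugin receives):

    // Resolve dictionary files through Configuration rather than Environment.
    Configuration cfg = new Configuration(settings);
    File mainDict = new File(cfg.getDictRoot(), Dictionary.PATH_DIC_MAIN);   // ik/main.dic under the config dir
    List<String> extDicts = cfg.getExtDictionarys();                         // paths from the ext_dict entry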
@@ -90,10 +90,6 @@ class AnalyzeContext {
     int getCursor(){
         return this.cursor;
     }
-//
-//    void setCursor(int cursor){
-//        this.cursor = cursor;
-//    }

     char[] getSegmentBuff(){
         return this.segmentBuff;
@@ -115,7 +111,7 @@ class AnalyzeContext {
      * fill segmentBuff according to the current state of the context
      * @param reader
      * @return the length of the (valid) string ready for analysis
-     * @throws IOException
+     * @throws java.io.IOException
      */
     int fillBuffer(Reader reader) throws IOException{
         int readCount = 0;
@@ -28,8 +28,8 @@ import java.io.Reader;
 import java.util.ArrayList;
 import java.util.List;

+import org.elasticsearch.common.settings.Settings;
 import org.wltea.analyzer.cfg.Configuration;
 //import org.wltea.analyzer.cfg.DefaultConfig;
 import org.wltea.analyzer.dic.Dictionary;

 /**
@@ -53,38 +53,33 @@ public final class IKSegmenter {

     /**
      * IK segmenter constructor
      * @param input
-     * @param useSmart  true to use the smart segmentation strategy
-     *
-     * non-smart mode: fine-grained output of every possible split
-     * smart mode: merges numerals with quantifiers and disambiguates the result
      */
-    public IKSegmenter(Reader input , boolean useSmart){
+    public IKSegmenter(Reader input , Settings settings){
         this.input = input;
 //      this.cfg = DefaultConfig.getInstance();
-        this.useSmart=useSmart;
-        this.init();
+        this.cfg = new Configuration(settings);
+        this.useSmart = settings.get("use_smart", "true").equals("true");
+        this.init();
     }

-    /**
-     * IK segmenter constructor
-     * @param input
-     * @param cfg  build the segmenter with a custom Configuration
-     *
-     */
-    public IKSegmenter(Reader input , Configuration cfg){
-        this.input = input;
-        this.cfg = cfg;
-        this.init();
-    }
+//  /**
+//   * IK segmenter constructor
+//   * @param input
+//   * @param cfg  build the segmenter with a custom Configuration
+//   *
+//   */
+//  public IKSegmenter(Reader input , Configuration cfg){
+//      this.input = input;
+//      this.cfg = cfg;
+//      this.init();
+//  }

     /**
      * initialization
      */
     private void init(){
         // initialize the dictionary singleton
-//      Dictionary.initial(this.cfg);
-//      Dictionary.getSingleton();
+        Dictionary.initial(this.cfg);
         // initialize the analysis context
         this.context = new AnalyzeContext(useSmart);
         // load the sub-segmenters
@@ -111,7 +106,7 @@ public final class IKSegmenter {
     /**
      * get the next lexeme from the segmentation
      * @return Lexeme  the lexeme object
-     * @throws IOException
+     * @throws java.io.IOException
      */
     public synchronized Lexeme next()throws IOException{
         Lexeme l = null;
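The new constructor can also be driven directly. A small illustrative sketch (the Settings value is assumed to come from the analyzer settings; use_smart defaults to true when absent, as in the constructor above; exception handling is omitted):

    // Feed text through the new IKSegmenter(Reader, Settings) entry point and print
    // each lexeme; this is the same loop SWMCQueryBuilder.doAnalyze() uses further down.
    IKSegmenter ikSeg = new IKSegmenter(new StringReader("中文分词测试"), settings);
    Lexeme lexeme;
    while ((lexeme = ikSeg.next()) != null) {      // next() throws IOException; wrap in try/catch in real code
        System.out.println(lexeme.getLexemeText());
    }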
@@ -64,8 +64,6 @@ public class Dictionary {
     */
    private Configuration configuration;
    private ESLogger logger=null;
-   private static boolean dictInited=false;
-   private Environment environment;
    public static final String PATH_DIC_MAIN = "ik/main.dic";
    public static final String PATH_DIC_SURNAME = "ik/surname.dic";
    public static final String PATH_DIC_QUANTIFIER = "ik/quantifier.dic";
@@ -75,33 +73,6 @@ public class Dictionary {
    private Dictionary(){
        logger = Loggers.getLogger("ik-analyzer");
    }
    static{
        singleton = new Dictionary();
    }
-// public Configuration getConfig(){
-//     return configuration;
-// }
-// private Dictionary(Configuration cfg){
-//     this.cfg = cfg;
-//     this.loadMainDict();
-//     this.loadStopWordDict();
-//     this.loadQuantifierDict();
-// }
-
-   public void Init(Settings indexSettings){
-
-       if(!dictInited){
-           environment =new Environment(indexSettings);
-           configuration=new Configuration(indexSettings);
-           loadMainDict();
-           loadSurnameDict();
-           loadQuantifierDict();
-           loadSuffixDict();
-           loadPrepDict();
-           loadStopWordDict();
-           dictInited=true;
-       }
-   }

    /**
     * dictionary initialization
@@ -111,17 +82,24 @@ public class Dictionary {
     * provides a way to initialize the dictionary during application startup
     * @return Dictionary
     */
-// public static Dictionary initial(Configuration cfg){
-//     if(singleton == null){
-//         synchronized(Dictionary.class){
-//             if(singleton == null){
-//                 singleton = new Dictionary();
-//                 return singleton;
-//             }
-//         }
-//     }
-//     return singleton;
-// }
+   public static Dictionary initial(Configuration cfg){
+       if(singleton == null){
+           synchronized(Dictionary.class){
+               if(singleton == null){
+                   singleton = new Dictionary();
+                   singleton.configuration=cfg;
+                   singleton.loadMainDict();
+                   singleton.loadSurnameDict();
+                   singleton.loadQuantifierDict();
+                   singleton.loadSuffixDict();
+                   singleton.loadPrepDict();
+                   singleton.loadStopWordDict();
+                   return singleton;
+               }
+           }
+       }
+       return singleton;
+   }

    /**
     * get the dictionary singleton instance
@@ -151,7 +129,6 @@ public class Dictionary {

    /**
     * batch-disable (mask) entries
-    * @param words
     */
    public void disableWords(Collection<String> words){
        if(words != null){
@@ -166,7 +143,6 @@ public class Dictionary {

    /**
     * look up a match in the main dictionary
-    * @param charArray
     * @return Hit  the match result
     */
    public Hit matchInMainDict(char[] charArray){
@@ -175,9 +151,6 @@ public class Dictionary {

    /**
     * look up a match in the main dictionary
-    * @param charArray
-    * @param begin
-    * @param length
     * @return Hit  the match result
     */
    public Hit matchInMainDict(char[] charArray , int begin, int length){
@@ -186,9 +159,6 @@ public class Dictionary {

    /**
     * look up a match in the quantifier dictionary
-    * @param charArray
-    * @param begin
-    * @param length
     * @return Hit  the match result
     */
    public Hit matchInQuantifierDict(char[] charArray , int begin, int length){
@@ -198,9 +168,6 @@ public class Dictionary {

    /**
     * take the DictSegment from an already matched Hit and continue matching downward
-    * @param charArray
-    * @param currentIndex
-    * @param matchedHit
     * @return Hit
     */
    public Hit matchWithHit(char[] charArray , int currentIndex , Hit matchedHit){
@@ -211,9 +178,6 @@ public class Dictionary {

    /**
     * check whether the fragment is a stopword
-    * @param charArray
-    * @param begin
-    * @param length
     * @return boolean
     */
    public boolean isStopWord(char[] charArray , int begin, int length){
@@ -226,8 +190,9 @@ public class Dictionary {
    private void loadMainDict(){
        // create the main dictionary instance
        _MainDict = new DictSegment((char)0);

        // read the main dictionary file
-       File file= new File(environment.configFile(), Dictionary.PATH_DIC_MAIN);
+       File file= new File(configuration.getDictRoot(), Dictionary.PATH_DIC_MAIN);

        InputStream is = null;
        try {
@@ -273,8 +238,8 @@ public class Dictionary {
        InputStream is = null;
        for(String extDictName : extDictFiles){
            // read the extension dictionary file
-           logger.info("加载扩展词典:" + extDictName);
-           File file=new File(environment.configFile(), extDictName);
+           logger.info("[Dict Loading]" + extDictName);
+           File file=new File(configuration.getDictRoot(), extDictName);
            try {
                is = new FileInputStream(file);
            } catch (FileNotFoundException e) {
@@ -316,17 +281,53 @@ public class Dictionary {
     * load the user's extension stopword dictionaries
     */
    private void loadStopWordDict(){
        // create the stopword dictionary instance
        _StopWords = new DictSegment((char)0);
+
+       // read the main stopword dictionary file
+       File file= new File(configuration.getDictRoot(), Dictionary.PATH_DIC_STOP);
+
+       InputStream is = null;
+       try {
+           is = new FileInputStream(file);
+       } catch (FileNotFoundException e) {
+           e.printStackTrace();
+       }
+
+       try {
+           BufferedReader br = new BufferedReader(new InputStreamReader(is , "UTF-8"), 512);
+           String theWord = null;
+           do {
+               theWord = br.readLine();
+               if (theWord != null && !"".equals(theWord.trim())) {
+                   _StopWords.fillSegment(theWord.trim().toLowerCase().toCharArray());
+               }
+           } while (theWord != null);
+
+       } catch (IOException e) {
+           logger.error("ik-analyzer",e);
+
+       }finally{
+           try {
+               if(is != null){
+                   is.close();
+                   is = null;
+               }
+           } catch (IOException e) {
+               logger.error("ik-analyzer",e);
+           }
+       }
+
        // load the extension stopword dictionaries
        List<String> extStopWordDictFiles = configuration.getExtStopWordDictionarys();
        if(extStopWordDictFiles != null){
-           InputStream is = null;
+           is = null;
            for(String extStopWordDictName : extStopWordDictFiles){
 //             logger.info("加载扩展停止词典:" + extStopWordDictName);
+               logger.info("[Dict Loading]" + extStopWordDictName);

                // read the extension dictionary file
-               File file=new File(environment.configFile(), extStopWordDictName);
+               file=new File(configuration.getDictRoot(), extStopWordDictName);
                try {
                    is = new FileInputStream(file);
                } catch (FileNotFoundException e) {
@@ -371,7 +372,7 @@ public class Dictionary {
        // create a quantifier dictionary instance
        _QuantifierDict = new DictSegment((char)0);
        // read the quantifier dictionary file
-       File file=new File(environment.configFile(),Dictionary.PATH_DIC_QUANTIFIER);
+       File file=new File(configuration.getDictRoot(),Dictionary.PATH_DIC_QUANTIFIER);
        InputStream is = null;
        try {
            is = new FileInputStream(file);
@@ -407,7 +408,7 @@ public class Dictionary {
    private void loadSurnameDict(){

        _SurnameDict = new DictSegment((char)0);
-       File file=new File(environment.configFile(),Dictionary.PATH_DIC_SURNAME);
+       File file=new File(configuration.getDictRoot(),Dictionary.PATH_DIC_SURNAME);
        InputStream is = null;
        try {
            is = new FileInputStream(file);
@@ -426,7 +427,6 @@ public class Dictionary {
                _SurnameDict.fillSegment(theWord.trim().toCharArray());
            }
        } while (theWord != null);
-//      logger.info("[Dict Loading] {},SurnameDict Size:{}",file.toString(),_SurnameDict.getDicNum());
        } catch (IOException e) {
            logger.error("ik-analyzer",e);
        }finally{
@@ -446,7 +446,7 @@ public class Dictionary {
    private void loadSuffixDict(){

        _SuffixDict = new DictSegment((char)0);
-       File file=new File(environment.configFile(),Dictionary.PATH_DIC_SUFFIX);
+       File file=new File(configuration.getDictRoot(),Dictionary.PATH_DIC_SUFFIX);
        InputStream is = null;
        try {
            is = new FileInputStream(file);
@@ -466,15 +466,12 @@ public class Dictionary {
                _SuffixDict.fillSegment(theWord.trim().toCharArray());
            }
        } while (theWord != null);
-//      logger.info("[Dict Loading] {},SuffixDict Size:{}",file.toString(),_SuffixDict.getDicNum());
        } catch (IOException e) {
            logger.error("ik-analyzer",e);
        }finally{
            try {
-               if(is != null){
-                   is.close();
-                   is = null;
-               }
+               is.close();
+               is = null;
            } catch (IOException e) {
                logger.error("ik-analyzer",e);
            }
@@ -485,7 +482,7 @@ public class Dictionary {
    private void loadPrepDict(){

        _PrepDict = new DictSegment((char)0);
-       File file=new File(environment.configFile(),Dictionary.PATH_DIC_PREP);
+       File file=new File(configuration.getDictRoot(),Dictionary.PATH_DIC_PREP);
        InputStream is = null;
        try {
            is = new FileInputStream(file);
@@ -506,23 +503,17 @@ public class Dictionary {
                _PrepDict.fillSegment(theWord.trim().toCharArray());
            }
        } while (theWord != null);
-//      logger.info("[Dict Loading] {},PrepDict Size:{}",file.toString(),_PrepDict.getDicNum());
        } catch (IOException e) {
            logger.error("ik-analyzer",e);
        }finally{
            try {
-               if(is != null){
-                   is.close();
-                   is = null;
-               }
+               is.close();
+               is = null;
            } catch (IOException e) {
                logger.error("ik-analyzer",e);
            }
        }
    }

    public static Dictionary getInstance(){
        return Dictionary.singleton;
    }

}
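Taken together, the dictionary is now a lazily built, double-checked singleton driven by Configuration rather than an Environment plus a dictInited flag. A short illustrative sketch of the intended call pattern (the settings object is assumed to be whatever the analyzer provider receives; the stopword lookup is just an example query):

    // The first call builds the singleton and loads every dictionary once;
    // later calls, and getInstance(), return the same loaded instance.
    Dictionary dict = Dictionary.initial(new Configuration(settings));
    assert dict == Dictionary.getInstance();
    boolean stop = dict.isStopWord("的".toCharArray(), 0, 1);   // query the stopword dictionary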
@@ -66,13 +66,11 @@ public final class IKAnalyzer extends Analyzer{
        this.useSmart = useSmart;
    }

-   public IKAnalyzer(Settings indexSetting,Settings settings1) {
-       super();
-       Dictionary.getInstance().Init(indexSetting);
-       if(settings1.get("use_smart", "true").equals("true")){
-           useSmart = true;
-       }
-   }
+   Settings settings;
+   public IKAnalyzer(Settings indexSetting,Settings settings) {
+       super();
+       this.settings=settings;
+   }

    /**
@@ -80,7 +78,7 @@ public final class IKAnalyzer extends Analyzer{
     */
    @Override
    protected TokenStreamComponents createComponents(String fieldName, final Reader in) {
-       Tokenizer _IKTokenizer = new IKTokenizer(in , this.useSmart());
+       Tokenizer _IKTokenizer = new IKTokenizer(in , settings);
        return new TokenStreamComponents(_IKTokenizer);
    }
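For completeness, a hedged usage sketch of the analyzer after this change; the field name and sample text are placeholders, and the token-stream handling is the standard Lucene 4.x pattern rather than anything specific to this commit (exception handling omitted):

    // The analyzer simply carries the settings down to IKTokenizer in createComponents().
    Analyzer analyzer = new IKAnalyzer(indexSettings, settings);
    TokenStream ts = analyzer.tokenStream("content", new StringReader("中文分词"));
    CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
        System.out.println(term.toString());   // one segmented term per iteration
    }
    ts.end();
    ts.close();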
@@ -34,6 +34,8 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;

+import org.elasticsearch.common.settings.ImmutableSettings;
+import org.elasticsearch.common.settings.Settings;
 import org.wltea.analyzer.core.IKSegmenter;
 import org.wltea.analyzer.core.Lexeme;

@@ -58,14 +60,14 @@ public final class IKTokenizer extends Tokenizer {
    /**
     * Lucene 4.0 Tokenizer adapter constructor
     * @param in
-    * @param useSmart
     */
-   public IKTokenizer(Reader in , boolean useSmart){
+   public IKTokenizer(Reader in , Settings settings){
        super(in);
        offsetAtt = addAttribute(OffsetAttribute.class);
        termAtt = addAttribute(CharTermAttribute.class);
        typeAtt = addAttribute(TypeAttribute.class);
-       _IKImplement = new IKSegmenter(input , useSmart);
+
+       _IKImplement = new IKSegmenter(input , settings);
    }

    /* (non-Javadoc)
@@ -34,6 +34,7 @@ import org.apache.lucene.queryparser.classic.ParseException;
 import org.apache.lucene.queryparser.classic.QueryParser;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.util.Version;
+import org.elasticsearch.common.settings.Settings;
 import org.wltea.analyzer.core.IKSegmenter;
 import org.wltea.analyzer.core.Lexeme;

@@ -70,7 +71,8 @@ public class SWMCQueryBuilder {
     */
    private static List<Lexeme> doAnalyze(String keywords){
        List<Lexeme> lexemes = new ArrayList<Lexeme>();
-       IKSegmenter ikSeg = new IKSegmenter(new StringReader(keywords) , true);
+       Settings settings=null;
+       IKSegmenter ikSeg = new IKSegmenter(new StringReader(keywords) , settings);
        try{
            Lexeme l = null;
            while( (l = ikSeg.next()) != null){