From 963fac4771477e0808ffff0e005fd0e9e767fb85 Mon Sep 17 00:00:00 2001 From: piaolingxue Date: Mon, 17 Jun 2013 11:45:39 +0800 Subject: [PATCH] add ik tokenizers and remove some useless class[IkAnalyzer]. --- .../analysis/IkAnalysisBinderProcessor.java | 7 ++++ .../index/analysis/IkAnalyzer.java | 33 ------------------- .../index/analysis/IkTokenizerFactory.java | 28 ++++++++++++++++ 3 files changed, 35 insertions(+), 33 deletions(-) delete mode 100644 src/main/java/org/elasticsearch/index/analysis/IkAnalyzer.java create mode 100644 src/main/java/org/elasticsearch/index/analysis/IkTokenizerFactory.java diff --git a/src/main/java/org/elasticsearch/index/analysis/IkAnalysisBinderProcessor.java b/src/main/java/org/elasticsearch/index/analysis/IkAnalysisBinderProcessor.java index 88661b3..a448c8a 100644 --- a/src/main/java/org/elasticsearch/index/analysis/IkAnalysisBinderProcessor.java +++ b/src/main/java/org/elasticsearch/index/analysis/IkAnalysisBinderProcessor.java @@ -12,4 +12,11 @@ public class IkAnalysisBinderProcessor extends AnalysisModule.AnalysisBinderProc analyzersBindings.processAnalyzer("ik", IkAnalyzerProvider.class); super.processAnalyzers(analyzersBindings); } + + + @Override + public void processTokenizers(TokenizersBindings tokenizersBindings) { + tokenizersBindings.processTokenizer("ik", IkTokenizerFactory.class); + super.processTokenizers(tokenizersBindings); + } } diff --git a/src/main/java/org/elasticsearch/index/analysis/IkAnalyzer.java b/src/main/java/org/elasticsearch/index/analysis/IkAnalyzer.java deleted file mode 100644 index ffdf0d6..0000000 --- a/src/main/java/org/elasticsearch/index/analysis/IkAnalyzer.java +++ /dev/null @@ -1,33 +0,0 @@ -package org.elasticsearch.index.analysis; - -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.Tokenizer; -import org.wltea.analyzer.lucene.IKTokenizer; -//import org.wltea.lucene.IKTokenizer; - -import java.io.Reader; - - -public class IkAnalyzer extends Analyzer { -// private boolean isMaxWordLength = false; -// @Override public TokenStream tokenStream(String fieldName, Reader reader) { -// return new IKTokenizer(reader,true); -// } - - - public IkAnalyzer() { - super(); - } - - @Override - protected TokenStreamComponents createComponents(String s, Reader reader) { -// new TokenStreamComponents - Tokenizer tokenizer = new IKTokenizer(reader, true); - return new TokenStreamComponents(tokenizer, null); //To change body of implemented methods use File | Settings | File Templates. - } - -// public boolean isMaxWordLength() { -// return isMaxWordLength; -// } -} diff --git a/src/main/java/org/elasticsearch/index/analysis/IkTokenizerFactory.java b/src/main/java/org/elasticsearch/index/analysis/IkTokenizerFactory.java new file mode 100644 index 0000000..83d284c --- /dev/null +++ b/src/main/java/org/elasticsearch/index/analysis/IkTokenizerFactory.java @@ -0,0 +1,28 @@ +package org.elasticsearch.index.analysis; + +import java.io.Reader; + +import org.apache.lucene.analysis.Tokenizer; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.Index; +import org.wltea.analyzer.dic.Dictionary; +import org.wltea.analyzer.lucene.IKTokenizer; + +public class IkTokenizerFactory extends AbstractTokenizerFactory { + private boolean useSmart = false; + + public IkTokenizerFactory(Index index, Settings indexSettings, String name, Settings settings) { + super(index, indexSettings, name, settings); + Dictionary.getInstance().Init(indexSettings); + + if (settings.get("use_smart", "true").equals("true")) { + useSmart = true; + } + } + + @Override + public Tokenizer create(Reader reader) { + return new IKTokenizer(reader, useSmart); + } + +}