make it work with the latest elasticsearch
This commit is contained in:
parent
ce6e7fd070
commit
6480991bbb
3
pom.xml
3
pom.xml
@ -31,7 +31,7 @@
|
||||
</parent>
|
||||
|
||||
<properties>
|
||||
<elasticsearch.version>0.90.2</elasticsearch.version>
|
||||
<elasticsearch.version>0.90.6</elasticsearch.version>
|
||||
</properties>
|
||||
|
||||
<repositories>
|
||||
@ -76,6 +76,7 @@
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit</artifactId>
|
||||
<version>4.10</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
|
@ -12,28 +12,14 @@ import org.wltea.analyzer.lucene.IKAnalyzer;
|
||||
|
||||
public class IkAnalyzerProvider extends AbstractIndexAnalyzerProvider<IKAnalyzer> {
|
||||
private final IKAnalyzer analyzer;
|
||||
|
||||
@Inject
|
||||
public IkAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name, settings);
|
||||
Dictionary.initial(new Configuration(settings));
|
||||
analyzer=new IKAnalyzer(indexSettings,settings);
|
||||
Dictionary.initial(new Configuration(env));
|
||||
analyzer=new IKAnalyzer(indexSettings, settings, env);
|
||||
}
|
||||
|
||||
public IkAnalyzerProvider(Index index, Settings indexSettings, String name,
|
||||
Settings settings) {
|
||||
super(index, indexSettings, name, settings);
|
||||
Dictionary.initial(new Configuration(settings));
|
||||
analyzer=new IKAnalyzer(indexSettings,settings);
|
||||
}
|
||||
|
||||
public IkAnalyzerProvider(Index index, Settings indexSettings,
|
||||
String prefixSettings, String name, Settings settings) {
|
||||
super(index, indexSettings, prefixSettings, name, settings);
|
||||
Dictionary.initial(new Configuration(settings));
|
||||
analyzer=new IKAnalyzer(indexSettings,settings);
|
||||
}
|
||||
|
||||
|
||||
@Override public IKAnalyzer get() {
|
||||
return this.analyzer;
|
||||
}
|
||||
|
@ -4,6 +4,7 @@ import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.env.Environment;
|
||||
import org.elasticsearch.index.Index;
|
||||
import org.elasticsearch.index.settings.IndexSettings;
|
||||
import org.wltea.analyzer.cfg.Configuration;
|
||||
@ -13,18 +14,20 @@ import org.wltea.analyzer.lucene.IKTokenizer;
|
||||
import java.io.Reader;
|
||||
|
||||
public class IkTokenizerFactory extends AbstractTokenizerFactory {
|
||||
private Environment environment;
|
||||
private Settings settings;
|
||||
|
||||
@Inject
|
||||
public IkTokenizerFactory(Index index,@IndexSettings Settings indexSettings,@Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name, settings);
|
||||
this.settings=settings;
|
||||
Dictionary.initial(new Configuration(settings));
|
||||
public IkTokenizerFactory(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name, settings);
|
||||
this.environment = env;
|
||||
this.settings = settings;
|
||||
Dictionary.initial(new Configuration(env));
|
||||
}
|
||||
|
||||
@Override
|
||||
public Tokenizer create(Reader reader) {
|
||||
return new IKTokenizer(reader, settings);
|
||||
return new IKTokenizer(reader, settings, environment);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -3,17 +3,16 @@
|
||||
*/
|
||||
package org.wltea.analyzer.cfg;
|
||||
|
||||
import org.elasticsearch.common.logging.ESLogger;
|
||||
import org.elasticsearch.common.logging.Loggers;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.env.Environment;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.ArrayList;
|
||||
import java.util.InvalidPropertiesFormatException;
|
||||
import java.util.List;
|
||||
import java.util.Properties;
|
||||
|
||||
import org.elasticsearch.common.logging.ESLogger;
|
||||
import org.elasticsearch.common.logging.Loggers;
|
||||
import org.elasticsearch.env.Environment;
|
||||
|
||||
public class Configuration {
|
||||
|
||||
private static String FILE_NAME = "ik/IKAnalyzer.cfg.xml";
|
||||
@ -23,10 +22,10 @@ public class Configuration {
|
||||
private Properties props;
|
||||
private Environment environment;
|
||||
|
||||
public Configuration(Settings settings){
|
||||
public Configuration(Environment env){
|
||||
logger = Loggers.getLogger("ik-analyzer");
|
||||
props = new Properties();
|
||||
environment=new Environment(settings);
|
||||
environment = env;
|
||||
|
||||
File fileConfig= new File(environment.configFile(), FILE_NAME);
|
||||
|
||||
|
@ -32,7 +32,6 @@ import java.util.LinkedList;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.wltea.analyzer.cfg.Configuration;
|
||||
import org.wltea.analyzer.dic.Dictionary;
|
||||
|
||||
/**
|
||||
|
@ -29,6 +29,7 @@ import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.env.Environment;
|
||||
import org.wltea.analyzer.cfg.Configuration;
|
||||
import org.wltea.analyzer.dic.Dictionary;
|
||||
|
||||
@ -55,13 +56,17 @@ public final class IKSegmenter {
|
||||
* IK分词器构造函数
|
||||
* @param input
|
||||
*/
|
||||
public IKSegmenter(Reader input , Settings settings){
|
||||
public IKSegmenter(Reader input , Settings settings, Environment environment){
|
||||
this.input = input;
|
||||
this.cfg = new Configuration(settings);
|
||||
this.cfg = new Configuration(environment);
|
||||
this.useSmart = settings.get("use_smart", "true").equals("true");
|
||||
this.init();
|
||||
}
|
||||
|
||||
/**
 * Creates a segmenter for callers that have no Elasticsearch settings or
 * environment to hand (for example SWMCQueryBuilder.doAnalyze).
 *
 * The previous body was {@code new IKSegmenter(input, null, null);}, which
 * constructed a second, throw-away object instead of initialising this one,
 * and passed nulls that the three-argument constructor dereferences
 * ({@code settings.get(...)}). This version performs the same initialisation
 * steps as the three-argument constructor on this instance.
 *
 * @param input reader supplying the text to segment
 */
public IKSegmenter(Reader input){
    this.input = input;
    // No Environment was supplied, so fall back to a default one.
    // NOTE(review): relies on Environment's no-arg constructor resolving the
    // config directory the plugin expects - confirm for the targeted ES version.
    this.cfg = new Configuration(new Environment());
    // Same value the three-argument constructor uses when "use_smart" is unset.
    this.useSmart = true;
    this.init();
}
|
||||
|
||||
// /**
|
||||
// * IK分词器构造函数
|
||||
// * @param input
|
||||
|
@ -25,16 +25,14 @@
|
||||
*/
|
||||
package org.wltea.analyzer.dic;
|
||||
|
||||
import org.elasticsearch.common.logging.ESLogger;
|
||||
import org.elasticsearch.common.logging.Loggers;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.env.Environment;
|
||||
import org.wltea.analyzer.cfg.Configuration;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
|
||||
import org.elasticsearch.common.logging.ESLogger;
|
||||
import org.elasticsearch.common.logging.Loggers;
|
||||
import org.wltea.analyzer.cfg.Configuration;
|
||||
|
||||
/**
|
||||
* 词典管理类,单子模式
|
||||
*/
|
||||
|
@ -29,7 +29,7 @@ import java.io.Reader;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.wltea.analyzer.dic.Dictionary;
|
||||
import org.elasticsearch.env.Environment;
|
||||
|
||||
/**
|
||||
* IK分词器,Lucene Analyzer接口实现
|
||||
@ -67,10 +67,12 @@ public final class IKAnalyzer extends Analyzer{
|
||||
}
|
||||
|
||||
Settings settings;
|
||||
Environment environment;
|
||||
|
||||
public IKAnalyzer(Settings indexSetting,Settings settings) {
|
||||
public IKAnalyzer(Settings indexSetting,Settings settings, Environment environment) {
|
||||
super();
|
||||
this.settings=settings;
|
||||
this.environment= environment;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -78,7 +80,7 @@ public final class IKAnalyzer extends Analyzer{
|
||||
*/
|
||||
@Override
|
||||
protected TokenStreamComponents createComponents(String fieldName, final Reader in) {
|
||||
Tokenizer _IKTokenizer = new IKTokenizer(in , settings);
|
||||
Tokenizer _IKTokenizer = new IKTokenizer(in , settings, environment);
|
||||
return new TokenStreamComponents(_IKTokenizer);
|
||||
}
|
||||
|
||||
|
@ -33,9 +33,8 @@ import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
|
||||
|
||||
import org.elasticsearch.common.settings.ImmutableSettings;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.env.Environment;
|
||||
import org.wltea.analyzer.core.IKSegmenter;
|
||||
import org.wltea.analyzer.core.Lexeme;
|
||||
|
||||
@ -61,13 +60,13 @@ public final class IKTokenizer extends Tokenizer {
|
||||
* Lucene 4.0 Tokenizer适配器类构造函数
|
||||
* @param in
|
||||
*/
|
||||
public IKTokenizer(Reader in , Settings settings){
|
||||
public IKTokenizer(Reader in , Settings settings, Environment environment){
|
||||
super(in);
|
||||
offsetAtt = addAttribute(OffsetAttribute.class);
|
||||
termAtt = addAttribute(CharTermAttribute.class);
|
||||
typeAtt = addAttribute(TypeAttribute.class);
|
||||
|
||||
_IKImplement = new IKSegmenter(input , settings);
|
||||
_IKImplement = new IKSegmenter(input , settings, environment);
|
||||
}
|
||||
|
||||
/* (non-Javadoc)
|
||||
|
@ -34,7 +34,6 @@ import org.apache.lucene.queryparser.classic.ParseException;
|
||||
import org.apache.lucene.queryparser.classic.QueryParser;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.util.Version;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.wltea.analyzer.core.IKSegmenter;
|
||||
import org.wltea.analyzer.core.Lexeme;
|
||||
|
||||
@ -71,8 +70,8 @@ public class SWMCQueryBuilder {
|
||||
*/
|
||||
private static List<Lexeme> doAnalyze(String keywords){
|
||||
List<Lexeme> lexemes = new ArrayList<Lexeme>();
|
||||
Settings settings=null;
|
||||
IKSegmenter ikSeg = new IKSegmenter(new StringReader(keywords) , settings);
|
||||
|
||||
IKSegmenter ikSeg = new IKSegmenter(new StringReader(keywords));
|
||||
try{
|
||||
Lexeme l = null;
|
||||
while( (l = ikSeg.next()) != null){
|
||||
|
Loading…
x
Reference in New Issue
Block a user