add config to enable/disable lowercase and remote_dict, Closes #241

This commit is contained in:
medcl 2016-07-25 10:54:02 +08:00
parent b662596939
commit 341b586373
15 changed files with 236 additions and 526 deletions
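For reference, the three per-analyzer flags added here follow the same string-compare pattern as the existing `use_smart` option (see the new Configuration constructor further down: `settings.get(key, default).equals("true")`). Below is a minimal standalone sketch of that default handling; a plain `Map` stands in for Elasticsearch's `Settings`, and the class name is made up for illustration.

```java
import java.util.HashMap;
import java.util.Map;

// Standalone sketch of the flag handling added in this commit. A plain Map stands in
// for org.elasticsearch.common.settings.Settings; keys and defaults match the diff:
// use_smart defaults to "false", enable_lowercase and enable_remote_dict default to "true".
public class IkFlagDefaults {

    static boolean flag(Map<String, String> settings, String key, String defaultValue) {
        return settings.getOrDefault(key, defaultValue).equals("true");
    }

    public static void main(String[] args) {
        Map<String, String> analyzerSettings = new HashMap<>();
        analyzerSettings.put("enable_lowercase", "false"); // e.g. keep original case for code-like text

        System.out.println(flag(analyzerSettings, "use_smart", "false"));         // false (not set, defaults off)
        System.out.println(flag(analyzerSettings, "enable_lowercase", "true"));   // false (explicitly disabled)
        System.out.println(flag(analyzerSettings, "enable_remote_dict", "true")); // true (default stays on)
    }
}
```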

View File

@ -230,7 +230,12 @@ mvn compile
mvn package mvn package
``` ```
Copy and unzip the release file #{project_path}/elasticsearch-analysis-ik/target/releases/elasticsearch-analysis-ik-*.zip into your elasticsearch plugins directory, e.g. plugins/ik
Restart elasticsearch
3. The analysis test fails
Please test by calling the analyze API under a specific index, rather than calling the analyze API directly
e.g. http://localhost:9200/your_index/_analyze?text=中华人民共和国MN&tokenizer=my_ik
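A minimal Java sketch of that same index-level _analyze call, assuming a local node and reusing the `your_index` and `my_ik` names from the example URL above:

```java
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLEncoder;

// Sketch of the index-level _analyze request recommended above. Host, index name and
// tokenizer name (my_ik) are placeholders taken from the example URL; the query-parameter
// form mirrors that URL exactly.
public class AnalyzeCheck {
    public static void main(String[] args) throws Exception {
        String text = URLEncoder.encode("中华人民共和国MN", "UTF-8");
        URL url = new URL("http://localhost:9200/your_index/_analyze?text=" + text + "&tokenizer=my_ik");
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        try (BufferedReader in = new BufferedReader(new InputStreamReader(conn.getInputStream(), "UTF-8"))) {
            String line;
            while ((line = in.readLine()) != null) {
                System.out.println(line); // JSON list of tokens produced by the analyzer
            }
        }
    }
}
```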
Thanks

View File

@ -1,7 +1,6 @@
package org.elasticsearch.index.analysis; package org.elasticsearch.index.analysis;
@Deprecated
public class IkAnalysisBinderProcessor extends AnalysisModule.AnalysisBinderProcessor { public class IkAnalysisBinderProcessor extends AnalysisModule.AnalysisBinderProcessor {

View File

@ -10,17 +10,16 @@ import org.wltea.analyzer.cfg.Configuration;
import org.wltea.analyzer.dic.Dictionary; import org.wltea.analyzer.dic.Dictionary;
import org.wltea.analyzer.lucene.IKAnalyzer; import org.wltea.analyzer.lucene.IKAnalyzer;
@Deprecated
public class IkAnalyzerProvider extends AbstractIndexAnalyzerProvider<IKAnalyzer> { public class IkAnalyzerProvider extends AbstractIndexAnalyzerProvider<IKAnalyzer> {
private final IKAnalyzer analyzer; private final IKAnalyzer analyzer;
private boolean useSmart=false;
@Inject @Inject
public IkAnalyzerProvider(Index index, IndexSettingsService indexSettingsService, Environment env, @Assisted String name, @Assisted Settings settings) { public IkAnalyzerProvider(Index index, IndexSettingsService indexSettingsService, Environment env, @Assisted String name, @Assisted Settings settings) {
super(index, indexSettingsService.getSettings(), name, settings); super(index, indexSettingsService.getSettings(), name, settings);
Dictionary.initial(new Configuration(env));
useSmart = settings.get("use_smart", "false").equals("true"); Configuration configuration=new Configuration(env,settings);
analyzer=new IKAnalyzer(useSmart);
analyzer=new IKAnalyzer(configuration);
} }
@Override public IKAnalyzer get() { @Override public IKAnalyzer get() {

View File

@ -8,25 +8,18 @@ import org.elasticsearch.env.Environment;
import org.elasticsearch.index.Index; import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettingsService; import org.elasticsearch.index.settings.IndexSettingsService;
import org.wltea.analyzer.cfg.Configuration; import org.wltea.analyzer.cfg.Configuration;
import org.wltea.analyzer.dic.Dictionary;
import org.wltea.analyzer.lucene.IKTokenizer; import org.wltea.analyzer.lucene.IKTokenizer;
@Deprecated
public class IkTokenizerFactory extends AbstractTokenizerFactory { public class IkTokenizerFactory extends AbstractTokenizerFactory {
private final Settings settings; private Configuration configuration;
private boolean useSmart=false;
@Inject @Inject
public IkTokenizerFactory(Index index, IndexSettingsService indexSettingsService,Environment env, @Assisted String name, @Assisted Settings settings) { public IkTokenizerFactory(Index index, IndexSettingsService indexSettingsService,Environment env, @Assisted String name, @Assisted Settings settings) {
super(index, indexSettingsService.getSettings(), name, settings); super(index, indexSettingsService.getSettings(), name, settings);
this.settings=settings; configuration=new Configuration(env,settings);
Dictionary.initial(new Configuration(env));
} }
@Override @Override
public Tokenizer create() { public Tokenizer create() {
this.useSmart = settings.get("use_smart", "false").equals("true"); return new IKTokenizer(configuration); }
return new IKTokenizer(useSmart); }
} }

View File

@ -3,6 +3,7 @@ package org.elasticsearch.indices.analysis;
import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.Tokenizer;
import org.elasticsearch.common.component.AbstractComponent; import org.elasticsearch.common.component.AbstractComponent;
import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment; import org.elasticsearch.env.Environment;
import org.elasticsearch.index.analysis.AnalyzerScope; import org.elasticsearch.index.analysis.AnalyzerScope;
@ -26,21 +27,20 @@ public class IKIndicesAnalysis extends AbstractComponent {
public IKIndicesAnalysis(final Settings settings, public IKIndicesAnalysis(final Settings settings,
IndicesAnalysisService indicesAnalysisService,Environment env) { IndicesAnalysisService indicesAnalysisService,Environment env) {
super(settings); super(settings);
Dictionary.initial(new Configuration(env)); final Configuration configuration=new Configuration(env,settings).setUseSmart(false);
final Configuration smartConfiguration=new Configuration(env,settings).setUseSmart(true);
this.useSmart = settings.get("use_smart", "false").equals("true");
indicesAnalysisService.analyzerProviderFactories().put("ik", indicesAnalysisService.analyzerProviderFactories().put("ik",
new PreBuiltAnalyzerProviderFactory("ik", AnalyzerScope.GLOBAL, new PreBuiltAnalyzerProviderFactory("ik", AnalyzerScope.GLOBAL,
new IKAnalyzer(useSmart))); new IKAnalyzer(configuration)));
indicesAnalysisService.analyzerProviderFactories().put("ik_smart", indicesAnalysisService.analyzerProviderFactories().put("ik_smart",
new PreBuiltAnalyzerProviderFactory("ik_smart", AnalyzerScope.GLOBAL, new PreBuiltAnalyzerProviderFactory("ik_smart", AnalyzerScope.GLOBAL,
new IKAnalyzer(true))); new IKAnalyzer(smartConfiguration)));
indicesAnalysisService.analyzerProviderFactories().put("ik_max_word", indicesAnalysisService.analyzerProviderFactories().put("ik_max_word",
new PreBuiltAnalyzerProviderFactory("ik_max_word", AnalyzerScope.GLOBAL, new PreBuiltAnalyzerProviderFactory("ik_max_word", AnalyzerScope.GLOBAL,
new IKAnalyzer(false))); new IKAnalyzer(configuration)));
indicesAnalysisService.tokenizerFactories().put("ik", indicesAnalysisService.tokenizerFactories().put("ik",
new PreBuiltTokenizerFactoryFactory(new TokenizerFactory() { new PreBuiltTokenizerFactoryFactory(new TokenizerFactory() {
@ -51,7 +51,7 @@ public class IKIndicesAnalysis extends AbstractComponent {
@Override @Override
public Tokenizer create() { public Tokenizer create() {
return new IKTokenizer(false); return new IKTokenizer(configuration);
} }
})); }));
@ -64,7 +64,7 @@ public class IKIndicesAnalysis extends AbstractComponent {
@Override @Override
public Tokenizer create() { public Tokenizer create() {
return new IKTokenizer(true); return new IKTokenizer(smartConfiguration);
} }
})); }));
@ -77,8 +77,8 @@ public class IKIndicesAnalysis extends AbstractComponent {
@Override @Override
public Tokenizer create() { public Tokenizer create() {
return new IKTokenizer(false); return new IKTokenizer(configuration);
} }
})); }));
} }
} }

View File

@ -7,8 +7,10 @@ import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.io.PathUtils; import org.elasticsearch.common.io.PathUtils;
import org.elasticsearch.common.logging.ESLogger; import org.elasticsearch.common.logging.ESLogger;
import org.elasticsearch.common.logging.Loggers; import org.elasticsearch.common.logging.Loggers;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment; import org.elasticsearch.env.Environment;
import org.elasticsearch.plugin.analysis.ik.AnalysisIkPlugin; import org.elasticsearch.plugin.analysis.ik.AnalysisIkPlugin;
import org.wltea.analyzer.dic.Dictionary;
import java.io.*; import java.io.*;
import java.net.URL; import java.net.URL;
@ -20,132 +22,61 @@ import java.util.Properties;
public class Configuration { public class Configuration {
private static String FILE_NAME = "IKAnalyzer.cfg.xml";
private static final String EXT_DICT = "ext_dict";
private static final String REMOTE_EXT_DICT = "remote_ext_dict";
private static final String EXT_STOP = "ext_stopwords";
private static final String REMOTE_EXT_STOP = "remote_ext_stopwords";
private static ESLogger logger = Loggers.getLogger("ik-analyzer");
private Path conf_dir;
private Properties props;
private Environment environment; private Environment environment;
private Settings settings;
//whether smart segmentation is enabled
private boolean useSmart;
//whether remote dictionary loading is enabled
private boolean enableRemoteDict=false;
//whether lowercase folding is enabled
private boolean enableLowercase=true;
@Inject @Inject
public Configuration(Environment env) { public Configuration(Environment env,Settings settings) {
props = new Properties(); this.environment = env;
environment = env; this.settings=settings;
conf_dir = environment.configFile().resolve(AnalysisIkPlugin.PLUGIN_NAME); this.useSmart = settings.get("use_smart", "false").equals("true");
Path configFile = conf_dir.resolve(FILE_NAME); this.enableLowercase = settings.get("enable_lowercase", "true").equals("true");
this.enableRemoteDict = settings.get("enable_remote_dict", "true").equals("true");
Dictionary.initial(this);
InputStream input = null;
try {
logger.info("try load config from {}", configFile);
input = new FileInputStream(configFile.toFile());
} catch (FileNotFoundException e) {
conf_dir = this.getConfigInPluginDir();
configFile = conf_dir.resolve(FILE_NAME);
try {
logger.info("try load config from {}", configFile);
input = new FileInputStream(configFile.toFile());
} catch (FileNotFoundException ex) {
// We should report the original exception
logger.error("ik-analyzer", e);
}
}
if (input != null) {
try {
props.loadFromXML(input);
} catch (InvalidPropertiesFormatException e) {
logger.error("ik-analyzer", e);
} catch (IOException e) {
logger.error("ik-analyzer", e);
}
}
} }
public List<String> getExtDictionarys() { public Path getConfigInPluginDir() {
List<String> extDictFiles = new ArrayList<String>(2);
String extDictCfg = props.getProperty(EXT_DICT);
if (extDictCfg != null) {
String[] filePaths = extDictCfg.split(";");
if (filePaths != null) {
for (String filePath : filePaths) {
if (filePath != null && !"".equals(filePath.trim())) {
Path file = PathUtils.get(filePath.trim());
extDictFiles.add(file.toString());
}
}
}
}
return extDictFiles;
}
public List<String> getRemoteExtDictionarys() {
List<String> remoteExtDictFiles = new ArrayList<String>(2);
String remoteExtDictCfg = props.getProperty(REMOTE_EXT_DICT);
if (remoteExtDictCfg != null) {
String[] filePaths = remoteExtDictCfg.split(";");
if (filePaths != null) {
for (String filePath : filePaths) {
if (filePath != null && !"".equals(filePath.trim())) {
remoteExtDictFiles.add(filePath);
}
}
}
}
return remoteExtDictFiles;
}
public List<String> getExtStopWordDictionarys() {
List<String> extStopWordDictFiles = new ArrayList<String>(2);
String extStopWordDictCfg = props.getProperty(EXT_STOP);
if (extStopWordDictCfg != null) {
String[] filePaths = extStopWordDictCfg.split(";");
if (filePaths != null) {
for (String filePath : filePaths) {
if (filePath != null && !"".equals(filePath.trim())) {
Path file = PathUtils.get(filePath.trim());
extStopWordDictFiles.add(file.toString());
}
}
}
}
return extStopWordDictFiles;
}
public List<String> getRemoteExtStopWordDictionarys() {
List<String> remoteExtStopWordDictFiles = new ArrayList<String>(2);
String remoteExtStopWordDictCfg = props.getProperty(REMOTE_EXT_STOP);
if (remoteExtStopWordDictCfg != null) {
String[] filePaths = remoteExtStopWordDictCfg.split(";");
if (filePaths != null) {
for (String filePath : filePaths) {
if (filePath != null && !"".equals(filePath.trim())) {
remoteExtStopWordDictFiles.add(filePath);
}
}
}
}
return remoteExtStopWordDictFiles;
}
public String getDictRoot() {
return conf_dir.toAbsolutePath().toString();
}
private Path getConfigInPluginDir() {
return PathUtils return PathUtils
.get(new File(AnalysisIkPlugin.class.getProtectionDomain().getCodeSource().getLocation().getPath()) .get(new File(AnalysisIkPlugin.class.getProtectionDomain().getCodeSource().getLocation().getPath())
.getParent(), "config") .getParent(), "config")
.toAbsolutePath(); .toAbsolutePath();
} }
public boolean isUseSmart() {
return useSmart;
}
public Configuration setUseSmart(boolean useSmart) {
this.useSmart = useSmart;
return this;
}
public Environment getEnvironment() {
return environment;
}
public Settings getSettings() {
return settings;
}
public boolean isEnableRemoteDict() {
return enableRemoteDict;
}
public boolean isEnableLowercase() {
return enableLowercase;
}
} }
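Note that `setUseSmart` returns `this`, which is what lets IKIndicesAnalysis above build a smart variant inline via `new Configuration(env,settings).setUseSmart(true)` while the constructor also triggers `Dictionary.initial(this)`. A tiny standalone sketch of that fluent pattern; `ConfigurationSketch` is a hypothetical stand-in, since the real Configuration needs a live Environment and loads dictionaries:

```java
// Sketch of the fluent setUseSmart pattern introduced above: one base configuration,
// plus a smart variant derived the same way, as done for the prebuilt "ik",
// "ik_max_word" and "ik_smart" analyzers.
public class ConfigurationSketch {
    private boolean useSmart = false;

    public ConfigurationSketch setUseSmart(boolean useSmart) {
        this.useSmart = useSmart;
        return this; // returning this allows new Configuration(env, settings).setUseSmart(true)
    }

    public boolean isUseSmart() { return useSmart; }

    public static void main(String[] args) {
        ConfigurationSketch maxWord = new ConfigurationSketch().setUseSmart(false);
        ConfigurationSketch smart = new ConfigurationSketch().setUseSmart(true);
        System.out.println(maxWord.isUseSmart() + " " + smart.isUseSmart()); // false true
    }
}
```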

View File

@ -32,6 +32,7 @@ import java.util.LinkedList;
import java.util.Map; import java.util.Map;
import java.util.Set; import java.util.Set;
import org.wltea.analyzer.cfg.Configuration;
import org.wltea.analyzer.dic.Dictionary; import org.wltea.analyzer.dic.Dictionary;
/** /**
@ -72,12 +73,11 @@ class AnalyzeContext {
private Map<Integer , LexemePath> pathMap; private Map<Integer , LexemePath> pathMap;
//final segmentation results
private LinkedList<Lexeme> results;
private boolean useSmart;
//segmenter configuration
// private Configuration cfg; private Configuration cfg;
public AnalyzeContext(boolean useSmart){ public AnalyzeContext(Configuration configuration){
this.useSmart = useSmart; this.cfg = configuration;
this.segmentBuff = new char[BUFF_SIZE]; this.segmentBuff = new char[BUFF_SIZE];
this.charTypes = new int[BUFF_SIZE]; this.charTypes = new int[BUFF_SIZE];
this.buffLocker = new HashSet<String>(); this.buffLocker = new HashSet<String>();
@ -139,7 +139,7 @@ class AnalyzeContext {
*/ */
void initCursor(){ void initCursor(){
this.cursor = 0; this.cursor = 0;
this.segmentBuff[this.cursor] = CharacterUtil.regularize(this.segmentBuff[this.cursor]); this.segmentBuff[this.cursor] = CharacterUtil.regularize(this.segmentBuff[this.cursor],cfg.isEnableLowercase());
this.charTypes[this.cursor] = CharacterUtil.identifyCharType(this.segmentBuff[this.cursor]); this.charTypes[this.cursor] = CharacterUtil.identifyCharType(this.segmentBuff[this.cursor]);
} }
@ -151,7 +151,7 @@ class AnalyzeContext {
boolean moveCursor(){ boolean moveCursor(){
if(this.cursor < this.available - 1){ if(this.cursor < this.available - 1){
this.cursor++; this.cursor++;
this.segmentBuff[this.cursor] = CharacterUtil.regularize(this.segmentBuff[this.cursor]); this.segmentBuff[this.cursor] = CharacterUtil.regularize(this.segmentBuff[this.cursor],cfg.isEnableLowercase());
this.charTypes[this.cursor] = CharacterUtil.identifyCharType(this.segmentBuff[this.cursor]); this.charTypes[this.cursor] = CharacterUtil.identifyCharType(this.segmentBuff[this.cursor]);
return true; return true;
}else{ }else{
@ -345,7 +345,7 @@ class AnalyzeContext {
*/ */
private void compound(Lexeme result){ private void compound(Lexeme result){
if(!this.useSmart){ if(!this.cfg.isUseSmart()){
return ; return ;
} }
//数量词合并处理 //数量词合并处理

View File

@ -86,14 +86,14 @@ class CharacterUtil {
* @param input * @param input
* @return char * @return char
*/ */
static char regularize(char input){ static char regularize(char input,boolean lowercase){
if (input == 12288) { if (input == 12288) {
input = (char) 32; input = (char) 32;
}else if (input > 65280 && input < 65375) { }else if (input > 65280 && input < 65375) {
input = (char) (input - 65248); input = (char) (input - 65248);
}else if (input >= 'A' && input <= 'Z') { }else if (input >= 'A' && input <= 'Z' && lowercase) {
input += 32; input += 32;
} }

View File

@ -23,10 +23,7 @@
*/ */
package org.wltea.analyzer.core; package org.wltea.analyzer.core;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.wltea.analyzer.cfg.Configuration; import org.wltea.analyzer.cfg.Configuration;
import org.wltea.analyzer.dic.Dictionary;
import java.io.IOException; import java.io.IOException;
import java.io.Reader; import java.io.Reader;
@ -47,16 +44,16 @@ public final class IKSegmenter {
private List<ISegmenter> segmenters; private List<ISegmenter> segmenters;
//分词歧义裁决器 //分词歧义裁决器
private IKArbitrator arbitrator; private IKArbitrator arbitrator;
private boolean useSmart = false; private Configuration configuration;
/** /**
* IK分词器构造函数 * IK分词器构造函数
* @param input * @param input
*/ */
public IKSegmenter(Reader input ,boolean useSmart){ public IKSegmenter(Reader input ,Configuration configuration){
this.input = input; this.input = input;
this.useSmart = useSmart; this.configuration = configuration;
this.init(); this.init();
} }
@ -66,7 +63,7 @@ public final class IKSegmenter {
*/ */
private void init(){ private void init(){
//初始化分词上下文 //初始化分词上下文
this.context = new AnalyzeContext(useSmart); this.context = new AnalyzeContext(configuration);
//加载子分词器 //加载子分词器
this.segmenters = this.loadSegmenters(); this.segmenters = this.loadSegmenters();
//加载歧义裁决器 //加载歧义裁决器
@ -127,7 +124,7 @@ public final class IKSegmenter {
} }
} }
//对分词进行歧义处理 //对分词进行歧义处理
this.arbitrator.process(context, useSmart); this.arbitrator.process(context, configuration.isUseSmart());
//将分词结果输出到结果集并处理未切分的单个CJK字符 //将分词结果输出到结果集并处理未切分的单个CJK字符
context.outputToResult(); context.outputToResult();
//记录本次分词的缓冲区位移 //记录本次分词的缓冲区位移

View File

@ -33,9 +33,7 @@ import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.io.InputStreamReader; import java.io.InputStreamReader;
import java.nio.file.Path; import java.nio.file.Path;
import java.util.ArrayList; import java.util.*;
import java.util.Collection;
import java.util.List;
import java.util.concurrent.Executors; import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
@ -49,6 +47,7 @@ import org.apache.http.impl.client.HttpClients;
import org.elasticsearch.common.io.PathUtils; import org.elasticsearch.common.io.PathUtils;
import org.elasticsearch.common.logging.ESLogger; import org.elasticsearch.common.logging.ESLogger;
import org.elasticsearch.common.logging.Loggers; import org.elasticsearch.common.logging.Loggers;
import org.elasticsearch.plugin.analysis.ik.AnalysisIkPlugin;
import org.wltea.analyzer.cfg.Configuration; import org.wltea.analyzer.cfg.Configuration;
/** /**
@ -88,10 +87,53 @@ public class Dictionary {
public static final String PATH_DIC_PREP = "preposition.dic"; public static final String PATH_DIC_PREP = "preposition.dic";
public static final String PATH_DIC_STOP = "stopword.dic"; public static final String PATH_DIC_STOP = "stopword.dic";
private Dictionary() { private final static String FILE_NAME = "IKAnalyzer.cfg.xml";
private final static String EXT_DICT = "ext_dict";
private final static String REMOTE_EXT_DICT = "remote_ext_dict";
private final static String EXT_STOP = "ext_stopwords";
private final static String REMOTE_EXT_STOP = "remote_ext_stopwords";
private Path conf_dir;
private Properties props;
private Dictionary(Configuration cfg) {
this.configuration = cfg;
this.props = new Properties();
this.conf_dir = cfg.getEnvironment().configFile().resolve(AnalysisIkPlugin.PLUGIN_NAME);
Path configFile = conf_dir.resolve(FILE_NAME);
InputStream input = null;
try {
logger.info("try load config from {}", configFile);
input = new FileInputStream(configFile.toFile());
} catch (FileNotFoundException e) {
conf_dir = cfg.getConfigInPluginDir();
configFile = conf_dir.resolve(FILE_NAME);
try {
logger.info("try load config from {}", configFile);
input = new FileInputStream(configFile.toFile());
} catch (FileNotFoundException ex) {
// We should report the original exception
logger.error("ik-analyzer", e);
}
}
if (input != null) {
try {
props.loadFromXML(input);
} catch (InvalidPropertiesFormatException e) {
logger.error("ik-analyzer", e);
} catch (IOException e) {
logger.error("ik-analyzer", e);
}
}
} }
public String getProperty(String key){
if(props!=null){
return props.getProperty(key);
}
return null;
}
/**
* Dictionary initialisation. IK Analyzer initialises its dictionaries through static methods of the Dictionary class;
* they are only loaded when the class is first used, which delays the first analysis call. This method allows the dictionaries to be initialised during application startup instead.
@ -102,8 +144,8 @@ public class Dictionary {
if (singleton == null) { if (singleton == null) {
synchronized (Dictionary.class) { synchronized (Dictionary.class) {
if (singleton == null) { if (singleton == null) {
singleton = new Dictionary();
singleton.configuration = cfg; singleton = new Dictionary(cfg);
singleton.loadMainDict(); singleton.loadMainDict();
singleton.loadSurnameDict(); singleton.loadSurnameDict();
singleton.loadQuantifierDict(); singleton.loadQuantifierDict();
@ -111,13 +153,15 @@ public class Dictionary {
singleton.loadPrepDict(); singleton.loadPrepDict();
singleton.loadStopWordDict(); singleton.loadStopWordDict();
// Start the monitor threads if(cfg.isEnableRemoteDict()){
for (String location : cfg.getRemoteExtDictionarys()) { // Start the monitor threads
// 10 s is the initial delay (adjustable); 60 is the interval, in seconds for (String location : singleton.getRemoteExtDictionarys()) {
pool.scheduleAtFixedRate(new Monitor(location), 10, 60, TimeUnit.SECONDS); // 10 s is the initial delay (adjustable); 60 is the interval, in seconds
} pool.scheduleAtFixedRate(new Monitor(location), 10, 60, TimeUnit.SECONDS);
for (String location : cfg.getRemoteExtStopWordDictionarys()) { }
pool.scheduleAtFixedRate(new Monitor(location), 10, 60, TimeUnit.SECONDS); for (String location : singleton.getRemoteExtStopWordDictionarys()) {
pool.scheduleAtFixedRate(new Monitor(location), 10, 60, TimeUnit.SECONDS);
}
} }
return singleton; return singleton;
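With `enable_remote_dict` off, the Monitor tasks above are simply never scheduled. A standalone sketch of that scheduling (10 s initial delay, 60 s period, as noted in the comments above); the URL and the Runnable body are hypothetical placeholders for the plugin's Monitor class:

```java
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;

// Sketch of the remote-dictionary polling set up above: one repeating task per remote
// location, started only when enable_remote_dict is true. The real code submits
// new Monitor(location); a println stands in for it here.
public class RemoteDictMonitorSketch {

    public static void main(String[] args) {
        boolean enableRemoteDict = true; // value read from the analyzer settings
        ScheduledExecutorService pool = Executors.newScheduledThreadPool(1);

        if (enableRemoteDict) {
            String location = "http://example.com/ext_dict.txt"; // hypothetical remote dictionary URL
            // 10 s initial delay, then poll every 60 s
            pool.scheduleAtFixedRate(() -> System.out.println("polling " + location),
                    10, 60, TimeUnit.SECONDS);
        }
    }
}
```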
@ -127,6 +171,77 @@ public class Dictionary {
return singleton; return singleton;
} }
public List<String> getExtDictionarys() {
List<String> extDictFiles = new ArrayList<String>(2);
String extDictCfg = getProperty(EXT_DICT);
if (extDictCfg != null) {
String[] filePaths = extDictCfg.split(";");
for (String filePath : filePaths) {
if (filePath != null && !"".equals(filePath.trim())) {
Path file = PathUtils.get(filePath.trim());
extDictFiles.add(file.toString());
}
}
}
return extDictFiles;
}
public List<String> getRemoteExtDictionarys() {
List<String> remoteExtDictFiles = new ArrayList<String>(2);
String remoteExtDictCfg = getProperty(REMOTE_EXT_DICT);
if (remoteExtDictCfg != null) {
String[] filePaths = remoteExtDictCfg.split(";");
for (String filePath : filePaths) {
if (filePath != null && !"".equals(filePath.trim())) {
remoteExtDictFiles.add(filePath);
}
}
}
return remoteExtDictFiles;
}
public List<String> getExtStopWordDictionarys() {
List<String> extStopWordDictFiles = new ArrayList<String>(2);
String extStopWordDictCfg = getProperty(EXT_STOP);
if (extStopWordDictCfg != null) {
String[] filePaths = extStopWordDictCfg.split(";");
for (String filePath : filePaths) {
if (filePath != null && !"".equals(filePath.trim())) {
Path file = PathUtils.get(filePath.trim());
extStopWordDictFiles.add(file.toString());
}
}
}
return extStopWordDictFiles;
}
public List<String> getRemoteExtStopWordDictionarys() {
List<String> remoteExtStopWordDictFiles = new ArrayList<String>(2);
String remoteExtStopWordDictCfg = getProperty(REMOTE_EXT_STOP);
if (remoteExtStopWordDictCfg != null) {
String[] filePaths = remoteExtStopWordDictCfg.split(";");
for (String filePath : filePaths) {
if (filePath != null && !"".equals(filePath.trim())) {
remoteExtStopWordDictFiles.add(filePath);
}
}
}
return remoteExtStopWordDictFiles;
}
public String getDictRoot() {
return conf_dir.toAbsolutePath().toString();
}
/** /**
* 获取词典单子实例 * 获取词典单子实例
* *
@ -139,6 +254,7 @@ public class Dictionary {
return singleton; return singleton;
} }
/** /**
* 批量加载新词条 * 批量加载新词条
* *
@ -224,7 +340,7 @@ public class Dictionary {
_MainDict = new DictSegment((char) 0); _MainDict = new DictSegment((char) 0);
// 读取主词典文件 // 读取主词典文件
Path file = PathUtils.get(configuration.getDictRoot(), Dictionary.PATH_DIC_MAIN); Path file = PathUtils.get(getDictRoot(), Dictionary.PATH_DIC_MAIN);
InputStream is = null; InputStream is = null;
try { try {
@ -267,13 +383,13 @@ public class Dictionary {
*/ */
private void loadExtDict() { private void loadExtDict() {
// 加载扩展词典配置 // 加载扩展词典配置
List<String> extDictFiles = configuration.getExtDictionarys(); List<String> extDictFiles = getExtDictionarys();
if (extDictFiles != null) { if (extDictFiles != null) {
InputStream is = null; InputStream is = null;
for (String extDictName : extDictFiles) { for (String extDictName : extDictFiles) {
// 读取扩展词典文件 // 读取扩展词典文件
logger.info("[Dict Loading] " + extDictName); logger.info("[Dict Loading] " + extDictName);
Path file = PathUtils.get(configuration.getDictRoot(), extDictName); Path file = PathUtils.get(getDictRoot(), extDictName);
try { try {
is = new FileInputStream(file.toFile()); is = new FileInputStream(file.toFile());
} catch (FileNotFoundException e) { } catch (FileNotFoundException e) {
@ -315,7 +431,7 @@ public class Dictionary {
* 加载远程扩展词典到主词库表 * 加载远程扩展词典到主词库表
*/ */
private void loadRemoteExtDict() { private void loadRemoteExtDict() {
List<String> remoteExtDictFiles = configuration.getRemoteExtDictionarys(); List<String> remoteExtDictFiles = getRemoteExtDictionarys();
for (String location : remoteExtDictFiles) { for (String location : remoteExtDictFiles) {
logger.info("[Dict Loading] " + location); logger.info("[Dict Loading] " + location);
List<String> lists = getRemoteWords(location); List<String> lists = getRemoteWords(location);
@ -386,7 +502,7 @@ public class Dictionary {
_StopWords = new DictSegment((char) 0); _StopWords = new DictSegment((char) 0);
// 读取主词典文件 // 读取主词典文件
Path file = PathUtils.get(configuration.getDictRoot(), Dictionary.PATH_DIC_STOP); Path file = PathUtils.get(getDictRoot(), Dictionary.PATH_DIC_STOP);
InputStream is = null; InputStream is = null;
try { try {
@ -420,14 +536,14 @@ public class Dictionary {
} }
// 加载扩展停止词典 // 加载扩展停止词典
List<String> extStopWordDictFiles = configuration.getExtStopWordDictionarys(); List<String> extStopWordDictFiles = getExtStopWordDictionarys();
if (extStopWordDictFiles != null) { if (extStopWordDictFiles != null) {
is = null; is = null;
for (String extStopWordDictName : extStopWordDictFiles) { for (String extStopWordDictName : extStopWordDictFiles) {
logger.info("[Dict Loading] " + extStopWordDictName); logger.info("[Dict Loading] " + extStopWordDictName);
// 读取扩展词典文件 // 读取扩展词典文件
file = PathUtils.get(configuration.getDictRoot(), extStopWordDictName); file = PathUtils.get(getDictRoot(), extStopWordDictName);
try { try {
is = new FileInputStream(file.toFile()); is = new FileInputStream(file.toFile());
} catch (FileNotFoundException e) { } catch (FileNotFoundException e) {
@ -465,7 +581,7 @@ public class Dictionary {
} }
// 加载远程停用词典 // 加载远程停用词典
List<String> remoteExtStopWordDictFiles = configuration.getRemoteExtStopWordDictionarys(); List<String> remoteExtStopWordDictFiles = getRemoteExtStopWordDictionarys();
for (String location : remoteExtStopWordDictFiles) { for (String location : remoteExtStopWordDictFiles) {
logger.info("[Dict Loading] " + location); logger.info("[Dict Loading] " + location);
List<String> lists = getRemoteWords(location); List<String> lists = getRemoteWords(location);
@ -492,7 +608,7 @@ public class Dictionary {
// 建立一个量词典实例 // 建立一个量词典实例
_QuantifierDict = new DictSegment((char) 0); _QuantifierDict = new DictSegment((char) 0);
// 读取量词词典文件 // 读取量词词典文件
Path file = PathUtils.get(configuration.getDictRoot(), Dictionary.PATH_DIC_QUANTIFIER); Path file = PathUtils.get(getDictRoot(), Dictionary.PATH_DIC_QUANTIFIER);
InputStream is = null; InputStream is = null;
try { try {
is = new FileInputStream(file.toFile()); is = new FileInputStream(file.toFile());
@ -527,7 +643,7 @@ public class Dictionary {
private void loadSurnameDict() { private void loadSurnameDict() {
_SurnameDict = new DictSegment((char) 0); _SurnameDict = new DictSegment((char) 0);
Path file = PathUtils.get(configuration.getDictRoot(), Dictionary.PATH_DIC_SURNAME); Path file = PathUtils.get(getDictRoot(), Dictionary.PATH_DIC_SURNAME);
InputStream is = null; InputStream is = null;
try { try {
is = new FileInputStream(file.toFile()); is = new FileInputStream(file.toFile());
@ -563,7 +679,7 @@ public class Dictionary {
private void loadSuffixDict() { private void loadSuffixDict() {
_SuffixDict = new DictSegment((char) 0); _SuffixDict = new DictSegment((char) 0);
Path file = PathUtils.get(configuration.getDictRoot(), Dictionary.PATH_DIC_SUFFIX); Path file = PathUtils.get(getDictRoot(), Dictionary.PATH_DIC_SUFFIX);
InputStream is = null; InputStream is = null;
try { try {
is = new FileInputStream(file.toFile()); is = new FileInputStream(file.toFile());
@ -598,7 +714,7 @@ public class Dictionary {
private void loadPrepDict() { private void loadPrepDict() {
_PrepDict = new DictSegment((char) 0); _PrepDict = new DictSegment((char) 0);
Path file = PathUtils.get(configuration.getDictRoot(), Dictionary.PATH_DIC_PREP); Path file = PathUtils.get(getDictRoot(), Dictionary.PATH_DIC_PREP);
InputStream is = null; InputStream is = null;
try { try {
is = new FileInputStream(file.toFile()); is = new FileInputStream(file.toFile());
@ -634,7 +750,7 @@ public class Dictionary {
public void reLoadMainDict() { public void reLoadMainDict() {
logger.info("重新加载词典..."); logger.info("重新加载词典...");
// 新开一个实例加载词典减少加载过程对当前词典使用的影响 // 新开一个实例加载词典减少加载过程对当前词典使用的影响
Dictionary tmpDict = new Dictionary(); Dictionary tmpDict = new Dictionary(configuration);
tmpDict.configuration = getSingleton().configuration; tmpDict.configuration = getSingleton().configuration;
tmpDict.loadMainDict(); tmpDict.loadMainDict();
tmpDict.loadStopWordDict(); tmpDict.loadStopWordDict();
@ -643,4 +759,4 @@ public class Dictionary {
logger.info("重新加载词典完毕..."); logger.info("重新加载词典完毕...");
} }
} }

View File

@ -26,6 +26,7 @@ package org.wltea.analyzer.lucene;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.Tokenizer;
import org.wltea.analyzer.cfg.Configuration;
/** /**
* IK分词器Lucene Analyzer接口实现 * IK分词器Lucene Analyzer接口实现
@ -33,15 +34,7 @@ import org.apache.lucene.analysis.Tokenizer;
*/ */
public final class IKAnalyzer extends Analyzer{ public final class IKAnalyzer extends Analyzer{
private boolean useSmart; private Configuration configuration;
public boolean useSmart() {
return useSmart;
}
public void setUseSmart(boolean useSmart) {
this.useSmart = useSmart;
}
/** /**
* IK分词器Lucene Analyzer接口实现类 * IK分词器Lucene Analyzer接口实现类
@ -54,11 +47,11 @@ public final class IKAnalyzer extends Analyzer{
/** /**
* IK分词器Lucene Analyzer接口实现类 * IK分词器Lucene Analyzer接口实现类
* *
* @param useSmart when true the analyzer performs smart segmentation * @param configuration the IK configuration
*/ */
public IKAnalyzer(boolean useSmart){ public IKAnalyzer(Configuration configuration){
super(); super();
this.useSmart = useSmart; this.configuration = configuration;
} }
@ -67,7 +60,7 @@ public final class IKAnalyzer extends Analyzer{
*/ */
@Override @Override
protected TokenStreamComponents createComponents(String fieldName) { protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer _IKTokenizer = new IKTokenizer(useSmart); Tokenizer _IKTokenizer = new IKTokenizer(configuration);
return new TokenStreamComponents(_IKTokenizer); return new TokenStreamComponents(_IKTokenizer);
} }

View File

@ -32,6 +32,7 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment; import org.elasticsearch.env.Environment;
import org.wltea.analyzer.cfg.Configuration;
import org.wltea.analyzer.core.IKSegmenter; import org.wltea.analyzer.core.IKSegmenter;
import org.wltea.analyzer.core.Lexeme; import org.wltea.analyzer.core.Lexeme;
@ -64,16 +65,15 @@ public final class IKTokenizer extends Tokenizer {
/** /**
* Lucene 4.0 Tokenizer适配器类构造函数 * Lucene 4.0 Tokenizer适配器类构造函数
* @param in
*/ */
public IKTokenizer(boolean useSmart){ public IKTokenizer(Configuration configuration){
super(); super();
offsetAtt = addAttribute(OffsetAttribute.class); offsetAtt = addAttribute(OffsetAttribute.class);
termAtt = addAttribute(CharTermAttribute.class); termAtt = addAttribute(CharTermAttribute.class);
typeAtt = addAttribute(TypeAttribute.class); typeAtt = addAttribute(TypeAttribute.class);
posIncrAtt = addAttribute(PositionIncrementAttribute.class); posIncrAtt = addAttribute(PositionIncrementAttribute.class);
_IKImplement = new IKSegmenter(input,useSmart); _IKImplement = new IKSegmenter(input,configuration);
} }
/* (non-Javadoc) /* (non-Javadoc)

View File

@ -1,90 +0,0 @@
/**
* IK 中文分词 版本 5.0.1
* IK Analyzer release 5.0.1
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* 源代码由林良益(linliangyi2005@gmail.com)提供
* 版权声明 2012乌龙茶工作室
* provided by Linliangyi and copyright 2012 by Oolong studio
*
*
*/
package org.wltea.analyzer.sample;
import java.io.IOException;
import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.elasticsearch.common.logging.ESLogger;
import org.elasticsearch.common.logging.Loggers;
import org.wltea.analyzer.lucene.IKAnalyzer;
/**
* 使用IKAnalyzer进行分词的演示
* 2012-10-22
*
*/
public class IKAnalzyerDemo {
public static ESLogger logger= Loggers.getLogger("ik-analyzer");
public static void main(String[] args){
//构建IK分词器使用smart分词模式
Analyzer analyzer = new IKAnalyzer(true);
//获取Lucene的TokenStream对象
TokenStream ts = null;
try {
ts = analyzer.tokenStream("myfield", new StringReader("WORLD ,.. html DATA</html>HELLO"));
// ts = analyzer.tokenStream("myfield", new StringReader("这是一个中文分词的例子你可以直接运行它IKAnalyer can analysis english text too"));
//获取词元位置属性
OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class);
//获取词元文本属性
CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
//获取词元文本属性
TypeAttribute type = ts.addAttribute(TypeAttribute.class);
//重置TokenStream重置StringReader
ts.reset();
//迭代获取分词结果
while (ts.incrementToken()) {
System.out.println(offset.startOffset() + " - " + offset.endOffset() + " : " + term.toString() + " | " + type.type());
}
//关闭TokenStream关闭StringReader
ts.end(); // Perform end-of-stream operations, e.g. set the final offset.
} catch (IOException e) {
logger.error(e.getMessage(), e);
} finally {
//释放TokenStream的所有资源
if(ts != null){
try {
ts.close();
} catch (IOException e) {
logger.error(e.getMessage(), e);
}
}
}
}
}

View File

@ -1,150 +0,0 @@
/**
* IK 中文分词 版本 5.0
* IK Analyzer release 5.0
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* 源代码由林良益(linliangyi2005@gmail.com)提供
* 版权声明 2012乌龙茶工作室
* provided by Linliangyi and copyright 2012 by Oolong studio
*
*
*/
package org.wltea.analyzer.sample;
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
import org.elasticsearch.common.logging.ESLogger;
import org.elasticsearch.common.logging.Loggers;
import org.wltea.analyzer.lucene.IKAnalyzer;
/**
* 使用IKAnalyzer进行Lucene索引和查询的演示
* 2012-3-2
*
* 以下是结合Lucene4.0 API的写法
*
*/
public class LuceneIndexAndSearchDemo {
public static ESLogger logger= Loggers.getLogger("ik-analyzer");
/**
* 模拟
* 创建一个单条记录的索引并对其进行搜索
* @param args
*/
public static void main(String[] args){
//Lucene Document的域名
String fieldName = "text";
//检索内容
String text = "IK Analyzer是一个结合词典分词和文法分词的中文分词开源工具包。它使用了全新的正向迭代最细粒度切分算法。";
//实例化IKAnalyzer分词器
Analyzer analyzer = new IKAnalyzer(true);
Directory directory = null;
IndexWriter iwriter = null;
IndexReader ireader = null;
IndexSearcher isearcher = null;
try {
//建立内存索引对象
directory = new RAMDirectory();
//配置IndexWriterConfig
IndexWriterConfig iwConfig = new IndexWriterConfig(analyzer);
iwConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
iwriter = new IndexWriter(directory , iwConfig);
//写入索引
Document doc = new Document();
doc.add(new StringField("ID", "10000", Field.Store.YES));
doc.add(new TextField(fieldName, text, Field.Store.YES));
iwriter.addDocument(doc);
iwriter.close();
//搜索过程**********************************
//实例化搜索器
ireader = DirectoryReader.open(directory);
isearcher = new IndexSearcher(ireader);
String keyword = "中文分词工具包";
//使用QueryParser查询分析器构造Query对象
QueryParser qp = new QueryParser(fieldName, analyzer);
qp.setDefaultOperator(QueryParser.AND_OPERATOR);
Query query = qp.parse(keyword);
System.out.println("Query = " + query);
//搜索相似度最高的5条记录
TopDocs topDocs = isearcher.search(query , 5);
System.out.println("命中:" + topDocs.totalHits);
//输出结果
ScoreDoc[] scoreDocs = topDocs.scoreDocs;
for (int i = 0; i < topDocs.totalHits; i++){
Document targetDoc = isearcher.doc(scoreDocs[i].doc);
System.out.println("内容:" + targetDoc.toString());
}
} catch (CorruptIndexException e) {
logger.error(e.getMessage(), e);
} catch (LockObtainFailedException e) {
logger.error(e.getMessage(), e);
} catch (IOException e) {
logger.error(e.getMessage(), e);
} catch (ParseException e) {
logger.error(e.getMessage(), e);
} finally{
if(ireader != null){
try {
ireader.close();
} catch (IOException e) {
logger.error(e.getMessage(), e);
}
}
if(directory != null){
try {
directory.close();
} catch (IOException e) {
logger.error(e.getMessage(), e);
}
}
}
}
}

View File

@ -1,83 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<Diagram>
<ID>JAVA</ID>
<OriginalElement>org.elasticsearch.index.analysis.IKAnalysisBinderProcessor</OriginalElement>
<nodes>
<node x="1244.0" y="553.0">org.elasticsearch.index.analysis.IKAnalysisBinderProcessor</node>
<node x="2212.0" y="489.0">org.elasticsearch.index.analysis.AnalysisModule.AnalysisBinderProcessor.AnalyzersBindings</node>
<node x="1316.0" y="0.0">java.lang.Object</node>
<node x="1244.0" y="329.0">org.elasticsearch.index.analysis.AnalysisModule.AnalysisBinderProcessor</node>
<node x="616.0" y="510.0">org.elasticsearch.index.analysis.AnalysisModule.AnalysisBinderProcessor.TokenFiltersBindings</node>
<node x="0.0" y="510.0">org.elasticsearch.index.analysis.AnalysisModule.AnalysisBinderProcessor.CharFiltersBindings</node>
<node x="1608.0" y="510.0">org.elasticsearch.index.analysis.AnalysisModule.AnalysisBinderProcessor.TokenizersBindings</node>
</nodes>
<notes />
<edges>
<edge source="org.elasticsearch.index.analysis.AnalysisModule.AnalysisBinderProcessor.TokenFiltersBindings" target="org.elasticsearch.index.analysis.AnalysisModule.AnalysisBinderProcessor">
<point x="152.0" y="-77.0" />
<point x="1072.0" y="469.0" />
<point x="1347.2" y="469.0" />
<point x="-68.79999999999995" y="55.0" />
</edge>
<edge source="org.elasticsearch.index.analysis.AnalysisModule.AnalysisBinderProcessor.CharFiltersBindings" target="java.lang.Object">
<point x="-149.0" y="-77.0" />
<point x="149.0" y="299.0" />
<point x="1336.0" y="299.0" />
<point x="-80.0" y="139.5" />
</edge>
<edge source="org.elasticsearch.index.analysis.AnalysisModule.AnalysisBinderProcessor" target="java.lang.Object">
<point x="0.0" y="-55.0" />
<point x="0.0" y="139.5" />
</edge>
<edge source="org.elasticsearch.index.analysis.AnalysisModule.AnalysisBinderProcessor.AnalyzersBindings" target="org.elasticsearch.index.analysis.AnalysisModule.AnalysisBinderProcessor">
<point x="-180.5" y="-98.0" />
<point x="2392.5" y="459.0" />
<point x="1553.6" y="459.0" />
<point x="137.5999999999999" y="55.0" />
</edge>
<edge source="org.elasticsearch.index.analysis.AnalysisModule.AnalysisBinderProcessor.CharFiltersBindings" target="org.elasticsearch.index.analysis.AnalysisModule.AnalysisBinderProcessor">
<point x="149.0" y="-77.0" />
<point x="447.0" y="459.0" />
<point x="1278.4" y="459.0" />
<point x="-137.5999999999999" y="55.0" />
</edge>
<edge source="org.elasticsearch.index.analysis.IKAnalysisBinderProcessor" target="org.elasticsearch.index.analysis.AnalysisModule.AnalysisBinderProcessor">
<point x="0.0" y="-34.0" />
<point x="0.0" y="55.0" />
</edge>
<edge source="org.elasticsearch.index.analysis.AnalysisModule.AnalysisBinderProcessor.TokenFiltersBindings" target="java.lang.Object">
<point x="-152.0" y="-77.0" />
<point x="768.0" y="309.0" />
<point x="1376.0" y="309.0" />
<point x="-40.0" y="139.5" />
</edge>
<edge source="org.elasticsearch.index.analysis.AnalysisModule.AnalysisBinderProcessor.AnalyzersBindings" target="java.lang.Object">
<point x="180.5" y="-98.0" />
<point x="2753.5" y="299.0" />
<point x="1496.0" y="299.0" />
<point x="80.0" y="139.5" />
</edge>
<edge source="org.elasticsearch.index.analysis.AnalysisModule.AnalysisBinderProcessor.TokenizersBindings" target="java.lang.Object">
<point x="146.0" y="-77.0" />
<point x="2046.0" y="309.0" />
<point x="1456.0" y="309.0" />
<point x="40.0" y="139.5" />
</edge>
<edge source="org.elasticsearch.index.analysis.AnalysisModule.AnalysisBinderProcessor.TokenizersBindings" target="org.elasticsearch.index.analysis.AnalysisModule.AnalysisBinderProcessor">
<point x="-146.0" y="-77.0" />
<point x="1754.0" y="469.0" />
<point x="1484.8" y="469.0" />
<point x="68.79999999999995" y="55.0" />
</edge>
</edges>
<settings layout="Hierarchic Group" zoom="1.0" x="110.5" y="89.0" />
<SelectedNodes />
<Categories>
<Category>Fields</Category>
<Category>Methods</Category>
<Category>Constructors</Category>
<Category>Inner Classes</Category>
<Category>Properties</Category>
</Categories>
</Diagram>