Also load config from /etc/elasticsearch/analysis-ik (#197)

Support installation via `bin/plugin`, and handle the relocation of the config files that this entails.
This commit is contained in:
Robert LU 2016-05-25 17:07:25 +08:00 committed by Medcl
parent 7e29998ab9
commit 26fe905cc6
17 changed files with 238 additions and 227 deletions

View File

@ -169,7 +169,8 @@ Result
### Dictionary Configuration ### Dictionary Configuration
#### `plugins/elasticsearch-analysis-ik-*/config/ik/IKAnalyzer.cfg.xml` `IKAnalyzer.cfg.xml` can be located at `{conf}/analysis-ik/IKAnalyzer.cfg.xml`
or `{plugins}/elasticsearch-analysis-ik-*/config/IKAnalyzer.cfg.xml`
```xml ```xml
<?xml version="1.0" encoding="UTF-8"?> <?xml version="1.0" encoding="UTF-8"?>

View File

@ -1,12 +1,12 @@
<?xml version="1.0" encoding="UTF-8"?> <?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd"> <!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">
<properties> <properties>
<comment>IK Analyzer 扩展配置</comment> <comment>IK Analyzer 扩展配置</comment>
<!--用户可以在这里配置自己的扩展字典 --> <!--用户可以在这里配置自己的扩展字典 -->
<entry key="ext_dict">custom/mydict.dic;custom/single_word_low_freq.dic</entry> <entry key="ext_dict">custom/mydict.dic;custom/single_word_low_freq.dic</entry>
<!--用户可以在这里配置自己的扩展停止词字典--> <!--用户可以在这里配置自己的扩展停止词字典-->
<entry key="ext_stopwords">custom/ext_stopword.dic</entry> <entry key="ext_stopwords">custom/ext_stopword.dic</entry>
<!--用户可以在这里配置远程扩展字典 --> <!--用户可以在这里配置远程扩展字典 -->
<!-- <entry key="remote_ext_dict">words_location</entry> --> <!-- <entry key="remote_ext_dict">words_location</entry> -->
<!--用户可以在这里配置远程扩展停止词字典--> <!--用户可以在这里配置远程扩展停止词字典-->
<!-- <entry key="remote_ext_stopwords">words_location</entry> --> <!-- <entry key="remote_ext_stopwords">words_location</entry> -->

View File

@ -23,14 +23,16 @@ import static java.rmi.Naming.bind;
public class AnalysisIkPlugin extends Plugin { public class AnalysisIkPlugin extends Plugin {
public static String PLUGIN_NAME = "analysis-ik";
@Override public String name() { @Override public String name() {
return "analysis-ik"; return PLUGIN_NAME;
} }
@Override public String description() { @Override public String description() {
return "ik analysis"; return PLUGIN_NAME;
} }
@Override @Override

View File

@ -20,118 +20,132 @@ import java.util.Properties;
public class Configuration { public class Configuration {
private static String FILE_NAME = "ik/IKAnalyzer.cfg.xml"; private static String FILE_NAME = "IKAnalyzer.cfg.xml";
private static final String EXT_DICT = "ext_dict"; private static final String EXT_DICT = "ext_dict";
private static final String REMOTE_EXT_DICT = "remote_ext_dict"; private static final String REMOTE_EXT_DICT = "remote_ext_dict";
private static final String EXT_STOP = "ext_stopwords"; private static final String EXT_STOP = "ext_stopwords";
private static final String REMOTE_EXT_STOP = "remote_ext_stopwords"; private static final String REMOTE_EXT_STOP = "remote_ext_stopwords";
private static ESLogger logger = Loggers.getLogger("ik-analyzer"); private static ESLogger logger = Loggers.getLogger("ik-analyzer");
private Path conf_dir;
private Properties props; private Properties props;
private Environment environment; private Environment environment;
@Inject @Inject
public Configuration(Environment env){ public Configuration(Environment env) {
props = new Properties(); props = new Properties();
environment = env; environment = env;
conf_dir = environment.configFile().resolve(AnalysisIkPlugin.PLUGIN_NAME);
Path configFile = conf_dir.resolve(FILE_NAME);
Path fileConfig = PathUtils.get(getDictRoot(), FILE_NAME); InputStream input = null;
try {
logger.info("try load config from {}", configFile);
InputStream input = null; input = new FileInputStream(configFile.toFile());
try { } catch (FileNotFoundException e) {
input = new FileInputStream(fileConfig.toFile()); conf_dir = this.getConfigInPluginDir();
} catch (FileNotFoundException e) { configFile = conf_dir.resolve(FILE_NAME);
logger.error("ik-analyzer",e); try {
} logger.info("try load config from {}", configFile);
if(input != null){ input = new FileInputStream(configFile.toFile());
} catch (FileNotFoundException ex) {
// We should report origin exception
logger.error("ik-analyzer", e);
}
}
if (input != null) {
try { try {
props.loadFromXML(input); props.loadFromXML(input);
} catch (InvalidPropertiesFormatException e) { } catch (InvalidPropertiesFormatException e) {
logger.error("ik-analyzer", e); logger.error("ik-analyzer", e);
} catch (IOException e) { } catch (IOException e) {
logger.error("ik-analyzer",e); logger.error("ik-analyzer", e);
} }
} }
} }
public List<String> getExtDictionarys(){ public List<String> getExtDictionarys() {
List<String> extDictFiles = new ArrayList<String>(2); List<String> extDictFiles = new ArrayList<String>(2);
String extDictCfg = props.getProperty(EXT_DICT); String extDictCfg = props.getProperty(EXT_DICT);
if(extDictCfg != null){ if (extDictCfg != null) {
String[] filePaths = extDictCfg.split(";"); String[] filePaths = extDictCfg.split(";");
if(filePaths != null){ if (filePaths != null) {
for(String filePath : filePaths){ for (String filePath : filePaths) {
if(filePath != null && !"".equals(filePath.trim())){ if (filePath != null && !"".equals(filePath.trim())) {
Path file = PathUtils.get("ik", filePath.trim()); Path file = PathUtils.get(filePath.trim());
extDictFiles.add(file.toString()); extDictFiles.add(file.toString());
} }
} }
} }
} }
return extDictFiles; return extDictFiles;
} }
public List<String> getRemoteExtDictionarys(){ public List<String> getRemoteExtDictionarys() {
List<String> remoteExtDictFiles = new ArrayList<String>(2); List<String> remoteExtDictFiles = new ArrayList<String>(2);
String remoteExtDictCfg = props.getProperty(REMOTE_EXT_DICT); String remoteExtDictCfg = props.getProperty(REMOTE_EXT_DICT);
if(remoteExtDictCfg != null){ if (remoteExtDictCfg != null) {
String[] filePaths = remoteExtDictCfg.split(";"); String[] filePaths = remoteExtDictCfg.split(";");
if(filePaths != null){ if (filePaths != null) {
for(String filePath : filePaths){ for (String filePath : filePaths) {
if(filePath != null && !"".equals(filePath.trim())){ if (filePath != null && !"".equals(filePath.trim())) {
remoteExtDictFiles.add(filePath); remoteExtDictFiles.add(filePath);
} }
} }
} }
} }
return remoteExtDictFiles; return remoteExtDictFiles;
} }
public List<String> getExtStopWordDictionarys(){ public List<String> getExtStopWordDictionarys() {
List<String> extStopWordDictFiles = new ArrayList<String>(2); List<String> extStopWordDictFiles = new ArrayList<String>(2);
String extStopWordDictCfg = props.getProperty(EXT_STOP); String extStopWordDictCfg = props.getProperty(EXT_STOP);
if(extStopWordDictCfg != null){ if (extStopWordDictCfg != null) {
String[] filePaths = extStopWordDictCfg.split(";"); String[] filePaths = extStopWordDictCfg.split(";");
if(filePaths != null){ if (filePaths != null) {
for(String filePath : filePaths){ for (String filePath : filePaths) {
if(filePath != null && !"".equals(filePath.trim())){ if (filePath != null && !"".equals(filePath.trim())) {
Path file = PathUtils.get("ik", filePath.trim()); Path file = PathUtils.get(filePath.trim());
extStopWordDictFiles.add(file.toString()); extStopWordDictFiles.add(file.toString());
} }
} }
} }
} }
return extStopWordDictFiles; return extStopWordDictFiles;
} }
public List<String> getRemoteExtStopWordDictionarys(){ public List<String> getRemoteExtStopWordDictionarys() {
List<String> remoteExtStopWordDictFiles = new ArrayList<String>(2); List<String> remoteExtStopWordDictFiles = new ArrayList<String>(2);
String remoteExtStopWordDictCfg = props.getProperty(REMOTE_EXT_STOP); String remoteExtStopWordDictCfg = props.getProperty(REMOTE_EXT_STOP);
if(remoteExtStopWordDictCfg != null){ if (remoteExtStopWordDictCfg != null) {
String[] filePaths = remoteExtStopWordDictCfg.split(";"); String[] filePaths = remoteExtStopWordDictCfg.split(";");
if(filePaths != null){ if (filePaths != null) {
for(String filePath : filePaths){ for (String filePath : filePaths) {
if(filePath != null && !"".equals(filePath.trim())){ if (filePath != null && !"".equals(filePath.trim())) {
remoteExtStopWordDictFiles.add(filePath); remoteExtStopWordDictFiles.add(filePath);
} }
} }
} }
} }
return remoteExtStopWordDictFiles; return remoteExtStopWordDictFiles;
} }
public String getDictRoot() { public String getDictRoot() {
return PathUtils.get( return conf_dir.toAbsolutePath().toString();
new File(AnalysisIkPlugin.class.getProtectionDomain().getCodeSource().getLocation().getPath()).getParent(),"config") }
.toAbsolutePath().toString();
} private Path getConfigInPluginDir() {
return PathUtils
.get(new File(AnalysisIkPlugin.class.getProtectionDomain().getCodeSource().getLocation().getPath())
.getParent(), "config")
.toAbsolutePath();
}
} }

View File

@ -56,7 +56,6 @@ import org.wltea.analyzer.cfg.Configuration;
*/ */
public class Dictionary { public class Dictionary {
/* /*
* 词典单子实例 * 词典单子实例
*/ */
@ -74,40 +73,37 @@ public class Dictionary {
private DictSegment _StopWords; private DictSegment _StopWords;
/** /**
* 配置对象 * 配置对象
*/ */
private Configuration configuration; private Configuration configuration;
public static ESLogger logger=Loggers.getLogger("ik-analyzer"); public static ESLogger logger = Loggers.getLogger("ik-analyzer");
private static ScheduledExecutorService pool = Executors.newScheduledThreadPool(1); private static ScheduledExecutorService pool = Executors.newScheduledThreadPool(1);
public static final String PATH_DIC_MAIN = "ik/main.dic"; public static final String PATH_DIC_MAIN = "main.dic";
public static final String PATH_DIC_SURNAME = "ik/surname.dic"; public static final String PATH_DIC_SURNAME = "surname.dic";
public static final String PATH_DIC_QUANTIFIER = "ik/quantifier.dic"; public static final String PATH_DIC_QUANTIFIER = "quantifier.dic";
public static final String PATH_DIC_SUFFIX = "ik/suffix.dic"; public static final String PATH_DIC_SUFFIX = "suffix.dic";
public static final String PATH_DIC_PREP = "ik/preposition.dic"; public static final String PATH_DIC_PREP = "preposition.dic";
public static final String PATH_DIC_STOP = "ik/stopword.dic"; public static final String PATH_DIC_STOP = "stopword.dic";
private Dictionary(){ private Dictionary() {
} }
/** /**
* 词典初始化 * 词典初始化 由于IK Analyzer的词典采用Dictionary类的静态方法进行词典初始化
* 由于IK Analyzer的词典采用Dictionary类的静态方法进行词典初始化 * 只有当Dictionary类被实际调用时才会开始载入词典 这将延长首次分词操作的时间 该方法提供了一个在应用加载阶段就初始化字典的手段
* 只有当Dictionary类被实际调用时才会开始载入词典 *
* 这将延长首次分词操作的时间
* 该方法提供了一个在应用加载阶段就初始化字典的手段
* @return Dictionary * @return Dictionary
*/ */
public static synchronized Dictionary initial(Configuration cfg){ public static synchronized Dictionary initial(Configuration cfg) {
if(singleton == null){ if (singleton == null) {
synchronized(Dictionary.class){ synchronized (Dictionary.class) {
if(singleton == null){ if (singleton == null) {
singleton = new Dictionary(); singleton = new Dictionary();
singleton.configuration=cfg; singleton.configuration = cfg;
singleton.loadMainDict(); singleton.loadMainDict();
singleton.loadSurnameDict(); singleton.loadSurnameDict();
singleton.loadQuantifierDict(); singleton.loadQuantifierDict();
@ -115,12 +111,12 @@ public class Dictionary {
singleton.loadPrepDict(); singleton.loadPrepDict();
singleton.loadStopWordDict(); singleton.loadStopWordDict();
//建立监控线程 // 建立监控线程
for(String location:cfg.getRemoteExtDictionarys()){ for (String location : cfg.getRemoteExtDictionarys()) {
//10 秒是初始延迟可以修改的 60是间隔时间 单位秒 // 10 秒是初始延迟可以修改的 60是间隔时间 单位秒
pool.scheduleAtFixedRate(new Monitor(location), 10, 60, TimeUnit.SECONDS); pool.scheduleAtFixedRate(new Monitor(location), 10, 60, TimeUnit.SECONDS);
} }
for(String location:cfg.getRemoteExtStopWordDictionarys()){ for (String location : cfg.getRemoteExtStopWordDictionarys()) {
pool.scheduleAtFixedRate(new Monitor(location), 10, 60, TimeUnit.SECONDS); pool.scheduleAtFixedRate(new Monitor(location), 10, 60, TimeUnit.SECONDS);
} }
@ -133,10 +129,11 @@ public class Dictionary {
/** /**
* 获取词典单子实例 * 获取词典单子实例
*
* @return Dictionary 单例对象 * @return Dictionary 单例对象
*/ */
public static Dictionary getSingleton(){ public static Dictionary getSingleton() {
if(singleton == null){ if (singleton == null) {
throw new IllegalStateException("词典尚未初始化请先调用initial方法"); throw new IllegalStateException("词典尚未初始化请先调用initial方法");
} }
return singleton; return singleton;
@ -144,13 +141,15 @@ public class Dictionary {
/** /**
* 批量加载新词条 * 批量加载新词条
* @param words Collection<String>词条列表 *
* @param words
* Collection<String>词条列表
*/ */
public void addWords(Collection<String> words){ public void addWords(Collection<String> words) {
if(words != null){ if (words != null) {
for(String word : words){ for (String word : words) {
if (word != null) { if (word != null) {
//批量加载词条到主内存词典中 // 批量加载词条到主内存词典中
singleton._MainDict.fillSegment(word.trim().toCharArray()); singleton._MainDict.fillSegment(word.trim().toCharArray());
} }
} }
@ -160,11 +159,11 @@ public class Dictionary {
/** /**
* 批量移除屏蔽词条 * 批量移除屏蔽词条
*/ */
public void disableWords(Collection<String> words){ public void disableWords(Collection<String> words) {
if(words != null){ if (words != null) {
for(String word : words){ for (String word : words) {
if (word != null) { if (word != null) {
//批量屏蔽词条 // 批量屏蔽词条
singleton._MainDict.disableSegment(word.trim().toCharArray()); singleton._MainDict.disableSegment(word.trim().toCharArray());
} }
} }
@ -173,55 +172,58 @@ public class Dictionary {
/** /**
* 检索匹配主词典 * 检索匹配主词典
*
* @return Hit 匹配结果描述 * @return Hit 匹配结果描述
*/ */
public Hit matchInMainDict(char[] charArray){ public Hit matchInMainDict(char[] charArray) {
return singleton._MainDict.match(charArray); return singleton._MainDict.match(charArray);
} }
/** /**
* 检索匹配主词典 * 检索匹配主词典
*
* @return Hit 匹配结果描述 * @return Hit 匹配结果描述
*/ */
public Hit matchInMainDict(char[] charArray , int begin, int length){ public Hit matchInMainDict(char[] charArray, int begin, int length) {
return singleton._MainDict.match(charArray, begin, length); return singleton._MainDict.match(charArray, begin, length);
} }
/** /**
* 检索匹配量词词典 * 检索匹配量词词典
*
* @return Hit 匹配结果描述 * @return Hit 匹配结果描述
*/ */
public Hit matchInQuantifierDict(char[] charArray , int begin, int length){ public Hit matchInQuantifierDict(char[] charArray, int begin, int length) {
return singleton._QuantifierDict.match(charArray, begin, length); return singleton._QuantifierDict.match(charArray, begin, length);
} }
/** /**
* 从已匹配的Hit中直接取出DictSegment继续向下匹配 * 从已匹配的Hit中直接取出DictSegment继续向下匹配
*
* @return Hit * @return Hit
*/ */
public Hit matchWithHit(char[] charArray , int currentIndex , Hit matchedHit){ public Hit matchWithHit(char[] charArray, int currentIndex, Hit matchedHit) {
DictSegment ds = matchedHit.getMatchedDictSegment(); DictSegment ds = matchedHit.getMatchedDictSegment();
return ds.match(charArray, currentIndex, 1 , matchedHit); return ds.match(charArray, currentIndex, 1, matchedHit);
} }
/** /**
* 判断是否是停止词 * 判断是否是停止词
*
* @return boolean * @return boolean
*/ */
public boolean isStopWord(char[] charArray , int begin, int length){ public boolean isStopWord(char[] charArray, int begin, int length) {
return singleton._StopWords.match(charArray, begin, length).isMatch(); return singleton._StopWords.match(charArray, begin, length).isMatch();
} }
/** /**
* 加载主词典及扩展词典 * 加载主词典及扩展词典
*/ */
private void loadMainDict(){ private void loadMainDict() {
//建立一个主词典实例 // 建立一个主词典实例
_MainDict = new DictSegment((char)0); _MainDict = new DictSegment((char) 0);
//读取主词典文件 // 读取主词典文件
Path file = PathUtils.get(configuration.getDictRoot(), Dictionary.PATH_DIC_MAIN); Path file = PathUtils.get(configuration.getDictRoot(), Dictionary.PATH_DIC_MAIN);
InputStream is = null; InputStream is = null;
@ -232,7 +234,7 @@ public class Dictionary {
} }
try { try {
BufferedReader br = new BufferedReader(new InputStreamReader(is , "UTF-8"), 512); BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512);
String theWord = null; String theWord = null;
do { do {
theWord = br.readLine(); theWord = br.readLine();
@ -242,90 +244,89 @@ public class Dictionary {
} while (theWord != null); } while (theWord != null);
} catch (IOException e) { } catch (IOException e) {
logger.error("ik-analyzer",e); logger.error("ik-analyzer", e);
}finally{ } finally {
try { try {
if(is != null){ if (is != null) {
is.close(); is.close();
is = null; is = null;
} }
} catch (IOException e) { } catch (IOException e) {
logger.error("ik-analyzer",e); logger.error("ik-analyzer", e);
} }
} }
//加载扩展词典 // 加载扩展词典
this.loadExtDict(); this.loadExtDict();
//加载远程自定义词库 // 加载远程自定义词库
this.loadRemoteExtDict(); this.loadRemoteExtDict();
} }
/** /**
* 加载用户配置的扩展词典到主词库表 * 加载用户配置的扩展词典到主词库表
*/ */
private void loadExtDict(){ private void loadExtDict() {
//加载扩展词典配置 // 加载扩展词典配置
List<String> extDictFiles = configuration.getExtDictionarys(); List<String> extDictFiles = configuration.getExtDictionarys();
if(extDictFiles != null){ if (extDictFiles != null) {
InputStream is = null; InputStream is = null;
for(String extDictName : extDictFiles){ for (String extDictName : extDictFiles) {
//读取扩展词典文件 // 读取扩展词典文件
logger.info("[Dict Loading] " + extDictName); logger.info("[Dict Loading] " + extDictName);
Path file = PathUtils.get(configuration.getDictRoot(), extDictName); Path file = PathUtils.get(configuration.getDictRoot(), extDictName);
try { try {
is = new FileInputStream(file.toFile()); is = new FileInputStream(file.toFile());
} catch (FileNotFoundException e) { } catch (FileNotFoundException e) {
logger.error("ik-analyzer",e); logger.error("ik-analyzer", e);
} }
//如果找不到扩展的字典则忽略 // 如果找不到扩展的字典则忽略
if(is == null){ if (is == null) {
continue; continue;
} }
try { try {
BufferedReader br = new BufferedReader(new InputStreamReader(is , "UTF-8"), 512); BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512);
String theWord = null; String theWord = null;
do { do {
theWord = br.readLine(); theWord = br.readLine();
if (theWord != null && !"".equals(theWord.trim())) { if (theWord != null && !"".equals(theWord.trim())) {
//加载扩展词典数据到主内存词典中 // 加载扩展词典数据到主内存词典中
_MainDict.fillSegment(theWord.trim().toCharArray()); _MainDict.fillSegment(theWord.trim().toCharArray());
} }
} while (theWord != null); } while (theWord != null);
} catch (IOException e) { } catch (IOException e) {
logger.error("ik-analyzer",e); logger.error("ik-analyzer", e);
}finally{ } finally {
try { try {
if(is != null){ if (is != null) {
is.close(); is.close();
is = null; is = null;
} }
} catch (IOException e) { } catch (IOException e) {
logger.error("ik-analyzer",e); logger.error("ik-analyzer", e);
} }
} }
} }
} }
} }
/** /**
* 加载远程扩展词典到主词库表 * 加载远程扩展词典到主词库表
*/ */
private void loadRemoteExtDict(){ private void loadRemoteExtDict() {
List<String> remoteExtDictFiles = configuration.getRemoteExtDictionarys(); List<String> remoteExtDictFiles = configuration.getRemoteExtDictionarys();
for(String location:remoteExtDictFiles){ for (String location : remoteExtDictFiles) {
logger.info("[Dict Loading] " + location); logger.info("[Dict Loading] " + location);
List<String> lists = getRemoteWords(location); List<String> lists = getRemoteWords(location);
//如果找不到扩展的字典则忽略 // 如果找不到扩展的字典则忽略
if(lists == null){ if (lists == null) {
logger.error("[Dict Loading] "+location+"加载失败"); logger.error("[Dict Loading] " + location + "加载失败");
continue; continue;
} }
for(String theWord:lists){ for (String theWord : lists) {
if (theWord != null && !"".equals(theWord.trim())) { if (theWord != null && !"".equals(theWord.trim())) {
//加载扩展词典数据到主内存词典中 // 加载扩展词典数据到主内存词典中
logger.info(theWord); logger.info(theWord);
_MainDict.fillSegment(theWord.trim().toLowerCase().toCharArray()); _MainDict.fillSegment(theWord.trim().toLowerCase().toCharArray());
} }
@ -337,11 +338,11 @@ public class Dictionary {
/** /**
* 从远程服务器上下载自定义词条 * 从远程服务器上下载自定义词条
*/ */
private static List<String> getRemoteWords(String location){ private static List<String> getRemoteWords(String location) {
List<String> buffer = new ArrayList<String>(); List<String> buffer = new ArrayList<String>();
RequestConfig rc = RequestConfig.custom().setConnectionRequestTimeout(10*1000) RequestConfig rc = RequestConfig.custom().setConnectionRequestTimeout(10 * 1000).setConnectTimeout(10 * 1000)
.setConnectTimeout(10*1000).setSocketTimeout(60*1000).build(); .setSocketTimeout(60 * 1000).build();
CloseableHttpClient httpclient = HttpClients.createDefault(); CloseableHttpClient httpclient = HttpClients.createDefault();
CloseableHttpResponse response; CloseableHttpResponse response;
BufferedReader in; BufferedReader in;
@ -349,17 +350,17 @@ public class Dictionary {
get.setConfig(rc); get.setConfig(rc);
try { try {
response = httpclient.execute(get); response = httpclient.execute(get);
if(response.getStatusLine().getStatusCode()==200){ if (response.getStatusLine().getStatusCode() == 200) {
String charset = "UTF-8"; String charset = "UTF-8";
//获取编码默认为utf-8 // 获取编码默认为utf-8
if(response.getEntity().getContentType().getValue().contains("charset=")){ if (response.getEntity().getContentType().getValue().contains("charset=")) {
String contentType=response.getEntity().getContentType().getValue(); String contentType = response.getEntity().getContentType().getValue();
charset=contentType.substring(contentType.lastIndexOf("=")+1); charset = contentType.substring(contentType.lastIndexOf("=") + 1);
} }
in = new BufferedReader(new InputStreamReader(response.getEntity().getContent(),charset)); in = new BufferedReader(new InputStreamReader(response.getEntity().getContent(), charset));
String line ; String line;
while((line = in.readLine())!=null){ while ((line = in.readLine()) != null) {
buffer.add(line); buffer.add(line);
} }
in.close(); in.close();
@ -368,25 +369,23 @@ public class Dictionary {
} }
response.close(); response.close();
} catch (ClientProtocolException e) { } catch (ClientProtocolException e) {
logger.error( "getRemoteWords {} error" , e , location); logger.error("getRemoteWords {} error", e, location);
} catch (IllegalStateException e) { } catch (IllegalStateException e) {
logger.error( "getRemoteWords {} error" , e , location ); logger.error("getRemoteWords {} error", e, location);
} catch (IOException e) { } catch (IOException e) {
logger.error( "getRemoteWords {} error" , e , location ); logger.error("getRemoteWords {} error", e, location);
} }
return buffer; return buffer;
} }
/** /**
* 加载用户扩展的停止词词典 * 加载用户扩展的停止词词典
*/ */
private void loadStopWordDict(){ private void loadStopWordDict() {
//建立主词典实例 // 建立主词典实例
_StopWords = new DictSegment((char)0); _StopWords = new DictSegment((char) 0);
//读取主词典文件 // 读取主词典文件
Path file = PathUtils.get(configuration.getDictRoot(), Dictionary.PATH_DIC_STOP); Path file = PathUtils.get(configuration.getDictRoot(), Dictionary.PATH_DIC_STOP);
InputStream is = null; InputStream is = null;
@ -397,7 +396,7 @@ public class Dictionary {
} }
try { try {
BufferedReader br = new BufferedReader(new InputStreamReader(is , "UTF-8"), 512); BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512);
String theWord = null; String theWord = null;
do { do {
theWord = br.readLine(); theWord = br.readLine();
@ -407,103 +406,101 @@ public class Dictionary {
} while (theWord != null); } while (theWord != null);
} catch (IOException e) { } catch (IOException e) {
logger.error("ik-analyzer",e); logger.error("ik-analyzer", e);
}finally{ } finally {
try { try {
if(is != null){ if (is != null) {
is.close(); is.close();
is = null; is = null;
} }
} catch (IOException e) { } catch (IOException e) {
logger.error("ik-analyzer",e); logger.error("ik-analyzer", e);
} }
} }
// 加载扩展停止词典
//加载扩展停止词典 List<String> extStopWordDictFiles = configuration.getExtStopWordDictionarys();
List<String> extStopWordDictFiles = configuration.getExtStopWordDictionarys(); if (extStopWordDictFiles != null) {
if(extStopWordDictFiles != null){
is = null; is = null;
for(String extStopWordDictName : extStopWordDictFiles){ for (String extStopWordDictName : extStopWordDictFiles) {
logger.info("[Dict Loading] " + extStopWordDictName); logger.info("[Dict Loading] " + extStopWordDictName);
//读取扩展词典文件 // 读取扩展词典文件
file=PathUtils.get(configuration.getDictRoot(), extStopWordDictName); file = PathUtils.get(configuration.getDictRoot(), extStopWordDictName);
try { try {
is = new FileInputStream(file.toFile()); is = new FileInputStream(file.toFile());
} catch (FileNotFoundException e) { } catch (FileNotFoundException e) {
logger.error("ik-analyzer",e); logger.error("ik-analyzer", e);
} }
//如果找不到扩展的字典则忽略 // 如果找不到扩展的字典则忽略
if(is == null){ if (is == null) {
continue; continue;
} }
try { try {
BufferedReader br = new BufferedReader(new InputStreamReader(is , "UTF-8"), 512); BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512);
String theWord = null; String theWord = null;
do { do {
theWord = br.readLine(); theWord = br.readLine();
if (theWord != null && !"".equals(theWord.trim())) { if (theWord != null && !"".equals(theWord.trim())) {
//加载扩展停止词典数据到内存中 // 加载扩展停止词典数据到内存中
_StopWords.fillSegment(theWord.trim().toCharArray()); _StopWords.fillSegment(theWord.trim().toCharArray());
} }
} while (theWord != null); } while (theWord != null);
} catch (IOException e) { } catch (IOException e) {
logger.error("ik-analyzer",e); logger.error("ik-analyzer", e);
}finally{ } finally {
try { try {
if(is != null){ if (is != null) {
is.close(); is.close();
is = null; is = null;
} }
} catch (IOException e) { } catch (IOException e) {
logger.error("ik-analyzer",e); logger.error("ik-analyzer", e);
} }
} }
} }
} }
//加载远程停用词典 // 加载远程停用词典
List<String> remoteExtStopWordDictFiles = configuration.getRemoteExtStopWordDictionarys(); List<String> remoteExtStopWordDictFiles = configuration.getRemoteExtStopWordDictionarys();
for(String location:remoteExtStopWordDictFiles){ for (String location : remoteExtStopWordDictFiles) {
logger.info("[Dict Loading] " + location); logger.info("[Dict Loading] " + location);
List<String> lists = getRemoteWords(location); List<String> lists = getRemoteWords(location);
//如果找不到扩展的字典则忽略 // 如果找不到扩展的字典则忽略
if(lists == null){ if (lists == null) {
logger.error("[Dict Loading] "+location+"加载失败"); logger.error("[Dict Loading] " + location + "加载失败");
continue; continue;
} }
for(String theWord:lists){ for (String theWord : lists) {
if (theWord != null && !"".equals(theWord.trim())) { if (theWord != null && !"".equals(theWord.trim())) {
//加载远程词典数据到主内存中 // 加载远程词典数据到主内存中
logger.info(theWord); logger.info(theWord);
_StopWords.fillSegment(theWord.trim().toLowerCase().toCharArray()); _StopWords.fillSegment(theWord.trim().toLowerCase().toCharArray());
} }
} }
} }
} }
/** /**
* 加载量词词典 * 加载量词词典
*/ */
private void loadQuantifierDict(){ private void loadQuantifierDict() {
//建立一个量词典实例 // 建立一个量词典实例
_QuantifierDict = new DictSegment((char)0); _QuantifierDict = new DictSegment((char) 0);
//读取量词词典文件 // 读取量词词典文件
Path file = PathUtils.get(configuration.getDictRoot(), Dictionary.PATH_DIC_QUANTIFIER); Path file = PathUtils.get(configuration.getDictRoot(), Dictionary.PATH_DIC_QUANTIFIER);
InputStream is = null; InputStream is = null;
try { try {
is = new FileInputStream(file.toFile()); is = new FileInputStream(file.toFile());
} catch (FileNotFoundException e) { } catch (FileNotFoundException e) {
logger.error("ik-analyzer",e); logger.error("ik-analyzer", e);
} }
try { try {
BufferedReader br = new BufferedReader(new InputStreamReader(is , "UTF-8"), 512); BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512);
String theWord = null; String theWord = null;
do { do {
theWord = br.readLine(); theWord = br.readLine();
@ -515,34 +512,33 @@ public class Dictionary {
} catch (IOException ioe) { } catch (IOException ioe) {
logger.error("Quantifier Dictionary loading exception."); logger.error("Quantifier Dictionary loading exception.");
}finally{ } finally {
try { try {
if(is != null){ if (is != null) {
is.close(); is.close();
is = null; is = null;
} }
} catch (IOException e) { } catch (IOException e) {
logger.error("ik-analyzer",e); logger.error("ik-analyzer", e);
} }
} }
} }
private void loadSurnameDict() {
private void loadSurnameDict(){ _SurnameDict = new DictSegment((char) 0);
_SurnameDict = new DictSegment((char)0);
Path file = PathUtils.get(configuration.getDictRoot(), Dictionary.PATH_DIC_SURNAME); Path file = PathUtils.get(configuration.getDictRoot(), Dictionary.PATH_DIC_SURNAME);
InputStream is = null; InputStream is = null;
try { try {
is = new FileInputStream(file.toFile()); is = new FileInputStream(file.toFile());
} catch (FileNotFoundException e) { } catch (FileNotFoundException e) {
logger.error("ik-analyzer",e); logger.error("ik-analyzer", e);
} }
if(is == null){ if (is == null) {
throw new RuntimeException("Surname Dictionary not found!!!"); throw new RuntimeException("Surname Dictionary not found!!!");
} }
try { try {
BufferedReader br = new BufferedReader(new InputStreamReader(is , "UTF-8"), 512); BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512);
String theWord; String theWord;
do { do {
theWord = br.readLine(); theWord = br.readLine();
@ -551,36 +547,35 @@ public class Dictionary {
} }
} while (theWord != null); } while (theWord != null);
} catch (IOException e) { } catch (IOException e) {
logger.error("ik-analyzer",e); logger.error("ik-analyzer", e);
}finally{ } finally {
try { try {
if(is != null){ if (is != null) {
is.close(); is.close();
is = null; is = null;
} }
} catch (IOException e) { } catch (IOException e) {
logger.error("ik-analyzer",e); logger.error("ik-analyzer", e);
} }
} }
} }
private void loadSuffixDict() {
private void loadSuffixDict(){ _SuffixDict = new DictSegment((char) 0);
_SuffixDict = new DictSegment((char)0);
Path file = PathUtils.get(configuration.getDictRoot(), Dictionary.PATH_DIC_SUFFIX); Path file = PathUtils.get(configuration.getDictRoot(), Dictionary.PATH_DIC_SUFFIX);
InputStream is = null; InputStream is = null;
try { try {
is = new FileInputStream(file.toFile()); is = new FileInputStream(file.toFile());
} catch (FileNotFoundException e) { } catch (FileNotFoundException e) {
logger.error("ik-analyzer",e); logger.error("ik-analyzer", e);
} }
if(is == null){ if (is == null) {
throw new RuntimeException("Suffix Dictionary not found!!!"); throw new RuntimeException("Suffix Dictionary not found!!!");
} }
try { try {
BufferedReader br = new BufferedReader(new InputStreamReader(is , "UTF-8"), 512); BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512);
String theWord; String theWord;
do { do {
theWord = br.readLine(); theWord = br.readLine();
@ -589,34 +584,33 @@ public class Dictionary {
} }
} while (theWord != null); } while (theWord != null);
} catch (IOException e) { } catch (IOException e) {
logger.error("ik-analyzer",e); logger.error("ik-analyzer", e);
}finally{ } finally {
try { try {
is.close(); is.close();
is = null; is = null;
} catch (IOException e) { } catch (IOException e) {
logger.error("ik-analyzer",e); logger.error("ik-analyzer", e);
} }
} }
} }
private void loadPrepDict() {
private void loadPrepDict(){ _PrepDict = new DictSegment((char) 0);
_PrepDict = new DictSegment((char)0);
Path file = PathUtils.get(configuration.getDictRoot(), Dictionary.PATH_DIC_PREP); Path file = PathUtils.get(configuration.getDictRoot(), Dictionary.PATH_DIC_PREP);
InputStream is = null; InputStream is = null;
try { try {
is = new FileInputStream(file.toFile()); is = new FileInputStream(file.toFile());
} catch (FileNotFoundException e) { } catch (FileNotFoundException e) {
logger.error("ik-analyzer",e); logger.error("ik-analyzer", e);
} }
if(is == null){ if (is == null) {
throw new RuntimeException("Preposition Dictionary not found!!!"); throw new RuntimeException("Preposition Dictionary not found!!!");
} }
try { try {
BufferedReader br = new BufferedReader(new InputStreamReader(is , "UTF-8"), 512); BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512);
String theWord; String theWord;
do { do {
theWord = br.readLine(); theWord = br.readLine();
@ -626,18 +620,18 @@ public class Dictionary {
} }
} while (theWord != null); } while (theWord != null);
} catch (IOException e) { } catch (IOException e) {
logger.error("ik-analyzer",e); logger.error("ik-analyzer", e);
}finally{ } finally {
try { try {
is.close(); is.close();
is = null; is = null;
} catch (IOException e) { } catch (IOException e) {
logger.error("ik-analyzer",e); logger.error("ik-analyzer", e);
} }
} }
} }
public void reLoadMainDict(){ public void reLoadMainDict() {
logger.info("重新加载词典..."); logger.info("重新加载词典...");
// 新开一个实例加载词典减少加载过程对当前词典使用的影响 // 新开一个实例加载词典减少加载过程对当前词典使用的影响
Dictionary tmpDict = new Dictionary(); Dictionary tmpDict = new Dictionary();