From 26fe905cc67a1095dd2ad8a543552d526d419acd Mon Sep 17 00:00:00 2001 From: Robert LU Date: Wed, 25 May 2016 17:07:25 +0800 Subject: [PATCH] Also load config from /etc/elasticsearch/analysis-ik (#197) Support install by `bin/plugin`, dealing with config files reallocation --- README.md | 3 +- config/{ik => }/IKAnalyzer.cfg.xml | 10 +- config/{ik => }/custom/ext_stopword.dic | 0 config/{ik => }/custom/mydict.dic | 0 config/{ik => }/custom/single_word.dic | 0 config/{ik => }/custom/single_word_full.dic | 0 .../{ik => }/custom/single_word_low_freq.dic | 0 config/{ik => }/custom/sougou.dic | 0 config/{ik => }/main.dic | 0 config/{ik => }/preposition.dic | 0 config/{ik => }/quantifier.dic | 0 config/{ik => }/stopword.dic | 0 config/{ik => }/suffix.dic | 0 config/{ik => }/surname.dic | 0 .../plugin/analysis/ik/AnalysisIkPlugin.java | 6 +- .../org/wltea/analyzer/cfg/Configuration.java | 120 ++++--- .../org/wltea/analyzer/dic/Dictionary.java | 326 +++++++++--------- 17 files changed, 238 insertions(+), 227 deletions(-) rename config/{ik => }/IKAnalyzer.cfg.xml (77%) rename config/{ik => }/custom/ext_stopword.dic (100%) rename config/{ik => }/custom/mydict.dic (100%) rename config/{ik => }/custom/single_word.dic (100%) rename config/{ik => }/custom/single_word_full.dic (100%) rename config/{ik => }/custom/single_word_low_freq.dic (100%) rename config/{ik => }/custom/sougou.dic (100%) rename config/{ik => }/main.dic (100%) rename config/{ik => }/preposition.dic (100%) rename config/{ik => }/quantifier.dic (100%) rename config/{ik => }/stopword.dic (100%) rename config/{ik => }/suffix.dic (100%) rename config/{ik => }/surname.dic (100%) diff --git a/README.md b/README.md index b1dc187..51518c0 100644 --- a/README.md +++ b/README.md @@ -169,7 +169,8 @@ Result ### Dictionary Configuration -#### `plugins/elasticsearch-analysis-ik-*/config/ik/IKAnalyzer.cfg.xml` +`IKAnalyzer.cfg.xml` can be located at `{conf}/analysis-ik/config/IKAnalyzer.cfg.xml` +or `{plugins}/elasticsearch-analysis-ik-*/config/IKAnalyzer.cfg.xml` ```xml diff --git a/config/ik/IKAnalyzer.cfg.xml b/config/IKAnalyzer.cfg.xml similarity index 77% rename from config/ik/IKAnalyzer.cfg.xml rename to config/IKAnalyzer.cfg.xml index 30f0cfd..9a935c1 100644 --- a/config/ik/IKAnalyzer.cfg.xml +++ b/config/IKAnalyzer.cfg.xml @@ -1,12 +1,12 @@  - - + + IK Analyzer 扩展配置 - - custom/mydict.dic;custom/single_word_low_freq.dic + + custom/mydict.dic;custom/single_word_low_freq.dic custom/ext_stopword.dic - + diff --git a/config/ik/custom/ext_stopword.dic b/config/custom/ext_stopword.dic similarity index 100% rename from config/ik/custom/ext_stopword.dic rename to config/custom/ext_stopword.dic diff --git a/config/ik/custom/mydict.dic b/config/custom/mydict.dic similarity index 100% rename from config/ik/custom/mydict.dic rename to config/custom/mydict.dic diff --git a/config/ik/custom/single_word.dic b/config/custom/single_word.dic similarity index 100% rename from config/ik/custom/single_word.dic rename to config/custom/single_word.dic diff --git a/config/ik/custom/single_word_full.dic b/config/custom/single_word_full.dic similarity index 100% rename from config/ik/custom/single_word_full.dic rename to config/custom/single_word_full.dic diff --git a/config/ik/custom/single_word_low_freq.dic b/config/custom/single_word_low_freq.dic similarity index 100% rename from config/ik/custom/single_word_low_freq.dic rename to config/custom/single_word_low_freq.dic diff --git a/config/ik/custom/sougou.dic b/config/custom/sougou.dic similarity index 100% rename from config/ik/custom/sougou.dic rename to config/custom/sougou.dic diff --git a/config/ik/main.dic b/config/main.dic similarity index 100% rename from config/ik/main.dic rename to config/main.dic diff --git a/config/ik/preposition.dic b/config/preposition.dic similarity index 100% rename from config/ik/preposition.dic rename to config/preposition.dic diff --git a/config/ik/quantifier.dic b/config/quantifier.dic similarity index 100% rename from config/ik/quantifier.dic rename to config/quantifier.dic diff --git a/config/ik/stopword.dic b/config/stopword.dic similarity index 100% rename from config/ik/stopword.dic rename to config/stopword.dic diff --git a/config/ik/suffix.dic b/config/suffix.dic similarity index 100% rename from config/ik/suffix.dic rename to config/suffix.dic diff --git a/config/ik/surname.dic b/config/surname.dic similarity index 100% rename from config/ik/surname.dic rename to config/surname.dic diff --git a/src/main/java/org/elasticsearch/plugin/analysis/ik/AnalysisIkPlugin.java b/src/main/java/org/elasticsearch/plugin/analysis/ik/AnalysisIkPlugin.java index b43c061..27f05c8 100644 --- a/src/main/java/org/elasticsearch/plugin/analysis/ik/AnalysisIkPlugin.java +++ b/src/main/java/org/elasticsearch/plugin/analysis/ik/AnalysisIkPlugin.java @@ -23,14 +23,16 @@ import static java.rmi.Naming.bind; public class AnalysisIkPlugin extends Plugin { + + public static String PLUGIN_NAME = "analysis-ik"; @Override public String name() { - return "analysis-ik"; + return PLUGIN_NAME; } @Override public String description() { - return "ik analysis"; + return PLUGIN_NAME; } @Override diff --git a/src/main/java/org/wltea/analyzer/cfg/Configuration.java b/src/main/java/org/wltea/analyzer/cfg/Configuration.java index 89a258c..1c5ef56 100644 --- a/src/main/java/org/wltea/analyzer/cfg/Configuration.java +++ b/src/main/java/org/wltea/analyzer/cfg/Configuration.java @@ -20,118 +20,132 @@ import java.util.Properties; public class Configuration { - private static String FILE_NAME = "ik/IKAnalyzer.cfg.xml"; + private static String FILE_NAME = "IKAnalyzer.cfg.xml"; private static final String EXT_DICT = "ext_dict"; private static final String REMOTE_EXT_DICT = "remote_ext_dict"; private static final String EXT_STOP = "ext_stopwords"; private static final String REMOTE_EXT_STOP = "remote_ext_stopwords"; - private static ESLogger logger = Loggers.getLogger("ik-analyzer"); + private static ESLogger logger = Loggers.getLogger("ik-analyzer"); + private Path conf_dir; private Properties props; - private Environment environment; + private Environment environment; @Inject - public Configuration(Environment env){ + public Configuration(Environment env) { props = new Properties(); - environment = env; + environment = env; + conf_dir = environment.configFile().resolve(AnalysisIkPlugin.PLUGIN_NAME); + Path configFile = conf_dir.resolve(FILE_NAME); - Path fileConfig = PathUtils.get(getDictRoot(), FILE_NAME); - - - InputStream input = null; - try { - input = new FileInputStream(fileConfig.toFile()); - } catch (FileNotFoundException e) { - logger.error("ik-analyzer",e); - } - if(input != null){ + InputStream input = null; + try { + logger.info("try load config from {}", configFile); + input = new FileInputStream(configFile.toFile()); + } catch (FileNotFoundException e) { + conf_dir = this.getConfigInPluginDir(); + configFile = conf_dir.resolve(FILE_NAME); + try { + logger.info("try load config from {}", configFile); + input = new FileInputStream(configFile.toFile()); + } catch (FileNotFoundException ex) { + // We should report origin exception + logger.error("ik-analyzer", e); + } + } + if (input != null) { try { props.loadFromXML(input); } catch (InvalidPropertiesFormatException e) { logger.error("ik-analyzer", e); } catch (IOException e) { - logger.error("ik-analyzer",e); + logger.error("ik-analyzer", e); } } } - public List getExtDictionarys(){ + public List getExtDictionarys() { List extDictFiles = new ArrayList(2); String extDictCfg = props.getProperty(EXT_DICT); - if(extDictCfg != null){ + if (extDictCfg != null) { String[] filePaths = extDictCfg.split(";"); - if(filePaths != null){ - for(String filePath : filePaths){ - if(filePath != null && !"".equals(filePath.trim())){ - Path file = PathUtils.get("ik", filePath.trim()); + if (filePaths != null) { + for (String filePath : filePaths) { + if (filePath != null && !"".equals(filePath.trim())) { + Path file = PathUtils.get(filePath.trim()); extDictFiles.add(file.toString()); } } } - } - return extDictFiles; + } + return extDictFiles; } - - public List getRemoteExtDictionarys(){ + + public List getRemoteExtDictionarys() { List remoteExtDictFiles = new ArrayList(2); String remoteExtDictCfg = props.getProperty(REMOTE_EXT_DICT); - if(remoteExtDictCfg != null){ + if (remoteExtDictCfg != null) { String[] filePaths = remoteExtDictCfg.split(";"); - if(filePaths != null){ - for(String filePath : filePaths){ - if(filePath != null && !"".equals(filePath.trim())){ + if (filePaths != null) { + for (String filePath : filePaths) { + if (filePath != null && !"".equals(filePath.trim())) { remoteExtDictFiles.add(filePath); } } } - } - return remoteExtDictFiles; + } + return remoteExtDictFiles; } - public List getExtStopWordDictionarys(){ + public List getExtStopWordDictionarys() { List extStopWordDictFiles = new ArrayList(2); String extStopWordDictCfg = props.getProperty(EXT_STOP); - if(extStopWordDictCfg != null){ - + if (extStopWordDictCfg != null) { + String[] filePaths = extStopWordDictCfg.split(";"); - if(filePaths != null){ - for(String filePath : filePaths){ - if(filePath != null && !"".equals(filePath.trim())){ - Path file = PathUtils.get("ik", filePath.trim()); + if (filePaths != null) { + for (String filePath : filePaths) { + if (filePath != null && !"".equals(filePath.trim())) { + Path file = PathUtils.get(filePath.trim()); extStopWordDictFiles.add(file.toString()); } } } - } - return extStopWordDictFiles; + } + return extStopWordDictFiles; } - - public List getRemoteExtStopWordDictionarys(){ + + public List getRemoteExtStopWordDictionarys() { List remoteExtStopWordDictFiles = new ArrayList(2); String remoteExtStopWordDictCfg = props.getProperty(REMOTE_EXT_STOP); - if(remoteExtStopWordDictCfg != null){ + if (remoteExtStopWordDictCfg != null) { String[] filePaths = remoteExtStopWordDictCfg.split(";"); - if(filePaths != null){ - for(String filePath : filePaths){ - if(filePath != null && !"".equals(filePath.trim())){ + if (filePaths != null) { + for (String filePath : filePaths) { + if (filePath != null && !"".equals(filePath.trim())) { remoteExtStopWordDictFiles.add(filePath); } } } } - return remoteExtStopWordDictFiles; + return remoteExtStopWordDictFiles; } - public String getDictRoot() { - return PathUtils.get( - new File(AnalysisIkPlugin.class.getProtectionDomain().getCodeSource().getLocation().getPath()).getParent(),"config") - .toAbsolutePath().toString(); - } + public String getDictRoot() { + return conf_dir.toAbsolutePath().toString(); + } + + private Path getConfigInPluginDir() { + return PathUtils + .get(new File(AnalysisIkPlugin.class.getProtectionDomain().getCodeSource().getLocation().getPath()) + .getParent(), "config") + .toAbsolutePath(); + } } diff --git a/src/main/java/org/wltea/analyzer/dic/Dictionary.java b/src/main/java/org/wltea/analyzer/dic/Dictionary.java index 76af79c..0b4ac70 100644 --- a/src/main/java/org/wltea/analyzer/dic/Dictionary.java +++ b/src/main/java/org/wltea/analyzer/dic/Dictionary.java @@ -56,7 +56,6 @@ import org.wltea.analyzer.cfg.Configuration; */ public class Dictionary { - /* * 词典单子实例 */ @@ -74,40 +73,37 @@ public class Dictionary { private DictSegment _StopWords; - /** * 配置对象 */ private Configuration configuration; - public static ESLogger logger=Loggers.getLogger("ik-analyzer"); + public static ESLogger logger = Loggers.getLogger("ik-analyzer"); private static ScheduledExecutorService pool = Executors.newScheduledThreadPool(1); - public static final String PATH_DIC_MAIN = "ik/main.dic"; - public static final String PATH_DIC_SURNAME = "ik/surname.dic"; - public static final String PATH_DIC_QUANTIFIER = "ik/quantifier.dic"; - public static final String PATH_DIC_SUFFIX = "ik/suffix.dic"; - public static final String PATH_DIC_PREP = "ik/preposition.dic"; - public static final String PATH_DIC_STOP = "ik/stopword.dic"; + public static final String PATH_DIC_MAIN = "main.dic"; + public static final String PATH_DIC_SURNAME = "surname.dic"; + public static final String PATH_DIC_QUANTIFIER = "quantifier.dic"; + public static final String PATH_DIC_SUFFIX = "suffix.dic"; + public static final String PATH_DIC_PREP = "preposition.dic"; + public static final String PATH_DIC_STOP = "stopword.dic"; - private Dictionary(){ + private Dictionary() { } /** - * 词典初始化 - * 由于IK Analyzer的词典采用Dictionary类的静态方法进行词典初始化 - * 只有当Dictionary类被实际调用时,才会开始载入词典, - * 这将延长首次分词操作的时间 - * 该方法提供了一个在应用加载阶段就初始化字典的手段 + * 词典初始化 由于IK Analyzer的词典采用Dictionary类的静态方法进行词典初始化 + * 只有当Dictionary类被实际调用时,才会开始载入词典, 这将延长首次分词操作的时间 该方法提供了一个在应用加载阶段就初始化字典的手段 + * * @return Dictionary */ - public static synchronized Dictionary initial(Configuration cfg){ - if(singleton == null){ - synchronized(Dictionary.class){ - if(singleton == null){ + public static synchronized Dictionary initial(Configuration cfg) { + if (singleton == null) { + synchronized (Dictionary.class) { + if (singleton == null) { singleton = new Dictionary(); - singleton.configuration=cfg; + singleton.configuration = cfg; singleton.loadMainDict(); singleton.loadSurnameDict(); singleton.loadQuantifierDict(); @@ -115,12 +111,12 @@ public class Dictionary { singleton.loadPrepDict(); singleton.loadStopWordDict(); - //建立监控线程 - for(String location:cfg.getRemoteExtDictionarys()){ - //10 秒是初始延迟可以修改的 60是间隔时间 单位秒 + // 建立监控线程 + for (String location : cfg.getRemoteExtDictionarys()) { + // 10 秒是初始延迟可以修改的 60是间隔时间 单位秒 pool.scheduleAtFixedRate(new Monitor(location), 10, 60, TimeUnit.SECONDS); } - for(String location:cfg.getRemoteExtStopWordDictionarys()){ + for (String location : cfg.getRemoteExtStopWordDictionarys()) { pool.scheduleAtFixedRate(new Monitor(location), 10, 60, TimeUnit.SECONDS); } @@ -133,10 +129,11 @@ public class Dictionary { /** * 获取词典单子实例 + * * @return Dictionary 单例对象 */ - public static Dictionary getSingleton(){ - if(singleton == null){ + public static Dictionary getSingleton() { + if (singleton == null) { throw new IllegalStateException("词典尚未初始化,请先调用initial方法"); } return singleton; @@ -144,13 +141,15 @@ public class Dictionary { /** * 批量加载新词条 - * @param words Collection词条列表 + * + * @param words + * Collection词条列表 */ - public void addWords(Collection words){ - if(words != null){ - for(String word : words){ + public void addWords(Collection words) { + if (words != null) { + for (String word : words) { if (word != null) { - //批量加载词条到主内存词典中 + // 批量加载词条到主内存词典中 singleton._MainDict.fillSegment(word.trim().toCharArray()); } } @@ -160,11 +159,11 @@ public class Dictionary { /** * 批量移除(屏蔽)词条 */ - public void disableWords(Collection words){ - if(words != null){ - for(String word : words){ + public void disableWords(Collection words) { + if (words != null) { + for (String word : words) { if (word != null) { - //批量屏蔽词条 + // 批量屏蔽词条 singleton._MainDict.disableSegment(word.trim().toCharArray()); } } @@ -173,55 +172,58 @@ public class Dictionary { /** * 检索匹配主词典 + * * @return Hit 匹配结果描述 */ - public Hit matchInMainDict(char[] charArray){ + public Hit matchInMainDict(char[] charArray) { return singleton._MainDict.match(charArray); } /** * 检索匹配主词典 + * * @return Hit 匹配结果描述 */ - public Hit matchInMainDict(char[] charArray , int begin, int length){ + public Hit matchInMainDict(char[] charArray, int begin, int length) { return singleton._MainDict.match(charArray, begin, length); } /** * 检索匹配量词词典 + * * @return Hit 匹配结果描述 */ - public Hit matchInQuantifierDict(char[] charArray , int begin, int length){ + public Hit matchInQuantifierDict(char[] charArray, int begin, int length) { return singleton._QuantifierDict.match(charArray, begin, length); } - /** * 从已匹配的Hit中直接取出DictSegment,继续向下匹配 + * * @return Hit */ - public Hit matchWithHit(char[] charArray , int currentIndex , Hit matchedHit){ + public Hit matchWithHit(char[] charArray, int currentIndex, Hit matchedHit) { DictSegment ds = matchedHit.getMatchedDictSegment(); - return ds.match(charArray, currentIndex, 1 , matchedHit); + return ds.match(charArray, currentIndex, 1, matchedHit); } - /** * 判断是否是停止词 + * * @return boolean */ - public boolean isStopWord(char[] charArray , int begin, int length){ + public boolean isStopWord(char[] charArray, int begin, int length) { return singleton._StopWords.match(charArray, begin, length).isMatch(); } /** * 加载主词典及扩展词典 */ - private void loadMainDict(){ - //建立一个主词典实例 - _MainDict = new DictSegment((char)0); + private void loadMainDict() { + // 建立一个主词典实例 + _MainDict = new DictSegment((char) 0); - //读取主词典文件 + // 读取主词典文件 Path file = PathUtils.get(configuration.getDictRoot(), Dictionary.PATH_DIC_MAIN); InputStream is = null; @@ -232,7 +234,7 @@ public class Dictionary { } try { - BufferedReader br = new BufferedReader(new InputStreamReader(is , "UTF-8"), 512); + BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512); String theWord = null; do { theWord = br.readLine(); @@ -242,90 +244,89 @@ public class Dictionary { } while (theWord != null); } catch (IOException e) { - logger.error("ik-analyzer",e); + logger.error("ik-analyzer", e); - }finally{ + } finally { try { - if(is != null){ + if (is != null) { is.close(); is = null; } } catch (IOException e) { - logger.error("ik-analyzer",e); + logger.error("ik-analyzer", e); } } - //加载扩展词典 + // 加载扩展词典 this.loadExtDict(); - //加载远程自定义词库 + // 加载远程自定义词库 this.loadRemoteExtDict(); } /** * 加载用户配置的扩展词典到主词库表 */ - private void loadExtDict(){ - //加载扩展词典配置 - List extDictFiles = configuration.getExtDictionarys(); - if(extDictFiles != null){ + private void loadExtDict() { + // 加载扩展词典配置 + List extDictFiles = configuration.getExtDictionarys(); + if (extDictFiles != null) { InputStream is = null; - for(String extDictName : extDictFiles){ - //读取扩展词典文件 + for (String extDictName : extDictFiles) { + // 读取扩展词典文件 logger.info("[Dict Loading] " + extDictName); Path file = PathUtils.get(configuration.getDictRoot(), extDictName); try { is = new FileInputStream(file.toFile()); } catch (FileNotFoundException e) { - logger.error("ik-analyzer",e); + logger.error("ik-analyzer", e); } - //如果找不到扩展的字典,则忽略 - if(is == null){ + // 如果找不到扩展的字典,则忽略 + if (is == null) { continue; } try { - BufferedReader br = new BufferedReader(new InputStreamReader(is , "UTF-8"), 512); + BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512); String theWord = null; do { theWord = br.readLine(); if (theWord != null && !"".equals(theWord.trim())) { - //加载扩展词典数据到主内存词典中 + // 加载扩展词典数据到主内存词典中 _MainDict.fillSegment(theWord.trim().toCharArray()); } } while (theWord != null); } catch (IOException e) { - logger.error("ik-analyzer",e); - }finally{ + logger.error("ik-analyzer", e); + } finally { try { - if(is != null){ + if (is != null) { is.close(); is = null; } } catch (IOException e) { - logger.error("ik-analyzer",e); + logger.error("ik-analyzer", e); } } } } } - /** * 加载远程扩展词典到主词库表 */ - private void loadRemoteExtDict(){ - List remoteExtDictFiles = configuration.getRemoteExtDictionarys(); - for(String location:remoteExtDictFiles){ + private void loadRemoteExtDict() { + List remoteExtDictFiles = configuration.getRemoteExtDictionarys(); + for (String location : remoteExtDictFiles) { logger.info("[Dict Loading] " + location); List lists = getRemoteWords(location); - //如果找不到扩展的字典,则忽略 - if(lists == null){ - logger.error("[Dict Loading] "+location+"加载失败"); + // 如果找不到扩展的字典,则忽略 + if (lists == null) { + logger.error("[Dict Loading] " + location + "加载失败"); continue; } - for(String theWord:lists){ + for (String theWord : lists) { if (theWord != null && !"".equals(theWord.trim())) { - //加载扩展词典数据到主内存词典中 + // 加载扩展词典数据到主内存词典中 logger.info(theWord); _MainDict.fillSegment(theWord.trim().toLowerCase().toCharArray()); } @@ -337,11 +338,11 @@ public class Dictionary { /** * 从远程服务器上下载自定义词条 */ - private static List getRemoteWords(String location){ + private static List getRemoteWords(String location) { List buffer = new ArrayList(); - RequestConfig rc = RequestConfig.custom().setConnectionRequestTimeout(10*1000) - .setConnectTimeout(10*1000).setSocketTimeout(60*1000).build(); + RequestConfig rc = RequestConfig.custom().setConnectionRequestTimeout(10 * 1000).setConnectTimeout(10 * 1000) + .setSocketTimeout(60 * 1000).build(); CloseableHttpClient httpclient = HttpClients.createDefault(); CloseableHttpResponse response; BufferedReader in; @@ -349,17 +350,17 @@ public class Dictionary { get.setConfig(rc); try { response = httpclient.execute(get); - if(response.getStatusLine().getStatusCode()==200){ + if (response.getStatusLine().getStatusCode() == 200) { String charset = "UTF-8"; - //获取编码,默认为utf-8 - if(response.getEntity().getContentType().getValue().contains("charset=")){ - String contentType=response.getEntity().getContentType().getValue(); - charset=contentType.substring(contentType.lastIndexOf("=")+1); + // 获取编码,默认为utf-8 + if (response.getEntity().getContentType().getValue().contains("charset=")) { + String contentType = response.getEntity().getContentType().getValue(); + charset = contentType.substring(contentType.lastIndexOf("=") + 1); } - in = new BufferedReader(new InputStreamReader(response.getEntity().getContent(),charset)); - String line ; - while((line = in.readLine())!=null){ + in = new BufferedReader(new InputStreamReader(response.getEntity().getContent(), charset)); + String line; + while ((line = in.readLine()) != null) { buffer.add(line); } in.close(); @@ -368,25 +369,23 @@ public class Dictionary { } response.close(); } catch (ClientProtocolException e) { - logger.error( "getRemoteWords {} error" , e , location); + logger.error("getRemoteWords {} error", e, location); } catch (IllegalStateException e) { - logger.error( "getRemoteWords {} error" , e , location ); + logger.error("getRemoteWords {} error", e, location); } catch (IOException e) { - logger.error( "getRemoteWords {} error" , e , location ); + logger.error("getRemoteWords {} error", e, location); } return buffer; } - - /** * 加载用户扩展的停止词词典 */ - private void loadStopWordDict(){ - //建立主词典实例 - _StopWords = new DictSegment((char)0); + private void loadStopWordDict() { + // 建立主词典实例 + _StopWords = new DictSegment((char) 0); - //读取主词典文件 + // 读取主词典文件 Path file = PathUtils.get(configuration.getDictRoot(), Dictionary.PATH_DIC_STOP); InputStream is = null; @@ -397,7 +396,7 @@ public class Dictionary { } try { - BufferedReader br = new BufferedReader(new InputStreamReader(is , "UTF-8"), 512); + BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512); String theWord = null; do { theWord = br.readLine(); @@ -407,103 +406,101 @@ public class Dictionary { } while (theWord != null); } catch (IOException e) { - logger.error("ik-analyzer",e); + logger.error("ik-analyzer", e); - }finally{ + } finally { try { - if(is != null){ + if (is != null) { is.close(); is = null; } } catch (IOException e) { - logger.error("ik-analyzer",e); + logger.error("ik-analyzer", e); } } - - //加载扩展停止词典 - List extStopWordDictFiles = configuration.getExtStopWordDictionarys(); - if(extStopWordDictFiles != null){ + // 加载扩展停止词典 + List extStopWordDictFiles = configuration.getExtStopWordDictionarys(); + if (extStopWordDictFiles != null) { is = null; - for(String extStopWordDictName : extStopWordDictFiles){ + for (String extStopWordDictName : extStopWordDictFiles) { logger.info("[Dict Loading] " + extStopWordDictName); - //读取扩展词典文件 - file=PathUtils.get(configuration.getDictRoot(), extStopWordDictName); + // 读取扩展词典文件 + file = PathUtils.get(configuration.getDictRoot(), extStopWordDictName); try { is = new FileInputStream(file.toFile()); } catch (FileNotFoundException e) { - logger.error("ik-analyzer",e); + logger.error("ik-analyzer", e); } - //如果找不到扩展的字典,则忽略 - if(is == null){ + // 如果找不到扩展的字典,则忽略 + if (is == null) { continue; } try { - BufferedReader br = new BufferedReader(new InputStreamReader(is , "UTF-8"), 512); + BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512); String theWord = null; do { theWord = br.readLine(); if (theWord != null && !"".equals(theWord.trim())) { - //加载扩展停止词典数据到内存中 + // 加载扩展停止词典数据到内存中 _StopWords.fillSegment(theWord.trim().toCharArray()); } } while (theWord != null); } catch (IOException e) { - logger.error("ik-analyzer",e); + logger.error("ik-analyzer", e); - }finally{ + } finally { try { - if(is != null){ + if (is != null) { is.close(); is = null; } } catch (IOException e) { - logger.error("ik-analyzer",e); + logger.error("ik-analyzer", e); } } } } - //加载远程停用词典 - List remoteExtStopWordDictFiles = configuration.getRemoteExtStopWordDictionarys(); - for(String location:remoteExtStopWordDictFiles){ + // 加载远程停用词典 + List remoteExtStopWordDictFiles = configuration.getRemoteExtStopWordDictionarys(); + for (String location : remoteExtStopWordDictFiles) { logger.info("[Dict Loading] " + location); List lists = getRemoteWords(location); - //如果找不到扩展的字典,则忽略 - if(lists == null){ - logger.error("[Dict Loading] "+location+"加载失败"); + // 如果找不到扩展的字典,则忽略 + if (lists == null) { + logger.error("[Dict Loading] " + location + "加载失败"); continue; } - for(String theWord:lists){ + for (String theWord : lists) { if (theWord != null && !"".equals(theWord.trim())) { - //加载远程词典数据到主内存中 + // 加载远程词典数据到主内存中 logger.info(theWord); _StopWords.fillSegment(theWord.trim().toLowerCase().toCharArray()); } } } - } /** * 加载量词词典 */ - private void loadQuantifierDict(){ - //建立一个量词典实例 - _QuantifierDict = new DictSegment((char)0); - //读取量词词典文件 + private void loadQuantifierDict() { + // 建立一个量词典实例 + _QuantifierDict = new DictSegment((char) 0); + // 读取量词词典文件 Path file = PathUtils.get(configuration.getDictRoot(), Dictionary.PATH_DIC_QUANTIFIER); InputStream is = null; try { is = new FileInputStream(file.toFile()); } catch (FileNotFoundException e) { - logger.error("ik-analyzer",e); + logger.error("ik-analyzer", e); } try { - BufferedReader br = new BufferedReader(new InputStreamReader(is , "UTF-8"), 512); + BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512); String theWord = null; do { theWord = br.readLine(); @@ -515,34 +512,33 @@ public class Dictionary { } catch (IOException ioe) { logger.error("Quantifier Dictionary loading exception."); - }finally{ + } finally { try { - if(is != null){ + if (is != null) { is.close(); is = null; } } catch (IOException e) { - logger.error("ik-analyzer",e); + logger.error("ik-analyzer", e); } } } + private void loadSurnameDict() { - private void loadSurnameDict(){ - - _SurnameDict = new DictSegment((char)0); + _SurnameDict = new DictSegment((char) 0); Path file = PathUtils.get(configuration.getDictRoot(), Dictionary.PATH_DIC_SURNAME); InputStream is = null; try { is = new FileInputStream(file.toFile()); } catch (FileNotFoundException e) { - logger.error("ik-analyzer",e); + logger.error("ik-analyzer", e); } - if(is == null){ + if (is == null) { throw new RuntimeException("Surname Dictionary not found!!!"); } try { - BufferedReader br = new BufferedReader(new InputStreamReader(is , "UTF-8"), 512); + BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512); String theWord; do { theWord = br.readLine(); @@ -551,36 +547,35 @@ public class Dictionary { } } while (theWord != null); } catch (IOException e) { - logger.error("ik-analyzer",e); - }finally{ + logger.error("ik-analyzer", e); + } finally { try { - if(is != null){ + if (is != null) { is.close(); is = null; } } catch (IOException e) { - logger.error("ik-analyzer",e); + logger.error("ik-analyzer", e); } } } + private void loadSuffixDict() { - private void loadSuffixDict(){ - - _SuffixDict = new DictSegment((char)0); + _SuffixDict = new DictSegment((char) 0); Path file = PathUtils.get(configuration.getDictRoot(), Dictionary.PATH_DIC_SUFFIX); InputStream is = null; try { is = new FileInputStream(file.toFile()); } catch (FileNotFoundException e) { - logger.error("ik-analyzer",e); + logger.error("ik-analyzer", e); } - if(is == null){ + if (is == null) { throw new RuntimeException("Suffix Dictionary not found!!!"); } try { - BufferedReader br = new BufferedReader(new InputStreamReader(is , "UTF-8"), 512); + BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512); String theWord; do { theWord = br.readLine(); @@ -589,34 +584,33 @@ public class Dictionary { } } while (theWord != null); } catch (IOException e) { - logger.error("ik-analyzer",e); - }finally{ + logger.error("ik-analyzer", e); + } finally { try { is.close(); is = null; } catch (IOException e) { - logger.error("ik-analyzer",e); + logger.error("ik-analyzer", e); } } } + private void loadPrepDict() { - private void loadPrepDict(){ - - _PrepDict = new DictSegment((char)0); + _PrepDict = new DictSegment((char) 0); Path file = PathUtils.get(configuration.getDictRoot(), Dictionary.PATH_DIC_PREP); InputStream is = null; try { is = new FileInputStream(file.toFile()); } catch (FileNotFoundException e) { - logger.error("ik-analyzer",e); + logger.error("ik-analyzer", e); } - if(is == null){ + if (is == null) { throw new RuntimeException("Preposition Dictionary not found!!!"); } try { - BufferedReader br = new BufferedReader(new InputStreamReader(is , "UTF-8"), 512); + BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512); String theWord; do { theWord = br.readLine(); @@ -626,18 +620,18 @@ public class Dictionary { } } while (theWord != null); } catch (IOException e) { - logger.error("ik-analyzer",e); - }finally{ + logger.error("ik-analyzer", e); + } finally { try { is.close(); is = null; } catch (IOException e) { - logger.error("ik-analyzer",e); + logger.error("ik-analyzer", e); } } } - public void reLoadMainDict(){ + public void reLoadMainDict() { logger.info("重新加载词典..."); // 新开一个实例加载词典,减少加载过程对当前词典使用的影响 Dictionary tmpDict = new Dictionary();