Merge branch 'master' of github.com:medcl/elasticsearch-analysis-ik
This commit is contained in:
commit
f1d59921fe
@ -169,7 +169,8 @@ Result
|
|||||||
|
|
||||||
### Dictionary Configuration
|
### Dictionary Configuration
|
||||||
|
|
||||||
#### `plugins/elasticsearch-analysis-ik-*/config/ik/IKAnalyzer.cfg.xml`
|
`IKAnalyzer.cfg.xml` can be located at `{conf}/analysis-ik/config/IKAnalyzer.cfg.xml`
|
||||||
|
or `{plugins}/elasticsearch-analysis-ik-*/config/IKAnalyzer.cfg.xml`
|
||||||
|
|
||||||
```xml
|
```xml
|
||||||
<?xml version="1.0" encoding="UTF-8"?>
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
@ -1,12 +1,12 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
<!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">
|
<!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">
|
||||||
<properties>
|
<properties>
|
||||||
<comment>IK Analyzer 扩展配置</comment>
|
<comment>IK Analyzer 扩展配置</comment>
|
||||||
<!--用户可以在这里配置自己的扩展字典 -->
|
<!--用户可以在这里配置自己的扩展字典 -->
|
||||||
<entry key="ext_dict">custom/mydict.dic;custom/single_word_low_freq.dic</entry>
|
<entry key="ext_dict">custom/mydict.dic;custom/single_word_low_freq.dic</entry>
|
||||||
<!--用户可以在这里配置自己的扩展停止词字典-->
|
<!--用户可以在这里配置自己的扩展停止词字典-->
|
||||||
<entry key="ext_stopwords">custom/ext_stopword.dic</entry>
|
<entry key="ext_stopwords">custom/ext_stopword.dic</entry>
|
||||||
<!--用户可以在这里配置远程扩展字典 -->
|
<!--用户可以在这里配置远程扩展字典 -->
|
||||||
<!-- <entry key="remote_ext_dict">words_location</entry> -->
|
<!-- <entry key="remote_ext_dict">words_location</entry> -->
|
||||||
<!--用户可以在这里配置远程扩展停止词字典-->
|
<!--用户可以在这里配置远程扩展停止词字典-->
|
||||||
<!-- <entry key="remote_ext_stopwords">words_location</entry> -->
|
<!-- <entry key="remote_ext_stopwords">words_location</entry> -->
|
@ -23,14 +23,16 @@ import static java.rmi.Naming.bind;
|
|||||||
|
|
||||||
|
|
||||||
public class AnalysisIkPlugin extends Plugin {
|
public class AnalysisIkPlugin extends Plugin {
|
||||||
|
|
||||||
|
public static String PLUGIN_NAME = "analysis-ik";
|
||||||
|
|
||||||
@Override public String name() {
|
@Override public String name() {
|
||||||
return "analysis-ik";
|
return PLUGIN_NAME;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Override public String description() {
|
@Override public String description() {
|
||||||
return "ik analysis";
|
return PLUGIN_NAME;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -20,118 +20,132 @@ import java.util.Properties;
|
|||||||
|
|
||||||
public class Configuration {
|
public class Configuration {
|
||||||
|
|
||||||
private static String FILE_NAME = "ik/IKAnalyzer.cfg.xml";
|
private static String FILE_NAME = "IKAnalyzer.cfg.xml";
|
||||||
private static final String EXT_DICT = "ext_dict";
|
private static final String EXT_DICT = "ext_dict";
|
||||||
private static final String REMOTE_EXT_DICT = "remote_ext_dict";
|
private static final String REMOTE_EXT_DICT = "remote_ext_dict";
|
||||||
private static final String EXT_STOP = "ext_stopwords";
|
private static final String EXT_STOP = "ext_stopwords";
|
||||||
private static final String REMOTE_EXT_STOP = "remote_ext_stopwords";
|
private static final String REMOTE_EXT_STOP = "remote_ext_stopwords";
|
||||||
private static ESLogger logger = Loggers.getLogger("ik-analyzer");
|
private static ESLogger logger = Loggers.getLogger("ik-analyzer");
|
||||||
|
private Path conf_dir;
|
||||||
private Properties props;
|
private Properties props;
|
||||||
private Environment environment;
|
private Environment environment;
|
||||||
|
|
||||||
@Inject
|
@Inject
|
||||||
public Configuration(Environment env){
|
public Configuration(Environment env) {
|
||||||
props = new Properties();
|
props = new Properties();
|
||||||
environment = env;
|
environment = env;
|
||||||
|
|
||||||
|
conf_dir = environment.configFile().resolve(AnalysisIkPlugin.PLUGIN_NAME);
|
||||||
|
Path configFile = conf_dir.resolve(FILE_NAME);
|
||||||
|
|
||||||
Path fileConfig = PathUtils.get(getDictRoot(), FILE_NAME);
|
InputStream input = null;
|
||||||
|
try {
|
||||||
|
logger.info("try load config from {}", configFile);
|
||||||
InputStream input = null;
|
input = new FileInputStream(configFile.toFile());
|
||||||
try {
|
} catch (FileNotFoundException e) {
|
||||||
input = new FileInputStream(fileConfig.toFile());
|
conf_dir = this.getConfigInPluginDir();
|
||||||
} catch (FileNotFoundException e) {
|
configFile = conf_dir.resolve(FILE_NAME);
|
||||||
logger.error("ik-analyzer",e);
|
try {
|
||||||
}
|
logger.info("try load config from {}", configFile);
|
||||||
if(input != null){
|
input = new FileInputStream(configFile.toFile());
|
||||||
|
} catch (FileNotFoundException ex) {
|
||||||
|
// We should report origin exception
|
||||||
|
logger.error("ik-analyzer", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (input != null) {
|
||||||
try {
|
try {
|
||||||
props.loadFromXML(input);
|
props.loadFromXML(input);
|
||||||
} catch (InvalidPropertiesFormatException e) {
|
} catch (InvalidPropertiesFormatException e) {
|
||||||
logger.error("ik-analyzer", e);
|
logger.error("ik-analyzer", e);
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
logger.error("ik-analyzer",e);
|
logger.error("ik-analyzer", e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<String> getExtDictionarys(){
|
public List<String> getExtDictionarys() {
|
||||||
List<String> extDictFiles = new ArrayList<String>(2);
|
List<String> extDictFiles = new ArrayList<String>(2);
|
||||||
String extDictCfg = props.getProperty(EXT_DICT);
|
String extDictCfg = props.getProperty(EXT_DICT);
|
||||||
if(extDictCfg != null){
|
if (extDictCfg != null) {
|
||||||
|
|
||||||
String[] filePaths = extDictCfg.split(";");
|
String[] filePaths = extDictCfg.split(";");
|
||||||
if(filePaths != null){
|
if (filePaths != null) {
|
||||||
for(String filePath : filePaths){
|
for (String filePath : filePaths) {
|
||||||
if(filePath != null && !"".equals(filePath.trim())){
|
if (filePath != null && !"".equals(filePath.trim())) {
|
||||||
Path file = PathUtils.get("ik", filePath.trim());
|
Path file = PathUtils.get(filePath.trim());
|
||||||
extDictFiles.add(file.toString());
|
extDictFiles.add(file.toString());
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return extDictFiles;
|
return extDictFiles;
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<String> getRemoteExtDictionarys(){
|
public List<String> getRemoteExtDictionarys() {
|
||||||
List<String> remoteExtDictFiles = new ArrayList<String>(2);
|
List<String> remoteExtDictFiles = new ArrayList<String>(2);
|
||||||
String remoteExtDictCfg = props.getProperty(REMOTE_EXT_DICT);
|
String remoteExtDictCfg = props.getProperty(REMOTE_EXT_DICT);
|
||||||
if(remoteExtDictCfg != null){
|
if (remoteExtDictCfg != null) {
|
||||||
|
|
||||||
String[] filePaths = remoteExtDictCfg.split(";");
|
String[] filePaths = remoteExtDictCfg.split(";");
|
||||||
if(filePaths != null){
|
if (filePaths != null) {
|
||||||
for(String filePath : filePaths){
|
for (String filePath : filePaths) {
|
||||||
if(filePath != null && !"".equals(filePath.trim())){
|
if (filePath != null && !"".equals(filePath.trim())) {
|
||||||
remoteExtDictFiles.add(filePath);
|
remoteExtDictFiles.add(filePath);
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return remoteExtDictFiles;
|
return remoteExtDictFiles;
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<String> getExtStopWordDictionarys(){
|
public List<String> getExtStopWordDictionarys() {
|
||||||
List<String> extStopWordDictFiles = new ArrayList<String>(2);
|
List<String> extStopWordDictFiles = new ArrayList<String>(2);
|
||||||
String extStopWordDictCfg = props.getProperty(EXT_STOP);
|
String extStopWordDictCfg = props.getProperty(EXT_STOP);
|
||||||
if(extStopWordDictCfg != null){
|
if (extStopWordDictCfg != null) {
|
||||||
|
|
||||||
String[] filePaths = extStopWordDictCfg.split(";");
|
String[] filePaths = extStopWordDictCfg.split(";");
|
||||||
if(filePaths != null){
|
if (filePaths != null) {
|
||||||
for(String filePath : filePaths){
|
for (String filePath : filePaths) {
|
||||||
if(filePath != null && !"".equals(filePath.trim())){
|
if (filePath != null && !"".equals(filePath.trim())) {
|
||||||
Path file = PathUtils.get("ik", filePath.trim());
|
Path file = PathUtils.get(filePath.trim());
|
||||||
extStopWordDictFiles.add(file.toString());
|
extStopWordDictFiles.add(file.toString());
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return extStopWordDictFiles;
|
return extStopWordDictFiles;
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<String> getRemoteExtStopWordDictionarys(){
|
public List<String> getRemoteExtStopWordDictionarys() {
|
||||||
List<String> remoteExtStopWordDictFiles = new ArrayList<String>(2);
|
List<String> remoteExtStopWordDictFiles = new ArrayList<String>(2);
|
||||||
String remoteExtStopWordDictCfg = props.getProperty(REMOTE_EXT_STOP);
|
String remoteExtStopWordDictCfg = props.getProperty(REMOTE_EXT_STOP);
|
||||||
if(remoteExtStopWordDictCfg != null){
|
if (remoteExtStopWordDictCfg != null) {
|
||||||
|
|
||||||
String[] filePaths = remoteExtStopWordDictCfg.split(";");
|
String[] filePaths = remoteExtStopWordDictCfg.split(";");
|
||||||
if(filePaths != null){
|
if (filePaths != null) {
|
||||||
for(String filePath : filePaths){
|
for (String filePath : filePaths) {
|
||||||
if(filePath != null && !"".equals(filePath.trim())){
|
if (filePath != null && !"".equals(filePath.trim())) {
|
||||||
remoteExtStopWordDictFiles.add(filePath);
|
remoteExtStopWordDictFiles.add(filePath);
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return remoteExtStopWordDictFiles;
|
return remoteExtStopWordDictFiles;
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getDictRoot() {
|
public String getDictRoot() {
|
||||||
return PathUtils.get(
|
return conf_dir.toAbsolutePath().toString();
|
||||||
new File(AnalysisIkPlugin.class.getProtectionDomain().getCodeSource().getLocation().getPath()).getParent(),"config")
|
}
|
||||||
.toAbsolutePath().toString();
|
|
||||||
}
|
private Path getConfigInPluginDir() {
|
||||||
|
return PathUtils
|
||||||
|
.get(new File(AnalysisIkPlugin.class.getProtectionDomain().getCodeSource().getLocation().getPath())
|
||||||
|
.getParent(), "config")
|
||||||
|
.toAbsolutePath();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -56,7 +56,6 @@ import org.wltea.analyzer.cfg.Configuration;
|
|||||||
*/
|
*/
|
||||||
public class Dictionary {
|
public class Dictionary {
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* 词典单子实例
|
* 词典单子实例
|
||||||
*/
|
*/
|
||||||
@ -74,40 +73,37 @@ public class Dictionary {
|
|||||||
|
|
||||||
private DictSegment _StopWords;
|
private DictSegment _StopWords;
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 配置对象
|
* 配置对象
|
||||||
*/
|
*/
|
||||||
private Configuration configuration;
|
private Configuration configuration;
|
||||||
public static ESLogger logger=Loggers.getLogger("ik-analyzer");
|
public static ESLogger logger = Loggers.getLogger("ik-analyzer");
|
||||||
|
|
||||||
private static ScheduledExecutorService pool = Executors.newScheduledThreadPool(1);
|
private static ScheduledExecutorService pool = Executors.newScheduledThreadPool(1);
|
||||||
|
|
||||||
public static final String PATH_DIC_MAIN = "ik/main.dic";
|
public static final String PATH_DIC_MAIN = "main.dic";
|
||||||
public static final String PATH_DIC_SURNAME = "ik/surname.dic";
|
public static final String PATH_DIC_SURNAME = "surname.dic";
|
||||||
public static final String PATH_DIC_QUANTIFIER = "ik/quantifier.dic";
|
public static final String PATH_DIC_QUANTIFIER = "quantifier.dic";
|
||||||
public static final String PATH_DIC_SUFFIX = "ik/suffix.dic";
|
public static final String PATH_DIC_SUFFIX = "suffix.dic";
|
||||||
public static final String PATH_DIC_PREP = "ik/preposition.dic";
|
public static final String PATH_DIC_PREP = "preposition.dic";
|
||||||
public static final String PATH_DIC_STOP = "ik/stopword.dic";
|
public static final String PATH_DIC_STOP = "stopword.dic";
|
||||||
|
|
||||||
private Dictionary(){
|
private Dictionary() {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 词典初始化
|
* 词典初始化 由于IK Analyzer的词典采用Dictionary类的静态方法进行词典初始化
|
||||||
* 由于IK Analyzer的词典采用Dictionary类的静态方法进行词典初始化
|
* 只有当Dictionary类被实际调用时,才会开始载入词典, 这将延长首次分词操作的时间 该方法提供了一个在应用加载阶段就初始化字典的手段
|
||||||
* 只有当Dictionary类被实际调用时,才会开始载入词典,
|
*
|
||||||
* 这将延长首次分词操作的时间
|
|
||||||
* 该方法提供了一个在应用加载阶段就初始化字典的手段
|
|
||||||
* @return Dictionary
|
* @return Dictionary
|
||||||
*/
|
*/
|
||||||
public static synchronized Dictionary initial(Configuration cfg){
|
public static synchronized Dictionary initial(Configuration cfg) {
|
||||||
if(singleton == null){
|
if (singleton == null) {
|
||||||
synchronized(Dictionary.class){
|
synchronized (Dictionary.class) {
|
||||||
if(singleton == null){
|
if (singleton == null) {
|
||||||
singleton = new Dictionary();
|
singleton = new Dictionary();
|
||||||
singleton.configuration=cfg;
|
singleton.configuration = cfg;
|
||||||
singleton.loadMainDict();
|
singleton.loadMainDict();
|
||||||
singleton.loadSurnameDict();
|
singleton.loadSurnameDict();
|
||||||
singleton.loadQuantifierDict();
|
singleton.loadQuantifierDict();
|
||||||
@ -115,12 +111,12 @@ public class Dictionary {
|
|||||||
singleton.loadPrepDict();
|
singleton.loadPrepDict();
|
||||||
singleton.loadStopWordDict();
|
singleton.loadStopWordDict();
|
||||||
|
|
||||||
//建立监控线程
|
// 建立监控线程
|
||||||
for(String location:cfg.getRemoteExtDictionarys()){
|
for (String location : cfg.getRemoteExtDictionarys()) {
|
||||||
//10 秒是初始延迟可以修改的 60是间隔时间 单位秒
|
// 10 秒是初始延迟可以修改的 60是间隔时间 单位秒
|
||||||
pool.scheduleAtFixedRate(new Monitor(location), 10, 60, TimeUnit.SECONDS);
|
pool.scheduleAtFixedRate(new Monitor(location), 10, 60, TimeUnit.SECONDS);
|
||||||
}
|
}
|
||||||
for(String location:cfg.getRemoteExtStopWordDictionarys()){
|
for (String location : cfg.getRemoteExtStopWordDictionarys()) {
|
||||||
pool.scheduleAtFixedRate(new Monitor(location), 10, 60, TimeUnit.SECONDS);
|
pool.scheduleAtFixedRate(new Monitor(location), 10, 60, TimeUnit.SECONDS);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -133,10 +129,11 @@ public class Dictionary {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* 获取词典单子实例
|
* 获取词典单子实例
|
||||||
|
*
|
||||||
* @return Dictionary 单例对象
|
* @return Dictionary 单例对象
|
||||||
*/
|
*/
|
||||||
public static Dictionary getSingleton(){
|
public static Dictionary getSingleton() {
|
||||||
if(singleton == null){
|
if (singleton == null) {
|
||||||
throw new IllegalStateException("词典尚未初始化,请先调用initial方法");
|
throw new IllegalStateException("词典尚未初始化,请先调用initial方法");
|
||||||
}
|
}
|
||||||
return singleton;
|
return singleton;
|
||||||
@ -144,13 +141,15 @@ public class Dictionary {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* 批量加载新词条
|
* 批量加载新词条
|
||||||
* @param words Collection<String>词条列表
|
*
|
||||||
|
* @param words
|
||||||
|
* Collection<String>词条列表
|
||||||
*/
|
*/
|
||||||
public void addWords(Collection<String> words){
|
public void addWords(Collection<String> words) {
|
||||||
if(words != null){
|
if (words != null) {
|
||||||
for(String word : words){
|
for (String word : words) {
|
||||||
if (word != null) {
|
if (word != null) {
|
||||||
//批量加载词条到主内存词典中
|
// 批量加载词条到主内存词典中
|
||||||
singleton._MainDict.fillSegment(word.trim().toCharArray());
|
singleton._MainDict.fillSegment(word.trim().toCharArray());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -160,11 +159,11 @@ public class Dictionary {
|
|||||||
/**
|
/**
|
||||||
* 批量移除(屏蔽)词条
|
* 批量移除(屏蔽)词条
|
||||||
*/
|
*/
|
||||||
public void disableWords(Collection<String> words){
|
public void disableWords(Collection<String> words) {
|
||||||
if(words != null){
|
if (words != null) {
|
||||||
for(String word : words){
|
for (String word : words) {
|
||||||
if (word != null) {
|
if (word != null) {
|
||||||
//批量屏蔽词条
|
// 批量屏蔽词条
|
||||||
singleton._MainDict.disableSegment(word.trim().toCharArray());
|
singleton._MainDict.disableSegment(word.trim().toCharArray());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -173,55 +172,58 @@ public class Dictionary {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* 检索匹配主词典
|
* 检索匹配主词典
|
||||||
|
*
|
||||||
* @return Hit 匹配结果描述
|
* @return Hit 匹配结果描述
|
||||||
*/
|
*/
|
||||||
public Hit matchInMainDict(char[] charArray){
|
public Hit matchInMainDict(char[] charArray) {
|
||||||
return singleton._MainDict.match(charArray);
|
return singleton._MainDict.match(charArray);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 检索匹配主词典
|
* 检索匹配主词典
|
||||||
|
*
|
||||||
* @return Hit 匹配结果描述
|
* @return Hit 匹配结果描述
|
||||||
*/
|
*/
|
||||||
public Hit matchInMainDict(char[] charArray , int begin, int length){
|
public Hit matchInMainDict(char[] charArray, int begin, int length) {
|
||||||
return singleton._MainDict.match(charArray, begin, length);
|
return singleton._MainDict.match(charArray, begin, length);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 检索匹配量词词典
|
* 检索匹配量词词典
|
||||||
|
*
|
||||||
* @return Hit 匹配结果描述
|
* @return Hit 匹配结果描述
|
||||||
*/
|
*/
|
||||||
public Hit matchInQuantifierDict(char[] charArray , int begin, int length){
|
public Hit matchInQuantifierDict(char[] charArray, int begin, int length) {
|
||||||
return singleton._QuantifierDict.match(charArray, begin, length);
|
return singleton._QuantifierDict.match(charArray, begin, length);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 从已匹配的Hit中直接取出DictSegment,继续向下匹配
|
* 从已匹配的Hit中直接取出DictSegment,继续向下匹配
|
||||||
|
*
|
||||||
* @return Hit
|
* @return Hit
|
||||||
*/
|
*/
|
||||||
public Hit matchWithHit(char[] charArray , int currentIndex , Hit matchedHit){
|
public Hit matchWithHit(char[] charArray, int currentIndex, Hit matchedHit) {
|
||||||
DictSegment ds = matchedHit.getMatchedDictSegment();
|
DictSegment ds = matchedHit.getMatchedDictSegment();
|
||||||
return ds.match(charArray, currentIndex, 1 , matchedHit);
|
return ds.match(charArray, currentIndex, 1, matchedHit);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 判断是否是停止词
|
* 判断是否是停止词
|
||||||
|
*
|
||||||
* @return boolean
|
* @return boolean
|
||||||
*/
|
*/
|
||||||
public boolean isStopWord(char[] charArray , int begin, int length){
|
public boolean isStopWord(char[] charArray, int begin, int length) {
|
||||||
return singleton._StopWords.match(charArray, begin, length).isMatch();
|
return singleton._StopWords.match(charArray, begin, length).isMatch();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 加载主词典及扩展词典
|
* 加载主词典及扩展词典
|
||||||
*/
|
*/
|
||||||
private void loadMainDict(){
|
private void loadMainDict() {
|
||||||
//建立一个主词典实例
|
// 建立一个主词典实例
|
||||||
_MainDict = new DictSegment((char)0);
|
_MainDict = new DictSegment((char) 0);
|
||||||
|
|
||||||
//读取主词典文件
|
// 读取主词典文件
|
||||||
Path file = PathUtils.get(configuration.getDictRoot(), Dictionary.PATH_DIC_MAIN);
|
Path file = PathUtils.get(configuration.getDictRoot(), Dictionary.PATH_DIC_MAIN);
|
||||||
|
|
||||||
InputStream is = null;
|
InputStream is = null;
|
||||||
@ -232,7 +234,7 @@ public class Dictionary {
|
|||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
BufferedReader br = new BufferedReader(new InputStreamReader(is , "UTF-8"), 512);
|
BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512);
|
||||||
String theWord = null;
|
String theWord = null;
|
||||||
do {
|
do {
|
||||||
theWord = br.readLine();
|
theWord = br.readLine();
|
||||||
@ -242,90 +244,89 @@ public class Dictionary {
|
|||||||
} while (theWord != null);
|
} while (theWord != null);
|
||||||
|
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
logger.error("ik-analyzer",e);
|
logger.error("ik-analyzer", e);
|
||||||
|
|
||||||
}finally{
|
} finally {
|
||||||
try {
|
try {
|
||||||
if(is != null){
|
if (is != null) {
|
||||||
is.close();
|
is.close();
|
||||||
is = null;
|
is = null;
|
||||||
}
|
}
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
logger.error("ik-analyzer",e);
|
logger.error("ik-analyzer", e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
//加载扩展词典
|
// 加载扩展词典
|
||||||
this.loadExtDict();
|
this.loadExtDict();
|
||||||
//加载远程自定义词库
|
// 加载远程自定义词库
|
||||||
this.loadRemoteExtDict();
|
this.loadRemoteExtDict();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 加载用户配置的扩展词典到主词库表
|
* 加载用户配置的扩展词典到主词库表
|
||||||
*/
|
*/
|
||||||
private void loadExtDict(){
|
private void loadExtDict() {
|
||||||
//加载扩展词典配置
|
// 加载扩展词典配置
|
||||||
List<String> extDictFiles = configuration.getExtDictionarys();
|
List<String> extDictFiles = configuration.getExtDictionarys();
|
||||||
if(extDictFiles != null){
|
if (extDictFiles != null) {
|
||||||
InputStream is = null;
|
InputStream is = null;
|
||||||
for(String extDictName : extDictFiles){
|
for (String extDictName : extDictFiles) {
|
||||||
//读取扩展词典文件
|
// 读取扩展词典文件
|
||||||
logger.info("[Dict Loading] " + extDictName);
|
logger.info("[Dict Loading] " + extDictName);
|
||||||
Path file = PathUtils.get(configuration.getDictRoot(), extDictName);
|
Path file = PathUtils.get(configuration.getDictRoot(), extDictName);
|
||||||
try {
|
try {
|
||||||
is = new FileInputStream(file.toFile());
|
is = new FileInputStream(file.toFile());
|
||||||
} catch (FileNotFoundException e) {
|
} catch (FileNotFoundException e) {
|
||||||
logger.error("ik-analyzer",e);
|
logger.error("ik-analyzer", e);
|
||||||
}
|
}
|
||||||
|
|
||||||
//如果找不到扩展的字典,则忽略
|
// 如果找不到扩展的字典,则忽略
|
||||||
if(is == null){
|
if (is == null) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
BufferedReader br = new BufferedReader(new InputStreamReader(is , "UTF-8"), 512);
|
BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512);
|
||||||
String theWord = null;
|
String theWord = null;
|
||||||
do {
|
do {
|
||||||
theWord = br.readLine();
|
theWord = br.readLine();
|
||||||
if (theWord != null && !"".equals(theWord.trim())) {
|
if (theWord != null && !"".equals(theWord.trim())) {
|
||||||
//加载扩展词典数据到主内存词典中
|
// 加载扩展词典数据到主内存词典中
|
||||||
_MainDict.fillSegment(theWord.trim().toCharArray());
|
_MainDict.fillSegment(theWord.trim().toCharArray());
|
||||||
}
|
}
|
||||||
} while (theWord != null);
|
} while (theWord != null);
|
||||||
|
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
logger.error("ik-analyzer",e);
|
logger.error("ik-analyzer", e);
|
||||||
}finally{
|
} finally {
|
||||||
try {
|
try {
|
||||||
if(is != null){
|
if (is != null) {
|
||||||
is.close();
|
is.close();
|
||||||
is = null;
|
is = null;
|
||||||
}
|
}
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
logger.error("ik-analyzer",e);
|
logger.error("ik-analyzer", e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 加载远程扩展词典到主词库表
|
* 加载远程扩展词典到主词库表
|
||||||
*/
|
*/
|
||||||
private void loadRemoteExtDict(){
|
private void loadRemoteExtDict() {
|
||||||
List<String> remoteExtDictFiles = configuration.getRemoteExtDictionarys();
|
List<String> remoteExtDictFiles = configuration.getRemoteExtDictionarys();
|
||||||
for(String location:remoteExtDictFiles){
|
for (String location : remoteExtDictFiles) {
|
||||||
logger.info("[Dict Loading] " + location);
|
logger.info("[Dict Loading] " + location);
|
||||||
List<String> lists = getRemoteWords(location);
|
List<String> lists = getRemoteWords(location);
|
||||||
//如果找不到扩展的字典,则忽略
|
// 如果找不到扩展的字典,则忽略
|
||||||
if(lists == null){
|
if (lists == null) {
|
||||||
logger.error("[Dict Loading] "+location+"加载失败");
|
logger.error("[Dict Loading] " + location + "加载失败");
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
for(String theWord:lists){
|
for (String theWord : lists) {
|
||||||
if (theWord != null && !"".equals(theWord.trim())) {
|
if (theWord != null && !"".equals(theWord.trim())) {
|
||||||
//加载扩展词典数据到主内存词典中
|
// 加载扩展词典数据到主内存词典中
|
||||||
logger.info(theWord);
|
logger.info(theWord);
|
||||||
_MainDict.fillSegment(theWord.trim().toLowerCase().toCharArray());
|
_MainDict.fillSegment(theWord.trim().toLowerCase().toCharArray());
|
||||||
}
|
}
|
||||||
@ -337,11 +338,11 @@ public class Dictionary {
|
|||||||
/**
|
/**
|
||||||
* 从远程服务器上下载自定义词条
|
* 从远程服务器上下载自定义词条
|
||||||
*/
|
*/
|
||||||
private static List<String> getRemoteWords(String location){
|
private static List<String> getRemoteWords(String location) {
|
||||||
|
|
||||||
List<String> buffer = new ArrayList<String>();
|
List<String> buffer = new ArrayList<String>();
|
||||||
RequestConfig rc = RequestConfig.custom().setConnectionRequestTimeout(10*1000)
|
RequestConfig rc = RequestConfig.custom().setConnectionRequestTimeout(10 * 1000).setConnectTimeout(10 * 1000)
|
||||||
.setConnectTimeout(10*1000).setSocketTimeout(60*1000).build();
|
.setSocketTimeout(60 * 1000).build();
|
||||||
CloseableHttpClient httpclient = HttpClients.createDefault();
|
CloseableHttpClient httpclient = HttpClients.createDefault();
|
||||||
CloseableHttpResponse response;
|
CloseableHttpResponse response;
|
||||||
BufferedReader in;
|
BufferedReader in;
|
||||||
@ -349,17 +350,17 @@ public class Dictionary {
|
|||||||
get.setConfig(rc);
|
get.setConfig(rc);
|
||||||
try {
|
try {
|
||||||
response = httpclient.execute(get);
|
response = httpclient.execute(get);
|
||||||
if(response.getStatusLine().getStatusCode()==200){
|
if (response.getStatusLine().getStatusCode() == 200) {
|
||||||
|
|
||||||
String charset = "UTF-8";
|
String charset = "UTF-8";
|
||||||
//获取编码,默认为utf-8
|
// 获取编码,默认为utf-8
|
||||||
if(response.getEntity().getContentType().getValue().contains("charset=")){
|
if (response.getEntity().getContentType().getValue().contains("charset=")) {
|
||||||
String contentType=response.getEntity().getContentType().getValue();
|
String contentType = response.getEntity().getContentType().getValue();
|
||||||
charset=contentType.substring(contentType.lastIndexOf("=")+1);
|
charset = contentType.substring(contentType.lastIndexOf("=") + 1);
|
||||||
}
|
}
|
||||||
in = new BufferedReader(new InputStreamReader(response.getEntity().getContent(),charset));
|
in = new BufferedReader(new InputStreamReader(response.getEntity().getContent(), charset));
|
||||||
String line ;
|
String line;
|
||||||
while((line = in.readLine())!=null){
|
while ((line = in.readLine()) != null) {
|
||||||
buffer.add(line);
|
buffer.add(line);
|
||||||
}
|
}
|
||||||
in.close();
|
in.close();
|
||||||
@ -368,25 +369,23 @@ public class Dictionary {
|
|||||||
}
|
}
|
||||||
response.close();
|
response.close();
|
||||||
} catch (ClientProtocolException e) {
|
} catch (ClientProtocolException e) {
|
||||||
logger.error( "getRemoteWords {} error" , e , location);
|
logger.error("getRemoteWords {} error", e, location);
|
||||||
} catch (IllegalStateException e) {
|
} catch (IllegalStateException e) {
|
||||||
logger.error( "getRemoteWords {} error" , e , location );
|
logger.error("getRemoteWords {} error", e, location);
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
logger.error( "getRemoteWords {} error" , e , location );
|
logger.error("getRemoteWords {} error", e, location);
|
||||||
}
|
}
|
||||||
return buffer;
|
return buffer;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 加载用户扩展的停止词词典
|
* 加载用户扩展的停止词词典
|
||||||
*/
|
*/
|
||||||
private void loadStopWordDict(){
|
private void loadStopWordDict() {
|
||||||
//建立主词典实例
|
// 建立主词典实例
|
||||||
_StopWords = new DictSegment((char)0);
|
_StopWords = new DictSegment((char) 0);
|
||||||
|
|
||||||
//读取主词典文件
|
// 读取主词典文件
|
||||||
Path file = PathUtils.get(configuration.getDictRoot(), Dictionary.PATH_DIC_STOP);
|
Path file = PathUtils.get(configuration.getDictRoot(), Dictionary.PATH_DIC_STOP);
|
||||||
|
|
||||||
InputStream is = null;
|
InputStream is = null;
|
||||||
@ -397,7 +396,7 @@ public class Dictionary {
|
|||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
BufferedReader br = new BufferedReader(new InputStreamReader(is , "UTF-8"), 512);
|
BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512);
|
||||||
String theWord = null;
|
String theWord = null;
|
||||||
do {
|
do {
|
||||||
theWord = br.readLine();
|
theWord = br.readLine();
|
||||||
@ -407,103 +406,101 @@ public class Dictionary {
|
|||||||
} while (theWord != null);
|
} while (theWord != null);
|
||||||
|
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
logger.error("ik-analyzer",e);
|
logger.error("ik-analyzer", e);
|
||||||
|
|
||||||
}finally{
|
} finally {
|
||||||
try {
|
try {
|
||||||
if(is != null){
|
if (is != null) {
|
||||||
is.close();
|
is.close();
|
||||||
is = null;
|
is = null;
|
||||||
}
|
}
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
logger.error("ik-analyzer",e);
|
logger.error("ik-analyzer", e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 加载扩展停止词典
|
||||||
//加载扩展停止词典
|
List<String> extStopWordDictFiles = configuration.getExtStopWordDictionarys();
|
||||||
List<String> extStopWordDictFiles = configuration.getExtStopWordDictionarys();
|
if (extStopWordDictFiles != null) {
|
||||||
if(extStopWordDictFiles != null){
|
|
||||||
is = null;
|
is = null;
|
||||||
for(String extStopWordDictName : extStopWordDictFiles){
|
for (String extStopWordDictName : extStopWordDictFiles) {
|
||||||
logger.info("[Dict Loading] " + extStopWordDictName);
|
logger.info("[Dict Loading] " + extStopWordDictName);
|
||||||
|
|
||||||
//读取扩展词典文件
|
// 读取扩展词典文件
|
||||||
file=PathUtils.get(configuration.getDictRoot(), extStopWordDictName);
|
file = PathUtils.get(configuration.getDictRoot(), extStopWordDictName);
|
||||||
try {
|
try {
|
||||||
is = new FileInputStream(file.toFile());
|
is = new FileInputStream(file.toFile());
|
||||||
} catch (FileNotFoundException e) {
|
} catch (FileNotFoundException e) {
|
||||||
logger.error("ik-analyzer",e);
|
logger.error("ik-analyzer", e);
|
||||||
}
|
}
|
||||||
//如果找不到扩展的字典,则忽略
|
// 如果找不到扩展的字典,则忽略
|
||||||
if(is == null){
|
if (is == null) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
BufferedReader br = new BufferedReader(new InputStreamReader(is , "UTF-8"), 512);
|
BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512);
|
||||||
String theWord = null;
|
String theWord = null;
|
||||||
do {
|
do {
|
||||||
theWord = br.readLine();
|
theWord = br.readLine();
|
||||||
if (theWord != null && !"".equals(theWord.trim())) {
|
if (theWord != null && !"".equals(theWord.trim())) {
|
||||||
//加载扩展停止词典数据到内存中
|
// 加载扩展停止词典数据到内存中
|
||||||
_StopWords.fillSegment(theWord.trim().toCharArray());
|
_StopWords.fillSegment(theWord.trim().toCharArray());
|
||||||
}
|
}
|
||||||
} while (theWord != null);
|
} while (theWord != null);
|
||||||
|
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
logger.error("ik-analyzer",e);
|
logger.error("ik-analyzer", e);
|
||||||
|
|
||||||
}finally{
|
} finally {
|
||||||
try {
|
try {
|
||||||
if(is != null){
|
if (is != null) {
|
||||||
is.close();
|
is.close();
|
||||||
is = null;
|
is = null;
|
||||||
}
|
}
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
logger.error("ik-analyzer",e);
|
logger.error("ik-analyzer", e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//加载远程停用词典
|
// 加载远程停用词典
|
||||||
List<String> remoteExtStopWordDictFiles = configuration.getRemoteExtStopWordDictionarys();
|
List<String> remoteExtStopWordDictFiles = configuration.getRemoteExtStopWordDictionarys();
|
||||||
for(String location:remoteExtStopWordDictFiles){
|
for (String location : remoteExtStopWordDictFiles) {
|
||||||
logger.info("[Dict Loading] " + location);
|
logger.info("[Dict Loading] " + location);
|
||||||
List<String> lists = getRemoteWords(location);
|
List<String> lists = getRemoteWords(location);
|
||||||
//如果找不到扩展的字典,则忽略
|
// 如果找不到扩展的字典,则忽略
|
||||||
if(lists == null){
|
if (lists == null) {
|
||||||
logger.error("[Dict Loading] "+location+"加载失败");
|
logger.error("[Dict Loading] " + location + "加载失败");
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
for(String theWord:lists){
|
for (String theWord : lists) {
|
||||||
if (theWord != null && !"".equals(theWord.trim())) {
|
if (theWord != null && !"".equals(theWord.trim())) {
|
||||||
//加载远程词典数据到主内存中
|
// 加载远程词典数据到主内存中
|
||||||
logger.info(theWord);
|
logger.info(theWord);
|
||||||
_StopWords.fillSegment(theWord.trim().toLowerCase().toCharArray());
|
_StopWords.fillSegment(theWord.trim().toLowerCase().toCharArray());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 加载量词词典
|
* 加载量词词典
|
||||||
*/
|
*/
|
||||||
private void loadQuantifierDict(){
|
private void loadQuantifierDict() {
|
||||||
//建立一个量词典实例
|
// 建立一个量词典实例
|
||||||
_QuantifierDict = new DictSegment((char)0);
|
_QuantifierDict = new DictSegment((char) 0);
|
||||||
//读取量词词典文件
|
// 读取量词词典文件
|
||||||
Path file = PathUtils.get(configuration.getDictRoot(), Dictionary.PATH_DIC_QUANTIFIER);
|
Path file = PathUtils.get(configuration.getDictRoot(), Dictionary.PATH_DIC_QUANTIFIER);
|
||||||
InputStream is = null;
|
InputStream is = null;
|
||||||
try {
|
try {
|
||||||
is = new FileInputStream(file.toFile());
|
is = new FileInputStream(file.toFile());
|
||||||
} catch (FileNotFoundException e) {
|
} catch (FileNotFoundException e) {
|
||||||
logger.error("ik-analyzer",e);
|
logger.error("ik-analyzer", e);
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
BufferedReader br = new BufferedReader(new InputStreamReader(is , "UTF-8"), 512);
|
BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512);
|
||||||
String theWord = null;
|
String theWord = null;
|
||||||
do {
|
do {
|
||||||
theWord = br.readLine();
|
theWord = br.readLine();
|
||||||
@ -515,34 +512,33 @@ public class Dictionary {
|
|||||||
} catch (IOException ioe) {
|
} catch (IOException ioe) {
|
||||||
logger.error("Quantifier Dictionary loading exception.");
|
logger.error("Quantifier Dictionary loading exception.");
|
||||||
|
|
||||||
}finally{
|
} finally {
|
||||||
try {
|
try {
|
||||||
if(is != null){
|
if (is != null) {
|
||||||
is.close();
|
is.close();
|
||||||
is = null;
|
is = null;
|
||||||
}
|
}
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
logger.error("ik-analyzer",e);
|
logger.error("ik-analyzer", e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void loadSurnameDict() {
|
||||||
|
|
||||||
private void loadSurnameDict(){
|
_SurnameDict = new DictSegment((char) 0);
|
||||||
|
|
||||||
_SurnameDict = new DictSegment((char)0);
|
|
||||||
Path file = PathUtils.get(configuration.getDictRoot(), Dictionary.PATH_DIC_SURNAME);
|
Path file = PathUtils.get(configuration.getDictRoot(), Dictionary.PATH_DIC_SURNAME);
|
||||||
InputStream is = null;
|
InputStream is = null;
|
||||||
try {
|
try {
|
||||||
is = new FileInputStream(file.toFile());
|
is = new FileInputStream(file.toFile());
|
||||||
} catch (FileNotFoundException e) {
|
} catch (FileNotFoundException e) {
|
||||||
logger.error("ik-analyzer",e);
|
logger.error("ik-analyzer", e);
|
||||||
}
|
}
|
||||||
if(is == null){
|
if (is == null) {
|
||||||
throw new RuntimeException("Surname Dictionary not found!!!");
|
throw new RuntimeException("Surname Dictionary not found!!!");
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
BufferedReader br = new BufferedReader(new InputStreamReader(is , "UTF-8"), 512);
|
BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512);
|
||||||
String theWord;
|
String theWord;
|
||||||
do {
|
do {
|
||||||
theWord = br.readLine();
|
theWord = br.readLine();
|
||||||
@ -551,36 +547,35 @@ public class Dictionary {
|
|||||||
}
|
}
|
||||||
} while (theWord != null);
|
} while (theWord != null);
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
logger.error("ik-analyzer",e);
|
logger.error("ik-analyzer", e);
|
||||||
}finally{
|
} finally {
|
||||||
try {
|
try {
|
||||||
if(is != null){
|
if (is != null) {
|
||||||
is.close();
|
is.close();
|
||||||
is = null;
|
is = null;
|
||||||
}
|
}
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
logger.error("ik-analyzer",e);
|
logger.error("ik-analyzer", e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void loadSuffixDict() {
|
||||||
|
|
||||||
private void loadSuffixDict(){
|
_SuffixDict = new DictSegment((char) 0);
|
||||||
|
|
||||||
_SuffixDict = new DictSegment((char)0);
|
|
||||||
Path file = PathUtils.get(configuration.getDictRoot(), Dictionary.PATH_DIC_SUFFIX);
|
Path file = PathUtils.get(configuration.getDictRoot(), Dictionary.PATH_DIC_SUFFIX);
|
||||||
InputStream is = null;
|
InputStream is = null;
|
||||||
try {
|
try {
|
||||||
is = new FileInputStream(file.toFile());
|
is = new FileInputStream(file.toFile());
|
||||||
} catch (FileNotFoundException e) {
|
} catch (FileNotFoundException e) {
|
||||||
logger.error("ik-analyzer",e);
|
logger.error("ik-analyzer", e);
|
||||||
}
|
}
|
||||||
if(is == null){
|
if (is == null) {
|
||||||
throw new RuntimeException("Suffix Dictionary not found!!!");
|
throw new RuntimeException("Suffix Dictionary not found!!!");
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
|
|
||||||
BufferedReader br = new BufferedReader(new InputStreamReader(is , "UTF-8"), 512);
|
BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512);
|
||||||
String theWord;
|
String theWord;
|
||||||
do {
|
do {
|
||||||
theWord = br.readLine();
|
theWord = br.readLine();
|
||||||
@ -589,34 +584,33 @@ public class Dictionary {
|
|||||||
}
|
}
|
||||||
} while (theWord != null);
|
} while (theWord != null);
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
logger.error("ik-analyzer",e);
|
logger.error("ik-analyzer", e);
|
||||||
}finally{
|
} finally {
|
||||||
try {
|
try {
|
||||||
is.close();
|
is.close();
|
||||||
is = null;
|
is = null;
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
logger.error("ik-analyzer",e);
|
logger.error("ik-analyzer", e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void loadPrepDict() {
|
||||||
|
|
||||||
private void loadPrepDict(){
|
_PrepDict = new DictSegment((char) 0);
|
||||||
|
|
||||||
_PrepDict = new DictSegment((char)0);
|
|
||||||
Path file = PathUtils.get(configuration.getDictRoot(), Dictionary.PATH_DIC_PREP);
|
Path file = PathUtils.get(configuration.getDictRoot(), Dictionary.PATH_DIC_PREP);
|
||||||
InputStream is = null;
|
InputStream is = null;
|
||||||
try {
|
try {
|
||||||
is = new FileInputStream(file.toFile());
|
is = new FileInputStream(file.toFile());
|
||||||
} catch (FileNotFoundException e) {
|
} catch (FileNotFoundException e) {
|
||||||
logger.error("ik-analyzer",e);
|
logger.error("ik-analyzer", e);
|
||||||
}
|
}
|
||||||
if(is == null){
|
if (is == null) {
|
||||||
throw new RuntimeException("Preposition Dictionary not found!!!");
|
throw new RuntimeException("Preposition Dictionary not found!!!");
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
|
|
||||||
BufferedReader br = new BufferedReader(new InputStreamReader(is , "UTF-8"), 512);
|
BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"), 512);
|
||||||
String theWord;
|
String theWord;
|
||||||
do {
|
do {
|
||||||
theWord = br.readLine();
|
theWord = br.readLine();
|
||||||
@ -626,18 +620,18 @@ public class Dictionary {
|
|||||||
}
|
}
|
||||||
} while (theWord != null);
|
} while (theWord != null);
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
logger.error("ik-analyzer",e);
|
logger.error("ik-analyzer", e);
|
||||||
}finally{
|
} finally {
|
||||||
try {
|
try {
|
||||||
is.close();
|
is.close();
|
||||||
is = null;
|
is = null;
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
logger.error("ik-analyzer",e);
|
logger.error("ik-analyzer", e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void reLoadMainDict(){
|
public void reLoadMainDict() {
|
||||||
logger.info("重新加载词典...");
|
logger.info("重新加载词典...");
|
||||||
// 新开一个实例加载词典,减少加载过程对当前词典使用的影响
|
// 新开一个实例加载词典,减少加载过程对当前词典使用的影响
|
||||||
Dictionary tmpDict = new Dictionary();
|
Dictionary tmpDict = new Dictionary();
|
||||||
|
Loading…
x
Reference in New Issue
Block a user