merge code

This commit is contained in:
medcl 2016-04-10 22:17:59 +08:00
commit ca2bfe5732
6 changed files with 372 additions and 363 deletions

View File

@ -62,37 +62,37 @@ public class Dictionary {
*/ */
private static Dictionary singleton; private static Dictionary singleton;
private DictSegment _MainDict; private DictSegment _MainDict;
private DictSegment _SurnameDict; private DictSegment _SurnameDict;
private DictSegment _QuantifierDict; private DictSegment _QuantifierDict;
private DictSegment _SuffixDict; private DictSegment _SuffixDict;
private DictSegment _PrepDict; private DictSegment _PrepDict;
private DictSegment _StopWords; private DictSegment _StopWords;
/** /**
* 配置对象 * 配置对象
*/ */
private Configuration configuration; private Configuration configuration;
public static final ESLogger logger=Loggers.getLogger("ik-analyzer"); public static ESLogger logger=Loggers.getLogger("ik-analyzer");
private static ScheduledExecutorService pool = Executors.newScheduledThreadPool(1); private static ScheduledExecutorService pool = Executors.newScheduledThreadPool(1);
public static final String PATH_DIC_MAIN = "ik/main.dic"; public static final String PATH_DIC_MAIN = "ik/main.dic";
public static final String PATH_DIC_SURNAME = "ik/surname.dic"; public static final String PATH_DIC_SURNAME = "ik/surname.dic";
public static final String PATH_DIC_QUANTIFIER = "ik/quantifier.dic"; public static final String PATH_DIC_QUANTIFIER = "ik/quantifier.dic";
public static final String PATH_DIC_SUFFIX = "ik/suffix.dic"; public static final String PATH_DIC_SUFFIX = "ik/suffix.dic";
public static final String PATH_DIC_PREP = "ik/preposition.dic"; public static final String PATH_DIC_PREP = "ik/preposition.dic";
public static final String PATH_DIC_STOP = "ik/stopword.dic"; public static final String PATH_DIC_STOP = "ik/stopword.dic";
private Dictionary(){ private Dictionary(){
} }
/** /**
* 词典初始化 * 词典初始化
@ -103,28 +103,29 @@ public class Dictionary {
* @return Dictionary * @return Dictionary
*/ */
public static synchronized Dictionary initial(Configuration cfg){ public static synchronized Dictionary initial(Configuration cfg){
if(singleton == null){
synchronized(Dictionary.class){
if(singleton == null){
singleton = new Dictionary();
singleton.configuration=cfg;
singleton.loadMainDict();
singleton.loadSurnameDict();
singleton.loadQuantifierDict();
singleton.loadSuffixDict();
singleton.loadPrepDict();
singleton.loadStopWordDict();
synchronized(Dictionary.class){ //建立监控线程
if(singleton == null){ for(String location:cfg.getRemoteExtDictionarys()){
singleton = new Dictionary(); //10 秒是初始延迟可以修改的 60是间隔时间 单位秒
singleton.configuration=cfg; pool.scheduleAtFixedRate(new Monitor(location), 10, 60, TimeUnit.SECONDS);
singleton.loadMainDict(); }
singleton.loadSurnameDict(); for(String location:cfg.getRemoteExtStopWordDictionarys()){
singleton.loadQuantifierDict(); pool.scheduleAtFixedRate(new Monitor(location), 10, 60, TimeUnit.SECONDS);
singleton.loadSuffixDict(); }
singleton.loadPrepDict();
singleton.loadStopWordDict();
//建立监控线程 return singleton;
for(String location:cfg.getRemoteExtDictionarys()){
//10 秒是初始延迟可以修改的 60是间隔时间 单位秒
pool.scheduleAtFixedRate(new Monitor(location), 10, 60, TimeUnit.SECONDS);
} }
for(String location:cfg.getRemoteExtStopWordDictionarys()){
pool.scheduleAtFixedRate(new Monitor(location), 10, 60, TimeUnit.SECONDS);
}
return singleton;
} }
} }
return singleton; return singleton;
@ -183,7 +184,7 @@ public class Dictionary {
* @return Hit 匹配结果描述 * @return Hit 匹配结果描述
*/ */
public Hit matchInMainDict(char[] charArray , int begin, int length){ public Hit matchInMainDict(char[] charArray , int begin, int length){
return singleton._MainDict.match(charArray, begin, length); return singleton._MainDict.match(charArray, begin, length);
} }
/** /**
@ -223,12 +224,12 @@ public class Dictionary {
//读取主词典文件 //读取主词典文件
Path file = PathUtils.get(configuration.getDictRoot(), Dictionary.PATH_DIC_MAIN); Path file = PathUtils.get(configuration.getDictRoot(), Dictionary.PATH_DIC_MAIN);
InputStream is = null; InputStream is = null;
try { try {
is = new FileInputStream(file.toFile()); is = new FileInputStream(file.toFile());
} catch (FileNotFoundException e) { } catch (FileNotFoundException e) {
e.printStackTrace(); logger.error(e.getMessage(), e);
} }
try { try {
BufferedReader br = new BufferedReader(new InputStreamReader(is , "UTF-8"), 512); BufferedReader br = new BufferedReader(new InputStreamReader(is , "UTF-8"), 512);
@ -241,16 +242,16 @@ public class Dictionary {
} while (theWord != null); } while (theWord != null);
} catch (IOException e) { } catch (IOException e) {
logger.error("ik-analyzer",e); logger.error("ik-analyzer",e);
}finally{ }finally{
try { try {
if(is != null){ if(is != null){
is.close(); is.close();
is = null; is = null;
} }
} catch (IOException e) { } catch (IOException e) {
logger.error("ik-analyzer",e); logger.error("ik-analyzer",e);
} }
} }
//加载扩展词典 //加载扩展词典
@ -269,13 +270,13 @@ public class Dictionary {
InputStream is = null; InputStream is = null;
for(String extDictName : extDictFiles){ for(String extDictName : extDictFiles){
//读取扩展词典文件 //读取扩展词典文件
logger.info("[Dict Loading] " + extDictName); logger.info("[Dict Loading] " + extDictName);
Path file = PathUtils.get(configuration.getDictRoot(), extDictName); Path file = PathUtils.get(configuration.getDictRoot(), extDictName);
try { try {
is = new FileInputStream(file.toFile()); is = new FileInputStream(file.toFile());
} catch (FileNotFoundException e) { } catch (FileNotFoundException e) {
logger.error("ik-analyzer",e); logger.error("ik-analyzer",e);
} }
//如果找不到扩展的字典则忽略 //如果找不到扩展的字典则忽略
if(is == null){ if(is == null){
@ -286,21 +287,23 @@ public class Dictionary {
String theWord = null; String theWord = null;
do { do {
theWord = br.readLine(); theWord = br.readLine();
if (theWord != null && !"".equals(theWord.trim())) { if (theWord != null && !"".equals(theWord.trim())) {
//加载扩展词典数据到主内存词典中 //加载扩展词典数据到主内存词典中
_MainDict.fillSegment(theWord.trim().toCharArray()); _MainDict.fillSegment(theWord.trim().toCharArray());
} }
} while (theWord != null); } while (theWord != null);
} catch (IOException e) { } catch (IOException e) {
logger.error("ik-analyzer",e); logger.error("ik-analyzer",e);
}finally{ }finally{
try { try {
is.close(); if(is != null){
is = null; is.close();
is = null;
}
} catch (IOException e) { } catch (IOException e) {
logger.error("ik-analyzer",e); logger.error("ik-analyzer",e);
} }
} }
} }
} }
@ -315,14 +318,11 @@ public class Dictionary {
for(String location:remoteExtDictFiles){ for(String location:remoteExtDictFiles){
logger.info("[Dict Loading] " + location); logger.info("[Dict Loading] " + location);
List<String> lists = getRemoteWords(location); List<String> lists = getRemoteWords(location);
/** Redundant Nullcheck as the list is initialized in the getRemoteWords method
//如果找不到扩展的字典则忽略 //如果找不到扩展的字典则忽略
if(lists == null){ if(lists == null){
logger.error("[Dict Loading] "+location+"加载失败"); logger.error("[Dict Loading] "+location+"加载失败");
continue; continue;
}*/ }
for(String theWord:lists){ for(String theWord:lists){
if (theWord != null && !"".equals(theWord.trim())) { if (theWord != null && !"".equals(theWord.trim())) {
//加载扩展词典数据到主内存词典中 //加载扩展词典数据到主内存词典中
@ -384,41 +384,41 @@ public class Dictionary {
*/ */
private void loadStopWordDict(){ private void loadStopWordDict(){
//建立主词典实例 //建立主词典实例
_StopWords = new DictSegment((char)0); _StopWords = new DictSegment((char)0);
//读取主词典文件 //读取主词典文件
Path file = PathUtils.get(configuration.getDictRoot(), Dictionary.PATH_DIC_STOP); Path file = PathUtils.get(configuration.getDictRoot(), Dictionary.PATH_DIC_STOP);
InputStream is = null; InputStream is = null;
try { try {
is = new FileInputStream(file.toFile()); is = new FileInputStream(file.toFile());
} catch (FileNotFoundException e) { } catch (FileNotFoundException e) {
e.printStackTrace(); logger.error(e.getMessage(), e);
} }
try { try {
BufferedReader br = new BufferedReader(new InputStreamReader(is , "UTF-8"), 512); BufferedReader br = new BufferedReader(new InputStreamReader(is , "UTF-8"), 512);
String theWord = null; String theWord = null;
do { do {
theWord = br.readLine(); theWord = br.readLine();
if (theWord != null && !"".equals(theWord.trim())) { if (theWord != null && !"".equals(theWord.trim())) {
_StopWords.fillSegment(theWord.trim().toCharArray()); _StopWords.fillSegment(theWord.trim().toCharArray());
} }
} while (theWord != null); } while (theWord != null);
} catch (IOException e) { } catch (IOException e) {
logger.error("ik-analyzer",e); logger.error("ik-analyzer",e);
}finally{ }finally{
try { try {
if(is != null){ if(is != null){
is.close(); is.close();
is = null; is = null;
} }
} catch (IOException e) { } catch (IOException e) {
logger.error("ik-analyzer",e); logger.error("ik-analyzer",e);
} }
} }
//加载扩展停止词典 //加载扩展停止词典
@ -426,15 +426,15 @@ public class Dictionary {
if(extStopWordDictFiles != null){ if(extStopWordDictFiles != null){
is = null; is = null;
for(String extStopWordDictName : extStopWordDictFiles){ for(String extStopWordDictName : extStopWordDictFiles){
logger.info("[Dict Loading] " + extStopWordDictName); logger.info("[Dict Loading] " + extStopWordDictName);
//读取扩展词典文件 //读取扩展词典文件
file=PathUtils.get(configuration.getDictRoot(), extStopWordDictName); file=PathUtils.get(configuration.getDictRoot(), extStopWordDictName);
try { try {
is = new FileInputStream(file.toFile()); is = new FileInputStream(file.toFile());
} catch (FileNotFoundException e) { } catch (FileNotFoundException e) {
logger.error("ik-analyzer",e); logger.error("ik-analyzer",e);
} }
//如果找不到扩展的字典则忽略 //如果找不到扩展的字典则忽略
if(is == null){ if(is == null){
continue; continue;
@ -446,19 +446,21 @@ public class Dictionary {
theWord = br.readLine(); theWord = br.readLine();
if (theWord != null && !"".equals(theWord.trim())) { if (theWord != null && !"".equals(theWord.trim())) {
//加载扩展停止词典数据到内存中 //加载扩展停止词典数据到内存中
_StopWords.fillSegment(theWord.trim().toCharArray()); _StopWords.fillSegment(theWord.trim().toCharArray());
} }
} while (theWord != null); } while (theWord != null);
} catch (IOException e) { } catch (IOException e) {
logger.error("ik-analyzer",e); logger.error("ik-analyzer",e);
}finally{ }finally{
try { try {
is.close(); if(is != null){
is = null; is.close();
is = null;
}
} catch (IOException e) { } catch (IOException e) {
logger.error("ik-analyzer",e); logger.error("ik-analyzer",e);
} }
} }
} }
@ -469,14 +471,11 @@ public class Dictionary {
for(String location:remoteExtStopWordDictFiles){ for(String location:remoteExtStopWordDictFiles){
logger.info("[Dict Loading] " + location); logger.info("[Dict Loading] " + location);
List<String> lists = getRemoteWords(location); List<String> lists = getRemoteWords(location);
/** Redundant Nullcheck as the list is initialized in the getRemoteWords method
//如果找不到扩展的字典则忽略 //如果找不到扩展的字典则忽略
if(lists == null){ if(lists == null){
logger.error("[Dict Loading] "+location+"加载失败"); logger.error("[Dict Loading] "+location+"加载失败");
continue; continue;
}*/ }
for(String theWord:lists){ for(String theWord:lists){
if (theWord != null && !"".equals(theWord.trim())) { if (theWord != null && !"".equals(theWord.trim())) {
//加载远程词典数据到主内存中 //加载远程词典数据到主内存中
@ -497,12 +496,12 @@ public class Dictionary {
_QuantifierDict = new DictSegment((char)0); _QuantifierDict = new DictSegment((char)0);
//读取量词词典文件 //读取量词词典文件
Path file = PathUtils.get(configuration.getDictRoot(), Dictionary.PATH_DIC_QUANTIFIER); Path file = PathUtils.get(configuration.getDictRoot(), Dictionary.PATH_DIC_QUANTIFIER);
InputStream is = null; InputStream is = null;
try { try {
is = new FileInputStream(file.toFile()); is = new FileInputStream(file.toFile());
} catch (FileNotFoundException e) { } catch (FileNotFoundException e) {
logger.error("ik-analyzer",e); logger.error("ik-analyzer",e);
} }
try { try {
BufferedReader br = new BufferedReader(new InputStreamReader(is , "UTF-8"), 512); BufferedReader br = new BufferedReader(new InputStreamReader(is , "UTF-8"), 512);
String theWord = null; String theWord = null;
@ -519,125 +518,127 @@ public class Dictionary {
}finally{ }finally{
try { try {
if(is != null){ if(is != null){
is.close(); is.close();
is = null; is = null;
} }
} catch (IOException e) { } catch (IOException e) {
logger.error("ik-analyzer",e); logger.error("ik-analyzer",e);
} }
} }
} }
private void loadSurnameDict(){ private void loadSurnameDict(){
_SurnameDict = new DictSegment((char)0); _SurnameDict = new DictSegment((char)0);
Path file = PathUtils.get(configuration.getDictRoot(), Dictionary.PATH_DIC_SURNAME); Path file = PathUtils.get(configuration.getDictRoot(), Dictionary.PATH_DIC_SURNAME);
InputStream is = null; InputStream is = null;
try { try {
is = new FileInputStream(file.toFile()); is = new FileInputStream(file.toFile());
} catch (FileNotFoundException e) { } catch (FileNotFoundException e) {
logger.error("ik-analyzer",e); logger.error("ik-analyzer",e);
} }
if(is == null){ if(is == null){
throw new RuntimeException("Surname Dictionary not found!!!"); throw new RuntimeException("Surname Dictionary not found!!!");
} }
try { try {
BufferedReader br = new BufferedReader(new InputStreamReader(is , "UTF-8"), 512); BufferedReader br = new BufferedReader(new InputStreamReader(is , "UTF-8"), 512);
String theWord; String theWord;
do { do {
theWord = br.readLine(); theWord = br.readLine();
if (theWord != null && !"".equals(theWord.trim())) { if (theWord != null && !"".equals(theWord.trim())) {
_SurnameDict.fillSegment(theWord.trim().toCharArray()); _SurnameDict.fillSegment(theWord.trim().toCharArray());
} }
} while (theWord != null); } while (theWord != null);
} catch (IOException e) { } catch (IOException e) {
logger.error("ik-analyzer",e); logger.error("ik-analyzer",e);
}finally{ }finally{
try { try {
if(is != null){
is.close();
is = null;
}
} catch (IOException e) {
logger.error("ik-analyzer",e);
}
}
}
private void loadSuffixDict(){
_SuffixDict = new DictSegment((char)0);
Path file = PathUtils.get(configuration.getDictRoot(), Dictionary.PATH_DIC_SUFFIX);
InputStream is = null;
try {
is = new FileInputStream(file.toFile());
} catch (FileNotFoundException e) {
logger.error("ik-analyzer",e);
}
if(is == null){
throw new RuntimeException("Suffix Dictionary not found!!!");
}
try {
BufferedReader br = new BufferedReader(new InputStreamReader(is , "UTF-8"), 512);
String theWord;
do {
theWord = br.readLine();
if (theWord != null && !"".equals(theWord.trim())) {
_SuffixDict.fillSegment(theWord.trim().toCharArray());
}
} while (theWord != null);
} catch (IOException e) {
logger.error("ik-analyzer",e);
}finally{
try {
is.close(); is.close();
is = null; is = null;
} catch (IOException e) { } catch (IOException e) {
logger.error("ik-analyzer",e); logger.error("ik-analyzer",e);
} }
} }
} }
private void loadSuffixDict(){ private void loadPrepDict(){
_SuffixDict = new DictSegment((char)0); _PrepDict = new DictSegment((char)0);
Path file = PathUtils.get(configuration.getDictRoot(), Dictionary.PATH_DIC_SUFFIX);
InputStream is = null;
try {
is = new FileInputStream(file.toFile());
} catch (FileNotFoundException e) {
logger.error("ik-analyzer",e);
}
if(is == null){
throw new RuntimeException("Suffix Dictionary not found!!!");
}
try {
BufferedReader br = new BufferedReader(new InputStreamReader(is , "UTF-8"), 512);
String theWord;
do {
theWord = br.readLine();
if (theWord != null && !"".equals(theWord.trim())) {
_SuffixDict.fillSegment(theWord.trim().toCharArray());
}
} while (theWord != null);
} catch (IOException e) {
logger.error("ik-analyzer",e);
}finally{
try {
is.close();
is = null;
} catch (IOException e) {
logger.error("ik-analyzer",e);
}
}
}
private void loadPrepDict(){
_PrepDict = new DictSegment((char)0);
Path file = PathUtils.get(configuration.getDictRoot(), Dictionary.PATH_DIC_PREP); Path file = PathUtils.get(configuration.getDictRoot(), Dictionary.PATH_DIC_PREP);
InputStream is = null; InputStream is = null;
try { try {
is = new FileInputStream(file.toFile()); is = new FileInputStream(file.toFile());
} catch (FileNotFoundException e) { } catch (FileNotFoundException e) {
logger.error("ik-analyzer",e); logger.error("ik-analyzer",e);
} }
if(is == null){ if(is == null){
throw new RuntimeException("Preposition Dictionary not found!!!"); throw new RuntimeException("Preposition Dictionary not found!!!");
} }
try { try {
BufferedReader br = new BufferedReader(new InputStreamReader(is , "UTF-8"), 512); BufferedReader br = new BufferedReader(new InputStreamReader(is , "UTF-8"), 512);
String theWord; String theWord;
do { do {
theWord = br.readLine(); theWord = br.readLine();
if (theWord != null && !"".equals(theWord.trim())) { if (theWord != null && !"".equals(theWord.trim())) {
_PrepDict.fillSegment(theWord.trim().toCharArray()); _PrepDict.fillSegment(theWord.trim().toCharArray());
} }
} while (theWord != null); } while (theWord != null);
} catch (IOException e) { } catch (IOException e) {
logger.error("ik-analyzer",e); logger.error("ik-analyzer",e);
}finally{ }finally{
try { try {
is.close(); is.close();
is = null; is = null;
} catch (IOException e) { } catch (IOException e) {
logger.error("ik-analyzer",e); logger.error("ik-analyzer",e);
} }
} }
} }
public void reLoadMainDict(){ public void reLoadMainDict(){
logger.info("重新加载词典..."); logger.info("重新加载词典...");
// 新开一个实例加载词典减少加载过程对当前词典使用的影响 // 新开一个实例加载词典减少加载过程对当前词典使用的影响
Dictionary tmpDict = new Dictionary(); Dictionary tmpDict = new Dictionary();
tmpDict.configuration = getSingleton().configuration; tmpDict.configuration = getSingleton().configuration;
@ -646,6 +647,6 @@ public class Dictionary {
_MainDict = tmpDict._MainDict; _MainDict = tmpDict._MainDict;
_StopWords = tmpDict._StopWords; _StopWords = tmpDict._StopWords;
logger.info("重新加载词典完毕..."); logger.info("重新加载词典完毕...");
} }
} }

View File

@ -7,9 +7,13 @@ import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpHead; import org.apache.http.client.methods.HttpHead;
import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients; import org.apache.http.impl.client.HttpClients;
import org.elasticsearch.common.logging.ESLogger;
import org.elasticsearch.common.logging.Loggers;
public class Monitor implements Runnable { public class Monitor implements Runnable {
public static ESLogger logger= Loggers.getLogger("ik-analyzer");
private static CloseableHttpClient httpclient = HttpClients.createDefault(); private static CloseableHttpClient httpclient = HttpClients.createDefault();
/* /*
* 上次更改时间 * 上次更改时间
@ -65,7 +69,7 @@ public class Monitor implements Runnable {
if(response.getStatusLine().getStatusCode()==200){ if(response.getStatusLine().getStatusCode()==200){
if (!response.getLastHeader("Last-Modified").getValue().equalsIgnoreCase(last_modified) if (!response.getLastHeader("Last-Modified").getValue().equalsIgnoreCase(last_modified)
||!response.getLastHeader("ETag").getValue().equalsIgnoreCase(eTags)) { ||!response.getLastHeader("ETag").getValue().equalsIgnoreCase(eTags)) {
// 远程词库有更新,需要重新加载词典并修改last_modified,eTags // 远程词库有更新,需要重新加载词典并修改last_modified,eTags
Dictionary.getSingleton().reLoadMainDict(); Dictionary.getSingleton().reLoadMainDict();
@ -87,7 +91,7 @@ public class Monitor implements Runnable {
response.close(); response.close();
} }
} catch (IOException e) { } catch (IOException e) {
e.printStackTrace(); logger.error(e.getMessage(), e);
} }
} }
} }

View File

@ -5,7 +5,7 @@ import org.elasticsearch.common.logging.Loggers;
public class Sleep { public class Sleep {
public static final ESLogger logger= Loggers.getLogger("ik-analyzer"); public static ESLogger logger= Loggers.getLogger("ik-analyzer");
public enum Type{MSEC,SEC,MIN,HOUR}; public enum Type{MSEC,SEC,MIN,HOUR};
public static void sleep(Type type,int num){ public static void sleep(Type type,int num){
@ -15,20 +15,20 @@ public class Sleep {
Thread.sleep(num); Thread.sleep(num);
return; return;
case SEC: case SEC:
Thread.sleep(num*1000L); Thread.sleep(num*1000);
return; return;
case MIN: case MIN:
Thread.sleep(num*60*1000L); Thread.sleep(num*60*1000);
return; return;
case HOUR: case HOUR:
Thread.sleep(num*60*60*1000L); Thread.sleep(num*60*60*1000);
return; return;
default: default:
logger.error("输入类型错误应为MSEC,SEC,MIN,HOUR之一"); System.err.println("输入类型错误应为MSEC,SEC,MIN,HOUR之一");
return; return;
} }
} catch (InterruptedException e) { } catch (InterruptedException e) {
e.printStackTrace(); logger.error(e.getMessage(), e);
} }
} }

View File

@ -34,6 +34,8 @@ import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser; import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.util.Version; import org.apache.lucene.util.Version;
import org.elasticsearch.common.logging.ESLogger;
import org.elasticsearch.common.logging.Loggers;
import org.wltea.analyzer.core.IKSegmenter; import org.wltea.analyzer.core.IKSegmenter;
import org.wltea.analyzer.core.Lexeme; import org.wltea.analyzer.core.Lexeme;
@ -45,6 +47,8 @@ import org.wltea.analyzer.core.Lexeme;
*/ */
public class SWMCQueryBuilder { public class SWMCQueryBuilder {
public static ESLogger logger= Loggers.getLogger("ik-analyzer");
/** /**
* 生成SWMCQuery * 生成SWMCQuery
* @param fieldName * @param fieldName
@ -78,7 +82,7 @@ public class SWMCQueryBuilder {
lexemes.add(l); lexemes.add(l);
} }
}catch(IOException e){ }catch(IOException e){
e.printStackTrace(); logger.error(e.getMessage(), e);
} }
return lexemes; return lexemes;
} }
@ -87,7 +91,7 @@ public class SWMCQueryBuilder {
/** /**
* 根据分词结果生成SWMC搜索 * 根据分词结果生成SWMC搜索
* @param fieldName * @param fieldName
// * @param pathOption // * @param pathOption
* @param quickMode * @param quickMode
* @return * @return
*/ */
@ -135,7 +139,7 @@ public class SWMCQueryBuilder {
Query q = qp.parse(keywordBuffer_Short.toString()); Query q = qp.parse(keywordBuffer_Short.toString());
return q; return q;
} catch (ParseException e) { } catch (ParseException e) {
e.printStackTrace(); logger.error(e.getMessage(), e);
} }
}else{ }else{
@ -145,7 +149,7 @@ public class SWMCQueryBuilder {
Query q = qp.parse(keywordBuffer.toString()); Query q = qp.parse(keywordBuffer.toString());
return q; return q;
} catch (ParseException e) { } catch (ParseException e) {
e.printStackTrace(); logger.error(e.getMessage(), e);
} }
} }
} }

View File

@ -44,46 +44,46 @@ import org.wltea.analyzer.lucene.IKAnalyzer;
*/ */
public class IKAnalzyerDemo { public class IKAnalzyerDemo {
public static final ESLogger logger= Loggers.getLogger("ik-analyzer"); public static ESLogger logger= Loggers.getLogger("ik-analyzer");
public static void main(String[] args){ public static void main(String[] args){
//构建IK分词器使用smart分词模式 //构建IK分词器使用smart分词模式
Analyzer analyzer = new IKAnalyzer(true); Analyzer analyzer = new IKAnalyzer(true);
//获取Lucene的TokenStream对象 //获取Lucene的TokenStream对象
TokenStream ts = null; TokenStream ts = null;
try { try {
ts = analyzer.tokenStream("myfield", new StringReader("WORLD ,.. html DATA</html>HELLO")); ts = analyzer.tokenStream("myfield", new StringReader("WORLD ,.. html DATA</html>HELLO"));
// ts = analyzer.tokenStream("myfield", new StringReader("这是一个中文分词的例子你可以直接运行它IKAnalyer can analysis english text too")); // ts = analyzer.tokenStream("myfield", new StringReader("这是一个中文分词的例子你可以直接运行它IKAnalyer can analysis english text too"));
//获取词元位置属性 //获取词元位置属性
OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class); OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class);
//获取词元文本属性 //获取词元文本属性
CharTermAttribute term = ts.addAttribute(CharTermAttribute.class); CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
//获取词元文本属性 //获取词元文本属性
TypeAttribute type = ts.addAttribute(TypeAttribute.class); TypeAttribute type = ts.addAttribute(TypeAttribute.class);
//重置TokenStream重置StringReader //重置TokenStream重置StringReader
ts.reset(); ts.reset();
//迭代获取分词结果 //迭代获取分词结果
while (ts.incrementToken()) { while (ts.incrementToken()) {
logger.info(offset.startOffset() + " - " + offset.endOffset() + " : " + term.toString() + " | " + type.type()); System.out.println(offset.startOffset() + " - " + offset.endOffset() + " : " + term.toString() + " | " + type.type());
} }
//关闭TokenStream关闭StringReader //关闭TokenStream关闭StringReader
ts.end(); // Perform end-of-stream operations, e.g. set the final offset. ts.end(); // Perform end-of-stream operations, e.g. set the final offset.
} catch (IOException e) { } catch (IOException e) {
e.printStackTrace(); logger.error(e.getMessage(), e);
} finally { } finally {
//释放TokenStream的所有资源 //释放TokenStream的所有资源
if(ts != null){ if(ts != null){
try { try {
ts.close(); ts.close();
} catch (IOException e) { } catch (IOException e) {
e.printStackTrace(); logger.error(e.getMessage(), e);
} }
} }
} }
} }

View File

@ -64,7 +64,7 @@ import org.wltea.analyzer.lucene.IKAnalyzer;
*/ */
public class LuceneIndexAndSearchDemo { public class LuceneIndexAndSearchDemo {
public static final ESLogger logger= Loggers.getLogger("ik-analyzer"); public static ESLogger logger= Loggers.getLogger("ik-analyzer");
/** /**
* 模拟 * 模拟
@ -74,7 +74,7 @@ public class LuceneIndexAndSearchDemo {
public static void main(String[] args){ public static void main(String[] args){
//Lucene Document的域名 //Lucene Document的域名
String fieldName = "text"; String fieldName = "text";
//检索内容 //检索内容
String text = "IK Analyzer是一个结合词典分词和文法分词的中文分词开源工具包。它使用了全新的正向迭代最细粒度切分算法。"; String text = "IK Analyzer是一个结合词典分词和文法分词的中文分词开源工具包。它使用了全新的正向迭代最细粒度切分算法。";
//实例化IKAnalyzer分词器 //实例化IKAnalyzer分词器
@ -101,7 +101,7 @@ public class LuceneIndexAndSearchDemo {
//搜索过程********************************** //搜索过程**********************************
//实例化搜索器 //实例化搜索器
ireader = DirectoryReader.open(directory); ireader = DirectoryReader.open(directory);
isearcher = new IndexSearcher(ireader); isearcher = new IndexSearcher(ireader);
@ -110,39 +110,39 @@ public class LuceneIndexAndSearchDemo {
QueryParser qp = new QueryParser(fieldName, analyzer); QueryParser qp = new QueryParser(fieldName, analyzer);
qp.setDefaultOperator(QueryParser.AND_OPERATOR); qp.setDefaultOperator(QueryParser.AND_OPERATOR);
Query query = qp.parse(keyword); Query query = qp.parse(keyword);
logger.info("Query = " + query); System.out.println("Query = " + query);
//搜索相似度最高的5条记录 //搜索相似度最高的5条记录
TopDocs topDocs = isearcher.search(query , 5); TopDocs topDocs = isearcher.search(query , 5);
logger.info("命中:" + topDocs.totalHits); System.out.println("命中:" + topDocs.totalHits);
//输出结果 //输出结果
ScoreDoc[] scoreDocs = topDocs.scoreDocs; ScoreDoc[] scoreDocs = topDocs.scoreDocs;
for (int i = 0; i < topDocs.totalHits; i++){ for (int i = 0; i < topDocs.totalHits; i++){
Document targetDoc = isearcher.doc(scoreDocs[i].doc); Document targetDoc = isearcher.doc(scoreDocs[i].doc);
logger.info("内容:" + targetDoc.toString()); System.out.println("内容:" + targetDoc.toString());
} }
} catch (CorruptIndexException e) { } catch (CorruptIndexException e) {
e.printStackTrace(); logger.error(e.getMessage(), e);
} catch (LockObtainFailedException e) { } catch (LockObtainFailedException e) {
e.printStackTrace(); logger.error(e.getMessage(), e);
} catch (IOException e) { } catch (IOException e) {
e.printStackTrace(); logger.error(e.getMessage(), e);
} catch (ParseException e) { } catch (ParseException e) {
e.printStackTrace(); logger.error(e.getMessage(), e);
} finally{ } finally{
if(ireader != null){ if(ireader != null){
try { try {
ireader.close(); ireader.close();
} catch (IOException e) { } catch (IOException e) {
e.printStackTrace(); logger.error(e.getMessage(), e);
} }
} }
if(directory != null){ if(directory != null){
try { try {
directory.close(); directory.close();
} catch (IOException e) { } catch (IOException e) {
e.printStackTrace(); logger.error(e.getMessage(), e);
} }
} }
} }