elasticsearch ik 0.20.x => 0.90.x

wangweihua 2013-05-09 13:46:25 +08:00
parent a2dc3c7842
commit 5e14e3d629
16 changed files with 1520 additions and 1580 deletions

View File: pom.xml

@@ -31,7 +31,7 @@
     </parent>
     <properties>
-        <elasticsearch.version>0.20.2</elasticsearch.version>
+        <elasticsearch.version>0.90.0</elasticsearch.version>
     </properties>
     <repositories>
@@ -132,4 +132,4 @@
         </plugin>
     </plugins>
 </build>
 </project>

View File: org/elasticsearch/index/analysis/IkAnalyzer.java

@@ -2,19 +2,32 @@ package org.elasticsearch.index.analysis;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
 import org.wltea.analyzer.lucene.IKTokenizer;
-//import org.wltea.lucene.IKTokenizer;
 import java.io.Reader;

 public class IkAnalyzer extends Analyzer {
-    @Override public TokenStream tokenStream(String fieldName, Reader reader) {
-        return new IKTokenizer(reader,true);
-    }
+//    private boolean isMaxWordLength = false;
+//    @Override public TokenStream tokenStream(String fieldName, Reader reader) {
+//        return new IKTokenizer(reader,true);
+//    }

     public IkAnalyzer() {
         super();
     }

+    @Override
+    protected TokenStreamComponents createComponents(String s, Reader reader) {
+        // new TokenStreamComponents
+        Tokenizer tokenizer = new IKTokenizer(reader, true);
+        return new TokenStreamComponents(tokenizer, null); //To change body of implemented methods use File | Settings | File Templates.
+    }
+
+//    public boolean isMaxWordLength() {
+//        return isMaxWordLength;
+//    }
 }
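
Note: under Lucene 4.x (which Elasticsearch 0.90 embeds), Analyzer.tokenStream(String, Reader) is final and analyzers override createComponents instead, which is exactly what this hunk does. A minimal sketch of the same migration, assuming the plugin's IKTokenizer is on the classpath and the IK dictionary has already been initialized; it uses the single-argument TokenStreamComponents constructor so the tokenizer doubles as the sink, instead of passing null as above:

    import java.io.IOException;
    import java.io.Reader;
    import java.io.StringReader;
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.Tokenizer;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import org.wltea.analyzer.lucene.IKTokenizer;

    public final class IkAnalyzerSketch extends Analyzer {
        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
            // Build the token chain once; Lucene reuses the components per thread.
            Tokenizer tokenizer = new IKTokenizer(reader, true);
            return new TokenStreamComponents(tokenizer);
        }

        public static void main(String[] args) throws IOException {
            // Precondition: Dictionary.getInstance().Init(indexSettings) has run.
            Analyzer analyzer = new IkAnalyzerSketch();
            TokenStream ts = analyzer.tokenStream("f", new StringReader("中华人民共和国"));
            CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
            ts.reset();                     // required before the first incrementToken()
            while (ts.incrementToken()) {
                System.out.println(term.toString());
            }
            ts.end();
            ts.close();
        }
    }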

View File: org/wltea/analyzer/core/AnalyzeContext.java

@@ -24,11 +24,16 @@
  */
 package org.wltea.analyzer.core;

-import org.wltea.analyzer.dic.Dictionary;
 import java.io.IOException;
 import java.io.Reader;
-import java.util.*;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.LinkedList;
+import java.util.Map;
+import java.util.Set;
+
+import org.wltea.analyzer.cfg.Configuration;
+import org.wltea.analyzer.dic.Dictionary;

 /**
  *
@@ -68,12 +73,12 @@ class AnalyzeContext {
     private Map<Integer , LexemePath> pathMap;
     //最终分词结果集
     private LinkedList<Lexeme> results;
+    private boolean useSmart;
     //分词器配置项
-    private boolean useSmart;
+//    private Configuration cfg;

     public AnalyzeContext(boolean useSmart){
         this.useSmart = useSmart;
         this.segmentBuff = new char[BUFF_SIZE];
         this.charTypes = new int[BUFF_SIZE];
         this.buffLocker = new HashSet<String>();
@@ -313,7 +318,7 @@ class AnalyzeContext {
         while(result != null){
             //数量词合并
             this.compound(result);
-            if(Dictionary.isStopWord(this.segmentBuff , result.getBegin() , result.getLength())){
+            if(Dictionary.getSingleton().isStopWord(this.segmentBuff , result.getBegin() , result.getLength())){
                 //是停止词继续取列表的下一个
                 result = this.results.pollFirst();
             }else{
@@ -344,6 +349,7 @@ class AnalyzeContext {
      * 组合词元
      */
     private void compound(Lexeme result){
+
         if(!this.useSmart){
             return ;
         }

View File: org/wltea/analyzer/core/CJKSegmenter.java

@@ -25,12 +25,12 @@
  */
 package org.wltea.analyzer.core;

-import org.wltea.analyzer.dic.Dictionary;
-import org.wltea.analyzer.dic.Hit;
 import java.util.LinkedList;
 import java.util.List;
+
+import org.wltea.analyzer.dic.Dictionary;
+import org.wltea.analyzer.dic.Hit;

 /**
  * 中文-日韩文子分词器
@@ -58,7 +58,7 @@ class CJKSegmenter implements ISegmenter {
             //处理词段队列
             Hit[] tmpArray = this.tmpHits.toArray(new Hit[this.tmpHits.size()]);
             for(Hit hit : tmpArray){
-                hit = Dictionary.matchInMainDictWithHit(context.getSegmentBuff(), context.getCursor() , hit);
+                hit = Dictionary.getSingleton().matchWithHit(context.getSegmentBuff(), context.getCursor() , hit);
                 if(hit.isMatch()){
                     //输出当前的词
                     Lexeme newLexeme = new Lexeme(context.getBufferOffset() , hit.getBegin() , context.getCursor() - hit.getBegin() + 1 , Lexeme.TYPE_CNWORD);
@@ -77,7 +77,7 @@ class CJKSegmenter implements ISegmenter {
             //*********************************
             //再对当前指针位置的字符进行单字匹配
-            Hit singleCharHit = Dictionary.matchInMainDict(context.getSegmentBuff(), context.getCursor(), 1);
+            Hit singleCharHit = Dictionary.getSingleton().matchInMainDict(context.getSegmentBuff(), context.getCursor(), 1);
             if(singleCharHit.isMatch()){//首字成词
                 //输出当前的词
                 Lexeme newLexeme = new Lexeme(context.getBufferOffset() , context.getCursor() , 1 , Lexeme.TYPE_CNWORD);
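
Note: the calls above move from static Dictionary methods to instance methods on Dictionary.getSingleton(), and matchInMainDictWithHit is renamed matchWithHit. The Hit carries the trie node where the previous match stopped, so each pending prefix is extended by exactly one character per cursor step rather than re-scanned from its start. A hypothetical, self-contained walk over a buffer illustrating that pattern (class and variable names here are illustrative, and the dictionary singleton must already be initialized):

    import java.util.LinkedList;
    import java.util.List;
    import org.wltea.analyzer.dic.Dictionary;
    import org.wltea.analyzer.dic.Hit;

    public class PrefixWalkSketch {
        public static void main(String[] args) {
            // Precondition: Dictionary.getInstance().Init(indexSettings) has run.
            Dictionary dict = Dictionary.getSingleton();
            char[] buff = "中华人民共和国".toCharArray();
            List<Hit> pending = new LinkedList<Hit>();   // open prefixes, like tmpHits
            for (int cursor = 0; cursor < buff.length; cursor++) {
                // Extend every open prefix by the character at `cursor`.
                for (Hit hit : pending.toArray(new Hit[pending.size()])) {
                    hit = dict.matchWithHit(buff, cursor, hit);
                    if (hit.isMatch()) {
                        System.out.println("word: [" + hit.getBegin() + "," + cursor + "]");
                    }
                    if (!hit.isPrefix()) {
                        pending.remove(hit);             // dead branch, stop tracking it
                    }
                }
                // Open a new match starting at the cursor position.
                Hit single = dict.matchInMainDict(buff, cursor, 1);
                if (single.isMatch()) {
                    System.out.println("word: [" + cursor + "," + cursor + "]");
                }
                if (single.isPrefix()) {
                    pending.add(single);
                }
            }
        }
    }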

View File: org/wltea/analyzer/core/CN_QuantifierSegmenter.java

@@ -24,14 +24,14 @@
  */
 package org.wltea.analyzer.core;

-import org.wltea.analyzer.dic.Dictionary;
-import org.wltea.analyzer.dic.Hit;
 import java.util.HashSet;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Set;
+
+import org.wltea.analyzer.dic.Dictionary;
+import org.wltea.analyzer.dic.Hit;

 /**
  *
  * 中文数量词子分词器
@@ -155,7 +155,7 @@ class CN_QuantifierSegmenter implements ISegmenter{
             //处理词段队列
             Hit[] tmpArray = this.countHits.toArray(new Hit[this.countHits.size()]);
             for(Hit hit : tmpArray){
-                hit = Dictionary.matchInMainDictWithHit(context.getSegmentBuff(), context.getCursor() , hit);
+                hit = Dictionary.getSingleton().matchWithHit(context.getSegmentBuff(), context.getCursor() , hit);
                 if(hit.isMatch()){
                     //输出当前的词
                     Lexeme newLexeme = new Lexeme(context.getBufferOffset() , hit.getBegin() , context.getCursor() - hit.getBegin() + 1 , Lexeme.TYPE_COUNT);
@@ -174,7 +174,7 @@ class CN_QuantifierSegmenter implements ISegmenter{
             //*********************************
             //对当前指针位置的字符进行单字匹配
-            Hit singleCharHit = Dictionary.matchInQuantifierDict(context.getSegmentBuff(), context.getCursor(), 1);
+            Hit singleCharHit = Dictionary.getSingleton().matchInQuantifierDict(context.getSegmentBuff(), context.getCursor(), 1);
             if(singleCharHit.isMatch()){//首字成量词词
                 //输出当前的词
                 Lexeme newLexeme = new Lexeme(context.getBufferOffset() , context.getCursor() , 1 , Lexeme.TYPE_COUNT);

View File: org/wltea/analyzer/core/IKArbitrator.java

@@ -38,7 +38,7 @@ class IKArbitrator {
     /**
      * 分词歧义处理
-     * @param orgLexemes
+//     * @param orgLexemes
      * @param useSmart
      */
     void process(AnalyzeContext context , boolean useSmart){
@@ -87,7 +87,6 @@ class IKArbitrator {
      * 歧义识别
      * @param lexemeCell 歧义路径链表头
      * @param fullTextLength 歧义路径文本长度
-     * @param option 候选结果路径
      * @return
      */
     private LexemePath judge(QuickSortSet.Cell lexemeCell , int fullTextLength){
@@ -120,7 +119,7 @@ class IKArbitrator {
     /**
      * 向前遍历添加词元构造一个无歧义词元组合
-     * @param LexemePath path
+//     * @param LexemePath path
      * @return
      */
     private Stack<QuickSortSet.Cell> forwardPath(QuickSortSet.Cell lexemeCell , LexemePath option){
@@ -140,7 +139,7 @@ class IKArbitrator {
     /**
      * 回滚词元链直到它能够接受指定的词元
-     * @param lexeme
+//     * @param lexeme
      * @param l
      */
     private void backPath(Lexeme l , LexemePath option){

View File: org/wltea/analyzer/core/IKSegmenter.java

@@ -23,14 +23,15 @@
  */
 package org.wltea.analyzer.core;

-import org.elasticsearch.common.logging.ESLogger;
-import org.elasticsearch.common.logging.Loggers;
 import java.io.IOException;
 import java.io.Reader;
 import java.util.ArrayList;
 import java.util.List;
+
+import org.wltea.analyzer.cfg.Configuration;
+//import org.wltea.analyzer.cfg.DefaultConfig;
+import org.wltea.analyzer.dic.Dictionary;

 /**
  * IK分词器主类
  *
@@ -39,16 +40,18 @@ public final class IKSegmenter {
     //字符窜reader
     private Reader input;
+    //分词器配置项
+    private Configuration cfg;
     //分词器上下文
     private AnalyzeContext context;
     //分词处理器列表
     private List<ISegmenter> segmenters;
     //分词歧义裁决器
     private IKArbitrator arbitrator;
-    private ESLogger logger=null;
-    private final boolean useSmart;
+    private boolean useSmart = false;

     /**
      * IK分词器构造函数
      * @param input
      * @param useSmart 为true使用智能分词策略
@@ -57,16 +60,31 @@ public final class IKSegmenter {
      * 智能分词 合并数词和量词对分词结果进行歧义判断
      */
     public IKSegmenter(Reader input , boolean useSmart){
-        logger = Loggers.getLogger("ik-analyzer");
         this.input = input;
+//        this.cfg = DefaultConfig.getInstance();
         this.useSmart=useSmart;
         this.init();
+    }
+
+    /**
+     * IK分词器构造函数
+     * @param input
+     * @param cfg 使用自定义的Configuration构造分词器
+     *
+     */
+    public IKSegmenter(Reader input , Configuration cfg){
+        this.input = input;
+        this.cfg = cfg;
+        this.init();
     }

     /**
      * 初始化
      */
     private void init(){
+        //初始化词典单例
+//        Dictionary.initial(this.cfg);
+//        Dictionary.getSingleton();
         //初始化分词上下文
         this.context = new AnalyzeContext(useSmart);
         //加载子分词器
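
Note: although the logger is gone and a Configuration-based constructor is added, the streaming API itself is unchanged: next() returns one Lexeme at a time until it yields null. A minimal consumption sketch, assuming the dictionaries have already been loaded via the plugin's Init path:

    import java.io.IOException;
    import java.io.StringReader;
    import org.wltea.analyzer.core.IKSegmenter;
    import org.wltea.analyzer.core.Lexeme;

    public class SegmentLoopSketch {
        public static void main(String[] args) throws IOException {
            // true = smart mode: merge numerals/quantifiers and arbitrate ambiguity
            IKSegmenter seg = new IKSegmenter(new StringReader("这是一个中文分词的例子"), true);
            Lexeme lexeme;
            while ((lexeme = seg.next()) != null) {
                System.out.println(lexeme.getBeginPosition() + "-" + lexeme.getEndPosition()
                        + " : " + lexeme.getLexemeText());
            }
        }
    }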

View File: org/wltea/analyzer/core/LetterSegmenter.java

@@ -120,7 +120,7 @@ class LetterSegmenter implements ISegmenter {
     /**
      * 处理数字字母混合输出
      * windos2000 | linliangyi2005@gmail.com
-     * @param input
+//     * @param input
      * @param context
      * @return
      */

View File: org/wltea/analyzer/dic/DictSegment.java

@@ -326,13 +326,5 @@ class DictSegment implements Comparable<DictSegment>{
         //对当前节点存储的char进行比较
         return this.nodeChar.compareTo(o.nodeChar);
     }
-
-    public int getDicNum(){
-        if(charMap!=null)
-        {
-            return charMap.size();
-        }
-        return 0;
-    }
 }

View File: org/wltea/analyzer/dic/Dictionary.java

@@ -1,74 +1,233 @@
 /**
+ * IK 中文分词 版本 5.0
+ * IK Analyzer release 5.0
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * 源代码由林良益(linliangyi2005@gmail.com)提供
+ * 版权声明 2012乌龙茶工作室
+ * provided by Linliangyi and copyright 2012 by Oolong studio
+ *
 *
 */
 package org.wltea.analyzer.dic;

+import java.io.*;
+import java.util.Collection;
+import java.util.List;
+
 import org.elasticsearch.common.logging.ESLogger;
 import org.elasticsearch.common.logging.Loggers;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.wltea.analyzer.cfg.Configuration;
-import java.io.*;
-import java.util.Collection;
-import java.util.List;

+/**
+ * 词典管理类,单子模式
+ */
 public class Dictionary {
-    public static final String PATH_DIC_MAIN = "ik/main.dic";
-    public static final String PATH_DIC_SURNAME = "ik/surname.dic";
-    public static final String PATH_DIC_QUANTIFIER = "ik/quantifier.dic";
-    public static final String PATH_DIC_SUFFIX = "ik/suffix.dic";
-    public static final String PATH_DIC_PREP = "ik/preposition.dic";
-    public static final String PATH_DIC_STOP = "ik/stopword.dic";
-
-    private static final Dictionary singleton;
-    static{
-        singleton = new Dictionary();
-    }
+    /*
+     * 词典单子实例
+     */
+    private static Dictionary singleton;

+    /*
+     * 主词典对象
+     */
     private DictSegment _MainDict;
-    private DictSegment _SurnameDict;
+    /*
+     * 停止词词典
+     */
+    private DictSegment _StopWordDict;
+    /*
+     * 量词词典
+     */
     private DictSegment _QuantifierDict;
-    private DictSegment _SuffixDict;
-    private DictSegment _PrepDict;
-    private DictSegment _StopWords;
-    private Environment environment;
-    private Configuration configuration;
+    /**
+     * 配置对象
+     */
+    private Configuration configuration;
     private ESLogger logger=null;
     private static boolean dictInited=false;
+    private Environment environment;
+
+    public static final String PATH_DIC_MAIN = "ik/main.dic";
+    public static final String PATH_DIC_SURNAME = "ik/surname.dic";
+    public static final String PATH_DIC_QUANTIFIER = "ik/quantifier.dic";
+    public static final String PATH_DIC_SUFFIX = "ik/suffix.dic";
+    public static final String PATH_DIC_PREP = "ik/preposition.dic";
+    public static final String PATH_DIC_STOP = "ik/stopword.dic";

     private Dictionary(){
         logger = Loggers.getLogger("ik-analyzer");
     }

-    public Configuration getConfig(){
-        return configuration;
-    }
+    static{
+        singleton = new Dictionary();
+    }
+
+//    public Configuration getConfig(){
+//        return configuration;
+//    }
+
+//    private Dictionary(Configuration cfg){
+//        this.cfg = cfg;
+//        this.loadMainDict();
+//        this.loadStopWordDict();
+//        this.loadQuantifierDict();
+//    }

     public void Init(Settings indexSettings){
         if(!dictInited){
             environment =new Environment(indexSettings);
             configuration=new Configuration(indexSettings);
             loadMainDict();
-            loadSurnameDict();
+//            loadSurnameDict();
             loadQuantifierDict();
-            loadSuffixDict();
-            loadPrepDict();
+//            loadSuffixDict();
+//            loadPrepDict();
             loadStopWordDict();
             dictInited=true;
         }
     }

+    /**
+     * 词典初始化
+     * 由于IK Analyzer的词典采用Dictionary类的静态方法进行词典初始化
+     * 只有当Dictionary类被实际调用时才会开始载入词典
+     * 这将延长首次分词操作的时间
+     * 该方法提供了一个在应用加载阶段就初始化字典的手段
+     * @return Dictionary
+     */
+//    public static Dictionary initial(Configuration cfg){
+//        if(singleton == null){
+//            synchronized(Dictionary.class){
+//                if(singleton == null){
+//                    singleton = new Dictionary();
+//                    return singleton;
+//                }
+//            }
+//        }
+//        return singleton;
+//    }
+
+    /**
+     * 获取词典单子实例
+     * @return Dictionary 单例对象
+     */
+    public static Dictionary getSingleton(){
+        if(singleton == null){
+            throw new IllegalStateException("词典尚未初始化请先调用initial方法");
+        }
+        return singleton;
+    }
+
+    /**
+     * 批量加载新词条
+     * @param words Collection<String>词条列表
+     */
+    public void addWords(Collection<String> words){
+        if(words != null){
+            for(String word : words){
+                if (word != null) {
+                    //批量加载词条到主内存词典中
+                    singleton._MainDict.fillSegment(word.trim().toLowerCase().toCharArray());
+                }
+            }
+        }
+    }
+
+    /**
+     * 批量移除屏蔽词条
+     * @param words
+     */
+    public void disableWords(Collection<String> words){
+        if(words != null){
+            for(String word : words){
+                if (word != null) {
+                    //批量屏蔽词条
+                    singleton._MainDict.disableSegment(word.trim().toLowerCase().toCharArray());
+                }
+            }
+        }
+    }
+
+    /**
+     * 检索匹配主词典
+     * @param charArray
+     * @return Hit 匹配结果描述
+     */
+    public Hit matchInMainDict(char[] charArray){
+        return singleton._MainDict.match(charArray);
+    }
+
+    /**
+     * 检索匹配主词典
+     * @param charArray
+     * @param begin
+     * @param length
+     * @return Hit 匹配结果描述
+     */
+    public Hit matchInMainDict(char[] charArray , int begin, int length){
+        return singleton._MainDict.match(charArray, begin, length);
+    }
+
+    /**
+     * 检索匹配量词词典
+     * @param charArray
+     * @param begin
+     * @param length
+     * @return Hit 匹配结果描述
+     */
+    public Hit matchInQuantifierDict(char[] charArray , int begin, int length){
+        return singleton._QuantifierDict.match(charArray, begin, length);
+    }
+
+    /**
+     * 从已匹配的Hit中直接取出DictSegment继续向下匹配
+     * @param charArray
+     * @param currentIndex
+     * @param matchedHit
+     * @return Hit
+     */
+    public Hit matchWithHit(char[] charArray , int currentIndex , Hit matchedHit){
+        DictSegment ds = matchedHit.getMatchedDictSegment();
+        return ds.match(charArray, currentIndex, 1 , matchedHit);
+    }
+
+    /**
+     * 判断是否是停止词
+     * @param charArray
+     * @param begin
+     * @param length
+     * @return boolean
+     */
+    public boolean isStopWord(char[] charArray , int begin, int length){
+        return singleton._StopWordDict.match(charArray, begin, length).isMatch();
+    }
+
+    /**
+     * 加载主词典及扩展词典
+     */
     private void loadMainDict(){
+        //建立一个主词典实例
         _MainDict = new DictSegment((char)0);
+        //读取主词典文件
         File file= new File(environment.configFile(), Dictionary.PATH_DIC_MAIN);
         InputStream is = null;
@@ -77,24 +236,21 @@ public class Dictionary {
         } catch (FileNotFoundException e) {
             e.printStackTrace();
         }
-        if(is == null){
-            throw new RuntimeException("Main Dictionary not found!!!");
-        }
         try {
             BufferedReader br = new BufferedReader(new InputStreamReader(is , "UTF-8"), 512);
-            String theWord;
+            String theWord = null;
             do {
                 theWord = br.readLine();
                 if (theWord != null && !"".equals(theWord.trim())) {
-                    _MainDict.fillSegment(theWord.trim().toCharArray());
+                    _MainDict.fillSegment(theWord.trim().toLowerCase().toCharArray());
                 }
             } while (theWord != null);
-            logger.info("[Dict Loading] {},MainDict Size:{}",file.toString(),_MainDict.getDicNum());
         } catch (IOException ioe) {
             System.err.println("Main Dictionary loading exception.");
             ioe.printStackTrace();
         }finally{
             try {
                 if(is != null){
@@ -105,41 +261,42 @@ public class Dictionary {
                 e.printStackTrace();
             }
         }
+        //加载扩展词典
+        this.loadExtDict();
+    }
+
+    /**
+     * 加载用户配置的扩展词典到主词库表
+     */
+    private void loadExtDict(){
+        //加载扩展词典配置
         List<String> extDictFiles = configuration.getExtDictionarys();
         if(extDictFiles != null){
+            InputStream is = null;
             for(String extDictName : extDictFiles){
-                File tempFile=new File(environment.configFile(),extDictName);
-                try {
-                    is = new FileInputStream(tempFile);
-                } catch (FileNotFoundException e) {
-                    e.printStackTrace();
-                    logger.error("[Dict Loading]",e);
-                }
+                //读取扩展词典文件
+                System.out.println("加载扩展词典:" + extDictName);
+                is = this.getClass().getClassLoader().getResourceAsStream(extDictName);
+                //如果找不到扩展的字典则忽略
                 if(is == null){
                     continue;
                 }
                 try {
                     BufferedReader br = new BufferedReader(new InputStreamReader(is , "UTF-8"), 512);
-                    String theWord;
+                    String theWord = null;
                     do {
                         theWord = br.readLine();
                         if (theWord != null && !"".equals(theWord.trim())) {
+                            //加载扩展词典数据到主内存词典中
+                            //System.out.println(theWord);
                             _MainDict.fillSegment(theWord.trim().toLowerCase().toCharArray());
                         }
                     } while (theWord != null);
-                    logger.info("[Dict Loading] {},MainDict Size:{}",tempFile.toString(),_MainDict.getDicNum());
                 } catch (IOException ioe) {
                     System.err.println("Extension Dictionary loading exception.");
                     ioe.printStackTrace();
                 }finally{
                     try {
                         if(is != null){
@@ -151,77 +308,85 @@ public class Dictionary {
             }
         }
     }

-    private void loadSurnameDict(){
-        _SurnameDict = new DictSegment((char)0);
-        File file=new File(environment.configFile(),Dictionary.PATH_DIC_SURNAME);
-        InputStream is = null;
-        try {
-            is = new FileInputStream(file);
-        } catch (FileNotFoundException e) {
-            e.printStackTrace();
-        }
-        if(is == null){
-            throw new RuntimeException("Surname Dictionary not found!!!");
-        }
-        try {
-            BufferedReader br = new BufferedReader(new InputStreamReader(is , "UTF-8"), 512);
-            String theWord;
-            do {
-                theWord = br.readLine();
-                if (theWord != null && !"".equals(theWord.trim())) {
-                    _SurnameDict.fillSegment(theWord.trim().toCharArray());
-                }
-            } while (theWord != null);
-            logger.info("[Dict Loading] {},SurnameDict Size:{}",file.toString(),_SurnameDict.getDicNum());
-        } catch (IOException ioe) {
-            System.err.println("Surname Dictionary loading exception.");
-            ioe.printStackTrace();
-        }finally{
-            try {
-                if(is != null){
-                    is.close();
-                    is = null;
-                }
-            } catch (IOException e) {
-                e.printStackTrace();
-            }
-        }
-    }
+    /**
+     * 加载用户扩展的停止词词典
+     */
+    private void loadStopWordDict(){
+        //建立一个主词典实例
+        _StopWordDict = new DictSegment((char)0);
+        //加载扩展停止词典
+        List<String> extStopWordDictFiles = configuration.getExtStopWordDictionarys();
+        if(extStopWordDictFiles != null){
+            InputStream is = null;
+            for(String extStopWordDictName : extStopWordDictFiles){
+                System.out.println("加载扩展停止词典:" + extStopWordDictName);
+                //读取扩展词典文件
+                is = this.getClass().getClassLoader().getResourceAsStream(extStopWordDictName);
+                //如果找不到扩展的字典则忽略
+                if(is == null){
+                    continue;
+                }
+                try {
+                    BufferedReader br = new BufferedReader(new InputStreamReader(is , "UTF-8"), 512);
+                    String theWord = null;
+                    do {
+                        theWord = br.readLine();
+                        if (theWord != null && !"".equals(theWord.trim())) {
+                            //System.out.println(theWord);
+                            //加载扩展停止词典数据到内存中
+                            _StopWordDict.fillSegment(theWord.trim().toLowerCase().toCharArray());
+                        }
+                    } while (theWord != null);
+                } catch (IOException ioe) {
+                    System.err.println("Extension Stop word Dictionary loading exception.");
+                    ioe.printStackTrace();
+                }finally{
+                    try {
+                        if(is != null){
+                            is.close();
+                            is = null;
+                        }
+                    } catch (IOException e) {
+                        e.printStackTrace();
+                    }
+                }
+            }
+        }
+    }

+    /**
+     * 加载量词词典
+     */
     private void loadQuantifierDict(){
+        //建立一个量词典实例
         _QuantifierDict = new DictSegment((char)0);
+        //读取量词词典文件
         File file=new File(environment.configFile(),Dictionary.PATH_DIC_QUANTIFIER);
         InputStream is = null;
         try {
             is = new FileInputStream(file);
         } catch (FileNotFoundException e) {
             e.printStackTrace();
         }
-        if(is == null){
-            throw new RuntimeException("Quantifier Dictionary not found!!!");
-        }
         try {
             BufferedReader br = new BufferedReader(new InputStreamReader(is , "UTF-8"), 512);
-            String theWord;
+            String theWord = null;
             do {
                 theWord = br.readLine();
                 if (theWord != null && !"".equals(theWord.trim())) {
-                    _QuantifierDict.fillSegment(theWord.trim().toCharArray());
+                    _QuantifierDict.fillSegment(theWord.trim().toLowerCase().toCharArray());
                 }
             } while (theWord != null);
-            logger.info("[Dict Loading] {},QuantifierDict Size:{}",file.toString(),_QuantifierDict.getDicNum());
         } catch (IOException ioe) {
             System.err.println("Quantifier Dictionary loading exception.");
             ioe.printStackTrace();
         }finally{
             try {
                 if(is != null){
@@ -235,304 +400,8 @@ public class Dictionary {
         }
     }

+    public static Dictionary getInstance(){
+        return Dictionary.singleton;
+    }
-    private void loadSuffixDict(){
-        _SuffixDict = new DictSegment((char)0);
-        File file=new File(environment.configFile(),Dictionary.PATH_DIC_SUFFIX);
-        InputStream is = null;
-        try {
-            is = new FileInputStream(file);
-        } catch (FileNotFoundException e) {
-            e.printStackTrace();
-        }
-        if(is == null){
-            throw new RuntimeException("Suffix Dictionary not found!!!");
-        }
-        try {
-            BufferedReader br = new BufferedReader(new InputStreamReader(is , "UTF-8"), 512);
-            String theWord;
-            do {
-                theWord = br.readLine();
-                if (theWord != null && !"".equals(theWord.trim())) {
-                    _SuffixDict.fillSegment(theWord.trim().toCharArray());
-                }
-            } while (theWord != null);
-            logger.info("[Dict Loading] {},SuffixDict Size:{}",file.toString(),_SuffixDict.getDicNum());
-        } catch (IOException ioe) {
-            System.err.println("Suffix Dictionary loading exception.");
-            ioe.printStackTrace();
-        }finally{
-            try {
-                if(is != null){
-                    is.close();
-                    is = null;
-                }
-            } catch (IOException e) {
-                e.printStackTrace();
-            }
-        }
-    }
-
-    private void loadPrepDict(){
-        _PrepDict = new DictSegment((char)0);
-        File file=new File(environment.configFile(),Dictionary.PATH_DIC_PREP);
-        InputStream is = null;
-        try {
-            is = new FileInputStream(file);
-        } catch (FileNotFoundException e) {
-            e.printStackTrace();
-        }
-        if(is == null){
-            throw new RuntimeException("Preposition Dictionary not found!!!");
-        }
-        try {
-            BufferedReader br = new BufferedReader(new InputStreamReader(is , "UTF-8"), 512);
-            String theWord;
-            do {
-                theWord = br.readLine();
-                if (theWord != null && !"".equals(theWord.trim())) {
-                    _PrepDict.fillSegment(theWord.trim().toCharArray());
-                }
-            } while (theWord != null);
-            logger.info("[Dict Loading] {},PrepDict Size:{}",file.toString(),_PrepDict.getDicNum());
-        } catch (IOException ioe) {
-            System.err.println("Preposition Dictionary loading exception.");
-            ioe.printStackTrace();
-        }finally{
-            try {
-                if(is != null){
-                    is.close();
-                    is = null;
-                }
-            } catch (IOException e) {
-                e.printStackTrace();
-            }
-        }
-    }
-
-    private void loadStopWordDict(){
-        _StopWords = new DictSegment((char)0);
-        File file=new File(environment.configFile(),Dictionary.PATH_DIC_STOP);
-        InputStream is = null;
-        try {
-            is = new FileInputStream(file);
-        } catch (FileNotFoundException e) {
-            e.printStackTrace();
-        }
-        if(is == null){
-            throw new RuntimeException("Stopword Dictionary not found!!!");
-        }
-        try {
-            BufferedReader br = new BufferedReader(new InputStreamReader(is , "UTF-8"), 512);
-            String theWord;
-            do {
-                theWord = br.readLine();
-                if (theWord != null && !"".equals(theWord.trim())) {
-                    _StopWords.fillSegment(theWord.trim().toCharArray());
-                }
-            } while (theWord != null);
-            logger.info("[Dict Loading] {},Stopwords Size:{}",file.toString(),_StopWords.getDicNum());
-        } catch (IOException ioe) {
-            System.err.println("Stopword Dictionary loading exception.");
-            ioe.printStackTrace();
-        }finally{
-            try {
-                if(is != null){
-                    is.close();
-                    is = null;
-                }
-            } catch (IOException e) {
-                e.printStackTrace();
-            }
-        }
-        List<String> extStopWordDictFiles = configuration.getExtStopWordDictionarys();
-        if(extStopWordDictFiles != null){
-            for(String extStopWordDictName : extStopWordDictFiles){
-                File tempFile=new File(environment.configFile(),extStopWordDictName);
-                try {
-                    is = new FileInputStream(tempFile);
-                } catch (FileNotFoundException e) {
-                    e.printStackTrace();
-                }
-                if(is == null){
-                    continue;
-                }
-                try {
-                    BufferedReader br = new BufferedReader(new InputStreamReader(is , "UTF-8"), 512);
-                    String theWord;
-                    do {
-                        theWord = br.readLine();
-                        if (theWord != null && !"".equals(theWord.trim())) {
-                            _StopWords.fillSegment(theWord.trim().toCharArray());
-                        }
-                    } while (theWord != null);
-                    logger.info("[Dict Loading] {},Stopwords Size:{}",tempFile.toString(),_StopWords.getDicNum());
-                } catch (IOException ioe) {
-                    System.err.println("Extension Stop word Dictionary loading exception.");
-                    ioe.printStackTrace();
-                }finally{
-                    try {
-                        if(is != null){
-                            is.close();
-                            is = null;
-                        }
-                    } catch (IOException e) {
-                        e.printStackTrace();
-                    }
-                }
-            }
-        }
-    }
-
-    public static Dictionary getInstance(){
-        return Dictionary.singleton;
-    }
-
-    public static void loadExtendWords(Collection<String> extWords){
-        if(extWords != null){
-            for(String extWord : extWords){
-                if (extWord != null) {
-                    singleton._MainDict.fillSegment(extWord.trim().toCharArray());
-                }
-            }
-        }
-    }
-
-    public static void loadExtendStopWords(Collection<String> extStopWords){
-        if(extStopWords != null){
-            for(String extStopWord : extStopWords){
-                if (extStopWord != null) {
-                    singleton._StopWords.fillSegment(extStopWord.trim().toCharArray());
-                }
-            }
-        }
-    }
-
-    public static Hit matchInMainDict(char[] charArray){
-        return singleton._MainDict.match(charArray);
-    }
-
-    public static Hit matchInMainDict(char[] charArray , int begin, int length){
-        return singleton._MainDict.match(charArray, begin, length);
-    }
-
-    public static Hit matchInMainDictWithHit(char[] charArray , int currentIndex , Hit matchedHit){
-        DictSegment ds = matchedHit.getMatchedDictSegment();
-        return ds.match(charArray, currentIndex, 1 , matchedHit);
-    }
-
-    public static Hit matchInSurnameDict(char[] charArray , int begin, int length){
-        return singleton._SurnameDict.match(charArray, begin, length);
-    }
-
-    /**
-     * 检索匹配量词词典
-     * @param charArray
-     * @param begin
-     * @param length
-     * @return Hit 匹配结果描述
-     */
-    public static Hit matchInQuantifierDict(char[] charArray , int begin, int length){
-        return singleton._QuantifierDict.match(charArray, begin, length);
-    }
-
-    /**
-     * 检索匹配在后缀词典
-     * @param charArray
-     * @param begin
-     * @param length
-     * @return Hit 匹配结果描述
-     */
-    public static Hit matchInSuffixDict(char[] charArray , int begin, int length){
-        return singleton._SuffixDict.match(charArray, begin, length);
-    }
-
-    /**
-     * 检索匹配介词副词词典
-     * @param charArray
-     * @param begin
-     * @param length
-     * @return Hit 匹配结果描述
-     */
-    public static Hit matchInPrepDict(char[] charArray , int begin, int length){
-        return singleton._PrepDict.match(charArray, begin, length);
-    }
-
-    /**
-     * 判断是否是停止词
-     * @param charArray
-     * @param begin
-     * @param length
-     * @return boolean
-     */
-    public static boolean isStopWord(char[] charArray , int begin, int length){
-        return singleton._StopWords.match(charArray, begin, length).isMatch();
-    }
 }
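
Note: the rewritten class keeps this fork's eager static singleton plus the Init(Settings)/dictInited guard while exposing the upstream 5.x instance API (getSingleton, matchWithHit, addWords, disableWords, ...). One side effect of dropping the old `if (is == null) throw` guards is that a missing ik/main.dic now surfaces later, as a NullPointerException in the reader, instead of the explicit RuntimeException the old code threw. A lifecycle sketch, assuming the ik dictionary files are reachable under the node's config path (the empty Settings object is only a stand-in for the real index settings):

    import org.elasticsearch.common.settings.ImmutableSettings;
    import org.elasticsearch.common.settings.Settings;
    import org.wltea.analyzer.dic.Dictionary;

    public class DictionaryLifecycleSketch {
        public static void main(String[] args) {
            // Init() runs once; later calls are no-ops thanks to dictInited.
            Settings settings = ImmutableSettings.settingsBuilder().build();  // stand-in settings
            Dictionary.getInstance().Init(settings);

            // After Init(), lookups go through the singleton's instance methods.
            char[] text = "的".toCharArray();
            System.out.println("isStopWord(的) = "
                    + Dictionary.getSingleton().isStopWord(text, 0, text.length));
        }
    }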

View File: org/wltea/analyzer/dic/Hit.java

@@ -58,7 +58,9 @@ public class Hit {
     public boolean isMatch() {
         return (this.hitState & MATCH) > 0;
     }
+    /**
+     *
+     */
     public void setMatch() {
         this.hitState = this.hitState | MATCH;
     }
@@ -69,7 +71,9 @@ public class Hit {
     public boolean isPrefix() {
         return (this.hitState & PREFIX) > 0;
     }
+    /**
+     *
+     */
     public void setPrefix() {
         this.hitState = this.hitState | PREFIX;
     }
@@ -79,7 +83,9 @@ public class Hit {
     public boolean isUnmatch() {
         return this.hitState == UNMATCH ;
     }
+    /**
+     *
+     */
     public void setUnmatch() {
         this.hitState = UNMATCH;
     }

View File: org/wltea/analyzer/lucene/IKAnalyzer.java

@@ -1,51 +1,87 @@
 /**
+ * IK 中文分词 版本 5.0.1
+ * IK Analyzer release 5.0.1
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * 源代码由林良益(linliangyi2005@gmail.com)提供
+ * 版权声明 2012乌龙茶工作室
+ * provided by Linliangyi and copyright 2012 by Oolong studio
 *
 */
 package org.wltea.analyzer.lucene;

+import java.io.Reader;
+
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
 import org.elasticsearch.common.settings.Settings;
 import org.wltea.analyzer.dic.Dictionary;
-import java.io.Reader;

-public final class IKAnalyzer extends Analyzer {
+/**
+ * IK分词器Lucene Analyzer接口实现
+ * 兼容Lucene 4.0版本
+ */
+public final class IKAnalyzer extends Analyzer{

-    private boolean isMaxWordLength = false;
-    private boolean useSmart=false;
+    private boolean useSmart;

+    public boolean useSmart() {
+        return useSmart;
+    }
+
+    public void setUseSmart(boolean useSmart) {
+        this.useSmart = useSmart;
+    }
+
+    /**
+     * IK分词器Lucene Analyzer接口实现类
+     *
+     * 默认细粒度切分算法
+     */
     public IKAnalyzer(){
         this(false);
     }

-    public IKAnalyzer(boolean isMaxWordLength){
+    /**
+     * IK分词器Lucene Analyzer接口实现类
+     *
+     * @param useSmart 当为true时分词器进行智能切分
+     */
+    public IKAnalyzer(boolean useSmart){
         super();
-        this.setMaxWordLength(isMaxWordLength);
+        this.useSmart = useSmart;
     }

     public IKAnalyzer(Settings indexSetting,Settings settings1) {
         super();
         Dictionary.getInstance().Init(indexSetting);
         if(settings1.get("use_smart", "true").equals("true")){
-            useSmart=true;
+            useSmart = true;
         }
     }

-    @Override
-    public TokenStream tokenStream(String fieldName, Reader reader) {
-        return new IKTokenizer(reader , useSmart);
-    }
-
-    public void setMaxWordLength(boolean isMaxWordLength) {
-        this.isMaxWordLength = isMaxWordLength;
-    }
-
-    public boolean isMaxWordLength() {
-        return isMaxWordLength;
-    }
+    /**
+     * 重载Analyzer接口构造分词组件
+     */
+    @Override
+    protected TokenStreamComponents createComponents(String fieldName, final Reader in) {
+        Tokenizer _IKTokenizer = new IKTokenizer(in , this.useSmart());
+        return new TokenStreamComponents(_IKTokenizer);
+    }
 }
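
Note: the old isMaxWordLength flag is replaced by useSmart throughout, and the ES-facing constructor reads it from the analyzer settings. A sketch of how the two-Settings constructor resolves use_smart (the empty builders stand in for the settings Elasticsearch passes the analyzer factory, and assume the ik config directory is reachable):

    import org.apache.lucene.analysis.Analyzer;
    import org.elasticsearch.common.settings.ImmutableSettings;
    import org.elasticsearch.common.settings.Settings;
    import org.wltea.analyzer.lucene.IKAnalyzer;

    public class UseSmartWiringSketch {
        public static void main(String[] args) {
            Settings indexSettings = ImmutableSettings.settingsBuilder().build();
            Settings analyzerSettings = ImmutableSettings.settingsBuilder()
                    .put("use_smart", "false")   // anything but "true"; the default is "true"
                    .build();
            // Dictionary.getInstance().Init(indexSettings) runs inside this constructor.
            Analyzer ik = new IKAnalyzer(indexSettings, analyzerSettings);
            System.out.println(ik);
        }
    }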

View File: org/wltea/analyzer/lucene/IKTokenizer.java

@@ -1,7 +1,7 @@
 /**
  * IK 中文分词 版本 5.0.1
  * IK Analyzer release 5.0.1
  *
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements. See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
@@ -20,94 +20,95 @@
  * 源代码由林良益(linliangyi2005@gmail.com)提供
  * 版权声明 2012乌龙茶工作室
  * provided by Linliangyi and copyright 2012 by Oolong studio
  *
  *
  */
 package org.wltea.analyzer.lucene;

-import java.io.IOException;
-import java.io.Reader;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
 import org.wltea.analyzer.core.IKSegmenter;
 import org.wltea.analyzer.core.Lexeme;
+
+import java.io.IOException;
+import java.io.Reader;

 /**
  * IK分词器 Lucene Tokenizer适配器类
  * 兼容Lucene 4.0版本
  */
 public final class IKTokenizer extends Tokenizer {

     //IK分词器实现
     private IKSegmenter _IKImplement;

     //词元文本属性
     private final CharTermAttribute termAtt;
     //词元位移属性
     private final OffsetAttribute offsetAtt;
     //词元分类属性该属性分类参考org.wltea.analyzer.core.Lexeme中的分类常量
     private final TypeAttribute typeAtt;
     //记录最后一个词元的结束位置
     private int endPosition;

     /**
      * Lucene 4.0 Tokenizer适配器类构造函数
      * @param in
      * @param useSmart
      */
     public IKTokenizer(Reader in , boolean useSmart){
         super(in);
         offsetAtt = addAttribute(OffsetAttribute.class);
         termAtt = addAttribute(CharTermAttribute.class);
         typeAtt = addAttribute(TypeAttribute.class);
         _IKImplement = new IKSegmenter(input , useSmart);
     }

     /* (non-Javadoc)
      * @see org.apache.lucene.analysis.TokenStream#incrementToken()
      */
     @Override
     public boolean incrementToken() throws IOException {
         //清除所有的词元属性
         clearAttributes();
         Lexeme nextLexeme = _IKImplement.next();
         if(nextLexeme != null){
             //将Lexeme转成Attributes
             //设置词元文本
             termAtt.append(nextLexeme.getLexemeText());
             //设置词元长度
             termAtt.setLength(nextLexeme.getLength());
             //设置词元位移
             offsetAtt.setOffset(nextLexeme.getBeginPosition(), nextLexeme.getEndPosition());
             //记录分词的最后位置
             endPosition = nextLexeme.getEndPosition();
             //记录词元分类
             typeAtt.setType(nextLexeme.getLexemeTypeString());
             //返会true告知还有下个词元
             return true;
         }
         //返会false告知词元输出完毕
         return false;
     }

     /*
      * (non-Javadoc)
      * @see org.apache.lucene.analysis.Tokenizer#reset(java.io.Reader)
      */
     @Override
     public void reset() throws IOException {
         super.reset();
         _IKImplement.reset(input);
     }

     @Override
     public final void end() {
         // set final offset
         int finalOffset = correctOffset(this.endPosition);
         offsetAtt.setOffset(finalOffset, finalOffset);
     }
 }
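
Note: only the import order changes here; the Lucene 4.0 adapter itself is as before. Its contract matters to callers: reset() must run before the first incrementToken(), and end() publishes the final offset recorded in endPosition. A direct-usage sketch (again assuming an initialized dictionary):

    import java.io.IOException;
    import java.io.StringReader;
    import org.apache.lucene.analysis.Tokenizer;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
    import org.wltea.analyzer.lucene.IKTokenizer;

    public class TokenizerLifecycleSketch {
        public static void main(String[] args) throws IOException {
            Tokenizer t = new IKTokenizer(new StringReader("中文分词"), true);
            CharTermAttribute term = t.getAttribute(CharTermAttribute.class);
            OffsetAttribute offset = t.getAttribute(OffsetAttribute.class);
            t.reset();                  // contract: before the first incrementToken()
            while (t.incrementToken()) {
                System.out.println(term + " [" + offset.startOffset() + "," + offset.endOffset() + ")");
            }
            t.end();                    // publishes the final offset
            t.close();
        }
    }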

View File: org/wltea/analyzer/query/SWMCQueryBuilder.java

@@ -1,153 +1,153 @@
[The entire file was previously commented out line-by-line with "//"; this commit strips those prefixes. The re-enabled file:]
+/**
+ * IK 中文分词 版本 5.0
+ * IK Analyzer release 5.0
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * 源代码由林良益(linliangyi2005@gmail.com)提供
+ * 版权声明 2012乌龙茶工作室
+ * provided by Linliangyi and copyright 2012 by Oolong studio
+ *
+ */
+package org.wltea.analyzer.query;
+
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.queryparser.classic.ParseException;
+import org.apache.lucene.queryparser.classic.QueryParser;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.util.Version;
+import org.wltea.analyzer.core.IKSegmenter;
+import org.wltea.analyzer.core.Lexeme;
+
+/**
+ * Single Word Multi Char Query Builder
+ * IK分词算法专用
+ * @author linliangyi
+ *
+ */
+public class SWMCQueryBuilder {
+
+    /**
+     * 生成SWMCQuery
+     * @param fieldName
+     * @param keywords
+     * @param quickMode
+     * @return Lucene Query
+     */
+    public static Query create(String fieldName ,String keywords , boolean quickMode){
+        if(fieldName == null || keywords == null){
+            throw new IllegalArgumentException("参数 fieldName 、 keywords 不能为null.");
+        }
+        //1.对keywords进行分词处理
+        List<Lexeme> lexemes = doAnalyze(keywords);
+        //2.根据分词结果生成SWMCQuery
+        Query _SWMCQuery = getSWMCQuery(fieldName , lexemes , quickMode);
+        return _SWMCQuery;
+    }
+
+    /**
+     * 分词切分并返回结链表
+     * @param keywords
+     * @return
+     */
+    private static List<Lexeme> doAnalyze(String keywords){
+        List<Lexeme> lexemes = new ArrayList<Lexeme>();
+        IKSegmenter ikSeg = new IKSegmenter(new StringReader(keywords) , true);
+        try{
+            Lexeme l = null;
+            while( (l = ikSeg.next()) != null){
+                lexemes.add(l);
+            }
+        }catch(IOException e){
+            e.printStackTrace();
+        }
+        return lexemes;
+    }
+
+    /**
+     * 根据分词结果生成SWMC搜索
+     * @param fieldName
+//     * @param pathOption
+     * @param quickMode
+     * @return
+     */
+    private static Query getSWMCQuery(String fieldName , List<Lexeme> lexemes , boolean quickMode){
+        //构造SWMC的查询表达式
+        StringBuffer keywordBuffer = new StringBuffer();
+        //精简的SWMC的查询表达式
+        StringBuffer keywordBuffer_Short = new StringBuffer();
+        //记录最后词元长度
+        int lastLexemeLength = 0;
+        //记录最后词元结束位置
+        int lastLexemeEnd = -1;
+
+        int shortCount = 0;
+        int totalCount = 0;
+        for(Lexeme l : lexemes){
+            totalCount += l.getLength();
+            //精简表达式
+            if(l.getLength() > 1){
+                keywordBuffer_Short.append(' ').append(l.getLexemeText());
+                shortCount += l.getLength();
+            }
+
+            if(lastLexemeLength == 0){
+                keywordBuffer.append(l.getLexemeText());
+            }else if(lastLexemeLength == 1 && l.getLength() == 1
+                    && lastLexemeEnd == l.getBeginPosition()){//单字位置相邻长度为一合并)
+                keywordBuffer.append(l.getLexemeText());
+            }else{
+                keywordBuffer.append(' ').append(l.getLexemeText());
+
+            }
+            lastLexemeLength = l.getLength();
+            lastLexemeEnd = l.getEndPosition();
+        }
+
+        //借助lucene queryparser 生成SWMC Query
+        QueryParser qp = new QueryParser(Version.LUCENE_40, fieldName, new StandardAnalyzer(Version.LUCENE_40));
+        qp.setDefaultOperator(QueryParser.AND_OPERATOR);
+        qp.setAutoGeneratePhraseQueries(true);
+
+        if(quickMode && (shortCount * 1.0f / totalCount) > 0.5f){
+            try {
+                //System.out.println(keywordBuffer.toString());
+                Query q = qp.parse(keywordBuffer_Short.toString());
+                return q;
+            } catch (ParseException e) {
+                e.printStackTrace();
+            }
+
+        }else{
+            if(keywordBuffer.length() > 0){
+                try {
+                    //System.out.println(keywordBuffer.toString());
+                    Query q = qp.parse(keywordBuffer.toString());
+                    return q;
+                } catch (ParseException e) {
+                    e.printStackTrace();
+                }
+            }
+        }
+        return null;
+    }
+}
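
Note: re-enabling this class gives callers a one-line entry point; quickMode lets the builder fall back to the shorter expression when multi-char lexemes cover more than half of the analyzed keywords. Hypothetical usage (the field name and keywords are illustrative):

    import org.apache.lucene.search.Query;
    import org.wltea.analyzer.query.SWMCQueryBuilder;

    public class SwmcUsageSketch {
        public static void main(String[] args) {
            // Requires an initialized IK dictionary, since create() segments the keywords.
            Query q = SWMCQueryBuilder.create("text", "中文分词工具包", true);
            System.out.println("SWMC query: " + q);
        }
    }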

View File: org/wltea/analyzer/sample/LuceneIndexAndSearchDemo.java

@@ -1,147 +1,147 @@
[As with SWMCQueryBuilder, the entire file was previously commented out line-by-line with "//" and is re-enabled by stripping those prefixes. The re-enabled file:]
+/**
+ * IK 中文分词 版本 5.0
+ * IK Analyzer release 5.0
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * 源代码由林良益(linliangyi2005@gmail.com)提供
+ * 版权声明 2012乌龙茶工作室
+ * provided by Linliangyi and copyright 2012 by Oolong studio
+ *
+ *
+ */
+package org.wltea.analyzer.sample;
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.IndexWriterConfig.OpenMode;
+import org.apache.lucene.queryparser.classic.ParseException;
+import org.apache.lucene.queryparser.classic.QueryParser;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.LockObtainFailedException;
+import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.util.Version;
+import org.wltea.analyzer.lucene.IKAnalyzer;
+
+/**
+ * 使用IKAnalyzer进行Lucene索引和查询的演示
+ * 2012-3-2
+ *
+ * 以下是结合Lucene4.0 API的写法
+ *
+ */
+public class LuceneIndexAndSearchDemo {
+
+    /**
+     * 模拟
+     * 创建一个单条记录的索引并对其进行搜索
+     * @param args
+     */
+    public static void main(String[] args){
+        //Lucene Document的域名
+        String fieldName = "text";
+        //检索内容
+        String text = "IK Analyzer是一个结合词典分词和文法分词的中文分词开源工具包。它使用了全新的正向迭代最细粒度切分算法。";
+
+        //实例化IKAnalyzer分词器
+        Analyzer analyzer = new IKAnalyzer(true);
+
+        Directory directory = null;
+        IndexWriter iwriter = null;
+        IndexReader ireader = null;
+        IndexSearcher isearcher = null;
+        try {
+            //建立内存索引对象
+            directory = new RAMDirectory();
+
+            //配置IndexWriterConfig
+            IndexWriterConfig iwConfig = new IndexWriterConfig(Version.LUCENE_40 , analyzer);
+            iwConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
+            iwriter = new IndexWriter(directory , iwConfig);
+            //写入索引
+            Document doc = new Document();
+            doc.add(new StringField("ID", "10000", Field.Store.YES));
+            doc.add(new TextField(fieldName, text, Field.Store.YES));
+            iwriter.addDocument(doc);
+            iwriter.close();
+
+            //搜索过程**********************************
+            //实例化搜索器
+            ireader = DirectoryReader.open(directory);
+            isearcher = new IndexSearcher(ireader);
+
+            String keyword = "中文分词工具包";
+            //使用QueryParser查询分析器构造Query对象
+            QueryParser qp = new QueryParser(Version.LUCENE_40, fieldName, analyzer);
+            qp.setDefaultOperator(QueryParser.AND_OPERATOR);
+            Query query = qp.parse(keyword);
+            System.out.println("Query = " + query);
+
+            //搜索相似度最高的5条记录
+            TopDocs topDocs = isearcher.search(query , 5);
+            System.out.println("命中:" + topDocs.totalHits);
+            //输出结果
+            ScoreDoc[] scoreDocs = topDocs.scoreDocs;
+            for (int i = 0; i < topDocs.totalHits; i++){
+                Document targetDoc = isearcher.doc(scoreDocs[i].doc);
+                System.out.println("内容:" + targetDoc.toString());
+            }
+
+        } catch (CorruptIndexException e) {
+            e.printStackTrace();
+        } catch (LockObtainFailedException e) {
+            e.printStackTrace();
+        } catch (IOException e) {
+            e.printStackTrace();
+        } catch (ParseException e) {
+            e.printStackTrace();
+        } finally{
+            if(ireader != null){
+                try {
+                    ireader.close();
+                } catch (IOException e) {
+                    e.printStackTrace();
+                }
+            }
+            if(directory != null){
+                try {
+                    directory.close();
+                } catch (IOException e) {
+                    e.printStackTrace();
+                }
+            }
+        }
+    }
+}