Merge pull request #10 from wyhw/ik_lucene4

elasticsearch ik 0.20.x => 0.90.x
Medcl 2013-05-12 23:14:42 -07:00
commit 43c8bc9f8c
16 changed files with 1520 additions and 1580 deletions

View File

@@ -31,7 +31,7 @@
</parent>
<properties>
<elasticsearch.version>0.20.2</elasticsearch.version>
<elasticsearch.version>0.90.0</elasticsearch.version>
</properties>
<repositories>

View File

@@ -2,19 +2,32 @@ package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.wltea.analyzer.lucene.IKTokenizer;
//import org.wltea.lucene.IKTokenizer;
import java.io.Reader;
public class IkAnalyzer extends Analyzer {
@Override public TokenStream tokenStream(String fieldName, Reader reader) {
return new IKTokenizer(reader,true);
}
// private boolean isMaxWordLength = false;
// @Override public TokenStream tokenStream(String fieldName, Reader reader) {
// return new IKTokenizer(reader,true);
// }
public IkAnalyzer() {
super();
}
@Override
protected TokenStreamComponents createComponents(String s, Reader reader) {
Tokenizer tokenizer = new IKTokenizer(reader, true);
//use the single-argument constructor: the tokenizer serves as both source and sink;
//passing null as the second argument would make tokenStream() return null
return new TokenStreamComponents(tokenizer);
}
// public boolean isMaxWordLength() {
// return isMaxWordLength;
// }
}
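The hunk above captures the core Lucene 3.x to 4.x Analyzer migration: the tokenStream(String, Reader) override goes away and the subclass supplies a TokenStreamComponents via createComponents instead. A minimal consumption sketch against the migrated class, assuming the plugin jars are on the classpath and the IK dictionary has been initialized:

import java.io.IOException;
import java.io.StringReader;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.elasticsearch.index.analysis.IkAnalyzer;

public class IkAnalyzerSketch {
    public static void main(String[] args) throws IOException {
        IkAnalyzer analyzer = new IkAnalyzer();
        // In Lucene 4.x, tokenStream() is final and drives createComponents() internally.
        TokenStream ts = analyzer.tokenStream("content", new StringReader("中华人民共和国"));
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        ts.reset(); // mandatory before the first incrementToken()
        while (ts.incrementToken()) {
            System.out.println(term.toString());
        }
        ts.end();
        ts.close();
    }
}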

View File

@@ -24,11 +24,16 @@
*/
package org.wltea.analyzer.core;
import org.wltea.analyzer.dic.Dictionary;
import java.io.IOException;
import java.io.Reader;
import java.util.*;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.Map;
import java.util.Set;
import org.wltea.analyzer.cfg.Configuration;
import org.wltea.analyzer.dic.Dictionary;
/**
*
@@ -68,12 +73,12 @@ class AnalyzeContext {
private Map<Integer , LexemePath> pathMap;
//final list of segmentation results
private LinkedList<Lexeme> results;
//segmenter configuration option
private boolean useSmart;
// private Configuration cfg;
public AnalyzeContext(boolean useSmart){
this.useSmart = useSmart;
this.segmentBuff = new char[BUFF_SIZE];
this.charTypes = new int[BUFF_SIZE];
this.buffLocker = new HashSet<String>();
@@ -313,7 +318,7 @@ class AnalyzeContext {
while(result != null){
//merge numerals with quantifiers
this.compound(result);
if(Dictionary.isStopWord(this.segmentBuff , result.getBegin() , result.getLength())){
if(Dictionary.getSingleton().isStopWord(this.segmentBuff , result.getBegin() , result.getLength())){
//stopword: take the next item from the list
result = this.results.pollFirst();
}else{
@@ -344,6 +349,7 @@
* Combine lexemes
*/
private void compound(Lexeme result){
if(!this.useSmart){
return ;
}
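Note the API shift in the hunk above: the 0.20.x plugin called the static Dictionary.isStopWord(...), while this commit routes every lookup through the Dictionary.getSingleton() instance. A minimal sketch of the new call shape, assuming the dictionary was initialized elsewhere via Dictionary.getInstance().Init(settings):

import org.wltea.analyzer.dic.Dictionary;

final class StopWordCheckSketch {
    // keep a lexeme only if its span is not a stopword
    static boolean keep(char[] segmentBuff, int begin, int length) {
        return !Dictionary.getSingleton().isStopWord(segmentBuff, begin, length);
    }
}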

View File

@@ -25,12 +25,12 @@
*/
package org.wltea.analyzer.core;
import org.wltea.analyzer.dic.Dictionary;
import org.wltea.analyzer.dic.Hit;
import java.util.LinkedList;
import java.util.List;
import org.wltea.analyzer.dic.Dictionary;
import org.wltea.analyzer.dic.Hit;
/**
* Sub-segmenter for Chinese and Japanese/Korean text
@@ -58,7 +58,7 @@ class CJKSegmenter implements ISegmenter {
//process the queue of pending word segments
Hit[] tmpArray = this.tmpHits.toArray(new Hit[this.tmpHits.size()]);
for(Hit hit : tmpArray){
hit = Dictionary.matchInMainDictWithHit(context.getSegmentBuff(), context.getCursor() , hit);
hit = Dictionary.getSingleton().matchWithHit(context.getSegmentBuff(), context.getCursor() , hit);
if(hit.isMatch()){
//emit the current word
Lexeme newLexeme = new Lexeme(context.getBufferOffset() , hit.getBegin() , context.getCursor() - hit.getBegin() + 1 , Lexeme.TYPE_CNWORD);
@@ -77,7 +77,7 @@ class CJKSegmenter implements ISegmenter {
//*********************************
//then try a single-character match at the current cursor position
Hit singleCharHit = Dictionary.matchInMainDict(context.getSegmentBuff(), context.getCursor(), 1);
Hit singleCharHit = Dictionary.getSingleton().matchInMainDict(context.getSegmentBuff(), context.getCursor(), 1);
if(singleCharHit.isMatch()){//the first character is a word by itself
//emit the current word
Lexeme newLexeme = new Lexeme(context.getBufferOffset() , context.getCursor() , 1 , Lexeme.TYPE_CNWORD);
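The Hit returned by matchInMainDict carries the trie node (DictSegment) where matching stopped, so the segmenter extends a partial match one character at a time with matchWithHit instead of rescanning from the root. A sketch of that incremental loop (the scan method is hypothetical; an initialized dictionary is assumed):

import org.wltea.analyzer.dic.Dictionary;
import org.wltea.analyzer.dic.Hit;

final class IncrementalMatchSketch {
    static void scan(char[] text) {
        // match the first character, then extend while the hit is still a prefix
        Hit hit = Dictionary.getSingleton().matchInMainDict(text, 0, 1);
        if (hit.isMatch()) {
            System.out.println(text[0]); // single-character word
        }
        for (int i = 1; i < text.length && hit.isPrefix(); i++) {
            hit = Dictionary.getSingleton().matchWithHit(text, i, hit);
            if (hit.isMatch()) {
                System.out.println(new String(text, 0, i + 1)); // a complete dictionary word
            }
        }
    }
}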

View File

@@ -24,14 +24,14 @@
*/
package org.wltea.analyzer.core;
import org.wltea.analyzer.dic.Dictionary;
import org.wltea.analyzer.dic.Hit;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import org.wltea.analyzer.dic.Dictionary;
import org.wltea.analyzer.dic.Hit;
/**
*
* Sub-segmenter for Chinese quantifiers
@@ -155,7 +155,7 @@ class CN_QuantifierSegmenter implements ISegmenter{
//process the queue of pending word segments
Hit[] tmpArray = this.countHits.toArray(new Hit[this.countHits.size()]);
for(Hit hit : tmpArray){
hit = Dictionary.matchInMainDictWithHit(context.getSegmentBuff(), context.getCursor() , hit);
hit = Dictionary.getSingleton().matchWithHit(context.getSegmentBuff(), context.getCursor() , hit);
if(hit.isMatch()){
//emit the current word
Lexeme newLexeme = new Lexeme(context.getBufferOffset() , hit.getBegin() , context.getCursor() - hit.getBegin() + 1 , Lexeme.TYPE_COUNT);
@@ -174,7 +174,7 @@ class CN_QuantifierSegmenter implements ISegmenter{
//*********************************
//try a single-character match at the current cursor position
Hit singleCharHit = Dictionary.matchInQuantifierDict(context.getSegmentBuff(), context.getCursor(), 1);
Hit singleCharHit = Dictionary.getSingleton().matchInQuantifierDict(context.getSegmentBuff(), context.getCursor(), 1);
if(singleCharHit.isMatch()){//the first character is a quantifier by itself
//输出当前的词
Lexeme newLexeme = new Lexeme(context.getBufferOffset() , context.getCursor() , 1 , Lexeme.TYPE_COUNT);

View File

@@ -38,7 +38,7 @@ class IKArbitrator {
/**
* Resolve segmentation ambiguity
* @param orgLexemes
// * @param orgLexemes
* @param useSmart
*/
void process(AnalyzeContext context , boolean useSmart){
@@ -87,7 +87,6 @@ class IKArbitrator {
* Ambiguity detection
* @param lexemeCell head of the ambiguous-path linked list
* @param fullTextLength text length covered by the ambiguous path
* @param option candidate result path
* @return
*/
private LexemePath judge(QuickSortSet.Cell lexemeCell , int fullTextLength){
@@ -120,7 +119,7 @@
/**
* Walk forward, adding lexemes to build an unambiguous lexeme combination
* @param LexemePath path
// * @param LexemePath path
* @return
*/
private Stack<QuickSortSet.Cell> forwardPath(QuickSortSet.Cell lexemeCell , LexemePath option){
@@ -140,7 +139,7 @@
/**
* Roll back the lexeme chain until it can accept the specified lexeme
* @param lexeme
// * @param lexeme
* @param l
*/
private void backPath(Lexeme l , LexemePath option){

View File

@@ -23,14 +23,15 @@
*/
package org.wltea.analyzer.core;
import org.elasticsearch.common.logging.ESLogger;
import org.elasticsearch.common.logging.Loggers;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.List;
import org.wltea.analyzer.cfg.Configuration;
//import org.wltea.analyzer.cfg.DefaultConfig;
import org.wltea.analyzer.dic.Dictionary;
/**
* Main IK segmenter class
*
@@ -39,16 +40,18 @@ public final class IKSegmenter {
//character stream reader
private Reader input;
//segmenter configuration
private Configuration cfg;
//segmentation context
private AnalyzeContext context;
//list of sub-segmenters
private List<ISegmenter> segmenters;
//segmentation ambiguity arbitrator
private IKArbitrator arbitrator;
private ESLogger logger=null;
private final boolean useSmart;
private boolean useSmart = false;
/**
* IK segmenter constructor
* @param input
* @param useSmart true to use the smart segmentation strategy
@@ -57,16 +60,31 @@ public final class IKSegmenter {
* smart segmentation: merge numerals with quantifiers and arbitrate ambiguous results
*/
public IKSegmenter(Reader input , boolean useSmart){
logger = Loggers.getLogger("ik-analyzer");
this.input = input;
// this.cfg = DefaultConfig.getInstance();
this.useSmart=useSmart;
this.init();
}
/**
* IK segmenter constructor
* @param input
* @param cfg build the segmenter with a custom Configuration
*
*/
public IKSegmenter(Reader input , Configuration cfg){
this.input = input;
this.cfg = cfg;
this.init();
}
/**
* Initialize
*/
private void init(){
//initialize the dictionary singleton
// Dictionary.initial(this.cfg);
// Dictionary.getSingleton();
//initialize the segmentation context
this.context = new AnalyzeContext(useSmart);
//load the sub-segmenters
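With the constructors above, the segmenter can also be driven directly, outside Lucene. A minimal sketch in smart mode, assuming the dictionary singleton has already been initialized by the plugin:

import java.io.IOException;
import java.io.StringReader;
import org.wltea.analyzer.core.IKSegmenter;
import org.wltea.analyzer.core.Lexeme;

public class SegmenterSketch {
    public static void main(String[] args) throws IOException {
        IKSegmenter seg = new IKSegmenter(new StringReader("中文分词工具包"), true);
        // next() returns null once the input is exhausted
        for (Lexeme l = seg.next(); l != null; l = seg.next()) {
            System.out.println(l.getLexemeText() + " [" + l.getBeginPosition() + "," + l.getEndPosition() + ")");
        }
    }
}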

View File

@@ -120,7 +120,7 @@ class LetterSegmenter implements ISegmenter {
/**
* Handle mixed alphanumeric output,
* e.g. windos2000 | linliangyi2005@gmail.com
* @param input
// * @param input
* @param context
* @return
*/

View File

@@ -327,12 +327,4 @@ class DictSegment implements Comparable<DictSegment>{
return this.nodeChar.compareTo(o.nodeChar);
}
public int getDicNum(){
//number of first-level children in the char map (used for dictionary-size logging)
if(charMap!=null){
return charMap.size();
}
return 0;
}
}

View File

@@ -1,74 +1,233 @@
/**
* IK 中文分词 版本 5.0
* IK Analyzer release 5.0
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* 源代码由林良益(linliangyi2005@gmail.com)提供
* 版权声明 2012乌龙茶工作室
* provided by Linliangyi and copyright 2012 by Oolong studio
*
*
*/
package org.wltea.analyzer.dic;
import java.io.*;
import java.util.Collection;
import java.util.List;
import org.elasticsearch.common.logging.ESLogger;
import org.elasticsearch.common.logging.Loggers;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.wltea.analyzer.cfg.Configuration;
import java.io.*;
import java.util.Collection;
import java.util.List;
/**
* Dictionary manager class, singleton pattern
*/
public class Dictionary {
public static final String PATH_DIC_MAIN = "ik/main.dic";
public static final String PATH_DIC_SURNAME = "ik/surname.dic";
public static final String PATH_DIC_QUANTIFIER = "ik/quantifier.dic";
public static final String PATH_DIC_SUFFIX = "ik/suffix.dic";
public static final String PATH_DIC_PREP = "ik/preposition.dic";
public static final String PATH_DIC_STOP = "ik/stopword.dic";
private static final Dictionary singleton;
static{
singleton = new Dictionary();
}
/*
* singleton dictionary instance
*/
private static Dictionary singleton;
/*
* main dictionary object
*/
private DictSegment _MainDict;
private DictSegment _SurnameDict;
/*
* stopword dictionary
*/
private DictSegment _StopWordDict;
/*
* quantifier dictionary
*/
private DictSegment _QuantifierDict;
private DictSegment _SuffixDict;
private DictSegment _PrepDict;
private DictSegment _StopWords;
private Environment environment;
private Configuration configuration;
/**
* configuration object
*/
private Configuration configuration;
private ESLogger logger=null;
private static boolean dictInited=false;
private Dictionary(){
private Environment environment;
public static final String PATH_DIC_MAIN = "ik/main.dic";
public static final String PATH_DIC_SURNAME = "ik/surname.dic";
public static final String PATH_DIC_QUANTIFIER = "ik/quantifier.dic";
public static final String PATH_DIC_SUFFIX = "ik/suffix.dic";
public static final String PATH_DIC_PREP = "ik/preposition.dic";
public static final String PATH_DIC_STOP = "ik/stopword.dic";
private Dictionary(){
logger = Loggers.getLogger("ik-analyzer");
}
public Configuration getConfig(){
return configuration;
}
}
static{
singleton = new Dictionary();
}
// public Configuration getConfig(){
// return configuration;
// }
// private Dictionary(Configuration cfg){
// this.cfg = cfg;
// this.loadMainDict();
// this.loadStopWordDict();
// this.loadQuantifierDict();
// }
public void Init(Settings indexSettings){
if(!dictInited){
environment =new Environment(indexSettings);
configuration=new Configuration(indexSettings);
loadMainDict();
loadSurnameDict();
loadQuantifierDict();
loadSuffixDict();
loadPrepDict();
loadStopWordDict();
dictInited=true;
}
if(!dictInited){
environment =new Environment(indexSettings);
configuration=new Configuration(indexSettings);
loadMainDict();
// loadSurnameDict();
loadQuantifierDict();
// loadSuffixDict();
// loadPrepDict();
loadStopWordDict();
dictInited=true;
}
}
private void loadMainDict(){
_MainDict = new DictSegment((char)0);
/**
* Dictionary initialization.
* Because IK Analyzer initializes its dictionaries via static methods of the Dictionary class,
* loading only begins once the Dictionary class is actually used,
* which lengthens the first segmentation operation.
* This method offers a way to initialize the dictionaries during application startup.
* @return Dictionary
*/
// public static Dictionary initial(Configuration cfg){
// if(singleton == null){
// synchronized(Dictionary.class){
// if(singleton == null){
// singleton = new Dictionary();
// return singleton;
// }
// }
// }
// return singleton;
// }
/**
* Get the dictionary singleton instance
* @return Dictionary singleton
*/
public static Dictionary getSingleton(){
if(singleton == null){
throw new IllegalStateException("词典尚未初始化请先调用initial方法");
}
return singleton;
}
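Taken together with the Init method above, the intended lifecycle in this commit is: the static block constructs the singleton, Init(indexSettings) loads the dictionaries once (guarded by dictInited), and all later lookups go through getSingleton(). A sketch of that order; the empty Settings object is a hypothetical stand-in for real index settings:

import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.settings.Settings;
import org.wltea.analyzer.dic.Dictionary;

final class DictionaryBootSketch {
    static void boot() {
        Settings indexSettings = ImmutableSettings.settingsBuilder().build(); // hypothetical
        Dictionary.getInstance().Init(indexSettings); // loads main/quantifier/stopword dicts once
        // all subsequent lookups use the singleton instance
        Dictionary.getSingleton().matchInMainDict("中国".toCharArray(), 0, 2);
    }
}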
/**
* Load new words in batch
* @param words Collection<String> of entries
*/
public void addWords(Collection<String> words){
if(words != null){
for(String word : words){
if (word != null) {
//batch-load entries into the in-memory main dictionary
singleton._MainDict.fillSegment(word.trim().toLowerCase().toCharArray());
}
}
}
}
/**
* Disable (mask) words in batch
* @param words
*/
public void disableWords(Collection<String> words){
if(words != null){
for(String word : words){
if (word != null) {
//batch-disable entries
singleton._MainDict.disableSegment(word.trim().toLowerCase().toCharArray());
}
}
}
}
/**
* Match against the main dictionary
* @param charArray
* @return Hit describing the match result
*/
public Hit matchInMainDict(char[] charArray){
return singleton._MainDict.match(charArray);
}
/**
* Match against the main dictionary
* @param charArray
* @param begin
* @param length
* @return Hit describing the match result
*/
public Hit matchInMainDict(char[] charArray , int begin, int length){
return singleton._MainDict.match(charArray, begin, length);
}
/**
* Match against the quantifier dictionary
* @param charArray
* @param begin
* @param length
* @return Hit describing the match result
*/
public Hit matchInQuantifierDict(char[] charArray , int begin, int length){
return singleton._QuantifierDict.match(charArray, begin, length);
}
/**
* Take the DictSegment from an already-matched Hit and continue matching from there
* @param charArray
* @param currentIndex
* @param matchedHit
* @return Hit
*/
public Hit matchWithHit(char[] charArray , int currentIndex , Hit matchedHit){
DictSegment ds = matchedHit.getMatchedDictSegment();
return ds.match(charArray, currentIndex, 1 , matchedHit);
}
/**
* Check whether the given span is a stopword
* @param charArray
* @param begin
* @param length
* @return boolean
*/
public boolean isStopWord(char[] charArray , int begin, int length){
return singleton._StopWordDict.match(charArray, begin, length).isMatch();
}
/**
* Load the main dictionary and extension dictionaries
*/
private void loadMainDict(){
//create the main dictionary instance
_MainDict = new DictSegment((char)0);
//read the main dictionary file
File file= new File(environment.configFile(), Dictionary.PATH_DIC_MAIN);
InputStream is = null;
@@ -77,20 +236,17 @@ public class Dictionary {
} catch (FileNotFoundException e) {
e.printStackTrace();
}
if(is == null){
throw new RuntimeException("Main Dictionary not found!!!");
}
try {
BufferedReader br = new BufferedReader(new InputStreamReader(is , "UTF-8"), 512);
String theWord;
String theWord = null;
do {
theWord = br.readLine();
if (theWord != null && !"".equals(theWord.trim())) {
_MainDict.fillSegment(theWord.trim().toCharArray());
_MainDict.fillSegment(theWord.trim().toLowerCase().toCharArray());
}
} while (theWord != null);
logger.info("[Dict Loading] {},MainDict Size:{}",file.toString(),_MainDict.getDicNum());
} catch (IOException ioe) {
System.err.println("Main Dictionary loading exception.");
ioe.printStackTrace();
@@ -105,37 +261,38 @@ public class Dictionary {
e.printStackTrace();
}
}
//load extension dictionaries
this.loadExtDict();
}
/**
* Load user-configured extension dictionaries into the main dictionary
*/
private void loadExtDict(){
//load the extension dictionary configuration
List<String> extDictFiles = configuration.getExtDictionarys();
if(extDictFiles != null){
InputStream is = null;
for(String extDictName : extDictFiles){
File tempFile=new File(environment.configFile(),extDictName);
try {
is = new FileInputStream(tempFile);
} catch (FileNotFoundException e) {
e.printStackTrace();
logger.error("[Dict Loading]",e);
}
if(is == null){
//read the extension dictionary file
System.out.println("加载扩展词典:" + extDictName);
is = this.getClass().getClassLoader().getResourceAsStream(extDictName);
//if the extension dictionary cannot be found, skip it
if(is == null){
continue;
}
try {
BufferedReader br = new BufferedReader(new InputStreamReader(is , "UTF-8"), 512);
String theWord;
String theWord = null;
do {
theWord = br.readLine();
if (theWord != null && !"".equals(theWord.trim())) {
//load extension dictionary entries into the in-memory main dictionary
//System.out.println(theWord);
_MainDict.fillSegment(theWord.trim().toLowerCase().toCharArray());
}
} while (theWord != null);
logger.info("[Dict Loading] {},MainDict Size:{}",tempFile.toString(),_MainDict.getDicNum());
} catch (IOException ioe) {
System.err.println("Extension Dictionary loading exception.");
ioe.printStackTrace();
@@ -154,70 +311,78 @@
}
}
private void loadSurnameDict(){
_SurnameDict = new DictSegment((char)0);
File file=new File(environment.configFile(),Dictionary.PATH_DIC_SURNAME);
InputStream is = null;
try {
is = new FileInputStream(file);
} catch (FileNotFoundException e) {
e.printStackTrace();
}
if(is == null){
throw new RuntimeException("Surname Dictionary not found!!!");
}
try {
BufferedReader br = new BufferedReader(new InputStreamReader(is , "UTF-8"), 512);
String theWord;
do {
theWord = br.readLine();
if (theWord != null && !"".equals(theWord.trim())) {
_SurnameDict.fillSegment(theWord.trim().toCharArray());
/**
* Load the user's extension stopword dictionaries
*/
private void loadStopWordDict(){
//create the stopword dictionary instance
_StopWordDict = new DictSegment((char)0);
//load extension stopword dictionaries
List<String> extStopWordDictFiles = configuration.getExtStopWordDictionarys();
if(extStopWordDictFiles != null){
InputStream is = null;
for(String extStopWordDictName : extStopWordDictFiles){
System.out.println("加载扩展停止词典:" + extStopWordDictName);
//read the extension dictionary file
is = this.getClass().getClassLoader().getResourceAsStream(extStopWordDictName);
//if the extension dictionary cannot be found, skip it
if(is == null){
continue;
}
} while (theWord != null);
logger.info("[Dict Loading] {},SurnameDict Size:{}",file.toString(),_SurnameDict.getDicNum());
} catch (IOException ioe) {
System.err.println("Surname Dictionary loading exception.");
ioe.printStackTrace();
try {
BufferedReader br = new BufferedReader(new InputStreamReader(is , "UTF-8"), 512);
String theWord = null;
do {
theWord = br.readLine();
if (theWord != null && !"".equals(theWord.trim())) {
//System.out.println(theWord);
//load extension stopword entries into memory
_StopWordDict.fillSegment(theWord.trim().toLowerCase().toCharArray());
}
} while (theWord != null);
}finally{
try {
if(is != null){
is.close();
is = null;
} catch (IOException ioe) {
System.err.println("Extension Stop word Dictionary loading exception.");
ioe.printStackTrace();
}finally{
try {
if(is != null){
is.close();
is = null;
}
} catch (IOException e) {
e.printStackTrace();
}
}
} catch (IOException e) {
e.printStackTrace();
}
}
}
/**
* Load the quantifier dictionary
*/
private void loadQuantifierDict(){
//create a quantifier dictionary instance
_QuantifierDict = new DictSegment((char)0);
//read the quantifier dictionary file
File file=new File(environment.configFile(),Dictionary.PATH_DIC_QUANTIFIER);
InputStream is = null;
try {
is = new FileInputStream(file);
} catch (FileNotFoundException e) {
e.printStackTrace();
}
if(is == null){
throw new RuntimeException("Quantifier Dictionary not found!!!");
}
try {
BufferedReader br = new BufferedReader(new InputStreamReader(is , "UTF-8"), 512);
String theWord;
String theWord = null;
do {
theWord = br.readLine();
if (theWord != null && !"".equals(theWord.trim())) {
_QuantifierDict.fillSegment(theWord.trim().toCharArray());
_QuantifierDict.fillSegment(theWord.trim().toLowerCase().toCharArray());
}
} while (theWord != null);
logger.info("[Dict Loading] {},QuantifierDict Size:{}",file.toString(),_QuantifierDict.getDicNum());
} catch (IOException ioe) {
System.err.println("Quantifier Dictionary loading exception.");
ioe.printStackTrace();
@@ -235,304 +400,8 @@ public class Dictionary {
}
private void loadSuffixDict(){
public static Dictionary getInstance(){
return Dictionary.singleton;
}
_SuffixDict = new DictSegment((char)0);
File file=new File(environment.configFile(),Dictionary.PATH_DIC_SUFFIX);
InputStream is = null;
try {
is = new FileInputStream(file);
} catch (FileNotFoundException e) {
e.printStackTrace();
}
if(is == null){
throw new RuntimeException("Suffix Dictionary not found!!!");
}
try {
BufferedReader br = new BufferedReader(new InputStreamReader(is , "UTF-8"), 512);
String theWord;
do {
theWord = br.readLine();
if (theWord != null && !"".equals(theWord.trim())) {
_SuffixDict.fillSegment(theWord.trim().toCharArray());
}
} while (theWord != null);
logger.info("[Dict Loading] {},SuffixDict Size:{}",file.toString(),_SuffixDict.getDicNum());
} catch (IOException ioe) {
System.err.println("Suffix Dictionary loading exception.");
ioe.printStackTrace();
}finally{
try {
if(is != null){
is.close();
is = null;
}
} catch (IOException e) {
e.printStackTrace();
}
}
}
private void loadPrepDict(){
_PrepDict = new DictSegment((char)0);
File file=new File(environment.configFile(),Dictionary.PATH_DIC_PREP);
InputStream is = null;
try {
is = new FileInputStream(file);
} catch (FileNotFoundException e) {
e.printStackTrace();
}
if(is == null){
throw new RuntimeException("Preposition Dictionary not found!!!");
}
try {
BufferedReader br = new BufferedReader(new InputStreamReader(is , "UTF-8"), 512);
String theWord;
do {
theWord = br.readLine();
if (theWord != null && !"".equals(theWord.trim())) {
_PrepDict.fillSegment(theWord.trim().toCharArray());
}
} while (theWord != null);
logger.info("[Dict Loading] {},PrepDict Size:{}",file.toString(),_PrepDict.getDicNum());
} catch (IOException ioe) {
System.err.println("Preposition Dictionary loading exception.");
ioe.printStackTrace();
}finally{
try {
if(is != null){
is.close();
is = null;
}
} catch (IOException e) {
e.printStackTrace();
}
}
}
private void loadStopWordDict(){
_StopWords = new DictSegment((char)0);
File file=new File(environment.configFile(),Dictionary.PATH_DIC_STOP);
InputStream is = null;
try {
is = new FileInputStream(file);
} catch (FileNotFoundException e) {
e.printStackTrace();
}
if(is == null){
throw new RuntimeException("Stopword Dictionary not found!!!");
}
try {
BufferedReader br = new BufferedReader(new InputStreamReader(is , "UTF-8"), 512);
String theWord;
do {
theWord = br.readLine();
if (theWord != null && !"".equals(theWord.trim())) {
_StopWords.fillSegment(theWord.trim().toCharArray());
}
} while (theWord != null);
logger.info("[Dict Loading] {},Stopwords Size:{}",file.toString(),_StopWords.getDicNum());
} catch (IOException ioe) {
System.err.println("Stopword Dictionary loading exception.");
ioe.printStackTrace();
}finally{
try {
if(is != null){
is.close();
is = null;
}
} catch (IOException e) {
e.printStackTrace();
}
}
List<String> extStopWordDictFiles = configuration.getExtStopWordDictionarys();
if(extStopWordDictFiles != null){
for(String extStopWordDictName : extStopWordDictFiles){
File tempFile=new File(environment.configFile(),extStopWordDictName);
try {
is = new FileInputStream(tempFile);
} catch (FileNotFoundException e) {
e.printStackTrace();
}
if(is == null){
continue;
}
try {
BufferedReader br = new BufferedReader(new InputStreamReader(is , "UTF-8"), 512);
String theWord;
do {
theWord = br.readLine();
if (theWord != null && !"".equals(theWord.trim())) {
_StopWords.fillSegment(theWord.trim().toCharArray());
}
} while (theWord != null);
logger.info("[Dict Loading] {},Stopwords Size:{}",tempFile.toString(),_StopWords.getDicNum());
} catch (IOException ioe) {
System.err.println("Extension Stop word Dictionary loading exception.");
ioe.printStackTrace();
}finally{
try {
if(is != null){
is.close();
is = null;
}
} catch (IOException e) {
e.printStackTrace();
}
}
}
}
}
public static Dictionary getInstance(){
return Dictionary.singleton;
}
public static void loadExtendWords(Collection<String> extWords){
if(extWords != null){
for(String extWord : extWords){
if (extWord != null) {
singleton._MainDict.fillSegment(extWord.trim().toCharArray());
}
}
}
}
public static void loadExtendStopWords(Collection<String> extStopWords){
if(extStopWords != null){
for(String extStopWord : extStopWords){
if (extStopWord != null) {
singleton._StopWords.fillSegment(extStopWord.trim().toCharArray());
}
}
}
}
public static Hit matchInMainDict(char[] charArray){
return singleton._MainDict.match(charArray);
}
public static Hit matchInMainDict(char[] charArray , int begin, int length){
return singleton._MainDict.match(charArray, begin, length);
}
public static Hit matchInMainDictWithHit(char[] charArray , int currentIndex , Hit matchedHit){
DictSegment ds = matchedHit.getMatchedDictSegment();
return ds.match(charArray, currentIndex, 1 , matchedHit);
}
public static Hit matchInSurnameDict(char[] charArray , int begin, int length){
return singleton._SurnameDict.match(charArray, begin, length);
}
/**
* Match against the quantifier dictionary
* @param charArray
* @param begin
* @param length
* @return Hit describing the match result
*/
public static Hit matchInQuantifierDict(char[] charArray , int begin, int length){
return singleton._QuantifierDict.match(charArray, begin, length);
}
/**
* Match against the suffix dictionary
* @param charArray
* @param begin
* @param length
* @return Hit describing the match result
*/
public static Hit matchInSuffixDict(char[] charArray , int begin, int length){
return singleton._SuffixDict.match(charArray, begin, length);
}
/**
* Match against the preposition/adverb dictionary
* @param charArray
* @param begin
* @param length
* @return Hit describing the match result
*/
public static Hit matchInPrepDict(char[] charArray , int begin, int length){
return singleton._PrepDict.match(charArray, begin, length);
}
/**
* Check whether the given span is a stopword
* @param charArray
* @param begin
* @param length
* @return boolean
*/
public static boolean isStopWord(char[] charArray , int begin, int length){
return singleton._StopWords.match(charArray, begin, length).isMatch();
}
}

View File

@@ -58,7 +58,9 @@ public class Hit {
public boolean isMatch() {
return (this.hitState & MATCH) > 0;
}
/**
* Mark this Hit as a full dictionary match
*/
public void setMatch() {
this.hitState = this.hitState | MATCH;
}
@@ -69,7 +71,9 @@
public boolean isPrefix() {
return (this.hitState & PREFIX) > 0;
}
/**
* Mark this Hit as a prefix match
*/
public void setPrefix() {
this.hitState = this.hitState | PREFIX;
}
@@ -79,7 +83,9 @@
public boolean isUnmatch() {
return this.hitState == UNMATCH ;
}
/**
* Mark this Hit as unmatched
*/
public void setUnmatch() {
this.hitState = UNMATCH;
}
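For reference, hitState is a bit field, so MATCH and PREFIX can hold at the same time, e.g. an entry that is both a complete word and the prefix of a longer entry. The constant values below are assumptions inferred from the bit tests above, not taken from this diff:

final class HitStateSketch {
    static final int UNMATCH = 0x00000000; // assumed value
    static final int MATCH   = 0x00000001; // assumed value
    static final int PREFIX  = 0x00000010; // assumed value

    public static void main(String[] args) {
        int state = UNMATCH;
        state |= MATCH;  // setMatch()
        state |= PREFIX; // setPrefix()
        System.out.println((state & MATCH) > 0);  // true: a complete word
        System.out.println((state & PREFIX) > 0); // true: also a prefix of a longer word
    }
}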

View File

@@ -1,51 +1,87 @@
/**
* IK 中文分词 版本 5.0.1
* IK Analyzer release 5.0.1
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* 源代码由林良益(linliangyi2005@gmail.com)提供
* 版权声明 2012乌龙茶工作室
* provided by Linliangyi and copyright 2012 by Oolong studio
*
*/
package org.wltea.analyzer.lucene;
import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.elasticsearch.common.settings.Settings;
import org.wltea.analyzer.dic.Dictionary;
import java.io.Reader;
/**
* IK segmenter implementation of the Lucene Analyzer interface
* Compatible with Lucene 4.0
*/
public final class IKAnalyzer extends Analyzer {
private boolean useSmart;
private boolean isMaxWordLength = false;
private boolean useSmart=false;
public boolean useSmart() {
return useSmart;
}
public IKAnalyzer(){
public void setUseSmart(boolean useSmart) {
this.useSmart = useSmart;
}
/**
* IK segmenter Lucene Analyzer implementation class
*
* Defaults to the fine-grained segmentation algorithm
*/
public IKAnalyzer(){
this(false);
}
public IKAnalyzer(boolean isMaxWordLength){
/**
* IK segmenter Lucene Analyzer implementation class
*
* @param useSmart when true, the segmenter performs smart segmentation
*/
public IKAnalyzer(boolean useSmart){
super();
this.setMaxWordLength(isMaxWordLength);
this.useSmart = useSmart;
}
public IKAnalyzer(Settings indexSetting,Settings settings1) {
super();
Dictionary.getInstance().Init(indexSetting);
if(settings1.get("use_smart", "true").equals("true")){
useSmart = true;
}
}
@Override
public TokenStream tokenStream(String fieldName, Reader reader) {
return new IKTokenizer(reader , useSmart);
}
public void setMaxWordLength(boolean isMaxWordLength) {
this.isMaxWordLength = isMaxWordLength;
}
public boolean isMaxWordLength() {
return isMaxWordLength;
/**
* Override the Analyzer hook to build the token stream components
*/
@Override
protected TokenStreamComponents createComponents(String fieldName, final Reader in) {
Tokenizer _IKTokenizer = new IKTokenizer(in , this.useSmart());
return new TokenStreamComponents(_IKTokenizer);
}
}
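The two-Settings constructor above is the entry point the Elasticsearch analysis module uses: it initializes the dictionary and reads use_smart (defaulting to "true"). A sketch of how it might be wired up; the settings builder is standard ES 0.90 API, but the wiring itself is illustrative rather than the plugin's actual provider code:

import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.settings.Settings;
import org.wltea.analyzer.lucene.IKAnalyzer;

final class AnalyzerWiringSketch {
    static IKAnalyzer build(Settings indexSettings) {
        Settings analyzerSettings = ImmutableSettings.settingsBuilder()
                .put("use_smart", "false") // fine-grained output; "true" enables smart mode
                .build();
        return new IKAnalyzer(indexSettings, analyzerSettings); // also triggers Dictionary Init
    }
}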

View File

@@ -26,88 +26,89 @@
*/
package org.wltea.analyzer.lucene;
import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.wltea.analyzer.core.IKSegmenter;
import org.wltea.analyzer.core.Lexeme;
import java.io.IOException;
import java.io.Reader;
/**
* IK segmenter adapter for the Lucene Tokenizer interface
* Compatible with Lucene 4.0
*/
public final class IKTokenizer extends Tokenizer {
//IK segmenter implementation
private IKSegmenter _IKImplement;
//lexeme text attribute
private final CharTermAttribute termAtt;
//lexeme offset attribute
private final OffsetAttribute offsetAtt;
//lexeme type attribute (see the type constants in org.wltea.analyzer.core.Lexeme)
private final TypeAttribute typeAtt;
//records the end position of the last lexeme
private int endPosition;
/**
* Lucene 4.0 Tokenizer adapter constructor
* @param in
* @param useSmart
*/
public IKTokenizer(Reader in , boolean useSmart){
super(in);
offsetAtt = addAttribute(OffsetAttribute.class);
termAtt = addAttribute(CharTermAttribute.class);
typeAtt = addAttribute(TypeAttribute.class);
_IKImplement = new IKSegmenter(input , useSmart);
}
/* (non-Javadoc)
* @see org.apache.lucene.analysis.TokenStream#incrementToken()
*/
@Override
public boolean incrementToken() throws IOException {
//clear all lexeme attributes
clearAttributes();
Lexeme nextLexeme = _IKImplement.next();
if(nextLexeme != null){
//convert the Lexeme into Attributes
//set the lexeme text
termAtt.append(nextLexeme.getLexemeText());
//set the lexeme length
termAtt.setLength(nextLexeme.getLength());
//set the lexeme offsets
offsetAtt.setOffset(nextLexeme.getBeginPosition(), nextLexeme.getEndPosition());
//record the last position of this segmentation pass
endPosition = nextLexeme.getEndPosition();
//record the lexeme type
typeAtt.setType(nextLexeme.getLexemeTypeString());
//return true to signal that another lexeme is available
return true;
}
//return false to signal that all lexemes have been emitted
return false;
}
/*
* (non-Javadoc)
* @see org.apache.lucene.analysis.Tokenizer#reset(java.io.Reader)
*/
@Override
public void reset() throws IOException {
super.reset();
_IKImplement.reset(input);
}
@Override
public final void end() {
// set final offset
int finalOffset = correctOffset(this.endPosition);
offsetAtt.setOffset(finalOffset, finalOffset);
}
}
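A sketch exercising the adapter directly, showing the Lucene 4.x reset/incrementToken/end lifecycle that the overrides above implement (end() publishes the final offset recorded in endPosition; an initialized dictionary is assumed):

import java.io.IOException;
import java.io.StringReader;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.wltea.analyzer.lucene.IKTokenizer;

public class TokenizerSketch {
    public static void main(String[] args) throws IOException {
        IKTokenizer tk = new IKTokenizer(new StringReader("中文分词"), false);
        CharTermAttribute term = tk.getAttribute(CharTermAttribute.class);
        OffsetAttribute offset = tk.getAttribute(OffsetAttribute.class);
        tk.reset();
        while (tk.incrementToken()) {
            System.out.println(term + " " + offset.startOffset() + "-" + offset.endOffset());
        }
        tk.end();   // sets the final offset from endPosition
        tk.close();
    }
}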

View File

@@ -1,153 +1,153 @@
///**
// * IK 中文分词 版本 5.0
// * IK Analyzer release 5.0
// *
// * Licensed to the Apache Software Foundation (ASF) under one or more
// * contributor license agreements. See the NOTICE file distributed with
// * this work for additional information regarding copyright ownership.
// * The ASF licenses this file to You under the Apache License, Version 2.0
// * (the "License"); you may not use this file except in compliance with
// * the License. You may obtain a copy of the License at
// *
// * http://www.apache.org/licenses/LICENSE-2.0
// *
// * Unless required by applicable law or agreed to in writing, software
// * distributed under the License is distributed on an "AS IS" BASIS,
// * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// * See the License for the specific language governing permissions and
// * limitations under the License.
// *
// * 源代码由林良益(linliangyi2005@gmail.com)提供
// * 版权声明 2012乌龙茶工作室
// * provided by Linliangyi and copyright 2012 by Oolong studio
// *
// */
//package org.wltea.analyzer.query;
//
//import java.io.IOException;
//import java.io.StringReader;
//import java.util.ArrayList;
//import java.util.List;
//
//import org.apache.lucene.analysis.standard.StandardAnalyzer;
//import org.apache.lucene.queryparser.classic.ParseException;
//import org.apache.lucene.queryparser.classic.QueryParser;
//import org.apache.lucene.search.Query;
//import org.apache.lucene.util.Version;
//import org.wltea.analyzer.core.IKSegmenter;
//import org.wltea.analyzer.core.Lexeme;
//
///**
// * Single Word Multi Char Query Builder
// * IK分词算法专用
// * @author linliangyi
// *
// */
//public class SWMCQueryBuilder {
//
// /**
// * 生成SWMCQuery
// * @param fieldName
// * @param keywords
// * @param quickMode
// * @return Lucene Query
// */
// public static Query create(String fieldName ,String keywords , boolean quickMode){
// if(fieldName == null || keywords == null){
// throw new IllegalArgumentException("参数 fieldName 、 keywords 不能为null.");
// }
// //1.对keywords进行分词处理
// List<Lexeme> lexemes = doAnalyze(keywords);
// //2.根据分词结果生成SWMCQuery
// Query _SWMCQuery = getSWMCQuery(fieldName , lexemes , quickMode);
// return _SWMCQuery;
// }
//
// /**
// * 分词切分并返回结链表
// * @param keywords
// * @return
// */
// private static List<Lexeme> doAnalyze(String keywords){
// List<Lexeme> lexemes = new ArrayList<Lexeme>();
// IKSegmenter ikSeg = new IKSegmenter(new StringReader(keywords) , true);
// try{
// Lexeme l = null;
// while( (l = ikSeg.next()) != null){
// lexemes.add(l);
// }
// }catch(IOException e){
// e.printStackTrace();
// }
// return lexemes;
// }
//
//
// /**
// * 根据分词结果生成SWMC搜索
// * @param fieldName
/**
* IK 中文分词 版本 5.0
* IK Analyzer release 5.0
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* 源代码由林良益(linliangyi2005@gmail.com)提供
* 版权声明 2012乌龙茶工作室
* provided by Linliangyi and copyright 2012 by Oolong studio
*
*/
package org.wltea.analyzer.query;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.core.IKSegmenter;
import org.wltea.analyzer.core.Lexeme;
/**
* Single Word Multi Char Query Builder
* Tailored to the IK segmentation algorithm
* @author linliangyi
*
*/
public class SWMCQueryBuilder {
/**
* Build the SWMCQuery
* @param fieldName
* @param keywords
* @param quickMode
* @return Lucene Query
*/
public static Query create(String fieldName ,String keywords , boolean quickMode){
if(fieldName == null || keywords == null){
throw new IllegalArgumentException("参数 fieldName 、 keywords 不能为null.");
}
//1. segment the keywords
List<Lexeme> lexemes = doAnalyze(keywords);
//2. build the SWMCQuery from the segmentation results
Query _SWMCQuery = getSWMCQuery(fieldName , lexemes , quickMode);
return _SWMCQuery;
}
/**
* Segment the text and return the lexeme list
* @param keywords
* @return
*/
private static List<Lexeme> doAnalyze(String keywords){
List<Lexeme> lexemes = new ArrayList<Lexeme>();
IKSegmenter ikSeg = new IKSegmenter(new StringReader(keywords) , true);
try{
Lexeme l = null;
while( (l = ikSeg.next()) != null){
lexemes.add(l);
}
}catch(IOException e){
e.printStackTrace();
}
return lexemes;
}
/**
* Build the SWMC query from the segmentation results
* @param fieldName
// * @param pathOption
// * @param quickMode
// * @return
// */
// private static Query getSWMCQuery(String fieldName , List<Lexeme> lexemes , boolean quickMode){
// //构造SWMC的查询表达式
// StringBuffer keywordBuffer = new StringBuffer();
// //精简的SWMC的查询表达式
// StringBuffer keywordBuffer_Short = new StringBuffer();
// //记录最后词元长度
// int lastLexemeLength = 0;
// //记录最后词元结束位置
// int lastLexemeEnd = -1;
//
// int shortCount = 0;
// int totalCount = 0;
// for(Lexeme l : lexemes){
// totalCount += l.getLength();
// //精简表达式
// if(l.getLength() > 1){
// keywordBuffer_Short.append(' ').append(l.getLexemeText());
// shortCount += l.getLength();
// }
//
// if(lastLexemeLength == 0){
// keywordBuffer.append(l.getLexemeText());
// }else if(lastLexemeLength == 1 && l.getLength() == 1
// && lastLexemeEnd == l.getBeginPosition()){//单字位置相邻长度为一合并)
// keywordBuffer.append(l.getLexemeText());
// }else{
// keywordBuffer.append(' ').append(l.getLexemeText());
//
// }
// lastLexemeLength = l.getLength();
// lastLexemeEnd = l.getEndPosition();
// }
//
// //借助lucene queryparser 生成SWMC Query
// QueryParser qp = new QueryParser(Version.LUCENE_40, fieldName, new StandardAnalyzer(Version.LUCENE_40));
// qp.setDefaultOperator(QueryParser.AND_OPERATOR);
// qp.setAutoGeneratePhraseQueries(true);
//
// if(quickMode && (shortCount * 1.0f / totalCount) > 0.5f){
// try {
// //System.out.println(keywordBuffer.toString());
// Query q = qp.parse(keywordBuffer_Short.toString());
// return q;
// } catch (ParseException e) {
// e.printStackTrace();
// }
//
// }else{
// if(keywordBuffer.length() > 0){
// try {
// //System.out.println(keywordBuffer.toString());
// Query q = qp.parse(keywordBuffer.toString());
// return q;
// } catch (ParseException e) {
// e.printStackTrace();
// }
// }
// }
// return null;
// }
//}
* @param quickMode
* @return
*/
private static Query getSWMCQuery(String fieldName , List<Lexeme> lexemes , boolean quickMode){
//build the SWMC query expression
StringBuffer keywordBuffer = new StringBuffer();
//condensed SWMC query expression
StringBuffer keywordBuffer_Short = new StringBuffer();
//track the length of the last lexeme
int lastLexemeLength = 0;
//track the end position of the last lexeme
int lastLexemeEnd = -1;
int shortCount = 0;
int totalCount = 0;
for(Lexeme l : lexemes){
totalCount += l.getLength();
//condensed expression
if(l.getLength() > 1){
keywordBuffer_Short.append(' ').append(l.getLexemeText());
shortCount += l.getLength();
}
if(lastLexemeLength == 0){
keywordBuffer.append(l.getLexemeText());
}else if(lastLexemeLength == 1 && l.getLength() == 1
&& lastLexemeEnd == l.getBeginPosition()){//adjacent single characters (length one): merge
keywordBuffer.append(l.getLexemeText());
}else{
keywordBuffer.append(' ').append(l.getLexemeText());
}
lastLexemeLength = l.getLength();
lastLexemeEnd = l.getEndPosition();
}
//use the Lucene QueryParser to build the SWMC Query
QueryParser qp = new QueryParser(Version.LUCENE_40, fieldName, new StandardAnalyzer(Version.LUCENE_40));
qp.setDefaultOperator(QueryParser.AND_OPERATOR);
qp.setAutoGeneratePhraseQueries(true);
if(quickMode && (shortCount * 1.0f / totalCount) > 0.5f){
try {
//System.out.println(keywordBuffer.toString());
Query q = qp.parse(keywordBuffer_Short.toString());
return q;
} catch (ParseException e) {
e.printStackTrace();
}
}else{
if(keywordBuffer.length() > 0){
try {
//System.out.println(keywordBuffer.toString());
Query q = qp.parse(keywordBuffer.toString());
return q;
} catch (ParseException e) {
e.printStackTrace();
}
}
}
return null;
}
}
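Usage sketch for the re-enabled builder (an initialized dictionary is assumed): quickMode trades recall for a shorter query when multi-character lexemes cover more than half of the keyword string.

import org.apache.lucene.search.Query;
import org.wltea.analyzer.query.SWMCQueryBuilder;

final class SWMCSketch {
    public static void main(String[] args) {
        Query q = SWMCQueryBuilder.create("text", "中文分词工具包", false);
        System.out.println("Query = " + q);
    }
}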

View File

@@ -1,147 +1,147 @@
///**
// * IK 中文分词 版本 5.0
// * IK Analyzer release 5.0
// *
// * Licensed to the Apache Software Foundation (ASF) under one or more
// * contributor license agreements. See the NOTICE file distributed with
// * this work for additional information regarding copyright ownership.
// * The ASF licenses this file to You under the Apache License, Version 2.0
// * (the "License"); you may not use this file except in compliance with
// * the License. You may obtain a copy of the License at
// *
// * http://www.apache.org/licenses/LICENSE-2.0
// *
// * Unless required by applicable law or agreed to in writing, software
// * distributed under the License is distributed on an "AS IS" BASIS,
// * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// * See the License for the specific language governing permissions and
// * limitations under the License.
// *
// * 源代码由林良益(linliangyi2005@gmail.com)提供
// * 版权声明 2012乌龙茶工作室
// * provided by Linliangyi and copyright 2012 by Oolong studio
// *
// *
// */
//package org.wltea.analyzer.sample;
//
//import java.io.IOException;
//
//import org.apache.lucene.analysis.Analyzer;
//import org.apache.lucene.document.Document;
//import org.apache.lucene.document.Field;
//import org.apache.lucene.document.StringField;
//import org.apache.lucene.document.TextField;
//import org.apache.lucene.index.CorruptIndexException;
//import org.apache.lucene.index.DirectoryReader;
//import org.apache.lucene.index.IndexReader;
//import org.apache.lucene.index.IndexWriter;
//import org.apache.lucene.index.IndexWriterConfig;
//import org.apache.lucene.index.IndexWriterConfig.OpenMode;
//import org.apache.lucene.queryparser.classic.ParseException;
//import org.apache.lucene.queryparser.classic.QueryParser;
//import org.apache.lucene.search.IndexSearcher;
//import org.apache.lucene.search.Query;
//import org.apache.lucene.search.ScoreDoc;
//import org.apache.lucene.search.TopDocs;
//import org.apache.lucene.store.Directory;
//import org.apache.lucene.store.LockObtainFailedException;
//import org.apache.lucene.store.RAMDirectory;
//import org.apache.lucene.util.Version;
//import org.wltea.analyzer.lucene.IKAnalyzer;
//
//
//
//
///**
// * 使用IKAnalyzer进行Lucene索引和查询的演示
// * 2012-3-2
// *
// * 以下是结合Lucene4.0 API的写法
// *
// */
//public class LuceneIndexAndSearchDemo {
//
//
// /**
// * 模拟
// * 创建一个单条记录的索引并对其进行搜索
// * @param args
// */
// public static void main(String[] args){
// //Lucene Document的域名
// String fieldName = "text";
// //检索内容
// String text = "IK Analyzer是一个结合词典分词和文法分词的中文分词开源工具包。它使用了全新的正向迭代最细粒度切分算法。";
//
// //实例化IKAnalyzer分词器
// Analyzer analyzer = new IKAnalyzer(true);
//
// Directory directory = null;
// IndexWriter iwriter = null;
// IndexReader ireader = null;
// IndexSearcher isearcher = null;
// try {
// //建立内存索引对象
// directory = new RAMDirectory();
//
// //配置IndexWriterConfig
// IndexWriterConfig iwConfig = new IndexWriterConfig(Version.LUCENE_40 , analyzer);
// iwConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
// iwriter = new IndexWriter(directory , iwConfig);
// //写入索引
// Document doc = new Document();
// doc.add(new StringField("ID", "10000", Field.Store.YES));
// doc.add(new TextField(fieldName, text, Field.Store.YES));
// iwriter.addDocument(doc);
// iwriter.close();
//
//
// //搜索过程**********************************
// //实例化搜索器
// ireader = DirectoryReader.open(directory);
// isearcher = new IndexSearcher(ireader);
//
// String keyword = "中文分词工具包";
// //使用QueryParser查询分析器构造Query对象
// QueryParser qp = new QueryParser(Version.LUCENE_40, fieldName, analyzer);
// qp.setDefaultOperator(QueryParser.AND_OPERATOR);
// Query query = qp.parse(keyword);
// System.out.println("Query = " + query);
//
// //搜索相似度最高的5条记录
// TopDocs topDocs = isearcher.search(query , 5);
// System.out.println("命中:" + topDocs.totalHits);
// //输出结果
// ScoreDoc[] scoreDocs = topDocs.scoreDocs;
// for (int i = 0; i < topDocs.totalHits; i++){
// Document targetDoc = isearcher.doc(scoreDocs[i].doc);
// System.out.println("内容:" + targetDoc.toString());
// }
//
// } catch (CorruptIndexException e) {
// e.printStackTrace();
// } catch (LockObtainFailedException e) {
// e.printStackTrace();
// } catch (IOException e) {
// e.printStackTrace();
// } catch (ParseException e) {
// e.printStackTrace();
// } finally{
// if(ireader != null){
// try {
// ireader.close();
// } catch (IOException e) {
// e.printStackTrace();
// }
// }
// if(directory != null){
// try {
// directory.close();
// } catch (IOException e) {
// e.printStackTrace();
// }
// }
// }
// }
//}
/**
* IK 中文分词 版本 5.0
* IK Analyzer release 5.0
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* 源代码由林良益(linliangyi2005@gmail.com)提供
* 版权声明 2012乌龙茶工作室
* provided by Linliangyi and copyright 2012 by Oolong studio
*
*
*/
package org.wltea.analyzer.sample;
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;
/**
* Demo of Lucene indexing and search using IKAnalyzer
* 2012-3-2
*
* Written against the Lucene 4.0 API
*
*/
public class LuceneIndexAndSearchDemo {
/**
* Simulation:
* create a single-document index and search it
* @param args
*/
public static void main(String[] args){
//Lucene Document field name
String fieldName = "text";
//content to index and search
String text = "IK Analyzer是一个结合词典分词和文法分词的中文分词开源工具包。它使用了全新的正向迭代最细粒度切分算法。";
//instantiate the IKAnalyzer segmenter
Analyzer analyzer = new IKAnalyzer(true);
Directory directory = null;
IndexWriter iwriter = null;
IndexReader ireader = null;
IndexSearcher isearcher = null;
try {
//create the in-memory index
directory = new RAMDirectory();
//configure the IndexWriterConfig
IndexWriterConfig iwConfig = new IndexWriterConfig(Version.LUCENE_40 , analyzer);
iwConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
iwriter = new IndexWriter(directory , iwConfig);
//write the index
Document doc = new Document();
doc.add(new StringField("ID", "10000", Field.Store.YES));
doc.add(new TextField(fieldName, text, Field.Store.YES));
iwriter.addDocument(doc);
iwriter.close();
//search phase **********************************
//instantiate the searcher
ireader = DirectoryReader.open(directory);
isearcher = new IndexSearcher(ireader);
String keyword = "中文分词工具包";
//build the Query object with the QueryParser
QueryParser qp = new QueryParser(Version.LUCENE_40, fieldName, analyzer);
qp.setDefaultOperator(QueryParser.AND_OPERATOR);
Query query = qp.parse(keyword);
System.out.println("Query = " + query);
//fetch the 5 highest-scoring records
TopDocs topDocs = isearcher.search(query , 5);
System.out.println("命中:" + topDocs.totalHits);
//print the results
ScoreDoc[] scoreDocs = topDocs.scoreDocs;
for (int i = 0; i < topDocs.totalHits; i++){
Document targetDoc = isearcher.doc(scoreDocs[i].doc);
System.out.println("内容:" + targetDoc.toString());
}
} catch (CorruptIndexException e) {
e.printStackTrace();
} catch (LockObtainFailedException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
} catch (ParseException e) {
e.printStackTrace();
} finally{
if(ireader != null){
try {
ireader.close();
} catch (IOException e) {
e.printStackTrace();
}
}
if(directory != null){
try {
directory.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
}
}