remove unused classes
parent 341b586373
commit 7e86d7390a
IKQueryExpressionParser.java
@@ -1,716 +0,0 @@
/**
 * IK Chinese Analyzer, version 5.0
 * IK Analyzer release 5.0
 *
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * Source code provided by Lin Liangyi (linliangyi2005@gmail.com)
 * Copyright notice: 2012, Oolong Studio
 * provided by Linliangyi and copyright 2012 by Oolong studio
 *
 */
package org.wltea.analyzer.query;

import org.apache.lucene.index.Term;
import org.apache.lucene.search.*;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.logging.ESLogger;
import org.elasticsearch.common.logging.Loggers;

import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import java.util.Stack;

/**
 * Simple IK query expression parser, combined with the SWMCQuery algorithm.
 *
 * Example expression:
 * (id='1231231' && title:'monkey') || (content:'你好吗' || ulr='www.ik.com') - name:'helloword'
 * @author linliangyi
 *
 */
public class IKQueryExpressionParser {

    public static final ESLogger logger = Loggers.getLogger("ik-analyzer");

    //public static final String LUCENE_SPECIAL_CHAR = "&&||-()':={}[],";

    private List<Element> elements = new ArrayList<Element>();

    private Stack<Query> querys = new Stack<Query>();

    private Stack<Element> operates = new Stack<Element>();

    /**
     * Parses the query expression and builds a Lucene Query object.
     *
     * @param expression
     * @param quickMode
     * @return Lucene query
     */
    public Query parseExp(String expression , boolean quickMode){
        Query lucenceQuery = null;
        if(expression != null && !"".equals(expression)){
            try{
                //lexical analysis: split the expression into elements
                this.splitElements(expression);
                //syntax analysis
                this.parseSyntax(quickMode);
                if(this.querys.size() == 1){
                    lucenceQuery = this.querys.pop();
                }else{
                    throw new IllegalStateException("表达式异常: 缺少逻辑操作符 或 括号缺失");
                }
            }finally{
                elements.clear();
                querys.clear();
                operates.clear();
            }
        }
        return lucenceQuery;
    }

    /**
     * Lexical analysis: splits the expression into elements.
     * @param expression
     */
    private void splitElements(String expression){

        if(expression == null){
            return;
        }
        Element curretElement = null;

        char[] expChars = expression.toCharArray();
        for(int i = 0 ; i < expChars.length ; i++){
            switch(expChars[i]){
            case '&' :
                if(curretElement == null){
                    curretElement = new Element();
                    curretElement.type = '&';
                    curretElement.append(expChars[i]);
                }else if(curretElement.type == '&'){
                    curretElement.append(expChars[i]);
                    this.elements.add(curretElement);
                    curretElement = null;
                }else if(curretElement.type == '\''){
                    curretElement.append(expChars[i]);
                }else {
                    this.elements.add(curretElement);
                    curretElement = new Element();
                    curretElement.type = '&';
                    curretElement.append(expChars[i]);
                }
                break;

            case '|' :
                if(curretElement == null){
                    curretElement = new Element();
                    curretElement.type = '|';
                    curretElement.append(expChars[i]);
                }else if(curretElement.type == '|'){
                    curretElement.append(expChars[i]);
                    this.elements.add(curretElement);
                    curretElement = null;
                }else if(curretElement.type == '\''){
                    curretElement.append(expChars[i]);
                }else {
                    this.elements.add(curretElement);
                    curretElement = new Element();
                    curretElement.type = '|';
                    curretElement.append(expChars[i]);
                }
                break;

            case '-' :
                if(curretElement != null){
                    if(curretElement.type == '\''){
                        curretElement.append(expChars[i]);
                        continue;
                    }else{
                        this.elements.add(curretElement);
                    }
                }
                curretElement = new Element();
                curretElement.type = '-';
                curretElement.append(expChars[i]);
                this.elements.add(curretElement);
                curretElement = null;
                break;

            case '(' :
                if(curretElement != null){
                    if(curretElement.type == '\''){
                        curretElement.append(expChars[i]);
                        continue;
                    }else{
                        this.elements.add(curretElement);
                    }
                }
                curretElement = new Element();
                curretElement.type = '(';
                curretElement.append(expChars[i]);
                this.elements.add(curretElement);
                curretElement = null;
                break;

            case ')' :
                if(curretElement != null){
                    if(curretElement.type == '\''){
                        curretElement.append(expChars[i]);
                        continue;
                    }else{
                        this.elements.add(curretElement);
                    }
                }
                curretElement = new Element();
                curretElement.type = ')';
                curretElement.append(expChars[i]);
                this.elements.add(curretElement);
                curretElement = null;
                break;

            case ':' :
                if(curretElement != null){
                    if(curretElement.type == '\''){
                        curretElement.append(expChars[i]);
                        continue;
                    }else{
                        this.elements.add(curretElement);
                    }
                }
                curretElement = new Element();
                curretElement.type = ':';
                curretElement.append(expChars[i]);
                this.elements.add(curretElement);
                curretElement = null;
                break;

            case '=' :
                if(curretElement != null){
                    if(curretElement.type == '\''){
                        curretElement.append(expChars[i]);
                        continue;
                    }else{
                        this.elements.add(curretElement);
                    }
                }
                curretElement = new Element();
                curretElement.type = '=';
                curretElement.append(expChars[i]);
                this.elements.add(curretElement);
                curretElement = null;
                break;

            case ' ' :
                if(curretElement != null){
                    if(curretElement.type == '\''){
                        curretElement.append(expChars[i]);
                    }else{
                        this.elements.add(curretElement);
                        curretElement = null;
                    }
                }

                break;

            case '\'' :
                if(curretElement == null){
                    curretElement = new Element();
                    curretElement.type = '\'';

                }else if(curretElement.type == '\''){
                    this.elements.add(curretElement);
                    curretElement = null;

                }else{
                    this.elements.add(curretElement);
                    curretElement = new Element();
                    curretElement.type = '\'';

                }
                break;

            case '[':
                if(curretElement != null){
                    if(curretElement.type == '\''){
                        curretElement.append(expChars[i]);
                        continue;
                    }else{
                        this.elements.add(curretElement);
                    }
                }
                curretElement = new Element();
                curretElement.type = '[';
                curretElement.append(expChars[i]);
                this.elements.add(curretElement);
                curretElement = null;
                break;

            case ']':
                if(curretElement != null){
                    if(curretElement.type == '\''){
                        curretElement.append(expChars[i]);
                        continue;
                    }else{
                        this.elements.add(curretElement);
                    }
                }
                curretElement = new Element();
                curretElement.type = ']';
                curretElement.append(expChars[i]);
                this.elements.add(curretElement);
                curretElement = null;

                break;

            case '{':
                if(curretElement != null){
                    if(curretElement.type == '\''){
                        curretElement.append(expChars[i]);
                        continue;
                    }else{
                        this.elements.add(curretElement);
                    }
                }
                curretElement = new Element();
                curretElement.type = '{';
                curretElement.append(expChars[i]);
                this.elements.add(curretElement);
                curretElement = null;
                break;

            case '}':
                if(curretElement != null){
                    if(curretElement.type == '\''){
                        curretElement.append(expChars[i]);
                        continue;
                    }else{
                        this.elements.add(curretElement);
                    }
                }
                curretElement = new Element();
                curretElement.type = '}';
                curretElement.append(expChars[i]);
                this.elements.add(curretElement);
                curretElement = null;

                break;

            case ',':
                if(curretElement != null){
                    if(curretElement.type == '\''){
                        curretElement.append(expChars[i]);
                        continue;
                    }else{
                        this.elements.add(curretElement);
                    }
                }
                curretElement = new Element();
                curretElement.type = ',';
                curretElement.append(expChars[i]);
                this.elements.add(curretElement);
                curretElement = null;

                break;

            default :
                if(curretElement == null){
                    curretElement = new Element();
                    curretElement.type = 'F';
                    curretElement.append(expChars[i]);

                }else if(curretElement.type == 'F'){
                    curretElement.append(expChars[i]);

                }else if(curretElement.type == '\''){
                    curretElement.append(expChars[i]);

                }else{
                    this.elements.add(curretElement);
                    curretElement = new Element();
                    curretElement.type = 'F';
                    curretElement.append(expChars[i]);
                }
            }
        }

        if(curretElement != null){
            this.elements.add(curretElement);
            curretElement = null;
        }
    }

    /**
     * Syntax analysis.
     *
     */
    private void parseSyntax(boolean quickMode){
        for(int i = 0 ; i < this.elements.size() ; i++){
            Element e = this.elements.get(i);
            if('F' == e.type){
                Element e2 = this.elements.get(i + 1);
                if('=' != e2.type && ':' != e2.type){
                    throw new IllegalStateException("表达式异常: = 或 : 号丢失");
                }
                Element e3 = this.elements.get(i + 2);
                //handle the = and : operators
                if('\'' == e3.type){
                    i+=2;
                    if('=' == e2.type){
                        TermQuery tQuery = new TermQuery(new Term(e.toString() , e3.toString()));
                        this.querys.push(tQuery);
                    }else if(':' == e2.type){
                        String keyword = e3.toString();
                        //SWMCQuery Here
                        Query _SWMCQuery = SWMCQueryBuilder.create(e.toString(), keyword , quickMode);
                        this.querys.push(_SWMCQuery);
                    }

                }else if('[' == e3.type || '{' == e3.type){
                    i+=2;
                    //handle [] and {}
                    LinkedList<Element> eQueue = new LinkedList<Element>();
                    eQueue.add(e3);
                    for( i++ ; i < this.elements.size() ; i++){
                        Element eN = this.elements.get(i);
                        eQueue.add(eN);
                        if(']' == eN.type || '}' == eN.type){
                            break;
                        }
                    }
                    //translate into a range query
                    Query rangeQuery = this.toTermRangeQuery(e , eQueue);
                    this.querys.push(rangeQuery);
                }else{
                    throw new IllegalStateException("表达式异常:匹配值丢失");
                }

            }else if('(' == e.type){
                this.operates.push(e);

            }else if(')' == e.type){
                boolean doPop = true;
                while(doPop && !this.operates.empty()){
                    Element op = this.operates.pop();
                    if('(' == op.type){
                        doPop = false;
                    }else {
                        Query q = toBooleanQuery(op);
                        this.querys.push(q);
                    }

                }
            }else{

                if(this.operates.isEmpty()){
                    this.operates.push(e);
                }else{
                    boolean doPeek = true;
                    while(doPeek && !this.operates.isEmpty()){
                        Element eleOnTop = this.operates.peek();
                        if('(' == eleOnTop.type){
                            doPeek = false;
                            this.operates.push(e);
                        }else if(compare(e , eleOnTop) == 1){
                            this.operates.push(e);
                            doPeek = false;
                        }else if(compare(e , eleOnTop) == 0){
                            Query q = toBooleanQuery(eleOnTop);
                            this.operates.pop();
                            this.querys.push(q);
                        }else{
                            Query q = toBooleanQuery(eleOnTop);
                            this.operates.pop();
                            this.querys.push(q);
                        }
                    }

                    if(doPeek && this.operates.empty()){
                        this.operates.push(e);
                    }
                }
            }
        }

        while(!this.operates.isEmpty()){
            Element eleOnTop = this.operates.pop();
            Query q = toBooleanQuery(eleOnTop);
            this.querys.push(q);
        }
    }

    /**
     * Builds a BooleanQuery from the logical operator.
     * @param op
     * @return
     */
    private Query toBooleanQuery(Element op){
        if(this.querys.size() == 0){
            return null;
        }

        BooleanQuery resultQuery = new BooleanQuery();

        if(this.querys.size() == 1){
            return this.querys.get(0);
        }

        Query q2 = this.querys.pop();
        Query q1 = this.querys.pop();
        if('&' == op.type){
            if(q1 != null){
                if(q1 instanceof BooleanQuery){
                    BooleanClause[] clauses = ((BooleanQuery)q1).getClauses();
                    if(clauses.length > 0
                            && clauses[0].getOccur() == Occur.MUST){
                        for(BooleanClause c : clauses){
                            resultQuery.add(c);
                        }
                    }else{
                        resultQuery.add(q1,Occur.MUST);
                    }

                }else{
                    //q1 instanceof TermQuery
                    //q1 instanceof TermRangeQuery
                    //q1 instanceof PhraseQuery
                    //others
                    resultQuery.add(q1,Occur.MUST);
                }
            }

            if(q2 != null){
                if(q2 instanceof BooleanQuery){
                    BooleanClause[] clauses = ((BooleanQuery)q2).getClauses();
                    if(clauses.length > 0
                            && clauses[0].getOccur() == Occur.MUST){
                        for(BooleanClause c : clauses){
                            resultQuery.add(c);
                        }
                    }else{
                        resultQuery.add(q2,Occur.MUST);
                    }

                }else{
                    //q2 instanceof TermQuery
                    //q2 instanceof TermRangeQuery
                    //q2 instanceof PhraseQuery
                    //others
                    resultQuery.add(q2,Occur.MUST);
                }
            }

        }else if('|' == op.type){
            if(q1 != null){
                if(q1 instanceof BooleanQuery){
                    BooleanClause[] clauses = ((BooleanQuery)q1).getClauses();
                    if(clauses.length > 0
                            && clauses[0].getOccur() == Occur.SHOULD){
                        for(BooleanClause c : clauses){
                            resultQuery.add(c);
                        }
                    }else{
                        resultQuery.add(q1,Occur.SHOULD);
                    }

                }else{
                    //q1 instanceof TermQuery
                    //q1 instanceof TermRangeQuery
                    //q1 instanceof PhraseQuery
                    //others
                    resultQuery.add(q1,Occur.SHOULD);
                }
            }

            if(q2 != null){
                if(q2 instanceof BooleanQuery){
                    BooleanClause[] clauses = ((BooleanQuery)q2).getClauses();
                    if(clauses.length > 0
                            && clauses[0].getOccur() == Occur.SHOULD){
                        for(BooleanClause c : clauses){
                            resultQuery.add(c);
                        }
                    }else{
                        resultQuery.add(q2,Occur.SHOULD);
                    }
                }else{
                    //q2 instanceof TermQuery
                    //q2 instanceof TermRangeQuery
                    //q2 instanceof PhraseQuery
                    //others
                    resultQuery.add(q2,Occur.SHOULD);

                }
            }

        }else if('-' == op.type){
            if(q1 == null || q2 == null){
                throw new IllegalStateException("表达式异常:SubQuery 个数不匹配");
            }

            if(q1 instanceof BooleanQuery){
                BooleanClause[] clauses = ((BooleanQuery)q1).getClauses();
                if(clauses.length > 0){
                    for(BooleanClause c : clauses){
                        resultQuery.add(c);
                    }
                }else{
                    resultQuery.add(q1,Occur.MUST);
                }

            }else{
                //q1 instanceof TermQuery
                //q1 instanceof TermRangeQuery
                //q1 instanceof PhraseQuery
                //others
                resultQuery.add(q1,Occur.MUST);
            }

            resultQuery.add(q2,Occur.MUST_NOT);
        }
        return resultQuery;
    }

    /**
     * Assembles a TermRangeQuery.
     * @param elements
     * @return
     */
    private TermRangeQuery toTermRangeQuery(Element fieldNameEle , LinkedList<Element> elements){

        boolean includeFirst = false;
        boolean includeLast = false;
        String firstValue = null;
        String lastValue = null;
        //check whether the first element is [ or {
        Element first = elements.getFirst();
        if('[' == first.type){
            includeFirst = true;
        }else if('{' == first.type){
            includeFirst = false;
        }else {
            throw new IllegalStateException("表达式异常");
        }
        //check whether the last element is ] or }
        Element last = elements.getLast();
        if(']' == last.type){
            includeLast = true;
        }else if('}' == last.type){
            includeLast = false;
        }else {
            throw new IllegalStateException("表达式异常, RangeQuery缺少结束括号");
        }
        if(elements.size() < 4 || elements.size() > 5){
            throw new IllegalStateException("表达式异常, RangeQuery 错误");
        }
        //read the middle part
        Element e2 = elements.get(1);
        if('\'' == e2.type){
            firstValue = e2.toString();
            //
            Element e3 = elements.get(2);
            if(',' != e3.type){
                throw new IllegalStateException("表达式异常, RangeQuery缺少逗号分隔");
            }
            //
            Element e4 = elements.get(3);
            if('\'' == e4.type){
                lastValue = e4.toString();
            }else if(e4 != last){
                throw new IllegalStateException("表达式异常,RangeQuery格式错误");
            }
        }else if(',' == e2.type){
            firstValue = null;
            //
            Element e3 = elements.get(2);
            if('\'' == e3.type){
                lastValue = e3.toString();
            }else{
                throw new IllegalStateException("表达式异常,RangeQuery格式错误");
            }

        }else {
            throw new IllegalStateException("表达式异常, RangeQuery格式错误");
        }

        return new TermRangeQuery(fieldNameEle.toString() , new BytesRef(firstValue) , new BytesRef(lastValue) , includeFirst , includeLast);
    }

    /**
     * Compares operator precedence.
     * @param e1
     * @param e2
     * @return
     */
    private int compare(Element e1 , Element e2){
        if('&' == e1.type){
            if('&' == e2.type){
                return 0;
            }else {
                return 1;
            }
        }else if('|' == e1.type){
            if('&' == e2.type){
                return -1;
            }else if('|' == e2.type){
                return 0;
            }else{
                return 1;
            }
        }else{
            if('-' == e2.type){
                return 0;
            }else{
                return -1;
            }
        }
    }

    /**
     * Expression element (operator, field name, or field value).
     * @author linliangyi
     * May 20, 2010
     */
    private class Element{
        char type = 0;
        StringBuffer eleTextBuff;

        public Element(){
            eleTextBuff = new StringBuffer();
        }

        public void append(char c){
            this.eleTextBuff.append(c);
        }

        public String toString(){
            return this.eleTextBuff.toString();
        }
    }

    public static void main(String[] args){
        IKQueryExpressionParser parser = new IKQueryExpressionParser();
        //String ikQueryExp = "newsTitle:'的两款《魔兽世界》插件Bigfoot和月光宝盒'";
        String ikQueryExp = "(id='ABcdRf' && date:{'20010101','20110101'} && keyword:'魔兽中国') || (content:'KSHT-KSH-A001-18' || ulr='www.ik.com') - name:'林良益'";
        Query result = parser.parseExp(ikQueryExp , true);
        logger.info(result.toString());

    }

}
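For context, a minimal sketch (not part of this commit; the class name ParseExpSketch is hypothetical) of what the removed parser effectively builds for a small all-'=' expression such as id='1231231' && title='monkey' - name='helloword'. It uses the same mutable pre-5.x Lucene BooleanQuery API the class above relies on; fields matched with ':' would instead be delegated to SWMCQueryBuilder.create.

import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;

public class ParseExpSketch {
    // Hand-built equivalent of
    // parser.parseExp("id='1231231' && title='monkey' - name='helloword'", true):
    // the parser flattens the '&&' clauses and appends the '-' clause as MUST_NOT.
    public static Query equivalentQuery() {
        BooleanQuery q = new BooleanQuery();
        q.add(new TermQuery(new Term("id", "1231231")), Occur.MUST);        // id='1231231'
        q.add(new TermQuery(new Term("title", "monkey")), Occur.MUST);      // && title='monkey'
        q.add(new TermQuery(new Term("name", "helloword")), Occur.MUST_NOT); // - name='helloword'
        return q;
    }
}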
SWMCQueryBuilder.java
@@ -1,158 +0,0 @@
/**
 * IK Chinese Analyzer, version 5.0
 * IK Analyzer release 5.0
 *
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * Source code provided by Lin Liangyi (linliangyi2005@gmail.com)
 * Copyright notice: 2012, Oolong Studio
 * provided by Linliangyi and copyright 2012 by Oolong studio
 *
 */
package org.wltea.analyzer.query;

import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.Version;
import org.elasticsearch.common.logging.ESLogger;
import org.elasticsearch.common.logging.Loggers;
import org.wltea.analyzer.core.IKSegmenter;
import org.wltea.analyzer.core.Lexeme;

/**
 * Single Word Multi Char Query Builder,
 * intended specifically for the IK segmentation algorithm.
 * @author linliangyi
 *
 */
public class SWMCQueryBuilder {

    public static ESLogger logger = Loggers.getLogger("ik-analyzer");

    /**
     * Builds a SWMCQuery.
     * @param fieldName
     * @param keywords
     * @param quickMode
     * @return Lucene Query
     */
    public static Query create(String fieldName ,String keywords , boolean quickMode){
        if(fieldName == null || keywords == null){
            throw new IllegalArgumentException("参数 fieldName 、 keywords 不能为null.");
        }
        //1. segment the keywords
        List<Lexeme> lexemes = doAnalyze(keywords);
        //2. build the SWMCQuery from the segmentation result
        Query _SWMCQuery = getSWMCQuery(fieldName , lexemes , quickMode);
        return _SWMCQuery;
    }

    /**
     * Segments the keywords and returns the lexeme list.
     * @param keywords
     * @return
     */
    private static List<Lexeme> doAnalyze(String keywords){
        List<Lexeme> lexemes = new ArrayList<Lexeme>();

        IKSegmenter ikSeg = new IKSegmenter(new StringReader(keywords),true);
        try{
            Lexeme l = null;
            while( (l = ikSeg.next()) != null){
                lexemes.add(l);
            }
        }catch(IOException e){
            logger.error(e.getMessage(), e);
        }
        return lexemes;
    }


    /**
     * Builds the SWMC query from the segmentation result.
     * @param fieldName
    // * @param pathOption
     * @param quickMode
     * @return
     */
    private static Query getSWMCQuery(String fieldName , List<Lexeme> lexemes , boolean quickMode){
        //full SWMC query expression
        StringBuffer keywordBuffer = new StringBuffer();
        //condensed SWMC query expression
        StringBuffer keywordBuffer_Short = new StringBuffer();
        //length of the previous lexeme
        int lastLexemeLength = 0;
        //end position of the previous lexeme
        int lastLexemeEnd = -1;

        int shortCount = 0;
        int totalCount = 0;
        for(Lexeme l : lexemes){
            totalCount += l.getLength();
            //condensed expression
            if(l.getLength() > 1){
                keywordBuffer_Short.append(' ').append(l.getLexemeText());
                shortCount += l.getLength();
            }

            if(lastLexemeLength == 0){
                keywordBuffer.append(l.getLexemeText());
            }else if(lastLexemeLength == 1 && l.getLength() == 1
                    && lastLexemeEnd == l.getBeginPosition()){//adjacent single-character lexemes are merged
                keywordBuffer.append(l.getLexemeText());
            }else{
                keywordBuffer.append(' ').append(l.getLexemeText());

            }
            lastLexemeLength = l.getLength();
            lastLexemeEnd = l.getEndPosition();
        }

        //use the Lucene QueryParser to build the SWMC query
        QueryParser qp = new QueryParser(fieldName, new StandardAnalyzer());
        qp.setDefaultOperator(QueryParser.AND_OPERATOR);
        qp.setAutoGeneratePhraseQueries(true);

        if(quickMode && (shortCount * 1.0f / totalCount) > 0.5f){
            try {
                //System.out.println(keywordBuffer.toString());
                Query q = qp.parse(keywordBuffer_Short.toString());
                return q;
            } catch (ParseException e) {
                logger.error(e.getMessage(), e);
            }

        }else{
            if(keywordBuffer.length() > 0){
                try {
                    //System.out.println(keywordBuffer.toString());
                    Query q = qp.parse(keywordBuffer.toString());
                    return q;
                } catch (ParseException e) {
                    logger.error(e.getMessage(), e);
                }
            }
        }
        return null;
    }
}
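A minimal usage sketch of the removed builder (not part of the commit; the field name and keywords are illustrative, and it assumes the IK core classes are still on the classpath):

import org.apache.lucene.search.Query;
import org.wltea.analyzer.query.SWMCQueryBuilder;

public class SWMCUsageSketch {
    public static void main(String[] args) {
        // create() segments the keywords with IKSegmenter, joins the lexemes into a
        // space-separated expression (the condensed form when quickMode applies),
        // and parses it with Lucene's QueryParser using AND as the default operator.
        Query q = SWMCQueryBuilder.create("content", "中华人民共和国国歌", true);
        System.out.println(q);
    }
}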