diff --git a/pom.xml b/pom.xml index 6206bd6..344098c 100755 --- a/pom.xml +++ b/pom.xml @@ -1,7 +1,6 @@ + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> elasticsearch-analysis-ik 4.0.0 org.elasticsearch @@ -12,8 +11,8 @@ 2011 - 8.4.1 - 1.8 + 8.8.1 + 17 ${project.basedir}/src/main/assemblies/plugin.xml analysis-ik org.elasticsearch.plugin.analysis.ik.AnalysisIkPlugin @@ -69,8 +68,12 @@ oss.sonatype.org OSS Sonatype - true - true + + true + + + true + https://oss.sonatype.org/content/repositories/releases/ @@ -87,13 +90,13 @@ org.apache.httpcomponents httpclient - 4.5.2 + 4.5.14 org.apache.logging.log4j log4j-api - 2.18.0 + 2.19.0 @@ -187,83 +190,83 @@ -Xdoclint:none - - release - - - - org.sonatype.plugins - nexus-staging-maven-plugin - 1.6.3 - true - - oss - https://oss.sonatype.org/ - true - - - - org.apache.maven.plugins - maven-release-plugin - 2.1 - - true - false - release - deploy - - - - org.apache.maven.plugins - maven-compiler-plugin - 3.5.1 - - ${maven.compiler.target} - ${maven.compiler.target} - - - - org.apache.maven.plugins - maven-gpg-plugin - 1.5 - - - sign-artifacts - verify - - sign - - - - - - org.apache.maven.plugins - maven-source-plugin - 2.2.1 - - - attach-sources - - jar-no-fork - - - - - - org.apache.maven.plugins - maven-javadoc-plugin - 2.9 - - - attach-javadocs - - jar - - - - - - - + + release + + + + org.sonatype.plugins + nexus-staging-maven-plugin + 1.6.3 + true + + oss + https://oss.sonatype.org/ + true + + + + org.apache.maven.plugins + maven-release-plugin + 2.1 + + true + false + release + deploy + + + + org.apache.maven.plugins + maven-compiler-plugin + 3.5.1 + + ${maven.compiler.target} + ${maven.compiler.target} + + + + org.apache.maven.plugins + maven-gpg-plugin + 1.5 + + + sign-artifacts + verify + + sign + + + + + + org.apache.maven.plugins + maven-source-plugin + 2.2.1 + + + attach-sources + + jar-no-fork + + + + + + org.apache.maven.plugins + maven-javadoc-plugin + 2.9 + + + attach-javadocs + + jar + + + + + + + diff --git a/src/main/java/org/wltea/analyzer/lucene/IKTokenizer.java b/src/main/java/org/wltea/analyzer/lucene/IKTokenizer.java index 42d380a..3a81c57 100644 --- a/src/main/java/org/wltea/analyzer/lucene/IKTokenizer.java +++ b/src/main/java/org/wltea/analyzer/lucene/IKTokenizer.java @@ -30,14 +30,11 @@ import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; -import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.env.Environment; import org.wltea.analyzer.cfg.Configuration; import org.wltea.analyzer.core.IKSegmenter; import org.wltea.analyzer.core.Lexeme; import java.io.IOException; -import java.io.Reader; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; /** @@ -45,86 +42,89 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; * 兼容Lucene 4.0版本 */ public final class IKTokenizer extends Tokenizer { - - //IK分词器实现 + + // IK分词器实现 private IKSegmenter _IKImplement; - - //词元文本属性 + + // 词元文本属性 private final CharTermAttribute termAtt; - //词元位移属性 + // 词元位移属性 private final OffsetAttribute offsetAtt; - //词元分类属性(该属性分类参考org.wltea.analyzer.core.Lexeme中的分类常量) + // 词元分类属性(该属性分类参考org.wltea.analyzer.core.Lexeme中的分类常量) private final TypeAttribute typeAtt; - //记录最后一个词元的结束位置 + // 记录最后一个词元的结束位置 private int endPosition; - private int skippedPositions; + private int skippedPositions; - private PositionIncrementAttribute posIncrAtt; + private PositionIncrementAttribute posIncrAtt; - - /** + /** * Lucene 4.0 Tokenizer适配器类构造函数 - */ - public IKTokenizer(Configuration configuration){ - super(); - offsetAtt = addAttribute(OffsetAttribute.class); - termAtt = addAttribute(CharTermAttribute.class); - typeAtt = addAttribute(TypeAttribute.class); - posIncrAtt = addAttribute(PositionIncrementAttribute.class); + */ + public IKTokenizer(Configuration configuration) { + super(); + offsetAtt = addAttribute(OffsetAttribute.class); + termAtt = addAttribute(CharTermAttribute.class); + typeAtt = addAttribute(TypeAttribute.class); + posIncrAtt = addAttribute(PositionIncrementAttribute.class); - _IKImplement = new IKSegmenter(input,configuration); + _IKImplement = new IKSegmenter(input, configuration); } - /* (non-Javadoc) + /* + * (non-Javadoc) + * * @see org.apache.lucene.analysis.TokenStream#incrementToken() */ @Override public boolean incrementToken() throws IOException { - //清除所有的词元属性 + // 清除所有的词元属性 clearAttributes(); - skippedPositions = 0; + skippedPositions = 0; - Lexeme nextLexeme = _IKImplement.next(); - if(nextLexeme != null){ - posIncrAtt.setPositionIncrement(skippedPositions +1 ); + Lexeme nextLexeme = _IKImplement.next(); + if (nextLexeme != null) { + posIncrAtt.setPositionIncrement(skippedPositions + 1); - //将Lexeme转成Attributes - //设置词元文本 + // 将Lexeme转成Attributes + // 设置词元文本 termAtt.append(nextLexeme.getLexemeText()); - //设置词元长度 + // 设置词元长度 termAtt.setLength(nextLexeme.getLength()); - //设置词元位移 - offsetAtt.setOffset(correctOffset(nextLexeme.getBeginPosition()), correctOffset(nextLexeme.getEndPosition())); + // 设置词元位移 + offsetAtt.setOffset(correctOffset(nextLexeme.getBeginPosition()), + correctOffset(nextLexeme.getEndPosition())); - //记录分词的最后位置 + // 记录分词的最后位置 endPosition = nextLexeme.getEndPosition(); - //记录词元分类 - typeAtt.setType(nextLexeme.getLexemeTypeString()); - //返会true告知还有下个词元 + // 记录词元分类 + typeAtt.setType(nextLexeme.getLexemeTypeString()); + // 返会true告知还有下个词元 return true; } - //返会false告知词元输出完毕 + // 返会false告知词元输出完毕 return false; } - + /* * (non-Javadoc) + * * @see org.apache.lucene.analysis.Tokenizer#reset(java.io.Reader) */ @Override public void reset() throws IOException { super.reset(); _IKImplement.reset(input); - skippedPositions = 0; - } - + skippedPositions = 0; + } + @Override public final void end() throws IOException { - super.end(); - // set final offset + super.end(); + // set final offset int finalOffset = correctOffset(this.endPosition); offsetAtt.setOffset(finalOffset, finalOffset); - posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement() + skippedPositions); + posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement() + skippedPositions); } }