commit 05ebbed97c
parent 9338c19104
Author: gangwen.lv
Date:   2023-06-25 19:19:59 +08:00

2 changed files with 134 additions and 131 deletions

pom.xml (19 lines changed)

@@ -1,7 +1,6 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project xmlns="http://maven.apache.org/POM/4.0.0"
-	xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+	xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
 	<name>elasticsearch-analysis-ik</name>
 	<modelVersion>4.0.0</modelVersion>
 	<groupId>org.elasticsearch</groupId>
@@ -12,8 +11,8 @@
 	<inceptionYear>2011</inceptionYear>
 	<properties>
-		<elasticsearch.version>8.4.1</elasticsearch.version>
-		<maven.compiler.target>1.8</maven.compiler.target>
+		<elasticsearch.version>8.8.1</elasticsearch.version>
+		<maven.compiler.target>17</maven.compiler.target>
 		<elasticsearch.assembly.descriptor>${project.basedir}/src/main/assemblies/plugin.xml</elasticsearch.assembly.descriptor>
 		<elasticsearch.plugin.name>analysis-ik</elasticsearch.plugin.name>
 		<elasticsearch.plugin.classname>org.elasticsearch.plugin.analysis.ik.AnalysisIkPlugin</elasticsearch.plugin.classname>
@@ -69,8 +68,12 @@
 		<repository>
 			<id>oss.sonatype.org</id>
 			<name>OSS Sonatype</name>
-			<releases><enabled>true</enabled></releases>
-			<snapshots><enabled>true</enabled></snapshots>
+			<releases>
+				<enabled>true</enabled>
+			</releases>
+			<snapshots>
+				<enabled>true</enabled>
+			</snapshots>
 			<url>https://oss.sonatype.org/content/repositories/releases/</url>
 		</repository>
 	</repositories>
@@ -87,13 +90,13 @@
 		<dependency>
 			<groupId>org.apache.httpcomponents</groupId>
 			<artifactId>httpclient</artifactId>
-			<version>4.5.2</version>
+			<version>4.5.14</version>
 		</dependency>
 		<dependency>
 			<groupId>org.apache.logging.log4j</groupId>
 			<artifactId>log4j-api</artifactId>
-			<version>2.18.0</version>
+			<version>2.19.0</version>
 		</dependency>
 		<dependency>
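
The httpclient bump above (4.5.2 to 4.5.14) updates the client that IK uses for its remote-dictionary hot-reload feature (the remote_ext_dict setting). Below is a minimal, hedged sketch of the kind of fetch that dependency supports; the class name and URL are illustrative placeholders, not code from this commit.

import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;

public class RemoteDictFetchSketch {
    public static void main(String[] args) throws Exception {
        // Placeholder URL; IK reads the real one from its remote_ext_dict setting.
        String url = "http://example.com/ext_dict.txt";
        try (CloseableHttpClient client = HttpClients.createDefault();
             CloseableHttpResponse response = client.execute(new HttpGet(url))) {
            if (response.getStatusLine().getStatusCode() == 200) {
                // The remote dictionary format is one word per line.
                String body = EntityUtils.toString(response.getEntity(), "UTF-8");
                for (String word : body.split("\n")) {
                    System.out.println(word.trim());
                }
            }
        }
    }
}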

src/main/java/org/wltea/analyzer/lucene/IKTokenizer.java

@@ -30,14 +30,11 @@ import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
-import org.elasticsearch.common.settings.Settings;
-import org.elasticsearch.env.Environment;
 import org.wltea.analyzer.cfg.Configuration;
 import org.wltea.analyzer.core.IKSegmenter;
 import org.wltea.analyzer.core.Lexeme;
 import java.io.IOException;
-import java.io.Reader;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;

 /**
@@ -46,70 +43,73 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
  */
 public final class IKTokenizer extends Tokenizer {

-	//IK segmenter implementation
+	// IK segmenter implementation
 	private IKSegmenter _IKImplement;

-	//lexeme text attribute
+	// lexeme text attribute
 	private final CharTermAttribute termAtt;
-	//lexeme offset attribute
+	// lexeme offset attribute
 	private final OffsetAttribute offsetAtt;
-	//lexeme type attribute (see the type constants in org.wltea.analyzer.core.Lexeme)
+	// lexeme type attribute (see the type constants in org.wltea.analyzer.core.Lexeme)
 	private final TypeAttribute typeAtt;
-	//records the end position of the last lexeme
+	// records the end position of the last lexeme
 	private int endPosition;

 	private int skippedPositions;
 	private PositionIncrementAttribute posIncrAtt;

 	/**
 	 * Constructor for the Lucene 4.0 Tokenizer adapter class
 	 */
-	public IKTokenizer(Configuration configuration){
+	public IKTokenizer(Configuration configuration) {
 		super();
 		offsetAtt = addAttribute(OffsetAttribute.class);
 		termAtt = addAttribute(CharTermAttribute.class);
 		typeAtt = addAttribute(TypeAttribute.class);
 		posIncrAtt = addAttribute(PositionIncrementAttribute.class);
-		_IKImplement = new IKSegmenter(input,configuration);
+		_IKImplement = new IKSegmenter(input, configuration);
 	}

-	/* (non-Javadoc)
+	/*
+	 * (non-Javadoc)
+	 *
 	 * @see org.apache.lucene.analysis.TokenStream#incrementToken()
 	 */
 	@Override
 	public boolean incrementToken() throws IOException {
-		//clear all lexeme attributes
+		// clear all lexeme attributes
 		clearAttributes();
 		skippedPositions = 0;

 		Lexeme nextLexeme = _IKImplement.next();
-		if(nextLexeme != null){
-			posIncrAtt.setPositionIncrement(skippedPositions +1 );
+		if (nextLexeme != null) {
+			posIncrAtt.setPositionIncrement(skippedPositions + 1);
-			//convert the Lexeme into Attributes
-			//set the lexeme text
+			// convert the Lexeme into Attributes
+			// set the lexeme text
 			termAtt.append(nextLexeme.getLexemeText());
-			//set the lexeme length
+			// set the lexeme length
 			termAtt.setLength(nextLexeme.getLength());
-			//set the lexeme offsets
-			offsetAtt.setOffset(correctOffset(nextLexeme.getBeginPosition()), correctOffset(nextLexeme.getEndPosition()));
+			// set the lexeme offsets
+			offsetAtt.setOffset(correctOffset(nextLexeme.getBeginPosition()),
+					correctOffset(nextLexeme.getEndPosition()));
-			//record the end position of this segmentation pass
+			// record the end position of this segmentation pass
 			endPosition = nextLexeme.getEndPosition();
-			//record the lexeme type
+			// record the lexeme type
 			typeAtt.setType(nextLexeme.getLexemeTypeString());
-			//return true to signal that another lexeme is available
+			// return true to signal that another lexeme is available
 			return true;
 		}
-		//return false to signal that lexeme output is finished
+		// return false to signal that lexeme output is finished
 		return false;
 	}

 	/*
 	 * (non-Javadoc)
 	 *
 	 * @see org.apache.lucene.analysis.Tokenizer#reset(java.io.Reader)
 	 */
 	@Override
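
The reformatted incrementToken() above follows the standard Lucene TokenStream contract: call reset() once, loop on incrementToken() until it returns false, then call end(). Below is a minimal consumption sketch under that contract. IKTokenizerDemo and dump() are hypothetical names, and because constructing an IK Configuration varies across plugin versions it is taken as a parameter rather than built here.

import java.io.IOException;
import java.io.StringReader;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.wltea.analyzer.cfg.Configuration;
import org.wltea.analyzer.lucene.IKTokenizer; // package assumed from the plugin's standard layout

public final class IKTokenizerDemo {
    // Prints each lexeme with its offsets and type.
    static void dump(Configuration cfg, String text) throws IOException {
        try (IKTokenizer tokenizer = new IKTokenizer(cfg)) {
            tokenizer.setReader(new StringReader(text));
            CharTermAttribute term = tokenizer.getAttribute(CharTermAttribute.class);
            OffsetAttribute offset = tokenizer.getAttribute(OffsetAttribute.class);
            TypeAttribute type = tokenizer.getAttribute(TypeAttribute.class);
            tokenizer.reset();                    // mandatory before the first incrementToken()
            while (tokenizer.incrementToken()) {  // the method reformatted in this commit
                System.out.printf("%s [%d,%d] %s%n",
                        term, offset.startOffset(), offset.endOffset(), type.type());
            }
            tokenizer.end();                      // finalizes offset state after the last token
        }
    }
}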