8.8.1
This commit is contained in:
parent
9338c19104
commit
05ebbed97c
175
pom.xml
175
pom.xml
@ -1,7 +1,6 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8"?>
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
|
||||||
<name>elasticsearch-analysis-ik</name>
|
<name>elasticsearch-analysis-ik</name>
|
||||||
<modelVersion>4.0.0</modelVersion>
|
<modelVersion>4.0.0</modelVersion>
|
||||||
<groupId>org.elasticsearch</groupId>
|
<groupId>org.elasticsearch</groupId>
|
||||||
@ -12,8 +11,8 @@
|
|||||||
<inceptionYear>2011</inceptionYear>
|
<inceptionYear>2011</inceptionYear>
|
||||||
|
|
||||||
<properties>
|
<properties>
|
||||||
<elasticsearch.version>8.4.1</elasticsearch.version>
|
<elasticsearch.version>8.8.1</elasticsearch.version>
|
||||||
<maven.compiler.target>1.8</maven.compiler.target>
|
<maven.compiler.target>17</maven.compiler.target>
|
||||||
<elasticsearch.assembly.descriptor>${project.basedir}/src/main/assemblies/plugin.xml</elasticsearch.assembly.descriptor>
|
<elasticsearch.assembly.descriptor>${project.basedir}/src/main/assemblies/plugin.xml</elasticsearch.assembly.descriptor>
|
||||||
<elasticsearch.plugin.name>analysis-ik</elasticsearch.plugin.name>
|
<elasticsearch.plugin.name>analysis-ik</elasticsearch.plugin.name>
|
||||||
<elasticsearch.plugin.classname>org.elasticsearch.plugin.analysis.ik.AnalysisIkPlugin</elasticsearch.plugin.classname>
|
<elasticsearch.plugin.classname>org.elasticsearch.plugin.analysis.ik.AnalysisIkPlugin</elasticsearch.plugin.classname>
|
||||||
@ -69,8 +68,12 @@
|
|||||||
<repository>
|
<repository>
|
||||||
<id>oss.sonatype.org</id>
|
<id>oss.sonatype.org</id>
|
||||||
<name>OSS Sonatype</name>
|
<name>OSS Sonatype</name>
|
||||||
<releases><enabled>true</enabled></releases>
|
<releases>
|
||||||
<snapshots><enabled>true</enabled></snapshots>
|
<enabled>true</enabled>
|
||||||
|
</releases>
|
||||||
|
<snapshots>
|
||||||
|
<enabled>true</enabled>
|
||||||
|
</snapshots>
|
||||||
<url>https://oss.sonatype.org/content/repositories/releases/</url>
|
<url>https://oss.sonatype.org/content/repositories/releases/</url>
|
||||||
</repository>
|
</repository>
|
||||||
</repositories>
|
</repositories>
|
||||||
@ -87,13 +90,13 @@
|
|||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.httpcomponents</groupId>
|
<groupId>org.apache.httpcomponents</groupId>
|
||||||
<artifactId>httpclient</artifactId>
|
<artifactId>httpclient</artifactId>
|
||||||
<version>4.5.2</version>
|
<version>4.5.14</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.logging.log4j</groupId>
|
<groupId>org.apache.logging.log4j</groupId>
|
||||||
<artifactId>log4j-api</artifactId>
|
<artifactId>log4j-api</artifactId>
|
||||||
<version>2.18.0</version>
|
<version>2.19.0</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
@ -187,83 +190,83 @@
|
|||||||
<additionalparam>-Xdoclint:none</additionalparam>
|
<additionalparam>-Xdoclint:none</additionalparam>
|
||||||
</properties>
|
</properties>
|
||||||
</profile>
|
</profile>
|
||||||
<profile>
|
<profile>
|
||||||
<id>release</id>
|
<id>release</id>
|
||||||
<build>
|
<build>
|
||||||
<plugins>
|
<plugins>
|
||||||
<plugin>
|
<plugin>
|
||||||
<groupId>org.sonatype.plugins</groupId>
|
<groupId>org.sonatype.plugins</groupId>
|
||||||
<artifactId>nexus-staging-maven-plugin</artifactId>
|
<artifactId>nexus-staging-maven-plugin</artifactId>
|
||||||
<version>1.6.3</version>
|
<version>1.6.3</version>
|
||||||
<extensions>true</extensions>
|
<extensions>true</extensions>
|
||||||
<configuration>
|
<configuration>
|
||||||
<serverId>oss</serverId>
|
<serverId>oss</serverId>
|
||||||
<nexusUrl>https://oss.sonatype.org/</nexusUrl>
|
<nexusUrl>https://oss.sonatype.org/</nexusUrl>
|
||||||
<autoReleaseAfterClose>true</autoReleaseAfterClose>
|
<autoReleaseAfterClose>true</autoReleaseAfterClose>
|
||||||
</configuration>
|
</configuration>
|
||||||
</plugin>
|
</plugin>
|
||||||
<plugin>
|
<plugin>
|
||||||
<groupId>org.apache.maven.plugins</groupId>
|
<groupId>org.apache.maven.plugins</groupId>
|
||||||
<artifactId>maven-release-plugin</artifactId>
|
<artifactId>maven-release-plugin</artifactId>
|
||||||
<version>2.1</version>
|
<version>2.1</version>
|
||||||
<configuration>
|
<configuration>
|
||||||
<autoVersionSubmodules>true</autoVersionSubmodules>
|
<autoVersionSubmodules>true</autoVersionSubmodules>
|
||||||
<useReleaseProfile>false</useReleaseProfile>
|
<useReleaseProfile>false</useReleaseProfile>
|
||||||
<releaseProfiles>release</releaseProfiles>
|
<releaseProfiles>release</releaseProfiles>
|
||||||
<goals>deploy</goals>
|
<goals>deploy</goals>
|
||||||
</configuration>
|
</configuration>
|
||||||
</plugin>
|
</plugin>
|
||||||
<plugin>
|
<plugin>
|
||||||
<groupId>org.apache.maven.plugins</groupId>
|
<groupId>org.apache.maven.plugins</groupId>
|
||||||
<artifactId>maven-compiler-plugin</artifactId>
|
<artifactId>maven-compiler-plugin</artifactId>
|
||||||
<version>3.5.1</version>
|
<version>3.5.1</version>
|
||||||
<configuration>
|
<configuration>
|
||||||
<source>${maven.compiler.target}</source>
|
<source>${maven.compiler.target}</source>
|
||||||
<target>${maven.compiler.target}</target>
|
<target>${maven.compiler.target}</target>
|
||||||
</configuration>
|
</configuration>
|
||||||
</plugin>
|
</plugin>
|
||||||
<plugin>
|
<plugin>
|
||||||
<groupId>org.apache.maven.plugins</groupId>
|
<groupId>org.apache.maven.plugins</groupId>
|
||||||
<artifactId>maven-gpg-plugin</artifactId>
|
<artifactId>maven-gpg-plugin</artifactId>
|
||||||
<version>1.5</version>
|
<version>1.5</version>
|
||||||
<executions>
|
<executions>
|
||||||
<execution>
|
<execution>
|
||||||
<id>sign-artifacts</id>
|
<id>sign-artifacts</id>
|
||||||
<phase>verify</phase>
|
<phase>verify</phase>
|
||||||
<goals>
|
<goals>
|
||||||
<goal>sign</goal>
|
<goal>sign</goal>
|
||||||
</goals>
|
</goals>
|
||||||
</execution>
|
</execution>
|
||||||
</executions>
|
</executions>
|
||||||
</plugin>
|
</plugin>
|
||||||
<plugin>
|
<plugin>
|
||||||
<groupId>org.apache.maven.plugins</groupId>
|
<groupId>org.apache.maven.plugins</groupId>
|
||||||
<artifactId>maven-source-plugin</artifactId>
|
<artifactId>maven-source-plugin</artifactId>
|
||||||
<version>2.2.1</version>
|
<version>2.2.1</version>
|
||||||
<executions>
|
<executions>
|
||||||
<execution>
|
<execution>
|
||||||
<id>attach-sources</id>
|
<id>attach-sources</id>
|
||||||
<goals>
|
<goals>
|
||||||
<goal>jar-no-fork</goal>
|
<goal>jar-no-fork</goal>
|
||||||
</goals>
|
</goals>
|
||||||
</execution>
|
</execution>
|
||||||
</executions>
|
</executions>
|
||||||
</plugin>
|
</plugin>
|
||||||
<plugin>
|
<plugin>
|
||||||
<groupId>org.apache.maven.plugins</groupId>
|
<groupId>org.apache.maven.plugins</groupId>
|
||||||
<artifactId>maven-javadoc-plugin</artifactId>
|
<artifactId>maven-javadoc-plugin</artifactId>
|
||||||
<version>2.9</version>
|
<version>2.9</version>
|
||||||
<executions>
|
<executions>
|
||||||
<execution>
|
<execution>
|
||||||
<id>attach-javadocs</id>
|
<id>attach-javadocs</id>
|
||||||
<goals>
|
<goals>
|
||||||
<goal>jar</goal>
|
<goal>jar</goal>
|
||||||
</goals>
|
</goals>
|
||||||
</execution>
|
</execution>
|
||||||
</executions>
|
</executions>
|
||||||
</plugin>
|
</plugin>
|
||||||
</plugins>
|
</plugins>
|
||||||
</build>
|
</build>
|
||||||
</profile>
|
</profile>
|
||||||
</profiles>
|
</profiles>
|
||||||
</project>
|
</project>
|
||||||
|
@ -30,14 +30,11 @@ import org.apache.lucene.analysis.Tokenizer;
|
|||||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
|
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
|
||||||
import org.elasticsearch.env.Environment;
|
|
||||||
import org.wltea.analyzer.cfg.Configuration;
|
import org.wltea.analyzer.cfg.Configuration;
|
||||||
import org.wltea.analyzer.core.IKSegmenter;
|
import org.wltea.analyzer.core.IKSegmenter;
|
||||||
import org.wltea.analyzer.core.Lexeme;
|
import org.wltea.analyzer.core.Lexeme;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.Reader;
|
|
||||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -45,86 +42,89 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
|||||||
* 兼容Lucene 4.0版本
|
* 兼容Lucene 4.0版本
|
||||||
*/
|
*/
|
||||||
public final class IKTokenizer extends Tokenizer {
|
public final class IKTokenizer extends Tokenizer {
|
||||||
|
|
||||||
//IK分词器实现
|
// IK分词器实现
|
||||||
private IKSegmenter _IKImplement;
|
private IKSegmenter _IKImplement;
|
||||||
|
|
||||||
//词元文本属性
|
// 词元文本属性
|
||||||
private final CharTermAttribute termAtt;
|
private final CharTermAttribute termAtt;
|
||||||
//词元位移属性
|
// 词元位移属性
|
||||||
private final OffsetAttribute offsetAtt;
|
private final OffsetAttribute offsetAtt;
|
||||||
//词元分类属性(该属性分类参考org.wltea.analyzer.core.Lexeme中的分类常量)
|
// 词元分类属性(该属性分类参考org.wltea.analyzer.core.Lexeme中的分类常量)
|
||||||
private final TypeAttribute typeAtt;
|
private final TypeAttribute typeAtt;
|
||||||
//记录最后一个词元的结束位置
|
// 记录最后一个词元的结束位置
|
||||||
private int endPosition;
|
private int endPosition;
|
||||||
|
|
||||||
private int skippedPositions;
|
private int skippedPositions;
|
||||||
|
|
||||||
private PositionIncrementAttribute posIncrAtt;
|
private PositionIncrementAttribute posIncrAtt;
|
||||||
|
|
||||||
|
/**
|
||||||
/**
|
|
||||||
* Lucene 4.0 Tokenizer适配器类构造函数
|
* Lucene 4.0 Tokenizer适配器类构造函数
|
||||||
*/
|
*/
|
||||||
public IKTokenizer(Configuration configuration){
|
public IKTokenizer(Configuration configuration) {
|
||||||
super();
|
super();
|
||||||
offsetAtt = addAttribute(OffsetAttribute.class);
|
offsetAtt = addAttribute(OffsetAttribute.class);
|
||||||
termAtt = addAttribute(CharTermAttribute.class);
|
termAtt = addAttribute(CharTermAttribute.class);
|
||||||
typeAtt = addAttribute(TypeAttribute.class);
|
typeAtt = addAttribute(TypeAttribute.class);
|
||||||
posIncrAtt = addAttribute(PositionIncrementAttribute.class);
|
posIncrAtt = addAttribute(PositionIncrementAttribute.class);
|
||||||
|
|
||||||
_IKImplement = new IKSegmenter(input,configuration);
|
_IKImplement = new IKSegmenter(input, configuration);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* (non-Javadoc)
|
/*
|
||||||
|
* (non-Javadoc)
|
||||||
|
*
|
||||||
* @see org.apache.lucene.analysis.TokenStream#incrementToken()
|
* @see org.apache.lucene.analysis.TokenStream#incrementToken()
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public boolean incrementToken() throws IOException {
|
public boolean incrementToken() throws IOException {
|
||||||
//清除所有的词元属性
|
// 清除所有的词元属性
|
||||||
clearAttributes();
|
clearAttributes();
|
||||||
skippedPositions = 0;
|
skippedPositions = 0;
|
||||||
|
|
||||||
Lexeme nextLexeme = _IKImplement.next();
|
Lexeme nextLexeme = _IKImplement.next();
|
||||||
if(nextLexeme != null){
|
if (nextLexeme != null) {
|
||||||
posIncrAtt.setPositionIncrement(skippedPositions +1 );
|
posIncrAtt.setPositionIncrement(skippedPositions + 1);
|
||||||
|
|
||||||
//将Lexeme转成Attributes
|
// 将Lexeme转成Attributes
|
||||||
//设置词元文本
|
// 设置词元文本
|
||||||
termAtt.append(nextLexeme.getLexemeText());
|
termAtt.append(nextLexeme.getLexemeText());
|
||||||
//设置词元长度
|
// 设置词元长度
|
||||||
termAtt.setLength(nextLexeme.getLength());
|
termAtt.setLength(nextLexeme.getLength());
|
||||||
//设置词元位移
|
// 设置词元位移
|
||||||
offsetAtt.setOffset(correctOffset(nextLexeme.getBeginPosition()), correctOffset(nextLexeme.getEndPosition()));
|
offsetAtt.setOffset(correctOffset(nextLexeme.getBeginPosition()),
|
||||||
|
correctOffset(nextLexeme.getEndPosition()));
|
||||||
|
|
||||||
//记录分词的最后位置
|
// 记录分词的最后位置
|
||||||
endPosition = nextLexeme.getEndPosition();
|
endPosition = nextLexeme.getEndPosition();
|
||||||
//记录词元分类
|
// 记录词元分类
|
||||||
typeAtt.setType(nextLexeme.getLexemeTypeString());
|
typeAtt.setType(nextLexeme.getLexemeTypeString());
|
||||||
//返回true告知还有下个词元
|
// 返回true告知还有下个词元
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
//返回false告知词元输出完毕
|
// 返回false告知词元输出完毕
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* (non-Javadoc)
|
* (non-Javadoc)
|
||||||
|
*
|
||||||
* @see org.apache.lucene.analysis.Tokenizer#reset()
|
* @see org.apache.lucene.analysis.Tokenizer#reset()
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public void reset() throws IOException {
|
public void reset() throws IOException {
|
||||||
super.reset();
|
super.reset();
|
||||||
_IKImplement.reset(input);
|
_IKImplement.reset(input);
|
||||||
skippedPositions = 0;
|
skippedPositions = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public final void end() throws IOException {
|
public final void end() throws IOException {
|
||||||
super.end();
|
super.end();
|
||||||
// set final offset
|
// set final offset
|
||||||
int finalOffset = correctOffset(this.endPosition);
|
int finalOffset = correctOffset(this.endPosition);
|
||||||
offsetAtt.setOffset(finalOffset, finalOffset);
|
offsetAtt.setOffset(finalOffset, finalOffset);
|
||||||
posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement() + skippedPositions);
|
posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement() + skippedPositions);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user