Compare commits

..

1 Commits
master ... 6.x

Author SHA1 Message Date
medcl
cfdba08f46 revert to previous commit 2022-05-24 14:17:46 +08:00
9 changed files with 39 additions and 42 deletions

2
.github/FUNDING.yml vendored
View File

@ -1,2 +0,0 @@
patreon: medcl
custom: ["https://www.buymeacoffee.com/medcl"]

View File

@ -10,9 +10,16 @@ Versions
IK version | ES version IK version | ES version
-----------|----------- -----------|-----------
master | 7.x -> master master | 6.x -> master
6.x| 6.x 6.3.0| 6.3.0
5.x| 5.x 6.2.4| 6.2.4
6.1.3| 6.1.3
5.6.8| 5.6.8
5.5.3| 5.5.3
5.4.3| 5.4.3
5.3.3| 5.3.3
5.2.2| 5.2.2
5.1.2| 5.1.2
1.10.6 | 2.4.6 1.10.6 | 2.4.6
1.9.5 | 2.3.5 1.9.5 | 2.3.5
1.8.1 | 2.2.1 1.8.1 | 2.2.1
@ -57,13 +64,13 @@ curl -XPUT http://localhost:9200/index
2.create a mapping 2.create a mapping
```bash ```bash
curl -XPOST http://localhost:9200/index/_mapping -H 'Content-Type:application/json' -d' curl -XPOST http://localhost:9200/index/fulltext/_mapping -H 'Content-Type:application/json' -d'
{ {
"properties": { "properties": {
"content": { "content": {
"type": "text", "type": "text",
"analyzer": "ik_max_word", "analyzer": "ik_max_word",
"search_analyzer": "ik_smart" "search_analyzer": "ik_max_word"
} }
} }
@ -73,25 +80,25 @@ curl -XPOST http://localhost:9200/index/_mapping -H 'Content-Type:application/js
3.index some docs 3.index some docs
```bash ```bash
curl -XPOST http://localhost:9200/index/_create/1 -H 'Content-Type:application/json' -d' curl -XPOST http://localhost:9200/index/fulltext/1 -H 'Content-Type:application/json' -d'
{"content":"美国留给伊拉克的是个烂摊子吗"} {"content":"美国留给伊拉克的是个烂摊子吗"}
' '
``` ```
```bash ```bash
curl -XPOST http://localhost:9200/index/_create/2 -H 'Content-Type:application/json' -d' curl -XPOST http://localhost:9200/index/fulltext/2 -H 'Content-Type:application/json' -d'
{"content":"公安部:各地校车将享最高路权"} {"content":"公安部:各地校车将享最高路权"}
' '
``` ```
```bash ```bash
curl -XPOST http://localhost:9200/index/_create/3 -H 'Content-Type:application/json' -d' curl -XPOST http://localhost:9200/index/fulltext/3 -H 'Content-Type:application/json' -d'
{"content":"中韩渔警冲突调查韩警平均每天扣1艘中国渔船"} {"content":"中韩渔警冲突调查韩警平均每天扣1艘中国渔船"}
' '
``` ```
```bash ```bash
curl -XPOST http://localhost:9200/index/_create/4 -H 'Content-Type:application/json' -d' curl -XPOST http://localhost:9200/index/fulltext/4 -H 'Content-Type:application/json' -d'
{"content":"中国驻洛杉矶领事馆遭亚裔男子枪击 嫌犯已自首"} {"content":"中国驻洛杉矶领事馆遭亚裔男子枪击 嫌犯已自首"}
' '
``` ```
@ -99,7 +106,7 @@ curl -XPOST http://localhost:9200/index/_create/4 -H 'Content-Type:application/j
4.query with highlighting 4.query with highlighting
```bash ```bash
curl -XPOST http://localhost:9200/index/_search -H 'Content-Type:application/json' -d' curl -XPOST http://localhost:9200/index/fulltext/_search -H 'Content-Type:application/json' -d'
{ {
"query" : { "match" : { "content" : "中国" }}, "query" : { "match" : { "content" : "中国" }},
"highlight" : { "highlight" : {
@ -241,13 +248,13 @@ curl -XGET "http://localhost:9200/your_index/_analyze" -H 'Content-Type: applica
4. ik_max_word 和 ik_smart 什么区别? 4. ik_max_word 和 ik_smart 什么区别?
ik_max_word: 会将文本做最细粒度的拆分,比如会将“中华人民共和国国歌”拆分为“中华人民共和国,中华人民,中华,华人,人民共和国,人民,人,民,共和国,共和,和,国国,国歌”,会穷尽各种可能的组合,适合 Term Query ik_max_word: 会将文本做最细粒度的拆分,比如会将“中华人民共和国国歌”拆分为“中华人民共和国,中华人民,中华,华人,人民共和国,人民,人,民,共和国,共和,和,国国,国歌”,会穷尽各种可能的组合;
ik_smart: 会做最粗粒度的拆分,比如会将“中华人民共和国国歌”拆分为“中华人民共和国,国歌”,适合 Phrase 查询 ik_smart: 会做最粗粒度的拆分,比如会将“中华人民共和国国歌”拆分为“中华人民共和国,国歌”。
Changes Changes
------ ------
*自 v5.0.0* *5.0.0*
- 移除名为 `ik` 的analyzer和tokenizer,请分别使用 `ik_smart` 和 `ik_max_word` - 移除名为 `ik` 的analyzer和tokenizer,请分别使用 `ik_smart` 和 `ik_max_word`

14
pom.xml Executable file → Normal file
View File

@ -12,7 +12,7 @@
<inceptionYear>2011</inceptionYear> <inceptionYear>2011</inceptionYear>
<properties> <properties>
<elasticsearch.version>8.4.1</elasticsearch.version> <elasticsearch.version>6.5.0</elasticsearch.version>
<maven.compiler.target>1.8</maven.compiler.target> <maven.compiler.target>1.8</maven.compiler.target>
<elasticsearch.assembly.descriptor>${project.basedir}/src/main/assemblies/plugin.xml</elasticsearch.assembly.descriptor> <elasticsearch.assembly.descriptor>${project.basedir}/src/main/assemblies/plugin.xml</elasticsearch.assembly.descriptor>
<elasticsearch.plugin.name>analysis-ik</elasticsearch.plugin.name> <elasticsearch.plugin.name>analysis-ik</elasticsearch.plugin.name>
@ -34,10 +34,10 @@
<developers> <developers>
<developer> <developer>
<name>INFINI Labs</name> <name>Medcl</name>
<email>hello@infini.ltd</email> <email>medcl@elastic.co</email>
<organization>INFINI Labs</organization> <organization>elastic</organization>
<organizationUrl>https://infinilabs.com</organizationUrl> <organizationUrl>http://www.elastic.co</organizationUrl>
</developer> </developer>
</developers> </developers>
@ -71,7 +71,7 @@
<name>OSS Sonatype</name> <name>OSS Sonatype</name>
<releases><enabled>true</enabled></releases> <releases><enabled>true</enabled></releases>
<snapshots><enabled>true</enabled></snapshots> <snapshots><enabled>true</enabled></snapshots>
<url>https://oss.sonatype.org/content/repositories/releases/</url> <url>http://oss.sonatype.org/content/repositories/releases/</url>
</repository> </repository>
</repositories> </repositories>
@ -93,7 +93,7 @@
<dependency> <dependency>
<groupId>org.apache.logging.log4j</groupId> <groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-api</artifactId> <artifactId>log4j-api</artifactId>
<version>2.18.0</version> <version>2.3</version>
</dependency> </dependency>
<dependency> <dependency>

View File

@ -10,7 +10,7 @@ public class IkAnalyzerProvider extends AbstractIndexAnalyzerProvider<IKAnalyzer
private final IKAnalyzer analyzer; private final IKAnalyzer analyzer;
public IkAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings,boolean useSmart) { public IkAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings,boolean useSmart) {
super(name, settings); super(indexSettings, name, settings);
Configuration configuration=new Configuration(env,settings).setUseSmart(useSmart); Configuration configuration=new Configuration(env,settings).setUseSmart(useSmart);

View File

@ -11,7 +11,7 @@ public class IkTokenizerFactory extends AbstractTokenizerFactory {
private Configuration configuration; private Configuration configuration;
public IkTokenizerFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) { public IkTokenizerFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, settings,name); super(indexSettings, name, settings);
configuration=new Configuration(env,settings); configuration=new Configuration(env,settings);
} }

View File

@ -4,7 +4,7 @@
package org.wltea.analyzer.cfg; package org.wltea.analyzer.cfg;
import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.core.PathUtils; import org.elasticsearch.common.io.PathUtils;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment; import org.elasticsearch.env.Environment;
import org.elasticsearch.plugin.analysis.ik.AnalysisIkPlugin; import org.elasticsearch.plugin.analysis.ik.AnalysisIkPlugin;

View File

@ -267,14 +267,6 @@ class AnalyzeContext {
Lexeme l = path.pollFirst(); Lexeme l = path.pollFirst();
while(l != null){ while(l != null){
this.results.add(l); this.results.add(l);
//字典中无单字但是词元冲突了切分出相交词元的前一个词元中的单字
/*int innerIndex = index + 1;
for (; innerIndex < index + l.getLength(); innerIndex++) {
Lexeme innerL = path.peekFirst();
if (innerL != null && innerIndex == innerL.getBegin()) {
this.outputSingleCJK(innerIndex - 1);
}
}*/
//将index移至lexeme后 //将index移至lexeme后
index = l.getBegin() + l.getLength(); index = l.getBegin() + l.getLength();

View File

@ -57,7 +57,7 @@ class DictSegment implements Comparable<DictSegment>{
DictSegment(Character nodeChar){ DictSegment(Character nodeChar){
if(nodeChar == null){ if(nodeChar == null){
throw new IllegalArgumentException("node char cannot be empty"); throw new IllegalArgumentException("参数为空异常,字符不能为空");
} }
this.nodeChar = nodeChar; this.nodeChar = nodeChar;
} }

16
src/main/java/org/wltea/analyzer/dic/Dictionary.java Executable file → Normal file
View File

@ -52,7 +52,7 @@ import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients; import org.apache.http.impl.client.HttpClients;
import org.elasticsearch.SpecialPermission; import org.elasticsearch.SpecialPermission;
import org.elasticsearch.core.PathUtils; import org.elasticsearch.common.io.PathUtils;
import org.elasticsearch.plugin.analysis.ik.AnalysisIkPlugin; import org.elasticsearch.plugin.analysis.ik.AnalysisIkPlugin;
import org.wltea.analyzer.cfg.Configuration; import org.wltea.analyzer.cfg.Configuration;
import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.Logger;
@ -80,7 +80,7 @@ public class Dictionary {
*/ */
private Configuration configuration; private Configuration configuration;
private static final Logger logger = ESPluginLoggerFactory.getLogger(Dictionary.class.getName()); private static final Logger logger = ESPluginLoggerFactory.getLogger(Monitor.class.getName());
private static ScheduledExecutorService pool = Executors.newScheduledThreadPool(1); private static ScheduledExecutorService pool = Executors.newScheduledThreadPool(1);
@ -294,7 +294,7 @@ public class Dictionary {
*/ */
public static Dictionary getSingleton() { public static Dictionary getSingleton() {
if (singleton == null) { if (singleton == null) {
throw new IllegalStateException("ik dict has not been initialized yet, please call initial method first."); throw new IllegalStateException("词典尚未初始化请先调用initial方法");
} }
return singleton; return singleton;
} }
@ -419,7 +419,7 @@ public class Dictionary {
List<String> lists = getRemoteWords(location); List<String> lists = getRemoteWords(location);
// 如果找不到扩展的字典则忽略 // 如果找不到扩展的字典则忽略
if (lists == null) { if (lists == null) {
logger.error("[Dict Loading] " + location + " load failed"); logger.error("[Dict Loading] " + location + "加载失败");
continue; continue;
} }
for (String theWord : lists) { for (String theWord : lists) {
@ -469,7 +469,7 @@ public class Dictionary {
} }
} }
if (entity.getContentLength() > 0 || entity.isChunked()) { if (entity.getContentLength() > 0) {
in = new BufferedReader(new InputStreamReader(entity.getContent(), charset)); in = new BufferedReader(new InputStreamReader(entity.getContent(), charset));
String line; String line;
while ((line = in.readLine()) != null) { while ((line = in.readLine()) != null) {
@ -518,7 +518,7 @@ public class Dictionary {
List<String> lists = getRemoteWords(location); List<String> lists = getRemoteWords(location);
// 如果找不到扩展的字典则忽略 // 如果找不到扩展的字典则忽略
if (lists == null) { if (lists == null) {
logger.error("[Dict Loading] " + location + " load failed"); logger.error("[Dict Loading] " + location + "加载失败");
continue; continue;
} }
for (String theWord : lists) { for (String theWord : lists) {
@ -562,7 +562,7 @@ public class Dictionary {
} }
void reLoadMainDict() { void reLoadMainDict() {
logger.info("start to reload ik dict."); logger.info("重新加载词典...");
// 新开一个实例加载词典减少加载过程对当前词典使用的影响 // 新开一个实例加载词典减少加载过程对当前词典使用的影响
Dictionary tmpDict = new Dictionary(configuration); Dictionary tmpDict = new Dictionary(configuration);
tmpDict.configuration = getSingleton().configuration; tmpDict.configuration = getSingleton().configuration;
@ -570,7 +570,7 @@ public class Dictionary {
tmpDict.loadStopWordDict(); tmpDict.loadStopWordDict();
_MainDict = tmpDict._MainDict; _MainDict = tmpDict._MainDict;
_StopWords = tmpDict._StopWords; _StopWords = tmpDict._StopWords;
logger.info("reload ik dict finished."); logger.info("重新加载词典完毕...");
} }
} }