Compare commits
1 Commits
Author | SHA1 | Date | |
---|---|---|---|
|
cfdba08f46 |
2
.github/FUNDING.yml
vendored
2
.github/FUNDING.yml
vendored
@@ -1,2 +0,0 @@
|
||||
patreon: medcl
|
||||
custom: ["https://www.buymeacoffee.com/medcl"]
|
33
README.md
33
README.md
@@ -10,9 +10,16 @@ Versions
|
||||
|
||||
IK version | ES version
|
||||
-----------|-----------
|
||||
master | 7.x -> master
|
||||
6.x| 6.x
|
||||
5.x| 5.x
|
||||
master | 6.x -> master
|
||||
6.3.0| 6.3.0
|
||||
6.2.4| 6.2.4
|
||||
6.1.3| 6.1.3
|
||||
5.6.8| 5.6.8
|
||||
5.5.3| 5.5.3
|
||||
5.4.3| 5.4.3
|
||||
5.3.3| 5.3.3
|
||||
5.2.2| 5.2.2
|
||||
5.1.2| 5.1.2
|
||||
1.10.6 | 2.4.6
|
||||
1.9.5 | 2.3.5
|
||||
1.8.1 | 2.2.1
|
||||
@@ -57,13 +64,13 @@ curl -XPUT http://localhost:9200/index
|
||||
2.create a mapping
|
||||
|
||||
```bash
|
||||
curl -XPOST http://localhost:9200/index/_mapping -H 'Content-Type:application/json' -d'
|
||||
curl -XPOST http://localhost:9200/index/fulltext/_mapping -H 'Content-Type:application/json' -d'
|
||||
{
|
||||
"properties": {
|
||||
"content": {
|
||||
"type": "text",
|
||||
"analyzer": "ik_max_word",
|
||||
"search_analyzer": "ik_smart"
|
||||
"search_analyzer": "ik_max_word"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -73,25 +80,25 @@ curl -XPOST http://localhost:9200/index/_mapping -H 'Content-Type:application/js
|
||||
3.index some docs
|
||||
|
||||
```bash
|
||||
curl -XPOST http://localhost:9200/index/_create/1 -H 'Content-Type:application/json' -d'
|
||||
curl -XPOST http://localhost:9200/index/fulltext/1 -H 'Content-Type:application/json' -d'
|
||||
{"content":"美国留给伊拉克的是个烂摊子吗"}
|
||||
'
|
||||
```
|
||||
|
||||
```bash
|
||||
curl -XPOST http://localhost:9200/index/_create/2 -H 'Content-Type:application/json' -d'
|
||||
curl -XPOST http://localhost:9200/index/fulltext/2 -H 'Content-Type:application/json' -d'
|
||||
{"content":"公安部:各地校车将享最高路权"}
|
||||
'
|
||||
```
|
||||
|
||||
```bash
|
||||
curl -XPOST http://localhost:9200/index/_create/3 -H 'Content-Type:application/json' -d'
|
||||
curl -XPOST http://localhost:9200/index/fulltext/3 -H 'Content-Type:application/json' -d'
|
||||
{"content":"中韩渔警冲突调查:韩警平均每天扣1艘中国渔船"}
|
||||
'
|
||||
```
|
||||
|
||||
```bash
|
||||
curl -XPOST http://localhost:9200/index/_create/4 -H 'Content-Type:application/json' -d'
|
||||
curl -XPOST http://localhost:9200/index/fulltext/4 -H 'Content-Type:application/json' -d'
|
||||
{"content":"中国驻洛杉矶领事馆遭亚裔男子枪击 嫌犯已自首"}
|
||||
'
|
||||
```
|
||||
@@ -99,7 +106,7 @@ curl -XPOST http://localhost:9200/index/_create/4 -H 'Content-Type:application/j
|
||||
4.query with highlighting
|
||||
|
||||
```bash
|
||||
curl -XPOST http://localhost:9200/index/_search -H 'Content-Type:application/json' -d'
|
||||
curl -XPOST http://localhost:9200/index/fulltext/_search -H 'Content-Type:application/json' -d'
|
||||
{
|
||||
"query" : { "match" : { "content" : "中国" }},
|
||||
"highlight" : {
|
||||
@@ -241,13 +248,13 @@ curl -XGET "http://localhost:9200/your_index/_analyze" -H 'Content-Type: applica
|
||||
4. ik_max_word 和 ik_smart 什么区别?
|
||||
|
||||
|
||||
ik_max_word: 会将文本做最细粒度的拆分,比如会将“中华人民共和国国歌”拆分为“中华人民共和国,中华人民,中华,华人,人民共和国,人民,人,民,共和国,共和,和,国国,国歌”,会穷尽各种可能的组合,适合 Term Query;
|
||||
ik_max_word: 会将文本做最细粒度的拆分,比如会将“中华人民共和国国歌”拆分为“中华人民共和国,中华人民,中华,华人,人民共和国,人民,人,民,共和国,共和,和,国国,国歌”,会穷尽各种可能的组合;
|
||||
|
||||
ik_smart: 会做最粗粒度的拆分,比如会将“中华人民共和国国歌”拆分为“中华人民共和国,国歌”,适合 Phrase 查询。
|
||||
ik_smart: 会做最粗粒度的拆分,比如会将“中华人民共和国国歌”拆分为“中华人民共和国,国歌”。
|
||||
|
||||
Changes
|
||||
------
|
||||
*自 v5.0.0 起*
|
||||
*5.0.0*
|
||||
|
||||
- 移除名为 `ik` 的analyzer和tokenizer,请分别使用 `ik_smart` 和 `ik_max_word`
|
||||
|
||||
|
14
pom.xml
Executable file → Normal file
14
pom.xml
Executable file → Normal file
@@ -12,7 +12,7 @@
|
||||
<inceptionYear>2011</inceptionYear>
|
||||
|
||||
<properties>
|
||||
<elasticsearch.version>8.4.1</elasticsearch.version>
|
||||
<elasticsearch.version>6.5.0</elasticsearch.version>
|
||||
<maven.compiler.target>1.8</maven.compiler.target>
|
||||
<elasticsearch.assembly.descriptor>${project.basedir}/src/main/assemblies/plugin.xml</elasticsearch.assembly.descriptor>
|
||||
<elasticsearch.plugin.name>analysis-ik</elasticsearch.plugin.name>
|
||||
@@ -34,10 +34,10 @@
|
||||
|
||||
<developers>
|
||||
<developer>
|
||||
<name>INFINI Labs</name>
|
||||
<email>hello@infini.ltd</email>
|
||||
<organization>INFINI Labs</organization>
|
||||
<organizationUrl>https://infinilabs.com</organizationUrl>
|
||||
<name>Medcl</name>
|
||||
<email>medcl@elastic.co</email>
|
||||
<organization>elastic</organization>
|
||||
<organizationUrl>http://www.elastic.co</organizationUrl>
|
||||
</developer>
|
||||
</developers>
|
||||
|
||||
@@ -71,7 +71,7 @@
|
||||
<name>OSS Sonatype</name>
|
||||
<releases><enabled>true</enabled></releases>
|
||||
<snapshots><enabled>true</enabled></snapshots>
|
||||
<url>https://oss.sonatype.org/content/repositories/releases/</url>
|
||||
<url>http://oss.sonatype.org/content/repositories/releases/</url>
|
||||
</repository>
|
||||
</repositories>
|
||||
|
||||
@@ -93,7 +93,7 @@
|
||||
<dependency>
|
||||
<groupId>org.apache.logging.log4j</groupId>
|
||||
<artifactId>log4j-api</artifactId>
|
||||
<version>2.18.0</version>
|
||||
<version>2.3</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
|
@@ -10,7 +10,7 @@ public class IkAnalyzerProvider extends AbstractIndexAnalyzerProvider<IKAnalyzer
|
||||
private final IKAnalyzer analyzer;
|
||||
|
||||
public IkAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings,boolean useSmart) {
|
||||
super(name, settings);
|
||||
super(indexSettings, name, settings);
|
||||
|
||||
Configuration configuration=new Configuration(env,settings).setUseSmart(useSmart);
|
||||
|
||||
|
@@ -11,7 +11,7 @@ public class IkTokenizerFactory extends AbstractTokenizerFactory {
|
||||
private Configuration configuration;
|
||||
|
||||
public IkTokenizerFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
|
||||
super(indexSettings, settings,name);
|
||||
super(indexSettings, name, settings);
|
||||
configuration=new Configuration(env,settings);
|
||||
}
|
||||
|
||||
|
2
src/main/java/org/wltea/analyzer/cfg/Configuration.java
Executable file → Normal file
2
src/main/java/org/wltea/analyzer/cfg/Configuration.java
Executable file → Normal file
@@ -4,7 +4,7 @@
|
||||
package org.wltea.analyzer.cfg;
|
||||
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.core.PathUtils;
|
||||
import org.elasticsearch.common.io.PathUtils;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.env.Environment;
|
||||
import org.elasticsearch.plugin.analysis.ik.AnalysisIkPlugin;
|
||||
|
@@ -267,14 +267,6 @@ class AnalyzeContext {
|
||||
Lexeme l = path.pollFirst();
|
||||
while(l != null){
|
||||
this.results.add(l);
|
||||
//字典中无单字,但是词元冲突了,切分出相交词元的前一个词元中的单字
|
||||
/*int innerIndex = index + 1;
|
||||
for (; innerIndex < index + l.getLength(); innerIndex++) {
|
||||
Lexeme innerL = path.peekFirst();
|
||||
if (innerL != null && innerIndex == innerL.getBegin()) {
|
||||
this.outputSingleCJK(innerIndex - 1);
|
||||
}
|
||||
}*/
|
||||
|
||||
//将index移至lexeme后
|
||||
index = l.getBegin() + l.getLength();
|
||||
|
@@ -57,7 +57,7 @@ class DictSegment implements Comparable<DictSegment>{
|
||||
|
||||
DictSegment(Character nodeChar){
|
||||
if(nodeChar == null){
|
||||
throw new IllegalArgumentException("node char cannot be empty");
|
||||
throw new IllegalArgumentException("参数为空异常,字符不能为空");
|
||||
}
|
||||
this.nodeChar = nodeChar;
|
||||
}
|
||||
|
16
src/main/java/org/wltea/analyzer/dic/Dictionary.java
Executable file → Normal file
16
src/main/java/org/wltea/analyzer/dic/Dictionary.java
Executable file → Normal file
@@ -52,7 +52,7 @@ import org.apache.http.client.methods.HttpGet;
|
||||
import org.apache.http.impl.client.CloseableHttpClient;
|
||||
import org.apache.http.impl.client.HttpClients;
|
||||
import org.elasticsearch.SpecialPermission;
|
||||
import org.elasticsearch.core.PathUtils;
|
||||
import org.elasticsearch.common.io.PathUtils;
|
||||
import org.elasticsearch.plugin.analysis.ik.AnalysisIkPlugin;
|
||||
import org.wltea.analyzer.cfg.Configuration;
|
||||
import org.apache.logging.log4j.Logger;
|
||||
@@ -80,7 +80,7 @@ public class Dictionary {
|
||||
*/
|
||||
private Configuration configuration;
|
||||
|
||||
private static final Logger logger = ESPluginLoggerFactory.getLogger(Dictionary.class.getName());
|
||||
private static final Logger logger = ESPluginLoggerFactory.getLogger(Monitor.class.getName());
|
||||
|
||||
private static ScheduledExecutorService pool = Executors.newScheduledThreadPool(1);
|
||||
|
||||
@@ -294,7 +294,7 @@ public class Dictionary {
|
||||
*/
|
||||
public static Dictionary getSingleton() {
|
||||
if (singleton == null) {
|
||||
throw new IllegalStateException("ik dict has not been initialized yet, please call initial method first.");
|
||||
throw new IllegalStateException("词典尚未初始化,请先调用initial方法");
|
||||
}
|
||||
return singleton;
|
||||
}
|
||||
@@ -419,7 +419,7 @@ public class Dictionary {
|
||||
List<String> lists = getRemoteWords(location);
|
||||
// 如果找不到扩展的字典,则忽略
|
||||
if (lists == null) {
|
||||
logger.error("[Dict Loading] " + location + " load failed");
|
||||
logger.error("[Dict Loading] " + location + "加载失败");
|
||||
continue;
|
||||
}
|
||||
for (String theWord : lists) {
|
||||
@@ -469,7 +469,7 @@ public class Dictionary {
|
||||
}
|
||||
}
|
||||
|
||||
if (entity.getContentLength() > 0 || entity.isChunked()) {
|
||||
if (entity.getContentLength() > 0) {
|
||||
in = new BufferedReader(new InputStreamReader(entity.getContent(), charset));
|
||||
String line;
|
||||
while ((line = in.readLine()) != null) {
|
||||
@@ -518,7 +518,7 @@ public class Dictionary {
|
||||
List<String> lists = getRemoteWords(location);
|
||||
// 如果找不到扩展的字典,则忽略
|
||||
if (lists == null) {
|
||||
logger.error("[Dict Loading] " + location + " load failed");
|
||||
logger.error("[Dict Loading] " + location + "加载失败");
|
||||
continue;
|
||||
}
|
||||
for (String theWord : lists) {
|
||||
@@ -562,7 +562,7 @@ public class Dictionary {
|
||||
}
|
||||
|
||||
void reLoadMainDict() {
|
||||
logger.info("start to reload ik dict.");
|
||||
logger.info("重新加载词典...");
|
||||
// 新开一个实例加载词典,减少加载过程对当前词典使用的影响
|
||||
Dictionary tmpDict = new Dictionary(configuration);
|
||||
tmpDict.configuration = getSingleton().configuration;
|
||||
@@ -570,7 +570,7 @@ public class Dictionary {
|
||||
tmpDict.loadStopWordDict();
|
||||
_MainDict = tmpDict._MainDict;
|
||||
_StopWords = tmpDict._StopWords;
|
||||
logger.info("reload ik dict finished.");
|
||||
logger.info("重新加载词典完毕...");
|
||||
}
|
||||
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user