Compare commits

...

16 Commits
6.x ... master

Author SHA1 Message Date
medcl
9338c19104 update to 8.4.1 2022-09-02 18:44:03 +08:00
Medcl
0fb53ac32c
Update pom.xml
Update log4j
2022-01-19 11:59:06 +08:00
medcl
b637708ba0 update log4j 2021-12-13 09:45:53 +08:00
medcl
9c47725ea0 update for 7.14 2021-08-04 17:19:10 +08:00
Medcl
8e36b3240e
Update FUNDING.yml 2021-05-19 17:27:37 +08:00
Medcl
e0157d5f39
Update FUNDING.yml 2021-05-19 17:27:04 +08:00
Medcl
0fccc038e2
Create FUNDING.yml 2021-05-19 16:50:12 +08:00
Jack
5a1b8c8da6
Read chunked remote words (#817)
Fix chunked content that could not be read because a chunked response does not report a content length
I see there is an issue #780 and this fixes it
2020-09-06 16:34:40 +08:00
medcl
1375ca6d39 fix #789 2020-06-10 16:05:01 +08:00
Howard
4619effa15 transfer log message from chinese to english (#746) 2019-12-19 15:31:04 +08:00
medcl
5f53f1a5bf Merge branch 'master' of github.com:medcl/elasticsearch-analysis-ik 2019-10-07 19:01:51 +08:00
medcl
904a7493ea update to 7.4.0 2019-10-07 19:01:29 +08:00
zhipingpan
06e8a23d18 Update AnalyzeContext.java (#673) 2019-05-01 16:57:44 +08:00
Hongliang Wang
a1d6ba8ca2 Correct Search Analyzer (#668)
The former search analyzer `ik_max_word` will give the wrong result against the example described later in the README file.
2019-04-19 20:23:43 +08:00
medcl
90c9b58354 update example 2019-04-11 10:07:22 +08:00
medcl
ba8bb85f31 update to support 7.x 2019-04-11 09:35:19 +08:00
9 changed files with 36 additions and 41 deletions

2
.github/FUNDING.yml vendored Normal file
View File

@ -0,0 +1,2 @@
patreon: medcl
custom: ["https://www.buymeacoffee.com/medcl"]

View File

@ -10,16 +10,9 @@ Versions
IK version | ES version
-----------|-----------
master | 6.x -> master
6.3.0| 6.3.0
6.2.4| 6.2.4
6.1.3| 6.1.3
5.6.8| 5.6.8
5.5.3| 5.5.3
5.4.3| 5.4.3
5.3.3| 5.3.3
5.2.2| 5.2.2
5.1.2| 5.1.2
master | 7.x -> master
6.x| 6.x
5.x| 5.x
1.10.6 | 2.4.6
1.9.5 | 2.3.5
1.8.1 | 2.2.1
@ -64,13 +57,13 @@ curl -XPUT http://localhost:9200/index
2.create a mapping
```bash
curl -XPOST http://localhost:9200/index/fulltext/_mapping -H 'Content-Type:application/json' -d'
curl -XPOST http://localhost:9200/index/_mapping -H 'Content-Type:application/json' -d'
{
"properties": {
"content": {
"type": "text",
"analyzer": "ik_max_word",
"search_analyzer": "ik_max_word"
"search_analyzer": "ik_smart"
}
}
@ -80,25 +73,25 @@ curl -XPOST http://localhost:9200/index/fulltext/_mapping -H 'Content-Type:appli
3.index some docs
```bash
curl -XPOST http://localhost:9200/index/fulltext/1 -H 'Content-Type:application/json' -d'
curl -XPOST http://localhost:9200/index/_create/1 -H 'Content-Type:application/json' -d'
{"content":"美国留给伊拉克的是个烂摊子吗"}
'
```
```bash
curl -XPOST http://localhost:9200/index/fulltext/2 -H 'Content-Type:application/json' -d'
curl -XPOST http://localhost:9200/index/_create/2 -H 'Content-Type:application/json' -d'
{"content":"公安部:各地校车将享最高路权"}
'
```
```bash
curl -XPOST http://localhost:9200/index/fulltext/3 -H 'Content-Type:application/json' -d'
curl -XPOST http://localhost:9200/index/_create/3 -H 'Content-Type:application/json' -d'
{"content":"中韩渔警冲突调查韩警平均每天扣1艘中国渔船"}
'
```
```bash
curl -XPOST http://localhost:9200/index/fulltext/4 -H 'Content-Type:application/json' -d'
curl -XPOST http://localhost:9200/index/_create/4 -H 'Content-Type:application/json' -d'
{"content":"中国驻洛杉矶领事馆遭亚裔男子枪击 嫌犯已自首"}
'
```
@ -106,7 +99,7 @@ curl -XPOST http://localhost:9200/index/fulltext/4 -H 'Content-Type:application/
4.query with highlighting
```bash
curl -XPOST http://localhost:9200/index/fulltext/_search -H 'Content-Type:application/json' -d'
curl -XPOST http://localhost:9200/index/_search -H 'Content-Type:application/json' -d'
{
"query" : { "match" : { "content" : "中国" }},
"highlight" : {
@ -248,13 +241,13 @@ curl -XGET "http://localhost:9200/your_index/_analyze" -H 'Content-Type: applica
4. ik_max_word 和 ik_smart 什么区别?
ik_max_word: 会将文本做最细粒度的拆分,比如会将“中华人民共和国国歌”拆分为“中华人民共和国,中华人民,中华,华人,人民共和国,人民,人,民,共和国,共和,和,国国,国歌”,会穷尽各种可能的组合;
ik_max_word: 会将文本做最细粒度的拆分,比如会将“中华人民共和国国歌”拆分为“中华人民共和国,中华人民,中华,华人,人民共和国,人民,人,民,共和国,共和,和,国国,国歌”,会穷尽各种可能的组合,适合 Term Query
ik_smart: 会做最粗粒度的拆分,比如会将“中华人民共和国国歌”拆分为“中华人民共和国,国歌”。
ik_smart: 会做最粗粒度的拆分,比如会将“中华人民共和国国歌”拆分为“中华人民共和国,国歌”,适合 Phrase 查询
Changes
------
*5.0.0*
*自 v5.0.0*
- 移除名为 `ik` 的analyzer和tokenizer,请分别使用 `ik_smart` 和 `ik_max_word`

14
pom.xml Normal file → Executable file
View File

@ -12,7 +12,7 @@
<inceptionYear>2011</inceptionYear>
<properties>
<elasticsearch.version>6.5.0</elasticsearch.version>
<elasticsearch.version>8.4.1</elasticsearch.version>
<maven.compiler.target>1.8</maven.compiler.target>
<elasticsearch.assembly.descriptor>${project.basedir}/src/main/assemblies/plugin.xml</elasticsearch.assembly.descriptor>
<elasticsearch.plugin.name>analysis-ik</elasticsearch.plugin.name>
@ -34,10 +34,10 @@
<developers>
<developer>
<name>Medcl</name>
<email>medcl@elastic.co</email>
<organization>elastic</organization>
<organizationUrl>http://www.elastic.co</organizationUrl>
<name>INFINI Labs</name>
<email>hello@infini.ltd</email>
<organization>INFINI Labs</organization>
<organizationUrl>https://infinilabs.com</organizationUrl>
</developer>
</developers>
@ -71,7 +71,7 @@
<name>OSS Sonatype</name>
<releases><enabled>true</enabled></releases>
<snapshots><enabled>true</enabled></snapshots>
<url>http://oss.sonatype.org/content/repositories/releases/</url>
<url>https://oss.sonatype.org/content/repositories/releases/</url>
</repository>
</repositories>
@ -93,7 +93,7 @@
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-api</artifactId>
<version>2.3</version>
<version>2.18.0</version>
</dependency>
<dependency>

View File

@ -10,7 +10,7 @@ public class IkAnalyzerProvider extends AbstractIndexAnalyzerProvider<IKAnalyzer
private final IKAnalyzer analyzer;
public IkAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings,boolean useSmart) {
super(indexSettings, name, settings);
super(name, settings);
Configuration configuration=new Configuration(env,settings).setUseSmart(useSmart);

View File

@ -11,7 +11,7 @@ public class IkTokenizerFactory extends AbstractTokenizerFactory {
private Configuration configuration;
public IkTokenizerFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings);
super(indexSettings, settings,name);
configuration=new Configuration(env,settings);
}

View File

@ -4,7 +4,7 @@
package org.wltea.analyzer.cfg;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.io.PathUtils;
import org.elasticsearch.core.PathUtils;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.plugin.analysis.ik.AnalysisIkPlugin;

View File

@ -268,13 +268,13 @@ class AnalyzeContext {
while(l != null){
this.results.add(l);
//字典中无单字但是词元冲突了切分出相交词元的前一个词元中的单字
int innerIndex = index + 1;
/*int innerIndex = index + 1;
for (; innerIndex < index + l.getLength(); innerIndex++) {
Lexeme innerL = path.peekFirst();
if (innerL != null && innerIndex == innerL.getBegin()) {
this.outputSingleCJK(innerIndex - 1);
}
}
}*/
//将index移至lexeme后
index = l.getBegin() + l.getLength();

View File

@ -57,7 +57,7 @@ class DictSegment implements Comparable<DictSegment>{
DictSegment(Character nodeChar){
if(nodeChar == null){
throw new IllegalArgumentException("参数为空异常,字符不能为空");
throw new IllegalArgumentException("node char cannot be empty");
}
this.nodeChar = nodeChar;
}

16
src/main/java/org/wltea/analyzer/dic/Dictionary.java Normal file → Executable file
View File

@ -52,7 +52,7 @@ import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.elasticsearch.SpecialPermission;
import org.elasticsearch.common.io.PathUtils;
import org.elasticsearch.core.PathUtils;
import org.elasticsearch.plugin.analysis.ik.AnalysisIkPlugin;
import org.wltea.analyzer.cfg.Configuration;
import org.apache.logging.log4j.Logger;
@ -80,7 +80,7 @@ public class Dictionary {
*/
private Configuration configuration;
private static final Logger logger = ESPluginLoggerFactory.getLogger(Monitor.class.getName());
private static final Logger logger = ESPluginLoggerFactory.getLogger(Dictionary.class.getName());
private static ScheduledExecutorService pool = Executors.newScheduledThreadPool(1);
@ -294,7 +294,7 @@ public class Dictionary {
*/
public static Dictionary getSingleton() {
if (singleton == null) {
throw new IllegalStateException("词典尚未初始化请先调用initial方法");
throw new IllegalStateException("ik dict has not been initialized yet, please call initial method first.");
}
return singleton;
}
@ -419,7 +419,7 @@ public class Dictionary {
List<String> lists = getRemoteWords(location);
// 如果找不到扩展的字典则忽略
if (lists == null) {
logger.error("[Dict Loading] " + location + "加载失败");
logger.error("[Dict Loading] " + location + " load failed");
continue;
}
for (String theWord : lists) {
@ -469,7 +469,7 @@ public class Dictionary {
}
}
if (entity.getContentLength() > 0) {
if (entity.getContentLength() > 0 || entity.isChunked()) {
in = new BufferedReader(new InputStreamReader(entity.getContent(), charset));
String line;
while ((line = in.readLine()) != null) {
@ -518,7 +518,7 @@ public class Dictionary {
List<String> lists = getRemoteWords(location);
// 如果找不到扩展的字典则忽略
if (lists == null) {
logger.error("[Dict Loading] " + location + "加载失败");
logger.error("[Dict Loading] " + location + " load failed");
continue;
}
for (String theWord : lists) {
@ -562,7 +562,7 @@ public class Dictionary {
}
void reLoadMainDict() {
logger.info("重新加载词典...");
logger.info("start to reload ik dict.");
// 新开一个实例加载词典减少加载过程对当前词典使用的影响
Dictionary tmpDict = new Dictionary(configuration);
tmpDict.configuration = getSingleton().configuration;
@ -570,7 +570,7 @@ public class Dictionary {
tmpDict.loadStopWordDict();
_MainDict = tmpDict._MainDict;
_StopWords = tmpDict._StopWords;
logger.info("重新加载词典完毕...");
logger.info("reload ik dict finished.");
}
}