2025-07-18 00:00:12 +08:00
16 changed files with 157 additions and 149 deletions
--- a/.github/workflows/cmake.yml
+++ b/.github/workflows/cmake.yml
@ -17,14 +17,12 @@ jobs:
    strategy:
      matrix:
        os: [
          ubuntu-20.04, 
          ubuntu-22.04, 
          ubuntu-latest,
          macos-13,
          macos-14,
          macos-latest,
          windows-2019,
          windows-2022,
          windows-latest,
        ]  
        cpp_version: [11, 14, 17, 20]
--- a/.gitignore
+++ b/.gitignore
@ -15,5 +15,3 @@ tmp
 t.*
 *.pid
 build
 Testing/Temporary/CTestCostData.txt
 Testing/Temporary/LastTest.log
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -4,6 +4,10 @@ PROJECT(CPPJIEBA)
 INCLUDE_DIRECTORIES(${PROJECT_SOURCE_DIR}/deps/limonp/include
  ${PROJECT_SOURCE_DIR}/include)
 if (CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
    set (CMAKE_INSTALL_PREFIX "/usr/local/cppjieba" CACHE PATH "default install path" FORCE )
 endif()
 if(NOT DEFINED CMAKE_CXX_STANDARD)
    set(CMAKE_CXX_STANDARD 11)
 endif()
@ -21,11 +25,18 @@ if(NOT DEFINED CPPJIEBA_TOP_LEVEL_PROJECT)
    endif()
 endif()
-if(CPPJIEBA_TOP_LEVEL_PROJECT)
+
 option(CPPJIEBA_BUILD_TESTS "Build cppjieba tests" ${CPPJIEBA_TOP_LEVEL_PROJECT})
 if(CPPJIEBA_BUILD_TESTS)
  ENABLE_TESTING()
  message(STATUS "MSVC value: ${MSVC}")
  ADD_SUBDIRECTORY(test)
  if(NOT MSVC)
    ADD_TEST(NAME ./test/test.run COMMAND ./test/test.run)
    ADD_TEST(NAME ./load_test COMMAND ./load_test)
  endif()
 endif()
 # ... other CMake configurations ...
--- a/README.md
+++ b/README.md
@ -10,25 +10,22 @@
 CppJieba是"结巴(Jieba)"中文分词的C++版本
-### 主要特点
+## 特性
- 🚀 高性能：经过线上环境验证的稳定性和性能表现
+ 源代码都写进头文件`include/cppjieba/*.hpp`里，`include`即可使用。
- 📦 易集成：源代码以头文件形式提供 (`include/cppjieba/*.hpp`)，包含即可使用
+ 支持`utf8`编码。
- 🔍 多种分词模式：支持精确模式、全模式、搜索引擎模式等
+ 项目自带较为完善的单元测试，核心功能中文分词(utf8)的稳定性接受过线上环境检验。
- 📚 自定义词典：支持用户自定义词典，支持多词典路径（使用'|'或';'分隔）
+ 支持加载自定义用户词典，多个词典路径之间使用'|'或者';'分隔。
- 💻 跨平台：支持 Linux、macOS、Windows 操作系统
+ 支持 `Linux` , `Mac OSX`, `Windows` 操作系统。
 - 🌈 UTF-8编码：原生支持 UTF-8 编码的中文处理
-## 快速开始
+## 用法
-### 环境要求
+### 依赖软件
- C++ 编译器：
+* `g++ (version >= 4.1 is recommended) or clang++`;
-  - g++ (推荐 4.1 以上版本)
+* `cmake (version >= 2.6 is recommended)`;
  - 或 clang++
 - cmake (推荐 2.6 以上版本)
-### 安装步骤
+### 下载和编译
 ```sh
 git clone https://github.com/yanyiwu/cppjieba.git
@ -39,11 +36,15 @@ mkdir build
 cd build
 cmake ..
 make
 ```
 有兴趣的可以跑跑测试(可选):
 ```
 make test
 ```
-## 使用示例
+## Demo
 ```
 ./demo
@ -209,37 +210,71 @@ For more details, please see [demo](https://github.com/yanyiwu/cppjieba-demo).
 + [dict.367W.utf8] iLife(562193561 at qq.com)
-## 生态系统
+## 应用
-CppJieba 已经被广泛应用于各种编程语言的分词实现中：
+ [GoJieba] go语言版本的结巴中文分词。
 + [NodeJieba] Node.js 版本的结巴中文分词。
 + [simhash] 中文文档的相似度计算
 + [exjieba] Erlang 版本的结巴中文分词。
 + [jiebaR] R语言版本的结巴中文分词。
 + [cjieba] C语言版本的结巴分词。
 + [jieba_rb] Ruby 版本的结巴分词。
 + [iosjieba] iOS 版本的结巴分词。
 + [SqlJieba] MySQL 全文索引的结巴中文分词插件。
 + [pg_jieba] PostgreSQL 数据库的分词插件。
 + [simple] SQLite3 FTS5 数据库的分词插件。
 + [gitbook-plugin-search-pro] 支持中文搜索的 gitbook 插件。
 + [ngx_http_cppjieba_module] Nginx 分词插件。
 + [cppjiebapy] 由 [jannson] 开发的供 python 模块调用的项目 [cppjiebapy], 相关讨论 [cppjiebapy_discussion] .
 + [cppjieba-py] 由 [bung87] 基于 pybind11 封装的 python 模块,使用体验上接近于原jieba。
 + [KeywordServer] 50行搭建一个中文关键词抽取服务。
 + [cppjieba-server] CppJieba HTTP 服务器。
 + [phpjieba] php版本的结巴分词扩展。
 + [perl5-jieba] Perl版本的结巴分词扩展。
 + [jieba-dlang] D 语言的结巴分词 Deimos Bindings。
- [GoJieba](https://github.com/yanyiwu/gojieba) - Go 语言版本
+## 性能评测
 - [NodeJieba](https://github.com/yanyiwu/nodejieba) - Node.js 版本
 - [CJieba](https://github.com/yanyiwu/cjieba) - C 语言版本
 - [jiebaR](https://github.com/qinwf/jiebaR) - R 语言版本
 - [exjieba](https://github.com/falood/exjieba) - Erlang 版本
 - [jieba_rb](https://github.com/altkatz/jieba_rb) - Ruby 版本
 - [iosjieba](https://github.com/yanyiwu/iosjieba) - iOS 版本
 - [phpjieba](https://github.com/jonnywang/phpjieba) - PHP 版本
 - [perl5-jieba](https://metacpan.org/pod/distribution/Lingua-ZH-Jieba/lib/Lingua/ZH/Jieba.pod) - Perl 版本
-### 应用项目
+[Jieba中文分词系列性能评测]
- [simhash](https://github.com/yanyiwu/simhash) - 中文文档相似度计算
+## Sponsorship
 - [pg_jieba](https://github.com/jaiminpan/pg_jieba) - PostgreSQL 分词插件
 - [gitbook-plugin-search-pro](https://plugins.gitbook.com/plugin/search-pro) - Gitbook 中文搜索插件
 - [ngx_http_cppjieba_module](https://github.com/yanyiwu/ngx_http_cppjieba_module) - Nginx 分词插件
-## 贡献指南
+[![sponsorship](http://images.gitads.io/cppjieba)](https://tracking.gitads.io/?campaign=gitads&repo=cppjieba&redirect=gitads.io)
-我们欢迎各种形式的贡献，包括但不限于：
+## Contributors
- 提交问题和建议
+### Code Contributors
- 改进文档
+
- 提交代码修复
+This project exists thanks to all the people who contribute.
- 添加新功能
+<a href="https://github.com/yanyiwu/cppjieba/graphs/contributors"><img src="https://opencollective.com/cppjieba/contributors.svg?width=890&button=false" /></a>
-
+
-
+[GoJieba]:https://github.com/yanyiwu/gojieba
-如果您觉得 CppJieba 对您有帮助，欢迎 star ⭐️ 支持项目！
+[CppJieba]:https://github.com/yanyiwu/cppjieba
 [jannson]:https://github.com/jannson
 [cppjiebapy]:https://github.com/jannson/cppjiebapy
 [bung87]:https://github.com/bung87
 [cppjieba-py]:https://github.com/bung87/cppjieba-py
 [cppjiebapy_discussion]:https://github.com/yanyiwu/cppjieba/issues/1
 [NodeJieba]:https://github.com/yanyiwu/nodejieba
 [jiebaR]:https://github.com/qinwf/jiebaR
 [simhash]:https://github.com/yanyiwu/simhash
 [代码详解]:https://github.com/yanyiwu/cppjieba/wiki/CppJieba%E4%BB%A3%E7%A0%81%E8%AF%A6%E8%A7%A3
 [issue25]:https://github.com/yanyiwu/cppjieba/issues/25
 [exjieba]:https://github.com/falood/exjieba
 [KeywordServer]:https://github.com/yanyiwu/keyword_server
 [ngx_http_cppjieba_module]:https://github.com/yanyiwu/ngx_http_cppjieba_module
 [dict.367W.utf8]:https://github.com/qinwf/BigDict
 [cjieba]:http://github.com/yanyiwu/cjieba
 [jieba_rb]:https://github.com/altkatz/jieba_rb
 [iosjieba]:https://github.com/yanyiwu/iosjieba
 [SqlJieba]:https://github.com/yanyiwu/sqljieba
 [Jieba中文分词系列性能评测]:http://yanyiwu.com/work/2015/06/14/jieba-series-performance-test.html
 [pg_jieba]:https://github.com/jaiminpan/pg_jieba
 [gitbook-plugin-search-pro]:https://plugins.gitbook.com/plugin/search-pro
 [cppjieba-server]:https://github.com/yanyiwu/cppjieba-server
 [phpjieba]:https://github.com/jonnywang/phpjieba
 [perl5-jieba]:https://metacpan.org/pod/distribution/Lingua-ZH-Jieba/lib/Lingua/ZH/Jieba.pod
 [jieba-dlang]:https://github.com/shove70/jieba
 [simple]:https://github.com/wangfenjin/simple
--- a/deps/limonp
+++ b/deps/limonp
@ -1 +1 @@
-Subproject commit 5c82a3f17e4e0adc6a5decfe245054b0ed533d1a
+Subproject commit ac32f1f287f65d5ce0ce295010c88026fae060ee
--- a/dict/jieba.dict.utf8
+++ b/dict/jieba.dict.utf8
@ -312698,6 +312698,7 @@ T恤 4 n
 部属 1126 n
 部属工作 3 n
 部属院校 3 n
 部手机 33 n
 部族 643 n
 部标 4 n
 部省级 2 n
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@ -1,12 +1,4 @@
 SET(EXECUTABLE_OUTPUT_PATH ${PROJECT_BINARY_DIR})
 # Configure test paths
 configure_file("${CMAKE_CURRENT_SOURCE_DIR}/test_paths.h.in" "${CMAKE_BINARY_DIR}/test/test_paths.h")
 INCLUDE_DIRECTORIES(
    ${CMAKE_CURRENT_BINARY_DIR}
    ${CMAKE_BINARY_DIR}/test
 )
 ADD_EXECUTABLE(load_test load_test.cpp)
 ADD_SUBDIRECTORY(unittest)
--- a/test/load_test.cpp
+++ b/test/load_test.cpp
@ -6,15 +6,14 @@
 #include "cppjieba/MixSegment.hpp"
 #include "cppjieba/KeywordExtractor.hpp"
 #include "limonp/Colors.hpp"
 #include "test_paths.h"
 using namespace cppjieba;
 void Cut(size_t times = 50) {
-  MixSegment seg(DICT_DIR "/jieba.dict.utf8", DICT_DIR "/hmm_model.utf8");
+  MixSegment seg("../dict/jieba.dict.utf8", "../dict/hmm_model.utf8");
  vector<string> res;
  string doc;
-  ifstream ifs(TEST_DATA_DIR "/weicheng.utf8");
+  ifstream ifs("../test/testdata/weicheng.utf8");
  assert(ifs);
  doc << ifs;
  long beginTime = clock();
@ -30,13 +29,10 @@ void Cut(size_t times = 50) {
 }
 void Extract(size_t times = 400) {
-  KeywordExtractor Extractor(DICT_DIR "/jieba.dict.utf8", 
+  KeywordExtractor Extractor("../dict/jieba.dict.utf8", "../dict/hmm_model.utf8", "../dict/idf.utf8", "../dict/stop_words.utf8");
                           DICT_DIR "/hmm_model.utf8", 
                           DICT_DIR "/idf.utf8", 
                           DICT_DIR "/stop_words.utf8");
  vector<string> words;
  string doc;
-  ifstream ifs(TEST_DATA_DIR "/review.100");
+  ifstream ifs("../test/testdata/review.100");
  assert(ifs);
  doc << ifs;
  long beginTime = clock();
--- a/test/test_paths.h.in
+++ b/test/test_paths.h.in
@ -1,7 +0,0 @@
 #ifndef TEST_PATHS_H
 #define TEST_PATHS_H
 #define TEST_DATA_DIR "@CMAKE_CURRENT_SOURCE_DIR@/testdata"
 #define DICT_DIR "@CMAKE_SOURCE_DIR@/dict"
 #endif // TEST_PATHS_H 
--- a/test/unittest/CMakeLists.txt
+++ b/test/unittest/CMakeLists.txt
@ -1,8 +1,6 @@
 message(STATUS "MSVC value: ${MSVC}")
 if (MSVC)
 	set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreadedDebugDLL")
 	set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
 	add_compile_options(/utf-8)
 endif()
 include(FetchContent)
@ -20,12 +18,6 @@ SET(LIBRARY_OUTPUT_PATH ${PROJECT_BINARY_DIR}/lib)
 ADD_DEFINITIONS(-DLOGGING_LEVEL=LL_WARNING)
 # Add include directories
 INCLUDE_DIRECTORIES(
    ${CMAKE_CURRENT_BINARY_DIR}
    ${CMAKE_BINARY_DIR}/test
 )
 ADD_EXECUTABLE(test.run 
    gtest_main.cpp 
    keyword_extractor_test.cpp 
--- a/test/unittest/jieba_test.cpp
+++ b/test/unittest/jieba_test.cpp
@ -1,6 +1,5 @@
 #include "cppjieba/Jieba.hpp"
 #include "gtest/gtest.h"
 #include "test_paths.h"
 using namespace cppjieba;
@ -38,11 +37,11 @@ TEST(JiebaTest, Test0) {
 }
 TEST(JiebaTest, Test1) {
-  cppjieba::Jieba jieba(DICT_DIR "/jieba.dict.utf8",
+  cppjieba::Jieba jieba("../dict/jieba.dict.utf8",
-                        DICT_DIR "/hmm_model.utf8",
+                        "../dict/hmm_model.utf8",
-                        DICT_DIR "/user.dict.utf8",
+                        "../dict/user.dict.utf8",
-                        DICT_DIR "/idf.utf8",
+                        "../dict/idf.utf8",
-                        DICT_DIR "/stop_words.utf8");
+                        "../dict/stop_words.utf8");
  vector<string> words;
  string result;
@ -72,14 +71,14 @@ TEST(JiebaTest, Test1) {
  jieba.CutForSearch("他来到了网易杭研大厦", words);
  result << words;
  ASSERT_EQ("[\"他\", \"来到\", \"了\", \"网易\", \"杭研\", \"大厦\"]", result);
 }
 }
 TEST(JiebaTest, WordTest) {
-  cppjieba::Jieba jieba(DICT_DIR "/jieba.dict.utf8",
+  cppjieba::Jieba jieba("../dict/jieba.dict.utf8",
-                        DICT_DIR "/hmm_model.utf8",
+                        "../dict/hmm_model.utf8",
-                        DICT_DIR "/user.dict.utf8",
+                        "../dict/user.dict.utf8",
-                        DICT_DIR "/idf.utf8",
+                        "../dict/idf.utf8",
-                        DICT_DIR "/stop_words.utf8");
+                        "../dict/stop_words.utf8");
  vector<Word> words;
  string result;
@ -117,11 +116,11 @@ TEST(JiebaTest, WordTest) {
 }
 TEST(JiebaTest, InsertUserWord) {
-  cppjieba::Jieba jieba(DICT_DIR "/jieba.dict.utf8",
+  cppjieba::Jieba jieba("../dict/jieba.dict.utf8",
-                        DICT_DIR "/hmm_model.utf8",
+                        "../dict/hmm_model.utf8",
-                        DICT_DIR "/user.dict.utf8",
+                        "../dict/user.dict.utf8",
-                        DICT_DIR "/idf.utf8",
+                        "../dict/idf.utf8",
-                        DICT_DIR "/stop_words.utf8");
+                        "../dict/stop_words.utf8");
  vector<string> words;
  string result;
--- a/test/unittest/keyword_extractor_test.cpp
+++ b/test/unittest/keyword_extractor_test.cpp
@ -1,14 +1,10 @@
 #include "cppjieba/KeywordExtractor.hpp"
 #include "gtest/gtest.h"
 #include "test_paths.h"
 using namespace cppjieba;
 TEST(KeywordExtractorTest, Test1) {
-  KeywordExtractor Extractor(TEST_DATA_DIR "/extra_dict/jieba.dict.small.utf8", 
+  KeywordExtractor Extractor("../test/testdata/extra_dict/jieba.dict.small.utf8", "../dict/hmm_model.utf8", "../dict/idf.utf8", "../dict/stop_words.utf8");
                            DICT_DIR "/hmm_model.utf8", 
                            DICT_DIR "/idf.utf8", 
                            DICT_DIR "/stop_words.utf8");
  {
    string s("你好世界世界而且而且");
@ -59,11 +55,7 @@ TEST(KeywordExtractorTest, Test1) {
 }
 TEST(KeywordExtractorTest, Test2) {
-  KeywordExtractor Extractor(TEST_DATA_DIR "/extra_dict/jieba.dict.small.utf8", 
+  KeywordExtractor Extractor("../test/testdata/extra_dict/jieba.dict.small.utf8", "../dict/hmm_model.utf8", "../dict/idf.utf8", "../dict/stop_words.utf8", "../test/testdata/userdict.utf8");
                            DICT_DIR "/hmm_model.utf8", 
                            DICT_DIR "/idf.utf8", 
                            DICT_DIR "/stop_words.utf8", 
                            TEST_DATA_DIR "/userdict.utf8");
  {
    string s("蓝翔优秀毕业生");
--- a/test/unittest/pos_tagger_test.cpp
+++ b/test/unittest/pos_tagger_test.cpp
@ -1,6 +1,5 @@
 #include "cppjieba/MixSegment.hpp"
 #include "gtest/gtest.h"
 #include "test_paths.h"
 using namespace cppjieba;
@ -14,7 +13,7 @@ static const char * const ANS_TEST3 = "[iPhone6:eng, 手机:n, 的:uj, 最大:a,
 //static const char * const ANS_TEST3 = "";
 TEST(PosTaggerTest, Test) {
-  MixSegment tagger(DICT_DIR "/jieba.dict.utf8", DICT_DIR "/hmm_model.utf8");
+  MixSegment tagger("../dict/jieba.dict.utf8", "../dict/hmm_model.utf8");
  {
    vector<pair<string, string> > res;
    tagger.Tag(QUERY_TEST1, res);
@ -24,7 +23,7 @@ TEST(PosTaggerTest, Test) {
  }
 }
 TEST(PosTagger, TestUserDict) {
-  MixSegment tagger(DICT_DIR "/jieba.dict.utf8", DICT_DIR "/hmm_model.utf8", TEST_DATA_DIR "/userdict.utf8");
+  MixSegment tagger("../dict/jieba.dict.utf8", "../dict/hmm_model.utf8", "../test/testdata/userdict.utf8");
  {
    vector<pair<string, string> > res;
    tagger.Tag(QUERY_TEST2, res);
--- a/test/unittest/segments_test.cpp
+++ b/test/unittest/segments_test.cpp
@ -5,12 +5,11 @@
 #include "cppjieba/FullSegment.hpp"
 #include "cppjieba/QuerySegment.hpp"
 #include "gtest/gtest.h"
 #include "test_paths.h"
 using namespace cppjieba;
 TEST(MixSegmentTest, Test1) {
-  MixSegment segment(DICT_DIR "/jieba.dict.utf8", DICT_DIR "/hmm_model.utf8");
+  MixSegment segment("../dict/jieba.dict.utf8", "../dict/hmm_model.utf8");;
  string sentence;
  vector<string> words;
  string actual;
@ -50,18 +49,16 @@ TEST(MixSegmentTest, Test1) {
 }
 TEST(MixSegmentTest, NoUserDict) {
-  MixSegment segment(TEST_DATA_DIR "/extra_dict/jieba.dict.small.utf8", DICT_DIR "/hmm_model.utf8");
+  MixSegment segment("../test/testdata/extra_dict/jieba.dict.small.utf8", "../dict/hmm_model.utf8");
  const char* str = "令狐冲是云计算方面的专家";
  vector<string> words;
  segment.Cut(str, words);
  string res;
  ASSERT_EQ("[\"令狐冲\", \"是\", \"云\", \"计算\", \"方面\", \"的\", \"专家\"]", res << words);
 }
 }
 TEST(MixSegmentTest, UserDict) {
-  MixSegment segment(TEST_DATA_DIR "/extra_dict/jieba.dict.small.utf8", 
+  MixSegment segment("../test/testdata/extra_dict/jieba.dict.small.utf8", "../dict/hmm_model.utf8", "../dict/user.dict.utf8");
                    DICT_DIR "/hmm_model.utf8", 
                    DICT_DIR "/user.dict.utf8");
  {
    const char* str = "令狐冲是云计算方面的专家";
    vector<string> words;
@ -86,10 +83,9 @@ TEST(MixSegmentTest, UserDict) {
    ASSERT_EQ("[\"IBM\", \",\", \"3.14\"]", res);
  }
 }
 TEST(MixSegmentTest, TestUserDict) {
-  MixSegment segment(TEST_DATA_DIR "/extra_dict/jieba.dict.small.utf8", DICT_DIR "/hmm_model.utf8", 
+  MixSegment segment("../test/testdata/extra_dict/jieba.dict.small.utf8", "../dict/hmm_model.utf8", 
-        TEST_DATA_DIR "/userdict.utf8");
+        "../test/testdata/userdict.utf8");
  vector<string> words;
  string res;
@ -127,8 +123,8 @@ TEST(MixSegmentTest, TestUserDict) {
 }
 TEST(MixSegmentTest, TestMultiUserDict) {
-  MixSegment segment(TEST_DATA_DIR "/extra_dict/jieba.dict.small.utf8", DICT_DIR "/hmm_model.utf8", 
+  MixSegment segment("../test/testdata/extra_dict/jieba.dict.small.utf8", "../dict/hmm_model.utf8", 
-        TEST_DATA_DIR "/userdict.utf8;" TEST_DATA_DIR "/userdict.2.utf8");
+        "../test/testdata/userdict.utf8;../test/testdata/userdict.2.utf8");
  vector<string> words;
  string res;
@ -138,7 +134,7 @@ TEST(MixSegmentTest, TestMultiUserDict) {
 }
 TEST(MPSegmentTest, Test1) {
-  MPSegment segment(DICT_DIR "/jieba.dict.utf8");
+  MPSegment segment("../dict/jieba.dict.utf8");;
  string s;
  vector<string> words;
  segment.Cut("我来自北京邮电大学。", words);
@ -167,7 +163,7 @@ TEST(MPSegmentTest, Test1) {
 }
 TEST(HMMSegmentTest, Test1) {
-  HMMSegment segment(DICT_DIR "/hmm_model.utf8");
+  HMMSegment segment("../dict/hmm_model.utf8");;
  {
    const char* str = "我来自北京邮电大学。。。学号123456";
    const char* res[] = {"我来", "自北京", "邮电大学", "。", "。", "。", "学号", "123456"};
@ -186,7 +182,7 @@ TEST(HMMSegmentTest, Test1) {
 }
 TEST(FullSegment, Test1) {
-  FullSegment segment(TEST_DATA_DIR "/extra_dict/jieba.dict.small.utf8");
+  FullSegment segment("../test/testdata/extra_dict/jieba.dict.small.utf8");
  vector<string> words;
  string s;
@ -201,7 +197,7 @@ TEST(FullSegment, Test1) {
 }
 TEST(QuerySegment, Test1) {
-  QuerySegment segment(DICT_DIR "/jieba.dict.utf8", DICT_DIR "/hmm_model.utf8", "");
+  QuerySegment segment("../dict/jieba.dict.utf8", "../dict/hmm_model.utf8", "");
  vector<string> words;
  string s1, s2;
@ -222,9 +218,7 @@ TEST(QuerySegment, Test1) {
 }
 TEST(QuerySegment, Test2) {
-  QuerySegment segment(TEST_DATA_DIR "/extra_dict/jieba.dict.small.utf8", 
+  QuerySegment segment("../test/testdata/extra_dict/jieba.dict.small.utf8", "../dict/hmm_model.utf8", "../test/testdata/userdict.utf8|../test/testdata/userdict.english");
                      DICT_DIR "/hmm_model.utf8", 
                      TEST_DATA_DIR "/userdict.utf8|" TEST_DATA_DIR "/userdict.english");
  vector<string> words;
  string s1, s2;
@ -248,13 +242,14 @@ TEST(QuerySegment, Test2) {
    s2 = "中国/科学/学院/科学院/中国科学院";
    ASSERT_EQ(s1, s2);
  }
 }
 TEST(MPSegmentTest, Unicode32) {
  string s("天气很好，🙋 我们去郊游。");
  vector<string> words;
-  MPSegment segment(DICT_DIR "/jieba.dict.utf8");
+  MPSegment segment("../dict/jieba.dict.utf8");;
  segment.Cut(s, words);
  ASSERT_EQ(Join(words.begin(), words.end(), "/"), "天气/很/好/，/🙋/ /我们/去/郊游/。");
--- a/test/unittest/textrank_test.cpp
+++ b/test/unittest/textrank_test.cpp
@ -1,14 +1,13 @@
 #include "cppjieba/TextRankExtractor.hpp"
 #include "gtest/gtest.h"
 #include "test_paths.h"
 using namespace cppjieba;
 TEST(TextRankExtractorTest, Test1) {
  TextRankExtractor Extractor(
-    TEST_DATA_DIR "/extra_dict/jieba.dict.small.utf8",
+    "../test/testdata/extra_dict/jieba.dict.small.utf8",
-    DICT_DIR "/hmm_model.utf8", 
+    "../dict/hmm_model.utf8", 
-    DICT_DIR "/stop_words.utf8");
+    "../dict/stop_words.utf8");
  {
    string s("你好世界世界而且而且");
    string res;
@ -60,10 +59,10 @@ TEST(TextRankExtractorTest, Test1) {
 TEST(TextRankExtractorTest, Test2) {
  TextRankExtractor Extractor(
-    TEST_DATA_DIR "/extra_dict/jieba.dict.small.utf8",
+    "../test/testdata/extra_dict/jieba.dict.small.utf8",
-    DICT_DIR "/hmm_model.utf8",
+    "../dict/hmm_model.utf8",
-    DICT_DIR "/stop_words.utf8",
+    "../dict/stop_words.utf8",
-    TEST_DATA_DIR "/userdict.utf8");
+    "../test/testdata/userdict.utf8");
  {
    string s("\xe8\x93\x9d\xe7\xbf\x94\xe4\xbc\x98\xe7\xa7\x80\xe6\xaf\x95\xe4\xb8\x9a\xe7\x94\x9f");
--- a/test/unittest/trie_test.cpp
+++ b/test/unittest/trie_test.cpp
@ -1,11 +1,10 @@
 #include "cppjieba/DictTrie.hpp"
 #include "cppjieba/MPSegment.hpp"
 #include "gtest/gtest.h"
 #include "test_paths.h"
 using namespace cppjieba;
-static const char* const DICT_FILE = TEST_DATA_DIR "/extra_dict/jieba.dict.small.utf8";
+static const char* const DICT_FILE = "../test/testdata/extra_dict/jieba.dict.small.utf8";
 TEST(TrieTest, Empty) {
  vector<Unicode> keys;
@ -34,6 +33,12 @@ TEST(DictTrieTest, Test1) {
  string word("来到");
  cppjieba::RuneStrArray uni;
  ASSERT_TRUE(DecodeUTF8RunesInString(word, uni));
  //DictUnit nodeInfo;
  //nodeInfo.word = uni;
  //nodeInfo.tag = "v";
  //nodeInfo.weight = -8.87033;
  //s1 << nodeInfo;
  //s2 << (*trie.Find(uni.begin(), uni.end()));
  const DictUnit* du = trie.Find(uni.begin(), uni.end());
  ASSERT_TRUE(du != NULL);
  ASSERT_EQ(2u, du->word.size());
@ -42,12 +47,14 @@ TEST(DictTrieTest, Test1) {
  ASSERT_EQ("v", du->tag);
  ASSERT_NEAR(-8.870, du->weight, 0.001);
  //EXPECT_EQ("[\"26469\", \"21040\"] v -8.870", s2);
  word = "清华大学";
  LocalVector<pair<size_t, const DictUnit*> > res;
  const char * words[] = {"清", "清华", "清华大学"};
  for (size_t i = 0; i < sizeof(words)/sizeof(words[0]); i++) {
    ASSERT_TRUE(DecodeUTF8RunesInString(words[i], uni));
    res.push_back(make_pair(uni.size() - 1, trie.Find(uni.begin(), uni.end())));
    //resMap[uni.size() - 1] = trie.Find(uni.begin(), uni.end());
  }
  vector<pair<size_t, const DictUnit*> > vec;
  vector<struct Dag> dags;
@ -58,10 +65,11 @@ TEST(DictTrieTest, Test1) {
  s1 << res;
  s2 << dags[0].nexts;
  ASSERT_EQ(s1, s2);
 }
 TEST(DictTrieTest, UserDict) {
-  DictTrie trie(DICT_FILE, TEST_DATA_DIR "/userdict.utf8");
+  DictTrie trie(DICT_FILE, "../test/testdata/userdict.utf8");
  string word = "云计算";
  cppjieba::RuneStrArray unicode;
  ASSERT_TRUE(DecodeUTF8RunesInString(word, unicode));
@ -85,7 +93,7 @@ TEST(DictTrieTest, UserDict) {
 }
 TEST(DictTrieTest, UserDictWithMaxWeight) {
-  DictTrie trie(DICT_FILE, TEST_DATA_DIR "/userdict.utf8", DictTrie::WordWeightMax);
+  DictTrie trie(DICT_FILE, "../test/testdata/userdict.utf8", DictTrie::WordWeightMax);
  string word = "云计算";
  cppjieba::RuneStrArray unicode;
  ASSERT_TRUE(DecodeUTF8RunesInString(word, unicode));
@ -95,7 +103,7 @@ TEST(DictTrieTest, UserDictWithMaxWeight) {
 }
 TEST(DictTrieTest, Dag) {
-  DictTrie trie(DICT_FILE, TEST_DATA_DIR "/userdict.utf8");
+  DictTrie trie(DICT_FILE, "../test/testdata/userdict.utf8");
  {
    string word = "清华大学";
		`@ -1 +1 @@`
			`Subproject commit 5c82a3f17e4e0adc6a5decfe245054b0ed533d1a`				`Subproject commit ac32f1f287f65d5ce0ce295010c88026fae060ee`