mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
upgrade basic functions
This commit is contained in:
parent
8bf70127c2
commit
bcb112a4b1
16
ChangeLog.md
16
ChangeLog.md
@ -1,5 +1,9 @@
|
||||
# CppJieba ChangeLog
|
||||
|
||||
## next version
|
||||
|
||||
1. Upgrade [limonp] to version v0.4.1, [husky] to version v0.2.0
|
||||
|
||||
## v4.2.0
|
||||
|
||||
1. 修复[issue50]提到的多词典分隔符在Windows环境下存在的问题,从':'修改成'|'或';'。
|
||||
@ -66,14 +70,14 @@
|
||||
|
||||
## v2.4.3
|
||||
|
||||
1. 更新 [Husky] 服务代码,新 [Husky] 为基于线程池的服务器简易框架。并且修复当 HTTP POST 请求时 body 过长数据可能丢失的问题。
|
||||
1. 更新 [husky] 服务代码,新 [husky] 为基于线程池的服务器简易框架。并且修复当 HTTP POST 请求时 body 过长数据可能丢失的问题。
|
||||
2. 修改 PosTagger 的参数结构,删除暂时无用的参数。并添加使用自定义字典的参数,也就是支持 **自定义词性**。
|
||||
3. 更好的支持 `mac osx` (原谅作者如此屌丝,这么晚才买 `mac` )。
|
||||
4. 支持 `Docker` ,具体请见 `Dockerfile` 。
|
||||
|
||||
## v2.4.2
|
||||
|
||||
1. 适当使用 `vector`, 的基础上,使用`Limonp/LocalVector.hpp`作为`Unicode`的类型等优化,约提高性能 `30%`。
|
||||
1. 在适当使用 `vector` 的基础上,使用 `limonp/LocalVector.hpp` 作为 `Unicode` 的类型等优化,约提高性能 `30%`。
|
||||
2. 使 `cjserver` 支持用户自定义词典,通过在 `conf/server.conf` 里面配置 `user_dict_path` 来实现。
|
||||
3. 修复 `MPSegment` 切词时,当句子中含有特殊字符时,切词结果不完整的问题。
|
||||
4. 修改 `FullSegment` 减少内存使用。
|
||||
@ -131,9 +135,9 @@
|
||||
1. 完成__最大概率分词算法__和__HMM分词算法__,并且将它们结合起来,形成效果最好的`MixSegment`。
|
||||
2. 进行大量的代码重构,将主要的功能性代码都写成了hpp文件。
|
||||
3. 使用`cmake`工具来管理项目。
|
||||
4. 使用 [Limonp]作为工具函数库,比如日志,字符串操作等常用函数。
|
||||
5. 使用 [Husky] 搭简易分词服务的服务器框架。
|
||||
4. 使用 [limonp] 作为工具函数库,比如日志,字符串操作等常用函数。
|
||||
5. 使用 [husky] 搭简易分词服务的服务器框架。
|
||||
|
||||
[Limonp]:http://github.com/yanyiwu/limonp.git
|
||||
[Husky]:http://github.com/yanyiwu/husky.git
|
||||
[limonp]:http://github.com/yanyiwu/limonp.git
|
||||
[husky]:http://github.com/yanyiwu/husky.git
|
||||
[issue50]:https://github.com/yanyiwu/cppjieba/issues/50
|
||||
|
@ -19,7 +19,7 @@ class ReqHandler: public IRequestHandler {
|
||||
virtual ~ReqHandler() {
|
||||
}
|
||||
|
||||
virtual bool doGET(const HttpReqInfo& httpReq, string& strSnd) {
|
||||
virtual bool DoGET(const HttpReqInfo& httpReq, string& strSnd) {
|
||||
string sentence, method, format;
|
||||
string tmp;
|
||||
vector<string> words;
|
||||
@ -32,9 +32,9 @@ class ReqHandler: public IRequestHandler {
|
||||
return true;
|
||||
}
|
||||
|
||||
virtual bool doPOST(const HttpReqInfo& httpReq, string& strSnd) {
|
||||
virtual bool DoPOST(const HttpReqInfo& httpReq, string& strSnd) {
|
||||
vector<string> words;
|
||||
Run(httpReq.getBody(), "MIX", "simple", strSnd);
|
||||
Run(httpReq.GetBody(), "MIX", "simple", strSnd);
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -57,7 +57,7 @@ class ReqHandler: public IRequestHandler {
|
||||
jieba_.Cut(sentence, words, false);
|
||||
}
|
||||
if (format == "simple") {
|
||||
join(words.begin(), words.end(), strSnd, " ");
|
||||
Join(words.begin(), words.end(), strSnd, " ");
|
||||
} else {
|
||||
strSnd << words;
|
||||
}
|
||||
@ -74,22 +74,22 @@ bool Run(int argc, char** argv) {
|
||||
if (!conf) {
|
||||
return false;
|
||||
}
|
||||
int port = conf.get("port", 1339);
|
||||
int threadNumber = conf.get("thread_number", 4);
|
||||
int queueMaxSize = conf.get("queue_max_size", 1024);
|
||||
string dictPath = conf.get("dict_path", "");
|
||||
string modelPath = conf.get("model_path", "");
|
||||
string userDictPath = conf.get("user_dict_path", "");
|
||||
int port = conf.Get("port", 1339);
|
||||
int threadNumber = conf.Get("thread_number", 4);
|
||||
int queueMaxSize = conf.Get("queue_max_size", 1024);
|
||||
string dictPath = conf.Get("dict_path", "");
|
||||
string modelPath = conf.Get("model_path", "");
|
||||
string userDictPath = conf.Get("user_dict_path", "");
|
||||
|
||||
LogInfo("config info: %s", conf.getConfigInfo().c_str());
|
||||
LOG(INFO) << "config info: " << conf.GetConfigInfo();
|
||||
|
||||
cppjieba::Jieba jieba(dictPath,
|
||||
modelPath,
|
||||
userDictPath);
|
||||
|
||||
ReqHandler reqHandler(jieba);
|
||||
ThreadPoolServer sf(threadNumber, queueMaxSize, port, reqHandler);
|
||||
return sf.start();
|
||||
ThreadPoolServer server(threadNumber, queueMaxSize, port, reqHandler);
|
||||
return server.Start();
|
||||
}
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
|
@ -9,7 +9,7 @@
|
||||
#include <cmath>
|
||||
#include <limits>
|
||||
#include "limonp/StringUtil.hpp"
|
||||
#include "limonp/Logger.hpp"
|
||||
#include "limonp/Logging.hpp"
|
||||
#include "TransCode.hpp"
|
||||
#include "Trie.hpp"
|
||||
|
||||
@ -60,7 +60,7 @@ class DictTrie {
|
||||
}
|
||||
|
||||
bool IsUserDictSingleChineseWord(const Rune& word) const {
|
||||
return isIn(user_dict_single_chinese_word_, word);
|
||||
return IsIn(user_dict_single_chinese_word_, word);
|
||||
}
|
||||
|
||||
double GetMinWeight() const {
|
||||
@ -93,24 +93,20 @@ class DictTrie {
|
||||
}
|
||||
|
||||
void LoadUserDict(const string& filePaths) {
|
||||
vector<string> files = limonp::split(filePaths, "|;");
|
||||
vector<string> files = limonp::Split(filePaths, "|;");
|
||||
size_t lineno = 0;
|
||||
for (size_t i = 0; i < files.size(); i++) {
|
||||
ifstream ifs(files[i].c_str());
|
||||
if (!ifs.is_open()) {
|
||||
LogFatal("file %s open failed.", files[i].c_str());
|
||||
}
|
||||
CHECK(ifs.is_open()) << "open " << files[i] << " failed";
|
||||
string line;
|
||||
DictUnit node_info;
|
||||
vector<string> buf;
|
||||
for (; getline(ifs, line); lineno++) {
|
||||
if (line.size() == 0)
|
||||
if (line.size() == 0) {
|
||||
continue;
|
||||
buf.clear();
|
||||
split(line, buf, " ");
|
||||
if (buf.size() < 1) {
|
||||
LogFatal("split [%s] result illegal", line.c_str());
|
||||
}
|
||||
buf.clear();
|
||||
Split(line, buf, " ");
|
||||
DictUnit node_info;
|
||||
MakeNodeInfo(node_info,
|
||||
buf[0],
|
||||
@ -122,7 +118,7 @@ class DictTrie {
|
||||
}
|
||||
}
|
||||
}
|
||||
LogInfo("Load userdicts[%s] ok. lines[%u]", filePaths.c_str(), lineno);
|
||||
LOG(INFO) << "load userdicts " << filePaths << ", lines: " << lineno;
|
||||
}
|
||||
|
||||
bool MakeNodeInfo(DictUnit& node_info,
|
||||
@ -130,7 +126,7 @@ class DictTrie {
|
||||
double weight,
|
||||
const string& tag) {
|
||||
if (!TransCode::Decode(word, node_info.word)) {
|
||||
LogError("Decode %s failed.", word.c_str());
|
||||
LOG(ERROR) << "Decode " << word << " failed.";
|
||||
return false;
|
||||
}
|
||||
node_info.weight = weight;
|
||||
@ -140,18 +136,14 @@ class DictTrie {
|
||||
|
||||
void LoadDict(const string& filePath) {
|
||||
ifstream ifs(filePath.c_str());
|
||||
if (!ifs.is_open()) {
|
||||
LogFatal("file %s open failed.", filePath.c_str());
|
||||
}
|
||||
CHECK(ifs.is_open()) << "open " << filePath << " failed.";
|
||||
string line;
|
||||
vector<string> buf;
|
||||
|
||||
DictUnit node_info;
|
||||
for (size_t lineno = 0; getline(ifs, line); lineno++) {
|
||||
split(line, buf, " ");
|
||||
if (buf.size() != DICT_COLUMN_NUM) {
|
||||
LogFatal("split result illegal, line: %s, result size: %u", line.c_str(), buf.size());
|
||||
}
|
||||
Split(line, buf, " ");
|
||||
CHECK(buf.size() == DICT_COLUMN_NUM) << "split result illegal, line:" << line;
|
||||
MakeNodeInfo(node_info,
|
||||
buf[0],
|
||||
atof(buf[1].c_str()),
|
||||
@ -165,9 +157,7 @@ class DictTrie {
|
||||
}
|
||||
|
||||
void SetStaticWordWeights(UserWordWeightOption option) {
|
||||
if (static_node_infos_.empty()) {
|
||||
LogFatal("something must be wrong");
|
||||
}
|
||||
CHECK(!static_node_infos_.empty());
|
||||
vector<DictUnit> x = static_node_infos_;
|
||||
sort(x.begin(), x.end(), WeightCompare);
|
||||
min_weight_ = x[0].weight;
|
||||
|
@ -4,7 +4,7 @@
|
||||
#include <algorithm>
|
||||
#include <set>
|
||||
#include <cassert>
|
||||
#include "limonp/Logger.hpp"
|
||||
#include "limonp/Logging.hpp"
|
||||
#include "DictTrie.hpp"
|
||||
#include "SegmentBase.hpp"
|
||||
#include "TransCode.hpp"
|
||||
@ -15,7 +15,6 @@ class FullSegment: public SegmentBase {
|
||||
FullSegment(const string& dictPath) {
|
||||
dictTrie_ = new DictTrie(dictPath);
|
||||
isNeedDestroy_ = true;
|
||||
LogInfo("FullSegment init %s ok", dictPath.c_str());
|
||||
}
|
||||
FullSegment(const DictTrie* dictTrie)
|
||||
: dictTrie_(dictTrie), isNeedDestroy_(false) {
|
||||
|
@ -32,57 +32,43 @@ struct HMMModel {
|
||||
}
|
||||
void LoadModel(const string& filePath) {
|
||||
ifstream ifile(filePath.c_str());
|
||||
if (!ifile.is_open()) {
|
||||
LogFatal("open %s failed.", filePath.c_str());
|
||||
}
|
||||
CHECK(ifile.is_open()) << "open " << filePath << " failed";
|
||||
string line;
|
||||
vector<string> tmp;
|
||||
vector<string> tmp2;
|
||||
//Load startProb
|
||||
if (!GetLine(ifile, line)) {
|
||||
LogFatal("Load startProb");
|
||||
}
|
||||
split(line, tmp, " ");
|
||||
if (tmp.size() != STATUS_SUM) {
|
||||
LogFatal("start_p illegal");
|
||||
}
|
||||
CHECK(GetLine(ifile, line));
|
||||
Split(line, tmp, " ");
|
||||
CHECK(tmp.size() == STATUS_SUM);
|
||||
for (size_t j = 0; j< tmp.size(); j++) {
|
||||
startProb[j] = atof(tmp[j].c_str());
|
||||
}
|
||||
|
||||
//Load transProb
|
||||
for (size_t i = 0; i < STATUS_SUM; i++) {
|
||||
if (!GetLine(ifile, line)) {
|
||||
LogFatal("Load transProb failed.");
|
||||
}
|
||||
split(line, tmp, " ");
|
||||
if (tmp.size() != STATUS_SUM) {
|
||||
LogFatal("trans_p illegal");
|
||||
}
|
||||
CHECK(GetLine(ifile, line));
|
||||
Split(line, tmp, " ");
|
||||
CHECK(tmp.size() == STATUS_SUM);
|
||||
for (size_t j =0; j < STATUS_SUM; j++) {
|
||||
transProb[i][j] = atof(tmp[j].c_str());
|
||||
}
|
||||
}
|
||||
|
||||
//Load emitProbB
|
||||
if (!GetLine(ifile, line) || !LoadEmitProb(line, emitProbB)) {
|
||||
LogFatal("Load emitProbB failed.");
|
||||
}
|
||||
CHECK(GetLine(ifile, line));
|
||||
CHECK(LoadEmitProb(line, emitProbB));
|
||||
|
||||
//Load emitProbE
|
||||
if (!GetLine(ifile, line) || !LoadEmitProb(line, emitProbE)) {
|
||||
LogFatal("Load emitProbE failed.");
|
||||
}
|
||||
CHECK(GetLine(ifile, line));
|
||||
CHECK(LoadEmitProb(line, emitProbE));
|
||||
|
||||
//Load emitProbM
|
||||
if (!GetLine(ifile, line) || !LoadEmitProb(line, emitProbM)) {
|
||||
LogFatal("Load emitProbM failed.");
|
||||
}
|
||||
CHECK(GetLine(ifile, line));
|
||||
CHECK(LoadEmitProb(line, emitProbM));
|
||||
|
||||
//Load emitProbS
|
||||
if (!GetLine(ifile, line) || !LoadEmitProb(line, emitProbS)) {
|
||||
LogFatal("Load emitProbS failed.");
|
||||
}
|
||||
CHECK(GetLine(ifile, line));
|
||||
CHECK(LoadEmitProb(line, emitProbS));
|
||||
}
|
||||
double GetEmitProb(const EmitProbMap* ptMp, uint16_t key,
|
||||
double defVal)const {
|
||||
@ -94,11 +80,11 @@ struct HMMModel {
|
||||
}
|
||||
bool GetLine(ifstream& ifile, string& line) {
|
||||
while (getline(ifile, line)) {
|
||||
trim(line);
|
||||
Trim(line);
|
||||
if (line.empty()) {
|
||||
continue;
|
||||
}
|
||||
if (startsWith(line, "#")) {
|
||||
if (StartsWith(line, "#")) {
|
||||
continue;
|
||||
}
|
||||
return true;
|
||||
@ -111,15 +97,15 @@ struct HMMModel {
|
||||
}
|
||||
vector<string> tmp, tmp2;
|
||||
Unicode unicode;
|
||||
split(line, tmp, ",");
|
||||
Split(line, tmp, ",");
|
||||
for (size_t i = 0; i < tmp.size(); i++) {
|
||||
split(tmp[i], tmp2, ":");
|
||||
Split(tmp[i], tmp2, ":");
|
||||
if (2 != tmp2.size()) {
|
||||
LogError("emitProb illegal.");
|
||||
LOG(ERROR) << "emitProb illegal.";
|
||||
return false;
|
||||
}
|
||||
if (!TransCode::Decode(tmp2[0], unicode) || unicode.size() != 1) {
|
||||
LogError("TransCode failed.");
|
||||
LOG(ERROR) << "TransCode failed.";
|
||||
return false;
|
||||
}
|
||||
mp[unicode[0]] = atof(tmp2[1].c_str());
|
||||
|
@ -79,9 +79,7 @@ class KeywordExtractor {
|
||||
private:
|
||||
void LoadIdfDict(const string& idfPath) {
|
||||
ifstream ifs(idfPath.c_str());
|
||||
if (!ifs.is_open()) {
|
||||
LogFatal("open %s failed.", idfPath.c_str());
|
||||
}
|
||||
CHECK(ifs.is_open()) << "open " << idfPath << " failed";
|
||||
string line ;
|
||||
vector<string> buf;
|
||||
double idf = 0.0;
|
||||
@ -90,12 +88,12 @@ class KeywordExtractor {
|
||||
for (; getline(ifs, line); lineno++) {
|
||||
buf.clear();
|
||||
if (line.empty()) {
|
||||
LogError("line[%d] empty. skipped.", lineno);
|
||||
LOG(ERROR) << "lineno: " << lineno << " empty. skipped.";
|
||||
continue;
|
||||
}
|
||||
split(line, buf, " ");
|
||||
Split(line, buf, " ");
|
||||
if (buf.size() != 2) {
|
||||
LogError("line %d [%s] illegal. skipped.", lineno, line.c_str());
|
||||
LOG(ERROR) << "line: " << line << ", lineno: " << lineno << " empty. skipped.";
|
||||
continue;
|
||||
}
|
||||
idf = atof(buf[1].c_str());
|
||||
@ -110,9 +108,7 @@ class KeywordExtractor {
|
||||
}
|
||||
void LoadStopWordDict(const string& filePath) {
|
||||
ifstream ifs(filePath.c_str());
|
||||
if (!ifs.is_open()) {
|
||||
LogFatal("open %s failed.", filePath.c_str());
|
||||
}
|
||||
CHECK(ifs.is_open()) << "open " << filePath << " failed";
|
||||
string line ;
|
||||
while (getline(ifs, line)) {
|
||||
stopWords_.insert(line);
|
||||
|
@ -10,7 +10,6 @@ class LevelSegment: public SegmentBase{
|
||||
LevelSegment(const string& dictPath,
|
||||
const string& userDictPath = "")
|
||||
: mpSeg_(dictPath, userDictPath) {
|
||||
LogInfo("LevelSegment init");
|
||||
}
|
||||
LevelSegment(const DictTrie* dictTrie)
|
||||
: mpSeg_(dictTrie) {
|
||||
|
@ -4,7 +4,7 @@
|
||||
#include <algorithm>
|
||||
#include <set>
|
||||
#include <cassert>
|
||||
#include "limonp/Logger.hpp"
|
||||
#include "limonp/Logging.hpp"
|
||||
#include "DictTrie.hpp"
|
||||
#include "SegmentBase.hpp"
|
||||
|
||||
@ -15,7 +15,6 @@ class MPSegment: public SegmentBase {
|
||||
MPSegment(const string& dictPath, const string& userDictPath = "") {
|
||||
dictTrie_ = new DictTrie(dictPath, userDictPath);
|
||||
isNeedDestroy_ = true;
|
||||
LogInfo("MPSegment init(%s) ok", dictPath.c_str());
|
||||
}
|
||||
MPSegment(const DictTrie* dictTrie)
|
||||
: dictTrie_(dictTrie), isNeedDestroy_(false) {
|
||||
|
@ -13,7 +13,6 @@ class MixSegment: public SegmentBase {
|
||||
const string& userDict = "")
|
||||
: mpSeg_(mpSegDict, userDict),
|
||||
hmmSeg_(hmmSegDict) {
|
||||
LogInfo("MixSegment init %s, %s", mpSegDict.c_str(), hmmSegDict.c_str());
|
||||
}
|
||||
MixSegment(const DictTrie* dictTrie, const HMMModel* model)
|
||||
: mpSeg_(dictTrie), hmmSeg_(model) {
|
||||
|
@ -35,7 +35,7 @@ class PosTagger {
|
||||
assert(dict != NULL);
|
||||
for (vector<string>::iterator itr = CutRes.begin(); itr != CutRes.end(); ++itr) {
|
||||
if (!TransCode::Decode(*itr, unico)) {
|
||||
LogError("Decode failed.");
|
||||
LOG(ERROR) << "Decode failed.";
|
||||
return false;
|
||||
}
|
||||
tmp = dict->Find(unico.begin(), unico.end());
|
||||
|
@ -38,7 +38,7 @@ class PreFilter {
|
||||
Range range;
|
||||
range.begin = cursor_;
|
||||
while (cursor_ != sentence_.end()) {
|
||||
if (isIn(symbols_, *cursor_)) {
|
||||
if (IsIn(symbols_, *cursor_)) {
|
||||
if (range.begin == cursor_) {
|
||||
cursor_ ++;
|
||||
}
|
||||
|
@ -4,7 +4,7 @@
|
||||
#include <algorithm>
|
||||
#include <set>
|
||||
#include <cassert>
|
||||
#include "limonp/Logger.hpp"
|
||||
#include "limonp/Logging.hpp"
|
||||
#include "DictTrie.hpp"
|
||||
#include "SegmentBase.hpp"
|
||||
#include "FullSegment.hpp"
|
||||
|
@ -1,7 +1,7 @@
|
||||
#ifndef CPPJIEBA_SEGMENTBASE_H
|
||||
#define CPPJIEBA_SEGMENTBASE_H
|
||||
|
||||
#include "limonp/Logger.hpp"
|
||||
#include "limonp/Logging.hpp"
|
||||
#include "PreFilter.hpp"
|
||||
#include <cassert>
|
||||
|
||||
|
@ -21,7 +21,7 @@ inline bool Decode(const string& str, Unicode& res) {
|
||||
#ifdef CPPJIEBA_GBK
|
||||
return gbkTrans(str, res);
|
||||
#else
|
||||
return utf8ToUnicode(str, res);
|
||||
return Utf8ToUnicode(str, res);
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -29,7 +29,7 @@ inline void Encode(Unicode::const_iterator begin, Unicode::const_iterator end, s
|
||||
#ifdef CPPJIEBA_GBK
|
||||
gbkTrans(begin, end, res);
|
||||
#else
|
||||
unicodeToUtf8(begin, end, res);
|
||||
UnicodeToUtf8(begin, end, res);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -20,7 +20,7 @@ struct DictUnit {
|
||||
inline ostream & operator << (ostream& os, const DictUnit& unit) {
|
||||
string s;
|
||||
s << unit.word;
|
||||
return os << string_format("%s %s %.3lf", s.c_str(), unit.tag.c_str(), unit.weight);
|
||||
return os << StringFormat("%s %s %.3lf", s.c_str(), unit.tag.c_str(), unit.weight);
|
||||
}
|
||||
|
||||
struct Dag {
|
||||
|
@ -12,26 +12,26 @@ int main(int argc, char** argv) {
|
||||
|
||||
cout << "[demo] Cut With HMM" << endl;
|
||||
jieba.Cut(s, words, true);
|
||||
cout << limonp::join(words.begin(), words.end(), "/") << endl;
|
||||
cout << limonp::Join(words.begin(), words.end(), "/") << endl;
|
||||
|
||||
cout << "[demo] Cut Without HMM " << endl;
|
||||
jieba.Cut(s, words, false);
|
||||
cout << limonp::join(words.begin(), words.end(), "/") << endl;
|
||||
cout << limonp::Join(words.begin(), words.end(), "/") << endl;
|
||||
|
||||
cout << "[demo] CutAll" << endl;
|
||||
jieba.CutAll(s, words);
|
||||
cout << limonp::join(words.begin(), words.end(), "/") << endl;
|
||||
cout << limonp::Join(words.begin(), words.end(), "/") << endl;
|
||||
|
||||
cout << "[demo] CutForSearch" << endl;
|
||||
jieba.CutForSearch(s, words);
|
||||
cout << limonp::join(words.begin(), words.end(), "/") << endl;
|
||||
cout << limonp::Join(words.begin(), words.end(), "/") << endl;
|
||||
|
||||
cout << "[demo] Insert User Word" << endl;
|
||||
jieba.Cut("男默女泪", words);
|
||||
cout << limonp::join(words.begin(), words.end(), "/") << endl;
|
||||
cout << limonp::Join(words.begin(), words.end(), "/") << endl;
|
||||
jieba.InsertUserWord("男默女泪");
|
||||
jieba.Cut("男默女泪", words);
|
||||
cout << limonp::join(words.begin(), words.end(), "/") << endl;
|
||||
cout << limonp::Join(words.begin(), words.end(), "/") << endl;
|
||||
|
||||
cout << "[demo] Locate Words" << endl;
|
||||
vector<cppjieba::Jieba::LocWord> loc_words;
|
||||
|
@ -91,6 +91,6 @@ TEST(JiebaTest, InsertUserWord) {
|
||||
ASSERT_TRUE(jieba.InsertUserWord(newWord));
|
||||
jieba.Cut(newWord, words);
|
||||
result << words;
|
||||
ASSERT_EQ(result, string_format("[\"%s\"]", newWord.c_str()));
|
||||
ASSERT_EQ(result, StringFormat("[\"%s\"]", newWord.c_str()));
|
||||
}
|
||||
}
|
||||
|
@ -20,7 +20,7 @@ TEST(PreFilterTest, Test1) {
|
||||
range = filter.Next();
|
||||
words.push_back(TransCode::Encode(range.begin, range.end));
|
||||
}
|
||||
res = join(words.begin(), words.end(), "/");
|
||||
res = Join(words.begin(), words.end(), "/");
|
||||
ASSERT_EQ(res, expected);
|
||||
}
|
||||
|
||||
@ -34,7 +34,7 @@ TEST(PreFilterTest, Test1) {
|
||||
range = filter.Next();
|
||||
words.push_back(TransCode::Encode(range.begin, range.end));
|
||||
}
|
||||
res = join(words.begin(), words.end(), "/");
|
||||
res = Join(words.begin(), words.end(), "/");
|
||||
for (size_t i = 0; i < words.size(); i++) {
|
||||
}
|
||||
ASSERT_EQ(res, expected);
|
||||
|
@ -20,7 +20,7 @@ TEST(MixSegmentTest, Test1) {
|
||||
sentence = "我来自北京邮电大学。。。学号123456,用AK47";
|
||||
expected = "我/来自/北京邮电大学/。/。/。/学号/123456/,/用/AK47";
|
||||
segment.Cut(sentence, words);
|
||||
actual = join(words.begin(), words.end(), "/");
|
||||
actual = Join(words.begin(), words.end(), "/");
|
||||
ASSERT_EQ(actual, expected);
|
||||
}
|
||||
|
||||
@ -28,7 +28,7 @@ TEST(MixSegmentTest, Test1) {
|
||||
sentence = "B超 T恤";
|
||||
expected = "B超/ /T恤";
|
||||
segment.Cut(sentence, words);
|
||||
actual = join(words.begin(), words.end(), "/");
|
||||
actual = Join(words.begin(), words.end(), "/");
|
||||
ASSERT_EQ(actual, expected);
|
||||
}
|
||||
|
||||
@ -36,7 +36,7 @@ TEST(MixSegmentTest, Test1) {
|
||||
sentence = "他来到了网易杭研大厦";
|
||||
expected = "他/来到/了/网易/杭/研/大厦";
|
||||
segment.Cut(sentence, words, false);
|
||||
actual = join(words.begin(), words.end(), "/");
|
||||
actual = Join(words.begin(), words.end(), "/");
|
||||
ASSERT_EQ(actual, expected);
|
||||
}
|
||||
|
||||
@ -44,7 +44,7 @@ TEST(MixSegmentTest, Test1) {
|
||||
sentence = "他来到了网易杭研大厦";
|
||||
expected = "他/来到/了/网易/杭研/大厦";
|
||||
segment.Cut(sentence, words);
|
||||
actual = join(words.begin(), words.end(), "/");
|
||||
actual = Join(words.begin(), words.end(), "/");
|
||||
ASSERT_EQ(actual, expected);
|
||||
}
|
||||
}
|
||||
@ -102,7 +102,7 @@ TEST(MixSegmentTest, TestUserDict) {
|
||||
ASSERT_EQ("[\"I\", \"B\", \"M\", \",\", \"3.14\"]", res);
|
||||
|
||||
segment.Cut("忽如一夜春风来,千树万树梨花开", words);
|
||||
res = limonp::join(words.begin(), words.end(), "/");
|
||||
res = limonp::Join(words.begin(), words.end(), "/");
|
||||
ASSERT_EQ("忽如一夜春风来/,/千树/万树/梨花/开", res);
|
||||
}
|
||||
|
||||
@ -113,7 +113,7 @@ TEST(MixSegmentTest, TestMultiUserDict) {
|
||||
string res;
|
||||
|
||||
segment.Cut("忽如一夜春风来,千树万树梨花开", words);
|
||||
res = limonp::join(words.begin(), words.end(), "/");
|
||||
res = limonp::Join(words.begin(), words.end(), "/");
|
||||
ASSERT_EQ("忽如一夜春风来/,/千树万树梨花开", res);
|
||||
}
|
||||
|
||||
@ -138,11 +138,11 @@ TEST(MPSegmentTest, Test1) {
|
||||
ASSERT_EQ("[\"南\", \"京\", \"市\", \"长\", \"江\", \"大\", \"桥\"]", s << words);
|
||||
|
||||
segment.Cut("湖南长沙市天心区", words);
|
||||
s = join(words.begin(), words.end(), "/");
|
||||
s = Join(words.begin(), words.end(), "/");
|
||||
ASSERT_EQ("湖南长沙市/天心区", s);
|
||||
|
||||
segment.Cut("湖南长沙市天心区", words, 3);
|
||||
s = join(words.begin(), words.end(), "/");
|
||||
s = Join(words.begin(), words.end(), "/");
|
||||
ASSERT_EQ("湖南/长沙市/天心区", s);
|
||||
}
|
||||
|
||||
@ -254,7 +254,7 @@ TEST(QuerySegment, Test2) {
|
||||
{
|
||||
vector<string> words;
|
||||
segment.Cut("internal", words);
|
||||
string s = join(words.begin(), words.end(), "/");
|
||||
string s = Join(words.begin(), words.end(), "/");
|
||||
ASSERT_EQ("internal", s);
|
||||
}
|
||||
|
||||
@ -263,7 +263,7 @@ TEST(QuerySegment, Test2) {
|
||||
{
|
||||
vector<string> words;
|
||||
segment.Cut("中国科学院", words);
|
||||
string s = join(words.begin(), words.end(), "/");
|
||||
string s = Join(words.begin(), words.end(), "/");
|
||||
ASSERT_EQ("中国科学院", s);
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user