mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
Merge branch 'dev' of https://github.com/aszxqw/cppjieba into dev
This commit is contained in:
commit
8e2c726a8c
13
COPYRIGHT
13
COPYRIGHT
@ -1,13 +0,0 @@
|
|||||||
DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
|
|
||||||
Version 2, December 2004
|
|
||||||
|
|
||||||
Copyright (C) 2013 Yanyi Wu <wuyanyi09@gmail.com>
|
|
||||||
|
|
||||||
Everyone is permitted to copy and distribute verbatim or modified
|
|
||||||
copies of this license document, and changing it is allowed as long
|
|
||||||
as the name is changed.
|
|
||||||
|
|
||||||
DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
|
|
||||||
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
|
|
||||||
|
|
||||||
0. You just DO WHAT THE FUCK YOU WANT TO.
|
|
20
LICENSE
Normal file
20
LICENSE
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
The MIT License (MIT)
|
||||||
|
|
||||||
|
Copyright (c) 2013 Yanyi Wu
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||||
|
this software and associated documentation files (the "Software"), to deal in
|
||||||
|
the Software without restriction, including without limitation the rights to
|
||||||
|
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||||
|
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||||
|
subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in all
|
||||||
|
copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||||
|
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||||
|
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||||
|
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||||
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
@ -10,6 +10,11 @@
|
|||||||
|
|
||||||
## 安装与使用
|
## 安装与使用
|
||||||
|
|
||||||
|
### 依赖
|
||||||
|
|
||||||
|
* g++ (version >= 4.6);
|
||||||
|
* cmake (version >= 2.8);
|
||||||
|
|
||||||
### 下载和安装
|
### 下载和安装
|
||||||
|
|
||||||
```sh
|
```sh
|
||||||
|
29
dict/README.md
Normal file
29
dict/README.md
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
# CppJieba字典
|
||||||
|
|
||||||
|
文件后缀名代表的是词典的编码方式。
|
||||||
|
比如filename.utf8 是 utf8编码,filename.gbk 是 gbk编码方式。
|
||||||
|
|
||||||
|
|
||||||
|
## 分词
|
||||||
|
|
||||||
|
### jieba.dict.utf8/gbk
|
||||||
|
|
||||||
|
作为最大概率法(MPSegment: Max Probability)分词所使用的词典。
|
||||||
|
|
||||||
|
### hmm_model.utf8/gbk
|
||||||
|
|
||||||
|
作为隐式马尔科夫模型(HMMSegment: Hidden Markov Model)分词所使用的词典。
|
||||||
|
|
||||||
|
__对于MixSegment(混合MPSegment和HMMSegment两者)则同时使用以上两个词典__
|
||||||
|
|
||||||
|
|
||||||
|
## 关键词抽取
|
||||||
|
|
||||||
|
## idf.utf8
|
||||||
|
|
||||||
|
IDF(Inverse Document Frequency)
|
||||||
|
在KeywordExtractor中,使用的是经典的TF-IDF算法,所以需要这么一个词典提供IDF信息。
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1,43 +1,36 @@
|
|||||||
#ifndef CPPJIEBA_KEYWORD_EXTRACTOR_H
|
#ifndef CPPJIEBA_KEYWORD_EXTRACTOR_H
|
||||||
#define CPPJIEBA_KEYWORD_EXTRACTOR_H
|
#define CPPJIEBA_KEYWORD_EXTRACTOR_H
|
||||||
|
|
||||||
#include "MPSegment.hpp"
|
#include "MixSegment.hpp"
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
|
#include <unordered_set>
|
||||||
#define MIN(X,Y) ((X) < (Y) ? (X) : (Y))
|
#define MIN(X,Y) ((X) < (Y) ? (X) : (Y))
|
||||||
|
|
||||||
namespace CppJieba
|
namespace CppJieba
|
||||||
{
|
{
|
||||||
using namespace Limonp;
|
using namespace Limonp;
|
||||||
|
|
||||||
//struct KeyWordInfo
|
/*utf8*/
|
||||||
//{
|
const char * BLACK_LIST[] = {"我们", "他们"};
|
||||||
// string word;
|
|
||||||
// double tfidf;
|
|
||||||
//};
|
|
||||||
|
|
||||||
//inline ostream& operator << (ostream& os, const KeyWordInfo & keyword)
|
class KeywordExtractor: public InitOnOff
|
||||||
//{
|
|
||||||
// return os << keyword.word << "," << keyword.idf;
|
|
||||||
//}
|
|
||||||
|
|
||||||
class KeywordExtractor
|
|
||||||
{
|
{
|
||||||
private:
|
private:
|
||||||
MPSegment _segment;
|
MixSegment _segment;
|
||||||
private:
|
private:
|
||||||
unordered_map<string, double> _idfMap;
|
unordered_map<string, double> _idfMap;
|
||||||
protected:
|
double _idfAverage;
|
||||||
bool _isInited;
|
|
||||||
bool _getInitFlag()const{return _isInited;};
|
unordered_set<string> _blackSet;
|
||||||
bool _setInitFlag(bool flag){return _isInited = flag;};
|
|
||||||
public:
|
|
||||||
operator bool(){return _getInitFlag();};
|
|
||||||
public:
|
public:
|
||||||
KeywordExtractor(){_setInitFlag(false);};
|
KeywordExtractor(){_setInitFlag(false);};
|
||||||
explicit KeywordExtractor(const string& dictPath, const string& idfPath){_setInitFlag(init(dictPath, idfPath));};
|
explicit KeywordExtractor(const string& dictPath, const string& hmmFilePath, const string& idfPath)
|
||||||
|
{
|
||||||
|
_setInitFlag(init(dictPath, hmmFilePath, idfPath));
|
||||||
|
};
|
||||||
~KeywordExtractor(){};
|
~KeywordExtractor(){};
|
||||||
public:
|
public:
|
||||||
bool init(const string& dictPath, const string& idfPath)
|
bool init(const string& dictPath, const string& hmmFilePath, const string& idfPath)
|
||||||
{
|
{
|
||||||
ifstream ifs(idfPath.c_str());
|
ifstream ifs(idfPath.c_str());
|
||||||
if(!ifs)
|
if(!ifs)
|
||||||
@ -47,7 +40,10 @@ namespace CppJieba
|
|||||||
}
|
}
|
||||||
string line ;
|
string line ;
|
||||||
vector<string> buf;
|
vector<string> buf;
|
||||||
for(uint lineno = 0; getline(ifs, line); lineno++)
|
double idf = 0.0;
|
||||||
|
double idfSum = 0.0;
|
||||||
|
size_t lineno = 0;
|
||||||
|
for(;getline(ifs, line); lineno++)
|
||||||
{
|
{
|
||||||
buf.clear();
|
buf.clear();
|
||||||
if(line.empty())
|
if(line.empty())
|
||||||
@ -60,9 +56,22 @@ namespace CppJieba
|
|||||||
LogError("line %d [%s] illegal. skipped.", lineno, line.c_str());
|
LogError("line %d [%s] illegal. skipped.", lineno, line.c_str());
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
_idfMap[buf[0]] = atof(buf[1].c_str());
|
idf = atof(buf[1].c_str());
|
||||||
}
|
_idfMap[buf[0]] = idf;
|
||||||
return _setInitFlag(_segment.init(dictPath));
|
idfSum += idf;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
std::copy(
|
||||||
|
BLACK_LIST, BLACK_LIST + sizeof(BLACK_LIST)/sizeof(BLACK_LIST[0]),
|
||||||
|
std::inserter(_blackSet, _blackSet.begin()));
|
||||||
|
|
||||||
|
assert(lineno);
|
||||||
|
_idfAverage = idfSum / lineno;
|
||||||
|
|
||||||
|
assert(_idfAverage > 0.0);
|
||||||
|
|
||||||
|
return _setInitFlag(_segment.init(dictPath, hmmFilePath));
|
||||||
};
|
};
|
||||||
public:
|
public:
|
||||||
|
|
||||||
@ -90,30 +99,58 @@ namespace CppJieba
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// filtering single word.
|
||||||
|
for(vector<string>::iterator iter = words.begin(); iter != words.end(); )
|
||||||
|
{
|
||||||
|
if(_isSingleWord(*iter))
|
||||||
|
{
|
||||||
|
iter = words.erase(iter);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
iter++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
unordered_map<string, double> wordmap;
|
unordered_map<string, double> wordmap;
|
||||||
for(uint i = 0; i < words.size(); i ++)
|
for(uint i = 0; i < words.size(); i ++)
|
||||||
{
|
{
|
||||||
wordmap[ words[i] ] += 1.0;
|
wordmap[ words[i] ] += 1.0;
|
||||||
}
|
}
|
||||||
|
|
||||||
for(unordered_map<string, double>::iterator itr = wordmap.begin(); itr != wordmap.end();)
|
for(unordered_map<string, double>::iterator itr = wordmap.begin(); itr != wordmap.end(); )
|
||||||
{
|
{
|
||||||
|
if(_blackSet.end() != _blackSet.find(itr->first))
|
||||||
|
{
|
||||||
|
itr = wordmap.erase(itr);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
unordered_map<string, double>::const_iterator cit = _idfMap.find(itr->first);
|
unordered_map<string, double>::const_iterator cit = _idfMap.find(itr->first);
|
||||||
if(cit != _idfMap.end())
|
if(cit != _idfMap.end())
|
||||||
{
|
{
|
||||||
itr->second *= cit->second;
|
itr->second *= cit->second;
|
||||||
itr ++;
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
itr = wordmap.erase(itr);
|
itr->second *= _idfAverage;
|
||||||
}
|
}
|
||||||
|
itr ++;
|
||||||
}
|
}
|
||||||
|
|
||||||
keywords.resize(MIN(topN, wordmap.size()));
|
keywords.resize(MIN(topN, wordmap.size()));
|
||||||
partial_sort_copy(wordmap.begin(), wordmap.end(), keywords.begin(), keywords.end(), _cmp);
|
partial_sort_copy(wordmap.begin(), wordmap.end(), keywords.begin(), keywords.end(), _cmp);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
private:
|
||||||
|
bool _isSingleWord(const string& str) const
|
||||||
|
{
|
||||||
|
Unicode unicode;
|
||||||
|
TransCode::decode(str, unicode);
|
||||||
|
if(unicode.size() == 1)
|
||||||
|
return true;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
static bool _cmp(const pair<string, uint>& lhs, const pair<string, uint>& rhs)
|
static bool _cmp(const pair<string, uint>& lhs, const pair<string, uint>& rhs)
|
||||||
|
@ -1 +1,3 @@
|
|||||||
INSTALL(FILES ArgvContext.hpp io_functs.hpp macro_def.hpp MysqlClient.hpp str_functs.hpp cast_functs.hpp Config.hpp logger.hpp map_functs.hpp std_outbound.hpp DESTINATION include/CppJieba/Limonp)
|
INSTALL(FILES ArgvContext.hpp io_functs.hpp macro_def.hpp MysqlClient.hpp
|
||||||
|
str_functs.hpp cast_functs.hpp Config.hpp logger.hpp map_functs.hpp
|
||||||
|
std_outbound.hpp InitOnOff.hpp DESTINATION include/CppJieba/Limonp)
|
||||||
|
21
src/Limonp/InitOnOff.hpp
Normal file
21
src/Limonp/InitOnOff.hpp
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
#ifndef LIMONP_INITONOFF_H
|
||||||
|
#define LIMONP_INITONOFF_H
|
||||||
|
|
||||||
|
namespace Limonp
|
||||||
|
{
|
||||||
|
class InitOnOff
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
InitOnOff(){_setInitFlag(false);};
|
||||||
|
~InitOnOff(){};
|
||||||
|
protected:
|
||||||
|
bool _isInited;
|
||||||
|
bool _getInitFlag()const{return _isInited;};
|
||||||
|
bool _setInitFlag(bool flag){return _isInited = flag;};
|
||||||
|
public:
|
||||||
|
operator bool(){return _getInitFlag();};
|
||||||
|
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
@ -100,7 +100,7 @@ namespace Limonp
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
inline bool split(const string& src, vector<string>& res, const string& pattern)
|
inline bool split(const string& src, vector<string>& res, const string& pattern, size_t offset = 0, size_t len = string::npos)
|
||||||
{
|
{
|
||||||
if(src.empty())
|
if(src.empty())
|
||||||
{
|
{
|
||||||
@ -110,20 +110,28 @@ namespace Limonp
|
|||||||
|
|
||||||
size_t start = 0;
|
size_t start = 0;
|
||||||
size_t end = 0;
|
size_t end = 0;
|
||||||
while(start < src.size())
|
size_t cnt = 0;
|
||||||
|
while(start < src.size() && res.size() < len)
|
||||||
{
|
{
|
||||||
end = src.find_first_of(pattern, start);
|
end = src.find_first_of(pattern, start);
|
||||||
if(string::npos == end)
|
if(string::npos == end)
|
||||||
{
|
{
|
||||||
res.push_back(src.substr(start));
|
if(cnt >= offset)
|
||||||
|
{
|
||||||
|
res.push_back(src.substr(start));
|
||||||
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
res.push_back(src.substr(start, end - start));
|
//if(end == src.size() - 1)
|
||||||
if(end == src.size() - 1)
|
//{
|
||||||
|
// res.push_back("");
|
||||||
|
// return true;
|
||||||
|
//}
|
||||||
|
if(cnt >= offset)
|
||||||
{
|
{
|
||||||
res.push_back("");
|
res.push_back(src.substr(start, end - start));
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
|
cnt ++;
|
||||||
start = end + 1;
|
start = end + 1;
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
@ -158,12 +166,8 @@ namespace Limonp
|
|||||||
return ltrim(rtrim(s));
|
return ltrim(rtrim(s));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
inline bool startsWith(const string& str, const string& prefix)
|
inline bool startsWith(const string& str, const string& prefix)
|
||||||
{
|
{
|
||||||
//return str.substr(0, prefix.size()) == prefix;
|
|
||||||
if(prefix.length() > str.length())
|
if(prefix.length() > str.length())
|
||||||
{
|
{
|
||||||
return false;
|
return false;
|
||||||
|
@ -3,6 +3,7 @@
|
|||||||
|
|
||||||
#include "TransCode.hpp"
|
#include "TransCode.hpp"
|
||||||
#include "Limonp/logger.hpp"
|
#include "Limonp/logger.hpp"
|
||||||
|
#include "Limonp/InitOnOff.hpp"
|
||||||
#include "ISegment.hpp"
|
#include "ISegment.hpp"
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
|
|
||||||
@ -10,17 +11,11 @@
|
|||||||
namespace CppJieba
|
namespace CppJieba
|
||||||
{
|
{
|
||||||
using namespace Limonp;
|
using namespace Limonp;
|
||||||
class SegmentBase: public ISegment
|
class SegmentBase: public ISegment, public InitOnOff
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
SegmentBase(){_setInitFlag(false);};
|
SegmentBase(){};
|
||||||
virtual ~SegmentBase(){};
|
virtual ~SegmentBase(){};
|
||||||
protected:
|
|
||||||
bool _isInited;
|
|
||||||
bool _getInitFlag()const{return _isInited;};
|
|
||||||
bool _setInitFlag(bool flag){return _isInited = flag;};
|
|
||||||
public:
|
|
||||||
operator bool(){return _getInitFlag();};
|
|
||||||
|
|
||||||
public:
|
public:
|
||||||
virtual bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<string>& res)const = 0;
|
virtual bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<string>& res)const = 0;
|
||||||
|
@ -3,25 +3,25 @@
|
|||||||
|
|
||||||
using namespace CppJieba;
|
using namespace CppJieba;
|
||||||
|
|
||||||
|
const char* KEYWORD_EXT_TEST_SENTENCE = "我来自北京邮电大学。 学号123456";
|
||||||
|
|
||||||
TEST(KeywordExtractorTest, Test1)
|
TEST(KeywordExtractorTest, Test1)
|
||||||
{
|
{
|
||||||
KeywordExtractor extractor("../dict/jieba.dict.utf8", "../dict/idf.utf8");
|
KeywordExtractor extractor("../dict/jieba.dict.utf8", "../dict/hmm_model.utf8", "../dict/idf.utf8");
|
||||||
const char* str = "我来自北京邮电大学。。。 学号 123456";
|
const char* res[] = {"学号", "北京邮电大学"};
|
||||||
const char* res[] = {"北京邮电大学", "来自"};
|
|
||||||
vector<string> words;
|
vector<string> words;
|
||||||
ASSERT_TRUE(extractor);
|
ASSERT_TRUE(extractor);
|
||||||
ASSERT_TRUE(extractor.extract(str, words, 2));
|
ASSERT_TRUE(extractor.extract(KEYWORD_EXT_TEST_SENTENCE, words, 2));
|
||||||
ASSERT_EQ(words, vector<string>(res, res + sizeof(res)/sizeof(res[0])));
|
ASSERT_EQ(words, vector<string>(res, res + sizeof(res)/sizeof(res[0])));
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(KeywordExtractorTest, Test2)
|
TEST(KeywordExtractorTest, Test2)
|
||||||
{
|
{
|
||||||
KeywordExtractor extractor("../dict/jieba.dict.utf8", "../dict/idf.utf8");
|
KeywordExtractor extractor("../dict/jieba.dict.utf8", "../dict/hmm_model.utf8", "../dict/idf.utf8");
|
||||||
const char* str = "我来自北京邮电大学。。。 学号 123456";
|
const char* res[] = {"学号", "北京邮电大学", "123456", "来自"};
|
||||||
const char* res[] = {"北京邮电大学", "来自"};
|
|
||||||
vector<string> words;
|
vector<string> words;
|
||||||
ASSERT_TRUE(extractor);
|
ASSERT_TRUE(extractor);
|
||||||
ASSERT_TRUE(extractor.extract(str, words, 9));
|
ASSERT_TRUE(extractor.extract(KEYWORD_EXT_TEST_SENTENCE, words, 9));
|
||||||
ASSERT_EQ(words, vector<string>(res, res + sizeof(res)/sizeof(res[0])));
|
ASSERT_EQ(words, vector<string>(res, res + sizeof(res)/sizeof(res[0])));
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -31,26 +31,35 @@ TEST(KeywordExtractorTest, Test3)
|
|||||||
ifstream ifs("../test/testdata/weicheng.utf8");
|
ifstream ifs("../test/testdata/weicheng.utf8");
|
||||||
ASSERT_TRUE(!!ifs);
|
ASSERT_TRUE(!!ifs);
|
||||||
string str((istreambuf_iterator<char>(ifs)), (istreambuf_iterator<char>()));
|
string str((istreambuf_iterator<char>(ifs)), (istreambuf_iterator<char>()));
|
||||||
KeywordExtractor extractor("../dict/jieba.dict.utf8", "../dict/idf.utf8");
|
KeywordExtractor extractor("../dict/jieba.dict.utf8", "../dict/hmm_model.utf8", "../dict/idf.utf8");
|
||||||
const char* res[] = {"小姐", "孙小姐", "方鸿渐", "自己", "没有"};
|
const char* res[] = {"柔嘉", "小姐", "孙小姐", "方鸿渐", "鸿渐"};
|
||||||
|
const char* res2 = "[\"柔嘉:5611.34\", \"小姐:4268.75\", \"孙小姐:3789.41\", \"方鸿渐:3030.35\", \"鸿渐:2552.93\"]";
|
||||||
vector<string> keywords;
|
vector<string> keywords;
|
||||||
|
string resStr;
|
||||||
|
vector<pair<string,double> > keywords2;
|
||||||
extractor.extract(str, keywords, 5);
|
extractor.extract(str, keywords, 5);
|
||||||
|
extractor.extract(str, keywords2, 5);
|
||||||
ASSERT_EQ(keywords, vector<string>(res, res + sizeof(res)/sizeof(res[0])));
|
ASSERT_EQ(keywords, vector<string>(res, res + sizeof(res)/sizeof(res[0])));
|
||||||
|
resStr << keywords2;
|
||||||
|
ASSERT_EQ(res2, resStr);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(KeywordExtractorTest, Test4)
|
//TEST(KeywordExtractorTest, Test4)
|
||||||
{
|
//{
|
||||||
ifstream ifs("../test/testdata/weicheng.utf8");
|
// ifstream ifs("../test/testdata/weicheng.utf8");
|
||||||
ASSERT_TRUE(!!ifs);
|
// ASSERT_TRUE(!!ifs);
|
||||||
string str((istreambuf_iterator<char>(ifs)), (istreambuf_iterator<char>()));
|
// string str((istreambuf_iterator<char>(ifs)), (istreambuf_iterator<char>()));
|
||||||
KeywordExtractor extractor("../dict/jieba.dict.utf8", "../dict/idf.utf8");
|
// KeywordExtractor extractor("../dict/jieba.dict.utf8", "../dict/hmm_model.utf8", "../dict/idf.utf8");
|
||||||
//const char* res[] = {"小姐", "孙小姐", "方鸿渐", "自己", "没有"};
|
// //const char* res[] = {"小姐", "孙小姐", "方鸿渐", "自己", "没有"};
|
||||||
vector<pair<string,double> > keywords;
|
// vector<pair<string,double> > keywords;
|
||||||
extractor.extract(str, keywords, 5);
|
// extractor.extract(str, keywords, 5);
|
||||||
//print(keywords);
|
// //print(keywords);
|
||||||
string res;
|
// string res;
|
||||||
res << keywords;
|
// res << keywords;
|
||||||
ASSERT_EQ(res, "[\"小姐:4268.75\", \"孙小姐:3789.41\", \"方鸿渐:3030.35\", \"自己:2300.54\", \"没有:2104.27\"]");
|
// print(keywords);
|
||||||
|
// print(__LINE__);
|
||||||
}
|
// exit(1);
|
||||||
|
// ASSERT_EQ(res, "[\"小姐:4268.75\", \"孙小姐:3789.41\", \"方鸿渐:3030.35\", \"自己:2300.54\", \"没有:2104.27\"]");
|
||||||
|
//
|
||||||
|
//}
|
||||||
|
@ -19,7 +19,7 @@ TEST(Md5Test, Test1)
|
|||||||
{
|
{
|
||||||
ASSERT_EQ(sizeof(DICT_FILE)/sizeof(DICT_FILE[0]), sizeof(DICT_FILE_MD5)/sizeof(DICT_FILE_MD5[0]));
|
ASSERT_EQ(sizeof(DICT_FILE)/sizeof(DICT_FILE[0]), sizeof(DICT_FILE_MD5)/sizeof(DICT_FILE_MD5[0]));
|
||||||
string tmp;
|
string tmp;
|
||||||
for (int i = 0; i < sizeof(DICT_FILE)/sizeof(DICT_FILE[0]); i++)
|
for (uint i = 0; i < sizeof(DICT_FILE)/sizeof(DICT_FILE[0]); i++)
|
||||||
{
|
{
|
||||||
md5File(DICT_FILE[i], tmp);
|
md5File(DICT_FILE[i], tmp);
|
||||||
ASSERT_EQ(tmp, string(DICT_FILE_MD5[i]));
|
ASSERT_EQ(tmp, string(DICT_FILE_MD5[i]));
|
||||||
|
Loading…
x
Reference in New Issue
Block a user