rm TrieManager.hpp

This commit is contained in:
wyy 2014-03-15 22:48:29 +08:00
parent ddaa5589f1
commit a4b0a6c762
14 changed files with 109841 additions and 32 deletions

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,22 @@
/************************************
************************************/
#ifndef LIMONP_NONCOPYABLE_H
#define LIMONP_NONCOPYABLE_H
#include <iostream>
#include <string>
namespace Limonp
{
/**
 * Base class for types that must not be copied.
 *
 * The copy constructor and copy assignment operator are private, so code
 * outside this class cannot copy a NonCopyable, and a class deriving from it
 * does not get a usable implicit copy constructor or copy assignment.
 * Construction and destruction are protected: the class is only usable as a
 * base, never as a standalone object.
 */
class NonCopyable
{
    private:
        // Copy operations: declared only (no definition appears in this header).
        NonCopyable(const NonCopyable& );
        const NonCopyable& operator=(const NonCopyable& );
    protected:
        // Reachable from derived classes only.
        NonCopyable() {}
        ~NonCopyable() {}
};
}
#endif

View File

@ -12,16 +12,14 @@
#include <stdio.h>
#include <stdarg.h>
#include <cassert>
#include "io_functs.hpp"
#include "str_functs.hpp"
// Basename of the current source file: the text following the last '/' in
// __FILE__, or __FILE__ itself when it contains no '/'.
// The whole conditional expression is parenthesized so that operators written
// next to the macro at the expansion site (e.g. `*FILE_BASENAME`,
// `FILE_BASENAME != p`) apply to the result instead of to one arm of the ?: only.
#define FILE_BASENAME (strrchr(__FILE__, '/') ? strrchr(__FILE__, '/') + 1 : __FILE__)
#define LogDebug(fmt, ...) Logger::LoggingF(LL_DEBUG, FILE_BASENAME, __LINE__, fmt, ## __VA_ARGS__)
#define LogInfo(fmt, ...) Logger::LoggingF(LL_INFO, FILE_BASENAME, __LINE__, fmt, ## __VA_ARGS__)
#define LogWarn(fmt, ...) Logger::LoggingF(LL_WARN, FILE_BASENAME, __LINE__, fmt, ## __VA_ARGS__)
#define LogError(fmt, ...) Logger::LoggingF(LL_ERROR, FILE_BASENAME, __LINE__, fmt, ## __VA_ARGS__)
#define LogFatal(fmt, ...) Logger::LoggingF(LL_FATAL, FILE_BASENAME, __LINE__, fmt, ## __VA_ARGS__)
// Logging convenience macros, one per severity constant (LL_DEBUG .. LL_FATAL).
// Each expands to a call of Limonp::Logger::LoggingF, passing the severity,
// FILE_BASENAME, __LINE__, the format argument and any further arguments.
// All names are qualified with Limonp:: so the expansion does not depend on a
// using-directive at the call site.
// `## __VA_ARGS__` is the GNU extension that drops the preceding comma when
// the macro is invoked with no variadic arguments.
#define LogDebug(fmt, ...) Limonp::Logger::LoggingF(Limonp::LL_DEBUG, FILE_BASENAME, __LINE__, fmt, ## __VA_ARGS__)
#define LogInfo(fmt, ...) Limonp::Logger::LoggingF(Limonp::LL_INFO, FILE_BASENAME, __LINE__, fmt, ## __VA_ARGS__)
#define LogWarn(fmt, ...) Limonp::Logger::LoggingF(Limonp::LL_WARN, FILE_BASENAME, __LINE__, fmt, ## __VA_ARGS__)
#define LogError(fmt, ...) Limonp::Logger::LoggingF(Limonp::LL_ERROR, FILE_BASENAME, __LINE__, fmt, ## __VA_ARGS__)
#define LogFatal(fmt, ...) Limonp::Logger::LoggingF(Limonp::LL_FATAL, FILE_BASENAME, __LINE__, fmt, ## __VA_ARGS__)
@ -36,7 +34,7 @@ namespace Limonp
class Logger
{
public:
static bool Logging(uint level, const string& msg, const char* fileName, int lineNo)
static bool Logging(size_t level, const string& msg, const char* fileName, int lineNo)
{
assert(level <= LL_FATAL);
char buf[CSTR_BUFFER_SIZE];
@ -50,7 +48,7 @@ namespace Limonp
fprintf(stderr, LOG_FORMAT, buf, fileName, lineNo,LOG_LEVEL_ARRAY[level], msg.c_str());
return true;
}
static bool LoggingF(uint level, const char* fileName, int lineNo, const string& fmt, ...)
static bool LoggingF(size_t level, const char* fileName, int lineNo, const string& fmt, ...)
{
#ifdef LOGGER_LEVEL
if(level < LOGGER_LEVEL) return true;

View File

@ -9,6 +9,7 @@
#include <map>
#include <set>
#include <vector>
#include <iostream>
#include <sstream>

View File

@ -4,9 +4,12 @@
#include "map_functs.hpp"
#include <map>
#include <set>
#include <vector>
#include <fstream>
namespace std
{
using namespace std;
template<typename T>
ostream& operator << (ostream& os, const vector<T>& vec)
{
@ -96,6 +99,18 @@ namespace std
os<<'}';
return os;
}
/**
 * Reports whether `key` can be found in `contain`.
 *
 * Works with any container offering find(key) and end(), where find returns
 * an iterator comparable to end().
 *
 * @param contain container to search
 * @param key     key handed to contain.find()
 * @return true when find(key) does not return end(), false otherwise
 */
template<class KeyType, class ContainType>
bool isIn(const ContainType& contain, const KeyType& key)
{
    const bool found = (contain.find(key) != contain.end());
    return found;
}
/**
 * Replaces the contents of `s` with everything that can still be read from
 * `ifs`, taken character by character through stream-buffer iterators.
 *
 * @param s   string to overwrite (previous contents are discarded)
 * @param ifs input file stream to drain
 * @return reference to `s`
 */
template<class T>
std::basic_string<T> & operator << (std::basic_string<T> & s, std::ifstream & ifs)
{
    std::istreambuf_iterator<T> first(ifs); // current read position of the stream buffer
    std::istreambuf_iterator<T> last;       // default-constructed: end-of-stream marker
    s.assign(first, last);
    return s;
}
}
#endif

View File

@ -24,7 +24,7 @@
#include "std_outbound.hpp"
#include "map_functs.hpp"
#define print(x) cout<<(x)<<endl
#define print(x) cout<< #x": " << x <<endl
namespace Limonp
{
@ -166,6 +166,23 @@ namespace Limonp
return ltrim(rtrim(s));
}
/**
 * Removes every leading occurrence of the character `x` from `s`, in place.
 *
 * Implemented with std::string::find_first_not_of instead of the
 * std::not1/std::bind2nd binders, which were deprecated in C++11 and removed
 * in C++17; the results are the same.
 *
 * @param s string to trim (modified)
 * @param x character to strip from the front
 * @return reference to `s`
 */
inline std::string & ltrim(std::string & s, char x)
{
    // find_first_not_of yields npos when s is empty or consists solely of x;
    // erase(0, npos) then clears the whole string.
    s.erase(0, s.find_first_not_of(x));
    return s;
}
/**
 * Removes every trailing occurrence of the character `x` from `s`, in place.
 *
 * Implemented with std::string::find_last_not_of instead of the
 * std::not1/std::bind2nd binders, which were deprecated in C++11 and removed
 * in C++17; the results are the same.
 *
 * @param s string to trim (modified)
 * @param x character to strip from the back
 * @return reference to `s`
 */
inline std::string & rtrim(std::string & s, char x)
{
    // find_last_not_of yields npos when s is empty or consists solely of x;
    // npos + 1 wraps around to 0, so erase(0) clears the whole string.
    // Otherwise the index after the last kept character is at most size(),
    // which is a valid erase position.
    s.erase(s.find_last_not_of(x) + 1);
    return s;
}
/**
 * Removes the character `x` from both ends of `s`, in place.
 *
 * The right side is trimmed first, then the left side.
 *
 * @param s string to trim (modified)
 * @param x character to strip
 * @return the reference returned by ltrim(s, x)
 */
inline std::string &trim(std::string &s, char x)
{
    std::string & rightTrimmed = rtrim(s, x);
    return ltrim(rightTrimmed, x);
}
inline bool startsWith(const string& str, const string& prefix)
{
if(prefix.length() > str.length())

View File

@ -4,6 +4,7 @@
#include <ctype.h>
#include <string.h>
#include "Limonp/Config.hpp"
#include "Limonp/io_functs.hpp"
#include "Husky/HuskyServer.hpp"
#include "MPSegment.hpp"
#include "HMMSegment.hpp"

View File

@ -17,7 +17,7 @@
标&#12288;&#12288;签:保湿还不错心&#12288;&#12288;得:挺好的。。。。。。。。。。
["标", "&#12288;&#12288;", "签", "", "保湿", "还", "不错", "心", "&#12288;&#12288;", "得", "", "挺", "好", "的", "。", "。", "。", "。", "。", "。", "。", "。", "。", "。"]
标&#12288;&#12288;签:是正品很好用心&#12288;&#12288;得:一直在京东买,可以信赖
["标", "&#12288;&#12288;", "签", "", "是", "正品", "很", "好", "用心", "&#12288;&#12288;", "得", "", "一直", "在", "京东", "买", "", "可以信赖"]
["标", "&#12288;&#12288;", "签", "", "是", "正品", "很", "好", "用心", "&#12288;&#12288;", "得", "", "一直", "在", "京东", "买", "", "可以", "信赖"]
标&#12288;&#12288;签:是正品挺保湿的效果不错心&#12288;&#12288;得:送货快!是正品,大品牌的用的放心!
["标", "&#12288;&#12288;", "签", "", "是", "正品", "挺", "保湿", "的", "效果", "不错", "心", "&#12288;&#12288;", "得", "", "送货", "快", "", "是", "正品", "", "大", "品牌", "的", "用", "的", "放心", ""]
标&#12288;&#12288;签:乳液很好用心&#12288;&#12288;得:很好的东东,下次还会买
@ -27,7 +27,7 @@
标&#12288;&#12288;签:价格实惠心&#12288;&#12288;得:一直用,还可以吧,性价比高
["标", "&#12288;&#12288;", "签", "", "价格", "实惠", "心", "&#12288;&#12288;", "得", "", "一直", "用", "", "还", "可以", "吧", "", "性价比", "高"]
心&#12288;&#12288;得:不错够速度,效果也不错,希望大家用着也一样,顶顶顶
["心", "&#12288;&#12288;", "得", "", "不错", "够", "速度", "", "效果", "也", "不错", "", "希望", "大家", "用", "着", "也", "一样", "", "顶顶顶"]
["心", "&#12288;&#12288;", "得", "", "不错", "够", "速度", "", "效果", "也", "不错", "", "希望", "大家", "用", "着", "也", "一样", "", "顶", "", "顶"]
标&#12288;&#12288;签:挺保湿的心&#12288;&#12288;得:用着还不错。挺好的。
["标", "&#12288;&#12288;", "签", "", "挺", "保湿", "的", "心", "&#12288;&#12288;", "得", "", "用", "着", "还", "不错", "。", "挺", "好", "的", "。"]
优&#12288;&#12288;点:东西很好哦!不&#12288;&#12288;足:暂时还没有发现缺点哦!心&#12288;&#12288;得:很好,也很划算
@ -39,7 +39,7 @@
标&#12288;&#12288;签:品牌好心&#12288;&#12288;得:东西还行,就是线太少了
["标", "&#12288;&#12288;", "签", "", "品牌", "好心", "&#12288;&#12288;", "得", "", "东西", "还", "行", "", "就是", "线", "太", "少", "了"]
标&#12288;&#12288;签:还可以老婆买的心&#12288;&#12288;得:代买的,据说还不错,搞优惠屯着。
["标", "&#12288;&#12288;", "签", "", "还", "可以", "老婆", "买", "的", "心", "&#12288;&#12288;", "得", "", "代买", "的", "", "据说", "还", "不错", "", "搞", "优惠", "屯", "着", "。"]
["标", "&#12288;&#12288;", "签", "", "还", "可以", "老婆", "买", "的", "心", "&#12288;&#12288;", "得", "", "代", "买", "的", "", "据说", "还", "不错", "", "搞", "优惠", "屯", "着", "。"]
标&#12288;&#12288;签:保湿还不错很好用心&#12288;&#12288;得:一直在用这个,现在继续。
["标", "&#12288;&#12288;", "签", "", "保湿", "还", "不错", "很", "好", "用心", "&#12288;&#12288;", "得", "", "一直", "在", "用", "这个", "", "现在", "继续", "。"]
标&#12288;&#12288;签:很好用心&#12288;&#12288;得:正品,方便好用,比店里便宜
@ -109,7 +109,7 @@
标&#12288;&#12288;签:很好用心&#12288;&#12288;得:还可以,常规的东东。.
["标", "&#12288;&#12288;", "签", "", "很", "好", "用心", "&#12288;&#12288;", "得", "", "还", "可以", "", "常规", "的", "东东", "。", "."]
标&#12288;&#12288;签:包装好乳液很好用补水效果好物流速度快价格实惠心&#12288;&#12288;得:挺好的,脸上不紧绷,舒服
["标", "&#12288;&#12288;", "签", "", "包装", "好", "乳液", "很", "好", "用", "补水", "效果", "好", "物流", "速度", "快", "价格", "实惠", "心", "&#12288;&#12288;", "得", "", "挺", "好", "的", "", "脸上", "不", "紧绷", "", "舒服"]
["标", "&#12288;&#12288;", "签", "", "包装", "好", "乳液", "很", "好", "用", "补水", "效果", "好", "物流", "速度", "快", "价格", "实惠", "心", "&#12288;&#12288;", "得", "", "挺", "好", "的", "", "脸上", "不", "紧", "绷", "", "舒服"]
标&#12288;&#12288;签:物流速度快价格实惠心&#12288;&#12288;得:应该是正品吧,价格比超市便宜些。正在使用中
["标", "&#12288;&#12288;", "签", "", "物流", "速度", "快", "价格", "实惠", "心", "&#12288;&#12288;", "得", "", "应该", "是", "正品", "吧", "", "价格比", "超市", "便宜", "些", "。", "正在", "使用", "中"]
标&#12288;&#12288;签:还可以心&#12288;&#12288;得:挺滋润的,价钱也合适!
@ -131,7 +131,7 @@
标&#12288;&#12288;签:挺保湿的物流速度快比商场便宜品牌好心&#12288;&#12288;得:正品,平价,比商场便宜,物流很快。
["标", "&#12288;&#12288;", "签", "", "挺", "保湿", "的", "物流", "速度", "快", "比", "商场", "便宜", "品牌", "好心", "&#12288;&#12288;", "得", "", "正品", "", "平价", "", "比", "商场", "便宜", "", "物流", "很快", "。"]
标&#12288;&#12288;签:服务好心&#12288;&#12288;得还没有使用过就发现YMX只要79元我哭为什么京东价格拼不过YMX呀~~~
["标", "&#12288;&#12288;", "签", "", "服务", "好心", "&#12288;&#12288;", "得", "", "还", "没有", "使用", "过", "", "就", "发现", "YMX", "只要", "79", "元", "", "我", "哭", "", "为什么", "京东", "价格", "拼不过", "YMX", "呀", "~~~"]
["标", "&#12288;&#12288;", "签", "", "服务", "好心", "&#12288;&#12288;", "得", "", "还", "没有", "使用", "过", "", "就", "发现", "YMX", "只要", "79", "元", "", "我", "哭", "", "为什么", "京东", "价格", "拼", "不过", "YMX", "呀", "~~~"]
标&#12288;&#12288;签:挺保湿的心&#12288;&#12288;得:第一次购买,用了感觉还不错
["标", "&#12288;&#12288;", "签", "", "挺", "保湿", "的", "心", "&#12288;&#12288;", "得", "", "第一次", "购买", ",", "用", "了", "感觉", "还", "不错"]
标&#12288;&#12288;签:服务好物流速度快脸上很舒服心&#12288;&#12288;得:刚送到家。。用用在发表好坏。
@ -141,7 +141,7 @@
标&#12288;&#12288;签:品牌好价格实惠脸上很舒服味道不错心&#12288;&#12288;得:防晒,不油腻,还可以使皮肤稍稍增白些,
["标", "&#12288;&#12288;", "签", "", "品牌", "好", "价格", "实惠", "脸上", "很", "舒服", "味道", "不错", "心", "&#12288;&#12288;", "得", "", "防晒", "", "不", "油腻", "", "还", "可以", "使", "皮肤", "稍稍", "增白", "些", ""]
标&#12288;&#12288;签:价格实惠保湿还不错心&#12288;&#12288;得:东西好用,分不清楚是不是正品。
["标", "&#12288;&#12288;", "签", "", "价格", "实惠", "保湿", "还", "不错", "心", "&#12288;&#12288;", "得", "", "东西", "好", "用", "", "分不清楚", "是不是", "正品", "。"]
["标", "&#12288;&#12288;", "签", "", "价格", "实惠", "保湿", "还", "不错", "心", "&#12288;&#12288;", "得", "", "东西", "好", "用", "", "分", "", "清楚", "是不是", "正品", "。"]
标&#12288;&#12288;签:服务好乳液很好用心&#12288;&#12288;得:乳液还是不错的用用不错的
["标", "&#12288;&#12288;", "签", "", "服务", "好", "乳液", "很", "好", "用心", "&#12288;&#12288;", "得", "", "乳液", "还是", "不错", "的", "用", "用", "不错", "的"]
标&#12288;&#12288;签:物流速度快效果不错心&#12288;&#12288;得:常用这个,夏天用,美白效果还好
@ -159,7 +159,7 @@
三八妇女节买的Z的产品随便用用可以的。女人要对自己好一点。
["三八妇女节", "买", "的", "", "Z", "的", "产品", "随便", "用", "用", "可以", "的", "。", "女人", "要", "对", "自己", "好", "一点", "。"]
标&#12288;&#12288;签:是正品挺保湿的心&#12288;&#12288;得好东东ZA我的最爱。
["标", "&#12288;&#12288;", "签", "", "是", "正品", "挺", "保湿", "的", "心", "&#12288;&#12288;", "得", "", "好东东", "", "ZA", "我", "的", "最", "爱", "。"]
["标", "&#12288;&#12288;", "签", "", "是", "正品", "挺", "保湿", "的", "心", "&#12288;&#12288;", "得", "", "好", "东东", "", "ZA", "我", "的", "最", "爱", "。"]
优&#12288;&#12288;点:没有让这次的尝试失望不&#12288;&#12288;足:货运慢,慢,慢心&#12288;&#12288;得:很舒适,用的不错
["优", "&#12288;&#12288;", "点", "", "没有", "让", "这次", "的", "尝试", "失望", "不", "&#12288;&#12288;", "足", "", "货运", "慢", "", "慢", "", "慢", "心", "&#12288;&#12288;", "得", "", "很", "舒适", "", "用", "的", "不错"]
标&#12288;&#12288;签:挺保湿的心&#12288;&#12288;得:一直用还可以~~~~~~~~~~~~~~~~
@ -173,7 +173,7 @@
效果挺好的滋润保湿了味道清淡
["效果", "挺", "好", "的", "滋润", "保湿", "了", "味道", "清淡"]
瓶子盖子都有刮痕了是不是都用过了啊。以前也在卓越买过za的其他化妆品都还算满意。这一次真觉得很恶心以后不会在这买了
["瓶子", "盖子", "都", "有", "刮痕", "了", "", "是不是", "都", "用", "过", "了", "啊", "。", "以前", "也", "在", "卓越", "买", "过", "za", "的", "其他", "化妆品", "", "都", "还", "算", "满意", "。", "这", "一次", "真", "觉得", "很", "恶心", "", "以后", "不会", "在", "这", "买", "了"]
["瓶子", "盖子", "都", "有", "刮", "痕", "了", "", "是不是", "都", "用", "过", "了", "啊", "。", "以前", "也", "在", "卓越", "买", "过", "za", "的", "其他", "化妆品", "", "都", "还", "算", "满意", "。", "这", "一次", "真", "觉得", "很", "恶心", "", "以后", "不会", "在", "这", "买", "了"]
好用不知道是不是正品啊
["好", "用", "不", "知道", "是不是", "正品", "啊"]
很好用

View File

@ -5,13 +5,14 @@ using namespace CppJieba;
TEST(FullSegment, Test1)
{
FullSegment segment("../dict/jieba.dict.utf8");
FullSegment segment("../dict/extra_dict/jieba.dict.small.utf8");
const char* str = "我来自北京邮电大学。。。 学号 123456";
const char* res[] = {"", "来自", "北京", "北京邮电", "北京邮电大学", "邮电", "邮电大学", "电大", "大学", "", "", "", " ", "学号", " 123456"};
vector<string> words;
ASSERT_EQ(segment.cut(str, words), true);
EXPECT_EQ(words, vector<string>(res, res + sizeof(res)/sizeof(res[0])));
string s;
s << words;
ASSERT_EQ(s, "[\"\", \"来自\", \"北京\", \"北京邮电大学\", \"邮电\", \"电大\", \"大学\", \"\", \"\", \"\", \" \", \"\", \"\", \" 123456\"]");
}

View File

@ -7,7 +7,7 @@ using namespace CppJieba;
TEST(KeywordExtractorTest, Test1)
{
KeywordExtractor extractor("../dict/jieba.dict.utf8", "../dict/hmm_model.utf8", "../dict/idf.utf8");
KeywordExtractor extractor("../dict/extra_dict/jieba.dict.small.utf8", "../dict/hmm_model.utf8", "../dict/idf.utf8");
string s("我是拖拉机学院手扶拖拉机专业的。不用多久,我就会升职加薪,当上总经理,迎娶白富美,走上人生巅峰。");
string res;
vector<pair<string, double> > wordweights;

View File

@ -7,7 +7,7 @@ using namespace Limonp;
TEST(MPSegmentTest, Test1)
{
MPSegment segment("../dict/jieba.dict.utf8");;
MPSegment segment("../dict/extra_dict/jieba.dict.small.utf8");;
const char* str = "我来自北京邮电大学。。。 学号 123456";
const char* res[] = {"", "来自", "北京邮电大学", "","",""," ","","", " 123456"};
vector<string> words;
@ -19,7 +19,7 @@ TEST(MPSegmentTest, Test1)
TEST(MPSegmentTest, Test2)
{
MPSegment segment("../dict/jieba.dict.utf8");
MPSegment segment("../dict/extra_dict/jieba.dict.small.utf8");
string line;
ifstream ifs("../test/testdata/review.100");
vector<string> words;

View File

@ -5,7 +5,7 @@ using namespace CppJieba;
TEST(MixSegmentTest, Test1)
{
MixSegment segment("../dict/jieba.dict.utf8", "../dict/hmm_model.utf8");;
MixSegment segment("../dict/extra_dict/jieba.dict.small.utf8", "../dict/hmm_model.utf8");;
const char* str = "我来自北京邮电大学。。。 学号 123456";
const char* res[] = {"", "来自", "北京邮电大学", "","",""," ","学号", " 123456"};
vector<string> words;

View File

@ -5,13 +5,16 @@ using namespace CppJieba;
TEST(QuerySegment, Test1)
{
QuerySegment segment("../dict/jieba.dict.utf8", "../dict/hmm_model.utf8", 3);
QuerySegment segment("../dict/extra_dict/jieba.dict.small.utf8", "../dict/hmm_model.utf8", 3);
const char* str = "小明硕士毕业于中国科学院计算所,后在日本京都大学深造";
const char* res[] = {"小明", "硕士", "毕业", "", "中国", "中国科学院", "科学", "科学院", "学院", "计算所", "", "", "", "日本", "日本京都大学", "京都", "京都大学", "大学", "深造"};
vector<string> words;
ASSERT_EQ(segment.cut(str, words), true);
ASSERT_TRUE(segment.cut(str, words));
string s1, s2;
s1 << words;
s2 = "[\"小明\", \"硕士\", \"毕业\", \"\", \"中国\", \"中国科学院\", \"科学\", \"科学院\", \"学院\", \"计算所\", \"\", \"\", \"\", \"日本\", \"京都\", \"京都大学\", \"大学\", \"深造\"]";
ASSERT_EQ(s1, s2);
EXPECT_EQ(words, vector<string>(res, res + sizeof(res)/sizeof(res[0])));
}

View File

@ -3,13 +3,13 @@
using namespace CppJieba;
static const char* const DICT_FILE = "../dict/jieba.dict.utf8";
static const char* const DICT_FILE = "../dict/extra_dict/jieba.dict.small.utf8";
TEST(TrieTest, Test1)
{
Trie trie;
ASSERT_TRUE(trie.init(DICT_FILE));
ASSERT_LT(trie.getMinLogFreq() + 17.2184, 0.001);
ASSERT_LT(trie.getMinLogFreq() + 15.6479, 0.001);
string word("来到");
Unicode uni;
ASSERT_TRUE(TransCode::decode(word, uni));
@ -17,7 +17,8 @@ TEST(TrieTest, Test1)
nodeInfo.word = uni;
nodeInfo.freq = 8779;
nodeInfo.tag = "v";
nodeInfo.logFreq = -8.83144;
nodeInfo.logFreq = -8.87033;
EXPECT_EQ(nodeInfo, *trie.find(uni.begin(), uni.end()));
word = "清华大学";
vector<pair<uint, const TrieNodeInfo*> > res;