remove namespace unicode

This commit is contained in:
yanyiwu 2016-04-17 21:59:10 +08:00
parent 6ff6fe1430
commit dcced8561e
15 changed files with 87 additions and 93 deletions

View File

@ -48,12 +48,12 @@ class DictTrie {
return true; return true;
} }
const DictUnit* Find(unicode::RuneStrArray::const_iterator begin, unicode::RuneStrArray::const_iterator end) const { const DictUnit* Find(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end) const {
return trie_->Find(begin, end); return trie_->Find(begin, end);
} }
void Find(unicode::RuneStrArray::const_iterator begin, void Find(RuneStrArray::const_iterator begin,
unicode::RuneStrArray::const_iterator end, RuneStrArray::const_iterator end,
vector<struct Dag>&res, vector<struct Dag>&res,
size_t max_word_len = MAX_WORD_LENGTH) const { size_t max_word_len = MAX_WORD_LENGTH) const {
trie_->Find(begin, end, res, max_word_len); trie_->Find(begin, end, res, max_word_len);
@ -124,7 +124,7 @@ class DictTrie {
const string& word, const string& word,
double weight, double weight,
const string& tag) { const string& tag) {
if (!unicode::DecodeRunesInString(word, node_info.word)) { if (!DecodeRunesInString(word, node_info.word)) {
XLOG(ERROR) << "Decode " << word << " failed."; XLOG(ERROR) << "Decode " << word << " failed.";
return false; return false;
} }

View File

@ -29,7 +29,7 @@ class FullSegment: public SegmentBase {
vector<string>& words) const { vector<string>& words) const {
PreFilter pre_filter(symbols_, sentence); PreFilter pre_filter(symbols_, sentence);
PreFilter::Range range; PreFilter::Range range;
vector<unicode::WordRange> wrs; vector<WordRange> wrs;
wrs.reserve(sentence.size()/2); wrs.reserve(sentence.size()/2);
while (pre_filter.HasNext()) { while (pre_filter.HasNext()) {
range = pre_filter.Next(); range = pre_filter.Next();
@ -37,11 +37,11 @@ class FullSegment: public SegmentBase {
} }
words.clear(); words.clear();
words.reserve(wrs.size()); words.reserve(wrs.size());
unicode::GetStringsFromWordRanges(wrs, words); GetStringsFromWordRanges(wrs, words);
} }
void Cut(unicode::RuneStrArray::const_iterator begin, void Cut(RuneStrArray::const_iterator begin,
unicode::RuneStrArray::const_iterator end, RuneStrArray::const_iterator end,
vector<unicode::WordRange>& res) const { vector<WordRange>& res) const {
//result of searching in trie tree //result of searching in trie tree
LocalVector<pair<size_t, const DictUnit*> > tRes; LocalVector<pair<size_t, const DictUnit*> > tRes;
@ -63,13 +63,13 @@ class FullSegment: public SegmentBase {
const DictUnit* du = dags[i].nexts[j].second; const DictUnit* du = dags[i].nexts[j].second;
if (du == NULL) { if (du == NULL) {
if (dags[i].nexts.size() == 1 && maxIdx <= uIdx) { if (dags[i].nexts.size() == 1 && maxIdx <= uIdx) {
unicode::WordRange wr(begin + i, begin + nextoffset); WordRange wr(begin + i, begin + nextoffset);
res.push_back(wr); res.push_back(wr);
} }
} else { } else {
wordLen = du->word.size(); wordLen = du->word.size();
if (wordLen >= 2 || (dags[i].nexts.size() == 1 && maxIdx <= uIdx)) { if (wordLen >= 2 || (dags[i].nexts.size() == 1 && maxIdx <= uIdx)) {
unicode::WordRange wr(begin + i, begin + nextoffset); WordRange wr(begin + i, begin + nextoffset);
res.push_back(wr); res.push_back(wr);
} }
} }

View File

@ -105,7 +105,7 @@ struct HMMModel {
XLOG(ERROR) << "emitProb illegal."; XLOG(ERROR) << "emitProb illegal.";
return false; return false;
} }
if (!unicode::DecodeRunesInString(tmp2[0], unicode) || unicode.size() != 1) { if (!DecodeRunesInString(tmp2[0], unicode) || unicode.size() != 1) {
XLOG(ERROR) << "TransCode failed."; XLOG(ERROR) << "TransCode failed.";
return false; return false;
} }

View File

@ -27,7 +27,7 @@ class HMMSegment: public SegmentBase {
vector<string>& words) const { vector<string>& words) const {
PreFilter pre_filter(symbols_, sentence); PreFilter pre_filter(symbols_, sentence);
PreFilter::Range range; PreFilter::Range range;
vector<unicode::WordRange> wrs; vector<WordRange> wrs;
wrs.reserve(sentence.size()/2); wrs.reserve(sentence.size()/2);
while (pre_filter.HasNext()) { while (pre_filter.HasNext()) {
range = pre_filter.Next(); range = pre_filter.Next();
@ -35,11 +35,11 @@ class HMMSegment: public SegmentBase {
} }
words.clear(); words.clear();
words.reserve(wrs.size()); words.reserve(wrs.size());
unicode::GetStringsFromWordRanges(wrs, words); GetStringsFromWordRanges(wrs, words);
} }
void Cut(unicode::RuneStrArray::const_iterator begin, unicode::RuneStrArray::const_iterator end, vector<unicode::WordRange>& res) const { void Cut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res) const {
unicode::RuneStrArray::const_iterator left = begin; RuneStrArray::const_iterator left = begin;
unicode::RuneStrArray::const_iterator right = begin; RuneStrArray::const_iterator right = begin;
while (right != end) { while (right != end) {
if (right->rune < 0x80) { if (right->rune < 0x80) {
if (left != right) { if (left != right) {
@ -57,7 +57,7 @@ class HMMSegment: public SegmentBase {
} }
right ++; right ++;
} while (false); } while (false);
unicode::WordRange wr(left, right - 1); WordRange wr(left, right - 1);
res.push_back(wr); res.push_back(wr);
left = right; left = right;
} else { } else {
@ -70,7 +70,7 @@ class HMMSegment: public SegmentBase {
} }
private: private:
// sequential letters rule // sequential letters rule
unicode::RuneStrArray::const_iterator SequentialLetterRule(unicode::RuneStrArray::const_iterator begin, unicode::RuneStrArray::const_iterator end) const { RuneStrArray::const_iterator SequentialLetterRule(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end) const {
Rune x = begin->rune; Rune x = begin->rune;
if (('a' <= x && x <= 'z') || ('A' <= x && x <= 'Z')) { if (('a' <= x && x <= 'z') || ('A' <= x && x <= 'Z')) {
begin ++; begin ++;
@ -88,7 +88,7 @@ class HMMSegment: public SegmentBase {
return begin; return begin;
} }
// //
unicode::RuneStrArray::const_iterator NumbersRule(unicode::RuneStrArray::const_iterator begin, unicode::RuneStrArray::const_iterator end) const { RuneStrArray::const_iterator NumbersRule(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end) const {
Rune x = begin->rune; Rune x = begin->rune;
if ('0' <= x && x <= '9') { if ('0' <= x && x <= '9') {
begin ++; begin ++;
@ -105,24 +105,24 @@ class HMMSegment: public SegmentBase {
} }
return begin; return begin;
} }
void InternalCut(unicode::RuneStrArray::const_iterator begin, unicode::RuneStrArray::const_iterator end, vector<unicode::WordRange>& res) const { void InternalCut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res) const {
vector<size_t> status; vector<size_t> status;
Viterbi(begin, end, status); Viterbi(begin, end, status);
unicode::RuneStrArray::const_iterator left = begin; RuneStrArray::const_iterator left = begin;
unicode::RuneStrArray::const_iterator right; RuneStrArray::const_iterator right;
for (size_t i = 0; i < status.size(); i++) { for (size_t i = 0; i < status.size(); i++) {
if (status[i] % 2) { //if (HMMModel::E == status[i] || HMMModel::S == status[i]) if (status[i] % 2) { //if (HMMModel::E == status[i] || HMMModel::S == status[i])
right = begin + i + 1; right = begin + i + 1;
unicode::WordRange wr(left, right - 1); WordRange wr(left, right - 1);
res.push_back(wr); res.push_back(wr);
left = right; left = right;
} }
} }
} }
void Viterbi(unicode::RuneStrArray::const_iterator begin, void Viterbi(RuneStrArray::const_iterator begin,
unicode::RuneStrArray::const_iterator end, RuneStrArray::const_iterator end,
vector<size_t>& status) const { vector<size_t>& status) const {
size_t Y = HMMModel::STATUS_SUM; size_t Y = HMMModel::STATUS_SUM;
size_t X = end - begin; size_t X = end - begin;

View File

@ -69,7 +69,7 @@ class KeywordExtractor {
for (size_t i = 0; i < words.size(); ++i) { for (size_t i = 0; i < words.size(); ++i) {
size_t t = offset; size_t t = offset;
offset += words[i].size(); offset += words[i].size();
if (unicode::IsSingleWord(words[i]) || stopWords_.find(words[i]) != stopWords_.end()) { if (IsSingleWord(words[i]) || stopWords_.find(words[i]) != stopWords_.end()) {
continue; continue;
} }
wordmap[words[i]].offsets.push_back(t); wordmap[words[i]].offsets.push_back(t);

View File

@ -17,8 +17,8 @@ class LevelSegment: public SegmentBase{
~LevelSegment() { ~LevelSegment() {
} }
void Cut(unicode::RuneStrArray::const_iterator begin, void Cut(RuneStrArray::const_iterator begin,
unicode::RuneStrArray::const_iterator end, RuneStrArray::const_iterator end,
vector<pair<WordRange, size_t> >& res) const { vector<pair<WordRange, size_t> >& res) const {
res.clear(); res.clear();
vector<Unicode> words; vector<Unicode> words;
@ -50,7 +50,7 @@ class LevelSegment: public SegmentBase{
vector<pair<string, size_t> >& words) const { vector<pair<string, size_t> >& words) const {
words.clear(); words.clear();
RuneStrArray unicode; RuneStrArray unicode;
unicode::DecodeRunesInString(sentence, unicode); DecodeRunesInString(sentence, unicode);
vector<pair<WordRange, size_t> > unicodeWords; vector<pair<WordRange, size_t> > unicodeWords;
Cut(unicode.begin(), unicode.end(), unicodeWords); Cut(unicode.begin(), unicode.end(), unicodeWords);
words.resize(unicodeWords.size()); words.resize(unicodeWords.size());

View File

@ -30,7 +30,7 @@ class MPSegment: public SegmentBase {
size_t max_word_len = MAX_WORD_LENGTH) const { size_t max_word_len = MAX_WORD_LENGTH) const {
PreFilter pre_filter(symbols_, sentence); PreFilter pre_filter(symbols_, sentence);
PreFilter::Range range; PreFilter::Range range;
vector<unicode::WordRange> wrs; vector<WordRange> wrs;
wrs.reserve(sentence.size()/2); wrs.reserve(sentence.size()/2);
while (pre_filter.HasNext()) { while (pre_filter.HasNext()) {
range = pre_filter.Next(); range = pre_filter.Next();
@ -38,11 +38,11 @@ class MPSegment: public SegmentBase {
} }
words.clear(); words.clear();
words.reserve(wrs.size()); words.reserve(wrs.size());
unicode::GetStringsFromWordRanges(wrs, words); GetStringsFromWordRanges(wrs, words);
} }
void Cut(unicode::RuneStrArray::const_iterator begin, void Cut(RuneStrArray::const_iterator begin,
unicode::RuneStrArray::const_iterator end, RuneStrArray::const_iterator end,
vector<unicode::WordRange>& words, vector<WordRange>& words,
size_t max_word_len = MAX_WORD_LENGTH) const { size_t max_word_len = MAX_WORD_LENGTH) const {
vector<Dag> dags; vector<Dag> dags;
dictTrie_->Find(begin, dictTrie_->Find(begin,
@ -90,20 +90,20 @@ class MPSegment: public SegmentBase {
} }
} }
} }
void CutByDag(unicode::RuneStrArray::const_iterator begin, void CutByDag(RuneStrArray::const_iterator begin,
unicode::RuneStrArray::const_iterator end, RuneStrArray::const_iterator end,
const vector<Dag>& dags, const vector<Dag>& dags,
vector<unicode::WordRange>& words) const { vector<WordRange>& words) const {
size_t i = 0; size_t i = 0;
while (i < dags.size()) { while (i < dags.size()) {
const DictUnit* p = dags[i].pInfo; const DictUnit* p = dags[i].pInfo;
if (p) { if (p) {
assert(p->word.size() >= 1); assert(p->word.size() >= 1);
unicode::WordRange wr(begin + i, begin + i + p->word.size() - 1); WordRange wr(begin + i, begin + i + p->word.size() - 1);
words.push_back(wr); words.push_back(wr);
i += p->word.size(); i += p->word.size();
} else { //single Chinese word } else { //single Chinese word
unicode::WordRange wr(begin + i, begin + i); WordRange wr(begin + i, begin + i);
words.push_back(wr); words.push_back(wr);
i++; i++;
} }

View File

@ -23,7 +23,7 @@ class MixSegment: public SegmentBase {
void Cut(const string& sentence, vector<string>& words, bool hmm = true) const { void Cut(const string& sentence, vector<string>& words, bool hmm = true) const {
PreFilter pre_filter(symbols_, sentence); PreFilter pre_filter(symbols_, sentence);
PreFilter::Range range; PreFilter::Range range;
vector<unicode::WordRange> wrs; vector<WordRange> wrs;
wrs.reserve(sentence.size() / 2); wrs.reserve(sentence.size() / 2);
while (pre_filter.HasNext()) { while (pre_filter.HasNext()) {
range = pre_filter.Next(); range = pre_filter.Next();
@ -31,20 +31,20 @@ class MixSegment: public SegmentBase {
} }
words.clear(); words.clear();
words.reserve(wrs.size()); words.reserve(wrs.size());
unicode::GetStringsFromWordRanges(wrs, words); GetStringsFromWordRanges(wrs, words);
} }
void Cut(unicode::RuneStrArray::const_iterator begin, unicode::RuneStrArray::const_iterator end, vector<unicode::WordRange>& res, bool hmm) const { void Cut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res, bool hmm) const {
if (!hmm) { if (!hmm) {
mpSeg_.Cut(begin, end, res); mpSeg_.Cut(begin, end, res);
return; return;
} }
vector<unicode::WordRange> words; vector<WordRange> words;
assert(end >= begin); assert(end >= begin);
words.reserve(end - begin); words.reserve(end - begin);
mpSeg_.Cut(begin, end, words); mpSeg_.Cut(begin, end, words);
vector<unicode::WordRange> hmmRes; vector<WordRange> hmmRes;
hmmRes.reserve(end - begin); hmmRes.reserve(end - begin);
for (size_t i = 0; i < words.size(); i++) { for (size_t i = 0; i < words.size(); i++) {
//if mp Get a word, it's ok, put it into result //if mp Get a word, it's ok, put it into result

View File

@ -30,11 +30,11 @@ class PosTagger {
segment_.Cut(src, CutRes); segment_.Cut(src, CutRes);
const DictUnit *tmp = NULL; const DictUnit *tmp = NULL;
unicode::RuneStrArray runes; RuneStrArray runes;
const DictTrie * dict = segment_.GetDictTrie(); const DictTrie * dict = segment_.GetDictTrie();
assert(dict != NULL); assert(dict != NULL);
for (vector<string>::iterator itr = CutRes.begin(); itr != CutRes.end(); ++itr) { for (vector<string>::iterator itr = CutRes.begin(); itr != CutRes.end(); ++itr) {
if (!unicode::DecodeRunesInString(*itr, runes)) { if (!DecodeRunesInString(*itr, runes)) {
XLOG(ERROR) << "Decode failed."; XLOG(ERROR) << "Decode failed.";
return false; return false;
} }
@ -48,7 +48,7 @@ class PosTagger {
return !res.empty(); return !res.empty();
} }
private: private:
const char* SpecialRule(const unicode::RuneStrArray& unicode) const { const char* SpecialRule(const RuneStrArray& unicode) const {
size_t m = 0; size_t m = 0;
size_t eng = 0; size_t eng = 0;
for (size_t i = 0; i < unicode.size() && eng < unicode.size() / 2; i++) { for (size_t i = 0; i < unicode.size() && eng < unicode.size() / 2; i++) {

View File

@ -9,14 +9,14 @@ class PreFilter {
public: public:
//TODO use WordRange instead of Range //TODO use WordRange instead of Range
struct Range { struct Range {
unicode::RuneStrArray::const_iterator begin; RuneStrArray::const_iterator begin;
unicode::RuneStrArray::const_iterator end; RuneStrArray::const_iterator end;
}; // struct Range }; // struct Range
PreFilter(const unordered_set<Rune>& symbols, PreFilter(const unordered_set<Rune>& symbols,
const string& sentence) const string& sentence)
: symbols_(symbols) { : symbols_(symbols) {
unicode::DecodeRunesInString(sentence, sentence_); DecodeRunesInString(sentence, sentence_);
cursor_ = sentence_.begin(); cursor_ = sentence_.begin();
} }
~PreFilter() { ~PreFilter() {
@ -41,8 +41,8 @@ class PreFilter {
return range; return range;
} }
private: private:
unicode::RuneStrArray::const_iterator cursor_; RuneStrArray::const_iterator cursor_;
unicode::RuneStrArray sentence_; RuneStrArray sentence_;
const unordered_set<Rune>& symbols_; const unordered_set<Rune>& symbols_;
}; // class PreFilter }; // class PreFilter

View File

@ -29,7 +29,7 @@ class QuerySegment: public SegmentBase {
void Cut(const string& sentence, vector<string>& words, bool hmm = true) const { void Cut(const string& sentence, vector<string>& words, bool hmm = true) const {
PreFilter pre_filter(symbols_, sentence); PreFilter pre_filter(symbols_, sentence);
PreFilter::Range range; PreFilter::Range range;
vector<unicode::WordRange> wrs; vector<WordRange> wrs;
wrs.reserve(sentence.size()/2); wrs.reserve(sentence.size()/2);
while (pre_filter.HasNext()) { while (pre_filter.HasNext()) {
range = pre_filter.Next(); range = pre_filter.Next();
@ -37,19 +37,19 @@ class QuerySegment: public SegmentBase {
} }
words.clear(); words.clear();
words.reserve(wrs.size()); words.reserve(wrs.size());
unicode::GetStringsFromWordRanges(wrs, words); GetStringsFromWordRanges(wrs, words);
} }
void Cut(unicode::RuneStrArray::const_iterator begin, unicode::RuneStrArray::const_iterator end, vector<unicode::WordRange>& res, bool hmm) const { void Cut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res, bool hmm) const {
//use mix Cut first //use mix Cut first
vector<unicode::WordRange> mixRes; vector<WordRange> mixRes;
mixSeg_.Cut(begin, end, mixRes, hmm); mixSeg_.Cut(begin, end, mixRes, hmm);
vector<unicode::WordRange> fullRes; vector<WordRange> fullRes;
for (vector<unicode::WordRange>::const_iterator mixResItr = mixRes.begin(); mixResItr != mixRes.end(); mixResItr++) { for (vector<WordRange>::const_iterator mixResItr = mixRes.begin(); mixResItr != mixRes.end(); mixResItr++) {
// if it's too long, Cut with fullSeg_, put fullRes in res // if it's too long, Cut with fullSeg_, put fullRes in res
if (mixResItr->Length() > maxWordLen_ && !mixResItr->IsAllAscii()) { if (mixResItr->Length() > maxWordLen_ && !mixResItr->IsAllAscii()) {
fullSeg_.Cut(mixResItr->left, mixResItr->right + 1, fullRes); fullSeg_.Cut(mixResItr->left, mixResItr->right + 1, fullRes);
for (vector<unicode::WordRange>::const_iterator fullResItr = fullRes.begin(); fullResItr != fullRes.end(); fullResItr++) { for (vector<WordRange>::const_iterator fullResItr = fullRes.begin(); fullResItr != fullRes.end(); fullResItr++) {
res.push_back(*fullResItr); res.push_back(*fullResItr);
} }

View File

@ -9,15 +9,11 @@
namespace cppjieba { namespace cppjieba {
using namespace std; using namespace std;
using unicode::Rune;
using unicode::RuneStr;
using unicode::Unicode;
using unicode::WordRange;
const size_t MAX_WORD_LENGTH = 512; const size_t MAX_WORD_LENGTH = 512;
struct DictUnit { struct DictUnit {
unicode::Unicode word; Unicode word;
double weight; double weight;
string tag; string tag;
}; // struct DictUnit }; // struct DictUnit
@ -62,14 +58,14 @@ class Trie {
DeleteNode(root_); DeleteNode(root_);
} }
const DictUnit* Find(unicode::RuneStrArray::const_iterator begin, unicode::RuneStrArray::const_iterator end) const { const DictUnit* Find(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end) const {
if (begin == end) { if (begin == end) {
return NULL; return NULL;
} }
const TrieNode* ptNode = root_; const TrieNode* ptNode = root_;
TrieNode::NextMap::const_iterator citer; TrieNode::NextMap::const_iterator citer;
for (unicode::RuneStrArray::const_iterator it = begin; it != end; it++) { for (RuneStrArray::const_iterator it = begin; it != end; it++) {
if (NULL == ptNode->next) { if (NULL == ptNode->next) {
return NULL; return NULL;
} }
@ -82,8 +78,8 @@ class Trie {
return ptNode->ptValue; return ptNode->ptValue;
} }
void Find(unicode::RuneStrArray::const_iterator begin, void Find(RuneStrArray::const_iterator begin,
unicode::RuneStrArray::const_iterator end, RuneStrArray::const_iterator end,
vector<struct Dag>&res, vector<struct Dag>&res,
size_t max_word_len = MAX_WORD_LENGTH) const { size_t max_word_len = MAX_WORD_LENGTH) const {
assert(root_ != NULL); assert(root_ != NULL);

View File

@ -8,7 +8,6 @@
#include "limonp/LocalVector.hpp" #include "limonp/LocalVector.hpp"
namespace cppjieba { namespace cppjieba {
namespace unicode {
typedef uint32_t Rune; typedef uint32_t Rune;
@ -156,7 +155,7 @@ inline Unicode DecodeRunesInString(const std::string& s) {
//[left, right] //[left, right]
inline std::string GetStringFromRunes(unicode::RuneStrArray::const_iterator left, unicode::RuneStrArray::const_iterator right) { inline std::string GetStringFromRunes(RuneStrArray::const_iterator left, RuneStrArray::const_iterator right) {
assert(right->str >= left->str); assert(right->str >= left->str);
return std::string(left->str, right->str - left->str + right->len); return std::string(left->str, right->str - left->str + right->len);
} }
@ -173,7 +172,6 @@ inline std::vector<std::string> GetStringsFromWordRanges(const std::vector<WordR
return result; return result;
} }
} // namespace unicode
} // namespace cppjieba } // namespace cppjieba
#endif // CPPJIEBA_UNICODE_H #endif // CPPJIEBA_UNICODE_H

View File

@ -20,7 +20,7 @@ TEST(PreFilterTest, Test1) {
while (filter.HasNext()) { while (filter.HasNext()) {
PreFilter::Range range; PreFilter::Range range;
range = filter.Next(); range = filter.Next();
words.push_back(unicode::GetStringFromRunes(range.begin, range.end - 1)); words.push_back(GetStringFromRunes(range.begin, range.end - 1));
} }
res = limonp::Join(words.begin(), words.end(), "/"); res = limonp::Join(words.begin(), words.end(), "/");
ASSERT_EQ(res, expected); ASSERT_EQ(res, expected);
@ -35,7 +35,7 @@ TEST(PreFilterTest, Test1) {
while (filter.HasNext()) { while (filter.HasNext()) {
PreFilter::Range range; PreFilter::Range range;
range = filter.Next(); range = filter.Next();
words.push_back(unicode::GetStringFromRunes(range.begin, range.end - 1)); words.push_back(GetStringFromRunes(range.begin, range.end - 1));
} }
res = limonp::Join(words.begin(), words.end(), "/"); res = limonp::Join(words.begin(), words.end(), "/");
ASSERT_EQ(res, expected); ASSERT_EQ(res, expected);

View File

@ -15,7 +15,7 @@ TEST(TrieTest, Empty) {
TEST(TrieTest, Construct) { TEST(TrieTest, Construct) {
vector<Unicode> keys; vector<Unicode> keys;
vector<const DictUnit*> values; vector<const DictUnit*> values;
keys.push_back(unicode::DecodeRunesInString("")); keys.push_back(DecodeRunesInString(""));
values.push_back((const DictUnit*)(NULL)); values.push_back((const DictUnit*)(NULL));
Trie trie(keys, values); Trie trie(keys, values);
} }
@ -31,8 +31,8 @@ TEST(DictTrieTest, Test1) {
DictTrie trie(DICT_FILE); DictTrie trie(DICT_FILE);
ASSERT_LT(trie.GetMinWeight() + 15.6479, 0.001); ASSERT_LT(trie.GetMinWeight() + 15.6479, 0.001);
string word("来到"); string word("来到");
cppjieba::unicode::RuneStrArray uni; cppjieba::RuneStrArray uni;
ASSERT_TRUE(unicode::DecodeRunesInString(word, uni)); ASSERT_TRUE(DecodeRunesInString(word, uni));
//DictUnit nodeInfo; //DictUnit nodeInfo;
//nodeInfo.word = uni; //nodeInfo.word = uni;
//nodeInfo.tag = "v"; //nodeInfo.tag = "v";
@ -52,13 +52,13 @@ TEST(DictTrieTest, Test1) {
LocalVector<pair<size_t, const DictUnit*> > res; LocalVector<pair<size_t, const DictUnit*> > res;
const char * words[] = {"", "清华", "清华大学"}; const char * words[] = {"", "清华", "清华大学"};
for (size_t i = 0; i < sizeof(words)/sizeof(words[0]); i++) { for (size_t i = 0; i < sizeof(words)/sizeof(words[0]); i++) {
ASSERT_TRUE(unicode::DecodeRunesInString(words[i], uni)); ASSERT_TRUE(DecodeRunesInString(words[i], uni));
res.push_back(make_pair(uni.size() - 1, trie.Find(uni.begin(), uni.end()))); res.push_back(make_pair(uni.size() - 1, trie.Find(uni.begin(), uni.end())));
//resMap[uni.size() - 1] = trie.Find(uni.begin(), uni.end()); //resMap[uni.size() - 1] = trie.Find(uni.begin(), uni.end());
} }
vector<pair<size_t, const DictUnit*> > vec; vector<pair<size_t, const DictUnit*> > vec;
vector<struct Dag> dags; vector<struct Dag> dags;
ASSERT_TRUE(unicode::DecodeRunesInString(word, uni)); ASSERT_TRUE(DecodeRunesInString(word, uni));
trie.Find(uni.begin(), uni.end(), dags); trie.Find(uni.begin(), uni.end(), dags);
ASSERT_EQ(dags.size(), uni.size()); ASSERT_EQ(dags.size(), uni.size());
ASSERT_NE(dags.size(), 0u); ASSERT_NE(dags.size(), 0u);
@ -71,8 +71,8 @@ TEST(DictTrieTest, Test1) {
TEST(DictTrieTest, UserDict) { TEST(DictTrieTest, UserDict) {
DictTrie trie(DICT_FILE, "../test/testdata/userdict.utf8"); DictTrie trie(DICT_FILE, "../test/testdata/userdict.utf8");
string word = "云计算"; string word = "云计算";
cppjieba::unicode::RuneStrArray unicode; cppjieba::RuneStrArray unicode;
ASSERT_TRUE(unicode::DecodeRunesInString(word, unicode)); ASSERT_TRUE(DecodeRunesInString(word, unicode));
const DictUnit * unit = trie.Find(unicode.begin(), unicode.end()); const DictUnit * unit = trie.Find(unicode.begin(), unicode.end());
ASSERT_TRUE(unit); ASSERT_TRUE(unit);
ASSERT_NEAR(unit->weight, -14.100, 0.001); ASSERT_NEAR(unit->weight, -14.100, 0.001);
@ -81,8 +81,8 @@ TEST(DictTrieTest, UserDict) {
TEST(DictTrieTest, UserDictWithMaxWeight) { TEST(DictTrieTest, UserDictWithMaxWeight) {
DictTrie trie(DICT_FILE, "../test/testdata/userdict.utf8", DictTrie::WordWeightMax); DictTrie trie(DICT_FILE, "../test/testdata/userdict.utf8", DictTrie::WordWeightMax);
string word = "云计算"; string word = "云计算";
cppjieba::unicode::RuneStrArray unicode; cppjieba::RuneStrArray unicode;
ASSERT_TRUE(unicode::DecodeRunesInString(word, unicode)); ASSERT_TRUE(DecodeRunesInString(word, unicode));
const DictUnit * unit = trie.Find(unicode.begin(), unicode.end()); const DictUnit * unit = trie.Find(unicode.begin(), unicode.end());
ASSERT_TRUE(unit); ASSERT_TRUE(unit);
ASSERT_NEAR(unit->weight, -2.975, 0.001); ASSERT_NEAR(unit->weight, -2.975, 0.001);
@ -93,8 +93,8 @@ TEST(DictTrieTest, Dag) {
{ {
string word = "清华大学"; string word = "清华大学";
cppjieba::unicode::RuneStrArray unicode; cppjieba::RuneStrArray unicode;
ASSERT_TRUE(unicode::DecodeRunesInString(word, unicode)); ASSERT_TRUE(DecodeRunesInString(word, unicode));
vector<struct Dag> res; vector<struct Dag> res;
trie.Find(unicode.begin(), unicode.end(), res); trie.Find(unicode.begin(), unicode.end(), res);
@ -107,8 +107,8 @@ TEST(DictTrieTest, Dag) {
{ {
string word = "北京邮电大学"; string word = "北京邮电大学";
cppjieba::unicode::RuneStrArray unicode; cppjieba::RuneStrArray unicode;
ASSERT_TRUE(unicode::DecodeRunesInString(word, unicode)); ASSERT_TRUE(DecodeRunesInString(word, unicode));
vector<struct Dag> res; vector<struct Dag> res;
trie.Find(unicode.begin(), unicode.end(), res); trie.Find(unicode.begin(), unicode.end(), res);
@ -121,8 +121,8 @@ TEST(DictTrieTest, Dag) {
{ {
string word = "长江大桥"; string word = "长江大桥";
cppjieba::unicode::RuneStrArray unicode; cppjieba::RuneStrArray unicode;
ASSERT_TRUE(unicode::DecodeRunesInString(word, unicode)); ASSERT_TRUE(DecodeRunesInString(word, unicode));
vector<struct Dag> res; vector<struct Dag> res;
trie.Find(unicode.begin(), unicode.end(), res); trie.Find(unicode.begin(), unicode.end(), res);
@ -135,8 +135,8 @@ TEST(DictTrieTest, Dag) {
{ {
string word = "长江大桥"; string word = "长江大桥";
cppjieba::unicode::RuneStrArray unicode; cppjieba::RuneStrArray unicode;
ASSERT_TRUE(unicode::DecodeRunesInString(word, unicode)); ASSERT_TRUE(DecodeRunesInString(word, unicode));
vector<struct Dag> res; vector<struct Dag> res;
trie.Find(unicode.begin(), unicode.end(), res, 3); trie.Find(unicode.begin(), unicode.end(), res, 3);
@ -149,8 +149,8 @@ TEST(DictTrieTest, Dag) {
{ {
string word = "长江大桥"; string word = "长江大桥";
cppjieba::unicode::RuneStrArray unicode; cppjieba::RuneStrArray unicode;
ASSERT_TRUE(unicode::DecodeRunesInString(word, unicode)); ASSERT_TRUE(DecodeRunesInString(word, unicode));
vector<struct Dag> res; vector<struct Dag> res;
trie.Find(unicode.begin(), unicode.end(), res, 4); trie.Find(unicode.begin(), unicode.end(), res, 4);