From 30488d0473157fe5011ca23e7945632153339493 Mon Sep 17 00:00:00 2001 From: gwdwyy Date: Thu, 11 Jul 2013 00:03:56 +0800 Subject: [PATCH] add scripts/iconv_dict.py --- Segment.cpp | 1 + Trie.cpp | 122 +++++++++--------------------------------- Trie.h | 5 +- scripts/iconv_dict.py | 15 ++++++ 4 files changed, 43 insertions(+), 100 deletions(-) create mode 100755 scripts/iconv_dict.py diff --git a/Segment.cpp b/Segment.cpp index d95ee3a..08a6e7a 100644 --- a/Segment.cpp +++ b/Segment.cpp @@ -178,6 +178,7 @@ int main() vector res; //string title = "我来到北京清华大学"; string title = "特价!camel骆驼 柔软舒适头层牛皮平底凉鞋女 休闲平跟妈妈鞋夏"; + cout<hmap.find(chUni) == p->hmap.end()) - { - return false; - } - else - { - p = p->hmap[chUni]; - } - } - return p->isLeaf; - } - */ - - /* - bool Trie::find(const vector& uniVec) - { - TrieNode * p = _root; - for(size_t i = 0; i < uniVec.size(); i++) - { - ChUnicode chUni = uniVec[i]; - if(p->hmap.find(chUni) == p->hmap.end()) - { - return false; - } - else - { - p = p-> hmap[chUni]; - } - } - return p->isLeaf; - } - */ - - /* - int Trie::findMaxMatch(const ChUnicode* chUniStr, size_t len) - { - int res = -1; - TrieNode * p = _root; - for(int i = 0; i < len; i++) - { - ChUnicode chWord = chUniStr[i]; - TrieNodeMap::const_iterator iter = p->hmap.find(chWord); - if(iter != p->hmap.end()) - { - TrieNode * next = iter->second; - if(next->isLeaf) - { - res = i + 1; - } - p = next; - } - else - { - break; - } - } - //cout<<__FILE__<<__LINE__< >& res) { @@ -284,28 +226,6 @@ namespace CppJieba return true; } - void Trie::_display(TrieNode* node, int level) - { - if(NULL == node) - { - LogError("failed! node is null."); - return; - } - for(TrieNodeMap::const_iterator it = node->hmap.begin(); it != node->hmap.end(); it++) - { - char utfBuf[8]; - ChUnicode chBuf[1]; - for(int i = 0; i < level; i++) - { - cout<<" "; - } - chBuf[0]=it->first; - unicodeToUtf8(chBuf, 1, utfBuf); - cout<second, level + 1); - } - } - bool Trie::_insert(const TrieNodeInfo& nodeInfo) { _nodeInfoVec.push_back(nodeInfo); @@ -407,7 +327,13 @@ int main() { Trie trie; trie.init("dicts/segdict.utf8.v2.1"); + //trie.init("dicts/jieba.dict.utf8"); + //trie.init("dict.100"); //char utf[1024] = "我来到北京清华大学3D电视"; + //trie.display(); + //getchar(); + cout<> sys.stderr, err +