This commit is contained in:
gwdwyy 2013-07-05 11:09:27 +08:00
parent 3da92a2e1a
commit a36782ca76
2 changed files with 36 additions and 15 deletions

View File

@ -94,10 +94,10 @@ namespace CppJieba
bool Trie::cut(const ChUnicode* chUniStr, size_t len, vector< vector<size_t> >& res)
{
res.clear();
cout<<len<<endl;
//cout<<len<<endl;
for(size_t i = 0; i < len; i++)
{
cout<<__LINE__<<","<<chUniStr[i]<<endl;
//cout<<__LINE__<<","<<chUniStr[i]<<endl;
res.push_back(vector<size_t>());
vector<size_t>& vec = res[i];
for(size_t j = i; j < len; j++)
@ -111,6 +111,28 @@ namespace CppJieba
return true;
}
bool Trie::cutUtf8(const string& str, vector< vector<size_t> >& res)
{
ChUnicode buf[ChUniMaxLen];
size_t len = utf8ToUnicode(str.c_str(), str.size(), buf);
if(0 == len)
{
return false;
}
return cut(buf, len, res);
/*
PRINT_MATRIX(res);
char buf[1024];
FOR_VECTOR(res, i)
{
FOR_VECTOR(res[i], j)
{
unicodeToUtf8(chUniStr + i, res[i][j] - i + 1, buf);
cout<<buf<<endl;
}
}*/
}
bool Trie::_destroyNode(TrieNode* node)
{
for(TrieNodeHashMap::iterator it = node->hmap.begin(); it != node->hmap.end(); it++)
@ -189,21 +211,11 @@ int main()
//int uniLen = utf8ToUnicode(utf, sizeof(utf), chUniStr);
//cout<<trie.find(chUniStr, uniLen)<<endl;
char utf[1024] = "我来到北京清华大学3D电视";
char buf[1024];
ChUnicode chUniStr[1024];
//ChUnicode chUniStr[1024];
//cout<<sizeof(utf)<<endl;
int uniLen = utf8ToUnicode(utf, strlen(utf), chUniStr);
//int uniLen = utf8ToUnicode(utf, strlen(utf), chUniStr);
vector< vector<size_t> > res;
cout<<trie.cut(chUniStr, uniLen, res)<<endl;
PRINT_MATRIX(res);
FOR_VECTOR(res, i)
{
FOR_VECTOR(res[i], j)
{
unicodeToUtf8(chUniStr + i, res[i][j] - i + 1, buf);
cout<<buf<<endl;
}
}
//cout<<trie.cutUtf8(utf, res)<<endl;
trie.destroy();
//hash_map<ChUnicode, int> hmap;
//hmap[136]=1;

9
Trie.h
View File

@ -17,6 +17,7 @@ namespace CppJieba
using namespace std;
//using __gnu_cxx::hash_map;
typedef uint16_t ChUnicode;
const size_t ChUniMaxLen = 1024;
typedef map<ChUnicode, struct TrieNode*> TrieNodeHashMap;
struct TrieNode
@ -83,9 +84,11 @@ namespace CppJieba
vector<TrieNode> _nodeVec;
public:
typedef TrieNodeIterator iterator;
public:
iterator begin();
iterator end();
public:
Trie();
~Trie();
@ -93,7 +96,13 @@ namespace CppJieba
bool destroy();
void display();
bool find(const ChUnicode* chUniStr, size_t len);
public:
// Clears res, then appends one vector<size_t> to it for each index of chUniStr[0..len).
bool cut(const ChUnicode* chUniStr, size_t len, vector< vector<size_t> >& res);
//bool cutUni(const vector<ChUnicode>& uniVec, )
// Decodes str with utf8ToUnicode() and forwards the decoded units to cut();
// returns false when the decoded length is 0.
bool cutUtf8(const string& str, vector< vector<size_t> >& res);
//bool cutMa
private:
bool _destroyNode(TrieNode* node);
void _display(TrieNode* node, int level);