mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
finished trie'cut
This commit is contained in:
parent
812adcc20b
commit
5be8d48a18
46
Trie.cpp
46
Trie.cpp
@ -91,6 +91,26 @@ namespace CppJieba
|
|||||||
return p->isLeaf;
|
return p->isLeaf;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool Trie::cut(const ChUnicode* chUniStr, size_t len, vector< vector<size_t> >& res)
|
||||||
|
{
|
||||||
|
res.clear();
|
||||||
|
cout<<len<<endl;
|
||||||
|
for(size_t i = 0; i < len; i++)
|
||||||
|
{
|
||||||
|
cout<<__LINE__<<","<<chUniStr[i]<<endl;
|
||||||
|
res.push_back(vector<size_t>());
|
||||||
|
vector<size_t>& vec = res[i];
|
||||||
|
for(size_t j = i; j < len; j++)
|
||||||
|
{
|
||||||
|
if(find(chUniStr + i, j - i + 1))
|
||||||
|
{
|
||||||
|
vec.push_back(j);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
bool Trie::_destroyNode(TrieNode* node)
|
bool Trie::_destroyNode(TrieNode* node)
|
||||||
{
|
{
|
||||||
for(TrieNodeHashMap::iterator it = node->hmap.begin(); it != node->hmap.end(); it++)
|
for(TrieNodeHashMap::iterator it = node->hmap.begin(); it != node->hmap.end(); it++)
|
||||||
@ -161,13 +181,27 @@ int main()
|
|||||||
//trie.init("test/dict.txt");
|
//trie.init("test/dict.txt");
|
||||||
trie.init("dict.txt");
|
trie.init("dict.txt");
|
||||||
//trie.display();
|
//trie.display();
|
||||||
const char * utf = "B";
|
//const char * utf = "B";
|
||||||
ChUnicode chUniStr[16];
|
//ChUnicode chUniStr[16];
|
||||||
int uniLen = utf8ToUnicode(utf, sizeof(utf), chUniStr);
|
//int uniLen = utf8ToUnicode(utf, sizeof(utf), chUniStr);
|
||||||
cout<<trie.find(chUniStr, uniLen)<<endl;
|
//cout<<trie.find(chUniStr, uniLen)<<endl;
|
||||||
getchar();
|
char utf[1024] = "我来到北京清华大学";
|
||||||
|
char buf[1024];
|
||||||
|
ChUnicode chUniStr[1024];
|
||||||
|
//cout<<sizeof(utf)<<endl;
|
||||||
|
int uniLen = utf8ToUnicode(utf, strlen(utf), chUniStr);
|
||||||
|
vector< vector<size_t> > res;
|
||||||
|
cout<<trie.cut(chUniStr, uniLen, res)<<endl;
|
||||||
|
PRINT_MATRIX(res);
|
||||||
|
FOR_VECTOR(res, i)
|
||||||
|
{
|
||||||
|
FOR_VECTOR(res[i], j)
|
||||||
|
{
|
||||||
|
unicodeToUtf8(chUniStr + i, res[i][j] - i + 1, buf);
|
||||||
|
cout<<buf<<endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
trie.destroy();
|
trie.destroy();
|
||||||
getchar();
|
|
||||||
//hash_map<ChUnicode, int> hmap;
|
//hash_map<ChUnicode, int> hmap;
|
||||||
//hmap[136]=1;
|
//hmap[136]=1;
|
||||||
return 0;
|
return 0;
|
||||||
|
2
Trie.h
2
Trie.h
@ -3,6 +3,7 @@
|
|||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
#include <ext/hash_map>
|
#include <ext/hash_map>
|
||||||
|
#include <cstring>
|
||||||
#include "cppcommon/str_functs.h"
|
#include "cppcommon/str_functs.h"
|
||||||
#include "cppcommon/vec_functs.h"
|
#include "cppcommon/vec_functs.h"
|
||||||
#include "cppcommon/logger.h"
|
#include "cppcommon/logger.h"
|
||||||
@ -34,6 +35,7 @@ namespace CppJieba
|
|||||||
bool destroy();
|
bool destroy();
|
||||||
void display();
|
void display();
|
||||||
bool find(const ChUnicode* chUniStr, size_t len);
|
bool find(const ChUnicode* chUniStr, size_t len);
|
||||||
|
bool cut(const ChUnicode* chUniStr, size_t len, vector< vector<size_t> >& res);
|
||||||
private:
|
private:
|
||||||
bool _destroyNode(TrieNode* node);
|
bool _destroyNode(TrieNode* node);
|
||||||
void _display(TrieNode* node, int level);
|
void _display(TrieNode* node, int level);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user