finishing cutDAG

This commit is contained in:
gwdwyy 2013-07-08 18:28:43 +08:00
parent 1de2635e44
commit 4e69f342ee
4 changed files with 46 additions and 54 deletions

View File

@ -22,7 +22,43 @@ namespace CppJieba
// Builds the candidate-word DAG for the UTF-8 text in chStr.
//
// The text is first decoded into a fixed-size ChUnicode buffer through
// _utf8ToUni. For every start position i, dag[i] then holds:
//   - i itself (the single unit at i is always kept as a candidate), and
//   - every inclusive end position j-1 such that the span [i, j) of length
//     >= 2 is found in _trie.
//
// chStr: UTF-8 input text.
// res:   cleared on entry; NOTE(review): nothing is written to it yet because
//        the DP/segmentation step below is still unimplemented.
//
// Returns false (after logging) when _utf8ToUni reports a length of 0,
// true otherwise.
bool Segment::cutDAG(const string& chStr, vector<string>& res)
{
    res.clear();

    ChUnicode uniStr[bufSize];
    memset(uniStr, 0, sizeof(uniStr));
    const size_t len = _utf8ToUni(chStr, uniStr, bufSize);
    if(0 == len)
    {
        // A zero length is the only failure signal checked here.
        // NOTE(review): an empty chStr presumably lands here too -- confirm
        // against _utf8ToUni.
        LogError("_utf8ToUni failed.");
        return false;
    }

    //calc DAG
    vector<vector<uint> > dag;
    dag.reserve(len); // exactly one row per start position
    for(uint i = 0; i < len; i++)
    {
        vector<uint> vec;
        vec.push_back(i); // single-unit candidate, kept without a trie lookup
        // j is the exclusive end of the span, so j - i is its length (>= 2).
        for(uint j = i + 2; j <= len; j++)
        {
            if(NULL != _trie.find(uniStr + i, j - i))
            {
                vec.push_back(j - 1); // store the inclusive end index
            }
        }
        dag.push_back(vec);
    }
    // Project macro (defined outside this view); presumably dumps the DAG
    // for inspection while the DP step is being developed.
    PRINT_MATRIX(dag);

    //calc dp
    // TODO: run the DP over dag and fill res.
    return true;
}
@ -129,6 +165,12 @@ namespace CppJieba
return len;
}
// Placeholder: currently ignores uniStr and len, leaves res untouched, and
// always reports success.
// NOTE(review): judging by the name and signature this is presumably meant to
// run the DP step over the decoded text and fill res with (uint, double)
// pairs -- confirm the intended contract before relying on it.
bool Segment::_calcDP(const ChUnicode* uniStr, size_t len, vector<pair<uint, double> >& res)
{
return true;
}
}
@ -141,8 +183,8 @@ int main()
segment.init("jieba.dict.utf8");
vector<string> res;
string title = "我来到北京清华大学3D电视";
bool flag = segment.cutMM(title, res);
string title = "我来到北京清华大学";
bool flag = segment.cutDAG(title, res);
if(flag)
{
for(int i = 0; i < res.size(); i++)

View File

@ -22,6 +22,7 @@ namespace CppJieba
private:
size_t _utf8ToUni(const string& chStr, ChUnicode* uniStr, size_t size);
bool _calcDP(const ChUnicode* uniStr, size_t len, vector<pair<uint, double> >& res);
private:
enum {bufSize = 1024};

52
Trie.h
View File

@ -44,52 +44,6 @@ namespace CppJieba
}
};
/*
struct TrieNodeIterator
{
TrieNode* ptNode;
TrieNodeIterator():ptNode(NULL)
{
}
TrieNodeIterator(TrieNode* ptr):ptNode(NULL)
{
ptNode = ptr;
}
const int operator++(int)
{
return 1;
}
TrieNodeIterator& operator++()
{
return *this;
}
TrieNode& operator*() const
{
return *ptNode;
}
TrieNode* operator->() const
{
return ptNode;
}
bool operator==(const TrieNodeIterator& x) const
{
return ptNode == x.ptNode;
}
bool operator!=(const TrieNodeIterator& x) const
{
return ptNode != x.ptNode;
}
};
*/
class Trie
{
private:
@ -115,16 +69,10 @@ namespace CppJieba
public:
const TrieNodeInfo* find(const ChUnicode* const chUniStr, size_t len);
//bool find(const ChUnicode* chUniStr, size_t len);
//bool find(const vector<ChUnicode>& uniVec);
int findMaxMatch(const ChUnicode* chUniStr, size_t len);
public:
double getWeight(const ChUnicode* uniStr, size_t len);
//bool cut(const ChUnicode* chUniStr, size_t len, vector< vector<size_t> >& res);
//bool cutUni(const vector<ChUnicode>& uniVec, )
//bool cutUtf8(const string& str, vector< vector<size_t> >& res);
//bool cutMa
private:
bool _buildTree(const char* const filePath);

View File

@ -8,6 +8,7 @@ const char * const DICT_FILE_PATH = "dict.txt";
//typedefs
typedef uint16_t ChUnicode;
typedef unsigned int uint;
#endif