diff --git a/Segment.cpp b/Segment.cpp index be56218..9703601 100644 --- a/Segment.cpp +++ b/Segment.cpp @@ -22,7 +22,43 @@ namespace CppJieba bool Segment::cutDAG(const string& chStr, vector& res) { + res.clear(); + char utfBuf[bufSize]; + ChUnicode uniStr[bufSize]; + memset(uniStr, 0, sizeof(uniStr)); + size_t len = _utf8ToUni(chStr, uniStr, bufSize); + if(0 == len) + { + LogError("_utf8ToUni failed."); + return false; + } + + //calc DAG + vector > dag; + for(uint i = 0; i < len; i++) + { + vector vec; + vec.push_back(i); + for(uint j = i + 2; j <= len; j++) + { + if(NULL != _trie.find(uniStr + i, j - i)) + { + vec.push_back(j - 1); + } + } + dag.push_back(vec); + } + PRINT_MATRIX(dag); + getchar(); + + + //calc dp + + + + + return true; } @@ -129,6 +165,12 @@ namespace CppJieba return len; } + + bool Segment::_calcDP(const ChUnicode* uniStr, size_t len, vector >& res) + { + return true; + } + } @@ -141,8 +183,8 @@ int main() segment.init("jieba.dict.utf8"); vector res; - string title = "我来到北京清华大学3D电视"; - bool flag = segment.cutMM(title, res); + string title = "我来到北京清华大学"; + bool flag = segment.cutDAG(title, res); if(flag) { for(int i = 0; i < res.size(); i++) diff --git a/Segment.h b/Segment.h index fc9834a..d3fb150 100644 --- a/Segment.h +++ b/Segment.h @@ -22,6 +22,7 @@ namespace CppJieba private: size_t _utf8ToUni(const string& chStr, ChUnicode* uniStr, size_t size); + bool _calcDP(const ChUnicode* uniStr, size_t len, vector >& res); private: enum {bufSize = 1024}; diff --git a/Trie.h b/Trie.h index bcec12a..07cad6a 100644 --- a/Trie.h +++ b/Trie.h @@ -44,52 +44,6 @@ namespace CppJieba } }; - /* - struct TrieNodeIterator - { - TrieNode* ptNode; - - TrieNodeIterator():ptNode(NULL) - { - } - - TrieNodeIterator(TrieNode* ptr):ptNode(NULL) - { - ptNode = ptr; - } - - const int operator++(int) - { - return 1; - } - - TrieNodeIterator& operator++() - { - return *this; - } - - TrieNode& operator*() const - { - return *ptNode; - } - - TrieNode* operator->() const - { - return ptNode; - } - - bool operator==(const TrieNodeIterator& x) const - { - return ptNode == x.ptNode; - } - - bool operator!=(const TrieNodeIterator& x) const - { - return ptNode != x.ptNode; - } - }; - */ - class Trie { private: @@ -115,16 +69,10 @@ namespace CppJieba public: const TrieNodeInfo* find(const ChUnicode* const chUniStr, size_t len); - //bool find(const ChUnicode* chUniStr, size_t len); - //bool find(const vector& uniVec); int findMaxMatch(const ChUnicode* chUniStr, size_t len); public: double getWeight(const ChUnicode* uniStr, size_t len); - //bool cut(const ChUnicode* chUniStr, size_t len, vector< vector >& res); - //bool cutUni(const vector& uniVec, ) - //bool cutUtf8(const string& str, vector< vector >& res); - //bool cutMa private: bool _buildTree(const char* const filePath); diff --git a/globals.h b/globals.h index c495050..7db0f1e 100644 --- a/globals.h +++ b/globals.h @@ -8,6 +8,7 @@ const char * const DICT_FILE_PATH = "dict.txt"; //typedefs typedef uint16_t ChUnicode; +typedef unsigned int uint; #endif