bak

2025-07-18 00:00:12 +08:00 · 2013-07-06 14:41:08 +08:00 · 2013-07-06 14:41:08 +08:00 · 1de2635e44
commit 1de2635e44
parent 6eb200d867
6 changed files with 50 additions and 32 deletions
--- a/4
+++ b/4
@ -32,10 +32,10 @@ $(CMLIB): $(CMDIR)
 	cd $(CMDIR) && $(MAKE)
 #unit test
-Trie.ut: Trie.cpp Trie.h $(CMLIB)
+Trie.ut: Trie.cpp Trie.h globals.h $(CMLIB)
 	$(CC) -o $@ $< -DTRIE_UT $(CMLIB) 
-Segment.ut: Segment.cpp Trie.cpp Segment.h Trie.h $(CMLIB)
+Segment.ut: Segment.cpp Trie.cpp Segment.h Trie.h globals.h $(CMLIB)
 	$(CC) -o $@ Segment.cpp Trie.cpp -DSEGMENT_UT $(CMLIB) 
--- a/Segment.cpp
+++ b/Segment.cpp
@ -20,26 +20,22 @@ namespace CppJieba
 		return _trie.destroy();
 	}
 	// cutDAG has no implementation yet: it reports failure and leaves `res` untouched.
 	bool Segment::cutDAG(const string& chStr, vector<string>& res)
 	{
 		// A function returning bool must not flow off its end (undefined behavior),
 		// so the "not implemented" state is reported explicitly.
 		return false;
 	}
 	bool Segment::cutMM(const string& chStr, vector<string>& res)
 	{
 		res.clear();
 		char logBuf[bufSize];
 		char utfBuf[bufSize];
 		ChUnicode uniStr[bufSize];
 		memset(uniStr, 0, sizeof(uniStr));
-		size_t len = utf8ToUnicode(chStr.c_str(), chStr.size(), uniStr);
+		size_t len = _utf8ToUni(chStr, uniStr, bufSize);
 		if(0 == len)
 		{
-			sprintf(logBuf, "utf8ToUnicode [%s] failed!", chStr.c_str());
+			LogError("_utf8ToUni failed.");
 			LogError(logBuf);
 			return false;
 		}
 		if(sizeof(uniStr) - len <= 5)
 		{
 			sprintf(logBuf, "%s too long!", chStr.c_str());
 			LogError(logBuf);
 			return false;
 		}
@ -71,23 +67,14 @@ namespace CppJieba
 	bool Segment::cutRMM(const string& chStr, vector<string>& res)
 	{
 		res.clear();
 		char logBuf[bufSize];
 		char utfBuf[bufSize];
 		ChUnicode uniStr[bufSize];
 		memset(uniStr, 0, sizeof(uniStr));
 		size_t len = utf8ToUnicode(chStr.c_str(), chStr.size(), uniStr);
 		size_t len = _utf8ToUni(chStr, uniStr, bufSize);
 		if(0 == len)
 		{
-			sprintf(logBuf, "utf8ToUnicode [%s] failed!", chStr.c_str());
+			LogError("_utf8ToUni failed.");
 			LogError(logBuf);
 			return false;
 		}
 		if(sizeof(uniStr) - len <= 5)
 		{
 			sprintf(logBuf, "%s too long!", chStr.c_str());
 			LogError(logBuf);
 			return false;
 		}
@ -120,6 +107,28 @@ namespace CppJieba
 		}
 		return true;
 	}
 	/**
 	 * Convert chStr to ChUnicode units through utf8ToUnicode() and validate the result.
 	 *
 	 * @param chStr  input string; its bytes and byte length are handed to utf8ToUnicode().
 	 * @param uniStr caller-supplied destination buffer.
 	 * @param size   capacity value the converted length is checked against.
 	 * @return the length reported by utf8ToUnicode(), or 0 (after logging) when the
 	 *         conversion reports 0 or when at most 5 units of `size` would remain.
 	 */
 	size_t Segment::_utf8ToUni(const string& chStr, ChUnicode* uniStr, size_t size)
 	{
 		char logBuf[bufSize];
 		// NOTE(review): `size` is not forwarded to utf8ToUnicode(), so the capacity
 		// check below only runs after conversion - confirm it cannot overrun uniStr.
 		size_t len = utf8ToUnicode(chStr.c_str(), chStr.size(), uniStr);
 		if(0 == len)
 		{
 			// chStr has arbitrary length; bound the write so logBuf cannot overflow.
 			snprintf(logBuf, sizeof(logBuf), "utf8ToUnicode [%s] failed!", chStr.c_str());
 			LogError(logBuf);
 			return 0;
 		}
 		// Test len >= size first: the unsigned subtraction would wrap around for
 		// len > size and wrongly accept an over-long result.
 		if(len >= size || size - len <= 5)
 		{
 			// This branch is reached for long inputs, so the bounded write matters most here.
 			snprintf(logBuf, sizeof(logBuf), "%s too long!", chStr.c_str());
 			LogError(logBuf);
 			return 0;
 		}
 		return len;
 	}
 }
--- a/Segment.h
+++ b/Segment.h
@ -16,9 +16,13 @@ namespace CppJieba
 			bool init(const char* const dictFilePath);
 			bool destroy();
 		public:
 			bool cutDAG(const string& chStr, vector<string>& res);
 			bool cutMM(const string& chStr, vector<string>& res);
 			bool cutRMM(const string& chStr, vector<string>& res);
 		private:
 			size_t _utf8ToUni(const string& chStr, ChUnicode* uniStr, size_t size);
 		private:
 			enum {bufSize = 1024};
--- a/Trie.cpp
+++ b/Trie.cpp
@ -183,7 +183,7 @@ namespace CppJieba
 		for(int i = 0; i < len; i++)
 		{
 			ChUnicode chWord = chUniStr[i];
-			TrieNodeHashMap::const_iterator iter = p->hmap.find(chWord);
+			TrieNodeMap::const_iterator iter = p->hmap.find(chWord);
 			if(iter != p->hmap.end())
 			{
 				TrieNode * next = iter->second;
@ -240,7 +240,7 @@ namespace CppJieba
    bool Trie::_destroyNode(TrieNode* node)
    {
-        for(TrieNodeHashMap::iterator it = node->hmap.begin(); it != node->hmap.end(); it++)
+        for(TrieNodeMap::iterator it = node->hmap.begin(); it != node->hmap.end(); it++)
        {
            TrieNode* next = it->second;
            _destroyNode(next);
@ -257,7 +257,7 @@ namespace CppJieba
            LogError("failed! node is null.");
            return;
        }
-        for(TrieNodeHashMap::const_iterator it = node->hmap.begin(); it != node->hmap.end(); it++)
+        for(TrieNodeMap::const_iterator it = node->hmap.begin(); it != node->hmap.end(); it++)
        {
            char utfBuf[8];
            ChUnicode chBuf[1];
--- a/Trie.h
+++ b/Trie.h
@ -19,9 +19,7 @@ namespace CppJieba
    using namespace CPPCOMMON;
    using namespace std;
    //using __gnu_cxx::hash_map;
-    typedef uint16_t ChUnicode;
+	typedef map<ChUnicode, struct TrieNode*> TrieNodeMap;
 	const size_t ChUniMaxLen = 1024;
    typedef map<ChUnicode, struct TrieNode*> TrieNodeHashMap;
 	struct TrieNodeInfo
 	{
@ -36,7 +34,7 @@ namespace CppJieba
 	struct TrieNode
    {
-        TrieNodeHashMap hmap;
+        TrieNodeMap hmap;
        bool isLeaf;
 		unsigned int nodeInfoVecPos;
--- a/globals.h
+++ b/globals.h
@ -1,6 +1,13 @@
 // Project-wide constants and typedefs.
 #ifndef GLOBALS_H
 #define GLOBALS_H
 // uint16_t is used below; include its header here so this file does not depend
 // on whatever the including translation unit pulled in first.
 #include <stdint.h>
 #include <map>
 //file path
 const char * const DICT_FILE_PATH = "dict.txt";
 //typedefs
 // 16-bit unit type used for converted text
 typedef uint16_t ChUnicode;
 #endif