little modification

This commit is contained in:
wyy 2014-11-12 19:45:20 +08:00
parent 3ced451212
commit 75367a20c9
2 changed files with 21 additions and 18 deletions

View File

@ -76,11 +76,7 @@ namespace CppJieba
_calcDP(segmentChars); _calcDP(segmentChars);
if(!_cut(segmentChars, res)) _cut(segmentChars, res);
{
LogError("_cut failed.");
return false;
}
return true; return true;
} }
@ -127,7 +123,7 @@ namespace CppJieba
} }
} }
} }
bool _cut(const vector<SegmentChar>& segmentChars, vector<Unicode>& res)const void _cut(const vector<SegmentChar>& segmentChars, vector<Unicode>& res) const
{ {
size_t i = 0; size_t i = 0;
while(i < segmentChars.size()) while(i < segmentChars.size())
@ -144,7 +140,6 @@ namespace CppJieba
i++; i++;
} }
} }
return true;
} }

View File

@ -87,6 +87,7 @@ namespace CppJieba
} }
return ptNode->ptValue; return ptNode->ptValue;
} }
// Aho-Corasick automaton
void find( void find(
typename KeyContainerType::const_iterator begin, typename KeyContainerType::const_iterator begin,
typename KeyContainerType::const_iterator end, typename KeyContainerType::const_iterator end,
@ -96,26 +97,32 @@ namespace CppJieba
res.resize(end - begin); res.resize(end - begin);
const TrieNodeType * now = _root; const TrieNodeType * now = _root;
typename TrieNodeType::NextMap::const_iterator iter; typename TrieNodeType::NextMap::const_iterator iter;
for (size_t i = 0; i < end - begin; i++) { for (size_t i = 0; i < end - begin; i++)
{
bool flag = false; bool flag = false;
res[i].uniCh = *(begin + i); res[i].uniCh = *(begin + i);
assert(res[i].dag.empty()); assert(res[i].dag.empty());
res[i].dag.reserve(4);//TODO res[i].dag.reserve(2);
while( now != _root && (now->next == NULL || (iter = now->next->find(*(begin + i))) == now->next->end())) { while( now != _root && (now->next == NULL || (iter = now->next->find(*(begin + i))) == now->next->end()))
{
now = now->fail; now = now->fail;
} }
if(now->next == NULL || (iter = now->next->find(*(begin + i))) == now->next->end()) { if(now->next == NULL || (iter = now->next->find(*(begin + i))) == now->next->end())
{
now = _root; now = _root;
} else { }
else
{
now = iter->second; now = iter->second;
const TrieNodeType * temp = now; const TrieNodeType * temp = now;
while(temp != _root) { while(temp != _root)
if (temp->ptValue) { {
string str; if (temp->ptValue)
TransCode::encode(temp->ptValue->word, str); {
size_t pos = i - temp->ptValue->word.size() + 1; size_t pos = i - temp->ptValue->word.size() + 1;
res[pos].dag.push_back(pair<typename KeysContainerType::size_type, const ValueType* >(i, temp->ptValue)); res[pos].dag.push_back(pair<typename KeysContainerType::size_type, const ValueType* >(i, temp->ptValue));
if(temp->ptValue->word.size() == 1) { if(pos == i)
{
flag = true; flag = true;
} }
} }
@ -123,7 +130,8 @@ namespace CppJieba
assert(temp); assert(temp);
} }
} }
if(!flag) { if(!flag)
{
res[i].dag.push_back(pair<typename KeysContainerType::size_type, const ValueType* >(i, NULL)); res[i].dag.push_back(pair<typename KeysContainerType::size_type, const ValueType* >(i, NULL));
} }
} }