mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
little modification
This commit is contained in:
parent
3ced451212
commit
75367a20c9
@ -76,11 +76,7 @@ namespace CppJieba
|
|||||||
|
|
||||||
_calcDP(segmentChars);
|
_calcDP(segmentChars);
|
||||||
|
|
||||||
if(!_cut(segmentChars, res))
|
_cut(segmentChars, res);
|
||||||
{
|
|
||||||
LogError("_cut failed.");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -127,7 +123,7 @@ namespace CppJieba
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
bool _cut(const vector<SegmentChar>& segmentChars, vector<Unicode>& res)const
|
void _cut(const vector<SegmentChar>& segmentChars, vector<Unicode>& res) const
|
||||||
{
|
{
|
||||||
size_t i = 0;
|
size_t i = 0;
|
||||||
while(i < segmentChars.size())
|
while(i < segmentChars.size())
|
||||||
@ -144,7 +140,6 @@ namespace CppJieba
|
|||||||
i++;
|
i++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
30
src/Trie.hpp
30
src/Trie.hpp
@ -87,6 +87,7 @@ namespace CppJieba
|
|||||||
}
|
}
|
||||||
return ptNode->ptValue;
|
return ptNode->ptValue;
|
||||||
}
|
}
|
||||||
|
// Aho-Corasick automaton
|
||||||
void find(
|
void find(
|
||||||
typename KeyContainerType::const_iterator begin,
|
typename KeyContainerType::const_iterator begin,
|
||||||
typename KeyContainerType::const_iterator end,
|
typename KeyContainerType::const_iterator end,
|
||||||
@ -96,26 +97,32 @@ namespace CppJieba
|
|||||||
res.resize(end - begin);
|
res.resize(end - begin);
|
||||||
const TrieNodeType * now = _root;
|
const TrieNodeType * now = _root;
|
||||||
typename TrieNodeType::NextMap::const_iterator iter;
|
typename TrieNodeType::NextMap::const_iterator iter;
|
||||||
for (size_t i = 0; i < end - begin; i++) {
|
for (size_t i = 0; i < end - begin; i++)
|
||||||
|
{
|
||||||
bool flag = false;
|
bool flag = false;
|
||||||
res[i].uniCh = *(begin + i);
|
res[i].uniCh = *(begin + i);
|
||||||
assert(res[i].dag.empty());
|
assert(res[i].dag.empty());
|
||||||
res[i].dag.reserve(4);//TODO
|
res[i].dag.reserve(2);
|
||||||
while( now != _root && (now->next == NULL || (iter = now->next->find(*(begin + i))) == now->next->end())) {
|
while( now != _root && (now->next == NULL || (iter = now->next->find(*(begin + i))) == now->next->end()))
|
||||||
|
{
|
||||||
now = now->fail;
|
now = now->fail;
|
||||||
}
|
}
|
||||||
if(now->next == NULL || (iter = now->next->find(*(begin + i))) == now->next->end()) {
|
if(now->next == NULL || (iter = now->next->find(*(begin + i))) == now->next->end())
|
||||||
|
{
|
||||||
now = _root;
|
now = _root;
|
||||||
} else {
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
now = iter->second;
|
now = iter->second;
|
||||||
const TrieNodeType * temp = now;
|
const TrieNodeType * temp = now;
|
||||||
while(temp != _root) {
|
while(temp != _root)
|
||||||
if (temp->ptValue) {
|
{
|
||||||
string str;
|
if (temp->ptValue)
|
||||||
TransCode::encode(temp->ptValue->word, str);
|
{
|
||||||
size_t pos = i - temp->ptValue->word.size() + 1;
|
size_t pos = i - temp->ptValue->word.size() + 1;
|
||||||
res[pos].dag.push_back(pair<typename KeysContainerType::size_type, const ValueType* >(i, temp->ptValue));
|
res[pos].dag.push_back(pair<typename KeysContainerType::size_type, const ValueType* >(i, temp->ptValue));
|
||||||
if(temp->ptValue->word.size() == 1) {
|
if(pos == i)
|
||||||
|
{
|
||||||
flag = true;
|
flag = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -123,7 +130,8 @@ namespace CppJieba
|
|||||||
assert(temp);
|
assert(temp);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if(!flag) {
|
if(!flag)
|
||||||
|
{
|
||||||
res[i].dag.push_back(pair<typename KeysContainerType::size_type, const ValueType* >(i, NULL));
|
res[i].dag.push_back(pair<typename KeysContainerType::size_type, const ValueType* >(i, NULL));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user