mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
Modify the word-length weight calculation to use log()
This commit is contained in:
parent
1e246d6bb8
commit
562b376869
@ -89,7 +89,7 @@ namespace CppJieba
|
|||||||
|
|
||||||
bool KeyWordExt::_sortWLIDF(vector<WordInfo>& wordInfos)
|
bool KeyWordExt::_sortWLIDF(vector<WordInfo>& wordInfos)
|
||||||
{
|
{
|
||||||
size_t wLenSum = 0;
|
//size_t wLenSum = 0;
|
||||||
for(uint i = 0; i < wordInfos.size(); i++)
|
for(uint i = 0; i < wordInfos.size(); i++)
|
||||||
{
|
{
|
||||||
wordInfos[i].wLen = getUtf8WordLen(wordInfos[i].word);
|
wordInfos[i].wLen = getUtf8WordLen(wordInfos[i].word);
|
||||||
@ -98,14 +98,15 @@ namespace CppJieba
|
|||||||
LogFatal("wLen is 0");
|
LogFatal("wLen is 0");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
wLenSum += wordInfos[i].wLen;
|
//wLenSum += wordInfos[i].wLen;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
if(0 == wLenSum)
|
if(0 == wLenSum)
|
||||||
{
|
{
|
||||||
LogFatal("wLenSum == 0.");
|
LogFatal("wLenSum == 0.");
|
||||||
return false;
|
return false;
|
||||||
}
|
}*/
|
||||||
|
|
||||||
for(uint i = 0; i < wordInfos.size(); i++)
|
for(uint i = 0; i < wordInfos.size(); i++)
|
||||||
{
|
{
|
||||||
@ -117,7 +118,7 @@ namespace CppJieba
|
|||||||
{
|
{
|
||||||
LogFatal("getUtf8WordLen(%s) return 0");
|
LogFatal("getUtf8WordLen(%s) return 0");
|
||||||
}
|
}
|
||||||
wInfo.weight = 1.0 * wLen / wLenSum * wInfo.idf;
|
wInfo.weight = log(double(wLen + 1)) * wInfo.idf;
|
||||||
}
|
}
|
||||||
sort(wordInfos.begin(), wordInfos.end(), _wordInfoCompare);
|
sort(wordInfos.begin(), wordInfos.end(), _wordInfoCompare);
|
||||||
return true;
|
return true;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user