mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
modify the word-length weight calculation to use log()
This commit is contained in:
parent
1e246d6bb8
commit
562b376869
@ -89,7 +89,7 @@ namespace CppJieba
|
||||
|
||||
bool KeyWordExt::_sortWLIDF(vector<WordInfo>& wordInfos)
|
||||
{
|
||||
size_t wLenSum = 0;
|
||||
//size_t wLenSum = 0;
|
||||
for(uint i = 0; i < wordInfos.size(); i++)
|
||||
{
|
||||
wordInfos[i].wLen = getUtf8WordLen(wordInfos[i].word);
|
||||
@ -98,14 +98,15 @@ namespace CppJieba
|
||||
LogFatal("wLen is 0");
|
||||
return false;
|
||||
}
|
||||
wLenSum += wordInfos[i].wLen;
|
||||
//wLenSum += wordInfos[i].wLen;
|
||||
}
|
||||
|
||||
/*
|
||||
if(0 == wLenSum)
|
||||
{
|
||||
LogFatal("wLenSum == 0.");
|
||||
return false;
|
||||
}
|
||||
}*/
|
||||
|
||||
for(uint i = 0; i < wordInfos.size(); i++)
|
||||
{
|
||||
@ -117,7 +118,7 @@ namespace CppJieba
|
||||
{
|
||||
LogFatal("getUtf8WordLen(%s) return 0");
|
||||
}
|
||||
wInfo.weight = 1.0 * wLen / wLenSum * wInfo.idf;
|
||||
wInfo.weight = log(double(wLen + 1)) * wInfo.idf;
|
||||
}
|
||||
sort(wordInfos.begin(), wordInfos.end(), _wordInfoCompare);
|
||||
return true;
|
||||
|
Loading…
x
Reference in New Issue
Block a user