From 562b376869ef14a9da35806379e24c168387a4a1 Mon Sep 17 00:00:00 2001 From: gwdwyy Date: Sat, 20 Jul 2013 13:19:17 +0800 Subject: [PATCH] modify the wordlen weight calc by log() --- src/KeyWordExt.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/KeyWordExt.cpp b/src/KeyWordExt.cpp index 0120954..e6c6f54 100644 --- a/src/KeyWordExt.cpp +++ b/src/KeyWordExt.cpp @@ -89,7 +89,7 @@ namespace CppJieba bool KeyWordExt::_sortWLIDF(vector& wordInfos) { - size_t wLenSum = 0; + //size_t wLenSum = 0; for(uint i = 0; i < wordInfos.size(); i++) { wordInfos[i].wLen = getUtf8WordLen(wordInfos[i].word); @@ -98,14 +98,15 @@ namespace CppJieba LogFatal("wLen is 0"); return false; } - wLenSum += wordInfos[i].wLen; + //wLenSum += wordInfos[i].wLen; } + /* if(0 == wLenSum) { LogFatal("wLenSum == 0."); return false; - } + }*/ for(uint i = 0; i < wordInfos.size(); i++) { @@ -117,7 +118,7 @@ namespace CppJieba { LogFatal("getUtf8WordLen(%s) return 0"); } - wInfo.weight = 1.0 * wLen / wLenSum * wInfo.idf; + wInfo.weight = log(double(wLen + 1)) * wInfo.idf; } sort(wordInfos.begin(), wordInfos.end(), _wordInfoCompare); return true;