mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
1. Remove stop words from span windows (skipped words do not count toward the span)
2. Update unit tests
This commit is contained in:
parent
04c176de08
commit
d5a52a8e7b
@ -135,8 +135,9 @@ namespace cppjieba {
|
||||
if (IsSingleWord(words[i]) || stopWords_.find(words[i]) != stopWords_.end()) {
|
||||
continue;
|
||||
}
|
||||
for(size_t j=i+1;j<i+span && j<words.size();j++){
|
||||
for(size_t j=i+1,skip=0;j<i+span+skip && j<words.size();j++){
|
||||
if (IsSingleWord(words[j]) || stopWords_.find(words[j]) != stopWords_.end()) {
|
||||
skip++;
|
||||
continue;
|
||||
}
|
||||
graph.addEdge(words[i],words[j],1);
|
||||
|
@ -44,7 +44,7 @@ TEST(TextRankExtractorTest, Test1) {
|
||||
size_t topN = 5;
|
||||
Extractor.Extract(s, wordweights, topN);
|
||||
res << wordweights;
|
||||
ASSERT_EQ(res, "[\"\xE4\xB8\x93\xE4\xB8\x9A|[\"36\"]|1\", \"CEO|[\"94\"]|0.95375\", \"\xE6\x89\x8B\xE6\x89\xB6\xE6\x8B\x96\xE6\x8B\x89\xE6\x9C\xBA|[\"21\"]|0.801701\", \"\xE5\xBD\x93\xE4\xB8\x8A|[\"87\"]|0.798968\", \"\xE8\xB5\xB0\xE4\xB8\x8A|[\"100\"]|0.775505\"]");
|
||||
ASSERT_EQ(res, "[\"\xE5\xBD\x93\xE4\xB8\x8A|[\"87\"]|1\", \"\xE4\xB8\x8D\xE7\x94\xA8|[\"48\"]|0.989848\", \"\xE5\xA4\x9A\xE4\xB9\x85|[\"54\"]|0.985126\", \"\xE5\x8A\xA0\xE8\x96\xAA|[\"78\"]|0.983046\", \"\xE5\x8D\x87\xE8\x81\x8C|[\"72\"]|0.980278\"]");
|
||||
// ASSERT_EQ(res, "[\"\xE4\xB8\x93\xE4\xB8\x9A|[\"36\"]|1\", \"CEO|[\"94\"]|0.953149\", \"\xE6\x89\x8B\xE6\x89\xB6\xE6\x8B\x96\xE6\x8B\x89\xE6\x9C\xBA|[\"21\"]|0.794203\", \"\xE5\xBD\x93\xE4\xB8\x8A|[\"87\"]|0.78716\", \"\xE8\xB5\xB0\xE4\xB8\x8A|[\"100\"]|0.767636\"]");
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user