mirror of
https://github.com/fxsjy/jieba.git
synced 2025-07-10 00:01:33 +08:00
为函数textrank增加参数allowPOS,并修改extract_tags的参数allowPOS与textrank保持一致。
This commit is contained in:
parent
bab5f362ba
commit
a336e26403
@ -59,14 +59,14 @@ def set_stop_words(stop_words_path):
|
||||
for line in lines:
|
||||
STOP_WORDS.add(line)
|
||||
|
||||
def extract_tags(sentence, topK=20, withWeight=False, allowPOS=[]):
|
||||
def extract_tags(sentence, topK=20, withWeight=False, allowPOS=['ns', 'n', 'vn', 'v']):
|
||||
"""
|
||||
Extract keywords from sentence using TF-IDF algorithm.
|
||||
Parameter:
|
||||
- topK: return how many top keywords. `None` for all possible words.
|
||||
- withWeight: if True, return a list of (word, weight);
|
||||
if False, return a list of words.
|
||||
- allowPOS: the allowed POS list eg. ['n'].
|
||||
- allowPOS: the allowed POS list, e.g. ['ns', 'n', 'vn', 'v'].
|
||||
if the POS of w is not in this list, it will be filtered.
|
||||
"""
|
||||
global STOP_WORDS, idf_loader
|
||||
|
@ -48,15 +48,17 @@ class UndirectWeightedGraph:
|
||||
return ws
|
||||
|
||||
|
||||
def textrank(sentence, topK=10, withWeight=False):
|
||||
def textrank(sentence, topK=10, withWeight=False, allowPOS=['ns', 'n', 'vn', 'v']):
|
||||
"""
|
||||
Extract keywords from sentence using TextRank algorithm.
|
||||
Parameter:
|
||||
- topK: return how many top keywords. `None` for all possible words.
|
||||
- withWeight: if True, return a list of (word, weight);
|
||||
if False, return a list of words.
|
||||
- allowPOS: the allowed POS list, e.g. ['ns', 'n', 'vn', 'v'].
|
||||
if the POS of w is not in this list, it will be filtered.
|
||||
"""
|
||||
pos_filt = frozenset(('ns', 'n', 'vn', 'v'))
|
||||
pos_filt = frozenset(allowPOS)
|
||||
g = UndirectWeightedGraph()
|
||||
cm = collections.defaultdict(int)
|
||||
span = 5
|
||||
|
Loading…
x
Reference in New Issue
Block a user