diff --git a/jieba/analyse/__init__.py b/jieba/analyse/__init__.py index ea7e66c..c56b9a4 100755 --- a/jieba/analyse/__init__.py +++ b/jieba/analyse/__init__.py @@ -59,14 +59,14 @@ def set_stop_words(stop_words_path): for line in lines: STOP_WORDS.add(line) -def extract_tags(sentence, topK=20, withWeight=False, allowPOS=[]): +def extract_tags(sentence, topK=20, withWeight=False, allowPOS=['ns', 'n', 'vn', 'v']): """ Extract keywords from sentence using TF-IDF algorithm. Parameter: - topK: return how many top keywords. `None` for all possible words. - withWeight: if True, return a list of (word, weight); if False, return a list of words. - - allowPOS: the allowed POS list eg. ['n']. + - allowPOS: the allowed POS list eg. ['ns', 'n', 'vn', 'v']. if the POS of w is not in this list,it will be filtered. """ global STOP_WORDS, idf_loader diff --git a/jieba/analyse/textrank.py b/jieba/analyse/textrank.py index 9ac9ece..739c60e 100644 --- a/jieba/analyse/textrank.py +++ b/jieba/analyse/textrank.py @@ -48,15 +48,17 @@ class UndirectWeightedGraph: return ws -def textrank(sentence, topK=10, withWeight=False): +def textrank(sentence, topK=10, withWeight=False, allowPOS=['ns', 'n', 'vn', 'v']): """ Extract keywords from sentence using TextRank algorithm. Parameter: - topK: return how many top keywords. `None` for all possible words. - withWeight: if True, return a list of (word, weight); if False, return a list of words. + - allowPOS: the allowed POS list eg. ['ns', 'n', 'vn', 'v']. + if the POS of w is not in this list,it will be filtered. """ - pos_filt = frozenset(('ns', 'n', 'vn', 'v')) + pos_filt = frozenset(allowPOS) g = UndirectWeightedGraph() cm = collections.defaultdict(int) span = 5