add a sample script about tags extraction

This commit is contained in:
Sun Junyi 2012-10-16 13:25:35 +08:00
parent 3fe92f8520
commit 15a5a2d50e
4 changed files with 227 additions and 194 deletions

33
test/extract_tags.py Normal file
View File

@ -0,0 +1,33 @@
import sys
sys.path.append('../')
import jieba
import jieba.analyse
from optparse import OptionParser
USAGE = "usage: python extract_tags.py [file name] -k [top k]"
DEFAULT_TOP_K = 10


def main(argv=None):
    """Print the tags jieba extracts from a file, joined by commas.

    Args:
        argv: Command-line arguments without the program name. ``None``
            lets optparse fall back to ``sys.argv[1:]``.

    Returns:
        0 after the tags were printed, 1 when no file name was supplied
        (the usage string is printed in that case).
    """
    parser = OptionParser(USAGE)
    # Let optparse convert and validate -k, so a non-numeric value yields a
    # usage error instead of an uncaught ValueError traceback.
    parser.add_option("-k", dest="topK", type="int", default=DEFAULT_TOP_K)
    opt, args = parser.parse_args(argv)
    if not args:
        print(USAGE)
        return 1

    # The file is still read as raw bytes; the context manager guarantees the
    # handle is closed even if reading fails.
    with open(args[0], 'rb') as source:
        content = source.read()

    tags = jieba.analyse.extract_tags(content, topK=opt.topK)
    print(",".join(tags))
    return 0


if __name__ == "__main__":
    sys.exit(main())