mirror of
https://github.com/fxsjy/jieba.git
synced 2025-07-24 00:00:05 +08:00
add a sample script about tags extraction
This commit is contained in:
parent
3fe92f8520
commit
15a5a2d50e
33
test/extract_tags.py
Normal file
33
test/extract_tags.py
Normal file
@ -0,0 +1,33 @@
|
||||
import sys
|
||||
sys.path.append('../')
|
||||
|
||||
import jieba
|
||||
import jieba.analyse
|
||||
from optparse import OptionParser
|
||||
|
||||
# Usage line handed to optparse and also printed when no file name is given.
USAGE = "usage: python extract_tags.py [file name] -k [top k]"

# Command line: positional arguments plus an optional -k value, which optparse
# stores (as a string, or None when absent) on the options object as ``topK``.
parser = OptionParser(usage=USAGE)
parser.add_option("-k", dest="topK")
(opt, args) = parser.parse_args()
|
||||
|
||||
|
||||
# A positional file name is required; without one, show the usage line and
# exit with a failure status.
if not args:
    # Parenthesised single-argument print works both as the Python 2 print
    # statement and as the Python 3 print function.
    print(USAGE)
    sys.exit(1)

# The first positional argument is the path of the file to read.
file_name = args[0]
|
||||
|
||||
# Number of tags to request: 10 unless -k was given on the command line.
if opt.topK is None:
    topK = 10
else:
    try:
        topK = int(opt.topK)
    except ValueError:
        # Report a bad -k value through optparse (usage + message, then exit)
        # instead of letting the ValueError traceback reach the user.
        parser.error("-k expects an integer, got %r" % opt.topK)
|
||||
|
||||
|
||||
# Read the whole file as raw bytes; the context manager guarantees the handle
# is closed even if the read fails.
with open(file_name, 'rb') as source:
    content = source.read()

# Ask jieba for the topK tags of the file content.
tags = jieba.analyse.extract_tags(content, topK=topK)

# Emit the tags on one line, comma-separated. The parenthesised form is valid
# under both the Python 2 print statement and the Python 3 print function.
print(",".join(tags))
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user