mirror of
https://github.com/fxsjy/jieba.git
synced 2025-07-10 00:01:33 +08:00
36 lines
1.4 KiB
Python
36 lines
1.4 KiB
Python
"""Jieba command line interface."""
|
|
import sys
|
|
import jieba
|
|
from argparse import ArgumentParser
|
|
|
|
# Command-line definition for ``python -m jieba``.
parser = ArgumentParser(
    usage="%s -m jieba [options] filename" % sys.executable,
    description="Jieba command line interface.",
    epilog="If no filename specified, use STDIN instead.")

# The ``version=`` keyword of ArgumentParser is deprecated in Python 2.7 and
# rejected with a TypeError on Python 3.  Registering an explicit "version"
# action works on both, and keeps the same -v/--version flags the old keyword
# used to create.  It is added first so it stays right after -h in --help.
parser.add_argument("-v", "--version", action="version",
                    version="Jieba " + jieba.__version__,
                    help="show program's version number and exit")

# -d with no value falls back to ``const`` (a single space); omitting -d
# entirely uses ``default`` (' / ').
parser.add_argument("-d", "--delimiter", metavar="DELIM", default=' / ',
                    nargs='?', const=' ',
                    help="use DELIM instead of ' / ' for word delimiter; use a space if it is without DELIM")
parser.add_argument("-a", "--cut-all",
                    action="store_true", dest="cutall", default=False,
                    help="full pattern cutting")
# Stored inverted: args.hmm is True unless -n/--no-hmm is given.
parser.add_argument("-n", "--no-hmm", dest="hmm", action="store_false",
                    default=True, help="don't use the Hidden Markov Model")
parser.add_argument("-q", "--quiet", action="store_true", default=False,
                    help="don't print loading messages to stderr")
# Optional positional; when absent the script reads from standard input.
parser.add_argument("filename", nargs='?', help="input file")
|
|
|
|
# Parse the command line, then segment the input line by line and print each
# line's words joined by the chosen delimiter.
args = parser.parse_args()

if args.quiet:
    # 60 is above logging.CRITICAL (50); presumably this silences every
    # message jieba would otherwise log while loading.
    jieba.setLogLevel(60)

# Text type of the running interpreter: ``unicode`` on Python 2, ``str`` on
# Python 3.  Referencing ``unicode`` by name would be a NameError on Python 3.
_py2 = sys.version_info[0] == 2
_text_type = type(u"")

delim = _text_type(args.delimiter)
cutall = args.cutall
hmm = args.hmm
fp = open(args.filename, 'r') if args.filename else sys.stdin

try:
    jieba.initialize()
    # readline() is used instead of iterating the file object so that each
    # line is handled as soon as it arrives (file iteration may read ahead,
    # which delays output when the input is an interactive stdin).
    for ln in iter(fp.readline, ''):
        result = delim.join(jieba.cut(ln.rstrip('\r\n'), cutall, hmm))
        if _py2:
            # Python 2's print needs bytes to avoid an implicit ASCII encode;
            # on Python 3 encoding here would print a b'...' repr instead.
            result = result.encode('utf-8')
        print(result)
finally:
    # Release the input handle even if initialization or cutting fails.
    fp.close()
|