Mirror of https://github.com/fxsjy/jieba.git (synced 2025-07-10 00:01:33 +08:00)
API changes:
* class jieba.Tokenizer, jieba.posseg.POSTokenizer
* class jieba.analyse.TFIDF, jieba.analyse.TextRank
* global functions are mapped to jieba.(posseg.)dt, the default (POS)Tokenizer
* multiprocessing only works with jieba.(posseg.)dt
* new lcut, lcut_for_search functions that return a list
* jieba.analyse.textrank now returns 20 items by default

Tests:
* added test_lock.py to test multithread locking
* demo.py now contains most of the examples in README
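A quick sketch of the reworked surface (sample sentences are the ones used in the jieba README; output comments are illustrative, not guaranteed):

#!/usr/bin/env python
# -*- coding: utf-8 -*-
import jieba
import jieba.analyse
import jieba.posseg as pseg

# lcut/lcut_for_search return lists directly; cut/cut_for_search
# still return generators.
print(jieba.lcut('我来到北京清华大学'))            # e.g. ['我', '来到', '北京', '清华大学']
print(jieba.lcut_for_search('小明硕士毕业于中国科学院计算所'))

# Module-level functions delegate to the default Tokenizer instance,
# jieba.dt; independent instances can carry their own dictionaries.
tok = jieba.Tokenizer()
print(tok.lcut('我来到北京清华大学'))

# POS tagging mirrors this split: jieba.posseg.dt is the default POSTokenizer.
print(pseg.lcut('我爱北京天安门'))

# jieba.analyse.textrank returns the top 20 keywords by default (topK=20).
text = '此外,公司拟对全资子公司吉林欧亚置业有限公司增资4.3亿元,增资后,吉林欧亚置业注册资本由7000万元增加到5亿元。'
print(jieba.analyse.textrank(text))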
test_lock.py (43 lines, 1.1 KiB, Python)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import jieba
import threading


def inittokenizer(tokenizer, group):
    # Initialize (i.e. load the dictionary of) the given tokenizer and
    # log which thread did the work.
    print('===> Thread %s:%s started' % (group, threading.current_thread().ident))
    tokenizer.initialize()
    print('<=== Thread %s:%s finished' % (group, threading.current_thread().ident))


# Phase 1: many independent Tokenizer instances, one thread each.
tokrs1 = [jieba.Tokenizer() for n in range(5)]
tokrs2 = [jieba.Tokenizer('../extra_dict/dict.txt.small') for n in range(5)]

thr1 = [threading.Thread(target=inittokenizer, args=(tokr, 1)) for tokr in tokrs1]
thr2 = [threading.Thread(target=inittokenizer, args=(tokr, 2)) for tokr in tokrs2]
for thr in thr1:
    thr.start()
for thr in thr2:
    thr.start()
for thr in thr1:
    thr.join()
for thr in thr2:
    thr.join()

del tokrs1, tokrs2

print('='*40)

# Phase 2: two shared Tokenizer instances, five threads racing to
# initialize each one; the per-instance lock must serialize them.
tokr1 = jieba.Tokenizer()
tokr2 = jieba.Tokenizer('../extra_dict/dict.txt.small')

thr1 = [threading.Thread(target=inittokenizer, args=(tokr1, 1)) for n in range(5)]
thr2 = [threading.Thread(target=inittokenizer, args=(tokr2, 2)) for n in range(5)]
for thr in thr1:
    thr.start()
for thr in thr2:
    thr.start()
for thr in thr1:
    thr.join()
for thr in thr2:
    thr.join()
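The test appears to exercise the initialization lock in two ways: phase 1 initializes ten distinct Tokenizer instances concurrently (no contention, since each instance holds its own lock), while phase 2 points five threads at the same instance, so the lock must ensure the dictionary is loaded exactly once and the remaining initialize() calls return quickly. Running the script with CPython should show all phase 2 threads of a group finishing only after the first one has completed loading.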