mirror of
https://github.com/fxsjy/jieba.git
synced 2025-07-10 00:01:33 +08:00
讓 jieba 可以自行指定 cache_file 產生的目錄位置,使 jieba 能在 Read-only file system 環境中運行
1. 在呼叫 jieba.cut() 等相關動作前,先透過 jieba.tmp_dir 指定 cache_file 的目錄位置。
2. 當應用環境為 Read-only file system 時,可透過預先產生 cache_file 的機制,讓 jieba 正常運行。
3. 實際案例為 Google App Engine 和 Heroku:前者免費版僅有 128MB 記憶體空間,無法運行;後者免費環境有 512MB,可正常運行。發佈前,先在本地端產生 cache_file,再連同 cache_file 一併發佈至 Google App Engine 或 Heroku 環境上即可使用。
This commit is contained in:
parent
4b4aff6d89
commit
62433a3205
@@ -23,6 +23,7 @@ total = 0
|
||||
user_word_tag_tab = {}
|
||||
initialized = False
|
||||
pool = None
|
||||
tmp_dir = None
|
||||
|
||||
_curpath = os.path.normpath(
|
||||
os.path.join(os.getcwd(), os.path.dirname(__file__)))
|
||||
@@ -61,7 +62,7 @@ def gen_pfdict(f_name):
|
||||
|
||||
|
||||
def initialize(dictionary=None):
|
||||
global FREQ, total, initialized, DICTIONARY, DICT_LOCK
|
||||
global FREQ, total, initialized, DICTIONARY, DICT_LOCK, tmp_dir
|
||||
if not dictionary:
|
||||
dictionary = DICTIONARY
|
||||
with DICT_LOCK:
|
||||
@@ -73,9 +74,9 @@ def initialize(dictionary=None):
|
||||
t1 = time.time()
|
||||
# default dictionary
|
||||
if abs_path == os.path.join(_curpath, "dict.txt"):
|
||||
cache_file = os.path.join(tempfile.gettempdir(), "jieba.cache")
|
||||
cache_file = os.path.join(tmp_dir if tmp_dir else tempfile.gettempdir(),"jieba.cache")
|
||||
else: # custom dictionary
|
||||
cache_file = os.path.join(tempfile.gettempdir(), "jieba.u%s.cache" % md5(
|
||||
cache_file = os.path.join(tmp_dir if tmp_dir else tempfile.gettempdir(),"jieba.u%s.cache" % md5(
|
||||
abs_path.encode('utf-8', 'replace')).hexdigest())
|
||||
|
||||
load_from_cache_fail = True
|
||||
|
Loading…
x
Reference in New Issue
Block a user