mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
68 lines
1.4 KiB
C++
68 lines
1.4 KiB
C++
#include <iostream>
|
|
#include <fstream>
|
|
#include "../src/headers.h"
|
|
|
|
using namespace CppJieba;
|
|
|
|
|
|
void testKeyWordExt(const char * dictPath, const char * filePath)
|
|
{
|
|
KeyWordExt ext;
|
|
if(!ext.init(dictPath))
|
|
{
|
|
return;
|
|
}
|
|
|
|
ifstream ifile(filePath);
|
|
vector<KeyWordInfo> res;
|
|
string line;
|
|
while(getline(ifile, line))
|
|
{
|
|
res.clear();
|
|
if(!line.empty())
|
|
{
|
|
ext.extract(line, res, 20);
|
|
cout<<line<<'\n'<<joinWordInfos(res)<<endl;
|
|
}
|
|
|
|
}
|
|
ext.dispose();
|
|
}
|
|
|
|
// Dictionary path used by main() when no --dictpath option is supplied.
char const * const DEFAULT_DICTPATH = "../dicts/jieba.dict.utf8";
|
|
|
|
int main(int argc, char ** argv)
|
|
{
|
|
if(2 > argc)
|
|
{
|
|
cout<<"usage: \n\t"<<argv[0]<<" [options] <filename>\n"
|
|
<<"options:\n"
|
|
<<"\t--dictpath\tIf not specified, the default is "<<DEFAULT_DICTPATH<<"\n"
|
|
<<"\t--encoding\tSupported encoding methods are [gbk, utf-8] for now. \n\t\t\tIf not specified, the default is utf-8.\n"
|
|
<<"examples:\n"
|
|
<<"\t"<<argv[0]<<" testlines.utf8 --encoding utf-8 --dictpath ../dicts/jieba.dict.utf8\n"
|
|
<<"\t"<<argv[0]<<" testlines.gbk --encoding gbk --dictpath ../dicts/jieba.dict.gbk\n"
|
|
<<endl;
|
|
return -1;
|
|
}
|
|
|
|
ArgvContext arg(argc, argv);
|
|
string dictPath = arg["--dictpath"];
|
|
string encoding = arg["--encoding"];
|
|
if("" == dictPath)
|
|
{
|
|
dictPath = DEFAULT_DICTPATH;
|
|
}
|
|
if("gbk" == encoding)
|
|
{
|
|
TransCode::setGbkEnc();
|
|
}
|
|
else
|
|
{
|
|
TransCode::setUtf8Enc();
|
|
}
|
|
|
|
testKeyWordExt(dictPath.c_str(), arg[1].c_str());
|
|
return 0;
|
|
}
|