mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
update cppcommon && adding utf-8
This commit is contained in:
parent
c8d128a1d3
commit
9302f54e04
@ -5,7 +5,7 @@
|
||||
using namespace CppJieba;
|
||||
|
||||
Segment seg;
|
||||
bool init()
|
||||
bool init(const char * const filePath)
|
||||
{
|
||||
if(!seg.init())
|
||||
{
|
||||
@ -13,7 +13,7 @@ bool init()
|
||||
return false;
|
||||
}
|
||||
|
||||
if(!seg.loadSegDict("../dicts/jieba.dict.gbk"))
|
||||
if(!seg.loadSegDict(filePath))
|
||||
{
|
||||
cout<<"seg loadDict failed."<<endl;
|
||||
return false;
|
||||
@ -49,16 +49,22 @@ bool dispose()
|
||||
|
||||
int main(int argc, char ** argv)
|
||||
{
|
||||
//map<string, string> mpss;
|
||||
//getArgvMap(argc, argv, mpss);
|
||||
//string enc = getMap<string, string>(mpss, "--encoding", "");
|
||||
map<string, string> mpss;
|
||||
getArgvMap(argc, argv, mpss);
|
||||
string enc = getMap<string, string>(mpss, "--encoding", "");
|
||||
string dictPath = getMap<string, string>(mpss, "--dictpath", "../dicts/jieba.dict.gbk");
|
||||
|
||||
if( argc < 2)
|
||||
if(argc < 2)
|
||||
{
|
||||
cout<<"usage: "<<"\n\t"<<argv[0]<<" <filename>"<<endl;
|
||||
cout<<"usage: \n\t"<<argv[0]<<" <filename> [options]\n"
|
||||
<<"options:\n"
|
||||
<<"\t--dictpath\tIf is not specified, the default is ../dicts/jieba.dict.gbk\n"
|
||||
<<"\t--encoding\tSupported encoding methods are [gbk, utf-8] for now. \n\t\t\tIf is not specified, the default is gbk."
|
||||
<<endl;
|
||||
|
||||
return -1;
|
||||
}
|
||||
init();
|
||||
init(dictPath.c_str());
|
||||
run(argv[1]);
|
||||
dispose();
|
||||
return 0;
|
||||
|
@ -1,18 +1,45 @@
|
||||
/************************************
|
||||
* file enc : ASCII
|
||||
* author : wuyanyi09@gmail.com
|
||||
************************************/
|
||||
#include "TransCode.h"
|
||||
|
||||
namespace CppJieba
|
||||
{
|
||||
string TransCode::_enc;
|
||||
vector<string> TransCode::_encVec;
|
||||
bool TransCode::_isInitted = TransCode::init();
|
||||
|
||||
bool TransCode::init()
|
||||
{
|
||||
_encVec.push_back("utf-8");
|
||||
_encVec.push_back("gbk");
|
||||
_enc = _encVec[0];
|
||||
return true;
|
||||
}
|
||||
|
||||
// Nothing to construct: the state defined in this file is static.
TransCode::TransCode()
{
}
|
||||
// Nothing to release.
TransCode::~TransCode()
{
}
|
||||
|
||||
bool TransCode::setEnc(const string& enc)
|
||||
{
|
||||
if(_encVec.empty())
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
if(isInVec<string>(_encVec, enc))
|
||||
{
|
||||
_enc = enc;
|
||||
}
|
||||
else
|
||||
{
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool TransCode::strToVec(const string& str, vector<uint16_t>& vec)
|
||||
{
|
||||
vec.clear();
|
||||
|
@ -15,9 +15,19 @@ namespace CppJieba
|
||||
using namespace CPPCOMMON;
|
||||
class TransCode
|
||||
{
|
||||
private:
|
||||
static string _enc;
|
||||
static vector<string> _encVec;
|
||||
static bool _isInitted;
|
||||
|
||||
public:
|
||||
static bool setEnc(const string& enc);
|
||||
|
||||
public:
|
||||
TransCode();
|
||||
~TransCode();
|
||||
public:
|
||||
static bool init();
|
||||
public:
|
||||
static bool strToVec(const string& str, vector<uint16_t>& vec);
|
||||
static string vecToStr(const vector<uint16_t>& vec);
|
||||
|
@ -23,6 +23,59 @@ namespace CPPCOMMON
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
 * Splits a command line into positional arguments and "--key value" options.
 *
 * Every argv entry that does not start with "--" is appended to _args, in
 * order; this includes argv[0], since the scan starts at index 0.
 * An entry starting with "--" is stored in _mpss under its full text
 * (leading dashes included). Its value is the next entry when that entry
 * exists and is not itself a "--" option; otherwise the value is the empty
 * string. Previously a value-less option aborted the whole scan and every
 * later argument was silently lost.
 *
 * @param argc  number of entries in argv.
 * @param argv  argument strings, as passed to main().
 */
ArgvContext::ArgvContext(int argc, const char* const * argv)
{
    for(int i = 0; i < argc; i++)
    {
        if(!strStartsWith(argv[i], "--"))
        {
            _args.push_back(argv[i]);
            continue;
        }

        if(i + 1 < argc && !strStartsWith(argv[i+1], "--"))
        {
            _mpss[argv[i]] = argv[i+1];
            i++; // the value has been consumed, skip over it
        }
        else
        {
            // option without a value: keep it and go on parsing
            _mpss[argv[i]] = "";
        }
    }
}
|
||||
|
||||
// Nothing to release explicitly; the members clean up after themselves.
ArgvContext::~ArgvContext()
{
}
|
||||
|
||||
/**
 * Renders the parsed command line for debugging.
 *
 * @return the positional arguments joined with ", " inside square brackets,
 *         a newline, then the options as formatted by jsonMPSS().
 */
string ArgvContext::toString()
{
    string res = string_format("[%s]\n", joinStr(_args, ", ").c_str());
    res += jsonMPSS(_mpss);
    return res;
}
|
||||
|
||||
/**
 * Returns the i-th positional argument.
 *
 * @param i  zero-based index into the positional arguments.
 * @return the argument, or an empty string when i is negative or past the end.
 */
string ArgvContext::operator [](int i)
{
    // Check the sign explicitly rather than relying on the signed-to-unsigned
    // conversion in the size comparison to reject negative indices.
    if(i >= 0 && static_cast<size_t>(i) < _args.size())
    {
        return _args[i];
    }
    return "";
}
|
||||
|
||||
/**
 * Looks up an option by its full name (for example "--encoding").
 *
 * @param key  option name exactly as stored in _mpss.
 * @return the stored value, or an empty string when the key is absent.
 *         The map is only searched, never modified.
 */
string ArgvContext::operator [](const string& key)
{
    map<string, string>::const_iterator found = _mpss.find(key);
    if(found == _mpss.end())
    {
        return "";
    }
    return found->second;
}
|
||||
|
||||
}
|
||||
|
||||
@ -33,9 +86,13 @@ using namespace CPPCOMMON;
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
map<string,string> argvMap;
|
||||
getArgvMap(argc, argv, argvMap);
|
||||
PRINT_MAPSS(argvMap);
|
||||
//map<string,string> argvMap;
|
||||
//getArgvMap(argc, argv, argvMap);
|
||||
//PRINT_MAPSS(argvMap);
|
||||
ArgvContext arg(argc, argv);
|
||||
cout<<arg.toString()<<endl;
|
||||
cout<<arg[1]<<endl;
|
||||
cout<<arg["--hehe"]<<endl;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -13,6 +13,20 @@ namespace CPPCOMMON
|
||||
{
|
||||
using namespace std;
|
||||
bool getArgvMap(int argc, const char* const* argv, map<string, string>& mpss);
|
||||
class ArgvContext
|
||||
{
|
||||
public :
|
||||
ArgvContext(int argc, const char* const * argv);
|
||||
~ArgvContext();
|
||||
public:
|
||||
string toString();
|
||||
string operator [](int i);
|
||||
string operator [](const string& key);
|
||||
private:
|
||||
vector<string> _args;
|
||||
map<string, string> _mpss;
|
||||
|
||||
};
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -14,6 +14,29 @@ namespace CPPCOMMON
|
||||
{
|
||||
using namespace std;
|
||||
|
||||
/**
 * Formats a string-to-string map as "{k1: v1, k2: v2}".
 *
 * Keys and values are written verbatim, with no quoting or escaping, so the
 * result looks like JSON but is not guaranteed to be valid JSON. Entries
 * appear in the map's iteration order.
 *
 * @param mpss  map to format.
 * @return "{}" for an empty map, otherwise the brace-wrapped entry list.
 */
inline std::string jsonMPSS(const std::map<std::string, std::string>& mpss)
{
    std::string res("{");
    for(std::map<std::string, std::string>::const_iterator it = mpss.begin(); it != mpss.end(); ++it)
    {
        if(it != mpss.begin())
        {
            res += ", "; // separator goes before every entry except the first
        }
        res += it->first;
        res += ": ";
        res += it->second;
    }
    res += "}";
    return res;
}
|
||||
|
||||
template<class kT, class vT>
|
||||
void printMap(const map<kT, vT>& mp)
|
||||
|
Loading…
x
Reference in New Issue
Block a user