#ifndef CPPJIEBA_LEVELSEGMENT_H #define CPPJIEBA_LEVELSEGMENT_H #include "MPSegment.hpp" namespace cppjieba { class LevelSegment: public SegmentBase{ public: LevelSegment(const string& dictPath, const string& userDictPath = "") : mpSeg_(dictPath, userDictPath) { LogInfo("LevelSegment init"); } LevelSegment(const DictTrie* dictTrie) : mpSeg_(dictTrie) { } ~LevelSegment() { } void cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector >& res) const { res.clear(); vector words; vector smallerWords; words.reserve(end - begin); mpSeg_.cut(begin, end, words); smallerWords.reserve(words.size()); res.reserve(words.size()); size_t level = 0; while (!words.empty()) { smallerWords.clear(); for (size_t i = 0; i < words.size(); i++) { if (words[i].size() >= 3) { size_t len = words[i].size() - 1; mpSeg_.cut(words[i].begin(), words[i].end(), smallerWords, len); // buffer.push_back without clear } if (words[i].size() > 1) { res.push_back(pair(words[i], level)); } } words.swap(smallerWords); level++; } } void cut(const string& sentence, vector >& words) const { words.clear(); Unicode unicode; TransCode::decode(sentence, unicode); vector > unicodeWords; cut(unicode.begin(), unicode.end(), unicodeWords); words.resize(unicodeWords.size()); for (size_t i = 0; i < words.size(); i++) { TransCode::encode(unicodeWords[i].first, words[i].first); words[i].second = unicodeWords[i].second; } } bool cut(const string& sentence, vector& res) const { vector > words; cut(sentence, words); res.clear(); res.reserve(words.size()); for (size_t i = 0; i < words.size(); i++) { res.push_back(words[i].first); } return true; } private: MPSegment mpSeg_; }; // class LevelSegment } // namespace cppjieba #endif // CPPJIEBA_LEVELSEGMENT_H