/************************************ * file enc : ASCII * author : wuyanyi09@gmail.com ************************************/ #ifndef CPPJIEBA_KEYWORDEXT_H #define CPPJIEBA_KEYWORDEXT_H #include "Segment.h" #include "structs.h" namespace CppJieba { class KeyWordExt { private: Segment _segment; vector _priorSubWords; set _stopWords; public: KeyWordExt(); ~KeyWordExt(); bool init(); bool loadSegDict(const char * const filePath); //load stopwords bool loadStopWords(const char * const filePath); //load prior words' prefix bool loadPriorSubWords(const char * const filePath); bool dispose(); public: bool extract(const string& title, vector& keyWordInfos, uint topN); bool extract(const vector& words, vector& keyWordInfos, uint topN); private: static bool _wordInfoCompare(const KeyWordInfo& a, const KeyWordInfo& b); private: bool _extract(vector& keyWordInfos, uint topN); bool _extTopN(vector& wordInfos, uint topN); private: //sort by word len - idf bool _sortWLIDF(vector& wordInfos); private: bool _filter(vector& ); bool _filterDuplicate(vector& ); bool _filterSingleWord(vector& ); bool _filterSubstr(vector& ); bool _filterStopWords(vector& ); private: bool _prioritizeSubWords(vector& wordInfos); bool _isContainSubWords(const string& word); }; } #endif