From 12ac1c9a6bd57daae1a0f0bff1c6ddfe0c3ec68d Mon Sep 17 00:00:00 2001 From: wyy Date: Sun, 23 Jun 2013 23:58:25 +0800 Subject: [PATCH] init --- .gitignore | 2 ++ Makefile | 46 ++++++++++++++++++++++++++++++++++++++++++++++ Trie.cpp | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ Trie.h | 23 +++++++++++++++++++++++ globals.h | 6 ++++++ main.cpp | 29 +++++++++++++++++++++++++++++ 6 files changed, 155 insertions(+) create mode 100644 Makefile create mode 100644 Trie.cpp create mode 100644 Trie.h create mode 100644 globals.h create mode 100644 main.cpp diff --git a/.gitignore b/.gitignore index 887a366..5a49d7d 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,5 @@ tags *.d *.ut log +main +cppcommon/cmlib.a diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..f49d634 --- /dev/null +++ b/Makefile @@ -0,0 +1,46 @@ +CC = g++ +CCOPT = -Wall -c +LINK = g++ +LINKOPT = +PACKA = ar +PARCAOPT = rc +DOLINK = $(LINK) $(LINKOPT) -o $@ $^ +SOURCES := $(wildcard *.cpp) +OBJS := $(patsubst %.cpp,%.o,$(SOURCES)) + +CMDIR = ./cppcommon/ +CMLIB = $(CMDIR)cmlib.a + + +# remove the objs after compilation +.INTERMEDIATE: +.PHONY: clean + +# This is a suffix rule +#.c.o: +%.o: %.cpp + $(CC) $(CCOPT) $< + +# Main Targets +all: main + + +main: $(OBJS) $(CMLIB) + $(DOLINK) + +$(CMLIB): $(CMDIR) + cd $(CMDIR) && $(MAKE) + +#unit test +Trie.ut: Trie.cpp Trie.h $(CMLIB) + g++ -o $@ $< -DTRIE_UT $(CMLIB) + +clean: + rm -f *.o *.ut $(CMLIB) main + +sinclude $(SOURCES:.cpp=.d) +%.d:%.cpp + @set -e; rm -f $@; \ + $(CC) -MM $< > $@.$$$$; \ + sed 's,\($*\).o[ :]*,\1.o $@ : ,g' < $@.$$$$ > $@; \ + rm -f $@.$$$$ diff --git a/Trie.cpp b/Trie.cpp new file mode 100644 index 0000000..1fcbb13 --- /dev/null +++ b/Trie.cpp @@ -0,0 +1,49 @@ +#include "Trie.h" + +namespace CppJieba +{ + Trie::Trie() + { + } + + Trie::~Trie() + { + } + + bool Trie::init(const char* const filepath) + { + ifstream ifile(filepath); + string line; + vector vecBuf; + while(getline(ifile, line)) + { + vecBuf.clear(); + splitStr(line, vecBuf, " "); + PRINT_VECTOR(vecBuf); + getchar(); + uint16_t strbuf[1024]; + + size_t unilen = utf8ToUnicode(line.c_str(), line.size(), strbuf); + for(int i = 0; i < unilen; i++) + { + // printf("%x\n", strbuf[i]); + } + char utf8str[512]={0}; + unicodeToUtf8(strbuf, unilen, utf8str); + //cout< +#include +#include "cppcommon/str_functs.h" +#include "cppcommon/vec_functs.h" +#include "globals.h" + +namespace CppJieba +{ + using namespace CPPCOMMON; + using namespace std; + class Trie + { + public: + Trie(); + ~Trie(); + public: + bool init(const char* const filepath = DICT_FILE_PATH); + }; +} + +#endif diff --git a/globals.h b/globals.h new file mode 100644 index 0000000..70863d7 --- /dev/null +++ b/globals.h @@ -0,0 +1,6 @@ +#ifndef GLOBALS_H +#define GLOBALS_H + +const char * const DICT_FILE_PATH = "dict.txt"; + +#endif diff --git a/main.cpp b/main.cpp new file mode 100644 index 0000000..6141e56 --- /dev/null +++ b/main.cpp @@ -0,0 +1,29 @@ +#include +#include +#include +#include +#include +#include "cppcommon/str_functs.h" +using namespace std; +using namespace CPPCOMMON; + +int main() +{ + ifstream ifile("dict.txt"); + string line; + while(getline(ifile, line)) + { + uint16_t strbuf[1024]; + + size_t unilen = utf8ToUnicode(line.c_str(), line.size(), strbuf); + for(int i = 0; i < unilen; i++) + { + // printf("%x\n", strbuf[i]); + } + char utf8str[512]={0}; + unicodeToUtf8(strbuf, unilen, utf8str); + //cout<