From 0bd30975f3777aee2c921cb293283e0a59a09e47 Mon Sep 17 00:00:00 2001 From: msojocs Date: Sun, 25 Jun 2023 15:31:44 +0800 Subject: [PATCH] =?UTF-8?q?perf:=20=E8=A1=A5=E5=85=A8Parser::DOM?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/include/wxml.h | 23 +++++--- src/wxml/dom_lib/parser.cpp | 107 +++++++++++++++++++++++++++++++++--- 2 files changed, 114 insertions(+), 16 deletions(-) diff --git a/src/include/wxml.h b/src/include/wxml.h index 412b905..8e8f1ae 100644 --- a/src/include/wxml.h +++ b/src/include/wxml.h @@ -65,8 +65,6 @@ namespace WXML private: /* data */ std::string offset_0; // content - int offset_16 = 0; // pos - int offset_20 = 0; // size bool offset_28; // hasCache toString std::string offset_32; // chcheData toString bool offset_84; // hasCache attrContent @@ -74,6 +72,9 @@ namespace WXML public: int offset_8 = 0; // ??? int offset_12 = 0; // ??? + int offset_16 = 0; // pos + int offset_20 = 0; // size + int offset_24 = 0; // ??? int offset_40 = 0; // AttrsCompartor用到,怎么来不知道 int offset_56 = 0; // ??? -3, -1 std::string offset_60; // ??? @@ -220,7 +221,6 @@ namespace WXML bool offset_28; std::map offset_48; // std::string offset_52; - WXML::DOMLib::Token offset_84; // token int offset_92; // pos1 int offset_96; // pos2 int offset_104; // len @@ -231,8 +231,10 @@ namespace WXML public: std::string offset_0; // type std::map offset_12; - int offset_24; // ??? + std::string offset_24; // ??? + std::vector> offset_72; // int offset_256; // ??? + WXML::DOMLib::Token offset_84; // token WXMLDom(/* args */); ~WXMLDom(); std::string Error( @@ -312,15 +314,18 @@ namespace WXML private: /* data */ std::shared_ptr dom; - std::deque dequeStr; std::vector tokenList; - int peekIndex = 0; // _DWORD * a1[25], *((_DWORD *)a1 + 25) int offset_4; - int offset_8; // _DWORD * a1[8] + std::deque dequeStr;// offset_8 + int offset_16; + int offset_32; // _DWORD * a1[8] + int offset_36; // + int offset_40; + int offset_44; + std::deque> dequeDom; // offset_48 int a1 + 48, _DWORD * a1 + 12 + int peekIndex = 0; // offset_100 int offset_128; std::string filePath; - std::deque> dequeDom; // int a1 + 48, _DWORD * a1 + 12 - int v8; public: Parser(/* args */); ~Parser(); diff --git a/src/wxml/dom_lib/parser.cpp b/src/wxml/dom_lib/parser.cpp index 26b289f..683b2de 100644 --- a/src/wxml/dom_lib/parser.cpp +++ b/src/wxml/dom_lib/parser.cpp @@ -110,10 +110,9 @@ namespace WXML void Parser::DOM() { auto token = this->Peek(); - int v44; - if (v44) + if (token.offset_24) { - if (v44 == 4) + if (token.offset_24 == 4) { return; } @@ -130,14 +129,108 @@ namespace WXML throw this->Error("unexpected tag", 0); } this->peekIndex++; - // if (this->offset_8 == ) + if (this->offset_32 == this->offset_40 - 24) + { + this->dequeStr.push_back(tag); + } + else + { + this->offset_32 += 24; + } std::shared_ptr domPtr; + domPtr->offset_0.assign(tag); + domPtr->offset_24.assign(domPtr->offset_0); + domPtr->offset_84 = token; auto v8 = this->dequeDom.back(); // push_back + v8->offset_72.push_back(domPtr); // push_back + this->dequeDom.push_back(domPtr); this->ATTR_LIST(); - auto v9 = this->Peek(); + auto v43 = this->Peek(); + if(!token.offset_24) + { + if (v43.IsMatch(">")) + { + this->peekIndex++; + this->DOMS(); + auto v11 = this->Peek(); + if (/*v48[5] || */!v11.IsMatch("Error("unexpected token", &token); + throw "ParseException"; + } + this->peekIndex++; + auto v47 = this->Peek(); + auto v13 = this->offset_32; + std::string v40 = ""; + if (this->offset_16 == v13) + { + } + else + { + // if (v13 == this->offset_36) + // v13 = this->offset_44 + v40 = ""; + } + if (!v47.IsMatch(&v40[0])) + { + std::string msg = "expect end-tag `" + v40; + msg += v40 + "`."; + auto err = this->Error(&msg[0], 0); + throw err; + } + this->peekIndex++; + this->dequeStr.pop_back(); + this->dequeDom.pop_back(); + auto v47 = this->Peek(); + if (!v47.IsMatch(">")) + { + throw this->Error("unexpected token", 0); + } + this->peekIndex++; + return; + } + if (v43.IsMatch("/>")) + { + this->peekIndex++; + this->dequeDom.pop_back(); + this->dequeStr.pop_back(); + return; + } + } + throw this->Error("unexpected token", 0); + } + if (token.IsMatch("offset_32 == this->offset_16) + { + throw this->Error("get tag end without start", 0); + } + return; + } + auto v16 = token.GetContent(); + this->peekIndex++; + if (v16.length() > 0) + { + int v17 = 0; + // TODO v18 = *v16 + v43[4]; + char* v18 = &v16[0] + token.offset_16; + while(token.offset_20 > v17) + { + int v19 = *(uint8_t *)(v18 + v17) - 9; + if (v19 > 0x17u || ((0x800013u >> v19) & 1) == 0) + { + auto v45 = this->dequeDom.back(); + std::shared_ptr dom; + dom->offset_0 = "TEXTNODE"; + dom->offset_84 = token; + v45->offset_72.push_back(dom); + break; + } + ++v17; + } } } } @@ -157,11 +250,11 @@ namespace WXML { this->DOM(); WXML::DOMLib::Token token = this->Peek(); - if (this->v8 == 4) + if (token.offset_24 == 4) break; if (token.IsMatch("offset_4 == this->offset_8) + if (this->offset_4 == this->offset_32) { throw WXML::DOMLib::Parser::Error("get tag end without start", nullptr); }