From ece40d1fc08d49df1101d082263adead6f147b61 Mon Sep 17 00:00:00 2001 From: ArvinLovegood Date: Tue, 8 Apr 2025 17:06:10 +0800 Subject: [PATCH] =?UTF-8?q?feat(data):=20=E6=B7=BB=E5=8A=A0=E9=9B=AA?= =?UTF-8?q?=E7=90=83=E6=8E=A5=E5=8F=A3=E8=8E=B7=E5=8F=96=E8=B4=A2=E5=8A=A1?= =?UTF-8?q?=E6=95=B0=E6=8D=AE=E5=B9=B6=E4=BC=98=E5=8C=96=E8=A1=A8=E6=A0=BC?= =?UTF-8?q?=E8=A7=A3=E6=9E=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 新增 GetFinancialReportsByXUEQIU 函数,用于从雪球获取财务报告 - 优化 GetTableMarkdown 函数,改进表格解析逻辑 - 更新测试用例,验证新接口的正确性 - 重构原有 GetFinancialReports 函数,提高代码可维护性 --- backend/data/openai_api.go | 36 ++++++++++++++++++++++++++++- backend/data/stock_data_api_test.go | 8 +++++-- backend/data/utils.go | 26 ++++++++++++++++++++- 3 files changed, 66 insertions(+), 4 deletions(-) diff --git a/backend/data/openai_api.go b/backend/data/openai_api.go index 42b7fa5..4845d5f 100644 --- a/backend/data/openai_api.go +++ b/backend/data/openai_api.go @@ -285,7 +285,7 @@ func (o OpenAi) NewChatStream(stock, stockCode, userQuestion string, sysPromptId return } - messages := GetFinancialReports(stockCode, o.CrawlTimeOut) + messages := GetFinancialReportsByXUEQIU(stockCode, o.CrawlTimeOut) if messages == nil || len(*messages) == 0 { logger.SugaredLogger.Error("获取股票财报失败") // "***❗获取股票财报失败,分析结果可能不准确***
" @@ -608,7 +608,41 @@ func SearchGuShiTongStockInfo(stock string, crawlTimeOut int64) *[]string { } return &messages } +func GetFinancialReportsByXUEQIU(stockCode string, crawlTimeOut int64) *[]string { + if strutil.HasPrefixAny(stockCode, []string{"HK", "hk"}) { + stockCode = strings.ReplaceAll(stockCode, "hk", "") + stockCode = strings.ReplaceAll(stockCode, "HK", "") + } + if strutil.HasPrefixAny(stockCode, []string{"us", "gb_"}) { + stockCode = strings.ReplaceAll(stockCode, "us", "") + stockCode = strings.ReplaceAll(stockCode, "gb_", "") + } + url := fmt.Sprintf("https://xueqiu.com/snowman/S/%s/detail#/ZYCWZB", stockCode) + waitVisible := "div.tab-table-responsive table" + crawlerAPI := CrawlerApi{} + crawlerBaseInfo := CrawlerBaseInfo{ + Name: "TestCrawler", + Description: "Test Crawler Description", + BaseUrl: "https://xueqiu.com", + Headers: map[string]string{"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36 Edg/133.0.0.0"}, + } + ctx, cancel := context.WithTimeout(context.Background(), time.Duration(crawlTimeOut)*time.Second) + defer cancel() + crawlerAPI = crawlerAPI.NewCrawler(ctx, crawlerBaseInfo) + var markdown strings.Builder + markdown.WriteString("\n## 财务数据:\n") + html, ok := crawlerAPI.GetHtml(url, waitVisible, true) + if !ok { + return &[]string{""} + } + document, err := goquery.NewDocumentFromReader(strings.NewReader(html)) + if err != nil { + logger.SugaredLogger.Error(err.Error()) + } + GetTableMarkdown(document, waitVisible, &markdown) + return &[]string{markdown.String()} +} func GetFinancialReports(stockCode string, crawlTimeOut int64) *[]string { url := "https://emweb.securities.eastmoney.com/pc_hsf10/pages/index.html?type=web&code=" + stockCode + "#/cwfx" waitVisible := "div.report_table table" diff --git a/backend/data/stock_data_api_test.go b/backend/data/stock_data_api_test.go index 7acc573..e318c0a 100644 --- a/backend/data/stock_data_api_test.go +++ 
b/backend/data/stock_data_api_test.go @@ -26,9 +26,13 @@ func TestGetTelegraph(t *testing.T) { } func TestGetFinancialReports(t *testing.T) { + db.Init("../../data/stock.db") //GetFinancialReports("sz000802", 30) //GetFinancialReports("hk00927", 30) - GetFinancialReports("gb_aapl", 30) + //GetFinancialReports("gb_aapl", 30) + GetFinancialReportsByXUEQIU("sz000802", 30) + GetFinancialReportsByXUEQIU("gb_aapl", 30) + GetFinancialReportsByXUEQIU("hk00927", 30) } @@ -191,7 +195,7 @@ func TestReadFile(t *testing.T) { func TestFollowedList(t *testing.T) { db.Init("../../data/stock.db") stockDataApi := NewStockDataApi() - stockDataApi.GetFollowList() + stockDataApi.GetFollowList(1) } diff --git a/backend/data/utils.go b/backend/data/utils.go index 6ffbb4a..cceac90 100644 --- a/backend/data/utils.go +++ b/backend/data/utils.go @@ -64,7 +64,13 @@ func GetTableMarkdown(document *goquery.Document, waitVisible string, markdown * document.Find(waitVisible).First().Find("tr").Each(func(index int, item *goquery.Selection) { row := "" item.Find("th, td").Each(func(i int, cell *goquery.Selection) { - text := cell.Text() + text := cell.Children().FilterFunction(func(i int, s *goquery.Selection) bool { + return isVisible(s) + }).Text() + if text == "" { + text = cell.Text() + } + row += "|" + text }) row += "|" @@ -85,3 +91,21 @@ func GetTableMarkdown(document *goquery.Document, waitVisible string, markdown * }) logger.SugaredLogger.Infof("\n%s", markdown.String()) } + +// isVisible 函数用于判断元素是否可见 +func isVisible(s *goquery.Selection) bool { + // 检查 display 属性 + display, _ := s.Attr("style") + if strings.Contains(strings.ToLower(display), "display: none") { + return false + } + // 检查 visibility 属性 + if strings.Contains(strings.ToLower(display), "visibility: hidden") { + return false + } + // 检查 opacity 属性 + if strings.Contains(strings.ToLower(display), "opacity: 0") { + return false + } + return true +}