feat(data): 添加雪球接口获取财务数据并优化表格解析

- 新增 GetFinancialReportsByXUEQIU 函数,用于从雪球获取财务报告
- 优化 GetTableMarkdown 函数,改进表格解析逻辑
- 更新测试用例,验证新接口的正确性
- 重构原有 GetFinancialReports 函数,提高代码可维护性
This commit is contained in:
ArvinLovegood 2025-04-08 17:06:10 +08:00
parent 1a3c8b4fae
commit ece40d1fc0
3 changed files with 66 additions and 4 deletions

View File

@ -285,7 +285,7 @@ func (o OpenAi) NewChatStream(stock, stockCode, userQuestion string, sysPromptId
return
}
messages := GetFinancialReports(stockCode, o.CrawlTimeOut)
messages := GetFinancialReportsByXUEQIU(stockCode, o.CrawlTimeOut)
if messages == nil || len(*messages) == 0 {
logger.SugaredLogger.Error("获取股票财报失败")
// "***❗获取股票财报失败,分析结果可能不准确***<hr>"
@ -608,7 +608,41 @@ func SearchGuShiTongStockInfo(stock string, crawlTimeOut int64) *[]string {
}
return &messages
}
func GetFinancialReportsByXUEQIU(stockCode string, crawlTimeOut int64) *[]string {
if strutil.HasPrefixAny(stockCode, []string{"HK", "hk"}) {
stockCode = strings.ReplaceAll(stockCode, "hk", "")
stockCode = strings.ReplaceAll(stockCode, "HK", "")
}
if strutil.HasPrefixAny(stockCode, []string{"us", "gb_"}) {
stockCode = strings.ReplaceAll(stockCode, "us", "")
stockCode = strings.ReplaceAll(stockCode, "gb_", "")
}
url := fmt.Sprintf("https://xueqiu.com/snowman/S/%s/detail#/ZYCWZB", stockCode)
waitVisible := "div.tab-table-responsive table"
crawlerAPI := CrawlerApi{}
crawlerBaseInfo := CrawlerBaseInfo{
Name: "TestCrawler",
Description: "Test Crawler Description",
BaseUrl: "https://xueqiu.com",
Headers: map[string]string{"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36 Edg/133.0.0.0"},
}
ctx, cancel := context.WithTimeout(context.Background(), time.Duration(crawlTimeOut)*time.Second)
defer cancel()
crawlerAPI = crawlerAPI.NewCrawler(ctx, crawlerBaseInfo)
var markdown strings.Builder
markdown.WriteString("\n## 财务数据:\n")
html, ok := crawlerAPI.GetHtml(url, waitVisible, true)
if !ok {
return &[]string{""}
}
document, err := goquery.NewDocumentFromReader(strings.NewReader(html))
if err != nil {
logger.SugaredLogger.Error(err.Error())
}
GetTableMarkdown(document, waitVisible, &markdown)
return &[]string{markdown.String()}
}
func GetFinancialReports(stockCode string, crawlTimeOut int64) *[]string {
url := "https://emweb.securities.eastmoney.com/pc_hsf10/pages/index.html?type=web&code=" + stockCode + "#/cwfx"
waitVisible := "div.report_table table"

View File

@ -26,9 +26,13 @@ func TestGetTelegraph(t *testing.T) {
}
// TestGetFinancialReports calls db.Init with ../../data/stock.db and then
// invokes the financial-report fetchers for several stock codes with a
// 30-second crawl timeout. It makes no assertions on the results, so it can
// only fail if one of the calls panics.
// NOTE(review): the active and commented-out GetFinancialReports("gb_aapl", 30)
// lines below look like the before/after sides of a diff — confirm which one
// is current.
func TestGetFinancialReports(t *testing.T) {
db.Init("../../data/stock.db")
//GetFinancialReports("sz000802", 30)
//GetFinancialReports("hk00927", 30)
GetFinancialReports("gb_aapl", 30)
//GetFinancialReports("gb_aapl", 30)
// Exercise the Xueqiu-backed fetcher with three differently prefixed codes
// ("sz", "gb_" and "hk").
GetFinancialReportsByXUEQIU("sz000802", 30)
GetFinancialReportsByXUEQIU("gb_aapl", 30)
GetFinancialReportsByXUEQIU("hk00927", 30)
}
@ -191,7 +195,7 @@ func TestReadFile(t *testing.T) {
// TestFollowedList calls db.Init with ../../data/stock.db, builds a stock data
// API via NewStockDataApi and calls GetFollowList on it. The result is not
// checked, so the test can only fail if a call panics.
// NOTE(review): the two GetFollowList calls below differ in arity and cannot
// both compile; they look like the before/after sides of a diff — confirm
// which signature is current.
func TestFollowedList(t *testing.T) {
db.Init("../../data/stock.db")
stockDataApi := NewStockDataApi()
stockDataApi.GetFollowList()
stockDataApi.GetFollowList(1)
}

View File

@ -64,7 +64,13 @@ func GetTableMarkdown(document *goquery.Document, waitVisible string, markdown *
document.Find(waitVisible).First().Find("tr").Each(func(index int, item *goquery.Selection) {
row := ""
item.Find("th, td").Each(func(i int, cell *goquery.Selection) {
text := cell.Text()
text := cell.Children().FilterFunction(func(i int, s *goquery.Selection) bool {
return isVisible(s)
}).Text()
if text == "" {
text = cell.Text()
}
row += "|" + text
})
row += "|"
@ -85,3 +91,21 @@ func GetTableMarkdown(document *goquery.Document, waitVisible string, markdown *
})
logger.SugaredLogger.Infof("\n%s", markdown.String())
}
// isVisible reports whether the selection's inline style attribute leaves the
// element visible. Only the element's own style attribute is examined;
// stylesheets, classes and ancestor elements are not considered.
func isVisible(s *goquery.Selection) bool {
	// The "exists" flag is ignored: an absent style attribute is handled the
	// same as an empty one, i.e. nothing hides the element.
	style, _ := s.Attr("style")
	return !styleHidesElement(style)
}

// styleHidesElement reports whether an inline CSS declaration list contains
// "display: none", "visibility: hidden" or an opacity of zero. Matching is
// case-insensitive and independent of the whitespace around ':' and ';'.
func styleHidesElement(style string) bool {
	for _, decl := range strings.Split(strings.ToLower(style), ";") {
		parts := strings.SplitN(decl, ":", 2)
		if len(parts) != 2 {
			continue
		}
		prop := strings.TrimSpace(parts[0])
		value := strings.TrimSpace(parts[1])
		// Ignore a trailing "!important" so it does not defeat the comparison.
		value = strings.TrimSpace(strings.TrimSuffix(value, "!important"))

		switch prop {
		case "display":
			if value == "none" {
				return true
			}
		case "visibility":
			if value == "hidden" {
				return true
			}
		case "opacity":
			// Only a true zero ("0", "0.0", "0%") hides the element; a plain
			// substring test for "opacity: 0" would also match "opacity: 0.5".
			if strings.Contains(value, "0") && strings.Trim(value, "0.%") == "" {
				return true
			}
		}
	}
	return false
}