mirror of
https://github.com/ArvinLovegood/go-stock.git
synced 2025-07-19 00:00:09 +08:00
refactor(data):重构财务数据爬取功能
- 移除雪球爬虫测试,改为 sina 和 eastmoney 测试
- 新增eastmoney财务数据爬取支持
- 优化openai_api.go中的财务报告获取逻辑
- 使用通用爬虫API替代chromedp实现
This commit is contained in:
parent
5f8556cc3d
commit
f1e40e7d3b
@ -298,7 +298,7 @@ func TestUSSINA(t *testing.T) {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestXueqiu(t *testing.T) {
|
func TestSina(t *testing.T) {
|
||||||
db.Init("../../data/stock.db")
|
db.Init("../../data/stock.db")
|
||||||
url := "https://finance.sina.com.cn/realstock/company/sz002906/nc.shtml"
|
url := "https://finance.sina.com.cn/realstock/company/sz002906/nc.shtml"
|
||||||
crawlerAPI := CrawlerApi{}
|
crawlerAPI := CrawlerApi{}
|
||||||
@ -331,6 +331,34 @@ func TestXueqiu(t *testing.T) {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestDC(t *testing.T) {
|
||||||
|
url := "https://emweb.securities.eastmoney.com/pc_hsf10/pages/index.html?type=web&code=sh600745#/cwfx"
|
||||||
|
db.Init("../../data/stock.db")
|
||||||
|
crawlerAPI := CrawlerApi{}
|
||||||
|
crawlerBaseInfo := CrawlerBaseInfo{
|
||||||
|
Name: "TestCrawler",
|
||||||
|
Description: "Test Crawler Description",
|
||||||
|
BaseUrl: "https://emweb.securities.eastmoney.com",
|
||||||
|
Headers: map[string]string{"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36 Edg/133.0.0.0"},
|
||||||
|
}
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), 60*time.Minute)
|
||||||
|
defer cancel()
|
||||||
|
crawlerAPI = crawlerAPI.NewCrawler(ctx, crawlerBaseInfo)
|
||||||
|
|
||||||
|
var markdown strings.Builder
|
||||||
|
markdown.WriteString("\n ## 财务数据:\n")
|
||||||
|
html, ok := crawlerAPI.GetHtml(url, "div.report_table table", false)
|
||||||
|
if !ok {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
document, err := goquery.NewDocumentFromReader(strings.NewReader(html))
|
||||||
|
if err != nil {
|
||||||
|
logger.SugaredLogger.Error(err.Error())
|
||||||
|
}
|
||||||
|
GetTableMarkdown(document, "div.report_table table", &markdown)
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
type Tick struct {
|
type Tick struct {
|
||||||
Code int `json:"code"`
|
Code int `json:"code"`
|
||||||
Status string `json:"status"`
|
Status string `json:"status"`
|
||||||
|
@ -499,99 +499,48 @@ func SearchGuShiTongStockInfo(stock string, crawlTimeOut int64) *[]string {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func GetFinancialReports(stockCode string, crawlTimeOut int64) *[]string {
|
func GetFinancialReports(stockCode string, crawlTimeOut int64) *[]string {
|
||||||
|
url := "https://emweb.securities.eastmoney.com/pc_hsf10/pages/index.html?type=web&code=" + stockCode + "#/cwfx"
|
||||||
|
waitVisible := "div.report_table table"
|
||||||
if strutil.HasPrefixAny(stockCode, []string{"HK", "hk"}) {
|
if strutil.HasPrefixAny(stockCode, []string{"HK", "hk"}) {
|
||||||
stockCode = strings.ReplaceAll(stockCode, "hk", "")
|
stockCode = strings.ReplaceAll(stockCode, "hk", "")
|
||||||
stockCode = strings.ReplaceAll(stockCode, "HK", "")
|
stockCode = strings.ReplaceAll(stockCode, "HK", "")
|
||||||
|
url = "https://emweb.securities.eastmoney.com/PC_HKF10/pages/home/index.html?code=" + stockCode + "&type=web&color=w#/NewFinancialAnalysis"
|
||||||
|
waitVisible = "div table.commonTable"
|
||||||
}
|
}
|
||||||
if strutil.HasPrefixAny(stockCode, []string{"us", "gb_"}) {
|
if strutil.HasPrefixAny(stockCode, []string{"us", "gb_"}) {
|
||||||
stockCode = strings.ReplaceAll(stockCode, "us", "")
|
stockCode = strings.ReplaceAll(stockCode, "us", "")
|
||||||
stockCode = strings.ReplaceAll(stockCode, "gb_", "")
|
stockCode = strings.ReplaceAll(stockCode, "gb_", "")
|
||||||
|
url = "https://emweb.securities.eastmoney.com/pc_usf10/pages/index.html?type=web&code=" + stockCode + "#/cwfx"
|
||||||
|
waitVisible = "div.zyzb_table_detail table"
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// 创建一个 chromedp 上下文
|
logger.SugaredLogger.Infof("GetFinancialReports搜索股票-%s: %s", stockCode, url)
|
||||||
timeoutCtx, timeoutCtxCancel := context.WithTimeout(context.Background(), time.Duration(crawlTimeOut)*time.Second)
|
|
||||||
defer timeoutCtxCancel()
|
|
||||||
var ctx context.Context
|
|
||||||
var cancel context.CancelFunc
|
|
||||||
path := getConfig().BrowserPath
|
|
||||||
logger.SugaredLogger.Infof("GetFinancialReports path:%s", path)
|
|
||||||
|
|
||||||
if path != "" {
|
db.Init("../../data/stock.db")
|
||||||
pctx, pcancel := chromedp.NewExecAllocator(
|
crawlerAPI := CrawlerApi{}
|
||||||
timeoutCtx,
|
crawlerBaseInfo := CrawlerBaseInfo{
|
||||||
chromedp.ExecPath(path),
|
Name: "TestCrawler",
|
||||||
chromedp.Flag("headless", true),
|
Description: "Test Crawler Description",
|
||||||
chromedp.Flag("disable-javascript", false),
|
BaseUrl: "https://emweb.securities.eastmoney.com",
|
||||||
chromedp.Flag("disable-gpu", true),
|
Headers: map[string]string{"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36 Edg/133.0.0.0"},
|
||||||
chromedp.UserAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36 Edg/133.0.0.0"),
|
|
||||||
chromedp.Flag("disable-background-networking", true),
|
|
||||||
chromedp.Flag("enable-features", "NetworkService,NetworkServiceInProcess"),
|
|
||||||
chromedp.Flag("disable-background-timer-throttling", true),
|
|
||||||
chromedp.Flag("disable-backgrounding-occluded-windows", true),
|
|
||||||
chromedp.Flag("disable-breakpad", true),
|
|
||||||
chromedp.Flag("disable-client-side-phishing-detection", true),
|
|
||||||
chromedp.Flag("disable-default-apps", true),
|
|
||||||
chromedp.Flag("disable-dev-shm-usage", true),
|
|
||||||
chromedp.Flag("disable-extensions", true),
|
|
||||||
chromedp.Flag("disable-features", "site-per-process,Translate,BlinkGenPropertyTrees"),
|
|
||||||
chromedp.Flag("disable-hang-monitor", true),
|
|
||||||
chromedp.Flag("disable-ipc-flooding-protection", true),
|
|
||||||
chromedp.Flag("disable-popup-blocking", true),
|
|
||||||
chromedp.Flag("disable-prompt-on-repost", true),
|
|
||||||
chromedp.Flag("disable-renderer-backgrounding", true),
|
|
||||||
chromedp.Flag("disable-sync", true),
|
|
||||||
chromedp.Flag("force-color-profile", "srgb"),
|
|
||||||
chromedp.Flag("metrics-recording-only", true),
|
|
||||||
chromedp.Flag("safebrowsing-disable-auto-update", true),
|
|
||||||
chromedp.Flag("enable-automation", true),
|
|
||||||
chromedp.Flag("password-store", "basic"),
|
|
||||||
chromedp.Flag("use-mock-keychain", true),
|
|
||||||
)
|
|
||||||
defer pcancel()
|
|
||||||
ctx, cancel = chromedp.NewContext(
|
|
||||||
pctx,
|
|
||||||
chromedp.WithLogf(logger.SugaredLogger.Infof),
|
|
||||||
chromedp.WithErrorf(logger.SugaredLogger.Errorf),
|
|
||||||
)
|
|
||||||
} else {
|
|
||||||
ctx, cancel = chromedp.NewContext(
|
|
||||||
timeoutCtx,
|
|
||||||
chromedp.WithLogf(logger.SugaredLogger.Infof),
|
|
||||||
chromedp.WithErrorf(logger.SugaredLogger.Errorf),
|
|
||||||
)
|
|
||||||
}
|
}
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), time.Duration(crawlTimeOut)*time.Second)
|
||||||
defer cancel()
|
defer cancel()
|
||||||
var htmlContent string
|
crawlerAPI = crawlerAPI.NewCrawler(ctx, crawlerBaseInfo)
|
||||||
url := fmt.Sprintf("https://xueqiu.com/snowman/S/%s/detail#/ZYCWZB", stockCode)
|
|
||||||
err := chromedp.Run(ctx,
|
var markdown strings.Builder
|
||||||
chromedp.Navigate(url),
|
markdown.WriteString("\n## 财务数据:\n")
|
||||||
// 等待页面加载完成,可以根据需要调整等待时间
|
html, ok := crawlerAPI.GetHtml(url, waitVisible, true)
|
||||||
chromedp.WaitVisible("table.table", chromedp.ByQuery),
|
if !ok {
|
||||||
chromedp.OuterHTML("html", &htmlContent, chromedp.ByQuery),
|
return &[]string{""}
|
||||||
)
|
}
|
||||||
|
document, err := goquery.NewDocumentFromReader(strings.NewReader(html))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.SugaredLogger.Error(err.Error())
|
logger.SugaredLogger.Error(err.Error())
|
||||||
}
|
}
|
||||||
document, err := goquery.NewDocumentFromReader(strings.NewReader(htmlContent))
|
GetTableMarkdown(document, waitVisible, &markdown)
|
||||||
if err != nil {
|
return &[]string{markdown.String()}
|
||||||
logger.SugaredLogger.Error(err.Error())
|
|
||||||
return &[]string{}
|
|
||||||
}
|
|
||||||
var messages []string
|
|
||||||
document.Find("table tr").Each(func(i int, selection *goquery.Selection) {
|
|
||||||
tr := ""
|
|
||||||
selection.Find("th,td").Each(func(i int, selection *goquery.Selection) {
|
|
||||||
ret := selection.Find("p").First().Text()
|
|
||||||
if ret == "" {
|
|
||||||
ret = selection.Text()
|
|
||||||
}
|
|
||||||
text := strutil.RemoveNonPrintable(ret)
|
|
||||||
tr += text + " "
|
|
||||||
})
|
|
||||||
logger.SugaredLogger.Infof("%s", tr+" \n")
|
|
||||||
messages = append(messages, tr+" \n")
|
|
||||||
})
|
|
||||||
return &messages
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func GetTelegraphList(crawlTimeOut int64) *[]string {
|
func GetTelegraphList(crawlTimeOut int64) *[]string {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user