mirror of
https://github.com/ArvinLovegood/go-stock.git
synced 2025-07-19 00:00:09 +08:00
refactor(stock):重构股票价格数据爬取功能
- 移除了不必要的 chromedp Cancel 调用
- 新增了对雪球网的爬虫测试用例
- 修改了股票价格信息的爬取逻辑,使用新浪财经作为数据源
- 优化了爬取结果的 Markdown 格式输出
- 删除了未使用的 validator 包引用
This commit is contained in:
parent
34e2de07fb
commit
5f8556cc3d
@ -68,7 +68,7 @@ func (c *CrawlerApi) GetHtml(url, waitVisible string, headless bool) (string, bo
|
||||
defer pcancel()
|
||||
ctx, cancel := chromedp.NewContext(pctx, chromedp.WithLogf(logger.SugaredLogger.Infof))
|
||||
defer cancel()
|
||||
defer chromedp.Cancel(ctx)
|
||||
//defer chromedp.Cancel(ctx)
|
||||
err := chromedp.Run(ctx, chromedp.Navigate(url),
|
||||
chromedp.WaitVisible(waitVisible, chromedp.ByQuery), // 确保 元素可见
|
||||
chromedp.WaitReady(waitVisible, chromedp.ByQuery), // 确保 元素准备好
|
||||
@ -81,7 +81,7 @@ func (c *CrawlerApi) GetHtml(url, waitVisible string, headless bool) (string, bo
|
||||
} else {
|
||||
ctx, cancel := chromedp.NewContext(c.crawlerCtx, chromedp.WithLogf(logger.SugaredLogger.Infof))
|
||||
defer cancel()
|
||||
defer chromedp.Cancel(ctx)
|
||||
//defer chromedp.Cancel(ctx)
|
||||
err := chromedp.Run(ctx, chromedp.Navigate(url), chromedp.WaitVisible("body"), chromedp.InnerHTML("body", &htmlContent))
|
||||
if err != nil {
|
||||
logger.SugaredLogger.Error(err.Error())
|
||||
@ -199,7 +199,7 @@ func (c *CrawlerApi) GetHtmlWithActions(actions *[]chromedp.Action, headless boo
|
||||
defer pcancel()
|
||||
ctx, cancel := chromedp.NewContext(pctx, chromedp.WithLogf(logger.SugaredLogger.Infof))
|
||||
defer cancel()
|
||||
defer chromedp.Cancel(ctx)
|
||||
//defer chromedp.Cancel(ctx)
|
||||
|
||||
err := chromedp.Run(ctx, *actions...)
|
||||
if err != nil {
|
||||
@ -209,7 +209,7 @@ func (c *CrawlerApi) GetHtmlWithActions(actions *[]chromedp.Action, headless boo
|
||||
} else {
|
||||
ctx, cancel := chromedp.NewContext(c.crawlerCtx, chromedp.WithLogf(logger.SugaredLogger.Infof))
|
||||
defer cancel()
|
||||
defer chromedp.Cancel(ctx)
|
||||
//defer chromedp.Cancel(ctx)
|
||||
|
||||
err := chromedp.Run(ctx, *actions...)
|
||||
if err != nil {
|
||||
|
@ -298,6 +298,39 @@ func TestUSSINA(t *testing.T) {
|
||||
})
|
||||
}
|
||||
|
||||
func TestXueqiu(t *testing.T) {
|
||||
db.Init("../../data/stock.db")
|
||||
url := "https://finance.sina.com.cn/realstock/company/sz002906/nc.shtml"
|
||||
crawlerAPI := CrawlerApi{}
|
||||
crawlerBaseInfo := CrawlerBaseInfo{
|
||||
Name: "TestCrawler",
|
||||
Description: "Test Crawler Description",
|
||||
BaseUrl: "https://finance.sina.com.cn",
|
||||
Headers: map[string]string{"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36 Edg/133.0.0.0"},
|
||||
}
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 60*time.Minute)
|
||||
defer cancel()
|
||||
crawlerAPI = crawlerAPI.NewCrawler(ctx, crawlerBaseInfo)
|
||||
html, ok := crawlerAPI.GetHtml(url, "div#hqDetails table", true)
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
document, err := goquery.NewDocumentFromReader(strings.NewReader(html))
|
||||
if err != nil {
|
||||
logger.SugaredLogger.Error(err.Error())
|
||||
}
|
||||
|
||||
//price
|
||||
price := strutil.RemoveWhiteSpace(document.Find("div#price").First().Text(), false)
|
||||
hqTime := strutil.RemoveWhiteSpace(document.Find("div#hqTime").First().Text(), false)
|
||||
|
||||
var markdown strings.Builder
|
||||
markdown.WriteString("\n ## 当前股票数据:\n")
|
||||
markdown.WriteString(fmt.Sprintf("### 当前股价:%s 时间:%s\n", price, hqTime))
|
||||
GetTableMarkdown(document, "div#hqDetails table", &markdown)
|
||||
|
||||
}
|
||||
|
||||
type Tick struct {
|
||||
Code int `json:"code"`
|
||||
Status string `json:"status"`
|
||||
|
@ -196,7 +196,7 @@ func (o OpenAi) NewChatStream(stock, stockCode, userQuestion string, sysPromptId
|
||||
}
|
||||
msg = append(msg, map[string]interface{}{
|
||||
"role": "user",
|
||||
"content": stock + time.Now().Format(time.DateOnly) + "价格:" + price,
|
||||
"content": "\n## " + stock + "股价数据:\n" + price,
|
||||
})
|
||||
}()
|
||||
|
||||
|
@ -15,7 +15,6 @@ import (
|
||||
"github.com/duke-git/lancet/v2/convertor"
|
||||
"github.com/duke-git/lancet/v2/slice"
|
||||
"github.com/duke-git/lancet/v2/strutil"
|
||||
"github.com/duke-git/lancet/v2/validator"
|
||||
"github.com/go-resty/resty/v2"
|
||||
"go-stock/backend/db"
|
||||
"go-stock/backend/logger"
|
||||
@ -770,12 +769,12 @@ func GetRealTimeStockPriceInfo(ctx context.Context, stockCode string) (price, pr
|
||||
func SearchStockPriceInfo(stockCode string, crawlTimeOut int64) *[]string {
|
||||
|
||||
if strutil.HasPrefixAny(stockCode, []string{"SZ", "SH", "sh", "sz", "bj"}) {
|
||||
if strutil.HasPrefixAny(stockCode, []string{"bj", "BJ"}) {
|
||||
stockCode = strutil.ReplaceWithMap(stockCode, map[string]string{
|
||||
"bj": "",
|
||||
"BJ": "",
|
||||
}) + ".BJ"
|
||||
}
|
||||
//if strutil.HasPrefixAny(stockCode, []string{"bj", "BJ"}) {
|
||||
// stockCode = strutil.ReplaceWithMap(stockCode, map[string]string{
|
||||
// "bj": "",
|
||||
// "BJ": "",
|
||||
// }) + ".BJ"
|
||||
//}
|
||||
|
||||
return getSHSZStockPriceInfo(stockCode, crawlTimeOut)
|
||||
}
|
||||
@ -893,90 +892,36 @@ func getHKStockPriceInfo(stockCode string, crawlTimeOut int64) *[]string {
|
||||
}
|
||||
|
||||
func getSHSZStockPriceInfo(stockCode string, crawlTimeOut int64) *[]string {
|
||||
var messages []string
|
||||
url := "https://www.cls.cn/stock?code=" + stockCode
|
||||
// 创建一个 chromedp 上下文
|
||||
timeoutCtx, timeoutCtxCancel := context.WithTimeout(context.Background(), time.Duration(crawlTimeOut)*time.Second)
|
||||
defer timeoutCtxCancel()
|
||||
var ctx context.Context
|
||||
var cancel context.CancelFunc
|
||||
path := getConfig().BrowserPath
|
||||
logger.SugaredLogger.Infof("SearchStockPriceInfo BrowserPath:%s", path)
|
||||
if path != "" {
|
||||
pctx, pcancel := chromedp.NewExecAllocator(
|
||||
timeoutCtx,
|
||||
chromedp.ExecPath(path),
|
||||
chromedp.Flag("headless", true),
|
||||
)
|
||||
defer pcancel()
|
||||
ctx, cancel = chromedp.NewContext(
|
||||
pctx,
|
||||
chromedp.WithLogf(logger.SugaredLogger.Infof),
|
||||
chromedp.WithErrorf(logger.SugaredLogger.Errorf),
|
||||
)
|
||||
} else {
|
||||
ctx, cancel = chromedp.NewContext(
|
||||
timeoutCtx,
|
||||
chromedp.WithLogf(logger.SugaredLogger.Infof),
|
||||
chromedp.WithErrorf(logger.SugaredLogger.Errorf),
|
||||
)
|
||||
url := "https://finance.sina.com.cn/realstock/company/" + stockCode + "/nc.shtml"
|
||||
crawlerAPI := CrawlerApi{}
|
||||
crawlerBaseInfo := CrawlerBaseInfo{
|
||||
Name: "TestCrawler",
|
||||
Description: "Test Crawler Description",
|
||||
BaseUrl: "https://finance.sina.com.cn",
|
||||
Headers: map[string]string{"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36 Edg/133.0.0.0"},
|
||||
}
|
||||
ctx, cancel := context.WithTimeout(context.Background(), time.Duration(crawlTimeOut)*time.Second)
|
||||
defer cancel()
|
||||
|
||||
var htmlContent string
|
||||
|
||||
var tasks chromedp.Tasks
|
||||
tasks = append(tasks, chromedp.Navigate(url))
|
||||
tasks = append(tasks, chromedp.WaitVisible("div.quote-change-box", chromedp.ByQuery))
|
||||
tasks = append(tasks, chromedp.ActionFunc(func(ctx context.Context) error {
|
||||
price, _ := FetchPrice(ctx)
|
||||
logger.SugaredLogger.Infof("price:%s", price)
|
||||
return nil
|
||||
}))
|
||||
tasks = append(tasks, chromedp.OuterHTML("html", &htmlContent, chromedp.ByQuery))
|
||||
|
||||
err := chromedp.Run(ctx, tasks)
|
||||
crawlerAPI = crawlerAPI.NewCrawler(ctx, crawlerBaseInfo)
|
||||
html, ok := crawlerAPI.GetHtml(url, "div#hqDetails table", true)
|
||||
if !ok {
|
||||
return &[]string{""}
|
||||
}
|
||||
document, err := goquery.NewDocumentFromReader(strings.NewReader(html))
|
||||
if err != nil {
|
||||
logger.SugaredLogger.Error(err.Error())
|
||||
return &[]string{}
|
||||
}
|
||||
document, err := goquery.NewDocumentFromReader(strings.NewReader(htmlContent))
|
||||
if err != nil {
|
||||
logger.SugaredLogger.Error(err.Error())
|
||||
return &[]string{}
|
||||
}
|
||||
|
||||
document.Find("div.quote-text-border,span.quote-price").Each(func(i int, selection *goquery.Selection) {
|
||||
text := strutil.RemoveNonPrintable(selection.Text())
|
||||
logger.SugaredLogger.Info(text)
|
||||
messages = append(messages, text)
|
||||
//price
|
||||
price := strutil.RemoveWhiteSpace(document.Find("div#price").First().Text(), false)
|
||||
hqTime := strutil.RemoveWhiteSpace(document.Find("div#hqTime").First().Text(), false)
|
||||
|
||||
})
|
||||
return &messages
|
||||
var markdown strings.Builder
|
||||
markdown.WriteString(fmt.Sprintf("### 当前股价:%s 时间:%s\n", price, hqTime))
|
||||
GetTableMarkdown(document, "div#hqDetails table", &markdown)
|
||||
return &[]string{markdown.String()}
|
||||
}
|
||||
func FetchPrice(ctx context.Context) (string, error) {
|
||||
var price string
|
||||
timeout := time.After(10 * time.Second) // 设置超时时间为10秒
|
||||
ticker := time.NewTicker(1 * time.Second) // 每秒尝试一次
|
||||
defer ticker.Stop()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-timeout:
|
||||
return "", fmt.Errorf("timeout reached while fetching price")
|
||||
case <-ticker.C:
|
||||
err := chromedp.Run(ctx, chromedp.Text("span.quote-price", &price, chromedp.BySearch))
|
||||
if err != nil {
|
||||
logger.SugaredLogger.Errorf("failed to fetch price: %v", err)
|
||||
continue
|
||||
}
|
||||
logger.SugaredLogger.Infof("price:%s", price)
|
||||
if price != "" && validator.IsNumberStr(price) {
|
||||
return price, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
func SearchStockInfo(stock, msgType string, crawlTimeOut int64) *[]string {
|
||||
crawler := CrawlerApi{
|
||||
crawlerBaseInfo: CrawlerBaseInfo{
|
||||
|
@ -49,7 +49,7 @@ func TestSearchStockPriceInfo(t *testing.T) {
|
||||
db.Init("../../data/stock.db")
|
||||
//SearchStockPriceInfo("hk06030", 30)
|
||||
//SearchStockPriceInfo("sh600171", 30)
|
||||
//SearchStockPriceInfo("gb_aapl", 30)
|
||||
SearchStockPriceInfo("gb_aapl", 30)
|
||||
SearchStockPriceInfo("bj430198", 30)
|
||||
|
||||
}
|
||||
|
@ -1,6 +1,8 @@
|
||||
package data
|
||||
|
||||
import (
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
"go-stock/backend/logger"
|
||||
"regexp"
|
||||
"strings"
|
||||
)
|
||||
@ -57,3 +59,29 @@ func ConvertTushareCodeToStockCode(stockCode string) string {
|
||||
stockCode = strings.ToLower(RemoveAllDigitChar(stockCode)) + RemoveAllNonDigitChar(stockCode)
|
||||
return strings.ReplaceAll(stockCode, ".", "")
|
||||
}
|
||||
|
||||
func GetTableMarkdown(document *goquery.Document, waitVisible string, markdown *strings.Builder) {
|
||||
document.Find(waitVisible).First().Find("tr").Each(func(index int, item *goquery.Selection) {
|
||||
row := ""
|
||||
item.Find("th, td").Each(func(i int, cell *goquery.Selection) {
|
||||
text := cell.Text()
|
||||
row += "|" + text
|
||||
})
|
||||
row += "|"
|
||||
|
||||
if index == 0 {
|
||||
// Header row
|
||||
markdown.WriteString(row + "\n")
|
||||
// Separator row
|
||||
separator := ""
|
||||
item.Find("th, td").Each(func(i int, cell *goquery.Selection) {
|
||||
separator += "|---"
|
||||
})
|
||||
markdown.WriteString(separator + "|\n")
|
||||
} else {
|
||||
// Data row
|
||||
markdown.WriteString(row + "\n")
|
||||
}
|
||||
})
|
||||
logger.SugaredLogger.Infof("\n%s", markdown.String())
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user