diff --git a/backend/data/crawler_api.go b/backend/data/crawler_api.go index b20a3f2..e1cd6aa 100644 --- a/backend/data/crawler_api.go +++ b/backend/data/crawler_api.go @@ -68,7 +68,7 @@ func (c *CrawlerApi) GetHtml(url, waitVisible string, headless bool) (string, bo defer pcancel() ctx, cancel := chromedp.NewContext(pctx, chromedp.WithLogf(logger.SugaredLogger.Infof)) defer cancel() - defer chromedp.Cancel(ctx) + //defer chromedp.Cancel(ctx) err := chromedp.Run(ctx, chromedp.Navigate(url), chromedp.WaitVisible(waitVisible, chromedp.ByQuery), // 确保 元素可见 chromedp.WaitReady(waitVisible, chromedp.ByQuery), // 确保 元素准备好 @@ -81,7 +81,7 @@ func (c *CrawlerApi) GetHtml(url, waitVisible string, headless bool) (string, bo } else { ctx, cancel := chromedp.NewContext(c.crawlerCtx, chromedp.WithLogf(logger.SugaredLogger.Infof)) defer cancel() - defer chromedp.Cancel(ctx) + //defer chromedp.Cancel(ctx) err := chromedp.Run(ctx, chromedp.Navigate(url), chromedp.WaitVisible("body"), chromedp.InnerHTML("body", &htmlContent)) if err != nil { logger.SugaredLogger.Error(err.Error()) @@ -199,7 +199,7 @@ func (c *CrawlerApi) GetHtmlWithActions(actions *[]chromedp.Action, headless boo defer pcancel() ctx, cancel := chromedp.NewContext(pctx, chromedp.WithLogf(logger.SugaredLogger.Infof)) defer cancel() - defer chromedp.Cancel(ctx) + //defer chromedp.Cancel(ctx) err := chromedp.Run(ctx, *actions...) if err != nil { @@ -209,7 +209,7 @@ func (c *CrawlerApi) GetHtmlWithActions(actions *[]chromedp.Action, headless boo } else { ctx, cancel := chromedp.NewContext(c.crawlerCtx, chromedp.WithLogf(logger.SugaredLogger.Infof)) defer cancel() - defer chromedp.Cancel(ctx) + //defer chromedp.Cancel(ctx) err := chromedp.Run(ctx, *actions...) if err != nil { diff --git a/backend/data/crawler_api_test.go b/backend/data/crawler_api_test.go index c8692f2..e7677ec 100644 --- a/backend/data/crawler_api_test.go +++ b/backend/data/crawler_api_test.go @@ -298,6 +298,39 @@ func TestUSSINA(t *testing.T) { }) } +func TestXueqiu(t *testing.T) { + db.Init("../../data/stock.db") + url := "https://finance.sina.com.cn/realstock/company/sz002906/nc.shtml" + crawlerAPI := CrawlerApi{} + crawlerBaseInfo := CrawlerBaseInfo{ + Name: "TestCrawler", + Description: "Test Crawler Description", + BaseUrl: "https://finance.sina.com.cn", + Headers: map[string]string{"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36 Edg/133.0.0.0"}, + } + ctx, cancel := context.WithTimeout(context.Background(), 60*time.Minute) + defer cancel() + crawlerAPI = crawlerAPI.NewCrawler(ctx, crawlerBaseInfo) + html, ok := crawlerAPI.GetHtml(url, "div#hqDetails table", true) + if !ok { + return + } + document, err := goquery.NewDocumentFromReader(strings.NewReader(html)) + if err != nil { + logger.SugaredLogger.Error(err.Error()) + } + + //price + price := strutil.RemoveWhiteSpace(document.Find("div#price").First().Text(), false) + hqTime := strutil.RemoveWhiteSpace(document.Find("div#hqTime").First().Text(), false) + + var markdown strings.Builder + markdown.WriteString("\n ## 当前股票数据:\n") + markdown.WriteString(fmt.Sprintf("### 当前股价:%s 时间:%s\n", price, hqTime)) + GetTableMarkdown(document, "div#hqDetails table", &markdown) + +} + type Tick struct { Code int `json:"code"` Status string `json:"status"` diff --git a/backend/data/openai_api.go b/backend/data/openai_api.go index cfc4f88..bb2152a 100644 --- a/backend/data/openai_api.go +++ b/backend/data/openai_api.go @@ -196,7 +196,7 @@ func (o OpenAi) NewChatStream(stock, stockCode, userQuestion string, sysPromptId } msg = append(msg, map[string]interface{}{ "role": "user", - "content": stock + time.Now().Format(time.DateOnly) + "价格:" + price, + "content": "\n## " + stock + "股价数据:\n" + price, }) }() diff --git a/backend/data/stock_data_api.go b/backend/data/stock_data_api.go index f1ecfbe..1bcb81e 100644 --- a/backend/data/stock_data_api.go +++ b/backend/data/stock_data_api.go @@ -15,7 +15,6 @@ import ( "github.com/duke-git/lancet/v2/convertor" "github.com/duke-git/lancet/v2/slice" "github.com/duke-git/lancet/v2/strutil" - "github.com/duke-git/lancet/v2/validator" "github.com/go-resty/resty/v2" "go-stock/backend/db" "go-stock/backend/logger" @@ -770,12 +769,12 @@ func GetRealTimeStockPriceInfo(ctx context.Context, stockCode string) (price, pr func SearchStockPriceInfo(stockCode string, crawlTimeOut int64) *[]string { if strutil.HasPrefixAny(stockCode, []string{"SZ", "SH", "sh", "sz", "bj"}) { - if strutil.HasPrefixAny(stockCode, []string{"bj", "BJ"}) { - stockCode = strutil.ReplaceWithMap(stockCode, map[string]string{ - "bj": "", - "BJ": "", - }) + ".BJ" - } + //if strutil.HasPrefixAny(stockCode, []string{"bj", "BJ"}) { + // stockCode = strutil.ReplaceWithMap(stockCode, map[string]string{ + // "bj": "", + // "BJ": "", + // }) + ".BJ" + //} return getSHSZStockPriceInfo(stockCode, crawlTimeOut) } @@ -893,90 +892,36 @@ func getHKStockPriceInfo(stockCode string, crawlTimeOut int64) *[]string { } func getSHSZStockPriceInfo(stockCode string, crawlTimeOut int64) *[]string { - var messages []string - url := "https://www.cls.cn/stock?code=" + stockCode - // 创建一个 chromedp 上下文 - timeoutCtx, timeoutCtxCancel := context.WithTimeout(context.Background(), time.Duration(crawlTimeOut)*time.Second) - defer timeoutCtxCancel() - var ctx context.Context - var cancel context.CancelFunc - path := getConfig().BrowserPath - logger.SugaredLogger.Infof("SearchStockPriceInfo BrowserPath:%s", path) - if path != "" { - pctx, pcancel := chromedp.NewExecAllocator( - timeoutCtx, - chromedp.ExecPath(path), - chromedp.Flag("headless", true), - ) - defer pcancel() - ctx, cancel = chromedp.NewContext( - pctx, - chromedp.WithLogf(logger.SugaredLogger.Infof), - chromedp.WithErrorf(logger.SugaredLogger.Errorf), - ) - } else { - ctx, cancel = chromedp.NewContext( - timeoutCtx, - chromedp.WithLogf(logger.SugaredLogger.Infof), - chromedp.WithErrorf(logger.SugaredLogger.Errorf), - ) + url := "https://finance.sina.com.cn/realstock/company/" + stockCode + "/nc.shtml" + crawlerAPI := CrawlerApi{} + crawlerBaseInfo := CrawlerBaseInfo{ + Name: "TestCrawler", + Description: "Test Crawler Description", + BaseUrl: "https://finance.sina.com.cn", + Headers: map[string]string{"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36 Edg/133.0.0.0"}, } + ctx, cancel := context.WithTimeout(context.Background(), time.Duration(crawlTimeOut)*time.Second) defer cancel() - - var htmlContent string - - var tasks chromedp.Tasks - tasks = append(tasks, chromedp.Navigate(url)) - tasks = append(tasks, chromedp.WaitVisible("div.quote-change-box", chromedp.ByQuery)) - tasks = append(tasks, chromedp.ActionFunc(func(ctx context.Context) error { - price, _ := FetchPrice(ctx) - logger.SugaredLogger.Infof("price:%s", price) - return nil - })) - tasks = append(tasks, chromedp.OuterHTML("html", &htmlContent, chromedp.ByQuery)) - - err := chromedp.Run(ctx, tasks) + crawlerAPI = crawlerAPI.NewCrawler(ctx, crawlerBaseInfo) + html, ok := crawlerAPI.GetHtml(url, "div#hqDetails table", true) + if !ok { + return &[]string{""} + } + document, err := goquery.NewDocumentFromReader(strings.NewReader(html)) if err != nil { logger.SugaredLogger.Error(err.Error()) - return &[]string{} - } - document, err := goquery.NewDocumentFromReader(strings.NewReader(htmlContent)) - if err != nil { - logger.SugaredLogger.Error(err.Error()) - return &[]string{} } - document.Find("div.quote-text-border,span.quote-price").Each(func(i int, selection *goquery.Selection) { - text := strutil.RemoveNonPrintable(selection.Text()) - logger.SugaredLogger.Info(text) - messages = append(messages, text) + //price + price := strutil.RemoveWhiteSpace(document.Find("div#price").First().Text(), false) + hqTime := strutil.RemoveWhiteSpace(document.Find("div#hqTime").First().Text(), false) - }) - return &messages + var markdown strings.Builder + markdown.WriteString(fmt.Sprintf("### 当前股价:%s 时间:%s\n", price, hqTime)) + GetTableMarkdown(document, "div#hqDetails table", &markdown) + return &[]string{markdown.String()} } -func FetchPrice(ctx context.Context) (string, error) { - var price string - timeout := time.After(10 * time.Second) // 设置超时时间为10秒 - ticker := time.NewTicker(1 * time.Second) // 每秒尝试一次 - defer ticker.Stop() - for { - select { - case <-timeout: - return "", fmt.Errorf("timeout reached while fetching price") - case <-ticker.C: - err := chromedp.Run(ctx, chromedp.Text("span.quote-price", &price, chromedp.BySearch)) - if err != nil { - logger.SugaredLogger.Errorf("failed to fetch price: %v", err) - continue - } - logger.SugaredLogger.Infof("price:%s", price) - if price != "" && validator.IsNumberStr(price) { - return price, nil - } - } - } -} func SearchStockInfo(stock, msgType string, crawlTimeOut int64) *[]string { crawler := CrawlerApi{ crawlerBaseInfo: CrawlerBaseInfo{ diff --git a/backend/data/stock_data_api_test.go b/backend/data/stock_data_api_test.go index ae2ca55..132bbfe 100644 --- a/backend/data/stock_data_api_test.go +++ b/backend/data/stock_data_api_test.go @@ -49,7 +49,7 @@ func TestSearchStockPriceInfo(t *testing.T) { db.Init("../../data/stock.db") //SearchStockPriceInfo("hk06030", 30) //SearchStockPriceInfo("sh600171", 30) - //SearchStockPriceInfo("gb_aapl", 30) + SearchStockPriceInfo("gb_aapl", 30) SearchStockPriceInfo("bj430198", 30) } diff --git a/backend/data/utils.go b/backend/data/utils.go index 1c4ebb7..6ffbb4a 100644 --- a/backend/data/utils.go +++ b/backend/data/utils.go @@ -1,6 +1,8 @@ package data import ( + "github.com/PuerkitoBio/goquery" + "go-stock/backend/logger" "regexp" "strings" ) @@ -57,3 +59,29 @@ func ConvertTushareCodeToStockCode(stockCode string) string { stockCode = strings.ToLower(RemoveAllDigitChar(stockCode)) + RemoveAllNonDigitChar(stockCode) return strings.ReplaceAll(stockCode, ".", "") } + +func GetTableMarkdown(document *goquery.Document, waitVisible string, markdown *strings.Builder) { + document.Find(waitVisible).First().Find("tr").Each(func(index int, item *goquery.Selection) { + row := "" + item.Find("th, td").Each(func(i int, cell *goquery.Selection) { + text := cell.Text() + row += "|" + text + }) + row += "|" + + if index == 0 { + // Header row + markdown.WriteString(row + "\n") + // Separator row + separator := "" + item.Find("th, td").Each(func(i int, cell *goquery.Selection) { + separator += "|---" + }) + markdown.WriteString(separator + "|\n") + } else { + // Data row + markdown.WriteString(row + "\n") + } + }) + logger.SugaredLogger.Infof("\n%s", markdown.String()) +}