Mirror of https://github.com/ArvinLovegood/go-stock.git (synced 2025-07-19 00:00:09 +08:00)
feat(backend): add Gushitong news crawling
- Add the SearchGuShiTongStockInfo function to crawl stock news from Baidu Gushitong
- Update the OpenAI_API function to also pull Gushitong news into the analysis context
- Add the RemoveAllNonDigitChar function to strip all non-digit characters
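For orientation, a minimal sketch (a hypothetical test, not part of this commit) of how the new helpers fit together; the stock code "sh600745" and the 60-second timeout are illustrative values:

package data

import "testing"

func TestGuShiTongFlowSketch(t *testing.T) {
    // RemoveAllNonDigitChar strips the exchange prefix ("sh600745" -> "600745"),
    // which SearchGuShiTongStockInfo uses to build the ab-<digits> Gushitong URL.
    if got := RemoveAllNonDigitChar("sh600745"); got != "600745" {
        t.Fatalf("RemoveAllNonDigitChar: got %q", got)
    }

    // Crawl Baidu Gushitong news with a 60-second timeout (illustrative value).
    messages := SearchGuShiTongStockInfo("sh600745", 60)
    if messages == nil || len(*messages) == 0 {
        t.Log("no Gushitong news returned; network access or page layout may differ")
        return
    }
    for _, m := range *messages {
        t.Log(m)
    }
}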
This commit is contained in:
parent fc37440f6b
commit f4da21d645
115  backend/data/crawler_api_test.go  Normal file
@@ -0,0 +1,115 @@
package data

import (
    "context"
    "github.com/PuerkitoBio/goquery"
    "github.com/duke-git/lancet/v2/strutil"
    "go-stock/backend/logger"
    "strings"
    "testing"
    "time"

    "github.com/chromedp/chromedp"
    "github.com/stretchr/testify/assert"
)

func TestNewTimeOutGuShiTongCrawler(t *testing.T) {
    crawlerAPI := CrawlerApi{}
    timeout := 10
    crawlerBaseInfo := CrawlerBaseInfo{
        Name:        "TestCrawler",
        Description: "Test Crawler Description",
        BaseUrl:     "https://gushitong.baidu.com",
        Headers:     map[string]string{"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36 Edg/133.0.0.0"},
    }

    result := crawlerAPI.NewTimeOutCrawler(timeout, crawlerBaseInfo)
    assert.NotNil(t, result.crawlerCtx)
    assert.Equal(t, crawlerBaseInfo, result.crawlerBaseInfo)
}

func TestNewGuShiTongCrawler(t *testing.T) {
    crawlerAPI := CrawlerApi{}
    ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
    defer cancel()
    crawlerBaseInfo := CrawlerBaseInfo{
        Name:        "TestCrawler",
        Description: "Test Crawler Description",
        BaseUrl:     "https://gushitong.baidu.com",
        Headers:     map[string]string{"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36 Edg/133.0.0.0"},
    }

    result := crawlerAPI.NewCrawler(ctx, crawlerBaseInfo)
    assert.Equal(t, ctx, result.crawlerCtx)
    assert.Equal(t, crawlerBaseInfo, result.crawlerBaseInfo)
}

func TestGetHtml(t *testing.T) {
    crawlerAPI := CrawlerApi{}
    crawlerBaseInfo := CrawlerBaseInfo{
        Name:        "TestCrawler",
        Description: "Test Crawler Description",
        BaseUrl:     "https://gushitong.baidu.com",
        Headers:     map[string]string{"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36 Edg/133.0.0.0"},
    }
    ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
    defer cancel()
    crawlerAPI = crawlerAPI.NewCrawler(ctx, crawlerBaseInfo)

    url := "https://www.cls.cn/searchPage?type=depth&keyword=%E6%96%B0%E5%B8%8C%E6%9C%9B"
    waitVisible := ".search-telegraph-list,.subject-interest-list"

    //url = "https://gushitong.baidu.com/stock/ab-600745"
    //waitVisible = "div.news-item"
    htmlContent, success := crawlerAPI.GetHtml(url, waitVisible, true)
    if success {
        document, err := goquery.NewDocumentFromReader(strings.NewReader(htmlContent))
        if err != nil {
            logger.SugaredLogger.Error(err.Error())
        }
        var messages []string
        document.Find(waitVisible).Each(func(i int, selection *goquery.Selection) {
            text := strutil.RemoveNonPrintable(selection.Text())
            messages = append(messages, text)
            logger.SugaredLogger.Infof("搜索到消息-%s: %s", "", text)
        })
    }
    //logger.SugaredLogger.Infof("htmlContent:%s", htmlContent)
}

func TestGetHtmlWithActions(t *testing.T) {
    crawlerAPI := CrawlerApi{}
    ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
    defer cancel()

    crawlerAPI = crawlerAPI.NewCrawler(ctx, CrawlerBaseInfo{
        Name:        "百度股市通",
        Description: "Test Crawler Description",
        BaseUrl:     "https://gushitong.baidu.com",
        Headers:     map[string]string{"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36 Edg/133.0.0.0"},
    })
    actions := []chromedp.Action{
        chromedp.Navigate("https://gushitong.baidu.com/stock/ab-600745"),
        chromedp.WaitVisible("div.cos-tab"),
        chromedp.Click("div.cos-tab:nth-child(5)", chromedp.ByQuery),
        chromedp.ScrollIntoView("div.body-box"),
        chromedp.WaitVisible("div.body-col"),
        chromedp.Evaluate(`window.scrollTo(0, document.body.scrollHeight);`, nil),
        chromedp.Sleep(1 * time.Second),
    }
    htmlContent, success := crawlerAPI.GetHtmlWithActions(&actions, true)
    if success {
        document, err := goquery.NewDocumentFromReader(strings.NewReader(htmlContent))
        if err != nil {
            logger.SugaredLogger.Error(err.Error())
        }
        var messages []string
        document.Find("div.finance-hover,div.list-date").Each(func(i int, selection *goquery.Selection) {
            text := strutil.RemoveNonPrintable(selection.Text())
            messages = append(messages, text)
            logger.SugaredLogger.Infof("搜索到消息-%s: %s", "", text)
        })
        logger.SugaredLogger.Infof("messages:%d", len(messages))
    }
    //logger.SugaredLogger.Infof("htmlContent:%s", htmlContent)
}
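The two GetHtml tests above drive a real headless browser through the project's CrawlerApi wrapper. As a rough standalone sketch of the underlying chromedp flow they depend on (this is an assumption about what GetHtmlWithActions wraps, not the project's implementation):

package main

import (
    "context"
    "log"
    "time"

    "github.com/chromedp/chromedp"
)

func main() {
    // Plain chromedp fetch: navigate, wait for a selector to become visible,
    // then read back the fully rendered HTML.
    ctx, cancel := chromedp.NewContext(context.Background())
    defer cancel()
    ctx, cancel = context.WithTimeout(ctx, 60*time.Second)
    defer cancel()

    var html string
    err := chromedp.Run(ctx,
        chromedp.Navigate("https://gushitong.baidu.com/stock/ab-600745"),
        chromedp.WaitVisible("div.cos-tab", chromedp.ByQuery),
        chromedp.OuterHTML("html", &html, chromedp.ByQuery),
    )
    if err != nil {
        log.Fatal(err)
    }
    log.Printf("fetched %d bytes of rendered HTML", len(html))
}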
@@ -191,22 +191,22 @@ func (o OpenAi) NewChatStream(stock, stockCode string) <-chan string {
        }
    }()

    go func() {
        defer wg.Done()
        messages := SearchStockInfo(stock, "depth", o.CrawlTimeOut)
        if messages == nil || len(*messages) == 0 {
            logger.SugaredLogger.Error("获取股票资讯失败")
            //ch <- "***❗获取股票资讯失败,分析结果可能不准确***<hr>"
            //go runtime.EventsEmit(o.ctx, "warnMsg", "❗获取股票资讯失败,分析结果可能不准确")
            return
        }
        for _, message := range *messages {
            msg = append(msg, map[string]interface{}{
                "role":    "assistant",
                "content": message,
            })
        }
    }()
    //go func() {
    //    defer wg.Done()
    //    messages := SearchStockInfo(stock, "depth", o.CrawlTimeOut)
    //    if messages == nil || len(*messages) == 0 {
    //        logger.SugaredLogger.Error("获取股票资讯失败")
    //        //ch <- "***❗获取股票资讯失败,分析结果可能不准确***<hr>"
    //        //go runtime.EventsEmit(o.ctx, "warnMsg", "❗获取股票资讯失败,分析结果可能不准确")
    //        return
    //    }
    //    for _, message := range *messages {
    //        msg = append(msg, map[string]interface{}{
    //            "role":    "assistant",
    //            "content": message,
    //        })
    //    }
    //}()
    go func() {
        defer wg.Done()
        messages := SearchStockInfo(stock, "telegram", o.CrawlTimeOut)
@@ -223,6 +223,24 @@ func (o OpenAi) NewChatStream(stock, stockCode string) <-chan string {
            })
        }
    }()

    go func() {
        defer wg.Done()
        messages := SearchGuShiTongStockInfo(stockCode, o.CrawlTimeOut)
        if messages == nil || len(*messages) == 0 {
            logger.SugaredLogger.Error("获取股势通资讯失败")
            //ch <- "***❗获取股势通资讯失败,分析结果可能不准确***<hr>"
            //go runtime.EventsEmit(o.ctx, "warnMsg", "❗获取股势通资讯失败,分析结果可能不准确")
            return
        }
        for _, message := range *messages {
            msg = append(msg, map[string]interface{}{
                "role":    "assistant",
                "content": message,
            })
        }
    }()

    wg.Wait()
    msg = append(msg, map[string]interface{}{
        "role": "user",
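The hunk above registers the new Gushitong fetch as one more goroutine next to the existing news sources, with wg.Wait() holding back the final prompt until every source has finished. A minimal standalone sketch of that sync.WaitGroup fan-out (placeholder fetchers; a mutex guards the shared slice in this sketch):

package main

import (
    "fmt"
    "sync"
)

func main() {
    var wg sync.WaitGroup
    var mu sync.Mutex
    var msg []map[string]interface{}

    // Each source is fetched concurrently; results are appended as assistant
    // messages, and wg.Wait() blocks until all fetchers are done.
    sources := []func() []string{
        func() []string { return []string{"depth news (placeholder)"} },
        func() []string { return []string{"telegram news (placeholder)"} },
        func() []string { return []string{"Gushitong news (placeholder)"} },
    }
    for _, fetch := range sources {
        wg.Add(1)
        go func(fetch func() []string) {
            defer wg.Done()
            for _, m := range fetch() {
                mu.Lock()
                msg = append(msg, map[string]interface{}{"role": "assistant", "content": m})
                mu.Unlock()
            }
        }(fetch)
    }
    wg.Wait()
    fmt.Printf("collected %d context messages\n", len(msg))
}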
@@ -308,6 +326,45 @@ func (o OpenAi) NewChatStream(stock, stockCode string) <-chan string {
    return ch
}

func SearchGuShiTongStockInfo(stock string, crawlTimeOut int64) *[]string {
    crawlerAPI := CrawlerApi{}
    ctx, cancel := context.WithTimeout(context.Background(), time.Duration(crawlTimeOut)*time.Second)
    defer cancel()

    crawlerAPI = crawlerAPI.NewCrawler(ctx, CrawlerBaseInfo{
        Name:    "百度股市通",
        BaseUrl: "https://gushitong.baidu.com",
        Headers: map[string]string{"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36 Edg/133.0.0.0"},
    })
    url := "https://gushitong.baidu.com/stock/ab-" + RemoveAllNonDigitChar(stock)
    logger.SugaredLogger.Infof("SearchGuShiTongStockInfo搜索股票-%s: %s", stock, url)
    actions := []chromedp.Action{
        chromedp.Navigate(url),
        chromedp.WaitVisible("div.cos-tab"),
        chromedp.Click("div.cos-tab:nth-child(5)", chromedp.ByQuery),
        chromedp.ScrollIntoView("div.body-box"),
        chromedp.WaitVisible("div.body-col"),
        chromedp.Evaluate(`window.scrollTo(0, document.body.scrollHeight);`, nil),
        chromedp.Sleep(1 * time.Second),
    }
    htmlContent, success := crawlerAPI.GetHtmlWithActions(&actions, true)
    var messages []string
    if success {
        document, err := goquery.NewDocumentFromReader(strings.NewReader(htmlContent))
        if err != nil {
            logger.SugaredLogger.Error(err.Error())
            return &[]string{}
        }
        document.Find("div.finance-hover,div.list-date").Each(func(i int, selection *goquery.Selection) {
            text := strutil.RemoveNonPrintable(selection.Text())
            messages = append(messages, text)
            logger.SugaredLogger.Infof("SearchGuShiTongStockInfo搜索到消息-%s: %s", "", text)
        })
        logger.SugaredLogger.Infof("messages:%d", len(messages))
    }
    return &messages
}

func GetFinancialReports(stockCode string, crawlTimeOut int64) *[]string {
    // create a chromedp context
    timeoutCtx, timeoutCtxCancel := context.WithTimeout(context.Background(), time.Duration(crawlTimeOut)*time.Second)
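Taken in isolation, the goquery extraction step in SearchGuShiTongStockInfo boils down to the following standalone sketch (the inline HTML is an illustrative stand-in for the rendered Gushitong page, reusing the same class names the function selects on):

package main

import (
    "fmt"
    "strings"

    "github.com/PuerkitoBio/goquery"
)

func main() {
    // Parse an HTML fragment and collect the text of every matching node.
    html := `<div><div class="finance-hover">公司发布年报</div><div class="list-date">02-28</div></div>`
    doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))
    if err != nil {
        panic(err)
    }
    var messages []string
    doc.Find("div.finance-hover,div.list-date").Each(func(i int, s *goquery.Selection) {
        messages = append(messages, strings.TrimSpace(s.Text()))
    })
    fmt.Println(messages) // [公司发布年报 02-28]
}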
@@ -15,3 +15,9 @@ func removeAllSpaces(s string) string {
    re := regexp.MustCompile(`\s`)
    return re.ReplaceAllString(s, "")
}

// RemoveAllNonDigitChar removes all non-digit characters
func RemoveAllNonDigitChar(s string) string {
    re := regexp.MustCompile(`\D`)
    return re.ReplaceAllString(s, "")
}
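A quick standalone check of what the new helper does (the "sh600745" input is illustrative): `\D` matches every non-digit character, so an exchange-prefixed code collapses to the bare digits expected by the ab-<digits> Gushitong URL.

package main

import (
    "fmt"
    "regexp"
)

func main() {
    // Same regexp as RemoveAllNonDigitChar above.
    re := regexp.MustCompile(`\D`)
    fmt.Println(re.ReplaceAllString("sh600745", "")) // 600745
}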