go-stock/backend/data/crawler_api_test.go
ArvinLovegood a6f17c632e feat(stock):添加香港股票数据支持
- 新增 StockInfoHK 模型用于存储香港股票基本信息
- 实现香港股票数据的爬取和解析功能
- 更新数据库初始化逻辑,支持香港股票数据导入
- 修改股票价格信息获取接口,支持香港股票
- 优化股票数据解析逻辑,适配香港股票数据格式
2025-02-22 21:47:05 +08:00

169 lines
6.6 KiB
Go

package data
import (
"context"
"fmt"
"github.com/PuerkitoBio/goquery"
"github.com/duke-git/lancet/v2/strutil"
"go-stock/backend/db"
"go-stock/backend/logger"
"go-stock/backend/models"
"strings"
"testing"
"time"
"github.com/chromedp/chromedp"
"github.com/stretchr/testify/assert"
)
func TestNewTimeOutGuShiTongCrawler(t *testing.T) {
crawlerAPI := CrawlerApi{}
timeout := 10
crawlerBaseInfo := CrawlerBaseInfo{
Name: "TestCrawler",
Description: "Test Crawler Description",
BaseUrl: "https://gushitong.baidu.com",
Headers: map[string]string{"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36 Edg/133.0.0.0"},
}
result := crawlerAPI.NewTimeOutCrawler(timeout, crawlerBaseInfo)
assert.NotNil(t, result.crawlerCtx)
assert.Equal(t, crawlerBaseInfo, result.crawlerBaseInfo)
}
func TestNewGuShiTongCrawler(t *testing.T) {
crawlerAPI := CrawlerApi{}
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
crawlerBaseInfo := CrawlerBaseInfo{
Name: "TestCrawler",
Description: "Test Crawler Description",
BaseUrl: "https://gushitong.baidu.com",
Headers: map[string]string{"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36 Edg/133.0.0.0"},
}
result := crawlerAPI.NewCrawler(ctx, crawlerBaseInfo)
assert.Equal(t, ctx, result.crawlerCtx)
assert.Equal(t, crawlerBaseInfo, result.crawlerBaseInfo)
}
// TestGetHtml calls CrawlerApi.GetHtml for a cls.cn search page and logs the
// text of every element matching the wait selector.
//
// No assertion is made about the page content. If GetHtml reports failure the
// test logs that fact and returns without failing. A parse error on the
// returned HTML fails the test.
func TestGetHtml(t *testing.T) {
	crawlerAPI := CrawlerApi{}
	crawlerBaseInfo := CrawlerBaseInfo{
		Name:        "TestCrawler",
		Description: "Test Crawler Description",
		BaseUrl:     "https://gushitong.baidu.com",
		Headers:     map[string]string{"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36 Edg/133.0.0.0"},
	}
	ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
	defer cancel()
	crawlerAPI = crawlerAPI.NewCrawler(ctx, crawlerBaseInfo)

	url := "https://www.cls.cn/searchPage?type=depth&keyword=%E6%96%B0%E5%B8%8C%E6%9C%9B"
	waitVisible := ".search-telegraph-list,.subject-interest-list"
	// Alternative target, kept for manual experiments:
	//url = "https://gushitong.baidu.com/stock/ab-600745"
	//waitVisible = "div.news-item"

	htmlContent, success := crawlerAPI.GetHtml(url, waitVisible, true)
	if !success {
		t.Log("GetHtml reported failure; nothing to parse")
		return
	}

	document, err := goquery.NewDocumentFromReader(strings.NewReader(htmlContent))
	if err != nil {
		// document is nil when parsing fails, so calling Find on it would
		// panic; stop here with a clear message instead.
		logger.SugaredLogger.Error(err.Error())
		t.Fatalf("parsing fetched HTML: %v", err)
	}

	var messages []string
	document.Find(waitVisible).Each(func(i int, selection *goquery.Selection) {
		text := strutil.RemoveNonPrintable(selection.Text())
		messages = append(messages, text)
		logger.SugaredLogger.Infof("搜索到消息-%s: %s", "", text)
	})
	// Report how many elements matched, as TestGetHtmlWithActions does.
	logger.SugaredLogger.Infof("messages:%d", len(messages))
	//logger.SugaredLogger.Infof("htmlContent:%s", htmlContent)
}
// TestGetHtmlWithActions runs a scripted chromedp action sequence against a
// Baidu Gushitong stock page through CrawlerApi.GetHtmlWithActions, then logs
// the text of the report rows found in the returned HTML.
//
// No assertion is made about the page content. If GetHtmlWithActions reports
// failure the test logs that fact and returns without failing. A parse error
// on the returned HTML fails the test.
func TestGetHtmlWithActions(t *testing.T) {
	crawlerAPI := CrawlerApi{}
	ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
	defer cancel()
	crawlerAPI = crawlerAPI.NewCrawler(ctx, CrawlerBaseInfo{
		Name:        "百度股市通",
		Description: "Test Crawler Description",
		BaseUrl:     "https://gushitong.baidu.com",
		Headers:     map[string]string{"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36 Edg/133.0.0.0"},
	})

	actions := []chromedp.Action{
		// Open the stock page and wait until its tab bar is visible.
		chromedp.Navigate("https://gushitong.baidu.com/stock/ab-600745"),
		chromedp.WaitVisible("div.cos-tab"),
		// Click the header tab that is the 6th child, then scroll to the
		// third row of the finance container.
		chromedp.Click(".header div.cos-tab:nth-child(6)", chromedp.ByQuery),
		chromedp.ScrollIntoView("div.finance-container >div.row:nth-child(3)"),
		// Click the 1st sub-tab, wait for the report column, then click the
		// 4th sub-tab.
		chromedp.WaitVisible("div.cos-tabs-header-container"),
		chromedp.Click(".page-content .cos-tabs-header-container .cos-tabs-header .cos-tab:nth-child(1)", chromedp.ByQuery),
		chromedp.WaitVisible(".page-content .finance-container .report-col-content", chromedp.ByQuery),
		chromedp.Click(".page-content .cos-tabs-header-container .cos-tabs-header .cos-tab:nth-child(4)", chromedp.ByQuery),
		// Scroll to the bottom of the page and pause for one second.
		chromedp.Evaluate(`window.scrollTo(0, document.body.scrollHeight);`, nil),
		chromedp.Sleep(1 * time.Second),
	}

	htmlContent, success := crawlerAPI.GetHtmlWithActions(&actions, false)
	if !success {
		t.Log("GetHtmlWithActions reported failure; nothing to parse")
		return
	}

	document, err := goquery.NewDocumentFromReader(strings.NewReader(htmlContent))
	if err != nil {
		// document is nil when parsing fails, so calling Find on it would
		// panic; stop here with a clear message instead.
		logger.SugaredLogger.Error(err.Error())
		t.Fatalf("parsing fetched HTML: %v", err)
	}

	var messages []string
	document.Find("div.report-table-list-container,div.report-row").Each(func(i int, selection *goquery.Selection) {
		text := strutil.RemoveWhiteSpace(selection.Text(), false)
		messages = append(messages, text)
		logger.SugaredLogger.Infof("搜索到消息-%s: %s", "", text)
	})
	logger.SugaredLogger.Infof("messages:%d", len(messages))
	//logger.SugaredLogger.Infof("htmlContent:%s", htmlContent)
}
// TestHk loads at most one StockInfoHK row from the local database, calls
// CrawlerApi.GetHtml for the matching Sina Finance quote page, and logs the
// stock name, current price and detail list items found in the HTML.
//
// No assertion is made about the scraped values. A stock whose page cannot be
// fetched is skipped. A parse error is reported through t.Errorf and that
// stock is skipped.
func TestHk(t *testing.T) {
	// Example page: https://stock.finance.sina.com.cn/hkstock/quotes/00001.html
	db.Init("../../data/stock.db")
	var hks []models.StockInfoHK
	// NOTE(review): the query result's error is not inspected; confirm whether
	// a failed query should fail the test.
	db.Dao.Model(&models.StockInfoHK{}).Limit(1).Find(&hks)

	crawlerAPI := CrawlerApi{}
	crawlerBaseInfo := CrawlerBaseInfo{
		Name:        "TestCrawler",
		Description: "Test Crawler Description",
		BaseUrl:     "https://stock.finance.sina.com.cn",
		Headers:     map[string]string{"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36 Edg/133.0.0.0"},
	}
	ctx, cancel := context.WithTimeout(context.Background(), 60*time.Minute)
	defer cancel()
	crawlerAPI = crawlerAPI.NewCrawler(ctx, crawlerBaseInfo)

	for _, hk := range hks {
		logger.SugaredLogger.Infof("hk: %+v", hk)
		// The quote URL uses the code with every ".HK" occurrence removed.
		url := fmt.Sprintf("https://stock.finance.sina.com.cn/hkstock/quotes/%s.html", strings.ReplaceAll(hk.Code, ".HK", ""))
		htmlContent, ok := crawlerAPI.GetHtml(url, "#stock_cname", true)
		if !ok {
			continue
		}
		//logger.SugaredLogger.Infof("htmlContent: %s", htmlContent)
		document, err := goquery.NewDocumentFromReader(strings.NewReader(htmlContent))
		if err != nil {
			// document is nil when parsing fails, so calling Find on it would
			// panic; record the failure and move on to the next stock.
			logger.SugaredLogger.Error(err.Error())
			t.Errorf("parsing HTML for %s: %v", hk.Code, err)
			continue
		}

		document.Find("#stock_cname").Each(func(i int, selection *goquery.Selection) {
			text := strutil.RemoveNonPrintable(selection.Text())
			logger.SugaredLogger.Infof("股票名称-:%s", text)
		})
		document.Find("#mts_stock_hk_price").Each(func(i int, selection *goquery.Selection) {
			text := strutil.RemoveNonPrintable(selection.Text())
			logger.SugaredLogger.Infof("股票名称-现价: %s", text)
		})
		document.Find(".deta_hqContainer >.deta03 li").Each(func(i int, selection *goquery.Selection) {
			text := strutil.RemoveNonPrintable(selection.Text())
			logger.SugaredLogger.Infof("股票名称-%s: %s", "", text)
		})
	}
}