feat(settings):添加浏览器路径配置并优化爬虫功能

- 在前端和后端的设置中添加浏览器路径配置项
- 修改爬虫相关函数,使用配置的浏览器路径替代自动检测
- 优化日志输出,统一使用"BrowserPath"字段
- 重构部分代码,提高可维护性
This commit is contained in:
ArvinLovegood 2025-03-15 10:20:26 +08:00
parent 1d4ede336c
commit 4c3fa36d4f
7 changed files with 35 additions and 17 deletions

View File

@@ -31,9 +31,9 @@ func (c *CrawlerApi) NewCrawler(ctx context.Context, crawlerBaseInfo CrawlerBase
func (c *CrawlerApi) GetHtml(url, waitVisible string, headless bool) (string, bool) { func (c *CrawlerApi) GetHtml(url, waitVisible string, headless bool) (string, bool) {
htmlContent := "" htmlContent := ""
path, e := checkBrowserOnWindows() path := getConfig().BrowserPath
logger.SugaredLogger.Infof("GetHtml path:%s", path) logger.SugaredLogger.Infof("Browser path:%s", path)
if e { if path != "" {
pctx, pcancel := chromedp.NewExecAllocator( pctx, pcancel := chromedp.NewExecAllocator(
c.crawlerCtx, c.crawlerCtx,
chromedp.ExecPath(path), chromedp.ExecPath(path),
@@ -92,14 +92,14 @@ func (c *CrawlerApi) GetHtml(url, waitVisible string, headless bool) (string, bo
func (c *CrawlerApi) GetHtmlWithNoCancel(url, waitVisible string, headless bool) (html string, ok bool, parent context.CancelFunc, child context.CancelFunc) { func (c *CrawlerApi) GetHtmlWithNoCancel(url, waitVisible string, headless bool) (html string, ok bool, parent context.CancelFunc, child context.CancelFunc) {
htmlContent := "" htmlContent := ""
path, e := checkBrowserOnWindows() path := getConfig().BrowserPath
logger.SugaredLogger.Infof("GetHtml path:%s", path) logger.SugaredLogger.Infof("BrowserPath :%s", path)
var parentCancel context.CancelFunc var parentCancel context.CancelFunc
var childCancel context.CancelFunc var childCancel context.CancelFunc
var pctx context.Context var pctx context.Context
var cctx context.Context var cctx context.Context
if e { if path != "" {
pctx, parentCancel = chromedp.NewExecAllocator( pctx, parentCancel = chromedp.NewExecAllocator(
c.crawlerCtx, c.crawlerCtx,
chromedp.ExecPath(path), chromedp.ExecPath(path),
@@ -160,9 +160,9 @@ func (c *CrawlerApi) GetHtmlWithActions(actions *[]chromedp.Action, headless boo
htmlContent := "" htmlContent := ""
*actions = append(*actions, chromedp.InnerHTML("body", &htmlContent)) *actions = append(*actions, chromedp.InnerHTML("body", &htmlContent))
path, e := checkBrowserOnWindows() path := getConfig().BrowserPath
logger.SugaredLogger.Infof("GetHtmlWithActions path:%s", path) logger.SugaredLogger.Infof("GetHtmlWithActions path:%s", path)
if e { if path != "" {
pctx, pcancel := chromedp.NewExecAllocator( pctx, pcancel := chromedp.NewExecAllocator(
c.crawlerCtx, c.crawlerCtx,
chromedp.ExecPath(path), chromedp.ExecPath(path),

View File

@@ -34,6 +34,7 @@ type OpenAi struct {
QuestionTemplate string `json:"question_template"` QuestionTemplate string `json:"question_template"`
CrawlTimeOut int64 `json:"crawl_time_out"` CrawlTimeOut int64 `json:"crawl_time_out"`
KDays int64 `json:"kDays"` KDays int64 `json:"kDays"`
BrowserPath string `json:"browser_path"`
} }
func NewDeepSeekOpenAi(ctx context.Context) *OpenAi { func NewDeepSeekOpenAi(ctx context.Context) *OpenAi {
@@ -61,6 +62,7 @@ func NewDeepSeekOpenAi(ctx context.Context) *OpenAi {
QuestionTemplate: config.QuestionTemplate, QuestionTemplate: config.QuestionTemplate,
CrawlTimeOut: config.CrawlTimeOut, CrawlTimeOut: config.CrawlTimeOut,
KDays: config.KDays, KDays: config.KDays,
BrowserPath: config.BrowserPath,
} }
} }
@@ -500,10 +502,10 @@ func GetFinancialReports(stockCode string, crawlTimeOut int64) *[]string {
defer timeoutCtxCancel() defer timeoutCtxCancel()
var ctx context.Context var ctx context.Context
var cancel context.CancelFunc var cancel context.CancelFunc
path, e := checkBrowserOnWindows() path := getConfig().BrowserPath
logger.SugaredLogger.Infof("GetFinancialReports path:%s", path) logger.SugaredLogger.Infof("GetFinancialReports path:%s", path)
if e { if path != "" {
pctx, pcancel := chromedp.NewExecAllocator( pctx, pcancel := chromedp.NewExecAllocator(
timeoutCtx, timeoutCtx,
chromedp.ExecPath(path), chromedp.ExecPath(path),

View File

@@ -9,7 +9,7 @@ import (
func TestNewDeepSeekOpenAiConfig(t *testing.T) { func TestNewDeepSeekOpenAiConfig(t *testing.T) {
db.Init("../../data/stock.db") db.Init("../../data/stock.db")
ai := NewDeepSeekOpenAi(context.TODO()) ai := NewDeepSeekOpenAi(context.TODO())
res := ai.NewChatStream("北京文化", "sz000802", "") res := ai.NewChatStream("上海贝岭", "sh600171", "分析以上股票资金流入信息,找出适合买入的股票,给出具体操作建议")
for { for {
select { select {
case msg := <-res: case msg := <-res:

View File

@@ -29,6 +29,7 @@ type Settings struct {
CrawlTimeOut int64 `json:"crawlTimeOut"` CrawlTimeOut int64 `json:"crawlTimeOut"`
KDays int64 `json:"kDays"` KDays int64 `json:"kDays"`
EnableDanmu bool `json:"enableDanmu"` EnableDanmu bool `json:"enableDanmu"`
BrowserPath string `json:"browserPath"`
} }
func (receiver Settings) TableName() string { func (receiver Settings) TableName() string {
@@ -69,6 +70,7 @@ func (s SettingsApi) UpdateConfig() string {
"crawl_time_out": s.Config.CrawlTimeOut, "crawl_time_out": s.Config.CrawlTimeOut,
"k_days": s.Config.KDays, "k_days": s.Config.KDays,
"enable_danmu": s.Config.EnableDanmu, "enable_danmu": s.Config.EnableDanmu,
"browser_path": s.Config.BrowserPath,
}) })
} else { } else {
logger.SugaredLogger.Infof("未找到配置,创建默认配置:%+v", s.Config) logger.SugaredLogger.Infof("未找到配置,创建默认配置:%+v", s.Config)
@@ -92,6 +94,7 @@ func (s SettingsApi) UpdateConfig() string {
CrawlTimeOut: s.Config.CrawlTimeOut, CrawlTimeOut: s.Config.CrawlTimeOut,
KDays: s.Config.KDays, KDays: s.Config.KDays,
EnableDanmu: s.Config.EnableDanmu, EnableDanmu: s.Config.EnableDanmu,
BrowserPath: s.Config.BrowserPath,
}) })
} }
return "保存成功!" return "保存成功!"
@@ -111,6 +114,10 @@ func (s SettingsApi) GetConfig() *Settings {
settings.KDays = 120 settings.KDays = 120
} }
} }
if settings.BrowserPath == "" {
settings.BrowserPath, _ = CheckBrowserOnWindows()
}
return &settings return &settings
} }

View File

@@ -872,9 +872,9 @@ func getSHSZStockPriceInfo(stockCode string, crawlTimeOut int64) *[]string {
defer timeoutCtxCancel() defer timeoutCtxCancel()
var ctx context.Context var ctx context.Context
var cancel context.CancelFunc var cancel context.CancelFunc
path, e := checkBrowserOnWindows() path := getConfig().BrowserPath
logger.SugaredLogger.Infof("SearchStockPriceInfo path:%s", path) logger.SugaredLogger.Infof("SearchStockPriceInfo BrowserPath:%s", path)
if e { if path != "" {
pctx, pcancel := chromedp.NewExecAllocator( pctx, pcancel := chromedp.NewExecAllocator(
timeoutCtx, timeoutCtx,
chromedp.ExecPath(path), chromedp.ExecPath(path),
@@ -1042,8 +1042,8 @@ func checkChromeOnWindows() (string, bool) {
return path + "\\chrome.exe", true return path + "\\chrome.exe", true
} }
// checkBrowserOnWindows 在 Windows 系统上检查Edge浏览器是否安装并返回安装路径 // CheckBrowserOnWindows 在 Windows 系统上检查Edge浏览器是否安装并返回安装路径
func checkBrowserOnWindows() (string, bool) { func CheckBrowserOnWindows() (string, bool) {
if path, ok := checkChromeOnWindows(); ok { if path, ok := checkChromeOnWindows(); ok {
return path, true return path, true
} }

View File

@@ -34,6 +34,7 @@ const formValue = ref({
kDays:30, kDays:30,
}, },
enableDanmu:false, enableDanmu:false,
browserPath: '',
}) })
onMounted(()=>{ onMounted(()=>{
@@ -63,6 +64,7 @@ onMounted(()=>{
kDays:res.kDays, kDays:res.kDays,
} }
formValue.value.enableDanmu = res.enableDanmu formValue.value.enableDanmu = res.enableDanmu
formValue.value.browserPath = res.browserPath
console.log(res) console.log(res)
}) })
//message.info("") //message.info("")
@@ -89,7 +91,8 @@ function saveConfig(){
questionTemplate:formValue.value.openAI.questionTemplate, questionTemplate:formValue.value.openAI.questionTemplate,
crawlTimeOut:formValue.value.openAI.crawlTimeOut, crawlTimeOut:formValue.value.openAI.crawlTimeOut,
kDays:formValue.value.openAI.kDays, kDays:formValue.value.openAI.kDays,
enableDanmu:formValue.value.enableDanmu enableDanmu:formValue.value.enableDanmu,
browserPath:formValue.value.browserPath
}) })
//console.log("Settings",config) //console.log("Settings",config)
@@ -161,6 +164,7 @@ function importConfig(){
kDays:config.kDays kDays:config.kDays
} }
formValue.value.enableDanmu = config.enableDanmu formValue.value.enableDanmu = config.enableDanmu
formValue.value.browserPath = config.browserPath
// formRef.value.resetFields() // formRef.value.resetFields()
}; };
reader.readAsText(file); reader.readAsText(file);
@@ -205,6 +209,9 @@ window.onerror = function (event, source, lineno, colno, error) {
</template> </template>
</n-input-number> </n-input-number>
</n-form-item-gi> </n-form-item-gi>
<n-form-item-gi :span="22" label="浏览器路径:" path="browserPath" >
<n-input type="text" placeholder="浏览器路径" v-model:value="formValue.browserPath" clearable />
</n-form-item-gi>
</n-grid> </n-grid>
<n-grid :cols="24" :x-gap="24" style="text-align: left"> <n-grid :cols="24" :x-gap="24" style="text-align: left">

View File

@@ -170,6 +170,7 @@ export namespace data {
crawlTimeOut: number; crawlTimeOut: number;
kDays: number; kDays: number;
enableDanmu: boolean; enableDanmu: boolean;
browserPath: string;
static createFrom(source: any = {}) { static createFrom(source: any = {}) {
return new Settings(source); return new Settings(source);
@@ -200,6 +201,7 @@ export namespace data {
this.crawlTimeOut = source["crawlTimeOut"]; this.crawlTimeOut = source["crawlTimeOut"];
this.kDays = source["kDays"]; this.kDays = source["kDays"];
this.enableDanmu = source["enableDanmu"]; this.enableDanmu = source["enableDanmu"];
this.browserPath = source["browserPath"];
} }
convertValues(a: any, classs: any, asMap: boolean = false): any { convertValues(a: any, classs: any, asMap: boolean = false): any {