gggaaallleee f7b2a57ca3
1 (#3924)
2025-02-28 19:00:58 +08:00

55 lines
1.9 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import axios from 'axios';
import { URL } from 'url';
import dotenv from 'dotenv';
dotenv.config();
const blacklistDomains = process.env.BLACKLIST ? JSON.parse(process.env.BLACKLIST) : [];
export const fetchSearchResults = async (query: string, pageCount: number, searchUrlBase: string, categories: string) => {
const MAX_PAGES = (pageCount / 10 +1) * 2+1; // 最多搜索的页面数
//如果searchUrlBase为空返回空数组pagecount是需要搜索结果的数量
if (!searchUrlBase) {
return { resultUrls: [], results: new Map() };
}
const resultUrls: string[] = [];
const results = new Map<string, any>();
let fetchedResultsCount = 0;
let pageIndex = 0;
while (fetchedResultsCount < pageCount && pageIndex < MAX_PAGES) {
const searchUrl = new URL(`${searchUrlBase}?q=${encodeURIComponent(query)}&pageno=${pageIndex + 1}&format=json&categories=${categories}`);
console.log(`Fetching page ${pageIndex + 1} from SearchXNG: ${searchUrl.toString()}`);
const response = await axios.get(searchUrl.toString());
const jsonResults = response.data.results;
for (let index = 0; index < jsonResults.length; index++) {
const result = jsonResults[index];
const resultDomain = new URL(result.url).hostname;
if (blacklistDomains.some((domain: string) => resultDomain.endsWith(domain)) || resultDomain.includes('zhihu')) {
continue;
}
resultUrls.push(result.url);
results.set(result.url, {
title: result.title,
url: result.url,
snippet: result.content,
source: result.engine,
crawlStatus: 'Pending',
score: result.score
});
fetchedResultsCount++;
if (fetchedResultsCount >= pageCount) {
break;
}
}
pageIndex++;
if (jsonResults.length === 0) {
break; // 如果没有更多结果,退出循环
}
}
return { resultUrls, results };
};