add env to check internal ip (#4187)

* fix: ts

* update jieba package

* add env to check internal ip

* package

* fix: jieba

* reset package

* update config

* fix: jieba package

* init shell

* init version

* change team reload
This commit is contained in:
Archer 2025-03-17 18:21:27 +08:00 committed by GitHub
parent d5e7751f2c
commit 5c9cd15d6f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
17 changed files with 121 additions and 26 deletions

1
.npmrc
View File

@ -1,3 +1,4 @@
public-hoist-pattern[]=*tiktoken*
public-hoist-pattern[]=*@zilliz/milvus2-sdk-node*
public-hoist-pattern[]=*@node-rs/jieba*
registry=https://registry.npmjs.org/

View File

@ -7,3 +7,4 @@ docSite/
pnpm-lock.yaml
cl100l_base.ts
dict.json

View File

@ -7,6 +7,26 @@ toc: true
weight: 799
---
## 更新指南
### 1. 做好数据库备份
### 2. 更新镜像和 PG 容器
### 3. 执行升级脚本
从任意终端,发起 1 个 HTTP 请求。其中 {{rootkey}} 替换成环境变量里的 `rootkey`;{{host}} 替换成**FastGPT 域名**。
```bash
curl --location --request POST 'https://{{host}}/api/admin/initv491' \
--header 'rootkey: {{rootkey}}' \
--header 'Content-Type: application/json'
```
**脚本功能**
重新使用最新的 jieba 分词库进行分词处理。
## 🚀 新增内容
1. 商业版支持单团队模式,更好的管理内部成员。
@ -23,6 +43,7 @@ weight: 799
3. 增加依赖包安全版本检测,并升级部分依赖包。
4. 模型测试代码。
5. 优化思考过程解析逻辑:只要配置了模型支持思考,均会解析 <think> 标签,不会因为对话时,关闭思考而不解析。
6. 载入最新 jieba 分词库,增强全文检索分词效果。
## 🐛 修复

File diff suppressed because one or more lines are too long

View File

@ -1,13 +1,13 @@
import { Jieba } from '@node-rs/jieba';
import fs from 'fs';
import path from 'path';
// 使用 require.resolve 获取包的路径,然后拼接字典文件路径
const jiebaPath = path.dirname(require.resolve('@node-rs/jieba/package.json'));
const dictPath = path.join(jiebaPath, 'dict.txt');
let jieba: Jieba | undefined;
// 使用正确的文件路径加载字典
const jieba = Jieba.withDict(fs.readFileSync(dictPath));
(async () => {
const dictData = await import('./dict.json');
// @ts-ignore
const dictBuffer = Buffer.from(dictData.dict?.replace(/\\n/g, '\n'), 'utf-8');
jieba = Jieba.withDict(dictBuffer);
})();
const stopWords = new Set([
'--',
@ -1519,7 +1519,9 @@ const stopWords = new Set([
]);
export async function jiebaSplit({ text }: { text: string }) {
const tokens = (await jieba.cutAsync(text, true)) as string[];
text = text.replace(/[#*`_~>[\](){}|]/g, '').replace(/\S*https?\S*/gi, '');
const tokens = (await jieba!.cutAsync(text, true)) as string[];
return (
tokens

View File

@ -30,6 +30,8 @@ export const isInternalAddress = (url: string): boolean => {
return true;
}
if (process.env.CHECK_INTERNAL_IP !== 'true') return false;
// For IP addresses, check if they are internal
const ipv4Pattern = /^(\d{1,3}\.){3}\d{1,3}$/;
if (!ipv4Pattern.test(hostname)) {

View File

@ -41,7 +41,7 @@ try {
}
);
DatasetDataTextSchema.index({ teamId: 1, datasetId: 1, collectionId: 1 });
DatasetDataTextSchema.index({ dataId: 1 }, { unique: true });
DatasetDataTextSchema.index({ dataId: 'hashed' });
} catch (error) {
console.log(error);
}

View File

@ -86,7 +86,8 @@ const DatasetDataSchema = new Schema({
// Abandon
fullTextToken: String,
initFullText: Boolean
initFullText: Boolean,
initJieba: Boolean
});
try {
@ -103,6 +104,9 @@ try {
DatasetDataSchema.index({ updateTime: 1 });
// rebuild data
DatasetDataSchema.index({ rebuilding: 1, teamId: 1, datasetId: 1 });
// 为查询 initJieba 字段不存在的数据添加索引
DatasetDataSchema.index({ initJieba: 1 }, { sparse: true });
} catch (error) {
console.log(error);
}

View File

@ -16,7 +16,7 @@ import { reRankRecall } from '../../../core/ai/rerank';
import { countPromptTokens } from '../../../common/string/tiktoken/index';
import { datasetSearchResultConcat } from '@fastgpt/global/core/dataset/search/utils';
import { hashStr } from '@fastgpt/global/common/string/tools';
import { jiebaSplit } from '../../../common/string/jieba';
import { jiebaSplit } from '../../../common/string/jieba/index';
import { getCollectionSourceData } from '@fastgpt/global/core/dataset/collection/utils';
import { Types } from '../../../common/mongo';
import json5 from 'json5';

View File

@ -52,6 +52,8 @@ USE_IP_LIMIT=false
WORKFLOW_MAX_RUN_TIMES=500
# 循环最大运行次数,避免极端的死循环情况
WORKFLOW_MAX_LOOP_TIMES=50
# 启用内网 IP 检查
CHECK_INTERNAL_IP=false
# 对话日志推送服务
# # 日志服务地址

View File

@ -84,7 +84,8 @@ const nextConfig = {
'mongoose',
'pg',
'@zilliz/milvus2-sdk-node',
"tiktoken"
"tiktoken",
"@node-rs/jieba"
],
outputFileTracingRoot: path.join(__dirname, '../../'),
instrumentationHook: true

View File

@ -1,14 +1,12 @@
import React, { useMemo } from 'react';
import { Box, ButtonProps, Flex } from '@chakra-ui/react';
import { Box, ButtonProps } from '@chakra-ui/react';
import { useUserStore } from '@/web/support/user/useUserStore';
import { useTranslation } from 'next-i18next';
import Avatar from '@fastgpt/web/components/common/Avatar';
import { getTeamList, putSwitchTeam } from '@/web/support/user/team/api';
import { TeamMemberStatusEnum } from '@fastgpt/global/support/user/team/constant';
import { useRequest2 } from '@fastgpt/web/hooks/useRequest';
import MySelect from '@fastgpt/web/components/common/MySelect';
import { useSystemStore } from '@/web/common/system/useSystemStore';
import MyIcon from '@fastgpt/web/components/common/Icon';
import { useRouter } from 'next/router';
const TeamSelector = ({
@ -21,7 +19,7 @@ const TeamSelector = ({
}) => {
const { t } = useTranslation();
const router = useRouter();
const { userInfo, initUserInfo } = useUserStore();
const { userInfo } = useUserStore();
const { setLoading } = useSystemStore();
const { data: myTeams = [] } = useRequest2(() => getTeamList(TeamMemberStatusEnum.active), {
@ -33,12 +31,11 @@ const TeamSelector = ({
async (teamId: string) => {
setLoading(true);
await putSwitchTeam(teamId);
return initUserInfo();
},
{
onFinally: () => {
router.reload();
setLoading(false);
onChange?.();
},
errorToast: t('common:user.team.Switch Team Failed')
}

View File

@ -277,7 +277,7 @@ const MyInfo = ({ onOpenContact }: { onOpenContact: () => void }) => {
<Flex mt={6} alignItems={'center'}>
<Box {...labelStyles}>{t('account_info:user_team_team_name')}:&nbsp;</Box>
<Flex flex={'1 0 0'} w={0} align={'center'}>
<TeamSelector height={'28px'} w={'100%'} showManage onChange={initUserInfo} />
<TeamSelector height={'28px'} w={'100%'} showManage />
</Flex>
</Flex>
)}

View File

@ -48,10 +48,7 @@ const Team = () => {
const { t } = useTranslation();
const { userInfo } = useUserStore();
const { setEditTeamData, isLoading, teamSize, refetchMembers } = useContextSelector(
TeamContext,
(v) => v
);
const { setEditTeamData, isLoading, teamSize } = useContextSelector(TeamContext, (v) => v);
const Tabs = useMemo(
() => (

View File

@ -1,7 +1,7 @@
import { NextAPI } from '@/service/middleware/entry';
import { delay } from '@fastgpt/global/common/system/utils';
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
import { jiebaSplit } from '@fastgpt/service/common/string/jieba';
import { jiebaSplit } from '@fastgpt/service/common/string/jieba/index';
import { MongoDatasetDataText } from '@fastgpt/service/core/dataset/data/dataTextSchema';
import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
import { authCert } from '@fastgpt/service/support/permission/auth/common';

View File

@ -0,0 +1,64 @@
import { NextAPI } from '@/service/middleware/entry';
import { authCert } from '@fastgpt/service/support/permission/auth/common';
import { NextApiRequest, NextApiResponse } from 'next';
import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
import { jiebaSplit } from '@fastgpt/service/common/string/jieba';
import { addLog } from '@fastgpt/service/common/system/log';
import { delay } from '@fastgpt/global/common/system/utils';
import { MongoDatasetDataText } from '@fastgpt/service/core/dataset/data/dataTextSchema';
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
/**
 * Re-tokenizes dataset records that do not yet carry the `initJieba` flag.
 *
 * Records are processed in batches, ordered by ascending `_id`. For every
 * record, the matching `MongoDatasetDataText` document gets a freshly computed
 * `fullTextToken`, and the record itself is flagged with `initJieba = true`;
 * both writes go through the same `mongoSessionRun` callback.
 *
 * The loop pages forward with an `_id` cursor, so a record whose update keeps
 * failing cannot be re-fetched over and over within one run. Such records keep
 * `initJieba` unset and are picked up again the next time this function runs.
 *
 * A failure of the batch query itself is logged and retried after one second.
 */
const updateData = async () => {
  const batchSize = 100;
  let success = 0;
  let failed = 0;
  // Last `_id` that was handed out; the next batch starts strictly after it.
  let lastId: string | undefined;

  while (true) {
    try {
      const data = await MongoDatasetData.find({
        initJieba: { $exists: false },
        ...(lastId ? { _id: { $gt: lastId } } : {})
      })
        .sort({ _id: 1 })
        .limit(batchSize);

      if (data.length === 0) {
        console.log('更新分词完成');
        break;
      }

      // Each task catches its own error and reports a boolean, so Promise.all
      // never rejects here and one bad record cannot abort the batch.
      const results = await Promise.all(
        data.map(async (item) => {
          // Avoid interpolating a nullish answer as the literal "undefined".
          const text = `${item.q} ${item.a ?? ''}`.trim();

          try {
            await mongoSessionRun(async (session) => {
              await MongoDatasetDataText.updateOne(
                {
                  dataId: item._id
                },
                {
                  fullTextToken: await jiebaSplit({ text })
                },
                {
                  session
                }
              );

              // @ts-ignore
              item.initJieba = true;
              await item.save({ session });
            });
            return true;
          } catch (error) {
            console.log(error);
            return false;
          }
        })
      );

      // Advance the cursor past this batch regardless of per-item outcome.
      lastId = String(data[data.length - 1]._id);

      const succeededInBatch = results.filter(Boolean).length;
      success += succeededInBatch;
      failed += data.length - succeededInBatch;

      console.log(`成功 ${success}`);
      if (failed > 0) {
        console.log(`失败 ${failed}`);
      }
    } catch (error) {
      addLog.error('更新所有旧的 jieba 分词失败', error);
      await delay(1000);
    }
  }
};
/**
 * Admin endpoint that starts the jieba re-tokenization job.
 *
 * The request is first checked via `authCert` with `authRoot: true`. The job
 * itself is started in the background and is not awaited, so the response
 * only confirms that the job was triggered, not that it finished.
 *
 * @param req - Incoming API request, forwarded to `authCert`.
 * @param _res - Unused.
 * @returns `{ success: true }` once the job has been started.
 */
async function handler(req: NextApiRequest, _res: NextApiResponse) {
  await authCert({ req, authRoot: true });

  console.log('更新所有旧的 jieba 分词');

  // Fire-and-forget on purpose. The catch keeps an unexpected rejection of the
  // background job from surfacing as an unhandled promise rejection.
  void updateData().catch((error) => {
    addLog.error('更新所有旧的 jieba 分词失败', error);
  });

  return { success: true };
}
export default NextAPI(handler);

View File

@ -6,7 +6,7 @@ import {
} from '@fastgpt/global/core/dataset/controller';
import { insertDatasetDataVector } from '@fastgpt/service/common/vectorStore/controller';
import { getDefaultIndex } from '@fastgpt/global/core/dataset/utils';
import { jiebaSplit } from '@fastgpt/service/common/string/jieba';
import { jiebaSplit } from '@fastgpt/service/common/string/jieba/index';
import { deleteDatasetDataVector } from '@fastgpt/service/common/vectorStore/controller';
import { DatasetDataIndexItemType, DatasetDataItemType } from '@fastgpt/global/core/dataset/type';
import { getEmbeddingModel } from '@fastgpt/service/core/ai/model';