add env to check internal ip (#4187)
* fix: ts * update jieba package * add env to check internal ip * package * fix: jieba * reset package * update config * fix: jieba package * init shell * init version * change team reload
This commit is contained in:
parent
9d43edb75c
commit
02813f3a47
1
.npmrc
1
.npmrc
@ -1,3 +1,4 @@
|
||||
public-hoist-pattern[]=*tiktoken*
|
||||
public-hoist-pattern[]=*@zilliz/milvus2-sdk-node*
|
||||
public-hoist-pattern[]=*@node-rs/jieba*
|
||||
registry=https://registry.npmjs.org/
|
||||
@ -6,4 +6,5 @@ docSite/
|
||||
*.md
|
||||
|
||||
pnpm-lock.yaml
|
||||
cl100l_base.ts
|
||||
cl100l_base.ts
|
||||
dict.json
|
||||
@ -7,6 +7,26 @@ toc: true
|
||||
weight: 799
|
||||
---
|
||||
|
||||
## 更新指南
|
||||
|
||||
### 1. 做好数据库备份
|
||||
|
||||
### 2. 更新镜像和 PG 容器
|
||||
|
||||
### 3. 执行升级脚本
|
||||
|
||||
从任意终端,发起 1 个 HTTP 请求。其中 {{rootkey}} 替换成环境变量里的 `rootkey`;{{host}} 替换成**FastGPT 域名**。
|
||||
|
||||
```bash
|
||||
curl --location --request POST 'https://{{host}}/api/admin/initv491' \
|
||||
--header 'rootkey: {{rootkey}}' \
|
||||
--header 'Content-Type: application/json'
|
||||
```
|
||||
|
||||
**脚本功能**
|
||||
|
||||
使用最新的 jieba 分词库,对已有的知识库数据重新进行分词处理。脚本在后台异步执行,接口会立即返回;处理完成后,服务日志中会输出"更新分词完成"。
|
||||
|
||||
## 🚀 新增内容
|
||||
|
||||
1. 商业版支持单团队模式,更好地管理内部成员。
|
||||
@ -23,6 +43,7 @@ weight: 799
|
||||
3. 增加依赖包安全版本检测,并升级部分依赖包。
|
||||
4. 模型测试代码。
|
||||
5. 优化思考过程解析逻辑:只要配置了模型支持思考,均会解析 `<think>` 标签,不会因为对话时关闭了思考而不解析。
|
||||
6. 载入最新 jieba 分词库,增强全文检索分词效果。
|
||||
|
||||
## 🐛 修复
|
||||
|
||||
|
||||
3
packages/service/common/string/jieba/dict.json
Normal file
3
packages/service/common/string/jieba/dict.json
Normal file
File diff suppressed because one or more lines are too long
@ -1,13 +1,13 @@
|
||||
import { Jieba } from '@node-rs/jieba';
|
||||
import fs from 'fs';
|
||||
import path from 'path';
|
||||
|
||||
// 使用 require.resolve 获取包的路径,然后拼接字典文件路径
|
||||
const jiebaPath = path.dirname(require.resolve('@node-rs/jieba/package.json'));
|
||||
const dictPath = path.join(jiebaPath, 'dict.txt');
|
||||
let jieba: Jieba | undefined;
|
||||
|
||||
// 使用正确的文件路径加载字典
|
||||
const jieba = Jieba.withDict(fs.readFileSync(dictPath));
|
||||
(async () => {
|
||||
const dictData = await import('./dict.json');
|
||||
// @ts-ignore
|
||||
const dictBuffer = Buffer.from(dictData.dict?.replace(/\\n/g, '\n'), 'utf-8');
|
||||
jieba = Jieba.withDict(dictBuffer);
|
||||
})();
|
||||
|
||||
const stopWords = new Set([
|
||||
'--',
|
||||
@ -1519,7 +1519,9 @@ const stopWords = new Set([
|
||||
]);
|
||||
|
||||
export async function jiebaSplit({ text }: { text: string }) {
|
||||
const tokens = (await jieba.cutAsync(text, true)) as string[];
|
||||
text = text.replace(/[#*`_~>[\](){}|]/g, '').replace(/\S*https?\S*/gi, '');
|
||||
|
||||
const tokens = (await jieba!.cutAsync(text, true)) as string[];
|
||||
|
||||
return (
|
||||
tokens
|
||||
@ -30,6 +30,8 @@ export const isInternalAddress = (url: string): boolean => {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (process.env.CHECK_INTERNAL_IP !== 'true') return false;
|
||||
|
||||
// For IP addresses, check if they are internal
|
||||
const ipv4Pattern = /^(\d{1,3}\.){3}\d{1,3}$/;
|
||||
if (!ipv4Pattern.test(hostname)) {
|
||||
|
||||
@ -41,7 +41,7 @@ try {
|
||||
}
|
||||
);
|
||||
DatasetDataTextSchema.index({ teamId: 1, datasetId: 1, collectionId: 1 });
|
||||
DatasetDataTextSchema.index({ dataId: 1 }, { unique: true });
|
||||
DatasetDataTextSchema.index({ dataId: 'hashed' });
|
||||
} catch (error) {
|
||||
console.log(error);
|
||||
}
|
||||
|
||||
@ -86,7 +86,8 @@ const DatasetDataSchema = new Schema({
|
||||
|
||||
// Abandon
|
||||
fullTextToken: String,
|
||||
initFullText: Boolean
|
||||
initFullText: Boolean,
|
||||
initJieba: Boolean
|
||||
});
|
||||
|
||||
try {
|
||||
@ -103,6 +104,9 @@ try {
|
||||
DatasetDataSchema.index({ updateTime: 1 });
|
||||
// rebuild data
|
||||
DatasetDataSchema.index({ rebuilding: 1, teamId: 1, datasetId: 1 });
|
||||
|
||||
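// Index on initJieba, intended for finding records that have not been re-tokenized yet.
// NOTE(review): a sparse index omits documents that lack the indexed field, so MongoDB
// cannot use it for the `{ initJieba: { $exists: false } }` query in initv491 — confirm
// whether this index is actually useful or should be a regular (non-sparse) index.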
|
||||
DatasetDataSchema.index({ initJieba: 1 }, { sparse: true });
|
||||
} catch (error) {
|
||||
console.log(error);
|
||||
}
|
||||
|
||||
@ -16,7 +16,7 @@ import { reRankRecall } from '../../../core/ai/rerank';
|
||||
import { countPromptTokens } from '../../../common/string/tiktoken/index';
|
||||
import { datasetSearchResultConcat } from '@fastgpt/global/core/dataset/search/utils';
|
||||
import { hashStr } from '@fastgpt/global/common/string/tools';
|
||||
import { jiebaSplit } from '../../../common/string/jieba';
|
||||
import { jiebaSplit } from '../../../common/string/jieba/index';
|
||||
import { getCollectionSourceData } from '@fastgpt/global/core/dataset/collection/utils';
|
||||
import { Types } from '../../../common/mongo';
|
||||
import json5 from 'json5';
|
||||
|
||||
@ -52,6 +52,8 @@ USE_IP_LIMIT=false
|
||||
WORKFLOW_MAX_RUN_TIMES=500
|
||||
# 循环最大运行次数,避免极端的死循环情况
|
||||
WORKFLOW_MAX_LOOP_TIMES=50
|
||||
# 是否启用内网 IP 检查(仅当值为 true 时,才会检查 IP 地址是否属于内网地址)
|
||||
CHECK_INTERNAL_IP=false
|
||||
|
||||
# 对话日志推送服务
|
||||
# # 日志服务地址
|
||||
|
||||
@ -84,7 +84,8 @@ const nextConfig = {
|
||||
'mongoose',
|
||||
'pg',
|
||||
'@zilliz/milvus2-sdk-node',
|
||||
"tiktoken"
|
||||
"tiktoken",
|
||||
"@node-rs/jieba"
|
||||
],
|
||||
outputFileTracingRoot: path.join(__dirname, '../../'),
|
||||
instrumentationHook: true
|
||||
|
||||
@ -1,14 +1,12 @@
|
||||
import React, { useMemo } from 'react';
|
||||
import { Box, ButtonProps, Flex } from '@chakra-ui/react';
|
||||
import { Box, ButtonProps } from '@chakra-ui/react';
|
||||
import { useUserStore } from '@/web/support/user/useUserStore';
|
||||
import { useTranslation } from 'next-i18next';
|
||||
import Avatar from '@fastgpt/web/components/common/Avatar';
|
||||
import { getTeamList, putSwitchTeam } from '@/web/support/user/team/api';
|
||||
import { TeamMemberStatusEnum } from '@fastgpt/global/support/user/team/constant';
|
||||
import { useRequest2 } from '@fastgpt/web/hooks/useRequest';
|
||||
import MySelect from '@fastgpt/web/components/common/MySelect';
|
||||
import { useSystemStore } from '@/web/common/system/useSystemStore';
|
||||
import MyIcon from '@fastgpt/web/components/common/Icon';
|
||||
import { useRouter } from 'next/router';
|
||||
|
||||
const TeamSelector = ({
|
||||
@ -21,7 +19,7 @@ const TeamSelector = ({
|
||||
}) => {
|
||||
const { t } = useTranslation();
|
||||
const router = useRouter();
|
||||
const { userInfo, initUserInfo } = useUserStore();
|
||||
const { userInfo } = useUserStore();
|
||||
const { setLoading } = useSystemStore();
|
||||
|
||||
const { data: myTeams = [] } = useRequest2(() => getTeamList(TeamMemberStatusEnum.active), {
|
||||
@ -33,12 +31,11 @@ const TeamSelector = ({
|
||||
async (teamId: string) => {
|
||||
setLoading(true);
|
||||
await putSwitchTeam(teamId);
|
||||
return initUserInfo();
|
||||
},
|
||||
{
|
||||
onFinally: () => {
|
||||
router.reload();
|
||||
setLoading(false);
|
||||
onChange?.();
|
||||
},
|
||||
errorToast: t('common:user.team.Switch Team Failed')
|
||||
}
|
||||
|
||||
@ -277,7 +277,7 @@ const MyInfo = ({ onOpenContact }: { onOpenContact: () => void }) => {
|
||||
<Flex mt={6} alignItems={'center'}>
|
||||
<Box {...labelStyles}>{t('account_info:user_team_team_name')}: </Box>
|
||||
<Flex flex={'1 0 0'} w={0} align={'center'}>
|
||||
<TeamSelector height={'28px'} w={'100%'} showManage onChange={initUserInfo} />
|
||||
<TeamSelector height={'28px'} w={'100%'} showManage />
|
||||
</Flex>
|
||||
</Flex>
|
||||
)}
|
||||
|
||||
@ -48,10 +48,7 @@ const Team = () => {
|
||||
const { t } = useTranslation();
|
||||
const { userInfo } = useUserStore();
|
||||
|
||||
const { setEditTeamData, isLoading, teamSize, refetchMembers } = useContextSelector(
|
||||
TeamContext,
|
||||
(v) => v
|
||||
);
|
||||
const { setEditTeamData, isLoading, teamSize } = useContextSelector(TeamContext, (v) => v);
|
||||
|
||||
const Tabs = useMemo(
|
||||
() => (
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
import { NextAPI } from '@/service/middleware/entry';
|
||||
import { delay } from '@fastgpt/global/common/system/utils';
|
||||
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
|
||||
import { jiebaSplit } from '@fastgpt/service/common/string/jieba';
|
||||
import { jiebaSplit } from '@fastgpt/service/common/string/jieba/index';
|
||||
import { MongoDatasetDataText } from '@fastgpt/service/core/dataset/data/dataTextSchema';
|
||||
import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
|
||||
import { authCert } from '@fastgpt/service/support/permission/auth/common';
|
||||
|
||||
64
projects/app/src/pages/api/admin/initv491.ts
Normal file
64
projects/app/src/pages/api/admin/initv491.ts
Normal file
@ -0,0 +1,64 @@
|
||||
import { NextAPI } from '@/service/middleware/entry';
|
||||
import { authCert } from '@fastgpt/service/support/permission/auth/common';
|
||||
import { NextApiRequest, NextApiResponse } from 'next';
|
||||
import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
|
||||
import { jiebaSplit } from '@fastgpt/service/common/string/jieba';
|
||||
import { addLog } from '@fastgpt/service/common/system/log';
|
||||
import { delay } from '@fastgpt/global/common/system/utils';
|
||||
import { MongoDatasetDataText } from '@fastgpt/service/core/dataset/data/dataTextSchema';
|
||||
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
|
||||
|
||||
/**
 * Re-tokenizes, in batches, every dataset record that has no `initJieba` flag yet.
 *
 * For each record the full-text tokens stored in `MongoDatasetDataText` are rebuilt
 * with `jiebaSplit`, and the record is flagged with `initJieba = true`; both writes
 * happen inside one `mongoSessionRun` callback. The loop ends when the query returns
 * no more records.
 *
 * Records whose update fails are remembered for the rest of the run and excluded from
 * later batches. Without that, they would keep matching the query (they never receive
 * the flag) and the loop would spin on them forever.
 *
 * Never rejects: batch-level errors are logged and retried after one second.
 */
const updateData = async () => {
  const batchSize = 100;
  // Ids (as strings) of records that failed during this run. Expected to stay small;
  // the list only lives for the duration of one migration run.
  const failedIds: string[] = [];
  let success = 0;

  while (true) {
    try {
      const data = await MongoDatasetData.find({
        initJieba: { $exists: false },
        ...(failedIds.length > 0 ? { _id: { $nin: failedIds } } : {})
      }).limit(batchSize);

      if (data.length === 0) {
        console.log('更新分词完成');
        break;
      }

      // Each mapper handles its own error and resolves to a success flag, so
      // Promise.all never rejects here and one bad record cannot abort the batch.
      const outcomes = await Promise.all(
        data.map(async (item) => {
          try {
            // `a` is assumed to possibly be absent on old records (TODO confirm);
            // fall back to '' so the literal text "undefined" is never tokenized.
            const text = `${item.q} ${item.a ?? ''}`.trim();
            // Tokenize before opening the session to keep the transaction short.
            const fullTextToken = await jiebaSplit({ text });

            await mongoSessionRun(async (session) => {
              await MongoDatasetDataText.updateOne(
                { dataId: item._id },
                { fullTextToken },
                { session }
              );
              // @ts-ignore -- `initJieba` exists on the schema but may be missing from the TS type
              item.initJieba = true;
              await item.save({ session });
            });
            return true;
          } catch (error) {
            failedIds.push(String(item._id));
            addLog.error('更新单条数据的 jieba 分词失败', error);
            return false;
          }
        })
      );

      // Count only the records that were really updated.
      success += outcomes.filter(Boolean).length;
      console.log(`成功 ${success}`);
    } catch (error) {
      // Batch-level failure (e.g. the query itself): log, wait, and retry.
      addLog.error('更新所有旧的 jieba 分词失败', error);
      await delay(1000);
    }
  }
};
|
||||
|
||||
/**
 * Admin endpoint that starts the v4.9.1 jieba re-tokenization job.
 *
 * The request is authenticated through `authCert` with `authRoot: true`
 * (presumably the `rootkey` header — verify against authCert). The job is started
 * without being awaited, so the response is returned immediately while
 * `updateData` keeps running in the background.
 *
 * @param req  Incoming API request, passed to `authCert`.
 * @param _res Unused.
 * @returns `{ success: true }` once the job has been started.
 */
async function handler(req: NextApiRequest, _res: NextApiResponse) {
  await authCert({ req, authRoot: true });

  console.log('更新所有旧的 jieba 分词');

  // Deliberately not awaited: the migration can take a long time.
  void updateData();

  const result = { success: true };
  return result;
}
|
||||
|
||||
export default NextAPI(handler);
|
||||
@ -6,7 +6,7 @@ import {
|
||||
} from '@fastgpt/global/core/dataset/controller';
|
||||
import { insertDatasetDataVector } from '@fastgpt/service/common/vectorStore/controller';
|
||||
import { getDefaultIndex } from '@fastgpt/global/core/dataset/utils';
|
||||
import { jiebaSplit } from '@fastgpt/service/common/string/jieba';
|
||||
import { jiebaSplit } from '@fastgpt/service/common/string/jieba/index';
|
||||
import { deleteDatasetDataVector } from '@fastgpt/service/common/vectorStore/controller';
|
||||
import { DatasetDataIndexItemType, DatasetDataItemType } from '@fastgpt/global/core/dataset/type';
|
||||
import { getEmbeddingModel } from '@fastgpt/service/core/ai/model';
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user