add env to check internal ip (#4187)
* fix: ts * update jieba package * add env to check internal ip * package * fix: jieba * reset package * update config * fix: jieba package * init shell * init version * change team reload
This commit is contained in:
parent
9d43edb75c
commit
02813f3a47
1
.npmrc
1
.npmrc
@ -1,3 +1,4 @@
|
|||||||
public-hoist-pattern[]=*tiktoken*
|
public-hoist-pattern[]=*tiktoken*
|
||||||
public-hoist-pattern[]=*@zilliz/milvus2-sdk-node*
|
public-hoist-pattern[]=*@zilliz/milvus2-sdk-node*
|
||||||
|
public-hoist-pattern[]=*@node-rs/jieba*
|
||||||
registry=https://registry.npmjs.org/
|
registry=https://registry.npmjs.org/
|
||||||
@ -7,3 +7,4 @@ docSite/
|
|||||||
|
|
||||||
pnpm-lock.yaml
|
pnpm-lock.yaml
|
||||||
cl100l_base.ts
|
cl100l_base.ts
|
||||||
|
dict.json
|
||||||
@ -7,6 +7,26 @@ toc: true
|
|||||||
weight: 799
|
weight: 799
|
||||||
---
|
---
|
||||||
|
|
||||||
|
## 更新指南
|
||||||
|
|
||||||
|
### 1. 做好数据库备份
|
||||||
|
|
||||||
|
### 2. 更新镜像和 PG 容器
|
||||||
|
|
||||||
|
### 3. 执行升级脚本
|
||||||
|
|
||||||
|
从任意终端,发起 1 个 HTTP 请求。其中 {{rootkey}} 替换成环境变量里的 `rootkey`;{{host}} 替换成**FastGPT 域名**。
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl --location --request POST 'https://{{host}}/api/admin/initv491' \
|
||||||
|
--header 'rootkey: {{rootkey}}' \
|
||||||
|
--header 'Content-Type: application/json'
|
||||||
|
```
|
||||||
|
|
||||||
|
**脚本功能**
|
||||||
|
|
||||||
|
重新使用最新的 jieba 分词库进行分词处理。
|
||||||
|
|
||||||
## 🚀 新增内容
|
## 🚀 新增内容
|
||||||
|
|
||||||
1. 商业版支持单团队模式,更好的管理内部成员。
|
1. 商业版支持单团队模式,更好的管理内部成员。
|
||||||
@ -23,6 +43,7 @@ weight: 799
|
|||||||
3. 增加依赖包安全版本检测,并升级部分依赖包。
|
3. 增加依赖包安全版本检测,并升级部分依赖包。
|
||||||
4. 模型测试代码。
|
4. 模型测试代码。
|
||||||
5. 优化思考过程解析逻辑:只要配置了模型支持思考,均会解析 <think> 标签,不会因为对话时,关闭思考而不解析。
|
5. 优化思考过程解析逻辑:只要配置了模型支持思考,均会解析 <think> 标签,不会因为对话时,关闭思考而不解析。
|
||||||
|
6. 载入最新 jieba 分词库,增强全文检索分词效果。
|
||||||
|
|
||||||
## 🐛 修复
|
## 🐛 修复
|
||||||
|
|
||||||
|
|||||||
3
packages/service/common/string/jieba/dict.json
Normal file
3
packages/service/common/string/jieba/dict.json
Normal file
File diff suppressed because one or more lines are too long
@ -1,13 +1,13 @@
|
|||||||
import { Jieba } from '@node-rs/jieba';
|
import { Jieba } from '@node-rs/jieba';
|
||||||
import fs from 'fs';
|
|
||||||
import path from 'path';
|
|
||||||
|
|
||||||
// 使用 require.resolve 获取包的路径,然后拼接字典文件路径
|
let jieba: Jieba | undefined;
|
||||||
const jiebaPath = path.dirname(require.resolve('@node-rs/jieba/package.json'));
|
|
||||||
const dictPath = path.join(jiebaPath, 'dict.txt');
|
|
||||||
|
|
||||||
// 使用正确的文件路径加载字典
|
(async () => {
|
||||||
const jieba = Jieba.withDict(fs.readFileSync(dictPath));
|
const dictData = await import('./dict.json');
|
||||||
|
// @ts-ignore
|
||||||
|
const dictBuffer = Buffer.from(dictData.dict?.replace(/\\n/g, '\n'), 'utf-8');
|
||||||
|
jieba = Jieba.withDict(dictBuffer);
|
||||||
|
})();
|
||||||
|
|
||||||
const stopWords = new Set([
|
const stopWords = new Set([
|
||||||
'--',
|
'--',
|
||||||
@ -1519,7 +1519,9 @@ const stopWords = new Set([
|
|||||||
]);
|
]);
|
||||||
|
|
||||||
export async function jiebaSplit({ text }: { text: string }) {
|
export async function jiebaSplit({ text }: { text: string }) {
|
||||||
const tokens = (await jieba.cutAsync(text, true)) as string[];
|
text = text.replace(/[#*`_~>[\](){}|]/g, '').replace(/\S*https?\S*/gi, '');
|
||||||
|
|
||||||
|
const tokens = (await jieba!.cutAsync(text, true)) as string[];
|
||||||
|
|
||||||
return (
|
return (
|
||||||
tokens
|
tokens
|
||||||
@ -30,6 +30,8 @@ export const isInternalAddress = (url: string): boolean => {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (process.env.CHECK_INTERNAL_IP !== 'true') return false;
|
||||||
|
|
||||||
// For IP addresses, check if they are internal
|
// For IP addresses, check if they are internal
|
||||||
const ipv4Pattern = /^(\d{1,3}\.){3}\d{1,3}$/;
|
const ipv4Pattern = /^(\d{1,3}\.){3}\d{1,3}$/;
|
||||||
if (!ipv4Pattern.test(hostname)) {
|
if (!ipv4Pattern.test(hostname)) {
|
||||||
|
|||||||
@ -41,7 +41,7 @@ try {
|
|||||||
}
|
}
|
||||||
);
|
);
|
||||||
DatasetDataTextSchema.index({ teamId: 1, datasetId: 1, collectionId: 1 });
|
DatasetDataTextSchema.index({ teamId: 1, datasetId: 1, collectionId: 1 });
|
||||||
DatasetDataTextSchema.index({ dataId: 1 }, { unique: true });
|
DatasetDataTextSchema.index({ dataId: 'hashed' });
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.log(error);
|
console.log(error);
|
||||||
}
|
}
|
||||||
|
|||||||
@ -86,7 +86,8 @@ const DatasetDataSchema = new Schema({
|
|||||||
|
|
||||||
// Abandon
|
// Abandon
|
||||||
fullTextToken: String,
|
fullTextToken: String,
|
||||||
initFullText: Boolean
|
initFullText: Boolean,
|
||||||
|
initJieba: Boolean
|
||||||
});
|
});
|
||||||
|
|
||||||
try {
|
try {
|
||||||
@ -103,6 +104,9 @@ try {
|
|||||||
DatasetDataSchema.index({ updateTime: 1 });
|
DatasetDataSchema.index({ updateTime: 1 });
|
||||||
// rebuild data
|
// rebuild data
|
||||||
DatasetDataSchema.index({ rebuilding: 1, teamId: 1, datasetId: 1 });
|
DatasetDataSchema.index({ rebuilding: 1, teamId: 1, datasetId: 1 });
|
||||||
|
|
||||||
|
// 为查询 initJieba 字段不存在的数据添加索引
|
||||||
|
DatasetDataSchema.index({ initJieba: 1 }, { sparse: true });
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.log(error);
|
console.log(error);
|
||||||
}
|
}
|
||||||
|
|||||||
@ -16,7 +16,7 @@ import { reRankRecall } from '../../../core/ai/rerank';
|
|||||||
import { countPromptTokens } from '../../../common/string/tiktoken/index';
|
import { countPromptTokens } from '../../../common/string/tiktoken/index';
|
||||||
import { datasetSearchResultConcat } from '@fastgpt/global/core/dataset/search/utils';
|
import { datasetSearchResultConcat } from '@fastgpt/global/core/dataset/search/utils';
|
||||||
import { hashStr } from '@fastgpt/global/common/string/tools';
|
import { hashStr } from '@fastgpt/global/common/string/tools';
|
||||||
import { jiebaSplit } from '../../../common/string/jieba';
|
import { jiebaSplit } from '../../../common/string/jieba/index';
|
||||||
import { getCollectionSourceData } from '@fastgpt/global/core/dataset/collection/utils';
|
import { getCollectionSourceData } from '@fastgpt/global/core/dataset/collection/utils';
|
||||||
import { Types } from '../../../common/mongo';
|
import { Types } from '../../../common/mongo';
|
||||||
import json5 from 'json5';
|
import json5 from 'json5';
|
||||||
|
|||||||
@ -52,6 +52,8 @@ USE_IP_LIMIT=false
|
|||||||
WORKFLOW_MAX_RUN_TIMES=500
|
WORKFLOW_MAX_RUN_TIMES=500
|
||||||
# 循环最大运行次数,避免极端的死循环情况
|
# 循环最大运行次数,避免极端的死循环情况
|
||||||
WORKFLOW_MAX_LOOP_TIMES=50
|
WORKFLOW_MAX_LOOP_TIMES=50
|
||||||
|
# 启用内网 IP 检查
|
||||||
|
CHECK_INTERNAL_IP=false
|
||||||
|
|
||||||
# 对话日志推送服务
|
# 对话日志推送服务
|
||||||
# # 日志服务地址
|
# # 日志服务地址
|
||||||
|
|||||||
@ -84,7 +84,8 @@ const nextConfig = {
|
|||||||
'mongoose',
|
'mongoose',
|
||||||
'pg',
|
'pg',
|
||||||
'@zilliz/milvus2-sdk-node',
|
'@zilliz/milvus2-sdk-node',
|
||||||
"tiktoken"
|
"tiktoken",
|
||||||
|
"@node-rs/jieba"
|
||||||
],
|
],
|
||||||
outputFileTracingRoot: path.join(__dirname, '../../'),
|
outputFileTracingRoot: path.join(__dirname, '../../'),
|
||||||
instrumentationHook: true
|
instrumentationHook: true
|
||||||
|
|||||||
@ -1,14 +1,12 @@
|
|||||||
import React, { useMemo } from 'react';
|
import React, { useMemo } from 'react';
|
||||||
import { Box, ButtonProps, Flex } from '@chakra-ui/react';
|
import { Box, ButtonProps } from '@chakra-ui/react';
|
||||||
import { useUserStore } from '@/web/support/user/useUserStore';
|
import { useUserStore } from '@/web/support/user/useUserStore';
|
||||||
import { useTranslation } from 'next-i18next';
|
import { useTranslation } from 'next-i18next';
|
||||||
import Avatar from '@fastgpt/web/components/common/Avatar';
|
|
||||||
import { getTeamList, putSwitchTeam } from '@/web/support/user/team/api';
|
import { getTeamList, putSwitchTeam } from '@/web/support/user/team/api';
|
||||||
import { TeamMemberStatusEnum } from '@fastgpt/global/support/user/team/constant';
|
import { TeamMemberStatusEnum } from '@fastgpt/global/support/user/team/constant';
|
||||||
import { useRequest2 } from '@fastgpt/web/hooks/useRequest';
|
import { useRequest2 } from '@fastgpt/web/hooks/useRequest';
|
||||||
import MySelect from '@fastgpt/web/components/common/MySelect';
|
import MySelect from '@fastgpt/web/components/common/MySelect';
|
||||||
import { useSystemStore } from '@/web/common/system/useSystemStore';
|
import { useSystemStore } from '@/web/common/system/useSystemStore';
|
||||||
import MyIcon from '@fastgpt/web/components/common/Icon';
|
|
||||||
import { useRouter } from 'next/router';
|
import { useRouter } from 'next/router';
|
||||||
|
|
||||||
const TeamSelector = ({
|
const TeamSelector = ({
|
||||||
@ -21,7 +19,7 @@ const TeamSelector = ({
|
|||||||
}) => {
|
}) => {
|
||||||
const { t } = useTranslation();
|
const { t } = useTranslation();
|
||||||
const router = useRouter();
|
const router = useRouter();
|
||||||
const { userInfo, initUserInfo } = useUserStore();
|
const { userInfo } = useUserStore();
|
||||||
const { setLoading } = useSystemStore();
|
const { setLoading } = useSystemStore();
|
||||||
|
|
||||||
const { data: myTeams = [] } = useRequest2(() => getTeamList(TeamMemberStatusEnum.active), {
|
const { data: myTeams = [] } = useRequest2(() => getTeamList(TeamMemberStatusEnum.active), {
|
||||||
@ -33,12 +31,11 @@ const TeamSelector = ({
|
|||||||
async (teamId: string) => {
|
async (teamId: string) => {
|
||||||
setLoading(true);
|
setLoading(true);
|
||||||
await putSwitchTeam(teamId);
|
await putSwitchTeam(teamId);
|
||||||
return initUserInfo();
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
onFinally: () => {
|
onFinally: () => {
|
||||||
|
router.reload();
|
||||||
setLoading(false);
|
setLoading(false);
|
||||||
onChange?.();
|
|
||||||
},
|
},
|
||||||
errorToast: t('common:user.team.Switch Team Failed')
|
errorToast: t('common:user.team.Switch Team Failed')
|
||||||
}
|
}
|
||||||
|
|||||||
@ -277,7 +277,7 @@ const MyInfo = ({ onOpenContact }: { onOpenContact: () => void }) => {
|
|||||||
<Flex mt={6} alignItems={'center'}>
|
<Flex mt={6} alignItems={'center'}>
|
||||||
<Box {...labelStyles}>{t('account_info:user_team_team_name')}: </Box>
|
<Box {...labelStyles}>{t('account_info:user_team_team_name')}: </Box>
|
||||||
<Flex flex={'1 0 0'} w={0} align={'center'}>
|
<Flex flex={'1 0 0'} w={0} align={'center'}>
|
||||||
<TeamSelector height={'28px'} w={'100%'} showManage onChange={initUserInfo} />
|
<TeamSelector height={'28px'} w={'100%'} showManage />
|
||||||
</Flex>
|
</Flex>
|
||||||
</Flex>
|
</Flex>
|
||||||
)}
|
)}
|
||||||
|
|||||||
@ -48,10 +48,7 @@ const Team = () => {
|
|||||||
const { t } = useTranslation();
|
const { t } = useTranslation();
|
||||||
const { userInfo } = useUserStore();
|
const { userInfo } = useUserStore();
|
||||||
|
|
||||||
const { setEditTeamData, isLoading, teamSize, refetchMembers } = useContextSelector(
|
const { setEditTeamData, isLoading, teamSize } = useContextSelector(TeamContext, (v) => v);
|
||||||
TeamContext,
|
|
||||||
(v) => v
|
|
||||||
);
|
|
||||||
|
|
||||||
const Tabs = useMemo(
|
const Tabs = useMemo(
|
||||||
() => (
|
() => (
|
||||||
|
|||||||
@ -1,7 +1,7 @@
|
|||||||
import { NextAPI } from '@/service/middleware/entry';
|
import { NextAPI } from '@/service/middleware/entry';
|
||||||
import { delay } from '@fastgpt/global/common/system/utils';
|
import { delay } from '@fastgpt/global/common/system/utils';
|
||||||
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
|
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
|
||||||
import { jiebaSplit } from '@fastgpt/service/common/string/jieba';
|
import { jiebaSplit } from '@fastgpt/service/common/string/jieba/index';
|
||||||
import { MongoDatasetDataText } from '@fastgpt/service/core/dataset/data/dataTextSchema';
|
import { MongoDatasetDataText } from '@fastgpt/service/core/dataset/data/dataTextSchema';
|
||||||
import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
|
import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
|
||||||
import { authCert } from '@fastgpt/service/support/permission/auth/common';
|
import { authCert } from '@fastgpt/service/support/permission/auth/common';
|
||||||
|
|||||||
64
projects/app/src/pages/api/admin/initv491.ts
Normal file
64
projects/app/src/pages/api/admin/initv491.ts
Normal file
@ -0,0 +1,64 @@
|
|||||||
|
import { NextAPI } from '@/service/middleware/entry';
|
||||||
|
import { authCert } from '@fastgpt/service/support/permission/auth/common';
|
||||||
|
import { NextApiRequest, NextApiResponse } from 'next';
|
||||||
|
import { MongoDatasetData } from '@fastgpt/service/core/dataset/data/schema';
|
||||||
|
import { jiebaSplit } from '@fastgpt/service/common/string/jieba';
|
||||||
|
import { addLog } from '@fastgpt/service/common/system/log';
|
||||||
|
import { delay } from '@fastgpt/global/common/system/utils';
|
||||||
|
import { MongoDatasetDataText } from '@fastgpt/service/core/dataset/data/dataTextSchema';
|
||||||
|
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
|
||||||
|
|
||||||
|
/**
 * Backfills full-text tokens for existing dataset data using the current jieba dictionary.
 *
 * Repeatedly loads up to 100 `MongoDatasetData` documents that have no `initJieba` field,
 * re-tokenizes `q` + `a` with `jiebaSplit`, writes the tokens to the `MongoDatasetDataText`
 * record whose `dataId` matches, and marks the source document with `initJieba = true`.
 * Both writes for one document run inside a single `mongoSessionRun` callback.
 *
 * Documents whose update fails are remembered for the rest of the run and excluded from
 * later batches, so a persistently failing document cannot make the loop re-fetch the same
 * batch forever. The run stops once no unprocessed documents remain, or once too many
 * documents have failed. Errors are logged and never thrown to the caller.
 */
const updateData = async () => {
  const batchSize = 100;
  // Upper bound on remembered failures: keeps the `$nin` filter small and stops the run
  // when failures are systemic rather than isolated.
  const maxFailed = 1000;

  let success = 0;
  // String form of the `_id` of every document that failed during this run.
  const failedIds: string[] = [];

  while (true) {
    try {
      const data = await MongoDatasetData.find({
        initJieba: { $exists: false },
        ...(failedIds.length > 0 ? { _id: { $nin: failedIds } } : {})
      }).limit(batchSize);

      if (data.length === 0) {
        console.log('更新分词完成');
        break;
      }

      const outcomes = await Promise.all(
        data.map(async (item) => {
          try {
            // A missing answer must not be tokenized as the literal string "undefined".
            const text = `${item.q} ${item.a ?? ''}`.trim();
            // Tokenize before opening the session so the transaction stays short.
            const fullTextToken = await jiebaSplit({ text });

            await mongoSessionRun(async (session) => {
              await MongoDatasetDataText.updateOne(
                { dataId: item._id },
                { fullTextToken },
                { session }
              );
              // @ts-ignore
              item.initJieba = true;
              await item.save({ session });
            });
            return true;
          } catch (error) {
            const id = String(item._id);
            failedIds.push(id);
            addLog.error(`Failed to update jieba tokens for dataset data ${id}`, error);
            return false;
          }
        })
      );

      // Count only the documents that were actually updated.
      success += outcomes.filter(Boolean).length;
      console.log(`成功 ${success}`);

      if (failedIds.length >= maxFailed) {
        addLog.error(
          `Stopped jieba token update: ${failedIds.length} documents failed; fix the cause and run the script again`
        );
        break;
      }
    } catch (error) {
      // Batch-level failure (e.g. the query itself failed): wait briefly, then retry.
      addLog.error('更新所有旧的 jieba 分词失败', error);
      await delay(1000);
    }
  }
};
|
||||||
|
|
||||||
|
/**
 * Admin endpoint that starts the jieba re-tokenization of existing dataset data.
 *
 * The request must pass `authCert` with `authRoot: true`. The migration is started in the
 * background and the handler returns immediately, so progress and failures are only
 * visible in the server logs.
 *
 * @param req - Incoming request, used for root authentication.
 * @param _res - Unused.
 * @returns `{ success: true }` once the background job has been started.
 */
async function handler(req: NextApiRequest, _res: NextApiResponse) {
  await authCert({ req, authRoot: true });

  console.log('更新所有旧的 jieba 分词');

  // Deliberate fire-and-forget: the job may run far longer than an HTTP request.
  // The rejection handler keeps an unexpected failure from becoming an unhandled rejection.
  void updateData().catch((error) => {
    addLog.error('更新所有旧的 jieba 分词失败', error);
  });

  return { success: true };
}
|
||||||
|
|
||||||
|
export default NextAPI(handler);
|
||||||
@ -6,7 +6,7 @@ import {
|
|||||||
} from '@fastgpt/global/core/dataset/controller';
|
} from '@fastgpt/global/core/dataset/controller';
|
||||||
import { insertDatasetDataVector } from '@fastgpt/service/common/vectorStore/controller';
|
import { insertDatasetDataVector } from '@fastgpt/service/common/vectorStore/controller';
|
||||||
import { getDefaultIndex } from '@fastgpt/global/core/dataset/utils';
|
import { getDefaultIndex } from '@fastgpt/global/core/dataset/utils';
|
||||||
import { jiebaSplit } from '@fastgpt/service/common/string/jieba';
|
import { jiebaSplit } from '@fastgpt/service/common/string/jieba/index';
|
||||||
import { deleteDatasetDataVector } from '@fastgpt/service/common/vectorStore/controller';
|
import { deleteDatasetDataVector } from '@fastgpt/service/common/vectorStore/controller';
|
||||||
import { DatasetDataIndexItemType, DatasetDataItemType } from '@fastgpt/global/core/dataset/type';
|
import { DatasetDataIndexItemType, DatasetDataItemType } from '@fastgpt/global/core/dataset/type';
|
||||||
import { getEmbeddingModel } from '@fastgpt/service/core/ai/model';
|
import { getEmbeddingModel } from '@fastgpt/service/core/ai/model';
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user