This commit is contained in:
Archer 2025-03-18 11:01:58 +08:00 committed by archer
parent f842ad93ce
commit 5563c3a0e7
No known key found for this signature in database
GPG Key ID: 4446499B846D4A9E
6 changed files with 73 additions and 45 deletions

View File

@ -114,15 +114,15 @@ services:
# fastgpt # fastgpt
sandbox: sandbox:
container_name: sandbox container_name: sandbox
image: ghcr.io/labring/fastgpt-sandbox:v4.9.0 # git image: ghcr.io/labring/fastgpt-sandbox:v4.9.1 # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.0 # 阿里云 # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.1 # 阿里云
networks: networks:
- fastgpt - fastgpt
restart: always restart: always
fastgpt: fastgpt:
container_name: fastgpt container_name: fastgpt
image: ghcr.io/labring/fastgpt:v4.9.0 # git image: ghcr.io/labring/fastgpt:v4.9.1 # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.0 # 阿里云 # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.1 # 阿里云
ports: ports:
- 3000:3000 - 3000:3000
networks: networks:
@ -175,7 +175,8 @@ services:
# AI Proxy # AI Proxy
aiproxy: aiproxy:
image: 'ghcr.io/labring/aiproxy:latest' image: ghcr.io/labring/aiproxy:v0.1.3
# image: registry.cn-hangzhou.aliyuncs.com/labring/aiproxy:v0.1.3 # 阿里云
container_name: aiproxy container_name: aiproxy
restart: unless-stopped restart: unless-stopped
depends_on: depends_on:

View File

@ -72,15 +72,15 @@ services:
# fastgpt # fastgpt
sandbox: sandbox:
container_name: sandbox container_name: sandbox
image: ghcr.io/labring/fastgpt-sandbox:v4.9.0 # git image: ghcr.io/labring/fastgpt-sandbox:v4.9.1 # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.0 # 阿里云 # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.1 # 阿里云
networks: networks:
- fastgpt - fastgpt
restart: always restart: always
fastgpt: fastgpt:
container_name: fastgpt container_name: fastgpt
image: ghcr.io/labring/fastgpt:v4.9.0 # git image: ghcr.io/labring/fastgpt:v4.9.1 # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.0 # 阿里云 # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.1 # 阿里云
ports: ports:
- 3000:3000 - 3000:3000
networks: networks:
@ -132,7 +132,8 @@ services:
# AI Proxy # AI Proxy
aiproxy: aiproxy:
image: 'ghcr.io/labring/aiproxy:latest' image: ghcr.io/labring/aiproxy:v0.1.3
# image: registry.cn-hangzhou.aliyuncs.com/labring/aiproxy:v0.1.3 # 阿里云
container_name: aiproxy container_name: aiproxy
restart: unless-stopped restart: unless-stopped
depends_on: depends_on:

View File

@ -53,15 +53,15 @@ services:
wait $$! wait $$!
sandbox: sandbox:
container_name: sandbox container_name: sandbox
image: ghcr.io/labring/fastgpt-sandbox:v4.9.0 # git image: ghcr.io/labring/fastgpt-sandbox:v4.9.1 # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.0 # 阿里云 # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt-sandbox:v4.9.1 # 阿里云
networks: networks:
- fastgpt - fastgpt
restart: always restart: always
fastgpt: fastgpt:
container_name: fastgpt container_name: fastgpt
image: ghcr.io/labring/fastgpt:v4.9.0 # git image: ghcr.io/labring/fastgpt:v4.9.1 # git
# image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.0 # 阿里云 # image: registry.cn-hangzhou.aliyuncs.com/fastgpt/fastgpt:v4.9.1 # 阿里云
ports: ports:
- 3000:3000 - 3000:3000
networks: networks:
@ -113,7 +113,8 @@ services:
# AI Proxy # AI Proxy
aiproxy: aiproxy:
image: 'ghcr.io/labring/aiproxy:latest' image: ghcr.io/labring/aiproxy:v0.1.3
# image: registry.cn-hangzhou.aliyuncs.com/labring/aiproxy:v0.1.3 # 阿里云
container_name: aiproxy container_name: aiproxy
restart: unless-stopped restart: unless-stopped
depends_on: depends_on:

View File

@ -11,7 +11,12 @@ weight: 799
### 1. 做好数据库备份 ### 1. 做好数据库备份
### 2. 更新镜像和 PG 容器 ### 2. 更新镜像
- 更新 FastGPT 镜像 tag: v4.9.1
- 更新 FastGPT 商业版镜像 tag: v4.9.1
- Sandbox 镜像,可以不更新
- AIProxy 镜像修改为: registry.cn-hangzhou.aliyuncs.com/labring/aiproxy:v0.1.3
### 3. 执行升级脚本 ### 3. 执行升级脚本
@ -25,7 +30,7 @@ curl --location --request POST 'https://{{host}}/api/admin/initv491' \
**脚本功能** **脚本功能**
重新使用最新的 jieba 分词库进行分词处理。 重新使用最新的 jieba 分词库进行分词处理。时间较长,可以从日志里查看进度。
## 🚀 新增内容 ## 🚀 新增内容

View File

@ -106,7 +106,7 @@ try {
DatasetDataSchema.index({ rebuilding: 1, teamId: 1, datasetId: 1 }); DatasetDataSchema.index({ rebuilding: 1, teamId: 1, datasetId: 1 });
// 为查询 initJieba 字段不存在的数据添加索引 // 为查询 initJieba 字段不存在的数据添加索引
DatasetDataSchema.index({ initJieba: 1 }, { sparse: true }); DatasetDataSchema.index({ initJieba: 1, updateTime: 1 });
} catch (error) { } catch (error) {
console.log(error); console.log(error);
} }

View File

@ -7,44 +7,64 @@ import { addLog } from '@fastgpt/service/common/system/log';
import { delay } from '@fastgpt/global/common/system/utils'; import { delay } from '@fastgpt/global/common/system/utils';
import { MongoDatasetDataText } from '@fastgpt/service/core/dataset/data/dataTextSchema'; import { MongoDatasetDataText } from '@fastgpt/service/core/dataset/data/dataTextSchema';
import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun'; import { mongoSessionRun } from '@fastgpt/service/common/mongo/sessionRun';
import { DatasetDataTextSchemaType } from '@fastgpt/global/core/dataset/type';
import type { AnyBulkWriteOperation } from '@fastgpt/service/common/mongo';
const updateData = async () => { const updateData = async () => {
let success = 0; let success = 0;
while (true) { while (true) {
try { try {
const data = await MongoDatasetData.find({ initJieba: { $exists: false } }).limit(100); const time = Date.now();
const data = await MongoDatasetData.find({
initJieba: { $exists: false },
updateTime: { $lte: time } // 只需要取旧的数据
})
.limit(1000)
.lean();
if (data.length === 0) { if (data.length === 0) {
console.log('更新分词完成'); console.log('更新分词完成');
break; break;
} }
console.log('读取数据完成', Date.now() - time);
const dataTextOps: AnyBulkWriteOperation<DatasetDataTextSchemaType>[] = [];
const datasetDataIds: string[] = [];
await Promise.allSettled( // 先进行分词处理
data.map(async (item) => { for await (const item of data) {
const text = `${item.q} ${item.a}`.trim(); const text = `${item.q} ${item.a}`.trim();
try {
const tokens = await jiebaSplit({ text });
dataTextOps.push({
updateOne: {
filter: { dataId: item._id },
update: { $set: { fullTextToken: tokens } }
}
});
datasetDataIds.push(item._id);
} catch (error) {
console.log(`分词处理错误: ${item._id}`, error);
}
}
console.log('分词处理完成', Date.now() - time);
try { await mongoSessionRun(async (session) => {
await mongoSessionRun(async (session) => { if (dataTextOps.length > 0) {
await MongoDatasetDataText.updateOne( await MongoDatasetDataText.bulkWrite(dataTextOps, { session, ordered: true });
{ }
dataId: item._id if (datasetDataIds.length > 0) {
}, await MongoDatasetData.updateMany(
{ { _id: { $in: datasetDataIds } },
fullTextToken: await jiebaSplit({ text }) { $set: { initJieba: true } },
}, {
{ session
session }
} );
); }
// @ts-ignore });
item.initJieba = true; console.log('保存完成', Date.now() - time);
await item.save({ session });
}); success += dataTextOps.length;
} catch (error) {
console.log(error);
}
})
);
success += data.length;
console.log(`成功 ${success}`); console.log(`成功 ${success}`);
} catch (error) { } catch (error) {
addLog.error('更新所有旧的 jieba 分词失败', error); addLog.error('更新所有旧的 jieba 分词失败', error);