import { ChatItemType } from '@/types/chat';
import { modelToolMap } from '@/utils/plugin';
import { ChatRoleEnum } from '@/constants/chat';
import type { NextApiResponse } from 'next';

export type ChatCompletionResponseType = {
  streamResponse: any;
  responseMessages: ChatItemType[];
  responseText: string;
  totalTokens: number;
};

export type StreamResponseType = {
  chatResponse: any;
  prompts: ChatItemType[];
  res: NextApiResponse;
  model: string;
  [key: string]: any;
};

/* Slice the chat context so it fits within the model's token budget */
export const ChatContextFilter = ({
  model,
  prompts = [],
  maxTokens
}: {
  model: string;
  prompts: ChatItemType[];
  maxTokens: number;
}) => {
  if (!Array.isArray(prompts)) {
    return [];
  }

  const rawTextLen = prompts.reduce((sum, item) => sum + item.value.length, 0);

  // Heuristic: if the raw character count is under half the token budget,
  // the prompts are safely within range, so skip the token calculation
  if (rawTextLen < maxTokens * 0.5) {
    return prompts;
  }

  // Split off the leading system prompts from the conversation messages.
  // findIndex returns -1 when every message is a system prompt.
  const chatStartIndex = prompts.findIndex((item) => item.obj !== ChatRoleEnum.System);
  const splitIndex = chatStartIndex === -1 ? prompts.length : chatStartIndex;
  const systemPrompts: ChatItemType[] = prompts.slice(0, splitIndex);
  const chatPrompts: ChatItemType[] = prompts.slice(splitIndex);

  // Reserve part of the budget for the system prompts
  maxTokens -= modelToolMap.countTokens({
    messages: systemPrompts
  });

  // Truncate the conversation by tokens
  const chats: ChatItemType[] = [];

  // Walk the conversation from newest to oldest, keeping as much as fits
  for (let i = chatPrompts.length - 1; i >= 0; i--) {
    chats.unshift(chatPrompts[i]);

    const tokens = modelToolMap.countTokens({
      messages: chats
    });

    // Budget exceeded: drop the message that pushed us over and stop.
    // System prompts are always preserved.
    if (tokens >= maxTokens) {
      chats.shift();
      break;
    }
  }

  return [...systemPrompts, ...chats];
};
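
/*
  Usage sketch: how a caller might trim an oversized history before a
  completion request. The model name, token budget, and message shapes
  below are illustrative assumptions, not values defined by this module.

  const history: ChatItemType[] = [
    { obj: ChatRoleEnum.System, value: 'You are a helpful assistant.' },
    { obj: ChatRoleEnum.Human, value: 'Summarize our discussion so far.' }
  ];

  const trimmed = ChatContextFilter({
    model: 'gpt-3.5-turbo',
    prompts: history,
    maxTokens: 4000
  });
  // `trimmed` keeps every system prompt plus the newest conversation
  // messages that fit within the remaining token budget.
*/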