mirror of https://github.com/EthanMarti/infio-copilot.git, synced 2026-01-16 08:21:55 +00:00

update vector manager
commit 34296e6871 (parent c1fbd4da21)
pnpm-lock.yaml (generated, 236 changed lines)
@@ -123,6 +123,9 @@ importers:
       handlebars:
         specifier: ^4.7.7
         version: 4.7.8
+      jieba-wasm:
+        specifier: ^2.2.0
+        version: 2.2.0
       js-tiktoken:
         specifier: ^1.0.15
         version: 1.0.20
@@ -583,9 +586,9 @@ packages:
   '@codemirror/language@6.11.2':
     resolution: {integrity: sha512-p44TsNArL4IVXDTbapUmEkAlvWs2CFQbcfc0ymDsis1kH2wh0gcY96AS29c/vp2d0y2Tquk1EDSaawpzilUiAw==}

-  '@codemirror/language@https://codeload.github.com/lishid/cm-language/tar.gz/6c1c5f5b677f6f6503d1ca2ec47f62f6406cda67':
-    resolution: {tarball: https://codeload.github.com/lishid/cm-language/tar.gz/6c1c5f5b677f6f6503d1ca2ec47f62f6406cda67}
-    version: 6.10.8
+  '@codemirror/language@https://codeload.github.com/lishid/cm-language/tar.gz/a9c3c7efe17dd1d24395ee2a179fe12dd6ed1e76':
+    resolution: {tarball: https://codeload.github.com/lishid/cm-language/tar.gz/a9c3c7efe17dd1d24395ee2a179fe12dd6ed1e76}
+    version: 6.11.2

   '@codemirror/lint@0.20.3':
     resolution: {integrity: sha512-06xUScbbspZ8mKoODQCEx6hz1bjaq9m8W8DxdycWARMiiX1wMtfCh/MoHpaL7ws/KUMwlsFFfp2qhm32oaCvVA==}
@@ -669,8 +672,8 @@ packages:
     cpu: [ppc64]
     os: [aix]

-  '@esbuild/aix-ppc64@0.25.5':
-    resolution: {integrity: sha512-9o3TMmpmftaCMepOdA5k/yDw8SfInyzWWTjYTFCX3kPSDJMROQTb8jg+h9Cnwnmm1vOzvxN7gIfB5V2ewpjtGA==}
+  '@esbuild/aix-ppc64@0.25.6':
+    resolution: {integrity: sha512-ShbM/3XxwuxjFiuVBHA+d3j5dyac0aEVVq1oluIDf71hUw0aRF59dV/efUsIwFnR6m8JNM2FjZOzmaZ8yG61kw==}
     engines: {node: '>=18'}
     cpu: [ppc64]
     os: [aix]
@@ -693,8 +696,8 @@ packages:
     cpu: [arm64]
     os: [android]

-  '@esbuild/android-arm64@0.25.5':
-    resolution: {integrity: sha512-VGzGhj4lJO+TVGV1v8ntCZWJktV7SGCs3Pn1GRWI1SBFtRALoomm8k5E9Pmwg3HOAal2VDc2F9+PM/rEY6oIDg==}
+  '@esbuild/android-arm64@0.25.6':
+    resolution: {integrity: sha512-hd5zdUarsK6strW+3Wxi5qWws+rJhCCbMiC9QZyzoxfk5uHRIE8T287giQxzVpEvCwuJ9Qjg6bEjcRJcgfLqoA==}
     engines: {node: '>=18'}
     cpu: [arm64]
     os: [android]
@@ -717,8 +720,8 @@ packages:
     cpu: [arm]
     os: [android]

-  '@esbuild/android-arm@0.25.5':
-    resolution: {integrity: sha512-AdJKSPeEHgi7/ZhuIPtcQKr5RQdo6OO2IL87JkianiMYMPbCtot9fxPbrMiBADOWWm3T2si9stAiVsGbTQFkbA==}
+  '@esbuild/android-arm@0.25.6':
+    resolution: {integrity: sha512-S8ToEOVfg++AU/bHwdksHNnyLyVM+eMVAOf6yRKFitnwnbwwPNqKr3srzFRe7nzV69RQKb5DgchIX5pt3L53xg==}
     engines: {node: '>=18'}
     cpu: [arm]
     os: [android]
@@ -741,8 +744,8 @@ packages:
     cpu: [x64]
     os: [android]

-  '@esbuild/android-x64@0.25.5':
-    resolution: {integrity: sha512-D2GyJT1kjvO//drbRT3Hib9XPwQeWd9vZoBJn+bu/lVsOZ13cqNdDeqIF/xQ5/VmWvMduP6AmXvylO/PIc2isw==}
+  '@esbuild/android-x64@0.25.6':
+    resolution: {integrity: sha512-0Z7KpHSr3VBIO9A/1wcT3NTy7EB4oNC4upJ5ye3R7taCc2GUdeynSLArnon5G8scPwaU866d3H4BCrE5xLW25A==}
     engines: {node: '>=18'}
     cpu: [x64]
     os: [android]
@@ -765,8 +768,8 @@ packages:
     cpu: [arm64]
     os: [darwin]

-  '@esbuild/darwin-arm64@0.25.5':
-    resolution: {integrity: sha512-GtaBgammVvdF7aPIgH2jxMDdivezgFu6iKpmT+48+F8Hhg5J/sfnDieg0aeG/jfSvkYQU2/pceFPDKlqZzwnfQ==}
+  '@esbuild/darwin-arm64@0.25.6':
+    resolution: {integrity: sha512-FFCssz3XBavjxcFxKsGy2DYK5VSvJqa6y5HXljKzhRZ87LvEi13brPrf/wdyl/BbpbMKJNOr1Sd0jtW4Ge1pAA==}
     engines: {node: '>=18'}
     cpu: [arm64]
     os: [darwin]
@@ -789,8 +792,8 @@ packages:
     cpu: [x64]
     os: [darwin]

-  '@esbuild/darwin-x64@0.25.5':
-    resolution: {integrity: sha512-1iT4FVL0dJ76/q1wd7XDsXrSW+oLoquptvh4CLR4kITDtqi2e/xwXwdCVH8hVHU43wgJdsq7Gxuzcs6Iq/7bxQ==}
+  '@esbuild/darwin-x64@0.25.6':
+    resolution: {integrity: sha512-GfXs5kry/TkGM2vKqK2oyiLFygJRqKVhawu3+DOCk7OxLy/6jYkWXhlHwOoTb0WqGnWGAS7sooxbZowy+pK9Yg==}
     engines: {node: '>=18'}
     cpu: [x64]
     os: [darwin]
@@ -813,8 +816,8 @@ packages:
     cpu: [arm64]
     os: [freebsd]

-  '@esbuild/freebsd-arm64@0.25.5':
-    resolution: {integrity: sha512-nk4tGP3JThz4La38Uy/gzyXtpkPW8zSAmoUhK9xKKXdBCzKODMc2adkB2+8om9BDYugz+uGV7sLmpTYzvmz6Sw==}
+  '@esbuild/freebsd-arm64@0.25.6':
+    resolution: {integrity: sha512-aoLF2c3OvDn2XDTRvn8hN6DRzVVpDlj2B/F66clWd/FHLiHaG3aVZjxQX2DYphA5y/evbdGvC6Us13tvyt4pWg==}
     engines: {node: '>=18'}
     cpu: [arm64]
     os: [freebsd]
@@ -837,8 +840,8 @@ packages:
     cpu: [x64]
     os: [freebsd]

-  '@esbuild/freebsd-x64@0.25.5':
-    resolution: {integrity: sha512-PrikaNjiXdR2laW6OIjlbeuCPrPaAl0IwPIaRv+SMV8CiM8i2LqVUHFC1+8eORgWyY7yhQY+2U2fA55mBzReaw==}
+  '@esbuild/freebsd-x64@0.25.6':
+    resolution: {integrity: sha512-2SkqTjTSo2dYi/jzFbU9Plt1vk0+nNg8YC8rOXXea+iA3hfNJWebKYPs3xnOUf9+ZWhKAaxnQNUf2X9LOpeiMQ==}
     engines: {node: '>=18'}
     cpu: [x64]
     os: [freebsd]
@@ -861,8 +864,8 @@ packages:
     cpu: [arm64]
     os: [linux]

-  '@esbuild/linux-arm64@0.25.5':
-    resolution: {integrity: sha512-Z9kfb1v6ZlGbWj8EJk9T6czVEjjq2ntSYLY2cw6pAZl4oKtfgQuS4HOq41M/BcoLPzrUbNd+R4BXFyH//nHxVg==}
+  '@esbuild/linux-arm64@0.25.6':
+    resolution: {integrity: sha512-b967hU0gqKd9Drsh/UuAm21Khpoh6mPBSgz8mKRq4P5mVK8bpA+hQzmm/ZwGVULSNBzKdZPQBRT3+WuVavcWsQ==}
     engines: {node: '>=18'}
     cpu: [arm64]
     os: [linux]
@@ -885,8 +888,8 @@ packages:
     cpu: [arm]
     os: [linux]

-  '@esbuild/linux-arm@0.25.5':
-    resolution: {integrity: sha512-cPzojwW2okgh7ZlRpcBEtsX7WBuqbLrNXqLU89GxWbNt6uIg78ET82qifUy3W6OVww6ZWobWub5oqZOVtwolfw==}
+  '@esbuild/linux-arm@0.25.6':
+    resolution: {integrity: sha512-SZHQlzvqv4Du5PrKE2faN0qlbsaW/3QQfUUc6yO2EjFcA83xnwm91UbEEVx4ApZ9Z5oG8Bxz4qPE+HFwtVcfyw==}
     engines: {node: '>=18'}
     cpu: [arm]
     os: [linux]
@@ -909,8 +912,8 @@ packages:
     cpu: [ia32]
     os: [linux]

-  '@esbuild/linux-ia32@0.25.5':
-    resolution: {integrity: sha512-sQ7l00M8bSv36GLV95BVAdhJ2QsIbCuCjh/uYrWiMQSUuV+LpXwIqhgJDcvMTj+VsQmqAHL2yYaasENvJ7CDKA==}
+  '@esbuild/linux-ia32@0.25.6':
+    resolution: {integrity: sha512-aHWdQ2AAltRkLPOsKdi3xv0mZ8fUGPdlKEjIEhxCPm5yKEThcUjHpWB1idN74lfXGnZ5SULQSgtr5Qos5B0bPw==}
     engines: {node: '>=18'}
     cpu: [ia32]
     os: [linux]
@@ -933,8 +936,8 @@ packages:
     cpu: [loong64]
     os: [linux]

-  '@esbuild/linux-loong64@0.25.5':
-    resolution: {integrity: sha512-0ur7ae16hDUC4OL5iEnDb0tZHDxYmuQyhKhsPBV8f99f6Z9KQM02g33f93rNH5A30agMS46u2HP6qTdEt6Q1kg==}
+  '@esbuild/linux-loong64@0.25.6':
+    resolution: {integrity: sha512-VgKCsHdXRSQ7E1+QXGdRPlQ/e08bN6WMQb27/TMfV+vPjjTImuT9PmLXupRlC90S1JeNNW5lzkAEO/McKeJ2yg==}
     engines: {node: '>=18'}
     cpu: [loong64]
     os: [linux]
@@ -957,8 +960,8 @@ packages:
     cpu: [mips64el]
     os: [linux]

-  '@esbuild/linux-mips64el@0.25.5':
-    resolution: {integrity: sha512-kB/66P1OsHO5zLz0i6X0RxlQ+3cu0mkxS3TKFvkb5lin6uwZ/ttOkP3Z8lfR9mJOBk14ZwZ9182SIIWFGNmqmg==}
+  '@esbuild/linux-mips64el@0.25.6':
+    resolution: {integrity: sha512-WViNlpivRKT9/py3kCmkHnn44GkGXVdXfdc4drNmRl15zVQ2+D2uFwdlGh6IuK5AAnGTo2qPB1Djppj+t78rzw==}
     engines: {node: '>=18'}
     cpu: [mips64el]
     os: [linux]
@@ -981,8 +984,8 @@ packages:
     cpu: [ppc64]
     os: [linux]

-  '@esbuild/linux-ppc64@0.25.5':
-    resolution: {integrity: sha512-UZCmJ7r9X2fe2D6jBmkLBMQetXPXIsZjQJCjgwpVDz+YMcS6oFR27alkgGv3Oqkv07bxdvw7fyB71/olceJhkQ==}
+  '@esbuild/linux-ppc64@0.25.6':
+    resolution: {integrity: sha512-wyYKZ9NTdmAMb5730I38lBqVu6cKl4ZfYXIs31Baf8aoOtB4xSGi3THmDYt4BTFHk7/EcVixkOV2uZfwU3Q2Jw==}
     engines: {node: '>=18'}
     cpu: [ppc64]
     os: [linux]
@@ -1005,8 +1008,8 @@ packages:
     cpu: [riscv64]
     os: [linux]

-  '@esbuild/linux-riscv64@0.25.5':
-    resolution: {integrity: sha512-kTxwu4mLyeOlsVIFPfQo+fQJAV9mh24xL+y+Bm6ej067sYANjyEw1dNHmvoqxJUCMnkBdKpvOn0Ahql6+4VyeA==}
+  '@esbuild/linux-riscv64@0.25.6':
+    resolution: {integrity: sha512-KZh7bAGGcrinEj4qzilJ4hqTY3Dg2U82c8bv+e1xqNqZCrCyc+TL9AUEn5WGKDzm3CfC5RODE/qc96OcbIe33w==}
     engines: {node: '>=18'}
     cpu: [riscv64]
     os: [linux]
@@ -1029,8 +1032,8 @@ packages:
     cpu: [s390x]
     os: [linux]

-  '@esbuild/linux-s390x@0.25.5':
-    resolution: {integrity: sha512-K2dSKTKfmdh78uJ3NcWFiqyRrimfdinS5ErLSn3vluHNeHVnBAFWC8a4X5N+7FgVE1EjXS1QDZbpqZBjfrqMTQ==}
+  '@esbuild/linux-s390x@0.25.6':
+    resolution: {integrity: sha512-9N1LsTwAuE9oj6lHMyyAM+ucxGiVnEqUdp4v7IaMmrwb06ZTEVCIs3oPPplVsnjPfyjmxwHxHMF8b6vzUVAUGw==}
     engines: {node: '>=18'}
     cpu: [s390x]
     os: [linux]
@@ -1053,14 +1056,14 @@ packages:
     cpu: [x64]
     os: [linux]

-  '@esbuild/linux-x64@0.25.5':
-    resolution: {integrity: sha512-uhj8N2obKTE6pSZ+aMUbqq+1nXxNjZIIjCjGLfsWvVpy7gKCOL6rsY1MhRh9zLtUtAI7vpgLMK6DxjO8Qm9lJw==}
+  '@esbuild/linux-x64@0.25.6':
+    resolution: {integrity: sha512-A6bJB41b4lKFWRKNrWoP2LHsjVzNiaurf7wyj/XtFNTsnPuxwEBWHLty+ZE0dWBKuSK1fvKgrKaNjBS7qbFKig==}
     engines: {node: '>=18'}
     cpu: [x64]
     os: [linux]

-  '@esbuild/netbsd-arm64@0.25.5':
-    resolution: {integrity: sha512-pwHtMP9viAy1oHPvgxtOv+OkduK5ugofNTVDilIzBLpoWAM16r7b/mxBvfpuQDpRQFMfuVr5aLcn4yveGvBZvw==}
+  '@esbuild/netbsd-arm64@0.25.6':
+    resolution: {integrity: sha512-IjA+DcwoVpjEvyxZddDqBY+uJ2Snc6duLpjmkXm/v4xuS3H+3FkLZlDm9ZsAbF9rsfP3zeA0/ArNDORZgrxR/Q==}
     engines: {node: '>=18'}
     cpu: [arm64]
     os: [netbsd]
@@ -1083,14 +1086,14 @@ packages:
     cpu: [x64]
     os: [netbsd]

-  '@esbuild/netbsd-x64@0.25.5':
-    resolution: {integrity: sha512-WOb5fKrvVTRMfWFNCroYWWklbnXH0Q5rZppjq0vQIdlsQKuw6mdSihwSo4RV/YdQ5UCKKvBy7/0ZZYLBZKIbwQ==}
+  '@esbuild/netbsd-x64@0.25.6':
+    resolution: {integrity: sha512-dUXuZr5WenIDlMHdMkvDc1FAu4xdWixTCRgP7RQLBOkkGgwuuzaGSYcOpW4jFxzpzL1ejb8yF620UxAqnBrR9g==}
     engines: {node: '>=18'}
     cpu: [x64]
     os: [netbsd]

-  '@esbuild/openbsd-arm64@0.25.5':
-    resolution: {integrity: sha512-7A208+uQKgTxHd0G0uqZO8UjK2R0DDb4fDmERtARjSHWxqMTye4Erz4zZafx7Di9Cv+lNHYuncAkiGFySoD+Mw==}
+  '@esbuild/openbsd-arm64@0.25.6':
+    resolution: {integrity: sha512-l8ZCvXP0tbTJ3iaqdNf3pjaOSd5ex/e6/omLIQCVBLmHTlfXW3zAxQ4fnDmPLOB1x9xrcSi/xtCWFwCZRIaEwg==}
     engines: {node: '>=18'}
     cpu: [arm64]
     os: [openbsd]
@@ -1113,12 +1116,18 @@ packages:
     cpu: [x64]
     os: [openbsd]

-  '@esbuild/openbsd-x64@0.25.5':
-    resolution: {integrity: sha512-G4hE405ErTWraiZ8UiSoesH8DaCsMm0Cay4fsFWOOUcz8b8rC6uCvnagr+gnioEjWn0wC+o1/TAHt+It+MpIMg==}
+  '@esbuild/openbsd-x64@0.25.6':
+    resolution: {integrity: sha512-hKrmDa0aOFOr71KQ/19JC7az1P0GWtCN1t2ahYAf4O007DHZt/dW8ym5+CUdJhQ/qkZmI1HAF8KkJbEFtCL7gw==}
     engines: {node: '>=18'}
     cpu: [x64]
     os: [openbsd]

+  '@esbuild/openharmony-arm64@0.25.6':
+    resolution: {integrity: sha512-+SqBcAWoB1fYKmpWoQP4pGtx+pUUC//RNYhFdbcSA16617cchuryuhOCRpPsjCblKukAckWsV+aQ3UKT/RMPcA==}
+    engines: {node: '>=18'}
+    cpu: [arm64]
+    os: [openharmony]
+
   '@esbuild/sunos-x64@0.17.3':
     resolution: {integrity: sha512-RxmhKLbTCDAY2xOfrww6ieIZkZF+KBqG7S2Ako2SljKXRFi+0863PspK74QQ7JpmWwncChY25JTJSbVBYGQk2Q==}
     engines: {node: '>=12'}
@@ -1137,8 +1146,8 @@ packages:
     cpu: [x64]
     os: [sunos]

-  '@esbuild/sunos-x64@0.25.5':
-    resolution: {integrity: sha512-l+azKShMy7FxzY0Rj4RCt5VD/q8mG/e+mDivgspo+yL8zW7qEwctQ6YqKX34DTEleFAvCIUviCFX1SDZRSyMQA==}
+  '@esbuild/sunos-x64@0.25.6':
+    resolution: {integrity: sha512-dyCGxv1/Br7MiSC42qinGL8KkG4kX0pEsdb0+TKhmJZgCUDBGmyo1/ArCjNGiOLiIAgdbWgmWgib4HoCi5t7kA==}
     engines: {node: '>=18'}
     cpu: [x64]
     os: [sunos]
@@ -1161,8 +1170,8 @@ packages:
     cpu: [arm64]
     os: [win32]

-  '@esbuild/win32-arm64@0.25.5':
-    resolution: {integrity: sha512-O2S7SNZzdcFG7eFKgvwUEZ2VG9D/sn/eIiz8XRZ1Q/DO5a3s76Xv0mdBzVM5j5R639lXQmPmSo0iRpHqUUrsxw==}
+  '@esbuild/win32-arm64@0.25.6':
+    resolution: {integrity: sha512-42QOgcZeZOvXfsCBJF5Afw73t4veOId//XD3i+/9gSkhSV6Gk3VPlWncctI+JcOyERv85FUo7RxuxGy+z8A43Q==}
     engines: {node: '>=18'}
     cpu: [arm64]
     os: [win32]
@@ -1185,8 +1194,8 @@ packages:
     cpu: [ia32]
     os: [win32]

-  '@esbuild/win32-ia32@0.25.5':
-    resolution: {integrity: sha512-onOJ02pqs9h1iMJ1PQphR+VZv8qBMQ77Klcsqv9CNW2w6yLqoURLcgERAIurY6QE63bbLuqgP9ATqajFLK5AMQ==}
+  '@esbuild/win32-ia32@0.25.6':
+    resolution: {integrity: sha512-4AWhgXmDuYN7rJI6ORB+uU9DHLq/erBbuMoAuB4VWJTu5KtCgcKYPynF0YI1VkBNuEfjNlLrFr9KZPJzrtLkrQ==}
     engines: {node: '>=18'}
     cpu: [ia32]
     os: [win32]
@@ -1209,8 +1218,8 @@ packages:
     cpu: [x64]
     os: [win32]

-  '@esbuild/win32-x64@0.25.5':
-    resolution: {integrity: sha512-TXv6YnJ8ZMVdX+SXWVBo/0p8LTcrUYngpWjvm91TMjjBQii7Oz11Lw5lbDV5Y0TzuhSJHwiH4hEtC1I42mMS0g==}
+  '@esbuild/win32-x64@0.25.6':
+    resolution: {integrity: sha512-NgJPHHbEpLQgDH2MjQu90pzW/5vvXIZ7KOnPyNBm92A6WgZ/7b6fJyUBjoumLqeOQQGqY2QjQxRo97ah4Sj0cA==}
     engines: {node: '>=18'}
     cpu: [x64]
     os: [win32]
@@ -3739,8 +3748,8 @@ packages:
     engines: {node: '>=12'}
     hasBin: true

-  esbuild@0.25.5:
-    resolution: {integrity: sha512-P8OtKZRv/5J5hhz0cUAdu/cLuPIKXpQl1R9pZtvmHWQvrAUVd0UNIPT4IB4W3rNOqVO0rlqHmCIbSwxh/c9yUQ==}
+  esbuild@0.25.6:
+    resolution: {integrity: sha512-GVuzuUwtdsghE3ocJ9Bs8PNoF13HNQ5TXbEi2AhvVb8xU1Iwt9Fos9FEamfoee+u/TOsn7GUWc04lz46n2bbTg==}
     engines: {node: '>=18'}
     hasBin: true

@@ -4723,6 +4732,9 @@ packages:
       node-notifier:
         optional: true

+  jieba-wasm@2.2.0:
+    resolution: {integrity: sha512-IwxgUf+EMutjLair3k41i0ApM33qeHNY9EFBKlI5/XtHcISkGt5YPmUvpDJe3hUflwRYhy9g29ZzTetGZw6XgQ==}
+
   js-base64@3.7.7:
     resolution: {integrity: sha512-7rCnleh0z2CkXhH67J8K1Ytz0b2Y+yxTPL+/KOJoa20hfnVQ/3/T6W/KflYI4bRHRagNeXeU2bkNGI3v1oS/lw==}

@@ -7089,7 +7101,7 @@ snapshots:
       '@lezer/lr': 1.4.2
       style-mod: 4.1.2

-  '@codemirror/language@https://codeload.github.com/lishid/cm-language/tar.gz/6c1c5f5b677f6f6503d1ca2ec47f62f6406cda67':
+  '@codemirror/language@https://codeload.github.com/lishid/cm-language/tar.gz/a9c3c7efe17dd1d24395ee2a179fe12dd6ed1e76':
     dependencies:
       '@codemirror/state': 6.5.2
       '@codemirror/view': 6.38.0
@@ -7190,7 +7202,7 @@ snapshots:
   '@esbuild/aix-ppc64@0.19.12':
     optional: true

-  '@esbuild/aix-ppc64@0.25.5':
+  '@esbuild/aix-ppc64@0.25.6':
     optional: true

   '@esbuild/android-arm64@0.17.3':
@@ -7202,7 +7214,7 @@ snapshots:
   '@esbuild/android-arm64@0.19.12':
     optional: true

-  '@esbuild/android-arm64@0.25.5':
+  '@esbuild/android-arm64@0.25.6':
     optional: true

   '@esbuild/android-arm@0.17.3':
@@ -7214,7 +7226,7 @@ snapshots:
   '@esbuild/android-arm@0.19.12':
     optional: true

-  '@esbuild/android-arm@0.25.5':
+  '@esbuild/android-arm@0.25.6':
     optional: true

   '@esbuild/android-x64@0.17.3':
@@ -7226,7 +7238,7 @@ snapshots:
   '@esbuild/android-x64@0.19.12':
     optional: true

-  '@esbuild/android-x64@0.25.5':
+  '@esbuild/android-x64@0.25.6':
     optional: true

   '@esbuild/darwin-arm64@0.17.3':
@@ -7238,7 +7250,7 @@ snapshots:
   '@esbuild/darwin-arm64@0.19.12':
     optional: true

-  '@esbuild/darwin-arm64@0.25.5':
+  '@esbuild/darwin-arm64@0.25.6':
     optional: true

   '@esbuild/darwin-x64@0.17.3':
@@ -7250,7 +7262,7 @@ snapshots:
   '@esbuild/darwin-x64@0.19.12':
     optional: true

-  '@esbuild/darwin-x64@0.25.5':
+  '@esbuild/darwin-x64@0.25.6':
     optional: true

   '@esbuild/freebsd-arm64@0.17.3':
@@ -7262,7 +7274,7 @@ snapshots:
   '@esbuild/freebsd-arm64@0.19.12':
     optional: true

-  '@esbuild/freebsd-arm64@0.25.5':
+  '@esbuild/freebsd-arm64@0.25.6':
     optional: true

   '@esbuild/freebsd-x64@0.17.3':
@@ -7274,7 +7286,7 @@ snapshots:
   '@esbuild/freebsd-x64@0.19.12':
     optional: true

-  '@esbuild/freebsd-x64@0.25.5':
+  '@esbuild/freebsd-x64@0.25.6':
     optional: true

   '@esbuild/linux-arm64@0.17.3':
@@ -7286,7 +7298,7 @@ snapshots:
   '@esbuild/linux-arm64@0.19.12':
     optional: true

-  '@esbuild/linux-arm64@0.25.5':
+  '@esbuild/linux-arm64@0.25.6':
     optional: true

   '@esbuild/linux-arm@0.17.3':
@@ -7298,7 +7310,7 @@ snapshots:
   '@esbuild/linux-arm@0.19.12':
     optional: true

-  '@esbuild/linux-arm@0.25.5':
+  '@esbuild/linux-arm@0.25.6':
     optional: true

   '@esbuild/linux-ia32@0.17.3':
@@ -7310,7 +7322,7 @@ snapshots:
   '@esbuild/linux-ia32@0.19.12':
     optional: true

-  '@esbuild/linux-ia32@0.25.5':
+  '@esbuild/linux-ia32@0.25.6':
     optional: true

   '@esbuild/linux-loong64@0.17.3':
@@ -7322,7 +7334,7 @@ snapshots:
   '@esbuild/linux-loong64@0.19.12':
     optional: true

-  '@esbuild/linux-loong64@0.25.5':
+  '@esbuild/linux-loong64@0.25.6':
     optional: true

   '@esbuild/linux-mips64el@0.17.3':
@@ -7334,7 +7346,7 @@ snapshots:
   '@esbuild/linux-mips64el@0.19.12':
     optional: true

-  '@esbuild/linux-mips64el@0.25.5':
+  '@esbuild/linux-mips64el@0.25.6':
     optional: true

   '@esbuild/linux-ppc64@0.17.3':
@@ -7346,7 +7358,7 @@ snapshots:
   '@esbuild/linux-ppc64@0.19.12':
     optional: true

-  '@esbuild/linux-ppc64@0.25.5':
+  '@esbuild/linux-ppc64@0.25.6':
     optional: true

   '@esbuild/linux-riscv64@0.17.3':
@@ -7358,7 +7370,7 @@ snapshots:
   '@esbuild/linux-riscv64@0.19.12':
     optional: true

-  '@esbuild/linux-riscv64@0.25.5':
+  '@esbuild/linux-riscv64@0.25.6':
     optional: true

   '@esbuild/linux-s390x@0.17.3':
@@ -7370,7 +7382,7 @@ snapshots:
   '@esbuild/linux-s390x@0.19.12':
     optional: true

-  '@esbuild/linux-s390x@0.25.5':
+  '@esbuild/linux-s390x@0.25.6':
     optional: true

   '@esbuild/linux-x64@0.17.3':
@@ -7382,10 +7394,10 @@ snapshots:
   '@esbuild/linux-x64@0.19.12':
     optional: true

-  '@esbuild/linux-x64@0.25.5':
+  '@esbuild/linux-x64@0.25.6':
     optional: true

-  '@esbuild/netbsd-arm64@0.25.5':
+  '@esbuild/netbsd-arm64@0.25.6':
     optional: true

   '@esbuild/netbsd-x64@0.17.3':
@@ -7397,10 +7409,10 @@ snapshots:
   '@esbuild/netbsd-x64@0.19.12':
     optional: true

-  '@esbuild/netbsd-x64@0.25.5':
+  '@esbuild/netbsd-x64@0.25.6':
     optional: true

-  '@esbuild/openbsd-arm64@0.25.5':
+  '@esbuild/openbsd-arm64@0.25.6':
     optional: true

   '@esbuild/openbsd-x64@0.17.3':
@@ -7412,7 +7424,10 @@ snapshots:
   '@esbuild/openbsd-x64@0.19.12':
     optional: true

-  '@esbuild/openbsd-x64@0.25.5':
+  '@esbuild/openbsd-x64@0.25.6':
     optional: true

+  '@esbuild/openharmony-arm64@0.25.6':
+    optional: true
+
   '@esbuild/sunos-x64@0.17.3':
@@ -7424,7 +7439,7 @@ snapshots:
   '@esbuild/sunos-x64@0.19.12':
     optional: true

-  '@esbuild/sunos-x64@0.25.5':
+  '@esbuild/sunos-x64@0.25.6':
     optional: true

   '@esbuild/win32-arm64@0.17.3':
@@ -7436,7 +7451,7 @@ snapshots:
   '@esbuild/win32-arm64@0.19.12':
     optional: true

-  '@esbuild/win32-arm64@0.25.5':
+  '@esbuild/win32-arm64@0.25.6':
     optional: true

   '@esbuild/win32-ia32@0.17.3':
@@ -7448,7 +7463,7 @@ snapshots:
   '@esbuild/win32-ia32@0.19.12':
     optional: true

-  '@esbuild/win32-ia32@0.25.5':
+  '@esbuild/win32-ia32@0.25.6':
     optional: true

   '@esbuild/win32-x64@0.17.3':
@@ -7460,7 +7475,7 @@ snapshots:
   '@esbuild/win32-x64@0.19.12':
     optional: true

-  '@esbuild/win32-x64@0.25.5':
+  '@esbuild/win32-x64@0.25.6':
     optional: true

   '@eslint-community/eslint-utils@4.7.0(eslint@8.57.1)':
@@ -10339,7 +10354,7 @@ snapshots:

   esbuild-plugin-inline-worker@0.1.1:
     dependencies:
-      esbuild: 0.25.5
+      esbuild: 0.25.6
       find-cache-dir: 3.3.2

   esbuild-register@3.6.0(esbuild@0.19.12):
@@ -10425,33 +10440,34 @@ snapshots:
       '@esbuild/win32-ia32': 0.19.12
       '@esbuild/win32-x64': 0.19.12

-  esbuild@0.25.5:
+  esbuild@0.25.6:
     optionalDependencies:
-      '@esbuild/aix-ppc64': 0.25.5
-      '@esbuild/android-arm': 0.25.5
-      '@esbuild/android-arm64': 0.25.5
-      '@esbuild/android-x64': 0.25.5
-      '@esbuild/darwin-arm64': 0.25.5
-      '@esbuild/darwin-x64': 0.25.5
-      '@esbuild/freebsd-arm64': 0.25.5
-      '@esbuild/freebsd-x64': 0.25.5
-      '@esbuild/linux-arm': 0.25.5
-      '@esbuild/linux-arm64': 0.25.5
-      '@esbuild/linux-ia32': 0.25.5
-      '@esbuild/linux-loong64': 0.25.5
-      '@esbuild/linux-mips64el': 0.25.5
-      '@esbuild/linux-ppc64': 0.25.5
-      '@esbuild/linux-riscv64': 0.25.5
-      '@esbuild/linux-s390x': 0.25.5
-      '@esbuild/linux-x64': 0.25.5
-      '@esbuild/netbsd-arm64': 0.25.5
-      '@esbuild/netbsd-x64': 0.25.5
-      '@esbuild/openbsd-arm64': 0.25.5
-      '@esbuild/openbsd-x64': 0.25.5
-      '@esbuild/sunos-x64': 0.25.5
-      '@esbuild/win32-arm64': 0.25.5
-      '@esbuild/win32-ia32': 0.25.5
-      '@esbuild/win32-x64': 0.25.5
+      '@esbuild/aix-ppc64': 0.25.6
+      '@esbuild/android-arm': 0.25.6
+      '@esbuild/android-arm64': 0.25.6
+      '@esbuild/android-x64': 0.25.6
+      '@esbuild/darwin-arm64': 0.25.6
+      '@esbuild/darwin-x64': 0.25.6
+      '@esbuild/freebsd-arm64': 0.25.6
+      '@esbuild/freebsd-x64': 0.25.6
+      '@esbuild/linux-arm': 0.25.6
+      '@esbuild/linux-arm64': 0.25.6
+      '@esbuild/linux-ia32': 0.25.6
+      '@esbuild/linux-loong64': 0.25.6
+      '@esbuild/linux-mips64el': 0.25.6
+      '@esbuild/linux-ppc64': 0.25.6
+      '@esbuild/linux-riscv64': 0.25.6
+      '@esbuild/linux-s390x': 0.25.6
+      '@esbuild/linux-x64': 0.25.6
+      '@esbuild/netbsd-arm64': 0.25.6
+      '@esbuild/netbsd-x64': 0.25.6
+      '@esbuild/openbsd-arm64': 0.25.6
+      '@esbuild/openbsd-x64': 0.25.6
+      '@esbuild/openharmony-arm64': 0.25.6
+      '@esbuild/sunos-x64': 0.25.6
+      '@esbuild/win32-arm64': 0.25.6
+      '@esbuild/win32-ia32': 0.25.6
+      '@esbuild/win32-x64': 0.25.6

   escalade@3.2.0: {}

@@ -11789,6 +11805,8 @@ snapshots:
       - supports-color
       - ts-node

+  jieba-wasm@2.2.0: {}
+
   js-base64@3.7.7: {}

   js-tiktoken@1.0.20:
@@ -12603,7 +12621,7 @@ snapshots:

   obsidian-dataview@0.5.68:
     dependencies:
-      '@codemirror/language': https://codeload.github.com/lishid/cm-language/tar.gz/6c1c5f5b677f6f6503d1ca2ec47f62f6406cda67
+      '@codemirror/language': https://codeload.github.com/lishid/cm-language/tar.gz/a9c3c7efe17dd1d24395ee2a179fe12dd6ed1e76
       '@codemirror/state': 6.5.2
       '@codemirror/view': 6.38.0
       emoji-regex: 10.4.0
@@ -163,7 +163,7 @@ export class RAGEngine {
 		)
 	}

-	async processQuery({
+	async processSimilarityQuery({
 		query,
 		scope,
 		limit,
@@ -211,6 +211,221 @@ export class RAGEngine {
 		return queryResult
 	}

+	async processQuery({
+		query,
+		scope,
+		limit,
+		language,
+		onQueryProgressChange,
+	}: {
+		query: string
+		scope?: {
+			files: string[]
+			folders: string[]
+		}
+		limit?: number
+		language?: string
+		onQueryProgressChange?: (queryProgress: QueryProgressState) => void
+	}): Promise<
+		(Omit<SelectVector, 'embedding'> & {
+			similarity: number
+		})[]
+	> {
+		if (!this.embeddingModel) {
+			throw new Error('Embedding model is not set')
+		}
+
+		await this.initializeDimension()
+
+		onQueryProgressChange?.({
+			type: 'querying',
+		})
+
+		// Run the similarity search and the full-text search in parallel
+		const [similarityResults, fulltextResults] = await Promise.all([
+			this.processSimilarityQuery({
+				query,
+				scope,
+				limit,
+				onQueryProgressChange: undefined, // avoid firing progress callbacks twice
+			}),
+			this.processFulltextQuery({
+				query,
+				scope,
+				limit,
+				language,
+				onQueryProgressChange: undefined, // avoid firing progress callbacks twice
+			}),
+		])
+
+		// Optimization: if one result set is empty, return the other directly
+		let finalResults: (Omit<SelectVector, 'embedding'> & { similarity: number })[]
+
+		if (fulltextResults.length === 0) {
+			// No full-text results; return the similarity results as-is
+			finalResults = similarityResults
+		} else if (similarityResults.length === 0) {
+			// No similarity results; return the full-text results (converted to the same shape)
+			finalResults = fulltextResults.map(result => ({
+				...result,
+				similarity: 1 - (result.rank - 1) / fulltextResults.length, // convert rank to a similarity score
+			}))
+		} else {
+			// Both searches returned results; merge them with the RRF algorithm
+			const rrf_k = 60 // RRF constant
+			const mergedResults = this.mergeWithRRF(similarityResults, fulltextResults, rrf_k)
+
+			// Convert to the shape expected by the existing interface
+			finalResults = mergedResults.map(result => ({
+				...result,
+				similarity: result.rrfScore, // use the RRF score as the similarity
+			}))
+		}
+
+		onQueryProgressChange?.({
+			type: 'querying-done',
+			queryResult: finalResults,
+		})
+
+		return finalResults
+	}
+
+	/**
+	 * Merge similarity-search and full-text-search results with Reciprocal Rank Fusion (RRF).
+	 * @param similarityResults results of the similarity search
+	 * @param fulltextResults results of the full-text search
+	 * @param k RRF constant, typically 60
+	 * @returns merged results, sorted by RRF score
+	 */
+	private mergeWithRRF(
+		similarityResults: (Omit<SelectVector, 'embedding'> & { similarity: number })[],
+		fulltextResults: (Omit<SelectVector, 'embedding'> & { rank: number })[],
+		k: number = 60
+	): (Omit<SelectVector, 'embedding'> & { rrfScore: number })[] {
+		// Map from document key to its accumulated RRF score
+		const rrfScores = new Map<string, {
+			doc: Omit<SelectVector, 'embedding'>,
+			score: number
+		}>()
+
+		// Score the similarity-search results
+		similarityResults.forEach((result, index) => {
+			const key = `${result.path}-${result.id}`
+			const rank = index + 1
+			const rrfScore = 1 / (k + rank)
+
+			if (rrfScores.has(key)) {
+				const existing = rrfScores.get(key)
+				if (existing) {
+					existing.score += rrfScore
+				}
+			} else {
+				rrfScores.set(key, {
+					doc: {
+						id: result.id,
+						path: result.path,
+						mtime: result.mtime,
+						content: result.content,
+						metadata: result.metadata,
+					},
+					score: rrfScore
+				})
+			}
+		})
+
+		// Score the full-text-search results
+		fulltextResults.forEach((result, index) => {
+			const key = `${result.path}-${result.id}`
+			const rank = index + 1
+			const rrfScore = 1 / (k + rank)
+
+			if (rrfScores.has(key)) {
+				const existing = rrfScores.get(key)
+				if (existing) {
+					existing.score += rrfScore
+				}
+			} else {
+				rrfScores.set(key, {
+					doc: {
+						id: result.id,
+						path: result.path,
+						mtime: result.mtime,
+						content: result.content,
+						metadata: result.metadata,
+					},
+					score: rrfScore
+				})
+			}
+		})
+
+		// Convert to an array and normalize
+		const results = Array.from(rrfScores.values())
+
+		// Find the maximum score, for normalization
+		const maxScore = Math.max(...results.map(r => r.score))
+
+		// Normalize to the 0-1 range and sort by score
+		const mergedResults = results
+			.map(({ doc, score }) => ({
+				...doc,
+				rrfScore: maxScore > 0 ? score / maxScore : 0 // normalized to 0-1
+			}))
+			.sort((a, b) => b.rrfScore - a.rrfScore)
+
+		return mergedResults
+	}
+
+	async processFulltextQuery({
+		query,
+		scope,
+		limit,
+		language,
+		onQueryProgressChange,
+	}: {
+		query: string
+		scope?: {
+			files: string[]
+			folders: string[]
+		}
+		limit?: number
+		language?: string
+		onQueryProgressChange?: (queryProgress: QueryProgressState) => void
+	}): Promise<
+		(Omit<SelectVector, 'embedding'> & {
+			rank: number
+		})[]
+	> {
+		if (!this.embeddingModel) {
+			throw new Error('Embedding model is not set')
+		}
+
+		await this.initializeDimension()
+
+		onQueryProgressChange?.({
+			type: 'querying',
+		})
+
+		const queryResult = await this.vectorManager.performFulltextSearch(
+			query,
+			this.embeddingModel,
+			{
+				limit: limit ?? this.settings.ragOptions.limit,
+				scope,
+				language: language || 'english',
+			},
+		)
+
+		onQueryProgressChange?.({
+			type: 'querying-done',
+			queryResult: queryResult.map(result => ({
+				...result,
+				similarity: result.rank, // for compatibility with the QueryProgressState type
+			})),
+		})
+
+		return queryResult
+	}
+
 	async getEmbedding(query: string): Promise<number[]> {
 		if (!this.embeddingModel) {
 			throw new Error('Embedding model is not set')
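A note on the fusion step added above: RRF scores a document as 1/(k + rank) in each list where it appears and sums the contributions, so with k = 60 a chunk ranked 2nd by vector search and 5th by full-text search scores 1/62 + 1/65 ≈ 0.032, beating a chunk that tops only one list (1/61 ≈ 0.016). A self-contained sketch of the same arithmetic (the keys here are made up; the plugin's real keys are `${path}-${id}`, as in `mergeWithRRF`):

```ts
// Minimal RRF illustration with k = 60, matching the constant used above.
const k = 60
const similarityRanking = ['notes/a.md-1', 'notes/b.md-7', 'notes/c.md-2']
const fulltextRanking = ['notes/b.md-7', 'notes/a.md-1', 'notes/d.md-4']

const scores = new Map<string, number>()
for (const ranking of [similarityRanking, fulltextRanking]) {
	ranking.forEach((key, index) => {
		// rank is 1-based, so the contribution is 1 / (k + index + 1)
		scores.set(key, (scores.get(key) ?? 0) + 1 / (k + index + 1))
	})
}

// 'notes/a.md-1' and 'notes/b.md-7' appear in both lists, so they outrank the rest.
console.log([...scores.entries()].sort((a, b) => b[1] - a[1]))
```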
@@ -33,6 +33,71 @@ export class VectorManager {
 		this.repository = new VectorRepository(app, dbManager.getPgClient() as any)
 	}

+	// Helper that merges undersized chunks (merging only within a single file)
+	private mergeSmallChunks(chunks: { pageContent: string; metadata: any }[], minChunkSize: number): typeof chunks {
+		if (!chunks || chunks.length === 0) {
+			return []
+		}
+
+		const mergedChunks: typeof chunks = []
+		let currentChunkBuffer = ""
+		let currentMetadata: any = null
+
+		for (const chunk of chunks) {
+			const content = chunk.pageContent.trim()
+			if (content.length === 0) continue
+
+			// Append the current chunk to the buffer
+			const combined = currentChunkBuffer ? `${currentChunkBuffer} ${content}` : content
+
+			// Update the metadata, tracking the start and end lines
+			const combinedMetadata = currentMetadata ? {
+				...currentMetadata,
+				endLine: chunk.metadata?.loc?.lines?.to || chunk.metadata?.endLine || currentMetadata.endLine
+			} : {
+				...chunk.metadata,
+				startLine: chunk.metadata?.loc?.lines?.from || chunk.metadata?.startLine,
+				endLine: chunk.metadata?.loc?.lines?.to || chunk.metadata?.endLine
+			}
+
+			if (combined.length < minChunkSize) {
+				// Still too small after combining; keep buffering and continue the loop
+				currentChunkBuffer = combined
+				currentMetadata = combinedMetadata
+			} else {
+				// The combined chunk reached the minimum size; push it and clear the buffer
+				mergedChunks.push({
+					pageContent: combined,
+					metadata: combinedMetadata
+				})
+				currentChunkBuffer = ""
+				currentMetadata = null
+			}
+		}
+
+		// Handle any small chunk left in the buffer after the loop
+		if (currentChunkBuffer) {
+			if (mergedChunks.length > 0) {
+				// Strategy 1: merge the leftover into the last merged chunk
+				const lastChunk = mergedChunks[mergedChunks.length - 1]
+				lastChunk.pageContent += ` ${currentChunkBuffer}`
+				lastChunk.metadata.endLine = currentMetadata?.endLine || lastChunk.metadata.endLine
+			} else {
+				// Strategy 2: if no chunk was large enough, emit the leftover as its own chunk
+				mergedChunks.push({
+					pageContent: currentChunkBuffer,
+					metadata: currentMetadata
+				})
+			}
+		}
+		console.log("mergedChunks: ", mergedChunks)
+		return mergedChunks
+	}
+
+	private segmentTextForTsvector(text: string): string {
+		return this.repository.segmentTextForTsvector(text)
+	}
+
 	async performSimilaritySearch(
 		queryVector: number[],
 		embeddingModel: EmbeddingModel,
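To make the merge behavior above concrete, here is a hypothetical run (the inputs are invented; real chunks come from the text splitter): fragments of 20, 50, and 80 characters with `minChunkSize = 60` yield two chunks, because the first two are buffered and joined once they cross the 60-character floor, and their line metadata is widened to cover both.

```ts
// Hypothetical inputs in the splitter's { pageContent, metadata } shape.
const chunks = [
	{ pageContent: 'a'.repeat(20), metadata: { loc: { lines: { from: 1, to: 2 } } } },
	{ pageContent: 'b'.repeat(50), metadata: { loc: { lines: { from: 3, to: 5 } } } },
	{ pageContent: 'c'.repeat(80), metadata: { loc: { lines: { from: 6, to: 9 } } } },
]

// mergeSmallChunks(chunks, 60) would return:
//   [0] the 20 a's + ' ' + 50 b's (71 chars), startLine 1, endLine 5
//   [1] the 80 c's on their own, startLine 6, endLine 9
```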
@@ -56,6 +121,29 @@ export class VectorManager {
 		)
 	}

+	async performFulltextSearch(
+		searchQuery: string,
+		embeddingModel: EmbeddingModel,
+		options: {
+			limit: number
+			scope?: {
+				files: string[]
+				folders: string[]
+			}
+			language?: string
+		},
+	): Promise<
+		(Omit<SelectVector, 'embedding'> & {
+			rank: number
+		})[]
+	> {
+		return await this.repository.performFulltextSearch(
+			searchQuery,
+			embeddingModel,
+			options,
+		)
+	}
+
 	async getWorkspaceStatistics(
 		embeddingModel: EmbeddingModel,
 		workspace?: Workspace
@@ -197,7 +285,10 @@ export class VectorManager {
 				"",
 			],
 		});
-		console.log("textSplitter chunkSize: ", options.chunkSize, "overlap: ", overlap)
+
+		// Set a minimum chunk size to avoid producing overly small chunks
+		const minChunkSize = Math.max(100, Math.floor(options.chunkSize * 0.3)); // at least 100 characters, or 30% of chunkSize
+		console.log("textSplitter chunkSize: ", options.chunkSize, "overlap: ", overlap, "minChunkSize: ", minChunkSize)

 		const skippedFiles: string[] = []
 		const embeddingProgress = { completed: 0, totalChunks: 0 }
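As a quick sanity check of that floor (illustrative numbers, not values from the plugin's settings): `Math.max(100, Math.floor(chunkSize * 0.3))` gives 300 for a chunkSize of 1000, while for any chunkSize below roughly 334 the absolute 100-character floor wins.

```ts
// Illustration only; 1000 and 200 are assumed chunk sizes.
const floorFor = (chunkSize: number) => Math.max(100, Math.floor(chunkSize * 0.3))
console.log(floorFor(1000)) // 300
console.log(floorFor(200))  // 100: floor(60) loses to the 100-character minimum
```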
@@ -205,7 +296,7 @@ export class VectorManager {
 		// Process files in batches, at most 50 per batch (reduced to avoid exhausting file handles)
 		const FILE_BATCH_SIZE = 50
 		// Reduce the batch size to lower memory pressure
-		const embeddingBatchSize = Math.min(options.batchSize, 10)
+		const embeddingBatchSize = options.batchSize

 		// First count the total number of chunks, for progress reporting
 		let totalChunks = 0
@@ -216,7 +307,13 @@ export class VectorManager {
 				let fileContent = await this.app.vault.cachedRead(file)
 				fileContent = fileContent.replace(/\0/g, '')
 				const fileDocuments = await textSplitter.createDocuments([fileContent])
-				totalChunks += fileDocuments.length
+				// The counting pass must apply the same cleaning and merging logic
+				const cleanedChunks = fileDocuments.map(chunk => ({
+					pageContent: removeMarkdown(chunk.pageContent).replace(/\0/g, '').trim(),
+					metadata: chunk.metadata
+				})).filter(chunk => chunk.pageContent.length > 0)
+				const filteredDocuments = this.mergeSmallChunks(cleanedChunks, minChunkSize)
+				totalChunks += filteredDocuments.length
 			} catch (error) {
 				// Skip files that fail during the counting pass
 			}
@@ -246,21 +343,30 @@ export class VectorManager {
 					const fileDocuments = await textSplitter.createDocuments([
 						fileContent,
 					])
-					return fileDocuments
+
+					// Clean each chunk first, then merge based on the cleaned content
+					const cleanedChunks = fileDocuments.map(chunk => ({
+						pageContent: removeMarkdown(chunk.pageContent).replace(/\0/g, '').trim(),
+						metadata: chunk.metadata
+					})).filter(chunk => chunk.pageContent.length > 0)
+
+					const filteredDocuments = this.mergeSmallChunks(cleanedChunks, minChunkSize)
+					return filteredDocuments
 						.map((chunk): InsertVector | null => {
-							// Keep the raw content; do not call removeMarkdown here
-							const rawContent = chunk.pageContent.replace(/\0/g, '')
-							if (!rawContent || rawContent.trim().length === 0) {
+							const cleanContent = chunk.pageContent
+							if (!cleanContent || cleanContent.trim().length === 0) {
 								return null
 							}
+							// Use Intl.Segmenter to add spaces for better TSVECTOR indexing
+							const segmentedContent = this.segmentTextForTsvector(cleanContent)
 							return {
 								path: file.path,
 								mtime: file.stat.mtime,
-								content: rawContent, // store the raw content
+								content: segmentedContent, // store the segmented content
 								embedding: [],
 								metadata: {
-									startLine: Number(chunk.metadata.loc.lines.from),
-									endLine: Number(chunk.metadata.loc.lines.to),
+									startLine: Number(chunk.metadata.loc?.lines?.from || chunk.metadata.startLine),
+									endLine: Number(chunk.metadata.loc?.lines?.to || chunk.metadata.endLine),
 								},
 							}
 						})
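`segmentTextForTsvector` delegates to the repository, whose implementation is not part of this diff; per the comment above, it uses `Intl.Segmenter` to space-separate words so that Postgres `to_tsvector()` can tokenize CJK text. A plausible sketch of that idea, assuming word-granularity segmentation (this is not the repository's actual code):

```ts
// Sketch: space-separate word-like segments so to_tsvector() can split CJK text.
function segmentForTsvector(text: string): string {
	const segmenter = new Intl.Segmenter('zh', { granularity: 'word' })
	return Array.from(segmenter.segment(text))
		.filter(seg => seg.isWordLike)
		.map(seg => seg.segment)
		.join(' ')
}

// segmentForTsvector('我喜欢读书') → roughly '我 喜欢 读书', three indexable lexemes.
```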
@@ -280,7 +386,6 @@ export class VectorManager {

 			// Step 2: embedding
-			console.log(`Embedding ${batchChunks.length} chunks for current file batch`)

 			if (embeddingModel.supportsBatch) {
 				// Providers that support batch processing
 				for (let j = 0; j < batchChunks.length; j += embeddingBatchSize) {
@@ -289,26 +394,25 @@ export class VectorManager {

 					await backOff(
 						async () => {
-							// Strip markdown before embedding
-							const cleanedBatchData = embeddingBatch.map(chunk => {
-								const cleanContent = removeMarkdown(chunk.content)
-								return { chunk, cleanContent }
-							}).filter(({ cleanContent }) => cleanContent && cleanContent.trim().length > 0)
+							// The content was already cleaned and merged upstream; use it directly
+							const validBatchData = embeddingBatch.filter(chunk =>
+								chunk.content && chunk.content.trim().length > 0
+							)

-							if (cleanedBatchData.length === 0) {
+							if (validBatchData.length === 0) {
 								return
 							}

-							const batchTexts = cleanedBatchData.map(({ cleanContent }) => cleanContent)
+							const batchTexts = validBatchData.map(chunk => chunk.content)
 							const batchEmbeddings = await embeddingModel.getBatchEmbeddings(batchTexts)

 							// Merge the embedding results back into the chunk data
-							for (let k = 0; k < cleanedBatchData.length; k++) {
-								const { chunk, cleanContent } = cleanedBatchData[k]
+							for (let k = 0; k < validBatchData.length; k++) {
+								const chunk = validBatchData[k]
 								const embeddedChunk: InsertVector = {
 									path: chunk.path,
 									mtime: chunk.mtime,
-									content: cleanContent, // use the already-cleaned content
+									content: chunk.content, // use the already cleaned and merged content
 									embedding: batchEmbeddings[k],
 									metadata: chunk.metadata,
 								}
@@ -349,18 +453,18 @@ export class VectorManager {
 							try {
 								await backOff(
 									async () => {
-										// Strip markdown before embedding
-										const cleanContent = removeMarkdown(chunk.content).replace(/\0/g, '')
-										// Skip content that is empty after cleaning
-										if (!cleanContent || cleanContent.trim().length === 0) {
+										// The content was already cleaned and merged upstream; use it directly
+										const content = chunk.content.trim()
+										// Skip empty content
+										if (!content || content.length === 0) {
 											return
 										}

-										const embedding = await embeddingModel.getEmbedding(cleanContent)
+										const embedding = await embeddingModel.getEmbedding(content)
 										const embeddedChunk = {
 											path: chunk.path,
 											mtime: chunk.mtime,
-											content: cleanContent, // use the cleaned content
+											content: content, // use the already cleaned and merged content
 											embedding,
 											metadata: chunk.metadata,
 										}
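For context on the `backOff` calls throughout this file: assuming it is the `backOff` helper from the `exponential-backoff` npm package (consistent with the `numOfAttempts` and `startingDelay` options that appear later in this diff), it retries the async callback with exponentially growing delays and resolves with the callback's result:

```ts
import { backOff } from 'exponential-backoff'

// Retries up to 3 times, waiting ~500 ms and then ~1000 ms between attempts.
const embedding = await backOff(
	() => embeddingModel.getEmbedding('some text'),
	{ numOfAttempts: 3, startingDelay: 500 },
)
```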
@@ -495,7 +599,10 @@ export class VectorManager {
 				"",
 			],
 		});
-		console.log("textSplitter chunkSize: ", options.chunkSize, "overlap: ", overlap)
+
+		// Set a minimum chunk size to avoid producing overly small chunks
+		const minChunkSize = Math.max(100, Math.floor(options.chunkSize * 0.5)); // at least 100 characters, or 50% of chunkSize
+		console.log("textSplitter chunkSize: ", options.chunkSize, "overlap: ", overlap, "minChunkSize: ", minChunkSize)

 		const skippedFiles: string[] = []
 		const embeddingProgress = { completed: 0, totalChunks: 0 }
@@ -503,7 +610,7 @@ export class VectorManager {
 		// Process files in batches, at most 50 per batch (reduced to avoid exhausting file handles)
 		const FILE_BATCH_SIZE = 50
 		// Reduce the batch size to lower memory pressure
-		const embeddingBatchSize = Math.min(options.batchSize, 10)
+		const embeddingBatchSize = options.batchSize

 		// First count the total number of chunks, for progress reporting
 		let totalChunks = 0
@@ -514,7 +621,13 @@ export class VectorManager {
 				let fileContent = await this.app.vault.cachedRead(file)
 				fileContent = fileContent.replace(/\0/g, '')
 				const fileDocuments = await textSplitter.createDocuments([fileContent])
-				totalChunks += fileDocuments.length
+				// The counting pass must apply the same cleaning and merging logic
+				const cleanedChunks = fileDocuments.map(chunk => ({
+					pageContent: removeMarkdown(chunk.pageContent).replace(/\0/g, '').trim(),
+					metadata: chunk.metadata
+				})).filter(chunk => chunk.pageContent.length > 0)
+				const filteredDocuments = this.mergeSmallChunks(cleanedChunks, minChunkSize)
+				totalChunks += filteredDocuments.length
 			} catch (error) {
 				// Skip files that fail during the counting pass
 			}
@@ -544,21 +657,30 @@ export class VectorManager {
 					const fileDocuments = await textSplitter.createDocuments([
 						fileContent,
 					])
-					return fileDocuments
+
+					// Clean each chunk first, then merge based on the cleaned content
+					const cleanedChunks = fileDocuments.map(chunk => ({
+						pageContent: removeMarkdown(chunk.pageContent).replace(/\0/g, '').trim(),
+						metadata: chunk.metadata
+					})).filter(chunk => chunk.pageContent.length > 0)
+
+					const filteredDocuments = this.mergeSmallChunks(cleanedChunks, minChunkSize)
+					return filteredDocuments
 						.map((chunk): InsertVector | null => {
-							// Keep the raw content; do not call removeMarkdown here
-							const rawContent = chunk.pageContent.replace(/\0/g, '')
-							if (!rawContent || rawContent.trim().length === 0) {
+							const cleanContent = chunk.pageContent
+							if (!cleanContent || cleanContent.trim().length === 0) {
 								return null
 							}
+							// Use Intl.Segmenter to add spaces for better TSVECTOR indexing
+							const segmentedContent = this.segmentTextForTsvector(cleanContent)
 							return {
 								path: file.path,
 								mtime: file.stat.mtime,
-								content: rawContent, // store the raw content
+								content: segmentedContent, // store the segmented content
 								embedding: [],
 								metadata: {
-									startLine: Number(chunk.metadata.loc.lines.from),
-									endLine: Number(chunk.metadata.loc.lines.to),
+									startLine: Number(chunk.metadata.loc?.lines?.from || chunk.metadata.startLine),
+									endLine: Number(chunk.metadata.loc?.lines?.to || chunk.metadata.endLine),
 								},
 							}
 						})
@@ -581,32 +703,35 @@ export class VectorManager {

 			if (embeddingModel.supportsBatch) {
 				// Providers that support batch processing
+				console.log("batchChunks", batchChunks.map((chunk, index) => ({
+					index,
+					contentLength: chunk.content.length,
+				})))
 				for (let j = 0; j < batchChunks.length; j += embeddingBatchSize) {
 					const embeddingBatch = batchChunks.slice(j, Math.min(j + embeddingBatchSize, batchChunks.length))
 					const embeddedBatch: InsertVector[] = []

 					await backOff(
 						async () => {
-							// Strip markdown before embedding
-							const cleanedBatchData = embeddingBatch.map(chunk => {
-								const cleanContent = removeMarkdown(chunk.content)
-								return { chunk, cleanContent }
-							}).filter(({ cleanContent }) => cleanContent && cleanContent.trim().length > 0)
+							// The content was already cleaned and merged upstream; use it directly
+							const validBatchData = embeddingBatch.filter(chunk =>
+								chunk.content && chunk.content.trim().length > 0
+							)

-							if (cleanedBatchData.length === 0) {
+							if (validBatchData.length === 0) {
 								return
 							}

-							const batchTexts = cleanedBatchData.map(({ cleanContent }) => cleanContent)
+							const batchTexts = validBatchData.map(chunk => chunk.content)
 							const batchEmbeddings = await embeddingModel.getBatchEmbeddings(batchTexts)

 							// Merge the embedding results back into the chunk data
-							for (let k = 0; k < cleanedBatchData.length; k++) {
-								const { chunk, cleanContent } = cleanedBatchData[k]
+							for (let k = 0; k < validBatchData.length; k++) {
+								const chunk = validBatchData[k]
 								const embeddedChunk: InsertVector = {
 									path: chunk.path,
 									mtime: chunk.mtime,
-									content: cleanContent, // use the already-cleaned content
+									content: chunk.content, // use the already cleaned and merged content
 									embedding: batchEmbeddings[k],
 									metadata: chunk.metadata,
 								}
@@ -647,18 +772,18 @@ export class VectorManager {
 							try {
 								await backOff(
 									async () => {
-										// Strip markdown before embedding
-										const cleanContent = removeMarkdown(chunk.content).replace(/\0/g, '')
-										// Skip content that is empty after cleaning
-										if (!cleanContent || cleanContent.trim().length === 0) {
+										// The content was already cleaned and merged upstream; use it directly
+										const content = chunk.content.trim()
+										// Skip empty content
+										if (!content || content.length === 0) {
 											return
 										}

-										const embedding = await embeddingModel.getEmbedding(cleanContent)
+										const embedding = await embeddingModel.getEmbedding(content)
 										const embeddedChunk = {
 											path: chunk.path,
 											mtime: chunk.mtime,
-											content: cleanContent, // use the cleaned content
+											content: content, // use the already cleaned and merged content
 											embedding,
 											metadata: chunk.metadata,
 										}
@@ -756,6 +881,10 @@ export class VectorManager {
 				"",
 			],
 		});

+		// Set a minimum chunk size to avoid producing overly small chunks
+		const minChunkSize = Math.max(50, Math.floor(chunkSize * 0.1)); // at least 50 characters, or 10% of chunkSize
+
 		let fileContent = await this.app.vault.cachedRead(file)
 		// Strip null bytes to avoid PostgreSQL UTF-8 encoding errors
 		fileContent = fileContent.replace(/\0/g, '')
@@ -763,21 +892,30 @@ export class VectorManager {
 			fileContent,
 		])

-		const contentChunks: InsertVector[] = fileDocuments
+		// Clean each chunk first, then merge based on the cleaned content
+		const cleanedChunks = fileDocuments.map(chunk => ({
+			pageContent: removeMarkdown(chunk.pageContent).replace(/\0/g, '').trim(),
+			metadata: chunk.metadata
+		})).filter(chunk => chunk.pageContent.length > 0)
+
+		const filteredDocuments = this.mergeSmallChunks(cleanedChunks, minChunkSize)
+
+		const contentChunks: InsertVector[] = filteredDocuments
 			.map((chunk): InsertVector | null => {
-				// Keep the raw content; do not call removeMarkdown here
-				const rawContent = String(chunk.pageContent || '').replace(/\0/g, '')
-				if (!rawContent || rawContent.trim().length === 0) {
+				const cleanContent = chunk.pageContent
+				if (!cleanContent || cleanContent.trim().length === 0) {
 					return null
 				}
+				// Use Intl.Segmenter to add spaces for better TSVECTOR indexing
+				const segmentedContent = this.segmentTextForTsvector(cleanContent)
 				return {
 					path: file.path,
 					mtime: file.stat.mtime,
-					content: rawContent, // store the raw content
+					content: segmentedContent, // store the segmented content
 					embedding: [],
 					metadata: {
-						startLine: Number(chunk.metadata.loc.lines.from),
-						endLine: Number(chunk.metadata.loc.lines.to),
+						startLine: Number(chunk.metadata.loc?.lines?.from || chunk.metadata.startLine),
+						endLine: Number(chunk.metadata.loc?.lines?.to || chunk.metadata.endLine),
 					},
 				}
 			})
@@ -795,34 +933,33 @@ export class VectorManager {

 			const embeddedBatch: InsertVector[] = []

-			await backOff(
-				async () => {
-					// Strip markdown before embedding, only once
-					const cleanedBatchData = batchChunks.map(chunk => {
-						const cleanContent = removeMarkdown(chunk.content).replace(/\0/g, '')
-						return { chunk, cleanContent }
-					}).filter(({ cleanContent }) => cleanContent && cleanContent.trim().length > 0)
-
-					if (cleanedBatchData.length === 0) {
-						return
-					}
-
-					const batchTexts = cleanedBatchData.map(({ cleanContent }) => cleanContent)
-					const batchEmbeddings = await embeddingModel.getBatchEmbeddings(batchTexts)
-
-					// Merge the embedding results back into the chunk data
-					for (let j = 0; j < cleanedBatchData.length; j++) {
-						const { chunk, cleanContent } = cleanedBatchData[j]
-						const embeddedChunk: InsertVector = {
-							path: chunk.path,
-							mtime: chunk.mtime,
-							content: cleanContent, // use the already-cleaned content
-							embedding: batchEmbeddings[j],
-							metadata: chunk.metadata,
-						}
-						embeddedBatch.push(embeddedChunk)
-					}
-				},
+			await backOff(
+				async () => {
+					// The content was already cleaned and merged upstream; use it directly
+					const validBatchData = batchChunks.filter(chunk =>
+						chunk.content && chunk.content.trim().length > 0
+					)
+
+					if (validBatchData.length === 0) {
+						return
+					}
+
+					const batchTexts = validBatchData.map(chunk => chunk.content)
+					const batchEmbeddings = await embeddingModel.getBatchEmbeddings(batchTexts)
+
+					// Merge the embedding results back into the chunk data
+					for (let j = 0; j < validBatchData.length; j++) {
+						const chunk = validBatchData[j]
+						const embeddedChunk: InsertVector = {
+							path: chunk.path,
+							mtime: chunk.mtime,
+							content: chunk.content, // use the already cleaned and merged content
+							embedding: batchEmbeddings[j],
+							metadata: chunk.metadata,
+						}
+						embeddedBatch.push(embeddedChunk)
+					}
+				},
 				{
 					numOfAttempts: 3, // fewer retry attempts
 					startingDelay: 500, // shorter delay
@@ -864,18 +1001,18 @@ export class VectorManager {
 			try {
 				await backOff(
 					async () => {
-						// Strip markdown before embedding
-						const cleanContent = removeMarkdown(chunk.content).replace(/\0/g, '')
-						// Skip content that is empty after cleaning
-						if (!cleanContent || cleanContent.trim().length === 0) {
+						// The content was already cleaned and merged upstream; use it directly
+						const content = chunk.content.trim()
+						// Skip empty content
+						if (!content || content.length === 0) {
 							return
 						}

-						const embedding = await embeddingModel.getEmbedding(cleanContent)
+						const embedding = await embeddingModel.getEmbedding(content)
 						const embeddedChunk = {
 							path: chunk.path,
 							mtime: chunk.mtime,
-							content: cleanContent, // use the cleaned content
+							content: content, // use the already cleaned and merged content
 							embedding,
 							metadata: chunk.metadata,
 						}
@ -6,174 +6,208 @@ import { DatabaseNotInitializedException } from '../../exception'
|
||||
import { InsertVector, SelectVector, vectorTables } from '../../schema'
|
||||
|
||||
export class VectorRepository {
|
||||
private app: App
|
||||
private db: PGliteInterface | null
|
||||
private app: App
|
||||
private db: PGliteInterface | null
|
||||
private stopWords: Set<string>
|
||||
|
||||
constructor(app: App, pgClient: PGliteInterface | null) {
|
||||
this.app = app
|
||||
this.db = pgClient
|
||||
}
|
||||
constructor(app: App, pgClient: PGliteInterface | null) {
|
||||
this.app = app
|
||||
this.db = pgClient
|
||||
this.stopWords = new Set([
|
||||
// Chinese stop words
|
||||
'的', '在', '是', '了', '我', '你', '他', '她', '它', '请问', '如何', '一个', '什么', '怎么',
|
||||
'这', '那', '和', '与', '或', '但', '因为', '所以', '如果', '虽然', '可是', '不过',
|
||||
'也', '都', '还', '就', '又', '很', '最', '更', '非常', '特别', '比较', '相当',
|
||||
'对', '于', '把', '被', '让', '使', '给', '为', '从', '到', '向', '往', '朝',
|
||||
'上', '下', '里', '外', '前', '后', '左', '右', '中', '间', '内', '以', '及',
|
||||
|
||||
private getTableName(embeddingModel: EmbeddingModel): string {
|
||||
const tableDefinition = vectorTables[embeddingModel.dimension]
|
||||
if (!tableDefinition) {
|
||||
throw new Error(`No table definition found for model: ${embeddingModel.id}`)
|
||||
}
|
||||
return tableDefinition.name
|
||||
}
|
||||
// English stop words
|
||||
'a', 'an', 'and', 'are', 'as', 'at', 'be', 'by', 'for', 'from', 'has', 'he',
|
||||
'in', 'is', 'it', 'its', 'of', 'on', 'that', 'the', 'to', 'was', 'were', 'will',
|
||||
'with', 'would', 'could', 'should', 'can', 'may', 'might', 'must', 'shall',
|
||||
'this', 'that', 'these', 'those', 'i', 'you', 'we', 'they', 'me', 'him', 'her',
|
||||
'us', 'them', 'my', 'your', 'his', 'our', 'their', 'am', 'have', 'had', 'do',
|
||||
'does', 'did', 'get', 'got', 'go', 'went', 'come', 'came', 'make', 'made',
|
||||
'take', 'took', 'see', 'saw', 'know', 'knew', 'think', 'thought', 'say', 'said',
|
||||
'tell', 'told', 'ask', 'asked', 'give', 'gave', 'find', 'found', 'work', 'worked',
|
||||
'call', 'called', 'try', 'tried', 'need', 'needed', 'feel', 'felt', 'become',
|
||||
'became', 'leave', 'left', 'put', 'keep', 'kept', 'let', 'begin', 'began',
|
||||
'seem', 'seemed', 'help', 'helped', 'show', 'showed', 'hear', 'heard', 'play',
|
||||
'played', 'run', 'ran', 'move', 'moved', 'live', 'lived', 'believe', 'believed',
|
||||
'hold', 'held', 'bring', 'brought', 'happen', 'happened', 'write', 'wrote',
|
||||
'sit', 'sat', 'stand', 'stood', 'lose', 'lost', 'pay', 'paid', 'meet', 'met',
|
||||
'include', 'included', 'continue', 'continued', 'set', 'learn', 'learned',
|
||||
'change', 'changed', 'lead', 'led', 'understand', 'understood', 'watch', 'watched',
|
||||
'follow', 'followed', 'stop', 'stopped', 'create', 'created', 'speak', 'spoke',
|
||||
'read', 'remember', 'remembered', 'consider', 'considered', 'appear', 'appeared',
|
||||
'buy', 'bought', 'wait', 'waited', 'serve', 'served', 'die', 'died', 'send',
|
||||
'sent', 'expect', 'expected', 'build', 'built', 'stay', 'stayed', 'fall', 'fell',
|
||||
'cut', 'reach', 'reached', 'kill', 'killed', 'remain', 'remained', 'suggest',
|
||||
		'suggested', 'raise', 'raised', 'pass', 'passed', 'sell', 'sold', 'require',
		'required', 'report', 'reported', 'decide', 'decided', 'pull', 'pulled'
	])
	}

	async getAllIndexedFilePaths(embeddingModel: EmbeddingModel): Promise<string[]> {
		if (!this.db) {
			throw new DatabaseNotInitializedException()
		}
		const tableName = this.getTableName(embeddingModel)
		const result = await this.db.query<{ path: string }>(
			`SELECT DISTINCT path FROM "${tableName}"`
		)
		return result.rows.map((row: { path: string }) => row.path)
	}

	private getTableName(embeddingModel: EmbeddingModel): string {
		const tableDefinition = vectorTables[embeddingModel.dimension]
		if (!tableDefinition) {
			throw new Error(`No table definition found for model: ${embeddingModel.id}`)
		}
		return tableDefinition.name
	}

	async getMaxMtime(embeddingModel: EmbeddingModel): Promise<number | null> {
		if (!this.db) {
			throw new DatabaseNotInitializedException()
		}
		const tableName = this.getTableName(embeddingModel)
		const result = await this.db.query<{ max_mtime: number | null }>(
			`SELECT MAX(mtime) as max_mtime FROM "${tableName}"`
		)
		return result.rows[0]?.max_mtime || null
	}

	async getVectorsByFilePath(
		filePath: string,
		embeddingModel: EmbeddingModel,
	): Promise<SelectVector[]> {
		if (!this.db) {
			throw new DatabaseNotInitializedException()
		}
		const tableName = this.getTableName(embeddingModel)
		const result = await this.db.query<SelectVector>(
			`SELECT * FROM "${tableName}" WHERE path = $1`,
			[filePath]
		)
		return result.rows
	}

	async deleteVectorsForSingleFile(
		filePath: string,
		embeddingModel: EmbeddingModel,
	): Promise<void> {
		if (!this.db) {
			throw new DatabaseNotInitializedException()
		}
		const tableName = this.getTableName(embeddingModel)
		await this.db.query(
			`DELETE FROM "${tableName}" WHERE path = $1`,
			[filePath]
		)
	}

	async deleteVectorsForMultipleFiles(
		filePaths: string[],
		embeddingModel: EmbeddingModel,
	): Promise<void> {
		if (!this.db) {
			throw new DatabaseNotInitializedException()
		}
		const tableName = this.getTableName(embeddingModel)
		await this.db.query(
			`DELETE FROM "${tableName}" WHERE path = ANY($1)`,
			[filePaths]
		)
	}

	async clearAllVectors(embeddingModel: EmbeddingModel): Promise<void> {
		if (!this.db) {
			throw new DatabaseNotInitializedException()
		}
		const tableName = this.getTableName(embeddingModel)
		await this.db.query(`DELETE FROM "${tableName}"`)
	}

	async insertVectors(
		data: InsertVector[],
		embeddingModel: EmbeddingModel,
	): Promise<void> {
		if (!this.db) {
			throw new DatabaseNotInitializedException()
		}
		const tableName = this.getTableName(embeddingModel)

		// Build the placeholders for the batched INSERT
		const values = data.map((vector, index) => {
			const offset = index * 5
			return `($${offset + 1}, $${offset + 2}, $${offset + 3}, $${offset + 4}, $${offset + 5})`
		}).join(',')

		const params = data.flatMap(vector => [
			vector.path,
			vector.mtime,
			vector.content.replace(/\0/g, ''), // strip null bytes
			`[${vector.embedding.join(',')}]`, // serialize to PostgreSQL vector format
			vector.metadata
		])

		await this.db.query(
			`INSERT INTO "${tableName}" (path, mtime, content, embedding, metadata)
			VALUES ${values}`,
			params
		)
	}
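
	// Editor's note (illustrative, not part of the original diff): for a batch of two
	// rows, the map above yields values = "($1, $2, $3, $4, $5),($6, $7, $8, $9, $10)"
	// and the flatMap yields a flat ten-element params array, so the whole batch is
	// written to the database in a single round trip.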
	async performSimilaritySearch(
		queryVector: number[],
		embeddingModel: EmbeddingModel,
		options: {
			minSimilarity: number
			limit: number
			scope?: {
				files: string[]
				folders: string[]
			}
		},
	): Promise<
		(Omit<SelectVector, 'embedding'> & {
			similarity: number
		})[]
	> {
		if (!this.db) {
			throw new DatabaseNotInitializedException()
		}
		const tableName = this.getTableName(embeddingModel)

		let scopeCondition = ''
		const params: unknown[] = [`[${queryVector.join(',')}]`, options.minSimilarity, options.limit]
		let paramIndex = 4

		if (options.scope) {
			const conditions: string[] = []

			if (options.scope.files.length > 0) {
				conditions.push(`path = ANY($${paramIndex})`)
				params.push(options.scope.files)
				paramIndex++
			}

			if (options.scope.folders.length > 0) {
				const folderConditions = options.scope.folders.map((folder, idx) => {
					params.push(`${folder}/%`)
					return `path LIKE $${paramIndex + idx}`
				})
				conditions.push(`(${folderConditions.join(' OR ')})`)
				paramIndex += options.scope.folders.length
			}

			if (conditions.length > 0) {
				scopeCondition = `AND (${conditions.join(' OR ')})`
			}
		}

		const query = `
			SELECT
				id, path, mtime, content, metadata,
				1 - (embedding <=> $1::vector) as similarity

@@ -184,54 +218,215 @@ export class VectorRepository

			LIMIT $3
		`

		type SearchResult = Omit<SelectVector, 'embedding'> & { similarity: number }
		const result = await this.db.query<SearchResult>(query, params)
		console.log("performSimilaritySearch result", result.rows)
		return result.rows
	}
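
	// Editor's note (added, not in the original diff): pgvector's `<=>` operator is
	// cosine distance, so `1 - (embedding <=> $1::vector)` converts it to a cosine
	// similarity; $2 (options.minSimilarity) is presumably applied in the WHERE clause
	// elided by the hunk boundary above, with $3 bound to LIMIT.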
	async performFulltextSearch(
		searchQuery: string,
		embeddingModel: EmbeddingModel,
		options: {
			limit: number
			scope?: {
				files: string[]
				folders: string[]
			}
			language?: string
		},
	): Promise<
		(Omit<SelectVector, 'embedding'> & {
			rank: number
		})[]
	> {
		if (!this.db) {
			throw new DatabaseNotInitializedException()
		}

		// handle query processing with segmentation and stop-word filtering
		const processedQuery = this.createFtsQuery(searchQuery, options.language || 'english')

		const tableName = this.getTableName(embeddingModel)
		const language = options.language || 'english'

		let scopeCondition = ''
		const params: unknown[] = [processedQuery, options.limit]
		let paramIndex = 3

		if (options.scope) {
			const conditions: string[] = []

			if (options.scope.files.length > 0) {
				conditions.push(`path = ANY($${paramIndex})`)
				params.push(options.scope.files)
				paramIndex++
			}

			if (options.scope.folders.length > 0) {
				const folderConditions = options.scope.folders.map((folder, idx) => {
					params.push(`${folder}/%`)
					return `path LIKE $${paramIndex + idx}`
				})
				conditions.push(`(${folderConditions.join(' OR ')})`)
				paramIndex += options.scope.folders.length
			}

			if (conditions.length > 0) {
				scopeCondition = `AND (${conditions.join(' OR ')})`
			}
		}

		const query = `
			SELECT
				id, path, mtime, content, metadata,
				ts_rank_cd(
					COALESCE(content_tsv, to_tsvector('${language}', coalesce(content, ''))),
					to_tsquery('${language}', $1)
				) AS rank
			FROM "${tableName}"
			WHERE (
				content_tsv @@ to_tsquery('${language}', $1)
				OR (content_tsv IS NULL AND to_tsvector('${language}', coalesce(content, '')) @@ to_tsquery('${language}', $1))
			)
			${scopeCondition}
			ORDER BY rank DESC
			LIMIT $2
		`
		console.log("performFulltextSearch query", query)
		type SearchResult = Omit<SelectVector, 'embedding'> & { rank: number }
		const result = await this.db.query<SearchResult>(query, params)
		console.log("performFulltextSearch result", result.rows)
		return result.rows
	}
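
	// Editor's note (added): the COALESCE / IS NULL branches above let rows indexed
	// before the full_text_search migration (whose content_tsv is still NULL) match by
	// computing to_tsvector on the fly, at the cost of bypassing the GIN index for
	// those rows.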
	public segmentTextForTsvector(text: string, language: string = 'zh-CN'): string {
		try {
			// Use Intl.Segmenter to add spaces between words for better TSVECTOR indexing
			if (typeof Intl !== 'undefined' && Intl.Segmenter) {
				const segmenter = new Intl.Segmenter(language, { granularity: 'word' })
				const segments = segmenter.segment(text)

				const segmentedText = Array.from(segments)
					.map(segment => segment.segment)
					.join(' ')

				return segmentedText
			}

			// Fallback: add spaces around Chinese characters and punctuation
			return text.replace(/([一-龯])/g, ' $1 ')
				.replace(/\s+/g, ' ')
				.trim()
		} catch (error) {
			console.warn('Failed to segment text for TSVECTOR:', error)
			return text
		}
	}
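
	// Editor's sketch (hypothetical output; the exact segmentation depends on the
	// runtime's ICU data): segmentTextForTsvector('我喜欢机器学习') would return
	// something like '我 喜欢 机器 学习', letting PostgreSQL's default parser see one
	// lexeme per word instead of one unbroken CJK string.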
	private createFtsQuery(query: string, language: string): string {
		try {
			let keywords: string[] = []

			// Try to use Intl.Segmenter for word segmentation
			if (typeof Intl !== 'undefined' && Intl.Segmenter) {
				try {
					const segmenter = new Intl.Segmenter(language, { granularity: 'word' })
					const segments = segmenter.segment(query)

					keywords = Array.from(segments)
						.filter(s => s.isWordLike)
						.map(s => s.segment.trim())
						.filter(word => {
							// Filter out empty strings and stop words
							if (!word || word.length === 0) return false
							return !this.stopWords.has(word.toLowerCase())
						})
				} catch (segmentError) {
					console.warn('Intl.Segmenter failed, falling back to simple splitting:', segmentError)
				}
			}

			// Fall back to simple word splitting if Intl.Segmenter is unavailable or failed
			if (keywords.length === 0) {
				keywords = query
					.split(/[\s\p{P}\p{S}]+/u) // Split by whitespace, punctuation, and symbols
					.map(word => word.trim())
					.filter(word => {
						if (!word || word.length === 0) return false
						return !this.stopWords.has(word.toLowerCase())
					})
			}

			// If no keywords remain, return the original query
			if (keywords.length === 0) {
				return query
			}

			// Join keywords with | (OR) for PostgreSQL full-text search
			const ftsQueryString = keywords.join(' | ')

			console.log(`Original query: "${query}" -> Processed query: "${ftsQueryString}"`)
			return ftsQueryString
		} catch (error) {
			// If all processing fails, return the original query
			console.warn('Failed to process FTS query:', error)
			return query
		}
	}
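
	// Editor's sketch (hypothetical example): 'raise' and 'raised' are in the stop-word
	// set above, so createFtsQuery('raise vector index', 'english') reduces to
	// 'vector | index', an OR expression that to_tsquery() accepts directly.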
	async getWorkspaceStatistics(
		embeddingModel: EmbeddingModel,
		scope?: {
			files: string[]
			folders: string[]
		}
	): Promise<{
		totalFiles: number
		totalChunks: number
	}> {
		if (!this.db) {
			throw new DatabaseNotInitializedException()
		}
		const tableName = this.getTableName(embeddingModel)

		let scopeCondition = ''
		const params: unknown[] = []
		let paramIndex = 1

		if (scope) {
			const conditions: string[] = []

			if (scope.files.length > 0) {
				conditions.push(`path = ANY($${paramIndex})`)
				params.push(scope.files)
				paramIndex++
			}

			if (scope.folders.length > 0) {
				const folderConditions = scope.folders.map((folder, idx) => {
					params.push(`${folder}/%`)
					return `path LIKE $${paramIndex + idx}`
				})
				conditions.push(`(${folderConditions.join(' OR ')})`)
				paramIndex += scope.folders.length
			}

			if (conditions.length > 0) {
				scopeCondition = `WHERE (${conditions.join(' OR ')})`
			}
		}

		const query = `
			SELECT
				COUNT(DISTINCT path) as total_files,
				COUNT(*) as total_chunks

@@ -239,43 +434,43 @@ export class VectorRepository

			${scopeCondition}
		`

		const result = await this.db.query<{
			total_files: number
			total_chunks: number
		}>(query, params)

		const row = result.rows[0]
		return {
			totalFiles: Number(row?.total_files || 0),
			totalChunks: Number(row?.total_chunks || 0)
		}
	}
	async getVaultStatistics(embeddingModel: EmbeddingModel): Promise<{
		totalFiles: number
		totalChunks: number
	}> {
		if (!this.db) {
			throw new DatabaseNotInitializedException()
		}
		const tableName = this.getTableName(embeddingModel)

		const query = `
			SELECT
				COUNT(DISTINCT path) as total_files,
				COUNT(*) as total_chunks
			FROM "${tableName}"
		`

		const result = await this.db.query<{
			total_files: number
			total_chunks: number
		}>(query)

		const row = result.rows[0]
		return {
			totalFiles: Number(row?.total_files || 0),
			totalChunks: Number(row?.total_chunks || 0)
		}
	}
}

@@ -261,5 +261,108 @@ export const migrations: Record<string, SqlMigration> = {
		ALTER TABLE "source_insight_512" ADD COLUMN IF NOT EXISTS "source_mtime" bigint NOT NULL DEFAULT 0;
		ALTER TABLE "source_insight_384" ADD COLUMN IF NOT EXISTS "source_mtime" bigint NOT NULL DEFAULT 0;
		`
	},
	full_text_search: {
		description: "Adds full-text search capabilities to embedding and source insight tables",
		sql: `
		-- Add content_tsv columns to embedding tables
		ALTER TABLE "embeddings_1536" ADD COLUMN IF NOT EXISTS "content_tsv" TSVECTOR;
		ALTER TABLE "embeddings_1024" ADD COLUMN IF NOT EXISTS "content_tsv" TSVECTOR;
		ALTER TABLE "embeddings_768" ADD COLUMN IF NOT EXISTS "content_tsv" TSVECTOR;
		ALTER TABLE "embeddings_512" ADD COLUMN IF NOT EXISTS "content_tsv" TSVECTOR;
		ALTER TABLE "embeddings_384" ADD COLUMN IF NOT EXISTS "content_tsv" TSVECTOR;

		-- Add insight_tsv columns to source insight tables
		ALTER TABLE "source_insight_1536" ADD COLUMN IF NOT EXISTS "insight_tsv" TSVECTOR;
		ALTER TABLE "source_insight_1024" ADD COLUMN IF NOT EXISTS "insight_tsv" TSVECTOR;
		ALTER TABLE "source_insight_768" ADD COLUMN IF NOT EXISTS "insight_tsv" TSVECTOR;
		ALTER TABLE "source_insight_512" ADD COLUMN IF NOT EXISTS "insight_tsv" TSVECTOR;
		ALTER TABLE "source_insight_384" ADD COLUMN IF NOT EXISTS "insight_tsv" TSVECTOR;

		-- Create trigger function for embeddings tables
		CREATE OR REPLACE FUNCTION embeddings_tsv_trigger() RETURNS trigger AS $$
		BEGIN
			NEW.content_tsv := to_tsvector('english', coalesce(NEW.content, ''));
			RETURN NEW;
		END
		$$ LANGUAGE plpgsql;

		-- Create trigger function for source insight tables
		CREATE OR REPLACE FUNCTION source_insight_tsv_trigger() RETURNS trigger AS $$
		BEGIN
			NEW.insight_tsv := to_tsvector('english', coalesce(NEW.insight, ''));
			RETURN NEW;
		END
		$$ LANGUAGE plpgsql;

		-- Create triggers for embeddings tables (drop if exists first)
		DROP TRIGGER IF EXISTS tsvector_update_embeddings_1536 ON "embeddings_1536";
		CREATE TRIGGER tsvector_update_embeddings_1536
			BEFORE INSERT OR UPDATE ON "embeddings_1536"
			FOR EACH ROW EXECUTE FUNCTION embeddings_tsv_trigger();

		DROP TRIGGER IF EXISTS tsvector_update_embeddings_1024 ON "embeddings_1024";
		CREATE TRIGGER tsvector_update_embeddings_1024
			BEFORE INSERT OR UPDATE ON "embeddings_1024"
			FOR EACH ROW EXECUTE FUNCTION embeddings_tsv_trigger();

		DROP TRIGGER IF EXISTS tsvector_update_embeddings_768 ON "embeddings_768";
		CREATE TRIGGER tsvector_update_embeddings_768
			BEFORE INSERT OR UPDATE ON "embeddings_768"
			FOR EACH ROW EXECUTE FUNCTION embeddings_tsv_trigger();

		DROP TRIGGER IF EXISTS tsvector_update_embeddings_512 ON "embeddings_512";
		CREATE TRIGGER tsvector_update_embeddings_512
			BEFORE INSERT OR UPDATE ON "embeddings_512"
			FOR EACH ROW EXECUTE FUNCTION embeddings_tsv_trigger();

		DROP TRIGGER IF EXISTS tsvector_update_embeddings_384 ON "embeddings_384";
		CREATE TRIGGER tsvector_update_embeddings_384
			BEFORE INSERT OR UPDATE ON "embeddings_384"
			FOR EACH ROW EXECUTE FUNCTION embeddings_tsv_trigger();

		-- Create triggers for source insight tables (drop if exists first)
		DROP TRIGGER IF EXISTS tsvector_update_source_insight_1536 ON "source_insight_1536";
		CREATE TRIGGER tsvector_update_source_insight_1536
			BEFORE INSERT OR UPDATE ON "source_insight_1536"
			FOR EACH ROW EXECUTE FUNCTION source_insight_tsv_trigger();

		DROP TRIGGER IF EXISTS tsvector_update_source_insight_1024 ON "source_insight_1024";
		CREATE TRIGGER tsvector_update_source_insight_1024
			BEFORE INSERT OR UPDATE ON "source_insight_1024"
			FOR EACH ROW EXECUTE FUNCTION source_insight_tsv_trigger();

		DROP TRIGGER IF EXISTS tsvector_update_source_insight_768 ON "source_insight_768";
		CREATE TRIGGER tsvector_update_source_insight_768
			BEFORE INSERT OR UPDATE ON "source_insight_768"
			FOR EACH ROW EXECUTE FUNCTION source_insight_tsv_trigger();

		DROP TRIGGER IF EXISTS tsvector_update_source_insight_512 ON "source_insight_512";
		CREATE TRIGGER tsvector_update_source_insight_512
			BEFORE INSERT OR UPDATE ON "source_insight_512"
			FOR EACH ROW EXECUTE FUNCTION source_insight_tsv_trigger();

		DROP TRIGGER IF EXISTS tsvector_update_source_insight_384 ON "source_insight_384";
		CREATE TRIGGER tsvector_update_source_insight_384
			BEFORE INSERT OR UPDATE ON "source_insight_384"
			FOR EACH ROW EXECUTE FUNCTION source_insight_tsv_trigger();

		-- Note: the tsvector columns of existing rows stay NULL; only newly inserted rows
		-- are populated automatically by the triggers. This avoids the file-handle
		-- exhaustion that a mass UPDATE over existing data would cause.

		-- Create GIN indexes for full-text search on embeddings tables
		CREATE INDEX IF NOT EXISTS "embeddings_content_tsv_idx_1536" ON "embeddings_1536" USING GIN(content_tsv);
		CREATE INDEX IF NOT EXISTS "embeddings_content_tsv_idx_1024" ON "embeddings_1024" USING GIN(content_tsv);
		CREATE INDEX IF NOT EXISTS "embeddings_content_tsv_idx_768" ON "embeddings_768" USING GIN(content_tsv);
		CREATE INDEX IF NOT EXISTS "embeddings_content_tsv_idx_512" ON "embeddings_512" USING GIN(content_tsv);
		CREATE INDEX IF NOT EXISTS "embeddings_content_tsv_idx_384" ON "embeddings_384" USING GIN(content_tsv);

		-- Create GIN indexes for full-text search on source insight tables
		CREATE INDEX IF NOT EXISTS "source_insight_tsv_idx_1536" ON "source_insight_1536" USING GIN(insight_tsv);
		CREATE INDEX IF NOT EXISTS "source_insight_tsv_idx_1024" ON "source_insight_1024" USING GIN(insight_tsv);
		CREATE INDEX IF NOT EXISTS "source_insight_tsv_idx_768" ON "source_insight_768" USING GIN(insight_tsv);
		CREATE INDEX IF NOT EXISTS "source_insight_tsv_idx_512" ON "source_insight_512" USING GIN(insight_tsv);
		CREATE INDEX IF NOT EXISTS "source_insight_tsv_idx_384" ON "source_insight_384" USING GIN(insight_tsv);
		`
	}
};
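
// Editor's note (added for illustration, not part of the original diff): once the
// trigger is installed, an INSERT of content 'hello world' into "embeddings_384"
// populates content_tsv with to_tsvector('english', 'hello world'), i.e.
// 'hello':1 'world':2, which the GIN indexes above can then serve for @@ queries.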
@@ -78,6 +78,7 @@ worker({
	// Execute SQL migrations
	for (const [_key, migration] of Object.entries(migrations)) {
		// Split SQL into individual commands and execute them one by one
		console.log("migration: ", migration.description)
		const commands = migration.sql.split('\n\n').filter(cmd => cmd.trim());
		for (const command of commands) {
			await db.exec(command);
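
// Editor's note (added): because the runner splits on '\n\n', blank lines inside
// migration.sql act as command delimiters. Multi-line statements such as the
// plpgsql trigger-function bodies above must therefore contain no internal blank
// lines, or they would be split mid-statement and fail to parse.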