mirror of https://github.com/EthanMarti/infio-copilot.git, synced 2026-01-16 08:21:55 +00:00

update vector manager
commit 34296e6871 (parent c1fbd4da21)
pnpm-lock.yaml (generated, 236 changed lines)
@@ -123,6 +123,9 @@ importers:
       handlebars:
         specifier: ^4.7.7
         version: 4.7.8
+      jieba-wasm:
+        specifier: ^2.2.0
+        version: 2.2.0
       js-tiktoken:
         specifier: ^1.0.15
         version: 1.0.20
@@ -583,9 +586,9 @@ packages:
   '@codemirror/language@6.11.2':
     resolution: {integrity: sha512-p44TsNArL4IVXDTbapUmEkAlvWs2CFQbcfc0ymDsis1kH2wh0gcY96AS29c/vp2d0y2Tquk1EDSaawpzilUiAw==}

-  '@codemirror/language@https://codeload.github.com/lishid/cm-language/tar.gz/6c1c5f5b677f6f6503d1ca2ec47f62f6406cda67':
-    resolution: {tarball: https://codeload.github.com/lishid/cm-language/tar.gz/6c1c5f5b677f6f6503d1ca2ec47f62f6406cda67}
-    version: 6.10.8
+  '@codemirror/language@https://codeload.github.com/lishid/cm-language/tar.gz/a9c3c7efe17dd1d24395ee2a179fe12dd6ed1e76':
+    resolution: {tarball: https://codeload.github.com/lishid/cm-language/tar.gz/a9c3c7efe17dd1d24395ee2a179fe12dd6ed1e76}
+    version: 6.11.2

   '@codemirror/lint@0.20.3':
     resolution: {integrity: sha512-06xUScbbspZ8mKoODQCEx6hz1bjaq9m8W8DxdycWARMiiX1wMtfCh/MoHpaL7ws/KUMwlsFFfp2qhm32oaCvVA==}
@@ -669,8 +672,8 @@ packages:
     cpu: [ppc64]
     os: [aix]

-  '@esbuild/aix-ppc64@0.25.5':
-    resolution: {integrity: sha512-9o3TMmpmftaCMepOdA5k/yDw8SfInyzWWTjYTFCX3kPSDJMROQTb8jg+h9Cnwnmm1vOzvxN7gIfB5V2ewpjtGA==}
+  '@esbuild/aix-ppc64@0.25.6':
+    resolution: {integrity: sha512-ShbM/3XxwuxjFiuVBHA+d3j5dyac0aEVVq1oluIDf71hUw0aRF59dV/efUsIwFnR6m8JNM2FjZOzmaZ8yG61kw==}
     engines: {node: '>=18'}
     cpu: [ppc64]
     os: [aix]
@@ -693,8 +696,8 @@ packages:
     cpu: [arm64]
     os: [android]

-  '@esbuild/android-arm64@0.25.5':
-    resolution: {integrity: sha512-VGzGhj4lJO+TVGV1v8ntCZWJktV7SGCs3Pn1GRWI1SBFtRALoomm8k5E9Pmwg3HOAal2VDc2F9+PM/rEY6oIDg==}
+  '@esbuild/android-arm64@0.25.6':
+    resolution: {integrity: sha512-hd5zdUarsK6strW+3Wxi5qWws+rJhCCbMiC9QZyzoxfk5uHRIE8T287giQxzVpEvCwuJ9Qjg6bEjcRJcgfLqoA==}
     engines: {node: '>=18'}
     cpu: [arm64]
     os: [android]
@@ -717,8 +720,8 @@ packages:
     cpu: [arm]
     os: [android]

-  '@esbuild/android-arm@0.25.5':
-    resolution: {integrity: sha512-AdJKSPeEHgi7/ZhuIPtcQKr5RQdo6OO2IL87JkianiMYMPbCtot9fxPbrMiBADOWWm3T2si9stAiVsGbTQFkbA==}
+  '@esbuild/android-arm@0.25.6':
+    resolution: {integrity: sha512-S8ToEOVfg++AU/bHwdksHNnyLyVM+eMVAOf6yRKFitnwnbwwPNqKr3srzFRe7nzV69RQKb5DgchIX5pt3L53xg==}
     engines: {node: '>=18'}
     cpu: [arm]
     os: [android]
@@ -741,8 +744,8 @@ packages:
     cpu: [x64]
     os: [android]

-  '@esbuild/android-x64@0.25.5':
-    resolution: {integrity: sha512-D2GyJT1kjvO//drbRT3Hib9XPwQeWd9vZoBJn+bu/lVsOZ13cqNdDeqIF/xQ5/VmWvMduP6AmXvylO/PIc2isw==}
+  '@esbuild/android-x64@0.25.6':
+    resolution: {integrity: sha512-0Z7KpHSr3VBIO9A/1wcT3NTy7EB4oNC4upJ5ye3R7taCc2GUdeynSLArnon5G8scPwaU866d3H4BCrE5xLW25A==}
     engines: {node: '>=18'}
     cpu: [x64]
     os: [android]
@@ -765,8 +768,8 @@ packages:
     cpu: [arm64]
     os: [darwin]

-  '@esbuild/darwin-arm64@0.25.5':
-    resolution: {integrity: sha512-GtaBgammVvdF7aPIgH2jxMDdivezgFu6iKpmT+48+F8Hhg5J/sfnDieg0aeG/jfSvkYQU2/pceFPDKlqZzwnfQ==}
+  '@esbuild/darwin-arm64@0.25.6':
+    resolution: {integrity: sha512-FFCssz3XBavjxcFxKsGy2DYK5VSvJqa6y5HXljKzhRZ87LvEi13brPrf/wdyl/BbpbMKJNOr1Sd0jtW4Ge1pAA==}
     engines: {node: '>=18'}
     cpu: [arm64]
     os: [darwin]
@@ -789,8 +792,8 @@ packages:
     cpu: [x64]
     os: [darwin]

-  '@esbuild/darwin-x64@0.25.5':
-    resolution: {integrity: sha512-1iT4FVL0dJ76/q1wd7XDsXrSW+oLoquptvh4CLR4kITDtqi2e/xwXwdCVH8hVHU43wgJdsq7Gxuzcs6Iq/7bxQ==}
+  '@esbuild/darwin-x64@0.25.6':
+    resolution: {integrity: sha512-GfXs5kry/TkGM2vKqK2oyiLFygJRqKVhawu3+DOCk7OxLy/6jYkWXhlHwOoTb0WqGnWGAS7sooxbZowy+pK9Yg==}
     engines: {node: '>=18'}
     cpu: [x64]
     os: [darwin]
@@ -813,8 +816,8 @@ packages:
     cpu: [arm64]
     os: [freebsd]

-  '@esbuild/freebsd-arm64@0.25.5':
-    resolution: {integrity: sha512-nk4tGP3JThz4La38Uy/gzyXtpkPW8zSAmoUhK9xKKXdBCzKODMc2adkB2+8om9BDYugz+uGV7sLmpTYzvmz6Sw==}
+  '@esbuild/freebsd-arm64@0.25.6':
+    resolution: {integrity: sha512-aoLF2c3OvDn2XDTRvn8hN6DRzVVpDlj2B/F66clWd/FHLiHaG3aVZjxQX2DYphA5y/evbdGvC6Us13tvyt4pWg==}
     engines: {node: '>=18'}
     cpu: [arm64]
     os: [freebsd]
@@ -837,8 +840,8 @@ packages:
     cpu: [x64]
     os: [freebsd]

-  '@esbuild/freebsd-x64@0.25.5':
-    resolution: {integrity: sha512-PrikaNjiXdR2laW6OIjlbeuCPrPaAl0IwPIaRv+SMV8CiM8i2LqVUHFC1+8eORgWyY7yhQY+2U2fA55mBzReaw==}
+  '@esbuild/freebsd-x64@0.25.6':
+    resolution: {integrity: sha512-2SkqTjTSo2dYi/jzFbU9Plt1vk0+nNg8YC8rOXXea+iA3hfNJWebKYPs3xnOUf9+ZWhKAaxnQNUf2X9LOpeiMQ==}
     engines: {node: '>=18'}
     cpu: [x64]
     os: [freebsd]
@@ -861,8 +864,8 @@ packages:
     cpu: [arm64]
     os: [linux]

-  '@esbuild/linux-arm64@0.25.5':
-    resolution: {integrity: sha512-Z9kfb1v6ZlGbWj8EJk9T6czVEjjq2ntSYLY2cw6pAZl4oKtfgQuS4HOq41M/BcoLPzrUbNd+R4BXFyH//nHxVg==}
+  '@esbuild/linux-arm64@0.25.6':
+    resolution: {integrity: sha512-b967hU0gqKd9Drsh/UuAm21Khpoh6mPBSgz8mKRq4P5mVK8bpA+hQzmm/ZwGVULSNBzKdZPQBRT3+WuVavcWsQ==}
     engines: {node: '>=18'}
     cpu: [arm64]
     os: [linux]
@@ -885,8 +888,8 @@ packages:
     cpu: [arm]
     os: [linux]

-  '@esbuild/linux-arm@0.25.5':
-    resolution: {integrity: sha512-cPzojwW2okgh7ZlRpcBEtsX7WBuqbLrNXqLU89GxWbNt6uIg78ET82qifUy3W6OVww6ZWobWub5oqZOVtwolfw==}
+  '@esbuild/linux-arm@0.25.6':
+    resolution: {integrity: sha512-SZHQlzvqv4Du5PrKE2faN0qlbsaW/3QQfUUc6yO2EjFcA83xnwm91UbEEVx4ApZ9Z5oG8Bxz4qPE+HFwtVcfyw==}
     engines: {node: '>=18'}
     cpu: [arm]
     os: [linux]
@@ -909,8 +912,8 @@ packages:
     cpu: [ia32]
     os: [linux]

-  '@esbuild/linux-ia32@0.25.5':
-    resolution: {integrity: sha512-sQ7l00M8bSv36GLV95BVAdhJ2QsIbCuCjh/uYrWiMQSUuV+LpXwIqhgJDcvMTj+VsQmqAHL2yYaasENvJ7CDKA==}
+  '@esbuild/linux-ia32@0.25.6':
+    resolution: {integrity: sha512-aHWdQ2AAltRkLPOsKdi3xv0mZ8fUGPdlKEjIEhxCPm5yKEThcUjHpWB1idN74lfXGnZ5SULQSgtr5Qos5B0bPw==}
     engines: {node: '>=18'}
     cpu: [ia32]
     os: [linux]
@@ -933,8 +936,8 @@ packages:
     cpu: [loong64]
     os: [linux]

-  '@esbuild/linux-loong64@0.25.5':
-    resolution: {integrity: sha512-0ur7ae16hDUC4OL5iEnDb0tZHDxYmuQyhKhsPBV8f99f6Z9KQM02g33f93rNH5A30agMS46u2HP6qTdEt6Q1kg==}
+  '@esbuild/linux-loong64@0.25.6':
+    resolution: {integrity: sha512-VgKCsHdXRSQ7E1+QXGdRPlQ/e08bN6WMQb27/TMfV+vPjjTImuT9PmLXupRlC90S1JeNNW5lzkAEO/McKeJ2yg==}
     engines: {node: '>=18'}
     cpu: [loong64]
     os: [linux]
@@ -957,8 +960,8 @@ packages:
     cpu: [mips64el]
     os: [linux]

-  '@esbuild/linux-mips64el@0.25.5':
-    resolution: {integrity: sha512-kB/66P1OsHO5zLz0i6X0RxlQ+3cu0mkxS3TKFvkb5lin6uwZ/ttOkP3Z8lfR9mJOBk14ZwZ9182SIIWFGNmqmg==}
+  '@esbuild/linux-mips64el@0.25.6':
+    resolution: {integrity: sha512-WViNlpivRKT9/py3kCmkHnn44GkGXVdXfdc4drNmRl15zVQ2+D2uFwdlGh6IuK5AAnGTo2qPB1Djppj+t78rzw==}
     engines: {node: '>=18'}
     cpu: [mips64el]
     os: [linux]
@@ -981,8 +984,8 @@ packages:
     cpu: [ppc64]
     os: [linux]

-  '@esbuild/linux-ppc64@0.25.5':
-    resolution: {integrity: sha512-UZCmJ7r9X2fe2D6jBmkLBMQetXPXIsZjQJCjgwpVDz+YMcS6oFR27alkgGv3Oqkv07bxdvw7fyB71/olceJhkQ==}
+  '@esbuild/linux-ppc64@0.25.6':
+    resolution: {integrity: sha512-wyYKZ9NTdmAMb5730I38lBqVu6cKl4ZfYXIs31Baf8aoOtB4xSGi3THmDYt4BTFHk7/EcVixkOV2uZfwU3Q2Jw==}
     engines: {node: '>=18'}
     cpu: [ppc64]
     os: [linux]
@@ -1005,8 +1008,8 @@ packages:
     cpu: [riscv64]
     os: [linux]

-  '@esbuild/linux-riscv64@0.25.5':
-    resolution: {integrity: sha512-kTxwu4mLyeOlsVIFPfQo+fQJAV9mh24xL+y+Bm6ej067sYANjyEw1dNHmvoqxJUCMnkBdKpvOn0Ahql6+4VyeA==}
+  '@esbuild/linux-riscv64@0.25.6':
+    resolution: {integrity: sha512-KZh7bAGGcrinEj4qzilJ4hqTY3Dg2U82c8bv+e1xqNqZCrCyc+TL9AUEn5WGKDzm3CfC5RODE/qc96OcbIe33w==}
     engines: {node: '>=18'}
     cpu: [riscv64]
     os: [linux]
@@ -1029,8 +1032,8 @@ packages:
     cpu: [s390x]
     os: [linux]

-  '@esbuild/linux-s390x@0.25.5':
-    resolution: {integrity: sha512-K2dSKTKfmdh78uJ3NcWFiqyRrimfdinS5ErLSn3vluHNeHVnBAFWC8a4X5N+7FgVE1EjXS1QDZbpqZBjfrqMTQ==}
+  '@esbuild/linux-s390x@0.25.6':
+    resolution: {integrity: sha512-9N1LsTwAuE9oj6lHMyyAM+ucxGiVnEqUdp4v7IaMmrwb06ZTEVCIs3oPPplVsnjPfyjmxwHxHMF8b6vzUVAUGw==}
     engines: {node: '>=18'}
     cpu: [s390x]
     os: [linux]
@@ -1053,14 +1056,14 @@ packages:
     cpu: [x64]
     os: [linux]

-  '@esbuild/linux-x64@0.25.5':
-    resolution: {integrity: sha512-uhj8N2obKTE6pSZ+aMUbqq+1nXxNjZIIjCjGLfsWvVpy7gKCOL6rsY1MhRh9zLtUtAI7vpgLMK6DxjO8Qm9lJw==}
+  '@esbuild/linux-x64@0.25.6':
+    resolution: {integrity: sha512-A6bJB41b4lKFWRKNrWoP2LHsjVzNiaurf7wyj/XtFNTsnPuxwEBWHLty+ZE0dWBKuSK1fvKgrKaNjBS7qbFKig==}
     engines: {node: '>=18'}
     cpu: [x64]
     os: [linux]

-  '@esbuild/netbsd-arm64@0.25.5':
-    resolution: {integrity: sha512-pwHtMP9viAy1oHPvgxtOv+OkduK5ugofNTVDilIzBLpoWAM16r7b/mxBvfpuQDpRQFMfuVr5aLcn4yveGvBZvw==}
+  '@esbuild/netbsd-arm64@0.25.6':
+    resolution: {integrity: sha512-IjA+DcwoVpjEvyxZddDqBY+uJ2Snc6duLpjmkXm/v4xuS3H+3FkLZlDm9ZsAbF9rsfP3zeA0/ArNDORZgrxR/Q==}
     engines: {node: '>=18'}
     cpu: [arm64]
     os: [netbsd]
@@ -1083,14 +1086,14 @@ packages:
     cpu: [x64]
     os: [netbsd]

-  '@esbuild/netbsd-x64@0.25.5':
-    resolution: {integrity: sha512-WOb5fKrvVTRMfWFNCroYWWklbnXH0Q5rZppjq0vQIdlsQKuw6mdSihwSo4RV/YdQ5UCKKvBy7/0ZZYLBZKIbwQ==}
+  '@esbuild/netbsd-x64@0.25.6':
+    resolution: {integrity: sha512-dUXuZr5WenIDlMHdMkvDc1FAu4xdWixTCRgP7RQLBOkkGgwuuzaGSYcOpW4jFxzpzL1ejb8yF620UxAqnBrR9g==}
     engines: {node: '>=18'}
     cpu: [x64]
     os: [netbsd]

-  '@esbuild/openbsd-arm64@0.25.5':
-    resolution: {integrity: sha512-7A208+uQKgTxHd0G0uqZO8UjK2R0DDb4fDmERtARjSHWxqMTye4Erz4zZafx7Di9Cv+lNHYuncAkiGFySoD+Mw==}
+  '@esbuild/openbsd-arm64@0.25.6':
+    resolution: {integrity: sha512-l8ZCvXP0tbTJ3iaqdNf3pjaOSd5ex/e6/omLIQCVBLmHTlfXW3zAxQ4fnDmPLOB1x9xrcSi/xtCWFwCZRIaEwg==}
     engines: {node: '>=18'}
     cpu: [arm64]
     os: [openbsd]
@@ -1113,12 +1116,18 @@ packages:
     cpu: [x64]
     os: [openbsd]

-  '@esbuild/openbsd-x64@0.25.5':
-    resolution: {integrity: sha512-G4hE405ErTWraiZ8UiSoesH8DaCsMm0Cay4fsFWOOUcz8b8rC6uCvnagr+gnioEjWn0wC+o1/TAHt+It+MpIMg==}
+  '@esbuild/openbsd-x64@0.25.6':
+    resolution: {integrity: sha512-hKrmDa0aOFOr71KQ/19JC7az1P0GWtCN1t2ahYAf4O007DHZt/dW8ym5+CUdJhQ/qkZmI1HAF8KkJbEFtCL7gw==}
     engines: {node: '>=18'}
     cpu: [x64]
     os: [openbsd]

+  '@esbuild/openharmony-arm64@0.25.6':
+    resolution: {integrity: sha512-+SqBcAWoB1fYKmpWoQP4pGtx+pUUC//RNYhFdbcSA16617cchuryuhOCRpPsjCblKukAckWsV+aQ3UKT/RMPcA==}
+    engines: {node: '>=18'}
+    cpu: [arm64]
+    os: [openharmony]
+
   '@esbuild/sunos-x64@0.17.3':
     resolution: {integrity: sha512-RxmhKLbTCDAY2xOfrww6ieIZkZF+KBqG7S2Ako2SljKXRFi+0863PspK74QQ7JpmWwncChY25JTJSbVBYGQk2Q==}
     engines: {node: '>=12'}
@@ -1137,8 +1146,8 @@ packages:
     cpu: [x64]
     os: [sunos]

-  '@esbuild/sunos-x64@0.25.5':
-    resolution: {integrity: sha512-l+azKShMy7FxzY0Rj4RCt5VD/q8mG/e+mDivgspo+yL8zW7qEwctQ6YqKX34DTEleFAvCIUviCFX1SDZRSyMQA==}
+  '@esbuild/sunos-x64@0.25.6':
+    resolution: {integrity: sha512-dyCGxv1/Br7MiSC42qinGL8KkG4kX0pEsdb0+TKhmJZgCUDBGmyo1/ArCjNGiOLiIAgdbWgmWgib4HoCi5t7kA==}
     engines: {node: '>=18'}
     cpu: [x64]
     os: [sunos]
@@ -1161,8 +1170,8 @@ packages:
     cpu: [arm64]
     os: [win32]

-  '@esbuild/win32-arm64@0.25.5':
-    resolution: {integrity: sha512-O2S7SNZzdcFG7eFKgvwUEZ2VG9D/sn/eIiz8XRZ1Q/DO5a3s76Xv0mdBzVM5j5R639lXQmPmSo0iRpHqUUrsxw==}
+  '@esbuild/win32-arm64@0.25.6':
+    resolution: {integrity: sha512-42QOgcZeZOvXfsCBJF5Afw73t4veOId//XD3i+/9gSkhSV6Gk3VPlWncctI+JcOyERv85FUo7RxuxGy+z8A43Q==}
     engines: {node: '>=18'}
     cpu: [arm64]
     os: [win32]
@@ -1185,8 +1194,8 @@ packages:
     cpu: [ia32]
     os: [win32]

-  '@esbuild/win32-ia32@0.25.5':
-    resolution: {integrity: sha512-onOJ02pqs9h1iMJ1PQphR+VZv8qBMQ77Klcsqv9CNW2w6yLqoURLcgERAIurY6QE63bbLuqgP9ATqajFLK5AMQ==}
+  '@esbuild/win32-ia32@0.25.6':
+    resolution: {integrity: sha512-4AWhgXmDuYN7rJI6ORB+uU9DHLq/erBbuMoAuB4VWJTu5KtCgcKYPynF0YI1VkBNuEfjNlLrFr9KZPJzrtLkrQ==}
     engines: {node: '>=18'}
     cpu: [ia32]
     os: [win32]
@@ -1209,8 +1218,8 @@ packages:
     cpu: [x64]
     os: [win32]

-  '@esbuild/win32-x64@0.25.5':
-    resolution: {integrity: sha512-TXv6YnJ8ZMVdX+SXWVBo/0p8LTcrUYngpWjvm91TMjjBQii7Oz11Lw5lbDV5Y0TzuhSJHwiH4hEtC1I42mMS0g==}
+  '@esbuild/win32-x64@0.25.6':
+    resolution: {integrity: sha512-NgJPHHbEpLQgDH2MjQu90pzW/5vvXIZ7KOnPyNBm92A6WgZ/7b6fJyUBjoumLqeOQQGqY2QjQxRo97ah4Sj0cA==}
     engines: {node: '>=18'}
     cpu: [x64]
     os: [win32]
@@ -3739,8 +3748,8 @@ packages:
     engines: {node: '>=12'}
     hasBin: true

-  esbuild@0.25.5:
-    resolution: {integrity: sha512-P8OtKZRv/5J5hhz0cUAdu/cLuPIKXpQl1R9pZtvmHWQvrAUVd0UNIPT4IB4W3rNOqVO0rlqHmCIbSwxh/c9yUQ==}
+  esbuild@0.25.6:
+    resolution: {integrity: sha512-GVuzuUwtdsghE3ocJ9Bs8PNoF13HNQ5TXbEi2AhvVb8xU1Iwt9Fos9FEamfoee+u/TOsn7GUWc04lz46n2bbTg==}
     engines: {node: '>=18'}
     hasBin: true

@@ -4723,6 +4732,9 @@ packages:
       node-notifier:
         optional: true

+  jieba-wasm@2.2.0:
+    resolution: {integrity: sha512-IwxgUf+EMutjLair3k41i0ApM33qeHNY9EFBKlI5/XtHcISkGt5YPmUvpDJe3hUflwRYhy9g29ZzTetGZw6XgQ==}
+
   js-base64@3.7.7:
     resolution: {integrity: sha512-7rCnleh0z2CkXhH67J8K1Ytz0b2Y+yxTPL+/KOJoa20hfnVQ/3/T6W/KflYI4bRHRagNeXeU2bkNGI3v1oS/lw==}

@@ -7089,7 +7101,7 @@ snapshots:
       '@lezer/lr': 1.4.2
       style-mod: 4.1.2

-  '@codemirror/language@https://codeload.github.com/lishid/cm-language/tar.gz/6c1c5f5b677f6f6503d1ca2ec47f62f6406cda67':
+  '@codemirror/language@https://codeload.github.com/lishid/cm-language/tar.gz/a9c3c7efe17dd1d24395ee2a179fe12dd6ed1e76':
     dependencies:
       '@codemirror/state': 6.5.2
       '@codemirror/view': 6.38.0
@@ -7190,7 +7202,7 @@ snapshots:
   '@esbuild/aix-ppc64@0.19.12':
     optional: true

-  '@esbuild/aix-ppc64@0.25.5':
+  '@esbuild/aix-ppc64@0.25.6':
     optional: true

   '@esbuild/android-arm64@0.17.3':
@@ -7202,7 +7214,7 @@ snapshots:
   '@esbuild/android-arm64@0.19.12':
     optional: true

-  '@esbuild/android-arm64@0.25.5':
+  '@esbuild/android-arm64@0.25.6':
     optional: true

   '@esbuild/android-arm@0.17.3':
@@ -7214,7 +7226,7 @@ snapshots:
   '@esbuild/android-arm@0.19.12':
     optional: true

-  '@esbuild/android-arm@0.25.5':
+  '@esbuild/android-arm@0.25.6':
     optional: true

   '@esbuild/android-x64@0.17.3':
@@ -7226,7 +7238,7 @@ snapshots:
   '@esbuild/android-x64@0.19.12':
     optional: true

-  '@esbuild/android-x64@0.25.5':
+  '@esbuild/android-x64@0.25.6':
     optional: true

   '@esbuild/darwin-arm64@0.17.3':
@@ -7238,7 +7250,7 @@ snapshots:
   '@esbuild/darwin-arm64@0.19.12':
     optional: true

-  '@esbuild/darwin-arm64@0.25.5':
+  '@esbuild/darwin-arm64@0.25.6':
     optional: true

   '@esbuild/darwin-x64@0.17.3':
@@ -7250,7 +7262,7 @@ snapshots:
   '@esbuild/darwin-x64@0.19.12':
     optional: true

-  '@esbuild/darwin-x64@0.25.5':
+  '@esbuild/darwin-x64@0.25.6':
     optional: true

   '@esbuild/freebsd-arm64@0.17.3':
@@ -7262,7 +7274,7 @@ snapshots:
   '@esbuild/freebsd-arm64@0.19.12':
     optional: true

-  '@esbuild/freebsd-arm64@0.25.5':
+  '@esbuild/freebsd-arm64@0.25.6':
     optional: true

   '@esbuild/freebsd-x64@0.17.3':
@@ -7274,7 +7286,7 @@ snapshots:
   '@esbuild/freebsd-x64@0.19.12':
     optional: true

-  '@esbuild/freebsd-x64@0.25.5':
+  '@esbuild/freebsd-x64@0.25.6':
     optional: true

   '@esbuild/linux-arm64@0.17.3':
@@ -7286,7 +7298,7 @@ snapshots:
   '@esbuild/linux-arm64@0.19.12':
     optional: true

-  '@esbuild/linux-arm64@0.25.5':
+  '@esbuild/linux-arm64@0.25.6':
     optional: true

   '@esbuild/linux-arm@0.17.3':
@@ -7298,7 +7310,7 @@ snapshots:
   '@esbuild/linux-arm@0.19.12':
     optional: true

-  '@esbuild/linux-arm@0.25.5':
+  '@esbuild/linux-arm@0.25.6':
     optional: true

   '@esbuild/linux-ia32@0.17.3':
@@ -7310,7 +7322,7 @@ snapshots:
   '@esbuild/linux-ia32@0.19.12':
     optional: true

-  '@esbuild/linux-ia32@0.25.5':
+  '@esbuild/linux-ia32@0.25.6':
     optional: true

   '@esbuild/linux-loong64@0.17.3':
@@ -7322,7 +7334,7 @@ snapshots:
   '@esbuild/linux-loong64@0.19.12':
     optional: true

-  '@esbuild/linux-loong64@0.25.5':
+  '@esbuild/linux-loong64@0.25.6':
     optional: true

   '@esbuild/linux-mips64el@0.17.3':
@@ -7334,7 +7346,7 @@ snapshots:
   '@esbuild/linux-mips64el@0.19.12':
     optional: true

-  '@esbuild/linux-mips64el@0.25.5':
+  '@esbuild/linux-mips64el@0.25.6':
     optional: true

   '@esbuild/linux-ppc64@0.17.3':
@@ -7346,7 +7358,7 @@ snapshots:
   '@esbuild/linux-ppc64@0.19.12':
     optional: true

-  '@esbuild/linux-ppc64@0.25.5':
+  '@esbuild/linux-ppc64@0.25.6':
     optional: true

   '@esbuild/linux-riscv64@0.17.3':
@@ -7358,7 +7370,7 @@ snapshots:
   '@esbuild/linux-riscv64@0.19.12':
     optional: true

-  '@esbuild/linux-riscv64@0.25.5':
+  '@esbuild/linux-riscv64@0.25.6':
     optional: true

   '@esbuild/linux-s390x@0.17.3':
@@ -7370,7 +7382,7 @@ snapshots:
   '@esbuild/linux-s390x@0.19.12':
     optional: true

-  '@esbuild/linux-s390x@0.25.5':
+  '@esbuild/linux-s390x@0.25.6':
     optional: true

   '@esbuild/linux-x64@0.17.3':
@@ -7382,10 +7394,10 @@ snapshots:
   '@esbuild/linux-x64@0.19.12':
     optional: true

-  '@esbuild/linux-x64@0.25.5':
+  '@esbuild/linux-x64@0.25.6':
     optional: true

-  '@esbuild/netbsd-arm64@0.25.5':
+  '@esbuild/netbsd-arm64@0.25.6':
     optional: true

   '@esbuild/netbsd-x64@0.17.3':
@@ -7397,10 +7409,10 @@ snapshots:
   '@esbuild/netbsd-x64@0.19.12':
     optional: true

-  '@esbuild/netbsd-x64@0.25.5':
+  '@esbuild/netbsd-x64@0.25.6':
     optional: true

-  '@esbuild/openbsd-arm64@0.25.5':
+  '@esbuild/openbsd-arm64@0.25.6':
     optional: true

   '@esbuild/openbsd-x64@0.17.3':
@@ -7412,7 +7424,10 @@ snapshots:
   '@esbuild/openbsd-x64@0.19.12':
     optional: true

-  '@esbuild/openbsd-x64@0.25.5':
+  '@esbuild/openbsd-x64@0.25.6':
     optional: true

+  '@esbuild/openharmony-arm64@0.25.6':
+    optional: true
+
   '@esbuild/sunos-x64@0.17.3':
@@ -7424,7 +7439,7 @@ snapshots:
   '@esbuild/sunos-x64@0.19.12':
     optional: true

-  '@esbuild/sunos-x64@0.25.5':
+  '@esbuild/sunos-x64@0.25.6':
     optional: true

   '@esbuild/win32-arm64@0.17.3':
@@ -7436,7 +7451,7 @@ snapshots:
   '@esbuild/win32-arm64@0.19.12':
     optional: true

-  '@esbuild/win32-arm64@0.25.5':
+  '@esbuild/win32-arm64@0.25.6':
     optional: true

   '@esbuild/win32-ia32@0.17.3':
@@ -7448,7 +7463,7 @@ snapshots:
   '@esbuild/win32-ia32@0.19.12':
     optional: true

-  '@esbuild/win32-ia32@0.25.5':
+  '@esbuild/win32-ia32@0.25.6':
     optional: true

   '@esbuild/win32-x64@0.17.3':
@@ -7460,7 +7475,7 @@ snapshots:
   '@esbuild/win32-x64@0.19.12':
     optional: true

-  '@esbuild/win32-x64@0.25.5':
+  '@esbuild/win32-x64@0.25.6':
     optional: true

   '@eslint-community/eslint-utils@4.7.0(eslint@8.57.1)':
@@ -10339,7 +10354,7 @@ snapshots:

   esbuild-plugin-inline-worker@0.1.1:
     dependencies:
-      esbuild: 0.25.5
+      esbuild: 0.25.6
       find-cache-dir: 3.3.2

   esbuild-register@3.6.0(esbuild@0.19.12):
@@ -10425,33 +10440,34 @@ snapshots:
       '@esbuild/win32-ia32': 0.19.12
       '@esbuild/win32-x64': 0.19.12

-  esbuild@0.25.5:
+  esbuild@0.25.6:
     optionalDependencies:
-      '@esbuild/aix-ppc64': 0.25.5
-      '@esbuild/android-arm': 0.25.5
-      '@esbuild/android-arm64': 0.25.5
-      '@esbuild/android-x64': 0.25.5
-      '@esbuild/darwin-arm64': 0.25.5
-      '@esbuild/darwin-x64': 0.25.5
-      '@esbuild/freebsd-arm64': 0.25.5
-      '@esbuild/freebsd-x64': 0.25.5
-      '@esbuild/linux-arm': 0.25.5
-      '@esbuild/linux-arm64': 0.25.5
-      '@esbuild/linux-ia32': 0.25.5
-      '@esbuild/linux-loong64': 0.25.5
-      '@esbuild/linux-mips64el': 0.25.5
-      '@esbuild/linux-ppc64': 0.25.5
-      '@esbuild/linux-riscv64': 0.25.5
-      '@esbuild/linux-s390x': 0.25.5
-      '@esbuild/linux-x64': 0.25.5
-      '@esbuild/netbsd-arm64': 0.25.5
-      '@esbuild/netbsd-x64': 0.25.5
-      '@esbuild/openbsd-arm64': 0.25.5
-      '@esbuild/openbsd-x64': 0.25.5
-      '@esbuild/sunos-x64': 0.25.5
-      '@esbuild/win32-arm64': 0.25.5
-      '@esbuild/win32-ia32': 0.25.5
-      '@esbuild/win32-x64': 0.25.5
+      '@esbuild/aix-ppc64': 0.25.6
+      '@esbuild/android-arm': 0.25.6
+      '@esbuild/android-arm64': 0.25.6
+      '@esbuild/android-x64': 0.25.6
+      '@esbuild/darwin-arm64': 0.25.6
+      '@esbuild/darwin-x64': 0.25.6
+      '@esbuild/freebsd-arm64': 0.25.6
+      '@esbuild/freebsd-x64': 0.25.6
+      '@esbuild/linux-arm': 0.25.6
+      '@esbuild/linux-arm64': 0.25.6
+      '@esbuild/linux-ia32': 0.25.6
+      '@esbuild/linux-loong64': 0.25.6
+      '@esbuild/linux-mips64el': 0.25.6
+      '@esbuild/linux-ppc64': 0.25.6
+      '@esbuild/linux-riscv64': 0.25.6
+      '@esbuild/linux-s390x': 0.25.6
+      '@esbuild/linux-x64': 0.25.6
+      '@esbuild/netbsd-arm64': 0.25.6
+      '@esbuild/netbsd-x64': 0.25.6
+      '@esbuild/openbsd-arm64': 0.25.6
+      '@esbuild/openbsd-x64': 0.25.6
+      '@esbuild/openharmony-arm64': 0.25.6
+      '@esbuild/sunos-x64': 0.25.6
+      '@esbuild/win32-arm64': 0.25.6
+      '@esbuild/win32-ia32': 0.25.6
+      '@esbuild/win32-x64': 0.25.6

   escalade@3.2.0: {}

@@ -11789,6 +11805,8 @@ snapshots:
       - supports-color
       - ts-node

+  jieba-wasm@2.2.0: {}
+
   js-base64@3.7.7: {}

   js-tiktoken@1.0.20:
@@ -12603,7 +12621,7 @@ snapshots:

   obsidian-dataview@0.5.68:
     dependencies:
-      '@codemirror/language': https://codeload.github.com/lishid/cm-language/tar.gz/6c1c5f5b677f6f6503d1ca2ec47f62f6406cda67
+      '@codemirror/language': https://codeload.github.com/lishid/cm-language/tar.gz/a9c3c7efe17dd1d24395ee2a179fe12dd6ed1e76
       '@codemirror/state': 6.5.2
       '@codemirror/view': 6.38.0
       emoji-regex: 10.4.0
@@ -163,7 +163,7 @@ export class RAGEngine {
 		)
 	}

-	async processQuery({
+	async processSimilarityQuery({
 		query,
 		scope,
 		limit,
@@ -211,6 +211,221 @@ export class RAGEngine {
 		return queryResult
 	}

+	async processQuery({
+		query,
+		scope,
+		limit,
+		language,
+		onQueryProgressChange,
+	}: {
+		query: string
+		scope?: {
+			files: string[]
+			folders: string[]
+		}
+		limit?: number
+		language?: string
+		onQueryProgressChange?: (queryProgress: QueryProgressState) => void
+	}): Promise<
+		(Omit<SelectVector, 'embedding'> & {
+			similarity: number
+		})[]
+	> {
+		if (!this.embeddingModel) {
+			throw new Error('Embedding model is not set')
+		}
+
+		await this.initializeDimension()
+
+		onQueryProgressChange?.({
+			type: 'querying',
+		})
+
+		// Run the similarity search and the full-text search in parallel
+		const [similarityResults, fulltextResults] = await Promise.all([
+			this.processSimilarityQuery({
+				query,
+				scope,
+				limit,
+				onQueryProgressChange: undefined, // avoid firing progress callbacks twice
+			}),
+			this.processFulltextQuery({
+				query,
+				scope,
+				limit,
+				language,
+				onQueryProgressChange: undefined, // avoid firing progress callbacks twice
+			}),
+		])
+
+		// Optimization: if one result set is empty, return the other directly
+		let finalResults: (Omit<SelectVector, 'embedding'> & { similarity: number })[]
+
+		if (fulltextResults.length === 0) {
+			// No full-text results; return the similarity results as-is
+			finalResults = similarityResults
+		} else if (similarityResults.length === 0) {
+			// No similarity results; return the full-text results (converted to the same shape)
+			finalResults = fulltextResults.map(result => ({
+				...result,
+				similarity: 1 - (result.rank - 1) / fulltextResults.length, // convert rank to a similarity score
+			}))
+		} else {
+			// Both searches returned results; merge them with the RRF algorithm
+			const rrf_k = 60 // RRF constant
+			const mergedResults = this.mergeWithRRF(similarityResults, fulltextResults, rrf_k)
+
+			// Convert to the shape expected by the existing interface
+			finalResults = mergedResults.map(result => ({
+				...result,
+				similarity: result.rrfScore, // use the RRF score as the similarity
+			}))
+		}
+
+		onQueryProgressChange?.({
+			type: 'querying-done',
+			queryResult: finalResults,
+		})
+
+		return finalResults
+	}
+
+	/**
+	 * Merge similarity-search and full-text-search results with Reciprocal Rank Fusion (RRF).
+	 * @param similarityResults results of the similarity search
+	 * @param fulltextResults results of the full-text search
+	 * @param k RRF constant, typically 60
+	 * @returns merged results, sorted by RRF score
+	 */
+	private mergeWithRRF(
+		similarityResults: (Omit<SelectVector, 'embedding'> & { similarity: number })[],
+		fulltextResults: (Omit<SelectVector, 'embedding'> & { rank: number })[],
+		k: number = 60
+	): (Omit<SelectVector, 'embedding'> & { rrfScore: number })[] {
+		// Map from document key to its accumulated RRF score
+		const rrfScores = new Map<string, {
+			doc: Omit<SelectVector, 'embedding'>,
+			score: number
+		}>()
+
+		// Score the similarity-search results
+		similarityResults.forEach((result, index) => {
+			const key = `${result.path}-${result.id}`
+			const rank = index + 1
+			const rrfScore = 1 / (k + rank)
+
+			if (rrfScores.has(key)) {
+				const existing = rrfScores.get(key)
+				if (existing) {
+					existing.score += rrfScore
+				}
+			} else {
+				rrfScores.set(key, {
+					doc: {
+						id: result.id,
+						path: result.path,
+						mtime: result.mtime,
+						content: result.content,
+						metadata: result.metadata,
+					},
+					score: rrfScore
+				})
+			}
+		})
+
+		// Score the full-text-search results
+		fulltextResults.forEach((result, index) => {
+			const key = `${result.path}-${result.id}`
+			const rank = index + 1
+			const rrfScore = 1 / (k + rank)
+
+			if (rrfScores.has(key)) {
+				const existing = rrfScores.get(key)
+				if (existing) {
+					existing.score += rrfScore
+				}
+			} else {
+				rrfScores.set(key, {
+					doc: {
+						id: result.id,
+						path: result.path,
+						mtime: result.mtime,
+						content: result.content,
+						metadata: result.metadata,
+					},
+					score: rrfScore
+				})
+			}
+		})
+
+		// Convert to an array and normalize
+		const results = Array.from(rrfScores.values())
+
+		// Find the maximum score, for normalization
+		const maxScore = Math.max(...results.map(r => r.score))
+
+		// Normalize to the 0-1 range and sort by score
+		const mergedResults = results
+			.map(({ doc, score }) => ({
+				...doc,
+				rrfScore: maxScore > 0 ? score / maxScore : 0 // normalized to 0-1
+			}))
+			.sort((a, b) => b.rrfScore - a.rrfScore)
+
+		return mergedResults
+	}
+
+	async processFulltextQuery({
+		query,
+		scope,
+		limit,
+		language,
+		onQueryProgressChange,
+	}: {
+		query: string
+		scope?: {
+			files: string[]
+			folders: string[]
+		}
+		limit?: number
+		language?: string
+		onQueryProgressChange?: (queryProgress: QueryProgressState) => void
+	}): Promise<
+		(Omit<SelectVector, 'embedding'> & {
+			rank: number
+		})[]
+	> {
+		if (!this.embeddingModel) {
+			throw new Error('Embedding model is not set')
+		}
+
+		await this.initializeDimension()
+
+		onQueryProgressChange?.({
+			type: 'querying',
+		})
+
+		const queryResult = await this.vectorManager.performFulltextSearch(
+			query,
+			this.embeddingModel,
+			{
+				limit: limit ?? this.settings.ragOptions.limit,
+				scope,
+				language: language || 'english',
+			},
+		)
+
+		onQueryProgressChange?.({
+			type: 'querying-done',
+			queryResult: queryResult.map(result => ({
+				...result,
+				similarity: result.rank, // for compatibility with the QueryProgressState type
+			})),
+		})
+
+		return queryResult
+	}
+
 	async getEmbedding(query: string): Promise<number[]> {
 		if (!this.embeddingModel) {
 			throw new Error('Embedding model is not set')
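A note on the fusion step added above: RRF scores a document as 1/(k + rank) in each list where it appears and sums the contributions, so with k = 60 a chunk ranked 2nd by vector search and 5th by full-text search scores 1/62 + 1/65 ≈ 0.032, beating a chunk that tops only one list (1/61 ≈ 0.016). A self-contained sketch of the same arithmetic (the keys here are made up; the plugin's real keys are `${path}-${id}`, as in `mergeWithRRF`):

```ts
// Minimal RRF illustration with k = 60, matching the constant used above.
const k = 60
const similarityRanking = ['notes/a.md-1', 'notes/b.md-7', 'notes/c.md-2']
const fulltextRanking = ['notes/b.md-7', 'notes/a.md-1', 'notes/d.md-4']

const scores = new Map<string, number>()
for (const ranking of [similarityRanking, fulltextRanking]) {
	ranking.forEach((key, index) => {
		// rank is 1-based, so the contribution is 1 / (k + index + 1)
		scores.set(key, (scores.get(key) ?? 0) + 1 / (k + index + 1))
	})
}

// 'notes/a.md-1' and 'notes/b.md-7' appear in both lists, so they outrank the rest.
console.log([...scores.entries()].sort((a, b) => b[1] - a[1]))
```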
@@ -33,6 +33,71 @@ export class VectorManager {
 		this.repository = new VectorRepository(app, dbManager.getPgClient() as any)
 	}

+	// Helper that merges undersized chunks (merging only within a single file)
+	private mergeSmallChunks(chunks: { pageContent: string; metadata: any }[], minChunkSize: number): typeof chunks {
+		if (!chunks || chunks.length === 0) {
+			return []
+		}
+
+		const mergedChunks: typeof chunks = []
+		let currentChunkBuffer = ""
+		let currentMetadata: any = null
+
+		for (const chunk of chunks) {
+			const content = chunk.pageContent.trim()
+			if (content.length === 0) continue
+
+			// Append the current chunk to the buffer
+			const combined = currentChunkBuffer ? `${currentChunkBuffer} ${content}` : content
+
+			// Update the metadata, tracking the start and end lines
+			const combinedMetadata = currentMetadata ? {
+				...currentMetadata,
+				endLine: chunk.metadata?.loc?.lines?.to || chunk.metadata?.endLine || currentMetadata.endLine
+			} : {
+				...chunk.metadata,
+				startLine: chunk.metadata?.loc?.lines?.from || chunk.metadata?.startLine,
+				endLine: chunk.metadata?.loc?.lines?.to || chunk.metadata?.endLine
+			}
+
+			if (combined.length < minChunkSize) {
+				// Still too small after combining; keep buffering and continue the loop
+				currentChunkBuffer = combined
+				currentMetadata = combinedMetadata
+			} else {
+				// The combined chunk reached the minimum size; push it and clear the buffer
+				mergedChunks.push({
+					pageContent: combined,
+					metadata: combinedMetadata
+				})
+				currentChunkBuffer = ""
+				currentMetadata = null
+			}
+		}
+
+		// Handle any small chunk left in the buffer after the loop
+		if (currentChunkBuffer) {
+			if (mergedChunks.length > 0) {
+				// Strategy 1: merge the leftover into the last merged chunk
+				const lastChunk = mergedChunks[mergedChunks.length - 1]
+				lastChunk.pageContent += ` ${currentChunkBuffer}`
+				lastChunk.metadata.endLine = currentMetadata?.endLine || lastChunk.metadata.endLine
+			} else {
+				// Strategy 2: if no chunk was large enough, emit the leftover as its own chunk
+				mergedChunks.push({
+					pageContent: currentChunkBuffer,
+					metadata: currentMetadata
+				})
+			}
+		}
+		console.log("mergedChunks: ", mergedChunks)
+		return mergedChunks
+	}
+
+	private segmentTextForTsvector(text: string): string {
+		return this.repository.segmentTextForTsvector(text)
+	}
+
 	async performSimilaritySearch(
 		queryVector: number[],
 		embeddingModel: EmbeddingModel,
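To make the merge behavior above concrete, here is a hypothetical run (the inputs are invented; real chunks come from the text splitter): fragments of 20, 50, and 80 characters with `minChunkSize = 60` yield two chunks, because the first two are buffered and joined once they cross the 60-character floor, and their line metadata is widened to cover both.

```ts
// Hypothetical inputs in the splitter's { pageContent, metadata } shape.
const chunks = [
	{ pageContent: 'a'.repeat(20), metadata: { loc: { lines: { from: 1, to: 2 } } } },
	{ pageContent: 'b'.repeat(50), metadata: { loc: { lines: { from: 3, to: 5 } } } },
	{ pageContent: 'c'.repeat(80), metadata: { loc: { lines: { from: 6, to: 9 } } } },
]

// mergeSmallChunks(chunks, 60) would return:
//   [0] the 20 a's + ' ' + 50 b's (71 chars), startLine 1, endLine 5
//   [1] the 80 c's on their own, startLine 6, endLine 9
```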
@@ -56,6 +121,29 @@ export class VectorManager {
 		)
 	}

+	async performFulltextSearch(
+		searchQuery: string,
+		embeddingModel: EmbeddingModel,
+		options: {
+			limit: number
+			scope?: {
+				files: string[]
+				folders: string[]
+			}
+			language?: string
+		},
+	): Promise<
+		(Omit<SelectVector, 'embedding'> & {
+			rank: number
+		})[]
+	> {
+		return await this.repository.performFulltextSearch(
+			searchQuery,
+			embeddingModel,
+			options,
+		)
+	}
+
 	async getWorkspaceStatistics(
 		embeddingModel: EmbeddingModel,
 		workspace?: Workspace
@@ -197,7 +285,10 @@ export class VectorManager {
 				"",
 			],
 		});
-		console.log("textSplitter chunkSize: ", options.chunkSize, "overlap: ", overlap)
+
+		// Set a minimum chunk size to avoid producing overly small chunks
+		const minChunkSize = Math.max(100, Math.floor(options.chunkSize * 0.3)); // at least 100 characters, or 30% of chunkSize
+		console.log("textSplitter chunkSize: ", options.chunkSize, "overlap: ", overlap, "minChunkSize: ", minChunkSize)

 		const skippedFiles: string[] = []
 		const embeddingProgress = { completed: 0, totalChunks: 0 }
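As a quick sanity check of that floor (illustrative numbers, not values from the plugin's settings): `Math.max(100, Math.floor(chunkSize * 0.3))` gives 300 for a chunkSize of 1000, while for any chunkSize below roughly 334 the absolute 100-character floor wins.

```ts
// Illustration only; 1000 and 200 are assumed chunk sizes.
const floorFor = (chunkSize: number) => Math.max(100, Math.floor(chunkSize * 0.3))
console.log(floorFor(1000)) // 300
console.log(floorFor(200))  // 100: floor(60) loses to the 100-character minimum
```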
@@ -205,7 +296,7 @@ export class VectorManager {
 		// Process files in batches, at most 50 per batch (reduced to avoid exhausting file handles)
 		const FILE_BATCH_SIZE = 50
 		// Reduce the batch size to lower memory pressure
-		const embeddingBatchSize = Math.min(options.batchSize, 10)
+		const embeddingBatchSize = options.batchSize

 		// First count the total number of chunks, for progress reporting
 		let totalChunks = 0
@@ -216,7 +307,13 @@ export class VectorManager {
 				let fileContent = await this.app.vault.cachedRead(file)
 				fileContent = fileContent.replace(/\0/g, '')
 				const fileDocuments = await textSplitter.createDocuments([fileContent])
-				totalChunks += fileDocuments.length
+				// The counting pass must apply the same cleaning and merging logic
+				const cleanedChunks = fileDocuments.map(chunk => ({
+					pageContent: removeMarkdown(chunk.pageContent).replace(/\0/g, '').trim(),
+					metadata: chunk.metadata
+				})).filter(chunk => chunk.pageContent.length > 0)
+				const filteredDocuments = this.mergeSmallChunks(cleanedChunks, minChunkSize)
+				totalChunks += filteredDocuments.length
 			} catch (error) {
 				// Skip files that fail during the counting pass
 			}
@@ -246,21 +343,30 @@ export class VectorManager {
 					const fileDocuments = await textSplitter.createDocuments([
 						fileContent,
 					])
-					return fileDocuments
+
+					// Clean each chunk first, then merge based on the cleaned content
+					const cleanedChunks = fileDocuments.map(chunk => ({
+						pageContent: removeMarkdown(chunk.pageContent).replace(/\0/g, '').trim(),
+						metadata: chunk.metadata
+					})).filter(chunk => chunk.pageContent.length > 0)
+
+					const filteredDocuments = this.mergeSmallChunks(cleanedChunks, minChunkSize)
+					return filteredDocuments
 						.map((chunk): InsertVector | null => {
-							// Keep the raw content; do not call removeMarkdown here
-							const rawContent = chunk.pageContent.replace(/\0/g, '')
-							if (!rawContent || rawContent.trim().length === 0) {
+							const cleanContent = chunk.pageContent
+							if (!cleanContent || cleanContent.trim().length === 0) {
 								return null
 							}
+							// Use Intl.Segmenter to add spaces for better TSVECTOR indexing
+							const segmentedContent = this.segmentTextForTsvector(cleanContent)
 							return {
 								path: file.path,
 								mtime: file.stat.mtime,
-								content: rawContent, // store the raw content
+								content: segmentedContent, // store the segmented content
 								embedding: [],
 								metadata: {
-									startLine: Number(chunk.metadata.loc.lines.from),
-									endLine: Number(chunk.metadata.loc.lines.to),
+									startLine: Number(chunk.metadata.loc?.lines?.from || chunk.metadata.startLine),
+									endLine: Number(chunk.metadata.loc?.lines?.to || chunk.metadata.endLine),
 								},
 							}
 						})
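`segmentTextForTsvector` delegates to the repository, whose implementation is not part of this diff; per the comment above, it uses `Intl.Segmenter` to space-separate words so that Postgres `to_tsvector()` can tokenize CJK text. A plausible sketch of that idea, assuming word-granularity segmentation (this is not the repository's actual code):

```ts
// Sketch: space-separate word-like segments so to_tsvector() can split CJK text.
function segmentForTsvector(text: string): string {
	const segmenter = new Intl.Segmenter('zh', { granularity: 'word' })
	return Array.from(segmenter.segment(text))
		.filter(seg => seg.isWordLike)
		.map(seg => seg.segment)
		.join(' ')
}

// segmentForTsvector('我喜欢读书') → roughly '我 喜欢 读书', three indexable lexemes.
```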
@@ -280,7 +386,6 @@ export class VectorManager {

 			// Step 2: embedding
-			console.log(`Embedding ${batchChunks.length} chunks for current file batch`)

 			if (embeddingModel.supportsBatch) {
 				// Providers that support batch processing
 				for (let j = 0; j < batchChunks.length; j += embeddingBatchSize) {
@@ -289,26 +394,25 @@ export class VectorManager {

 					await backOff(
 						async () => {
-							// Strip markdown before embedding
-							const cleanedBatchData = embeddingBatch.map(chunk => {
-								const cleanContent = removeMarkdown(chunk.content)
-								return { chunk, cleanContent }
-							}).filter(({ cleanContent }) => cleanContent && cleanContent.trim().length > 0)
+							// The content was already cleaned and merged upstream; use it directly
+							const validBatchData = embeddingBatch.filter(chunk =>
+								chunk.content && chunk.content.trim().length > 0
+							)

-							if (cleanedBatchData.length === 0) {
+							if (validBatchData.length === 0) {
 								return
 							}

-							const batchTexts = cleanedBatchData.map(({ cleanContent }) => cleanContent)
+							const batchTexts = validBatchData.map(chunk => chunk.content)
 							const batchEmbeddings = await embeddingModel.getBatchEmbeddings(batchTexts)

 							// Merge the embedding results back into the chunk data
-							for (let k = 0; k < cleanedBatchData.length; k++) {
-								const { chunk, cleanContent } = cleanedBatchData[k]
+							for (let k = 0; k < validBatchData.length; k++) {
+								const chunk = validBatchData[k]
 								const embeddedChunk: InsertVector = {
 									path: chunk.path,
 									mtime: chunk.mtime,
-									content: cleanContent, // use the already-cleaned content
+									content: chunk.content, // use the already cleaned and merged content
 									embedding: batchEmbeddings[k],
 									metadata: chunk.metadata,
 								}
@@ -349,18 +453,18 @@ export class VectorManager {
 							try {
 								await backOff(
 									async () => {
-										// Strip markdown before embedding
-										const cleanContent = removeMarkdown(chunk.content).replace(/\0/g, '')
-										// Skip content that is empty after cleaning
-										if (!cleanContent || cleanContent.trim().length === 0) {
+										// The content was already cleaned and merged upstream; use it directly
+										const content = chunk.content.trim()
+										// Skip empty content
+										if (!content || content.length === 0) {
 											return
 										}

-										const embedding = await embeddingModel.getEmbedding(cleanContent)
+										const embedding = await embeddingModel.getEmbedding(content)
 										const embeddedChunk = {
 											path: chunk.path,
 											mtime: chunk.mtime,
-											content: cleanContent, // use the cleaned content
+											content: content, // use the already cleaned and merged content
 											embedding,
 											metadata: chunk.metadata,
 										}
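For context on the `backOff` calls throughout this file: assuming it is the `backOff` helper from the `exponential-backoff` npm package (consistent with the `numOfAttempts` and `startingDelay` options that appear later in this diff), it retries the async callback with exponentially growing delays and resolves with the callback's result:

```ts
import { backOff } from 'exponential-backoff'

// Retries up to 3 times, waiting ~500 ms and then ~1000 ms between attempts.
const embedding = await backOff(
	() => embeddingModel.getEmbedding('some text'),
	{ numOfAttempts: 3, startingDelay: 500 },
)
```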
@@ -495,7 +599,10 @@ export class VectorManager {
 				"",
 			],
 		});
-		console.log("textSplitter chunkSize: ", options.chunkSize, "overlap: ", overlap)
+
+		// Set a minimum chunk size to avoid producing overly small chunks
+		const minChunkSize = Math.max(100, Math.floor(options.chunkSize * 0.5)); // at least 100 characters, or 50% of chunkSize
+		console.log("textSplitter chunkSize: ", options.chunkSize, "overlap: ", overlap, "minChunkSize: ", minChunkSize)

 		const skippedFiles: string[] = []
 		const embeddingProgress = { completed: 0, totalChunks: 0 }
@@ -503,7 +610,7 @@ export class VectorManager {
 		// Process files in batches, at most 50 per batch (reduced to avoid exhausting file handles)
 		const FILE_BATCH_SIZE = 50
 		// Reduce the batch size to lower memory pressure
-		const embeddingBatchSize = Math.min(options.batchSize, 10)
+		const embeddingBatchSize = options.batchSize

 		// First count the total number of chunks, for progress reporting
 		let totalChunks = 0
@@ -514,7 +621,13 @@ export class VectorManager {
 				let fileContent = await this.app.vault.cachedRead(file)
 				fileContent = fileContent.replace(/\0/g, '')
 				const fileDocuments = await textSplitter.createDocuments([fileContent])
-				totalChunks += fileDocuments.length
+				// The counting pass must apply the same cleaning and merging logic
+				const cleanedChunks = fileDocuments.map(chunk => ({
+					pageContent: removeMarkdown(chunk.pageContent).replace(/\0/g, '').trim(),
+					metadata: chunk.metadata
+				})).filter(chunk => chunk.pageContent.length > 0)
+				const filteredDocuments = this.mergeSmallChunks(cleanedChunks, minChunkSize)
+				totalChunks += filteredDocuments.length
 			} catch (error) {
 				// Skip files that fail during the counting pass
 			}
@@ -544,21 +657,30 @@ export class VectorManager {
 					const fileDocuments = await textSplitter.createDocuments([
 						fileContent,
 					])
-					return fileDocuments
+
+					// Clean each chunk first, then merge based on the cleaned content
+					const cleanedChunks = fileDocuments.map(chunk => ({
+						pageContent: removeMarkdown(chunk.pageContent).replace(/\0/g, '').trim(),
+						metadata: chunk.metadata
+					})).filter(chunk => chunk.pageContent.length > 0)
+
+					const filteredDocuments = this.mergeSmallChunks(cleanedChunks, minChunkSize)
+					return filteredDocuments
 						.map((chunk): InsertVector | null => {
-							// Keep the raw content; do not call removeMarkdown here
-							const rawContent = chunk.pageContent.replace(/\0/g, '')
-							if (!rawContent || rawContent.trim().length === 0) {
+							const cleanContent = chunk.pageContent
+							if (!cleanContent || cleanContent.trim().length === 0) {
 								return null
 							}
+							// Use Intl.Segmenter to add spaces for better TSVECTOR indexing
+							const segmentedContent = this.segmentTextForTsvector(cleanContent)
 							return {
 								path: file.path,
 								mtime: file.stat.mtime,
-								content: rawContent, // store the raw content
+								content: segmentedContent, // store the segmented content
 								embedding: [],
 								metadata: {
-									startLine: Number(chunk.metadata.loc.lines.from),
-									endLine: Number(chunk.metadata.loc.lines.to),
+									startLine: Number(chunk.metadata.loc?.lines?.from || chunk.metadata.startLine),
+									endLine: Number(chunk.metadata.loc?.lines?.to || chunk.metadata.endLine),
 								},
 							}
 						})
@@ -581,32 +703,35 @@ export class VectorManager {

 			if (embeddingModel.supportsBatch) {
 				// Providers that support batch processing
+				console.log("batchChunks", batchChunks.map((chunk, index) => ({
+					index,
+					contentLength: chunk.content.length,
+				})))
 				for (let j = 0; j < batchChunks.length; j += embeddingBatchSize) {
 					const embeddingBatch = batchChunks.slice(j, Math.min(j + embeddingBatchSize, batchChunks.length))
 					const embeddedBatch: InsertVector[] = []

 					await backOff(
 						async () => {
-							// Strip markdown before embedding
-							const cleanedBatchData = embeddingBatch.map(chunk => {
-								const cleanContent = removeMarkdown(chunk.content)
-								return { chunk, cleanContent }
-							}).filter(({ cleanContent }) => cleanContent && cleanContent.trim().length > 0)
+							// The content was already cleaned and merged upstream; use it directly
+							const validBatchData = embeddingBatch.filter(chunk =>
+								chunk.content && chunk.content.trim().length > 0
+							)

-							if (cleanedBatchData.length === 0) {
+							if (validBatchData.length === 0) {
 								return
 							}

-							const batchTexts = cleanedBatchData.map(({ cleanContent }) => cleanContent)
+							const batchTexts = validBatchData.map(chunk => chunk.content)
 							const batchEmbeddings = await embeddingModel.getBatchEmbeddings(batchTexts)

 							// Merge the embedding results back into the chunk data
-							for (let k = 0; k < cleanedBatchData.length; k++) {
-								const { chunk, cleanContent } = cleanedBatchData[k]
+							for (let k = 0; k < validBatchData.length; k++) {
+								const chunk = validBatchData[k]
 								const embeddedChunk: InsertVector = {
 									path: chunk.path,
 									mtime: chunk.mtime,
-									content: cleanContent, // use the already-cleaned content
+									content: chunk.content, // use the already cleaned and merged content
 									embedding: batchEmbeddings[k],
 									metadata: chunk.metadata,
 								}
@@ -647,18 +772,18 @@ export class VectorManager {
 							try {
 								await backOff(
 									async () => {
-										// Strip markdown before embedding
-										const cleanContent = removeMarkdown(chunk.content).replace(/\0/g, '')
-										// Skip content that is empty after cleaning
-										if (!cleanContent || cleanContent.trim().length === 0) {
+										// The content was already cleaned and merged upstream; use it directly
+										const content = chunk.content.trim()
+										// Skip empty content
+										if (!content || content.length === 0) {
 											return
 										}

-										const embedding = await embeddingModel.getEmbedding(cleanContent)
+										const embedding = await embeddingModel.getEmbedding(content)
 										const embeddedChunk = {
 											path: chunk.path,
 											mtime: chunk.mtime,
-											content: cleanContent, // use the cleaned content
+											content: content, // use the already cleaned and merged content
 											embedding,
 											metadata: chunk.metadata,
 										}
@@ -756,6 +881,10 @@ export class VectorManager {
 				"",
 			],
 		});

+		// Set a minimum chunk size to avoid producing overly small chunks
+		const minChunkSize = Math.max(50, Math.floor(chunkSize * 0.1)); // at least 50 characters, or 10% of chunkSize
+
 		let fileContent = await this.app.vault.cachedRead(file)
 		// Strip null bytes to avoid PostgreSQL UTF-8 encoding errors
 		fileContent = fileContent.replace(/\0/g, '')
@@ -763,21 +892,30 @@ export class VectorManager {
 			fileContent,
 		])

-		const contentChunks: InsertVector[] = fileDocuments
+		// Clean each chunk first, then merge based on the cleaned content
+		const cleanedChunks = fileDocuments.map(chunk => ({
+			pageContent: removeMarkdown(chunk.pageContent).replace(/\0/g, '').trim(),
+			metadata: chunk.metadata
+		})).filter(chunk => chunk.pageContent.length > 0)
+
+		const filteredDocuments = this.mergeSmallChunks(cleanedChunks, minChunkSize)
+
+		const contentChunks: InsertVector[] = filteredDocuments
 			.map((chunk): InsertVector | null => {
-				// Keep the raw content; do not call removeMarkdown here
-				const rawContent = String(chunk.pageContent || '').replace(/\0/g, '')
-				if (!rawContent || rawContent.trim().length === 0) {
+				const cleanContent = chunk.pageContent
+				if (!cleanContent || cleanContent.trim().length === 0) {
 					return null
 				}
+				// Use Intl.Segmenter to add spaces for better TSVECTOR indexing
+				const segmentedContent = this.segmentTextForTsvector(cleanContent)
 				return {
 					path: file.path,
 					mtime: file.stat.mtime,
-					content: rawContent, // store the raw content
+					content: segmentedContent, // store the segmented content
 					embedding: [],
 					metadata: {
-						startLine: Number(chunk.metadata.loc.lines.from),
-						endLine: Number(chunk.metadata.loc.lines.to),
+						startLine: Number(chunk.metadata.loc?.lines?.from || chunk.metadata.startLine),
+						endLine: Number(chunk.metadata.loc?.lines?.to || chunk.metadata.endLine),
 					},
 				}
 			})
@@ -795,34 +933,33 @@ export class VectorManager {

 			const embeddedBatch: InsertVector[] = []

-			await backOff(
-				async () => {
-					// Strip markdown before embedding, only once
-					const cleanedBatchData = batchChunks.map(chunk => {
-						const cleanContent = removeMarkdown(chunk.content).replace(/\0/g, '')
-						return { chunk, cleanContent }
-					}).filter(({ cleanContent }) => cleanContent && cleanContent.trim().length > 0)
-
-					if (cleanedBatchData.length === 0) {
-						return
-					}
-
-					const batchTexts = cleanedBatchData.map(({ cleanContent }) => cleanContent)
-					const batchEmbeddings = await embeddingModel.getBatchEmbeddings(batchTexts)
-
-					// Merge the embedding results back into the chunk data
-					for (let j = 0; j < cleanedBatchData.length; j++) {
-						const { chunk, cleanContent } = cleanedBatchData[j]
-						const embeddedChunk: InsertVector = {
-							path: chunk.path,
-							mtime: chunk.mtime,
-							content: cleanContent, // use the already-cleaned content
-							embedding: batchEmbeddings[j],
-							metadata: chunk.metadata,
-						}
-						embeddedBatch.push(embeddedChunk)
-					}
-				},
+			await backOff(
+				async () => {
+					// The content was already cleaned and merged upstream; use it directly
+					const validBatchData = batchChunks.filter(chunk =>
+						chunk.content && chunk.content.trim().length > 0
+					)
+
+					if (validBatchData.length === 0) {
+						return
+					}
+
+					const batchTexts = validBatchData.map(chunk => chunk.content)
+					const batchEmbeddings = await embeddingModel.getBatchEmbeddings(batchTexts)
+
+					// Merge the embedding results back into the chunk data
+					for (let j = 0; j < validBatchData.length; j++) {
+						const chunk = validBatchData[j]
+						const embeddedChunk: InsertVector = {
+							path: chunk.path,
+							mtime: chunk.mtime,
+							content: chunk.content, // use the already cleaned and merged content
+							embedding: batchEmbeddings[j],
+							metadata: chunk.metadata,
+						}
+						embeddedBatch.push(embeddedChunk)
+					}
+				},
 				{
 					numOfAttempts: 3, // fewer retry attempts
 					startingDelay: 500, // shorter delay
@@ -864,18 +1001,18 @@ export class VectorManager {
 			try {
 				await backOff(
 					async () => {
-						// Strip markdown before embedding
-						const cleanContent = removeMarkdown(chunk.content).replace(/\0/g, '')
-						// Skip content that is empty after cleaning
-						if (!cleanContent || cleanContent.trim().length === 0) {
+						// The content was already cleaned and merged upstream; use it directly
+						const content = chunk.content.trim()
+						// Skip empty content
+						if (!content || content.length === 0) {
 							return
 						}

-						const embedding = await embeddingModel.getEmbedding(cleanContent)
+						const embedding = await embeddingModel.getEmbedding(content)
 						const embeddedChunk = {
 							path: chunk.path,
 							mtime: chunk.mtime,
-							content: cleanContent, // use the cleaned content
+							content: content, // use the already cleaned and merged content
 							embedding,
 							metadata: chunk.metadata,
 						}
@ -6,174 +6,208 @@ import { DatabaseNotInitializedException } from '../../exception'
|
||||
import { InsertVector, SelectVector, vectorTables } from '../../schema'
|
||||
|
||||
export class VectorRepository {
|
||||
private app: App
|
||||
private db: PGliteInterface | null
|
||||
private app: App
|
||||
private db: PGliteInterface | null
|
||||
private stopWords: Set<string>
|
||||
|
||||
constructor(app: App, pgClient: PGliteInterface | null) {
|
||||
this.app = app
|
||||
this.db = pgClient
|
||||
}
|
||||
constructor(app: App, pgClient: PGliteInterface | null) {
|
||||
this.app = app
|
||||
this.db = pgClient
|
||||
this.stopWords = new Set([
|
||||
// Chinese stop words
|
||||
'的', '在', '是', '了', '我', '你', '他', '她', '它', '请问', '如何', '一个', '什么', '怎么',
|
||||
'这', '那', '和', '与', '或', '但', '因为', '所以', '如果', '虽然', '可是', '不过',
|
||||
'也', '都', '还', '就', '又', '很', '最', '更', '非常', '特别', '比较', '相当',
|
||||
'对', '于', '把', '被', '让', '使', '给', '为', '从', '到', '向', '往', '朝',
|
||||
'上', '下', '里', '外', '前', '后', '左', '右', '中', '间', '内', '以', '及',
|
||||
|
||||
private getTableName(embeddingModel: EmbeddingModel): string {
|
||||
const tableDefinition = vectorTables[embeddingModel.dimension]
|
||||
if (!tableDefinition) {
|
||||
throw new Error(`No table definition found for model: ${embeddingModel.id}`)
|
||||
}
|
||||
return tableDefinition.name
|
||||
}
|
||||
// English stop words
|
||||
'a', 'an', 'and', 'are', 'as', 'at', 'be', 'by', 'for', 'from', 'has', 'he',
|
||||
'in', 'is', 'it', 'its', 'of', 'on', 'that', 'the', 'to', 'was', 'were', 'will',
|
||||
'with', 'would', 'could', 'should', 'can', 'may', 'might', 'must', 'shall',
|
||||
'this', 'that', 'these', 'those', 'i', 'you', 'we', 'they', 'me', 'him', 'her',
|
||||
'us', 'them', 'my', 'your', 'his', 'our', 'their', 'am', 'have', 'had', 'do',
|
||||
'does', 'did', 'get', 'got', 'go', 'went', 'come', 'came', 'make', 'made',
|
||||
'take', 'took', 'see', 'saw', 'know', 'knew', 'think', 'thought', 'say', 'said',
|
||||
'tell', 'told', 'ask', 'asked', 'give', 'gave', 'find', 'found', 'work', 'worked',
|
||||
'call', 'called', 'try', 'tried', 'need', 'needed', 'feel', 'felt', 'become',
|
||||
'became', 'leave', 'left', 'put', 'keep', 'kept', 'let', 'begin', 'began',
|
||||
'seem', 'seemed', 'help', 'helped', 'show', 'showed', 'hear', 'heard', 'play',
|
||||
'played', 'run', 'ran', 'move', 'moved', 'live', 'lived', 'believe', 'believed',
|
||||
'hold', 'held', 'bring', 'brought', 'happen', 'happened', 'write', 'wrote',
|
||||
'sit', 'sat', 'stand', 'stood', 'lose', 'lost', 'pay', 'paid', 'meet', 'met',
|
||||
'include', 'included', 'continue', 'continued', 'set', 'learn', 'learned',
|
||||
'change', 'changed', 'lead', 'led', 'understand', 'understood', 'watch', 'watched',
|
||||
'follow', 'followed', 'stop', 'stopped', 'create', 'created', 'speak', 'spoke',
|
||||
'read', 'remember', 'remembered', 'consider', 'considered', 'appear', 'appeared',
|
||||
'buy', 'bought', 'wait', 'waited', 'serve', 'served', 'die', 'died', 'send',
|
||||
'sent', 'expect', 'expected', 'build', 'built', 'stay', 'stayed', 'fall', 'fell',
|
||||
'cut', 'reach', 'reached', 'kill', 'killed', 'remain', 'remained', 'suggest',
|
||||
		'suggested', 'raise', 'raised', 'pass', 'passed', 'sell', 'sold', 'require',
		'required', 'report', 'reported', 'decide', 'decided', 'pull', 'pulled'
	])
	}

	async getAllIndexedFilePaths(embeddingModel: EmbeddingModel): Promise<string[]> {
		if (!this.db) {
			throw new DatabaseNotInitializedException()
		}
		const tableName = this.getTableName(embeddingModel)
		const result = await this.db.query<{ path: string }>(
			`SELECT DISTINCT path FROM "${tableName}"`
		)
		return result.rows.map((row: { path: string }) => row.path)
	}

	private getTableName(embeddingModel: EmbeddingModel): string {
		const tableDefinition = vectorTables[embeddingModel.dimension]
		if (!tableDefinition) {
			throw new Error(`No table definition found for model: ${embeddingModel.id}`)
		}
		return tableDefinition.name
	}

	async getMaxMtime(embeddingModel: EmbeddingModel): Promise<number | null> {
		if (!this.db) {
			throw new DatabaseNotInitializedException()
		}
		const tableName = this.getTableName(embeddingModel)
		const result = await this.db.query<{ max_mtime: number | null }>(
			`SELECT MAX(mtime) as max_mtime FROM "${tableName}"`
		)
		return result.rows[0]?.max_mtime || null
	}

	async getVectorsByFilePath(
		filePath: string,
		embeddingModel: EmbeddingModel,
	): Promise<SelectVector[]> {
		if (!this.db) {
			throw new DatabaseNotInitializedException()
		}
		const tableName = this.getTableName(embeddingModel)
		const result = await this.db.query<SelectVector>(
			`SELECT * FROM "${tableName}" WHERE path = $1`,
			[filePath]
		)
		return result.rows
	}

	async deleteVectorsForSingleFile(
		filePath: string,
		embeddingModel: EmbeddingModel,
	): Promise<void> {
		if (!this.db) {
			throw new DatabaseNotInitializedException()
		}
		const tableName = this.getTableName(embeddingModel)
		await this.db.query(
			`DELETE FROM "${tableName}" WHERE path = $1`,
			[filePath]
		)
	}

	async deleteVectorsForMultipleFiles(
		filePaths: string[],
		embeddingModel: EmbeddingModel,
	): Promise<void> {
		if (!this.db) {
			throw new DatabaseNotInitializedException()
		}
		const tableName = this.getTableName(embeddingModel)
		await this.db.query(
			`DELETE FROM "${tableName}" WHERE path = ANY($1)`,
			[filePaths]
		)
	}

	async clearAllVectors(embeddingModel: EmbeddingModel): Promise<void> {
		if (!this.db) {
			throw new DatabaseNotInitializedException()
		}
		const tableName = this.getTableName(embeddingModel)
		await this.db.query(`DELETE FROM "${tableName}"`)
	}

	async insertVectors(
		data: InsertVector[],
		embeddingModel: EmbeddingModel,
	): Promise<void> {
		if (!this.db) {
			throw new DatabaseNotInitializedException()
		}
		const tableName = this.getTableName(embeddingModel)

		// Build the placeholders for the batched INSERT
		const values = data.map((vector, index) => {
			const offset = index * 5
			return `($${offset + 1}, $${offset + 2}, $${offset + 3}, $${offset + 4}, $${offset + 5})`
		}).join(',')

		const params = data.flatMap(vector => [
			vector.path,
			vector.mtime,
			vector.content.replace(/\0/g, ''), // strip null bytes
			`[${vector.embedding.join(',')}]`, // serialize to PostgreSQL vector format
			vector.metadata
		])

		await this.db.query(
			`INSERT INTO "${tableName}" (path, mtime, content, embedding, metadata)
			VALUES ${values}`,
			params
		)
	}
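
	// Editor's note (illustrative, not part of the original diff): for a batch of two
	// rows, the map above yields values = "($1, $2, $3, $4, $5),($6, $7, $8, $9, $10)"
	// and the flatMap yields a flat ten-element params array, so the whole batch is
	// written to the database in a single round trip.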
	async performSimilaritySearch(
		queryVector: number[],
		embeddingModel: EmbeddingModel,
		options: {
			minSimilarity: number
			limit: number
			scope?: {
				files: string[]
				folders: string[]
			}
		},
	): Promise<
		(Omit<SelectVector, 'embedding'> & {
			similarity: number
		})[]
	> {
		if (!this.db) {
			throw new DatabaseNotInitializedException()
		}
		const tableName = this.getTableName(embeddingModel)

		let scopeCondition = ''
		const params: unknown[] = [`[${queryVector.join(',')}]`, options.minSimilarity, options.limit]
		let paramIndex = 4

		if (options.scope) {
			const conditions: string[] = []

			if (options.scope.files.length > 0) {
				conditions.push(`path = ANY($${paramIndex})`)
				params.push(options.scope.files)
				paramIndex++
			}

			if (options.scope.folders.length > 0) {
				const folderConditions = options.scope.folders.map((folder, idx) => {
					params.push(`${folder}/%`)
					return `path LIKE $${paramIndex + idx}`
				})
				conditions.push(`(${folderConditions.join(' OR ')})`)
				paramIndex += options.scope.folders.length
			}

			if (conditions.length > 0) {
				scopeCondition = `AND (${conditions.join(' OR ')})`
			}
		}

		const query = `
			SELECT
				id, path, mtime, content, metadata,
				1 - (embedding <=> $1::vector) as similarity

@@ -184,54 +218,215 @@ export class VectorRepository

			LIMIT $3
		`

		type SearchResult = Omit<SelectVector, 'embedding'> & { similarity: number }
		const result = await this.db.query<SearchResult>(query, params)
		console.log("performSimilaritySearch result", result.rows)
		return result.rows
	}
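
	// Editor's note (added, not in the original diff): pgvector's `<=>` operator is
	// cosine distance, so `1 - (embedding <=> $1::vector)` converts it to a cosine
	// similarity; $2 (options.minSimilarity) is presumably applied in the WHERE clause
	// elided by the hunk boundary above, with $3 bound to LIMIT.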
	async performFulltextSearch(
		searchQuery: string,
		embeddingModel: EmbeddingModel,
		options: {
			limit: number
			scope?: {
				files: string[]
				folders: string[]
			}
			language?: string
		},
	): Promise<
		(Omit<SelectVector, 'embedding'> & {
			rank: number
		})[]
	> {
		if (!this.db) {
			throw new DatabaseNotInitializedException()
		}

		// handle query processing with segmentation and stop-word filtering
		const processedQuery = this.createFtsQuery(searchQuery, options.language || 'english')

		const tableName = this.getTableName(embeddingModel)
		const language = options.language || 'english'

		let scopeCondition = ''
		const params: unknown[] = [processedQuery, options.limit]
		let paramIndex = 3

		if (options.scope) {
			const conditions: string[] = []

			if (options.scope.files.length > 0) {
				conditions.push(`path = ANY($${paramIndex})`)
				params.push(options.scope.files)
				paramIndex++
			}

			if (options.scope.folders.length > 0) {
				const folderConditions = options.scope.folders.map((folder, idx) => {
					params.push(`${folder}/%`)
					return `path LIKE $${paramIndex + idx}`
				})
				conditions.push(`(${folderConditions.join(' OR ')})`)
				paramIndex += options.scope.folders.length
			}

			if (conditions.length > 0) {
				scopeCondition = `AND (${conditions.join(' OR ')})`
			}
		}

		const query = `
			SELECT
				id, path, mtime, content, metadata,
				ts_rank_cd(
					COALESCE(content_tsv, to_tsvector('${language}', coalesce(content, ''))),
					to_tsquery('${language}', $1)
				) AS rank
			FROM "${tableName}"
			WHERE (
				content_tsv @@ to_tsquery('${language}', $1)
				OR (content_tsv IS NULL AND to_tsvector('${language}', coalesce(content, '')) @@ to_tsquery('${language}', $1))
			)
			${scopeCondition}
			ORDER BY rank DESC
			LIMIT $2
		`
		console.log("performFulltextSearch query", query)
		type SearchResult = Omit<SelectVector, 'embedding'> & { rank: number }
		const result = await this.db.query<SearchResult>(query, params)
		console.log("performFulltextSearch result", result.rows)
		return result.rows
	}
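
	// Editor's note (added): the COALESCE / IS NULL branches above let rows indexed
	// before the full_text_search migration (whose content_tsv is still NULL) match by
	// computing to_tsvector on the fly, at the cost of bypassing the GIN index for
	// those rows.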
	public segmentTextForTsvector(text: string, language: string = 'zh-CN'): string {
		try {
			// Use Intl.Segmenter to add spaces between words for better TSVECTOR indexing
			if (typeof Intl !== 'undefined' && Intl.Segmenter) {
				const segmenter = new Intl.Segmenter(language, { granularity: 'word' })
				const segments = segmenter.segment(text)

				const segmentedText = Array.from(segments)
					.map(segment => segment.segment)
					.join(' ')

				return segmentedText
			}

			// Fallback: add spaces around Chinese characters and punctuation
			return text.replace(/([一-龯])/g, ' $1 ')
				.replace(/\s+/g, ' ')
				.trim()
		} catch (error) {
			console.warn('Failed to segment text for TSVECTOR:', error)
			return text
		}
	}
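
	// Editor's sketch (hypothetical output; the exact segmentation depends on the
	// runtime's ICU data): segmentTextForTsvector('我喜欢机器学习') would return
	// something like '我 喜欢 机器 学习', letting PostgreSQL's default parser see one
	// lexeme per word instead of one unbroken CJK string.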
	private createFtsQuery(query: string, language: string): string {
		try {
			let keywords: string[] = []

			// Try to use Intl.Segmenter for word segmentation
			if (typeof Intl !== 'undefined' && Intl.Segmenter) {
				try {
					const segmenter = new Intl.Segmenter(language, { granularity: 'word' })
					const segments = segmenter.segment(query)

					keywords = Array.from(segments)
						.filter(s => s.isWordLike)
						.map(s => s.segment.trim())
						.filter(word => {
							// Filter out empty strings and stop words
							if (!word || word.length === 0) return false
							return !this.stopWords.has(word.toLowerCase())
						})
				} catch (segmentError) {
					console.warn('Intl.Segmenter failed, falling back to simple splitting:', segmentError)
				}
			}

			// Fall back to simple word splitting if Intl.Segmenter is unavailable or failed
			if (keywords.length === 0) {
				keywords = query
					.split(/[\s\p{P}\p{S}]+/u) // Split by whitespace, punctuation, and symbols
					.map(word => word.trim())
					.filter(word => {
						if (!word || word.length === 0) return false
						return !this.stopWords.has(word.toLowerCase())
					})
			}

			// If no keywords remain, return the original query
			if (keywords.length === 0) {
				return query
			}

			// Join keywords with | (OR) for PostgreSQL full-text search
			const ftsQueryString = keywords.join(' | ')

			console.log(`Original query: "${query}" -> Processed query: "${ftsQueryString}"`)
			return ftsQueryString
		} catch (error) {
			// If all processing fails, return the original query
			console.warn('Failed to process FTS query:', error)
			return query
		}
	}
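
	// Editor's sketch (hypothetical example): 'raise' and 'raised' are in the stop-word
	// set above, so createFtsQuery('raise vector index', 'english') reduces to
	// 'vector | index', an OR expression that to_tsquery() accepts directly.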
	async getWorkspaceStatistics(
		embeddingModel: EmbeddingModel,
		scope?: {
			files: string[]
			folders: string[]
		}
	): Promise<{
		totalFiles: number
		totalChunks: number
	}> {
		if (!this.db) {
			throw new DatabaseNotInitializedException()
		}
		const tableName = this.getTableName(embeddingModel)

		let scopeCondition = ''
		const params: unknown[] = []
		let paramIndex = 1

		if (scope) {
			const conditions: string[] = []

			if (scope.files.length > 0) {
				conditions.push(`path = ANY($${paramIndex})`)
				params.push(scope.files)
				paramIndex++
			}

			if (scope.folders.length > 0) {
				const folderConditions = scope.folders.map((folder, idx) => {
					params.push(`${folder}/%`)
					return `path LIKE $${paramIndex + idx}`
				})
				conditions.push(`(${folderConditions.join(' OR ')})`)
				paramIndex += scope.folders.length
			}

			if (conditions.length > 0) {
				scopeCondition = `WHERE (${conditions.join(' OR ')})`
			}
		}

		const query = `
			SELECT
				COUNT(DISTINCT path) as total_files,
				COUNT(*) as total_chunks

@@ -239,43 +434,43 @@ export class VectorRepository

			${scopeCondition}
		`

		const result = await this.db.query<{
			total_files: number
			total_chunks: number
		}>(query, params)

		const row = result.rows[0]
		return {
			totalFiles: Number(row?.total_files || 0),
			totalChunks: Number(row?.total_chunks || 0)
		}
	}
	async getVaultStatistics(embeddingModel: EmbeddingModel): Promise<{
		totalFiles: number
		totalChunks: number
	}> {
		if (!this.db) {
			throw new DatabaseNotInitializedException()
		}
		const tableName = this.getTableName(embeddingModel)

		const query = `
			SELECT
				COUNT(DISTINCT path) as total_files,
				COUNT(*) as total_chunks
			FROM "${tableName}"
		`

		const result = await this.db.query<{
			total_files: number
			total_chunks: number
		}>(query)

		const row = result.rows[0]
		return {
			totalFiles: Number(row?.total_files || 0),
			totalChunks: Number(row?.total_chunks || 0)
		}
	}
}

@@ -261,5 +261,108 @@ export const migrations: Record<string, SqlMigration> = {
		ALTER TABLE "source_insight_512" ADD COLUMN IF NOT EXISTS "source_mtime" bigint NOT NULL DEFAULT 0;
		ALTER TABLE "source_insight_384" ADD COLUMN IF NOT EXISTS "source_mtime" bigint NOT NULL DEFAULT 0;
		`
	},
	full_text_search: {
		description: "Adds full-text search capabilities to embedding and source insight tables",
		sql: `
		-- Add content_tsv columns to embedding tables
		ALTER TABLE "embeddings_1536" ADD COLUMN IF NOT EXISTS "content_tsv" TSVECTOR;
		ALTER TABLE "embeddings_1024" ADD COLUMN IF NOT EXISTS "content_tsv" TSVECTOR;
		ALTER TABLE "embeddings_768" ADD COLUMN IF NOT EXISTS "content_tsv" TSVECTOR;
		ALTER TABLE "embeddings_512" ADD COLUMN IF NOT EXISTS "content_tsv" TSVECTOR;
		ALTER TABLE "embeddings_384" ADD COLUMN IF NOT EXISTS "content_tsv" TSVECTOR;

		-- Add insight_tsv columns to source insight tables
		ALTER TABLE "source_insight_1536" ADD COLUMN IF NOT EXISTS "insight_tsv" TSVECTOR;
		ALTER TABLE "source_insight_1024" ADD COLUMN IF NOT EXISTS "insight_tsv" TSVECTOR;
		ALTER TABLE "source_insight_768" ADD COLUMN IF NOT EXISTS "insight_tsv" TSVECTOR;
		ALTER TABLE "source_insight_512" ADD COLUMN IF NOT EXISTS "insight_tsv" TSVECTOR;
		ALTER TABLE "source_insight_384" ADD COLUMN IF NOT EXISTS "insight_tsv" TSVECTOR;

		-- Create trigger function for embeddings tables
		CREATE OR REPLACE FUNCTION embeddings_tsv_trigger() RETURNS trigger AS $$
		BEGIN
			NEW.content_tsv := to_tsvector('english', coalesce(NEW.content, ''));
			RETURN NEW;
		END
		$$ LANGUAGE plpgsql;

		-- Create trigger function for source insight tables
		CREATE OR REPLACE FUNCTION source_insight_tsv_trigger() RETURNS trigger AS $$
		BEGIN
			NEW.insight_tsv := to_tsvector('english', coalesce(NEW.insight, ''));
			RETURN NEW;
		END
		$$ LANGUAGE plpgsql;

		-- Create triggers for embeddings tables (drop if exists first)
		DROP TRIGGER IF EXISTS tsvector_update_embeddings_1536 ON "embeddings_1536";
		CREATE TRIGGER tsvector_update_embeddings_1536
			BEFORE INSERT OR UPDATE ON "embeddings_1536"
			FOR EACH ROW EXECUTE FUNCTION embeddings_tsv_trigger();

		DROP TRIGGER IF EXISTS tsvector_update_embeddings_1024 ON "embeddings_1024";
		CREATE TRIGGER tsvector_update_embeddings_1024
			BEFORE INSERT OR UPDATE ON "embeddings_1024"
			FOR EACH ROW EXECUTE FUNCTION embeddings_tsv_trigger();

		DROP TRIGGER IF EXISTS tsvector_update_embeddings_768 ON "embeddings_768";
		CREATE TRIGGER tsvector_update_embeddings_768
			BEFORE INSERT OR UPDATE ON "embeddings_768"
			FOR EACH ROW EXECUTE FUNCTION embeddings_tsv_trigger();

		DROP TRIGGER IF EXISTS tsvector_update_embeddings_512 ON "embeddings_512";
		CREATE TRIGGER tsvector_update_embeddings_512
			BEFORE INSERT OR UPDATE ON "embeddings_512"
			FOR EACH ROW EXECUTE FUNCTION embeddings_tsv_trigger();

		DROP TRIGGER IF EXISTS tsvector_update_embeddings_384 ON "embeddings_384";
		CREATE TRIGGER tsvector_update_embeddings_384
			BEFORE INSERT OR UPDATE ON "embeddings_384"
			FOR EACH ROW EXECUTE FUNCTION embeddings_tsv_trigger();

		-- Create triggers for source insight tables (drop if exists first)
		DROP TRIGGER IF EXISTS tsvector_update_source_insight_1536 ON "source_insight_1536";
		CREATE TRIGGER tsvector_update_source_insight_1536
			BEFORE INSERT OR UPDATE ON "source_insight_1536"
			FOR EACH ROW EXECUTE FUNCTION source_insight_tsv_trigger();

		DROP TRIGGER IF EXISTS tsvector_update_source_insight_1024 ON "source_insight_1024";
		CREATE TRIGGER tsvector_update_source_insight_1024
			BEFORE INSERT OR UPDATE ON "source_insight_1024"
			FOR EACH ROW EXECUTE FUNCTION source_insight_tsv_trigger();

		DROP TRIGGER IF EXISTS tsvector_update_source_insight_768 ON "source_insight_768";
		CREATE TRIGGER tsvector_update_source_insight_768
			BEFORE INSERT OR UPDATE ON "source_insight_768"
			FOR EACH ROW EXECUTE FUNCTION source_insight_tsv_trigger();

		DROP TRIGGER IF EXISTS tsvector_update_source_insight_512 ON "source_insight_512";
		CREATE TRIGGER tsvector_update_source_insight_512
			BEFORE INSERT OR UPDATE ON "source_insight_512"
			FOR EACH ROW EXECUTE FUNCTION source_insight_tsv_trigger();

		DROP TRIGGER IF EXISTS tsvector_update_source_insight_384 ON "source_insight_384";
		CREATE TRIGGER tsvector_update_source_insight_384
			BEFORE INSERT OR UPDATE ON "source_insight_384"
			FOR EACH ROW EXECUTE FUNCTION source_insight_tsv_trigger();

		-- Note: the tsvector columns of existing rows stay NULL; only newly inserted rows
		-- are populated automatically by the triggers. This avoids the file-handle
		-- exhaustion that a mass UPDATE over existing data would cause.

		-- Create GIN indexes for full-text search on embeddings tables
		CREATE INDEX IF NOT EXISTS "embeddings_content_tsv_idx_1536" ON "embeddings_1536" USING GIN(content_tsv);
		CREATE INDEX IF NOT EXISTS "embeddings_content_tsv_idx_1024" ON "embeddings_1024" USING GIN(content_tsv);
		CREATE INDEX IF NOT EXISTS "embeddings_content_tsv_idx_768" ON "embeddings_768" USING GIN(content_tsv);
		CREATE INDEX IF NOT EXISTS "embeddings_content_tsv_idx_512" ON "embeddings_512" USING GIN(content_tsv);
		CREATE INDEX IF NOT EXISTS "embeddings_content_tsv_idx_384" ON "embeddings_384" USING GIN(content_tsv);

		-- Create GIN indexes for full-text search on source insight tables
		CREATE INDEX IF NOT EXISTS "source_insight_tsv_idx_1536" ON "source_insight_1536" USING GIN(insight_tsv);
		CREATE INDEX IF NOT EXISTS "source_insight_tsv_idx_1024" ON "source_insight_1024" USING GIN(insight_tsv);
		CREATE INDEX IF NOT EXISTS "source_insight_tsv_idx_768" ON "source_insight_768" USING GIN(insight_tsv);
		CREATE INDEX IF NOT EXISTS "source_insight_tsv_idx_512" ON "source_insight_512" USING GIN(insight_tsv);
		CREATE INDEX IF NOT EXISTS "source_insight_tsv_idx_384" ON "source_insight_384" USING GIN(insight_tsv);
		`
	}
};
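
// Editor's note (added for illustration, not part of the original diff): once the
// trigger is installed, an INSERT of content 'hello world' into "embeddings_384"
// populates content_tsv with to_tsvector('english', 'hello world'), i.e.
// 'hello':1 'world':2, which the GIN indexes above can then serve for @@ queries.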
@@ -78,6 +78,7 @@ worker({
	// Execute SQL migrations
	for (const [_key, migration] of Object.entries(migrations)) {
		// Split SQL into individual commands and execute them one by one
		console.log("migration: ", migration.description)
		const commands = migration.sql.split('\n\n').filter(cmd => cmd.trim());
		for (const command of commands) {
			await db.exec(command);
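
// Editor's note (added): because the runner splits on '\n\n', blank lines inside
// migration.sql act as command delimiters. Multi-line statements such as the
// plpgsql trigger-function bodies above must therefore contain no internal blank
// lines, or they would be split mid-statement and fail to parse.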