From 2b571f67a727ac3a2c3759d3ffb92d7972847706 Mon Sep 17 00:00:00 2001 From: duanfuxiang Date: Sat, 7 Jun 2025 17:13:02 +0800 Subject: [PATCH] update local pdf tools --- esbuild.config.mjs | 1 + pnpm-lock.yaml | 282 +++++++++++++++++++++++++- src/components/chat-view/ChatView.tsx | 2 +- src/utils/obsidian.ts | 44 +++- src/utils/prompt-generator.ts | 91 +++++++-- src/utils/video-detector.test.ts | 107 ++++++++++ src/utils/video-detector.ts | 142 +++++++++++++ src/utils/web-search.ts | 35 +++- 8 files changed, 668 insertions(+), 36 deletions(-) create mode 100644 src/utils/video-detector.test.ts create mode 100644 src/utils/video-detector.ts diff --git a/esbuild.config.mjs b/esbuild.config.mjs index f184738..8459533 100644 --- a/esbuild.config.mjs +++ b/esbuild.config.mjs @@ -43,6 +43,7 @@ const context = await esbuild.context({ '@lezer/highlight', '@lezer/lr', '@lexical/clipboard/clipboard', + 'pdfjs-dist', ...nodeBuiltins, ], format: 'cjs', diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 4d4be8c..907d0ee 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -287,7 +287,7 @@ importers: version: 29.7.0(@types/node@16.18.126) jest-environment-jsdom: specifier: ^29.7.0 - version: 29.7.0 + version: 29.7.0(canvas@2.11.2) obsidian: specifier: ^1.8.7 version: 1.8.7(@codemirror/state@6.5.2)(@codemirror/view@6.36.2) @@ -1434,6 +1434,10 @@ packages: '@libsql/core@0.14.0': resolution: {integrity: sha512-nhbuXf7GP3PSZgdCY2Ecj8vz187ptHlZQ0VRc751oB2C1W8jQUXKKklvt7t1LJiUTQBVJuadF628eUk+3cRi4Q==} + '@mapbox/node-pre-gyp@1.0.11': + resolution: {integrity: sha512-Yhlar6v9WQgUp/He7BdgzOz8lqMQ8sU+jkCq7Wx8Myc5YFJLbEe7lgui/V7G1qB1DJykHSGwreceSaD60Y0PUQ==} + hasBin: true + '@marijn/find-cluster-break@1.0.2': resolution: {integrity: sha512-l0h88YhZFyKdXIFNfSWpyjStDjGHwZ/U7iobcK1cQQD8sejsONdQtTVU+1wVN1PBw40PiiHB1vA5S7VTfQiP9g==} @@ -2695,6 +2699,9 @@ packages: resolution: {integrity: sha512-j2afSsaIENvHZN2B8GOpF566vZ5WVk5opAiMTvWgaQT8DkbOqsTfvNAvHoRGU2zzP8cPoqys+xHTRDWW8L+/BA==} 
deprecated: Use your platform's native atob() and btoa() methods instead + abbrev@1.1.1: + resolution: {integrity: sha512-nne9/IiQ/hzIhY6pdDnbBtz7DjPTKrY00P/zvPSm5pOFkl6xuGrGnXn/VtTNNfNtAfZ9/1RtehkszU9qcTii0Q==} + abort-controller@3.0.0: resolution: {integrity: sha512-h8lQ8tacZYnR3vNQTgibj+tODHI5/+l06Au2Pcriv/Gmet0eaj4TwWH41sO9wnHDiQsEj19q0drzdWdeAHtweg==} engines: {node: '>=6.5'} @@ -2762,6 +2769,14 @@ packages: resolution: {integrity: sha512-KMReFUr0B4t+D+OBkjR3KYqvocp2XaSzO55UcB6mgQMd3KbcE+mWTyvVV7D/zsdEbNnV6acZUutkiHQXvTr1Rw==} engines: {node: '>= 8'} + aproba@2.0.0: + resolution: {integrity: sha512-lYe4Gx7QT+MKGbDsA+Z+he/Wtef0BiwDOlK/XkBrdfsh9J/jPPXbX0tE9x9cl27Tmu5gg3QUbUrQYa/y+KOHPQ==} + + are-we-there-yet@2.0.0: + resolution: {integrity: sha512-Ci/qENmwHnsYo9xKIcUJN5LeDKdJ6R1Z1j9V/J5wyq8nh/mYPEpIKJbBZXtZjG04HiK7zV/p6Vs9952MrMeUIw==} + engines: {node: '>=10'} + deprecated: This package is no longer supported. + argparse@1.0.10: resolution: {integrity: sha512-o5Roy6tNG4SL/FOkCAN6RzjiakZS25RLYFrcMttJqbdd8BWrnA+fGz57iN5Pb06pvBGvl5gQ0B48dJlslXvoTg==} @@ -2942,6 +2957,10 @@ packages: caniuse-lite@1.0.30001699: resolution: {integrity: sha512-b+uH5BakXZ9Do9iK+CkDmctUSEqZl+SP056vc5usa0PL+ev5OHw003rZXcnjNDv3L8P5j6rwT6C0BPKSikW08w==} + canvas@2.11.2: + resolution: {integrity: sha512-ItanGBMrmRV7Py2Z+Xhs7cT+FNt5K0vPL4p9EZ/UX/Mu7hFbkxSjKF2KVtPwX7UYWp7dRKnrTvReflgrItJbdw==} + engines: {node: '>=6'} + ccount@2.0.1: resolution: {integrity: sha512-eyrF0jiFpY+3drT6383f1qhkbGsLSifNAjA61IUjZjmLCWjItY6LB9ft9YhoDgwfmclB2zhu51Lc7+95b8NRAg==} @@ -2982,6 +3001,10 @@ packages: resolution: {integrity: sha512-Qgzu8kfBvo+cA4962jnP1KkS6Dop5NS6g7R5LFYJr4b8Ub94PPQXUksCw9PvXoeXPRRddRNC5C1JQUR2SMGtnA==} engines: {node: '>= 14.16.0'} + chownr@2.0.0: + resolution: {integrity: sha512-bIomtDF5KGpdogkLd9VspvFzk9KfpyyGlS8YFVZl7TGPBHL5snIOnxeshwVgPteQ9b4Eydl+pVbIyE1DcvCWgQ==} + engines: {node: '>=10'} + ci-info@3.9.0: resolution: {integrity: 
sha512-NIxF55hv4nSqQswkAeiOi1r83xy8JldOFDTWiug55KBu9Jnblncd2U6ViHmYgHf01TPZS77NJBhBMKdWj9HQMQ==} engines: {node: '>=8'} @@ -3011,6 +3034,10 @@ packages: color-name@1.1.4: resolution: {integrity: sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==} + color-support@1.1.3: + resolution: {integrity: sha512-qiBjkpbMLO/HL68y+lh4q0/O1MZFj2RX6X/KmMa3+gJD3z+WwI1ZzDHysvqHGS3mP6mznPckpXmw1nI9cJjyRg==} + hasBin: true + colord@2.9.3: resolution: {integrity: sha512-jeC1axXpnb0/2nn/Y1LPuLdgXBLH7aDcHu4KEKfqw3CUhX7ZpfBSlPKyqXE6btIgEzfWtrX3/tyBCaCvXvMkOw==} @@ -3030,6 +3057,9 @@ packages: concat-map@0.0.1: resolution: {integrity: sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==} + console-control-strings@1.1.0: + resolution: {integrity: sha512-ty/fTekppD2fIwRvnZAVdeOiGd1c7YXEixbgJTNzqcxJWKQnjJ/V1bNEEE6hygpM3WjwHFUVK6HTjWSzV4a8sQ==} + console-table-printer@2.12.1: resolution: {integrity: sha512-wKGOQRRvdnd89pCeH96e2Fn4wkbenSP6LMHfjfyNLMbGuHEFbMqQNuxXqd0oXG9caIOQ1FTvc5Uijp9/4jujnQ==} @@ -3149,6 +3179,10 @@ packages: decode-named-character-reference@1.0.2: resolution: {integrity: sha512-O8x12RzrUF8xyVcY0KJowWsmaJxQbmy0/EtnNtHRpsOcT7dFk5W598coHqBVpmWo1oQQfsCqfCmkZN5DJrZVdg==} + decompress-response@4.2.1: + resolution: {integrity: sha512-jOSne2qbyE+/r8G1VU+G/82LBs2Fs4LAsTiLSHOCOMZQl2OKZ6i8i4IyHemTe+/yIXOtTcRQMzPcgyhoFlqPkw==} + engines: {node: '>=8'} + dedent@1.5.3: resolution: {integrity: sha512-NHQtfOOW68WD8lgypbLA5oT+Bt0xXJhiYvoR6SmmNXZfpzOGXwdKWmcwG8N7PwVVWV3eF/68nmD9BaJSsTBhyQ==} peerDependencies: @@ -3184,6 +3218,9 @@ packages: resolution: {integrity: sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==} engines: {node: '>=0.4.0'} + delegates@1.0.0: + resolution: {integrity: sha512-bd2L678uiWATM6m5Z1VzNCErI3jiGzt6HGY8OVICs40JQq/HALfbyNJmp0UDakEY4pMMaN0Ly5om/B1VI/+xfQ==} + depd@2.0.0: resolution: {integrity: 
sha512-g7nH6P6dyDioJogAAGprGpCtVImJhpPk/roCzdb3fIh61/s/nPsfR6onyMwkCAR/OlC3yBC0lESvUoQEAssIrw==} engines: {node: '>= 0.8'} @@ -3192,6 +3229,10 @@ packages: resolution: {integrity: sha512-0je+qPKHEMohvfRTCEo3CrPG6cAzAYgmzKyxRiYSSDkS6eGJdyVJm7WaYA5ECaAD9wLB2T4EEeymA5aFVcYXCA==} engines: {node: '>=6'} + detect-libc@2.0.4: + resolution: {integrity: sha512-3UDv+G9CsCKO1WKMGw9fwq/SWJYbI0c5Y7LU1AXYoDdbhE2AHQ6N6Nb34sG8Fj7T5APy8qXDCKuuIHd1BR0tVA==} + engines: {node: '>=8'} + detect-newline@3.1.0: resolution: {integrity: sha512-TLz+x/vEXm/Y7P7wn1EJFNLxYpUD4TgMosxY6fAVJUnJMbupHBOncxyWUG9OpTaH9EBD7uFI5LfEgmMOc54DsA==} engines: {node: '>=8'} @@ -3747,6 +3788,10 @@ packages: resolution: {integrity: sha512-Rx/WycZ60HOaqLKAi6cHRKKI7zxWbJ31MhntmtwMoaTeF7XFH9hhBp8vITaMidfljRQ6eYWCKkaTK+ykVJHP2A==} engines: {node: '>= 0.8'} + fs-minipass@2.1.0: + resolution: {integrity: sha512-V/JgOLFCS+R6Vcq0slCuaeWEdNC3ouDlJMNIsacH2VtALiu9mV4LPrHc5cDl8k5aw6J8jwgWWpiTo5RYhmIzvg==} + engines: {node: '>= 8'} + fs.realpath@1.0.0: resolution: {integrity: sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw==} @@ -3772,6 +3817,11 @@ packages: fuzzysort@3.1.0: resolution: {integrity: sha512-sR9BNCjBg6LNgwvxlBd0sBABvQitkLzoVY9MYYROQVX/FvfJ4Mai9LsGhDgd8qYdds0bY77VzYd5iuB+v5rwQQ==} + gauge@3.0.2: + resolution: {integrity: sha512-+5J6MS/5XksCuXq++uFRsnUd7Ovu1XenbeuIuNRJxYWjgQbPuFhT14lAvsWfqfAmnwluf1OwMjz39HjfLPci0Q==} + engines: {node: '>=10'} + deprecated: This package is no longer supported. 
+ gaxios@6.7.1: resolution: {integrity: sha512-LDODD4TMYx7XXdpwxAVRAIAuB0bzv0s+ywFonY46k126qzQHT9ygyoa9tncmOiQmmDrik65UYsEkv3lbfqQ3yQ==} engines: {node: '>=14'} @@ -3912,6 +3962,9 @@ packages: resolution: {integrity: sha512-NqADB8VjPFLM2V0VvHUewwwsw0ZWBaIdgo+ieHtK3hasLz4qeCRjYcqfB6AQrBggRKppKF8L52/VqdVsO47Dlw==} engines: {node: '>= 0.4'} + has-unicode@2.0.1: + resolution: {integrity: sha512-8Rf9Y83NBReMnx0gFzA8JImQACstCYWUplepDa9xprwwtmgEZUF0h/i5xSA625zB/I37EtrswSST6OXxwaaIJQ==} + hasown@2.0.2: resolution: {integrity: sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==} engines: {node: '>= 0.4'} @@ -4812,6 +4865,10 @@ packages: resolution: {integrity: sha512-OqbOk5oEQeAZ8WXWydlu9HJjz9WVdEIvamMCcXmuqUYjTknH/sqsWvhQ3vgwKFRR1HpjvNBKQ37nbJgYzGqGcg==} engines: {node: '>=6'} + mimic-response@2.1.0: + resolution: {integrity: sha512-wXqjST+SLt7R009ySCglWBCFpjUygmCIfD790/kVbiGmUgfYGuB14PiTd5DwVxSV4NcYHjzMkoj5LjQZwTQLEA==} + engines: {node: '>=8'} + minimatch@10.0.1: resolution: {integrity: sha512-ethXTt3SGGR+95gudmqJ1eNhRO7eGEGIgYA9vnPatK4/etz2MEVDno5GMCibdMTuBMyElzIlgxMna3K94XDIDQ==} engines: {node: 20 || >=22} @@ -4830,6 +4887,23 @@ packages: minimist@1.2.8: resolution: {integrity: sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA==} + minipass@3.3.6: + resolution: {integrity: sha512-DxiNidxSEK+tHG6zOIklvNOwm3hvCrbUrdtzY74U6HKTJxvIDfOUL5W5P2Ghd3DTkhhKPYGqeNUIh5qcM4YBfw==} + engines: {node: '>=8'} + + minipass@5.0.0: + resolution: {integrity: sha512-3FnjYuehv9k6ovOEbyOswadCDPX1piCfhV8ncmYtHOjuPwylVWsghTLo7rabjC3Rx5xD4HDx8Wm1xnMF7S5qFQ==} + engines: {node: '>=8'} + + minizlib@2.1.2: + resolution: {integrity: sha512-bAxsR8BVfj60DWXHE3u30oHzfl4G7khkSuPW+qvpd7jFRHm7dLxOjUk1EHACJ/hxLY8phGJ0YhYHZo7jil7Qdg==} + engines: {node: '>= 8'} + + mkdirp@1.0.4: + resolution: {integrity: sha512-vVqVZQyf3WLx2Shd0qJ9xuvqgAyKPLAiqITEtqW0oIUjzo3PePDd6fW9iFz30ef7Ysp/oiWqbhszeGWW2T6Gzw==} + engines: {node: 
'>=10'} + hasBin: true + moment@2.29.4: resolution: {integrity: sha512-5LC9SOxjSc2HF6vO2CyuTDNivEdoz2IvyJJGj6X8DJ0eFyfszE0QiEd+iXmBvUP3WHxSjFH/vIsA0EN00cgr8w==} @@ -4840,6 +4914,9 @@ packages: resolution: {integrity: sha512-71ippSywq5Yb7/tVYyGbkBggbU8H3u5Rz56fH60jGFgr8uHwxs+aSKeqmluIVzM0m0kB7xQjKS6qPfd0b2ZoqQ==} hasBin: true + nan@2.22.2: + resolution: {integrity: sha512-DANghxFkS1plDdRsX0X9pm0Z6SJNN6gBdtXfanwoZ8hooC5gosGFSBGRYHUVPz1asKA/kMRqDRdHrluZ61SpBQ==} + nanoid@3.3.8: resolution: {integrity: sha512-WNLf5Sd8oZxOm+TzppcYk8gVOgP+l58xNy58D0nbUnOxOWRWvlcCV4kUF7ltmI6PsrLl/BgKEyS4mqsGChFN0w==} engines: {node: ^10 || ^12 || ^13.7 || ^14 || >=15.0.1} @@ -4877,6 +4954,11 @@ packages: node-releases@2.0.19: resolution: {integrity: sha512-xxOWJsBKtzAq7DY0J+DTzuz58K8e7sJbdgwkbMWQe8UYB6ekmsQ45q0M/tJDsGaZmbC+l7n57UV8Hl5tHxO9uw==} + nopt@5.0.0: + resolution: {integrity: sha512-Tbj67rffqceeLpcRXrT7vKAN8CwfPeIBgM7E6iBkmKLV7bEMwpGgYLGv0jACUsECaa/vuxP0IjEont6umdMgtQ==} + engines: {node: '>=6'} + hasBin: true + normalize-path@3.0.0: resolution: {integrity: sha512-6eZs5Ls3WtCisHWp9S2GUy8dqkpGi4BVSz3GaqiE6ezub0512ESztXUwUB6C6IKbQkY2Pnb/mD4WYojCRwcwLA==} engines: {node: '>=0.10.0'} @@ -4885,6 +4967,10 @@ packages: resolution: {integrity: sha512-S48WzZW777zhNIrn7gxOlISNAqi9ZC/uQFnRdbeIHhZhCA6UqpkOT8T1G7BvfdgP4Er8gF4sUbaS0i7QvIfCWw==} engines: {node: '>=8'} + npmlog@5.0.1: + resolution: {integrity: sha512-AqZtDUWOMKs1G/8lwylVjrdYgqA4d9nu8hc+0gzRxlDb1I10+FHBGMXs6aiQHFdCUUlqH99MUMuLfzWDNDtfxw==} + deprecated: This package is no longer supported. 
+ nwsapi@2.2.16: resolution: {integrity: sha512-F1I/bimDpj3ncaNDhfyMWuFqmQDBwDB0Fogc2qpL3BWvkQteFD/8BzWuIRl83rq0DXfm8SGt/HFhLXZyljTXcQ==} @@ -5253,6 +5339,10 @@ packages: readable-stream@1.1.14: resolution: {integrity: sha512-+MeVjFf4L44XUkhM1eYbD8fyEsxcV81pqMSR5gblfcLCHfZvbrqy4/qYHE+/R5HoBUT11WV5O08Cr1n3YXkWVQ==} + readable-stream@3.6.2: + resolution: {integrity: sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==} + engines: {node: '>= 6'} + readdirp@4.1.2: resolution: {integrity: sha512-GDhwkLfywWL2s6vEjyhri+eXmfH6j1L7JE27WhqLeYzoh/A3DBaYGEj2H/HFZCn/kMfim73FXxEJTw06WtxQwg==} engines: {node: '>= 14.18.0'} @@ -5391,6 +5481,9 @@ packages: resolution: {integrity: sha512-61g9pCh0Vnh7IutZjtLGGpTA355+OPn2TyDv/6ivP2h/AdAVX9azsoxmg2/M6nZeQZNYBEwIcsne1mJd9oQItQ==} engines: {node: '>= 18'} + set-blocking@2.0.0: + resolution: {integrity: sha512-KiKBS8AnWGEyLzofFfmvKwpdPzqiy16LvQfK3yv/fVH7Bj13/wl3JSR1J+rfgRE9q7xUJK4qvgS8raSOeLUehw==} + set-function-length@1.2.2: resolution: {integrity: sha512-pgRc4hJ4/sNjWCSS9AmnS40x3bNMDTknHgL5UaMBTMyJnU90EgWh1Rz+MC9eFu4BuN/UwZjKQuY/1v3rM7HMfg==} engines: {node: '>= 0.4'} @@ -5441,6 +5534,12 @@ packages: resolution: {integrity: sha512-bzyZ1e88w9O1iNJbKnOlvYTrWPDl46O1bG0D3XInv+9tkPrxrN8jUUTiFlDkkmKWgn1M6CfIA13SuGqOa9Korw==} engines: {node: '>=14'} + simple-concat@1.0.1: + resolution: {integrity: sha512-cSFtAPtRhljv69IK0hTVZQ+OfE9nePi/rtJmw5UjHeVyVroEqJXP1sFztKUy1qU+xvz3u/sfYJLa947b7nAN2Q==} + + simple-get@3.1.1: + resolution: {integrity: sha512-CQ5LTKGfCpvE1K0n2us+kuMPbk/q0EKl82s4aheV9oXjFEz6W/Y7oQFVJuU6QG77hRT4Ghb5RURteF5vnWjupA==} + simple-git@3.27.0: resolution: {integrity: sha512-ivHoFS9Yi9GY49ogc6/YAi3Fl9ROnF4VyubNylgCkA+RVqLaKWnDSzXOVzya8csELIaWaYNutsEuAhZrtOjozA==} @@ -5523,6 +5622,9 @@ packages: string_decoder@0.10.31: resolution: {integrity: sha512-ev2QzSzWPYmy9GuqfIVildA4OdcGLeFZQrq5ys6RtiuF+RQQiZWr8TZNyAcuVXyQRYfEO+MsoB/1BuQVhOJuoQ==} + string_decoder@1.3.0: + resolution: {integrity: 
sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA==} + stringify-entities@4.0.4: resolution: {integrity: sha512-IwfBptatlO+QCJUo19AqvrPNqlVMpW9YEL2LIVY+Rpv2qsjCGxaDLNRgeGsQWJhfItebuJhsGSLjaBbNSQ+ieg==} @@ -5587,6 +5689,10 @@ packages: resolution: {integrity: sha512-9kY+CygyYM6j02t5YFHbNz2FN5QmYGv9zAjVp4lCDjlCw7amdckXlEt/bjMhUIfj4ThGRE4gCUH5+yGnNuPo5A==} engines: {node: '>=10.0.0'} + tar@6.2.1: + resolution: {integrity: sha512-DZ4yORTwrbTj/7MZYq2w+/ZFdI6OZ/f9SFHR+71gIVUZhOQPHzVCLpvRnPgyaMpfWxxk/4ONva3GQSyNIKRv6A==} + engines: {node: '>=10'} + test-exclude@6.0.0: resolution: {integrity: sha512-cAGWPIyOHU6zlmg88jwm7VRyXnMN7iV68OGAbYDk/Mh/xC/pzVPlQtY6ngoIH/5/tciuhGfvESU8GrHrcxD56w==} engines: {node: '>=8'} @@ -5878,6 +5984,9 @@ packages: engines: {node: '>= 8'} hasBin: true + wide-align@1.1.5: + resolution: {integrity: sha512-eDMORYaPNZ4sQIuuYPDHdQvf4gyCF9rEEV/yPxGfwPkRodwEgiMUUXTx/dex+Me0wxx53S+NgUHaP7y3MGlDmg==} + word-wrap@1.2.5: resolution: {integrity: sha512-BN22B5eaMMI9UMtjrGd5g5eCYPpCPDUy0FJXbYsaT5zYxjFOckS53SQDE3pWkVoWpHXVb3BrYcEN4Twa55B5cA==} engines: {node: '>=0.10.0'} @@ -5930,6 +6039,9 @@ packages: yallist@3.1.1: resolution: {integrity: sha512-a4UGQaWPH59mOXUYnAG2ewncQS4i4F43Tv3JoAM+s2VDAmS9NsK8GpDMLrCHPksFT7h3K6TOoUNn2pb7RoXx4g==} + yallist@4.0.0: + resolution: {integrity: sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A==} + yaml@2.7.0: resolution: {integrity: sha512-+hSoy/QHluxmC9kCIJyL/uyFmLmc+e5CFR5Wa+bpIhIj85LVb9ZH2nVnqrHoSvKogwODv0ClqZkmiSSaIH5LTA==} engines: {node: '>= 14'} @@ -7159,6 +7271,22 @@ snapshots: dependencies: js-base64: 3.7.7 + '@mapbox/node-pre-gyp@1.0.11': + dependencies: + detect-libc: 2.0.4 + https-proxy-agent: 5.0.1 + make-dir: 3.1.0 + node-fetch: 2.7.0 + nopt: 5.0.0 + npmlog: 5.0.1 + rimraf: 3.0.2 + semver: 7.7.1 + tar: 6.2.1 + transitivePeerDependencies: + - encoding + - supports-color + optional: true + '@marijn/find-cluster-break@1.0.2': {} 
'@modelcontextprotocol/sdk@1.12.1': @@ -8535,6 +8663,9 @@ snapshots: abab@2.0.6: {} + abbrev@1.1.1: + optional: true + abort-controller@3.0.0: dependencies: event-target-shim: 5.0.1 @@ -8604,6 +8735,15 @@ snapshots: normalize-path: 3.0.0 picomatch: 2.3.1 + aproba@2.0.0: + optional: true + + are-we-there-yet@2.0.0: + dependencies: + delegates: 1.0.0 + readable-stream: 3.6.2 + optional: true + argparse@1.0.10: dependencies: sprintf-js: 1.0.3 @@ -8850,6 +8990,16 @@ snapshots: caniuse-lite@1.0.30001699: {} + canvas@2.11.2: + dependencies: + '@mapbox/node-pre-gyp': 1.0.11 + nan: 2.22.2 + simple-get: 3.1.1 + transitivePeerDependencies: + - encoding + - supports-color + optional: true + ccount@2.0.1: {} chalk@4.1.2: @@ -8885,6 +9035,9 @@ snapshots: dependencies: readdirp: 4.1.2 + chownr@2.0.0: + optional: true + ci-info@3.9.0: {} cjs-module-lexer@1.4.3: {} @@ -8907,6 +9060,9 @@ snapshots: color-name@1.1.4: {} + color-support@1.1.3: + optional: true + colord@2.9.3: {} combined-stream@1.0.8: @@ -8921,6 +9077,9 @@ snapshots: concat-map@0.0.1: {} + console-control-strings@1.1.0: + optional: true + console-table-printer@2.12.1: dependencies: simple-wcswidth: 1.0.1 @@ -9036,6 +9195,11 @@ snapshots: dependencies: character-entities: 2.0.2 + decompress-response@4.2.1: + dependencies: + mimic-response: 2.1.0 + optional: true + dedent@1.5.3: {} deep-is@0.1.4: {} @@ -9060,10 +9224,16 @@ snapshots: delayed-stream@1.0.0: {} + delegates@1.0.0: + optional: true + depd@2.0.0: {} dequal@2.0.3: {} + detect-libc@2.0.4: + optional: true + detect-newline@3.1.0: {} detect-node-es@1.1.0: {} @@ -9775,6 +9945,11 @@ snapshots: fresh@2.0.0: {} + fs-minipass@2.1.0: + dependencies: + minipass: 3.3.6 + optional: true + fs.realpath@1.0.0: {} fsevents@2.3.3: @@ -9797,6 +9972,19 @@ snapshots: fuzzysort@3.1.0: {} + gauge@3.0.2: + dependencies: + aproba: 2.0.0 + color-support: 1.1.3 + console-control-strings: 1.1.0 + has-unicode: 2.0.1 + object-assign: 4.1.1 + signal-exit: 3.0.7 + string-width: 4.2.3 + 
strip-ansi: 6.0.1 + wide-align: 1.1.5 + optional: true + gaxios@6.7.1: dependencies: extend: 3.0.2 @@ -9975,6 +10163,9 @@ snapshots: dependencies: has-symbols: 1.1.0 + has-unicode@2.0.1: + optional: true + hasown@2.0.2: dependencies: function-bind: 1.1.2 @@ -10479,7 +10670,7 @@ snapshots: jest-util: 29.7.0 pretty-format: 29.7.0 - jest-environment-jsdom@29.7.0: + jest-environment-jsdom@29.7.0(canvas@2.11.2): dependencies: '@jest/environment': 29.7.0 '@jest/fake-timers': 29.7.0 @@ -10488,7 +10679,9 @@ snapshots: '@types/node': 16.18.126 jest-mock: 29.7.0 jest-util: 29.7.0 - jsdom: 20.0.3 + jsdom: 20.0.3(canvas@2.11.2) + optionalDependencies: + canvas: 2.11.2 transitivePeerDependencies: - bufferutil - supports-color @@ -10719,7 +10912,7 @@ snapshots: dependencies: argparse: 2.0.1 - jsdom@20.0.3: + jsdom@20.0.3(canvas@2.11.2): dependencies: abab: 2.0.6 acorn: 8.14.0 @@ -10747,6 +10940,8 @@ snapshots: whatwg-url: 11.0.0 ws: 8.18.0 xml-name-validator: 4.0.0 + optionalDependencies: + canvas: 2.11.2 transitivePeerDependencies: - bufferutil - supports-color @@ -11302,6 +11497,9 @@ snapshots: mimic-fn@2.1.0: {} + mimic-response@2.1.0: + optional: true + minimatch@10.0.1: dependencies: brace-expansion: 2.0.1 @@ -11320,12 +11518,32 @@ snapshots: minimist@1.2.8: {} + minipass@3.3.6: + dependencies: + yallist: 4.0.0 + optional: true + + minipass@5.0.0: + optional: true + + minizlib@2.1.2: + dependencies: + minipass: 3.3.6 + yallist: 4.0.0 + optional: true + + mkdirp@1.0.4: + optional: true + moment@2.29.4: {} ms@2.1.3: {} mustache@4.2.0: {} + nan@2.22.2: + optional: true + nanoid@3.3.8: {} natural-compare@1.4.0: {} @@ -11346,12 +11564,25 @@ snapshots: node-releases@2.0.19: {} + nopt@5.0.0: + dependencies: + abbrev: 1.1.1 + optional: true + normalize-path@3.0.0: {} npm-run-path@4.0.1: dependencies: path-key: 3.1.1 + npmlog@5.0.1: + dependencies: + are-we-there-yet: 2.0.0 + console-control-strings: 1.1.0 + gauge: 3.0.2 + set-blocking: 2.0.0 + optional: true + nwsapi@2.2.16: {} 
object-assign@4.1.1: {} @@ -11788,6 +12019,13 @@ snapshots: string_decoder: 0.10.31 optional: true + readable-stream@3.6.2: + dependencies: + inherits: 2.0.4 + string_decoder: 1.3.0 + util-deprecate: 1.0.2 + optional: true + readdirp@4.1.2: {} reconnecting-eventsource@1.6.4: {} @@ -11972,6 +12210,9 @@ snapshots: transitivePeerDependencies: - supports-color + set-blocking@2.0.0: + optional: true + set-function-length@1.2.2: dependencies: define-data-property: 1.1.4 @@ -12040,6 +12281,16 @@ snapshots: signal-exit@4.1.0: {} + simple-concat@1.0.1: + optional: true + + simple-get@3.1.1: + dependencies: + decompress-response: 4.2.1 + once: 1.4.0 + simple-concat: 1.0.1 + optional: true + simple-git@3.27.0: dependencies: '@kwsites/file-exists': 1.1.1 @@ -12146,6 +12397,11 @@ snapshots: string_decoder@0.10.31: optional: true + string_decoder@1.3.0: + dependencies: + safe-buffer: 5.2.1 + optional: true + stringify-entities@4.0.4: dependencies: character-entities-html4: 2.1.0 @@ -12244,6 +12500,16 @@ snapshots: string-width: 4.2.3 strip-ansi: 6.0.1 + tar@6.2.1: + dependencies: + chownr: 2.0.0 + fs-minipass: 2.1.0 + minipass: 5.0.0 + minizlib: 2.1.2 + mkdirp: 1.0.4 + yallist: 4.0.0 + optional: true + test-exclude@6.0.0: dependencies: '@istanbuljs/schema': 0.1.3 @@ -12566,6 +12832,11 @@ snapshots: dependencies: isexe: 2.0.0 + wide-align@1.1.5: + dependencies: + string-width: 4.2.3 + optional: true + word-wrap@1.2.5: {} wordwrap@1.0.0: {} @@ -12600,6 +12871,9 @@ snapshots: yallist@3.1.1: {} + yallist@4.0.0: + optional: true + yaml@2.7.0: {} yargs-parser@21.1.1: {} diff --git a/src/components/chat-view/ChatView.tsx b/src/components/chat-view/ChatView.tsx index 7a2c762..f6200c1 100644 --- a/src/components/chat-view/ChatView.tsx +++ b/src/components/chat-view/ChatView.tsx @@ -576,7 +576,7 @@ const Chat = forwardRef((props, ref) => { if (!opFile) { throw new Error(`File not found: ${toolArgs.filepath}`) } - const fileContent = await readTFileContent(opFile, app.vault) + const 
fileContent = await readTFileContent(opFile, app.vault, app) const formattedContent = `[read_file for '${toolArgs.filepath}'] Result:\n${addLineNumbers(fileContent)}\n`; return { type: 'read_file', diff --git a/src/utils/obsidian.ts b/src/utils/obsidian.ts index 6b5dff5..b4d09aa 100644 --- a/src/utils/obsidian.ts +++ b/src/utils/obsidian.ts @@ -1,22 +1,62 @@ import * as path from 'path' -import { App, Editor, MarkdownView, TFile, TFolder, Vault, WorkspaceLeaf } from 'obsidian' +import { App, Editor, MarkdownView, TFile, TFolder, Vault, WorkspaceLeaf, loadPdfJs } from 'obsidian' import { MentionableBlockData } from '../types/mentionable' +export async function parsePdfContent(file: TFile, app: App): Promise { + try { + // 使用 Obsidian 内置的 PDF.js + const pdfjsLib = await loadPdfJs() + + // Read PDF file as binary buffer + const pdfBuffer = await app.vault.readBinary(file) + + // 使用 Obsidian 内置的 PDF.js 处理 PDF + const loadingTask = pdfjsLib.getDocument({ data: pdfBuffer }) + const doc = await loadingTask.promise + let fullText = '' + + for (let pageNum = 1; pageNum <= doc.numPages; pageNum++) { + const page = await doc.getPage(pageNum) + const textContent = await page.getTextContent() + const pageText = textContent.items + .map((item: any) => item.str) + .join(' ') + fullText += pageText + '\n\n' + } + + return fullText || '(Empty PDF content)' + } catch (error: any) { + console.error('Error parsing PDF:', error) + return `(Error reading PDF file: ${error?.message || 'Unknown error'})` + } +} + export async function readTFileContent( file: TFile, vault: Vault, + app?: App, ): Promise { + if (file.extension === 'pdf') { + if (app) { + return await parsePdfContent(file, app) + } + return "(PDF file, app context required for processing)" + } + if (file.extension != 'md') { + return "(Binary file, unable to display content)" + } return await vault.cachedRead(file) } export async function readMultipleTFiles( files: TFile[], vault: Vault, + app?: App, ): Promise { // Read 
files in parallel - const readPromises = files.map((file) => readTFileContent(file, vault)) + const readPromises = files.map((file) => readTFileContent(file, vault, app)) return await Promise.all(readPromises) } diff --git a/src/utils/prompt-generator.ts b/src/utils/prompt-generator.ts index cf80a15..175850a 100644 --- a/src/utils/prompt-generator.ts +++ b/src/utils/prompt-generator.ts @@ -7,7 +7,7 @@ import { McpHub } from '../core/mcp/McpHub' import { SystemPrompt } from '../core/prompts/system' import { RAGEngine } from '../core/rag/rag-engine' import { SelectVector } from '../database/schema' -import { ChatAssistantMessage, ChatMessage, ChatUserMessage } from '../types/chat' +import { ChatMessage, ChatUserMessage } from '../types/chat' import { ContentPart, RequestMessage } from '../types/llm/request' import { MentionableBlock, @@ -21,10 +21,14 @@ import { InfioSettings } from '../types/settings' import { CustomModePrompts, Mode, ModeConfig, getFullModeDetails } from "../utils/modes" import { - readTFileContent + readTFileContent, + readMultipleTFiles, + getNestedFiles, + parsePdfContent } from './obsidian' import { tokenCount } from './token' -import { YoutubeTranscript, isYoutubeUrl } from './youtube-transcript' +import { isVideoUrl, isYoutubeUrl } from './video-detector' +import { YoutubeTranscript } from './youtube-transcript' export function addLineNumbers(content: string, startLine: number = 1): string { const lines = content.split("\n") @@ -66,13 +70,20 @@ async function getFolderTreeContent(path: TFolder): Promise { } } -async function getFileOrFolderContent(path: TAbstractFile, vault: Vault): Promise { +async function getFileOrFolderContent(path: TAbstractFile, vault: Vault, app?: App): Promise { try { if (path instanceof TFile) { + if (path.extension === 'pdf') { + // Handle PDF files without line numbers + if (app) { + return await parsePdfContent(path, app) + } + return "(PDF file, app context required for processing)" + } if (path.extension != 
'md') { return "(Binary file, unable to display content)" } - return addLineNumbers(await readTFileContent(path, vault)) + return addLineNumbers(await readTFileContent(path, vault, app)) } else if (path instanceof TFolder) { const entries = path.children let folderContent = "" @@ -85,10 +96,18 @@ async function getFileOrFolderContent(path: TAbstractFile, vault: Vault): Promis fileContentPromises.push( (async () => { try { + if (entry.extension === 'pdf') { + // Handle PDF files in folders + if (app) { + const content = await parsePdfContent(entry, app) + return `\n${content}\n` + } + return `\n(PDF file, app context required for processing)\n` + } if (entry.extension != 'md') { return undefined } - const content = addLineNumbers(await readTFileContent(entry, vault)) + const content = addLineNumbers(await readTFileContent(entry, vault, app)) return `\n${content}\n` } catch (error) { return undefined @@ -196,18 +215,18 @@ export class PromptGenerator { ...compiledMessages.slice(-19) .filter((message) => !(message.role === 'assistant' && message.isToolResult)) .map((message): RequestMessage => { - if (message.role === 'user') { - return { - role: 'user', - content: message.promptContent ?? '', + if (message.role === 'user') { + return { + role: 'user', + content: message.promptContent ?? '', + } + } else { + return { + role: 'assistant', + content: message.content, + } } - } else { - return { - role: 'assistant', - content: message.content, - } - } - }), + }), ] return { @@ -336,7 +355,7 @@ export class PromptGenerator { .map((m) => m.file) let fileContentsPrompts = files.length > 0 ? (await Promise.all(files.map(async (file) => { - const content = await getFileOrFolderContent(file, this.app.vault) + const content = await getFileOrFolderContent(file, this.app.vault, this.app) return `\n${content}\n` }))).join('\n') : undefined @@ -347,7 +366,7 @@ export class PromptGenerator { .map((m) => m.folder) let folderContentsPrompts = folders.length > 0 ? 
(await Promise.all(folders.map(async (folder) => { - const content = await getFileOrFolderContent(folder, this.app.vault) + const content = await getFileOrFolderContent(folder, this.app.vault, this.app) return `\n${content}\n` }))).join('\n') : undefined @@ -387,7 +406,7 @@ export class PromptGenerator { .filter((m): m is MentionableFile => m.type === 'current-file') .first() const currentFileContent = currentFile && currentFile.file != null - ? await getFileOrFolderContent(currentFile.file, this.app.vault) + ? await getFileOrFolderContent(currentFile.file, this.app.vault, this.app) : undefined // Check if current file content should be included @@ -647,7 +666,7 @@ ${customInstruction} private async getCurrentFileMessage( currentFile: TFile, ): Promise { - const fileContent = await readTFileContent(currentFile, this.app.vault) + const fileContent = await readTFileContent(currentFile, this.app.vault, this.app) return { role: 'user', content: `# Inputs @@ -669,7 +688,7 @@ ${fileContent} return null; } - const fileContent = await readTFileContent(currentFile, this.app.vault); + const fileContent = await readTFileContent(currentFile, this.app.vault, this.app); const lines = fileContent.split('\n'); // 计算上下文范围,并处理边界情况 @@ -743,6 +762,12 @@ When writing out new markdown blocks, remember not to include "line_number|" at return linesWithNumbers.join('\n') } + + private async getPdfContent(file: TFile): Promise { + return await parsePdfContent(file, this.app) + } + + /** * TODO: Improve markdown conversion logic * - filter visually hidden elements @@ -763,4 +788,26 @@ ${transcript.map((t) => `${t.offset}: ${t.text}`).join('\n')}` return htmlToMarkdown(response.text) } + + private async callMcpToolGetWebsiteContent(url: string, mcpHub: McpHub | null): Promise { + if (isVideoUrl(url)) { + return this.callMcpToolConvertVideo(url, mcpHub) + } + return this.callMcpToolFetchUrlContent(url, mcpHub) + } + + private async callMcpToolConvertVideo(url: string, mcpHub: McpHub | null): 
Promise { + // TODO: implement + return '' + } + + private async callMcpToolFetchUrlContent(url: string, mcpHub: McpHub | null): Promise { + // TODO: implement + return '' + } + + private async callMcpToolConvertDocument(file: TFile, mcpHub: McpHub | null): Promise { + // TODO: implement + return '' + } } diff --git a/src/utils/video-detector.test.ts b/src/utils/video-detector.test.ts new file mode 100644 index 0000000..b197ad9 --- /dev/null +++ b/src/utils/video-detector.test.ts @@ -0,0 +1,107 @@ +import { + extractVideoId, + getSupportedVideoProviders, + getVideoProvider, + isBilibiliUrl, + isTikTokUrl, + isVideoUrl, + isVimeoUrl +} from './video-detector' + +describe('video-detector', () => { + describe('isVideoUrl', () => { + it('should correctly identify YouTube URLs', () => { + expect(isVideoUrl('https://www.youtube.com/watch?v=dQw4w9WgXcQ')).toBe(true) + expect(isVideoUrl('https://youtu.be/dQw4w9WgXcQ')).toBe(true) + }) + + it('should correctly identify Bilibili URLs', () => { + expect(isVideoUrl('https://www.bilibili.com/video/BV1GJ411x7h7')).toBe(true) + expect(isVideoUrl('https://b23.tv/BV1GJ411x7h7')).toBe(true) + }) + + it('should correctly identify Vimeo URLs', () => { + expect(isVideoUrl('https://vimeo.com/123456789')).toBe(true) + }) + + it('should correctly identify TikTok URLs', () => { + expect(isVideoUrl('https://www.tiktok.com/@username/video/1234567890')).toBe(true) + expect(isVideoUrl('https://vm.tiktok.com/ZMeABCDEF/')).toBe(true) + }) + + it('should correctly identify video file URLs', () => { + expect(isVideoUrl('https://example.com/video.mp4')).toBe(true) + expect(isVideoUrl('https://example.com/movie.avi?t=123')).toBe(true) + expect(isVideoUrl('https://example.com/clip.webm')).toBe(true) + }) + + it('should correctly reject non-video URLs', () => { + expect(isVideoUrl('https://www.google.com')).toBe(false) + expect(isVideoUrl('https://github.com/user/repo')).toBe(false) + 
expect(isVideoUrl('https://docs.google.com/document/123')).toBe(false) + }) + }) + + describe('getVideoProvider', () => { + it('should correctly identify YouTube provider', () => { + expect(getVideoProvider('https://www.youtube.com/watch?v=dQw4w9WgXcQ')).toBe('youtube') + expect(getVideoProvider('https://youtu.be/dQw4w9WgXcQ')).toBe('youtube') + }) + + it('should correctly identify Bilibili provider', () => { + expect(getVideoProvider('https://www.bilibili.com/video/BV1GJ411x7h7')).toBe('bilibili') + }) + + it('should correctly identify Vimeo provider', () => { + expect(getVideoProvider('https://vimeo.com/123456789')).toBe('vimeo') + }) + + it('should return null for non-video URLs', () => { + expect(getVideoProvider('https://www.google.com')).toBeNull() + expect(getVideoProvider('https://github.com/user/repo')).toBeNull() + }) + }) + + describe('extractVideoId', () => { + it('should extract YouTube video IDs', () => { + expect(extractVideoId('https://www.youtube.com/watch?v=dQw4w9WgXcQ')).toBe('dQw4w9WgXcQ') + expect(extractVideoId('https://youtu.be/dQw4w9WgXcQ')).toBe('dQw4w9WgXcQ') + }) + + it('should extract Vimeo video IDs', () => { + expect(extractVideoId('https://vimeo.com/123456789')).toBe('123456789') + }) + + it('should return null for non-video URLs', () => { + expect(extractVideoId('https://www.google.com')).toBeNull() + }) + }) + + describe('platform-specific detectors', () => { + it('should correctly detect Bilibili URLs', () => { + expect(isBilibiliUrl('https://www.bilibili.com/video/BV1GJ411x7h7')).toBe(true) + expect(isBilibiliUrl('https://www.youtube.com/watch?v=123')).toBe(false) + }) + + it('should correctly detect Vimeo URLs', () => { + expect(isVimeoUrl('https://vimeo.com/123456789')).toBe(true) + expect(isVimeoUrl('https://www.youtube.com/watch?v=123')).toBe(false) + }) + + it('should correctly detect TikTok URLs', () => { + expect(isTikTokUrl('https://www.tiktok.com/@user/video/123')).toBe(true) + 
/**
 * Video URL detection utilities.
 *
 * Recognises links to a set of video platforms, plus direct links to video
 * files and streaming manifests, purely by matching the URL text against the
 * regular expressions in `VIDEO_PATTERNS`.
 */

/**
 * Fragment placed in front of every host-based pattern: the host name must sit
 * at the start of the string or follow a character that cannot be part of a
 * host label. Without it `x\.com` also matches "netflix.com" and `vimeo\.com`
 * matches "notvimeo.com". A `.` before the host is still accepted, so
 * sub-domains such as "www." keep matching.
 *
 * Written as an alternation rather than a lookbehind because lookbehind
 * assertions are a syntax error on older WebKit-based runtimes.
 */
const HOST_BOUNDARY = '(?:^|[^A-Za-z0-9-])'

/**
 * Builds a case-insensitive pattern that matches `hostAndPath` only when it
 * starts at a host boundary. The prefix is non-capturing, so capture group
 * numbers of `hostAndPath` are unchanged.
 */
function onHost(hostAndPath: RegExp): RegExp {
	return new RegExp(`${HOST_BOUNDARY}(?:${hostAndPath.source})`, 'i')
}

/**
 * One pattern per supported provider.
 *
 * Convention: when a pattern has a capture group, group 1 is the video ID.
 * Patterns that cannot yield an ID (plain files, stream manifests) use only
 * non-capturing groups so that `extractVideoId` returns null for them.
 * Insertion order matters: `getVideoProvider` reports the first match.
 */
const VIDEO_PATTERNS = {
	// YouTube (watch, embed, /v/ and youtu.be links); the ID is 11 characters
	youtube: onHost(/(?:youtube\.com\/(?:[^/]+\/.+\/|(?:v|e(?:mbed)?)\/|.*[?&]v=)|youtu\.be\/)([^"&?/\s]{11})/),

	// Bilibili (including b23.tv short links)
	bilibili: onHost(/(?:bilibili\.com\/video\/|b23\.tv\/)([A-Za-z0-9]+)/),

	// Vimeo
	vimeo: onHost(/vimeo\.com\/([0-9]+)/),

	// Dailymotion (including dai.ly short links)
	dailymotion: onHost(/(?:dailymotion\.com\/video\/|dai\.ly\/)([A-Za-z0-9]+)/),

	// TikTok (including vm.tiktok.com short links)
	tiktok: onHost(/(?:tiktok\.com\/@[^/]+\/video\/|vm\.tiktok\.com\/)([A-Za-z0-9]+)/),

	// Twitch videos and clips
	twitch: onHost(/(?:twitch\.tv\/videos\/|clips\.twitch\.tv\/)([A-Za-z0-9]+)/),

	// Tencent Video
	tencent: onHost(/v\.qq\.com\/x\/(?:cover|page)\/([A-Za-z0-9]+)/),

	// iQIYI
	iqiyi: onHost(/iqiyi\.com\/v_([A-Za-z0-9]+)/),

	// Youku
	youku: onHost(/youku\.com\/v_show\/id_([A-Za-z0-9]+)/),

	// Facebook/Meta
	facebook: onHost(/(?:facebook\.com\/watch\/|fb\.watch\/)([A-Za-z0-9]+)/),

	// Instagram posts and reels
	instagram: onHost(/instagram\.com\/(?:p|reel)\/([A-Za-z0-9_-]+)/),

	// Twitter/X status links
	twitter: onHost(/(?:twitter|x)\.com\/[^/]+\/status\/([0-9]+)/),

	// Douyin
	douyin: onHost(/douyin\.com\/video\/([0-9]+)/),

	// Kuaishou
	kuaishou: onHost(/kuaishou\.com\/short-video\/([A-Za-z0-9]+)/),

	// Xiaohongshu
	xiaohongshu: onHost(/xiaohongshu\.com\/explore\/([A-Za-z0-9]+)/),

	// Weibo video (the weibo.cn/sinaurl form carries no ID)
	weibo: onHost(/weibo\.com\/[^/]+\/([A-Za-z0-9]+)|weibo\.cn\/sinaurl/),

	// Rumble
	rumble: onHost(/rumble\.com\/([A-Za-z0-9_-]+)/),

	// Odysee
	odysee: onHost(/odysee\.com\/@[^/]+\/([A-Za-z0-9_-]+)/),

	// JW Player (generic embedded player)
	jwplayer: onHost(/jwplayer\.com\/players\/([A-Za-z0-9_-]+)/),

	// Direct link to a video file; an optional query string or fragment may follow
	videoFile: /\.(?:mp4|avi|mov|wmv|flv|webm|mkv|m4v|3gp|ogv)(?:[?#].*)?$/i,

	// Streaming manifest whose URL also contains a streaming-related keyword
	streaming: /(?:stream|live|video|watch|play).*\.(?:m3u8|mpd|f4m)(?:[?#].*)?$/i
}

export type VideoProvider = keyof typeof VIDEO_PATTERNS

/** Provider names in declaration order; the single place the key cast happens. */
const PROVIDERS = Object.keys(VIDEO_PATTERNS) as VideoProvider[]

/**
 * Identifies which provider a URL belongs to.
 * @param url URL to inspect
 * @returns the first provider (in declaration order) whose pattern matches, or null
 */
export function getVideoProvider(url: string): VideoProvider | null {
	return PROVIDERS.find((provider) => VIDEO_PATTERNS[provider].test(url)) ?? null
}

/**
 * Tells whether a URL points at video content.
 * @param url URL to inspect
 * @returns true when any provider pattern matches
 */
export function isVideoUrl(url: string): boolean {
	return getVideoProvider(url) !== null
}

/**
 * Tells whether a URL matches one specific provider.
 * @param url URL to inspect
 * @param provider provider to test against
 * @returns true when the provider's pattern matches; false for a provider
 *          name that has no pattern (possible only for untyped callers)
 */
export function isVideoUrlFromProvider(url: string, provider: VideoProvider): boolean {
	const pattern: RegExp | undefined = VIDEO_PATTERNS[provider]
	return pattern instanceof RegExp && pattern.test(url)
}

/**
 * Extracts the video ID from a URL when the matching pattern captures one.
 * @param url video URL
 * @returns capture group 1 of the first matching provider pattern, or null when
 *          nothing matches or the pattern captures no ID (e.g. a plain .mp4 link)
 */
export function extractVideoId(url: string): string | null {
	const provider = getVideoProvider(url)
	if (provider === null) return null

	const match = VIDEO_PATTERNS[provider].exec(url)
	return match?.[1] ?? null
}

/**
 * Lists the supported providers.
 * @returns a new array of provider names in declaration order
 */
export function getSupportedVideoProviders(): VideoProvider[] {
	return [...PROVIDERS]
}

// Kept for backward compatibility with the original YouTube-only check.
export function isYoutubeUrl(url: string): boolean {
	return isVideoUrlFromProvider(url, 'youtube')
}

// Convenience detectors for commonly used platforms.
export const isYouTubeUrl = isYoutubeUrl // alias
export const isBilibiliUrl = (url: string) => isVideoUrlFromProvider(url, 'bilibili')
export const isVimeoUrl = (url: string) => isVideoUrlFromProvider(url, 'vimeo')
export const isTikTokUrl = (url: string) => isVideoUrlFromProvider(url, 'tiktok')
export const isTwitchUrl = (url: string) => isVideoUrlFromProvider(url, 'twitch')
/**
 * Fetches the content of `url` without any external reader service.
 *
 * - Non-video URLs are downloaded with `requestUrl` and converted with
 *   `htmlToMarkdown`.
 * - YouTube URLs yield the title plus one "offset: text" line per transcript
 *   entry; if transcript retrieval throws, a warning is logged and a
 *   placeholder note is returned instead.
 * - Any other video URL yields a placeholder note naming the platform.
 */
async function fetchByLocalTool(url: string): Promise<string> {
	// Ordinary page: fetch it and convert the HTML.
	if (!isVideoUrl(url)) {
		const page = await requestUrl({ url })
		return htmlToMarkdown(page.text)
	}

	const platform = getVideoProvider(url)

	// Video platforms other than YouTube: only report what was detected.
	if (platform !== 'youtube') {
		return [
			`Video Content Detected: ${url}`,
			`Platform: ${platform || 'Unknown'}`,
			'Note: This is a video content. Please use specialized video processing tools for content analysis.',
		].join('\n')
	}

	// YouTube: reuse the existing transcript fetcher.
	try {
		// TODO: pass language based on user preferences
		const { title, transcript } =
			await YoutubeTranscript.fetchTranscriptAndMetadata(url)
		const transcriptLines = transcript.map((entry) => `${entry.offset}: ${entry.text}`)

		return [`Title: ${title}`, 'Video Transcript:', transcriptLines.join('\n')].join('\n')
	} catch (error) {
		console.warn('Failed to extract YouTube transcript:', error)
		// Transcript unavailable: fall back to a placeholder note.
		return [
			`Video Content Detected: ${url}`,
			'Platform: YouTube',
			'Note: This is a video content. Transcript extraction failed. Please use specialized video processing tools for content analysis.',
		].join('\n')
	}
}