diff --git a/README.md b/README.md index 9caec9e2..c83afdc8 100644 --- a/README.md +++ b/README.md @@ -47,7 +47,7 @@ - _(Optional)_ [git](https://git-scm.com/) - Used for fetching git diffs for `git` context - For Arch Linux users, you can install [`git`](https://archlinux.org/packages/extra/x86_64/git) from the official repositories - For other systems, use your package manager to install `git`. For windows use the installer provided from git site -- _(Optional)_ [lynx](https://lynx.invisible-island.net/) - Used for fetching textual representation of URLs for `url` context +- _(Optional)_ [lynx](https://lynx.invisible-island.net/) - Used for improved fetching of URLs for `url` context - For Arch Linux users, you can install [`lynx`](https://archlinux.org/packages/extra/x86_64/lynx) from the official repositories - For other systems, use your package manager to install `lynx`. For windows use the installer provided from lynx site @@ -281,7 +281,7 @@ Default contexts are: - `git` - Requires `git`. Includes current git diff in chat context. Supports input (default unstaged). - `git:unstaged` - Includes unstaged changes in chat context. - `git:staged` - Includes staged changes in chat context. -- `url` - Requires `lynx`. Includes content of provided URL in chat context. Supports input. +- `url` - Includes content of provided URL in chat context. Supports input. - `register` - Includes contents of register in chat context. Supports input (default +, e.g clipboard). You can define custom contexts like this: diff --git a/lua/CopilotChat/config.lua b/lua/CopilotChat/config.lua index 4c760d47..d2a1974f 100644 --- a/lua/CopilotChat/config.lua +++ b/lua/CopilotChat/config.lua @@ -248,7 +248,7 @@ return { end, }, url = { - description = 'Requires `lynx`. Includes content of provided URL in chat context. Supports input.', + description = 'Includes content of provided URL in chat context. Supports input.', input = function(callback) vim.ui.input({ prompt = 'Enter URL> ', diff --git a/lua/CopilotChat/context.lua b/lua/CopilotChat/context.lua index 026a806c..2921ad86 100644 --- a/lua/CopilotChat/context.lua +++ b/lua/CopilotChat/context.lua @@ -423,11 +423,45 @@ function M.url(url) local content = url_cache[url] if not content then notify.publish(notify.STATUS, 'Fetching ' .. url) - local out = utils.system({ 'lynx', '-dump', url }) - if not out or out.code ~= 0 then - return nil + + local ok, out = async.util.apcall(utils.system, { 'lynx', '-dump', url }) + if ok and out and out.code == 0 then + -- Use lynx to fetch content + content = out.stdout + else + -- Fallback to curl if lynx fails + local response = utils.curl_get(url, { raw = { '-L' } }) + if not response or not response.body then + return nil + end + + content = vim.trim(response + .body + -- Remove script, style tags and their contents first + :gsub( + '', + '' + ) + :gsub('', '') + -- Remove XML/CDATA in one go + :gsub('', '') + -- Remove all HTML tags (both opening and closing) in one go + :gsub( + '<%/?%w+[^>]*>', + ' ' + ) + -- Handle common HTML entities + :gsub('&(%w+);', { + nbsp = ' ', + lt = '<', + gt = '>', + amp = '&', + quot = '"', + }) + -- Remove any remaining HTML entities (numeric or named) + :gsub('&#?%w+;', '')) end - content = out.stdout + url_cache[url] = content end diff --git a/lua/CopilotChat/health.lua b/lua/CopilotChat/health.lua index d8ffed6c..3908c74c 100644 --- a/lua/CopilotChat/health.lua +++ b/lua/CopilotChat/health.lua @@ -82,7 +82,7 @@ function M.check() local lynx_version = run_command('lynx', '-version') if lynx_version == false then warn( - 'lynx: missing, optional for fetching url contents. See "https://lynx.invisible-island.net/".' + 'lynx: missing, optional for improved fetching of url contents. See "https://lynx.invisible-island.net/".' ) else ok('lynx: ' .. lynx_version)