Skip to content

Commit

Permalink
Automatically sanitize input sent to copilot API
Browse files Browse the repository at this point in the history
Removes all non-standard characters to avoid issues with copilot API
(especially claude).

Possibly related: #464

Signed-off-by: Tomas Slusny <[email protected]>
  • Loading branch information
deathbeam committed Nov 10, 2024
1 parent 18d5175 commit eaa5341
Showing 1 changed file with 22 additions and 10 deletions.
32 changes: 22 additions & 10 deletions lua/CopilotChat/copilot.lua
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,16 @@ local function machine_id()
return hex
end

--- Strip characters outside the alphanumeric, punctuation and whitespace classes.
-- Intended to run over request bodies before they are sent to the API.
-- @param text string|nil Text to sanitize
-- @return string The sanitized text; an empty string when `text` is nil or false
local function sanitize_text(text)
  if not text then
    return ''
  end

  -- Keep basic ASCII, numbers, common punctuation and whitespace
  -- Remove everything else (e.g. special characters, emojis)
  -- NOTE: `%s` already matches newlines, so the explicit `\n` is redundant but harmless.
  -- NOTE(review): `%w`/`%p`/`%s` follow the current C locale; under the default "C"
  -- locale every byte >= 0x80 (i.e. all multi-byte UTF-8 text) is dropped.
  --
  -- string.gsub returns (result, substitution_count). The surrounding parentheses
  -- truncate that to the string alone so the count never leaks into a caller's
  -- argument list or table constructor.
  return (text:gsub('[^%w%p%s\n]', ''))
end

local function find_config_path()
local config = vim.fn.expand('$XDG_CONFIG_HOME')
if config and vim.fn.isdirectory(config) > 0 then
Expand Down Expand Up @@ -561,15 +571,17 @@ function Copilot:ask(prompt, opts)

-- Generate the request
local url = 'https://api.githubcopilot.com/chat/completions'
local body = vim.json.encode(
generate_ask_request(
self.history,
prompt,
embeddings_message,
selection_message,
system_prompt,
model,
temperature
local body = sanitize_text(
vim.json.encode(
generate_ask_request(
self.history,
prompt,
embeddings_message,
selection_message,
system_prompt,
model,
temperature
)
)
)

Expand Down Expand Up @@ -744,7 +756,7 @@ function Copilot:embed(inputs, opts)

local jobs = {}
for _, chunk in ipairs(chunks) do
local body = vim.json.encode(generate_embedding_request(chunk, model))
local body = sanitize_text(vim.json.encode(generate_embedding_request(chunk, model)))

table.insert(jobs, function(resolve)
local headers = generate_headers(self.token.token, self.sessionid, self.machineid)
Expand Down

0 comments on commit eaa5341

Please sign in to comment.