VSCode Extension Bugfix (openvinotoolkit#728)
Add timeout handling on backend
Fix Parsing of Complex Types for Docstring Generation
Fix completion acceptance that starts with tab
Update README.md
Update settings description
apaniukov authored Sep 18, 2023
1 parent 57388ff commit cabe453
Showing 18 changed files with 218 additions and 95 deletions.
49 changes: 38 additions & 11 deletions modules/openvino_code/README.md
@@ -1,23 +1,50 @@
# OpenVINO Code - VSCode extension for AI code completion with OpenVINO™

VSCode extension that helps developers write code with an AI code assistant.
OpenVINO Code works with a Large Language Model for Code (Code LLM) deployed on a local server
or on a remote server using [Remote Explorer](https://marketplace.visualstudio.com/items?itemName=ms-vscode.remote-explorer).

OpenVINO Code provides the following features:
- Inline Code Completion
- Summarization via Docstring

## Working with the Extension

### Starting Server

On the extension side panel, choose your preferred model from the available options.
The features supported by the selected model will be displayed under the model selector.

Once the server is up and running, the extension sidebar shows instructions for using the functions available with the selected model.
There you can also check the server status and the connection status.
Additionally, the connection status is shown in the VSCode status bar.
To check the connection manually, use the `Check Connection` button located on the side panel.

### Code Completion

1. Create a new Python file or open an existing one.
1. Type `def main():` or place the cursor where you'd like code suggestions to be generated.
1. Press the keyboard shortcut `Ctrl+Alt+Space` or click the `Generate Code Completion` button located in the side panel.
1. Use the `Tab` key to accept the entire suggestion or `Ctrl`+`Right Arrow` to accept it word by word. To decline the suggestion, press `Esc`.

You can customize the length of the generated code by adjusting `Max New Tokens` and `Min New Tokens` parameters in the extension settings.
The number of generated tokens is also influenced by the `Server Request Timeout` setting.

To enable streaming generation mode, check the `Stream Inline Completion` checkbox in the extension settings.
This mode allows you to immediately receive model output and avoid problems with server response timeouts.

### Summarization via Docstring Generation

To generate a docstring for a function, start typing `"""` or `'''` right under the function signature and choose `Generate Docstring`.
You can select the desired quote style in the extension settings.

The model can also produce a docstring in Code Completion mode, but in that case the result cannot be controlled.
In docstring generation mode, several popular docstring templates are available in the settings to guide the model output.
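For illustration, given a function like the one below, you would type the opening `"""` on the line directly under the signature and pick `Generate Docstring`; the docstring shown here is a hypothetical example of what the command might produce:

```python
def add(a: int, b: int) -> int:
    """Add two integers and return their sum."""  # hypothetical generated docstring
    return a + b
```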

### Monitoring Extension Output

To examine the input and output of the code generation API, follow these steps:

1. Open the OpenVINO Code side panel.
1. Choose between two options: `Show Server Log` or `Show Extension Log`.
4 changes: 2 additions & 2 deletions modules/openvino_code/package-lock.json


73 changes: 27 additions & 46 deletions modules/openvino_code/package.json
@@ -1,7 +1,7 @@
{
"publisher": "OpenVINO",
"name": "openvino-code-completion",
"version": "0.0.3",
"version": "0.0.4",
"displayName": "OpenVINO Code Completion",
"description": "VSCode extension for AI code completion with OpenVINO",
"icon": "media/logo.png",
@@ -186,76 +186,52 @@
"type": "integer",
"exclusiveMinimum": 0,
"default": 30,
"markdownDescription": "Server request timeout in seconds after which request will be aborted."
"markdownDescription": "Server request timeout in seconds after which request will be aborted. Constrains the number of generated tokens in non-streaming mode."
},
"openvinoCode.minNewTokens": {
"order": 3,
"type": "number",
"default": 1,
"description": "Minimum number of new tokens to generate."
},
"openvinoCode.maxNewTokens": {
"order": 3,
"type": "number",
"default": 100,
"description": "Maximum number of new tokens to generate."
},
"openvinoCode.streamInlineCompletion": {
"order": 3,
"type": "boolean",
"default": false,
"description": "When checked, inline completion will be generated in streaming mode."
},
"openvinoCode.fillInTheMiddleMode": {
"order": 4,
"type": "boolean",
"default": false,
"markdownDescription": "When checked, text before (above) and after (below) the cursor will be used for completion generation. When unchecked, only text before (above) the cursor will be used."
},
"openvinoCode.temperature": {
"order": 5,
"order": 4,
"type": "number",
"default": 0.2,
"description": "Non-zero value. Higher values produce more diverse code suggestions, while lower values emphasize the most likely tokens."
},
"openvinoCode.topK": {
"order": 5,
"order": 4,
"type": "integer",
"default": 10,
"description": "Select the next word during suggestion generation from the top K candidates. Improves diversity of generated suggestions."
},
"openvinoCode.topP": {
"order": 5,
"order": 4,
"type": "number",
"default": 1,
"description": "A value between 0 and 1. Similar to Top K, it adjusts the number of candidate words based on their probability. Candidates will be added for selection until the cumulative probability exceeds P."
},
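The `temperature`, `topK`, and `topP` settings above correspond to standard sampling heuristics. As a rough illustration (not the extension's actual implementation), top-p filtering keeps the smallest set of highest-probability candidates whose cumulative probability reaches `P`:

```python
def top_p_candidates(probs, p):
    """Keep the highest-probability tokens until their cumulative
    probability reaches p (nucleus / top-p filtering)."""
    ranked = sorted(probs.items(), key=lambda kv: kv[1], reverse=True)
    chosen, cumulative = [], 0.0
    for token, prob in ranked:
        chosen.append(token)
        cumulative += prob
        if cumulative >= p:
            break
    return chosen

# With p=0.9 the lowest-probability candidate is filtered out:
print(top_p_candidates({"return": 0.5, "pass": 0.3, "yield": 0.15, "raise": 0.05}, 0.9))
```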
"openvinoCode.minNewTokens": {
"order": 6,
"openvinoCode.repetitionPenalty": {
"order": 4,
"type": "number",
"default": 1,
"description": "Minimum of new generated tokens."
},
"openvinoCode.maxNewTokens": {
"order": 6,
"type": "number",
"default": 100,
"description": "Maximum of new generated tokens."
},
"openvinoCode.startToken": {
"order": 7,
"type": "string",
"default": "<fim_prefix>",
"markdownDescription": "String that is sent to server is in format: `{startToken}{text above cursor}{middleToken}{text below cursor if fillInTheMiddleMode=true}{endToken}`. Leave `startToken`, `middleToken`, or `endToken` empty if there is no special token for those placements."
},
"openvinoCode.middleToken": {
"order": 8,
"type": "string",
"default": "<fim_middle>",
"markdownDescription": "String that is sent to server is in format: `{startToken}{text above cursor}{middleToken}{text below cursor if fillInTheMiddleMode=true}{endToken}`. Leave `startToken`, `middleToken`, or `endToken` empty if there is no special token for those placements."
},
"openvinoCode.endToken": {
"order": 9,
"type": "string",
"default": "<fim_suffix>",
"markdownDescription": "String that is sent to server is in format: `{startToken}{text above cursor}{middleToken}{text below cursor if fillInTheMiddleMode=true}{endToken}`. Leave `startToken`, `middleToken`, or `endToken` empty if there is no special token for those placements."
},
"openvinoCode.stopToken": {
"order": 10,
"type": "string",
"default": "<|endoftext|>",
"description": "(Optional) Stop token."
"description": "A non-negative value that discourages the repetition of the same words. 1.0 means no penalty."
},
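A repetition penalty is typically applied CTRL-style, as in common generation libraries: scores of tokens that already appeared are scaled down before sampling. A minimal sketch of that convention (illustrative, not the extension's code):

```python
def apply_repetition_penalty(logits, generated_ids, penalty):
    """Scale down the scores of already-generated tokens.

    Positive scores are divided by the penalty and negative scores are
    multiplied by it, so penalty > 1.0 always lowers the score and
    penalty == 1.0 leaves the logits unchanged.
    """
    out = dict(logits)
    for token_id in set(generated_ids):
        score = out[token_id]
        out[token_id] = score / penalty if score > 0 else score * penalty
    return out

# Tokens 1 (score 2.0) and 2 (score -1.0) were already generated:
print(apply_repetition_penalty({1: 2.0, 2: -1.0, 3: 0.5}, [1, 2], 2.0))
```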
"openvinoCode.quoteStyle": {
"order": 11,
"order": 5,
"type": "string",
"default": "\"\"\"",
"enum": [
@@ -265,7 +241,7 @@
"description": "Quote style used with the generate docstring command"
},
"openvinoCode.docstringFormat": {
"order": 12,
"order": 6,
"type": "string",
"default": "google_summary_only",
"enum": [
@@ -292,6 +268,11 @@
"key": "escape",
"mac": "escape",
"when": "openvinoCode.generating"
},
{
"command": "openvinoCode.acceptInlineCompletion",
"key": "tab",
"when": "inlineSuggestionVisible && !editorHoverFocused && !editorTabMovesFocus && !suggestWidgetVisible"
}
]
},
9 changes: 7 additions & 2 deletions modules/openvino_code/server/src/app.py
@@ -1,5 +1,5 @@
from time import perf_counter
from typing import Dict, Union
from typing import Dict, Optional, Union

from fastapi import Depends, FastAPI, Request
from fastapi.responses import RedirectResponse, StreamingResponse
@@ -20,6 +20,9 @@ class GenerationParameters(BaseModel):
max_new_tokens: int = 60
min_new_tokens: int = 0

timeout: Optional[int] = None
repetition_penalty: float = 1.0


class GenerationRequest(BaseModel):
inputs: str
@@ -110,7 +113,9 @@ async def generate_stream(
) -> StreamingResponse:
generation_request = TypeAdapter(GenerationRequest).validate_python(await request.json())
logger.info(generation_request)
return StreamingResponse(generator.generate_stream(generation_request.inputs, generation_request.parameters.model_dump(), request))
return StreamingResponse(
generator.generate_stream(generation_request.inputs, generation_request.parameters.model_dump(), request)
)
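The extended `GenerationParameters` model above adds `timeout` and `repetition_penalty`. A dependency-free sketch of the same shape (using `dataclasses` in place of Pydantic; the `temperature` default is an assumption, since it is not shown in this hunk):

```python
from dataclasses import dataclass, asdict
from typing import Optional

@dataclass
class GenerationParameters:
    temperature: float = 0.2         # assumed default; not shown in the diff
    max_new_tokens: int = 60
    min_new_tokens: int = 0
    timeout: Optional[int] = None    # new: server-side generation time budget, seconds
    repetition_penalty: float = 1.0  # new: 1.0 means no penalty

params = GenerationParameters(timeout=30)
print(asdict(params)["timeout"])  # -> 30; the dict form is what reaches model.generate
```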


@app.post("/api/summarize", status_code=200, response_model=GenerationResponse)
40 changes: 34 additions & 6 deletions modules/openvino_code/server/src/generators.py
@@ -4,6 +4,7 @@
from io import StringIO
from pathlib import Path
from threading import Thread
from time import time
from typing import Any, Callable, Container, Dict, Generator, List, Optional, Type, Union

import torch
@@ -116,10 +117,14 @@ def __init__(
stop_tokens.append(token_id)
self.summarize_stopping_criteria = StoppingCriteriaList([StopOnTokens(stop_tokens)])

def __call__(
self, input_text: str, parameters: Dict[str, Any], stopping_criteria: Optional[StoppingCriteriaList] = None
) -> str:
def __call__(self, input_text: str, parameters: Dict[str, Any]) -> str:
input_ids = self.tokenizer.encode(input_text, return_tensors="pt")

stopping_criteria = None
if (timeout := parameters.pop("timeout", None)) is not None:
stop_on_time = StopOnTime(timeout)
stopping_criteria = StoppingCriteriaList([stop_on_time])

prompt_len = input_ids.shape[-1]
config = GenerationConfig.from_dict({**self.generation_config.to_dict(), **parameters})
output_ids = self.model.generate(
@@ -128,7 +133,9 @@ def __call__(
logger.info(f"Number of input tokens: {prompt_len}; generated {len(output_ids)} tokens")
return self.tokenizer.decode(output_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)

async def generate_stream(self, input_text: str, parameters: Dict[str, Any], request: Request = None):
async def generate_stream(
self, input_text: str, parameters: Dict[str, Any], request: Optional[Request] = None
) -> Generator[str, None, None]:
input_ids = self.tokenizer.encode(input_text, return_tensors="pt")
streamer = TextIteratorStreamer(self.tokenizer, skip_prompt=True, skip_special_tokens=True)
parameters["streamer"] = streamer
@@ -149,8 +156,8 @@ async def listen():
message = await request.receive()
if message.get("type") == "http.disconnect":
stop_on_tokens.cancelled = True
asyncio.create_task(listen())

asyncio.create_task(listen())

listen_thread = Thread(target=listen_for_disconnect)
# thread.run doesn't actually start a new thread
@@ -192,7 +199,6 @@

decoded = self.tokenizer.decode(prompt[0, prev_len:], skip_special_tokens=True)
buffer.write(decoded.lstrip(" "))  # hack to delete leading spaces if there are any

buffer.write(input_parts[-1])
return buffer.getvalue()

@@ -277,3 +283,25 @@ def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwa
if self.cancelled:
return True
return torch.any(torch.eq(input_ids[0, -1], self.token_ids)).item()


class StopOnTime(StoppingCriteria):
def __init__(self, timeout: float, budget_reduction: float = 0.99) -> None:
self.time = time()
self.stop_until = self.time + timeout * budget_reduction
self.time_for_prev_token = 0.0
self.grow_factor = 0.0

def __call__(self, *args, **kwargs) -> bool:
current_time = time()
if current_time > self.stop_until:
return True

elapsed = current_time - self.time
if self.time_for_prev_token > 0:
self.grow_factor = elapsed / self.time_for_prev_token

self.time_for_prev_token = elapsed
self.time = current_time

return self.stop_until < current_time + self.time_for_prev_token * self.grow_factor
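The `StopOnTime` criterion stops when the time budget is exhausted, and also stops early if the extrapolated cost of the next token (previous token time scaled by the observed growth factor) would overrun the budget. The same logic can be exercised deterministically with an injected clock (a standalone re-implementation for illustration; the real class hooks into `transformers`' `StoppingCriteria`):

```python
class StopOnTimeSketch:
    """Standalone version of the time-budget stopping logic, with an
    injectable clock so the behaviour can be tested deterministically."""

    def __init__(self, timeout, clock, budget_reduction=0.99):
        self.clock = clock
        self.time = clock()
        self.stop_until = self.time + timeout * budget_reduction
        self.time_for_prev_token = 0.0
        self.grow_factor = 0.0

    def should_stop(self):
        current_time = self.clock()
        if current_time > self.stop_until:
            return True
        elapsed = current_time - self.time
        if self.time_for_prev_token > 0:
            self.grow_factor = elapsed / self.time_for_prev_token
        self.time_for_prev_token = elapsed
        self.time = current_time
        # stop early if the extrapolated next-token time would overrun the budget
        return self.stop_until < current_time + self.time_for_prev_token * self.grow_factor

fake_now = [0.0]
def clock():
    return fake_now[0]

stop = StopOnTimeSketch(timeout=10.0, clock=clock)
fake_now[0] = 1.0   # first token took 1s
print(stop.should_stop())   # False: well within budget
fake_now[0] = 20.0  # wall clock jumps past the budget
print(stop.should_stop())   # True
```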
54 changes: 53 additions & 1 deletion modules/openvino_code/src/configuration.ts
@@ -1,5 +1,6 @@
import { ModelName } from '@shared/model';
import { WorkspaceConfiguration } from 'vscode';
import { WorkspaceConfiguration, workspace } from 'vscode';
import { CONFIG_KEY } from './constants';

/**
* Extension configuration should match `contributes.configuration` properties in package.json
@@ -13,6 +14,7 @@ export type CustomConfiguration = {
temperature: number;
topK: number;
topP: number;
repetitionPenalty: number;
minNewTokens: number;
maxNewTokens: number;
startToken: string;
@@ -25,3 +27,53 @@ export type CustomConfiguration = {
};

export type ExtensionConfiguration = WorkspaceConfiguration & CustomConfiguration;

const hiddenConfigurations = {
'openvinoCode.fillInTheMiddleMode': {
order: 4,
type: 'boolean',
default: false,
markdownDescription:
'When checked, text before (above) and after (below) the cursor will be used for completion generation. When unchecked, only text before (above) the cursor will be used.',
},
'openvinoCode.startToken': {
order: 7,
type: 'string',
default: '<fim_prefix>',
markdownDescription:
'String that is sent to server is in format: `{startToken}{text above cursor}{middleToken}{text below cursor if fillInTheMiddleMode=true}{endToken}`. Leave `startToken`, `middleToken`, or `endToken` empty if there is no special token for those placements.',
},
'openvinoCode.middleToken': {
order: 8,
type: 'string',
default: '<fim_middle>',
markdownDescription:
'String that is sent to server is in format: `{startToken}{text above cursor}{middleToken}{text below cursor if fillInTheMiddleMode=true}{endToken}`. Leave `startToken`, `middleToken`, or `endToken` empty if there is no special token for those placements.',
},
'openvinoCode.endToken': {
order: 9,
type: 'string',
default: '<fim_suffix>',
markdownDescription:
'String that is sent to server is in format: `{startToken}{text above cursor}{middleToken}{text below cursor if fillInTheMiddleMode=true}{endToken}`. Leave `startToken`, `middleToken`, or `endToken` empty if there is no special token for those placements.',
},
'openvinoCode.stopToken': {
order: 10,
type: 'string',
default: '<|endoftext|>',
description: '(Optional) Stop token.',
},
};

const configurationDefaults: Partial<CustomConfiguration> = {
fillInTheMiddleMode: hiddenConfigurations['openvinoCode.fillInTheMiddleMode'].default,
startToken: hiddenConfigurations['openvinoCode.startToken'].default,
middleToken: hiddenConfigurations['openvinoCode.middleToken'].default,
endToken: hiddenConfigurations['openvinoCode.endToken'].default,
stopToken: hiddenConfigurations['openvinoCode.stopToken'].default,
};

export const getConfig = () => ({
...configurationDefaults,
...(workspace.getConfiguration(CONFIG_KEY) as ExtensionConfiguration),
});
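`getConfig` above merges the now-hidden defaults with the user's workspace configuration, with user values taking precedence. In Python terms, the TypeScript spread semantics are plain dict merging where later keys win (a sketch of the semantics only; the extension itself is TypeScript):

```python
hidden_defaults = {
    "fillInTheMiddleMode": False,
    "startToken": "<fim_prefix>",
    "middleToken": "<fim_middle>",
    "endToken": "<fim_suffix>",
    "stopToken": "<|endoftext|>",
}

def get_config(workspace_config):
    # later keys win, so user settings override the hidden defaults
    return {**hidden_defaults, **workspace_config}

print(get_config({"maxNewTokens": 100})["stopToken"])  # -> <|endoftext|>
```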
1 change: 1 addition & 0 deletions modules/openvino_code/src/constants.ts
@@ -18,6 +18,7 @@ export const COMMANDS = {
FOCUS_SIDE_PANEL: `${SIDE_PANEL_VIEW_ID}.focus`,
OPEN_SETTINGS: 'openvinoCode.openSettings',
GENERATE_INLINE_COPMLETION: 'openvinoCode.generateInlineCompletion',
ACCEPT_INLINE_COMPLETION: 'openvinoCode.acceptInlineCompletion',
GENERATE_DOC_STRING: 'openvinoCode.generateDocstring',
CHECK_CONNECTION: 'openvinoCode.checkConnection',
START_SERVER_NATIVE: 'openvinoCode.startServerNative',
