From 60c823a000507e6667670f96f1607ba2ea160c57 Mon Sep 17 00:00:00 2001
From: aaronvg <aaron@boundaryml.com>
Date: Tue, 19 Nov 2024 13:41:14 -0800
Subject: [PATCH] Fix template string highlights (#1182)

<!-- ELLIPSIS_HIDDEN -->



> [!IMPORTANT]
> Fixes template string highlighting in BAML files and adds a
`Completion` code-completion function, built on a template string, as an
integration-test fixture.
>
>   - **Syntax Highlighting**:
> - Fixes template string highlighting in `baml.tmLanguage.json` by
adjusting the regex patterns for `template_string_declaration` and
`template_string_body`.
> - Updates `block_string_pair` and `function_declaration2` patterns to
handle multi-line strings correctly.
>   - **New Functionality**:
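> - Adds `Completion`, a code-completion prompt function that calls the
`BAMLBackground2` template string, to `template_string.baml` and to the
clients in `async_client.py`, `sync_client.py`, `client.rb`,
`async_client.ts`, and `sync_client.ts`.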
>   - **Testing**:
> - Adds `template_string.baml` test file to verify template string
handling.
> - Updates `inlinedbaml.py`, `inlined.rb`, and `inlinedbaml.ts` to
include the new test file.
>
> <sup>This description was created by </sup>[<img alt="Ellipsis"
src="https://img.shields.io/badge/Ellipsis-blue?color=175173">](https://www.ellipsis.dev?ref=BoundaryML%2Fbaml&utm_source=github&utm_medium=referral)<sup>
for 5ce06a51e95be0c4baa3b0fdb8a9401626fec224. It will automatically
update as commits are pushed.</sup>

<!-- ELLIPSIS_HIDDEN -->
---
 .../template_string/template_string.baml      | 151 ++++++++++++++++++
 .../python/baml_client/async_client.py        |  55 +++++++
 integ-tests/python/baml_client/inlinedbaml.py |   1 +
 integ-tests/python/baml_client/sync_client.py |  55 +++++++
 integ-tests/ruby/baml_client/client.rb        |  67 ++++++++
 integ-tests/ruby/baml_client/inlined.rb       |   1 +
 .../typescript/baml_client/async_client.ts    |  58 +++++++
 .../typescript/baml_client/inlinedbaml.ts     |   1 +
 .../typescript/baml_client/sync_client.ts     |  25 +++
 .../packages/syntaxes/baml.tmLanguage.json    |  47 +++---
 10 files changed, 439 insertions(+), 22 deletions(-)
 create mode 100644 integ-tests/baml_src/test-files/template_string/template_string.baml
diff --git a/integ-tests/baml_src/test-files/template_string/template_string.baml b/integ-tests/baml_src/test-files/template_string/template_string.baml
new file mode 100644
index 000000000..42d344a04
--- /dev/null
+++ b/integ-tests/baml_src/test-files/template_string/template_string.baml
@@ -0,0 +1,151 @@
+
+function Completion(prefix: string, suffix: string, language: string) -> string {
+  client "openai/gpt-4o"
+  prompt ##"
+    {{ _.role("system", cache_control={"type": "ephemeral"}) }}
+
+    You are a programmer that suggests code completions in the %INSERT-HERE% part below with  {{ language }} code. Only output the code that replaces %INSERT-HERE% part, NOT THE SUFFIX OR PREFIX. Respond only with code, and with no markdown formatting.
+
+    Try to complete a whole section inside curlies when you can.
+
+    {% if language == "baml" %}
+    {{ BAMLBackground2()}}
+
+    Examples:
+    INPUT:
+    ---
+    class MyObject {{"{"}}%INSERT-HERE%
+    }
+    ---
+    OUTPUT:
+    ---
+      property string
+    ---
+    In this example, we just inserted one line, with tabs for a fake property to aid the user.
+
+    INPUT:
+    ---
+    function foo(input: string) -> string {{"{"}} %INSERT-HERE%
+      prompt #"
+        {{ "{{ input }}" }}
+      "#
+    }
+    ---
+    OUTPUT:
+    ---
+      client "openai/gpt-4o"
+    ---
+    In this example, no need to add the prompt because it was part of the suffix after INSERT-HERE
+
+    INPUT:
+    OUTPUT: N/A
+    In this example there was nothing to complete, so we returned N/A.
+
+    Ignore the "---" in your outputs.
+    {% endif %}
+
+
+    {{ _.role("user") }}
+    INPUT:
+    ---
+    {{ prefix }}%INSERT-HERE%{{ suffix }}
+    ---
+  "##
+}
+
+test CompletionTest3 {
+  functions [Completion]
+  args {
+    prefix ##"function foo(input: string) -> string {
+      client "openai/gpt-4o"
+      prompt #"
+    "##
+    suffix ""
+    language "baml"
+  }
+}
+
+test CompletionTest2 {
+  functions [Completion]
+  args {
+    prefix "function foo(input: string) -> string {\n"
+    suffix "\n  prompt #\n\""
+    language "baml"
+  }
+}
+ 
+template_string Hi(
+  hello: string,
+  world: string,
+) ##"
+  {{ hello }} {{ world }}
+"##
+
+template_string Hi3(
+  hello: string,
+  world: string,
+) #"
+  {{ hello }} {{ world }}
+"#
+
+template_string BAMLBackground2() ##"
+  <Overview>
+    BAML is a domain-specific language for building LLM prompts as functions.
+      client "openai/gpt-4o"
+      // prompt with jinja syntax inside here. with double curly braces for variables.
+      // make sure to include: {{ "{{ ctx.output_format }}"}} in the prompt, which prints the output schema instructions so the LLM returns the output in the correct format (json or string, etc.). DO NOT write the output schema manually.
+      prompt #"
+        
+      "#
+    }
+
+      3. You do not need to specify to "answer in JSON format". Only write in the prompt brief instruction, and any other task-specific things to keep in mind for the task.
+      4. Write a {{ "{{ _.role(\"user\") }}" }} tag to indicate where the user's inputs start. So if there's a convo you can write
+      #"{{ "{{ _.role(\"user\") }}" }} {{ "{{ some-variable }}" }}#
+    </Prompt>
+  </Functions>
+
+  The @asserts only go in the "output" types. Don't use them in inputs.
+  Do NOT use numbers as confidence intervals if you need to use them. Prefer an enum with descriptions or literals like "high", "medium", "low".
+
+  Dedent all declarations.
+"##
+
+template_string BamlTests() ##"
+  // For image inputs:
+  test ImageTest {
+    functions [MyFunction]
+    args {
+      imageArg {
+        file "../images/test.png"
+        // Optional: media_type "image/png"
+      }
+      // Or using URL:
+      // imageArg {
+      //   url "https://example.com/image.png"
+      // }
+    }
+  }
+
+  // For array/object inputs:
+  test ComplexTest {
+    functions [MyFunction]
+    args {
+      input {
+        name "Complex Object"
+        tags [
+          "tag1",
+          #"
+            Multi-line
+            tag here
+          "#
+        ]
+        status PENDING
+        type "error"
+        count 100
+        enabled false
+        score 7.8
+      }
+    }
+  }
+"##
diff --git a/integ-tests/python/baml_client/async_client.py b/integ-tests/python/baml_client/async_client.py
index d49f1ebd7..ae4f20aa7 100644
--- a/integ-tests/python/baml_client/async_client.py
+++ b/integ-tests/python/baml_client/async_client.py
@@ -349,6 +349,29 @@ async def ClassifyMessage3(
       )
       return cast(types.Category, raw.cast_to(types, types))
     
+    async def Completion(
+        self,
+        prefix: str,suffix: str,language: str,
+        baml_options: BamlCallOptions = {},
+    ) -> str:
+      __tb__ = baml_options.get("tb", None)
+      if __tb__ is not None:
+        tb = __tb__._tb # type: ignore (we know how to use this private attribute)
+      else:
+        tb = None
+      __cr__ = baml_options.get("client_registry", None)
+
+      raw = await self.__runtime.call_function(
+        "Completion",
+        {
+          "prefix": prefix,"suffix": suffix,"language": language,
+        },
+        self.__ctx_manager.get(),
+        tb,
+        __cr__,
+      )
+      return cast(str, raw.cast_to(types, types))
+    
     async def CustomTask(
         self,
         input: str,
@@ -3142,6 +3165,38 @@ def ClassifyMessage3(
         self.__ctx_manager.get(),
       )
     
+    def Completion(
+        self,
+        prefix: str,suffix: str,language: str,
+        baml_options: BamlCallOptions = {},
+    ) -> baml_py.BamlStream[Optional[str], str]:
+      __tb__ = baml_options.get("tb", None)
+      if __tb__ is not None:
+        tb = __tb__._tb # type: ignore (we know how to use this private attribute)
+      else:
+        tb = None
+      __cr__ = baml_options.get("client_registry", None)
+
+      raw = self.__runtime.stream_function(
+        "Completion",
+        {
+          "prefix": prefix,
+          "suffix": suffix,
+          "language": language,
+        },
+        None,
+        self.__ctx_manager.get(),
+        tb,
+        __cr__,
+      )
+
+      return baml_py.BamlStream[Optional[str], str](
+        raw,
+        lambda x: cast(Optional[str], x.cast_to(types, partial_types)),
+        lambda x: cast(str, x.cast_to(types, types)),
+        self.__ctx_manager.get(),
+      )
+    
     def CustomTask(
         self,
         input: str,
diff --git a/integ-tests/python/baml_client/inlinedbaml.py b/integ-tests/python/baml_client/inlinedbaml.py
index 4f7d1acb2..5b61d4c61 100644
--- a/integ-tests/python/baml_client/inlinedbaml.py
+++ b/integ-tests/python/baml_client/inlinedbaml.py
@@ -91,6 +91,7 @@
     "test-files/strategies/fallback.baml": "// Happy path fallbacks.\nclient<llm> FaultyClient {\n  provider openai\n  options {\n    model unknown-model\n    api_key env.OPENAI_API_KEY\n  }\n}\n\n\nclient<llm> FallbackClient {\n  provider fallback\n  options {\n    // first 2 clients are expected to fail.\n    strategy [\n      FaultyClient,\n      RetryClientConstant,\n      GPT35\n      Gemini\n\n    ]\n  }\n}\n\nfunction TestFallbackClient() -> string {\n  client FallbackClient\n  // TODO make it return the client name instead\n  prompt #\"\n    Say a haiku about mexico.\n  \"#\n}\n\n// Fallbacks should fail gracefully.\nclient<llm> FaultyAzureClient {\n  provider azure-openai\n  options {\n    model unknown-model\n    api_key env.OPENAI_API_KEY\n  }\n}\n\nclient<llm> SingleFallbackClient {\n  provider fallback\n  options {\n    // first 2 clients are expected to fail.\n    strategy [\n      FaultyAzureClient\n    ]\n  }\n}\n\nfunction TestSingleFallbackClient() -> string {\n  client SingleFallbackClient\n  // TODO make it return the client name instead\n  prompt #\"\n    Say a haiku about mexico.\n  \"#\n}\n",
     "test-files/strategies/retry.baml": "\nretry_policy Exponential {\n  max_retries 3\n  strategy {\n    type exponential_backoff\n  }\n}\n\nretry_policy Constant {\n  max_retries 3\n  strategy {\n    type constant_delay\n    delay_ms 100\n  }\n}\n\nclient<llm> RetryClientConstant {\n  provider openai\n  retry_policy Constant\n  options {\n    model \"gpt-3.5-turbo\"\n    api_key \"blah\"\n  }\n}\n\nclient<llm> RetryClientExponential {\n  provider openai\n  retry_policy Exponential\n  options {\n    model \"gpt-3.5-turbo\"\n    api_key \"blahh\"\n  }\n}\n\nfunction TestRetryConstant() -> string {\n  client RetryClientConstant\n  prompt #\"\n    Say a haiku\n  \"#\n}\n\nfunction TestRetryExponential() -> string {\n  client RetryClientExponential\n  prompt #\"\n    Say a haiku\n  \"#\n}\n",
     "test-files/strategies/roundrobin.baml": "",
+    "test-files/template_string/template_string.baml": "\nfunction Completion(prefix: string, suffix: string, language: string) -> string {\n  client \"openai/gpt-4o\"\n  prompt ##\"\n    {{ _.role(\"system\", cache_control={\"type\": \"ephemeral\"}) }}\n\n    You are a programmer that suggests code completions in the %INSERT-HERE% part below with  {{ language }} code. Only output the code that replaces %INSERT-HERE% part, NOT THE SUFFIX OR PREFIX. Respond only with code, and with no markdown formatting.\n\n    Try to complete a whole section inside curlies when you can.\n\n    {% if language == \"baml\" %}\n    {{ BAMLBackground2()}}\n\n    Examples:\n    INPUT:\n    ---\n    class MyObject {{\"{\"}}%INSERT-HERE%\n    }\n    ---\n    OUTPUT:\n    ---\n      property string\n    ---\n    In this example, we just inserted one line, with tabs for a fake property to aid the user.\n\n    INPUT:\n    ---\n    function foo(input: string) -> string {{\"{\"}} %INSERT-HERE%\n      prompt #\"\n        {{ \"{{ input }}\" }}\n      \"#\n    }\n    ---\n    OUTPUT:\n    ---\n      client \"openai/gpt-4o\"\n    ---\n    In this example, no need to add the prompt because it was part of the suffix after INSERT-HERE\n\n    INPUT:\n    OUTPUT: N/A\n    In this example there was nothing to complete, so we returned N/A.\n\n    Ignore the \"---\" in your outputs.\n    {% endif %}\n\n\n    {{ _.role(\"user\") }}\n    INPUT:\n    ---\n    {{ prefix }}%INSERT-HERE%{{ suffix }}\n    ---\n  \"##\n}\n\ntest CompletionTest3 {\n  functions [Completion]\n  args {\n    prefix ##\"function foo(input: string) -> string {\n      client \"openai/gpt-4o\"\n      prompt #\"\n    \"##\n    suffix \"\"\n    language \"baml\"\n  }\n}\n\ntest CompletionTest2 {\n  functions [Completion]\n  args {\n    prefix \"function foo(input: string) -> string {\\n\"\n    suffix \"\\n  prompt #\\n\\\"\"\n    language \"baml\"\n  }\n}\n \ntemplate_string Hi(\n  hello: string,\n  world: string,\n) ##\"\n  {{ hello }} {{ 
world }}\n\"##\n\ntemplate_string Hi3(\n  hello: string,\n  world: string,\n) #\"\n  {{ hello }} {{ world }}\n\"#\n\ntemplate_string BAMLBackground2() ##\"\n  <Overview>\n    BAML is a domain-specific language for building LLM prompts as functions.\n      client \"openai/gpt-4o\"\n      // prompt with jinja syntax inside here. with double curly braces for variables.\n      // make sure to include: {{ \"{{ ctx.output_format }}\"}} in the prompt, which prints the output schema instructions so the LLM returns the output in the correct format (json or string, etc.). DO NOT write the output schema manually.\n      prompt #\"\n        \n      \"#\n    }\n\n      3. You do not need to specify to \"answer in JSON format\". Only write in the prompt brief instruction, and any other task-specific things to keep in mind for the task.\n      4. Write a {{ \"{{ _.role(\\\"user\\\") }}\" }} tag to indicate where the user's inputs start. So if there's a convo you can write\n      #\"{{ \"{{ _.role(\\\"user\\\") }}\" }} {{ \"{{ some-variable }}\" }}#\n    </Prompt>\n  </Functions>\n\n  The @asserts only go in the \"output\" types. Don't use them in inputs.\n  Do NOT use numbers as confidence intervals if you need to use them. 
Prefer an enum with descriptions or literals like \"high\", \"medium\", \"low\".\n\n  Dedent all declarations.\n\"##\n\ntemplate_string BamlTests() ##\"\n  // For image inputs:\n  test ImageTest {\n    functions [MyFunction]\n    args {\n      imageArg {\n        file \"../images/test.png\"\n        // Optional: media_type \"image/png\"\n      }\n      // Or using URL:\n      // imageArg {\n      //   url \"https://example.com/image.png\"\n      // }\n    }\n  }\n\n  // For array/object inputs:\n  test ComplexTest {\n    functions [MyFunction]\n    args {\n      input {\n        name \"Complex Object\"\n        tags [\n          \"tag1\",\n          #\"\n            Multi-line\n            tag here\n          \"#\n        ]\n        status PENDING\n        type \"error\"\n        count 100\n        enabled false\n        score 7.8\n      }\n    }\n  }\n\"##\n",
     "test-files/testing_pipeline/output-format.baml": "class Recipe {\n    ingredients map<string, Quantity>\n    recipe_type \"breakfast\" | \"dinner\"\n}\n\nclass Quantity {\n    amount int | float\n    unit string?\n}\n\nfunction AaaSamOutputFormat(recipe: string) -> Recipe {\n  client GPT35\n  prompt #\"\n    Return this value back to me: {{recipe}}\n\n    {{ctx.output_format(map_style='angle')}}\n  \"#\n}\n\ntest MyOutput {\n    functions [AaaSamOutputFormat]\n    args {\n        recipe #\"\n            Here's a simple recipe for beef stew:\nIngredients:\n\n2 lbs beef chuck, cut into 1-inch cubes\n2 tbsp vegetable oil\n1 onion, diced\n3 carrots, sliced\n2 celery stalks, chopped\n2 potatoes, cubed\n3 cloves garlic, minced\n4 cups beef broth\n1 can (14.5 oz) diced tomatoes\n1 tbsp Worcestershire sauce\n1 tsp dried thyme\n1 bay leaf\nSalt and pepper to taste\n\nInstructions:\n\nSeason beef with salt and pepper. Heat oil in a large pot over medium-high heat. Brown the beef in batches, then set aside.\nIn the same pot, sauté onion, carrots, and celery until softened, about 5 minutes.\nAdd garlic and cook for another minute.\nReturn beef to the pot. Add broth, tomatoes, Worcestershire sauce, thyme, and bay leaf.\nBring to a boil, then reduce heat and simmer covered for 1 hour.\nAdd potatoes and continue simmering for 30-45 minutes, until beef and potatoes are tender.\nRemove bay leaf, adjust seasoning if needed, and serve hot.\n\nWould you like any additional information or variations on this recipe?\n        \"#\n    }\n}\n",
     "test-files/testing_pipeline/resume.baml": "class Resume {\n    name string\n    email string\n    phone string\n    experience Education[]\n    education string[]\n    skills string[]\n}\n\nclass Education {\n    institution string\n    location string\n    degree string\n    major string[]\n    graduation_date string?\n}\n\ntemplate_string AddRole(foo: string) #\"\n    {{ _.role('system')}}\n    You are a {{ foo }}. be nice\n\n    {{ _.role('user') }}\n\"#\n\nclient<llm> TestClient {\n    provider fallback\n    retry_policy Constant\n    options {\n        strategy [\n            Claude\n            GPT35\n            AwsBedrock\n        ]\n    }\n}\n\nclient<llm> Claude2 {\n  provider anthropic\n  options {\n    model claude-3-haiku-20240307\n    api_key env.FOOBAR3\n    max_tokens 1000\n  }\n}\n\nfunction ExtractResume(resume: string, img: image?) -> Resume {\n    client Claude2\n    prompt #\"\n        {{ AddRole(\"Software Engineer\") }}\n\n        Extract data:\n        \n\n        <<<<\n        {{ resume }}\n        <<<<\n\n        {% if img %}\n        {{img}}\n        {% endif %}\n\n        {{ ctx.output_format }}\n    \"#\n}\n\ntest sam_resume {\n    functions [ExtractResume]\n    input {\n        img {\n            url \"https://avatars.githubusercontent.com/u/1016595?v=4\"\n        }\n        resume #\"\n            Sam Lijin\n            he/him |  jobs@sxlijin.com |  sxlijin.github.io | 111-222-3333 | sxlijin |  sxlijin\n\n            Experience\n            Trunk\n            | July 2021 - current\n            Trunk Check | Senior Software Engineer | Services TL, Mar 2023 - current | IC, July 2021 - Feb 2023\n            Proposed, designed, and led a team of 3 to build a web experience for Check (both a web-only onboarding flow and SaaS offerings)\n            Proposed and built vulnerability scanning into Check, enabling it to compete with security products such as Snyk\n            Helped grow Check from <1K users to 90K+ users by focusing on 
product-led growth\n            Google | Sept 2017 - June 2021\n            User Identity SRE | Senior Software Engineer | IC, Mar 2021 - June 2021\n            Designed an incremental key rotation system to limit the global outage risk to Google SSO\n            Discovered and severed an undocumented Gmail serving dependency on Identity-internal systems\n            Cloud Firestore | Senior Software Engineer | EngProd TL, Aug 2019 - Feb 2021 | IC, Sept 2017 - July 2019\n            Metadata TTL system: backlog of XX trillion records, sustained 1M ops/sec, peaking at 3M ops/sec\n\n            Designed and implemented a logging system with novel observability and privacy requirements\n            Designed and implemented Jepsen-style testing to validate correctness guarantees\n            Datastore Migration: zero downtime, xM RPS and xxPB of data over xM customers and 36 datacenters\n\n            Designed composite index migration, queue processing migration, progressive rollout, fast rollback, and disk stockout mitigations; implemented transaction log replay, state transitions, and dark launch process\n            Designed and implemented end-to-end correctness and performance testing\n            Velocity improvements for 60-eng org\n\n            Proposed and implemented automated rollbacks: got us out of a 3-month release freeze and prevented 5 outages over the next 6 months\n            Proposed and implemented new development and release environments spanning 30+ microservices\n            Incident response for API proxy rollback affecting every Google Cloud service\n\n            Google App Engine Memcache | Software Engineer | EngProd TL, Apr 2019 - July 2019\n            Proposed and led execution of test coverage improvement strategy for a new control plane: reduced rollbacks and ensured strong consistency of a distributed cache serving xxM QPS\n            Designed and implemented automated performance regression testing for two critical serving paths\n 
           Used to validate Google-wide rollout of AMD CPUs, by proving a 50p latency delta of <10µs\n            Implemented on shared Borg (i.e. vulnerable to noisy neighbors) with <12% variance\n            Miscellaneous | Sept 2017 - June 2021\n            Redesigned the Noogler training on Google-internal storage technologies & trained 2500+ Nooglers\n            Landed multiple google3-wide refactorings, each spanning xxK files (e.g. SWIG to CLIF)\n            Education\n            Vanderbilt University (Nashville, TN) | May 2017 | B.S. in Computer Science, Mathematics, and Political Science\n\n            Stuyvesant HS (New York, NY) | 2013\n\n            Skills\n            C++, Java, Typescript, Javascript, Python, Bash; light experience with Rust, Golang, Scheme\n            gRPC, Bazel, React, Linux\n            Hobbies: climbing, skiing, photography\n        \"#\n    }\n}\n\ntest vaibhav_resume {\n    functions [ExtractResume]\n    input {\n        resume #\"\n            Vaibhav Gupta\n            linkedin/vaigup\n            (972) 400-5279\n            vaibhavtheory@gmail.com\n            EXPERIENCE\n            Google,\n            Software Engineer\n            Dec 2018-Present\n            Seattle, WA\n            •\n            Augmented Reality,\n            Depth Team\n            •\n            Technical Lead for on-device optimizations\n            •\n            Optimized and designed front\n            facing depth algorithm\n            on Pixel 4\n            •\n            Focus: C++ and SIMD on custom silicon\n            \n            \n            EDUCATION\n            University of Texas at Austin\n            Aug 2012-May 2015\n            Bachelors of Engineering, Integrated Circuits\n            Bachelors of Computer Science\n        \"#\n    }\n}",
 }
diff --git a/integ-tests/python/baml_client/sync_client.py b/integ-tests/python/baml_client/sync_client.py
index 107be357c..d1f667c70 100644
--- a/integ-tests/python/baml_client/sync_client.py
+++ b/integ-tests/python/baml_client/sync_client.py
@@ -346,6 +346,29 @@ def ClassifyMessage3(
       )
       return cast(types.Category, raw.cast_to(types, types))
     
+    def Completion(
+        self,
+        prefix: str,suffix: str,language: str,
+        baml_options: BamlCallOptions = {},
+    ) -> str:
+      __tb__ = baml_options.get("tb", None)
+      if __tb__ is not None:
+        tb = __tb__._tb # type: ignore (we know how to use this private attribute)
+      else:
+        tb = None
+      __cr__ = baml_options.get("client_registry", None)
+
+      raw = self.__runtime.call_function_sync(
+        "Completion",
+        {
+          "prefix": prefix,"suffix": suffix,"language": language,
+        },
+        self.__ctx_manager.get(),
+        tb,
+        __cr__,
+      )
+      return cast(str, raw.cast_to(types, types))
+    
     def CustomTask(
         self,
         input: str,
@@ -3140,6 +3163,38 @@ def ClassifyMessage3(
         self.__ctx_manager.get(),
       )
     
+    def Completion(
+        self,
+        prefix: str,suffix: str,language: str,
+        baml_options: BamlCallOptions = {},
+    ) -> baml_py.BamlSyncStream[Optional[str], str]:
+      __tb__ = baml_options.get("tb", None)
+      if __tb__ is not None:
+        tb = __tb__._tb # type: ignore (we know how to use this private attribute)
+      else:
+        tb = None
+      __cr__ = baml_options.get("client_registry", None)
+
+      raw = self.__runtime.stream_function_sync(
+        "Completion",
+        {
+          "prefix": prefix,
+          "suffix": suffix,
+          "language": language,
+        },
+        None,
+        self.__ctx_manager.get(),
+        tb,
+        __cr__,
+      )
+
+      return baml_py.BamlSyncStream[Optional[str], str](
+        raw,
+        lambda x: cast(Optional[str], x.cast_to(types, partial_types)),
+        lambda x: cast(str, x.cast_to(types, types)),
+        self.__ctx_manager.get(),
+      )
+    
     def CustomTask(
         self,
         input: str,
diff --git a/integ-tests/ruby/baml_client/client.rb b/integ-tests/ruby/baml_client/client.rb
index 49a987d31..0bf846671 100644
--- a/integ-tests/ruby/baml_client/client.rb
+++ b/integ-tests/ruby/baml_client/client.rb
@@ -466,6 +466,38 @@ def ClassifyMessage3(
       (raw.parsed_using_types(Baml::Types))
     end
 
+    sig {
+      params(
+        varargs: T.untyped,
+        prefix: String,suffix: String,language: String,
+        baml_options: T::Hash[Symbol, T.any(Baml::TypeBuilder, Baml::ClientRegistry)]
+      ).returns(String)
+    }
+    def Completion(
+        *varargs,
+        prefix:,suffix:,language:,
+        baml_options: {}
+    )
+      if varargs.any?
+        
+        raise ArgumentError.new("Completion may only be called with keyword arguments")
+      end
+      if (baml_options.keys - [:client_registry, :tb]).any?
+        raise ArgumentError.new("Received unknown keys in baml_options (valid keys: :client_registry, :tb): #{baml_options.keys - [:client_registry, :tb]}")
+      end
+
+      raw = @runtime.call_function(
+        "Completion",
+        {
+          prefix: prefix,suffix: suffix,language: language,
+        },
+        @ctx_manager,
+        baml_options[:tb]&.instance_variable_get(:@registry),
+        baml_options[:client_registry],
+      )
+      (raw.parsed_using_types(Baml::Types))
+    end
+
     sig {
       params(
         varargs: T.untyped,
@@ -4262,6 +4294,41 @@ def ClassifyMessage3(
       )
     end
 
+    sig {
+      params(
+        varargs: T.untyped,
+        prefix: String,suffix: String,language: String,
+        baml_options: T::Hash[Symbol, T.any(Baml::TypeBuilder, Baml::ClientRegistry)]
+      ).returns(Baml::BamlStream[String])
+    }
+    def Completion(
+        *varargs,
+        prefix:,suffix:,language:,
+        baml_options: {}
+    )
+      if varargs.any?
+        
+        raise ArgumentError.new("Completion may only be called with keyword arguments")
+      end
+      if (baml_options.keys - [:client_registry, :tb]).any?
+        raise ArgumentError.new("Received unknown keys in baml_options (valid keys: :client_registry, :tb): #{baml_options.keys - [:client_registry, :tb]}")
+      end
+
+      raw = @runtime.stream_function(
+        "Completion",
+        {
+          prefix: prefix,suffix: suffix,language: language,
+        },
+        @ctx_manager,
+        baml_options[:tb]&.instance_variable_get(:@registry),
+        baml_options[:client_registry],
+      )
+      Baml::BamlStream[T.nilable(String), String].new(
+        ffi_stream: raw,
+        ctx_manager: @ctx_manager
+      )
+    end
+
     sig {
       params(
         varargs: T.untyped,
diff --git a/integ-tests/ruby/baml_client/inlined.rb b/integ-tests/ruby/baml_client/inlined.rb
index 8eac818cc..05f152c23 100644
--- a/integ-tests/ruby/baml_client/inlined.rb
+++ b/integ-tests/ruby/baml_client/inlined.rb
@@ -91,6 +91,7 @@ module Inlined
         "test-files/strategies/fallback.baml" => "// Happy path fallbacks.\nclient<llm> FaultyClient {\n  provider openai\n  options {\n    model unknown-model\n    api_key env.OPENAI_API_KEY\n  }\n}\n\n\nclient<llm> FallbackClient {\n  provider fallback\n  options {\n    // first 2 clients are expected to fail.\n    strategy [\n      FaultyClient,\n      RetryClientConstant,\n      GPT35\n      Gemini\n\n    ]\n  }\n}\n\nfunction TestFallbackClient() -> string {\n  client FallbackClient\n  // TODO make it return the client name instead\n  prompt #\"\n    Say a haiku about mexico.\n  \"#\n}\n\n// Fallbacks should fail gracefully.\nclient<llm> FaultyAzureClient {\n  provider azure-openai\n  options {\n    model unknown-model\n    api_key env.OPENAI_API_KEY\n  }\n}\n\nclient<llm> SingleFallbackClient {\n  provider fallback\n  options {\n    // first 2 clients are expected to fail.\n    strategy [\n      FaultyAzureClient\n    ]\n  }\n}\n\nfunction TestSingleFallbackClient() -> string {\n  client SingleFallbackClient\n  // TODO make it return the client name instead\n  prompt #\"\n    Say a haiku about mexico.\n  \"#\n}\n",
         "test-files/strategies/retry.baml" => "\nretry_policy Exponential {\n  max_retries 3\n  strategy {\n    type exponential_backoff\n  }\n}\n\nretry_policy Constant {\n  max_retries 3\n  strategy {\n    type constant_delay\n    delay_ms 100\n  }\n}\n\nclient<llm> RetryClientConstant {\n  provider openai\n  retry_policy Constant\n  options {\n    model \"gpt-3.5-turbo\"\n    api_key \"blah\"\n  }\n}\n\nclient<llm> RetryClientExponential {\n  provider openai\n  retry_policy Exponential\n  options {\n    model \"gpt-3.5-turbo\"\n    api_key \"blahh\"\n  }\n}\n\nfunction TestRetryConstant() -> string {\n  client RetryClientConstant\n  prompt #\"\n    Say a haiku\n  \"#\n}\n\nfunction TestRetryExponential() -> string {\n  client RetryClientExponential\n  prompt #\"\n    Say a haiku\n  \"#\n}\n",
         "test-files/strategies/roundrobin.baml" => "",
+        "test-files/template_string/template_string.baml" => "\nfunction Completion(prefix: string, suffix: string, language: string) -> string {\n  client \"openai/gpt-4o\"\n  prompt ##\"\n    {{ _.role(\"system\", cache_control={\"type\": \"ephemeral\"}) }}\n\n    You are a programmer that suggests code completions in the %INSERT-HERE% part below with  {{ language }} code. Only output the code that replaces %INSERT-HERE% part, NOT THE SUFFIX OR PREFIX. Respond only with code, and with no markdown formatting.\n\n    Try to complete a whole section inside curlies when you can.\n\n    {% if language == \"baml\" %}\n    {{ BAMLBackground2()}}\n\n    Examples:\n    INPUT:\n    ---\n    class MyObject {{\"{\"}}%INSERT-HERE%\n    }\n    ---\n    OUTPUT:\n    ---\n      property string\n    ---\n    In this example, we just inserted one line, with tabs for a fake property to aid the user.\n\n    INPUT:\n    ---\n    function foo(input: string) -> string {{\"{\"}} %INSERT-HERE%\n      prompt #\"\n        {{ \"{{ input }}\" }}\n      \"#\n    }\n    ---\n    OUTPUT:\n    ---\n      client \"openai/gpt-4o\"\n    ---\n    In this example, no need to add the prompt because it was part of the suffix after INSERT-HERE\n\n    INPUT:\n    OUTPUT: N/A\n    In this example there was nothing to complete, so we returned N/A.\n\n    Ignore the \"---\" in your outputs.\n    {% endif %}\n\n\n    {{ _.role(\"user\") }}\n    INPUT:\n    ---\n    {{ prefix }}%INSERT-HERE%{{ suffix }}\n    ---\n  \"##\n}\n\ntest CompletionTest3 {\n  functions [Completion]\n  args {\n    prefix ##\"function foo(input: string) -> string {\n      client \"openai/gpt-4o\"\n      prompt #\"\n    \"##\n    suffix \"\"\n    language \"baml\"\n  }\n}\n\ntest CompletionTest2 {\n  functions [Completion]\n  args {\n    prefix \"function foo(input: string) -> string {\\n\"\n    suffix \"\\n  prompt #\\n\\\"\"\n    language \"baml\"\n  }\n}\n \ntemplate_string Hi(\n  hello: string,\n  world: string,\n) ##\"\n  {{ hello 
}} {{ world }}\n\"##\n\ntemplate_string Hi3(\n  hello: string,\n  world: string,\n) #\"\n  {{ hello }} {{ world }}\n\"#\n\ntemplate_string BAMLBackground2() ##\"\n  <Overview>\n    BAML is a domain-specific language for building LLM prompts as functions.\n      client \"openai/gpt-4o\"\n      // prompt with jinja syntax inside here. with double curly braces for variables.\n      // make sure to include: {{ \"{{ ctx.output_format }}\"}} in the prompt, which prints the output schema instructions so the LLM returns the output in the correct format (json or string, etc.). DO NOT write the output schema manually.\n      prompt #\"\n        \n      \"#\n    }\n\n      3. You do not need to specify to \"answer in JSON format\". Only write in the prompt brief instruction, and any other task-specific things to keep in mind for the task.\n      4. Write a {{ \"{{ _.role(\\\"user\\\") }}\" }} tag to indicate where the user's inputs start. So if there's a convo you can write\n      #\"{{ \"{{ _.role(\\\"user\\\") }}\" }} {{ \"{{ some-variable }}\" }}#\n    </Prompt>\n  </Functions>\n\n  The @asserts only go in the \"output\" types. Don't use them in inputs.\n  Do NOT use numbers as confidence intervals if you need to use them. 
Prefer an enum with descriptions or literals like \"high\", \"medium\", \"low\".\n\n  Dedent all declarations.\n\"##\n\ntemplate_string BamlTests() ##\"\n  // For image inputs:\n  test ImageTest {\n    functions [MyFunction]\n    args {\n      imageArg {\n        file \"../images/test.png\"\n        // Optional: media_type \"image/png\"\n      }\n      // Or using URL:\n      // imageArg {\n      //   url \"https://example.com/image.png\"\n      // }\n    }\n  }\n\n  // For array/object inputs:\n  test ComplexTest {\n    functions [MyFunction]\n    args {\n      input {\n        name \"Complex Object\"\n        tags [\n          \"tag1\",\n          #\"\n            Multi-line\n            tag here\n          \"#\n        ]\n        status PENDING\n        type \"error\"\n        count 100\n        enabled false\n        score 7.8\n      }\n    }\n  }\n\"##\n",
         "test-files/testing_pipeline/output-format.baml" => "class Recipe {\n    ingredients map<string, Quantity>\n    recipe_type \"breakfast\" | \"dinner\"\n}\n\nclass Quantity {\n    amount int | float\n    unit string?\n}\n\nfunction AaaSamOutputFormat(recipe: string) -> Recipe {\n  client GPT35\n  prompt #\"\n    Return this value back to me: {{recipe}}\n\n    {{ctx.output_format(map_style='angle')}}\n  \"#\n}\n\ntest MyOutput {\n    functions [AaaSamOutputFormat]\n    args {\n        recipe #\"\n            Here's a simple recipe for beef stew:\nIngredients:\n\n2 lbs beef chuck, cut into 1-inch cubes\n2 tbsp vegetable oil\n1 onion, diced\n3 carrots, sliced\n2 celery stalks, chopped\n2 potatoes, cubed\n3 cloves garlic, minced\n4 cups beef broth\n1 can (14.5 oz) diced tomatoes\n1 tbsp Worcestershire sauce\n1 tsp dried thyme\n1 bay leaf\nSalt and pepper to taste\n\nInstructions:\n\nSeason beef with salt and pepper. Heat oil in a large pot over medium-high heat. Brown the beef in batches, then set aside.\nIn the same pot, sauté onion, carrots, and celery until softened, about 5 minutes.\nAdd garlic and cook for another minute.\nReturn beef to the pot. Add broth, tomatoes, Worcestershire sauce, thyme, and bay leaf.\nBring to a boil, then reduce heat and simmer covered for 1 hour.\nAdd potatoes and continue simmering for 30-45 minutes, until beef and potatoes are tender.\nRemove bay leaf, adjust seasoning if needed, and serve hot.\n\nWould you like any additional information or variations on this recipe?\n        \"#\n    }\n}\n",
         "test-files/testing_pipeline/resume.baml" => "class Resume {\n    name string\n    email string\n    phone string\n    experience Education[]\n    education string[]\n    skills string[]\n}\n\nclass Education {\n    institution string\n    location string\n    degree string\n    major string[]\n    graduation_date string?\n}\n\ntemplate_string AddRole(foo: string) #\"\n    {{ _.role('system')}}\n    You are a {{ foo }}. be nice\n\n    {{ _.role('user') }}\n\"#\n\nclient<llm> TestClient {\n    provider fallback\n    retry_policy Constant\n    options {\n        strategy [\n            Claude\n            GPT35\n            AwsBedrock\n        ]\n    }\n}\n\nclient<llm> Claude2 {\n  provider anthropic\n  options {\n    model claude-3-haiku-20240307\n    api_key env.FOOBAR3\n    max_tokens 1000\n  }\n}\n\nfunction ExtractResume(resume: string, img: image?) -> Resume {\n    client Claude2\n    prompt #\"\n        {{ AddRole(\"Software Engineer\") }}\n\n        Extract data:\n        \n\n        <<<<\n        {{ resume }}\n        <<<<\n\n        {% if img %}\n        {{img}}\n        {% endif %}\n\n        {{ ctx.output_format }}\n    \"#\n}\n\ntest sam_resume {\n    functions [ExtractResume]\n    input {\n        img {\n            url \"https://avatars.githubusercontent.com/u/1016595?v=4\"\n        }\n        resume #\"\n            Sam Lijin\n            he/him |  jobs@sxlijin.com |  sxlijin.github.io | 111-222-3333 | sxlijin |  sxlijin\n\n            Experience\n            Trunk\n            | July 2021 - current\n            Trunk Check | Senior Software Engineer | Services TL, Mar 2023 - current | IC, July 2021 - Feb 2023\n            Proposed, designed, and led a team of 3 to build a web experience for Check (both a web-only onboarding flow and SaaS offerings)\n            Proposed and built vulnerability scanning into Check, enabling it to compete with security products such as Snyk\n            Helped grow Check from <1K users to 90K+ users by 
focusing on product-led growth\n            Google | Sept 2017 - June 2021\n            User Identity SRE | Senior Software Engineer | IC, Mar 2021 - June 2021\n            Designed an incremental key rotation system to limit the global outage risk to Google SSO\n            Discovered and severed an undocumented Gmail serving dependency on Identity-internal systems\n            Cloud Firestore | Senior Software Engineer | EngProd TL, Aug 2019 - Feb 2021 | IC, Sept 2017 - July 2019\n            Metadata TTL system: backlog of XX trillion records, sustained 1M ops/sec, peaking at 3M ops/sec\n\n            Designed and implemented a logging system with novel observability and privacy requirements\n            Designed and implemented Jepsen-style testing to validate correctness guarantees\n            Datastore Migration: zero downtime, xM RPS and xxPB of data over xM customers and 36 datacenters\n\n            Designed composite index migration, queue processing migration, progressive rollout, fast rollback, and disk stockout mitigations; implemented transaction log replay, state transitions, and dark launch process\n            Designed and implemented end-to-end correctness and performance testing\n            Velocity improvements for 60-eng org\n\n            Proposed and implemented automated rollbacks: got us out of a 3-month release freeze and prevented 5 outages over the next 6 months\n            Proposed and implemented new development and release environments spanning 30+ microservices\n            Incident response for API proxy rollback affecting every Google Cloud service\n\n            Google App Engine Memcache | Software Engineer | EngProd TL, Apr 2019 - July 2019\n            Proposed and led execution of test coverage improvement strategy for a new control plane: reduced rollbacks and ensured strong consistency of a distributed cache serving xxM QPS\n            Designed and implemented automated performance regression testing for two critical 
serving paths\n            Used to validate Google-wide rollout of AMD CPUs, by proving a 50p latency delta of <10µs\n            Implemented on shared Borg (i.e. vulnerable to noisy neighbors) with <12% variance\n            Miscellaneous | Sept 2017 - June 2021\n            Redesigned the Noogler training on Google-internal storage technologies & trained 2500+ Nooglers\n            Landed multiple google3-wide refactorings, each spanning xxK files (e.g. SWIG to CLIF)\n            Education\n            Vanderbilt University (Nashville, TN) | May 2017 | B.S. in Computer Science, Mathematics, and Political Science\n\n            Stuyvesant HS (New York, NY) | 2013\n\n            Skills\n            C++, Java, Typescript, Javascript, Python, Bash; light experience with Rust, Golang, Scheme\n            gRPC, Bazel, React, Linux\n            Hobbies: climbing, skiing, photography\n        \"#\n    }\n}\n\ntest vaibhav_resume {\n    functions [ExtractResume]\n    input {\n        resume #\"\n            Vaibhav Gupta\n            linkedin/vaigup\n            (972) 400-5279\n            vaibhavtheory@gmail.com\n            EXPERIENCE\n            Google,\n            Software Engineer\n            Dec 2018-Present\n            Seattle, WA\n            •\n            Augmented Reality,\n            Depth Team\n            •\n            Technical Lead for on-device optimizations\n            •\n            Optimized and designed front\n            facing depth algorithm\n            on Pixel 4\n            •\n            Focus: C++ and SIMD on custom silicon\n            \n            \n            EDUCATION\n            University of Texas at Austin\n            Aug 2012-May 2015\n            Bachelors of Engineering, Integrated Circuits\n            Bachelors of Computer Science\n        \"#\n    }\n}",
     }
diff --git a/integ-tests/typescript/baml_client/async_client.ts b/integ-tests/typescript/baml_client/async_client.ts
index ae277c2a6..7c910ffae 100644
--- a/integ-tests/typescript/baml_client/async_client.ts
+++ b/integ-tests/typescript/baml_client/async_client.ts
@@ -368,6 +368,31 @@ export class BamlAsyncClient {
     }
   }
   
+  async Completion(  // awaits the BAML function "Completion" and resolves with its parsed result
+      prefix: string,suffix: string,language: string,
+      __baml_options__?: { tb?: TypeBuilder, clientRegistry?: ClientRegistry }  // optional per-call overrides
+  ): Promise<string> {
+    try {
+      const raw = await this.runtime.callFunction(
+        "Completion",
+        {
+          "prefix": prefix,"suffix": suffix,"language": language  // arguments keyed by parameter name
+        },
+        this.ctx_manager.cloneContext(),  // the call receives a cloned context
+        __baml_options__?.tb?.__tb(),  // undefined when no type builder was supplied
+        __baml_options__?.clientRegistry,
+      )
+      return raw.parsed() as string  // `as string` is a compile-time assertion only; no runtime check happens here
+    } catch (error: any) {
+      const bamlError = createBamlValidationError(error);  // attempt to convert the caught value
+      if (bamlError instanceof BamlValidationError) {
+        throw bamlError;  // conversion produced a validation error: surface that
+      } else {
+        throw error;  // otherwise rethrow the original value unchanged
+      }
+    }
+  }
+  
   async CustomTask(
       input: string,
       __baml_options__?: { tb?: TypeBuilder, clientRegistry?: ClientRegistry }
@@ -3403,6 +3428,39 @@ class BamlStreamClient {
     }
   }
   
+  Completion(  // streaming variant: returns a BamlStream synchronously instead of a Promise
+      prefix: string,suffix: string,language: string,
+      __baml_options__?: { tb?: TypeBuilder, clientRegistry?: ClientRegistry }  // optional per-call overrides
+  ): BamlStream<RecursivePartialNull<string>, string> {
+    try {
+      const raw = this.runtime.streamFunction(
+        "Completion",
+        {
+          "prefix": prefix,"suffix": suffix,"language": language  // arguments keyed by parameter name
+        },
+        undefined,  // NOTE(review): purpose of this positional slot is not visible here — confirm against streamFunction's signature
+        this.ctx_manager.cloneContext(),  // the call receives a cloned context
+        __baml_options__?.tb?.__tb(),  // undefined when no type builder was supplied
+        __baml_options__?.clientRegistry,
+      )
+      return new BamlStream<RecursivePartialNull<string>, string>(
+        raw,
+        (a): a is RecursivePartialNull<string> => a,  // type-guard callback for partial values; returns its argument as-is
+        (a): a is string => a,  // type-guard callback for the final value; returns its argument as-is
+        this.ctx_manager.cloneContext(),  // a second, separate clone is handed to the stream wrapper
+        __baml_options__?.tb?.__tb(),
+      )
+    } catch (error) {
+      if (error instanceof Error) {  // only Error instances are passed to the converter
+        const bamlError = createBamlValidationError(error);
+        if (bamlError instanceof BamlValidationError) {
+          throw bamlError;  // conversion produced a validation error: surface that
+        }
+      }
+      throw error;  // everything else is rethrown unchanged
+    }
+  }
+  
   CustomTask(
       input: string,
       __baml_options__?: { tb?: TypeBuilder, clientRegistry?: ClientRegistry }
diff --git a/integ-tests/typescript/baml_client/inlinedbaml.ts b/integ-tests/typescript/baml_client/inlinedbaml.ts
index 439752eaa..57db73d28 100644
--- a/integ-tests/typescript/baml_client/inlinedbaml.ts
+++ b/integ-tests/typescript/baml_client/inlinedbaml.ts
@@ -92,6 +92,7 @@ const fileMap = {
   "test-files/strategies/fallback.baml": "// Happy path fallbacks.\nclient<llm> FaultyClient {\n  provider openai\n  options {\n    model unknown-model\n    api_key env.OPENAI_API_KEY\n  }\n}\n\n\nclient<llm> FallbackClient {\n  provider fallback\n  options {\n    // first 2 clients are expected to fail.\n    strategy [\n      FaultyClient,\n      RetryClientConstant,\n      GPT35\n      Gemini\n\n    ]\n  }\n}\n\nfunction TestFallbackClient() -> string {\n  client FallbackClient\n  // TODO make it return the client name instead\n  prompt #\"\n    Say a haiku about mexico.\n  \"#\n}\n\n// Fallbacks should fail gracefully.\nclient<llm> FaultyAzureClient {\n  provider azure-openai\n  options {\n    model unknown-model\n    api_key env.OPENAI_API_KEY\n  }\n}\n\nclient<llm> SingleFallbackClient {\n  provider fallback\n  options {\n    // first 2 clients are expected to fail.\n    strategy [\n      FaultyAzureClient\n    ]\n  }\n}\n\nfunction TestSingleFallbackClient() -> string {\n  client SingleFallbackClient\n  // TODO make it return the client name instead\n  prompt #\"\n    Say a haiku about mexico.\n  \"#\n}\n",
   "test-files/strategies/retry.baml": "\nretry_policy Exponential {\n  max_retries 3\n  strategy {\n    type exponential_backoff\n  }\n}\n\nretry_policy Constant {\n  max_retries 3\n  strategy {\n    type constant_delay\n    delay_ms 100\n  }\n}\n\nclient<llm> RetryClientConstant {\n  provider openai\n  retry_policy Constant\n  options {\n    model \"gpt-3.5-turbo\"\n    api_key \"blah\"\n  }\n}\n\nclient<llm> RetryClientExponential {\n  provider openai\n  retry_policy Exponential\n  options {\n    model \"gpt-3.5-turbo\"\n    api_key \"blahh\"\n  }\n}\n\nfunction TestRetryConstant() -> string {\n  client RetryClientConstant\n  prompt #\"\n    Say a haiku\n  \"#\n}\n\nfunction TestRetryExponential() -> string {\n  client RetryClientExponential\n  prompt #\"\n    Say a haiku\n  \"#\n}\n",
   "test-files/strategies/roundrobin.baml": "",
+  "test-files/template_string/template_string.baml": "\nfunction Completion(prefix: string, suffix: string, language: string) -> string {\n  client \"openai/gpt-4o\"\n  prompt ##\"\n    {{ _.role(\"system\", cache_control={\"type\": \"ephemeral\"}) }}\n\n    You are a programmer that suggests code completions in the %INSERT-HERE% part below with  {{ language }} code. Only output the code that replaces %INSERT-HERE% part, NOT THE SUFFIX OR PREFIX. Respond only with code, and with no markdown formatting.\n\n    Try to complete a whole section inside curlies when you can.\n\n    {% if language == \"baml\" %}\n    {{ BAMLBackground2()}}\n\n    Examples:\n    INPUT:\n    ---\n    class MyObject {{\"{\"}}%INSERT-HERE%\n    }\n    ---\n    OUTPUT:\n    ---\n      property string\n    ---\n    In this example, we just inserted one line, with tabs for a fake property to aid the user.\n\n    INPUT:\n    ---\n    function foo(input: string) -> string {{\"{\"}} %INSERT-HERE%\n      prompt #\"\n        {{ \"{{ input }}\" }}\n      \"#\n    }\n    ---\n    OUTPUT:\n    ---\n      client \"openai/gpt-4o\"\n    ---\n    In this example, no need to add the prompt because it was part of the suffix after INSERT-HERE\n\n    INPUT:\n    OUTPUT: N/A\n    In this example there was nothing to complete, so we returned N/A.\n\n    Ignore the \"---\" in your outputs.\n    {% endif %}\n\n\n    {{ _.role(\"user\") }}\n    INPUT:\n    ---\n    {{ prefix }}%INSERT-HERE%{{ suffix }}\n    ---\n  \"##\n}\n\ntest CompletionTest3 {\n  functions [Completion]\n  args {\n    prefix ##\"function foo(input: string) -> string {\n      client \"openai/gpt-4o\"\n      prompt #\"\n    \"##\n    suffix \"\"\n    language \"baml\"\n  }\n}\n\ntest CompletionTest2 {\n  functions [Completion]\n  args {\n    prefix \"function foo(input: string) -> string {\\n\"\n    suffix \"\\n  prompt #\\n\\\"\"\n    language \"baml\"\n  }\n}\n \ntemplate_string Hi(\n  hello: string,\n  world: string,\n) ##\"\n  {{ hello }} {{ 
world }}\n\"##\n\ntemplate_string Hi3(\n  hello: string,\n  world: string,\n) #\"\n  {{ hello }} {{ world }}\n\"#\n\ntemplate_string BAMLBackground2() ##\"\n  <Overview>\n    BAML is a domain-specific language for building LLM prompts as functions.\n      client \"openai/gpt-4o\"\n      // prompt with jinja syntax inside here. with double curly braces for variables.\n      // make sure to include: {{ \"{{ ctx.output_format }}\"}} in the prompt, which prints the output schema instructions so the LLM returns the output in the correct format (json or string, etc.). DO NOT write the output schema manually.\n      prompt #\"\n        \n      \"#\n    }\n\n      3. You do not need to specify to \"answer in JSON format\". Only write in the prompt brief instruction, and any other task-specific things to keep in mind for the task.\n      4. Write a {{ \"{{ _.role(\\\"user\\\") }}\" }} tag to indicate where the user's inputs start. So if there's a convo you can write\n      #\"{{ \"{{ _.role(\\\"user\\\") }}\" }} {{ \"{{ some-variable }}\" }}#\n    </Prompt>\n  </Functions>\n\n  The @asserts only go in the \"output\" types. Don't use them in inputs.\n  Do NOT use numbers as confidence intervals if you need to use them. 
Prefer an enum with descriptions or literals like \"high\", \"medium\", \"low\".\n\n  Dedent all declarations.\n\"##\n\ntemplate_string BamlTests() ##\"\n  // For image inputs:\n  test ImageTest {\n    functions [MyFunction]\n    args {\n      imageArg {\n        file \"../images/test.png\"\n        // Optional: media_type \"image/png\"\n      }\n      // Or using URL:\n      // imageArg {\n      //   url \"https://example.com/image.png\"\n      // }\n    }\n  }\n\n  // For array/object inputs:\n  test ComplexTest {\n    functions [MyFunction]\n    args {\n      input {\n        name \"Complex Object\"\n        tags [\n          \"tag1\",\n          #\"\n            Multi-line\n            tag here\n          \"#\n        ]\n        status PENDING\n        type \"error\"\n        count 100\n        enabled false\n        score 7.8\n      }\n    }\n  }\n\"##\n",
   "test-files/testing_pipeline/output-format.baml": "class Recipe {\n    ingredients map<string, Quantity>\n    recipe_type \"breakfast\" | \"dinner\"\n}\n\nclass Quantity {\n    amount int | float\n    unit string?\n}\n\nfunction AaaSamOutputFormat(recipe: string) -> Recipe {\n  client GPT35\n  prompt #\"\n    Return this value back to me: {{recipe}}\n\n    {{ctx.output_format(map_style='angle')}}\n  \"#\n}\n\ntest MyOutput {\n    functions [AaaSamOutputFormat]\n    args {\n        recipe #\"\n            Here's a simple recipe for beef stew:\nIngredients:\n\n2 lbs beef chuck, cut into 1-inch cubes\n2 tbsp vegetable oil\n1 onion, diced\n3 carrots, sliced\n2 celery stalks, chopped\n2 potatoes, cubed\n3 cloves garlic, minced\n4 cups beef broth\n1 can (14.5 oz) diced tomatoes\n1 tbsp Worcestershire sauce\n1 tsp dried thyme\n1 bay leaf\nSalt and pepper to taste\n\nInstructions:\n\nSeason beef with salt and pepper. Heat oil in a large pot over medium-high heat. Brown the beef in batches, then set aside.\nIn the same pot, sauté onion, carrots, and celery until softened, about 5 minutes.\nAdd garlic and cook for another minute.\nReturn beef to the pot. Add broth, tomatoes, Worcestershire sauce, thyme, and bay leaf.\nBring to a boil, then reduce heat and simmer covered for 1 hour.\nAdd potatoes and continue simmering for 30-45 minutes, until beef and potatoes are tender.\nRemove bay leaf, adjust seasoning if needed, and serve hot.\n\nWould you like any additional information or variations on this recipe?\n        \"#\n    }\n}\n",
   "test-files/testing_pipeline/resume.baml": "class Resume {\n    name string\n    email string\n    phone string\n    experience Education[]\n    education string[]\n    skills string[]\n}\n\nclass Education {\n    institution string\n    location string\n    degree string\n    major string[]\n    graduation_date string?\n}\n\ntemplate_string AddRole(foo: string) #\"\n    {{ _.role('system')}}\n    You are a {{ foo }}. be nice\n\n    {{ _.role('user') }}\n\"#\n\nclient<llm> TestClient {\n    provider fallback\n    retry_policy Constant\n    options {\n        strategy [\n            Claude\n            GPT35\n            AwsBedrock\n        ]\n    }\n}\n\nclient<llm> Claude2 {\n  provider anthropic\n  options {\n    model claude-3-haiku-20240307\n    api_key env.FOOBAR3\n    max_tokens 1000\n  }\n}\n\nfunction ExtractResume(resume: string, img: image?) -> Resume {\n    client Claude2\n    prompt #\"\n        {{ AddRole(\"Software Engineer\") }}\n\n        Extract data:\n        \n\n        <<<<\n        {{ resume }}\n        <<<<\n\n        {% if img %}\n        {{img}}\n        {% endif %}\n\n        {{ ctx.output_format }}\n    \"#\n}\n\ntest sam_resume {\n    functions [ExtractResume]\n    input {\n        img {\n            url \"https://avatars.githubusercontent.com/u/1016595?v=4\"\n        }\n        resume #\"\n            Sam Lijin\n            he/him |  jobs@sxlijin.com |  sxlijin.github.io | 111-222-3333 | sxlijin |  sxlijin\n\n            Experience\n            Trunk\n            | July 2021 - current\n            Trunk Check | Senior Software Engineer | Services TL, Mar 2023 - current | IC, July 2021 - Feb 2023\n            Proposed, designed, and led a team of 3 to build a web experience for Check (both a web-only onboarding flow and SaaS offerings)\n            Proposed and built vulnerability scanning into Check, enabling it to compete with security products such as Snyk\n            Helped grow Check from <1K users to 90K+ users by focusing on 
product-led growth\n            Google | Sept 2017 - June 2021\n            User Identity SRE | Senior Software Engineer | IC, Mar 2021 - June 2021\n            Designed an incremental key rotation system to limit the global outage risk to Google SSO\n            Discovered and severed an undocumented Gmail serving dependency on Identity-internal systems\n            Cloud Firestore | Senior Software Engineer | EngProd TL, Aug 2019 - Feb 2021 | IC, Sept 2017 - July 2019\n            Metadata TTL system: backlog of XX trillion records, sustained 1M ops/sec, peaking at 3M ops/sec\n\n            Designed and implemented a logging system with novel observability and privacy requirements\n            Designed and implemented Jepsen-style testing to validate correctness guarantees\n            Datastore Migration: zero downtime, xM RPS and xxPB of data over xM customers and 36 datacenters\n\n            Designed composite index migration, queue processing migration, progressive rollout, fast rollback, and disk stockout mitigations; implemented transaction log replay, state transitions, and dark launch process\n            Designed and implemented end-to-end correctness and performance testing\n            Velocity improvements for 60-eng org\n\n            Proposed and implemented automated rollbacks: got us out of a 3-month release freeze and prevented 5 outages over the next 6 months\n            Proposed and implemented new development and release environments spanning 30+ microservices\n            Incident response for API proxy rollback affecting every Google Cloud service\n\n            Google App Engine Memcache | Software Engineer | EngProd TL, Apr 2019 - July 2019\n            Proposed and led execution of test coverage improvement strategy for a new control plane: reduced rollbacks and ensured strong consistency of a distributed cache serving xxM QPS\n            Designed and implemented automated performance regression testing for two critical serving paths\n 
           Used to validate Google-wide rollout of AMD CPUs, by proving a 50p latency delta of <10µs\n            Implemented on shared Borg (i.e. vulnerable to noisy neighbors) with <12% variance\n            Miscellaneous | Sept 2017 - June 2021\n            Redesigned the Noogler training on Google-internal storage technologies & trained 2500+ Nooglers\n            Landed multiple google3-wide refactorings, each spanning xxK files (e.g. SWIG to CLIF)\n            Education\n            Vanderbilt University (Nashville, TN) | May 2017 | B.S. in Computer Science, Mathematics, and Political Science\n\n            Stuyvesant HS (New York, NY) | 2013\n\n            Skills\n            C++, Java, Typescript, Javascript, Python, Bash; light experience with Rust, Golang, Scheme\n            gRPC, Bazel, React, Linux\n            Hobbies: climbing, skiing, photography\n        \"#\n    }\n}\n\ntest vaibhav_resume {\n    functions [ExtractResume]\n    input {\n        resume #\"\n            Vaibhav Gupta\n            linkedin/vaigup\n            (972) 400-5279\n            vaibhavtheory@gmail.com\n            EXPERIENCE\n            Google,\n            Software Engineer\n            Dec 2018-Present\n            Seattle, WA\n            •\n            Augmented Reality,\n            Depth Team\n            •\n            Technical Lead for on-device optimizations\n            •\n            Optimized and designed front\n            facing depth algorithm\n            on Pixel 4\n            •\n            Focus: C++ and SIMD on custom silicon\n            \n            \n            EDUCATION\n            University of Texas at Austin\n            Aug 2012-May 2015\n            Bachelors of Engineering, Integrated Circuits\n            Bachelors of Computer Science\n        \"#\n    }\n}",
 }
diff --git a/integ-tests/typescript/baml_client/sync_client.ts b/integ-tests/typescript/baml_client/sync_client.ts
index 2cc0e6f65..f57137669 100644
--- a/integ-tests/typescript/baml_client/sync_client.ts
+++ b/integ-tests/typescript/baml_client/sync_client.ts
@@ -368,6 +368,31 @@ export class BamlSyncClient {
     }
   }
   
+  Completion(  // synchronous variant: returns the parsed result of the BAML function "Completion" directly
+      prefix: string,suffix: string,language: string,
+      __baml_options__?: { tb?: TypeBuilder, clientRegistry?: ClientRegistry }  // optional per-call overrides
+  ): string {
+    try {
+    const raw = this.runtime.callFunctionSync(
+      "Completion",
+      {
+        "prefix": prefix,"suffix": suffix,"language": language  // arguments keyed by parameter name
+      },
+      this.ctx_manager.cloneContext(),  // the call receives a cloned context
+      __baml_options__?.tb?.__tb(),  // undefined when no type builder was supplied
+      __baml_options__?.clientRegistry,
+    )
+    return raw.parsed() as string  // `as string` is a compile-time assertion only; no runtime check happens here
+    } catch (error: any) {
+      const bamlError = createBamlValidationError(error);  // attempt to convert the caught value
+      if (bamlError instanceof BamlValidationError) {
+        throw bamlError;  // conversion produced a validation error: surface that
+      } else {
+        throw error;  // otherwise rethrow the original value unchanged
+      }
+    }
+  }
+  
   CustomTask(
       input: string,
       __baml_options__?: { tb?: TypeBuilder, clientRegistry?: ClientRegistry }
diff --git a/typescript/vscode-ext/packages/syntaxes/baml.tmLanguage.json b/typescript/vscode-ext/packages/syntaxes/baml.tmLanguage.json
index bf940a0b6..c3a32f6be 100644
--- a/typescript/vscode-ext/packages/syntaxes/baml.tmLanguage.json
+++ b/typescript/vscode-ext/packages/syntaxes/baml.tmLanguage.json
@@ -83,26 +83,27 @@
         "1": { "name": "storage.type.declaration.function" },
         "2": { "name": "entity.name.function" }
       },
-      "end": "(\")(#{1,3})",
+
+      "end": "^(\"#{1,3})",
       "endCaptures": {
-        "0": { "name": "string.quoted.block.baml" }
+        "1": { "name": "string.quoted.block.baml.end" }
       },
       "patterns": [
         { "include": "#comment" },
         { "include": "#function_parameters" },
-        {
-          "begin": "(#{1,3}\")",
-          "beginCaptures": {
-            "0": { "name": "string.quoted.block.baml" }
-          },
-          "end": "(?=\"#{1,3})",
-          "contentName": "string.quoted.block.baml",
-          "patterns": [
-            { "include": "source.baml-jinja" }
-          ]
-        }
+        { "include": "#template_string_body" }
       ]
     },
+    "template_string_body": {
+      "begin": "\\s+(#{1,3})(\")",
+      "beginCaptures": {
+        "1": { "name": "string.quoted.block.baml.body.start" },
+        "2": { "name": "string.quoted.block.baml.body.start" }
+      },
+      "end": "(?=\"\\1)",
+      "contentName": "string.quoted.block.baml.body",
+      "patterns": [{ "include": "source.baml-jinja" }]
+    },
     "function_declaration": {
       "comment": "Function declaration",
       "begin": "(function)\\s+(\\w+)",
@@ -118,10 +119,12 @@
         { "include": "#function_body" }
       ]
     },
+
     "function_parameters": {
       "begin": "\\(",
       "end": "\\)",
-      "patterns": [{ "include": "#comment" }, { "include": "#function_name_type" }]
+      "patterns": [{ "include": "#comment" }, { "include": "#function_name_type" }],
+      "contentName": "meta.function.parameters.baml"
     },
     "function_name_type": {
       "patterns": [
@@ -285,12 +288,12 @@
           "name": "meta.client.declaration"
         },
         {
-          "begin": "\\s+(prompt)\\s+(#\"){1,3}",
+          "begin": "\\s+(prompt)\\s+(#{1,3}\")",
           "beginCaptures": {
             "1": { "name": "variable.other.readwrite.prompt" },
             "2": { "name": "string.quoted.block.baml.prompt" }
           },
-          "end": "\\s*(\"#)",
+          "end": "\\s*(\"#{1,3})",
           "contentName": "string.quoted.block.baml.prompt",
           "endCaptures": {
             "1": { "name": "string.quoted.block.baml.prompt" }
@@ -381,11 +384,11 @@
           ]
         },
         {
-          "begin": "(@{1,2}\\w+)\\(#\"",
+          "begin": "(@{1,2}\\w+)\\(#{1,3}\"",
           "beginCaptures": {
             "1": { "name": "entity.name.function.attribute" }
           },
-          "end": "\"#\\)",
+          "end": "\"#{1,3}\\)",
           "name": "string.quoted.block.baml",
           "patterns": [
             { "include": "#comment" },
@@ -458,11 +461,11 @@
           ]
         },
         {
-          "begin": "(@{1,2}\\w+)\\(#",
+          "begin": "(@{1,2}\\w+)\\(#{1,3}",
           "beginCaptures": {
             "1": { "name": "entity.name.function.attribute" }
           },
-          "end": "#\\)",
+          "end": "#{1,3}\\)",
           "name": "string.quoted.block.baml",
           "patterns": [
             {
@@ -589,12 +592,12 @@
       ]
     },
     "block_string_pair": {
-      "begin": "(\\w+)?\\s+(#(\"){1,3})",
+      "begin": "(\\w+)?\\s+(#{1,3}(\"){1,3})",
       "beginCaptures": {
         "1": { "name": "variable.other.readwrite.block_string_pair" },
         "2": { "name": "string.quoted.block.baml.startquote" }
       },
-      "end": "((\"){1,3}#)",
+      "end": "((\"){1,3}#{1,3})",
       "endCaptures": {
         "1": { "name": "string.quoted.block.baml.endquote" }
       },