Skip to content

Commit

Permalink
fix(nl-to-mquery): incorrect extract_json result
Browse files Browse the repository at this point in the history
  • Loading branch information
XuHaoJun committed Oct 1, 2024
1 parent c198dff commit 2253428
Show file tree
Hide file tree
Showing 7 changed files with 57 additions and 6 deletions.
3 changes: 2 additions & 1 deletion backend/.dockerignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
.venv
__pycache__
.env
.env
tests
2 changes: 2 additions & 0 deletions backend/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ Tech use Azure OpenAI.
3. Copy `.env.example` to `.env`, and configure your `AZURE_OPENAI_*` settings.
4. `fastapi dev backend/main.py`

Run the unit tests: `python -m unittest discover -s tests -p 'test_*.py'`

## Reference

1. [Poetry Dockerfile](https://github.com/gianfa/poetry/blob/docs/docker-best-practices/docker-examples/poetry-multistage/Dockerfile)
2 changes: 1 addition & 1 deletion backend/backend/nl_to_mquery/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
from .myopenai import nl_to_mquery, get_num_tokens
from .myopenai import nl_to_mquery, get_num_tokens, extract_json
4 changes: 2 additions & 2 deletions backend/backend/nl_to_mquery/extract_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ def extract_json(response) -> dict | None:
except json.JSONDecodeError:
pass

# Look for a JSON object directly in the text
json_object_match = re.search(r'{[^{}]+}', response)
# Look for a JSON object that is not enclosed in triple backticks
json_object_match = re.search(r'\{.*\}', response, re.DOTALL)
if json_object_match:
json_text = json_object_match.group(0)
try:
Expand Down
4 changes: 2 additions & 2 deletions backend/backend/nl_to_mquery/myopenai.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
from .extract_json import extract_json

client = AzureOpenAI(
azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT"),
api_key=os.getenv("AZURE_OPENAI_API_KEY"),
azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT") or "https://openai.openai.azure.com/",
api_key=os.getenv("AZURE_OPENAI_API_KEY") or "<AZURE_OPENAI_API_KEY>",
api_version="2024-02-01"
)

Expand Down
Empty file added backend/tests/__init__.py
Empty file.
48 changes: 48 additions & 0 deletions backend/tests/test_extract_json.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import unittest
import json
import re

from backend.nl_to_mquery import extract_json

class TestExtractJson(unittest.TestCase):
    """Unit tests for ``extract_json`` covering fenced, inline and missing JSON."""

    def test_json_block_extraction(self):
        """A JSON object inside a ```json fenced block is returned as a dict."""
        text = '```json\n{"key": "value"}\n```'
        parsed = extract_json(text)
        self.assertEqual(parsed, {"key": "value"})

    def test_json_object_extraction(self):
        """A JSON object embedded in surrounding prose is returned as a dict."""
        text = 'This is some text with a JSON object in it: { "key": "value" }'
        parsed = extract_json(text)
        self.assertEqual(parsed, {"key": "value"})

    def test_no_json_found(self):
        """Text without any JSON yields ``None``."""
        parsed = extract_json('This is some text with no JSON in it.')
        self.assertIsNone(parsed)

    def test_invalid_json(self):
        """A fenced block whose content is not valid JSON yields ``None``."""
        parsed = extract_json('```json\n{"key": "value" invalid json}\n```')
        self.assertIsNone(parsed)

    def test_empty_response(self):
        """An empty string yields ``None``."""
        parsed = extract_json('')
        self.assertIsNone(parsed)

    def test_response_with_only_triple_backticks(self):
        """A fence marker with no payload yields ``None``."""
        parsed = extract_json('```json```')
        self.assertIsNone(parsed)

    def test_response_with_only_curly_brackets(self):
        """A bare ``{}`` is returned as an empty dict."""
        text = '{}'
        self.assertEqual(extract_json(text), {})

    def test_extract_json_with_openai_response_1(self):
        """A nested object (dicts inside a list) is returned in full."""
        text = '{"$and": [{"key": "value"}, {"key2": "value2"}]}'
        wanted = {"$and": [{"key": "value"}, {"key2": "value2"}]}
        self.assertEqual(extract_json(text), wanted)

# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()

0 comments on commit 2253428

Please sign in to comment.