Skip to content

Commit

Permalink
bug fix:
Browse files Browse the repository at this point in the history
1. url encoding for sql password
2. text-to-speech: stream instead of creating a temp file
3. check for undefined category id
4. fix bicep configuration for bing search
5. fix function for the indexer
  • Loading branch information
kimtth committed Dec 20, 2024
1 parent cf50df3 commit 1b4a742
Show file tree
Hide file tree
Showing 10 changed files with 285 additions and 181 deletions.
29 changes: 27 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ https://github.com/kimtth/visual-genius/assets/13846660/7a39a3ba-32e7-4742-aea6-
1. Set parameters under `infra\parameter.json`
2. Execute `deploy.ps1` to upload the dataset, deploy Azure resources, initialize the database, and set up the search index.
3. Create a DALL·E model on the Azure Portal and set the deployment model name in `Azure > WebApp > Environment variables > 'AZURE_OPENAI_IMG_MODEL_DEPLOYMENT_NAME'`. The model must be created manually because deploying it through Bicep was not possible at the time of writing.
4. Deploy the application code to Azure App Service: It is recommended to use the `Azure Extension` in VS Code to deploy the code to Azure App Service. You can follow the [Quickstart: Deploy a Python app](https://learn.microsoft.com/en-us/azure/app-service/quickstart-python), or use `az webapp deployment source config-zip` to deploy if you have SCM Basic Auth credentials available.
4. Open the `backend` directory. Deploy the application code to Azure App Service: It is recommended to use the `Azure Extension` in VS Code to deploy the code to Azure App Service. You can follow the [Quickstart: Deploy a Python app](https://learn.microsoft.com/en-us/azure/app-service/quickstart-python), or use `az webapp deployment source config-zip` to deploy if you have SCM Basic Auth credentials available.

- The Deployment step using Azure CLI is commented out in `deploy.ps1`.

Expand All @@ -53,7 +53,32 @@ https://github.com/kimtth/visual-genius/assets/13846660/7a39a3ba-32e7-4742-aea6-
#>
```

- Note: Please ensure you have installed <code><a href="https://nodejs.org/en/download/">nodejs</a></code>, <code><a href="https://classic.yarnpkg.com/en/docs/install">yarn</a></code>, <code><a href="https://learn.microsoft.com/en-us/cli/azure/install-azure-cli">Azure CLI</a></code>, and <code><a href="https://www.python.org/downloads/">python3</a></code>.
- Note: Please ensure you have installed <code><a href="https://nodejs.org/en/download/">nodejs</a></code>, <code><a href="https://classic.yarnpkg.com/en/docs/install">yarn</a></code>, <code><a href="https://learn.microsoft.com/en-us/cli/azure/install-azure-cli">Azure CLI</a></code>, <code><a href="https://github.com/Azure/azure-functions-core-tools">Azure Functions Core Tools</a></code>, <code><a href="https://www.postgresql.org/download/">psql</a></code>, and <code><a href="https://www.python.org/downloads/">python3</a></code>.

```powershell
# Install Chocolatey
Set-ExecutionPolicy Bypass -Scope Process -Force; `
[System.Net.ServicePointManager]::SecurityProtocol = [System.Net.ServicePointManager]::SecurityProtocol -bor 3072; `
iex ((New-Object System.Net.WebClient).DownloadString('https://chocolatey.org/install.ps1'))
# Install Node.js
choco install nodejs -y
# Install Yarn
choco install yarn -y
# Install Azure CLI
choco install azure-cli -y
# Install Azure Functions Core Tools
choco install azure-functions-core-tools -y
# Install PostgreSQL
choco install postgresql -y
# Install Python 3.11
choco install python --version=3.11.0 -y
```

#### To dev:

Expand Down
13 changes: 11 additions & 2 deletions backend/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,12 @@
postgre_db = os.getenv("POSTGRE_DATABASE")
postgre_pwd = os.getenv("POSTGRE_PASSWORD")

# URL-encode the password before embedding it in the connection URL so that
# reserved characters (e.g. "@", ":", "?", "#") cannot break URL parsing.
# safe="" is required because quote() leaves "/" unescaped by default.
import urllib.parse
encoded_postgre_pwd = urllib.parse.quote(postgre_pwd, safe="")

engine = create_engine(
f"postgresql://{postgre_user}:{postgre_pwd}@{postgre_host}:{postgre_port}/{postgre_db}"
f"postgresql://{postgre_user}:{encoded_postgre_pwd}@{postgre_host}:{postgre_port}/{postgre_db}"
)
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)

Expand Down Expand Up @@ -813,7 +817,12 @@ def get_image(

image_list = []
for item in items:
item.imgPath = f"{item.imgPath}?{sas_token}"
pattern = r"\?se=[^&]+&sig=[^&]+"
dalle_url_pattern = re.search(pattern, item.imgPath)
if dalle_url_pattern:
item.imgPath = item.imgPath
else:
item.imgPath = f"{item.imgPath}?{sas_token}"
image_dict = ImageDB.model_validate(item)
image_list.append(image_dict)

Expand Down
172 changes: 96 additions & 76 deletions backend/func/acs_skillset_for_indexer/GetImageEmbeddings/__init__.py
Original file line number Diff line number Diff line change
@@ -1,79 +1,99 @@
import os
import json
import logging
import requests

import azure.functions as func


def main(req: func.HttpRequest) -> func.HttpResponse:
logging.info('Python HTTP trigger function processed a request.')

# Extract values from request payload
req_body = req.get_body().decode('utf-8')
logging.info(f"Request body: {req_body}")

if req_body:
import os
import json
import logging
import requests
import azure.functions as func

# Sample of input and output data
# https://learn.microsoft.com/en-us/azure/search/cognitive-search-custom-skill-web-api#sample-input-json-structure
# https://learn.microsoft.com/en-us/azure/search/cognitive-search-custom-skill-web-api#sample-output-json-structure


def main(req: func.HttpRequest) -> func.HttpResponse:
logging.info("Python HTTP trigger function processed a request.")

try:
req_body = req.get_body().decode("utf-8")
logging.info(f"Request body: {req_body}")

request = json.loads(req_body)
values = request['values']
values = request.get("values", [])

# Process values and generate the response payload
response_values = []
if not values:
logging.info("No values provided in the request.")
return func.HttpResponse(
json.dumps({"values": []}), mimetype="application/json", status_code=200
)

response_results = []
for value in values:
imageUrl = value['data']['imgPath']
recordId = value['recordId']
logging.info(f"Input imageUrl: {imageUrl}")
logging.info(f"Input recordId: {recordId}")

# Get image embeddings
vector = get_image_embeddings(imageUrl)

# Add the processed value to the response payload
response_values.append({
"recordId": recordId,
"data": {
"vector": vector
},
"errors": None,
"warnings": None
})

# Create the response object
response_body = {
"values": response_values
}
logging.info(f"Response body: {response_body}")

# Return the response
return func.HttpResponse(json.dumps(response_body), mimetype="application/json")
else:
logging.info("req_body is empty")


def get_image_embeddings(imageUrl):
cogSvcsEndpoint = os.environ["COGNITIVE_SERVICES_ENDPOINT"]
cogSvcsApiKey = os.environ["COGNITIVE_SERVICES_API_KEY"]

url = f"{cogSvcsEndpoint}/computervision/retrieval:vectorizeImage"

params = {
"api-version": "2023-02-01-preview"
}

headers = {
"Content-Type": "application/json",
"Ocp-Apim-Subscription-Key": cogSvcsApiKey
}

data = {
"url": imageUrl
}

response = requests.post(url, params=params, headers=headers, json=data)

if response.status_code != 200:
logging.error(f"Error: {response.status_code}, {response.text}")
response.raise_for_status()

embeddings = response.json()["vector"]
return embeddings
record_id = value.get("recordId", "Unknown")
logging.info(f"Processing recordId: {record_id}")

img_path = value.get("data", {}).get("imgPath")
if not img_path:
logging.error("imgPath is missing.")
response = create_error_response(record_id, "Missing key: imgPath")
response_results.append(response)
continue

vector = get_image_embeddings(img_path)
if vector:
response = create_success_response(record_id, vector)
else:
response = create_error_response(
record_id, "Failed to retrieve image embeddings."
)
response_results.append(response)

logging.info(f"Response body: {response_results}")
return func.HttpResponse(
json.dumps({"values": response_results}),
mimetype="application/json",
status_code=200,
)
except Exception as e:
logging.error(f"Unexpected error: {e}")
return func.HttpResponse(f"Internal Server Error: {e}", status_code=500)


def get_image_embeddings(img_path, timeout=30):
    """Request an embedding vector for an image from the vectorizeImage endpoint.

    Configuration is read from the environment: COGNITIVE_SERVICES_ENDPOINT,
    COGNITIVE_SERVICES_API_KEY and, optionally, COGNITIVE_SERVICES_API_VERSION
    (defaults to "2024-02-01").

    Args:
        img_path: Image URL, sent as the "url" field of the JSON request body.
        timeout: Seconds to wait for the service before giving up, so a
            stalled connection cannot block the caller indefinitely.

    Returns:
        The "vector" value of the JSON response ([] when the key is absent),
        or None when configuration is missing, the service answers with a
        non-200 status, or the request/response handling fails.
    """
    cog_svcs_endpoint = os.getenv("COGNITIVE_SERVICES_ENDPOINT")
    cog_svcs_api_key = os.getenv("COGNITIVE_SERVICES_API_KEY")
    cog_svcs_api_version = os.getenv("COGNITIVE_SERVICES_API_VERSION", "2024-02-01")

    # Fail early with a clear message instead of posting to "None/computervision/...".
    if not cog_svcs_endpoint or not cog_svcs_api_key:
        logging.error(
            "COGNITIVE_SERVICES_ENDPOINT and COGNITIVE_SERVICES_API_KEY must be set."
        )
        return None

    # Strip a trailing slash so the joined URL never contains "//".
    url = f"{cog_svcs_endpoint.rstrip('/')}/computervision/retrieval:vectorizeImage"
    params = {"api-version": cog_svcs_api_version}
    headers = {
        "Content-Type": "application/json",
        "Ocp-Apim-Subscription-Key": cog_svcs_api_key,
    }
    data = {"url": img_path}

    try:
        response = requests.post(
            url, params=params, headers=headers, json=data, timeout=timeout
        )
        if response.status_code != 200:
            logging.error(f"Error: {response.status_code}, {response.text}")
            return None
        payload = response.json()
    except (requests.RequestException, ValueError) as e:
        # Network failures, timeouts and malformed JSON bodies all mean
        # "no embedding available" for this image.
        logging.error(f"Error getting image embeddings: {e}")
        return None

    if not isinstance(payload, dict):
        logging.error("Error getting image embeddings: unexpected response payload.")
        return None
    return payload.get("vector", [])


def create_success_response(record_id, vector):
    """Build the per-record result entry for a successfully embedded image.

    Args:
        record_id: Identifier echoed back under "recordId".
        vector: Embedding placed under data["vector"].

    Returns:
        A dict with keys "recordId", "data", "errors" (empty list) and
        "warnings" (None).
    """
    payload = {"vector": vector}
    return dict(recordId=record_id, data=payload, errors=[], warnings=None)


def create_error_response(record_id, message):
    """Build the per-record result entry for a record that could not be processed.

    Args:
        record_id: Identifier echoed back under "recordId".
        message: Human-readable failure description.

    Returns:
        A dict with "data" set to None, a single-entry "errors" list holding
        {"message": message}, and "warnings" set to None.
    """
    failure = {"message": message}
    return dict(recordId=record_id, data=None, errors=[failure], warnings=None)
1 change: 0 additions & 1 deletion backend/module/aoai_call.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
api_key=os.getenv("AZURE_OPENAI_API_KEY")
)


async def img_gen(query):
try:
response = aoai_client.images.generate(
Expand Down
92 changes: 61 additions & 31 deletions backend/module/text_to_speech.py
Original file line number Diff line number Diff line change
@@ -1,53 +1,83 @@
import os
import uuid
from azure.cognitiveservices.speech import CancellationReason, SpeechSynthesisCancellationDetails, ResultReason, SpeechConfig, SpeechSynthesizer, AudioDataStream, SpeechSynthesisOutputFormat
from azure.cognitiveservices.speech.audio import AudioOutputConfig
from azure.cognitiveservices.speech import (
CancellationReason,
SpeechSynthesisCancellationDetails,
ResultReason,
SpeechConfig,
SpeechSynthesizer,
AudioDataStream,
SpeechSynthesisOutputFormat,
)
from azure.cognitiveservices.speech.audio import AudioOutputConfig, PullAudioOutputStream

async def synthesize_speech(text: str, speech_subscription_key: str, speech_region: str):

async def synthesize_speech(
text: str, speech_subscription_key: str, speech_region: str
):
try:
# Validate input parameters
if not text:
raise ValueError("Text for speech synthesis cannot be empty.")
if not speech_subscription_key or not speech_region:
raise ValueError("Speech subscription key and region must be provided.")

# Configure speech synthesis
speech_config = SpeechConfig(
subscription=speech_subscription_key, region=speech_region)
subscription=speech_subscription_key, region=speech_region
)
speech_config.speech_synthesis_language = "en-US"
speech_config.speech_synthesis_voice_name = "en-US-JennyMultilingualNeural"
# https://learn.microsoft.com/en-us/answers/questions/1184428/azure-text-to-speech-error-code-0x38-(spxerr-audio
# the remote app service the default audio config needs to be set to an audio file
# instead of default as in local machine it cannot default to a speaker in this case.
file_name = str(uuid.uuid4()) + ".mp3"
file_config = AudioOutputConfig(filename=file_name)


# Set up pull stream and audio output
pull_stream = PullAudioOutputStream()
audio_config = AudioOutputConfig(stream=pull_stream)

# Specify the output format
speech_config.set_speech_synthesis_output_format(
SpeechSynthesisOutputFormat.Audio16Khz32KBitRateMonoMp3)
synthesizer = SpeechSynthesizer(speech_config=speech_config, audio_config=file_config)
SpeechSynthesisOutputFormat.Audio16Khz32KBitRateMonoMp3
)

# Initialize the synthesizer
synthesizer = SpeechSynthesizer(
speech_config=speech_config, audio_config=audio_config
)

# Start speech synthesis asynchronously
result = synthesizer.speak_text_async(text).get()

# Handle synthesis result
if result.reason == ResultReason.SynthesizingAudioCompleted:
print("Speech synthesized to speaker for text [{}]".format(text))
stream = AudioDataStream(result)
print(f"Speech synthesized successfully for text: {text}")
audio_data_stream = AudioDataStream(result)
audio_buffer = bytes(16000)
audio_data = bytearray()

while True:
num_bytes_read = stream.read_data(audio_buffer)
num_bytes_read = audio_data_stream.read_data(audio_buffer)
if num_bytes_read == 0:
break
audio_data.extend(audio_buffer[:num_bytes_read])

return bytes(audio_data)
elif result.reason == ResultReason.Canceled:
cancellation_details = SpeechSynthesisCancellationDetails.from_result(
result)
print("Speech synthesis canceled: {}".format(
cancellation_details.reason))
if cancellation_details.reason == CancellationReason.Error:
if cancellation_details.error_details:
print("Error details: {}".format(
cancellation_details.error_details))
print("Did you update the subscription info?")
return {"message": "Speech synthesis canceled", "error": cancellation_details.reason}
result
)
print(f"Speech synthesis canceled: {cancellation_details.reason}")
if (
cancellation_details.reason == CancellationReason.Error
and cancellation_details.error_details
):
print(f"Error details: {cancellation_details.error_details}")
print("Ensure that the subscription info is correct.")
return {
"message": "Speech synthesis canceled",
"error": cancellation_details.reason,
}

except Exception as e:
print("Error: {}".format(e))
return {"message": "Error", "error": e}
print(f"Error during speech synthesis: {e}")
return {"message": "Error", "error": str(e)}

finally:
# After processing (or if an error occurs), delete the file
os.remove(file_name)
# Ensure resources are cleaned up properly
if "synthesizer" in locals():
del synthesizer
1 change: 1 addition & 0 deletions backend/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,4 @@ passlib==1.7.4
bcrypt==4.0.1
SQLAlchemy==2.0.27
azure-identity~=1.17.1
azure-functions~=1.21.3
Loading

0 comments on commit 1b4a742

Please sign in to comment.