From cac7360dc696427b5f7691e05f1f11f4396abef3 Mon Sep 17 00:00:00 2001 From: Ning Date: Tue, 7 Nov 2023 14:46:32 +0800 Subject: [PATCH 01/37] feat: add txt loader --- src/etl-stack.ts | 4 +- src/scripts/dep/llm_bot_dep/loaders/auto.py | 5 +- src/scripts/dep/llm_bot_dep/loaders/text.py | 56 ++++++++++++++++++--- src/scripts/dep/setup.py | 1 + src/scripts/glue-job-script.py | 16 +++--- 5 files changed, 63 insertions(+), 19 deletions(-) diff --git a/src/etl-stack.ts b/src/etl-stack.ts index 573dc062..3dfe4232 100644 --- a/src/etl-stack.ts +++ b/src/etl-stack.ts @@ -97,7 +97,7 @@ export class EtlStack extends NestedStack { '--REGION': props._region, '--EMBEDDING_MODEL_ENDPOINT': props._embeddingEndpoint, '--DOC_INDEX_TABLE': 'chatbot-index', - '--additional-python-modules': 'langchain==0.0.312,beautifulsoup4==4.12.2,requests-aws4auth==1.2.3,boto3==1.28.69,openai==0.28.1,nougat-ocr==0.1.17,pyOpenSSL==23.3.0,tenacity==8.2.3', + '--additional-python-modules': 'langchain==0.0.312,beautifulsoup4==4.12.2,requests-aws4auth==1.2.3,boto3==1.28.69,openai==0.28.1,nougat-ocr==0.1.17,pyOpenSSL==23.3.0,tenacity==8.2.3,markdownify==0.11.6', // add multiple extra python files '--extra-py-files': extraPythonFilesList } @@ -130,7 +130,7 @@ export class EtlStack extends NestedStack { }), }); - // multiplex the same glue job to offline and online + // Multiplex the same glue job to offline and online const onlineGlueJob = new tasks.GlueStartJobRun(this, 'OnlineGlueJob', { glueJobName: glueJob.jobName, integrationPattern: sfn.IntegrationPattern.RUN_JOB, diff --git a/src/scripts/dep/llm_bot_dep/loaders/auto.py b/src/scripts/dep/llm_bot_dep/loaders/auto.py index 60eca5e9..37e56b57 100644 --- a/src/scripts/dep/llm_bot_dep/loaders/auto.py +++ b/src/scripts/dep/llm_bot_dep/loaders/auto.py @@ -8,8 +8,8 @@ def cb_process_object(s3, file_type: str, file_content, **kwargs): res = None - if file_type == 'text': - process_text(s3, file_content, **kwargs) + if file_type == 'txt': + res = process_text(file_content, **kwargs) elif file_type == 'csv': res = process_csv(s3, file_content, **kwargs) elif file_type == 'html': @@ -19,4 +19,5 @@ def cb_process_object(s3, file_type: str, file_content, **kwargs): res = process_pdf(s3, file_content, **kwargs) elif file_type == 'image': process_image(s3, file_content, **kwargs) + return res \ No newline at end of file diff --git a/src/scripts/dep/llm_bot_dep/loaders/text.py b/src/scripts/dep/llm_bot_dep/loaders/text.py index 04c00f76..858d6576 100644 --- a/src/scripts/dep/llm_bot_dep/loaders/text.py +++ b/src/scripts/dep/llm_bot_dep/loaders/text.py @@ -1,12 +1,52 @@ +import logging import re +from typing import List, Optional +from langchain.docstore.document import Document +from langchain.document_loaders.text import TextLoader -def pre_process_text(text: str): - # Remove special characters, punctuation, line breaks and multiple spaces with a single space, - str_doc = re.sub(r'[^\w\s]', '', text) - str_doc = re.sub(r'\s+', ' ', str_doc) - str_doc = re.sub(r'\n', ' ', str_doc) - return str_doc.strip() +logger = logging.getLogger(__name__) -def process_text(text: str): - text = pre_process_text(text) \ No newline at end of file +class CustomTextLoader(TextLoader): + """Load text file. + + Args: + file_content: Text file content. + + encoding: File encoding to use. If `None`, the file will be loaded + with the default system encoding. + + autodetect_encoding: Whether to try to autodetect the file encoding + if the specified encoding fails. 
+ """ + + def __init__( + self, + file_path: str, + encoding: Optional[str] = None, + autodetect_encoding: bool = False, + ): + """Initialize with file path.""" + self.file_path = file_path + self.encoding = encoding + self.autodetect_encoding = autodetect_encoding + + def load(self, text_content: str) -> List[Document]: + """Load from file path.""" + metadata = {"source": self.file_path} + return [Document(page_content=text_content, metadata=metadata)] + + +def pre_process_text(text_content: str) -> str: + # Clean up text content + text_content = re.sub(r'\s+', ' ', text_content) + text_content = re.sub(r'\n+', '\n', text_content) + return text_content.strip() + + +def process_text(file_content: str, **kwargs): + clean_text = pre_process_text(file_content) + loader = CustomTextLoader(file_path=kwargs['bucket'] + "/" + kwargs['key']) + data = loader.load(clean_text) + + return data diff --git a/src/scripts/dep/setup.py b/src/scripts/dep/setup.py index 3f1c756d..b1e7d1e3 100644 --- a/src/scripts/dep/setup.py +++ b/src/scripts/dep/setup.py @@ -13,5 +13,6 @@ 'unstructured', 'boto3', 'nougat-ocr', + 'markdownify' ], ) \ No newline at end of file diff --git a/src/scripts/glue-job-script.py b/src/scripts/glue-job-script.py index 122eb846..1fc238a2 100644 --- a/src/scripts/glue-job-script.py +++ b/src/scripts/glue-job-script.py @@ -62,7 +62,7 @@ def iterate_s3_files(bucket: str, prefix: str) -> Generator: kwargs = {'bucket': bucket, 'key': key} if file_type in ['txt']: - yield 'text', file_content.decode('utf-8'), kwargs + yield 'txt', file_content.decode('utf-8'), kwargs elif file_type in ['csv']: # Update row count here, the default row count is 1 kwargs['csv_row_count'] = 1 @@ -73,6 +73,8 @@ def iterate_s3_files(bucket: str, prefix: str) -> Generator: yield 'pdf', file_content, kwargs elif file_type in ['jpg', 'png']: yield 'image', file_content, kwargs + elif file_type in ['docx', 'doc']: + yield 'doc', file_content.decode('utf-8'), kwargs else: logger.info(f"Unknown file type: {file_type}") @@ -140,10 +142,10 @@ def _aos_injection(document: Document) -> Document: # logger.info("Adding documents %s to OpenSearch with index %s", document, index_name) _aos_injection(document) -# main function to be called by Glue job script +# Main function to be called by Glue job script def main(): logger.info("Starting Glue job with passing arguments: %s", args) - # check if offline mode + # Check if offline mode if offline == 'true': logger.info("Running in offline mode with consideration for large file size...") for file_type, file_content, kwargs in iterate_s3_files(s3_bucket, s3_prefix): @@ -155,7 +157,7 @@ def main(): if file_type == 'csv': # CSV page document has been splited into chunk, no more spliting is needed aos_injection(res, embeddingModelEndpoint, aosEndpoint, 'chatbot-index', gen_chunk=False) - elif file_type == 'pdf': + elif file_type in ['pdf', 'txt']: aos_injection(res, embeddingModelEndpoint, aosEndpoint, 'chatbot-index') if qa_enhancement == 'true': # iterate the document to get the QA pairs @@ -180,13 +182,13 @@ def main(): if __name__ == '__main__': logger.info("boto3 version: %s", boto3.__version__) - + # Set the NLTK data path to the /tmp directory for AWS Glue jobs nltk.data.path.append("/tmp") # List of NLTK packages to download nltk_packages = ['words'] # Download the required NLTK packages to /tmp for package in nltk_packages: - # download the package to /tmp/nltk_data + # Download the package to /tmp/nltk_data nltk.download(package, download_dir='/tmp/nltk_data') - main() \ No 
newline at end of file
+    main()

From 38eac4f72c676f16ba74b4af1c2b57d1b8c9ea16 Mon Sep 17 00:00:00 2001
From: yike5460
Date: Wed, 8 Nov 2023 03:08:03 +0000
Subject: [PATCH 02/37] feat: multi glue job per file to increase processing
 concurrency

---
 src/etl-stack.ts                | 77 +++++++++++++++++++++++++++++++--
 src/lambda/etl/Dockerfile       |  8 ++++
 src/lambda/etl/main.py          | 43 ++++++++++++++++++
 src/lambda/etl/requirements.txt |  9 ++++
 src/scripts/glue-job-script.py  | 12 ++++-
 5 files changed, 143 insertions(+), 6 deletions(-)
 create mode 100644 src/lambda/etl/Dockerfile
 create mode 100644 src/lambda/etl/main.py
 create mode 100644 src/lambda/etl/requirements.txt

diff --git a/src/etl-stack.ts b/src/etl-stack.ts
index 573dc062..081442ec 100644
--- a/src/etl-stack.ts
+++ b/src/etl-stack.ts
@@ -13,6 +13,10 @@ import * as s3 from 'aws-cdk-lib/aws-s3';
 import * as s3assets from 'aws-cdk-lib/aws-s3-assets';
 import * as s3deploy from 'aws-cdk-lib/aws-s3-deployment';
 import path from "path";
+import * as lambda from 'aws-cdk-lib/aws-lambda';
+import { DockerImageCode, Architecture, DockerImageFunction} from 'aws-cdk-lib/aws-lambda';
+import { join } from "path";
+import { off } from 'process';

 interface etlStackProps extends StackProps {
     _vpc: ec2.Vpc;
@@ -110,6 +114,45 @@ export class EtlStack extends NestedStack {
         });
         topic.addSubscription(new subscriptions.EmailSubscription(props._subEmail));

+        // Lambda function for file deduplication and Glue job allocation based on the number of files
+        const lambdaETL = new DockerImageFunction(this,
+            "lambdaETL", {
+            code: DockerImageCode.fromImageAsset(join(__dirname, "../src/lambda/etl")),
+            timeout: Duration.minutes(15),
+            memorySize: 1024,
+            architecture: Architecture.X86_64,
+        });
+
+        lambdaETL.addToRolePolicy(new iam.PolicyStatement({
+            actions: [
+                // glue job
+                "glue:StartJobRun",
+                "s3:List*",
+                "s3:Put*",
+                "s3:Get*",
+            ],
+            effect: iam.Effect.ALLOW,
+            resources: ['*'],
+            }
+        ))
+
+        const lambdaETLIntegration = new tasks.LambdaInvoke(this, 'lambdaETLIntegration', {
+            lambdaFunction: lambdaETL,
+            // Use the result of this invocation to decide how many Glue jobs to run
+            resultSelector: {
+                "processedPayload": {
+                    'batchIndices.$': '$.Payload.batchIndices',
+                    's3Bucket.$': '$.Payload.s3Bucket',
+                    's3Prefix.$': '$.Payload.s3Prefix',
+                    'qaEnhance.$': '$.Payload.qaEnhance',
+                    'offline.$': '$.Payload.offline',
+                }
+            },
+            // We still need the original input
+            resultPath: '$.TaskResult',
+            outputPath: '$.TaskResult.processedPayload',
+        });
+
         const offlineChoice = new sfn.Choice(this, 'Offline or Online', {
             comment: 'Check if the job is offline or online',
         });
@@ -127,9 +170,32 @@
                 '--REGION': props._region,
                 '--OFFLINE': 'true',
                 '--QA_ENHANCEMENT.$': '$.qaEnhance',
+                // Convert the numeric index to a string
+                '--BATCH_INDICE.$': 'States.Format(\'{}\', $.batchIndices)',
             }),
         });

+        // Define a Map state to run multiple Glue jobs in parallel based on the number of files to process
+        const mapState = new sfn.Map(this, 'MapState', {
+            // inputPath should point to the root since we want to pass the entire payload to the iterator
+            inputPath: '$',
+            // itemsPath should reference an array; we construct this array from batchIndices
+            itemsPath: sfn.JsonPath.stringAt('$.batchIndices'),
+            // Set maxConcurrency to 0, meaning no concurrency limit, so all iterations run in parallel
+            maxConcurrency: 0,
+            parameters: {
+                // These parameters are passed to each iteration of the map state
+                's3Bucket.$': '$.s3Bucket',
+                's3Prefix.$': '$.s3Prefix',
+                'qaEnhance.$': '$.qaEnhance',
+                // '$$.Map.Item.Index' is a Map-state context variable holding the zero-based index of the current item
+                'batchIndices.$': '$$.Map.Item.Index' // Pass the index so each Glue job knows which file to process
+            },
+            resultPath: '$.mapResults',
+        });
+
+        mapState.iterator(offlineGlueJob);
+
         // multiplex the same glue job to offline and online
         const onlineGlueJob = new tasks.GlueStartJobRun(this, 'OnlineGlueJob', {
             glueJobName: glueJob.jobName,
             integrationPattern: sfn.IntegrationPattern.RUN_JOB,
@@ -154,10 +220,13 @@
             message: sfn.TaskInput.fromText(`Glue job ${glueJob.jobName} completed!`),
         });

-        const sfnDefinition = offlineChoice
-            .when(sfn.Condition.stringEquals('$.offline', 'true'), offlineGlueJob)
-            .when(sfn.Condition.stringEquals('$.offline', 'false'), onlineGlueJob)
-            .afterwards().next(notifyTask);
+        offlineChoice.when(sfn.Condition.booleanEquals('$.offline', true), mapState)
+            .when(sfn.Condition.booleanEquals('$.offline', false), onlineGlueJob)
+
+        // Add the notify task after the offline map state
+        mapState.next(notifyTask);
+
+        const sfnDefinition = lambdaETLIntegration.next(offlineChoice)

         const sfnStateMachine = new sfn.StateMachine(this, 'ETLState', {
             definitionBody: sfn.DefinitionBody.fromChainable(sfnDefinition),
diff --git a/src/lambda/etl/Dockerfile b/src/lambda/etl/Dockerfile
new file mode 100644
index 00000000..73e6adf5
--- /dev/null
+++ b/src/lambda/etl/Dockerfile
@@ -0,0 +1,8 @@
+FROM --platform=linux/amd64 public.ecr.aws/lambda/python:3.9
+
+COPY requirements.txt .
+RUN pip3 install -r requirements.txt --target "${LAMBDA_TASK_ROOT}"
+
+COPY main.py ${LAMBDA_TASK_ROOT}
+
+CMD [ "main.lambda_handler" ]
diff --git a/src/lambda/etl/main.py b/src/lambda/etl/main.py
new file mode 100644
index 00000000..d1a13624
--- /dev/null
+++ b/src/lambda/etl/main.py
@@ -0,0 +1,43 @@
+import json
+import boto3
+import logging
+
+logger = logging.getLogger()
+logger.setLevel(logging.INFO)
+
+s3_client = boto3.client('s3')
+
+# Offline lambda function to count the number of files in the S3 bucket
+def lambda_handler(event, context):
+    logger.info(f"event:{event}")
+    # Retrieve bucket name and prefix from the event object passed by the Step Functions workflow
+    bucket_name = event['s3Bucket']
+    prefix = event['s3Prefix']
+
+    # Initialize the file count
+    file_count = 0
+
+    # Paginate through the list of objects in the bucket with the specified prefix
+    paginator = s3_client.get_paginator('list_objects_v2')
+    page_iterator = paginator.paginate(Bucket=bucket_name, Prefix=prefix)
+
+    # Count the files, skipping keys that end with a slash, which are folder markers
+    for page in page_iterator:
+        for obj in page.get('Contents', []):
+            if obj['Key'].endswith('/'):
+                continue
+            file_count += 1
+
+    # Convert the file count into an array of batch indices, i.e. [0, 1, 2, ..., file_count - 1]
+    batch_indices = list(range(file_count))
+
+    # This response should match the expected input schema of the downstream tasks in the Step Functions workflow
+    return {
+        'fileCount': file_count,
+        's3Bucket': bucket_name,
+        's3Prefix': prefix,
+        'qaEnhance': event['qaEnhance'],
+        # boolean value to indicate if the lambda function is running in offline mode
+        'offline': event['offline'],
+        'batchIndices': batch_indices,
+    }
diff --git a/src/lambda/etl/requirements.txt b/src/lambda/etl/requirements.txt
new file mode 100644
index 00000000..e6914666
--- /dev/null
+++ b/src/lambda/etl/requirements.txt
@@ -0,0 +1,9 @@
+langchain==0.0.312
+opensearch-py==2.2.0
+faiss_cpu==1.7.4
+sagemaker==2.48.0
+numpy==1.22.0
+# nltk==3.8.1
+requests-aws4auth==1.2.3
+unstructured==0.10.5
+unstructured[pdf]
diff --git a/src/scripts/glue-job-script.py b/src/scripts/glue-job-script.py
index 122eb846..db673471 100644
--- a/src/scripts/glue-job-script.py
+++ b/src/scripts/glue-job-script.py
@@ -31,7 +31,7 @@
 os.environ['NLTK_DATA'] = '/tmp/nltk_data'

 # Parse arguments
-args = getResolvedOptions(sys.argv, ['JOB_NAME', 'S3_BUCKET', 'S3_PREFIX', 'AOS_ENDPOINT', 'EMBEDDING_MODEL_ENDPOINT', 'REGION', 'OFFLINE', 'QA_ENHANCEMENT'])
+args = getResolvedOptions(sys.argv, ['JOB_NAME', 'S3_BUCKET', 'S3_PREFIX', 'AOS_ENDPOINT', 'EMBEDDING_MODEL_ENDPOINT', 'REGION', 'OFFLINE', 'QA_ENHANCEMENT', 'BATCH_INDICE'])
 s3_bucket = args['S3_BUCKET']
 s3_prefix = args['S3_PREFIX']
 aosEndpoint = args['AOS_ENDPOINT']
@@ -39,20 +39,28 @@
 region = args['REGION']
 offline = args['OFFLINE']
 qa_enhancement = args['QA_ENHANCEMENT']
+# TODO: pass the bucket and prefix to be handled by the current job directly
+batchIndice = args['BATCH_INDICE']

 ENHANCE_CHUNK_SIZE = 500

 credentials = boto3.Session().get_credentials()
 awsauth = AWS4Auth(credentials.access_key, credentials.secret_key, region, 'es', session_token=credentials.token)

+# This Glue job runs as a Map iteration; batchIndice selects the single file the current job should handle
 def iterate_s3_files(bucket: str, prefix: str) -> Generator:
     paginator = s3.get_paginator('list_objects_v2')
-
+    currentIndice = 0
     for page in paginator.paginate(Bucket=bucket, Prefix=prefix):
         for obj in page.get('Contents', []):
             # skip the prefix with slash, which is the folder name
            if obj['Key'].endswith('/'):
                continue
+            # skip the file if its index does not match batchIndice
+            if currentIndice != int(batchIndice):
+                logger.info("currentIndice: {}, batchIndice: {}, skip file: {}".format(currentIndice, batchIndice, obj['Key']))
+                currentIndice += 1
+                continue
             key = obj['Key']
             file_type = key.split('.')[-1]  # Extract file extension

From b59a853f2f0b3ebd78fe3e7ed043bd49e66bcc57 Mon Sep 17 00:00:00 2001
From: Ning
Date: Wed, 8 Nov 2023 15:23:21 +0800
Subject: [PATCH 03/37] fix: update ui according to backend change

---
 src/panel/app.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/panel/app.py b/src/panel/app.py
index 9ac7062e..6a14a3a0 100644
--- a/src/panel/app.py
+++ b/src/panel/app.py
@@ -86,7 +86,7 @@ def pipeline_tab():
         's3Bucket': s3_bucket_name,
         's3Prefix': s3_object_name,
         'documentEnhance': documentEnhance,
-        'qaPairEnhance': qaPairEnhance,
+        'qaEnhance': str(qaPairEnhance),
         'keyWordExtract': keyWordExtract,
         'textSummarize': textSummarize,
         'offline': 'true'

From 5b0834a0547e91c3ce28f480cb4a48bda93da32e Mon Sep 17 00:00:00 2001
From: Ning
Date: Wed, 8 Nov 2023 16:26:21 +0800
Subject: [PATCH 04/37] feat: add html loader

---
 src/scripts/dep/llm_bot_dep/loaders/auto.py   |   2 +-
 src/scripts/dep/llm_bot_dep/loaders/html.py   | 105 +++++++++++-------
 src/scripts/dep/llm_bot_dep/loaders/text.py   |   2 +
 src/scripts/dep/llm_bot_dep/splitter_utils.py |  54 +++++----
 src/scripts/glue-job-script.py                |   3 +
 5 files changed, 104 insertions(+), 62 deletions(-)

diff --git a/src/scripts/dep/llm_bot_dep/loaders/auto.py b/src/scripts/dep/llm_bot_dep/loaders/auto.py
index 60eca5e9..37e56b57 100644
--- a/src/scripts/dep/llm_bot_dep/loaders/auto.py
+++ b/src/scripts/dep/llm_bot_dep/loaders/auto.py
@@ -13,7 +13,7 @@ def cb_process_object(s3, file_type: str, file_content, **kwargs):
     elif file_type == 'csv':
         res = process_csv(s3, file_content, **kwargs)
     elif file_type == 'html':
-        process_html(s3, file_content, **kwargs)
+        res = process_html(file_content, **kwargs)
     elif file_type == 'pdf':
         # res = post_process_pdf(process_pdf(file_content, **kwargs))
         res = process_pdf(s3, file_content, **kwargs)
diff --git a/src/scripts/dep/llm_bot_dep/loaders/html.py b/src/scripts/dep/llm_bot_dep/loaders/html.py
index 55a4ce6d..31a67acf 100644
--- a/src/scripts/dep/llm_bot_dep/loaders/html.py
+++ b/src/scripts/dep/llm_bot_dep/loaders/html.py
@@ -1,42 +1,67 @@
+import logging
 import re
+import markdownify
+from langchain.docstore.document import Document
+from langchain.document_loaders.base import BaseLoader
+from llm_bot_dep.splitter_utils import MarkdownHeaderTextSplitter

-def process_html(htmlstr: str):
-    logger.info("Processing HTML file...")
-    # filter out DOCTYPE
-    htmlstr = ' '.join(htmlstr.split())
-    re_doctype = re.compile(r'<!DOCTYPE.*?>', re.S)
-    s = re_doctype.sub('', htmlstr)
-
-    # filter out CDATA
-    re_cdata = re.compile('//<!\[CDATA\[[^>]*//\]\]>', re.I)
-    s = re_cdata.sub('', s)
-
-    # filter out Script
-    re_script = re.compile('<\s*script[^>]*>[^<]*<\s*/\s*script\s*>', re.I)
-    s = re_script.sub('', s)
-
-    # filter out style
-    re_style = re.compile('<\s*style[^>]*>[^<]*<\s*/\s*style\s*>', re.I)
-    s = re_style.sub('', s)
-
-    # transform br to \n
-    re_br = re.compile('<br\s*?/?>')
-    s = re_br.sub('', s)
-
-    # filter out HTML tags
-    re_h = re.compile('</?\w+[^>]*>')
-    s = re_h.sub('', s)
-
-    # filter out HTML comments
-    re_comment = re.compile('<!--[^>]*-->')
-    s = re_comment.sub('', s)
-
-    # remove extra blank lines
-    blank_line = re.compile('\n+')
-    s = blank_line.sub('', s)
-
-    # remove hyperlinks
-    http_link = re.compile(r'(http://.+html)')
-    s = http_link.sub('', s)
-
-    return s
+logger = logging.getLogger(__name__)
+
+
+class CustomHtmlLoader(BaseLoader):
+    """Load an HTML file.
+
+    The raw HTML is cleaned of DOCTYPE, CDATA, script, style and comment
+    blocks, converted to markdown with markdownify, and returned as a
+    single langchain Document object.
+    """
+
+    def clean_html(self, html_str: str) -> str:
+        # Filter out DOCTYPE
+        html_str = ' '.join(html_str.split())
+        re_doctype = re.compile(r'<!DOCTYPE.*?>', re.S)
+        s = re_doctype.sub('', html_str)
+
+        # Filter out CDATA
+        re_cdata = re.compile('//<!\[CDATA\[[^>]*//\]\]>', re.I)
+        s = re_cdata.sub('', s)
+
+        # Filter out script
+        re_script = re.compile('<\s*script[^>]*>[^<]*<\s*/\s*script\s*>', re.I)
+        s = re_script.sub('', s)
+
+        # Filter out style
+        re_style = re.compile('<\s*style[^>]*>[^<]*<\s*/\s*style\s*>', re.I)
+        s = re_style.sub('', s)
+
+        # Filter out HTML comments
+        re_comment = re.compile('<!--[^>]*-->')
+        s = re_comment.sub('', s)
+
+        # Remove extra blank lines
+        blank_line = re.compile('\n+')
+        s = blank_line.sub('\n', s)
+
+        return s.strip()
+
+    # def load(self, file_content: str) -> List[Document]:
+    def load(self, file_content: str):
+        file_content = self.clean_html(file_content)
+        file_content = markdownify.markdownify(file_content)
+        doc = Document(page_content=file_content,
+                       metadata={"file_type": "html"})
+
+        return doc
+
+
+def process_html(html_str: str, **kwargs):
+    loader = CustomHtmlLoader()
+    doc = loader.load(html_str)
+    splitter = MarkdownHeaderTextSplitter()
+    doc_list = splitter.split_text(doc)
+
+    return doc_list
diff --git a/src/scripts/dep/llm_bot_dep/loaders/text.py b/src/scripts/dep/llm_bot_dep/loaders/text.py
index 858d6576..df8ffe1f 100644
--- a/src/scripts/dep/llm_bot_dep/loaders/text.py
+++ b/src/scripts/dep/llm_bot_dep/loaders/text.py
@@ -34,6 +34,7 @@ def __init__(
     def load(self, text_content: str) -> List[Document]:
         """Load from file path."""
         metadata = {"source": self.file_path}
+
         return [Document(page_content=text_content, metadata=metadata)]


@@ -41,6 +42,7 @@ def pre_process_text(text_content: str) -> str:
     # Clean up text content
     text_content = re.sub(r'\s+', ' ', text_content)
     text_content = re.sub(r'\n+', '\n', text_content)
+
     return text_content.strip()


diff --git a/src/scripts/dep/llm_bot_dep/splitter_utils.py b/src/scripts/dep/llm_bot_dep/splitter_utils.py
index eab88313..d00d9c9e 100644
--- a/src/scripts/dep/llm_bot_dep/splitter_utils.py
+++ b/src/scripts/dep/llm_bot_dep/splitter_utils.py
@@ -1,11 +1,10 @@
 import re
-from typing import Any, Dict, List, Optional, Iterator, Union
+from typing import Any, Dict, Iterator, List, Optional, Union
+
 from langchain.docstore.document import Document
-from langchain.text_splitter import (
-    RecursiveCharacterTextSplitter,
-    Language,
-    TextSplitter,
-)
+from langchain.text_splitter import (Language, RecursiveCharacterTextSplitter,
+                                     TextSplitter)
+

 def _make_spacy_pipeline_for_splitting(pipeline: str) -> Any:  # avoid importing spacy
     try:
@@ -125,6 +124,16 @@ class MarkdownHeaderTextSplitter:
     def __init__(self) -> None:
         pass

+    def _is_markdown_header(self, line):
+        header_pattern = r'^#+\s+'
+        if re.match(header_pattern, line):
+            return True
+        else:
+            return False
+
+    def _is_markdown_table_row(self, line):
+        return re.fullmatch(r'\|.*\|.*\|', line) is not None
+
     def split_text(self, text: Document) -> List[Document]:
         lines = text.page_content.strip().split('\n')
         chunks = []
@@ -135,11 +144,25 @@

         for line in lines:
             # Replace escaped characters for table markers
+            line = line.strip()
             line = line.replace(r"\begin{table}", "\\begin{table}").replace(r"\end{table}", "\\end{table}")
-            if line.strip() == "\\begin{table}":
+            if line in ["\\begin{table}", "\\end{table}"]:
+                continue
+
+            if self._is_markdown_header(line):  # Assuming these denote headings
+                # Save the current chunk if it exists
+                if current_chunk_content:
+                    metadata = text.metadata.copy()
+                    metadata['heading_hierarchy'] = extract_headings('\n'.join(current_chunk_content))
+                    metadata['chunk_id'] = f"${chunk_id}"
+                    chunk_id += 1  # Increment chunk_id for the next chunk
+                    chunks.append(Document(page_content='\n'.join(current_chunk_content), metadata=metadata))
+                    current_chunk_content = []  # Reset for the next chunk
+
+            if self._is_markdown_table_row(line):
                 inside_table = True
-                continue  # Skip this line
-            elif line.strip() == "\\end{table}":
+            elif inside_table:
+                # The first non-table line after a table ends it
                 inside_table = False
                 # Save table content as a separate document
                 if table_content:
                     metadata = text.metadata.copy()
                     metadata['content_type'] = 'table'
                     metadata['chunk_id'] = f"${chunk_id}"
                     chunks.append(Document(page_content='\n'.join(table_content), metadata=metadata))
                     table_content = []  # Reset for the next table
-                continue  # Skip this line

             if inside_table:
                 table_content.append(line)
-            elif line.startswith(('## ', ' ### ')):  # Assuming these denote headings
-                # Save the current chunk if it exists
-                if current_chunk_content:
-                    metadata = text.metadata.copy()
-                    metadata['heading_hierarchy'] = extract_headings('\n'.join(current_chunk_content))
-                    metadata['chunk_id'] = f"${chunk_id}"
-                    chunk_id += 1  # Increment chunk_id for the next chunk
-                    chunks.append(Document(page_content='\n'.join(current_chunk_content), metadata=metadata))
-                    current_chunk_content = []  # Reset for the next chunk
-
-            if not inside_table:
+            else:
                 current_chunk_content.append(line)

         # Save the last chunk if it exists
diff --git a/src/scripts/glue-job-script.py b/src/scripts/glue-job-script.py
index faa55eb9..1632ac18 100644
--- a/src/scripts/glue-job-script.py
+++ b/src/scripts/glue-job-script.py
@@ -106,6 +106,7 @@ def aos_injection(content: List[Document], embeddingModelEndpoint: str, aosEndpo
         aosEndpoint (str): The endpoint of the AOS.
         index_name (str): The name of the index to be created in the AOS.
         chunk_size (int): The size of each chunk to be indexed in the AOS.
+        gen_chunk (bool): Whether to generate chunks or not.
Returns: @@ -165,6 +166,8 @@ def main(): if file_type == 'csv': # CSV page document has been splited into chunk, no more spliting is needed aos_injection(res, embeddingModelEndpoint, aosEndpoint, 'chatbot-index', gen_chunk=False) + elif file_type == 'html': + aos_injection(res, embeddingModelEndpoint, aosEndpoint, 'chatbot-index') elif file_type in ['pdf', 'txt']: aos_injection(res, embeddingModelEndpoint, aosEndpoint, 'chatbot-index') if qa_enhancement == 'true': From fc7d9d9fe35c7b998b74dd963399f6ff0465f842 Mon Sep 17 00:00:00 2001 From: Ning Date: Wed, 8 Nov 2023 18:12:41 +0800 Subject: [PATCH 05/37] chore: update heading style --- src/scripts/dep/llm_bot_dep/loaders/html.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/scripts/dep/llm_bot_dep/loaders/html.py b/src/scripts/dep/llm_bot_dep/loaders/html.py index 31a67acf..38386fb6 100644 --- a/src/scripts/dep/llm_bot_dep/loaders/html.py +++ b/src/scripts/dep/llm_bot_dep/loaders/html.py @@ -51,7 +51,7 @@ def clean_html(self, html_str: str) -> str: # def load(self, file_content: str) -> List[Document]: def load(self, file_content: str): file_content = self.clean_html(file_content) - file_content = markdownify.markdownify(file_content) + file_content = markdownify.markdownify(file_content, heading_style="ATX") doc = Document(page_content=file_content, metadata={"file_type": "html"}) From 64280ce7f73acdbebe03c6158460893ecd4cfea3 Mon Sep 17 00:00:00 2001 From: yike5460 Date: Thu, 9 Nov 2023 07:29:35 +0000 Subject: [PATCH 06/37] feat: redundancy object processing removal --- src/etl-stack.ts | 24 ++++++++++++++ src/main.ts | 1 + src/scripts/glue-job-script.py | 57 +++++++++++++++++++++++++++++----- 3 files changed, 75 insertions(+), 7 deletions(-) diff --git a/src/etl-stack.ts b/src/etl-stack.ts index 0049fbf6..a8697d1f 100644 --- a/src/etl-stack.ts +++ b/src/etl-stack.ts @@ -12,6 +12,7 @@ import * as subscriptions from 'aws-cdk-lib/aws-sns-subscriptions'; import * as s3 from 'aws-cdk-lib/aws-s3'; import * as s3assets from 'aws-cdk-lib/aws-s3-assets'; import * as s3deploy from 'aws-cdk-lib/aws-s3-deployment'; +import * as dynamodb from 'aws-cdk-lib/aws-dynamodb'; import path from "path"; import * as lambda from 'aws-cdk-lib/aws-lambda'; import { DockerImageCode, Architecture, DockerImageFunction} from 'aws-cdk-lib/aws-lambda'; @@ -32,6 +33,7 @@ export class EtlStack extends NestedStack { _sfnOutput; _jobName; _jobArn; + _processedObjectsTable; constructor(scope: Construct, id: string, props: etlStackProps) { super(scope, id, props); @@ -42,6 +44,24 @@ export class EtlStack extends NestedStack { securityGroups: [props._securityGroups], }); + const table = new dynamodb.Table(this, 'ProcessedObjects', { + partitionKey: { name: 'ObjectKey', type: dynamodb.AttributeType.STRING }, + billingMode: dynamodb.BillingMode.PAY_PER_REQUEST, + }); + + table.addGlobalSecondaryIndex({ + indexName: 'BucketAndPrefixIndex', + partitionKey: { name: 'Bucket', type: dynamodb.AttributeType.STRING }, + sortKey: { name: 'Prefix', type: dynamodb.AttributeType.STRING }, + }); + + // Add ExpiryTimestamp as an attribute but not as a sort key in the base table + table.addGlobalSecondaryIndex({ + indexName: 'ExpiryTimestampIndex', + partitionKey: { name: 'ExpiryTimestamp', type: dynamodb.AttributeType.NUMBER }, + // No sort key for this index + }); + const _S3Bucket = new s3.Bucket(this, 'llm-bot-glue-lib', { bucketName: `llm-bot-glue-lib-${Aws.ACCOUNT_ID}-${Aws.REGION}`, blockPublicAccess: s3.BlockPublicAccess.BLOCK_ALL, @@ -61,6 +81,7 @@ export class 
EtlStack extends NestedStack { // the role is used by the glue job to access AOS and by default it has 1 hour session duration which is not enough for the glue job to finish the embedding injection maxSessionDuration: Duration.hours(12), }); + // TODO, narrow down the policy to specific resources and actions glueRole.addToPrincipalPolicy( new iam.PolicyStatement({ actions: [ @@ -70,6 +91,7 @@ export class EtlStack extends NestedStack { "es:*", "glue:*", "ec2:*", + "dynamodb:*", // cloudwatch logs "logs:*", ], @@ -172,6 +194,7 @@ export class EtlStack extends NestedStack { '--QA_ENHANCEMENT.$': '$.qaEnhance', // Convert the numeric index to a string '--BATCH_INDICE.$': 'States.Format(\'{}\', $.batchIndices)', + '--ProcessedObjectsTable': table.tableName, }), }); @@ -239,5 +262,6 @@ export class EtlStack extends NestedStack { this._sfnOutput = sfnStateMachine; this._jobName = glueJob.jobName; this._jobArn = glueJob.jobArn; + this._processedObjectsTable = table.tableName } } \ No newline at end of file diff --git a/src/main.ts b/src/main.ts index 841ec0fd..a48bf19f 100644 --- a/src/main.ts +++ b/src/main.ts @@ -93,6 +93,7 @@ export class RootStack extends Stack { new CfnOutput(this, 'Cross Model Endpoint', {value:_LLMStack._crossEndPoint || 'No Cross Endpoint Created'}); new CfnOutput(this, 'Embedding Model Endpoint', {value:_LLMStack._embeddingEndPoint || 'No Embedding Endpoint Created'}); new CfnOutput(this, 'Instruct Model Endpoint', {value:_LLMStack._instructEndPoint || 'No Instruct Endpoint Created'}); + new CfnOutput(this, 'Processed Object Table', {value:_EtlStack._processedObjectsTable}); } } diff --git a/src/scripts/glue-job-script.py b/src/scripts/glue-job-script.py index faa55eb9..6c62ac9e 100644 --- a/src/scripts/glue-job-script.py +++ b/src/scripts/glue-job-script.py @@ -1,8 +1,10 @@ import os import boto3 +from boto3.dynamodb.conditions import Key, Attr import sys import logging import itertools +import time from typing import Generator, Any, Dict, Iterable, List, Optional, Tuple import nltk @@ -23,15 +25,13 @@ logger = logging.getLogger() logger.setLevel(logging.INFO) -s3 = boto3.client('s3') - # Adaption to allow nougat to run in AWS Glue with writable /tmp os.environ['TRANSFORMERS_CACHE'] = '/tmp/transformers_cache' os.environ['NOUGAT_CHECKPOINT'] = '/tmp/nougat_checkpoint' os.environ['NLTK_DATA'] = '/tmp/nltk_data' # Parse arguments -args = getResolvedOptions(sys.argv, ['JOB_NAME', 'S3_BUCKET', 'S3_PREFIX', 'AOS_ENDPOINT', 'EMBEDDING_MODEL_ENDPOINT', 'REGION', 'OFFLINE', 'QA_ENHANCEMENT', 'BATCH_INDICE']) +args = getResolvedOptions(sys.argv, ['JOB_NAME', 'S3_BUCKET', 'S3_PREFIX', 'AOS_ENDPOINT', 'EMBEDDING_MODEL_ENDPOINT', 'REGION', 'OFFLINE', 'QA_ENHANCEMENT', 'BATCH_INDICE', 'ProcessedObjectsTable']) s3_bucket = args['S3_BUCKET'] s3_prefix = args['S3_PREFIX'] aosEndpoint = args['AOS_ENDPOINT'] @@ -41,8 +41,15 @@ qa_enhancement = args['QA_ENHANCEMENT'] # TODO, pass the bucket and prefix need to handle in current job directly batchIndice = args['BATCH_INDICE'] +processedObjectsTable = args['ProcessedObjectsTable'] + +s3 = boto3.client('s3') +dynamodb = boto3.resource('dynamodb') +table = dynamodb.Table(processedObjectsTable) ENHANCE_CHUNK_SIZE = 500 +# Make it 60s for debugging purpose +OBJECT_EXPIRY_TIME = 60 credentials = boto3.Session().get_credentials() awsauth = AWS4Auth(credentials.access_key, credentials.secret_key, region, 'es', session_token=credentials.token) @@ -53,17 +60,46 @@ def iterate_s3_files(bucket: str, prefix: str) -> Generator: currentIndice = 0 for page 
in paginator.paginate(Bucket=bucket, Prefix=prefix): for obj in page.get('Contents', []): + key = obj['Key'] # skip the prefix with slash, which is the folder name - if obj['Key'].endswith('/'): + if key.endswith('/'): + continue + # Truncate to seconds with round() + current_time = int(round(time.time())) + # Check for redundancy and expiry + response = table.query( + KeyConditionExpression = Key('ObjectKey').eq(key), + ScanIndexForward=False, # Sort by ProcessTimestamp in descending order + Limit=1 # We only need the latest record + ) + + # If the object is found and has not expired, skip processing + if response['Items'] and response['Items'][0]['ExpiryTimestamp'] > current_time: + logger.info(f"Object {key} has not expired yet and will be skipped.") continue + # skip the file if the index is not in the batchIndice if currentIndice != int(batchIndice): - logger.info("currentIndice: {}, batchIndice: {}, skip file: {}".format(currentIndice, batchIndice, obj['Key'])) + logger.info("currentIndice: {}, batchIndice: {}, skip file: {}".format(currentIndice, batchIndice, key)) currentIndice += 1 continue - key = obj['Key'] - file_type = key.split('.')[-1] # Extract file extension + # Record the processing of the S3 object with an updated expiry timestamp, and each job only update single object in table. TODO, current assume the object will be handled successfully + expiry_timestamp = current_time + OBJECT_EXPIRY_TIME + try: + table.put_item( + Item={ + 'ObjectKey': key, + 'ProcessTimestamp': current_time, + 'Bucket': bucket, + 'Prefix': '/'.join(key.split('/')[:-1]), + 'ExpiryTimestamp': expiry_timestamp + } + ) + except Exception as e: + logger.error(f"Error recording processed of S3 object {key}: {e}") + + file_type = key.split('.')[-1] # Extract file extension response = s3.get_object(Bucket=bucket, Key=key) file_content = response['Body'].read() # assemble bucket and key as args for the callback function @@ -71,20 +107,27 @@ def iterate_s3_files(bucket: str, prefix: str) -> Generator: if file_type in ['txt']: yield 'txt', file_content.decode('utf-8'), kwargs + break elif file_type in ['csv']: # Update row count here, the default row count is 1 kwargs['csv_row_count'] = 1 yield 'csv', file_content.decode('utf-8'), kwargs + break elif file_type in ['html']: yield 'html', file_content.decode('utf-8'), kwargs + break elif file_type in ['pdf']: yield 'pdf', file_content, kwargs + break elif file_type in ['jpg', 'png']: yield 'image', file_content, kwargs + break elif file_type in ['docx', 'doc']: yield 'doc', file_content.decode('utf-8'), kwargs + break else: logger.info(f"Unknown file type: {file_type}") + break def batch_generator(generator, batch_size: int): iterator = iter(generator) From 78f9f2b9df38651af14dbb835ba377351111449f Mon Sep 17 00:00:00 2001 From: Xu Han Date: Thu, 9 Nov 2023 08:15:27 +0000 Subject: [PATCH 07/37] fix: fix offline trigger in panel --- src/panel/app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/panel/app.py b/src/panel/app.py index 6a14a3a0..31af707b 100644 --- a/src/panel/app.py +++ b/src/panel/app.py @@ -89,7 +89,7 @@ def pipeline_tab(): 'qaEnhance': str(qaPairEnhance), 'keyWordExtract': keyWordExtract, 'textSummarize': textSummarize, - 'offline': 'true' + 'offline': True } # send button to trigger the request sending to the endpoint with s3_bucket_name and s3_object_name as request body, in conform with send_button = st.button('Start Offline Job') From 5c510883909dcfec442d39e373b38975c9a666fb Mon Sep 17 00:00:00 2001 From: Xu Han Date: 
Date: Thu, 9 Nov 2023 09:00:16 +0000
Subject: [PATCH 08/37] fix: fix aos_utils in executor lambda

---
 src/lambda/executor/utils/aos_utils.py     |   4 +-
 src/scripts/dep/llm_bot_dep/loaders/pdf.py | 275 +--------------------
 2 files changed, 3 insertions(+), 276 deletions(-)

diff --git a/src/lambda/executor/utils/aos_utils.py b/src/lambda/executor/utils/aos_utils.py
index 752915a0..d0c273b9 100644
--- a/src/lambda/executor/utils/aos_utils.py
+++ b/src/lambda/executor/utils/aos_utils.py
@@ -121,13 +121,13 @@ def organize_results(self, query_type, response, field):
         if query_type == "exact":
             for aos_hit in aos_hits:
                 doc = aos_hit['_source'][field]
-                source = aos_hit['_source']['metadata']['source']
+                source = aos_hit['_source']['metadata']['file_path']
                 score = aos_hit["_score"]
                 results.append({'doc': doc, 'score': score, 'source': source})
         else:
             for aos_hit in aos_hits:
                 doc = f"{aos_hit['_source'][field]}"
-                source = aos_hit['_source']['metadata']['source']
+                source = aos_hit['_source']['metadata']['file_path']
                 score = aos_hit["_score"]
                 results.append({'doc': doc, 'score': score, 'source': source})
     return results
diff --git a/src/scripts/dep/llm_bot_dep/loaders/pdf.py b/src/scripts/dep/llm_bot_dep/loaders/pdf.py
index a7eb9863..aa4598e4 100644
--- a/src/scripts/dep/llm_bot_dep/loaders/pdf.py
+++ b/src/scripts/dep/llm_bot_dep/loaders/pdf.py
@@ -11,6 +11,7 @@
 from langchain.document_loaders import PDFMinerPDFasHTMLLoader
 from langchain.document_loaders.pdf import BasePDFLoader

+from ..splitter_utils import extract_headings, MarkdownHeaderTextSplitter
 # from langchain.text_splitter import MarkdownHeaderTextSplitter

 logging.basicConfig(level=logging.INFO)
@@ -26,115 +27,6 @@
     "summary": "",
 }

-class NestedDict(dict):
-    def __missing__(self, key):
-        self[key] = NestedDict()
-        return self[key]
-
-
-# rewrite this class to use the new TextSplitter for mmd type
-class MarkdownHeaderTextSplitter:
-    # Place holder for now without parameters
-    def __init__(self) -> None:
-        pass
-
-    def split_text(self, text: Document) -> List[Document]:
-        lines = text.page_content.strip().split('\n')
-        chunks = []
-        current_chunk_content = []
-        table_content = []
-        inside_table = False
-        chunk_id = 1  # Initializing chunk_id
-
-        for line in lines:
-            # Replace escaped characters for table markers
-            line = line.replace(r"\begin{table}", "\\begin{table}").replace(r"\end{table}", "\\end{table}")
-            if line.strip() == "\\begin{table}":
-                inside_table = True
-                continue  # Skip this line
-            elif line.strip() == "\\end{table}":
-                inside_table = False
-                # Save table content as a separate document
-                if table_content:
-                    metadata = text.metadata.copy()
-                    metadata['content_type'] = 'table'
-                    metadata['chunk_id'] = f"${chunk_id}"
-                    chunks.append(Document(page_content='\n'.join(table_content), metadata=metadata))
-                    table_content = []  # Reset for the next table
-                continue  # Skip this line
-
-            if inside_table:
-                table_content.append(line)
-            elif line.startswith(('## ', ' ### ')):  # Assuming these denote headings
-                # Save the current chunk if it exists
-                if current_chunk_content:
-                    metadata = text.metadata.copy()
-                    metadata['heading_hierarchy'] = extract_headings('\n'.join(current_chunk_content))
-                    metadata['chunk_id'] = f"${chunk_id}"
-                    chunk_id += 1  # Increment chunk_id for the next chunk
-                    chunks.append(Document(page_content='\n'.join(current_chunk_content), metadata=metadata))
-                    current_chunk_content = []  # Reset for the next chunk
-
-            if not inside_table:
-                current_chunk_content.append(line)
-
-        # Save the last chunk if it exists
-        if
current_chunk_content: - metadata = text.metadata.copy() - metadata['heading_hierarchy'] = extract_headings('\n'.join(current_chunk_content)) - metadata['chunk_id'] = f"${chunk_id}" - chunks.append(Document(page_content='\n'.join(current_chunk_content), metadata=metadata)) - - return chunks - - -# TODO, this function is duplicated in splitter_utils.py, need to merge to one place -def extract_headings(md_content): - """Extract headings hierarchically from Markdown content. - Consider alternate syntax that "any number of == characters for heading level 1 or -- characters for heading level 2." - See https://www.markdownguide.org/basic-syntax/ - Args: - md_content (str): Markdown content. - Returns: - NestedDict: A nested dictionary containing the headings. Sample output: - { - 'Title 1': { - 'Subtitle 1.1': {}, - 'Subtitle 1.2': {} - }, - 'Title 2': { - 'Subtitle 2.1': {} - } - } - """ - headings = NestedDict() - current_heads = [headings] - lines = md_content.strip().split('\n') - - for i, line in enumerate(lines): - match = re.match(r'(#+) (.+)', line) - if not match and i > 0: # If the line is not a heading, check if the previous line is a heading using alternate syntax - if re.match(r'=+', lines[i - 1]): - level = 1 - title = lines[i - 2] - elif re.match(r'-+', lines[i - 1]): - level = 2 - title = lines[i - 2] - else: - continue - elif match: - level = len(match.group(1)) - title = match.group(2) - else: - continue - - current_heads = current_heads[:level] - current_heads[-1][title] - current_heads.append(current_heads[-1][title]) - - return headings - - class NougatPDFLoader(BasePDFLoader): """A PDF loader class for converting PDF files to MMD. @@ -237,122 +129,6 @@ def lazy_load(self) -> Iterator[Document]: # logger.info(f"An error occurred while processing the PDF: {str(e)}") -def fontsize_mapping(heading_fonts_arr): - heading_fonts_set = list(set(heading_fonts_arr)) - heading_fonts_set.sort(reverse=True) - idxs = range(len(heading_fonts_set)) - font_idx_mapping = dict(zip(heading_fonts_set,idxs)) - return font_idx_mapping - -def link_header(semantic_snippets): - """ - Processes a list of semantic snippets to organize and structure the header information based on font size, - and then outputs the structured data as a JSON string. - - Parameters: - semantic_snippets (list): A list of objects where each object has a 'metadata' attribute containing 'heading_font' and 'heading' fields. - - Returns: - str: A JSON string representing the structured header and content information of each snippet. 
- """ - heading_fonts_arr = [ item.metadata['heading_font'] for item in semantic_snippets ] - heading_arr = [ item.metadata['heading'] for item in semantic_snippets ] - fontsize_dict = fontsize_mapping(heading_fonts_arr) - - snippet_arr = [] - for idx, snippet in enumerate(semantic_snippets): - font_size = heading_fonts_arr[idx] - heading_stack = [] - heading_info = {"font_size":heading_fonts_arr[idx], "heading":heading_arr[idx], "fontsize_idx" : fontsize_dict[font_size]} - heading_stack.append(heading_info) - for id in range(0,idx)[::-1]: - if font_size < heading_fonts_arr[id]: - font_size = heading_fonts_arr[id] - heading_info = {"font_size":font_size, "heading":heading_arr[id], "fontsize_idx" : fontsize_dict[font_size]} - heading_stack.append(heading_info) - - snippet_info = { - "heading" : heading_stack, - "content" : snippet.page_content - } - snippet_arr.append(snippet_info) - - json_arr = json.dumps(snippet_arr, ensure_ascii=False) - return json_arr - -def parse_pdf_to_json(file_content): - """ - Credit to https://python.langchain.com/docs/modules/data_connection/document_loaders/pdf, parses the content of a PDF file converted to HTML format, organizing text segments semantically based on their font size. - - Parameters: - file_content (str): The HTML content of the converted PDF file. - - Returns: - list: A list of Document objects, each representing a semantically grouped section of the PDF file. Each Document object contains a metadata dictionary with details about the heading and content font sizes, and a page_content string with the text content of that section. - - Notes: - - Assumes that headings have a larger font size than their respective content. - - It first iterates through all the text segments, grouping consecutive segments with the same font size together. - - Then, iterates through these grouped segments, identifying new headings based on a change in font size, and grouping the content under these headings. - - The function is designed to work with a specific HTML structure and may not work as expected with differently structured HTML. 
- """ - soup = BeautifulSoup(file_content,'html.parser') - content = soup.find_all('div') - - cur_fs = None - cur_text = '' - snippets = [] # first collect all snippets that have the same font size - for c in content: - sp = c.find('span') - if not sp: - continue - st = sp.get('style') - if not st: - continue - fs = re.findall('font-size:(\d+)px',st) - if not fs: - continue - fs = int(fs[0]) - if not cur_fs: - cur_fs = fs - if fs == cur_fs: - cur_text += c.text - else: - snippets.append((cur_text,cur_fs)) - cur_fs = fs - cur_text = c.text - snippets.append((cur_text,cur_fs)) - - cur_idx = -1 - semantic_snippets = [] - # Assumption: headings have higher font size than their respective content - for s in snippets: - # if current snippet's font size > previous section's heading => it is a new heading - if not semantic_snippets or s[1] > semantic_snippets[cur_idx].metadata['heading_font']: - metadata={'heading':s[0], 'content_font': 0, 'heading_font': s[1]} - #metadata.update(data.metadata) - semantic_snippets.append(Document(page_content='',metadata=metadata)) - cur_idx += 1 - continue - - # if current snippet's font size <= previous section's content => content belongs to the same section (one can also create - # a tree like structure for sub sections if needed but that may require some more thinking and may be data specific) - if not semantic_snippets[cur_idx].metadata['content_font'] or s[1] <= semantic_snippets[cur_idx].metadata['content_font']: - semantic_snippets[cur_idx].page_content += s[0] - semantic_snippets[cur_idx].metadata['content_font'] = max(s[1], semantic_snippets[cur_idx].metadata['content_font']) - continue - - # if current snippet's font size > previous section's content but less tha previous section's heading than also make a new - # section (e.g. title of a pdf will have the highest font size but we don't want it to subsume all sections) - metadata={'heading':s[0], 'content_font': 0, 'heading_font': s[1]} - #metadata.update(data.metadata) - semantic_snippets.append(Document(page_content='',metadata=metadata)) - cur_idx += 1 - - res = link_header(semantic_snippets) - return res - - def process_pdf(s3, pdf: bytes, **kwargs): """ Process a given PDF file and extracts structured information from it. @@ -393,52 +169,3 @@ def process_pdf(s3, pdf: bytes, **kwargs): for i, doc in enumerate(md_header_splits): logger.info("PDF file processed successfully, with content of chunk %s: %s", i, doc) return md_header_splits - -def post_process_pdf(s3, pdf: str): - """ - Transforms a given string of a specific format into a desired formatted string. - - The function extracts the 'page_content' value from the input string and - constructs a new string in a JSON-like format with specific hardcoded values - and the extracted 'page_content' value. - - Parameters: - ----------- - original_string : str - The input string to be transformed. Sample: - str: A string formatted in the desired JSON-like structure. Sample: - [ - { - "heading": [ - { - "font_size": 10, - "heading": "5\n1\n0\n2\ny\na\nM\n8\n1\n", - "fontsize_idx": 2 - } - ], - "content": "this is the content\n" - } - ... - ] - Returns: - -------- - str: A string to conform to AOS embedding wrapper. 
Sample: - List[Document] - [Document(page_content='this is the content', metadata={'source': '/tmp/tmpghff3i39/xx/dth.txt', 'timestamp': 1697513348.1026106, 'embeddings_model': 'embedding-endpoint'})] - """ - logger.info("Post-processing PDF file %s", pdf) - # Parse the input string to a Python data structure - input_data = json.loads(pdf) - # Create an empty list to hold the Document objects - documents: List[Document] = [] - - # Iterate through the parsed data, creating Document objects for each item - for item in input_data: - page_content = item['content'] - # Assuming some default metadata; adjust as necessary - metadata = {'source': 'unknown', 'fontsize': item['heading'][0]['font_size'], 'heading': item['heading'][0]['heading'], 'fontsize_idx': item['heading'][0]['fontsize_idx']} - doc = Document(page_content=page_content, metadata=metadata) - documents.append(doc) - - logger.info("Post-processing PDF with result %s", documents) - return documents From d14e28c0e409b9cb28c9fa0af549083350d3bb20 Mon Sep 17 00:00:00 2001 From: yike5460 Date: Thu, 9 Nov 2023 09:11:53 +0000 Subject: [PATCH 09/37] fix: adjust the redundancy removal logic to avoid unexpected object processing --- src/scripts/dep/llm_bot_dep/loader_utils.py | 194 -------------------- src/scripts/glue-job-script.py | 18 +- 2 files changed, 9 insertions(+), 203 deletions(-) delete mode 100644 src/scripts/dep/llm_bot_dep/loader_utils.py diff --git a/src/scripts/dep/llm_bot_dep/loader_utils.py b/src/scripts/dep/llm_bot_dep/loader_utils.py deleted file mode 100644 index d956ec19..00000000 --- a/src/scripts/dep/llm_bot_dep/loader_utils.py +++ /dev/null @@ -1,194 +0,0 @@ -import re -import logging -import subprocess -from pathlib import Path -from typing import Dict, List, Optional, Iterator, Sequence -from langchain.document_loaders.pdf import BasePDFLoader -from langchain.docstore.document import Document -import csv -from io import TextIOWrapper -# from langchain.text_splitter import MarkdownHeaderTextSplitter -# from splitter_utils import MarkdownHeaderTextSplitter - -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) - - -# local debugging purpose -# if __name__ == "__main__": -# markdown_document = r""" -# # Learning to Retrieve In-Context Examples for Large Language Models - -# ###### Abstract - -# aaaa - -# ## 1 Introduction - -# 1111 - -# ## 2 Related Work - -# 2222 - -# ## 3 Preliminaries - -# 3333 - -# ## 4 Methodology - -# 4444 - -# ### Training Data Generation - -# 5555 - -# ### Reward Modeling - -# 6666 - -# ### Training LLM Retrievers with Knowledge Distillation - -# 7777 - -# ### Evaluation of LLM Retrievers - -# 8888 - -# ## 5 Experiments - -# ### Evaluation Setup - -# 9999 - -# ### Main Results - -# 0000 - -# \begin{table} -# This is table content -# \end{table} - -# ### Training Pipeline of LLM-R - -# 1010 - -# ### Generalization Ability of LLM-R - -# 1212 - -# ### When does LLM-R Work and When Does it Not? 
- -# 1313 - -# ### Using Different LLMs for Data Generation and Task Evaluation - -# 1414 - -# ### Scaling the Number of In-Context Examples and Retriever Size - -# 1515 - -# ## 7 Conclusion - -# 1616 - -# ## Limitations - -# 1717 - -# ## References - -# 1818 -# """ -# markdown_splitter = MarkdownHeaderTextSplitter() - -# # construct a fake document data -# data = [Document(page_content=markdown_document, metadata=metadata_template)] -# md_header_splits = markdown_splitter.split_text(data[0]) -# for i, doc in enumerate(md_header_splits): -# logger.info("content of chunk %s: %s", i, doc) - - # local pdf file in current folder - # loader = NougatPDFLoader('1.pdf') - # data = loader.load() - # logger.info("raw data: %s", data) - # md_header_splits = markdown_splitter.split_text(data[0]) - # for i, doc in enumerate(md_header_splits): - # logger.info("content of chunk %s: %s", i, doc) - - # official splits will be deprecated by the new MarkdownHeaderTextSplitter - # markdown_splitter = MarkdownHeaderTextSplitter(headers_to_split_on=headers_to_split_on) - # headers_to_split_on = [ - # ("#", "Header 1"), - # ("##", "Header 2"), - # ] - # markdown_document = "# Foo\n\n ## Bar\n\nHi this is Jim\n\nHi this is Joe\n\n ### Boo \n\n Hi this is Lance \n\n ## Baz\n\n Hi this is Molly" - # md_header_splits = markdown_splitter.split_text(markdown_document) - - # Char-level splits - # from langchain.text_splitter import RecursiveCharacterTextSplitter - - # chunk_size = 250 - # chunk_overlap = 30 - # text_splitter = RecursiveCharacterTextSplitter( - # chunk_size=chunk_size, chunk_overlap=chunk_overlap - # ) - - # splits = text_splitter.split_documents(md_header_splits) - # logger.info("splits: %s", splits) - # from typing import Generator - # import itertools - # from langchain.text_splitter import RecursiveCharacterTextSplitter - # def chunk_generator(content: List[Document], chunk_size: int = 500, chunk_overlap: int = 30) -> Generator[Document, None, None]: - # text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap) - # for document in content: - # splits = text_splitter.split_documents([document]) - # # list of Document objects - # for split in splits: - # yield split - # def batch_generator(generator, batch_size): - # while True: - # batch = list(itertools.islice(generator, batch_size)) - # if not batch: - # break - # yield batch - - # generator = chunk_generator(md_header_splits, ) - # batches = batch_generator(generator, batch_size=10) - # logger.info("current batch size: {} and next batch size: {}".format(len(next(batches)), len(next(batches)))) - # # note: typeof(batch)->list[Document], sizeof(batch)=batch_size - # for batch in batches: - # logger.info("batch: %s", batch) - - -# TODO: Local debug CSV loader, remove it before release -# if __name__ == "__main__": -# import uuid -# import boto3 -# from datetime import datetime - -# s3 = boto3.client('s3') -# now = datetime.now() -# timestamp_str = now.strftime("%Y%m%d%H%M%S") -# print(timestamp_str) -# random_uuid = str(uuid.uuid4())[:8] -# print(random_uuid) - -# def process_csv(csv_content: str, kwargs): -# bucket_name = kwargs['bucket'] -# key = kwargs['key'] -# local_path = f'/temp-{timestamp_str}-{random_uuid}.csv' -# s3.download_file(bucket_name, key, local_path) - -# # loader = CustomCSVLoader(file_path=local_path, row_count=1) -# # loader = CustomCSVLoader(file_path=local_path, row_count=999) -# loader = CustomCSVLoader(file_path=local_path, row_count=2) -# # loader = 
CustomCSVLoader(file_path=local_path, row_count=3) -# data = loader.load() -# # print(data) - -# # TSV -# # process_csv("x", {'bucket': '', 'key': 'athena_results/OrderTable.tsv'}) -# # CSV -# process_csv("x", {'bucket': '', 'key': 'athena_results/sdps-api-test-s3-key-58h54muj.csv'}) diff --git a/src/scripts/glue-job-script.py b/src/scripts/glue-job-script.py index addaae20..eb230216 100644 --- a/src/scripts/glue-job-script.py +++ b/src/scripts/glue-job-script.py @@ -48,8 +48,8 @@ table = dynamodb.Table(processedObjectsTable) ENHANCE_CHUNK_SIZE = 500 -# Make it 60s for debugging purpose -OBJECT_EXPIRY_TIME = 60 +# Make it 3600s for debugging purpose +OBJECT_EXPIRY_TIME = 3600 credentials = boto3.Session().get_credentials() awsauth = AWS4Auth(credentials.access_key, credentials.secret_key, region, 'es', session_token=credentials.token) @@ -64,6 +64,13 @@ def iterate_s3_files(bucket: str, prefix: str) -> Generator: # skip the prefix with slash, which is the folder name if key.endswith('/'): continue + + # skip the file if the index is not in the batchIndice + if currentIndice != int(batchIndice): + logger.info("currentIndice: {}, batchIndice: {}, skip file: {}".format(currentIndice, batchIndice, key)) + currentIndice += 1 + continue + # Truncate to seconds with round() current_time = int(round(time.time())) # Check for redundancy and expiry @@ -78,12 +85,6 @@ def iterate_s3_files(bucket: str, prefix: str) -> Generator: logger.info(f"Object {key} has not expired yet and will be skipped.") continue - # skip the file if the index is not in the batchIndice - if currentIndice != int(batchIndice): - logger.info("currentIndice: {}, batchIndice: {}, skip file: {}".format(currentIndice, batchIndice, key)) - currentIndice += 1 - continue - # Record the processing of the S3 object with an updated expiry timestamp, and each job only update single object in table. 
TODO, current assume the object will be handled successfully expiry_timestamp = current_time + OBJECT_EXPIRY_TIME try: @@ -127,7 +128,6 @@ def iterate_s3_files(bucket: str, prefix: str) -> Generator: break else: logger.info(f"Unknown file type: {file_type}") - break def batch_generator(generator, batch_size: int): iterator = iter(generator) From 1efe2c2cae181e325d8a9ca012695f279e738b0a Mon Sep 17 00:00:00 2001 From: Ning Date: Thu, 9 Nov 2023 21:15:21 +0800 Subject: [PATCH 10/37] feat: support docx and markdown --- src/etl-stack.ts | 8 +-- src/scripts/dep/llm_bot_dep/loaders/auto.py | 6 ++ src/scripts/dep/llm_bot_dep/loaders/docx.py | 59 +++++++++++++++++++ src/scripts/dep/llm_bot_dep/loaders/html.py | 4 ++ .../dep/llm_bot_dep/loaders/markdown.py | 49 +++++++++++++++ src/scripts/dep/setup.py | 3 +- src/scripts/glue-job-script.py | 10 ++-- 7 files changed, 130 insertions(+), 9 deletions(-) create mode 100644 src/scripts/dep/llm_bot_dep/loaders/docx.py create mode 100644 src/scripts/dep/llm_bot_dep/loaders/markdown.py diff --git a/src/etl-stack.ts b/src/etl-stack.ts index a8697d1f..4ee7bfa5 100644 --- a/src/etl-stack.ts +++ b/src/etl-stack.ts @@ -123,7 +123,7 @@ export class EtlStack extends NestedStack { '--REGION': props._region, '--EMBEDDING_MODEL_ENDPOINT': props._embeddingEndpoint, '--DOC_INDEX_TABLE': 'chatbot-index', - '--additional-python-modules': 'langchain==0.0.312,beautifulsoup4==4.12.2,requests-aws4auth==1.2.3,boto3==1.28.69,openai==0.28.1,nougat-ocr==0.1.17,pyOpenSSL==23.3.0,tenacity==8.2.3,markdownify==0.11.6', + '--additional-python-modules': 'langchain==0.0.312,beautifulsoup4==4.12.2,requests-aws4auth==1.2.3,boto3==1.28.69,openai==0.28.1,nougat-ocr==0.1.17,pyOpenSSL==23.3.0,tenacity==8.2.3,markdownify==0.11.6,mammoth==1.6.0', // add multiple extra python files '--extra-py-files': extraPythonFilesList } @@ -143,9 +143,9 @@ export class EtlStack extends NestedStack { timeout: Duration.minutes(15), memorySize: 1024, architecture: Architecture.X86_64, - }); + }); - lambdaETL.addToRolePolicy(new iam.PolicyStatement({ + lambdaETL.addToRolePolicy(new iam.PolicyStatement({ actions: [ // glue job "glue:StartJobRun", @@ -155,7 +155,7 @@ export class EtlStack extends NestedStack { ], effect: iam.Effect.ALLOW, resources: ['*'], - } + } )) const lambdaETLIntegration = new tasks.LambdaInvoke(this, 'lambdaETLIntegration', { diff --git a/src/scripts/dep/llm_bot_dep/loaders/auto.py b/src/scripts/dep/llm_bot_dep/loaders/auto.py index 36e7b82a..37d8f354 100644 --- a/src/scripts/dep/llm_bot_dep/loaders/auto.py +++ b/src/scripts/dep/llm_bot_dep/loaders/auto.py @@ -1,5 +1,7 @@ +from llm_bot_dep.loaders.docx import process_doc +from llm_bot_dep.loaders.markdown import process_md from .text import process_text from .csv import process_csv from .html import process_html @@ -14,6 +16,10 @@ def cb_process_object(s3, file_type: str, file_content, **kwargs): res = process_csv(s3, file_content, **kwargs) elif file_type == 'html': res = process_html(file_content, **kwargs) + elif file_type == 'doc': + res = process_doc(file_content, **kwargs) + elif file_type == 'md': + res = process_md(file_content, **kwargs) elif file_type == 'pdf': # res = post_process_pdf(process_pdf(file_content, **kwargs)) res = process_pdf(s3, file_content, **kwargs) diff --git a/src/scripts/dep/llm_bot_dep/loaders/docx.py b/src/scripts/dep/llm_bot_dep/loaders/docx.py new file mode 100644 index 00000000..e453db78 --- /dev/null +++ b/src/scripts/dep/llm_bot_dep/loaders/docx.py @@ -0,0 +1,59 @@ +import logging +from typing import 
diff --git a/src/scripts/dep/llm_bot_dep/loaders/docx.py b/src/scripts/dep/llm_bot_dep/loaders/docx.py
new file mode 100644
index 00000000..e453db78
--- /dev/null
+++ b/src/scripts/dep/llm_bot_dep/loaders/docx.py
@@ -0,0 +1,59 @@
+import logging
+from typing import List, Optional
+from langchain.docstore.document import Document
+from langchain.document_loaders.base import BaseLoader
+from llm_bot_dep.loaders.html import CustomHtmlLoader
+import mammoth
+from llm_bot_dep.splitter_utils import MarkdownHeaderTextSplitter
+
+logger = logging.getLogger(__name__)
+
+
+class CustomDocLoader(BaseLoader):
+    """Load docx file.
+
+    Args:
+        file_path: Path of the docx file to load.
+
+        encoding: File encoding to use. If `None`, the file will be loaded
+        with the default system encoding.
+
+        autodetect_encoding: Whether to try to autodetect the file encoding
+        if the specified encoding fails.
+    """
+
+    def __init__(
+        self,
+        file_path: str,
+        encoding: Optional[str] = None,
+        autodetect_encoding: bool = False,
+    ):
+        """Initialize with file path."""
+        self.file_path = file_path
+        self.encoding = encoding
+        self.autodetect_encoding = autodetect_encoding
+
+    def load(self, content: str) -> List[Document]:
+        """Load from file path."""
+        metadata = {"file_path": self.file_path, "file_type": "docx"}
+
+        def _convert_image(image):
+            # Images are excluded
+            return {"src": ""}
+
+        html_content = mammoth.convert_to_html(
+            content, convert_image=mammoth.images.img_element(_convert_image))
+        loader = CustomHtmlLoader()
+        doc = loader.load(html_content)
+        doc.metadata = metadata
+
+        return doc
+
+
+def process_doc(file_content: str, **kwargs):
+    loader = CustomDocLoader(file_path=kwargs['bucket'] + "/" + kwargs['key'])
+    doc = loader.load(file_content)
+    splitter = MarkdownHeaderTextSplitter()
+    doc_list = splitter.split_text(doc)
+
+    return doc_list

diff --git a/src/scripts/dep/llm_bot_dep/loaders/html.py b/src/scripts/dep/llm_bot_dep/loaders/html.py
index 38386fb6..007432dc 100644
--- a/src/scripts/dep/llm_bot_dep/loaders/html.py
+++ b/src/scripts/dep/llm_bot_dep/loaders/html.py
@@ -46,6 +46,10 @@ def clean_html(self, html_str: str) -> str:
         blank_line = re.compile('\n+')
         s = blank_line.sub('\n', s)
 
+        # Remove blank image tags left over from the docx conversion
+        img_src = re.compile('<img src="" />')
+        s = img_src.sub('', s)
+
         return s.strip()
 
     # def load(self, file_content: str) -> List[Document]:
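
A quick sketch of the mammoth conversion the loader builds on (illustrative only; mammoth expects a binary file-like object, and the path here is a placeholder — PATCH 11 below reworks the loader along these lines):

import mammoth

def docx_to_html(path: str) -> str:
    def _convert_image(image):
        # Drop embedded images, emitting empty <img src=""> placeholders
        return {"src": ""}

    with open(path, "rb") as f:
        result = mammoth.convert_to_html(
            f, convert_image=mammoth.images.img_element(_convert_image))
    # result.messages carries any conversion warnings
    return result.value

print(docx_to_html("/tmp/sample.docx")[:200])
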
diff --git a/src/scripts/dep/llm_bot_dep/loaders/markdown.py b/src/scripts/dep/llm_bot_dep/loaders/markdown.py
new file mode 100644
index 00000000..6c629102
--- /dev/null
+++ b/src/scripts/dep/llm_bot_dep/loaders/markdown.py
@@ -0,0 +1,49 @@
+import logging
+from typing import List, Optional
+
+from langchain.docstore.document import Document
+from langchain.document_loaders.base import BaseLoader
+from llm_bot_dep.splitter_utils import MarkdownHeaderTextSplitter
+
+logger = logging.getLogger(__name__)
+
+
+class CustomMarkdownLoader(BaseLoader):
+    """Load markdown file.
+
+    Args:
+        file_path: Path of the markdown file to load.
+
+        encoding: File encoding to use. If `None`, the file will be loaded
+        with the default system encoding.
+
+        autodetect_encoding: Whether to try to autodetect the file encoding
+        if the specified encoding fails.
+    """
+
+    def __init__(
+        self,
+        file_path: str,
+        encoding: Optional[str] = None,
+        autodetect_encoding: bool = False,
+    ):
+        """Initialize with file path."""
+        self.file_path = file_path
+        self.encoding = encoding
+        self.autodetect_encoding = autodetect_encoding
+
+    def load(self, content: str) -> Document:
+        """Load from file path."""
+        metadata = {"file_path": self.file_path, "file_type": "md"}
+
+        return Document(page_content=content, metadata=metadata)
+
+
+def process_md(file_content: str, **kwargs):
+    loader = CustomMarkdownLoader(
+        file_path=kwargs['bucket'] + "/" + kwargs['key'])
+    doc = loader.load(file_content)
+    splitter = MarkdownHeaderTextSplitter()
+    doc_list = splitter.split_text(doc)
+
+    return doc_list

diff --git a/src/scripts/dep/setup.py b/src/scripts/dep/setup.py
index b1e7d1e3..9630780d 100644
--- a/src/scripts/dep/setup.py
+++ b/src/scripts/dep/setup.py
@@ -13,6 +13,7 @@
         'unstructured',
         'boto3',
         'nougat-ocr',
-        'markdownify'
+        'markdownify',
+        'mammoth'
     ],
 )
\ No newline at end of file

diff --git a/src/scripts/glue-job-script.py b/src/scripts/glue-job-script.py
index eb230216..3eeeab42 100644
--- a/src/scripts/glue-job-script.py
+++ b/src/scripts/glue-job-script.py
@@ -106,15 +106,15 @@ def iterate_s3_files(bucket: str, prefix: str) -> Generator:
         # assemble bucket and key as args for the callback function
         kwargs = {'bucket': bucket, 'key': key}
 
-        if file_type in ['txt']:
+        if file_type == 'txt':
             yield 'txt', file_content.decode('utf-8'), kwargs
             break
-        elif file_type in ['csv']:
+        elif file_type == 'csv':
             # Update row count here, the default row count is 1
             kwargs['csv_row_count'] = 1
             yield 'csv', file_content.decode('utf-8'), kwargs
             break
-        elif file_type in ['html']:
+        elif file_type == 'html':
             yield 'html', file_content.decode('utf-8'), kwargs
             break
         elif file_type in ['pdf']:
             yield 'pdf', file_content, kwargs
@@ -126,6 +126,8 @@ def iterate_s3_files(bucket: str, prefix: str) -> Generator:
         elif file_type in ['docx', 'doc']:
             yield 'doc', file_content.decode('utf-8'), kwargs
             break
+        elif file_type == 'md':
+            yield 'md', file_content.decode('utf-8'), kwargs
         else:
             logger.info(f"Unknown file type: {file_type}")
@@ -211,7 +213,7 @@ def main():
                 aos_injection(res, embeddingModelEndpoint, aosEndpoint, 'chatbot-index', gen_chunk=False)
             elif file_type == 'html':
                 aos_injection(res, embeddingModelEndpoint, aosEndpoint, 'chatbot-index')
-            elif file_type in ['pdf', 'txt']:
+            elif file_type in ['pdf', 'txt', 'doc', 'md']:
                 aos_injection(res, embeddingModelEndpoint, aosEndpoint, 'chatbot-index')
                 if qa_enhancement == 'true':
                     # iterate the document to get the QA pairs
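
A sketch of what the new markdown path produces (illustrative only; MarkdownHeaderTextSplitter is the project's splitter from llm_bot_dep.splitter_utils, the sample markdown is made up, and the exact chunk metadata is an assumption):

from langchain.docstore.document import Document
from llm_bot_dep.splitter_utils import MarkdownHeaderTextSplitter

md = "# Title\nIntro.\n\n## Section A\nDetails for A.\n\n## Section B\nDetails for B.\n"
doc = Document(page_content=md, metadata={"file_type": "md"})

splitter = MarkdownHeaderTextSplitter()
for chunk in splitter.split_text(doc):
    # Each chunk is a Document; heading context typically lands in its metadata
    print(chunk.metadata, repr(chunk.page_content[:40]))
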
From c7415be3bc97b823eaa9a26e21d643a7d08f7d05 Mon Sep 17 00:00:00 2001
From: Ning
Date: Fri, 10 Nov 2023 14:58:22 +0800
Subject: [PATCH 11/37] feat: add auto detect encoding and update doc loader

---
 src/etl-stack.ts                            |  2 +-
 src/scripts/dep/llm_bot_dep/loaders/auto.py |  2 +-
 src/scripts/dep/llm_bot_dep/loaders/docx.py | 31 ++++++++----
 src/scripts/dep/setup.py                    |  3 +-
 src/scripts/glue-job-script.py              | 52 ++++++++++++++-------
 5 files changed, 60 insertions(+), 30 deletions(-)

diff --git a/src/etl-stack.ts b/src/etl-stack.ts
index 4ee7bfa5..cdf6223b 100644
--- a/src/etl-stack.ts
+++ b/src/etl-stack.ts
@@ -123,7 +123,7 @@ export class EtlStack extends NestedStack {
         '--REGION': props._region,
         '--EMBEDDING_MODEL_ENDPOINT': props._embeddingEndpoint,
         '--DOC_INDEX_TABLE': 'chatbot-index',
-        '--additional-python-modules': 'langchain==0.0.312,beautifulsoup4==4.12.2,requests-aws4auth==1.2.3,boto3==1.28.69,openai==0.28.1,nougat-ocr==0.1.17,pyOpenSSL==23.3.0,tenacity==8.2.3,markdownify==0.11.6,mammoth==1.6.0',
+        '--additional-python-modules': 'langchain==0.0.312,beautifulsoup4==4.12.2,requests-aws4auth==1.2.3,boto3==1.28.69,openai==0.28.1,nougat-ocr==0.1.17,pyOpenSSL==23.3.0,tenacity==8.2.3,markdownify==0.11.6,mammoth==1.6.0,chardet==5.2.0',
         // add multiple extra python files
         '--extra-py-files': extraPythonFilesList
     }

diff --git a/src/scripts/dep/llm_bot_dep/loaders/auto.py b/src/scripts/dep/llm_bot_dep/loaders/auto.py
index 37d8f354..418f2468 100644
--- a/src/scripts/dep/llm_bot_dep/loaders/auto.py
+++ b/src/scripts/dep/llm_bot_dep/loaders/auto.py
@@ -17,7 +17,7 @@ def cb_process_object(s3, file_type: str, file_content, **kwargs):
     elif file_type == 'html':
         res = process_html(file_content, **kwargs)
     elif file_type == 'doc':
-        res = process_doc(file_content, **kwargs)
+        res = process_doc(s3, **kwargs)
     elif file_type == 'md':
         res = process_md(file_content, **kwargs)

diff --git a/src/scripts/dep/llm_bot_dep/loaders/docx.py b/src/scripts/dep/llm_bot_dep/loaders/docx.py
index e453db78..fcf33605 100644
--- a/src/scripts/dep/llm_bot_dep/loaders/docx.py
+++ b/src/scripts/dep/llm_bot_dep/loaders/docx.py
@@ -4,6 +4,8 @@
 from langchain.document_loaders.base import BaseLoader
 from llm_bot_dep.loaders.html import CustomHtmlLoader
 import mammoth
+import uuid
+from datetime import datetime
 from llm_bot_dep.splitter_utils import MarkdownHeaderTextSplitter
 
 logger = logging.getLogger(__name__)
@@ -33,26 +35,35 @@ def __init__(
         self.encoding = encoding
         self.autodetect_encoding = autodetect_encoding
 
-    def load(self, content: str) -> List[Document]:
+    def load(self) -> List[Document]:
         """Load from file path."""
         metadata = {"file_path": self.file_path, "file_type": "docx"}
 
         def _convert_image(image):
            # Images are excluded
            return {"src": ""}
-
-        html_content = mammoth.convert_to_html(
-            content, convert_image=mammoth.images.img_element(_convert_image))
-        loader = CustomHtmlLoader()
-        doc = loader.load(html_content)
-        doc.metadata = metadata
+
+        with open(self.file_path, "rb") as docx_file:
+            result = mammoth.convert_to_html(docx_file, convert_image=mammoth.images.img_element(_convert_image))
+            html_content = result.value  # The generated HTML
+            loader = CustomHtmlLoader()
+            doc = loader.load(html_content)
+            doc.metadata = metadata
 
         return doc
 
 
-def process_doc(file_content: str, **kwargs):
-    loader = CustomDocLoader(file_path=kwargs['bucket'] + "/" + kwargs['key'])
-    doc = loader.load(file_content)
+def process_doc(s3, **kwargs):
+    now = datetime.now()
+    timestamp_str = now.strftime("%Y%m%d%H%M%S")
+    random_uuid = str(uuid.uuid4())[:8]
+    bucket_name = kwargs['bucket']
+    key = kwargs['key']
+    local_path = f'/tmp/doc-{timestamp_str}-{random_uuid}.docx'
+
+    s3.download_file(bucket_name, key, local_path)
+    loader = CustomDocLoader(file_path=local_path)
+    doc = loader.load()
     splitter = MarkdownHeaderTextSplitter()
     doc_list = splitter.split_text(doc)

diff --git a/src/scripts/dep/setup.py b/src/scripts/dep/setup.py
index 9630780d..fe5f0d47 100644
--- a/src/scripts/dep/setup.py
+++ b/src/scripts/dep/setup.py
@@ -14,6 +14,7 @@
         'boto3',
         'nougat-ocr',
         'markdownify',
-        'mammoth'
+        'mammoth',
+        'chardet'
     ],
 )
\ No newline at end of file
diff --git a/src/scripts/glue-job-script.py b/src/scripts/glue-job-script.py
index 3eeeab42..63a14454 100644
--- a/src/scripts/glue-job-script.py
+++ b/src/scripts/glue-job-script.py
@@ -1,24 +1,22 @@
+import itertools
+import logging
 import os
-import boto3
-from boto3.dynamodb.conditions import Key, Attr
 import sys
-import logging
-import itertools
 import time
+from typing import Any, Dict, Generator, Iterable, List, Optional, Tuple
 
-from typing import Generator, Any, Dict, Iterable, List, Optional, Tuple
+import boto3
+import chardet
 import nltk
-
-from langchain.text_splitter import RecursiveCharacterTextSplitter
+from awsglue.utils import getResolvedOptions
+from boto3.dynamodb.conditions import Attr, Key
 from langchain.docstore.document import Document
+from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.vectorstores import OpenSearchVectorSearch
-from opensearchpy import RequestsHttpConnection
-
-from awsglue.utils import getResolvedOptions
 from llm_bot_dep import sm_utils
-from llm_bot_dep.loaders.auto import cb_process_object
 from llm_bot_dep.enhance_utils import EnhanceWithBedrock
-
+from llm_bot_dep.loaders.auto import cb_process_object
+from opensearchpy import RequestsHttpConnection
 from requests_aws4auth import AWS4Auth
 from tenacity import retry, stop_after_attempt
@@ -54,6 +52,25 @@
 credentials = boto3.Session().get_credentials()
 awsauth = AWS4Auth(credentials.access_key, credentials.secret_key, region, 'es', session_token=credentials.token)
 
+
+def decode_file_content(content: bytes, default_encoding: str = 'utf-8'):
+    """Decode the file content and auto detect the content encoding.
+
+    Args:
+        content: The content to detect the encoding.
+        default_encoding: The default encoding to try to decode the content.
+    """
+
+    try:
+        decoded_content = content.decode(default_encoding)
+    except UnicodeDecodeError:
+        # Try to detect encoding
+        encoding = chardet.detect(content)['encoding']
+        decoded_content = content.decode(encoding)
+
+    return decoded_content
+
 # This Glue job runs as a map job; batchIndice is the index of the file the current run handles
 def iterate_s3_files(bucket: str, prefix: str) -> Generator:
     paginator = s3.get_paginator('list_objects_v2')
@@ -107,15 +124,15 @@ def iterate_s3_files(bucket: str, prefix: str) -> Generator:
         kwargs = {'bucket': bucket, 'key': key}
 
         if file_type == 'txt':
-            yield 'txt', file_content.decode('utf-8'), kwargs
+            yield 'txt', decode_file_content(file_content), kwargs
             break
         elif file_type == 'csv':
             # Update row count here, the default row count is 1
             kwargs['csv_row_count'] = 1
-            yield 'csv', file_content.decode('utf-8'), kwargs
+            yield 'csv', decode_file_content(file_content), kwargs
             break
         elif file_type == 'html':
-            yield 'html', file_content.decode('utf-8'), kwargs
+            yield 'html', decode_file_content(file_content), kwargs
             break
         elif file_type in ['pdf']:
             yield 'pdf', file_content, kwargs
@@ -124,10 +141,11 @@ def iterate_s3_files(bucket: str, prefix: str) -> Generator:
             yield 'image', file_content, kwargs
             break
         elif file_type in ['docx', 'doc']:
-            yield 'doc', file_content.decode('utf-8'), kwargs
+            yield 'doc', file_content, kwargs
             break
         elif file_type == 'md':
-            yield 'md', file_content.decode('utf-8'), kwargs
+            yield 'md', decode_file_content(file_content), kwargs
+            break
         else:
             logger.info(f"Unknown file type: {file_type}")
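
A standalone sketch of the new encoding fallback (illustrative only; the GBK sample is made up, and chardet's guess depends on input length):

import chardet

def decode_file_content(content: bytes, default_encoding: str = 'utf-8') -> str:
    try:
        return content.decode(default_encoding)
    except UnicodeDecodeError:
        # Fall back to chardet's best guess at the encoding
        encoding = chardet.detect(content)['encoding']
        return content.decode(encoding)

gbk_bytes = '自动检测文件编码'.encode('gbk')
print(decode_file_content(gbk_bytes))  # chardet typically identifies this as GB2312/GBK
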
From 4a6e213dc31078ace37d939d69bc34c7d3443ab2 Mon Sep 17 00:00:00 2001
From: yike5460
Date: Fri, 10 Nov 2023 09:53:54 +0000
Subject: [PATCH 12/37] fix: add punkt installation for qa enhance

---
 src/etl-stack.ts                             |   1 +
 .../dist/llm_bot_dep-0.1.0-py3-none-any.whl  | Bin 23185 -> 22498 bytes
 src/scripts/dep/llm_bot_dep/enhance_utils.py |  48 +++++++++---------
 src/scripts/glue-job-script.py               |   5 +-
 4 files changed, 29 insertions(+), 25 deletions(-)

diff --git a/src/etl-stack.ts b/src/etl-stack.ts
index a8697d1f..1c5f670d 100644
--- a/src/etl-stack.ts
+++ b/src/etl-stack.ts
@@ -92,6 +92,7 @@ export class EtlStack extends NestedStack {
                 "glue:*",
                 "ec2:*",
                 "dynamodb:*",
+                "bedrock:*",
                 // cloudwatch logs
                 "logs:*",
             ],

diff --git a/src/scripts/dep/dist/llm_bot_dep-0.1.0-py3-none-any.whl b/src/scripts/dep/dist/llm_bot_dep-0.1.0-py3-none-any.whl
index 9097dcc17ae52cad446b7e55812dedcb30a8ae1f..235e865375da8825a1e750a776ba1d52d146e39e 100644
GIT binary patch
[base85-encoded binary deltas (12948 and 13601 bytes) omitted]
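
The punkt setup this patch wires in can be exercised standalone (illustrative sketch only; the sample sentence is made up):

import nltk

# Mirror the Glue job's convention of staging NLTK data under /tmp
nltk.data.path.append('/tmp/nltk_data')
for package in ['words', 'punkt']:
    nltk.download(package, download_dir='/tmp/nltk_data')

# punkt is what nltk.sent_tokenize needs; the QA-enhancement step relies on it
print(nltk.sent_tokenize("First sentence. Second sentence."))
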
znWSpe#U6o(!`m`qu!|Tn;+gYfa~P;BvUT|dUAtqS9y;~AnWvU2+gIN_s7J)Pg%}r$ zz?o7tM+|QkB&A+yAn{iESe+OU@%%gFZ?SJjs}Ri^VLV8V-H3d(=}F#`bLC*&%9OL&4s zPAF4`Paskz0EIIOjj7C#sl4|0NIphfU1C!S8DScK-ju|6gJFKfo@ze>{4s zVG#XSMEwu2Mm_;Vj^J;EMgIrPEo8p@^%wY8A^s20PT?PrNFC)L5HY3S8SyUw|F=;8 t2MDK_ps3FA5A?rb5fT6(`TrphJ;i^#4XGm{q5;qVQ4j#YrOLmE{{!t4hDZPa diff --git a/src/scripts/dep/llm_bot_dep/enhance_utils.py b/src/scripts/dep/llm_bot_dep/enhance_utils.py index 763b2dcd..6ef569c0 100644 --- a/src/scripts/dep/llm_bot_dep/enhance_utils.py +++ b/src/scripts/dep/llm_bot_dep/enhance_utils.py @@ -215,27 +215,27 @@ def SplitDocumentByTokenNum(self, document: Document, token_num: str) -> List[Do return documents_list # local debugging purpose -if __name__ == "__main__": - # test the function - prompt = "Do we have any solution offer to Stable Diffusion?" - solution_title = "Stable Diffusion AWS Extensions" - page_content = """ - Stable Diffusion AWS Extensions is a CSDC solution that... - """ - # construct a Document object - document = Document(page_content=page_content) - ewb = EnhanceWithBedrock(prompt, solution_title, document) - document_list = ewb.SplitDocumentByTokenNum(document, slice_size) - # test the function - for document in document_list: - prompt = "Do we have any solution offer to Stable Diffusion?" - solution_title = "Stable Diffusion AWS Extensions" - enhanced_prompt = ewb.EnhanceWithClaude(prompt, solution_title, document) - logger.info("Enhanced prompt: {}".format(enhanced_prompt)) - - # test the function - for document in document_list: - prompt = "Do we have any solution offer to Stable Diffusion?" - solution_title = "Stable Diffusion AWS Extensions" - enhanced_prompt = ewb.EnhanceWithOpenAI(prompt, solution_title, document) - logger.info("Enhanced prompt: {}".format(enhanced_prompt)) +# if __name__ == "__main__": +# # test the function +# prompt = "Do we have any solution offer to Stable Diffusion?" +# solution_title = "Stable Diffusion AWS Extensions" +# page_content = """ +# Stable Diffusion AWS Extensions is a CSDC solution that... +# """ +# # construct a Document object +# document = Document(page_content=page_content) +# ewb = EnhanceWithBedrock(prompt, solution_title, document) +# document_list = ewb.SplitDocumentByTokenNum(document, slice_size) +# # test the function +# for document in document_list: +# prompt = "Do we have any solution offer to Stable Diffusion?" +# solution_title = "Stable Diffusion AWS Extensions" +# enhanced_prompt = ewb.EnhanceWithClaude(prompt, solution_title, document) +# logger.info("Enhanced prompt: {}".format(enhanced_prompt)) + +# # test the function +# for document in document_list: +# prompt = "Do we have any solution offer to Stable Diffusion?" 
+#         solution_title = "Stable Diffusion AWS Extensions"
+#         enhanced_prompt = ewb.EnhanceWithOpenAI(prompt, solution_title, document)
+#         logger.info("Enhanced prompt: {}".format(enhanced_prompt))

diff --git a/src/scripts/glue-job-script.py b/src/scripts/glue-job-script.py
index eb230216..99488b73 100644
--- a/src/scripts/glue-job-script.py
+++ b/src/scripts/glue-job-script.py
@@ -54,6 +54,9 @@
 credentials = boto3.Session().get_credentials()
 awsauth = AWS4Auth(credentials.access_key, credentials.secret_key, region, 'es', session_token=credentials.token)
 
+# Set the NLTK data path to the /tmp directory for AWS Glue jobs
+nltk.data.path.append('/tmp/nltk_data')
+
 # This Glue job runs as a map job; batchIndice is the index of the file the current run handles
 def iterate_s3_files(bucket: str, prefix: str) -> Generator:
     paginator = s3.get_paginator('list_objects_v2')
@@ -240,7 +243,7 @@ def main():
     # Set the NLTK data path to the /tmp directory for AWS Glue jobs
     nltk.data.path.append("/tmp")
     # List of NLTK packages to download
-    nltk_packages = ['words']
+    nltk_packages = ['words', 'punkt']
     # Download the required NLTK packages to /tmp
     for package in nltk_packages:
         # Download the package to /tmp/nltk_data
         nltk.download(package, download_dir='/tmp/nltk_data')

From e91da19f1638f1e28f48ffeface7b80c9cd21eda Mon Sep 17 00:00:00 2001
From: yike5460
Date: Sun, 12 Nov 2023 07:24:49 +0000
Subject: [PATCH 13/37] fix: update to latest boto3 package to adapt bedrock

---
 src/etl-stack.ts                             |   2 +-
 .../dist/llm_bot_dep-0.1.0-py3-none-any.whl  | Bin 22498 -> 24501 bytes
 src/scripts/dep/llm_bot_dep/enhance_utils.py |  16 +++++++++-------
 src/scripts/dep/setup.py                     |   2 +-
 4 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/src/etl-stack.ts b/src/etl-stack.ts
index 8b20f3d7..3f328ef6 100644
--- a/src/etl-stack.ts
+++ b/src/etl-stack.ts
@@ -124,7 +124,7 @@ export class EtlStack extends NestedStack {
         '--REGION': props._region,
         '--EMBEDDING_MODEL_ENDPOINT': props._embeddingEndpoint,
         '--DOC_INDEX_TABLE': 'chatbot-index',
-        '--additional-python-modules': 'langchain==0.0.312,beautifulsoup4==4.12.2,requests-aws4auth==1.2.3,boto3==1.28.69,openai==0.28.1,nougat-ocr==0.1.17,pyOpenSSL==23.3.0,tenacity==8.2.3,markdownify==0.11.6,mammoth==1.6.0,chardet==5.2.0',
+        '--additional-python-modules': 'langchain==0.0.312,beautifulsoup4==4.12.2,requests-aws4auth==1.2.3,boto3==1.28.84,openai==0.28.1,nougat-ocr==0.1.17,pyOpenSSL==23.3.0,tenacity==8.2.3,markdownify==0.11.6,mammoth==1.6.0,chardet==5.2.0',
         // add multiple extra python files
         '--extra-py-files': extraPythonFilesList
     }

diff --git a/src/scripts/dep/dist/llm_bot_dep-0.1.0-py3-none-any.whl b/src/scripts/dep/dist/llm_bot_dep-0.1.0-py3-none-any.whl
index 235e865375da8825a1e750a776ba1d52d146e39e..3945d1a9443b836401abe3ac482cde2b0bbc16ec 100644
GIT binary patch
[base85-encoded binary delta (8078 bytes) omitted]

diff --git a/src/scripts/dep/llm_bot_dep/enhance_utils.py b/src/scripts/dep/llm_bot_dep/enhance_utils.py
@@ ... @@ def SplitDocumentByTokenNum(self, document: Document, token_num: str) -> List[Do
 # local debugging purpose
 # if __name__ == "__main__":
+#     # log out boto3 version
+#     logger.info("boto3 version: {}".format(boto3.__version__))
 #     # test the function
 #     prompt = "Do we have any solution offer to Stable Diffusion?"
 #     solution_title = "Stable Diffusion AWS Extensions"

diff --git a/src/scripts/dep/setup.py b/src/scripts/dep/setup.py
index fe5f0d47..ff6569da 100644
--- a/src/scripts/dep/setup.py
+++ b/src/scripts/dep/setup.py
@@ -11,7 +11,7 @@
         # 'sagemaker',
         'requests_aws4auth',
         'unstructured',
-        'boto3',
+        'boto3==1.28.84',
         'nougat-ocr',
         'markdownify',
         'mammoth',

From 830b61ea2b71f4ad0563a3168434f436950ee5ec Mon Sep 17 00:00:00 2001
From: Ning
Date: Mon, 13 Nov 2023 09:22:52 +0800
Subject: [PATCH 14/37] chore: update gitignore to ignore python build cache and model files

---
 .gitignore | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/.gitignore b/.gitignore
index c94e12a0..5d2f4f1c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -54,3 +54,11 @@
 cdk.context.json
 package-lock.json
 .idea
 .env
+**/llm_bot_dep.egg-info
+**/llm_bot_dep/__pycache__
+**/llm_bot_dep/loaders/__pycache__
+**/dep/build
+**/models/cross/model/models--csdc-atl--buffer-cross-001
+**/models/embedding/model/models--BAAI--bge-large-zh-v1.5
+**/models/embedding/model/models--csdc-atl--buffer-embedding-002
+**/models/instruct/model/models--csdc-atl--buffer-instruct-InternLM-001

From c12980c3223941d2f06cb92d234aa90da01c562e Mon Sep 17 00:00:00 2001
From: Ning
Date: Mon, 13 Nov 2023 09:33:02 +0800
Subject: [PATCH 15/37] chore: add deployment files for further implementation

---
 CHANGELOG.md                         |  19 +++
 LICENSE => LICENSE.txt               |   2 +-
 NOTICE                               |   1 -
 NOTICE.txt                           |  14 ++++
 buildspec.yml                        |  40 +++++++++
 codescan-prebuild-custom.sh          | 112 +++++++++++++++++++++++++
 copy-repo.sh                         |  38 +++++++++
 deployment/build-open-source-dist.sh | 120 +++++++++++++++++++++++++
 deployment/build-s3-dist.sh          |  83 ++++++++++++++++++
 deployment/example.yaml              | 103 +++++++++++++++++++++++
 deployment/run-unit-tests.sh         |  41 +++++++++
 initialize-repo.sh                   |  48 +++++++++++
 sonar-project.properties             |  44 ++++++++++
 13 files changed, 663 insertions(+), 2
deletions(-) create mode 100644 CHANGELOG.md rename LICENSE => LICENSE.txt (99%) delete mode 100644 NOTICE create mode 100644 NOTICE.txt create mode 100644 buildspec.yml create mode 100644 codescan-prebuild-custom.sh create mode 100755 copy-repo.sh create mode 100755 deployment/build-open-source-dist.sh create mode 100755 deployment/build-s3-dist.sh create mode 100644 deployment/example.yaml create mode 100755 deployment/run-unit-tests.sh create mode 100755 initialize-repo.sh create mode 100644 sonar-project.properties diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 00000000..fc84645a --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,19 @@ +# Change Log +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + + +## [Unreleased] +### Added +- CHANGELOG templated file +- README templated file +- NOTICE file +- LICENSE file + +### Changed + + +### Removed + diff --git a/LICENSE b/LICENSE.txt similarity index 99% rename from LICENSE rename to LICENSE.txt index 67db8588..19dc35b2 100644 --- a/LICENSE +++ b/LICENSE.txt @@ -172,4 +172,4 @@ of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. + of your accepting any such warranty or additional liability. \ No newline at end of file diff --git a/NOTICE b/NOTICE deleted file mode 100644 index 616fc588..00000000 --- a/NOTICE +++ /dev/null @@ -1 +0,0 @@ -Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. diff --git a/NOTICE.txt b/NOTICE.txt new file mode 100644 index 00000000..d3a2d969 --- /dev/null +++ b/NOTICE.txt @@ -0,0 +1,14 @@ +LLM-Bot +Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +Licensed under the Apache License Version 2.0 (the "License"). You may not use this file except +in compliance with the License. A copy of the License is located at http://www.apache.org/licenses/ +or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or implied. See the License for the +specific language governing permissions and limitations under the License. 
+ +********************** +THIRD PARTY COMPONENTS +********************** +This software includes third party software subject to the following copyrights: + +AWS SDK under the Apache License Version 2.0 diff --git a/buildspec.yml b/buildspec.yml new file mode 100644 index 00000000..d2cfa91e --- /dev/null +++ b/buildspec.yml @@ -0,0 +1,40 @@ +version: 0.2 + +phases: + install: + runtime-versions: + nodejs: 12 + python: 3.8 + commands: + - echo "nothing to do in install" + pre_build: + commands: + - echo "Installing dependencies and executing unit tests - `pwd`" + - echo Update npm to latest + - npm install -g npm@latest + - cd deployment && chmod +x ./run-unit-tests.sh && ./run-unit-tests.sh + - echo "Installing dependencies and executing unit tests completed `date`" + build: + commands: + - echo "Starting build `date` in `pwd`" + - chmod +x ./build-s3-dist.sh && ./build-s3-dist.sh $DIST_OUTPUT_BUCKET $SOLUTION_NAME $VERSION + - echo "Build completed `date`" + - echo "Starting open-source-dist `date` in `pwd`" + - chmod +x ./build-open-source-dist.sh && ./build-open-source-dist.sh $SOLUTION_NAME + - echo "Open Source Dist completed `date`" + post_build: + commands: + - echo "Retrieving next stage buildspec `date` in `pwd`" + - aws s3 cp s3://${SOLUTIONS_BUILD_ASSETS_BUCKET:-solutions-build-assets}/changelog-spec.yml ../buildspec.yml + - echo "Retrieving next stage buildspec complete" + - echo "Post build completed on `date`" + +artifacts: + files: + - deployment/**/* + - source/**/* + - .gitignore + - sonar-project.properties + - buildspec.yml + - CHANGELOG.md + - .cfnnag_* diff --git a/codescan-prebuild-custom.sh b/codescan-prebuild-custom.sh new file mode 100644 index 00000000..a4214e7e --- /dev/null +++ b/codescan-prebuild-custom.sh @@ -0,0 +1,112 @@ +#!/bin/bash +#-------------------------------------------------------------------- +# Usage: this script must exit with a non-zero return code if the +# Viperlight scan fails. +#-------------------------------------------------------------------- + +source_dir='./source' # May need to adjust this for your repo, but this + # should generally work +viperlight_temp=/tmp/viperlight_scan # should work in most environments +export PATH=$PATH:../viperlight/bin + +failed_scans=0 + +if [ -d $viperlight_temp ]; then + rm $viperlight_temp/* + rmdir $viperlight_temp +fi +viperlight_temp=/tmp/viperlight_scan +mkdir $viperlight_temp + +scan_npm() { + echo ----------------------------------------------------------- + echo NPM Scanning $1 + echo ----------------------------------------------------------- + folder_path=`dirname $1` + viperlight scan -t $folder_path -m node-npmaudit -m node-npmoutdated + rc=$? + if [ $rc -eq 0 ]; then + echo SUCCESS + elif [ $rc -eq 42 ]; then + echo NOTHING TO SCAN + else + echo FAILED rc=$rc + ((failed_scans=failed_scans+1)) + fi +} + +scan_py() { + echo ----------------------------------------------------------- + echo Python Scanning $1 + echo ----------------------------------------------------------- + folder_path=`dirname $1` + viperlight scan -t $folder_path -m python-piprot -m python-safety + rc=$? 
+ if [ $rc -eq 0 ]; then + echo SUCCESS + elif [ $rc -eq 42 ]; then + echo NOTHING TO SCAN + else + echo FAILED rc=$rc + ((failed_scans=failed_scans+1)) + fi +} + +echo ----------------------------------------------------------- +echo Environment +echo ----------------------------------------------------------- +echo npm `npm --version` +echo `python --version` + +echo ----------------------------------------------------------- +echo Update npm to latest +echo ----------------------------------------------------------- +npm install -g npm@latest + +echo ----------------------------------------------------------- +echo Scanning all Nodejs projects +echo ----------------------------------------------------------- +find $source_dir -name package.json | grep -v node_modules | while read folder + do + echo $folder >> $viperlight_temp/scan_npm_list.txt + done +while read folder + do + scan_npm $folder + done < $viperlight_temp/scan_npm_list.txt + +echo ----------------------------------------------------------- +echo Scanning all python projects +echo ----------------------------------------------------------- +find . -name requirements.txt | while read folder + do + echo $folder >> $viperlight_temp/scan_python_list.txt + done + +while read folder + do + if [[ -z $pi_scans_installed ]]; then + echo Installing piprot and safety + pip install piprot safety + pi_scans_installed=YES + fi + scan_py $folder + done < $viperlight_temp/scan_python_list.txt + +echo ----------------------------------------------------------- +echo Scanning everywhere else +echo ----------------------------------------------------------- +viperlight scan +rc=$? +if [ $rc -gt 0 ]; then + ((failed_scans=failed_scans+1)) +fi + +if [ $failed_scans == 0 ] +then + echo Scan completed successfully +else + echo $failed_scans scans failed. Check previous messages for findings. +fi + +exit $failed_scans diff --git a/copy-repo.sh b/copy-repo.sh new file mode 100755 index 00000000..118d922a --- /dev/null +++ b/copy-repo.sh @@ -0,0 +1,38 @@ +#!/bin/bash +echo "Path to repo to copy to (e.g. ../new-solution-name): " +read solution_path +echo "Initialize repo (y/n)? [N]:" +read init_repo +if [ .$init_repo = "." ]; then + init_repo="n" +fi +init_repo=$(echo $init_repo | tr '[:upper:]' '[:lower:]') + +cp -r * $solution_path +mkdir -p $solution_path/.github +cp -r .github/* $solution_path/.github/ +cp .gitignore $solution_path +cp .viperlight* $solution_path + +echo "Viperlight: enter 'y' to use the custom codescan script, codescan-prebuild-custom.sh:" +echo -e "- runs python scans where there is a requirements.txt" +echo -e "- updates environment to npm@latest (regardless of whether npm is used)" +echo -e "- runs node scans where there is a package.json" +echo -e "- runs viperlight scan from the root" +echo -e "\nInstall codescan-prebuild-custom.sh (y/n)? [Y]:" +read use_custom_script +if [ .$use_custom_script = "." 
]; then + use_custom_script="y" +fi +use_custom_script=$(echo $use_custom_script | tr '[:upper:]' '[:lower:]') + +if [ $use_custom_script = "y" ]; then + cp codescan-prebuild-custom.sh $solution_path + chmod +x $solution_path/codescan-prebuild-custom.sh +fi + +if [ $init_repo = "y" ]; then + cd $solution_path + chmod +x initialize-repo.sh + ./initialize-repo.sh +fi diff --git a/deployment/build-open-source-dist.sh b/deployment/build-open-source-dist.sh new file mode 100755 index 00000000..c68adcd9 --- /dev/null +++ b/deployment/build-open-source-dist.sh @@ -0,0 +1,120 @@ +#!/bin/bash +# +# This assumes all of the OS-level configuration has been completed and git repo has already been cloned +# +# This script should be run from the repo's deployment directory +# cd deployment +# ./build-open-source-dist.sh solution-name +# +# Paramenters: +# - solution-name: name of the solution for consistency + +# Check to see if input has been provided: +if [ -z "$1" ]; then + echo "Please provide the trademark approved solution name for the open source package." + echo "For example: ./build-open-source-dist.sh trademarked-solution-name" + exit 1 +fi + +# Get reference for all important folders +source_template_dir="$PWD" +dist_dir="$source_template_dir/open-source" +dist_template_dir="$dist_dir/deployment" +source_dir="$source_template_dir/../source" +github_dir="$source_template_dir/../.github" + +echo "------------------------------------------------------------------------------" +echo "[Init] Clean old open-source folder" +echo "------------------------------------------------------------------------------" +echo "rm -rf $dist_dir" +rm -rf $dist_dir +echo "mkdir -p $dist_dir" +mkdir -p $dist_dir +echo "mkdir -p $dist_template_dir" +mkdir -p $dist_template_dir + +echo "------------------------------------------------------------------------------" +echo "[Packing] Templates" +echo "------------------------------------------------------------------------------" +echo "cp $source_template_dir/*.template $dist_template_dir/" +cp $source_template_dir/*.template $dist_template_dir/ +echo "copy yaml templates and rename" +cp $source_template_dir/*.yaml $dist_template_dir/ +cd $dist_template_dir +# Rename all *.yaml to *.template +for f in *.yaml; do + mv -- "$f" "${f%.yaml}.template" +done + +echo "------------------------------------------------------------------------------" +echo "[Packing] Build Script" +echo "------------------------------------------------------------------------------" +echo "cp $source_template_dir/build-s3-dist.sh $dist_template_dir" +cp $source_template_dir/build-s3-dist.sh $dist_template_dir +echo "cp $source_template_dir/run-unit-tests.sh $dist_template_dir" +cp $source_template_dir/run-unit-tests.sh $dist_template_dir + +echo "------------------------------------------------------------------------------" +echo "[Packing] GitHub templates" +echo "------------------------------------------------------------------------------" +echo "cp -r $github_dir $dist_dir" +cp -r $github_dir $dist_dir + +echo "------------------------------------------------------------------------------" +echo "[Packing] Source Folder" +echo "------------------------------------------------------------------------------" +echo "cp -r $source_dir $dist_dir" +cp -r $source_dir $dist_dir + +echo "------------------------------------------------------------------------------" +echo "[Packing] Files from the root level of the project" +echo 
"------------------------------------------------------------------------------" +echo "cp $source_template_dir/../LICENSE.txt $dist_dir" +cp $source_template_dir/../LICENSE.txt $dist_dir +echo "cp $source_template_dir/../NOTICE.txt $dist_dir" +cp $source_template_dir/../NOTICE.txt $dist_dir +echo "cp $source_template_dir/../README.md $dist_dir" +cp $source_template_dir/../README.md $dist_dir +echo "cp $source_template_dir/../CODE_OF_CONDUCT.md $dist_dir" +cp $source_template_dir/../CODE_OF_CONDUCT.md $dist_dir +echo "cp $source_template_dir/../CONTRIBUTING.md $dist_dir" +cp $source_template_dir/../CONTRIBUTING.md $dist_dir +echo "cp $source_template_dir/../CHANGELOG.md $dist_dir" +cp $source_template_dir/../CHANGELOG.md $dist_dir +echo "cp $source_template_dir/../.gitignore $dist_dir" +cp $source_template_dir/../.gitignore $dist_dir + +echo "------------------------------------------------------------------------------" +echo "[Packing] Clean dist, node_modules and bower_components folders" +echo "------------------------------------------------------------------------------" +echo "find $dist_dir -iname "node_modules" -type d -exec rm -r "{}" \; 2> /dev/null" +find $dist_dir -iname "node_modules" -type d -exec rm -r "{}" \; 2> /dev/null +echo "find $dist_dir -iname "tests" -type d -exec rm -r "{}" \; 2> /dev/null" +find $dist_dir -iname "tests" -type d -exec rm -r "{}" \; 2> /dev/null +echo "find $dist_dir -iname "dist" -type d -exec rm -r "{}" \; 2> /dev/null" +find $dist_dir -iname "dist" -type d -exec rm -r "{}" \; 2> /dev/null +echo "find $dist_dir -iname "bower_components" -type d -exec rm -r "{}" \; 2> /dev/null" +find $dist_dir -iname "bower_components" -type d -exec rm -r "{}" \; 2> /dev/null +echo "find ../ -type f -name 'package-lock.json' -delete" +find $dist_dir -type f -name 'package-lock.json' -delete + +echo "------------------------------------------------------------------------------" +echo "[Packing] Create GitHub (open-source) zip file" +echo "------------------------------------------------------------------------------" +# Create the zip file +echo "cd $dist_dir" +cd $dist_dir + +echo "zip -q -r9 ../$1.zip ." +zip -q -r9 ../$1.zip . + +# Cleanup any temporary/unnecessary files +echo "Clean up open-source folder" +echo "rm -rf * .*" +rm -rf * .* + +# Place final zip file in $dist_dir +echo "mv ../$1.zip ." +mv ../$1.zip . + +echo "Completed building $1.zip dist" diff --git a/deployment/build-s3-dist.sh b/deployment/build-s3-dist.sh new file mode 100755 index 00000000..4587d619 --- /dev/null +++ b/deployment/build-s3-dist.sh @@ -0,0 +1,83 @@ +#!/bin/bash +# +# This assumes all of the OS-level configuration has been completed and git repo has already been cloned +# +# This script should be run from the repo's deployment directory +# cd deployment +# ./build-s3-dist.sh source-bucket-base-name solution-name version-code +# +# Paramenters: +# - source-bucket-base-name: Name for the S3 bucket location where the template will source the Lambda +# code from. The template will append '-[region_name]' to this bucket name. 
+#    For example: ./build-s3-dist.sh solutions v1.0.0
+#    The template will then expect the source code to be located in the solutions-[region_name] bucket
+#
+# - solution-name: name of the solution for consistency
+#
+# - version-code: version of the package

+# Check to see if input has been provided:
+if [ -z "$1" ] || [ -z "$2" ] || [ -z "$3" ]; then
+    echo "Please provide the base source bucket name, trademark approved solution name and version where the lambda code will eventually reside."
+    echo "For example: ./build-s3-dist.sh solutions trademarked-solution-name v1.0.0"
+    exit 1
+fi
+
+# This is set by initialize-repo.sh
+SOLUTION_ID="%%SOLUTION_ID_VAL%%"
+
+# Get reference for all important folders
+template_dir="$PWD"
+template_dist_dir="$template_dir/global-s3-assets"
+build_dist_dir="$template_dir/regional-s3-assets"
+source_dir="$template_dir/../source"
+
+echo "------------------------------------------------------------------------------"
+echo "[Init] Clean old dist, node_modules and bower_components folders"
+echo "------------------------------------------------------------------------------"
+echo "rm -rf $template_dist_dir"
+rm -rf $template_dist_dir
+echo "mkdir -p $template_dist_dir"
+mkdir -p $template_dist_dir
+echo "rm -rf $build_dist_dir"
+rm -rf $build_dist_dir
+echo "mkdir -p $build_dist_dir"
+mkdir -p $build_dist_dir
+
+echo "------------------------------------------------------------------------------"
+echo "[Packing] Templates"
+echo "------------------------------------------------------------------------------"
+echo "cp $template_dir/*.template $template_dist_dir/"
+cp $template_dir/*.template $template_dist_dir/
+echo "copy yaml templates and rename"
+cp $template_dir/*.yaml $template_dist_dir/
+cd $template_dist_dir
+# Rename all *.yaml to *.template
+for f in *.yaml; do
+    mv -- "$f" "${f%.yaml}.template"
+done
+
+cd ..
+echo "Updating code source bucket in template with $1"
+replace="s/%%SOLUTION_ID%%/$SOLUTION_ID/g"
+echo "sed -i '' -e $replace $template_dist_dir/*.template"
+sed -i '' -e $replace $template_dist_dir/*.template
+replace="s/%%BUCKET_NAME%%/$1/g"
+echo "sed -i '' -e $replace $template_dist_dir/*.template"
+sed -i '' -e $replace $template_dist_dir/*.template
+replace="s/%%SOLUTION_NAME%%/$2/g"
+echo "sed -i '' -e $replace $template_dist_dir/*.template"
+sed -i '' -e $replace $template_dist_dir/*.template
+replace="s/%%VERSION%%/$3/g"
+echo "sed -i '' -e $replace $template_dist_dir/*.template"
+sed -i '' -e $replace $template_dist_dir/*.template
+
+echo "------------------------------------------------------------------------------"
+echo "[Rebuild] Example Function"
+echo "------------------------------------------------------------------------------"
+cd $source_dir/example-function-js
+npm run build
+cp ./dist/example-function-js.zip $build_dist_dir/example-function-js.zip
diff --git a/deployment/example.yaml b/deployment/example.yaml
new file mode 100644
index 00000000..c8ed3a11
--- /dev/null
+++ b/deployment/example.yaml
@@ -0,0 +1,103 @@
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License").
+# You may not use this file except in compliance with the License.
+# A copy of the License is located at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# or in the "license" file accompanying this file.
This file is distributed +# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +# express or implied. See the License for the specific language governing +# permissions and limitations under the License. +# +# Template for AWS Example Solution +# +AWSTemplateFormatVersion: 2010-09-09 + +Description: (%%SOLUTION_ID%%) - AWS Example Solution %%VERSION%% - Main Template + +Parameters: + # Sample parameter 1 + Param1: + Description: Parameter 1. + Type: String + +Metadata: + AWS::CloudFormation::Interface: + ParameterGroups: + - Label: + default: Section 1 + Parameters: + - Param1 + ParameterLabels: + Param1: + default: Parameter #1 + +Mappings: + MetricsMap: + Send-Data: + SendAnonymousData: "Yes" # change to 'No' if needed + + SourceCode: + General: + S3Bucket: "%%BUCKET_NAME%%" + KeyPrefix: "%%SOLUTION_NAME%%/%%VERSION%%" + + AWSSDK: + UserAgent: + Extra: "AWSSOLUTION/%%SOLUTION_ID%%/%%VERSION%%" + +Resources: + # + # Hello Function + # [HelloFunction, HelloFunctionRole] + # + HelloFunction: + Type: AWS::Lambda::Function + Properties: + Description: Example Solution - Lambda function to send greetings + Environment: + Variables: + LOG_LEVEL: 'INFO' # change to WARN, ERROR or DEBUG as needed + AWS_SDK_USER_AGENT_EXTRA: !FindInMap ["AWSSDK", "UserAgent", "Extra"] + Handler: index.handler + Role: !GetAtt HelloFunctionRole.Arn + Code: + S3Bucket: !Join ["-", [!FindInMap ["SourceCode", "General", "S3Bucket"], Ref: "AWS::Region"]] + S3Key: !Join ["/", [!FindInMap ["SourceCode", "General", "KeyPrefix"], "example-function-js.zip"]] + Runtime: nodejs12.x + Timeout: 300 + + HelloFunctionRole: + Type: AWS::IAM::Role + Metadata: + cfn_nag: + rules_to_suppress: + - id: W11 + reason: Override the IAM role to allow support:* for logs:PutLogEvents resource on its permissions policy + Properties: + AssumeRolePolicyDocument: + Version: '2012-10-17' + Statement: + - Effect: Allow + Principal: + Service: lambda.amazonaws.com + Action: sts:AssumeRole + Path: / + Policies: + - PolicyName: !Sub Hello-Function-Policy-${AWS::StackName}-${AWS::Region} + PolicyDocument: + Version: '2012-10-17' + Statement: + - Effect: Allow + Action: + - logs:CreateLogGroup + - logs:CreateLogStream + - logs:PutLogEvents + Resource: !Sub arn:aws:logs:${AWS::Region}:${AWS::AccountId}:log-group:/aws/lambda/* + +Outputs: + HelloFunction: + Description: Ref value of HelloFunction + Value: !Ref 'HelloFunction' diff --git a/deployment/run-unit-tests.sh b/deployment/run-unit-tests.sh new file mode 100755 index 00000000..49738dc0 --- /dev/null +++ b/deployment/run-unit-tests.sh @@ -0,0 +1,41 @@ +#!/bin/bash +# +# This assumes all of the OS-level configuration has been completed and git repo has already been cloned +# +# This script should be run from the repo's deployment directory +# cd deployment +# ./run-unit-tests.sh +# + +# Get reference for all important folders +template_dir="$PWD" +source_dir="$template_dir/../source" + +echo "------------------------------------------------------------------------------" +echo "[Init] Clean old dist and node_modules folders" +echo "------------------------------------------------------------------------------" +echo "find $source_dir/services -iname "node_modules" -type d -exec rm -r "{}" \; 2> /dev/null" +find $source_dir/services -iname "node_modules" -type d -exec rm -r "{}" \; 2> /dev/null +echo "find $source_dir/services -iname "dist" -type d -exec rm -r "{}" \; 2> /dev/null" +find $source_dir/services -iname "dist" -type d -exec rm -r "{}" \; 2> /dev/null +echo "find ../ 
-type f -name 'package-lock.json' -delete" +find $source_dir/services -type f -name 'package-lock.json' -delete +echo "find $source_dir/resources -iname "node_modules" -type d -exec rm -r "{}" \; 2> /dev/null" +find $source_dir/resources -iname "node_modules" -type d -exec rm -r "{}" \; 2> /dev/null +echo "find $source_dir/resources -iname "dist" -type d -exec rm -r "{}" \; 2> /dev/null" +find $source_dir/resources -iname "dist" -type d -exec rm -r "{}" \; 2> /dev/null +echo "find ../ -type f -name 'package-lock.json' -delete" +find $source_dir/resources -type f -name 'package-lock.json' -delete +echo "find $source_dir/simulator -iname "node_modules" -type d -exec rm -r "{}" \; 2> /dev/null" +find $source_dir/simulator -iname "node_modules" -type d -exec rm -r "{}" \; 2> /dev/null +echo "find $source_dir/simulator -iname "dist" -type d -exec rm -r "{}" \; 2> /dev/null" +find $source_dir/simulator -iname "dist" -type d -exec rm -r "{}" \; 2> /dev/null +echo "find ../ -type f -name 'package-lock.json' -delete" +find $source_dir/simulator -type f -name 'package-lock.json' -delete + +echo "------------------------------------------------------------------------------" +echo "[Test] Services - Example Function" +echo "------------------------------------------------------------------------------" +cd $source_dir/example-function-js +npm install +npm test diff --git a/initialize-repo.sh b/initialize-repo.sh new file mode 100755 index 00000000..ec8ca955 --- /dev/null +++ b/initialize-repo.sh @@ -0,0 +1,48 @@ +#!/bin/bash + +# This script initializes a git repo using the current directory name as the solution name. +solution_name=`echo ${PWD##*/} | tr '[:upper:]' '[:lower:]'` + +echo "Solution S3 location will be configured to this repo name: $solution_name" +echo "Please provide the solution ID (e.g. 
SO0xyz):"
+read solution_id
+echo "Please provide a solution name for the README.md file:"
+read readme_name
+echo "Please provide an initial description for the README.md file:"
+read solution_description
+
+# Update CONTRIBUTING.md from $solution_name
+replace="s/%%SOLUTION_NAME%%/$solution_name/g"
+echo "sed -i '' -e $replace CONTRIBUTING.md"
+sed -i '' -e $replace CONTRIBUTING.md
+
+# Update build-s3-dist.sh with $solution_id
+replace="s/%%SOLUTION_ID_VAL%%/$solution_id/g"
+echo "sed -i '' -e '$replace' deployment/build-s3-dist.sh"
+sed -i '' -e "$replace" deployment/build-s3-dist.sh
+
+# Rename example.yaml to $solution_name.yaml
+mv deployment/example.yaml deployment/$solution_name.yaml
+
+# Update README.md solution name with $readme_name
+replace="s/%%SOLUTION_NAME%%/$readme_name/g"
+echo "sed -i '' -e '$replace' README.md"
+sed -i '' -e "$replace" README.md
+
+# Update NOTICE.txt with $readme_name
+echo "sed -i '' -e $replace NOTICE.txt"
+sed -i '' -e "$replace" NOTICE.txt
+
+# Update README.md description with $solution_description
+replace="s/%%SOLUTION_DESCRIPTION%%/$solution_description/g"
+echo "sed -i '' -e '$replace' README.md"
+sed -i '' -e "$replace" README.md
+
+# Remove TODO.md
+rm TODO.md
+
+# Remove copy-repo.sh script
+rm copy-repo.sh
+
+# Remove this initialization script
+rm initialize-repo.sh
diff --git a/sonar-project.properties b/sonar-project.properties
new file mode 100644
index 00000000..35efd502
--- /dev/null
+++ b/sonar-project.properties
@@ -0,0 +1,44 @@
+# NOTE:
+# - Do not include sonar.projectVersion or sonar.projectName as these are set automatically by the pipeline
+# - Customize sonar.sources, sonar.exclusions, sonar.coverage.exclusions, sonar.tests and sonar
+#   unit test coverage reports based on your project
+
+# Refer to https://docs.sonarqube.org/latest/project-administration/narrowing-the-focus/
+# for details on sources and exclusions. Note also .gitignore which is used by sonarqube
+#
+sonar.sources=source, deployment
+
+# Focusing sonarqube analysis on non test code first and reducing noise from analysis of test code. Projects
+# can customize the exclusions to include analyzing of test code if desired
+sonar.exclusions=\
+    **/test/**, \
+    **/tests/**, \
+    **/test*, \
+    **/node_modules/**, \
+    deployment/*-assets/**, \
+    deployment/open-source
+
+# Code coverage Specific Properties
+sonar.coverage.exclusions=\
+    **/jest.config.js
+
+## Python Specific Properties
+# coverage
+# https://docs.sonarqube.org/pages/viewpage.action?pageId=4784149
+# Comma-separated list of ant pattern describing paths to coverage reports, relative to projects
+# root. Leave unset to use the default ("coverage-reports/*coverage-*.xml").
+# +# Example: Uncomment next section for python code +#sonar.python.coverage.reportPaths=source/tests/coverage-reports/*.coverage.xml + +# Sensor SonarJS Coverage [javascript] was not allowing globbing +# for sonar.javascript.lcov.reportPaths such as this +# source/test/coverage-reports/jest/*/lcov.info +# so we have to provide an explicit list of reportPaths +# +# Example: Uncomment next section for javascript/typescript code +#sonar.javascript.lcov.reportPaths= \ +# source/test/coverage-reports/jest/example-function-js/lcov.info + +# Encoding of the source files +sonar.sourceEncoding=UTF-8 From c6613bbf04243b742d445477a5e9c5e22508854d Mon Sep 17 00:00:00 2001 From: Ning Date: Mon, 13 Nov 2023 09:36:39 +0800 Subject: [PATCH 16/37] chore: remove unused file --- deployment/example.yaml | 103 ---------------------------------------- 1 file changed, 103 deletions(-) delete mode 100644 deployment/example.yaml diff --git a/deployment/example.yaml b/deployment/example.yaml deleted file mode 100644 index c8ed3a11..00000000 --- a/deployment/example.yaml +++ /dev/null @@ -1,103 +0,0 @@ -# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"). -# You may not use this file except in compliance with the License. -# A copy of the License is located at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# or in the "license" file accompanying this file. This file is distributed -# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -# express or implied. See the License for the specific language governing -# permissions and limitations under the License. -# -# Template for AWS Example Solution -# -AWSTemplateFormatVersion: 2010-09-09 - -Description: (%%SOLUTION_ID%%) - AWS Example Solution %%VERSION%% - Main Template - -Parameters: - # Sample parameter 1 - Param1: - Description: Parameter 1. 
- Type: String - -Metadata: - AWS::CloudFormation::Interface: - ParameterGroups: - - Label: - default: Section 1 - Parameters: - - Param1 - ParameterLabels: - Param1: - default: Parameter #1 - -Mappings: - MetricsMap: - Send-Data: - SendAnonymousData: "Yes" # change to 'No' if needed - - SourceCode: - General: - S3Bucket: "%%BUCKET_NAME%%" - KeyPrefix: "%%SOLUTION_NAME%%/%%VERSION%%" - - AWSSDK: - UserAgent: - Extra: "AWSSOLUTION/%%SOLUTION_ID%%/%%VERSION%%" - -Resources: - # - # Hello Function - # [HelloFunction, HelloFunctionRole] - # - HelloFunction: - Type: AWS::Lambda::Function - Properties: - Description: Example Solution - Lambda function to send greetings - Environment: - Variables: - LOG_LEVEL: 'INFO' # change to WARN, ERROR or DEBUG as needed - AWS_SDK_USER_AGENT_EXTRA: !FindInMap ["AWSSDK", "UserAgent", "Extra"] - Handler: index.handler - Role: !GetAtt HelloFunctionRole.Arn - Code: - S3Bucket: !Join ["-", [!FindInMap ["SourceCode", "General", "S3Bucket"], Ref: "AWS::Region"]] - S3Key: !Join ["/", [!FindInMap ["SourceCode", "General", "KeyPrefix"], "example-function-js.zip"]] - Runtime: nodejs12.x - Timeout: 300 - - HelloFunctionRole: - Type: AWS::IAM::Role - Metadata: - cfn_nag: - rules_to_suppress: - - id: W11 - reason: Override the IAM role to allow support:* for logs:PutLogEvents resource on its permissions policy - Properties: - AssumeRolePolicyDocument: - Version: '2012-10-17' - Statement: - - Effect: Allow - Principal: - Service: lambda.amazonaws.com - Action: sts:AssumeRole - Path: / - Policies: - - PolicyName: !Sub Hello-Function-Policy-${AWS::StackName}-${AWS::Region} - PolicyDocument: - Version: '2012-10-17' - Statement: - - Effect: Allow - Action: - - logs:CreateLogGroup - - logs:CreateLogStream - - logs:PutLogEvents - Resource: !Sub arn:aws:logs:${AWS::Region}:${AWS::AccountId}:log-group:/aws/lambda/* - -Outputs: - HelloFunction: - Description: Ref value of HelloFunction - Value: !Ref 'HelloFunction' From 5f86f0f0f12321347f12624994f3af07bb3d4d4b Mon Sep 17 00:00:00 2001 From: Ning Date: Mon, 13 Nov 2023 18:58:56 +0800 Subject: [PATCH 17/37] chore: restructure --- .gitignore | 2 +- codescan-prebuild-custom.sh | 112 ------- copy-repo.sh | 38 --- {img => docs/images}/arch.png | Bin initialize-repo.sh | 48 --- source/sample/dth.txt | 18 ++ source/sample/embedding_wrapper.py | 277 ++++++++++++++++++ source/sample/langchain_sample/csdc_llm.py | 239 +++++++++++++++ source/sample/langchain_sample/llm_wrapper.py | 262 +++++++++++++++++ source/sample/requirements.txt | 7 + source/sample/sagemaker_utils.py | 159 ++++++++++ source/sample/sample_llm.py | 19 ++ source/sample/sample_sm.py | 136 +++++++++ {script => source/sample/script}/README.md | 0 {script => source/sample/script}/ec2config.sh | 0 {script => source/sample/script}/inference.py | 0 {script => source/sample/script}/nginx.sh | 0 .../sagemaker/buffer-cross-001-deploy.ipynb | 0 .../buffer-embedding-002-deploy.ipynb | 0 .../buffer-instruct-internlm-001-deploy.ipynb | 0 20 files changed, 1118 insertions(+), 199 deletions(-) delete mode 100644 codescan-prebuild-custom.sh delete mode 100755 copy-repo.sh rename {img => docs/images}/arch.png (100%) delete mode 100755 initialize-repo.sh create mode 100644 source/sample/dth.txt create mode 100644 source/sample/embedding_wrapper.py create mode 100644 source/sample/langchain_sample/csdc_llm.py create mode 100644 source/sample/langchain_sample/llm_wrapper.py create mode 100644 source/sample/requirements.txt create mode 100644 source/sample/sagemaker_utils.py create mode 100644 
source/sample/sample_llm.py create mode 100644 source/sample/sample_sm.py rename {script => source/sample/script}/README.md (100%) rename {script => source/sample/script}/ec2config.sh (100%) rename {script => source/sample/script}/inference.py (100%) rename {script => source/sample/script}/nginx.sh (100%) rename {script => source/sample/script}/sagemaker/buffer-cross-001-deploy.ipynb (100%) rename {script => source/sample/script}/sagemaker/buffer-embedding-002-deploy.ipynb (100%) rename {script => source/sample/script}/sagemaker/buffer-instruct-internlm-001-deploy.ipynb (100%) diff --git a/.gitignore b/.gitignore index 5d2f4f1c..f4d40475 100644 --- a/.gitignore +++ b/.gitignore @@ -47,7 +47,7 @@ junit.xml !/.eslintrc.json /assets/ !/cdk.json -/cdk.out/ +**/cdk.out .cdk.staging/ .parcel-cache/ cdk.context.json diff --git a/codescan-prebuild-custom.sh b/codescan-prebuild-custom.sh deleted file mode 100644 index a4214e7e..00000000 --- a/codescan-prebuild-custom.sh +++ /dev/null @@ -1,112 +0,0 @@ -#!/bin/bash -#-------------------------------------------------------------------- -# Usage: this script must exit with a non-zero return code if the -# Viperlight scan fails. -#-------------------------------------------------------------------- - -source_dir='./source' # May need to adjust this for your repo, but this - # should generally work -viperlight_temp=/tmp/viperlight_scan # should work in most environments -export PATH=$PATH:../viperlight/bin - -failed_scans=0 - -if [ -d $viperlight_temp ]; then - rm $viperlight_temp/* - rmdir $viperlight_temp -fi -viperlight_temp=/tmp/viperlight_scan -mkdir $viperlight_temp - -scan_npm() { - echo ----------------------------------------------------------- - echo NPM Scanning $1 - echo ----------------------------------------------------------- - folder_path=`dirname $1` - viperlight scan -t $folder_path -m node-npmaudit -m node-npmoutdated - rc=$? - if [ $rc -eq 0 ]; then - echo SUCCESS - elif [ $rc -eq 42 ]; then - echo NOTHING TO SCAN - else - echo FAILED rc=$rc - ((failed_scans=failed_scans+1)) - fi -} - -scan_py() { - echo ----------------------------------------------------------- - echo Python Scanning $1 - echo ----------------------------------------------------------- - folder_path=`dirname $1` - viperlight scan -t $folder_path -m python-piprot -m python-safety - rc=$? - if [ $rc -eq 0 ]; then - echo SUCCESS - elif [ $rc -eq 42 ]; then - echo NOTHING TO SCAN - else - echo FAILED rc=$rc - ((failed_scans=failed_scans+1)) - fi -} - -echo ----------------------------------------------------------- -echo Environment -echo ----------------------------------------------------------- -echo npm `npm --version` -echo `python --version` - -echo ----------------------------------------------------------- -echo Update npm to latest -echo ----------------------------------------------------------- -npm install -g npm@latest - -echo ----------------------------------------------------------- -echo Scanning all Nodejs projects -echo ----------------------------------------------------------- -find $source_dir -name package.json | grep -v node_modules | while read folder - do - echo $folder >> $viperlight_temp/scan_npm_list.txt - done -while read folder - do - scan_npm $folder - done < $viperlight_temp/scan_npm_list.txt - -echo ----------------------------------------------------------- -echo Scanning all python projects -echo ----------------------------------------------------------- -find . 
-name requirements.txt | while read folder - do - echo $folder >> $viperlight_temp/scan_python_list.txt - done - -while read folder - do - if [[ -z $pi_scans_installed ]]; then - echo Installing piprot and safety - pip install piprot safety - pi_scans_installed=YES - fi - scan_py $folder - done < $viperlight_temp/scan_python_list.txt - -echo ----------------------------------------------------------- -echo Scanning everywhere else -echo ----------------------------------------------------------- -viperlight scan -rc=$? -if [ $rc -gt 0 ]; then - ((failed_scans=failed_scans+1)) -fi - -if [ $failed_scans == 0 ] -then - echo Scan completed successfully -else - echo $failed_scans scans failed. Check previous messages for findings. -fi - -exit $failed_scans diff --git a/copy-repo.sh b/copy-repo.sh deleted file mode 100755 index 118d922a..00000000 --- a/copy-repo.sh +++ /dev/null @@ -1,38 +0,0 @@ -#!/bin/bash -echo "Path to repo to copy to (e.g. ../new-solution-name): " -read solution_path -echo "Initialize repo (y/n)? [N]:" -read init_repo -if [ .$init_repo = "." ]; then - init_repo="n" -fi -init_repo=$(echo $init_repo | tr '[:upper:]' '[:lower:]') - -cp -r * $solution_path -mkdir -p $solution_path/.github -cp -r .github/* $solution_path/.github/ -cp .gitignore $solution_path -cp .viperlight* $solution_path - -echo "Viperlight: enter 'y' to use the custom codescan script, codescan-prebuild-custom.sh:" -echo -e "- runs python scans where there is a requirements.txt" -echo -e "- updates environment to npm@latest (regardless of whether npm is used)" -echo -e "- runs node scans where there is a package.json" -echo -e "- runs viperlight scan from the root" -echo -e "\nInstall codescan-prebuild-custom.sh (y/n)? [Y]:" -read use_custom_script -if [ .$use_custom_script = "." ]; then - use_custom_script="y" -fi -use_custom_script=$(echo $use_custom_script | tr '[:upper:]' '[:lower:]') - -if [ $use_custom_script = "y" ]; then - cp codescan-prebuild-custom.sh $solution_path - chmod +x $solution_path/codescan-prebuild-custom.sh -fi - -if [ $init_repo = "y" ]; then - cd $solution_path - chmod +x initialize-repo.sh - ./initialize-repo.sh -fi diff --git a/img/arch.png b/docs/images/arch.png similarity index 100% rename from img/arch.png rename to docs/images/arch.png diff --git a/initialize-repo.sh b/initialize-repo.sh deleted file mode 100755 index ec8ca955..00000000 --- a/initialize-repo.sh +++ /dev/null @@ -1,48 +0,0 @@ -#!/bin/bash - -# This script initializes a git repo using the current directory name as the solution name. -solution_name=`echo ${PWD##*/} | tr '[:upper:]' '[:lower:]'` - -echo "Solution S3 location will be configured to this repo name: $solution_name" -echo "Please provide the solution ID (e.g. 
SO0xyz):" -read solution_id -echo "Please provide a solution name for the README.md file:" -read readme_name -echo "Please provide an initial description for the README.md file:" -read solution_description - -# Update CONTRIBUTING.md from $solution_name -replace="s/%%SOLUTION_NAME%%/$solution_name/g" -echo "sed -i '' -e $replace CONTRIBUTING.md" -sed -i '' -e $replace CONTRIBUTING.md - -# Update build-s3-dist.sh with $solution_id -replace="s/%%SOLUTION_ID_VAL%%/$solution_id/g" -echo "sed -i '' -e '$replace' deployment/build-s3-dist.sh" -sed -i '' -e "$replace" deployment/build-s3-dist.sh - -# Rename example.yaml to $solution_name.yaml -mv deployment/example.yaml deployment/$solution_name.yaml - -# Update README.md solution name with $readme_name -replace="s/%%SOLUTION_NAME%%/$readme_name/g" -echo "sed -i '' -e '$replace' README.md" -sed -i '' -e "$replace" README.md - -# Update NOTICE.txt from $solution_name -echo "sed -i '' -e $replace NOTICE.txt" -sed -i '' -e "$replace" NOTICE.txt - -# Update README.md description with $solution_description -replace="s/%%SOLUTION_DESCRIPTION%%/$solution_description/g" -echo "sed -i '' -e '$replace' README.md" -sed -i '' -e "$replace" README.md - -# Remove TODO.md -rm TODO.md - -# Remove copy-repo.sh script -rm copy-repo.sh - -# Remove this initalization script -rm initialize-repo.sh diff --git a/source/sample/dth.txt b/source/sample/dth.txt new file mode 100644 index 00000000..1c73643b --- /dev/null +++ b/source/sample/dth.txt @@ -0,0 +1,18 @@ +Data Transfer Hub (数据传输解决方案) + +轻松将数据移入和移出 AWS 中国区域 + +概览 + +此解决方案可为 Amazon Simple Storage Service (Amazon S3) 对象和 Amazon Elastic Container Registry (Amazon ECR) 映像提供安全、可扩展且可追踪的数据传输。使用数据传输解决方案,您可以执行以下任何任务:在 AWS S3 之间传输对象 + +优势 + +直观的用户界面 +客户可在用户界面上为 Amazon S3 对象和 Amazon ECR 映像创建和管理数据传输任务。 + +支持各类源 +将数据从其他云服务商的对象存储服务(包括阿里云 OSS,腾讯 COS,七牛 Kodo 以及其他兼容 Amazon S3 的云存储服务)传输到 Amazon S3。在 Amazon ECR 之间传输容器镜像。将容器镜像从公共容器镜像仓库(例如 Docker Hub、Google gcr.io 和 Red Hat Quay.io)传输到 Amazon ECR。 + +无服务器架构 +传输任务可按需使用并随用随付。有关更多信息,请参阅实施指南的“成本”部分。 \ No newline at end of file diff --git a/source/sample/embedding_wrapper.py b/source/sample/embedding_wrapper.py new file mode 100644 index 00000000..f0eaf2cf --- /dev/null +++ b/source/sample/embedding_wrapper.py @@ -0,0 +1,277 @@ +import asyncio +import json +import os +from functools import partial +from typing import Any, Dict, List, Optional + +from langchain.embeddings.base import Embeddings +# from langchain.pydantic_v1 import BaseModel, Extra, root_validator + +import os +import time +import logging +import boto3 +import tempfile +import numpy as np + +from langchain.vectorstores import OpenSearchVectorSearch +from langchain.document_loaders.unstructured import UnstructuredFileLoader +from langchain.text_splitter import RecursiveCharacterTextSplitter + +from opensearchpy import RequestsHttpConnection +from sagemaker_utils import create_sagemaker_embeddings_from_js_model, SagemakerEndpointVectorOrCross +from requests_aws4auth import AWS4Auth + +s3 = boto3.resource('s3') +aws_region = boto3.Session().region_name +credentials = boto3.Session().get_credentials() +awsauth = AWS4Auth(credentials.access_key, credentials.secret_key, aws_region, 'es', session_token=credentials.token) + +logger = logging.getLogger() +logger.setLevel(logging.INFO) + +MAX_FILE_SIZE = 1024*1024*100 # 100MB +MAX_OS_DOCS_PER_PUT = 500 +CHUNK_SIZE_FOR_DOC_SPLIT = 600 +CHUNK_OVERLAP_FOR_DOC_SPLIT = 20 + +class CSDCEmbeddings: + """CSDC embedding models. 
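+
+    Embeds documents stored in S3 using a SageMaker-hosted embedding endpoint.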
+
+    To authenticate, the AWS client uses the following methods to
+    automatically load credentials:
+    https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html
+
+    If a specific credential profile should be used, you must pass
+    the name of the profile from the ~/.aws/credentials file that is to be used.
+
+    Make sure the credentials / roles used have the required policies to
+    access the CSDC service.
+
+    Example:
+        .. code-block:: python
+
+            from llm_utils import CSDCEmbeddings
+
+            embeddings = CSDCEmbeddings(region = 'us-east-1', aosEndpointName = 'Amazon OpenSearch Service Domain Endpoint')
+            doc_result = embeddings.embed_documents(bucketName=..., prefix=...)
+            logging.info(f"doc_result is {doc_result}, the type of doc_result is {type(doc_result)}")
+    """
+
+    client: Any #: :meta private:
+    """CSDC client."""
+    region_name: Optional[str] = None
+    """The AWS region, e.g., `us-west-2`. Falls back to the AWS_DEFAULT_REGION env variable
+    or region specified in ~/.aws/config in case it is not provided here.
+    """
+
+    credentials_profile_name: Optional[str] = None
+    """The name of the profile in the ~/.aws/credentials or ~/.aws/config files, which
+    has either access keys or role information specified.
+    If not specified, the default credential profile or, if on an EC2 instance,
+    credentials from IMDS will be used.
+    See: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html
+    """
+
+    model_id: str = "csdc-default-model"
+    """Id of the model to call, e.g., csdc-default-model, this is
+    equivalent to the modelId property in the list-foundation-models api"""
+
+    model_kwargs: Optional[Dict] = None
+    """Keyword arguments to pass to the model."""
+
+    endpoint_url: Optional[str] = None
+    """Needed if you don't want to default to us-east-1 endpoint"""
+
+    class Config:
+        """Configuration for this pydantic object."""
+
+        # extra = Extra.forbid
+
+    # @root_validator()
+    def validate_environment(cls, values: Dict) -> Dict:
+        """Validate that AWS credentials and the boto3 package exist in the environment."""
+
+        if values["client"] is not None:
+            return values
+
+        try:
+            import boto3
+
+            if values["credentials_profile_name"] is not None:
+                session = boto3.Session(profile_name=values["credentials_profile_name"])
+            else:
+                # use default credentials
+                session = boto3.Session()
+
+            client_params = {}
+            if values["region_name"]:
+                client_params["region_name"] = values["region_name"]
+
+            if values["endpoint_url"]:
+                client_params["endpoint_url"] = values["endpoint_url"]
+
+            values["client"] = session.client("sagemaker-runtime", **client_params)
+
+        except ImportError:
+            raise ModuleNotFoundError(
+                "Could not import boto3 python package. "
+                "Please install it with `pip install boto3`."
+            )
+        except Exception as e:
+            raise ValueError(
+                "Could not load credentials to authenticate with AWS client. "
+                "Please check that credentials in the specified "
+                "profile name are valid."
+            ) from e
+
+        return values
+
+    def _embedding_func_legacy(self, text: str) -> List[float]:
+        """Call out to CSDC embedding endpoint."""
+        # replace newlines, which can negatively affect performance.
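+        # Request/response contract assumed here: the endpoint takes JSON
+        # {"inputs": "<text>", ...model_kwargs} and answers with
+        # {"sentence_embeddings": [...]}.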
+        text = text.replace(os.linesep, " ")
+        _model_kwargs = self.model_kwargs or {}
+        content_type = "application/json"
+
+        input_body = {"inputs": text, **_model_kwargs}
+        body = json.dumps(input_body)
+
+        try:
+            response = self.client.invoke_endpoint(
+                EndpointName = self.endpoint_url, Body=body, ContentType=content_type
+            )
+            response_body = json.loads(response['Body'].read().decode("utf-8"))
+            return response_body.get('sentence_embeddings')
+        except Exception as e:
+            raise ValueError(f"Error raised by inference endpoint: {e}")
+
+    def embed_documents_legacy(self, texts: List[str]) -> List[List[float]]:
+        """Compute doc embeddings using a CSDC model.
+
+        Args:
+            texts: The list of texts to embed
+
+        Returns:
+            List of embeddings, one for each text.
+        """
+        results = []
+        for text in texts:
+            response = self._embedding_func_legacy(text)
+            results.append(response)
+        return results
+
+    async def aembed_query(self, text: str) -> List[float]:
+        """Asynchronously compute query embeddings using a CSDC model.
+
+        Args:
+            text: The text to embed.
+
+        Returns:
+            Embeddings for the text.
+        """
+
+        return await asyncio.get_running_loop().run_in_executor(
+            None, partial(self.embed_query, text)
+        )
+
+    async def aembed_documents(self, texts: List[str]) -> List[List[float]]:
+        """Asynchronously compute doc embeddings using a CSDC model.
+
+        Args:
+            texts: The list of texts to embed
+
+        Returns:
+            List of embeddings, one for each text.
+        """
+
+        result = await asyncio.gather(*[self.aembed_query(text) for text in texts])
+
+        return list(result)
+
+    def _construct_shard(self, bucketName: str, prefix: str, embeddingEndpointName: str) -> list:
+        """Inner helper function to construct a shard of documents.
+
+        Args:
+            bucketName (str): S3 bucket that holds the source documents.
+            prefix (str): Key prefix under which the documents are stored.
+            embeddingEndpointName (str): SageMaker embedding endpoint name, recorded
+                in each document's metadata.
+
+        Returns:
+            list: The first shard, a list of at most MAX_OS_DOCS_PER_PUT Document chunks.
+        """
+        docs = []
+        document_bucket = s3.Bucket(bucketName)
+        for obj in document_bucket.objects.filter(Prefix=prefix):
+            if obj.key.endswith("/"):
+                continue
+            else:
+                with tempfile.TemporaryDirectory(dir='/tmp') as temp_dir:
+                    file_path = f"{temp_dir}/{obj.key}"
+                    logging.info(f"bucketName={bucketName}, obj.key={obj.key}, file_path={file_path}")
+                    os.makedirs(os.path.dirname(file_path), exist_ok=True)
+                    s3.meta.client.download_file(bucketName, obj.key, file_path)
+
+                    loader = UnstructuredFileLoader(file_path)
+                    docs.extend(loader.load())
+
+        # add a custom metadata field, timestamp and embeddings_model
+        for doc in docs:
+            doc.metadata['timestamp'] = time.time()
+            doc.metadata['embeddings_model'] = embeddingEndpointName
+
+        text_splitter = RecursiveCharacterTextSplitter(
+            chunk_size = CHUNK_SIZE_FOR_DOC_SPLIT,
+            chunk_overlap = CHUNK_OVERLAP_FOR_DOC_SPLIT,
+            length_function = len,
+        )
+
+        chunks = text_splitter.create_documents([doc.page_content for doc in docs], metadatas=[doc.metadata for doc in docs])
+
+        db_shards = (len(chunks) // MAX_OS_DOCS_PER_PUT) + 1
+        shards = np.array_split(chunks, db_shards)
+        return shards[0].tolist()
+
+    def _embedding_func(self, shard):
+        """Call out to CSDC embedding endpoint.
+        Args:
+            shard (list): must be a list of documents, sample format as follows:
+                [
+                    Document(
+                        page_content='Data Transfer Hub (数据传输解决方案)\n\n轻松将数据移入和移出 AWS 中国区域\n\n概览\n\n此解决方案可为 Amazon Simple Storage Service (Amazon S3) 对象和 Amazon Elastic Container Registry (Amazon ECR) 映像提供安全、可扩展且可追踪的数据传输。使用数据传输解决方案,您可以执行以下任何任务:在 AWS S3 之间传输对象\n\n优势\n\n直观的用户界面 客户可在用户界面上为 Amazon S3 对象和 Amazon ECR 映像创建和管理数据传输任务。\n\n支持各类源 将数据从其他云服务商的对象存储服务(包括阿里云 OSS,腾讯 COS,七牛 Kodo 以及其他兼容 Amazon S3 的云存储服务)传输到 Amazon S3。在 Amazon ECR 之间传输容器镜像。将容器镜像从公共容器镜像仓库(例如 Docker Hub、Google gcr.io 和 Red Hat Quay.io)传输到 Amazon ECR。\n\n无服务器架构\n\n传输任务可按需使用并随用随付。有关更多信息,请参阅实施指南的“成本”部分。',
+                        metadata={
+                            'source': '/tmp/tmpmmod0k9m/csdc/dth.txt',
+                            'timestamp': 1693494146.1509278,
+                            'embeddings_model': 'embedding-endpoint'
+                        })
+                ]
+        Returns:
+            SagemakerEndpointEmbeddings: an embeddings client bound to the configured endpoint.
+        """
+        embeddings = create_sagemaker_embeddings_from_js_model(self.endpoint_url, self.region_name)
+        return embeddings
+
+    def embed_documents(self, bucketName: str, prefix: str) -> List[List[float]]:
+        """Compute doc embeddings using a CSDC model.
+        Args:
+            bucketName (str): The name of the bucket to embed
+            prefix (str): The prefix of the bucket to embed
+        Returns:
+            List of embeddings, one for each text.
+        """
+        shard = self._construct_shard(bucketName, prefix, self.endpoint_url)
+        embeddings = self._embedding_func(shard)
+        return embeddings.embed_documents([str(shard[0])])
+
+    def embed_query(self, text: str) -> List[float]:
+        """Compute query embeddings using a CSDC model.
+
+        Args:
+            text: The text to embed.
+
+        Returns:
+            Embeddings for the text.
+        """
+        embeddings = self._embedding_func(text)
+        return embeddings.embed_documents([text])
diff --git a/source/sample/langchain_sample/csdc_llm.py b/source/sample/langchain_sample/csdc_llm.py
new file mode 100644
index 00000000..1a223bb4
--- /dev/null
+++ b/source/sample/langchain_sample/csdc_llm.py
@@ -0,0 +1,239 @@
+import json
+from abc import ABC
+from typing import Any, Dict, List, Mapping, Optional
+
+from langchain.callbacks.manager import CallbackManagerForLLMRun
+from langchain.llms.base import LLM
+from langchain.llms.utils import enforce_stop_tokens
+from langchain.pydantic_v1 import BaseModel, Extra, root_validator
+
+
+class LLMInputOutputAdapter:
+    """Adapter class to prepare the inputs from Langchain to a format
+    that the LLM model expects.
+
+    It also provides a helper function to extract
+    the generated text from the model response."""
+
+    @classmethod
+    def prepare_input(
+        cls, provider: str, prompt: str, streaming: bool, model_kwargs: Dict[str, Any]
+    ) -> Dict[str, Any]:
+        input_body = {**model_kwargs}
+        if provider == "CSDC":
+            input_body = dict()
+            input_body["inputs"] = prompt
+            input_body["history"] = []
+            input_body["parameters"] = {**model_kwargs}
+            input_body["stream"] = streaming
+        else:
+            input_body["inputText"] = prompt
+
+        return input_body
+
+    @classmethod
+    def prepare_output(cls, provider: str, response: Any) -> str:
+        if provider == "CSDC":
+            response_body = json.loads(response['Body'].read().decode("utf-8"))
+            return response_body.get('outputs')
+        else:
+            response_body = json.loads(response.get("body").read())
+            return response_body
+
+class CSDCLLMBase(BaseModel, ABC):
+    client: Any #: :meta private:
+
+    region_name: Optional[str] = None
+    """The AWS region, e.g., `us-west-2`. Falls back to the AWS_DEFAULT_REGION env variable
+    or region specified in ~/.aws/config in case it is not provided here.
+    """
+
+    credentials_profile_name: Optional[str] = None
+    """The name of the profile in the ~/.aws/credentials or ~/.aws/config files, which
+    has either access keys or role information specified.
+    If not specified, the default credential profile or, if on an EC2 instance,
+    credentials from IMDS will be used.
+    See: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html
+    """
+
+    model_id: str
+    """Id of the model to call, e.g., amazon.titan-tg1-large, this is
+    equivalent to the modelId property in the list-foundation-models api"""
+
+    model_endpoint: str
+    """SageMaker Endpoint of the model to call, e.g. instruct-endpoint"""
+
+    model_provider: Optional[str] = "CSDC"
+    """This model is provided by CSDC"""
+
+    model_kwargs: Optional[Dict] = None
+    """Keyword arguments to pass to the model."""
+
+    endpoint_url: Optional[str] = None
+    """Needed if you don't want to default to us-east-1 endpoint"""
+
+    streaming: bool = False
+    """Whether to stream the results."""
+
+    @root_validator()
+    def validate_environment(cls, values: Dict) -> Dict:
+        """Validate that AWS credentials and the boto3 package exist in the environment."""
+
+        # Skip creating new client if passed in constructor
+        if values["client"] is not None:
+            return values
+
+        try:
+            import boto3
+
+            if values["credentials_profile_name"] is not None:
+                session = boto3.Session(profile_name=values["credentials_profile_name"])
+            else:
+                # use default credentials
+                session = boto3.Session()
+
+            client_params = {}
+            if values["region_name"]:
+                client_params["region_name"] = values["region_name"]
+            if values["endpoint_url"]:
+                client_params["endpoint_url"] = values["endpoint_url"]
+
+            values["client"] = session.client("sagemaker-runtime", **client_params)
+
+        except ImportError:
+            raise ModuleNotFoundError(
+                "Could not import boto3 python package. "
+                "Please install it with `pip install boto3`."
+            )
+        except Exception as e:
+            raise ValueError(
+                "Could not load credentials to authenticate with AWS client. "
+                "Please check that credentials in the specified "
+                "profile name are valid."
+ ) from e + + return values + + @property + def _identifying_params(self) -> Mapping[str, Any]: + """Get the identifying parameters.""" + _model_kwargs = self.model_kwargs or {} + return { + **{"model_kwargs": _model_kwargs}, + } + + def _get_provider(self) -> str: + return self.model_provider if self.model_provider else self.model_id.split(".")[0] + + def _get_streaming(self) -> bool: + return self.streaming + + def _prepare_input_and_invoke( + self, + prompt: str, + stop: Optional[List[str]] = None, + run_manager: Optional[CallbackManagerForLLMRun] = None, + **kwargs: Any, + ) -> str: + _model_kwargs = self.model_kwargs or {} + + provider = self._get_provider() + streaming = self._get_streaming() + params = {**_model_kwargs, **kwargs} + input_body = LLMInputOutputAdapter.prepare_input(provider, prompt, streaming, params) + body = json.dumps(input_body).encode('utf-8') + accept = "application/json" + contentType = "application/json" + endpoint_name = self.model_endpoint + + try: + response = self.client.invoke_endpoint( + EndpointName = endpoint_name, Body=body, ContentType=contentType + ) + text = LLMInputOutputAdapter.prepare_output(provider, response) + + except Exception as e: + raise ValueError(f"Error raised by invoking CSDC LLM: {e}") + + if stop is not None: + text = enforce_stop_tokens(text, stop) + + return text + + def _prepare_input_and_invoke_stream( + self, + prompt: str, + stop: Optional[List[str]] = None, + run_manager: Optional[CallbackManagerForLLMRun] = None, + **kwargs: Any, + ) -> str: + _model_kwargs = self.model_kwargs or {} + + provider = self._get_provider() + streaming = self._get_streaming() + params = {**_model_kwargs, **kwargs} + input_body = LLMInputOutputAdapter.prepare_input(provider, prompt, streaming, params) + body = json.dumps(input_body).encode('utf-8') + accept = "application/json" + contentType = "application/json" + endpoint_name = self.model_endpoint + + try: + resp = self.client.invoke_endpoint_with_response_stream( + EndpointName=endpoint_name, + Body=body, + ContentType=contentType + ) + return resp + except Exception as e: + raise ValueError(f"Error raised by streaming inference endpoint: {e}") + + +class CSDCLLM(LLM, CSDCLLMBase): + """CSDC LLM base model. + + To authenticate, the AWS client uses the following methods to + automatically load credentials: + https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html + + If a specific credential profile should be used, you must pass + the name of the profile from the ~/.aws/credentials file that is to be used. + + Make sure the credentials / roles used have the required policies to + access the SageMaker service. + """ + + @property + def _llm_type(self) -> str: + """Return type of llm.""" + return "aws_csdc_llm" + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + + def _call( + self, + prompt: str, + stop: Optional[List[str]] = None, + run_manager: Optional[CallbackManagerForLLMRun] = None, + **kwargs: Any, + ) -> str: + """Call out to CSDC LLM model in SageMaker Endpoint. + + Args: + prompt: The prompt to pass into the model. + stop: Optional list of stop words to use when generating. + + Returns: + The string generated by the model. + + Example: + .. 
code-block:: python + + response = se("Tell me a joke.") + """ + + text = self._prepare_input_and_invoke(prompt=prompt, stop=stop, **kwargs) + + return text \ No newline at end of file diff --git a/source/sample/langchain_sample/llm_wrapper.py b/source/sample/langchain_sample/llm_wrapper.py new file mode 100644 index 00000000..62c370ce --- /dev/null +++ b/source/sample/langchain_sample/llm_wrapper.py @@ -0,0 +1,262 @@ +import io +import json + +from typing import Any, AsyncIterator, Dict, Iterator, List, Optional + +from langchain.callbacks.manager import ( + AsyncCallbackManagerForLLMRun, + CallbackManagerForLLMRun, +) +from langchain.schema.messages import ( + AIMessage, + AIMessageChunk, + BaseMessage, + ChatMessage, + HumanMessage, + SystemMessage, +) + +from langchain.chat_models.base import BaseChatModel +from csdc_llm import CSDCLLMBase +from langchain.pydantic_v1 import Extra +from langchain.schema.messages import AIMessage, BaseMessage +from langchain.schema.output import ChatGeneration, ChatGenerationChunk, ChatResult + +from langchain.llms.utils import enforce_stop_tokens + + +class ChatPromptAdapter: + """Adapter class to prepare the inputs from Langchain to prompt format + that Chat model expects. + """ + def _convert_one_message_to_text( + self, + message: BaseMessage, + human_prompt: str, + ai_prompt: str, + system_prompt: str, + ) -> str: + if isinstance(message, ChatMessage): + message_text = f"\n\n{message.role.capitalize()}: {message.content}" + elif isinstance(message, HumanMessage): + message_text = f"{human_prompt} {message.content}" + elif isinstance(message, AIMessage): + message_text = f"{ai_prompt} {message.content}" + elif isinstance(message, SystemMessage): + context = ''.join(message.content.split('\n----------------\n')[1:]) + message_text = system_prompt.format(context = context) + else: + raise ValueError(f"Got unknown type {message}") + return message_text + + def convert_messages_to_prompt_CSDC( + self, + messages: List[BaseMessage], + *, + human_prompt: str = "<|User|>:", + ai_prompt: str = "<|Bot|>:", + system_prompt: str = "<|System|>:", + ) -> str: + """Format a list of messages into a full prompt for the CSDC Chat model + Args: + messages (List[BaseMessage]): List of BaseMessage to combine. + human_prompt (str, optional): Human prompt tag. Defaults to "<|User|>:". + ai_prompt (str, optional): AI prompt tag. Defaults to "<|Bot|>:". + system_prompt (str, optional): System prompt tag. Defaults to "<|System|>:". + Returns: + str: Combined string with necessary human_prompt and ai_prompt tags. + """ + + messages = messages.copy() # don't mutate the original list + system_prompt = '以下context xml tag内的文本内容为背景知识:\n\n{context}\n\n请根据背景知识, 回答这个问题:' + + text = "".join( + self._convert_one_message_to_text(message, human_prompt = '', ai_prompt = '', system_prompt = system_prompt) + for message in messages + ) + + # trim off the trailing ' ' that might come from the "Assistant: " + return text.rstrip() + + @classmethod + def convert_messages_to_prompt( + cls, provider: str, messages: List[BaseMessage], **kwargs: Any + ) -> str: + if provider == "CSDC": + prompt = cls().convert_messages_to_prompt_CSDC(messages=messages, **kwargs) + else: + raise NotImplementedError( + f"Provider {provider} model does not support chat." + ) + return prompt + +class LineIterator: + """ + A helper class for parsing the byte stream input. 
+ + The output of the model will be in the following format: + + b'{"outputs": [" a"]}\n' + b'{"outputs": [" challenging"]}\n' + b'{"outputs": [" problem"]}\n' + ... + + While usually each PayloadPart event from the event stream will + contain a byte array with a full json, this is not guaranteed + and some of the json objects may be split acrossPayloadPart events. + + For example: + + {'PayloadPart': {'Bytes': b'{"outputs": '}} + {'PayloadPart': {'Bytes': b'[" problem"]}\n'}} + + + This class accounts for this by concatenating bytes written via the 'write' function + and then exposing a method which will return lines (ending with a '\n' character) + within the buffer via the 'scan_lines' function. + It maintains the position of the last read position to ensure + that previous bytes are not exposed again. + + For more details see: + https://aws.amazon.com/blogs/machine-learning/elevating-the-generative-ai-experience-introducing-streaming-support-in-amazon-sagemaker-hosting/ + """ + + def __init__(self, stream: Any) -> None: + self.byte_iterator = iter(stream) + self.buffer = io.BytesIO() + self.read_pos = 0 + + def __iter__(self) -> "LineIterator": + return self + + def __next__(self) -> Any: + while True: + self.buffer.seek(self.read_pos) + line = self.buffer.readline() + if line and line[-1] == ord("\n"): + self.read_pos += len(line) + return line[:-1] + try: + chunk = next(self.byte_iterator) + except StopIteration: + if self.read_pos < self.buffer.getbuffer().nbytes: + continue + raise + if "PayloadPart" not in chunk: + # Unknown Event Type + continue + self.buffer.seek(0, io.SEEK_END) + self.buffer.write(chunk["PayloadPart"]["Bytes"]) + + +class ChatCSDC(BaseChatModel, CSDCLLMBase): + @property + def _llm_type(self) -> str: + """Return type of chat model.""" + return "aws_csdc_chat" + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + + def _invoke( + self, + messages: List[BaseMessage], + stop: Optional[List[str]] = None, + run_manager: Optional[CallbackManagerForLLMRun] = None, + **kwargs: Any, + ) -> ChatResult: + provider = self._get_provider() + + prompt = ChatPromptAdapter.convert_messages_to_prompt( + provider=provider, messages=messages + ) + + params: Dict[str, Any] = {**kwargs} + if stop: + params["stop_sequences"] = stop + + completion = self._prepare_input_and_invoke( + prompt=prompt, stop=stop, run_manager=run_manager, **params + ) + + message = AIMessage(content=completion) + return ChatResult(generations=[ChatGeneration(message=message)]) + + def _stream( + self, + messages: List[BaseMessage], + stop: Optional[List[str]] = None, + run_manager: Optional[CallbackManagerForLLMRun] = None, + **kwargs: Any, + ) -> Iterator[ChatGenerationChunk]: + provider = self._get_provider() + + prompt = ChatPromptAdapter.convert_messages_to_prompt( + provider=provider, messages=messages + ) + + params: Dict[str, Any] = {**kwargs} + if stop: + params["stop_sequences"] = stop + + resp = self._prepare_input_and_invoke_stream( + prompt=prompt, stop=stop, run_manager=run_manager, **params + ) + + iterator = LineIterator(resp["Body"]) + current_completion: str = "" + for line in iterator: + resp = json.loads(line) + resp_output = resp.get("outputs")["outputs"] + if stop is not None: + # Uses same approach as below + resp_output = enforce_stop_tokens(resp_output, stop) + message_chunk = AIMessageChunk(content=resp_output) + yield ChatGenerationChunk(message=message_chunk, generations = "") + # current_completion += resp_output + 
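+                # Surface each decoded chunk to the callback manager so that
+                # token-level handlers can stream output as it arrives.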
+
+    def _astream(
+        self,
+        messages: List[BaseMessage],
+        stop: Optional[List[str]] = None,
+        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
+        **kwargs: Any,
+    ) -> AsyncIterator[ChatGenerationChunk]:
+        raise NotImplementedError(
+            """CSDC Chat doesn't support async requests at the moment."""
+        )
+
+    def _generate(
+        self,
+        messages: List[BaseMessage],
+        stop: Optional[List[str]] = None,
+        run_manager: Optional[CallbackManagerForLLMRun] = None,
+        **kwargs: Any,
+    ) -> ChatResult:
+        streaming = self._get_streaming()
+        completion = ""
+        if streaming:
+            for chunk in self._stream(messages, stop, run_manager, **kwargs):
+                completion += chunk.text
+            message = AIMessage(content=completion)
+            return ChatResult(generations=[ChatGeneration(message=message)])
+        else:
+            return self._invoke(
+                messages=messages, stop=stop, run_manager=run_manager, **kwargs
+            )
+
+    async def _agenerate(
+        self,
+        messages: List[BaseMessage],
+        stop: Optional[List[str]] = None,
+        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
+        **kwargs: Any,
+    ) -> ChatResult:
+        raise NotImplementedError(
+            """CSDC Chat doesn't support async requests at the moment."""
+        )
\ No newline at end of file
diff --git a/source/sample/requirements.txt b/source/sample/requirements.txt
new file mode 100644
index 00000000..3eb2077d
--- /dev/null
+++ b/source/sample/requirements.txt
@@ -0,0 +1,7 @@
+langchain==0.0.312
+opensearch-py==2.2.0
+boto3==1.26.114
+botocore==1.29.140
+requests_aws4auth==1.2.2
+openai==0.27.6
+tiktoken==0.3.3
diff --git a/source/sample/sagemaker_utils.py b/source/sample/sagemaker_utils.py
new file mode 100644
index 00000000..c9dbefea
--- /dev/null
+++ b/source/sample/sagemaker_utils.py
@@ -0,0 +1,159 @@
+"""
+Helper functions for using a SageMaker endpoint via LangChain
+"""
+import time
+import json
+import logging
+import re
+from typing import Any, Dict, List
+
+from langchain.llms.sagemaker_endpoint import LLMContentHandler, SagemakerEndpoint
+from langchain.embeddings import SagemakerEndpointEmbeddings
+from langchain.embeddings.sagemaker_endpoint import EmbeddingsContentHandler
+
+logger = logging.getLogger()
+logger.setLevel(logging.INFO)
+
+ """ + results = [] + _chunk_size = len(texts) if chunk_size > len(texts) else chunk_size + st = time.time() + for i in range(0, len(texts), _chunk_size): + response = self._embedding_func(texts[i:i + _chunk_size]) + results.extend(response) + time_taken = time.time() - st + logger.info(f"get results for {len(texts)} in {time_taken}s, length of embeddings list is {len(results)}") + return results + +# class for serializing/deserializing requests/responses to/from the embeddings model +class ContentHandler(EmbeddingsContentHandler): + content_type = "application/json" + accepts = "application/json" + + def transform_input(self, prompt: str, model_kwargs={}) -> bytes: + input_str = json.dumps({"inputs": prompt, **model_kwargs}) + return input_str.encode('utf-8') + + def transform_output(self, output: bytes) -> str: + response_json = json.loads(output.read().decode("utf-8")) + embeddings = response_json["sentence_embeddings"] + if len(embeddings) == 1: + return [embeddings[0]] + return embeddings + +def create_sagemaker_embeddings_from_js_model(embeddings_model_endpoint_name: str, aws_region: str) -> SagemakerEndpointEmbeddingsJumpStart: + # all set to create the objects for the ContentHandler and + # SagemakerEndpointEmbeddingsJumpStart classes + content_handler = ContentHandler() + logger.info(f'content_handler: {content_handler}, embeddings_model_endpoint_name: {embeddings_model_endpoint_name}, aws_region: {aws_region}') + # note the name of the LLM Sagemaker endpoint, this is the model that we would + # be using for generating the embeddings + embeddings = SagemakerEndpointEmbeddingsJumpStart( + endpoint_name = embeddings_model_endpoint_name, + region_name = aws_region, + content_handler = content_handler + ) + return embeddings + +def enforce_stop_tokens(text, stop) -> str: + """Cut off the text as soon as any stop words occur.""" + if stop is None: + return text + + return re.split("|".join(stop), text)[0] + +class vectorContentHandler(EmbeddingsContentHandler): + content_type = "application/json" + accepts = "application/json" + + def transform_input(self, inputs: List[str], model_kwargs: Dict) -> bytes: + input_str = json.dumps({"inputs": inputs, **model_kwargs}) + return input_str.encode("utf-8") + + def transform_output(self, output: bytes) -> List[List[float]]: + response_json = json.loads(output.read().decode("utf-8")) + return response_json["sentence_embeddings"] + +class crossContentHandler(LLMContentHandler): + content_type = "application/json" + accepts = "application/json" + + def transform_input(self, prompt: str, model_kwargs: Dict) -> bytes: + input_str = json.dumps({"inputs": prompt, "docs":model_kwargs["context"]}) + return input_str.encode('utf-8') + + def transform_output(self, output: bytes) -> str: + response_json = json.loads(output.read().decode("utf-8")) + return response_json['scores'][0][1] + +class answerContentHandler(LLMContentHandler): + content_type = "application/json" + accepts = "application/json" + + def transform_input(self, question: str, model_kwargs: Dict) -> bytes: + + template_1 = '以下context xml tag内的文本内容为背景知识:\n\n{context}\n\n请根据背景知识, 回答这个问题:{question}' + context = model_kwargs["context"] + + if len(context) == 0: + prompt = question + else: + prompt = template_1.format(context = model_kwargs["context"], question = question) + + input_str = json.dumps({"inputs": prompt, + "history": model_kwargs["history"], + "parameters": model_kwargs["parameters"]}) + return input_str.encode('utf-8') + + def transform_output(self, output: bytes) -> str: + 
+
+def SagemakerEndpointVectorOrCross(prompt: str, endpoint_name: str, region_name: str, model_type: str, stop: List[str], **kwargs) -> Any:
+    """
+    Invoke a SageMaker endpoint for the given model type. Returns an embedding
+    vector for "vector", and a string (or score) for "cross"/"answer".
+
+    Original class invocation:
+    response = self.client.invoke_endpoint(
+        EndpointName=self.endpoint_name,
+        Body=body,
+        ContentType=content_type,
+        Accept=accepts,
+        **_endpoint_kwargs,
+    )
+    """
+    if model_type == "vector":
+        content_handler = vectorContentHandler()
+        embeddings = SagemakerEndpointEmbeddings(
+            endpoint_name=endpoint_name,
+            region_name=region_name,
+            content_handler=content_handler,
+        )
+        query_result = embeddings.embed_query(prompt)
+        return query_result
+    elif model_type == "cross":
+        content_handler = crossContentHandler()
+    elif model_type == "answer":
+        content_handler = answerContentHandler()
+    genericModel = SagemakerEndpoint(
+        endpoint_name = endpoint_name,
+        region_name = region_name,
+        content_handler = content_handler
+    )
+    return genericModel(prompt=prompt, stop=stop, **kwargs)
diff --git a/source/sample/sample_llm.py b/source/sample/sample_llm.py
new file mode 100644
index 00000000..0157bf14
--- /dev/null
+++ b/source/sample/sample_llm.py
@@ -0,0 +1,19 @@
+import logging
+from embedding_wrapper import CSDCEmbeddings
+
+logger = logging.getLogger()
+logger.setLevel(logging.INFO)
+
+if __name__ == "__main__":
+    embeddings = CSDCEmbeddings(
+        aosEndpointName = 'vpc-xx.us-east-1.es.amazonaws.com',
+        region = 'us-east-1'
+    )
+    doc_result = embeddings.embed_documents(
+        bucketName='llm-bot-documents-xx-us-east-1',
+        prefix='csdc'
+    )
+    query_result = embeddings.embed_query(
+        text="请给我介绍一下什么是Data Transfer Hub方案?"
+    )
+    logger.info(f"doc_result is {doc_result}, the type of doc_result is {type(doc_result)}, query_result is {query_result}, the type of query_result is {type(query_result)}")
diff --git a/source/sample/sample_sm.py b/source/sample/sample_sm.py
new file mode 100644
index 00000000..2eeb424b
--- /dev/null
+++ b/source/sample/sample_sm.py
@@ -0,0 +1,136 @@
+import os
+import time
+import logging
+import boto3
+import tempfile
+import numpy as np
+from typing import List
+
+from langchain.docstore.document import Document
+from langchain.vectorstores import OpenSearchVectorSearch
+from langchain.document_loaders.unstructured import UnstructuredFileLoader
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+
+from opensearchpy import RequestsHttpConnection
+from sagemaker_utils import create_sagemaker_embeddings_from_js_model, SagemakerEndpointVectorOrCross
+from requests_aws4auth import AWS4Auth
+
+s3 = boto3.resource('s3')
+aws_region = boto3.Session().region_name
+credentials = boto3.Session().get_credentials()
+awsauth = AWS4Auth(credentials.access_key, credentials.secret_key, aws_region, 'es', session_token=credentials.token)
+
+logger = logging.getLogger()
+logger.setLevel(logging.INFO)
+
+MAX_FILE_SIZE = 1024*1024*100 # 100MB
+MAX_OS_DOCS_PER_PUT = 500
+CHUNK_SIZE_FOR_DOC_SPLIT = 600
+CHUNK_OVERLAP_FOR_DOC_SPLIT = 20
+
+def process_shard(shard, embeddings_model_endpoint_name, aws_region, os_index_name, os_domain_ep, os_http_auth) -> int:
+    embeddings = create_sagemaker_embeddings_from_js_model(embeddings_model_endpoint_name, aws_region)
+    docsearch = OpenSearchVectorSearch(
+        index_name=os_index_name,
+        embedding_function=embeddings,
+        opensearch_url="https://{}".format(os_domain_ep),
+        http_auth = os_http_auth,
+        use_ssl = True,
+        verify_certs = True,
+        connection_class = RequestsHttpConnection
+    )
+    docsearch.add_documents(documents=shard)
+    return 0
+
+def construct_shard(bucketName: str, prefix: str, embeddingEndpointName: str) -> List[Document]:
+    docs = []
+    document_bucket = s3.Bucket(bucketName)
+    for obj in document_bucket.objects.filter(Prefix=prefix):
+        if obj.key.endswith("/"):
+            continue
+        else:
+            with tempfile.TemporaryDirectory(dir='/tmp') as temp_dir:
+                file_path = f"{temp_dir}/{obj.key}"
+                logger.info(f"bucketName={bucketName}, obj.key={obj.key}, file_path={file_path}")
+                os.makedirs(os.path.dirname(file_path), exist_ok=True)
+                s3.meta.client.download_file(bucketName, obj.key, file_path)
+
+                loader = UnstructuredFileLoader(file_path)
+                docs.extend(loader.load())
+
+    # Add custom metadata fields: timestamp and embeddings_model
+    for doc in docs:
+        doc.metadata['timestamp'] = time.time()
+        doc.metadata['embeddings_model'] = embeddingEndpointName
+
+    text_splitter = RecursiveCharacterTextSplitter(
+        chunk_size = CHUNK_SIZE_FOR_DOC_SPLIT,
+        chunk_overlap = CHUNK_OVERLAP_FOR_DOC_SPLIT,
+        length_function = len,
+    )
+
+    chunks = text_splitter.create_documents([doc.page_content for doc in docs], metadatas=[doc.metadata for doc in docs])
+
+    db_shards = (len(chunks) // MAX_OS_DOCS_PER_PUT) + 1
+    shards = np.array_split(chunks, db_shards)
+    # Return only the first shard for this sample
+    return shards[0].tolist()
+
+# Main entry point
+if __name__ == "__main__":
+    """
+    Embedding Sample, shard format:
+    [
+        Document(
+            page_content='Data Transfer Hub (数据传输解决方案)\n\n轻松将数据移入和移出 AWS 中国区域\n\n概览\n\n此解决方案可为 Amazon Simple Storage Service (Amazon S3) 对象和 Amazon Elastic Container Registry (Amazon ECR) 映像提供安全、可扩展且可追踪的数据传输。使用数据传输解决方案,您可以执行以下任何任务:在 AWS S3 之间传输对象\n\n优势\n\n直观的用户界面 客户可在用户界面上为 Amazon S3 对象和 Amazon ECR 映像创建和管理数据传输任务。\n\n支持各类源 将数据从其他云服务商的对象存储服务(包括阿里云 OSS,腾讯 COS,七牛 Kodo 以及其他兼容 Amazon S3 的云存储服务)传输到 Amazon S3。在 Amazon ECR 之间传输容器镜像。将容器镜像从公共容器镜像仓库(例如 Docker Hub、Google gcr.io 和 Red Hat Quay.io)传输到 Amazon ECR。\n\n无服务器架构\n\n传输任务可按需使用并随用随付。有关更多信息,请参阅实施指南的“成本”部分。',
+            metadata={
+                'source': '/tmp/tmpmmod0k9m/csdc/dth.txt',
+                'timestamp': 1693494146.1509278,
+                'embeddings_model': 'embedding-endpoint'
+            })
+    ]
+    """
+    shard = construct_shard('', '', 'embedding-endpoint')
+
+    process_shard(shard, 'embedding-endpoint', 'us-east-1', 'chatbot-index', '', awsauth)
+
+    """
+    LLM Sample
+    """
+
+    query_knowledge = "给我介绍一下什么是data transfer hub方案?"
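+
+    # The prefix below ("为这个句子生成表示以用于检索相关文章:", i.e. "generate a
+    # representation for this sentence for retrieving related articles:") is the
+    # query instruction used by BGE-style Chinese embedding models; documents
+    # themselves are embedded without it.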
+    query_embedding = SagemakerEndpointVectorOrCross(prompt="为这个句子生成表示以用于检索相关文章:" + query_knowledge, endpoint_name="embedding-endpoint", region_name='us-east-1', model_type="vector", stop=None)
+    logger.info(f"query_embedding is {query_embedding}")
+
+    # For demo usage only; in practice this context should be retrieved from AOS
+    retrieveContext = """
+    Data Transfer Hub (数据传输解决方案)
+    轻松将数据移入和移出 AWS 中国区域
+    概览
+    此解决方案可为 Amazon Simple Storage Service (Amazon S3) 对象和 Amazon Elastic Container Registry (Amazon ECR) 映像提供安全、可扩展且可追踪的数据传输。使用数据传输解决方案,您可以执行以下任何任务:在 AWS S3 之间传输对象
+    优势
+    直观的用户界面 客户可在用户界面上为 Amazon S3 对象和 Amazon ECR 映像创建和管理数据传输任务。
+    支持各类源 将数据从其他云服务商的对象存储服务(包括阿里云 OSS,腾讯 COS,七牛 Kodo 以及其他兼容 Amazon S3 的云存储服务)传输到 Amazon S3。在 Amazon ECR 之间传输容器镜像。将容器镜像从公共容器镜像仓库(例如 Docker Hub、Google gcr.io 和 Red Hat Quay.io)传输到 Amazon ECR。
+    无服务器架构
+    传输任务可按需使用并随用随付。有关更多信息,请参阅实施指南的“成本”部分。
+    """
+    # Optionally, score how relevant the recalled knowledge is to the query
+    score = float(SagemakerEndpointVectorOrCross(prompt=query_knowledge, endpoint_name="cross-endpoint", region_name="us-east-1", model_type="cross", stop=None, context=retrieveContext))
+    logger.info(f"score is {score}")
+
+    # For demo usage the same context is reused here; refer to main.py in the
+    # executor folder for the actual recall process
+    recallContext = retrieveContext
+    answer = SagemakerEndpointVectorOrCross(prompt="请给我介绍一下什么是Data Transfer Hub方案?", endpoint_name="instruct-endpoint", region_name="us-east-1", model_type="answer", stop=None, history=[], parameters={'temperature': 0.8}, context=recallContext)
+
+    logger.info(f"answer is {answer}")
diff --git a/script/README.md b/source/sample/script/README.md
similarity index 100%
rename from script/README.md
rename to source/sample/script/README.md
diff --git a/script/ec2config.sh b/source/sample/script/ec2config.sh
similarity index 100%
rename from script/ec2config.sh
rename to source/sample/script/ec2config.sh
diff --git a/script/inference.py b/source/sample/script/inference.py
similarity index 100%
rename from script/inference.py
rename to source/sample/script/inference.py
diff --git a/script/nginx.sh b/source/sample/script/nginx.sh
similarity index 100%
rename from script/nginx.sh
rename to source/sample/script/nginx.sh
diff --git a/script/sagemaker/buffer-cross-001-deploy.ipynb b/source/sample/script/sagemaker/buffer-cross-001-deploy.ipynb
similarity index 100%
rename from script/sagemaker/buffer-cross-001-deploy.ipynb
rename to source/sample/script/sagemaker/buffer-cross-001-deploy.ipynb
diff --git a/script/sagemaker/buffer-embedding-002-deploy.ipynb b/source/sample/script/sagemaker/buffer-embedding-002-deploy.ipynb
similarity index 100%
rename from script/sagemaker/buffer-embedding-002-deploy.ipynb
rename to source/sample/script/sagemaker/buffer-embedding-002-deploy.ipynb
diff --git a/script/sagemaker/buffer-instruct-internlm-001-deploy.ipynb b/source/sample/script/sagemaker/buffer-instruct-internlm-001-deploy.ipynb
similarity index 100%
rename from script/sagemaker/buffer-instruct-internlm-001-deploy.ipynb
rename to 
source/sample/script/sagemaker/buffer-instruct-internlm-001-deploy.ipynb From 68b2fed421cf9ad06db29905f2ede26a3a42b092 Mon Sep 17 00:00:00 2001 From: Ning Date: Mon, 13 Nov 2023 19:03:42 +0800 Subject: [PATCH 18/37] chore: ignore model lock --- .gitignore | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/.gitignore b/.gitignore index f4d40475..97bc48be 100644 --- a/.gitignore +++ b/.gitignore @@ -58,7 +58,10 @@ package-lock.json **/llm_bot_dep/__pycache__ **/llm_bot_dep/loaders/__pycache__ **/dep/build -**/models/cross/model/models--csdc-atl--buffer-cross-001 -**/models/embedding/model/models--BAAI--bge-large-zh-v1.5 -**/models/embedding/model/models--csdc-atl--buffer-embedding-002 -**/models/instruct/model/models--csdc-atl--buffer-instruct-InternLM-001 +**/model/cross/model/.locks +**/model/embedding/model/.locks +**/model/instruct/model/.locks +**/model/cross/model/models--csdc-atl--buffer-cross-001 +**/model/embedding/model/models--BAAI--bge-large-zh-v1.5 +**/model/embedding/model/models--csdc-atl--buffer-embedding-002 +**/model/instruct/model/models--csdc-atl--buffer-instruct-InternLM-001 From d116df1c2e151b0c5f694643d2e303f4d7b14ed8 Mon Sep 17 00:00:00 2001 From: Ning Date: Mon, 13 Nov 2023 21:12:58 +0800 Subject: [PATCH 19/37] chore: restructure --- README.md | 11 +- source/infrastructure/bin/main.ts | 107 +++++++ cdk.json => source/infrastructure/cdk.json | 2 +- source/infrastructure/lib/api/api-stack.ts | 207 ++++++++++++++ source/infrastructure/lib/ddb-stack.ts | 83 ++++++ source/infrastructure/lib/etl/etl-stack.ts | 267 +++++++++++++++++ .../infrastructure/lib/model/assets-stack.ts | 114 ++++++++ source/infrastructure/lib/model/llm-stack.ts | 172 +++++++++++ source/infrastructure/lib/shared/vpc-stack.ts | 43 +++ source/infrastructure/lib/tool/ec2-stack.ts | 73 +++++ .../lib/vector-store/os-stack.ts | 47 +++ .../infrastructure/package.json | 0 .../infrastructure/tsconfig.dev.json | 0 .../infrastructure/tsconfig.json | 0 source/lambda/custom/index.js | 31 ++ source/lambda/ddb/rating.py | 63 ++++ source/lambda/embedding/Dockerfile | 9 + source/lambda/embedding/main.py | 228 +++++++++++++++ source/lambda/embedding/requirements.txt | 9 + source/lambda/embedding/utils/aos_utils.py | 146 ++++++++++ source/lambda/embedding/utils/sm_utils.py | 73 +++++ source/lambda/etl/Dockerfile | 8 + source/lambda/etl/main.py | 43 +++ source/lambda/etl/requirements.txt | 9 + source/lambda/executor/Dockerfile | 9 + source/lambda/executor/main.py | 194 +++++++++++++ source/lambda/executor/requirements.txt | 7 + source/lambda/executor/utils/aos_utils.py | 158 ++++++++++ source/lambda/executor/utils/ddb_utils.py | 124 ++++++++ source/lambda/executor/utils/llmbot_utils.py | 132 +++++++++ source/lambda/executor/utils/sm_utils.py | 202 +++++++++++++ source/lambda/job/dep/README.md | 19 ++ .../dist/llm_bot_dep-0.1.0-py3-none-any.whl | Bin 0 -> 24501 bytes .../dist/nougat_ocr-0.1.17-py3-none-any.whl | Bin 0 -> 82497 bytes source/lambda/job/dep/llm_bot_dep/__init__.py | 0 .../lambda/job/dep/llm_bot_dep/aos_utils.py | 146 ++++++++++ .../job/dep/llm_bot_dep/enhance_utils.py | 243 ++++++++++++++++ .../job/dep/llm_bot_dep/loaders/__init__.py | 0 .../job/dep/llm_bot_dep/loaders/auto.py | 29 ++ .../lambda/job/dep/llm_bot_dep/loaders/csv.py | 172 +++++++++++ .../job/dep/llm_bot_dep/loaders/docx.py | 70 +++++ .../job/dep/llm_bot_dep/loaders/html.py | 71 +++++ .../job/dep/llm_bot_dep/loaders/image.py | 5 + .../job/dep/llm_bot_dep/loaders/markdown.py | 49 ++++ 
.../lambda/job/dep/llm_bot_dep/loaders/pdf.py | 171 +++++++++++ .../job/dep/llm_bot_dep/loaders/text.py | 54 ++++ source/lambda/job/dep/llm_bot_dep/sm_utils.py | 71 +++++ .../job/dep/llm_bot_dep/splitter_utils.py | 187 ++++++++++++ source/lambda/job/dep/setup.py | 20 ++ source/lambda/job/glue-job-script.py | 270 ++++++++++++++++++ 50 files changed, 4142 insertions(+), 6 deletions(-) create mode 100644 source/infrastructure/bin/main.ts rename cdk.json => source/infrastructure/cdk.json (86%) create mode 100644 source/infrastructure/lib/api/api-stack.ts create mode 100644 source/infrastructure/lib/ddb-stack.ts create mode 100644 source/infrastructure/lib/etl/etl-stack.ts create mode 100644 source/infrastructure/lib/model/assets-stack.ts create mode 100644 source/infrastructure/lib/model/llm-stack.ts create mode 100644 source/infrastructure/lib/shared/vpc-stack.ts create mode 100644 source/infrastructure/lib/tool/ec2-stack.ts create mode 100644 source/infrastructure/lib/vector-store/os-stack.ts rename package.json => source/infrastructure/package.json (100%) rename tsconfig.dev.json => source/infrastructure/tsconfig.dev.json (100%) rename tsconfig.json => source/infrastructure/tsconfig.json (100%) create mode 100644 source/lambda/custom/index.js create mode 100644 source/lambda/ddb/rating.py create mode 100644 source/lambda/embedding/Dockerfile create mode 100644 source/lambda/embedding/main.py create mode 100644 source/lambda/embedding/requirements.txt create mode 100644 source/lambda/embedding/utils/aos_utils.py create mode 100644 source/lambda/embedding/utils/sm_utils.py create mode 100644 source/lambda/etl/Dockerfile create mode 100644 source/lambda/etl/main.py create mode 100644 source/lambda/etl/requirements.txt create mode 100644 source/lambda/executor/Dockerfile create mode 100644 source/lambda/executor/main.py create mode 100644 source/lambda/executor/requirements.txt create mode 100644 source/lambda/executor/utils/aos_utils.py create mode 100644 source/lambda/executor/utils/ddb_utils.py create mode 100644 source/lambda/executor/utils/llmbot_utils.py create mode 100644 source/lambda/executor/utils/sm_utils.py create mode 100644 source/lambda/job/dep/README.md create mode 100644 source/lambda/job/dep/dist/llm_bot_dep-0.1.0-py3-none-any.whl create mode 100644 source/lambda/job/dep/dist/nougat_ocr-0.1.17-py3-none-any.whl create mode 100644 source/lambda/job/dep/llm_bot_dep/__init__.py create mode 100644 source/lambda/job/dep/llm_bot_dep/aos_utils.py create mode 100644 source/lambda/job/dep/llm_bot_dep/enhance_utils.py create mode 100644 source/lambda/job/dep/llm_bot_dep/loaders/__init__.py create mode 100644 source/lambda/job/dep/llm_bot_dep/loaders/auto.py create mode 100644 source/lambda/job/dep/llm_bot_dep/loaders/csv.py create mode 100644 source/lambda/job/dep/llm_bot_dep/loaders/docx.py create mode 100644 source/lambda/job/dep/llm_bot_dep/loaders/html.py create mode 100644 source/lambda/job/dep/llm_bot_dep/loaders/image.py create mode 100644 source/lambda/job/dep/llm_bot_dep/loaders/markdown.py create mode 100644 source/lambda/job/dep/llm_bot_dep/loaders/pdf.py create mode 100644 source/lambda/job/dep/llm_bot_dep/loaders/text.py create mode 100644 source/lambda/job/dep/llm_bot_dep/sm_utils.py create mode 100644 source/lambda/job/dep/llm_bot_dep/splitter_utils.py create mode 100644 source/lambda/job/dep/setup.py create mode 100644 source/lambda/job/glue-job-script.py diff --git a/README.md b/README.md index 7fba9062..e5232aa2 100644 --- a/README.md +++ b/README.md @@ -6,8 +6,8 @@ 1. 
Prepare model assets by executing script per model folder
 
 ```bash
-% cd src/models//model
-% ./model.sh
+cd source/model//model
+./model.sh
 Make sure Python installed properly. Usage: ./model.sh -t TOKEN [-m MODEL_NAME] [-c COMMIT_HASH] [-s S3_BUCKET_NAME]
 -t TOKEN Hugging Face token (required)
 -m MODEL_NAME Model name (default: csdc-atl/buffer-cross-001)
@@ -18,11 +18,12 @@ Make sure Python installed properly. Usage: ./model.sh -t TOKEN [-m MODEL_NAME]
 2. Deploy CDK template
 ```bash
+cd source/infrastructure
+aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws
 npm install
 npx cdk deploy --rollback false --parameters S3ModelAssets=
 ```
-
-You will get output similar like below:
+You can change us-east-1 to any other supported region as needed. You will get output similar to the following:
 ```
 Outputs:
 llm-bot-dev.APIEndpointAddress = https://xx.execute-api.us-east-1.amazonaws.com/v1/
@@ -209,7 +210,7 @@ You should see output like this:
 1. Launch dashboard to check and debug the ETL & QA process
 ```bash
-cd /src/panel
+cd /source/panel
 pip install -r requirements.txt
 mv .env_sample .env
 # fill .env content accordingly with cdk output
diff --git a/source/infrastructure/bin/main.ts b/source/infrastructure/bin/main.ts
new file mode 100644
index 00000000..b6bf1411
--- /dev/null
+++ b/source/infrastructure/bin/main.ts
@@ -0,0 +1,107 @@
+import { App, CfnOutput, CfnParameter, Stack, StackProps } from 'aws-cdk-lib';
+import { Construct } from 'constructs';
+import * as dotenv from "dotenv";
+import { LLMApiStack } from '../lib/api/api-stack';
+import { DynamoDBStack } from '../lib/ddb-stack';
+import { EtlStack } from '../lib/etl/etl-stack';
+import { AssetsStack } from '../lib/model/assets-stack';
+import { LLMStack } from '../lib/model/llm-stack';
+import { VpcStack } from '../lib/shared/vpc-stack';
+import { OpenSearchStack } from '../lib/vector-store/os-stack';
+dotenv.config();
+
+export class RootStack extends Stack {
+    constructor(scope: Construct, id: string, props: StackProps = {}) {
+        super(scope, id, props);
+
+        // Add CDK input parameters so users can specify the S3 bucket that stores model assets.
+        // Deploy with: npx cdk deploy --rollback false --parameters S3ModelAssets=llm-rag --parameters SubEmail=example@example.org
+        const _S3ModelAssets = new CfnParameter(this, 'S3ModelAssets', {
+            type: 'String',
+            description: 'S3 Bucket for model & code assets',
+            // default: 'llm-rag',
+        });
+
+        const _SubEmail = new CfnParameter(this, 'SubEmail', {
+            type: 'String',
+            description: 'Email address for SNS notification',
+        });
+
+        // This assets stack mitigates the issue that model assets in the S3 bucket cannot be located immediately when creating the SageMaker model
+        const _AssetsStack = new AssetsStack(this, 'assets-stack', {_s3ModelAssets:_S3ModelAssets.valueAsString, env:process.env});
+        const _LLMStack = new LLMStack(this, 'llm-stack', {
+            _s3ModelAssets:_S3ModelAssets.valueAsString,
+            _crossCodePrefix:_AssetsStack._crossCodePrefix,
+            _embeddingCodePrefix:_AssetsStack._embeddingCodePrefix,
+            _instructCodePrefix:_AssetsStack._instructCodePrefix,
+            env:process.env
+        });
+        _LLMStack.addDependency(_AssetsStack);
+
+        const _VpcStack = new VpcStack(this, 'vpc-stack', {env:process.env});
+
+        const _OsStack = new OpenSearchStack(this,'os-stack', {_vpc:_VpcStack._vpc, _securityGroup:_VpcStack._securityGroup});
+        _OsStack.addDependency(_VpcStack);
+
+        // const _Ec2Stack = new Ec2Stack(this, 'ec2-stack', {_vpc:_VpcStack._vpc, _securityGroup:_VpcStack._securityGroup, _domainEndpoint:_OsStack._domainEndpoint, env:process.env});
+        // _Ec2Stack.addDependency(_VpcStack);
+        // _Ec2Stack.addDependency(_OsStack);
+
+        const _DynamoDBStack = new DynamoDBStack(this, 'ddb-stack', {_vpc:_VpcStack._vpc, _securityGroup:_VpcStack._securityGroup, _domainEndpoint:_OsStack._domainEndpoint, env:process.env});
+        _DynamoDBStack.addDependency(_VpcStack);
+        _DynamoDBStack.addDependency(_OsStack);
+
+        const _EtlStack = new EtlStack(this, 'etl-stack', {
+            _domainEndpoint: _OsStack._domainEndpoint,
+            _embeddingEndpoint: _LLMStack._embeddingEndPoint ?? '',
+            _region: props.env?.region || 'us-east-1',
+            _subEmail: _SubEmail.valueAsString ?? '',
+            _vpc: _VpcStack._vpc,
+            _subnets: _VpcStack._privateSubnets,
+            _securityGroups: _VpcStack._securityGroup,
+        });
+        _EtlStack.addDependency(_VpcStack);
+        _EtlStack.addDependency(_OsStack);
+        _EtlStack.addDependency(_LLMStack);
+
+        const _ApiStack = new LLMApiStack(this, 'api-stack', {
+            _vpc:_VpcStack._vpc,
+            _securityGroup:_VpcStack._securityGroup,
+            _domainEndpoint:_OsStack._domainEndpoint,
+            _crossEndPoint: _LLMStack._crossEndPoint ?? '',
+            _embeddingEndPoint:_LLMStack._embeddingEndPoint || '',
+            _instructEndPoint:_LLMStack._instructEndPoint || '',
+            _chatSessionTable: _DynamoDBStack._chatSessionTable,
+            _sfnOutput: _EtlStack._sfnOutput,
+            env:process.env
+        });
+        _ApiStack.addDependency(_VpcStack);
+        _ApiStack.addDependency(_OsStack);
+        _ApiStack.addDependency(_LLMStack);
+        _ApiStack.addDependency(_DynamoDBStack);
+
+        new CfnOutput(this, 'VPC', {value:_VpcStack._vpc.vpcId});
+        new CfnOutput(this, 'OpenSearch Endpoint', {value:_OsStack._domainEndpoint});
+        new CfnOutput(this, 'Document Bucket', {value:_ApiStack._documentBucket});
+        // Deprecated for now, since running a proxy on an EC2 instance is not allowed by policy
+        // new CfnOutput(this, 'OpenSearch Dashboard', {value:`${_Ec2Stack._publicIP}:8081/_dashboards`});
+        new CfnOutput(this, 'API Endpoint Address', {value:_ApiStack._apiEndpoint});
+        new CfnOutput(this, 'Glue Job Name', {value:_EtlStack._jobName});
+        new CfnOutput(this, 'Cross Model Endpoint', {value:_LLMStack._crossEndPoint || 'No Cross Endpoint Created'});
+        new CfnOutput(this, 'Embedding Model Endpoint', {value:_LLMStack._embeddingEndPoint || 'No Embedding Endpoint Created'});
+        new CfnOutput(this, 'Instruct Model Endpoint', {value:_LLMStack._instructEndPoint || 'No Instruct Endpoint Created'});
+        new CfnOutput(this, 'Processed Object Table', {value:_EtlStack._processedObjectsTable});
+    }
+}
+
+// For development, use the account/region from the CDK CLI
+const devEnv = {
+    account: process.env.CDK_DEFAULT_ACCOUNT,
+    region: process.env.CDK_DEFAULT_REGION,
+};
+
+const app = new App();
+
+new RootStack(app, 'llm-bot-dev', { env: devEnv });
+
+app.synth();
\ No newline at end of file
diff --git a/cdk.json b/source/infrastructure/cdk.json
similarity index 86%
rename from cdk.json
rename to source/infrastructure/cdk.json
index 49f14ceb..cac07010 100644
--- a/cdk.json
+++ b/source/infrastructure/cdk.json
@@ -1,5 +1,5 @@
 {
-  "app": "npx ts-node -P tsconfig.json --prefer-ts-exts src/main.ts",
+  "app": "npx ts-node -P tsconfig.json --prefer-ts-exts bin/main.ts",
   "output": "cdk.out",
   "build": "npx projen bundle",
   "watch": {
diff --git a/source/infrastructure/lib/api/api-stack.ts b/source/infrastructure/lib/api/api-stack.ts
new file mode 100644
index 00000000..5707f923
--- /dev/null
+++ b/source/infrastructure/lib/api/api-stack.ts
@@ -0,0 +1,207 @@
+import { NestedStack, StackProps, Duration, Aws } from 
'aws-cdk-lib'; +import { DockerImageFunction, Handler } from 'aws-cdk-lib/aws-lambda'; +import { DockerImageCode, Architecture } from 'aws-cdk-lib/aws-lambda'; +import * as iam from "aws-cdk-lib/aws-iam"; +import * as ec2 from 'aws-cdk-lib/aws-ec2'; +import * as apigw from 'aws-cdk-lib/aws-apigateway'; +import * as s3 from 'aws-cdk-lib/aws-s3'; +import * as s3n from 'aws-cdk-lib/aws-s3-notifications'; +import * as sfn from 'aws-cdk-lib/aws-stepfunctions'; +import * as lambda from 'aws-cdk-lib/aws-lambda'; +import { Construct } from 'constructs'; +import { join } from "path"; + +interface apiStackProps extends StackProps { + _vpc: ec2.Vpc; + _securityGroup: ec2.SecurityGroup; + _domainEndpoint: string; + _crossEndPoint: string; + _embeddingEndPoint: string; + _instructEndPoint: string; + _chatSessionTable: string; + // type of StepFunctions + _sfnOutput: sfn.StateMachine; +} + +export class LLMApiStack extends NestedStack { + + _apiEndpoint; + _documentBucket; + constructor(scope: Construct, id: string, props: apiStackProps) { + super(scope, id, props); + + const _vpc = props._vpc + const _securityGroup = props._securityGroup + const _domainEndpoint = props._domainEndpoint + const _aosIndex = "chatbot-index" + const _chatSessionTable = props._chatSessionTable + + // s3 bucket for storing documents + const _S3Bucket = new s3.Bucket(this, 'llm-bot-documents', { + // bucketName: `llm-bot-documents-${Aws.ACCOUNT_ID}-${Aws.REGION}`, + blockPublicAccess: s3.BlockPublicAccess.BLOCK_ALL, + }); + + const lambdaExecutor = new DockerImageFunction(this, + "lambdaExecutor", { + code: DockerImageCode.fromImageAsset(join(__dirname, "../../../lambda/executor")), + timeout: Duration.minutes(15), + memorySize: 1024, + vpc: _vpc, + vpcSubnets: { + subnets: _vpc.privateSubnets, + }, + securityGroups: [_securityGroup], + architecture: Architecture.X86_64, + environment: { + aos_endpoint: _domainEndpoint, + llm_endpoint: props._instructEndPoint, + embedding_endpoint: props._embeddingEndPoint, + cross_endpoint: props._crossEndPoint, + aos_index: _aosIndex, + chat_session_table: _chatSessionTable, + }, + }); + + lambdaExecutor.addToRolePolicy(new iam.PolicyStatement({ + // principals: [new iam.AnyPrincipal()], + actions: [ + "sagemaker:InvokeEndpointAsync", + "sagemaker:InvokeEndpoint", + "s3:List*", + "s3:Put*", + "s3:Get*", + "es:*", + "dynamodb:*", + "secretsmanager:GetSecretValue", + ], + effect: iam.Effect.ALLOW, + resources: ['*'], + } + )) + + const lambdaEmbedding = new DockerImageFunction(this, + "lambdaEmbedding", { + code: DockerImageCode.fromImageAsset(join(__dirname, "../../../lambda/embedding")), + timeout: Duration.minutes(15), + memorySize: 4096, + vpc: _vpc, + vpcSubnets: { + subnets: _vpc.privateSubnets, + }, + securityGroups: [_securityGroup], + architecture: Architecture.X86_64, + environment: { + document_bucket: _S3Bucket.bucketName, + opensearch_cluster_domain: _domainEndpoint, + llm_endpoint: props._instructEndPoint, + embedding_endpoint: props._embeddingEndPoint, + cross_endpoint: props._crossEndPoint, + }, + }); + + lambdaEmbedding.addToRolePolicy(new iam.PolicyStatement({ + actions: [ + "sagemaker:InvokeEndpointAsync", + "sagemaker:InvokeEndpoint", + "s3:List*", + "s3:Put*", + "s3:Get*", + "es:*", + ], + effect: iam.Effect.ALLOW, + resources: ['*'], + } + )) + // Define the API Gateway + const api = new apigw.RestApi(this, 'llmApi', { + restApiName: 'llmApi', + description: 'This service serves the LLM API.', + endpointConfiguration: { + types: [apigw.EndpointType.REGIONAL] + }, + 
+            deployOptions: {
+                stageName: 'v1',
+                metricsEnabled: true,
+                loggingLevel: apigw.MethodLoggingLevel.INFO,
+                dataTraceEnabled: true,
+                tracingEnabled: true,
+            },
+        });
+
+        // Define the API Gateway Lambda Integration with proxy and no integration responses
+        const lambdaExecutorIntegration = new apigw.LambdaIntegration(lambdaExecutor, { proxy: true, });
+
+        // Define the API Gateway Method
+        const apiResourceLLM = api.root.addResource('llm');
+        apiResourceLLM.addMethod('POST', lambdaExecutorIntegration);
+
+        // Define the API Gateway Lambda Integration with proxy and no integration responses
+        const lambdaEmbeddingIntegration = new apigw.LambdaIntegration(lambdaEmbedding, { proxy: true, });
+
+        // Define the API Gateway Method
+        const apiResourceEmbedding = api.root.addResource('embedding');
+        apiResourceEmbedding.addMethod('POST', lambdaEmbeddingIntegration);
+
+        // Add a GET method to query & search the index in OpenSearch. The POST
+        // method above should eventually be deprecated and replaced by AWS Glue.
+        apiResourceEmbedding.addMethod('GET', lambdaEmbeddingIntegration);
+
+        // Integration with Step Functions to trigger the ETL process
+        // Lambda function to trigger the Step Function
+        const lambdaStepFunction = new lambda.Function(this, 'lambdaStepFunction', {
+            // Inline code keeps deployment simple (no container packaging needed);
+            // the source is kept flush-left to avoid Python indentation errors
+            code: lambda.Code.fromInline
+            (`
+import json
+import boto3
+import os
+client = boto3.client('stepfunctions')
+def handler(event, context):
+    # First check the event for a possible S3 created event
+    inputPayload = {}
+    if 'Records' in event:
+        print('S3 created event detected')
+        # TODO: aggregate the bucket and key from the event object for S3 created events
+        bucket = event['Records'][0]['s3']['bucket']['name']
+        key = event['Records'][0]['s3']['object']['key']
+        # Pass the bucket and key to the Step Function, aligned with the input schema in etl-stack.ts
+        inputPayload = json.dumps({'s3Bucket': bucket, 's3Prefix': key, 'offline': 'false'})
+    else:
+        print('API Gateway event detected')
+        # Parse the body from the event object; an illustrative body looks like
+        # {"s3Bucket": "...", "s3Prefix": "...", "qaEnhance": "false", "offline": "true"}
+        body = json.loads(event['body'])
+        # Pass the parsed body to the Step Function
+        inputPayload = json.dumps(body)
+
+    response = client.start_execution(
+        stateMachineArn=os.environ['sfn_arn'],
+        input=inputPayload
+    )
+    return {
+        'statusCode': 200,
+        'body': json.dumps('Step Function triggered, Step Function ARN: ' + response['executionArn'] + ' Input Payload: ' + inputPayload)
+    }
+            `),
+            handler: 'index.handler',
+            runtime: lambda.Runtime.PYTHON_3_9,
+            timeout: Duration.seconds(30),
+            environment: {
+                sfn_arn: props._sfnOutput.stateMachineArn,
+            },
+            memorySize: 256,
+        });
+
+        // Grant the Lambda function permission to start the Step Function
+        props._sfnOutput.grantStartExecution(lambdaStepFunction);
+
+        const apiResourceStepFunction = api.root.addResource('etl');
+        apiResourceStepFunction.addMethod('POST', new apigw.LambdaIntegration(lambdaStepFunction));
+
+        // Add an S3 event notification to trigger the Step Function when a file is uploaded to the bucket
+        _S3Bucket.addEventNotification(s3.EventType.OBJECT_CREATED, new s3n.LambdaDestination(lambdaStepFunction), { prefix: 'documents/' });
+        _S3Bucket.grantReadWrite(lambdaStepFunction);
+
+        this._apiEndpoint = api.url
+        this._documentBucket = _S3Bucket.bucketName
+    }
+}
\ No newline at end of file
diff --git a/source/infrastructure/lib/ddb-stack.ts b/source/infrastructure/lib/ddb-stack.ts
new file mode 100644
index 00000000..54704a70
--- /dev/null
+++ b/source/infrastructure/lib/ddb-stack.ts
@@ -0,0 +1,83 @@
+import { 
NestedStack, StackProps, Duration, CfnOutput, NestedStackProps, RemovalPolicy } from "aws-cdk-lib";
+import { Construct } from "constructs";
+import { Table, AttributeType } from "aws-cdk-lib/aws-dynamodb";
+import { Function, Runtime, Code } from "aws-cdk-lib/aws-lambda";
+import { LambdaIntegration, RestApi } from "aws-cdk-lib/aws-apigateway";
+import * as lambda from "aws-cdk-lib/aws-lambda";
+import * as iam from "aws-cdk-lib/aws-iam";
+import * as ec2 from 'aws-cdk-lib/aws-ec2';
+import * as apigw from 'aws-cdk-lib/aws-apigateway';
+import { join } from "path";
+
+interface ddbStackProps extends StackProps {
+    _vpc: ec2.Vpc;
+    _securityGroup: ec2.SecurityGroup;
+    _domainEndpoint: string;
+}
+
+export class DynamoDBStack extends NestedStack {
+
+    _chatSessionTable;
+    constructor(scope: Construct, id: string, props: ddbStackProps) {
+        super(scope, id, props);
+        const _vpc = props._vpc;
+
+        // Create the DynamoDB table
+        const table = new Table(this, "modelRatingTable", {
+            // tableName: "modelRatingInfo",
+            partitionKey: {
+                name: "session_id",
+                type: AttributeType.STRING,
+            },
+            // removalPolicy: RemovalPolicy.DESTROY,
+        });
+
+        // Create the Lambda function
+        const postFn = new lambda.Function(this, "PostRatingFunction", {
+            runtime: lambda.Runtime.PYTHON_3_7,
+            handler: "rating.lambda_handler",
+            code: Code.fromAsset(join(__dirname, "../../lambda/ddb")),
+            environment: {
+                TABLE_NAME: table.tableName,
+            },
+            vpc: _vpc,
+            vpcSubnets: {
+                subnets: _vpc.privateSubnets,
+            },
+            securityGroups: [props._securityGroup]
+        });
+
+        postFn.addToRolePolicy(new iam.PolicyStatement({
+            actions: [
+                "dynamodb:*"
+            ],
+            effect: iam.Effect.ALLOW,
+            resources: ['*'],
+        }
+        ))
+
+        // Grant permissions for the Lambda function to access the DynamoDB table
+        table.grantReadWriteData(postFn);
+
+        const api = new apigw.RestApi(this, 'ddbApi', {
+            restApiName: 'ddbApi',
+            description: 'This service fronts the DynamoDB table that stores model rating data.',
+            endpointConfiguration: {
+                types: [apigw.EndpointType.REGIONAL]
+            },
+            deployOptions: {
+                stageName: 'v1',
+                metricsEnabled: true,
+                loggingLevel: apigw.MethodLoggingLevel.INFO,
+                dataTraceEnabled: true,
+                tracingEnabled: true,
+            },
+        });
+        // Define the API resources and methods
+        const session = api.root.addResource('rating');
+        session.addMethod("POST", new LambdaIntegration(postFn));
+
+        this._chatSessionTable = table.tableName;
+    }
+}
diff --git a/source/infrastructure/lib/etl/etl-stack.ts b/source/infrastructure/lib/etl/etl-stack.ts
new file mode 100644
index 00000000..a03ddfe3
--- /dev/null
+++ b/source/infrastructure/lib/etl/etl-stack.ts
@@ -0,0 +1,267 @@
+import { NestedStack, StackProps, RemovalPolicy, Duration, Aws } from 'aws-cdk-lib';
+import { Construct } from 'constructs';
+
+import * as iam from 'aws-cdk-lib/aws-iam';
+import * as api from 'aws-cdk-lib/aws-apigateway';
+import * as glue from '@aws-cdk/aws-glue-alpha';
+import * as sfn from 'aws-cdk-lib/aws-stepfunctions';
+import * as tasks from 'aws-cdk-lib/aws-stepfunctions-tasks';
+import * as ec2 from 'aws-cdk-lib/aws-ec2';
+import * as sns from 'aws-cdk-lib/aws-sns';
+import * as subscriptions from 'aws-cdk-lib/aws-sns-subscriptions';
+import * as s3 from 'aws-cdk-lib/aws-s3';
+import * as s3assets from 'aws-cdk-lib/aws-s3-assets';
+import * as s3deploy from 'aws-cdk-lib/aws-s3-deployment';
+import * as dynamodb from 'aws-cdk-lib/aws-dynamodb';
+import * as lambda from 'aws-cdk-lib/aws-lambda';
+import { DockerImageCode, Architecture, DockerImageFunction } from 'aws-cdk-lib/aws-lambda';
+import { join } from "path";
+
+interface etlStackProps extends StackProps {
+    _vpc: ec2.Vpc;
+    _subnets: ec2.ISubnet[];
+    _securityGroups: ec2.SecurityGroup;
+    _domainEndpoint: string;
+    _embeddingEndpoint: string;
+    _region: string;
+    _subEmail: string;
+}
+
+export class EtlStack extends NestedStack {
+    _sfnOutput;
+    _jobName;
+    _jobArn;
+    _processedObjectsTable;
+
+    constructor(scope: Construct, id: string, props: etlStackProps) {
+        super(scope, id, props);
+
+        const connection = new glue.Connection(this, 'GlueJobConnection', {
+            type: glue.ConnectionType.NETWORK,
+            subnet: props._subnets[0],
+            securityGroups: [props._securityGroups],
+        });
+
+        const table = new dynamodb.Table(this, 'ProcessedObjects', {
+            partitionKey: { name: 'ObjectKey', type: dynamodb.AttributeType.STRING },
+            billingMode: dynamodb.BillingMode.PAY_PER_REQUEST,
+        });
+
+        table.addGlobalSecondaryIndex({
+            indexName: 'BucketAndPrefixIndex',
+            partitionKey: { name: 'Bucket', type: dynamodb.AttributeType.STRING },
+            sortKey: { name: 'Prefix', type: dynamodb.AttributeType.STRING },
+        });
+
+        // Add ExpiryTimestamp as an attribute but not as a sort key in the base table
+        table.addGlobalSecondaryIndex({
+            indexName: 'ExpiryTimestampIndex',
+            partitionKey: { name: 'ExpiryTimestamp', type: dynamodb.AttributeType.NUMBER },
+            // No sort key for this index
+        });
+
+        const _S3Bucket = new s3.Bucket(this, 'llm-bot-glue-lib', {
+            // bucketName: `llm-bot-glue-lib-${Aws.ACCOUNT_ID}-${Aws.REGION}`,
+            blockPublicAccess: s3.BlockPublicAccess.BLOCK_ALL,
+        });
+
+        const extraPythonFiles = new s3deploy.BucketDeployment(this, 'extraPythonFiles', {
+            sources: [s3deploy.Source.asset(join(__dirname, '../../../lambda/job/dep/dist'))],
+            destinationBucket: _S3Bucket,
+            // destinationKeyPrefix: 'llm_bot_dep-0.1.0-py3-none-any.whl',
+        });
+
+        // Assemble the extra Python files list as a comma-separated string (currently just the llm_bot_dep wheel)
+        const extraPythonFilesList = [_S3Bucket.s3UrlForObject('llm_bot_dep-0.1.0-py3-none-any.whl')].join(',');
+
+        const glueRole = new iam.Role(this, 'ETLGlueJobRole', {
+            assumedBy: new iam.ServicePrincipal('glue.amazonaws.com'),
+            // The role is used by the Glue job to access AOS; by default it has a 1-hour session duration, which is not enough for the Glue job to finish the embedding injection
+            maxSessionDuration: Duration.hours(12),
+        });
+        // TODO: narrow down the policy to specific resources and actions
+        glueRole.addToPrincipalPolicy(
+            new iam.PolicyStatement({
+                actions: [
+                    "sagemaker:InvokeEndpointAsync",
+                    "sagemaker:InvokeEndpoint",
+                    "s3:*",
+                    "es:*",
+                    "glue:*",
+                    "ec2:*",
+                    "dynamodb:*",
+                    "bedrock:*",
+                    // CloudWatch Logs
+                    "logs:*",
+                ],
+                effect: iam.Effect.ALLOW,
+                resources: ['*'],
+            })
+        )
+
+        // Create a Glue job to process the files specified by the S3 bucket and prefix
+        const glueJob = new glue.Job(this, 'PythonShellJob', {
+            executable: glue.JobExecutable.pythonShell({
+                glueVersion: glue.GlueVersion.V3_0,
+                pythonVersion: glue.PythonVersion.THREE_NINE,
+                script: glue.Code.fromAsset(join(__dirname, '../../../lambda/job/glue-job-script.py')),
+            }),
+            // Worker type is not supported for the pythonshell job command
+            // (workerType and workerCount must be set together), so rely on maxCapacity instead
+            // workerType: glue.WorkerType.G_2X,
+            // workerCount: 2,
+            maxConcurrentRuns: 200,
+            maxRetries: 1,
+            connections: [connection],
+            maxCapacity: 1,
+            role: glueRole,
+            defaultArguments: {
+                '--S3_BUCKET.$': sfn.JsonPath.stringAt('$.s3Bucket'),
+                '--S3_PREFIX.$': sfn.JsonPath.stringAt('$.s3Prefix'),
+                '--QA_ENHANCEMENT.$': sfn.JsonPath.stringAt('$.qaEnhance'),
+                '--AOS_ENDPOINT': props._domainEndpoint,
+                '--REGION': props._region,
+                '--EMBEDDING_MODEL_ENDPOINT': props._embeddingEndpoint,
+                '--DOC_INDEX_TABLE': 'chatbot-index',
+                '--additional-python-modules': 'langchain==0.0.312,beautifulsoup4==4.12.2,requests-aws4auth==1.2.3,boto3==1.28.84,openai==0.28.1,nougat-ocr==0.1.17,pyOpenSSL==23.3.0,tenacity==8.2.3,markdownify==0.11.6,mammoth==1.6.0,chardet==5.2.0',
+                // Add multiple extra Python files
+                '--extra-py-files': extraPythonFilesList
+            }
+        });
+
+        // Create an SNS topic and subscription to notify when the Glue job completes
+        const topic = new sns.Topic(this, 'etl-topic', {
+            displayName: 'etl-topic',
+            topicName: 'etl-topic',
+        });
+        topic.addSubscription(new subscriptions.EmailSubscription(props._subEmail));
+
+        // Lambda function for file deduplication and Glue job allocation based on the number of files
+        const lambdaETL = new DockerImageFunction(this,
+            "lambdaETL", {
+            code: DockerImageCode.fromImageAsset(join(__dirname, "../../../lambda/etl")),
+            timeout: Duration.minutes(15),
+            memorySize: 1024,
+            architecture: Architecture.X86_64,
+        });
+
+        lambdaETL.addToRolePolicy(new iam.PolicyStatement({
+            actions: [
+                // Glue job
+                "glue:StartJobRun",
+                "s3:List*",
+                "s3:Put*",
+                "s3:Get*",
+            ],
+            effect: iam.Effect.ALLOW,
+            resources: ['*'],
+        }
+        ))
+
+        const lambdaETLIntegration = new tasks.LambdaInvoke(this, 'lambdaETLIntegration', {
+            lambdaFunction: lambdaETL,
+            // Use the result of this invocation to decide how many Glue jobs to run
+            resultSelector: {
+                "processedPayload": {
+                    'batchIndices.$': '$.Payload.batchIndices',
+                    's3Bucket.$': '$.Payload.s3Bucket',
+                    's3Prefix.$': '$.Payload.s3Prefix',
+                    'qaEnhance.$': '$.Payload.qaEnhance',
+                    'offline.$': '$.Payload.offline',
+                }
+            },
+            // We still need the original input
+            resultPath: '$.TaskResult',
+            outputPath: '$.TaskResult.processedPayload',
+        });
+
+        const offlineChoice = new sfn.Choice(this, 'Offline or Online', {
+            comment: 'Check if the job is offline or online',
+        });
+
+        const offlineGlueJob = new tasks.GlueStartJobRun(this, 'OfflineGlueJob', {
+            glueJobName: glueJob.jobName,
+            integrationPattern: sfn.IntegrationPattern.RUN_JOB,
+            arguments: sfn.TaskInput.fromObject({
+                '--job-language': 'python',
+                '--JOB_NAME': glueJob.jobName,
+                '--S3_BUCKET.$': '$.s3Bucket',
+                '--S3_PREFIX.$': '$.s3Prefix',
+                '--AOS_ENDPOINT': props._domainEndpoint,
+                '--EMBEDDING_MODEL_ENDPOINT': props._embeddingEndpoint,
+                '--REGION': props._region,
+                '--OFFLINE': 'true',
+                '--QA_ENHANCEMENT.$': '$.qaEnhance',
+                // Convert the numeric index to a string
+                '--BATCH_INDICE.$': 'States.Format(\'{}\', $.batchIndices)',
+                '--ProcessedObjectsTable': table.tableName,
+            }),
+        });
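+
+        // Illustrative Step Functions input for the offline branch (not part of
+        // the original patch; the field names follow the resultSelector above):
+        //
+        //   {
+        //     "s3Bucket": "llm-bot-documents-xxxx-us-east-1",
+        //     "s3Prefix": "documents/",
+        //     "qaEnhance": "false",
+        //     "offline": true,
+        //     "batchIndices": [0, 1, 2]
+        //   }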
+
+        // Define a Map state to run multiple Glue jobs in parallel based on the number of files to process
+        const mapState = new sfn.Map(this, 'MapState', {
+            // inputPath should point to the root since we want to pass the entire payload to the iterator
+            inputPath: '$',
+            // itemsPath should reference an array; we construct this array from batchIndices
+            itemsPath: sfn.JsonPath.stringAt('$.batchIndices'),
+            // Set the max concurrency to 0 to run all the jobs in parallel
+            maxConcurrency: 0,
+            parameters: {
+                // These parameters are passed to each iteration of the map state
+                's3Bucket.$': '$.s3Bucket',
+                's3Prefix.$': '$.s3Prefix',
+                'qaEnhance.$': '$.qaEnhance',
+                // 'index' is a special variable within the Map state that represents the current index
+                'batchIndices.$': '$$.Map.Item.Index' // Add this if you need to know the index of the current item in the map state
+            },
+            resultPath: '$.mapResults',
+        });
+
+        mapState.iterator(offlineGlueJob);
+
+        // Multiplex the same Glue job for offline and online processing
+        const onlineGlueJob = new tasks.GlueStartJobRun(this, 'OnlineGlueJob', {
+            glueJobName: glueJob.jobName,
+            integrationPattern: sfn.IntegrationPattern.RUN_JOB,
+            arguments: sfn.TaskInput.fromObject({
+                '--job-language': 'python',
+                '--JOB_NAME': glueJob.jobName,
+                '--S3_BUCKET.$': '$.s3Bucket',
+                '--S3_PREFIX.$': '$.s3Prefix',
+                '--AOS_ENDPOINT': props._domainEndpoint,
+                '--EMBEDDING_MODEL_ENDPOINT': props._embeddingEndpoint,
+                '--REGION': props._region,
+                '--OFFLINE': 'false',
+                '--QA_ENHANCEMENT.$': '$.qaEnhance',
+            }),
+        });
+
+        // Notify the result of the Glue job
+        const notifyTask = new tasks.SnsPublish(this, 'NotifyTask', {
+            integrationPattern: sfn.IntegrationPattern.REQUEST_RESPONSE,
+            topic: topic,
+            message: sfn.TaskInput.fromText(`Glue job ${glueJob.jobName} completed!`),
+        });
+
+        offlineChoice.when(sfn.Condition.booleanEquals('$.offline', true), mapState)
+            .when(sfn.Condition.booleanEquals('$.offline', false), onlineGlueJob)
+
+        // Add the notify task to both the online and offline branches
+        mapState.next(notifyTask);
+        onlineGlueJob.next(notifyTask);
+
+        const sfnDefinition = lambdaETLIntegration.next(offlineChoice)
+
+        const sfnStateMachine = new sfn.StateMachine(this, 'ETLState', {
+            definitionBody: sfn.DefinitionBody.fromChainable(sfnDefinition),
+            stateMachineType: sfn.StateMachineType.STANDARD,
+            // Align with the Glue job timeout (48 hours)
+            timeout: Duration.minutes(2880),
+        });
+
+        // Export the Step Function to be used in API Gateway
+        this._sfnOutput = sfnStateMachine;
+        this._jobName = glueJob.jobName;
+        this._jobArn = glueJob.jobArn;
+        this._processedObjectsTable = table.tableName
+    }
+}
\ No newline at end of file
diff --git a/source/infrastructure/lib/model/assets-stack.ts b/source/infrastructure/lib/model/assets-stack.ts
new file mode 100644
index 00000000..b80d600c
--- /dev/null
+++ b/source/infrastructure/lib/model/assets-stack.ts
@@ -0,0 +1,114 @@
+import { NestedStack, StackProps } from 'aws-cdk-lib';
+import { Construct } from 'constructs';
+
+import * as s3 from 'aws-cdk-lib/aws-s3';
+import * as s3assets from 'aws-cdk-lib/aws-s3-assets';
+import * as s3deploy from 'aws-cdk-lib/aws-s3-deployment';
+import { join } from "path";
+import * as dotenv from "dotenv";
+dotenv.config();
+
+interface assetsStackProps extends StackProps {
+    _s3ModelAssets: string;
+}
+
+export class AssetsStack extends NestedStack {
+    _crossCodePrefix;
+    _embeddingCodePrefix;
+    _instructCodePrefix;
+
+    constructor(scope: Construct, id: string, props: assetsStackProps) {
+        super(scope, id, props);
+
+        // Prepare model assets downloaded from Hugging Face via the per-model script
+
+        // Check if _s3ModelAssets is provided; create a new S3 bucket if not
+        const _S3Bucket = props._s3ModelAssets ? 
s3.Bucket.fromBucketName(this, 'llm-rag', props._s3ModelAssets) : new s3.Bucket(this, 'llm-rag', { + // Fixed name for serving.properties for now, default is llm-rag inherit from main stack + bucketName: props._s3ModelAssets, + blockPublicAccess: s3.BlockPublicAccess.BLOCK_ALL, + }); + + // const crossModelPrefix = props._s3BucketPrefix + const crossModelPrefix = 'buffer-cross-001-model' + const crossCodePrefix = 'buffer_cross_001_deploy_code' + const embeddingModelPrefix = 'buffer-embedding-002-model' + const embeddingCodePrefix = 'buffer_embedding_002_deploy_code' + const instructModelPrefix = 'buffer-instruct-003-model' + const instructCodePrefix = 'buffer_instruct_003_deploy_code' + + // CROSS MODEL + // Define a local asset for code + const crossCodeAsset = new s3assets.Asset(this, 'crossCodeAsset', { + path: join(__dirname, '../../../model/cross/code'), + }); + + const crossCodeAssetDeployment = new s3deploy.BucketDeployment(this, 'crossCodeAssetDeployment', { + sources: [s3deploy.Source.asset(join(__dirname, '../../../model/cross/code'))], + destinationBucket: _S3Bucket, + destinationKeyPrefix: crossCodePrefix, + }); + this._crossCodePrefix = crossCodePrefix + + // EMBEDDING MODEL + // Define a local asset for code + const embeddingCodeAsset = new s3assets.Asset(this, 'embeddingCodeAsset', { + path: join(__dirname, '../../../model/embedding/code'), + }); + + const embeddingCodeAssetDeployment = new s3deploy.BucketDeployment(this, 'embeddingCodeAssetDeployment', { + sources: [s3deploy.Source.asset(join(__dirname, '../../../model/embedding/code'))], + destinationBucket: _S3Bucket, + destinationKeyPrefix: embeddingCodePrefix, + }); + this._embeddingCodePrefix = embeddingCodePrefix + + // INSTRUCT MODEL + // Define a local asset for code + const instructCodeAsset = new s3assets.Asset(this, 'instructCodeAsset', { + path: join(__dirname, '../../../model/instruct/code'), + }); + + const instructCodeAssetDeployment = new s3deploy.BucketDeployment(this, 'instructCodeAssetDeployment', { + sources: [s3deploy.Source.asset(join(__dirname, '../../../model/instruct/code'))], + destinationBucket: _S3Bucket, + destinationKeyPrefix: instructCodePrefix, + }); + this._instructCodePrefix = instructCodePrefix + + // Skip the deployment if _s3ModelAssets is provided + if (!props._s3ModelAssets) { + // Define a local asset for model + const crossModelAsset = new s3assets.Asset(this, 'ModelAsset', { + path: join(__dirname, '../../../model/cross/model'), + }); + const crossModelAssetDeployment = new s3deploy.BucketDeployment(this, 'crossModelAssetDeployment', { + sources: [s3deploy.Source.asset(join(__dirname, '../../../model/cross/model'))], + destinationBucket: _S3Bucket, + destinationKeyPrefix: crossModelPrefix, + // memoryLimit: 4096, + }); + + // Define a local asset for model + const embeddingModelAsset = new s3assets.Asset(this, 'embeddingModelAsset', { + path: join(__dirname, '../../../model/embedding/model'), + }); + const embeddingModelAssetDeployment = new s3deploy.BucketDeployment(this, 'embeddingModelAssetDeployment', { + sources: [s3deploy.Source.asset(join(__dirname, '../../../model/embedding/model'))], + destinationBucket: _S3Bucket, + destinationKeyPrefix: embeddingModelPrefix, + }); + + // Define a local asset for model + const instructModelAsset = new s3assets.Asset(this, 'instructModelAsset', { + path: join(__dirname, '../../../model/instruct/model'), + }); + + const instructModelAssetDeployment = new s3deploy.BucketDeployment(this, 'instructModelAssetDeployment', { + sources: 
[s3deploy.Source.asset(join(__dirname, '../../../model/instruct/model'))], + destinationBucket: _S3Bucket, + destinationKeyPrefix: instructModelPrefix, + }); + } + } +} \ No newline at end of file diff --git a/source/infrastructure/lib/model/llm-stack.ts b/source/infrastructure/lib/model/llm-stack.ts new file mode 100644 index 00000000..16ff379d --- /dev/null +++ b/source/infrastructure/lib/model/llm-stack.ts @@ -0,0 +1,172 @@ +import { NestedStack, StackProps } from 'aws-cdk-lib'; +import { Construct } from 'constructs'; + +import * as iam from 'aws-cdk-lib/aws-iam'; +import * as sagemaker from 'aws-cdk-lib/aws-sagemaker'; +import * as dotenv from "dotenv"; + +dotenv.config(); + +interface llmStackProps extends StackProps { + _s3ModelAssets: string; + _crossCodePrefix: string; + _embeddingCodePrefix: string; + _instructCodePrefix: string; +} + +export class LLMStack extends NestedStack { + _crossEndPoint; + _embeddingEndPoint; + _instructEndPoint; + + constructor(scope: Construct, id: string, props: llmStackProps) { + super(scope, id, props); + + // Prepare model asset to download from Hugging Face follow script + + // Specify s3 bucket and prefix for model + // const _S3Bucket = new s3.Bucket(this, 'llm-rag', { + // // Fixed name for serving.properties for now + // bucketName: "llm-rag", + // blockPublicAccess: s3.BlockPublicAccess.BLOCK_ALL, + // }); + + // // Create a Lambda function + // const fn = new lambda.Function(this, 'justFunction', { + // runtime: lambda.Runtime.NODEJS_14_X, + // handler: 'index.handler', + // code: lambda.Code.fromAsset(path.join(__dirname, 'lambda/custom')), + // environment: { + // BUCKET_NAME: _S3Bucket.bucketName, + // }, + // }); + + // // Allow the Lambda function to put objects in the S3 bucket + // _S3Bucket.grantPut(fn); + + // // Create a custom resource that triggers the Lambda function + // new cr.AwsCustomResource(this, 'uploadModelAssets', { + // onCreate: { + // service: 'Lambda', + // action: 'invoke', + // parameters: { + // FunctionName: fn.functionName, + // }, + // physicalResourceId: cr.PhysicalResourceId.of('uploadModelAssets'), + // }, + // policy: cr.AwsCustomResourcePolicy.fromSdkCalls({resources: cr.AwsCustomResourcePolicy.ANY_RESOURCE}), + // }); + + // Create IAM execution role + const executionRole = new iam.Role(this, 'cross-execution-role', { + assumedBy: new iam.ServicePrincipal('sagemaker.amazonaws.com'), + managedPolicies: [ + iam.ManagedPolicy.fromAwsManagedPolicyName('AmazonSageMakerFullAccess'), + iam.ManagedPolicy.fromAwsManagedPolicyName('AmazonS3FullAccess'), + iam.ManagedPolicy.fromAwsManagedPolicyName('CloudWatchLogsFullAccess'), + ], + }); + + // CROSS MODEL + // Create model, BucketDeployment construct automatically handles dependencies to ensure model assets uploaded before creating the model in this.region + const crossImageUrl = '763104351884.dkr.ecr.'+ this.region +'.amazonaws.com/djl-inference:0.21.0-deepspeed0.8.3-cu117' + const crossModel = new sagemaker.CfnModel(this, 'cross-model', { + executionRoleArn: executionRole.roleArn, + primaryContainer: { + image: crossImageUrl, + modelDataUrl: `s3://${props._s3ModelAssets}/${props._crossCodePrefix}/cross_model.tar.gz`, + environment: { + S3_CODE_PREFIX: props._crossCodePrefix, + }, + }, + }); + + // Create endpoint configuration, refer to https://docs.aws.amazon.com/cdk/api/v2/docs/aws-cdk-lib.aws_sagemaker.CfnEndpointConfig.html for full options + const crossEndpointConfig = new sagemaker.CfnEndpointConfig(this, 'cross-endpoint-config', { + productionVariants: [{ 
+ initialVariantWeight: 1.0, + modelName: crossModel.attrModelName, + variantName: 'variantProd', + containerStartupHealthCheckTimeoutInSeconds: 15*60, + initialInstanceCount: 1, + instanceType: 'ml.g4dn.xlarge', + }], + }); + + // Create endpoint + const crossEndpoint = new sagemaker.CfnEndpoint(this, 'cross-endpoint', { + endpointConfigName: crossEndpointConfig.attrEndpointConfigName, + endpointName: 'cross-endpoint', + }); + + this._crossEndPoint = crossEndpoint.endpointName; + + // EMBEDDING MODEL + // Create model, BucketDeployment construct automatically handles dependencies to ensure model assets uploaded before creating the model in this.region + const embeddingImageUrl = '763104351884.dkr.ecr.'+ this.region +'.amazonaws.com/djl-inference:0.21.0-deepspeed0.8.3-cu117' + const embeddingModel = new sagemaker.CfnModel(this, 'embedding-model', { + executionRoleArn: executionRole.roleArn, + primaryContainer: { + image: embeddingImageUrl, + modelDataUrl: `s3://${props._s3ModelAssets}/${props._embeddingCodePrefix}/s2e_model.tar.gz`, + environment: { + S3_CODE_PREFIX: props._embeddingCodePrefix, + }, + }, + }); + + // Create endpoint configuration, refer to https://docs.aws.amazon.com/cdk/api/v2/docs/aws-cdk-lib.aws_sagemaker.CfnEndpointConfig.html for full options + const embeddingEndpointConfig = new sagemaker.CfnEndpointConfig(this, 'embedding-endpoint-config', { + productionVariants: [{ + initialVariantWeight: 1.0, + modelName: embeddingModel.attrModelName, + variantName: 'variantProd', + containerStartupHealthCheckTimeoutInSeconds: 15*60, + initialInstanceCount: 1, + instanceType: 'ml.g4dn.xlarge', + }], + }); + + // Create endpoint + const embeddingEndpoint = new sagemaker.CfnEndpoint(this, 'embedding-endpoint', { + endpointConfigName: embeddingEndpointConfig.attrEndpointConfigName, + endpointName: 'embedding-endpoint', + }); + + this._embeddingEndPoint = embeddingEndpoint.endpointName; + + // INSTRUCT MODEL + // Create model, BucketDeployment construct automatically handles dependencies to ensure model assets uploaded before creating the model in this.region + const instructImageUrl = '763104351884.dkr.ecr.'+ this.region +'.amazonaws.com/djl-inference:0.21.0-deepspeed0.8.3-cu117' + const instructModel = new sagemaker.CfnModel(this, 'instruct-model', { + executionRoleArn: executionRole.roleArn, + primaryContainer: { + image: instructImageUrl, + modelDataUrl: `s3://${props._s3ModelAssets}/${props._instructCodePrefix}/model.tar.gz`, + environment: { + S3_CODE_PREFIX: props._instructCodePrefix, + }, + }, + }); + + // Create endpoint configuration, refer to https://docs.aws.amazon.com/cdk/api/v2/docs/aws-cdk-lib.aws_sagemaker.CfnEndpointConfig.html for full options + const instructEndpointConfig = new sagemaker.CfnEndpointConfig(this, 'instruct-endpoint-config', { + productionVariants: [{ + initialVariantWeight: 1.0, + modelName: instructModel.attrModelName, + variantName: 'variantProd', + containerStartupHealthCheckTimeoutInSeconds: 15*60, + initialInstanceCount: 1, + instanceType: 'ml.g5.4xlarge', + }], + }); + + // Create endpoint + const instructEndpoint = new sagemaker.CfnEndpoint(this, 'instruct-endpoint', { + endpointConfigName: instructEndpointConfig.attrEndpointConfigName, + endpointName: 'instruct-endpoint', + }); + + this._instructEndPoint = instructEndpoint.endpointName; + } +} \ No newline at end of file diff --git a/source/infrastructure/lib/shared/vpc-stack.ts b/source/infrastructure/lib/shared/vpc-stack.ts new file mode 100644 index 00000000..bc7f9706 --- /dev/null +++ 
b/source/infrastructure/lib/shared/vpc-stack.ts @@ -0,0 +1,43 @@ +import { NestedStack, StackProps } from 'aws-cdk-lib'; +import { Construct } from 'constructs'; + +import * as ec2 from 'aws-cdk-lib/aws-ec2'; + +import * as dotenv from "dotenv"; +dotenv.config(); + +export class VpcStack extends NestedStack { + + _vpc; + _privateSubnets; + _securityGroup; + + constructor(scope: Construct, id: string, props: StackProps = {}) { + super(scope, id, props); + + this._vpc = new ec2.Vpc(this, 'LLM-VPC', { + ipAddresses: ec2.IpAddresses.cidr('10.100.0.0/16'), + maxAzs: 2, + }); + + this._privateSubnets = this._vpc.privateSubnets; + + this._securityGroup = new ec2.SecurityGroup(this, 'LLM-VPC-SG', { + vpc: this._vpc, + description: 'LLM Security Group' + }); + + this._securityGroup.addIngressRule(this._securityGroup, ec2.Port.allTraffic(), 'allow self traffic'); + + this._vpc.addGatewayEndpoint('DynamoDbEndpoint', { + service: ec2.GatewayVpcEndpointAwsService.DYNAMODB, + }); + + this._vpc.addInterfaceEndpoint('Glue', { + service: ec2.InterfaceVpcEndpointAwsService.GLUE, + securityGroups: [this._securityGroup], + subnets: { subnets: this._privateSubnets, }, + }); + + } +} \ No newline at end of file diff --git a/source/infrastructure/lib/tool/ec2-stack.ts b/source/infrastructure/lib/tool/ec2-stack.ts new file mode 100644 index 00000000..c5d5831a --- /dev/null +++ b/source/infrastructure/lib/tool/ec2-stack.ts @@ -0,0 +1,73 @@ + +import { NestedStack, StackProps } from 'aws-cdk-lib'; +import { Construct } from 'constructs'; + +import * as ec2 from "aws-cdk-lib/aws-ec2"; +import * as iam from "aws-cdk-lib/aws-iam"; +import { Asset } from 'aws-cdk-lib/aws-s3-assets'; + +import path from "path"; + +interface Ec2StackProps extends StackProps { + _vpc: ec2.Vpc; + _securityGroup: ec2.SecurityGroup; + _domainEndpoint: string; +} + +export class Ec2Stack extends NestedStack { + _instanceId; + _dnsName; + _publicIP; + + constructor(scope: Construct, id: string, props: Ec2StackProps) { + super(scope, id, props); + const _vpc = props._vpc; + const _securityGroup = props._securityGroup; + const _domainEndpoint = props._domainEndpoint; + + _securityGroup.addIngressRule(ec2.Peer.anyIpv4(), ec2.Port.tcp(22), 'Allow SSH Access') + _securityGroup.addIngressRule(ec2.Peer.anyIpv4(), ec2.Port.tcp(443), 'Allow HTTPS Access') + _securityGroup.addIngressRule(ec2.Peer.anyIpv4(), ec2.Port.tcp(8081), 'Allow HTTP 8081 port Access') + _securityGroup.addIngressRule(ec2.Peer.anyIpv4(), ec2.Port.tcp(80), 'Allow HTTP Access') + _securityGroup.addIngressRule(_securityGroup, ec2.Port.allTraffic(), 'Allow Self Access') + + const role = new iam.Role(this, 'ec2Role', { + assumedBy: new iam.ServicePrincipal('ec2.amazonaws.com') + }) + + role.addManagedPolicy(iam.ManagedPolicy.fromAwsManagedPolicyName('AmazonSSMManagedInstanceCore')) + + const ami = new ec2.AmazonLinuxImage({ + generation: ec2.AmazonLinuxGeneration.AMAZON_LINUX_2, + cpuType: ec2.AmazonLinuxCpuType.X86_64 + }); + + // Create the instance using the Security Group, AMI, and KeyPair defined in the VPC created + const ec2Instance = new ec2.Instance(this, 'ProxyInstance', { + vpc: _vpc, + instanceType: ec2.InstanceType.of(ec2.InstanceClass.T3, ec2.InstanceSize.MICRO), + machineImage: ami, + securityGroup: _securityGroup, + vpcSubnets: { subnetType: ec2.SubnetType.PUBLIC, }, + // specify the key name for the instance for debugging purposes + // keyName: 'us-east-1', + }); + + const asset = new Asset(this, 'UserdataAsset', { path: path.join(__dirname, 
'../../../sample/script/ec2config.sh') }); + const localPath = ec2Instance.userData.addS3DownloadCommand({ + bucket: asset.bucket, + bucketKey: asset.s3ObjectKey, + }); + + ec2Instance.userData.addExecuteFileCommand({ + filePath: localPath, + // pass _domainEndpoint as an argument to the script + arguments: _domainEndpoint, + }); + asset.grantRead(ec2Instance.role); + + this._instanceId = ec2Instance.instanceId; + this._dnsName = ec2Instance.instancePublicDnsName; + this._publicIP = ec2Instance.instancePublicIp; + } +} \ No newline at end of file diff --git a/source/infrastructure/lib/vector-store/os-stack.ts b/source/infrastructure/lib/vector-store/os-stack.ts new file mode 100644 index 00000000..90c7761f --- /dev/null +++ b/source/infrastructure/lib/vector-store/os-stack.ts @@ -0,0 +1,47 @@ +import { NestedStack, StackProps, RemovalPolicy } from 'aws-cdk-lib'; +import { Construct } from 'constructs'; +import { EngineVersion, Domain} from 'aws-cdk-lib/aws-opensearchservice'; +import * as ec2 from 'aws-cdk-lib/aws-ec2'; +import * as iam from "aws-cdk-lib/aws-iam"; + +interface osStackProps extends StackProps { + _vpc: ec2.Vpc; + _securityGroup: ec2.SecurityGroup; +} + +export class OpenSearchStack extends NestedStack { + _domainEndpoint; + _domain; + + constructor(scope: Construct, id: string, props: osStackProps) { + super(scope, id, props); + + const devDomain = new Domain(this, 'Domain', { + version: EngineVersion.OPENSEARCH_2_5, + removalPolicy: RemovalPolicy.DESTROY, + vpc:props._vpc, + zoneAwareness: { + enabled:true + }, + securityGroups: [props._securityGroup], + capacity: { + dataNodes: 2, + }, + ebs: { + volumeSize: 300, + volumeType: ec2.EbsDeviceVolumeType.GENERAL_PURPOSE_SSD_GP3, + }, + }); + + devDomain.addAccessPolicies(new iam.PolicyStatement({ + actions: ['es:*'], + effect: iam.Effect.ALLOW, + principals:[new iam.AnyPrincipal()], + resources: [`${devDomain.domainArn}/*`], + })) + + this._domainEndpoint = devDomain.domainEndpoint; + this._domain = devDomain; + + } +} \ No newline at end of file diff --git a/package.json b/source/infrastructure/package.json similarity index 100% rename from package.json rename to source/infrastructure/package.json diff --git a/tsconfig.dev.json b/source/infrastructure/tsconfig.dev.json similarity index 100% rename from tsconfig.dev.json rename to source/infrastructure/tsconfig.dev.json diff --git a/tsconfig.json b/source/infrastructure/tsconfig.json similarity index 100% rename from tsconfig.json rename to source/infrastructure/tsconfig.json diff --git a/source/lambda/custom/index.js b/source/lambda/custom/index.js new file mode 100644 index 00000000..7bfa61b7 --- /dev/null +++ b/source/lambda/custom/index.js @@ -0,0 +1,31 @@ +const AWS = require('aws-sdk'); +const fs = require('fs'); +const tar = require('tar'); + +// obsolete for now, use script to upload model.tar.gz to s3 instead +exports.handler = async (event) => { + const s3 = new AWS.S3(); + const bucketName = process.env.BUCKET_NAME; + const key = 'model.tar.gz'; + + // Create files A and B + fs.writeFileSync('/tmp/fileA.txt', 'Content of file A'); + fs.writeFileSync('/tmp/fileB.txt', 'Content of file B'); + + // Package the files into model.tar.gz + await tar.c({ + gzip: true, + file: '/tmp/model.tar.gz', + cwd: '/tmp', + }, ['fileA.txt', 'fileB.txt']); + + // Upload model.tar.gz to the S3 bucket + const fileStream = fs.createReadStream('/tmp/model.tar.gz'); + await s3.upload({ + Bucket: bucketName, + Key: key, + Body: fileStream, + }).promise(); + + console.log(`Uploaded 
model.tar.gz to s3://${bucketName}/${key}`);
+};
diff --git a/source/lambda/ddb/rating.py b/source/lambda/ddb/rating.py
new file mode 100644
index 00000000..67e4866c
--- /dev/null
+++ b/source/lambda/ddb/rating.py
@@ -0,0 +1,63 @@
+import datetime
+import json
+import boto3
+import os
+import uuid
+
+def lambda_handler(event, context):
+    dynamodb = boto3.resource('dynamodb')
+    table_name = os.getenv('TABLE_NAME')
+    session_table = dynamodb.Table(table_name)
+
+    http_method = event['httpMethod']
+
+    try:
+        if http_method == 'POST':
+            return post_handler(event, session_table)
+        else:
+            return {
+                'statusCode': 400,
+                'body': json.dumps({
+                    'message': 'Invalid request method'
+                })
+            }
+    except Exception as e:
+        # Return an error response
+        return {
+            'statusCode': 500,
+            'body': json.dumps({'error': str(e)})
+        }
+
+
+def post_handler(event, session_table):
+    # The API Gateway proxy integration delivers the body as a JSON string
+    body = json.loads(event['body'])
+    session_id = str(uuid.uuid1())
+    required_fields = ['question_content', 'question_answer', 'answer_rating']
+
+    if not all(field in body for field in required_fields):
+        return {
+            'statusCode': 400,
+            'body': json.dumps({
+                'message': 'Missing required fields'
+            })
+        }
+    session_creation_date = datetime.datetime.now().strftime("%m/%d/%y,%H:%M:%S")
+
+    # Insert the rating record into the table
+    response = session_table.put_item(
+        Item={
+            "session_id": session_id,
+            "question_content": body['question_content'],
+            "question_answer": body['question_answer'],
+            "revised_answer": body['revised_answer'] if 'revised_answer' in body else None,
+            "answer_rating": body['answer_rating'],
+            "session_creation_date": session_creation_date,
+        }
+    )
+
+    return {
+        'statusCode': 200,
+        'body': json.dumps({
+            'message': 'Data inserted successfully'
+        })
+    }
\ No newline at end of file
diff --git a/source/lambda/embedding/Dockerfile b/source/lambda/embedding/Dockerfile
new file mode 100644
index 00000000..961f6054
--- /dev/null
+++ b/source/lambda/embedding/Dockerfile
@@ -0,0 +1,9 @@
+FROM --platform=linux/amd64 public.ecr.aws/lambda/python:3.9
+
+COPY requirements.txt .
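+# Install Python dependencies into the Lambda task root so they are importable at runtime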
+RUN pip3 install -r requirements.txt --target "${LAMBDA_TASK_ROOT}" + +COPY main.py ${LAMBDA_TASK_ROOT} +COPY ./utils/* ${LAMBDA_TASK_ROOT} + +CMD [ "main.lambda_handler" ] diff --git a/source/lambda/embedding/main.py b/source/lambda/embedding/main.py new file mode 100644 index 00000000..300a9a63 --- /dev/null +++ b/source/lambda/embedding/main.py @@ -0,0 +1,228 @@ +import os +import time +import json +import logging +import numpy as np +import boto3, json +import tempfile +import nltk + +from langchain.document_loaders import S3DirectoryLoader +from langchain.vectorstores import OpenSearchVectorSearch +from langchain.document_loaders.unstructured import UnstructuredFileLoader +from langchain.text_splitter import RecursiveCharacterTextSplitter +from langchain.schema.document import Document + +from sm_utils import create_sagemaker_embeddings_from_js_model +from requests_aws4auth import AWS4Auth +from aos_utils import OpenSearchClient + +from opensearchpy import OpenSearch, RequestsHttpConnection + +# global constants +MAX_FILE_SIZE = 1024*1024*100 # 100MB +MAX_OS_DOCS_PER_PUT = 20 +CHUNK_SIZE_FOR_DOC_SPLIT = 600 +CHUNK_OVERLAP_FOR_DOC_SPLIT = 20 + +logger = logging.getLogger() +# logging.basicConfig(format='%(asctime)s,%(module)s,%(processName)s,%(levelname)s,%(message)s', level=logging.INFO, stream=sys.stderr) +logger.setLevel(logging.INFO) + +# fetch all the environment variables +_document_bucket = os.environ.get('document_bucket') +_embeddings_model_endpoint_name = os.environ.get('embedding_endpoint') +_opensearch_cluster_domain = os.environ.get('opensearch_cluster_domain') + +s3 = boto3.resource('s3') +aws_region = boto3.Session().region_name +document_bucket = s3.Bucket(_document_bucket) +credentials = boto3.Session().get_credentials() +awsauth = AWS4Auth(credentials.access_key, credentials.secret_key, aws_region, 'es', session_token=credentials.token) + +def load_documents(prefix=""): + docs = [] + for obj in document_bucket.objects.filter(Prefix=prefix): + if obj.key.endswith("/"): # bypass the prefix directory + continue + else: + # loader = S3FileLoader(bucket, obj.key) + with tempfile.TemporaryDirectory(dir='/tmp') as temp_dir: + file_path = f"{temp_dir}/{obj.key}" + logging.info(f"_document_bucket={_document_bucket}, obj.key={obj.key}, file_path={file_path}") + os.makedirs(os.path.dirname(file_path), exist_ok=True) + s3.meta.client.download_file(_document_bucket, obj.key, file_path) + + loader = UnstructuredFileLoader(file_path) + # return loader.load() + docs.extend(loader.load()) + return docs + +def split_documents(docs): + text_splitter = RecursiveCharacterTextSplitter( + # Set a really small chunk size, just to show. 
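+        # 600-character chunks with a 20-character overlap (constants defined above)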
+        chunk_size = CHUNK_SIZE_FOR_DOC_SPLIT,
+        chunk_overlap = CHUNK_OVERLAP_FOR_DOC_SPLIT,
+        length_function = len,
+    )
+
+    # add custom metadata fields: timestamp and embeddings_model
+    for doc in docs:
+        doc.metadata['timestamp'] = time.time()
+        doc.metadata['embeddings_model'] = _embeddings_model_endpoint_name
+    chunks = text_splitter.create_documents([doc.page_content for doc in docs], metadatas=[doc.metadata for doc in docs])
+    return chunks
+
+def load_processed_documents(prefix=""):
+    chunks = []
+    for obj in document_bucket.objects.filter(Prefix=prefix):
+        if obj.key.endswith("/"): # skip the prefix directory itself
+            continue
+        else:
+            # loader = S3FileLoader(bucket, obj.key)
+            with tempfile.TemporaryDirectory(dir='/tmp') as temp_dir:
+                file_path = f"{temp_dir}/{obj.key}"
+                logging.info(f"_document_bucket={_document_bucket}, obj.key={obj.key}, file_path={file_path}")
+                os.makedirs(os.path.dirname(file_path), exist_ok=True)
+                s3.meta.client.download_file(_document_bucket, obj.key, file_path)
+
+                file_content = json.load(open(file_path, 'r'))
+                for raw_chunk in file_content:
+                    chunk_source = raw_chunk.get('source') if isinstance(raw_chunk.get('source'), str) else "CSDC & DGR Data 20230830"
+                    chunk = Document(page_content=raw_chunk['content'], metadata={"source": chunk_source})
+                    chunks.append(chunk)
+
+    for chunk in chunks:
+        chunk.metadata['timestamp'] = time.time()
+        chunk.metadata['embeddings_model'] = _embeddings_model_endpoint_name
+
+    return chunks
+
+def process_shard(shard, embeddings_model_endpoint_name, aws_region, os_index_name, os_domain_ep, os_http_auth) -> int:
+    logger.info(f'Starting process_shard with content: {shard}')
+    st = time.time()
+    embeddings = create_sagemaker_embeddings_from_js_model(embeddings_model_endpoint_name, aws_region)
+    docsearch = OpenSearchVectorSearch(
+        index_name=os_index_name,
+        embedding_function=embeddings,
+        opensearch_url="https://{}".format(os_domain_ep),
+        http_auth = os_http_auth,
+        use_ssl = True,
+        verify_certs = True,
+        connection_class = RequestsHttpConnection
+    )
+    docsearch.add_documents(documents=shard)
+    et = time.time() - st
+    logger.info(f'Shard completed in {et} seconds.')
+    return 0
+
+def lambda_handler(event, context):
+    request_timestamp = time.time()
+    logger.info(f'request_timestamp :{request_timestamp}')
+    logger.info(f"event:{event}")
+    logger.info(f"context:{context}")
+
+    # parse arguments from event
+    event_body = json.loads(event['body'])
+    index_name = event_body['aos_index']
+    operation = event_body['operation']
+    body = event_body['body']
+    aos_client = OpenSearchClient(_opensearch_cluster_domain)
+    # re-route GET request to a separate processing branch
+    if event['httpMethod'] == 'GET':
+        if operation == 'query':
+            response = aos_client.query(index_name, json.dumps(body))
+        elif operation == 'match_all':
+            response = aos_client.match_all(index_name)
+        else:
+            raise Exception(f'Invalid query operation: {operation}')
+        return {
+            'statusCode': 200,
+            'headers': {'Content-Type': 'application/json'},
+            'body': json.dumps(response)
+        }
+    elif event['httpMethod'] == 'POST':
+        if operation == 'delete':
+            response = aos_client.delete_index(index_name)
+        elif operation == 'create':
+            logger.info(f'create index with query: {json.dumps(body)}')
+            response = aos_client.create_index(index_name, json.dumps(body))
+        else:
+            raise Exception(f'Invalid query operation: {operation}')
+        return {
+            'statusCode': 200,
+            'headers': {'Content-Type': 'application/json'},
+            'body': json.dumps(response)
+        }
+
+    # parse arguments from 
event + prefix = json.loads(event['body'])['document_prefix'] + file_processed = json.loads(event['body']).get('file_processed', False) + + # Set the NLTK data path to the /tmp directory (writable in AWS Lambda) + nltk.data.path.append("/tmp") + # List of NLTK packages to download + nltk_packages = ['punkt', 'averaged_perceptron_tagger'] + # Download the required NLTK packages to /tmp + for package in nltk_packages: + nltk.download(package, download_dir='/tmp') + + aos_client = OpenSearch( + hosts = [{'host': _opensearch_cluster_domain.replace("https://", ""), 'port': 443}], + http_auth = awsauth, + use_ssl = True, + verify_certs = True, + connection_class = RequestsHttpConnection, + region=aws_region + ) + + # iterate all files within specific s3 prefix in bucket llm-bot-documents and print out file number and total size + total_size = 0 + total_files = 0 + for obj in document_bucket.objects.filter(Prefix=prefix): + total_files += 1 + total_size += obj.size + logger.info(f'total_files:{total_files}, total_size:{total_size}') + # raise error and return if the total size is larger than 100MB + if total_size > MAX_FILE_SIZE: + raise Exception(f'total_size:{total_size} is larger than {MAX_FILE_SIZE}') + + # split all docs into chunks + st = time.time() + logger.info('Loading documents ...') + if file_processed: + chunks = load_processed_documents(prefix=prefix) + else: + docs = load_documents(prefix=prefix) + chunks = split_documents(docs) + + et = time.time() - st + # [Document(page_content = 'xx', metadata = { 'source': '/tmp/xx/xx.pdf', 'timestamp': 123.456, 'embeddings_model': 'embedding-endpoint'})], + logger.info(f'Time taken: {et} seconds. {len(chunks)} chunks generated') + + st = time.time() + db_shards = (len(chunks) // MAX_OS_DOCS_PER_PUT) + 1 + shards = np.array_split(chunks, db_shards) + logger.info(f'Loading chunks into vector store ... using {db_shards} shards, shards content: {shards}') + + # TBD, create index if not exists instead of using API in AOS console manually + # Reply: Langchain has already implemented the code to create index if not exists + # Refer Link: https://github.com/langchain-ai/langchain/blob/eb3d1fa93caa26d497e5b5bdf6134d266f6a6990/libs/langchain/langchain/vectorstores/opensearch_vector_search.py#L120 + exists = aos_client.indices.exists(index_name) + logger.info(f"index_name={index_name}, exists={exists}") + + # shard_start_index = 1 + for shard_id, shard in enumerate(shards): + process_shard(shards[shard_id].tolist(), _embeddings_model_endpoint_name, aws_region, index_name, _opensearch_cluster_domain, awsauth) + + et = time.time() - st + logger.info(f'Time taken: {et} seconds. 
all shards processed') + + return { + 'statusCode': 200, + 'headers': {'Content-Type': 'application/json'}, + 'body': json.dumps({ + "created": request_timestamp, + "model": _embeddings_model_endpoint_name, + }) + } + diff --git a/source/lambda/embedding/requirements.txt b/source/lambda/embedding/requirements.txt new file mode 100644 index 00000000..e6914666 --- /dev/null +++ b/source/lambda/embedding/requirements.txt @@ -0,0 +1,9 @@ +langchain==0.0.312 +opensearch-py==2.2.0 +faiss_cpu==1.7.4 +sagemaker==2.48.0 +numpy==1.22.0 +# nltk==3.8.1 +requests-aws4auth==1.2.3 +unstructured==0.10.5 +unstructured[pdf] diff --git a/source/lambda/embedding/utils/aos_utils.py b/source/lambda/embedding/utils/aos_utils.py new file mode 100644 index 00000000..7d1cb88e --- /dev/null +++ b/source/lambda/embedding/utils/aos_utils.py @@ -0,0 +1,146 @@ +import boto3 +import json +from typing import List + +from requests_aws4auth import AWS4Auth +from opensearchpy import OpenSearch, RequestsHttpConnection + +import logging +logger = logging.getLogger() +logger.setLevel(logging.INFO) + +credentials = boto3.Session().get_credentials() +region = boto3.Session().region_name +awsauth = AWS4Auth(credentials.access_key, credentials.secret_key, region, 'es', session_token=credentials.token) + +class OpenSearchClient: + def __init__(self, _opensearch_cluster_domain: str): + """ + Initialize OpenSearch client using OpenSearch Endpoint + """ + self.client = OpenSearch( + hosts = [{'host': _opensearch_cluster_domain.replace("https://", ""), 'port': 443}], + http_auth = awsauth, + use_ssl = True, + verify_certs = True, + connection_class = RequestsHttpConnection, + region=region + ) + def create_index(self, index: str, body: str): + """ + Create an index in OpenSearch. + + Args: + index (str): The name of the index to create. + body (dict): A dictionary containing the settings and mappings for the index. + """ + body_dict = json.loads(body) + # Extract the settings and mappings from the body + settings = body_dict.get('body', {}).get('settings', {}) + mappings = body_dict.get('body', {}).get('mappings', {}) + + # Create the index with the specified settings and mappings + self.client.indices.create( + index=index, + body={ + 'settings': settings, + 'mappings': mappings + } + ) + + def delete_index(self, index: str): + """ + Delete an index in OpenSearch. + """ + # avoid NotFoundError: NotFoundError(404, 'index_not_found_exception'... + if not self.client.indices.exists(index=index): + # hint to the caller that the index does not exist + return { + 'statusCode': 404, + 'headers': {'Content-Type': 'application/json'}, + 'body': json.dumps({'error': f'index {index} does not exist'}) + } + # delete the index + self.client.indices.delete(index=index) + def delete_document(self, index: str, document_id: str): + """ + Delete a document in a specific index. + """ + # delete the document + self.client.delete(index=index, id=document_id) + def bulk(self, index: str, document: List[str]): + """ + Bulk index documents in a specific index. + """ + # bulk index the documents + self.client.bulk(index=index, body=document) + def index(self, index: str, document: List[str]): + """ + Index a document in a specific index. 
+ """ + # iterate through the documents and index them + for doc in document: + try: + response = self.client.index(index=index, body=doc) + logger.info(f"response: {response}") + except Exception as e: + logger.error(f"Error indexing document: {e}") + def query(self, index: str, field: str, value: str): + """ + Execute a query on a specific index based on a field and value. + """ + body = { + "query": { + "match": { + field: value + } + } + } + response = self.client.search(index=index, body=body) + return response + def match_all(self, index: str): + """ + Execute a match_all query on a specific index. + """ + # avoid NotFoundError: NotFoundError(404, 'index_not_found_exception'... + if not self.client.indices.exists(index=index): + # hint to the caller that the index does not exist + return { + 'statusCode': 404, + 'headers': {'Content-Type': 'application/json'}, + 'body': json.dumps({'error': f'index {index} does not exist'}) + } + body = { + "query": { + "match_all": {} + } + } + response = self.client.search(index=index, body=body) + return response + def search_with_metadata(self, index: str, query: str, filter: str): + """ + Execute a search query using the query DSL, using bool query to filter on metadata. + """ + # avoid NotFoundError: NotFoundError(404, 'index_not_found_exception'... + if not self.client.indices.exists(index=index): + # hint to the caller that the index does not exist + return { + 'statusCode': 404, + 'headers': {'Content-Type': 'application/json'}, + 'body': json.dumps({'error': f'index {index} does not exist'}) + } + body = { + "query": { + "bool": { + "must": [ + {"match": {"content": query}}, + ], + # looking for documents where the metadata field exactly matches the value of filter + "filter": [ + {"term": {"metadata": filter}} + ] + } + } + } + response = self.client.search(index=index, body=body) + return response \ No newline at end of file diff --git a/source/lambda/embedding/utils/sm_utils.py b/source/lambda/embedding/utils/sm_utils.py new file mode 100644 index 00000000..76675faa --- /dev/null +++ b/source/lambda/embedding/utils/sm_utils.py @@ -0,0 +1,73 @@ +""" +Helper functions for using Samgemaker Endpoint via langchain +""" +import sys +import time +import json +import logging +from typing import List +from langchain.embeddings import SagemakerEndpointEmbeddings +from langchain.embeddings.sagemaker_endpoint import EmbeddingsContentHandler + +logger = logging.getLogger() +# logging.basicConfig(format='%(asctime)s,%(module)s,%(processName)s,%(levelname)s,%(message)s', level=logging.INFO, stream=sys.stderr) +logger.setLevel(logging.INFO) + +# extend the SagemakerEndpointEmbeddings class from langchain to provide a custom embedding function +class SagemakerEndpointEmbeddingsJumpStart(SagemakerEndpointEmbeddings): + def embed_documents( + self, texts: List[str], chunk_size: int = 500 + ) -> List[List[float]]: + """Compute doc embeddings using a SageMaker Inference Endpoint. + + Args: + texts: The list of texts to embed. + chunk_size: The chunk size defines how many input texts will + be grouped together as request. If None, will use the + chunk size specified by the class. + + Returns: + List of embeddings, one for each text. 
+ """ + results = [] + _chunk_size = len(texts) if chunk_size > len(texts) else chunk_size + st = time.time() + for i in range(0, len(texts), _chunk_size): + response = self._embedding_func(texts[i:i + _chunk_size]) + results.extend(response) + time_taken = time.time() - st + logger.info(f"got results for {len(texts)} in {time_taken}s, length of embeddings list is {len(results)}") + return results + + +# class for serializing/deserializing requests/responses to/from the embeddings model +class ContentHandler(EmbeddingsContentHandler): + content_type = "application/json" + accepts = "application/json" + + def transform_input(self, prompt, model_kwargs={}) -> bytes: + # add bge_prompt to each element in prompt + new_prompt = ["为这个句子生成表示以用于检索相关文章:" + p for p in prompt] + input_str = json.dumps({"inputs": new_prompt, **model_kwargs}) + return input_str.encode('utf-8') + + def transform_output(self, output: bytes) -> str: + response_json = json.loads(output.read().decode("utf-8")) + embeddings = response_json["sentence_embeddings"] + if len(embeddings) == 1: + return [embeddings[0]] + return embeddings + +def create_sagemaker_embeddings_from_js_model(embeddings_model_endpoint_name: str, aws_region: str) -> SagemakerEndpointEmbeddingsJumpStart: + # all set to create the objects for the ContentHandler and + # SagemakerEndpointEmbeddingsJumpStart classes + content_handler = ContentHandler() + logger.info(f'content_handler: {content_handler}, embeddings_model_endpoint_name: {embeddings_model_endpoint_name}, aws_region: {aws_region}') + # note the name of the LLM Sagemaker endpoint, this is the model that we would + # be using for generating the embeddings + embeddings = SagemakerEndpointEmbeddingsJumpStart( + endpoint_name = embeddings_model_endpoint_name, + region_name = aws_region, + content_handler = content_handler + ) + return embeddings \ No newline at end of file diff --git a/source/lambda/etl/Dockerfile b/source/lambda/etl/Dockerfile new file mode 100644 index 00000000..73e6adf5 --- /dev/null +++ b/source/lambda/etl/Dockerfile @@ -0,0 +1,8 @@ +FROM --platform=linux/amd64 public.ecr.aws/lambda/python:3.9 + +COPY requirements.txt . 
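+# This image packages only the ETL driver (main.py), which batches S3 objects for the Step Functions workflow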
+RUN pip3 install -r requirements.txt --target "${LAMBDA_TASK_ROOT}" + +COPY main.py ${LAMBDA_TASK_ROOT} + +CMD [ "main.lambda_handler" ] diff --git a/source/lambda/etl/main.py b/source/lambda/etl/main.py new file mode 100644 index 00000000..d1a13624 --- /dev/null +++ b/source/lambda/etl/main.py @@ -0,0 +1,43 @@ +import json +import boto3 +import logging + +logger = logging.getLogger() +logger.setLevel(logging.INFO) + +s3_client = boto3.client('s3') + +# Offline lambda function to count the number of files in the S3 bucket +def lambda_handler(event, context): + logger.info(f"event:{event}") + # Retrieve bucket name and prefix from the event object passed by Step Function + bucket_name = event['s3Bucket'] + prefix = event['s3Prefix'] + + # Initialize the file count + file_count = 0 + + # Paginate through the list of objects in the bucket with the specified prefix + paginator = s3_client.get_paginator('list_objects_v2') + page_iterator = paginator.paginate(Bucket=bucket_name, Prefix=prefix) + + # Count the files, note skip the prefix with slash, which is the folder name + for page in page_iterator: + for obj in page.get('Contents', []): + if obj['Key'].endswith('/'): + continue + file_count += 1 + + # convert the fileCount into an array of numbers "fileIndices": [0, 1, 2, ..., 10], an array from 0 to fileCount-1 + batch_indices = list(range(file_count)) + + # This response should match the expected input schema of the downstream tasks in the Step Functions workflow + return { + 'fileCount': file_count, + 's3Bucket': bucket_name, + 's3Prefix': prefix, + 'qaEnhance': event['qaEnhance'], + # boolean value to indicate if the lambda function is running in offline mode + 'offline': event['offline'], + 'batchIndices': batch_indices, + } diff --git a/source/lambda/etl/requirements.txt b/source/lambda/etl/requirements.txt new file mode 100644 index 00000000..e6914666 --- /dev/null +++ b/source/lambda/etl/requirements.txt @@ -0,0 +1,9 @@ +langchain==0.0.312 +opensearch-py==2.2.0 +faiss_cpu==1.7.4 +sagemaker==2.48.0 +numpy==1.22.0 +# nltk==3.8.1 +requests-aws4auth==1.2.3 +unstructured==0.10.5 +unstructured[pdf] diff --git a/source/lambda/executor/Dockerfile b/source/lambda/executor/Dockerfile new file mode 100644 index 00000000..0a7a3e53 --- /dev/null +++ b/source/lambda/executor/Dockerfile @@ -0,0 +1,9 @@ +FROM --platform=linux/amd64 public.ecr.aws/lambda/python:3.9 + +COPY requirements.txt . 
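+# /var/task is the Lambda task root; installing dependencies there puts them on the default import path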
+RUN pip3 install -r requirements.txt --target "/var/task"
+
+COPY main.py /var/task
+COPY ./utils/* /var/task/
+
+CMD [ "main.lambda_handler" ]
diff --git a/source/lambda/executor/main.py b/source/lambda/executor/main.py
new file mode 100644
index 00000000..7ca89a42
--- /dev/null
+++ b/source/lambda/executor/main.py
@@ -0,0 +1,194 @@
+import json
+import logging
+import os
+import boto3
+import time
+from aos_utils import LLMBotOpenSearchClient
+from llmbot_utils import QueryType, combine_recalls, concat_recall_knowledge, process_input_messages
+from ddb_utils import get_session, update_session
+from sm_utils import SagemakerEndpointVectorOrCross
+
+logger = logging.getLogger()
+logger.setLevel(logging.INFO)
+
+region = os.environ['AWS_REGION']
+embedding_endpoint = os.environ.get("embedding_endpoint", "")
+cross_endpoint = os.environ.get("cross_endpoint", "")
+aos_endpoint = os.environ.get("aos_endpoint", "")
+aos_index = os.environ.get("aos_index", "")
+llm_endpoint = os.environ.get('llm_endpoint', "")
+chat_session_table = os.environ.get('chat_session_table', "")
+
+sm_client = boto3.client("sagemaker-runtime")
+aos_client = LLMBotOpenSearchClient(aos_endpoint)
+
+class APIException(Exception):
+    def __init__(self, message, code: str = None):
+        if code:
+            super().__init__("[{}] {}".format(code, message))
+        else:
+            super().__init__(message)
+
+def handle_error(func):
+    """Decorator for exception handling"""
+
+    def wrapper(*args, **kwargs):
+        try:
+            return func(*args, **kwargs)
+        except APIException as e:
+            logger.exception(e)
+            raise e
+        except Exception as e:
+            logger.exception(e)
+            raise RuntimeError(
+                "Unknown exception, please check Lambda log for more details"
+            )
+
+    return wrapper
+
+def main_entry(session_id:str, query_input:str, history:list, embedding_model_endpoint:str, cross_model_endpoint:str,
+               llm_model_endpoint:str, aos_index:str, enable_knowledge_qa:bool, temperature: float):
+    """
+    Entry point for the Lambda function.
+
+    :param session_id: The ID of the session.
+    :param query_input: The query input.
+    :param history: The history of the conversation.
+    :param embedding_model_endpoint: The endpoint of the embedding model.
+    :param cross_model_endpoint: The endpoint of the cross model.
+    :param llm_model_endpoint: The endpoint of the language model.
+    :param aos_index: The index of the AOS engine.
+    :param enable_knowledge_qa: Whether to enable knowledge QA.
+    :param temperature: The temperature of the language model.
+
+    :return: (answer, sources)
+    """
+
+    sources = []
+    if enable_knowledge_qa:
+        # 1. Concatenate query_input and history into a unified query
+        query_knowledge = ''.join([query_input] + [row[0] for row in history][::-1])
+
+        # 2. Get AOS KNN recall
+        start = time.time()
+        query_embedding = SagemakerEndpointVectorOrCross(prompt="为这个句子生成表示以用于检索相关文章:" + query_knowledge, endpoint_name=embedding_model_endpoint, region_name=region, model_type="vector", stop=None)
+        opensearch_knn_response = aos_client.search(index_name=aos_index, query_type="knn", query_term=query_embedding)
+        logger.info(json.dumps(opensearch_knn_response, ensure_ascii=False))
+        elapsed_time = time.time() - start
+        logger.info(f'running time of opensearch_knn: {elapsed_time} seconds')
+
+        # 3. Get AOS inverted-index recall
+        start = time.time()
+        opensearch_query_response = aos_client.search(index_name=aos_index, query_type="basic", query_term=query_knowledge)
+        logger.info(json.dumps(opensearch_query_response, ensure_ascii=False))
+        elapsed_time = time.time() - start
+        logger.info(f'running time of opensearch_query: {elapsed_time} seconds')
+
+        # 4. Combine the KNN recall and the inverted-index recall
+        recall_knowledge = combine_recalls(opensearch_knn_response, opensearch_query_response)
+
+        # 5. Predict correlation score using the cross model
+        recall_knowledge_cross = []
+        for knowledge in recall_knowledge:
+            # Score each recalled document against the query with the cross model
+            score = float(SagemakerEndpointVectorOrCross(prompt=query_knowledge, endpoint_name=cross_model_endpoint, region_name=region, model_type="cross", stop=None, context=knowledge['doc']))
+            logger.info(json.dumps({'doc': knowledge['doc'], 'score': score, 'source': knowledge['source']}, ensure_ascii=False))
+            if score > 0.8:
+                recall_knowledge_cross.append({'doc': knowledge['doc'], 'score': score, 'source': knowledge['source']})
+
+        recall_knowledge_cross.sort(key=lambda x: x["score"], reverse=True)
+
+        recall_knowledge_str = concat_recall_knowledge(recall_knowledge_cross[:2])
+        sources = list(set([item["source"] for item in recall_knowledge_cross[:2]]))
+        query_type = QueryType.KnowledgeQuery
+        elapsed_time = time.time() - start
+        logger.info(f'running time of recall knowledge: {elapsed_time} seconds')
+    else:
+        recall_knowledge_str = ""
+        query_type = QueryType.Conversation
+
+    # 6. Generate the answer from the question and the recalled knowledge
+    parameters = {'temperature': temperature}
+    try:
+        answer = SagemakerEndpointVectorOrCross(prompt=query_input, endpoint_name=llm_model_endpoint, region_name=region, model_type="answer", stop=None, history=history, parameters=parameters, context=recall_knowledge_str)
+    except Exception as e:
+        logger.error(f'Exception: {e}')
+        answer = ""
+
+    # 7. Update the session record
+    start = time.time()
+    update_session(session_id=session_id, chat_session_table=chat_session_table,
+                   question=query_input, answer=answer, knowledge_sources=sources)
+    elapsed_time = time.time() - start
+    logger.info(f'running time of update_session: {elapsed_time} seconds')
+
+    # 8. Log results
+    json_obj = {
+        "session_id": session_id,
+        "query": query_input,
+        "recall_knowledge_cross_str": recall_knowledge_str,
+        "detect_query_type": str(query_type),
+        "history": history,
+        "chatbot_answer": answer,
+        "sources": sources,
+        "timestamp": int(time.time())
+    }
+
+    json_obj_str = json.dumps(json_obj, ensure_ascii=False)
+    logger.info(json_obj_str)
+
+    return answer, sources
+
+@handle_error
+def lambda_handler(event, context):
+    request_timestamp = time.time()
+    logger.info(f'request_timestamp :{request_timestamp}')
+    logger.info(f"event:{event}")
+    logger.info(f"context:{context}")
+
+    # Get request body
+    event_body = json.loads(event['body'])
+    model = event_body['model']
+    messages = event_body['messages']
+    temperature = event_body['temperature']
+
+    history, question = process_input_messages(messages)
+    role = "user"
+    session_id = f"{role}_{int(request_timestamp)}"
+    knowledge_qa_flag = (model == 'knowledge_qa')
+
+    main_entry_start = time.time()
+    answer, sources = main_entry(session_id, question, history, embedding_endpoint, cross_endpoint, llm_endpoint, aos_index, knowledge_qa_flag, temperature)
+    main_entry_elapsed = time.time() - main_entry_start
+    logger.info(f'running time of main_entry: {main_entry_elapsed} seconds')
+
+    llmbot_response = {
+        "id": session_id,
+        "object": "chat.completion",
+        "created": int(request_timestamp),
+        # Placeholder token usage; counts are not computed yet
+        "model": model,
+        "usage": {
+            "prompt_tokens": 13,
+            "completion_tokens": 7,
+            "total_tokens": 20
+        },
+        "choices": [
+            {
+                "message": {
+                    "role": "assistant",
+                    "content": answer,
+                    "knowledge_sources": sources
+                },
+                "finish_reason": "stop",
+                "index": 0
+            }
+        ]
+    }
+
+    # Return the result
+    return {
+        'statusCode': 200,
+        'headers': {'Content-Type': 'application/json'},
+        'body': json.dumps(llmbot_response)
+    }
diff --git a/source/lambda/executor/requirements.txt b/source/lambda/executor/requirements.txt
new file mode 100644
index 00000000..3eb2077d
--- /dev/null
+++ b/source/lambda/executor/requirements.txt
@@ -0,0 +1,7 @@
+langchain==0.0.312
+opensearch-py==2.2.0
+boto3==1.26.114
+botocore==1.29.140
+requests_aws4auth==1.2.2
+openai==0.27.6
+tiktoken==0.3.3
diff --git a/source/lambda/executor/utils/aos_utils.py b/source/lambda/executor/utils/aos_utils.py
new file mode 100644
index 00000000..d0c273b9
--- /dev/null
+++ b/source/lambda/executor/utils/aos_utils.py
@@ -0,0 +1,158 @@
+import json
+import boto3
+import requests
+from requests_aws4auth import AWS4Auth
+from opensearchpy import OpenSearch, RequestsHttpConnection
+
+credentials = boto3.Session().get_credentials()
+region = boto3.Session().region_name
+awsauth = AWS4Auth(credentials.access_key, credentials.secret_key, region, 'es', session_token=credentials.token)
+
+IMPORT_OPENSEARCH_PY_ERROR = (
+    "Could not import OpenSearch. Please install it with `pip install opensearch-py`."
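+    # used by _import_not_found_error() below when opensearch-py is not installed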
+) +def _import_not_found_error(): + """Import not found error if available, otherwise raise error.""" + try: + from opensearchpy.exceptions import NotFoundError + except ImportError: + raise ImportError(IMPORT_OPENSEARCH_PY_ERROR) + return NotFoundError + +class LLMBotOpenSearchClient: + def __init__(self, host): + """ + Initialize OpenSearch client using OpenSearch Endpoint + """ + self.client = OpenSearch( + hosts = [{'host': host.replace("https://", ""), 'port': 443}], + http_auth=awsauth, + use_ssl=True, + verify_certs=True, + connection_class=RequestsHttpConnection + ) + self.query_match = {"knn": self._build_knn_search_query, + "exact": self._build_exactly_match_query, + "basic": self._build_basic_search_query} + + def _build_basic_search_query(self, index_name, query_term, field, size): + """ + Build basic search query + + :param index_name: Target Index Name + :param query_term: query term + :param field: search field + :param size: number of results to return from aos + + :return: aos response json + """ + query = { + "size": size, + "query": { + "bool":{ + "should": [ {"match": { field : query_term }} ] + } + }, + "sort": [ + { + "_score": { + "order": "desc" + } + } + ] + } + + return query + + def _build_knn_search_query(self, index_name, query_term, field, size): + """ + Build knn search query + + :param index_name: Target Index Name + :param query_term: query term + :param field: search field + :param size: number of results to return from aos + + :return: aos response json + """ + query = { + "size": size, + "query": { + "knn": { + "vector_field": { + "vector": query_term, + "k": size + } + } + } + } + + return query + + def _build_exactly_match_query(self, index_name, query_term, field, size): + """ + Build exactly match query + + :param index_name: Target Index Name + :param query_term: query term + :param field: search field + :param size: number of results to return from aos + + :return: aos response json + """ + query = { + "query" : { + "match_phrase":{ + field : query_term + } + } + } + return query + + def organize_results(self, query_type, response, field): + """ + Organize results from aos response + + :param query_type: query type + :param response: aos response json + """ + results = [] + aos_hits = response["hits"]["hits"] + if query_type == "exact": + for aos_hit in aos_hits: + doc = aos_hit['_source'][field] + source = aos_hit['_source']['metadata']['file_path'] + score = aos_hit["_score"] + results.append({'doc': doc, 'score': score, 'source': source}) + else: + for aos_hit in aos_hits: + doc = f"{aos_hit['_source'][field]}" + source = aos_hit['_source']['metadata']['file_path'] + score = aos_hit["_score"] + results.append({'doc': doc, 'score': score, 'source': source}) + return results + + def search(self, index_name, query_type, query_term, field: str = "text", size: int = 10): + """ + Perform search on aos + + :param index_name: Target Index Name + :param query_type: query type + :param query_term: query term + :param field: search field + :param size: number of results to return from aos + + :return: aos response json + """ + not_found_error = _import_not_found_error() + try: + self.client.indices.get(index=index_name) + except not_found_error: + return [] + query = self.query_match[query_type](index_name, query_term, field, size) + response = self.client.search( + body=query, + index=index_name + ) + result = self.organize_results(query_type, response, field) + return result \ No newline at end of file diff --git 
a/source/lambda/executor/utils/ddb_utils.py b/source/lambda/executor/utils/ddb_utils.py new file mode 100644 index 00000000..074cbcc7 --- /dev/null +++ b/source/lambda/executor/utils/ddb_utils.py @@ -0,0 +1,124 @@ +import json +import boto3 +from datetime import date + +def get_session(session_id, chat_session_table): + + table_name = chat_session_table + dynamodb = boto3.resource('dynamodb') + + # table name + table = dynamodb.Table(table_name) + operation_result = "" + + response = table.get_item(Key={'session_id': session_id}) + + if "Item" in response.keys(): + # print("****** " + response["Item"]["content"]) + operation_result = json.loads(response["Item"]["content"]) + else: + # print("****** No result") + operation_result = "" + + return operation_result + + +# param: session_id +# question +# answer +# return: success +# failed +def update_session(session_id, chat_session_table, question, answer, knowledge_sources): + + table_name = chat_session_table + dynamodb = boto3.resource('dynamodb') + + # table name + table = dynamodb.Table(table_name) + operation_result = "" + + response = table.get_item(Key={'session_id': session_id}) + + item = { + "session_id":session_id, + "question_content":question, + "question_answer":answer, + "revised_answer":"", + "answer_rating":"", + "knowledge_sources": knowledge_sources, + } + + # inserting values into table + response = table.put_item( + Item=item + ) + + if "ResponseMetadata" in response.keys(): + if response["ResponseMetadata"]["HTTPStatusCode"] == 200: + operation_result = "success" + else: + operation_result = "failed" + else: + operation_result = "failed" + + return operation_result + +# For Wechat Miniprogram +# param: session_id +# user +# message +# timestamp +# isFirstUpdate +# return: success +# failed +def update_history(session_id, chat_session_table, user, message, timestamp, isFirstUpdate): + + table_name = chat_session_table + dynamodb = boto3.resource('dynamodb') + + # table name + table = dynamodb.Table(table_name) + operation_result = "" + + response = table.get_item(Key={'session_id': session_id}) + + if "Item" in response.keys(): + # print("****** " + response["Item"]["content"]) + chat_history = json.loads(response["Item"]["content"]) + else: + # print("****** No result") + chat_history = [] + + chat_history.append([user, message, timestamp]) + content = json.dumps(chat_history) + TodayDate = date.today() + + # inserting values into table + if isFirstUpdate: + response = table.put_item( + Item={ + 'session_id': session_id, + 'content': content, + 'sessionCreationDate': TodayDate, + 'lastUpdateDate': TodayDate + } + ) + else: + response = table.put_item( + Item={ + 'session_id': session_id, + 'content': content, + 'lastUpdateDate': TodayDate + } + ) + + + if "ResponseMetadata" in response.keys(): + if response["ResponseMetadata"]["HTTPStatusCode"] == 200: + operation_result = "success" + else: + operation_result = "failed" + else: + operation_result = "failed" + + return operation_result \ No newline at end of file diff --git a/source/lambda/executor/utils/llmbot_utils.py b/source/lambda/executor/utils/llmbot_utils.py new file mode 100644 index 00000000..cca7a32a --- /dev/null +++ b/source/lambda/executor/utils/llmbot_utils.py @@ -0,0 +1,132 @@ +from enum import Enum + +QA_SEP = "=>" +AWS_Free_Chat_Prompt = """你是云服务AWS的智能客服机器人{B},能够回答{A}的各种问题以及陪{A}聊天,如:{chat_history}\n\n{A}: {question}\n{B}: """ +AWS_Knowledge_QA_Prompt = """你是云服务AWS的智能客服机器人{B},请严格根据反括号中的资料提取相关信息\n```\n{fewshot}\n```\n回答{A}的各种问题,比如:\n\n{A}: 
{question}\n{B}: """
+Fewshot_prefix_Q="问题"
+Fewshot_prefix_A="回答"
+
+class QueryType(Enum):
+    KeywordQuery = "KeywordQuery"     # the user typed only a few keywords (~2 tokens)
+    KnowledgeQuery = "KnowledgeQuery" # the query needs the knowledge base to be answered
+    Conversation = "Conversation"     # the query is unrelated to the knowledge base
+
+def combine_recalls(opensearch_knn_response, opensearch_query_response):
+    '''
+    Keep only KNN results that also appear in the inverted-index results.
+    '''
+    knn_threshold = 0.2
+    inverted_threshold = 5.0
+    filter_knn_result = { item["doc"] : [item['source'], item["score"]] for item in opensearch_knn_response if item["score"] > knn_threshold }
+    filter_inverted_result = { item["doc"] : [item['source'], item["score"]] for item in opensearch_query_response if item["score"] > inverted_threshold }
+
+    combine_result = []
+    for doc, doc_info in filter_knn_result.items():
+        if doc in filter_inverted_result.keys():
+            combine_result.append({ "doc" : doc, "score" : doc_info[1], "source" : doc_info[0] })
+
+    return combine_result
+
+def concat_recall_knowledge(recall_knowledge_list):
+    """
+    Concatenate recalled knowledge from OpenSearch into a single string.
+    """
+    return "\n".join([item["doc"] for item in recall_knowledge_list])
+
+def process_input_messages(messages):
+    # 1. If two consecutive messages are from the same role, merge them into one message.
+    # Make sure new_messages follows this order: [user, assistant, user, assistant, ...]
+    new_messages = []
+    previous_role = None
+    for message in messages:
+        if message['role'] not in ("user", "assistant"):
+            continue
+
+        if len(new_messages) == 0:
+            if message['role'] == 'user':
+                new_messages.append(message['content'])
+        else:
+            if message['role'] == previous_role:
+                new_messages[-1] += f"\n{message['content']}"
+            else:
+                new_messages.append(message['content'])
+        previous_role = message['role']
+
+    # 2. Generate history and question
+    if len(new_messages) % 2 == 0:
+        print("The number of messages is even, which is not expected.")
+        history = [[new_messages[i], new_messages[i+1]] for i in range(0, len(new_messages)-2, 2)]
+        question = new_messages[-2]
+    else:
+        history = [[new_messages[i], new_messages[i+1]] for i in range(0, len(new_messages)-1, 2)]
+        question = new_messages[-1]
+
+    return history, question
+
+def build_conversation_prompt(post_text, conversations, role_a, role_b):
+    """
+    Build the conversation prompt for the LLM.
+    In the current version, all conversation history is concatenated into a single prompt.
+
+    :param post_text: user post text
+    :param conversations: conversation history
+    :param role_a: role name, e.g. "用户"
+    :param role_b: role name, e.g. "AWSBot"
+    :return: conversation prompt string
+    """
+    chat_history = [f"{role_a}: {item[0]}\n{role_b}: {item[1]}" for item in conversations]
+    chat_histories = "\n\n".join(chat_history)
+    chat_histories = f"\n\n{chat_histories}" if chat_histories else ""
+
+    conversation_prompt = AWS_Free_Chat_Prompt.format(chat_history=chat_histories, question=post_text, A=role_a, B=role_b)
+
+    return conversation_prompt
+
+def build_knowledge_qa_prompt(post_text, qa_recalls, role_a, role_b):
+    """
+    Build the knowledge-QA prompt for the LLM.
+    For knowledge QA, all retrieved related document paragraphs are merged into a single prompt.
+
+    :param post_text: user post text
+    :param qa_recalls: all retrieved related document paragraphs from OpenSearch
+    :param role_a: role name, e.g. "用户"
+    :param role_b: role name, e.g. 
"AWSBot" + """ + qa_pairs = [ obj["doc"].split(QA_SEP) for obj in qa_recalls ] + qa_fewshots = [f"{Fewshot_prefix_Q}: {pair[0]}\n{Fewshot_prefix_A}: {pair[1]}" for pair in qa_pairs] + fewshots_str = "\n\n".join(qa_fewshots[-3:]) + + knowledge_qa_prompt = AWS_Knowledge_QA_Prompt.format(fewshot=fewshots_str, question=post_text, A=role_a, B=role_b) + return knowledge_qa_prompt + +def build_final_prompt(query_input, session_history, exactly_match_result, recall_knowledge, role_a, role_b): + """ + built final prompt for generating answer for user LLM. + + :param query_input: user post text + :param session_history: conversation history from DynamoDB + :param exactly_match_result: exactly match result from OpenSearch + :param recall_knowledge: knowledge recall result from OpenSearch + :param role_a: role name, e.g. "用户" + :param role_b: role name, e.g. "AWSBot" + + :return: (answer, final_prompt, query_type) + """ + + answer = None + final_prompt = None + query_type = None + + if exactly_match_result and recall_knowledge: + query_type = QueryType.KeywordQuery + answer = exactly_match_result[0]["doc"] + final_prompt = "" + elif recall_knowledge: + query_type = QueryType.KnowledgeQuery + final_prompt = build_knowledge_qa_prompt(query_input, recall_knowledge, role_a=role_a, role_b=role_b) + else: + query_type = QueryType.Conversation + free_chat_coversions = [item for item in session_history if item[2] == QueryType.Conversation] + final_prompt = build_conversation_prompt(query_input, free_chat_coversions[-2:], role_a=role_a, role_b=role_b) + + return (answer, final_prompt, query_type) \ No newline at end of file diff --git a/source/lambda/executor/utils/sm_utils.py b/source/lambda/executor/utils/sm_utils.py new file mode 100644 index 00000000..394cc99c --- /dev/null +++ b/source/lambda/executor/utils/sm_utils.py @@ -0,0 +1,202 @@ +import json +import re +import io +from langchain.llms.sagemaker_endpoint import LLMContentHandler, SagemakerEndpoint +from langchain.embeddings import SagemakerEndpointEmbeddings +from langchain.embeddings.sagemaker_endpoint import EmbeddingsContentHandler +from langchain.callbacks.manager import CallbackManagerForLLMRun +from langchain.llms.utils import enforce_stop_tokens +from typing import Dict, List, Optional, Any + +import logging +logger = logging.getLogger() +logger.setLevel(logging.INFO) + +class vectorContentHandler(EmbeddingsContentHandler): + content_type = "application/json" + accepts = "application/json" + + def transform_input(self, inputs: List[str], model_kwargs: Dict) -> bytes: + input_str = json.dumps({"inputs": inputs, **model_kwargs}) + return input_str.encode("utf-8") + + def transform_output(self, output: bytes) -> List[List[float]]: + response_json = json.loads(output.read().decode("utf-8")) + return response_json["sentence_embeddings"] + +class crossContentHandler(LLMContentHandler): + content_type = "application/json" + accepts = "application/json" + + def transform_input(self, prompt: str, model_kwargs: Dict) -> bytes: + input_str = json.dumps({"inputs": prompt, "docs":model_kwargs["context"]}) + return input_str.encode('utf-8') + + def transform_output(self, output: bytes) -> str: + response_json = json.loads(output.read().decode("utf-8")) + return response_json['scores'][0][1] + +class answerContentHandler(LLMContentHandler): + content_type = "application/json" + accepts = "application/json" + + def transform_input(self, question: str, model_kwargs: Dict) -> bytes: + + template_1 = '以下context xml tag内的文本内容为背景知识:\n\n{context}\n\n请根据背景知识, 
回答这个问题:{question}' + context = model_kwargs["context"] + + if len(context) == 0: + prompt = question + else: + prompt = template_1.format(context = model_kwargs["context"], question = question) + + input_str = json.dumps({"inputs": prompt, + "history": model_kwargs["history"], + "parameters": model_kwargs["parameters"]}) + return input_str.encode('utf-8') + + def transform_output(self, output: bytes) -> str: + response_json = json.loads(output.read().decode("utf-8")) + return response_json['outputs'] + +class LineIterator: + """ + A helper class for parsing the byte stream input. + + The output of the model will be in the following format: + ``` + b'{"outputs": [" a"]}\n' + b'{"outputs": [" challenging"]}\n' + b'{"outputs": [" problem"]}\n' + ... + ``` + + While usually each PayloadPart event from the event stream will contain a byte array + with a full json, this is not guaranteed and some of the json objects may be split across + PayloadPart events. For example: + ``` + {'PayloadPart': {'Bytes': b'{"outputs": '}} + {'PayloadPart': {'Bytes': b'[" problem"]}\n'}} + ``` + + This class accounts for this by concatenating bytes written via the 'write' function + and then exposing a method which will return lines (ending with a '\n' character) within + the buffer via the 'scan_lines' function. It maintains the position of the last read + position to ensure that previous bytes are not exposed again. + """ + + def __init__(self, stream): + self.byte_iterator = iter(stream) + self.buffer = io.BytesIO() + self.read_pos = 0 + + def __iter__(self): + return self + + def __next__(self): + while True: + self.buffer.seek(self.read_pos) + line = self.buffer.readline() + if line and line[-1] == ord('\n'): + self.read_pos += len(line) + return line[:-1] + try: + chunk = next(self.byte_iterator) + except StopIteration: + if self.read_pos < self.buffer.getbuffer().nbytes: + continue + raise + if 'PayloadPart' not in chunk: + print('Unknown event type:' + chunk) + continue + self.buffer.seek(0, io.SEEK_END) + self.buffer.write(chunk['PayloadPart']['Bytes']) + +class SagemakerEndpointStreaming(SagemakerEndpoint): + # override the _call function to support streaming function with invoke_endpoint_with_response_stream + def _call( + self, + prompt: str, + stop: Optional[List[str]] = None, + run_manager: Optional[CallbackManagerForLLMRun] = None, + **kwargs: Any, + ) -> str: + """Call out to Sagemaker inference endpoint. + + Args: + prompt: The prompt to pass into the model. + stop: Optional list of stop words to use when generating. + + Returns: + The string generated by the model. + + Example: + .. 
+class SagemakerEndpointStreaming(SagemakerEndpoint):
+    # override the _call function to support streaming via invoke_endpoint_with_response_stream
+    def _call(
+        self,
+        prompt: str,
+        stop: Optional[List[str]] = None,
+        run_manager: Optional[CallbackManagerForLLMRun] = None,
+        **kwargs: Any,
+    ) -> str:
+        """Call out to Sagemaker inference endpoint.
+
+        Args:
+            prompt: The prompt to pass into the model.
+            stop: Optional list of stop words to use when generating.
+
+        Returns:
+            The string generated by the model.
+
+        Example:
+            .. code-block:: python
+
+                response = se("Tell me a joke.")
+        """
+        _model_kwargs = self.model_kwargs or {}
+        _model_kwargs = {**_model_kwargs, **kwargs}
+        _endpoint_kwargs = self.endpoint_kwargs or {}
+
+        body = self.content_handler.transform_input(prompt, _model_kwargs)
+        # the content type should be application/json when using the LMI container
+        content_type = self.content_handler.content_type
+        accepts = self.content_handler.accepts
+
+        # send request
+        try:
+            response = self.client.invoke_endpoint_with_response_stream(
+                EndpointName=self.endpoint_name,
+                Body=body,
+                ContentType=content_type,
+                Accept=accepts,
+                **_endpoint_kwargs,
+            )
+        except Exception as e:
+            raise ValueError(f"Error raised by inference endpoint: {e}")
+
+        # transform_output is not used here because the response is a stream;
+        # accumulate the streamed tokens into the final text instead
+        text = ""
+        for line in LineIterator(response['Body']):
+            resp = json.loads(line)
+            token = resp.get("outputs")[0]
+            text += token
+            logger.info(token)
+
+        # enforce stop tokens if they are provided
+        if stop is not None:
+            # This is a bit hacky, but I can't figure out a better way to enforce
+            # stop tokens when making calls to the sagemaker endpoint.
+            text = enforce_stop_tokens(text, stop)
+
+        return text
+
+def SagemakerEndpointVectorOrCross(prompt: str, endpoint_name: str, region_name: str, model_type: str, stop: List[str], **kwargs) -> Any:
+    """
+    original class invocation:
+        response = self.client.invoke_endpoint(
+            EndpointName=self.endpoint_name,
+            Body=body,
+            ContentType=content_type,
+            Accept=accepts,
+            **_endpoint_kwargs,
+        )
+    """
+    if model_type == "vector":
+        content_handler = vectorContentHandler()
+        embeddings = SagemakerEndpointEmbeddings(
+            endpoint_name=endpoint_name,
+            region_name=region_name,
+            content_handler=content_handler,
+        )
+        query_result = embeddings.embed_query(prompt)
+        return query_result
+    elif model_type == "cross":
+        content_handler = crossContentHandler()
+    elif model_type == "answer":
+        content_handler = answerContentHandler()
+    # TODO: replace with SagemakerEndpointStreaming
+    genericModel = SagemakerEndpoint(
+        endpoint_name=endpoint_name,
+        region_name=region_name,
+        content_handler=content_handler
+    )
+    return genericModel(prompt=prompt, stop=stop, **kwargs)
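To show how this helper routes by `model_type`, a minimal sketch follows; the endpoint names and region are placeholders, not part of this patch:

```python
# Hypothetical calls to SagemakerEndpointVectorOrCross; endpoint names
# and region are placeholders.
embedding = SagemakerEndpointVectorOrCross(
    prompt="What is Amazon S3?",
    endpoint_name="embedding-endpoint",  # placeholder
    region_name="us-east-1",
    model_type="vector",
    stop=None,
)  # returns the query embedding produced by embed_query

score = SagemakerEndpointVectorOrCross(
    prompt="What is Amazon S3?",
    endpoint_name="cross-endpoint",      # placeholder
    region_name="us-east-1",
    model_type="cross",
    stop=None,
    # forwarded into model_kwargs and read by crossContentHandler
    context="Amazon S3 is an object storage service.",
)  # returns the cross-encoder relevance score
```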
diff --git a/source/lambda/job/dep/README.md b/source/lambda/job/dep/README.md
new file mode 100644
index 00000000..d54876a5
--- /dev/null
+++ b/source/lambda/job/dep/README.md
@@ -0,0 +1,19 @@
+# Update Dependencies once files in the dep folder are updated
+## Make sure you have the necessary tools installed:
+
+```bash
+pip install setuptools wheel
+```
+
+## Navigate to the directory containing setup.py in your terminal.
+Run the following commands to create the wheel distribution:
+
+```bash
+python setup.py develop
+python setup.py bdist_wheel
+```
+
+## The wheel file will be located in the dist directory.
+The file will have a name like llm_bot_dep-0.1.0-py3-none-any.whl, reflecting the package name, version, and other metadata.
+
+## Copy the wheel file to the whl folder for the CDK update
\ No newline at end of file
diff --git a/source/lambda/job/dep/dist/llm_bot_dep-0.1.0-py3-none-any.whl b/source/lambda/job/dep/dist/llm_bot_dep-0.1.0-py3-none-any.whl
new file mode 100644
index 0000000000000000000000000000000000000000..3945d1a9443b836401abe3ac482cde2b0bbc16ec
Binary files /dev/null and b/source/lambda/job/dep/dist/llm_bot_dep-0.1.0-py3-none-any.whl differ
zEhjF>*3n-BxB~~AAo`jzPHV}Rx7YQ>gUV!R`Hu3mJPkfippU?CTA|hjyDC8i5ywz)c*T#S8Jv-LDx^0eQpRG@;OoVu0#YtXLa zRw?(1G!Mxbiy7JFhUJ<^_l>EX++9?~#&+Q)J1$;>0B)%r9qo*&QtH9S6X90oE6)nx zsbLoexYyjKKs7KN>;ZMo!C^06#^N*~D1#5*PQPOVl$y~p_~;%+u!SR8CTXRYm&d*R z+nPMoh9YWb!bDLi7$m2nX^BEfhF8rAE^QM&DX@zikUTJ736b8ON2n7ZW|KWIL|nu7 zwUTO1VMewh7wGO#Vg`vZ4zgiCMEIViGQ-+3(1$=Rpq`UWEC3QiqJ3uo6a>nw=5oT$ zI)#)RhG3CpDq}8$<*EP*o91?BeyzZv#`YbjH+aGS=xSm2pKtT8eoYa_J6c;y;t&a1FxQZio(Sr5p$J*a5{-N2yC|16VX?}04BS7f`Y^_ zvv9dWd}Dv4vNI4`jC=oBeSLJCb(~G$Se19=>Q-l{xwVZ4H~HfQ004MN@RxaYup0N#mJUh`Oe|HI*ke8#D?)EkImd@by? ziaNp^j-MXg06Y{o69<1Z9%#H6d_8VBo@Gc-?J}T?yx&iUq)k;}Lt$?uY>8AjXNo@~ z?<$qfmAk{EwVC0Cm#p{DKMic!?DS{lT4b0hi=oGJDV^#(buZl`z(tOB_Qve+!Eq0H=-is;z;ppm&?; z>>TZikwupb&^5G>g!VHK5WEL~%byNNz;Dd(JX3#qhzuU4x<^2lQD(Sk>u#yiscUj& zijY{ShPpj_IuR`6_Ub~cv9CXx0ZCn*U!8FcMa2HZ)?gYrTtT9UiJASuDHLqg3v37Xu%7w6^{J`v#3-yH}QA0Cp75-sk*~+}m7%``c(j)=3$Mmv7HcR%NQGg$E zmn-;c1PWYLLPNtW&IL@tEQXdEXPMYyP&-xme(U3+n2^Y%bkB0;6@&8jtVkt5W|pol zp&Js#Y5MIY=dkXIVY?!gX$A*(aW8&EEKZRB>yu_MMUF-ktOPhA-hnS>mdOUGI#~8` zWa`R!c#g3Nt9(}@Oa-Y)=OGYf!hTsVU+3?N!oWK@Wu)CecSZCjiChWjBot&uB~fH- zx`Ybw#!wZJBTtl{l5wy%P1Lkgh1ZcyUNai_4qw_+r&V2|SrH$X-44&05fBGw1tC&+2x2LvQ0 z6M-+CtLTH6kzz>1-EJ0Yg1S*{;Q7j-X;6mEpg5j54iM89kzNG4)y5CWutW>4AqkTf z^6<)3_o(m*LNJ!w=~ah$i7scCmwj=n@6xahO+cIU;dqFOoQ4afV|GUIv&Ghu>g4xS z)7m;bSRoqA*rv!9E#nKM^Dzos_>qJ-_6Xo{AnWE!>olz%(D=}nHEIhd83`HKR@Sh2 znSm-@+GdY8%-Dn7Ig2L*JpST^e7xAa(OljBiLq97MxWgAaH!-}LOH~J21^JPD*!px z1B41nGO9o`&mMvxRXD(GkcPrZ<@!!$wf6~qLTX>NI_K@{*{$akH^Qnxn@(XMR>UhA zjxu$bD*@!xbZC?D^nMuu9!HOe#kB}Gipj-I=KGZ%i{e=bT`;O4vGtA?^au@ za$6c-8UsoBw7ez7tF@qxy@4bnUz(sFF12T=b9*JK?kysB`VoPE*EpF;A!@HyGD|M= zfJ|EmM#MVM5M=%6(YVp-32lTmIdEjul-BdK+$x?!WNgBAGovN5&CQmjM=2CKE%HGJ zR}MXUC}ViuBv)rg=qM7;tl=D1pzeBtdcj1waE&_d5Qx3bG?uHBc30HX5meUhCFQ+n zD1mAh^}Eb3mfOy%IagGXY@!1P>@__fYt-x4AI%Foe#uz>I3>sHn?5PXhoBjicS6k3 zcuv7X@s4#93={3`3v@bH%;R}^9o6^+zG@)o24^_w^XIlDRkCVFeYZCi?%d;F18gJSzBf07 zrB!0s!{eSdK?MUlVMsDR} zl{?c+_dumJZ<~WvT$3(qY75*fD^1}>$z=7Oz~{Ry4|0*CRxN>pLkDcz;DYF4 zO~`8}cg{N%J}W}(73@B9r{eMXMrgprY(Be_dE2S|&`K?7sU?;;Aexl4sQE73=(iD6 zS)v(tK!ErSG4h~&D177wt5F)x0Pa>DG7%W;2eC;=hCW8`*J}DkfPcN%OX!VJNo&jT=R;78Sjr&cu}4TX_PN85jDdx4CDHFMc5|P;6ZEM z_Y|j+1%CnlViH|spT%)q;=w+{gCW`%v?N5|f>&&y1a5nt2X&`T$^myDjdn1IWyiMD z$PLAsRBM{_N#fVi|Og&JZe}?%xQ;1|$ zX?3u~(xi-74LcAc45hxguAK4|?HLZR{Zv>-j6Ka?k!~s_MZfA2-Yp)=f+g=z`{sCJ zJ)bSjq{y4C2adsDu@BjnnL_NHyRmGY6xd1izg*s5@i?UKFO8&-67jIzwb_Z1T@C~} z@wJ7Eu|hY)Pw4Q&g_YVmMig;i-r*(SA?&#I<)W#MW%Jg-DjL$P_9-Wtfwtd?wv~GW zH{WCgzS;t3QlYTjyD-(2zGq$DvLZ^7>N1@;*uA*v1x$=PO4QHloaSwMU0FNSR-&g- zGP1FS1XsnKtp(Uhm$CVyO(S1VP-Ye}eeqvj=673?pdHm~vAq=F=7U|b&Wv|5MnFAo zdFejSuBCcpf2DQn-d1F;W@xYM@kL!!3HF1-YgR7e-PE>2_w+f4LdLSjs}tpL3r9LU zja5hWir0V1EU|yQ90Y&f@p*W*@)%&uc=VE$)yA8fB5{voU#{AcbFrthqkrMVaQfXO zEVkv#LPtY##F0$ECHTt1rt>awibzv#o`w423?w0w{p>Qz@4TTd1th5Tc#!gkdjqV28x&Ck{MoYVTmCX424n#kmucS zx_@sU`mue97^TIp!DYpP$DR7H_7GB(2}hDAxC)^UjfkCX^Cxg}W2*MDs_emWiK~on zxs0kK_gVaD`A%#nDOxdbU}1`Tb~;ecL85aA(TV{V)(ttETHcnEIA(Rz44|0xeJF`_ z=YtZ0io)}}6jk@Fdp~ngicTMMJd(+gsIg+=$%xE_ys&;*28zvnZS^GfA}l*EkNiNJ4sGJvTyc?Xq0RA(ApWGNfWI*Kd$ zYVZq0)~bzIW*N$oP__@o?%U;I+DUe23&tJTW4r*ZyJ&r=iB8)WX&b+@>&Mw?|Apkv zg?pvPYS2S z>A2_K@CU$G9WZm3|F|-498>+ySJ9GADogw*)-1`2kl1PBr%E%ENjiJUDYK4(ek@bs zGANa67IY><<&a9-Bd(JnG*vBpj&ztBbAhCIjGxQ%Sj=rn^L@h87unhBN9z@ z7DW?6a<)Q;>5VR(HFTn5T%%5*OP6_kE^;xc46BU-2AW9OprUmA0;L$7y_^BZ0e|>J zr8&k2!6`QT^WUY|j%3?}60q$rzC{kXq$8S&fLSF)wZu|Hum@8;EebCBUE z?UzNj@~^xf;i%M-WCVFI>u)*pOw9qxAP%xSP~XXgIkM(?5B`%J$@FM4MlirVu#;tF&F1df-cBY;=U_GSl@Kp4#Y;CIl4Sv=@n*hqO);{LDj_4i^XE-2?*3CoK;~ 
zz4@K%fk~PUwnx})GWs&Q5&+`P5zAsvPZnMv{A|SBkXtHM?|mrQ1b-b}y3SO_1;&|$ zCW7iPEtU9ysX1A4fX1x#<$|G-INk0U{FP{llo(_aEpEsg%?$7)Ht$BOb4I9(GgmYL zf62=+vL$1{;1Z^ukH}n(&cyKwTS1I6QwcD-CxZC4=GRzoTHWG2I}ir?E93oS)LlEs zye}~Lg2(Sci8YZogreLRf`qXG+W(}H6`>K0ZH*TfYH*BdE7Dim(N!C8j6~aDaPgSS zMDva26WD0{c}jLMcF0xYc=ph@<(B>dZEXh8x94uuxJwCs&nd#D@FWSOFith8q&%3& zov2E({kisY0sn2{Xe(E2L`x$;JNB8%Ynwtt5c8da7%)Je)-HNfuth3Gl2 z-tANe?>b6Fz@LgQ9oPt!6F<0M1#ez|_{7(HatC2X(a7>c49XCNhC{cTj1UHl%m0Kp zC7Qbh)5nyPk6UQ}G!Z=>ccGo#=R3$-Q0XyGP>b4SG6A9$>=Sf)h8m((Q&1C}`W})r z*A|$Vu>?)X1a}dg`HfF0IGz=kd>pe}PGz~R!ErQ>9nHce%^~f%US9gMr3OngPO6D+ zFAaV}$G-nKYU_+pA3EMwsI4PwT!%0$P0#7Tda%v|l2v_3W`+5JaRGnQzi+b0ToXP` zMu^$t!y`XL9>k>0^y#})r>uW=NB`=yjue*qbU&l`fmT@yA7>6W*pj~!!V87~V<1A) z_VXn|;@|W~l7U}=a0so%k#R|Qz;R47Pt#*67;@#j>Uh8mjT2$B%$d?g&4XBPBI~&n z1E~8%;I<&Li&33ZeuLlPCLe2uvUDEJc#9Hg;;ggHUfxW?2a9b`AlEP9#hIQ+4{|7| za4F9;?cJq`)!w(-;My?w=V!nrlJq5Z`+EQbM?(RYzK==3V{aK^D1v9(nx-{63&r8|NrNdL1=eP+H|f>q8r z?)yq%CakwP1RX1YcLpOZ5PUyuasF&P7E1_f{86`cJO)r2F_k9!rE<3C_ok5>f-?87a6EzyU{ccbr*c^Wd>}#^vVp~FSlT@t5h&YyzeTf;d z05&dOEgXE*hdiTc3hc91dLsw0`zXe%+=Zb`7sBQav#rvUC5!*HCP(~ z9#=tRDMomQaui^=*N3VC@afdDwFWq^Y;f*l5<2}EhEzo4ZGac*ni)`*vAnrDl}52ant<=+m{0kGyJq0j z6{ufVSEB}lj-OldO=&&xWY?BQqx9^TB)eq|@&K;a)A3=>R#)E&SSK2Z8--J@CJ{U_ zZ?9Ww)_j6#@FP3Dc7$zm-U?H@D&L9lYRtgp25V-^u@8ibb41J{!=o4z;?eLje(7a6 z&f^_YmFydx;wkCsP4Ffo82G_Pei9DzQVbeI-aK6rZBGD}v`yi7GG zlvdyQZKACydagF3U3sl-!NLWN$%0uHlrrsYKS(I6B&?%fl(LwqrX&x7bszUMLXK?l z(B`ScraP672Km$~gSwm>BU;|@%5++#|BRer$UKAUNYQXibEYr3atf$tMI-sb31)?5 zytE6yVwMMlMZ6KOY{p4LoImS8ILfIlb0O4t5xJ?g)^m3HsgtEGnVLK8bbT|)ThIVY zE^AUKQ!synF6OUH5$DDqFIzE}g8;MF_!zUn6OSfli{FxkOStOnL>5nGxWyo|lBl>s zUWI+HhK&Aj_}-2s=3a7X%2=|b7_}ZcdOzKfJ0!D>OUysoHMnfiNz*moXn!ClVEPfs zVdsTJ<(Nw^D(&B{6AXp4k8l`9Wpb7jjPY1rtWJH<7} zT%T3is@}fIDR^grfWhP}T+Q&z2T2w%n~b|NO>>&-+REU=V{$fypD>H3x@;(tYQ#<( zZTlNam*U=U0kHIJYX=%Wo!0Z$f5_)dtywRPb)FyNY4>1?m8eRT83#-|`_atlU0Wt$ zC36ug&YZg((1Jt{C@pEQoEfW;OcF}z>a1`|$6Z0~D zCnHQeM0{gy#2~3o%6$d9iW+5Iacjwm(K6P3Dl;}Qcoq@q>CWoZ? 
z{1lRR%TZrwAe#67|7eP8<+fBusYb3`{l_dA{?Q8D|6&%-4mOr9F8>F&=+XG!n$eBE z;5sl3Yh341F(5oeD;o8x>rj%f6HjQOMRv0YB$4Xa<;SkOI|}Inz2_Deu&n8WNBtZx zkLHP~BBom8KogadnOms}HQDtAc1v~xzKIdpRSzO6yC|z6eo5GEUy2x7lsWQCS5SB$ zZ=Pd|)T&Mu1uYWn0TBL$4#lC%&%Dnrzrk=%q-&-ma9WPT9pH;~FB0-gw#0?hl>EUb zYrO}M(3-rgzDw5uA7ZKDt>w3^Dyk~Ej$@^$KlNP@wq79` z8LKj=iK^vLvrGxHnpx1$fLW;IQ0rpN2Cc^~T@rZPLcQx`J?Eh}4mO z5tdt=;)sNBh--9+Q(%v3wamqH0qQlS0dpH}$k+>UURlapKeM%|XxYe9$m(cvP`Pa& zS;l&e+=vaIN`c8>SXjN<-JgaJD#Ht@XH-})$x zx-fzRJZ=#MQX>#~gTjD#+(v?~+5M?OYRW>mFPW(BP(1a@VCSZ~6*2D?+(!4NMTAm= zC}3+dZtF~fM8QNxbkkTV*#VBOYSkt)(6@BsQ7W*CMMZRsycLvY^+^ETa|P~41<`CQfryO+M)V{TooYo%y&5?qgmTfsN@d%BifGW zmtSeeqEt*EO0}X`jqVDLvuQeqrobCaB~>&5VhtMvJ(&RZo_IK*Vi=*3+`=?2H((^_qUbOxIPGbas(qRY_;?!iNj&;Vys^+dEflqHMWof%l1P9 zA7z50ig}U5H0;0iU53R031l2;pq1X1{se@|(6IvHTNw!NM(A=pFS3W1f_iK(IW1Er zH6tUn$0MtEqw#Fz+oH`ci;pyfW>D85s>oSRely$Cz07ADFQFp3MXb( z2u;L%f1T=#3zEz#N8k#R1Zkqzcz2O|=(DsZWTF69r5)N0r~!2XEWTJqGg7{(!j<|j z6dU?qzcG(UP*PrfbMx3_FS-rRE{%|-V5yChjK=6-TWT3K_>0JJyzsJ(k z&-yTb!J}zax3ei0sGTOz-O`u~fi~mRR29<)L@~UaX5)LOP1lwCZbdHzyE@@jZ$Evc z@@V3u=jQ0qoSpVV1cs-SDp)vNs{%cQFYCm?f)QzKsV(pPz*RQ9K~Ds56cOB|YH zt&(`;?_U8IbcEwh{6K1drrKx5BfhilK_aTh7sZ09T3OtHT^YMqoC2SGIPUkpolTf1 z2W;&5lDOPlWCMp}t?79$hHo;qqow0|DQ))d@r#?Wec5i@t)X@GzXY`D^Vd?t|M*g{ zDBW^g>3hz?(js+owjZLks% z%{TIloTgiYsJlo;pmOb#>Wq3LhqwY+p9*ZvsR^sm@sqz&Ta#3TvAY|Qu9z!*g8Emr z->VPy(ztYxZ+zZt0a0`joPJ{c>t)0SC0e1wUk96MQl!RCQQxzGGyDt19v3{5omYeq-YCY8v_0g#Ic)^2`?aOVp2)yvtr) zuG+E|K^#=Dd*3`ZnEj`4`tML=g1mE}EkErv;~0Lp(O9wgdY0Tf&jBO5pM~5RG;S3} zqqT#zWM7p+_)64{CJjB0*@AhUT)8bElGFPh#|dk_@4V>E>jN;bbUsI)7%)qfarX;k zt}zqm4_1I56I4${eqisA^$Ey@jo_=|D+3qLF&)xxDgZzE=&G^rKK1!`!&(zrj3KDXR!>@~IT*N!foHaVnk zU&-7WkAM)|OH^XZlp9V&mn0_c3Ya5BhuhoH>ZQe}#zapa&KxuOnEI&q6NqK#LYK1aCrxPu+A+(dit*>(ndWn?sD>TOVyM33S}!y>dIXHkP;Z zrs?NL!(G0U0-hb~)gCRZGaN#;zhO_`xqQLCj~yUnDxVa^;J=Xc{zcSOCGmh@+dxju}> z6-vSW&u2J*Yo-o3A6AnB1_0ps&qej$pW*-e5{#YQ{w-pQhPVAP2g0vEe<-R7E3{<+ z%DiPMFXRHz`~aaMoXdskzzmnJelmwSw{OMPtsk?T4SBW=zC*f{How{EYNs!X#`lOkSv*{TsoeOg+iNiHQ!R!3>iod z0d$2d)jPE89;5_8-x83B!y_Sb5~FlKe1|jdBAIiXq?$yTLsn!W5(RyB;!s z#DW~9Fd;!T=}g9|&2ovNl1Do%D#=6$T{Quz%0Mb*BB7+1-l_YdTwAS`94xJ+!fCxc zc55`jHEbg1MtndEW*2WblQ8pcM}&4MX@l zlQ83_f|qafOlAxF;MDgxwrsAA6xedn0#1$a4>wbD9At9oE%X)11~Os zKoB?hf!=0EN+2#)aY7TFoIrR-`kAE_$IPJR)k~Z7bVa_?0>RHh)~>h_7UxtE9 z8xdi~2@Wi5QvuA?%tz`#%GH-uRWz|T1KwwBXeywFrwh+{UWbGz$AWs;bEY^KpsKJ5@IZf?#%9BOJxE|%ZzI^3i~b=0^+z1#ev`0Q*ae-xb%rd}|AvQk9ut%GdenbZQ{Ss6fV_Vuj=@)y)|SP#QF zLp2ZTU=t{X7x)_*dv5Kwy4<0Y93z2&^z}XEyiD4oqim z8uPJrK4B30qtP~+3M9$@nqc%@_KE}kMtVY-TvB=({0zi8kRbb8%WGX?v zaj1BoSf#WhL+5M%GikUP*3rfCkuGODzsg_>v)cd#>y}z7lJhjK zYuWT{eYCr-2epJKsiHvkf*LQkz%*hW&P6+L@n(H5?u|?4<}v1GkV95T#4w3(&78Rj zJB`+HDbHJvb9xYPFyr zYQ_gzD76+lhir=Gl9pX3*+RDeBwZe`ztSYOncQf_=CoQ^43B`VZ9NQ*VnL84`vd;x zU-rKc!H}u0pZ7mB;0^JAM+7GJ#vcF6d!{ODf5ZmU^QjI&2iAxl_u9aV0xT2gh8_(f z;RcUgKoKNR-Dnw)u)##anf$MOOHZl&GcI#E6eGbG6<;DUQMNS7SbC||>Z*dd&G5Ro zpEWVyLR>qw{yHnU4aG{3VsUvuV3M8Q*&v5qAi67X8MKwI$alE+tYfJd)cCHa0U_sq z@G*KR&c1TjSKdd=7 zX*O`bCwUU=tCk_Yl&@7?VE<-s;~T?KQG^v_$gMEL6ZU5G5@}X@0pQdUMKly?HEz05 z0{Gr!mSV-pVIW4s@Fo2%w5%~v!s7Ag5UlHPY7ty9E} zm@8u1a#eeL!*#^ZZ;AG6I)gjl)l9q+WefO`mZH2iZV{KKB1GzGlJV0Bmt`q0x}wOv z69hc;kU6jqE?_52i2G5jOyeM|4`2=RHA-p6ERD!clc9XViJnf53n$! 
z@1O_~`HghZ`!M`#_Nw)`O7OPj0?!_d_;B9BtZ8u?`B6^O)b2M;J3RZRVQLhA%|D-u zd=Vh|T`MHa86YNH%>IUYb+2I9vf7InfJia~Hr>r)82C<3^>ujs)qMgcs7Q-dLk73!S`R-N_e__|BJ z!`*cH>L$B%`sg}44}*NVFrZEOPG05}qdyrW&~%uhGiZd1XEMmAZKhmpU9WWI-#AbE z_riz%eI65kz|}d>E0T&$J$O2V8E~ImBrI>*#>9Vt{=YFhUYT7ZNhT+ z@%r^gg2IsJ`s3a(}q;CiN;7LRXbxv(&n8|TLgUU zAMQUDq(8=dtbT$9kAk3&Y>gh_Cp#NOB#1S`B$(iW31~9b?X{Hg53?zu=m^hRqyJDh zJ&f=tEWG;^{40FIBM|Fy7SPa;d0}jovNH8F-a-Gne^j_APDCZ8Z1`)eW}<1>xo9Wo{{2aX z%_EB)*aH+lWE~M&+5E!`9)CTlWh%_f^a+{4N4F z-`n;y*0wBIE>h!zm;RqZ&N?irb#3Exmx6RENOzZn3_a2<-AGA;bV_$ggMiXq5+XTt zH#js1LwCrBI}U6=_Bm^2U2Eo#`CadtYu1`4?&r3!`98;BgE2iCM!>Z~+HW$6w-s-% zU8<^!mjbe^St#NT4>@I4t|L!F@Q8SwAELGvHLn%1Q99|i#7%r^aD42>Uf!uuD-v31 z(Aae&Z+Oz*Vb-DEM$0(iDIHRMqa5sbR*l6k=Z!zFO>opX8$qQloByicp#i_iGN3C} z{~A^1oWy4*)u1fbXHSeX{a;uo>PBOY!x z*yMG6q{`)O!fw#m)etPn==8eE*mk4cyOREbwBVFNYo;rl zsYt&z!5DMs{Z>idInoSR5`pslRyU};SzswKI;&UN+G78~Zds&cjyFuTx!4nxFMt6= zeZKbLQn9veK>7_4pBKNns9TC9?xGiSDA51NeMVG8I`32UpfigAvp(BC>=#<~LzDsC z7j-YNGXhRSv|%rz*Dyp68)HY$Yg-}#>q=kpIQTtbr~^}<%~;3$z_$C@ig|Yt|B_8T z$J+m{aGTBR-R{2+F#erHnp^3c{;FmyFG))Qm1t;sK2DLJu3K3yXw3yi;A48{i|PbfvHzwU#- zS5){F1I%3HQ)%2fgXG^TbpNfW@N53u$i~_FR~(?+YCXn+)wE78UeBQ=T31>n39qd1 z`71Of#_dRNtki@ib;NdX0{s4NSX%q$h&N_1#FgFiaGPNKr1P=aC`ZAYn3)f_8*_)m zV`dF%5q4)Iv21B$Af(}nH$8sJ+6u`d(d8S|FGETCJ<0NpLtakd&2*y_)1QniZXZRg zpXz(rjTTQ4J$`Z$uvPt3lpaBGrk^z9qoC^An1|TzF}l~#Byu9<3;L?Elu>%ZkC$bM z$ipDqfGJ8TMT<@1MlzmACk*bGZrm3sRxko19LI0WsENm<4Wr>^!>#7tAG8sPne#2Z zuC?u@Pg2cnQ1KEfqX%{&rLt7VO^_KS{2R{{T9+ zKZ#tfxp2V%>BarP5ly)~KukO(2=PYR@ag7BDT6(J_#9nSxVcttOX9jwG{P(ZMuRy; z9p%erMpus0Vf$pZTtRJ~*t(dOaUx*FhP0$Kk{T4xr9D0cNZPTuZgv;?GJLscArvR^ zu~5PVelsPmrE>B8dSO>TO*!q$)4lSL%wv1eXC~G%Xjjh6Jq7|<^%-nVPDHaG$Pr8} z7sBXXp$wd@+`B-$rOeeb9#6mNzx{{9y6yh`Tb0Gu$mI4189*>=JEl~)*e!+j!QR)={G ziDy@Cq?Li)ii!~_#;>O!J@rhVli9KvuMy5FDNftgL$@GkP0lC31?FqDP(6=6_H%~q zHR!j(=h$$lHXzHkaE67?CiKoyRL!9lBu)cDUX8CG5$nB195veB`9H&`Jb4uE(^cN~NMk-pyNaK^`O64m+3^?KBM>Xqh^=#Z6EsRoWw;t^+voP#9SF{GHz{cdyhRmxln+@ zTf-^SaY!8m5+7Dn%V%$z*ju|n*r$m9h9Omh-cP zU3?qqBbMPfur1OUX82j{t}t}}+bK5C_I{S(p=XKaxX~>n=+<$t`mN*G zkyWW)gr(U;raW1(!THdmuA{>0rQ6WiqbXqDEDtWt>|*Tlheit_%M^dlHu@w@enwOHrI<5&7;yhKQo}JM(M83Xj9ktQt(;z36TUb|$1lGElb|Snth)};U9&$a{ znDjG5o!vC?C4hfsB*@<^Ej+7FuAo;}KcL58*6piN6#w&_a!TmkxE)KkW1Ci`S^~943&Mz8#ZoN40 zgqF?HSvcYEq1){;C2Tl7F_q1Mi`+v_V8QcM$kO?g9SW>ARH8Kn6hO<)_&wi*UoV?rE^5N z(4FYmhE#RWsbc$8`{BPcX+Om`R*>DnUr#47iW3a6)Vth7`=0h9Jt>g&srm3NzHf3&V$san5<^KPIP<|_5rK?_G9U!@Vy$%LOlraG$BI{k+<0WB^JxI2 z?A0OTxPiXw%81mG!{rz0DhnbZAwAb}$^ehlUAHRXwssIpxGdbouB;hp9sz&25vF9| z6*n-Pwp?!8=b1q1P6pk%QL1nt8s<5y;{vl7H46-KWgt!`I5Dwx z`nf0qyhjVn-_BP=9B1IkorY(Cu;s0dCi0VHz28cR%g5=dZ9iEF==uo< z*YGK$qJarfJ6il1T(SiqmVuKv5Uq*?QTmhao0@)kf10f{a9%VI!&rCF8MI@oo(2@Z zMXO#&F-~BRs+3*c%peFStE0sogwwHY&{*SyLrz64&>Sn zEXBY}%OH}`XEp97M36t=<~CN^0TRgS3`w@i*ppodAOyLTl*FRTB#gkkQb~>-o_iaE_`3hBFUYE0J zoN6fiQvOUK&EOa=y-8hfcwd;8BPQ4t7P)yYy}Tb<^DTb1=NB*V5I^QA?9moKZub~J zzEet;%1JDb#Zcya1I!OnK+7A@C57(H*$g?_cC+f9YZG_#%yO}JDc>SWXy=Iuxr|wK zbb52)3s%eu0!NHohf{b>7)#}w%P27i56D|O?@Xw}MHi{vHFO!So&s)yqip;FYlJ$_ zwz+pnvqOU{yFdh+n2C5!%MI+2maa#bPqXYPyY)!Sw2#C!KYY6m$D;gBQlAMzk|@=e z98%w#?ggsNpw5LIWCI5;F&)AuRJyoL!PnMUb z@JzKHp@_Z{eJWa&$=DZfYRJIR$MJhA@~57OT!orZ6aKU_MYD86SS z;KbkoFP6h?l&Gd#@7X#K@=1BNK!k<(dZgJI(UetR6jXTANg%`uN$yl*3mMNq7%5|BeMkq8*iIio#0u+JaA@*(#)UA9Oz{2^o&thV~{*=!fy8QcQ7Ue8DLX#t{BL3$cj zT_4>ONnL~Md8Yo9O0`nbG$ny1H@L_>C9Ar4{GWpAH!G2_%=+4EJ1=_EMCS+g>TY=L z!j9Vy*D-Q3WQP3dOYE5)j9nc5NK1gBv$tuq*Jh0xWGD zBPy!QJKCc%qQ($GCov{jI4oD4LoB;O4rB;*>ABpBlY6sVF|_gJt(t>4Nen%4w-ixYQQqrcE@ zgU5}o^%^clM&h0m*Pt~IUhEv*e~u5#i6o+-SHUhu7vTF^1y`uYqW)D0*5-R$doA%c 
z^vgJGJeU(C9XG)szPIo8G6{*ekWkkw8Jz6S0I5GidSidr5{E$}sz z8GH5&+%C(|d0ipg#9IJ~wmOy$<%X}`XCfMQAS;bLdt$$A5HtN0`@1Rca=sRY($Dca zW?mBOTLp0~rM9JUMQJpAJx;%hYN#_cUuAa!rZ_1jO zJ|ND4C8vlcaxHhdm2_#p`^ZEpJYqXLo7SyN+#K0v8ar=3p4_M@!C)**Hf~>(vmDE$ z597Z1eEsDpN}S&e*}kI66f$=-%lX9}qbcqAbi__7_ShSgB9(y7xB==W#?YT4y@)PY z-IQn^b-vF=c8j&8;F_MBOy%>ra-K?IAX6p+dKa8y7B*FMTmkgQTWP+M2QT_fjOkSp zF@3kp>b~%&<2Ml+qN=`1K*SC$EGW8$-QC-Hbn>JB1Lhl|$BmSiS$x?*CtJ+@{G{g5 z&PRhFrgkE^7ygXy8H{mE0DiNhd-+M@tpibnaJ_8CZSu~1+fS#wJ>Ij^WMpAtWny76 zGPieNG`BXfVFm(}MZ`puMT{d^F@0IE#7}>sh?C49v0**tQX?Ur8$9tBpK06TZ1-Z0 zeM!*L$lbUb0)qi}1k39h;xV42IZv*q+iyQ4_ZVWU;N{L%&HI_(v%*yNVky@0dHe7y zupzQpYZBJfE4TB6sM=tum_W|zC}X++;#k|77y@lj4GW)Ikp3s&XmW{Qg6Ky9U$ctW zF;G`dRpK`@RPabXCls^XPBhZj_sRmlPB1H%b?5c%iT&Cs{>KTbNdf?}sDqNg{$X?( zM!JE%!AeDzF?O(>T)zyXGy{maLQzs0#K08HjQCk$h-HYqW{7=w1*K<@dH9%N0-c6Y zx*sH4p-4kZ2V6ywlC4#w`NT2N-#aKX4(uyjS%Ud(X&8xZju^K~x|O&?(f*I6IoR0h zS{geUTQWJgIP`)*;c`99!E#cJ;NPL#4g3KUw`EnlTgmCu{{dA5yp&TED;@(?WJ}^8 zfIS3J=L|@3tMyc$!z;w0w?_3vb(1fR);86V5KewlY59sreRDNv{(&oO5hpM6h9uBf z?Unx7cxkt~7+n8Vmb%89OF`M)xeR^5iz$lN8ASfSFG_7B?0Jm{ww>=wLS_cfn?9GH)vJuYdPP(;4(yvb|OjZtZYJHbhIhvqv1JlH&;n*kQwjBmiMfeyLE+;P& zmT$DqY&bqppW&Ki7bE6=Hc+svO#8&)Tiie{?T~e3tB>|tI-JeDvigm`X>z0y+k#H} z)Kk<&3_bW>B_BcJ&y3erX*dZWGWBuh)YDM7^>lab^hBN$OkzLFXM z#yPGBdVFY#(j0WF)%&uj#o3oDjxoPI?MvJ2JqBbT=hyyXwX<8@ns=#?e+O4S{deEw z+grmQTb0kR!@b-7edX1C$U`p5eD?ziPuQRJ@&Al6-3L81 z0d)sjr}`(z{%#)XKIEa{gF8q$-EWZJM-lF$9+EBIp}w*G6ZKCK<9*JrFX-3jZe!s; zwqJGnzqcx%cJ_aA{%#=Lv-Q3Ydq@g+2ZQB!81|Pi@IL4vHQXIYit}O6pW3+loQISM zcbq2z4|D#~CHxNhy*+vd0tr0~`m0%bAM{X*<_;9`@?p^54V(LPzw*SPq1;_sp79g#)( zzZ3rmM1RNq9xmSDdjALRAHn1A*xv)j|6rlXRsQ=b{|q1HW#DgBq)+qP}n_@`~#wr$(CZQHhO>#qH9YoBu-Mx|=JbtTpLx|1kK1B0Ld z002M$C`gH@Y1r5of2J@!515*O0@co(d#!Nf z5G8xgAyd@I%b<&QKRZM~dRu1{se+i2U-P-kEwoU#%Zj8Owvjr4nSa(RG`7&(?@n25 zPONq{cAkdpY|w#;zFo^wX%jM|A{w#QaWG9Ji`~j1Vx?sQWWt#aq8p6~ zc``D4nF>9$y@lYcDa$p3hP4{G!rsX5Zt42@`tUOvpb{c#*!)ITpz2H~6N-Nu}%Q;ndXkroD<=*A}! zRjC%s32EC~r=RjgnY8@SZsDFLA*}?c7yU7WmZT69PPc6+cBbJkQyX-TRh3 zhV8${Mw3A^n75yvtE9cT#kVHvM0dZ%aNIy-Ym~dYpReQ7=cAYFKC^moCG}sHr_)&JIcLM)S1;mRLZl_*`Ee?jILl&aQUA@dghHg$dFPvS5*pc`2Ets$TtU=f)l5vVF;ogoIO!k_w8?iPsRrl zT;A9P&F)Hp$9TSFNwshiOQD2*C{0=6g@j3<4Vz$2PF@}w=g}zEVN&&S9d6$*WbQz} zu>~Rk=G-%jJO;rDhf0}DQC*ZzMp$yv1GnZh%!DA$e4h2=(Pg92f9)-^Hjdamv1mm( zE&B?GpSbg7j0%T2^TE@73hFig9stPaA((dBJB4;Sd{i;P00#mSJy-L^?G7j#W(oF7 z{qDQ>PgM%;+!r{^ggPOt*a&5?3PuY8@^sg0xfo!6o0$eN=~Bim=q0;nIA+PPK*le# z$$hxGiN%3c5z6*H&4gmX9{0`C#m2D7nI8I~vEZ*SnXeFHB20&81+|It(<@MA0dfTq0^jMwFBY_>~;-?pT*4aP6zer z?ckFMNEVRzD%!oobUD5wj*>T|cKr6|f-2|I-}JP;AS(by`XHlY<=Jiv*;#+l}= zK#Hq&ID}60L@n%VyE#KA3tX_{?HK09A?K-X{iz;0ng2}kgXi0~LVId!nml^F@p*)? 
zR|V8h`GE}52{F1`wybG$9XpU#Thdec_3eUbBqY|l;SdW{5Re)fIhMgoa^i^bA$C23 zMgkz`ZPlBgA;~doRRh^D?8Y6CQye(-P+qB{Llh`{w;f@KQM!bh3F;!qg?0r%MgR!o z^%trZE+cuzxwcY(@DulSvrR3Z(K!NMQ#nZz#$pXaQZCd6QMCFnpxi17XF9zSmuPvt>=x0Gqw#6pzU;I1#s zP?}qnr49Oz13lXW@~NTKs*49=$6OAfpQA!?i|;`3f%e3LWFpbBS_*MHxh(A)_u(k) z`e<}1D)&aeY^@A0d1!1@?KM90!}+S_F8BBSNhbW{YUxbT`vm5g?8#5n|87M_t{`C}Pg2rO{@9J03!b69wRnsY4JPni zQxprjMp_>{8GP^&>Zc9c=-s|O{MlAx0_xDsrZ-Z;cWTmH1jls51IRJDxIHsJ-arSU+zi+64%2us&V?6;^x%Nu z4FFrV&L6qGDe8mW*Tj)N0blsI0@lUTt<>M&EXkumHuzX^w(_~<9b5+|<-yqI>_Z{^ zBg^&+%VI4BD7u)+Y*+&XOd)>^z=Rf$PjTXZ5N@InVJs@$C>BsIVImfh>lRp`_ou}_ zJ$--*{}(8b6Y@&CK>`3A;sO9b{Ray6jwZ$yM$Z2O3mP{60}H>l{eJ-p2qKoQE)Ol{ z@FO<<*-0p1NMq$o@Zf6*<`J7JG!a~hjZLtRdv}RB{v27?9Dsi(@RjO=xcg&`N_e`zQ?JU(_| z(i(Oh7i6-l>bRa+vvX^J$^>2zHa;5}+zRFV^8 znq496+GdQ;X{K8I=x#)N&dg?KcKjeBp57oEH8}OnLea!9DU5oRL;z5)%YC7+zD#fcl<09 z=16kGmJnMGe&xwQu%m{f%#4(LXvaKhCFv8Q%_Ip%Z+v3-w1$GdJgK$ml4UWP2(w;@ zxxw_So%Ac?HWpp+!P@Rb&EzXfb6yj-`!a}l8T*PGEep<`r8 zC!Kcq$tuULfey?OKBuc-7iJ3iK`yH zU_l&9>VN7Me+1fdr0o2^$ON^pAY|*k4MybrVb{rNGHQW|LL+kYzx@`I+y&jN&m@|9f zA$O7O)z4~iXfYo)2%vuA6Q68VRM$*ukBig5DcXENu~k%iP03~+H6ZF(25W((su^q= z-{uz7y{Mir^eb-rC7_(7cTCBQ!~Q8Q4#4=LCyvzXO-V2EReHG^H=3{4ASMw)u3HUx z))vNV-$;${q;gL5D>b&-*pY%ZLFJ<61cCS<5)wWDg^7*pqh7tilKW>@{sOfve(+Mw z5FNEAR{WdJ>(gcxq?n}kGZ0Uh!!2_Bxo{(9=>`&IJ7jcWGbCSM$bqO$@%Vu0?5>l& zawI?yZiO6LE*ihNRY-ZgZgd&hL`+tUpL|nJuE4-l)==#Q!Js_J9p^8^`;l;7*1LI_ z)@g88QVA&)&t{kbq)2#z-q(%*cmi7r60;;X!x$EcY;?AQ$3wdJ06ib!>F{vU85iN@ z@RF3tiIwP1W2}@wmYg(TmgZcY5At9|$A#9{5N~zfsmgrRP}jL^Grm<%2uT)mpCit7Ur5zxhXxsZ3uiUOf&T(GEmQc(0*JT;OmcxO3bm zEx3(W&8r58(>+sU7d(vm_X=|U+WqgE(W}k~yaglU5K9zmKSNA?b1<2LQ3bu|XrhOp z1{2VXQE9QF^s^W-U%EyECF1D~FxnMv8>tT0&=I0U-a0dzaq&O8Y0<`}a$ZZP8tRaP z)&~&>Qu7%qiy$GgTiJ8$z5ZEmw!ZACl8^U(iKmf_m_!1zMX>?DC z9>mGNLl6TZt%kj)qAAeD$harkN;I~j0{eslbSSPb7oW5E%V_Q43n=|IyzLfa zv~me&v;a%S<@`%ed_x@Mwk*g*5>1nBh$lKd{9WBV+<&y4?k`(e=Z8ZlV^(&8=<@0@ z?iClCxZxx+tM2EXuucW)rL)L0#S~O5cO*kVy_{)$HvMJgRDU#MNZlTO4-b8Pz8A9( zFPRdh>O_xpelBt*%+u@NB;@Q2KPzQckHkWil4Gakr`T3{be`fxDNV^c&|1GMs6~SS zie~DB9Ld7`h%mO-mk}Zb!O75_>_dlMmfk28=}c>nrc1Aj6G5#-_|Bt=n`B&L|Cl*^ zi#2w8xxC(Zt5{Mss+I9et3^g%H48Z9#d`8Z$kR2C8LK~F4aNJa2^^L>5}+wV@Hgjv z&yk;yd`NSmBkZv?-@_GkTHA=h(df%fGU9XX-W^_tINW&tp1+G%cA5IZRDN^5+}6y% zmHG7Fzu%zyGPEs~PtEj-LlA631G^AW z7i-#9vO<3Cs`<1xxxUe*w6_rC;ZS($&vP8%vbGXJsuES49*`!sE@mc(^1M+gu$0=2^aEAZtCs7c!h$hT%DT)MO zi|SNbWKJFx{Q>{qc;>jq!IT0D0H6#H0090Uo;jO1{STYPsLj}JaUk?!-*Y2$Dg#-! 
z%oxA}HVd>(0D&f2D;U2afq>A)Sl5#NBds8@jd$B46qQWAF1`{bf;-FGg9%eap+!gk z*KFGHPL&mOxRpcbkQ?75Z)SES764{2K9lB0*OyfHkM+XQ;pZk+`vE67v}2wXL(G=i zRI0+VhiFeE7l{mNa57OX8*{##pi#_YN5TX`X)BbnY6AIpyQY}jAxrC|8z-@a<=RPfg35DAIjt3c7=B`LPFb8cq^dQNF)tLYW zSrO*>XyuPt=`4KFov2Ma>Pxm@>$d4TCuiZWv;)AY~`c68ha5`Lb;gV*a8ZxBn8oATVgMsPZYyXI6;t$~Ky zttzWFBc(=d{Xj-<>2t|;WY)*s27Rcm-qHLg*+uB=0$daT0=g+`+L>6QY=yqyZP z?kxoih-VM$GI^Pe>d^(ac?e}$N3wf+m<@miswk8w===S7kB@O^g6Qg#u@}#J^car@ z2DkIM@gHIF9jcAUK&+Ka9f`Yu^bk@cwevQ$H6x0wL2^9ddz{&b>63mXRbqMYCc3Q+ zo*zKHnOqA{!>G(yILfrb<1J@n$YWP11dCwx#ym^~pF zcoUV}#Xv=`ePrdz9QC0i0Pm2;oVT)ga#7U{?+#5}O`WZEWh-HoSgj5>R62>a3C7vg z_4qxOy4o_`Q%ID0TJsL~22wv%;`q;94~8Xn9@3Y$l3 z#VnTL!(Vmu>n?v|;(Zmhx@&?vT(}*O9!shzD4t*(nswuv41;~^oW8B#V2#`85ctoH zU+Fy50ME9xB8m;I4%C3$m@m=T|#ghSyU&AYQp7ezQo^z_#nHAjsyt2RZW`ES0dcT zDj>bQcWBW6{-0p&=0}Tf@L#aL|1VhoSHuH2I~rKn{%74i8d`SS?Wn%&`+oxF1_DXA z)UgXUL4c^TO_J>XHy{yPg$ytu#15wIsUozZ8yR39cf4ou$rKi&S+W72gbCwLM{c#t zB8vaT<)%t*C+}w|R&H8$t)WBjf!Vp)IeBbaO+{?ipU1p9{(3bj_KAmul`U*nN+-oO z!%1gS7_&QR+f^yGo5Y@)r{IT9eea&sl4G%E0s^6uM$2Tj=quSFRw+klPy|d3Sm;V# zK20l?0pUgaCKFqqkpY(2;N~c;677$jw|h_TWoXKUVa?U7;tkA_C)y)gt4ZIy)$C$S zo5ZI^?T7E)m&KzaVlcn7Kr51O05-h}O`w;>TkQ&=aD_sfy zH~$A6X0`-GrkYJsgt<*(Yre8;+(tU;m0XD z9-H*J2YTcBJ^l};KZJZ|g^bgdovNoXmzRevt5SO2QO|adp5=DqK`|%XOsK z1~fk1_g#?v9+Y?@m|lf#Yj(S|@jver^d4J{YC2ZRsG)K=y)@ea6h{vSq*D<=8m<(` z)hF--nUx|H1cdvXccRVLib9i5h4JaQ-Nsk7vo}R7T_CiC9iB^EI`40kEQiR+WXrAX z`s`YQF2oA_=Z1E9eMXgtnMe&cCc=tam4+mE)@U33nDBAZR3g$~VawjcOe^vA&zdWS z50vo%e)xhgM+J~#jKFNIG2Qh&pJR?>iD9YcNWWq|BjxBW&(45 zLkco0QsG5@$DS4Z*T?$gssQQG3};I7+X}VmH`s;n*Vn<1-a7S>tb-Q_00wK~I~*25 z<1lJG$MSPA=@^S71@n2 zJB<~kh-Iz`UbGx-(2_;tbSFKr;BM!IKUP73YV0L$gn zw^%uvNx(amIcoO%ePG#<`NPJ34cN;1DVG0{uD4_2A_=HXbZZa-Fx8ohV8kAFFdaui zPBAFOFm)I63jEP;Yb#8X#TZQHUxm^(yl#XP^dYPVawwmR zc&FB2dO_Y+q4uFGxNOyzCkARnjN!CqCgRpfp@+6+cfAx=Wwz4qv!D+(YmW8wlSbhc zfy(_}^+&rrmJb*hQThYF$Z%Lxu)OS&e$^<&PtRj6VedrLE2(p@CiH|F3@=mae#%-fZi3S0ihzeNL1wTnY*`nWXkK0e=i=?EQl^TAKnEUX#EA z31h6cqz5#n)wL%JWh31V03dA#xyac73JF83m39dBLNN2=(xcfZT6~KyL*ufe%9-91 zSn@%ag-?(M8Xu$da96`Er~QFO!viK`O^^+^N_6LPnf-4|4q)EK$L4%Cc(r32z+^B)Ks$ltOhIzyrI|MHEU8>obc8Cgx zt%o5lln-eT2z23fl>xIMUzuoVj$!~d`n-@-M;5Jsc1&D>g_(*@V{|cDR72IkZHh>3 z_gtK?8sZnc5M;C$4*81r=AFsr0tq9C=@#Pf{CRm3ta7Fn@xP`v(SS;d7K*=scK|oJ z`G?4wi=Q&+6TsdHOSD0eiq_i2BlAJ6pQddwiQOw{6e*r<`>guDDbp?K&?wu@jm4kU-uQiRXa$E<;mFO#&Uko#NIz~|VWzL1%qO@2bnW2&K5Osj!bG2Z zWfBqh$I0>KvC%u`2yP$^#Bp5TC(imUE@PLToP3Y*-_bmCria*D{eJ-(=zjzBhXR6H z`qgK5&wu#E4hR5%^xtS}=VE5yOs}VBVQb;6r}rO_>BI;|4Kg5z-F=`6%R#Y9+~E~_ zBD795hNqc83UgNkuqY)~?Dw_Zsw$kZ#l1Z=uUd{dpr}09%l2|Z(`1q+Uo~pOMlgv6 z%>Abb7PV}k{L?-deotZgnXN(+p%*CfX!#5Xu7T0N0n~XGJWgnXg7-!H5Km@~7WDq7 z--7CXy+9c!=PvT2>TZxG?ogPG7RoenZQ&Mwwy;NGGWTex0bal6->ML8deicDEv zQ>VMB--8-U7}SfbiOC}cm&Kcx4Ss-Zx;n+>cPubPtEkM-A;TJy zCazmBO&d6i#~Q*RvQ@#hdGV5MG7`fuxo=>*{$ZTO>cnz6@)29 zsy5k+^-=d<{o(On{ek$uvu9)C>}X-+^q-y*HQBf=HiRDf`o92F&v>l5NEBWavFLmV zM~HZuKvTO`!g(ZGX6ghgNmhrpk2@dXghXpQHdssjH6qTZQ($--y;=-V-sx~{_p5CAPOhA=jS<`0M>O`?*xh&!(uWLEeY($_| zWJk@`9JW{oCAV$${+k$EjV*RLSFyA~%?eAj;$5qqXeu%&w{2Xk4(yP681fQZtagHy z#)oDkFSvWDR62+bBX;HoRZVdVNa>Zfodky|W%fOrZJ+tlv2tpvi%2K{?UV)1H?x&d zC(qmh^QQ&N6Lw|d6pHBb4Je0`5?gL#X4VPQCdsAmvNVm)#}W7}*-)2M<2AMVMb__k z)D)LY{;Vo%$&m-0Csb^1Md=m~e~~@=rVpg$w5%9~73;q|{3c_;NqiXW>sg-4>yDKHk|*FlJKfI?SJCR!b=S-3H|qQRPC z28pj{cW01}+a7pjf(~YFjpxE~3x1tAwALt0!+!fy4rn)#;h_0b#_5A&?D9h=O0MO$ z)i!-;Rf;!0h}jcIT@_Gx zAPXU$fKgzj2-F2^ZA95NAt;*mY0pd9T!3_4XGbUKd@1!dDO=)VWjdn3J6C(;U>lDu z)S297|n86o8%<xO|C}Kz<4Afs8F%KR+ zFj8C0f_79qInW_e`8^ddJj^hPuH9&^7iCkMTum`12WAAk-UJ$>j7Bv@&t9O>N@bXU 
zH0un+0@}}?z&~NAvMzJC)@$dFzUfyWkw{HV_Cd&Q81uk9{XKCX8O%W=fZhPQ4XhF- z@7TunHTM)OkKp?g2sx5iD6Bb9C@BmCmCn4dos6LdjBsU)%*3n?YK3R}f{lujzlzd_ zVO)FCDvCu(H|op}L2q;$mLtBLLyrv!CE0GcVp>@JbIYR0fVuoY0u~y3JbR61Rp;9s zov3;XUYoV@&?Jj1X4xAHaZxzXw?AY9M)bh1{iMRXo_7-Aq6}L3PBOn`uRJNMCC1z^S zECIrVgxM>5;hx!N4ud(TKB4Ws?L`9g*isns%EjdVO-ObZ43{}6?>!uJ>w4W6KFH0S z-Bvc9iv0$g0-o4=Ici+EM0`PlY}>W*!-WdnsG@=+=_sO8B~+2d9)o>P7R;P-p3u*D z0K5tB5CchYDkmE(1*UM$t?B1?#ei=2u>}lf!`MK>xOd)5dOwXG z&E{NGsmE|k$t7;#S-p4V#|+A=j%-L=!hPKToxfP%@+49@4xB-!G7|AGU>5`DHAr%E zw|)9-2uGSiw0!;fuHAlh<5~OWttsijJ4kmz{20gNO3Pa&7CFj`c9#W)lUF%_9EPv!50%l4haR)?|WQ z>dRr|=guxvkBD;d^>lM+2-#CZ8VfFr`Pja4FZD`RypV_N z% zGSIk6MI%K+9FrhjzwE?ytr@o>^QE(T?bA2Q1>0w-ek@Maripat2hKm-+gq{GR?2tR z#YD+$CHcE06d#Gnt}^oUJ(Ti3u(q-HGLq?zWFC&IVzLCteeP?yXZ6R`O`b{YGAG#Y z?^I}@cf-`GQt&Cu>8FdjZR?=a;wi=_ZJAYEGY*fus&{roAK}m?e=W}~6L(&v(FRJh zA$TX%WwS+NCjP8hMbAf$^A=u?OGwQZnLq-CIb}DX&_R~u5WJhRz<09vgPC0 zWvO@4GgSrM?!e)?fI1*TY^j%;TJ})uczMpJ?TT9ucvd2R&=v&va@d~)brgY9D5BE# z(`A*kE7YZ{b%N4pRE9Nob07E)mEz7C;W)Wp^CHwMJ1-9ZL}e5M{U*)@PNW%e8ZW=J z&YTC`+VbOaP!B1XhFJ`HaruaHz(kT`B z!(y}@!UMZs1$w%F&Cz8p0X-jGfCf%1R9I;IZj&|X_()}ypaSJ=4~GTf8w_vYoBTak zq|Ic(rqp=oB05Y}L#6hr*-{{WLucaj*bW=Yr;Xw4geKK|GR0a(N^F%jgdL!H!af+( z5-0>wOpT>2F~08sTTZwc05s9={Vtt_n&0T5Pzb~#WoOA%Zue!2^cj@)9r)CuDiem| z(pq|{R94Xf5UPF1G!^F^=F~qp)q9#&XGpVxaSx+dy;!uv zMX!H0!PZ=rfnQ83(R;1aScjW*%1ixPY5f*yJCbhF%2O2WSVu`|gNscS{K~EV_DJN3 zz7;ba#u}JpCpLYw9L5slmRTvttPc{{Ffq&N`h+hsZ@Q`e1pU~{}x@KS%MJ$Sb$=ocG&3IaYRS# z1Y`#2CZ=;$9_0FBmbG}%hVp{(H%G<@B*nAM-hzMsJy%v@T^R_1yZZQsdwk|v{zN~) zIxE404{rt85(9*qLc;O9as$=w5>VNsVI$2G-Y@$GFvV+rw}n-@a@3l}i1vz`{&4OK z1TxsD-*9{jj{nwK;6Z@yJ<|x4#{KaGC;EA=*ibjGr~3ZMLO&H`tn2r@;>akFZQ~$7 z3=fCVk}dFxTULwca15&>dz$UFxMovn%*2HBRe)f-tIWNljBhu9N;8ftPXld$hAuF0 zat}W$nj z_;mh|R((G)1MTTxhWvY9w$5nJKmySf6N{;<YXpK$ zwCnU067awM>SbAvr~n z+_c=;2B_l{b!Z-`(T1WtdJf@699M){Pxnz*7J4<(H`f-UlJyw;KZ6ciaNp7%R z>TNC_j)a2x1tr9P;r>^()S!O_gp;(Sn2!3}Y^VML5P^p6AORhuIc6`g7{s>t{bl{< z1#0_rhobZg)ND)6V;N$>a7@)BwLNZ{L~<3cI4!K_x$yixb{6=aX+h*7zP~49tu4c( zox6@@1zXKCv<$%tAcv<?J4H1|TgPA~?E~4n6MNohIz`04|jcE_YdG$;gMD+l!T%A?_0f>QZRpS`q2b%fNy!hBW-k2i1{f)VlZLC zN9hog0v$%J0Q$x<>iYK9W*@4fOg5PBM6+N;)>jJPfbApEIIgnKaRLkiP~2gG)JkW` z{zpwQ{Qw0j`!m9Y z5jm60rRG8aayUB4k6kP=$Sg7e@@2+>wZIatfqgDj$N1G1uu+pmfj_+IBqLsmlZ@-; z0to}wrU_kPL(gW(-)=wh)O$hcG* zRA+2$IXngw{t5wppqP>B1EDE_qBH85F7-_Z&F}^-XvxvrT5~)$KvNbCcBr27+y1}W zk>9{zk~Vy4}V4;}h)&HCNv!+l60GVS)O^&*X231%DpR`iBbA*Php92c3Z%2?<~ zdEn*MYW1XpzgBdCLz>9!eB65nwJ9K1D2oQ-uO-Gn*cZbI3jy0x>8W97x;Qw$+f#@c z2!x6)GU`OvJwu%F{cvv~>$zh!!XI#R>LMg+=#V{gng~|xQfis#Ws)2&Kw7QF68MR& zWth50vv0u{E_&c^4tp=CVrvE;Hl4O(r)6bU@dACps9n6Q)m=(U0NgsNGmp=w7ED>B zdU=g(ZF`ZL6pYyNF1+x4vaWA>y3?j}^80yqgU-ApICsWRG$#3{FX`l>xwlXAh%>Z<=|ft`<>IG%KGXh zGg5rXtQh{MRB)z*k%1D*gT014k>#?K+GmEG4tlcb_`Mdk1|bK^ZN3sLV zKv`;ivi9^=QnWq}P1zZ?60On2p zX9qX6L$=EW+G&iY)Mq8xm&zO~s-{Q4@bTV!WKAjDmEbHYgYjTkdX3;*d2m)>u$>C8 zvt&}}^aJaOnPOfrRU8B1wHD<0%O0|b-+-K)N&T9bIAUPuyaUd%L6UPPI8WV9!b1@i zkPLkrGvu8!#(d3gt*a5dj{eQ~?fbL#0KV+Yo0b&3EeAcnH$WaGkg(aLY0B#*kj4Z| z$+0kptc~lg3b0gG-s;fT8eWyhUyItoYDcWt>4jr#ZtGs&LsVLWvmQL(f+xi99PiQa z{j^!WWYEDlA(A%*Gs-Te+p5<1MjBxtJZO)Gchl%Fy5an`vf1+I(05CU*;~6r*k5-f z_n8X&$!nZ-i@Gs}Q!`TS75c`fYtrdqMT<8s9>Ap=0(lj%{r)6k56GGB!214%1VZ%$ z2cmpLkmfLZsY5kVec||NPC*EXBoisJvgIvfqTHp{(nyt+X{Jh!Xj-I>fby~6p4~Le z^om`eaHunz3Ky#mCR%uAmUg_!yoLJT;#B!s8GtxULPTCfz}~kWu@4>*ao7o6;?jF^ z4-n4k(f~<0o&fGhM~_M8@p*6`&MEQi+qLJ63(Ks9agCuS5L5tV1#)-cCI0ZTXz*C~GwIxBCc zdPe_d)_Zauy}Qo#UYtXNJUJ>K@?!e=NcG(wfm8j6J)yY$m%{iwi4R-g&mY>gbJvlS 
zrQ(c=B0(Ymt?%*9C({#_hOf%~XT(#Yrf$8twr&*{u4&vcBbSrhrQA2%RT5-ffshIq-2Ny7RKmE7`DL>~e!;YjI@?nv&553p# z92CInb@={H=ae4L`m*tPSc@X-U$b)oQd?&t96E%I?RmJ+fg=G^UA}3yp|Z z8pHthFEC*^2sVc(7bb=sfvS)A!+G36Ai^AezxxFVjA87LIT^r$A6rH;h?TnKbG3sE zYLwXj1p@Ey|n^9wPg!Oc<^?EW3s)izu74k;iAX^{2CNOU4F>pZ$O zpqCKfY7=UhkoPDQRk?eXdtPHihugCCJrdxhdnUD*gf46;G(704hEy^rItTnSlB0aK zg!c&zLF0?PH5nw#Ud^yPqxhuPk5)qKh&#IHxnpxDD6WX))KHNPz5Jr#9O}5+kZg$l zA>7m=7PqL}D{@&btj`I-yfE&HLf!|Kr9%Uouw}jvaCO&>aagJ5uR$>2=?CsC3IPs( z9fkNu(b06&hK{)sf3ZNO)230dPF4(Ds#;RQEXAbVA6q(z6%DP`;}NrnZ3Tx1h#H^1 zaKKL|KTTO)5^t|#%(7obYwE5{d*B2tl5_WIu53#znrN;=Hh-bQVcF05l%f{#phhp| z{alBr5y0P#oLs(~QfNhB?0?*{ZN42C)KtnNZXC^nw?b7%aywPB8aA|u2@d#dzuY|J zNTh9-Db8}AxCExEZagS;0`-&}C4L5da)kj(-b_cQ zn9CSR#V|G?qVN>_BCV~L=BRUUO_j5OnuOfgymFREiutU7uIQqHG~ehRdn)xPB3ce; zd5&`%`6%}4`}+0bTW-h9GD0kx@T8OApf3e5H^IHxrZHmlp1;+f2UvwMB&nbJJ1qG_ zHwV>8>-3MM0|JATs#!E;LiSh?j>aUF5dGO+FxA#5dl2@$SDzszSq%x|6Oeb@>}UslhG=fd+Di4!luB)?tR>SHnE9v-RwOE?Tj9?FX)z zIaO;&pJ6!B<&>QKJ`7?lLg{vU5r-%U$Ul!By_Vzsv+D6So|&!{HNef5aJ#D^3al0j zUH&0CLom^|ilnW+_xrG1Uj&B!S z;fk$1XxW>914nm=rW76oB)VUD11=r;M|QJgefyD$>WL5O;fD(Frl@hDLI+lZ%P%Yo zK|%^`O3Ak73}!N{ppOfYpeU{Z{G>!!gXv~2z?kDSp!I{jvsI=|??2=$uRMoguKu09 zg2SN-CI!_DZyb%{F(XCp8K}F?MHG55BY{3QI5M8u^;+#WS@_bapvB3=z_8+Gm9pRE z)-X|4zp+}0O?Sb5T|3Wz0SmrOXK%>fzcTU%*^*avxE13y!G}81H zgn)I{naxW+lSPj;eX$7-d)_}1SPX63Ri+bZgx{;K)8djVw7zFo;S(&GSdHLDfN_R; z4zI#DJG$dDfY-Y14Pi7I-Nj|%uGWoPt|NaIenoBqwG(-2EG|178ap5($Q;k)QR7)^ z261ZhLJo{j^NU#1o!>Vd%CkT8vBxRf+h8GWw{?3nDVwv&cLncdG$j zQ`ytE@Xn?~>zZ?PujpL!a%bGQQaf%+rz{5+#q|BoJGcHyU)(LXNwpD{o@=5^k2F>@ z3n|i2!Pf(s4Az$OrAb0Hcl=HqEs`3&k{Bw^Wz3FN#~XnN#^6gKs%%bKZS0 zAO}a(+99vGbpM>wW-){OO-RJtEo_L@#7q@K37+{iJsLCxn+Zm%6*l^U1@@EJV>O>aG*+ztzToJtOZ+YjDhTWhO#>@Y6X zyYXtUwqh_AVc{gfSQs1m#Q%tT6aB=c)Vl3pK z?id`!Kq^~puggffVK>|B&QD0 zCZdXsb@fZ{E}9<7AjT}E!$wfKq9eLq1Fa(uYpK4cRTOJgO_R`%d7J}fvj2O}%cIN* zwgNZ4WD(kQMgYTzZmdZduO<+MrjQ%>tUJiimr-QzzCd)%@uJ>&CvfBC6i%ts-;$BXfKeHE)N9k+fDl>@42r$tVliMdv&c{Kp$Vjl;$6&XW#}JV-Mr=&;8x$e~tQ1ybM$W9E)bF46dd*G!LlLz|78W+0XN4qy!hx+t+kiF!f~0e!U1$ zf+yqr2Uv98AXGmXF5_XJ;`ElJAx0eS5{q;L$WFkf@#Qrq0fmcILn1lo6f}=S{Z7K6A1kV5)N3N^on^)!P#nRi9qA za}%;U%dLrNaw;lOK{nf{qr`;kzxOX~{$0=kAOccqw~XbEHc;`~BSsDMG)tFHc>M2- z^nB?PZ=nWyS553XB3Hgxk*<1nRfOKDBY@G3sL?BzB(g*U)qD5FWAU?eNl~hAtudU@ zkErH%tg=RVsy-vF>rv@4Iu6e#%b9j&mpB`mNrq#G;&u98uLsxR=r9FN%j1*3?tf|u z51Qv&=CQSf?R-R%bN{w*(W+^$;em6a$tle`iYMh-oo14`@H1EC@S%9spQ9&Zz2*3I zH)S6)#gRrK!AT`Y)L~Ce8*11aGJ5sYiya%~L=;oJ$0{a<-nmDWSPgsT0#5nMb5WHj zlEv-1H`e++AqR~LGFl|bxE!C)iX&^i2bz;;BBY>n%xs`zfw?k(6A#l{-LnBthR0>}#<5Rq>b~W9)?}Q5U*Rst(Czw{zA6$7&vUH>wPHvxT#lrG=ka`OkTCLyXyO1AsY0 z4u7C*jb{Vld?9XtZs@U>=BCt+4y39d05)X0%VAw(Y-=0&pWwBO@L&TNIKeG4fOH5n zK4x&0QD@;ytACEzX9Ph)6CT1As43kN#Tf8X7z`R3@O~d&r)i#@Od(EV-Nz?#jgVow zG^_5syJtPnh54LcSboqdL@)}kBY>df{pa8mm=wJ+kEr@BZUACo*`|KpV*%yuSUKo? z_d$Gj9}_(xrRPABgSoa9*;W+0gC>4?ChkN)jD6~x<4vhn5KYXVQvy>m8CNsCxSdOb zjv1kbIH)dI7U;~_lWjv*8umNkbV6}wLvra3aK#!SnL1KthtVqRL>Sg2W`}e#n@oS6 zqZ&?gW#~dEdTaLa`Bc71KIuN;vds5qj9Bpdkv~HM-!I^O^xeRQ`f`1;{Jvmge)My2XE8p*8*>JQY=^F=l zCSl2y7(!Fkf=Md{)mX`lb4{v6!7l6+3;5EZmq&33X{Q1UQyB(r(H;As zL1F<#N!!;z=5<@iCULz%E?}YpRGWCF1;NAE8TQYPFGm_qKzgj?z+e?@9jz*InShCp zM^MZ`gaHd`-%Wo9n5&Ah*8spo>ePgkRO*1v=!`g>kY*|%6i}|)uzQ@%4%8`I+cAD1 z%uO4ict#?@(A4fQrB``PRf}fr3IGI!W#;kp-Rzy|B&#I^`^dsf(zyQg=QZ}~8`%HPnYi#lvNd?Gr27Wepo=QS7oOt&k8%+5 zh>g)A3;PH5-9-ayYntI-+dE}xYFsv7yHvFWD^1AzATYL?!ow1Tw`mi(=0THyfCbu9 zfEIkwkmXhtn(93bV~m)81;ST3Sg}Tl_Mr_d$*vUBWW^}`pjCV4bm8JbioHRBLoeI} ziRVl88)QES8Wde*hU1Xfy~Hc5`Z13t@$CCExf5`!%{*%?FncPk@E$$*nN4rF;2OA! 
zU#UZ!Zep6@?Th~aNNLSxbjwk-vDFFW9-$_BYu(B?B=FTr5Gbn#255i;r=ar!5eJH+ zh{Oui0J%Yvq03`I@HfYGAQ|Wu|L&nlYEd0u>`)x4VB-!lnCjU>7HVW}71b%T#Lplj zG|Lt%@s{a?CslX>LNH+)VonzaK6mDrwWzURe#`pV41%OH=z@g70)FJ zKqm8&7u0q)0MeAe0!$Z`pTt_^^-(t`NI53jvP0j?Q^pBb@CjjU{ZeFQ|82Q7V4l~&&*&12U^!rPIjG|GJIX)M*$t63=mf5 zC6YfmDmtt~X6epF_eeVG*jdiC^!GaPw5)sU#15U1oS7eI@c8(fLkQZd^QTwqL%{U# z;hr7PWmADBc=LLa?v?D`j~7GX>h}J6-TYr~qI33h?rGN&1YxyHl|u*5FfJ<#SKKxj zjn7DN!F_9wuvuDptPt}wnf+xe*{K$ZMF!hLh~HNli6wEa#S;eFiHQsoW2QLn?d>e0 zZ2EXZe-Fi2=)XaNV^I1<1ASo0n=C{Q7r&(OsOQvYKB_w>8q z<`yE+VN2?TAGELuRDjMJKq~ zEeukBNp*~l3M|oW3wwFCl4is&5YJtpD_wxXo1|!nw%=DhR%-UGzT8fzFingqYjAcR ze`a+>ax!5OcC>hrKof#>amQD54bY8JT`7}D3P0}4#+t#HyjU(zlm5X?y5dEl(HPvV zcD_i@kZ%n>EDpfl+Zib}4<@RjntK9L#1pfT+FUG($<%i3-A8N~PNiDSa4J$Qg48ye zoz>{>0%D_*o}pTY+B@%r_=1w`W2R4S1u(8uNdzCB8%L6b#X?x1@?z8|OYRq?Q!p$- z2{LFsiQpojzEva>$`5}=lpfTjmp|jhXGI|H-*QcnOFlnVHs5AVm~b^!JD z%)>PS291Nk?}CwB>obX0<{pum3+m){0uc&xY!|;qnz=s#le+Ol$-KD$sQuWJo%*xd z=G(2`pQ5XBvjDGboR`YX5U_ref>9V2RUZr=8#El^>=#jeK;g!-_@5E*0Jad0~x2naU5A(NA*z{UaFNvHw7h+el)Rbt|{ zB@Af0YuepwQdt2tC@nSFuIyW3U~t(VReQx0|3#n+ zES8yCMyJQm&CohdHftSfL>v^@)HYO;;9A&`?BH@g|A0`w*$G^YAxCSNVmEQ$h9R08 z3wirEbz#&2+9K}rtY=wjS!c$wspaL7+hCe!9+kQAYJ=Y{fNi%8D4IRn3r;auhiArB z2Nf8DL>oOTtW#$A5uBzT!8iq|2R=Q=P90%FP`ZZ7UsdWc1121$XJmcxO%($vBz2$h z%9gVW8c}Anb=@r|CdelG{Lf-QX!WnVLrCuXpsxG-`@5a=QXx%_uyC-r4U0~gN$1Y_ zT4EFi(F_dWq_hR5FPV4x>N;v#tt|T3+Ff=Dd?*447~RQLpbY;!lO;YJAXSQa(4uNO zSTyM;F7j9JC)2boy}7Tz-Lb*UC) zt9-n{bYi#Xn{xN7F`uFJ)`V|E*>1xDBq|mlU>Ww5k0Z*Q9-o(+tLv^NbYzdCo zD2|~;SmYJLYr(<-D{hpss4|r9xXQ7;*^|Rfs7Zw=GLhGLBarMzy}p|g2mwqSLCrEv zQNW`1{a-{TTwp0%H&z=9E3wvmTLT?a)QAwv^in`w+ zK{P`&&ar1hY(&`|6Y6VbgU9`aMGCA!s0k=}&z?yQ6}|gHF`59*%z|D#0ru%^?g}k% zeAoKdDfG(BwvmpNGiyFKAy;-#MGwV|TAMpm62x*Wqoy)2=UPOlF+)EG?07?;xtsU8 zf=@;AWD@)2o~myS&cJnU#@20p1UP4oc(a_@V;YX-IF(2 z4W-6~6Mea?ovjg1-$2ue8u}lt08UI>Xx5Q8prMD|Ao!$?V>WTSctEsy-ADE_Jn~XR zu3ok7CchhVc%P_%T?_%$>F;c$(+%{~1KhS~cEZ^+b=jIryoq-xtCL%SZI#mWm|tP< zAvbXd?*|@)cbJ%S^*o}sn&Zt1KN)Fiisj#$KXNOxnH+8xVwR&h?$F3dgB4ya?SJ`d zJelv-bC%`{y}pu9mu@B$`LpGXyf)xJ^M?R(hx+@&1Ak(f0TUUiPqz>+JFwiX!8pV2 z!*V!$D5HN60%9%$cf|Jq3|`%2lH|kak0B+s+_D4zxl`?eB2XD`MS$(ro*^DtQBMzcQTFLY^tJR1!L+8ZRt<2hq!=k zYT)UbnZj{IFt;HDi=&>jI1cx&4!x8dajS3TmiQYuV!iuQYVY>AazDJ7h%8HO^ju#k zs?OL%P;-4j47dS5K;b-Hg7`%4?N`}z7lUwqNu`uPaD^YE^UDUX8x z>;n1NPI03|+#Wuo{ddR$);eWb|D(ED+3n)$`dR}e!NL<4O; za@E#U`KpX(i*B+u70$j`Cik$LeBAN*{rmZ!ZR!7HVPsi0c2Ry=uJFGm-+w@_|2N6> zn=7|-HnVrK{ZAa`T0`4@lMUtngv&kXoeHetl#R6+cuOKTF~>_Kx~~U*(}+bjMJW}D zD#-S0{=1wdBvZfPv@<&aX^=S4ypOk8@`)b3x=qgXH?8~QNd)?fWNI@CPx=bU5~&t> zGEEeOtdS5hNc0@6vS7RN&CJ?%GA%SJBa$RWVh2i(VWt@)B@;&F;TnvNESYNCB#AY0 zo}pm7Qzs|B-d=3v`pHSzq7oL8lPFQKD%M2EE4wX>5scRx!};9=$t>u!#%YT|8GiBHZ(wSRw}Vis%_A`P$i?*t)T$w~^QD^||z=x8>KXjosbeY(0akZMkVA z82j%6i#+CIOi|`o-t&AnZV)sm61=)PM@lm&(9#+cUhPbVDd4I~C70NTFU>i-mzxE7 zVOqyLV!;+NV^UU>kzkA<$f#WtBi?P;!DpbpoZ}7+t2kQVYJ#!GC(-el*NA{EoHHBs8ctj@rwZR-y69h$b0aP)e5!AS( z8!N|!2~vzr)P{Z~leuC5*R&}g=B%;>{KbM+GxlW;yx~AupE%1iC%(c-eygPlWoOGj zAQ)N1Rns{%ehAtcNmqLYPI~$mPXLNs=6J4X<1bYwHa5lgb8cI9uE0=8*4HFK3o~t{ z4VVD2qZUqCI?obhFOd)TG=j}~G^w$rH!ASxz8{@Ylt-0yYNk5L36X3XOibr0ih_Iv zk;UU4dT-7u-dvnKf5|`IA0HomZLhg6-vzuBgg3}s^JBo;BcIwh+_Dem;SL?w?@~B6 zZJam4fR_=O<-u-;MmlRKrNPC2c&oq^$*T424NmsPs2wU@B6n+H`hztzj^R)}{1B{g zbPmYV|2yqD%jAqu~qJ-gQV`{4|}8QVG|pYu1Er4Zc@P%DtHbVhSoi9AbwS zboz!M{gw~~#D*q20``amKuwcS6Tt1CWV}n(Bn&>LIe-l3tj7u;U%}IA+{V^&2PsTb zYzLrPmEhQvb0>2@1Rf65(ekyS;jo`&+v3(e6c(Bo1 z#c0mDosxr}*wY!LZa0H9%eGC{k;KuM8>qOwW#H0ipIDPUn5j}XvOh~s_S@a%nMvh- zsih1y%uZ|~*~i@CQ;Vs(i{CN6P4NI$d6cw|MX!_dVLJOc2gX+VO)oD%STuc}rKibn 
z(Cq&4FuF;wvM&{c+eT}U6-n&aque`S6zMa#da7;Eop2@XI9-t{5+}#>sskIiKdY~4 z3``vyLJJI4VN`xZ!K(v&|2Td9SL$?rv28UFJ7c&0e5`}(4!1G+E_eS5WOuK$j(4ja zwQZP*8g$wxfAz$Sw22>KHS^p?Rf^Pm4b*-`a6s#m#~ngw(S~7B1+@})<5q1eskR@p zpJwl&vb&oPK?5HTkKr`>fC?*a`=>JPWGW?WYzUAOD8PSlEoVb%87jD(g%32|iWeQE zedGC-ADp4>S@JgaE0M!kD}Ts3G3eb}16CX9z{@+M_zD>l2p*4wCi0tsYNXFPmsiWe zmzJ}@g@XdLkDp@P@>vXJ@=*>a9}n&j>la(5H`a~*Nzi3&@z_)~ip26ln z)^n~8`%eEqUF1J`*x$wh0Q8^7?}hby|KBcRV(4P%Z2JGo#RC6V7V|%8ovm_u(8csOR%>GF@!N?~HEitO_13y3GG|GV*J`R#9_NjZO^;2RJxl6<>+W2bC3`?-{{AWzNlYj=D5-i~>JD;u>fl-6R42h2!tbY7p32s;dU2|Vr|oc`ULbf6V_8+ zW0Peh_d`2PNX>sucr05OaMx>}=QJ1HA%jfzJJtq1C`c5N4o{OGMH!t@&V=H$Eb ztmisZd2F8j*&wyl2s6Fj+wEoT|0Jw{_v;Fq{`KBS*f zHX?8-Rh+{2m#CYa!{D)k7B6Xj6hcorLk(e5x^Gx)Q)_A&A(1s;|492fPVya+kutYp z5Rcsr!ENOF1=uFWk{;B4)LS=4#ou3kzNA5Hp#VGD3>KdPHPIi6R#= zznV?hkquu3Iz39YIoIK+R(Jwn#~$|$5cm}b{%iq4d;-JEuK{z$YMcK9iA45G@yV%s zv?-sx7ee^%)_Zapa_*FWC7UFaT`^0BG$t{jxhXurKBjPhk(7dfi8L#S@aE_v3eX~e z=&Z(rYQk2OHWwi@zD6)On36(y0hs_PMc3m4go88%cL5^-VA=fPWI>eSBF6*R0kGu# zKt(k*Sgk|uFhNffX{K2+5H46A+)1A)b`$@nySzpEnO##Wy(0<+O-SBCELLG07Xsth zvlOJ)YK>_eu#$I}Hj)}naUb?DbAg*rN;s6Dz!&MA$_enO;Z_`muqcoZ zS=L7m8s8DeuqY_PAV6=2kxPN$yzd8_Le?<&acVYdpliVDA8mLh;iLTbcpt5**vgCM zr1?LJDBs!2Lz0MiMbtGxU7j5FUyvVDkcR;d>uU^$^X}2C_INCah~N-P1}PGp=|FcC zDay*jgQg=|j_obd6#pfA{&sdB>E(})TQ0d|iK*S;Y_ua}XY?NaG7{$70c7{#36dfL z4Gx?)B8FKv^I%8ukpFp#xMkZ(zl;Xlfb@H;+TdC(-rW(B6e&*mk_(&jYEH~mC`j^8 zMu^5P2}v4#AiR*PlZ%{#OgXtP4Qv#>L{NIm#&l!HT9BCaxgtImbNq>LC+^YT^zo(z zg9uWwS)iDgf!pZ1a*Fh!zGCC^Bq5_&5}c$08-PPc^vD53k$ra?1Wf&ZjdEn&iuzbD z5lBeL14mtPaCM3z8lGFmz@06Ba5O`0u;BYm67JJt_(m~YJpRGZOk%=YQw}3dS%w$JE*;Iy8EG7wwjR|-&|n_7kQZ6kfK%xS{&hGmeXFl z_TMoJc!4JBBE6{v_5hvoS*9UI&HZr5TQHV19u4|=+;+nxdZ4<<6>w*;+!VD&7w$(| znPn>%TEcAd8N=<8*97zMce)}DWrA`jwSgdbhF*>Vk_KyE7HHI4TO4y$GDP^ScXW#d z_4ABb9&ncLdFnzJJ!k8T3Wu8QgbTwbzHZ&XLCe0PC>X7Q^vNcf&~b$a>FI>pzXrxl z+rc#6yrG?)J49Z!>U(OYlk}0r2w3jvBe1tX1^_tg5W1m~*BC07BrTT~vo6h^=(_@D z?le%F(WY;Y&*+C-|pg2+mh zIl2d^@rBmT%08fM)@+|~t>=3U6L4zthkMxHUr%FGON(D)A&i*jZ;BkJiciUYZ#kc- z4*e{k+8TcY7Uo;k;?`Q{S^78KQbt>8`}+QJwP`4HZkBq4jo5r-q^OI03M=I$`G0qc z^-LvQf^KF14Suw!rcn%zDVP(2Ce~L*74JOWS<`JoyA*o5WzuPS#EKE6DiRf_eAqww zlxmryJ=#0{oI(rvXCkNC^fo8;(_AJxXxOfJ@1J{|`hk?J{?DkYj#Zn(PRjJJW-#BI z-}zi1y+Rqe(IDbYK=hS(g16o-#y^@ypx)Cqvf2?ojH;a7V1=bKW7hcM+I9W^k(vKl zN_q@}pxpfZCWLoy4<+9;xi^NSK@M86OZPRQRl6ddV7Q4Wq8dZO@b_I`F90Yq8yT%(_A_Zk+& z5=3v2p@TY8vb5?srcN{q7#Wj+=dAQB%!LP6rkt8~t>BbcDMaNof`#@2O6VvIR$?Jg zljy5gkCV|j=|mv3H$(20j=$axA5EY_1A0)>qQEF>nOK#20;eQ6vlwC0bwAfkwH6hy z5`KE;`d_KC3UEcr=%DDqPH%TlSJ$tPmyhG2@~8CZkEiXC3wVtLzZj#jR20WRGMF&y z>sSM@HYEd=Tn!4Z#X-Z3OZGoBCCYh2U0~zx)%h~y0{Y>I{LG{fgjXTY(BEmW5F`gN zBut{S(PD;E5;M~)Qk$-6?&R;td(dUEhS+1cnpTl8p*B!vvrJ|gV$N)5!%x;AN`R8u z&EMOoP{7<}9??(xAp!7?^BtI)Nae{e&JvoMwcuOdxFj0d=E(D6GX?xQ$_(C-`>2zn zBu90Gcyp)BYlRyUv5+<;G?H#3QnHzdPd+VytKQcq;hl{HjV;}~>|=CYQ}u0K9En&*+B^GMuGkE>{qa+k=9cjhe~pty_NmD%fSjBQF+Mq&{hprEP!xJYU^5MWgY|Fxp7xj|+__?$Pr_@~xVU z$jrTX>#E76p^C@xF6w6km1s6R$P)uJ_x-%MAF3&1FB> z^5GAyF|T-w;LHz~T)MyPo431=I6)e60j53ho=9D+eDCpqo7kNUp{qT1@Y`e zceLyqXg2G;2jlU(_Pk-M|8P8Ahz2^1)@*vVI4Bl`P0OyW?iBrq2SbH=%)eU#KAK#4 z?XFN}mp}#gs;^(+wN$e``4_?#i=r)=ILe3^x6{e%&|mq;H9UjsbYJA?^#1)DX2D{8 z$838vkHOq*N?>t^rGkpivTqB_mEVOYa_j(fK)1mOA#F`oSPn`~_+)(8v(U3| zK^p_$7Z!9|k+@p^bns{ZiCi;iXQzE}01L}W4Ha*aq=RYqp^Gt00$b#N^5gc7snlh0 z$N<12MK`KT%9Hhm8uY3tjVLl^a*@kLBuoC^&Eh{NND}@PonR0E0C8vl0PO!=6r2p5 zT}+)Uy-fd86Vznwe*wq;sRFj$js3M&ZXl)u-gs zSN-*YiXH?Zw4|U*=Y6b8`2uPbxH=lLod4k>S}ssj(lYcY%V6_CS#@kiy0OYJJq4pq`)Bs2#D_qbQsA;{*OA94>ls zl5bXC!>Px(Y~D@HEnPz+rY~;}!>Ki$0xwmH1U=a(`uEi(Sk`gvqD`TAny+cwU1h{= 
z_b|W~*wja65cQ%rentGIzf-dsy)%p&#@Fll9q`BHzJtCYwZ8K^Xr0(#RjA}$@OIjO zZy16RQ69}98di`nk(4!De3I;m&>|u}SWXb`5-{vvXF(H_hMKRQ*U|BEKQx}G?ob%K z=KkMW3}|ZR_C3@~E9(fjOfVLPKM2NJKU23OR-QIL-?|38r~n7CDNl%#+j+#ovSK|u zl>34BxvAo`G^`X>EG7z2j}BL>s2mR0Q8*@bbvD+k^2%$~NU-`_1d%YwWpR(EEqQZW z^W=0z_22aWGK>+y^afriU3V?Nw=Uo&B&61g`vZ|vEU#q$vaWr3>)^hP2Cz@W{k zk-6`Er7edGKq7_|Qq{Xn8Qbik7#X`>m`NvrWc3a_T2h%-H+Cj$*6#ly-8_P5lRsD1 zQ$BZ+ap^jAoY6MV3puicFupuGV!2Gehm;sUEUt%D&P+vdc>k+SxPe68F_C+T4BBMM zI(JUc9mqcyJQIMpLqV|E>&J5M4~_bwLA<`?Vu(~mlyeo^y@fN(Q0^9)TwXP6Yqlnj zC$B~+k}W1WSD~D5B11`nc9u`!#3MIX52Z8G+qENr)AN4VJ+0Cey@xsr?E!0_Od<$H zf|c-AN2`uTw~dbnY5*pL{=K-TC5`)ALJrKASYlwJO8ayb8Yr|@?ur62CbC+?U&x9N$AvGjqvHp97U*M4>$)zzXekUbaFxzdQg1ZyAbzjSnPY%@p8 z#L{%HC2+$D^IK6YoHFeG)bE4aA9qe@WMjjVWH~UM^eLcnnteu8Mq6{G;O|O|3x{0VTaM=)(9yPdy2tAP{FCkO0X zuP(8GKp&R<>;x8^PX+lx}<_84jrttYi|(R|@b zpNVBr=P@Kww<}3mLcF3sWtK5|YUr^#5O&zvTfI(^st`<|VsGuoE(KIatVqj-P2A{x z>!z#O;hKJmK9uzsW|tuja;h$~D8#*}_7A&iDaooTLrzuQIhWnER&-}z^4^4g@SoTW z)AmJ~-%^Udhz^hjTW>Fv*6JHe4209E)NRGVi1xyuO3s=gfIM3c_cnvC;b_@93R{Q4 zt_Ci=$SMA=zf{8Iz2XGb0SzqqrNQ<+Vxa!=V%VR^MJH2mfgGHLHnw-SV2}Qn0K-5jtoJP=ZcSoou_YWaU^}GLxO}(F7J&TnBT%8M#O_IH(J$i-@gPCPj zr`hoztyHbN{e(19PXv;@+8TY{&Ruvp)KzjqKKOAg0iZNt%Q}BO2%AiY8B*B+31@!+ z>IM}=?6OPG^WWqe^rH2>p-m~85H=4Ez;P%@9nd}TBzqYu4(=dHCPmVZDA@^gg+57m zv5y6-{OW}xBc9AYDPVVeoN7sWIG7chh3IBWDFy+i7jK4xx|ruIb>A=1f7=Hx5{q|i zp+DUlpmB6uYj!XM)JOx05Y~ls0Zpcgb=B(rkx3}=_=owmVES>z+2USMA&8Ml{S&0t z!|Z9s2L`g`?j4Z;>#0{S{~A2%FjyzJq7}%}JAID5#^^-@d&5aMRd}x&DrY25Fhr^D z661?ZFB2ZjXWIq?z+@VNinzjC$oNL?>WN-rToEv+OyQDX2&b-%6V?P6$u27bR&CY@g!ua><+Jzm1ihK z-jU*}orLYV(ZOgqlgu^(JmGgh620dA8Wut*YZr(_YIHZ?s~KqVgAc7Cc0eONy-!xe zYdLi?fq-#gN(>w%Dt!Uk*|@8K+dI5Cotb7B%IQEJ6mFhnCakHe*9}Lz;Yww!6*u9H#QE` zfu5kuv{rL1+7(!XY)9``S8TA4L)RXgO<(J!P={wn|70zR5I6}L!#)0CA;m76Wq2x# zeUyN=+*td~OH;(tEF>?&g-{va7{>@r5Cl#>!6ZY+-_daAWg39OQ%67AMvtzYB60koD5*pex1$_OgU9BufD#A>Z??e_whr@kcD*(;s|Aq)j~X>#Kx7yp zY(VJv!X0e&^(@xQ8(IBn52OHi9wB^v!NLn|BNa^aHNK8Q}t< z!NGpShp?s8Fc##R)y?c*-w90T$KXavG~^xVA$41bM2iyk!mUvwno?N{p3YujS*SEa z=tbmZa}67yN|%}SZpu4EnAUMv^cO@6mE0JVN7gYl+ly1@eZ$$XR2wRSxc%=P5e0Iu zadd)Mb);dvTCSGuSw6kN7i%k3o3O2MRcoQaNP1xzW_HX8-tWq0G+J?YF z7hDwfdj>y*Cp>8nEa1&ZK?N|BeRgJIPr8ql2cco@r`8van*s|9W^KqxOD~DfRG1mhQcP~Xa9LB~9kCRq?8x0%RruL@( z*2q<9CMM6OuS9`qCOmtatwAQ|5xLfyn?{Rn7c~v(#yZ~gnt#aOfPNXP zLZ4yTx8{va4r_@UH6TMX@Qsp3DZQSp&?dyD`LPOi7+3wo1@idkh94P z2JwZ9kP}~q?Yv0efCV?h37N8xm)U4iRphrK+!l#p+ibL_*0N7CS!f7$I-xbHRw; zy`@;r5f$FobWyqv9(x9M6wD}JV0col|GNL^KI664Sqbt2>h*Erk^s!H@5k*e_k94& zBY{>}OFwPNhPVD@mx9=oj@5^ShHUmKa07#_UjSFbh(XgXP6>UV0WY#inAF=nSi>+a zAZ*1a9S!X9yLdh0{~uH9KN`@Q4dHbIQSb*ZMT~3k!$kZbL(@eJLt0BXGYIeJ0vu9Z8Y>zjZ}|$ z?h{Mo^7%TqQ*o{{mD>)jmaRDht5sWforl_PRc+m-UwKXWdb-uCH5Plk)Ydp2??+G7 zr|R}Q`i%XP$I>fovU9DbQ|!J=eD!4kZC;tZ$2co+31<%uHXC`z*Ebn$xCPW3CjcQT zsMECBoyJH9oHQGcZU7v76^XQ`;-7p3AM>nFcgG1Y-kg=34{YE;e}@}S!*OpRt9_QK zpx(ocHE&UlqMeq;u3Xm#`c})0M~t7-u_~h1H;I)p)q%sxu=()t<2Hp3xR;O9px>ef z_D2A|v;2YXZ@l$Z+8G~geYpUtJ;yGJMV7bKcCeCyPWpIE(G%y1M0CJ@ehW zbR|pU=S_~Jy4L2D>M?EAE9$u!MwryjYK-zeREvhW4t|gWj_Z}L= z9T;cdZhW@q3#`*8XgiKhPhvp)lp!%Cf5Tv5P~#EEo| z(Bi}fJxz_X{s}ISSoE5BND954L&FhJAz23$&JdgVXp{W@*nXL@(h#t_U8W*>Vvm6~ zR&GiId9P!IEmhr{g%1l0h98%s#6jgTW{*_8Hu4+75C;k_F$i2In+tKK3WUVtT_Np$ z)g@QUdhpm|K^u5n&I*N}1hC~&?iBSg-@Mtk5%*bu`4(F^Opd9%^1eP7G59xMu)=ED zpFNm=vZ+`Xk@4ZECcMohDA`a*>#fZb)mAprN8D&cU`21k8wqNR!sXsJ*{o};ot9Jy z+f&^_@bV~y3jt6oo87jL;M%E#7Y_)I;xwUuF15uICrnFiBK_4*@>W0Xw4VO)AzQic zn6r%hMe30;S@^^0#+D?U@TcXBFv^B7b-c<<{La%N!Jq&{|7_z9gbEz+I~<$5SgkF* zKGF5l82y01tj`$SAvt`>5Reml(uJ=$7Zbn*!&AK7;5eRq|1SjfacGCcODFMefu;?r 
z5?G$;>TNP11S|Oldx!bi2_UXnv`8AQw38G*@$-;C5iMkY>XCSTHSIv8ASE8gk`M*K zgRowGFi&|hR!m|hQ9Vj%Wh)R;k@y0|@`@+ELhZ3IC4pE-hN8xNqfydsdOb^InU#pi zH3MW?Gr?{`cAtkN=cCFru0$%MdUBNzITmo~N=^b|J)#e+p9aSPcmj`cg&@WugSb6m z-z~)cUF)#SvE7{ZtVi!U_MV(i;>Ru`ZM%UtL9J%*Xzm;-?Vg2P09evG5{zrIazJdP z!?g2i!n|T+<0ugD&`)Ov{_ta*+geJ3KYal0h(x?G8}vV@$#ATPS*>|$3u7YL<_O1j z5O7FI-=)+bn-Y{12an=sqki_Qm@TtP;zh9G8;h^`lfc>Bi@0KLLBr?F^CqBM`B$$3=>W%nIj+(%{ zT}4@X-m%<%&|-|10(wh?Rg+Q5_5D203I;c>h8S!2%HWdVWi4qsa4IxK?>QSdgN<83 zX!+|kAH)r#Bn}>&LhC-UjVv3SrU|JMizw4H%!bORR82l?(c|}r4XFvFVW{^Tz{$Jo zPv{%c&>fGP)!u0*aj*s?h{4pbSXT@-&4f#{;^3jDxl3{?K5N2Rz?5a3Nr>%yh7dXg z$YZd8e33=h*I-CC%DYrI;}fc0CMT2~DTu)4q17~Blof$TH6bttdVX?VylK)>(3#4p zEC%qQ69F^sS9!tr(8w1e$^gdgG0^(t2*YT)UPDyyqEiCqj8O+!sw7a#V1nER1L-)( zxft^ht*{?`o;y5@Y$DkwsVGNd(udJSntUQSNQiW_`;uTfzxDSghEaw-(9@hSApuE& zE0?$g$O&vG?tP&AIGb@HRwBTISXkf^k=LEYQ#6+x(lYqtAQo*#Z-k0v1!~SHhVd3c z5OB4R+Djnq?Ff?a!V%UrJJMmIn;f%h!i_846HRqS^D0xHP&Uig=QplxsPa}bFAzhO z&nZN9W31$3 zf*(+<^C;!D;0Pugs)(lfc-|#9WzD|Z=PpUW6?`Uo08PauRyXzH#uiFkK2e~fHI>!i z7X=>LU?5dAsbmz=ron|oF?2nq>3k)bNJdLIaI(3uS5}{Z5#u&dAhL$OGuQS{EAKn( zE31vIeQs=be7Aql32d>wr*a|T!nnP4em7LAhrJ4WD`3-B(%#&@<;=Fjc#vgu3HIAA z54l4cO``rEy3VOR6kyA?v2EM7ZQHhO+qP}&c*nMr9ox38n{!|KqWcfjV|}Y;jX72m za+O>McZB4wxIGX%BrbVD#$e-4bfw6(>x+^C9;Y0H2=)B_e1wkf4vwM0F$m|C>K{fXG1JH% zgrXB)>W7~OTMcR8RUZ^eT$Os@^{e<|yxiX+XcF7%zB@ZY0iQ)BA#D#-C=2v6Z5sqy z8~K@S0!XXEj^tbd=UN|ssD^!t=D2sVa8L{I@8I8(Ihb0s$ zIKSjLQ|=94(XHb`EMRmwP|y}*x$L5(8ufIme&U+jyZc2xCXo=UQDKdk9r-N)UtW^% z+9*={;>BmyPpln;2Nm<`ALgf34|H3UvZySM&6v3LGV?@F)+~kN7dR=LDfWbF9O(lhP_Plr{ZRb7WDJ)Ixm{yh+S^;Xl!BsfuGvY^LH?j41nLI8-|Dlk1wfNXX zBtly7?Gq2ItFl^gE{4Jn*zwtoecV~XNx0PVXrAlR$tY!2OmGASe1NHm*^`tIEX7(8 zMz;CDwP3wcVsRdxjLFsbiPH0K&JVOU>hRbM(Fp9?C_Fqx+4freu|-G$wW0gM3jV zC!9vYIe!N8s+cAn3DgU@A?(xkCR^?6cDlNI-ajVQ`M;00mUim$zuulyQ+yuEtJ&%8 z=Fv@Onh#uz6OgOEy{9}^R@T+}(yI~`Z3K_7tZMdlI_>_C-)tt00xt~%K zPq$K>*^IwiQlJ|La56r!% zJb475Hf3OM7BS~?PBr#fG7r@v8qo+mGA%F@xu=q&dd^JGTo50Wknn34i1a2@kN8Aw z7IiJpc_WaJL!kKxr%ksPgaCOmjEG@GjP@cJ3qqD}%=&54kN8D(UQMAVyyyz@_%*CF zPGyl*_bwr6jU=rd&oOivaKwWPi0>J42G5Wf_$A!}=Vv!)cexzEk+IgJ^F%i;B_0a3 zih&+pW_0{=t;d3&U9|I_(bn&KH&e_m)<&Lq`20|m>(H7VhPTg6=0O)Pk7$^hA5fUg zj{UtT->{Mnp>-^y7mI2=Y#%#`WK2+dC?xqc zF;$8CD$0oXuN%YTh<<+(?P^*B2MHyC3F_ca~ zVgS4dTJy0`xP0kB6JY?3iaa^CU&{sKgU6ti@ycg7f2U`rTVF^g6 zjM?N(V-DmQgLJ@im*iJ)Q#pFVyN&}O0m@NRqLf9PSfGsm@T78h1F`K7$g%a*tbhe9 z1%eEMzJOIkEYRAz{coSixqa0)aZ9!751`I`5CtyHLI-Q{DtU<2tp0ozjS5R&p1&T- zcxyC@-bA zJ{VpEv}nh*GyDNEF6H7)u1JTD&Q(p>O+1`?0$X<&;ix(arA?wF!J}3k!lv&|mKjua ze(byh#pkMJbO-`4MTgJk8#svr|LG+f8O=Y`UKmYIfk(;W@I9`?yNY9w&Kw(K-9Sdj zdV&OSvZegG2)ymYo{-4}11za@<}y2*@KSe78o|u3<>^4y94ZX48PB|j;jLvbLw82R zBhh`%)tP{*4^QdjwP$6 z72_$Fx0PvHlBh$J+8%mfbN5M2>x-ENHy8Dvq3!ElO^5Rw>rCFP#4D*o_dqM}YwbH4 zwMp(*y0PC6!-F*%Nrb~^HIfiQ$6KKWpg;DS2L*%ohCik@E)zHE*$`I4CSkQ>%DR$< z-(8Ha7eb!NrAyINVSm(&$5tQ1h3R-8x1%>Z^cPVw$g1hgtiuz_5G z+G}*fK_Y0|#A~~VY9zJ3f{On2T4ed&afWKO)^Ya6YpX<_weA7Bm&PwZ7R}l8rnZIG z0)Kr}QO)y{f;}ekRBT5m4b;GF0l@#Qbd#=Uly>3!Ow^R}VNAz-@DKq#%@@-nJq@&F zLT=CzziebY%POp_P$AHP46&284Wk%U(MMzI4(lD?0fXif3x${Q>Myvrkw-TX zZtDPC_CihhVxM57Cs*Ph>8LIIp($(>?n$jUEH5Ch4b$>$`8s1Dd#=|EibjVrM^9u=U-y>AeLVh z*OTcKbWe#CW7?|i!}HNoZEKZwl|Uxj`K0X7+@h)5?ozn1QxlYtNi(apa4!X4JkbA! zaHr#gIl(cu3knRY>r8F`q%zQtPUU&azE9`ICL6i}8i5YgqPyk>WC@!!%AKTHb9C9s)FWuJ;ap0+H(pC_AN#lG@I6P^H@isu3gXYfR)i*mROCpLY6VXthQHiGI2x_b6lT?dI^<-ZOsitV78;sPp8YyPfuug_=K_ig zm4v%nuUvPWX=&+U2yH>21}n3U8ZhppRveUi=p!tG@DC*WS-MuubiZ63v~fTkkhZ~usDRaU@Qt`F zpR&09Anup+xpPO=33cZK*NX8)s&;F_UYIZQCXhn*Vczf2(Kmj&kP@e0lfE`(C6reJ zd!#Zjp}q(I|4Q8cgOSm%dC9>4t_ES{{%`TZ|NZOw%Z&U2Bg5GKbsKEgKR{n_9bLd? 
zy(ev+TU>txo3`w%!+~twEp{A$lNO(xQ-;(>n zl87pmFk!`B?|c-l?xdcXX*1H_tx4+*U*c=()I>Xx13mM|3!$A*qt&fgrj4MY?|pf{ z-00dW_lr1EAkQQj4P*M4Sq?b&t6wJ6d5l-ln$zd&FHPI%3UEt1U$*(*3_sxK^>{s` z0IJnuX=dKFlx$9do5eV;x&pigsrJE|QAbm@0i9f-q_ht;C< zy3X*~v~T^|wO6{UM^;7YXiZn;^y{Q+y;^zv<^_Y7IByO?qXA<@K($1Y(n4SdfzT_V zj%#!5dpQ!TXn5^InX)cVSZJs+_-K@9LwKrR>EcRh=W8qtp@dRn@fS|_p@c*IgI|^@ zUw$xIYNJzJTOgL^*c0e_zX{B-v+{57g-3Tl;jJj1E)wn1LKEy9t!6<l#>^;fwN=>dX|L#AwFpkwcREoQe&9qCVqqe00NC@)JZ+U>qoQZWA$ zRmaS?eUD)jeaE$h6X1S0P~qHpePRKI{K}^t8ZV+w=1gUGC@c zy|tjuPlrdB_AOy443tH(ioJ>2=O?Nt(n&HARk94$E-Q(sz?fiK7u9uVRH2MWfeaJH z)8FLU_w0yiKjE=Mp>n|NSYSsC$YWCo==?DUje7LE?N~U1#n}0r`VGzmG?qv5fJUmt zk+Q2szQ;DYrv|s{??0a@KXRB3YI>*NgBAL}l@YTxBQs`&~ywSv}Sf#(n0T4&9)e`(G}rFv!qr0oA{ z1SE%DU1uB6_C;Qc`gQ$$dbqoEeOyJ3qdzDEOezCxQv&>;0N{$r0h;>wB7xBm1$}be zE3S%$^)vw?JB}%HHTD@6YA9aP4+XyLy?wp1_~YI1ac}ebe?6{2rp@$A<%Vg|*zG&@ z$|71d^r34#wp2YEz3&3rq;rJiCU>3S8`=vPM6=xse(p}z;rYKkmEd253wLp`K7oeHN1 zlDT)#xIs4fk`G8_!`|z~1Cc+*?#G7(5|@tqr(e`j$=fmTuv~{htga8CaLLV8=EDlG za}ykLdqPmrB&aYy1+QyweCOE59*j{E!+&S-4}fWH5cIY%5T8un@+y3A$A^gLEnx33 zy;I&mmOP#I<*DU+ZZS?Nt{EeXVgBnv^%BwATz#)@1VSqxiJhd? zMJK}in=TM}ssh@od8*{mgoM*@=DxLIA)i)_T>7Cm{p<&U#Z-oQ93>*P&9bGImNAEU zr^f(USBV{+t>Gy(z6mD}To=Dt7KUzl&k!wL&z>aRt;)c%#L_`wh%OwFf)(N2Pk))+ zRat{@I5rt9I-Rh67{W1(0O)6j^2{2M?2}H9ja)(HrPhbm=#5Z~JaK42Om9WbE+ok! zZZkmto{tbmLR>}M`utJpz5P5>IO`_h7zt`J(g@8MjM!RnsyD3Uo5*4B5U78a{L5j~D`8PV#1B zT&TD{CLQtfom8qa2IuwRedn&jO;okjyi^f|2l<&gA$OXH>e8{SDyqPqii)ukhAZwg z)KM=Lw~Rih_2lbRVKh}0RI)SnYkb?cWeDO+Ag~}Cea1T_!-5DV6(9c9MZ!WQDlJuO zERo!6HbPQPD&u0s(;-AbC<@X!kit{NoWN?Aa{(injGa~aX?Ik-5eZ*oD|dvz*?{C> z;v+#FsTe+9CIxMzKqS`1}FgOLpgl%g?@r0}Mg;UY1yN&SnbOF>}4=zK)r1#Pmh zEz|1E8%P~PZ?52y5*kS64{mO zc_RxV=oU)#E0ukRvhc|Gd>ZYXzNTH`*AA;6If>cVaN6RS(`YRAt|<429oRSj!MBOkmYv0wLVA8@15jH8Ap)8o98 zNC=?vT<4^(JDJmmcvZOKs?r|CiGSM8r+a9&txkW0F;!f?V7d>J#k11kR%C8}RRRfBoTIBL; zB*Ma*#yK?E*Lx>y({$0YKNvaMAIqeOI* zuQSz}SK(fTxcKgv^UfU(fag~nfePxnLpM2Wpz5oayCQu(&Z&G^39lA7HY&VRr>&9b zm(nOK)Bg%I%n`#zb1i61Zb%SsG+=t>F$q`h8ax*uf~HAk_87+DJd$b9E)&QNP>l$V zq^O?+g=;f|1F`&Qv_tmWE*Y|bVxkvb<#eJek;#N|b<%D`7{P!FVi1>iM6+ zA3DT@XSzh$RXylZ)N~ni3C^G6EFu4`=`!3Cd7p`niDEsRZy;&2eAxAAmIDbyUd>b$ z`J$J!yy@WrZ_w2t6Io~CS*Wm;qzQ|-YnjSHS~?u5>%N5E)*I=qD9Jz{l6dU?Qy{>J z$n409VK76Ws{4&>d6VUrhl@jU`Ob8n=zSOZINuJk=3l@s9b@^BT7(m|Ha$Em$Uo+Q zTp9KVYP|nUj?Faw1Ij(JrY2d4fO@P7EX9yh)EM_HB^eQ=l2x>M35qJlneg^2-&Ww2 zXo+REgfDENKWj_0MmYM2tJ;sXDJ^A)uPii4ON}j7J*dbXhfQl-vM<$U z7*;u^HpXHLXVmFpb#K!K&hrHVRq`B^mR$oa6e?e2@c^c8RRwdC-CnBwj z-$S%!ho6}Bpd~*qVdp~}ONiLsh}s#BRe60tPTRLds?J)63qO=TeF5rOx?YEY>gT@} zEVnjqSAJj+!!NPMt5R<0{$5;?a(plv@8yKmF&#tz9YNC>z>6wGq>gK1dZ@ZiuDdSp z>8!0Q{sw5n(2OqU2f!uY!05#8-twPxr>b#Xjb9O z=u0N5ch_ZQPnAr|QU|zv5KuYIr~oZcIltYkl?6I=rM&Vmv4YPq^uO=$5G2Uk80R`V&m; zrAypr=jMpLiw83}De-qOn#PK%(O9WOOI^c7oVnCUX*W zP~zLBL(Xa}wS*R`43hJ}M11?}4nrpd204#V@T6}Ry?nwCSWj_e2I7dV4Q%=3v=e3F)2y5 z>ETdT7~Y!Ka|2DC-5IUrtp!B269=ZoK$K=w4jny|8$oQ5&NaMkD)}rm0#_Xr6)CT| z14R-md=U+nI}fS)%FOK25on+|)6xXgC_cv50JdKJp=amBSWU%hoX*}nR9fcH)A!eJJTAv8d67Z{-w>DX3gFYVO{=`qKlUB zWJ^-DGm(W+`c7tK&{Y;D9$mEDiRphWF{WI|dPJdj?xn8WDj4+K%XQ|g(w!`@NfWKW zvNJUsv(P}P)eiA;Aj_^zqG80sD|}J)`R17H0aY`eA7C%jT|PwF%e};5l}jq6;o`LW z5xy4@{^5pz;T=9J;$feFfk7p4uy+u-!%yX^3^a=4DiDli!rMHKWd!0~r*Rz~*Aqiu zf&f_1k7=6K1(76H*TZljA!Gr!1WrzHwsU4!6aA&}hPs8C2il*}b^eQfD$xi1w=3Lo zSMH*N=K%VAZEG~=Q-zV|RTK0cpk=BXd3EM;$b!{51e2KnG7Avm^#mAL4vBLh=J*`v z2U#8daOG;r=$nwa+^J}G7uOy#=F;!V{sn)Xe-oOGtn}$c#vD%rlWWIqrPB&@ZV1b2 zQDga(0!JuWWn5qv+0>+Sj0J(?6{3x!IW}+>0CUS$kYbQpCpK2lT8OkJjt0^MFpY+m zy1wdcIF_w{2o7$bIZU-*K0~deavx2egg!5#;hj4=ovjWX9iQStl>Vq6n~&< 
z6SRp|FJ^4`43Br>O)bnEy&8bMiAVr@vq~t=vL4zlk)wjB^th!i~iv1%-ZM zyV22@y1tD_=m^5P{NXkPD}mulMr;`$+mxoL8sAf-8mE6c#DD{R0g2eav2pA5C4+&m zuk3wLpW(ZW(%nyf!6oqKEI4<(7jEmIb)6zZ8vLZgeIhq-JvIlpz#^>55ZSObO0X!j06_vy0O$bHYi{iRK9 zNdO_ZvC8a7x5&j+FzgNYR{7`-zK5lT&on$!C(_)^GKRd`b6lzwzEgAz%XQr^e8 z6qE;xgyu85!{IQ5q#MIjy@}^W-}Ud8laliublCL`cuL9)gC#@6<+Vr&RyJxd@sd~MpdX67 zpDFf@M^w6CiTz7^#jeG4s}an-Q~jBdm)+6Vs*$oCe3iQ zii+EUaHS2us6E<~d5ZqjoL;Byipkwt_;jor{UB_MBBzS>L!pKj7|x0-n5nw6Ls-X+ za7N1@GEv}0dFNW*Q4R=Kj9ji>1$vZf`;^L!J>AJfneI7^r-6Ll#iYfE2?~ih`2bco z+DC}>X)(5Kp=96`blT-PrDqcxKQ4f(zdWTkVfw=q@k116^c z5$f2_8bhcvXtJmO%tJ+*yoIk&f=5`Rg(5Bh9Wn}TFQ`_5a zEM%D`(8dy-?uuL*AC$|iLKxB;xgpfVme`AaN?_HdH;f(@Ko+CIPFHxP+6y3Eeo?ps{fY!LzQ zvbEd|bcXzQ28jm?0V;}oYjyPz9kJJ84W?$TnxG;V|N!qsivGr%tOQGtKv?G6!epPrjd%jzhQ zY5qZ1twA-DRJo09$B4FBM^7Rx_;c|AJ=pR`fudr9}6>G*F2(7dY>x0 zv_r2b#mxBdI+4d0f*Rzrmc=uyEx;*FSLgB4H5-%Cx-NDI_KwDW zbPBNT0qIX5>OE&YBRD~haa8BC@8f9u#t<}tir=X&h=0Hsi&Kh;c4Fx$lH61-EBQ$& z>Rh;4@Q_-U?7Da)0!KhzPXlse4_r!P$ynD1sN1id@oIa$*{PPT=V&~Sv>egYS8XH0 z;?dsIy5Qp|<^H(dcat5XG=M?z#jRxqZCixJVjaNVnPSJWBfdG^u`&&)xvY3?8%M z1ysgD5cR5T~pKtULRQ7r2znY7kMXu2=94-I&D0oYh zZAokiPc@ zn6Fp_W@4S9vzpQ^Y;VUZQm=4E&K)|*kIia~$`ejzuEpYOHlUxZGsDDSwQECXz;7mx ziAV?0L|pFeC8tR%FrIO9PJe|;~9fWAgJZpvJiGE<3M!$KDj_eEjzozd0 zSG7g>|Ks8ghEC3=PW1Y}%Bw-ZRX)rq9>c#_Py z-!HuINko<5xqpGDSfj*1qO?dXDGxd*ZbTabm)){DQjmV@Yp1fBmj>H3 z!G)HI>O@;t(MSPDWLAw_k_YQB>pK^EK7Q{a-6hR5zuz`g3rbR;$s(>O=8CH0XbpB* zK*LPZWkgkfG|xYLv-B_efAMV~KKq9cN`Bok)f&7Vzyz<)9o~67)AOfuKl&fk)a7ii z&+sYJ%#m9*#=pnYaLcHwL_5UXD%NT``oOrWLGgVd?RMF^lkZ9E1CY=D$)WQd9+F`` zWje^RjX)|D+YD_`2Ja8jooAm+1W9>(Z=IQ6XUOlwGtBMDp14#l(GQbHhbgV0ApPt3 z{RpL->5V`bCCn3{CNTSSHLXP3WtvJ8IHYt$kzT2#>`E%GYeTNeQb=}2db;Un9e|$d z3w!4HsS#8vDzb9Js1Y?*s>svpduA;N>{f*Z27rGZDRx+dgw>d>4L`>!k<0@YS*-RIKv)+akrjUwkz$1tG8BP)DMC(MmmO`Iihyo2G}f zyeJL5oeFH98zdea1f6H6JpNrP)qu621)67dlx8BBM?8CHl85+B0F>{4IuYslg8Tn@ z!$DyN6}>^W4>q^@IRRNH%otfkv6<)5WQ-vL1~persqR=azfU1?c5Egos-?+Fkk7Te zZn5aEpTvsHcL5e-oUZ1YFvi>m@Gpx5H&l!r+>&GE{8psCrb#6&)=%^sx4++~TX{Dj z(U`=HyA1CFdWH)a24%oMLVg6dus(WbEE>i!j|nFkiRcI>m+GI+@sA?KTy{Xt5%q+l z3uVM$MRq0RTd=7{Wi|no_xzh&y_9aK{AjFVkKbs|e+G(7@a+z>NZPh4j~l@YrbZI4AAqEZRgKKA!wiNrJVxhuwa6DosrkcD#g%peKSW0ayxQ`vl_|Ld-u{A zCv|H)Dxm)_t~)*ZKmo(BTJ7{IjP0zcIvJ1{TF&$3)pkAZjY$OeAE{ud@!3bCJ4542 zVt+M|p}ZS*s^y#t5tEiJ^fYuSOzp%e{Uv!Ptq-r^Fc!0=ylx z$Y}}M!hnlEWA=iRyi!q5;t^4fmWdKmZ0^R;M&Ffj^*XYqEmdto4POchg&EUbwwb&T zVSgzZO!Qau02Vr8qrkV|nih7XnjO7k%nqcnuVlI#ta=R|gNlz3w8>CV_L3tb7yC-% z0iI>$LF4`C?c%RT%zN?Kq}&IKcdFfF;uY$?DVlt}ax6JU^1iy_@sPE277;5PEClg^ zpZGx~7Jv=??E3K495b&k!h>t^!C#eC#8yoI`YS9tpw=+I^|W1tXfaLx6*?E$Z)3ZMRop! 
zWmi2WtX!E`DQC6dMs z#S-H%6rNl)e~#>fZ|K&QoXsNtky4s=RGW_(8F=>0ZzNlrMr0RH# zepIB8YJ3zl!px6%Q0`ZysxGb=#1Ly?P7HL(Q=7(}3(*qYl-%F4RBHiu%vcZ-!94UM zM{6+O=brYrTNX*iRO=F=P?CqU)3_{pvf%^J6(%=QHB19uROXEK#-9f`RB?u#ZRI)z~m=YtS zVjQS3tfV_pzz9>a?emTW8MFqBWf8iU{Wb-Fh^p_|x&yi!gUh12=$(OynY>KN*9t^Ppg&c`wmWK z#RV-MZ7Rep`fL08b%{R54>oi zkFimqR6NC6^Et1(q8umrV-}h*4RN?2x^Ay~=cCevWe*aDwJ-nX<-)H$yvxg9I8Q#qK5>FAwb8&2mtP5P9Z= z2Z7|rTGhR}OIv^?XUx9A0&urvvG#Q|-@1AM$Rvj^5p=}P$H+EyP@~vg=&~`AZQC%E zV%SVl!^lr<6~#hW2nYy9XNQo*@4phxOVm#i}9I>kS3LHW+#)%(dEU)NbL025GWYBzf{LTIWqiunBl<)cZ z?0QFvIEmTb9)m+{>2jL`6NMww0|&!Fa3COfgj1WwyEzEdk6-n-30J zs`GV{Q0o4d*u9&;i^agHIO<*~yoVv`DPhZTc!?!c_@10)shZCr1`Zn=RFAT*E38;H z;y=A)k(~|2?AT6hZ(&XYVqv4V;HpuLXaZ;sf9|?AC$~L>m0Pz?itezUS!jJviD$42 ztl0m%eHRf(+m&T6ceQU#cRGY~wiBX)u-+|lY3^~sx}GF^Y}SOlK6z+cGC9V_m5BRj z#Y&R2RL(ykD@AdjWf_dZ^YCUf_8en&CR`J)GB=gN(_=ycFTIX(5dP$qD!x8 z88%vJ^e7wov!M%da?|ZH-27<8cix;J+|=C9nB|cPQJOT3NJ)FCd9;xZNl9(si<1;Z zK)=5st5A{3bd1fW6tr2!#tEF|D%%dw@f9)QngWE@U_99_7)3Wis-9q%!xi-spSuy5T>CqJ^u_LXKp)rpA&d$knNol8tx+l_T>)R$W0sC~+i z<}EvSC3}bi*o)a1{*w{z7G&=su~3-u<3SbKQM!4tSE2bmKsVm{o#R*&;ok z(w9>I`C%Nx{-iBDSu;}iSVBQL5o+CL4TipPX|2_&bzGTIthE}mOL|8bC()f!mmnrV zixcBtbEP|^TmM%9D~fs#!Ht#51UM9ZR*x8k{QUB|ymEbzG+E&+yt5GgEo|W>nD~n4 zy$s7H-Zufku{hsUQb1mgm=xbv;DlQ7L@&iJf(d&scD$m_nWLOhJ=q)h59_>=FRv$* zOV6(1n;L!s)PgL9)A>VEAQlnPiCjr|6HpIrzr(z)2?&M)HAul75Uf1~iq2@h(oVAz zuqNXO2Jq%0D|!@VL^p3~74+6r9E8@Faa*LacQJ>eDZ0 zKW`%zF(q=8Fc7Z%Q~6gxfI-o5wVazImmal3LNA(Tp?fcyIFUeC;IX)WZormZ*jIAR z(a*f$2AUtxSkI%M5u=x_F1-^`pISfz==BP6dERJIz1E4Qt%}QIoVn5P_<`+?WyNe} zg9MRf`S1n%}eZB(kA0BKOez#q+i;n`G^%-5Zd( zwVT*mAm!$fS&E{G91ZbAAeoN!CenRwv;+;NnU+8AF0eP*mA>N(KiKf{K$Ld~YE43v zFv@U7_4CiLJ=y+k-eceIx z9d#~SEFSd5PZ)V-iM~4A9N{D_n-T{Z{^o$X;4BL4gF=zwE~^@4p2i9FChL)xc4+H7 z75k8Afy(2))%g<*0Gb?k3aA*o9cTTbfznHcUn-SP@cP@z@ALXhzfeI4Lc*y%`Ixdl zcp7gsrUlTBZx*^G8c z}AH63q>aC7u2XWT9q>ey|`j>3jOVMl)vNihQt1ATTXhW*YAG@ziYYx zm&p2IV&7n>->SE&*3+FY#e>>Z0g-MS8V)nFpf4v4{j*r>61PNqFZKtX5h3vcAAMij zd=Ow}!VHsi$jY_}5&A4B6>b$^&zQ_h1M=2*T){8iHGn5_LZoRf5yg&1#pF!7?K{R| z86%M|qF8XYkE9yKAbG!a<&PHXl`J=kwDZGrwVTH-dJ_C@XXu2%yx)2vb+Dj=KI=t2 z%m93^{Ltj>;1)>$1?B7Q@u2=5AXseRCgRu)F#(lZ=m>v*YhL_!%lmG6-Z$`nC2{}3 z$e#>c=VyLxq&E-%00jSI689f_7n7~YZ;lqKs`zUph4F(=_z^fA-!9z-yWCSC0Jaki zM76ky(ydRYLr}?<3t3N+OUgdniBGv2lI)xj)s`;dQ^Mz;7m9eh9O(#N$+ivoGqnd; z95gC6y#CHcvpbUlN}(KBWxZvA%82r?=F_r%4$$w`BSK*tDljRQZULnBFB9rlu{|)j zWwjZ$KF?<{81{DgeSQ3V@uE$jg2UFYja@j8H)-(&Xd~NM&M-%;WYaHsv8;(N_!g_o zhoEz13oSQnTbyrL&o}Q>KNP%R#*B6C8Ug)!T+|}Q8a(OzYWw#muqwLaI_svznRp<+ z3is;MZ^{sRqio5{fw^U)LPO=3CU1~fdkcCU^4%Eo;~XeS#j$}-Fb$rN7)kD|AuG$J zx)a`1drP0Vo{AU~RaR7Mxzu7wgl5fG)evqUz87U$Ns^p+m2Z<1;bD}$0KJTOy&$Ii zqNCZXX3ZE}y6Lu$@bl%(2js1U`*a#y1$4=^YMZOGc&{oW2=M00U>Tgz|0II}_=NHF zAxsPz1Leo1k<;_Fq88fG{22)4i7E-`rA3 zxgso+UzG=ytB7w3nT))TFC{GHmd=zaIH6`qe54Uq=$$^=pq!AUT+B-*ohJVUrK+j~ z3zyl}u|I+qO%ifsT(p(INi$5ozGZZfwm-Wx=-WYlz5dT~g zT^o(R=jhWh3$QUuygfE0Y(n1hRQy~sLA30_>%`WJiWV~wzlqJXM zse-PjJ2&m_vsrapw*hvBakpGW&fz9tO3lCeHUyp0fV z8zXm}W{>!_4$8rBp^*zL_=`-qqw$gEmmJ2FxpJ1N&LD9lMhFIQ;NwvjKiga00N}fD z1SX>k6U5N*8{OT$H`|GV2swE?ba1UaTh}0v2biAYAf)`JDmG9(^e2a&c;X?)kcHW@ zS*0rVOkBL2yzQH%q?FN7qcKCG0}z{LmQ(jhq$Lx`Bay3&_xEq5o-%BpJmi*f^m6ig zJpP}LqtWFc^-Nz0w?zmflAO_^{q~X#f#uUILCj#z;1Vt!{3tpIGv|}{Er0QTv>KH9 zf??^OMKWU9&L9Kxz5AxyR;4WxASG8j*6m0s&znX51*_S^|D{<{Ay9m_%dfHH z;KjkvPo+uLA%ia93xb3r2v-mUbw(2DMX{{CxieqWnIKdo+pf{*IdW0{6R?Gn*%1}3 z^N+=xC&PUt+BI?WIZukllQ1JBgtSBamdLmomI53OR{$vJxeM%yiesZwA5Y+h?OMH8 ztrl}d6S3E|H%Y-V4-k1q;MK(;V8}SWO(~tATa&oaB^&q(FVYfB6*C*T83Ss9uyn4T z&Yb-=1GGu(YQ9kov@d!c^}P4Q7C9ra>1M2hf~h7xCo<|}s9b(7FUrH*VGX%T)o0NJ 
zSI*-^%QjPE7?XmZdav0Uss789@-;P&Vk_VXs2H%^ic298B^IB#koNS{4Q_ zBv8@85(E(_0v3n&76*yZ4M=%IN#nEb>6V%p7sFTWu4#l-d7W;Ke`1=^n?w)$=1Os3 zG=JcDfIR2rzG{@u>=P_8}l!GZ^wv*bP^3+j42H3VHBvB9!?V!Vd8H>Oe`FZeAtN#W^w|-l;OVn8fy&R zPA~NzFI%e(wu9$uTg;=x0K9yw zyG55}U@g}Assv_z~{n!;5J9Fh; zxz+(p;72CTcWRar0S&bV{(65jf`=9QQ$i=Anl6#*yjc0wpSpF!ATe~iq1;KZ=;SsT zgll&|Ok@PT$BxcOPNF_n_7QwK|LNFY>+p#p4ciHV4l#&!BJ*d^JTf{7FEbO4?q!1w z)Fs9mRMVqT4Q84l65uGYSsW+z?yKU3=4s6mW<5qcWI)y>kVz9XSQ=m|g1?I4LgdKY zX;J2ItRqcwIUK>W?s;wA&`2z2i+zLSv(B8lfeUqLtSA^uI;p6KCcGH`DwHQH7(LWV z6Sdn}51qpU`uy>Tl#T^TcnPd%ho(AouW^~9=OUqJxeITAEv!&^L`iFx9heXBlqXwG znFtQ`-#Q17w3hyW08Fqbz#l|NlPguMl;YX`9vzikh^3qI%32ciUF7BDpTyg=`5>Ev zo5I>L)RB441+sK{mV`dalp}9!Qp5_f&`1W}V3e8biggoDHSiwUdIwxCSU~S;_?k@p zU|n-ui`bt;}!|?dchZG^WEr6s%!RCp& z{|#s(130|}N0x0Gc&X=l#elByxbG?LrA}t7)Ra*M>}3-fT9+SM7aV$zVW|W+W3ay> zMdhZRVlytayG@(ZlGy(2w`l3kRSac@kZu`+#PxWZ)*~hx2N#V_NmyX+ZGr@ zdWcRTVq_48y9l~D$r=s)WJ& z#1<_@2BedQ_8vT>kMb#=e=O9;0%V3*mbDiU*lG*!&K(fm^CnnK6v)x;>V}unS2?a6 z)7Sz*zag{rQn(TJn`>^$A2h2AxTxv=j+$h`Sktw%jmroab{vmBOB1QzlG@ZbUYj|} zWrhNxHMf;TqOftYg7?52r#)fnE?}sCDOoZ!rJMGx(KZ3&DT8I!j*1{vnP7`@O10L` z3RprwrbQ&;9;gSl%$DJ=&(&>xh(B8lBAT3DarDyAWMNwQ-!7A=Kq9$?gm~ZCN&|UL zdV2hn?|x&J+UECkBisB6*-KgtPwcz2=(w! zZpLIf4u7mFwW`)ic4@zH%b#cqJD+^JF zeGmbDW)=d$eAcn}LqciC($4j8VGr$)?*YZn0L90p>_nsIH`2G5Hz&s_vFW3dgVE&t zb~_HtQAy9LX(~LJ>B9DuvWHW)BF{W~d5AVqHOFOX!Krp*cHtl=$HgJtf`NH`sE1{) zWs-t6?_v{ss%qZ&6(GfXb0m5YFo*F)y~dlBxV3WoVfy+7=JNy8v$G7X#Esr$4)YiZ zaPeWH*I;Tbc+qtZe<+W?=IGF%KZ%%~n75;A&hCMl(pFdQUt}KeR%(&}lAy|Euys|Z zx9399VHhh%m}$W(-&&o=ca4KCEF{K6hUmDtqLV> ztP(MD946f}NF}Olw~K7~ZdN*9A4bb$8r&*$zZqZ4nr>JgrQJ{omK^rjKtDDxY_JI9 z0*scN!jq+Zkqn%XQrtvQOU2^gSN3N8)yp#B?R=@72B2(oq%IoR0Ayi=A&VqgEb@*+ zUjhyslzR@WBLPT&TXGRYl#WDgU$JS~xx4ZHyfMB7F1nQ!IBNo#C%grrUGXuRu9~^* zOj!%?TV9c&GK}$R{F==Cl=HRvU4f~6K$xG={C$Ct!=s^INqE*lnpEk2mvU5e zwq^w#(65L~p+1Zi$aDyRKoO%6(c{vBeuf{+9|HQM5X3Iepw@5v_7@FU8+fL|{LQV# z)vjl}La}68_JQEF0OmT8{&E57d1i4R!dLl<`OlTa4fz$&5dxJx&dGSp4O7P4LGvx* z)R14Z;|0v#qabHgldgA?bcW)RG#(fc zZDfZhh)r)3^KWulHnX!kwMIX*kQHX2Fv+8KQNyvhHD?!20UKeP)1LhSc_@7*p@kUO zAjOzZ(rXh%@vHKRzqN+x5;9$h;oYBqaAJiQv`cx0{JdnCO@Iy!{}_Q?7OZrfhx;(K zi}F_d81KT$%GE3BU1!8ZxrOeiv7L)myyF}hbDa|>8)rQ6DT4nz>g%R}S!;@+2&D)5 zqwm}Z#Xo)?x7VBE?FEaj#1jahC$UklS)PEhYPgi0yK0Wlr`)-tjdJX)0a-94J)95p zMFe5Uu+@dMfdi~wtS-TxsnMXs@};~rUQsX);U|*@0esdT$z2H4InroR=_5u|j|;5N zpvLR=dzT-!jxc6Zx$bvYGRWs-{wn8-o5WR}O3?#%{d}*3%&iVMv6a&t7{9YamYUmSxF0T_*J?ve zJ~$tx9RKIpVLdj*6uySYvTC5iM_X`(f~**cH-~+}aG;@gA>NP%zrZ5lT_e%gfYge? 
zmr3OlW(yG_ml5~Tg`vNynM*va;(R2YlHjm(ygyGuiJ>6+S>nU7O4H%igUOX92~Pdu zDrrDuWK64A+k}@a;>^lwznU%k*1y!ggrGnE&~OU#$)|O{^gt~Ty!J7^!kLmE-&!tv z$o65M%y3NAdvrGQ$*i^zzB0|dL=*`hvfAk90c_}s}8qUS=<>&C~iOmFDT z;&b{N&C$n)ql2#ZNyysx>nv|9MO}*q=gYk!BAu}O-x?NY@oTt_3EfZESgmf4`}3^J zNK2FM65|;gm#7M4FvnK%nLgvgA}UM}+@{@Q;H?4*IeESO;!l>k_x_YqpzGr04?ZR> z+k)h4c-R9njIx(899>TWHAUY}72h+#S%(1x=;D|3aeT>Eml96EhBQY|OQo?V%fow7vf+ zm;>#-osIbd{=dOKAgUs*=P%gL7+Wf-&BW!b9?2)GqfUnRoE;$#} zjM}Usa70o&t2WeK=;kOi0!)h`+yv~KzRSyTMO4&ekDtr}*Y4wgx2ca^Z7wGCiuFo) z6#Nn$j-4M}?#vI%_E}d|;Ai(V&?k{utxB;x`EBw!;y!%R4QWrKf=pM){E%V%nZXLD zCrM8&EzK=(=!C>HwDpvv3oLmcI)!#p$YQ{XB!E*A-Fb&&efbt-4Jhl9X?i2i%Ljqr zAM(1_$bM4B#AEX;4^+FB^q*Y4jFGF zh*$9=gcF{JYfSJ9gRvfmwd!Ji3NaIS9_u_lXQojmUj*4`jIs`_j{h84AhL3X=R`WoITOuAzKrTe^G~LTK-TEL z%E&;^Tf^E=7LX!r%D5x$QW9aO=7i*1fWC`o7yWa$^DWrF|jk$t@g$=w;j5BEE9s za{vSN^4WRR?=_pXR$*_W!`@tk{4Ll^u}KRH3Af=uhdpWE6JpUi4bhDR`$JahiWQ>A z4aiIX(S!7*iz!>Ezf?p;3R2007srh# ztL62`1BmhMZWg-n7cd(zf`b<@cg!n{aBTBeYl()>6!^M9xR?KVG<#-jfh8YUvi6{J zw*iyV6cJ0oi7o_0i~;qVp7?aP5AYu$a3^~qIf@q*LRhEZm@5ZyWiH^f-I<4xE&LzM zu2*O?k=Mq(>@E8>QDQM_;#k4&8p}_h8mIma^=x@aLRyE}1v@RmzcBRiy|vvvTJt1! z{$}Ip&e;k9aN;s3p06_7pAa_qho|Zb%aWXlZ#*GFR+yr!pQ0#!G(@fNP^88>?9XPRX@}eLISP?d6YdtqR zVS__?^BpsKBjWw>TUISn%ZL!y1$a8-l^8m?!H9(J5!bc`Gd9jDIF_0yj_x(&&*>68 zOj9=r_evV2l!O!>i*_8;uVcV2{LdVKLIMF_%&$mW0p!OyAYzz$jbpfhKJnEl+}y4j z_E(b?7@i-_3#q|&8ANJa80wG1AA$7cv zeuZ(&bps1&O;QTTUp9^)d1#r(0pLx}z@!;OplWm#w8N$m$!J~#k?2&=93U`b(7LU= z*F~&0lE63{H#G^9V|JbC&!9w%ms=C(=@%$&Eua^29D>h)?gjriatdyQ>0jpz74-8% zCjz{HpGv(Hx@QEx5k3FWhP=#(iYn^|An&V1$HQeb4eBu{J|9QGNv9hZur-Mm;^79$ zYiIy28f;g&X z8)I8qOmyEMM!H7h*M_^`o|biD5W%-Q3q1QN0IBC2G08ba&bi(PVmkP$TM1kJ*ovvKw^YJGFus_Wu4M26=7hT%;#^~I&0Y@ zMUZ)@+9uJO-ss7pjUi6FL*1GZwK=s5&*Wh_?gQv88QWu6ZmCOU_=Tuv!_yx{^>D`l zk5$KzeU{YB8l@ZG5wgFGe?PsIr}jmNslZtYMzJ$FA-+51+9s-AX9&Fur|PELM6-@X zDPh`|3(_5R;4aZ;)7rK{%{AW9v;3fV=2mTh3wqW-u-HUY?4D3%hW$;#kR_4_aBfV= zuuCR@^esP+X4?ZErnDr>7nMs4DzR$Xk<6rJkHTX?%RE*Knt^F0WfEhWvZpvkPVl$i z?Zo9JB3}FJyD!f2`@)M&*w{)fjRIAitJTZ{quGx9OOE??tWllBmsq9L<9^GMoK&d@rX!= z@4&EGd5AsY$Z9oYFGs^#L}1rs1Dp0neGY}0);CsB>|>1m&h;Ah=8IO$vpx^8^X?LD ziwTOlcW-;PqTqG`YEnLG-A)`le>oy`65QfVI#m2)KTmzA(5^tq@AJG}*(X7g z+>+M&2N3|CDQ$<{x?*=4%U`o?B<|pGZf&IRCK6m-N0|&uXfW*0(rZ&d#ZC~0%>E9$ zaSBqC@yv8RmtX^+7?L`P89io3AuMyDfWL7{Aiu`kgKsfOM?EJttZM8qZld8mYIguo zUNbrsHrmIchj$OSKU>7-{{RW^5-`xw{;cHHo3Xenc2~dyq~n^Rj7ZZy;wxAxR^7*2 zkHtlkZ%cFqChm2T0W1%YofnA1>5O)mz$g_H6JvobC|!p1r*NAaMtD*uFo*a?jQ)FT zupDn*BQ0p!RoP655OI`-Z_u&mS2n($!V$3Ibu3$IK}h$*9wYjQHD)7clMYjd1Z$(X_7JIAE#l+ zgd=uF_+^B$yK$PV;C4lEwRaeSV;?JtkOgI=fsQ&f*n=7(P!RF?gPJoIa1x(sZ*VWE zMgQ~qX^UhO2wX9=T4BDDdJ1SZ1NBHqPhzNYoh;7mw$f2 zepw6JYjZ+Bub;5k`(a6ycI7Fr7v9uZU>3gtl0o+$;jf%?vjpPzZ(Jqy{wsA)2jp{^lYVL)*kJ`=zC-)Y!@rT!p4 z!0Fd(MSre{Cv!n+I}a+q9_xZxOc!a#8>R{NGPoaLj6aNXvQ`b$E*BKQe(#@1?X+g_ zu2lMeXL{GQ3J3pL5A100fc9d+T~eu$9PIau-6$Ji*FtrMTTU#PPW&*bqD-Pxh{AV-G240}He zCgiea=$76W*sscjV;4b3;#lG!nb62i|DvmQcC_5`n5hCWI?Ou@-&S%_veNDCcliHC z?*AE{qZO$&tNlHmNBlLHCivfv>7AX-tsMX7?3{N@-#8E>Owf&YguXo0d@2GwG*!TM zabp?wsx`?}M0eNI3A(Kg(8Cl-diBMon2r3=g`WB#>9Y~w2%|b$_Bob!I9@(UP?r0l zo^qGti9dDuBo?FsQvO&xak3PoqhwL7f9=f-eiTh~;KYVqhff1|*VE-2;11ANqf< z_CEwA)POSDF3?}AWJCY}ivQDU|BI8LYiMiZV(j2(Zu_5hMU|Sh?FJjdHy**y9+gvR zBbV5L?L!Kh@CqQ{y1}ZVp5GB>qsW96i6UtkM|J8?H~0B(54rt~0lbcsZzdZLFV~ld>%4X zq!{Ox4IMHy>-wt{IN*ek9Q>ZXU(Z`3J==gDM^gl5#WY1i4}(-) z;7xsVBO~floP68_a|IO1&3SWkViiE+yi&(6I7`jwa>}wFB%oW{r7uBUj%nW^M=(Fu zmZz`y@zy5n9Ghv%ibb0*JFtI@CUyEzX{KiY`1KJ+JgPJtp!+}qNI~Wf5y$Z3hamcK zkw)OXWjcrtOQ0M zvalE4XCC-6jMJB3!&ZcMQhX&lG`_@Iyh9GRqQE7#WzEBkXcKTZtY{WAuebtO1787o#O=EVrIU=7+Ivuq 
zKI_=I%E!r+nr+7RmwgcdnI_%W4iXgQSVKF+(M-lB(zip5s!pq-_}GIJv@;Ir=e%D zbtKIyat!lg&{pa*GE{92oyiGr-{)d#UIC~Nss{TtNSo*tiu#eOx}ZdfQcAC$r!nT? z4;}B>M_|xaBCOA!G)LGdYNg!WOfIqaW)ovpaIe^XLzeH^D?ltQ$tkm=`SMfQ7COhR z+tuR`fh(RT5zDzHEA`$vl8qEsO6Ky!3*-3l9t%#3Dw@d{JA7`Ey5Ax&OTxwVGA)ls0qPfN}QI<3pn)pvFP zq{taq?9Ux<42@|>ff&f1cNU!XCXo}yi+?jSJ~@ucr~f6ZP^Tv3V`l0l=9OrAT2?R) z!8eQvcOYw%^!9KwnNJ$<6f37GP1=~J;@nbab$cPtO#%D7@mU~;(d%=pTSc7Rp_H&p zAhnd5GG-vbCqXde(1n6!I+@Rl_0QZqD-h$oTVy^`x1muD8DfzE=IrF~sHctxdC1*n zOJ=k2H?emsuRdrWkbFnpD3%5CN2gamghU0uLLRCP5^qHI=(=bJ9963SgSkFo83WJ} z>d~XW{+F(*f~g}W4YJXp!2*c<2I=GQ@&UB2WX8UJO=ruubh%WK+x}V5(|&B2!G;-` z12(dK+j@K~bibtO=;B!pVXAA-ib^B*fTrUJY&9+1led4+ z#a6~{gkgYsLDFb?Q(ecPN#)N<+B+m9pBw>EqW6_BwUh7=YOWq7SXGLyvq>k>1<0Sx zLeBA`eQi}2nf1rHk{FmEIwnW28fk4d%vEbnXYd!GW7%;lmE?c5$JwOL*ATV$>{CdV zLZ=xHi)Wt;Re5JGftrRbMrV-uTUea0<6n=K>3KsfaJM=0Msk+XMZ*c^mM;A^caKP+ z{!07-G{f<`Zw~}mAzf%qX;Z1Jy$|vKKzGdor@947-$VBL2eQihNVhfFci;AMA)Fu|a`goELF$FY0;|YMmnjzttwoW9ktyn}k2=Z1T)gvQ%iX$a z1EZx*CiwNI&zs32r(h}h6OMwK#@j!2UmZ-C7zB*&qR-c%qodXB;|Xbmij9zz1g2Qc zz^KUUKM8Ra%Lo-e{am?_o&ahG*-25%bh}JJ=Q6=Q72r;F)}Y;aygq3<71|%7RHD%b zTFw!_5133Cian0Yml27URqyyrLcNgAr^`45#cMn#O_f+KFsGnKU!%h|i^zuVq{_Lw zZQy-@O|}ZBVpfORMT@Jjhq!Y3EBw|Qwq=)8?BuIG(%>h1;gw<_CP?L&vdebzTR{e| zv!f(}niT?tb=UPozQLTHVc3D)Sd8**`}%%e()La;(c|Rza3smk>f&a|&+b@2E8X9) z@)-Z@=`jobG?wD!)AxS7?%77>=E~pt;(mQP7#$vj`qGT6M|RivmI??ED~<740h>5) zijo|IL0^GKZl^>OfsTIQo+ypTf*%5ALJ($y-*Axl_{)xly0>v9hT8<2Bhp?N)5R^tXG-PVkGWm7>&*HrVQRMacbHH5<4)`(S71WWg9HK&x=XS6@k2QfPKQ z0;h^Zw!#@L6rAl3zjUMP)S4H*0#%=a$u@d8{#8JIDa~4vtqJ##49V6e!Q(Wab(O0` zpF1-th05Wykm;KRw|VYHcOQ50~_9GEmEH-cVrIQ z{uYR5W4eP+>d8QTJohru-{ZQ;5b8RMq8&v&cwDbyorZ|`UJkyjbg|gt`r*uC+vAl; zws98a-HTV>ieWR~1-(e5vm_?YEL)0}242Hk<*A>+^;s-*j_Jp@g43(R6dkud8)y)2 zXu8t)SoHs>&kpH>OVyEAg=BbQyGsnF&%+a->LlUu8*EVk5DQ`)=Z zjoJMo^!Hr(k}ypi`O$mMMJ&}b!1_F!H{s&vzc--&RNNg>vs4nl)iweU008d)sRsS8 zy8Cz6QTa0`HOPSAHKP{wRGi$fdqEg63JIPQ=4z`-x?)wcw_aq`obuthuq?cpIwt30 zLN1vg!ux(b`1}WYo@pRN@7-pwSg7)}f04Dli1EoCFEAM^ zKl3!dgc21QY=<4pGvFa{eYHHIGcx~~1b8a&>J78%xbQA;;MHGqvB-T_IL$Sd6?_1L z;jaO=>;7C_Y{vkNJMr1tmm2LIOS;tO(c0l>4y1d6$f$6SJg4N1Ej?5!DxLQajh22y z#l-#>u{7E)$dK&d8p(wC<-E+-|HuJuMrdlke;wfW@Bi0@@%yx~bvD&^qW|B(&i@n& z`6XVfK|!14%>BZLvfraltpD{pLo;JTOFLWh|56BP`o95#P4CZMo=;j7r^7reuE`{5 zJ&QzOvE*F)YG?t1S$I9?M1hpzb7#Siw`-9O#fW4JytMx!-~Dxtr)LagfF4I{(veR1 z(YQcb)GjSM%=Xcwk)rh@hD`(skm_%mkU0#%tS;Qs<4CIs`cIm$lQ0?w>^Uad3qGb) z79B@TLk(?Ve-ML77vT&GfC~xS$egI?fLHTVb*BX(E%sNY$iXOkCSXo-v5DHp@dgLs zQgQ$`(Ty;GG*FT<-tA@)LZiWlv+&>7rK*m$0j>Wq>n_TCj*M&SOGsCtLyI1AeQjEZ zqtZ|~?9ujWjo|O@3C0AWHk9JcmPzTaEEi20ogy)W)Rkyc)p87>N)%F+HX(~R$uh4d zFWW?IGolo`d;We}vgYY&CHwntPQ%HgfSqOr_?{Vur-C?F=s+AhrSd4Hf+m&wB1YDS z&(r>U;^CoTkKhQCW!bRgU^2YVKF3Ts~YXv*zL-;?^-MyMyP&?d@=$P|y}3S)f77tP8Yl zPwbS~S<;brE8wS3F>vz%Kj}&9g^;-fY^k3#hv_vFHZ|1!euoL|%2rEH>tbTlUWoyR zMgl10ggUb!Ot_Q=ltlzNhN5B(P(u#fB@bvqNolbj#NEt~+LXx2BPj6*2c|1>WEF($ z#I=ICD3M3%OS%#*a^`m4Ya_Jd$?xYP6~ajxbrA3Zb#j^$y63<|Wvk#63m2F7Pf>5S z`sl$Va8!551QnIaa26}opgpSUhF^OL^~j#L%cHk9d3y1j@+nw0J(ZGCLqf7?ZQJ|h z?Y_k;l#Hs_Oow&0f5MYLDMzUP2IqUWcS)&pDy1oD(vA-n6UH#uB$C~&R_oI{Q-H)` z<#3-?vPlvD!DX*CYtZ8U!X^apX7xvtymYgWx(=@$>GM2|Hy=MIyW~+5&kaB2|da?k0JNX7Hb`}N3vIXnf2x9+m@5Z z>=R$}ujtjiWO|$Xdlp-Kz=vWSj0Mz86}+>(t&;)s|BgoW8nP-dG{shJt+~F0-k4Yk zx(x(^2n!!+;Pg>*aOVQ;(iw6bdaC(tiWbffL3qKVwWEy-a>9% zJKA=dbtpiuP$s%#ZLsBNn449JHV)di#z!b~1mF{vwz$qRnyswEn>TVl@N^h9ZoXP- ziL*6`l<1>z@RREl^W;{6{YdYUL3%5ChUi>X2L{}f3bRc*{i`aekEG|>@36m`*)E-N zhkd1cOCpIq-Mo_1v-Mnx<0_!piD3QF;%7gf_Q^uHfR-JQ1=LkpTzfj_pfQ_3UfEPP zxR*TdnIjjg0@-o+s=1nFE|d=m!+wI+Sm{TDv-6sR-$^pz2lF2sf{JsFx%b9C7uNZL z_e0P*tF|o6=wX-<|ygQ^y 
zp<%^=$e$p{hb#tB*o|xp_{Z>*-*V8F6rwD7b`zgE<~7Oov^I*v zyC1dTwU3A9;2{4`Cf{93WA;ZLQ|ya*%@D5_t2gg9Dg@d_K3%*gCm+q;D@G<8$?RQR zXsJHY2f80Q`ue?7Y=MVeR|l!EDpS={Gl&sWQ8mvo{6Ri$Amb4QVZpP@JA_a0!$Ec7 zR0bV)Zo7}y--&XJ)?O9hZ+>r6avmy8$z#^qH9Tbu!ypZmzWI)Vre zIE!8I!-(PU|G{_p4~VFla7|PH1rSQ4008L!Ya%exclypl{S&#-N&T!XvO!o#xy z*-7-ycNv6h4p4Jx4c^PpeqlbjwJ>GVR-&4=1ln6ttFqk4UB> zOx6Co5Y$O?-ribWR?Oq;xSrUxhW?k}b+#6&v!5+W)#J1YK??*lmkBaprzzqNOcpar zK`W_aud2(kV(Jw54u33D2M?(v+n`CN0%&t16J{picWUd`{_;$D)MHyYK>@qWbJ$z|w z570k1hLdmA9%$G=OPY%Gy@G(XHR2YQ0<12<$pu3Z0Z1bcST<~rP?Y%3>QRiOfi-z~ zDMXwsm! z9NS2$uCq`G*k)`h9I^q7;i7{a^`9UHMO{eR@|P7 zZP4;HBOQ8j;@TVxm<$zZly>vD4S=lGi@7hR*<+cXC(%bvW2+Z3QkNf@RAgA``3DO} z!8#-}OGRQt>KLDMXoCEvqL7L5&toXwQtvz=x2M!IE^ssR_EI~+=VjhLy6V3NDol{` z<4SLy$8L}<*u*m@OUv46D(xm};mF(D-wLaU?iOCF=y2M3dcCRI3#J&t4Ty=6MBh&8!pV7U=SX82aQ91S#u>Q2n5#UuP&_r zfOe}+-JBer1!}$*W8^fc3i%L@sK);r z8=vU`#RDn4l<|dE9QDPa+B95T_1Q^m-d846Bk zFNl@?@%XvEOzPE0p=0~IP>WQL+6fFr%GD$tWp?QIr6Ke9=8c+RUd{0cx4N4TXGt~- z@~=mVW1T`a8^NXn<+f{Opc(twGQzxF78!|>2UhG=pg2O6(%rZ}^kYnp3k&3g3LooA zWCBzsJ^~Wz#Mb^FvmH^T-&eQo60Qr2h@#w+pZ#P~+vJ3gSg15T@FCI5;N%EXJ3f}# zP#cW^meb=yJH=VdN?fI8Evxj*IH!@61`Wsq$V#po<3b=oN32-yp2i{E z3*Cg>m5Y~XOwPyAjXnyKWTVW9IuGU}=P=5s*aceAKF=6XhT>&;v}5l;J9H!2a3y3N zK|&B)fdb2K;KboQ1gV<4;Yz%Mcj!2;>`(M>n5FuT5KuodICN=Q2ti91NF(ek$Xs^&_V=e&bctpW2l~VI#3XiR=d-cwy0qH zoqk3GKqPrmet;O+U_gCM3LUd{*J2qoLWE`cOpY+-UzK#{5q5@Fqrk z+k9b-29r2W{ocl2NwrIyP@;)&m!MqJ(T~7XfXcO7fe=wN^xY7)t$jvf(wlQB+f&PO zcEYS$Xx8Cq!#)))5un6- z8GB9Py0G;Y?%3j3XzoWq-U}1jUM~fXEhW`|pBtEW>ya&9M1e#^DZ^8XN z(F?{*E(CZVegB3PzZ&5B1S9|)V6tRbvOonIYCQ+Hmc9NX3kb9g+FhLXd5ViEAhHWs zG3BZ(Eih|lflH$0&vA`fPSyd)3e#rrE$I>A0XgPv+`M2)52t8x&a1b1Z(rH=eQOBY zDcS#bJLSbW_B4`${Bsc@+(K_tMeXGN4G6#yhq9mIjU%x0;>Ww99(UO7kyO|wz~23u zDuUH)WCFAuV3$6BOpL5Keh6B$zhA@y^7p0Y^CMGWN`aUXg})# zfa%L^RLjf{%BT#dGn>n|a%|-*i9NQ*a@?)W2}S%8aE{Vf+!itiZuDiQM~cJU0t3Wi z{|~MmE=!;>_T;GKO1Zibi}x@3ir+()(!R8Gx5_cebuA-D_DICeY>0QrvjtpQ;LELR z{uc4q{xucv+)M2O_dNn9RUi?w*+x0emB#I)omJL0kSgCMlaYSZlbMt~?H_hZ*DUEy z@54WbL0{F{JG3qlP5Bac_^)()aslRUB01I94bd>CCf1n~$=t#CXNI;qJ62%yS!95> zH~GK}JB7KZtffSxBit08wwab80Ll+EGL;_$oYbEs32jH#rQ^((hl|*ma6~8BP@1X= zhHC<68cTm!%;(hbbg-%IkCV+~Yjwq06kl)Wi^Qt0#+J0D85Es7A)8r>zxfii zBc^k&j!yGNX)KZ}#)3Ttp2teQ0~j8a(rA30lYC0<>IO6^jHP&R)DC~u4!L>=4(Dpc zC3dnlH#muvghElM!h?;mdE{yPR zJ7e*;tBv-*6{E&3=7#?<97tB~v)B+o=z@NP5A+Dr4zOq;1Q&+u3oQlOK2+D#2acAWczo_$DMmzPR4*FO+poAfn#A$@ZwtqBk z>hyed0arwg6|@t933f?iZUg>9IR_YmTcAho-&l?90OLSY^Uf~3QPsR5Kpz7|Bj{7k z+Bt@DvNU|L!;wz64`Fn6Ps%y9Aym9>yC`Q~$i$2TX*n8%tv{BiPhF`HPJ3U{oBh~s zj>CGYUYI*sLWxY>P*w*=>+Sb;d9d`#nAzuICsP$v|MuA>F?+OWiJJj=9Mt;WPrTI;;tFdaM$cpotej+Wlj<{^Akfy@|dT1!0Z zU5tTZ6%K{a+U%&S7vB$WAq%MLMM785o)S{TYgfhV@(p(G71lHV=ua@Z{>Cn0?t0o; zqBI0R7Z2By2-^2glVO*0LLyP-XUYr#&?WV@8Hwr7c~lzExxi(3SyWYEElNf zs-D$$=u5_7up$^KOdf9ZnDUxzNDOS`!exnhlSRxvZkzP-MS89d@2M?FxPi`xL=8)s zqbvW>Iji}i{g(RowcG+}`lr~b1ykJje>Bk=f5Q%eiPqBVH|zv{!;bj>2|Ha|LkC(0 zIz~E1b~+<-M<-fy8xvc48A)MLIVDkT#i^J~288YpN-@v%WgC7ehM zeW-niE?8F^t|3M(TT<~vGm|gu!i2Dg)lEvZQN`c)d>so2du`tS{lj48b>UsPGlYPg zP1I&3a2egMg_Z@d)WcNVx!MrZ^1DDVF3lsZ98*&$)~N#gf1$fWUCfRXWeGft+(IL> zfq_05*=!sZAhFC8GWU#d;>ce4wlmhYkA!Y%$DC*1o;{kCtccF?p|N*4;m~r1?4GkP z&UWbP=vEF+`{M*o!rtAY)nSyi*fRgb*!qO%jl>fM3hziS_gtr7*8AiK5069usn`Va z`^xSEUDP}TCAjoC;i)xKeAinSK}Em#;Cw|FH2Iw4kV5TXGOt?1e79)VZ{F~}g{j`4 zodTrxsChT`&qfjBTp8+@MqXHdhchR7g}J${MA_Xv6ukJ#d>Txu3RJO~%rpUL2p2D9 zPKtoZcMiWO0ePA?TbKfnv(QfgE`qta`{CD5A|dco)1G4`nbem$P}Y)7uNa;r5?xCMzfIG+a!U(p{|IPbYxkUy0j}c3B19W zOKaG$l!pl=nFN=s1K)+fm$vZCNJiY@wyzw*E2`i&dTn8?mh&L1Ye72=ue7yF>zA^e z)JqT?fcm0BOA$tzu(-H_e^limo!NgDiHc9wdYAgt`X`(_F 
zIui-el7vwYWe2Om>e2-eKW;DWCn7NQIUbfn${_NGs9pXK+RJ}}7t3&9Mjzpq_QDDP zK=c1Ecxh$jMMP!jtc|MvP9!R{Qc7wc6l6m7fp{gahYh0`FNtrUM1sxt5Wq$QlB5+BG;mxAJ9r- zb)h^L63mso#kn+lKzWrG!gpJ;7*kvby3dWyQj{Syk0KmGkdKyKZdr zQ<-{8&-K=h#cv@v#KybXrFmHBCieNcD}bg*t4XOuu|mppmGeptqP?^G`+nlGkk0LT z0%=N&`Z*S&_v1R|J~*qEG$L;=r(g{X&hN`ftaUdXpmm8|vjPbYP47eJFNidCAFN zgZ8)c8xl2~Vq(oQlV4j|GFNo%>oOc4(=wZRmxGeD=4AF`kc5Rq}yC#l?JfqhU^}MeewVSJgenXwm zGKPONu9rbZ!MRbYgv3VEtgN->@>Sn*b(V^8J`cd9S)@k$w2?2U6-VV!l?7^>GC~vn z)*1JadRVl#UN#6LB)mU*dq9}F2%yGIC3DZky-)>i)KN>j#5Rim(b<%Ry<>-)!-G>_ zB5U*1_yX22-flGhvLzM};ZY(TTXfYW?%yBJDzB49c^BiM;X^V&+39Dfw#D%afYn+z zGgc+sZP${m6C8R8jKQe>)`AMxjKTm8k77K47VRcB6;U*sx^r&Q6k^*>mnurXhL#pW z0;8VpkK2X+tFW^GifdW=IPMT2xCVFE;O_43n&9s45L|-0yStO%?gV#&JA?q=^1XL& z^0@cDH&v&WqL|UFlg6e^XX9I;?%v|@a1z~x;1)w} z(?E7pkXjxxe1b;-MjvDJoR<{o+k|SBug18Ru+NnBB{}I6)Fq3sHw2MEF=o^5)2(p* zgN~nW23;BSmAa9_s4Js<^C7{&aovjK^STUoZh%*Z^oF?gZsj50;zNLG;D#%Yxm>%s zq8z%v_X8#~&=ahZ={}AjkA=KnXccQ^EGj91oo($DYxeyB*cu-Kf1Y8s&n%ZvXVZA3 zdzfThm6lGysl0Lret=CHENfjg2rCcM%qOP+7oaG?I&3j=ZG>P~TP_%F13^WE-H-7O zxURNJ0aWNMJU37tl*aUJ$+Er^7`hu$n?j=zfs1}P=AsckFyPECJcFOBXGIi14L8XD zvu+CK9g+w`g4ZTn7)4)KvoI#2RT&ZA+ju$;lp6yv-9?q-K|qP9L1W091+aMOY}yK> z)bnqQ4^0HHaM+exQbeLTQ4KlN)V_!UI%N4Anr8TA3SvmiT!WzqT2$ycu$&GWO(IYh zjPm9`8x$TKxWk-XKId@hx@xDA3W+d&s^G_omV`3@Uhc?XQ^TufFyJqI6YtWtj1rKn zhfK2>I0rY$gC*{goCBT@q{BrDKPIGNtcI`&S14PBKq?7Z={JdvQjpRG_wi`#@j_`E zAr(rrwD6}!=Se}o-aKrMFxGvaGm2VW3`PQn>q-<7|9#jGE7e6i;cWGRCjd?y_lH;;7(KnoaKcG+{sSXSs z8cSg$ea=5tu^gF~v8l=B%aBE*l_(_-x0El>1)6JrsH@K>XIrol`ySo4SlJk#-1Lswz(_#A zYu=y*T@8yN?}v@X6mzer|MCvrGyO-B<5Cut&!|FmnAQ+yykr!Fq;erW6srMY?c9hz z_-%_x6ijl~Yq*8heRL~!$SK}gtb8`--%N-(#-9B^i?kbWb-`cPVpheI|2|brdmyXJ zVBo1I455M7%Av?}{gUTdF+6n-R;+QFmZOUILRoE@ZTN`h25nNeG^IwXmL>Kk4 zyN5B&%n>Xm;A_9R=nZJWx7*yb^$G2d`o?+4GV_AP((-w!8Ru!Zw{`pSfx)GvsFS7` zJABZ6fnh)*IADTvlr`W_TP&%0l*Ta7;S2a}J+lglJuNYaJsDs*`&+dKP9HBZ;j#RL z7-6jA`Z`o6vv=KlM!X!Hg0qs*%P=P34!Ows9NC4R?Y2e$~8qofv;h+)|2^TLXh!<)<{ zI>w>*{hTZZKbzW0ik9Hl?dME7HxeE${sm|;GlpuF)=G!YYDA#Kk(Z&OMa>D=(bXC^ z&jym2hh(tMMq>RWF9ey2LwTl`dX$iM^UhO$=I8s^o^;BaVm<8&_ZTerQpMa_HRdxztnrv%64+V>h`V1qT#nLC4E|8pWs09V>b_?

i2;PHz2RKO{pWyk{- znL8q#sJFo@0XV(zmSyRgC;4h%v_V+Ezz z>wWmCXU-C_SLIUQV3b+sBM%v(!NqL@1=6+^IZuI?xe{TwHzP#Qfrb_4xcBX}yYfA^ znX(w68wDa0oO6H&`jAsFPm%N9^cU&XA<;w-SayAE{s$(WSFvg&mZD2S zxFO4?FrL0p`xL^`q|TBcHv`{_?Dpmjs6Z*5aI*I-Wq#ymGYf_)sO~9X3=)kR$twsznO(x8iTWBxj+ADW-@mA(=IVbNhKj1M;x6$u8q6D-OC;THb^iFw@?w9eYOW<~*io->)% zC;mBGH^qK3D-$3#DEnc5(~9>NwLTQT0mm(4Muc>+Km-^ znN<4MhN8M>&nUgl!VmVj$bI$%m{a1$*y1E?3ka$wi(XOgZHmb|eKDb+jYv<<1-;xh zbJp8OqvRaVDLI$?>M?jvW-~)_8g(d@Kk|!<7$)&Qp1YOeu$I9rw4pF0jT>Fq9$wuvz#isvVCE0z)h_JY# z|@6W zC}I%rUoSHD8-YbD2r@}fR?FVyh$`d`povX(^U76nKnqi%ZYM`}DFCsS4IW8j5sh}8 zSOZf72`!pl>J> zp){#LMd;}zxUsd~wReV+nFf~P-?XH#4r5yPuAnv(p2e~G47RgYc|H=1WU@)GhjDs& z7AO$X?8$F3&5iyi{3F!y$H7KkX*B^Z`L_r1YmM*im+5yH>w<@mtgAWDHjl7;JQg@p zK5_&JI_=50R$0`&aEqoW+%Vu_JrcAP*{m~%TCOKUc5US< zS|}x8i%yPy7i1WQh<_)skxduUa6n51#(H`5(WCYDpv)RJJaWv zw85_g9{|(^BjAi$?tS*kR}doIMlwwkucv*(_|MtuvDr&(_}SEVt{8`nm1@SVE{{^V z++$4FVC3%+^bVYq?#$gy)8`ku<~xmblL&tl&K$bh%jsj+qk}j0boV-RJXIg3KH)@c zH*O!LS9NtD3|yZu;tc0QZ}TxvuWQ|enz4baLPI~g_S)zx}gSA!crEjRg zq039O=U)P{}%lu-nsGs3tfJ#T!tlYBhbprWorZD5cm*+tjJnf>^k`yrB-hfPPOOQ}A6p zJFo|oOfiD{C}v3g2Zq)LW5LPy!3LVTF>^Cl6vR3%d7J7^n zux@V$B1}?ArOU)hWa=4X4W*vdg}PTnLc2jLQ>Whab@gY3Tr2D!r{M|d7JNSU zIEbjdSX;sQ5_R#Wfivm}&7`ZG+-IkHxF?VF4TPZ+1PWIbLoa*pwuRRDdkYy5-yqA_ z85Y^S{sHnI4yTO>Kbx3^x%|^)~ca+@yc zLxNJuPkEHy7e)_Yt^po}*g*WDH9MGz62z^tnLM5qYvyM{7Q+>0qlouhx(26fP_~5=?lEu7Htb|ZinDima)8z@YukUKxWS|a3vnW@)U%#eHn)-9A`&Tt`^0A`367NaoVJmI&u z!EA$EUU`gaQgVU7Nn6ouVr_sVBdi1OPPoB%QsuJ!)mzL)Y_qY$iCQq`gN4%+c3GvP zVMRTTB(O~T43rp7bvt1!Aw2k@!$QaBP&H%rexeX}3WQ`EP?~CBRvG0o(o0e5HmKwN z^){}Zi&zCCLax^R>UQekSvVYy8^qbGixjU4()Y5+Q$HDG$-q&hV1L-}lRaW-{HVK} zh5ebN+qvCZse4gasXMi*p~EDHiBrSq*KLM@llL|mCgv^v$kC3IT2JAihb=XkQAO}w zQFZf1bCE~z&$O>mJLG%KC_fr2KBjF82@6Z5jeXID4P8H577A`(s&83C=NK^-sn~Z! zy-z!bxP~cwS$S&1tec zyqE_i8D*Q!*;FtgGW#=)_qVlXw&AUq(HPdu=%*zr;o)6%<^~gMUS_IrBX>{uZiYA{ z69g-LjI;4=?h6Ldjxpi~rnf?1wN>yQYfYkRBSu9RS5pPf8KU?dZFx$n3Vk9bTx?V- z%c#|LYm_SVjRUStBl&rqows6VAPic48mNnx{pFL{ZHZsQV`8TJ%lE^PV8*u0YF$T= z!fYb@C!@l3y^>qMpEZFC!$=2Fy5(^x#`O)PMxpRv`KxSqBsSV`!=yX8AASK1pP%oD z+J<|gCObHZixwdCzJSS{zA0S;#_f1hmC>a8)N7oXE9C8>0t}Tb<-wgm7+FWBOusoP z1eW-9>%Khg{u`&}+Wm3*-tlQ$YB95cef1ntkq7cw3<6p}s#7kJ`6^m2q_f=sNr<+S z2UI(=(*xs-?ZeI9E38tJw_LvkgQSsjT89q?cpM^Xn4)7o9nu}$dp}=xJ_m9S2i^%u z%4@}eWt;3`R9Tb>U=d2;a;u};fh}2~j$FrsQqo#P*gzqY%wW!(blYO{J zH>@&d_L`@AR4A5VcunumPP$R7!`>|xhuBF>6O1g5TAgfO%dGQ5U zwY;am`)A5i*7i@BFvgrO>+`3^FtRf~>xQhV;H?{S@$zti1c{w|;YUT!72sdAdQoB5 z=?SqUi^|db;y=|S#l`-(^vyBivbT&GM^MHo#gRGUE|q0)sEFmS;fv3Oc_W zLLPz|tHbq%HT}9e2no?)kQo`%5zbr;QI2*PPep+QD&+Gg@hC21_eZ?wx1bWjpO_u$ z7_oat@)H)M?3OGSlrG`|EotRrc)2inYJgEMGyNx`Y+?7zH+m5hsJ{D1P%8kBxJ) zIr)%?U?3U--U1fpS5od8#*Vs@lSj zdC96gsp>wp#CGSiJ^FaBhNRk*405j>!M9k`xjj4S^-sFt(v=T(ty-&Ciyj_Z7k_t&Zl$O@pVhUQzgI-nzn!Q`25k** z*TWzQlSdt`vCbyk1MW2OYUIaK`=^iLY)u*ShFoKl;-l*K+=RXLU4p@UcS_Vq)l9`O zD{w2ZOjV9Y`s{V-iCaHqU|B_W*f_tjdi%Wpj)}ckebdNuNch!(v`R6i+UgnCNoLsN z1CyT$v|Tm5u!<#UXW7p>o~{mo8H}&FA%4^GJR%nc!y}9(^AnR3a-X6vq%A+S!ksTY zd@wmuC;o~vI=U+`Lm7aTQ-B@KJZzw3OKV8U6hq&WKTN?#e^w7CiJ%nBa(If1tJT$% z{BVWYZ@&^pvqnu5i&Vc-E%g4InftgV9b4H|q&c>20@1A0qCK{()2L)-dVchg$j&@1 z$#i!N?FOWg52JQAU-d4NB1|jo;mCwiGnf76 z#6H={+I9KxMM+$rmbcJw#~e?JVVyDLW@4u)<0VHtS8>ExON1L$3GD<;NLOoEc#&91 zY?56tXVES?55TBsd~cmxL5i8`M97pSw86zBbKcpW)?>R(S=5-a^$;LUAsPe;*>;U? 
z)JuE&T5(jRz$QR#Ks(pZWZKfzpbEn+E(Wr!*nGWQGTKC2TRA2+WgsW$KD)?sww>hU zZXsm*Ne`wojk&37Mh*iL7%1P84r2Dap#a90&yaa1#yWE&g6ni&xAK(2N*=9&4m5Ur zV*(rA=?TzY^nS3b6{3`dCYs#4O=zR)LM!fZBL_i)AB`R}BhU=3Cii{c2AecJ9r#UW zu{D8G3ukFVS`W;VBVK=Uz)YrrR8sHk$N*^e33YR+C|htd;`kU)S$(UKbFF=030gu0 znA!wKVnd&tF&Fy!5{y)I|4n(BAoki_8Kb+88tGKLSmIP$Yehj^_Is*s?{w~rPfGl^ zoO$p9d5rK|gvc@Wyb@pu=tmCspN{aS1R%jZ|+sqeRrR~-VALRTU@Zsxe%Ts`V@zTMm2u_f`{Sl^Z5=A zyU2MVn#{Uu^|Siu_whj{3!pD??U(qs_ynL&hu`-=RC>dVV!X%W((koBZko4L&DDCRxPSZl|r!fh&NC&|377Zehe z%7SSR8I=?q+BOEUZB#c7nk;L1jE1g$w>DeO$r8MonDHcV_hX~%c}L8?RaackSF;HR z)6sY#dWky5I$vwqOgUIHHbbGbe2%pP@$n7TGSH?I3Z!koOr60B8_^1e)q<(S}HV+p|TsHHVFLvlfk`HXki6%WCgk%t8WEsr}YX zMV9n64?}Bqw+8%)oUnJnq~uMA?1mujNT4ZB*aSHugxkivTq0PgJK9GUNbR>Nt7zKS z1;(X<-~GZ4a-e0lx;EvXapew9_eIz|h4z+MGgEJtx(9}tr}N3e>w4MIqdIh*-eyGlxC>K9W10_KEI1od zd{zzkqm)}ZC#7}tRNA}|vk|%QVIhPI7F8vbijTuglr^lG6ZY!von!~QlVc($8$^#v zqt2f1!G$y|#H&yRUa1TD%JyS%ku8r+GF}5Hu+|dTub+^jzv-<0+=Duseo^Z55Z+mlG z0^Qfeld;E~7u84*bsh=)q&=E5_*uF+CBl?J2eOJeqy8hW$g~X~KaTVw1>5;Hf=G>G zwUI%--Uhpa!NSJOdvqy4Ej;rDiM&I441Np*^JQ*f_cGJxBwqAdoOq2zAW<$R4-7yF z&vHp+AdCoU10(fhvv*(P7Fb!EJQWlI?^p%D-|`e|*xRw`e>lsy_#VL|c8S5`#1^|t zKVQb)+D$u;723*mm5O~>+Ifl3OP!#xYXF=&@?J^V^rm`EQ(4*Ei-)(dQjqN<6wR14 zg#t7+7rx;QexbO&+6a$>Zb$oFDEp`@#EN?(I18T)C~K095_e-wnwsmmVs+!K`QbTs z+X<+wp@4{E4D>NFC)Q*hN?{ z7b&BR5qrpQSs|(GsuCrc>9Xs{aUaxsC`5Jz^ON@CQYT@1E7xjGmV*t6w?D^hQQ{Dt z-9}_Fwfux#32Cl)+a4IT_H@@n)17ULu?!3jP_aTCvF_Ool{|nS;UeLI!YL` z+;3aGNnuDQ^#QNOAKajWHyS<`4+{WF# z&CV+|KIntS$k%ZrL;yEyUM&a)Nby^*(ojg^07J$(=2Sim1d&qdEp#fEcJ!JR-&$5} z*y64VZG|*jE}f=3L?3~zG>{=sp>a1=yD~gX zlc>fQ?0xRDUZ2K3;R3Z9rM^oJTi;-P&0RFfWu&Sq$)qwKR0F{rmv{}-qmQQ5hMR>> zSk2Vzr(mbVchJR=RI_fQ;|XG_fl~KwwUe>3D3ndqSM!t$rs26?FQ1kI zQ_OpMuM?iN491<>6$|bzU6h`intHbmtrlR!G#=*guh3*T_%i4&(j393l)!DINs{?Y8|p%V$>3NA*~{sqV@ix{6?|B zJ%MmWGbeE67OvV?92w#B-RKU%%b^gHm_qX^AFxDTiR#5{3_{g!fwV@I!t=}v|7Y_OuuBR^^)RGsQXo-{>yxPA^S4?ghM0T(zU>OZ9xiL7rMYwy+ zyx7TdJmYk}JgM25XPf_ZgQ7d|m_hx`sZmJi7ix>WNRf*IPClk>y%L`VCSE(O`PUCZ`1ylJpm6xMP? 
z^b1v@B(?-<_OA=<+>7lCTALyn?Ci7!noJF^RE@JePhYkcl9LG`F6lb z@Zk-X>{aJgIv*WdYsVB&sR;(|1T{iMZ|l1!e=glk7j z9}NkX(}q0Qxo((g>BCifdxL2z3jyW((v;h`k!@r0Hrqgyuq#l3rCUwLbPjh8<61}4 z)*9{}(!FZCEUd3b(d+r^?>NWtxGjglNSfs99zQ2$(FR+4ZY_Irynoc}Y*el{rtmg* zC?2C;d`HRWbC{|{Dom+CZPE9}l4w-IM){p1RaZe^j8Yj5(!I!-Ni8O+iu|{Jb1b91 zjz>EPW7lqfBGEUBiBHtQ`H7d8MuyG$q2Fg1A^pjH7FVIA-1%L@f*w)^Ptw671(4R| zeOJM!&B8Evfr81eR#pnUPFRaQt3hif%D8hmIpvJb=9EIlZaQWcmRA%ol&KGm2Bit) zG9HVK`Wd9@YVWIixLAs?Kbu$J#R+AkTFRGG0-kM{iQ%Qr!HYcSPJB6x5~$>bX@4TW z`jI=q znTQRzycZW3XgnwOWKuQgcFvX@xJQV9rdW$&COG0eD@eG6Urt8J{8-DO8GcDIy(#5U z9QpP8q-#IOm1tgOy5#9$jYLo-(hn@aqt*%U-wId0gwc57P3u~K@cHE;_#a`ksu^ynnKBn{=*$at*+{S4E(o$RP2jRf@|U5$d6#2__oC>><6`~>|36T=MC)Hck} zINjtm^&C7ojl}4nRE+{T1*P;3jJQ;z0(lYh-01MQGmebt8MmCSW3x&P0Bz# zfa`zlR5rG9uyfV@e+>$IS_c=0aG9}jsR;@ZuQgHoIZ0$DgT*;VV+Afn3EusDzaO%C|~{Vhg8SWs3$sC=$JHdg`- zXu{JQcBVJ=3*hq07(!nG@3(&-=OMw%-*KP>kVDcL@v*kDA*ekkYpg* zdJ;>h=r+`-bs5&v`E+&EucQS|>+;LheL(@En=x8WFFz<;cD*2j#)fm?Kd=Oph?PkR zdJ{4{NS4%)wFt+je<$ch+^gnL87|NYqn^^sqP;j79lZcuw}cVxSnroBS2$YB@%ah0 zdprx_9sLC@%OEq#R0;wtu6!L2IFjq1hsdEtVqc0I4nVSZ zF~SLFbw{#Bj#@3YY|S?&2j4+S1ac(j<8-_#KHyoy3j_!1ioOdl$MznRhHB-NVj zxAU(ZwEc8AS*5vEHA}lRsx50M2$>lU~Y#*=N90KS4K)?T@3Yz1 zJ(1g*k)fbFP=SKhWsUF)&0|IdFWl192$EF8l?HRw;RElhpsXq^x9sv- z=|vt#TQxzOg-fHp0r|zAo$ez>Dl(#IUKrqIONl%j4t*9xflc)5xG=SR@_0sdrSVQ8Sk)du1F~HErDYJmE-$3e7 z>*p!Kz1wddg>tc`aW-YA3 z$a&m@g^^G$t&h&q4H`Y{uVHFVA&M`C+9(%N2Kk^JGndvYk9%v7l_l%=Ex7kH-1Du| z-QZVkW;XE$j!yYuD{LY44y(7_63+f0L?x?$7m~VB`ExKY@oxK4_A*jOx~Uq?oem}}uh<)EB0vz;d3^rd z8$TI=5Njt|S53x7Nrznt$L>4chV%*gCYEa!VltRO>w`s~_{DVgh@yOyuJL(xIRc!_ zIZphwfztr4w+00vJhi_VNnFjI@WgByGL>%fWzreK(|8{RwAQIR)0FVz#AXBf2M_)zD7mAH ziC^m3K;5U9uT~YRp{z6VW3E7=JdF2I{XcnGhTqT(#T9*T-AtnORhAT|9n!Na*R7ji zubaPOc5aE@v(K$-gm$ua1Ik@N1I7TG?2P5TecD7tQQu3t1Uc^o^;BpFLzklvn7Y>K zF|Cm@T#A7_k^ytaoVfEo$=aNuDHp@LlfH zQo=_*?OOzK_j|ifg@%9~L2K41*J}$|Iks&-=lV|h+1C`zk?b07_!wC5*W}bAA*>Q!;0j5c zG$@gY#vg>YTLb$~@P02bIc{}n)Et$|&L!i<(jZebwGIb@Y%Nc23)FB82|b+c)O_bF zjLO%$kcoa0#zogTTw1Y-IcJ1XyBbk51QV^JS~lUD-GqBxuqyUlL8*z=2ab=(+u=5b zx2*d%2XC#w*zL_mWK-cj)?OjoLY#@J%Lmf{E-ve~%Xco}w9*d}($0(34nBAGA;pLO z9Ii)n<#aq73qg2RYMpX~b_O*)Z6f+N`y9G^S9oQ75S2<3=$naNZm`F@b<=&47Ragg;w@Z$A02 z?$$NADL9{WC5{;h}eyNdMhzI<`Uav`1JB4E9s1I{nMM4ErJ24BU$at!T^0ac>`H`d75<<+cS zY?`+0oL&b80=fkhqW@2u%#8kGlc}+luD!AG$5)&eA7A=GQG5e11OY1(=MRGbuMNIF zCV$}o1Vc^CO&#rwUwi3gcEr4MTK<4jvZ8=QJ@$W)5JPgB<^bUX0g?U(NZ$G{AbT4Nz{)psu(Yt(b+86h*f#xJ<36(D>@Wb9QnK*Bj`9A1 zZSYlg{RgI{(LVuS{6@FvCiMoeqFFd!`@cWp?}32%zMAaI>f};4RN)4AjGpqf*AUYF z%5l&)w=)FDF1-T1UZjBk>_1zBZ$RN+K>sR4{!eT5#V(bxCR!MP!yx4T^2d%wJ1?{-QE{{|D+HLZH8+{!YU3 z7pmd#Pt<>P-rosY{-T_`ru>Jz06!_sNTYOBB2YzCI}a3)ln<{9m&n{|5efZ2UF(^}g<3WJBct zPX1$;_iN(ohrz#yK7{|B_^(I9uX(StV!wF(H2hv;FBze6%dvH{U2Vs B-(mm& literal 0 HcmV?d00001 diff --git a/source/lambda/job/dep/llm_bot_dep/__init__.py b/source/lambda/job/dep/llm_bot_dep/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/source/lambda/job/dep/llm_bot_dep/aos_utils.py b/source/lambda/job/dep/llm_bot_dep/aos_utils.py new file mode 100644 index 00000000..7d1cb88e --- /dev/null +++ b/source/lambda/job/dep/llm_bot_dep/aos_utils.py @@ -0,0 +1,146 @@ +import boto3 +import json +from typing import List + +from requests_aws4auth import AWS4Auth +from opensearchpy import OpenSearch, RequestsHttpConnection + +import logging +logger = logging.getLogger() +logger.setLevel(logging.INFO) + +credentials = boto3.Session().get_credentials() +region = boto3.Session().region_name +awsauth = AWS4Auth(credentials.access_key, credentials.secret_key, region, 'es', 
+
+class OpenSearchClient:
+    def __init__(self, _opensearch_cluster_domain: str):
+        """
+        Initialize the OpenSearch client using the OpenSearch endpoint
+        """
+        self.client = OpenSearch(
+            hosts = [{'host': _opensearch_cluster_domain.replace("https://", ""), 'port': 443}],
+            http_auth = awsauth,
+            use_ssl = True,
+            verify_certs = True,
+            connection_class = RequestsHttpConnection,
+            region=region
+        )
+
+    def create_index(self, index: str, body: str):
+        """
+        Create an index in OpenSearch.
+
+        Args:
+            index (str): The name of the index to create.
+            body (str): A JSON string containing the settings and mappings for the index.
+        """
+        body_dict = json.loads(body)
+        # Extract the settings and mappings from the body
+        settings = body_dict.get('body', {}).get('settings', {})
+        mappings = body_dict.get('body', {}).get('mappings', {})
+
+        # Create the index with the specified settings and mappings
+        self.client.indices.create(
+            index=index,
+            body={
+                'settings': settings,
+                'mappings': mappings
+            }
+        )
+
+    def delete_index(self, index: str):
+        """
+        Delete an index in OpenSearch.
+        """
+        # avoid NotFoundError: NotFoundError(404, 'index_not_found_exception'...
+        if not self.client.indices.exists(index=index):
+            # hint to the caller that the index does not exist
+            return {
+                'statusCode': 404,
+                'headers': {'Content-Type': 'application/json'},
+                'body': json.dumps({'error': f'index {index} does not exist'})
+            }
+        # delete the index
+        self.client.indices.delete(index=index)
+
+    def delete_document(self, index: str, document_id: str):
+        """
+        Delete a document in a specific index.
+        """
+        # delete the document
+        self.client.delete(index=index, id=document_id)
+
+    def bulk(self, index: str, document: List[str]):
+        """
+        Bulk index documents in a specific index.
+        """
+        # bulk index the documents
+        self.client.bulk(index=index, body=document)
+
+    def index(self, index: str, document: List[str]):
+        """
+        Index a document in a specific index.
+        """
+        # iterate through the documents and index them
+        for doc in document:
+            try:
+                response = self.client.index(index=index, body=doc)
+                logger.info(f"response: {response}")
+            except Exception as e:
+                logger.error(f"Error indexing document: {e}")
+
+    def query(self, index: str, field: str, value: str):
+        """
+        Execute a query on a specific index based on a field and value.
+        """
+        body = {
+            "query": {
+                "match": {
+                    field: value
+                }
+            }
+        }
+        response = self.client.search(index=index, body=body)
+        return response
+
+    def match_all(self, index: str):
+        """
+        Execute a match_all query on a specific index.
+        """
+        # avoid NotFoundError: NotFoundError(404, 'index_not_found_exception'...
+        if not self.client.indices.exists(index=index):
+            # hint to the caller that the index does not exist
+            return {
+                'statusCode': 404,
+                'headers': {'Content-Type': 'application/json'},
+                'body': json.dumps({'error': f'index {index} does not exist'})
+            }
+        body = {
+            "query": {
+                "match_all": {}
+            }
+        }
+        response = self.client.search(index=index, body=body)
+        return response
+
+    def search_with_metadata(self, index: str, query: str, filter: str):
+        """
+        Execute a search query using the query DSL, using a bool query to filter on metadata.
+        """
+        # avoid NotFoundError: NotFoundError(404, 'index_not_found_exception'...
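+        # Illustrative example (the query and filter values are assumptions):
+        # for query="vpc" and filter="aws", the request body built below becomes
+        #   {"query": {"bool": {"must": [{"match": {"content": "vpc"}}],
+        #                       "filter": [{"term": {"metadata": "aws"}}]}}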
+        if not self.client.indices.exists(index=index):
+            # hint to the caller that the index does not exist
+            return {
+                'statusCode': 404,
+                'headers': {'Content-Type': 'application/json'},
+                'body': json.dumps({'error': f'index {index} does not exist'})
+            }
+        body = {
+            "query": {
+                "bool": {
+                    "must": [
+                        {"match": {"content": query}},
+                    ],
+                    # looking for documents where the metadata field exactly matches the value of filter
+                    "filter": [
+                        {"term": {"metadata": filter}}
+                    ]
+                }
+            }
+        }
+        response = self.client.search(index=index, body=body)
+        return response
\ No newline at end of file
diff --git a/source/lambda/job/dep/llm_bot_dep/enhance_utils.py b/source/lambda/job/dep/llm_bot_dep/enhance_utils.py
new file mode 100644
index 00000000..3934779a
--- /dev/null
+++ b/source/lambda/job/dep/llm_bot_dep/enhance_utils.py
@@ -0,0 +1,243 @@
+# Python shell jobs only support boto3 1.22.5 (1.28.68), according to https://docs.aws.amazon.com/glue/latest/dg/add-job-python.html#python-shell-limitations
+import os
+import boto3
+import json
+import logging
+import openai
+from typing import Dict, List
+from langchain.docstore.document import Document
+import nltk
+
+# print the log to stdout
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+# number of tokens per document slice
+slice_size = 50
+# number of questions to generate
+question_num = 5
+
+en_prompt_template = """
+Here is a snippet of {solution}'s manual document within backticks
+```
+{page}
+```
+Please generate {question_num} questions and corresponding answers based on these document fragments, with the questions being as diverse as possible and containing details, following the rules below:
+1. "{solution}" needs to be included in every Question
+2. The question part needs to start with "Question: "
+3. The answer part needs to start with "Answer: "
+4. All questions and answers need to be in Chinese
+"""
+
+zh_prompt_template = """
+如下三个反括号中是{solution}的产品文档片段
+```
+{page}
+```
+请基于这些文档片段自动生成{question_num}个问题以及对应答案, 问题需要尽可能多样化并包含细节, 且遵循如下规则:
+1. "{solution}"需要一直被包含在Question中
+2. 问题部分需要以"Question: "开始
+3. 答案部分需要以"Answer: "开始
+4. 所有问题和答案需要为中文
+"""
+
+class EnhanceWithBedrock:
+    def __init__(self, prompt: str, solution_title: str, document: Document, zh: bool = True):
+        BEDROCK_REGION = str(boto3.session.Session().region_name)
+        # TODO: pass such credentials from CloudFormation creation and store them in SSM
+        openai.api_key = os.getenv("OPENAI_API_KEY")
+        self.bedrock_client = boto3.client('bedrock-runtime')
+        # session = boto3.Session()
+        # self.bedrock_client = session.client(
+        #     service_name='bedrock',
+        #     region_name=BEDROCK_REGION,
+        #     endpoint_url='https://bedrock-runtime.{}.amazonaws.com'.format(BEDROCK_REGION)
+        # )
+        self.prompt = prompt
+        self.solution_title = solution_title
+        self.document = document
+        self.zh = zh
+
+    def EnhanceWithClaude(self, prompt: str, solution_title: str, document: Document, zh: bool = False) -> List[Dict[str, str]]:
+        """
+        Enhance the given prompt using the Claude model by Anthropic. This function constructs a new prompt using the given solution title and page content,
+        sends a request to the Claude model, and retrieves the model's response.
+
+        Parameters:
+        - prompt (str): The original prompt to be enhanced; not used for now.
+        - solution_title (str): The title of the solution to be included in the new prompt.
+        - document (Document): The document whose page content is included in the new prompt.
+        - zh (bool): A flag indicating whether to use the Chinese or English prompt template. Defaults to False (English).
+
+        Returns:
+        - List[Dict[str, str]]: A list of dictionaries, each containing a question and its corresponding answer.
+
+        Example:
+        ```python
+        prompt = "Do we have any solution offer to Stable Diffusion?"
+        solution_title = "Stable Diffusion AWS Extensions"
+        document = Document(page_content="Stable Diffusion AWS Extensions is a CSDC solution that...")
+        enhanced_prompt = EnhanceWithClaude(prompt, solution_title, document)
+        ```
+
+        Note:
+        - In experiments, Claude v2 did not reliably output Chinese characters.
+        """
+        # Initialize an empty list to store the Document objects
+        # documents = []
+        prompt_template = zh_prompt_template if zh else en_prompt_template
+        prompt = prompt_template.format(solution=solution_title, page=document.page_content, question_num=question_num)
+        prompt = "\n\nHuman:{}".format(prompt) + "\n\nAssistant:"
+        # the schema keeps changing; refer to https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters.html#model-parameters-claude for the latest schema
+        body = json.dumps({
+            "prompt": prompt,
+            "temperature": 0.7,
+            "top_p": 1,
+            "top_k": 0,
+            "max_tokens_to_sample": 500,
+            "stop_sequences": ["\n\nHuman:"]
+        })
+        # note: v2 does not reliably output Chinese characters
+        modelId = "anthropic.claude-v2"
+        accept = "*/*"
+        contentType = "application/json"
+        response = self.bedrock_client.invoke_model(
+            body=body, modelId=modelId, accept=accept, contentType=contentType
+        )
+        response_body = json.loads(response.get("body").read())
+        raw_completion = response_body.get("completion").split('\n')
+
+        # Initialize an empty list to store the Q&A pairs
+        qa_list = []
+
+        # Initialize an empty dictionary to store the current Q&A pair
+        qa_dict = {}
+        for line in raw_completion:
+            # Check if the line contains a question
+            if line.startswith('Question:'):
+                # If there's already a Q&A pair in qa_dict, append it to qa_list
+                if qa_dict:
+                    qa_list.append(qa_dict)
+                    qa_dict = {}  # Reset qa_dict for the next Q&A pair
+                qa_dict['Question'] = line.replace('Question:', '').strip()
+            # Check if the line contains an answer
+            elif line.startswith('Answer:'):
+                qa_dict['Answer'] = line.replace('Answer:', '').strip()
+
+        # Append the last Q&A pair to qa_list
+        if qa_dict:
+            qa_list.append(qa_dict)
+        return qa_list
+
+    def EnhanceWithOpenAI(self, prompt: str, solution_title: str, document: Document, zh: bool = True) -> List[Dict[str, str]]:
+        """
+        Enhances a given prompt with additional information and performs a chat completion using OpenAI's GPT-3.5 Turbo model.
+
+        Parameters:
+        - prompt (str): The initial prompt to be enhanced.
+        - solution_title (str): The title of the solution to be included in the enhanced prompt.
+        - document (Document): The document whose page content is included in the enhanced prompt.
+        - zh (bool, optional): A flag to indicate whether to use the Chinese prompt template. Defaults to True.
+
+        Returns:
+        - List[Dict[str, str]]: A list of dictionaries containing the questions and answers generated by the model.
+
+        Example:
+        >>> EnhanceWithOpenAI("What is it?", "Solution Title", Document(page_content="Page Content"))
+        [{'Question': 'What is Solution Title?', 'Answer': 'It is ...'}]
+        """
+        prompt_template = zh_prompt_template if zh else en_prompt_template
+        prompt = prompt_template.format(solution=solution_title, page=document.page_content, question_num=question_num)
+        messages = [{"role": "user", "content": f"{prompt}"}]
+        # error handling for the OpenAI API, which enforces a request cap limit
+        try:
+            response = openai.ChatCompletion.create(
+                model="gpt-3.5-turbo",
+                messages=messages,
+                temperature=0,
+                max_tokens=2048
+            )
+        except Exception as e:
+            logger.error("OpenAI API request failed: {}".format(e))
+            return []
+        raw_completion = response.choices[0]["message"]["content"].split('\n')
+        logger.info("raw_completion: {}".format(raw_completion))
+        # Initialize an empty list to store the Q&A pairs
+        qa_list = []
+
+        # Initialize an empty dictionary to store the current Q&A pair
+        qa_dict = {}
+        for line in raw_completion:
+            # Check if the line contains a question
+            if line.startswith('Question:'):
+                # If there's already a Q&A pair in qa_dict, append it to qa_list
+                if qa_dict:
+                    qa_list.append(qa_dict)
+                    qa_dict = {}  # Reset qa_dict for the next Q&A pair
+                qa_dict['Question'] = line.replace('Question:', '').strip()
+            # Check if the line contains an answer
+            elif line.startswith('Answer:'):
+                qa_dict['Answer'] = line.replace('Answer:', '').strip()
+
+        # Append the last Q&A pair to qa_list
+        if qa_dict:
+            qa_list.append(qa_dict)
+        return qa_list
+
+    def SplitDocumentByTokenNum(self, document: Document, token_num: int) -> List[Document]:
+        """
+        Splits a given document into multiple documents, each containing a slice of the original document.
+
+        Parameters:
+        - document (Document): The document to be split.
+        - token_num (int): The number of tokens to include in each document.
+
+        Returns:
+        - List[Document]: A list of documents, each containing a slice of the original document.
+        """
+        # Tokenize the input paragraph
+        tokens = nltk.word_tokenize(document.page_content)
+
+        # Initialize a document list to store document slices separated by token_num tokens
+        documents_list = []
+        # Iterate through the list of tokens, extracting slices of token_num tokens at a time
+        for i in range(0, len(tokens), token_num):
+            token_slice = tokens[i:i + token_num]
+            # Join the slice of tokens back into a string
+            document_slice = ' '.join(token_slice)
+            # Create a new Document object to store the slice
+            doc_slice = Document(page_content=document_slice)
+            # Append the Document object to the list of documents
+            documents_list.append(doc_slice)
+        return documents_list
+
+# local debugging purpose
+# if __name__ == "__main__":
+#     # log out boto3 version
+#     logger.info("boto3 version: {}".format(boto3.__version__))
+#     # test the function
+#     prompt = "Do we have any solution offer to Stable Diffusion?"
+#     solution_title = "Stable Diffusion AWS Extensions"
+#     page_content = """
+#     Stable Diffusion AWS Extensions is a CSDC solution that...
+#     """
+#     # construct a Document object
+#     document = Document(page_content=page_content)
+#     ewb = EnhanceWithBedrock(prompt, solution_title, document)
+#     document_list = ewb.SplitDocumentByTokenNum(document, slice_size)
+#     # test the function
+#     for document in document_list:
+#         prompt = "Do we have any solution offer to Stable Diffusion?"
+# solution_title = "Stable Diffusion AWS Extensions" +# enhanced_prompt = ewb.EnhanceWithClaude(prompt, solution_title, document) +# logger.info("Enhanced prompt: {}".format(enhanced_prompt)) + +# # test the function +# for document in document_list: +# prompt = "Do we have any solution offer to Stable Diffusion?" +# solution_title = "Stable Diffusion AWS Extensions" +# enhanced_prompt = ewb.EnhanceWithOpenAI(prompt, solution_title, document) +# logger.info("Enhanced prompt: {}".format(enhanced_prompt)) diff --git a/source/lambda/job/dep/llm_bot_dep/loaders/__init__.py b/source/lambda/job/dep/llm_bot_dep/loaders/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/source/lambda/job/dep/llm_bot_dep/loaders/auto.py b/source/lambda/job/dep/llm_bot_dep/loaders/auto.py new file mode 100644 index 00000000..418f2468 --- /dev/null +++ b/source/lambda/job/dep/llm_bot_dep/loaders/auto.py @@ -0,0 +1,29 @@ + + +from llm_bot_dep.loaders.docx import process_doc +from llm_bot_dep.loaders.markdown import process_md +from .text import process_text +from .csv import process_csv +from .html import process_html +from .pdf import process_pdf +from .image import process_image + +def cb_process_object(s3, file_type: str, file_content, **kwargs): + res = None + if file_type == 'txt': + res = process_text(file_content, **kwargs) + elif file_type == 'csv': + res = process_csv(s3, file_content, **kwargs) + elif file_type == 'html': + res = process_html(file_content, **kwargs) + elif file_type == 'doc': + res = process_doc(s3, **kwargs) + elif file_type == 'md': + res = process_md(file_content, **kwargs) + elif file_type == 'pdf': + # res = post_process_pdf(process_pdf(file_content, **kwargs)) + res = process_pdf(s3, file_content, **kwargs) + elif file_type == 'image': + process_image(s3, file_content, **kwargs) + + return res \ No newline at end of file diff --git a/source/lambda/job/dep/llm_bot_dep/loaders/csv.py b/source/lambda/job/dep/llm_bot_dep/loaders/csv.py new file mode 100644 index 00000000..8df30dd3 --- /dev/null +++ b/source/lambda/job/dep/llm_bot_dep/loaders/csv.py @@ -0,0 +1,172 @@ +import uuid +from datetime import datetime +from typing import Dict, List, Optional, Iterator, Sequence +from io import TextIOWrapper + +import csv + +from langchain.docstore.document import Document +from langchain.document_loaders.csv_loader import CSVLoader +from langchain.document_loaders.helpers import detect_file_encodings + +class CustomCSVLoader(CSVLoader): + """Load a `CSV` file into a list of Documents. + + Each document represents one row of the CSV file. The rows are converted into markdown format based on row_count. + + Output Example: + when row_count = 1, + page_document_1 contains: + |index|name| + |-|-| + |1|Demo1| + page_document_2 contains: + |index|name| + |-|-| + |2|Demo2| + + when row_count = 3, + page_document_1 contains: + |index|name| + |-|-| + |1|Demo1| + |2|Demo2| + |3|Demo3| + page_document_2 contains: + |index|name| + |-|-| + |4|Demo4| + |5|Demo5| + |6|Demo6| + """ + + def __init__( + self, + file_path: str, + source_column: Optional[str] = None, + metadata_columns: Sequence[str] = (), + csv_args: Optional[Dict] = None, + encoding: Optional[str] = None, + autodetect_encoding: bool = False, + row_count: int = 1 + ): + """ + + Args: + file_path: The path to the CSV file. + source_column: The name of the column in the CSV file to use as the source. + Optional. Defaults to None. + metadata_columns: A sequence of column names to use as metadata. Optional. 
+            csv_args: A dictionary of arguments to pass to the csv.DictReader.
+                Optional. Defaults to None.
+            encoding: The encoding of the CSV file. Optional. Defaults to None.
+            autodetect_encoding: Whether to try to autodetect the file encoding.
+            row_count: How many rows go into one page document.
+        """
+        self.row_number = row_count
+        super().__init__(file_path, source_column, metadata_columns,
+                         csv_args, encoding, autodetect_encoding)
+
+    def __read_file(self, csvfile: TextIOWrapper) -> List[Document]:
+        docs = []
+
+        csv_reader = csv.DictReader(csvfile, **self.csv_args)
+        counter = 0
+        for i, row in enumerate(csv_reader):
+            # print(f"i: {i}")
+            # print(f"row: {row}")
+            try:
+                source = (
+                    row[self.source_column]
+                    if self.source_column is not None
+                    else self.file_path
+                )
+            except KeyError:
+                raise ValueError(
+                    f"Source column '{self.source_column}' not found in CSV file."
+                )
+            counter += 1
+
+            if counter % self.row_number == 1:
+                # First row of a page: emit the header and separator
+                header = "|"
+                md_separator = "|"
+                row_content = "|"
+                for k, v in row.items():
+                    header += k + "|"
+                    md_separator += "-|"
+                    row_content += v + "|"
+                row_content += "\n"
+            elif counter % self.row_number == 0:
+                if 1 == self.row_number:
+                    header = "|"
+                    md_separator = "|"
+                    row_content = "|"
+                    for k, v in row.items():
+                        header += k + "|"
+                        md_separator += "-|"
+                        row_content += v + "|"
+                else:
+                    # start the markdown row with a leading pipe, matching the documented format
+                    row_content += "|"
+                    for k, v in row.items():
+                        row_content += v + "|"
+                content = header + "\n" + md_separator + "\n" + row_content
+                print(f"markdown content: {content}")
+
+                metadata = {"source": source, "row": i}
+                for col in self.metadata_columns:
+                    try:
+                        metadata[col] = row[col]
+                    except KeyError:
+                        raise ValueError(
+                            f"Metadata column '{col}' not found in CSV file.")
+                doc = Document(page_content=content, metadata=metadata)
+                docs.append(doc)
+                counter = 0
+            else:
+                # start the markdown row with a leading pipe, matching the documented format
+                row_content += "|"
+                for k, v in row.items():
+                    row_content += v + "|"
+                row_content += "\n"
+
+        return docs
+
+    def load(self) -> List[Document]:
+        """Load data into document objects."""
+
+        docs = []
+        try:
+            with open(self.file_path, newline="", encoding=self.encoding) as csvfile:
+                docs = self.__read_file(csvfile)
+        except UnicodeDecodeError as e:
+            if self.autodetect_encoding:
+                detected_encodings = detect_file_encodings(self.file_path)
+                for encoding in detected_encodings:
+                    try:
+                        with open(
+                            self.file_path, newline="", encoding=encoding.encoding
+                        ) as csvfile:
+                            docs = self.__read_file(csvfile)
+                            break
+                    except UnicodeDecodeError:
+                        continue
+            else:
+                raise RuntimeError(f"Error loading {self.file_path}") from e
+        except Exception as e:
+            raise RuntimeError(f"Error loading {self.file_path}") from e
+
+        return docs
+
+def process_csv(s3, csv_content: str, **kwargs):
+    now = datetime.now()
+    timestamp_str = now.strftime("%Y%m%d%H%M%S")
+    random_uuid = str(uuid.uuid4())[:8]
+    bucket_name = kwargs['bucket']
+    key = kwargs['key']
+    row_count = kwargs['csv_row_count']
+    local_path = f'/tmp/csv-{timestamp_str}-{random_uuid}.csv'
+
+    s3.download_file(bucket_name, key, local_path)
+    loader = CustomCSVLoader(file_path=local_path, row_count=row_count)
+    data = loader.load()
+
+    return data
+
diff --git a/source/lambda/job/dep/llm_bot_dep/loaders/docx.py b/source/lambda/job/dep/llm_bot_dep/loaders/docx.py
new file mode 100644
index 00000000..fcf33605
--- /dev/null
+++ b/source/lambda/job/dep/llm_bot_dep/loaders/docx.py
@@ -0,0 +1,70 @@
+import logging
+from typing import List, Optional
+from langchain.docstore.document import Document
+from langchain.document_loaders.base import BaseLoader
+from llm_bot_dep.loaders.html import CustomHtmlLoader
+import mammoth
+import uuid
+from datetime import datetime
+from llm_bot_dep.splitter_utils import MarkdownHeaderTextSplitter
+
+logger = logging.getLogger(__name__)
+
+
+class CustomDocLoader(BaseLoader):
+    """Load docx file.
+
+    Args:
+        file_path: Path to the docx file.
+
+        encoding: File encoding to use. If `None`, the file will be loaded
+        with the default system encoding.
+
+        autodetect_encoding: Whether to try to autodetect the file encoding
+        if the specified encoding fails.
+    """
+
+    def __init__(
+        self,
+        file_path: str,
+        encoding: Optional[str] = None,
+        autodetect_encoding: bool = False,
+    ):
+        """Initialize with file path."""
+        self.file_path = file_path
+        self.encoding = encoding
+        self.autodetect_encoding = autodetect_encoding
+
+    def load(self) -> List[Document]:
+        """Load from file path."""
+        metadata = {"file_path": self.file_path, "file_type": "docx"}
+
+        def _convert_image(image):
+            # Images are excluded
+            return {"src": ""}
+
+        with open(self.file_path, "rb") as docx_file:
+            result = mammoth.convert_to_html(docx_file, convert_image=mammoth.images.img_element(_convert_image))
+            html_content = result.value  # The generated HTML
+            loader = CustomHtmlLoader()
+            doc = loader.load(html_content)
+            doc.metadata = metadata
+
+        return doc
+
+
+def process_doc(s3, **kwargs):
+    now = datetime.now()
+    timestamp_str = now.strftime("%Y%m%d%H%M%S")
+    random_uuid = str(uuid.uuid4())[:8]
+    bucket_name = kwargs['bucket']
+    key = kwargs['key']
+    local_path = f'/tmp/doc-{timestamp_str}-{random_uuid}.docx'
+
+    s3.download_file(bucket_name, key, local_path)
+    loader = CustomDocLoader(file_path=local_path)
+    doc = loader.load()
+    splitter = MarkdownHeaderTextSplitter()
+    doc_list = splitter.split_text(doc)
+
+    return doc_list
diff --git a/source/lambda/job/dep/llm_bot_dep/loaders/html.py b/source/lambda/job/dep/llm_bot_dep/loaders/html.py
new file mode 100644
index 00000000..007432dc
--- /dev/null
+++ b/source/lambda/job/dep/llm_bot_dep/loaders/html.py
@@ -0,0 +1,71 @@
+import logging
+import re
+import markdownify
+from langchain.docstore.document import Document
+from langchain.document_loaders.base import BaseLoader
+from llm_bot_dep.splitter_utils import MarkdownHeaderTextSplitter
+
+logger = logging.getLogger(__name__)
+
+
+class CustomHtmlLoader(BaseLoader):
+    """Load an `HTML` string and convert it to Markdown.
+
+    The raw HTML is first cleaned of DOCTYPE declarations, CDATA sections,
+    scripts, styles, and comments, then converted to Markdown with
+    `markdownify` and returned as a single langchain Document object.
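+
+    Example (illustrative):
+        >>> loader = CustomHtmlLoader()
+        >>> doc = loader.load("<html><body><h1>Title</h1><p>Hello</p></body></html>")
+        >>> # doc.page_content now holds the Markdown conversion of the cleaned HTML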
+ + """ + + def clean_html(self, html_str: str) -> str: + # Filter out DOCTYPE + html_str = ' '.join(html_str.split()) + re_doctype = re.compile(r'', re.S) + s = re_doctype.sub('', html_str) + + # Filter out CDATA + re_cdata = re.compile('//]*//\]\]>', re.I) + s = re_cdata.sub('', s) + + # Filter out script + re_script = re.compile('<\s*script[^>]*>[^<]*<\s*/\s*script\s*>', re.I) + s = re_script.sub('', s) + + # Filter out style + re_style = re.compile('<\s*style[^>]*>[^<]*<\s*/\s*style\s*>', re.I) + s = re_style.sub('', s) + + # Filter out HTML comments + re_comment = re.compile('') + s = re_comment.sub('', s) + + # Remove extra blank lines + blank_line = re.compile('\n+') + s = blank_line.sub('\n', s) + + # Remove blank image + img_src = re.compile('') + s = img_src.sub('', s) + + return s.strip() + + # def load(self, file_content: str) -> List[Document]: + def load(self, file_content: str): + file_content = self.clean_html(file_content) + file_content = markdownify.markdownify(file_content, heading_style="ATX") + doc = Document(page_content=file_content, + metadata={"file_type": "html"}) + + return doc + + +def process_html(html_str: str, **kwargs): + loader = CustomHtmlLoader() + doc = loader.load(html_str) + splitter = MarkdownHeaderTextSplitter() + doc_list = splitter.split_text(doc) + + return doc_list diff --git a/source/lambda/job/dep/llm_bot_dep/loaders/image.py b/source/lambda/job/dep/llm_bot_dep/loaders/image.py new file mode 100644 index 00000000..2eab80de --- /dev/null +++ b/source/lambda/job/dep/llm_bot_dep/loaders/image.py @@ -0,0 +1,5 @@ + + +def process_image(image: bytes): + # TODO: Implement image processing with ASK API + pass diff --git a/source/lambda/job/dep/llm_bot_dep/loaders/markdown.py b/source/lambda/job/dep/llm_bot_dep/loaders/markdown.py new file mode 100644 index 00000000..6c629102 --- /dev/null +++ b/source/lambda/job/dep/llm_bot_dep/loaders/markdown.py @@ -0,0 +1,49 @@ +import logging +from typing import List, Optional + +from langchain.docstore.document import Document +from langchain.document_loaders.base import BaseLoader +from llm_bot_dep.splitter_utils import MarkdownHeaderTextSplitter + +logger = logging.getLogger(__name__) + + +class CustomMarkdownLoader(BaseLoader): + """Load markdown file. + + Args: + file_content: File content in markdown file. + + encoding: File encoding to use. If `None`, the file will be loaded + with the default system encoding. + + autodetect_encoding: Whether to try to autodetect the file encoding + if the specified encoding fails. 
+ """ + + def __init__( + self, + file_path: str, + encoding: Optional[str] = None, + autodetect_encoding: bool = False, + ): + """Initialize with file path.""" + self.file_path = file_path + self.encoding = encoding + self.autodetect_encoding = autodetect_encoding + + def load(self, content: str) -> Document: + """Load from file path.""" + metadata = {"file_path": self.file_path, "file_type": "md"} + + return Document(page_content=content, metadata=metadata) + + +def process_md(file_content: str, **kwargs): + loader = CustomMarkdownLoader( + file_path=kwargs['bucket'] + "/" + kwargs['key']) + doc = loader.load(file_content) + splitter = MarkdownHeaderTextSplitter() + doc_list = splitter.split_text(doc) + + return doc_list diff --git a/source/lambda/job/dep/llm_bot_dep/loaders/pdf.py b/source/lambda/job/dep/llm_bot_dep/loaders/pdf.py new file mode 100644 index 00000000..aa4598e4 --- /dev/null +++ b/source/lambda/job/dep/llm_bot_dep/loaders/pdf.py @@ -0,0 +1,171 @@ +import os +import re +import json +import logging +from bs4 import BeautifulSoup +import subprocess +from pathlib import Path +from typing import List, Dict, List, Optional, Iterator, Sequence + +from langchain.docstore.document import Document +from langchain.document_loaders import PDFMinerPDFasHTMLLoader + +from langchain.document_loaders.pdf import BasePDFLoader +from ..splitter_utils import extract_headings, MarkdownHeaderTextSplitter +# from langchain.text_splitter import MarkdownHeaderTextSplitter + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +metadata_template = { + "content_type": "paragraph", + "heading_hierarchy": {}, + "figure_list": [], + "chunk_id": "$$", + "file_path": "", + "keywords": [], + "summary": "", +} + +class NougatPDFLoader(BasePDFLoader): + """A PDF loader class for converting PDF files to MMD. + + This class leverages the `nougat` library to perform the conversion from PDF to HTML. + It inherits from `BasePDFLoader` and extends its functionality to utilize the `nougat` library. + TODO, the load_and_split method need to be implemented and default is RecursiveCharacterTextSplitter + Attributes: + file_path (str): The path to the PDF file to be loaded. + headers (Optional[Dict]): Optional headers to be used when loading the PDF. + + Raises: + ImportError: If the `nougat` library is not installed. + RuntimeError: If the `nougat` command fails to execute successfully. + """ + + def __init__(self, file_path: str, *, headers: Optional[Dict] = None): + """Initialize with a file path.""" + try: + import nougat + except ImportError: + raise ImportError( + "Please install nougat to use NougatPDFLoader. " + "You can install it with `pip install nougat`." + ) + + super().__init__(file_path, headers=headers) + + def nougat(self, file_path: Path) -> str: + """Executes the `nougat` command to convert the specified PDF file to Markdown format. + + Args: + file_path (Path): The path to the PDF file to be converted. + + Returns: + str: The Markdown content resulting from the `nougat` conversion. 
+ """ + # nougat ./paperSnapshot.pdf --full-precision --markdown -m 0.1.0-base -o tmp --recompute + cli_command = ["nougat", str(file_path), "full-precision", "--markdown", "-m", "0.1.0-base", "-o", "tmp", "--recompute"] + + try: + result = subprocess.run( + cli_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True + ) + result.check_returncode() + return result.stdout + + except subprocess.CalledProcessError as e: + logger.info( + f"Nougat command failed with return code {e.returncode}: {e.stderr}" + ) + raise RuntimeError("Nougat command failed.") from e + + def load(self) -> List[Document]: + """Loads and processes the specified PDF file, converting it to a list of Document objects. + + Returns: + List[Document]: A list containing a single Document object with the processed content. + """ + return list(self.lazy_load()) + + def lazy_load(self) -> Iterator[Document]: + """Lazy load and process the specified PDF file, yielding Document objects. + + This method reads the PDF file, processes it using the `nougat` command, + reads the resulting Markdown content, and yields a Document object with the content. + """ + # try: + file_path = self.file_path + # Call the method to run the Nougat OCR command + self.nougat(file_path) + + # Rest of your code for reading and processing the output + file_path = Path(file_path) + output_path = Path("tmp") / f"{file_path.stem}.mmd" + with output_path.open("r") as f: + content = f.read() + # consider math expressions are enclosed in \( and \) in Markdown + content = ( + content.replace(r"\(", "$") + .replace(r"\)", "$") + .replace(r"\[", "$$") + .replace(r"\]", "$$") + ) + logger.info("content: %s", content) + # extract headings hierarchically + headings = extract_headings(content) + + # assemble metadata from template + metadata = metadata_template + metadata["content_type"] = "paragraph" + metadata["heading_hierarchy"] = headings + metadata["chunk_id"] = "$$" + metadata["file_path"] = str(file_path) + # TODO, use PyMuPDF to detect image and figure list, but no link to the image for the extracted text + # metadata["figure_list"] = [] + + yield Document(page_content=content, metadata=metadata) + + # except Exception as e: + # logger.info(f"An error occurred while processing the PDF: {str(e)}") + + +def process_pdf(s3, pdf: bytes, **kwargs): + """ + Process a given PDF file and extracts structured information from it. + + This function reads a PDF file, converts it to HTML using PDFMiner, then extracts + and structures the information into a list of dictionaries containing headings and content. + + Parameters: + s3 (boto3.client): The S3 client to use for downloading the PDF file. + pdf (bytes): The PDF file to process. + **kwargs: Arbitrary keyword arguments. The function expects 'bucket' and 'key' among the kwargs + to specify the S3 bucket and key where the PDF file is located. + + Returns: + list[Doucment]: A list of Document objects, each representing a semantically grouped section of the PDF file. Each Document object contains a metadata defined in metadata_template, and page_content string with the text content of that section. 
+ """ + logger.info("Processing PDF file...") + bucket = kwargs['bucket'] + key = kwargs['key'] + # extract file name also in consideration of file name with blank space + local_path = str(os.path.basename(key)) + # download to local for futher processing + logger.info(local_path) + s3.download_file(Bucket=bucket, Key=key, Filename=local_path) + # TODO, will be deprecated and replaced by nougat class in loader_utils + # loader = PDFMinerPDFasHTMLLoader(local_path) + # entire PDF is loaded as a single Document + # file_content = loader.load()[0].page_content + # res = parse_pdf_to_json(file_content) + + loader = NougatPDFLoader(local_path) + data = loader.load() + logger.info("raw data: %s", data) + # Update file_path metadata to full s3 path in list of Document objects + data[0].metadata['file_path'] = f"s3://{bucket}/{key}" + markdown_splitter = MarkdownHeaderTextSplitter() + md_header_splits = markdown_splitter.split_text(data[0]) + for i, doc in enumerate(md_header_splits): + logger.info("PDF file processed successfully, with content of chunk %s: %s", i, doc) + return md_header_splits diff --git a/source/lambda/job/dep/llm_bot_dep/loaders/text.py b/source/lambda/job/dep/llm_bot_dep/loaders/text.py new file mode 100644 index 00000000..df8ffe1f --- /dev/null +++ b/source/lambda/job/dep/llm_bot_dep/loaders/text.py @@ -0,0 +1,54 @@ +import logging +import re +from typing import List, Optional +from langchain.docstore.document import Document +from langchain.document_loaders.text import TextLoader + +logger = logging.getLogger(__name__) + + +class CustomTextLoader(TextLoader): + """Load text file. + + Args: + file_content: Text file content. + + encoding: File encoding to use. If `None`, the file will be loaded + with the default system encoding. + + autodetect_encoding: Whether to try to autodetect the file encoding + if the specified encoding fails. 
+ """ + + def __init__( + self, + file_path: str, + encoding: Optional[str] = None, + autodetect_encoding: bool = False, + ): + """Initialize with file path.""" + self.file_path = file_path + self.encoding = encoding + self.autodetect_encoding = autodetect_encoding + + def load(self, text_content: str) -> List[Document]: + """Load from file path.""" + metadata = {"source": self.file_path} + + return [Document(page_content=text_content, metadata=metadata)] + + +def pre_process_text(text_content: str) -> str: + # Clean up text content + text_content = re.sub(r'\s+', ' ', text_content) + text_content = re.sub(r'\n+', '\n', text_content) + + return text_content.strip() + + +def process_text(file_content: str, **kwargs): + clean_text = pre_process_text(file_content) + loader = CustomTextLoader(file_path=kwargs['bucket'] + "/" + kwargs['key']) + data = loader.load(clean_text) + + return data diff --git a/source/lambda/job/dep/llm_bot_dep/sm_utils.py b/source/lambda/job/dep/llm_bot_dep/sm_utils.py new file mode 100644 index 00000000..a60299ae --- /dev/null +++ b/source/lambda/job/dep/llm_bot_dep/sm_utils.py @@ -0,0 +1,71 @@ +""" +Helper functions for using Samgemaker Endpoint via langchain +""" +import sys +import time +import json +import logging +from typing import List +from langchain.embeddings import SagemakerEndpointEmbeddings +from langchain.embeddings.sagemaker_endpoint import EmbeddingsContentHandler + +logger = logging.getLogger() +# logging.basicConfig(format='%(asctime)s,%(module)s,%(processName)s,%(levelname)s,%(message)s', level=logging.INFO, stream=sys.stderr) +logger.setLevel(logging.INFO) + +# extend the SagemakerEndpointEmbeddings class from langchain to provide a custom embedding function +class SagemakerEndpointEmbeddingsJumpStart(SagemakerEndpointEmbeddings): + def embed_documents( + self, texts: List[str], chunk_size: int = 500 + ) -> List[List[float]]: + """Compute doc embeddings using a SageMaker Inference Endpoint. + + Args: + texts: The list of texts to embed. + chunk_size: The chunk size defines how many input texts will + be grouped together as request. If None, will use the + chunk size specified by the class. + + Returns: + List of embeddings, one for each text. 
+ """ + results = [] + _chunk_size = len(texts) if chunk_size > len(texts) else chunk_size + st = time.time() + for i in range(0, len(texts), _chunk_size): + response = self._embedding_func(texts[i:i + _chunk_size]) + results.extend(response) + time_taken = time.time() - st + logger.info(f"got results for {len(texts)} in {time_taken}s, length of embeddings list is {len(results)}") + return results + + +# class for serializing/deserializing requests/responses to/from the embeddings model +class ContentHandler(EmbeddingsContentHandler): + content_type = "application/json" + accepts = "application/json" + + def transform_input(self, prompt: str, model_kwargs={}) -> bytes: + input_str = json.dumps({"inputs": prompt, **model_kwargs}) + return input_str.encode('utf-8') + + def transform_output(self, output: bytes) -> str: + response_json = json.loads(output.read().decode("utf-8")) + embeddings = response_json["sentence_embeddings"] + if len(embeddings) == 1: + return [embeddings[0]] + return embeddings + +def create_sagemaker_embeddings_from_js_model(embeddings_model_endpoint_name: str, aws_region: str) -> SagemakerEndpointEmbeddingsJumpStart: + # all set to create the objects for the ContentHandler and + # SagemakerEndpointEmbeddingsJumpStart classes + content_handler = ContentHandler() + logger.info(f'content_handler: {content_handler}, embeddings_model_endpoint_name: {embeddings_model_endpoint_name}, aws_region: {aws_region}') + # note the name of the LLM Sagemaker endpoint, this is the model that we would + # be using for generating the embeddings + embeddings = SagemakerEndpointEmbeddingsJumpStart( + endpoint_name = embeddings_model_endpoint_name, + region_name = aws_region, + content_handler = content_handler + ) + return embeddings \ No newline at end of file diff --git a/source/lambda/job/dep/llm_bot_dep/splitter_utils.py b/source/lambda/job/dep/llm_bot_dep/splitter_utils.py new file mode 100644 index 00000000..d00d9c9e --- /dev/null +++ b/source/lambda/job/dep/llm_bot_dep/splitter_utils.py @@ -0,0 +1,187 @@ +import re +from typing import Any, Dict, Iterator, List, Optional, Union + +from langchain.docstore.document import Document +from langchain.text_splitter import (Language, RecursiveCharacterTextSplitter, + TextSplitter) + + +def _make_spacy_pipeline_for_splitting(pipeline: str) -> Any: # avoid importing spacy + try: + import spacy + except ImportError: + raise ImportError( + "Spacy is not installed, please install it with `pip install spacy`." + ) + if pipeline == "sentencizer": + from spacy.lang.en import English + + sentencizer = English() + sentencizer.add_pipe("sentencizer") + else: + sentencizer = spacy.load(pipeline, exclude=["ner", "tagger"]) + return sentencizer + +class NLTKTextSplitter(TextSplitter): + """Splitting text using NLTK package.""" + + def __init__( + self, separator: str = "\n\n", language: str = "english", **kwargs: Any + ) -> None: + """Initialize the NLTK splitter.""" + super().__init__(**kwargs) + try: + from nltk.tokenize import sent_tokenize + + self._tokenizer = sent_tokenize + except ImportError: + raise ImportError( + "NLTK is not installed, please install it with `pip install nltk`." + ) + self._separator = separator + self._language = language + + def split_text(self, text: str) -> List[str]: + """Split incoming text and return chunks.""" + # First we naively split the large input into a bunch of smaller ones. 
diff --git a/source/lambda/job/dep/llm_bot_dep/splitter_utils.py b/source/lambda/job/dep/llm_bot_dep/splitter_utils.py
new file mode 100644
index 00000000..d00d9c9e
--- /dev/null
+++ b/source/lambda/job/dep/llm_bot_dep/splitter_utils.py
@@ -0,0 +1,187 @@
+import re
+from typing import Any, Dict, Iterator, List, Optional, Union
+
+from langchain.docstore.document import Document
+from langchain.text_splitter import (Language, RecursiveCharacterTextSplitter,
+                                     TextSplitter)
+
+
+def _make_spacy_pipeline_for_splitting(pipeline: str) -> Any:  # avoid importing spacy
+    try:
+        import spacy
+    except ImportError:
+        raise ImportError(
+            "Spacy is not installed, please install it with `pip install spacy`."
+        )
+    if pipeline == "sentencizer":
+        from spacy.lang.en import English
+
+        sentencizer = English()
+        sentencizer.add_pipe("sentencizer")
+    else:
+        sentencizer = spacy.load(pipeline, exclude=["ner", "tagger"])
+    return sentencizer
+
+class NLTKTextSplitter(TextSplitter):
+    """Splitting text using NLTK package."""
+
+    def __init__(
+        self, separator: str = "\n\n", language: str = "english", **kwargs: Any
+    ) -> None:
+        """Initialize the NLTK splitter."""
+        super().__init__(**kwargs)
+        try:
+            from nltk.tokenize import sent_tokenize
+
+            self._tokenizer = sent_tokenize
+        except ImportError:
+            raise ImportError(
+                "NLTK is not installed, please install it with `pip install nltk`."
+            )
+        self._separator = separator
+        self._language = language
+
+    def split_text(self, text: str) -> List[str]:
+        """Split incoming text and return chunks."""
+        # First we naively split the large input into a bunch of smaller ones.
+        splits = self._tokenizer(text, language=self._language)
+        return self._merge_splits(splits, self._separator)
+
+class SpacyTextSplitter(TextSplitter):
+    """Splitting text using Spacy package.
+
+
+    By default, Spacy's `en_core_web_sm` model is used. For a faster, but
+    potentially less accurate splitting, you can use `pipeline='sentencizer'`.
+    """
+
+    def __init__(
+        self, separator: str = "\n\n", pipeline: str = "en_core_web_sm", **kwargs: Any
+    ) -> None:
+        """Initialize the spacy text splitter."""
+        super().__init__(**kwargs)
+        self._tokenizer = _make_spacy_pipeline_for_splitting(pipeline)
+        self._separator = separator
+
+    def split_text(self, text: str) -> List[str]:
+        """Split incoming text and return chunks."""
+        splits = (s.text for s in self._tokenizer(text).sents)
+        return self._merge_splits(splits, self._separator)
+
+class NestedDict(dict):
+    def __missing__(self, key):
+        self[key] = NestedDict()
+        return self[key]
+
+def extract_headings(md_content):
+    """Extract headings hierarchically from Markdown content.
+    The alternate (setext) syntax is also considered: "any number of == characters for heading level 1 or -- characters for heading level 2."
+    See https://www.markdownguide.org/basic-syntax/
+    Args:
+        md_content (str): Markdown content.
+    Returns:
+        NestedDict: A nested dictionary containing the headings. Sample output:
+        {
+            'Title 1': {
+                'Subtitle 1.1': {},
+                'Subtitle 1.2': {}
+            },
+            'Title 2': {
+                'Subtitle 2.1': {}
+            }
+        }
+    """
+    headings = NestedDict()
+    current_heads = [headings]
+    lines = md_content.strip().split('\n')
+
+    for i, line in enumerate(lines):
+        match = re.match(r'(#+) (.+)', line)
+        if not match and i > 0:  # If the line is not a heading, check if the previous line is a heading using the alternate syntax
+            if re.match(r'=+', lines[i - 1]):
+                level = 1
+                title = lines[i - 2]
+            elif re.match(r'-+', lines[i - 1]):
+                level = 2
+                title = lines[i - 2]
+            else:
+                continue
+        elif match:
+            level = len(match.group(1))
+            title = match.group(2)
+        else:
+            continue
+
+        current_heads = current_heads[:level]
+        current_heads[-1][title]
+        current_heads.append(current_heads[-1][title])
+
+    return headings
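To make the expected output shape concrete, a small illustrative run (assuming the `llm_bot_dep` package defined in this patch is installed; the sample Markdown is made up):

```python
from llm_bot_dep.splitter_utils import extract_headings

md = """# Title 1
## Subtitle 1.1
text
Title 2
=======
more text
"""
# 'Title 2' is picked up via the setext (===) branch
print(extract_headings(md))
# -> {'Title 1': {'Subtitle 1.1': {}}, 'Title 2': {}}
```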
+
+# TODO: rewrite this class to use the new TextSplitter for the mmd type
+class MarkdownHeaderTextSplitter:
+    # Placeholder for now, without parameters
+    def __init__(self) -> None:
+        pass
+
+    def _is_markdown_header(self, line):
+        header_pattern = r'^#+\s+'
+        if re.match(header_pattern, line):
+            return True
+        else:
+            return False
+
+    def _is_markdown_table_row(self, line):
+        return re.fullmatch(r'\|.*\|.*\|', line) is not None
+
+    def split_text(self, text: Document) -> List[Document]:
+        lines = text.page_content.strip().split('\n')
+        chunks = []
+        current_chunk_content = []
+        table_content = []
+        inside_table = False
+        chunk_id = 1  # Initializing chunk_id
+
+        for line in lines:
+            # Replace escaped characters for table markers
+            line = line.strip()
+            line = line.replace(r"\begin{table}", "\\begin{table}").replace(r"\end{table}", "\\end{table}")
+            if line in ["\\begin{table}", "\\end{table}"]:
+                continue
+
+            if self._is_markdown_header(line):  # Assuming these denote headings
+                # Save the current chunk if it exists
+                if current_chunk_content:
+                    metadata = text.metadata.copy()
+                    metadata['heading_hierarchy'] = extract_headings('\n'.join(current_chunk_content))
+                    metadata['chunk_id'] = f"${chunk_id}"
+                    chunk_id += 1  # Increment chunk_id for the next chunk
+                    chunks.append(Document(page_content='\n'.join(current_chunk_content), metadata=metadata))
+                    current_chunk_content = []  # Reset for the next chunk
+
+            if self._is_markdown_table_row(line):
+                inside_table = True
+            elif inside_table:
+                # First non-table line after a table: flush the buffered table rows
+                inside_table = False
+                # Save table content as a separate document
+                if table_content:
+                    metadata = text.metadata.copy()
+                    metadata['content_type'] = 'table'
+                    metadata['chunk_id'] = f"${chunk_id}"
+                    chunks.append(Document(page_content='\n'.join(table_content), metadata=metadata))
+                    table_content = []  # Reset for the next table
+
+            if inside_table:
+                table_content.append(line)
+            else:
+                current_chunk_content.append(line)
+
+        # Save the last chunk if it exists
+        if current_chunk_content:
+            metadata = text.metadata.copy()
+            metadata['heading_hierarchy'] = extract_headings('\n'.join(current_chunk_content))
+            metadata['chunk_id'] = f"${chunk_id}"
+            chunks.append(Document(page_content='\n'.join(current_chunk_content), metadata=metadata))
+
+        return chunks
diff --git a/source/lambda/job/dep/setup.py b/source/lambda/job/dep/setup.py
new file mode 100644
index 00000000..ff6569da
--- /dev/null
+++ b/source/lambda/job/dep/setup.py
@@ -0,0 +1,20 @@
+from setuptools import setup, find_packages
+
+setup(
+    name='llm_bot_dep',
+    version='0.1.0',
+    packages=find_packages(exclude=[]),
+    install_requires=[
+        'langchain',
+        'opensearch-py',
+        # 'faiss_cpu',
+        # 'sagemaker',
+        'requests_aws4auth',
+        'unstructured',
+        'boto3==1.28.84',
+        'nougat-ocr',
+        'markdownify',
+        'mammoth',
+        'chardet'
+    ],
+)
\ No newline at end of file
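With the dependency package defined, the splitter can be smoke-tested in isolation before the Glue job below is wired up. A minimal sketch, assuming `llm_bot_dep` has been `pip install`-ed from the `dep` directory and langchain is available; the S3 path in the metadata is a made-up example:

```python
from langchain.docstore.document import Document
from llm_bot_dep.splitter_utils import MarkdownHeaderTextSplitter

doc = Document(
    page_content="# Intro\nhello world\n# Details\nmore text",
    metadata={"file_path": "s3://example-bucket/sample.md"},  # hypothetical source
)
# Expect two chunks, one per heading, with chunk_id "$1" and "$2"
for chunk in MarkdownHeaderTextSplitter().split_text(doc):
    print(chunk.metadata["chunk_id"], repr(chunk.page_content))
```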
diff --git a/source/lambda/job/glue-job-script.py b/source/lambda/job/glue-job-script.py
new file mode 100644
index 00000000..94e51834
--- /dev/null
+++ b/source/lambda/job/glue-job-script.py
@@ -0,0 +1,270 @@
+import itertools
+import logging
+import os
+import sys
+import time
+from typing import Any, Dict, Generator, Iterable, List, Optional, Tuple
+
+import boto3
+import chardet
+import nltk
+from awsglue.utils import getResolvedOptions
+from boto3.dynamodb.conditions import Attr, Key
+from langchain.docstore.document import Document
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.vectorstores import OpenSearchVectorSearch
+from llm_bot_dep import sm_utils
+from llm_bot_dep.enhance_utils import EnhanceWithBedrock
+from llm_bot_dep.loaders.auto import cb_process_object
+from opensearchpy import RequestsHttpConnection
+from requests_aws4auth import AWS4Auth
+from tenacity import retry, stop_after_attempt
+
+logger = logging.getLogger()
+logger.setLevel(logging.INFO)
+
+# Adaptation to allow nougat to run in AWS Glue with a writable /tmp
+os.environ['TRANSFORMERS_CACHE'] = '/tmp/transformers_cache'
+os.environ['NOUGAT_CHECKPOINT'] = '/tmp/nougat_checkpoint'
+os.environ['NLTK_DATA'] = '/tmp/nltk_data'
+
+# Parse arguments
+args = getResolvedOptions(sys.argv, ['JOB_NAME', 'S3_BUCKET', 'S3_PREFIX', 'AOS_ENDPOINT', 'EMBEDDING_MODEL_ENDPOINT', 'REGION', 'OFFLINE', 'QA_ENHANCEMENT', 'BATCH_INDICE', 'ProcessedObjectsTable'])
+s3_bucket = args['S3_BUCKET']
+s3_prefix = args['S3_PREFIX']
+aosEndpoint = args['AOS_ENDPOINT']
+embeddingModelEndpoint = args['EMBEDDING_MODEL_ENDPOINT']
+region = args['REGION']
+offline = args['OFFLINE']
+qa_enhancement = args['QA_ENHANCEMENT']
+# TODO: the bucket and prefix to handle should be passed into the current job directly
+batchIndice = args['BATCH_INDICE']
+processedObjectsTable = args['ProcessedObjectsTable']
+
+s3 = boto3.client('s3')
+dynamodb = boto3.resource('dynamodb')
+table = dynamodb.Table(processedObjectsTable)
+
+ENHANCE_CHUNK_SIZE = 500
+# Make it 3600s for debugging purposes
+OBJECT_EXPIRY_TIME = 3600
+
+credentials = boto3.Session().get_credentials()
+awsauth = AWS4Auth(credentials.access_key, credentials.secret_key, region, 'es', session_token=credentials.token)
+
+# Set the NLTK data path to the /tmp directory for AWS Glue jobs
+nltk.data.path.append('/tmp/nltk_data')
+
+def decode_file_content(content: bytes, default_encoding: str = 'utf-8'):
+    """Decode the file content, auto-detecting the content encoding if needed.

+
+    Args:
+        content: The raw bytes to decode.
+        default_encoding: The encoding to try first when decoding the content.
+    """
+
+    try:
+        decoded_content = content.decode(default_encoding)
+    except UnicodeDecodeError:
+        # Try to detect encoding
+        encoding = chardet.detect(content)['encoding']
+        decoded_content = content.decode(encoding)
+
+    return decoded_content
+
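The fallback branch leans on chardet's statistical detection, which can be illustrated standalone; note that very short inputs can fool the detector, so the printed guess is indicative only:

```python
import chardet

raw = "简体中文测试".encode("gbk")  # bytes that are not valid UTF-8
try:
    raw.decode("utf-8")
except UnicodeDecodeError:
    detected = chardet.detect(raw)["encoding"]
    print(detected, raw.decode(detected))  # e.g. GB2312 -> '简体中文测试'
```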
+# This Glue job runs as a map job; batchIndice is the index of the single file this run should handle
+def iterate_s3_files(bucket: str, prefix: str) -> Generator:
+    paginator = s3.get_paginator('list_objects_v2')
+    currentIndice = 0
+    for page in paginator.paginate(Bucket=bucket, Prefix=prefix):
+        for obj in page.get('Contents', []):
+            key = obj['Key']
+            # skip keys ending with a slash, which are folder placeholders
+            if key.endswith('/'):
+                continue
+
+            # skip the file if its index does not match the batchIndice
+            if currentIndice != int(batchIndice):
+                logger.info("currentIndice: {}, batchIndice: {}, skip file: {}".format(currentIndice, batchIndice, key))
+                currentIndice += 1
+                continue
+
+            # Truncate to seconds with round()
+            current_time = int(round(time.time()))
+            # Check for redundancy and expiry
+            response = table.query(
+                KeyConditionExpression = Key('ObjectKey').eq(key),
+                ScanIndexForward=False,  # Sort by ProcessTimestamp in descending order
+                Limit=1  # We only need the latest record
+            )
+
+            # If the object is found and has not expired, skip processing
+            if response['Items'] and response['Items'][0]['ExpiryTimestamp'] > current_time:
+                logger.info(f"Object {key} has not expired yet and will be skipped.")
+                continue
+
+            # Record the processing of the S3 object with an updated expiry timestamp; each job only updates a single object in the table.
+            # TODO: this currently assumes the object will be handled successfully
+            expiry_timestamp = current_time + OBJECT_EXPIRY_TIME
+            try:
+                table.put_item(
+                    Item={
+                        'ObjectKey': key,
+                        'ProcessTimestamp': current_time,
+                        'Bucket': bucket,
+                        'Prefix': '/'.join(key.split('/')[:-1]),
+                        'ExpiryTimestamp': expiry_timestamp
+                    }
+                )
+            except Exception as e:
+                logger.error(f"Error recording the processing of S3 object {key}: {e}")
+
+            file_type = key.split('.')[-1]  # Extract file extension
+            response = s3.get_object(Bucket=bucket, Key=key)
+            file_content = response['Body'].read()
+            # assemble bucket and key as args for the callback function
+            kwargs = {'bucket': bucket, 'key': key}
+
+            # each run handles exactly one file, so stop iterating after the first yield
+            if file_type == 'txt':
+                yield 'txt', decode_file_content(file_content), kwargs
+                break
+            elif file_type == 'csv':
+                # Update row count here, the default row count is 1
+                kwargs['csv_row_count'] = 1
+                yield 'csv', decode_file_content(file_content), kwargs
+                break
+            elif file_type == 'html':
+                yield 'html', decode_file_content(file_content), kwargs
+                break
+            elif file_type in ['pdf']:
+                yield 'pdf', file_content, kwargs
+                break
+            elif file_type in ['jpg', 'png']:
+                yield 'image', file_content, kwargs
+                break
+            elif file_type in ['docx', 'doc']:
+                yield 'doc', file_content, kwargs
+                break
+            elif file_type == 'md':
+                yield 'md', decode_file_content(file_content), kwargs
+                break
+            else:
+                logger.info(f"Unknown file type: {file_type}")
+
+def batch_generator(generator, batch_size: int):
+    iterator = iter(generator)
+    while True:
+        batch = list(itertools.islice(iterator, batch_size))
+        if not batch:
+            break
+        yield batch
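`batch_generator` simply chunks any iterable into fixed-size lists via `itertools.islice`; it is repeated verbatim below so the illustration runs standalone:

```python
import itertools

def batch_generator(generator, batch_size: int):
    iterator = iter(generator)
    while True:
        batch = list(itertools.islice(iterator, batch_size))
        if not batch:
            break
        yield batch

print(list(batch_generator(range(7), 3)))  # -> [[0, 1, 2], [3, 4, 5], [6]]
```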
+
+def aos_injection(content: List[Document], embeddingModelEndpoint: str, aosEndpoint: str, index_name: str, chunk_size: int = 500, gen_chunk: bool = True) -> None:
+    """
+    This function includes the following steps:
+    1. split the documents into chunks of the given chunk size to fit the embedding model; note the documents are already split by title/subtitle into approximately semantic chunks;
+    2. call the embedding model to get the embeddings for each chunk;
+    3. call AOS to index each chunk with its embeddings;
+    Parameters:
+    content (list): A list of Document objects, each representing a semantically grouped section of the PDF file. Each Document object contains a metadata dictionary with details about the heading hierarchy etc.
+    embeddingModelEndpoint (str): The endpoint of the embedding model.
+    aosEndpoint (str): The endpoint of the AOS.
+    index_name (str): The name of the index to be created in the AOS.
+    chunk_size (int): The size of each chunk to be indexed in the AOS.
+    gen_chunk (bool): Whether to generate chunks or not.
+
+    Returns:
+    None; documents are indexed into AOS as a side effect.
+    """
+    embeddings = sm_utils.create_sagemaker_embeddings_from_js_model(embeddingModelEndpoint, region)
+    def chunk_generator(content: List[Document], chunk_size: int = 500, chunk_overlap: int = 30) -> Generator[Document, None, None]:
+        text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
+        for document in content:
+            splits = text_splitter.split_documents([document])
+            # list of Document objects
+            for split in splits:
+                yield split
+
+    if gen_chunk:
+        generator = chunk_generator(content, chunk_size=chunk_size)
+    else:
+        generator = content
+
+    batches = batch_generator(generator, batch_size=10)
+    # note: each batch is a list[Document] with at most batch_size items
+    for batch in batches:
+        if len(batch) == 0:
+            continue
+        # each batch is still a list of Document objects; iterate the list to inject the embeddings, the chunk size (500) should already be small enough to fit the embedding model
+        for document in batch:
+            @retry(stop=stop_after_attempt(3))
+            def _aos_injection(document: Document) -> Document:
+                # TODO: parse the metadata to embed with different indexes
+                docsearch = OpenSearchVectorSearch(
+                    index_name=index_name,
+                    embedding_function=embeddings,
+                    opensearch_url="https://{}".format(aosEndpoint),
+                    http_auth = awsauth,
+                    use_ssl = True,
+                    verify_certs = True,
+                    connection_class = RequestsHttpConnection
+                )
+                logger.info("Adding documents %s to OpenSearch with index %s", document, index_name)
+                docsearch.add_documents(documents=[document])
+                logger.info("Retry statistics: %s", _aos_injection.retry.statistics)
+                # logger.info("Adding documents %s to OpenSearch with index %s", document, index_name)
+            _aos_injection(document)
+
+# Main function to be called by Glue job script
+def main():
+    logger.info("Starting Glue job with passing arguments: %s", args)
+    # Check if offline mode
+    if offline == 'true':
+        logger.info("Running in offline mode with consideration for large file size...")
+        for file_type, file_content, kwargs in iterate_s3_files(s3_bucket, s3_prefix):
+            try:
+                res = cb_process_object(s3, file_type, file_content, **kwargs)
+                # TODO: parse the metadata to embed with different indexes
+                if res:
+                    logger.info("Result: %s", res)
+                if file_type == 'csv':
+                    # CSV page documents have already been split into chunks; no more splitting is needed
+                    aos_injection(res, embeddingModelEndpoint, aosEndpoint, 'chatbot-index', gen_chunk=False)
+                elif file_type == 'html':
+                    aos_injection(res, embeddingModelEndpoint, aosEndpoint, 'chatbot-index')
+                elif file_type in ['pdf', 'txt', 'doc', 'md']:
+                    aos_injection(res, embeddingModelEndpoint, aosEndpoint, 'chatbot-index')
+                if qa_enhancement == 'true':
+                    # iterate the documents to get the QA pairs
+                    for document in res:
+                        # prompt is not used in this case
+                        prompt = ""
+                        solution_title = "GCR Solution LLM Bot"
+                        # Make sure the document is a Document object
+                        logger.info("Enhancing document type: {} and content: {}".format(type(document), document))
+                        ewb = EnhanceWithBedrock(prompt, solution_title, document)
+                        # This should be optional, letting the user choose the chunk size
+                        document_list = ewb.SplitDocumentByTokenNum(document, ENHANCE_CHUNK_SIZE)
+                        # enhance each split document
+                        for document in document_list:
+                            enhanced_prompt = ewb.EnhanceWithClaude(prompt, solution_title, document)
+                            logger.info("Enhanced prompt: {}".format(enhanced_prompt))
+
+            except Exception as e:
+                logger.error("Error processing object %s: %s", kwargs['bucket'] + '/' + kwargs['key'], e)
+    else:
+
logger.info("Running in online mode, assume file number is small...") + +if __name__ == '__main__': + logger.info("boto3 version: %s", boto3.__version__) + + # Set the NLTK data path to the /tmp directory for AWS Glue jobs + nltk.data.path.append("/tmp") + # List of NLTK packages to download + nltk_packages = ['words', 'punkt'] + # Download the required NLTK packages to /tmp + for package in nltk_packages: + # Download the package to /tmp/nltk_data + nltk.download(package, download_dir='/tmp/nltk_data') + main() From 791e48853fe14deeea04f3e8d76a531e12ff77e7 Mon Sep 17 00:00:00 2001 From: Ning Date: Mon, 13 Nov 2023 21:30:07 +0800 Subject: [PATCH 20/37] chore: restructure --- source/model/cross/code/cross_model.tar.gz | Bin 0 -> 1148 bytes source/model/cross/code/model.py | 68 ++++++++++++++++++ source/model/cross/code/serving.properties | 5 ++ .../model/cross/model/add_your_model_here.txt | 0 source/model/cross/model/model.sh | 57 +++++++++++++++ source/model/embedding/code/model.py | 42 +++++++++++ source/model/embedding/code/requirements.txt | 3 + .../embedding/model/add_your_model_here.txt | 0 source/model/embedding/model/model.sh | 57 +++++++++++++++ source/model/instruct/code/model.py | 60 ++++++++++++++++ source/model/instruct/code/requirements.txt | 3 + .../instruct/model/add_your_model_here.txt | 0 source/model/instruct/model/model.sh | 57 +++++++++++++++ 13 files changed, 352 insertions(+) create mode 100644 source/model/cross/code/cross_model.tar.gz create mode 100644 source/model/cross/code/model.py create mode 100644 source/model/cross/code/serving.properties create mode 100644 source/model/cross/model/add_your_model_here.txt create mode 100755 source/model/cross/model/model.sh create mode 100644 source/model/embedding/code/model.py create mode 100644 source/model/embedding/code/requirements.txt create mode 100644 source/model/embedding/model/add_your_model_here.txt create mode 100755 source/model/embedding/model/model.sh create mode 100644 source/model/instruct/code/model.py create mode 100644 source/model/instruct/code/requirements.txt create mode 100644 source/model/instruct/model/add_your_model_here.txt create mode 100755 source/model/instruct/model/model.sh diff --git a/source/model/cross/code/cross_model.tar.gz b/source/model/cross/code/cross_model.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..3ab20b643382f4872597aa544b4a55486fe8ae06 GIT binary patch literal 1148 zcmV-?1cUn@iwFQN@KI#|1MOCAZ`(Ey?q~lBLir%6a1{9^O=rvniUJ)5v|E67pNv3Y zq?64?B3Y7hk_GwiJHFVGlXfq_x}iY$!7}fT&++b_BiV`ynoQn4`-UUW^B40uvEQ?^ ziTmbLKeuCMg_HT|bUr;ho#m$_pUkHD{29r=<)(I{(}pVCO0(sRufJ}GW_A51I6fA# z|9&T2m%W+OOZtrW_TpSkSJnEcuq{EmXq2vu%&lSZu9n6TZnrDm@ZdgNe@ zqFmQPwV?D4STYF><1i+#J0sp(_PrJA4ec~--o5wIkKzhA`yJH4Xf&!|%Ss?izm3dK zlep|Ey2xX~>Y%WKjOl`IDQoCu14&At(Lmx-c5ynAigATSl;>XJ?gBN8bhO1 z(OR#GR6>F>40O5}5gZ1%7OId`(WZe0x6_-fq6gZdg0%vOF5FUwSTv%fhGC`;R>odg zF%^4b3@7q3Rz-yIfwAAn-$Z0p01WlY7_UWANAEC3oO%e0e|HFWAEj=Y4|-P+ba7Ci z%+|S(3b52z)G9dy8Iw*!QB!S}Zz+}_?eh?jTx}nrd?mm!*(;8SFIv%3xr{DpLwN}m zT3#1eP1E)^24Q=Yg@9D@DotO=T?O{59I{&TTZ)QZ|U8ZCp zRq(lE3Uskn$eH9}lo@PklMFJ6F?Nc3<+#r)5^tB4Uj&EIFyT(H1`diM!FK{j)D7ho z@|)Ra)eGOWfv4+6U{)SG3f$ttc-;dsjSp%)Xaw&gTZ1Wjk|im&Oh60<*m%%NbDN|+ zatPY(+s-R$D54J_sV+Wg!H1dp+<`(DXlKn=k+%-5REe_J*h!GPAK8)UNssWh5o{J#6p*#br9{n9Hi7sHE>J-8b8WM zBu$iN`?Wnh)jBZ1hl~d%xw@umZN;v#JHIut=V^;_?RaZ>vY^nzio@XE5)NrJCcj%?FcOcq)~$!JqJ zOYMkuaAfGNvBkcNbSZEaEe>dq;kA~e8Sh$&U3Wh`X8iSyO62su$2?vd?tcwxYYX2u z^}qkd_>1>nKEsK7|IJQc%%?fN|4wI<>C^k~zZiIw;g!2Qcdx&Zke0OBm!C1q3DUch 
z*qT6SiBxnC2qVa?=oAsx_(rjhz#|e+^sH0%M$@y!$;pq!hnc(VUuz9Uze0ik_rcKe zDl1AQv~~vP3Tk%qYV;)(U!-Y1o#j7FUY=Zbbq(s+tv$~3$+7E;lm9!XPvdDkji>QF Oj6VSA*x^n95&!_*wLFFZ literal 0 HcmV?d00001 diff --git a/source/model/cross/code/model.py b/source/model/cross/code/model.py new file mode 100644 index 00000000..7b713fc4 --- /dev/null +++ b/source/model/cross/code/model.py @@ -0,0 +1,68 @@ +from djl_python import Input, Output +import torch +import logging +import math +import os +from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer, AutoModel + +device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') +print(f'--device={device}') + + +def load_model(properties): + tensor_parallel = properties["tensor_parallel_degree"] + model_location = properties['model_dir'] + if "model_id" in properties: + model_location = properties['model_id'] + logging.info(f"Loading model in {model_location}") + + tokenizer = AutoTokenizer.from_pretrained(model_location, use_fast=False) + model = AutoModel.from_pretrained( + model_location, + # device_map="balanced_low_0", + trust_remote_code=True + ).half() + # load the model on GPU + model.to(device) + model.requires_grad_(False) + model.eval() + + return model, tokenizer + + +model = None +tokenizer = None +generator = None + +def handle(inputs: Input): + global model, tokenizer + if not model: + model, tokenizer = load_model(inputs.get_properties()) + + if inputs.is_empty(): + return None + data = inputs.get_as_json() + + queries = data["inputs"] + docs = data["docs"] + + encoded_input = tokenizer(text = [queries], text_pair=[docs], padding=True, truncation=True, max_length=2048, return_tensors='pt')['input_ids'].to(device) + # Compute token embeddings + with torch.no_grad(): + model_output = model(input_ids=encoded_input) + + # Perform pooling. In this case, max pooling. + +# # preprocess +# input_ids = tokenizer(input_sentences, return_tensors="pt").input_ids +# # pass inputs with all kwargs in data +# if params is not None: +# outputs = model.generate(input_ids, **params) +# else: +# outputs = model.generate(input_ids) + +# # postprocess the prediction +# prediction = tokenizer.decode(outputs[0], skip_special_tokens=True) + + result = {"scores": model_output.cpu().numpy()} + return Output().add_as_json(result) \ No newline at end of file diff --git a/source/model/cross/code/serving.properties b/source/model/cross/code/serving.properties new file mode 100644 index 00000000..d85a2fbe --- /dev/null +++ b/source/model/cross/code/serving.properties @@ -0,0 +1,5 @@ +engine=Python +option.tensor_parallel_degree=1 +# update according to your own path +# option.s3url = s3://<_S3ModelAssets>/<_AssetsStack._crossModelPrefix> +option.s3url = s3://llm-rag/buffer-cross-001-model/ \ No newline at end of file diff --git a/source/model/cross/model/add_your_model_here.txt b/source/model/cross/model/add_your_model_here.txt new file mode 100644 index 00000000..e69de29b diff --git a/source/model/cross/model/model.sh b/source/model/cross/model/model.sh new file mode 100755 index 00000000..270539ca --- /dev/null +++ b/source/model/cross/model/model.sh @@ -0,0 +1,57 @@ +function usage { + echo "Make sure Python installed properly. 
Usage: $0 -t TOKEN [-m MODEL_NAME] [-c COMMIT_HASH] [-s S3_BUCKET_NAME]" + echo " -t TOKEN Hugging Face token (required)" + echo " -m MODEL_NAME Model name (default: csdc-atl/buffer-cross-001)" + echo " -c COMMIT_HASH Commit hash (default: 46d270928463db49b317e5ea469a8ac8152f4a13)" + echo " -s S3_BUCKET_NAME S3 bucket name to upload the model (default: llm-rag)" + exit 1 +} + +# Default values +model_name="csdc-atl/buffer-cross-001" +commit_hash="46d270928463db49b317e5ea469a8ac8152f4a13" +s3_bucket_name="llm-rag" # Default S3 bucket name + +# Parse command-line options +while getopts ":t:m:c:s:" opt; do + case $opt in + t) hf_token="$OPTARG" ;; + m) model_name="$OPTARG" ;; + c) commit_hash="$OPTARG" ;; + s) s3_bucket_name="$OPTARG" ;; + \?) echo "Invalid option: -$OPTARG" >&2; usage ;; + :) echo "Option -$OPTARG requires an argument." >&2; usage ;; + esac +done + +# Validate the hf_token and python interpreter exist +if [ -z "$hf_token" ] || ! command -v python &> /dev/null; then + usage +fi + +# Install necessary packages +pip install huggingface-hub -Uqq +pip install -U sagemaker + +# Define local model path +local_model_path="." + +# Uncomment the line below if you want to create a specific directory for the model +# mkdir -p $local_model_path + +# Download model snapshot in current folder without model prefix added +python -c "from huggingface_hub import snapshot_download; from pathlib import Path; snapshot_download(repo_id='$model_name', revision='$commit_hash', cache_dir=Path('.'), token='$hf_token')" + +# Find model snapshot path with the first search result +model_snapshot_path=$(find . -path '*/snapshots/*' -type d -print -quit) +echo "Model snapshot path: $model_snapshot_path" + +# s3:/// +aws s3 cp --recursive $model_snapshot_path s3://$s3_bucket_name/buffer-cross-001-model + +# Modify the content of serving.properties and re-tar the model +cd ../code +file_path="serving.properties" +sed -i "s|option.s3url = s3://[^/]*/buffer-cross-001-model/|option.s3url = s3://$s3_bucket_name/buffer-cross-001-model/|" $file_path +rm cross_model.tar.gz +tar czvf cross_model.tar.gz * diff --git a/source/model/embedding/code/model.py b/source/model/embedding/code/model.py new file mode 100644 index 00000000..b3136d0b --- /dev/null +++ b/source/model/embedding/code/model.py @@ -0,0 +1,42 @@ +from djl_python import Input, Output +import torch +import logging +import math +import os +from sentence_transformers import SentenceTransformer + +def load_model(properties): + tensor_parallel = properties["tensor_parallel_degree"] + model_location = properties['model_dir'] + if "model_id" in properties: + model_location = properties['model_id'] + logging.info(f"Loading model in {model_location}") + + model = SentenceTransformer(model_location) + model = model.eval().cuda() + + return model + +model = None + +def handle(inputs: Input): + global model + if not model: + model = load_model(inputs.get_properties()) + + if inputs.is_empty(): + return None + data = inputs.get_as_json() + + input_sentences = None + inputs = data["inputs"] + if isinstance(inputs, list): + input_sentences = inputs + else: + input_sentences = [inputs] + logging.info(f"inputs: {input_sentences}") + + sentence_embeddings = model.encode(input_sentences, normalize_embeddings=True) + + result = {"sentence_embeddings": sentence_embeddings} + return Output().add_as_json(result) diff --git a/source/model/embedding/code/requirements.txt b/source/model/embedding/code/requirements.txt new file mode 100644 index 00000000..9f6724c7 --- /dev/null 
+++ b/source/model/embedding/code/requirements.txt
@@ -0,0 +1,3 @@
+transformers==4.31.0
+accelerate==0.20.3
+sentence-transformers
diff --git a/source/model/embedding/model/add_your_model_here.txt b/source/model/embedding/model/add_your_model_here.txt
new file mode 100644
index 00000000..e69de29b
diff --git a/source/model/embedding/model/model.sh b/source/model/embedding/model/model.sh
new file mode 100755
index 00000000..7694295f
--- /dev/null
+++ b/source/model/embedding/model/model.sh
@@ -0,0 +1,57 @@
+function usage {
+  echo "Make sure Python is installed properly. Usage: $0 -t TOKEN [-m MODEL_NAME] [-c COMMIT_HASH] [-s S3_BUCKET_NAME]"
+  echo "  -t TOKEN            Hugging Face token (required)"
+  echo "  -m MODEL_NAME       Model name (default: BAAI/bge-large-zh-v1.5)"
+  echo "  -c COMMIT_HASH      Commit hash (default: 00f8ffc4928a685117583e2a38af8ebb65dcec2c)"
+  echo "  -s S3_BUCKET_NAME   S3 bucket name to upload the model (default: llm-rag)"
+  exit 1
+}
+
+# Default values
+model_name="BAAI/bge-large-zh-v1.5"
+commit_hash="00f8ffc4928a685117583e2a38af8ebb65dcec2c"
+s3_bucket_name="llm-rag" # Default S3 bucket name
+
+# Parse command-line options
+while getopts ":t:m:c:s:" opt; do
+  case $opt in
+    t) hf_token="$OPTARG" ;;
+    m) model_name="$OPTARG" ;;
+    c) commit_hash="$OPTARG" ;;
+    s) s3_bucket_name="$OPTARG" ;;
+    \?) echo "Invalid option: -$OPTARG" >&2; usage ;;
+    :) echo "Option -$OPTARG requires an argument." >&2; usage ;;
+  esac
+done
+
+# Validate that the hf_token and a python interpreter exist
+if [ -z "$hf_token" ] || ! command -v python &> /dev/null; then
+  usage
+fi
+
+# Install necessary packages
+pip install huggingface-hub -Uqq
+pip install -U sagemaker
+
+# Define local model path
+local_model_path="."
+
+# Uncomment the line below if you want to create a specific directory for the model
+# mkdir -p $local_model_path
+
+# Download the model snapshot into the current folder without a model prefix added
+python -c "from huggingface_hub import snapshot_download; from pathlib import Path; snapshot_download(repo_id='$model_name', revision='$commit_hash', cache_dir=Path('.'), token='$hf_token')"
+
+# Find the model snapshot path with the first search result
+model_snapshot_path=$(find . -path '*/snapshots/*' -type d -print -quit)
+echo "Model snapshot path: $model_snapshot_path"
+
+# s3:///
+aws s3 cp --recursive $model_snapshot_path s3://$s3_bucket_name/buffer-embedding-002-model
+
+# Modify the content of serving.properties and re-tar the model
+cd ../code
+file_path="serving.properties"
+sed -i "s|option.s3url = s3://[^/]*/buffer-embedding-002-model/|option.s3url = s3://$s3_bucket_name/buffer-embedding-002-model/|" $file_path
+rm s2e_model.tar.gz
+tar czvf s2e_model.tar.gz *
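The inline `python -c` invocation above is hard to read; unrolled, it is equivalent to the sketch below. The repo id and revision mirror the script's defaults, and the token value is a placeholder for the `-t` flag:

```python
from pathlib import Path
from huggingface_hub import snapshot_download

# Download the model snapshot into the current folder, mirroring model.sh
snapshot_download(
    repo_id="BAAI/bge-large-zh-v1.5",                         # $model_name
    revision="00f8ffc4928a685117583e2a38af8ebb65dcec2c",      # $commit_hash
    cache_dir=Path("."),
    token="hf_...",                                           # $hf_token (placeholder)
)
```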
diff --git a/source/model/instruct/code/model.py b/source/model/instruct/code/model.py
new file mode 100644
index 00000000..dfed4f07
--- /dev/null
+++ b/source/model/instruct/code/model.py
@@ -0,0 +1,60 @@
+from djl_python import Input, Output
+import torch
+import logging
+import math
+import os
+import json
+from transformers import AutoTokenizer, AutoModelForCausalLM
+
+def load_model(properties):
+    tensor_parallel = properties["tensor_parallel_degree"]
+    model_location = properties['model_dir']
+    if "model_id" in properties:
+        model_location = properties['model_id']
+    logging.info(f"Loading model in {model_location}")
+
+    tokenizer = AutoTokenizer.from_pretrained(model_location, trust_remote_code=True)
+    model = AutoModelForCausalLM.from_pretrained(model_location, trust_remote_code=True)
+    model = model.eval().half().cuda()
+
+    return model, tokenizer
+
+
+model = None
+tokenizer = None
+generator = None
+
+def stream_items(input_sentences, history, params):
+    global model, tokenizer
+    res_generator = model.stream_chat(tokenizer, input_sentences, history=history, **params)
+    size = 0
+    response = ""
+    for response in res_generator:
+        this_response = response[size:]
+        size = len(response)
+        stream_buffer = {"outputs": this_response, "finished": len(this_response) == 0}
+        yield stream_buffer
+
+def handle(inputs: Input):
+    global model, tokenizer
+    if not model:
+        model, tokenizer = load_model(inputs.get_properties())
+
+    if inputs.is_empty():
+        return None
+    data = inputs.get_as_json()
+
+    input_sentences = data["inputs"]
+    params = data["parameters"]
+    history = data["history"]
+    stream = data.get('stream', False)
+
+    outputs = Output()
+    if stream:
+        outputs.add_property("content-type", "application/jsonlines")
+        outputs.add_stream_content(stream_items(input_sentences, history, params))
+    else:
+        response = model.chat(tokenizer, input_sentences, history=history, **params)
+        result = {"outputs": response}
+        outputs.add_as_json(result)
+    return outputs
\ No newline at end of file
diff --git a/source/model/instruct/code/requirements.txt b/source/model/instruct/code/requirements.txt
new file mode 100644
index 00000000..2b02551a
--- /dev/null
+++ b/source/model/instruct/code/requirements.txt
@@ -0,0 +1,3 @@
+transformers==4.33.1
+accelerate==0.20.3
+transformers_stream_generator
\ No newline at end of file
diff --git a/source/model/instruct/model/add_your_model_here.txt b/source/model/instruct/model/add_your_model_here.txt
new file mode 100644
index 00000000..e69de29b
diff --git a/source/model/instruct/model/model.sh b/source/model/instruct/model/model.sh
new file mode 100755
index 00000000..48a6360f
--- /dev/null
+++ b/source/model/instruct/model/model.sh
@@ -0,0 +1,57 @@
+function usage {
+  echo "Make sure Python installed properly.
Usage: $0 -t TOKEN [-m MODEL_NAME] [-c COMMIT_HASH]" + echo " -t TOKEN Hugging Face token (required)" + echo " -m MODEL_NAME Model name (default: csdc-atl/buffer-cross-001)" + echo " -c COMMIT_HASH Commit hash (default: 46d270928463db49b317e5ea469a8ac8152f4a13)" + echo " -s S3_BUCKET_NAME S3 bucket name to upload the model (default: llm-rag)" + exit 1 +} + +# Default values +model_name="csdc-atl/buffer-instruct-InternLM-001" +commit_hash="b43935b91d90df80bccdab4c2e8a5d6315f5278b" +s3_bucket_name="llm-rag" # Default S3 bucket name + +# Parse command-line options +while getopts ":t:m:c:s:" opt; do + case $opt in + t) hf_token="$OPTARG" ;; + m) model_name="$OPTARG" ;; + c) commit_hash="$OPTARG" ;; + s) s3_bucket_name="$OPTARG" ;; + \?) echo "Invalid option: -$OPTARG" >&2; usage ;; + :) echo "Option -$OPTARG requires an argument." >&2; usage ;; + esac +done + +# Validate the hf_token and python interpreter exist +if [ -z "$hf_token" ] || ! command -v python &> /dev/null; then + usage +fi + +# Install necessary packages +pip install huggingface-hub -Uqq +pip install -U sagemaker + +# Define local model path +local_model_path="." + +# Uncomment the line below if you want to create a specific directory for the model +# mkdir -p $local_model_path + +# Download model snapshot in current folder without model prefix added +python -c "from huggingface_hub import snapshot_download; from pathlib import Path; snapshot_download(repo_id='$model_name', revision='$commit_hash', cache_dir=Path('.'), token='$hf_token')" + +# Find model snapshot path with the first search result +model_snapshot_path=$(find . -path '*/snapshots/*' -type d -print -quit) +echo "Model snapshot path: $model_snapshot_path" + +# s3:/// +aws s3 cp --recursive $model_snapshot_path s3://$s3_bucket_name/buffer-instruct-003-model + +# Modify the content of serving.properties and re-tar the model +cd ../code +file_path="serving.properties" +sed -i "s|option.s3url = s3://[^/]*/buffer-instruct-003-model/|option.s3url = s3://$s3_bucket_name/buffer-instruct-003-model/|" $file_path +rm model.tar.gz +tar czvf model.tar.gz * From 2d2861d9de89eadab4fcea4ccbf28538bfccf03c Mon Sep 17 00:00:00 2001 From: Ning Date: Mon, 13 Nov 2023 21:32:10 +0800 Subject: [PATCH 21/37] chore: restructure --- source/model/embedding/code/s2e_model.tar.gz | Bin 0 -> 781 bytes source/model/embedding/code/serving.properties | 5 +++++ source/model/instruct/code/model.tar.gz | Bin 0 -> 1044 bytes source/model/instruct/code/serving.properties | 6 ++++++ 4 files changed, 11 insertions(+) create mode 100644 source/model/embedding/code/s2e_model.tar.gz create mode 100644 source/model/embedding/code/serving.properties create mode 100644 source/model/instruct/code/model.tar.gz create mode 100644 source/model/instruct/code/serving.properties diff --git a/source/model/embedding/code/s2e_model.tar.gz b/source/model/embedding/code/s2e_model.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..f76965c3dc659587b1d4bcba248c44df50a679cb GIT binary patch literal 781 zcmV+o1M>VIiwFP*^igF11MOGcZ<{a>-!p%OMIQ=MLm=d1i$~Xm3CM=&9UlOTv|1(1g3x`5G#S1 z%(%!w$aaU*hIzK(Syl>}2F7WeS83@Kf)^kZ0qHI+q)BT?F5&_UOes(lp^eD7y~B-D zSP208Tdi6n#*`;m5q-`r%|pXXi8wUEBty1YBdLuD>j@h}{t}U-4PfIfkFbn8nf*zh zQBMs-g@3yZc4xcR#%c|{$6L;}in}FWj~-lO>bPD~rr0t3`bU5&1v4_1)vA2q3`_(r zF-s{pq{;!gG`Y|OU`6=_rj2UjDaMsq*l~@YLnLxc$*;av zbS6r0J3E`qkR??b@7CV4kGh9m-Mb%1r`y1&funtgJ z=$;9Xz9G1%TUf=>nwJ9cpSnw~{sbJL21SOYsG2|-F302d@oD(E>QO#O2}&O81@m>P zaPr;T^b@tviGYmU99#R4OrR$IB=p|}Z^sv9mVp>``XilNqq-X!zqD@$8fc(_{%86P 
Lb_?F202BZK_!fa1 literal 0 HcmV?d00001 diff --git a/source/model/embedding/code/serving.properties b/source/model/embedding/code/serving.properties new file mode 100644 index 00000000..4de54e40 --- /dev/null +++ b/source/model/embedding/code/serving.properties @@ -0,0 +1,5 @@ +engine=Python +option.tensor_parallel_degree=1 +# update according to your own path +# option.s3url = s3://<_S3ModelAssets>/<_AssetsStack._embeddingModelPrefix> +option.s3url = s3://llm-rag/buffer-embedding-002-model/ \ No newline at end of file diff --git a/source/model/instruct/code/model.tar.gz b/source/model/instruct/code/model.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..dfa8479cc21501e589c233d325320b59ec23bd76 GIT binary patch literal 1044 zcmV+v1nc`BiwFR?2vTJL1MOE|Z`(Ey@3TGyp*}<^97VS5WHaUh#ef0>+O0tQ78C*_ zoou!esgYD$ugG`b@sBJiOV(~{8Vv9~m?rP|JKpi`bZkTkb<*x1-6N%GdcIr|GhSTG z?U*jy+)%b!UXb~6wOB4LR$00t={#Gct4Acg&qMXpDNQ9FrT)Mf-`*{UVpa7wIL;Pw z82=|!Nzsti@k{ zf(KTGmXxbX$Ofe9l|1XTcq85eXMcd4+38CYj?absnRbfSFJ1;g2^H$1W!@N8)JoBU z)C^R-4hWJ4u7u26DrsFqO*Ui@xeRY`^AffaAbe}LTEx5-1=S2qpX@ww%#z6yC(J4m zx;0jYgz=HEJIGfeveE;32PTYHBC5g{Xd_M?gu(wj1-lMoyUiKhdks}gyqFkg=dA?v zH{(!7Q;-?n0|tC9p%EJL0-2jPvV(Z=hey155#I#ZjtP9AbrdH%T30v}T}h*nX{e!- z+=6| z;-l#!YFmpdTL1L~?sO_2YjPHOEya%NNDl#hm%hzG?~^B9vjtbU4KdSRQ!BVK3t?zi zFnfko43NoLh8Wa{o{x6-e=iOE`t3o^Ypa1ry+}MTokdq&RaqB*4uvfRDqK(SXC$l` zXKDv!xVBgmcD&i7*9Xu&gSwnTyIa~(Ue@65N3C7)`HIWHl;lD?-i;ibO;Ti3Kb($a z3pyWiGm7J&NAv<{1T?MQNAZB{L(@i=nU_>kL?1y?m79_`j^`UefbwSBW$1SKO5~#8 zz_U}(SSmf=2h2gh&pp?9)HBUFq7%1tG9%Av-QTPI@z;pLuD1K5vOZM?YrSMb%d&UX zJ_-wA4AG~0-$IDDPusR;t_GYL(`zgaDvWPM>C?nPME@)|9VDPN;|n=Nxl6?Vnt>j) ztMxSI51OuTHLSmOw>2KuF)#MepZPmU!tWi!s*NsEiTj-2^IVFG`J1OUE(5LWa*FdAI@BldD?G3#ZWu7jl$VOyZuN/<_AssetsStack._instructModelPrefix> +option.s3url = s3://llm-rag/buffer-instruct-003-model/ \ No newline at end of file From 6ab85b0508643ec4249c025cbc4a48dc3c91994a Mon Sep 17 00:00:00 2001 From: Ning Date: Mon, 13 Nov 2023 21:33:57 +0800 Subject: [PATCH 22/37] chore: restructure --- source/panel/.env_sample | 3 + source/panel/app.py | 202 ++++++++++++++++++++++++++++++++++ source/panel/requirements.txt | 6 + 3 files changed, 211 insertions(+) create mode 100644 source/panel/.env_sample create mode 100644 source/panel/app.py create mode 100644 source/panel/requirements.txt diff --git a/source/panel/.env_sample b/source/panel/.env_sample new file mode 100644 index 00000000..34c96a45 --- /dev/null +++ b/source/panel/.env_sample @@ -0,0 +1,3 @@ +PIPELINE_URL='Check from CDK output' +S3_BUCKET_NAME='Check from CDK output' +GLUE_JOB_NAME='Check from CDK output' diff --git a/source/panel/app.py b/source/panel/app.py new file mode 100644 index 00000000..31af707b --- /dev/null +++ b/source/panel/app.py @@ -0,0 +1,202 @@ +from dotenv import load_dotenv +import os +from PyPDF2 import PdfReader +import streamlit as st +from langchain.text_splitter import CharacterTextSplitter +from langchain.embeddings.openai import OpenAIEmbeddings +from langchain.vectorstores import FAISS +from langchain.chains.question_answering import load_qa_chain +from langchain.llms import OpenAI +from langchain.callbacks import get_openai_callback +import boto3 +import requests +import json +import time +# Load environment variables +load_dotenv() + +# Create an S3 client +s3 = boto3.client('s3') +glue = boto3.client('glue') +# load the job name from environment variable and convert it to string like 'PythonShellJobB6964098-YYlLj16uCsAn' +glue_job_name = str(os.getenv('GLUE_JOB_NAME')) + +def process_text(text): + # 
Split the text into chunks using langchain
+    text_splitter = CharacterTextSplitter(
+        separator="\n",
+        chunk_size=1000,
+        chunk_overlap=200,
+        length_function=len
+    )
+    chunks = text_splitter.split_text(text)
+
+    # Convert the chunks of text into embeddings to form a knowledge base; be aware of the rate limit.
+    # Rate-limit errors look like "reached for text-embedding-ada-002 in organization org-xx on tokens per min. Limit: 150000 / min. Current: 1 / min."
+    embeddings = OpenAIEmbeddings()
+    knowledgeBase = FAISS.from_texts(chunks, embeddings)
+
+    return knowledgeBase
+
+def pipeline_tab():
+    st.title("LLM Bot ETL Pipeline")
+    # text box to allow the user to input the url address of the pipeline, with a default value
+    pipeline_url = st.text_input('Pipeline URL', value=os.getenv('PIPELINE_URL'))
+
+    col1, col2 = st.columns(2)
+    with col1:
+        # sub panel to upload a pdf and trigger the pipeline
+        st.subheader('Online ETL Job')
+        # adjust the width of the file uploader and hint text
+        pdf = st.file_uploader('Upload your Document', type='pdf')
+
+        if pdf is not None:
+            # upload the pdf onto the s3 bucket created in the CDK stack with the fixed prefix 'documents', and trigger the pipeline
+            s3.upload_fileobj(pdf, os.getenv('S3_BUCKET_NAME'), 'documents/' + pdf.name)
+
+            # add hint text to tell the user that the online ETL job will be triggered automatically after the pdf is uploaded
+            st.markdown('**Note:** The online ETL job will be triggered automatically after the pdf is uploaded.')
+
+    with col2:
+        # sub panel to operate and monitor the offline ETL job running on AWS Glue
+        # input box to allow the user to input the request body, specify the endpoint url, and a button to trigger the request to the endpoint
+        st.subheader('Offline ETL Job')
+
+        # dropdown to list all available s3 buckets and allow the user to select one for further operation
+        s3_buckets = s3.list_buckets()
+        s3_bucket_names = [bucket['Name'] for bucket in s3_buckets['Buckets']]
+        s3_bucket_name = st.selectbox('Select S3 Bucket', s3_bucket_names)
+
+        # dropdown to list all objects under the selected s3 bucket and allow the user to select one for further operation
+        s3_objects = s3.list_objects(Bucket=s3_bucket_name)
+        s3_object_names = [obj['Key'] for obj in s3_objects['Contents']]
+        s3_object_name = st.selectbox('Select S3 Object', s3_object_names)
+
+        # simple checkboxes to allow the user to select options that trigger the pipeline
+        col3, col4 = st.columns(2)
+        with col3:
+            documentEnhance = st.checkbox('Doc Enhance')
+            qaPairEnhance = st.checkbox('QA Pair Enhance')
+        with col4:
+            keyWordExtract = st.checkbox('Key Word Extract')
+            textSummarize = st.checkbox('Text Summarize')
+
+        # request body to be sent to the endpoint
+        request_body = {
+            's3Bucket': s3_bucket_name,
+            's3Prefix': s3_object_name,
+            'documentEnhance': documentEnhance,
+            'qaEnhance': str(qaPairEnhance),
+            'keyWordExtract': keyWordExtract,
+            'textSummarize': textSummarize,
+            'offline': True
+        }
+        # send button to trigger the request to the endpoint, with s3_bucket_name and s3_object_name in the request body, in conformance with the ETL API contract
+        send_button = st.button('Start Offline Job')
+        if send_button:
+            response = requests.post(pipeline_url + '/etl', json=request_body, headers={'Content-Type': 'application/json'})
+            st.text_area('Response:', value=response.text, height=200, max_chars=None)
+
+    # progress bar to show the offline ETL job running status
+    st.subheader('Online & Offline ETL Job Status')
+    refresh_button = st.button('Refresh')
+    if refresh_button:
+        # list all job runs with a specific job name
+        job_runs = 
glue.get_job_runs(JobName=glue_job_name, MaxResults=1) + # get the latest job run id + job_run_id = job_runs['JobRuns'][0]['Id'] + # get the latest job run status + job_status = glue.get_job_run(JobName=glue_job_name, RunId=job_run_id)['JobRun']['JobRunState'] + # output the job status details with slim height + st.text_area('Job Status:', value=json.dumps(job_status, indent=4), height=100, max_chars=None) + + # sub pannel to query and search the embedding in AOS + st.subheader('Query and Search AOS') + query = st.text_input('Input your query body here', value='{"aos_index": "chatbot-index", "query": {"operation": "match_all", "match_all": {}}}') + # send button to trigger the request sending to the endpoint with query as request body + + request_body = { + 'aos_index': 'chatbot-index', + 'operation': 'match_all', + 'body': '' + } + send_button = st.button('Send') + if send_button: + response = requests.get(pipeline_url + '/embedding', json=request_body, headers={'Content-Type': 'application/json'}) + st.text_area('Response:', value=response.text, height=200, max_chars=None) + +def llm_bot_tab(): + # user input box to allow user input question + st.title("LLM Bot") + query = st.text_input('Ask a question to the PDF') + # cancel button to allow user to cancel the question + cancel_button = st.button('Cancel') + if cancel_button: + st.stop() + # send button to trigger the request sending to the endpoint with query as request body + send_button = st.button('Send') + if send_button: + # request body to be sent to the endpoint + request_body = { + "model": "knowledge_qa", + "messages": [ + { + "role": "user", + "content": query + } + ], + "temperature": 0.7 + } + response = requests.post(os.getenv('PIPELINE_URL') + '/llm', json=request_body, headers={'Content-Type': 'application/json'}) + try: + data_dict = json.loads(response.text) + content = data_dict["choices"][0]["message"]["content"] + st.text_area('Response:', value=content.encode('utf-8').decode('utf-8'), height=200, max_chars=None) + except json.JSONDecodeError as e: + st.error(f"Failed to parse response as JSON: {e}") + st.text(response.text) + # data_dict = response.text.json() + # content = data_dict["choices"][0]["message"]["content"] + # st.text_area('Response:', value=content.encode('utf-8').decode('unicode_escape'), height=200, max_chars=None) + +def main(): + # Create a tab bar + st.sidebar.title("LLM Bot") + tabs = ["ETL Pipeline", "LLM Bot"] + page = st.sidebar.radio("Select a tab", tabs) + if page == "ETL Pipeline": + pipeline_tab() + elif page == "LLM Bot": + llm_bot_tab() + + # using libary and OpenAI for local testing, comment for now + + # if pdf is not None: + # pdf_reader = PdfReader(pdf) + # # Text variable will store the pdf text + # text = "" + # for page in pdf_reader.pages: + # text += page.extract_text() + + # # Create the knowledge base object + # knowledgeBase = process_text(text) + + # query = st.text_input('Ask a question to the PDF') + # cancel_button = st.button('Cancel') + + # if cancel_button: + # st.stop() + + # if query: + # docs = knowledgeBase.similarity_search(query) + # llm = OpenAI() + # chain = load_qa_chain(llm, chain_type='stuff') + + # with get_openai_callback() as cost: + # response = chain.run(input_documents=docs, question=query) + # print(cost) + + # st.write(response) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/source/panel/requirements.txt b/source/panel/requirements.txt new file mode 100644 index 00000000..04af6ac9 --- /dev/null +++ 
b/source/panel/requirements.txt @@ -0,0 +1,6 @@ +python-dotenv +PyPDF2 +streamlit +langchain +openai +tiktoken \ No newline at end of file From 0c54b3ec94838dea2b74b0ebd1c5ec211a3b81bb Mon Sep 17 00:00:00 2001 From: Ning Date: Mon, 13 Nov 2023 21:36:24 +0800 Subject: [PATCH 23/37] chore: clean asset --- src/api-stack.ts | 207 ------------- src/assets-stack.ts | 114 ------- src/ddb-stack.ts | 84 ------ src/ec2-stack.ts | 73 ----- src/etl-stack.ts | 268 ----------------- src/lambda/custom/index.js | 31 -- src/lambda/ddb/rating.py | 63 ---- src/lambda/embedding/Dockerfile | 9 - src/lambda/embedding/main.py | 228 -------------- src/lambda/embedding/requirements.txt | 9 - src/lambda/embedding/utils/aos_utils.py | 146 --------- src/lambda/embedding/utils/sm_utils.py | 73 ----- src/lambda/etl/Dockerfile | 8 - src/lambda/etl/main.py | 43 --- src/lambda/etl/requirements.txt | 9 - src/lambda/executor/Dockerfile | 9 - src/lambda/executor/main.py | 194 ------------ src/lambda/executor/requirements.txt | 7 - src/lambda/executor/utils/aos_utils.py | 158 ---------- src/lambda/executor/utils/ddb_utils.py | 124 -------- src/lambda/executor/utils/llmbot_utils.py | 132 --------- src/lambda/executor/utils/sm_utils.py | 202 ------------- src/llm-stack.ts | 172 ----------- src/main.ts | 110 ------- src/models/cross/code/cross_model.tar.gz | Bin 1146 -> 0 bytes src/models/cross/code/model.py | 68 ----- src/models/cross/code/serving.properties | 5 - .../cross/model/add_your_model_here.txt | 0 src/models/cross/model/model.sh | 57 ---- src/models/embedding/code/model.py | 42 --- src/models/embedding/code/requirements.txt | 3 - src/models/embedding/code/s2e_model.tar.gz | Bin 1264 -> 0 bytes src/models/embedding/code/serving.properties | 5 - .../embedding/model/add_your_model_here.txt | 0 src/models/embedding/model/model.sh | 57 ---- src/models/instruct/code/model.py | 60 ---- src/models/instruct/code/model.tar.gz | Bin 1254 -> 0 bytes src/models/instruct/code/requirements.txt | 3 - src/models/instruct/code/serving.properties | 6 - .../instruct/model/add_your_model_here.txt | 0 src/models/instruct/model/model.sh | 57 ---- src/os-stack.ts | 47 --- src/panel/.env_sample | 3 - src/panel/app.py | 202 ------------- src/panel/requirements.txt | 6 - src/sample/dth.txt | 18 -- src/sample/embedding_wrapper.py | 277 ------------------ src/sample/langchain_sample/csdc_llm.py | 239 --------------- src/sample/langchain_sample/llm_wrapper.py | 262 ----------------- src/sample/requirements.txt | 7 - src/sample/sagemaker_utils.py | 159 ---------- src/sample/sample_llm.py | 19 -- src/sample/sample_sm.py | 136 --------- src/scripts/dep/README.md | 19 -- .../dist/llm_bot_dep-0.1.0-py3-none-any.whl | Bin 24501 -> 0 bytes .../dist/nougat_ocr-0.1.17-py3-none-any.whl | Bin 82497 -> 0 bytes src/scripts/dep/llm_bot_dep/__init__.py | 0 src/scripts/dep/llm_bot_dep/aos_utils.py | 146 --------- src/scripts/dep/llm_bot_dep/enhance_utils.py | 243 --------------- .../dep/llm_bot_dep/loaders/__init__.py | 0 src/scripts/dep/llm_bot_dep/loaders/auto.py | 29 -- src/scripts/dep/llm_bot_dep/loaders/csv.py | 172 ----------- src/scripts/dep/llm_bot_dep/loaders/docx.py | 70 ----- src/scripts/dep/llm_bot_dep/loaders/html.py | 71 ----- src/scripts/dep/llm_bot_dep/loaders/image.py | 5 - .../dep/llm_bot_dep/loaders/markdown.py | 49 ---- src/scripts/dep/llm_bot_dep/loaders/pdf.py | 171 ----------- src/scripts/dep/llm_bot_dep/loaders/text.py | 54 ---- src/scripts/dep/llm_bot_dep/sm_utils.py | 71 ----- src/scripts/dep/llm_bot_dep/splitter_utils.py | 187 
------------ src/scripts/dep/setup.py | 20 -- src/scripts/glue-job-script.py | 270 ----------------- src/vpc-stack.ts | 43 --- test/main.test.ts | 11 - 74 files changed, 5842 deletions(-) delete mode 100644 src/api-stack.ts delete mode 100644 src/assets-stack.ts delete mode 100644 src/ddb-stack.ts delete mode 100644 src/ec2-stack.ts delete mode 100644 src/etl-stack.ts delete mode 100644 src/lambda/custom/index.js delete mode 100644 src/lambda/ddb/rating.py delete mode 100644 src/lambda/embedding/Dockerfile delete mode 100644 src/lambda/embedding/main.py delete mode 100644 src/lambda/embedding/requirements.txt delete mode 100644 src/lambda/embedding/utils/aos_utils.py delete mode 100644 src/lambda/embedding/utils/sm_utils.py delete mode 100644 src/lambda/etl/Dockerfile delete mode 100644 src/lambda/etl/main.py delete mode 100644 src/lambda/etl/requirements.txt delete mode 100644 src/lambda/executor/Dockerfile delete mode 100644 src/lambda/executor/main.py delete mode 100644 src/lambda/executor/requirements.txt delete mode 100644 src/lambda/executor/utils/aos_utils.py delete mode 100644 src/lambda/executor/utils/ddb_utils.py delete mode 100644 src/lambda/executor/utils/llmbot_utils.py delete mode 100644 src/lambda/executor/utils/sm_utils.py delete mode 100644 src/llm-stack.ts delete mode 100644 src/main.ts delete mode 100644 src/models/cross/code/cross_model.tar.gz delete mode 100644 src/models/cross/code/model.py delete mode 100644 src/models/cross/code/serving.properties delete mode 100644 src/models/cross/model/add_your_model_here.txt delete mode 100755 src/models/cross/model/model.sh delete mode 100644 src/models/embedding/code/model.py delete mode 100644 src/models/embedding/code/requirements.txt delete mode 100644 src/models/embedding/code/s2e_model.tar.gz delete mode 100644 src/models/embedding/code/serving.properties delete mode 100644 src/models/embedding/model/add_your_model_here.txt delete mode 100755 src/models/embedding/model/model.sh delete mode 100644 src/models/instruct/code/model.py delete mode 100644 src/models/instruct/code/model.tar.gz delete mode 100644 src/models/instruct/code/requirements.txt delete mode 100644 src/models/instruct/code/serving.properties delete mode 100644 src/models/instruct/model/add_your_model_here.txt delete mode 100755 src/models/instruct/model/model.sh delete mode 100644 src/os-stack.ts delete mode 100644 src/panel/.env_sample delete mode 100644 src/panel/app.py delete mode 100644 src/panel/requirements.txt delete mode 100644 src/sample/dth.txt delete mode 100644 src/sample/embedding_wrapper.py delete mode 100644 src/sample/langchain_sample/csdc_llm.py delete mode 100644 src/sample/langchain_sample/llm_wrapper.py delete mode 100644 src/sample/requirements.txt delete mode 100644 src/sample/sagemaker_utils.py delete mode 100644 src/sample/sample_llm.py delete mode 100644 src/sample/sample_sm.py delete mode 100644 src/scripts/dep/README.md delete mode 100644 src/scripts/dep/dist/llm_bot_dep-0.1.0-py3-none-any.whl delete mode 100644 src/scripts/dep/dist/nougat_ocr-0.1.17-py3-none-any.whl delete mode 100644 src/scripts/dep/llm_bot_dep/__init__.py delete mode 100644 src/scripts/dep/llm_bot_dep/aos_utils.py delete mode 100644 src/scripts/dep/llm_bot_dep/enhance_utils.py delete mode 100644 src/scripts/dep/llm_bot_dep/loaders/__init__.py delete mode 100644 src/scripts/dep/llm_bot_dep/loaders/auto.py delete mode 100644 src/scripts/dep/llm_bot_dep/loaders/csv.py delete mode 100644 src/scripts/dep/llm_bot_dep/loaders/docx.py delete mode 100644 
src/scripts/dep/llm_bot_dep/loaders/html.py delete mode 100644 src/scripts/dep/llm_bot_dep/loaders/image.py delete mode 100644 src/scripts/dep/llm_bot_dep/loaders/markdown.py delete mode 100644 src/scripts/dep/llm_bot_dep/loaders/pdf.py delete mode 100644 src/scripts/dep/llm_bot_dep/loaders/text.py delete mode 100644 src/scripts/dep/llm_bot_dep/sm_utils.py delete mode 100644 src/scripts/dep/llm_bot_dep/splitter_utils.py delete mode 100644 src/scripts/dep/setup.py delete mode 100644 src/scripts/glue-job-script.py delete mode 100644 src/vpc-stack.ts delete mode 100644 test/main.test.ts diff --git a/src/api-stack.ts b/src/api-stack.ts deleted file mode 100644 index e4dc7867..00000000 --- a/src/api-stack.ts +++ /dev/null @@ -1,207 +0,0 @@ -import { NestedStack, StackProps, Duration, Aws } from 'aws-cdk-lib'; -import { DockerImageFunction, Handler } from 'aws-cdk-lib/aws-lambda'; -import { DockerImageCode, Architecture } from 'aws-cdk-lib/aws-lambda'; -import * as iam from "aws-cdk-lib/aws-iam"; -import * as ec2 from 'aws-cdk-lib/aws-ec2'; -import * as apigw from 'aws-cdk-lib/aws-apigateway'; -import * as s3 from 'aws-cdk-lib/aws-s3'; -import * as s3n from 'aws-cdk-lib/aws-s3-notifications'; -import * as sfn from 'aws-cdk-lib/aws-stepfunctions'; -import * as lambda from 'aws-cdk-lib/aws-lambda'; -import { Construct } from 'constructs'; -import { join } from "path"; - -interface apiStackProps extends StackProps { - _vpc: ec2.Vpc; - _securityGroup: ec2.SecurityGroup; - _domainEndpoint: string; - _crossEndPoint: string; - _embeddingEndPoint: string; - _instructEndPoint: string; - _chatSessionTable: string; - // type of StepFunctions - _sfnOutput: sfn.StateMachine; -} - -export class LLMApiStack extends NestedStack { - - _apiEndpoint; - _documentBucket; - constructor(scope: Construct, id: string, props: apiStackProps) { - super(scope, id, props); - - const _vpc = props._vpc - const _securityGroup = props._securityGroup - const _domainEndpoint = props._domainEndpoint - const _aosIndex = "chatbot-index" - const _chatSessionTable = props._chatSessionTable - - // s3 bucket for storing documents - const _S3Bucket = new s3.Bucket(this, 'llm-bot-documents', { - bucketName: `llm-bot-documents-${Aws.ACCOUNT_ID}-${Aws.REGION}`, - blockPublicAccess: s3.BlockPublicAccess.BLOCK_ALL, - }); - - const lambdaExecutor = new DockerImageFunction(this, - "lambdaExecutor", { - code: DockerImageCode.fromImageAsset(join(__dirname, "../src/lambda/executor")), - timeout: Duration.minutes(15), - memorySize: 1024, - vpc: _vpc, - vpcSubnets: { - subnets: _vpc.privateSubnets, - }, - securityGroups: [_securityGroup], - architecture: Architecture.X86_64, - environment: { - aos_endpoint: _domainEndpoint, - llm_endpoint: props._instructEndPoint, - embedding_endpoint: props._embeddingEndPoint, - cross_endpoint: props._crossEndPoint, - aos_index: _aosIndex, - chat_session_table: _chatSessionTable, - }, - }); - - lambdaExecutor.addToRolePolicy(new iam.PolicyStatement({ - // principals: [new iam.AnyPrincipal()], - actions: [ - "sagemaker:InvokeEndpointAsync", - "sagemaker:InvokeEndpoint", - "s3:List*", - "s3:Put*", - "s3:Get*", - "es:*", - "dynamodb:*", - "secretsmanager:GetSecretValue", - ], - effect: iam.Effect.ALLOW, - resources: ['*'], - } - )) - - const lambdaEmbedding = new DockerImageFunction(this, - "lambdaEmbedding", { - code: DockerImageCode.fromImageAsset(join(__dirname, "../src/lambda/embedding")), - timeout: Duration.minutes(15), - memorySize: 4096, - vpc: _vpc, - vpcSubnets: { - subnets: _vpc.privateSubnets, - }, - 
securityGroups: [_securityGroup], - architecture: Architecture.X86_64, - environment: { - document_bucket: _S3Bucket.bucketName, - opensearch_cluster_domain: _domainEndpoint, - llm_endpoint: props._instructEndPoint, - embedding_endpoint: props._embeddingEndPoint, - cross_endpoint: props._crossEndPoint, - }, - }); - - lambdaEmbedding.addToRolePolicy(new iam.PolicyStatement({ - actions: [ - "sagemaker:InvokeEndpointAsync", - "sagemaker:InvokeEndpoint", - "s3:List*", - "s3:Put*", - "s3:Get*", - "es:*", - ], - effect: iam.Effect.ALLOW, - resources: ['*'], - } - )) - // Define the API Gateway - const api = new apigw.RestApi(this, 'llmApi', { - restApiName: 'llmApi', - description: 'This service serves the LLM API.', - endpointConfiguration: { - types: [apigw.EndpointType.REGIONAL] - }, - deployOptions: { - stageName: 'v1', - metricsEnabled: true, - loggingLevel: apigw.MethodLoggingLevel.INFO, - dataTraceEnabled: true, - tracingEnabled: true, - }, - }); - - // Define the API Gateway Lambda Integration with proxy and no integration responses - const lambdaExecutorIntegration = new apigw.LambdaIntegration(lambdaExecutor, { proxy: true, }); - - // Define the API Gateway Method - const apiResourceLLM = api.root.addResource('llm'); - apiResourceLLM.addMethod('POST', lambdaExecutorIntegration); - - // Define the API Gateway Lambda Integration with proxy and no integration responses - const lambdaEmbeddingIntegration = new apigw.LambdaIntegration(lambdaEmbedding, { proxy: true, }); - - // Define the API Gateway Method - const apiResourceEmbedding = api.root.addResource('embedding'); - apiResourceEmbedding.addMethod('POST', lambdaEmbeddingIntegration); - - // Add a GET method to query & search the index in OpenSearch; the POST method above should eventually be deprecated and replaced by AWS Glue - apiResourceEmbedding.addMethod('GET', lambdaEmbeddingIntegration); - - // Integration with Step Function to trigger ETL process - // Lambda function to trigger Step Function - const lambdaStepFunction = new lambda.Function(this, 'lambdaStepFunction', { - // inline code is used for simplicity (formatted to avoid indentation errors); no container packaging time is needed - code: lambda.Code.fromInline - (` -import json -import boto3 -import os -client = boto3.client('stepfunctions') -def handler(event, context): - # First check the event for a possible S3 created event - inputPayload = {} - if 'Records' in event: - print('S3 created event detected') - # TODO, Aggregate the bucket and key from the event object for S3 created event - bucket = event['Records'][0]['s3']['bucket']['name'] - key = event['Records'][0]['s3']['object']['key'] - # Pass the bucket and key to the Step Function, align with the input schema in etl-stack.ts - inputPayload=json.dumps({'s3Bucket': bucket, 's3Prefix': key, 'offline': 'false'}) - else: - print('API Gateway event detected') - # Parse the body from the event object - body = json.loads(event['body']) - # Pass the parsed body to the Step Function - inputPayload=json.dumps(body) - - response = client.start_execution( - stateMachineArn=os.environ['sfn_arn'], - input=inputPayload - ) - return { - 'statusCode': 200, - 'body': json.dumps('Step Function triggered, Step Function ARN: ' + response['executionArn'] + ' Input Payload: ' + inputPayload) - } - `), - handler: 'index.handler', - runtime: lambda.Runtime.PYTHON_3_9, - timeout: Duration.seconds(30), - environment: { - sfn_arn: props._sfnOutput.stateMachineArn, - }, - memorySize: 256, - }); - - // grant the Lambda function permission to start the Step Function - 
props._sfnOutput.grantStartExecution(lambdaStepFunction); - - const apiResourceStepFunction = api.root.addResource('etl'); - apiResourceStepFunction.addMethod('POST', new apigw.LambdaIntegration(lambdaStepFunction)); - - // add s3 event notification when file uploaded to the bucket - _S3Bucket.addEventNotification(s3.EventType.OBJECT_CREATED, new s3n.LambdaDestination(lambdaStepFunction), { prefix: 'documents/' }); - _S3Bucket.grantReadWrite(lambdaStepFunction); - - this._apiEndpoint = api.url - this._documentBucket = _S3Bucket.bucketName - } -} \ No newline at end of file diff --git a/src/assets-stack.ts b/src/assets-stack.ts deleted file mode 100644 index 9f6cebda..00000000 --- a/src/assets-stack.ts +++ /dev/null @@ -1,114 +0,0 @@ -import { NestedStack, StackProps } from 'aws-cdk-lib'; -import { Construct } from 'constructs'; - -import * as s3 from 'aws-cdk-lib/aws-s3'; -import * as s3assets from 'aws-cdk-lib/aws-s3-assets'; -import * as s3deploy from 'aws-cdk-lib/aws-s3-deployment'; - -import * as dotenv from "dotenv"; -dotenv.config(); - -interface assetsStackProps extends StackProps { - _s3ModelAssets: string; -} - -export class AssetsStack extends NestedStack { - _crossCodePrefix; - _embeddingCodePrefix; - _instructCodePrefix; - - constructor(scope: Construct, id: string, props: assetsStackProps) { - super(scope, id, props); - - // Prepare model asset to download from Hugging Face follow script - - // Check if _s3ModelAssets is provided, create a new s3 bucket if not - const _S3Bucket = props._s3ModelAssets ? s3.Bucket.fromBucketName(this, 'llm-rag', props._s3ModelAssets) : new s3.Bucket(this, 'llm-rag', { - // Fixed name for serving.properties for now, default is llm-rag inherit from main stack - bucketName: props._s3ModelAssets, - blockPublicAccess: s3.BlockPublicAccess.BLOCK_ALL, - }); - - // const crossModelPrefix = props._s3BucketPrefix - const crossModelPrefix = 'buffer-cross-001-model' - const crossCodePrefix = 'buffer_cross_001_deploy_code' - const embeddingModelPrefix = 'buffer-embedding-002-model' - const embeddingCodePrefix = 'buffer_embedding_002_deploy_code' - const instructModelPrefix = 'buffer-instruct-003-model' - const instructCodePrefix = 'buffer_instruct_003_deploy_code' - - // CROSS MODEL - // Define a local asset for code - const crossCodeAsset = new s3assets.Asset(this, 'crossCodeAsset', { - path: 'src/models/cross/code', - }); - - const crossCodeAssetDeployment = new s3deploy.BucketDeployment(this, 'crossCodeAssetDeployment', { - sources: [s3deploy.Source.asset('src/models/cross/code')], - destinationBucket: _S3Bucket, - destinationKeyPrefix: crossCodePrefix, - }); - this._crossCodePrefix = crossCodePrefix - - // EMBEDDING MODEL - // Define a local asset for code - const embeddingCodeAsset = new s3assets.Asset(this, 'embeddingCodeAsset', { - path: 'src/models/embedding/code', - }); - - const embeddingCodeAssetDeployment = new s3deploy.BucketDeployment(this, 'embeddingCodeAssetDeployment', { - sources: [s3deploy.Source.asset('src/models/embedding/code')], - destinationBucket: _S3Bucket, - destinationKeyPrefix: embeddingCodePrefix, - }); - this._embeddingCodePrefix = embeddingCodePrefix - - // INSTRUCT MODEL - // Define a local asset for code - const instructCodeAsset = new s3assets.Asset(this, 'instructCodeAsset', { - path: 'src/models/instruct/code', - }); - - const instructCodeAssetDeployment = new s3deploy.BucketDeployment(this, 'instructCodeAssetDeployment', { - sources: [s3deploy.Source.asset('src/models/instruct/code')], - destinationBucket: _S3Bucket, - 
destinationKeyPrefix: instructCodePrefix, - }); - this._instructCodePrefix = instructCodePrefix - - // Skip the deployment if _s3ModelAssets is provided - if (!props._s3ModelAssets) { - // Define a local asset for model - const crossModelAsset = new s3assets.Asset(this, 'ModelAsset', { - path: 'src/models/cross/model', - }); - const crossModelAssetDeployment = new s3deploy.BucketDeployment(this, 'crossModelAssetDeployment', { - sources: [s3deploy.Source.asset('src/models/cross/model')], - destinationBucket: _S3Bucket, - destinationKeyPrefix: crossModelPrefix, - // memoryLimit: 4096, - }); - - // Define a local asset for model - const embeddingModelAsset = new s3assets.Asset(this, 'embeddingModelAsset', { - path: 'src/models/embedding/model', - }); - const embeddingModelAssetDeployment = new s3deploy.BucketDeployment(this, 'embeddingModelAssetDeployment', { - sources: [s3deploy.Source.asset('src/models/embedding/model')], - destinationBucket: _S3Bucket, - destinationKeyPrefix: embeddingModelPrefix, - }); - - // Define a local asset for model - const instructModelAsset = new s3assets.Asset(this, 'instructModelAsset', { - path: 'src/models/instruct/model', - }); - - const instructModelAssetDeployment = new s3deploy.BucketDeployment(this, 'instructModelAssetDeployment', { - sources: [s3deploy.Source.asset('src/models/instruct/model')], - destinationBucket: _S3Bucket, - destinationKeyPrefix: instructModelPrefix, - }); - } - } -} \ No newline at end of file diff --git a/src/ddb-stack.ts b/src/ddb-stack.ts deleted file mode 100644 index f166f8ef..00000000 --- a/src/ddb-stack.ts +++ /dev/null @@ -1,84 +0,0 @@ -import { NestedStack, StackProps, Duration, CfnOutput,NestedStackProps, RemovalPolicy } from "aws-cdk-lib"; -import { Construct } from "constructs"; -import { Table, AttributeType } from "aws-cdk-lib/aws-dynamodb"; -import { Function, Runtime, Code } from "aws-cdk-lib/aws-lambda"; -import { LambdaIntegration, RestApi } from "aws-cdk-lib/aws-apigateway"; -import * as lambda from "aws-cdk-lib/aws-lambda"; -import * as iam from "aws-cdk-lib/aws-iam"; -import * as ec2 from 'aws-cdk-lib/aws-ec2'; -import * as apigw from 'aws-cdk-lib/aws-apigateway'; -import { join } from "path"; - -interface ddbStackProps extends StackProps { - _vpc: ec2.Vpc; - _securityGroup: ec2.SecurityGroup; - _domainEndpoint: string; -} - -export class DynamoDBStack extends NestedStack { - - _chatSessionTable; - constructor(scope: Construct, id: string, props: ddbStackProps) { - super(scope, id, props); - const _vpc = props._vpc; - - // Create the DynamoDB table - const table = new Table(this, "modelRatingTable", { - tableName: "modelRatingInfo", - partitionKey: { - name: "session_id", - type: AttributeType.STRING, - }, - // removalPolicy: RemovalPolicy.DESTROY, - }); - - // Create the Lambda functions - const postFn = new lambda.Function(this, "PostRatingFunction", { - runtime:lambda.Runtime.PYTHON_3_7, - handler: "rating.lambda_handler", - code: Code.fromAsset(join(__dirname, "../src/lambda/ddb")), - environment: { - TABLE_NAME: table.tableName, - }, - vpc: _vpc, - vpcSubnets: { - subnets: _vpc.privateSubnets, - }, - securityGroups: [props._securityGroup] - }); - - postFn.addToRolePolicy(new iam.PolicyStatement({ - actions: [ - "dynamodb:*" - ], - effect: iam.Effect.ALLOW, - resources: ['*'], - } - )) - - - // Grant permissions to the Lambda functions to access the DynamoDB table - table.grantReadWriteData(postFn); - - - const api = new apigw.RestApi(this, 'ddbApi', { - restApiName: 'ddbApi', - description: 'This service 
serves the dynamodb which stores the data of model rating.', - endpointConfiguration: { - types: [apigw.EndpointType.REGIONAL] - }, - deployOptions: { - stageName: 'v1', - metricsEnabled: true, - loggingLevel: apigw.MethodLoggingLevel.INFO, - dataTraceEnabled: true, - tracingEnabled: true, - }, - }); - // Define the API resources and methods - const session = api.root.addResource('rating'); - session.addMethod("POST", new LambdaIntegration(postFn)); - - this._chatSessionTable = table.tableName; - } -} diff --git a/src/ec2-stack.ts b/src/ec2-stack.ts deleted file mode 100644 index 5f20585c..00000000 --- a/src/ec2-stack.ts +++ /dev/null @@ -1,73 +0,0 @@ - -import { NestedStack, StackProps } from 'aws-cdk-lib'; -import { Construct } from 'constructs'; - -import * as ec2 from "aws-cdk-lib/aws-ec2"; -import * as iam from "aws-cdk-lib/aws-iam"; -import { Asset } from 'aws-cdk-lib/aws-s3-assets'; - -import path from "path"; - -interface Ec2StackProps extends StackProps { - _vpc: ec2.Vpc; - _securityGroup: ec2.SecurityGroup; - _domainEndpoint: string; -} - -export class Ec2Stack extends NestedStack { - _instanceId; - _dnsName; - _publicIP; - - constructor(scope: Construct, id: string, props: Ec2StackProps) { - super(scope, id, props); - const _vpc = props._vpc; - const _securityGroup = props._securityGroup; - const _domainEndpoint = props._domainEndpoint; - - _securityGroup.addIngressRule(ec2.Peer.anyIpv4(), ec2.Port.tcp(22), 'Allow SSH Access') - _securityGroup.addIngressRule(ec2.Peer.anyIpv4(), ec2.Port.tcp(443), 'Allow HTTPS Access') - _securityGroup.addIngressRule(ec2.Peer.anyIpv4(), ec2.Port.tcp(8081), 'Allow HTTP 8081 port Access') - _securityGroup.addIngressRule(ec2.Peer.anyIpv4(), ec2.Port.tcp(80), 'Allow HTTP Access') - _securityGroup.addIngressRule(_securityGroup, ec2.Port.allTraffic(), 'Allow Self Access') - - const role = new iam.Role(this, 'ec2Role', { - assumedBy: new iam.ServicePrincipal('ec2.amazonaws.com') - }) - - role.addManagedPolicy(iam.ManagedPolicy.fromAwsManagedPolicyName('AmazonSSMManagedInstanceCore')) - - const ami = new ec2.AmazonLinuxImage({ - generation: ec2.AmazonLinuxGeneration.AMAZON_LINUX_2, - cpuType: ec2.AmazonLinuxCpuType.X86_64 - }); - - // Create the instance using the Security Group, AMI, and KeyPair defined in the VPC created - const ec2Instance = new ec2.Instance(this, 'ProxyInstance', { - vpc: _vpc, - instanceType: ec2.InstanceType.of(ec2.InstanceClass.T3, ec2.InstanceSize.MICRO), - machineImage: ami, - securityGroup: _securityGroup, - vpcSubnets: {subnetType: ec2.SubnetType.PUBLIC,}, - // specify the key name for the instance for debugging purposes - // keyName: 'us-east-1', - }); - - const asset = new Asset(this, 'UserdataAsset', { path: path.join(__dirname, '../script/ec2config.sh') }); - const localPath = ec2Instance.userData.addS3DownloadCommand({ - bucket: asset.bucket, - bucketKey: asset.s3ObjectKey, - }); - - ec2Instance.userData.addExecuteFileCommand({ - filePath: localPath, - // pass _domainEndpoint as an argument to the script - arguments: _domainEndpoint, - }); - asset.grantRead(ec2Instance.role); - - this._instanceId = ec2Instance.instanceId; - this._dnsName = ec2Instance.instancePublicDnsName; - this._publicIP = ec2Instance.instancePublicIp; - } - } \ No newline at end of file diff --git a/src/etl-stack.ts b/src/etl-stack.ts deleted file mode 100644 index 3f328ef6..00000000 --- a/src/etl-stack.ts +++ /dev/null @@ -1,268 +0,0 @@ -import { NestedStack, StackProps, RemovalPolicy, Duration, Aws } from 'aws-cdk-lib'; -import { Construct } from 
'constructs'; - -import * as iam from 'aws-cdk-lib/aws-iam'; -import * as api from 'aws-cdk-lib/aws-apigateway'; -import * as glue from '@aws-cdk/aws-glue-alpha'; -import * as sfn from 'aws-cdk-lib/aws-stepfunctions'; -import * as tasks from 'aws-cdk-lib/aws-stepfunctions-tasks'; -import * as ec2 from 'aws-cdk-lib/aws-ec2'; -import * as sns from 'aws-cdk-lib/aws-sns'; -import * as subscriptions from 'aws-cdk-lib/aws-sns-subscriptions'; -import * as s3 from 'aws-cdk-lib/aws-s3'; -import * as s3assets from 'aws-cdk-lib/aws-s3-assets'; -import * as s3deploy from 'aws-cdk-lib/aws-s3-deployment'; -import * as dynamodb from 'aws-cdk-lib/aws-dynamodb'; -import path from "path"; -import * as lambda from 'aws-cdk-lib/aws-lambda'; -import { DockerImageCode, Architecture, DockerImageFunction} from 'aws-cdk-lib/aws-lambda'; -import { join } from "path"; -import { off } from 'process'; - -interface etlStackProps extends StackProps { - _vpc: ec2.Vpc; - _subnets: ec2.ISubnet[]; - _securityGroups: ec2.SecurityGroup; - _domainEndpoint: string; - _embeddingEndpoint: string; - _region: string; - _subEmail: string; -} - -export class EtlStack extends NestedStack { - _sfnOutput; - _jobName; - _jobArn; - _processedObjectsTable; - - constructor(scope: Construct, id: string, props: etlStackProps) { - super(scope, id, props); - - const connection = new glue.Connection(this, 'GlueJobConnection', { - type: glue.ConnectionType.NETWORK, - subnet: props._subnets[0], - securityGroups: [props._securityGroups], - }); - - const table = new dynamodb.Table(this, 'ProcessedObjects', { - partitionKey: { name: 'ObjectKey', type: dynamodb.AttributeType.STRING }, - billingMode: dynamodb.BillingMode.PAY_PER_REQUEST, - }); - - table.addGlobalSecondaryIndex({ - indexName: 'BucketAndPrefixIndex', - partitionKey: { name: 'Bucket', type: dynamodb.AttributeType.STRING }, - sortKey: { name: 'Prefix', type: dynamodb.AttributeType.STRING }, - }); - - // Add ExpiryTimestamp as an attribute but not as a sort key in the base table - table.addGlobalSecondaryIndex({ - indexName: 'ExpiryTimestampIndex', - partitionKey: { name: 'ExpiryTimestamp', type: dynamodb.AttributeType.NUMBER }, - // No sort key for this index - }); - - const _S3Bucket = new s3.Bucket(this, 'llm-bot-glue-lib', { - bucketName: `llm-bot-glue-lib-${Aws.ACCOUNT_ID}-${Aws.REGION}`, - blockPublicAccess: s3.BlockPublicAccess.BLOCK_ALL, - }); - - const extraPythonFiles = new s3deploy.BucketDeployment(this, 'extraPythonFiles', { - sources: [s3deploy.Source.asset('src/scripts/dep/dist')], - destinationBucket: _S3Bucket, - // destinationKeyPrefix: 'llm_bot_dep-0.1.0-py3-none-any.whl', - }); - - // Assemble the extra python files list using _S3Bucket.s3UrlForObject('llm_bot_dep-0.1.0-py3-none-any.whl') and _S3Bucket.s3UrlForObject('nougat_ocr-0.1.17-py3-none-any.whl') and convert to string - const extraPythonFilesList = [_S3Bucket.s3UrlForObject('llm_bot_dep-0.1.0-py3-none-any.whl')].join(','); - - const glueRole = new iam.Role(this, 'ETLGlueJobRole', { - assumedBy: new iam.ServicePrincipal('glue.amazonaws.com'), - // the role is used by the glue job to access AOS and by default it has 1 hour session duration which is not enough for the glue job to finish the embedding injection - maxSessionDuration: Duration.hours(12), - }); - // TODO, narrow down the policy to specific resources and actions - glueRole.addToPrincipalPolicy( - new iam.PolicyStatement({ - actions: [ - "sagemaker:InvokeEndpointAsync", - "sagemaker:InvokeEndpoint", - "s3:*", - "es:*", - "glue:*", - "ec2:*", - 
"dynamodb:*", - "bedrock:*", - // cloudwatch logs - "logs:*", - ], - effect: iam.Effect.ALLOW, - resources: ['*'], - }) - ) - - // Creata glue job to process files speicified in s3 bucket and prefix - const glueJob = new glue.Job(this, 'PythonShellJob', { - executable: glue.JobExecutable.pythonShell({ - glueVersion: glue.GlueVersion.V3_0, - pythonVersion: glue.PythonVersion.THREE_NINE, - script: glue.Code.fromAsset(path.join(__dirname, 'scripts/glue-job-script.py')), - }), - // Worker Type is not supported for Job Command pythonshell and Both workerType and workerCount must be set... - // workerType: glue.WorkerType.G_2X, - // workerCount: 2, - maxConcurrentRuns: 200, - maxRetries: 1, - connections: [connection], - maxCapacity: 1, - role: glueRole, - defaultArguments: { - '--S3_BUCKET.$': sfn.JsonPath.stringAt('$.s3Bucket'), - '--S3_PREFIX.$': sfn.JsonPath.stringAt('$.s3Prefix'), - '--QA_ENHANCEMENT.$': sfn.JsonPath.stringAt('$.qaEnhance'), - '--AOS_ENDPOINT': props._domainEndpoint, - '--REGION': props._region, - '--EMBEDDING_MODEL_ENDPOINT': props._embeddingEndpoint, - '--DOC_INDEX_TABLE': 'chatbot-index', - '--additional-python-modules': 'langchain==0.0.312,beautifulsoup4==4.12.2,requests-aws4auth==1.2.3,boto3==1.28.84,openai==0.28.1,nougat-ocr==0.1.17,pyOpenSSL==23.3.0,tenacity==8.2.3,markdownify==0.11.6,mammoth==1.6.0,chardet==5.2.0', - // add multiple extra python files - '--extra-py-files': extraPythonFilesList - } - }); - - // Create SNS topic and subscription to notify when glue job is completed - const topic = new sns.Topic(this, 'etl-topic', { - displayName: 'etl-topic', - topicName: 'etl-topic', - }); - topic.addSubscription(new subscriptions.EmailSubscription(props._subEmail)); - - // Lambda function to for file deduplication and glue job allocation based on file number - const lambdaETL = new DockerImageFunction(this, - "lambdaETL", { - code: DockerImageCode.fromImageAsset(join(__dirname, "../src/lambda/etl")), - timeout: Duration.minutes(15), - memorySize: 1024, - architecture: Architecture.X86_64, - }); - - lambdaETL.addToRolePolicy(new iam.PolicyStatement({ - actions: [ - // glue job - "glue:StartJobRun", - "s3:List*", - "s3:Put*", - "s3:Get*", - ], - effect: iam.Effect.ALLOW, - resources: ['*'], - } - )) - - const lambdaETLIntegration = new tasks.LambdaInvoke(this, 'lambdaETLIntegration', { - lambdaFunction: lambdaETL, - // Use the result of this invocation to decide how many Glue jobs to run - resultSelector: { - "processedPayload": { - 'batchIndices.$': '$.Payload.batchIndices', - 's3Bucket.$': '$.Payload.s3Bucket', - 's3Prefix.$': '$.Payload.s3Prefix', - 'qaEnhance.$': '$.Payload.qaEnhance', - 'offline.$': '$.Payload.offline', - } - }, - // we need the original input - resultPath: '$.TaskResult', - outputPath: '$.TaskResult.processedPayload', - }); - - const offlineChoice = new sfn.Choice(this, 'Offline or Online', { - comment: 'Check if the job is offline or online', - }); - - const offlineGlueJob = new tasks.GlueStartJobRun(this, 'OfflineGlueJob', { - glueJobName: glueJob.jobName, - integrationPattern: sfn.IntegrationPattern.RUN_JOB, - arguments: sfn.TaskInput.fromObject({ - '--job-language': 'python', - '--JOB_NAME': glueJob.jobName, - '--S3_BUCKET.$': '$.s3Bucket', - '--S3_PREFIX.$': '$.s3Prefix', - '--AOS_ENDPOINT': props._domainEndpoint, - '--EMBEDDING_MODEL_ENDPOINT': props._embeddingEndpoint, - '--REGION': props._region, - '--OFFLINE': 'true', - '--QA_ENHANCEMENT.$': '$.qaEnhance', - // Convert the numeric index to a string - '--BATCH_INDICE.$': 
'States.Format(\'{}\', $.batchIndices)', - '--ProcessedObjectsTable': table.tableName, - }), - }); - - // Define a Map state to run multiple Glue jobs in parallel based on the number of files to process - const mapState = new sfn.Map(this, 'MapState', { - // inputPath should point to the root since we want to pass the entire payload to the iterator - inputPath: '$', - // itemsPath should reference an array. We need to construct this array based on batchIndices - itemsPath: sfn.JsonPath.stringAt('$.batchIndices'), - // set the max concurrency to 0 to run all the jobs in parallel - maxConcurrency: 0, - parameters: { - // These parameters are passed to each iteration of the map state - 's3Bucket.$': '$.s3Bucket', - 's3Prefix.$': '$.s3Prefix', - 'qaEnhance.$': '$.qaEnhance', - // 'index' is a special variable within the Map state that represents the current index - 'batchIndices.$': '$$.Map.Item.Index' // Add this if you need to know the index of the current item in the map state - }, - resultPath: '$.mapResults', - }); - - mapState.iterator(offlineGlueJob); - - // multiplex the same glue job to offline and online - const onlineGlueJob = new tasks.GlueStartJobRun(this, 'OnlineGlueJob', { - glueJobName: glueJob.jobName, - integrationPattern: sfn.IntegrationPattern.RUN_JOB, - arguments: sfn.TaskInput.fromObject({ - '--job-language': 'python', - '--JOB_NAME': glueJob.jobName, - '--S3_BUCKET.$': '$.s3Bucket', - '--S3_PREFIX.$': '$.s3Prefix', - '--AOS_ENDPOINT': props._domainEndpoint, - '--EMBEDDING_MODEL_ENDPOINT': props._embeddingEndpoint, - '--REGION': props._region, - '--OFFLINE': 'false', - '--QA_ENHANCEMENT.$': '$.qaEnhance', - }), - }); - - // Notify the result of the glue job - const notifyTask = new tasks.SnsPublish(this, 'NotifyTask', { - integrationPattern: sfn.IntegrationPattern.REQUEST_RESPONSE, - topic: topic, - message: sfn.TaskInput.fromText(`Glue job ${glueJob.jobName} completed!`), - }); - - offlineChoice.when(sfn.Condition.booleanEquals('$.offline', true), mapState) - .when(sfn.Condition.booleanEquals('$.offline', false), onlineGlueJob) - - // add the notify task to both online and offline branches - mapState.next(notifyTask); - - const sfnDefinition = lambdaETLIntegration.next(offlineChoice) - - const sfnStateMachine = new sfn.StateMachine(this, 'ETLState', { - definitionBody: sfn.DefinitionBody.fromChainable(sfnDefinition), - stateMachineType: sfn.StateMachineType.STANDARD, - // Align with the glue job timeout - timeout: Duration.minutes(2880), - }); - - // Export the Step function to be used in API Gateway - this._sfnOutput = sfnStateMachine; - this._jobName = glueJob.jobName; - this._jobArn = glueJob.jobArn; - this._processedObjectsTable = table.tableName - } -} \ No newline at end of file diff --git a/src/lambda/custom/index.js b/src/lambda/custom/index.js deleted file mode 100644 index 7bfa61b7..00000000 --- a/src/lambda/custom/index.js +++ /dev/null @@ -1,31 +0,0 @@ -const AWS = require('aws-sdk'); -const fs = require('fs'); -const tar = require('tar'); - -// obsolete for now, use script to upload model.tar.gz to s3 instead -exports.handler = async (event) => { - const s3 = new AWS.S3(); - const bucketName = process.env.BUCKET_NAME; - const key = 'model.tar.gz'; - - // Create files A and B - fs.writeFileSync('/tmp/fileA.txt', 'Content of file A'); - fs.writeFileSync('/tmp/fileB.txt', 'Content of file B'); - - // Package the files into model.tar.gz - await tar.c({ - gzip: true, - file: '/tmp/model.tar.gz', - cwd: '/tmp', - }, ['fileA.txt', 'fileB.txt']); - - // Upload 
model.tar.gz to the S3 bucket - const fileStream = fs.createReadStream('/tmp/model.tar.gz'); - await s3.upload({ - Bucket: bucketName, - Key: key, - Body: fileStream, - }).promise(); - - console.log(`Uploaded model.tar.gz to s3://${bucketName}/${key}`); -}; diff --git a/src/lambda/ddb/rating.py b/src/lambda/ddb/rating.py deleted file mode 100644 index 67e4866c..00000000 --- a/src/lambda/ddb/rating.py +++ /dev/null @@ -1,63 +0,0 @@ -import datetime -import json -import boto3 -import os -import uuid - -def lambda_handler(event, context): - dynamodb = boto3.resource('dynamodb') - table_name = os.getenv('TABLE_NAME') - session_table = dynamodb.Table(table_name) - - http_method = event['httpMethod'] - - try: - if http_method == 'POST': - return post_handler(event, session_table) - else: - return { - 'statusCode': 400, - 'body': json.dumps({ - 'message': 'Invalid request method' - }) - } - except Exception as e: - # Return an error response - return { - 'statusCode': 500, - 'body': json.dumps({'error': str(e)}) - } - - -def post_handler(event, session_table): - body = event['body'] - session_id = str(uuid.uuid1()) - required_fields = ['question_content', 'question_answer', 'answer_rating'] - - if not all(field in body for field in required_fields): - return { - 'statusCode': 400, - 'body': json.dumps({ - 'message': 'Missing required fields' - }) - } - session_creation_date = datetime.datetime.now().strftime("%m/%d/%y,%H:%M:%S") - - # inserting values into table - response = session_table.put_item( - Item={ - "session_id":session_id, - "question_content":body['question_content'], - "question_answer":body['question_answer'], - "revised_answer":body['revised_answer'] if 'revised_answer' in body else None, - "answer_rating":body['answer_rating'], - - } - ) - - return { - 'statusCode': 200, - 'body': json.dumps({ - 'message': 'Data inserted successfully' - }) - } \ No newline at end of file diff --git a/src/lambda/embedding/Dockerfile b/src/lambda/embedding/Dockerfile deleted file mode 100644 index 961f6054..00000000 --- a/src/lambda/embedding/Dockerfile +++ /dev/null @@ -1,9 +0,0 @@ -FROM --platform=linux/amd64 public.ecr.aws/lambda/python:3.9 - -COPY requirements.txt . 
-RUN pip3 install -r requirements.txt --target "${LAMBDA_TASK_ROOT}" - -COPY main.py ${LAMBDA_TASK_ROOT} -COPY ./utils/* ${LAMBDA_TASK_ROOT} - -CMD [ "main.lambda_handler" ] diff --git a/src/lambda/embedding/main.py b/src/lambda/embedding/main.py deleted file mode 100644 index 300a9a63..00000000 --- a/src/lambda/embedding/main.py +++ /dev/null @@ -1,228 +0,0 @@ -import os -import time -import json -import logging -import numpy as np -import boto3, json -import tempfile -import nltk - -from langchain.document_loaders import S3DirectoryLoader -from langchain.vectorstores import OpenSearchVectorSearch -from langchain.document_loaders.unstructured import UnstructuredFileLoader -from langchain.text_splitter import RecursiveCharacterTextSplitter -from langchain.schema.document import Document - -from sm_utils import create_sagemaker_embeddings_from_js_model -from requests_aws4auth import AWS4Auth -from aos_utils import OpenSearchClient - -from opensearchpy import OpenSearch, RequestsHttpConnection - -# global constants -MAX_FILE_SIZE = 1024*1024*100 # 100MB -MAX_OS_DOCS_PER_PUT = 20 -CHUNK_SIZE_FOR_DOC_SPLIT = 600 -CHUNK_OVERLAP_FOR_DOC_SPLIT = 20 - -logger = logging.getLogger() -# logging.basicConfig(format='%(asctime)s,%(module)s,%(processName)s,%(levelname)s,%(message)s', level=logging.INFO, stream=sys.stderr) -logger.setLevel(logging.INFO) - -# fetch all the environment variables -_document_bucket = os.environ.get('document_bucket') -_embeddings_model_endpoint_name = os.environ.get('embedding_endpoint') -_opensearch_cluster_domain = os.environ.get('opensearch_cluster_domain') - -s3 = boto3.resource('s3') -aws_region = boto3.Session().region_name -document_bucket = s3.Bucket(_document_bucket) -credentials = boto3.Session().get_credentials() -awsauth = AWS4Auth(credentials.access_key, credentials.secret_key, aws_region, 'es', session_token=credentials.token) - -def load_documents(prefix=""): - docs = [] - for obj in document_bucket.objects.filter(Prefix=prefix): - if obj.key.endswith("/"): # bypass the prefix directory - continue - else: - # loader = S3FileLoader(bucket, obj.key) - with tempfile.TemporaryDirectory(dir='/tmp') as temp_dir: - file_path = f"{temp_dir}/{obj.key}" - logging.info(f"_document_bucket={_document_bucket}, obj.key={obj.key}, file_path={file_path}") - os.makedirs(os.path.dirname(file_path), exist_ok=True) - s3.meta.client.download_file(_document_bucket, obj.key, file_path) - - loader = UnstructuredFileLoader(file_path) - # return loader.load() - docs.extend(loader.load()) - return docs - -def split_documents(docs): - text_splitter = RecursiveCharacterTextSplitter( - # Set a really small chunk size, just to show. 
- chunk_size = CHUNK_SIZE_FOR_DOC_SPLIT, - chunk_overlap = CHUNK_OVERLAP_FOR_DOC_SPLIT, - length_function = len, - ) - - # add a custom metadata field, timestamp and embeddings_model - for doc in docs: - doc.metadata['timestamp'] = time.time() - doc.metadata['embeddings_model'] = _embeddings_model_endpoint_name - chunks = text_splitter.create_documents([doc.page_content for doc in docs], metadatas=[doc.metadata for doc in docs]) - return chunks - -def load_processed_documents(prefix=""): - chunks = [] - for obj in document_bucket.objects.filter(Prefix=prefix): - if obj.key.endswith("/"): # bypass the prefix directory - continue - else: - # loader = S3FileLoader(bucket, obj.key) - with tempfile.TemporaryDirectory(dir='/tmp') as temp_dir: - file_path = f"{temp_dir}/{obj.key}" - logging.info(f"_document_bucket={_document_bucket}, obj.key={obj.key}, file_path={file_path}") - os.makedirs(os.path.dirname(file_path), exist_ok=True) - s3.meta.client.download_file(_document_bucket, obj.key, file_path) - - file_content = json.load(open(file_path, 'r')) - for raw_chunk in file_content: - chunk_source = raw_chunk.get('source') if isinstance(raw_chunk.get('source'), str) else "CSDC & DGR Data 20230830" - chunk = Document(page_content=raw_chunk['content'], metadata={"source": chunk_source}) - chunks.append(chunk) - - for chunk in chunks: - chunk.metadata['timestamp'] = time.time() - chunk.metadata['embeddings_model'] = _embeddings_model_endpoint_name - - return chunks - -def process_shard(shard, embeddings_model_endpoint_name, aws_region, os_index_name, os_domain_ep, os_http_auth) -> int: - logger.info(f'Starting process_shard with content: {shard}') - st = time.time() - embeddings = create_sagemaker_embeddings_from_js_model(embeddings_model_endpoint_name, aws_region) - docsearch = OpenSearchVectorSearch( - index_name=os_index_name, - embedding_function=embeddings, - opensearch_url="https://{}".format(os_domain_ep), - http_auth = os_http_auth, - use_ssl = True, - verify_certs = True, - connection_class = RequestsHttpConnection - ) - docsearch.add_documents(documents=shard) - et = time.time() - st - logger.info(f'Shard completed in {et} seconds.') - return 0 - -def lambda_handler(event, context): - request_timestamp = time.time() - logger.info(f'request_timestamp :{request_timestamp}') - logger.info(f"event:{event}") - logger.info(f"context:{context}") - - # parse arguments from event - index_name = json.loads(event['body'])['aos_index'] - operation = json.loads(event['body'])['operation'] - body = json.loads(event['body'])['body'] - aos_client = OpenSearchClient(_opensearch_cluster_domain) - # re-route GET request to separate processing branch - if event['httpMethod'] == 'GET': - if operation == 'query': - response = aos_client.query(index_name, json.dumps(body)) - elif operation == 'match_all': - response = aos_client.match_all(index_name) - else: - raise Exception(f'Invalid query operation: {operation}') - return { - 'statusCode': 200, - 'headers': {'Content-Type': 'application/json'}, - 'body': json.dumps(response) - } - elif event['httpMethod'] == 'POST': - if operation == 'delete': - response = aos_client.delete_index(index_name) - elif operation == 'create': - logger.info(f'create index with query: {json.dumps(body)}') - response = aos_client.create_index(index_name, json.dumps(body)) - else: - raise Exception(f'Invalid query operation: {operation}') - return { - 'statusCode': 200, - 'headers': {'Content-Type': 'application/json'}, - 'body': json.dumps(response) - }
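The GET/POST routing above is driven entirely by the request body. For reference, a minimal sketch of two payloads this branch accepts (the values are illustrative; only the field names come from the handler):

```python
# Hypothetical API Gateway proxy events for the index-management branch above;
# the handler reads 'aos_index', 'operation' and 'body' from the request body.
import json

delete_index_request = {  # POST /embedding
    "httpMethod": "POST",
    "body": json.dumps({"aos_index": "chatbot-index", "operation": "delete", "body": {}}),
}

match_all_request = {  # GET /embedding
    "httpMethod": "GET",
    "body": json.dumps({"aos_index": "chatbot-index", "operation": "match_all", "body": {}}),
}
```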
- - # parse arguments from event - prefix = json.loads(event['body'])['document_prefix'] - file_processed = json.loads(event['body']).get('file_processed', False) - - # Set the NLTK data path to the /tmp directory (writable in AWS Lambda) - nltk.data.path.append("/tmp") - # List of NLTK packages to download - nltk_packages = ['punkt', 'averaged_perceptron_tagger'] - # Download the required NLTK packages to /tmp - for package in nltk_packages: - nltk.download(package, download_dir='/tmp') - - aos_client = OpenSearch( - hosts = [{'host': _opensearch_cluster_domain.replace("https://", ""), 'port': 443}], - http_auth = awsauth, - use_ssl = True, - verify_certs = True, - connection_class = RequestsHttpConnection, - region=aws_region - ) - - # iterate all files under the given s3 prefix in bucket llm-bot-documents and log the file count and total size - total_size = 0 - total_files = 0 - for obj in document_bucket.objects.filter(Prefix=prefix): - total_files += 1 - total_size += obj.size - logger.info(f'total_files:{total_files}, total_size:{total_size}') - # raise an error if the total size is larger than 100MB (MAX_FILE_SIZE) - if total_size > MAX_FILE_SIZE: - raise Exception(f'total_size:{total_size} is larger than {MAX_FILE_SIZE}') - - # split all docs into chunks - st = time.time() - logger.info('Loading documents ...') - if file_processed: - chunks = load_processed_documents(prefix=prefix) - else: - docs = load_documents(prefix=prefix) - chunks = split_documents(docs) - - et = time.time() - st - # [Document(page_content = 'xx', metadata = { 'source': '/tmp/xx/xx.pdf', 'timestamp': 123.456, 'embeddings_model': 'embedding-endpoint'})], - logger.info(f'Time taken: {et} seconds. {len(chunks)} chunks generated') - - st = time.time() - db_shards = (len(chunks) // MAX_OS_DOCS_PER_PUT) + 1 - shards = np.array_split(chunks, db_shards) - logger.info(f'Loading chunks into vector store ... using {db_shards} shards, shards content: {shards}') - - # TBD, create index if not exists instead of using API in AOS console manually - # Reply: Langchain has already implemented the code to create index if not exists - # Refer Link: https://github.com/langchain-ai/langchain/blob/eb3d1fa93caa26d497e5b5bdf6134d266f6a6990/libs/langchain/langchain/vectorstores/opensearch_vector_search.py#L120 - exists = aos_client.indices.exists(index_name) - logger.info(f"index_name={index_name}, exists={exists}") - - # shard_start_index = 1 - for shard_id, shard in enumerate(shards): - process_shard(shards[shard_id].tolist(), _embeddings_model_endpoint_name, aws_region, index_name, _opensearch_cluster_domain, awsauth) - - et = time.time() - st - logger.info(f'Time taken: {et} seconds. 
all shards processed') - - return { - 'statusCode': 200, - 'headers': {'Content-Type': 'application/json'}, - 'body': json.dumps({ - "created": request_timestamp, - "model": _embeddings_model_endpoint_name, - }) - } - diff --git a/src/lambda/embedding/requirements.txt b/src/lambda/embedding/requirements.txt deleted file mode 100644 index e6914666..00000000 --- a/src/lambda/embedding/requirements.txt +++ /dev/null @@ -1,9 +0,0 @@ -langchain==0.0.312 -opensearch-py==2.2.0 -faiss_cpu==1.7.4 -sagemaker==2.48.0 -numpy==1.22.0 -# nltk==3.8.1 -requests-aws4auth==1.2.3 -unstructured==0.10.5 -unstructured[pdf] diff --git a/src/lambda/embedding/utils/aos_utils.py b/src/lambda/embedding/utils/aos_utils.py deleted file mode 100644 index 7d1cb88e..00000000 --- a/src/lambda/embedding/utils/aos_utils.py +++ /dev/null @@ -1,146 +0,0 @@ -import boto3 -import json -from typing import List - -from requests_aws4auth import AWS4Auth -from opensearchpy import OpenSearch, RequestsHttpConnection - -import logging -logger = logging.getLogger() -logger.setLevel(logging.INFO) - -credentials = boto3.Session().get_credentials() -region = boto3.Session().region_name -awsauth = AWS4Auth(credentials.access_key, credentials.secret_key, region, 'es', session_token=credentials.token) - -class OpenSearchClient: - def __init__(self, _opensearch_cluster_domain: str): - """ - Initialize OpenSearch client using OpenSearch Endpoint - """ - self.client = OpenSearch( - hosts = [{'host': _opensearch_cluster_domain.replace("https://", ""), 'port': 443}], - http_auth = awsauth, - use_ssl = True, - verify_certs = True, - connection_class = RequestsHttpConnection, - region=region - ) - def create_index(self, index: str, body: str): - """ - Create an index in OpenSearch. - - Args: - index (str): The name of the index to create. - body (dict): A dictionary containing the settings and mappings for the index. - """ - body_dict = json.loads(body) - # Extract the settings and mappings from the body - settings = body_dict.get('body', {}).get('settings', {}) - mappings = body_dict.get('body', {}).get('mappings', {}) - - # Create the index with the specified settings and mappings - self.client.indices.create( - index=index, - body={ - 'settings': settings, - 'mappings': mappings - } - ) - - def delete_index(self, index: str): - """ - Delete an index in OpenSearch. - """ - # avoid NotFoundError: NotFoundError(404, 'index_not_found_exception'... - if not self.client.indices.exists(index=index): - # hint to the caller that the index does not exist - return { - 'statusCode': 404, - 'headers': {'Content-Type': 'application/json'}, - 'body': json.dumps({'error': f'index {index} does not exist'}) - } - # delete the index - self.client.indices.delete(index=index) - def delete_document(self, index: str, document_id: str): - """ - Delete a document in a specific index. - """ - # delete the document - self.client.delete(index=index, id=document_id) - def bulk(self, index: str, document: List[str]): - """ - Bulk index documents in a specific index. - """ - # bulk index the documents - self.client.bulk(index=index, body=document) - def index(self, index: str, document: List[str]): - """ - Index a document in a specific index. 
- """ - # iterate through the documents and index them - for doc in document: - try: - response = self.client.index(index=index, body=doc) - logger.info(f"response: {response}") - except Exception as e: - logger.error(f"Error indexing document: {e}") - def query(self, index: str, field: str, value: str): - """ - Execute a query on a specific index based on a field and value. - """ - body = { - "query": { - "match": { - field: value - } - } - } - response = self.client.search(index=index, body=body) - return response - def match_all(self, index: str): - """ - Execute a match_all query on a specific index. - """ - # avoid NotFoundError: NotFoundError(404, 'index_not_found_exception'... - if not self.client.indices.exists(index=index): - # hint to the caller that the index does not exist - return { - 'statusCode': 404, - 'headers': {'Content-Type': 'application/json'}, - 'body': json.dumps({'error': f'index {index} does not exist'}) - } - body = { - "query": { - "match_all": {} - } - } - response = self.client.search(index=index, body=body) - return response - def search_with_metadata(self, index: str, query: str, filter: str): - """ - Execute a search query using the query DSL, using bool query to filter on metadata. - """ - # avoid NotFoundError: NotFoundError(404, 'index_not_found_exception'... - if not self.client.indices.exists(index=index): - # hint to the caller that the index does not exist - return { - 'statusCode': 404, - 'headers': {'Content-Type': 'application/json'}, - 'body': json.dumps({'error': f'index {index} does not exist'}) - } - body = { - "query": { - "bool": { - "must": [ - {"match": {"content": query}}, - ], - # looking for documents where the metadata field exactly matches the value of filter - "filter": [ - {"term": {"metadata": filter}} - ] - } - } - } - response = self.client.search(index=index, body=body) - return response \ No newline at end of file diff --git a/src/lambda/embedding/utils/sm_utils.py b/src/lambda/embedding/utils/sm_utils.py deleted file mode 100644 index 76675faa..00000000 --- a/src/lambda/embedding/utils/sm_utils.py +++ /dev/null @@ -1,73 +0,0 @@ -""" -Helper functions for using Samgemaker Endpoint via langchain -""" -import sys -import time -import json -import logging -from typing import List -from langchain.embeddings import SagemakerEndpointEmbeddings -from langchain.embeddings.sagemaker_endpoint import EmbeddingsContentHandler - -logger = logging.getLogger() -# logging.basicConfig(format='%(asctime)s,%(module)s,%(processName)s,%(levelname)s,%(message)s', level=logging.INFO, stream=sys.stderr) -logger.setLevel(logging.INFO) - -# extend the SagemakerEndpointEmbeddings class from langchain to provide a custom embedding function -class SagemakerEndpointEmbeddingsJumpStart(SagemakerEndpointEmbeddings): - def embed_documents( - self, texts: List[str], chunk_size: int = 500 - ) -> List[List[float]]: - """Compute doc embeddings using a SageMaker Inference Endpoint. - - Args: - texts: The list of texts to embed. - chunk_size: The chunk size defines how many input texts will - be grouped together as request. If None, will use the - chunk size specified by the class. - - Returns: - List of embeddings, one for each text. 
- """ - results = [] - _chunk_size = len(texts) if chunk_size > len(texts) else chunk_size - st = time.time() - for i in range(0, len(texts), _chunk_size): - response = self._embedding_func(texts[i:i + _chunk_size]) - results.extend(response) - time_taken = time.time() - st - logger.info(f"got results for {len(texts)} in {time_taken}s, length of embeddings list is {len(results)}") - return results - - -# class for serializing/deserializing requests/responses to/from the embeddings model -class ContentHandler(EmbeddingsContentHandler): - content_type = "application/json" - accepts = "application/json" - - def transform_input(self, prompt, model_kwargs={}) -> bytes: - # add bge_prompt to each element in prompt - new_prompt = ["为这个句子生成表示以用于检索相关文章:" + p for p in prompt] - input_str = json.dumps({"inputs": new_prompt, **model_kwargs}) - return input_str.encode('utf-8') - - def transform_output(self, output: bytes) -> str: - response_json = json.loads(output.read().decode("utf-8")) - embeddings = response_json["sentence_embeddings"] - if len(embeddings) == 1: - return [embeddings[0]] - return embeddings - -def create_sagemaker_embeddings_from_js_model(embeddings_model_endpoint_name: str, aws_region: str) -> SagemakerEndpointEmbeddingsJumpStart: - # all set to create the objects for the ContentHandler and - # SagemakerEndpointEmbeddingsJumpStart classes - content_handler = ContentHandler() - logger.info(f'content_handler: {content_handler}, embeddings_model_endpoint_name: {embeddings_model_endpoint_name}, aws_region: {aws_region}') - # note the name of the LLM Sagemaker endpoint, this is the model that we would - # be using for generating the embeddings - embeddings = SagemakerEndpointEmbeddingsJumpStart( - endpoint_name = embeddings_model_endpoint_name, - region_name = aws_region, - content_handler = content_handler - ) - return embeddings \ No newline at end of file diff --git a/src/lambda/etl/Dockerfile b/src/lambda/etl/Dockerfile deleted file mode 100644 index 73e6adf5..00000000 --- a/src/lambda/etl/Dockerfile +++ /dev/null @@ -1,8 +0,0 @@ -FROM --platform=linux/amd64 public.ecr.aws/lambda/python:3.9 - -COPY requirements.txt . 
diff --git a/src/lambda/etl/Dockerfile deleted file mode 100644 index 73e6adf5..00000000 --- a/src/lambda/etl/Dockerfile +++ /dev/null @@ -1,8 +0,0 @@ -FROM --platform=linux/amd64 public.ecr.aws/lambda/python:3.9 - -COPY requirements.txt . -RUN pip3 install -r requirements.txt --target "${LAMBDA_TASK_ROOT}" - -COPY main.py ${LAMBDA_TASK_ROOT} - -CMD [ "main.lambda_handler" ] diff --git a/src/lambda/etl/main.py deleted file mode 100644 index d1a13624..00000000 --- a/src/lambda/etl/main.py +++ /dev/null @@ -1,43 +0,0 @@ -import json -import boto3 -import logging - -logger = logging.getLogger() -logger.setLevel(logging.INFO) - -s3_client = boto3.client('s3') - -# Offline lambda function to count the number of files in the S3 bucket -def lambda_handler(event, context): - logger.info(f"event:{event}") - # Retrieve bucket name and prefix from the event object passed by Step Function - bucket_name = event['s3Bucket'] - prefix = event['s3Prefix'] - - # Initialize the file count - file_count = 0 - - # Paginate through the list of objects in the bucket with the specified prefix - paginator = s3_client.get_paginator('list_objects_v2') - page_iterator = paginator.paginate(Bucket=bucket_name, Prefix=prefix) - - # Count the files; skip keys ending with a slash, which are folder placeholders - for page in page_iterator: - for obj in page.get('Contents', []): - if obj['Key'].endswith('/'): - continue - file_count += 1 - - # Convert fileCount into an array of batch indices "batchIndices": [0, 1, ..., fileCount-1] - batch_indices = list(range(file_count)) - - # This response should match the expected input schema of the downstream tasks in the Step Functions workflow - return { - 'fileCount': file_count, - 's3Bucket': bucket_name, - 's3Prefix': prefix, - 'qaEnhance': event['qaEnhance'], - # boolean value to indicate if the lambda function is running in offline mode - 'offline': event['offline'], - 'batchIndices': batch_indices, - } diff --git a/src/lambda/etl/requirements.txt deleted file mode 100644 index e6914666..00000000 --- a/src/lambda/etl/requirements.txt +++ /dev/null @@ -1,9 +0,0 @@ -langchain==0.0.312 -opensearch-py==2.2.0 -faiss_cpu==1.7.4 -sagemaker==2.48.0 -numpy==1.22.0 -# nltk==3.8.1 -requests-aws4auth==1.2.3 -unstructured==0.10.5 -unstructured[pdf]
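The contract between this Lambda and the Map state in etl-stack.ts is simply that a fileCount of N fans out to batch indices 0..N-1, one offline Glue run per index. A tiny sketch:

```python
# Fan-out sketch: three files under the prefix yield three Map iterations,
# each receiving one entry of batchIndices as its --BATCH_INDICE argument.
file_count = 3
batch_indices = list(range(file_count))
assert batch_indices == [0, 1, 2]
```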
diff --git a/src/lambda/executor/Dockerfile deleted file mode 100644 index 0a7a3e53..00000000 --- a/src/lambda/executor/Dockerfile +++ /dev/null @@ -1,9 +0,0 @@ -FROM --platform=linux/amd64 public.ecr.aws/lambda/python:3.9 - -COPY requirements.txt . -RUN pip3 install -r requirements.txt --target "/var/task" - -COPY main.py /var/task -COPY ./utils/* /var/task/ - -CMD [ "main.lambda_handler" ] diff --git a/src/lambda/executor/main.py deleted file mode 100644 index 7ca89a42..00000000 --- a/src/lambda/executor/main.py +++ /dev/null @@ -1,194 +0,0 @@ -import json -import logging -import os -import boto3 -import time -from aos_utils import LLMBotOpenSearchClient -from llmbot_utils import QueryType, combine_recalls, concat_recall_knowledge, process_input_messages -from ddb_utils import get_session, update_session -from sm_utils import SagemakerEndpointVectorOrCross - -logger = logging.getLogger() -logger.setLevel(logging.INFO) - -region = os.environ['AWS_REGION'] -embedding_endpoint = os.environ.get("embedding_endpoint", "") -cross_endpoint = os.environ.get("cross_endpoint", "") -aos_endpoint = os.environ.get("aos_endpoint", "") -aos_index = os.environ.get("aos_index", "") -llm_endpoint = os.environ.get('llm_endpoint', "") -chat_session_table = os.environ.get('chat_session_table', "") - -sm_client = boto3.client("sagemaker-runtime") -aos_client = LLMBotOpenSearchClient(aos_endpoint) - -class APIException(Exception): - def __init__(self, message, code: str = None): - if code: - super().__init__("[{}] {}".format(code, message)) - else: - super().__init__(message) - -def handle_error(func): - """Decorator for exception handling""" - - def wrapper(*args, **kwargs): - try: - return func(*args, **kwargs) - except APIException as e: - logger.exception(e) - raise e - except Exception as e: - logger.exception(e) - raise RuntimeError( - "Unknown exception, please check Lambda log for more details" - ) - - return wrapper - -def main_entry(session_id:str, query_input:str, history:list, embedding_model_endpoint:str, cross_model_endpoint:str, - llm_model_endpoint:str, aos_index:str, enable_knowledge_qa:bool, temperature: float): - """ - Entry point for the Lambda function. - - :param session_id: The ID of the session. - :param query_input: The query input. - :param history: The history of the conversation. - :param embedding_model_endpoint: The endpoint of the embedding model. - :param cross_model_endpoint: The endpoint of the cross model. - :param llm_model_endpoint: The endpoint of the language model. - :param aos_index: The index of the AOS engine. - :param enable_knowledge_qa: Whether to enable knowledge QA. - :param temperature: The temperature of the language model. - - return: answer(str) - """ - - if enable_knowledge_qa: - # 1. concatenate query_input and history to unified prompt - query_knowledge = ''.join([query_input] + [row[0] for row in history][::-1]) - - # 2. get AOS knn recall - start = time.time() - query_embedding = SagemakerEndpointVectorOrCross(prompt="为这个句子生成表示以用于检索相关文章:" + query_knowledge, endpoint_name=embedding_model_endpoint, region_name=region, model_type="vector", stop=None) - opensearch_knn_response = aos_client.search(index_name=aos_index, query_type="knn", query_term=query_embedding) - logger.info(json.dumps(opensearch_knn_response, ensure_ascii=False)) - elapsed_time = time.time() - start - logger.info(f'running time of opensearch_knn : {elapsed_time}s')
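Step 2 above performs the knn recall through LLMBotOpenSearchClient (its definition appears later in this diff). The query body it sends to OpenSearch looks roughly like this (the vector values are made up):

```python
# Shape of the knn query used in step 2; see _build_knn_search_query in
# src/lambda/executor/utils/aos_utils.py further down in this diff.
knn_query = {
    "size": 10,
    "query": {
        "knn": {
            "vector_field": {
                "vector": [0.12, -0.03, 0.88],  # query_embedding, truncated
                "k": 10,
            }
        }
    },
}
```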
- - # 3. get AOS invertedIndex recall - start = time.time() - opensearch_query_response = aos_client.search(index_name=aos_index, query_type="basic", query_term=query_knowledge) - logger.info(json.dumps(opensearch_query_response, ensure_ascii=False)) - elapsed_time = time.time() - start - logger.info(f'running time of opensearch_query : {elapsed_time}s') - - # 4. combine the two recalls, opensearch_knn_response and opensearch_query_response - recall_knowledge = combine_recalls(opensearch_knn_response, opensearch_query_response) - - # 5. Predict correlation score using cross model - recall_knowledge_cross = [] - for knowledge in recall_knowledge: - # get score using cross model - score = float(SagemakerEndpointVectorOrCross(prompt=query_knowledge, endpoint_name=cross_model_endpoint, region_name=region, model_type="cross", stop=None, context=knowledge['doc'])) - logger.info(json.dumps({'doc': knowledge['doc'], 'score': score, 'source': knowledge['source']}, ensure_ascii=False)) - if score > 0.8: - recall_knowledge_cross.append({'doc': knowledge['doc'], 'score': score, 'source': knowledge['source']}) - - recall_knowledge_cross.sort(key=lambda x: x["score"], reverse=True) - - recall_knowledge_str = concat_recall_knowledge(recall_knowledge_cross[:2]) - sources = list(set([item["source"] for item in recall_knowledge_cross[:2]])) - query_type = QueryType.KnowledgeQuery - elapsed_time = time.time() - start - logger.info(f'running time of recall knowledge : {elapsed_time}s') - else: - recall_knowledge_str = "" - # no knowledge recall in plain conversation mode, so there are no sources - sources = [] - query_type = QueryType.Conversation - - # 6. generate answer using question and recall_knowledge - parameters = {'temperature': temperature} - try: - # generate_answer - answer = SagemakerEndpointVectorOrCross(prompt=query_input, endpoint_name=llm_model_endpoint, region_name=region, model_type="answer", stop=None, history=history, parameters=parameters, context=recall_knowledge_str) - except Exception as e: - logger.info(f'Exception: {str(e)}') - answer = "" - - # 7. update_session - start = time.time() - update_session(session_id=session_id, chat_session_table=chat_session_table, - question=query_input, answer=answer, knowledge_sources=sources) - elapsed_time = time.time() - start - logger.info(f'running time of update_session : {elapsed_time}s')
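Steps 4 and 5 above amount to a threshold-and-top-k rerank. A self-contained sketch of that post-processing (the documents and scores are invented; concat_recall_knowledge itself is not shown in this patch):

```python
# Keep cross-encoder scores above 0.8, sort descending, and use the top two
# documents as the LLM context, mirroring steps 4-5 above.
recall = [
    {"doc": "doc A", "score": 0.91, "source": "a.pdf"},
    {"doc": "doc B", "score": 0.55, "source": "b.pdf"},
    {"doc": "doc C", "score": 0.84, "source": "c.pdf"},
]
kept = sorted((k for k in recall if k["score"] > 0.8),
              key=lambda k: k["score"], reverse=True)[:2]
context = "\n".join(k["doc"] for k in kept)  # "doc A" then "doc C"
```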
- - # 8. log results - json_obj = { - "session_id": session_id, - "query": query_input, - "recall_knowledge_cross_str": recall_knowledge_str, - "detect_query_type": str(query_type), - "history": history, - "chatbot_answer": answer, - "sources": sources, - "timestamp": int(time.time()) - } - - json_obj_str = json.dumps(json_obj, ensure_ascii=False) - logger.info(json_obj_str) - - return answer, sources - -@handle_error -def lambda_handler(event, context): - request_timestamp = time.time() - logger.info(f'request_timestamp :{request_timestamp}') - logger.info(f"event:{event}") - logger.info(f"context:{context}") - - # Get request body - event_body = json.loads(event['body']) - model = event_body['model'] - messages = event_body['messages'] - temperature = event_body['temperature'] - - history, question = process_input_messages(messages) - role = "user" - session_id = f"{role}_{int(request_timestamp)}" - knowledge_qa_flag = True if model == 'knowledge_qa' else False - - main_entry_start = time.time() - answer, sources = main_entry(session_id, question, history, embedding_endpoint, cross_endpoint, llm_endpoint, aos_index, knowledge_qa_flag, temperature) - main_entry_elapsed = time.time() - main_entry_start - logger.info(f'running time of main_entry : {main_entry_elapsed}s') - - llmbot_response = { - "id": session_id, - "object": "chat.completion", - "created": int(request_timestamp), - "model": model, - # placeholder token usage, real accounting is not implemented - "usage": { - "prompt_tokens": 13, - "completion_tokens": 7, - "total_tokens": 20 - }, - "choices": [ - { - "message": { - "role": "assistant", - "content": answer, - "knowledge_sources": sources - }, - "finish_reason": "stop", - "index": 0 - } - ] - } - - # return the result - return { - 'statusCode': 200, - 'headers': {'Content-Type': 'application/json'}, - 'body': json.dumps(llmbot_response) - } diff --git a/src/lambda/executor/requirements.txt deleted file mode 100644 index 3eb2077d..00000000 --- a/src/lambda/executor/requirements.txt +++ /dev/null @@ -1,7 +0,0 @@ -langchain==0.0.312 -opensearch-py==2.2.0 -boto3==1.26.114 -botocore==1.29.140 -requests_aws4auth==1.2.2 -openai==0.27.6 -tiktoken==0.3.3 diff --git a/src/lambda/executor/utils/aos_utils.py deleted file mode 100644 index d0c273b9..00000000 --- a/src/lambda/executor/utils/aos_utils.py +++ /dev/null @@ -1,158 +0,0 @@ -import json -import boto3 -import requests -from requests_aws4auth import AWS4Auth -from opensearchpy import OpenSearch, RequestsHttpConnection - -credentials = boto3.Session().get_credentials() -region = boto3.Session().region_name -awsauth = AWS4Auth(credentials.access_key, credentials.secret_key, region, 'es', session_token=credentials.token) - -IMPORT_OPENSEARCH_PY_ERROR = ( - "Could not import OpenSearch. Please install it with `pip install opensearch-py`." 
-) -def _import_not_found_error(): - """Import not found error if available, otherwise raise error.""" - try: - from opensearchpy.exceptions import NotFoundError - except ImportError: - raise ImportError(IMPORT_OPENSEARCH_PY_ERROR) - return NotFoundError - -class LLMBotOpenSearchClient: - def __init__(self, host): - """ - Initialize OpenSearch client using OpenSearch Endpoint - """ - self.client = OpenSearch( - hosts = [{'host': host.replace("https://", ""), 'port': 443}], - http_auth=awsauth, - use_ssl=True, - verify_certs=True, - connection_class=RequestsHttpConnection - ) - self.query_match = {"knn": self._build_knn_search_query, - "exact": self._build_exactly_match_query, - "basic": self._build_basic_search_query} - - def _build_basic_search_query(self, index_name, query_term, field, size): - """ - Build basic search query - - :param index_name: Target Index Name - :param query_term: query term - :param field: search field - :param size: number of results to return from aos - - :return: aos response json - """ - query = { - "size": size, - "query": { - "bool":{ - "should": [ {"match": { field : query_term }} ] - } - }, - "sort": [ - { - "_score": { - "order": "desc" - } - } - ] - } - - return query - - def _build_knn_search_query(self, index_name, query_term, field, size): - """ - Build knn search query - - :param index_name: Target Index Name - :param query_term: query term - :param field: search field - :param size: number of results to return from aos - - :return: aos response json - """ - query = { - "size": size, - "query": { - "knn": { - "vector_field": { - "vector": query_term, - "k": size - } - } - } - } - - return query - - def _build_exactly_match_query(self, index_name, query_term, field, size): - """ - Build exactly match query - - :param index_name: Target Index Name - :param query_term: query term - :param field: search field - :param size: number of results to return from aos - - :return: aos response json - """ - query = { - "query" : { - "match_phrase":{ - field : query_term - } - } - } - return query - - def organize_results(self, query_type, response, field): - """ - Organize results from aos response - - :param query_type: query type - :param response: aos response json - """ - results = [] - aos_hits = response["hits"]["hits"] - if query_type == "exact": - for aos_hit in aos_hits: - doc = aos_hit['_source'][field] - source = aos_hit['_source']['metadata']['file_path'] - score = aos_hit["_score"] - results.append({'doc': doc, 'score': score, 'source': source}) - else: - for aos_hit in aos_hits: - doc = f"{aos_hit['_source'][field]}" - source = aos_hit['_source']['metadata']['file_path'] - score = aos_hit["_score"] - results.append({'doc': doc, 'score': score, 'source': source}) - return results - - def search(self, index_name, query_type, query_term, field: str = "text", size: int = 10): - """ - Perform search on aos - - :param index_name: Target Index Name - :param query_type: query type - :param query_term: query term - :param field: search field - :param size: number of results to return from aos - - :return: aos response json - """ - not_found_error = _import_not_found_error() - try: - self.client.indices.get(index=index_name) - except not_found_error: - return [] - query = self.query_match[query_type](index_name, query_term, field, size) - response = self.client.search( - body=query, - index=index_name - ) - result = self.organize_results(query_type, response, field) - return result \ No newline at end of file diff --git a/src/lambda/executor/utils/ddb_utils.py 
b/src/lambda/executor/utils/ddb_utils.py deleted file mode 100644 index 074cbcc7..00000000 --- a/src/lambda/executor/utils/ddb_utils.py +++ /dev/null @@ -1,124 +0,0 @@ -import json -import boto3 -from datetime import date - -def get_session(session_id, chat_session_table): - - table_name = chat_session_table - dynamodb = boto3.resource('dynamodb') - - # table name - table = dynamodb.Table(table_name) - operation_result = "" - - response = table.get_item(Key={'session_id': session_id}) - - if "Item" in response.keys(): - # print("****** " + response["Item"]["content"]) - operation_result = json.loads(response["Item"]["content"]) - else: - # print("****** No result") - operation_result = "" - - return operation_result - - -# param: session_id -# question -# answer -# return: success -# failed -def update_session(session_id, chat_session_table, question, answer, knowledge_sources): - - table_name = chat_session_table - dynamodb = boto3.resource('dynamodb') - - # table name - table = dynamodb.Table(table_name) - operation_result = "" - - response = table.get_item(Key={'session_id': session_id}) - - item = { - "session_id":session_id, - "question_content":question, - "question_answer":answer, - "revised_answer":"", - "answer_rating":"", - "knowledge_sources": knowledge_sources, - } - - # inserting values into table - response = table.put_item( - Item=item - ) - - if "ResponseMetadata" in response.keys(): - if response["ResponseMetadata"]["HTTPStatusCode"] == 200: - operation_result = "success" - else: - operation_result = "failed" - else: - operation_result = "failed" - - return operation_result - -# For Wechat Miniprogram -# param: session_id -# user -# message -# timestamp -# isFirstUpdate -# return: success -# failed -def update_history(session_id, chat_session_table, user, message, timestamp, isFirstUpdate): - - table_name = chat_session_table - dynamodb = boto3.resource('dynamodb') - - # table name - table = dynamodb.Table(table_name) - operation_result = "" - - response = table.get_item(Key={'session_id': session_id}) - - if "Item" in response.keys(): - # print("****** " + response["Item"]["content"]) - chat_history = json.loads(response["Item"]["content"]) - else: - # print("****** No result") - chat_history = [] - - chat_history.append([user, message, timestamp]) - content = json.dumps(chat_history) - TodayDate = date.today() - - # inserting values into table - if isFirstUpdate: - response = table.put_item( - Item={ - 'session_id': session_id, - 'content': content, - 'sessionCreationDate': TodayDate, - 'lastUpdateDate': TodayDate - } - ) - else: - response = table.put_item( - Item={ - 'session_id': session_id, - 'content': content, - 'lastUpdateDate': TodayDate - } - ) - - - if "ResponseMetadata" in response.keys(): - if response["ResponseMetadata"]["HTTPStatusCode"] == 200: - operation_result = "success" - else: - operation_result = "failed" - else: - operation_result = "failed" - - return operation_result \ No newline at end of file diff --git a/src/lambda/executor/utils/llmbot_utils.py b/src/lambda/executor/utils/llmbot_utils.py deleted file mode 100644 index cca7a32a..00000000 --- a/src/lambda/executor/utils/llmbot_utils.py +++ /dev/null @@ -1,132 +0,0 @@ -from enum import Enum - -QA_SEP = "=>" -AWS_Free_Chat_Prompt = """你是云服务AWS的智能客服机器人{B},能够回答{A}的各种问题以及陪{A}聊天,如:{chat_history}\n\n{A}: {question}\n{B}: """ -AWS_Knowledge_QA_Prompt = """你是云服务AWS的智能客服机器人{B},请严格根据反括号中的资料提取相关信息\n```\n{fewshot}\n```\n回答{A}的各种问题,比如:\n\n{A}: {question}\n{B}: """ -Fewshot_prefix_Q="问题" 
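For context, each recalled knowledge document in this scheme is stored as a single "question=>answer" string joined by QA_SEP, and the few-shot prompt blocks are built by splitting on that separator. A minimal sketch of that convention (the sample strings are hypothetical):

    # A recalled doc is stored as "<question>=><answer>", joined by QA_SEP ("=>").
    doc = "什么是Data Transfer Hub?=>一个安全、可扩展的数据传输解决方案"  # hypothetical example
    question, answer = doc.split(QA_SEP)
    fewshot = f"{Fewshot_prefix_Q}: {question}\n{Fewshot_prefix_A}: {answer}"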
-Fewshot_prefix_A="回答"
-
-class QueryType(Enum):
-    KeywordQuery = "KeywordQuery"       # The user input is only a few keywords (~2 tokens)
-    KnowledgeQuery = "KnowledgeQuery"   # The user input needs the knowledge base to answer
-    Conversation = "Conversation"       # The user input is unrelated to the knowledge base
-
-def combine_recalls(opensearch_knn_respose, opensearch_query_response):
-    '''
-    Keep a KNN result only if it also appears in the inverted index results.
-    '''
-    knn_threshold = 0.2
-    inverted_threshold = 5.0
-    filter_knn_result = { item["doc"] : [item['source'], item["score"]] for item in opensearch_knn_respose if item["score"] > knn_threshold }
-    filter_inverted_result = { item["doc"] : [item['source'], item["score"]] for item in opensearch_query_response if item["score"] > inverted_threshold }
-
-    combine_result = []
-    for doc, doc_info in filter_knn_result.items():
-        if doc in filter_inverted_result.keys():
-            combine_result.append({ "doc" : doc, "score" : doc_info[1], "source" : doc_info[0] })
-
-    return combine_result
-
-def concat_recall_knowledge(recall_knowledge_list):
-    """
-    Concat recall knowledge results from OpenSearch into a single string.
-    """
-    return "\n".join([item["doc"] for item in recall_knowledge_list])
-
-def process_input_messages(messages):
-    # 1. If two consecutive messages are from the same role, merge them into one message.
-    # Make sure new_messages follows this order: [user, assistant, user, assistant, ...]
-    new_messages = []
-    previous_role = None
-    for message in messages:
-        if message['role'] not in ("user", "assistant"):
-            continue
-
-        if len(new_messages) == 0:
-            if message['role'] == 'user':
-                new_messages.append(message['content'])
-        else:
-            if message['role'] == previous_role:
-                new_messages[-1] += f"\n{message['content']}"
-            else:
-                new_messages.append(message['content'])
-        previous_role = message['role']
-
-    # 2. Generate history and question
-    if len(new_messages) % 2 == 0:
-        print("The number of messages is even, which is not expected.")
-        history = [[new_messages[i], new_messages[i+1]] for i in range(0, len(new_messages)-2, 2)]
-        question = new_messages[-2]
-    else:
-        history = [[new_messages[i], new_messages[i+1]] for i in range(0, len(new_messages)-1, 2)]
-        question = new_messages[-1]
-
-    return history, question
-
-def build_conversation_prompt(post_text, conversations, role_a, role_b):
-    """
-    Build the conversation prompt for the LLM.
-    In the current version, we concatenate all conversation history into a single prompt.
-
-    :param post_text: user post text
-    :param conversations: conversation history
-    :param role_a: role name, e.g. "用户"
-    :param role_b: role name, e.g. "AWSBot"
-    :return: conversation prompt string
-    """
-    chat_history = [f"{role_a}: {item[0]}\n{role_b}: {item[1]}" for item in conversations]
-    chat_histories = "\n\n".join(chat_history)
-    chat_histories = f"\n\n{chat_histories}" if chat_histories else ""
-
-    conversation_prompt = AWS_Free_Chat_Prompt.format(chat_history=chat_histories, question=post_text, A=role_a, B=role_b)
-
-    return conversation_prompt
-
-def build_knowledge_qa_prompt(post_text, qa_recalls, role_a, role_b):
-    """
-    Build the knowledge QA prompt for the LLM.
-    For knowledge QA, it merges all retrieved related document paragraphs into a single prompt.
-
-    :param post_text: user post text
-    :param qa_recalls: all retrieved related document paragraphs from OpenSearch
-    :param role_a: role name, e.g. "用户"
-    :param role_b: role name, e.g. "AWSBot"
-    """
-    qa_pairs = [ obj["doc"].split(QA_SEP) for obj in qa_recalls ]
-    qa_fewshots = [f"{Fewshot_prefix_Q}: {pair[0]}\n{Fewshot_prefix_A}: {pair[1]}" for pair in qa_pairs]
-    fewshots_str = "\n\n".join(qa_fewshots[-3:])
-
-    knowledge_qa_prompt = AWS_Knowledge_QA_Prompt.format(fewshot=fewshots_str, question=post_text, A=role_a, B=role_b)
-    return knowledge_qa_prompt
-
-def build_final_prompt(query_input, session_history, exactly_match_result, recall_knowledge, role_a, role_b):
-    """
-    Build the final prompt for generating the answer for the user.
-
-    :param query_input: user post text
-    :param session_history: conversation history from DynamoDB
-    :param exactly_match_result: exact match result from OpenSearch
-    :param recall_knowledge: knowledge recall result from OpenSearch
-    :param role_a: role name, e.g. "用户"
-    :param role_b: role name, e.g. "AWSBot"
-
-    :return: (answer, final_prompt, query_type)
-    """
-
-    answer = None
-    final_prompt = None
-    query_type = None
-
-    if exactly_match_result and recall_knowledge:
-        query_type = QueryType.KeywordQuery
-        answer = exactly_match_result[0]["doc"]
-        final_prompt = ""
-    elif recall_knowledge:
-        query_type = QueryType.KnowledgeQuery
-        final_prompt = build_knowledge_qa_prompt(query_input, recall_knowledge, role_a=role_a, role_b=role_b)
-    else:
-        query_type = QueryType.Conversation
-        free_chat_conversations = [item for item in session_history if item[2] == QueryType.Conversation]
-        final_prompt = build_conversation_prompt(query_input, free_chat_conversations[-2:], role_a=role_a, role_b=role_b)
-
-    return (answer, final_prompt, query_type)
diff --git a/src/lambda/executor/utils/sm_utils.py b/src/lambda/executor/utils/sm_utils.py
deleted file mode 100644
index 394cc99c..00000000
--- a/src/lambda/executor/utils/sm_utils.py
+++ /dev/null
@@ -1,202 +0,0 @@
-import json
-import re
-import io
-from langchain.llms.sagemaker_endpoint import LLMContentHandler, SagemakerEndpoint
-from langchain.embeddings import SagemakerEndpointEmbeddings
-from langchain.embeddings.sagemaker_endpoint import EmbeddingsContentHandler
-from langchain.callbacks.manager import CallbackManagerForLLMRun
-from langchain.llms.utils import enforce_stop_tokens
-from typing import Dict, List, Optional, Any
-
-import logging
-logger = logging.getLogger()
-logger.setLevel(logging.INFO)
-
-class vectorContentHandler(EmbeddingsContentHandler):
-    content_type = "application/json"
-    accepts = "application/json"
-
-    def transform_input(self, inputs: List[str], model_kwargs: Dict) -> bytes:
-        input_str = json.dumps({"inputs": inputs, **model_kwargs})
-        return input_str.encode("utf-8")
-
-    def transform_output(self, output: bytes) -> List[List[float]]:
-        response_json = json.loads(output.read().decode("utf-8"))
-        return response_json["sentence_embeddings"]
-
-class crossContentHandler(LLMContentHandler):
-    content_type = "application/json"
-    accepts = "application/json"
-
-    def transform_input(self, prompt: str, model_kwargs: Dict) -> bytes:
-        input_str = json.dumps({"inputs": prompt, "docs": model_kwargs["context"]})
-        return input_str.encode('utf-8')
-
-    def transform_output(self, output: bytes) -> str:
-        response_json = json.loads(output.read().decode("utf-8"))
-        return response_json['scores'][0][1]
-
-class answerContentHandler(LLMContentHandler):
-    content_type = "application/json"
-    accepts = "application/json"
-
-    def transform_input(self, question: str, model_kwargs: Dict) -> bytes:
-
-        template_1 = '以下context xml tag内的文本内容为背景知识:\n\n{context}\n\n请根据背景知识, 回答这个问题:{question}'
-        context = model_kwargs["context"]
-
-        if len(context) == 0:
-            prompt = question
-        else:
-            prompt = template_1.format(context=model_kwargs["context"], question=question)
-
-        input_str = json.dumps({"inputs": prompt,
-                                "history": model_kwargs["history"],
-                                "parameters": model_kwargs["parameters"]})
-        return input_str.encode('utf-8')
-
-    def transform_output(self, output: bytes) -> str:
-        response_json = json.loads(output.read().decode("utf-8"))
-        return response_json['outputs']
-
-class LineIterator:
-    """
-    A helper class for parsing the byte stream input.
-
-    The output of the model will be in the following format:
-    ```
-    b'{"outputs": [" a"]}\n'
-    b'{"outputs": [" challenging"]}\n'
-    b'{"outputs": [" problem"]}\n'
-    ...
-    ```
-
-    While usually each PayloadPart event from the event stream will contain a byte array
-    with a full json, this is not guaranteed and some of the json objects may be split across
-    PayloadPart events. For example:
-    ```
-    {'PayloadPart': {'Bytes': b'{"outputs": '}}
-    {'PayloadPart': {'Bytes': b'[" problem"]}\n'}}
-    ```
-
-    This class accounts for this by buffering incoming bytes and only yielding complete
-    lines (ending with a '\n' character) from the buffer. It maintains the position of the
-    last read byte to ensure that previous bytes are not exposed again.
-    """
-
-    def __init__(self, stream):
-        self.byte_iterator = iter(stream)
-        self.buffer = io.BytesIO()
-        self.read_pos = 0
-
-    def __iter__(self):
-        return self
-
-    def __next__(self):
-        while True:
-            self.buffer.seek(self.read_pos)
-            line = self.buffer.readline()
-            if line and line[-1] == ord('\n'):
-                self.read_pos += len(line)
-                return line[:-1]
-            try:
-                chunk = next(self.byte_iterator)
-            except StopIteration:
-                if self.read_pos < self.buffer.getbuffer().nbytes:
-                    continue
-                raise
-            if 'PayloadPart' not in chunk:
-                print('Unknown event type: ' + str(chunk))
-                continue
-            self.buffer.seek(0, io.SEEK_END)
-            self.buffer.write(chunk['PayloadPart']['Bytes'])

-class SagemakerEndpointStreaming(SagemakerEndpoint):
-    # Override the _call function to support streaming via invoke_endpoint_with_response_stream
-    def _call(
-        self,
-        prompt: str,
-        stop: Optional[List[str]] = None,
-        run_manager: Optional[CallbackManagerForLLMRun] = None,
-        **kwargs: Any,
-    ) -> str:
-        """Call out to the Sagemaker inference endpoint.
-
-        Args:
-            prompt: The prompt to pass into the model.
-            stop: Optional list of stop words to use when generating.
-
-        Returns:
-            The string generated by the model.
-
-        Example:
-            .. code-block:: python
-
-                response = se("Tell me a joke.")
-        """
-        _model_kwargs = self.model_kwargs or {}
-        _model_kwargs = {**_model_kwargs, **kwargs}
-        _endpoint_kwargs = self.endpoint_kwargs or {}
-
-        body = self.content_handler.transform_input(prompt, _model_kwargs)
-        # the content type should be application/json if we are using the LMI container
-        content_type = self.content_handler.content_type
-        accepts = self.content_handler.accepts
-
-        # send request
-        try:
-            response = self.client.invoke_endpoint_with_response_stream(
-                EndpointName=self.endpoint_name,
-                Body=body,
-                ContentType=content_type,
-                Accept=accepts,
-                **_endpoint_kwargs,
-            )
-        except Exception as e:
-            raise ValueError(f"Error raised by inference endpoint: {e}")
-
-        # transform_output is not used here because the response is a stream
-        for line in LineIterator(response['Body']):
-            resp = json.loads(line)
-            logger.info(resp.get("outputs")[0])
-
-        text = resp.get("outputs")[0]
-        # enforce stop tokens if they are provided
-        if stop is not None:
-            # This is a bit hacky, but I can't figure out a better way to enforce
-            # stop tokens when making calls to the sagemaker endpoint.
-            text = enforce_stop_tokens(text, stop)
-
-        return text
-
-def SagemakerEndpointVectorOrCross(prompt: str, endpoint_name: str, region_name: str, model_type: str, stop: List[str], **kwargs) -> SagemakerEndpoint:
-    """
-    original class invocation:
-        response = self.client.invoke_endpoint(
-            EndpointName=self.endpoint_name,
-            Body=body,
-            ContentType=content_type,
-            Accept=accepts,
-            **_endpoint_kwargs,
-        )
-    """
-    if model_type == "vector":
-        content_handler = vectorContentHandler()
-        embeddings = SagemakerEndpointEmbeddings(
-            endpoint_name=endpoint_name,
-            region_name=region_name,
-            content_handler=content_handler,
-        )
-        query_result = embeddings.embed_query(prompt)
-        return query_result
-    elif model_type == "cross":
-        content_handler = crossContentHandler()
-    elif model_type == "answer":
-        content_handler = answerContentHandler()
-    # TODO: replace with SagemakerEndpointStreaming
-    genericModel = SagemakerEndpoint(
-        endpoint_name=endpoint_name,
-        region_name=region_name,
-        content_handler=content_handler
-    )
-    return genericModel(prompt=prompt, stop=stop, **kwargs)
diff --git a/src/llm-stack.ts b/src/llm-stack.ts
deleted file mode 100644
index 16ff379d..00000000
--- a/src/llm-stack.ts
+++ /dev/null
@@ -1,172 +0,0 @@
-import { NestedStack, StackProps } from 'aws-cdk-lib';
-import { Construct } from 'constructs';
-
-import * as iam from 'aws-cdk-lib/aws-iam';
-import * as sagemaker from 'aws-cdk-lib/aws-sagemaker';
-import * as dotenv from "dotenv";
-
-dotenv.config();
-
-interface llmStackProps extends StackProps {
-    _s3ModelAssets: string;
-    _crossCodePrefix: string;
-    _embeddingCodePrefix: string;
-    _instructCodePrefix: string;
-}
-
-export class LLMStack extends NestedStack {
-    _crossEndPoint;
-    _embeddingEndPoint;
-    _instructEndPoint;
-
-    constructor(scope: Construct, id: string, props: llmStackProps) {
-        super(scope, id, props);
-
-        // Prepare model assets to download from Hugging Face following the model.sh script
-
-        // Specify s3 bucket and prefix for model
-        // const _S3Bucket = new s3.Bucket(this, 'llm-rag', {
-        //     // Fixed name for serving.properties for now
-        //     bucketName: "llm-rag",
-        //     blockPublicAccess: s3.BlockPublicAccess.BLOCK_ALL,
-        // });
-
-        // // Create a Lambda function
-        // const fn = new lambda.Function(this, 'justFunction', {
-        //     runtime: lambda.Runtime.NODEJS_14_X,
-        //     handler: 'index.handler',
-        //     code:
lambda.Code.fromAsset(path.join(__dirname, 'lambda/custom')), - // environment: { - // BUCKET_NAME: _S3Bucket.bucketName, - // }, - // }); - - // // Allow the Lambda function to put objects in the S3 bucket - // _S3Bucket.grantPut(fn); - - // // Create a custom resource that triggers the Lambda function - // new cr.AwsCustomResource(this, 'uploadModelAssets', { - // onCreate: { - // service: 'Lambda', - // action: 'invoke', - // parameters: { - // FunctionName: fn.functionName, - // }, - // physicalResourceId: cr.PhysicalResourceId.of('uploadModelAssets'), - // }, - // policy: cr.AwsCustomResourcePolicy.fromSdkCalls({resources: cr.AwsCustomResourcePolicy.ANY_RESOURCE}), - // }); - - // Create IAM execution role - const executionRole = new iam.Role(this, 'cross-execution-role', { - assumedBy: new iam.ServicePrincipal('sagemaker.amazonaws.com'), - managedPolicies: [ - iam.ManagedPolicy.fromAwsManagedPolicyName('AmazonSageMakerFullAccess'), - iam.ManagedPolicy.fromAwsManagedPolicyName('AmazonS3FullAccess'), - iam.ManagedPolicy.fromAwsManagedPolicyName('CloudWatchLogsFullAccess'), - ], - }); - - // CROSS MODEL - // Create model, BucketDeployment construct automatically handles dependencies to ensure model assets uploaded before creating the model in this.region - const crossImageUrl = '763104351884.dkr.ecr.'+ this.region +'.amazonaws.com/djl-inference:0.21.0-deepspeed0.8.3-cu117' - const crossModel = new sagemaker.CfnModel(this, 'cross-model', { - executionRoleArn: executionRole.roleArn, - primaryContainer: { - image: crossImageUrl, - modelDataUrl: `s3://${props._s3ModelAssets}/${props._crossCodePrefix}/cross_model.tar.gz`, - environment: { - S3_CODE_PREFIX: props._crossCodePrefix, - }, - }, - }); - - // Create endpoint configuration, refer to https://docs.aws.amazon.com/cdk/api/v2/docs/aws-cdk-lib.aws_sagemaker.CfnEndpointConfig.html for full options - const crossEndpointConfig = new sagemaker.CfnEndpointConfig(this, 'cross-endpoint-config', { - productionVariants: [{ - initialVariantWeight: 1.0, - modelName: crossModel.attrModelName, - variantName: 'variantProd', - containerStartupHealthCheckTimeoutInSeconds: 15*60, - initialInstanceCount: 1, - instanceType: 'ml.g4dn.xlarge', - }], - }); - - // Create endpoint - const crossEndpoint = new sagemaker.CfnEndpoint(this, 'cross-endpoint', { - endpointConfigName: crossEndpointConfig.attrEndpointConfigName, - endpointName: 'cross-endpoint', - }); - - this._crossEndPoint = crossEndpoint.endpointName; - - // EMBEDDING MODEL - // Create model, BucketDeployment construct automatically handles dependencies to ensure model assets uploaded before creating the model in this.region - const embeddingImageUrl = '763104351884.dkr.ecr.'+ this.region +'.amazonaws.com/djl-inference:0.21.0-deepspeed0.8.3-cu117' - const embeddingModel = new sagemaker.CfnModel(this, 'embedding-model', { - executionRoleArn: executionRole.roleArn, - primaryContainer: { - image: embeddingImageUrl, - modelDataUrl: `s3://${props._s3ModelAssets}/${props._embeddingCodePrefix}/s2e_model.tar.gz`, - environment: { - S3_CODE_PREFIX: props._embeddingCodePrefix, - }, - }, - }); - - // Create endpoint configuration, refer to https://docs.aws.amazon.com/cdk/api/v2/docs/aws-cdk-lib.aws_sagemaker.CfnEndpointConfig.html for full options - const embeddingEndpointConfig = new sagemaker.CfnEndpointConfig(this, 'embedding-endpoint-config', { - productionVariants: [{ - initialVariantWeight: 1.0, - modelName: embeddingModel.attrModelName, - variantName: 'variantProd', - 
containerStartupHealthCheckTimeoutInSeconds: 15*60, - initialInstanceCount: 1, - instanceType: 'ml.g4dn.xlarge', - }], - }); - - // Create endpoint - const embeddingEndpoint = new sagemaker.CfnEndpoint(this, 'embedding-endpoint', { - endpointConfigName: embeddingEndpointConfig.attrEndpointConfigName, - endpointName: 'embedding-endpoint', - }); - - this._embeddingEndPoint = embeddingEndpoint.endpointName; - - // INSTRUCT MODEL - // Create model, BucketDeployment construct automatically handles dependencies to ensure model assets uploaded before creating the model in this.region - const instructImageUrl = '763104351884.dkr.ecr.'+ this.region +'.amazonaws.com/djl-inference:0.21.0-deepspeed0.8.3-cu117' - const instructModel = new sagemaker.CfnModel(this, 'instruct-model', { - executionRoleArn: executionRole.roleArn, - primaryContainer: { - image: instructImageUrl, - modelDataUrl: `s3://${props._s3ModelAssets}/${props._instructCodePrefix}/model.tar.gz`, - environment: { - S3_CODE_PREFIX: props._instructCodePrefix, - }, - }, - }); - - // Create endpoint configuration, refer to https://docs.aws.amazon.com/cdk/api/v2/docs/aws-cdk-lib.aws_sagemaker.CfnEndpointConfig.html for full options - const instructEndpointConfig = new sagemaker.CfnEndpointConfig(this, 'instruct-endpoint-config', { - productionVariants: [{ - initialVariantWeight: 1.0, - modelName: instructModel.attrModelName, - variantName: 'variantProd', - containerStartupHealthCheckTimeoutInSeconds: 15*60, - initialInstanceCount: 1, - instanceType: 'ml.g5.4xlarge', - }], - }); - - // Create endpoint - const instructEndpoint = new sagemaker.CfnEndpoint(this, 'instruct-endpoint', { - endpointConfigName: instructEndpointConfig.attrEndpointConfigName, - endpointName: 'instruct-endpoint', - }); - - this._instructEndPoint = instructEndpoint.endpointName; - } -} \ No newline at end of file diff --git a/src/main.ts b/src/main.ts deleted file mode 100644 index a48bf19f..00000000 --- a/src/main.ts +++ /dev/null @@ -1,110 +0,0 @@ -import { App, CfnOutput, Stack, StackProps, CfnParameter } from 'aws-cdk-lib'; -import { Construct } from 'constructs'; - -import { VpcStack } from './vpc-stack'; -import { Ec2Stack } from './ec2-stack'; -import { OpenSearchStack } from './os-stack'; -import { LLMApiStack } from './api-stack'; -import { DynamoDBStack } from './ddb-stack'; -import { LLMStack } from './llm-stack'; -import { AssetsStack } from './assets-stack'; -import { EtlStack } from './etl-stack'; - -import * as dotenv from "dotenv"; -dotenv.config(); - -export class RootStack extends Stack { - constructor(scope: Construct, id: string, props: StackProps = {}) { - super(scope, id, props); - - // add cdk input parameters for user to specify s3 bucket store model assets - // using npx cdk deploy --rollback false --parameters S3ModelAssets=llm-rag --parameters SubEmail=example@example.org to deploy - const _S3ModelAssets = new CfnParameter(this, 'S3ModelAssets', { - type: 'String', - description: 'S3 Bucket for model & code assets', - // default: 'llm-rag', - }); - - const _SubEmail = new CfnParameter(this, 'SubEmail', { - type: 'String', - description: 'Email address for SNS notification', - }); - - // This assest stack is to mitigate issue that the model assets in s3 bucket can't be located immediately to create sagemaker model - const _AssetsStack = new AssetsStack(this, 'assets-stack', {_s3ModelAssets:_S3ModelAssets.valueAsString, env:process.env}); - const _LLMStack = new LLMStack(this, 'llm-stack', { - _s3ModelAssets:_S3ModelAssets.valueAsString, - 
_crossCodePrefix:_AssetsStack._crossCodePrefix, - _embeddingCodePrefix:_AssetsStack._embeddingCodePrefix, - _instructCodePrefix:_AssetsStack._instructCodePrefix, - env:process.env - }); - _LLMStack.addDependency(_AssetsStack); - - const _VpcStack = new VpcStack(this, 'vpc-stack', {env:process.env}); - - const _OsStack = new OpenSearchStack(this,'os-stack', {_vpc:_VpcStack._vpc, _securityGroup:_VpcStack._securityGroup}); - _OsStack.addDependency(_VpcStack); - - // const _Ec2Stack = new Ec2Stack(this, 'ec2-stack', {_vpc:_VpcStack._vpc, _securityGroup:_VpcStack._securityGroup, _domainEndpoint:_OsStack._domainEndpoint, env:process.env}); - // _Ec2Stack.addDependency(_VpcStack); - // _Ec2Stack.addDependency(_OsStack); - - const _DynamoDBStack = new DynamoDBStack(this, 'ddb-stack', {_vpc:_VpcStack._vpc, _securityGroup:_VpcStack._securityGroup, _domainEndpoint:_OsStack._domainEndpoint, env:process.env}); - _DynamoDBStack.addDependency(_VpcStack); - _DynamoDBStack.addDependency(_OsStack); - - const _EtlStack = new EtlStack(this, 'etl-stack', { - _domainEndpoint: _OsStack._domainEndpoint, - _embeddingEndpoint: _LLMStack._embeddingEndPoint ?? '', - _region: props.env?.region || 'us-east-1', - _subEmail: _SubEmail.valueAsString ?? '', - _vpc: _VpcStack._vpc, - _subnets: _VpcStack._privateSubnets, - _securityGroups: _VpcStack._securityGroup, - }); - _EtlStack.addDependency(_VpcStack); - _EtlStack.addDependency(_OsStack); - _EtlStack.addDependency(_LLMStack); - - const _ApiStack = new LLMApiStack(this, 'api-stack', { - _vpc:_VpcStack._vpc, - _securityGroup:_VpcStack._securityGroup, - _domainEndpoint:_OsStack._domainEndpoint, - _crossEndPoint: _LLMStack._crossEndPoint ?? '', - _embeddingEndPoint:_LLMStack._embeddingEndPoint || '', - _instructEndPoint:_LLMStack._instructEndPoint || '', - _chatSessionTable: _DynamoDBStack._chatSessionTable, - _sfnOutput: _EtlStack._sfnOutput, - env:process.env - }); - _ApiStack.addDependency(_VpcStack); - _ApiStack.addDependency(_OsStack); - _ApiStack.addDependency(_LLMStack); - _ApiStack.addDependency(_DynamoDBStack); - - new CfnOutput(this, 'VPC', {value:_VpcStack._vpc.vpcId}); - new CfnOutput(this, 'OpenSearch Endpoint', {value:_OsStack._domainEndpoint}); - new CfnOutput(this, 'Document Bucket', {value:_ApiStack._documentBucket}); - // deprecate for now since proxy in ec2 instance is not allowed according to policy - // new CfnOutput(this, 'OpenSearch Dashboard', {value:`${_Ec2Stack._publicIP}:8081/_dashboards`}); - new CfnOutput(this, 'API Endpoint Address', {value:_ApiStack._apiEndpoint}); - new CfnOutput(this, 'Glue Job Name', {value:_EtlStack._jobName}); - new CfnOutput(this, 'Cross Model Endpoint', {value:_LLMStack._crossEndPoint || 'No Cross Endpoint Created'}); - new CfnOutput(this, 'Embedding Model Endpoint', {value:_LLMStack._embeddingEndPoint || 'No Embedding Endpoint Created'}); - new CfnOutput(this, 'Instruct Model Endpoint', {value:_LLMStack._instructEndPoint || 'No Instruct Endpoint Created'}); - new CfnOutput(this, 'Processed Object Table', {value:_EtlStack._processedObjectsTable}); - } -} - -// for development, use account/region from cdk cli -const devEnv = { - account: process.env.CDK_DEFAULT_ACCOUNT, - region: process.env.CDK_DEFAULT_REGION, -}; - -const app = new App(); - -new RootStack(app, 'llm-bot-dev', { env: devEnv }); - -app.synth(); \ No newline at end of file diff --git a/src/models/cross/code/cross_model.tar.gz b/src/models/cross/code/cross_model.tar.gz deleted file mode 100644 index 
c4af2fdef9dbf8109ad7d0baf72ce35e256f38f2..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1146 zcmV-=1cmz_iwFP*g5zWW1MODZZreBz?X$juP#!E5wj#%FyfD@UiUM6M&~^i~`(y+H zBaLJ>63LQOE)DYUJG|JIlXfq_b{7T02g{rpp5e@#A=!xv+DzU)`;H^e^V9j9*zaPI zyKjEt=XT5%iz%7T=ckL)MSg<+^XUv_&q)3~H?@Wssp9Bz>-O58HX`>)f@57vhR&huW7Go`}UoeeiWC$*%wd)qbRE2ij_cCejAye zCZn>i=rSJ>RtJR@WK0+Iin5kow2-6(S`B1W%6^naQZa6ldNi4Mh}E6{-lNWXsWCKK z6`l2(NF^jF!$7CYh~O~5wNQnmincAZxShSpI)0!nDrgiyeD0Py#G(}?H4HO-uyW*; z6;mT`jN!yy#;TYwJ}~wh`J0HW3V@+r8RNA`>i8|jh*J+?@$U}7?&H)g^FeP5f-VjU zl-W8LQUR74i&`azAY;;NC~B(B>J7ybr29MsBv;!PR;)_;vRIcI++EQLZ zg_hSv9uMq{!u~E4bizPUBHHSs>cOegY(v{R@q0RQMJHwhVSAX)U(SBt?>93d@h(#` zkSh4xGX=V66mlkc7-a@mv`q$?#29h(ID-v&;mG^=}G)%Y?G{8YoB=}a~h`OP? zLVh#bta|C2Ht@7*1!m>3qrfdLjMqIN)A*p)gGTT^vIb1CCs~qW%LK$=fQ<*8G`C5* zM-D-|ecO3O4Mp?;B-O&f}FT6CxdzcB*c1vr=5sjr#Ygp-U5N2_5feM1O$EMp{t+E{0+ zgU~~5j>s7(dkPaNL~D-#BnmQ}X;M<{L)xjL=qU6Twj&n01g(Q;=j9-!UaEm(0?_zT zK4NL&G~2D+!&9vT1ANGMV3Nygsv0YHmEHQSi9Js{lxxRZ%cEsb+TpXIx9P}2ufRSp zWAgmD$4J9Edm;QYjC7D1p-sq*J25a-6)P=Cg2`Sm%oSc)wmM1hwa$@k`jW{)ODGv_ z3umbv(GHFb-M6;bcd;%7&f?_(4Klpek~HIeC$a19hsTV+zEO#s-u0NrOT+!IL0#Fx z@0D^0gO(3*HD!K=R5#(0%iim4`qu58_5eX=I)+>9X>Dlu5_~+uo%w6`cv<9PJqQL+A zU}$-n6{QkdJA*R?HM@BkeGR2;yNROB@kL+PpeAn7B+sW4*AU15cQT*G(|8(B;|Cgl M0IpaO1ppEN0EVJ84*&oF diff --git a/src/models/cross/code/model.py b/src/models/cross/code/model.py deleted file mode 100644 index 7b713fc4..00000000 --- a/src/models/cross/code/model.py +++ /dev/null @@ -1,68 +0,0 @@ -from djl_python import Input, Output -import torch -import logging -import math -import os -from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer, AutoModel - -device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') -print(f'--device={device}') - - -def load_model(properties): - tensor_parallel = properties["tensor_parallel_degree"] - model_location = properties['model_dir'] - if "model_id" in properties: - model_location = properties['model_id'] - logging.info(f"Loading model in {model_location}") - - tokenizer = AutoTokenizer.from_pretrained(model_location, use_fast=False) - model = AutoModel.from_pretrained( - model_location, - # device_map="balanced_low_0", - trust_remote_code=True - ).half() - # load the model on GPU - model.to(device) - model.requires_grad_(False) - model.eval() - - return model, tokenizer - - -model = None -tokenizer = None -generator = None - -def handle(inputs: Input): - global model, tokenizer - if not model: - model, tokenizer = load_model(inputs.get_properties()) - - if inputs.is_empty(): - return None - data = inputs.get_as_json() - - queries = data["inputs"] - docs = data["docs"] - - encoded_input = tokenizer(text = [queries], text_pair=[docs], padding=True, truncation=True, max_length=2048, return_tensors='pt')['input_ids'].to(device) - # Compute token embeddings - with torch.no_grad(): - model_output = model(input_ids=encoded_input) - - # Perform pooling. In this case, max pooling. 
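For context, this handler consumes the same payload that crossContentHandler.transform_input in sm_utils.py produces, and returns the "scores" field that transform_output reads back. A minimal sketch of invoking the deployed endpoint directly (the query/doc strings are hypothetical; the endpoint name matches the one created in llm-stack.ts):

    import json
    import boto3

    sm_runtime = boto3.client("sagemaker-runtime")
    payload = {"inputs": "什么是Data Transfer Hub?", "docs": "Data Transfer Hub是一个数据传输解决方案"}  # hypothetical
    response = sm_runtime.invoke_endpoint(
        EndpointName="cross-endpoint",  # created in llm-stack.ts
        Body=json.dumps(payload),
        ContentType="application/json",
    )
    scores = json.loads(response["Body"].read())["scores"]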
- -# # preprocess -# input_ids = tokenizer(input_sentences, return_tensors="pt").input_ids -# # pass inputs with all kwargs in data -# if params is not None: -# outputs = model.generate(input_ids, **params) -# else: -# outputs = model.generate(input_ids) - -# # postprocess the prediction -# prediction = tokenizer.decode(outputs[0], skip_special_tokens=True) - - result = {"scores": model_output.cpu().numpy()} - return Output().add_as_json(result) \ No newline at end of file diff --git a/src/models/cross/code/serving.properties b/src/models/cross/code/serving.properties deleted file mode 100644 index d85a2fbe..00000000 --- a/src/models/cross/code/serving.properties +++ /dev/null @@ -1,5 +0,0 @@ -engine=Python -option.tensor_parallel_degree=1 -# update according to your own path -# option.s3url = s3://<_S3ModelAssets>/<_AssetsStack._crossModelPrefix> -option.s3url = s3://llm-rag/buffer-cross-001-model/ \ No newline at end of file diff --git a/src/models/cross/model/add_your_model_here.txt b/src/models/cross/model/add_your_model_here.txt deleted file mode 100644 index e69de29b..00000000 diff --git a/src/models/cross/model/model.sh b/src/models/cross/model/model.sh deleted file mode 100755 index 270539ca..00000000 --- a/src/models/cross/model/model.sh +++ /dev/null @@ -1,57 +0,0 @@ -function usage { - echo "Make sure Python installed properly. Usage: $0 -t TOKEN [-m MODEL_NAME] [-c COMMIT_HASH] [-s S3_BUCKET_NAME]" - echo " -t TOKEN Hugging Face token (required)" - echo " -m MODEL_NAME Model name (default: csdc-atl/buffer-cross-001)" - echo " -c COMMIT_HASH Commit hash (default: 46d270928463db49b317e5ea469a8ac8152f4a13)" - echo " -s S3_BUCKET_NAME S3 bucket name to upload the model (default: llm-rag)" - exit 1 -} - -# Default values -model_name="csdc-atl/buffer-cross-001" -commit_hash="46d270928463db49b317e5ea469a8ac8152f4a13" -s3_bucket_name="llm-rag" # Default S3 bucket name - -# Parse command-line options -while getopts ":t:m:c:s:" opt; do - case $opt in - t) hf_token="$OPTARG" ;; - m) model_name="$OPTARG" ;; - c) commit_hash="$OPTARG" ;; - s) s3_bucket_name="$OPTARG" ;; - \?) echo "Invalid option: -$OPTARG" >&2; usage ;; - :) echo "Option -$OPTARG requires an argument." >&2; usage ;; - esac -done - -# Validate the hf_token and python interpreter exist -if [ -z "$hf_token" ] || ! command -v python &> /dev/null; then - usage -fi - -# Install necessary packages -pip install huggingface-hub -Uqq -pip install -U sagemaker - -# Define local model path -local_model_path="." - -# Uncomment the line below if you want to create a specific directory for the model -# mkdir -p $local_model_path - -# Download model snapshot in current folder without model prefix added -python -c "from huggingface_hub import snapshot_download; from pathlib import Path; snapshot_download(repo_id='$model_name', revision='$commit_hash', cache_dir=Path('.'), token='$hf_token')" - -# Find model snapshot path with the first search result -model_snapshot_path=$(find . 
-path '*/snapshots/*' -type d -print -quit) -echo "Model snapshot path: $model_snapshot_path" - -# s3:/// -aws s3 cp --recursive $model_snapshot_path s3://$s3_bucket_name/buffer-cross-001-model - -# Modify the content of serving.properties and re-tar the model -cd ../code -file_path="serving.properties" -sed -i "s|option.s3url = s3://[^/]*/buffer-cross-001-model/|option.s3url = s3://$s3_bucket_name/buffer-cross-001-model/|" $file_path -rm cross_model.tar.gz -tar czvf cross_model.tar.gz * diff --git a/src/models/embedding/code/model.py b/src/models/embedding/code/model.py deleted file mode 100644 index b3136d0b..00000000 --- a/src/models/embedding/code/model.py +++ /dev/null @@ -1,42 +0,0 @@ -from djl_python import Input, Output -import torch -import logging -import math -import os -from sentence_transformers import SentenceTransformer - -def load_model(properties): - tensor_parallel = properties["tensor_parallel_degree"] - model_location = properties['model_dir'] - if "model_id" in properties: - model_location = properties['model_id'] - logging.info(f"Loading model in {model_location}") - - model = SentenceTransformer(model_location) - model = model.eval().cuda() - - return model - -model = None - -def handle(inputs: Input): - global model - if not model: - model = load_model(inputs.get_properties()) - - if inputs.is_empty(): - return None - data = inputs.get_as_json() - - input_sentences = None - inputs = data["inputs"] - if isinstance(inputs, list): - input_sentences = inputs - else: - input_sentences = [inputs] - logging.info(f"inputs: {input_sentences}") - - sentence_embeddings = model.encode(input_sentences, normalize_embeddings=True) - - result = {"sentence_embeddings": sentence_embeddings} - return Output().add_as_json(result) diff --git a/src/models/embedding/code/requirements.txt b/src/models/embedding/code/requirements.txt deleted file mode 100644 index 9f6724c7..00000000 --- a/src/models/embedding/code/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -transformers==4.31.0 -accelerate==0.20.3 -sentence-transformers diff --git a/src/models/embedding/code/s2e_model.tar.gz b/src/models/embedding/code/s2e_model.tar.gz deleted file mode 100644 index 639cb2f7cd90ee5773971d70ac28a311da4d22b3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1264 zcmVtmcr#M} z8jp&$r1NY9!x)M(B|Jal<6x1+`^w!Cd z-z=FLd1Z7b@-kLNgz=uTH{>rOvN8aMe5H(6BCeu$=p(**2#bHe4fYTv?wEJFO9)=E zm7vt-IhP7B)tJ;W-Ub;X1+czy1&uI}7r43jq*_R}QBz4vYfP?LX%jqBY`HR{P0+%% zjtGygd?FfSvfdaHpW(crauHq7n(_il)V$8KsHeBzSht;|D_U2vH+<+ALd*(+c^La& z&VJw6n;H@OD-+U_D)`zm1v+0UBv|~GA}f>LK3vf{?u8wFY!&yL<4w4brCn4$2o8gO zcAQ`d92CWB=md_{4N#s-A+Y+EAy2}U_n1(F#I-@g2 zZVulvr41GuGzdjRwWUl7!40NM6RZo5Og6}#@V4W)`*XOFl$TKAjoX%`E!SULfIEmM z(%0^ z@K)nN33^oKPRF(W;BdTiYD_{8aV6{e4)=W^2)tjpZ_s)%l4kUX7(?<#Gzbg(8G6mk zDx{waSHu0lZP7iHN_=pzRHLyCy;*O$6P=$=mt%?_ zH>5gm8;SWG42GfeVWUJ2V1#rK`M|MKe>F_tBngM)Q3+e(3ir zKZ{|Gk?89p^r*={pZ_$dD@(IYqxkn`%WnP2{GZK^PP+MjnjM|u{C{$iO`qrg|6&2h zrVNWSH~kNUv}{lJkFkp>)/<_AssetsStack._embeddingModelPrefix> -option.s3url = s3://llm-rag/buffer-embedding-002-model/ \ No newline at end of file diff --git a/src/models/embedding/model/add_your_model_here.txt b/src/models/embedding/model/add_your_model_here.txt deleted file mode 100644 index e69de29b..00000000 diff --git a/src/models/embedding/model/model.sh b/src/models/embedding/model/model.sh deleted file mode 100755 index 7694295f..00000000 --- a/src/models/embedding/model/model.sh +++ /dev/null @@ -1,57 +0,0 @@ -function usage { - echo "Make sure Python 
installed properly. Usage: $0 -t TOKEN [-m MODEL_NAME] [-c COMMIT_HASH]" - echo " -t TOKEN Hugging Face token (required)" - echo " -m MODEL_NAME Model name (default: csdc-atl/buffer-cross-001)" - echo " -c COMMIT_HASH Commit hash (default: 46d270928463db49b317e5ea469a8ac8152f4a13)" - echo " -s S3_BUCKET_NAME S3 bucket name to upload the model (default: llm-rag)" - exit 1 -} - -# Default values -model_name="BAAI/bge-large-zh-v1.5" -commit_hash="00f8ffc4928a685117583e2a38af8ebb65dcec2c" -s3_bucket_name="llm-rag" # Default S3 bucket name - -# Parse command-line options -while getopts ":t:m:c:s:" opt; do - case $opt in - t) hf_token="$OPTARG" ;; - m) model_name="$OPTARG" ;; - c) commit_hash="$OPTARG" ;; - s) s3_bucket_name="$OPTARG" ;; - \?) echo "Invalid option: -$OPTARG" >&2; usage ;; - :) echo "Option -$OPTARG requires an argument." >&2; usage ;; - esac -done - -# Validate the hf_token and python interpreter exist -if [ -z "$hf_token" ] || ! command -v python &> /dev/null; then - usage -fi - -# Install necessary packages -pip install huggingface-hub -Uqq -pip install -U sagemaker - -# Define local model path -local_model_path="." - -# Uncomment the line below if you want to create a specific directory for the model -# mkdir -p $local_model_path - -# Download model snapshot in current folder without model prefix added -python -c "from huggingface_hub import snapshot_download; from pathlib import Path; snapshot_download(repo_id='$model_name', revision='$commit_hash', cache_dir=Path('.'), token='$hf_token')" - -# Find model snapshot path with the first search result -model_snapshot_path=$(find . -path '*/snapshots/*' -type d -print -quit) -echo "Model snapshot path: $model_snapshot_path" - -# s3:/// -aws s3 cp --recursive $model_snapshot_path s3://$s3_bucket_name/buffer-embedding-002-model - -# Modify the content of serving.properties and re-tar the model -cd ../code -file_path="serving.properties" -sed -i "s|option.s3url = s3://[^/]*/buffer-embedding-002-model/|option.s3url = s3://$s3_bucket_name/buffer-embedding-002-model/|" $file_path -rm s2e_model.tar.gz -tar czvf s2e_model.tar.gz * diff --git a/src/models/instruct/code/model.py b/src/models/instruct/code/model.py deleted file mode 100644 index dfed4f07..00000000 --- a/src/models/instruct/code/model.py +++ /dev/null @@ -1,60 +0,0 @@ -from djl_python import Input, Output -import torch -import logging -import math -import os -import json -from transformers import AutoTokenizer, AutoModelForCausalLM - -def load_model(properties): - tensor_parallel = properties["tensor_parallel_degree"] - model_location = properties['model_dir'] - if "model_id" in properties: - model_location = properties['model_id'] - logging.info(f"Loading model in {model_location}") - - tokenizer = AutoTokenizer.from_pretrained(model_location, trust_remote_code=True) - model = AutoModelForCausalLM.from_pretrained(model_location, trust_remote_code=True) - model = model.eval().half().cuda() - - return model, tokenizer - - -model = None -tokenizer = None -generator = None - -def stream_items(input_sentences, history, params): - global model, tokenizer - res_generator = model.stream_chat(tokenizer, input_sentences, history=history, **params) - size = 0 - response = "" - for response in res_generator: - this_response = response[size:] - size = len(response) - stream_buffer = {"outputs":this_response, "finished": len(this_response)==0} - yield stream_buffer - -def handle(inputs: Input): - global model, tokenizer - if not model: - model, tokenizer = 
load_model(inputs.get_properties()) - - if inputs.is_empty(): - return None - data = inputs.get_as_json() - - input_sentences = data["inputs"] - params = data["parameters"] - history = data["history"] - stream = data.get('stream', False) - - outputs = Output() - if stream: - outputs.add_property("content-type", "application/jsonlines") - outputs.add_stream_content(stream_items(input_sentences, history, params)) - else: - response = model.chat(tokenizer, input_sentences, history=history, **params) - result = {"outputs": response} - outputs.add_as_json(result) - return outputs \ No newline at end of file diff --git a/src/models/instruct/code/model.tar.gz b/src/models/instruct/code/model.tar.gz deleted file mode 100644 index 0a5b03816fa836d6209ff7a0c235b3adb1ca3020..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1254 zcmVfPWgcuJ=i+~fB@0JFl`2(&775-+by|FD z`^eqyvAYK`QKS?oEh42aRT@YHs8VQEfrBbl1W1(+Q=em=@)bIFdzU-ADYR8WgyebI z+nt%;%+Ag1&7RE+Vrfq2#1T@VP?)Y%kUv(dg!B{Hs67x_Ja1ukA#|>*~qwJ=}t$s8AYb;FfP$& z)Ny4Vec?(NQ{kdyywQ#(7HhRk+KQ$&misIwQUR>wn2ILjHsK-yKI2Muk{)DhMBcVHj*NoeR{|rlYWdiJ>rRezbLm69~9TiIE)!$-|3)EY~@)nIw7x znDvX?CAl~rA>!zpI27Pu)W|h#Wpi2^TTK`mu7R_Ot$_uXQze>@Hwu{$p?uCLNhKo? zEVKwE97A443o3Qmm>L$zntl?Cno8y{b6OU=jIF<03g0$mQt?J!Z+3rXDa?tH!b@OFRYoBsMUc=o!ldRwmt zx0d@4Hg_Ly3^woVZ9kl)r$eooS-KEDF2d?$ygb-^*?;n?|6Mn!n@8T-quu8}40ay$ zwtjv4WB2V(tAMv~)g>aRxy-PeCEraD?yqio_aA#d-%~0z6l=^<@8xg(^}FD-!UpC@ z1VCn%?%sdlt^ac1W(PZ~0MOf7>b+h|ZhUWhd2jn$@9E9{qjeB?yS_Ad`qkd{-QFKh zyze(5dTN7S_g3%CGq1bst=tAHNnie4y7`}C)n9oL2EY3|SRd@Hfmi+K8wwokCXq9V z#DpW8&}@j9Lr{ak!bl(z`40sHB>~}$W3Bn03oNs728K_06h#}=M@k?+nu+yMlPX=U z5f9(xNb&zT64?>t0O%3f@YoX$qk`lU1*wk9MjLF)_&cd!_T}o~m`A5hsahX8OoeOt zCKu8y_YERTG&YR=14v=!j=NhK(_s@QS6vf6;Elk@T@ zY`mxchZ{gO?*FTP|36hK6vzGlhoEGSUawcQa#1U!aHByizp*0qdO<4{wDNJgIi@2b zLHITQLJ~Kxhi~J3^em=PUrF^y?V QWBMrQFHv4Yod6gB08?gLqyPW_ diff --git a/src/models/instruct/code/requirements.txt b/src/models/instruct/code/requirements.txt deleted file mode 100644 index 2b02551a..00000000 --- a/src/models/instruct/code/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -transformers==4.33.1 -accelerate==0.20.3 -transformers_stream_generator \ No newline at end of file diff --git a/src/models/instruct/code/serving.properties b/src/models/instruct/code/serving.properties deleted file mode 100644 index b9e85357..00000000 --- a/src/models/instruct/code/serving.properties +++ /dev/null @@ -1,6 +0,0 @@ -engine=Python -option.tensor_parallel_degree=1 -option.enable_streaming=True -# update according to your own path -# option.s3url = s3://<_S3ModelAssets>/<_AssetsStack._instructModelPrefix> -option.s3url = s3://llm-rag/buffer-instruct-003-model/ \ No newline at end of file diff --git a/src/models/instruct/model/add_your_model_here.txt b/src/models/instruct/model/add_your_model_here.txt deleted file mode 100644 index e69de29b..00000000 diff --git a/src/models/instruct/model/model.sh b/src/models/instruct/model/model.sh deleted file mode 100755 index 48a6360f..00000000 --- a/src/models/instruct/model/model.sh +++ /dev/null @@ -1,57 +0,0 @@ -function usage { - echo "Make sure Python installed properly. 
Usage: $0 -t TOKEN [-m MODEL_NAME] [-c COMMIT_HASH]" - echo " -t TOKEN Hugging Face token (required)" - echo " -m MODEL_NAME Model name (default: csdc-atl/buffer-cross-001)" - echo " -c COMMIT_HASH Commit hash (default: 46d270928463db49b317e5ea469a8ac8152f4a13)" - echo " -s S3_BUCKET_NAME S3 bucket name to upload the model (default: llm-rag)" - exit 1 -} - -# Default values -model_name="csdc-atl/buffer-instruct-InternLM-001" -commit_hash="b43935b91d90df80bccdab4c2e8a5d6315f5278b" -s3_bucket_name="llm-rag" # Default S3 bucket name - -# Parse command-line options -while getopts ":t:m:c:s:" opt; do - case $opt in - t) hf_token="$OPTARG" ;; - m) model_name="$OPTARG" ;; - c) commit_hash="$OPTARG" ;; - s) s3_bucket_name="$OPTARG" ;; - \?) echo "Invalid option: -$OPTARG" >&2; usage ;; - :) echo "Option -$OPTARG requires an argument." >&2; usage ;; - esac -done - -# Validate the hf_token and python interpreter exist -if [ -z "$hf_token" ] || ! command -v python &> /dev/null; then - usage -fi - -# Install necessary packages -pip install huggingface-hub -Uqq -pip install -U sagemaker - -# Define local model path -local_model_path="." - -# Uncomment the line below if you want to create a specific directory for the model -# mkdir -p $local_model_path - -# Download model snapshot in current folder without model prefix added -python -c "from huggingface_hub import snapshot_download; from pathlib import Path; snapshot_download(repo_id='$model_name', revision='$commit_hash', cache_dir=Path('.'), token='$hf_token')" - -# Find model snapshot path with the first search result -model_snapshot_path=$(find . -path '*/snapshots/*' -type d -print -quit) -echo "Model snapshot path: $model_snapshot_path" - -# s3:/// -aws s3 cp --recursive $model_snapshot_path s3://$s3_bucket_name/buffer-instruct-003-model - -# Modify the content of serving.properties and re-tar the model -cd ../code -file_path="serving.properties" -sed -i "s|option.s3url = s3://[^/]*/buffer-instruct-003-model/|option.s3url = s3://$s3_bucket_name/buffer-instruct-003-model/|" $file_path -rm model.tar.gz -tar czvf model.tar.gz * diff --git a/src/os-stack.ts b/src/os-stack.ts deleted file mode 100644 index 90c7761f..00000000 --- a/src/os-stack.ts +++ /dev/null @@ -1,47 +0,0 @@ -import { NestedStack, StackProps, RemovalPolicy } from 'aws-cdk-lib'; -import { Construct } from 'constructs'; -import { EngineVersion, Domain} from 'aws-cdk-lib/aws-opensearchservice'; -import * as ec2 from 'aws-cdk-lib/aws-ec2'; -import * as iam from "aws-cdk-lib/aws-iam"; - -interface osStackProps extends StackProps { - _vpc: ec2.Vpc; - _securityGroup: ec2.SecurityGroup; -} - -export class OpenSearchStack extends NestedStack { - _domainEndpoint; - _domain; - - constructor(scope: Construct, id: string, props: osStackProps) { - super(scope, id, props); - - const devDomain = new Domain(this, 'Domain', { - version: EngineVersion.OPENSEARCH_2_5, - removalPolicy: RemovalPolicy.DESTROY, - vpc:props._vpc, - zoneAwareness: { - enabled:true - }, - securityGroups: [props._securityGroup], - capacity: { - dataNodes: 2, - }, - ebs: { - volumeSize: 300, - volumeType: ec2.EbsDeviceVolumeType.GENERAL_PURPOSE_SSD_GP3, - }, - }); - - devDomain.addAccessPolicies(new iam.PolicyStatement({ - actions: ['es:*'], - effect: iam.Effect.ALLOW, - principals:[new iam.AnyPrincipal()], - resources: [`${devDomain.domainArn}/*`], - })) - - this._domainEndpoint = devDomain.domainEndpoint; - this._domain = devDomain; - - } -} \ No newline at end of file diff --git a/src/panel/.env_sample b/src/panel/.env_sample 
deleted file mode 100644 index 34c96a45..00000000 --- a/src/panel/.env_sample +++ /dev/null @@ -1,3 +0,0 @@ -PIPELINE_URL='Check from CDK output' -S3_BUCKET_NAME='Check from CDK output' -GLUE_JOB_NAME='Check from CDK output' diff --git a/src/panel/app.py b/src/panel/app.py deleted file mode 100644 index 31af707b..00000000 --- a/src/panel/app.py +++ /dev/null @@ -1,202 +0,0 @@ -from dotenv import load_dotenv -import os -from PyPDF2 import PdfReader -import streamlit as st -from langchain.text_splitter import CharacterTextSplitter -from langchain.embeddings.openai import OpenAIEmbeddings -from langchain.vectorstores import FAISS -from langchain.chains.question_answering import load_qa_chain -from langchain.llms import OpenAI -from langchain.callbacks import get_openai_callback -import boto3 -import requests -import json -import time -# Load environment variables -load_dotenv() - -# Create an S3 client -s3 = boto3.client('s3') -glue = boto3.client('glue') -# load the job name from environment variable and convert it to string like 'PythonShellJobB6964098-YYlLj16uCsAn' -glue_job_name = str(os.getenv('GLUE_JOB_NAME')) - -def process_text(text): - # Split the text into chunks using langchain - text_splitter = CharacterTextSplitter( - separator="\n", - chunk_size=1000, - chunk_overlap=200, - length_function=len - ) - chunks = text_splitter.split_text(text) - - # Convert the chunks of text into embeddings to form a knowledge base, should aware of the Rate limit. - # Prompt like "reached for text-embedding-ada-002 in organization org-xx on tokens per min. Limit: 150000 / min. Current: 1 / min.≈ - embeddings = OpenAIEmbeddings() - knowledgeBase = FAISS.from_texts(chunks, embeddings) - - return knowledgeBase - -def pipeline_tab(): - st.title("LLM Bot ETL Pipeline") - # text box to allow user input the url address of the pipeline with default value - pipeline_url = st.text_input('Pipeline URL', value=os.getenv('PIPELINE_URL')) - - col1, col2 = st.columns(2) - with col1: - # sub panel to upload pdf and trigger the pipeline - st.subheader('Online ETL Job') - # adjust the width of the file uploader and hint text - pdf = st.file_uploader('Upload your Document', type='pdf') - - if pdf is not None: - # upload the pdf onto s3 bucket created in CDK stack with fixed prefix 'documents' , and trigger the pipeline - s3.upload_fileobj(pdf, os.getenv('S3_BUCKET'), 'documents/' + pdf.name) - - # add hint text to tell user that the online ETL job will be triggered automatically after the pdf is uploaded - st.markdown('**Note:** The online ETL job will be triggered automatically after the pdf is uploaded.') - - with col2: - # sub panel to operate and monitor the offline ETL job running on AWS Glue - # input box to allow user input request body and specify endpoint url and button to trigger the request sending to the endpoint - st.subheader('Offline ETL Job') - - # dropdown to list all available s3 bucket and allow user to select one for further operation - s3_buckets = s3.list_buckets() - s3_bucket_names = [bucket['Name'] for bucket in s3_buckets['Buckets']] - s3_bucket_name = st.selectbox('Select S3 Bucket', s3_bucket_names) - - # dropdown to list all subfolders under the selected s3 bucket and allow user to select one for further operation - s3_objects = s3.list_objects(Bucket=s3_bucket_name) - s3_object_names = [obj['Key'] for obj in s3_objects['Contents']] - s3_object_name = st.selectbox('Select S3 Object', s3_object_names) - - # simple checkboxed to allow user select options to trigger the pipeline - col3, col4 = 
st.columns(2) - with col3: - documentEnhance = st.checkbox('Doc Enhance') - qaPairEnhance = st.checkbox('QA Pair Enhance') - with col4: - keyWordExtract = st.checkbox('Key Word Extract') - textSummarize = st.checkbox('Text Summarize') - - # request body to be sent to the endpoint - request_body = { - 's3Bucket': s3_bucket_name, - 's3Prefix': s3_object_name, - 'documentEnhance': documentEnhance, - 'qaEnhance': str(qaPairEnhance), - 'keyWordExtract': keyWordExtract, - 'textSummarize': textSummarize, - 'offline': True - } - # send button to trigger the request sending to the endpoint with s3_bucket_name and s3_object_name as request body, in conform with - send_button = st.button('Start Offline Job') - if send_button: - response = requests.post(pipeline_url + '/etl', json=request_body, headers={'Content-Type': 'application/json'}) - st.text_area('Response:', value=response.text, height=200, max_chars=None) - - # progress bar to show the offline ETL job running status - st.subheader('Online & Offline ETL Job Status') - refresh_button = st.button('Refresh') - if refresh_button: - # list all job running with a specific job name - job_runs = glue.get_job_runs(JobName=glue_job_name, MaxResults=1) - # get the latest job run id - job_run_id = job_runs['JobRuns'][0]['Id'] - # get the latest job run status - job_status = glue.get_job_run(JobName=glue_job_name, RunId=job_run_id)['JobRun']['JobRunState'] - # output the job status details with slim height - st.text_area('Job Status:', value=json.dumps(job_status, indent=4), height=100, max_chars=None) - - # sub pannel to query and search the embedding in AOS - st.subheader('Query and Search AOS') - query = st.text_input('Input your query body here', value='{"aos_index": "chatbot-index", "query": {"operation": "match_all", "match_all": {}}}') - # send button to trigger the request sending to the endpoint with query as request body - - request_body = { - 'aos_index': 'chatbot-index', - 'operation': 'match_all', - 'body': '' - } - send_button = st.button('Send') - if send_button: - response = requests.get(pipeline_url + '/embedding', json=request_body, headers={'Content-Type': 'application/json'}) - st.text_area('Response:', value=response.text, height=200, max_chars=None) - -def llm_bot_tab(): - # user input box to allow user input question - st.title("LLM Bot") - query = st.text_input('Ask a question to the PDF') - # cancel button to allow user to cancel the question - cancel_button = st.button('Cancel') - if cancel_button: - st.stop() - # send button to trigger the request sending to the endpoint with query as request body - send_button = st.button('Send') - if send_button: - # request body to be sent to the endpoint - request_body = { - "model": "knowledge_qa", - "messages": [ - { - "role": "user", - "content": query - } - ], - "temperature": 0.7 - } - response = requests.post(os.getenv('PIPELINE_URL') + '/llm', json=request_body, headers={'Content-Type': 'application/json'}) - try: - data_dict = json.loads(response.text) - content = data_dict["choices"][0]["message"]["content"] - st.text_area('Response:', value=content.encode('utf-8').decode('utf-8'), height=200, max_chars=None) - except json.JSONDecodeError as e: - st.error(f"Failed to parse response as JSON: {e}") - st.text(response.text) - # data_dict = response.text.json() - # content = data_dict["choices"][0]["message"]["content"] - # st.text_area('Response:', value=content.encode('utf-8').decode('unicode_escape'), height=200, max_chars=None) - -def main(): - # Create a tab bar - 
diff --git a/src/panel/requirements.txt b/src/panel/requirements.txt
deleted file mode 100644
index 04af6ac9..00000000
--- a/src/panel/requirements.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-python-dotenv
-PyPDF2
-streamlit
-langchain
-openai
-tiktoken
\ No newline at end of file
diff --git a/src/sample/dth.txt b/src/sample/dth.txt
deleted file mode 100644
index 1c73643b..00000000
--- a/src/sample/dth.txt
+++ /dev/null
@@ -1,18 +0,0 @@
-Data Transfer Hub (数据传输解决方案)
-
-轻松将数据移入和移出 AWS 中国区域
-
-概览
-
-此解决方案可为 Amazon Simple Storage Service (Amazon S3) 对象和 Amazon Elastic Container Registry (Amazon ECR) 映像提供安全、可扩展且可追踪的数据传输。使用数据传输解决方案,您可以执行以下任何任务:在 AWS S3 之间传输对象
-
-优势
-
-直观的用户界面
-客户可在用户界面上为 Amazon S3 对象和 Amazon ECR 映像创建和管理数据传输任务。
-
-支持各类源
-将数据从其他云服务商的对象存储服务(包括阿里云 OSS,腾讯 COS,七牛 Kodo 以及其他兼容 Amazon S3 的云存储服务)传输到 Amazon S3。在 Amazon ECR 之间传输容器镜像。将容器镜像从公共容器镜像仓库(例如 Docker Hub、Google gcr.io 和 Red Hat Quay.io)传输到 Amazon ECR。
-
-无服务器架构
-传输任务可按需使用并随用随付。有关更多信息,请参阅实施指南的“成本”部分。
\ No newline at end of file
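dth.txt above is the fixture the deleted sample scripts embedded; they load such files through langchain's `UnstructuredFileLoader`, as the wrapper below shows. A minimal sketch of loading the fixture locally (path is illustrative, and the `unstructured` package must be installed):

```python
# Sketch: load the dth.txt fixture the way the deleted samples did.
from langchain.document_loaders.unstructured import UnstructuredFileLoader

docs = UnstructuredFileLoader("src/sample/dth.txt").load()
print(docs[0].page_content[:80])
print(docs[0].metadata)  # e.g. {'source': 'src/sample/dth.txt'}
```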
diff --git a/src/sample/embedding_wrapper.py b/src/sample/embedding_wrapper.py
deleted file mode 100644
index f0eaf2cf..00000000
--- a/src/sample/embedding_wrapper.py
+++ /dev/null
@@ -1,277 +0,0 @@
-import asyncio
-import json
-import os
-from functools import partial
-from typing import Any, Dict, List, Optional
-
-from langchain.embeddings.base import Embeddings
-# from langchain.pydantic_v1 import BaseModel, Extra, root_validator
-
-import time
-import logging
-import boto3
-import tempfile
-import numpy as np
-
-from langchain.vectorstores import OpenSearchVectorSearch
-from langchain.document_loaders.unstructured import UnstructuredFileLoader
-from langchain.text_splitter import RecursiveCharacterTextSplitter
-
-from opensearchpy import RequestsHttpConnection
-from sagemaker_utils import create_sagemaker_embeddings_from_js_model, SagemakerEndpointVectorOrCross
-from requests_aws4auth import AWS4Auth
-
-s3 = boto3.resource('s3')
-aws_region = boto3.Session().region_name
-credentials = boto3.Session().get_credentials()
-awsauth = AWS4Auth(credentials.access_key, credentials.secret_key, aws_region, 'es', session_token=credentials.token)
-
-logger = logging.getLogger()
-logger.setLevel(logging.INFO)
-
-MAX_FILE_SIZE = 1024*1024*100 # 100MB
-MAX_OS_DOCS_PER_PUT = 500
-CHUNK_SIZE_FOR_DOC_SPLIT = 600
-CHUNK_OVERLAP_FOR_DOC_SPLIT = 20
-
-class CSDCEmbeddings:
-    """CSDC embedding models.
-
-    To authenticate, the AWS client uses the following methods to
-    automatically load credentials:
-    https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html
-
-    If a specific credential profile should be used, you must pass
-    the name of the profile from the ~/.aws/credentials file that is to be used.
-
-    Make sure the credentials / roles used have the required policies to
-    access the CSDC service.
-    """
-
-    """
-    Example:
-        .. code-block:: python
-            from llm_utils import CSDCEmbeddings
-
-            embeddings = CSDCEmbeddings(region = 'us-east-1', aosEndpointName = 'Amazon OpenSearch Service Domain Endpoint')
-            doc_result = embeddings.embed_documents(bucketName=, prefix=)
-            logging.info(f"doc_result is {doc_result}, the type of doc_result is {type(doc_result)}")
-    """
-
-    client: Any  #: :meta private:
-    """CSDC client."""
-    region_name: Optional[str] = None
-    """The aws region, e.g., `us-west-2`. Falls back to the AWS_DEFAULT_REGION env variable
-    or the region specified in ~/.aws/config in case it is not provided here.
-    """
-
-    credentials_profile_name: Optional[str] = None
-    """The name of the profile in the ~/.aws/credentials or ~/.aws/config files, which
-    has either access keys or role information specified.
-    If not specified, the default credential profile or, if on an EC2 instance,
-    credentials from IMDS will be used.
-    See: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html
-    """
-
-    model_id: str = "csdc-default-model"
-    """Id of the model to call, e.g., csdc-default-model; this is
-    equivalent to the modelId property in the list-foundation-models api."""
-
-    model_kwargs: Optional[Dict] = None
-    """Keyword arguments to pass to the model."""
-
-    endpoint_url: Optional[str] = None
-    """Needed if you don't want to default to the us-east-1 endpoint."""
-
-    class Config:
-        """Configuration for this pydantic object."""
-
-        # extra = Extra.forbid
-
-    # @root_validator()
-    def validate_environment(cls, values: Dict) -> Dict:
-        """Validate that AWS credentials and the boto3 package exist in the environment."""
-
-        if values["client"] is not None:
-            return values
-
-        try:
-            import boto3
-
-            if values["credentials_profile_name"] is not None:
-                session = boto3.Session(profile_name=values["credentials_profile_name"])
-            else:
-                # use default credentials
-                session = boto3.Session()
-
-            client_params = {}
-            if values["region_name"]:
-                client_params["region_name"] = values["region_name"]
-
-            if values["endpoint_url"]:
-                client_params["endpoint_url"] = values["endpoint_url"]
-
-            values["client"] = session.client("sagemaker-runtime", **client_params)
-
-        except ImportError:
-            raise ModuleNotFoundError(
-                "Could not import boto3 python package. "
-                "Please install it with `pip install boto3`."
-            )
-        except Exception as e:
-            raise ValueError(
-                "Could not load credentials to authenticate with AWS client. "
-                "Please check that credentials in the specified "
-                "profile name are valid."
-            ) from e
-
-        return values
-
-    def _embedding_func_legacy(self, text: str) -> List[float]:
-        """Call out to the CSDC embedding endpoint."""
-        # replace newlines, which can negatively affect performance
-        text = text.replace(os.linesep, " ")
-        _model_kwargs = self.model_kwargs or {}
-        content_type = "application/json"
-
-        input_body = {"inputs": text, **_model_kwargs}
-        body = json.dumps(input_body)
-
-        try:
-            response = self.client.invoke_endpoint(
-                EndpointName = self.endpoint_url, Body=body, ContentType=content_type
-            )
-            response_body = json.loads(response['Body'].read().decode("utf-8"))
-            return response_body.get('sentence_embeddings')
-        except Exception as e:
-            raise ValueError(f"Error raised by inference endpoint: {e}")
-
-    def embed_documents_legacy(self, texts: List[str]) -> List[List[float]]:
-        """Compute doc embeddings using a CSDC model.
-
-        Args:
-            texts: The list of texts to embed.
-
-        Returns:
-            List of embeddings, one for each text.
-        """
-        results = []
-        for text in texts:
-            response = self._embedding_func_legacy(text)
-            results.append(response)
-        return results
-
-    async def aembed_query(self, text: str) -> List[float]:
-        """Asynchronously compute query embeddings using a CSDC model.
-
-        Args:
-            text: The text to embed.
-
-        Returns:
-            Embeddings for the text.
-        """
-
-        return await asyncio.get_running_loop().run_in_executor(
-            None, partial(self.embed_query, text)
-        )
-
-    async def aembed_documents(self, texts: List[str]) -> List[List[float]]:
-        """Asynchronously compute doc embeddings using a CSDC model.
-
-        Args:
-            texts: The list of texts to embed.
-
-        Returns:
-            List of embeddings, one for each text.
-        """
-
-        result = await asyncio.gather(*[self.aembed_query(text) for text in texts])
-
-        return list(result)
-
-    def _construct_shard(self, bucketName: str, prefix: str, embeddingEndpointName: str) -> List:
-        """Inner helper function to construct a shard of documents.
-
-        Args:
-            bucketName (str): name of the S3 bucket holding the raw documents
-            prefix (str): key prefix under which the documents are stored
-            embeddingEndpointName (str): SageMaker endpoint name recorded in each document's metadata
-
-        Returns:
-            List: the first shard of chunked documents, at most MAX_OS_DOCS_PER_PUT entries
-        """
-        docs = []
-        document_bucket = s3.Bucket(bucketName)
-        for obj in document_bucket.objects.filter(Prefix=prefix):
-            if obj.key.endswith("/"):
-                continue
-            else:
-                with tempfile.TemporaryDirectory(dir='/tmp') as temp_dir:
-                    file_path = f"{temp_dir}/{obj.key}"
-                    logging.info(f"bucketName={bucketName}, obj.key={obj.key}, file_path={file_path}")
-                    os.makedirs(os.path.dirname(file_path), exist_ok=True)
-                    s3.meta.client.download_file(bucketName, obj.key, file_path)
-
-                    loader = UnstructuredFileLoader(file_path)
-                    docs.extend(loader.load())
-
-        # add custom metadata fields: timestamp and embeddings_model
-        for doc in docs:
-            doc.metadata['timestamp'] = time.time()
-            doc.metadata['embeddings_model'] = embeddingEndpointName
-
-        text_splitter = RecursiveCharacterTextSplitter(
-            chunk_size = CHUNK_SIZE_FOR_DOC_SPLIT,
-            chunk_overlap = CHUNK_OVERLAP_FOR_DOC_SPLIT,
-            length_function = len,
-        )
-
-        chunks = text_splitter.create_documents([doc.page_content for doc in docs], metadatas=[doc.metadata for doc in docs])
-
-        db_shards = (len(chunks) // MAX_OS_DOCS_PER_PUT) + 1
-        shards = np.array_split(chunks, db_shards)
-        return shards[0].tolist()
-
-    def _embedding_func(self, shard) -> Embeddings:
-        """Call out to the CSDC embedding endpoint.
-        Args:
-            shard (_type_): must be a list of documents, sample format as follows:
-            [
-                Document(
-                    page_content='Data Transfer Hub (数据传输解决方案)\n\n轻松将数据移入和移出 AWS 中国区域\n\n概览\n\n此解决方案可为 Amazon Simple Storage Service (Amazon S3) 对象和 Amazon Elastic Container Registry (Amazon ECR) 映像提供安全、可扩展且可追踪的数据传输。使用数据传输解决方案,您可以执行以下任何任务:在 AWS S3 之间传输对象\n\n优势\n\n直观的用户界面 客户可在用户界面上为 Amazon S3 对象和 Amazon ECR 映像创建和管理数据传输任务。\n\n支持各类源 将数据从其他云服务商的对象存储服务(包括阿里云 OSS,腾讯 COS,七牛 Kodo 以及其他兼容 Amazon S3 的云存储服务)传输到 Amazon S3。在 Amazon ECR 之间传输容器镜像。将容器镜像从公共容器镜像仓库(例如 Docker Hub、Google gcr.io 和 Red Hat Quay.io)传输到 Amazon ECR。\n\n无服务器架构\n\n传输任务可按需使用并随用随付。有关更多信息,请参阅实施指南的“成本”部分。',
-                    metadata={
-                        'source': '/tmp/tmpmmod0k9m/csdc/dth.txt',
-                        'timestamp': 1693494146.1509278,
-                        'embeddings_model': 'embedding-endpoint'
-                    })
-            ]
-        Returns:
-            Embeddings: a langchain embeddings client bound to the SageMaker endpoint.
-        """
-        embeddings = create_sagemaker_embeddings_from_js_model(self.endpoint_url, self.region_name)
-        return embeddings
-
-    def embed_documents(self, bucketName: str, prefix: str) -> List[List[float]]:
-        """Compute doc embeddings using a CSDC model.
-        Args:
-            bucketName (str): The name of the bucket to embed
-            prefix (str): The prefix of the bucket to embed
-        Returns:
-            List of embeddings, one for each text.
-        """
-        shard = self._construct_shard(bucketName, prefix, self.endpoint_url)
-        embeddings = self._embedding_func(shard)
-        return embeddings.embed_documents([str(shard[0])])
-
-    def embed_query(self, text: str) -> List[float]:
-        """Compute query embeddings using a CSDC model.
-
-        Args:
-            text: The text to embed.
-
-        Returns:
-            Embeddings for the text.
-        """
-        embeddings = self._embedding_func(text)
-        return embeddings.embed_documents([text])
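The wrapper above ultimately reduces to one `invoke_endpoint` call with the `{"inputs": ...}` / `"sentence_embeddings"` contract that `_embedding_func_legacy` encodes. A minimal sketch of that contract, with the endpoint name taken from the sample code as a placeholder:

```python
# Sketch of the raw request/response contract the deleted wrapper relied on.
# "embedding-endpoint" is a placeholder, not a real resource.
import json
import boto3

client = boto3.client("sagemaker-runtime", region_name="us-east-1")
response = client.invoke_endpoint(
    EndpointName="embedding-endpoint",
    Body=json.dumps({"inputs": "什么是Data Transfer Hub?"}),
    ContentType="application/json",
)
# The model answers with {"sentence_embeddings": [[...], ...]}
vector = json.loads(response["Body"].read().decode("utf-8"))["sentence_embeddings"]
print(len(vector), len(vector[0]))
```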
- """ - - credentials_profile_name: Optional[str] = None - """The name of the profile in the ~/.aws/credentials or ~/.aws/config files, which - has either access keys or role information specified. - If not specified, the default credential profile or, if on an EC2 instance, - credentials from IMDS will be used. - See: https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html - """ - - model_id: str - """Id of the model to call, e.g., amazon.titan-tg1-large, this is - equivalent to the modelId property in the list-foundation-models api""" - - model_endpoint: str - """SageMaker Endpoint of the model to call, e.g. instruct-endpoint""" - - model_provider: Optional[str] = "CSDC" - """This model is provided by CSDC""" - - model_kwargs: Optional[Dict] = None - """Key word arguments to pass to the model.""" - - endpoint_url: Optional[str] = None - """Needed if you don't want to default to us-east-1 endpoint""" - - streaming: bool = False - """Whether to stream the results.""" - - @root_validator() - def validate_environment(cls, values: Dict) -> Dict: - """Validate that AWS credentials to and python package exists in environment.""" - - # Skip creating new client if passed in constructor - if values["client"] is not None: - return values - - try: - import boto3 - - if values["credentials_profile_name"] is not None: - session = boto3.Session(profile_name=values["credentials_profile_name"]) - else: - # use default credentials - session = boto3.Session() - - client_params = {} - if values["region_name"]: - client_params["region_name"] = values["region_name"] - if values["endpoint_url"]: - client_params["endpoint_url"] = values["endpoint_url"] - - values["client"] = session.client("sagemaker-runtime", **client_params) - - except ImportError: - raise ModuleNotFoundError( - "Could not import boto3 python package. " - "Please install it with `pip install boto3`." - ) - except Exception as e: - raise ValueError( - "Could not load credentials to authenticate with AWS client. " - "Please check that credentials in the specified " - "profile name are valid." 
- ) from e - - return values - - @property - def _identifying_params(self) -> Mapping[str, Any]: - """Get the identifying parameters.""" - _model_kwargs = self.model_kwargs or {} - return { - **{"model_kwargs": _model_kwargs}, - } - - def _get_provider(self) -> str: - return self.model_provider if self.model_provider else self.model_id.split(".")[0] - - def _get_streaming(self) -> bool: - return self.streaming - - def _prepare_input_and_invoke( - self, - prompt: str, - stop: Optional[List[str]] = None, - run_manager: Optional[CallbackManagerForLLMRun] = None, - **kwargs: Any, - ) -> str: - _model_kwargs = self.model_kwargs or {} - - provider = self._get_provider() - streaming = self._get_streaming() - params = {**_model_kwargs, **kwargs} - input_body = LLMInputOutputAdapter.prepare_input(provider, prompt, streaming, params) - body = json.dumps(input_body).encode('utf-8') - accept = "application/json" - contentType = "application/json" - endpoint_name = self.model_endpoint - - try: - response = self.client.invoke_endpoint( - EndpointName = endpoint_name, Body=body, ContentType=contentType - ) - text = LLMInputOutputAdapter.prepare_output(provider, response) - - except Exception as e: - raise ValueError(f"Error raised by invoking CSDC LLM: {e}") - - if stop is not None: - text = enforce_stop_tokens(text, stop) - - return text - - def _prepare_input_and_invoke_stream( - self, - prompt: str, - stop: Optional[List[str]] = None, - run_manager: Optional[CallbackManagerForLLMRun] = None, - **kwargs: Any, - ) -> str: - _model_kwargs = self.model_kwargs or {} - - provider = self._get_provider() - streaming = self._get_streaming() - params = {**_model_kwargs, **kwargs} - input_body = LLMInputOutputAdapter.prepare_input(provider, prompt, streaming, params) - body = json.dumps(input_body).encode('utf-8') - accept = "application/json" - contentType = "application/json" - endpoint_name = self.model_endpoint - - try: - resp = self.client.invoke_endpoint_with_response_stream( - EndpointName=endpoint_name, - Body=body, - ContentType=contentType - ) - return resp - except Exception as e: - raise ValueError(f"Error raised by streaming inference endpoint: {e}") - - -class CSDCLLM(LLM, CSDCLLMBase): - """CSDC LLM base model. - - To authenticate, the AWS client uses the following methods to - automatically load credentials: - https://boto3.amazonaws.com/v1/documentation/api/latest/guide/credentials.html - - If a specific credential profile should be used, you must pass - the name of the profile from the ~/.aws/credentials file that is to be used. - - Make sure the credentials / roles used have the required policies to - access the SageMaker service. - """ - - @property - def _llm_type(self) -> str: - """Return type of llm.""" - return "aws_csdc_llm" - - class Config: - """Configuration for this pydantic object.""" - - extra = Extra.forbid - - def _call( - self, - prompt: str, - stop: Optional[List[str]] = None, - run_manager: Optional[CallbackManagerForLLMRun] = None, - **kwargs: Any, - ) -> str: - """Call out to CSDC LLM model in SageMaker Endpoint. - - Args: - prompt: The prompt to pass into the model. - stop: Optional list of stop words to use when generating. - - Returns: - The string generated by the model. - - Example: - .. 
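A quick reference for how this deleted wrapper was driven; a hedged sketch, with `instruct-endpoint` being the sample endpoint name used elsewhere in this patch rather than a real resource:

```python
# Sketch: instantiate the deleted CSDCLLM wrapper against a SageMaker endpoint.
from csdc_llm import CSDCLLM

llm = CSDCLLM(
    client=None,                      # let validate_environment build the client
    model_id="csdc-default-model",
    model_endpoint="instruct-endpoint",   # placeholder endpoint name
    region_name="us-east-1",
    model_kwargs={"temperature": 0.8},
)
print(llm("请给我介绍一下什么是Data Transfer Hub方案?"))
```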
diff --git a/src/sample/langchain_sample/llm_wrapper.py b/src/sample/langchain_sample/llm_wrapper.py
deleted file mode 100644
index 62c370ce..00000000
--- a/src/sample/langchain_sample/llm_wrapper.py
+++ /dev/null
@@ -1,262 +0,0 @@
-import io
-import json
-
-from typing import Any, AsyncIterator, Dict, Iterator, List, Optional
-
-from langchain.callbacks.manager import (
-    AsyncCallbackManagerForLLMRun,
-    CallbackManagerForLLMRun,
-)
-from langchain.schema.messages import (
-    AIMessage,
-    AIMessageChunk,
-    BaseMessage,
-    ChatMessage,
-    HumanMessage,
-    SystemMessage,
-)
-
-from langchain.chat_models.base import BaseChatModel
-from csdc_llm import CSDCLLMBase
-from langchain.pydantic_v1 import Extra
-from langchain.schema.output import ChatGeneration, ChatGenerationChunk, ChatResult
-
-from langchain.llms.utils import enforce_stop_tokens
-
-
-class ChatPromptAdapter:
-    """Adapter class to prepare the inputs from Langchain into the prompt format
-    that the Chat model expects.
-    """
-    def _convert_one_message_to_text(
-        self,
-        message: BaseMessage,
-        human_prompt: str,
-        ai_prompt: str,
-        system_prompt: str,
-    ) -> str:
-        if isinstance(message, ChatMessage):
-            message_text = f"\n\n{message.role.capitalize()}: {message.content}"
-        elif isinstance(message, HumanMessage):
-            message_text = f"{human_prompt} {message.content}"
-        elif isinstance(message, AIMessage):
-            message_text = f"{ai_prompt} {message.content}"
-        elif isinstance(message, SystemMessage):
-            context = ''.join(message.content.split('\n----------------\n')[1:])
-            message_text = system_prompt.format(context = context)
-        else:
-            raise ValueError(f"Got unknown type {message}")
-        return message_text
-
-    def convert_messages_to_prompt_CSDC(
-        self,
-        messages: List[BaseMessage],
-        *,
-        human_prompt: str = "<|User|>:",
-        ai_prompt: str = "<|Bot|>:",
-        system_prompt: str = "<|System|>:",
-    ) -> str:
-        """Format a list of messages into a full prompt for the CSDC Chat model.
-        Args:
-            messages (List[BaseMessage]): List of BaseMessage to combine.
-            human_prompt (str, optional): Human prompt tag. Defaults to "<|User|>:".
-            ai_prompt (str, optional): AI prompt tag. Defaults to "<|Bot|>:".
-            system_prompt (str, optional): System prompt tag. Defaults to "<|System|>:".
-        Returns:
-            str: Combined string with the necessary human_prompt and ai_prompt tags.
-        """
-
-        messages = messages.copy()  # don't mutate the original list
-        system_prompt = '以下context xml tag内的文本内容为背景知识:\n\n{context}\n\n请根据背景知识, 回答这个问题:'
-
-        text = "".join(
-            self._convert_one_message_to_text(message, human_prompt = '', ai_prompt = '', system_prompt = system_prompt)
-            for message in messages
-        )
-
-        # trim off the trailing ' ' that might come from the "Assistant: "
-        return text.rstrip()
-
-    @classmethod
-    def convert_messages_to_prompt(
-        cls, provider: str, messages: List[BaseMessage], **kwargs: Any
-    ) -> str:
-        if provider == "CSDC":
-            prompt = cls().convert_messages_to_prompt_CSDC(messages=messages, **kwargs)
-        else:
-            raise NotImplementedError(
-                f"Provider {provider} model does not support chat."
-            )
-        return prompt
-
-class LineIterator:
-    """
-    A helper class for parsing the byte stream input.
-
-    The output of the model will be in the following format:
-
-    b'{"outputs": [" a"]}\n'
-    b'{"outputs": [" challenging"]}\n'
-    b'{"outputs": [" problem"]}\n'
-    ...
-
-    While usually each PayloadPart event from the event stream will
-    contain a byte array with a full json, this is not guaranteed
-    and some of the json objects may be split across PayloadPart events.
-
-    For example:
-
-    {'PayloadPart': {'Bytes': b'{"outputs": '}}
-    {'PayloadPart': {'Bytes': b'[" problem"]}\n'}}
-
-    This class accounts for this by concatenating bytes written via the 'write' function
-    and then exposing a method which will return lines (ending with a '\n' character)
-    within the buffer via the 'scan_lines' function.
-    It maintains the position of the last read position to ensure
-    that previous bytes are not exposed again.
-
-    For more details see:
-    https://aws.amazon.com/blogs/machine-learning/elevating-the-generative-ai-experience-introducing-streaming-support-in-amazon-sagemaker-hosting/
-    """
-
-    def __init__(self, stream: Any) -> None:
-        self.byte_iterator = iter(stream)
-        self.buffer = io.BytesIO()
-        self.read_pos = 0
-
-    def __iter__(self) -> "LineIterator":
-        return self
-
-    def __next__(self) -> Any:
-        while True:
-            self.buffer.seek(self.read_pos)
-            line = self.buffer.readline()
-            if line and line[-1] == ord("\n"):
-                self.read_pos += len(line)
-                return line[:-1]
-            try:
-                chunk = next(self.byte_iterator)
-            except StopIteration:
-                if self.read_pos < self.buffer.getbuffer().nbytes:
-                    continue
-                raise
-            if "PayloadPart" not in chunk:
-                # Unknown event type
-                continue
-            self.buffer.seek(0, io.SEEK_END)
-            self.buffer.write(chunk["PayloadPart"]["Bytes"])
-
-
-class ChatCSDC(BaseChatModel, CSDCLLMBase):
-    @property
-    def _llm_type(self) -> str:
-        """Return type of chat model."""
-        return "aws_csdc_chat"
-
-    class Config:
-        """Configuration for this pydantic object."""
-
-        extra = Extra.forbid
-
-    def _invoke(
-        self,
-        messages: List[BaseMessage],
-        stop: Optional[List[str]] = None,
-        run_manager: Optional[CallbackManagerForLLMRun] = None,
-        **kwargs: Any,
-    ) -> ChatResult:
-        provider = self._get_provider()
-
-        prompt = ChatPromptAdapter.convert_messages_to_prompt(
-            provider=provider, messages=messages
-        )
-
-        params: Dict[str, Any] = {**kwargs}
-        if stop:
-            params["stop_sequences"] = stop
-
-        completion = self._prepare_input_and_invoke(
-            prompt=prompt, stop=stop, run_manager=run_manager, **params
-        )
-
-        message = AIMessage(content=completion)
-        return ChatResult(generations=[ChatGeneration(message=message)])
-
-    def _stream(
-        self,
-        messages: List[BaseMessage],
-        stop: Optional[List[str]] = None,
-        run_manager: Optional[CallbackManagerForLLMRun] = None,
-        **kwargs: Any,
-    ) -> Iterator[ChatGenerationChunk]:
-        provider = self._get_provider()
-
-        prompt = ChatPromptAdapter.convert_messages_to_prompt(
-            provider=provider, messages=messages
-        )
-
-        params: Dict[str, Any] = {**kwargs}
-        if stop:
-            params["stop_sequences"] = stop
-
-        resp = self._prepare_input_and_invoke_stream(
-            prompt=prompt, stop=stop, run_manager=run_manager, **params
-        )
-
-        iterator = LineIterator(resp["Body"])
-        current_completion: str = ""
-        for line in iterator:
-            resp = json.loads(line)
-            # each streamed line carries {"outputs": [...]}; join the tokens of this chunk
-            resp_output = "".join(resp.get("outputs"))
-            if stop is not None:
-                # Uses the same approach as the non-streaming path
-                resp_output = enforce_stop_tokens(resp_output, stop)
-            message_chunk = AIMessageChunk(content=resp_output)
-            yield ChatGenerationChunk(message=message_chunk)
-            # current_completion += resp_output
-            if run_manager:
-                run_manager.on_llm_new_token(resp_output)
-
-    def _astream(
-        self,
-        messages: List[BaseMessage],
-        stop: Optional[List[str]] = None,
-        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
-        **kwargs: Any,
-    ) -> AsyncIterator[ChatGenerationChunk]:
-        raise NotImplementedError(
-            """CSDC Chat doesn't support async requests at the moment."""
-        )
-
-    def _generate(
-        self,
-        messages: List[BaseMessage],
-        stop: Optional[List[str]] = None,
-        run_manager: Optional[CallbackManagerForLLMRun] = None,
-        **kwargs: Any,
-    ):
-        streaming = self._get_streaming()
-        completion = ""
-        if streaming:
-            for chunk in self._stream(messages, stop, run_manager, **kwargs):
-                completion += chunk.text
-            message = AIMessage(content=completion)
-            return ChatResult(generations=[ChatGeneration(message=message)])
-        else:
-            return self._invoke(
-                messages=messages, stop=stop, run_manager=run_manager, **kwargs
-            )
-
-    async def _agenerate(
-        self,
-        messages: List[BaseMessage],
-        stop: Optional[List[str]] = None,
-        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
-        **kwargs: Any,
-    ) -> ChatResult:
-        raise NotImplementedError(
-            """CSDC Chat doesn't support async stream requests at the moment."""
-        )
\ No newline at end of file
diff --git a/src/sample/requirements.txt b/src/sample/requirements.txt
deleted file mode 100644
index 3eb2077d..00000000
--- a/src/sample/requirements.txt
+++ /dev/null
@@ -1,7 +0,0 @@
-langchain==0.0.312
-opensearch-py==2.2.0
-boto3==1.26.114
-botocore==1.29.140
-requests_aws4auth==1.2.2
-openai==0.27.6
-tiktoken==0.3.3
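The streaming path is the subtle part of the deleted chat wrapper: `invoke_endpoint_with_response_stream` yields `PayloadPart` events that may split a JSON line in half, and `LineIterator` reassembles them. A hedged sketch of consuming such a stream directly, with the endpoint name again a placeholder:

```python
# Sketch of consuming a SageMaker response stream the way ChatCSDC._stream did.
import json
import boto3
from llm_wrapper import LineIterator

client = boto3.client("sagemaker-runtime", region_name="us-east-1")
resp = client.invoke_endpoint_with_response_stream(
    EndpointName="instruct-endpoint",   # placeholder endpoint name
    Body=json.dumps({"inputs": "讲个笑话", "history": [],
                     "parameters": {}, "stream": True}).encode("utf-8"),
    ContentType="application/json",
)
for line in LineIterator(resp["Body"]):
    # each reassembled line is a JSON object like {"outputs": [" token"]}
    print("".join(json.loads(line)["outputs"]), end="", flush=True)
```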
- """ - results = [] - _chunk_size = len(texts) if chunk_size > len(texts) else chunk_size - st = time.time() - for i in range(0, len(texts), _chunk_size): - response = self._embedding_func(texts[i:i + _chunk_size]) - results.extend(response) - time_taken = time.time() - st - logger.info(f"get results for {len(texts)} in {time_taken}s, length of embeddings list is {len(results)}") - return results - -# class for serializing/deserializing requests/responses to/from the embeddings model -class ContentHandler(EmbeddingsContentHandler): - content_type = "application/json" - accepts = "application/json" - - def transform_input(self, prompt: str, model_kwargs={}) -> bytes: - input_str = json.dumps({"inputs": prompt, **model_kwargs}) - return input_str.encode('utf-8') - - def transform_output(self, output: bytes) -> str: - response_json = json.loads(output.read().decode("utf-8")) - embeddings = response_json["sentence_embeddings"] - if len(embeddings) == 1: - return [embeddings[0]] - return embeddings - -def create_sagemaker_embeddings_from_js_model(embeddings_model_endpoint_name: str, aws_region: str) -> SagemakerEndpointEmbeddingsJumpStart: - # all set to create the objects for the ContentHandler and - # SagemakerEndpointEmbeddingsJumpStart classes - content_handler = ContentHandler() - logger.info(f'content_handler: {content_handler}, embeddings_model_endpoint_name: {embeddings_model_endpoint_name}, aws_region: {aws_region}') - # note the name of the LLM Sagemaker endpoint, this is the model that we would - # be using for generating the embeddings - embeddings = SagemakerEndpointEmbeddingsJumpStart( - endpoint_name = embeddings_model_endpoint_name, - region_name = aws_region, - content_handler = content_handler - ) - return embeddings - -def enforce_stop_tokens(text, stop) -> str: - """Cut off the text as soon as any stop words occur.""" - if stop is None: - return text - - return re.split("|".join(stop), text)[0] - -class vectorContentHandler(EmbeddingsContentHandler): - content_type = "application/json" - accepts = "application/json" - - def transform_input(self, inputs: List[str], model_kwargs: Dict) -> bytes: - input_str = json.dumps({"inputs": inputs, **model_kwargs}) - return input_str.encode("utf-8") - - def transform_output(self, output: bytes) -> List[List[float]]: - response_json = json.loads(output.read().decode("utf-8")) - return response_json["sentence_embeddings"] - -class crossContentHandler(LLMContentHandler): - content_type = "application/json" - accepts = "application/json" - - def transform_input(self, prompt: str, model_kwargs: Dict) -> bytes: - input_str = json.dumps({"inputs": prompt, "docs":model_kwargs["context"]}) - return input_str.encode('utf-8') - - def transform_output(self, output: bytes) -> str: - response_json = json.loads(output.read().decode("utf-8")) - return response_json['scores'][0][1] - -class answerContentHandler(LLMContentHandler): - content_type = "application/json" - accepts = "application/json" - - def transform_input(self, question: str, model_kwargs: Dict) -> bytes: - - template_1 = '以下context xml tag内的文本内容为背景知识:\n\n{context}\n\n请根据背景知识, 回答这个问题:{question}' - context = model_kwargs["context"] - - if len(context) == 0: - prompt = question - else: - prompt = template_1.format(context = model_kwargs["context"], question = question) - - input_str = json.dumps({"inputs": prompt, - "history": model_kwargs["history"], - "parameters": model_kwargs["parameters"]}) - return input_str.encode('utf-8') - - def transform_output(self, output: bytes) -> str: - 
response_json = json.loads(output.read().decode("utf-8")) - return response_json['outputs'] - -def SagemakerEndpointVectorOrCross(prompt: str, endpoint_name: str, region_name: str, model_type: str, stop: List[str], **kwargs) -> SagemakerEndpoint: - """ - original class invocation: - response = self.client.invoke_endpoint( - EndpointName=self.endpoint_name, - Body=body, - ContentType=content_type, - Accept=accepts, - **_endpoint_kwargs, - ) - """ - if model_type == "vector": - content_handler = vectorContentHandler() - embeddings = SagemakerEndpointEmbeddings( - endpoint_name=endpoint_name, - region_name=region_name, - content_handler=content_handler, - ) - query_result = embeddings.embed_query(prompt) - return query_result - elif model_type == "cross": - content_handler = crossContentHandler() - elif model_type == "answer": - content_handler = answerContentHandler() - genericModel = SagemakerEndpoint( - endpoint_name = endpoint_name, - region_name = region_name, - content_handler = content_handler - ) - return genericModel(prompt=prompt, stop=stop, **kwargs) diff --git a/src/sample/sample_llm.py b/src/sample/sample_llm.py deleted file mode 100644 index 0157bf14..00000000 --- a/src/sample/sample_llm.py +++ /dev/null @@ -1,19 +0,0 @@ -import logging -from embedding_wrapper import CSDCEmbeddings - -logger = logging.getLogger() -logger.setLevel(logging.INFO) - -if __name__ == "__main__": - embeddings = CSDCEmbeddings( - aosEndpointName = 'vpc-xx.us-east-1.es.amazonaws.com', - region = 'us-east-1' - ) - doc_reult = embeddings.embed_documents( - bucketName='llm-bot-documents-xx-us-east-1', - prefix='csdc' - ) - query_result = embeddings.embed_query( - text="请给我介绍一下什么是Data Transfer Hub方案?" - ) - logging.info(f"doc_reult is {doc_reult}, the type of doc_reult is {type(doc_reult)}, query_result is {query_result}, the type of query_result is {type(query_result)}") diff --git a/src/sample/sample_sm.py b/src/sample/sample_sm.py deleted file mode 100644 index 2eeb424b..00000000 --- a/src/sample/sample_sm.py +++ /dev/null @@ -1,136 +0,0 @@ -import os -import time -import logging -import boto3 -import tempfile -import numpy as np - -from langchain.vectorstores import OpenSearchVectorSearch -from langchain.document_loaders.unstructured import UnstructuredFileLoader -from langchain.text_splitter import RecursiveCharacterTextSplitter - -from opensearchpy import RequestsHttpConnection -from sagemaker_utils import create_sagemaker_embeddings_from_js_model, SagemakerEndpointVectorOrCross -from requests_aws4auth import AWS4Auth - -s3 = boto3.resource('s3') -aws_region = boto3.Session().region_name -credentials = boto3.Session().get_credentials() -awsauth = AWS4Auth(credentials.access_key, credentials.secret_key, aws_region, 'es', session_token=credentials.token) - -logger = logging.getLogger() -logger.setLevel(logging.INFO) - -MAX_FILE_SIZE = 1024*1024*100 # 100MB -MAX_OS_DOCS_PER_PUT = 500 -CHUNK_SIZE_FOR_DOC_SPLIT = 600 -CHUNK_OVERLAP_FOR_DOC_SPLIT = 20 - -def process_shard(shard, embeddings_model_endpoint_name, aws_region, os_index_name, os_domain_ep, os_http_auth) -> int: - embeddings = create_sagemaker_embeddings_from_js_model(embeddings_model_endpoint_name, aws_region) - docsearch = OpenSearchVectorSearch( - index_name=os_index_name, - embedding_function=embeddings, - opensearch_url="https://{}".format(os_domain_ep), - http_auth = os_http_auth, - use_ssl = True, - verify_certs = True, - connection_class = RequestsHttpConnection - ) - docsearch.add_documents(documents=shard) - return 0 - -def 
-def construct_shard(bucketName: str, prefix: str, embeddingEndpointName: str) -> list:
-    docs = []
-    document_bucket = s3.Bucket(bucketName)
-    for obj in document_bucket.objects.filter(Prefix=prefix):
-        if obj.key.endswith("/"):
-            continue
-        else:
-            with tempfile.TemporaryDirectory(dir='/tmp') as temp_dir:
-                file_path = f"{temp_dir}/{obj.key}"
-                logging.info(f"bucketName={bucketName}, obj.key={obj.key}, file_path={file_path}")
-                os.makedirs(os.path.dirname(file_path), exist_ok=True)
-                s3.meta.client.download_file(bucketName, obj.key, file_path)
-
-                loader = UnstructuredFileLoader(file_path)
-                docs.extend(loader.load())
-
-    # add custom metadata fields: timestamp and embeddings_model
-    for doc in docs:
-        doc.metadata['timestamp'] = time.time()
-        doc.metadata['embeddings_model'] = embeddingEndpointName
-
-    text_splitter = RecursiveCharacterTextSplitter(
-        chunk_size = CHUNK_SIZE_FOR_DOC_SPLIT,
-        chunk_overlap = CHUNK_OVERLAP_FOR_DOC_SPLIT,
-        length_function = len,
-    )
-
-    chunks = text_splitter.create_documents([doc.page_content for doc in docs], metadatas=[doc.metadata for doc in docs])
-
-    db_shards = (len(chunks) // MAX_OS_DOCS_PER_PUT) + 1
-    shards = np.array_split(chunks, db_shards)
-    return shards[0].tolist()
-
-# Main entry point
-if __name__ == "__main__":
-    """
-    Embedding Sample, shard format:
-    [
-        Document(
-            page_content='Data Transfer Hub (数据传输解决方案)\n\n轻松将数据移入和移出 AWS 中国区域\n\n概览\n\n此解决方案可为 Amazon Simple Storage Service (Amazon S3) 对象和 Amazon Elastic Container Registry (Amazon ECR) 映像提供安全、可扩展且可追踪的数据传输。使用数据传输解决方案,您可以执行以下任何任务:在 AWS S3 之间传输对象\n\n优势\n\n直观的用户界面 客户可在用户界面上为 Amazon S3 对象和 Amazon ECR 映像创建和管理数据传输任务。\n\n支持各类源 将数据从其他云服务商的对象存储服务(包括阿里云 OSS,腾讯 COS,七牛 Kodo 以及其他兼容 Amazon S3 的云存储服务)传输到 Amazon S3。在 Amazon ECR 之间传输容器镜像。将容器镜像从公共容器镜像仓库(例如 Docker Hub、Google gcr.io 和 Red Hat Quay.io)传输到 Amazon ECR。\n\n无服务器架构\n\n传输任务可按需使用并随用随付。有关更多信息,请参阅实施指南的“成本”部分。',
-            metadata={
-                'source': '/tmp/tmpmmod0k9m/csdc/dth.txt',
-                'timestamp': 1693494146.1509278,
-                'embeddings_model': 'embedding-endpoint'
-            })
-    ]
-    """
-    shard = construct_shard('', '', 'embedding-endpoint')
-
-    process_shard(shard, 'embedding-endpoint', 'us-east-1', 'chatbot-index', '', awsauth)
-
-    """
-    LLM Sample
-    """
-
-    query_knowledge = "给我介绍一下什么是data transfer hub方案?"
-    query_embedding = SagemakerEndpointVectorOrCross(prompt="为这个句子生成表示以用于检索相关文章:" + query_knowledge, endpoint_name="embedding-endpoint", region_name='us-east-1', model_type="vector", stop=None)
-    logging.info(f"query_embedding is {query_embedding}")
-
-    # For demo usage; in production this context is retrieved from AOS
-    retrieveContext = """
-    Data Transfer Hub (数据传输解决方案)
-    轻松将数据移入和移出 AWS 中国区域
-    概览
-    此解决方案可为 Amazon Simple Storage Service (Amazon S3) 对象和 Amazon Elastic Container Registry (Amazon ECR) 映像提供安全、可扩展且可追踪的数据传输。使用数据传输解决方案,您可以执行以下任何任务:在 AWS S3 之间传输对象
-    优势
-    直观的用户界面 客户可在用户界面上为 Amazon S3 对象和 Amazon ECR 映像创建和管理数据传输任务。
-    支持各类源 将数据从其他云服务商的对象存储服务(包括阿里云 OSS,腾讯 COS,七牛 Kodo 以及其他兼容 Amazon S3 的云存储服务)传输到 Amazon S3。在 Amazon ECR 之间传输容器镜像。将容器镜像从公共容器镜像仓库(例如 Docker Hub、Google gcr.io 和 Red Hat Quay.io)传输到 Amazon ECR。
-    无服务器架构
-    传输任务可按需使用并随用随付。有关更多信息,请参阅实施指南的“成本”部分。
-    """
-    # Optional: score the relevance between the query and the recalled knowledge
-    score = float(SagemakerEndpointVectorOrCross(prompt=query_knowledge, endpoint_name="cross-endpoint", region_name="us-east-1", model_type="cross", stop=None, context=retrieveContext))
-    logging.info(f"score is {score}")
-
-    # For demo usage; refer to main.py in the executor folder for the recall process
-    recallContext = """
-    Data Transfer Hub (数据传输解决方案)
-    轻松将数据移入和移出 AWS 中国区域
-    概览
-    此解决方案可为 Amazon Simple Storage Service (Amazon S3) 对象和 Amazon Elastic Container Registry (Amazon ECR) 映像提供安全、可扩展且可追踪的数据传输。使用数据传输解决方案,您可以执行以下任何任务:在 AWS S3 之间传输对象
-    优势
-    直观的用户界面 客户可在用户界面上为 Amazon S3 对象和 Amazon ECR 映像创建和管理数据传输任务。
-    支持各类源 将数据从其他云服务商的对象存储服务(包括阿里云 OSS,腾讯 COS,七牛 Kodo 以及其他兼容 Amazon S3 的云存储服务)传输到 Amazon S3。在 Amazon ECR 之间传输容器镜像。将容器镜像从公共容器镜像仓库(例如 Docker Hub、Google gcr.io 和 Red Hat Quay.io)传输到 Amazon ECR。
-    无服务器架构
-    传输任务可按需使用并随用随付。有关更多信息,请参阅实施指南的“成本”部分。
-    """
-    answer = SagemakerEndpointVectorOrCross(prompt="请给我介绍一下什么是Data Transfer Hub方案?", endpoint_name="instruct-endpoint", region_name="us-east-1", model_type="answer", stop=None, history=[], parameters={'temperature': 0.8}, context=recallContext)
-
-    logger.info(f"answer is {answer}")
-
diff --git a/src/scripts/dep/README.md b/src/scripts/dep/README.md
deleted file mode 100644
index d54876a5..00000000
--- a/src/scripts/dep/README.md
+++ /dev/null
@@ -1,19 +0,0 @@
-# Update Dependencies once files in dep folder are updated
-## Make sure you have the necessary tools installed:
-
-```bash
-pip install setuptools wheel
-```
-
-## Navigate to the directory containing setup.py in your terminal.
-Run the following command to create the wheel distribution:
-
-```bash
-python setup.py develop
-python setup.py bdist_wheel
-```
-
-## The wheel file will be located in the dist directory.
-The file will have a name like llm_bot_dep-0.1.0-py3-none-any.whl, reflecting the package name, version, and other metadata.
-
-## Copy the wheel file to the whl folder for CDK update
\ No newline at end of file
diff --git a/src/scripts/dep/dist/llm_bot_dep-0.1.0-py3-none-any.whl b/src/scripts/dep/dist/llm_bot_dep-0.1.0-py3-none-any.whl
deleted file mode 100644
index 3945d1a9443b836401abe3ac482cde2b0bbc16ec..0000000000000000000000000000000000000000
GIT binary patch
[24501 bytes of base85-encoded binary payload for the deleted wheel omitted]
zH0un+0@}}?z&~NAvMzJC)@$dFzUfyWkw{HV_Cd&Q81uk9{XKCX8O%W=fZhPQ4XhF- z@7TunHTM)OkKp?g2sx5iD6Bb9C@BmCmCn4dos6LdjBsU)%*3n?YK3R}f{lujzlzd_ zVO)FCDvCu(H|op}L2q;$mLtBLLyrv!CE0GcVp>@JbIYR0fVuoY0u~y3JbR61Rp;9s zov3;XUYoV@&?Jj1X4xAHaZxzXw?AY9M)bh1{iMRXo_7-Aq6}L3PBOn`uRJNMCC1z^S zECIrVgxM>5;hx!N4ud(TKB4Ws?L`9g*isns%EjdVO-ObZ43{}6?>!uJ>w4W6KFH0S z-Bvc9iv0$g0-o4=Ici+EM0`PlY}>W*!-WdnsG@=+=_sO8B~+2d9)o>P7R;P-p3u*D z0K5tB5CchYDkmE(1*UM$t?B1?#ei=2u>}lf!`MK>xOd)5dOwXG z&E{NGsmE|k$t7;#S-p4V#|+A=j%-L=!hPKToxfP%@+49@4xB-!G7|AGU>5`DHAr%E zw|)9-2uGSiw0!;fuHAlh<5~OWttsijJ4kmz{20gNO3Pa&7CFj`c9#W)lUF%_9EPv!50%l4haR)?|WQ z>dRr|=guxvkBD;d^>lM+2-#CZ8VfFr`Pja4FZD`RypV_N z% zGSIk6MI%K+9FrhjzwE?ytr@o>^QE(T?bA2Q1>0w-ek@Maripat2hKm-+gq{GR?2tR z#YD+$CHcE06d#Gnt}^oUJ(Ti3u(q-HGLq?zWFC&IVzLCteeP?yXZ6R`O`b{YGAG#Y z?^I}@cf-`GQt&Cu>8FdjZR?=a;wi=_ZJAYEGY*fus&{roAK}m?e=W}~6L(&v(FRJh zA$TX%WwS+NCjP8hMbAf$^A=u?OGwQZnLq-CIb}DX&_R~u5WJhRz<09vgPC0 zWvO@4GgSrM?!e)?fI1*TY^j%;TJ})uczMpJ?TT9ucvd2R&=v&va@d~)brgY9D5BE# z(`A*kE7YZ{b%N4pRE9Nob07E)mEz7C;W)Wp^CHwMJ1-9ZL}e5M{U*)@PNW%e8ZW=J z&YTC`+VbOaP!B1XhFJ`HaruaHz(kT`B z!(y}@!UMZs1$w%F&Cz8p0X-jGfCf%1R9I;IZj&|X_()}ypaSJ=4~GTf8w_vYoBTak zq|Ic(rqp=oB05Y}L#6hr*-{{WLucaj*bW=Yr;Xw4geKK|GR0a(N^F%jgdL!H!af+( z5-0>wOpT>2F~08sTTZwc05s9={Vtt_n&0T5Pzb~#WoOA%Zue!2^cj@)9r)CuDiem| z(pq|{R94Xf5UPF1G!^F^=F~qp)q9#&XGpVxaSx+dy;!uv zMX!H0!PZ=rfnQ83(R;1aScjW*%1ixPY5f*yJCbhF%2O2WSVu`|gNscS{K~EV_DJN3 zz7;ba#u}JpCpLYw9L5slmRTvttPc{{Ffq&N`h+hsZ@Q`e1pU~{}x@KS%MJ$Sb$=ocG&3IaYRS# z1Y`#2CZ=;$9_0FBmbG}%hVp{(H%G<@B*nAM-hzMsJy%v@T^R_1yZZQsdwk|v{zN~) zIxE404{rt85(9*qLc;O9as$=w5>VNsVI$2G-Y@$GFvV+rw}n-@a@3l}i1vz`{&4OK z1TxsD-*9{jj{nwK;6Z@yJ<|x4#{KaGC;EA=*ibjGr~3ZMLO&H`tn2r@;>akFZQ~$7 z3=fCVk}dFxTULwca15&>dz$UFxMovn%*2HBRe)f-tIWNljBhu9N;8ftPXld$hAuF0 zat}W$nj z_;mh|R((G)1MTTxhWvY9w$5nJKmySf6N{;<YXpK$ zwCnU067awM>SbAvr~n z+_c=;2B_l{b!Z-`(T1WtdJf@699M){Pxnz*7J4<(H`f-UlJyw;KZ6ciaNp7%R z>TNC_j)a2x1tr9P;r>^()S!O_gp;(Sn2!3}Y^VML5P^p6AORhuIc6`g7{s>t{bl{< z1#0_rhobZg)ND)6V;N$>a7@)BwLNZ{L~<3cI4!K_x$yixb{6=aX+h*7zP~49tu4c( zox6@@1zXKCv<$%tAcv<?J4H1|TgPA~?E~4n6MNohIz`04|jcE_YdG$;gMD+l!T%A?_0f>QZRpS`q2b%fNy!hBW-k2i1{f)VlZLC zN9hog0v$%J0Q$x<>iYK9W*@4fOg5PBM6+N;)>jJPfbApEIIgnKaRLkiP~2gG)JkW` z{zpwQ{Qw0j`!m9Y z5jm60rRG8aayUB4k6kP=$Sg7e@@2+>wZIatfqgDj$N1G1uu+pmfj_+IBqLsmlZ@-; z0to}wrU_kPL(gW(-)=wh)O$hcG* zRA+2$IXngw{t5wppqP>B1EDE_qBH85F7-_Z&F}^-XvxvrT5~)$KvNbCcBr27+y1}W zk>9{zk~Vy4}V4;}h)&HCNv!+l60GVS)O^&*X231%DpR`iBbA*Php92c3Z%2?<~ zdEn*MYW1XpzgBdCLz>9!eB65nwJ9K1D2oQ-uO-Gn*cZbI3jy0x>8W97x;Qw$+f#@c z2!x6)GU`OvJwu%F{cvv~>$zh!!XI#R>LMg+=#V{gng~|xQfis#Ws)2&Kw7QF68MR& zWth50vv0u{E_&c^4tp=CVrvE;Hl4O(r)6bU@dACps9n6Q)m=(U0NgsNGmp=w7ED>B zdU=g(ZF`ZL6pYyNF1+x4vaWA>y3?j}^80yqgU-ApICsWRG$#3{FX`l>xwlXAh%>Z<=|ft`<>IG%KGXh zGg5rXtQh{MRB)z*k%1D*gT014k>#?K+GmEG4tlcb_`Mdk1|bK^ZN3sLV zKv`;ivi9^=QnWq}P1zZ?60On2p zX9qX6L$=EW+G&iY)Mq8xm&zO~s-{Q4@bTV!WKAjDmEbHYgYjTkdX3;*d2m)>u$>C8 zvt&}}^aJaOnPOfrRU8B1wHD<0%O0|b-+-K)N&T9bIAUPuyaUd%L6UPPI8WV9!b1@i zkPLkrGvu8!#(d3gt*a5dj{eQ~?fbL#0KV+Yo0b&3EeAcnH$WaGkg(aLY0B#*kj4Z| z$+0kptc~lg3b0gG-s;fT8eWyhUyItoYDcWt>4jr#ZtGs&LsVLWvmQL(f+xi99PiQa z{j^!WWYEDlA(A%*Gs-Te+p5<1MjBxtJZO)Gchl%Fy5an`vf1+I(05CU*;~6r*k5-f z_n8X&$!nZ-i@Gs}Q!`TS75c`fYtrdqMT<8s9>Ap=0(lj%{r)6k56GGB!214%1VZ%$ z2cmpLkmfLZsY5kVec||NPC*EXBoisJvgIvfqTHp{(nyt+X{Jh!Xj-I>fby~6p4~Le z^om`eaHunz3Ky#mCR%uAmUg_!yoLJT;#B!s8GtxULPTCfz}~kWu@4>*ao7o6;?jF^ z4-n4k(f~<0o&fGhM~_M8@p*6`&MEQi+qLJ63(Ks9agCuS5L5tV1#)-cCI0ZTXz*C~GwIxBCc zdPe_d)_Zauy}Qo#UYtXNJUJ>K@?!e=NcG(wfm8j6J)yY$m%{iwi4R-g&mY>gbJvlS 
zrQ(c=B0(Ymt?%*9C({#_hOf%~XT(#Yrf$8twr&*{u4&vcBbSrhrQA2%RT5-ffshIq-2Ny7RKmE7`DL>~e!;YjI@?nv&553p# z92CInb@={H=ae4L`m*tPSc@X-U$b)oQd?&t96E%I?RmJ+fg=G^UA}3yp|Z z8pHthFEC*^2sVc(7bb=sfvS)A!+G36Ai^AezxxFVjA87LIT^r$A6rH;h?TnKbG3sE zYLwXj1p@Ey|n^9wPg!Oc<^?EW3s)izu74k;iAX^{2CNOU4F>pZ$O zpqCKfY7=UhkoPDQRk?eXdtPHihugCCJrdxhdnUD*gf46;G(704hEy^rItTnSlB0aK zg!c&zLF0?PH5nw#Ud^yPqxhuPk5)qKh&#IHxnpxDD6WX))KHNPz5Jr#9O}5+kZg$l zA>7m=7PqL}D{@&btj`I-yfE&HLf!|Kr9%Uouw}jvaCO&>aagJ5uR$>2=?CsC3IPs( z9fkNu(b06&hK{)sf3ZNO)230dPF4(Ds#;RQEXAbVA6q(z6%DP`;}NrnZ3Tx1h#H^1 zaKKL|KTTO)5^t|#%(7obYwE5{d*B2tl5_WIu53#znrN;=Hh-bQVcF05l%f{#phhp| z{alBr5y0P#oLs(~QfNhB?0?*{ZN42C)KtnNZXC^nw?b7%aywPB8aA|u2@d#dzuY|J zNTh9-Db8}AxCExEZagS;0`-&}C4L5da)kj(-b_cQ zn9CSR#V|G?qVN>_BCV~L=BRUUO_j5OnuOfgymFREiutU7uIQqHG~ehRdn)xPB3ce; zd5&`%`6%}4`}+0bTW-h9GD0kx@T8OApf3e5H^IHxrZHmlp1;+f2UvwMB&nbJJ1qG_ zHwV>8>-3MM0|JATs#!E;LiSh?j>aUF5dGO+FxA#5dl2@$SDzszSq%x|6Oeb@>}UslhG=fd+Di4!luB)?tR>SHnE9v-RwOE?Tj9?FX)z zIaO;&pJ6!B<&>QKJ`7?lLg{vU5r-%U$Ul!By_Vzsv+D6So|&!{HNef5aJ#D^3al0j zUH&0CLom^|ilnW+_xrG1Uj&B!S z;fk$1XxW>914nm=rW76oB)VUD11=r;M|QJgefyD$>WL5O;fD(Frl@hDLI+lZ%P%Yo zK|%^`O3Ak73}!N{ppOfYpeU{Z{G>!!gXv~2z?kDSp!I{jvsI=|??2=$uRMoguKu09 zg2SN-CI!_DZyb%{F(XCp8K}F?MHG55BY{3QI5M8u^;+#WS@_bapvB3=z_8+Gm9pRE z)-X|4zp+}0O?Sb5T|3Wz0SmrOXK%>fzcTU%*^*avxE13y!G}81H zgn)I{naxW+lSPj;eX$7-d)_}1SPX63Ri+bZgx{;K)8djVw7zFo;S(&GSdHLDfN_R; z4zI#DJG$dDfY-Y14Pi7I-Nj|%uGWoPt|NaIenoBqwG(-2EG|178ap5($Q;k)QR7)^ z261ZhLJo{j^NU#1o!>Vd%CkT8vBxRf+h8GWw{?3nDVwv&cLncdG$j zQ`ytE@Xn?~>zZ?PujpL!a%bGQQaf%+rz{5+#q|BoJGcHyU)(LXNwpD{o@=5^k2F>@ z3n|i2!Pf(s4Az$OrAb0Hcl=HqEs`3&k{Bw^Wz3FN#~XnN#^6gKs%%bKZS0 zAO}a(+99vGbpM>wW-){OO-RJtEo_L@#7q@K37+{iJsLCxn+Zm%6*l^U1@@EJV>O>aG*+ztzToJtOZ+YjDhTWhO#>@Y6X zyYXtUwqh_AVc{gfSQs1m#Q%tT6aB=c)Vl3pK z?id`!Kq^~puggffVK>|B&QD0 zCZdXsb@fZ{E}9<7AjT}E!$wfKq9eLq1Fa(uYpK4cRTOJgO_R`%d7J}fvj2O}%cIN* zwgNZ4WD(kQMgYTzZmdZduO<+MrjQ%>tUJiimr-QzzCd)%@uJ>&CvfBC6i%ts-;$BXfKeHE)N9k+fDl>@42r$tVliMdv&c{Kp$Vjl;$6&XW#}JV-Mr=&;8x$e~tQ1ybM$W9E)bF46dd*G!LlLz|78W+0XN4qy!hx+t+kiF!f~0e!U1$ zf+yqr2Uv98AXGmXF5_XJ;`ElJAx0eS5{q;L$WFkf@#Qrq0fmcILn1lo6f}=S{Z7K6A1kV5)N3N^on^)!P#nRi9qA za}%;U%dLrNaw;lOK{nf{qr`;kzxOX~{$0=kAOccqw~XbEHc;`~BSsDMG)tFHc>M2- z^nB?PZ=nWyS553XB3Hgxk*<1nRfOKDBY@G3sL?BzB(g*U)qD5FWAU?eNl~hAtudU@ zkErH%tg=RVsy-vF>rv@4Iu6e#%b9j&mpB`mNrq#G;&u98uLsxR=r9FN%j1*3?tf|u z51Qv&=CQSf?R-R%bN{w*(W+^$;em6a$tle`iYMh-oo14`@H1EC@S%9spQ9&Zz2*3I zH)S6)#gRrK!AT`Y)L~Ce8*11aGJ5sYiya%~L=;oJ$0{a<-nmDWSPgsT0#5nMb5WHj zlEv-1H`e++AqR~LGFl|bxE!C)iX&^i2bz;;BBY>n%xs`zfw?k(6A#l{-LnBthR0>}#<5Rq>b~W9)?}Q5U*Rst(Czw{zA6$7&vUH>wPHvxT#lrG=ka`OkTCLyXyO1AsY0 z4u7C*jb{Vld?9XtZs@U>=BCt+4y39d05)X0%VAw(Y-=0&pWwBO@L&TNIKeG4fOH5n zK4x&0QD@;ytACEzX9Ph)6CT1As43kN#Tf8X7z`R3@O~d&r)i#@Od(EV-Nz?#jgVow zG^_5syJtPnh54LcSboqdL@)}kBY>df{pa8mm=wJ+kEr@BZUACo*`|KpV*%yuSUKo? z_d$Gj9}_(xrRPABgSoa9*;W+0gC>4?ChkN)jD6~x<4vhn5KYXVQvy>m8CNsCxSdOb zjv1kbIH)dI7U;~_lWjv*8umNkbV6}wLvra3aK#!SnL1KthtVqRL>Sg2W`}e#n@oS6 zqZ&?gW#~dEdTaLa`Bc71KIuN;vds5qj9Bpdkv~HM-!I^O^xeRQ`f`1;{Jvmge)My2XE8p*8*>JQY=^F=l zCSl2y7(!Fkf=Md{)mX`lb4{v6!7l6+3;5EZmq&33X{Q1UQyB(r(H;As zL1F<#N!!;z=5<@iCULz%E?}YpRGWCF1;NAE8TQYPFGm_qKzgj?z+e?@9jz*InShCp zM^MZ`gaHd`-%Wo9n5&Ah*8spo>ePgkRO*1v=!`g>kY*|%6i}|)uzQ@%4%8`I+cAD1 z%uO4ict#?@(A4fQrB``PRf}fr3IGI!W#;kp-Rzy|B&#I^`^dsf(zyQg=QZ}~8`%HPnYi#lvNd?Gr27Wepo=QS7oOt&k8%+5 zh>g)A3;PH5-9-ayYntI-+dE}xYFsv7yHvFWD^1AzATYL?!ow1Tw`mi(=0THyfCbu9 zfEIkwkmXhtn(93bV~m)81;ST3Sg}Tl_Mr_d$*vUBWW^}`pjCV4bm8JbioHRBLoeI} ziRVl88)QES8Wde*hU1Xfy~Hc5`Z13t@$CCExf5`!%{*%?FncPk@E$$*nN4rF;2OA! 
zU#UZ!Zep6@?Th~aNNLSxbjwk-vDFFW9-$_BYu(B?B=FTr5Gbn#255i;r=ar!5eJH+ zh{Oui0J%Yvq03`I@HfYGAQ|Wu|L&nlYEd0u>`)x4VB-!lnCjU>7HVW}71b%T#Lplj zG|Lt%@s{a?CslX>LNH+)VonzaK6mDrwWzURe#`pV41%OH=z@g70)FJ zKqm8&7u0q)0MeAe0!$Z`pTt_^^-(t`NI53jvP0j?Q^pBb@CjjU{ZeFQ|82Q7V4l~&&*&12U^!rPIjG|GJIX)M*$t63=mf5 zC6YfmDmtt~X6epF_eeVG*jdiC^!GaPw5)sU#15U1oS7eI@c8(fLkQZd^QTwqL%{U# z;hr7PWmADBc=LLa?v?D`j~7GX>h}J6-TYr~qI33h?rGN&1YxyHl|u*5FfJ<#SKKxj zjn7DN!F_9wuvuDptPt}wnf+xe*{K$ZMF!hLh~HNli6wEa#S;eFiHQsoW2QLn?d>e0 zZ2EXZe-Fi2=)XaNV^I1<1ASo0n=C{Q7r&(OsOQvYKB_w>8q z<`yE+VN2?TAGELuRDjMJKq~ zEeukBNp*~l3M|oW3wwFCl4is&5YJtpD_wxXo1|!nw%=DhR%-UGzT8fzFingqYjAcR ze`a+>ax!5OcC>hrKof#>amQD54bY8JT`7}D3P0}4#+t#HyjU(zlm5X?y5dEl(HPvV zcD_i@kZ%n>EDpfl+Zib}4<@RjntK9L#1pfT+FUG($<%i3-A8N~PNiDSa4J$Qg48ye zoz>{>0%D_*o}pTY+B@%r_=1w`W2R4S1u(8uNdzCB8%L6b#X?x1@?z8|OYRq?Q!p$- z2{LFsiQpojzEva>$`5}=lpfTjmp|jhXGI|H-*QcnOFlnVHs5AVm~b^!JD z%)>PS291Nk?}CwB>obX0<{pum3+m){0uc&xY!|;qnz=s#le+Ol$-KD$sQuWJo%*xd z=G(2`pQ5XBvjDGboR`YX5U_ref>9V2RUZr=8#El^>=#jeK;g!-_@5E*0Jad0~x2naU5A(NA*z{UaFNvHw7h+el)Rbt|{ zB@Af0YuepwQdt2tC@nSFuIyW3U~t(VReQx0|3#n+ zES8yCMyJQm&CohdHftSfL>v^@)HYO;;9A&`?BH@g|A0`w*$G^YAxCSNVmEQ$h9R08 z3wirEbz#&2+9K}rtY=wjS!c$wspaL7+hCe!9+kQAYJ=Y{fNi%8D4IRn3r;auhiArB z2Nf8DL>oOTtW#$A5uBzT!8iq|2R=Q=P90%FP`ZZ7UsdWc1121$XJmcxO%($vBz2$h z%9gVW8c}Anb=@r|CdelG{Lf-QX!WnVLrCuXpsxG-`@5a=QXx%_uyC-r4U0~gN$1Y_ zT4EFi(F_dWq_hR5FPV4x>N;v#tt|T3+Ff=Dd?*447~RQLpbY;!lO;YJAXSQa(4uNO zSTyM;F7j9JC)2boy}7Tz-Lb*UC) zt9-n{bYi#Xn{xN7F`uFJ)`V|E*>1xDBq|mlU>Ww5k0Z*Q9-o(+tLv^NbYzdCo zD2|~;SmYJLYr(<-D{hpss4|r9xXQ7;*^|Rfs7Zw=GLhGLBarMzy}p|g2mwqSLCrEv zQNW`1{a-{TTwp0%H&z=9E3wvmTLT?a)QAwv^in`w+ zK{P`&&ar1hY(&`|6Y6VbgU9`aMGCA!s0k=}&z?yQ6}|gHF`59*%z|D#0ru%^?g}k% zeAoKdDfG(BwvmpNGiyFKAy;-#MGwV|TAMpm62x*Wqoy)2=UPOlF+)EG?07?;xtsU8 zf=@;AWD@)2o~myS&cJnU#@20p1UP4oc(a_@V;YX-IF(2 z4W-6~6Mea?ovjg1-$2ue8u}lt08UI>Xx5Q8prMD|Ao!$?V>WTSctEsy-ADE_Jn~XR zu3ok7CchhVc%P_%T?_%$>F;c$(+%{~1KhS~cEZ^+b=jIryoq-xtCL%SZI#mWm|tP< zAvbXd?*|@)cbJ%S^*o}sn&Zt1KN)Fiisj#$KXNOxnH+8xVwR&h?$F3dgB4ya?SJ`d zJelv-bC%`{y}pu9mu@B$`LpGXyf)xJ^M?R(hx+@&1Ak(f0TUUiPqz>+JFwiX!8pV2 z!*V!$D5HN60%9%$cf|Jq3|`%2lH|kak0B+s+_D4zxl`?eB2XD`MS$(ro*^DtQBMzcQTFLY^tJR1!L+8ZRt<2hq!=k zYT)UbnZj{IFt;HDi=&>jI1cx&4!x8dajS3TmiQYuV!iuQYVY>AazDJ7h%8HO^ju#k zs?OL%P;-4j47dS5K;b-Hg7`%4?N`}z7lUwqNu`uPaD^YE^UDUX8x z>;n1NPI03|+#Wuo{ddR$);eWb|D(ED+3n)$`dR}e!NL<4O; za@E#U`KpX(i*B+u70$j`Cik$LeBAN*{rmZ!ZR!7HVPsi0c2Ry=uJFGm-+w@_|2N6> zn=7|-HnVrK{ZAa`T0`4@lMUtngv&kXoeHetl#R6+cuOKTF~>_Kx~~U*(}+bjMJW}D zD#-S0{=1wdBvZfPv@<&aX^=S4ypOk8@`)b3x=qgXH?8~QNd)?fWNI@CPx=bU5~&t> zGEEeOtdS5hNc0@6vS7RN&CJ?%GA%SJBa$RWVh2i(VWt@)B@;&F;TnvNESYNCB#AY0 zo}pm7Qzs|B-d=3v`pHSzq7oL8lPFQKD%M2EE4wX>5scRx!};9=$t>u!#%YT|8GiBHZ(wSRw}Vis%_A`P$i?*t)T$w~^QD^||z=x8>KXjosbeY(0akZMkVA z82j%6i#+CIOi|`o-t&AnZV)sm61=)PM@lm&(9#+cUhPbVDd4I~C70NTFU>i-mzxE7 zVOqyLV!;+NV^UU>kzkA<$f#WtBi?P;!DpbpoZ}7+t2kQVYJ#!GC(-el*NA{EoHHBs8ctj@rwZR-y69h$b0aP)e5!AS( z8!N|!2~vzr)P{Z~leuC5*R&}g=B%;>{KbM+GxlW;yx~AupE%1iC%(c-eygPlWoOGj zAQ)N1Rns{%ehAtcNmqLYPI~$mPXLNs=6J4X<1bYwHa5lgb8cI9uE0=8*4HFK3o~t{ z4VVD2qZUqCI?obhFOd)TG=j}~G^w$rH!ASxz8{@Ylt-0yYNk5L36X3XOibr0ih_Iv zk;UU4dT-7u-dvnKf5|`IA0HomZLhg6-vzuBgg3}s^JBo;BcIwh+_Dem;SL?w?@~B6 zZJam4fR_=O<-u-;MmlRKrNPC2c&oq^$*T424NmsPs2wU@B6n+H`hztzj^R)}{1B{g zbPmYV|2yqD%jAqu~qJ-gQV`{4|}8QVG|pYu1Er4Zc@P%DtHbVhSoi9AbwS zboz!M{gw~~#D*q20``amKuwcS6Tt1CWV}n(Bn&>LIe-l3tj7u;U%}IA+{V^&2PsTb zYzLrPmEhQvb0>2@1Rf65(ekyS;jo`&+v3(e6c(Bo1 z#c0mDosxr}*wY!LZa0H9%eGC{k;KuM8>qOwW#H0ipIDPUn5j}XvOh~s_S@a%nMvh- zsih1y%uZ|~*~i@CQ;Vs(i{CN6P4NI$d6cw|MX!_dVLJOc2gX+VO)oD%STuc}rKibn 
z(Cq&4FuF;wvM&{c+eT}U6-n&aque`S6zMa#da7;Eop2@XI9-t{5+}#>sskIiKdY~4 z3``vyLJJI4VN`xZ!K(v&|2Td9SL$?rv28UFJ7c&0e5`}(4!1G+E_eS5WOuK$j(4ja zwQZP*8g$wxfAz$Sw22>KHS^p?Rf^Pm4b*-`a6s#m#~ngw(S~7B1+@})<5q1eskR@p zpJwl&vb&oPK?5HTkKr`>fC?*a`=>JPWGW?WYzUAOD8PSlEoVb%87jD(g%32|iWeQE zedGC-ADp4>S@JgaE0M!kD}Ts3G3eb}16CX9z{@+M_zD>l2p*4wCi0tsYNXFPmsiWe zmzJ}@g@XdLkDp@P@>vXJ@=*>a9}n&j>la(5H`a~*Nzi3&@z_)~ip26ln z)^n~8`%eEqUF1J`*x$wh0Q8^7?}hby|KBcRV(4P%Z2JGo#RC6V7V|%8ovm_u(8csOR%>GF@!N?~HEitO_13y3GG|GV*J`R#9_NjZO^;2RJxl6<>+W2bC3`?-{{AWzNlYj=D5-i~>JD;u>fl-6R42h2!tbY7p32s;dU2|Vr|oc`ULbf6V_8+ zW0Peh_d`2PNX>sucr05OaMx>}=QJ1HA%jfzJJtq1C`c5N4o{OGMH!t@&V=H$Eb ztmisZd2F8j*&wyl2s6Fj+wEoT|0Jw{_v;Fq{`KBS*f zHX?8-Rh+{2m#CYa!{D)k7B6Xj6hcorLk(e5x^Gx)Q)_A&A(1s;|492fPVya+kutYp z5Rcsr!ENOF1=uFWk{;B4)LS=4#ou3kzNA5Hp#VGD3>KdPHPIi6R#= zznV?hkquu3Iz39YIoIK+R(Jwn#~$|$5cm}b{%iq4d;-JEuK{z$YMcK9iA45G@yV%s zv?-sx7ee^%)_Zapa_*FWC7UFaT`^0BG$t{jxhXurKBjPhk(7dfi8L#S@aE_v3eX~e z=&Z(rYQk2OHWwi@zD6)On36(y0hs_PMc3m4go88%cL5^-VA=fPWI>eSBF6*R0kGu# zKt(k*Sgk|uFhNffX{K2+5H46A+)1A)b`$@nySzpEnO##Wy(0<+O-SBCELLG07Xsth zvlOJ)YK>_eu#$I}Hj)}naUb?DbAg*rN;s6Dz!&MA$_enO;Z_`muqcoZ zS=L7m8s8DeuqY_PAV6=2kxPN$yzd8_Le?<&acVYdpliVDA8mLh;iLTbcpt5**vgCM zr1?LJDBs!2Lz0MiMbtGxU7j5FUyvVDkcR;d>uU^$^X}2C_INCah~N-P1}PGp=|FcC zDay*jgQg=|j_obd6#pfA{&sdB>E(})TQ0d|iK*S;Y_ua}XY?NaG7{$70c7{#36dfL z4Gx?)B8FKv^I%8ukpFp#xMkZ(zl;Xlfb@H;+TdC(-rW(B6e&*mk_(&jYEH~mC`j^8 zMu^5P2}v4#AiR*PlZ%{#OgXtP4Qv#>L{NIm#&l!HT9BCaxgtImbNq>LC+^YT^zo(z zg9uWwS)iDgf!pZ1a*Fh!zGCC^Bq5_&5}c$08-PPc^vD53k$ra?1Wf&ZjdEn&iuzbD z5lBeL14mtPaCM3z8lGFmz@06Ba5O`0u;BYm67JJt_(m~YJpRGZOk%=YQw}3dS%w$JE*;Iy8EG7wwjR|-&|n_7kQZ6kfK%xS{&hGmeXFl z_TMoJc!4JBBE6{v_5hvoS*9UI&HZr5TQHV19u4|=+;+nxdZ4<<6>w*;+!VD&7w$(| znPn>%TEcAd8N=<8*97zMce)}DWrA`jwSgdbhF*>Vk_KyE7HHI4TO4y$GDP^ScXW#d z_4ABb9&ncLdFnzJJ!k8T3Wu8QgbTwbzHZ&XLCe0PC>X7Q^vNcf&~b$a>FI>pzXrxl z+rc#6yrG?)J49Z!>U(OYlk}0r2w3jvBe1tX1^_tg5W1m~*BC07BrTT~vo6h^=(_@D z?le%F(WY;Y&*+C-|pg2+mh zIl2d^@rBmT%08fM)@+|~t>=3U6L4zthkMxHUr%FGON(D)A&i*jZ;BkJiciUYZ#kc- z4*e{k+8TcY7Uo;k;?`Q{S^78KQbt>8`}+QJwP`4HZkBq4jo5r-q^OI03M=I$`G0qc z^-LvQf^KF14Suw!rcn%zDVP(2Ce~L*74JOWS<`JoyA*o5WzuPS#EKE6DiRf_eAqww zlxmryJ=#0{oI(rvXCkNC^fo8;(_AJxXxOfJ@1J{|`hk?J{?DkYj#Zn(PRjJJW-#BI z-}zi1y+Rqe(IDbYK=hS(g16o-#y^@ypx)Cqvf2?ojH;a7V1=bKW7hcM+I9W^k(vKl zN_q@}pxpfZCWLoy4<+9;xi^NSK@M86OZPRQRl6ddV7Q4Wq8dZO@b_I`F90Yq8yT%(_A_Zk+& z5=3v2p@TY8vb5?srcN{q7#Wj+=dAQB%!LP6rkt8~t>BbcDMaNof`#@2O6VvIR$?Jg zljy5gkCV|j=|mv3H$(20j=$axA5EY_1A0)>qQEF>nOK#20;eQ6vlwC0bwAfkwH6hy z5`KE;`d_KC3UEcr=%DDqPH%TlSJ$tPmyhG2@~8CZkEiXC3wVtLzZj#jR20WRGMF&y z>sSM@HYEd=Tn!4Z#X-Z3OZGoBCCYh2U0~zx)%h~y0{Y>I{LG{fgjXTY(BEmW5F`gN zBut{S(PD;E5;M~)Qk$-6?&R;td(dUEhS+1cnpTl8p*B!vvrJ|gV$N)5!%x;AN`R8u z&EMOoP{7<}9??(xAp!7?^BtI)Nae{e&JvoMwcuOdxFj0d=E(D6GX?xQ$_(C-`>2zn zBu90Gcyp)BYlRyUv5+<;G?H#3QnHzdPd+VytKQcq;hl{HjV;}~>|=CYQ}u0K9En&*+B^GMuGkE>{qa+k=9cjhe~pty_NmD%fSjBQF+Mq&{hprEP!xJYU^5MWgY|Fxp7xj|+__?$Pr_@~xVU z$jrTX>#E76p^C@xF6w6km1s6R$P)uJ_x-%MAF3&1FB> z^5GAyF|T-w;LHz~T)MyPo431=I6)e60j53ho=9D+eDCpqo7kNUp{qT1@Y`e zceLyqXg2G;2jlU(_Pk-M|8P8Ahz2^1)@*vVI4Bl`P0OyW?iBrq2SbH=%)eU#KAK#4 z?XFN}mp}#gs;^(+wN$e``4_?#i=r)=ILe3^x6{e%&|mq;H9UjsbYJA?^#1)DX2D{8 z$838vkHOq*N?>t^rGkpivTqB_mEVOYa_j(fK)1mOA#F`oSPn`~_+)(8v(U3| zK^p_$7Z!9|k+@p^bns{ZiCi;iXQzE}01L}W4Ha*aq=RYqp^Gt00$b#N^5gc7snlh0 z$N<12MK`KT%9Hhm8uY3tjVLl^a*@kLBuoC^&Eh{NND}@PonR0E0C8vl0PO!=6r2p5 zT}+)Uy-fd86Vznwe*wq;sRFj$js3M&ZXl)u-gs zSN-*YiXH?Zw4|U*=Y6b8`2uPbxH=lLod4k>S}ssj(lYcY%V6_CS#@kiy0OYJJq4pq`)Bs2#D_qbQsA;{*OA94>ls zl5bXC!>Px(Y~D@HEnPz+rY~;}!>Ki$0xwmH1U=a(`uEi(Sk`gvqD`TAny+cwU1h{= 
z_b|W~*wja65cQ%rentGIzf-dsy)%p&#@Fll9q`BHzJtCYwZ8K^Xr0(#RjA}$@OIjO zZy16RQ69}98di`nk(4!De3I;m&>|u}SWXb`5-{vvXF(H_hMKRQ*U|BEKQx}G?ob%K z=KkMW3}|ZR_C3@~E9(fjOfVLPKM2NJKU23OR-QIL-?|38r~n7CDNl%#+j+#ovSK|u zl>34BxvAo`G^`X>EG7z2j}BL>s2mR0Q8*@bbvD+k^2%$~NU-`_1d%YwWpR(EEqQZW z^W=0z_22aWGK>+y^afriU3V?Nw=Uo&B&61g`vZ|vEU#q$vaWr3>)^hP2Cz@W{k zk-6`Er7edGKq7_|Qq{Xn8Qbik7#X`>m`NvrWc3a_T2h%-H+Cj$*6#ly-8_P5lRsD1 zQ$BZ+ap^jAoY6MV3puicFupuGV!2Gehm;sUEUt%D&P+vdc>k+SxPe68F_C+T4BBMM zI(JUc9mqcyJQIMpLqV|E>&J5M4~_bwLA<`?Vu(~mlyeo^y@fN(Q0^9)TwXP6Yqlnj zC$B~+k}W1WSD~D5B11`nc9u`!#3MIX52Z8G+qENr)AN4VJ+0Cey@xsr?E!0_Od<$H zf|c-AN2`uTw~dbnY5*pL{=K-TC5`)ALJrKASYlwJO8ayb8Yr|@?ur62CbC+?U&x9N$AvGjqvHp97U*M4>$)zzXekUbaFxzdQg1ZyAbzjSnPY%@p8 z#L{%HC2+$D^IK6YoHFeG)bE4aA9qe@WMjjVWH~UM^eLcnnteu8Mq6{G;O|O|3x{0VTaM=)(9yPdy2tAP{FCkO0X zuP(8GKp&R<>;x8^PX+lx}<_84jrttYi|(R|@b zpNVBr=P@Kww<}3mLcF3sWtK5|YUr^#5O&zvTfI(^st`<|VsGuoE(KIatVqj-P2A{x z>!z#O;hKJmK9uzsW|tuja;h$~D8#*}_7A&iDaooTLrzuQIhWnER&-}z^4^4g@SoTW z)AmJ~-%^Udhz^hjTW>Fv*6JHe4209E)NRGVi1xyuO3s=gfIM3c_cnvC;b_@93R{Q4 zt_Ci=$SMA=zf{8Iz2XGb0SzqqrNQ<+Vxa!=V%VR^MJH2mfgGHLHnw-SV2}Qn0K-5jtoJP=ZcSoou_YWaU^}GLxO}(F7J&TnBT%8M#O_IH(J$i-@gPCPj zr`hoztyHbN{e(19PXv;@+8TY{&Ruvp)KzjqKKOAg0iZNt%Q}BO2%AiY8B*B+31@!+ z>IM}=?6OPG^WWqe^rH2>p-m~85H=4Ez;P%@9nd}TBzqYu4(=dHCPmVZDA@^gg+57m zv5y6-{OW}xBc9AYDPVVeoN7sWIG7chh3IBWDFy+i7jK4xx|ruIb>A=1f7=Hx5{q|i zp+DUlpmB6uYj!XM)JOx05Y~ls0Zpcgb=B(rkx3}=_=owmVES>z+2USMA&8Ml{S&0t z!|Z9s2L`g`?j4Z;>#0{S{~A2%FjyzJq7}%}JAID5#^^-@d&5aMRd}x&DrY25Fhr^D z661?ZFB2ZjXWIq?z+@VNinzjC$oNL?>WN-rToEv+OyQDX2&b-%6V?P6$u27bR&CY@g!ua><+Jzm1ihK z-jU*}orLYV(ZOgqlgu^(JmGgh620dA8Wut*YZr(_YIHZ?s~KqVgAc7Cc0eONy-!xe zYdLi?fq-#gN(>w%Dt!Uk*|@8K+dI5Cotb7B%IQEJ6mFhnCakHe*9}Lz;Yww!6*u9H#QE` zfu5kuv{rL1+7(!XY)9``S8TA4L)RXgO<(J!P={wn|70zR5I6}L!#)0CA;m76Wq2x# zeUyN=+*td~OH;(tEF>?&g-{va7{>@r5Cl#>!6ZY+-_daAWg39OQ%67AMvtzYB60koD5*pex1$_OgU9BufD#A>Z??e_whr@kcD*(;s|Aq)j~X>#Kx7yp zY(VJv!X0e&^(@xQ8(IBn52OHi9wB^v!NLn|BNa^aHNK8Q}t< z!NGpShp?s8Fc##R)y?c*-w90T$KXavG~^xVA$41bM2iyk!mUvwno?N{p3YujS*SEa z=tbmZa}67yN|%}SZpu4EnAUMv^cO@6mE0JVN7gYl+ly1@eZ$$XR2wRSxc%=P5e0Iu zadd)Mb);dvTCSGuSw6kN7i%k3o3O2MRcoQaNP1xzW_HX8-tWq0G+J?YF z7hDwfdj>y*Cp>8nEa1&ZK?N|BeRgJIPr8ql2cco@r`8van*s|9W^KqxOD~DfRG1mhQcP~Xa9LB~9kCRq?8x0%RruL@( z*2q<9CMM6OuS9`qCOmtatwAQ|5xLfyn?{Rn7c~v(#yZ~gnt#aOfPNXP zLZ4yTx8{va4r_@UH6TMX@Qsp3DZQSp&?dyD`LPOi7+3wo1@idkh94P z2JwZ9kP}~q?Yv0efCV?h37N8xm)U4iRphrK+!l#p+ibL_*0N7CS!f7$I-xbHRw; zy`@;r5f$FobWyqv9(x9M6wD}JV0col|GNL^KI664Sqbt2>h*Erk^s!H@5k*e_k94& zBY{>}OFwPNhPVD@mx9=oj@5^ShHUmKa07#_UjSFbh(XgXP6>UV0WY#inAF=nSi>+a zAZ*1a9S!X9yLdh0{~uH9KN`@Q4dHbIQSb*ZMT~3k!$kZbL(@eJLt0BXGYIeJ0vu9Z8Y>zjZ}|$ z?h{Mo^7%TqQ*o{{mD>)jmaRDht5sWforl_PRc+m-UwKXWdb-uCH5Plk)Ydp2??+G7 zr|R}Q`i%XP$I>fovU9DbQ|!J=eD!4kZC;tZ$2co+31<%uHXC`z*Ebn$xCPW3CjcQT zsMECBoyJH9oHQGcZU7v76^XQ`;-7p3AM>nFcgG1Y-kg=34{YE;e}@}S!*OpRt9_QK zpx(ocHE&UlqMeq;u3Xm#`c})0M~t7-u_~h1H;I)p)q%sxu=()t<2Hp3xR;O9px>ef z_D2A|v;2YXZ@l$Z+8G~geYpUtJ;yGJMV7bKcCeCyPWpIE(G%y1M0CJ@ehW zbR|pU=S_~Jy4L2D>M?EAE9$u!MwryjYK-zeREvhW4t|gWj_Z}L= z9T;cdZhW@q3#`*8XgiKhPhvp)lp!%Cf5Tv5P~#EEo| z(Bi}fJxz_X{s}ISSoE5BND954L&FhJAz23$&JdgVXp{W@*nXL@(h#t_U8W*>Vvm6~ zR&GiId9P!IEmhr{g%1l0h98%s#6jgTW{*_8Hu4+75C;k_F$i2In+tKK3WUVtT_Np$ z)g@QUdhpm|K^u5n&I*N}1hC~&?iBSg-@Mtk5%*bu`4(F^Opd9%^1eP7G59xMu)=ED zpFNm=vZ+`Xk@4ZECcMohDA`a*>#fZb)mAprN8D&cU`21k8wqNR!sXsJ*{o};ot9Jy z+f&^_@bV~y3jt6oo87jL;M%E#7Y_)I;xwUuF15uICrnFiBK_4*@>W0Xw4VO)AzQic zn6r%hMe30;S@^^0#+D?U@TcXBFv^B7b-c<<{La%N!Jq&{|7_z9gbEz+I~<$5SgkF* zKGF5l82y01tj`$SAvt`>5Reml(uJ=$7Zbn*!&AK7;5eRq|1SjfacGCcODFMefu;?r 
z5?G$;>TNP11S|Oldx!bi2_UXnv`8AQw38G*@$-;C5iMkY>XCSTHSIv8ASE8gk`M*K zgRowGFi&|hR!m|hQ9Vj%Wh)R;k@y0|@`@+ELhZ3IC4pE-hN8xNqfydsdOb^InU#pi zH3MW?Gr?{`cAtkN=cCFru0$%MdUBNzITmo~N=^b|J)#e+p9aSPcmj`cg&@WugSb6m z-z~)cUF)#SvE7{ZtVi!U_MV(i;>Ru`ZM%UtL9J%*Xzm;-?Vg2P09evG5{zrIazJdP z!?g2i!n|T+<0ugD&`)Ov{_ta*+geJ3KYal0h(x?G8}vV@$#ATPS*>|$3u7YL<_O1j z5O7FI-=)+bn-Y{12an=sqki_Qm@TtP;zh9G8;h^`lfc>Bi@0KLLBr?F^CqBM`B$$3=>W%nIj+(%{ zT}4@X-m%<%&|-|10(wh?Rg+Q5_5D203I;c>h8S!2%HWdVWi4qsa4IxK?>QSdgN<83 zX!+|kAH)r#Bn}>&LhC-UjVv3SrU|JMizw4H%!bORR82l?(c|}r4XFvFVW{^Tz{$Jo zPv{%c&>fGP)!u0*aj*s?h{4pbSXT@-&4f#{;^3jDxl3{?K5N2Rz?5a3Nr>%yh7dXg z$YZd8e33=h*I-CC%DYrI;}fc0CMT2~DTu)4q17~Blof$TH6bttdVX?VylK)>(3#4p zEC%qQ69F^sS9!tr(8w1e$^gdgG0^(t2*YT)UPDyyqEiCqj8O+!sw7a#V1nER1L-)( zxft^ht*{?`o;y5@Y$DkwsVGNd(udJSntUQSNQiW_`;uTfzxDSghEaw-(9@hSApuE& zE0?$g$O&vG?tP&AIGb@HRwBTISXkf^k=LEYQ#6+x(lYqtAQo*#Z-k0v1!~SHhVd3c z5OB4R+Djnq?Ff?a!V%UrJJMmIn;f%h!i_846HRqS^D0xHP&Uig=QplxsPa}bFAzhO z&nZN9W31$3 zf*(+<^C;!D;0Pugs)(lfc-|#9WzD|Z=PpUW6?`Uo08PauRyXzH#uiFkK2e~fHI>!i z7X=>LU?5dAsbmz=ron|oF?2nq>3k)bNJdLIaI(3uS5}{Z5#u&dAhL$OGuQS{EAKn( zE31vIeQs=be7Aql32d>wr*a|T!nnP4em7LAhrJ4WD`3-B(%#&@<;=Fjc#vgu3HIAA z54l4cO``rEy3VOR6kyA?v2EM7ZQHhO+qP}&c*nMr9ox38n{!|KqWcfjV|}Y;jX72m za+O>McZB4wxIGX%BrbVD#$e-4bfw6(>x+^C9;Y0H2=)B_e1wkf4vwM0F$m|C>K{fXG1JH% zgrXB)>W7~OTMcR8RUZ^eT$Os@^{e<|yxiX+XcF7%zB@ZY0iQ)BA#D#-C=2v6Z5sqy z8~K@S0!XXEj^tbd=UN|ssD^!t=D2sVa8L{I@8I8(Ihb0s$ zIKSjLQ|=94(XHb`EMRmwP|y}*x$L5(8ufIme&U+jyZc2xCXo=UQDKdk9r-N)UtW^% z+9*={;>BmyPpln;2Nm<`ALgf34|H3UvZySM&6v3LGV?@F)+~kN7dR=LDfWbF9O(lhP_Plr{ZRb7WDJ)Ixm{yh+S^;Xl!BsfuGvY^LH?j41nLI8-|Dlk1wfNXX zBtly7?Gq2ItFl^gE{4Jn*zwtoecV~XNx0PVXrAlR$tY!2OmGASe1NHm*^`tIEX7(8 zMz;CDwP3wcVsRdxjLFsbiPH0K&JVOU>hRbM(Fp9?C_Fqx+4freu|-G$wW0gM3jV zC!9vYIe!N8s+cAn3DgU@A?(xkCR^?6cDlNI-ajVQ`M;00mUim$zuulyQ+yuEtJ&%8 z=Fv@Onh#uz6OgOEy{9}^R@T+}(yI~`Z3K_7tZMdlI_>_C-)tt00xt~%K zPq$K>*^IwiQlJ|La56r!% zJb475Hf3OM7BS~?PBr#fG7r@v8qo+mGA%F@xu=q&dd^JGTo50Wknn34i1a2@kN8Aw z7IiJpc_WaJL!kKxr%ksPgaCOmjEG@GjP@cJ3qqD}%=&54kN8D(UQMAVyyyz@_%*CF zPGyl*_bwr6jU=rd&oOivaKwWPi0>J42G5Wf_$A!}=Vv!)cexzEk+IgJ^F%i;B_0a3 zih&+pW_0{=t;d3&U9|I_(bn&KH&e_m)<&Lq`20|m>(H7VhPTg6=0O)Pk7$^hA5fUg zj{UtT->{Mnp>-^y7mI2=Y#%#`WK2+dC?xqc zF;$8CD$0oXuN%YTh<<+(?P^*B2MHyC3F_ca~ zVgS4dTJy0`xP0kB6JY?3iaa^CU&{sKgU6ti@ycg7f2U`rTVF^g6 zjM?N(V-DmQgLJ@im*iJ)Q#pFVyN&}O0m@NRqLf9PSfGsm@T78h1F`K7$g%a*tbhe9 z1%eEMzJOIkEYRAz{coSixqa0)aZ9!751`I`5CtyHLI-Q{DtU<2tp0ozjS5R&p1&T- zcxyC@-bA zJ{VpEv}nh*GyDNEF6H7)u1JTD&Q(p>O+1`?0$X<&;ix(arA?wF!J}3k!lv&|mKjua ze(byh#pkMJbO-`4MTgJk8#svr|LG+f8O=Y`UKmYIfk(;W@I9`?yNY9w&Kw(K-9Sdj zdV&OSvZegG2)ymYo{-4}11za@<}y2*@KSe78o|u3<>^4y94ZX48PB|j;jLvbLw82R zBhh`%)tP{*4^QdjwP$6 z72_$Fx0PvHlBh$J+8%mfbN5M2>x-ENHy8Dvq3!ElO^5Rw>rCFP#4D*o_dqM}YwbH4 zwMp(*y0PC6!-F*%Nrb~^HIfiQ$6KKWpg;DS2L*%ohCik@E)zHE*$`I4CSkQ>%DR$< z-(8Ha7eb!NrAyINVSm(&$5tQ1h3R-8x1%>Z^cPVw$g1hgtiuz_5G z+G}*fK_Y0|#A~~VY9zJ3f{On2T4ed&afWKO)^Ya6YpX<_weA7Bm&PwZ7R}l8rnZIG z0)Kr}QO)y{f;}ekRBT5m4b;GF0l@#Qbd#=Uly>3!Ow^R}VNAz-@DKq#%@@-nJq@&F zLT=CzziebY%POp_P$AHP46&284Wk%U(MMzI4(lD?0fXif3x${Q>Myvrkw-TX zZtDPC_CihhVxM57Cs*Ph>8LIIp($(>?n$jUEH5Ch4b$>$`8s1Dd#=|EibjVrM^9u=U-y>AeLVh z*OTcKbWe#CW7?|i!}HNoZEKZwl|Uxj`K0X7+@h)5?ozn1QxlYtNi(apa4!X4JkbA! zaHr#gIl(cu3knRY>r8F`q%zQtPUU&azE9`ICL6i}8i5YgqPyk>WC@!!%AKTHb9C9s)FWuJ;ap0+H(pC_AN#lG@I6P^H@isu3gXYfR)i*mROCpLY6VXthQHiGI2x_b6lT?dI^<-ZOsitV78;sPp8YyPfuug_=K_ig zm4v%nuUvPWX=&+U2yH>21}n3U8ZhppRveUi=p!tG@DC*WS-MuubiZ63v~fTkkhZ~usDRaU@Qt`F zpR&09Anup+xpPO=33cZK*NX8)s&;F_UYIZQCXhn*Vczf2(Kmj&kP@e0lfE`(C6reJ zd!#Zjp}q(I|4Q8cgOSm%dC9>4t_ES{{%`TZ|NZOw%Z&U2Bg5GKbsKEgKR{n_9bLd? 
zy(ev+TU>txo3`w%!+~twEp{A$lNO(xQ-;(>n zl87pmFk!`B?|c-l?xdcXX*1H_tx4+*U*c=()I>Xx13mM|3!$A*qt&fgrj4MY?|pf{ z-00dW_lr1EAkQQj4P*M4Sq?b&t6wJ6d5l-ln$zd&FHPI%3UEt1U$*(*3_sxK^>{s` z0IJnuX=dKFlx$9do5eV;x&pigsrJE|QAbm@0i9f-q_ht;C< zy3X*~v~T^|wO6{UM^;7YXiZn;^y{Q+y;^zv<^_Y7IByO?qXA<@K($1Y(n4SdfzT_V zj%#!5dpQ!TXn5^InX)cVSZJs+_-K@9LwKrR>EcRh=W8qtp@dRn@fS|_p@c*IgI|^@ zUw$xIYNJzJTOgL^*c0e_zX{B-v+{57g-3Tl;jJj1E)wn1LKEy9t!6<l#>^;fwN=>dX|L#AwFpkwcREoQe&9qCVqqe00NC@)JZ+U>qoQZWA$ zRmaS?eUD)jeaE$h6X1S0P~qHpePRKI{K}^t8ZV+w=1gUGC@c zy|tjuPlrdB_AOy443tH(ioJ>2=O?Nt(n&HARk94$E-Q(sz?fiK7u9uVRH2MWfeaJH z)8FLU_w0yiKjE=Mp>n|NSYSsC$YWCo==?DUje7LE?N~U1#n}0r`VGzmG?qv5fJUmt zk+Q2szQ;DYrv|s{??0a@KXRB3YI>*NgBAL}l@YTxBQs`&~ywSv}Sf#(n0T4&9)e`(G}rFv!qr0oA{ z1SE%DU1uB6_C;Qc`gQ$$dbqoEeOyJ3qdzDEOezCxQv&>;0N{$r0h;>wB7xBm1$}be zE3S%$^)vw?JB}%HHTD@6YA9aP4+XyLy?wp1_~YI1ac}ebe?6{2rp@$A<%Vg|*zG&@ z$|71d^r34#wp2YEz3&3rq;rJiCU>3S8`=vPM6=xse(p}z;rYKkmEd253wLp`K7oeHN1 zlDT)#xIs4fk`G8_!`|z~1Cc+*?#G7(5|@tqr(e`j$=fmTuv~{htga8CaLLV8=EDlG za}ykLdqPmrB&aYy1+QyweCOE59*j{E!+&S-4}fWH5cIY%5T8un@+y3A$A^gLEnx33 zy;I&mmOP#I<*DU+ZZS?Nt{EeXVgBnv^%BwATz#)@1VSqxiJhd? zMJK}in=TM}ssh@od8*{mgoM*@=DxLIA)i)_T>7Cm{p<&U#Z-oQ93>*P&9bGImNAEU zr^f(USBV{+t>Gy(z6mD}To=Dt7KUzl&k!wL&z>aRt;)c%#L_`wh%OwFf)(N2Pk))+ zRat{@I5rt9I-Rh67{W1(0O)6j^2{2M?2}H9ja)(HrPhbm=#5Z~JaK42Om9WbE+ok! zZZkmto{tbmLR>}M`utJpz5P5>IO`_h7zt`J(g@8MjM!RnsyD3Uo5*4B5U78a{L5j~D`8PV#1B zT&TD{CLQtfom8qa2IuwRedn&jO;okjyi^f|2l<&gA$OXH>e8{SDyqPqii)ukhAZwg z)KM=Lw~Rih_2lbRVKh}0RI)SnYkb?cWeDO+Ag~}Cea1T_!-5DV6(9c9MZ!WQDlJuO zERo!6HbPQPD&u0s(;-AbC<@X!kit{NoWN?Aa{(injGa~aX?Ik-5eZ*oD|dvz*?{C> z;v+#FsTe+9CIxMzKqS`1}FgOLpgl%g?@r0}Mg;UY1yN&SnbOF>}4=zK)r1#Pmh zEz|1E8%P~PZ?52y5*kS64{mO zc_RxV=oU)#E0ukRvhc|Gd>ZYXzNTH`*AA;6If>cVaN6RS(`YRAt|<429oRSj!MBOkmYv0wLVA8@15jH8Ap)8o98 zNC=?vT<4^(JDJmmcvZOKs?r|CiGSM8r+a9&txkW0F;!f?V7d>J#k11kR%C8}RRRfBoTIBL; zB*Ma*#yK?E*Lx>y({$0YKNvaMAIqeOI* zuQSz}SK(fTxcKgv^UfU(fag~nfePxnLpM2Wpz5oayCQu(&Z&G^39lA7HY&VRr>&9b zm(nOK)Bg%I%n`#zb1i61Zb%SsG+=t>F$q`h8ax*uf~HAk_87+DJd$b9E)&QNP>l$V zq^O?+g=;f|1F`&Qv_tmWE*Y|bVxkvb<#eJek;#N|b<%D`7{P!FVi1>iM6+ zA3DT@XSzh$RXylZ)N~ni3C^G6EFu4`=`!3Cd7p`niDEsRZy;&2eAxAAmIDbyUd>b$ z`J$J!yy@WrZ_w2t6Io~CS*Wm;qzQ|-YnjSHS~?u5>%N5E)*I=qD9Jz{l6dU?Qy{>J z$n409VK76Ws{4&>d6VUrhl@jU`Ob8n=zSOZINuJk=3l@s9b@^BT7(m|Ha$Em$Uo+Q zTp9KVYP|nUj?Faw1Ij(JrY2d4fO@P7EX9yh)EM_HB^eQ=l2x>M35qJlneg^2-&Ww2 zXo+REgfDENKWj_0MmYM2tJ;sXDJ^A)uPii4ON}j7J*dbXhfQl-vM<$U z7*;u^HpXHLXVmFpb#K!K&hrHVRq`B^mR$oa6e?e2@c^c8RRwdC-CnBwj z-$S%!ho6}Bpd~*qVdp~}ONiLsh}s#BRe60tPTRLds?J)63qO=TeF5rOx?YEY>gT@} zEVnjqSAJj+!!NPMt5R<0{$5;?a(plv@8yKmF&#tz9YNC>z>6wGq>gK1dZ@ZiuDdSp z>8!0Q{sw5n(2OqU2f!uY!05#8-twPxr>b#Xjb9O z=u0N5ch_ZQPnAr|QU|zv5KuYIr~oZcIltYkl?6I=rM&Vmv4YPq^uO=$5G2Uk80R`V&m; zrAypr=jMpLiw83}De-qOn#PK%(O9WOOI^c7oVnCUX*W zP~zLBL(Xa}wS*R`43hJ}M11?}4nrpd204#V@T6}Ry?nwCSWj_e2I7dV4Q%=3v=e3F)2y5 z>ETdT7~Y!Ka|2DC-5IUrtp!B269=ZoK$K=w4jny|8$oQ5&NaMkD)}rm0#_Xr6)CT| z14R-md=U+nI}fS)%FOK25on+|)6xXgC_cv50JdKJp=amBSWU%hoX*}nR9fcH)A!eJJTAv8d67Z{-w>DX3gFYVO{=`qKlUB zWJ^-DGm(W+`c7tK&{Y;D9$mEDiRphWF{WI|dPJdj?xn8WDj4+K%XQ|g(w!`@NfWKW zvNJUsv(P}P)eiA;Aj_^zqG80sD|}J)`R17H0aY`eA7C%jT|PwF%e};5l}jq6;o`LW z5xy4@{^5pz;T=9J;$feFfk7p4uy+u-!%yX^3^a=4DiDli!rMHKWd!0~r*Rz~*Aqiu zf&f_1k7=6K1(76H*TZljA!Gr!1WrzHwsU4!6aA&}hPs8C2il*}b^eQfD$xi1w=3Lo zSMH*N=K%VAZEG~=Q-zV|RTK0cpk=BXd3EM;$b!{51e2KnG7Avm^#mAL4vBLh=J*`v z2U#8daOG;r=$nwa+^J}G7uOy#=F;!V{sn)Xe-oOGtn}$c#vD%rlWWIqrPB&@ZV1b2 zQDga(0!JuWWn5qv+0>+Sj0J(?6{3x!IW}+>0CUS$kYbQpCpK2lT8OkJjt0^MFpY+m zy1wdcIF_w{2o7$bIZU-*K0~deavx2egg!5#;hj4=ovjWX9iQStl>Vq6n~&< 
z6SRp|FJ^4`43Br>O)bnEy&8bMiAVr@vq~t=vL4zlk)wjB^th!i~iv1%-ZM zyV22@y1tD_=m^5P{NXkPD}mulMr;`$+mxoL8sAf-8mE6c#DD{R0g2eav2pA5C4+&m zuk3wLpW(ZW(%nyf!6oqKEI4<(7jEmIb)6zZ8vLZgeIhq-JvIlpz#^>55ZSObO0X!j06_vy0O$bHYi{iRK9 zNdO_ZvC8a7x5&j+FzgNYR{7`-zK5lT&on$!C(_)^GKRd`b6lzwzEgAz%XQr^e8 z6qE;xgyu85!{IQ5q#MIjy@}^W-}Ud8laliublCL`cuL9)gC#@6<+Vr&RyJxd@sd~MpdX67 zpDFf@M^w6CiTz7^#jeG4s}an-Q~jBdm)+6Vs*$oCe3iQ zii+EUaHS2us6E<~d5ZqjoL;Byipkwt_;jor{UB_MBBzS>L!pKj7|x0-n5nw6Ls-X+ za7N1@GEv}0dFNW*Q4R=Kj9ji>1$vZf`;^L!J>AJfneI7^r-6Ll#iYfE2?~ih`2bco z+DC}>X)(5Kp=96`blT-PrDqcxKQ4f(zdWTkVfw=q@k116^c z5$f2_8bhcvXtJmO%tJ+*yoIk&f=5`Rg(5Bh9Wn}TFQ`_5a zEM%D`(8dy-?uuL*AC$|iLKxB;xgpfVme`AaN?_HdH;f(@Ko+CIPFHxP+6y3Eeo?ps{fY!LzQ zvbEd|bcXzQ28jm?0V;}oYjyPz9kJJ84W?$TnxG;V|N!qsivGr%tOQGtKv?G6!epPrjd%jzhQ zY5qZ1twA-DRJo09$B4FBM^7Rx_;c|AJ=pR`fudr9}6>G*F2(7dY>x0 zv_r2b#mxBdI+4d0f*Rzrmc=uyEx;*FSLgB4H5-%Cx-NDI_KwDW zbPBNT0qIX5>OE&YBRD~haa8BC@8f9u#t<}tir=X&h=0Hsi&Kh;c4Fx$lH61-EBQ$& z>Rh;4@Q_-U?7Da)0!KhzPXlse4_r!P$ynD1sN1id@oIa$*{PPT=V&~Sv>egYS8XH0 z;?dsIy5Qp|<^H(dcat5XG=M?z#jRxqZCixJVjaNVnPSJWBfdG^u`&&)xvY3?8%M z1ysgD5cR5T~pKtULRQ7r2znY7kMXu2=94-I&D0oYh zZAokiPc@ zn6Fp_W@4S9vzpQ^Y;VUZQm=4E&K)|*kIia~$`ejzuEpYOHlUxZGsDDSwQECXz;7mx ziAV?0L|pFeC8tR%FrIO9PJe|;~9fWAgJZpvJiGE<3M!$KDj_eEjzozd0 zSG7g>|Ks8ghEC3=PW1Y}%Bw-ZRX)rq9>c#_Py z-!HuINko<5xqpGDSfj*1qO?dXDGxd*ZbTabm)){DQjmV@Yp1fBmj>H3 z!G)HI>O@;t(MSPDWLAw_k_YQB>pK^EK7Q{a-6hR5zuz`g3rbR;$s(>O=8CH0XbpB* zK*LPZWkgkfG|xYLv-B_efAMV~KKq9cN`Bok)f&7Vzyz<)9o~67)AOfuKl&fk)a7ii z&+sYJ%#m9*#=pnYaLcHwL_5UXD%NT``oOrWLGgVd?RMF^lkZ9E1CY=D$)WQd9+F`` zWje^RjX)|D+YD_`2Ja8jooAm+1W9>(Z=IQ6XUOlwGtBMDp14#l(GQbHhbgV0ApPt3 z{RpL->5V`bCCn3{CNTSSHLXP3WtvJ8IHYt$kzT2#>`E%GYeTNeQb=}2db;Un9e|$d z3w!4HsS#8vDzb9Js1Y?*s>svpduA;N>{f*Z27rGZDRx+dgw>d>4L`>!k<0@YS*-RIKv)+akrjUwkz$1tG8BP)DMC(MmmO`Iihyo2G}f zyeJL5oeFH98zdea1f6H6JpNrP)qu621)67dlx8BBM?8CHl85+B0F>{4IuYslg8Tn@ z!$DyN6}>^W4>q^@IRRNH%otfkv6<)5WQ-vL1~persqR=azfU1?c5Egos-?+Fkk7Te zZn5aEpTvsHcL5e-oUZ1YFvi>m@Gpx5H&l!r+>&GE{8psCrb#6&)=%^sx4++~TX{Dj z(U`=HyA1CFdWH)a24%oMLVg6dus(WbEE>i!j|nFkiRcI>m+GI+@sA?KTy{Xt5%q+l z3uVM$MRq0RTd=7{Wi|no_xzh&y_9aK{AjFVkKbs|e+G(7@a+z>NZPh4j~l@YrbZI4AAqEZRgKKA!wiNrJVxhuwa6DosrkcD#g%peKSW0ayxQ`vl_|Ld-u{A zCv|H)Dxm)_t~)*ZKmo(BTJ7{IjP0zcIvJ1{TF&$3)pkAZjY$OeAE{ud@!3bCJ4542 zVt+M|p}ZS*s^y#t5tEiJ^fYuSOzp%e{Uv!Ptq-r^Fc!0=ylx z$Y}}M!hnlEWA=iRyi!q5;t^4fmWdKmZ0^R;M&Ffj^*XYqEmdto4POchg&EUbwwb&T zVSgzZO!Qau02Vr8qrkV|nih7XnjO7k%nqcnuVlI#ta=R|gNlz3w8>CV_L3tb7yC-% z0iI>$LF4`C?c%RT%zN?Kq}&IKcdFfF;uY$?DVlt}ax6JU^1iy_@sPE277;5PEClg^ zpZGx~7Jv=??E3K495b&k!h>t^!C#eC#8yoI`YS9tpw=+I^|W1tXfaLx6*?E$Z)3ZMRop! 
zWmi2WtX!E`DQC6dMs z#S-H%6rNl)e~#>fZ|K&QoXsNtky4s=RGW_(8F=>0ZzNlrMr0RH# zepIB8YJ3zl!px6%Q0`ZysxGb=#1Ly?P7HL(Q=7(}3(*qYl-%F4RBHiu%vcZ-!94UM zM{6+O=brYrTNX*iRO=F=P?CqU)3_{pvf%^J6(%=QHB19uROXEK#-9f`RB?u#ZRI)z~m=YtS zVjQS3tfV_pzz9>a?emTW8MFqBWf8iU{Wb-Fh^p_|x&yi!gUh12=$(OynY>KN*9t^Ppg&c`wmWK z#RV-MZ7Rep`fL08b%{R54>oi zkFimqR6NC6^Et1(q8umrV-}h*4RN?2x^Ay~=cCevWe*aDwJ-nX<-)H$yvxg9I8Q#qK5>FAwb8&2mtP5P9Z= z2Z7|rTGhR}OIv^?XUx9A0&urvvG#Q|-@1AM$Rvj^5p=}P$H+EyP@~vg=&~`AZQC%E zV%SVl!^lr<6~#hW2nYy9XNQo*@4phxOVm#i}9I>kS3LHW+#)%(dEU)NbL025GWYBzf{LTIWqiunBl<)cZ z?0QFvIEmTb9)m+{>2jL`6NMww0|&!Fa3COfgj1WwyEzEdk6-n-30J zs`GV{Q0o4d*u9&;i^agHIO<*~yoVv`DPhZTc!?!c_@10)shZCr1`Zn=RFAT*E38;H z;y=A)k(~|2?AT6hZ(&XYVqv4V;HpuLXaZ;sf9|?AC$~L>m0Pz?itezUS!jJviD$42 ztl0m%eHRf(+m&T6ceQU#cRGY~wiBX)u-+|lY3^~sx}GF^Y}SOlK6z+cGC9V_m5BRj z#Y&R2RL(ykD@AdjWf_dZ^YCUf_8en&CR`J)GB=gN(_=ycFTIX(5dP$qD!x8 z88%vJ^e7wov!M%da?|ZH-27<8cix;J+|=C9nB|cPQJOT3NJ)FCd9;xZNl9(si<1;Z zK)=5st5A{3bd1fW6tr2!#tEF|D%%dw@f9)QngWE@U_99_7)3Wis-9q%!xi-spSuy5T>CqJ^u_LXKp)rpA&d$knNol8tx+l_T>)R$W0sC~+i z<}EvSC3}bi*o)a1{*w{z7G&=su~3-u<3SbKQM!4tSE2bmKsVm{o#R*&;ok z(w9>I`C%Nx{-iBDSu;}iSVBQL5o+CL4TipPX|2_&bzGTIthE}mOL|8bC()f!mmnrV zixcBtbEP|^TmM%9D~fs#!Ht#51UM9ZR*x8k{QUB|ymEbzG+E&+yt5GgEo|W>nD~n4 zy$s7H-Zufku{hsUQb1mgm=xbv;DlQ7L@&iJf(d&scD$m_nWLOhJ=q)h59_>=FRv$* zOV6(1n;L!s)PgL9)A>VEAQlnPiCjr|6HpIrzr(z)2?&M)HAul75Uf1~iq2@h(oVAz zuqNXO2Jq%0D|!@VL^p3~74+6r9E8@Faa*LacQJ>eDZ0 zKW`%zF(q=8Fc7Z%Q~6gxfI-o5wVazImmal3LNA(Tp?fcyIFUeC;IX)WZormZ*jIAR z(a*f$2AUtxSkI%M5u=x_F1-^`pISfz==BP6dERJIz1E4Qt%}QIoVn5P_<`+?WyNe} zg9MRf`S1n%}eZB(kA0BKOez#q+i;n`G^%-5Zd( zwVT*mAm!$fS&E{G91ZbAAeoN!CenRwv;+;NnU+8AF0eP*mA>N(KiKf{K$Ld~YE43v zFv@U7_4CiLJ=y+k-eceIx z9d#~SEFSd5PZ)V-iM~4A9N{D_n-T{Z{^o$X;4BL4gF=zwE~^@4p2i9FChL)xc4+H7 z75k8Afy(2))%g<*0Gb?k3aA*o9cTTbfznHcUn-SP@cP@z@ALXhzfeI4Lc*y%`Ixdl zcp7gsrUlTBZx*^G8c z}AH63q>aC7u2XWT9q>ey|`j>3jOVMl)vNihQt1ATTXhW*YAG@ziYYx zm&p2IV&7n>->SE&*3+FY#e>>Z0g-MS8V)nFpf4v4{j*r>61PNqFZKtX5h3vcAAMij zd=Ow}!VHsi$jY_}5&A4B6>b$^&zQ_h1M=2*T){8iHGn5_LZoRf5yg&1#pF!7?K{R| z86%M|qF8XYkE9yKAbG!a<&PHXl`J=kwDZGrwVTH-dJ_C@XXu2%yx)2vb+Dj=KI=t2 z%m93^{Ltj>;1)>$1?B7Q@u2=5AXseRCgRu)F#(lZ=m>v*YhL_!%lmG6-Z$`nC2{}3 z$e#>c=VyLxq&E-%00jSI689f_7n7~YZ;lqKs`zUph4F(=_z^fA-!9z-yWCSC0Jaki zM76ky(ydRYLr}?<3t3N+OUgdniBGv2lI)xj)s`;dQ^Mz;7m9eh9O(#N$+ivoGqnd; z95gC6y#CHcvpbUlN}(KBWxZvA%82r?=F_r%4$$w`BSK*tDljRQZULnBFB9rlu{|)j zWwjZ$KF?<{81{DgeSQ3V@uE$jg2UFYja@j8H)-(&Xd~NM&M-%;WYaHsv8;(N_!g_o zhoEz13oSQnTbyrL&o}Q>KNP%R#*B6C8Ug)!T+|}Q8a(OzYWw#muqwLaI_svznRp<+ z3is;MZ^{sRqio5{fw^U)LPO=3CU1~fdkcCU^4%Eo;~XeS#j$}-Fb$rN7)kD|AuG$J zx)a`1drP0Vo{AU~RaR7Mxzu7wgl5fG)evqUz87U$Ns^p+m2Z<1;bD}$0KJTOy&$Ii zqNCZXX3ZE}y6Lu$@bl%(2js1U`*a#y1$4=^YMZOGc&{oW2=M00U>Tgz|0II}_=NHF zAxsPz1Leo1k<;_Fq88fG{22)4i7E-`rA3 zxgso+UzG=ytB7w3nT))TFC{GHmd=zaIH6`qe54Uq=$$^=pq!AUT+B-*ohJVUrK+j~ z3zyl}u|I+qO%ifsT(p(INi$5ozGZZfwm-Wx=-WYlz5dT~g zT^o(R=jhWh3$QUuygfE0Y(n1hRQy~sLA30_>%`WJiWV~wzlqJXM zse-PjJ2&m_vsrapw*hvBakpGW&fz9tO3lCeHUyp0fV z8zXm}W{>!_4$8rBp^*zL_=`-qqw$gEmmJ2FxpJ1N&LD9lMhFIQ;NwvjKiga00N}fD z1SX>k6U5N*8{OT$H`|GV2swE?ba1UaTh}0v2biAYAf)`JDmG9(^e2a&c;X?)kcHW@ zS*0rVOkBL2yzQH%q?FN7qcKCG0}z{LmQ(jhq$Lx`Bay3&_xEq5o-%BpJmi*f^m6ig zJpP}LqtWFc^-Nz0w?zmflAO_^{q~X#f#uUILCj#z;1Vt!{3tpIGv|}{Er0QTv>KH9 zf??^OMKWU9&L9Kxz5AxyR;4WxASG8j*6m0s&znX51*_S^|D{<{Ay9m_%dfHH z;KjkvPo+uLA%ia93xb3r2v-mUbw(2DMX{{CxieqWnIKdo+pf{*IdW0{6R?Gn*%1}3 z^N+=xC&PUt+BI?WIZukllQ1JBgtSBamdLmomI53OR{$vJxeM%yiesZwA5Y+h?OMH8 ztrl}d6S3E|H%Y-V4-k1q;MK(;V8}SWO(~tATa&oaB^&q(FVYfB6*C*T83Ss9uyn4T z&Yb-=1GGu(YQ9kov@d!c^}P4Q7C9ra>1M2hf~h7xCo<|}s9b(7FUrH*VGX%T)o0NJ 
zSI*-^%QjPE7?XmZdav0Uss789@-;P&Vk_VXs2H%^ic298B^IB#koNS{4Q_ zBv8@85(E(_0v3n&76*yZ4M=%IN#nEb>6V%p7sFTWu4#l-d7W;Ke`1=^n?w)$=1Os3 zG=JcDfIR2rzG{@u>=P_8}l!GZ^wv*bP^3+j42H3VHBvB9!?V!Vd8H>Oe`FZeAtN#W^w|-l;OVn8fy&R zPA~NzFI%e(wu9$uTg;=x0K9yw zyG55}U@g}Assv_z~{n!;5J9Fh; zxz+(p;72CTcWRar0S&bV{(65jf`=9QQ$i=Anl6#*yjc0wpSpF!ATe~iq1;KZ=;SsT zgll&|Ok@PT$BxcOPNF_n_7QwK|LNFY>+p#p4ciHV4l#&!BJ*d^JTf{7FEbO4?q!1w z)Fs9mRMVqT4Q84l65uGYSsW+z?yKU3=4s6mW<5qcWI)y>kVz9XSQ=m|g1?I4LgdKY zX;J2ItRqcwIUK>W?s;wA&`2z2i+zLSv(B8lfeUqLtSA^uI;p6KCcGH`DwHQH7(LWV z6Sdn}51qpU`uy>Tl#T^TcnPd%ho(AouW^~9=OUqJxeITAEv!&^L`iFx9heXBlqXwG znFtQ`-#Q17w3hyW08Fqbz#l|NlPguMl;YX`9vzikh^3qI%32ciUF7BDpTyg=`5>Ev zo5I>L)RB441+sK{mV`dalp}9!Qp5_f&`1W}V3e8biggoDHSiwUdIwxCSU~S;_?k@p zU|n-ui`bt;}!|?dchZG^WEr6s%!RCp& z{|#s(130|}N0x0Gc&X=l#elByxbG?LrA}t7)Ra*M>}3-fT9+SM7aV$zVW|W+W3ay> zMdhZRVlytayG@(ZlGy(2w`l3kRSac@kZu`+#PxWZ)*~hx2N#V_NmyX+ZGr@ zdWcRTVq_48y9l~D$r=s)WJ& z#1<_@2BedQ_8vT>kMb#=e=O9;0%V3*mbDiU*lG*!&K(fm^CnnK6v)x;>V}unS2?a6 z)7Sz*zag{rQn(TJn`>^$A2h2AxTxv=j+$h`Sktw%jmroab{vmBOB1QzlG@ZbUYj|} zWrhNxHMf;TqOftYg7?52r#)fnE?}sCDOoZ!rJMGx(KZ3&DT8I!j*1{vnP7`@O10L` z3RprwrbQ&;9;gSl%$DJ=&(&>xh(B8lBAT3DarDyAWMNwQ-!7A=Kq9$?gm~ZCN&|UL zdV2hn?|x&J+UECkBisB6*-KgtPwcz2=(w! zZpLIf4u7mFwW`)ic4@zH%b#cqJD+^JF zeGmbDW)=d$eAcn}LqciC($4j8VGr$)?*YZn0L90p>_nsIH`2G5Hz&s_vFW3dgVE&t zb~_HtQAy9LX(~LJ>B9DuvWHW)BF{W~d5AVqHOFOX!Krp*cHtl=$HgJtf`NH`sE1{) zWs-t6?_v{ss%qZ&6(GfXb0m5YFo*F)y~dlBxV3WoVfy+7=JNy8v$G7X#Esr$4)YiZ zaPeWH*I;Tbc+qtZe<+W?=IGF%KZ%%~n75;A&hCMl(pFdQUt}KeR%(&}lAy|Euys|Z zx9399VHhh%m}$W(-&&o=ca4KCEF{K6hUmDtqLV> ztP(MD946f}NF}Olw~K7~ZdN*9A4bb$8r&*$zZqZ4nr>JgrQJ{omK^rjKtDDxY_JI9 z0*scN!jq+Zkqn%XQrtvQOU2^gSN3N8)yp#B?R=@72B2(oq%IoR0Ayi=A&VqgEb@*+ zUjhyslzR@WBLPT&TXGRYl#WDgU$JS~xx4ZHyfMB7F1nQ!IBNo#C%grrUGXuRu9~^* zOj!%?TV9c&GK}$R{F==Cl=HRvU4f~6K$xG={C$Ct!=s^INqE*lnpEk2mvU5e zwq^w#(65L~p+1Zi$aDyRKoO%6(c{vBeuf{+9|HQM5X3Iepw@5v_7@FU8+fL|{LQV# z)vjl}La}68_JQEF0OmT8{&E57d1i4R!dLl<`OlTa4fz$&5dxJx&dGSp4O7P4LGvx* z)R14Z;|0v#qabHgldgA?bcW)RG#(fc zZDfZhh)r)3^KWulHnX!kwMIX*kQHX2Fv+8KQNyvhHD?!20UKeP)1LhSc_@7*p@kUO zAjOzZ(rXh%@vHKRzqN+x5;9$h;oYBqaAJiQv`cx0{JdnCO@Iy!{}_Q?7OZrfhx;(K zi}F_d81KT$%GE3BU1!8ZxrOeiv7L)myyF}hbDa|>8)rQ6DT4nz>g%R}S!;@+2&D)5 zqwm}Z#Xo)?x7VBE?FEaj#1jahC$UklS)PEhYPgi0yK0Wlr`)-tjdJX)0a-94J)95p zMFe5Uu+@dMfdi~wtS-TxsnMXs@};~rUQsX);U|*@0esdT$z2H4InroR=_5u|j|;5N zpvLR=dzT-!jxc6Zx$bvYGRWs-{wn8-o5WR}O3?#%{d}*3%&iVMv6a&t7{9YamYUmSxF0T_*J?ve zJ~$tx9RKIpVLdj*6uySYvTC5iM_X`(f~**cH-~+}aG;@gA>NP%zrZ5lT_e%gfYge? 
zmr3OlW(yG_ml5~Tg`vNynM*va;(R2YlHjm(ygyGuiJ>6+S>nU7O4H%igUOX92~Pdu zDrrDuWK64A+k}@a;>^lwznU%k*1y!ggrGnE&~OU#$)|O{^gt~Ty!J7^!kLmE-&!tv z$o65M%y3NAdvrGQ$*i^zzB0|dL=*`hvfAk90c_}s}8qUS=<>&C~iOmFDT z;&b{N&C$n)ql2#ZNyysx>nv|9MO}*q=gYk!BAu}O-x?NY@oTt_3EfZESgmf4`}3^J zNK2FM65|;gm#7M4FvnK%nLgvgA}UM}+@{@Q;H?4*IeESO;!l>k_x_YqpzGr04?ZR> z+k)h4c-R9njIx(899>TWHAUY}72h+#S%(1x=;D|3aeT>Eml96EhBQY|OQo?V%fow7vf+ zm;>#-osIbd{=dOKAgUs*=P%gL7+Wf-&BW!b9?2)GqfUnRoE;$#} zjM}Usa70o&t2WeK=;kOi0!)h`+yv~KzRSyTMO4&ekDtr}*Y4wgx2ca^Z7wGCiuFo) z6#Nn$j-4M}?#vI%_E}d|;Ai(V&?k{utxB;x`EBw!;y!%R4QWrKf=pM){E%V%nZXLD zCrM8&EzK=(=!C>HwDpvv3oLmcI)!#p$YQ{XB!E*A-Fb&&efbt-4Jhl9X?i2i%Ljqr zAM(1_$bM4B#AEX;4^+FB^q*Y4jFGF zh*$9=gcF{JYfSJ9gRvfmwd!Ji3NaIS9_u_lXQojmUj*4`jIs`_j{h84AhL3X=R`WoITOuAzKrTe^G~LTK-TEL z%E&;^Tf^E=7LX!r%D5x$QW9aO=7i*1fWC`o7yWa$^DWrF|jk$t@g$=w;j5BEE9s za{vSN^4WRR?=_pXR$*_W!`@tk{4Ll^u}KRH3Af=uhdpWE6JpUi4bhDR`$JahiWQ>A z4aiIX(S!7*iz!>Ezf?p;3R2007srh# ztL62`1BmhMZWg-n7cd(zf`b<@cg!n{aBTBeYl()>6!^M9xR?KVG<#-jfh8YUvi6{J zw*iyV6cJ0oi7o_0i~;qVp7?aP5AYu$a3^~qIf@q*LRhEZm@5ZyWiH^f-I<4xE&LzM zu2*O?k=Mq(>@E8>QDQM_;#k4&8p}_h8mIma^=x@aLRyE}1v@RmzcBRiy|vvvTJt1! z{$}Ip&e;k9aN;s3p06_7pAa_qho|Zb%aWXlZ#*GFR+yr!pQ0#!G(@fNP^88>?9XPRX@}eLISP?d6YdtqR zVS__?^BpsKBjWw>TUISn%ZL!y1$a8-l^8m?!H9(J5!bc`Gd9jDIF_0yj_x(&&*>68 zOj9=r_evV2l!O!>i*_8;uVcV2{LdVKLIMF_%&$mW0p!OyAYzz$jbpfhKJnEl+}y4j z_E(b?7@i-_3#q|&8ANJa80wG1AA$7cv zeuZ(&bps1&O;QTTUp9^)d1#r(0pLx}z@!;OplWm#w8N$m$!J~#k?2&=93U`b(7LU= z*F~&0lE63{H#G^9V|JbC&!9w%ms=C(=@%$&Eua^29D>h)?gjriatdyQ>0jpz74-8% zCjz{HpGv(Hx@QEx5k3FWhP=#(iYn^|An&V1$HQeb4eBu{J|9QGNv9hZur-Mm;^79$ zYiIy28f;g&X z8)I8qOmyEMM!H7h*M_^`o|biD5W%-Q3q1QN0IBC2G08ba&bi(PVmkP$TM1kJ*ovvKw^YJGFus_Wu4M26=7hT%;#^~I&0Y@ zMUZ)@+9uJO-ss7pjUi6FL*1GZwK=s5&*Wh_?gQv88QWu6ZmCOU_=Tuv!_yx{^>D`l zk5$KzeU{YB8l@ZG5wgFGe?PsIr}jmNslZtYMzJ$FA-+51+9s-AX9&Fur|PELM6-@X zDPh`|3(_5R;4aZ;)7rK{%{AW9v;3fV=2mTh3wqW-u-HUY?4D3%hW$;#kR_4_aBfV= zuuCR@^esP+X4?ZErnDr>7nMs4DzR$Xk<6rJkHTX?%RE*Knt^F0WfEhWvZpvkPVl$i z?Zo9JB3}FJyD!f2`@)M&*w{)fjRIAitJTZ{quGx9OOE??tWllBmsq9L<9^GMoK&d@rX!= z@4&EGd5AsY$Z9oYFGs^#L}1rs1Dp0neGY}0);CsB>|>1m&h;Ah=8IO$vpx^8^X?LD ziwTOlcW-;PqTqG`YEnLG-A)`le>oy`65QfVI#m2)KTmzA(5^tq@AJG}*(X7g z+>+M&2N3|CDQ$<{x?*=4%U`o?B<|pGZf&IRCK6m-N0|&uXfW*0(rZ&d#ZC~0%>E9$ zaSBqC@yv8RmtX^+7?L`P89io3AuMyDfWL7{Aiu`kgKsfOM?EJttZM8qZld8mYIguo zUNbrsHrmIchj$OSKU>7-{{RW^5-`xw{;cHHo3Xenc2~dyq~n^Rj7ZZy;wxAxR^7*2 zkHtlkZ%cFqChm2T0W1%YofnA1>5O)mz$g_H6JvobC|!p1r*NAaMtD*uFo*a?jQ)FT zupDn*BQ0p!RoP655OI`-Z_u&mS2n($!V$3Ibu3$IK}h$*9wYjQHD)7clMYjd1Z$(X_7JIAE#l+ zgd=uF_+^B$yK$PV;C4lEwRaeSV;?JtkOgI=fsQ&f*n=7(P!RF?gPJoIa1x(sZ*VWE zMgQ~qX^UhO2wX9=T4BDDdJ1SZ1NBHqPhzNYoh;7mw$f2 zepw6JYjZ+Bub;5k`(a6ycI7Fr7v9uZU>3gtl0o+$;jf%?vjpPzZ(Jqy{wsA)2jp{^lYVL)*kJ`=zC-)Y!@rT!p4 z!0Fd(MSre{Cv!n+I}a+q9_xZxOc!a#8>R{NGPoaLj6aNXvQ`b$E*BKQe(#@1?X+g_ zu2lMeXL{GQ3J3pL5A100fc9d+T~eu$9PIau-6$Ji*FtrMTTU#PPW&*bqD-Pxh{AV-G240}He zCgiea=$76W*sscjV;4b3;#lG!nb62i|DvmQcC_5`n5hCWI?Ou@-&S%_veNDCcliHC z?*AE{qZO$&tNlHmNBlLHCivfv>7AX-tsMX7?3{N@-#8E>Owf&YguXo0d@2GwG*!TM zabp?wsx`?}M0eNI3A(Kg(8Cl-diBMon2r3=g`WB#>9Y~w2%|b$_Bob!I9@(UP?r0l zo^qGti9dDuBo?FsQvO&xak3PoqhwL7f9=f-eiTh~;KYVqhff1|*VE-2;11ANqf< z_CEwA)POSDF3?}AWJCY}ivQDU|BI8LYiMiZV(j2(Zu_5hMU|Sh?FJjdHy**y9+gvR zBbV5L?L!Kh@CqQ{y1}ZVp5GB>qsW96i6UtkM|J8?H~0B(54rt~0lbcsZzdZLFV~ld>%4X zq!{Ox4IMHy>-wt{IN*ek9Q>ZXU(Z`3J==gDM^gl5#WY1i4}(-) z;7xsVBO~floP68_a|IO1&3SWkViiE+yi&(6I7`jwa>}wFB%oW{r7uBUj%nW^M=(Fu zmZz`y@zy5n9Ghv%ibb0*JFtI@CUyEzX{KiY`1KJ+JgPJtp!+}qNI~Wf5y$Z3hamcK zkw)OXWjcrtOQ0M zvalE4XCC-6jMJB3!&ZcMQhX&lG`_@Iyh9GRqQE7#WzEBkXcKTZtY{WAuebtO1787o#O=EVrIU=7+Ivuq 
zKI_=I%E!r+nr+7RmwgcdnI_%W4iXgQSVKF+(M-lB(zip5s!pq-_}GIJv@;Ir=e%D zbtKIyat!lg&{pa*GE{92oyiGr-{)d#UIC~Nss{TtNSo*tiu#eOx}ZdfQcAC$r!nT? z4;}B>M_|xaBCOA!G)LGdYNg!WOfIqaW)ovpaIe^XLzeH^D?ltQ$tkm=`SMfQ7COhR z+tuR`fh(RT5zDzHEA`$vl8qEsO6Ky!3*-3l9t%#3Dw@d{JA7`Ey5Ax&OTxwVGA)ls0qPfN}QI<3pn)pvFP zq{taq?9Ux<42@|>ff&f1cNU!XCXo}yi+?jSJ~@ucr~f6ZP^Tv3V`l0l=9OrAT2?R) z!8eQvcOYw%^!9KwnNJ$<6f37GP1=~J;@nbab$cPtO#%D7@mU~;(d%=pTSc7Rp_H&p zAhnd5GG-vbCqXde(1n6!I+@Rl_0QZqD-h$oTVy^`x1muD8DfzE=IrF~sHctxdC1*n zOJ=k2H?emsuRdrWkbFnpD3%5CN2gamghU0uLLRCP5^qHI=(=bJ9963SgSkFo83WJ} z>d~XW{+F(*f~g}W4YJXp!2*c<2I=GQ@&UB2WX8UJO=ruubh%WK+x}V5(|&B2!G;-` z12(dK+j@K~bibtO=;B!pVXAA-ib^B*fTrUJY&9+1led4+ z#a6~{gkgYsLDFb?Q(ecPN#)N<+B+m9pBw>EqW6_BwUh7=YOWq7SXGLyvq>k>1<0Sx zLeBA`eQi}2nf1rHk{FmEIwnW28fk4d%vEbnXYd!GW7%;lmE?c5$JwOL*ATV$>{CdV zLZ=xHi)Wt;Re5JGftrRbMrV-uTUea0<6n=K>3KsfaJM=0Msk+XMZ*c^mM;A^caKP+ z{!07-G{f<`Zw~}mAzf%qX;Z1Jy$|vKKzGdor@947-$VBL2eQihNVhfFci;AMA)Fu|a`goELF$FY0;|YMmnjzttwoW9ktyn}k2=Z1T)gvQ%iX$a z1EZx*CiwNI&zs32r(h}h6OMwK#@j!2UmZ-C7zB*&qR-c%qodXB;|Xbmij9zz1g2Qc zz^KUUKM8Ra%Lo-e{am?_o&ahG*-25%bh}JJ=Q6=Q72r;F)}Y;aygq3<71|%7RHD%b zTFw!_5133Cian0Yml27URqyyrLcNgAr^`45#cMn#O_f+KFsGnKU!%h|i^zuVq{_Lw zZQy-@O|}ZBVpfORMT@Jjhq!Y3EBw|Qwq=)8?BuIG(%>h1;gw<_CP?L&vdebzTR{e| zv!f(}niT?tb=UPozQLTHVc3D)Sd8**`}%%e()La;(c|Rza3smk>f&a|&+b@2E8X9) z@)-Z@=`jobG?wD!)AxS7?%77>=E~pt;(mQP7#$vj`qGT6M|RivmI??ED~<740h>5) zijo|IL0^GKZl^>OfsTIQo+ypTf*%5ALJ($y-*Axl_{)xly0>v9hT8<2Bhp?N)5R^tXG-PVkGWm7>&*HrVQRMacbHH5<4)`(S71WWg9HK&x=XS6@k2QfPKQ z0;h^Zw!#@L6rAl3zjUMP)S4H*0#%=a$u@d8{#8JIDa~4vtqJ##49V6e!Q(Wab(O0` zpF1-th05Wykm;KRw|VYHcOQ50~_9GEmEH-cVrIQ z{uYR5W4eP+>d8QTJohru-{ZQ;5b8RMq8&v&cwDbyorZ|`UJkyjbg|gt`r*uC+vAl; zws98a-HTV>ieWR~1-(e5vm_?YEL)0}242Hk<*A>+^;s-*j_Jp@g43(R6dkud8)y)2 zXu8t)SoHs>&kpH>OVyEAg=BbQyGsnF&%+a->LlUu8*EVk5DQ`)=Z zjoJMo^!Hr(k}ypi`O$mMMJ&}b!1_F!H{s&vzc--&RNNg>vs4nl)iweU008d)sRsS8 zy8Cz6QTa0`HOPSAHKP{wRGi$fdqEg63JIPQ=4z`-x?)wcw_aq`obuthuq?cpIwt30 zLN1vg!ux(b`1}WYo@pRN@7-pwSg7)}f04Dli1EoCFEAM^ zKl3!dgc21QY=<4pGvFa{eYHHIGcx~~1b8a&>J78%xbQA;;MHGqvB-T_IL$Sd6?_1L z;jaO=>;7C_Y{vkNJMr1tmm2LIOS;tO(c0l>4y1d6$f$6SJg4N1Ej?5!DxLQajh22y z#l-#>u{7E)$dK&d8p(wC<-E+-|HuJuMrdlke;wfW@Bi0@@%yx~bvD&^qW|B(&i@n& z`6XVfK|!14%>BZLvfraltpD{pLo;JTOFLWh|56BP`o95#P4CZMo=;j7r^7reuE`{5 zJ&QzOvE*F)YG?t1S$I9?M1hpzb7#Siw`-9O#fW4JytMx!-~Dxtr)LagfF4I{(veR1 z(YQcb)GjSM%=Xcwk)rh@hD`(skm_%mkU0#%tS;Qs<4CIs`cIm$lQ0?w>^Uad3qGb) z79B@TLk(?Ve-ML77vT&GfC~xS$egI?fLHTVb*BX(E%sNY$iXOkCSXo-v5DHp@dgLs zQgQ$`(Ty;GG*FT<-tA@)LZiWlv+&>7rK*m$0j>Wq>n_TCj*M&SOGsCtLyI1AeQjEZ zqtZ|~?9ujWjo|O@3C0AWHk9JcmPzTaEEi20ogy)W)Rkyc)p87>N)%F+HX(~R$uh4d zFWW?IGolo`d;We}vgYY&CHwntPQ%HgfSqOr_?{Vur-C?F=s+AhrSd4Hf+m&wB1YDS z&(r>U;^CoTkKhQCW!bRgU^2YVKF3Ts~YXv*zL-;?^-MyMyP&?d@=$P|y}3S)f77tP8Yl zPwbS~S<;brE8wS3F>vz%Kj}&9g^;-fY^k3#hv_vFHZ|1!euoL|%2rEH>tbTlUWoyR zMgl10ggUb!Ot_Q=ltlzNhN5B(P(u#fB@bvqNolbj#NEt~+LXx2BPj6*2c|1>WEF($ z#I=ICD3M3%OS%#*a^`m4Ya_Jd$?xYP6~ajxbrA3Zb#j^$y63<|Wvk#63m2F7Pf>5S z`sl$Va8!551QnIaa26}opgpSUhF^OL^~j#L%cHk9d3y1j@+nw0J(ZGCLqf7?ZQJ|h z?Y_k;l#Hs_Oow&0f5MYLDMzUP2IqUWcS)&pDy1oD(vA-n6UH#uB$C~&R_oI{Q-H)` z<#3-?vPlvD!DX*CYtZ8U!X^apX7xvtymYgWx(=@$>GM2|Hy=MIyW~+5&kaB2|da?k0JNX7Hb`}N3vIXnf2x9+m@5Z z>=R$}ujtjiWO|$Xdlp-Kz=vWSj0Mz86}+>(t&;)s|BgoW8nP-dG{shJt+~F0-k4Yk zx(x(^2n!!+;Pg>*aOVQ;(iw6bdaC(tiWbffL3qKVwWEy-a>9% zJKA=dbtpiuP$s%#ZLsBNn449JHV)di#z!b~1mF{vwz$qRnyswEn>TVl@N^h9ZoXP- ziL*6`l<1>z@RREl^W;{6{YdYUL3%5ChUi>X2L{}f3bRc*{i`aekEG|>@36m`*)E-N zhkd1cOCpIq-Mo_1v-Mnx<0_!piD3QF;%7gf_Q^uHfR-JQ1=LkpTzfj_pfQ_3UfEPP zxR*TdnIjjg0@-o+s=1nFE|d=m!+wI+Sm{TDv-6sR-$^pz2lF2sf{JsFx%b9C7uNZL z_e0P*tF|o6=wX-<|ygQ^y 
zp<%^=$e$p{hb#tB*o|xp_{Z>*-*V8F6rwD7b`zgE<~7Oov^I*v zyC1dTwU3A9;2{4`Cf{93WA;ZLQ|ya*%@D5_t2gg9Dg@d_K3%*gCm+q;D@G<8$?RQR zXsJHY2f80Q`ue?7Y=MVeR|l!EDpS={Gl&sWQ8mvo{6Ri$Amb4QVZpP@JA_a0!$Ec7 zR0bV)Zo7}y--&XJ)?O9hZ+>r6avmy8$z#^qH9Tbu!ypZmzWI)Vre zIE!8I!-(PU|G{_p4~VFla7|PH1rSQ4008L!Ya%exclypl{S&#-N&T!XvO!o#xy z*-7-ycNv6h4p4Jx4c^PpeqlbjwJ>GVR-&4=1ln6ttFqk4UB> zOx6Co5Y$O?-ribWR?Oq;xSrUxhW?k}b+#6&v!5+W)#J1YK??*lmkBaprzzqNOcpar zK`W_aud2(kV(Jw54u33D2M?(v+n`CN0%&t16J{picWUd`{_;$D)MHyYK>@qWbJ$z|w z570k1hLdmA9%$G=OPY%Gy@G(XHR2YQ0<12<$pu3Z0Z1bcST<~rP?Y%3>QRiOfi-z~ zDMXwsm! z9NS2$uCq`G*k)`h9I^q7;i7{a^`9UHMO{eR@|P7 zZP4;HBOQ8j;@TVxm<$zZly>vD4S=lGi@7hR*<+cXC(%bvW2+Z3QkNf@RAgA``3DO} z!8#-}OGRQt>KLDMXoCEvqL7L5&toXwQtvz=x2M!IE^ssR_EI~+=VjhLy6V3NDol{` z<4SLy$8L}<*u*m@OUv46D(xm};mF(D-wLaU?iOCF=y2M3dcCRI3#J&t4Ty=6MBh&8!pV7U=SX82aQ91S#u>Q2n5#UuP&_r zfOe}+-JBer1!}$*W8^fc3i%L@sK);r z8=vU`#RDn4l<|dE9QDPa+B95T_1Q^m-d846Bk zFNl@?@%XvEOzPE0p=0~IP>WQL+6fFr%GD$tWp?QIr6Ke9=8c+RUd{0cx4N4TXGt~- z@~=mVW1T`a8^NXn<+f{Opc(twGQzxF78!|>2UhG=pg2O6(%rZ}^kYnp3k&3g3LooA zWCBzsJ^~Wz#Mb^FvmH^T-&eQo60Qr2h@#w+pZ#P~+vJ3gSg15T@FCI5;N%EXJ3f}# zP#cW^meb=yJH=VdN?fI8Evxj*IH!@61`Wsq$V#po<3b=oN32-yp2i{E z3*Cg>m5Y~XOwPyAjXnyKWTVW9IuGU}=P=5s*aceAKF=6XhT>&;v}5l;J9H!2a3y3N zK|&B)fdb2K;KboQ1gV<4;Yz%Mcj!2;>`(M>n5FuT5KuodICN=Q2ti91NF(ek$Xs^&_V=e&bctpW2l~VI#3XiR=d-cwy0qH zoqk3GKqPrmet;O+U_gCM3LUd{*J2qoLWE`cOpY+-UzK#{5q5@Fqrk z+k9b-29r2W{ocl2NwrIyP@;)&m!MqJ(T~7XfXcO7fe=wN^xY7)t$jvf(wlQB+f&PO zcEYS$Xx8Cq!#)))5un6- z8GB9Py0G;Y?%3j3XzoWq-U}1jUM~fXEhW`|pBtEW>ya&9M1e#^DZ^8XN z(F?{*E(CZVegB3PzZ&5B1S9|)V6tRbvOonIYCQ+Hmc9NX3kb9g+FhLXd5ViEAhHWs zG3BZ(Eih|lflH$0&vA`fPSyd)3e#rrE$I>A0XgPv+`M2)52t8x&a1b1Z(rH=eQOBY zDcS#bJLSbW_B4`${Bsc@+(K_tMeXGN4G6#yhq9mIjU%x0;>Ww99(UO7kyO|wz~23u zDuUH)WCFAuV3$6BOpL5Keh6B$zhA@y^7p0Y^CMGWN`aUXg})# zfa%L^RLjf{%BT#dGn>n|a%|-*i9NQ*a@?)W2}S%8aE{Vf+!itiZuDiQM~cJU0t3Wi z{|~MmE=!;>_T;GKO1Zibi}x@3ir+()(!R8Gx5_cebuA-D_DICeY>0QrvjtpQ;LELR z{uc4q{xucv+)M2O_dNn9RUi?w*+x0emB#I)omJL0kSgCMlaYSZlbMt~?H_hZ*DUEy z@54WbL0{F{JG3qlP5Bac_^)()aslRUB01I94bd>CCf1n~$=t#CXNI;qJ62%yS!95> zH~GK}JB7KZtffSxBit08wwab80Ll+EGL;_$oYbEs32jH#rQ^((hl|*ma6~8BP@1X= zhHC<68cTm!%;(hbbg-%IkCV+~Yjwq06kl)Wi^Qt0#+J0D85Es7A)8r>zxfii zBc^k&j!yGNX)KZ}#)3Ttp2teQ0~j8a(rA30lYC0<>IO6^jHP&R)DC~u4!L>=4(Dpc zC3dnlH#muvghElM!h?;mdE{yPR zJ7e*;tBv-*6{E&3=7#?<97tB~v)B+o=z@NP5A+Dr4zOq;1Q&+u3oQlOK2+D#2acAWczo_$DMmzPR4*FO+poAfn#A$@ZwtqBk z>hyed0arwg6|@t933f?iZUg>9IR_YmTcAho-&l?90OLSY^Uf~3QPsR5Kpz7|Bj{7k z+Bt@DvNU|L!;wz64`Fn6Ps%y9Aym9>yC`Q~$i$2TX*n8%tv{BiPhF`HPJ3U{oBh~s zj>CGYUYI*sLWxY>P*w*=>+Sb;d9d`#nAzuICsP$v|MuA>F?+OWiJJj=9Mt;WPrTI;;tFdaM$cpotej+Wlj<{^Akfy@|dT1!0Z zU5tTZ6%K{a+U%&S7vB$WAq%MLMM785o)S{TYgfhV@(p(G71lHV=ua@Z{>Cn0?t0o; zqBI0R7Z2By2-^2glVO*0LLyP-XUYr#&?WV@8Hwr7c~lzExxi(3SyWYEElNf zs-D$$=u5_7up$^KOdf9ZnDUxzNDOS`!exnhlSRxvZkzP-MS89d@2M?FxPi`xL=8)s zqbvW>Iji}i{g(RowcG+}`lr~b1ykJje>Bk=f5Q%eiPqBVH|zv{!;bj>2|Ha|LkC(0 zIz~E1b~+<-M<-fy8xvc48A)MLIVDkT#i^J~288YpN-@v%WgC7ehM zeW-niE?8F^t|3M(TT<~vGm|gu!i2Dg)lEvZQN`c)d>so2du`tS{lj48b>UsPGlYPg zP1I&3a2egMg_Z@d)WcNVx!MrZ^1DDVF3lsZ98*&$)~N#gf1$fWUCfRXWeGft+(IL> zfq_05*=!sZAhFC8GWU#d;>ce4wlmhYkA!Y%$DC*1o;{kCtccF?p|N*4;m~r1?4GkP z&UWbP=vEF+`{M*o!rtAY)nSyi*fRgb*!qO%jl>fM3hziS_gtr7*8AiK5069usn`Va z`^xSEUDP}TCAjoC;i)xKeAinSK}Em#;Cw|FH2Iw4kV5TXGOt?1e79)VZ{F~}g{j`4 zodTrxsChT`&qfjBTp8+@MqXHdhchR7g}J${MA_Xv6ukJ#d>Txu3RJO~%rpUL2p2D9 zPKtoZcMiWO0ePA?TbKfnv(QfgE`qta`{CD5A|dco)1G4`nbem$P}Y)7uNa;r5?xCMzfIG+a!U(p{|IPbYxkUy0j}c3B19W zOKaG$l!pl=nFN=s1K)+fm$vZCNJiY@wyzw*E2`i&dTn8?mh&L1Ye72=ue7yF>zA^e z)JqT?fcm0BOA$tzu(-H_e^limo!NgDiHc9wdYAgt`X`(_F 
zIui-el7vwYWe2Om>e2-eKW;DWCn7NQIUbfn${_NGs9pXK+RJ}}7t3&9Mjzpq_QDDP zK=c1Ecxh$jMMP!jtc|MvP9!R{Qc7wc6l6m7fp{gahYh0`FNtrUM1sxt5Wq$QlB5+BG;mxAJ9r- zb)h^L63mso#kn+lKzWrG!gpJ;7*kvby3dWyQj{Syk0KmGkdKyKZdr zQ<-{8&-K=h#cv@v#KybXrFmHBCieNcD}bg*t4XOuu|mppmGeptqP?^G`+nlGkk0LT z0%=N&`Z*S&_v1R|J~*qEG$L;=r(g{X&hN`ftaUdXpmm8|vjPbYP47eJFNidCAFN zgZ8)c8xl2~Vq(oQlV4j|GFNo%>oOc4(=wZRmxGeD=4AF`kc5Rq}yC#l?JfqhU^}MeewVSJgenXwm zGKPONu9rbZ!MRbYgv3VEtgN->@>Sn*b(V^8J`cd9S)@k$w2?2U6-VV!l?7^>GC~vn z)*1JadRVl#UN#6LB)mU*dq9}F2%yGIC3DZky-)>i)KN>j#5Rim(b<%Ry<>-)!-G>_ zB5U*1_yX22-flGhvLzM};ZY(TTXfYW?%yBJDzB49c^BiM;X^V&+39Dfw#D%afYn+z zGgc+sZP${m6C8R8jKQe>)`AMxjKTm8k77K47VRcB6;U*sx^r&Q6k^*>mnurXhL#pW z0;8VpkK2X+tFW^GifdW=IPMT2xCVFE;O_43n&9s45L|-0yStO%?gV#&JA?q=^1XL& z^0@cDH&v&WqL|UFlg6e^XX9I;?%v|@a1z~x;1)w} z(?E7pkXjxxe1b;-MjvDJoR<{o+k|SBug18Ru+NnBB{}I6)Fq3sHw2MEF=o^5)2(p* zgN~nW23;BSmAa9_s4Js<^C7{&aovjK^STUoZh%*Z^oF?gZsj50;zNLG;D#%Yxm>%s zq8z%v_X8#~&=ahZ={}AjkA=KnXccQ^EGj91oo($DYxeyB*cu-Kf1Y8s&n%ZvXVZA3 zdzfThm6lGysl0Lret=CHENfjg2rCcM%qOP+7oaG?I&3j=ZG>P~TP_%F13^WE-H-7O zxURNJ0aWNMJU37tl*aUJ$+Er^7`hu$n?j=zfs1}P=AsckFyPECJcFOBXGIi14L8XD zvu+CK9g+w`g4ZTn7)4)KvoI#2RT&ZA+ju$;lp6yv-9?q-K|qP9L1W091+aMOY}yK> z)bnqQ4^0HHaM+exQbeLTQ4KlN)V_!UI%N4Anr8TA3SvmiT!WzqT2$ycu$&GWO(IYh zjPm9`8x$TKxWk-XKId@hx@xDA3W+d&s^G_omV`3@Uhc?XQ^TufFyJqI6YtWtj1rKn zhfK2>I0rY$gC*{goCBT@q{BrDKPIGNtcI`&S14PBKq?7Z={JdvQjpRG_wi`#@j_`E zAr(rrwD6}!=Se}o-aKrMFxGvaGm2VW3`PQn>q-<7|9#jGE7e6i;cWGRCjd?y_lH;;7(KnoaKcG+{sSXSs z8cSg$ea=5tu^gF~v8l=B%aBE*l_(_-x0El>1)6JrsH@K>XIrol`ySo4SlJk#-1Lswz(_#A zYu=y*T@8yN?}v@X6mzer|MCvrGyO-B<5Cut&!|FmnAQ+yykr!Fq;erW6srMY?c9hz z_-%_x6ijl~Yq*8heRL~!$SK}gtb8`--%N-(#-9B^i?kbWb-`cPVpheI|2|brdmyXJ zVBo1I455M7%Av?}{gUTdF+6n-R;+QFmZOUILRoE@ZTN`h25nNeG^IwXmL>Kk4 zyN5B&%n>Xm;A_9R=nZJWx7*yb^$G2d`o?+4GV_AP((-w!8Ru!Zw{`pSfx)GvsFS7` zJABZ6fnh)*IADTvlr`W_TP&%0l*Ta7;S2a}J+lglJuNYaJsDs*`&+dKP9HBZ;j#RL z7-6jA`Z`o6vv=KlM!X!Hg0qs*%P=P34!Ows9NC4R?Y2e$~8qofv;h+)|2^TLXh!<)<{ zI>w>*{hTZZKbzW0ik9Hl?dME7HxeE${sm|;GlpuF)=G!YYDA#Kk(Z&OMa>D=(bXC^ z&jym2hh(tMMq>RWF9ey2LwTl`dX$iM^UhO$=I8s^o^;BaVm<8&_ZTerQpMa_HRdxztnrv%64+V>h`V1qT#nLC4E|8pWs09V>b_?

[GIT binary patch data omitted: unreadable base85-encoded blob for a deleted binary artifact]

diff --git a/src/scripts/dep/llm_bot_dep/__init__.py b/src/scripts/dep/llm_bot_dep/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/src/scripts/dep/llm_bot_dep/aos_utils.py b/src/scripts/dep/llm_bot_dep/aos_utils.py
deleted file mode 100644
index 7d1cb88e..00000000
--- a/src/scripts/dep/llm_bot_dep/aos_utils.py
+++ /dev/null
@@ -1,146 +0,0 @@
-import boto3
-import json
-from typing import List
-
-from requests_aws4auth import AWS4Auth
-from opensearchpy import OpenSearch, RequestsHttpConnection
-
-import logging
-logger = logging.getLogger()
-logger.setLevel(logging.INFO)
-
-credentials = boto3.Session().get_credentials()
-region = boto3.Session().region_name
-awsauth = AWS4Auth(credentials.access_key, credentials.secret_key, region, 'es', session_token=credentials.token)
-
-class OpenSearchClient:
def __init__(self, _opensearch_cluster_domain: str): - """ - Initialize OpenSearch client using OpenSearch Endpoint - """ - self.client = OpenSearch( - hosts = [{'host': _opensearch_cluster_domain.replace("https://", ""), 'port': 443}], - http_auth = awsauth, - use_ssl = True, - verify_certs = True, - connection_class = RequestsHttpConnection, - region=region - ) - def create_index(self, index: str, body: str): - """ - Create an index in OpenSearch. - - Args: - index (str): The name of the index to create. - body (dict): A dictionary containing the settings and mappings for the index. - """ - body_dict = json.loads(body) - # Extract the settings and mappings from the body - settings = body_dict.get('body', {}).get('settings', {}) - mappings = body_dict.get('body', {}).get('mappings', {}) - - # Create the index with the specified settings and mappings - self.client.indices.create( - index=index, - body={ - 'settings': settings, - 'mappings': mappings - } - ) - - def delete_index(self, index: str): - """ - Delete an index in OpenSearch. - """ - # avoid NotFoundError: NotFoundError(404, 'index_not_found_exception'... - if not self.client.indices.exists(index=index): - # hint to the caller that the index does not exist - return { - 'statusCode': 404, - 'headers': {'Content-Type': 'application/json'}, - 'body': json.dumps({'error': f'index {index} does not exist'}) - } - # delete the index - self.client.indices.delete(index=index) - def delete_document(self, index: str, document_id: str): - """ - Delete a document in a specific index. - """ - # delete the document - self.client.delete(index=index, id=document_id) - def bulk(self, index: str, document: List[str]): - """ - Bulk index documents in a specific index. - """ - # bulk index the documents - self.client.bulk(index=index, body=document) - def index(self, index: str, document: List[str]): - """ - Index a document in a specific index. - """ - # iterate through the documents and index them - for doc in document: - try: - response = self.client.index(index=index, body=doc) - logger.info(f"response: {response}") - except Exception as e: - logger.error(f"Error indexing document: {e}") - def query(self, index: str, field: str, value: str): - """ - Execute a query on a specific index based on a field and value. - """ - body = { - "query": { - "match": { - field: value - } - } - } - response = self.client.search(index=index, body=body) - return response - def match_all(self, index: str): - """ - Execute a match_all query on a specific index. - """ - # avoid NotFoundError: NotFoundError(404, 'index_not_found_exception'... - if not self.client.indices.exists(index=index): - # hint to the caller that the index does not exist - return { - 'statusCode': 404, - 'headers': {'Content-Type': 'application/json'}, - 'body': json.dumps({'error': f'index {index} does not exist'}) - } - body = { - "query": { - "match_all": {} - } - } - response = self.client.search(index=index, body=body) - return response - def search_with_metadata(self, index: str, query: str, filter: str): - """ - Execute a search query using the query DSL, using bool query to filter on metadata. - """ - # avoid NotFoundError: NotFoundError(404, 'index_not_found_exception'... 
- if not self.client.indices.exists(index=index): - # hint to the caller that the index does not exist - return { - 'statusCode': 404, - 'headers': {'Content-Type': 'application/json'}, - 'body': json.dumps({'error': f'index {index} does not exist'}) - } - body = { - "query": { - "bool": { - "must": [ - {"match": {"content": query}}, - ], - # looking for documents where the metadata field exactly matches the value of filter - "filter": [ - {"term": {"metadata": filter}} - ] - } - } - } - response = self.client.search(index=index, body=body) - return response \ No newline at end of file diff --git a/src/scripts/dep/llm_bot_dep/enhance_utils.py b/src/scripts/dep/llm_bot_dep/enhance_utils.py deleted file mode 100644 index 3934779a..00000000 --- a/src/scripts/dep/llm_bot_dep/enhance_utils.py +++ /dev/null @@ -1,243 +0,0 @@ -# python shell only support boto3 1.22.5 (1.28.68), according to https://docs.aws.amazon.com/glue/latest/dg/add-job-python.html#python-shell-limitations -import os -import boto3 -import json -import logging -import openai -from typing import Dict, List -from langchain.docstore.document import Document -import nltk - -# print the log to stdout -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) - -# token number to slice a document -slice_size = 50 -# number of questions to generate -question_num = 5 - -en_prompt_template = """ -Here is snippet of {solution}'s manual document within backticks -``` -{page} -``` -Please generate {question_num} questions and corresponding answers based on these document fragments, with the questions being as diverse as possible and containing details, following the rules below: -1. "{solution}" needs to be included in the Question continuously -2. The question part needs to start with "Question: " -3. The answer part needs to start with "Answer: " -4. All questions and answers need to be in Chinese -""" - -zh_prompt_template = """ -如下三个反括号中是{solution}的产品文档片段 -``` -{page} -``` -请基于这些文档片段自动生成{question_num}个问题以及对应答案, 问题需要尽可能多样化并包含细节, 且遵循如下规则: -1. "{solution}"需要一直被包含在Question中 -2. 问题部分需要以"Question: "开始 -3. 答案部分需要以"Answer: "开始 -4. 所有问题和答案需要为中文 -""" - -class EnhanceWithBedrock: - def __init__(self, prompt: str, solution_title: str, document: Document, zh: bool = True): - BEDROCK_REGION = str(boto3.session.Session().region_name) - # TODO, pass such credentials from CloudFormation creation and store in SSM - openai.api_key = os.getenv("OPENAI_API_KEY") - self.bedrock_client = boto3.client('bedrock-runtime') - # session = boto3.Session() - # self.bedrock_client = session.client( - # service_name='bedrock', - # region_name=BEDROCK_REGION, - # endpoint_url='https://bedrock-runtime.{}.amazonaws.com'.format(BEDROCK_REGION) - # ) - self.prompt = prompt - self.solution_title = solution_title - self.document = document - self.zh = zh - - def EnhanceWithClaude(self, prompt: str, solution_title: str, document: Document, zh: bool = False) -> List[Dict[str, str]]: - """ - Enhance the given prompt using the Claude model by Anthropic. This function constructs a new prompt using the given solution title and page content, - sends a request to the Claude model, and retrieves the model's response. - - Parameters: - - prompt (str): The original prompt to be enhanced, not used for now. - - solution_title (str): The title of the solution to be included in the new prompt. - - page_content (str): The content of the page to be included in the new prompt. - - zh (bool): A flag indicating whether to use the Chinese or English prompt template. 
Default is True (Chinese). - - Returns: - - List[Dict[str, str]]: A list of dictionaries, each containing a question and its corresponding answer. - - Example: - ```python - prompt = "Do we have any solution offer to Stable Diffusion?" - solution_title = "Stable Diffusion AWS Extensions" - page_content = "Stable Diffusion AWS Extensions is a CSDC solution that..." - enhanced_prompt = EnhanceWithClaude(prompt, solution_title, page_content) - ``` - - Note: - - Deprecated: Claude v2 does not output Chinese characters in experiment, so Claude v1 is used here. - """ - # Initialize an empty list to store the Document objects - # documents = [] - prompt_template = zh_prompt_template if zh else en_prompt_template - prompt = prompt_template.format(solution=solution_title, page=document.page_content, question_num=question_num) - prompt = "\n\nHuman:{}".format(prompt) + "\n\nAssistant:" - # schema keep changing, refer to https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters.html#model-parameters-claude for latest schema - body = json.dumps({ - "prompt": prompt, - "temperature": 0.7, - "top_p": 1, - "top_k": 0, - "max_tokens_to_sample": 500, - "stop_sequences": ["\n\nHuman:"] - }) - # note v2 is not output chinese characters - modelId = "anthropic.claude-v2" - accept = "*/*" - contentType = "application/json" - response = self.bedrock_client.invoke_model( - body=body, modelId=modelId, accept=accept, contentType=contentType - ) - response_body = json.loads(response.get("body").read()) - raw_completion = response_body.get("completion").split('\n') - - # Initialize an empty list to store the Q&A pairs - qa_list = [] - - # Initialize an empty dictionary to store the current Q&A pair - qa_dict = {} - for line in raw_completion: - # Check if the line contains a question - if line.startswith('Question:'): - # If there's already a Q&A pair in qa_dict, append it to qa_list - if qa_dict: - qa_list.append(qa_dict) - qa_dict = {} # Reset qa_dict for the next Q&A pair - qa_dict['Question'] = line.replace('Question:', '').strip() - # Check if the line contains an answer - elif line.startswith('Answer:'): - qa_dict['Answer'] = line.replace('Answer:', '').strip() - - # Append the last Q&A pair to qa_list - if qa_dict: - qa_list.append(qa_dict) - return qa_list - - def EnhanceWithOpenAI(self, prompt: str, solution_title: str, document: Document, zh: bool = True) -> List[Dict[str, str]]: - """ - Enhances a given prompt with additional information and performs a chat completion using OpenAI's GPT-3.5 Turbo model. - - Parameters: - - prompt (str): The initial prompt to be enhanced. - - solution_title (str): The title of the solution to be included in the enhanced prompt. - - page_content (str): The content of the page to be included in the enhanced prompt. - - zh (bool, optional): A flag to indicate whether to use a Chinese prompt template. Defaults to True. - - Returns: - - List[Dict[str, str]]: A list of dictionaries containing the questions and answers generated by the model. 
-
-        Example:
-        >>> EnhanceWithOpenAI("What is it?", "Solution Title", "Page Content")
-        [{'Question': 'What is Solution Title?', 'Answer': 'It is ...'}]
-        """
-        prompt_template = zh_prompt_template if zh else en_prompt_template
-        prompt = prompt_template.format(solution=solution_title, page=document.page_content, question_num=question_num)
-        messages = [{"role": "user", "content": f"{prompt}"}]
-        # Error and retry handling for the OpenAI API due to its request cap limit
-        try:
-            response = openai.ChatCompletion.create(
-                model="gpt-3.5-turbo",
-                messages=messages,
-                temperature=0,
-                max_tokens=2048
-            )
-        except Exception as e:
-            logger.error("OpenAI API request failed: {}".format(e))
-            return []
-        raw_completion = response.choices[0]["message"]["content"].split('\n')
-        logger.info("raw_completion: {}".format(raw_completion))
-        # Initialize an empty list to store the Q&A pairs
-        qa_list = []
-
-        # Initialize an empty dictionary to store the current Q&A pair
-        qa_dict = {}
-        for line in raw_completion:
-            # Check if the line contains a question
-            if line.startswith('Question:'):
-                # If there's already a Q&A pair in qa_dict, append it to qa_list
-                if qa_dict:
-                    qa_list.append(qa_dict)
-                    qa_dict = {}  # Reset qa_dict for the next Q&A pair
-                qa_dict['Question'] = line.replace('Question:', '').strip()
-            # Check if the line contains an answer
-            elif line.startswith('Answer:'):
-                qa_dict['Answer'] = line.replace('Answer:', '').strip()
-
-        # Append the last Q&A pair to qa_list
-        if qa_dict:
-            qa_list.append(qa_dict)
-        return qa_list
-
-    def SplitDocumentByTokenNum(self, document: Document, token_num: int) -> List[Document]:
-        """
-        Splits a given document into multiple documents, each containing a slice of the original document.
-
-        Parameters:
-        - document (Document): The document to be split.
-        - token_num (int): The number of tokens to include in each document.
-
-        Returns:
-        - List[Document]: A list of documents, each containing a slice of the original document.
-        """
-        # Tokenize the input paragraph
-        tokens = nltk.word_tokenize(document.page_content)
-        # Calculate the total number of tokens and the chunk number
-        total_tokens = len(tokens)
-        chunk_num = total_tokens // slice_size + 1
-
-        # Initialize a document list to store document slices separated by slice_size tokens
-        documents_list = []
-        # Iterate through the list of tokens, extracting slices of slice_size tokens at a time
-        for i in range(0, len(tokens), slice_size):
-            token_slice = tokens[i:i+slice_size]
-            # Join the slice of tokens back into a string
-            document_slice = ' '.join(token_slice)
-            # Create a new Document object to store the slice
-            document = Document(page_content=document_slice)
-            # Append the Document object to the list of documents
-            documents_list.append(document)
-        return documents_list
-
-# local debugging purpose
-# if __name__ == "__main__":
-#     # log out boto3 version
-#     logger.info("boto3 version: {}".format(boto3.__version__))
-#     # test the function
-#     prompt = "Do we have any solution offer to Stable Diffusion?"
-#     solution_title = "Stable Diffusion AWS Extensions"
-#     page_content = """
-#     Stable Diffusion AWS Extensions is a CSDC solution that...
-#     """
-#     # construct a Document object
-#     document = Document(page_content=page_content)
-#     ewb = EnhanceWithBedrock(prompt, solution_title, document)
-#     document_list = ewb.SplitDocumentByTokenNum(document, slice_size)
-#     # test the function
-#     for document in document_list:
-#         prompt = "Do we have any solution offer to Stable Diffusion?"
-# solution_title = "Stable Diffusion AWS Extensions" -# enhanced_prompt = ewb.EnhanceWithClaude(prompt, solution_title, document) -# logger.info("Enhanced prompt: {}".format(enhanced_prompt)) - -# # test the function -# for document in document_list: -# prompt = "Do we have any solution offer to Stable Diffusion?" -# solution_title = "Stable Diffusion AWS Extensions" -# enhanced_prompt = ewb.EnhanceWithOpenAI(prompt, solution_title, document) -# logger.info("Enhanced prompt: {}".format(enhanced_prompt)) diff --git a/src/scripts/dep/llm_bot_dep/loaders/__init__.py b/src/scripts/dep/llm_bot_dep/loaders/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/src/scripts/dep/llm_bot_dep/loaders/auto.py b/src/scripts/dep/llm_bot_dep/loaders/auto.py deleted file mode 100644 index 418f2468..00000000 --- a/src/scripts/dep/llm_bot_dep/loaders/auto.py +++ /dev/null @@ -1,29 +0,0 @@ - - -from llm_bot_dep.loaders.docx import process_doc -from llm_bot_dep.loaders.markdown import process_md -from .text import process_text -from .csv import process_csv -from .html import process_html -from .pdf import process_pdf -from .image import process_image - -def cb_process_object(s3, file_type: str, file_content, **kwargs): - res = None - if file_type == 'txt': - res = process_text(file_content, **kwargs) - elif file_type == 'csv': - res = process_csv(s3, file_content, **kwargs) - elif file_type == 'html': - res = process_html(file_content, **kwargs) - elif file_type == 'doc': - res = process_doc(s3, **kwargs) - elif file_type == 'md': - res = process_md(file_content, **kwargs) - elif file_type == 'pdf': - # res = post_process_pdf(process_pdf(file_content, **kwargs)) - res = process_pdf(s3, file_content, **kwargs) - elif file_type == 'image': - process_image(s3, file_content, **kwargs) - - return res \ No newline at end of file diff --git a/src/scripts/dep/llm_bot_dep/loaders/csv.py b/src/scripts/dep/llm_bot_dep/loaders/csv.py deleted file mode 100644 index 8df30dd3..00000000 --- a/src/scripts/dep/llm_bot_dep/loaders/csv.py +++ /dev/null @@ -1,172 +0,0 @@ -import uuid -from datetime import datetime -from typing import Dict, List, Optional, Iterator, Sequence -from io import TextIOWrapper - -import csv - -from langchain.docstore.document import Document -from langchain.document_loaders.csv_loader import CSVLoader -from langchain.document_loaders.helpers import detect_file_encodings - -class CustomCSVLoader(CSVLoader): - """Load a `CSV` file into a list of Documents. - - Each document represents one row of the CSV file. The rows are converted into markdown format based on row_count. - - Output Example: - when row_count = 1, - page_document_1 contains: - |index|name| - |-|-| - |1|Demo1| - page_document_2 contains: - |index|name| - |-|-| - |2|Demo2| - - when row_count = 3, - page_document_1 contains: - |index|name| - |-|-| - |1|Demo1| - |2|Demo2| - |3|Demo3| - page_document_2 contains: - |index|name| - |-|-| - |4|Demo4| - |5|Demo5| - |6|Demo6| - """ - - def __init__( - self, - file_path: str, - source_column: Optional[str] = None, - metadata_columns: Sequence[str] = (), - csv_args: Optional[Dict] = None, - encoding: Optional[str] = None, - autodetect_encoding: bool = False, - row_count: int = 1 - ): - """ - - Args: - file_path: The path to the CSV file. - source_column: The name of the column in the CSV file to use as the source. - Optional. Defaults to None. - metadata_columns: A sequence of column names to use as metadata. Optional. 
- csv_args: A dictionary of arguments to pass to the csv.DictReader. - Optional. Defaults to None. - encoding: The encoding of the CSV file. Optional. Defaults to None. - autodetect_encoding: Whether to try to autodetect the file encoding. - row_count: How many row in a page document. - """ - self.row_number = row_count - super().__init__(file_path, source_column, metadata_columns, - csv_args, encoding, autodetect_encoding) - - def __read_file(self, csvfile: TextIOWrapper) -> List[Document]: - docs = [] - - csv_reader = csv.DictReader(csvfile, **self.csv_args) - counter = 0 - for i, row in enumerate(csv_reader): - # print(f"i: {i}") - # print(f"row: {row}") - try: - source = ( - row[self.source_column] - if self.source_column is not None - else self.file_path - ) - except KeyError: - raise ValueError( - f"Source column '{self.source_column}' not found in CSV file." - ) - counter += 1 - - if counter % self.row_number == 1: - # First row with header and separator - header = "|" - md_separator = "|" - row_content = "|" - for k, v in row.items(): - header += k + "|" - md_separator += "-|" - row_content += v + "|" - row_content += "\n" - elif counter % self.row_number == 0: - if 1 == self.row_number: - header = "|" - md_separator = "|" - row_content = "|" - for k, v in row.items(): - header += k + "|" - md_separator += "-|" - row_content += v + "|" - else: - for k, v in row.items(): - row_content += v + "|" - content = header + "\n" + md_separator + "\n" + row_content - print(f"markdown content: {content}") - - metadata = {"source": source, "row": i} - for col in self.metadata_columns: - try: - metadata[col] = row[col] - except KeyError: - raise ValueError( - f"Metadata column '{col}' not found in CSV file.") - doc = Document(page_content=content, metadata=metadata) - docs.append(doc) - counter = 0 - else: - for k, v in row.items(): - row_content += v + "|" - row_content += "\n" - - return docs - - def load(self) -> List[Document]: - """Load data into document objects.""" - - docs = [] - try: - with open(self.file_path, newline="", encoding=self.encoding) as csvfile: - docs = self.__read_file(csvfile) - except UnicodeDecodeError as e: - if self.autodetect_encoding: - detected_encodings = detect_file_encodings(self.file_path) - for encoding in detected_encodings: - try: - with open( - self.file_path, newline="", encoding=encoding.encoding - ) as csvfile: - docs = self.__read_file(csvfile) - break - except UnicodeDecodeError: - continue - else: - raise RuntimeError(f"Error loading {self.file_path}") from e - except Exception as e: - raise RuntimeError(f"Error loading {self.file_path}") from e - - return docs - -def process_csv(s3, csv_content: str, **kwargs): - now = datetime.now() - timestamp_str = now.strftime("%Y%m%d%H%M%S") - random_uuid = str(uuid.uuid4())[:8] - bucket_name = kwargs['bucket'] - key = kwargs['key'] - row_count = kwargs['csv_row_count'] - local_path = f'/tmp/csv-{timestamp_str}-{random_uuid}.csv' - - s3.download_file(bucket_name, key, local_path) - loader = CustomCSVLoader(file_path=local_path, row_count=row_count) - data = loader.load() - - return data - diff --git a/src/scripts/dep/llm_bot_dep/loaders/docx.py b/src/scripts/dep/llm_bot_dep/loaders/docx.py deleted file mode 100644 index fcf33605..00000000 --- a/src/scripts/dep/llm_bot_dep/loaders/docx.py +++ /dev/null @@ -1,70 +0,0 @@ -import logging -from typing import List, Optional -from langchain.docstore.document import Document -from langchain.document_loaders.base import BaseLoader -from llm_bot_dep.loaders.html import 
CustomHtmlLoader
-import mammoth
-import uuid
-from datetime import datetime
-from llm_bot_dep.splitter_utils import MarkdownHeaderTextSplitter
-
-logger = logging.getLogger(__name__)
-
-
-class CustomDocLoader(BaseLoader):
-    """Load docx file.
-
-    Args:
-        file_content: File content of the docx file.
-
-        encoding: File encoding to use. If `None`, the file will be loaded
-        with the default system encoding.
-
-        autodetect_encoding: Whether to try to autodetect the file encoding
-        if the specified encoding fails.
-    """
-
-    def __init__(
-        self,
-        file_path: str,
-        encoding: Optional[str] = None,
-        autodetect_encoding: bool = False,
-    ):
-        """Initialize with file path."""
-        self.file_path = file_path
-        self.encoding = encoding
-        self.autodetect_encoding = autodetect_encoding
-
-    def load(self) -> List[Document]:
-        """Load from file path."""
-        metadata = {"file_path": self.file_path, "file_type": "docx"}
-
-        def _convert_image(image):
-            # Images are excluded
-            return {"src": ""}
-
-        with open(self.file_path, "rb") as docx_file:
-            result = mammoth.convert_to_html(docx_file, convert_image=mammoth.images.img_element(_convert_image))
-            html_content = result.value  # The generated HTML
-            loader = CustomHtmlLoader()
-            doc = loader.load(html_content)
-            doc.metadata = metadata
-
-        return doc
-
-
-def process_doc(s3, **kwargs):
-    now = datetime.now()
-    timestamp_str = now.strftime("%Y%m%d%H%M%S")
-    random_uuid = str(uuid.uuid4())[:8]
-    bucket_name = kwargs['bucket']
-    key = kwargs['key']
-    local_path = f'/tmp/doc-{timestamp_str}-{random_uuid}.docx'
-
-    s3.download_file(bucket_name, key, local_path)
-    loader = CustomDocLoader(file_path=local_path)
-    doc = loader.load()
-    splitter = MarkdownHeaderTextSplitter()
-    doc_list = splitter.split_text(doc)
-
-    return doc_list
diff --git a/src/scripts/dep/llm_bot_dep/loaders/html.py b/src/scripts/dep/llm_bot_dep/loaders/html.py
deleted file mode 100644
index 007432dc..00000000
--- a/src/scripts/dep/llm_bot_dep/loaders/html.py
+++ /dev/null
@@ -1,71 +0,0 @@
-import logging
-import re
-import markdownify
-from langchain.docstore.document import Document
-from langchain.document_loaders.base import BaseLoader
-from llm_bot_dep.splitter_utils import MarkdownHeaderTextSplitter
-
-logger = logging.getLogger(__name__)
-
-
-class CustomHtmlLoader(BaseLoader):
-    """Load an HTML string and convert it to Markdown.
-
-    The loader first strips DOCTYPE declarations, CDATA sections, scripts,
-    styles, comments and empty image tags from the raw HTML, then converts
-    the cleaned HTML to Markdown (ATX heading style) with `markdownify`.
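-
-    Example (illustrative sketch; the HTML snippet is hypothetical):
-        loader = CustomHtmlLoader()
-        doc = loader.load("<html><body><h1>Title</h1><p>Body text</p></body></html>")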
- - """ - - def clean_html(self, html_str: str) -> str: - # Filter out DOCTYPE - html_str = ' '.join(html_str.split()) - re_doctype = re.compile(r'', re.S) - s = re_doctype.sub('', html_str) - - # Filter out CDATA - re_cdata = re.compile('//]*//\]\]>', re.I) - s = re_cdata.sub('', s) - - # Filter out script - re_script = re.compile('<\s*script[^>]*>[^<]*<\s*/\s*script\s*>', re.I) - s = re_script.sub('', s) - - # Filter out style - re_style = re.compile('<\s*style[^>]*>[^<]*<\s*/\s*style\s*>', re.I) - s = re_style.sub('', s) - - # Filter out HTML comments - re_comment = re.compile('') - s = re_comment.sub('', s) - - # Remove extra blank lines - blank_line = re.compile('\n+') - s = blank_line.sub('\n', s) - - # Remove blank image - img_src = re.compile('') - s = img_src.sub('', s) - - return s.strip() - - # def load(self, file_content: str) -> List[Document]: - def load(self, file_content: str): - file_content = self.clean_html(file_content) - file_content = markdownify.markdownify(file_content, heading_style="ATX") - doc = Document(page_content=file_content, - metadata={"file_type": "html"}) - - return doc - - -def process_html(html_str: str, **kwargs): - loader = CustomHtmlLoader() - doc = loader.load(html_str) - splitter = MarkdownHeaderTextSplitter() - doc_list = splitter.split_text(doc) - - return doc_list diff --git a/src/scripts/dep/llm_bot_dep/loaders/image.py b/src/scripts/dep/llm_bot_dep/loaders/image.py deleted file mode 100644 index 2eab80de..00000000 --- a/src/scripts/dep/llm_bot_dep/loaders/image.py +++ /dev/null @@ -1,5 +0,0 @@ - - -def process_image(image: bytes): - # TODO: Implement image processing with ASK API - pass diff --git a/src/scripts/dep/llm_bot_dep/loaders/markdown.py b/src/scripts/dep/llm_bot_dep/loaders/markdown.py deleted file mode 100644 index 6c629102..00000000 --- a/src/scripts/dep/llm_bot_dep/loaders/markdown.py +++ /dev/null @@ -1,49 +0,0 @@ -import logging -from typing import List, Optional - -from langchain.docstore.document import Document -from langchain.document_loaders.base import BaseLoader -from llm_bot_dep.splitter_utils import MarkdownHeaderTextSplitter - -logger = logging.getLogger(__name__) - - -class CustomMarkdownLoader(BaseLoader): - """Load markdown file. - - Args: - file_content: File content in markdown file. - - encoding: File encoding to use. If `None`, the file will be loaded - with the default system encoding. - - autodetect_encoding: Whether to try to autodetect the file encoding - if the specified encoding fails. 
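-
-    Example (illustrative sketch; the path and content are hypothetical):
-        loader = CustomMarkdownLoader(file_path="my-bucket/docs/guide.md")
-        doc = loader.load("# Heading\nBody text")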
- """ - - def __init__( - self, - file_path: str, - encoding: Optional[str] = None, - autodetect_encoding: bool = False, - ): - """Initialize with file path.""" - self.file_path = file_path - self.encoding = encoding - self.autodetect_encoding = autodetect_encoding - - def load(self, content: str) -> Document: - """Load from file path.""" - metadata = {"file_path": self.file_path, "file_type": "md"} - - return Document(page_content=content, metadata=metadata) - - -def process_md(file_content: str, **kwargs): - loader = CustomMarkdownLoader( - file_path=kwargs['bucket'] + "/" + kwargs['key']) - doc = loader.load(file_content) - splitter = MarkdownHeaderTextSplitter() - doc_list = splitter.split_text(doc) - - return doc_list diff --git a/src/scripts/dep/llm_bot_dep/loaders/pdf.py b/src/scripts/dep/llm_bot_dep/loaders/pdf.py deleted file mode 100644 index aa4598e4..00000000 --- a/src/scripts/dep/llm_bot_dep/loaders/pdf.py +++ /dev/null @@ -1,171 +0,0 @@ -import os -import re -import json -import logging -from bs4 import BeautifulSoup -import subprocess -from pathlib import Path -from typing import List, Dict, List, Optional, Iterator, Sequence - -from langchain.docstore.document import Document -from langchain.document_loaders import PDFMinerPDFasHTMLLoader - -from langchain.document_loaders.pdf import BasePDFLoader -from ..splitter_utils import extract_headings, MarkdownHeaderTextSplitter -# from langchain.text_splitter import MarkdownHeaderTextSplitter - -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) - -metadata_template = { - "content_type": "paragraph", - "heading_hierarchy": {}, - "figure_list": [], - "chunk_id": "$$", - "file_path": "", - "keywords": [], - "summary": "", -} - -class NougatPDFLoader(BasePDFLoader): - """A PDF loader class for converting PDF files to MMD. - - This class leverages the `nougat` library to perform the conversion from PDF to HTML. - It inherits from `BasePDFLoader` and extends its functionality to utilize the `nougat` library. - TODO, the load_and_split method need to be implemented and default is RecursiveCharacterTextSplitter - Attributes: - file_path (str): The path to the PDF file to be loaded. - headers (Optional[Dict]): Optional headers to be used when loading the PDF. - - Raises: - ImportError: If the `nougat` library is not installed. - RuntimeError: If the `nougat` command fails to execute successfully. - """ - - def __init__(self, file_path: str, *, headers: Optional[Dict] = None): - """Initialize with a file path.""" - try: - import nougat - except ImportError: - raise ImportError( - "Please install nougat to use NougatPDFLoader. " - "You can install it with `pip install nougat`." - ) - - super().__init__(file_path, headers=headers) - - def nougat(self, file_path: Path) -> str: - """Executes the `nougat` command to convert the specified PDF file to Markdown format. - - Args: - file_path (Path): The path to the PDF file to be converted. - - Returns: - str: The Markdown content resulting from the `nougat` conversion. 
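-
-        Example (illustrative sketch, assuming the nougat CLI is installed):
-            loader = NougatPDFLoader("paper.pdf")
-            mmd_text = loader.nougat(Path("paper.pdf"))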
-        """
-        # nougat ./paperSnapshot.pdf --full-precision --markdown -m 0.1.0-base -o tmp --recompute
-        cli_command = ["nougat", str(file_path), "--full-precision", "--markdown", "-m", "0.1.0-base", "-o", "tmp", "--recompute"]
-
-        try:
-            result = subprocess.run(
-                cli_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
-            )
-            result.check_returncode()
-            return result.stdout
-
-        except subprocess.CalledProcessError as e:
-            logger.info(
-                f"Nougat command failed with return code {e.returncode}: {e.stderr}"
-            )
-            raise RuntimeError("Nougat command failed.") from e
-
-    def load(self) -> List[Document]:
-        """Loads and processes the specified PDF file, converting it to a list of Document objects.
-
-        Returns:
-            List[Document]: A list containing a single Document object with the processed content.
-        """
-        return list(self.lazy_load())
-
-    def lazy_load(self) -> Iterator[Document]:
-        """Lazy load and process the specified PDF file, yielding Document objects.
-
-        This method reads the PDF file, processes it using the `nougat` command,
-        reads the resulting Markdown content, and yields a Document object with the content.
-        """
-        # try:
-        file_path = self.file_path
-        # Call the method to run the Nougat OCR command
-        self.nougat(file_path)
-
-        # Read and process the Nougat output
-        file_path = Path(file_path)
-        output_path = Path("tmp") / f"{file_path.stem}.mmd"
-        with output_path.open("r") as f:
-            content = f.read()
-        # Math expressions are enclosed in \( and \) in the Markdown output; normalize them to $ / $$ delimiters
-        content = (
-            content.replace(r"\(", "$")
-            .replace(r"\)", "$")
-            .replace(r"\[", "$$")
-            .replace(r"\]", "$$")
-        )
-        logger.info("content: %s", content)
-        # extract headings hierarchically
-        headings = extract_headings(content)
-
-        # assemble metadata from template
-        metadata = metadata_template
-        metadata["content_type"] = "paragraph"
-        metadata["heading_hierarchy"] = headings
-        metadata["chunk_id"] = "$$"
-        metadata["file_path"] = str(file_path)
-        # TODO, use PyMuPDF to detect image and figure list, but no link to the image for the extracted text
-        # metadata["figure_list"] = []
-
-        yield Document(page_content=content, metadata=metadata)
-
-        # except Exception as e:
-        #     logger.info(f"An error occurred while processing the PDF: {str(e)}")
-
-
-def process_pdf(s3, pdf: bytes, **kwargs):
-    """
-    Process a given PDF file and extract structured information from it.
-
-    This function downloads a PDF file, converts it to Markdown with the Nougat OCR
-    loader, then extracts and structures the information into a list of Document
-    objects split by heading hierarchy.
-
-    Parameters:
-    s3 (boto3.client): The S3 client to use for downloading the PDF file.
-    pdf (bytes): The PDF file to process.
-    **kwargs: Arbitrary keyword arguments. The function expects 'bucket' and 'key' among the kwargs
-    to specify the S3 bucket and key where the PDF file is located.
-
-    Returns:
-    list[Document]: A list of Document objects, each representing a semantically grouped section of the PDF file. Each Document object contains metadata as defined in metadata_template, and a page_content string with the text content of that section.
- """ - logger.info("Processing PDF file...") - bucket = kwargs['bucket'] - key = kwargs['key'] - # extract file name also in consideration of file name with blank space - local_path = str(os.path.basename(key)) - # download to local for futher processing - logger.info(local_path) - s3.download_file(Bucket=bucket, Key=key, Filename=local_path) - # TODO, will be deprecated and replaced by nougat class in loader_utils - # loader = PDFMinerPDFasHTMLLoader(local_path) - # entire PDF is loaded as a single Document - # file_content = loader.load()[0].page_content - # res = parse_pdf_to_json(file_content) - - loader = NougatPDFLoader(local_path) - data = loader.load() - logger.info("raw data: %s", data) - # Update file_path metadata to full s3 path in list of Document objects - data[0].metadata['file_path'] = f"s3://{bucket}/{key}" - markdown_splitter = MarkdownHeaderTextSplitter() - md_header_splits = markdown_splitter.split_text(data[0]) - for i, doc in enumerate(md_header_splits): - logger.info("PDF file processed successfully, with content of chunk %s: %s", i, doc) - return md_header_splits diff --git a/src/scripts/dep/llm_bot_dep/loaders/text.py b/src/scripts/dep/llm_bot_dep/loaders/text.py deleted file mode 100644 index df8ffe1f..00000000 --- a/src/scripts/dep/llm_bot_dep/loaders/text.py +++ /dev/null @@ -1,54 +0,0 @@ -import logging -import re -from typing import List, Optional -from langchain.docstore.document import Document -from langchain.document_loaders.text import TextLoader - -logger = logging.getLogger(__name__) - - -class CustomTextLoader(TextLoader): - """Load text file. - - Args: - file_content: Text file content. - - encoding: File encoding to use. If `None`, the file will be loaded - with the default system encoding. - - autodetect_encoding: Whether to try to autodetect the file encoding - if the specified encoding fails. 
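-
-    Example (illustrative sketch; the path and content are hypothetical):
-        loader = CustomTextLoader(file_path="my-bucket/docs/note.txt")
-        docs = loader.load("some plain text content")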
-    """
-
-    def __init__(
-        self,
-        file_path: str,
-        encoding: Optional[str] = None,
-        autodetect_encoding: bool = False,
-    ):
-        """Initialize with file path."""
-        self.file_path = file_path
-        self.encoding = encoding
-        self.autodetect_encoding = autodetect_encoding
-
-    def load(self, text_content: str) -> List[Document]:
-        """Build a Document from the given text content."""
-        metadata = {"source": self.file_path}
-
-        return [Document(page_content=text_content, metadata=metadata)]
-
-
-def pre_process_text(text_content: str) -> str:
-    # Clean up text content
-    text_content = re.sub(r'\s+', ' ', text_content)
-    text_content = re.sub(r'\n+', '\n', text_content)
-
-    return text_content.strip()
-
-
-def process_text(file_content: str, **kwargs):
-    clean_text = pre_process_text(file_content)
-    loader = CustomTextLoader(file_path=kwargs['bucket'] + "/" + kwargs['key'])
-    data = loader.load(clean_text)
-
-    return data
diff --git a/src/scripts/dep/llm_bot_dep/sm_utils.py b/src/scripts/dep/llm_bot_dep/sm_utils.py
deleted file mode 100644
index a60299ae..00000000
--- a/src/scripts/dep/llm_bot_dep/sm_utils.py
+++ /dev/null
@@ -1,71 +0,0 @@
-"""
-Helper functions for using SageMaker Endpoint via langchain
-"""
-import sys
-import time
-import json
-import logging
-from typing import List
-from langchain.embeddings import SagemakerEndpointEmbeddings
-from langchain.embeddings.sagemaker_endpoint import EmbeddingsContentHandler
-
-logger = logging.getLogger()
-# logging.basicConfig(format='%(asctime)s,%(module)s,%(processName)s,%(levelname)s,%(message)s', level=logging.INFO, stream=sys.stderr)
-logger.setLevel(logging.INFO)
-
-# extend the SagemakerEndpointEmbeddings class from langchain to provide a custom embedding function
-class SagemakerEndpointEmbeddingsJumpStart(SagemakerEndpointEmbeddings):
-    def embed_documents(
-        self, texts: List[str], chunk_size: int = 500
-    ) -> List[List[float]]:
-        """Compute doc embeddings using a SageMaker Inference Endpoint.
-
-        Args:
-            texts: The list of texts to embed.
-            chunk_size: The chunk size defines how many input texts will
-                be grouped together as a request. If None, will use the
-                chunk size specified by the class.
-
-        Returns:
-            List of embeddings, one for each text.
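-
-        Example (illustrative sketch; the endpoint name is hypothetical):
-            embedder = create_sagemaker_embeddings_from_js_model("embedding-endpoint", "us-east-1")
-            vectors = embedder.embed_documents(["first text", "second text"])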
- """ - results = [] - _chunk_size = len(texts) if chunk_size > len(texts) else chunk_size - st = time.time() - for i in range(0, len(texts), _chunk_size): - response = self._embedding_func(texts[i:i + _chunk_size]) - results.extend(response) - time_taken = time.time() - st - logger.info(f"got results for {len(texts)} in {time_taken}s, length of embeddings list is {len(results)}") - return results - - -# class for serializing/deserializing requests/responses to/from the embeddings model -class ContentHandler(EmbeddingsContentHandler): - content_type = "application/json" - accepts = "application/json" - - def transform_input(self, prompt: str, model_kwargs={}) -> bytes: - input_str = json.dumps({"inputs": prompt, **model_kwargs}) - return input_str.encode('utf-8') - - def transform_output(self, output: bytes) -> str: - response_json = json.loads(output.read().decode("utf-8")) - embeddings = response_json["sentence_embeddings"] - if len(embeddings) == 1: - return [embeddings[0]] - return embeddings - -def create_sagemaker_embeddings_from_js_model(embeddings_model_endpoint_name: str, aws_region: str) -> SagemakerEndpointEmbeddingsJumpStart: - # all set to create the objects for the ContentHandler and - # SagemakerEndpointEmbeddingsJumpStart classes - content_handler = ContentHandler() - logger.info(f'content_handler: {content_handler}, embeddings_model_endpoint_name: {embeddings_model_endpoint_name}, aws_region: {aws_region}') - # note the name of the LLM Sagemaker endpoint, this is the model that we would - # be using for generating the embeddings - embeddings = SagemakerEndpointEmbeddingsJumpStart( - endpoint_name = embeddings_model_endpoint_name, - region_name = aws_region, - content_handler = content_handler - ) - return embeddings \ No newline at end of file diff --git a/src/scripts/dep/llm_bot_dep/splitter_utils.py b/src/scripts/dep/llm_bot_dep/splitter_utils.py deleted file mode 100644 index d00d9c9e..00000000 --- a/src/scripts/dep/llm_bot_dep/splitter_utils.py +++ /dev/null @@ -1,187 +0,0 @@ -import re -from typing import Any, Dict, Iterator, List, Optional, Union - -from langchain.docstore.document import Document -from langchain.text_splitter import (Language, RecursiveCharacterTextSplitter, - TextSplitter) - - -def _make_spacy_pipeline_for_splitting(pipeline: str) -> Any: # avoid importing spacy - try: - import spacy - except ImportError: - raise ImportError( - "Spacy is not installed, please install it with `pip install spacy`." - ) - if pipeline == "sentencizer": - from spacy.lang.en import English - - sentencizer = English() - sentencizer.add_pipe("sentencizer") - else: - sentencizer = spacy.load(pipeline, exclude=["ner", "tagger"]) - return sentencizer - -class NLTKTextSplitter(TextSplitter): - """Splitting text using NLTK package.""" - - def __init__( - self, separator: str = "\n\n", language: str = "english", **kwargs: Any - ) -> None: - """Initialize the NLTK splitter.""" - super().__init__(**kwargs) - try: - from nltk.tokenize import sent_tokenize - - self._tokenizer = sent_tokenize - except ImportError: - raise ImportError( - "NLTK is not installed, please install it with `pip install nltk`." - ) - self._separator = separator - self._language = language - - def split_text(self, text: str) -> List[str]: - """Split incoming text and return chunks.""" - # First we naively split the large input into a bunch of smaller ones. 
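-        # sent_tokenize uses NLTK's Punkt sentence tokenizer, so each split is a
-        # single sentence; the base class's _merge_splits then packs the
-        # sentences back into chunks bounded by the configured chunk size.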
- splits = self._tokenizer(text, language=self._language) - return self._merge_splits(splits, self._separator) - -class SpacyTextSplitter(TextSplitter): - """Splitting text using Spacy package. - - - Per default, Spacy's `en_core_web_sm` model is used. For a faster, but - potentially less accurate splitting, you can use `pipeline='sentencizer'`. - """ - - def __init__( - self, separator: str = "\n\n", pipeline: str = "en_core_web_sm", **kwargs: Any - ) -> None: - """Initialize the spacy text splitter.""" - super().__init__(**kwargs) - self._tokenizer = _make_spacy_pipeline_for_splitting(pipeline) - self._separator = separator - - def split_text(self, text: str) -> List[str]: - """Split incoming text and return chunks.""" - splits = (s.text for s in self._tokenizer(text).sents) - return self._merge_splits(splits, self._separator) - -class NestedDict(dict): - def __missing__(self, key): - self[key] = NestedDict() - return self[key] - -def extract_headings(md_content): - """Extract headings hierarchically from Markdown content. - Consider alternate syntax that "any number of == characters for heading level 1 or -- characters for heading level 2." - See https://www.markdownguide.org/basic-syntax/ - Args: - md_content (str): Markdown content. - Returns: - NestedDict: A nested dictionary containing the headings. Sample output: - { - 'Title 1': { - 'Subtitle 1.1': {}, - 'Subtitle 1.2': {} - }, - 'Title 2': { - 'Subtitle 2.1': {} - } - } - """ - headings = NestedDict() - current_heads = [headings] - lines = md_content.strip().split('\n') - - for i, line in enumerate(lines): - match = re.match(r'(#+) (.+)', line) - if not match and i > 0: # If the line is not a heading, check if the previous line is a heading using alternate syntax - if re.match(r'=+', lines[i - 1]): - level = 1 - title = lines[i - 2] - elif re.match(r'-+', lines[i - 1]): - level = 2 - title = lines[i - 2] - else: - continue - elif match: - level = len(match.group(1)) - title = match.group(2) - else: - continue - - current_heads = current_heads[:level] - current_heads[-1][title] - current_heads.append(current_heads[-1][title]) - - return headings - -# rewrite this class to use the new TextSplitter for mmd type -class MarkdownHeaderTextSplitter: - # Place holder for now without parameters - def __init__(self) -> None: - pass - - def _is_markdown_header(self, line): - header_pattern = r'^#+\s+' - if re.match(header_pattern, line): - return True - else: - return False - - def _is_markdown_table_row(self, line): - return re.fullmatch(r'\|.*\|.*\|', line) is not None - - def split_text(self, text: Document) -> List[Document]: - lines = text.page_content.strip().split('\n') - chunks = [] - current_chunk_content = [] - table_content = [] - inside_table = False - chunk_id = 1 # Initializing chunk_id - - for line in lines: - # Replace escaped characters for table markers - line = line.strip() - line = line.replace(r"\begin{table}", "\\begin{table}").replace(r"\end{table}", "\\end{table}") - if line in ["\\begin{table}", "\\end{table}"]: - continue - - if self._is_markdown_header(line): # Assuming these denote headings - # Save the current chunk if it exists - if current_chunk_content: - metadata = text.metadata.copy() - metadata['heading_hierarchy'] = extract_headings('\n'.join(current_chunk_content)) - metadata['chunk_id'] = f"${chunk_id}" - chunk_id += 1 # Increment chunk_id for the next chunk - chunks.append(Document(page_content='\n'.join(current_chunk_content), metadata=metadata)) - current_chunk_content = [] # Reset for the next chunk 
- - if self._is_markdown_table_row(line): - inside_table = True - elif inside_table: - # The first line under a table - inside_table = False - # Save table content as a separate document - if table_content: - metadata = text.metadata.copy() - metadata['content_type'] = 'table' - metadata['chunk_id'] = f"${chunk_id}" - chunks.append(Document(page_content='\n'.join(table_content), metadata=metadata)) - table_content = [] # Reset for the next table - - if inside_table: - table_content.append(line) - else: - current_chunk_content.append(line) - - # Save the last chunk if it exists - if current_chunk_content: - metadata = text.metadata.copy() - metadata['heading_hierarchy'] = extract_headings('\n'.join(current_chunk_content)) - metadata['chunk_id'] = f"${chunk_id}" - chunks.append(Document(page_content='\n'.join(current_chunk_content), metadata=metadata)) - - return chunks diff --git a/src/scripts/dep/setup.py b/src/scripts/dep/setup.py deleted file mode 100644 index ff6569da..00000000 --- a/src/scripts/dep/setup.py +++ /dev/null @@ -1,20 +0,0 @@ -from setuptools import setup, find_packages - -setup( - name='llm_bot_dep', - version='0.1.0', - packages=find_packages(exclude=[]), - install_requires=[ - 'langchain', - 'opensearch-py', - # 'faiss_cpu', - # 'sagemaker', - 'requests_aws4auth', - 'unstructured', - 'boto3==1.28.84', - 'nougat-ocr', - 'markdownify', - 'mammoth', - 'chardet' - ], -) \ No newline at end of file diff --git a/src/scripts/glue-job-script.py b/src/scripts/glue-job-script.py deleted file mode 100644 index 94e51834..00000000 --- a/src/scripts/glue-job-script.py +++ /dev/null @@ -1,270 +0,0 @@ -import itertools -import logging -import os -import sys -import time -from typing import Any, Dict, Generator, Iterable, List, Optional, Tuple - -import boto3 -import chardet -import nltk -from awsglue.utils import getResolvedOptions -from boto3.dynamodb.conditions import Attr, Key -from langchain.docstore.document import Document -from langchain.text_splitter import RecursiveCharacterTextSplitter -from langchain.vectorstores import OpenSearchVectorSearch -from llm_bot_dep import sm_utils -from llm_bot_dep.enhance_utils import EnhanceWithBedrock -from llm_bot_dep.loaders.auto import cb_process_object -from opensearchpy import RequestsHttpConnection -from requests_aws4auth import AWS4Auth -from tenacity import retry, stop_after_attempt - -logger = logging.getLogger() -logger.setLevel(logging.INFO) - -# Adaption to allow nougat to run in AWS Glue with writable /tmp -os.environ['TRANSFORMERS_CACHE'] = '/tmp/transformers_cache' -os.environ['NOUGAT_CHECKPOINT'] = '/tmp/nougat_checkpoint' -os.environ['NLTK_DATA'] = '/tmp/nltk_data' - -# Parse arguments -args = getResolvedOptions(sys.argv, ['JOB_NAME', 'S3_BUCKET', 'S3_PREFIX', 'AOS_ENDPOINT', 'EMBEDDING_MODEL_ENDPOINT', 'REGION', 'OFFLINE', 'QA_ENHANCEMENT', 'BATCH_INDICE', 'ProcessedObjectsTable']) -s3_bucket = args['S3_BUCKET'] -s3_prefix = args['S3_PREFIX'] -aosEndpoint = args['AOS_ENDPOINT'] -embeddingModelEndpoint = args['EMBEDDING_MODEL_ENDPOINT'] -region = args['REGION'] -offline = args['OFFLINE'] -qa_enhancement = args['QA_ENHANCEMENT'] -# TODO, pass the bucket and prefix need to handle in current job directly -batchIndice = args['BATCH_INDICE'] -processedObjectsTable = args['ProcessedObjectsTable'] - -s3 = boto3.client('s3') -dynamodb = boto3.resource('dynamodb') -table = dynamodb.Table(processedObjectsTable) - -ENHANCE_CHUNK_SIZE = 500 -# Make it 3600s for debugging purpose -OBJECT_EXPIRY_TIME = 3600 - -credentials = 
boto3.Session().get_credentials() -awsauth = AWS4Auth(credentials.access_key, credentials.secret_key, region, 'es', session_token=credentials.token) - -# Set the NLTK data path to the /tmp directory for AWS Glue jobs -nltk.data.path.append('/tmp/nltk_data') - -def decode_file_content(content: str, default_encoding: str = 'utf-8'): - """Decode the file content and auto detect the content encoding. - - Args: - content: The content to detect the encoding. - default_encoding: The default encoding to try to decode the content. - timeout: The timeout in seconds for the encoding detection. - """ - - try: - decoded_content = content.decode(default_encoding) - except UnicodeDecodeError: - # Try to detect encoding - encoding = chardet.detect(content)['encoding'] - decoded_content = content.decode(encoding) - - return decoded_content - -# such glue job is running as map job, the batchIndice is the index per file to handle in current job -def iterate_s3_files(bucket: str, prefix: str) -> Generator: - paginator = s3.get_paginator('list_objects_v2') - currentIndice = 0 - for page in paginator.paginate(Bucket=bucket, Prefix=prefix): - for obj in page.get('Contents', []): - key = obj['Key'] - # skip the prefix with slash, which is the folder name - if key.endswith('/'): - continue - - # skip the file if the index is not in the batchIndice - if currentIndice != int(batchIndice): - logger.info("currentIndice: {}, batchIndice: {}, skip file: {}".format(currentIndice, batchIndice, key)) - currentIndice += 1 - continue - - # Truncate to seconds with round() - current_time = int(round(time.time())) - # Check for redundancy and expiry - response = table.query( - KeyConditionExpression = Key('ObjectKey').eq(key), - ScanIndexForward=False, # Sort by ProcessTimestamp in descending order - Limit=1 # We only need the latest record - ) - - # If the object is found and has not expired, skip processing - if response['Items'] and response['Items'][0]['ExpiryTimestamp'] > current_time: - logger.info(f"Object {key} has not expired yet and will be skipped.") - continue - - # Record the processing of the S3 object with an updated expiry timestamp, and each job only update single object in table. 
TODO: currently we assume the object will be handled successfully
-            expiry_timestamp = current_time + OBJECT_EXPIRY_TIME
-            try:
-                table.put_item(
-                    Item={
-                        'ObjectKey': key,
-                        'ProcessTimestamp': current_time,
-                        'Bucket': bucket,
-                        'Prefix': '/'.join(key.split('/')[:-1]),
-                        'ExpiryTimestamp': expiry_timestamp
-                    }
-                )
-            except Exception as e:
-                logger.error(f"Error recording processing of S3 object {key}: {e}")
-
-            file_type = key.split('.')[-1]  # Extract file extension
-            response = s3.get_object(Bucket=bucket, Key=key)
-            file_content = response['Body'].read()
-            # assemble bucket and key as args for the callback function
-            kwargs = {'bucket': bucket, 'key': key}
-
-            if file_type == 'txt':
-                yield 'txt', decode_file_content(file_content), kwargs
-                break
-            elif file_type == 'csv':
-                # Update row count here, the default row count is 1
-                kwargs['csv_row_count'] = 1
-                yield 'csv', decode_file_content(file_content), kwargs
-                break
-            elif file_type == 'html':
-                yield 'html', decode_file_content(file_content), kwargs
-                break
-            elif file_type in ['pdf']:
-                yield 'pdf', file_content, kwargs
-                break
-            elif file_type in ['jpg', 'png']:
-                yield 'image', file_content, kwargs
-                break
-            elif file_type in ['docx', 'doc']:
-                yield 'doc', file_content, kwargs
-                break
-            elif file_type == 'md':
-                yield 'md', decode_file_content(file_content), kwargs
-                break
-            else:
-                logger.info(f"Unknown file type: {file_type}")
-
-def batch_generator(generator, batch_size: int):
-    iterator = iter(generator)
-    while True:
-        batch = list(itertools.islice(iterator, batch_size))
-        if not batch:
-            break
-        yield batch
-
-def aos_injection(content: List[Document], embeddingModelEndpoint: str, aosEndpoint: str, index_name: str, chunk_size: int = 500, gen_chunk: bool = True) -> List[Document]:
-    """
-    This function includes the following steps:
-    1. split the document into chunks with a chunk size that fits the embedding model, noting the document is already split by title/subtitle to form approximately semantic chunks;
-    2. call the embedding model to get the embeddings for each chunk;
-    3. call the AOS to index the chunk with the embeddings;
-    Parameters:
-    content (list): A list of Document objects, each representing a semantically grouped section of the PDF file. Each Document object contains a metadata dictionary with details about the heading hierarchy etc.
-    embeddingModelEndpoint (str): The endpoint of the embedding model.
-    aosEndpoint (str): The endpoint of the AOS.
-    index_name (str): The name of the index to be created in the AOS.
-    chunk_size (int): The size of each chunk to be indexed in the AOS.
-    gen_chunk (bool): Whether to generate chunks or not.
-
-    Returns:
-
-    Note:
-    """
-    embeddings = sm_utils.create_sagemaker_embeddings_from_js_model(embeddingModelEndpoint, region)
-    def chunk_generator(content: List[Document], chunk_size: int = 500, chunk_overlap: int = 30) -> Generator[Document, None, None]:
-        text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
-        for document in content:
-            splits = text_splitter.split_documents([document])
-            # list of Document objects
-            for split in splits:
-                yield split
-
-    if gen_chunk:
-        generator = chunk_generator(content, chunk_size=chunk_size)
-    else:
-        generator = content
-
-    batches = batch_generator(generator, batch_size=10)
-    # note: typeof(batch)->list[Document], sizeof(batches)=batch_size
-    for batch in batches:
-        if len(batch) == 0:
-            continue
-        # Each batch is still a list of Document objects; we iterate the list to inject the embeddings, and the chunk size (500) should already be small enough to fit the embedding model
-        for document in batch:
-            @retry(stop=stop_after_attempt(3))
-            def _aos_injection(document: Document) -> Document:
-                # TODO, parse the metadata to embed with different index
-                docsearch = OpenSearchVectorSearch(
-                    index_name=index_name,
-                    embedding_function=embeddings,
-                    opensearch_url="https://{}".format(aosEndpoint),
-                    http_auth = awsauth,
-                    use_ssl = True,
-                    verify_certs = True,
-                    connection_class = RequestsHttpConnection
-                )
-                logger.info("Adding documents %s to OpenSearch with index %s", document, index_name)
-                docsearch.add_documents(documents=[document])
-                logger.info("Retry statistics: %s", _aos_injection.retry.statistics)
-            # logger.info("Adding documents %s to OpenSearch with index %s", document, index_name)
-            _aos_injection(document)
-
-# Main function to be called by Glue job script
-def main():
-    logger.info("Starting Glue job with passing arguments: %s", args)
-    # Check if offline mode
-    if offline == 'true':
-        logger.info("Running in offline mode with consideration for large file size...")
-        for file_type, file_content, kwargs in iterate_s3_files(s3_bucket, s3_prefix):
-            try:
-                res = cb_process_object(s3, file_type, file_content, **kwargs)
-                # TODO, parse the metadata to embed with different index
-                if res:
-                    logger.info("Result: %s", res)
-                if file_type == 'csv':
-                    # CSV page documents have already been split into chunks, no more splitting is needed
-                    aos_injection(res, embeddingModelEndpoint, aosEndpoint, 'chatbot-index', gen_chunk=False)
-                elif file_type == 'html':
-                    aos_injection(res, embeddingModelEndpoint, aosEndpoint, 'chatbot-index')
-                elif file_type in ['pdf', 'txt', 'doc', 'md']:
-                    aos_injection(res, embeddingModelEndpoint, aosEndpoint, 'chatbot-index')
-                if qa_enhancement == 'true':
-                    # iterate the documents to get the QA pairs
-                    for document in res:
-                        # prompt is not used in this case
-                        prompt = ""
-                        solution_title = "GCR Solution LLM Bot"
-                        # Make sure the document is a Document object
-                        logger.info("Enhancing document type: {} and content: {}".format(type(document), document))
-                        ewb = EnhanceWithBedrock(prompt, solution_title, document)
-                        # This should be optional for the user to choose the chunk size
-                        document_list = ewb.SplitDocumentByTokenNum(document, ENHANCE_CHUNK_SIZE)
-                        # test the function
-                        for document in document_list:
-                            enhanced_prompt = ewb.EnhanceWithClaude(prompt, solution_title, document)
-                            logger.info("Enhanced prompt: {}".format(enhanced_prompt))
-
-            except Exception as e:
-                logger.error("Error processing object %s: %s", kwargs['bucket'] + '/' + kwargs['key'], e)
-    else:
logger.info("Running in online mode, assume file number is small...") - -if __name__ == '__main__': - logger.info("boto3 version: %s", boto3.__version__) - - # Set the NLTK data path to the /tmp directory for AWS Glue jobs - nltk.data.path.append("/tmp") - # List of NLTK packages to download - nltk_packages = ['words', 'punkt'] - # Download the required NLTK packages to /tmp - for package in nltk_packages: - # Download the package to /tmp/nltk_data - nltk.download(package, download_dir='/tmp/nltk_data') - main() diff --git a/src/vpc-stack.ts b/src/vpc-stack.ts deleted file mode 100644 index bc7f9706..00000000 --- a/src/vpc-stack.ts +++ /dev/null @@ -1,43 +0,0 @@ -import { NestedStack, StackProps } from 'aws-cdk-lib'; -import { Construct } from 'constructs'; - -import * as ec2 from 'aws-cdk-lib/aws-ec2'; - -import * as dotenv from "dotenv"; -dotenv.config(); - -export class VpcStack extends NestedStack { - - _vpc; - _privateSubnets; - _securityGroup; - - constructor(scope: Construct, id: string, props: StackProps = {}) { - super(scope, id, props); - - this._vpc = new ec2.Vpc(this, 'LLM-VPC', { - ipAddresses: ec2.IpAddresses.cidr('10.100.0.0/16'), - maxAzs: 2, - }); - - this._privateSubnets = this._vpc.privateSubnets; - - this._securityGroup = new ec2.SecurityGroup(this, 'LLM-VPC-SG', { - vpc: this._vpc, - description: 'LLM Security Group' - }); - - this._securityGroup.addIngressRule(this._securityGroup, ec2.Port.allTraffic(), 'allow self traffic'); - - this._vpc.addGatewayEndpoint('DynamoDbEndpoint', { - service: ec2.GatewayVpcEndpointAwsService.DYNAMODB, - }); - - this._vpc.addInterfaceEndpoint('Glue', { - service: ec2.InterfaceVpcEndpointAwsService.GLUE, - securityGroups: [this._securityGroup], - subnets: { subnets: this._privateSubnets, }, - }); - - } -} \ No newline at end of file diff --git a/test/main.test.ts b/test/main.test.ts deleted file mode 100644 index c4c606ea..00000000 --- a/test/main.test.ts +++ /dev/null @@ -1,11 +0,0 @@ -import { App } from 'aws-cdk-lib'; -import { Template } from 'aws-cdk-lib/assertions'; -import { MyStack } from '../src/main'; - -test('Snapshot', () => { - const app = new App(); - const stack = new MyStack(app, 'test'); - - const template = Template.fromStack(stack); - expect(template.toJSON()).toMatchSnapshot(); -}); \ No newline at end of file From 33d6da2ffd977fe456dd6e797018f7e84afb99d0 Mon Sep 17 00:00:00 2001 From: yike5460 Date: Mon, 13 Nov 2023 14:58:24 +0000 Subject: [PATCH 24/37] feat: support multiple glue jobs display --- src/panel/app.py | 57 ++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 45 insertions(+), 12 deletions(-) diff --git a/src/panel/app.py b/src/panel/app.py index 31af707b..2341732d 100644 --- a/src/panel/app.py +++ b/src/panel/app.py @@ -12,6 +12,11 @@ import requests import json import time + +import logging +logger = logging.getLogger(__name__) +logging.basicConfig(level=logging.INFO) + # Load environment variables load_dotenv() @@ -38,6 +43,15 @@ def process_text(text): return knowledgeBase +def get_job_runs(page_token=None): + if page_token: + response = glue.get_job_runs(JobName=glue_job_name, MaxResults=10, NextToken=page_token) + else: + response = glue.get_job_runs(JobName=glue_job_name, MaxResults=10) + # function only return running aws glue jobs + job_runs = [job_run for job_run in response['JobRuns'] if job_run['JobRunState'] == 'RUNNING'] + return job_runs, response.get('NextToken'), response.get('PreviousToken') + def pipeline_tab(): st.title("LLM Bot ETL Pipeline") # text box to allow user input 
 def pipeline_tab():
     st.title("LLM Bot ETL Pipeline")
     # text box to allow user input the url address of the pipeline with default value
@@ -98,23 +112,42 @@ def pipeline_tab():
         st.text_area('Response:', value=response.text, height=200, max_chars=None)
 
     # progress bar to show the offline ETL job running status
-    st.subheader('Online & Offline ETL Job Status')
-    refresh_button = st.button('Refresh')
-    if refresh_button:
-        # list all job running with a specific job name
-        job_runs = glue.get_job_runs(JobName=glue_job_name, MaxResults=1)
-        # get the latest job run id
-        job_run_id = job_runs['JobRuns'][0]['Id']
-        # get the latest job run status
-        job_status = glue.get_job_run(JobName=glue_job_name, RunId=job_run_id)['JobRun']['JobRunState']
-        # output the job status details with slim height
-        st.text_area('Job Status:', value=json.dumps(job_status, indent=4), height=100, max_chars=None)
+    st.subheader('AWS Glue Job Status')
+
+    # Initialize session state for pagination
+    if 'next_token' not in st.session_state:
+        st.session_state['next_token'] = None
+
+    # Layout for buttons
+    left_col, right_col = st.columns(2)
+    with left_col:
+        refresh_button = st.button('Refresh')
+
+    with right_col:
+        placeholder, prev_button, next_button = st.columns([3, 1, 1])
+        with prev_button:
+            if st.button('Prev'):
+                st.session_state['next_token'] = st.session_state.get('new_prev_token')
+        with next_button:
+            if st.button('Next'):
+                st.session_state['next_token'] = st.session_state.get('new_next_token')
+
+    if refresh_button or 'next_token' in st.session_state:
+        response, st.session_state['new_next_token'], st.session_state['new_prev_token'] = get_job_runs(st.session_state['next_token'])
+        # Display the running glue jobs
+        job_runs_container = ""
+        total_job_runs = len(response)
+        for job_run in response:
+            job_runs_container += (f"Job Run ID: {job_run['Id']}")
+            job_runs_container += (f" Retries: {job_run.get('Attempt', 'N/A')}")
+            job_runs_container += (f" Start Time: {job_run['StartedOn']}")
+            job_runs_container += (f" Duration: {job_run.get('ExecutionTime', 'N/A')} seconds\n")
+        st.text_area('Running AWS Glue Jobs in total: ' + str(total_job_runs), value=job_runs_container, height=200, max_chars=None)
 
     # sub panel to query and search the embedding in AOS
     st.subheader('Query and Search AOS')
     query = st.text_input('Input your query body here', value='{"aos_index": "chatbot-index", "query": {"operation": "match_all", "match_all": {}}}')
     # send button to trigger the request sending to the endpoint with query as request body
-
     request_body = {
         'aos_index': 'chatbot-index',
         'operation': 'match_all',

From 0ce42aa6e250389e3228669bf91aac54c9d53e91 Mon Sep 17 00:00:00 2001
From: Ning
Date: Tue, 14 Nov 2023 11:35:58 +0800
Subject: [PATCH 25/37] feat: support removing header and footer

---
 source/infrastructure/lib/etl/etl-stack.ts |  2 +-
 .../job/dep/llm_bot_dep/loaders/docx.py    | 21 +++++++++++++++++++
 source/lambda/job/dep/setup.py             |  3 ++-
 3 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/source/infrastructure/lib/etl/etl-stack.ts b/source/infrastructure/lib/etl/etl-stack.ts
index a03ddfe3..cd98a898 100644
--- a/source/infrastructure/lib/etl/etl-stack.ts
+++ b/source/infrastructure/lib/etl/etl-stack.ts
@@ -123,7 +123,7 @@ export class EtlStack extends NestedStack {
         '--REGION': props._region,
         '--EMBEDDING_MODEL_ENDPOINT': props._embeddingEndpoint,
         '--DOC_INDEX_TABLE': 'chatbot-index',
-        '--additional-python-modules':
'langchain==0.0.312,beautifulsoup4==4.12.2,requests-aws4auth==1.2.3,boto3==1.28.84,openai==0.28.1,nougat-ocr==0.1.17,pyOpenSSL==23.3.0,tenacity==8.2.3,markdownify==0.11.6,mammoth==1.6.0,chardet==5.2.0',
+        '--additional-python-modules': 'langchain==0.0.312,beautifulsoup4==4.12.2,requests-aws4auth==1.2.3,boto3==1.28.84,openai==0.28.1,nougat-ocr==0.1.17,pyOpenSSL==23.3.0,tenacity==8.2.3,markdownify==0.11.6,mammoth==1.6.0,chardet==5.2.0,python-docx==1.1.0',
         // add multiple extra python files
         '--extra-py-files': extraPythonFilesList
     }
diff --git a/source/lambda/job/dep/llm_bot_dep/loaders/docx.py b/source/lambda/job/dep/llm_bot_dep/loaders/docx.py
index fcf33605..6e4fc36a 100644
--- a/source/lambda/job/dep/llm_bot_dep/loaders/docx.py
+++ b/source/lambda/job/dep/llm_bot_dep/loaders/docx.py
@@ -7,6 +7,7 @@
 import uuid
 from datetime import datetime
 from llm_bot_dep.splitter_utils import MarkdownHeaderTextSplitter
+from docx import Document as pyDocument
 
 logger = logging.getLogger(__name__)
 
@@ -34,6 +35,22 @@ def __init__(
         self.file_path = file_path
         self.encoding = encoding
         self.autodetect_encoding = autodetect_encoding
+
+    def clean_document(self, doc: pyDocument):
+        """Clean the document, including removing the header and footer of each section
+
+        Args:
+            doc (Document): The document to clean
+        """
+        # Remove headers and footers
+        for section in doc.sections:
+            if section.header is not None:
+                for paragraph in section.header.paragraphs:
+                    paragraph.clear()
+
+            if section.footer is not None:
+                for paragraph in section.footer.paragraphs:
+                    paragraph.clear()
 
     def load(self) -> List[Document]:
         """Load from file path."""
@@ -43,6 +60,10 @@ def _convert_image(image):
             # Images are excluded
             return {"src": ""}
 
+        pyDoc = pyDocument(self.file_path)
+        self.clean_document(pyDoc)
+        pyDoc.save(self.file_path)
+
         with open(self.file_path, "rb") as docx_file:
             result = mammoth.convert_to_html(docx_file, convert_image=mammoth.images.img_element(_convert_image))
             html_content = result.value  # The generated HTML
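The clean_document hook above relies on python-docx modeling headers and footers per section rather than per page. The same pattern in isolation, as a rough sketch (sample.docx is a placeholder path):

    from docx import Document as pyDocument  # python-docx, added to setup.py below

    doc = pyDocument('sample.docx')  # placeholder input file
    for section in doc.sections:
        # Each section owns one header and one footer object
        for paragraph in section.header.paragraphs:
            paragraph.clear()
        for paragraph in section.footer.paragraphs:
            paragraph.clear()
    doc.save('sample-cleaned.docx')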
diff --git a/source/lambda/job/dep/setup.py b/source/lambda/job/dep/setup.py
index ff6569da..ce3ea0a9 100644
--- a/source/lambda/job/dep/setup.py
+++ b/source/lambda/job/dep/setup.py
@@ -15,6 +15,7 @@
         'nougat-ocr',
         'markdownify',
         'mammoth',
-        'chardet'
+        'chardet',
+        'python-docx'
     ],
 )
\ No newline at end of file

From 2c059aeb376811316e423ba3d656d4d99c191b2e Mon Sep 17 00:00:00 2001
From: Ning
Date: Tue, 14 Nov 2023 16:26:54 +0800
Subject: [PATCH 26/37] fix: compatible with NVIDIA driver in endpoint

---
 .../dist/llm_bot_dep-0.1.0-py3-none-any.whl  | Bin 24501 -> 19201 bytes
 source/model/embedding/code/requirements.txt |   1 +
 2 files changed, 1 insertion(+)

diff --git a/source/lambda/job/dep/dist/llm_bot_dep-0.1.0-py3-none-any.whl b/source/lambda/job/dep/dist/llm_bot_dep-0.1.0-py3-none-any.whl
index 3945d1a9443b836401abe3ac482cde2b0bbc16ec..92567768c09def9bd5af336035268f8a2e391a78 100644
GIT binary patch
delta 6388
[binary delta data omitted]

delta 11727
[binary delta data omitted]

diff --git a/source/model/embedding/code/requirements.txt b/source/model/embedding/code/requirements.txt
index 9f6724c7..453cc3a1 100644
--- a/source/model/embedding/code/requirements.txt
+++ b/source/model/embedding/code/requirements.txt
@@ -1,3 +1,4 @@
+torch==2.0.1
 transformers==4.31.0
 accelerate==0.20.3
 sentence-transformers

From 81e859a5d94acc4be10fab23adce8df8e1362897 Mon Sep 17 00:00:00 2001
From: yike5460
Date: Tue, 14 Nov 2023 15:33:32 +0000
Subject: [PATCH 27/37] feat: add intermediate result to s3 for observation

---
 source/infrastructure/lib/etl/etl-stack.ts |  1 +
 source/lambda/job/glue-job-script.py       | 64 +++++++++++++++++++++-
 2 files changed, 64 insertions(+), 1 deletion(-)
diff --git a/source/infrastructure/lib/etl/etl-stack.ts b/source/infrastructure/lib/etl/etl-stack.ts
index cd98a898..9ade659b 100644
--- a/source/infrastructure/lib/etl/etl-stack.ts
+++ b/source/infrastructure/lib/etl/etl-stack.ts
@@ -190,6 +190,7 @@ export class EtlStack extends NestedStack {
             '--AOS_ENDPOINT': props._domainEndpoint,
             '--EMBEDDING_MODEL_ENDPOINT': props._embeddingEndpoint,
             '--REGION': props._region,
+            '--RES_BUCKET': _S3Bucket.bucketName,
             '--OFFLINE': 'true',
             '--QA_ENHANCEMENT.$': '$.qaEnhance',
             // Convert the numeric index to a string
diff --git a/source/lambda/job/glue-job-script.py b/source/lambda/job/glue-job-script.py
index 94e51834..3e472bfa 100644
--- a/source/lambda/job/glue-job-script.py
+++ b/source/lambda/job/glue-job-script.py
@@ -3,6 +3,8 @@
 import os
 import sys
 import time
+import json
+import datetime
 from typing import Any, Dict, Generator, Iterable, List, Optional, Tuple
 
 import boto3
@@ -29,12 +31,13 @@
 os.environ['NLTK_DATA'] = '/tmp/nltk_data'
 
 # Parse arguments
-args = getResolvedOptions(sys.argv, ['JOB_NAME', 'S3_BUCKET', 'S3_PREFIX', 'AOS_ENDPOINT', 'EMBEDDING_MODEL_ENDPOINT', 'REGION', 'OFFLINE', 'QA_ENHANCEMENT', 'BATCH_INDICE', 'ProcessedObjectsTable'])
+args = getResolvedOptions(sys.argv, ['JOB_NAME', 'S3_BUCKET', 'S3_PREFIX', 'AOS_ENDPOINT', 'EMBEDDING_MODEL_ENDPOINT', 'REGION', 'RES_BUCKET', 'OFFLINE', 'QA_ENHANCEMENT', 'BATCH_INDICE', 'ProcessedObjectsTable'])
 s3_bucket = args['S3_BUCKET']
 s3_prefix = args['S3_PREFIX']
 aosEndpoint = args['AOS_ENDPOINT']
 embeddingModelEndpoint = args['EMBEDDING_MODEL_ENDPOINT']
 region = args['REGION']
+res_bucket = args['RES_BUCKET']
 offline = args['OFFLINE']
 qa_enhancement = args['QA_ENHANCEMENT']
 # TODO, pass the bucket and prefix need to handle in current job directly
@@ -55,6 +58,48 @@
 # Set the NLTK data path to the /tmp directory for AWS Glue jobs
 nltk.data.path.append('/tmp/nltk_data')
 
+def convert_to_logger(document: Document) -> str:
+    # TODO: Convert the document to a logger file format, customize if possible
+    logger_content = "Page Content: " + document.page_content + "\n"
+    logger_content += "Metadata: " + json.dumps(document.metadata)
+    return logger_content
+
+def upload_chunk_to_s3(logger_content: str, bucket: str, prefix: str, splitting_type: str):
+    """Upload the logger file to S3 with the hierarchy below:
+    filename A
+    ├── semantic-splitting
+    │   ├── timestamp 1
+    │   │   ├── logger file 1
+    │   ├── timestamp 2
+    │   │   ├── logger file 2
+    ├── chunk-size-splitting
+    │   ├── timestamp 3
+    │   │   ├── logger file 3
+    │   ├── timestamp 4
+    │   │   ├── logger file 4
+    filename B
+    ├── semantic-splitting
+    │   ├── timestamp 5
+    │   │   ├── logger file 5
+    │   ├── timestamp 6
+    │   │   ├── logger file 6
+    ├── chunk-size-splitting
+    │   ├── timestamp 7
+    │   │   ├── logger file 7
+    │   ├── timestamp 8
+    │   │   ├── logger file 8
+    ...
+    """
+    # Round the timestamp to hours to avoid too many folders
+    timestamp = datetime.datetime.now().strftime("%Y-%m-%d-%H")
+    # Make the logger file name unique
+    object_key = f"{prefix}/{splitting_type}/{timestamp}/{datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')}.log"
+    try:
+        res = s3.put_object(Bucket=bucket, Key=object_key, Body=logger_content)
+        logger.info(f"Upload logger file to S3: {res}")
+    except Exception as e:
+        logger.error(f"Error uploading logger file to S3: {e}")
+
 def decode_file_content(content: str, default_encoding: str = 'utf-8'):
     """Decode the file content and auto detect the content encoding.
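To make the layout above concrete, here is the key one uploaded chunk would get under that scheme (the values are illustrative):

    import datetime

    filename = 'paper-01'                  # stem of the source file, taken from metadata
    splitting_type = 'semantic-splitting'

    hour_folder = datetime.datetime.now().strftime('%Y-%m-%d-%H')
    unique_stem = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
    object_key = f"{filename}/{splitting_type}/{hour_folder}/{unique_stem}.log"
    print(object_key)
    # e.g. paper-01/semantic-splitting/2023-11-15-10/2023-11-15-10-52-02-123456.log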
@@ -214,6 +259,14 @@ def _aos_injection(document: Document) -> Document:
                 docsearch.add_documents(documents=[document])
                 logger.info("Retry statistics: %s", _aos_injection.retry.statistics)
             # logger.info("Adding documents %s to OpenSearch with index %s", document, index_name)
+
+            logger_file = convert_to_logger(document)
+            # Extract the filename from the file_path in the metadata
+            file_path = document.metadata.get('file_path', '')
+            filename = file_path.split('/')[-1].split('.')[0]
+            # RecursiveCharacterTextSplitter has been rewritten to split based on chunk size & overlap; use a separate folder to store the logger file
+            upload_chunk_to_s3(logger_file, res_bucket, filename, 'chunk-size-splitting')
+
             _aos_injection(document)
 
 # Main function to be called by Glue job script
@@ -225,6 +278,15 @@ def main():
         for file_type, file_content, kwargs in iterate_s3_files(s3_bucket, s3_prefix):
             try:
                 res = cb_process_object(s3, file_type, file_content, **kwargs)
+                for document in res:
+                    logger_file = convert_to_logger(document)
+                    # Extract the filename from the file_path in the metadata
+                    file_path = document.metadata.get('file_path', '')
+                    filename = file_path.split('/')[-1].split('.')[0]
+                    # Semantic split based on title/sub-title within the loader per file type; use a separate folder to store the logger file
+                    upload_chunk_to_s3(logger_file, res_bucket, filename, 'semantic-splitting')
+
+                # The res is unified to list[Document] type; store it to S3 for observation
                 # TODO, parse the metadata to embed with different index
                 if res:
                     logger.info("Result: %s", res)

From 3659a2e0499c7306f7f5736962ce552f3b97ee9f Mon Sep 17 00:00:00 2001
From: yike5460
Date: Wed, 15 Nov 2023 10:52:02 +0000
Subject: [PATCH 28/37] feat: initial script to benchmark

---
 source/panel/benchmark.py | 187 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 187 insertions(+)
 create mode 100644 source/panel/benchmark.py

diff --git a/source/panel/benchmark.py b/source/panel/benchmark.py
new file mode 100644
index 00000000..1497bcb2
--- /dev/null
+++ b/source/panel/benchmark.py
@@ -0,0 +1,187 @@
+import os
+import logging
+import time
+
+from typing import Any, Dict, Generator, Iterable, List, Optional, Tuple
+from langchain.docstore.document import Document
+from langchain.embeddings import OpenAIEmbeddings
+from langchain.vectorstores import FAISS
+
+from dotenv import load_dotenv
+load_dotenv()
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+metadata_template = {
+"content_type": "paragraph",
+"heading_hierarchy": {},
+"figure_list": [],
+"chunk_id": "$$",
+"file_path": "",
+"keywords": [],
+"summary": "",
+}
+
+markdown_document = """
+# Learning to Retrieve In-Context Examples for Large Language Models
+###### Abstract
+aaaa
+## 1 Introduction
+1111
+## 2 Related Work
+2222
+## 3 Preliminaries
+3333
+## 4 Methodology
+4444
+### Training Data Generation
+5555
+### Reward Modeling
+6666
+### Training LLM Retrievers with Knowledge Distillation
+7777
+### Evaluation of LLM Retrievers
+8888
+|-|-|
+|:--:|:--:|
+## 5 Experiments
+### Evaluation Setup
+9999
+### Main Results
+0000
+### Training Pipeline of LLM-R
+1010
+### Generalization Ability of LLM-R
+1212
+### When does LLM-R Work and When Does it Not?
+1313
+### Using Different LLMs for Data Generation and Task Evaluation
+1414
+### Scaling the Number of In-Context Examples and Retriever Size
+1515
+## 7 Conclusion
+1616
+## Limitations
+1717
+## References
+1818
+"""
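The fixture above exercises heading-based splitting. For comparison, langchain's stock MarkdownHeaderTextSplitter (not the custom splitter_utils variant used below, which also fills in the heading_hierarchy metadata) splits along the same heading levels; a small sketch:

    from langchain.text_splitter import MarkdownHeaderTextSplitter

    headers_to_split_on = [('#', 'h1'), ('##', 'h2'), ('###', 'h3')]
    splitter = MarkdownHeaderTextSplitter(headers_to_split_on=headers_to_split_on)

    sample = "# Title\nintro\n## Section A\nbody A\n### Sub A.1\nbody A.1\n"
    for chunk in splitter.split_text(sample):
        print(chunk.metadata, '->', chunk.page_content)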
+def nougat_loader():
+    # Benchmark the nougat package
+    # nougat ./2.pdf -o . --full-precision --markdown -m 0.1.0-base --recompute
+    pass
+
+def llamaIndex_loader(file_path: str):
+    try:
+        import pypdf
+    except ImportError:
+        raise ImportError(
+            "pypdf is required to read PDF files: `pip install pypdf`"
+        )
+    with open(file_path, "rb") as fp:
+        # Create a PDF object
+        pdf = pypdf.PdfReader(fp)
+
+        # Get the number of pages in the PDF document
+        num_pages = len(pdf.pages)
+
+        # Iterate over every page
+        docs = []
+        for page in range(num_pages):
+            # Extract the text from the page
+            page_text = pdf.pages[page].extract_text()
+            page_label = pdf.page_labels[page]
+
+            metadata = {"page_label": page_label, "file_name": file_path}
+            logger.info("page_text: {}, page_label: {}".format(page_text, page_label))
+            docs.append(Document(page_content=page_text, metadata=metadata))
+        return docs
+
+def unstructured_loader(file_path: str):
+    from langchain.document_loaders import UnstructuredFileLoader
+    loader = UnstructuredFileLoader(file_path, mode="elements")
+    docs = loader.load()
+    logger.info("loader docs: {}".format(docs))
+    return docs
+
+def recursive_splitter(docs: List[Document]):
+    from langchain.text_splitter import RecursiveCharacterTextSplitter
+    text_splitter = RecursiveCharacterTextSplitter(
+        chunk_size = 500,
+        chunk_overlap = 30,
+        length_function = len,
+        add_start_index = True,
+    )
+    docs = text_splitter.split_documents(docs)
+    logger.info("splitter docs: {}".format(docs))
+    return docs
+
+def csdc_markdown_header_splitter():
+    from splitter_utils import MarkdownHeaderTextSplitter, Document
+    markdown_splitter = MarkdownHeaderTextSplitter()
+    # construct a fake document data
+    data = [Document(page_content=markdown_document, metadata=metadata_template)]
+    md_header_splits = markdown_splitter.split_text(data[0])
+    for i, doc in enumerate(md_header_splits):
+        logger.info("content of chunk %s: %s", i, doc)
+    return md_header_splits
+
+def openai_embedding():
+    embeddings = OpenAIEmbeddings()
+    return embeddings
+
+def faiss_retriver(texts: List[str], query: str):
+    retriever = FAISS.from_texts(texts, OpenAIEmbeddings()).as_retriever()
+    docs = retriever.get_relevant_documents(query)
+    logger.info("retriever docs: {}".format(docs))
+    db = FAISS.from_texts(texts, OpenAIEmbeddings())
+    docs_with_score = db.similarity_search_with_score(query, 3)
+    logger.info("docs_with_score: {}".format(docs_with_score))
+    return docs_with_score
+
+def run_embeddings(embeddings_list, docs: List[str]):
+    results = []
+    for embed_func in embeddings_list:
+        start = time.perf_counter()
+        embedding_result = embed_func.embed_documents(docs)
+        end = time.perf_counter()
+        time_elapsed = end - start
+        results.append({
+            'Model': embed_func.__class__.__name__,
+            'Dimensions': len(embedding_result[0]),
+            'time': round(time_elapsed, 4)
+        })
+    return results
+
+# main entry point
+if __name__ == "__main__":
+
+    # Prepare the loader, splitter, embedding, and retriever lists, then iterate them to create a comparison matrix
+    loader_list = [unstructured_loader]
+    splitter_list = [recursive_splitter, csdc_markdown_header_splitter]
+    embeddings_list = []
+    retriever_list = [faiss_retriver]
+
+    # load
+    docs = unstructured_loader("paper-01.pdf")
+
+    # split
+    docs = recursive_splitter(docs)
+
+    # embedding & evaluate with dimension/time
+    # In a format compatible with OpenAIEmbeddings
+    texts = [doc.page_content for doc in docs]
+    embedding_instance = openai_embedding()
+    embeddings_list.append(embedding_instance)
+    results = run_embeddings(embeddings_list, texts)
+
+    # retriever
+    query = "什么是思维链?"  # "What is chain-of-thought?"
+    docs_with_score = faiss_retriver(texts, query = query)
+
+    # Evaluate the retriever
+    # from vectorview import Vectorview
+    # vv = Vectorview(key)
+    # vv.event(query, docs_with_score)
\ No newline at end of file
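The __main__ block above assembles loader_list, splitter_list, and retriever_list but then runs a single combination. A sketch of what iterating the full comparison matrix might look like, assuming each loader takes a file path and each splitter takes the loaded documents (signatures vary in the script, e.g. csdc_markdown_header_splitter takes no arguments, so a thin adapter would be needed):

    def run_matrix(file_path, loaders, splitters):
        # Hypothetical driver: count the chunks every loader/splitter pair produces
        results = {}
        for load in loaders:
            docs = load(file_path)
            for split in splitters:
                chunks = split(docs)
                results[(load.__name__, split.__name__)] = len(chunks)
        return results

    # e.g. run_matrix('paper-01.pdf', loader_list, [recursive_splitter])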

From ef99b56ea10070bba1bf2aa67f186e7e185668b4 Mon Sep 17 00:00:00 2001
From: Ning
Date: Thu, 16 Nov 2023 09:46:45 +0800
Subject: [PATCH 29/37] fix: fix file path

---
 source/lambda/job/dep/llm_bot_dep/loaders/docx.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/source/lambda/job/dep/llm_bot_dep/loaders/docx.py b/source/lambda/job/dep/llm_bot_dep/loaders/docx.py
index 6e4fc36a..793bd2f4 100644
--- a/source/lambda/job/dep/llm_bot_dep/loaders/docx.py
+++ b/source/lambda/job/dep/llm_bot_dep/loaders/docx.py
@@ -80,7 +80,7 @@ def process_doc(s3, **kwargs):
     random_uuid = str(uuid.uuid4())[:8]
     bucket_name = kwargs['bucket']
     key = kwargs['key']
-    local_path = f'/tmp/doc-{timestamp_str}-{random_uuid}.csv'
+    local_path = f'/tmp/doc-{timestamp_str}-{random_uuid}.docx'
     s3.download_file(bucket_name, key, local_path)
 
     loader = CustomDocLoader(file_path=local_path)

From 9621d644c65b044d42d018c25230ee822cf9a71d Mon Sep 17 00:00:00 2001
From: Ubuntu
Date: Thu, 16 Nov 2023 06:00:50 +0000
Subject: [PATCH 30/37] chore: move nougat out of glue script

---
 source/infrastructure/lib/etl/etl-stack.ts    |   2 +-
 .../job/dep/llm_bot_dep/loaders/nougat_pdf.py | 161 ++++++++++++++++++
 .../lambda/job/dep/llm_bot_dep/loaders/pdf.py | 145 +++------------
 source/lambda/job/dep/setup.py                |   4 +-
 source/panel/app.py                           |   1 -
 5 files changed, 191 insertions(+), 122 deletions(-)
 create mode 100644 source/lambda/job/dep/llm_bot_dep/loaders/nougat_pdf.py

diff --git a/source/infrastructure/lib/etl/etl-stack.ts b/source/infrastructure/lib/etl/etl-stack.ts
index 9ade659b..5c3abb51 100644
--- a/source/infrastructure/lib/etl/etl-stack.ts
+++ b/source/infrastructure/lib/etl/etl-stack.ts
@@ -123,7 +123,7 @@ export class EtlStack extends NestedStack {
         '--REGION': props._region,
         '--EMBEDDING_MODEL_ENDPOINT': props._embeddingEndpoint,
         '--DOC_INDEX_TABLE': 'chatbot-index',
-        '--additional-python-modules': 'langchain==0.0.312,beautifulsoup4==4.12.2,requests-aws4auth==1.2.3,boto3==1.28.84,openai==0.28.1,nougat-ocr==0.1.17,pyOpenSSL==23.3.0,tenacity==8.2.3,markdownify==0.11.6,mammoth==1.6.0,chardet==5.2.0,python-docx==1.1.0',
+        '--additional-python-modules': 'langchain==0.0.312,beautifulsoup4==4.12.2,requests-aws4auth==1.2.3,boto3==1.28.84,openai==0.28.1,pyOpenSSL==23.3.0,tenacity==8.2.3,markdownify==0.11.6,mammoth==1.6.0,chardet==5.2.0,python-docx==1.1.0,nltk==3.8.1,pdfminer.six==20221105',
         // add multiple extra python files
         '--extra-py-files': extraPythonFilesList
     }
diff --git a/source/lambda/job/dep/llm_bot_dep/loaders/nougat_pdf.py b/source/lambda/job/dep/llm_bot_dep/loaders/nougat_pdf.py
new file mode 100644
index 00000000..17c37aee
--- /dev/null
+++ b/source/lambda/job/dep/llm_bot_dep/loaders/nougat_pdf.py
@@ -0,0 +1,161 @@
+import os
+import re
+import json
+import logging
+from bs4 import BeautifulSoup
+import subprocess
+from pathlib import Path
+from typing import List, Dict, Optional, Iterator, Sequence
+
+from langchain.docstore.document import Document
+from langchain.document_loaders import PDFMinerPDFasHTMLLoader
+
+from langchain.document_loaders.pdf import BasePDFLoader
+from ..splitter_utils import extract_headings, MarkdownHeaderTextSplitter
+# from langchain.text_splitter import MarkdownHeaderTextSplitter
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+metadata_template = {
+    "content_type": "paragraph",
+    "heading_hierarchy": {},
+    "figure_list": [],
+    "chunk_id": "$$",
+    "file_path": "",
+    "keywords": [],
+    "summary": "",
+}
+
+class NougatPDFLoader(BasePDFLoader):
+    """A PDF loader class for converting PDF files to MMD.
+
+    This class leverages the `nougat` library to perform the conversion from PDF to Markdown (MMD).
+    It inherits from `BasePDFLoader` and extends its functionality to utilize the `nougat` library.
+    TODO: the load_and_split method needs to be implemented; the default is RecursiveCharacterTextSplitter
+    Attributes:
+        file_path (str): The path to the PDF file to be loaded.
+        headers (Optional[Dict]): Optional headers to be used when loading the PDF.
+
+    Raises:
+        ImportError: If the `nougat` library is not installed.
+        RuntimeError: If the `nougat` command fails to execute successfully.
+    """
+
+    def __init__(self, file_path: str, *, headers: Optional[Dict] = None):
+        """Initialize with a file path."""
+        try:
+            import nougat
+        except ImportError:
+            raise ImportError(
+                "Please install nougat to use NougatPDFLoader. "
+                "You can install it with `pip install nougat`."
+            )
+
+        super().__init__(file_path, headers=headers)
+
+    def nougat(self, file_path: Path) -> str:
+        """Executes the `nougat` command to convert the specified PDF file to Markdown format.
+
+        Args:
+            file_path (Path): The path to the PDF file to be converted.
+
+        Returns:
+            str: The Markdown content resulting from the `nougat` conversion.
+        """
+        # nougat ./paperSnapshot.pdf --full-precision --markdown -m 0.1.0-base -o tmp --recompute
+        cli_command = ["nougat", str(file_path), "--full-precision", "--markdown", "-m", "0.1.0-base", "-o", "tmp", "--recompute"]
+
+        try:
+            result = subprocess.run(
+                cli_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
+            )
+            result.check_returncode()
+            return result.stdout
+
+        except subprocess.CalledProcessError as e:
+            logger.info(
+                f"Nougat command failed with return code {e.returncode}: {e.stderr}"
+            )
+            raise RuntimeError("Nougat command failed.") from e
+
+    def load(self) -> List[Document]:
+        """Loads and processes the specified PDF file, converting it to a list of Document objects.
+
+        Returns:
+            List[Document]: A list containing a single Document object with the processed content.
+        """
+        return list(self.lazy_load())
+
+    def lazy_load(self) -> Iterator[Document]:
+        """Lazy load and process the specified PDF file, yielding Document objects.
+
+        This method reads the PDF file, processes it using the `nougat` command,
+        reads the resulting Markdown content, and yields a Document object with the content.
+        """
+        # try:
+        file_path = self.file_path
+        # Call the method to run the Nougat OCR command
+        self.nougat(file_path)
+
+        # Read and process the Nougat output
+        file_path = Path(file_path)
+        output_path = Path("tmp") / f"{file_path.stem}.mmd"
+        with output_path.open("r") as f:
+            content = f.read()
+        # Math expressions are enclosed in \( and \) in the Markdown output; normalize them to $ and $$
+        content = (
+            content.replace(r"\(", "$")
+            .replace(r"\)", "$")
+            .replace(r"\[", "$$")
+            .replace(r"\]", "$$")
+        )
+        logger.info("content: %s", content)
+        # Extract headings hierarchically
+        headings = extract_headings(content)
+
+        # Assemble metadata from template
+        metadata = metadata_template
+        metadata["content_type"] = "paragraph"
+        metadata["heading_hierarchy"] = headings
+        metadata["chunk_id"] = "$$"
+        metadata["file_path"] = str(file_path)
+        # TODO, use PyMuPDF to detect image and figure list, but no link to the image for the extracted text
+        # metadata["figure_list"] = []
+
+        yield Document(page_content=content, metadata=metadata)
+
+        # except Exception as e:
+        #     logger.info(f"An error occurred while processing the PDF: {str(e)}")
+
+
+def nougat_process_pdf(local_path, **kwargs):
+    """
+    Process a given PDF file and extract structured information from it.
+
+    This function runs the Nougat OCR conversion on a local PDF file, then splits the
+    resulting Markdown into semantically grouped sections.
+
+    Parameters:
+    local_path (str): The local path of the PDF file to process.
+    **kwargs: Arbitrary keyword arguments. The function expects 'bucket' and 'key' among the kwargs
+    to specify the S3 bucket and key where the PDF file is located.
+
+    Returns:
+    list[Document]: A list of Document objects, each representing a semantically grouped section of the PDF file. Each Document object contains metadata as defined in metadata_template, and a page_content string with the text content of that section.
+ """ + + bucket = kwargs['bucket'] + key = kwargs['key'] + + loader = NougatPDFLoader(local_path) + data = loader.load() + logger.info("raw data: %s", data) + # Update file_path metadata to full s3 path in list of Document objects + data[0].metadata['file_path'] = f"s3://{bucket}/{key}" + markdown_splitter = MarkdownHeaderTextSplitter() + md_header_splits = markdown_splitter.split_text(data[0]) + for i, doc in enumerate(md_header_splits): + logger.info("PDF file processed successfully, with content of chunk %s: %s", i, doc) + return md_header_splits diff --git a/source/lambda/job/dep/llm_bot_dep/loaders/pdf.py b/source/lambda/job/dep/llm_bot_dep/loaders/pdf.py index aa4598e4..29506139 100644 --- a/source/lambda/job/dep/llm_bot_dep/loaders/pdf.py +++ b/source/lambda/job/dep/llm_bot_dep/loaders/pdf.py @@ -2,17 +2,13 @@ import re import json import logging -from bs4 import BeautifulSoup -import subprocess -from pathlib import Path -from typing import List, Dict, List, Optional, Iterator, Sequence from langchain.docstore.document import Document +from langchain.document_loaders.pdf import BasePDFLoader from langchain.document_loaders import PDFMinerPDFasHTMLLoader -from langchain.document_loaders.pdf import BasePDFLoader from ..splitter_utils import extract_headings, MarkdownHeaderTextSplitter -# from langchain.text_splitter import MarkdownHeaderTextSplitter +from .html import CustomHtmlLoader logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) @@ -27,106 +23,19 @@ "summary": "", } -class NougatPDFLoader(BasePDFLoader): - """A PDF loader class for converting PDF files to MMD. - - This class leverages the `nougat` library to perform the conversion from PDF to HTML. - It inherits from `BasePDFLoader` and extends its functionality to utilize the `nougat` library. - TODO, the load_and_split method need to be implemented and default is RecursiveCharacterTextSplitter - Attributes: - file_path (str): The path to the PDF file to be loaded. - headers (Optional[Dict]): Optional headers to be used when loading the PDF. - - Raises: - ImportError: If the `nougat` library is not installed. - RuntimeError: If the `nougat` command fails to execute successfully. +def detect_language(input): """ - - def __init__(self, file_path: str, *, headers: Optional[Dict] = None): - """Initialize with a file path.""" - try: - import nougat - except ImportError: - raise ImportError( - "Please install nougat to use NougatPDFLoader. " - "You can install it with `pip install nougat`." - ) - - super().__init__(file_path, headers=headers) - - def nougat(self, file_path: Path) -> str: - """Executes the `nougat` command to convert the specified PDF file to Markdown format. - - Args: - file_path (Path): The path to the PDF file to be converted. - - Returns: - str: The Markdown content resulting from the `nougat` conversion. 
- """ - # nougat ./paperSnapshot.pdf --full-precision --markdown -m 0.1.0-base -o tmp --recompute - cli_command = ["nougat", str(file_path), "full-precision", "--markdown", "-m", "0.1.0-base", "-o", "tmp", "--recompute"] - - try: - result = subprocess.run( - cli_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True - ) - result.check_returncode() - return result.stdout - - except subprocess.CalledProcessError as e: - logger.info( - f"Nougat command failed with return code {e.returncode}: {e.stderr}" - ) - raise RuntimeError("Nougat command failed.") from e - - def load(self) -> List[Document]: - """Loads and processes the specified PDF file, converting it to a list of Document objects. - - Returns: - List[Document]: A list containing a single Document object with the processed content. - """ - return list(self.lazy_load()) - - def lazy_load(self) -> Iterator[Document]: - """Lazy load and process the specified PDF file, yielding Document objects. - - This method reads the PDF file, processes it using the `nougat` command, - reads the resulting Markdown content, and yields a Document object with the content. - """ - # try: - file_path = self.file_path - # Call the method to run the Nougat OCR command - self.nougat(file_path) - - # Rest of your code for reading and processing the output - file_path = Path(file_path) - output_path = Path("tmp") / f"{file_path.stem}.mmd" - with output_path.open("r") as f: - content = f.read() - # consider math expressions are enclosed in \( and \) in Markdown - content = ( - content.replace(r"\(", "$") - .replace(r"\)", "$") - .replace(r"\[", "$$") - .replace(r"\]", "$$") - ) - logger.info("content: %s", content) - # extract headings hierarchically - headings = extract_headings(content) - - # assemble metadata from template - metadata = metadata_template - metadata["content_type"] = "paragraph" - metadata["heading_hierarchy"] = headings - metadata["chunk_id"] = "$$" - metadata["file_path"] = str(file_path) - # TODO, use PyMuPDF to detect image and figure list, but no link to the image for the extracted text - # metadata["figure_list"] = [] - - yield Document(page_content=content, metadata=metadata) - - # except Exception as e: - # logger.info(f"An error occurred while processing the PDF: {str(e)}") + This function detects the language of the input text. It checks if the input is a list, + and if so, it joins the list into a single string. Then it uses a regular expression to + search for Chinese characters in the input. If it finds any, it returns 'ch' for Chinese. + If it doesn't find any Chinese characters, it assumes the language is English and returns 'en'. 
+ """ + if isinstance(input, list): + input = ' '.join(input) + if re.search("[\u4e00-\u9FFF]", input): + return 'ch' + else: + return 'en' def process_pdf(s3, pdf: bytes, **kwargs): @@ -154,18 +63,18 @@ def process_pdf(s3, pdf: bytes, **kwargs): logger.info(local_path) s3.download_file(Bucket=bucket, Key=key, Filename=local_path) # TODO, will be deprecated and replaced by nougat class in loader_utils - # loader = PDFMinerPDFasHTMLLoader(local_path) + loader = PDFMinerPDFasHTMLLoader(local_path) # entire PDF is loaded as a single Document - # file_content = loader.load()[0].page_content - # res = parse_pdf_to_json(file_content) + file_content = loader.load()[0].page_content + + loader = CustomHtmlLoader() + doc = loader.load(file_content) + splitter = MarkdownHeaderTextSplitter() + doc_list = splitter.split_text(doc) + + for doc in doc_list: + doc.metadata = metadata_template + doc.metadata['file_path'] = f"s3://{bucket}/{key}" + - loader = NougatPDFLoader(local_path) - data = loader.load() - logger.info("raw data: %s", data) - # Update file_path metadata to full s3 path in list of Document objects - data[0].metadata['file_path'] = f"s3://{bucket}/{key}" - markdown_splitter = MarkdownHeaderTextSplitter() - md_header_splits = markdown_splitter.split_text(data[0]) - for i, doc in enumerate(md_header_splits): - logger.info("PDF file processed successfully, with content of chunk %s: %s", i, doc) - return md_header_splits + return doc_list diff --git a/source/lambda/job/dep/setup.py b/source/lambda/job/dep/setup.py index ce3ea0a9..07bd1043 100644 --- a/source/lambda/job/dep/setup.py +++ b/source/lambda/job/dep/setup.py @@ -10,9 +10,9 @@ # 'faiss_cpu', # 'sagemaker', 'requests_aws4auth', - 'unstructured', + # 'unstructured', 'boto3==1.28.84', - 'nougat-ocr', + # 'nougat-ocr', 'markdownify', 'mammoth', 'chardet', diff --git a/source/panel/app.py b/source/panel/app.py index 2341732d..d46f5a54 100644 --- a/source/panel/app.py +++ b/source/panel/app.py @@ -1,6 +1,5 @@ from dotenv import load_dotenv import os -from PyPDF2 import PdfReader import streamlit as st from langchain.text_splitter import CharacterTextSplitter from langchain.embeddings.openai import OpenAIEmbeddings From 82770004e6c232e5dab325cc158c80b58e26fed6 Mon Sep 17 00:00:00 2001 From: Ning Date: Thu, 16 Nov 2023 14:07:13 +0800 Subject: [PATCH 31/37] fix: set torch version for instruct model --- source/model/instruct/code/requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/source/model/instruct/code/requirements.txt b/source/model/instruct/code/requirements.txt index 2b02551a..c986ac64 100644 --- a/source/model/instruct/code/requirements.txt +++ b/source/model/instruct/code/requirements.txt @@ -1,3 +1,4 @@ +torch==2.0.1 transformers==4.33.1 accelerate==0.20.3 transformers_stream_generator \ No newline at end of file From 85759ed9b180c24870c958d8054011cda1e4dc36 Mon Sep 17 00:00:00 2001 From: Ning Date: Thu, 16 Nov 2023 15:39:53 +0800 Subject: [PATCH 32/37] feat: add file path in metadata --- .../dist/llm_bot_dep-0.1.0-py3-none-any.whl | Bin 19201 -> 23507 bytes .../job/dep/llm_bot_dep/loaders/auto.py | 2 - .../lambda/job/dep/llm_bot_dep/loaders/csv.py | 46 ++++++++++-------- .../job/dep/llm_bot_dep/loaders/docx.py | 10 ++-- .../job/dep/llm_bot_dep/loaders/html.py | 12 ++++- .../job/dep/llm_bot_dep/loaders/markdown.py | 11 +++-- .../job/dep/llm_bot_dep/loaders/text.py | 6 ++- 7 files changed, 53 insertions(+), 34 deletions(-) diff --git a/source/lambda/job/dep/dist/llm_bot_dep-0.1.0-py3-none-any.whl 
b/source/lambda/job/dep/dist/llm_bot_dep-0.1.0-py3-none-any.whl
index 92567768c09def9bd5af336035268f8a2e391a78..53dd3f8c4dbae7a69073c8a04f7aa4d26cbaebe9 100644
GIT binary patch
delta 13778
[binary delta data omitted]
zjw;NE#YSO2r0MadPnO}SRdb*3zw8nn2NLXSs?;iU-l2&kJT7xnYZr6-Qmp0l&d=Y% z<`0_u8XFnDF)(smNHX==Y9}l;cUg+tH$;ak%Y5Y9b==OIwLpxeY@Q(KdRlFgwd$~| ztlAXnc|7HS0muu_JK6}P_aGk2ki6BiA*8?kk2?SMHB2O?+ul$563(D}iHS%P9Rvvz z&n2M%rZF`rJw!+%-W@*({UwbeWXxuktqctTA<9^wp$=)Oohc(fezw}1yE6P8$l_rhx}mA4HNd>N(_%S%*fOm`VQp3 z2iKS9iAI8{pnqTA1(gu~MpCxqZSDRwl(2(H)D@xzG}q>7VTEkBRe`&)APPbf<>Y~o zFe$lCJ$?6h2=44@nl_~eNgsT5JXn`c+gUrsQ)H4Rhq`!00ouT{^M zVBBAh@wPbR@`VrTu^r}v`BaBkk_l~Bg)!%d3b}uHmcf1l9nf>Y0t4cb8?yQ^junaJ zAA&Jh1S{-RM-5oJl`r)+e?kRphia9LlbLW+0gPJ9(uEcoVl=HY4<&W)mP5k5T#hy< zLK0Pse+{+AhIfVO(RPvTGj3eIwcq^A+5aKFi1vU`Qr@f<5o@0`GrW*neZfY~JoXv` zAH~8L^=W@@&zgngW9!UipDK#;Hk%vr?l}`Jc^KwX6vvk%zml_R?&f*YYr0}gGU-D) z3%I@}krSh$apZh@=}RkDj*q0?#rR!dPGgyF+ebZx_3*uk1bSEEg(a=~?%7BSV>_zk zeEo`uLL$k^eZD|nz-Z%)ZT{T?N{PWkhfRurNeb;K)+}J5c9^0P+VG-^v=TuWx2{FD**(T3WsOH^?a*IlQfWePryNtVRnJ^T+~d8eDQ$4rZV z6pvjhv6-99xkU28G}jXKE;$ekt*eS75vaEf3;|vnTbOLYd;Q+Oj#FTeL_~3{x*kzg zVC4UEu}vbSI6m0lOKcLM#OXjrssHNp5@p3PiT;+%e}#=eK)7Fd>A!CU0WdWG0ON=t zY5)EJ8tDI@-tWInfPe}D6lni3+7(A3`cLV^|6ZM$;;~$`l z1QW*W m2#Dl=0P6#oloAbvNHG+o!N2sJKtO0;_xZ1VdZhaI)BgkTmdEq} delta 9535 zcmZXa1yCJZ)~IoJ2<{RfxVyUrcXtUc2MZkB3370k0156I+%4F_-Q67mJno%0HFN*z z>aJbY^{v&tyL#!~Gfj|zDUj9ls*ASOW+3>-i}LbG_4 zNubboPVV3nISD7SOx~bY*X5)SuM^jgG5)IVDJV7$k_wDC-;#@^^F5L&`NW*0Yf%bm zB7Y-R?ZPXXPefHp@LPcWip{ckL&c(XTH}&@=zDm$`HRG&y)+Ev+i^^tGO&xiaHGXd z;cGvjCog#>8MOA2wi*>~Oke`zHWmizz6ZK&2%P}-znCjK`Z9Kj;F5qZJDu^QE+2nLBDnX4x#bb*Oe@c|9!3AN%5KkbOt4W$;HO*bpEFJX34b-|9E%#AM3|E6= z99i0n!~A)?yrb01+iw5PyOioGkm2D)!g&J%DKKD^j@)FbPoech%S0Zw&iCRB-gMvK#CSI7T9 zqZjNMq=vxK5RgDD2$YrFi&4AmyC9x)m&@8s)J&w$`dDBg8HKzvr4WWo;LY|}%ZVMO zGA#RP#O))OKIbuxMh;V-lpH$+a_KOlCM%7+g{+!tYOCR$dPSKYC|E&HgYOG?@Z4;e zzt7?`fNJX_gU<-oez&24L9lwVweh8EH?uYk)4~Y)Sy%4)DX{WRmja+o)F=EVRx=U1 zuwOQ8rGNJd)c-~3XfY9U@-lzd!D?pgE^U;NkeZ#KZM8EyVr9LZ2|FsAc#(KajYv{s z=8fBuvk38Fc9cL>@5zP6E}i|t#t3hBjG_kQ}{9j>k% z+m<$>KARA=0p!5>a=&XhI-?g25b;gHrf|*ss^B(D;i1dk>lb3{R9q}f{oD7UU7E|# zY23J3q(;g_pvoOi! zH)85;B!R_wDaNJuL4{#fccHf5KR-u@Y+&93@=^lJ8yE^@S|2E!q(za}ND7sLFY%I2 zk@osSNO<5tahYgq-)}dzc%q30+IHFoO^c#lfW7V;BB`Ri21x{Mhi-)I5by<*z~ zB}LjEF%T8CHV@@w{3*FR>Ew@HX*88DE9rA=XfmXZx4YnBU#% z$I?-=Rwd!tTsYCl_l9(4qqi>Um?nnelbogG^MOhQajtBlePI*3V*!;;@NJ5^MZ-=f zN22l+;oNc~Vtd*d^J6M8nJKX0v8NRU%kvVaPK~iw1TYv1B%Rs80`Bz`z@W6X%+2)7 z6~Z$naay_G$@1YEhbCa36GK(fGUx7r0S}`kNcHT-~UcsvCQ( zD$oQiS89h?2pY^bU#_z0ZX+r~;ygs)&Hl+T4{s7qfC!;&33BxjF8zx|x3Ohz*sf>) z_TnV5$zuOct|R?Z()*3BopB4#aOh6>(C>w7B&Gxh-ugCk4ed07&5wf~zSR`>22*X5 zdX9-uueYalg(Nl5y0|W@wRtMQ=ech3$Bg=(? 
zoIKgwP}g^zEP{KDY@cNMb;y&VF_t8PYCtQ^R-AEUNxdSFHm}0n?o0~iVO5bA!@zDF zi6&g3JIwCBZiA!M;{e`Aq7kMK_h*zHvb9vfk{mM05Y}NY+d`ByZA0F}dtw_3Z7me8 zTqJXU8UaMH*tVBV`d1Ao!j29jud1#Xqjv$2w4Xbz`U3BfV%oG82BZ_3WGePMpQW8o1R$U4mdWk#&&QWG zthOx2&uVgs44zRix}tFxYxCr2S1j;5k2_=_-y*${ERN~DObcG)hhqU1lm z%ciR#KJaSz?%5?ZVjVg=xT57`)Y zndR{VfQ0KnCBORQm%%m`N5Gsx@rjp0YIM@xfURhngUGDZEzSX;(pP%*#K6!Mj)iXed z@DT)rR8jbM3==1I)OPJW0gJ#uK*%CPKoEffW${3jI`F`3t@STNd>H=s#uNsMfapTu z8&b4jWKSV^s9_QV=t0b&Pv_qpFi-6g>OZ?uygiTG6up?X`5g!^B<^LVD;K6GXw2dt zr*pSumwMJ!aDBLDLH(9~B*`=KNb_8FIRaMmL|{5LtVu21E27Wh!L^yxQhEMfeM-gc?6 z^aLekI#^rtoON7zwMClYygO=+;S}ptYiV#=a8_IN>}+EZ7~5>Kjx#ZcX);~2R}?oG zV}_bip+5PD_D4x)G~|aRtoTmdV)^8BOS6Q=8Fv~G6>~(JBW(SvaEbT6cNyn}uFl)| znL&77983XggA5$Q9jrCu@3c~7_ru^;R+`%A15V4e>N!a3dHuUnW%b@BnrmN#r4u>Z zaqg}j)#Ub{w7d6^8a-N#OC0xGf*S4P3EGQ3>OtW>(ScnezM>8amF<4{ositT$`5O} zes)Aah{`~5hYv};!i`vQ$t4&7lz1|1@!1jNgj)dd!Ko|Zu=LYQ6rWS7^;WX{F6l11 z*BDOGZk%T`n>5u`G?-cZdh;ZHjY~~Ut?Vri*=m6`VWZDaG3ox#q>)ID_!EA)axgB0 zH817en4QY8gtkq5gDh-LXtSzgh4i7w>bx~@+C<{`2l6VM6YeLsyl!C}LXgLBp>`EI2W_)d_fg#}|L&f&jwsNGiS}LO@bIl+iJZ}yAX-{z=zW`$N>X=(=5gla^Oj+Rz@6pn+ zd&LzFwG+(hfNoWj1?ZsyB%a3iXBlx41Hc;Vo}(<7IO}ehzy1wEM0OQwAQ#CSo=fM~ zF9m>yr!R-<87a~az#smhE6{88w}Xq;p#ewb>aR&ww!D%&Iec3eN5zVE<@8-ZKK|M< zKZDx)bFy6OyB}#v4c_7chmYMZgo{&PZ(S~^D)f-NkZl`<>-dZD3RNYbC99d8SQ1$#ziL+Y9g?s0rZ3;&BzfG*{5a zUHwi9#lyJO8+fk?SWqOMQPDj|(eKgYuc5fW8Bo2w*7>oe>go(*q+4B&zyz-EiUFmt zX)-{VTvlgDHS!_ivAPg7s~=V5^`=NDfsrWUjFTy{;WfxN!r?;wr9VGs=k^=6RvT{S zdaHKf{&b?2Fo9EqhDLXGY5nLv4%zY&_A?S5hvXSj&M!VUuD>bjXFIJ&1wqtikE{(< zVC9SqgY~ddAoHL|n%daKxs5(9`jO@$AYK-jV!7f>ufvuXUSeGgNYeYP1i%XXp`-c1 z7w_Y?_qP!{{5(!ViK0m*w;$>L38rYafr`{|;AdI$1mhsXk1!ojPaI)~J+JpAU9U#n z0!BIj-5IgBYrx~~_OfGL|8xscH{I*UmI%E}k-jOf6>kQ`A68bz6pu4p@|L<&-BDoR z!Basb%H_^||H1T`|54!5W7CGt{IU(_QF2`HjGfc^3ZoO+;!F$y_YUc})jZyDvYTO* zmN8!DuzllNIZtfVJ&Q&&c{ZHiH-nNW?Y;O_{ci_V^FCWV#P?>mw>}&-1G-IO(RF6O z2H#Z7@5cLW`gMBgndbcEqZ-~cB0U~}4HyE-0r;zi1UCcA-)IaKOFoagx8k?khYn_$ zz91_+k_4V*nN=4BUP$ujnoJzoM>deuRQ5Z`ShSbX>PNe|=R~C1&(CM3wn>)V@ba)j zr|+90)NP*-4h@6 z7UF_?g%WKcFS}*=R!DQ9Nd#*yyL}*vmzAoBQQ3gidz<4|=hbntg#l0vwvrz-+oXQO zoAUQfRY~@qNaef{@dE{P#CN;?M4S(3M6Cx zWwc9AB{aHF5D+D>5D;Yl$`1t5fnZjQ_I(EF7EXQf=Bj#GSPj*mU69mhn!7$Axj8-B zZzqu{u$SjC*?ou80qBvZ-W>kdCj_&116VdQoTXn9mojh{96W1&Z7)zT6wLSK#^(& zrG5?CIB!G?6K9<+LsaElpi;Op-y~!(Pgc@gwG2X#78_V@iXz}s`EDNK$|7bX+g_de zu4u7%Phy|=#xvyN@>Ah(&WlqZQ~1OK{wTU8e86#yMnjL?aAC?<&wj?;p@7Fgj8b)K zN!)E-u!sLoE0)7*SQ&2PsW1Nd==Q)Jf%{LpZxhZ=F;+VjV8M3SX=#~&eVO21h{1>C|#CJ9i+Cy+`&S?b>Dn} za%oes^Ktm=)z%X+u5tWt9x&Sb_xuL8+5CfKQERofN&w?TK=a&25|2 z2O1`X#Nd(AoU8{^E*AR^yk9U5X-S68en8{W@IB?gl1`|u29XwCQ|yn7Cwq`CqL0`x z$WFudy|IjIKdl%%MS#ByFo{hJ;AgA_Ohu;j68BN7q;jfoqxE!*Tu0p) zjvBYLx&jAbgC}ZLVx>+468f)VnDi?jKVJhn6-ff{c-IxGE12)#1AM+-Wp-(mNCAxk z$-1t_D~y@%4Bc`^u`oNlYEA+f5(-l{U3Grpg~JyqV3Y9}8oK#im?%d}sk)diSkIm| zpPnYilsECaS*MOR9T1M`X10LzVU{Vko>$GHCi@ZdNUmo4nT5*tW`{rfIM}pkVoQQzZQ|1DqJu;sn*W%o#by= zr;;|D5x`ub$e(N@iJuDo=o4EGPCfWMzOonj;RwOU{NBXJ2Uf|?A*fOo$BMp{!3@BZ zlkck8duv0+h52Dh!6$yjJHI1-@!Lw3J*a5VbL3};QRnmbnW^p5sfImJQvr&LMGdK| zwCo3zt6RN1wma8khZwj9Bktj1gocv#Q5>LLyjJ6(=8G5ld|FE}ZsueQ-p6qztJivp z$##m%{IKJMlQE)Wq{#*}ZXFoW6LjlP*{I4JWEu>CM2RBHla64Y`H*HzajCT?zmfY* zY?5!(ZBaIJKN546n@1O011K*9AY5gsvP|MO-9 zt3)j%_^8(zO+=W4&-RJrvJj(!gx=9zzA$Y2)S_NV(NGiWk02dbo!m2QR*u)&o=@h& zc(x{pM+nZ@I`OnJ`4r~VkXyJ@@209a*hWc9x9Z$zraf%_)sstSFdvw`42QfOmHmnw z%gNpaZ|&i>!Y68%8)UOyUl6a4Xha+?O^29PW9L{W7S`lrJpdPB_D$!>a?<(N?6wP(s8CRPwzt1QRETMge zd_GQW1Z1CUo{SThKe~pk(k9O&U_ajkLKb!J0Kd`nfuGyMG=5{o3ED0`FVLzMMw;hf??5v+)5jjT@4HcZ{_ z2wwxf8pIr~JOs}9!s%KF_bmYDDXj-(R;>y@7j{oIXCOA>yTuT}owB0CGiln-apQvO 
zJ?%jL&4_l7G*+Nq{;?!d7QsUx4y)KHIGPbIY!qW>uy9@<>JlVf8};Uwm&30sCFiDd zI*S_?w66hAD?3&5l%u&O78Nz}qO2k1mv!z_FWTET%o?i*^K`CgLt0EA5Nm-h8}^4s zDVDxQ`6Q4^u<9)L)1yU}Xc!9mBecgFizF>86k=T}J~0pp3#GzKr1R@N%@Ajjq~t!s zN<0A`<{C@SJ1Gz&7jYO$Jqf)0592S5<0>I^$Jraj3A~Kpq3{Ptk8TrP2m!lp zlc)-uuyCOLLQ@FLTVqe^24pl}!VbO*IZB;21y=i?6cbeM8sd^te3M&-03xUdg1%D} zGp{HPSA58c1(7j!)Rh6Gt2!3szs@|yu1B;&=A66vIv2jfDQPd$X{_5~%K4&4u;&MV z*;9(=Q?5*^5%wA);ganBKdIiWN2 zM`%9I+MO%_)0SkZ+goH;IYv(s1;tb3!>Dvx$w{u zm}M?1q4J=bZFUQj)2?GObt%fnnHcE}jo7i0Q!@^!=Yn70-v->66hOa+zBv}a9RdsB z12Xe9??AkElXJg` zayrU{+0bhVJkW~V?_u?z>^0t=<0ajJey1*<0`efPH@)P}_fQqMM@o4JZVK>A9pRe& zdslqZmM14z&z0bu66cb+><_S~I|nh*Df8W^hfGLp3~!|MGJ0NOF{sx`TJnYwWU5SM zCv`3sN0qc!ke?GJeGyAGern}VgafASp*-f*i4owa-RKaY$-9tYnFb267aG-ouw=!> zs#IDnaODgGs0#^N%$7lT3zKFZfgbBwh(AW=ic}kCkl-|{Tb+YSPj#A3pv}?%xN8ds zZhko-c@UiiiChY4m8y}wgsUivhrT#tM1EXU%z4ol29|}agtG}{6k5JHbz}2nRfi2t zV@lO)Os54`;Y{^K`eb(K1kfx%PJ*f!@nP}BJuzKq1i9Luyx(PK#QAQ11!X(~kklgB z3AoUBH#Tcv4l1>B950;R3HjyxJ5NPL_($Y9m{2sdgiC7!KFPp=L}bqzuthB2aKpJK zl@?_dB$?;bQS#*lWo>RuVpyg^hp2`VJGN5eFM0w_&sZ(DS;SVQ8V7fQ=Th?%tGN>} z5ZD3XcKhKts9ql7^nTp>US6f{5*#~pR*o9Y-hP?=9s6-6Vfw-9c2um<`&zpXozhr^ zS2V?VCO#yBmMfkpR99{#vws4~{qDEz(#?-V93r)MC8(AM@g88-$W2DRqb>>(-3u|$ zc^d~B-^m)Ay{s~iXVz&0t4Ooe1>1dK;x5!3nv?Ngg>>F_;-Fbf4foE3N3+FOe_u4e z@wvv__FeCz73L~ThI~?Co(kZ_FsDqXjCCX%v?c%k+q5?3TudD=Th%Au9|?S}v|%G) z^aspG)BYgx&rFc)$F`u^dl~ZQy$pFz2H!J5(3}zxP$_!(y%KrWMmg)gt2|zVrcYu* zMpp2pzo{nr6(t0av>29Ux7B-E7fAe*{IMLZk<*9cdd-G1`u72WBom;wu%#5z#SdVppIF@^{t4l$o&Vbg4r-tta(05uZRx zBw^!0-60tH2!->z_+I1@5^*CS@7Xf~+{QG4THHFCh5aKb$bq@=)J~^r`hI{;*hcEy z3pYj{uWiWU!6+W$V=0;5X4Za<)~zVwYwHk=t`05#H}pHh9=qv|hjf5h}J)$Po{ z4EU7|*#)AxBKxx;bz}PVK!rF}O!kL|MdP~E!|4-d@MFjA{6S+S8dF)iW#6WB`MASKC*#_Uw;~-_EkI!C#QdHA4rIe$%!yMZb!T!h2y1 zQKR@Z2T@I*R~X8{w12c(DB<_!x|c%(B2uSHjjnknt{rL`c}p?e9%l#1UI~v|Su$v* zq6Zz@G`9)l;I|W+BWrz5fya(6D=mM4KEF7_ygM4tK>tF7^^5vBpT9r};EaA*lGZUZ zfH^VD+()D=9K!T9mnn%^O2Fpkzesmcf}sArCkFzf91{p@;(X*4dw=#&pXP`gFUuYtg9nq zKQEVhX|v$fAaekG^ey*<0Mzk7?Tfu-LwivJqsRIYrV(V+h~+K9CO2JOj9aT{Nij&B zRbe5my(Ap-dzO`R51d}f&cvriD#oeHOaswLQZdzu77P_4I!N$reWEhrwP;cBgJiKv z?Z2$)eG;J0s%g-FjfbjgSbt3_-LT$?;=MCB^ln|05TJZLQlOfYgtD4s#msPRhA2+J zre6*$3J4{d5nq!zk36q7UsTVgSlU+f>r=Cw{IQcFK?W#l*1AI^c+T7tWmfut!hBI`b-0l29ie4kLlzT|=MQGl33~GC zsy;L3OzJV<1G+A3*IF30F$9jKZQpV#&>>C@?SEDc6(TwO_5nj7C3#JB5?9U_Zhf*R zEe4(oX4OlkDa$x~@0+`qb(I&R{JDXLSI7xhU3oN>PVl!Z^Nr88MhNu<*|3{PN5s!@+$)kitnTVO(#oZ--r|vQb=#Io%d}uf zi&1PgMj&RldeTk8*yQ7@nd3{cK6lz{Pz>1ym!iOz$kEeOFTT6EmijnEnX}M@+|QKV z5d{xZjLzzvY4V(a#zfqoa|OjJsW=>7Gc6su=FYkNVYGq0eMNRv_1N9jC@)hXN{PXE zI4b#K*_1j(sWt*daG<`y#_X6s$11Bd-Chv)t-$AL_`TP$#}+HlN-2ucX|zwmb@8@Mo;5`4Bl`=4pc~=On9|TVQO!g8Pz(KNU94N+oor1fZslt?d3Xls`avGOw zmfPm8wP>sB?vaX`1>^u(Ubu*Nerry5rWMC?WDgP4iEFX55ShzasJp{S3K9q4Be?Lw zX;^`Fd#u)2D-h*(;6#in-n-C}eLxA~yzBgRg5 zZy$m#`jzhPdPhl_zA{w=Stlgun>KMBjFu)OC_|eREqyM_G~ zyZ(KA4>TeNiHYEXNOZ8t|GA&!U+s?nfZr+p2Ca0k$p2^2?%&k=_&v~>?r*U2UE}|> z-2VX{FoHI9WS}Zo|K>$?asKB2(e3*O&&&2N#e&F;DhiNLIMDyv2J_cL{%@D&-*Vpr ze{=jT^idD%Z|(m#i2nzi5&9c+)WatKA1C{7>V5nk=qUQ`o!I||zPiMZ7`)&7>!SDv zq$&273uDukm6B5Y@0a~cHCk$~2iiN0@_y0(L+avhDUgXi!Qblt=@b7$)(1+7|D_(( Uq>qM%1%U-2_3ku_rTzl`A8jtx4*&oF diff --git a/source/lambda/job/dep/llm_bot_dep/loaders/auto.py b/source/lambda/job/dep/llm_bot_dep/loaders/auto.py index 418f2468..22c244b1 100644 --- a/source/lambda/job/dep/llm_bot_dep/loaders/auto.py +++ b/source/lambda/job/dep/llm_bot_dep/loaders/auto.py @@ -1,5 +1,3 @@ - - from llm_bot_dep.loaders.docx import process_doc from llm_bot_dep.loaders.markdown import process_md from .text import process_text diff --git 
a/source/lambda/job/dep/llm_bot_dep/loaders/csv.py b/source/lambda/job/dep/llm_bot_dep/loaders/csv.py index 8df30dd3..e6625505 100644 --- a/source/lambda/job/dep/llm_bot_dep/loaders/csv.py +++ b/source/lambda/job/dep/llm_bot_dep/loaders/csv.py @@ -9,13 +9,14 @@ from langchain.document_loaders.csv_loader import CSVLoader from langchain.document_loaders.helpers import detect_file_encodings + class CustomCSVLoader(CSVLoader): """Load a `CSV` file into a list of Documents. Each document represents one row of the CSV file. The rows are converted into markdown format based on row_count. Output Example: - when row_count = 1, + when row_count = 1, page_document_1 contains: |index|name| |-|-| @@ -25,7 +26,7 @@ class CustomCSVLoader(CSVLoader): |-|-| |2|Demo2| - when row_count = 3, + when row_count = 3, page_document_1 contains: |index|name| |-|-| @@ -43,29 +44,35 @@ class CustomCSVLoader(CSVLoader): def __init__( self, file_path: str, + aws_path: str, source_column: Optional[str] = None, metadata_columns: Sequence[str] = (), csv_args: Optional[Dict] = None, encoding: Optional[str] = None, autodetect_encoding: bool = False, - row_count: int = 1 + row_count: int = 1, ): """ Args: file_path: The path to the CSV file. - source_column: The name of the column in the CSV file to use as the source. - Optional. Defaults to None. + source_column: The name of the column in the CSV file to use as the source. Optional. Defaults to None. metadata_columns: A sequence of column names to use as metadata. Optional. - csv_args: A dictionary of arguments to pass to the csv.DictReader. - Optional. Defaults to None. + csv_args: A dictionary of arguments to pass to the csv.DictReader. Optional. Defaults to None. encoding: The encoding of the CSV file. Optional. Defaults to None. autodetect_encoding: Whether to try to autodetect the file encoding. row_count: How many row in a page document. """ self.row_number = row_count - super().__init__(file_path, source_column, metadata_columns, - csv_args, encoding, autodetect_encoding) + self.aws_path = aws_path + super().__init__( + file_path, + source_column, + metadata_columns, + csv_args, + encoding, + autodetect_encoding, + ) def __read_file(self, csvfile: TextIOWrapper) -> List[Document]: docs = [] @@ -73,8 +80,6 @@ def __read_file(self, csvfile: TextIOWrapper) -> List[Document]: csv_reader = csv.DictReader(csvfile, **self.csv_args) counter = 0 for i, row in enumerate(csv_reader): - # print(f"i: {i}") - # print(f"row: {row}") try: source = ( row[self.source_column] @@ -112,13 +117,14 @@ def __read_file(self, csvfile: TextIOWrapper) -> List[Document]: content = header + "\n" + md_separator + "\n" + row_content print(f"markdown content: {content}") - metadata = {"source": source, "row": i} + metadata = {"source": source, "row": i, "file_path": self.aws_path} for col in self.metadata_columns: try: metadata[col] = row[col] except KeyError: raise ValueError( - f"Metadata column '{col}' not found in CSV file.") + f"Metadata column '{col}' not found in CSV file." 
+ ) doc = Document(page_content=content, metadata=metadata) docs.append(doc) counter = 0 @@ -155,18 +161,20 @@ def load(self) -> List[Document]: return docs + def process_csv(s3, csv_content: str, **kwargs): now = datetime.now() timestamp_str = now.strftime("%Y%m%d%H%M%S") random_uuid = str(uuid.uuid4())[:8] - bucket_name = kwargs['bucket'] - key = kwargs['key'] - row_count = kwargs['csv_row_count'] - local_path = f'/tmp/csv-{timestamp_str}-{random_uuid}.csv' + bucket_name = kwargs["bucket"] + key = kwargs["key"] + row_count = kwargs["csv_row_count"] + local_path = f"/tmp/csv-{timestamp_str}-{random_uuid}.csv" s3.download_file(bucket_name, key, local_path) - loader = CustomCSVLoader(file_path=local_path, row_count=row_count) + loader = CustomCSVLoader( + file_path=local_path, aws_path=f"s3://{bucket_name}/{key}", row_count=row_count + ) data = loader.load() return data - diff --git a/source/lambda/job/dep/llm_bot_dep/loaders/docx.py b/source/lambda/job/dep/llm_bot_dep/loaders/docx.py index 793bd2f4..7b3398d0 100644 --- a/source/lambda/job/dep/llm_bot_dep/loaders/docx.py +++ b/source/lambda/job/dep/llm_bot_dep/loaders/docx.py @@ -28,11 +28,13 @@ class CustomDocLoader(BaseLoader): def __init__( self, file_path: str, + aws_path: str, encoding: Optional[str] = None, autodetect_encoding: bool = False, ): """Initialize with file path.""" self.file_path = file_path + self.aws_path = aws_path self.encoding = encoding self.autodetect_encoding = autodetect_encoding @@ -54,7 +56,7 @@ def clean_document(self, doc: pyDocument): def load(self) -> List[Document]: """Load from file path.""" - metadata = {"file_path": self.file_path, "file_type": "docx"} + metadata = {"file_path": self.aws_path, "file_type": "docx"} def _convert_image(image): # Images are excluded @@ -66,8 +68,8 @@ def _convert_image(image): with open(self.file_path, "rb") as docx_file: result = mammoth.convert_to_html(docx_file, convert_image=mammoth.images.img_element(_convert_image)) - html_content = result.value # The generated HTML - loader = CustomHtmlLoader() + html_content = result.value + loader = CustomHtmlLoader(aws_path=self.aws_path) doc = loader.load(html_content) doc.metadata = metadata @@ -83,7 +85,7 @@ def process_doc(s3, **kwargs): local_path = f'/tmp/doc-{timestamp_str}-{random_uuid}.docx' s3.download_file(bucket_name, key, local_path) - loader = CustomDocLoader(file_path=local_path) + loader = CustomDocLoader(file_path=local_path, aws_path=f"s3://{bucket_name}/{key}") doc = loader.load() splitter = MarkdownHeaderTextSplitter() doc_list = splitter.split_text(doc) diff --git a/source/lambda/job/dep/llm_bot_dep/loaders/html.py b/source/lambda/job/dep/llm_bot_dep/loaders/html.py index 007432dc..48376889 100644 --- a/source/lambda/job/dep/llm_bot_dep/loaders/html.py +++ b/source/lambda/job/dep/llm_bot_dep/loaders/html.py @@ -19,6 +19,12 @@ class CustomHtmlLoader(BaseLoader): different unstructured settings. 
""" + def __init__( + self, + aws_path: str + ): + """Initialize with file path.""" + self.aws_path = aws_path def clean_html(self, html_str: str) -> str: # Filter out DOCTYPE @@ -57,13 +63,15 @@ def load(self, file_content: str): file_content = self.clean_html(file_content) file_content = markdownify.markdownify(file_content, heading_style="ATX") doc = Document(page_content=file_content, - metadata={"file_type": "html"}) + metadata={"file_type": "html", "file_path": self.aws_path}) return doc def process_html(html_str: str, **kwargs): - loader = CustomHtmlLoader() + bucket_name = kwargs["bucket"] + key = kwargs["key"] + loader = CustomHtmlLoader(aws_path=f"s3://{bucket_name}/{key}") doc = loader.load(html_str) splitter = MarkdownHeaderTextSplitter() doc_list = splitter.split_text(doc) diff --git a/source/lambda/job/dep/llm_bot_dep/loaders/markdown.py b/source/lambda/job/dep/llm_bot_dep/loaders/markdown.py index 6c629102..cc6507b2 100644 --- a/source/lambda/job/dep/llm_bot_dep/loaders/markdown.py +++ b/source/lambda/job/dep/llm_bot_dep/loaders/markdown.py @@ -23,25 +23,26 @@ class CustomMarkdownLoader(BaseLoader): def __init__( self, - file_path: str, + aws_path: str, encoding: Optional[str] = None, autodetect_encoding: bool = False, ): """Initialize with file path.""" - self.file_path = file_path + self.aws_path = aws_path self.encoding = encoding self.autodetect_encoding = autodetect_encoding def load(self, content: str) -> Document: """Load from file path.""" - metadata = {"file_path": self.file_path, "file_type": "md"} + metadata = {"file_path": self.aws_path, "file_type": "md"} return Document(page_content=content, metadata=metadata) def process_md(file_content: str, **kwargs): - loader = CustomMarkdownLoader( - file_path=kwargs['bucket'] + "/" + kwargs['key']) + bucket = kwargs['bucket'] + key = kwargs['key'] + loader = CustomMarkdownLoader(aws_path=f"s3://{bucket}/{key}") doc = loader.load(file_content) splitter = MarkdownHeaderTextSplitter() doc_list = splitter.split_text(doc) diff --git a/source/lambda/job/dep/llm_bot_dep/loaders/text.py b/source/lambda/job/dep/llm_bot_dep/loaders/text.py index df8ffe1f..c01af896 100644 --- a/source/lambda/job/dep/llm_bot_dep/loaders/text.py +++ b/source/lambda/job/dep/llm_bot_dep/loaders/text.py @@ -33,7 +33,7 @@ def __init__( def load(self, text_content: str) -> List[Document]: """Load from file path.""" - metadata = {"source": self.file_path} + metadata = {"file_path": self.file_path, "file_type": "txt"} return [Document(page_content=text_content, metadata=metadata)] @@ -48,7 +48,9 @@ def pre_process_text(text_content: str) -> str: def process_text(file_content: str, **kwargs): clean_text = pre_process_text(file_content) - loader = CustomTextLoader(file_path=kwargs['bucket'] + "/" + kwargs['key']) + bucket = kwargs['bucket'] + key = kwargs['key'] + loader = CustomTextLoader(file_path=f"s3://{bucket}/{key}") data = loader.load(clean_text) return data From d3aeb74416221959847f389039fe992372adc960 Mon Sep 17 00:00:00 2001 From: yike5460 Date: Thu, 16 Nov 2023 08:44:04 +0000 Subject: [PATCH 33/37] feat: draft ui for image generation based on llm & sd --- source/panel/.env_sample | 3 + source/panel/.env_sd_sample | 5 + source/panel/image_generation.py | 195 +++++++++++++++++++++++++++++++ 3 files changed, 203 insertions(+) create mode 100644 source/panel/.env_sd_sample create mode 100644 source/panel/image_generation.py diff --git a/source/panel/.env_sample b/source/panel/.env_sample index 34c96a45..0b4ca339 100644 --- a/source/panel/.env_sample +++ 
b/source/panel/.env_sample @@ -1,3 +1,6 @@ PIPELINE_URL='Check from CDK output' S3_BUCKET_NAME='Check from CDK output' GLUE_JOB_NAME='Check from CDK output' + +OPENAI_API_KEY= +OPENAI_API_BASE='https://api.openai.com/v1' diff --git a/source/panel/.env_sd_sample b/source/panel/.env_sd_sample new file mode 100644 index 00000000..162f42a7 --- /dev/null +++ b/source/panel/.env_sd_sample @@ -0,0 +1,5 @@ +OPENAI_API_KEY= +OPENAI_API_BASE= + +API_KEY= +COMMON_API_URL= \ No newline at end of file diff --git a/source/panel/image_generation.py b/source/panel/image_generation.py new file mode 100644 index 00000000..e7ca684c --- /dev/null +++ b/source/panel/image_generation.py @@ -0,0 +1,195 @@ +import os +import boto3 +import json +import logging +import time +import json + +from langchain import PromptTemplate +from langchain.llms.bedrock import Bedrock +from typing import Any, Dict, Generator, Iterable, List, Optional, Tuple +from langchain.docstore.document import Document +from langchain.embeddings import OpenAIEmbeddings +from langchain.vectorstores import FAISS + +from dotenv import load_dotenv +# load .env file with specific name +load_dotenv(dotenv_path='.env_sd') + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +import streamlit as st +import requests +import time + +# load the URL from .env file +API_KEY = os.getenv("API_KEY") +# 'https://xxxx.execute-api.us-west-2.amazonaws.com/prod/' +COMMAND_API_URL = os.getenv("COMMON_API_URL") +GENERATE_API_URL = COMMAND_API_URL + "inference-api/inference" +STATUS_API_URL = COMMAND_API_URL + "inference/get-inference-job" +IMAGE_API_URL = COMMAND_API_URL + "inference/get-inference-job-param-output" + +def deploy_sagemaker_endpoint(): + headers = { + 'Content-Type': 'application/json', + 'Accept': 'application/json', + 'x-api-key': API_KEY + } + inputBody = { + "instance_type": "ml.g4dn.4xlarge", + "initial_instance_count": "1" + } + # https://.execute-api..amazonaws.com/{basePath}/inference/deploy-sagemaker-endpoint + res = requests.post(COMMAND_API_URL + 'inference/deploy-sagemaker-endpoint', headers = headers, json = inputBody) + logger.info("deploy_sagemaker_endpoint: {}".format(res.json())) + +def get_bedrock_client(): + # specify the profile_name to call the bedrock api if needed + bedrock_client = boto3.client('bedrock-runtime') + return bedrock_client + +def claude_template(initial_prompt: str, placeholder: str): + sd_prompt = PromptTemplate( + input_variables=["initial_prompt", "placeholder"], + template=""" + - Transform the input prompt {initial_prompt} into a detailed prompt for an image generation model, describing the scene with vivid and specific attributes that enhance the original concept, only adjective and noun are allowed, verb and adverb are not allowed, each words speperated by comma. + - Generate a negative prompt that specifies what should be avoided in the image, including any elements that contradict the desired style or tone. + - Recommend a list of suitable models from the stable diffusion lineup that best match the style and content described in the detailed prompt. + - Other notes please refer to {placeholder} + + The output should be a plain text in Python List format shown follows, no extra content added beside Positive Prompt, Negative Prompt and Recommended Model List. 
The model list can only be chosen from the fixed list: "sd_xl_base_1.0.safetensors", "majicmixRealistic_v7.safetensors", "x2AnimeFinal_gzku.safetensors": + + [Positive Prompt: , + Negative Prompt: , + Recommended Model List: ] + + For example: + If the input prompt is: "a cute dog in cartoon style", the output should be as follows: + [Positive Prompt: "visually appealing, high-quality image of a cute dog in a vibrant, cartoon style, adorable appearance, expressive eyes, friendly demeanor, colorful and lively, reminiscent of popular animation studios, artwork.", + Negative Prompt: "realism, dark or dull colors, scary or aggressive dog depictions, overly simplistic, stick figure drawings, blurry or distorted images, inappropriate or NSFW content.", + Recommended Model List: ["Stable-diffusion: LahCuteCartoonSDXL_alpha.safetensors", "Other model recommended..."]] + + {initial_prompt} + + """ + ) + # Pass in values to the input variables + prompt = sd_prompt.format(initial_prompt="a cute dog", placeholder="") + return prompt + +def get_llm_processed_prompts(initial_prompt): + # get the bedrock client + bedrock_client = get_bedrock_client() + + prompt = claude_template(initial_prompt, '') + prompt = "\n\nHuman:{}".format(prompt) + "\n\nAssistant:" + logger.debug("final prompt: {}".format(prompt)) + body = json.dumps({ + "prompt": prompt, + "temperature": 0.7, + "top_p": 1, + "top_k": 0, + "max_tokens_to_sample": 500, + "stop_sequences": ["\n\nHuman:"] + }) + # note v2 is not output chinese characters + modelId = "anthropic.claude-v2" + accept = "*/*" + contentType = "application/json" + response = bedrock_client.invoke_model( + body=body, modelId=modelId, accept=accept, contentType=contentType + ) + response_body = json.loads(response.get("body").read()) + raw_completion = response_body.get("completion").split('\n') + logger.info("raw_completion: {}".format(raw_completion)) + + # TODO: extract positive prompt, negative prompt and model list from the raw_completion + + logger.info("positive_prompt: {}".format(positive_prompt)) + logger.info("negative_prompt: {}".format(negative_prompt)) + logger.info("model_list: {}".format(model_list)) + return positive_prompt, negative_prompt, model_list + +def generate_image(positive_prompt, negative_prompt, model: List[str]): + # Construct the API request (this is a placeholder) + headers = { + "Content-Type": "application/json", + "Accept": "application/json", + 'x-api-key': API_KEY + } + body = { + "task_type": "txt2img", + "models": { + model + }, + "sagemaker_endpoint_name": '', + "prompt": positive_prompt, + "negative_prompt": negative_prompt, + "denoising_strength": 0.75 + } + + response = requests.post(COMMAND_API_URL + "inference-api/inference", headers = headers, json = body) + return response.json() + +def check_image_status(inference_id: str): + """Check the status of the image generation.""" + headers = { + 'Accept': 'application/json', + 'x-api-key': API_KEY + } + # TODO, the schema is not completed according to the API document + response = requests.get(GENERATE_API_URL, headers = headers) + return response.json() + +def get_image_url(inference_id): + """Get the URL of the generated image.""" + response = requests.get(f"{IMAGE_API_URL}/{inference_id}") + return response.json() + +def streamlit(): + # Streamlit layout + st.title("Image Generation Application") + + # User input + prompt = st.text_input("Enter a prompt for the image:", "A cute dog") + + # Button to start the image generation process + if st.button('Generate Image'): + 
positive_prompt, negative_prompt, model_list = get_llm_processed_prompts(prompt) + # Assuming the first model is chosen for simplicity + # chosen_model = model_list.split('\n')[0] + + # Generate the detailed prompt + response = generate_image(positive_prompt, negative_prompt, model_list) + + # Display image (placeholder for actual image retrieval logic) + st.image("https://picsum.photos/200", caption=positive_prompt) + + if response.status_code == 200: + inference_id = response.json()['inference_id'] + # Check the status periodically + with st.empty(): + while True: + status_response = check_image_status(inference_id) + if status_response['status'] == 'succeeded': + image_url = get_image_url(inference_id)['url'] + st.image(image_url) + break + elif status_response['status'] == 'failed': + st.error("Image generation failed.") + break + else: + st.text("Waiting for the image to be generated...") + time.sleep(5) # Sleep for a while before checking the status again + else: + st.error("Failed to start the image generation process.") + +# main entry point for debugging +if __name__ == "__main__": + # deploy_sagemaker_endpoint() + # get_llm_processed_prompts("a cute dog") + + # python -m streamlit run image-generation.py --server.port 8088 + streamlit() From 498c76da294fdc3da92adb450170fbcc8daa3f62 Mon Sep 17 00:00:00 2001 From: yike5460 Date: Thu, 16 Nov 2023 09:24:32 +0000 Subject: [PATCH 34/37] chore: tweak for endpoint create --- source/panel/image_generation.py | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/source/panel/image_generation.py b/source/panel/image_generation.py index e7ca684c..b0bb8624 100644 --- a/source/panel/image_generation.py +++ b/source/panel/image_generation.py @@ -31,20 +31,24 @@ STATUS_API_URL = COMMAND_API_URL + "inference/get-inference-job" IMAGE_API_URL = COMMAND_API_URL + "inference/get-inference-job-param-output" -def deploy_sagemaker_endpoint(): +def deploy_sagemaker_endpoint(instance_type: str = "ml.g4dn.4xlarge", initial_instance_count: int = 1, endpoint_name: str = "default-endpoint-for-llm-bot"): headers = { - 'Content-Type': 'application/json', - 'Accept': 'application/json', - 'x-api-key': API_KEY + 'Content-Type': 'application/json', + 'Accept': 'application/json', + 'x-api-key': API_KEY } inputBody = { - "instance_type": "ml.g4dn.4xlarge", - "initial_instance_count": "1" + "instance_type": instance_type, + "initial_instance_count": initial_instance_count, + "endpoint_name": endpoint_name } # https://.execute-api..amazonaws.com/{basePath}/inference/deploy-sagemaker-endpoint res = requests.post(COMMAND_API_URL + 'inference/deploy-sagemaker-endpoint', headers = headers, json = inputBody) logger.info("deploy_sagemaker_endpoint: {}".format(res.json())) +def upload_model(): + pass + def get_bedrock_client(): # specify the profile_name to call the bedrock api if needed bedrock_client = boto3.client('bedrock-runtime') @@ -112,7 +116,7 @@ def get_llm_processed_prompts(initial_prompt): logger.info("model_list: {}".format(model_list)) return positive_prompt, negative_prompt, model_list -def generate_image(positive_prompt, negative_prompt, model: List[str]): +def generate_image(endpoint_name: str, positive_prompt: str, negative_prompt: str, model: List[str]): # Construct the API request (this is a placeholder) headers = { "Content-Type": "application/json", @@ -124,7 +128,7 @@ def generate_image(positive_prompt, negative_prompt, model: List[str]): "models": { model }, - "sagemaker_endpoint_name": '', + 
"sagemaker_endpoint_name": endpoint_name, "prompt": positive_prompt, "negative_prompt": negative_prompt, "denoising_strength": 0.75 @@ -189,7 +193,11 @@ def streamlit(): # main entry point for debugging if __name__ == "__main__": # deploy_sagemaker_endpoint() - # get_llm_processed_prompts("a cute dog") + # upload_model() + positive_prompt, negative_prompt, model_list = get_llm_processed_prompts("a cute dog") + # The endpoint fixed for now, since the deploy_sagemaker_endpoint() won't return the endpoint name + response = generate_image("default-endpoint-for-llm-bot", positive_prompt, negative_prompt, model_list) + logger.info("generate image response: {}".format(response)) # python -m streamlit run image-generation.py --server.port 8088 - streamlit() + # streamlit() From 4fdbf07e89f7262996653c090d36201a0bbdfb3b Mon Sep 17 00:00:00 2001 From: Xu Han Date: Thu, 16 Nov 2023 09:34:24 +0000 Subject: [PATCH 35/37] feat: Add sagemaker endpoint inference code for nougat. --- source/model/etl/code/etl_model.tar.gz | Bin 0 -> 2111 bytes source/model/etl/code/model.py | 144 +++++++++++++++++++++++ source/model/etl/code/requirements.txt | 7 ++ source/model/etl/code/serving.properties | 2 + 4 files changed, 153 insertions(+) create mode 100644 source/model/etl/code/etl_model.tar.gz create mode 100644 source/model/etl/code/model.py create mode 100644 source/model/etl/code/requirements.txt create mode 100644 source/model/etl/code/serving.properties diff --git a/source/model/etl/code/etl_model.tar.gz b/source/model/etl/code/etl_model.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..82eca42c2636b8dd2273e5b50819790c3b4dd680 GIT binary patch literal 2111 zcmV-F2*CFriwFP!000001ML~xZrnE3&+`?83d5ShlILoB2hajdb4XDa&up`UO;Ri{gfSAS!*jop&Ur#JzuX*s#xa6% zem=wJ_+mclKJj%jzL*`2XYi?ULu#_Bel-h(vrG{>0nUf@RyNf@wdE4*~-o6EzJV^<>l)aKMXY-6(q1k z{S{T;>oqqFizugII2;V*6d*K@ew;B{DA$ow2ROeBe50p0e*5Lu-+uWQ`~h6b1@#)T9gu(b08#DW&v{A4 zTQGgdE^FJc+xRAX;G66aLDD=b6pQx~V!8*>ZP;ymvpw+5b_k(e#*^%y``Kc?2ia}d zZG7i@;5+Yo*RxOKT^D%Yc~hLa&hbCIz=yyNd6(Hr7gYo4D8msVzZh%heJeW-X zc=piG^03e_1n(bZ=%mAtcl^pbe&ZddC&Nb{crnURVzX#8C#VGSa_U394Gl}~GhKiJ zQ-@B;-{>Z2nLQ=Xc(Mulm224GGwVJpQtqaLId5L^HQSNN9oij<2x zLhK+Gx^Wm1@2^;Xo9cUYPvc5~GSTcmnN2^DnCIYL=x6X++)`*e@VhK&%u+@Za{c^8 z!_2El+$8)Cl)yzEDSx1GePk_UVaWh*pCBp9Y?RApThyl~lYbqEfE8^2;P8o|jfx=7FQf47b8U zZi#w18Tn&>-Z_SrbBaQ=78;%sr%wnVoR*fJ z<@noD)N34n&w#LjZ)iL32Rj1?w1ctJw3)uB3bz4x!Z0$K@Ja<8`0JO~S9%@Xb$eaW zdlkGDb-KVYd_vui*EGHfjnBq#qH?z!xdt?}2i*wj(EB#ZGMZeQnF*PlQKh50Ct&Kl zHukn=Nh2^rV*1UR1!N4O%x~Fz#HjO)+;vz#5ierk1wX>c2U415$lXoIcZSA0>u9Yd z3c=l6o4>lbD}hAHiU<=&Xup5`_03%bQP#H7`dO=kL}bNoY0(B2QU}vOE3G7YUKN8} zATw$Q0~bA8^Tccnzg;s)(yEB{H$kHW(OQJ3Hb^Dxz_9)9?W^xeB{A{<<`pX_^!^Zj zMN3wrMMfI>Ci8T8Uvr>!$rqRo+~o!mW{snpwyaxfNNb%1WWpr_ z@x_LKDet&Q2&`8*(3QT%(P)_7LxKe@ofs3JXwgn!7rf=XX2_uJc{4%kCOL033`k~B zXbf1Y@77e%exe22W;~AmLykrDzkANB7#V#JQZxp6&STAe?V$3MP&l1|dqCcju#TFG znidm6bVF9q)|B9Ck{Uq)OHa4t3NbuEY>kaR2iAXE zIki0QxGkwk?9Sqr29)ln2TP~cab1sg&>NWQu?8)JaT#7hLBi3OQBW21W|k7O?lT!e zps0;I>~;ff+o)y27z^x<6;)v*!w-@dUA5GyMV~UFchLFP!a%>FZPq;?7?5_!_9WY? zXn?mr*-iEis}6?i*01c{lWoAe)pOB{6sy!OmD+Us7vaW0bx-xsDAsw3-&t&t)WDBD z8=DJ6I2T1p(x~2`9}fQPyU)WoIui8fiV13}x4KuKgT^1<|BptK$<*HeU!0F7N2Bo! zRuA|8Ut;JsG6=^0$BHs-kVuH5@FDdaLgsQ@uZ=g)ccI6}MP| zpjMP{^no+Ur>F6;_#e&Zb^K4q=d%lp|LJ&oi2pA!XaOAv4X$-;L&WQY19W$i3#jP= pFC?-sp({aYF#fZdJ&eORjKesL!#Iq?IE?>!{0 str: + """Executes the `nougat` command to convert the specified PDF file to Markdown format. + + Args: + file_path (Path): The path to the PDF file to be converted. 
+ + Returns: + str: The Markdown content resulting from the `nougat` conversion. + """ + # nougat ./paperSnapshot.pdf --full-precision --markdown -m 0.1.0-base -o tmp --recompute + cli_command = ["nougat", str(file_path), "full-precision", "--markdown", "-m", "0.1.0-base", "-o", "/tmp", "--recompute"] + + try: + result = subprocess.run( + cli_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True + ) + result.check_returncode() + return result.stdout + + except subprocess.CalledProcessError as e: + logger.info( + f"Nougat command failed with return code {e.returncode}: {e.stderr}" + ) + raise RuntimeError("Nougat command failed.") from e + +def process_pdf(bucket, object_key, destination_bucket, **kwargs): + """ + Process a given PDF file and extracts structured information from it. + + This function reads a PDF file, converts it to HTML using PDFMiner, then extracts + and structures the information into a list of dictionaries containing headings and content. + + Parameters: + s3 (boto3.client): The S3 client to use for downloading the PDF file. + pdf (bytes): The PDF file to process. + **kwargs: Arbitrary keyword arguments. The function expects 'bucket' and 'key' among the kwargs + to specify the S3 bucket and key where the PDF file is located. + + Returns: + list[Doucment]: A list of Document objects, each representing a semantically grouped section of the PDF file. Each Document object contains a metadata defined in metadata_template, and page_content string with the text content of that section. + """ + + local_path = str(os.path.basename(object_key)) + local_path = f"/tmp/{local_path}" + # download to local for futher processing + logger.info(f"Downloading {object_key} to {local_path}") + s3.download_file(Bucket=bucket, Key=object_key, Filename=local_path) + + nougat(local_path) + + # Rest of your code for reading and processing the output + file_path = Path(local_path) + output_path = Path("/tmp") / f"{file_path.stem}.mmd" + with output_path.open("r") as f: + content = f.read() + + filename = file_path.stem + destination_s3_path = upload_chunk_to_s3(content, destination_bucket, filename, "before-splitting") + + return destination_s3_path + + +def handle(inputs: Input): + + if inputs.is_empty(): + return None + data = inputs.get_as_json() + + bucket = data["s3_bucket"] + object_key = data["object_key"] + + destination_bucket = data["destination_bucket"] + + logging.info(f"Processing bucket: {bucket}, object_key: {object_key}") + + destination_path = process_pdf(bucket, object_key, destination_bucket) + + result = { + "destination_path": destination_path + } + + return Output().add_as_json(result) diff --git a/source/model/etl/code/requirements.txt b/source/model/etl/code/requirements.txt new file mode 100644 index 00000000..2ee0bac2 --- /dev/null +++ b/source/model/etl/code/requirements.txt @@ -0,0 +1,7 @@ +boto3==1.28.85 +torch==2.0.1 +opencv-contrib-python-headless==4.8.1.78 +nougat-ocr==0.1.17 +langchain==0.0.335 +unstructured==0.10.30 +unstructured[pdf] diff --git a/source/model/etl/code/serving.properties b/source/model/etl/code/serving.properties new file mode 100644 index 00000000..abafc0f9 --- /dev/null +++ b/source/model/etl/code/serving.properties @@ -0,0 +1,2 @@ +engine=Python +option.tensor_parallel_degree=1 \ No newline at end of file From a4fdabe722e0edaf94c32e86c1c75bc6e3c19e6e Mon Sep 17 00:00:00 2001 From: Xu Han Date: Thu, 16 Nov 2023 11:41:34 +0000 Subject: [PATCH 36/37] feat: add nougat inference in glue script --- 
source/infrastructure/lib/etl/etl-stack.ts | 1 + .../lambda/job/dep/llm_bot_dep/loaders/pdf.py | 86 ++++++++++++++++-- source/lambda/job/glue-job-script.py | 6 +- source/model/etl/code/etl_model.tar.gz | Bin 2111 -> 2107 bytes source/model/etl/code/model.py | 6 +- 5 files changed, 86 insertions(+), 13 deletions(-) diff --git a/source/infrastructure/lib/etl/etl-stack.ts b/source/infrastructure/lib/etl/etl-stack.ts index 5c3abb51..29d86ac9 100644 --- a/source/infrastructure/lib/etl/etl-stack.ts +++ b/source/infrastructure/lib/etl/etl-stack.ts @@ -122,6 +122,7 @@ export class EtlStack extends NestedStack { '--AOS_ENDPOINT': props._domainEndpoint, '--REGION': props._region, '--EMBEDDING_MODEL_ENDPOINT': props._embeddingEndpoint, + '--ETL_MODEL_ENDPOINT': 'test-etl-endpoint', '--DOC_INDEX_TABLE': 'chatbot-index', '--additional-python-modules': 'langchain==0.0.312,beautifulsoup4==4.12.2,requests-aws4auth==1.2.3,boto3==1.28.84,openai==0.28.1,pyOpenSSL==23.3.0,tenacity==8.2.3,markdownify==0.11.6,mammoth==1.6.0,chardet==5.2.0,python-docx==1.1.0,nltk==3.8.1,pdfminer.six==20221105', // add multiple extra python files diff --git a/source/lambda/job/dep/llm_bot_dep/loaders/pdf.py b/source/lambda/job/dep/llm_bot_dep/loaders/pdf.py index 29506139..c791902a 100644 --- a/source/lambda/job/dep/llm_bot_dep/loaders/pdf.py +++ b/source/lambda/job/dep/llm_bot_dep/loaders/pdf.py @@ -37,6 +37,32 @@ def detect_language(input): else: return 'en' +def invoke_etl_model(smr_client, etl_model_endpoint, bucket, key, res_bucket): + response_model = smr_client.invoke_endpoint( + EndpointName=etl_model_endpoint, + Body=json.dumps( + { + "s3_bucket": bucket, + "object_key": key, + "destination_bucket": res_bucket + } + ), + ContentType="application/json", + ) + + json_str = response_model['Body'].read().decode('utf8') + json_obj = json.loads(json_str) + markdown_prefix = json_obj['destination_prefix'] + + return markdown_prefix + +def load_content_from_s3(s3, bucket, key): + """ + This function loads the content of a file from S3 and returns it as a string. 
+ """ + logger.info(f"Loading content from s3://{bucket}/{key}") + obj = s3.get_object(Bucket=bucket, Key=key) + return obj['Body'].read().decode('utf-8') def process_pdf(s3, pdf: bytes, **kwargs): """ @@ -57,6 +83,10 @@ def process_pdf(s3, pdf: bytes, **kwargs): logger.info("Processing PDF file...") bucket = kwargs['bucket'] key = kwargs['key'] + + etl_model_endpoint = kwargs.get('etl_model_endpoint', None) + smr_client = kwargs.get('smr_client', None) + res_bucket = kwargs.get('res_bucket', None) # extract file name also in consideration of file name with blank space local_path = str(os.path.basename(key)) # download to local for futher processing @@ -66,15 +96,55 @@ def process_pdf(s3, pdf: bytes, **kwargs): loader = PDFMinerPDFasHTMLLoader(local_path) # entire PDF is loaded as a single Document file_content = loader.load()[0].page_content - - loader = CustomHtmlLoader() - doc = loader.load(file_content) - splitter = MarkdownHeaderTextSplitter() - doc_list = splitter.split_text(doc) - for doc in doc_list: - doc.metadata = metadata_template - doc.metadata['file_path'] = f"s3://{bucket}/{key}" + detected_lang = detect_language(file_content) + logger.info(f"Detected language: {detected_lang}") + + if not etl_model_endpoint or not smr_client or not res_bucket: + logger.info("No ETL model endpoint or SageMaker Runtime client provided, using default PDF loader...") + loader = CustomHtmlLoader(aws_path=f"s3://{bucket}/{key}") + doc = loader.load(file_content) + splitter = MarkdownHeaderTextSplitter() + doc_list = splitter.split_text(doc) + + for doc in doc_list: + doc.metadata = metadata_template + doc.metadata['file_path'] = f"s3://{bucket}/{key}" + else: + if detected_lang == 'ch': + logger.info("Detected language is Chinese, using default PDF loader...") + loader = CustomHtmlLoader(aws_path=f"s3://{bucket}/{key}") + doc = loader.load(file_content) + splitter = MarkdownHeaderTextSplitter() + doc_list = splitter.split_text(doc) + + for doc in doc_list: + doc.metadata = metadata_template + doc.metadata['file_path'] = f"s3://{bucket}/{key}" + else: + logger.info("Detected language is English, using ETL model endpoint...") + markdown_prefix = invoke_etl_model(smr_client, etl_model_endpoint, bucket, key, res_bucket) + logger.info(f"Markdown file path: s3://{res_bucket}/{markdown_prefix}") + content = load_content_from_s3(s3, res_bucket, markdown_prefix) + + content = ( + content.replace(r"\(", "$") + .replace(r"\)", "$") + .replace(r"\[", "$$") + .replace(r"\]", "$$") + ) + + # extract headings hierarchically + headings = extract_headings(content) + + # assemble metadata from template + metadata = metadata_template + metadata["content_type"] = "paragraph" + metadata["heading_hierarchy"] = headings + metadata["chunk_id"] = "$$" + metadata["file_path"] = f"s3://{bucket}/{key}" + markdown_splitter = MarkdownHeaderTextSplitter() + doc_list = markdown_splitter.split_text(Document(page_content=content, metadata=metadata)) return doc_list diff --git a/source/lambda/job/glue-job-script.py b/source/lambda/job/glue-job-script.py index 3e472bfa..5574cd64 100644 --- a/source/lambda/job/glue-job-script.py +++ b/source/lambda/job/glue-job-script.py @@ -31,11 +31,12 @@ os.environ['NLTK_DATA'] = '/tmp/nltk_data' # Parse arguments -args = getResolvedOptions(sys.argv, ['JOB_NAME', 'S3_BUCKET', 'S3_PREFIX', 'AOS_ENDPOINT', 'EMBEDDING_MODEL_ENDPOINT', 'REGION', 'RES_BUCKET', 'OFFLINE', 'QA_ENHANCEMENT', 'BATCH_INDICE', 'ProcessedObjectsTable']) +args = getResolvedOptions(sys.argv, ['JOB_NAME', 'S3_BUCKET', 
'S3_PREFIX', 'AOS_ENDPOINT', 'EMBEDDING_MODEL_ENDPOINT', 'ETL_MODEL_ENDPOINT', 'REGION', 'RES_BUCKET', 'OFFLINE', 'QA_ENHANCEMENT', 'BATCH_INDICE', 'ProcessedObjectsTable'])
 s3_bucket = args['S3_BUCKET']
 s3_prefix = args['S3_PREFIX']
 aosEndpoint = args['AOS_ENDPOINT']
 embeddingModelEndpoint = args['EMBEDDING_MODEL_ENDPOINT']
+etlModelEndpoint = args['ETL_MODEL_ENDPOINT']
 region = args['REGION']
 res_bucket = args['RES_BUCKET']
 offline = args['OFFLINE']
@@ -45,6 +46,7 @@
 processedObjectsTable = args['ProcessedObjectsTable']
 
 s3 = boto3.client('s3')
+smr_client = boto3.client("sagemaker-runtime")
 dynamodb = boto3.resource('dynamodb')
 table = dynamodb.Table(processedObjectsTable)
 
@@ -168,7 +170,7 @@ def iterate_s3_files(bucket: str, prefix: str) -> Generator:
         response = s3.get_object(Bucket=bucket, Key=key)
         file_content = response['Body'].read()
         # assemble bucket and key as args for the callback function
-        kwargs = {'bucket': bucket, 'key': key}
+        kwargs = {'bucket': bucket, 'key': key, 'etl_model_endpoint': etlModelEndpoint, 'smr_client': smr_client, 'res_bucket': res_bucket}
 
         if file_type == 'txt':
             yield 'txt', decode_file_content(file_content), kwargs
diff --git a/source/model/etl/code/etl_model.tar.gz b/source/model/etl/code/etl_model.tar.gz
index 82eca42c2636b8dd2273e5b50819790c3b4dd680..54b231ff21607db51952225dd0894ac165a03f84 100644
GIT binary patch
literal 2107
[base85-encoded binary payload omitted]

literal 2111
[base85-encoded binary payload omitted]

From: yike5460
Date: Thu, 16 Nov 2023 12:56:45 +0000
Subject: [PATCH 37/37] chore: update with langchain conversational class &
 tweak on prompt template to make stable output

---
 source/panel/image_generation.py | 100 ++++++++++++++++---------------
 1 file changed, 51 insertions(+), 49 deletions(-)

diff --git a/source/panel/image_generation.py b/source/panel/image_generation.py
index b0bb8624..245b24e8 100644
--- a/source/panel/image_generation.py
+++ b/source/panel/image_generation.py
@@ -5,12 +5,13 @@
 import time
 import json
 
-from langchain import PromptTemplate
+from langchain.prompts import PromptTemplate
 from langchain.llms.bedrock import Bedrock
 from typing import Any, Dict, Generator, Iterable, List, Optional, Tuple
 from langchain.docstore.document import Document
-from langchain.embeddings import OpenAIEmbeddings
+from langchain.chains import ConversationChain
 from langchain.vectorstores import FAISS
+from langchain.memory import ConversationBufferMemory
 
 from dotenv import load_dotenv
 # load .env file with specific name
 load_dotenv(dotenv_path='.env_sd')
@@ -49,19 +50,25 @@
 def upload_model():
     pass
 
-def get_bedrock_client():
+def get_bedrock_llm():
     # specify the profile_name to call the bedrock api if needed
     bedrock_client = boto3.client('bedrock-runtime')
-    return bedrock_client
+
+    modelId = "anthropic.claude-v2"
+    cl_llm = Bedrock(
+        model_id=modelId,
+        client=bedrock_client,
+        model_kwargs={"max_tokens_to_sample": 1000},
+    )
+    return cl_llm
+
+sd_prompt = PromptTemplate.from_template(
+    """
+    Human:
+    - Transform the input prompt {input} into a detailed prompt for an image generation model, describing the scene with vivid and specific attributes that enhance the original concept; only adjectives and nouns are allowed, verbs and adverbs are not allowed, each word separated by a comma.
+    - Generate a negative prompt that specifies what should be avoided in the image, including any elements that contradict the desired style or tone.
- Recommend a list of suitable models from the stable diffusion lineup that best match the style and content described in the detailed prompt. - - Other notes please refer to {placeholder} + - Other notes please refer to the following example: The output should be a plain text in Python List format shown follows, no extra content added beside Positive Prompt, Negative Prompt and Recommended Model List. The model list can only be chosen from the fixed list: "sd_xl_base_1.0.safetensors", "majicmixRealistic_v7.safetensors", "x2AnimeFinal_gzku.safetensors": @@ -74,46 +81,41 @@ def claude_template(initial_prompt: str, placeholder: str): [Positive Prompt: "visually appealing, high-quality image of a cute dog in a vibrant, cartoon style, adorable appearance, expressive eyes, friendly demeanor, colorful and lively, reminiscent of popular animation studios, artwork.", Negative Prompt: "realism, dark or dull colors, scary or aggressive dog depictions, overly simplistic, stick figure drawings, blurry or distorted images, inappropriate or NSFW content.", Recommended Model List: ["Stable-diffusion: LahCuteCartoonSDXL_alpha.safetensors", "Other model recommended..."]] - - {initial_prompt} - - """ - ) - # Pass in values to the input variables - prompt = sd_prompt.format(initial_prompt="a cute dog", placeholder="") - return prompt + + Current conversation: + + {history} + + + Here is the human's next reply: + + {input} + + + Assistant: + """) def get_llm_processed_prompts(initial_prompt): - # get the bedrock client - bedrock_client = get_bedrock_client() - - prompt = claude_template(initial_prompt, '') - prompt = "\n\nHuman:{}".format(prompt) + "\n\nAssistant:" - logger.debug("final prompt: {}".format(prompt)) - body = json.dumps({ - "prompt": prompt, - "temperature": 0.7, - "top_p": 1, - "top_k": 0, - "max_tokens_to_sample": 500, - "stop_sequences": ["\n\nHuman:"] - }) - # note v2 is not output chinese characters - modelId = "anthropic.claude-v2" - accept = "*/*" - contentType = "application/json" - response = bedrock_client.invoke_model( - body=body, modelId=modelId, accept=accept, contentType=contentType + cl_llm = get_bedrock_llm() + memory = ConversationBufferMemory() + conversation = ConversationChain( + llm=cl_llm, verbose=False, memory=memory ) - response_body = json.loads(response.get("body").read()) - raw_completion = response_body.get("completion").split('\n') - logger.info("raw_completion: {}".format(raw_completion)) + + conversation.prompt = sd_prompt + response = conversation.predict(input=initial_prompt) + logger.info("the first invoke: {}".format(response)) + # logger.info("the second invoke: {}".format(conversation.predict(input="change to realist style"))) - # TODO: extract positive prompt, negative prompt and model list from the raw_completion - - logger.info("positive_prompt: {}".format(positive_prompt)) - logger.info("negative_prompt: {}".format(negative_prompt)) - logger.info("model_list: {}".format(model_list)) + """ + [Positive Prompt: visually appealing, high-quality image of a big, large, muscular horse with powerful body, majestic stance, flowing mane, detailed texture, vivid color, striking photography., + Negative Prompt: ugly, distorted, inappropriate or NSFW content, + Recommended Model List: ["sd_xl_base_1.0.safetensors"]] + """ + positive_prompt = response.split('Positive Prompt: ')[1].split('Negative Prompt: ')[0].strip() + negative_prompt = response.split('Negative Prompt: ')[1].split('Recommended Model List: ')[0].strip() + model_list = response.split('Recommended Model 
List: ')[1].strip().replace('[', '').replace(']', '').replace('"', '').split(',') + logger.info("positive_prompt: {}\n negative_prompt: {}\n model_list: {}".format(positive_prompt, negative_prompt, model_list)) return positive_prompt, negative_prompt, model_list def generate_image(endpoint_name: str, positive_prompt: str, negative_prompt: str, model: List[str]): @@ -194,10 +196,10 @@ def streamlit(): if __name__ == "__main__": # deploy_sagemaker_endpoint() # upload_model() - positive_prompt, negative_prompt, model_list = get_llm_processed_prompts("a cute dog") + positive_prompt, negative_prompt, model_list = get_llm_processed_prompts("a big horse") # The endpoint fixed for now, since the deploy_sagemaker_endpoint() won't return the endpoint name - response = generate_image("default-endpoint-for-llm-bot", positive_prompt, negative_prompt, model_list) - logger.info("generate image response: {}".format(response)) + # response = generate_image("default-endpoint-for-llm-bot", positive_prompt, negative_prompt, model_list) + # logger.info("generate image response: {}".format(response)) # python -m streamlit run image-generation.py --server.port 8088 # streamlit()
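
With the Streamlit entry point commented out above, the panel script is left to be driven from plain Python. A minimal driver along the following lines exercises the same flow end to end; the module name image_generation, the fixed endpoint name, and the polling behaviour are assumptions carried over from the samples in the patches, not part of the patch series itself:

import time

from image_generation import (
    check_image_status,
    generate_image,
    get_image_url,
    get_llm_processed_prompts,
)

def run_once(initial_prompt: str) -> str:
    # Expand the raw idea into SD-ready prompts via the Bedrock-backed chain
    positive_prompt, negative_prompt, model_list = get_llm_processed_prompts(initial_prompt)
    # "default-endpoint-for-llm-bot" mirrors the fixed endpoint name used above
    response = generate_image("default-endpoint-for-llm-bot", positive_prompt, negative_prompt, model_list)
    inference_id = response["inference_id"]
    # Poll the asynchronous inference job the same way the Streamlit loop does
    while True:
        status = check_image_status(inference_id)["status"]
        if status == "succeeded":
            return get_image_url(inference_id)["url"]
        if status == "failed":
            raise RuntimeError("image generation failed")
        time.sleep(5)

if __name__ == "__main__":
    print(run_once("a big horse"))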