diff --git a/introduction_to_amazon_algorithms/jumpstart-foundation-models/question_answering_retrieval_augmented_generation/question_answering_langchain_jumpstart.ipynb b/introduction_to_amazon_algorithms/jumpstart-foundation-models/question_answering_retrieval_augmented_generation/question_answering_langchain_jumpstart.ipynb
index a3edd26314..0a18fe4073 100644
--- a/introduction_to_amazon_algorithms/jumpstart-foundation-models/question_answering_retrieval_augmented_generation/question_answering_langchain_jumpstart.ipynb
+++ b/introduction_to_amazon_algorithms/jumpstart-foundation-models/question_answering_retrieval_augmented_generation/question_answering_langchain_jumpstart.ipynb
@@ -43,7 +43,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 3,
    "metadata": {
     "collapsed": false,
     "jupyter": {
@@ -56,66 +56,26 @@
    },
    "outputs": [],
    "source": [
-    "!pip install --upgrade sagemaker --quiet\n",
-    "!pip install ipywidgets==7.0.0 --quiet\n",
-    "!pip install langchain==0.0.148 --quiet\n",
-    "!pip install faiss-cpu --quiet"
+    "!pip install --upgrade sagemaker --quiet\n",
+    "!pip install langchain==0.0.148 --quiet\n",
+    "!pip install faiss-cpu --quiet"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 4,
    "metadata": {
     "tags": []
    },
    "outputs": [],
    "source": [
-    "import time\n",
-    "import sagemaker, boto3, json\n",
-    "from sagemaker.session import Session\n",
-    "from sagemaker.model import Model\n",
-    "from sagemaker import image_uris, model_uris, script_uris, hyperparameters\n",
-    "from sagemaker.predictor import Predictor\n",
+    "import boto3, json\n",
+    "from sagemaker import Session\n",
     "from sagemaker.utils import name_from_base\n",
-    "from typing import Any, Dict, List, Optional\n",
-    "from langchain.embeddings import SagemakerEndpointEmbeddings\n",
-    "from langchain.llms.sagemaker_endpoint import ContentHandlerBase\n",
-    "\n",
-    "sagemaker_session = Session()\n",
-    "aws_role = sagemaker_session.get_caller_identity_arn()\n",
-    "aws_region = boto3.Session().region_name\n",
-    "sess = sagemaker.Session()\n",
-    "model_version = \"1.*\""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "def query_endpoint_with_json_payload(encoded_json, endpoint_name, content_type=\"application/json\"):\n",
-    "    client = boto3.client(\"runtime.sagemaker\")\n",
-    "    response = client.invoke_endpoint(\n",
-    "        EndpointName=endpoint_name, ContentType=content_type, Body=encoded_json\n",
-    "    )\n",
-    "    return response\n",
-    "\n",
+    "from sagemaker.jumpstart.model import JumpStartModel\n",
     "\n",
-    "def parse_response_model_flan_t5(query_response):\n",
-    "    model_predictions = json.loads(query_response[\"Body\"].read())\n",
-    "    generated_text = model_predictions[\"generated_texts\"]\n",
-    "    return generated_text\n",
-    "\n",
-    "\n",
-    "def parse_response_multiple_texts_bloomz(query_response):\n",
-    "    generated_text = []\n",
-    "    model_predictions = json.loads(query_response[\"Body\"].read())\n",
-    "    for x in model_predictions[0]:\n",
-    "        generated_text.append(x[\"generated_text\"])\n",
-    "    return generated_text"
+    "sagemaker_session = Session()\n",
+    "aws_region = boto3.Session().region_name"
    ]
   },
@@ -127,7 +83,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 9,
    "metadata": {
     "tags": []
    },
@@ -135,30 +91,21 @@
    "source": [
     "_MODEL_CONFIG_ = {\n",
     "    \"huggingface-text2text-flan-t5-xxl\": {\n",
-    "        \"instance type\": \"ml.g5.12xlarge\",\n",
-    "        \"env\": {\"SAGEMAKER_MODEL_SERVER_WORKERS\": \"1\", \"TS_DEFAULT_WORKERS_PER_MODEL\": \"1\"},\n",
-    "        \"parse_function\": parse_response_model_flan_t5,\n",
-    "        \"prompt\": \"\"\"Answer based on context:\\n\\n{context}\\n\\n{question}\"\"\",\n",
-    "    },\n",
-    "    \"huggingface-textembedding-gpt-j-6b\": {\n",
-    "        \"instance type\": \"ml.g5.24xlarge\",\n",
-    "        \"env\": {\"SAGEMAKER_MODEL_SERVER_WORKERS\": \"1\", \"TS_DEFAULT_WORKERS_PER_MODEL\": \"1\"},\n",
+    "        \"model_version\": \"2.*\",\n",
+    "        \"instance type\": \"ml.g5.12xlarge\"\n",
     "    },\n",
-    "    # \"huggingface-textgeneration1-bloomz-7b1-fp16\": {\n",
-    "    #     \"instance type\": \"ml.g5.12xlarge\",\n",
-    "    #     \"env\": {},\n",
-    "    #     \"parse_function\": parse_response_multiple_texts_bloomz,\n",
-    "    #     \"prompt\": \"\"\"question: \\\"{question}\"\\\\n\\nContext: \\\"{context}\"\\\\n\\nAnswer:\"\"\",\n",
+    "    \"huggingface-textembedding-all-MiniLM-L6-v2\": {\n",
+    "        \"model_version\": \"1.*\",\n",
+    "        \"instance type\": \"ml.g5.24xlarge\"\n",
+    "    },\n",
+    "    # \"huggingface-textembedding-all-MiniLM-L6-v2\": {\n",
+    "    #     \"model_version\": \"3.*\",\n",
+    "    #     \"instance type\": \"ml.g5.12xlarge\"\n",
     "    # },\n",
     "    # \"huggingface-text2text-flan-ul2-bf16\": {\n",
-    "    #     \"instance type\": \"ml.g5.24xlarge\",\n",
-    "    #     \"env\": {\n",
-    "    #         \"SAGEMAKER_MODEL_SERVER_WORKERS\": \"1\",\n",
-    "    #         \"TS_DEFAULT_WORKERS_PER_MODEL\": \"1\"\n",
-    "    #     },\n",
-    "    #     \"parse_function\": parse_response_model_flan_t5,\n",
-    "    #     \"prompt\": \"\"\"Answer based on context:\\n\\n{context}\\n\\n{question}\"\"\",\n",
-    "    # },\n",
+    "    #     \"model_version\": \"2.*\",\n",
+    "    #     \"instance type\": \"ml.g5.24xlarge\"\n",
+    "    # }\n",
     "}"
    ]
   },
@@ -168,41 +115,30 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "newline, bold, unbold = \"\\n\", \"\\033[1m\", \"\\033[0m\"\n",
-    "\n",
     "for model_id in _MODEL_CONFIG_:\n",
-    "    endpoint_name = name_from_base(f\"jumpstart-example-raglc-{model_id}\")\n",
-    "    inference_instance_type = _MODEL_CONFIG_[model_id][\"instance type\"]\n",
-    "\n",
-    "    # Retrieve the inference container uri. This is the base HuggingFace container image for the default model above.\n",
-    "    deploy_image_uri = image_uris.retrieve(\n",
-    "        region=None,\n",
-    "        framework=None,  # automatically inferred from model_id\n",
-    "        image_scope=\"inference\",\n",
+    "    endpoint_name = name_from_base(f\"jumpstart-example-raglc-{model_id}\")\n",
+    "    inference_instance_type = _MODEL_CONFIG_[model_id][\"instance type\"]\n",
+    "    model_version = _MODEL_CONFIG_[model_id][\"model_version\"]\n",
+    "\n",
+    "    print(f\"Deploying {model_id}...\")\n",
+    "\n",
+    "    model = JumpStartModel(\n",
     "        model_id=model_id,\n",
-    "        model_version=model_version,\n",
-    "        instance_type=inference_instance_type,\n",
-    "    )\n",
-    "    # Retrieve the model uri.\n",
-    "    model_uri = model_uris.retrieve(\n",
-    "        model_id=model_id, model_version=model_version, model_scope=\"inference\"\n",
-    "    )\n",
-    "    model_inference = Model(\n",
-    "        image_uri=deploy_image_uri,\n",
-    "        model_data=model_uri,\n",
-    "        role=aws_role,\n",
-    "        predictor_cls=Predictor,\n",
-    "        name=endpoint_name,\n",
-    "        env=_MODEL_CONFIG_[model_id][\"env\"],\n",
+    "        model_version=model_version\n",
     "    )\n",
-    "    model_predictor_inference = model_inference.deploy(\n",
-    "        initial_instance_count=1,\n",
-    "        instance_type=inference_instance_type,\n",
-    "        predictor_cls=Predictor,\n",
-    "        endpoint_name=endpoint_name,\n",
-    "    )\n",
-    "    print(f\"{bold}Model {model_id} has been deployed successfully.{unbold}{newline}\")\n",
-    "    _MODEL_CONFIG_[model_id][\"endpoint_name\"] = endpoint_name"
+    "\n",
+    "    try:\n",
+    "        predictor = model.deploy(\n",
+    "            initial_instance_count=1,\n",
+    "            instance_type=inference_instance_type,\n",
+    "            endpoint_name=endpoint_name,\n",
+    "        )\n",
+    "        print(f\"Deployed endpoint: {predictor.endpoint_name}\")\n",
+    "        _MODEL_CONFIG_[model_id][\"predictor\"] = predictor\n",
+    "    except Exception as e:\n",
+    "        print(f\"Error deploying {model_id}: {e}\")\n",
+    "\n",
+    "print(\"Deployment process completed.\")"
    ]
   },
@@ -216,7 +154,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 11,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -229,26 +167,16 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "payload = {\n",
-    "    \"text_inputs\": question,\n",
-    "    \"max_length\": 100,\n",
-    "    \"num_return_sequences\": 1,\n",
-    "    \"top_k\": 50,\n",
-    "    \"top_p\": 0.95,\n",
-    "    \"do_sample\": True,\n",
-    "}\n",
-    "\n",
     "list_of_LLMs = list(_MODEL_CONFIG_.keys())\n",
-    "list_of_LLMs.remove(\"huggingface-textembedding-gpt-j-6b\")  # remove the embedding model\n",
-    "\n",
+    "list_of_LLMs = [model for model in list_of_LLMs if \"textembedding\" not in model]\n",
     "\n",
     "for model_id in list_of_LLMs:\n",
-    "    endpoint_name = _MODEL_CONFIG_[model_id][\"endpoint_name\"]\n",
-    "    query_response = query_endpoint_with_json_payload(\n",
-    "        json.dumps(payload).encode(\"utf-8\"), endpoint_name=endpoint_name\n",
-    "    )\n",
-    "    generated_texts = _MODEL_CONFIG_[model_id][\"parse_function\"](query_response)\n",
-    "    print(f\"For model: {model_id}, the generated output is: {generated_texts[0]}\\n\")"
+    "    predictor = _MODEL_CONFIG_[model_id][\"predictor\"]\n",
+    "    response = predictor.predict({\n",
+    "        \"inputs\": question\n",
+    "    })\n",
+    "    print(f\"For model: {model_id}, the generated output is:\\n\")\n",
+    "    print(f\"{response[0]['generated_text']}\\n\")"
    ]
   },
@@ -270,7 +198,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 13,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -283,31 +211,15 @@
    "metadata": {},
"outputs": [], "source": [ - "parameters = {\n", - " \"max_length\": 200,\n", - " \"num_return_sequences\": 1,\n", - " \"top_k\": 250,\n", - " \"top_p\": 0.95,\n", - " \"do_sample\": False,\n", - " \"temperature\": 1,\n", - "}\n", + "prompt = f'Answer based on context:\\n\\n{context}\\n\\n{question}'\n", "\n", "for model_id in list_of_LLMs:\n", - " endpoint_name = _MODEL_CONFIG_[model_id][\"endpoint_name\"]\n", - "\n", - " prompt = _MODEL_CONFIG_[model_id][\"prompt\"]\n", - "\n", - " text_input = prompt.replace(\"{context}\", context)\n", - " text_input = text_input.replace(\"{question}\", question)\n", - " payload = {\"text_inputs\": text_input, **parameters}\n", - "\n", - " query_response = query_endpoint_with_json_payload(\n", - " json.dumps(payload).encode(\"utf-8\"), endpoint_name=endpoint_name\n", - " )\n", - " generated_texts = _MODEL_CONFIG_[model_id][\"parse_function\"](query_response)\n", - " print(\n", - " f\"{bold}For model: {model_id}, the generated output is: {generated_texts[0]}{unbold}{newline}\"\n", - " )" + " predictor = _MODEL_CONFIG_[model_id][\"predictor\"]\n", + " response = predictor.predict({\n", + " \"inputs\": prompt\n", + " })\n", + " print(f\"For model: {model_id}, the generated output is:\\n\")\n", + " print(f\"{response[0]['generated_text']}\\n\")" ] }, { @@ -358,7 +270,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 34, "metadata": { "tags": [] }, @@ -405,9 +317,12 @@ "\n", "\n", "content_handler = ContentHandler()\n", + "endpoint_name=_MODEL_CONFIG_[\n", + " \"huggingface-textembedding-all-MiniLM-L6-v2\"\n", + " ][\"predictor\"].endpoint_name\n", "\n", "embeddings = SagemakerEndpointEmbeddingsJumpStart(\n", - " endpoint_name=_MODEL_CONFIG_[\"huggingface-textembedding-gpt-j-6b\"][\"endpoint_name\"],\n", + " endpoint_name=endpoint_name,\n", " region_name=aws_region,\n", " content_handler=content_handler,\n", ")" @@ -422,39 +337,33 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 50, "metadata": {}, "outputs": [], "source": [ "from langchain.llms.sagemaker_endpoint import LLMContentHandler, SagemakerEndpoint\n", "\n", - "parameters = {\n", - " \"max_length\": 200,\n", - " \"num_return_sequences\": 1,\n", - " \"top_k\": 250,\n", - " \"top_p\": 0.95,\n", - " \"do_sample\": False,\n", - " \"temperature\": 1,\n", - "}\n", - "\n", - "\n", "class ContentHandler(LLMContentHandler):\n", " content_type = \"application/json\"\n", " accepts = \"application/json\"\n", "\n", " def transform_input(self, prompt: str, model_kwargs={}) -> bytes:\n", - " input_str = json.dumps({\"text_inputs\": prompt, **model_kwargs})\n", + " input_str = json.dumps({\"inputs\": prompt, **model_kwargs})\n", " return input_str.encode(\"utf-8\")\n", "\n", " def transform_output(self, output: bytes) -> str:\n", " response_json = json.loads(output.read().decode(\"utf-8\"))\n", - " return response_json[\"generated_texts\"][0]\n", + " return response_json[0][\"generated_text\"]\n", "\n", "\n", "content_handler = ContentHandler()\n", + "endpoint_name=_MODEL_CONFIG_[\n", + " \"huggingface-text2text-flan-t5-xxl\"\n", + " ][\"predictor\"].endpoint_name\n", + "\n", "\n", "sm_llm = SagemakerEndpoint(\n", - " endpoint_name=_MODEL_CONFIG_[\"huggingface-text2text-flan-t5-xxl\"][\"endpoint_name\"],\n", + " endpoint_name=endpoint_name,\n", " region_name=aws_region,\n", " model_kwargs=parameters,\n", " content_handler=content_handler,\n", @@ -493,7 +402,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "metadata": { 
"tags": [] }, @@ -521,7 +430,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 20, "metadata": { "tags": [] }, @@ -543,7 +452,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 22, "metadata": { "tags": [] }, @@ -554,7 +463,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 23, "metadata": { "tags": [] }, @@ -580,7 +489,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 36, "metadata": { "tags": [] }, @@ -591,7 +500,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 37, "metadata": { "tags": [] }, @@ -628,7 +537,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 39, "metadata": {}, "outputs": [], "source": [ @@ -641,7 +550,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 40, "metadata": {}, "outputs": [], "source": [ @@ -675,7 +584,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 52, "metadata": {}, "outputs": [], "source": [ @@ -700,7 +609,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 54, "metadata": {}, "outputs": [], "source": [ @@ -734,7 +643,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 56, "metadata": {}, "outputs": [], "source": [ @@ -745,7 +654,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 57, "metadata": {}, "outputs": [], "source": [ @@ -761,7 +670,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 58, "metadata": {}, "outputs": [], "source": [ @@ -1384,9 +1293,9 @@ ], "instance_type": "ml.t3.medium", "kernelspec": { - "display_name": "Python 3 (Data Science 2.0)", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-east-1:081325390199:image/sagemaker-data-science-38" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -1398,7 +1307,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.13" + "version": "3.11.9" } }, "nbformat": 4,