From 50defb914749b3077ad9b5cba1c41e342015078c Mon Sep 17 00:00:00 2001 From: Bob Strahan Date: Tue, 5 Sep 2023 15:22:03 +0000 Subject: [PATCH 01/14] Changed default value for 'SummarizationLLMThirdPartyApiKey' to 'undefined' --- pca-main-nokendra.template | 4 ++-- pca-main.template | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pca-main-nokendra.template b/pca-main-nokendra.template index 12e2aef6..050e106a 100644 --- a/pca-main-nokendra.template +++ b/pca-main-nokendra.template @@ -1,6 +1,6 @@ AWSTemplateFormatVersion: "2010-09-09" -Description: Amazon Transcribe Post Call Analytics - PCA (v0.7.0) (uksb-1sn29lk73) +Description: Amazon Transcribe Post Call Analytics - PCA (v0.7.1) (uksb-1sn29lk73) Parameters: @@ -409,7 +409,7 @@ Parameters: Type: String Description: > (Optional) If 'CallSummarization' or 'GenAIQuery' is ANTHROPIC, enter the provider API Key. ** Data will leave your AWS account ** - Default: '' + Default: undefined NoEcho: true SummarizationLambdaFunctionArn: diff --git a/pca-main.template b/pca-main.template index b3640b97..ac458f68 100644 --- a/pca-main.template +++ b/pca-main.template @@ -1,6 +1,6 @@ AWSTemplateFormatVersion: "2010-09-09" -Description: Amazon Transcribe Post Call Analytics - PCA (v0.7.0) (uksb-1sn29lk73) +Description: Amazon Transcribe Post Call Analytics - PCA (v0.7.1) (uksb-1sn29lk73) Parameters: @@ -411,7 +411,7 @@ Parameters: Type: String Description: > (Optional) If 'CallSummarization' or 'GenAIQuery' is ANTHROPIC, enter the provider API Key. ** Data will leave your AWS account ** - Default: '' + Default: undefined NoEcho: true SummarizationLambdaFunctionArn: From 049a63359235d3f60f0b5e6cb947e7cd9c15d1a3 Mon Sep 17 00:00:00 2001 From: Bob Strahan Date: Tue, 5 Sep 2023 15:22:13 +0000 Subject: [PATCH 02/14] v0.7.1 --- CHANGELOG.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e3bc6b84..3003c34a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,10 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.7.1] - 2023-09-05 +### Fixed +- Stack deploy failure (unable to create secret in SecretsManager) when SummarizationLLMThirdPartyApiKey is left empty. Changed default value to 'undefined'. 
+
 
 ## [0.7.0] - 2023-09-01
 ### Added
 - Bedrock summarization support
@@ -123,7 +127,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Added
 - Initial release
 
-[Unreleased]: https://github.com/aws-samples/amazon-transcribe-post-call-analytics/compare/v0.7.0...develop
+[Unreleased]: https://github.com/aws-samples/amazon-transcribe-post-call-analytics/compare/v0.7.1...develop
+[0.7.1]: https://github.com/aws-samples/amazon-transcribe-post-call-analytics/releases/tag/v0.7.1
 [0.7.0]: https://github.com/aws-samples/amazon-transcribe-post-call-analytics/releases/tag/v0.7.0
 [0.6.0]: https://github.com/aws-samples/amazon-transcribe-post-call-analytics/releases/tag/v0.6.0
 [0.5.2]: https://github.com/aws-samples/amazon-transcribe-post-call-analytics/releases/tag/v0.5.2

From 1f0283c32e52ac4497e3eadd3caf27167947ee96 Mon Sep 17 00:00:00 2001
From: Christopher Lott
Date: Thu, 28 Sep 2023 22:06:04 -0700
Subject: [PATCH 03/14] Bedrock GA support

---
 README.md                                  |  6 +--
 docs/generative_ai.md                      |  6 +--
 pca-boto3-bedrock/template.yaml            | 52 +---
 pca-main.template                          | 28 ++++----
 pca-server/src/pca/pca-aws-sf-summarize.py |  6 +--
 pca-ssm/cfn/ssm.template                   |  4 +-
 pca-ui/src/genai/index.py                  |  6 +--
 7 files changed, 29 insertions(+), 79 deletions(-)

diff --git a/README.md b/README.md
index 052ccbff..3947d861 100644
--- a/README.md
+++ b/README.md
@@ -34,7 +34,7 @@ PCA currently supports the following features:
   * Detects when caller and agent interrupt each other
   * Speaker loudness
 * **Generative AI**
-  * Abstractive call summarization using [HuggingFace bart-large-cnn-samsum](https://huggingface.co/philschmid/bart-large-cnn-samsum) deployed on Sagemaker, [Anthropic Claude](https://www.anthropic.com/index/introducing-claude) (which is coming to [Amazon Bedrock](https://aws.amazon.com/bedrock/)), or a user-defined custom AWS Lambda function.
+  * Abstractive call summarization using [HuggingFace bart-large-cnn-samsum](https://huggingface.co/philschmid/bart-large-cnn-samsum) deployed on Sagemaker, [Anthropic Claude](https://www.anthropic.com/index/introducing-claude) (which is also in [Amazon Bedrock](https://aws.amazon.com/bedrock/)), or a user-defined custom AWS Lambda function.
 * **Search**
   * Search on call attributes such as time range, sentiment, or entities
   * Search transcriptions
@@ -84,7 +84,7 @@ Once standard PCA processing is complete the telephony-specific CTR handler will
 
 ## (optional) Generative AI Call Summarization
 
-PCA contains a new step in the step functions that (if enabled) will generate a call summary. There are 4 choices for call summarization - Sagemaker Endpoint with HuggingFace bart-large-cnn-samsum, Amazon Bedrock (preview access only) Anthropic Claude, or a custom AWS Lambda function.
+PCA contains a new step in the step functions that (if enabled) will generate a call summary. There are 4 choices for call summarization - Sagemaker Endpoint with HuggingFace bart-large-cnn-samsum, Amazon Bedrock, Anthropic Claude, or a custom AWS Lambda function.
 
 Learn more about the features in the [Generative AI readme](./docs/generative_ai.md)
 
@@ -94,7 +94,7 @@ When deploying PCA, the CloudFormation parameter `CallSummarization` value defin
 
 If `DISABLED` is chosen, the PCA step function will bypass the summarization step.
 
-If `BEDROCK` is chosen, you must have access to the Amazon Bedrock service, currently in private preview. Also select the Bedrock model `SummarizationBedrockModelId` parameter.
+If `BEDROCK` is chosen, you must select the Bedrock model `SummarizationBedrockModelId` parameter. If `SAGEMAKER` is chosen, PCA will be deployed with the [HuggingFace bart-large-cnn-samsum](https://huggingface.co/philschmid/bart-large-cnn-samsum) model on a `ml.m5.xlarge` instance type. By default, it is deployed as a single instance count, defined by the `SummarizationSageMakerInitialInstanceCount` parameter. If `SummarizationSageMakerInitialInstanceCount` is set to `0`, the endpoint will be deployed as a [SageMaker Serverless Inference](https://docs.aws.amazon.com/sagemaker/latest/dg/serverless-endpoints.html) endpoint. diff --git a/docs/generative_ai.md b/docs/generative_ai.md index 88dfe76a..59157270 100644 --- a/docs/generative_ai.md +++ b/docs/generative_ai.md @@ -11,14 +11,14 @@ PCA also supports 'Generative AI Queries' - which simply means you can ask quest ## Generative AI Insights -When enabled, PCA can run one or more FM inferences against Bedrock or Anthropic APIs. The prompt used to generate the insights is configured in a [AWS Systems Manager Parameter Store](https://docs.aws.amazon.com/systems-manager/latest/userguide/systems-manager-parameter-store.html). The name of the parameter is `LLMPromptSummaryTemplate`. +When enabled, PCA can run one or more FM inferences against Amazon Bedrock or Anthropic APIs. The prompt used to generate the insights is configured in a [AWS Systems Manager Parameter Store](https://docs.aws.amazon.com/systems-manager/latest/userguide/systems-manager-parameter-store.html). The name of the parameter is `LLMPromptSummaryTemplate`. ### Single FM Inference The default value for the prompt parameter provides one single prompt: ``` -Human: Answer all the questions below as a json object with key value pairs, the key is provided, and answer as the value, based on the transcript. Only return json. +Human: Answer all the questions below as a json object with key value pairs, based on the transcript. Use the text before the colon as the key. Only return json. Use gender neutral pronouns. Skip the preamble; go straight into the json.

<br>Summary: Summarize the call.
<br>Topic: Topic of the call. Choose from one of these or make one up (iphone issue, billing issue, cancellation)
@@ -31,7 +31,7 @@
<br>
<br>{transcript}
<br>
-<br>Assistant: Here is the JSON object with the answers to the questions:
+<br>Assistant:
```

The `<br>
` tags are replaced with newlines, and `{transcript}` is replaced with the call transcript. diff --git a/pca-boto3-bedrock/template.yaml b/pca-boto3-bedrock/template.yaml index 4d9dcd0b..da580338 100644 --- a/pca-boto3-bedrock/template.yaml +++ b/pca-boto3-bedrock/template.yaml @@ -3,13 +3,6 @@ Description: > PCA Bedrock Boto3 Lambda Layer. This will create an S3 bucket, download the Boto3 WHL file, and create a Lambda layer for use. -Parameters: - - BedrockPreviewSdkUrl: - Type: String - Default: https://d2eo22ngex1n9g.cloudfront.net/Documentation/SDK/bedrock-python-sdk.zip - Description: URL for the Bedrock SDK zip file (Bedrock preview access only) - Resources: BedrockBoto3Bucket: @@ -62,7 +55,6 @@ Resources: MemorySize: 512 Environment: Variables: - SDK_DOWNLOAD_URL: !Ref BedrockPreviewSdkUrl BOTO3_BUCKET: !Ref BedrockBoto3Bucket Code: ZipFile: | @@ -76,40 +68,13 @@ Resources: import urllib3 from datetime import datetime import cfnresponse - bedrock_sdk_url = os.environ['SDK_DOWNLOAD_URL'] boto3_bucket = os.environ['BOTO3_BUCKET'] - def download_file_from_url(url, local_path): - """Download a file from a URL to a local save path.""" - http = urllib3.PoolManager() - response = http.request('GET', url) - if response.status == 200: - with open(local_path, 'wb') as file: - file.write(response.data) - print("File downloaded successfully.") - else: - print("Failed to download the file.", response) - def upload_file_to_s3(file_path, bucket, key): s3 = boto3.client('s3') s3.upload_file(file_path, bucket, key) print(f"Upload successful. {file_path} uploaded to {bucket}/{key}") - def extract_file_from_zip(zip_file_path, file_name): - with zipfile.ZipFile(zip_file_path, 'r') as zip_ref: - zip_ref.extract(file_name) - print(f"Successfully extracted {file_name} from {zip_file_path}") - - def find_boto_wheels(zipname): - zipf = zipfile.ZipFile(zipname, 'r') - zip_files = zipf.namelist() - b = re.compile('boto3(.*)\.whl') - bc = re.compile('botocore(.*)\.whl') - boto3_whl_file = [ s for s in zip_files if b.match(s) ][0] - botocore_whl_file = [ s for s in zip_files if bc.match(s) ][0] - - return boto3_whl_file, botocore_whl_file - def make_zip_filename(): now = datetime.now() timestamp = now.strftime('%Y%m%d_%H%M%S') @@ -141,20 +106,8 @@ Resources: try: if event['RequestType'] != 'Delete': os.chdir('/tmp') - # download Bedrock SDK - zip_file_name='bedrock-python-sdk.zip' - print(f"downloading from {bedrock_sdk_url} to {zip_file_name}") - download_file_from_url(bedrock_sdk_url, zip_file_name) - boto3_whl_file, botocore_whl_file = find_boto_wheels(zip_file_name) - extract_file_from_zip(zip_file_name, botocore_whl_file) - extract_file_from_zip(zip_file_name, boto3_whl_file) - if os.path.exists("python"): - shutil.rmtree("python") - os.mkdir("python") - print(f"running pip install botocore") - subprocess.check_call([sys.executable, "-m", "pip", "install", botocore_whl_file, "-t", "python" ]) - print(f"running pip install boto3") - subprocess.check_call([sys.executable, "-m", "pip", "install", boto3_whl_file, "-t", "python" ]) + print(f"running pip install boto3==1.28.57") + subprocess.check_call([sys.executable, "-m", "pip", "install", "boto3==1.28.57", "-t", "python" ]) boto3_zip_name = make_zip_filename() zipdir("python",boto3_zip_name) print(f"uploading {boto3_zip_name} to s3 bucket {boto3_bucket}") @@ -181,7 +134,6 @@ Resources: Properties: ServiceToken: !GetAtt BedrockBoto3ZipFunction.Arn # Rerun BedrockBoto3ZipFunction if any of the following parameters change - SDK_DOWNLOAD_URL: !Ref 
BedrockPreviewSdkUrl BOTO3_BUCKET: !Ref BedrockBoto3Bucket BedrockBoto3Layer: diff --git a/pca-main.template b/pca-main.template index ac458f68..a2600904 100644 --- a/pca-main.template +++ b/pca-main.template @@ -350,7 +350,7 @@ Parameters: steps required in Amazon QuickSight to (1) enable S3 access to PCA OutputBucket and (2) share dashboard and analytics assets. GenAIQuery: - Default: 'DISABLED' + Default: 'BEDROCK' Type: String AllowedValues: - 'DISABLED' @@ -358,7 +358,7 @@ Parameters: - 'ANTHROPIC' Description: > If enabled, gives the ability to query an individual call for information. - The BEDROCK option requires your account to have Amazon Bedrock preview access. + The BEDROCK option requires you to choose one of the supported model IDs from the provided list (GenAIQueryBedrockModelId). The ANTHROPIC option is a third party service, and you must enter your Anthropic API key in the Third Party LLM API Key section. @@ -366,14 +366,14 @@ Parameters: Type: String Default: anthropic.claude-v2 AllowedValues: - - amazon.titan-tg1-large + - amazon.titan-text-express-v1 - anthropic.claude-v1 - anthropic.claude-instant-v1 - anthropic.claude-v2 - Description: (Optional) If 'GenAIQuery' is BEDROCK, which Bedrock model to use. (Bedrock preview access only) + Description: (Optional) If 'GenAIQuery' is BEDROCK, which Bedrock model to use. CallSummarization: - Default: 'DISABLED' + Default: 'BEDROCK' Type: String AllowedValues: - 'DISABLED' @@ -382,23 +382,21 @@ Parameters: - 'ANTHROPIC' - 'LAMBDA' Description: > - Set to enable call summarization by a Large Language Model. The SAGEMAKER option uses a SageMaker endpoint with - the pretrained bart-large-cnn-samsum model with a ml.m5.xlarge instance type. The LAMBDA option requires you - to provide a function ARN below. The ANTHROPIC option is a third party service, and you must enter your - Anthropic API key in the Third Party LLM API Key section. The BEDROCK option requires your account to have - Amazon Bedrock preview access. + Set to enable call summarization by a Large Language Model. + The BEDROCK option requires you to choose one of the supported model IDs from the provided list (SummarizationBedrockModelId). + The SAGEMAKER option uses a SageMaker endpoint with the pretrained bart-large-cnn-samsum model with a ml.m5.xlarge instance type. + The LAMBDA option requires you to provide a function ARN below. + The ANTHROPIC option is a third party service, and you must enter your Anthropic API key in the Third Party LLM API Key section. SummarizationBedrockModelId: Type: String - Default: anthropic.claude-v2 + Default: anthropic.claude-instant-v1 AllowedValues: - - amazon.titan-tg1-large - #- ai21.j2-grande-instruct - #- ai21.j2-jumbo-instruct + - amazon.titan-text-express-v1 - anthropic.claude-v1 - anthropic.claude-instant-v1 - anthropic.claude-v2 - Description: (Optional) If 'CallSummarization' is BEDROCK, which Bedrock model to use. (Bedrock preview access only) + Description: (Optional) If 'CallSummarization' is BEDROCK, which Bedrock model to use. SummarizationSageMakerInitialInstanceCount: Type: Number diff --git a/pca-server/src/pca/pca-aws-sf-summarize.py b/pca-server/src/pca/pca-aws-sf-summarize.py index b330110f..69e9d0da 100644 --- a/pca-server/src/pca/pca-aws-sf-summarize.py +++ b/pca-server/src/pca/pca-aws-sf-summarize.py @@ -22,8 +22,8 @@ TOKEN_COUNT = int(os.getenv('TOKEN_COUNT', '0')) # default 0 - do not truncate. 
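# Illustrative sketch, not part of this diff: what the Bedrock GA change in
# this file amounts to. With GA, the public boto3 SDK (>= 1.28.57, pinned by
# the Lambda layer above) exposes the 'bedrock-runtime' service and the
# regional bedrock-runtime.<region>.amazonaws.com endpoint, replacing the
# preview 'bedrock' client and endpoint names used before. The model ID and
# region below are assumed values for illustration only.
#
#   import json
#   import boto3
#
#   client = boto3.client("bedrock-runtime", region_name="us-east-1")
#   body = {"prompt": "\n\nHuman: Summarize this call.\n\nAssistant:",
#           "max_tokens_to_sample": 256}
#   response = client.invoke_model(modelId="anthropic.claude-instant-v1",
#                                  body=json.dumps(body),
#                                  accept="application/json",
#                                  contentType="application/json")
#   print(json.loads(response["body"].read())["completion"])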
SUMMARY_LAMBDA_ARN = os.getenv('SUMMARY_LAMBDA_ARN','') FETCH_TRANSCRIPT_LAMBDA_ARN = os.getenv('FETCH_TRANSCRIPT_LAMBDA_ARN','') -BEDROCK_MODEL_ID = os.environ.get("BEDROCK_MODEL_ID","amazon.titan-tg1-large") -BEDROCK_ENDPOINT_URL = os.environ.get("ENDPOINT_URL", f'https://bedrock.{AWS_REGION}.amazonaws.com') +BEDROCK_MODEL_ID = os.environ.get("BEDROCK_MODEL_ID","amazon.titan-text-express-v1") +BEDROCK_ENDPOINT_URL = os.environ.get("ENDPOINT_URL", f'https://bedrock-runtime.{AWS_REGION}.amazonaws.com') MAX_TOKENS = int(os.getenv('MAX_TOKENS','256')) @@ -45,7 +45,7 @@ def get_third_party_llm_secret(): def get_bedrock_client(): print("Connecting to Bedrock Service: ", BEDROCK_ENDPOINT_URL) - client = boto3.client(service_name='bedrock', region_name=AWS_REGION, endpoint_url=BEDROCK_ENDPOINT_URL) + client = boto3.client(service_name='bedrock-runtime', region_name=AWS_REGION, endpoint_url=BEDROCK_ENDPOINT_URL) return client def get_bedrock_request_body(modelId, parameters, prompt): diff --git a/pca-ssm/cfn/ssm.template b/pca-ssm/cfn/ssm.template index 3c448c08..39a5afa9 100644 --- a/pca-ssm/cfn/ssm.template +++ b/pca-ssm/cfn/ssm.template @@ -92,7 +92,7 @@ Parameters: a JSON Object with key/value pairs, where the LLM will run one inference on each key/value pair with the value containing the prompt. Use {transcript} as a placeholder for where the call transcript will be injected. Default: >- - Human: Answer all the questions below as a json object with key value pairs, the key is provided, and answer as the value, based on the transcript. Only return json. + Human: Answer all the questions below as a json object with key value pairs, based on the transcript. Use the text before the colon as the key. Only return json. Use gender neutral pronouns. Skip the preamble; go straight into the json.

      <br>Summary: Summarize the call.
      <br>Topic: Topic of the call. Choose from one of these or make one up (iphone issue, billing issue, cancellation)
@@ -105,7 +105,7 @@ Parameters:
      <br>
      <br>{transcript}
      <br>
-      <br>Assistant: Here is the JSON object with the answers to the questions:
+      <br>
Assistant: LLMPromptQueryTemplate: Type: String diff --git a/pca-ui/src/genai/index.py b/pca-ui/src/genai/index.py index 833d1279..1b9719f1 100644 --- a/pca-ui/src/genai/index.py +++ b/pca-ui/src/genai/index.py @@ -21,8 +21,8 @@ TOKEN_COUNT = int(os.getenv('TOKEN_COUNT', '0')) # default 0 - do not truncate. LLM_QUERY_LAMBDA_ARN = os.getenv('LLM_QUERY_LAMBDA_ARN','') FETCH_TRANSCRIPT_LAMBDA_ARN = os.getenv('FETCH_TRANSCRIPT_LAMBDA_ARN','') -BEDROCK_MODEL_ID = os.getenv("BEDROCK_MODEL_ID","amazon.titan-tg1-large") -BEDROCK_ENDPOINT_URL = os.getenv("ENDPOINT_URL", f'https://bedrock.{AWS_REGION}.amazonaws.com') +BEDROCK_MODEL_ID = os.getenv("BEDROCK_MODEL_ID","amazon.text-express-v1") +BEDROCK_ENDPOINT_URL = os.getenv("ENDPOINT_URL", f'https://bedrock-runtime.{AWS_REGION}.amazonaws.com') CONF_LLM_PROMPT_QUERY_TEMPLATE = os.getenv("CONF_LLM_PROMPT_QUERY_TEMPLATE","LLMPromptQueryTemplate") MAX_TOKENS = int(os.getenv('MAX_TOKENS','256')) @@ -44,7 +44,7 @@ def get_third_party_llm_secret(): def get_bedrock_client(): print("Connecting to Bedrock Service: ", BEDROCK_ENDPOINT_URL) - client = boto3.client(service_name='bedrock', region_name=AWS_REGION, endpoint_url=BEDROCK_ENDPOINT_URL) + client = boto3.client(service_name='bedrock-runtime', region_name=AWS_REGION, endpoint_url=BEDROCK_ENDPOINT_URL) return client def get_bedrock_request_body(modelId, parameters, prompt): From 936b5f8f7a4ba0617c1f85bd6310c76e87574599 Mon Sep 17 00:00:00 2001 From: Christopher Lott Date: Sun, 1 Oct 2023 21:17:04 -0700 Subject: [PATCH 04/14] Adaptive retry for SSM parameters and Bedrock, updated prompts for Bedrock GA, refreshed generative_ai readme, changelog updated. --- CHANGELOG.md | 7 ++ README.md | 4 +- VERSION | 2 +- docs/generative_ai.md | 87 +++++++++++-------- pca-main.template | 10 ++- .../pca/pca-aws-sf-start-transcribe-job.py | 12 ++- pca-server/src/pca/pca-aws-sf-summarize.py | 11 ++- pca-server/src/pca/pcaconfiguration.py | 9 +- pca-ssm/cfn/ssm.template | 42 ++++----- 9 files changed, 122 insertions(+), 62 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3003c34a..49cca674 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,13 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.7.2] - Unreleased +### Fixed +- Bedrock GA support +- Prompt updates for Bedrock GA release, updated GenerativeAI readme +- Links to the LLM Parameter Store Prompts from the CloudFormation Output +- Adaptive retries for SSM GetParameter and InvokeModel to prevent throttling errors + ## [0.7.1] - 2023-09-05 ### Fixed - Stack deploy failure (unable to create secret in SecretsManager) when SummarizationLLMThirdPartyApiKey is left empty. Changed default value to 'undefined'. diff --git a/README.md b/README.md index 3947d861..6f11913a 100644 --- a/README.md +++ b/README.md @@ -34,7 +34,7 @@ PCA currently supports the following features: * Detects when caller and agent interrupt each other * Speaker loudness * **Generative AI** - * Abstractive call summarization using [HuggingFace bart-large-cnn-samsum](https://huggingface.co/philschmid/bart-large-cnn-samsum) deployed on Sagemaker, [Anthropic Claude](https://www.anthropic.com/index/introducing-claude) (which is also in [Amazon Bedrock](https://aws.amazon.com/bedrock/)), or a user-defined custom AWS Lambda function. 
+ * Abstractive call summarization using [Amazon Bedrock](https://aws.amazon.com/bedrock/), [HuggingFace bart-large-cnn-samsum](https://huggingface.co/philschmid/bart-large-cnn-samsum) deployed on Sagemaker, [Anthropic Claude](https://www.anthropic.com/index/introducing-claude) (third party API), or a user-defined custom AWS Lambda function. * **Search** * Search on call attributes such as time range, sentiment, or entities * Search transcriptions @@ -84,7 +84,7 @@ Once standard PCA processing is complete the telephony-specific CTR handler will ## (optional) Generative AI Call Summarization -PCA contains a new step in the step functions that (if enabled) will generate a call summary. There are 4 choices for call summarization - Sagemaker Endpoint with HuggingFace bart-large-cnn-samsum, Amazon Bedrock, Anthropic Claude, or a custom AWS Lambda function. +PCA contains a new step in the step functions that (if enabled) will generate a call summary. There are 4 choices for call summarization - Amazon Bedrock, Sagemaker Endpoint with HuggingFace bart-large-cnn-samsum, Anthropic Claude, or a custom AWS Lambda function. Learn more about the features in the [Generative AI readme](./docs/generative_ai.md) diff --git a/VERSION b/VERSION index bcaffe19..d5cc44d1 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.7.0 \ No newline at end of file +0.7.2 \ No newline at end of file diff --git a/docs/generative_ai.md b/docs/generative_ai.md index 59157270..4e190742 100644 --- a/docs/generative_ai.md +++ b/docs/generative_ai.md @@ -1,7 +1,7 @@ # PCA and Generative AI Post-Call Analytics has an optional step in the step function workflow to generate insights with generative AI. -PCA supports [Amazon Bedrock](https://aws.amazon.com/bedrock/) (Titan or Anthropic models) and [Anthropic](https://www.anthropic.com/) (3rd party) foundational models (FMs). Customers may also write a Lambda function and provide PCA the ARN, and use any FM of their choice. +PCA supports [Amazon Bedrock](https://aws.amazon.com/bedrock/) (Titan or Anthropic models) and [Anthropic](https://www.anthropic.com/) (3rd party) foundational models (FMs). Customers may also write a Lambda function and provide PCA the ARN, and use any FM of their choice. The prompts below are based on Anthropic's prompt formats. Learn more about prompt design at Anthropic's [Introduction to Prompt Design].(https://docs.anthropic.com/claude/docs/introduction-to-prompt-design). PCA also supports 'Generative AI Queries' - which simply means you can ask questions about a specific call. These queries appear in a chat-like window from within the call details page. @@ -13,32 +13,25 @@ PCA also supports 'Generative AI Queries' - which simply means you can ask quest When enabled, PCA can run one or more FM inferences against Amazon Bedrock or Anthropic APIs. The prompt used to generate the insights is configured in a [AWS Systems Manager Parameter Store](https://docs.aws.amazon.com/systems-manager/latest/userguide/systems-manager-parameter-store.html). The name of the parameter is `LLMPromptSummaryTemplate`. -### Single FM Inference +### Multiple inferences per call + +The default value for `LLMPromptSummaryTemplate` is a JSON object with key/value pairs, each pair representing the label (key) and prompt (value). During the `Summarize` step, PCA will iterate the keys and run each prompt. PCA will replace `
<br>` tags with newlines, and `{transcript}` is replaced with the call transcript. The key will be used as a header for the value in the "generated insights" section in the PCA UI.
 
-The default value for the prompt parameter provides one single prompt:
+Below is the default value of `LLMPromptSummaryTemplate`.
 
 ```
-Human: Answer all the questions below as a json object with key value pairs, based on the transcript. Use the text before the colon as the key. Only return json. Use gender neutral pronouns. Skip the preamble; go straight into the json.
-<br>
-<br>Summary: Summarize the call.
-<br>Topic: Topic of the call. Choose from one of these or make one up (iphone issue, billing issue, cancellation)
-<br>Product: What product did the customer call about? (internet, broadband, mobile phone, mobile plans)
-<br>Resolved: Did the agent resolve the customer's questions? (yes or no)
-<br>Callback: Was this a callback? (yes or no)
-<br>Politeness: Was the agent polite and professional? (yes or no)
-<br>Actions: What actions did the Agent take?
-<br>
-<br>
-<br>{transcript}
-<br>
-<br>Assistant:
+{
+  "Summary":"<br><br>Human: Answer the questions below, defined in <question></question> based on the transcript defined in <transcript></transcript>. If you cannot answer the question, reply with 'n/a'. Use gender neutral pronouns. When you reply, only respond with the answer.<br><br><question>What is a summary of the transcript?</question><br><br><transcript><br>{transcript}<br></transcript><br><br>Assistant:",
+  "Topic":"<br><br>Human: Answer the questions below, defined in <question></question> based on the transcript defined in <transcript></transcript>. If you cannot answer the question, reply with 'n/a'. Use gender neutral pronouns. When you reply, only respond with the answer.<br><br><question>What is the topic of the call? For example, iphone issue, billing issue, cancellation. Only reply with the topic, nothing more.</question><br><br><transcript><br>{transcript}<br></transcript><br><br>Assistant:",
+  "Product":"<br><br>Human: Answer the questions below, defined in <question></question> based on the transcript defined in <transcript></transcript>. If you cannot answer the question, reply with 'n/a'. Use gender neutral pronouns. When you reply, only respond with the answer.<br><br><question>What product did the customer call about? For example, internet, broadband, mobile phone, mobile plans. Only reply with the product, nothing more.</question><br><br><transcript><br>{transcript}<br></transcript><br><br>Assistant:",
+  "Resolved":"<br><br>Human: Answer the questions below, defined in <question></question> based on the transcript defined in <transcript></transcript>. If you cannot answer the question, reply with 'n/a'. Use gender neutral pronouns. When you reply, only respond with the answer.<br><br><question>Did the agent resolve the customer's questions? Only reply with yes or no, nothing more.</question><br><br><transcript><br>{transcript}<br></transcript><br><br>Assistant:",
+  "Callback":"<br><br>Human: Answer the questions below, defined in <question></question> based on the transcript defined in <transcript></transcript>. If you cannot answer the question, reply with 'n/a'. Use gender neutral pronouns. When you reply, only respond with the answer.<br><br><question>Was this a callback? (yes or no) Only reply with yes or no, nothing more.</question><br><br><transcript><br>{transcript}<br></transcript><br><br>Assistant:",
+  "Politeness":"<br><br>Human: Answer the question below, defined in <question></question> based on the transcript defined in <transcript></transcript>. If you cannot answer the question, reply with 'n/a'. Use gender neutral pronouns. When you reply, only respond with the answer.<br><br><question>Was the agent polite and professional? (yes or no) Only reply with yes or no, nothing more.</question><br><br><transcript><br>{transcript}<br></transcript><br><br>Assistant:",
+  "Actions":"<br><br>Human: Answer the question below, defined in <question></question> based on the transcript defined in <transcript></transcript>. If you cannot answer the question, reply with 'n/a'. Use gender neutral pronouns. When you reply, only respond with the answer.<br><br><question>What actions did the Agent take?</question><br><br><transcript><br>{transcript}<br></transcript><br><br>
Assistant:" +} ``` -The `
` tags are replaced with newlines, and `{transcript}` is replaced with the call transcript. - -**Note:** This prompt generates 7 insights in a single inference - summary, topic, product, resolved, callback, agent politeness, and actions. - -The expected output of the inference should be a single JSON object with key-value pairs, similar to the below: +The expected output after the summarize step is a single json object, as a string, that contains all the key/value pairs. For example: ``` { @@ -52,18 +45,37 @@ The expected output of the inference should be a single JSON object with key-val } ``` -### Multiple inferences per call -If you would like to run individual inferences to generate the summary (for example, if you are using a fine-tuned FM for a specific inference, or your FM does not generate proper JSON), then you can change the prompt parameter input to be a JSON with key value pairs. The key will be the title in the generated insights section, and the value will be the prompt used to generate the value. Don't forget to add `{transcript}` to each prompt! +### Single FM Inference + +Some LLMs may be able to generate the JSON with one inference, rather than several. Below is an example that we've seen work, but with mixed results. ``` -{ - "Summary":"Human: Summarize the following transcript:
<br><br>{transcript}<br><br>Assistant:",
-  "Agent Politeness":"Human: Based on the following transcript, reply 'yes' if the agent was polite, or provide details if they were not polite.<br><br>{transcript}<br><br>
Assistant:" -} +
+<br>Human: Answer all the questions below, based on the contents of <transcript>, as a json object with key value pairs. Use the text before the colon as the key, and the answer as the value. If you cannot answer the question, reply with 'n/a'. Only return json. Use gender neutral pronouns. Skip the preamble; go straight into the json.
+<br>
+<br><questions>
+<br>Summary: Summarize the transcript in no more than 5 sentences. Were the caller's needs met during the call?
+<br>Topic: Topic of the call. Choose from one of these or make one up (iphone issue, billing issue, cancellation)
+<br>Product: What product did the customer call about? (internet, broadband, mobile phone, mobile plans)
+<br>Resolved: Did the agent resolve the customer's questions? (yes or no)
+<br>Callback: Was this a callback? (yes or no)
+<br>Politeness: Was the agent polite and professional? (yes or no)
+<br>Actions: What actions did the Agent take?
+<br></questions>
+<br>
+<br><transcript>
+<br>{transcript}
+<br></transcript>
+<br>
+<br>Assistant:
```

The `<br>
` tags are replaced with newlines, and `{transcript}` is replaced with the call transcript. + +**Note:** This prompt generates 7 insights in a single inference - summary, topic, product, resolved, callback, agent politeness, and actions. + +The expected output of the inference should be a single JSON object with key-value pairs, similar to above. ### Call list default columns @@ -76,11 +88,18 @@ For interactive queries from within PCA, it uses a different parameter, named `L The default value is: ``` -Human: You are an AI chatbot. Carefully read the following transcript and then provide a short answer to the question. If the answer cannot be determined from the transcript or the context, then reply saying Sorry, I don't know. -
<br>{question}
-<br>
-<br>{transcript}
-<br>
+<br>
+<br>Human: You are an AI chatbot. Carefully read the following transcript within <transcript></transcript>
+and then provide a short answer to the question. If the answer cannot be determined from the transcript or
+the context, then reply saying Sorry, I don't know. Use gender neutral pronouns. Skip the preamble; when you reply, only
+respond with the answer.
+<br>
+<br><question>{question}</question>
+<br>
+<br><transcript>
+<br>{transcript}
+<br></transcript>
+<br>
Assistant: ``` diff --git a/pca-main.template b/pca-main.template index a2600904..d01d495b 100644 --- a/pca-main.template +++ b/pca-main.template @@ -1068,4 +1068,12 @@ Outputs: FetchTranscriptArn: Description: Lambda function arn that will generate a string of the entire transcript for custom Lambda functions to use. - Value: !GetAtt PCAServer.Outputs.FetchTranscriptArn \ No newline at end of file + Value: !GetAtt PCAServer.Outputs.FetchTranscriptArn + + LLMPromptSummaryTemplateParameter: + Description: The LLM summary prompt template in SSM Parameter Store - open to customise call summary prompts. + Value: !Sub "https://${AWS::Region}.console.aws.amazon.com/systems-manager/parameters/${SSM.Outputs.LLMPromptSummaryTemplateParameter}" + + LLMPromptQueryTemplateParameter: + Description: The LLM query prompt template in SSM Parameter Store - open to customise query prompts. + Value: !Sub "https://${AWS::Region}.console.aws.amazon.com/systems-manager/parameters/${SSM.Outputs.LLMPromptQueryTemplateParameter}" \ No newline at end of file diff --git a/pca-server/src/pca/pca-aws-sf-start-transcribe-job.py b/pca-server/src/pca/pca-aws-sf-start-transcribe-job.py index 85854d81..8ebf0f52 100644 --- a/pca-server/src/pca/pca-aws-sf-start-transcribe-job.py +++ b/pca-server/src/pca/pca-aws-sf-start-transcribe-job.py @@ -9,14 +9,22 @@ """ import copy import boto3 +from botocore.config import Config import subprocess import pcaconfiguration as cf import pcacommon import os +import time # Local temporary folder for file-based operations TMP_DIR = "/tmp/" +config = Config( + retries = { + 'max_attempts': 100, + 'mode': 'adaptive' + } +) def check_existing_job_status(job_name, transcribe, api_mode): """ @@ -55,6 +63,8 @@ def delete_existing_job(job_name, transcribe, api_mode): transcribe.delete_call_analytics_job(CallAnalyticsJobName=job_name) else: transcribe.delete_transcription_job(TranscriptionJobName=job_name) + # let the job process for a few seconds + time.sleep(5) except Exception as e: # If the job has already been deleted then we don't need to take any action print(f"Unable to delete previous Transcribe job {job_name}: {e}") @@ -147,7 +157,7 @@ def submitTranscribeJob(bucket, key): """ # Work out our API mode for Transcribe, and get our boto3 client - transcribe = boto3.client('transcribe') + transcribe = boto3.client('transcribe', config=config) api_mode, channel_ident, base_model_name = evaluate_transcribe_mode(bucket, key) # Generate job-name - delete if it already exists diff --git a/pca-server/src/pca/pca-aws-sf-summarize.py b/pca-server/src/pca/pca-aws-sf-summarize.py index 69e9d0da..d967fbc9 100644 --- a/pca-server/src/pca/pca-aws-sf-summarize.py +++ b/pca-server/src/pca/pca-aws-sf-summarize.py @@ -13,6 +13,8 @@ import re import requests from botocore.exceptions import ClientError +from botocore.config import Config + AWS_REGION = os.environ["AWS_REGION_OVERRIDE"] if "AWS_REGION_OVERRIDE" in os.environ else os.environ["AWS_REGION"] SUMMARIZE_TYPE = os.getenv('SUMMARY_TYPE', 'DISABLED') @@ -31,6 +33,13 @@ ssmClient = boto3.client("ssm") bedrock_client = None +config = Config( + retries = { + 'max_attempts': 100, + 'mode': 'adaptive' + } +) + def get_third_party_llm_secret(): print("Getting API key from Secrets Manager") secrets_client = boto3.client('secretsmanager') @@ -45,7 +54,7 @@ def get_third_party_llm_secret(): def get_bedrock_client(): print("Connecting to Bedrock Service: ", BEDROCK_ENDPOINT_URL) - client = boto3.client(service_name='bedrock-runtime', region_name=AWS_REGION, 
endpoint_url=BEDROCK_ENDPOINT_URL) + client = boto3.client(service_name='bedrock-runtime', region_name=AWS_REGION, endpoint_url=BEDROCK_ENDPOINT_URL, config=config) return client def get_bedrock_request_body(modelId, parameters, prompt): diff --git a/pca-server/src/pca/pcaconfiguration.py b/pca-server/src/pca/pcaconfiguration.py index 5eb366f4..9f717cd7 100644 --- a/pca-server/src/pca/pcaconfiguration.py +++ b/pca-server/src/pca/pcaconfiguration.py @@ -7,6 +7,7 @@ SPDX-License-Identifier: Apache-2.0 """ import boto3 +from botocore.config import Config # Parameter Store Field Names used by main workflow CONF_COMP_LANGS = "ComprehendLanguages" @@ -76,6 +77,12 @@ # Configuration data appConfig = {} +config = Config( + retries = { + 'max_attempts': 100, + 'mode': 'adaptive' + } +) def extractParameters(ssmResponse, useTagName): """ @@ -104,7 +111,7 @@ def loadConfiguration(): """ # Load the the core ones in from Parameter Store in batches of up to 10 - ssm = boto3.client("ssm") + ssm = boto3.client("ssm", config=config) fullParamList1 = ssm.get_parameters( Names=[ CONF_COMP_LANGS, diff --git a/pca-ssm/cfn/ssm.template b/pca-ssm/cfn/ssm.template index 39a5afa9..af614cc2 100644 --- a/pca-ssm/cfn/ssm.template +++ b/pca-ssm/cfn/ssm.template @@ -92,31 +92,24 @@ Parameters: a JSON Object with key/value pairs, where the LLM will run one inference on each key/value pair with the value containing the prompt. Use {transcript} as a placeholder for where the call transcript will be injected. Default: >- - Human: Answer all the questions below as a json object with key value pairs, based on the transcript. Use the text before the colon as the key. Only return json. Use gender neutral pronouns. Skip the preamble; go straight into the json. -
-      <br>
-      <br>Summary: Summarize the call.
-      <br>Topic: Topic of the call. Choose from one of these or make one up (iphone issue, billing issue, cancellation)
-      <br>Product: What product did the customer call about? (internet, broadband, mobile phone, mobile plans)
-      <br>Resolved: Did the agent resolve the customer's questions? (yes or no)
-      <br>Callback: Was this a callback? (yes or no)
-      <br>Politeness: Was the agent polite and professional? (yes or no)
-      <br>Actions: What actions did the Agent take?
-      <br>
-      <br>
-      <br>{transcript}
-      <br>
-      <br>Assistant:
+      {
+        "Summary":"<br><br>Human: Answer the questions below, defined in <question></question> based on the transcript defined in <transcript></transcript>. If you cannot answer the question, reply with 'n/a'. Use gender neutral pronouns. When you reply, only respond with the answer.<br><br><question>What is a summary of the transcript?</question><br><br><transcript><br>{transcript}<br></transcript><br><br>Assistant:",
+        "Topic":"<br><br>Human: Answer the questions below, defined in <question></question> based on the transcript defined in <transcript></transcript>. If you cannot answer the question, reply with 'n/a'. Use gender neutral pronouns. When you reply, only respond with the answer.<br><br><question>What is the topic of the call? For example, iphone issue, billing issue, cancellation. Only reply with the topic, nothing more.</question><br><br><transcript><br>{transcript}<br></transcript><br><br>Assistant:",
+        "Product":"<br><br>Human: Answer the questions below, defined in <question></question> based on the transcript defined in <transcript></transcript>. If you cannot answer the question, reply with 'n/a'. Use gender neutral pronouns. When you reply, only respond with the answer.<br><br><question>What product did the customer call about? For example, internet, broadband, mobile phone, mobile plans. Only reply with the product, nothing more.</question><br><br><transcript><br>{transcript}<br></transcript><br><br>Assistant:",
+        "Resolved":"<br><br>Human: Answer the questions below, defined in <question></question> based on the transcript defined in <transcript></transcript>. If you cannot answer the question, reply with 'n/a'. Use gender neutral pronouns. When you reply, only respond with the answer.<br><br><question>Did the agent resolve the customer's questions? Only reply with yes or no, nothing more.</question><br><br><transcript><br>{transcript}<br></transcript><br><br>Assistant:",
+        "Callback":"<br><br>Human: Answer the questions below, defined in <question></question> based on the transcript defined in <transcript></transcript>. If you cannot answer the question, reply with 'n/a'. Use gender neutral pronouns. When you reply, only respond with the answer.<br><br><question>Was this a callback? (yes or no) Only reply with yes or no, nothing more.</question><br><br><transcript><br>{transcript}<br></transcript><br><br>Assistant:",
+        "Politeness":"<br><br>Human: Answer the question below, defined in <question></question> based on the transcript defined in <transcript></transcript>. If you cannot answer the question, reply with 'n/a'. Use gender neutral pronouns. When you reply, only respond with the answer.<br><br><question>Was the agent polite and professional? (yes or no) Only reply with yes or no, nothing more.</question><br><br><transcript><br>{transcript}<br></transcript><br><br>Assistant:",
+        "Actions":"<br><br>Human: Answer the question below, defined in <question></question> based on the transcript defined in <transcript></transcript>. If you cannot answer the question, reply with 'n/a'. Use gender neutral pronouns. When you reply, only respond with the answer.<br><br><question>What actions did the Agent take?</question><br><br><transcript><br>{transcript}<br></transcript><br><br>Assistant:"
+      }
 
   LLMPromptQueryTemplate:
     Type: String
     Description: This is the LLM prompt template to use when querying an individual call transcript.
     Default: >-
-      Human: You are an AI chatbot. Carefully read the following transcript and then provide a short answer to the question. If the answer cannot be determined from the transcript or the context, then reply saying Sorry, I don't know.
-      <br>{question}
-      <br>
-      <br>{transcript}
-      <br>
-      <br>Assistant:
+      <br>
+      <br>Human: You are an AI chatbot. Carefully read the following transcript within <transcript></transcript>
+      and then provide a short answer to the question. If the answer cannot be determined from the transcript or
+      the context, then reply saying Sorry, I don't know. Use gender neutral pronouns. Skip the preamble; when you reply, only
+      respond with the answer.
+      <br>
+      <br><question>{question}</question>
+      <br>
+      <br><transcript>
+      <br>{transcript}
+      <br></transcript>
+      <br>
Assistant: MaxSpeakers: Type: String @@ -697,4 +690,11 @@ Resources: Name: DatabaseName Type: String Value: !Ref DatabaseName - Description: PCA Glue catalog database name \ No newline at end of file + Description: PCA Glue catalog database name + +Outputs: + LLMPromptSummaryTemplateParameter: + Value: !Ref LLMPromptSummaryTemplateParameter + + LLMPromptQueryTemplateParameter: + Value: !Ref LLMPromptQueryTemplateParameter \ No newline at end of file From 513e1bf28ea0023ea0d31d6705fb0a3b15c03cbb Mon Sep 17 00:00:00 2001 From: Christopher Lott Date: Sun, 1 Oct 2023 22:03:23 -0700 Subject: [PATCH 05/14] Make both templates match, make genai options disabled by default --- pca-main-nokendra.template | 36 +++++++++++++++++++++--------------- pca-main.template | 6 +++--- 2 files changed, 24 insertions(+), 18 deletions(-) diff --git a/pca-main-nokendra.template b/pca-main-nokendra.template index 050e106a..91ee28e5 100644 --- a/pca-main-nokendra.template +++ b/pca-main-nokendra.template @@ -356,7 +356,7 @@ Parameters: - 'ANTHROPIC' Description: > If enabled, gives the ability to query an individual call for information. - The BEDROCK option requires your account to have Amazon Bedrock preview access. + The BEDROCK option requires you to choose one of the supported model IDs from the provided list (GenAIQueryBedrockModelId). The ANTHROPIC option is a third party service, and you must enter your Anthropic API key in the Third Party LLM API Key section. @@ -364,11 +364,11 @@ Parameters: Type: String Default: anthropic.claude-v2 AllowedValues: - - amazon.titan-tg1-large + - amazon.titan-text-express-v1 - anthropic.claude-v1 - anthropic.claude-instant-v1 - anthropic.claude-v2 - Description: (Optional) If 'GenAIQuery' is BEDROCK, which Bedrock model to use. (Bedrock preview access only) + Description: (Optional) If 'GenAIQuery' is BEDROCK, which Bedrock model to use. CallSummarization: Default: 'DISABLED' @@ -380,23 +380,21 @@ Parameters: - 'ANTHROPIC' - 'LAMBDA' Description: > - Set to enable call summarization by a Large Language Model. The SAGEMAKER option uses a SageMaker endpoint with - the pretrained bart-large-cnn-samsum model with a ml.m5.xlarge instance type. The LAMBDA option requires you - to provide a function ARN below. The ANTHROPIC option is a third party service, and you must enter your - Anthropic API key in the Third Party LLM API Key section. The BEDROCK option requires your account to have - Amazon Bedrock preview access. + Set to enable call summarization by a Large Language Model. + The BEDROCK option requires you to choose one of the supported model IDs from the provided list (SummarizationBedrockModelId). + The SAGEMAKER option uses a SageMaker endpoint with the pretrained bart-large-cnn-samsum model with a ml.m5.xlarge instance type. + The LAMBDA option requires you to provide a function ARN below. + The ANTHROPIC option is a third party service, and you must enter your Anthropic API key in the Third Party LLM API Key section. SummarizationBedrockModelId: Type: String - Default: anthropic.claude-v2 + Default: anthropic.claude-instant-v1 AllowedValues: - - amazon.titan-tg1-large - #- ai21.j2-grande-instruct - #- ai21.j2-jumbo-instruct + - amazon.titan-text-express-v1 - anthropic.claude-v1 - anthropic.claude-instant-v1 - anthropic.claude-v2 - Description: (Optional) If 'CallSummarization' is BEDROCK, which Bedrock model to use. (Bedrock preview access only) + Description: (Optional) If 'CallSummarization' is BEDROCK, which Bedrock model to use. 
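  # Illustrative note, not part of this template: with the BEDROCK defaults
  # above, a deployment can still pin the model through standard CloudFormation
  # parameter overrides. A minimal sketch (stack name assumed):
  #
  #   import boto3
  #   cfn = boto3.client("cloudformation")
  #   cfn.update_stack(
  #       StackName="PostCallAnalytics",
  #       UsePreviousTemplate=True,
  #       Capabilities=["CAPABILITY_IAM", "CAPABILITY_AUTO_EXPAND"],
  #       Parameters=[
  #           {"ParameterKey": "CallSummarization", "ParameterValue": "BEDROCK"},
  #           {"ParameterKey": "SummarizationBedrockModelId",
  #            "ParameterValue": "anthropic.claude-instant-v1"},
  #       ],
  #   )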
SummarizationSageMakerInitialInstanceCount: Type: Number @@ -409,7 +407,7 @@ Parameters: Type: String Description: > (Optional) If 'CallSummarization' or 'GenAIQuery' is ANTHROPIC, enter the provider API Key. ** Data will leave your AWS account ** - Default: undefined + Default: '' NoEcho: true SummarizationLambdaFunctionArn: @@ -924,4 +922,12 @@ Outputs: FetchTranscriptArn: Description: Lambda function arn that will generate a string of the entire transcript for custom Lambda functions to use. - Value: !GetAtt PCAServer.Outputs.FetchTranscriptArn \ No newline at end of file + Value: !GetAtt PCAServer.Outputs.FetchTranscriptArn + + LLMPromptSummaryTemplateParameter: + Description: The LLM summary prompt template in SSM Parameter Store - open to customise call summary prompts. + Value: !Sub "https://${AWS::Region}.console.aws.amazon.com/systems-manager/parameters/${SSM.Outputs.LLMPromptSummaryTemplateParameter}" + + LLMPromptQueryTemplateParameter: + Description: The LLM query prompt template in SSM Parameter Store - open to customise query prompts. + Value: !Sub "https://${AWS::Region}.console.aws.amazon.com/systems-manager/parameters/${SSM.Outputs.LLMPromptQueryTemplateParameter}" \ No newline at end of file diff --git a/pca-main.template b/pca-main.template index d01d495b..67021fe2 100644 --- a/pca-main.template +++ b/pca-main.template @@ -350,7 +350,7 @@ Parameters: steps required in Amazon QuickSight to (1) enable S3 access to PCA OutputBucket and (2) share dashboard and analytics assets. GenAIQuery: - Default: 'BEDROCK' + Default: 'DISABLED' Type: String AllowedValues: - 'DISABLED' @@ -373,7 +373,7 @@ Parameters: Description: (Optional) If 'GenAIQuery' is BEDROCK, which Bedrock model to use. CallSummarization: - Default: 'BEDROCK' + Default: 'DISABLED' Type: String AllowedValues: - 'DISABLED' @@ -409,7 +409,7 @@ Parameters: Type: String Description: > (Optional) If 'CallSummarization' or 'GenAIQuery' is ANTHROPIC, enter the provider API Key. ** Data will leave your AWS account ** - Default: undefined + Default: '' NoEcho: true SummarizationLambdaFunctionArn: From c48b145ed76d0366c635b3d5b87d349a5a4e6609 Mon Sep 17 00:00:00 2001 From: Christopher Lott Date: Sun, 1 Oct 2023 23:15:39 -0700 Subject: [PATCH 06/14] Add condition to not deploy LLM secret if it is undefined or empty --- pca-main-nokendra.template | 16 +++++++++++++--- pca-main.template | 16 +++++++++++++--- 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/pca-main-nokendra.template b/pca-main-nokendra.template index 91ee28e5..62fda953 100644 --- a/pca-main-nokendra.template +++ b/pca-main-nokendra.template @@ -357,6 +357,7 @@ Parameters: Description: > If enabled, gives the ability to query an individual call for information. The BEDROCK option requires you to choose one of the supported model IDs from the provided list (GenAIQueryBedrockModelId). + You must also accept access to that model in the Amazon Bedrock > Model Access console. The ANTHROPIC option is a third party service, and you must enter your Anthropic API key in the Third Party LLM API Key section. @@ -382,6 +383,7 @@ Parameters: Description: > Set to enable call summarization by a Large Language Model. The BEDROCK option requires you to choose one of the supported model IDs from the provided list (SummarizationBedrockModelId). + You must also accept access to that model in the Amazon Bedrock > Model Access console. The SAGEMAKER option uses a SageMaker endpoint with the pretrained bart-large-cnn-samsum model with a ml.m5.xlarge instance type. 
The LAMBDA option requires you to provide a function ARN below. The ANTHROPIC option is a third party service, and you must enter your Anthropic API key in the Third Party LLM API Key section. @@ -407,7 +409,7 @@ Parameters: Type: String Description: > (Optional) If 'CallSummarization' or 'GenAIQuery' is ANTHROPIC, enter the provider API Key. ** Data will leave your AWS account ** - Default: '' + Default: undefined NoEcho: true SummarizationLambdaFunctionArn: @@ -536,6 +538,7 @@ Conditions: ShouldDeployPcaDashboards: !Equals [!Ref EnablePcaDashboards, 'Yes'] ShouldLoadSampleFiles: !Equals [!Ref loadSampleAudioFiles, 'true'] ShouldDeployBedrockBoto3Layer: !Or [!Equals [!Ref CallSummarization, 'BEDROCK'], !Equals [!Ref GenAIQuery, 'BEDROCK'],] + ShouldDeployLLMThirdPartyApiKey: !And [!Not [!Equals [!Ref SummarizationLLMThirdPartyApiKey, '']], !Not [!Equals [!Ref SummarizationLLMThirdPartyApiKey, undefined]]] Resources: ######################################################## @@ -691,6 +694,7 @@ Resources: TemplateURL: pca-boto3-bedrock/template.yaml LLMThirdPartyApiKeySecret: + Condition: ShouldDeployLLMThirdPartyApiKey Type: AWS::SecretsManager::Secret Properties: Description: API Key @@ -712,7 +716,10 @@ Resources: CallSummarization: !Ref CallSummarization SummarizationBedrockModelId: !Ref SummarizationBedrockModelId SummarizationSageMakerInitialInstanceCount: !Ref SummarizationSageMakerInitialInstanceCount - SummarizationLLMThirdPartyApiKey: !Ref LLMThirdPartyApiKeySecret + SummarizationLLMThirdPartyApiKey: !If + - ShouldDeployLLMThirdPartyApiKey + - !Ref LLMThirdPartyApiKeySecret + - '' SummarizationLambdaFunctionArn: !Ref SummarizationLambdaFunctionArn PyUtilsLayerArn: !GetAtt PythonUtilsLayer.Outputs.PyUtilsLayer Boto3LayerArn: !If @@ -743,7 +750,10 @@ Resources: GenAIQueryType: !Ref GenAIQuery GenAIQueryBedrockModelId: !Ref GenAIQueryBedrockModelId FetchTranscriptArn: !GetAtt PCAServer.Outputs.FetchTranscriptArn - LLMThirdPartyApiKey: !Ref LLMThirdPartyApiKeySecret + LLMThirdPartyApiKey: !If + - ShouldDeployLLMThirdPartyApiKey + - !Ref LLMThirdPartyApiKeySecret + - '' PyUtilsLayerArn: !GetAtt PythonUtilsLayer.Outputs.PyUtilsLayer Boto3LayerArn: !If - ShouldDeployBedrockBoto3Layer diff --git a/pca-main.template b/pca-main.template index 67021fe2..eab01f28 100644 --- a/pca-main.template +++ b/pca-main.template @@ -359,6 +359,7 @@ Parameters: Description: > If enabled, gives the ability to query an individual call for information. The BEDROCK option requires you to choose one of the supported model IDs from the provided list (GenAIQueryBedrockModelId). + You must also accept access to that model in the Amazon Bedrock > Model Access console. The ANTHROPIC option is a third party service, and you must enter your Anthropic API key in the Third Party LLM API Key section. @@ -384,6 +385,7 @@ Parameters: Description: > Set to enable call summarization by a Large Language Model. The BEDROCK option requires you to choose one of the supported model IDs from the provided list (SummarizationBedrockModelId). + You must also accept access to that model in the Amazon Bedrock > Model Access console. The SAGEMAKER option uses a SageMaker endpoint with the pretrained bart-large-cnn-samsum model with a ml.m5.xlarge instance type. The LAMBDA option requires you to provide a function ARN below. The ANTHROPIC option is a third party service, and you must enter your Anthropic API key in the Third Party LLM API Key section. 
@@ -409,7 +411,7 @@ Parameters: Type: String Description: > (Optional) If 'CallSummarization' or 'GenAIQuery' is ANTHROPIC, enter the provider API Key. ** Data will leave your AWS account ** - Default: '' + Default: undefined NoEcho: true SummarizationLambdaFunctionArn: @@ -538,6 +540,7 @@ Conditions: ShouldDeployPcaDashboards: !Equals [!Ref EnablePcaDashboards, 'Yes'] ShouldLoadSampleFiles: !Equals [!Ref loadSampleAudioFiles, 'true'] ShouldDeployBedrockBoto3Layer: !Or [!Equals [!Ref CallSummarization, 'BEDROCK'], !Equals [!Ref GenAIQuery, 'BEDROCK'],] + ShouldDeployLLMThirdPartyApiKey: !And [!Not [!Equals [!Ref SummarizationLLMThirdPartyApiKey, '']], !Not [!Equals [!Ref SummarizationLLMThirdPartyApiKey, undefined]]] Resources: ######################################################## @@ -823,6 +826,7 @@ Resources: TemplateURL: pca-boto3-bedrock/template.yaml LLMThirdPartyApiKeySecret: + Condition: ShouldDeployLLMThirdPartyApiKey Type: AWS::SecretsManager::Secret Properties: Description: API Key @@ -844,7 +848,10 @@ Resources: CallSummarization: !Ref CallSummarization SummarizationBedrockModelId: !Ref SummarizationBedrockModelId SummarizationSageMakerInitialInstanceCount: !Ref SummarizationSageMakerInitialInstanceCount - SummarizationLLMThirdPartyApiKey: !Ref LLMThirdPartyApiKeySecret + SummarizationLLMThirdPartyApiKey: !If + - ShouldDeployLLMThirdPartyApiKey + - !Ref LLMThirdPartyApiKeySecret + - '' SummarizationLambdaFunctionArn: !Ref SummarizationLambdaFunctionArn PyUtilsLayerArn: !GetAtt PythonUtilsLayer.Outputs.PyUtilsLayer Boto3LayerArn: !If @@ -875,7 +882,10 @@ Resources: GenAIQueryType: !Ref GenAIQuery GenAIQueryBedrockModelId: !Ref GenAIQueryBedrockModelId FetchTranscriptArn: !GetAtt PCAServer.Outputs.FetchTranscriptArn - LLMThirdPartyApiKey: !Ref LLMThirdPartyApiKeySecret + LLMThirdPartyApiKey: !If + - ShouldDeployLLMThirdPartyApiKey + - !Ref LLMThirdPartyApiKeySecret + - '' PyUtilsLayerArn: !GetAtt PythonUtilsLayer.Outputs.PyUtilsLayer Boto3LayerArn: !If - ShouldDeployBedrockBoto3Layer From 2d1a54690f32005a9dc687bb23a7f131741bef2a Mon Sep 17 00:00:00 2001 From: Christopher Lott Date: Sun, 1 Oct 2023 23:19:55 -0700 Subject: [PATCH 07/14] Update readmes and changelogs --- CHANGELOG.md | 3 ++- README.md | 2 +- docs/generative_ai.md | 2 ++ 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 49cca674..26306f15 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [0.7.2] - Unreleased ### Fixed - Bedrock GA support -- Prompt updates for Bedrock GA release, updated GenerativeAI readme +- Prompt updates for Bedrock GA release (formatting, multiple prompts per call) +- Updated GenerativeAI README and main README with model access details - Links to the LLM Parameter Store Prompts from the CloudFormation Output - Adaptive retries for SSM GetParameter and InvokeModel to prevent throttling errors diff --git a/README.md b/README.md index 6f11913a..2fc43a56 100644 --- a/README.md +++ b/README.md @@ -94,7 +94,7 @@ When deploying PCA, the CloudFormation parameter `CallSummarization` value defin If `DISABLED` is chosen, the PCA step function will bypass the summarization step. -If `BEDROCK` is chosen, you must select the Bedrock model `SummarizationBedrockModelId` parameter. +If `BEDROCK` is chosen, you must select the Bedrock model `SummarizationBedrockModelId` parameter. 
You must [request model access](https://docs.aws.amazon.com/bedrock/latest/userguide/model-access.html) for the model selected. If `SAGEMAKER` is chosen, PCA will be deployed with the [HuggingFace bart-large-cnn-samsum](https://huggingface.co/philschmid/bart-large-cnn-samsum) model on a `ml.m5.xlarge` instance type. By default, it is deployed as a single instance count, defined by the `SummarizationSageMakerInitialInstanceCount` parameter. If `SummarizationSageMakerInitialInstanceCount` is set to `0`, the endpoint will be deployed as a [SageMaker Serverless Inference](https://docs.aws.amazon.com/sagemaker/latest/dg/serverless-endpoints.html) endpoint. diff --git a/docs/generative_ai.md b/docs/generative_ai.md index 4e190742..8cc70da3 100644 --- a/docs/generative_ai.md +++ b/docs/generative_ai.md @@ -3,6 +3,8 @@ Post-Call Analytics has an optional step in the step function workflow to generate insights with generative AI. PCA supports [Amazon Bedrock](https://aws.amazon.com/bedrock/) (Titan or Anthropic models) and [Anthropic](https://www.anthropic.com/) (3rd party) foundational models (FMs). Customers may also write a Lambda function and provide PCA the ARN, and use any FM of their choice. The prompts below are based on Anthropic's prompt formats. Learn more about prompt design at Anthropic's [Introduction to Prompt Design].(https://docs.anthropic.com/claude/docs/introduction-to-prompt-design). +For Amazon Bedrock models, you must [request model access](https://docs.aws.amazon.com/bedrock/latest/userguide/model-access.html) for the models selected. + PCA also supports 'Generative AI Queries' - which simply means you can ask questions about a specific call. These queries appear in a chat-like window from within the call details page. *All the prompts below were tested with Amazon Titan and Anthropic FMs.* From bdf3d6ed61194ab2546d87904a782f526c194d26 Mon Sep 17 00:00:00 2001 From: Christopher Lott Date: Mon, 2 Oct 2023 11:37:42 -0700 Subject: [PATCH 08/14] Default GenAI options to Bedrock. --- pca-main-nokendra.template | 4 ++-- pca-main.template | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pca-main-nokendra.template b/pca-main-nokendra.template index 62fda953..bcb1586c 100644 --- a/pca-main-nokendra.template +++ b/pca-main-nokendra.template @@ -348,7 +348,7 @@ Parameters: steps required in Amazon QuickSight to (1) enable S3 access to PCA OutputBucket and (2) share dashboard and analytics assets. GenAIQuery: - Default: 'DISABLED' + Default: 'BEDROCK' Type: String AllowedValues: - 'DISABLED' @@ -372,7 +372,7 @@ Parameters: Description: (Optional) If 'GenAIQuery' is BEDROCK, which Bedrock model to use. CallSummarization: - Default: 'DISABLED' + Default: 'BEDROCK' Type: String AllowedValues: - 'DISABLED' diff --git a/pca-main.template b/pca-main.template index eab01f28..401f8472 100644 --- a/pca-main.template +++ b/pca-main.template @@ -350,7 +350,7 @@ Parameters: steps required in Amazon QuickSight to (1) enable S3 access to PCA OutputBucket and (2) share dashboard and analytics assets. GenAIQuery: - Default: 'DISABLED' + Default: 'BEDROCK' Type: String AllowedValues: - 'DISABLED' @@ -374,7 +374,7 @@ Parameters: Description: (Optional) If 'GenAIQuery' is BEDROCK, which Bedrock model to use. 
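  # Illustrative sketch, not part of this diff: because BEDROCK becomes the
  # default here, deploys assume model access was granted in the Bedrock
  # console. Listing what a region offers is a quick sanity check; a real
  # InvokeModel call, as in the validation Lambda of the next patch, is the
  # definitive access test (region below is assumed):
  #
  #   import boto3
  #   bedrock = boto3.client("bedrock", region_name="us-east-1")
  #   for m in bedrock.list_foundation_models()["modelSummaries"]:
  #       print(m["modelId"])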
CallSummarization: - Default: 'DISABLED' + Default: 'BEDROCK' Type: String AllowedValues: - 'DISABLED' From 2d11071a546ec469e3247a81d7ed28cdceafd0ae Mon Sep 17 00:00:00 2001 From: Bob Strahan Date: Mon, 2 Oct 2023 19:53:33 +0000 Subject: [PATCH 09/14] Test that Bedrock service and selected models are available in account during stack create/update to avoid downstream failures. --- pca-main-nokendra.template | 150 +++++++++++++++++++++++++++++++++++++ pca-main.template | 150 +++++++++++++++++++++++++++++++++++++ 2 files changed, 300 insertions(+) diff --git a/pca-main-nokendra.template b/pca-main-nokendra.template index bcb1586c..fd1e0067 100644 --- a/pca-main-nokendra.template +++ b/pca-main-nokendra.template @@ -539,6 +539,9 @@ Conditions: ShouldLoadSampleFiles: !Equals [!Ref loadSampleAudioFiles, 'true'] ShouldDeployBedrockBoto3Layer: !Or [!Equals [!Ref CallSummarization, 'BEDROCK'], !Equals [!Ref GenAIQuery, 'BEDROCK'],] ShouldDeployLLMThirdPartyApiKey: !And [!Not [!Equals [!Ref SummarizationLLMThirdPartyApiKey, '']], !Not [!Equals [!Ref SummarizationLLMThirdPartyApiKey, undefined]]] + ShouldTestBedrockModelId: !Or [!Equals [!Ref CallSummarization, 'BEDROCK'], !Equals [!Ref GenAIQuery, 'BEDROCK'],] + ShouldTestGenAIQueryBedrockModelId: !Equals [!Ref GenAIQuery, 'BEDROCK'] + ShouldTestSummarizationBedrockModelId: !Equals [!Ref CallSummarization, 'BEDROCK'] Resources: ######################################################## @@ -607,6 +610,153 @@ Resources: - ServerSideEncryptionByDefault: SSEAlgorithm: AES256 + ######################################################## + # If Bedrock Models are selected, verify that Bedrock + # is avialble in the region, and that models are enabled + ######################################################## + + TestBedrockModelFunctionRole: + Type: AWS::IAM::Role + Condition: ShouldTestBedrockModelId + Properties: + AssumeRolePolicyDocument: + Version: '2012-10-17' + Statement: + - Effect: Allow + Principal: + Service: lambda.amazonaws.com + Action: sts:AssumeRole + ManagedPolicyArns: + - arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole + Policies: + - PolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Action: + - "bedrock:InvokeModel" + Resource: + - !Sub "arn:${AWS::Partition}:bedrock:*::foundation-model/*" + - !Sub "arn:${AWS::Partition}:bedrock:*:${AWS::AccountId}:custom-model/*" + PolicyName: BedrockPolicy + + TestBedrockModelFunction: + Type: AWS::Lambda::Function + Condition: ShouldTestBedrockModelId + Properties: + Handler: index.lambda_handler + Role: !GetAtt 'TestBedrockModelFunctionRole.Arn' + Runtime: python3.11 + Timeout: 60 + MemorySize: 128 + Code: + ZipFile: !Sub | + import cfnresponse + import json + import subprocess + import os + import sys + print("install latest boto3 to get bedrock service support") + subprocess.check_call([sys.executable, '-m', 'pip', 'install', '--target', '/tmp', 'boto3']) + sys.path.insert(0,'/tmp') + import boto3 + + # Defaults + AWS_REGION = os.environ["AWS_REGION_OVERRIDE"] if "AWS_REGION_OVERRIDE" in os.environ else os.environ["AWS_REGION"] + ENDPOINT_URL = os.environ.get("ENDPOINT_URL", f'https://bedrock-runtime.{AWS_REGION}.amazonaws.com') + DEFAULT_MAX_TOKENS = 128 + + def get_request_body(modelId, parameters, prompt): + provider = modelId.split(".")[0] + request_body = None + if provider == "anthropic": + request_body = { + "prompt": prompt, + "max_tokens_to_sample": DEFAULT_MAX_TOKENS + } + request_body.update(parameters) + elif provider == "ai21": + request_body = { + 
"prompt": prompt, + "maxTokens": DEFAULT_MAX_TOKENS + } + request_body.update(parameters) + elif provider == "amazon": + textGenerationConfig = { + "maxTokenCount": DEFAULT_MAX_TOKENS + } + textGenerationConfig.update(parameters) + request_body = { + "inputText": prompt, + "textGenerationConfig": textGenerationConfig + } + else: + raise Exception("Unsupported provider: ", provider) + return request_body + + def get_generate_text(modelId, response): + provider = modelId.split(".")[0] + generated_text = None + if provider == "anthropic": + response_body = json.loads(response.get("body").read().decode()) + generated_text = response_body.get("completion") + elif provider == "ai21": + response_body = json.loads(response.get("body").read()) + generated_text = response_body.get("completions")[0].get("data").get("text") + elif provider == "amazon": + response_body = json.loads(response.get("body").read()) + generated_text = response_body.get("results")[0].get("outputText") + else: + raise Exception("Unsupported provider: ", provider) + return generated_text + + def call_llm(parameters, prompt): + modelId = parameters.pop("modelId") + body = get_request_body(modelId, parameters, prompt) + print("ModelId", modelId, "- Body: ", body) + client = boto3.client(service_name='bedrock-runtime', region_name=AWS_REGION, endpoint_url=ENDPOINT_URL) + response = client.invoke_model(body=json.dumps(body), modelId=modelId, accept='application/json', contentType='application/json') + generated_text = get_generate_text(modelId, response) + return generated_text + + def lambda_handler(event, context): + print("Event: ", json.dumps(event)) + global client + status = cfnresponse.SUCCESS + responseData = {} + reason = "Success" + modelId = "" + if event['RequestType'] != 'Delete': + prompt = "\n\nHuman: Why is the sky blue?\n\nAssistant:" + try: + # Test LLMModel + llmModelId = event['ResourceProperties'].get('LLMModelId', '') + modelId = llmModelId + parameters = { + "modelId": modelId, + "temperature": 0 + } + print(f"Testing {modelId}") + call_llm(parameters, prompt) + except Exception as e: + status = cfnresponse.FAILED + reason = f"Exception thrown testing ModelId='{modelId}'. 
Check that Amazon Bedrock is available in your region, and that model is activated in your Amazon Bedrock account - {e}" + print(f"Status: {status}, Reason: {reason}") + cfnresponse.send(event, context, status, responseData, reason=reason) + + TestGenAIQueryBedrockModelId: + Type: Custom::TestGenAIQueryBedrockModelId + Condition: ShouldTestGenAIQueryBedrockModelId + Properties: + ServiceToken: !GetAtt TestBedrockModelFunction.Arn + LLMModelId: !Ref GenAIQueryBedrockModelId + + TestSummarizationBedrockModelId: + Type: Custom::SummarizationBedrockModelId + Condition: ShouldTestSummarizationBedrockModelId + Properties: + ServiceToken: !GetAtt TestBedrockModelFunction.Arn + LLMModelId: !Ref SummarizationBedrockModelId ######################################################## # SSM Stack diff --git a/pca-main.template b/pca-main.template index 401f8472..8b0a6a38 100644 --- a/pca-main.template +++ b/pca-main.template @@ -541,6 +541,10 @@ Conditions: ShouldLoadSampleFiles: !Equals [!Ref loadSampleAudioFiles, 'true'] ShouldDeployBedrockBoto3Layer: !Or [!Equals [!Ref CallSummarization, 'BEDROCK'], !Equals [!Ref GenAIQuery, 'BEDROCK'],] ShouldDeployLLMThirdPartyApiKey: !And [!Not [!Equals [!Ref SummarizationLLMThirdPartyApiKey, '']], !Not [!Equals [!Ref SummarizationLLMThirdPartyApiKey, undefined]]] + ShouldTestBedrockModelId: !Or [!Equals [!Ref CallSummarization, 'BEDROCK'], !Equals [!Ref GenAIQuery, 'BEDROCK'],] + ShouldTestGenAIQueryBedrockModelId: !Equals [!Ref GenAIQuery, 'BEDROCK'] + ShouldTestSummarizationBedrockModelId: !Equals [!Ref CallSummarization, 'BEDROCK'] + Resources: ######################################################## @@ -738,7 +742,153 @@ Resources: Facetable: true Type: 'STRING_LIST_VALUE' + ######################################################## + # If Bedrock Models are selected, verify that Bedrock + # is avialble in the region, and that models are enabled + ######################################################## + TestBedrockModelFunctionRole: + Type: AWS::IAM::Role + Condition: ShouldTestBedrockModelId + Properties: + AssumeRolePolicyDocument: + Version: '2012-10-17' + Statement: + - Effect: Allow + Principal: + Service: lambda.amazonaws.com + Action: sts:AssumeRole + ManagedPolicyArns: + - arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole + Policies: + - PolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Action: + - "bedrock:InvokeModel" + Resource: + - !Sub "arn:${AWS::Partition}:bedrock:*::foundation-model/*" + - !Sub "arn:${AWS::Partition}:bedrock:*:${AWS::AccountId}:custom-model/*" + PolicyName: BedrockPolicy + + TestBedrockModelFunction: + Type: AWS::Lambda::Function + Condition: ShouldTestBedrockModelId + Properties: + Handler: index.lambda_handler + Role: !GetAtt 'TestBedrockModelFunctionRole.Arn' + Runtime: python3.11 + Timeout: 60 + MemorySize: 128 + Code: + ZipFile: !Sub | + import cfnresponse + import json + import subprocess + import os + import sys + print("install latest boto3 to get bedrock service support") + subprocess.check_call([sys.executable, '-m', 'pip', 'install', '--target', '/tmp', 'boto3']) + sys.path.insert(0,'/tmp') + import boto3 + + # Defaults + AWS_REGION = os.environ["AWS_REGION_OVERRIDE"] if "AWS_REGION_OVERRIDE" in os.environ else os.environ["AWS_REGION"] + ENDPOINT_URL = os.environ.get("ENDPOINT_URL", f'https://bedrock-runtime.{AWS_REGION}.amazonaws.com') + DEFAULT_MAX_TOKENS = 128 + + def get_request_body(modelId, parameters, prompt): + provider = modelId.split(".")[0] + request_body = None + if 
provider == "anthropic": + request_body = { + "prompt": prompt, + "max_tokens_to_sample": DEFAULT_MAX_TOKENS + } + request_body.update(parameters) + elif provider == "ai21": + request_body = { + "prompt": prompt, + "maxTokens": DEFAULT_MAX_TOKENS + } + request_body.update(parameters) + elif provider == "amazon": + textGenerationConfig = { + "maxTokenCount": DEFAULT_MAX_TOKENS + } + textGenerationConfig.update(parameters) + request_body = { + "inputText": prompt, + "textGenerationConfig": textGenerationConfig + } + else: + raise Exception("Unsupported provider: ", provider) + return request_body + + def get_generate_text(modelId, response): + provider = modelId.split(".")[0] + generated_text = None + if provider == "anthropic": + response_body = json.loads(response.get("body").read().decode()) + generated_text = response_body.get("completion") + elif provider == "ai21": + response_body = json.loads(response.get("body").read()) + generated_text = response_body.get("completions")[0].get("data").get("text") + elif provider == "amazon": + response_body = json.loads(response.get("body").read()) + generated_text = response_body.get("results")[0].get("outputText") + else: + raise Exception("Unsupported provider: ", provider) + return generated_text + + def call_llm(parameters, prompt): + modelId = parameters.pop("modelId") + body = get_request_body(modelId, parameters, prompt) + print("ModelId", modelId, "- Body: ", body) + client = boto3.client(service_name='bedrock-runtime', region_name=AWS_REGION, endpoint_url=ENDPOINT_URL) + response = client.invoke_model(body=json.dumps(body), modelId=modelId, accept='application/json', contentType='application/json') + generated_text = get_generate_text(modelId, response) + return generated_text + + def lambda_handler(event, context): + print("Event: ", json.dumps(event)) + global client + status = cfnresponse.SUCCESS + responseData = {} + reason = "Success" + modelId = "" + if event['RequestType'] != 'Delete': + prompt = "\n\nHuman: Why is the sky blue?\n\nAssistant:" + try: + # Test LLMModel + llmModelId = event['ResourceProperties'].get('LLMModelId', '') + modelId = llmModelId + parameters = { + "modelId": modelId, + "temperature": 0 + } + print(f"Testing {modelId}") + call_llm(parameters, prompt) + except Exception as e: + status = cfnresponse.FAILED + reason = f"Exception thrown testing ModelId='{modelId}'. Check that Amazon Bedrock is available in your region, and that model is activated in your Amazon Bedrock account - {e}" + print(f"Status: {status}, Reason: {reason}") + cfnresponse.send(event, context, status, responseData, reason=reason) + + TestGenAIQueryBedrockModelId: + Type: Custom::TestGenAIQueryBedrockModelId + Condition: ShouldTestGenAIQueryBedrockModelId + Properties: + ServiceToken: !GetAtt TestBedrockModelFunction.Arn + LLMModelId: !Ref GenAIQueryBedrockModelId + + TestSummarizationBedrockModelId: + Type: Custom::SummarizationBedrockModelId + Condition: ShouldTestSummarizationBedrockModelId + Properties: + ServiceToken: !GetAtt TestBedrockModelFunction.Arn + LLMModelId: !Ref SummarizationBedrockModelId ######################################################## # SSM Stack From 3b2ea604e5b3cca8291f2e5ede9628f5c3c25ce7 Mon Sep 17 00:00:00 2001 From: Bob Strahan Date: Mon, 2 Oct 2023 19:58:24 +0000 Subject: [PATCH 10/14] Test that Bedrock service and selected models are available in account during stack create/update to avoid downstream failures. 
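The check can also be reproduced outside CloudFormation with a few lines of boto3. A minimal sketch of the custom resource's happy path, assuming a recent boto3 with bedrock-runtime support, region us-east-1, and 'anthropic.claude-v2' (used here purely as an example model ID) already granted model access in the account:

    import json
    import boto3

    # Same smoke test TestBedrockModelFunction performs: request one short
    # completion; any exception means Bedrock is unavailable in the region
    # or the model is not activated for the account.
    client = boto3.client("bedrock-runtime", region_name="us-east-1")
    body = {
        "prompt": "\n\nHuman: Why is the sky blue?\n\nAssistant:",
        "max_tokens_to_sample": 128,
        "temperature": 0,
    }
    response = client.invoke_model(
        body=json.dumps(body),
        modelId="anthropic.claude-v2",
        accept="application/json",
        contentType="application/json",
    )
    print(json.loads(response["body"].read()).get("completion"))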
--- pca-main-nokendra.template | 2 +- pca-main.template | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pca-main-nokendra.template b/pca-main-nokendra.template index fd1e0067..e4c6efa2 100644 --- a/pca-main-nokendra.template +++ b/pca-main-nokendra.template @@ -612,7 +612,7 @@ Resources: ######################################################## # If Bedrock Models are selected, verify that Bedrock - # is avialble in the region, and that models are enabled + # is available in the region, and that models are enabled ######################################################## TestBedrockModelFunctionRole: diff --git a/pca-main.template b/pca-main.template index 8b0a6a38..05e10ba8 100644 --- a/pca-main.template +++ b/pca-main.template @@ -744,7 +744,7 @@ Resources: ######################################################## # If Bedrock Models are selected, verify that Bedrock - # is avialble in the region, and that models are enabled + # is available in the region, and that models are enabled ######################################################## TestBedrockModelFunctionRole: From f984211f82f9f83cfde6e17d4f2fd40bc6770203 Mon Sep 17 00:00:00 2001 From: Bob Strahan Date: Mon, 2 Oct 2023 20:06:31 +0000 Subject: [PATCH 11/14] v0.7.2 --- CHANGELOG.md | 9 +++++---- pca-main-nokendra.template | 2 +- pca-main.template | 2 +- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 26306f15..70fb7751 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,9 +4,9 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [0.7.2] - Unreleased +## [0.7.2] - 2023-10-03 ### Fixed -- Bedrock GA support +- Enable Bedrock GA by default for call summarization and chat/generative query - Prompt updates for Bedrock GA release (formatting, multiple prompts per call) - Updated GenerativeAI README and main README with model access details - Links to the LLM Parameter Store Prompts from the CloudFormation Output @@ -135,8 +135,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - Initial release -[Unreleased]: https://github.com/aws-samples/amazon-transcribe-post-call-analytics/compare/v0.7.1...develop -[0.7.1]: https://github.com/aws-samples/amazon-transcribe-post-call-analytics/releases/tag/v0.7.0 +[Unreleased]: https://github.com/aws-samples/amazon-transcribe-post-call-analytics/compare/v0.7.2...develop +[0.7.2]: https://github.com/aws-samples/amazon-transcribe-post-call-analytics/releases/tag/v0.7.2 +[0.7.1]: https://github.com/aws-samples/amazon-transcribe-post-call-analytics/releases/tag/v0.7.1 [0.7.0]: https://github.com/aws-samples/amazon-transcribe-post-call-analytics/releases/tag/v0.7.0 [0.6.0]: https://github.com/aws-samples/amazon-transcribe-post-call-analytics/releases/tag/v0.6.0 [0.5.2]: https://github.com/aws-samples/amazon-transcribe-post-call-analytics/releases/tag/v0.5.2 diff --git a/pca-main-nokendra.template b/pca-main-nokendra.template index e4c6efa2..b9e177e5 100644 --- a/pca-main-nokendra.template +++ b/pca-main-nokendra.template @@ -1,6 +1,6 @@ AWSTemplateFormatVersion: "2010-09-09" -Description: Amazon Transcribe Post Call Analytics - PCA (v0.7.1) (uksb-1sn29lk73) +Description: Amazon Transcribe Post Call Analytics - PCA (v0.7.2) (uksb-1sn29lk73) Parameters: diff --git a/pca-main.template b/pca-main.template index 
05e10ba8..bc6404b7 100644 --- a/pca-main.template +++ b/pca-main.template @@ -1,6 +1,6 @@ AWSTemplateFormatVersion: "2010-09-09" -Description: Amazon Transcribe Post Call Analytics - PCA (v0.7.1) (uksb-1sn29lk73) +Description: Amazon Transcribe Post Call Analytics - PCA (v0.7.2) (uksb-1sn29lk73) Parameters: From c9137fcd63c364cb75b45fe4fe5f2d23e0cac973 Mon Sep 17 00:00:00 2001 From: Christopher Lott Date: Mon, 2 Oct 2023 15:07:23 -0700 Subject: [PATCH 12/14] Fix genai query UI panels to line up properly and scroll when text overflows, update query prompt. --- pca-ssm/cfn/ssm.template | 7 ++-- pca-ui/src/www/src/routes/Dashboard/index.js | 39 +++++++++++--------- 2 files changed, 24 insertions(+), 22 deletions(-) diff --git a/pca-ssm/cfn/ssm.template b/pca-ssm/cfn/ssm.template index af614cc2..ecf4772f 100644 --- a/pca-ssm/cfn/ssm.template +++ b/pca-ssm/cfn/ssm.template @@ -106,10 +106,9 @@ Parameters: Type: String Description: This is the LLM prompt template to use when querying an individual call transcript. Default: >-
-        <br><br>Human: You are an AI chatbot. Carefully read the following transcript within <transcript></transcript>
-        and then provide a short answer to the question. If the answer cannot be determined from the transcript or
-        the context, then reply saying Sorry, I don't know. Use gender neutral pronouns. Skip the preamble; when you reply, only
-        respond with the answer.<br><br><question>{question}</question><br><br><transcript>{transcript}</transcript><br><br>Assistant:
+        <br><br>Human: You are an AI chatbot. Carefully read the following transcript within <transcript></transcript> tags. Provide a
+        short answer to the question at the end. If the answer cannot be determined from the transcript, then reply saying Sorry,
+        I don't know. Use gender neutral pronouns. Do not use XML tags in the answer.<br><br><transcript>{transcript}</transcript><br><br>{question}<br><br>
Assistant: MaxSpeakers: Type: String diff --git a/pca-ui/src/www/src/routes/Dashboard/index.js b/pca-ui/src/www/src/routes/Dashboard/index.js index a3efd879..673b2be2 100644 --- a/pca-ui/src/www/src/routes/Dashboard/index.js +++ b/pca-ui/src/www/src/routes/Dashboard/index.js @@ -670,7 +670,7 @@ function Dashboard({ setAlert }) { {window.pcaSettings.genai.query && ( Generative AI Query @@ -681,7 +681,7 @@ function Dashboard({ setAlert }) { } > -
+
{genAiQueries.length > 0 ? genAiQueries.map((entry, i) => ( @@ -695,28 +695,31 @@ function Dashboard({ setAlert }) { {isTranscribeCallAnalyticsMode && ( Call Analytics Summary } + > - {!data && !error ? ( -

No summary available.

- ) : ( - - - {issuesTab()} - - - {actionItemsTab()} - - - {outcomesTab()} - - - )} +
+ {!data && !error ? ( +

No summary available.

+ ) : ( + + + {issuesTab()} + + + {actionItemsTab()} + + + {outcomesTab()} + + + )} +
)} Date: Tue, 3 Oct 2023 15:03:57 +0000 Subject: [PATCH 13/14] enable AI summary columns in Call List UI by default --- pca-ui/src/www/src/components/ContactTable.js | 14 +++++++------- .../www/src/components/ContactTablePreferences.js | 12 ++++++++---- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/pca-ui/src/www/src/components/ContactTable.js b/pca-ui/src/www/src/components/ContactTable.js index 235c9c34..2d4db068 100644 --- a/pca-ui/src/www/src/components/ContactTable.js +++ b/pca-ui/src/www/src/components/ContactTable.js @@ -54,7 +54,7 @@ const COLUMN_DEFINITIONS = [ cell: (d) => d.agent, isRowHeader: true, sortingField: "agent", - minWidth:150 + minWidth:130 }, { id: "customer", @@ -62,7 +62,7 @@ const COLUMN_DEFINITIONS = [ cell: (d) => d.customer, isRowHeader: true, sortingField: "customer", - minWidth:150 + minWidth:130 }, { id: "queue", @@ -70,7 +70,7 @@ const COLUMN_DEFINITIONS = [ cell: (d) => d.queue, isRowHeader: true, sortingField: "queue", - minWidth:150 + minWidth:130 }, { id: "summary_resolved", @@ -78,7 +78,7 @@ const COLUMN_DEFINITIONS = [ cell: (d) => d.summary_resolved, isRowHeader: true, sortingField: "summary_resolved", - minWidth:170 + minWidth:130 }, { id: "summary_topic", @@ -86,7 +86,7 @@ const COLUMN_DEFINITIONS = [ cell: (d) => d.summary_topic, isRowHeader: true, sortingField: "summary_topic", - Width:150 + Width:130 }, { id: "summary_product", @@ -94,7 +94,7 @@ const COLUMN_DEFINITIONS = [ cell: (d) => d.summary_product, isRowHeader: true, sortingField: "summary_product", - minWidth:150 + minWidth:130 }, { id: "summary_summary", @@ -140,7 +140,7 @@ const COLUMN_DEFINITIONS = [ id: "langCode", header:
Lang Code
, cell: (d) => d.lang, - minWidth: 130, + minWidth: 100, }, { id: "duration", diff --git a/pca-ui/src/www/src/components/ContactTablePreferences.js b/pca-ui/src/www/src/components/ContactTablePreferences.js index e31fcedd..842f0beb 100644 --- a/pca-ui/src/www/src/components/ContactTablePreferences.js +++ b/pca-ui/src/www/src/components/ContactTablePreferences.js @@ -16,10 +16,10 @@ const VISIBLE_CONTENT_OPTIONS = [{ { id: "agent", label: "Agent", visible: false }, { id: "customer", label: "Customer", visible: false }, { id: "queue", label: "Queue", visible: false }, - { id: "summary_resolved", label: "Resolved", visible: false }, - { id: "summary_topic", label: "Topic", visible: false }, - { id: "summary_product", label: "Product", visible: false }, - { id: "summary_summary", label: "Summary", visible: false }, + { id: "summary_resolved", label: "Resolved", visible: true }, + { id: "summary_topic", label: "Topic", visible: true }, + { id: "summary_product", label: "Product", visible: true }, + { id: "summary_summary", label: "Summary", visible: true }, { id: "callerSentimentScore", label: "Cust Sentiment", visible: true }, { id: "langCode", label: "Lang Code", visible: true }, { id: "duration", label: "Duration", visible: true }, @@ -37,6 +37,10 @@ export const DEFAULT_PREFERENCES = { 'timestamp', 'jobName', 'agent', + 'summary_resolved', + 'summary_topic', + 'summary_product', + 'summary_summary', 'callerSentimentScore', 'langCode', 'duration' From e2c1467abac727865608ea1294218a48fa45c660 Mon Sep 17 00:00:00 2001 From: Bob Strahan Date: Tue, 3 Oct 2023 15:49:21 +0000 Subject: [PATCH 14/14] Update Version property of Deploy custom resource to force deployment on updates --- pca-ui/cfn/lib/deploy.template | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pca-ui/cfn/lib/deploy.template b/pca-ui/cfn/lib/deploy.template index 5ec4a561..590d85be 100644 --- a/pca-ui/cfn/lib/deploy.template +++ b/pca-ui/cfn/lib/deploy.template @@ -91,7 +91,7 @@ Resources: Properties: ServiceToken: !GetAtt DeployFunction.Arn Counter: !Ref DeployCountValue - Version: "0.7.0.2" + Version: "0.7.2" Configure: Type: "AWS::CloudFormation::CustomResource"
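Why the Version bump forces a redeploy: CloudFormation re-invokes a custom resource's backing Lambda only when one of the resource's properties changes between stack updates, so tying Version to the release string guarantees the Deploy function runs on every release even when Counter is unchanged. A minimal sketch of such a handler, as a hypothetical stand-in for the repo's actual DeployFunction code:

    import cfnresponse

    def lambda_handler(event, context):
        # Create/Update fire whenever any property (Counter, Version) changes;
        # Delete must still report SUCCESS so stack teardown is not blocked.
        props = event.get("ResourceProperties", {})
        print(f"{event['RequestType']} request, Version={props.get('Version')}")
        try:
            if event["RequestType"] in ("Create", "Update"):
                pass  # build and publish the web UI assets here
            cfnresponse.send(event, context, cfnresponse.SUCCESS, {})
        except Exception as e:
            cfnresponse.send(event, context, cfnresponse.FAILED, {}, reason=str(e))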