amazon bedrock #875

Closed · wants to merge 20 commits
26 changes: 26 additions & 0 deletions comps/llms/text-generation/bedrock/Dockerfile
@@ -0,0 +1,26 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

FROM python:3.11-slim

RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
    curl \
    libgl1-mesa-glx \
    libjemalloc-dev

RUN useradd -m -s /bin/bash user && \
    mkdir -p /home/user && \
    chown -R user /home/user/

USER user

COPY comps /home/user/comps

RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r /home/user/comps/llms/text-generation/bedrock/requirements.txt

ENV PYTHONPATH=$PYTHONPATH:/home/user

WORKDIR /home/user/comps/llms/text-generation/bedrock

ENTRYPOINT ["python", "llm.py"]
41 changes: 41 additions & 0 deletions comps/llms/text-generation/bedrock/README.md
@@ -0,0 +1,41 @@
# Introduction

[Amazon Bedrock](https://aws.amazon.com/bedrock) is a fully managed service that offers a choice of high-performing foundation models (FMs) from leading AI companies such as AI21 Labs, Anthropic, Cohere, Meta, Mistral AI, Stability AI, and Amazon through a single API, along with a broad set of capabilities you need to build generative AI applications with security, privacy, and responsible AI.

## Get Started

### Setup Environment Variables

To start the Bedrock service, set the following environment variables first.

```bash
export AWS_ACCESS_KEY_ID=${aws_access_key_id}
export AWS_SECRET_ACCESS_KEY=${aws_secret_access_key}
```
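
The microservice also reads two optional variables; the defaults shown below are taken from `llm.py`:

```bash
# Optional: AWS region for the Bedrock runtime client and the default model ID (defaults shown)
export BEDROCK_REGION=us-west-2
export MODEL_ID=us.anthropic.claude-3-haiku-20240307-v1:0
```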

### Build Docker Image

```bash
cd GenAIComps/
docker build --no-cache -t opea/bedrock:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/bedrock/Dockerfile .
```

### Run the Bedrock Microservice

```bash
docker run -d --name bedrock -p 9009:9000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY opea/bedrock:latest
```
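
If you use temporary credentials or a region other than the default, pass those through as well. A variant of the command above, assuming `AWS_SESSION_TOKEN` is set (boto3 picks it up automatically) and overriding `BEDROCK_REGION` from `llm.py`:

```bash
docker run -d --name bedrock -p 9009:9000 --ipc=host \
  -e AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \
  -e AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \
  -e AWS_SESSION_TOKEN=$AWS_SESSION_TOKEN \
  -e BEDROCK_REGION=us-east-1 \
  opea/bedrock:latest
```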

### Consume the Bedrock Microservice

```bash
curl http://${host_ip}:9009/v1/chat/completions \
  -X POST \
  -d '{"model": "us.anthropic.claude-3-5-haiku-20241022-v1:0", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens": 17}' \
  -H 'Content-Type: application/json'

curl http://${host_ip}:9009/v1/chat/completions \
  -X POST \
  -d '{"model": "us.anthropic.claude-3-5-haiku-20241022-v1:0", "messages": [{"role": "user", "content": "What is Deep Learning?"}], "max_tokens": 17, "stream": true}' \
  -H 'Content-Type: application/json'
```
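
For reference, the streaming request returns TGI/OpenAI-style SSE chunks; a trimmed sample is shown below (the `created` timestamp and token text are illustrative):

```
data: {"object": "chat.completion.chunk", "model": "us.anthropic.claude-3-5-haiku-20241022-v1:0", "created": 1700000000, "choices": [{"index": 0, "delta": {"role": "assistant", "content": "Deep"}, "finish_reason": null}]}

data: [DONE]
```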
2 changes: 2 additions & 0 deletions comps/llms/text-generation/bedrock/__init__.py
@@ -0,0 +1,2 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
147 changes: 147 additions & 0 deletions comps/llms/text-generation/bedrock/llm.py
@@ -0,0 +1,147 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import json
import os
import time
from typing import Union

import boto3
from fastapi.responses import StreamingResponse

from comps import (
    CustomLogger,
    GeneratedDoc,
    LLMParamsDoc,
    SearchedDoc,
    ServiceType,
    opea_microservices,
    register_microservice,
    register_statistics,
    statistics_dict,
)
from comps.cores.proto.api_protocol import ChatCompletionRequest

logger = CustomLogger("llm_bedrock")
logflag = os.getenv("LOGFLAG", True)

region = os.getenv("BEDROCK_REGION", "us-west-2")
bedrock_runtime = boto3.client(service_name="bedrock-runtime", region_name=region)

# Default model; individual requests can override this via their "model" field
default_model_id = os.getenv("MODEL_ID", "us.anthropic.claude-3-haiku-20240307-v1:0")

# Default Anthropic arguments (not currently passed to the converse API calls below)
model_kwargs = {
    "anthropic_version": "bedrock-2023-05-31",
    "max_tokens": 1000,
}

sse_headers = {
    "x-accel-buffering": "no",
    "cache-control": "no-cache",
    "content-type": "text/event-stream",
}


@register_microservice(
    name="opea_service@llm_bedrock",
    service_type=ServiceType.LLM,
    endpoint="/v1/chat/completions",
    host="0.0.0.0",
    port=9000,
)
def llm_generate(input: Union[LLMParamsDoc, ChatCompletionRequest, SearchedDoc]):
    if logflag:
        logger.info(input)

    # Parse out arguments for the Bedrock converse API
    model_id = input.model if input.model else default_model_id
    if logflag:
        logger.info(f"[llm - chat] Using model {model_id}")

    bedrock_args = {"modelId": model_id}

    inference_config = {}
    if input.max_tokens:
        inference_config["maxTokens"] = input.max_tokens

    if input.stop:
        inference_config["stopSequences"] = input.stop

    if input.temperature:
        inference_config["temperature"] = input.temperature

    if input.top_p:
        inference_config["topP"] = input.top_p

    if len(inference_config) > 0:
        bedrock_args["inferenceConfig"] = inference_config

    if logflag and len(inference_config) > 0:
        logger.info(f"[llm - chat] inference_config: {inference_config}")

    # Convert messages from the HuggingFace TGI format to the Bedrock messages format
    # tgi: [{role: "system" | "user", content: "text"}]
    # bedrock: [{role: "assistant" | "user", content: [{text: "content"}]}]
    messages = [
        {
            "role": "assistant" if i.get("role") == "system" else "user",
            "content": [{"text": i.get("content", "")}],
        }
        for i in input.messages
    ]

    # Bedrock requires that conversations start with a user prompt, while TGI allows
    # the first message to be an assistant prompt that defines assistant behavior.
    # If the message list starts with an assistant prompt, move it to the Bedrock system prompt.
    if len(messages) > 0 and messages[0]["role"] == "assistant":
        system_prompt = messages[0]["content"][0]["text"]
        bedrock_args["system"] = [{"text": system_prompt}]
        messages.pop(0)

    bedrock_args["messages"] = messages

    if logflag:
        logger.info(f"[llm - chat] Bedrock args: {bedrock_args}")

    if input.stream:
        response = bedrock_runtime.converse_stream(**bedrock_args)

        def stream_generator():
            chat_response = ""
            for chunk in response["stream"]:
                if "contentBlockDelta" in chunk:
                    text = chunk.get("contentBlockDelta", {}).get("delta", {}).get("text", "")
                    chat_response += text
                    if logflag:
                        logger.info(f"[llm - chat_stream] chunk:{text}")

                    # Re-emit each delta as a TGI/OpenAI-style chat.completion.chunk SSE event
                    tgi_format_out = {
                        "object": "chat.completion.chunk",
                        "model": model_id,
                        "created": int(time.time()),
                        "choices": [
                            {"index": 0, "delta": {"role": "assistant", "content": text}, "finish_reason": None}
                        ],
                    }
                    yield f"data: {json.dumps(tgi_format_out)}\n\n"
            if logflag:
                logger.info(f"[llm - chat_stream] stream response: {chat_response}")
            yield "data: [DONE]\n\n"

        return StreamingResponse(stream_generator(), headers=sse_headers)

    response = bedrock_runtime.converse(**bedrock_args)
    output_content = response.get("output", {}).get("message", {}).get("content", [])
    output_text = output_content[0].get("text", "") if len(output_content) > 0 else ""
    prompt = messages[-1].get("content", [{"text": ""}])[0].get("text", "")

    return GeneratedDoc(text=output_text, prompt=prompt)


if __name__ == "__main__":
    opea_microservices["opea_service@llm_bedrock"].start()
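
As a side note for reviewers, here is a minimal standalone sketch of the TGI-to-Bedrock message mapping implemented in `llm.py` above; the input is hypothetical and no AWS call is made:

```python
# Standalone sketch of the message conversion above (hypothetical input, no AWS call)
tgi_messages = [
    {"role": "system", "content": "You are a concise assistant."},
    {"role": "user", "content": "What is Deep Learning?"},
]

# Map each TGI message to the Bedrock shape: system -> assistant, content -> [{"text": ...}]
bedrock_messages = [
    {
        "role": "assistant" if m.get("role") == "system" else "user",
        "content": [{"text": m.get("content", "")}],
    }
    for m in tgi_messages
]

# A leading assistant message becomes the Bedrock system prompt
system = None
if bedrock_messages and bedrock_messages[0]["role"] == "assistant":
    system = [{"text": bedrock_messages[0]["content"][0]["text"]}]
    bedrock_messages.pop(0)

print(system)            # [{'text': 'You are a concise assistant.'}]
print(bedrock_messages)  # [{'role': 'user', 'content': [{'text': 'What is Deep Learning?'}]}]
```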
17 changes: 17 additions & 0 deletions comps/llms/text-generation/bedrock/requirements.txt
@@ -0,0 +1,17 @@
aiohttp
boto3
docarray[full]
fastapi
httpx
huggingface_hub
langchain
langchain_aws
numpy
openai==1.35.13
opentelemetry-api
opentelemetry-exporter-otlp
opentelemetry-sdk
prometheus-fastapi-instrumentator
shortuuid
transformers
uvicorn