Refactor asr/tts components #1083

Merged · 32 commits · Dec 31, 2024

Commits
- ccb8aae: refactor (Spycsh, Dec 17, 2024)
- 028a523: [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Dec 17, 2024)
- e89aacb: Merge branch 'opea-project:main' into refactor_multimedia (Spycsh, Dec 17, 2024)
- 836b71d: fix tests (Spycsh, Dec 18, 2024)
- 454fc36: Merge branch 'refactor_multimedia' of https://github.com/Spycsh/GenAI… (Spycsh, Dec 18, 2024)
- 2745f0e: [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Dec 18, 2024)
- b11f02f: refactor dataprep path (Spycsh, Dec 18, 2024)
- a8ba7ab: Merge branch 'refactor_multimedia' of https://github.com/Spycsh/GenAI… (Spycsh, Dec 18, 2024)
- b3bb4da: fix paths (Spycsh, Dec 18, 2024)
- 89494f5: [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Dec 18, 2024)
- 90eae7a: refactor tts (Spycsh, Dec 19, 2024)
- 64c18a6: [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Dec 19, 2024)
- ccb426b: fix yaml, paths, test (Spycsh, Dec 20, 2024)
- 26c179e: merge (Spycsh, Dec 20, 2024)
- 2fa1219: [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Dec 20, 2024)
- 3c1101c: resolve port conflict, fix link (Spycsh, Dec 20, 2024)
- 11d550d: unify healthcheck with docker compose (Spycsh, Dec 20, 2024)
- 96fed7e: fix (Spycsh, Dec 20, 2024)
- 7b91b80: fix (Spycsh, Dec 20, 2024)
- b861ea8: Merge branch 'main' into refactor_multimedia (Spycsh, Dec 20, 2024)
- fa40cdc: fix name (Spycsh, Dec 20, 2024)
- 681f293: refactor name rules (Spycsh, Dec 25, 2024)
- 7b1c296: leave health check to compose and manual sleep (Spycsh, Dec 26, 2024)
- d234aa0: merge (Spycsh, Dec 27, 2024)
- 0de0544: [pre-commit.ci] auto fixes from pre-commit.com hooks (pre-commit-ci[bot], Dec 27, 2024)
- f5596bf: align (Spycsh, Dec 27, 2024)
- 8317156: Merge branch 'source/refactor_multimedia' of https://github.com/opea-… (Spycsh, Dec 27, 2024)
- 6f1945c: add param (Spycsh, Dec 27, 2024)
- b05ab9e: Merge remote-tracking branch 'source/main' into source/refactor_multi… (Spycsh, Dec 27, 2024)
- 12a5b38: voice (Spycsh, Dec 27, 2024)
- 527bd4d: Merge remote-tracking branch 'source/main' into source/refactor_multi… (Spycsh, Dec 30, 2024)
- 7cf4d27: rename (Spycsh, Dec 31, 2024)
6 changes: 3 additions & 3 deletions .github/workflows/docker/compose/asr-compose.yaml
@@ -5,13 +5,13 @@
 services:
   asr:
     build:
-      dockerfile: comps/asr/whisper/Dockerfile
+      dockerfile: comps/asr/src/Dockerfile
     image: ${REGISTRY:-opea}/asr:${TAG:-latest}
   whisper:
     build:
-      dockerfile: comps/asr/whisper/dependency/Dockerfile
+      dockerfile: comps/asr/src/integrations/dependency/whisper/Dockerfile
     image: ${REGISTRY:-opea}/whisper:${TAG:-latest}
   whisper-gaudi:
     build:
-      dockerfile: comps/asr/whisper/dependency/Dockerfile.intel_hpu
+      dockerfile: comps/asr/src/integrations/dependency/whisper/Dockerfile.intel_hpu
     image: ${REGISTRY:-opea}/whisper-gaudi:${TAG:-latest}
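For a local sanity check of the updated Dockerfile paths, one can build straight from this CI manifest; a minimal sketch, assuming Docker Compose v2 and the repository root as the build context:

```bash
# Build the refactored ASR microservice and the Whisper CPU backend
# images from the CI compose file above.
docker compose -f .github/workflows/docker/compose/asr-compose.yaml build asr whisper
```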
8 changes: 4 additions & 4 deletions .github/workflows/docker/compose/tts-compose.yaml
@@ -5,17 +5,17 @@
 services:
   tts:
     build:
-      dockerfile: comps/tts/speecht5/Dockerfile
+      dockerfile: comps/tts/src/Dockerfile
     image: ${REGISTRY:-opea}/tts:${TAG:-latest}
   speecht5:
     build:
-      dockerfile: comps/tts/speecht5/dependency/Dockerfile
+      dockerfile: comps/tts/src/integrations/dependency/speecht5/Dockerfile
     image: ${REGISTRY:-opea}/speecht5:${TAG:-latest}
   speecht5-gaudi:
     build:
-      dockerfile: comps/tts/speecht5/dependency/Dockerfile.intel_hpu
+      dockerfile: comps/tts/src/integrations/dependency/speecht5/Dockerfile.intel_hpu
     image: ${REGISTRY:-opea}/speecht5-gaudi:${TAG:-latest}
   gpt-sovits:
     build:
-      dockerfile: comps/tts/gpt-sovits/Dockerfile
+      dockerfile: comps/tts/src/integrations/dependency/gpt-sovits/Dockerfile
     image: ${REGISTRY:-opea}/gpt-sovits:${TAG:-latest}
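The TTS manifest can be checked the same way without building anything; rendering the resolved configuration is enough to verify the new src/ paths (a sketch, assuming Docker Compose v2):

```bash
# Print the fully resolved compose configuration for inspection.
docker compose -f .github/workflows/docker/compose/tts-compose.yaml config
```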
8 changes: 4 additions & 4 deletions README.md
@@ -43,10 +43,10 @@ The initially supported `Microservices` are described in the below table. More `
 | [Retriever](./comps/retrievers/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [BAAI/bge-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) | [TEI](https://github.com/huggingface/text-embeddings-inference) | Xeon | Retriever on Xeon CPU |
 | [Reranking](./comps/reranks/tei/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [BAAI/bge-reranker-base](https://huggingface.co/BAAI/bge-reranker-base) | [TEI-Gaudi](https://github.com/huggingface/tei-gaudi) | Gaudi2 | Reranking on Gaudi2 |
 | [Reranking](./comps/reranks/tei/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [BAAI/bge-reranker-base](https://huggingface.co/BAAI/bge-reranker-base) | [TEI](https://github.com/huggingface/text-embeddings-inference) | Xeon | Reranking on Xeon CPU |
-| [ASR](./comps/asr/whisper/README.md) | NA | [openai/whisper-small](https://huggingface.co/openai/whisper-small) | NA | Gaudi2 | Audio-Speech-Recognition on Gaudi2 |
-| [ASR](./comps/asr/whisper/README.md) | NA | [openai/whisper-small](https://huggingface.co/openai/whisper-small) | NA | Xeon | Audio-Speech-Recognition on Xeon CPU |
-| [TTS](./comps/tts/speecht5/README.md) | NA | [microsoft/speecht5_tts](https://huggingface.co/microsoft/speecht5_tts) | NA | Gaudi2 | Text-To-Speech on Gaudi2 |
-| [TTS](./comps/tts/speecht5/README.md) | NA | [microsoft/speecht5_tts](https://huggingface.co/microsoft/speecht5_tts) | NA | Xeon | Text-To-Speech on Xeon CPU |
+| [ASR](./comps/asr/src/README.md) | NA | [openai/whisper-small](https://huggingface.co/openai/whisper-small) | NA | Gaudi2 | Audio-Speech-Recognition on Gaudi2 |
+| [ASR](./comps/asr/src/README.md) | NA | [openai/whisper-small](https://huggingface.co/openai/whisper-small) | NA | Xeon | Audio-Speech-Recognition on Xeon CPU |
+| [TTS](./comps/tts/src/README.md) | NA | [microsoft/speecht5_tts](https://huggingface.co/microsoft/speecht5_tts) | NA | Gaudi2 | Text-To-Speech on Gaudi2 |
+| [TTS](./comps/tts/src/README.md) | NA | [microsoft/speecht5_tts](https://huggingface.co/microsoft/speecht5_tts) | NA | Xeon | Text-To-Speech on Xeon CPU |
 | [Dataprep](./comps/dataprep/README.md) | [Qdrant](https://qdrant.tech/) | [sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) | NA | Gaudi2 | Dataprep on Gaudi2 |
 | [Dataprep](./comps/dataprep/README.md) | [Qdrant](https://qdrant.tech/) | [sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) | NA | Xeon | Dataprep on Xeon CPU |
 | [Dataprep](./comps/dataprep/README.md) | [Redis](https://redis.io/) | [BAAI/bge-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) | NA | Gaudi2 | Dataprep on Gaudi2 |
35 changes: 35 additions & 0 deletions comps/asr/deployment/docker_compose/compose_whisper.yaml
@@ -0,0 +1,35 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+services:
+  whisper-service:
+    image: ${REGISTRY:-opea}/whisper:${TAG:-latest}
+    container_name: whisper-service
+    ports:
+      - "7066:7066"
+    ipc: host
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+    restart: unless-stopped
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:7066/health"]
+      interval: 10s
+      timeout: 6s
+      retries: 18
+  asr:
+    image: ${REGISTRY:-opea}/asr:${TAG:-latest}
+    container_name: asr-service
+    ports:
+      - "9099:9099"
+    ipc: host
+    environment:
+      ASR_ENDPOINT: ${ASR_ENDPOINT}
+    depends_on:
+      whisper-service:
+        condition: service_healthy
+
+networks:
+  default:
+    driver: bridge
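To try the new deployment file end to end, a minimal sketch; it assumes the opea/whisper and opea/asr images are already built, and follows the README convention of pointing ASR_ENDPOINT at the host. The curl invocation is taken from the refactored README below:

```bash
ip_address=$(hostname -I | awk '{print $1}')
export ASR_ENDPOINT=http://$ip_address:7066

docker compose -f comps/asr/deployment/docker_compose/compose_whisper.yaml up -d

# After whisper-service turns healthy, exercise the OpenAI-style endpoint.
wget https://github.com/intel/intel-extension-for-transformers/raw/main/intel_extension_for_transformers/neural_chat/assets/audio/sample.wav
curl http://localhost:9099/v1/audio/transcriptions \
  -H "Content-Type: multipart/form-data" \
  -F file="@./sample.wav" \
  -F model="openai/whisper-small"
```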
40 changes: 40 additions & 0 deletions comps/asr/deployment/docker_compose/compose_whisper_hpu.yaml
@@ -0,0 +1,40 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+services:
+  whisper-service:
+    image: ${REGISTRY:-opea}/whisper-gaudi:${TAG:-latest}
+    container_name: whisper-service
+    ports:
+      - "7066:7066"
+    ipc: host
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      HABANA_VISIBLE_DEVICES: all
+      OMPI_MCA_btl_vader_single_copy_mechanism: none
+    runtime: habana
+    cap_add:
+      - SYS_NICE
+    restart: unless-stopped
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:7066/health"]
+      interval: 10s
+      timeout: 6s
+      retries: 18
+  asr:
+    image: ${REGISTRY:-opea}/asr:${TAG:-latest}
+    container_name: asr-service
+    ports:
+      - "3001:9099"
+    ipc: host
+    environment:
+      ASR_ENDPOINT: ${ASR_ENDPOINT}
+    depends_on:
+      whisper-service:
+        condition: service_healthy
+
+networks:
+  default:
+    driver: bridge
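If you would rather gate on readiness manually than on compose's service_healthy condition (cf. the "leave health check to compose and manual sleep" commit), a simple poll of the same endpoint works; a sketch, assuming the backend is exposed on its default port:

```bash
# Wait up to ~3 minutes, mirroring the 10s interval x 18 retries
# budget of the compose healthcheck above.
for i in $(seq 1 18); do
  if curl -sf http://localhost:7066/health > /dev/null; then
    echo "whisper-service is healthy"
    break
  fi
  echo "waiting for whisper-service ($i/18)..."
  sleep 10
done
```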
8 changes: 4 additions & 4 deletions comps/asr/whisper/Dockerfile → comps/asr/src/Dockerfile
@@ -16,13 +16,13 @@ COPY comps /home/user/comps
 RUN pip install --no-cache-dir --upgrade pip setuptools && \
     if [ "${ARCH}" = "cpu" ]; then \
     pip install --no-cache-dir torch torchvision --index-url https://download.pytorch.org/whl/cpu ; \
-    pip install --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cpu -r /home/user/comps/asr/whisper/requirements.txt ; \
+    pip install --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cpu -r /home/user/comps/asr/src/requirements.txt ; \
     else \
-    pip install --no-cache-dir -r /home/user/comps/asr/whisper/requirements.txt ; \
+    pip install --no-cache-dir -r /home/user/comps/asr/src/requirements.txt ; \
     fi
 
 ENV PYTHONPATH=$PYTHONPATH:/home/user
 
-WORKDIR /home/user/comps/asr/whisper
+WORKDIR /home/user/comps/asr/src
 
-ENTRYPOINT ["python", "asr.py"]
+ENTRYPOINT ["python", "opea_asr_microservice.py"]
27 changes: 15 additions & 12 deletions comps/asr/whisper/README.md → comps/asr/src/README.md
@@ -17,7 +17,7 @@ pip install -r requirements.txt
 - Xeon CPU
 
 ```bash
-cd dependency/
+cd integrations/dependency/whisper
 nohup python whisper_server.py --device=cpu &
 python check_whisper_server.py
 ```
@@ -51,15 +51,15 @@ curl http://localhost:7066/v1/audio/transcriptions \
 ### 1.3 Start ASR Service/Test
 
 ```bash
-cd ../
-python asr.py
+cd ../../..
+python opea_asr_microservice.py
 python check_asr_server.py
 ```
 
 While the Whisper service is running, you can start the ASR service. If the ASR service is running properly, you should see output similar to the following:
 
 ```bash
-{'id': '0e686efd33175ce0ebcf7e0ed7431673', 'text': 'who is pat gelsinger'}
+{'text': 'who is pat gelsinger'}
 ```
 
 ## 🚀2. Start Microservice with Docker (Option 2)
@@ -74,20 +74,20 @@ Alternatively, you can also start the ASR microservice with Docker.
 
 ```bash
 cd ../..
-docker build -t opea/whisper:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/whisper/dependency/Dockerfile .
+docker build -t opea/whisper:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/src/integrations/dependency/whisper/Dockerfile .
 ```
 
 - Gaudi2 HPU
 
 ```bash
 cd ../..
-docker build -t opea/whisper-gaudi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/whisper/dependency/Dockerfile.intel_hpu .
+docker build -t opea/whisper-gaudi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/src/integrations/dependency/whisper/Dockerfile.intel_hpu .
 ```
 
 #### 2.1.2 ASR Service Image
 
 ```bash
-docker build -t opea/asr:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/whisper/Dockerfile .
+docker build -t opea/asr:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/src/Dockerfile .
 ```
 
 ### 2.2 Start Whisper and ASR Service
@@ -97,21 +97,21 @@ docker build -t opea/asr:latest --build-arg https_proxy=$https_proxy --build-arg
 - Xeon
 
 ```bash
-docker run -p 7066:7066 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy opea/whisper:latest
+docker run -p 7066:7066 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy opea/whisper:latest
 ```
 
 - Gaudi2 HPU
 
 ```bash
-docker run -p 7066:7066 --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy opea/whisper-gaudi:latest
+docker run -p 7066:7066 --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy opea/whisper-gaudi:latest
 ```
 
 #### 2.2.2 Start ASR service
 
 ```bash
 ip_address=$(hostname -I | awk '{print $1}')
 
-docker run -d -p 9099:9099 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e ASR_ENDPOINT=http://$ip_address:7066 opea/asr:latest
+docker run -d -p 9099:9099 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e ASR_ENDPOINT=http://$ip_address:7066 opea/asr:latest
 ```
 
 #### 2.2.3 Test
@@ -120,8 +120,11 @@ docker run -d -p 9099:9099 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$
 # Use curl or python
 
 # curl
-http_proxy="" curl http://localhost:9099/v1/audio/transcriptions -XPOST -d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' -H 'Content-Type: application/json'
+wget https://github.com/intel/intel-extension-for-transformers/raw/main/intel_extension_for_transformers/neural_chat/assets/audio/sample.wav
+curl http://localhost:9099/v1/audio/transcriptions \
+  -H "Content-Type: multipart/form-data" \
+  -F file="@./sample.wav" \
+  -F model="openai/whisper-small"
 
 # python
 python check_asr_server.py
comps/asr/whisper/check_asr_server.py → comps/asr/src/check_asr_server.py
@@ -20,11 +20,24 @@
     file_name,
 )
 
-with open(file_name, "rb") as f:
-    test_audio_base64_str = base64.b64encode(f.read()).decode("utf-8")
-os.remove(file_name)
-
 endpoint = "http://localhost:9099/v1/audio/transcriptions"
-inputs = {"byte_str": test_audio_base64_str}
-response = requests.post(url=endpoint, data=json.dumps(inputs), proxies={"http": None})
-print(response.json())
+headers = {"accept": "application/json"}
+
+# Prepare the data and files
+data = {
+    "model": "openai/whisper-small",
+    "language": "english",
+}
+
+try:
+    with open(file_name, "rb") as audio_file:
+        files = {"file": (file_name, audio_file)}
+        response = requests.post(endpoint, headers=headers, data=data, files=files)
+        if response.status_code != 200:
+            print(f"Failure with {response.reason}!")
+        else:
+            print(response.json())
+except Exception as e:
+    print(f"Failure with {e}!")
+
+os.remove(file_name)
comps/asr/whisper/dependency/Dockerfile → comps/asr/src/integrations/dependency/whisper/Dockerfile
@@ -20,16 +20,16 @@ COPY --chown=user:user comps /home/user/comps
 USER user
 
 RUN pip install --no-cache-dir --upgrade pip setuptools && \
-    pip install --no-cache-dir -r /home/user/comps/asr/whisper/requirements.txt && \
+    pip install --no-cache-dir -r /home/user/comps/asr/src/requirements.txt && \
     if [ "${ARCH}" = "cpu" ]; then \
     pip install --no-cache-dir torch torchvision --index-url https://download.pytorch.org/whl/cpu ; \
-    pip install --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cpu -r /home/user/comps/asr/whisper/requirements.txt ; \
+    pip install --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cpu -r /home/user/comps/asr/src/requirements.txt ; \
     else \
-    pip install --no-cache-dir -r /home/user/comps/asr/whisper/requirements.txt ; \
+    pip install --no-cache-dir -r /home/user/comps/asr/src/requirements.txt ; \
     fi
 
 ENV PYTHONPATH=$PYTHONPATH:/home/user
 
-WORKDIR /home/user/comps/asr/whisper/dependency
+WORKDIR /home/user/comps/asr/src/integrations/dependency/whisper
 
 ENTRYPOINT ["python", "whisper_server.py", "--device", "cpu"]
comps/asr/whisper/dependency/Dockerfile.intel_hpu → comps/asr/src/integrations/dependency/whisper/Dockerfile.intel_hpu
@@ -23,11 +23,11 @@ USER user
 
 # Install requirements and optimum habana
 RUN pip install --no-cache-dir --upgrade pip && \
-    pip install --no-cache-dir -r /home/user/comps/asr/whisper/requirements.txt && \
+    pip install --no-cache-dir -r /home/user/comps/asr/src/requirements.txt && \
     pip install --no-cache-dir optimum[habana]
 
 ENV PYTHONPATH=$PYTHONPATH:/home/users
 
-WORKDIR /home/user/comps/asr/whisper/dependency
+WORKDIR /home/user/comps/asr/src/integrations/dependency/whisper
 
 ENTRYPOINT ["python", "whisper_server.py", "--device", "hpu"]
comps/asr/whisper/dependency/whisper_server.py → comps/asr/src/integrations/dependency/whisper/whisper_server.py
@@ -5,7 +5,7 @@
 import base64
 import os
 import uuid
-from typing import List, Optional, Union
+from typing import List
 
 import uvicorn
 from fastapi import FastAPI, File, Form, Request, UploadFile
@@ -28,7 +28,7 @@
 )
 
 
-@app.get("/v1/health")
+@app.get("/health")
 async def health() -> Response:
     """Health check."""
     return Response(status_code=200)
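Note that the server's health route moved from /v1/health to /health, which is exactly what the compose healthchecks above probe. A quick manual check, as a sketch that assumes the Whisper server is up on its default port 7066:

```bash
# Expect HTTP 200 and an empty body from the relocated health route.
curl -i http://localhost:7066/health
```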