diff --git a/.github/workflows/docker/compose/asr-compose.yaml b/.github/workflows/docker/compose/asr-compose.yaml index bf50634f81..56a2468a44 100644 --- a/.github/workflows/docker/compose/asr-compose.yaml +++ b/.github/workflows/docker/compose/asr-compose.yaml @@ -5,13 +5,13 @@ services: asr: build: - dockerfile: comps/asr/whisper/Dockerfile + dockerfile: comps/asr/src/Dockerfile image: ${REGISTRY:-opea}/asr:${TAG:-latest} whisper: build: - dockerfile: comps/asr/whisper/dependency/Dockerfile + dockerfile: comps/asr/src/integrations/dependency/whisper/Dockerfile image: ${REGISTRY:-opea}/whisper:${TAG:-latest} whisper-gaudi: build: - dockerfile: comps/asr/whisper/dependency/Dockerfile.intel_hpu + dockerfile: comps/asr/src/integrations/dependency/whisper/Dockerfile.intel_hpu image: ${REGISTRY:-opea}/whisper-gaudi:${TAG:-latest} diff --git a/.github/workflows/docker/compose/tts-compose.yaml b/.github/workflows/docker/compose/tts-compose.yaml index da0ca7c886..67bdf285c4 100644 --- a/.github/workflows/docker/compose/tts-compose.yaml +++ b/.github/workflows/docker/compose/tts-compose.yaml @@ -5,17 +5,17 @@ services: tts: build: - dockerfile: comps/tts/speecht5/Dockerfile + dockerfile: comps/tts/src/Dockerfile image: ${REGISTRY:-opea}/tts:${TAG:-latest} speecht5: build: - dockerfile: comps/tts/speecht5/dependency/Dockerfile + dockerfile: comps/tts/src/integrations/dependency/speecht5/Dockerfile image: ${REGISTRY:-opea}/speecht5:${TAG:-latest} speecht5-gaudi: build: - dockerfile: comps/tts/speecht5/dependency/Dockerfile.intel_hpu + dockerfile: comps/tts/src/integrations/dependency/speecht5/Dockerfile.intel_hpu image: ${REGISTRY:-opea}/speecht5-gaudi:${TAG:-latest} gpt-sovits: build: - dockerfile: comps/tts/gpt-sovits/Dockerfile + dockerfile: comps/tts/src/integrations/dependency/gpt-sovits/Dockerfile image: ${REGISTRY:-opea}/gpt-sovits:${TAG:-latest} diff --git a/README.md b/README.md index 3a4b17bad9..b9cbf453b3 100644 --- a/README.md +++ b/README.md @@ -43,10 +43,10 @@ The initially supported `Microservices` are described in the below table. 
More `
 | [Retriever](./comps/retrievers/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [BAAI/bge-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) | [TEI](https://github.com/huggingface/text-embeddings-inference) | Xeon | Retriever on Xeon CPU |
 | [Reranking](./comps/reranks/tei/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [BAAI/bge-reranker-base](https://huggingface.co/BAAI/bge-reranker-base) | [TEI-Gaudi](https://github.com/huggingface/tei-gaudi) | Gaudi2 | Reranking on Gaudi2 |
 | [Reranking](./comps/reranks/tei/README.md) | [LangChain](https://www.langchain.com)/[LlamaIndex](https://www.llamaindex.ai) | [BBAAI/bge-reranker-base](https://huggingface.co/BAAI/bge-reranker-base) | [TEI](https://github.com/huggingface/text-embeddings-inference) | Xeon | Reranking on Xeon CPU |
-| [ASR](./comps/asr/whisper/README.md) | NA | [openai/whisper-small](https://huggingface.co/openai/whisper-small) | NA | Gaudi2 | Audio-Speech-Recognition on Gaudi2 |
-| [ASR](./comps/asr/whisper/README.md) | NA | [openai/whisper-small](https://huggingface.co/openai/whisper-small) | NA | Xeon | Audio-Speech-RecognitionS on Xeon CPU |
-| [TTS](./comps/tts/speecht5/README.md) | NA | [microsoft/speecht5_tts](https://huggingface.co/microsoft/speecht5_tts) | NA | Gaudi2 | Text-To-Speech on Gaudi2 |
-| [TTS](./comps/tts/speecht5/README.md) | NA | [microsoft/speecht5_tts](https://huggingface.co/microsoft/speecht5_tts) | NA | Xeon | Text-To-Speech on Xeon CPU |
+| [ASR](./comps/asr/src/README.md) | NA | [openai/whisper-small](https://huggingface.co/openai/whisper-small) | NA | Gaudi2 | Audio-Speech-Recognition on Gaudi2 |
+| [ASR](./comps/asr/src/README.md) | NA | [openai/whisper-small](https://huggingface.co/openai/whisper-small) | NA | Xeon | Audio-Speech-Recognition on Xeon CPU |
+| [TTS](./comps/tts/src/README.md) | NA | [microsoft/speecht5_tts](https://huggingface.co/microsoft/speecht5_tts) | NA | Gaudi2 | Text-To-Speech on Gaudi2 |
+| [TTS](./comps/tts/src/README.md) | NA | [microsoft/speecht5_tts](https://huggingface.co/microsoft/speecht5_tts) | NA | Xeon | Text-To-Speech on Xeon CPU |
 | [Dataprep](./comps/dataprep/README.md) | [Qdrant](https://qdrant.tech/) | [sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) | NA | Gaudi2 | Dataprep on Gaudi2 |
 | [Dataprep](./comps/dataprep/README.md) | [Qdrant](https://qdrant.tech/) | [sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) | NA | Xeon | Dataprep on Xeon CPU |
 | [Dataprep](./comps/dataprep/README.md) | [Redis](https://redis.io/) | [BAAI/bge-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5) | NA | Gaudi2 | Dataprep on Gaudi2 |
diff --git a/comps/asr/deployment/docker_compose/compose_whisper.yaml b/comps/asr/deployment/docker_compose/compose_whisper.yaml
new file mode 100644
index 0000000000..d64ecffc32
--- /dev/null
+++ b/comps/asr/deployment/docker_compose/compose_whisper.yaml
@@ -0,0 +1,35 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+services:
+  whisper-service:
+    image: ${REGISTRY:-opea}/whisper:${TAG:-latest}
+    container_name: whisper-service
+    ports:
+      - "7066:7066"
+    ipc: host
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+    restart: unless-stopped
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:7066/health"]
+      interval: 10s
+      timeout: 6s
+      retries: 18
+  asr:
+    image: ${REGISTRY:-opea}/asr:${TAG:-latest}
+    container_name: asr-service
+    ports:
+      - "9099:9099"
+    ipc: host
+    environment:
+      ASR_ENDPOINT: ${ASR_ENDPOINT}
+    depends_on:
+      whisper-service:
+        condition: service_healthy
+
+networks:
+  default:
+    driver: bridge
diff --git a/comps/asr/deployment/docker_compose/compose_whisper_hpu.yaml b/comps/asr/deployment/docker_compose/compose_whisper_hpu.yaml
new file mode 100644
index 0000000000..a27d219086
--- /dev/null
+++ b/comps/asr/deployment/docker_compose/compose_whisper_hpu.yaml
@@ -0,0 +1,40 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+services:
+  whisper-service:
+    image: ${REGISTRY:-opea}/whisper-gaudi:${TAG:-latest}
+    container_name: whisper-service
+    ports:
+      - "7066:7066"
+    ipc: host
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      HABANA_VISIBLE_DEVICES: all
+      OMPI_MCA_btl_vader_single_copy_mechanism: none
+    runtime: habana
+    cap_add:
+      - SYS_NICE
+    restart: unless-stopped
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:7066/health"]
+      interval: 10s
+      timeout: 6s
+      retries: 18
+  asr:
+    image: ${REGISTRY:-opea}/asr:${TAG:-latest}
+    container_name: asr-service
+    ports:
+      - "3001:9099"
+    ipc: host
+    environment:
+      ASR_ENDPOINT: ${ASR_ENDPOINT}
+    depends_on:
+      whisper-service:
+        condition: service_healthy
+
+networks:
+  default:
+    driver: bridge
diff --git a/comps/asr/whisper/Dockerfile b/comps/asr/src/Dockerfile
similarity index 71%
rename from comps/asr/whisper/Dockerfile
rename to comps/asr/src/Dockerfile
index 5615f98152..ab0228098f 100644
--- a/comps/asr/whisper/Dockerfile
+++ b/comps/asr/src/Dockerfile
@@ -16,13 +16,13 @@ COPY comps /home/user/comps
 RUN pip install --no-cache-dir --upgrade pip setuptools && \
     if [ "${ARCH}" = "cpu" ]; then \
       pip install --no-cache-dir torch torchvision --index-url https://download.pytorch.org/whl/cpu ; \
-      pip install --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cpu -r /home/user/comps/asr/whisper/requirements.txt ; \
+      pip install --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cpu -r /home/user/comps/asr/src/requirements.txt ; \
     else \
-      pip install --no-cache-dir -r /home/user/comps/asr/whisper/requirements.txt ; \
+      pip install --no-cache-dir -r /home/user/comps/asr/src/requirements.txt ; \
     fi

 ENV PYTHONPATH=$PYTHONPATH:/home/user

-WORKDIR /home/user/comps/asr/whisper
+WORKDIR /home/user/comps/asr/src

-ENTRYPOINT ["python", "asr.py"]
+ENTRYPOINT ["python", "opea_asr_microservice.py"]
diff --git a/comps/asr/whisper/README.md b/comps/asr/src/README.md
similarity index 75%
rename from comps/asr/whisper/README.md
rename to comps/asr/src/README.md
index 71285dd8a1..406b7c5414 100644
--- a/comps/asr/whisper/README.md
+++ b/comps/asr/src/README.md
@@ -17,7 +17,7 @@ pip install -r requirements.txt
 - Xeon CPU

 ```bash
-cd dependency/
+cd integrations/dependency/whisper
 nohup python whisper_server.py --device=cpu &
 python check_whisper_server.py
 ```
@@ -51,15 +51,15 @@ curl http://localhost:7066/v1/audio/transcriptions \
 ### 1.3 Start ASR Service/Test

 ```bash
-cd ../
-python asr.py
+cd ../../..
+python opea_asr_microservice.py
 python check_asr_server.py
 ```

 While the Whisper service is running, you can start the ASR service. If the ASR service is running properly, you should see the output similar to the following:

 ```bash
-{'id': '0e686efd33175ce0ebcf7e0ed7431673', 'text': 'who is pat gelsinger'}
+{'text': 'who is pat gelsinger'}
 ```
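For a quick sanity check from Python, the snippet below mirrors `check_asr_server.py` against the new multipart endpoint. It is a minimal sketch: it assumes the ASR service from this change is listening on `localhost:9099` and that a local `sample.wav` exists (for example, the one downloaded in section 2.2.3 below).

```python
# Minimal client sketch for the OpenAI-style transcription endpoint.
# Assumes the ASR microservice is on localhost:9099 and sample.wav exists.
import requests

endpoint = "http://localhost:9099/v1/audio/transcriptions"
with open("sample.wav", "rb") as audio_file:
    response = requests.post(
        endpoint,
        files={"file": ("sample.wav", audio_file)},
        data={"model": "openai/whisper-small", "language": "english"},
        proxies={"http": None},  # bypass any local HTTP proxy
    )
response.raise_for_status()
print(response.json()["text"])
```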
## 🚀2. Start Microservice with Docker (Option 2)
@@ -74,20 +74,20 @@ Alternatively, you can also start the ASR microservice with Docker.

 ```bash
 cd ../..
-docker build -t opea/whisper:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/whisper/dependency/Dockerfile .
+docker build -t opea/whisper:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/src/integrations/dependency/whisper/Dockerfile .
 ```

 - Gaudi2 HPU

 ```bash
 cd ../..
-docker build -t opea/whisper-gaudi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/whisper/dependency/Dockerfile.intel_hpu .
+docker build -t opea/whisper-gaudi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/src/integrations/dependency/whisper/Dockerfile.intel_hpu .
 ```

 #### 2.1.2 ASR Service Image

 ```bash
-docker build -t opea/asr:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/whisper/Dockerfile .
+docker build -t opea/asr:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/src/Dockerfile .
 ```

 ### 2.2 Start Whisper and ASR Service
@@ -97,13 +97,13 @@ docker build -t opea/asr:latest --build-arg https_proxy=$https_proxy --build-arg
 - Xeon

 ```bash
-docker run -p 7066:7066 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy opea/whisper:latest
+docker run -p 7066:7066 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy opea/whisper:latest
 ```

 - Gaudi2 HPU

 ```bash
-docker run -p 7066:7066 --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy opea/whisper-gaudi:latest
+docker run -p 7066:7066 --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy opea/whisper-gaudi:latest
 ```

 #### 2.2.2 Start ASR service

 ```bash
 ip_address=$(hostname -I | awk '{print $1}')
-docker run -d -p 9099:9099 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e ASR_ENDPOINT=http://$ip_address:7066 opea/asr:latest
+docker run -d -p 9099:9099 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -e ASR_ENDPOINT=http://$ip_address:7066 opea/asr:latest
 ```

 #### 2.2.3 Test
@@ -120,8 +120,11 @@ docker run -d -p 9099:9099 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$
 # Use curl or python

 # curl
-http_proxy="" curl http://localhost:9099/v1/audio/transcriptions -XPOST -d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' -H 'Content-Type: application/json'
-
+wget https://github.com/intel/intel-extension-for-transformers/raw/main/intel_extension_for_transformers/neural_chat/assets/audio/sample.wav
+curl http://localhost:9099/v1/audio/transcriptions \
+  -H "Content-Type: multipart/form-data" \
+  -F file="@./sample.wav" \
+  -F model="openai/whisper-small"

 # python
 python check_asr_server.py
diff --git a/comps/asr/whisper/check_asr_server.py b/comps/asr/src/check_asr_server.py
similarity index 54%
rename from comps/asr/whisper/check_asr_server.py
rename to comps/asr/src/check_asr_server.py
index 54349f3752..394764aeb6 100644
--- a/comps/asr/whisper/check_asr_server.py
+++ 
b/comps/asr/src/check_asr_server.py @@ -20,11 +20,24 @@ file_name, ) -with open(file_name, "rb") as f: - test_audio_base64_str = base64.b64encode(f.read()).decode("utf-8") -os.remove(file_name) - endpoint = "http://localhost:9099/v1/audio/transcriptions" -inputs = {"byte_str": test_audio_base64_str} -response = requests.post(url=endpoint, data=json.dumps(inputs), proxies={"http": None}) -print(response.json()) +headers = {"accept": "application/json"} + +# Prepare the data and files +data = { + "model": "openai/whisper-small", + "language": "english", +} + +try: + with open(file_name, "rb") as audio_file: + files = {"file": (file_name, audio_file)} + response = requests.post(endpoint, headers=headers, data=data, files=files) + if response.status_code != 200: + print(f"Failure with {response.reason}!") + else: + print(response.json()) +except Exception as e: + print(f"Failure with {e}!") + +os.remove(file_name) diff --git a/comps/asr/whisper/dependency/Dockerfile b/comps/asr/src/integrations/dependency/whisper/Dockerfile similarity index 72% rename from comps/asr/whisper/dependency/Dockerfile rename to comps/asr/src/integrations/dependency/whisper/Dockerfile index 9c191db609..a403e6cd68 100644 --- a/comps/asr/whisper/dependency/Dockerfile +++ b/comps/asr/src/integrations/dependency/whisper/Dockerfile @@ -20,16 +20,16 @@ COPY --chown=user:user comps /home/user/comps USER user RUN pip install --no-cache-dir --upgrade pip setuptools && \ - pip install --no-cache-dir -r /home/user/comps/asr/whisper/requirements.txt && \ + pip install --no-cache-dir -r /home/user/comps/asr/src/requirements.txt && \ if [ "${ARCH}" = "cpu" ]; then \ pip install --no-cache-dir torch torchvision --index-url https://download.pytorch.org/whl/cpu ; \ - pip install --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cpu -r /home/user/comps/asr/whisper/requirements.txt ; \ + pip install --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cpu -r /home/user/comps/asr/src/requirements.txt ; \ else \ - pip install --no-cache-dir -r /home/user/comps/asr/whisper/requirements.txt ; \ + pip install --no-cache-dir -r /home/user/comps/asr/src/requirements.txt ; \ fi ENV PYTHONPATH=$PYTHONPATH:/home/user -WORKDIR /home/user/comps/asr/whisper/dependency +WORKDIR /home/user/comps/asr/src/integrations/dependency/whisper ENTRYPOINT ["python", "whisper_server.py", "--device", "cpu"] diff --git a/comps/asr/whisper/dependency/Dockerfile.intel_hpu b/comps/asr/src/integrations/dependency/whisper/Dockerfile.intel_hpu similarity index 85% rename from comps/asr/whisper/dependency/Dockerfile.intel_hpu rename to comps/asr/src/integrations/dependency/whisper/Dockerfile.intel_hpu index 52f01f2998..ec591ea3d7 100644 --- a/comps/asr/whisper/dependency/Dockerfile.intel_hpu +++ b/comps/asr/src/integrations/dependency/whisper/Dockerfile.intel_hpu @@ -23,11 +23,11 @@ USER user # Install requirements and optimum habana RUN pip install --no-cache-dir --upgrade pip && \ - pip install --no-cache-dir -r /home/user/comps/asr/whisper/requirements.txt && \ + pip install --no-cache-dir -r /home/user/comps/asr/src/requirements.txt && \ pip install --no-cache-dir optimum[habana] ENV PYTHONPATH=$PYTHONPATH:/home/users -WORKDIR /home/user/comps/asr/whisper/dependency +WORKDIR /home/user/comps/asr/src/integrations/dependency/whisper ENTRYPOINT ["python", "whisper_server.py", "--device", "hpu"] diff --git a/comps/asr/whisper/__init__.py b/comps/asr/src/integrations/dependency/whisper/__init__.py similarity index 100% rename from 
comps/asr/whisper/__init__.py rename to comps/asr/src/integrations/dependency/whisper/__init__.py diff --git a/comps/asr/whisper/dependency/check_whisper_server.py b/comps/asr/src/integrations/dependency/whisper/check_whisper_server.py similarity index 100% rename from comps/asr/whisper/dependency/check_whisper_server.py rename to comps/asr/src/integrations/dependency/whisper/check_whisper_server.py diff --git a/comps/asr/whisper/dependency/whisper_model.py b/comps/asr/src/integrations/dependency/whisper/whisper_model.py similarity index 100% rename from comps/asr/whisper/dependency/whisper_model.py rename to comps/asr/src/integrations/dependency/whisper/whisper_model.py diff --git a/comps/asr/whisper/dependency/whisper_server.py b/comps/asr/src/integrations/dependency/whisper/whisper_server.py similarity index 98% rename from comps/asr/whisper/dependency/whisper_server.py rename to comps/asr/src/integrations/dependency/whisper/whisper_server.py index dcb3dd19cb..5221dc9d50 100644 --- a/comps/asr/whisper/dependency/whisper_server.py +++ b/comps/asr/src/integrations/dependency/whisper/whisper_server.py @@ -5,7 +5,7 @@ import base64 import os import uuid -from typing import List, Optional, Union +from typing import List import uvicorn from fastapi import FastAPI, File, Form, Request, UploadFile @@ -28,7 +28,7 @@ ) -@app.get("/v1/health") +@app.get("/health") async def health() -> Response: """Health check.""" return Response(status_code=200) diff --git a/comps/asr/src/integrations/opea_whisper.py b/comps/asr/src/integrations/opea_whisper.py new file mode 100644 index 0000000000..9a9c917151 --- /dev/null +++ b/comps/asr/src/integrations/opea_whisper.py @@ -0,0 +1,76 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os +import time +from typing import List + +import requests +from fastapi import File, Form, UploadFile + +from comps import CustomLogger, OpeaComponent, ServiceType +from comps.cores.proto.api_protocol import AudioTranscriptionResponse + +logger = CustomLogger("opea_whisper") +logflag = os.getenv("LOGFLAG", False) + + +class OpeaWhisperAsr(OpeaComponent): + """A specialized ASR (Automatic Speech Recognition) component derived from OpeaComponent for Whisper ASR services. + + Attributes: + model_name (str): The name of the ASR model used. 
+ """ + + def __init__(self, name: str, description: str, config: dict = None): + super().__init__(name, ServiceType.ASR.name.lower(), description, config) + self.base_url = os.getenv("ASR_ENDPOINT", "http://localhost:7066") + + async def invoke( + self, + file: UploadFile = File(...), # Handling the uploaded file directly + model: str = Form("openai/whisper-small"), + language: str = Form("english"), + prompt: str = Form(None), + response_format: str = Form("json"), + temperature: float = Form(0), + timestamp_granularities: List[str] = Form(None), + ) -> AudioTranscriptionResponse: + """Involve the ASR service to generate transcription for the provided input.""" + # Read the uploaded file + file_contents = await file.read() + + # Prepare the files and data for requests.post + files = { + "file": (file.filename, file_contents, file.content_type), + } + data = { + "model": model, + "language": language, + "prompt": prompt, + "response_format": response_format, + "temperature": temperature, + "timestamp_granularities": timestamp_granularities, + } + + # Send the file and model to the server + response = requests.post(f"{self.base_url}/v1/audio/transcriptions", files=files, data=data) + res = response.json()["text"] + return AudioTranscriptionResponse(text=res) + + def check_health(self) -> bool: + """Checks the health of the embedding service. + + Returns: + bool: True if the service is reachable and healthy, False otherwise. + """ + try: + response = requests.get(f"{self.base_url}/health") + if response.status_code == 200: + return True + else: + return False + except Exception as e: + # Handle connection errors, timeouts, etc. + logger.error(f"Health check failed: {e}") + return False diff --git a/comps/asr/src/opea_asr_microservice.py b/comps/asr/src/opea_asr_microservice.py new file mode 100644 index 0000000000..c56b52bfcb --- /dev/null +++ b/comps/asr/src/opea_asr_microservice.py @@ -0,0 +1,94 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os +import time +from typing import List + +from fastapi import File, Form, UploadFile +from integrations.opea_whisper import OpeaWhisperAsr + +from comps import ( + Base64ByteStrDoc, + CustomLogger, + LLMParamsDoc, + OpeaComponentController, + ServiceType, + opea_microservices, + register_microservice, + register_statistics, + statistics_dict, +) +from comps.cores.proto.api_protocol import AudioTranscriptionResponse + +logger = CustomLogger("opea_asr_microservice") +logflag = os.getenv("LOGFLAG", False) + +# Initialize OpeaComponentController +controller = OpeaComponentController() + +# Register components +try: + # Instantiate ASR components + opea_whisper = OpeaWhisperAsr( + name="OpeaWhisperAsr", + description="OPEA Whisper ASR Service", + ) + + # Register components with the controller + controller.register(opea_whisper) + + # Discover and activate a healthy component + controller.discover_and_activate() +except Exception as e: + logger.error(f"Failed to initialize components: {e}") + + +@register_microservice( + name="opea_service@asr", + service_type=ServiceType.ASR, + endpoint="/v1/audio/transcriptions", + host="0.0.0.0", + port=9099, + input_datatype=Base64ByteStrDoc, + output_datatype=LLMParamsDoc, +) +@register_statistics(names=["opea_service@asr"]) +async def audio_to_text( + file: UploadFile = File(...), # Handling the uploaded file directly + model: str = Form("openai/whisper-small"), + language: str = Form("english"), + prompt: str = Form(None), + response_format: str = Form("json"), + 
diff --git a/comps/asr/src/opea_asr_microservice.py b/comps/asr/src/opea_asr_microservice.py
new file mode 100644
index 0000000000..c56b52bfcb
--- /dev/null
+++ b/comps/asr/src/opea_asr_microservice.py
@@ -0,0 +1,94 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+import os
+import time
+from typing import List
+
+from fastapi import File, Form, UploadFile
+from integrations.opea_whisper import OpeaWhisperAsr
+
+from comps import (
+    Base64ByteStrDoc,
+    CustomLogger,
+    LLMParamsDoc,
+    OpeaComponentController,
+    ServiceType,
+    opea_microservices,
+    register_microservice,
+    register_statistics,
+    statistics_dict,
+)
+from comps.cores.proto.api_protocol import AudioTranscriptionResponse
+
+logger = CustomLogger("opea_asr_microservice")
+logflag = os.getenv("LOGFLAG", False)
+
+# Initialize OpeaComponentController
+controller = OpeaComponentController()
+
+# Register components
+try:
+    # Instantiate ASR components
+    opea_whisper = OpeaWhisperAsr(
+        name="OpeaWhisperAsr",
+        description="OPEA Whisper ASR Service",
+    )
+
+    # Register components with the controller
+    controller.register(opea_whisper)
+
+    # Discover and activate a healthy component
+    controller.discover_and_activate()
+except Exception as e:
+    logger.error(f"Failed to initialize components: {e}")
+
+
+@register_microservice(
+    name="opea_service@asr",
+    service_type=ServiceType.ASR,
+    endpoint="/v1/audio/transcriptions",
+    host="0.0.0.0",
+    port=9099,
+    input_datatype=Base64ByteStrDoc,
+    output_datatype=LLMParamsDoc,
+)
+@register_statistics(names=["opea_service@asr"])
+async def audio_to_text(
+    file: UploadFile = File(...),  # Handling the uploaded file directly
+    model: str = Form("openai/whisper-small"),
+    language: str = Form("english"),
+    prompt: str = Form(None),
+    response_format: str = Form("json"),
+    temperature: float = Form(0),
+    timestamp_granularities: List[str] = Form(None),
+) -> AudioTranscriptionResponse:
+    start = time.time()
+
+    if logflag:
+        logger.info("ASR file uploaded.")
+
+    try:
+        # Use the controller to invoke the active component
+        asr_response = await controller.invoke(
+            file=file,
+            model=model,
+            language=language,
+            prompt=prompt,
+            response_format=response_format,
+            temperature=temperature,
+            timestamp_granularities=timestamp_granularities,
+        )
+        if logflag:
+            logger.info(asr_response)
+        statistics_dict["opea_service@asr"].append_latency(time.time() - start, None)
+        return asr_response
+
+    except Exception as e:
+        logger.error(f"Error during asr invocation: {e}")
+        raise
+
+
+if __name__ == "__main__":
+    logger.info("OPEA ASR Microservice is starting....")
+    opea_microservices["opea_service@asr"].start()
diff --git a/comps/asr/whisper/requirements.txt b/comps/asr/src/requirements.txt
similarity index 100%
rename from comps/asr/whisper/requirements.txt
rename to comps/asr/src/requirements.txt
diff --git a/comps/asr/whisper/asr.py b/comps/asr/whisper/asr.py
deleted file mode 100644
index 920c831526..0000000000
--- a/comps/asr/whisper/asr.py
+++ /dev/null
@@ -1,53 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-import json
-import os
-import time
-
-import requests
-
-from comps import CustomLogger
-
-logger = CustomLogger("asr")
-logflag = os.getenv("LOGFLAG", False)
-
-from comps import (
-    Base64ByteStrDoc,
-    LLMParamsDoc,
-    ServiceType,
-    opea_microservices,
-    register_microservice,
-    register_statistics,
-    statistics_dict,
-)
-
-
-@register_microservice(
-    name="opea_service@asr",
-    service_type=ServiceType.ASR,
-    endpoint="/v1/audio/transcriptions",
-    host="0.0.0.0",
-    port=9099,
-    input_datatype=Base64ByteStrDoc,
-    output_datatype=LLMParamsDoc,
-)
-@register_statistics(names=["opea_service@asr"])
-async def audio_to_text(audio: Base64ByteStrDoc):
-    start = time.time()
-    byte_str = audio.byte_str
-    inputs = {"audio": byte_str}
-    if logflag:
-        logger.info(inputs)
-
-    response = requests.post(url=f"{asr_endpoint}/v1/asr", data=json.dumps(inputs), proxies={"http": None})
-    if logflag:
-        logger.info(response)
-    statistics_dict["opea_service@asr"].append_latency(time.time() - start, None)
-    return LLMParamsDoc(query=response.json()["asr_result"])
-
-
-if __name__ == "__main__":
-    asr_endpoint = os.getenv("ASR_ENDPOINT", "http://localhost:7066")
-    logger.info("[asr - router] ASR initialized.")
-    opea_microservices["opea_service@asr"].start()
diff --git a/comps/cores/proto/api_protocol.py b/comps/cores/proto/api_protocol.py
index 69156fbc27..09dd207302 100644
--- a/comps/cores/proto/api_protocol.py
+++ b/comps/cores/proto/api_protocol.py
@@ -279,6 +279,7 @@ class DocSumChatCompletionRequest(BaseModel):

 class AudioChatCompletionRequest(BaseModel):
     audio: str
+    voice: str = "default"
     messages: Optional[
         Union[
             str,
diff --git a/comps/tts/deployment/docker_compose/compose_gptsovits.yaml b/comps/tts/deployment/docker_compose/compose_gptsovits.yaml
new file mode 100644
index 0000000000..7cf863010f
--- /dev/null
+++ b/comps/tts/deployment/docker_compose/compose_gptsovits.yaml
@@ -0,0 +1,35 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+services:
+  gpt-sovits-service:
+    image: ${REGISTRY:-opea}/gpt-sovits:${TAG:-latest}
+    container_name: gpt-sovits-service
+    ports:
+      - "9880:9880"
+    ipc: host
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+    restart: unless-stopped
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:9880/health"]
+      interval: 10s
+      timeout: 6s
+      retries: 18
+  tts:
+    image: ${REGISTRY:-opea}/tts:${TAG:-latest}
+    container_name: tts-service
+    ports:
+      - "3002:9088"
+    ipc: host
+    environment:
+      TTS_ENDPOINT: ${TTS_ENDPOINT}
+    depends_on:
+      gpt-sovits-service:
+        condition: service_healthy
+
+networks:
+  default:
+    driver: bridge
diff --git a/comps/tts/deployment/docker_compose/compose_speecht5.yaml b/comps/tts/deployment/docker_compose/compose_speecht5.yaml
new file mode 100644
index 0000000000..f5df389a6c
--- /dev/null
+++ b/comps/tts/deployment/docker_compose/compose_speecht5.yaml
@@ -0,0 +1,35 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+services:
+  speecht5-service:
+    image: ${REGISTRY:-opea}/speecht5:${TAG:-latest}
+    container_name: speecht5-service
+    ports:
+      - "7055:7055"
+    ipc: host
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+    restart: unless-stopped
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:7055/health"]
+      interval: 10s
+      timeout: 6s
+      retries: 18
+  tts:
+    image: ${REGISTRY:-opea}/tts:${TAG:-latest}
+    container_name: tts-service
+    ports:
+      - "3002:9088"
+    ipc: host
+    environment:
+      TTS_ENDPOINT: ${TTS_ENDPOINT}
+    depends_on:
+      speecht5-service:
+        condition: service_healthy
+
+networks:
+  default:
+    driver: bridge
diff --git a/comps/tts/deployment/docker_compose/compose_speect5_hpu.yaml b/comps/tts/deployment/docker_compose/compose_speect5_hpu.yaml
new file mode 100644
index 0000000000..e30ce3678a
--- /dev/null
+++ b/comps/tts/deployment/docker_compose/compose_speect5_hpu.yaml
@@ -0,0 +1,40 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+services:
+  speecht5-service:
+    image: ${REGISTRY:-opea}/speecht5-gaudi:${TAG:-latest}
+    container_name: speecht5-service
+    ports:
+      - "7055:7055"
+    ipc: host
+    environment:
+      no_proxy: ${no_proxy}
+      http_proxy: ${http_proxy}
+      https_proxy: ${https_proxy}
+      HABANA_VISIBLE_DEVICES: all
+      OMPI_MCA_btl_vader_single_copy_mechanism: none
+    runtime: habana
+    cap_add:
+      - SYS_NICE
+    restart: unless-stopped
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:7055/health"]
+      interval: 10s
+      timeout: 6s
+      retries: 18
+  tts:
+    image: ${REGISTRY:-opea}/tts:${TAG:-latest}
+    container_name: tts-service
+    ports:
+      - "3002:9088"
+    ipc: host
+    environment:
+      TTS_ENDPOINT: ${TTS_ENDPOINT}
+    depends_on:
+      speecht5-service:
+        condition: service_healthy
+
+networks:
+  default:
+    driver: bridge
diff --git a/comps/tts/gpt-sovits/__init__.py b/comps/tts/gpt-sovits/__init__.py
deleted file mode 100644
index 916f3a44b2..0000000000
--- a/comps/tts/gpt-sovits/__init__.py
+++ /dev/null
@@ -1,2 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
diff --git a/comps/tts/speecht5/__init__.py b/comps/tts/speecht5/__init__.py
deleted file mode 100644
index 916f3a44b2..0000000000
--- a/comps/tts/speecht5/__init__.py
+++ /dev/null
@@ -1,2 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
diff --git a/comps/tts/speecht5/dependency/__init__.py b/comps/tts/speecht5/dependency/__init__.py
deleted file mode 100644
index 916f3a44b2..0000000000
--- a/comps/tts/speecht5/dependency/__init__.py
+++ /dev/null
@@ -1,2 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
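All of the dependency services above declare a `/health` probe with the same parameters (10s interval, 6s timeout, 18 retries), so scripts that drive these compose files can poll instead of sleeping for a fixed interval. A small illustrative sketch; the endpoint list is an assumption matching the port mappings above:

```python
# Poll the /health probes declared in the compose files above instead of
# sleeping for a fixed time. The endpoints listed are illustrative defaults.
import time

import requests

ENDPOINTS = [
    "http://localhost:7066/health",  # whisper-service
    "http://localhost:7055/health",  # speecht5-service
    "http://localhost:9880/health",  # gpt-sovits-service
]


def wait_healthy(url: str, retries: int = 18, delay: float = 10.0) -> bool:
    """Return True once the probe answers 200, mirroring the compose healthcheck."""
    for _ in range(retries):
        try:
            if requests.get(url, timeout=6).status_code == 200:
                return True
        except requests.RequestException:
            pass  # service not up yet; retry after the delay
        time.sleep(delay)
    return False


for url in ENDPOINTS:
    print(url, "healthy" if wait_healthy(url) else "unreachable")
```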
diff --git a/comps/tts/speecht5/tts.py b/comps/tts/speecht5/tts.py
deleted file mode 100644
index 050a1bbd55..0000000000
--- a/comps/tts/speecht5/tts.py
+++ /dev/null
@@ -1,53 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-
-import json
-import os
-import time
-
-import requests
-
-from comps import (
-    Base64ByteStrDoc,
-    CustomLogger,
-    ServiceType,
-    TextDoc,
-    opea_microservices,
-    register_microservice,
-    register_statistics,
-    statistics_dict,
-)
-
-logger = CustomLogger("tts")
-logflag = os.getenv("LOGFLAG", False)
-
-
-@register_microservice(
-    name="opea_service@tts",
-    service_type=ServiceType.TTS,
-    endpoint="/v1/audio/speech",
-    host="0.0.0.0",
-    port=9088,
-    input_datatype=TextDoc,
-    output_datatype=Base64ByteStrDoc,
-)
-@register_statistics(names=["opea_service@tts"])
-async def text_to_audio(input: TextDoc):
-    if logflag:
-        logger.info(input)
-    start = time.time()
-    text = input.text
-    inputs = {"text": text}
-
-    response = requests.post(url=f"{tts_endpoint}/v1/tts", data=json.dumps(inputs), proxies={"http": None})
-    statistics_dict["opea_service@tts"].append_latency(time.time() - start, None)
-    result = Base64ByteStrDoc(byte_str=response.json()["tts_result"])
-    if logflag:
-        logger.info(result)
-    return result
-
-
-if __name__ == "__main__":
-    tts_endpoint = os.getenv("TTS_ENDPOINT", "http://localhost:7055")
-    logger.info("[tts - router] TTS initialized.")
-    opea_microservices["opea_service@tts"].start()
diff --git a/comps/tts/speecht5/Dockerfile b/comps/tts/src/Dockerfile
similarity index 71%
rename from comps/tts/speecht5/Dockerfile
rename to comps/tts/src/Dockerfile
index 5a78719df7..de3bbce35d 100644
--- a/comps/tts/speecht5/Dockerfile
+++ b/comps/tts/src/Dockerfile
@@ -14,13 +14,13 @@ COPY comps /home/user/comps
 RUN pip install --no-cache-dir --upgrade pip setuptools && \
     if [ "${ARCH}" = "cpu" ]; then \
       pip install --no-cache-dir torch torchvision --index-url https://download.pytorch.org/whl/cpu ; \
-      pip install --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cpu -r /home/user/comps/tts/speecht5/requirements.txt ; \
+      pip install --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cpu -r /home/user/comps/tts/src/requirements.txt ; \
     else \
-      pip install --no-cache-dir -r /home/user/comps/tts/speecht5/requirements.txt ; \
+      pip install --no-cache-dir -r /home/user/comps/tts/src/requirements.txt ; \
     fi

 ENV PYTHONPATH=$PYTHONPATH:/home/user

-WORKDIR /home/user/comps/tts/speecht5
+WORKDIR /home/user/comps/tts/src

-ENTRYPOINT ["python", "tts.py"]
+ENTRYPOINT ["python", "opea_tts_microservice.py"]
diff --git a/comps/tts/speecht5/README.md b/comps/tts/src/README.md
similarity index 80%
rename from comps/tts/speecht5/README.md
rename to comps/tts/src/README.md
index fba5e87b86..fc4ce4a9d2 100644
--- a/comps/tts/speecht5/README.md
+++ b/comps/tts/src/README.md
@@ -7,7 +7,7 @@ TTS (Text-To-Speech) microservice helps users convert text to speech. 
When build - Xeon CPU ```bash -cd dependency/ +cd integrations/dependency/speecht5 nohup python speecht5_server.py --device=cpu & curl http://localhost:7055/v1/tts -XPOST -d '{"text": "Who are you?"}' -H 'Content-Type: application/json' ``` @@ -17,7 +17,7 @@ curl http://localhost:7055/v1/tts -XPOST -d '{"text": "Who are you?"}' -H 'Conte ```bash pip install optimum[habana] -cd dependency/ +cd integrations/dependency/speecht5 nohup python speecht5_server.py --device=hpu & curl http://localhost:7055/v1/tts -XPOST -d '{"text": "Who are you?"}' -H 'Content-Type: application/json' ``` @@ -25,9 +25,9 @@ curl http://localhost:7055/v1/tts -XPOST -d '{"text": "Who are you?"}' -H 'Conte ## 1.3 Start TTS Service/Test ```bash -python tts.py +python opea_tts_microservice.py -curl http://localhost:9088/v1/audio/speech -XPOST -d '{"text": "Who are you?"}' -H 'Content-Type: application/json' +curl http://localhost:9088/v1/audio/speech -XPOST -d '{"input": "Who are you?"}' -H 'Content-Type: application/json' --output speech.mp3 ``` ## 🚀2. Start Microservice with Docker (Option 2) @@ -42,20 +42,20 @@ Alternatively, you can start the TTS microservice with Docker. ```bash cd ../../../ -docker build -t opea/speecht5:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/tts/speecht5/dependency/Dockerfile . +docker build -t opea/speecht5:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/tts/src/integrations/dependency/speecht5/Dockerfile . ``` - Gaudi2 HPU ```bash cd ../../../ -docker build -t opea/speecht5-gaudi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/tts/speecht5/dependency/Dockerfile.intel_hpu . +docker build -t opea/speecht5-gaudi:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/tts/src/integrations/dependency/speecht5/Dockerfile.intel_hpu . ``` #### 2.1.2 TTS Service Image ```bash -docker build -t opea/tts:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/tts/speecht5/Dockerfile . +docker build -t opea/tts:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/tts/src/Dockerfile . ``` ### 2.2 Start SpeechT5 and TTS Service @@ -89,7 +89,5 @@ curl http://localhost:7055/v1/tts -XPOST -d '{"text": "Who are you?"}' -H 'Conte # openai protocol compatible # voice can be 'male' or 'default' -curl http://localhost:7055/v1/audio/speech -XPOST -d '{"input":"Who are you?", "voice": "male"}' -H 'Content-Type: application/json' --output speech.wav - -curl http://localhost:9088/v1/audio/speech -XPOST -d '{"text": "Who are you?"}' -H 'Content-Type: application/json' +curl http://localhost:9088/v1/audio/speech -XPOST -d '{"input":"Who are you?", "voice": "male"}' -H 'Content-Type: application/json' --output speech.wav ``` diff --git a/comps/tts/gpt-sovits/Dockerfile b/comps/tts/src/integrations/dependency/gpt-sovits/Dockerfile similarity index 100% rename from comps/tts/gpt-sovits/Dockerfile rename to comps/tts/src/integrations/dependency/gpt-sovits/Dockerfile diff --git a/comps/tts/gpt-sovits/README.md b/comps/tts/src/integrations/dependency/gpt-sovits/README.md similarity index 95% rename from comps/tts/gpt-sovits/README.md rename to comps/tts/src/integrations/dependency/gpt-sovits/README.md index 4876764cb1..39ec680731 100644 --- a/comps/tts/gpt-sovits/README.md +++ b/comps/tts/src/integrations/dependency/gpt-sovits/README.md @@ -7,7 +7,7 @@ This microservice is validated on Xeon/CUDA. 
HPU support is under development. ## Build the Image ```bash -docker build -t opea/gpt-sovits:latest --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy -f comps/tts/gpt-sovits/Dockerfile . +docker build -t opea/gpt-sovits:latest --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy -f comps/tts/src/integrations/dependency/gpt-sovits/Dockerfile . ``` ## Start the Service diff --git a/comps/tts/speecht5/dependency/Dockerfile b/comps/tts/src/integrations/dependency/speecht5/Dockerfile similarity index 74% rename from comps/tts/speecht5/dependency/Dockerfile rename to comps/tts/src/integrations/dependency/speecht5/Dockerfile index 88d2df41aa..34ade3576e 100644 --- a/comps/tts/speecht5/dependency/Dockerfile +++ b/comps/tts/src/integrations/dependency/speecht5/Dockerfile @@ -23,13 +23,13 @@ USER user RUN pip install --no-cache-dir --upgrade pip setuptools && \ if [ "${ARCH}" = "cpu" ]; then \ pip install --no-cache-dir torch torchvision --index-url https://download.pytorch.org/whl/cpu ; \ - pip install --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cpu -r /home/user/comps/tts/speecht5/requirements.txt ; \ + pip install --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cpu -r /home/user/comps/tts/src/integrations/dependency/speecht5/requirements.txt ; \ else \ - pip install --no-cache-dir -r /home/user/comps/tts/speecht5/requirements.txt ; \ + pip install --no-cache-dir -r /home/user/comps/tts/src/integrations/dependency/speecht5/requirements.txt ; \ fi ENV PYTHONPATH=$PYTHONPATH:/home/user -WORKDIR /home/user/comps/tts/speecht5/dependency +WORKDIR /home/user/comps/tts/src/integrations/dependency/speecht5 ENTRYPOINT ["python", "speecht5_server.py", "--device", "cpu"] diff --git a/comps/tts/speecht5/dependency/Dockerfile.intel_hpu b/comps/tts/src/integrations/dependency/speecht5/Dockerfile.intel_hpu similarity index 83% rename from comps/tts/speecht5/dependency/Dockerfile.intel_hpu rename to comps/tts/src/integrations/dependency/speecht5/Dockerfile.intel_hpu index 6ac7418dab..895118d1d3 100644 --- a/comps/tts/speecht5/dependency/Dockerfile.intel_hpu +++ b/comps/tts/src/integrations/dependency/speecht5/Dockerfile.intel_hpu @@ -24,11 +24,11 @@ USER user # Install requirements and optimum habana RUN pip install --no-cache-dir --upgrade pip && \ - pip install --no-cache-dir -r /home/user/comps/tts/speecht5/requirements.txt && \ + pip install --no-cache-dir -r /home/user/comps/tts/src/integrations/dependency/speecht5/requirements.txt && \ pip install --no-cache-dir optimum[habana] ENV PYTHONPATH=$PYTHONPATH:/home/user -WORKDIR /home/user/comps/tts/speecht5/dependency +WORKDIR /home/user/comps/tts/src/integrations/dependency/speecht5 ENTRYPOINT ["python", "speecht5_server.py", "--device", "hpu"] diff --git a/comps/asr/whisper/dependency/__init__.py b/comps/tts/src/integrations/dependency/speecht5/__init__.py similarity index 100% rename from comps/asr/whisper/dependency/__init__.py rename to comps/tts/src/integrations/dependency/speecht5/__init__.py diff --git a/comps/tts/speecht5/requirements.txt b/comps/tts/src/integrations/dependency/speecht5/requirements.txt similarity index 100% rename from comps/tts/speecht5/requirements.txt rename to comps/tts/src/integrations/dependency/speecht5/requirements.txt diff --git a/comps/tts/speecht5/dependency/speecht5_model.py b/comps/tts/src/integrations/dependency/speecht5/speecht5_model.py similarity index 93% rename from comps/tts/speecht5/dependency/speecht5_model.py rename to 
comps/tts/src/integrations/dependency/speecht5/speecht5_model.py
index 778323b56d..dd621f8c28 100644
--- a/comps/tts/speecht5/dependency/speecht5_model.py
+++ b/comps/tts/src/integrations/dependency/speecht5/speecht5_model.py
@@ -24,6 +24,7 @@ def __init__(self, device="cpu"):
         self.processor = SpeechT5Processor.from_pretrained(self.model_name_or_path, normalize=True)
         self.vocoder = SpeechT5HifiGan.from_pretrained(vocoder_model_name_or_path).to(device)
         self.vocoder.eval()
+        self.voice = "default"

         # fetch default speaker embedding
         try:
@@ -89,8 +90,13 @@ def _warmup_speecht5_hpu_graph(self):
         )

     def t2s(self, text, voice="default"):
-        if voice == "male":
-            self.default_speaker_embedding = torch.load("spk_embed_male.pt")
+        if self.voice != voice:
+            try:
+                print(f"Loading spk embedding with voice: {voice}.")
+                self.default_speaker_embedding = torch.load(f"spk_embed_{voice}.pt")
+                self.voice = voice
+            except Exception as e:
+                print(e)
         if self.device == "hpu":
             # See https://github.com/huggingface/optimum-habana/pull/824
             from optimum.habana.utils import set_seed
diff --git a/comps/tts/speecht5/dependency/speecht5_server.py b/comps/tts/src/integrations/dependency/speecht5/speecht5_server.py
similarity index 80%
rename from comps/tts/speecht5/dependency/speecht5_server.py
rename to comps/tts/src/integrations/dependency/speecht5/speecht5_server.py
index 5435f91b93..82b0e180de 100644
--- a/comps/tts/speecht5/dependency/speecht5_server.py
+++ b/comps/tts/src/integrations/dependency/speecht5/speecht5_server.py
@@ -26,7 +26,7 @@
 )


-@app.get("/v1/health")
+@app.get("/health")
 async def health() -> Response:
     """Health check."""
     return Response(status_code=200)
@@ -37,8 +37,9 @@ async def text_to_speech(request: Request):
     logger.info("SpeechT5 generation begin.")
     request_dict = await request.json()
     text = request_dict.pop("text")
+    voice = request_dict.pop("voice", "default")

-    speech = tts.t2s(text)
+    speech = tts.t2s(text, voice)
     sf.write("tmp.wav", speech, samplerate=16000)
     with open("tmp.wav", "rb") as f:
         bytes = f.read()
@@ -48,13 +49,8 @@

 @app.post("/v1/audio/speech")
-async def audio_speech(request: AudioSpeechRequest):
+async def audio_speech(request: AudioSpeechRequest) -> StreamingResponse:
     logger.info("SpeechT5 generation begin.")
-    # validate the request parameters
-    if request.model != tts.model_name_or_path:
-        raise Exception("TTS model mismatch! Currently only support model: microsoft/speecht5_tts")
-    if request.voice not in ["default", "male"] or request.speed != 1.0:
-        logger.warning("Currently parameter 'speed' can only be 1.0 and 'voice' can only be default or male!")

     speech = tts.t2s(request.input, voice=request.voice)
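With the server now speaking the OpenAI-style `/v1/audio/speech` protocol, a Python client is straightforward. A minimal sketch, assuming the TTS microservice added below is listening on `localhost:9088` and that `voice` is one of the values the SpeechT5 backend understands (`default` or `male`):

```python
# Minimal client sketch for the OpenAI-style speech endpoint.
import requests

response = requests.post(
    "http://localhost:9088/v1/audio/speech",
    json={"input": "Who are you?", "voice": "default"},
    proxies={"http": None},  # bypass any local HTTP proxy
    stream=True,
)
response.raise_for_status()
with open("speech.wav", "wb") as f:
    for chunk in response.iter_content(chunk_size=1024):
        f.write(chunk)  # audio bytes forwarded by the microservice
```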
+ """ + + def __init__(self, name: str, description: str, config: dict = None): + super().__init__(name, ServiceType.TTS.name.lower(), description, config) + self.base_url = os.getenv("TTS_ENDPOINT", "http://localhost:9880") + + async def invoke( + self, + request: AudioSpeechRequest, + ) -> requests.models.Response: + """Involve the TTS service to generate speech for the provided input.""" + # see https://github.com/Spycsh/GPT-SoVITS/blob/openai_compat/api.py#L948 for usage + # make sure you change the refer_wav_path locally + request.voice = None + + response = requests.post(f"{self.base_url}/v1/audio/speech", data=request.json()) + return response + + def check_health(self) -> bool: + """Checks the health of the embedding service. + + Returns: + bool: True if the service is reachable and healthy, False otherwise. + """ + try: + response = requests.get(f"{self.base_url}/health") + if response.status_code == 200: + return True + else: + return False + except Exception as e: + # Handle connection errors, timeouts, etc. + logger.error(f"Health check failed: {e}") + return False diff --git a/comps/tts/src/integrations/opea_speecht5.py b/comps/tts/src/integrations/opea_speecht5.py new file mode 100644 index 0000000000..669d3ea6fa --- /dev/null +++ b/comps/tts/src/integrations/opea_speecht5.py @@ -0,0 +1,57 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import os +import time + +import requests +from fastapi.responses import StreamingResponse + +from comps import CustomLogger, OpeaComponent, ServiceType +from comps.cores.proto.api_protocol import AudioSpeechRequest + +logger = CustomLogger("opea_speecht5") +logflag = os.getenv("LOGFLAG", False) + + +class OpeaSpeecht5Tts(OpeaComponent): + """A specialized TTS (Text To Speech) component derived from OpeaComponent for SpeechT5 TTS services. + + Attributes: + model_name (str): The name of the TTS model used. + """ + + def __init__(self, name: str, description: str, config: dict = None): + super().__init__(name, ServiceType.TTS.name.lower(), description, config) + self.base_url = os.getenv("TTS_ENDPOINT", "http://localhost:7055") + + def invoke( + self, + request: AudioSpeechRequest, + ) -> requests.models.Response: + """Involve the TTS service to generate speech for the provided input.""" + # validate the request parameters + if request.model not in ["microsoft/speecht5_tts"]: + raise Exception("TTS model mismatch! Currently only support model: microsoft/speecht5_tts") + if request.voice not in ["default", "male"] or request.speed != 1.0: + logger.warning("Currently parameter 'speed' can only be 1.0 and 'voice' can only be default or male!") + + response = requests.post(f"{self.base_url}/v1/audio/speech", data=request.json()) + return response + + def check_health(self) -> bool: + """Checks the health of the embedding service. + + Returns: + bool: True if the service is reachable and healthy, False otherwise. + """ + try: + response = requests.get(f"{self.base_url}/health") + if response.status_code == 200: + return True + else: + return False + except Exception as e: + # Handle connection errors, timeouts, etc. 
diff --git a/comps/tts/src/opea_tts_microservice.py b/comps/tts/src/opea_tts_microservice.py
new file mode 100644
index 0000000000..89771f4e33
--- /dev/null
+++ b/comps/tts/src/opea_tts_microservice.py
@@ -0,0 +1,88 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+import os
+import time
+
+from fastapi.responses import StreamingResponse
+from integrations.opea_gptsovits import OpeaGptsovitsTts
+from integrations.opea_speecht5 import OpeaSpeecht5Tts
+
+from comps import (
+    CustomLogger,
+    OpeaComponentController,
+    ServiceType,
+    opea_microservices,
+    register_microservice,
+    register_statistics,
+    statistics_dict,
+)
+from comps.cores.proto.api_protocol import AudioSpeechRequest
+
+logger = CustomLogger("opea_tts_microservice")
+logflag = os.getenv("LOGFLAG", False)
+
+# Initialize OpeaComponentController
+controller = OpeaComponentController()
+
+# Register components
+try:
+    # Instantiate TTS components
+    opea_speecht5 = OpeaSpeecht5Tts(
+        name="OpeaSpeecht5Tts",
+        description="OPEA SpeechT5 TTS Service",
+    )
+
+    opea_gptsovits = OpeaGptsovitsTts(
+        name="OpeaGptsovitsTts",
+        description="OPEA GPTSoVITS TTS Service",
+    )
+
+    # Register components with the controller
+    controller.register(opea_speecht5)
+    controller.register(opea_gptsovits)
+
+    # Discover and activate a healthy component
+    controller.discover_and_activate()
+except Exception as e:
+    logger.error(f"Failed to initialize components: {e}")
+
+
+async def stream_forwarder(response):
+    """Forward the stream chunks to the client using iter_content."""
+    for chunk in response.iter_content(chunk_size=1024):
+        yield chunk
+
+
+@register_microservice(
+    name="opea_service@tts",
+    service_type=ServiceType.TTS,
+    endpoint="/v1/audio/speech",
+    host="0.0.0.0",
+    port=9088,
+    input_datatype=AudioSpeechRequest,
+    output_datatype=StreamingResponse,
+)
+@register_statistics(names=["opea_service@tts"])
+async def text_to_speech(request: AudioSpeechRequest) -> StreamingResponse:
+    start = time.time()
+
+    if logflag:
+        logger.info(f"Input received: {request}")
+
+    try:
+        # Use the controller to invoke the active component
+        tts_response = controller.invoke(request)
+        if logflag:
+            logger.info(tts_response)
+        statistics_dict["opea_service@tts"].append_latency(time.time() - start, None)
+        return StreamingResponse(stream_forwarder(tts_response))
+
+    except Exception as e:
+        logger.error(f"Error during tts invocation: {e}")
+        raise
+
+
+if __name__ == "__main__":
+    logger.info("OPEA TTS Microservice is starting....")
+    opea_microservices["opea_service@tts"].start()
diff --git a/comps/tts/src/requirements.txt b/comps/tts/src/requirements.txt
new file mode 100644
index 0000000000..89a9c12a80
--- /dev/null
+++ b/comps/tts/src/requirements.txt
@@ -0,0 +1,11 @@
+aiohttp
+docarray[full]
+fastapi
+opentelemetry-api
+opentelemetry-exporter-otlp
+opentelemetry-sdk
+prometheus-fastapi-instrumentator
+pydantic==2.9.1
+pyyaml
+shortuuid
+uvicorn
diff --git a/tests/asr/test_asr_whisper.sh b/tests/asr/test_asr_opea_whisper.sh
similarity index 73%
rename from tests/asr/test_asr_whisper.sh
rename to tests/asr/test_asr_opea_whisper.sh
index d0928cf34d..c038724862 100644
--- a/tests/asr/test_asr_whisper.sh
+++ b/tests/asr/test_asr_opea_whisper.sh
@@ -10,7 +10,7 @@ ip_address=$(hostname -I | awk '{print $1}')
 function build_docker_images() {
     cd $WORKPATH
     echo $(pwd)
-    docker build --no-cache -t opea/whisper:comps --build-arg https_proxy=$https_proxy --build-arg 
http_proxy=$http_proxy -f comps/asr/whisper/dependency/Dockerfile . + docker build --no-cache -t opea/whisper:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/src/integrations/dependency/whisper/Dockerfile . if [ $? -ne 0 ]; then echo "opea/whisper built fail" @@ -19,7 +19,7 @@ function build_docker_images() { echo "opea/whisper built successful" fi - docker build --no-cache -t opea/asr:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/whisper/Dockerfile . + docker build --no-cache -t opea/asr:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/src/Dockerfile . if [ $? -ne 0 ]; then echo "opea/asr built fail" @@ -31,14 +31,17 @@ function build_docker_images() { function start_service() { unset http_proxy - docker run -d --name="test-comps-asr-whisper" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 7066:7066 --ipc=host opea/whisper:comps - docker run -d --name="test-comps-asr" -e ASR_ENDPOINT=http://$ip_address:7066 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 9089:9099 --ipc=host opea/asr:comps - sleep 60s + docker run -d --name="test-comps-asr-whisper" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -p 7066:7066 --ipc=host opea/whisper:comps + sleep 2m + docker run -d --name="test-comps-asr" -e ASR_ENDPOINT=http://$ip_address:7066 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -p 9089:9099 --ipc=host opea/asr:comps + sleep 15 } function validate_microservice() { - result=$(http_proxy="" curl http://localhost:9089/v1/audio/transcriptions -XPOST -d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' -H 'Content-Type: application/json') - if [[ $result == *"you"* ]]; then + wget https://github.com/intel/intel-extension-for-transformers/raw/main/intel_extension_for_transformers/neural_chat/assets/audio/sample.wav + result=$(http_proxy="" curl http://localhost:9089/v1/audio/transcriptions -H "Content-Type: multipart/form-data" -F file="@./sample.wav" -F model="openai/whisper-small") + rm -f sample.wav + if [[ $result == *"who is"* ]]; then echo "Result correct." else echo "Result wrong." diff --git a/tests/asr/test_asr_whisper_on_intel_hpu.sh b/tests/asr/test_asr_opea_whisper_on_intel_hpu.sh similarity index 70% rename from tests/asr/test_asr_whisper_on_intel_hpu.sh rename to tests/asr/test_asr_opea_whisper_on_intel_hpu.sh index e833ff8b44..da3317fb60 100644 --- a/tests/asr/test_asr_whisper_on_intel_hpu.sh +++ b/tests/asr/test_asr_opea_whisper_on_intel_hpu.sh @@ -10,7 +10,7 @@ ip_address=$(hostname -I | awk '{print $1}') function build_docker_images() { cd $WORKPATH echo $(pwd) - docker build --no-cache -t opea/whisper-gaudi:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/whisper/dependency/Dockerfile.intel_hpu . + docker build --no-cache -t opea/whisper-gaudi:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/src/integrations/dependency/whisper/Dockerfile.intel_hpu . if [ $? -ne 0 ]; then echo "opea/whisper-gaudi built fail" @@ -19,7 +19,7 @@ function build_docker_images() { echo "opea/whisper-gaudi built successful" fi - docker build --no-cache -t opea/asr:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/whisper/Dockerfile . 
+ docker build --no-cache -t opea/asr:comps --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/asr/src/Dockerfile . if [ $? -ne 0 ]; then echo "opea/asr built fail" @@ -31,18 +31,20 @@ function build_docker_images() { function start_service() { unset http_proxy - docker run -d --name="test-comps-asr-whisper-gaudi" --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 7066:7066 --ipc=host opea/whisper-gaudi:comps - docker run -d --name="test-comps-asr" -e ASR_ENDPOINT=http://$ip_address:7066 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 9089:9099 --ipc=host opea/asr:comps - sleep 2m + docker run -d --name="test-comps-asr-whisper-gaudi" --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -p 7067:7066 --ipc=host opea/whisper-gaudi:comps + sleep 3m + docker run -d --name="test-comps-asr" -e ASR_ENDPOINT=http://$ip_address:7067 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy -p 9089:9099 --ipc=host opea/asr:comps + sleep 15 } function validate_microservice() { - result=$(http_proxy="" curl http://localhost:9089/v1/audio/transcriptions -XPOST -d '{"byte_str": "UklGRigAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAgAAAAEA"}' -H 'Content-Type: application/json') - if [[ $result == *"you"* ]]; then + wget https://github.com/intel/intel-extension-for-transformers/raw/main/intel_extension_for_transformers/neural_chat/assets/audio/sample.wav + result=$(http_proxy="" curl http://localhost:9089/v1/audio/transcriptions -H "Content-Type: multipart/form-data" -F file="@./sample.wav" -F model="openai/whisper-small") + if [[ $result == *"who is"* ]]; then echo "Result correct." else echo "Result wrong." - docker logs test-comps-asr-whisper + docker logs test-comps-asr-whisper-gaudi docker logs test-comps-asr exit 1 fi diff --git a/tests/tts/test_gpt_sovits.sh b/tests/tts/test_gpt_sovits.sh deleted file mode 100644 index aa7cf902a2..0000000000 --- a/tests/tts/test_gpt_sovits.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/bin/bash -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -set -x - -WORKPATH=$(dirname "$PWD") -ip_address=$(hostname -I | awk '{print $1}') - -function build_docker_images() { - cd $WORKPATH - echo $(pwd) - docker build --no-cache --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -t opea/gpt-sovits:comps -f comps/tts/gpt-sovits/Dockerfile . - if [ $? -ne 0 ]; then - echo "opea/gpt-sovits built fail" - exit 1 - else - echo "opea/gpt-sovits built successful" - fi -} - -function start_service() { - unset http_proxy - docker run -d --name="test-comps-gpt-sovits" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 9880:9880 --ipc=host opea/gpt-sovits:comps - sleep 2m -} - -function validate_microservice() { - http_proxy="" curl http://localhost:9880/v1/audio/speech -XPOST -d '{"input":"你好呀,你是谁. Hello, who are you?"}' -H 'Content-Type: application/json' --output speech.mp3 - file_size=$(stat --format="%s" speech.mp3) - if [[ $file_size -gt 0 ]]; then - echo "Result correct." - else - echo "Result wrong." - docker logs test-comps-gpt-sovits - exit 1 - fi -} - -function stop_docker() { - cid=$(docker ps -aq --filter "name=test-comps-gpt-sovits*") - if [[ ! 
-z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi -} - -function main() { - - stop_docker - - build_docker_images - start_service - - validate_microservice - - stop_docker - echo y | docker system prune - -} - -main diff --git a/tests/tts/test_tts_opea_gptsovits.sh b/tests/tts/test_tts_opea_gptsovits.sh new file mode 100644 index 0000000000..6399c29d31 --- /dev/null +++ b/tests/tts/test_tts_opea_gptsovits.sh @@ -0,0 +1,71 @@ +#!/bin/bash +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -x + +WORKPATH=$(dirname "$PWD") +ip_address=$(hostname -I | awk '{print $1}') + +function build_docker_images() { + cd $WORKPATH + echo $(pwd) + docker build --no-cache --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -t opea/gpt-sovits:comps -f comps/tts/src/integrations/dependency/gpt-sovits/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/gpt-sovits built fail" + exit 1 + else + echo "opea/gpt-sovits built successful" + fi + + docker build --no-cache --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -t opea/tts:comps -f comps/tts/src/Dockerfile . + if [ $? -ne 0 ]; then + echo "opea/tts built fail" + exit 1 + else + echo "opea/tts built successful" + fi + +} + +function start_service() { + unset http_proxy + docker run -d --name="test-comps-tts-gpt-sovits" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 9880:9880 --ipc=host opea/gpt-sovits:comps + sleep 2m + docker run -d --name="test-comps-tts" -e TTS_ENDPOINT=http://$ip_address:9880 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 5016:9088 --ipc=host opea/tts:comps + sleep 15 +} + +function validate_microservice() { + http_proxy="" curl localhost:5016/v1/audio/speech -XPOST -d '{"input":"Hello, who are you? 你好。"}' -H 'Content-Type: application/json' --output speech.mp3 + + if [[ $(file speech.mp3) == *"RIFF"* ]]; then + echo "Result correct." + else + echo "Result wrong." + docker logs test-comps-tts-gpt-sovits + docker logs test-comps-tts + exit 1 + fi +} + +function stop_docker() { + cid=$(docker ps -aq --filter "name=test-comps-tts*") + if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi +} + +function main() { + + stop_docker + + build_docker_images + start_service + + validate_microservice + + stop_docker + echo y | docker system prune + +} + +main diff --git a/tests/tts/test_tts_speecht5.sh b/tests/tts/test_tts_opea_speecht5.sh similarity index 84% rename from tests/tts/test_tts_speecht5.sh rename to tests/tts/test_tts_opea_speecht5.sh index 6d15598d03..2282f0b2de 100644 --- a/tests/tts/test_tts_speecht5.sh +++ b/tests/tts/test_tts_opea_speecht5.sh @@ -10,14 +10,14 @@ ip_address=$(hostname -I | awk '{print $1}') function build_docker_images() { cd $WORKPATH echo $(pwd) - docker build --no-cache --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -t opea/speecht5:comps -f comps/tts/speecht5/dependency/Dockerfile . + docker build --no-cache --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -t opea/speecht5:comps -f comps/tts/src/integrations/dependency/speecht5/Dockerfile . if [ $? -ne 0 ]; then echo "opea/speecht5 built fail" exit 1 else echo "opea/speecht5 built successful" fi - docker build --no-cache --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -t opea/tts:comps -f comps/tts/speecht5/Dockerfile . 
+ docker build --no-cache --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -t opea/tts:comps -f comps/tts/src/Dockerfile . if [ $? -ne 0 ]; then echo "opea/tts built fail" exit 1 @@ -29,13 +29,14 @@ function build_docker_images() { function start_service() { unset http_proxy docker run -d --name="test-comps-tts-speecht5" -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 5017:7055 --ipc=host opea/speecht5:comps + sleep 2m docker run -d --name="test-comps-tts" -e TTS_ENDPOINT=http://$ip_address:5017 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 5016:9088 --ipc=host opea/tts:comps - sleep 3m + sleep 15 } function validate_microservice() { - result=$(http_proxy="" curl http://localhost:5016/v1/audio/speech -XPOST -d '{"text": "Who are you?"}' -H 'Content-Type: application/json') - if [[ $result == *"Ukl"* ]]; then + http_proxy="" curl localhost:5016/v1/audio/speech -XPOST -d '{"input":"Hello, who are you?"}' -H 'Content-Type: application/json' --output speech.mp3 + if [[ $(file speech.mp3) == *"RIFF"* ]]; then echo "Result correct." else echo "Result wrong." diff --git a/tests/tts/test_tts_speecht5_on_intel_hpu.sh b/tests/tts/test_tts_opea_speecht5_on_intel_hpu.sh similarity index 85% rename from tests/tts/test_tts_speecht5_on_intel_hpu.sh rename to tests/tts/test_tts_opea_speecht5_on_intel_hpu.sh index 8779d4b77f..bae801580e 100644 --- a/tests/tts/test_tts_speecht5_on_intel_hpu.sh +++ b/tests/tts/test_tts_opea_speecht5_on_intel_hpu.sh @@ -10,14 +10,14 @@ ip_address=$(hostname -I | awk '{print $1}') function build_docker_images() { cd $WORKPATH echo $(pwd) - docker build --no-cache --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -t opea/speecht5-gaudi:comps -f comps/tts/speecht5/dependency/Dockerfile.intel_hpu . + docker build --no-cache --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -t opea/speecht5-gaudi:comps -f comps/tts/src/integrations/dependency/speecht5/Dockerfile.intel_hpu . if [ $? -ne 0 ]; then echo "opea/speecht5-gaudi built fail" exit 1 else echo "opea/speecht5-gaudi built successful" fi - docker build --no-cache --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -t opea/tts:comps -f comps/tts/speecht5/Dockerfile . + docker build --no-cache --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -t opea/tts:comps -f comps/tts/src/Dockerfile . if [ $? 
-ne 0 ]; then echo "opea/tts built fail" exit 1 @@ -29,13 +29,15 @@ function build_docker_images() { function start_service() { unset http_proxy docker run -d --name="test-comps-tts-speecht5" --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 5017:7055 --ipc=host opea/speecht5-gaudi:comps - docker run -d --name="test-comps-tts" -e TTS_ENDPOINT=http://$ip_address:5017 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 5016:9088 --ipc=host opea/tts:comps sleep 3m + docker run -d --name="test-comps-tts" -e TTS_ENDPOINT=http://$ip_address:5017 -e http_proxy=$http_proxy -e https_proxy=$https_proxy -p 5016:9088 --ipc=host opea/tts:comps + sleep 15 } function validate_microservice() { - result=$(http_proxy="" curl http://localhost:5016/v1/audio/speech -XPOST -d '{"text": "Who are you?"}' -H 'Content-Type: application/json') - if [[ $result == *"Ukl"* ]]; then + http_proxy="" curl localhost:5016/v1/audio/speech -XPOST -d '{"input":"Hello, who are you?"}' -H 'Content-Type: application/json' --output speech.mp3 + + if [[ $(file speech.mp3) == *"RIFF"* ]]; then echo "Result correct." else echo "Result wrong."