From 636b31904f8787f78ada9619a26303cc67619538 Mon Sep 17 00:00:00 2001 From: David Thorpe Date: Thu, 8 Aug 2024 09:23:49 +0200 Subject: [PATCH 01/21] Updated dockerfile --- Makefile | 2 +- etc/Dockerfile | 41 ++++++++++++++++++++++++++++++++++++++ etc/Dockerfile.linux-amd64 | 40 ------------------------------------- etc/Dockerfile.linux-arm64 | 40 ------------------------------------- 4 files changed, 42 insertions(+), 81 deletions(-) create mode 100644 etc/Dockerfile delete mode 100644 etc/Dockerfile.linux-amd64 delete mode 100644 etc/Dockerfile.linux-arm64 diff --git a/Makefile b/Makefile index 0ef3d3b..f610673 100644 --- a/Makefile +++ b/Makefile @@ -51,7 +51,7 @@ docker: docker-dep submodule --build-arg OS=${OS} \ --build-arg SOURCE=${BUILD_MODULE} \ --build-arg VERSION=${VERSION} \ - -f etc/Dockerfile.${OS}-${ARCH} . + -f etc/Dockerfile . # Test whisper bindings test: generate libwhisper libggml diff --git a/etc/Dockerfile b/etc/Dockerfile new file mode 100644 index 0000000..ee898c2 --- /dev/null +++ b/etc/Dockerfile @@ -0,0 +1,41 @@ +ARG BASE_TAG=0.0.10-4-g6421fd2 +ARG BASE_DEV_CONTAINER=ghcr.io/mutablelogic/cuda-dev:${BASE_TAG} +ARG BASE_RUN_CONTAINER=ghcr.io/mutablelogic/cuda-rt:${BASE_TAG} +ARG CUDA_DOCKER_ARCH=all +ARG GO_VERSION=1.22.5 +ARG ARCH +ARG OS + +# Setup build container +FROM ${BASE_DEV_CONTAINER} AS build +ARG CUDA_DOCKER_ARCH +ARG GO_VERSION +ARG ARCH +ARG OS + +RUN apt-get -y install software-properties-common curl \ + && add-apt-repository -y ppa:ubuntuhandbook1/ffmpeg6 \ + && apt-get -y update \ + && apt-get -y install libavcodec-dev libavdevice-dev libavfilter-dev libavutil-dev libswscale-dev libswresample-dev + +# Install go +RUN curl -sL https://golang.org/dl/go${GO_VERSION}.${OS}-${ARCH}.tar.gz | tar -C /usr/local -xz +ENV PATH=$PATH:/usr/local/go/bin + +# Copy source +WORKDIR /app +COPY . . + +# Make whisper-server +ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH} +ENV GGML_CUDA=1 +RUN make -j$(nproc) server + +# Setup runtime container +FROM ${BASE_RUN_CONTAINER} AS runtime +RUN apt-get -y update && apt-get -y upgrade && apt-get -y install libgomp1 +COPY --from=build --chmod=755 /app/build/whisper /usr/local/bin/whisper + +# Expose +ENTRYPOINT [ "/usr/local/bin/whisper" ] +CMD [ "server", "--dir=/data" ] diff --git a/etc/Dockerfile.linux-amd64 b/etc/Dockerfile.linux-amd64 deleted file mode 100644 index 149a453..0000000 --- a/etc/Dockerfile.linux-amd64 +++ /dev/null @@ -1,40 +0,0 @@ - -ARG UBUNTU_VERSION=22.04 -ARG CUDA_VERSION=12.5.1 -ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION} -ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION} - -# Setup build container -FROM ${BASE_CUDA_DEV_CONTAINER} AS build -ARG CUDA_DOCKER_ARCH=all -RUN apt-get -y update \ - && apt-get -y install build-essential software-properties-common git libgomp1 curl pkg-config \ - && add-apt-repository -y ppa:ubuntuhandbook1/ffmpeg6 \ - && apt-get -y update \ - && apt-get -y install libavcodec-dev libavdevice-dev libavfilter-dev libavutil-dev libswscale-dev libswresample-dev - -# Install go -ARG GO_VERSION=1.22.5 -ARG GO_ARCH=amd64 -RUN curl -sL https://golang.org/dl/go${GO_VERSION}.linux-${GO_ARCH}.tar.gz | tar -C /usr/local -xz -ENV PATH=$PATH:/usr/local/go/bin - -# Copy source -WORKDIR /app -COPY . . - -# Make whisper-server -ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH} -ENV GGML_CUDA=1 -RUN make -j$(nproc) server - -# Setup runtime container -FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime -ARG CUDA_MAIN_VERSION=12.5 -RUN apt-get -y update && apt-get -y upgrade && apt-get -y install libgomp1 -COPY --from=build /app/build/whisper /usr/local/bin/whisper -ENV LD_LIBRARY_PATH=/usr/local/cuda-${CUDA_MAIN_VERSION}/compat:$LD_LIBRARY_PATH - -# Expose -ENTRYPOINT [ "/usr/local/bin/whisper" ] -CMD [ "server", "--dir=/data" ] diff --git a/etc/Dockerfile.linux-arm64 b/etc/Dockerfile.linux-arm64 deleted file mode 100644 index f1dcbd7..0000000 --- a/etc/Dockerfile.linux-arm64 +++ /dev/null @@ -1,40 +0,0 @@ -# Jetson CUDA -# https://catalog.ngc.nvidia.com/orgs/nvidia/containers/l4t-cuda/tags -ARG CUDA_VERSION=12.2.12 -ARG BASE_CUDA_DEV_CONTAINER=nvcr.io/nvidia/l4t-cuda:${CUDA_VERSION}-devel -ARG BASE_CUDA_RUN_CONTAINER=nvcr.io/nvidia/l4t-cuda:${CUDA_VERSION}-runtime - -# Setup build container -FROM ${BASE_CUDA_DEV_CONTAINER} AS build -ARG CUDA_DOCKER_ARCH=all -RUN apt-get -y update \ - && apt-get -y install build-essential software-properties-common git libgomp1 curl pkg-config \ - && add-apt-repository -y ppa:ubuntuhandbook1/ffmpeg6 \ - && apt-get -y update \ - && apt-get -y install libavcodec-dev libavdevice-dev libavfilter-dev libavutil-dev libswscale-dev libswresample-dev - -# Install go -ARG GO_VERSION=1.22.5 -ARG GO_ARCH=arm64 -RUN curl -sL https://golang.org/dl/go${GO_VERSION}.linux-${GO_ARCH}.tar.gz | tar -C /usr/local -xz -ENV PATH=$PATH:/usr/local/go/bin - -# Copy source -WORKDIR /app -COPY . . - -# Make whisper-server -ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH} -ENV GGML_CUDA=1 -RUN make -j$(nproc) server - -# Setup runtime container -FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime -ARG CUDA_MAIN_VERSION=12.2 -RUN apt-get -y update && apt-get -y upgrade && apt-get -y install libgomp1 -COPY --from=build /app/build/whisper /usr/local/bin/whisper -ENV LD_LIBRARY_PATH=/usr/local/cuda-${CUDA_MAIN_VERSION}/compat:$LD_LIBRARY_PATH - -# Expose -ENTRYPOINT [ "/usr/local/bin/whisper" ] -CMD [ "server", "--dir=/data" ] From 34022bee4be2c42b9e65f82147761473bd9198c0 Mon Sep 17 00:00:00 2001 From: David Thorpe Date: Thu, 8 Aug 2024 09:31:49 +0200 Subject: [PATCH 02/21] Removed unnecessary option --- etc/Dockerfile | 1 - 1 file changed, 1 deletion(-) diff --git a/etc/Dockerfile b/etc/Dockerfile index ee898c2..ebbd8d0 100644 --- a/etc/Dockerfile +++ b/etc/Dockerfile @@ -33,7 +33,6 @@ RUN make -j$(nproc) server # Setup runtime container FROM ${BASE_RUN_CONTAINER} AS runtime -RUN apt-get -y update && apt-get -y upgrade && apt-get -y install libgomp1 COPY --from=build --chmod=755 /app/build/whisper /usr/local/bin/whisper # Expose From 40fd0d0b2e19cc66137d81ebe1a34cc73097512c Mon Sep 17 00:00:00 2001 From: David Thorpe Date: Thu, 8 Aug 2024 09:37:10 +0200 Subject: [PATCH 03/21] Updated --- etc/Dockerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/etc/Dockerfile b/etc/Dockerfile index ebbd8d0..fe1c360 100644 --- a/etc/Dockerfile +++ b/etc/Dockerfile @@ -13,7 +13,8 @@ ARG GO_VERSION ARG ARCH ARG OS -RUN apt-get -y install software-properties-common curl \ +RUN apt-get -y update \ + && apt-get -y install software-properties-common curl \ && add-apt-repository -y ppa:ubuntuhandbook1/ffmpeg6 \ && apt-get -y update \ && apt-get -y install libavcodec-dev libavdevice-dev libavfilter-dev libavutil-dev libswscale-dev libswresample-dev From 0b9b125c48cdcb03d27ad665702b37e60c71f5be Mon Sep 17 00:00:00 2001 From: David Thorpe Date: Thu, 8 Aug 2024 09:40:28 +0200 Subject: [PATCH 04/21] Updated --- etc/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/etc/Dockerfile b/etc/Dockerfile index fe1c360..59061f9 100644 --- a/etc/Dockerfile +++ b/etc/Dockerfile @@ -30,7 +30,7 @@ COPY . . # Make whisper-server ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH} ENV GGML_CUDA=1 -RUN make -j$(nproc) server +RUN make -j$(nproc) # Setup runtime container FROM ${BASE_RUN_CONTAINER} AS runtime From 54a18793b299717177192384aea86ebf1926de63 Mon Sep 17 00:00:00 2001 From: David Thorpe Date: Thu, 8 Aug 2024 09:46:13 +0200 Subject: [PATCH 05/21] Updated --- etc/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/etc/Dockerfile b/etc/Dockerfile index 59061f9..b1b6976 100644 --- a/etc/Dockerfile +++ b/etc/Dockerfile @@ -30,6 +30,7 @@ COPY . . # Make whisper-server ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH} ENV GGML_CUDA=1 +ENV PKG_CONFIG_PATH=/usr/lib/pkgconfig:${PKG_CONFIG_PATH} RUN make -j$(nproc) # Setup runtime container From b84f42a59fad57868dbce67dadfa395f02be144c Mon Sep 17 00:00:00 2001 From: David Thorpe Date: Thu, 8 Aug 2024 09:52:15 +0200 Subject: [PATCH 06/21] Updated cuda version --- sys/whisper/generate_cuda.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sys/whisper/generate_cuda.go b/sys/whisper/generate_cuda.go index 1649a3c..7a2c619 100644 --- a/sys/whisper/generate_cuda.go +++ b/sys/whisper/generate_cuda.go @@ -1,10 +1,11 @@ //go:build cuda + package whisper /////////////////////////////////////////////////////////////////////////////// // CGO /* -#cgo arm64 pkg-config: cuda-12.2 cublas-12.2 cudart-12.2 +#cgo arm64 pkg-config: cuda-12.6 cublas-12.6 cudart-12.6 */ import "C" From 2d9112373b8f5853379a05bfcb29724233aafcb2 Mon Sep 17 00:00:00 2001 From: David Thorpe Date: Thu, 8 Aug 2024 10:00:24 +0200 Subject: [PATCH 07/21] Updated --- etc/Dockerfile | 1 - sys/whisper/generate_cuda.go | 3 ++- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/etc/Dockerfile b/etc/Dockerfile index b1b6976..59061f9 100644 --- a/etc/Dockerfile +++ b/etc/Dockerfile @@ -30,7 +30,6 @@ COPY . . # Make whisper-server ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH} ENV GGML_CUDA=1 -ENV PKG_CONFIG_PATH=/usr/lib/pkgconfig:${PKG_CONFIG_PATH} RUN make -j$(nproc) # Setup runtime container diff --git a/sys/whisper/generate_cuda.go b/sys/whisper/generate_cuda.go index 7a2c619..085d327 100644 --- a/sys/whisper/generate_cuda.go +++ b/sys/whisper/generate_cuda.go @@ -6,6 +6,7 @@ package whisper // CGO /* -#cgo arm64 pkg-config: cuda-12.6 cublas-12.6 cudart-12.6 +#cgo pkg-config: cuda-12.6 cublas-12.6 cudart-12.6 +#cgo arm64 ldflags: -L/usr/local/cuda/lib64/stubs -lcuda */ import "C" From 80e13adf8294894134055d54ffad8af646f5d98f Mon Sep 17 00:00:00 2001 From: David Thorpe Date: Thu, 8 Aug 2024 10:02:13 +0200 Subject: [PATCH 08/21] Updated --- sys/whisper/generate_cuda.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/whisper/generate_cuda.go b/sys/whisper/generate_cuda.go index 085d327..096a53b 100644 --- a/sys/whisper/generate_cuda.go +++ b/sys/whisper/generate_cuda.go @@ -7,6 +7,6 @@ package whisper /* #cgo pkg-config: cuda-12.6 cublas-12.6 cudart-12.6 -#cgo arm64 ldflags: -L/usr/local/cuda/lib64/stubs -lcuda +#cgo LDFLAGS: -L/usr/local/cuda/lib64/stubs -lcuda */ import "C" From 6d8b4f20c2ce7d038b8d5811aa39bf52679284b4 Mon Sep 17 00:00:00 2001 From: David Thorpe Date: Thu, 8 Aug 2024 10:18:07 +0200 Subject: [PATCH 09/21] Updated dockerfile --- cmd/whisper/main.go | 2 +- etc/Dockerfile | 10 +++++++--- etc/entrypoint.sh | 17 +++++++++++++++++ 3 files changed, 25 insertions(+), 4 deletions(-) create mode 100644 etc/entrypoint.sh diff --git a/cmd/whisper/main.go b/cmd/whisper/main.go index c058800..45c52f7 100644 --- a/cmd/whisper/main.go +++ b/cmd/whisper/main.go @@ -28,7 +28,7 @@ type CLI struct { Globals Models ModelsCmd `cmd:"models" help:"List models"` Download DownloadCmd `cmd:"download" help:"Download a model"` - Server ServerCmd `cmd:"models" help:"Run the whisper server"` + Server ServerCmd `cmd:"server" help:"Run the whisper server"` } func main() { diff --git a/etc/Dockerfile b/etc/Dockerfile index 59061f9..9987541 100644 --- a/etc/Dockerfile +++ b/etc/Dockerfile @@ -35,7 +35,11 @@ RUN make -j$(nproc) # Setup runtime container FROM ${BASE_RUN_CONTAINER} AS runtime COPY --from=build --chmod=755 /app/build/whisper /usr/local/bin/whisper +COPY --from=build /app/build/whisper /usr/local/bin/whisper +COPY --chmod=755 etc/entrypoint.sh . -# Expose -ENTRYPOINT [ "/usr/local/bin/whisper" ] -CMD [ "server", "--dir=/data" ] +# Entrypoint when running the server +ENTRYPOINT [ "/entrypoint.sh" ] +STOPSIGNAL SIGQUIT +EXPOSE 80 +CMD [ "/usr/local/bin/whisper", "-dir", "/data", "-listen", ":80", "server" ] diff --git a/etc/entrypoint.sh b/etc/entrypoint.sh new file mode 100644 index 0000000..4033ec2 --- /dev/null +++ b/etc/entrypoint.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +if [ -z "$1" ]; then + echo "No command specified" + exit 1 +fi + +# Create the /alloc/logs folder if it doesn't exist +install -d -m 0755 /alloc/logs || exit 1 + +# Create the persistent data folder if it doesn't exist +install -d -m 0755 /data || exit 1 + +# Run the command +set -e +umask 022 +exec "$@" From cce82187b692315fa493af80d1223820582e785c Mon Sep 17 00:00:00 2001 From: David Thorpe Date: Sat, 10 Aug 2024 09:27:26 +0200 Subject: [PATCH 10/21] Added transcribe and delete commands --- Makefile | 10 ++-- README.md | 13 +++-- cmd/server/main.go | 2 +- cmd/whisper/delete.go | 14 ++++++ cmd/whisper/download.go | 8 +++- cmd/whisper/main.go | 16 +++++-- cmd/whisper/models.go | 11 ++++- cmd/whisper/server.go | 11 +---- cmd/whisper/transcribe.go | 71 ++++++++++++++++++++++++++++ cmd/whisper/version.go | 37 +++++++++++++++ pkg/{whisper => }/version/version.go | 0 pkg/whisper/api/register.go | 10 +++- pkg/whisper/schema/segment.go | 8 ++-- third_party/whisper.cpp | 2 +- 14 files changed, 182 insertions(+), 31 deletions(-) create mode 100644 cmd/whisper/delete.go create mode 100644 cmd/whisper/transcribe.go create mode 100644 cmd/whisper/version.go rename pkg/{whisper => }/version/version.go (100%) diff --git a/Makefile b/Makefile index f610673..89d51f7 100644 --- a/Makefile +++ b/Makefile @@ -16,11 +16,11 @@ BUILD_DIR := build # Build flags BUILD_MODULE := $(shell cat go.mod | head -1 | cut -d ' ' -f 2) -BUILD_LD_FLAGS += -X $(BUILD_MODULE)/pkg/whisper/version.GitSource=${BUILD_MODULE} -BUILD_LD_FLAGS += -X $(BUILD_MODULE)/pkg/whisper/version.GitTag=$(shell git describe --tags --always) -BUILD_LD_FLAGS += -X $(BUILD_MODULE)/pkg/whisper/version.GitBranch=$(shell git name-rev HEAD --name-only --always) -BUILD_LD_FLAGS += -X $(BUILD_MODULE)/pkg/whisper/version.GitHash=$(shell git rev-parse HEAD) -BUILD_LD_FLAGS += -X $(BUILD_MODULE)/pkg/whisper/version.GoBuildTime=$(shell date -u '+%Y-%m-%dT%H:%M:%SZ') +BUILD_LD_FLAGS += -X $(BUILD_MODULE)/pkg/version.GitSource=${BUILD_MODULE} +BUILD_LD_FLAGS += -X $(BUILD_MODULE)/pkg/version.GitTag=$(shell git describe --tags --always) +BUILD_LD_FLAGS += -X $(BUILD_MODULE)/pkg/version.GitBranch=$(shell git name-rev HEAD --name-only --always) +BUILD_LD_FLAGS += -X $(BUILD_MODULE)/pkg/version.GitHash=$(shell git rev-parse HEAD) +BUILD_LD_FLAGS += -X $(BUILD_MODULE)/pkg/version.GoBuildTime=$(shell date -u '+%Y-%m-%dT%H:%M:%SZ') BUILD_FLAGS = -ldflags "-s -w $(BUILD_LD_FLAGS)" # If GGML_CUDA is set, then add a cuda tag for the go ${BUILD FLAGS} diff --git a/README.md b/README.md index cb3ce10..aaa0832 100755 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ Speech-to-Text in golang. This is an early development version. -* `cmd` contains an OpenAI-API compatible server +* `cmd` contains an OpenAI-API compatible service * `pkg` contains the `whisper` service and client * `sys` contains the `whisper` bindings to the `whisper.cpp` library * `third_party` is a submodule for the whisper.cpp source @@ -11,6 +11,7 @@ Speech-to-Text in golang. This is an early development version. (Note: Docker images are not created yet - this is some forward planning!) +You can either run the whisper service as a CLI command or in a docker container. There are docker images for arm64 and amd64 (Intel). The arm64 image is built for Jetson GPU support specifically, but it will also run on Raspberry Pi's. @@ -19,14 +20,15 @@ In order to utilize a NVIDIA GPU, you'll need to install the A docker volume should be created called "whisper" can be used for storing the Whisper language models. You can see which models are available to download locally [here](https://huggingface.co/ggerganov/whisper.cpp). -The following command will run the server on port 8080: + +The following command will run the server on port 8080 for an NVIDIA GPU: ```bash docker run \ --name whisper-server --rm \ --runtime nvidia --gpus all \ # When using a NVIDIA GPU -v whisper:/models -p 8080:8080 -e WHISPER_DATA=/models \ - ghcr.io/mutablelogic/go-whisper:latest + ghcr.io/mutablelogic/go-whisper ``` If you include a `-debug` flag at the end, you'll get more verbose output. The API is then @@ -92,6 +94,11 @@ The following `Makefile` targets can be used: See all the other targets in the `Makefile` for more information. +## Developing + +The `cmd/examples` directory contains a simple example of how to use the `whisper` package +in your own code. + ## Status Still in development. See this [issue](https://github.com/mutablelogic/go-whisper/issues/1) for diff --git a/cmd/server/main.go b/cmd/server/main.go index 68b77ee..3ba131e 100644 --- a/cmd/server/main.go +++ b/cmd/server/main.go @@ -14,8 +14,8 @@ import ( context "github.com/mutablelogic/go-server/pkg/context" httpserver "github.com/mutablelogic/go-server/pkg/httpserver" whisper "github.com/mutablelogic/go-whisper" + version "github.com/mutablelogic/go-whisper/pkg/version" api "github.com/mutablelogic/go-whisper/pkg/whisper/api" - version "github.com/mutablelogic/go-whisper/pkg/whisper/version" ) func main() { diff --git a/cmd/whisper/delete.go b/cmd/whisper/delete.go new file mode 100644 index 0000000..5f5ed04 --- /dev/null +++ b/cmd/whisper/delete.go @@ -0,0 +1,14 @@ +package main + +// Packages + +type DeleteCmd struct { + Model string `arg:"" help:"Model id to delete"` +} + +func (cmd *DeleteCmd) Run(ctx *Globals) error { + if err := ctx.service.DeleteModelById(cmd.Model); err != nil { + return err + } + return ModelsCmd{}.Run(ctx) +} diff --git a/cmd/whisper/download.go b/cmd/whisper/download.go index ccfa6b6..c7ba8f8 100644 --- a/cmd/whisper/download.go +++ b/cmd/whisper/download.go @@ -2,6 +2,7 @@ package main import ( "log" + "time" // Packages "github.com/djthorpe/go-tablewriter" @@ -12,8 +13,13 @@ type DownloadCmd struct { } func (cmd *DownloadCmd) Run(ctx *Globals) error { + t := time.Now() model, err := ctx.service.DownloadModel(ctx.ctx, cmd.Model, func(curBytes, totalBytes uint64) { - log.Printf("Downloaded %d of %d bytes", curBytes, totalBytes) + if time.Since(t) > time.Second { + pct := float64(curBytes) / float64(totalBytes) * 100 + log.Printf("Downloaded %.0f%%", pct) + t = time.Now() + } }) if err != nil { return err diff --git a/cmd/whisper/main.go b/cmd/whisper/main.go index 45c52f7..33d2044 100644 --- a/cmd/whisper/main.go +++ b/cmd/whisper/main.go @@ -2,6 +2,7 @@ package main import ( "context" + "log" "os" "path/filepath" "syscall" @@ -26,9 +27,12 @@ type Globals struct { type CLI struct { Globals - Models ModelsCmd `cmd:"models" help:"List models"` - Download DownloadCmd `cmd:"download" help:"Download a model"` - Server ServerCmd `cmd:"server" help:"Run the whisper server"` + Transcribe TranscribeCmd `cmd:"transcribe" help:"Transcribe from file"` + Models ModelsCmd `cmd:"models" help:"List models"` + Download DownloadCmd `cmd:"download" help:"Download a model"` + Delete DeleteCmd `cmd:"delete" help:"Delete a model"` + Server ServerCmd `cmd:"server" help:"Run the whisper service"` + Version VersionCmd `cmd:"version" help:"Print version information"` } func main() { @@ -53,7 +57,11 @@ func main() { ) // Create a whisper server - set options - opts := []whisper.Opt{} + opts := []whisper.Opt{ + whisper.OptLog(func(line string) { + log.Println(line) + }), + } if cli.Globals.Debug { opts = append(opts, whisper.OptDebug()) } diff --git a/cmd/whisper/models.go b/cmd/whisper/models.go index 77549cb..fa7f4e3 100644 --- a/cmd/whisper/models.go +++ b/cmd/whisper/models.go @@ -1,12 +1,19 @@ package main import ( + "errors" + // Packages "github.com/djthorpe/go-tablewriter" ) type ModelsCmd struct{} -func (*ModelsCmd) Run(ctx *Globals) error { - return ctx.writer.Write(ctx.service.ListModels(), tablewriter.OptHeader()) +func (ModelsCmd) Run(ctx *Globals) error { + models := ctx.service.ListModels() + if len(models) == 0 { + return errors.New("no models found") + } else { + return ctx.writer.Write(ctx.service.ListModels(), tablewriter.OptHeader()) + } } diff --git a/cmd/whisper/server.go b/cmd/whisper/server.go index bdce752..842905f 100644 --- a/cmd/whisper/server.go +++ b/cmd/whisper/server.go @@ -2,7 +2,6 @@ package main import ( "log" - "net/http" // Packages "github.com/mutablelogic/go-server/pkg/httpserver" @@ -15,17 +14,11 @@ type ServerCmd struct { } func (cmd *ServerCmd) Run(ctx *Globals) error { - // Create a mux for serving requests, then register the endpoints with the mux - mux := http.NewServeMux() - - // Register the endpoints - api.RegisterEndpoints(cmd.Endpoint, mux, ctx.service) - // Create a new HTTP server - log.Println("List address", cmd.Listen) + log.Println("Listen address", cmd.Listen) server, err := httpserver.Config{ Listen: cmd.Listen, - Router: mux, + Router: api.RegisterEndpoints(cmd.Endpoint, ctx.service, nil), }.New() if err != nil { return err diff --git a/cmd/whisper/transcribe.go b/cmd/whisper/transcribe.go new file mode 100644 index 0000000..cf66d76 --- /dev/null +++ b/cmd/whisper/transcribe.go @@ -0,0 +1,71 @@ +package main + +import ( + "os" + "time" + + // Packages + + whisper "github.com/mutablelogic/go-whisper" + "github.com/mutablelogic/go-whisper/pkg/whisper/schema" + segmenter "github.com/mutablelogic/go-whisper/pkg/whisper/segmenter" + task "github.com/mutablelogic/go-whisper/pkg/whisper/task" + + // Namespace imports + . "github.com/djthorpe/go-errors" +) + +type TranscribeCmd struct { + Model string `arg:"" help:"Model to use"` + Path string `arg:"" help:"Path to audio file"` + Language string `flag:"language" help:"Language to transcribe"` + Format string `flag:"format" help:"Output format" default:"text" enum:"text,srt,vtt,json"` +} + +func (cmd *TranscribeCmd) Run(ctx *Globals) error { + // Get the model + model := ctx.service.GetModelById(cmd.Model) + if model == nil { + return ErrNotFound.With(cmd.Model) + } + + // Open the audio file + f, err := os.Open(cmd.Path) + if err != nil { + return err + } + defer f.Close() + + // Create a segmenter - read segments based on requested segment size + segmenter, err := segmenter.New(f, 0, whisper.SampleRate) + if err != nil { + return err + } + defer segmenter.Close() + + // Perform the transcription + return ctx.service.WithModel(model, func(taskctx *task.Context) error { + // Transcribe + taskctx.SetTranslate(false) + taskctx.SetDiarize(false) + + // Set language + if cmd.Language != "" { + if err := taskctx.SetLanguage(cmd.Language); err != nil { + return err + } + } + + // Read samples and transcribe them + if err := segmenter.Decode(ctx.ctx, func(ts time.Duration, buf []float32) error { + // Perform the transcription, return any errors + return taskctx.Transcribe(ctx.ctx, ts, buf, func(segment *schema.Segment) { + ctx.writer.Write(segment) + }) + }); err != nil { + return err + } + + return nil + }) +} diff --git a/cmd/whisper/version.go b/cmd/whisper/version.go new file mode 100644 index 0000000..f5abdaa --- /dev/null +++ b/cmd/whisper/version.go @@ -0,0 +1,37 @@ +package main + +import ( + "runtime" + + // Packages + "github.com/mutablelogic/go-whisper/pkg/version" +) + +type VersionCmd struct{} + +func (cmd *VersionCmd) Run(ctx *Globals) error { + type kv struct { + Key string `json:"name"` + Value string `json:"value" writer:",width:60"` + } + var metadata = []kv{} + if version.GitSource != "" { + metadata = append(metadata, kv{"source", version.GitSource}) + } + if version.GitBranch != "" { + metadata = append(metadata, kv{"branch", version.GitBranch}) + } + if version.GitTag != "" { + metadata = append(metadata, kv{"tag", version.GitTag}) + } + if version.GitHash != "" { + metadata = append(metadata, kv{"hash", version.GitHash}) + } + if version.GoBuildTime != "" { + metadata = append(metadata, kv{"build time", version.GoBuildTime}) + } + metadata = append(metadata, kv{"go version", runtime.Version()}) + metadata = append(metadata, kv{"os", runtime.GOOS + "/" + runtime.GOARCH}) + + return ctx.writer.Write(metadata) +} diff --git a/pkg/whisper/version/version.go b/pkg/version/version.go similarity index 100% rename from pkg/whisper/version/version.go rename to pkg/version/version.go diff --git a/pkg/whisper/api/register.go b/pkg/whisper/api/register.go index 623b7a6..614f88d 100644 --- a/pkg/whisper/api/register.go +++ b/pkg/whisper/api/register.go @@ -12,7 +12,12 @@ import ( ///////////////////////////////////////////////////////////////////////////// // PUBLIC METHODS -func RegisterEndpoints(base string, mux *http.ServeMux, whisper *whisper.Whisper) { +func RegisterEndpoints(base string, whisper *whisper.Whisper, mux *http.ServeMux) *http.ServeMux { + // Create a new router + if mux == nil { + mux = http.NewServeMux() + } + // Health: GET /v1/health // returns an empty OK response mux.HandleFunc(joinPath(base, "health"), func(w http.ResponseWriter, r *http.Request) { @@ -118,6 +123,9 @@ func RegisterEndpoints(base string, mux *http.ServeMux, whisper *whisper.Whisper httpresponse.Error(w, http.StatusMethodNotAllowed) } })*/ + + // Return mux + return mux } ///////////////////////////////////////////////////////////////////////////// diff --git a/pkg/whisper/schema/segment.go b/pkg/whisper/schema/segment.go index 7a346ae..c1cf0e0 100644 --- a/pkg/whisper/schema/segment.go +++ b/pkg/whisper/schema/segment.go @@ -8,10 +8,10 @@ import ( // TYPES type Segment struct { - Id int32 `json:"id"` - Start Timestamp `json:"start"` - End Timestamp `json:"end"` - Text string `json:"text"` + Id int32 `json:"id" writer:",right,width:5"` + Start Timestamp `json:"start" writer:",right,width:5"` + End Timestamp `json:"end" writer:",right,width:5"` + Text string `json:"text" writer:",wrap,width:70"` SpeakerTurn bool `json:"speaker_turn,omitempty"` // TODO } diff --git a/third_party/whisper.cpp b/third_party/whisper.cpp index 6739eb8..81c999f 160000 --- a/third_party/whisper.cpp +++ b/third_party/whisper.cpp @@ -1 +1 @@ -Subproject commit 6739eb83c3ca5cf40d24c6fe8442a761a1eb6248 +Subproject commit 81c999fe0a25c4ebbfef10ed8a1a96df9cfc10fd From 56e83cee19cf8d6ddf727533c308f29f4e8ed4c7 Mon Sep 17 00:00:00 2001 From: David Thorpe Date: Sat, 10 Aug 2024 09:31:26 +0200 Subject: [PATCH 11/21] Updated --- cmd/cli/delete.go | 13 ----- cmd/cli/download.go | 34 ----------- cmd/cli/main.go | 87 ---------------------------- cmd/cli/models.go | 13 ----- cmd/cli/transcribe.go | 42 -------------- cmd/cli/translate.go | 42 -------------- cmd/server/flags.go | 45 --------------- cmd/server/main.go | 115 -------------------------------------- cmd/whisper/delete.go | 2 - cmd/whisper/transcribe.go | 7 +-- 10 files changed, 3 insertions(+), 397 deletions(-) delete mode 100644 cmd/cli/delete.go delete mode 100644 cmd/cli/download.go delete mode 100644 cmd/cli/main.go delete mode 100644 cmd/cli/models.go delete mode 100644 cmd/cli/transcribe.go delete mode 100644 cmd/cli/translate.go delete mode 100644 cmd/server/flags.go delete mode 100644 cmd/server/main.go diff --git a/cmd/cli/delete.go b/cmd/cli/delete.go deleted file mode 100644 index 26a1c13..0000000 --- a/cmd/cli/delete.go +++ /dev/null @@ -1,13 +0,0 @@ -package main - -type DeleteCmd struct { - Id string `arg:"" required:"" help:"Model Identifier" type:"string"` -} - -func (cmd *DeleteCmd) Run(ctx *Globals) error { - err := ctx.api.DeleteModel(ctx.ctx, cmd.Id) - if err != nil { - return err - } - return nil -} diff --git a/cmd/cli/download.go b/cmd/cli/download.go deleted file mode 100644 index 512d713..0000000 --- a/cmd/cli/download.go +++ /dev/null @@ -1,34 +0,0 @@ -package main - -import "fmt" - -type DownloadCmd struct { - Path string `arg:"" required:"" help:"Model Path" type:"string"` -} - -func (cmd *DownloadCmd) Run(ctx *Globals) error { - type progress struct { - Status string `json:"status" writer:",width:60"` - Total int64 `json:"total,omitempty" writer:",right,width:12,"` - Completed int64 `json:"completed,omitempty" writer:",right,width:12,"` - Percent string `json:"percent,omitempty" writer:",width:8,right"` - } - model, err := ctx.api.DownloadModel(ctx.ctx, cmd.Path, func(status string, cur, total int64) { - percent := "" - if cur < total { - percent = fmt.Sprintf("%.1f%%", float32(cur)*100/float32(total)) - } - if status != "" { - ctx.writer.Write(progress{ - Status: status, - Completed: cur, - Total: total, - Percent: percent, - }) - } - }) - if err != nil { - return err - } - return ctx.writer.Write(model) -} diff --git a/cmd/cli/main.go b/cmd/cli/main.go deleted file mode 100644 index cf4051e..0000000 --- a/cmd/cli/main.go +++ /dev/null @@ -1,87 +0,0 @@ -package main - -import ( - "context" - "os" - "path/filepath" - "syscall" - - // Packages - kong "github.com/alecthomas/kong" - tablewriter "github.com/djthorpe/go-tablewriter" - client "github.com/mutablelogic/go-client" - ctx "github.com/mutablelogic/go-server/pkg/context" - api "github.com/mutablelogic/go-whisper/pkg/whisper/client" -) - -type Globals struct { - Debug bool `name:"debug" help:"Enable debug output"` - Endpoint string `name:"endpoint" help:"HTTP endpoint for whisper service (set WHISPER_URL environment variable to use as default)" default:"${WHISPER_URL}"` - - // Writer, client and context - writer *tablewriter.Writer - api *api.Client - ctx context.Context -} - -type CLI struct { - Globals - Models ModelsCmd `cmd:"models" help:"List available models"` - Delete DeleteCmd `cmd:"delete" help:"Delete a model"` - Download DownloadCmd `cmd:"download" help:"Download a model"` - Transcribe TranscribeCmd `cmd:"transcribe" help:"Transcribe a file"` - Translate TranslateCmd `cmd:"translate" help:"Translate a file"` -} - -func main() { - // The name of the executable - name, err := os.Executable() - if err != nil { - panic(err) - } else { - name = filepath.Base(name) - } - - // Create a cli parser - cli := CLI{} - cmd := kong.Parse(&cli, - kong.Name(name), - kong.Description("speech transcription and translation service"), - kong.UsageOnError(), - kong.ConfigureHelp(kong.HelpOptions{Compact: true}), - kong.Vars{ - "WHISPER_URL": endpointEnvOrDefault(), - }, - ) - - // Create a whisper client - opts := []client.ClientOpt{} - if cli.Globals.Debug { - opts = append(opts, client.OptTrace(os.Stderr, true)) - } - client, err := api.New(cli.Globals.Endpoint, opts...) - if err != nil { - cmd.FatalIfErrorf(err) - } else { - cli.Globals.api = client - } - - // Create a tablewriter object with text output - writer := tablewriter.New(os.Stdout, tablewriter.OptOutputText()) - cli.Globals.writer = writer - - // Create a context - cli.Globals.ctx = ctx.ContextForSignal(os.Interrupt, syscall.SIGQUIT) - - // Run the command - if err := cmd.Run(&cli.Globals); err != nil { - cmd.FatalIfErrorf(err) - } -} - -func endpointEnvOrDefault() string { - if endpoint := os.Getenv("WHISPER_URL"); endpoint != "" { - return endpoint - } - return "http://localhost:8080/v1" -} diff --git a/cmd/cli/models.go b/cmd/cli/models.go deleted file mode 100644 index 5179120..0000000 --- a/cmd/cli/models.go +++ /dev/null @@ -1,13 +0,0 @@ -package main - -import tablewriter "github.com/djthorpe/go-tablewriter" - -type ModelsCmd struct{} - -func (_ *ModelsCmd) Run(ctx *Globals) error { - models, err := ctx.api.ListModels(ctx.ctx) - if err != nil { - return err - } - return ctx.writer.Write(models, tablewriter.OptHeader()) -} diff --git a/cmd/cli/transcribe.go b/cmd/cli/transcribe.go deleted file mode 100644 index 699098d..0000000 --- a/cmd/cli/transcribe.go +++ /dev/null @@ -1,42 +0,0 @@ -package main - -import ( - "os" - "time" - - "github.com/djthorpe/go-tablewriter" - "github.com/mutablelogic/go-whisper/pkg/whisper/client" -) - -type TranscribeCmd struct { - Model string `arg:"" required:"" help:"Model Identifier" type:"string"` - Path string `arg:"" required:"" help:"Audio File Path" type:"string"` - Language string `flag:"language" help:"Source Language" type:"string"` - SegmentSize *time.Duration `flag:"segment-size" help:"Segment Size" type:"duration"` - ResponseFmt *string `flag:"format" help:"Response Format" enum:"json,verbose_json,text,vtt,srt"` -} - -func (cmd *TranscribeCmd) Run(ctx *Globals) error { - r, err := os.Open(cmd.Path) - if err != nil { - return err - } - defer r.Close() - - opts := []client.Opt{} - if cmd.Language != "" { - opts = append(opts, client.OptLanguage(cmd.Language)) - } - if cmd.SegmentSize != nil { - opts = append(opts, client.OptSegmentSize(*cmd.SegmentSize)) - } - if cmd.ResponseFmt != nil { - opts = append(opts, client.OptResponseFormat(*cmd.ResponseFmt)) - } - - transcription, err := ctx.api.Transcribe(ctx.ctx, cmd.Model, r, opts...) - if err != nil { - return err - } - return ctx.writer.Write(transcription, tablewriter.OptHeader()) -} diff --git a/cmd/cli/translate.go b/cmd/cli/translate.go deleted file mode 100644 index c6786cd..0000000 --- a/cmd/cli/translate.go +++ /dev/null @@ -1,42 +0,0 @@ -package main - -import ( - "os" - "time" - - "github.com/djthorpe/go-tablewriter" - "github.com/mutablelogic/go-whisper/pkg/whisper/client" -) - -type TranslateCmd struct { - Model string `arg:"" required:"" help:"Model Identifier" type:"string"` - Path string `arg:"" required:"" help:"Audio File Path" type:"string"` - Language string `flag:"language" required:"" help:"Target Language" type:"string"` - SegmentSize *time.Duration `flag:"segment-size" help:"Segment Size" type:"duration"` - ResponseFmt *string `flag:"format" help:"Response Format" enum:"json,verbose_json,text,vtt,srt"` -} - -func (cmd *TranslateCmd) Run(ctx *Globals) error { - r, err := os.Open(cmd.Path) - if err != nil { - return err - } - defer r.Close() - - opts := []client.Opt{} - if cmd.Language != "" { - opts = append(opts, client.OptLanguage(cmd.Language)) - } - if cmd.SegmentSize != nil { - opts = append(opts, client.OptSegmentSize(*cmd.SegmentSize)) - } - if cmd.ResponseFmt != nil { - opts = append(opts, client.OptResponseFormat(*cmd.ResponseFmt)) - } - - transcription, err := ctx.api.Translate(ctx.ctx, cmd.Model, r, opts...) - if err != nil { - return err - } - return ctx.writer.Write(transcription, tablewriter.OptHeader()) -} diff --git a/cmd/server/flags.go b/cmd/server/flags.go deleted file mode 100644 index 9ef6408..0000000 --- a/cmd/server/flags.go +++ /dev/null @@ -1,45 +0,0 @@ -package main - -import ( - "flag" - "os" -) - -type Flags struct { - *flag.FlagSet - - // Flag parameters - endpoint *string - listen *string - dir *string - debug *bool -} - -func NewFlags(name string, args []string) (*Flags, error) { - flags := &Flags{ - FlagSet: flag.NewFlagSet(name, flag.ContinueOnError), - } - flags.endpoint = flags.String("endpoint", "/v1", "HTTP endpoint") - flags.listen = flags.String("listen", "127.0.0.1:8080", "HTTP Listen address") - flags.dir = flags.String("dir", "${WHISPER_DATA}", "Model data directory") - flags.debug = flags.Bool("debug", false, "Output additional debug information") - - // Parse flags and return any error - return flags, flags.Parse(args) -} - -func (f *Flags) Listen() string { - return *f.listen -} - -func (f *Flags) Dir() string { - return os.ExpandEnv(*f.dir) -} - -func (f *Flags) Endpoint() string { - return *f.endpoint -} - -func (f *Flags) Debug() bool { - return *f.debug -} diff --git a/cmd/server/main.go b/cmd/server/main.go deleted file mode 100644 index 3ba131e..0000000 --- a/cmd/server/main.go +++ /dev/null @@ -1,115 +0,0 @@ -package main - -import ( - "flag" - "log" - "net/http" - "os" - "path/filepath" - "strconv" - "strings" - "syscall" - - // Packages - context "github.com/mutablelogic/go-server/pkg/context" - httpserver "github.com/mutablelogic/go-server/pkg/httpserver" - whisper "github.com/mutablelogic/go-whisper" - version "github.com/mutablelogic/go-whisper/pkg/version" - api "github.com/mutablelogic/go-whisper/pkg/whisper/api" -) - -func main() { - // Parse the command line flags - name := filepath.Base(os.Args[0]) - flags, err := NewFlags(name, os.Args[1:]) - if err != nil { - if err != flag.ErrHelp { - log.Println(err) - } - os.Exit(-1) - } - - // Determine the directory for models - dir := flags.Dir() - if dir == "" { - cacheDir, err := os.UserCacheDir() - if err != nil { - log.Println(err) - os.Exit(-1) - } - dir = filepath.Join(cacheDir, name) - } - - // Create the directory for models - if err := os.MkdirAll(dir, 0755); err != nil { - log.Println(err) - os.Exit(-1) - } - - // Print version - if version.GitSource != "" { - log.Println(name, version.GitSource) - } else { - log.Println(name) - } - if version.GitTag != "" { - log.Println("Version:", version.GitTag) - } - - // Create a whisper service - log.Println("Storing models at", dir) - opts := []whisper.Opt{ - whisper.OptLog(func(line string) { - log.Println(line) - }), - } - if flags.Debug() { - opts = append(opts, whisper.OptDebug()) - } - whisper, err := whisper.New(dir, opts...) - if err != nil { - log.Println(err) - os.Exit(-2) - } - - // Display models - var models []string - for _, model := range whisper.ListModels() { - models = append(models, strconv.Quote(model.Id)) - } - if len(models) > 0 { - log.Println("Models:", strings.Join(models, ", ")) - } else { - log.Println("No models") - } - - // Create a mux for serving requests, then register the endpoints with the mux - mux := http.NewServeMux() - api.RegisterEndpoints(flags.Endpoint(), mux, whisper) - - // Create a new HTTP server - log.Println("Listen address", flags.Listen()) - server, err := httpserver.Config{ - Listen: flags.Listen(), - Router: mux, - }.New() - if err != nil { - log.Println(err) - os.Exit(-2) - } - - // Run the server until CTRL+C - log.Println("Press CTRL+C to exit") - ctx := context.ContextForSignal(os.Interrupt, syscall.SIGQUIT) - if err := server.Run(ctx); err != nil { - log.Println(err) - os.Exit(-3) - } - - // Release whisper resources - log.Println("Terminating") - if err := whisper.Close(); err != nil { - log.Println(err) - os.Exit(-4) - } -} diff --git a/cmd/whisper/delete.go b/cmd/whisper/delete.go index 5f5ed04..a7832fa 100644 --- a/cmd/whisper/delete.go +++ b/cmd/whisper/delete.go @@ -1,7 +1,5 @@ package main -// Packages - type DeleteCmd struct { Model string `arg:"" help:"Model id to delete"` } diff --git a/cmd/whisper/transcribe.go b/cmd/whisper/transcribe.go index cf66d76..7b42ed9 100644 --- a/cmd/whisper/transcribe.go +++ b/cmd/whisper/transcribe.go @@ -5,9 +5,8 @@ import ( "time" // Packages - whisper "github.com/mutablelogic/go-whisper" - "github.com/mutablelogic/go-whisper/pkg/whisper/schema" + schema "github.com/mutablelogic/go-whisper/pkg/whisper/schema" segmenter "github.com/mutablelogic/go-whisper/pkg/whisper/segmenter" task "github.com/mutablelogic/go-whisper/pkg/whisper/task" @@ -18,8 +17,8 @@ import ( type TranscribeCmd struct { Model string `arg:"" help:"Model to use"` Path string `arg:"" help:"Path to audio file"` - Language string `flag:"language" help:"Language to transcribe"` - Format string `flag:"format" help:"Output format" default:"text" enum:"text,srt,vtt,json"` + Language string `flag:"language" help:"Language to transcribe" default:"auto"` + Format string `flag:"format" help:"Output format" default:"text" enum:"json,verbose_json,text,vtt,srt"` } func (cmd *TranscribeCmd) Run(ctx *Globals) error { From 22f176d5e36f65e049de55182a91518535f348d9 Mon Sep 17 00:00:00 2001 From: David Thorpe Date: Sat, 10 Aug 2024 09:32:58 +0200 Subject: [PATCH 12/21] Updated dockerfile --- etc/Dockerfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/etc/Dockerfile b/etc/Dockerfile index 9987541..56ce8d0 100644 --- a/etc/Dockerfile +++ b/etc/Dockerfile @@ -34,6 +34,8 @@ RUN make -j$(nproc) # Setup runtime container FROM ${BASE_RUN_CONTAINER} AS runtime +RUN apt-get -y update \ + && apt-get -y install libavcodec libavdevice libavfilter libavutil libswscale libswresample COPY --from=build --chmod=755 /app/build/whisper /usr/local/bin/whisper COPY --from=build /app/build/whisper /usr/local/bin/whisper COPY --chmod=755 etc/entrypoint.sh . From 74d9679cdfc1226a0ab89feff3821881c6bfe244 Mon Sep 17 00:00:00 2001 From: David Thorpe Date: Sat, 10 Aug 2024 09:38:01 +0200 Subject: [PATCH 13/21] Updated dockerfile --- etc/Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/etc/Dockerfile b/etc/Dockerfile index 56ce8d0..3111128 100644 --- a/etc/Dockerfile +++ b/etc/Dockerfile @@ -17,7 +17,7 @@ RUN apt-get -y update \ && apt-get -y install software-properties-common curl \ && add-apt-repository -y ppa:ubuntuhandbook1/ffmpeg6 \ && apt-get -y update \ - && apt-get -y install libavcodec-dev libavdevice-dev libavfilter-dev libavutil-dev libswscale-dev libswresample-dev + && apt-get -y install libavformat-dev libavcodec-dev libavdevice-dev libavfilter-dev libavutil-dev libswscale-dev libswresample-dev # Install go RUN curl -sL https://golang.org/dl/go${GO_VERSION}.${OS}-${ARCH}.tar.gz | tar -C /usr/local -xz @@ -35,7 +35,7 @@ RUN make -j$(nproc) # Setup runtime container FROM ${BASE_RUN_CONTAINER} AS runtime RUN apt-get -y update \ - && apt-get -y install libavcodec libavdevice libavfilter libavutil libswscale libswresample + && apt-get -y install libavformat60 libavcodec60 libavdevice60 libavfilter9 libavutil58 libswscale7 libswresample4 COPY --from=build --chmod=755 /app/build/whisper /usr/local/bin/whisper COPY --from=build /app/build/whisper /usr/local/bin/whisper COPY --chmod=755 etc/entrypoint.sh . From 75687659f8b75bbbd4b7f8a80a01a48954b4c3c3 Mon Sep 17 00:00:00 2001 From: David Thorpe Date: Sat, 10 Aug 2024 09:40:15 +0200 Subject: [PATCH 14/21] Updated dockerfile --- etc/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/etc/Dockerfile b/etc/Dockerfile index 3111128..ee293fb 100644 --- a/etc/Dockerfile +++ b/etc/Dockerfile @@ -35,7 +35,7 @@ RUN make -j$(nproc) # Setup runtime container FROM ${BASE_RUN_CONTAINER} AS runtime RUN apt-get -y update \ - && apt-get -y install libavformat60 libavcodec60 libavdevice60 libavfilter9 libavutil58 libswscale7 libswresample4 + && apt-get -y install libavformat58 libavcodec58 libavdevice58 libavfilter7 libavutil56 libswscale5 libswresample3 COPY --from=build --chmod=755 /app/build/whisper /usr/local/bin/whisper COPY --from=build /app/build/whisper /usr/local/bin/whisper COPY --chmod=755 etc/entrypoint.sh . From d68fb405ba8c054f2206de8def268a4889d1d662 Mon Sep 17 00:00:00 2001 From: David Thorpe Date: Sat, 10 Aug 2024 10:16:26 +0200 Subject: [PATCH 15/21] Updated locations --- Makefile | 2 +- cmd/whisper/server.go | 2 +- cmd/whisper/transcribe.go | 6 +++--- pkg/{whisper => }/api/logging.go | 0 pkg/{whisper => }/api/models.go | 2 +- pkg/{whisper => }/api/register.go | 0 pkg/{whisper => }/api/transcribe.go | 6 +++--- pkg/{whisper => }/client/client.go | 2 +- pkg/{whisper => }/client/opts.go | 0 pkg/{whisper => }/pool/contextpool.go | 4 ++-- pkg/{whisper => }/pool/contextpool_test.go | 4 ++-- pkg/{whisper => }/pool/pool.go | 0 pkg/{whisper => }/pool/pool_test.go | 2 +- pkg/{whisper => }/schema/model.go | 0 pkg/{whisper => }/schema/segment.go | 0 pkg/{whisper => }/schema/transcription.go | 0 pkg/segmenter/doc.go | 2 ++ pkg/{whisper => }/segmenter/segmenter.go | 16 ++++++++++++---- pkg/{whisper => }/segmenter/segmenter_test.go | 4 ++-- pkg/store/doc.go | 2 ++ pkg/{whisper/model => store}/store.go | 4 ++-- pkg/{whisper/model => store}/writer.go | 2 +- pkg/{whisper => }/task/context.go | 2 +- pkg/{whisper => }/task/transcription.go | 2 +- whisper.go | 14 +++++++------- whisper_test.go | 2 +- 26 files changed, 46 insertions(+), 34 deletions(-) rename pkg/{whisper => }/api/logging.go (100%) rename pkg/{whisper => }/api/models.go (98%) rename pkg/{whisper => }/api/register.go (100%) rename pkg/{whisper => }/api/transcribe.go (98%) rename pkg/{whisper => }/client/client.go (98%) rename pkg/{whisper => }/client/opts.go (100%) rename pkg/{whisper => }/pool/contextpool.go (95%) rename pkg/{whisper => }/pool/contextpool_test.go (86%) rename pkg/{whisper => }/pool/pool.go (100%) rename pkg/{whisper => }/pool/pool_test.go (95%) rename pkg/{whisper => }/schema/model.go (100%) rename pkg/{whisper => }/schema/segment.go (100%) rename pkg/{whisper => }/schema/transcription.go (100%) create mode 100644 pkg/segmenter/doc.go rename pkg/{whisper => }/segmenter/segmenter.go (80%) rename pkg/{whisper => }/segmenter/segmenter_test.go (89%) create mode 100644 pkg/store/doc.go rename pkg/{whisper/model => store}/store.go (98%) rename pkg/{whisper/model => store}/writer.go (98%) rename pkg/{whisper => }/task/context.go (98%) rename pkg/{whisper => }/task/transcription.go (97%) diff --git a/Makefile b/Makefile index 89d51f7..d731752 100644 --- a/Makefile +++ b/Makefile @@ -58,7 +58,7 @@ test: generate libwhisper libggml @echo "Running tests (sys)" @PKG_CONFIG_PATH=${ROOT_PATH}/${BUILD_DIR} ${GO} test -v ./sys/whisper/... @echo "Running tests (pkg)" - @PKG_CONFIG_PATH=${ROOT_PATH}/${BUILD_DIR} ${GO} test -v ./pkg/whisper/... + @PKG_CONFIG_PATH=${ROOT_PATH}/${BUILD_DIR} ${GO} test -v ./pkg/... # Build whisper-static-library libwhisper: submodule diff --git a/cmd/whisper/server.go b/cmd/whisper/server.go index 842905f..1e95c9d 100644 --- a/cmd/whisper/server.go +++ b/cmd/whisper/server.go @@ -5,7 +5,7 @@ import ( // Packages "github.com/mutablelogic/go-server/pkg/httpserver" - "github.com/mutablelogic/go-whisper/pkg/whisper/api" + "github.com/mutablelogic/go-whisper/pkg/api" ) type ServerCmd struct { diff --git a/cmd/whisper/transcribe.go b/cmd/whisper/transcribe.go index 7b42ed9..73a84af 100644 --- a/cmd/whisper/transcribe.go +++ b/cmd/whisper/transcribe.go @@ -6,9 +6,9 @@ import ( // Packages whisper "github.com/mutablelogic/go-whisper" - schema "github.com/mutablelogic/go-whisper/pkg/whisper/schema" - segmenter "github.com/mutablelogic/go-whisper/pkg/whisper/segmenter" - task "github.com/mutablelogic/go-whisper/pkg/whisper/task" + schema "github.com/mutablelogic/go-whisper/pkg/schema" + segmenter "github.com/mutablelogic/go-whisper/pkg/segmenter" + task "github.com/mutablelogic/go-whisper/pkg/task" // Namespace imports . "github.com/djthorpe/go-errors" diff --git a/pkg/whisper/api/logging.go b/pkg/api/logging.go similarity index 100% rename from pkg/whisper/api/logging.go rename to pkg/api/logging.go diff --git a/pkg/whisper/api/models.go b/pkg/api/models.go similarity index 98% rename from pkg/whisper/api/models.go rename to pkg/api/models.go index ebae153..73a48c2 100644 --- a/pkg/whisper/api/models.go +++ b/pkg/api/models.go @@ -12,7 +12,7 @@ import ( "github.com/mutablelogic/go-server/pkg/httprequest" "github.com/mutablelogic/go-server/pkg/httpresponse" "github.com/mutablelogic/go-whisper" - "github.com/mutablelogic/go-whisper/pkg/whisper/schema" + "github.com/mutablelogic/go-whisper/pkg/schema" ) /////////////////////////////////////////////////////////////////////////////// diff --git a/pkg/whisper/api/register.go b/pkg/api/register.go similarity index 100% rename from pkg/whisper/api/register.go rename to pkg/api/register.go diff --git a/pkg/whisper/api/transcribe.go b/pkg/api/transcribe.go similarity index 98% rename from pkg/whisper/api/transcribe.go rename to pkg/api/transcribe.go index c64b08d..c5cc13a 100644 --- a/pkg/whisper/api/transcribe.go +++ b/pkg/api/transcribe.go @@ -13,9 +13,9 @@ import ( "github.com/mutablelogic/go-server/pkg/httprequest" "github.com/mutablelogic/go-server/pkg/httpresponse" "github.com/mutablelogic/go-whisper" - "github.com/mutablelogic/go-whisper/pkg/whisper/schema" - "github.com/mutablelogic/go-whisper/pkg/whisper/segmenter" - "github.com/mutablelogic/go-whisper/pkg/whisper/task" + "github.com/mutablelogic/go-whisper/pkg/schema" + "github.com/mutablelogic/go-whisper/pkg/segmenter" + "github.com/mutablelogic/go-whisper/pkg/task" // Namespace imports . "github.com/djthorpe/go-errors" diff --git a/pkg/whisper/client/client.go b/pkg/client/client.go similarity index 98% rename from pkg/whisper/client/client.go rename to pkg/client/client.go index 2a7dfc5..8f02d61 100644 --- a/pkg/whisper/client/client.go +++ b/pkg/client/client.go @@ -11,7 +11,7 @@ import ( "github.com/mutablelogic/go-client" "github.com/mutablelogic/go-client/pkg/multipart" "github.com/mutablelogic/go-server/pkg/httprequest" - "github.com/mutablelogic/go-whisper/pkg/whisper/schema" + "github.com/mutablelogic/go-whisper/pkg/schema" ) /////////////////////////////////////////////////////////////////////////////// diff --git a/pkg/whisper/client/opts.go b/pkg/client/opts.go similarity index 100% rename from pkg/whisper/client/opts.go rename to pkg/client/opts.go diff --git a/pkg/whisper/pool/contextpool.go b/pkg/pool/contextpool.go similarity index 95% rename from pkg/whisper/pool/contextpool.go rename to pkg/pool/contextpool.go index 15fe9a2..a57a4bc 100644 --- a/pkg/whisper/pool/contextpool.go +++ b/pkg/pool/contextpool.go @@ -5,8 +5,8 @@ import ( "fmt" // Packages - schema "github.com/mutablelogic/go-whisper/pkg/whisper/schema" - task "github.com/mutablelogic/go-whisper/pkg/whisper/task" + schema "github.com/mutablelogic/go-whisper/pkg/schema" + task "github.com/mutablelogic/go-whisper/pkg/task" // Namespace imports . "github.com/djthorpe/go-errors" diff --git a/pkg/whisper/pool/contextpool_test.go b/pkg/pool/contextpool_test.go similarity index 86% rename from pkg/whisper/pool/contextpool_test.go rename to pkg/pool/contextpool_test.go index 7a7eb47..1209179 100644 --- a/pkg/whisper/pool/contextpool_test.go +++ b/pkg/pool/contextpool_test.go @@ -4,8 +4,8 @@ import ( "testing" // Packages - pool "github.com/mutablelogic/go-whisper/pkg/whisper/pool" - schema "github.com/mutablelogic/go-whisper/pkg/whisper/schema" + pool "github.com/mutablelogic/go-whisper/pkg/pool" + schema "github.com/mutablelogic/go-whisper/pkg/schema" ) func Test_contextpool_001(t *testing.T) { diff --git a/pkg/whisper/pool/pool.go b/pkg/pool/pool.go similarity index 100% rename from pkg/whisper/pool/pool.go rename to pkg/pool/pool.go diff --git a/pkg/whisper/pool/pool_test.go b/pkg/pool/pool_test.go similarity index 95% rename from pkg/whisper/pool/pool_test.go rename to pkg/pool/pool_test.go index 73155ee..da2e0c3 100644 --- a/pkg/whisper/pool/pool_test.go +++ b/pkg/pool/pool_test.go @@ -5,7 +5,7 @@ import ( "testing" // Packages - "github.com/mutablelogic/go-whisper/pkg/whisper/pool" + "github.com/mutablelogic/go-whisper/pkg/pool" ) type Item struct { diff --git a/pkg/whisper/schema/model.go b/pkg/schema/model.go similarity index 100% rename from pkg/whisper/schema/model.go rename to pkg/schema/model.go diff --git a/pkg/whisper/schema/segment.go b/pkg/schema/segment.go similarity index 100% rename from pkg/whisper/schema/segment.go rename to pkg/schema/segment.go diff --git a/pkg/whisper/schema/transcription.go b/pkg/schema/transcription.go similarity index 100% rename from pkg/whisper/schema/transcription.go rename to pkg/schema/transcription.go diff --git a/pkg/segmenter/doc.go b/pkg/segmenter/doc.go new file mode 100644 index 0000000..9a8cc82 --- /dev/null +++ b/pkg/segmenter/doc.go @@ -0,0 +1,2 @@ +/* segmenter package provides a segmenter for audio files and streams */ +package segmenter diff --git a/pkg/whisper/segmenter/segmenter.go b/pkg/segmenter/segmenter.go similarity index 80% rename from pkg/whisper/segmenter/segmenter.go rename to pkg/segmenter/segmenter.go index 6eef889..e3dbc0b 100644 --- a/pkg/whisper/segmenter/segmenter.go +++ b/pkg/segmenter/segmenter.go @@ -14,6 +14,8 @@ import ( . "github.com/djthorpe/go-errors" ) +// A segmenter reads audio samples from a reader and segments them into +// fixed-size chunks. The segmenter can be used to process audio samples type Segmenter struct { ts time.Duration sample_rate int @@ -29,8 +31,12 @@ type SegmentFunc func(time.Duration, []float32) error ////////////////////////////////////////////////////////////////////////////// // LIFECYCLE -// Create a new segmenter for "NumSamples" with a reader r -// If NumSamples is zero then no segmenting is performed +// Create a new segmenter for a specific "dur" duration of samples with +// a reader r. If dur is zero then no segmenting is performed, the whole +// audio file is read, which could cause some memory issues. +// The sample rate is the number of samples per second. +// At the moment, the audio format is auto-detected, but there should be +// a way to specify the audio format. func New(r io.Reader, dur time.Duration, sample_rate int) (*Segmenter, error) { segmenter := new(Segmenter) @@ -75,9 +81,11 @@ func (s *Segmenter) Close() error { ////////////////////////////////////////////////////////////////////////////// // PUBLIC METHODS -// TODO: segments are output through a callback, with the samples and a timestamp +// Segments are output through a callback, with the samples and a timestamp // TODO: we could do some basic silence and voice detection to segment to ensure // we don't overtax the CPU/GPU with silence and non-speech +// TODO: We whould be able to select the audio stream to use. At the moment +// the "best" audio stream is used, based on ffmpeg heuristic. func (s *Segmenter) Decode(ctx context.Context, fn SegmentFunc) error { // Check input parameters if fn == nil { @@ -129,7 +137,7 @@ func (s *Segmenter) Decode(ctx context.Context, fn SegmentFunc) error { return nil } -// Return the duration from the file or timestamp +// Return the file duration from the file or timestamp func (s *Segmenter) Duration() time.Duration { if s.reader != nil { return s.reader.Duration() diff --git a/pkg/whisper/segmenter/segmenter_test.go b/pkg/segmenter/segmenter_test.go similarity index 89% rename from pkg/whisper/segmenter/segmenter_test.go rename to pkg/segmenter/segmenter_test.go index e4bc2c4..79aabcc 100644 --- a/pkg/whisper/segmenter/segmenter_test.go +++ b/pkg/segmenter/segmenter_test.go @@ -7,11 +7,11 @@ import ( "time" // Packages - segmenter "github.com/mutablelogic/go-whisper/pkg/whisper/segmenter" + segmenter "github.com/mutablelogic/go-whisper/pkg/segmenter" assert "github.com/stretchr/testify/assert" ) -const SAMPLE = "../../../samples/OlivierL.wav" +const SAMPLE = "../../samples/OlivierL.wav" func Test_segmenter_001(t *testing.T) { assert := assert.New(t) diff --git a/pkg/store/doc.go b/pkg/store/doc.go new file mode 100644 index 0000000..4bbd955 --- /dev/null +++ b/pkg/store/doc.go @@ -0,0 +1,2 @@ +/* store implements a model store which allows downloading models from a remote server */ +package store diff --git a/pkg/whisper/model/store.go b/pkg/store/store.go similarity index 98% rename from pkg/whisper/model/store.go rename to pkg/store/store.go index 5478d5e..56e911b 100644 --- a/pkg/whisper/model/store.go +++ b/pkg/store/store.go @@ -1,4 +1,4 @@ -package model +package store import ( "context" @@ -12,7 +12,7 @@ import ( "sync" // Packages - schema "github.com/mutablelogic/go-whisper/pkg/whisper/schema" + schema "github.com/mutablelogic/go-whisper/pkg/schema" whisper "github.com/mutablelogic/go-whisper/sys/whisper" // Namespace imports diff --git a/pkg/whisper/model/writer.go b/pkg/store/writer.go similarity index 98% rename from pkg/whisper/model/writer.go rename to pkg/store/writer.go index 5d9b82a..1cb206c 100644 --- a/pkg/whisper/model/writer.go +++ b/pkg/store/writer.go @@ -1,4 +1,4 @@ -package model +package store import ( "io" diff --git a/pkg/whisper/task/context.go b/pkg/task/context.go similarity index 98% rename from pkg/whisper/task/context.go rename to pkg/task/context.go index 98b84b6..423ae6e 100644 --- a/pkg/whisper/task/context.go +++ b/pkg/task/context.go @@ -9,7 +9,7 @@ import ( "time" // Packages - schema "github.com/mutablelogic/go-whisper/pkg/whisper/schema" + schema "github.com/mutablelogic/go-whisper/pkg/schema" whisper "github.com/mutablelogic/go-whisper/sys/whisper" // Namespace imports diff --git a/pkg/whisper/task/transcription.go b/pkg/task/transcription.go similarity index 97% rename from pkg/whisper/task/transcription.go rename to pkg/task/transcription.go index a72a9bd..0442464 100644 --- a/pkg/whisper/task/transcription.go +++ b/pkg/task/transcription.go @@ -8,7 +8,7 @@ import ( "time" // Packages - "github.com/mutablelogic/go-whisper/pkg/whisper/schema" + "github.com/mutablelogic/go-whisper/pkg/schema" "github.com/mutablelogic/go-whisper/sys/whisper" ) diff --git a/whisper.go b/whisper.go index f82604b..dcd1e6c 100644 --- a/whisper.go +++ b/whisper.go @@ -10,10 +10,10 @@ import ( // Packages ffmpeg "github.com/mutablelogic/go-media/pkg/ffmpeg" - model "github.com/mutablelogic/go-whisper/pkg/whisper/model" - pool "github.com/mutablelogic/go-whisper/pkg/whisper/pool" - schema "github.com/mutablelogic/go-whisper/pkg/whisper/schema" - task "github.com/mutablelogic/go-whisper/pkg/whisper/task" + pool "github.com/mutablelogic/go-whisper/pkg/pool" + schema "github.com/mutablelogic/go-whisper/pkg/schema" + store "github.com/mutablelogic/go-whisper/pkg/store" + task "github.com/mutablelogic/go-whisper/pkg/task" whisper "github.com/mutablelogic/go-whisper/sys/whisper" // Namespace imports @@ -26,7 +26,7 @@ import ( // Whisper represents a whisper service for running transcription and translation type Whisper struct { pool *pool.ContextPool - store *model.Store + store *store.Store } ////////////////////////////////////////////////////////////////////////////// @@ -61,7 +61,7 @@ func New(path string, opt ...Opt) (*Whisper, error) { // Create a new whisper service w := new(Whisper) - if store, err := model.NewStore(path, extModel, defaultModelUrl); err != nil { + if store, err := store.NewStore(path, extModel, defaultModelUrl); err != nil { return nil, err } else { w.store = store @@ -112,7 +112,7 @@ func (w *Whisper) Close() error { func (w *Whisper) MarshalJSON() ([]byte, error) { return json.Marshal(struct { - Store *model.Store `json:"store"` + Store *store.Store `json:"store"` Pool *pool.ContextPool `json:"pool"` }{ Store: w.store, diff --git a/whisper_test.go b/whisper_test.go index 5659930..68059a2 100644 --- a/whisper_test.go +++ b/whisper_test.go @@ -9,7 +9,7 @@ import ( // Packages wav "github.com/go-audio/wav" whisper "github.com/mutablelogic/go-whisper" - task "github.com/mutablelogic/go-whisper/pkg/whisper/task" + task "github.com/mutablelogic/go-whisper/pkg/task" assert "github.com/stretchr/testify/assert" // Namespace imports From c6ba5260d2d4f082491a4ed0b46e8f7121349693 Mon Sep 17 00:00:00 2001 From: David Thorpe Date: Sat, 10 Aug 2024 10:28:42 +0200 Subject: [PATCH 16/21] Updates --- Makefile | 2 ++ doc/notes.md | 7 ------- 2 files changed, 2 insertions(+), 7 deletions(-) delete mode 100644 doc/notes.md diff --git a/Makefile b/Makefile index d731752..097beec 100644 --- a/Makefile +++ b/Makefile @@ -59,6 +59,8 @@ test: generate libwhisper libggml @PKG_CONFIG_PATH=${ROOT_PATH}/${BUILD_DIR} ${GO} test -v ./sys/whisper/... @echo "Running tests (pkg)" @PKG_CONFIG_PATH=${ROOT_PATH}/${BUILD_DIR} ${GO} test -v ./pkg/... + @echo "Running tests (whisper)" + @PKG_CONFIG_PATH=${ROOT_PATH}/${BUILD_DIR} ${GO} test -v ./ # Build whisper-static-library libwhisper: submodule diff --git a/doc/notes.md b/doc/notes.md deleted file mode 100644 index 974a49d..0000000 --- a/doc/notes.md +++ /dev/null @@ -1,7 +0,0 @@ - if (!whisper_is_multilingual(ctx)) { - if (params.language != "en" || params.translate) { - params.language = "en"; - params.translate = false; - fprintf(stderr, "%s: WARNING: model is not multilingual, ignoring language and translation options\n", __func__); - } - } From d649ff7fc9e9420c8655b52e806c222f8b8e7232 Mon Sep 17 00:00:00 2001 From: David Thorpe Date: Sat, 10 Aug 2024 11:13:33 +0200 Subject: [PATCH 17/21] Updated dockerfile --- README.md | 3 +-- etc/Dockerfile | 5 ++++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index aaa0832..8bce1e9 100755 --- a/README.md +++ b/README.md @@ -96,8 +96,7 @@ See all the other targets in the `Makefile` for more information. ## Developing -The `cmd/examples` directory contains a simple example of how to use the `whisper` package -in your own code. +TODO ## Status diff --git a/etc/Dockerfile b/etc/Dockerfile index ee293fb..5b45d58 100644 --- a/etc/Dockerfile +++ b/etc/Dockerfile @@ -35,7 +35,10 @@ RUN make -j$(nproc) # Setup runtime container FROM ${BASE_RUN_CONTAINER} AS runtime RUN apt-get -y update \ - && apt-get -y install libavformat58 libavcodec58 libavdevice58 libavfilter7 libavutil56 libswscale5 libswresample3 + && apt-get -y install software-properties-common \ + && add-apt-repository -y ppa:ubuntuhandbook1/ffmpeg6 \ + && apt-get -y update \ + && apt-get -y install libavformat60 libavcodec60 libavdevice60 libavfilter9 libavutil58 libswscale7 libswresample4 COPY --from=build --chmod=755 /app/build/whisper /usr/local/bin/whisper COPY --from=build /app/build/whisper /usr/local/bin/whisper COPY --chmod=755 etc/entrypoint.sh . From 886d60af7ec86cb48db9c790a060cd0148823117 Mon Sep 17 00:00:00 2001 From: David Thorpe Date: Sat, 10 Aug 2024 11:16:20 +0200 Subject: [PATCH 18/21] Updated --- etc/Dockerfile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/etc/Dockerfile b/etc/Dockerfile index 5b45d58..30626d5 100644 --- a/etc/Dockerfile +++ b/etc/Dockerfile @@ -14,7 +14,7 @@ ARG ARCH ARG OS RUN apt-get -y update \ - && apt-get -y install software-properties-common curl \ + && apt-get -y install software-properties-common curl libgomp1 \ && add-apt-repository -y ppa:ubuntuhandbook1/ffmpeg6 \ && apt-get -y update \ && apt-get -y install libavformat-dev libavcodec-dev libavdevice-dev libavfilter-dev libavutil-dev libswscale-dev libswresample-dev @@ -35,10 +35,10 @@ RUN make -j$(nproc) # Setup runtime container FROM ${BASE_RUN_CONTAINER} AS runtime RUN apt-get -y update \ - && apt-get -y install software-properties-common \ + && apt-get -y install software-properties-common libgomp1 \ && add-apt-repository -y ppa:ubuntuhandbook1/ffmpeg6 \ && apt-get -y update \ - && apt-get -y install libavformat60 libavcodec60 libavdevice60 libavfilter9 libavutil58 libswscale7 libswresample4 + && apt-get -y install libavformat60 libavcodec60 libavdevice60 libavfilter9 libavutil58 libswscale7 libswresample4 COPY --from=build --chmod=755 /app/build/whisper /usr/local/bin/whisper COPY --from=build /app/build/whisper /usr/local/bin/whisper COPY --chmod=755 etc/entrypoint.sh . From fbcd0cfdb9fdc639da2e0013ed510d2d16f132f2 Mon Sep 17 00:00:00 2001 From: David Thorpe Date: Sat, 10 Aug 2024 12:09:42 +0200 Subject: [PATCH 19/21] Updated segmenter --- pkg/segmenter/segmenter.go | 17 ++++------- pkg/segmenter/silence.go | 61 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+), 12 deletions(-) create mode 100644 pkg/segmenter/silence.go diff --git a/pkg/segmenter/segmenter.go b/pkg/segmenter/segmenter.go index e3dbc0b..2102529 100644 --- a/pkg/segmenter/segmenter.go +++ b/pkg/segmenter/segmenter.go @@ -31,13 +31,15 @@ type SegmentFunc func(time.Duration, []float32) error ////////////////////////////////////////////////////////////////////////////// // LIFECYCLE -// Create a new segmenter for a specific "dur" duration of samples with -// a reader r. If dur is zero then no segmenting is performed, the whole +// Create a new segmenter with a reader r which segments raw audio of 'dur' +// length. If dur is zero then no segmenting is performed, the whole // audio file is read, which could cause some memory issues. +// // The sample rate is the number of samples per second. +// // At the moment, the audio format is auto-detected, but there should be // a way to specify the audio format. -func New(r io.Reader, dur time.Duration, sample_rate int) (*Segmenter, error) { +func NewReader(r io.Reader, dur time.Duration, sample_rate int) (*Segmenter, error) { segmenter := new(Segmenter) // Check arguments @@ -136,12 +138,3 @@ func (s *Segmenter) Decode(ctx context.Context, fn SegmentFunc) error { // Return success return nil } - -// Return the file duration from the file or timestamp -func (s *Segmenter) Duration() time.Duration { - if s.reader != nil { - return s.reader.Duration() - } else { - return s.ts + time.Duration(len(s.buf))*time.Second/time.Duration(s.sample_rate) - } -} diff --git a/pkg/segmenter/silence.go b/pkg/segmenter/silence.go new file mode 100644 index 0000000..2559d99 --- /dev/null +++ b/pkg/segmenter/silence.go @@ -0,0 +1,61 @@ +package segmenter + +import ( + "math" + "time" + // Packages +) + +//////////////////////////////////////////////////////////////////////////////// +// TYPES + +// silence is a silence detector and audio booster for raw samples +// typical values are gain=20, threshold=0.003, timeout=2s +type silence struct { + Gain float64 // gain in decibels + Threshold float64 // threshold for silence + Timeout time.Duration // duration of silence before stopping recording + + // When we last started recording + t time.Time + r bool +} + +//////////////////////////////////////////////////////////////////////////////// +// PUBLIC METHODS + +// Increase gain and compute energy of a frame of audio data, return true +// if the frame of data should be recorded, false if it should be ignored +func (s *silence) Process(data []float32) bool { + energy := process(data, float32(math.Pow(10, s.Gain/20.0))) + + // Compute the gain + if energy > s.Threshold { + if s.t.IsZero() { + // Transition from silence to recording + s.r = true + } + s.t = time.Now() + } else if !s.t.IsZero() { + if time.Since(s.t) > s.Timeout { + // Transition from recording to silence + s.t = time.Time{} + s.r = false + } + } + return s.r +} + +//////////////////////////////////////////////////////////////////////////////// +// PRIVATE METHODS + +// Increase gain and compute energy of a frame of audio data, return the +// energy of the frame of data +func process(data []float32, gain float32) float64 { + energy := float64(0) + for i := 0; i < len(data); i++ { + data[i] *= gain + energy += float64(data[i]) * float64(data[i]) + } + return energy / math.Sqrt(float64(len(data))) +} From 3e5ad1185321097a155842d16bc6b247057e462c Mon Sep 17 00:00:00 2001 From: David Thorpe Date: Sat, 10 Aug 2024 13:24:33 +0200 Subject: [PATCH 20/21] Updated whisper --- Makefile | 7 ++- cmd/api/main.go | 101 ++++++++++++++++++++++++++++++++++++++ cmd/api/ping.go | 10 ++++ cmd/whisper/transcribe.go | 3 +- etc/Dockerfile | 8 +-- pkg/api/transcribe.go | 2 +- pkg/client/client.go | 7 +++ 7 files changed, 131 insertions(+), 7 deletions(-) create mode 100644 cmd/api/main.go create mode 100644 cmd/api/ping.go diff --git a/Makefile b/Makefile index 097beec..182613f 100644 --- a/Makefile +++ b/Makefile @@ -30,7 +30,7 @@ ifeq ($(GGML_CUDA),1) endif # Targets -all: whisper +all: whisper api # Generate the pkg-config files generate: mkdir go-tidy @@ -42,6 +42,11 @@ whisper: mkdir generate go-tidy libwhisper libggml @echo "Building whisper" @PKG_CONFIG_PATH=${ROOT_PATH}/${BUILD_DIR} ${GO} build ${BUILD_FLAGS} -o ${BUILD_DIR}/whisper ./cmd/whisper +# Make api +api: mkdir go-tidy + @echo "Building api" + @${GO} build ${BUILD_FLAGS} -o ${BUILD_DIR}/api ./cmd/api + # Build docker container docker: docker-dep submodule @echo build docker image: ${BUILD_TAG} for ${OS}/${ARCH} diff --git a/cmd/api/main.go b/cmd/api/main.go new file mode 100644 index 0000000..9bb6ee5 --- /dev/null +++ b/cmd/api/main.go @@ -0,0 +1,101 @@ +package main + +import ( + "context" + "os" + "path/filepath" + "syscall" + + // Packages + kong "github.com/alecthomas/kong" + tablewriter "github.com/djthorpe/go-tablewriter" + opt "github.com/mutablelogic/go-client" + ctx "github.com/mutablelogic/go-server/pkg/context" + client "github.com/mutablelogic/go-whisper/pkg/client" +) + +//////////////////////////////////////////////////////////////////////////////// +// TYPES + +type Globals struct { + Url string `name:"url" help:"URL of whisper service (can be set from WHISPER_URL env)" default:"${WHISPER_URL}"` + Debug bool `name:"debug" help:"Enable debug output"` + + // Writer, service and context + writer *tablewriter.Writer + client *client.Client + ctx context.Context +} + +type CLI struct { + Globals + + Ping PingCmd `cmd help:"Ping the whisper service"` +} + +//////////////////////////////////////////////////////////////////////////////// +// GLOBALS + +const ( + defaultEndpoint = "http://localhost:8080/api/v1" +) + +//////////////////////////////////////////////////////////////////////////////// +// MAIN + +func main() { + // The name of the executable + name, err := os.Executable() + if err != nil { + panic(err) + } else { + name = filepath.Base(name) + } + + // Create a cli parser + cli := CLI{} + cmd := kong.Parse(&cli, + kong.Name(name), + kong.Description("speech transcription and translation service client"), + kong.UsageOnError(), + kong.ConfigureHelp(kong.HelpOptions{Compact: true}), + kong.Vars{ + "WHISPER_URL": envOrDefault("WHISPER_URL", defaultEndpoint), + }, + ) + + // Set whisper client options + opts := []opt.ClientOpt{} + if cli.Globals.Debug { + opts = append(opts, opt.OptTrace(os.Stderr, true)) + } + + // Create a whisper client + client, err := client.New(cli.Globals.Url, opts...) + if err != nil { + cmd.FatalIfErrorf(err) + return + } else { + cli.Globals.client = client + } + + // Create a tablewriter object with text output + writer := tablewriter.New(os.Stdout, tablewriter.OptOutputText()) + cli.Globals.writer = writer + + // Create a context + cli.Globals.ctx = ctx.ContextForSignal(os.Interrupt, syscall.SIGQUIT) + + // Run the command + if err := cmd.Run(&cli.Globals); err != nil { + cmd.FatalIfErrorf(err) + } +} + +func envOrDefault(name, def string) string { + if value := os.Getenv(name); value != "" { + return value + } else { + return def + } +} diff --git a/cmd/api/ping.go b/cmd/api/ping.go new file mode 100644 index 0000000..0acb1f7 --- /dev/null +++ b/cmd/api/ping.go @@ -0,0 +1,10 @@ +package main + +type PingCmd struct{} + +func (cmd *PingCmd) Run(ctx *Globals) error { + if err := ctx.client.Ping(ctx.ctx); err != nil { + return err + } + return ctx.writer.Write("OK") +} diff --git a/cmd/whisper/transcribe.go b/cmd/whisper/transcribe.go index 73a84af..2f1dc4e 100644 --- a/cmd/whisper/transcribe.go +++ b/cmd/whisper/transcribe.go @@ -36,7 +36,8 @@ func (cmd *TranscribeCmd) Run(ctx *Globals) error { defer f.Close() // Create a segmenter - read segments based on requested segment size - segmenter, err := segmenter.New(f, 0, whisper.SampleRate) + // TODO + segmenter, err := segmenter.NewReader(f, 0, whisper.SampleRate) if err != nil { return err } diff --git a/etc/Dockerfile b/etc/Dockerfile index 30626d5..9843538 100644 --- a/etc/Dockerfile +++ b/etc/Dockerfile @@ -1,4 +1,4 @@ -ARG BASE_TAG=0.0.10-4-g6421fd2 +ARG BASE_TAG=1.0.0 ARG BASE_DEV_CONTAINER=ghcr.io/mutablelogic/cuda-dev:${BASE_TAG} ARG BASE_RUN_CONTAINER=ghcr.io/mutablelogic/cuda-rt:${BASE_TAG} ARG CUDA_DOCKER_ARCH=all @@ -14,7 +14,7 @@ ARG ARCH ARG OS RUN apt-get -y update \ - && apt-get -y install software-properties-common curl libgomp1 \ + && apt-get -y install software-properties-common curl \ && add-apt-repository -y ppa:ubuntuhandbook1/ffmpeg6 \ && apt-get -y update \ && apt-get -y install libavformat-dev libavcodec-dev libavdevice-dev libavfilter-dev libavutil-dev libswscale-dev libswresample-dev @@ -35,12 +35,12 @@ RUN make -j$(nproc) # Setup runtime container FROM ${BASE_RUN_CONTAINER} AS runtime RUN apt-get -y update \ - && apt-get -y install software-properties-common libgomp1 \ + && apt-get -y install software-properties-common \ && add-apt-repository -y ppa:ubuntuhandbook1/ffmpeg6 \ && apt-get -y update \ && apt-get -y install libavformat60 libavcodec60 libavdevice60 libavfilter9 libavutil58 libswscale7 libswresample4 COPY --from=build --chmod=755 /app/build/whisper /usr/local/bin/whisper -COPY --from=build /app/build/whisper /usr/local/bin/whisper +COPY --from=build --chmod=755 /app/build/api /usr/local/bin/api COPY --chmod=755 etc/entrypoint.sh . # Entrypoint when running the server diff --git a/pkg/api/transcribe.go b/pkg/api/transcribe.go index c5cc13a..7c60816 100644 --- a/pkg/api/transcribe.go +++ b/pkg/api/transcribe.go @@ -101,7 +101,7 @@ func TranscribeFile(ctx context.Context, service *whisper.Whisper, w http.Respon defer f.Close() // Create a segmenter - read segments based on requested segment size - segmenter, err := segmenter.New(f, req.SegmentDur(), whisper.SampleRate) + segmenter, err := segmenter.NewReader(f, req.SegmentDur(), whisper.SampleRate) if err != nil { httpresponse.Error(w, http.StatusBadRequest, err.Error()) return diff --git a/pkg/client/client.go b/pkg/client/client.go index 8f02d61..56595a7 100644 --- a/pkg/client/client.go +++ b/pkg/client/client.go @@ -34,6 +34,13 @@ func New(endpoint string, opts ...client.ClientOpt) (*Client, error) { } } +/////////////////////////////////////////////////////////////////////////////// +// PING + +func (c *Client) Ping(ctx context.Context) error { + return c.DoWithContext(ctx, client.MethodGet, nil, client.OptPath("health")) +} + /////////////////////////////////////////////////////////////////////////////// // MODELS From 317ef016eb3734c4d4cb6316d9ddb1e20d100337 Mon Sep 17 00:00:00 2001 From: David Thorpe Date: Sat, 10 Aug 2024 16:32:11 +0200 Subject: [PATCH 21/21] Updated api --- cmd/api/delete.go | 12 ++++++++++++ cmd/api/download.go | 22 ++++++++++++++++++++++ cmd/api/main.go | 5 ++++- cmd/api/models.go | 13 +++++++++++++ cmd/api/ping.go | 2 +- go.mod | 10 +++++----- go.sum | 20 ++++++++++---------- pkg/client/client.go | 26 +++++++++++++++++++++++--- 8 files changed, 90 insertions(+), 20 deletions(-) create mode 100644 cmd/api/delete.go create mode 100644 cmd/api/download.go create mode 100644 cmd/api/models.go diff --git a/cmd/api/delete.go b/cmd/api/delete.go new file mode 100644 index 0000000..5f92b12 --- /dev/null +++ b/cmd/api/delete.go @@ -0,0 +1,12 @@ +package main + +type DeleteCmd struct { + Model string `arg:"" name:"model" help:"Model to delete"` +} + +func (cmd *DeleteCmd) Run(ctx *Globals) error { + if err := ctx.client.DeleteModel(ctx.ctx, cmd.Model); err != nil { + return err + } + return nil +} diff --git a/cmd/api/download.go b/cmd/api/download.go new file mode 100644 index 0000000..03e0ba7 --- /dev/null +++ b/cmd/api/download.go @@ -0,0 +1,22 @@ +package main + +import ( + "fmt" + + "github.com/djthorpe/go-tablewriter" +) + +type DownloadCmd struct { + Model string `arg:"" name:"model" help:"Model to download (must end in .bin)"` +} + +func (cmd *DownloadCmd) Run(ctx *Globals) error { + model, err := ctx.client.DownloadModel(ctx.ctx, cmd.Model, func(status string, cur, total int64) { + pct := fmt.Sprintf("%02d%%", int(100*float64(cur)/float64(total))) + ctx.writer.Writeln(pct, status) + }) + if err != nil { + return err + } + return ctx.writer.Write(model, tablewriter.OptHeader()) +} diff --git a/cmd/api/main.go b/cmd/api/main.go index 9bb6ee5..ec06962 100644 --- a/cmd/api/main.go +++ b/cmd/api/main.go @@ -30,7 +30,10 @@ type Globals struct { type CLI struct { Globals - Ping PingCmd `cmd help:"Ping the whisper service"` + Ping PingCmd `cmd:"ping" help:"Ping the whisper service"` + Models ModelsCmd `cmd:"models" help:"List models"` + Download DownloadCmd `cmd:"download" help:"Download a model"` + Delete DeleteCmd `cmd:"delete" help:"Delete a model"` } //////////////////////////////////////////////////////////////////////////////// diff --git a/cmd/api/models.go b/cmd/api/models.go new file mode 100644 index 0000000..acc5a1e --- /dev/null +++ b/cmd/api/models.go @@ -0,0 +1,13 @@ +package main + +import "github.com/djthorpe/go-tablewriter" + +type ModelsCmd struct{} + +func (cmd *ModelsCmd) Run(ctx *Globals) error { + if models, err := ctx.client.ListModels(ctx.ctx); err != nil { + return err + } else { + return ctx.writer.Write(models, tablewriter.OptHeader()) + } +} diff --git a/cmd/api/ping.go b/cmd/api/ping.go index 0acb1f7..f8cbc31 100644 --- a/cmd/api/ping.go +++ b/cmd/api/ping.go @@ -6,5 +6,5 @@ func (cmd *PingCmd) Run(ctx *Globals) error { if err := ctx.client.Ping(ctx.ctx); err != nil { return err } - return ctx.writer.Write("OK") + return ctx.writer.Writeln("OK") } diff --git a/go.mod b/go.mod index 0daa3af..d29545b 100755 --- a/go.mod +++ b/go.mod @@ -5,10 +5,10 @@ go 1.22 require ( github.com/alecthomas/kong v0.9.0 github.com/djthorpe/go-errors v1.0.3 - github.com/djthorpe/go-tablewriter v0.0.8 + github.com/djthorpe/go-tablewriter v0.0.10 github.com/go-audio/wav v1.1.0 github.com/mutablelogic/go-client v1.0.9 - github.com/mutablelogic/go-media v1.6.11 + github.com/mutablelogic/go-media v1.6.12 github.com/mutablelogic/go-server v1.4.15 github.com/stretchr/testify v1.9.0 ) @@ -20,8 +20,8 @@ require ( github.com/mattn/go-runewidth v0.0.16 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect github.com/rivo/uniseg v0.4.7 // indirect - golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 // indirect - golang.org/x/sys v0.22.0 // indirect - golang.org/x/term v0.22.0 // indirect + golang.org/x/exp v0.0.0-20240808152545-0cdaa3abc0fa // indirect + golang.org/x/sys v0.24.0 // indirect + golang.org/x/term v0.23.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/go.sum b/go.sum index 2c38c5e..01e27aa 100644 --- a/go.sum +++ b/go.sum @@ -8,8 +8,8 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/djthorpe/go-errors v1.0.3 h1:GZeMPkC1mx2vteXLI/gvxZS0Ee9zxzwD1mcYyKU5jD0= github.com/djthorpe/go-errors v1.0.3/go.mod h1:HtfrZnMd6HsX75Mtbv9Qcnn0BqOrrFArvCaj3RMnZhY= -github.com/djthorpe/go-tablewriter v0.0.8 h1:uRhB9XVgK1n9tvVS7KMyxhxxGGtDvqC80toDTpW4DB4= -github.com/djthorpe/go-tablewriter v0.0.8/go.mod h1:NVBvytpL+6fHfCKn0+3lSi15/G3A1HWf2cLNeHg6YBg= +github.com/djthorpe/go-tablewriter v0.0.10 h1:/nL5NWJba4zrQoleIEyw4NwBYY9b3XTGM9EV+c+VWhU= +github.com/djthorpe/go-tablewriter v0.0.10/go.mod h1:NVBvytpL+6fHfCKn0+3lSi15/G3A1HWf2cLNeHg6YBg= github.com/go-audio/audio v1.0.0 h1:zS9vebldgbQqktK4H0lUqWrG8P0NxCJVqcj7ZpNnwd4= github.com/go-audio/audio v1.0.0/go.mod h1:6uAu0+H2lHkwdGsAY+j2wHPNPpPoeg5AaEFh9FlA+Zs= github.com/go-audio/riff v1.0.0 h1:d8iCGbDvox9BfLagY94fBynxSPHO80LmZCaOsmKxokA= @@ -26,8 +26,8 @@ github.com/mattn/go-runewidth v0.0.16 h1:E5ScNMtiwvlvB5paMFdw9p4kSQzbXFikJ5SQO6T github.com/mattn/go-runewidth v0.0.16/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= github.com/mutablelogic/go-client v1.0.9 h1:Eh4sjQOFDldP/L3IizqkcOD3WigZR+u1VaHTUM4ujYw= github.com/mutablelogic/go-client v1.0.9/go.mod h1:VLyB8j8IBJSK/FXvvqhmq93PRWDKkyLu8R7V2Vudb6A= -github.com/mutablelogic/go-media v1.6.11 h1:czwRvuWIaqDArZrHv0e7nEIjXJkCbnNWkrQzkTOs96w= -github.com/mutablelogic/go-media v1.6.11/go.mod h1:HulNT0yyH63a3FRlbuzNDakhOypYrmtFVkHEXZjDgAY= +github.com/mutablelogic/go-media v1.6.12 h1:+FaqSz6GpQduF4KEDdYMxTCdtkJFOOaMJ/4wd3L59wI= +github.com/mutablelogic/go-media v1.6.12/go.mod h1:HulNT0yyH63a3FRlbuzNDakhOypYrmtFVkHEXZjDgAY= github.com/mutablelogic/go-server v1.4.15 h1:jOvVdDmVK+PGCMBAk5atKHVonnccwy/b4dWwWFAOTso= github.com/mutablelogic/go-server v1.4.15/go.mod h1:9nenPAohKu8bFoRgwHJh+3s8h0kLFjUAb8KZvT1TQNU= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= @@ -37,14 +37,14 @@ github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ= github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= -golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 h1:2dVuKD2vS7b0QIHQbpyTISPd0LeHDbnYEryqj5Q1ug8= -golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56/go.mod h1:M4RDyNAINzryxdtnbRXRL/OHtkFuWGRjvuhBJpk2IlY= +golang.org/x/exp v0.0.0-20240808152545-0cdaa3abc0fa h1:ELnwvuAXPNtPk1TJRuGkI9fDTwym6AYBu0qzT8AcHdI= +golang.org/x/exp v0.0.0-20240808152545-0cdaa3abc0fa/go.mod h1:akd2r19cwCdwSwWeIdzYQGa/EZZyqcOdwWiwj5L5eKQ= golang.org/x/image v0.18.0 h1:jGzIakQa/ZXI1I0Fxvaa9W7yP25TqT6cHIHn+6CqvSQ= golang.org/x/image v0.18.0/go.mod h1:4yyo5vMFQjVjUcVk4jEQcU9MGy/rulF5WvUILseCM2E= -golang.org/x/sys v0.22.0 h1:RI27ohtqKCnwULzJLqkv897zojh5/DwS/ENaMzUOaWI= -golang.org/x/sys v0.22.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/term v0.22.0 h1:BbsgPEJULsl2fV/AT3v15Mjva5yXKQDyKf+TbDz7QJk= -golang.org/x/term v0.22.0/go.mod h1:F3qCibpT5AMpCRfhfT53vVJwhLtIVHhB9XDjfFvnMI4= +golang.org/x/sys v0.24.0 h1:Twjiwq9dn6R1fQcyiK+wQyHWfaz/BJB+YIpzU/Cv3Xg= +golang.org/x/sys v0.24.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/term v0.23.0 h1:F6D4vR+EHoL9/sWAWgAR1H2DcHr4PareCbAaCo1RpuU= +golang.org/x/term v0.23.0/go.mod h1:DgV24QBUrK6jhZXl+20l6UWznPlwAHm1Q1mGHtydmSk= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= diff --git a/pkg/client/client.go b/pkg/client/client.go index 56595a7..6591310 100644 --- a/pkg/client/client.go +++ b/pkg/client/client.go @@ -2,6 +2,7 @@ package client import ( "context" + "errors" "io" "net/url" "os" @@ -86,9 +87,28 @@ func (c *Client) DownloadModel(ctx context.Context, path string, fn func(status client.OptPath("models"), client.OptQuery(query), client.OptNoTimeout(), - client.OptJsonStreamCallback(func(v any) error { - if v, ok := v.(*resp); ok && fn != nil { - fn(v.Status, v.Completed, v.Total) + client.OptTextStreamCallback(func(evt client.TextStreamEvent) error { + switch evt.Event { + case "progress": + var r resp + if err := evt.Json(&r); err != nil { + return err + } else { + fn(r.Status, r.Completed, r.Total) + } + case "error": + var errstr string + if evt.Event == "error" { + if err := evt.Json(&errstr); err != nil { + return err + } else { + return errors.New(errstr) + } + } + case "ok": + if err := evt.Json(&r); err != nil { + return err + } } return nil }),