Skip to content

Commit

Permalink
Make audio format consistent in decoder and reader (#33)
Browse files Browse the repository at this point in the history
* Make audio format consistent in decoder and reader

* fix test
  • Loading branch information
gBillal authored Feb 5, 2025
1 parent 9ac6440 commit 6a2bea3
Show file tree
Hide file tree
Showing 6 changed files with 31 additions and 36 deletions.
19 changes: 5 additions & 14 deletions c_src/xav/xav_reader.c
Original file line number Diff line number Diff line change
Expand Up @@ -228,22 +228,13 @@ static int init_audio_converter(struct XavReader *xav_reader) {
}

enum AVSampleFormat out_sample_fmt;
if (strcmp(xav_reader->out_format, "u8") == 0) {
out_sample_fmt = AV_SAMPLE_FMT_U8;
} else if (strcmp(xav_reader->out_format, "s16") == 0) {
out_sample_fmt = AV_SAMPLE_FMT_S16;
} else if (strcmp(xav_reader->out_format, "s32") == 0) {
out_sample_fmt = AV_SAMPLE_FMT_S32;
} else if (strcmp(xav_reader->out_format, "s64") == 0) {
out_sample_fmt = AV_SAMPLE_FMT_S64;
} else if (strcmp(xav_reader->out_format, "f32") == 0) {
out_sample_fmt = AV_SAMPLE_FMT_FLT;
} else if (strcmp(xav_reader->out_format, "f64") == 0) {
out_sample_fmt = AV_SAMPLE_FMT_DBL;
} else if (strcmp(xav_reader->out_format, "nil") == 0) {
if (strcmp(xav_reader->out_format, "nil") == 0) {
out_sample_fmt = av_get_alt_sample_fmt(xav_reader->reader->c->sample_fmt, 0);
} else {
return -1;
out_sample_fmt = av_get_sample_fmt(xav_reader->out_format);
if (out_sample_fmt == AV_SAMPLE_FMT_NONE) {
return -1;
}
}

struct ChannelLayout in_chlayout, out_chlayout;
Expand Down
7 changes: 0 additions & 7 deletions lib/xav/decoder.ex
Original file line number Diff line number Diff line change
Expand Up @@ -108,15 +108,13 @@ defmodule Xav.Decoder do
:ok

{:ok, {data, format, width, height, pts}} ->
format = normalize_format(format)
{:ok, Xav.Frame.new(data, format, width, height, pts)}

# Sometimes, audio converter might not return data immediately.
{:ok, {"", _format, _samples, _pts}} ->
:ok

{:ok, {data, format, samples, pts}} ->
format = normalize_format(format)
{:ok, Xav.Frame.new(data, format, samples, pts)}

{:error, _reason} = error ->
Expand Down Expand Up @@ -151,9 +149,4 @@ defmodule Xav.Decoder do
{:error, reason} -> raise "Failed to flush decoder: #{inspect(reason)}"
end
end

# Use the same formats as Nx
defp normalize_format(:flt), do: :f32
defp normalize_format(:dbl), do: :f64
defp normalize_format(other), do: other
end
29 changes: 21 additions & 8 deletions lib/xav/frame.ex
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,15 @@ defmodule Xav.Frame do

@typedoc """
Possible audio sample formats.
To get the complete list of sample formats, check `Xav.sample_formats/0`.
"""
@type audio_format() :: :u8 | :s16 | :s32 | :s64 | :f32 | :f64
@type audio_format() :: atom()

@typedoc """
Possible video frame formats.
The accepted formats are all `ffmpeg` pixel formats. For a complete list, run:
```sh
ffmpeg -pix_fmts
```
To get the complete list of pixel formats, check `Xav.pixel_formats/0`.
An example of a pixel format is `:rgb24`.
"""
Expand Down Expand Up @@ -77,16 +75,31 @@ defmodule Xav.Frame do
Converts a frame to an Nx tensor.
In case of a video frame, dimension names of the newly created tensor are `[:height, :width, :channels]`.
For video frames, the only supported pixel formats are:
* `:rgb24`
* `:bgr24`
"""
@spec to_nx(t()) :: Nx.Tensor.t()
def to_nx(%__MODULE__{type: :video} = frame) do
def to_nx(%__MODULE__{type: :video, format: format} = frame)
when format in [:rgb24, :bgr24] do
frame.data
|> Nx.from_binary(:u8)
|> Nx.reshape({frame.height, frame.width, 3}, names: [:height, :width, :channels])
end

def to_nx(%__MODULE__{type: :audio} = frame) do
Nx.from_binary(frame.data, frame.format)
Nx.from_binary(frame.data, normalize_format(frame.format))
end

# Translates the sample-format atom stored on an audio frame into the type
# atom that `to_nx/1` hands to `Nx.from_binary/2`.
#
# `:flt` and `:dbl` become `:f32` and `:f64`; every `p`-suffixed name maps to
# the same type as its unsuffixed counterpart. Presumably the `p` suffix marks
# FFmpeg's planar variants - confirm against `Xav.sample_formats/0`.
#
# NOTE(review): only the element type is translated here; this function does
# not touch or describe how the samples are laid out in `frame.data`.
defp normalize_format(:flt), do: :f32
defp normalize_format(:fltp), do: :f32
defp normalize_format(:dbl), do: :f64
defp normalize_format(:dblp), do: :f64
defp normalize_format(:u8p), do: :u8
defp normalize_format(:s16p), do: :s16
defp normalize_format(:s32p), do: :s32
defp normalize_format(:s64p), do: :s64
# Any other atom (for example `:u8` or `:s16`) is passed through unchanged.
defp normalize_format(format), do: format
end
end
8 changes: 3 additions & 5 deletions lib/xav/reader.ex
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@ defmodule Xav.Reader do
Audio/video file reader.
"""

@audio_out_formats [:u8, :s16, :s32, :s64, :f32, :f64]

@reader_options_schema [
read: [
type: {:in, [:audio, :video]},
Expand All @@ -17,10 +15,10 @@ defmodule Xav.Reader do
doc: "Whether the path points to the camera"
],
out_format: [
type: {:in, @audio_out_formats},
type: :atom,
doc: """
The output format of the audio samples. It should be one of
the following values: `#{Enum.join(@audio_out_formats, ", ")}`.
The output format of the audio samples. For a list of available
sample formats, see `Xav.sample_formats/0`.
For video samples, it is always `:rgb24`.
"""
Expand Down
2 changes: 1 addition & 1 deletion test/decoder_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,7 @@ defmodule Xav.DecoderTest do
test "audio" do
decoder = Xav.Decoder.new(:opus)

assert {:ok, %Xav.Frame{data: data, samples: 960, pts: 0, format: :f32}} =
assert {:ok, %Xav.Frame{data: data, samples: 960, pts: 0, format: :flt}} =
Xav.Decoder.decode(decoder, @opus_frame)

assert byte_size(data) == 7680
Expand Down
2 changes: 1 addition & 1 deletion test/reader_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ defmodule Xav.ReaderTest do
Xav.Reader.stream!(path,
read: :audio,
out_channels: 1,
out_format: :f32,
out_format: :flt,
out_sample_rate: 16_000
)
|> Enum.map(&Xav.Frame.to_nx(&1))
Expand Down

0 comments on commit 6a2bea3

Please sign in to comment.