diff --git a/pedalboard/io/AudioFileInit.h b/pedalboard/io/AudioFileInit.h index 57d94cf1..1974da41 100644 --- a/pedalboard/io/AudioFileInit.h +++ b/pedalboard/io/AudioFileInit.h @@ -56,7 +56,7 @@ Unlike a typical ``open`` call: the sample rate of the file. - A file-like object can be provided to :class:`AudioFile`, allowing for reading and writing to in-memory streams or buffers. The provided file-like object must be seekable - and must be opened in binary mode (i.e.: ``io.BinaryIO`` instead of ``io.StringIO``.). + and must be opened in binary mode (i.e.: ``io.BytesIO`` instead of ``io.StringIO``). A :class:`memoryview` object may also be provided when reading audio. @@ -185,7 +185,7 @@ inline void init_audio_file( py::arg("cls"), py::arg("file_like"), py::arg("mode") = "r", "Open a file-like object for reading. The provided object must have " "``read``, ``seek``, ``tell``, and ``seekable`` methods, and must " - "return binary data (i.e.: ``open(..., \"w\")`` or ``io.BinaryIO``, " + "return binary data (i.e.: ``open(..., \"w\")`` or ``io.BytesIO``, " "etc.).") .def_static( "__new__", diff --git a/pedalboard/io/ReadableAudioFile.h b/pedalboard/io/ReadableAudioFile.h index 3df33299..a6ff2479 100644 --- a/pedalboard/io/ReadableAudioFile.h +++ b/pedalboard/io/ReadableAudioFile.h @@ -270,96 +270,123 @@ class ReadableAudioFile { py::gil_scoped_release release; + numSamplesToKeep = + readInternal(numChannels, numSamples, (float *)outputInfo.ptr); + PythonException::raise(); + } - // If the file being read does not have enough content, it _should_ pad - // the rest of the array with zeroes. Unfortunately, this does not seem to - // be true in practice, so we pre-zero the array to be returned here: - std::memset((void *)outputInfo.ptr, 0, - numChannels * numSamples * sizeof(float)); + if (numSamplesToKeep < numSamples) { + buffer.resize({(long long)numChannels, (long long)numSamplesToKeep}); + } - float **channelPointers = (float **)alloca(numChannels * sizeof(float *)); - for (long long c = 0; c < numChannels; c++) { - channelPointers[c] = ((float *)outputInfo.ptr) + (numSamples * c); - } + return buffer; + } - if (reader->usesFloatingPointData || reader->bitsPerSample == 32) { - auto readResult = reader->read(channelPointers, numChannels, - currentPosition, numSamples); - PythonException::raise(); + /** + * Read the given number of frames (samples in each channel) from this audio + * file into the given output pointers. This method does not take or hold the + * GIL, as no Python methods (like the py::array_t constructor) are + * called directly (except for in the error case). + * + * @param numChannels The number of channels to read from the file + * @param numSamplesToFill The number of frames to read from the file + * @param outputPointer A pointer to the contiguous floating-point output + * array to write to of shape [numChannels, + * numSamplesToFill]. + * + * @return the number of samples that were actually read from the file + */ + long long readInternal(const long long numChannels, + const long long numSamplesToFill, + float *outputPointer) { + // If the file being read does not have enough content, it _should_ pad + // the rest of the array with zeroes. Unfortunately, this does not seem to + // be true in practice, so we pre-zero the array to be returned here: + std::fill_n(outputPointer, numChannels * numSamplesToFill, 0); + + long long numSamples = std::min( + numSamplesToFill, + (reader->lengthInSamples + (lengthCorrection ? *lengthCorrection : 0)) - + currentPosition); - juce::int64 samplesRead = numSamples; - if (juce::AudioFormatReaderWithPosition *positionAware = - dynamic_cast( - reader.get())) { - samplesRead = positionAware->getCurrentPosition() - currentPosition; - } + long long numSamplesToKeep = numSamples; - bool hitEndOfFile = - (samplesRead + currentPosition) == reader->lengthInSamples; - - // We read some data, but not as much as we asked for! - // This will only happen for lossy, header-optional formats - // like MP3. - if (samplesRead < numSamples || hitEndOfFile) { - lengthCorrection = - (samplesRead + currentPosition) - reader->lengthInSamples; - } else if (!readResult) { - throwReadError(currentPosition, numSamples, samplesRead); - } + float **channelPointers = (float **)alloca(numChannels * sizeof(float *)); + for (long long c = 0; c < numChannels; c++) { + channelPointers[c] = ((float *)outputPointer) + (numSamples * c); + } - numSamplesToKeep = samplesRead; - } else { - // If the audio is stored in an integral format, read it as integers - // and do the floating-point conversion ourselves to work around - // floating-point imprecision in JUCE when reading formats smaller than - // 32-bit (i.e.: 16-bit audio is off by about 0.003%) - auto readResult = - reader->readSamples((int **)channelPointers, numChannels, 0, - currentPosition, numSamples); - PythonException::raise(); - if (!readResult) { - throwReadError(currentPosition, numSamples); - } + if (reader->usesFloatingPointData || reader->bitsPerSample == 32) { + auto readResult = reader->read(channelPointers, numChannels, + currentPosition, numSamples); - // When converting 24-bit, 16-bit, or 8-bit data from int to float, - // the values provided by the above read() call are shifted left - // (such that the least significant bits are all zero) - // JUCE will then divide these values by 0x7FFFFFFF, even though - // the least significant bits are zero, effectively losing precision. - // Instead, here we set the scale factor appropriately. - int maxValueAsInt; - switch (reader->bitsPerSample) { - case 24: - maxValueAsInt = 0x7FFFFF00; - break; - case 16: - maxValueAsInt = 0x7FFF0000; - break; - case 8: - maxValueAsInt = 0x7F000000; - break; - default: - throw std::runtime_error("Not sure how to convert data from " + - std::to_string(reader->bitsPerSample) + - " bits per sample to floating point!"); - } - float scaleFactor = 1.0f / static_cast(maxValueAsInt); + juce::int64 samplesRead = numSamples; + if (juce::AudioFormatReaderWithPosition *positionAware = + dynamic_cast( + reader.get())) { + samplesRead = positionAware->getCurrentPosition() - currentPosition; + } - for (long long c = 0; c < numChannels; c++) { - juce::FloatVectorOperations::convertFixedToFloat( - channelPointers[c], (const int *)channelPointers[c], scaleFactor, - static_cast(numSamples)); - } + bool hitEndOfFile = + (samplesRead + currentPosition) == reader->lengthInSamples; + + // We read some data, but not as much as we asked for! + // This will only happen for lossy, header-optional formats + // like MP3. + if (samplesRead < numSamples || hitEndOfFile) { + lengthCorrection = + (samplesRead + currentPosition) - reader->lengthInSamples; + } else if (!readResult) { + PythonException::raise(); + throwReadError(currentPosition, numSamples, samplesRead); + } + numSamplesToKeep = samplesRead; + } else { + // If the audio is stored in an integral format, read it as integers + // and do the floating-point conversion ourselves to work around + // floating-point imprecision in JUCE when reading formats smaller than + // 32-bit (i.e.: 16-bit audio is off by about 0.003%) + auto readResult = + reader->readSamples((int **)channelPointers, numChannels, 0, + currentPosition, numSamplesToKeep); + if (!readResult) { + PythonException::raise(); + throwReadError(currentPosition, numSamples); } - } - currentPosition += numSamplesToKeep; + // When converting 24-bit, 16-bit, or 8-bit data from int to float, + // the values provided by the above read() call are shifted left + // (such that the least significant bits are all zero) + // JUCE will then divide these values by 0x7FFFFFFF, even though + // the least significant bits are zero, effectively losing precision. + // Instead, here we set the scale factor appropriately. + int maxValueAsInt; + switch (reader->bitsPerSample) { + case 24: + maxValueAsInt = 0x7FFFFF00; + break; + case 16: + maxValueAsInt = 0x7FFF0000; + break; + case 8: + maxValueAsInt = 0x7F000000; + break; + default: + throw std::runtime_error("Not sure how to convert data from " + + std::to_string(reader->bitsPerSample) + + " bits per sample to floating point!"); + } + float scaleFactor = 1.0f / static_cast(maxValueAsInt); - if (numSamplesToKeep < numSamples) { - buffer.resize({(long long)numChannels, (long long)numSamplesToKeep}); + for (long long c = 0; c < numChannels; c++) { + juce::FloatVectorOperations::convertFixedToFloat( + channelPointers[c], (const int *)channelPointers[c], scaleFactor, + static_cast(numSamples)); + } } - return buffer; + currentPosition += numSamplesToKeep; + return numSamplesToKeep; } py::array readRaw(std::variant numSamplesVariant) { @@ -618,7 +645,7 @@ class ReadableAudioFile std::unique_ptr reader; juce::CriticalSection objectLock; - int currentPosition = 0; + long long currentPosition = 0; // Certain files (notably CBR MP3 files) can report the wrong number of // frames until the entire file is scanned. This field stores the delta diff --git a/pedalboard/io/ResampledReadableAudioFile.h b/pedalboard/io/ResampledReadableAudioFile.h index 0f46b263..3c83d940 100644 --- a/pedalboard/io/ResampledReadableAudioFile.h +++ b/pedalboard/io/ResampledReadableAudioFile.h @@ -100,7 +100,6 @@ class ResampledReadableAudioFile ResamplingQuality getQuality() const { return resampler.getQuality(); } py::array_t read(std::variant numSamplesVariant) { - py::gil_scoped_release release; long long numSamples = parseNumSamples(numSamplesVariant); if (numSamples == 0) throw std::domain_error( @@ -109,8 +108,25 @@ class ResampledReadableAudioFile "memory. Please pass a number of frames to read (available from " "the 'frames' attribute)."); - const juce::ScopedLock scopedLock(objectLock); + py::gil_scoped_release release; + juce::AudioBuffer resampledBuffer; + { + const juce::ScopedLock scopedLock(objectLock); + resampledBuffer = readInternal(numSamples); + } + py::gil_scoped_acquire acquire; + return copyJuceBufferIntoPyArray(resampledBuffer, + ChannelLayout::NotInterleaved, 0); + } + /** + * Read samples from the underlying audio file, resample them, and return a + * juce::AudioBuffer containing the result without holding the GIL. + * + * @param numSamples The number of samples to read. + * @return juce::AudioBuffer The resulting audio. + */ + juce::AudioBuffer readInternal(long long numSamples) { long long samplesInResampledBuffer = 0; juce::AudioBuffer resampledBuffer(audioFile->getNumChannels(), numSamples); @@ -149,28 +165,56 @@ class ResampledReadableAudioFile audioFile->getSampleRateAsDouble()) / resampler.getTargetSampleRate()); + // Make a juce::AudioBuffer that contains contiguous memory, + // which we can pass to readInternal: + std::vector contiguousSourceSampleBuffer; + std::vector contiguousSourceSampleBufferPointers = + std::vector(audioFile->getNumChannels()); + while (samplesInResampledBuffer < numSamples) { + // Cut or expand the contiguousSourceSampleBuffer to the required size: + contiguousSourceSampleBuffer.resize( + // Note: we need at least one element in this buffer or else we'll + // have no channel pointers to pass into the juce::AudioFile + // constructor! + std::max(1LL, audioFile->getNumChannels() * inputSamplesRequired)); + std::fill_n(contiguousSourceSampleBuffer.begin(), + contiguousSourceSampleBuffer.size(), 0); + for (int c = 0; c < audioFile->getNumChannels(); c++) { + contiguousSourceSampleBufferPointers[c] = + contiguousSourceSampleBuffer.data() + (c * inputSamplesRequired); + } + + juce::AudioBuffer sourceSamples = juce::AudioBuffer( + contiguousSourceSampleBufferPointers.data(), + audioFile->getNumChannels(), inputSamplesRequired); std::optional> resamplerInput; - juce::AudioBuffer sourceSamples; if (inputSamplesRequired > 0) { - { - py::gil_scoped_acquire gil; - sourceSamples = - copyPyArrayIntoJuceBuffer(audioFile->read(inputSamplesRequired), - {ChannelLayout::NotInterleaved}); - } - + // Read from the underlying audioFile into our contiguous buffer, + // which causes the sourceSamples AudioBuffer to be filled: + long long samplesRead = audioFile->readInternal( + audioFile->getNumChannels(), inputSamplesRequired, + contiguousSourceSampleBuffer.data()); + + // Resize the sourceSamples buffer to the number of samples read, + // without reallocating the memory underneath + // (still points at contiguousSourceSampleBuffer): + sourceSamples.setSize(audioFile->getNumChannels(), samplesRead, + /* keepExistingContent */ true, + /* clearExtraSpace */ false, + /* avoidReallocating */ true); + + // If the underlying source ran out of samples, tell the resampler that + // we're done by feeding in an empty optional rather than an empty + // buffer: if (sourceSamples.getNumSamples() == 0) { resamplerInput = {}; } else { - resamplerInput = - std::optional>(sourceSamples); + resamplerInput = {sourceSamples}; } } else { - sourceSamples = - juce::AudioBuffer(audioFile->getNumChannels(), 0); - resamplerInput = std::optional>(sourceSamples); + resamplerInput = {sourceSamples}; } // TODO: Provide an alternative interface to write to the output buffer @@ -217,12 +261,12 @@ class ResampledReadableAudioFile inputSamplesRequired = 1; } positionInTargetSampleRate += resampledBuffer.getNumSamples(); - py::gil_scoped_acquire acquire; - return copyJuceBufferIntoPyArray(resampledBuffer, - ChannelLayout::NotInterleaved, 0); + return resampledBuffer; } void seek(long long targetPosition) { + py::gil_scoped_release release; + const juce::ScopedLock scopedLock(objectLock); long long positionToSeekToIncludingBuffers = targetPosition; long long targetPositionInSourceSampleRate = @@ -245,14 +289,11 @@ class ResampledReadableAudioFile positionInTargetSampleRate = (long long)(floatingPositionInTargetSampleRate); - { - py::gil_scoped_release release; - resampler.reset(); + resampler.reset(); - long long inputSamplesUsed = - resampler.advanceResamplerState(positionInTargetSampleRate); - targetPositionInSourceSampleRate = inputSamplesUsed; - } + long long inputSamplesUsed = + resampler.advanceResamplerState(positionInTargetSampleRate); + targetPositionInSourceSampleRate = inputSamplesUsed; audioFile->seek(std::max(0LL, targetPositionInSourceSampleRate)); @@ -262,7 +303,7 @@ class ResampledReadableAudioFile for (long long i = positionInTargetSampleRate; i < targetPosition; i += chunkSize) { long long numSamples = std::min(chunkSize, targetPosition - i); - this->read(numSamples); + this->readInternal(numSamples); } } diff --git a/tests/test_resampled_io.py b/tests/test_resampled_io.py index 4b08bafb..474e867b 100644 --- a/tests/test_resampled_io.py +++ b/tests/test_resampled_io.py @@ -15,13 +15,13 @@ # limitations under the License. import time -import pytest -from pedalboard import Resample -from pedalboard.io import AudioFile, StreamResampler, ResampledReadableAudioFile from io import BytesIO import numpy as np +import pytest +from pedalboard import Resample +from pedalboard.io import AudioFile, ResampledReadableAudioFile, StreamResampler from .utils import generate_sine_at @@ -191,8 +191,9 @@ def test_read_resampled_in_chunks( read_buffer.name = "test.wav" with AudioFile(read_buffer, "w", sample_rate, 1, bit_depth=32) as f: f.write(signal) + read_buffer.seek(0) - with AudioFile(BytesIO(read_buffer.getvalue())).resampled_to(target_sample_rate, quality) as f: + with AudioFile(read_buffer).resampled_to(target_sample_rate, quality) as f: samples_received = 0 while f.tell() < expected_signal.shape[-1]: expected_num_frames = min(chunk_size, expected_signal.shape[-1] - f.tell())