Skip to content

Commit

Permalink
Avoid unnecessarily taking the GIL when using ResampledReadableAudioF…
Browse files Browse the repository at this point in the history
…ile. (#293)

* Add special case to release the GIL when reading from a BytesIO object.

* Fix tests.

* Disable flakey test on underpowered machines.

* Decrease test flakiness.

* Disable flaky test on CI.

* Skip on CI properly.

* Ensure that we can't read past the end of a BytesIO buffer.

* Use one of virtual or override.

* Apply suggestions from code review

Co-authored-by: David Rubinstein <[email protected]>

* Simplify read math for PythonMemoryViewInputStream.

* Fix type annotations in docs.

* Avoid unnecessarily taking the GIL when using ResampledReadableAudioFile.

* Apply suggestions from code review

Co-authored-by: David Rubinstein <[email protected]>

* Clang-format.

---------

Co-authored-by: David Rubinstein <[email protected]>
  • Loading branch information
psobot and drubinstein authored Feb 27, 2024
1 parent cd686d5 commit f72cb87
Show file tree
Hide file tree
Showing 4 changed files with 179 additions and 110 deletions.
4 changes: 2 additions & 2 deletions pedalboard/io/AudioFileInit.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ Unlike a typical ``open`` call:
the sample rate of the file.
- A file-like object can be provided to :class:`AudioFile`, allowing for reading and
writing to in-memory streams or buffers. The provided file-like object must be seekable
and must be opened in binary mode (i.e.: ``io.BinaryIO`` instead of ``io.StringIO``.).
and must be opened in binary mode (i.e.: ``io.BytesIO`` instead of ``io.StringIO``).
A :class:`memoryview` object may also be provided when reading audio.
Expand Down Expand Up @@ -185,7 +185,7 @@ inline void init_audio_file(
py::arg("cls"), py::arg("file_like"), py::arg("mode") = "r",
"Open a file-like object for reading. The provided object must have "
"``read``, ``seek``, ``tell``, and ``seekable`` methods, and must "
"return binary data (i.e.: ``open(..., \"w\")`` or ``io.BinaryIO``, "
"return binary data (i.e.: ``open(..., \"w\")`` or ``io.BytesIO``, "
"etc.).")
.def_static(
"__new__",
Expand Down
183 changes: 105 additions & 78 deletions pedalboard/io/ReadableAudioFile.h
Original file line number Diff line number Diff line change
Expand Up @@ -270,96 +270,123 @@ class ReadableAudioFile

{
py::gil_scoped_release release;
numSamplesToKeep =
readInternal(numChannels, numSamples, (float *)outputInfo.ptr);
PythonException::raise();
}

// If the file being read does not have enough content, it _should_ pad
// the rest of the array with zeroes. Unfortunately, this does not seem to
// be true in practice, so we pre-zero the array to be returned here:
std::memset((void *)outputInfo.ptr, 0,
numChannels * numSamples * sizeof(float));
if (numSamplesToKeep < numSamples) {
buffer.resize({(long long)numChannels, (long long)numSamplesToKeep});
}

float **channelPointers = (float **)alloca(numChannels * sizeof(float *));
for (long long c = 0; c < numChannels; c++) {
channelPointers[c] = ((float *)outputInfo.ptr) + (numSamples * c);
}
return buffer;
}

if (reader->usesFloatingPointData || reader->bitsPerSample == 32) {
auto readResult = reader->read(channelPointers, numChannels,
currentPosition, numSamples);
PythonException::raise();
/**
* Read the given number of frames (samples in each channel) from this audio
* file into the given output pointers. This method does not take or hold the
* GIL, as no Python methods (like the py::array_t constructor) are
* called directly (except for in the error case).
*
* @param numChannels The number of channels to read from the file
* @param numSamplesToFill The number of frames to read from the file
* @param outputPointer A pointer to the contiguous floating-point output
* array to write to of shape [numChannels,
* numSamplesToFill].
*
* @return the number of samples that were actually read from the file
*/
long long readInternal(const long long numChannels,
const long long numSamplesToFill,
float *outputPointer) {
// If the file being read does not have enough content, it _should_ pad
// the rest of the array with zeroes. Unfortunately, this does not seem to
// be true in practice, so we pre-zero the array to be returned here:
std::fill_n(outputPointer, numChannels * numSamplesToFill, 0);

long long numSamples = std::min(
numSamplesToFill,
(reader->lengthInSamples + (lengthCorrection ? *lengthCorrection : 0)) -
currentPosition);

juce::int64 samplesRead = numSamples;
if (juce::AudioFormatReaderWithPosition *positionAware =
dynamic_cast<juce::AudioFormatReaderWithPosition *>(
reader.get())) {
samplesRead = positionAware->getCurrentPosition() - currentPosition;
}
long long numSamplesToKeep = numSamples;

bool hitEndOfFile =
(samplesRead + currentPosition) == reader->lengthInSamples;

// We read some data, but not as much as we asked for!
// This will only happen for lossy, header-optional formats
// like MP3.
if (samplesRead < numSamples || hitEndOfFile) {
lengthCorrection =
(samplesRead + currentPosition) - reader->lengthInSamples;
} else if (!readResult) {
throwReadError(currentPosition, numSamples, samplesRead);
}
float **channelPointers = (float **)alloca(numChannels * sizeof(float *));
for (long long c = 0; c < numChannels; c++) {
channelPointers[c] = ((float *)outputPointer) + (numSamples * c);
}

numSamplesToKeep = samplesRead;
} else {
// If the audio is stored in an integral format, read it as integers
// and do the floating-point conversion ourselves to work around
// floating-point imprecision in JUCE when reading formats smaller than
// 32-bit (i.e.: 16-bit audio is off by about 0.003%)
auto readResult =
reader->readSamples((int **)channelPointers, numChannels, 0,
currentPosition, numSamples);
PythonException::raise();
if (!readResult) {
throwReadError(currentPosition, numSamples);
}
if (reader->usesFloatingPointData || reader->bitsPerSample == 32) {
auto readResult = reader->read(channelPointers, numChannels,
currentPosition, numSamples);

// When converting 24-bit, 16-bit, or 8-bit data from int to float,
// the values provided by the above read() call are shifted left
// (such that the least significant bits are all zero)
// JUCE will then divide these values by 0x7FFFFFFF, even though
// the least significant bits are zero, effectively losing precision.
// Instead, here we set the scale factor appropriately.
int maxValueAsInt;
switch (reader->bitsPerSample) {
case 24:
maxValueAsInt = 0x7FFFFF00;
break;
case 16:
maxValueAsInt = 0x7FFF0000;
break;
case 8:
maxValueAsInt = 0x7F000000;
break;
default:
throw std::runtime_error("Not sure how to convert data from " +
std::to_string(reader->bitsPerSample) +
" bits per sample to floating point!");
}
float scaleFactor = 1.0f / static_cast<float>(maxValueAsInt);
juce::int64 samplesRead = numSamples;
if (juce::AudioFormatReaderWithPosition *positionAware =
dynamic_cast<juce::AudioFormatReaderWithPosition *>(
reader.get())) {
samplesRead = positionAware->getCurrentPosition() - currentPosition;
}

for (long long c = 0; c < numChannels; c++) {
juce::FloatVectorOperations::convertFixedToFloat(
channelPointers[c], (const int *)channelPointers[c], scaleFactor,
static_cast<int>(numSamples));
}
bool hitEndOfFile =
(samplesRead + currentPosition) == reader->lengthInSamples;

// We read some data, but not as much as we asked for!
// This will only happen for lossy, header-optional formats
// like MP3.
if (samplesRead < numSamples || hitEndOfFile) {
lengthCorrection =
(samplesRead + currentPosition) - reader->lengthInSamples;
} else if (!readResult) {
PythonException::raise();
throwReadError(currentPosition, numSamples, samplesRead);
}
numSamplesToKeep = samplesRead;
} else {
// If the audio is stored in an integral format, read it as integers
// and do the floating-point conversion ourselves to work around
// floating-point imprecision in JUCE when reading formats smaller than
// 32-bit (i.e.: 16-bit audio is off by about 0.003%)
auto readResult =
reader->readSamples((int **)channelPointers, numChannels, 0,
currentPosition, numSamplesToKeep);
if (!readResult) {
PythonException::raise();
throwReadError(currentPosition, numSamples);
}
}

currentPosition += numSamplesToKeep;
// When converting 24-bit, 16-bit, or 8-bit data from int to float,
// the values provided by the above read() call are shifted left
// (such that the least significant bits are all zero)
// JUCE will then divide these values by 0x7FFFFFFF, even though
// the least significant bits are zero, effectively losing precision.
// Instead, here we set the scale factor appropriately.
int maxValueAsInt;
switch (reader->bitsPerSample) {
case 24:
maxValueAsInt = 0x7FFFFF00;
break;
case 16:
maxValueAsInt = 0x7FFF0000;
break;
case 8:
maxValueAsInt = 0x7F000000;
break;
default:
throw std::runtime_error("Not sure how to convert data from " +
std::to_string(reader->bitsPerSample) +
" bits per sample to floating point!");
}
float scaleFactor = 1.0f / static_cast<float>(maxValueAsInt);

if (numSamplesToKeep < numSamples) {
buffer.resize({(long long)numChannels, (long long)numSamplesToKeep});
for (long long c = 0; c < numChannels; c++) {
juce::FloatVectorOperations::convertFixedToFloat(
channelPointers[c], (const int *)channelPointers[c], scaleFactor,
static_cast<int>(numSamples));
}
}

return buffer;
currentPosition += numSamplesToKeep;
return numSamplesToKeep;
}

py::array readRaw(std::variant<double, long long> numSamplesVariant) {
Expand Down Expand Up @@ -618,7 +645,7 @@ class ReadableAudioFile
std::unique_ptr<juce::AudioFormatReader> reader;
juce::CriticalSection objectLock;

int currentPosition = 0;
long long currentPosition = 0;

// Certain files (notably CBR MP3 files) can report the wrong number of
// frames until the entire file is scanned. This field stores the delta
Expand Down
93 changes: 67 additions & 26 deletions pedalboard/io/ResampledReadableAudioFile.h
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,6 @@ class ResampledReadableAudioFile
ResamplingQuality getQuality() const { return resampler.getQuality(); }

py::array_t<float> read(std::variant<double, long long> numSamplesVariant) {
py::gil_scoped_release release;
long long numSamples = parseNumSamples(numSamplesVariant);
if (numSamples == 0)
throw std::domain_error(
Expand All @@ -109,8 +108,25 @@ class ResampledReadableAudioFile
"memory. Please pass a number of frames to read (available from "
"the 'frames' attribute).");

const juce::ScopedLock scopedLock(objectLock);
py::gil_scoped_release release;
juce::AudioBuffer<float> resampledBuffer;
{
const juce::ScopedLock scopedLock(objectLock);
resampledBuffer = readInternal(numSamples);
}
py::gil_scoped_acquire acquire;
return copyJuceBufferIntoPyArray(resampledBuffer,
ChannelLayout::NotInterleaved, 0);
}

/**
* Read samples from the underlying audio file, resample them, and return a
* juce::AudioBuffer containing the result without holding the GIL.
*
* @param numSamples The number of samples to read.
* @return juce::AudioBuffer<float> The resulting audio.
*/
juce::AudioBuffer<float> readInternal(long long numSamples) {
long long samplesInResampledBuffer = 0;
juce::AudioBuffer<float> resampledBuffer(audioFile->getNumChannels(),
numSamples);
Expand Down Expand Up @@ -149,28 +165,56 @@ class ResampledReadableAudioFile
audioFile->getSampleRateAsDouble()) /
resampler.getTargetSampleRate());

// Make a juce::AudioBuffer that contains contiguous memory,
// which we can pass to readInternal:
std::vector<float> contiguousSourceSampleBuffer;
std::vector<float *> contiguousSourceSampleBufferPointers =
std::vector<float *>(audioFile->getNumChannels());

while (samplesInResampledBuffer < numSamples) {
// Cut or expand the contiguousSourceSampleBuffer to the required size:
contiguousSourceSampleBuffer.resize(
// Note: we need at least one element in this buffer or else we'll
// have no channel pointers to pass into the juce::AudioFile
// constructor!
std::max(1LL, audioFile->getNumChannels() * inputSamplesRequired));
std::fill_n(contiguousSourceSampleBuffer.begin(),
contiguousSourceSampleBuffer.size(), 0);
for (int c = 0; c < audioFile->getNumChannels(); c++) {
contiguousSourceSampleBufferPointers[c] =
contiguousSourceSampleBuffer.data() + (c * inputSamplesRequired);
}

juce::AudioBuffer<float> sourceSamples = juce::AudioBuffer<float>(
contiguousSourceSampleBufferPointers.data(),
audioFile->getNumChannels(), inputSamplesRequired);
std::optional<juce::AudioBuffer<float>> resamplerInput;

juce::AudioBuffer<float> sourceSamples;
if (inputSamplesRequired > 0) {
{
py::gil_scoped_acquire gil;
sourceSamples =
copyPyArrayIntoJuceBuffer(audioFile->read(inputSamplesRequired),
{ChannelLayout::NotInterleaved});
}

// Read from the underlying audioFile into our contiguous buffer,
// which causes the sourceSamples AudioBuffer to be filled:
long long samplesRead = audioFile->readInternal(
audioFile->getNumChannels(), inputSamplesRequired,
contiguousSourceSampleBuffer.data());

// Resize the sourceSamples buffer to the number of samples read,
// without reallocating the memory underneath
// (still points at contiguousSourceSampleBuffer):
sourceSamples.setSize(audioFile->getNumChannels(), samplesRead,
/* keepExistingContent */ true,
/* clearExtraSpace */ false,
/* avoidReallocating */ true);

// If the underlying source ran out of samples, tell the resampler that
// we're done by feeding in an empty optional rather than an empty
// buffer:
if (sourceSamples.getNumSamples() == 0) {
resamplerInput = {};
} else {
resamplerInput =
std::optional<juce::AudioBuffer<float>>(sourceSamples);
resamplerInput = {sourceSamples};
}
} else {
sourceSamples =
juce::AudioBuffer<float>(audioFile->getNumChannels(), 0);
resamplerInput = std::optional<juce::AudioBuffer<float>>(sourceSamples);
resamplerInput = {sourceSamples};
}

// TODO: Provide an alternative interface to write to the output buffer
Expand Down Expand Up @@ -217,12 +261,12 @@ class ResampledReadableAudioFile
inputSamplesRequired = 1;
}
positionInTargetSampleRate += resampledBuffer.getNumSamples();
py::gil_scoped_acquire acquire;
return copyJuceBufferIntoPyArray(resampledBuffer,
ChannelLayout::NotInterleaved, 0);
return resampledBuffer;
}

void seek(long long targetPosition) {
py::gil_scoped_release release;
const juce::ScopedLock scopedLock(objectLock);
long long positionToSeekToIncludingBuffers = targetPosition;

long long targetPositionInSourceSampleRate =
Expand All @@ -245,14 +289,11 @@ class ResampledReadableAudioFile
positionInTargetSampleRate =
(long long)(floatingPositionInTargetSampleRate);

{
py::gil_scoped_release release;
resampler.reset();
resampler.reset();

long long inputSamplesUsed =
resampler.advanceResamplerState(positionInTargetSampleRate);
targetPositionInSourceSampleRate = inputSamplesUsed;
}
long long inputSamplesUsed =
resampler.advanceResamplerState(positionInTargetSampleRate);
targetPositionInSourceSampleRate = inputSamplesUsed;

audioFile->seek(std::max(0LL, targetPositionInSourceSampleRate));

Expand All @@ -262,7 +303,7 @@ class ResampledReadableAudioFile
for (long long i = positionInTargetSampleRate; i < targetPosition;
i += chunkSize) {
long long numSamples = std::min(chunkSize, targetPosition - i);
this->read(numSamples);
this->readInternal(numSamples);
}
}

Expand Down
Loading

0 comments on commit f72cb87

Please sign in to comment.