Avoid unnecessarily taking the GIL when using ResampledReadableAudioF…

…ile. (#293) * Add special case to release the GIL when reading from a BytesIO object. * Fix tests. * Disable flakey test on underpowered machines. * Decrease test flakiness. * Disable flaky test on CI. * Skip on CI properly. * Ensure that we can't read past the end of a BytesIO buffer. * Use one of virtual or override. * Apply suggestions from code review Co-authored-by: David Rubinstein <[email protected]> * Simplify read math for PythonMemoryViewInputStream. * Fix type annotations in docs. * Avoid unnecessarily taking the GIL when using ResampledReadableAudioFile. * Apply suggestions from code review Co-authored-by: David Rubinstein <[email protected]> * Clang-format. --------- Co-authored-by: David Rubinstein <[email protected]>
spotify · Feb 27, 2024 · f72cb87 · f72cb87
1 parent cd686d5
commit f72cb87
Show file tree

Hide file tree

Showing 4 changed files with 179 additions and 110 deletions.
diff --git a/pedalboard/io/AudioFileInit.h b/pedalboard/io/AudioFileInit.h
@@ -56,7 +56,7 @@ Unlike a typical ``open`` call:
    the sample rate of the file.
  - A file-like object can be provided to :class:`AudioFile`, allowing for reading and
    writing to in-memory streams or buffers. The provided file-like object must be seekable
-   and must be opened in binary mode (i.e.: ``io.BinaryIO`` instead of ``io.StringIO``.).
+   and must be opened in binary mode (i.e.: ``io.BytesIO`` instead of ``io.StringIO``). 
    A :class:`memoryview` object may also be provided when reading audio.
 
 
@@ -185,7 +185,7 @@ inline void init_audio_file(
           py::arg("cls"), py::arg("file_like"), py::arg("mode") = "r",
           "Open a file-like object for reading. The provided object must have "
           "``read``, ``seek``, ``tell``, and ``seekable`` methods, and must "
-          "return binary data (i.e.: ``open(..., \"w\")`` or ``io.BinaryIO``, "
+          "return binary data (i.e.: ``open(..., \"w\")`` or ``io.BytesIO``, "
           "etc.).")
       .def_static(
           "__new__",

diff --git a/pedalboard/io/ReadableAudioFile.h b/pedalboard/io/ReadableAudioFile.h
@@ -270,96 +270,123 @@ class ReadableAudioFile
 
     {
       py::gil_scoped_release release;
+      numSamplesToKeep =
+          readInternal(numChannels, numSamples, (float *)outputInfo.ptr);
+      PythonException::raise();
+    }
 
-      // If the file being read does not have enough content, it _should_ pad
-      // the rest of the array with zeroes. Unfortunately, this does not seem to
-      // be true in practice, so we pre-zero the array to be returned here:
-      std::memset((void *)outputInfo.ptr, 0,
-                  numChannels * numSamples * sizeof(float));
+    if (numSamplesToKeep < numSamples) {
+      buffer.resize({(long long)numChannels, (long long)numSamplesToKeep});
+    }
 
-      float **channelPointers = (float **)alloca(numChannels * sizeof(float *));
-      for (long long c = 0; c < numChannels; c++) {
-        channelPointers[c] = ((float *)outputInfo.ptr) + (numSamples * c);
-      }
+    return buffer;
+  }
 
-      if (reader->usesFloatingPointData || reader->bitsPerSample == 32) {
-        auto readResult = reader->read(channelPointers, numChannels,
-                                       currentPosition, numSamples);
-        PythonException::raise();
+  /**
+   * Read the given number of frames (samples in each channel) from this audio
+   * file into the given output pointers. This method does not take or hold the
+   * GIL, as no Python methods (like the py::array_t constructor) are
+   * called directly (except for in the error case).
+   *
+   * @param numChannels The number of channels to read from the file
+   * @param numSamplesToFill The number of frames to read from the file
+   * @param outputPointer A pointer to the contiguous floating-point output
+   *                      array to write to of shape [numChannels,
+   * numSamplesToFill].
+   *
+   * @return the number of samples that were actually read from the file
+   */
+  long long readInternal(const long long numChannels,
+                         const long long numSamplesToFill,
+                         float *outputPointer) {
+    // If the file being read does not have enough content, it _should_ pad
+    // the rest of the array with zeroes. Unfortunately, this does not seem to
+    // be true in practice, so we pre-zero the array to be returned here:
+    std::fill_n(outputPointer, numChannels * numSamplesToFill, 0);
+
+    long long numSamples = std::min(
+        numSamplesToFill,
+        (reader->lengthInSamples + (lengthCorrection ? *lengthCorrection : 0)) -
+            currentPosition);
 
-        juce::int64 samplesRead = numSamples;
-        if (juce::AudioFormatReaderWithPosition *positionAware =
-                dynamic_cast<juce::AudioFormatReaderWithPosition *>(
-                    reader.get())) {
-          samplesRead = positionAware->getCurrentPosition() - currentPosition;
-        }
+    long long numSamplesToKeep = numSamples;
 
-        bool hitEndOfFile =
-            (samplesRead + currentPosition) == reader->lengthInSamples;
-
-        // We read some data, but not as much as we asked for!
-        // This will only happen for lossy, header-optional formats
-        // like MP3.
-        if (samplesRead < numSamples || hitEndOfFile) {
-          lengthCorrection =
-              (samplesRead + currentPosition) - reader->lengthInSamples;
-        } else if (!readResult) {
-          throwReadError(currentPosition, numSamples, samplesRead);
-        }
+    float **channelPointers = (float **)alloca(numChannels * sizeof(float *));
+    for (long long c = 0; c < numChannels; c++) {
+      channelPointers[c] = ((float *)outputPointer) + (numSamples * c);
+    }
 
-        numSamplesToKeep = samplesRead;
-      } else {
-        // If the audio is stored in an integral format, read it as integers
-        // and do the floating-point conversion ourselves to work around
-        // floating-point imprecision in JUCE when reading formats smaller than
-        // 32-bit (i.e.: 16-bit audio is off by about 0.003%)
-        auto readResult =
-            reader->readSamples((int **)channelPointers, numChannels, 0,
-                                currentPosition, numSamples);
-        PythonException::raise();
-        if (!readResult) {
-          throwReadError(currentPosition, numSamples);
-        }
+    if (reader->usesFloatingPointData || reader->bitsPerSample == 32) {
+      auto readResult = reader->read(channelPointers, numChannels,
+                                     currentPosition, numSamples);
 
-        // When converting 24-bit, 16-bit, or 8-bit data from int to float,
-        // the values provided by the above read() call are shifted left
-        // (such that the least significant bits are all zero)
-        // JUCE will then divide these values by 0x7FFFFFFF, even though
-        // the least significant bits are zero, effectively losing precision.
-        // Instead, here we set the scale factor appropriately.
-        int maxValueAsInt;
-        switch (reader->bitsPerSample) {
-        case 24:
-          maxValueAsInt = 0x7FFFFF00;
-          break;
-        case 16:
-          maxValueAsInt = 0x7FFF0000;
-          break;
-        case 8:
-          maxValueAsInt = 0x7F000000;
-          break;
-        default:
-          throw std::runtime_error("Not sure how to convert data from " +
-                                   std::to_string(reader->bitsPerSample) +
-                                   " bits per sample to floating point!");
-        }
-        float scaleFactor = 1.0f / static_cast<float>(maxValueAsInt);
+      juce::int64 samplesRead = numSamples;
+      if (juce::AudioFormatReaderWithPosition *positionAware =
+              dynamic_cast<juce::AudioFormatReaderWithPosition *>(
+                  reader.get())) {
+        samplesRead = positionAware->getCurrentPosition() - currentPosition;
+      }
 
-        for (long long c = 0; c < numChannels; c++) {
-          juce::FloatVectorOperations::convertFixedToFloat(
-              channelPointers[c], (const int *)channelPointers[c], scaleFactor,
-              static_cast<int>(numSamples));
-        }
+      bool hitEndOfFile =
+          (samplesRead + currentPosition) == reader->lengthInSamples;
+
+      // We read some data, but not as much as we asked for!
+      // This will only happen for lossy, header-optional formats
+      // like MP3.
+      if (samplesRead < numSamples || hitEndOfFile) {
+        lengthCorrection =
+            (samplesRead + currentPosition) - reader->lengthInSamples;
+      } else if (!readResult) {
+        PythonException::raise();
+        throwReadError(currentPosition, numSamples, samplesRead);
+      }
+      numSamplesToKeep = samplesRead;
+    } else {
+      // If the audio is stored in an integral format, read it as integers
+      // and do the floating-point conversion ourselves to work around
+      // floating-point imprecision in JUCE when reading formats smaller than
+      // 32-bit (i.e.: 16-bit audio is off by about 0.003%)
+      auto readResult =
+          reader->readSamples((int **)channelPointers, numChannels, 0,
+                              currentPosition, numSamplesToKeep);
+      if (!readResult) {
+        PythonException::raise();
+        throwReadError(currentPosition, numSamples);
       }
-    }
 
-    currentPosition += numSamplesToKeep;
+      // When converting 24-bit, 16-bit, or 8-bit data from int to float,
+      // the values provided by the above read() call are shifted left
+      // (such that the least significant bits are all zero)
+      // JUCE will then divide these values by 0x7FFFFFFF, even though
+      // the least significant bits are zero, effectively losing precision.
+      // Instead, here we set the scale factor appropriately.
+      int maxValueAsInt;
+      switch (reader->bitsPerSample) {
+      case 24:
+        maxValueAsInt = 0x7FFFFF00;
+        break;
+      case 16:
+        maxValueAsInt = 0x7FFF0000;
+        break;
+      case 8:
+        maxValueAsInt = 0x7F000000;
+        break;
+      default:
+        throw std::runtime_error("Not sure how to convert data from " +
+                                 std::to_string(reader->bitsPerSample) +
+                                 " bits per sample to floating point!");
+      }
+      float scaleFactor = 1.0f / static_cast<float>(maxValueAsInt);
 
-    if (numSamplesToKeep < numSamples) {
-      buffer.resize({(long long)numChannels, (long long)numSamplesToKeep});
+      for (long long c = 0; c < numChannels; c++) {
+        juce::FloatVectorOperations::convertFixedToFloat(
+            channelPointers[c], (const int *)channelPointers[c], scaleFactor,
+            static_cast<int>(numSamples));
+      }
     }
 
-    return buffer;
+    currentPosition += numSamplesToKeep;
+    return numSamplesToKeep;
   }
 
   py::array readRaw(std::variant<double, long long> numSamplesVariant) {
@@ -618,7 +645,7 @@ class ReadableAudioFile
   std::unique_ptr<juce::AudioFormatReader> reader;
   juce::CriticalSection objectLock;
 
-  int currentPosition = 0;
+  long long currentPosition = 0;
 
   // Certain files (notably CBR MP3 files) can report the wrong number of
   // frames until the entire file is scanned. This field stores the delta

diff --git a/pedalboard/io/ResampledReadableAudioFile.h b/pedalboard/io/ResampledReadableAudioFile.h
@@ -100,7 +100,6 @@ class ResampledReadableAudioFile
   ResamplingQuality getQuality() const { return resampler.getQuality(); }
 
   py::array_t<float> read(std::variant<double, long long> numSamplesVariant) {
-    py::gil_scoped_release release;
     long long numSamples = parseNumSamples(numSamplesVariant);
     if (numSamples == 0)
       throw std::domain_error(
@@ -109,8 +108,25 @@ class ResampledReadableAudioFile
           "memory. Please pass a number of frames to read (available from "
           "the 'frames' attribute).");
 
-    const juce::ScopedLock scopedLock(objectLock);
+    py::gil_scoped_release release;
+    juce::AudioBuffer<float> resampledBuffer;
+    {
+      const juce::ScopedLock scopedLock(objectLock);
+      resampledBuffer = readInternal(numSamples);
+    }
+    py::gil_scoped_acquire acquire;
+    return copyJuceBufferIntoPyArray(resampledBuffer,
+                                     ChannelLayout::NotInterleaved, 0);
+  }
 
+  /**
+   * Read samples from the underlying audio file, resample them, and return a
+   * juce::AudioBuffer containing the result without holding the GIL.
+   *
+   * @param numSamples The number of samples to read.
+   * @return juce::AudioBuffer<float> The resulting audio.
+   */
+  juce::AudioBuffer<float> readInternal(long long numSamples) {
     long long samplesInResampledBuffer = 0;
     juce::AudioBuffer<float> resampledBuffer(audioFile->getNumChannels(),
                                              numSamples);
@@ -149,28 +165,56 @@ class ResampledReadableAudioFile
                      audioFile->getSampleRateAsDouble()) /
                     resampler.getTargetSampleRate());
 
+    // Make a juce::AudioBuffer that contains contiguous memory,
+    // which we can pass to readInternal:
+    std::vector<float> contiguousSourceSampleBuffer;
+    std::vector<float *> contiguousSourceSampleBufferPointers =
+        std::vector<float *>(audioFile->getNumChannels());
+
     while (samplesInResampledBuffer < numSamples) {
+      // Cut or expand the contiguousSourceSampleBuffer to the required size:
+      contiguousSourceSampleBuffer.resize(
+          // Note: we need at least one element in this buffer or else we'll
+          // have no channel pointers to pass into the juce::AudioFile
+          // constructor!
+          std::max(1LL, audioFile->getNumChannels() * inputSamplesRequired));
+      std::fill_n(contiguousSourceSampleBuffer.begin(),
+                  contiguousSourceSampleBuffer.size(), 0);
+      for (int c = 0; c < audioFile->getNumChannels(); c++) {
+        contiguousSourceSampleBufferPointers[c] =
+            contiguousSourceSampleBuffer.data() + (c * inputSamplesRequired);
+      }
+
+      juce::AudioBuffer<float> sourceSamples = juce::AudioBuffer<float>(
+          contiguousSourceSampleBufferPointers.data(),
+          audioFile->getNumChannels(), inputSamplesRequired);
       std::optional<juce::AudioBuffer<float>> resamplerInput;
 
-      juce::AudioBuffer<float> sourceSamples;
       if (inputSamplesRequired > 0) {
-        {
-          py::gil_scoped_acquire gil;
-          sourceSamples =
-              copyPyArrayIntoJuceBuffer(audioFile->read(inputSamplesRequired),
-                                        {ChannelLayout::NotInterleaved});
-        }
-
+        // Read from the underlying audioFile into our contiguous buffer,
+        // which causes the sourceSamples AudioBuffer to be filled:
+        long long samplesRead = audioFile->readInternal(
+            audioFile->getNumChannels(), inputSamplesRequired,
+            contiguousSourceSampleBuffer.data());
+
+        // Resize the sourceSamples buffer to the number of samples read,
+        // without reallocating the memory underneath
+        // (still points at contiguousSourceSampleBuffer):
+        sourceSamples.setSize(audioFile->getNumChannels(), samplesRead,
+                              /* keepExistingContent */ true,
+                              /* clearExtraSpace */ false,
+                              /* avoidReallocating */ true);
+
+        // If the underlying source ran out of samples, tell the resampler that
+        // we're done by feeding in an empty optional rather than an empty
+        // buffer:
         if (sourceSamples.getNumSamples() == 0) {
           resamplerInput = {};
         } else {
-          resamplerInput =
-              std::optional<juce::AudioBuffer<float>>(sourceSamples);
+          resamplerInput = {sourceSamples};
         }
       } else {
-        sourceSamples =
-            juce::AudioBuffer<float>(audioFile->getNumChannels(), 0);
-        resamplerInput = std::optional<juce::AudioBuffer<float>>(sourceSamples);
+        resamplerInput = {sourceSamples};
       }
 
       // TODO: Provide an alternative interface to write to the output buffer
@@ -217,12 +261,12 @@ class ResampledReadableAudioFile
       inputSamplesRequired = 1;
     }
     positionInTargetSampleRate += resampledBuffer.getNumSamples();
-    py::gil_scoped_acquire acquire;
-    return copyJuceBufferIntoPyArray(resampledBuffer,
-                                     ChannelLayout::NotInterleaved, 0);
+    return resampledBuffer;
   }
 
   void seek(long long targetPosition) {
+    py::gil_scoped_release release;
+    const juce::ScopedLock scopedLock(objectLock);
     long long positionToSeekToIncludingBuffers = targetPosition;
 
     long long targetPositionInSourceSampleRate =
@@ -245,14 +289,11 @@ class ResampledReadableAudioFile
     positionInTargetSampleRate =
         (long long)(floatingPositionInTargetSampleRate);
 
-    {
-      py::gil_scoped_release release;
-      resampler.reset();
+    resampler.reset();
 
-      long long inputSamplesUsed =
-          resampler.advanceResamplerState(positionInTargetSampleRate);
-      targetPositionInSourceSampleRate = inputSamplesUsed;
-    }
+    long long inputSamplesUsed =
+        resampler.advanceResamplerState(positionInTargetSampleRate);
+    targetPositionInSourceSampleRate = inputSamplesUsed;
 
     audioFile->seek(std::max(0LL, targetPositionInSourceSampleRate));
 
@@ -262,7 +303,7 @@ class ResampledReadableAudioFile
     for (long long i = positionInTargetSampleRate; i < targetPosition;
          i += chunkSize) {
       long long numSamples = std::min(chunkSize, targetPosition - i);
-      this->read(numSamples);
+      this->readInternal(numSamples);
     }
   }