Skip to content

Commit

Permalink
Update requirements for scikit-learn 1.3 breaking changes (#659)
Browse files Browse the repository at this point in the history
  • Loading branch information
mmcauliffe authored Jul 3, 2023
1 parent 0f5a887 commit c8ef607
Show file tree
Hide file tree
Showing 12 changed files with 26 additions and 13 deletions.
1 change: 1 addition & 0 deletions ci/docker_environment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ dependencies:
- ffmpeg
- pynini
- openfst
- scikit-learn<1.3
- hdbscan
- baumwelch
- ngram
Expand Down
8 changes: 7 additions & 1 deletion docs/source/changelog/changelog_2.2.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,16 @@
2.2 Changelog
*************

2.2.15
======

- Fixed a crash when using fine-tuned boundaries
- Pinned scikit-learn to versions less than 1.3, due to it breaking the hdbscan package

2.2.13
======

- Fixes an issue in using sqlite during subset creation for training
- Fixed an issue in using sqlite during subset creation for training

2.2.12
======
Expand Down
2 changes: 1 addition & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ dependencies:
- scipy
- pynini
- openfst
- scikit-learn
- scikit-learn<1.3
- hdbscan
- baumwelch
- ngram
Expand Down
6 changes: 2 additions & 4 deletions montreal_forced_aligner/alignment/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1031,9 +1031,7 @@ def fine_tune_alignments(self) -> None:
continue

update_mappings.extend(result[0])
update_mappings.extend(
[{"id": x, "begin": 0, "end": 0, "label": ""} for x in result[1]]
)
update_mappings.extend([{"id": x, "begin": 0, "end": 0} for x in result[1]])
pbar.update(1)
for p in procs:
p.join()
Expand All @@ -1049,7 +1047,7 @@ def fine_tune_alignments(self) -> None:
for result in function.run():
update_mappings.extend(result[0])
update_mappings.extend(
[{"id": x, "begin": 0, "end": 0, "label": ""} for x in result[1]]
[{"id": x, "begin": 0, "end": 0} for x in result[1]]
)
pbar.update(1)
bulk_update(session, PhoneInterval, update_mappings)
Expand Down
2 changes: 2 additions & 0 deletions montreal_forced_aligner/alignment/multiprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -2060,6 +2060,8 @@ def cleanup_g2p_intervals(
Parameters
----------
utterance_name: str
Name of the current utterance
intervals: list[:class:`~montreal_forced_aligner.data.CtmInterval`]
Intervals to process
Expand Down
6 changes: 5 additions & 1 deletion montreal_forced_aligner/command_line/mfa.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
validate_dictionary_cli,
)
from montreal_forced_aligner.config import GLOBAL_CONFIG, update_command_history
from montreal_forced_aligner.exceptions import DatabaseError
from montreal_forced_aligner.utils import check_third_party

BEGIN = time.time()
Expand Down Expand Up @@ -152,7 +153,10 @@ def mfa_cli(ctx: click.Context) -> None:
atexit.register(hooks.history_save_handler)
atexit.register(cleanup_logger)
if auto_server:
atexit.register(stop_server)
try:
atexit.register(stop_server)
except DatabaseError:
pass

mp.freeze_support()

Expand Down
4 changes: 3 additions & 1 deletion montreal_forced_aligner/textgrid.py
Original file line number Diff line number Diff line change
Expand Up @@ -370,7 +370,9 @@ def export_textgrid(
tg.addTier(tgio.IntervalTier(tier_name, [], minT=0, maxT=duration))
tier = tg.getTier(tier_name)
for i, a in enumerate(sorted(intervals, key=lambda x: x.begin)):
if duration - a.end < (frame_shift * 2): # Fix rounding issues
if i == len(intervals) - 1 and duration - a.end < (
frame_shift * 2
): # Fix rounding issues
a.end = duration
if i > 0 and tier.entries[-1].end > a.to_tg_interval().start:
a.begin = tier.entries[-1].end
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ pyyaml
librosa
numpy
scipy
scikit-learn
scikit-learn<1.3
requests
biopython==1.79
dataclassy
Expand Down
1 change: 1 addition & 0 deletions rtd_environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ dependencies:
- pgvector
- pgvector-python
- postgresql
- scikit-learn<1.3
- hdbscan
- psycopg2
- biopython=1.79
Expand Down
3 changes: 1 addition & 2 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ keywords = phonology
[options]
packages = find:
install_requires =
biopython
biopython<=1.79
click
dataclassy
Expand All @@ -50,7 +49,7 @@ install_requires =
requests
rich
rich-click
scikit-learn
scikit-learn<1.3
seaborn
sqlalchemy>=1.4
tqdm
Expand Down
2 changes: 1 addition & 1 deletion tests/data/lab/acoustic_corpus.lab
Original file line number Diff line number Diff line change
@@ -1 +1 @@
this is the acoustic corpus i'm talking pretty fast here there's nothing going else going on we're just yknow there's some speech errors but who cares um this is me talking really slow and slightly lower in intensity we're just saying some words and here's some more words words word words um and that should be all thanks
this is the acoustic corpus i'm talking pretty fast here there's nothing going else going on we're just yknow there's some speech errors but who cares um this is me talking really slow and slightly lower in intensity uh we're just saying some words and here's some more words words words words um and that should be all thanks
2 changes: 1 addition & 1 deletion tests/test_alignment_pretrained.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def test_align_sick(
.filter(Word.word_type != WordType.silence)
.count()
)
assert word_interval_count == 370
assert word_interval_count == 372
assert "AY_S" in a.phone_mapping
assert os.path.exists(os.path.join(export_directory, "michael", "acoustic_corpus.TextGrid"))
a.clean_working_directory()
Expand Down

0 comments on commit c8ef607

Please sign in to comment.