From 4f75d5cf476cf091ca866975de709710961a822e Mon Sep 17 00:00:00 2001 From: severinsimmler Date: Tue, 24 May 2022 15:31:36 +0200 Subject: [PATCH] chore: cleanup --- chaine/crf.py | 2 +- examples/notebooks/ner.ipynb | 658 ++++++++++++++++++++++++++++------- 2 files changed, 527 insertions(+), 133 deletions(-) diff --git a/chaine/crf.py b/chaine/crf.py index 0eeaefd..cd3cb3a 100755 --- a/chaine/crf.py +++ b/chaine/crf.py @@ -220,7 +220,7 @@ def params(self) -> dict[str, Union[str, int, float, bool]]: class HyperparameterOptimizer: def __init__( self, - trials: int = 20, + trials: int = 10, seed: Optional[int] = None, metric: str = "f1", folds: int = 5, diff --git a/examples/notebooks/ner.ipynb b/examples/notebooks/ner.ipynb index d561a5e..8377f79 100755 --- a/examples/notebooks/ner.ipynb +++ b/examples/notebooks/ner.ipynb @@ -37,34 +37,6 @@ "id": "6726588b-57a0-4996-9d4c-6a0856826f0b", "metadata": {}, "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "46e78394db0a4ba69e53f62a858a8dd7", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Downloading builder script: 0%| | 0.00/2.58k [00:0012\n", " B-PER\n", " I-PER\n", - " 8.655663\n", + " 8.318353\n", " \n", " \n", " 30\n", " B-ORG\n", " I-ORG\n", - " 7.053397\n", + " 7.168033\n", " \n", " \n", - " 33\n", - " I-ORG\n", - " I-ORG\n", - " 6.440869\n", + " 22\n", + " B-MISC\n", + " I-MISC\n", + " 6.609091\n", " \n", " \n", - " 0\n", - " O\n", - " O\n", - " 5.564141\n", + " 26\n", + " I-MISC\n", + " I-MISC\n", + " 6.177694\n", " \n", " \n", - " 3\n", - " O\n", - " B-MISC\n", - " 5.456652\n", + " 34\n", + " I-ORG\n", + " I-ORG\n", + " 6.171870\n", " \n", " \n", - " 4\n", + " 0\n", " O\n", - " B-ORG\n", - " 5.394471\n", - " \n", - " \n", - " 22\n", - " B-MISC\n", - " I-MISC\n", - " 5.257558\n", + " O\n", + " 5.502018\n", " \n", " \n", " 8\n", " B-LOC\n", " I-LOC\n", - " 4.875593\n", + " 5.196033\n", " \n", " \n", - " 2\n", - " O\n", - " B-PER\n", - " 4.716164\n", + " 18\n", + " I-LOC\n", + " I-LOC\n", + " 4.680722\n", " \n", " \n", - " 1\n", + " 3\n", " O\n", - " B-LOC\n", - " 4.539984\n", + " B-MISC\n", + " 4.323212\n", + " \n", + " \n", + " 15\n", + " I-PER\n", + " I-PER\n", + " 4.204643\n", " \n", " \n", "\n", @@ -463,16 +854,16 @@ ], "text/plain": [ " from to weight\n", - "12 B-PER I-PER 8.655663\n", - "30 B-ORG I-ORG 7.053397\n", - "33 I-ORG I-ORG 6.440869\n", - "0 O O 5.564141\n", - "3 O B-MISC 5.456652\n", - "4 O B-ORG 5.394471\n", - "22 B-MISC I-MISC 5.257558\n", - "8 B-LOC I-LOC 4.875593\n", - "2 O B-PER 4.716164\n", - "1 O B-LOC 4.539984" + "12 B-PER I-PER 8.318353\n", + "30 B-ORG I-ORG 7.168033\n", + "22 B-MISC I-MISC 6.609091\n", + "26 I-MISC I-MISC 6.177694\n", + "34 I-ORG I-ORG 6.171870\n", + "0 O O 5.502018\n", + "8 B-LOC I-LOC 5.196033\n", + "18 I-LOC I-LOC 4.680722\n", + "3 O B-MISC 4.323212\n", + "15 I-PER I-PER 4.204643" ] }, "execution_count": 12, @@ -519,81 +910,81 @@ " \n", " \n", " \n", - " 4385\n", + " 4353\n", " token[-2:]:0M\n", " O\n", - " 5.749702\n", + " 9.405752\n", " \n", " \n", - " 4386\n", + " 4354\n", " token[-2:]:5M\n", " O\n", - " 5.541271\n", - " \n", - " \n", - " 164\n", - " EOS\n", - " O\n", - " 5.192606\n", + " 8.817209\n", " \n", " \n", - " 214\n", + " 216\n", " +1:token.lower():1996-12-06\n", " B-LOC\n", - " 4.757576\n", + " 5.713073\n", " \n", " \n", - " 215\n", + " 3807\n", + " token.lower():painewebber\n", + " B-ORG\n", + " 5.619434\n", + " \n", + " \n", + " 217\n", " +1:token.lower():1996-12-06\n", " I-LOC\n", - " 3.692241\n", + " 5.243976\n", " \n", " \n", - " 3532\n", - " +1:token.lower():1996-12-07\n", - " B-LOC\n", - " 3.504000\n", + " 3704\n", + " +1:token.lower():exxon\n", + " O\n", + " 5.152197\n", " \n", " \n", - " 1658\n", - " -1:token.lower():b\n", - " B-PER\n", - " 3.502818\n", + " 1013\n", + " token.lower():italy\n", + " B-LOC\n", + " 5.052394\n", " \n", " \n", - " 23\n", - " token.isdigit()\n", + " 168\n", + " EOS\n", " O\n", - " 3.362199\n", + " 4.993690\n", " \n", " \n", - " 36\n", - " BOS\n", - " O\n", - " 3.280833\n", + " 1605\n", + " -1:token.lower():b\n", + " B-PER\n", + " 4.895346\n", " \n", " \n", - " 1095\n", - " -1:token.lower():at\n", - " B-LOC\n", - " 3.198786\n", + " 3233\n", + " token.lower():trans-atlantic\n", + " B-MISC\n", + " 4.789043\n", " \n", " \n", "\n", "" ], "text/plain": [ - " feature label weight\n", - "4385 token[-2:]:0M O 5.749702\n", - "4386 token[-2:]:5M O 5.541271\n", - "164 EOS O 5.192606\n", - "214 +1:token.lower():1996-12-06 B-LOC 4.757576\n", - "215 +1:token.lower():1996-12-06 I-LOC 3.692241\n", - "3532 +1:token.lower():1996-12-07 B-LOC 3.504000\n", - "1658 -1:token.lower():b B-PER 3.502818\n", - "23 token.isdigit() O 3.362199\n", - "36 BOS O 3.280833\n", - "1095 -1:token.lower():at B-LOC 3.198786" + " feature label weight\n", + "4353 token[-2:]:0M O 9.405752\n", + "4354 token[-2:]:5M O 8.817209\n", + "216 +1:token.lower():1996-12-06 B-LOC 5.713073\n", + "3807 token.lower():painewebber B-ORG 5.619434\n", + "217 +1:token.lower():1996-12-06 I-LOC 5.243976\n", + "3704 +1:token.lower():exxon O 5.152197\n", + "1013 token.lower():italy B-LOC 5.052394\n", + "168 EOS O 4.993690\n", + "1605 -1:token.lower():b B-PER 4.895346\n", + "3233 token.lower():trans-atlantic B-MISC 4.789043" ] }, "execution_count": 13, @@ -608,6 +999,9 @@ } ], "metadata": { + "interpreter": { + "hash": "b6305eb58051137afc5a5205478d9ee79d6169039fed1d5a046194240369b06c" + }, "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", @@ -623,7 +1017,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.3" + "version": "3.9.13" } }, "nbformat": 4,