Move to pdm

Signed-off-by: Olga Bulat <[email protected]>
obulat · Jul 14, 2024 · 7bf4408 · 7bf4408
1 parent be1870a
commit 7bf4408
Show file tree

Hide file tree

Showing 8 changed files with 127 additions and 78 deletions.
diff --git a/.github/workflows/build.yml → .github/workflows/ci.yml b/.github/workflows/build.yml → .github/workflows/ci.yml
@@ -7,30 +7,28 @@ on: [push, pull_request]
 
 jobs:
   build:
-
     runs-on: ubuntu-latest
     strategy:
       matrix:
         python-version: [3.9, '3.10', '3.11', '3.12']
 
     steps:
-    - uses: actions/checkout@v2
-    - name: Set up Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v2
+    - uses: actions/checkout@v4
+    - name: Set up PDM
+      uses: pdm-project/setup-pdm@v4
       with:
-        python-version: ${{ matrix.python-version }}
+          python-version: ${{ matrix.python-version }}
+
     - name: Install dependencies
       run: |
-        python -m pip install --upgrade pip
-        pip install flake8 pytest wheel
-        if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
-        python -m nltk.downloader punkt
-        pip install coverage codecov pytest-cov
+          pdm install
+          pdm run python -m nltk.downloader punkt  # run inside the PDM environment, where nltk is installed
+
     - name: Test with pytest and coverage
       run: |
-        python -m pytest
-
-        python -m py.test --cov-report=xml --cov=zeyrek tests/
+        pdm run coverage run --source=zeyrek -m pytest tests/  # one test run, inside the PDM environment
+        pdm run coverage xml  # writes coverage.xml for the Codecov step
+
     - name: Codecov
-      uses: codecov/codecov-action@v3.1.0
+      uses: codecov/codecov-action@v4.5.0
 
diff --git a/.github/workflows/publish_to_pypi.yml b/.github/workflows/publish_to_pypi.yml
@@ -13,19 +13,19 @@ jobs:
     runs-on: ubuntu-latest
 
     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v4
     - name: Set up Python
-      uses: actions/setup-python@v2
+      uses: pdm-project/setup-pdm@v4  # installs Python and PDM; later steps call `pdm`
       with:
         python-version: '3.x'
     - name: Install dependencies
       run: |
-        python -m pip install --upgrade pip
-        pip install setuptools wheel twine
+          pdm install
+          pdm build
+
     - name: Build and publish to real pypi
       env:
         TWINE_USERNAME: __token__
         TWINE_PASSWORD: ${{ secrets.pypi_password }}
       run: |
-        python setup.py sdist bdist_wheel
-        twine upload dist/*
+          pdm publish
diff --git a/.gitignore b/.gitignore
@@ -102,3 +102,6 @@ ENV/
 .mypy_cache/
 /vvenv/
 /.idea/
+
+# pdm stuff
+.pdm-python
diff --git a/README.md b/README.md
@@ -0,0 +1,50 @@
+# Zeyrek: Morphological Analyzer and Lemmatizer
+
+![PyPI - Version](https://img.shields.io/pypi/v/zeyrek)
+
+Zeyrek is a partial port of the [Zemberek library](https://github.com/ahmetaa/zemberek-nlp) to Python for lemmatizing
+and analyzing Turkish language words. It is in alpha stage, and the API
+will probably change.
+
+
+* Free software: MIT license
+* Documentation: https://zeyrek.readthedocs.io.
+
+
+## Basic Usage
+
+To use Zeyrek, first create an instance of the `MorphAnalyzer` class:
+
+```python
+import zeyrek
+analyzer = zeyrek.MorphAnalyzer()
+```
+
+Then, you can call its `analyze` method on words or texts to get all possible analyses:
+
+```python
+print(analyzer.analyze('benim'))
+Parse(word='benim', lemma='ben', pos='Noun', morphemes=['Noun', 'A3sg', 'P1sg'], formatted='[ben:Noun] ben:Noun+A3sg+im:P1sg')
+Parse(word='benim', lemma='ben', pos='Pron', morphemes=['Pron', 'A1sg', 'Gen'], formatted='[ben:Pron,Pers] ben:Pron+A1sg+im:Gen')
+Parse(word='benim', lemma='ben', pos='Verb', morphemes=['Noun', 'A3sg', 'Zero', 'Verb', 'Pres', 'A1sg'], formatted='[ben:Noun] ben:Noun+A3sg|Zero→Verb+Pres+im:A1sg')
+Parse(word='benim', lemma='ben', pos='Verb', morphemes=['Pron', 'A1sg', 'Zero', 'Verb', 'Pres', 'A1sg'], formatted='[ben:Pron,Pers] ben:Pron+A1sg|Zero→Verb+Pres+im:A1sg')
+```
+If you only need the base form of words, or lemmas, you can call `lemmatize`. It returns a list
+of tuples, each containing the word itself and a list of its possible lemmas:
+
+```python
+print(analyzer.lemmatize('benim'))
+[('benim', ['ben'])]
+```
+
+
+## Credits
+
+This package is a Python port of part of the [Zemberek](https://github.com/ahmetaa/zemberek-nlp) package by [Ahmet A. Akın](https://github.com/ahmetaa).
+
+
+This package was created with
+[Cookiecutter](https://github.com/audreyr/cookiecutter) and the
+[audreyr/cookiecutter-pypackage](https://github.com/audreyr/cookiecutter-pypackage)
+project template.
+
diff --git a/README.rst b/README.rst
diff --git a/pyproject.toml b/pyproject.toml
@@ -0,0 +1,57 @@
+[project]
+name = "zeyrek"
+version = "0.1.4"
+description = "Python morphological analyzer and lemmatizer for Turkish"
+authors = [
+    {name = "Olga Bulat", email = "[email protected]"},
+]
+dependencies = [
+    "alabaster==0.7.16",
+    "Babel==2.15.0",
+    "bleach==6.1.0",
+    "certifi==2024.7.4",
+    "chardet==5.2.0",
+    "click==8.1.7",
+    "colorama==0.4.6",
+    "coverage==7.6.0",
+    "docutils>=0.19",
+    "idna==3.7",
+    "imagesize==1.4.1",
+    "importlib-metadata==8.0.0",
+    "Jinja2==3.1.4",
+    "joblib==1.4.2",
+    "keyring==25.2.1",
+    "MarkupSafe==2.1.5",
+    "nltk==3.8.1",
+    "packaging==24.1",
+    "pkginfo==1.11.1",
+    "Pygments==2.18.0",
+    "pyparsing==3.1.2",
+    "pytest==8.2.2",
+    "pytz==2024.1",
+    "pywin32-ctypes==0.2.0",
+    "readme-renderer==35.0",
+    "regex==2024.5.15",
+    "requests==2.32.3",
+    "requests-toolbelt==1.0.0",
+    "six==1.16.0",
+    "snowballstemmer==2.2.0",
+    "Sphinx==5.3.0",
+    "sphinx-rtd-theme==2.0.0",
+    "tqdm==4.64.0",
+    "twine==4.0.0",
+    "urllib3==1.26.18",
+    "webencodings==0.5.1",
+    "zipp==3.19.2",
+]
+requires-python = ">=3.9"
+readme = "README.md"
+license = {text = "MIT"}
+
+[build-system]
+requires = ["pdm-backend"]
+build-backend = "pdm.backend"
+
+
+[tool.pdm]
+distribution = true
diff --git a/src/zeyrek/__init__.py b/src/zeyrek/__init__.py
diff --git a/tests/__init__.py b/tests/__init__.py