Skip to content

Commit

Permalink
Merge pull request #16 from bretttolbert/catalan-contd
Browse files Browse the repository at this point in the history
Catalan contd
  • Loading branch information
bretttolbert authored Dec 17, 2023
2 parents 6155e43 + ceb1537 commit 5e492f9
Show file tree
Hide file tree
Showing 9 changed files with 149 additions and 15 deletions.
10 changes: 9 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
# verbecc Changelog

- 1.9.1 [17 December 2023]
- Renamed Catalan 'preterit' to 'passat-simple'
- Cont. added more missing conjugation templates for Catalan, decent support for most Catalan verbs now
- Added `localization` module with localization functions `localize_mood` and `localize_tense`
- Removed pre-generated model .zip files
- Added dummy file in models directory as workaround for installation issue
- Fixed KeyError with Spanish verb abolir

- 1.9.0 [December 2023]
- Added limited support for Catalan language
- Please help improve support for Catalan verb conjugation, PRs welcome
Expand All @@ -8,7 +16,7 @@
- Updated dependencies (scikit-learn, etc.)
- Now targeting Python 3.11

- 1.8.1 [2022 December 28]
- 1.8.1 [28 December 2022]
- Updated from Python 3.7 to Python 3.10
- Updated dependencies
- Increased SGDClassifier max_iter from 4000 to 40000
Expand Down
2 changes: 1 addition & 1 deletion EXAMPLE_CA_SER.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
"vosaltres éreu",
"ells eren"
],
"pretèrit": [
"passat-simple": [
"jo fui",
"tu fores",
"ell fou",
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
- [HTTP API : /verbecc/conjugate/fr/manger](http://verbe.cc/verbecc/conjugate/fr/manger)

### Features
* Conjugate verbs in six romance languages: French, Spanish, Portuguese, Italian, Romanian, Catalan (limited-support)
* Conjugate verbs in six romance languages: French, Spanish, Portuguese, Italian, Romanian, Catalan
* Uses machine learning techniques to predict conjugation of unknown verbs with 99% accuracy
* Includes both simple and compound conjugations
* Unit-tested
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "verbecc"
version = "1.9.0"
version = "1.9.1"
dependencies = [
"lxml",
"lxml-stubs",
Expand Down
7 changes: 6 additions & 1 deletion tests/test_inflector_ca.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

cg = Conjugator(lang='ca')

# TODO: Fix all of these missing templates
@pytest.mark.skip("known failure")
def test_all_verbs_have_templates():
"""Have not finished adding templates for all verbs, so this should fail"""
Expand Down Expand Up @@ -140,7 +141,11 @@ def test_find_verb_by_infinitive():
('morir', 'indicatiu', 'present',
['jo moro', 'tu mors', 'ell mor', 'nosaltres morim', 'vosaltres moriu', 'ells moren']),
('eixir', 'indicatiu', 'present',
['jo ixo', 'tu ixes', 'ell ix', 'nosaltres eixim', 'vosaltres eixiu', 'ells ixen'])
['jo ixo', 'tu ixes', 'ell ix', 'nosaltres eixim', 'vosaltres eixiu', 'ells ixen']),
('jaure', 'indicatiu', 'present',
['jo jec', 'tu jeus', 'ell jeu', 'nosaltres jaiem', 'vosaltres jaieu', 'ells jeuen']),
('jeure', 'indicatiu', 'present',
['jo jec', 'tu jeus', 'ell jeu', 'nosaltres jaiem', 'vosaltres jaieu', 'ells jeuen'])
]

@pytest.mark.parametrize("infinitive,mood,tense,expected_result",
Expand Down
93 changes: 93 additions & 0 deletions verbecc/data/conjugations-ca.xml
Original file line number Diff line number Diff line change
Expand Up @@ -634,6 +634,99 @@ voler ("to want"), caldre ("must"), dir ("to say") and dur ("to take/get").
</Particip>
</template>

<!-- TODO: This should inherit from 'perd:re' as 'jau:re' -->
<template name="j:aure">
<Indicatiu>
<present>
<p><i>ec</i></p>
<p><i>eus</i></p>
<p><i>eu</i></p>
<p><i>aiem</i></p>
<p><i>aieu</i></p>
<p><i>euen</i></p>
</present>
<imperfet>
<p><i>eia</i></p>
<p><i>eies</i></p>
<p><i>eia</i></p>
<p><i>èiem</i></p>
<p><i>èieu</i></p>
<p><i>eien</i></p>
</imperfet>
<passat-simple>
<p><i>aguí</i></p>
<p><i>agueres</i></p>
<p><i>agué</i></p>
<p><i>aguérem</i></p>
<p><i>aguéreu</i></p>
<p><i>agueren</i></p>
</passat-simple>
<futur>
<p><i>auré</i></p>
<p><i>auràs</i></p>
<p><i>aurà</i></p>
<p><i>aurem</i></p>
<p><i>aureu</i></p>
<p><i>auran</i></p>
</futur>
</Indicatiu>
<Subjuntiu>
<present>
<p><i>egui</i></p>
<p><i>eguis</i></p>
<p><i>egui</i></p>
<p><i>aguem</i></p>
<p><i>agueu</i></p>
<p><i>eguin</i></p>
</present>
<imperfet>
<p><i>agués</i></p>
<p><i>aguessis</i></p>
<p><i>agués</i></p>
<p><i>aguéssim</i></p>
<p><i>aguéssiu</i></p>
<p><i>aguessin</i></p>
</imperfet>
</Subjuntiu>
<Imperatiu>
<imperatiu-present>
<p><i>eu</i></p>
<p><i>egui</i></p>
<p><i>aguem</i></p>
<p><i>aieu</i></p>
<p><i>eguin</i></p>
</imperatiu-present>
</Imperatiu>
<Condicional>
<present>
<p><i>auria</i></p>
<p><i>auries</i></p>
<p><i>auria</i></p>
<p><i>auríem</i></p>
<p><i>auríeu</i></p>
<p><i>aurien</i></p>
</present>
</Condicional>
<Infinitiu>
<infinitiu-present>
<p><i>eure</i></p>
</infinitiu-present>
</Infinitiu>
<Gerundi>
<gerundi>
<p><i>aient</i></p>
</gerundi>
</Gerundi>
<Particip>
<particip>
<p><i>agut</i></p>
<p><i>aguda</i></p>
<p><i>aguts</i></p>
<p><i>agudes</i></p>
</particip>
</Particip>
</template>

<!-- TODO: This should inherit from 'perd:re' as 'pod:er' -->
<template name=":poder">
<Indicatiu>
Expand Down
4 changes: 2 additions & 2 deletions verbecc/data/verbs-ca.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5941,9 +5941,9 @@
<v><i>jaspiar</i><t>canvi:ar</t><en></en></v>
<v><i>jaumetar</i><t>cant:ar</t><en></en></v>
<v><i>jaupar</i><t>cant:ar</t><en></en></v>
<v><i>jaure</i><t>:perdre</t><en></en></v>
<v><i>jaure</i><t>j:aure</t><en>lie down</en></v>
<v><i>jerarquitzar</i><t>cant:ar</t><en></en></v>
<v><i>jeure</i><t>:jaure</t><en></en></v>
<v><i>jeure</i><t>j:aure</t><en>lie</en></v>
<v><i>joguinejar</i><t>enve:jar</t><en></en></v>
<v><i>jornalejar</i><t>enve:jar</t><en></en></v>
<v><i>jovenejar</i><t>enve:jar</t><en></en></v>
Expand Down
19 changes: 11 additions & 8 deletions verbecc/inflector.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@

from verbecc import grammar_defines
from verbecc import exceptions
from verbecc import inflector
from verbecc import conjugations_parser
from verbecc import conjugation_template
from verbecc import verb
Expand Down Expand Up @@ -62,30 +61,34 @@ def find_verb_by_infinitive(self, infinitive) -> verb.Verb:
def find_template(self, name: str) -> conjugation_template.ConjugationTemplate:
return self._conj_parser.find_template(name)

def get_verbs_that_start_with(self, query, max_results):
def get_verbs_that_start_with(self, query: str, max_results: int):
query = query.lower()
matches = self._verb_parser.get_verbs_that_start_with(query, max_results)
return matches

def _get_verb_stem(self, infinitive, template_name):
template_beg, template_ending = template_name.split(u':')
def _get_verb_stem(self, infinitive: str, template_name: str):
"""Get the verb stem given an infinitive and a colon-delimited template name.
E.g. infinitive='parler' template_name='aim:er' -> 'parl'
Note: Catalan overrides this base class implementation to allow looser matching
(only requires the last n-1 chars of template ending to match infinitive ending)"""
_, template_ending = template_name.split(u':')
if not infinitive.endswith(template_ending):
raise exceptions.ConjugatorError(
"Template {} ending doesn't "
"match infinitive {}"
.format(template_name, infinitive))
return infinitive[:len(infinitive) - len(template_ending)]

def _is_impersonal_verb(self, infinitive):
def _is_impersonal_verb(self, infinitive: str):
return False

def _verb_can_be_reflexive(self, infinitive):
def _verb_can_be_reflexive(self, infinitive: str):
return not self._is_impersonal_verb(infinitive)

def _split_reflexive(self, infinitive):
def _split_reflexive(self, infinitive: str):
return (False, infinitive)

def _add_reflexive_pronoun(self, s):
def _add_reflexive_pronoun(self, s: str):
pass

def _add_subjunctive_relative_pronoun(self, s: str, tense_name: str):
Expand Down
25 changes: 25 additions & 0 deletions verbecc/inflector_ca.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from typing import Dict, List, Tuple

from verbecc import exceptions
from verbecc import inflector

class InflectorCa(inflector.Inflector):
Expand Down Expand Up @@ -67,6 +68,7 @@ def _get_alternate_hv_inflection(self, s: str) -> str:

def _get_compound_conjugations_aux_verb_map(self) -> Dict[str, Dict[str, Tuple[str, ...]]]:
"""
TODO: Implement all these compound tenses (Spanish compound tenses in this comment, for reference)
return {
'indicatiu': {
'pretèrit-perfet-compuest': ('indicatiu', 'present'),
Expand All @@ -85,3 +87,26 @@ def _get_compound_conjugations_aux_verb_map(self) -> Dict[str, Dict[str, Tuple[s
}
"""
return {}

def _get_verb_stem(self, infinitive: str, template_name: str):
"""Get the verb stem given an ininitive and a colon-delimited template name.
E.g. infinitive='parlar' template_name='cant:ar' -> 'parl'
Note: Base class _get_verb_stem raises exception if template ending doesn't
match infinitive ending exactly but for Catalan, some verbs
have endings where at least the first letter doesn't match.
E.g. both 'jaure' and and 'jeure' are apparently conjugated
identically, so we want either one to use the 'j:aure' template.
So since this is Catalan, let it pass if the last n-1 letters of the
template ending match the infinitive ending
"""
_, template_ending = template_name.split(u':')
if not infinitive.endswith(template_ending) \
and not infinitive.endswith(template_ending[1:]):
raise exceptions.ConjugatorError(
"Template {} ending doesn't "
"match infinitive {},"
"not even a little bit"
.format(template_name, infinitive))
return infinitive[:len(infinitive) - len(template_ending)]

0 comments on commit 5e492f9

Please sign in to comment.