-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathID.py
105 lines (78 loc) · 3.52 KB
/
ID.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
#==============================================================================================
# The Jabalín morphological generator for Arabic verbs
#
# Copyright (c) 2012 Susana López Hervás, Alicia González Martínez, Antonio Moreno Sandoval
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>
#==============================================================================================
##################################################################
# MODULE INTERNAL DERIVATION
###################################################################
import re
import sys
# This function applies the rules classified as lengthening operations of Internal
# Derivation. The rules are applied to the root, which is passed as a parameter
def ID_rules_lenghtening(root, cod_lenghtening):
    """Apply one lengthening rule of Internal Derivation to *root*.

    ``cod_lenghtening`` is converted with ``int()`` and selects the rule:

    * 1 -- the last character is duplicated
    * 2 -- 'ّ' ('~') is inserted between the second and the third characters
    * 3 -- 'E' (lengthening mark) is inserted between the first and the
      second characters
    * 4 -- 'E' is appended at the end
    * 5 -- the second character is duplicated

    Any other code returns the root untouched. A root on which the selected
    pattern does not match (e.g. too short) is also returned unchanged.
    """
    # code -> (search pattern, replacement template)
    substitutions = {
        1: (r"(.*)(.)", r"\1\2\2"),
        2: (r"(..)(.*)", r"\1ّ\2"),
        3: (r"(.)(.*)", r"\1E\2"),
        4: (r"(.+)", r"\1E"),
        5: (r"(.)(.)(.*)", r"\1\2\2\3"),
    }
    rule = substitutions.get(int(cod_lenghtening))
    if rule is None:
        # Unknown code: no lengthening operation is applied.
        return root
    pattern, replacement = rule
    return re.sub(pattern, replacement, root)
# This function applies the rules classified as addition operations of Internal
# Derivation. Again, the rules are applied to the root
def ID_rules_addition(root, cod_addition):
    """Apply one addition rule of Internal Derivation to *root*.

    ``cod_addition`` is converted with ``int()`` and selects the rule:

    * 1 -- 'ن' ('n') is added at the beginning
    * 2 -- 'ت' ('t') is added between the first and the second characters
    * 3 -- 'أ' ('Á') is added at the beginning
    * 4 -- 'ست' ('st') is added at the beginning
    * 5 -- 'E' is added between the second and the third characters
    * 6 -- 'و' ('w') is added between the second and the third characters
    * 7 -- 'وو' ('ww') is added between the second and the third characters
    * 8 -- 'ن' ('n') is added between the second and the third characters

    Any other code returns the root untouched. A root on which the selected
    pattern does not match (e.g. too short) is also returned unchanged.
    """
    # code -> (search pattern, replacement template)
    substitutions = {
        1: (r"(.+)", r"ن\1"),
        2: (r"(.)(.*)", r"\1ت\2"),
        3: (r"(.+)", r"أ\1"),
        4: (r"(.+)", r"ست\1"),
        5: (r"(..)(.*)", r"\1E\2"),
        6: (r"(..)(.*)", r"\1و\2"),
        7: (r"(..)(.*)", r"\1وو\2"),
        8: (r"(..)(.*)", r"\1ن\2"),
    }
    rule = substitutions.get(int(cod_addition))
    if rule is None:
        # Unknown code: no addition operation is applied.
        return root
    pattern, replacement = rule
    return re.sub(pattern, replacement, root)
# This function applies all the rules of the Internal Derivation
# to the root, which is passed as parameter
def Internal_derivation(root, cod_ID):
    """Run the full Internal Derivation on *root*.

    The lengthening rule selected by ``cod_ID["lengthening"]`` is applied
    first; the addition rule selected by ``cod_ID["addition"]`` is then
    applied to that intermediate result, which is returned.
    """
    lengthened = ID_rules_lenghtening(root, cod_ID["lengthening"])
    return ID_rules_addition(lengthened, cod_ID["addition"])