Skip to content

Commit

Permalink
New ToBI rules for Italian (Issue marytts-it#21)
Browse files Browse the repository at this point in the history
  • Loading branch information
ftesser authored and alize committed Mar 4, 2013
1 parent 82113eb commit 0ff228b
Show file tree
Hide file tree
Showing 3 changed files with 168 additions and 15 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,13 @@
import java.util.Locale;

import marytts.datatypes.MaryDataType;
import marytts.modules.ProsodyGenericFST;
//import marytts.modules.ProsodyGenericFST;

/**
* @author Fabio Tesser
*
*/
public class Prosody extends ProsodyGenericFST {
public class Prosody extends marytts.language.it.ProsodyGeneric {

public Prosody() throws IOException {
super(MaryDataType.PHONEMES,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,14 +84,36 @@ Possible values are only: "enfofpar"(end of paragraph) and "endofvorfeld"(end of

<definitions>
<!-- list of parts of speech that don't receive an accent (function words) -->
<list name="pos_no_accent" items="AP:BN:CC:CS:DD:DE:DI:DQ:DR:E:EA:PC:PD:PE:PI:PP:PQ:PR:RD:RI:T:VA:VM"/> <!-- TOCHECK: DE,DQ,PQ to check -->
<list name="pos_no_accent" items="AP:BN:CC:CS:DD:DE:DI:DQ:DR:E:EA:PC:PD:PE:PI:PP:PQ:PR:RD:RI:T:VA:VM"/> <!-- TOCHECK: DE,DQ,PQ,VM to check -->

<!-- list of parts of speech that receive an accent (content words) -->
<list name="pos_tonal_accent" items="A:B:I:N:NO:S:SA:SP:SW:V:X"/> <!-- TOCHECK: V to check -->

<!-- list of parts of speech for punctuation (used in boundary rules) -->
<list name="pos_punctuation" items="FB FC FF FS $PUNCT $, $( punc PUNC , '' # . "/>


<!-- the following information should always be present if the language is similar to German-->


<!-- If the POS of the first word in the sentence is PQ, PR, DQ, B, E or CS, the sentence is the equivalent of a wh-question in English.
This value is used for interrogW sentences.
Attention: this list should be checked. TOCHECK -> B:E:CS
The following should not be in this category:
"A che ora mi chiami?" (sicuro?)
"Sei stato tu?"
"Non mi chiami?"
"Andiamo a mangiare assieme stasera?"
-->
<list name="firstPosInQuestionW" items="PQ:PR:DQ:B:E:CS"/>

<!-- è, ho, ha, TOCHECK ma anche coniugazioni: erano sono avevano-->
<list name="verbs0" items="è:ho:ha"/>

<list name="nouns0" items="S:SP"/>

<!-- the following information should always be present -->

<!-- default accents for user input, f.e. preferred-accent-shape="rising" -->
Expand All @@ -115,6 +137,65 @@ Possible values are only: "enfofpar"(end of paragraph) and "endofvorfeld"(end of

<!-- the accentposition rules determine if a token gets a tone accent or if it doesn't receive any accent (no force accents in English) -->
<accentposition>

<!--
R1: se non una tra
V+S
V+SP
V+A+S
V+A+SP
Allora pitch accent su verbo (solo sul verbo?)
*
Mangia Antonio.
Lui nasconde Fabio.
*
Corri grande Fabio.
Corri Fabio.
-->

<rule>
<previousAttributes pos="V"/>
<attributes pos="INLIST:nouns0"/>
<action accent=""/>
</rule>


<rule>
<previousAttributes pos="V"/>
<attributes pos="A"/>
<nextAttributes pos="INLIST:nouns0"/>
<action accent=""/>
</rule>

<rule>
<previousMinus2Attributes pos="V"/>
<previousAttributes pos="A"/>
<attributes pos="INLIST:nouns0"/>
<action accent=""/>
</rule>






<!-- R3 pos = BN + è ho ha
R3:
BN + V se il verbo è {è, ho, ha}
non riceve PA (chi? il verbo o BN, il verbo direi...TOCHECK chiedi conferma cinzia)
-->
<rule>
<previousAttributes pos="BN"/>
<text word="INLIST:verbs0"/>
<action accent=""/>
</rule>



<rule> <!-- list of words that usually receive an accent(content words) -->
<attributes pos="INLIST:pos_tonal_accent"/>
<action accent="tone"/>
Expand All @@ -125,6 +206,11 @@ Possible values are only: "enfofpar"(end of paragraph) and "endofvorfeld"(end of
<action accent=""/>
</rule>






<rule> <!-- that's the default: no accent -->
<action accent=""/>
</rule>
Expand All @@ -139,19 +225,20 @@ Possible values are only: "enfofpar"(end of paragraph) and "endofvorfeld"(end of
<action accent="L+H*"/>
</rule>


<rule> <!-- nuclear accent in declarative sentence, not at end of paragraph -->
<sentence type="decl"/>
<prosodicPosition type="nuclearNonParagraphFinal"/>
<attributes accent="tone"/>
<action accent="!H*"/>
<action accent="H+L*"/> <!-- Oppure L+H* -->
</rule>
<!-- TOCHECK: Togli la regola precedente, è sostituita dalla declinazione del contorno intonativo nelle dichiarative. -->


<rule> <!-- nuclear accent in declarative sentence, at end of paragraph -->
<sentence type="decl"/>
<prosodicPosition type="nuclearParagraphFinal"/>
<attributes accent="tone"/>
<action accent="L+H*"/>
<action accent="H+L*"/> <!-- Oppure L+H* -->
</rule>

<rule> <!-- prenuclear accent in exclamative sentence -->
Expand Down Expand Up @@ -180,27 +267,27 @@ Possible values are only: "enfofpar"(end of paragraph) and "endofvorfeld"(end of
<sentence type="interrog"/>
<prosodicPosition type="prenuclear"/>
<attributes accent="tone"/>
<action accent="H+L*"/>
<action accent="H*"/>
</rule>


<rule> <!-- nuclear accent in interrogative sentence, not at end of paragraph -->
<sentence type="interrog"/>
<prosodicPosition type="nuclearNonParagraphFinal"/>
<attributes accent="tone"/>
<action accent="H+L*"/>
<action accent="H+L*"/> <!-- Oppure H* -->
</rule>

<rule> <!-- nuclear accent in interrogative sentence, at end of paragraph -->
<sentence type="interrog"/>
<prosodicPosition type="nuclearParagraphFinal"/>
<attributes accent="tone"/>
<action accent="H+L*"/>
<action accent="H+L*"/> <!-- Oppure H* -->
</rule>

<rule> <!-- catchall rule in case none of the others fired -->
<attributes accent="tone"/>
<action accent="H*"/>
<action accent="H*"/>
</rule>
</accentshape>

Expand Down Expand Up @@ -230,14 +317,21 @@ Possible values are only: "enfofpar"(end of paragraph) and "endofvorfeld"(end of
<action bi="6" tone="L-L%"/>
</rule>

<!-- -->
<rule> <!-- major boundary at end of interrogative sentence at end of paragraph -->
<sentence type="interrog"/>
<specialPosition type="endofpar"/>
<action bi="6" tone="L-H%"/> <!-- TOCHECK: temporary-->
<action bi="6" tone="L-H%"/>
</rule>

<!-- TOCHECK: cancella la precedente regola ed inserisci questa: se con parola interrogtiva che inizia con chi, cosa, perchè quale, come ... allora -> L-L% altrimenti (non inizia con: ...) L-H% -->

<rule> <!-- major boundary at end of interrogative sentence at end of paragraph -->
<sentence type="interrogW"/>
<specialPosition type="endofpar"/>
<action bi="6" tone="L-L%"/>
</rule>

<!-- Le precedenti regole: se la prima parola di una interrogativa inizia con
(chi, cosa, perché, quale, come, ...) (PR,B,E,PQ,PR,E,B) allora -> L-L% altrimenti (non inizia con: ...) -> L-H% -->

<rule> <!-- major boundary at end of declarative sentence, not at end of paragraph -->
<sentence type="decl"/>
Expand All @@ -251,22 +345,38 @@ Possible values are only: "enfofpar"(end of paragraph) and "endofvorfeld"(end of
<action bi="5" tone="L-L%"/>
</rule>

<!-- -->
<rule> <!-- major boundary at end of interrogative sentence, not at end of paragraph -->
<sentence type="interrog"/>
<folTokens num="0"/>
<action bi="5" tone="L-H%"/>
</rule>

<rule> <!-- major boundary at end of interrogative sentence, not at end of paragraph -->
<sentence type="interrogW"/>
<folTokens num="0"/>
<action bi="5" tone="L-L%"/>
</rule>

<!-- Le precedenti regole: se la prima parola di una interrogativa inizia con
(chi, che, cosa, perché, quale, quali, quanto, come, dove, ...) (PR,PQ,DQ,E,B,CS) allora -> L-L% altrimenti (non inizia con: ...) -> L-H% -->




<rule> <!-- major boundary after a punctuation mark in the middle of the sentence -->
<attributes pos="INLIST:pos_punctuation"/>
<folTokens num="1+"/>
<prevTokens num="1+"/>
<action bi="4" tone="H-L%"/>
</rule>

<!-- TOCHECK: insert the , ; : rules -->
<!-- TOCHECK: insert the , ; : rules -->

<!-- TOCHECK: please insert rules R1 and R2 R3 -->




</boundaries>
</tobipredparams>
</tobipredparams>
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
The Tanl tagset is based on the ILC/PAROLE tagset and is conformant to the EAGLES international standard.
#########################################################################
Value Description Examples Accent
#########################################################################
A adjective tossico, pachistano Yes
AP possessive adjective mio, tuo No
B adverb poi, ora, subito Yes
BN negation adverb no, non No
CC coordinate conjunction e, ed, che, ma, o No
CS subordinate conjunction allora, che, come No
DD demonstrative determiner questo, quello No
DE exclamative determiner quanto, quanti No
DI indefinite determiner alcuno, certo No
DQ interrogative determiner quanto, quanti No -> ???
DR relative determiner qual, quale No
E preposition di, a, da, in, con No
EA articulated preposition al, dal, del, nel No
FB balanced punctuation () [] "" No
FC clause boundary punctuation . - : ; No
FF comma , ... - No
FS sentence boundary punctuation . ? ! ... No
I interjection grazie, ahò, eh, beh Yes
N cardinal number 1, 2, uno, tre Yes
NO ordinal number primo, secondo, I, IV Yes
PC clitic pronoun lo, gli, la, mi, ti, t' No
PD demonstrative pronoun questo, quello, ciò No
PE personal pronoun lui, noialtri, essi No -> Yes?
PI indefinite pronoun alcuno, certo, molto No
PP possessive pronoun mio, tuo, suo, loro No
PQ interrogative pronoun quanto, che, chi, dove No -> ???
PR relative pronoun quanto, quanti, quanta No
RD determinative article il, lo, i, gli, la No
RI indeterminative article uno, un, una, un' No
S common noun allarme, vetro, piano Yes
SA abbreviation km, ndr, pm Yes ->?
SP proper noun Monica, Pisa, Fiat Yes
T predeterminer tutto, tutti, entrambi No
V main verb riesco, arrabbierei Yes
VA auxiliary verb (Class of verbs: essere, avere, venire.) No
VM modal verb (Class of verbs: volere, potere, dovere.) No
X residual class Yes
###################
SW foreign word? Yes

0 comments on commit 0ff228b

Please sign in to comment.