-
Notifications
You must be signed in to change notification settings - Fork 138
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Browse files
Browse the repository at this point in the history
* #1606 Add Deepl translator module * #1606 Add Deepl translator module - first set of corrections * #1606 Add Deepl translator module - first set of corrections * #1606 Add Deepl translator module - second set of corrections * #1606 : Use of TockProxyAuthenticator + use of glossary map ids for all languages * #1606 translator: clean up module * #1606 : Correction of the glossary id name --------- Co-authored-by: charles_moulhaud <[email protected]> Co-authored-by: Fabilin <[email protected]>
- Loading branch information
1 parent
707e3a7
commit 27c7bad
Showing
7 changed files
with
376 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
# TOCK Deepl Translation | ||
|
||
Here are the configurable variables: | ||
|
||
- `tock_translator_deepl_target_languages`: set of supported languages - ex : en,es | ||
- `tock_translator_deepl_api_url`: Deepl api url (default pro api url : https://api.deepl.com/v2/translate). | ||
If you have problems with pro api, you can use free api : https://api-free.deepl.com/v2/translate | ||
- `tock_translator_deepl_api_key` : Deepl api key to use (see your account) | ||
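- `tock_translator_deepl_glossary_map_ids`: map of glossary identifiers, keyed by target language code (ex : en, es); the glossary matching the target language is used in translation | ||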
|
||
> Deepl documentation: https://developers.deepl.com/docs | ||
To integrate the module into a custom Tock Admin, pass the module as a parameter to the `ai.tock.nlp.admin.startAdminServer()` function. | ||
|
||
Example: | ||
|
||
```kt | ||
package ai.tock.bot.admin | ||
|
||
import ai.tock.nlp.admin.startAdminServer | ||
import ai.tock.translator.deepl.deeplTranslatorModule | ||
|
||
fun main() { | ||
startAdminServer(deeplTranslatorModule) | ||
} | ||
``` | ||
|
||
## Http Client Configuration | ||
|
||
You can configure the Deepl client, including proxy settings, by passing a custom client to `configureDeeplTranslatorModule`: | ||
|
||
```kt | ||
startAdminServer(configureDeeplTranslatorModule(OkHttpDeeplClient { | ||
proxyAuthenticator { _: Route?, response: Response -> | ||
// https://square.github.io/okhttp/3.x/okhttp/index.html?okhttp3/Authenticator.html | ||
if (response.challenges().any { it.scheme.equals("OkHttp-Preemptive", ignoreCase = true) }) { | ||
response.request.newBuilder() | ||
.header("Proxy-Authorization", credential) | ||
.build() | ||
} else { | ||
null | ||
} | ||
} | ||
})) | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
<?xml version="1.0" encoding="UTF-8"?> | ||
<!-- | ||
~ Copyright (C) 2017/2021 e-voyageurs technologies | ||
~ | ||
~ Licensed under the Apache License, Version 2.0 (the "License"); | ||
~ you may not use this file except in compliance with the License. | ||
~ You may obtain a copy of the License at | ||
~ | ||
~ http://www.apache.org/licenses/LICENSE-2.0 | ||
~ | ||
~ Unless required by applicable law or agreed to in writing, software | ||
~ distributed under the License is distributed on an "AS IS" BASIS, | ||
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
~ See the License for the specific language governing permissions and | ||
~ limitations under the License. | ||
--> | ||
|
||
<project xmlns="http://maven.apache.org/POM/4.0.0" | ||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" | ||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> | ||
<modelVersion>4.0.0</modelVersion> | ||
<parent> | ||
<groupId>ai.tock</groupId> | ||
<artifactId>tock-translator</artifactId> | ||
<version>24.3.5-SNAPSHOT</version> | ||
</parent> | ||
|
||
<artifactId>tock-deepl-translate</artifactId> | ||
<name>Tock Deepl Translator</name> | ||
<description>Deepl translator implementation</description> | ||
|
||
<dependencies> | ||
<dependency> | ||
<groupId>org.apache.commons</groupId> | ||
<artifactId>commons-text</artifactId> | ||
</dependency> | ||
<dependency> | ||
<groupId>ai.tock</groupId> | ||
<artifactId>tock-translator-core</artifactId> | ||
</dependency> | ||
<dependency> | ||
<groupId>com.squareup.okhttp3</groupId> | ||
<artifactId>okhttp</artifactId> | ||
<version>4.12.0</version> | ||
</dependency> | ||
<dependency> | ||
<groupId>com.fasterxml.jackson.core</groupId> | ||
<artifactId>jackson-core</artifactId> | ||
</dependency> | ||
</dependencies> | ||
|
||
</project> |
125 changes: 125 additions & 0 deletions
125
translator/deepl-translate/src/main/kotlin/DeeplClient.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,125 @@ | ||
/* | ||
* Copyright (C) 2017/2021 e-voyageurs technologies | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package ai.tock.translator.deepl | ||
|
||
import ai.tock.shared.TockProxyAuthenticator | ||
import ai.tock.shared.jackson.mapper | ||
import ai.tock.shared.property | ||
import ai.tock.shared.propertyOrNull | ||
import com.fasterxml.jackson.module.kotlin.readValue | ||
import java.io.IOException | ||
import java.util.regex.Pattern | ||
import okhttp3.FormBody | ||
import okhttp3.OkHttpClient | ||
import okhttp3.Request | ||
|
||
/**
 * Payload deserialized from the body of a successful translation call.
 *
 * @property translations the translations carried by the response; only the first one is read by [OkHttpDeeplClient]
 */
internal data class TranslationResponse(val translations: List<Translation>)
|
||
/**
 * A single translation entry of a [TranslationResponse].
 *
 * @property text the translated text
 */
internal data class Translation(val text: String)
|
||
/** Value sent as the `tag_handling` form field of every translation request. */
const val TAG_HANDLING = "xml"
|
||
/**
 * Client able to submit a single text for translation.
 */
interface DeeplClient {
    /**
     * Translates [text] from [sourceLang] to [targetLang].
     *
     * @param text the text to translate
     * @param sourceLang language of [text] (the engine passes `Locale.language` codes)
     * @param targetLang language to translate into (the engine passes `Locale.language` codes)
     * @param preserveFormatting whether the formatting of [text] should be preserved
     * @param glossaryId optional glossary identifier, `null` when no glossary applies
     * @return the translated text, or `null` when no translation is available
     */
    fun translate(
        text: String,
        sourceLang: String,
        targetLang: String,
        preserveFormatting: Boolean,
        glossaryId: String?
    ): String?
}
|
||
/**
 * [DeeplClient] implementation that posts a form-encoded request with OkHttp.
 *
 * @param apiURL translation endpoint, read from `tock_translator_deepl_api_url` by default
 * @param apiKey authentication key, read from `tock_translator_deepl_api_key` by default;
 * when `null`, [translate] returns its input unchanged without calling the API
 * @param okHttpCustomizer extra configuration applied to the OkHttp builder after
 * [TockProxyAuthenticator] has been installed
 */
class OkHttpDeeplClient(
    private val apiURL: String = property("tock_translator_deepl_api_url", "https://api.deepl.com/v2/translate"),
    private val apiKey: String? = propertyOrNull("tock_translator_deepl_api_key"),
    okHttpCustomizer: OkHttpClient.Builder.() -> Unit = {}
) : DeeplClient {
    private val client = OkHttpClient.Builder()
        .apply(TockProxyAuthenticator::install)
        .apply(okHttpCustomizer)
        .build()

    /**
     * Replaces every `{:name}` placeholder of [text] with a fixed token.
     *
     * @return the text with tokens, and the placeholder names in order of appearance
     */
    private fun replaceSpecificPlaceholders(text: String): Pair<String, List<String>> {
        val matcher = PLACEHOLDER_PATTERN.matcher(text)

        // Store original placeholder names for later restoration
        val placeholders = mutableListOf<String>()
        while (matcher.find()) {
            placeholders.add(matcher.group(1))
        }

        // replaceAll resets the matcher, so every occurrence is replaced
        val replacedText = matcher.replaceAll(PLACEHOLDER_TOKEN)

        return Pair(replacedText, placeholders)
    }

    /**
     * Restores the `{:name}` placeholders, substituting the tokens of [text]
     * one by one in the order the names were collected.
     */
    private fun revertSpecificPlaceholders(text: String, placeholders: List<String>): String =
        placeholders.fold(text) { restored, placeholder ->
            restored.replaceFirst(PLACEHOLDER_TOKEN, "{:$placeholder}")
        }

    /**
     * Translates [text] through the configured endpoint.
     *
     * @return the first translation of the response with its placeholders restored,
     * `null` when the response holds no translation, or [text] itself when no api key is set
     * @throws IOException when the call fails, the response is not successful or it has no body
     */
    override fun translate(
        text: String,
        sourceLang: String,
        targetLang: String,
        preserveFormatting: Boolean,
        glossaryId: String?
    ): String? {
        if (apiKey == null) return text

        val (textWithPlaceholders, originalPlaceholders) = replaceSpecificPlaceholders(text)

        // The glossary id must be added BEFORE build(): fields added to the
        // builder afterwards are not part of the already-built request body.
        val requestBody = FormBody.Builder()
            .add("text", textWithPlaceholders)
            .add("source_lang", sourceLang)
            .add("target_lang", targetLang)
            .add("preserve_formatting", preserveFormatting.toString())
            .add("tag_handling", TAG_HANDLING)
            .apply { glossaryId?.let { add("glossary_id", it) } }
            .build()

        val request = Request.Builder()
            .url(apiURL)
            .addHeader("Authorization", "DeepL-Auth-Key $apiKey")
            .post(requestBody)
            .build()

        return client.newCall(request).execute().use { response ->
            if (!response.isSuccessful) throw IOException("Unexpected code $response")

            val responseBody = response.body?.string()
                ?: throw IOException("Empty response body for $response")

            mapper.readValue<TranslationResponse>(responseBody)
                .translations
                .firstOrNull()
                ?.text
                ?.let { revertSpecificPlaceholders(it, originalPlaceholders) }
        }
    }

    private companion object {
        /** Token substituted for each `{:name}` placeholder before the text is sent. */
        private const val PLACEHOLDER_TOKEN = "_PLACEHOLDER_"

        /** Matches `{:name}` placeholders; group 1 captures the name. Compiled once. */
        private val PLACEHOLDER_PATTERN: Pattern = Pattern.compile("\\{:([^}]*)}")
    }
}
41 changes: 41 additions & 0 deletions
41
translator/deepl-translate/src/main/kotlin/DeeplTranslatorEngine.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
/* | ||
* Copyright (C) 2017/2021 e-voyageurs technologies | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package ai.tock.translator.deepl | ||
|
||
import ai.tock.shared.mapProperty | ||
import ai.tock.shared.propertyOrNull | ||
import ai.tock.translator.TranslatorEngine | ||
import java.util.Locale | ||
import org.apache.commons.text.StringEscapeUtils | ||
|
||
/**
 * [TranslatorEngine] that delegates translations to a [DeeplClient].
 *
 * Configuration read at construction time:
 * - `tock_translator_deepl_target_languages`: comma-separated target languages to translate;
 *   when the property is absent, every target language is translated
 * - `tock_translator_deepl_glossary_map_ids`: glossary identifiers, looked up by target language
 */
internal class DeeplTranslatorEngine(client: DeeplClient) : TranslatorEngine {
    private val deeplClient = client

    private val supportedLanguages: Set<String>? =
        propertyOrNull("tock_translator_deepl_target_languages")
            ?.split(",")
            ?.map { it.trim() }
            ?.toSet()
    private val glossaryMapIds = mapProperty("tock_translator_deepl_glossary_map_ids", emptyMap())
    override val supportAdminTranslation: Boolean = true

    /**
     * Translates [text] from the language of [source] to the language of [target].
     *
     * @return the translation with HTML entities unescaped, or an empty string when the
     * target language is filtered out or the client returns no translation
     */
    override fun translate(text: String, source: Locale, target: Locale): String {
        // Allows to filter translation on a specific language
        if (supportedLanguages != null && target.language !in supportedLanguages) return ""

        // The client may return null (no translation in the response). Passing null to
        // unescapeHtml4 yields null, which must not leak into the non-null return type.
        val translatedText = deeplClient.translate(
            text,
            source.language,
            target.language,
            true,
            glossaryMapIds[target.language]
        ) ?: return ""

        return StringEscapeUtils.unescapeHtml4(translatedText)
    }
}
31 changes: 31 additions & 0 deletions
31
translator/deepl-translate/src/main/kotlin/DeeplTranslatorIoc.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
/* | ||
* Copyright (C) 2017/2021 e-voyageurs technologies | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package ai.tock.translator.deepl | ||
|
||
import ai.tock.translator.TranslatorEngine | ||
import com.github.salomonbrys.kodein.Kodein | ||
import com.github.salomonbrys.kodein.bind | ||
import com.github.salomonbrys.kodein.provider | ||
|
||
/**
 * The default Deepl translator module, for use in a Kodein injector.
 *
 * Built lazily on first access: the default module creates an [OkHttpDeeplClient],
 * which should not happen as a side effect of loading this file when only
 * [configureDeeplTranslatorModule] is used with a custom client.
 */
val deeplTranslatorModule: Kodein.Module by lazy { configureDeeplTranslatorModule() }
|
||
/**
 * Builds a Kodein module binding [TranslatorEngine] to a Deepl-backed engine,
 * overriding any existing binding.
 *
 * @param client the client used by the engine; a default [OkHttpDeeplClient] when omitted
 */
fun configureDeeplTranslatorModule(client: DeeplClient = OkHttpDeeplClient()): Kodein.Module =
    Kodein.Module {
        bind<TranslatorEngine>(overrides = true) with provider { DeeplTranslatorEngine(client) }
    }
81 changes: 81 additions & 0 deletions
81
translator/deepl-translate/src/test/kotlin/DeeplTranslateIntegrationTest.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
/* | ||
* Copyright (C) 2017/2021 e-voyageurs technologies | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package ai.tock.translator.deepl | ||
|
||
import java.util.Locale | ||
import kotlin.test.assertEquals | ||
import org.junit.jupiter.api.Disabled | ||
import org.junit.jupiter.api.Test | ||
|
||
/**
 * Integration tests running [DeeplTranslatorEngine] on top of a default [OkHttpDeeplClient].
 *
 * All these tests are disabled because they use the Deepl pro api, which can be expensive.
 */
class DeeplTranslateIntegrationTest {
    private val engine = DeeplTranslatorEngine(OkHttpDeeplClient())

    /** Plain sentence, French to English. */
    @Test
    @Disabled
    fun simpleTest() {
        val source = "Bonjour, je voudrais me rendre à New-York Mardi prochain"
        val expected = "Hello, I would like to go to New York next Tuesday."

        val actual = engine.translate(source, Locale.FRENCH, Locale.ENGLISH)

        assertEquals(expected, actual)
    }

    /** Text with an emoticon, line breaks and escaped quotes. */
    @Test
    @Disabled
    fun testWithEmoticonAndAntislash() {
        val source = "Bonjour, je suis l'Agent virtuel SNCF Voyageurs! \uD83E\uDD16\n" +
            "Je vous informe sur l'état du trafic en temps réel.\n" +
            "Dites-moi par exemple \"Mon train 6111 est-il à l'heure ?\", \"Aller à Saint-Lazare\", \"Prochains départs Gare de Lyon\" ..."
        val expected = "Hello, I'm the SNCF Voyageurs Virtual Agent! \uD83E\uDD16\n" +
            "I inform you about traffic conditions in real time.\n" +
            "Tell me for example \"Is my train 6111 on time?\", \"Going to Saint-Lazare\", \"Next departures Gare de Lyon\" ..."

        val actual = engine.translate(source, Locale.FRENCH, Locale.ENGLISH)

        assertEquals(expected, actual)
    }

    /** `{:name}` placeholders must come back untouched. */
    @Test
    @Disabled
    fun testWithParameters() {
        val source = "Bonjour, je voudrais me rendre à {:city} {:date}"
        val expected = "Hallo, ich würde gerne nach {:city} {:date} fahren."

        val actual = engine.translate(source, Locale.FRENCH, Locale.GERMAN)

        assertEquals(expected, actual)
    }

    /** HTML tags must be kept in the translated text. */
    @Test
    @Disabled
    fun testWithHTML() {
        val source = "Bonjour, je voudrais me rendre à Paris <br><br/> demain soir"
        val expected = "Hallo, ich möchte morgen Abend nach Paris <br><br/> fahren"

        val actual = engine.translate(source, Locale.FRENCH, Locale.GERMAN)

        assertEquals(expected, actual)
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters