Skip to content

Commit

Permalink
#1606 Add Deepl translator module (#1671)
Browse files Browse the repository at this point in the history
* #1606 Add Deepl translator module

* #1606 Add Deepl translator module - first set of corrections

* #1606 Add Deepl translator module - first set of corrections

* #1606 Add Deepl translator module - second set of corrections

* #1606 : Use of TockProxyAuthenticator + use of glossary map ids for all languages

* #1606 translator: clean up module

* #1606 : Correction of the glossary id name

---------

Co-authored-by: charles_moulhaud <[email protected]>
Co-authored-by: Fabilin <[email protected]>
  • Loading branch information
3 people authored and Morgan Diverrez committed Aug 26, 2024
1 parent 707e3a7 commit 27c7bad
Show file tree
Hide file tree
Showing 7 changed files with 376 additions and 0 deletions.
45 changes: 45 additions & 0 deletions translator/deepl-translate/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# TOCK Deepl Translation

Here are the configurable variables:

- `tock_translator_deepl_target_languages`: set of supported languages - ex : en,es
- `tock_translator_deepl_api_url`: Deepl api url (default pro api url : https://api.deepl.com/v2/translate).
If you have problems with pro api, you can use free api : https://api-free.deepl.com/v2/translate
- `tock_translator_deepl_api_key` : Deepl api key to use (see your account)
- `tock_translator_deepl_glossary_map_ids`: glossary identifiers to use in translation, as a map keyed by target language code (ex : en, es)

> Deepl documentation: https://developers.deepl.com/docs

To integrate the module into a custom Tock Admin, pass the module as a parameter to the `ai.tock.nlp.admin.startAdminServer()` function.

Example:

```kt
package ai.tock.bot.admin

import ai.tock.nlp.admin.startAdminServer
import ai.tock.translator.deepl.deeplTranslatorModule

fun main() {
startAdminServer(deeplTranslatorModule)
}
```

## Http Client Configuration

You can configure the Deepl client, including proxy settings, by passing a custom client to `configureDeeplTranslatorModule`:

```kt
startAdminServer(configureDeeplTranslatorModule(OkHttpDeeplClient {
proxyAuthenticator { _: Route?, response: Response ->
// https://square.github.io/okhttp/3.x/okhttp/index.html?okhttp3/Authenticator.html
if (response.challenges().any { it.scheme.equals("OkHttp-Preemptive", ignoreCase = true) }) {
response.request.newBuilder()
.header("Proxy-Authorization", credential)
.build()
} else {
null
}
}
}))
```
52 changes: 52 additions & 0 deletions translator/deepl-translate/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
~ Copyright (C) 2017/2021 e-voyageurs technologies
~
~ Licensed under the Apache License, Version 2.0 (the "License");
~ you may not use this file except in compliance with the License.
~ You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing, software
~ distributed under the License is distributed on an "AS IS" BASIS,
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~ See the License for the specific language governing permissions and
~ limitations under the License.
-->

<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>ai.tock</groupId>
<artifactId>tock-translator</artifactId>
<version>24.3.5-SNAPSHOT</version>
</parent>

<artifactId>tock-deepl-translate</artifactId>
<name>Tock Deepl Translator</name>
<description>Deepl translator implementation</description>

<dependencies>
<!-- No version declared here; presumably managed by a parent POM (TODO confirm). -->
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-text</artifactId>
</dependency>
<!-- No version declared here; presumably managed by a parent POM (TODO confirm). -->
<dependency>
<groupId>ai.tock</groupId>
<artifactId>tock-translator-core</artifactId>
</dependency>
<!-- HTTP client library; unlike the other dependencies, its version is pinned explicitly in this module. -->
<dependency>
<groupId>com.squareup.okhttp3</groupId>
<artifactId>okhttp</artifactId>
<version>4.12.0</version>
</dependency>
<!-- No version declared here; presumably managed by a parent POM (TODO confirm). -->
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-core</artifactId>
</dependency>
</dependencies>

</project>
125 changes: 125 additions & 0 deletions translator/deepl-translate/src/main/kotlin/DeeplClient.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
/*
* Copyright (C) 2017/2021 e-voyageurs technologies
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package ai.tock.translator.deepl

import ai.tock.shared.TockProxyAuthenticator
import ai.tock.shared.jackson.mapper
import ai.tock.shared.property
import ai.tock.shared.propertyOrNull
import com.fasterxml.jackson.module.kotlin.readValue
import java.io.IOException
import java.util.regex.Pattern
import okhttp3.FormBody
import okhttp3.OkHttpClient
import okhttp3.Request

/** Payload deserialized from the translation response body: the list of returned translations. */
internal data class TranslationResponse(val translations: List<Translation>)

/** A single translation entry of a [TranslationResponse], holding the translated text. */
internal data class Translation(val text: String)

/** Value sent as the `tag_handling` form field of every translation request. */
const val TAG_HANDLING: String = "xml"

/**
 * Abstraction over a Deepl translation backend.
 */
interface DeeplClient {
    /**
     * Translates [text] from [sourceLang] to [targetLang].
     *
     * @param text the text to translate
     * @param sourceLang language code of [text]
     * @param targetLang language code of the requested translation
     * @param preserveFormatting formatting flag forwarded to the translation backend
     * @param glossaryId optional glossary identifier, or `null` when no glossary applies
     * @return the translated text, or `null` when the implementation has no translation to return
     */
    fun translate(
        text: String,
        sourceLang: String,
        targetLang: String,
        preserveFormatting: Boolean,
        glossaryId: String?
    ): String?
}

/**
 * [DeeplClient] implementation that calls the Deepl HTTP API through OkHttp.
 *
 * @param apiURL translation endpoint; read from the `tock_translator_deepl_api_url` property and
 * defaulting to `https://api.deepl.com/v2/translate`
 * @param apiKey Deepl authentication key, read from the `tock_translator_deepl_api_key` property;
 * when `null`, [translate] returns its input unchanged without sending any request
 * @param okHttpCustomizer additional configuration applied to the [OkHttpClient.Builder] after
 * [TockProxyAuthenticator] has been installed
 */
class OkHttpDeeplClient(
    private val apiURL: String = property("tock_translator_deepl_api_url", "https://api.deepl.com/v2/translate"),
    private val apiKey: String? = propertyOrNull("tock_translator_deepl_api_key"),
    okHttpCustomizer: OkHttpClient.Builder.() -> Unit = {}
) : DeeplClient {
    private val client = OkHttpClient.Builder()
        .apply(TockProxyAuthenticator::install)
        .apply(okHttpCustomizer)
        .build()

    /**
     * Replaces every `{:name}` placeholder of [text] with a fixed marker.
     *
     * @return the text with markers, paired with the placeholder names in order of appearance
     */
    private fun replaceSpecificPlaceholders(text: String): Pair<String, List<String>> {
        val matcher = PLACEHOLDER_PATTERN.matcher(text)

        // Remember the original placeholder names so they can be restored after translation.
        val placeholders = mutableListOf<String>()
        while (matcher.find()) {
            placeholders.add(matcher.group(1))
        }

        // Matcher.replaceAll resets the matcher first, so the scan above does not affect it.
        return Pair(matcher.replaceAll(PLACEHOLDER_MARKER), placeholders)
    }

    /**
     * Restores the `{:name}` placeholders in [text]: the n-th marker found is replaced with the
     * n-th entry of [placeholders].
     */
    private fun revertSpecificPlaceholders(text: String, placeholders: List<String>): String =
        placeholders.fold(text) { restored, placeholder ->
            restored.replaceFirst(PLACEHOLDER_MARKER, "{:$placeholder}")
        }

    /**
     * Translates [text] by posting a form-encoded request to [apiURL].
     *
     * `{:name}` placeholders are swapped for a marker before the request and restored in the
     * returned translation.
     *
     * @return the first translation of the response with placeholders restored, `null` when the
     * response contains no translation, or [text] unchanged when no API key is configured
     * @throws IOException when the HTTP call is unsuccessful or the response has no body
     */
    override fun translate(
        text: String,
        sourceLang: String,
        targetLang: String,
        preserveFormatting: Boolean,
        glossaryId: String?
    ): String? {
        if (apiKey == null) return text

        val (textWithPlaceholders, originalPlaceholders) = replaceSpecificPlaceholders(text)

        val requestBody = FormBody.Builder()
            .add("text", textWithPlaceholders)
            .add("source_lang", sourceLang)
            .add("target_lang", targetLang)
            .add("preserve_formatting", preserveFormatting.toString())
            .add("tag_handling", TAG_HANDLING)
            // The glossary must be added before build(): fields added to the builder afterwards
            // are not part of the already-built body.
            .apply { glossaryId?.let { add("glossary_id", it) } }
            .build()

        val request = Request.Builder()
            .url(apiURL)
            .addHeader("Authorization", "DeepL-Auth-Key $apiKey")
            .post(requestBody)
            .build()

        // use {} closes the response (and its body) whatever the outcome.
        return client.newCall(request).execute().use { response ->
            if (!response.isSuccessful) throw IOException("Unexpected code $response")

            val responseBody = response.body?.string()
                ?: throw IOException("Empty response body $response")
            val translationResponse = mapper.readValue<TranslationResponse>(responseBody)

            translationResponse.translations.firstOrNull()?.text
                ?.let { revertSpecificPlaceholders(it, originalPlaceholders) }
        }
    }

    private companion object {
        /** Marker substituted for each `{:name}` placeholder in the text sent for translation. */
        const val PLACEHOLDER_MARKER = "_PLACEHOLDER_"

        /** Matches `{:name}` placeholders and captures `name`; compiled once and shared. */
        val PLACEHOLDER_PATTERN: Pattern = Pattern.compile("\\{:([^}]*)}")
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/*
* Copyright (C) 2017/2021 e-voyageurs technologies
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package ai.tock.translator.deepl

import ai.tock.shared.mapProperty
import ai.tock.shared.propertyOrNull
import ai.tock.translator.TranslatorEngine
import java.util.Locale
import org.apache.commons.text.StringEscapeUtils

/**
 * [TranslatorEngine] that delegates translation to a [DeeplClient].
 *
 * Configuration is read from properties:
 * - `tock_translator_deepl_target_languages`: optional comma-separated list of target language
 *   codes; when absent, every target language is sent to the client
 * - `tock_translator_deepl_glossary_map_ids`: map whose entry for the target language code, if
 *   any, is passed to the client as glossary identifier
 */
internal class DeeplTranslatorEngine(client: DeeplClient) : TranslatorEngine {
    private val deeplClient = client

    private val supportedLanguages: Set<String>? =
        propertyOrNull("tock_translator_deepl_target_languages")
            ?.split(",")
            ?.map { it.trim() }
            ?.toSet()
    private val glossaryMapIds = mapProperty("tock_translator_deepl_glossary_map_ids", emptyMap())
    override val supportAdminTranslation: Boolean = true

    /**
     * Translates [text] from the language of [source] to the language of [target] and unescapes
     * HTML 4 entities in the result.
     *
     * @return the translated text, or an empty string when the target language is filtered out
     * by the configured target languages or when the client returns no translation
     */
    override fun translate(text: String, source: Locale, target: Locale): String {
        // Allows to filter translation on a specific language
        if (supportedLanguages != null && target.language !in supportedLanguages) return ""

        val translatedText = deeplClient.translate(
            text = text,
            sourceLang = source.language,
            targetLang = target.language,
            preserveFormatting = true,
            glossaryId = glossaryMapIds[target.language]
        )
        // The client may return null; never hand a null to a non-null String through the
        // Java platform type returned by unescapeHtml4.
        return translatedText?.let { StringEscapeUtils.unescapeHtml4(it) } ?: ""
    }
}
31 changes: 31 additions & 0 deletions translator/deepl-translate/src/main/kotlin/DeeplTranslatorIoc.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
/*
* Copyright (C) 2017/2021 e-voyageurs technologies
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package ai.tock.translator.deepl

import ai.tock.translator.TranslatorEngine
import com.github.salomonbrys.kodein.Kodein
import com.github.salomonbrys.kodein.bind
import com.github.salomonbrys.kodein.provider

/**
 * Ready-to-use Deepl translator module for a Kodein injector, built by
 * [configureDeeplTranslatorModule] with its default client.
 */
val deeplTranslatorModule: Kodein.Module = configureDeeplTranslatorModule()

/**
 * Builds a Kodein module binding [TranslatorEngine] (overriding any existing binding) to a
 * provider of [DeeplTranslatorEngine] instances backed by [client].
 *
 * @param client the Deepl client used by the provided engines; defaults to an [OkHttpDeeplClient]
 * created with its default arguments
 */
fun configureDeeplTranslatorModule(client: DeeplClient = OkHttpDeeplClient()): Kodein.Module =
    Kodein.Module {
        bind<TranslatorEngine>(overrides = true) with provider { DeeplTranslatorEngine(client) }
    }
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
/*
* Copyright (C) 2017/2021 e-voyageurs technologies
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package ai.tock.translator.deepl

import java.util.Locale
import kotlin.test.assertEquals
import org.junit.jupiter.api.Disabled
import org.junit.jupiter.api.Test

/**
 * Integration tests for [DeeplTranslatorEngine] backed by a default [OkHttpDeeplClient].
 *
 * Every test is disabled because it relies on the Deepl pro API, which can be expensive.
 */
class DeeplTranslateIntegrationTest {
    private val deeplTranslatorEngine = DeeplTranslatorEngine(OkHttpDeeplClient())

    @Test
    @Disabled
    fun simpleTest() {
        val frenchText = "Bonjour, je voudrais me rendre à New-York Mardi prochain"
        val expected = "Hello, I would like to go to New York next Tuesday."

        val actual = deeplTranslatorEngine.translate(frenchText, Locale.FRENCH, Locale.ENGLISH)

        assertEquals(expected, actual)
    }

    @Test
    @Disabled
    fun testWithEmoticonAndAntislash() {
        val frenchText = "Bonjour, je suis l'Agent virtuel SNCF Voyageurs! \uD83E\uDD16\n" +
            "Je vous informe sur l'état du trafic en temps réel.\n" +
            "Dites-moi par exemple \"Mon train 6111 est-il à l'heure ?\", \"Aller à Saint-Lazare\", \"Prochains départs Gare de Lyon\" ..."
        val expected = "Hello, I'm the SNCF Voyageurs Virtual Agent! \uD83E\uDD16\n" +
            "I inform you about traffic conditions in real time.\n" +
            "Tell me for example \"Is my train 6111 on time?\", \"Going to Saint-Lazare\", \"Next departures Gare de Lyon\" ..."

        val actual = deeplTranslatorEngine.translate(frenchText, Locale.FRENCH, Locale.ENGLISH)

        assertEquals(expected, actual)
    }

    @Test
    @Disabled
    fun testWithParameters() {
        val frenchText = "Bonjour, je voudrais me rendre à {:city} {:date}"
        val expected = "Hallo, ich würde gerne nach {:city} {:date} fahren."

        val actual = deeplTranslatorEngine.translate(frenchText, Locale.FRENCH, Locale.GERMAN)

        assertEquals(expected, actual)
    }

    @Test
    @Disabled
    fun testWithHTML() {
        val frenchText = "Bonjour, je voudrais me rendre à Paris <br><br/> demain soir"
        val expected = "Hallo, ich möchte morgen Abend nach Paris <br><br/> fahren"

        val actual = deeplTranslatorEngine.translate(frenchText, Locale.FRENCH, Locale.GERMAN)

        assertEquals(expected, actual)
    }
}
1 change: 1 addition & 0 deletions translator/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
<module>core</module>
<module>noop</module>
<module>google-translate</module>
<module>deepl-translate</module>
</modules>

<dependencies>
Expand Down

0 comments on commit 27c7bad

Please sign in to comment.