From 354bb9a59e48cadf8b457c460feda7eff1c98492 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Auricoste?= Date: Tue, 28 Nov 2023 10:50:15 +0100 Subject: [PATCH] fix: lbac-1777: refactor import referentiel opco Constructys (#848) * fix: refactor import referentiel opco Constructys * Update server/src/jobs/lba_recruteur/opco/constructys/constructysImporter.ts Co-authored-by: Kevin Barnoin * fix: remove accents from emails --------- Co-authored-by: Kevin Barnoin --- server/src/commands.ts | 6 ++ .../referentielOpco/referentielOpco.schema.ts | 2 +- server/src/common/utils/fileUtils.ts | 4 +- server/src/jobs/jobs.ts | 3 + .../opco/constructys/constructysImporter.ts | 76 ++++++++++--------- server/src/services/constant.service.ts | 13 ---- server/src/services/etablissement.service.ts | 1 + server/src/services/opco.service.ts | 13 +++- shared/constants/recruteur.ts | 24 +++--- shared/models/referentielOpco.model.ts | 6 ++ shared/utils/index.ts | 1 + shared/utils/stringUtils.test.ts | 26 +++++++ shared/utils/stringUtils.ts | 1 + 13 files changed, 111 insertions(+), 65 deletions(-) create mode 100644 shared/utils/stringUtils.test.ts create mode 100644 shared/utils/stringUtils.ts diff --git a/server/src/commands.ts b/server/src/commands.ts index 354b710926..93cf0332f6 100644 --- a/server/src/commands.ts +++ b/server/src/commands.ts @@ -530,6 +530,12 @@ program .option("-q, --queued", "Run job asynchronously", false) .action(createJobAction("user-recruters:data-validation:fix")) +program + .command("import-referentiel-opco-constructys") + .description("Importe les emails pour la collection ReferentielOpco depuis l'opco Constructys") + .option("-q, --queued", "Run job asynchronously", false) + .action(createJobAction("referentiel-opco:constructys:import")) + export async function startCLI() { await program.parseAsync(process.argv) } diff --git a/server/src/common/model/schema/referentielOpco/referentielOpco.schema.ts b/server/src/common/model/schema/referentielOpco/referentielOpco.schema.ts index 748012f63d..fa103b611c 100644 --- a/server/src/common/model/schema/referentielOpco/referentielOpco.schema.ts +++ b/server/src/common/model/schema/referentielOpco/referentielOpco.schema.ts @@ -1,6 +1,6 @@ +import { OPCOS } from "shared/constants/recruteur" import { IReferentielOpco } from "shared/models" -import { OPCOS } from "../../../../services/constant.service" import { model, Schema } from "../../../mongodb" export const referentielOpcoSchema = new Schema( diff --git a/server/src/common/utils/fileUtils.ts b/server/src/common/utils/fileUtils.ts index 372b19861e..b3e904179f 100644 --- a/server/src/common/utils/fileUtils.ts +++ b/server/src/common/utils/fileUtils.ts @@ -1,7 +1,7 @@ import path from "path" import csvToJson from "convert-csv-to-json" -import { parse } from "csv-parse" +import { Options as CsvParseOptions, parse } from "csv-parse" import { isEmpty, pickBy } from "lodash-es" import XLSX from "xlsx" @@ -48,7 +48,7 @@ export const prepareMessageForMail = (data) => { return result ? result.replace(/\r\n|\r|\n/gi, "
") : result } -export const parseCsv = (options = {}) => { +export const parseCsv = (options: CsvParseOptions = {}) => { return parse({ trim: true, delimiter: ";", diff --git a/server/src/jobs/jobs.ts b/server/src/jobs/jobs.ts index 404d48e63d..db1312604d 100644 --- a/server/src/jobs/jobs.ts +++ b/server/src/jobs/jobs.ts @@ -33,6 +33,7 @@ import { updateAddressDetailOnRecruitersCollection } from "./lba_recruteur/formu import { updateMissingStartDate } from "./lba_recruteur/formulaire/misc/updateMissingStartDate" import { relanceFormulaire } from "./lba_recruteur/formulaire/relanceFormulaire" import { generateIndexes } from "./lba_recruteur/indexes/generateIndexes" +import { importReferentielOpcoFromConstructys } from "./lba_recruteur/opco/constructys/constructysImporter" import { relanceOpco } from "./lba_recruteur/opco/relanceOpco" import { createOffreCollection } from "./lba_recruteur/seed/createOffre" import { fillRecruiterRaisonSociale } from "./lba_recruteur/user/misc/fillRecruiterRaisonSociale" @@ -329,6 +330,8 @@ export async function runJob(job: IInternalJobsCronTask | IInternalJobsSimple): return fixRecruiterDataValidation() case "user-recruters:data-validation:fix": return fixUserRecruiterDataValidation() + case "referentiel-opco:constructys:import": + return importReferentielOpcoFromConstructys() /////// case "mongodb:indexes:create": return createMongoDBIndexes() diff --git a/server/src/jobs/lba_recruteur/opco/constructys/constructysImporter.ts b/server/src/jobs/lba_recruteur/opco/constructys/constructysImporter.ts index 18f7e12065..551920ac0c 100644 --- a/server/src/jobs/lba_recruteur/opco/constructys/constructysImporter.ts +++ b/server/src/jobs/lba_recruteur/opco/constructys/constructysImporter.ts @@ -1,26 +1,26 @@ import { createReadStream } from "fs" import path from "path" -import Joi from "joi" -import { filterData, oleoduc, transformData, writeData } from "oleoduc" +import { oleoduc, transformData, writeData } from "oleoduc" +import { removeAccents } from "shared" +import { OPCOS } from "shared/constants/recruteur" + +import { notifyToSlack } from "@/common/utils/slackUtils" +import { prepareReferentielOpcoForInsert } from "@/services/opco.service" import __dirname from "../../../../common/dirname" import { logger } from "../../../../common/logger" import { ReferentielOpco } from "../../../../common/model/index" import { fileDownloader, parseCsv } from "../../../../common/utils/fileUtils" import config from "../../../../config" -import { runScript } from "../../../scriptWrapper" - -const importer = async (filePath, remoteFileName, opco_label) => { - logger.info("Downloading file...") - await fileDownloader(filePath, remoteFileName, config.ftp.constructys) +const importer = async (filePath: string, opco_label: OPCOS) => { logger.info(`Deleting collection entries for ${opco_label}...`) await ReferentielOpco.deleteMany({ opco_label }) logger.info("Importing Data...") - const stat = { + const stats = { error: 0, total: 0, imported: 0, @@ -28,49 +28,53 @@ const importer = async (filePath, remoteFileName, opco_label) => { await oleoduc( createReadStream(filePath), - parseCsv(), - filterData((e) => e.Mails), + parseCsv({ delimiter: ";", encoding: "latin1" /* identique à ISO-8859-1 */ }), transformData((e) => { - const emails: string[] = [] - const { Siret, Mails } = e - - const emailsArray = Mails.split(/,|;| /).filter((x) => x) - const emailsArrayDuplicateFree = [...new Set(emailsArray)] - - for (const email of emailsArrayDuplicateFree) { - stat.total++ - const { error, value } = Joi.string().email().validate(email, { abortEarly: false }) - - if (error) { - stat.error++ - return - } - - stat.imported++ - emails.push(value) + const { Siret } = e + stats.total++ + const csvEmailStr = e["Email du contact"] + const emailsArray = removeAccents(csvEmailStr) + .split(/,|;| /) + .filter((x) => x) + const referentielOpt = prepareReferentielOpcoForInsert({ opco_label, siret_code: Siret, emails: emailsArray }) + if (referentielOpt) { + stats.imported++ + return referentielOpt + } else { + logger.error("could not import", { siret: Siret, emails: csvEmailStr }) + stats.error++ + return } - - return { siret_code: Siret, emails: [...new Set(emails)] } }), writeData( - async ({ siret_code, emails }) => { - await ReferentielOpco.create({ opco_label, siret_code, emails }) + async (referentiel) => { + const { siret_code } = referentiel + await ReferentielOpco.findOneAndUpdate({ siret_code }, { $set: referentiel }, { upsert: true }).lean() }, { parallel: 500 } ) ) logger.info("Data import done.") - return stat + await notifyToSlack({ + subject: "import referentiel opco Constructys", + message: `${stats.total} documents. ${stats.error} erreurs. ${stats.imported} mises à jour`, + error: stats.error > 0, + }) + return stats } -runScript(async () => { +export const importReferentielOpcoFromConstructys = async () => { logger.info("Constructys data import starting...") + + logger.info("Downloading file...") const dirname = __dirname(import.meta.url) const filePath = path.resolve(dirname, "./constructys-data.csv") const remoteFileName = "CTYS_MATCHA.csv" - const opco_label = "Constructys" + await fileDownloader(filePath, remoteFileName, config.ftp.constructys) - const result = await importer(filePath, remoteFileName, opco_label) + logger.info("Importing file...") + const opco_label = OPCOS.CONSTRUCTYS + const result = await importer(filePath, opco_label) return result -}) +} diff --git a/server/src/services/constant.service.ts b/server/src/services/constant.service.ts index 439b2fc034..97c050d9bf 100644 --- a/server/src/services/constant.service.ts +++ b/server/src/services/constant.service.ts @@ -34,19 +34,6 @@ export const REGEX = { GEO: /^(-?\d+(\.\d+)?),\s*(-?\d+(\.\d+)?)$/, TELEPHONE: /^[0-9]{10}$/, } -export const OPCOS = { - AFDAS: "AFDAS", - AKTO: "AKTO / Opco entreprises et salariés des services à forte intensité de main d'oeuvre", - ATLAS: "ATLAS", - CONSTRUCTYS: "Constructys", - OPCOMMERCE: "L'Opcommerce", - OCAPIAT: "OCAPIAT", - OPCO2I: "OPCO 2i", - EP: "Opco entreprises de proximité", - MOBILITE: "Opco Mobilités", - SANTE: "Opco Santé", - UNIFORMATION: "Uniformation, l'Opco de la Cohésion sociale", -} export const NIVEAUX_POUR_LBA = { INDIFFERENT: "Indifférent", diff --git a/server/src/services/etablissement.service.ts b/server/src/services/etablissement.service.ts index 75ef00d723..eb7a2453b6 100644 --- a/server/src/services/etablissement.service.ts +++ b/server/src/services/etablissement.service.ts @@ -324,6 +324,7 @@ export const getGeoCoordinates = async (adresse: string): Promise => { throw newError } } + /** * @description Get matching records from the ReferentielOpco collection for a given siret & email * @param {IReferentielOpco["siret_code"]} siretCode diff --git a/server/src/services/opco.service.ts b/server/src/services/opco.service.ts index a1a1d3ef4e..3a1bc63ea2 100644 --- a/server/src/services/opco.service.ts +++ b/server/src/services/opco.service.ts @@ -1,10 +1,11 @@ import memoize from "memoizee" +import { OPCOS } from "shared/constants/recruteur" +import { IReferentielOpco, ZReferentielOpcoInsert } from "shared/models" import { Opco } from "../common/model/index" import { IOpco } from "../common/model/schema/opco/opco.types" import { CFADOCK_FILTER_LIMIT, fetchOpcosFromCFADock } from "./cfadock.service" -import { OPCOS } from "./constant.service" /** * @description get opco from database collection OPCOS @@ -123,3 +124,13 @@ export const filterJobsByOpco = async ({ jobs, opco, opcoUrl }: { jobs: any[]; o return results } + +export const prepareReferentielOpcoForInsert = (referentiel: Omit) => { + if (ZReferentielOpcoInsert.safeParse(referentiel).success && referentiel.emails.length) { + const deduplicatedEmails = [...new Set(referentiel.emails)] + referentiel.emails = deduplicatedEmails + return referentiel + } else { + return false + } +} diff --git a/shared/constants/recruteur.ts b/shared/constants/recruteur.ts index 1e4393e09e..30bffd5c72 100644 --- a/shared/constants/recruteur.ts +++ b/shared/constants/recruteur.ts @@ -34,18 +34,18 @@ export const REGEX = { GEO: /^(-?\d+(\.\d+)?),\s*(-?\d+(\.\d+)?)$/, TELEPHONE: /^[0-9]{10}$/, } -export const OPCOS = { - AFDAS: "AFDAS", - AKTO: "AKTO / Opco entreprises et salariés des services à forte intensité de main d'oeuvre", - ATLAS: "ATLAS", - CONSTRUCTYS: "Constructys", - OPCOMMERCE: "L'Opcommerce", - OCAPIAT: "OCAPIAT", - OPCO2I: "OPCO 2i", - EP: "Opco entreprises de proximité", - MOBILITE: "Opco Mobilités", - SANTE: "Opco Santé", - UNIFORMATION: "Uniformation, l'Opco de la Cohésion sociale", +export enum OPCOS { + AFDAS = "AFDAS", + AKTO = "AKTO / Opco entreprises et salariés des services à forte intensité de main d'oeuvre", + ATLAS = "ATLAS", + CONSTRUCTYS = "Constructys", + OPCOMMERCE = "L'Opcommerce", + OCAPIAT = "OCAPIAT", + OPCO2I = "OPCO 2i", + EP = "Opco entreprises de proximité", + MOBILITE = "Opco Mobilités", + SANTE = "Opco Santé", + UNIFORMATION = "Uniformation, l'Opco de la Cohésion sociale", } export const NIVEAUX_POUR_LBA = { diff --git a/shared/models/referentielOpco.model.ts b/shared/models/referentielOpco.model.ts index 8623de41f5..85421021b6 100644 --- a/shared/models/referentielOpco.model.ts +++ b/shared/models/referentielOpco.model.ts @@ -13,4 +13,10 @@ export const ZReferentielOpco = z }) .strict() +export const ZReferentielOpcoInsert = ZReferentielOpco.pick({ + opco_label: true, + siret_code: true, + emails: true, +}) + export type IReferentielOpco = z.output diff --git a/shared/utils/index.ts b/shared/utils/index.ts index 9bfe69277e..313dfbace4 100644 --- a/shared/utils/index.ts +++ b/shared/utils/index.ts @@ -1 +1,2 @@ export * from "./assertUnreachable" +export * from "./stringUtils" diff --git a/shared/utils/stringUtils.test.ts b/shared/utils/stringUtils.test.ts new file mode 100644 index 0000000000..b24d8e976f --- /dev/null +++ b/shared/utils/stringUtils.test.ts @@ -0,0 +1,26 @@ +import { describe, it, expect } from "vitest" + +import { removeAccents } from "./stringUtils" + +describe("stringUtils", () => { + describe("removeAccents", () => { + it("should remove standard accents", () => { + expect(removeAccents("àâä")).toBe("aaa") + expect(removeAccents("éêëè")).toBe("eeee") + expect(removeAccents("ïî")).toBe("ii") + expect(removeAccents("ôö")).toBe("oo") + expect(removeAccents("üùû")).toBe("uuu") + expect(removeAccents("ÿŷ")).toBe("yy") + }) + it("should remove ç => c", () => { + expect(removeAccents("ç")).toBe("c") + }) + it("should remove accents from capital letters", () => { + expect(removeAccents("ÄÂÊËÏÎÔÖÛÜŸŶ")).toBe("AAEEIIOOUUYY") + }) + it("should not change standard characters", () => { + const unchanged = `&"'(-_)=$*µ$£%!§:/;.,?~#{}[]|^@\`` + expect(removeAccents(unchanged)).toBe(unchanged) + }) + }) +}) diff --git a/shared/utils/stringUtils.ts b/shared/utils/stringUtils.ts new file mode 100644 index 0000000000..357a0be925 --- /dev/null +++ b/shared/utils/stringUtils.ts @@ -0,0 +1 @@ +export const removeAccents = (str: string) => str.normalize("NFD").replace(/[\u0300-\u036f]/g, "")