From 8485a7c87f4172e331fe629c1c5e9d35e9ecafb4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Sat, 13 Aug 2022 16:35:44 +0000 Subject: [PATCH 001/163] Amorce v3 : setup DB, chargement TOPO --- bano/bin.py | 162 +------- bano/constants.py | 12 + bano/data/code_dir.json | 1 + bano/db.py | 6 +- bano/db_helpers.py | 40 -- bano/export.py | 195 ---------- bano/helpers.py | 180 +-------- bano/log_2_file.py | 34 -- bano/models.py | 357 ------------------ bano/outils_de_gestion.py | 45 --- bano/pre_process_suffixe.py | 101 ----- bano/publish.py | 86 ----- bano/setup_db.py | 8 + bano/sources/topo.py | 64 ++++ bano/sql.py | 31 ++ bano/sql/ban_hsnr_nocache.sql | 32 -- bano/sql/cadastre_2_place.sql | 7 - bano/sql/create_base.sql | 3 - bano/sql/create_table_base_bano_sources.sql | 47 +++ bano/sql/export_csv_dept.sql | 106 ------ bano/sql/export_json_dept_communes.sql | 35 -- bano/sql/export_json_dept_lieux_dits.sql | 66 ---- ...export_json_dept_voies_non_rapprochees.sql | 95 ----- .../export_json_dept_voies_rapprochees.sql | 89 ----- ...n_dept_voies_rapprochees_sans_adresses.sql | 59 --- bano/sql/geom_suffixes_insee.sql | 4 - bano/sql/highway_bbox_insee.sql | 14 - bano/sql/highway_insee.sql | 68 ---- bano/sql/highway_relation_bbox_insee.sql | 13 - bano/sql/highway_relation_insee.sql | 13 - bano/sql/highway_relation_suffixe_insee.sql | 18 - ...ay_relation_suffixe_insee_bbox_nocache.sql | 30 -- ...highway_relation_suffixe_insee_nocache.sql | 30 -- bano/sql/highway_suffixe_insee.sql | 49 --- bano/sql/highway_suffixe_insee_nocache.sql | 48 --- bano/sql/hsnr_bbox_insee.sql | 50 --- bano/sql/hsnr_bbox_insee_nocache.sql | 46 --- bano/sql/hsnr_insee.sql | 77 ---- bano/sql/hsnr_insee_nocache.sql | 73 ---- bano/sql/hsnr_suffixe_insee.sql | 76 ---- bano/sql/lieux_dits_post_process.sql | 32 -- bano/sql/load_code_cadastre.sql | 23 -- bano/sql/place_insee.sql | 29 -- bano/sql/point_par_rue_complement_insee.sql | 34 -- ...point_par_rue_complement_insee_nocache.sql | 35 -- bano/sql/point_par_rue_insee.sql | 52 --- bano/sql/point_par_rue_insee_nocache.sql | 51 --- bano/sql/replace_batiments.sql | 17 - bano/sql/replace_lieux_dits.sql | 16 - bano/sql/type_highway_insee.sql | 34 -- bano/sql/update_table_rep_b_as_bis.sql | 29 -- detection_suffixe.py | 25 -- load_COG.sh | 25 -- load_code_cadastre.py | 55 --- load_code_cadastre.sh | 2 - load_codes_postaux.sh | 11 - load_cumul.py | 161 -------- load_cumul_place.py | 77 ---- load_fantoir.sh | 12 - load_osm_france_db.sh | 24 -- load_population_INSEE.sh | 11 - munin/bano_main | 29 -- munin/bano_rapproche | 18 - out/LICENCE.txt | 24 -- out/banout-all.sh | 8 - out/banout-json-all.sh | 16 - out/banout-json.sh | 161 -------- out/banout.sh | 176 --------- out/csv2ttl.py | 48 --- out/head.sh | 6 - out/lisezmoi-bano.txt | 90 ----- sql/create_and_load_codes_postaux.sql | 29 -- sql/create_table_base_bano.sql | 139 ------- sql/create_table_base_osm.sql | 182 --------- sql/create_table_cog.sql | 57 --- sql/create_table_polygones_communes.sql | 37 -- sql/create_table_polygones_postaux.sql | 22 -- sql/create_table_population_insee.sql | 6 - sql/finalisation.sql | 6 - sql/load_expire_tiles.sql | 6 - sql/load_fantoir.sql | 34 -- sql/post_copie_ban_odbl.sql | 3 - sql/update_table_infos_communes.sql | 82 ---- update_bases_adresses_locales.sh | 15 - update_cadastre_adresses.sh | 15 - update_cadastre_batiments.sh | 22 -- update_cadastre_lieux-dits.sh | 31 -- update_table_infos_communes.sh | 7 - 88 files changed, 181 insertions(+), 4313 deletions(-) create mode 
100644 bano/data/code_dir.json delete mode 100644 bano/db_helpers.py delete mode 100644 bano/export.py delete mode 100644 bano/log_2_file.py delete mode 100644 bano/models.py delete mode 100644 bano/outils_de_gestion.py delete mode 100644 bano/pre_process_suffixe.py delete mode 100644 bano/publish.py create mode 100644 bano/setup_db.py create mode 100644 bano/sources/topo.py create mode 100644 bano/sql.py delete mode 100644 bano/sql/ban_hsnr_nocache.sql delete mode 100644 bano/sql/cadastre_2_place.sql delete mode 100644 bano/sql/create_base.sql create mode 100644 bano/sql/create_table_base_bano_sources.sql delete mode 100644 bano/sql/export_csv_dept.sql delete mode 100644 bano/sql/export_json_dept_communes.sql delete mode 100644 bano/sql/export_json_dept_lieux_dits.sql delete mode 100644 bano/sql/export_json_dept_voies_non_rapprochees.sql delete mode 100644 bano/sql/export_json_dept_voies_rapprochees.sql delete mode 100644 bano/sql/export_json_dept_voies_rapprochees_sans_adresses.sql delete mode 100644 bano/sql/geom_suffixes_insee.sql delete mode 100644 bano/sql/highway_bbox_insee.sql delete mode 100644 bano/sql/highway_insee.sql delete mode 100644 bano/sql/highway_relation_bbox_insee.sql delete mode 100644 bano/sql/highway_relation_insee.sql delete mode 100644 bano/sql/highway_relation_suffixe_insee.sql delete mode 100644 bano/sql/highway_relation_suffixe_insee_bbox_nocache.sql delete mode 100644 bano/sql/highway_relation_suffixe_insee_nocache.sql delete mode 100644 bano/sql/highway_suffixe_insee.sql delete mode 100644 bano/sql/highway_suffixe_insee_nocache.sql delete mode 100644 bano/sql/hsnr_bbox_insee.sql delete mode 100644 bano/sql/hsnr_bbox_insee_nocache.sql delete mode 100644 bano/sql/hsnr_insee.sql delete mode 100644 bano/sql/hsnr_insee_nocache.sql delete mode 100644 bano/sql/hsnr_suffixe_insee.sql delete mode 100644 bano/sql/lieux_dits_post_process.sql delete mode 100644 bano/sql/load_code_cadastre.sql delete mode 100644 bano/sql/place_insee.sql delete mode 100644 bano/sql/point_par_rue_complement_insee.sql delete mode 100644 bano/sql/point_par_rue_complement_insee_nocache.sql delete mode 100644 bano/sql/point_par_rue_insee.sql delete mode 100644 bano/sql/point_par_rue_insee_nocache.sql delete mode 100644 bano/sql/replace_batiments.sql delete mode 100644 bano/sql/replace_lieux_dits.sql delete mode 100644 bano/sql/type_highway_insee.sql delete mode 100644 bano/sql/update_table_rep_b_as_bis.sql delete mode 100755 detection_suffixe.py delete mode 100755 load_COG.sh delete mode 100755 load_code_cadastre.py delete mode 100755 load_code_cadastre.sh delete mode 100755 load_codes_postaux.sh delete mode 100755 load_cumul.py delete mode 100755 load_cumul_place.py delete mode 100755 load_fantoir.sh delete mode 100755 load_osm_france_db.sh delete mode 100755 load_population_INSEE.sh delete mode 100755 munin/bano_main delete mode 100755 munin/bano_rapproche delete mode 100644 out/LICENCE.txt delete mode 100755 out/banout-all.sh delete mode 100755 out/banout-json-all.sh delete mode 100644 out/banout-json.sh delete mode 100644 out/banout.sh delete mode 100644 out/csv2ttl.py delete mode 100755 out/head.sh delete mode 100644 out/lisezmoi-bano.txt delete mode 100644 sql/create_and_load_codes_postaux.sql delete mode 100644 sql/create_table_base_bano.sql delete mode 100644 sql/create_table_base_osm.sql delete mode 100644 sql/create_table_cog.sql delete mode 100644 sql/create_table_polygones_communes.sql delete mode 100644 sql/create_table_polygones_postaux.sql delete mode 100644 
sql/create_table_population_insee.sql delete mode 100644 sql/finalisation.sql delete mode 100644 sql/load_expire_tiles.sql delete mode 100644 sql/load_fantoir.sql delete mode 100644 sql/post_copie_ban_odbl.sql delete mode 100644 sql/update_table_infos_communes.sql delete mode 100755 update_bases_adresses_locales.sh delete mode 100755 update_cadastre_adresses.sh delete mode 100755 update_cadastre_batiments.sh delete mode 100755 update_cadastre_lieux-dits.sh delete mode 100755 update_table_infos_communes.sh diff --git a/bano/bin.py b/bano/bin.py index d8ceb44..dbc3691 100644 --- a/bano/bin.py +++ b/bano/bin.py @@ -1,10 +1,11 @@ +#!/usr/bin/env python +# coding: UTF-8 + import argparse import sys -from .constants import DEPARTEMENTS -from . import core, core_place, pre_process_suffixe, export, publish -from . import update_manager -from .sources import bal, cadastre_gouv, cadastre_json, ban +from . import setup_db +from .sources import topo def main(): @@ -14,157 +15,16 @@ def main(): subparsers = parser.add_subparsers(help="Commandes disponibles") subparser = subparsers.add_parser( - "process_commune", - help="Traite unitairement les adresses d'une commune", - description="Traite unitairement les adresses d'une commune", - ) - subparser.add_argument( - "source", - choices=["OSM", "CADASTRE", "BAL", "BAN"], - type=str, - help="Source des données à traiter", - ) - group = subparser.add_mutually_exclusive_group(required=True) - group.add_argument( - "--code_insee", type=str, help="Code INSEE de la commune à traiter" - ) - group.add_argument("--depts", type=str, help="Départements à traiter (toutes les communes de chaque dept sont traitées une par une)", nargs="*") - group.add_argument("--France", help="Raccourci pour tous les départements d'un coup", action="store_const", const=DEPARTEMENTS) - subparser.set_defaults(func=core.process) - - subparser = subparsers.add_parser( - "process_commune_lieux-dits", - help="Traite unitairement les lieux-dits d'une commune", - description="Traite unitairement les lieux-dits d'une commune" - ) - group = subparser.add_mutually_exclusive_group(required=True) - group.add_argument( - "--code_insee", type=str, help="Code INSEE de la commune à traiter" - ) - group.add_argument("--depts", type=str, help="Départements à traiter (toutes les communes de chaque dept sont traitées une par une)", nargs="*") - group.add_argument("--France", help="Raccourci pour tous les départements d'un coup", action="store_const", const=DEPARTEMENTS) - subparser.set_defaults(func=core_place.process) - - subparser = subparsers.add_parser( - "download_bal", - help="Met à jour les fichiers d'adresses au format BAL", - description="Met à jour les fichiers d'adresses au format BAL", - ) - subparser.add_argument( - "source", - choices=["CADASTRE", "BAL"], - type=str, - help="Source des données à traiter", - ) - subparser.add_argument( - "departements", - type=str, - help="Départements à traiter", - nargs="*", - default=DEPARTEMENTS, - ) - subparser.set_defaults(func=bal.process) - - subparser = subparsers.add_parser( - "download_ban", - help="Met à jour les fichiers d'adresses de source BAN", - description="Met à jour les fichiers d'adresses de source BAN", - ) - subparser.add_argument( - "departements", - type=str, - help="Départements à traiter", - nargs="*", - default=DEPARTEMENTS, - ) - subparser.set_defaults(func=ban.process) - - subparser = subparsers.add_parser( - "download_cadastre", - help="Met à jour les fichiers du cadastre hors adresses au format JSON", - description="Met à jour 
les fichiers du cadastre hors adresses au format JSON", - ) - subparser.add_argument( - "prefixe", choices=["lieux_dits"], type=str, help="Source des données à traiter" - ) - subparser.add_argument( - "departements", - type=str, - help="Départements à traiter", - nargs="*", - default=DEPARTEMENTS, - ) - subparser.set_defaults(func=cadastre_json.process) - - subparser = subparsers.add_parser( - "update_code_cadastre", - help="Met à jour la liste des communes d'après cadastre.gouv.fr - indique le format du Cadastre", - description="Met à jour la liste des communes d'après cadastre.gouv.fr - indique le format du Cadastre", - ) - subparser.set_defaults(func=cadastre_gouv.process) - - subparser = subparsers.add_parser( - "update_bis_table", - help="Identifie les indices de répétition b,t,q assimilables à bis, ter, quater", - description="Identifie les indices de répétition b,t,q assimilables à bis, ter, quater", - ) - subparser.set_defaults(func=ban.update_bis_table) - - subparser = subparsers.add_parser( - "pre_process_suffixe", - help="Détermine les zones où les noms dans le Cadastre sont suffixés", - description="Détermine les zones où les noms dans le Cadastre sont suffixés", - ) - subparser.add_argument( - "departements", - type=str, - help="Départements à traiter", - nargs="*", - default=DEPARTEMENTS, - ) - subparser.set_defaults(func=pre_process_suffixe.process) - - subparser = subparsers.add_parser( - "export", - help="Export par département dans différents formats", - description="Export par département dans différents formats", - ) - subparser.add_argument( - "departements", - type=str, - help="Départements à traiter", - nargs="*", - default=DEPARTEMENTS, - ) - subparser.set_defaults(func=export.process) - - subparser = subparsers.add_parser( - "publish", - help="Publication des exports dans le dossier web de https://bano.openstreetmap.fr/data - fichiers par département", - description="Publication des exports dans le dossier web de https://bano.openstreetmap.fr/data - fichiers par département", - ) - subparser.add_argument( - "departements", - type=str, - help="Départements à traiter", - nargs="*", - default=DEPARTEMENTS, - ) - subparser.set_defaults(func=publish.process) - - subparser = subparsers.add_parser( - "publish_aggregate", - help="Publication des exports dans un dossier web - fichiers France entière", - description="Publication des exports dans un dossier web - fichiers France entière", + "setup_db_bano_sources", + description="Initialisation de la BD des sources : OSM, BAN, TOPO", ) - subparser.set_defaults(func=publish.process_full) + subparser.set_defaults(func=setup_db.setup_bano_sources) subparser = subparsers.add_parser( - "update_insee_lists", - help="Détermine les communes mises à jour grace aux tuiles impactées", - description="Détermine les communes mises à jour grace aux tuiles impactées", + "charge_topo", + description="Charge une version du fichier TOPO", ) - subparser.set_defaults(func=update_manager.update_insee_lists) + subparser.set_defaults(func=topo.import_to_pg) args = parser.parse_args() diff --git a/bano/constants.py b/bano/constants.py index c7ef48d..5d63f28 100644 --- a/bano/constants.py +++ b/bano/constants.py @@ -1,3 +1,7 @@ +#!/usr/bin/env python +# coding: UTF-8 + +import json from pathlib import Path @@ -5,6 +9,12 @@ def load_pairs_from_file(basename): with (Path(__file__).parent / "dictionnaires" / f"{basename}.txt").open() as f: return [l[:-1].split("\t") for l in f.readlines() if not l.startswith("#")] +def load_json_from_file(json_filename): 
+ with (Path(__file__).parent / "data" / json_filename).open() as f: + return json.loads(f.read()) + +def get_const_code_dir(): + return load_json_from_file('code_dir.json') DEPARTEMENTS = [ f"{n:>02}" @@ -72,3 +82,5 @@ def load_pairs_from_file(basename): "motorway", ] HIGHWAY_TYPES_INDEX = {e: 2 ** i for i, e in enumerate(HIGHWAY_TYPES)} + +CODE_VOIE_FANTOIR = "0123456789ABCDEFGHIJKLMNOPQRSTVWXYZ" diff --git a/bano/data/code_dir.json b/bano/data/code_dir.json new file mode 100644 index 0000000..341bff5 --- /dev/null +++ b/bano/data/code_dir.json @@ -0,0 +1 @@ +{"13001":"2","13002":"1","13003":"2","13004":"2","13005":"1","13006":"2","13007":"1","13008":"2","13009":"2","13010":"2","13011":"2","13012":"2","13013":"1","13014":"2","13015":"2","13016":"1","13017":"2","13018":"2","13019":"2","13020":"1","13021":"2","13022":"1","13023":"1","13024":"2","13025":"2","13026":"2","13027":"2","13028":"1","13029":"2","13030":"1","13031":"1","13032":"2","13033":"2","13034":"2","13035":"2","13036":"2","13037":"2","13038":"2","13039":"2","13040":"2","13041":"2","13042":"1","13043":"2","13044":"2","13045":"2","13046":"1","13047":"2","13048":"2","13049":"2","13050":"2","13051":"2","13052":"2","13053":"2","13054":"2","13056":"2","13057":"2","13058":"2","13059":"2","13060":"2","13061":"2","13062":"2","13063":"2","13064":"2","13065":"2","13066":"2","13067":"2","13068":"2","13069":"2","13070":"1","13071":"2","13072":"2","13073":"1","13074":"2","13075":"1","13076":"2","13077":"2","13078":"2","13079":"2","13080":"2","13081":"2","13082":"2","13083":"2","13084":"2","13085":"1","13086":"1","13087":"2","13088":"2","13089":"2","13090":"2","13091":"2","13092":"2","13093":"2","13094":"2","13095":"2","13096":"2","13097":"2","13098":"2","13099":"2","13100":"2","13101":"1","13102":"2","13103":"2","13104":"2","13105":"2","13106":"2","13107":"2","13108":"2","13109":"2","13110":"2","13111":"2","13112":"2","13113":"2","13114":"2","13115":"2","13116":"2","13117":"2","13118":"2","13119":"1","13201":"1","13202":"1","13203":"1","13204":"1","13205":"1","13206":"1","13207":"1","13208":"1","13209":"1","13210":"1","13211":"1","13212":"1","13213":"1","13214":"1","13215":"1","13216":"1","13331":"1","13332":"1","13333":"1","13334":"1","13335":"1","13336":"1","13337":"1","13338":"1","13339":"1","13340":"1","13341":"1","13342":"1","13343":"1","13344":"1","13345":"1","13346":"1","59001":"2","59002":"2","59003":"2","59004":"1","59005":"1","59006":"2","59007":"1","59008":"1","59009":"1","59010":"2","59011":"1","59012":"2","59013":"1","59014":"2","59015":"1","59016":"1","59017":"1","59018":"1","59019":"2","59021":"2","59022":"1","59023":"2","59024":"1","59025":"1","59026":"1","59027":"2","59028":"1","59029":"1","59031":"2","59032":"2","59033":"2","59034":"1","59035":"2","59036":"2","59037":"2","59038":"2","59039":"2","59041":"2","59042":"1","59043":"1","59044":"1","59045":"2","59046":"1","59047":"2","59048":"2","59049":"2","59050":"2","59051":"1","59052":"1","59053":"2","59054":"1","59055":"2","59056":"1","59057":"2","59058":"2","59059":"2","59060":"2","59061":"2","59062":"2","59063":"2","59064":"2","59065":"2","59066":"2","59067":"1","59068":"2","59069":"2","59070":"2","59071":"1","59072":"2","59073":"1","59074":"2","59075":"2","59076":"2","59077":"2","59078":"2","59079":"2","59080":"1","59081":"2","59082":"1","59083":"1","59084":"1","59085":"2","59086":"1","59087":"1","59088":"1","59089":"1","59090":"1","59091":"1","59092":"2","59093":"2","59094":"1","59096":"1","59097":"2","59098":"1","59099":"2","59100":"2","59101":"2","59102":"2","59103":"2
","59104":"2","59105":"1","59106":"1","59107":"1","59108":"2","59109":"2","59110":"1","59111":"1","59112":"2","59113":"1","59114":"2","59115":"1","59116":"2","59117":"1","59118":"2","59119":"1","59120":"1","59121":"2","59122":"2","59123":"1","59124":"1","59125":"2","59126":"1","59127":"2","59128":"1","59129":"1","59130":"1","59131":"1","59132":"2","59133":"1","59134":"2","59135":"1","59136":"2","59137":"2","59138":"2","59139":"2","59140":"2","59141":"2","59142":"2","59143":"1","59144":"2","59145":"1","59146":"1","59147":"2","59148":"2","59149":"2","59150":"1","59151":"2","59152":"1","59153":"2","59154":"1","59155":"1","59156":"1","59157":"2","59158":"1","59159":"1","59160":"2","59161":"2","59162":"1","59163":"1","59164":"2","59165":"1","59166":"2","59167":"2","59168":"1","59169":"2","59170":"1","59171":"2","59172":"2","59173":"1","59174":"2","59175":"2","59176":"2","59177":"2","59178":"1","59179":"2","59180":"1","59181":"2","59182":"1","59183":"1","59184":"1","59185":"1","59186":"2","59187":"2","59188":"2","59189":"1","59190":"2","59191":"2","59192":"2","59193":"1","59194":"2","59195":"1","59196":"1","59197":"1","59198":"2","59199":"1","59200":"1","59201":"1","59202":"1","59203":"1","59204":"2","59205":"2","59206":"2","59207":"2","59208":"1","59209":"2","59210":"1","59211":"1","59212":"1","59213":"2","59214":"1","59215":"2","59216":"2","59217":"2","59218":"2","59219":"2","59220":"1","59221":"2","59222":"1","59223":"2","59224":"1","59225":"2","59226":"2","59227":"1","59228":"1","59229":"2","59230":"2","59231":"2","59232":"2","59233":"2","59234":"1","59236":"2","59237":"1","59238":"2","59239":"1","59240":"2","59241":"2","59242":"2","59243":"2","59244":"2","59246":"2","59247":"1","59248":"1","59249":"2","59250":"1","59251":"2","59252":"1","59253":"2","59254":"1","59255":"2","59256":"1","59257":"1","59258":"1","59259":"2","59260":"1","59261":"2","59262":"1","59263":"1","59264":"2","59265":"2","59266":"1","59267":"2","59268":"1","59269":"2","59270":"2","59271":"1","59272":"1","59273":"1","59274":"2","59275":"1","59276":"1","59277":"2","59278":"1","59279":"1","59280":"1","59281":"1","59282":"1","59283":"2","59284":"2","59285":"2","59286":"1","59287":"2","59288":"2","59289":"2","59290":"2","59291":"2","59292":"2","59293":"1","59294":"2","59295":"1","59296":"2","59297":"2","59299":"1","59300":"2","59301":"2","59302":"2","59303":"1","59304":"1","59305":"1","59306":"2","59307":"1","59308":"1","59309":"1","59310":"2","59311":"2","59312":"2","59313":"2","59314":"1","59315":"2","59316":"1","59317":"1","59318":"1","59319":"1","59320":"1","59321":"2","59322":"2","59323":"2","59324":"2","59325":"2","59326":"1","59327":"1","59328":"1","59329":"1","59330":"1","59331":"2","59332":"1","59333":"2","59334":"1","59335":"2","59336":"1","59337":"1","59338":"1","59339":"1","59340":"1","59341":"2","59342":"2","59343":"1","59344":"2","59345":"1","59346":"1","59347":"2","59348":"2","59349":"2","59350":"1","59351":"2","59352":"1","59353":"2","59354":"1","59355":"1","59356":"1","59357":"2","59358":"1","59359":"1","59360":"1","59361":"2","59363":"2","59364":"1","59365":"2","59366":"1","59367":"1","59368":"1","59369":"2","59370":"2","59371":"1","59372":"2","59374":"2","59375":"1","59377":"2","59378":"1","59379":"1","59381":"2","59382":"2","59383":"2","59384":"2","59385":"2","59386":"1","59387":"2","59388":"1","59389":"2","59390":"1","59391":"2","59392":"2","59393":"2","59394":"2","59395":"2","59396":"2","59397":"1","59398":"1","59399":"1","59400":"1","59401":"1","59402":"1","59403":"2","59404":"1","59405":"2","59406":"2","
59407":"2","59408":"1","59409":"1","59410":"1","59411":"1","59412":"2","59413":"2","59414":"1","59415":"2","59416":"1","59418":"2","59419":"1","59420":"2","59421":"1","59422":"2","59423":"1","59424":"2","59425":"2","59426":"1","59427":"1","59428":"2","59429":"2","59430":"2","59431":"1","59432":"2","59433":"1","59434":"2","59435":"1","59436":"1","59437":"1","59438":"2","59439":"2","59440":"2","59441":"2","59442":"2","59443":"1","59444":"2","59445":"2","59446":"2","59447":"2","59448":"1","59449":"1","59450":"2","59451":"2","59452":"1","59453":"1","59454":"1","59455":"2","59456":"1","59457":"1","59458":"1","59459":"2","59461":"2","59462":"1","59463":"1","59464":"2","59465":"2","59466":"1","59467":"2","59468":"2","59469":"1","59470":"1","59471":"2","59472":"2","59473":"2","59474":"2","59475":"2","59476":"2","59477":"1","59478":"1","59479":"2","59480":"2","59481":"2","59482":"1","59483":"2","59484":"2","59485":"2","59486":"1","59487":"1","59488":"2","59489":"1","59490":"2","59491":"2","59492":"2","59493":"2","59494":"2","59495":"2","59496":"2","59497":"1","59498":"2","59499":"1","59500":"2","59501":"1","59502":"2","59503":"2","59504":"2","59505":"2","59506":"2","59507":"1","59508":"1","59509":"1","59511":"2","59512":"1","59513":"1","59514":"2","59515":"2","59516":"1","59517":"2","59518":"2","59519":"2","59520":"2","59521":"2","59522":"1","59523":"1","59524":"1","59525":"2","59526":"2","59527":"1","59528":"2","59529":"2","59530":"2","59531":"2","59532":"1","59533":"2","59534":"2","59535":"1","59536":"1","59537":"2","59538":"1","59539":"1","59540":"1","59541":"2","59542":"2","59543":"2","59544":"2","59545":"2","59546":"1","59547":"2","59548":"2","59549":"2","59550":"1","59551":"1","59552":"2","59553":"1","59554":"2","59555":"2","59556":"2","59557":"2","59558":"2","59559":"2","59560":"1","59562":"2","59563":"2","59564":"2","59565":"2","59566":"1","59567":"2","59568":"1","59569":"1","59570":"1","59571":"2","59572":"2","59573":"2","59574":"1","59575":"2","59576":"1","59577":"1","59578":"1","59579":"1","59580":"1","59581":"1","59582":"1","59583":"2","59584":"2","59585":"1","59586":"1","59587":"1","59588":"1","59589":"2","59590":"1","59591":"2","59592":"1","59593":"2","59594":"2","59595":"2","59596":"1","59597":"2","59598":"1","59599":"1","59600":"1","59601":"2","59602":"1","59603":"2","59604":"2","59605":"1","59606":"2","59607":"2","59608":"2","59609":"1","59610":"2","59611":"1","59612":"2","59613":"2","59614":"2","59615":"1","59616":"2","59617":"2","59618":"2","59619":"2","59620":"1","59622":"2","59623":"2","59624":"2","59625":"2","59626":"2","59627":"2","59628":"1","59629":"1","59630":"1","59631":"2","59632":"2","59633":"2","59634":"1","59635":"2","59636":"1","59637":"1","59638":"1","59639":"2","59640":"2","59641":"1","59642":"1","59643":"1","59645":"2","59646":"1","59647":"1","59648":"1","59649":"2","59650":"1","59651":"2","59652":"2","59653":"1","59654":"1","59655":"1","59656":"1","59657":"1","59658":"1","59659":"2","59660":"1","59661":"2","59662":"1","59663":"1","59664":"1","59665":"1","59666":"1","59667":"1","59668":"1","59669":"1","59670":"1","75001":"4","75002":"4","75003":"4","75004":"4","75005":"8","75006":"8","75007":"7","75008":"6","75009":"4","75010":"4","75011":"5","75012":"5","75013":"8","75014":"8","75015":"7","75016":"7","75017":"6","75018":"6","75019":"5","75020":"5","75101":"4","75102":"4","75103":"4","75104":"4","75105":"8","75106":"8","75107":"7","75108":"6","75109":"4","75110":"4","75111":"5","75112":"5","75113":"8","75114":"8","75115":"7","75116":"7","75117":"6","75118":"6","751
19":"5","75120":"5","92002":"2","92004":"1","92007":"2","92009":"1","92012":"2","92014":"2","92019":"2","92020":"2","92022":"2","92023":"2","92024":"1","92025":"1","92026":"1","92032":"2","92033":"2","92035":"1","92036":"1","92040":"2","92044":"1","92046":"2","92047":"2","92048":"2","92049":"2","92050":"1","92051":"1","92060":"2","92062":"1","92063":"1","92064":"2","92071":"2","92072":"2","92073":"1","92075":"2","92076":"2","92077":"2","92078":"1","97101":"1","97102":"1","97103":"1","97104":"1","97105":"1","97106":"1","97107":"1","97108":"1","97109":"1","97110":"1","97111":"1","97112":"1","97113":"1","97114":"1","97115":"1","97116":"1","97117":"1","97118":"1","97119":"1","97120":"1","97121":"1","97122":"1","97123":"1","97124":"1","97125":"1","97126":"1","97127":"1","97128":"1","97129":"1","97130":"1","97131":"1","97132":"1","97133":"1","97134":"1","97201":"2","97202":"2","97203":"2","97204":"2","97205":"2","97206":"2","97207":"2","97208":"2","97209":"2","97210":"2","97211":"2","97212":"2","97213":"2","97214":"2","97215":"2","97216":"2","97217":"2","97218":"2","97219":"2","97220":"2","97221":"2","97222":"2","97223":"2","97224":"2","97225":"2","97226":"2","97227":"2","97228":"2","97229":"2","97230":"2","97231":"2","97232":"2","97233":"2","97234":"2","97301":"3","97302":"3","97303":"3","97304":"3","97305":"3","97306":"3","97307":"3","97308":"3","97309":"3","97310":"3","97311":"3","97312":"3","97313":"3","97314":"3","97352":"3","97353":"3","97356":"3","97357":"3","97358":"3","97360":"3","97361":"3","97362":"3","97401":"4","97402":"4","97403":"4","97404":"4","97405":"4","97406":"4","97407":"4","97408":"4","97409":"4","97410":"4","97411":"4","97412":"4","97413":"4","97414":"4","97415":"4","97416":"4","97417":"4","97418":"4","97419":"4","97420":"4","97421":"4","97422":"4","97423":"4","97424":"4","97601":"6","97602":"6","97603":"6","97604":"6","97605":"6","97606":"6","97607":"6","97608":"6","97609":"6","97610":"6","97611":"6","97612":"6","97613":"6","97614":"6","97615":"6","97616":"6","97617":"6","99999":"9"} \ No newline at end of file diff --git a/bano/db.py b/bano/db.py index e04e59a..270f940 100644 --- a/bano/db.py +++ b/bano/db.py @@ -3,6 +3,6 @@ import psycopg2 import psycopg2.extras -bano = psycopg2.connect(os.environ.get("BANO_PG", "dbname='cadastre' user='cadastre'")) -bano_cache = psycopg2.connect(os.environ.get("BANO_PG_CACHE", "dbname='osm' user='cadastre'")) -psycopg2.extras.register_hstore(bano_cache) +# bano = psycopg2.connect(os.environ.get("BANO_PG", "dbname='cadastre' user='cadastre'")) +bano_sources = psycopg2.connect(os.environ.get("BANO_PG_CACHE", "dbname='bano_sources' user='cadastre'")) +# psycopg2.extras.register_hstore(bano_cache) diff --git a/bano/db_helpers.py b/bano/db_helpers.py deleted file mode 100644 index 37160ec..0000000 --- a/bano/db_helpers.py +++ /dev/null @@ -1,40 +0,0 @@ -import time -import os - -from . 
import db - - -def get_insee_name_list_by_dept(dept): - with db.bano.cursor() as conn : - conn.execute(f"""SELECT com, ncc - FROM cog_commune c - LEFT OUTER JOIN (SELECT comparent FROM cog_commune WHERE dep = '{dept}' AND typecom = 'ARM') p - ON (c.com = p.comparent) - WHERE c.dep = '{dept}' AND c.typecom != 'COMD' AND p.comparent IS NULL - ORDER BY 1""") - return conn.fetchall() - -def get_insee_name(insee_com): - with db.bano.cursor() as conn : - conn.execute(f"SELECT insee_com, nom_com FROM code_cadastre WHERE insee_com = '{insee_com}';") - return conn.fetchall() - -def get_cadastre_format(insee_com): - with db.bano.cursor() as conn : - conn.execute(f"SELECT format_cadastre FROM code_cadastre WHERE insee_com = '{insee_com}';") - return conn.fetchone()[0] - -def age_etape_dept(etape,dept): - cur = db.bano.cursor() - str_query = 'SELECT timestamp_debut FROM batch WHERE etape = \'{:s}\' AND dept = \'{:s}\' UNION ALL SELECT 0 ORDER BY 1 DESC;'.format(etape,dept) - cur.execute(str_query) - c = cur.fetchone() - return round(time.mktime(time.localtime()),0) - c[0] - -def process_sql(database,query_name,dict_args): - with open(os.path.join(os.path.dirname(os.path.abspath(__file__)),'sql/{:s}.sql'.format(query_name)),'r') as fq: - str_query = fq.read() - for k,v in dict_args.items(): - str_query = str_query.replace(k,str(v)) - with database.cursor() as cur : - cur.execute(str_query) diff --git a/bano/export.py b/bano/export.py deleted file mode 100644 index 93bd0df..0000000 --- a/bano/export.py +++ /dev/null @@ -1,195 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -import csv -import json -import os -import sys -import subprocess - -from pathlib import Path - -from . import constants, db -from . import helpers as hp - -class Dataset: - def __init__(self, dept): - self.dept = dept - self.csv_query = self.get_csv_query() - self.csv_data = None - self.json_commune_query = self.get_json_commune_query() - self.json_commune_data = None - self.json_voies_rapprochees_query = self.get_json_voies_rapprochees_query() - self.json_voies_rapprochees_data = None - self.json_voies_rapprochees_sans_adresses_query = self.get_json_voies_rapprochees_sans_adresses_query() - self.json_voies_rapprochees_sans_adresses_data = None - self.json_voies_non_rapprochees_query = self.get_json_voies_non_rapprochees_query() - self.json_voies_non_rapprochees_data = None - self.json_lieux_dits_query = self.get_json_lieux_dits_query() - self.json_lieux_dits_data = None - - def get_csv_query(self): - with open(os.path.join(os.path.dirname(os.path.abspath(__file__)),'sql/export_csv_dept.sql'),'r') as fq: - return fq.read().replace('__dept__',self.dept) - - def get_csv_data(self): - with db.bano.cursor() as cur: - cur.execute(self.csv_query) - return cur.fetchall() - - def get_json_commune_query(self): - with open(os.path.join(os.path.dirname(os.path.abspath(__file__)),'sql/export_json_dept_communes.sql'),'r') as fq: - return fq.read().replace('__dept__',self.dept) - - def get_json_commune_data(self): - with db.bano.cursor() as cur: - cur.execute(self.json_commune_query) - return cur.fetchall() - - def get_json_voies_non_rapprochees_query(self): - with open(os.path.join(os.path.dirname(os.path.abspath(__file__)),'sql/export_json_dept_voies_non_rapprochees.sql'),'r') as fq: - return fq.read().replace('__dept__',self.dept) - - def get_json_voies_non_rapprochees_data(self): - with db.bano.cursor() as cur: - cur.execute(self.json_voies_non_rapprochees_query) - return cur.fetchall() - - def 
get_json_voies_rapprochees_query(self): - with open(os.path.join(os.path.dirname(os.path.abspath(__file__)),'sql/export_json_dept_voies_rapprochees.sql'),'r') as fq: - return fq.read().replace('__dept__',self.dept) - - def get_json_voies_rapprochees_sans_adresses_data(self): - with db.bano.cursor() as cur: - cur.execute(self.json_voies_rapprochees_sans_adresses_query) - return cur.fetchall() - - def get_json_voies_rapprochees_sans_adresses_query(self): - with open(os.path.join(os.path.dirname(os.path.abspath(__file__)),'sql/export_json_dept_voies_rapprochees_sans_adresses.sql'),'r') as fq: - return fq.read().replace('__dept__',self.dept) - - def get_json_voies_rapprochees_data(self): - with db.bano.cursor() as cur: - cur.execute(self.json_voies_rapprochees_query) - return cur.fetchall() - - def get_json_lieux_dits_query(self): - with open(os.path.join(os.path.dirname(os.path.abspath(__file__)),'sql/export_json_dept_lieux_dits.sql'),'r') as fq: - return fq.read().replace('__dept__',self.dept) - - def get_json_lieux_dits_data(self): - with db.bano.cursor() as cur: - cur.execute(self.json_lieux_dits_query) - return cur.fetchall() - - def get_target_filename(self,filetype): - return f'bano-{self.dept}.{filetype}' - - def get_sas_full_filename(self,filetype): - return Path(os.environ['EXPORT_SAS_DIR']) / self.get_target_filename(filetype) - - def get_webdir_full_filename(self,filetype): - return Path(os.environ['EXPORT_WEB_DIR']) / self.get_target_filename(filetype) - - def save_as_csv(self): - if not self.csv_data : - self.csv_data = self.get_csv_data() - with open(self.get_sas_full_filename('csv'),'w', newline='') as csvfile: - writer = csv.writer(csvfile,dialect='unix',quoting=csv.QUOTE_MINIMAL) - writer.writerows([l[0:-1] for l in self.csv_data]) - - def save_as_ttl(self): - if not self.csv_data : - self.csv_data = self.get_csv_data() - with open(self.get_sas_full_filename('ttl'),'w') as ttlfile: - ttlfile.write(f"""@prefix xsd: . -@prefix locn: . -@prefix gn: . -@prefix prov: . -@prefix gsp: . -@prefix geo: . -@prefix rdfs: . -@prefix dcat: . -@prefix foaf: . -@prefix dcterms: . - - a dcat:Catalog ; -\tdcterms:title "Donnees des adresses du projet BANO (Base Adresse Nationale Ouverte) en RDF"@fr ; -\tdcterms:description "Le projet BANO en RDF de Base d\'Adresses Nationale Ouverte initie par OpenStreetMap France."@fr ; -\tfoaf:homepage ; -\tdcterms:language "fr" ; -\tdcterms:license ; -\tdcterms:publisher ; #url openstreetmap France -\tdcterms:issued "2014-05-14"^^xsd:date ; # data issued -\tdcterms:modified "2014-08-21"^^xsd:date ; #last modification -\tdcterms:spatial , ; # region/pays (France) -\t. 
-""") - for id,numero,voie,cp,ville,source,lat,lon,*others in self.csv_data: - ttlfile.write(f""" a locn:Address , gn:Feature ; -locn:fullAddress "{numero} {voie}, {cp} {ville}, FRANCE"; -locn:addressId "{id}" ; -locn:locatorDesignator "{numero}" ; -locn:thoroughfare "{voie}"@fr ; -locn:postalCode "{cp}" ; -locn:locatorName "{ville}"@fr ; -locn:adminUnitL1 "FR" ;""") -# traitement des arrondissements municipaux de Paris, Lyon, Marseille - if id[0:2] in '13 69 75' and (int(id[0:5]) in range(13201, 13217) or int(id[0:5]) in range(69381, 69370) or int(id[0:5]) in range(75101, 75121)): - ttlfile.write(f"locn:location ;") - else: - ttlfile.write(f"locn:location ;") - ttlfile.write(f"""locn:geometry ; -locn:geometry [a geo:Point ; geo:lat "{lat}" ; geo:long "{lon}" ] ; -locn:geometry [a gsp:Geometry; gsp:asWKT "POINT({lon} {lat})"^^gsp:wktLiteral ] ; -.""") - - - def save_as_shp(self): - subprocess.run(['ogr2ogr', '-f',"ESRI Shapefile", '-lco', 'ENCODING=UTF-8', '-s_srs', 'EPSG:4326', '-t_srs', 'EPSG:4326', '-overwrite', self.get_sas_full_filename('shp'), 'PG:dbname=cadastre user=cadastre', '-sql', f'{self.csv_query}']) - - def save_as_json(self): - with open(self.get_sas_full_filename('json'),'w') as jsonfile: - if not self.json_commune_data : - self.json_commune_data = self.get_json_commune_data() - for id,type,name,postcode,lat,lon,cityname,departement,region,population,adm_weight,importance,*others in self.json_commune_data: - if ';' in postcode: - postcode = postcode.split(';') - jsonfile.write(f'{{"id":"{id}","type":"{type}", "name":"{name}", "postcode":{json.dumps(postcode)}, "lat":{lat}, "lon":{lon}, "city":"{cityname}", "departement":"{departement}", "region":"{region}", "population":{population}, "adm_weight":{adm_weight}, "importance":{importance}}}\n') - if not self.json_voies_non_rapprochees_data : - self.json_voies_non_rapprochees_data = self.get_json_voies_non_rapprochees_data() - for fantoir,citycode,type,name,postcode,lat,lon,cityname,departement,region,importance,housenumbers,*others in self.json_voies_non_rapprochees_data: - s_housenumbers = ','.join([f'"{s.split("$")[0]}":{{"lat":{s.split("$")[1]},"lon":{s.split("$")[2]}}}' for s in housenumbers.split('#') ]) - if ';' in postcode: - postcode = postcode.split(';') - jsonfile.write(f'{{"id":"{fantoir}","citycode":"{citycode}","type":"{type}","name":"{name}","postcode":{json.dumps(postcode)},"lat":"{lat}","lon":"{lon}","city":"{cityname}","departement":"{departement}","region":"{region}","importance":{importance},"housenumbers":{{{s_housenumbers}}}}}\n') - if not self.json_voies_rapprochees_data : - self.json_voies_rapprochees_data = self.get_json_voies_rapprochees_data() - for fantoir,citycode,type,name,postcode,lat,lon,cityname,departement,region,importance,housenumbers,*others in self.json_voies_rapprochees_data: - s_housenumbers = ','.join([f'"{s.split("$")[0]}":{{"lat":{s.split("$")[1]},"lon":{s.split("$")[2]}}}' for s in housenumbers.split('#') ]) - if ';' in postcode: - postcode = postcode.split(';') - jsonfile.write(f'{{"id":"{fantoir}","citycode":"{citycode}","type":"{type}","name":"{name}","postcode":{json.dumps(postcode)},"lat":"{lat}","lon":"{lon}","city":"{cityname}","departement":"{departement}","region":"{region}","importance":{importance},"housenumbers":{{{s_housenumbers}}}}}\n') - if not self.json_voies_rapprochees_sans_adresses_data : - self.json_voies_rapprochees_sans_adresses_data = self.get_json_voies_rapprochees_sans_adresses_data() - for 
fantoir,citycode,type,name,postcode,lat,lon,cityname,departement,region,importance in self.json_voies_rapprochees_sans_adresses_data: - if ';' in postcode: - postcode = postcode.split(';') - jsonfile.write(f'{{"id":"{fantoir}","citycode":"{citycode}","type":"{type}","name":"{name}","postcode":{json.dumps(postcode)},"lat":"{lat}","lon":"{lon}","city":"{cityname}","departement":"{departement}","region":"{region}","importance":{importance}}}\n') - if not self.json_lieux_dits_data : - self.json_lieux_dits_data = self.get_json_lieux_dits_data() - for fantoir,citycode,type,name,postcode,lat,lon,cityname,departement,region,importance,*others in self.json_lieux_dits_data: - if ';' in postcode: - postcode = postcode.split(';') - jsonfile.write(f'{{"id":"{fantoir}","citycode":"{citycode}","type":"{type}","name":"{name}","postcode":{json.dumps(postcode)},"lat":"{lat}","lon":"{lon}","city":"{cityname}","departement":"{departement}","region":"{region}","importance":{importance}}}\n') - -def process(departements, **kwargs): - for dept in departements: - if not hp.is_valid_dept(dept): - print(f"Code {dept} invalide pour un département - abandon") - continue - d = Dataset(dept) - d.save_as_shp() - d.save_as_csv() - d.save_as_ttl() - d.save_as_json() diff --git a/bano/helpers.py b/bano/helpers.py index 821ca01..7344f2e 100644 --- a/bano/helpers.py +++ b/bano/helpers.py @@ -1,178 +1,6 @@ -from pathlib import Path - -from . import constants - - -def find_cp_in_tags(tags): - return tags.get('addr:postcode') or tags.get('postal_code') or '' - -def escape_quotes(s): - return s.replace('\'','\'\'') - -def remove_quotes(s): - return s.replace('\'','') - -def remove_quotes_on_null(s): - return s.replace("'null'","null") - -def replace_single_quotes_with_double(s): - return s.replace('\'','"') - -def format_toponyme(s): - a_s = s.replace('\'',' ').split(' ') - - # Accents - dic_replace_accents = {} - dic_replace_accents['DERRIERE'] = u'DERRIÈRE' - dic_replace_accents['EGLISE'] = u'ÉGLISE' - dic_replace_accents['ILE'] = u'ÎLE' - dic_replace_accents['ILOT'] = u'ÎLOT' - dic_replace_accents['PRE'] = u'PRÉ' - - for m in range(0,len(a_s)): - if a_s[m] in dic_replace_accents: - a_s[m] = dic_replace_accents[a_s[m]] - - # Capitalisation - a_s = [a.capitalize() for a in a_s] - - # Minuscules - dic_replace_hors_premier_mot = {} - dic_replace_hors_premier_mot['Au'] = 'au' - dic_replace_hors_premier_mot['Aux'] = 'aux' - dic_replace_hors_premier_mot['D'] = 'd\'' - dic_replace_hors_premier_mot['De'] = 'de' - dic_replace_hors_premier_mot['Des'] = 'des' - dic_replace_hors_premier_mot['Du'] = 'du' - dic_replace_hors_premier_mot['Et'] = 'et' - dic_replace_hors_premier_mot['L'] = 'l\'' - dic_replace_hors_premier_mot['La'] = 'la' - dic_replace_hors_premier_mot['Le'] = 'le' - dic_replace_hors_premier_mot['Les'] = 'les' - dic_replace_hors_premier_mot['Un'] = 'un' - dic_replace_hors_premier_mot['Une'] = 'une' - - if len(a_s) > 1: - for m in range(1,len(a_s)): - if a_s[m] in dic_replace_hors_premier_mot: - a_s[m] = dic_replace_hors_premier_mot[a_s[m]] - - # Appostrophes initiale - dic_ajoute_apostrophe = {} - dic_ajoute_apostrophe['d'] = 'd\'' - dic_ajoute_apostrophe['D'] = 'D\'' - dic_ajoute_apostrophe['l'] = 'l\'' - dic_ajoute_apostrophe['L'] = 'L\'' - - if a_s[0] in dic_ajoute_apostrophe: - a_s[0] = dic_ajoute_apostrophe[a_s[0]] - - s = ' '.join(a_s).replace('\' ','\'') - if len(s.strip())>1 and s.strip()[-1] == '\'': - s = s.strip()[0:-1] - return s - -def get_nb_parts(s): - return len(s.split()) - - -def get_part_debut(s,nb_parts): - 
resp = '' - if get_nb_parts(s) > nb_parts: - resp = ' '.join(s.split()[0:nb_parts]) - return resp - - -def is_valid_housenumber(hsnr): - if hsnr: - return len(hsnr) <= 11 - return False - -def is_valid_dept(dept): - return dept in constants.DEPARTEMENTS - def get_code_dept_from_insee(code_insee): - code_dept = code_insee[0:2] - if code_dept == '97': - code_dept = code_insee[0:3] - return code_dept - -def get_sql_like_dept_string(dept): - return (dept+'___')[0:5] - -def normalize(s): - s = s.upper() # tout en majuscules - s = s.split(' (')[0] # parenthèses : on coupe avant - s = s.replace('-',' ') # separateur espace - s = s.replace('\'',' ') # separateur espace - s = s.replace('’',' ') # separateur espace - s = s.replace('/',' ') # separateur espace - s = s.replace(':',' ') # separateur deux points - s = ' '.join(s.split()) # separateur : 1 espace - - for l in iter(constants.LETTRE_A_LETTRE): - for ll in constants.LETTRE_A_LETTRE[l]: - s = s.replace(ll,l) - - -# type de voie - abrev_trouvee = False - p = 5 - while (not abrev_trouvee) and p > -1: - p-= 1 - if get_part_debut(s,p) in constants.ABREV_TYPE_VOIE: - s = replace_type_voie(s,p) - abrev_trouvee = True -# ordinal - s = s.replace(' EME ','EME ') - s = s.replace(' 1ERE',' PREMIERE') - s = s.replace(' 1ER',' PREMIER') - -# chiffres - for c in constants.CHIFFRES: - s = s.replace(c[0],c[1]) - -# titres, etc. - for r in constants.EXPAND_NOMS: - s = s.replace(' '+r[0]+' ',' '+r[1]+' ') - if s[-len(r[0]):] == r[0]: - s = s.replace(' '+r[0],' '+r[1]) - for r in constants.EXPAND_TITRES: - s = s.replace(' '+r[0]+' ',' '+r[1]+' ') - if s[-len(r[0]):] == r[0]: - s = s.replace(' '+r[0],' '+r[1]) - for r in constants.ABREV_TITRES: - s = s.replace(' '+r[0]+' ',' '+r[1]+' ') - if s[-len(r[0]):] == r[0]: - s = s.replace(' '+r[0],' '+r[1]) - -# articles - for c in constants.MOT_A_BLANC: - s = s.replace(' '+c+' ',' ') - -# chiffres romains - sp = s.split() - - if len(sp)>0 and sp[-1] in constants.CHIFFRES_ROMAINS: - sp[-1] = constants.CHIFFRES_ROMAINS[sp[-1]] - s = ' '.join(sp) - -# substitution complete - if s in constants.SUBSTITUTION_COMPLETE: - s = constants.SUBSTITUTION_COMPLETE[s] - return s[0:30] - - -def replace_type_voie(s,nb): - sp = s.split() - spd = ' '.join(sp[0:nb]) - spf = ' '.join(sp[nb:len(sp)]) - s = constants.ABREV_TYPE_VOIE[spd]+' '+spf - return s - - -def is_valid_fantoir(f, insee): - return (len(f) == 10 and f[0:5] == insee); + return code_insee[0:3] if code_insee[0:2] == '97' else code_insee[0:2] -def display_insee_commune(code_insee, nom_commune): - print(f"{code_insee} - {nom_commune}") \ No newline at end of file +def get_code_dir_dict(): + with open('./data/code_dir.json') as f: + return json.loads(f.read()) \ No newline at end of file diff --git a/bano/log_2_file.py b/bano/log_2_file.py deleted file mode 100644 index 20b17c3..0000000 --- a/bano/log_2_file.py +++ /dev/null @@ -1,34 +0,0 @@ -#!/usr/bin/env python -# coding: UTF-8 - -import os -import time - -def start_log_to_file(source,etape,dept): - t = time.localtime() - th = time.strftime('%d-%m-%Y %H:%M:%S',t) - t = round(time.mktime(t),0) - log_filename = '{:s}_{:s}_{:s}.log'.format(dept,etape,source) - f = open(os.path.join(os.environ['LOG_DIR'],'{:s}'.format(log_filename)),'w+') - f.write('Debut : {:s}\n'.format(th)) - f.flush() - return f - -def write_log_to_file(flog,message): - flog.write(message+'\n') - flog.flush() - -def write_sep_to_file(flog): - flog.write('\n####################\n') - flog.flush() - -def end_log_to_file(flog,display=False): - t = 
time.localtime() - th = time.strftime('%d-%m-%Y %H:%M:%S',t) - flog.write(u'Fin : {:s}\n'.format(th)) - flog.flush() - if display: - flog.seek(0) - print(flog.read()) - flog.close() - diff --git a/bano/models.py b/bano/models.py deleted file mode 100644 index 29d0a1d..0000000 --- a/bano/models.py +++ /dev/null @@ -1,357 +0,0 @@ -import re -import time - -from . import db -from . import helpers as hp -from .sources import fantoir -from . import core as c - - -class Adresse: - def __init__(self, node, num, voie, fantoir, code_postal): - self.node = node - self.numero = num - self.voie = voie - self.fantoir = fantoir - self.code_postal = code_postal - - -class Adresses: - def __init__(self, code_insee): - self.a = {} - self.code_insee = code_insee - - def __contains__(self, item): - return item in self.a - - def __getitem__(self, key): - return self.a[key] - - def __setitem__(self, key, value): - self.a[key] = value - - def __iter__(self): - return iter(self.a) - - def register(self, voie): - cle = hp.normalize(voie) - if not cle in self: - self[cle] = {'numeros':{},'voies':{},'fantoirs':{},'point_par_rue':[],'highway_index':0} - - def add_fantoir(self,cle,fantoir,source): - self.register(cle) - if len(fantoir) == 10: - self[cle]['fantoirs'][source] = fantoir - - def add_voie(self,voie_cle,source,voie=None): - cle = hp.normalize(voie_cle) - self[cle]['voies'][source] = voie or voie_cle - - def add_adresse(self,ad,source): - """ une adresses est considérée dans la commune si sans Fantoir ou avec un Fantoir de la commune""" - if (ad.fantoir == '' or hp.is_valid_fantoir(ad.fantoir, self.code_insee)) and hp.is_valid_housenumber(ad.numero): - cle = hp.normalize(ad.voie) - self.add_voie(ad.voie,source) - self[cle]['numeros'][ad.numero] = ad - if ad.fantoir != '': - self[cle]['fantoirs'][source] = ad.fantoir - - def get_cle_by_fantoir(self,fantoir): - cle = '' - for c in self: - if 'fantoirs' in self[c]: - if 'OSM' in self[c]['fantoirs']: - if self[c]['fantoirs']['OSM'] == fantoir: - cle = c - break - if 'FANTOIR' in self[c]['fantoirs']: - if self[c]['fantoirs']['FANTOIR'] == fantoir: - cle = c - break - return cle - - def get_best_fantoir(self, cle): - return self[cle]['fantoirs'].get('OSM') or self[cle]['fantoirs'].get('FANTOIR') or '' - - def has_already_fantoir(self,cle,source): - return source in self[cle]['fantoirs'] - - def add_highway_index(self,cle,val): - self[cle]['highway_index']+=val - - def load_cadastre_hsnr(self): - dict_node_relations = {} - str_query = f"SELECT * FROM bal_cadastre WHERE commune_code = '{self.code_insee}' AND destination_principale in ('habitation','commerce','industrie','tourisme');" - with db.bano_cache.cursor() as cur: - cur.execute(str_query) - for cle_interop, ui_adresse, numero, suffixe, pseudo_adresse, name, voie_code, code_postal, libelle_acheminement, destination_principale, commune_code, commune_nom, source, lon, lat, *others in cur: - housenumber = numero+((' '+suffixe) if suffixe and suffixe.isalnum() else '') - if not name or len(name) < 2 or not lon or pseudo_adresse == 'true': - continue - self.register(name) - - if not cle_interop in dict_node_relations: - dict_node_relations[cle_interop] = [] - dict_node_relations[cle_interop].append(hp.normalize(name)) - if hp.is_valid_housenumber(housenumber): - nd = Node({'id':cle_interop,'lon':lon,'lat':lat},{}) - self.add_adresse(Adresse(nd,housenumber,name,'',code_postal), 'CADASTRE') - - def load_ban_hsnr(self): - dict_node_relations = {} - data = c.get_data_from_pg_direct('ban_hsnr',self.code_insee) - for 
id, housenumber, name, lon, lat in data: - if not name or len(name) < 2 or not lon: - continue - self.register(name) - if not id in dict_node_relations: - dict_node_relations[id] = [] - dict_node_relations[id].append(hp.normalize(name)) - if hp.is_valid_housenumber(housenumber): - self.add_adresse(Adresse(Node({'id':id,'lon':lon,'lat':lat},{}),housenumber,name,'',''), 'BAN') - - def save(self, source, code_dept): - with db.bano.cursor() as cur_insert : - for a in ['cumul_adresses','cumul_voies']: - cur_insert.execute(f"DELETE FROM {a} WHERE insee_com = '{self.code_insee}' AND source = '{source}';") - nb_rec = 0 - a_values = [] - a_values_voie = [] - sload = 'INSERT INTO cumul_adresses (geometrie,numero,voie_cadastre,voie_bal,voie_osm,voie_fantoir,fantoir,insee_com,dept,code_postal,source,voie_autre) VALUES' - for v in self: - code_postal = '' - cle_fantoir = self.get_best_fantoir(v) - street_name_osm = self[v]['voies'].get('OSM') or fantoir.mapping.get_fantoir_name(cle_fantoir,'OSM') or '' - street_name_fantoir = self[v]['voies'].get('FANTOIR') or '' - street_name_cadastre = self[v]['voies'].get('CADASTRE') or '' - street_name_bal = self[v]['voies'].get('BAL') or '' - if source == 'CADASTRE': - street_name_autre = self[v]['voies'].get('CADASTRE') or '' - elif source == 'BAL': - street_name_autre = self[v]['voies'].get('BAL') or '' - elif source == 'BAN': - street_name_autre = self[v]['voies'].get('BAN') or '' - else: - street_name_autre = '' - lat_point_par_rue = None - lon_point_par_rue = None - - for num in self[v]['numeros']: - numadresse = self[v]['numeros'][num] - a_values.append("(ST_GeomFromText('POINT({:6f} {:6f})', 4326),'{:s}','{:s}','{:s}','{:s}','{:s}','{:s}','{:s}','{:s}','{:s}','{:s}','{:s}')".format(numadresse.node.attribs['lon'],numadresse.node.attribs['lat'],numadresse.numero.replace("'",""),street_name_cadastre.replace("'","''"),street_name_bal.replace("'","''"),street_name_osm.replace("'","''"),street_name_fantoir.replace("'","''"),cle_fantoir,self.code_insee,code_dept,numadresse.code_postal,source,street_name_autre.replace("'","''")).replace(",''",",null").replace(",''",",null")) - if source == 'OSM': - lat_point_par_rue = numadresse.node.attribs['lat'] - lon_point_par_rue = numadresse.node.attribs['lon'] - - if source == 'OSM': - if len(self[v]['point_par_rue'])>1: - lon_point_par_rue = self[v]['point_par_rue'][0] - lat_point_par_rue = self[v]['point_par_rue'][1] - - if lat_point_par_rue and lon_point_par_rue: - a_values_voie.append(f"(ST_GeomFromText('POINT({lon_point_par_rue} {lat_point_par_rue})', 4326),'{hp.escape_quotes(street_name_cadastre)}','{hp.escape_quotes(street_name_bal)}','{hp.escape_quotes(street_name_osm)}','{hp.escape_quotes(street_name_fantoir)}','{cle_fantoir}','{self.code_insee}','{code_dept}','','{source}',{self[v]['highway_index']})".replace(",'',",",null,")) - - nb_rec +=1 - if len(a_values)>0: - cur_insert.execute(sload+','.join(a_values)+';COMMIT;') - sload_voie = 'INSERT INTO cumul_voies (geometrie,voie_cadastre,voie_bal,voie_osm,voie_fantoir,fantoir,insee_com,dept,code_postal,source,voie_index) VALUES' - if len(a_values_voie) > 0: - sload_voie = sload_voie+','.join(a_values_voie)+';COMMIT;' - cur_insert.execute(sload_voie) - return(nb_rec) - - -class Node: - def __init__(self,attribs,tags): - self.attribs = attribs - self.tags = tags - self.sent = False - self.modified = False - - def get_geom_as_text(self): - strp = 'ST_PointFromText(\'POINT('+str(self.attribs['lon'])+' '+str(self.attribs['lat'])+')\',4326)' - return strp - - def 
move_to(self,lon,lat): - self.attribs['lon'] = lon - self.attribs['lat'] = lat - - -class Pg_hsnr: - def __init__(self, d, code_insee): - self.x, self.y, self.provenance, self.osm_id, self.numero, self.voie, self.tags, *others = d - self.tags = self.tags or {} - self.fantoir = '' - if self.provenance == 3 or self.provenance == 4: - self.set_street_name() - self.set_fantoir(code_insee) - self.code_postal = hp.find_cp_in_tags(self.tags) - - def set_street_name(self): - if 'type' in self.tags and self.tags['type'] == 'associatedStreet' and 'name' in self.tags: - self.voie = self.tags['name'] - - def set_fantoir(self, code_insee): - if 'ref:FR:FANTOIR' in self.tags and len(self.tags['ref:FR:FANTOIR']) == 10 and self.tags['ref:FR:FANTOIR'][0:5] == code_insee: - self.fantoir = self.tags['ref:FR:FANTOIR'] - -class Fantoir: - def __init__(self,name,fantoir,bati): - self.name = name - self.name_norm = hp.normalize(name) - self.fantoir = fantoir - self.bati = bati if bati != '' else None -class Cadastre: - def __init__(self,lon,lat,name): - self.lon = lon - self.lat = lat - self.name = name - self.name_norm = hp.normalize(name) -class Osm: - def __init__(self,lon,lat,place,name,fantoir): - self.lon = lon - self.lat = lat - self.place = place - self.name = name - self.name_norm = hp.normalize(name) - self.fantoir = fantoir -class Place: - def __init__(self,lon,lat,place,name_fantoir,name_cadastre,name_osm,fantoir,bati,code_insee): - self.has_fantoir = False - self.has_osm = False - self.has_cadastre = False - self.fantoir = Fantoir(name_fantoir,fantoir,bati) - self.osm = Osm(lon,lat,place,name_osm,fantoir) - self.cadastre = Cadastre(lon,lat,name_cadastre) - self.code_insee = str(code_insee) - self.code_dept = hp.get_code_dept_from_insee(self.code_insee) - if self.fantoir.name != '': - self.has_fantoir = True - if self.osm.name != '': - self.has_osm = True - if self.cadastre.name != '': - self.has_cadastre = True - self.id = self.fantoir.fantoir or (self.cadastre.name_norm or self.osm.name_norm) - def update_fantoir(self,name,fantoir,bati): - self.fantoir = Fantoir(name,fantoir,bati) - self.has_fantoir = True - def update_osm(self,lon,lat,place,name,fantoir=''): - self.osm = Osm(lon,lat,place,name,fantoir) - self.has_osm = True - def update_cadastre(self,lon,lat,name): - self.cadastre = Cadastre(lon,lat,name) - self.has_cadastre = True - def as_string(self): - return f"ID {self.id} \n FANTOIR {self.fantoir.name} - FANTOIR {self.fantoir.fantoir} \nOSM {self.osm.lon} - {self.osm.lat} - {self.osm.place} - {self.osm.name} \nCADASTRE {self.cadastre.lon} - {self.cadastre.lat} - {self.cadastre.name}" - def as_SQL_cadastre_row(self): - if self.has_cadastre: - return hp.remove_quotes_on_null(f"(ST_PointFromText('POINT({self.cadastre.lon} {self.cadastre.lat})',4326),'{hp.escape_quotes(hp.format_toponyme(self.cadastre.name)) or 'null'}','{hp.escape_quotes(self.osm.name) or 'null'}','{hp.escape_quotes(self.fantoir.name) or 'null'}','{self.fantoir.fantoir or 'null'}','{self.code_insee}','{self.code_dept}','null','CADASTRE',{self.fantoir.bati or 'null'},'')") - def as_SQL_osm_row(self): - return hp.remove_quotes_on_null(f"(ST_PointFromText('POINT({self.osm.lon} {self.osm.lat})',4326),null,'{hp.escape_quotes(self.osm.name) or 'null'}','{hp.escape_quotes(self.fantoir.name) or 'null'}','{self.fantoir.fantoir or 'null'}','{self.code_insee}','{self.code_dept}',null,'OSM',{self.fantoir.bati or 'null'},'{self.osm.place}')") - -class Places: - def __init__(self): - self.p = {} - - def add_place(self,new_p): - 
self.p[new_p.id]=new_p - def match_fantoir(self,fantoir): - res = [] - for c in self.p: - if self.p[c].fantoir and self.p[c].fantoir.fantoir and self.p[c].fantoir.fantoir == fantoir: - res+=[c] - return res - def match_name(self,name,target): - res = [] - name_norm = hp.normalize(name) - if target == 'FANTOIR': - for c in self.p: - if self.p[c].fantoir and self.p[c].fantoir.name and self.p[c].fantoir.name_norm == name_norm: - res+=[c] - if target == 'CADASTRE': - for c in self.p: - if c.cadastre and c.cadastre.name and c.cadastre.name_norm == name_norm: - res+=c - if target == 'OSM': - for c in self.p: - if c.osm and c.osm.name and c.osm.name_norm == name_norm: - res+=c - return res - def _print(self): - for c in self.p: - print(f'ID : {self.p[c].id} - FANTOIR : {self.p[c].fantoir.name} - OSM : {self.p[c].osm.name}') - print(self.p[c].as_string()) - def _print_SQL_Cadastre(self): - for c in self.p: - print(self.p[c].as_SQL_cadastre_row()) - def as_SQL_Cadastre_array(self): - a = [] - for c in self.p: - if self.p[c].has_cadastre: - a.append(self.p[c].as_SQL_cadastre_row()) - return a - def as_SQL_OSM_array(self): - a = [] - for c in self.p: - if self.p[c].has_osm: - a.append(self.p[c].as_SQL_osm_row()) - return a - - -class Tile: - def __init__(self,z,x,y): - self.x = x - self.y = y - self.z = z - self.xmin = -20037508.34 + (40075016.68/(2 ** self.z)) * self.x - self.xmax = -20037508.34 + (40075016.68/(2 ** self.z)) * (self.x + 1) - self.ymin = 20037508.34 - (40075016.68/(2 ** self.z)) * (self.y + 1) - self.ymax = 20037508.34 - (40075016.68/(2 ** self.z)) * self.y - -class Tiles: - def __init__(self): - self.t = {} - - def add_tile(self,z,x,y): - self.t[f"{z}/{x}/{y}"] = Tile(z,x,y) - - def add_tiles_from_file(self,tilefile): - with open(tilefile,'r') as f: - for ligne in f.readlines(): - self.add_tile(int(ligne.split('/')[0]),int(ligne.split('/')[1]),int(ligne.split('/')[2])) - - def as_list_of_SQL_values(self): - return [f"({self.t[tile].z},{self.t[tile].x},{self.t[tile].y},ST_SetSRID(ST_MakeBox2D(ST_Point({int(self.t[tile].xmin)},{int(self.t[tile].ymin)}),ST_Point({int(self.t[tile].xmax)},{int(self.t[tile].ymax)})),3857))" for tile in self.t] - - def convert_to_insee_list(self): - with db.bano_cache.cursor() as cur: - cur.execute("TRUNCATE TABLE expire_tiles;COMMIT;") - SQL_values = self.as_list_of_SQL_values() - while len(SQL_values) > 10000: - cur.execute(f"""INSERT INTO expire_tiles VALUES {','.join(SQL_values[0:10000])};COMMIT; """) - SQL_values = SQL_values[10000:] - if SQL_values: - cur.execute(f"""INSERT INTO expire_tiles VALUES {','.join(SQL_values)};COMMIT; """) - cur.execute("""SELECT p."ref:INSEE" - FROM planet_osm_polygon p - JOIN expire_tiles e - ON ST_intersects(p.way, e.geometrie) - WHERE p.way && e.geometrie AND - p.admin_level = 8 AND - p.boundary = 'administrative' AND - p."ref:INSEE" NOT IN ('13055','69123','75056') AND - COALESCE(p."ref:INSEE",'') != '' - UNION - SELECT p."ref:INSEE" - FROM planet_osm_polygon p - JOIN expire_tiles e - ON ST_intersects(p.way, e.geometrie) - WHERE p.way && e.geometrie AND - p.admin_level = 9 AND - p.boundary = 'administrative' AND - (p."ref:INSEE" LIKE '132__' OR - p."ref:INSEE" LIKE '6938_' OR - p."ref:INSEE" LIKE '751__') - ORDER BY 1;""") - return [f[0]for f in cur] - diff --git a/bano/outils_de_gestion.py b/bano/outils_de_gestion.py deleted file mode 100644 index 5b30c9d..0000000 --- a/bano/outils_de_gestion.py +++ /dev/null @@ -1,45 +0,0 @@ -#!/usr/bin/env python -# coding: UTF-8 - -import time -from . 
import db - -def batch_start_log(source,etape,code_geo): - t = time.localtime() - th = time.strftime('%d-%m-%Y %H:%M:%S',t) - t = round(time.mktime(t),0) - if len(str(code_geo)) > 3: - code_insee = code_geo - dept = None - else: - code_insee = None - dept = code_geo - - with db.bano.cursor() as cur: - if code_insee: - whereclause = f"insee_com = '{code_insee}' AND source = '{source}' AND etape = '{etape}'" - str_query = f"INSERT INTO batch_historique (SELECT * FROM batch WHERE {whereclause});DELETE FROM batch WHERE {whereclause};INSERT INTO batch (source,etape,timestamp_debut,date_debut,dept,insee_com,nom_com,nombre_adresses) SELECT '{source}','{etape}',{t},'{th}',dept,insee_com,nom_com,0 FROM code_cadastre WHERE insee_com = '{code_insee}';COMMIT;" - if dept: - whereclause = f"dept = '{dept}' AND source = '{source}' AND etape = '{etape}'" - str_query = f"INSERT INTO batch_historique (SELECT * FROM batch WHERE {whereclause});DELETE FROM batch WHERE {whereclause};INSERT INTO batch (source,etape,timestamp_debut,date_debut,dept,nombre_adresses) VALUES ('{source}','{etape}',{t},'{th}','{dept}',0);COMMIT;" - - cur.execute(str_query) - - cur.execute(f"SELECT id_batch::integer FROM batch WHERE {whereclause};") - c = cur.fetchone() - return c[0] - -def batch_end_log(nb,batch_id): - with db.bano.cursor() as cur: - t = time.localtime() - th = time.strftime('%d-%m-%Y %H:%M:%S',t) - whereclause = 'id_batch = {:d}'.format(batch_id) - cur.execute(f"UPDATE batch SET nombre_adresses = {nb},date_fin = '{th}' WHERE {whereclause};") - -def get_cadastre_etape_timestamp_debut(code_cadastre,etape,source): - str_query = "SELECT timestamp_debut FROM batch WHERE cadastre_com = '{:s}' AND etape = '{:s}' AND source = '{:s}';".format(code_cadastre,etape,source) - cur = db.bano.cursor() - cur.execute(str_query) - for c in cur: - code_cadastre = c[0] - return code_cadastre diff --git a/bano/pre_process_suffixe.py b/bano/pre_process_suffixe.py deleted file mode 100644 index 9783bd6..0000000 --- a/bano/pre_process_suffixe.py +++ /dev/null @@ -1,101 +0,0 @@ -#!/usr/bin/env python -# coding: UTF-8 - -import re -import sys -import time -import os,os.path - -# from .core import load_cadastre_hsnr -from .outils_de_gestion import batch_start_log -from .outils_de_gestion import batch_end_log - -from . import db -from . import helpers as hp -from . import db_helpers as dh -from .models import Adresses -from .core import load_ban_hsnr - -def collect_adresses_points(selection, adresses): - kres = {} - for k in selection: - kres[k] = [] - for vv in selection[k]['liste']: - s = 0 - max = 2 - for a in adresses[vv]['numeros']: - kres[k].append('SELECT \'{:s}\' hameau,\'{:s}\' code_insee,ST_BUFFER(ST_PointFromText(\'POINT({} {})\',4326),0.0003,2) as g'.format(k.replace("'","''"),adresses.code_insee,adresses[vv]['numeros'][a].node.attribs['lon'],adresses[vv]['numeros'][a].node.attribs['lat'])) - s+=1 - if s == max: break - return kres - -def load_suffixe_2_db(adds, code_insee): - nb_res = 0 - with db.bano_cache.cursor() as cur: - str_query = f"DELETE FROM suffixe WHERE insee_com = '{code_insee}';COMMIT;" - cur.execute(str_query) - for h in adds: - # Agde (34003): detection de 'Mer' abusif, pas d'autres suffixes dans la commune - if code_insee == '34003': - continue - print(f"......... 
{h}") - str_query = 'INSERT INTO suffixe SELECT ST_Transform(ST_SetSRID((ST_Dump(gu)).geom,4326),3857),code_insee,hameau FROM (SELECT ST_Union(g) gu,code_insee,hameau FROM({:s})a GROUP BY 2,3)a;COMMIT;'.format(' UNION ALL '.join(adds[h])) - cur.execute(str_query) - nb_res+=len(adds[h]) - return nb_res - -def name_frequency(adresses): - freq = {} - for v in adresses: - s = v.split() - if len(s)>4: - k = ' '.join(s[-2:]) - if k not in freq: - freq[k] = {'nombre':1,'liste':[v]} - else: - freq[k]['nombre'] +=1 - freq[k]['liste'].append(v) - if len(s)>3: - k = v.split()[-1] - if k not in freq: - freq[k] = {'nombre':1,'liste':[v]} - else: - freq[k]['nombre'] +=1 - freq[k]['liste'].append(v) - return freq - -def select_street_names_by_name(freq): - sel = {} - mots = {} - for k in freq: - ks = k.split() - if freq[k]['nombre'] > 5 and len(ks) > 1: - mots[ks[0]] = 1 - mots[ks[1]] = 1 - sel[k] = freq[k] - for k in freq: - ks = k.split() - # un suffixe ne peut pas être un numero seul, cas dans les arrdts parisiens - if freq[k]['nombre'] > 5 and len(ks) == 1 and not k.isdigit() and not k in mots : - sel[k] = freq[k] - return sel - -def process(departements, **kwargs): - source = 'BAN' - for dept in departements: - if hp.is_valid_dept(dept): - for code_insee, nom_commune in dh.get_insee_name_list_by_dept(dept): - debut_total = time.time() - hp.display_insee_commune(code_insee, nom_commune) - adresses = Adresses(code_insee) - - batch_id = batch_start_log(source,'detectesuffixe',code_insee) - - # adresses.load_cadastre_hsnr() - adresses.load_ban_hsnr() - freq = name_frequency(adresses) - selection = select_street_names_by_name(freq) - adds = collect_adresses_points(selection, adresses) - nb_rec = load_suffixe_2_db(adds, code_insee) - - batch_end_log(nb_rec,batch_id) diff --git a/bano/publish.py b/bano/publish.py deleted file mode 100644 index 9552cbc..0000000 --- a/bano/publish.py +++ /dev/null @@ -1,86 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -import gzip -import tarfile -import os - -from glob import glob -from shutil import copy2 -from pathlib import Path - -from .constants import DEPARTEMENTS -from . 
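To make the heuristic of the removed pre_process_suffixe.py concrete: name_frequency keys street names of more than four words on their last two words and names of more than three words on their last word, and select_street_names_by_name only keeps keys seen more than five times (single-word keys must not be bare numbers). A toy run of just the key extraction, with made-up street names and without the removed module itself:

# Made-up data; this only mirrors the grouping keys of the removed name_frequency().
names = [
    "Rue des Lilas Le Bourg",        # 5 words -> keys "Le Bourg" and "Bourg"
    "Impasse des Acacias Le Bourg",  # 5 words -> keys "Le Bourg" and "Bourg"
    "Rue Jean Jaures",               # 3 words -> no key
]
for name in names:
    words = name.split()
    keys = []
    if len(words) > 4:
        keys.append(" ".join(words[-2:]))   # last two words
    if len(words) > 3:
        keys.append(words[-1])              # last word
    print(name, "->", keys)

A key that accumulates enough distinct street names this way is treated as a locality suffix, and load_suffixe_2_db stores a small buffer around the first address points of each matching street in the suffixe table.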
import helpers as hp - -def get_source_dir(): - try: - cwd = Path(os.environ['EXPORT_SAS_DIR']) - except KeyError: - raise ValueError(f"La variable EXPORT_SAS_DIR n'est pas définie") - return cwd - -def get_dest_dir(): - try: - cwd = Path(os.environ['EXPORT_WEB_DIR']) - except KeyError: - raise ValueError(f"La variable EXPORT_WEB_DIR n'est pas définie") - return cwd - -def get_source_file(dept,extension): - return Path(get_source_dir()) / f'bano-{dept}.{extension}' - -def get_dest_file(dept,filetype,gzip=False,tar=False): - ext = ".tar" if tar else '' - ext = ext+".gz" if gzip else ext - return Path(get_dest_dir()) / f'bano-{dept}.{filetype}{ext}' - -def get_dest_file_full(filetype,gzip=False): - gz_ext = '.gz' if gzip else '' - return Path(get_dest_dir()) / f'full.{filetype}{gz_ext}' - -def publish_as_shp(dept): - with tarfile.open(get_dest_file(dept, 'shp', True,True), "w:gz") as tar: - tar.add(get_source_file(dept,'shp'), arcname=f'bano-{dept}.shp') - tar.add(get_source_file(dept,'dbf'), arcname=f'bano-{dept}.dbf') - tar.add(get_source_file(dept,'shx'), arcname=f'bano-{dept}.shx') - tar.add(get_source_file(dept,'prj'), arcname=f'bano-{dept}.prj') - tar.add(get_source_file(dept,'cpg'), arcname=f'bano-{dept}.cpg') - -def publish_as_csv(dept): - copy2(get_source_file(dept,'csv'),get_dest_dir()) - -def publish_as_full_csv(): - with gzip.open(get_dest_file_full('csv',True),'wb') as gz: - for infile in sorted(glob(f'{get_source_dir()}/bano-*.csv')): - with open(infile,'rb') as js: - gz.write(js.read()) - -def publish_as_ttl(dept): - with gzip.open(get_dest_file(dept,'ttl',True,False),'wb') as gz: - with open(get_source_file(dept,'ttl'),'rb') as ttl: - gz.write(ttl.read()) - -def publish_as_json(dept): - with gzip.open(get_dest_file(dept,'json',True,False),'wb') as gz: - with open(get_source_file(dept,'json'),'rb') as js: - gz.write(js.read()) - -def publish_as_full_json(): - with gzip.open(get_dest_file_full('sjson',True),'wb') as gz: - for infile in sorted(glob(f'{get_source_dir()}/bano-*.json')): - with open(infile,'rb') as js: - gz.write(js.read()) - -def process(departements, **kwargs): - for dept in departements: - if not hp.is_valid_dept(dept): - print(f"Code {dept} invalide pour un département - abandon") - continue - publish_as_shp(dept) - publish_as_csv(dept) - publish_as_ttl(dept) - publish_as_json(dept) - -def process_full(**kwargs): - publish_as_full_csv() - publish_as_full_json() diff --git a/bano/setup_db.py b/bano/setup_db.py new file mode 100644 index 0000000..dde27e1 --- /dev/null +++ b/bano/setup_db.py @@ -0,0 +1,8 @@ +#!/usr/bin/env python +# coding: UTF-8 + +from . import db +from .sql import sql_process + +def setup_bano_sources(**kwargs): + sql_process('create_table_base_bano_sources',{},db.bano_sources) \ No newline at end of file diff --git a/bano/sources/topo.py b/bano/sources/topo.py new file mode 100644 index 0000000..72cf980 --- /dev/null +++ b/bano/sources/topo.py @@ -0,0 +1,64 @@ +#!/usr/bin/env python +# coding: UTF-8 + +import gzip +import io + +from ..constants import get_const_code_dir,CODE_VOIE_FANTOIR + +from ..db import bano_sources +from .. 
import helpers as h
+
+CODE_DIR = get_const_code_dir()
+
+
+def fantoir9_vers_fantoir10(fantoir):
+    insee = fantoir[0:5]
+    code_dir = CODE_DIR.get(insee,'0')  # default kept as a string so the concatenation below works
+    dept = fantoir[0:2]  # for overseas departments the distinction is made by the direction code
+    commune = insee[2:]
+    code_voie = '0123456789ABCDEFGHIJKLMNOPQRSTVWXYZ'.index(fantoir[5:6])
+    numero = fantoir[6:]
+    cle = 'ABCDEFGHJKLMNPRSTUVWXYZ'[(int(dept+code_dir+commune)*19+code_voie*11+int(numero))%23]
+
+    return f"{fantoir}{cle}"
+
+def topo_voie_to_csv(ligne_brute):
+    # fixed-width TOPO record layout
+    longueurs = [5,2,9,2,4,26,40,1,1,1,1,1,1,8,8,1,8,8]
+    champs = []
+    # champs = 'region,pays,voie,type_topo,nature,libelle,skip,caractere_voie,annulation,date_annulation,date_creation,type_voie,mot_classant,date_transition'
+    # pays,dept, insee,fantoir,nature,voie,caractere voie,annul,date annul,creation,mot classant
+    for l in longueurs:
+        champs.append((ligne_brute[:l]).strip())
+        ligne_brute = ligne_brute[l:]
+    # keep only the useful fields
+    champs = [champs[2]]+champs[4:6]+champs[11:]
+    # insee
+    champs.insert(0,champs[0][0:5])
+    # code dept
+    champs.insert(0,h.get_code_dept_from_insee(champs[0]))
+    # fantoir
+    champs[2] = fantoir9_vers_fantoir10(champs[2])
+
+    return champs
+
+
+def import_to_pg(**kwargs):
+    fichier_source = '/data/download/TOPO20211101.gz'
+    io_in_csv = io.StringIO()
+    with gzip.open(fichier_source, mode='rt') as f:
+        f.readline()  # skip CSV headers
+        for i,line in enumerate(f):
+            if line[16:18] != '14':
+                continue
+            # ';' separator to stay consistent with the copy_from() call below
+            io_in_csv.write(';'.join(topo_voie_to_csv(line))+'\n')
+            if i > 20:
+                break  # provisional cap while bootstrapping
+
+    departement = kwargs.get('departement','14')  # assumed to be passed by the caller
+    io_in_csv.seek(0)
+    with bano_sources.cursor() as cur_insert:
+        cur_insert.execute(f"DELETE FROM topo WHERE code_insee LIKE '{departement}%'")
+        cur_insert.copy_from(io_in_csv, "topo", sep=';', null='')
+    bano_sources.commit()
diff --git a/bano/sql.py b/bano/sql.py
new file mode 100644
index 0000000..f76a025
--- /dev/null
+++ b/bano/sql.py
@@ -0,0 +1,31 @@
+#!/usr/bin/env python
+# coding: UTF-8
+
+from pathlib import Path
+
+SQLDIR = Path(__file__).parent / 'sql'
+
+
+def sql_process(sqlfile,args,conn):
+    # resolve the template inside bano/sql/ and substitute the __key__ placeholders
+    sqlfile = (SQLDIR / sqlfile).with_suffix('.sql')
+    with open(sqlfile) as s:
+        q = s.read()
+    for k,v in args.items():
+        q = q.replace(f'__{k}__',v)
+
+    with conn.cursor() as cur:
+        cur.execute(q)
+        cur.execute('COMMIT')
+
+def sql_get_data(sqlfile,args,conn):
+    sqlfile = (SQLDIR / sqlfile).with_suffix('.sql')
+    with open(sqlfile) as s:
+        q = s.read()
+    for k,v in args.items():
+        q = q.replace(f'__{k}__',v)
+
+    with conn.cursor() as cur:
+        cur.execute(q)
+        return cur.fetchall()
diff --git a/bano/sql/ban_hsnr_nocache.sql b/bano/sql/ban_hsnr_nocache.sql
deleted file mode 100644
index 748e93f..0000000
--- a/bano/sql/ban_hsnr_nocache.sql
+++ /dev/null
@@ -1,32 +0,0 @@
-WITH
-j
-AS
-(SELECT id,
-       numero,
-       nom_voie,
-       lon,
-       lat,
-       rep,
-       CASE r.numero
-         WHEN b.numero THEN
-           CASE
-             WHEN rep = 'b' THEN 'bis'
-             WHEN rep = 't' THEN 'ter'
-             WHEN rep = 'q' THEN 'quater'
-             ELSE NULL
-           END
-         ELSE NULL
-       END as rep_bis,
-       code_postal
-FROM ban_odbl b
-LEFT OUTER JOIN rep_b_as_bis r
-USING (id_fantoir,numero)
-WHERE code_insee = '__com__')
-
-SELECT id,
-       TRIM (BOTH FROM (numero||' '||COALESCE(rep_bis,rep,''))),
-       nom_voie,
-       lon,
-       lat,
-       code_postal
-FROM j;
diff --git a/bano/sql/cadastre_2_place.sql b/bano/sql/cadastre_2_place.sql
deleted file mode 100644
index 037cfb5..0000000
--- a/bano/sql/cadastre_2_place.sql
+++ /dev/null
@@ -1,7 +0,0 @@
-SELECT ST_X(geometrie),
-       ST_Y(geometrie),
-       nom,
-       fantoir,
-       insee_com
-FROM
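The new bano/sql.py resolves a template name inside bano/sql/, substitutes every __key__ placeholder (the __com__ / __dept__ convention used throughout the SQL files of this tree) and executes the result on the connection it is given. A hedged usage sketch: psycopg2 is assumed (the cursor/copy_from usage elsewhere in this patch points to it), the connection string is illustrative, and some_report_by_dept.sql is a hypothetical template; only sql_process, sql_get_data and the placeholder convention come from the patch.

import psycopg2

from bano.sql import sql_process, sql_get_data

# Illustrative connection; the real code passes the connections defined in bano/db.py.
conn = psycopg2.connect("dbname=bano_sources")

# Runs bano/sql/create_table_base_bano_sources.sql as-is (no placeholders needed).
sql_process('create_table_base_bano_sources', {}, conn)

# Hypothetical template: every '__dept__' occurrence is replaced before execution.
rows = sql_get_data('some_report_by_dept', {'dept': '14'}, conn)

This is exactly what the new setup_db.setup_bano_sources() does with the bano_sources connection.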
cadastre_2_place -WHERE insee_com = '__com__'; \ No newline at end of file diff --git a/bano/sql/create_base.sql b/bano/sql/create_base.sql deleted file mode 100644 index 1870633..0000000 --- a/bano/sql/create_base.sql +++ /dev/null @@ -1,3 +0,0 @@ -CREATE EXTENSION postgis; -CREATE EXTENSION hstore; -CREATE SCHEMA dev; diff --git a/bano/sql/create_table_base_bano_sources.sql b/bano/sql/create_table_base_bano_sources.sql new file mode 100644 index 0000000..a6969a5 --- /dev/null +++ b/bano/sql/create_table_base_bano_sources.sql @@ -0,0 +1,47 @@ +CREATE TABLE IF NOT EXISTS topo ( + code_pays character(2), + code_dep character(3), + code_insee character(5), + fantoir10 character(10), + nature_voie text, + libelle_voie text, + caractere_voie character(1), + caractere_annul character(1), + date_annul integer, + date_creation integer, + type_voie character(1), + mot_classant character varying(8)); +CREATE INDEX IF NOT EXISTS idx_topo_dep ON topo(code_dep); +CREATE INDEX IF NOT EXISTS idx_fantoir_code_insee ON topo(code_insee); +CREATE INDEX IF NOT EXISTS idx_fantoir_fantoir10 ON topo(fantoir10); + +CREATE TABLE IF NOT EXISTS ban_odbl ( + id text, + id_fantoir text, + numero text, + rep text, + nom_voie text, + code_postal text, + code_insee text, + nom_commune text, + code_insee_ancienne_commune text, + nom_ancienne_commune text, + x float, + y float, + lon float, + lat float, + type_position text, + alias text, + nom_ld text, + libelle_acheminement text, + nom_afnor text, + source_position text, + source_nom_voie text, + certification_commune integer, + cad_parcelles text); +-- geometrie geometry (Point, 4326) DEFAULT (ST_Point(lon,lat))); +CREATE INDEX IF NOT EXISTS idx_ban_odbl_code_insee ON ban_odbl(code_insee); + + + +GRANT SELECT ON ALL TABLES IN SCHEMA public TO public; \ No newline at end of file diff --git a/bano/sql/export_csv_dept.sql b/bano/sql/export_csv_dept.sql deleted file mode 100644 index 9a48468..0000000 --- a/bano/sql/export_csv_dept.sql +++ /dev/null @@ -1,106 +0,0 @@ ---SELECT * ---FROM ( -WITH -u -AS -(SELECT insee_com, - fantoir, - REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REGEXP_REPLACE(UPPER(numero),'^0*',''),'BIS','B'),'TER','T'),'QUATER','Q'),'QUAT','Q'),' ',''),'à','-'),';',','),'"','') AS num -FROM cumul_adresses -WHERE fantoir IS NOT NULL AND - dept = '__dept__' -GROUP BY 1,2,3), -lp -AS -(SELECT insee, - MIN(cp) AS cp -FROM codes_postaux -WHERE insee LIKE '__dept__%' -GROUP BY 1), -res_non_unique -AS -(SELECT CONCAT(u.fantoir,'-',u.num) AS id, - UPPER(REPLACE(COALESCE(o.numero,od.numero,c.numero),' ','')) AS numero, - REPLACE( - REPLACE( - REPLACE( - COALESCE(REPLACE(o.voie_osm,'’',CHR(39)),REPLACE(od.voie_osm,'’',CHR(39)),REPLACE(c.voie_osm,'’',CHR(39)),od.voie_autre,c.voie_autre), - '"',CHR(39)), - ', ',' '), - ',',' ') AS voie, - COALESCE(o.code_postal,c.code_postal,cp.postal_code, lp.cp) AS code_post, - cn.libelle AS ville, - CASE - WHEN u.num=o.num THEN 'OSM' - WHEN (u.num=od.num AND od.voie_osm != od.voie_autre AND od.voie_osm IS NOT NULL) THEN 'O+O' - WHEN u.num=od.num THEN 'OD' - WHEN c.voie_osm != '' THEN 'C+O' - ELSE 'CAD' - END AS SOURCE, - COALESCE(st_y(o.geometrie),st_y(od.geometrie),st_y(c.geometrie)) AS lat, - COALESCE(st_x(o.geometrie),st_x(od.geometrie),st_x(c.geometrie)) AS lon, - COALESCE(o.geometrie,od.geometrie,c.geometrie) AS geom -FROM u -LEFT JOIN lp -ON (lp.insee= u.insee_com) -LEFT JOIN -(SELECT *, - 
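The fantoir10 column of the topo table above is filled by fantoir9_vers_fantoir10() in bano/sources/topo.py: the RIVOLI check letter is taken from a 23-letter alphabet (I, O and Q excluded) indexed by (departement + direction code + commune number) * 19, plus the rank of the first voie-code character * 11, plus its numeric part, modulo 23. A worked example of that arithmetic; the nine-character code is made up and the default direction code '0' is assumed:

fantoir9 = '590010040'                      # made-up code: INSEE 59001, voie '0040'
dept, commune = fantoir9[0:2], fantoir9[2:5]
code_dir = '0'                              # default direction code
code_voie = '0123456789ABCDEFGHIJKLMNOPQRSTVWXYZ'.index(fantoir9[5])   # -> 0
numero = int(fantoir9[6:])                  # -> 40
key_index = (int(dept + code_dir + commune) * 19 + code_voie * 11 + numero) % 23
cle = 'ABCDEFGHJKLMNPRSTUVWXYZ'[key_index]  # 590001*19 + 40 = 11210059, 11210059 % 23 = 20 -> 'X'
print(fantoir9 + cle)                       # 590010040X

Whether this matches the key of a real FANTOIR record depends on the direction-code table loaded from code_dir.json; the snippet only illustrates the formula.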
REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REGEXP_REPLACE(UPPER(numero),'^0*',''),'BIS','B'),'TER','T'),'QUATER','Q'),'QUAT','Q'),' ',''),'à','-'),';',',') AS num -FROM cumul_adresses -WHERE dept = '__dept__' AND - source = 'OSM') AS o -ON (u.num = o.num AND u.fantoir = o.fantoir) -LEFT JOIN -(SELECT *, - REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REGEXP_REPLACE(UPPER(numero),'^0*',''),'BIS','B'),'TER','T'),'QUATER','Q'),'QUAT','Q'),' ',''),'à','-'),';',',') AS num -FROM cumul_adresses -WHERE dept = '__dept__' AND - source = 'BAN' AND - st_x(geometrie)!=0 AND - st_y(geometrie)!=0) AS c -ON (c.num=u.num AND c.fantoir=u.fantoir) -LEFT JOIN -(SELECT *, - REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REGEXP_REPLACE(UPPER(numero),'^0*',''),'BIS','B'),'TER','T'),'QUATER','Q'),'QUAT','Q'),' ',''),'à','-'),';',',') AS num -FROM cumul_adresses -WHERE dept = '__dept__' AND - source = 'BAL' AND - st_x(geometrie)!=0 AND - st_y(geometrie)!=0) AS od -ON (od.num = u.num AND od.fantoir = u.fantoir) -JOIN cog_commune cn -ON (cn.com = u.insee_com) -LEFT JOIN (SELECT * FROM planet_osm_postal_code WHERE postal_code != '') cp -ON (cp."ref:INSEE" = u.insee_com AND ST_Contains(cp.way, ST_Transform(COALESCE(o.geometrie, od.geometrie, c.geometrie),3857))) -WHERE u.num>'0' AND - cn.typecom != 'COMD'), -res_avec_ordre_des_doublons -AS -(SELECT id, - numero, - voie, - code_post, - ville, - source, - lat, - lon, - geom, - ROW_NUMBER() OVER(PARTITION BY id ORDER BY numero) AS sequence -FROM res_non_unique -WHERE lat IS NOT NULL AND - lon IS NOT NULL AND - numero ~ '^[0-9]{1,4}( ?[A-Z]?.*)?' AND - numero != '99999' AND - numero !~'.[0-9 .-]{9,}') -SELECT id, - numero, - voie, - code_post, - ville, - source, - lat, - lon, - geom -FROM res_avec_ordre_des_doublons -WHERE sequence = 1 -ORDER BY id diff --git a/bano/sql/export_json_dept_communes.sql b/bano/sql/export_json_dept_communes.sql deleted file mode 100644 index 05204ce..0000000 --- a/bano/sql/export_json_dept_communes.sql +++ /dev/null @@ -1,35 +0,0 @@ -WITH -rang_cp -AS -(SELECT *, - RANK() OVER(PARTITION BY insee ORDER BY ligne_5,cp) AS rang -FROM codes_postaux), -cp -AS -(SELECT insee, - cp -FROM rang_cp ---WHERE insee in (select insee from rang where rang = 1 group by 1 having count(*) > 1) -WHERE rang = 1) ---order by 1,3 -SELECT DISTINCT insee AS id, -- id - i.type, -- type - i.name, -- name - cp.cp, -- postcode - round(lat,6) AS lat, -- lat - round(lon,6) AS lon, -- lon - i.name , -- city - cd.libelle, -- departement - cr.libelle, -- region, - population, - adm_weight, - greatest(0.075,round(log((adm_weight)+log(population+1)/3)::decimal,4)) AS importance -FROM infos_communes i -JOIN cp cp -ON insee_com=insee -JOIN cog_departement cd -USING (dep) -JOIN cog_region cr -USING (reg) -WHERE i.dep = '__dept__' -ORDER BY insee; \ No newline at end of file diff --git a/bano/sql/export_json_dept_lieux_dits.sql b/bano/sql/export_json_dept_lieux_dits.sql deleted file mode 100644 index 8c0b72f..0000000 --- a/bano/sql/export_json_dept_lieux_dits.sql +++ /dev/null @@ -1,66 +0,0 @@ -WITH -sort_order -AS -(SELECT 1::integer as sortnum, 'OSM' as source -UNION ALL -SELECT 2,'BAL' -UNION ALL -SELECT 3,'CADASTRE' -), -osm -AS -(SELECT geometrie, - insee_com, - fantoir, - ld_bati, - COALESCE(libelle_osm,libelle_cadastre) libelle, - code_postal, - RANK() OVER (PARTITION BY fantoir ORDER BY sortnum) rang -FROM cumul_places -JOIN sort_order USING (source) -WHERE fantoir IS NOT NULL AND - dept = '__dept__'), -osm_postal -AS -(SELECT o.geometrie, - 
o.insee_com, - o.libelle, - o.fantoir, - o.ld_bati, - COALESCE(o.code_postal,pp.code_postal) code_postal, - RANK() OVER(PARTITION BY o.fantoir,COALESCE(o.code_postal,pp.code_postal) ORDER BY id) rang_postal -FROM osm o -JOIN polygones_postaux pp -ON ST_Contains(pp.geometrie, o.geometrie) -WHERE o.rang = 1), -cog -AS -(SELECT cc.com AS insee, - cd.libelle AS nom_dep, - cr.libelle AS nom_reg -FROM cog_commune cc -JOIN cog_departement cd -USING (dep) -JOIN cog_region cr -ON cc.reg = cr.reg -WHERE cc.dep = '__dept__') -SELECT o.fantoir AS id, - o.insee_com AS citycode, - 'place' AS type, - replace(replace(libelle,'\"',''),'’',chr(39)) AS name, - code_postal AS postcode, - round(st_y(o.geometrie)::numeric,6) AS lat, - round(st_x(o.geometrie)::numeric,6) AS lon, - c.name AS city, - cog.nom_dep AS departement, - cog.nom_reg AS region, - least(0.05,round((log(c.adm_weight+log(c.population+1)/3)*(0.25+0.5*(1- coalesce(o.ld_bati,1))))::numeric,4)) AS importance -FROM osm_postal o -JOIN infos_communes c -ON (c.insee_com=o.insee_com) -JOIN cog -ON o.insee_com = cog.insee -WHERE o.rang_postal = 1 -ORDER BY 1; - ---least(0.05,round(log((CASE WHEN g.statut LIKE 'Capital%' THEN 6 WHEN g.statut = 'Préfecture de régi' THEN 5 WHEN g.statut='Préfecture' THEN 4 WHEN g.statut LIKE 'Sous-pr%' THEN 3 WHEN g.statut='Chef-lieu canton' THEN 2 ELSE 1 END)+log(g.population+1)/3)*(0.25+0.5*(1-('0' || coalesce(f.ld_bati,'1'))::numeric)),4)) \ No newline at end of file diff --git a/bano/sql/export_json_dept_voies_non_rapprochees.sql b/bano/sql/export_json_dept_voies_non_rapprochees.sql deleted file mode 100644 index 18b0c08..0000000 --- a/bano/sql/export_json_dept_voies_non_rapprochees.sql +++ /dev/null @@ -1,95 +0,0 @@ -WITH -sort_order -AS -(SELECT 1::integer as sortnum, 'OSM' as source -UNION ALL -SELECT 2,'BAL' -UNION ALL -SELECT 3,'BAN' --- UNION ALL --- SELECT 4,'CADASTRE' -), -v -AS -(SELECT code_insee AS insee_com, - fantoir10 AS fantoir -FROM fantoir_voie f -WHERE code_dept = '__dept__' -EXCEPT -SELECT insee_com,fantoir -FROM cumul_voies v -WHERE dept = '__dept__'), -osm -AS -(SELECT c.geometrie, - c.insee_com, - c.voie_autre AS voie, - c.fantoir, - c.numero, - c.code_postal, - source, --- RANK() OVER (PARTITION BY c.fantoir,c.numero ORDER BY c.source DESC) rang - ROW_NUMBER() OVER (PARTITION BY fantoir,numero ORDER BY sortnum) rang - FROM cumul_adresses c - JOIN v - USING (fantoir) - JOIN sort_order - USING (source) - WHERE dept = '__dept__'), -osm_postal -AS -(SELECT o.geometrie, - o.insee_com, - o.voie, - o.fantoir, - o.numero, - COALESCE(o.code_postal,pp.code_postal) code_postal, - RANK() OVER(PARTITION BY o.fantoir,o.numero ORDER BY id) rang_postal -FROM osm o -JOIN polygones_postaux pp -ON ST_Contains(pp.geometrie, o.geometrie) -WHERE o.rang = 1), -cog -AS -(SELECT cc.com AS insee, - cd.libelle AS nom_dep, - cr.libelle AS nom_reg - FROM cog_commune cc - JOIN cog_departement cd - USING (dep) - JOIN cog_region cr - ON cc.reg = cr.reg - WHERE cd.dep = '__dept__') -SELECT osm_postal.fantoir, - osm_postal.insee_com as citycode, - 'street' as type, - 
replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(regexp_replace(regexp_replace(regexp_replace(regexp_replace(regexp_replace(regexp_replace(regexp_replace(regexp_replace(regexp_replace(regexp_replace(regexp_replace(regexp_replace(regexp_replace(regexp_replace(regexp_replace(regexp_replace(regexp_replace(regexp_replace(regexp_replace(regexp_replace(regexp_replace(regexp_replace(regexp_replace(regexp_replace(regexp_replace(regexp_replace(regexp_replace(voie,'^IMP ','Impasse '),'^RTE ','Route '),'^ALL ','Allée '),'^PL ','Place '),'^PLA ','Place '),'^AV ','Avenue '),'^LOT ','Lotissement '),'^RES ','Résidence '),'^CHEM ','Chemin '),'^RLE ','Ruelle '),'^BD ','Boulevard '),'^SQ ','Square '),'^PAS ','Passage '),'^SEN ','Sentier '),'^CRS ','Cours '),'^TRA ','Traverse '),'^MTE ','Montée '),'^RPT ','Rond-point '),'^HAM ','Hameau '),'^VLA ','Villa '),'^PROM ','Promenade '),'^ESP ','Esplanade '),'^FG ','Faubourg '),'^TSSE ','Terrasse '),'^CTRE ','Centre '),'^PASS ','Passerelle '),'^FRM ','Ferme '),' GAL ',' Général '),' MAL ',' Maréchal '),' ST ',' Saint '),' STE ',' Sainte '),' PDT ',' Président '),' HT ',' Haut '),' HTE ',' Haute '),' VX ',' Vieux '),' PTE ',' Petite '),'\"',''),'’',chr(39)) AS name, - osm_postal.code_postal AS postcode, - round(st_y(st_centroid(st_convexhull(ST_Collect(osm_postal.geometrie))))::numeric,6) AS lat, - round(st_x(st_centroid(st_convexhull(ST_Collect(osm_postal.geometrie))))::numeric,6) AS lon, - c.name AS city, - cog.nom_dep AS departement, - cog.nom_reg AS region, - round(log(c.adm_weight+log(c.population+1)/3)::numeric*log(1+log(count(osm_postal.*)+1)+log(st_length(st_longestline(st_convexhull(ST_Collect(osm_postal.geometrie)),st_convexhull(ST_Collect(osm_postal.geometrie)))::geography)+1)+log(CASE WHEN voie like 'Boulevard%' THEN 4 WHEN voie LIKE 'Place%' THEN 4 WHEN voie LIKE 'Espl%' THEN 4 WHEN voie LIKE 'Av%' THEN 3 WHEN voie LIKE 'Rue %' THEN 2 ELSE 1 END))::numeric,4) AS importance, - string_agg(numero||'$'||st_y(osm_postal.geometrie)||'$'||st_x(osm_postal.geometrie),'#' ORDER BY numero)::text AS housenumbers2 -FROM v -JOIN infos_communes c -ON (c.insee_com=v.insee_com) -JOIN osm_postal -ON osm_postal.fantoir=v.fantoir -LEFT JOIN cog -ON v.insee_com = cog.insee -WHERE osm_postal.numero ~ '^[0-9]{1,4}( ?[A-Z]?.*)?' 
AND - osm_postal.numero !~'.[0-9 \\.\\-]{9,}' AND - osm_postal.numero != '99999' AND - osm_postal.rang_postal = 1 -GROUP BY osm_postal.fantoir, - osm_postal.insee_com, - osm_postal.voie, - osm_postal.code_postal, - c.name, - cog.nom_dep, - cog.nom_reg, - c.adm_weight, - c.population -ORDER BY osm_postal.fantoir; \ No newline at end of file diff --git a/bano/sql/export_json_dept_voies_rapprochees.sql b/bano/sql/export_json_dept_voies_rapprochees.sql deleted file mode 100644 index 0a49b2e..0000000 --- a/bano/sql/export_json_dept_voies_rapprochees.sql +++ /dev/null @@ -1,89 +0,0 @@ -WITH -sort_order -AS -(SELECT 1::integer as sortnum, 'OSM' as source -UNION ALL -SELECT 2,'BAL' -UNION ALL -SELECT 3,'BAN' -), -pre_osm -AS -(SELECT geometrie, - insee_com, - fantoir, - REPLACE(REPLACE(numero,'\',';'),'"','') AS numero, - code_postal, - source -FROM cumul_adresses -WHERE dept = '__dept__'), -osm -AS -(SELECT geometrie, - insee_com, - fantoir, - numero, - code_postal, - ROW_NUMBER() OVER (PARTITION BY fantoir,numero ORDER BY sortnum) rang -FROM pre_osm -JOIN sort_order USING (source)), -osm_postal -AS -(SELECT o.geometrie, - o.insee_com, --- o.voie_osm, - o.fantoir, - o.numero, - COALESCE(o.code_postal,pp.code_postal) code_postal, - RANK() OVER(PARTITION BY o.fantoir,o.numero ORDER BY id) rang_postal -FROM osm o -JOIN polygones_postaux pp -ON ST_Contains(pp.geometrie, o.geometrie) -WHERE o.rang = 1), -cog -AS -(SELECT cc.com AS insee, - cd.libelle AS nom_dep, - cr.libelle AS nom_reg - FROM cog_commune cc - JOIN cog_departement cd - USING (dep) - JOIN cog_region cr - ON cc.reg = cr.reg - WHERE cc.dep = '__dept__') -SELECT osm_postal.fantoir, --|| CASE WHEN coalesce(cp.postal_code, lp.cp)!=lp.cp THEN ('_' || cp.postal_code) ELSE '' END AS id, - osm_postal.insee_com AS citycode, - 'street' AS type, - REPLACE(REPLACE(REGEXP_REPLACE(v.voie_osm,'\t',' '),'"',chr(39)),'’',chr(39)) AS name, - osm_postal.code_postal AS postcode, - round(st_y(v.geometrie)::numeric,6) AS lat, - round(st_x(v.geometrie)::numeric,6) AS lon, - c.name AS city, - cog.nom_dep AS departement, - cog.nom_reg AS region, - round(log(c.adm_weight+log(c.population+1)/3)::numeric*log(1+log(count(osm_postal.*)+1)+log(st_length(st_longestline(st_convexhull(ST_Collect(osm_postal.geometrie)),st_convexhull(ST_Collect(osm_postal.geometrie)))::geography)+1)+log(CASE WHEN v.voie_osm like 'Boulevard%' THEN 4 WHEN v.voie_osm LIKE 'Place%' THEN 4 WHEN v.voie_osm LIKE 'Espl%' THEN 4 WHEN v.voie_osm LIKE 'Av%' THEN 3 WHEN v.voie_osm LIKE 'Rue %' THEN 2 ELSE 1 END))::numeric,4) AS importance, - string_agg(numero||'$'||st_y(osm_postal.geometrie)||'$'||st_x(osm_postal.geometrie),'#' ORDER BY numero)::text AS housenumbers -FROM cumul_voies v -JOIN infos_communes c -ON (c.insee_com=v.insee_com) -JOIN osm_postal -ON osm_postal.fantoir=v.fantoir -LEFT JOIN cog -ON v.insee_com = cog.insee -WHERE v.dept = '__dept__' AND - osm_postal.rang_postal = 1 AND - osm_postal.numero ~ '^[0-9]{1,4}( ?[A-Z]?.*)?' 
AND - osm_postal.numero != '99999' AND - osm_postal.numero !~'.[0-9 \\.\\-]{9,}' -GROUP BY osm_postal.fantoir, - osm_postal.insee_com, - osm_postal.code_postal, - v.voie_osm, - v.geometrie, - c.name, - cog.nom_dep, - cog.nom_reg, - c.adm_weight, - c.population -ORDER BY osm_postal.fantoir; - diff --git a/bano/sql/export_json_dept_voies_rapprochees_sans_adresses.sql b/bano/sql/export_json_dept_voies_rapprochees_sans_adresses.sql deleted file mode 100644 index 1d4d16b..0000000 --- a/bano/sql/export_json_dept_voies_rapprochees_sans_adresses.sql +++ /dev/null @@ -1,59 +0,0 @@ -WITH -set_fantoir -AS -(SELECT fantoir FROM cumul_voies WHERE dept = '__dept__' AND COALESCE(fantoir,'') != '' -EXCEPT -SELECT fantoir FROM cumul_adresses WHERE dept = '__dept__'), -osm_postal -AS -(SELECT v.geometrie, - insee_com, - fantoir, - REPLACE(REPLACE(REGEXP_REPLACE(v.voie_osm,'\t',' '),'"',chr(39)),'’',chr(39)) AS name, - pp.code_postal, - RANK() OVER(PARTITION BY v.fantoir ORDER BY id) rang_postal -FROM cumul_voies v -JOIN set_fantoir -USING (fantoir) -JOIN polygones_postaux pp -ON ST_Contains(pp.geometrie, v.geometrie)), -cog -AS -(SELECT cc.com AS insee, - cd.libelle AS nom_dep, - cr.libelle AS nom_reg -FROM cog_commune cc -JOIN cog_departement cd -USING (dep) -JOIN cog_region cr -ON cc.reg = cr.reg -WHERE cc.dep = '__dept__') -SELECT p.fantoir, - p.insee_com AS citycode, - 'street' AS type, - p.name, - p.code_postal AS postcode, - round(st_y(p.geometrie)::numeric,6) AS lat, - round(st_x(p.geometrie)::numeric,6) AS lon, - c.name AS city, - cog.nom_dep AS departement, - cog.nom_reg AS region, - round(log(c.adm_weight+log(c.population+1)/3)::numeric*log(1+log(count(p.*)+1)+log(CASE WHEN p.name like 'Boulevard%' THEN 4 WHEN p.name LIKE 'Place%' THEN 4 WHEN p.name LIKE 'Espl%' THEN 4 WHEN p.name LIKE 'Av%' THEN 3 WHEN p.name LIKE 'Rue %' THEN 2 ELSE 1 END))::numeric,4) AS importance -FROM osm_postal p -JOIN infos_communes c -USING (insee_com) -LEFT JOIN cog -ON p.insee_com = cog.insee -WHERE p.rang_postal = 1 -GROUP BY p.fantoir, - p.insee_com, - p.code_postal, - p.name, - p.geometrie, - c.name, - cog.nom_dep, - cog.nom_reg, - c.adm_weight, - c.population -ORDER BY p.fantoir; - diff --git a/bano/sql/geom_suffixes_insee.sql b/bano/sql/geom_suffixes_insee.sql deleted file mode 100644 index 8254799..0000000 --- a/bano/sql/geom_suffixes_insee.sql +++ /dev/null @@ -1,4 +0,0 @@ -SELECT ST_AsText(ST_Transform(ST_SetSRID(geometrie,4326),3857)) geometrie, - libelle_suffixe -FROM suffixe -WHERE insee_com ='__com__'; diff --git a/bano/sql/highway_bbox_insee.sql b/bano/sql/highway_bbox_insee.sql deleted file mode 100644 index b16931b..0000000 --- a/bano/sql/highway_bbox_insee.sql +++ /dev/null @@ -1,14 +0,0 @@ -SELECT DISTINCT l.name, - l.tags->'ref:FR:FANTOIR' f, - l.tags->'ref:FR:FANTOIR:left' fl, - l.tags->'ref:FR:FANTOIR:right' fr, - '' suffixe, - p.tags->'ref:INSEE' -FROM planet_osm_polygon p -JOIN planet_osm_line l -ON l.way && p.way -WHERE p.tags ? 
'ref:INSEE' AND - p.tags->'ref:INSEE'='__com__' AND - l.highway IS NOT NULL AND - l.name IS NOT NULL -ORDER BY 6; diff --git a/bano/sql/highway_insee.sql b/bano/sql/highway_insee.sql deleted file mode 100644 index f3c3cf3..0000000 --- a/bano/sql/highway_insee.sql +++ /dev/null @@ -1,68 +0,0 @@ -WITH -f -AS -( SELECT pl.name as namecomp, - unnest(array[pl.name,pl.tags->'alt_name',pl.tags->'old_name']) as name, - pl.tags->'ref:FR:FANTOIR' f, - '' fl, - '' fr, - '' suffixe, - p.tags->'ref:INSEE' insee - FROM planet_osm_polygon p - JOIN planet_osm_point pl - ON pl.way && p.way AND - ST_Intersects(pl.way, p.way) - WHERE p.tags ? 'ref:INSEE' AND - p.tags->'ref:INSEE' {clause_insee} AND - pl.tags->'ref:FR:FANTOIR'!='' AND - pl.name IS NOT NULL - UNION - SELECT l.name, - unnest(array[l.name,l.tags->'alt_name',l.tags->'old_name']), - l.tags->'ref:FR:FANTOIR' f, - l.tags->'ref:FR:FANTOIR:left' fl, - l.tags->'ref:FR:FANTOIR:right' fr, - '' suffixe,p.tags->'ref:INSEE' - FROM planet_osm_polygon p - JOIN planet_osm_line l - ON ST_Intersects(l.way, p.way) - WHERE p.tags ? 'ref:INSEE' AND - p.tags->'ref:INSEE' {clause_insee} AND - l.highway IS NOT NULL AND - l.name IS NOT NULL - UNION - SELECT pl.name, - unnest(array[pl.name,pl.tags->'alt_name',pl.tags->'old_name']), - pl.tags->'ref:FR:FANTOIR' f, - '' fl, - '' fr, - '' suffixe, - p.tags->'ref:INSEE' - FROM planet_osm_polygon p - JOIN planet_osm_polygon pl - ON pl.way && p.way AND - ST_Intersects(pl.way, p.way) - WHERE p.tags ? 'ref:INSEE' AND - p.tags->'ref:INSEE' {clause_insee} AND - ( coalesce(pl.highway,pl.tags->'ref:FR:FANTOIR') IS NOT NULL OR - pl.landuse = 'residential' OR - pl.amenity = 'parking') AND - pl.name IS NOT NULL) -SELECT name, - CASE - WHEN name = namecomp THEN f - ELSE NULL - END f, - CASE - WHEN name = namecomp THEN fl - ELSE NULL - END fl, - CASE - WHEN name = namecomp THEN fr - ELSE NULL - END fr, - suffixe, - insee -FROM f -WHERE name IS NOT NULL -ORDER BY 6; \ No newline at end of file diff --git a/bano/sql/highway_relation_bbox_insee.sql b/bano/sql/highway_relation_bbox_insee.sql deleted file mode 100644 index 5d256e0..0000000 --- a/bano/sql/highway_relation_bbox_insee.sql +++ /dev/null @@ -1,13 +0,0 @@ -SELECT DISTINCT l.name, - %% r.tags, - p."ref:INSEE" ---FROM planet_osm_polygon p -FROM (SELECT ST_SetSRID(ST_Extent(way),3857) way, "ref:INSEE" FROM planet_osm_polygon WHERE "ref:INSEE" = '__com__' GROUP BY 2) p -JOIN planet_osm_line l -ON ST_Intersects(l.way,p.way) -JOIN planet_osm_rels r -ON r.osm_id = l.osm_id -WHERE --p."ref:INSEE" = '__com__' AND - l.highway != '' AND - l.name != '' -ORDER BY 3; diff --git a/bano/sql/highway_relation_insee.sql b/bano/sql/highway_relation_insee.sql deleted file mode 100644 index e22530d..0000000 --- a/bano/sql/highway_relation_insee.sql +++ /dev/null @@ -1,13 +0,0 @@ -SELECT DISTINCT l.name, - %% r.tags, - p."ref:INSEE" -FROM planet_osm_polygon p -JOIN planet_osm_line l -ON ST_Intersects(l.way, p.way) -JOIN planet_osm_rels r -ON r.osm_id = l.osm_id -WHERE p."ref:INSEE" = '__com__' AND - l.highway != '' AND - l.name != '' -ORDER BY 3; - diff --git a/bano/sql/highway_relation_suffixe_insee.sql b/bano/sql/highway_relation_suffixe_insee.sql deleted file mode 100644 index c871403..0000000 --- a/bano/sql/highway_relation_suffixe_insee.sql +++ /dev/null @@ -1,18 +0,0 @@ -INSERT INTO highway_relation_suffixe_insee -SELECT DISTINCT l.name, - r.tags, - h.libelle_suffixe, - p."ref:INSEE" -FROM planet_osm_polygon p -JOIN planet_osm_line l -ON ST_Intersects(l.way, p.way) -JOIN planet_osm_rels r -ON 
r.osm_id = l.osm_id -LEFT OUTER JOIN suffixe h -ON ST_Intersects(l.way, h.geometrie) -WHERE p."ref:INSEE" = '__com__' AND - l.highway != '' AND - l.name != '' ---ORDER BY 4 -; - diff --git a/bano/sql/highway_relation_suffixe_insee_bbox_nocache.sql b/bano/sql/highway_relation_suffixe_insee_bbox_nocache.sql deleted file mode 100644 index fededbb..0000000 --- a/bano/sql/highway_relation_suffixe_insee_bbox_nocache.sql +++ /dev/null @@ -1,30 +0,0 @@ -SELECT DISTINCT l.name, - r.tags, - h.libelle_suffixe, - p."ref:INSEE" -FROM planet_osm_polygon p -JOIN planet_osm_line l -ON l.way && p.way -JOIN planet_osm_rels r -ON r.osm_id = l.osm_id -LEFT OUTER JOIN suffixe h -ON ST_Intersects(l.way, h.geometrie) -WHERE p."ref:INSEE" = '__com__' AND - l.highway != '' AND - l.name != '' -UNION ALL -SELECT DISTINCT l.name, - r.tags, - h.libelle_suffixe, - p."ref:INSEE" -FROM planet_osm_polygon p -JOIN planet_osm_polygon l -ON l.way && p.way -JOIN planet_osm_rels r -ON r.osm_id = l.osm_id -LEFT OUTER JOIN suffixe h -ON ST_Intersects(l.way, h.geometrie) -WHERE p."ref:INSEE" = '__com__' AND - l.highway != '' AND - l.name != ''; - diff --git a/bano/sql/highway_relation_suffixe_insee_nocache.sql b/bano/sql/highway_relation_suffixe_insee_nocache.sql deleted file mode 100644 index fb1cac0..0000000 --- a/bano/sql/highway_relation_suffixe_insee_nocache.sql +++ /dev/null @@ -1,30 +0,0 @@ -SELECT DISTINCT l.name, - r.tags, - h.libelle_suffixe, - p."ref:INSEE" -FROM planet_osm_polygon p -JOIN planet_osm_line l -ON ST_Intersects(l.way, p.way) -JOIN planet_osm_rels r -ON r.osm_id = l.osm_id -LEFT OUTER JOIN suffixe h -ON ST_Intersects(l.way, h.geometrie) -WHERE p."ref:INSEE" = '__com__' AND - l.highway != '' AND - l.name != '' -UNION ALL -SELECT DISTINCT l.name, - r.tags, - h.libelle_suffixe, - p."ref:INSEE" -FROM planet_osm_polygon p -JOIN planet_osm_polygon l -ON ST_Intersects(l.way, p.way) -JOIN planet_osm_rels r -ON r.osm_id = l.osm_id -LEFT OUTER JOIN suffixe h -ON ST_Intersects(l.way, h.geometrie) -WHERE p."ref:INSEE" = '__com__' AND - l.highway != '' AND - l.name != ''; - diff --git a/bano/sql/highway_suffixe_insee.sql b/bano/sql/highway_suffixe_insee.sql deleted file mode 100644 index 5278abc..0000000 --- a/bano/sql/highway_suffixe_insee.sql +++ /dev/null @@ -1,49 +0,0 @@ -INSERT INTO highway_suffixe_insee -SELECT pl.name, - pl."ref:FR:FANTOIR" f, - '' fl, - '' fr, - h.libelle_suffixe, - p."ref:INSEE" -FROM planet_osm_polygon p -JOIN planet_osm_point pl -ON pl.way && p.way AND - ST_Intersects(pl.way, p.way) -LEFT OUTER JOIN suffixe h -ON ST_Intersects(pl.way, h.geometrie) -WHERE p."ref:INSEE" = '__com__' AND - pl."ref:FR:FANTOIR" !='' AND - pl.name != '' -UNION -SELECT l.name, - l.tags->'ref:FR:FANTOIR' f, - l.tags->'ref:FR:FANTOIR:left' fl, - l.tags->'ref:FR:FANTOIR:right' fr, - h.libelle_suffixe, - p."ref:INSEE" -FROM planet_osm_polygon p -JOIN planet_osm_line l -ON ST_Intersects(l.way, p.way) -LEFT OUTER JOIN suffixe h -ON ST_Intersects(l.way, h.geometrie) -WHERE p."ref:INSEE" = '__com__' AND - l.highway != '' AND - l.name != '' -UNION -SELECT pl.name, - pl."ref:FR:FANTOIR" f, - pl."ref:FR:FANTOIR:left" fl, - pl."ref:FR:FANTOIR:right" fr, - h.libelle_suffixe, - p."ref:INSEE" -FROM planet_osm_polygon p -JOIN planet_osm_polygon pl -ON pl.way && p.way AND - ST_Intersects(pl.way, p.way) -LEFT OUTER JOIN suffixe h -ON ST_Intersects(pl.way, h.geometrie) -WHERE p."ref:INSEE" = '__com__' AND - (pl.highway||pl."ref:FR:FANTOIR" != '' OR - pl.landuse = 'residential' OR - pl.amenity = 'parking') AND - pl.name != ''; diff 
--git a/bano/sql/highway_suffixe_insee_nocache.sql b/bano/sql/highway_suffixe_insee_nocache.sql deleted file mode 100644 index 6f72ec9..0000000 --- a/bano/sql/highway_suffixe_insee_nocache.sql +++ /dev/null @@ -1,48 +0,0 @@ -SELECT pl.name, - pl."ref:FR:FANTOIR" f, - '' fl, - '' fr, - h.libelle_suffixe, - p."ref:INSEE" -FROM planet_osm_polygon p -JOIN planet_osm_point pl -ON pl.way && p.way AND - ST_Intersects(pl.way, p.way) -LEFT OUTER JOIN suffixe h -ON ST_Intersects(pl.way, h.geometrie) -WHERE p."ref:INSEE" = '__com__' AND - pl."ref:FR:FANTOIR" !='' AND - pl.name != '' -UNION -SELECT l.name, - l.tags->'ref:FR:FANTOIR' f, - l.tags->'ref:FR:FANTOIR:left' fl, - l.tags->'ref:FR:FANTOIR:right' fr, - h.libelle_suffixe, - p."ref:INSEE" -FROM planet_osm_polygon p -JOIN planet_osm_line l -ON ST_Intersects(l.way, p.way) -LEFT OUTER JOIN suffixe h -ON ST_Intersects(l.way, h.geometrie) -WHERE p."ref:INSEE" = '__com__' AND - l.highway != '' AND - l.name != '' -UNION -SELECT pl.name, - pl."ref:FR:FANTOIR" f, - pl."ref:FR:FANTOIR:left" fl, - pl."ref:FR:FANTOIR:right" fr, - h.libelle_suffixe, - p."ref:INSEE" -FROM planet_osm_polygon p -JOIN planet_osm_polygon pl -ON pl.way && p.way AND - ST_Intersects(pl.way, p.way) -LEFT OUTER JOIN suffixe h -ON ST_Intersects(pl.way, h.geometrie) -WHERE p."ref:INSEE" = '__com__' AND - (pl.highway||pl."ref:FR:FANTOIR" != '' OR - pl.landuse = 'residential' OR - pl.amenity = 'parking') AND - pl.name != ''; diff --git a/bano/sql/hsnr_bbox_insee.sql b/bano/sql/hsnr_bbox_insee.sql deleted file mode 100644 index c5c1826..0000000 --- a/bano/sql/hsnr_bbox_insee.sql +++ /dev/null @@ -1,50 +0,0 @@ -INSERT INTO hsnr_bbox_insee -SELECT ST_X(pt_geo), - ST_Y(pt_geo), - provenance, - osm_id, - hsnr, - street_name, - tags, - ''::text suffixe, - insee_com -FROM --- point dans relation associatedStreet - (SELECT 3 provenance, - ST_Transform(pt.way,4326) pt_geo, - pt.osm_id, - pt."addr:housenumber" hsnr, - null::text street_name, - r.tags tags, - p."ref:INSEE" insee_com --- FROM planet_osm_polygon p - FROM (SELECT ST_SetSRID(ST_Extent(way),3857) way, "ref:INSEE" FROM planet_osm_polygon WHERE "ref:INSEE" = '__com__' GROUP BY 2) p - JOIN planet_osm_point pt - ON ST_Intersects(pt.way,p.way) - JOIN planet_osm_rels r - ON r.osm_id = pt.osm_id - WHERE p."ref:INSEE" = '__com__' AND - pt."addr:housenumber" != '' - UNION --- way dans relation associatedStreet - SELECT 4, - ST_Transform(ST_Centroid(w.way),4326), - w.osm_id, - w."addr:housenumber", - null, - r.tags, - p."ref:INSEE" --- FROM planet_osm_polygon p - FROM (SELECT ST_SetSRID(ST_Extent(way),3857) way, "ref:INSEE" FROM planet_osm_polygon WHERE "ref:INSEE" = '__com__' GROUP BY 2) p - JOIN planet_osm_polygon w - ON ST_Intersects(w.way, p.way) - JOIN planet_osm_rels r - ON r.osm_id = w.osm_id - WHERE p."ref:INSEE" = '__com__' AND - w."addr:housenumber" != '' -)a ---ORDER BY 9 --- where hsnr is not null*/ -; - - \ No newline at end of file diff --git a/bano/sql/hsnr_bbox_insee_nocache.sql b/bano/sql/hsnr_bbox_insee_nocache.sql deleted file mode 100644 index 61825ab..0000000 --- a/bano/sql/hsnr_bbox_insee_nocache.sql +++ /dev/null @@ -1,46 +0,0 @@ -SELECT ST_X(pt_geo), - ST_Y(pt_geo), - provenance, - osm_id, - hsnr, - street_name, - tags, - ''::text suffixe, - insee_com -FROM --- point dans relation associatedStreet - (SELECT 3 provenance, - ST_Transform(pt.way,4326) pt_geo, - pt.osm_id, - pt."addr:housenumber" hsnr, - null::text street_name, - r.tags tags, - p."ref:INSEE" insee_com --- FROM planet_osm_polygon p - FROM (SELECT 
ST_SetSRID(ST_Extent(way),3857) way, "ref:INSEE" FROM planet_osm_polygon WHERE "ref:INSEE" = '__com__' GROUP BY 2) p - JOIN planet_osm_point pt - ON ST_Intersects(pt.way,p.way) - JOIN planet_osm_rels r - ON r.osm_id = pt.osm_id - WHERE p."ref:INSEE" = '__com__' AND - pt."addr:housenumber" != '' - UNION --- way dans relation associatedStreet - SELECT 4, - ST_Transform(ST_Centroid(w.way),4326), - w.osm_id, - w."addr:housenumber", - null, - r.tags, - p."ref:INSEE" --- FROM planet_osm_polygon p - FROM (SELECT ST_SetSRID(ST_Extent(way),3857) way, "ref:INSEE" FROM planet_osm_polygon WHERE "ref:INSEE" = '__com__' GROUP BY 2) p - JOIN planet_osm_polygon w - ON ST_Intersects(w.way, p.way) - JOIN planet_osm_rels r - ON r.osm_id = w.osm_id - WHERE p."ref:INSEE" = '__com__' AND - w."addr:housenumber" != '' -)a; - - \ No newline at end of file diff --git a/bano/sql/hsnr_insee.sql b/bano/sql/hsnr_insee.sql deleted file mode 100644 index d505320..0000000 --- a/bano/sql/hsnr_insee.sql +++ /dev/null @@ -1,77 +0,0 @@ -INSERT INTO hsnr_insee -select ST_X(pt_geo), - ST_Y(pt_geo), - provenance, - osm_id, - hsnr, - street_name, - tags, - ''::text suffixe, - insee_com -FROM --- point avec addr:street - (SELECT 1 provenance, - ST_Transform(pt.way,4326) pt_geo, - pt.osm_id, - pt."addr:housenumber" hsnr, - pt."addr:street" street_name, - null::hstore tags, - p."ref:INSEE" insee_com - FROM planet_osm_polygon p - JOIN planet_osm_point pt - ON ST_Intersects(pt.way, p.way) - WHERE p."ref:INSEE" = '__com__' AND - pt."addr:housenumber" != '' AND - pt."addr:street" != '' - UNION --- way avec addr:street - SELECT 2, - ST_Transform(ST_Centroid(w.way),4326), - w.osm_id, - w."addr:housenumber", - w."addr:street", - null::hstore tags, - p."ref:INSEE" - FROM planet_osm_polygon p - JOIN planet_osm_polygon w - ON ST_Intersects(w.way, p.way) - WHERE p."ref:INSEE" = '__com__' AND - w."addr:housenumber" != '' AND - w."addr:street" != '' - UNION --- point dans relation associatedStreet - SELECT 3, - ST_Transform(pt.way,4326), - pt.osm_id, - pt."addr:housenumber", - null, - r.tags, - p."ref:INSEE" - FROM planet_osm_polygon p - JOIN planet_osm_point pt - ON ST_Intersects(pt.way, p.way) - JOIN planet_osm_rels r - ON r.osm_id = pt.osm_id - WHERE p."ref:INSEE" = '__com__' AND - pt."addr:housenumber" != '' - UNION --- way dans relation associatedStreet - SELECT 4, - ST_Transform(ST_Centroid(w.way),4326), - w.osm_id, - w."addr:housenumber", - null, - r.tags, - p."ref:INSEE" - FROM planet_osm_polygon p - JOIN planet_osm_polygon w - ON ST_Intersects(w.way, p.way) - JOIN planet_osm_rels r - ON r.osm_id = w.osm_id - WHERE p."ref:INSEE" = '__com__' AND - w."addr:housenumber" != '' -)a ---ORDER BY 9 --- where hsnr is not null*/ -; - diff --git a/bano/sql/hsnr_insee_nocache.sql b/bano/sql/hsnr_insee_nocache.sql deleted file mode 100644 index 63aa03a..0000000 --- a/bano/sql/hsnr_insee_nocache.sql +++ /dev/null @@ -1,73 +0,0 @@ -select ST_X(pt_geo), - ST_Y(pt_geo), - provenance, - osm_id, - hsnr, - street_name, - tags, - ''::text suffixe, - insee_com -FROM --- point avec addr:street - (SELECT 1 provenance, - ST_Transform(pt.way,4326) pt_geo, - pt.osm_id, - pt."addr:housenumber" hsnr, - pt."addr:street" street_name, - null::hstore tags, - p."ref:INSEE" insee_com - FROM planet_osm_polygon p - JOIN planet_osm_point pt - ON ST_Intersects(pt.way, p.way) - WHERE p."ref:INSEE" = '__com__' AND - pt."addr:housenumber" != '' AND - pt."addr:street" != '' - UNION --- way avec addr:street - SELECT 2, - ST_Transform(ST_Centroid(w.way),4326), - w.osm_id, - 
w."addr:housenumber", - w."addr:street", - null::hstore tags, - p."ref:INSEE" - FROM planet_osm_polygon p - JOIN planet_osm_polygon w - ON ST_Intersects(w.way, p.way) - WHERE p."ref:INSEE" = '__com__' AND - w."addr:housenumber" != '' AND - w."addr:street" != '' - UNION --- point dans relation associatedStreet - SELECT 3, - ST_Transform(pt.way,4326), - pt.osm_id, - pt."addr:housenumber", - null, - r.tags, - p."ref:INSEE" - FROM planet_osm_polygon p - JOIN planet_osm_point pt - ON ST_Intersects(pt.way, p.way) - JOIN planet_osm_rels r - ON r.osm_id = pt.osm_id - WHERE p."ref:INSEE" = '__com__' AND - pt."addr:housenumber" != '' - UNION --- way dans relation associatedStreet - SELECT 4, - ST_Transform(ST_Centroid(w.way),4326), - w.osm_id, - w."addr:housenumber", - null, - r.tags, - p."ref:INSEE" - FROM planet_osm_polygon p - JOIN planet_osm_polygon w - ON ST_Intersects(w.way, p.way) - JOIN planet_osm_rels r - ON r.osm_id = w.osm_id - WHERE p."ref:INSEE" = '__com__' AND - w."addr:housenumber" != '' -)a; - diff --git a/bano/sql/hsnr_suffixe_insee.sql b/bano/sql/hsnr_suffixe_insee.sql deleted file mode 100644 index 322734a..0000000 --- a/bano/sql/hsnr_suffixe_insee.sql +++ /dev/null @@ -1,76 +0,0 @@ -select ST_X(ST_Transform(a.pt_geo,4326))::character varying, - ST_Y(ST_Transform(a.pt_geo,4326))::character varying, - a.provenance::character varying, - a.osm_id::character varying, - a.hsnr, - a.street_name, - a.tags, - h.libelle_suffixe, - a.insee_com -FROM --- point avec addr:street - (SELECT 1 provenance, - pt.way pt_geo, - pt.osm_id::character varying, - pt."addr:housenumber" hsnr, - pt."addr:street" street_name, - ARRAY[]::character[] tags, - p."ref:INSEE" insee_com - FROM planet_osm_polygon p - JOIN planet_osm_point pt - ON ST_Intersects(pt.way, p.way) - WHERE p."ref:INSEE" = '__com__' AND - pt."addr:housenumber" != ''AND - pt."addr:street" !='' - UNION --- way avec addr:street - SELECT 2, - ST_Centroid(w.way), - w.osm_id::character varying, - w."addr:housenumber", - w."addr:street", - ARRAY[]::character[], - p."ref:INSEE" - FROM planet_osm_polygon p - JOIN planet_osm_polygon w - ON ST_Intersects(w.way, p.way) - WHERE p."ref:INSEE" = '__com__' AND - w."addr:housenumber" !='' AND - w.tags->'addr:street' != '' - UNION --- point dans relation associatedStreet - SELECT 3, - pt.way, - pt.osm_id::character varying, - pt."addr:housenumber", - null, - %% r.tags, - p."ref:INSEE" - FROM planet_osm_polygon p - JOIN planet_osm_point pt - ON ST_Intersects(pt.way, p.way) - JOIN planet_osm_rels r - ON r.osm_id = pt.osm_id - WHERE p."ref:INSEE" = '__com__' AND - pt."addr:housenumber" != '' - UNION --- way dans relation associatedStreet - SELECT 4, - ST_Centroid(w.way), - w.osm_id::character varying, - w."addr:housenumber", - null, - %% r.tags, - p."ref:INSEE" - FROM planet_osm_polygon p - JOIN planet_osm_polygon w - ON ST_Intersects(w.way, p.way) - JOIN planet_osm_rels r - ON r.osm_id = w.osm_id - WHERE p."ref:INSEE" = '__com__' AND - w."addr:housenumber" != '' - -)a -LEFT OUTER JOIN suffixe h -ON ST_Intersects(a.pt_geo, h.geometrie); - diff --git a/bano/sql/lieux_dits_post_process.sql b/bano/sql/lieux_dits_post_process.sql deleted file mode 100644 index bb66770..0000000 --- a/bano/sql/lieux_dits_post_process.sql +++ /dev/null @@ -1,32 +0,0 @@ -DELETE FROM cadastre_2_place WHERE insee_com like '__dept__%'; -WITH -fantoir -AS -(SELECT DISTINCT code_insee||id_voie||cle_rivoli AS fantoir, - TRIM(BOTH FROM nature_voie||' '||libelle_voie) AS nom, - code_insee AS insee_com -FROM fantoir_voie -WHERE code_insee LIKE 
'__dept__%' AND - type_voie = '3'), -lieux_dits -AS -(SELECT ST_Centroid(geometrie) AS geometrie, - regexp_replace(regexp_replace(nom,' ',' ','g'),' ',' ','g') AS nom, - insee_com -FROM lieux_dits -WHERE insee_com LIKE '__dept__%' AND - nom IS NOT NULL ), -mix -AS -(SELECT l.*, - f.fantoir -FROM lieux_dits l -LEFT OUTER JOIN fantoir f -USING (nom,insee_com)) -INSERT INTO cadastre_2_place -SELECT geometrie, - nom, - fantoir, - insee_com -FROM mix; -COMMIT; \ No newline at end of file diff --git a/bano/sql/load_code_cadastre.sql b/bano/sql/load_code_cadastre.sql deleted file mode 100644 index 31da241..0000000 --- a/bano/sql/load_code_cadastre.sql +++ /dev/null @@ -1,23 +0,0 @@ -TRUNCATE TABLE tmp_code_cadastre; -COPY tmp_code_cadastre (dept,cadastre_dept,nom_com,code_postal,cadastre_com,format_cadastre) FROM './code_cadastre.csv' WITH CSV DELIMITER ','; -UPDATE tmp_code_cadastre SET insee_com = dept||substr(cadastre_com,3,3) WHERE length(dept) = 2; -UPDATE tmp_code_cadastre SET insee_com = dept||substr(cadastre_com,4,2) WHERE length(dept) = 3; -UPDATE tmp_code_cadastre SET date_maj = (SELECT to_char(n,'YYMMDD')::integer FROM (SELECT now() AS n)a); - -DELETE FROM code_cadastre -WHERE insee_com IN (SELECT insee_com FROM tmp_code_cadastre WHERE format_cadastre = 'VECT' AND nom_com != '' - INTERSECT - SELECT insee_com FROM code_cadastre WHERE format_cadastre = 'IMAG'); -DELETE FROM code_cadastre -WHERE insee_com IN (SELECT insee_com - FROM (SELECT insee_com,cadastre_com FROM tmp_code_cadastre WHERE format_cadastre = 'VECT' AND nom_com != '' - EXCEPT - SELECT insee_com,cadastre_com FROM code_cadastre)i); -INSERT INTO code_cadastre -SELECT t.* -FROM tmp_code_cadastre t -LEFT OUTER JOIN code_cadastre c -ON t.insee_com = c.insee_com -WHERE c.insee_com IS NULL AND - t.nom_com != ''; -COMMIT; diff --git a/bano/sql/place_insee.sql b/bano/sql/place_insee.sql deleted file mode 100644 index ba416fd..0000000 --- a/bano/sql/place_insee.sql +++ /dev/null @@ -1,29 +0,0 @@ ---DELETE FROM place_insee WHERE insee_com = '__com__'; -WITH -a AS -(SELECT ST_Transform(pt.way,4326) pt_geo, - pt.place, - pt.name, - pt."ref:FR:FANTOIR" fantoir, - pt.tags, - p."ref:INSEE" insee_com - FROM planet_osm_polygon p - JOIN planet_osm_point pt - ON ST_Intersects(pt.way, p.way) - WHERE p."ref:INSEE" = '__com__' AND - (pt.place != '' OR - (pt.railway !='' AND pt."ref:FR:FANTOIR" != '') OR - (pt.amenity !='' AND pt."ref:FR:FANTOIR" != '') ) AND - pt.name != '') ---INSERT INTO place_insee -SELECT ST_X(pt_geo), - ST_Y(pt_geo), - place, - name, - fantoir, - '0', --ld_bati - tags, - insee_com--, - -- round(extract(epoch from now())) -FROM a; ---COMMIT; \ No newline at end of file diff --git a/bano/sql/point_par_rue_complement_insee.sql b/bano/sql/point_par_rue_complement_insee.sql deleted file mode 100644 index df17a59..0000000 --- a/bano/sql/point_par_rue_complement_insee.sql +++ /dev/null @@ -1,34 +0,0 @@ -INSERT INTO point_par_rue_complement_insee -SELECT ST_X(ST_Transform(ST_SetSRID(way,3857),4326)), - ST_Y(ST_Transform(ST_SetSRID(way,3857),4326)), - name, - fantoir, - suffixe, - insee -FROM (SELECT pl.way, - pl.name, - pl."ref:FR:FANTOIR" fantoir, - '' suffixe, - p."ref:INSEE" insee - FROM planet_osm_polygon p - JOIN planet_osm_point pl - ON pl.way && p.way AND - ST_Intersects(pl.way, p.way) - WHERE p."ref:INSEE" = '__com__' AND - pl."ref:FR:FANTOIR" != '' AND - pl.name != '' - UNION - SELECT ST_Centroid(pl.way), - pl.name, - pl."ref:FR:FANTOIR" f, - '' suffixe, - p."ref:INSEE" - FROM planet_osm_polygon p - JOIN planet_osm_polygon 
pl - ON pl.way && p.way AND - ST_Intersects(pl.way, p.way) - WHERE p."ref:INSEE" = '__com__' AND - ( pl.highway||pl."ref:FR:FANTOIR" != '' OR - pl.landuse = 'residential') AND - pl.name != '')a -ORDER BY 6; \ No newline at end of file diff --git a/bano/sql/point_par_rue_complement_insee_nocache.sql b/bano/sql/point_par_rue_complement_insee_nocache.sql deleted file mode 100644 index 37b6fc1..0000000 --- a/bano/sql/point_par_rue_complement_insee_nocache.sql +++ /dev/null @@ -1,35 +0,0 @@ -SELECT ST_X(ST_Transform(ST_SetSRID(way,3857),4326)), - ST_Y(ST_Transform(ST_SetSRID(way,3857),4326)), - name, - fantoir, - suffixe, - insee -FROM (SELECT pl.way, - pl.name, - pl."ref:FR:FANTOIR" fantoir, - '' suffixe, - p."ref:INSEE" insee - FROM planet_osm_polygon p - JOIN planet_osm_point pl - ON pl.way && p.way AND - ST_Intersects(pl.way, p.way) - WHERE p."ref:INSEE" = '__com__' AND - (pl."ref:FR:FANTOIR" != '' OR - pl.junction != '') AND - pl.name != '' - UNION - SELECT ST_Centroid(pl.way), - pl.name, - pl."ref:FR:FANTOIR" f, - '' suffixe, - p."ref:INSEE" - FROM planet_osm_polygon p - JOIN planet_osm_polygon pl - ON pl.way && p.way AND - ST_Intersects(pl.way, p.way) - WHERE p."ref:INSEE" = '__com__' AND - ( pl.highway||pl."ref:FR:FANTOIR" != '' OR - pl.landuse = 'residential' OR - pl.place = 'square' OR - pl.amenity = 'school') AND - pl.name != '')a; \ No newline at end of file diff --git a/bano/sql/point_par_rue_insee.sql b/bano/sql/point_par_rue_insee.sql deleted file mode 100644 index 321b348..0000000 --- a/bano/sql/point_par_rue_insee.sql +++ /dev/null @@ -1,52 +0,0 @@ -WITH -w0 -AS -(SELECT l.way, - unnest(array[l.name,l.tags->'alt_name',l.tags->'old_name']) as name, - p."ref:INSEE" as insee, - ST_Within(l.way,p.way)::integer as within - FROM planet_osm_polygon p - JOIN planet_osm_line l - ON ST_Intersects(l.way, p.way) - WHERE p."ref:INSEE" = '__com__' AND - (l.highway != '' OR - l.waterway = 'dam') AND - l.name != ''), -w1 -AS -(SELECT * -FROM w0 -WHERE name IS NOT NULL), -w2 -AS -(SELECT *,rank() OVER(PARTITION BY name,insee ORDER BY within DESC) rang -FROM w1), -wl -AS -(SELECT ST_LineMerge(st_collect(way)) way, - name, - insee - FROM w2 - WHERE rang = 1 - GROUP BY 2,3), -wp -AS -(SELECT ST_Centroid(ST_LineMerge(ST_Collect(way))) way, - name - FROM w2 - WHERE rang = 1 - GROUP BY 2), -a -AS -(SELECT ST_Transform(ST_SetSRID(ST_ClosestPoint(wl.way,wp.way),3857),4326) point, - wl.name, - wl.insee -FROM wl -JOIN wp -ON wl.name = wp.name) -INSERT INTO point_par_rue_insee -SELECT ST_x(point), - ST_y(point), - name, - insee -FROM a; diff --git a/bano/sql/point_par_rue_insee_nocache.sql b/bano/sql/point_par_rue_insee_nocache.sql deleted file mode 100644 index efc0136..0000000 --- a/bano/sql/point_par_rue_insee_nocache.sql +++ /dev/null @@ -1,51 +0,0 @@ -WITH -w0 -AS -(SELECT l.way, - unnest(array[l.name,l.tags->'alt_name',l.tags->'old_name']) as name, - p."ref:INSEE" as insee, - ST_Within(l.way,p.way)::integer as within - FROM planet_osm_polygon p - JOIN planet_osm_line l - ON ST_Intersects(l.way, p.way) - WHERE p."ref:INSEE" = '__com__' AND - (l.highway != '' OR - l.waterway = 'dam') AND - l.name != ''), -w1 -AS -(SELECT * -FROM w0 -WHERE name IS NOT NULL), -w2 -AS -(SELECT *,rank() OVER(PARTITION BY name,insee ORDER BY within DESC) rang -FROM w1), -wl -AS -(SELECT ST_LineMerge(st_collect(way)) way, - name, - insee - FROM w2 - WHERE rang = 1 - GROUP BY 2,3), -wp -AS -(SELECT ST_Centroid(ST_LineMerge(ST_Collect(way))) way, - name - FROM w2 - WHERE rang = 1 - GROUP BY 2), -a -AS -(SELECT 
ST_Transform(ST_SetSRID(ST_ClosestPoint(wl.way,wp.way),3857),4326) point, - wl.name, - wl.insee -FROM wl -JOIN wp -ON wl.name = wp.name) -SELECT ST_x(point), - ST_y(point), - name, - insee -FROM a; diff --git a/bano/sql/replace_batiments.sql b/bano/sql/replace_batiments.sql deleted file mode 100644 index 510f4ab..0000000 --- a/bano/sql/replace_batiments.sql +++ /dev/null @@ -1,17 +0,0 @@ -SELECT :'dept'||'%' AS deptlike -\gset -DELETE FROM :schema_cible.batiments WHERE insee_com LIKE :'deptlike'; -INSERT INTO :schema_cible.batiments (insee_com, - nom, - type_batiment, - created, - updated, - geometrie) -SELECT commune, - nom, - "type", - created, - updated, - geometrie -FROM tmp_batiments:dept; -DROP TABLE tmp_batiments:dept; \ No newline at end of file diff --git a/bano/sql/replace_lieux_dits.sql b/bano/sql/replace_lieux_dits.sql deleted file mode 100644 index a9a2829..0000000 --- a/bano/sql/replace_lieux_dits.sql +++ /dev/null @@ -1,16 +0,0 @@ -SELECT :'dept'||'%' AS deptlike -\gset -DELETE FROM :schema_cible.lieux_dits WHERE insee_com LIKE :'deptlike'; -INSERT INTO :schema_cible.lieux_dits (insee_com, - nom, - created, - updated, - geometrie) -SELECT commune, - --nom, - regexp_replace(regexp_replace(regexp_replace(regexp_replace(regexp_replace(nom,' ',' ','g'),' ',' ','g'),' ',' ','g'),' ',' ','g'),' ',' ','g'), - created, - updated, - geometrie -FROM tmp_lieux_dits:dept; -DROP TABLE tmp_lieux_dits:dept; \ No newline at end of file diff --git a/bano/sql/type_highway_insee.sql b/bano/sql/type_highway_insee.sql deleted file mode 100644 index 845c824..0000000 --- a/bano/sql/type_highway_insee.sql +++ /dev/null @@ -1,34 +0,0 @@ -INSERT INTO type_highway_insee -SELECT pl.name, - pl.highway, - p."ref:INSEE" -FROM planet_osm_polygon p -JOIN planet_osm_point pl - ON pl.way && p.way AND - ST_Intersects(pl.way, p.way) - WHERE p."ref:INSEE" = '__com__' AND - pl.highway != '' AND - pl.name != '' -UNION -SELECT l.name, - l.highway, - p."ref:INSEE" - FROM planet_osm_polygon p - JOIN planet_osm_line l - ON ST_Intersects(l.way, p.way) - WHERE p."ref:INSEE" = '__com__' AND - l.highway != '' AND - l.name != '' -UNION -SELECT pl.name, - pl.highway, - p."ref:INSEE" - FROM planet_osm_polygon p - JOIN planet_osm_polygon pl - ON pl.way && p.way AND - ST_Intersects(pl.way, p.way) - WHERE p."ref:INSEE" = '__com__' AND - pl.highway != '' AND - pl.name != '' ---ORDER BY 1 -; diff --git a/bano/sql/update_table_rep_b_as_bis.sql b/bano/sql/update_table_rep_b_as_bis.sql deleted file mode 100644 index f05bb2c..0000000 --- a/bano/sql/update_table_rep_b_as_bis.sql +++ /dev/null @@ -1,29 +0,0 @@ -DROP TABLE IF EXISTS rep_b_as_bis CASCADE; -CREATE TABLE rep_b_as_bis -AS -SELECT id_fantoir, - numero -FROM ban_odbl -WHERE rep = 'b' - -EXCEPT - -(SELECT id_fantoir, - numero -FROM ban_odbl -WHERE rep = 'a' - -UNION - -SELECT id_fantoir, - numero -FROM ban_odbl -WHERE rep = 'c' - -UNION - -SELECT id_fantoir, - numero -FROM ban_odbl -WHERE rep = 'd'); -COMMIT; \ No newline at end of file diff --git a/detection_suffixe.py b/detection_suffixe.py deleted file mode 100755 index 16118b6..0000000 --- a/detection_suffixe.py +++ /dev/null @@ -1,25 +0,0 @@ -#!/usr/bin/env python -# coding: UTF-8 - -import suffixe_depuis_noms_de_voies as a -import sys -import os,os.path -import subprocess - -if len(sys.argv) != 2: - print('Mauvais nombre d\'arguments') - print('USAGE : ./detection_suffixe.py ') - os._exit(0) - -clause_where = '' -if sys.argv[1].upper() != 'FRANCE': - num_dept_cadastre = ('000'+sys.argv[1])[-3:] - clause_where = 'AND 
c.cadastre_dept = \'{:s}\''.format(num_dept_cadastre) - -pgc = a.get_pgc() -str_query = 'SELECT DISTINCT c.insee_com,c.nom_com,c.cadastre_dept FROM code_cadastre c WHERE c.format_cadastre = \'VECT\' {:s} ORDER BY 3,2;'.format(clause_where) -cur = pgc.cursor() -cur.execute(str_query) -for c in cur: - print(c[0]+' '+c[1]) - a.main(['','{:s}'.format(c[0])]) diff --git a/load_COG.sh b/load_COG.sh deleted file mode 100755 index edf07c7..0000000 --- a/load_COG.sh +++ /dev/null @@ -1,25 +0,0 @@ -#!/bin/bash - -source config -cd $DATA_DIR -mkdir -p cog -cd cog -rm *.csv -wget -NS https://www.insee.fr/fr/statistiques/fichier/5057840/cog_ensemble_2021_csv.zip -unzip -o -j cog_ensemble_2021_csv.zip - -CSV_COMMUNE=commune2021.csv -CSV_CANTON=canton2021.csv -CSV_ARRONDISSEMENT=arrondissement2021.csv -CSV_DEPARTEMENT=departement2021.csv -CSV_REGION=region2021.csv - -for d in osm cadastre -do - psql -d ${d} -U cadastre -f $BANO_DIR/sql/create_table_cog.sql - cat ${CSV_COMMUNE}| psql -d ${d} -U cadastre -c "TRUNCATE TABLE cog_commune; COPY cog_commune FROM stdin WITH csv header delimiter ',';" - cat ${CSV_CANTON}| psql -d ${d} -U cadastre -c "TRUNCATE TABLE cog_canton; COPY cog_canton FROM stdin WITH csv header delimiter ',';" - cat ${CSV_ARRONDISSEMENT}|psql -d ${d} -U cadastre -c "TRUNCATE TABLE cog_arrondissement;COPY cog_arrondissement FROM stdin WITH csv header delimiter ',';" - cat ${CSV_DEPARTEMENT}| psql -d ${d} -U cadastre -c "TRUNCATE TABLE cog_departement; COPY cog_departement FROM stdin WITH csv header delimiter ',';" - cat ${CSV_REGION}| psql -d ${d} -U cadastre -c "TRUNCATE TABLE cog_region; COPY cog_region FROM stdin WITH csv header delimiter ',';" -done diff --git a/load_code_cadastre.py b/load_code_cadastre.py deleted file mode 100755 index 9ba15ce..0000000 --- a/load_code_cadastre.py +++ /dev/null @@ -1,55 +0,0 @@ -#!/usr/bin/env python - -import urllib.request -from http.cookiejar import CookieJar -import time -import os -from bs4 import BeautifulSoup - -a_depts = [] -for n in list(range(1, 20))+['2A', '2B']+list(range(21, 96))+list(range(971, 975)) + [976]: - a_depts.append(str(n).rjust(2, '0')) -a_0_depts = [a.rjust(3, '0') for a in a_depts] -f_output = open('code_cadastre.csv', 'w') -cookiejar = CookieJar() -opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cookiejar)) -request = urllib.request.Request( - 'https://www.cadastre.gouv.fr/scpc/rechercherPlan.do') -response = opener.open(request) -token = response.read().decode().split('CSRF_TOKEN=')[1].split('"')[0] - -for i, d in enumerate(a_0_depts): - fname = 'dept_'+d+'.xml' - time.sleep(1) - opener.open(request) - request2 = urllib.request.Request( - "https://www.cadastre.gouv.fr/scpc/listerCommune.do?CSRF_TOKEN={:s}&codeDepartement={:s}&libelle=&keepVolatileSession=&offset=5000".format(token, d)) - response = opener.open(request2) - rep = response.read() - fw = open(fname, 'wb') - fw.write(rep) - fw.close() - fr = BeautifulSoup(open(fname, 'rb').read(), "lxml") - - for e in fr.find_all(attrs={"class": "parcelles"}): - y = e.find(title="Ajouter au panier") - if y is None: - continue - - # y.get('onclick') structure: "ajoutArticle('CL098','VECT','COMU');" - split = y.get('onclick').split("'") - code_commune = split[1] - format_type = split[3] - - # e.strong.string structure: "COBONNE (26400) " - commune_cp = e.strong.string - nom_commune = commune_cp[:-9] - cp = commune_cp[-7:-2] - # try: - f_output.write('{:s},{:s},{:s},{:s},{:s},{:s}\n'.format( - a_depts[i], d, nom_commune, cp, code_commune, format_type)) - # 
except: - # import pdb; pdb.Pdb().set_trace() - f_output.flush() - os.remove(fname) -f_output.close() diff --git a/load_code_cadastre.sh b/load_code_cadastre.sh deleted file mode 100755 index e584582..0000000 --- a/load_code_cadastre.sh +++ /dev/null @@ -1,2 +0,0 @@ -python load_code_cadastre.py -psql -d cadastre -U cadastre -f sql/load_code_cadastre.sql diff --git a/load_codes_postaux.sh b/load_codes_postaux.sh deleted file mode 100755 index e382070..0000000 --- a/load_codes_postaux.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/bash - -source config -cd $DATA_DIR -mkdir -p codes_postaux -cd codes_postaux -rm *csv* -wget https://datanova.legroupe.laposte.fr/explore/dataset/laposte_hexasmal/download/?format=csv&timezone=Europe/Berlin&use_labels_for_header=true&csv_separator=%3B -mv *csv* codes_postaux.csv -psql -d osm -U cadastre -f $BANO_DIR/sql/create_and_load_codes_postaux.sql -psql -d cadastre -U cadastre -f $BANO_DIR/sql/create_and_load_codes_postaux.sql diff --git a/load_cumul.py b/load_cumul.py deleted file mode 100755 index 1382b6f..0000000 --- a/load_cumul.py +++ /dev/null @@ -1,161 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -import addr_2_db as a -import log_2_file as e -import outils_de_gestion as o -import sys -import time -import os,os.path -import codecs - -os.umask(0000) - -# Wrap sys.stdout with a writer that knows how to handle encoding -# Unicode data. -# wrapped_stdout = codecs.getwriter('UTF-8')(sys.stdout) -# Replace sys.stdout with a writer -# sys.stdout = wrapped_stdout - - -def get_code_dept_from_insee(insee): - code_dept = insee[0:2] - if insee[0:2] == '97': - code_dept = insee[0:3] - return code_dept -# def get_geom_suffixes(dept): -# fq = open('sql/geom_suffixes_insee.sql','r') -# str_query = fq.read().replace("='__com__'"," LIKE '{:s}'".format(get_sql_like_dept_string(dept))) -# cursor_bano_ro = pgc.cursor() -# cursor_bano_ro.execute(str_query) -# a_queries = [] -# for l in cursor_bano_ro : -# a_queries.append("SELECT ST_PolygonFromText('{:s}',3857) as geom,'{:s}'::text suffixe".format(l[0],l[1].replace('\'','\'\''))) -# cursor_bano_ro.close() -# return ' UNION '.join(a_queries) -# def warning_message_no_suffixe(dept,etape): -# print("Pas de commune à suffixe dans le {}. 
Etape {} ignorée".format(dept,etape.upper())) - -def get_data_by_dept_from_pg(query_name,dept): - current_time = round(time.time()) - etape_dept = 'cache_dept_'+query_name -# Cache gardé 1h - if o.age_etape_dept(etape_dept,dept) > 3600 : - print(u'Mise à jour du cache "{:s}"'.format(query_name.upper())) - batch_id = o.batch_start_log(source,etape_dept,dept) - fq = open('sql/{:s}.sql'.format(query_name),'r') - str_query = fq.read().replace(" = '__com__'"," LIKE '{:s}'".format(get_sql_like_dept_string(dept))) - # if suffixe_data : - # str_query = str_query.replace('__suffixe_data__',suffixe_data) - # print(str_query) - fq.close() - cur_osm_ro = pgcl.cursor() - cur_osm_ro.execute(str_query) - - list_output = list() - for lt in cur_osm_ro : - list_values = list() - for item in list(lt): - if item == None: - list_values.append('null') - elif type(item) == str : - list_values.append("'{}'".format(item.replace("'","''").replace('"',''))) - elif type(item) == list : - if (len(item)) > 0 : - list_values.append("hstore(ARRAY{})".format(str([s.replace("'","''").replace('"','') for s in item]))) - else : - list_values.append('null') - else : - list_values.append(str(item)) - list_values.append(str(current_time)) - - str_values = ','.join(list_values).replace('"',"'") - list_output.append(str_values) - cur_osm_ro.close() - cur_cache_rw = pgcl.cursor() - str_query = "DELETE FROM {} WHERE insee_com LIKE '{}';".format(query_name,get_sql_like_dept_string(dept)) - cur_cache_rw.execute(str_query) - if len(list_output) > 0 : - str_query = "INSERT INTO {} VALUES ({});COMMIT;".format(query_name,'),('.join(list_output)) - strq = open('./query.txt','w') - strq.write(str_query) - strq.close() - cur_cache_rw.execute(str_query) - cur_cache_rw.close() - o.batch_end_log(0,batch_id) - -str_usage = 'USAGE : python load_cumul.py ' -if len(sys.argv) != 3: - print("Mauvais nombre d'arguments") - print(str_usage) - os._exit(0) -source = sys.argv[2].upper() -if source != 'OSM' and source != 'CADASTRE' and source != 'BAL': - print("La source doit etre OSM, CADASTRE ou BAL") - print(str_usage) - os._exit(0) - -clause_vecteur = '' -if source == 'CADASTRE': - clause_vecteur = " AND format_cadastre = 'VECT' " - -pgc = a.get_pgc() -pgcl = a.get_pgc_osm() - -if sys.argv[1].upper() == 'FRANCE': - loop_query = 'SELECT DISTINCT dept FROM code_cadastre ORDER BY 1;' -else: - num_dept_cadastre = sys.argv[1] - loop_query = "SELECT '{:s}';".format(num_dept_cadastre) - -cur_loop = pgc.cursor() -cur_loop.execute(loop_query) -for c_loop in cur_loop: - num_dept_cadastre = c_loop[0] - global f_log - f_log = e.start_log_to_file(source,os.path.basename(sys.argv[0]).split('.')[0],num_dept_cadastre) - print('## Département {:s}'.format(num_dept_cadastre)) - # geom_suffixe = get_geom_suffixes(num_dept_cadastre) - if source == 'OSM': - get_data_by_dept_from_pg('hsnr_insee',num_dept_cadastre) - # if geom_suffixe : - get_data_by_dept_from_pg('hsnr_suffixe_insee',num_dept_cadastre) - # else : - # warning_message_no_suffixe(num_dept_cadastre,'hsnr_suffixe_insee') - get_data_by_dept_from_pg('hsnr_bbox_insee',num_dept_cadastre) - get_data_by_dept_from_pg('point_par_rue_insee',num_dept_cadastre) - get_data_by_dept_from_pg('point_par_rue_complement_insee',num_dept_cadastre) - get_data_by_dept_from_pg('type_highway_insee',num_dept_cadastre) - # os._exit(0) - - get_data_by_dept_from_pg('highway_insee',num_dept_cadastre) - # if geom_suffixe : - get_data_by_dept_from_pg('highway_suffixe_insee',num_dept_cadastre) - # else : - # 
warning_message_no_suffixe(num_dept_cadastre,'highway_suffixe_insee') - get_data_by_dept_from_pg('highway_bbox_insee',num_dept_cadastre) - get_data_by_dept_from_pg('highway_relation_insee',num_dept_cadastre) - # if geom_suffixe : - get_data_by_dept_from_pg('highway_relation_suffixe_insee',num_dept_cadastre) - # else : - # warning_message_no_suffixe(num_dept_cadastre,'highway_relation_suffixe_insee') - get_data_by_dept_from_pg('highway_relation_bbox_insee',num_dept_cadastre) - # os._exit(0) - - clause_vecteur = '' - if source == 'CADASTRE': - clause_vecteur = " AND format_cadastre = 'VECT' " - str_query = "SELECT insee_com,nom_com FROM code_cadastre WHERE dept = '{:s}' {:s} ORDER BY 2;".format(num_dept_cadastre,clause_vecteur) - cur = pgc.cursor() - cur.execute(str_query) - for c in cur: - print('{:s} - {:s}'.format(c[0],c[1])) - try: - a.main(['',c[0],source,True]) - except : - e.write_log_to_file(f_log,'Commune : {:s}\n'.format(c[1])) - e.write_log_to_file(f_log,str(sys.exc_info()[0])) - e.write_log_to_file(f_log,str(sys.exc_info()[1])) - e.write_sep_to_file(f_log) - e.end_log_to_file(f_log,True) - diff --git a/load_cumul_place.py b/load_cumul_place.py deleted file mode 100755 index 572ceae..0000000 --- a/load_cumul_place.py +++ /dev/null @@ -1,77 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -import place_2_db as a -import log_2_file as e -import outils_de_gestion as o -import sys -import os,os.path - -# Wrap sys.stdout with a writer that knows how to handle encoding -# Unicode data. -# wrapped_stdout = codecs.getwriter('UTF-8')(sys.stdout) -# Replace sys.stdout with a writer -# sys.stdout = wrapped_stdout - -def get_sql_like_dept_string(dept): - return (dept+'___')[0:5] - -def get_code_dept_from_insee(insee): - code_dept = insee[0:2] - if insee[0:2] == '97': - code_dept = insee[0:3] - return code_dept -def get_data_by_dept_from_pg(query_name,dept): - if dept[0:1] == '0': - dept = dept[1:] - etape_dept = 'cache_dept_'+query_name -# Cache gardé 1h - if o.age_etape_dept(etape_dept,dept) > 1 : #3600: - print(u'Mise à jour du cache "{:s}"'.format(query_name.upper())) - batch_id = o.batch_start_log('',etape_dept,dept) - fq = open('sql/{:s}.sql'.format(query_name),'r') - str_query = fq.read().replace(" = '__com__'"," LIKE '{:s}'".format(get_sql_like_dept_string(dept))) - fq.close() - pgc = a.get_pgc_osm() - cur = pgc.cursor() - cur.execute(str_query) - o.batch_end_log(0,batch_id) - -str_usage = 'USAGE : python load_cumul_place.py ' -if len(sys.argv) != 2: - print('Mauvais nombre d\'arguments') - print(str_usage) - os._exit(0) - -if sys.argv[1].upper() == 'FRANCE': - loop_query = 'SELECT DISTINCT dept FROM code_cadastre ORDER BY 1;' -else: - num_dept_cadastre = sys.argv[1] - loop_query = "SELECT '{:s}';".format(num_dept_cadastre) - -pgc = a.get_pgc() -cur_loop = pgc.cursor() -cur_loop.execute(loop_query) -for c_loop in cur_loop: - num_dept_cadastre = c_loop[0] - global f_log - f_log = e.start_log_to_file('',os.path.basename(sys.argv[0]).split('.')[0],num_dept_cadastre) - print('## Département {:s}'.format(num_dept_cadastre)) - - get_data_by_dept_from_pg('place_insee',num_dept_cadastre) - get_data_by_dept_from_pg('cadastre_2_place',num_dept_cadastre) - - str_query = "SELECT insee_com,nom_com FROM code_cadastre WHERE dept = '{:s}' ORDER BY 2;".format(num_dept_cadastre) - cur = pgc.cursor() - cur.execute(str_query) - for c in cur: - print('{:s} - {:s}'.format(c[0],c[1])) - try: - a.main(['',c[0],True]) - except : - e.write_log_to_file(f_log,"Commune : {:s}\n".format(c[1])) - 
e.write_log_to_file(f_log,str(sys.exc_info()[0])) - e.write_log_to_file(f_log,str(sys.exc_info()[1])) - e.write_sep_to_file(f_log) - e.end_log_to_file(f_log,True) - diff --git a/load_fantoir.sh b/load_fantoir.sh deleted file mode 100755 index 17fd538..0000000 --- a/load_fantoir.sh +++ /dev/null @@ -1,12 +0,0 @@ -#!/bin/bash - -source config -cd $DATA_DIR -mkdir -p fantoir -cd fantoir -rm -rf FANTOIR* && rm *.zip -wget https://data.economie.gouv.fr/api/datasets/1.0/fichier-fantoir-des-voies-et-lieux-dits/attachments/fichier_national_fantoir_situation_avril_2022_zip/ -O fantoir.zip -unzip fantoir.zip -mv FANTOIR* fantoir.txt -psql -d osm -U cadastre -f $BANO_DIR/sql/load_fantoir.sql -psql -d cadastre -U cadastre -f $BANO_DIR/sql/load_fantoir.sql diff --git a/load_osm_france_db.sh b/load_osm_france_db.sh deleted file mode 100755 index 527fd46..0000000 --- a/load_osm_france_db.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/bin/bash - -SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" -source $SCRIPT_DIR/config - -lockfile=${SCRIPT_DIR}/imposm.lock - -if test -f ${lockfile} -then - echo `date`" : Process deja en cours" >> $SCRIPT_DIR/cron.log - exit 0 -fi - -touch ${lockfile} - -cd $DOWNLOAD_DIR -wget -NS http://download.openstreetmap.fr/extracts/merge/france_metro_dom_com_nc.osm.pbf -imposm import -mapping $BANO_DIR/bano.yml -read $DOWNLOAD_DIR/france_metro_dom_com_nc.osm.pbf -overwritecache -cachedir $IMPOSM_CACHE_DIR -dbschema-import public -diff -diffdir $DOWNLOAD_DIR -imposm import -mapping $BANO_DIR/bano.yml -write -connection 'postgis://cadastre@localhost/osm'?prefix=NONE -cachedir $IMPOSM_CACHE_DIR -dbschema-import public -diff -diffdir $DOWNLOAD_DIR - -psql -d osm -U cadastre -f $BANO_DIR/sql/finalisation.sql - -cp $DOWNLOAD_DIR/last.state.txt $DOWNLOAD_DIR/state.txt -rm ${lockfile} diff --git a/load_population_INSEE.sh b/load_population_INSEE.sh deleted file mode 100755 index 1db15b2..0000000 --- a/load_population_INSEE.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/bash - -# Après téléchargement manuel d'un CSV à https://statistiques-locales.insee.fr/#c=indicator&i=pop_legales.popmun&s=2017&view=map1 - -CSV=/data/project/bano_data/insee/population_communale_2017.csv - -for d in osm cadastre -do - psql -d ${d} -U cadastre -f $BANO_DIR/sql/create_table_population_insee.sql - cat ${CSV}| grep -v 'N/A'|psql -d ${d} -U cadastre -c "TRUNCATE TABLE population_insee; COPY population_insee FROM stdin WITH csv delimiter ';';" -done diff --git a/munin/bano_main b/munin/bano_main deleted file mode 100755 index 8afc069..0000000 --- a/munin/bano_main +++ /dev/null @@ -1,29 +0,0 @@ -#!/bin/sh - -# script pour graphes munin de la BANO sur le décompte global des adresses - -case $1 in - config) - cat <<'EOM' -graph_title BANO - Contenu -graph_vlabel BANO -graph_category bano -cumul.label Cumul toutes sources -dedup.label Sortie dedupliquee BANO .csv/.shp -cadastre.label Source cadastre -match.label Source cadastre rapprochee avec OSM -nomatch.label Source cadastre non rapprochee avec OSM -osm.label Source OSM -opendata.label Source opendata -EOM - exit 0;; -esac - -psql cadastre -c "select concat('cumul.value ',count(*)::text) from cumul_adresses;" -At -echo "dedup.value `head /data/work/bano.openstreetmap.fr/bano-data/comptage.txt -n 1`" -#psql cadastre -c "select concat('dedup.value ',count(*)::text) from (select fantoir,num from (select *,replace(replace(replace(replace(replace(replace(replace(regexp_replace(upper(numero),'^0*',''),'BIS','B'),'TER','T'),'QUATER','Q'),'QUAT','Q'),' 
',''),'à','-'),';',',') as num from cumul_adresses where fantoir is not null and source in ('CADASTRE','OSM')) as a group by fantoir,num) as f;" -At -psql cadastre -c "select concat('cadastre.value ',count(*)::text) from cumul_adresses where source='CADASTRE';" -At -psql cadastre -c "select concat('match.value ',count(*)::text) from cumul_adresses where source='CADASTRE' and voie_osm != '';" -At -psql cadastre -c "select concat('nomatch.value ',count(*)::text) from cumul_adresses where source='CADASTRE' and (voie_osm = '' or voie_osm is null);" -At -psql cadastre -c "select concat('osm.value ',count(*)::text) from cumul_adresses where source='OSM';" -At -psql cadastre -c "select concat('opendata.value ',count(*)::text) from cumul_adresses where source like 'OD%';" -At diff --git a/munin/bano_rapproche b/munin/bano_rapproche deleted file mode 100755 index 7a7e07a..0000000 --- a/munin/bano_rapproche +++ /dev/null @@ -1,18 +0,0 @@ -#!/bin/sh - -case $1 in - config) - cat <<'EOM' -graph_title BANO - Rapprochements -graph_vlabel Nb de voies -graph_category bano -match.label Voies rapprochees -nomatch.label Voies non rapprochees -EOM - exit 0;; -esac - -psql cadastre -c "select concat('match.value ',count(*)::text) from (select count(*), fantoir from cumul_adresses where source='CADASTRE' and voie_osm!='' group by fantoir) as f" -At -psql cadastre -c "select concat('nomatch.value ',count(*)::text) from (select count(*), fantoir from cumul_adresses where source='CADASTRE' and (voie_osm='' or voie_osm is null) group by fantoir) as f" -At - - diff --git a/out/LICENCE.txt b/out/LICENCE.txt deleted file mode 100644 index 57e29fa..0000000 --- a/out/LICENCE.txt +++ /dev/null @@ -1,24 +0,0 @@ -Ces données sont issues du crowdsourcing effectué par les contributeurs au projet OpenStreetMap et sont sous licence ODbL et la mention d'attribution obligatoire doit être "© les contributeurs d'OpenStreetMap sous licence ODbL" conformément à http://osm.org/copyright - - -Ceci est le résumé explicatif de la licence ODbL 1.0. Merci de lire l'avertissement ci-dessous. - -Vous êtes libres : -- De partager : copier, distribuer et utiliser la base de données. -- De créer : produire des créations à partir de cette base de données. -- D'adapter : modifier, transformer et construire à partir de cette base de données. - -Aussi longtemps que : -- Vous mentionnez la paternité : vous devez mentionner la source de la base de données pour toute utilisation publique de la base de données, ou pour toute création produite à partir de la base de données, de la manière indiquée dans l'ODbL. Pour toute utilisation ou redistribution de la base de données, ou création produite à partir de cette base de données, vous devez clairement mentionner aux tiers la licence de la base de données et garder intacte toute mention légale sur la base de données originaire. - -- Vous partagez aux conditions identiques : si vous utilisez publiquement une version adaptée de cette base de données, ou que vous produisiez une création à partir d'une base de données adaptée, vous devez aussi offrir cette base de données adaptée selon les termes de la licence ODbL. - -- Gardez ouvert : si vous redistribuez la base de données, ou une version modifiée de celle-ci, alors vous ne pouvez utiliser de mesure technique restreignant la création que si vous distribuez aussi une version sans ces restrictions. 
- -Avertissement - -Le résumé explicatif n'est pas un contrat, mais simplement une source pratique pour faciliter la compréhension de la version complète de la licence ODbL 1.0 — il exprime en termes courants les principales notions juridiques du contrat. Ce résumé explicatif n'a pas de valeur juridique, son contenu n'apparaît pas sous cette forme dans le contrat. Seul le texte complet du contrat de licence fait loi. - -Le texte complet du contrat de licence (en anglais) est disponible sur: http://opendatacommons.org/licenses/odbl/1.0/ - -Une traduction en français est disponible sur http://www.vvlibri.org/fr/licence/odbl/10/fr/legalcode \ No newline at end of file diff --git a/out/banout-all.sh b/out/banout-all.sh deleted file mode 100755 index 09df3eb..0000000 --- a/out/banout-all.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/bash -parallel -j 8 sh banout.sh > /dev/null -- 01 02 03 04 05 06 07 08 09 `seq 10 19` 2A 2B `seq 21 95` `seq 971 974` 976 - -# fichier global France -for d in 01 02 03 04 05 06 07 08 09 `seq 10 19` 2A 2B `seq 21 95` `seq 971 974` 976; do cat /data/project/bano.openstreetmap.fr/web/data/bano-$d.csv >> full.csv ; done -gzip -9 full.csv -mv full.csv.gz /data/project/bano.openstreetmap.fr/web/data/ - diff --git a/out/banout-json-all.sh b/out/banout-json-all.sh deleted file mode 100755 index f8c7aa0..0000000 --- a/out/banout-json-all.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/bash - -# script écrit et maintenu par cquest@openstreetmap.fr - -# export multi-thread de cumul_voie + cumul_adresse + cumul_place en json -parallel -j 8 sh banout-json.sh -- 01 02 03 04 05 06 07 08 09 `seq 10 19` 2A 2B `seq 21 95` `seq 971 974` 976 - -# fusion en un fichier unique -rm -f full.sjson* -for dep in {01..19} 2A 2B {21..95} {971..974} 976 ; do grep -v ^$ bano-$dep.json | gzip -9 >> full.sjson.gz ; gzip -9 bano-$dep.json ; done - -# compression et copie sur le site web -sudo mv *json.gz /data/project/bano.openstreetmap.fr/web/data/ - -# ménage final -rm bano-*.json diff --git a/out/banout-json.sh b/out/banout-json.sh deleted file mode 100644 index ff62cc7..0000000 --- a/out/banout-json.sh +++ /dev/null @@ -1,161 +0,0 @@ -# script écrit et maintenu par cquest@openstreetmap.fr - -OUTPUT=bano-$1.json -# dédoublement des adresses multiple OSM (séparées par ';' '-' ',' ou 'à') -psql cadastre -q -c "insert into cumul_adresses select geometrie, trim( both from regexp_split_to_table(numero,';|-|à|,')), voie_cadastre, voie_osm, fantoir, insee_com, cadastre_com, dept, code_postal, source, batch_import_id, voie_fantoir from cumul_adresses where numero ~ ';|-|à|,' and insee_com like '$1%' and source='OSM';" -psql cadastre -q -c "delete from cumul_adresses where numero ~ ';|-|à|,' and insee_com like '$1%' and source='OSM';" - -rm -f $OUTPUT - - -echo "`date +%H:%M:%S` Communes $1" -# export des communes -psql cadastre -t -A -c " \ -SELECT '{\"id\": \"' || insee || '\",\"type\": \"' || CASE WHEN population<1 THEN 'village' WHEN population<'10' THEN 'town' ELSE 'city' END || '\",\"name\": \"' || g.nom || '\",\"postcode\": \"' || ca.code_postal || '\",\"lat\": \"' || round(lat_chf,6) || '\",\"lon\": \"' || round(lon_chf,6) || '\",\"city\": \"' || g.nom|| '\",\"departement\": \"' || cog.nom_dep || '\", \"region\": \"' || cog.nom_reg || '\", \"population\": ' || population || ', \"adm_weight\": ' || CASE WHEN statut LIKE 'Capital%' THEN 6 WHEN statut = 'Préfecture de régi' THEN 5 WHEN statut='Préfecture' THEN 4 WHEN statut LIKE 'Sous-pr%' THEN 3 WHEN statut='Chef-lieu canton' THEN 2 ELSE 1 END || ', 
\"importance\": ' || greatest(0.075,round(log((CASE WHEN statut LIKE 'Capital%' THEN 6 WHEN statut = 'Préfecture de régi' THEN 5 WHEN statut='Préfecture' THEN 4 WHEN statut LIKE 'Sous-pr%' THEN 3 WHEN statut='Chef-lieu canton' THEN 2 ELSE 1 END)+log(population+1)/3),4)) || '}' \ -FROM geofla_plus g join code_cadastre ca on (ca.insee_com=insee) join (select dep, nom_dep, nom_reg from cog group by dep,nom_dep, nom_reg) as cog on (cog.dep=left(insee,2) or cog.dep=left(insee,3)) WHERE insee like '$1%' order by insee; \ -" | grep id >> $OUTPUT - - -echo "`date +%H:%M:%S` Voie non rapprochées $1"; -# export fantoir_voie (pour les voies non rapprochées) + cumul_adresse (ponctuel adresse) > json -for com in `psql cadastre -c "SELECT insee FROM communes where insee like '$1%';" -A -t`; do psql cadastre -t -A -c " \ -SET client_min_messages TO WARNING; WITH v as (select code_insee as insee_com, code_insee || id_voie || cle_rivoli as fantoir from fantoir_voie f left join cumul_voies v on (v.insee_com = '$com' AND f.code_insee=v.insee_com and v.fantoir = code_insee || id_voie || cle_rivoli) where v.fantoir is null and code_insee = '$com') -SELECT '{\"id\": \"' || osm.fantoir || CASE WHEN coalesce(cp.postal_cod, cad.code_postal)!=cad.code_postal THEN ('_' || cp.postal_cod) ELSE '' END || '\",\"citycode\":\"' || left(osm.fantoir,5) || '\",\"type\": \"street\",\"name\": \"' || replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(regexp_replace(regexp_replace(regexp_replace(regexp_replace(regexp_replace(regexp_replace(regexp_replace(regexp_replace(regexp_replace(regexp_replace(regexp_replace(regexp_replace(regexp_replace(regexp_replace(regexp_replace(regexp_replace(regexp_replace(regexp_replace(regexp_replace(regexp_replace(regexp_replace(regexp_replace(regexp_replace(regexp_replace(regexp_replace(regexp_replace(regexp_replace(max(case when osm.voie_osm !='' then osm.voie_osm else osm.voie_cadastre end),'^IMP ','Impasse '),'^RTE ','Route '),'^ALL ','Allée '),'^PL ','Place '),'^PLA ','Place '),'^AV ','Avenue '),'^LOT ','Lotissement '),'^RES ','Résidence '),'^CHEM ','Chemin '),'^RLE ','Ruelle '),'^BD ','Boulevard '),'^SQ ','Square '),'^PAS ','Passage '),'^SEN ','Sentier '),'^CRS ','Cours '),'^TRA ','Traverse '),'^MTE ','Montée '),'^RPT ','Rond-point '),'^HAM ','Hameau '),'^VLA ','Villa '),'^PROM ','Promenade '),'^ESP ','Esplanade '),'^FG ','Faubourg '),'^TSSE ','Terrasse '),'^CTRE ','Centre '),'^PASS ','Passerelle '),'^FRM ','Ferme '),' GAL ',' Général '),' MAL ',' Maréchal '),' ST ',' Saint '),' STE ',' Sainte '),' PDT ',' Président '),' HT ',' Haut '),' HTE ',' Haute '),' VX ',' Vieux '),' PTE ',' Petite '),'\"',''),'’',chr(39)) || '\",\"postcode\": \"' || coalesce(cp.postal_cod, min(lp.cp), cad.code_postal) || CASE WHEN replace(lower(cp.nom),'-',' ') != replace(lower(c.nom),'-',' ') THEN '\",\"post_office\": \"' || cp.nom ELSE '' END || '\",\"lat\": \"' || round(st_y(st_centroid(st_convexhull(ST_Collect(osm.geometrie))))::numeric,6) || '\",\"lon\": \"' || round(st_x(st_centroid(st_convexhull(ST_Collect(osm.geometrie))))::numeric,6) || '\",\"city\": \"' || c.nom || '\",\"departement\": \"' || cog.nom_dep || '\", \"region\": \"' || cog.nom_reg || '\",\"importance\": '|| round(log((CASE WHEN (cad.code_postal LIKE '75%' OR g.statut LIKE 'Capital%') THEN 6 WHEN (cad.code_postal LIKE '690%' OR cad.code_postal LIKE '130%' OR g.statut = 'Préfecture de régi') THEN 5 WHEN g.statut='Préfecture' THEN 4 WHEN g.statut LIKE 'Sous-pr%' THEN 3 WHEN g.statut='Chef-lieu canton' THEN 2 
ELSE 1 END)+log(g.population+1)/3)::numeric*log(1+log(count(osm.*)+1)+log(st_length(st_longestline(st_convexhull(ST_Collect(osm.geometrie)),st_convexhull(ST_Collect(osm.geometrie)))::geography)+1)+log(CASE WHEN max(osm.voie_cadastre) like 'Boulevard%' THEN 4 WHEN max(osm.voie_cadastre) LIKE 'Place%' THEN 4 WHEN max(osm.voie_cadastre) LIKE 'Espl%' THEN 4 WHEN max(osm.voie_cadastre) LIKE 'Av%' THEN 3 WHEN max(osm.voie_cadastre) LIKE 'Rue %' THEN 2 ELSE 1 END))::numeric,4) ||' ,\"housenumbers\":' || concat('{',string_agg(DISTINCT concat('\"',replace(replace(replace(osm.numero,' ',''),'\"',''),'\\',''),'\": {\"lat\": ',round(st_y(osm.geometrie)::numeric,6),',\"lon\": ',round(st_x(osm.geometrie)::numeric,6),'}'), ','),'}}') AS sjson -FROM v -LEFT JOIN cumul_adresses osm ON (osm.fantoir=v.fantoir) -JOIN communes c ON (c.insee=v.insee_com) -JOIN code_cadastre cad ON (cad.insee_com=v.insee_com) -JOIN - (SELECT fantoir, - replace(numero,' ','') AS num, - max(SOURCE) AS src - FROM cumul_adresses - WHERE fantoir like '$com%' - GROUP BY 1, - 2) AS b ON (b.fantoir=osm.fantoir - AND osm.SOURCE=b.src - AND b.num=replace(osm.numero,' ','')) -LEFT JOIN (select dep, nom_dep, nom_reg from cog group by dep, nom_dep, nom_reg) as cog ON (cog.dep=left(v.insee_com,2) or cog.dep=left(v.insee_com,3)) -LEFT JOIN geofla_plus g ON (g.insee=v.insee_com) -LEFT JOIN laposte_cp lp ON (lp.insee=v.insee_com) -LEFT JOIN postal_code cp ON (cp.insee=v.insee_com AND ST_Contains(cp.wkb_geometry, osm.geometrie)) -WHERE osm.fantoir IS NOT NULL - AND osm.numero ~ '^[0-9]{1,4}( ?[A-Z]?.*)?' - AND osm.numero !~'.[0-9 \\.\\-]{9,}' - AND cad.insee_com = '$com' - AND c.insee = '$com' - AND g.insee = '$com' -GROUP BY osm.fantoir, - cad.code_postal, cp.postal_cod, - c.nom, cp.nom, - cog.nom_dep, - cog.nom_reg, - g.statut, - g.population -ORDER BY osm.fantoir; -" >> $OUTPUT; done - -echo "`date +%H:%M:%S` Voie rapprochées $1" -# export cumul_voie (position centre de voirie) + cumul_adresse (ponctuel adresse) > json -for com in `psql cadastre -c "SELECT insee FROM communes where insee like '$1%';" -A -t`; do psql cadastre -t -A -c " \ -SET client_min_messages TO WARNING; SELECT '{\"id\": \"' || v.fantoir || CASE WHEN coalesce(cp.postal_cod, cad.code_postal)!=cad.code_postal THEN ('_' || cp.postal_cod) ELSE '' END || '\",\"citycode\": \"' || left(v.fantoir,5) || '\",\"type\": \"street\",\"name\": \"' || replace(replace(v.voie_osm,'\"',''),'’',chr(39)) || '\",\"postcode\": \"' || coalesce(cp.postal_cod, min(lp.cp), cad.code_postal) || CASE WHEN replace(lower(cp.nom),'-',' ') != replace(lower(c.nom),'-',' ') THEN '\",\"post_office\": \"' || cp.nom ELSE '' END || '\",\"lat\": \"' || round(st_y(v.geometrie)::numeric,6) || '\",\"lon\": \"' || round(st_x(v.geometrie)::numeric,6) || '\",\"city\": \"' || c.nom || '\",\"departement\": \"' || cog.nom_dep || '\", \"region\": \"' || cog.nom_reg || '\",\"importance\": '|| round(log((CASE WHEN (cad.code_postal LIKE '75%' OR g.statut LIKE 'Capital%') THEN 6 WHEN (cad.code_postal LIKE '690%' OR cad.code_postal LIKE '130%' OR g.statut = 'Préfecture de régi') THEN 5 WHEN g.statut='Préfecture' THEN 4 WHEN g.statut LIKE 'Sous-pr%' THEN 3 WHEN g.statut='Chef-lieu canton' THEN 2 ELSE 1 END)+log(g.population+1)/3)::numeric*log(1+log(count(a.*)+1)+log(st_length(st_longestline(st_convexhull(ST_Collect(a.geometrie)),st_convexhull(ST_Collect(a.geometrie)))::geography)+1)+log(CASE WHEN v.voie_osm like 'Boulevard%' THEN 4 WHEN v.voie_osm LIKE 'Place%' THEN 4 WHEN v.voie_osm LIKE 'Espl%' THEN 4 WHEN v.voie_osm LIKE 'Av%' 
THEN 3 WHEN v.voie_osm LIKE 'Rue %' THEN 2 ELSE 1 END))::numeric,4) ||' ,\"housenumbers\":' || concat('{',string_agg(DISTINCT concat('\"',replace(replace(replace(a.numero,' ',''),'\"',''),'\\',''),'\": {\"lat\": ',round(st_y(a.geometrie)::numeric,6),',\"lon\": ',round(st_x(a.geometrie)::numeric,6),'}'), ','),'}}') AS sjson -FROM cumul_voies v -JOIN communes c ON (insee=insee_com) -JOIN code_cadastre cad ON (cad.insee_com=v.insee_com) -LEFT JOIN cumul_adresses a ON (a.fantoir=v.fantoir) -JOIN - (SELECT fantoir, - replace(numero,' ','') AS num, - max(SOURCE) AS src - FROM cumul_adresses - WHERE fantoir LIKE '$com%' - GROUP BY 1, - 2) AS b ON (b.fantoir=a.fantoir - AND a.SOURCE=b.src - AND b.num=replace(a.numero,' ','')) -LEFT JOIN (select dep, nom_dep, nom_reg from cog group by dep, nom_dep, nom_reg) as cog ON (cog.dep=left(v.insee_com,2) or cog.dep=left(v.insee_com,3)) -LEFT JOIN geofla_plus g ON (g.insee=v.insee_com) -LEFT JOIN laposte_cp lp ON (lp.insee=v.insee_com) -LEFT JOIN postal_code cp ON (cp.insee=v.insee_com AND ST_Contains(cp.wkb_geometry, a.geometrie)) -WHERE v.fantoir LIKE '$com%' - AND a.numero ~ '^[0-9]{1,4}( ?[A-Z]?.*)?' - AND a.numero !~'.[0-9 \\.\\-]{9,}' - AND cad.insee_com = '$com' - AND c.insee = '$com' - AND g.insee = '$com' -GROUP BY v.fantoir, - v.voie_osm, - cad.code_postal, cp.postal_cod, - v.geometrie, - c.nom, cp.nom, - cog.nom_dep, - cog.nom_reg, - g.statut, - g.population -ORDER BY v.fantoir; -" >> $OUTPUT; done - - -echo "`date +%H:%M:%S` Voie rapprochées sans adresses $1" -# export cumul_voie (position centre de voirie) > json -psql cadastre -t -A -c " \ -SET client_min_messages TO WARNING; SELECT '{\"id\": \"' || v.fantoir || CASE WHEN coalesce(cp.postal_cod, cad.code_postal)!=cad.code_postal THEN ('_' || cp.postal_cod) ELSE '' END || '\",\"citycode\": \"' || left(v.fantoir,5) || '\",\"type\": \"street\",\"name\": \"' || replace(replace(v.voie_osm,'\"',''),'’',chr(39)) || '\",\"postcode\": \"' || coalesce(cp.postal_cod, min(lp.cp), cad.code_postal) || CASE WHEN replace(lower(cp.nom),'-',' ') != replace(lower(c.nom),'-',' ') THEN '\",\"post_office\": \"' || cp.nom ELSE '' END || '\",\"lat\": \"' || round(st_y(v.geometrie)::numeric,6) || '\",\"lon\": \"' || round(st_x(v.geometrie)::numeric,6) || '\",\"city\": \"' || c.nom || '\",\"departement\": \"' || cog.nom_dep || '\", \"region\": \"' || cog.nom_reg || '\",\"importance\": '|| round(log((CASE WHEN (cad.code_postal LIKE '75%' OR g.statut LIKE 'Capital%') THEN 6 WHEN (cad.code_postal LIKE '690%' OR cad.code_postal LIKE '130%' OR g.statut = 'Préfecture de régi') THEN 5 WHEN g.statut='Préfecture' THEN 4 WHEN g.statut LIKE 'Sous-pr%' THEN 3 WHEN g.statut='Chef-lieu canton' THEN 2 ELSE 1 END)+log(g.population+1)/3)::numeric*log(1+log(count(a.*)+1)+log(CASE WHEN v.voie_osm like 'Boulevard%' THEN 4 WHEN v.voie_osm LIKE 'Place%' THEN 4 WHEN v.voie_osm LIKE 'Espl%' THEN 4 WHEN v.voie_osm LIKE 'Av%' THEN 3 WHEN v.voie_osm LIKE 'Rue %' THEN 2 ELSE 1 END))::numeric,4) ||' }' AS sjson -FROM cumul_voies v -JOIN communes c ON (insee=insee_com) -JOIN code_cadastre cad ON (cad.insee_com=v.insee_com) -LEFT JOIN cumul_adresses a ON (a.fantoir=v.fantoir) -LEFT JOIN (select dep, nom_dep, nom_reg from cog group by dep, nom_dep, nom_reg) as cog ON (cog.dep=left(v.insee_com,2) or cog.dep=left(v.insee_com,3)) -LEFT JOIN geofla_plus g ON (g.insee=v.insee_com) -LEFT JOIN laposte_cp lp ON (lp.insee=v.insee_com) -LEFT JOIN postal_code cp ON (cp.insee=v.insee_com AND ST_Contains(cp.wkb_geometry, v.geometrie)) -WHERE v.fantoir LIKE '$1%' - 
AND a.numero IS NULL - AND cad.insee_com like '$1%' - AND c.insee like '$1%' - AND g.insee like '$1%' -GROUP BY v.fantoir, - v.voie_osm, - cad.code_postal, cp.postal_cod, - v.geometrie, - c.nom, cp.nom, - cog.nom_dep, - cog.nom_reg, - g.statut, - g.population -ORDER BY v.fantoir; -" >> $OUTPUT - - -echo "`date +%H:%M:%S` LD $1" -# export cumul_place (lieux-dits) > json -psql cadastre -t -A -c " -SET client_min_messages TO WARNING; -WITH u AS (select fantoir as f, insee_com as insee from cumul_places where fantoir like '$1%' GROUP BY 1,2), lp as (select insee, min(cp) as cp from laposte_cp where insee like '$1%' group by 1) -select DISTINCT '{\"id\": \"' || u.f - || '\",\"citycode\": \"' || left(u.f,5) - || '\",\"type\": \"' || 'place' - || '\",\"name\": \"' || replace(replace(coalesce(o.libelle_osm, c.libelle_cadastre),'\"',''),'’',chr(39)) - || '\",\"postcode\": \"' || coalesce(cp.postal_cod, lp.cp, ca.code_postal) || CASE WHEN replace(lower(cp.nom),'-',' ') != replace(lower(coalesce(cn.nom,initcap(ca.nom_com))),'-',' ') THEN '\",\"post_office\": \"' || cp.nom ELSE '' END - || '\",\"lat\": \"' || case when o.geometrie is not null then round(st_y(o.geometrie)::numeric,6) else st_y(c.geometrie) end - || '\",\"lon\": \"' || case when o.geometrie is not null then round(st_x(o.geometrie)::numeric,6) else st_x(c.geometrie) end - || '\",\"city\": \"' || coalesce(cn.nom,initcap(ca.nom_com)) - || '\",\"departement\": \"' || cog.nom_dep - || '\", \"region\": \"' || cog.nom_reg - || '\", \"importance\": '|| least(0.05,round(log((CASE WHEN g.statut LIKE 'Capital%' THEN 6 WHEN g.statut = 'Préfecture de régi' THEN 5 WHEN g.statut='Préfecture' THEN 4 WHEN g.statut LIKE 'Sous-pr%' THEN 3 WHEN g.statut='Chef-lieu canton' THEN 2 ELSE 1 END)+log(g.population+1)/3)*(0.25+0.5*(1-('0' || coalesce(f.ld_bati,'1'))::numeric)),4)) - ||'}' -from u - LEFT JOIN fantoir_voie f on (f.code_insee=u.insee AND u.f = concat(f.code_insee,f.id_voie,f.cle_rivoli)) - LEFT JOIN cumul_places c on (c.fantoir=u.f and c.source='CADASTRE') - LEFT JOIN cumul_places o on (o.fantoir=u.f and o.source='OSM') - LEFT JOIN code_cadastre ca ON (ca.insee_com=u.insee) - LEFT JOIN communes cn ON (cn.insee=u.insee) - LEFT JOIN geofla_plus g ON (g.insee=u.insee) - LEFT JOIN lp ON (lp.insee=u.insee) - LEFT JOIN postal_code cp ON (cp.insee=u.insee AND ST_Contains(cp.wkb_geometry, o.geometrie)) - JOIN (select dep, nom_dep, nom_reg from cog group by dep, nom_dep, nom_reg) as cog ON (cog.dep=left(u.insee,2) or cog.dep=left(u.insee,3)) -where coalesce(o.libelle_osm, c.libelle_cadastre) != cn.nom ORDER BY 1; -" >> $OUTPUT - - -echo "`date +%H:%M:%S` FIN $1" - diff --git a/out/banout.sh b/out/banout.sh deleted file mode 100644 index c474b3d..0000000 --- a/out/banout.sh +++ /dev/null @@ -1,176 +0,0 @@ -# ménage préparatoire... 
-rm -f bano-$1* - -# dédoublement des adresses multiple OSM (séparées par ';' ',' ou 'à') -psql cadastre -q -c "insert into cumul_adresses select geometrie, trim( both from regexp_split_to_table(numero,';|à|,')), voie_cadastre, voie_osm, fantoir, insee_com, cadastre_com, dept, code_postal, source, batch_import_id, voie_fantoir from cumul_adresses where numero ~ ';|-|à|,' and insee_com like '$1%' and source='OSM';" -psql cadastre -q -c "delete from cumul_adresses where numero ~ ';|à|,' and insee_com like '$1%' and source='OSM';" - -# dédoublement des adresses multiple OSM (séparées par '-' uniquement si premier nombre inférieur au second) -psql cadastre -q -c "insert into cumul_adresses -select geometrie, trim( both from regexp_split_to_table(numero,'-')), voie_cadastre, voie_osm, fantoir, insee_com, cadastre_com, dept, code_postal, source, batch_import_id, voie_fantoir -from ( - select * from ( - select regexp_split_to_array(numero,'-') as nums,* - from cumul_adresses - where numero like '%-%' and insee_com like '$1%' and source='OSM' - ) as n - where regexp_replace('0'||nums[1],'[^0-9]','','g')::integer < regexp_replace('0'||nums[2],'[^0-9]','','g')::integer -) as n;" -psql cadastre -q -c "with n as ( - select * from ( - select regexp_split_to_array(numero,'-') as nums, fantoir as d_fantoir, numero as d_numero - from cumul_adresses - where numero like '%-%' and insee_com like '$1%' and source='OSM' - ) as n0 - where regexp_replace('0'||nums[1],'[^0-9]','','g')::integer < regexp_replace('0'||nums[2],'[^0-9]','','g')::integer -) delete from cumul_adresses where source='OSM' and fantoir||numero in (select d_fantoir||d_numero from n);" - -# export postgres > shapefile -export SHAPE_ENCODING='UTF-8' -ogr2ogr -f "ESRI Shapefile" -lco ENCODING=UTF-8 -s_srs "EPSG:4326" -t_srs "EPSG:4326" -overwrite bano-$1.shp PG:"dbname=cadastre" -sql " \ -SELECT * \ -FROM (WITH u AS \ - (SELECT fantoir, \ - replace(replace(replace(replace(replace(replace(replace(regexp_replace(upper(numero),'^0*',''),'BIS','B'),'TER','T'),'QUATER','Q'),'QUAT','Q'),' ',''),'à','-'),';',',') AS num \ - FROM cumul_adresses \ - WHERE fantoir IS NOT NULL \ - AND length(fantoir)=10 \ - AND fantoir LIKE '$1%' \ - GROUP BY fantoir, \ - num), lp as (select insee, min(cp) as cp from laposte_cp where insee like '$1%' group by 1) \ - SELECT concat(u.fantoir,'-',u.num) AS id, \ - upper(replace(CASE WHEN u.num=o.num THEN o.numero WHEN u.num=od.num THEN od.numero ELSE c.numero END,' ','')) AS numero, \ - replace(replace(regexp_replace(regexp_replace(coalesce(CASE \ - WHEN u.num=o.num THEN \ - CASE \ - WHEN o.voie_osm != '' THEN replace(o.voie_osm,'’',chr(39)) \ - ELSE o.voie_cadastre \ - END \ - WHEN u.num=od.num THEN \ - CASE \ - WHEN od.voie_osm is not null THEN replace(od.voie_osm,'’',chr(39)) \ - ELSE od.voie_cadastre \ - END \ - ELSE \ - CASE \ - WHEN c.voie_osm!='' THEN replace(c.voie_osm,'’',chr(39)) \ - ELSE c.voie_cadastre \ - END \ - END, \ - CASE \ - WHEN u.num=o.num THEN replace(o.voie_osm,'’',chr(39)) \ - ELSE \ - CASE \ - WHEN c.voie_osm !='' THEN replace(c.voie_osm,'’',chr(39)) \ - ELSE c.voie_cadastre \ - END \ - END),'([dD][eé]partementale?|Rue|[rR]urale?|[vV]icinale?|[cC]ommunale?|Cr) ([0-9]+ )?[dD]ite? ',''),'(Draille|Chemin|Sentier) [dD]ite? 
','\1 '),'Voie Che ','Chemin '),'Cours Dit Che ','Chemin ') AS voie, \ - coalesce(cp.postal_cod, lp.cp, ca.code_postal) AS code_post, \ - coalesce(cn.nom,initcap(ca.nom_com)) AS ville, \ - CASE \ - WHEN u.num=o.num THEN 'OSM' \ - WHEN (u.num=od.num \ - AND od.voie_osm != od.voie_cadastre \ - AND od.voie_osm IS NOT NULL) THEN 'O+O' \ - WHEN u.num=od.num THEN 'OD' \ - WHEN c.voie_osm != '' THEN 'C+O' \ - ELSE 'CAD' \ - END AS SOURCE, \ - CASE \ - WHEN u.num=o.num THEN st_y(o.geometrie) \ - WHEN u.num=od.num THEN st_y(od.geometrie) \ - ELSE st_y(c.geometrie) \ - END AS lat, \ - CASE \ - WHEN u.num=o.num THEN st_x(o.geometrie) \ - WHEN u.num=od.num THEN st_x(od.geometrie) \ - ELSE st_x(c.geometrie) \ - END AS lon, \ - CASE \ - WHEN u.num=o.num THEN o.geometrie \ - WHEN u.num=od.num THEN od.geometrie \ - ELSE c.geometrie \ - END AS geom \ - FROM u \ - LEFT JOIN lp ON (lp.insee=left(u.fantoir,5)) \ - LEFT JOIN \ - (SELECT *, \ - replace(replace(replace(replace(replace(replace(replace(regexp_replace(upper(numero),'^0*',''),'BIS','B'),'TER','T'),'QUATER','Q'),'QUAT','Q'),' ',''),'à','-'),';',',') AS num \ - FROM cumul_adresses \ - WHERE fantoir LIKE '$1%' \ - AND SOURCE='OSM') AS o \ - ON (o.num=u.num AND o.fantoir=u.fantoir) \ - LEFT JOIN \ - (SELECT *, \ - replace(replace(replace(replace(replace(replace(replace(regexp_replace(upper(numero),'^0*',''),'BIS','B'),'TER','T'),'QUATER','Q'),'QUAT','Q'),' ',''),'à','-'),';',',') AS num \ - FROM cumul_adresses \ - WHERE fantoir LIKE '$1%' \ - AND SOURCE='CADASTRE' \ - AND st_x(geometrie)!=0 \ - AND st_y(geometrie)!=0) AS c \ - ON (c.num=u.num AND c.fantoir=u.fantoir) \ - LEFT JOIN \ - (SELECT *, \ - replace(replace(replace(replace(replace(replace(replace(regexp_replace(upper(numero),'^0*',''),'BIS','B'),'TER','T'),'QUATER','Q'),'QUAT','Q'),' ',''),'à','-'),';',',') AS num \ - FROM cumul_adresses \ - WHERE fantoir LIKE '$1%' \ - AND SOURCE LIKE 'OD%' \ - AND st_x(geometrie)!=0 \ - AND st_y(geometrie)!=0) AS od \ - ON (od.num=u.num AND od.fantoir=u.fantoir) \ - LEFT JOIN code_cadastre ca \ - ON (ca.insee_com=left(u.fantoir,5)) \ - LEFT JOIN communes cn \ - ON (cn.insee=left(u.fantoir,5)) \ - LEFT JOIN postal_code cp \ - ON (cp.insee = left(u.fantoir,5) and st_contains(cp.wkb_geometry, coalesce(o.geometrie, od.geometrie, c.geometrie))) \ - WHERE u.num>'0') AS DATA \ -WHERE lat IS NOT NULL \ - AND lon IS NOT NULL \ - AND numero ~ '^[0-9]{1,4}( ?[A-Z]?.*)?' 
\ - AND numero !~'.[0-9 \\.\\-]{9,}' \ -ORDER BY id \ -" - -# génération du .csv à partir du shapefile -ogr2ogr -f CSV bano-$1-tmp.csv -lco WRITE_BOM=YES bano-$1.shp -# suppression header + coupure à 6 décimale + tri -sed -e 1d bano-$1-tmp.csv | sed 's/\(\.[0-9]\{6\}\)[0-9]*/\1/g' | sort > bano-$1.csv - -#exit - -# sortie RDF "turtle" à partir du csv -python csv2ttl.py bano-$1.csv $1 > bano-$1.ttl -gzip -9 bano-$1.ttl -mv bano-$1.ttl.gz /data/project/bano.openstreetmap.fr/web/data/ - -# copie dans le dossier web -mv bano-$1.csv /data/project/bano.openstreetmap.fr/web/data/ - -# préparation du shapefile zippé -#cp -p -u /data/project/bano.openstreetmap.fr/web/data/*.txt ./ -zip -q -9 bano-$1-shp.zip bano-$1.* *.txt -chmod a+r *.zip - -# copie dans le dossier web -mv bano-$1-shp.zip /data/project/bano.openstreetmap.fr/web/data/ - - -# ménage -rm bano-$1* - - -exit - -select * from ( -with u as (select fantoir,replace(replace(replace(replace(replace(replace(replace(regexp_replace(upper(numero),'^0*',''),'BIS','B'),'TER','T'),'QUATER','Q'),'QUAT','Q'),' ',''),'à','-'),';',',') as num from cumul_adresses where fantoir like '69268%' and fantoir is not null group by fantoir,num) -select concat(u.fantoir,'-',u.num) as id, upper(replace(case when u.num=o.num then o.numero when u.num=od.num then od.num else c.numero end,' ','')) as numero,coalesce(case when u.num=o.num then case when o.voie_osm != '' then replace(o.voie_osm,'’',chr(39)) else o.voie_cadastre end when u.num=od.num then replace(od.voie_osm,'’',chr(39)) else case when c.voie_osm!='' then replace(c.voie_osm,'’',chr(39)) else c.voie_cadastre end end, case when u.num=o.num then replace(o.voie_osm,'’',chr(39)) else (case when c.voie_osm !='' then replace(c.voie_osm,'’',chr(39)) else c.voie_cadastre end) end) as voie, ca.code_postal as code_post, coalesce(cn.nom,initcap(ca.nom_com)) as ville,case when u.num=o.num then 'OSM' when (u.num=od.num and od.voie_osm != od.voie_cadastre and od.voie_osm is not null) then 'O+O' when u.num=od.num then 'OD' when c.voie_osm != '' then 'C+O' else 'CAD' end as source, case when u.num=o.num then st_y(o.geometrie) when u.num=od.num then st_y(od.geometrie) else st_y(c.geometrie) end as lat,case when u.num=o.num then st_x(o.geometrie) when u.num=od.num then st_x(od.geometrie) else st_x(c.geometrie) end as lon,case when u.num=o.num then o.geometrie when u.num=od.num then od.geometrie else c.geometrie end as geom -from u -left join (select *,replace(replace(replace(replace(replace(replace(replace(regexp_replace(upper(numero),'^0*',''),'BIS','B'),'TER','T'),'QUATER','Q'),'QUAT','Q'),' ',''),'à','-'),';',',') as num from cumul_adresses where fantoir like '69%' and source='OSM') as o on (o.num=u.num and o.fantoir=u.fantoir) -left join (select *,replace(replace(replace(replace(replace(replace(replace(regexp_replace(upper(numero),'^0*',''),'BIS','B'),'TER','T'),'QUATER','Q'),'QUAT','Q'),' ',''),'à','-'),';',',') as num from cumul_adresses where fantoir like '69%' and source='CADASTRE' and st_x(geometrie)!=0 and st_y(geometrie)!=0) as c on (c.num=u.num and c.fantoir=u.fantoir) -left join (select *,replace(replace(replace(replace(replace(replace(replace(regexp_replace(upper(numero),'^0*',''),'BIS','B'),'TER','T'),'QUATER','Q'),'QUAT','Q'),' ',''),'à','-'),';',',') as num from cumul_adresses where fantoir like '69%' and source LIKE 'OD%' and voie_osm is not null and st_x(geometrie)!=0 and st_y(geometrie)!=0) as od on (od.num=u.num and od.fantoir=u.fantoir) -left join code_cadastre ca on (ca.insee_com=left(u.fantoir,5)) 
-left join communes cn on (cn.insee=left(u.fantoir,5) ) -where u.num>'0' -) as data -where lat is not null and lon is not null and (upper(numero) !~ '^[0-9]{1,4}([A-Z]?.*)?' or numero ~'.[0-9 \.\-]{9,}') order by id; diff --git a/out/csv2ttl.py b/out/csv2ttl.py deleted file mode 100644 index d3594c7..0000000 --- a/out/csv2ttl.py +++ /dev/null @@ -1,48 +0,0 @@ -import sys -import csv -import string - -print('@prefix xsd: .') -print('@prefix locn: .') -print('@prefix gn: .') -print('@prefix prov: .') -print('@prefix gsp: .') -print('@prefix geo: .') -print('@prefix rdfs: .') -print('@prefix dcat: .') -print('@prefix foaf: .') -print('@prefix dcterms: .') -print -print(' a dcat:Catalog ;') -print(' dcterms:title "Donnees des adresses du projet BANO (Base Adresse Nationale Ouverte) en RDF"@fr ;') -print(' dcterms:description "Le projet BANO en RDF de Base d\'Adresses Nationale Ouverte initie par OpenStreetMap France."@fr ;') -print(' foaf:homepage ;') -print(' dcterms:language "fr" ;') -print(' dcterms:license ;') -print(' dcterms:publisher ; #url openstreetmap France') -print(' dcterms:issued "2014-05-14"^^xsd:date ; # data issued') -print(' dcterms:modified "2014-08-21"^^xsd:date ; #last modification') -print(' dcterms:spatial , ; # region/pays (France)') -print(' .') -print -with open(sys.argv[1]) as csvfile: - addr = csv.reader(csvfile, delimiter=',', quotechar='"') - for row in addr: - print(' a locn:Address , gn:Feature ;') - print(' locn:fullAddress "'+row[1]+' '+row[2]+', '+row[3]+' '+row[4]+', FRANCE";'); - print(' locn:addressId "'+row[0]+'" ;') - print(' locn:locatorDesignator "'+row[1]+'" ;') - print(' locn:thoroughfare "'+row[2]+'"@fr ;') - print(' locn:postalCode "'+row[3]+'" ;') - print(' locn:locatorName "'+row[4]+'"@fr ;') - print(' locn:adminUnitL1 "FR" ;') - # traitement des arrondissements municipaux de Paris, Lyon, Marseille - if (sys.argv[2] in ['13','69','75'] and int(row[0][0:5])) in range(13201, 13217)+range(69381, 69370)+range(75101, 75121): - print(' locn:location ;') - else: - print(' locn:location ;') - print(' locn:geometry ;') - print(' locn:geometry [a geo:Point ; geo:lat "'+row[6]+'" ; geo:long "'+row[7]+'" ] ;') - print(' locn:geometry [a gsp:Geometry; gsp:asWKT "POINT('+row[7]+' '+row[6]+')"^^gsp:wktLiteral ] ;') - print(' .') - print diff --git a/out/head.sh b/out/head.sh deleted file mode 100755 index f6819aa..0000000 --- a/out/head.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash -for dep in {01..19} 2A 2B {21..95} {971..974} -do - echo $dep -# sh banout.sh $dep -done diff --git a/out/lisezmoi-bano.txt b/out/lisezmoi-bano.txt deleted file mode 100644 index e35ccd7..0000000 --- a/out/lisezmoi-bano.txt +++ /dev/null @@ -1,90 +0,0 @@ -###Descriptif du contenu des fichiers "BANO" - -**Qu'est-ce que BANO ?** - -BANO = Base d'Adresses Nationale Ouverte - -C'est un projet initié par OpenStreetMap France destiné à constituer et diffuser sous licence libre une base de données d'adresses géolocalisées sur l'ensemble du territoire français. - -Pour plus de renseignements: http://wiki.openstreetmap.org/wiki/WikiProject_France/WikiProject_Base_Adresses_Nationale_Ouverte_(BANO) - - -**Licence** - -Ces données sont sous licence libre "ODbL" (Open Database Licence). Cette licence implique: l'attribution et le partage à l'identique. - -- Pour la mention d'attribution veuillez indiquer "source: BANO" ainsi que la date du jeu de données. 
-- Pour le partage à l'identique, toute amélioration des données de BANO doit être repartagée sous licence identique, ceci dans le but d'une constante amélioration de BANO. - -Ceci est le résumé explicatif de la licence ODbL 1.0. Merci de lire l'avertissement ci-dessous - -Vous êtes libres : -- De partager : copier, distribuer et utiliser la base de données. -- De créer : produire des créations à partir de cette base de données. -- D'adapter : modifier, transformer et construire à partir de cette base de données. -Aussi longtemps que : -- Vous mentionnez la paternité : vous devez mentionner la source de la base de données pour toute utilisation publique de la base de données, ou pour toute création produite à partir de la base de données, de la manière indiquée dans l'ODbL. Pour toute utilisation ou redistribution de la base de données, ou création produite à partir de cette base de données, vous devez clairement mentionner aux tiers la licence de la base de données et garder intacte toute mention légale sur la base de données originaire. -- Vous partagez aux conditions identiques : si vous utilisez publiquement une version adaptée de cette base de données, ou que vous produisiez une création à partir d'une base de données adaptée, vous devez aussi offrir cette base de données adaptée selon les termes de la licence ODbL. -- Gardez ouvert : si vous redistribuez la base de données, ou une version modifiée de celle-ci, alors vous ne pouvez utiliser de mesure technique restreignant la création que si vous distribuez aussi une version sans ces restrictions. - -Avertissement - -Ce résumé explicatif n'est pas un contrat, mais simplement une source pratique pour faciliter la compréhension de la version complète de la licence ODbL 1.0 — il exprime en termes courants les principales notions juridiques du contrat. Ce résumé explicatif n'a pas de valeur juridique, son contenu n'apparaît pas sous cette forme dans le contrat. Seul le texte complet du contrat de licence fait loi. 
- -Version complète en français disponible sur: http://www.vvlibri.org/fr/licence/odbl/10/fr/legalcode - - -**Origine** - -BANO est une base de données composite, constituée à partir de différentes sources: -- OpenStreetMap (ODbL) -- données disponibles en opendata - - Arles Crau Camargue Montagnette (Licence Ouverte) - avril 2016 - - Ville de Montpellier (Licence Ouverte) - mai 2016 - - Toulouse Métropole (ODbL) - février 2015 - - Rennes Métropole (Licence Ouverte) - avril 2016 - - Mulhouse Alsace Agglomération (Licence Ouverte) - février 2015 - - Grand Nancy (ODbL) - avril 2016 - - Nantes Métropole (ODbL) - janvier 2015 - - Grand Lyon (Licence Ouverte) - janvier 2015 - - Bordeaux Métropole (ODbL) - février 2015 - - Strasbourg EuroMétropole (Licence Ouverte) - janvier 2014 - - Communauté Urbaine de Lille Métropole (Licence Ouverte) - juin 2014 - - Métropole Nice Côte d'Azur (Licence Ouverte) - janvier 2015 - - Ville de Limoges (ODbL) - décembre 2014 - - Ville de La Rochelle (Licence Ouverte) - octobre 2015 - - Pays de Brest (Licence Ouverte) - mars 2016 - - Mairie de Nanterre (Licence Ouverte) - février 2015 - - Ville de Grenoble (ODbL) - mars 2015 - - Ville de Paris (ODbL) - janvier 2015 - - Ville de Poitiers (Licence Ouverte) - avril 2015 - - Ville d'Angers (ODbL) - avril 2016 -- données adresses collectées sur le site du cadastre et fichier FANTOIR DGFiP (Licence Ouverte) - Octobre 2015 -- base officielle des codes postaux de La Poste (Licence Ouverte) - 2014 - - -**Format** - -Ces fichiers sont proposés au format shapefile, ainsi que sous forme de fichiers CSV (coordonnées WGS84/EPSG:4326 et textes en UTF-8). - - -**Contenu** - -Pour chaque adresse: -- id (unique) : code_insee + codefantoir + numero -- numero : numéro dans la voie avec suffixe (ex: 1, 1BIS, 1D) -- voie : nom de voie -- code_post : code postal sur 5 caractères -- nom_comm : nom de la commune -- source : OSM = donnée directement issue d'OpenStreetMap, OD = donnée provenant de source opendata locale, O+O = donnée de source opendata enrichie par OpenStreetMap, CAD = donnée directement issue du cadastre, C+O = donnée du cadastre enrichie par OSM (nom de voie par exemple) -- lat : latitude en degrés décimaux WGS84 -- lon : longitude en degrés décimaux WGS84 - - -**Mise à jour, corrections** - -Pour mettre à jour et corriger les données de BANO, il suffit de faire des améliorations directement dans OpenStreetMap, elles seront prises en compte au prochain de cycle de mise à jour. - -Un guiche unique de signalement et de correction d'erreur est en cours de co-design. Si vous voulez participer à sa mise en place, contactez-nous par mail. 
- -Pour toute question concernant ces exports, vous pouvez contacter bano@openstreetmap.fr diff --git a/sql/create_and_load_codes_postaux.sql b/sql/create_and_load_codes_postaux.sql deleted file mode 100644 index 0548e77..0000000 --- a/sql/create_and_load_codes_postaux.sql +++ /dev/null @@ -1,29 +0,0 @@ -CREATE TEMP TABLE tmp_codes_postaux (raw text); -\copy tmp_codes_postaux FROM 'codes_postaux.csv' WITH csv HEADER delimiter '#'; - -CREATE TABLE IF NOT EXISTS codes_postaux( - insee text, - commune text, - cp text, - libelle text, - ligne_5 text, - geometrie geometry(Point,4326)); - -TRUNCATE codes_postaux; - -WITH -a -AS -(SELECT string_to_array(raw,';') arr FROM tmp_codes_postaux) -INSERT INTO codes_postaux (insee,commune,cp,libelle,ligne_5,geometrie) -SELECT arr[1], - arr[2], - arr[3], - arr[4], - arr[5], - ST_SetSRID(ST_Point((string_to_array(arr[6],','))[2]::float,(string_to_array(arr[6],','))[1]::float),4326) -FROM a; - -CREATE INDEX IF NOT EXISTS gidx_codes_postaux ON codes_postaux USING GIST(geometrie); -CREATE INDEX IF NOT EXISTS idx_codes_postaux_insee ON codes_postaux (insee); -CREATE INDEX IF NOT EXISTS idx_codes_postaux_cp ON codes_postaux (cp); diff --git a/sql/create_table_base_bano.sql b/sql/create_table_base_bano.sql deleted file mode 100644 index 38a7192..0000000 --- a/sql/create_table_base_bano.sql +++ /dev/null @@ -1,139 +0,0 @@ -CREATE TABLE IF NOT EXISTS fantoir_voie ( - code_dept character(3), - code_dir character(1), - code_com character(3), - code_insee character(5), - id_voie character(4), - fantoir character(9), - fantoir10 character(10), - cle_rivoli character(1), - nature_voie character varying(4), - libelle_voie character varying(26), - type_commune character(1), - caractere_rur character(1), - caractere_voie character(1), - caractere_pop character(1), - pop_a_part integer, - pop_fictive integer, - caractere_annul character(1), - date_annul character varying(9), - date_creation character varying(9), - code_majic character(5), - type_voie character(1), - ld_bati character(1), - dernier_mot character varying(8)); -CREATE INDEX IF NOT EXISTS idx_fantoir_voie_dept ON fantoir_voie(code_dept); -CREATE INDEX IF NOT EXISTS idx_fantoir_code_insee ON fantoir_voie(code_insee); -CREATE INDEX IF NOT EXISTS idx_fantoir_fantoir ON fantoir_voie(fantoir); -CREATE INDEX IF NOT EXISTS idx_fantoir_fantoir10 ON fantoir_voie(fantoir10); - -CREATE TABLE IF NOT EXISTS cumul_adresses ( - geometrie geometry , - numero character varying (15), - voie_cadastre character varying (300), - voie_bal character varying (300), - voie_osm character varying (300), - voie_autre text, - voie_fantoir character varying (300), - fantoir character varying (10), - insee_com character (5), - cadastre_com character varying (10), - dept character varying (3), - code_postal text, - source character varying (100), - batch_import_id integer); - -CREATE INDEX IF NOT EXISTS cumul_adresses_fantoir ON cumul_adresses(fantoir); -CREATE INDEX IF NOT EXISTS cumul_adresses_fantoir_source_idx ON cumul_adresses(fantoir, source); -CREATE INDEX IF NOT EXISTS cumul_adresses_geo ON cumul_adresses USING gist (geometrie); -CREATE INDEX IF NOT EXISTS cumul_adresses_insee ON cumul_adresses(insee_com) WITH (fillfactor=95); -CREATE INDEX IF NOT EXISTS cumul_adresses_dept ON cumul_adresses(dept) WITH (fillfactor=95); -CREATE INDEX IF NOT EXISTS cumul_adresses_source ON cumul_adresses(source) WITH (fillfactor=95); -CREATE INDEX IF NOT EXISTS cumul_adresses_insee_source ON cumul_adresses(insee_com,source) WITH (fillfactor=95); - 
-CREATE TABLE IF NOT EXISTS cumul_voies -( geometrie geometry , - voie_cadastre character varying (300), - voie_bal character varying (300), - voie_osm character varying (300), - voie_autre text, - voie_fantoir character varying (300), - fantoir character varying (10), - insee_com character (5), - cadastre_com character varying (10), - dept character varying (3), - code_postal text, - source character varying (100), - voie_index integer, - batch_import_id integer); - -CREATE INDEX IF NOT EXISTS cumul_voies_fantoir ON cumul_voies(fantoir); -CREATE INDEX IF NOT EXISTS cumul_voies_fantoir_source_idx ON cumul_voies(fantoir, source); -CREATE INDEX IF NOT EXISTS cumul_voies_geo ON cumul_voies USING gist (geometrie); -CREATE INDEX IF NOT EXISTS cumul_voies_insee ON cumul_voies(insee_com) WITH (fillfactor=95); -CREATE INDEX IF NOT EXISTS cumul_voies_dept ON cumul_voies(dept) WITH (fillfactor=95); -CREATE INDEX IF NOT EXISTS cumul_voies_source ON cumul_voies(source) WITH (fillfactor=95); -CREATE INDEX IF NOT EXISTS cumul_voies_insee_source ON cumul_voies(insee_com,source) WITH (fillfactor=95); - -ALTER TABLE cumul_voies CLUSTER ON cumul_voies_insee; - -CREATE TABLE IF NOT EXISTS cumul_places -( geometrie geometry , - libelle_cadastre character varying (300), - libelle_osm character varying (300), - libelle_fantoir character varying (300), - ld_bati integer, - ld_osm character varying (30), - fantoir character varying (10), - insee_com character (5), - -- cadastre_com character varying (10), - dept character varying (3), - code_postal text, - source character varying (100), - batch_import_id integer); - -CREATE INDEX IF NOT EXISTS cumul_places_geo ON cumul_places USING GIST(geometrie); -CREATE INDEX IF NOT EXISTS cumul_places_insee_com ON cumul_places (insee_com); - -ALTER TABLE cumul_places CLUSTER ON cumul_places_insee_com; - -CREATE TABLE IF NOT EXISTS batch ( - id_batch serial, - source character varying (100), - etape character varying (100), - timestamp_debut float, - date_debut character varying (100), - date_fin character varying (100), - dept character varying (3), - cadastre_com character (5), - insee_com character (5), - nom_com character varying(250), - nombre_adresses integer); - -CREATE TABLE IF NOT EXISTS batch_historique ( - id_batch integer, - source character varying (100), - etape character varying (100), - timestamp_debut float, - date_debut character varying (100), - date_fin character varying (100), - dept character varying (3), - cadastre_com character (5), - insee_com character (5), - nom_com character varying(250), - nombre_adresses integer); - -CREATE TABLE IF NOT EXISTS code_cadastre ( - dept character varying(3), - cadastre_dept character (3), - nom_com character varying(250), - cadastre_com character(5), - insee_com character(5), - code_postal character(5), - format_cadastre character varying(10), - date_maj integer); - -CREATE TABLE IF NOT EXISTS tmp_code_cadastre -AS SELECT * FROM code_cadastre LIMIT 0; - -GRANT SELECT ON ALL TABLES IN SCHEMA public TO public; \ No newline at end of file diff --git a/sql/create_table_base_osm.sql b/sql/create_table_base_osm.sql deleted file mode 100644 index c48bc44..0000000 --- a/sql/create_table_base_osm.sql +++ /dev/null @@ -1,182 +0,0 @@ -CREATE TABLE IF NOT EXISTS fantoir_voie ( - code_dept character(3), - code_dir character(1), - code_com character(3), - code_insee character(5), - id_voie character(4), - fantoir character(9), - fantoir10 character(10), - cle_rivoli character(1), - nature_voie character varying(4), - libelle_voie 
character varying(26), - type_commune character(1), - caractere_rur character(1), - caractere_voie character(1), - caractere_pop character(1), - pop_a_part integer, - pop_fictive integer, - caractere_annul character(1), - date_annul character varying(9), - date_creation character varying(9), - code_majic character(5), - type_voie character(1), - ld_bati character(1), - dernier_mot character varying(8)); -CREATE INDEX IF NOT EXISTS idx_fantoir_voie_dept ON fantoir_voie(code_dept); -CREATE INDEX IF NOT EXISTS idx_fantoir_code_insee ON fantoir_voie(code_insee); -CREATE INDEX IF NOT EXISTS idx_fantoir_fantoir ON fantoir_voie(fantoir); -CREATE INDEX IF NOT EXISTS idx_fantoir_fantoir10 ON fantoir_voie(fantoir10); - -CREATE TABLE IF NOT EXISTS ban_odbl ( - id text, - id_fantoir text, - numero text, - rep text, - nom_voie text, - code_postal text, - code_insee text, - nom_commune text, - code_insee_ancienne_commune text, - nom_ancienne_commune text, - x float, - y float, - lon float, - lat float, - type_position text, - alias text, - nom_ld text, - libelle_acheminement text, - nom_afnor text, - source_position text, - source_nom_voie text, - certification_commune integer, - cad_parcelles text); --- geometrie geometry (Point, 4326) DEFAULT (ST_Point(lon,lat))); -CREATE INDEX IF NOT EXISTS idx_ban_odbl_code_insee ON ban_odbl(code_insee); - - -CREATE TABLE IF NOT EXISTS bal_cadastre ( - cle_interop text, - uid_adresse text, - numero text, - suffixe text, - pseudo_numero boolean, - voie_nom text, - voie_code text, - code_postal text, - libelle_acheminement text, - destination_principale text, - commune_code text, - commune_nom text, - source text, - long float, - lat float, - x float, - y float, - position text, - date_der_maj date); -CREATE INDEX IF NOT EXISTS idx_bal_cadastre_commune_code ON bal_cadastre(commune_code); -CREATE INDEX IF NOT EXISTS idx_bal_cadastre_commune_codedestination_principale ON bal_cadastre(commune_code,destination_principale); - -CREATE TABLE IF NOT EXISTS bal_locales ( - cle_interop text, - commune_code text, - commune_nom text, - voie_code text, - voie_nom text, - numero text, - suffixe text, - long float, - lat float, - license text); -CREATE INDEX IF NOT EXISTS idx_bal_open_data_commune_code ON bal_locales(commune_code); - -CREATE TABLE IF NOT EXISTS lieux_dits ( - insee_com character(5), - nom text, - created date, - updated date, - geometrie geometry(Polygon,4326) -); -CREATE INDEX IF NOT EXISTS lieux_dits_insee_com ON lieux_dits(insee_com); -CREATE INDEX IF NOT EXISTS gidx_lieux_dits ON lieux_dits USING gist (geometrie); - -CREATE TABLE IF NOT EXISTS parcelles_noms ( - geometrie geometry, - insee_com character(5), - libelle character varying(100), - fantoir character varying (10) -); -CREATE INDEX IF NOT EXISTS parcelles_noms_insee_com ON parcelles_noms(insee_com); - -CREATE TABLE IF NOT EXISTS type_voie ( - id_voie integer, - tag_value text, - tag_index integer); -TRUNCATE TABLE type_voie; -INSERT INTO type_voie (id_voie,tag_value) - VALUES (1,'steps'), - (2,'path'), - (3,'cycleway'), - (4,'footway'), - (5,'pedestrian'), - (6,'track'), - (7,'service'), - (8,'road'), - (9,'living_street'), - (10,'residential'), - (11,'unclassified'), - (12,'tertiary'), - (13,'secondary'), - (14,'primary'), - (15,'trunk'), - (16,'motorway'); -UPDATE type_voie SET tag_index = power(2,id_voie-1); - - -CREATE TABLE IF NOT EXISTS type_highway_insee ( - name text, - highway text, - insee_com character(5), - timestamp_maj integer DEFAULT (date_part('epoch'::text, CURRENT_TIMESTAMP))::integer 
-); -CREATE INDEX IF NOT EXISTS idx_type_highway_insee_insee_com ON type_highway_insee(insee_com); - -CREATE TABLE IF NOT EXISTS suffixe ( - geometrie geometry, - insee_com character(5), - libelle_suffixe character varying(100) -); -CREATE INDEX IF NOT EXISTS gidx_suffixe ON suffixe USING GIST(geometrie); - -CREATE TABLE IF NOT EXISTS place_insee ( - long float, - lat float, - place text, - name text, - fantoir text, - ld_bati text, - tags hstore, - insee_com character(5), - timestamp_maj integer DEFAULT (date_part('epoch'::text, CURRENT_TIMESTAMP))::integer -); -CREATE INDEX IF NOT EXISTS idx_place_insee_insee_com ON place_insee(insee_com); - -CREATE TABLE IF NOT EXISTS cadastre_2_place ( - geometrie geometry(Point, 4326), - nom text, - fantoir text, - insee_com character(5) -); -CREATE INDEX IF NOT EXISTS gidx_cadastre_2_place ON cadastre_2_place USING GIST(geometrie); -CREATE INDEX IF NOT EXISTS idx_cadastre_2_place_insee_com ON cadastre_2_place(insee_com); - -CREATE TABLE IF NOT EXISTS expire_tiles ( - z integer, - x integer, - y integer, - geometrie geometry(Polygon, 3857) -); -CREATE INDEX IF NOT EXISTS gidx_expire_tiles ON expire_tiles(geometrie); - -GRANT SELECT ON ALL TABLES IN SCHEMA public TO public; \ No newline at end of file diff --git a/sql/create_table_cog.sql b/sql/create_table_cog.sql deleted file mode 100644 index d7eab67..0000000 --- a/sql/create_table_cog.sql +++ /dev/null @@ -1,57 +0,0 @@ -CREATE TABLE IF NOT EXISTS cog_commune ( - typecom character(4), - com character(5), - reg character(2), - dep character varying(3), - ctcd character(4), - arr character(4), - tncc character(1), - ncc text, - nccenr text, - libelle text, - can character(5), - comparent character(5)); -CREATE INDEX IF NOT EXISTS idx_cog_commune_com ON cog_commune(com); - -CREATE TABLE IF NOT EXISTS cog_canton ( - can character(5), - dep character varying(3), - reg character(2), - compct character(1), - burcentral character(5), - tncc character(1), - ncc text, - nccenr text, - libelle text, - typect character(1)); -CREATE INDEX IF NOT EXISTS idx_cog_canton_can ON cog_canton(can); - -CREATE TABLE IF NOT EXISTS cog_arrondissement ( - arr character(4), - dep character varying(3), - reg character(2), - cheflieu character(5), - tncc character(1), - ncc text, - nccenr text, - libelle text); -CREATE INDEX IF NOT EXISTS idx_cog_arrondissement_arr ON cog_arrondissement(arr); - -CREATE TABLE IF NOT EXISTS cog_departement ( - dep character varying(3), - reg character(2), - cheflieu character(5), - tncc character(1), - ncc text, - nccenr text, - libelle text); -CREATE INDEX IF NOT EXISTS idx_cog_departement_dep ON cog_departement(dep); - -CREATE TABLE IF NOT EXISTS cog_region ( - reg character(2), - cheflieu character(5), - tncc character(1), - ncc text, - nccenr text, - libelle text); -CREATE INDEX IF NOT EXISTS idx_cog_region_reg ON cog_region(reg); \ No newline at end of file diff --git a/sql/create_table_polygones_communes.sql b/sql/create_table_polygones_communes.sql deleted file mode 100644 index 0322724..0000000 --- a/sql/create_table_polygones_communes.sql +++ /dev/null @@ -1,37 +0,0 @@ -CREATE TABLE IF NOT EXISTS polygones_insee ( - geometrie geometry (Geometry, 3857), - insee_com character(5), - nom text, - admin_level integer); -CREATE INDEX IF NOT EXISTS gidx_polygones_insee ON polygones_insee USING GIST (geometrie); -CREATE INDEX IF NOT EXISTS idx_population_insee_insee_com ON polygones_insee(insee_com); - -TRUNCATE TABLE polygones_insee; -INSERT INTO polygones_insee -SELECT way, - "ref:INSEE", - 
name, - admin_level -FROM planet_osm_polygon -WHERE boundary='administrative' AND - admin_level in (8,9) AND - name != ''; - -CREATE TABLE IF NOT EXISTS polygones_insee_geo ( - geometrie geometry (Geometry, 4326), - insee_com character(5), - nom text, - admin_level integer); -CREATE INDEX IF NOT EXISTS gidx_polygones_insee_geo ON polygones_insee_geo USING GIST (geometrie); -CREATE INDEX IF NOT EXISTS idx_polygones_insee_geo_insee_com ON polygones_insee_geo(insee_com); - -TRUNCATE TABLE polygones_insee_geo; -INSERT INTO polygones_insee_geo -SELECT ST_Transform(way,4326), - "ref:INSEE", - name, - admin_level -FROM planet_osm_polygon -WHERE boundary='administrative' AND - admin_level in (8,9) AND - name != ''; diff --git a/sql/create_table_polygones_postaux.sql b/sql/create_table_polygones_postaux.sql deleted file mode 100644 index 791e807..0000000 --- a/sql/create_table_polygones_postaux.sql +++ /dev/null @@ -1,22 +0,0 @@ -DROP TABLE IF EXISTS polygones_postaux CASCADE; -CREATE TABLE polygones_postaux -AS -SELECT ST_Transform(way,4326) geometrie, - CASE postal_code - WHEN '' THEN "addr:postcode" - ELSE postal_code END AS code_postal -FROM planet_osm_postal_code -WHERE boundary = 'postal_code' AND - "addr:postcode"||postal_code != '' -ORDER BY ST_Area(way); -ALTER TABLE polygones_postaux add column id serial; -CREATE INDEX gidx_polygones_postaux ON polygones_postaux USING GIST(geometrie); -INSERT INTO polygones_postaux -SELECT ST_Transform(way,4326) geometrie, - CASE postal_code - WHEN '' THEN "addr:postcode" - ELSE postal_code END AS code_postal -FROM planet_osm_postal_code -WHERE boundary = 'administrative' AND - "addr:postcode"||postal_code != '' -ORDER BY ST_Area(way); diff --git a/sql/create_table_population_insee.sql b/sql/create_table_population_insee.sql deleted file mode 100644 index 21467b4..0000000 --- a/sql/create_table_population_insee.sql +++ /dev/null @@ -1,6 +0,0 @@ -CREATE TABLE IF NOT EXISTS population_insee ( - insee_com character(5), - nom text, - population integer); -CREATE INDEX IF NOT EXISTS idx_population_insee_insee_com ON population_insee(insee_com); - diff --git a/sql/finalisation.sql b/sql/finalisation.sql deleted file mode 100644 index a970592..0000000 --- a/sql/finalisation.sql +++ /dev/null @@ -1,6 +0,0 @@ -CREATE INDEX idx_planet_osm_point_fantoir ON planet_osm_point("ref:FR:FANTOIR"); -CREATE INDEX idx_planet_osm_line_fantoir ON planet_osm_line("ref:FR:FANTOIR"); -CREATE INDEX idx_planet_osm_polygon_fantoir ON planet_osm_polygon("ref:FR:FANTOIR"); -CREATE INDEX idx_planet_osm_polygon_ref_insee ON planet_osm_polygon("ref:INSEE"); -CREATE INDEX idx_planet_osm_rels_id ON planet_osm_rels(osm_id); -CREATE INDEX idx_planet_osm_communes_statut_ref_insee ON planet_osm_communes_statut("ref:INSEE"); diff --git a/sql/load_expire_tiles.sql b/sql/load_expire_tiles.sql deleted file mode 100644 index db3ee7f..0000000 --- a/sql/load_expire_tiles.sql +++ /dev/null @@ -1,6 +0,0 @@ -TRUNCATE expire_tiles; -COPY expire_tiles(z,x,y) FROM '/data/project/bano_data/expire_tiles/20190930/211505.316.tiles' WITH CSV DELIMITER '/'; - -UPDATE expire_tiles -SET geometrie = ST_SetSRID(ST_MakeBox2D(ST_Point(-20037508.34 + (40075016.68/(2 ^ :zoom)) * x, 20037508.34 - (40075016.68/(2 ^ :zoom)) * y), - ST_Point( -20037508.34 + (40075016.68/(2 ^ :zoom)) * (x + 1) ,20037508.34 - (40075016.68/(2 ^ :zoom)) * (y + 1))),3857) diff --git a/sql/load_fantoir.sql b/sql/load_fantoir.sql deleted file mode 100644 index c9012ca..0000000 --- a/sql/load_fantoir.sql +++ /dev/null @@ -1,34 +0,0 @@ -CREATE TEMP 
TABLE load_fantoir (raw text); -\copy load_fantoir FROM 'fantoir.txt' WITH csv delimiter '#' quote '>'; - -TRUNCATE fantoir_voie; -INSERT INTO fantoir_voie (SELECT CASE - WHEN substr(raw,1,2) = '97' - THEN substr(raw,1,2)||substr(raw,4,1) - ELSE substr(raw,1,2) - END AS code_dept, - substr(raw,3,1) as code_dir, - substr(raw,4,3) as code_com, - substr(raw,1,2)||substr(raw,4,3) as code_insee, - substr(raw,7,4) as id_voie, - substr(raw,1,2)||substr(raw,4,3)||substr(raw,7,4) as fantoir, - substr(raw,1,2)||substr(raw,4,3)||substr(raw,7,4)||substr(raw,11,1) as fantoir10, - substr(raw,11,1) as cle_rivoli, - rtrim(substr(raw,12,4)) as nature_voie, - rtrim(substr(raw,16,26)) as libelle_voie, - substr(raw,49,1) as type_commune, - substr(raw,50,1) as caractere_rur, - substr(raw,51,1) as caractere_voie, - substr(raw,52,1) as caractere_pop, - substr(raw,60,7)::integer as pop_a_part, - substr(raw,67,7)::integer as pop_fictive, - substr(raw,74,1) as caractere_annul, - substr(raw,75,7) as date_annul, - substr(raw,82,7) as date_creation, - substr(raw,104,5) as code_majic, - substr(raw,109,1) as type_voie, - substr(raw,110,1) as ld_bati, - substr(raw,113,8) as dernier_mot - FROM load_fantoir - WHERE raw NOT LIKE '______ %' AND - raw NOT LIKE '___ %'); diff --git a/sql/post_copie_ban_odbl.sql b/sql/post_copie_ban_odbl.sql deleted file mode 100644 index 97c57cf..0000000 --- a/sql/post_copie_ban_odbl.sql +++ /dev/null @@ -1,3 +0,0 @@ -ALTER TABLE ban_odbl ADD COLUMN geometrie geometry(Point, 4326); -UPDATE ban_odbl SET geometrie = ST_SetSRID(ST_Point(lon,lat),4326); -CREATE INDEX gidx_ban_odbl ON ban_odbl USING GIST(geometrie); \ No newline at end of file diff --git a/sql/update_table_infos_communes.sql b/sql/update_table_infos_communes.sql deleted file mode 100644 index 12bf000..0000000 --- a/sql/update_table_infos_communes.sql +++ /dev/null @@ -1,82 +0,0 @@ -CREATE TABLE IF NOT EXISTS infos_communes ( - dep character varying(3), - insee_com character(5), - name text, - adm_weight integer, - population integer, - population_milliers numeric, - type text, - lon numeric, - lat numeric, - geometrie geometry(Point, 4326) -); - -CREATE INDEX IF NOT EXISTS idx_infos_communes_insee ON infos_communes(insee_com); -CREATE INDEX IF NOT EXISTS gidx_infos_communes ON infos_communes USING GIST(geometrie); - -CREATE TEMP TABLE tmp_infos_communes -AS -WITH -statut -AS -(SELECT com insee_com, 1 AS statut FROM cog_commune -UNION ALL -SELECT burcentral, 2 AS statut FROM cog_canton -UNION ALL -SELECT cheflieu, 3 AS statut FROM cog_arrondissement -UNION ALL -SELECT cheflieu, 4 AS statut FROM cog_departement -UNION ALL -SELECT cheflieu, 5 AS statut FROM cog_region), -adm_weight -AS -(SELECT insee_com, max(statut) AS adm_weight -FROM statut -GROUP BY 1), -pop -AS -(SELECT osm_id, - name, - "ref:INSEE" insee_com, - COALESCE(population_rel,population_member,0) AS population, - RANK() OVER(PARTITION BY "ref:INSEE" ORDER BY admin_level) rang -FROM planet_osm_communes_statut -WHERE admin_level in (8,9) AND - boundary = 'administrative' AND - member_role = 'admin_centre'), -pp -AS -(SELECT osm_id, - ROUND(ST_X(ST_Transform(way,4326))::numeric,6) lon, - ROUND(ST_Y(ST_Transform(way,4326))::numeric,6) lat, - ST_Transform(way,4326) geometrie -FROM planet_osm_point pp -WHERE place != '') -SELECT cc.dep, - adm_weight.insee_com, - pop.name, - adm_weight.adm_weight, - pop.population, - ROUND((pop.population::numeric/1000),1) AS population_milliers, - CASE - WHEN pop.population < 1000 THEN 'village' - WHEN pop.population < 10000 THEN 'town' - 
ELSE 'city' - END AS type, - pp.lon, - pp.lat, - pp.geometrie -FROM adm_weight -JOIN cog_commune cc -ON cc.com = insee_com -LEFT OUTER JOIN pop -USING (insee_com) -JOIN pp -USING (osm_id) -WHERE pop.rang = 1 AND - cc.typecom != 'COMD'; - -TRUNCATE TABLE infos_communes; -INSERT INTO infos_communes -SELECT * -FROM tmp_infos_communes; \ No newline at end of file diff --git a/update_bases_adresses_locales.sh b/update_bases_adresses_locales.sh deleted file mode 100755 index 8dc27d2..0000000 --- a/update_bases_adresses_locales.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/bash -source config - -cd $BAL_CACHE_DIR -for dep in {01..19} 2A 2B {21..95} {971..974} 976 -# for dep in {01..06} 2A -#for dep in {24..95} {971..974} 976 -do - echo 'Département '$dep - wget -NS https://adresse.data.gouv.fr/data/adresses-locales/latest/csv/adresses-locales-$dep.csv.gz - quoted_dep=\'$dep'%'\' - psql -d osm -U cadastre -c "DELETE FROM bal_locales WHERE commune_code LIKE $quoted_dep;" - gunzip -c $BAL_CACHE_DIR/adresses-locales-$dep.csv.gz|psql -d osm -U cadastre -c "\copy bal_locales from stdin with csv header delimiter ';'" - sleep 1 -done diff --git a/update_cadastre_adresses.sh b/update_cadastre_adresses.sh deleted file mode 100755 index e7f9a14..0000000 --- a/update_cadastre_adresses.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/bash -source config - -cd $CADASTRE_CACHE_DIR -for dep in {01..19} 2A 2B {21..95} {971..974} 976 -# for dep in {01..03} 2A -#for dep in {24..95} {971..974} 976 -do - echo 'Département '$dep - wget -NS https://adresse.data.gouv.fr/data/adresses-cadastre/latest/csv/adresses-cadastre-$dep.csv.gz - quoted_dep=\'$dep'%'\' - psql -d osm -U cadastre -c "DELETE FROM bal_cadastre WHERE commune_code LIKE $quoted_dep;" - gunzip -c $CADASTRE_CACHE_DIR/adresses-cadastre-$dep.csv.gz|psql -d osm -U cadastre -c "\copy bal_cadastre from stdin with csv header delimiter ';'" - sleep 1 -done diff --git a/update_cadastre_batiments.sh b/update_cadastre_batiments.sh deleted file mode 100755 index 7c73915..0000000 --- a/update_cadastre_batiments.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/bin/bash -source config - -cd $DATA_DIR/bano_cache - -# for dep in {01..19} 2A 2B {21..95} {971..974} 976 -# for dep in {02..03} 2A -for dep in {04..95} {971..974} 976 2B -do - mkdir $dep - cd $dep - full_dep=`pwd` - wget https://cadastre.data.gouv.fr/data/etalab-cadastre/latest/shp/departements/$dep/cadastre-$dep-batiments-shp.zip -O $CADASTRE_CACHE_DIR/cadastre-$dep-batiments-shp.zip - unzip $CADASTRE_CACHE_DIR/cadastre-$dep-batiments-shp.zip - # reconstruction du shapefile avec ogr2ogr car corrompu pour shp2pgsql - ogr2ogr -overwrite -f 'ESRI Shapefile' batiments_ok.shp batiments.shp - shp2pgsql -s 2154:4326 -g geometrie -W LATIN1 batiments_ok.shp public.tmp_batiments$dep | psql -d cadastre -q - psql -d cadastre -f $BANO_DIR/sql/replace_batiments.sql -v schema_cible=$SCHEMA_CIBLE -v dept=$dep - zip -mT $CADASTRE_CACHE_DIR/cadastre-$dep-batiments-shp.zip batiments.* - sleep 1 - cd .. 
-done diff --git a/update_cadastre_lieux-dits.sh b/update_cadastre_lieux-dits.sh deleted file mode 100755 index b78ac8c..0000000 --- a/update_cadastre_lieux-dits.sh +++ /dev/null @@ -1,31 +0,0 @@ -#!/bin/bash -source config - -cd $CADASTRE_CACHE_DIR -echo "Mise à jour des lieux-dits" > ld.log -date >> ld.log - -for DEPT in {01..19} 2A 2B {21..95} {971..974} 976 -# for DEPT in {01..03} 2A -do - URL="https://cadastre.data.gouv.fr/data/etalab-cadastre/latest/geojson/departements/${DEPT}/cadastre-${DEPT}-lieux_dits.json.gz" - wget -NS ${URL} -o wget.log - HTTP200=`grep '200 OK' wget.log|wc -l` - if (( ${HTTP200} )) - then - echo "Téléchargement des lieux-dits pour le département ${DEPT} OK" >> ld.log - ZIPFILE=`basename ${URL}` - JSONFILE=`basename ${ZIPFILE} .gz` - # echo $URL - # echo $ZIPFILE - # echo $JSONFILE - gzip -dfk ${ZIPFILE} - mv ${JSONFILE} lieux_dits.json - # ls -al - psql -d osm -U cadastre -c "DELETE FROM lieux_dits WHERE insee_com LIKE '${DEPT}%';" - ogr2ogr -append -nln lieux_dits -fieldmap 1,0,2,3 -f PostgreSQL PG:'user=cadastre dbname=osm' lieux_dits.json - rm lieux_dits.json - else - echo "Pas de source plus à jour pour ${DEPT}" >> ld.log - fi -done diff --git a/update_table_infos_communes.sh b/update_table_infos_communes.sh deleted file mode 100755 index 39e59ae..0000000 --- a/update_table_infos_communes.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/bash - -SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" -cd $SCRIPT_DIR - -psql -d osm -U cadastre -f ./sql/update_table_infos_communes.sql - From f072ad476a2b2e5a04b64d938b8f34699571322a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Mon, 15 Aug 2022 19:13:34 +0000 Subject: [PATCH 002/163] Autocommit pour toutes les bases --- bano/db.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/bano/db.py b/bano/db.py index 270f940..16ab382 100644 --- a/bano/db.py +++ b/bano/db.py @@ -3,6 +3,8 @@ import psycopg2 import psycopg2.extras -# bano = psycopg2.connect(os.environ.get("BANO_PG", "dbname='cadastre' user='cadastre'")) +bano = psycopg2.connect(os.environ.get("BANO_PG", "dbname='bano' user='cadastre'")) +bano.autocommit = True bano_sources = psycopg2.connect(os.environ.get("BANO_PG_CACHE", "dbname='bano_sources' user='cadastre'")) +bano_sources.autocommit = True # psycopg2.extras.register_hstore(bano_cache) From 239a686ec02890b5ad39a779f9cf397b24c9e90e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Mon, 15 Aug 2022 19:27:06 +0000 Subject: [PATCH 003/163] Log --- bano/batch.py | 31 +++++++++++++++++++++++++++++ bano/sql/batch_start_log.sql | 3 +++ bano/sql/batch_stop_log.sql | 1 + bano/sql/create_table_base_bano.sql | 26 ++++++++++++++++++++++++ 4 files changed, 61 insertions(+) create mode 100644 bano/batch.py create mode 100644 bano/sql/batch_start_log.sql create mode 100644 bano/sql/batch_stop_log.sql create mode 100644 bano/sql/create_table_base_bano.sql diff --git a/bano/batch.py b/bano/batch.py new file mode 100644 index 0000000..e54c7d0 --- /dev/null +++ b/bano/batch.py @@ -0,0 +1,31 @@ +#!/usr/bin/env python +# coding: UTF-8 + +import time +from . 
import db +from .sql import sql_get_data,sql_process + +def batch_start_log(etape,source=None,code_zone=None,nom_zone=None): + t = time.localtime() + date_debut = time.strftime('%d-%m-%Y %H:%M:%S',t) + timestamp_debut = round(time.mktime(t),0) + + champs = 'etape,timestamp_debut,date_debut' + values = f"'{etape}',{timestamp_debut},'{date_debut}'" + if source: + champs = f"{champs},source" + values = f"{values},'{source}'" + if code_zone: + champs = f"{champs},code_zone" + values = f"{values},'{code_zone}'" + if nom_zone: + champs = f"{champs},nom_zone" + values = f"{values},'{nom_zone}'" + return sql_get_data('batch_start_log',dict(champs=champs,values=values),db.bano)[0][0] + + +def batch_stop_log(id_batch,status): + t = time.localtime() + date_fin = time.strftime('%d-%m-%Y %H:%M:%S',t) + timestamp_fin = round(time.mktime(t),0) + sql_process('batch_stop_log',dict(id_batch=str(id_batch),date_fin=str(date_fin),timestamp_fin=str(timestamp_fin),status=str(status)),db.bano) diff --git a/bano/sql/batch_start_log.sql b/bano/sql/batch_start_log.sql new file mode 100644 index 0000000..135123e --- /dev/null +++ b/bano/sql/batch_start_log.sql @@ -0,0 +1,3 @@ +INSERT INTO batch(__champs__) +VALUES (__values__) +RETURNING id_batch; \ No newline at end of file diff --git a/bano/sql/batch_stop_log.sql b/bano/sql/batch_stop_log.sql new file mode 100644 index 0000000..045d56b --- /dev/null +++ b/bano/sql/batch_stop_log.sql @@ -0,0 +1 @@ +UPDATE batch SET date_fin = '__date_fin__',duree=(__timestamp_fin__ - timestamp_debut)::integer,ok = __status__ WHERE id_batch = __id_batch__; diff --git a/bano/sql/create_table_base_bano.sql b/bano/sql/create_table_base_bano.sql new file mode 100644 index 0000000..7f07c1e --- /dev/null +++ b/bano/sql/create_table_base_bano.sql @@ -0,0 +1,26 @@ +CREATE TABLE IF NOT EXISTS batch ( + id_batch serial, + etape text, + source text, + timestamp_debut bigint, + date_debut text, + date_fin text, + duree integer, + code_zone text, + nom_zone text, + ok boolean, + CONSTRAINT batch_pkey PRIMARY KEY (id_batch)); + +CREATE TABLE IF NOT EXISTS batch_historique( + id_batch integer, + etape text, + source text, + timestamp_debut bigint, + date_debut text, + date_fin text, + duree integer, + code_zone text, + nom_zone text, + ok boolean); + +GRANT SELECT ON ALL TABLES IN SCHEMA public TO public; \ No newline at end of file From eed989ac45805826d9f6ec13d171f6dc36149f86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Mon, 15 Aug 2022 19:28:45 +0000 Subject: [PATCH 004/163] Setup --- setup.cfg | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/setup.cfg b/setup.cfg index 1803d52..3768d7b 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,13 +1,13 @@ [metadata] name=bano -version=2.0.0-alpha +version=3.0.0-alpha [options] packages=find: install_requires= beautifulsoup4==4.6.3 - psycopg2-binary==2.9.3 - requests==2.21.0 + psycopg2-binary + requests lxml [options.extras_require] From c3323ed1f83e0230026ea91aeb04bcce90103129 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Mon, 15 Aug 2022 19:29:52 +0000 Subject: [PATCH 005/163] Autocommit --- bano/sql.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bano/sql.py b/bano/sql.py index f76a025..1dc1476 100644 --- a/bano/sql.py +++ b/bano/sql.py @@ -15,7 +15,6 @@ def sql_process(sqlfile,args,conn): with conn.cursor() as cur: cur.execute(q) - cur.execute('COMMIT') def sql_get_data(sqlfile,args,conn): sqlfile = (Path(SQLDIR) / 
sqlfile).with_suffix('.sql') @@ -26,6 +25,7 @@ def sql_get_data(sqlfile,args,conn): with conn.cursor() as cur: cur.execute(q) + return cur.fetchall() return None From 23c6e412a90c2787ce233c593ad87d5509c47904 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Mon, 15 Aug 2022 19:32:21 +0000 Subject: [PATCH 006/163] Process ban & topo --- bano/bin.py | 24 ++++++++++++++++++++++-- bano/sources/ban.py | 32 ++++++++++++++++---------------- bano/sources/topo.py | 37 ++++++++++++++++++++++++++----------- 3 files changed, 64 insertions(+), 29 deletions(-) diff --git a/bano/bin.py b/bano/bin.py index dbc3691..1b981b5 100644 --- a/bano/bin.py +++ b/bano/bin.py @@ -5,7 +5,8 @@ import sys from . import setup_db -from .sources import topo +from .sources import topo,ban +from .constants import DEPARTEMENTS def main(): @@ -20,11 +21,30 @@ def main(): ) subparser.set_defaults(func=setup_db.setup_bano_sources) + subparser = subparsers.add_parser( + "setup_db_bano", + description="Initialisation de la BD BANO", + ) + subparser.set_defaults(func=setup_db.setup_bano) + subparser = subparsers.add_parser( "charge_topo", description="Charge une version du fichier TOPO", ) - subparser.set_defaults(func=topo.import_to_pg) + subparser.set_defaults(func=topo.process_topo) + + subparser = subparsers.add_parser( + "charge_ban", + description="Charge une version des fichiers BAN", + ) + subparser.add_argument( + "departements", + type=str, + help="Départements à traiter", + nargs="*", + default=DEPARTEMENTS, + ) + subparser.set_defaults(func=ban.process_ban) args = parser.parse_args() diff --git a/bano/sources/ban.py b/bano/sources/ban.py index a37a623..05b8b02 100644 --- a/bano/sources/ban.py +++ b/bano/sources/ban.py @@ -10,18 +10,18 @@ import psycopg2 from ..constants import DEPARTEMENTS -from .. import db -from .. import db_helpers as dbh -from .. import outils_de_gestion as m -from .. import update_manager as um +from ..db import bano_sources +from ..sql import sql_process +from .. import batch as b +# from .. 
import update_manager as um -def process(departements, **kwargs): +def process_ban(departements, **kwargs): source = 'BAN' departements = set(departements) depts_inconnus = departements - set(DEPARTEMENTS) if depts_inconnus: raise ValueError(f"Départements inconnus : {depts_inconnus}") - um.set_csv_directory(um.get_directory_pathname()) + # um.set_csv_directory(um.get_directory_pathname()) for dept in sorted(departements): print(f"Processing {dept}") status = download(source, dept) @@ -35,33 +35,33 @@ def download(source, departement): headers['If-Modified-Since'] = formatdate(destination.stat().st_mtime) resp = requests.get(f'https://adresse.data.gouv.fr/data/ban/adresses-odbl/latest/csv/adresses-{departement}.csv.gz', headers=headers) + id_batch = b.batch_start_log('download source', 'BAN',departement) if resp.status_code == 200: - batch_id = m.batch_start_log(source,'downloadDeptBan',departement) with destination.open('wb') as f: f.write(resp.content) mtime = parsedate_to_datetime(resp.headers['Last-Modified']).timestamp() os.utime(destination, (mtime, mtime)) - m.batch_end_log(-1,batch_id) + b.batch_stop_log(id_batch,True) return True print(resp.status_code) + b.batch_stop_log(id_batch,False) return False def import_to_pg(source, departement, **kwargs): - batch_id = m.batch_start_log(source,'loadDeptBal',departement) + id_batch = b.batch_start_log('import source', 'BAN',departement) fichier_source = get_destination(departement) with gzip.open(fichier_source, mode='rt') as f: f.readline() # skip CSV headers - with db.bano_cache.cursor() as cur_insert: + with bano_sources.cursor() as cur_insert: try: cur_insert.execute(f"DELETE FROM ban_odbl WHERE code_insee LIKE '{departement+'%'}'") cur_insert.copy_from(f, "ban_odbl", sep=';', null='') - db.bano_cache.commit() - # um.save_bal_insee_list(um.get_directory_pathname(),departement) + # bano_sources.commit() + b.batch_stop_log(id_batch,True) except psycopg2.DataError as e: - db.bano_cache.reset() - m.batch_end_log(-1,batch_id) - + b.batch_stop_log(id_batch,False) + # bano_sources.reset() def get_destination(departement): try: @@ -73,4 +73,4 @@ def get_destination(departement): return cwd / f'adresses-{departement}.csv.gz' def update_bis_table(**kwargs): - dbh.process_sql(db.bano_cache,'update_table_rep_b_as_bis',dict()) \ No newline at end of file + sql_process('update_table_rep_b_as_bis',dict(),bano_sources) \ No newline at end of file diff --git a/bano/sources/topo.py b/bano/sources/topo.py index 72cf980..647c4af 100644 --- a/bano/sources/topo.py +++ b/bano/sources/topo.py @@ -8,19 +8,26 @@ from ..db import bano_sources from .. import helpers as h +from .. 
import batch as b CODE_DIR = get_const_code_dir() def fantoir9_vers_fantoir10(fantoir): insee = fantoir[0:5] - code_dir = CODE_DIR.get(insee,0) + code_dir = CODE_DIR.get(insee,'0') dept = fantoir[0:2] # pour les DOM le distingo se fait avec le code direction + if dept == '2A': + dept = '210' + if dept == '2B': + dept = '20' + code_dir = '1' #2B commune = insee[2:] code_voie = '0123456789ABCDEFGHIJKLMNOPQRSTVWXYZ'.index(fantoir[5:6]) numero = fantoir[6:] cle = 'ABCDEFGHJKLMNPRSTUVWXYZ'[(int(dept+code_dir+commune)*19+code_voie*11+int(numero))%23] + # print(f"{fantoir}{cle}") return(f"{fantoir}{cle}") def topo_voie_to_csv(ligne_brute): @@ -31,9 +38,8 @@ def topo_voie_to_csv(ligne_brute): for l in longueurs: champs.append((ligne_brute[:l]).strip()) ligne_brute = ligne_brute[l:] - # print(ligne_brute) # selection - champs = [champs[2]]+champs[4:6]+champs[11:] + champs = [champs[2]]+champs[4:6]+champs[11:-1] #insee champs.insert(0,champs[0][0:5]) # code dept @@ -44,8 +50,9 @@ def topo_voie_to_csv(ligne_brute): return champs -def import_to_pg(**kwargs): +def import_to_pg(): fichier_source = '/data/download/TOPO20211101.gz' + # fichier_source = '/data/download/corse.txt.gz' io_in_csv = io.StringIO() with gzip.open(fichier_source, mode='rt') as f: f.readline() # skip CSV headers @@ -54,11 +61,19 @@ def import_to_pg(**kwargs): continue # print(line) topo_voie_to_csv(line) - io_in_csv.write(','.join(topo_voie_to_csv(line))) - if i > 20: - break + io_in_csv.write('$'.join(topo_voie_to_csv(line))+'\n') # separateur $ car on trouve des virgules dans le contenu + # if i > 20: + # break + io_in_csv.seek(0) + with bano_sources.cursor() as cur_insert: + cur_insert.execute("TRUNCATE topo") + cur_insert.copy_from(io_in_csv, "topo", sep='$',null='') + bano_sources.commit() - with db.bano_sources.cursor() as cur_insert: - cur_insert.execute(f"DELETE FROM topo WHERE code_insee LIKE '{departement+'%'}'") - cur_insert.copy_from(f, "topo", sep=';', null='') - db.bano_cache.commit() +def process_topo(**kwargs): + id_batch = b.batch_start_log('import source', 'TOPO','France','France') + try: + import_to_pg() + b.batch_stop_log(id_batch,True) + except: + b.batch_stop_log(id_batch,False) From b3dfe9962794ea0c7b9df31311e9f3ae42dcae87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Mon, 15 Aug 2022 19:46:52 +0000 Subject: [PATCH 007/163] Setup --- bano/setup_db.py | 5 ++++- bano/sql/create_table_base_bano_sources.sql | 7 ++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/bano/setup_db.py b/bano/setup_db.py index dde27e1..e77e25f 100644 --- a/bano/setup_db.py +++ b/bano/setup_db.py @@ -5,4 +5,7 @@ from .sql import sql_process def setup_bano_sources(**kwargs): - sql_process('create_table_base_bano_sources',{},db.bano_sources) \ No newline at end of file + sql_process('create_table_base_bano_sources',{},db.bano_sources) + +def setup_bano(**kwargs): + sql_process('create_table_base_bano',{},db.bano) \ No newline at end of file diff --git a/bano/sql/create_table_base_bano_sources.sql b/bano/sql/create_table_base_bano_sources.sql index a6969a5..872d9bb 100644 --- a/bano/sql/create_table_base_bano_sources.sql +++ b/bano/sql/create_table_base_bano_sources.sql @@ -1,5 +1,4 @@ CREATE TABLE IF NOT EXISTS topo ( - code_pays character(2), code_dep character(3), code_insee character(5), fantoir10 character(10), @@ -12,8 +11,8 @@ CREATE TABLE IF NOT EXISTS topo ( type_voie character(1), mot_classant character varying(8)); CREATE INDEX IF NOT EXISTS idx_topo_dep ON topo(code_dep); 
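-- Rappel (commentaire descriptif, d'après bano/sources/topo.py) : la table topo reçoit le fichier
-- TOPO de la DGFiP, chargé intégralement (TRUNCATE puis copy_from) avec '$' comme séparateur,
-- les libellés pouvant contenir des virgules. La colonne fantoir10 reprend le code FANTOIR sur
-- 9 caractères complété par la clé de contrôle (clé RIVOLI) calculée dans fantoir9_vers_fantoir10().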
-CREATE INDEX IF NOT EXISTS idx_fantoir_code_insee ON topo(code_insee); -CREATE INDEX IF NOT EXISTS idx_fantoir_fantoir10 ON topo(fantoir10); +CREATE INDEX IF NOT EXISTS idx_topo_code_insee ON topo(code_insee); +CREATE INDEX IF NOT EXISTS idx_topo_fantoir10 ON topo(fantoir10); CREATE TABLE IF NOT EXISTS ban_odbl ( id text, @@ -42,6 +41,4 @@ CREATE TABLE IF NOT EXISTS ban_odbl ( -- geometrie geometry (Point, 4326) DEFAULT (ST_Point(lon,lat))); CREATE INDEX IF NOT EXISTS idx_ban_odbl_code_insee ON ban_odbl(code_insee); - - GRANT SELECT ON ALL TABLES IN SCHEMA public TO public; \ No newline at end of file From c25c7f29f014440c1efe19b65fa0944b0c86675d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Tue, 23 Aug 2022 21:46:35 +0000 Subject: [PATCH 008/163] affichage des erreurs --- bano/batch.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bano/batch.py b/bano/batch.py index e54c7d0..a5574ea 100644 --- a/bano/batch.py +++ b/bano/batch.py @@ -29,3 +29,5 @@ def batch_stop_log(id_batch,status): date_fin = time.strftime('%d-%m-%Y %H:%M:%S',t) timestamp_fin = round(time.mktime(t),0) sql_process('batch_stop_log',dict(id_batch=str(id_batch),date_fin=str(date_fin),timestamp_fin=str(timestamp_fin),status=str(status)),db.bano) + if not status: + print(f"Erreur pendant le processus {id_batch}") From cd4c8db4b088b446b26b13b75cfbd1f6394f0a86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Tue, 23 Aug 2022 21:51:04 +0000 Subject: [PATCH 009/163] COG (WIP), table BAN --- bano/sql/create_table_base_bano_sources.sql | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/bano/sql/create_table_base_bano_sources.sql b/bano/sql/create_table_base_bano_sources.sql index 872d9bb..a83e4eb 100644 --- a/bano/sql/create_table_base_bano_sources.sql +++ b/bano/sql/create_table_base_bano_sources.sql @@ -14,7 +14,7 @@ CREATE INDEX IF NOT EXISTS idx_topo_dep ON topo(code_dep); CREATE INDEX IF NOT EXISTS idx_topo_code_insee ON topo(code_insee); CREATE INDEX IF NOT EXISTS idx_topo_fantoir10 ON topo(fantoir10); -CREATE TABLE IF NOT EXISTS ban_odbl ( +CREATE TABLE IF NOT EXISTS ban ( id text, id_fantoir text, numero text, @@ -39,6 +39,13 @@ CREATE TABLE IF NOT EXISTS ban_odbl ( certification_commune integer, cad_parcelles text); -- geometrie geometry (Point, 4326) DEFAULT (ST_Point(lon,lat))); -CREATE INDEX IF NOT EXISTS idx_ban_odbl_code_insee ON ban_odbl(code_insee); +CREATE INDEX IF NOT EXISTS idx_ban_code_insee ON ban(code_insee); + +CREATE TABLE IF NOT EXISTS suffixe ( + geometrie geometry, + insee_com character(5), + libelle_suffixe character varying(100) +); +CREATE INDEX IF NOT EXISTS gidx_suffixe ON suffixe USING GIST(geometrie); GRANT SELECT ON ALL TABLES IN SCHEMA public TO public; \ No newline at end of file From bd68e0d593f904ca9b7bd3fcbbf1530aeba447ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Tue, 23 Aug 2022 21:53:15 +0000 Subject: [PATCH 010/163] Modele adresse & adresses --- bano/models.py | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 bano/models.py diff --git a/bano/models.py b/bano/models.py new file mode 100644 index 0000000..8341bb1 --- /dev/null +++ b/bano/models.py @@ -0,0 +1,62 @@ +#!/usr/bin/env python +# coding: UTF-8 + +# import re +from collections import defaultdict + + +from . import db +# from . import helpers as hp +from .sources import fantoir +from .sql import sql_get_data +# from . 
import core as c + + +class Adresse: + def __init__(self, x, y, num, source, voie=None, place=None, fantoir=None, code_postal=None, sous_commune_code=None, sous_commune_nom=None): + self.x = x + self.y = y + self.source = source + self.numero = num + self.voie = voie + self.place = place + self.fantoir = fantoir + self.code_postal = code_postal + self.sous_commune_code = sous_commune_code + self.sous_commune_nom = sous_commune_nom + + def _as_string(self): + return (f"source : {self.source}, numero : {self.numero}, voie : {self.voie}, place : {self.place}, fantoir : {self.fantoir}, code_postal:{self.code_postal}, sous_commune : {self.sous_commune_code} - {self.sous_commune_nom}") + + +class Adresses: + def __init__(self, code_insee): + self.a = {} + self.liste = [] + self.index_by_voie = defaultdict(list) + self.code_insee = code_insee + self.noms_de_voies = set() + + # def __contains__(self, item): + # return item in self.a + + def __getitem__(self, key): + return self.liste[key] + + # def __setitem__(self, key, value): + # self.a[key] = value + + def __iter__(self): + return iter(self.liste) + + def add_adresse(self,ad): + """ une adresses est considérée dans la commune si sans Fantoir ou avec un Fantoir de la commune""" + # if (ad.fantoir == None or hp.is_valid_fantoir(ad.fantoir, self.code_insee)) and hp.is_valid_housenumber(ad.numero): + self.liste.append(ad) + self.index_by_voie[ad.voie].append(len(self.liste)-1) + self.noms_de_voies.add(ad.voie) + + def charge_numeros_ban(self): + data = sql_get_data('numeros_ban',dict(code_insee=self.code_insee),db.bano_sources) + for numero, voie, lon, lat, code_postal, code_insee_ac, nom_ac in data: + self.add_adresse(Adresse(lon,lat,numero,'BAN',voie=voie,code_postal=code_postal,sous_commune_code=code_insee_ac,sous_commune_nom=nom_ac)) From 72cd1b9c1670a9e74c29b38b5be1f56c7acfdb90 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Tue, 23 Aug 2022 21:53:58 +0000 Subject: [PATCH 011/163] COG (WIP) --- bano/sql/create_table_cog.sql | 57 +++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 bano/sql/create_table_cog.sql diff --git a/bano/sql/create_table_cog.sql b/bano/sql/create_table_cog.sql new file mode 100644 index 0000000..d7eab67 --- /dev/null +++ b/bano/sql/create_table_cog.sql @@ -0,0 +1,57 @@ +CREATE TABLE IF NOT EXISTS cog_commune ( + typecom character(4), + com character(5), + reg character(2), + dep character varying(3), + ctcd character(4), + arr character(4), + tncc character(1), + ncc text, + nccenr text, + libelle text, + can character(5), + comparent character(5)); +CREATE INDEX IF NOT EXISTS idx_cog_commune_com ON cog_commune(com); + +CREATE TABLE IF NOT EXISTS cog_canton ( + can character(5), + dep character varying(3), + reg character(2), + compct character(1), + burcentral character(5), + tncc character(1), + ncc text, + nccenr text, + libelle text, + typect character(1)); +CREATE INDEX IF NOT EXISTS idx_cog_canton_can ON cog_canton(can); + +CREATE TABLE IF NOT EXISTS cog_arrondissement ( + arr character(4), + dep character varying(3), + reg character(2), + cheflieu character(5), + tncc character(1), + ncc text, + nccenr text, + libelle text); +CREATE INDEX IF NOT EXISTS idx_cog_arrondissement_arr ON cog_arrondissement(arr); + +CREATE TABLE IF NOT EXISTS cog_departement ( + dep character varying(3), + reg character(2), + cheflieu character(5), + tncc character(1), + ncc text, + nccenr text, + libelle text); +CREATE INDEX IF NOT EXISTS idx_cog_departement_dep 
ON cog_departement(dep); + +CREATE TABLE IF NOT EXISTS cog_region ( + reg character(2), + cheflieu character(5), + tncc character(1), + ncc text, + nccenr text, + libelle text); +CREATE INDEX IF NOT EXISTS idx_cog_region_reg ON cog_region(reg); \ No newline at end of file From 443030b4340d732a2786a6996aa7127d85d2dc9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Tue, 23 Aug 2022 21:54:18 +0000 Subject: [PATCH 012/163] COG (WIP) --- bano/sources/cog.py | 64 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 bano/sources/cog.py diff --git a/bano/sources/cog.py b/bano/sources/cog.py new file mode 100644 index 0000000..2a5cff7 --- /dev/null +++ b/bano/sources/cog.py @@ -0,0 +1,64 @@ +# import csv +# import gzip +import os +# import subprocess +# from datetime import datetime +from email.utils import formatdate, parsedate_to_datetime +from pathlib import Path + +import requests +# import psycopg2 + +from ..db import bano +from ..sql import sql_process +from .. import batch as b +# from .. import update_manager as um + +def process_cog(**kwargs): + sql_process('create_table_cog',dict(),bano) + for fichier_cog in ['cog_commune']: + status = download(fichier_cog) + if status: + import_to_pg(fichier_cog) + +def download(fichier_cog): + destination = get_destination(fichier_cog) + headers = {} + if destination.exists(): + headers['If-Modified-Since'] = formatdate(destination.stat().st_mtime) + + resp = requests.get(f'https://www.data.gouv.fr/fr/datasets/r/6c1db35c-3ee3-42f8-80fb-d366e451cc30', headers=headers) + id_batch = b.batch_start_log('download source', 'COG commune','France') + if resp.status_code == 200: + with destination.open('wb') as f: + f.write(resp.content) + # mtime = parsedate_to_datetime(resp.headers['Last-Modified']).timestamp() + # os.utime(destination, (mtime, mtime)) + b.batch_stop_log(id_batch,True) + return True + print(resp.status_code) + b.batch_stop_log(id_batch,False) + return False + + +def import_to_pg(fichier_cog): + id_batch = b.batch_start_log('import source', f'COG {fichier_cog}','France') + fichier_source = get_destination(fichier_cog) + with open(fichier_source, mode='rt') as f: + f.readline() # skip CSV headers + with bano.cursor() as cur_insert: + try: + cur_insert.execute(f"TRUNCATE {fichier_cog}") + cur_insert.copy_from(f,fichier_cog, sep=',', null='') + b.batch_stop_log(id_batch,True) + except psycopg2.DataError as e: + b.batch_stop_log(id_batch,False) + +def get_destination(fichier_cog): + try: + cwd = Path(os.environ['CSV_DIR']) + except KeyError: + raise ValueError(f"La variable CSV n'est pas définie") + if not cwd.exists(): + raise ValueError(f"Le répertoire {cwd} n'existe pas") + return cwd / f'{fichier_cog}.csv' From 993da1b9f68b1268f323b8ba7a2a2b8c622b1daf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Tue, 23 Aug 2022 21:56:57 +0000 Subject: [PATCH 013/163] Suffixes --- bano/pre_process_suffixe.py | 119 +++++++++++++++++++++++++ bano/sql/numeros_ban.sql | 33 +++++++ bano/sql/update_table_rep_b_as_bis.sql | 29 ++++++ 3 files changed, 181 insertions(+) create mode 100644 bano/pre_process_suffixe.py create mode 100644 bano/sql/numeros_ban.sql create mode 100644 bano/sql/update_table_rep_b_as_bis.sql diff --git a/bano/pre_process_suffixe.py b/bano/pre_process_suffixe.py new file mode 100644 index 0000000..23f4fdb --- /dev/null +++ b/bano/pre_process_suffixe.py @@ -0,0 +1,119 @@ +#!/usr/bin/env python +# coding: UTF-8 + +import re 
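+# Principe de la détection des suffixes, tel qu'implémenté ci-dessous :
+#  - les adresses BAN de la commune sont regroupées par nom de voie (charge_numeros_ban) ;
+#  - chaque nom est indexé par son suffixe candidat : partie entre parenthèses en fin de nom,
+#    ou dernier(s) mot(s) pour les noms assez longs (name_frequency) ;
+#  - un suffixe est retenu s'il apparaît dans plus de 5 noms et, pour un mot seul, s'il n'est pas
+#    un simple numéro (select_street_names_by_name) ;
+#  - des tampons d'environ 0,0003 degré autour des adresses concernées (collect_adresses_points)
+#    sont fusionnés puis stockés dans la table suffixe de bano_sources (load_suffixe_2_db) ;
+#    la commune 34003 (Agde) est ignorée, « Mer » y étant détecté à tort comme suffixe.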
+import sys +import time +import os,os.path + +from . import batch as b +from . import db +from . import helpers as hp +from . import db_helpers as dh +from .models import Adresses + +def name_frequency(adresses): + freq = {} + noms_hors_1ere_passe = set() + for nom in adresses.noms_de_voies: + s = nom.split() + # noms avec suffixe entre () quelle que soit leur longueur + if '(' in nom and nom[-1] == ')': + k = f"({nom.split('(')[1]}" + if k not in freq: + freq[k] = {'nombre':1,'liste':set(nom)} + else: + freq[k]['nombre'] +=1 + freq[k]['liste'].add(nom) + elif len(s)>4: + k = ' '.join(s[-2:]) + if k not in freq: + freq[k] = {'nombre':1,'liste':set(nom)} + else: + freq[k]['nombre'] +=1 + freq[k]['liste'].add(nom) + elif len(s)>3: + k = nom.split()[-1] + if k not in freq: + freq[k] = {'nombre':1,'liste':set(nom)} + else: + freq[k]['nombre'] +=1 + freq[k]['liste'].add(nom) + else: + noms_hors_1ere_passe.add(nom) + + # 2eme passe sur les noms courts (surtout des lieux-dits) avec un suffixe + for nom in noms_hors_1ere_passe: + s = nom.split() + if len(s) > 1 and len(s) < 4: + k = nom.split()[-1] + if k in freq: + freq[k]['nombre'] +=1 + freq[k]['liste'].add(nom) + + return freq + +def select_street_names_by_name(freq): + sel = {} + mots = {} + for k in freq: + ks = k.split() + if freq[k]['nombre'] > 5 and len(ks) > 1: + mots[ks[0]] = 1 + mots[ks[1]] = 1 + sel[k] = freq[k] + for k in freq: + ks = k.split() + # un suffixe ne peut pas être un numero seul, cas dans les arrdts parisiens + if freq[k]['nombre'] > 5 and len(ks) == 1 and not k.isdigit() and not k in mots : + sel[k] = freq[k] + return sel + +def collect_adresses_points(selection, adresses): + kres = {} + for k in selection: + kres[k] = [] + for nom_voie in selection[k]['liste']: + s = 0 + max = 2 + for i in adresses.index_by_voie[nom_voie]: + add = adresses[i] + suffixe = k.replace("'","''") + kres[k].append(f"SELECT '{suffixe}' AS libelle_suffixe,'{adresses.code_insee}' AS code_insee,ST_BUFFER(ST_PointFromText('POINT({add.x} {add.y})',4326),0.0003,2) as g") + # s+=1 + # if s == max: break + return kres + +def load_suffixe_2_db(adds, code_insee, nom_commune): + with db.bano_sources.cursor() as cur: + for h in adds: + # Agde (34003): detection de 'Mer' abusif, pas d'autres suffixes dans la commune + if code_insee == '34003': + continue + print(f"{code_insee} - {nom_commune}......... 
{h}") + str_query = f"INSERT INTO suffixe SELECT ST_Transform(ST_SetSRID((ST_Dump(gu)).geom,4326),3857),code_insee,libelle_suffixe FROM (SELECT ST_Union(g) gu,code_insee,libelle_suffixe FROM({' UNION ALL '.join(adds[h])})a GROUP BY 2,3)a;" + cur.execute(str_query) + +def process(departements, **kwargs): + for dept in departements: + if hp.is_valid_dept(dept): + print(f"Traitement du dept {dept}") + with db.bano_sources.cursor() as cur: + str_query = f"DELETE FROM suffixe WHERE insee_com LIKE '{dept}%';" + cur.execute(str_query) + for code_insee, nom_commune in dh.get_insee_name_list_by_dept(dept): + # for code_insee, nom_commune in [['49244','Mauges']]: + debut_total = time.time() + # hp.display_insee_commune(code_insee, nom_commune) + adresses = Adresses(code_insee) + batch_id = b.batch_start_log('detecte suffixe',code_insee,nom_commune) + try: + adresses.charge_numeros_ban() + freq = name_frequency(adresses) + selection = select_street_names_by_name(freq) + adds = collect_adresses_points(selection, adresses) + load_suffixe_2_db(adds, code_insee, nom_commune) + b.batch_stop_log(batch_id,True) + except(e): + print(e) + b.batch_stop_log(batch_id,False) diff --git a/bano/sql/numeros_ban.sql b/bano/sql/numeros_ban.sql new file mode 100644 index 0000000..83b33a3 --- /dev/null +++ b/bano/sql/numeros_ban.sql @@ -0,0 +1,33 @@ +WITH +j +AS +(SELECT numero, + nom_voie, + lon, + lat, + rep, + CASE r.numero + WHEN b.numero THEN + CASE + WHEN rep = 'b' THEN 'bis' + WHEN rep = 't' THEN 'ter' + WHEN rep = 'q' THEN 'quater' + ELSE NULL + END + ELSE NULL + END as rep_bis, + code_postal, + code_insee_ancienne_commune, + nom_ancienne_commune +FROM ban_odbl b +LEFT OUTER JOIN rep_b_as_bis r +USING (id_fantoir,numero) +WHERE code_insee = '__code_insee__') +SELECT TRIM (BOTH FROM (numero||' '||COALESCE(rep_bis,rep,''))), + nom_voie, + lon, + lat, + code_postal, + code_insee_ancienne_commune, + nom_ancienne_commune +FROM j; diff --git a/bano/sql/update_table_rep_b_as_bis.sql b/bano/sql/update_table_rep_b_as_bis.sql new file mode 100644 index 0000000..f05bb2c --- /dev/null +++ b/bano/sql/update_table_rep_b_as_bis.sql @@ -0,0 +1,29 @@ +DROP TABLE IF EXISTS rep_b_as_bis CASCADE; +CREATE TABLE rep_b_as_bis +AS +SELECT id_fantoir, + numero +FROM ban_odbl +WHERE rep = 'b' + +EXCEPT + +(SELECT id_fantoir, + numero +FROM ban_odbl +WHERE rep = 'a' + +UNION + +SELECT id_fantoir, + numero +FROM ban_odbl +WHERE rep = 'c' + +UNION + +SELECT id_fantoir, + numero +FROM ban_odbl +WHERE rep = 'd'); +COMMIT; \ No newline at end of file From cc3791b8a2e5acb736ae8f3e47d24629e21389aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Tue, 23 Aug 2022 21:57:43 +0000 Subject: [PATCH 014/163] BAN, suffixes, COG, doc... --- bano/bin.py | 37 ++++++++++++++++++++++++++++++++----- 1 file changed, 32 insertions(+), 5 deletions(-) diff --git a/bano/bin.py b/bano/bin.py index 1b981b5..8e5adf6 100644 --- a/bano/bin.py +++ b/bano/bin.py @@ -4,8 +4,9 @@ import argparse import sys +from . import pre_process_suffixe from . 
import setup_db -from .sources import topo,ban +from .sources import topo,ban,cog from .constants import DEPARTEMENTS @@ -17,25 +18,31 @@ def main(): subparser = subparsers.add_parser( "setup_db_bano_sources", - description="Initialisation de la BD des sources : OSM, BAN, TOPO", + help="Initialisation de la BD des sources : OSM, BAN, TOPO", ) subparser.set_defaults(func=setup_db.setup_bano_sources) subparser = subparsers.add_parser( "setup_db_bano", - description="Initialisation de la BD BANO", + help="Initialisation de la BD BANO", ) subparser.set_defaults(func=setup_db.setup_bano) subparser = subparsers.add_parser( "charge_topo", - description="Charge une version du fichier TOPO", + help="Charge une version du fichier TOPO", ) subparser.set_defaults(func=topo.process_topo) + subparser = subparsers.add_parser( + "charge_cog", + help="Charge une version des fichiers COG", + ) + subparser.set_defaults(func=cog.process_cog) + subparser = subparsers.add_parser( "charge_ban", - description="Charge une version des fichiers BAN", + help="Charge une version des fichiers BAN", ) subparser.add_argument( "departements", @@ -46,6 +53,26 @@ def main(): ) subparser.set_defaults(func=ban.process_ban) + subparser = subparsers.add_parser( + "update_bis_table", + help="Identifie les indices de répétition b,t,q assimilables à bis, ter, quater", + ) + subparser.set_defaults(func=ban.update_bis_table) + + subparser = subparsers.add_parser( + "pre_process_suffixe", + help="Détermine les zones où les noms dans le Cadastre sont suffixés", + ) + subparser.add_argument( + "departements", + type=str, + help="Départements à traiter", + nargs="*", + default=DEPARTEMENTS, + ) + subparser.set_defaults(func=pre_process_suffixe.process) + + args = parser.parse_args() try: From 2ecc6c194e4cfd3881f69487f7dd664cd626b615 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Fri, 2 Sep 2022 20:35:59 +0000 Subject: [PATCH 015/163] Imposm avec parametres en fichier et 4326 --- imposm.config | 7 +++++++ load_osm_france_db.sh | 23 +++++++++++++++++++++++ 2 files changed, 30 insertions(+) create mode 100644 imposm.config create mode 100755 load_osm_france_db.sh diff --git a/imposm.config b/imposm.config new file mode 100644 index 0000000..1ceb367 --- /dev/null +++ b/imposm.config @@ -0,0 +1,7 @@ +{ + "cachedir": "/data/bano_imposm_cache_v3", + "diffdir": "/data/download_v3", + "connection": "postgis://cadastre@localhost/bano_sources?prefix=NONE", + "mapping": "/data/project/bano_v3/bano.yml", + "srid":4326 +} diff --git a/load_osm_france_db.sh b/load_osm_france_db.sh new file mode 100755 index 0000000..fd0f10a --- /dev/null +++ b/load_osm_france_db.sh @@ -0,0 +1,23 @@ +#!/bin/bash + +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +source $SCRIPT_DIR/config + +lockfile=${SCRIPT_DIR}/imposm.lock + +if test -f ${lockfile} +then + echo `date`" : Process deja en cours" >> $SCRIPT_DIR/cron.log + exit 0 +fi + +touch ${lockfile} + +cd $DOWNLOAD_DIR +wget -NS http://download.openstreetmap.fr/extracts/merge/france_metro_dom_com_nc.osm.pbf +imposm import -config $SCRIPT_DIR/imposm.config -read $DOWNLOAD_DIR/france_metro_dom_com_nc.osm.pbf -overwritecache -diff -write -dbschema-import public + +psql -d bano_sources -U cadastre -f $BANO_DIR/sql/finalisation.sql + +#cp $DOWNLOAD_DIR/last.state.txt $DOWNLOAD_DIR/state.txt +rm ${lockfile} From 7e717a175304bd67b1702ddf15e85ae8d8c436b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Fri, 2 Sep 2022 20:39:30 +0000 
Subject: [PATCH 016/163] WIP rapprochement --- bano/bin.py | 24 +++ bano/core.py | 329 ------------------------------------------ bano/rapprochement.py | 11 ++ 3 files changed, 35 insertions(+), 329 deletions(-) delete mode 100755 bano/core.py create mode 100644 bano/rapprochement.py diff --git a/bano/bin.py b/bano/bin.py index 8e5adf6..2c35fb6 100644 --- a/bano/bin.py +++ b/bano/bin.py @@ -6,6 +6,8 @@ from . import pre_process_suffixe from . import setup_db +from . import rapprochement +from . import boite_a_outils from .sources import topo,ban,cog from .constants import DEPARTEMENTS @@ -59,6 +61,12 @@ def main(): ) subparser.set_defaults(func=ban.update_bis_table) + subparser = subparsers.add_parser( + "update_table_communes", + help="Met à jour les polygones administratifs OSM", + ) + subparser.set_defaults(func=boite_a_outils.maj_table_communes) + subparser = subparsers.add_parser( "pre_process_suffixe", help="Détermine les zones où les noms dans le Cadastre sont suffixés", @@ -72,6 +80,22 @@ def main(): ) subparser.set_defaults(func=pre_process_suffixe.process) + subparser = subparsers.add_parser( + "rapprochement", + help="Effectue l'appariement entre sources OSM ou BAN et TOPO", + ) + subparser.add_argument( + "--source", + choices=["OSM", "BAN"], + type=str, + help="Source des données à traiter", + ) + group = subparser.add_mutually_exclusive_group(required=True) + group.add_argument( + "--code_insee", type=str, help="Code INSEE de la commune à traiter" + ) + group.add_argument("--dept", type=str, help="Département à traiter (toutes les communes du dept sont traitées une par une)") + subparser.set_defaults(func=rapprochement.process) args = parser.parse_args() diff --git a/bano/core.py b/bano/core.py deleted file mode 100755 index b2c89ba..0000000 --- a/bano/core.py +++ /dev/null @@ -1,329 +0,0 @@ -#!/usr/bin/env python -# coding: UTF-8 - -import os,os.path -import re -import sys -import time -import xml.etree.ElementTree as ET - -from . import constants, db -from . import helpers as hp -from . import db_helpers as dbhp -from . 
import log_2_file as log -from .models import Adresse, Adresses, Node, Pg_hsnr -from .outils_de_gestion import batch_start_log -from .outils_de_gestion import batch_end_log -# from .outils_de_gestion import age_etape_dept -from .sources import fantoir - -os.umask(0000) - - -def add_fantoir_to_hsnr(): - for v in adresses: - if v in fantoir.mapping.fantoir: - adresses[v]['fantoirs']['FANTOIR'] = fantoir.mapping.fantoir[v] - adresses[v]['voies']['FANTOIR'] = fantoir.mapping.code_fantoir_vers_nom_fantoir[fantoir.mapping.fantoir[v]] - else: - if 'OSM' in adresses[v]['fantoirs']: - if adresses[v]['fantoirs']['OSM'] in fantoir.mapping.code_fantoir_vers_nom_fantoir: - adresses[v]['voies']['FANTOIR'] = fantoir.mapping.code_fantoir_vers_nom_fantoir[adresses[v]['fantoirs']['OSM']] - -def append_suffixe(name,suffixe): - res = name - if suffixe: - name_norm = hp.normalize(name) - suffixe_norm = hp.normalize(suffixe) - ln = len(name_norm) - ls = len(suffixe) - if ln > ls: - if name[-ls:] != suffixe: - res = name+' '+suffixe - else: - res = name+' '+suffixe - return res - -def get_last_base_update(query_name,insee_com): - resp = 0 - str_query = "SELECT timestamp_maj FROM {} WHERE insee_com = '{}' LIMIT 1;".format(query_name,insee_com) - cur = db.bano.cursor() - cur.execute(str_query) - for l in cur : - resp = l[0] - if resp == 0 : - etape_dept = 'cache_dept_'+query_name - if dbhp.age_etape_dept(etape_dept,get_short_code_dept_from_insee(insee_com)) < 3600 : - resp = round(time.time()) - cur.close() - return resp - -def get_data_from_pg(query_name,insee_com): - cur_cache = db.bano_cache.cursor() - str_query = "DELETE FROM {} WHERE insee_com = '{}';".format(query_name,insee_com) - with open(os.path.join(os.path.dirname(os.path.abspath(__file__)),'sql/{:s}.sql'.format(query_name)),'r') as fq: - str_query+=fq.read().replace('__com__',insee_com) - - str_query+= "SELECT * FROM {} WHERE insee_com = '{}';".format(query_name,insee_com) - cur_cache.execute(str_query) - - res = [] - for l in cur_cache : - res.append(list(l)) - cur_cache.close() - return res - -def get_data_from_pg_direct(query_name,insee_com): - res = [] - with db.bano_cache.cursor() as cur_cache: - with open(os.path.join(os.path.dirname(os.path.abspath(__file__)),'sql/{:s}_nocache.sql'.format(query_name)),'r') as fq: - str_query = fq.read().replace('__com__',insee_com) - cur_cache.execute(str_query) - - for l in cur_cache : - res.append(list(l)) - return res - -def get_tags(xmlo): - dtags = {} - for tg in xmlo.iter('tag'): - dtags[tg.get('k')] = tg.get('v') - return dtags - -def has_addreses_with_suffix(insee): - res = False - str_query = 'SELECT count(*) FROM suffixe where insee_com = \'{:s}\';'.format(insee) - cur = db.bano.cursor() - cur.execute(str_query) - for c in cur: - if c[0]> 0 : - res = True - cur.close() - return res - -def load_ban_hsnr(code_insee): - dict_node_relations = {} - data = get_data_from_pg_direct('ban_hsnr',code_insee) - for id, housenumber, name, lon, lat,cp in data: - if not name or len(name) < 2 or not lon: - continue - adresses.register(name) - if not id in dict_node_relations: - dict_node_relations[id] = [] - dict_node_relations[id].append(hp.normalize(name)) - if hp.is_valid_housenumber(housenumber): - adresses.add_adresse(Adresse(Node({'id':id,'lon':lon,'lat':lat},{}),housenumber,name,'',cp), 'BAN') - -def load_bases_adresses_locales_hsnr(code_insee): - dict_node_relations = {} - with db.bano_cache.cursor() as cur: - cur.execute(f"SELECT cle_interop,TRIM (BOTH FROM (numero||' '||COALESCE(suffixe,''))), voie_nom, 
long, lat FROM bal_locales WHERE commune_code = '{code_insee}';") - for cle_interop, housenumber, name, lon, lat in cur: - if not name or len(name) < 2 or not lon: - continue - adresses.register(name) - if not cle_interop in dict_node_relations: - dict_node_relations[cle_interop] = [] - dict_node_relations[cle_interop].append(hp.normalize(name)) - if hp.is_valid_housenumber(housenumber): - adresses.add_adresse(Adresse(Node({'id':cle_interop,'lon':lon,'lat':lat},{}),housenumber,name,'',''), 'BAL') - -def load_hsnr_bbox_from_pg_osm(insee_com): - data = get_data_from_pg_direct('hsnr_bbox_insee',insee_com) - for x, y, provenance, osm_id, numero, voie, tags, *others in data: - for num in numero.translate(str.maketrans(',à;-/*','$$$$$$')).split('$'): - oa = Pg_hsnr([x, y, provenance, osm_id, num.rstrip().lstrip(), voie, tags], insee_com) - if oa.fantoir == '' or not oa.voie : - continue - adresses.register(oa.voie) - adresses.add_adresse(Adresse(Node({'id':oa.osm_id,'lon':oa.x,'lat':oa.y},{}),oa.numero,oa.voie,oa.fantoir,oa.code_postal), 'OSM') - -def load_hsnr_from_pg_osm(insee_com): - data = get_data_from_pg_direct('hsnr_insee', insee_com) - for x, y, provenance, osm_id, numero, voie, tags, *others in data: - for num in numero.translate(str.maketrans(',à;-/*','$$$$$$')).split('$'): - oa = Pg_hsnr([x, y, provenance, osm_id, num.rstrip().lstrip(), voie, tags], insee_com) - if not oa.voie : - continue - adresses.register(oa.voie) - adresses.add_adresse(Adresse(Node({'id':oa.osm_id,'lon':oa.x,'lat':oa.y},{}),oa.numero,oa.voie,oa.fantoir,oa.code_postal), 'OSM') - -def load_highways_bbox_from_pg_osm(insee_com): - data = get_data_from_pg_direct('highway_suffixe_insee',insee_com) - for name, fantoir_unique, fantoir_gauche, fantoir_droit, suffixe, *others in data: - if fantoir_unique and hp.is_valid_fantoir(fantoir_unique, insee_com): - code_fantoir = fantoir_unique - elif fantoir_gauche and hp.is_valid_fantoir(fantoir_gauche, insee_com): - code_fantoir = fantoir_gauche - elif fantoir_droit and hp.is_valid_fantoir(fantoir_droit, insee_com): - code_fantoir = fantoir_droit - else: - continue - if not name or len(name) < 2: - continue - name_suffixe = append_suffixe(name,suffixe) - adresses.register(name_suffixe) - cle = hp.normalize(name_suffixe) - if adresses.has_already_fantoir(cle,'OSM'): - continue - adresses.add_fantoir(cle,code_fantoir,'OSM') - adresses.add_voie(name_suffixe,'OSM',name) - -def load_highways_from_pg_osm(insee_com): - data = get_data_from_pg_direct('highway_suffixe_insee',insee_com) - for name, fantoir_unique, fantoir_gauche, fantoir_droit, suffixe, *others in data: - if not name or len(name) < 2: - continue - name_suffixe = append_suffixe(name,suffixe) - adresses.register(name_suffixe) - cle = hp.normalize(name_suffixe) - if adresses.has_already_fantoir(cle,'OSM'): - continue - if fantoir_unique and hp.is_valid_fantoir(fantoir_unique, insee_com): - code_fantoir = fantoir_unique - elif fantoir_gauche and hp.is_valid_fantoir(fantoir_gauche, insee_com): - code_fantoir = fantoir_gauche - elif fantoir_droit and hp.is_valid_fantoir(fantoir_droit, insee_com): - code_fantoir = fantoir_droit - else: - code_fantoir = '' - if code_fantoir != '': - adresses.add_fantoir(cle,code_fantoir,'OSM') - fantoir.mapping.add_fantoir_name(code_fantoir,name_suffixe,'OSM') - adresses.add_voie(name_suffixe,'OSM',name) - -def load_highways_relations_bbox_from_pg_osm(code_insee): - data = get_data_from_pg_direct('highway_relation_suffixe_insee_bbox', code_insee) - for name, tags, suffixe, insee, *others in 
data: - fantoir = '' - if 'ref:FR:FANTOIR' in tags and hp.is_valid_fantoir(tags['ref:FR:FANTOIR'], code_insee): - fantoir = tags['ref:FR:FANTOIR'] - else: - continue - if not name or len(name) < 2: - continue - name_suffixe = append_suffixe(name,suffixe or '') - adresses.register(name_suffixe) - cle = hp.normalize(name_suffixe) - if adresses.has_already_fantoir(cle,'OSM'): - continue - adresses.add_voie(name_suffixe,'OSM',name) - -def load_highways_relations_from_pg_osm(code_insee): - data = get_data_from_pg_direct('highway_relation_suffixe_insee', code_insee) - for name, tags, suffixe, *others in data: - if not name or len(name) < 2: - continue - name_suffixe = append_suffixe(name,suffixe or '') - adresses.register(name_suffixe) - cle = hp.normalize(name_suffixe) - if adresses.has_already_fantoir(cle,'OSM'): - continue - if tags.get('ref:FR:FANTOIR') and hp.is_valid_fantoir(tags.get('ref:FR:FANTOIR'), code_insee): - code_fantoir = tags.get('ref:FR:FANTOIR') - else: - code_fantoir = '' - if code_fantoir != '': - fantoir.mapping.add_fantoir_name(code_fantoir,name,'OSM') - adresses.add_voie(name_suffixe,'OSM',name) - -def load_point_par_rue_from_pg_osm(code_insee): - data = get_data_from_pg_direct('point_par_rue_insee',code_insee) - for lon, lat, name, *others in data: - if not name or len(name) < 2: - continue - adresses.register(name) - cle = hp.normalize(name) - adresses[cle]['point_par_rue'] = [lon, lat] - if 'OSM' not in adresses.a[cle]['voies']: - adresses.add_voie(name,'OSM') - if 'OSM' not in adresses[cle]['fantoirs']: - if cle in fantoir.mapping.fantoir: - adresses.add_fantoir(cle,fantoir.mapping.fantoir[cle],'OSM') - -def load_point_par_rue_complement_from_pg_osm(insee_com): - data = get_data_from_pg_direct('point_par_rue_complement_insee',insee_com) - for l in data: - name = l[2] - if not name or len(name) < 2: - continue - code_fantoir = l[3] - if code_fantoir and code_fantoir[0:5] != insee_com: - continue - if code_fantoir and len(code_fantoir) != 10: - continue - adresses.register(name) - cle = hp.normalize(name) - adresses[cle]['point_par_rue'] = l[0:2] - if code_fantoir: - adresses.add_fantoir(cle,code_fantoir,'OSM') - if 'OSM' not in adresses.a[cle]['voies']: - adresses.add_voie(name,'OSM') - if 'OSM' not in adresses[cle]['fantoirs']: - if cle in fantoir.mapping.fantoir: - adresses.add_fantoir(cle,fantoir.mapping.fantoir[cle],'OSM') - -def load_type_highway_from_pg_osm(insee_com): - data = get_data_from_pg('type_highway_insee',insee_com) - for name, highway_type, *_ in data: - adresses.register(name) - cle = hp.normalize(name) - if highway_type in constants.HIGHWAY_TYPES_INDEX: - adresses.add_highway_index(cle,constants.HIGHWAY_TYPES_INDEX[highway_type]) - -def addr_2_db(code_insee, source, **kwargs): - # global batch_id - global code_dept - global nodes,ways,adresses - # global schema_cible - - # schema_cible = 'public' - # if ('SCHEMA_CIBLE' in os.environ) : schema_cible = (os.environ['SCHEMA_CIBLE']) - - debut_total = time.time() - - adresses = Adresses(code_insee) - - fantoir.mapping.reset() - fantoir.mapping.load(code_insee) - - code_dept = hp.get_code_dept_from_insee(code_insee) - - batch_id = batch_start_log(source,'loadCumul',code_insee) - - if source == 'BAL': - load_bases_adresses_locales_hsnr(code_insee) - if source == 'BAN': - load_ban_hsnr(code_insee) - if source == 'CADASTRE': - adresses.load_cadastre_hsnr() - if source == 'OSM': - load_hsnr_from_pg_osm(code_insee) - load_hsnr_bbox_from_pg_osm(code_insee) - load_type_highway_from_pg_osm(code_insee) - if 
len(adresses.a) != 0: - load_highways_from_pg_osm(code_insee) - load_highways_relations_from_pg_osm(code_insee) - load_highways_bbox_from_pg_osm(code_insee) - load_highways_relations_bbox_from_pg_osm(code_insee) - add_fantoir_to_hsnr() - load_point_par_rue_from_pg_osm(code_insee) - load_point_par_rue_complement_from_pg_osm(code_insee) - nb_rec = adresses.save(source, code_dept) - batch_end_log(nb_rec,batch_id) - -def process(source, code_insee, depts, France, **kwargs): - liste_codes_insee = [] - if code_insee: - liste_codes_insee = dbhp.get_insee_name(code_insee) - if not liste_codes_insee: - for d in (depts or France): - liste_codes_insee += dbhp.get_insee_name_list_by_dept(d) - logfile = log.start_log_to_file(source,'process_commune','00') - for code_insee, nom in liste_codes_insee: - # print(f"{code_insee} - {nom}") - try: - addr_2_db(code_insee, source) - except: - log.write_log_to_file(logfile,f"Erreur pour {code_insee} - {nom}") - log.end_log_to_file(logfile) diff --git a/bano/rapprochement.py b/bano/rapprochement.py new file mode 100644 index 0000000..3f3ce64 --- /dev/null +++ b/bano/rapprochement.py @@ -0,0 +1,11 @@ +#!/usr/bin/env python +# coding: UTF-8 + +from .models import Adresses, Topo + +def process(source,code_insee,dept,**kwargs): + # topo = Topo(code_insee) + # topo._print('CO') + adresses = Adresses(code_insee) + adresses.charge_numeros_ban() + adresses._print('Hell') \ No newline at end of file From 96c1520d14a1a009bdaa489fc7d13279566f6a61 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Fri, 2 Sep 2022 20:40:40 +0000 Subject: [PATCH 017/163] renommage --- bano/sql/{numeros_ban.sql => charge_ban_commune.sql} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename bano/sql/{numeros_ban.sql => charge_ban_commune.sql} (100%) diff --git a/bano/sql/numeros_ban.sql b/bano/sql/charge_ban_commune.sql similarity index 100% rename from bano/sql/numeros_ban.sql rename to bano/sql/charge_ban_commune.sql From 441d6fc805c4532fd7b8bf5c8528d73c68c128b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Fri, 2 Sep 2022 20:49:40 +0000 Subject: [PATCH 018/163] Chargement TOPO --- bano/sql/charge_topo_commune.sql | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 bano/sql/charge_topo_commune.sql diff --git a/bano/sql/charge_topo_commune.sql b/bano/sql/charge_topo_commune.sql new file mode 100644 index 0000000..ee776d5 --- /dev/null +++ b/bano/sql/charge_topo_commune.sql @@ -0,0 +1,18 @@ +WITH +t +AS +(SELECT fantoir10, + TRIM (BOTH FROM (COALESCE(nature_voie,'')||' '||libelle_voie)) AS nom +FROM topo +WHERE code_insee = '__code_insee__' AND + caractere_annul IS NULL), +tr +AS +(SELECT fantoir10, + nom, + rank() OVER (PARTITION BY nom ORDER BY fantoir10) rang +FROM t) +SELECT fantoir10, + nom +FROM tr +WHERE rang = 1; \ No newline at end of file From d71f06ca07677e63210ea1e4a36c1a0bd518a1b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Fri, 2 Sep 2022 20:50:19 +0000 Subject: [PATCH 019/163] Tables des communes en 4326 --- bano/sql/create_table_polygones_communes.sql | 37 ++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 bano/sql/create_table_polygones_communes.sql diff --git a/bano/sql/create_table_polygones_communes.sql b/bano/sql/create_table_polygones_communes.sql new file mode 100644 index 0000000..2c80826 --- /dev/null +++ b/bano/sql/create_table_polygones_communes.sql @@ -0,0 +1,37 @@ +CREATE TABLE IF NOT EXISTS 
polygones_insee ( + geometrie geometry (Geometry, 4326), + insee_com character(5), + nom text, + admin_level integer); +CREATE INDEX IF NOT EXISTS gidx_polygones_insee ON polygones_insee USING GIST (geometrie); +CREATE INDEX IF NOT EXISTS idx_polygones_insee_insee_com ON polygones_insee(insee_com); + +TRUNCATE TABLE polygones_insee; +INSERT INTO polygones_insee +SELECT way, + "ref:INSEE", + name, + admin_level +FROM planet_osm_polygon +WHERE boundary='administrative' AND + admin_level in (8,9) AND + name != ''; + +CREATE TABLE IF NOT EXISTS polygones_insee_a9 ( + geometrie geometry (Geometry, 4326), + insee_com character(5), + nom text, + insee_a8 character(5)); +CREATE INDEX IF NOT EXISTS gidx_polygones_insee_a9 ON polygones_insee_a9 USING GIST (geometrie); +CREATE INDEX IF NOT EXISTS idx_polygones_insee_a9_insee_com ON polygones_insee_a9(insee_com); +CREATE INDEX IF NOT EXISTS idx_polygones_insee_a9_insee_a8 ON polygones_insee_a9(insee_a8); + +TRUNCATE TABLE polygones_insee_a9; +INSERT INTO polygones_insee_a9 +SELECT a9.geometrie, + a9.insee_com, + a9.nom, + a8.insee_com +FROM (SELECT * FROM polygones_insee WHERE admin_level = 9) a9 +JOIN (SELECT * FROM polygones_insee WHERE admin_level = 8) a8 +ON ST_Contains(a8.geometrie,a9.geometrie); From 9eeba71b07d67e8ee0e73ab4aee6864e59d62b92 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Fri, 2 Sep 2022 21:14:30 +0000 Subject: [PATCH 020/163] Modele TOPO --- bano/models.py | 52 +++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 47 insertions(+), 5 deletions(-) diff --git a/bano/models.py b/bano/models.py index 8341bb1..23b2835 100644 --- a/bano/models.py +++ b/bano/models.py @@ -31,10 +31,9 @@ def _as_string(self): class Adresses: def __init__(self, code_insee): - self.a = {} - self.liste = [] - self.index_by_voie = defaultdict(list) self.code_insee = code_insee + self.liste = [] + self.index_voie = defaultdict(list) self.noms_de_voies = set() # def __contains__(self, item): @@ -49,14 +48,57 @@ def __getitem__(self, key): def __iter__(self): return iter(self.liste) + def _print(self,pattern=None): + for a in self: + if not pattern or pattern in a._as_string(): + print(a._as_string()) + + def add_adresse(self,ad): """ une adresses est considérée dans la commune si sans Fantoir ou avec un Fantoir de la commune""" # if (ad.fantoir == None or hp.is_valid_fantoir(ad.fantoir, self.code_insee)) and hp.is_valid_housenumber(ad.numero): self.liste.append(ad) - self.index_by_voie[ad.voie].append(len(self.liste)-1) + self.index_voie[ad.voie].append(len(self.liste)-1) self.noms_de_voies.add(ad.voie) def charge_numeros_ban(self): - data = sql_get_data('numeros_ban',dict(code_insee=self.code_insee),db.bano_sources) + data = sql_get_data('charge_ban_commune',dict(code_insee=self.code_insee),db.bano_sources) for numero, voie, lon, lat, code_postal, code_insee_ac, nom_ac in data: self.add_adresse(Adresse(lon,lat,numero,'BAN',voie=voie,code_postal=code_postal,sous_commune_code=code_insee_ac,sous_commune_nom=nom_ac)) + + def charge_numeros_osm(self): + return None + + def charge_noms_osm(self): + data = sql_get_data('charge_ban_commune',dict(code_insee=self.code_insee),db.bano_sources) + return None + + +class Topo: + def __init__(self, code_insee): + self.code_insee = code_insee + self.topo = {} + + # self.index_by_nom_normalise = defaultdict(list) + + self.charge_topo() + + def __iter__(self): + return iter(self.topo.items()) + + # def __contains__(self, item): + # return item in self.a + + def 
__getitem__(self, key): + return self.topo[key] + + def _print(self,pattern=None): + for k,v in self: + if not pattern or pattern in v: + print(f"{k} : {v}") + + def charge_topo(self): + data = sql_get_data('charge_topo_commune',dict(code_insee=self.code_insee),db.bano_sources) + for fantoir,nom in data: + self.topo[fantoir] = nom + self.topo[nom] = fantoir From b6c5b164a5897418a9b614eb0dd38216ca41347c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Fri, 2 Sep 2022 21:15:24 +0000 Subject: [PATCH 021/163] =?UTF-8?q?Amorce=20de=20boite=20=C3=A0=20outils?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- bano/boite_a_outils.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100755 bano/boite_a_outils.py diff --git a/bano/boite_a_outils.py b/bano/boite_a_outils.py new file mode 100755 index 0000000..e03b518 --- /dev/null +++ b/bano/boite_a_outils.py @@ -0,0 +1,14 @@ +#!/usr/bin/env python +# coding: UTF-8 + +from . import db +from .sql import sql_process +from . import batch as b + +def maj_table_communes(**kwargs): + batch_id = b.batch_start_log('maj_table_communes','France','France') + try: + sql_process('create_table_polygones_communes',dict(),db.bano_sources) + b.batch_stop_log(batch_id,True) + except: + b.batch_stop_log(batch_id,False) From 19aabd0c83f0a544eadc12d4b17a8485ec78909a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Fri, 2 Sep 2022 21:16:47 +0000 Subject: [PATCH 022/163] Suffixes en 4326 --- bano/pre_process_suffixe.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/bano/pre_process_suffixe.py b/bano/pre_process_suffixe.py index 23f4fdb..e504c83 100644 --- a/bano/pre_process_suffixe.py +++ b/bano/pre_process_suffixe.py @@ -76,12 +76,12 @@ def collect_adresses_points(selection, adresses): for nom_voie in selection[k]['liste']: s = 0 max = 2 - for i in adresses.index_by_voie[nom_voie]: + for i in adresses.index_voie[nom_voie]: add = adresses[i] suffixe = k.replace("'","''") kres[k].append(f"SELECT '{suffixe}' AS libelle_suffixe,'{adresses.code_insee}' AS code_insee,ST_BUFFER(ST_PointFromText('POINT({add.x} {add.y})',4326),0.0003,2) as g") - # s+=1 - # if s == max: break + s+=1 + if s == max: break return kres def load_suffixe_2_db(adds, code_insee, nom_commune): @@ -91,7 +91,7 @@ def load_suffixe_2_db(adds, code_insee, nom_commune): if code_insee == '34003': continue print(f"{code_insee} - {nom_commune}......... 
{h}") - str_query = f"INSERT INTO suffixe SELECT ST_Transform(ST_SetSRID((ST_Dump(gu)).geom,4326),3857),code_insee,libelle_suffixe FROM (SELECT ST_Union(g) gu,code_insee,libelle_suffixe FROM({' UNION ALL '.join(adds[h])})a GROUP BY 2,3)a;" + str_query = f"INSERT INTO suffixe SELECT ST_SetSRID((ST_Dump(gu)).geom,4326),code_insee,libelle_suffixe FROM (SELECT ST_Union(g) gu,code_insee,libelle_suffixe FROM({' UNION ALL '.join(adds[h])})a GROUP BY 2,3)a;" cur.execute(str_query) def process(departements, **kwargs): From ca64b2af6deff1198906d926e4c09a46255f9df4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Fri, 2 Sep 2022 21:27:50 +0000 Subject: [PATCH 023/163] Programme v3 --- README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README.md b/README.md index fe1e687..982829d 100644 --- a/README.md +++ b/README.md @@ -2,3 +2,9 @@ bano ==== Différents outils pour la constitution de la Base Adresses Nationale Ouverte + +v3 à venir : +- unification des adresses voies & lieux-dits +- gestion des communes fusionnées +- remplacement de FANTOIR par TOPO +- etc \ No newline at end of file From db1eb975b8bb14b83049d61427deb37095322053 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Tue, 6 Sep 2022 21:47:42 +0000 Subject: [PATCH 024/163] 1ers tests de rapprochement (WIP) --- bano/helpers.py | 181 +++++++++++++++++- bano/models.py | 44 +++-- bano/sql/charge_topo_commune.sql | 3 +- bano/sql/create_table_base_bano.sql | 27 +++ .../suppression_adresses_commune_source.sql | 3 + 5 files changed, 242 insertions(+), 16 deletions(-) create mode 100644 bano/sql/suppression_adresses_commune_source.sql diff --git a/bano/helpers.py b/bano/helpers.py index 7344f2e..770ef24 100644 --- a/bano/helpers.py +++ b/bano/helpers.py @@ -1,6 +1,179 @@ +from pathlib import Path + +from . 
import constants + + +def find_cp_in_tags(tags): + return tags.get('addr:postcode') or tags.get('postal_code') or '' + +def escape_quotes(s): + return s.replace('\'','\'\'') + +def remove_quotes(s): + return s.replace('\'','') + +def remove_quotes_on_null(s): + return s.replace("'null'","null") + +def replace_single_quotes_with_double(s): + return s.replace('\'','"') + +def format_toponyme(s): + a_s = s.replace('\'',' ').split(' ') + + # Accents + dic_replace_accents = {} + dic_replace_accents['DERRIERE'] = u'DERRIÈRE' + dic_replace_accents['EGLISE'] = u'ÉGLISE' + dic_replace_accents['ILE'] = u'ÎLE' + dic_replace_accents['ILOT'] = u'ÎLOT' + dic_replace_accents['PRE'] = u'PRÉ' + + for m in range(0,len(a_s)): + if a_s[m] in dic_replace_accents: + a_s[m] = dic_replace_accents[a_s[m]] + + # Capitalisation + a_s = [a.capitalize() for a in a_s] + + # Minuscules + dic_replace_hors_premier_mot = {} + dic_replace_hors_premier_mot['Au'] = 'au' + dic_replace_hors_premier_mot['Aux'] = 'aux' + dic_replace_hors_premier_mot['D'] = 'd\'' + dic_replace_hors_premier_mot['De'] = 'de' + dic_replace_hors_premier_mot['Des'] = 'des' + dic_replace_hors_premier_mot['Du'] = 'du' + dic_replace_hors_premier_mot['Et'] = 'et' + dic_replace_hors_premier_mot['L'] = 'l\'' + dic_replace_hors_premier_mot['La'] = 'la' + dic_replace_hors_premier_mot['Le'] = 'le' + dic_replace_hors_premier_mot['Les'] = 'les' + dic_replace_hors_premier_mot['Un'] = 'un' + dic_replace_hors_premier_mot['Une'] = 'une' + + if len(a_s) > 1: + for m in range(1,len(a_s)): + if a_s[m] in dic_replace_hors_premier_mot: + a_s[m] = dic_replace_hors_premier_mot[a_s[m]] + + # Appostrophes initiale + dic_ajoute_apostrophe = {} + dic_ajoute_apostrophe['d'] = 'd\'' + dic_ajoute_apostrophe['D'] = 'D\'' + dic_ajoute_apostrophe['l'] = 'l\'' + dic_ajoute_apostrophe['L'] = 'L\'' + + if a_s[0] in dic_ajoute_apostrophe: + a_s[0] = dic_ajoute_apostrophe[a_s[0]] + + s = ' '.join(a_s).replace('\' ','\'') + if len(s.strip())>1 and s.strip()[-1] == '\'': + s = s.strip()[0:-1] + return s + +def get_nb_parts(s): + return len(s.split()) + + +def get_part_debut(s,nb_parts): + resp = '' + if get_nb_parts(s) > nb_parts: + resp = ' '.join(s.split()[0:nb_parts]) + return resp + + +def is_valid_housenumber(hsnr): + if hsnr: + return len(hsnr) <= 11 + return False + +def is_valid_dept(dept): + return dept in constants.DEPARTEMENTS + def get_code_dept_from_insee(code_insee): - return code_insee[0:3] if code_insee[0:2] == '97' else code_insee[0:2] + code_dept = code_insee[0:2] + if code_dept == '97': + code_dept = code_insee[0:3] + return code_dept + +def get_sql_like_dept_string(dept): + return (dept+'___')[0:5] + +def normalize(s): + s = s.upper() # tout en majuscules + # s = s.split(' (')[0] # parenthèses : on coupe avant + s = s.replace('(','').replace(')','') # parenthèses : on supprime cf Fantoir pour les anciennes communes en suffixe + s = s.replace('-',' ') # separateur espace + s = s.replace('\'',' ') # separateur espace + s = s.replace('’',' ') # separateur espace + s = s.replace('/',' ') # separateur espace + s = s.replace(':',' ') # separateur deux points + s = ' '.join(s.split()) # separateur : 1 espace + + for l in iter(constants.LETTRE_A_LETTRE): + for ll in constants.LETTRE_A_LETTRE[l]: + s = s.replace(ll,l) + + +# type de voie + abrev_trouvee = False + p = 5 + while (not abrev_trouvee) and p > -1: + p-= 1 + if get_part_debut(s,p) in constants.ABREV_TYPE_VOIE: + s = replace_type_voie(s,p) + abrev_trouvee = True +# ordinal + s = s.replace(' EME ','EME ') + s = 
s.replace(' 1ERE',' PREMIERE') + s = s.replace(' 1ER',' PREMIER') + +# chiffres + for c in constants.CHIFFRES: + s = s.replace(c[0],c[1]) + +# titres, etc. + for r in constants.EXPAND_NOMS: + s = s.replace(' '+r[0]+' ',' '+r[1]+' ') + if s[-len(r[0]):] == r[0]: + s = s.replace(' '+r[0],' '+r[1]) + for r in constants.EXPAND_TITRES: + s = s.replace(' '+r[0]+' ',' '+r[1]+' ') + if s[-len(r[0]):] == r[0]: + s = s.replace(' '+r[0],' '+r[1]) + for r in constants.ABREV_TITRES: + s = s.replace(' '+r[0]+' ',' '+r[1]+' ') + if s[-len(r[0]):] == r[0]: + s = s.replace(' '+r[0],' '+r[1]) + +# articles + for c in constants.MOT_A_BLANC: + s = s.replace(' '+c+' ',' ') + +# chiffres romains + sp = s.split() + + if len(sp)>0 and sp[-1] in constants.CHIFFRES_ROMAINS: + sp[-1] = constants.CHIFFRES_ROMAINS[sp[-1]] + s = ' '.join(sp) + +# substitution complete + if s in constants.SUBSTITUTION_COMPLETE: + s = constants.SUBSTITUTION_COMPLETE[s] + return s[0:30] + + +def replace_type_voie(s,nb): + sp = s.split() + spd = ' '.join(sp[0:nb]) + spf = ' '.join(sp[nb:len(sp)]) + s = constants.ABREV_TYPE_VOIE[spd]+' '+spf + return s + + +def is_valid_fantoir(f, insee): + return (len(f) == 10 and f[0:5] == insee); -def get_code_dir_dict(): - with open('./data/code_dir.json') as f: - return json.loads(f.read()) \ No newline at end of file +def display_insee_commune(code_insee, nom_commune): + print(f"{code_insee} - {nom_commune}") \ No newline at end of file diff --git a/bano/models.py b/bano/models.py index 23b2835..66a39f4 100644 --- a/bano/models.py +++ b/bano/models.py @@ -1,19 +1,18 @@ #!/usr/bin/env python # coding: UTF-8 -# import re -from collections import defaultdict - +import io +from collections import defaultdict,OrderedDict from . import db -# from . import helpers as hp +from . import helpers as hp from .sources import fantoir -from .sql import sql_get_data -# from . 
import core as c +from .sql import sql_get_data,sql_process class Adresse: - def __init__(self, x, y, num, source, voie=None, place=None, fantoir=None, code_postal=None, sous_commune_code=None, sous_commune_nom=None): + def __init__(self, code_insee, x, y, num, source, voie=None, place=None, fantoir=None, code_postal=None, sous_commune_code=None, sous_commune_nom=None): + self.code_insee = code_insee self.x = x self.y = y self.source = source @@ -24,9 +23,14 @@ def __init__(self, x, y, num, source, voie=None, place=None, fantoir=None, code_ self.code_postal = code_postal self.sous_commune_code = sous_commune_code self.sous_commune_nom = sous_commune_nom + self.voie_normalisee = hp.normalize(self.voie) if self.voie else None + self.place_normalisee = hp.format_toponyme(self.place) if self.place else None + + def _as_csv_format_bano(self): + return f"{self.fantoir}${self.x}${self.y}${self.numero}${hp.escape_quotes(self.voie)}${self.code_postal}${self.code_insee}${self.sous_commune_code}${self.source}" def _as_string(self): - return (f"source : {self.source}, numero : {self.numero}, voie : {self.voie}, place : {self.place}, fantoir : {self.fantoir}, code_postal:{self.code_postal}, sous_commune : {self.sous_commune_code} - {self.sous_commune_nom}") + return (f"source : {self.source}, numero : {self.numero}, voie : {self.voie} ({self.voie_normalisee}), place : {self.place}, fantoir : {self.fantoir}, code_postal:{self.code_postal}, sous_commune : {self.sous_commune_code} - {self.sous_commune_nom}") class Adresses: @@ -64,20 +68,37 @@ def add_adresse(self,ad): def charge_numeros_ban(self): data = sql_get_data('charge_ban_commune',dict(code_insee=self.code_insee),db.bano_sources) for numero, voie, lon, lat, code_postal, code_insee_ac, nom_ac in data: - self.add_adresse(Adresse(lon,lat,numero,'BAN',voie=voie,code_postal=code_postal,sous_commune_code=code_insee_ac,sous_commune_nom=nom_ac)) + self.add_adresse(Adresse(self.code_insee,lon,lat,numero,'BAN',voie=voie,code_postal=code_postal,sous_commune_code=code_insee_ac,sous_commune_nom=nom_ac)) def charge_numeros_osm(self): return None def charge_noms_osm(self): - data = sql_get_data('charge_ban_commune',dict(code_insee=self.code_insee),db.bano_sources) + # data = sql_get_data('charge_noms_voies_lieux-dits_OSM',dict(code_insee=self.code_insee),db.bano_sources) + # data = sql_get_data('charge_noms_voies_relation_bbox_OSM',dict(code_insee=self.code_insee),db.bano_sources) + data = sql_get_data('charge_noms_voies_relation_OSM',dict(code_insee=self.code_insee),db.bano_sources) + + for d in data: + print(hp.normalize(d[0]),d) return None + def save(self,source): + sql_process('suppression_adresses_commune_source',dict(code_insee=self.code_insee,source=source),db.bano) + io_in_csv = io.StringIO() + for a in self: + if a.source == source: + io_in_csv.write(a._as_csv_format_bano()+'\n') # separateur $ car on trouve des virgules dans le contenu + io_in_csv.seek(0) + with db.bano.cursor() as cur_insert: + cur_insert.copy_from(io_in_csv, "bano_adresses", sep='$',null='',columns=('fantoir','lon','lat','numero','nom_voie','code_postal','code_insee','code_insee_ancienne_commune','source')) + + + class Topo: def __init__(self, code_insee): self.code_insee = code_insee - self.topo = {} + self.topo = OrderedDict() # self.index_by_nom_normalise = defaultdict(list) @@ -100,5 +121,6 @@ def _print(self,pattern=None): def charge_topo(self): data = sql_get_data('charge_topo_commune',dict(code_insee=self.code_insee),db.bano_sources) for fantoir,nom in data: + nom = 
hp.normalize(' '.join(nom.replace('-',' ').split())) self.topo[fantoir] = nom self.topo[nom] = fantoir diff --git a/bano/sql/charge_topo_commune.sql b/bano/sql/charge_topo_commune.sql index ee776d5..7a826dc 100644 --- a/bano/sql/charge_topo_commune.sql +++ b/bano/sql/charge_topo_commune.sql @@ -15,4 +15,5 @@ FROM t) SELECT fantoir10, nom FROM tr -WHERE rang = 1; \ No newline at end of file +WHERE rang = 1 +ORDER BY length(nom) DESC,nom; \ No newline at end of file diff --git a/bano/sql/create_table_base_bano.sql b/bano/sql/create_table_base_bano.sql index 7f07c1e..34abf2a 100644 --- a/bano/sql/create_table_base_bano.sql +++ b/bano/sql/create_table_base_bano.sql @@ -1,3 +1,30 @@ +CREATE TABLE IF NOT EXISTS bano_adresses ( + fantoir text, + lon float, + lat float, + numero text, + nom_voie text, + code_postal text, + code_insee text, + code_insee_ancienne_commune text, + nom_ancienne_commune text, + source text, + certification_commune integer, + geometrie geometry (Point, 4326) GENERATED ALWAYS AS (ST_Point(lon,lat)) STORED); + +CREATE INDEX IF NOT EXISTS gidx_bano_adresses ON bano_adresses USING GIST(geometrie); +CREATE INDEX IF NOT EXISTS idx_bano_adresses_code_insee ON bano_adresses (code_insee); + +CREATE TABLE IF NOT EXISTS nom_fantoir ( + fantoir text, + nom text, + code_insee text, + source text); + +CREATE INDEX IF NOT EXISTS idx_nom_fantoir_code_insee ON nom_fantoir (code_insee); +CREATE INDEX IF NOT EXISTS idx_nom_fantoir_code_insee_source ON nom_fantoir (code_insee,source); + + CREATE TABLE IF NOT EXISTS batch ( id_batch serial, etape text, diff --git a/bano/sql/suppression_adresses_commune_source.sql b/bano/sql/suppression_adresses_commune_source.sql new file mode 100644 index 0000000..f842f23 --- /dev/null +++ b/bano/sql/suppression_adresses_commune_source.sql @@ -0,0 +1,3 @@ +DELETE FROM bano_adresses +WHERE code_insee = '__code_insee__' AND + source = '__source__'; \ No newline at end of file From e64f687cbd56a9f6f40c390e3069dc8b6ef4a8d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Sun, 25 Sep 2022 14:03:59 +0000 Subject: [PATCH 025/163] Gestion des noms --- bano/sql/suppression_noms_commune_source.sql | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 bano/sql/suppression_noms_commune_source.sql diff --git a/bano/sql/suppression_noms_commune_source.sql b/bano/sql/suppression_noms_commune_source.sql new file mode 100644 index 0000000..76a79e9 --- /dev/null +++ b/bano/sql/suppression_noms_commune_source.sql @@ -0,0 +1,3 @@ +DELETE FROM nom_fantoir +WHERE code_insee = '__code_insee__' AND + source = '__source__'; \ No newline at end of file From 47d8b4c874bb1a0700cfb5e7963d8d0fb76666d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Sun, 25 Sep 2022 14:06:58 +0000 Subject: [PATCH 026/163] Gestion des noms --- bano/models.py | 74 ++++++++++++++++--- bano/rapprochement.py | 22 ++++-- bano/sql/charge_noms_voies_lieux-dits_OSM.sql | 55 ++++++++++++++ bano/sql/charge_noms_voies_relation_OSM.sql | 55 ++++++++++++++ .../charge_noms_voies_relation_bbox_OSM.sql | 34 +++++++++ 5 files changed, 224 insertions(+), 16 deletions(-) create mode 100644 bano/sql/charge_noms_voies_lieux-dits_OSM.sql create mode 100644 bano/sql/charge_noms_voies_relation_OSM.sql create mode 100644 bano/sql/charge_noms_voies_relation_bbox_OSM.sql diff --git a/bano/models.py b/bano/models.py index 66a39f4..1ba54aa 100644 --- a/bano/models.py +++ b/bano/models.py @@ -6,7 +6,7 @@ from . import db from . 
import helpers as hp -from .sources import fantoir +# from .sources import fantoir from .sql import sql_get_data,sql_process @@ -32,13 +32,58 @@ def _as_csv_format_bano(self): def _as_string(self): return (f"source : {self.source}, numero : {self.numero}, voie : {self.voie} ({self.voie_normalisee}), place : {self.place}, fantoir : {self.fantoir}, code_postal:{self.code_postal}, sous_commune : {self.sous_commune_code} - {self.sous_commune_nom}") +class Nom: + def __init__(self,nom,fantoir,source,code_insee): + self.code_insee = code_insee + self.nom = nom + self.source = source + self.fantoir = fantoir + self.nom_normalise = hp.normalize(nom) + + def __eq__(self,other): + return self.nom == other.nom and self.fantoir == other.fantoir and self.source == other.source and self.code_insee == other.code_insee + + def __hash__(self): + return hash((self.nom,self.fantoir,self.source,self.code_insee)) + + def _as_csv_format_bano(self): + return f"{self.fantoir}${hp.escape_quotes(self.nom)}${self.code_insee}${self.source}" + + def add_fantoir(self,topo): + if not self.fantoir: + self.fantoir = topo.topo.get(self.nom_normalise) + +class Noms: + def __init__(self, code_insee,source): + self.code_insee = code_insee + self.source = source + self.triplets_nom_fantoir_source = set() + + def add_nom(self,nom=Nom): + self.triplets_nom_fantoir_source.add(nom) + + def add_fantoir(self,topo): + for t in self.triplets_nom_fantoir_source: + t.add_fantoir(topo) + + def enregistre(self): + sql_process('suppression_noms_commune_source',dict(code_insee=self.code_insee,source=self.source),db.bano) + io_in_csv = io.StringIO() + for t in self.triplets_nom_fantoir_source: + if t.fantoir: + io_in_csv.write(t._as_csv_format_bano()+'\n') + io_in_csv.seek(0) + with db.bano.cursor() as cur_insert: + cur_insert.copy_from(io_in_csv, "nom_fantoir", sep='$',null='',columns=('fantoir','nom','code_insee','source')) class Adresses: - def __init__(self, code_insee): + def __init__(self, code_insee,source): self.code_insee = code_insee + self.source = source self.liste = [] self.index_voie = defaultdict(list) self.noms_de_voies = set() + self.triplets_nom_fantoir_source = set() # def __contains__(self, item): # return item in self.a @@ -65,13 +110,17 @@ def add_adresse(self,ad): self.index_voie[ad.voie].append(len(self.liste)-1) self.noms_de_voies.add(ad.voie) + def add_fantoir(self): + for nom in self.noms: + fantoir = topo.topo.get(a.voie_normalisee) + def charge_numeros_ban(self): data = sql_get_data('charge_ban_commune',dict(code_insee=self.code_insee),db.bano_sources) for numero, voie, lon, lat, code_postal, code_insee_ac, nom_ac in data: self.add_adresse(Adresse(self.code_insee,lon,lat,numero,'BAN',voie=voie,code_postal=code_postal,sous_commune_code=code_insee_ac,sous_commune_nom=nom_ac)) def charge_numeros_osm(self): - return None + data = sql_get_data('charge_numeros_bbox_OSM',dict(code_insee=self.code_insee),db.bano_sources) def charge_noms_osm(self): # data = sql_get_data('charge_noms_voies_lieux-dits_OSM',dict(code_insee=self.code_insee),db.bano_sources) @@ -79,22 +128,27 @@ def charge_noms_osm(self): data = sql_get_data('charge_noms_voies_relation_OSM',dict(code_insee=self.code_insee),db.bano_sources) for d in data: - print(hp.normalize(d[0]),d) + print(d) + print(hp.normalize(d[0])) return None - def save(self,source): - sql_process('suppression_adresses_commune_source',dict(code_insee=self.code_insee,source=source),db.bano) + def noms_des_adresses(self,noms): + for a in self: + if a.voie: + 
noms.triplets_nom_fantoir_source.add(Nom(a.voie,a.fantoir,a.source,self.code_insee)) + if a.place: + noms.triplets_nom_fantoir_source.add(Nom(a.place,a.fantoir,a.source,self.code_insee)) + + def enregistre(self): + sql_process('suppression_adresses_commune_source',dict(code_insee=self.code_insee,source=self.source),db.bano) io_in_csv = io.StringIO() for a in self: - if a.source == source: - io_in_csv.write(a._as_csv_format_bano()+'\n') # separateur $ car on trouve des virgules dans le contenu + io_in_csv.write(a._as_csv_format_bano()+'\n') # separateur $ car on trouve des virgules dans le contenu io_in_csv.seek(0) with db.bano.cursor() as cur_insert: cur_insert.copy_from(io_in_csv, "bano_adresses", sep='$',null='',columns=('fantoir','lon','lat','numero','nom_voie','code_postal','code_insee','code_insee_ancienne_commune','source')) - - class Topo: def __init__(self, code_insee): self.code_insee = code_insee diff --git a/bano/rapprochement.py b/bano/rapprochement.py index 3f3ce64..83f2612 100644 --- a/bano/rapprochement.py +++ b/bano/rapprochement.py @@ -1,11 +1,21 @@ #!/usr/bin/env python # coding: UTF-8 -from .models import Adresses, Topo +from .models import Noms, Adresses, Topo def process(source,code_insee,dept,**kwargs): - # topo = Topo(code_insee) - # topo._print('CO') - adresses = Adresses(code_insee) - adresses.charge_numeros_ban() - adresses._print('Hell') \ No newline at end of file + topo = Topo(code_insee) + adresses = Adresses(code_insee,source) + noms = Noms(code_insee,source) + + if source == 'OSM': + adresses.charge_noms_osm() + adresses.charge_numeros_osm() + + if source == 'BAN': + adresses.charge_numeros_ban() + + adresses.noms_des_adresses(noms) + noms.add_fantoir(topo) + adresses.enregistre() + noms.enregistre() diff --git a/bano/sql/charge_noms_voies_lieux-dits_OSM.sql b/bano/sql/charge_noms_voies_lieux-dits_OSM.sql new file mode 100644 index 0000000..ef3fe86 --- /dev/null +++ b/bano/sql/charge_noms_voies_lieux-dits_OSM.sql @@ -0,0 +1,55 @@ +SELECT pl.name, + pl."ref:FR:FANTOIR" f, + '' fl, + '' fr, + h.libelle_suffixe, + p."ref:INSEE", + CASE + WHEN pl.place='' THEN 'voie'::text + ELSE 'lieudit' + END AS nature +FROM planet_osm_polygon p +JOIN planet_osm_point pl +ON pl.way && p.way AND + ST_Intersects(pl.way, p.way) +LEFT OUTER JOIN suffixe h +ON ST_Intersects(pl.way, h.geometrie) +WHERE p."ref:INSEE" = '__code_insee__' AND + (pl."ref:FR:FANTOIR" !='' OR pl.place != '') AND + pl.name != '' +UNION +SELECT l.name, + l.tags->'ref:FR:FANTOIR' f, + l.tags->'ref:FR:FANTOIR:left' fl, + l.tags->'ref:FR:FANTOIR:right' fr, + h.libelle_suffixe, + p."ref:INSEE", + 'voie' +FROM planet_osm_polygon p +JOIN planet_osm_line l +ON ST_Intersects(l.way, p.way) +LEFT OUTER JOIN suffixe h +ON ST_Intersects(l.way, h.geometrie) +WHERE p."ref:INSEE" = '__code_insee__' AND + l.highway != '' AND + l.name != '' +UNION +SELECT pl.name, + pl."ref:FR:FANTOIR" f, + pl."ref:FR:FANTOIR:left" fl, + pl."ref:FR:FANTOIR:right" fr, + h.libelle_suffixe, + p."ref:INSEE", + 'voie' +FROM planet_osm_polygon p +JOIN planet_osm_polygon pl +ON pl.way && p.way AND + ST_Intersects(pl.way, p.way) +LEFT OUTER JOIN suffixe h +ON ST_Intersects(pl.way, h.geometrie) +WHERE p."ref:INSEE" = '__code_insee__' AND + (pl.highway||pl."ref:FR:FANTOIR" != '' OR + pl.landuse = 'residential' OR + pl.amenity = 'parking') AND + pl.name != ''; + diff --git a/bano/sql/charge_noms_voies_relation_OSM.sql b/bano/sql/charge_noms_voies_relation_OSM.sql new file mode 100644 index 0000000..4f8bedb --- /dev/null +++ 
b/bano/sql/charge_noms_voies_relation_OSM.sql @@ -0,0 +1,55 @@ +WITH +a +AS +(SELECT DISTINCT unnest(array[l.name,l.tags->'alt_name',l.tags->'old_name']) as name, + l.way, + r.tags, +-- h.libelle_suffixe, +-- a9.insee_com, +-- a9.nom, + 'voie'::text +FROM (SELECT way FROM planet_osm_polygon WHERE "ref:INSEE" = '__code_insee__') p +JOIN (SELECT name,tags,osm_id,way FROM planet_osm_line WHERE highway != '' AND name != '') l +ON ST_Intersects(l.way, p.way) +JOIN planet_osm_rels r +ON r.osm_id = l.osm_id), +/*LEFT OUTER JOIN (SELECT * FROM suffixe WHERE insee_com = '__code_insee__') h +ON ST_Intersects(l.way, h.geometrie) +LEFT OUTER JOIN (SELECT * FROM polygones_insee_a9 where insee_a8 = '__code_insee__') a9 +ON ST_Contains(a9.geometrie,l.way)*/ +b +as +(SELECT DISTINCT unnest(array[l.name,l.tags->'alt_name',l.tags->'old_name']) as name, + l.way, + r.tags, +-- h.libelle_suffixe, +-- a9.insee_com, +-- a9.nom, + 'voie'::text +FROM (SELECT way FROM planet_osm_polygon WHERE "ref:INSEE" = '__code_insee__') p +JOIN (SELECT name,tags,osm_id,way FROM planet_osm_polygon WHERE highway != '' AND name != '') l +ON ST_Intersects(l.way, p.way) +JOIN planet_osm_rels r +ON r.osm_id = l.osm_id) +select a.name, + a.tags, + h.libelle_suffixe, + a9.insee_com, + a9.nom +from a +LEFT OUTER JOIN (SELECT * FROM suffixe WHERE insee_com = '__code_insee__') h +ON ST_Intersects(way, h.geometrie) +LEFT OUTER JOIN (SELECT * FROM polygones_insee_a9 where insee_a8 = '__code_insee__') a9 +ON ST_Contains(a9.geometrie,way) +union +select b.name, + b.tags, + h.libelle_suffixe, + a9.insee_com, + a9.nom +from b +LEFT OUTER JOIN (SELECT * FROM suffixe WHERE insee_com = '__code_insee__') h +ON ST_Intersects(way, h.geometrie) +LEFT OUTER JOIN (SELECT * FROM polygones_insee_a9 where insee_a8 = '__code_insee__') a9 +ON ST_Contains(a9.geometrie,way) +; \ No newline at end of file diff --git a/bano/sql/charge_noms_voies_relation_bbox_OSM.sql b/bano/sql/charge_noms_voies_relation_bbox_OSM.sql new file mode 100644 index 0000000..267a90b --- /dev/null +++ b/bano/sql/charge_noms_voies_relation_bbox_OSM.sql @@ -0,0 +1,34 @@ +SELECT DISTINCT l.name, + r.tags, + h.libelle_suffixe, + '__code_insee__', + a9.insee_com, + a9.nom, + 'voie'::text +FROM (SELECT way FROM planet_osm_polygon WHERE "ref:INSEE" = '__code_insee__') p +JOIN (SELECT name,osm_id,way FROM planet_osm_line WHERE highway != '' AND name != '') l +ON l.way && p.way +JOIN planet_osm_rels r +ON r.osm_id = l.osm_id +LEFT OUTER JOIN suffixe h +ON ST_Intersects(l.way, h.geometrie) +LEFT OUTER JOIN (SELECT * FROM polygones_insee_a9 where insee_a8 = '__code_insee__') a9 +ON ST_Contains(a9.geometrie,l.way) +UNION ALL +SELECT DISTINCT l.name, + r.tags, + h.libelle_suffixe, + '__code_insee__', + a9.insee_com, + a9.nom, + 'voie'::text +FROM (SELECT way FROM planet_osm_polygon WHERE "ref:INSEE" = '__code_insee__') p +JOIN (SELECT name,osm_id,way FROM planet_osm_polygon WHERE highway != '' AND name != '') l +ON l.way && p.way +JOIN planet_osm_rels r +ON r.osm_id = l.osm_id +LEFT OUTER JOIN suffixe h +ON ST_Intersects(l.way, h.geometrie) +LEFT OUTER JOIN (SELECT * FROM polygones_insee_a9 where insee_a8 = '__code_insee__') a9 +ON ST_Contains(a9.geometrie,l.way); + From 5271aeb0356751fc0a80c749b021ebd5e2c5c934 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Sun, 25 Sep 2022 14:07:46 +0000 Subject: [PATCH 027/163] Table suffixe --- bano/sql/create_table_base_bano_sources.sql | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git 
a/bano/sql/create_table_base_bano_sources.sql b/bano/sql/create_table_base_bano_sources.sql index a83e4eb..c0d0c27 100644 --- a/bano/sql/create_table_base_bano_sources.sql +++ b/bano/sql/create_table_base_bano_sources.sql @@ -43,9 +43,10 @@ CREATE INDEX IF NOT EXISTS idx_ban_code_insee ON ban(code_insee); CREATE TABLE IF NOT EXISTS suffixe ( geometrie geometry, - insee_com character(5), + code_insee character(5), libelle_suffixe character varying(100) ); CREATE INDEX IF NOT EXISTS gidx_suffixe ON suffixe USING GIST(geometrie); +CREATE INDEX IF NOT EXISTS idx_suffixe ON suffixe USING GIST(code_insee); GRANT SELECT ON ALL TABLES IN SCHEMA public TO public; \ No newline at end of file From d13379a3c72bddbacf4d438bccbb6146dfdd9ce5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Sun, 25 Sep 2022 14:09:23 +0000 Subject: [PATCH 028/163] Numeros OSM --- bano/sql/charge_numeros_OSM.sql | 73 ++++++++++++++++++++++++++++ bano/sql/charge_numeros_bbox_OSM.sql | 44 +++++++++++++++++ 2 files changed, 117 insertions(+) create mode 100644 bano/sql/charge_numeros_OSM.sql create mode 100644 bano/sql/charge_numeros_bbox_OSM.sql diff --git a/bano/sql/charge_numeros_OSM.sql b/bano/sql/charge_numeros_OSM.sql new file mode 100644 index 0000000..3a25bba --- /dev/null +++ b/bano/sql/charge_numeros_OSM.sql @@ -0,0 +1,73 @@ +select ST_X(pt_way), + ST_Y(pt_way), + provenance, + hsnr, + street_name, + tags, + h.libelle_suffixe, + insee_com +FROM +-- point avec addr:street + (SELECT 1 provenance, + pt.way, + pt."addr:housenumber" hsnr, + pt."addr:street" street_name, + pt."addr:place" place_name, + null::hstore tags, + p."ref:INSEE" insee_com + FROM planet_osm_polygon p + JOIN planet_osm_point pt + ON ST_Intersects(pt.way, p.way) + WHERE p."ref:INSEE" = '__com__' AND + pt."addr:housenumber" != '' AND + pt."addr:street" != '' + UNION ALL +-- way avec addr:street + SELECT 2, + ST_Centroid(w.way), + w."addr:housenumber", + w."addr:street", + pt."addr:place", + null::hstore tags, + p."ref:INSEE" + FROM planet_osm_polygon p + JOIN planet_osm_polygon w + ON ST_Intersects(w.way, p.way) + WHERE p."ref:INSEE" = '__com__' AND + w."addr:housenumber" != '' AND + w."addr:street" != '' + UNION ALL +-- point dans relation associatedStreet + SELECT 3, + pt.way, + pt."addr:housenumber", + null, + null, + r.tags, + p."ref:INSEE" + FROM planet_osm_polygon p + JOIN planet_osm_point pt + ON ST_Intersects(pt.way, p.way) + JOIN planet_osm_rels r + ON r.osm_id = pt.osm_id + WHERE p."ref:INSEE" = '__com__' AND + pt."addr:housenumber" != '' + UNION ALL +-- way dans relation associatedStreet + SELECT 4, + ST_Centroid(w.way), + w."addr:housenumber", + null, + null, + r.tags, + p."ref:INSEE" + FROM planet_osm_polygon p + JOIN planet_osm_polygon w + ON ST_Intersects(w.way, p.way) + JOIN planet_osm_rels r + ON r.osm_id = w.osm_id + WHERE p."ref:INSEE" = '__com__' AND + w."addr:housenumber" != '' +)a +LEFT OUTER JOIN suffixe h +ON ST_Intersects(a.way, h.geometrie); diff --git a/bano/sql/charge_numeros_bbox_OSM.sql b/bano/sql/charge_numeros_bbox_OSM.sql new file mode 100644 index 0000000..6dd34cf --- /dev/null +++ b/bano/sql/charge_numeros_bbox_OSM.sql @@ -0,0 +1,44 @@ +SELECT ST_X(pt_geo), + ST_Y(pt_geo), + provenance, + osm_id, + hsnr, + street_name, + tags, + ''::text suffixe, + insee_com +FROM +-- point dans relation associatedStreet + (SELECT 3 provenance, + pt.way, + pt."addr:housenumber" hsnr, + null::text street_name, + r.tags tags, + p."ref:INSEE" insee_com + FROM (SELECT ST_Extent(way) way, "ref:INSEE" 
FROM planet_osm_polygon WHERE "ref:INSEE" = '__com__' GROUP BY 2) p + JOIN planet_osm_point pt + ON ST_Intersects(pt.way,p.way) + JOIN planet_osm_rels r + ON r.osm_id = pt.osm_id + WHERE p."ref:INSEE" = '__com__' AND + pt."addr:housenumber" != '' + UNION +-- way dans relation associatedStreet + SELECT 4, + ST_Centroid(w.way), + w.osm_id, + w."addr:housenumber", + null, + r.tags, + p."ref:INSEE" +-- FROM planet_osm_polygon p + FROM (SELECT ST_Extent(way) way, "ref:INSEE" FROM planet_osm_polygon WHERE "ref:INSEE" = '__com__' GROUP BY 2) p + JOIN planet_osm_polygon w + ON ST_Intersects(w.way, p.way) + JOIN planet_osm_rels r + ON r.osm_id = w.osm_id + WHERE p."ref:INSEE" = '__com__' AND + w."addr:housenumber" != '' +)a; + + \ No newline at end of file From ac203e3bb607bd9ca4c18caaad166f521eac810d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Sun, 2 Oct 2022 19:37:20 +0000 Subject: [PATCH 029/163] Structure et process imposm --- bano.yml | 3 +++ sql/finalisation.sql | 8 ++++++++ 2 files changed, 11 insertions(+) create mode 100644 sql/finalisation.sql diff --git a/bano.yml b/bano.yml index 19c5d32..885dd95 100644 --- a/bano.yml +++ b/bano.yml @@ -19,6 +19,7 @@ tables: - {key: 'addr:housenumber', name: 'addr:housenumber', type: string} - {key: 'addr:street', name: 'addr:street', type: string} - {key: 'addr:place', name: 'addr:place', type: string} + - {key: 'addr:postcode', name: 'addr:postcode', type: string} - {key: 'ref:FR:FANTOIR', name: 'ref:FR:FANTOIR', type: string} - {key: 'highway', name: 'highway', type: string} - {key: 'amenity', name: 'amenity', type: string} @@ -67,6 +68,8 @@ tables: - {key: admin_level, name: admin_level, type: integer} - {key: 'addr:housenumber', name: 'addr:housenumber', type: string} - {key: 'addr:street', name: 'addr:street', type: string} + - {key: 'addr:place', name: 'addr:place', type: string} + - {key: 'addr:postcode', name: 'addr:postcode', type: string} - {key: 'ref:INSEE', name: 'ref:INSEE', type: string} - {key: 'ref:FR:FANTOIR', name: 'ref:FR:FANTOIR', type: string} - {key: 'ref:FR:FANTOIR:left', name: 'ref:FR:FANTOIR:left', type: string} diff --git a/sql/finalisation.sql b/sql/finalisation.sql new file mode 100644 index 0000000..3d6d705 --- /dev/null +++ b/sql/finalisation.sql @@ -0,0 +1,8 @@ +CREATE INDEX idx_planet_osm_point_fantoir ON planet_osm_point("ref:FR:FANTOIR"); +CREATE INDEX idx_planet_osm_point_housenumber ON planet_osm_point("addr:housenumber"); +CREATE INDEX idx_planet_osm_line_fantoir ON planet_osm_line("ref:FR:FANTOIR"); +CREATE INDEX idx_planet_osm_polygon_fantoir ON planet_osm_polygon("ref:FR:FANTOIR"); +CREATE INDEX idx_planet_osm_polygon_housenumber ON planet_osm_polygon("addr:housenumber"); +CREATE INDEX idx_planet_osm_polygon_ref_insee ON planet_osm_polygon("ref:INSEE"); +CREATE INDEX idx_planet_osm_rels_id ON planet_osm_rels(osm_id); +CREATE INDEX idx_planet_osm_communes_statut_ref_insee ON planet_osm_communes_statut("ref:INSEE"); From b35ea3df1d17590633df05284c685bde302c6a49 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Sun, 2 Oct 2022 21:35:07 +0000 Subject: [PATCH 030/163] PG hstore => Python dict --- bano/db.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bano/db.py b/bano/db.py index 16ab382..57fb5c6 100644 --- a/bano/db.py +++ b/bano/db.py @@ -7,4 +7,4 @@ bano.autocommit = True bano_sources = psycopg2.connect(os.environ.get("BANO_PG_CACHE", "dbname='bano_sources' user='cadastre'")) bano_sources.autocommit = True -# 
psycopg2.extras.register_hstore(bano_cache) +psycopg2.extras.register_hstore(bano_sources) From 5a0b56cb5b6e6fac181e1fcce2a28ea5912c63c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Sun, 2 Oct 2022 21:37:39 +0000 Subject: [PATCH 031/163] Chargement des numeros --- bano/models.py | 79 +++++++++++++++++++--------- bano/sql/charge_numeros_OSM.sql | 75 +++++++++++++------------- bano/sql/charge_numeros_bbox_OSM.sql | 38 ++++++------- 3 files changed, 111 insertions(+), 81 deletions(-) diff --git a/bano/models.py b/bano/models.py index 1ba54aa..15eea99 100644 --- a/bano/models.py +++ b/bano/models.py @@ -2,6 +2,7 @@ # coding: UTF-8 import io +import json from collections import defaultdict,OrderedDict from . import db @@ -9,29 +10,6 @@ # from .sources import fantoir from .sql import sql_get_data,sql_process - -class Adresse: - def __init__(self, code_insee, x, y, num, source, voie=None, place=None, fantoir=None, code_postal=None, sous_commune_code=None, sous_commune_nom=None): - self.code_insee = code_insee - self.x = x - self.y = y - self.source = source - self.numero = num - self.voie = voie - self.place = place - self.fantoir = fantoir - self.code_postal = code_postal - self.sous_commune_code = sous_commune_code - self.sous_commune_nom = sous_commune_nom - self.voie_normalisee = hp.normalize(self.voie) if self.voie else None - self.place_normalisee = hp.format_toponyme(self.place) if self.place else None - - def _as_csv_format_bano(self): - return f"{self.fantoir}${self.x}${self.y}${self.numero}${hp.escape_quotes(self.voie)}${self.code_postal}${self.code_insee}${self.sous_commune_code}${self.source}" - - def _as_string(self): - return (f"source : {self.source}, numero : {self.numero}, voie : {self.voie} ({self.voie_normalisee}), place : {self.place}, fantoir : {self.fantoir}, code_postal:{self.code_postal}, sous_commune : {self.sous_commune_code} - {self.sous_commune_nom}") - class Nom: def __init__(self,nom,fantoir,source,code_insee): self.code_insee = code_insee @@ -76,11 +54,39 @@ def enregistre(self): with db.bano.cursor() as cur_insert: cur_insert.copy_from(io_in_csv, "nom_fantoir", sep='$',null='',columns=('fantoir','nom','code_insee','source')) +class Adresse: + def __init__(self, code_insee, x, y, num, source, voie=None, place=None, fantoir=None, code_postal=None, sous_commune_code=None, sous_commune_nom=None): + self.code_insee = code_insee + self.x = x + self.y = y + self.source = source + self.numero = num + self.voie = voie + self.place = place + self.fantoir = fantoir + self.code_postal = code_postal + self.sous_commune_code = sous_commune_code + self.sous_commune_nom = sous_commune_nom + self.voie_normalisee = hp.normalize(self.voie) if self.voie else None + self.place_normalisee = hp.format_toponyme(self.place) if self.place else None + + def __hash__(self): + return hash((self.code_insee,self.source,self.numero,self.voie,self.place,self.sous_commune_code)) + + def __eq__(self,other): + return (self.code_insee == other.code_insee and self.source == other.source and self.numero == other.numero and self.voie == other.voie and self.place == other.place and self.sous_commune_code == other.sous_commune_code) + + def _as_csv_format_bano(self): + return f"{self.fantoir}${self.x}${self.y}${self.numero}${hp.escape_quotes(self.voie)}${self.code_postal}${self.code_insee}${self.sous_commune_code if self.sous_commune_code else ''}${self.source}" + + def _as_string(self): + return (f"source : {self.source}, numero : {self.numero}, voie : {self.voie} 
({self.voie_normalisee}), place : {self.place}, fantoir : {self.fantoir}, code_postal:{self.code_postal}, sous_commune : {self.sous_commune_code} - {self.sous_commune_nom}") + class Adresses: def __init__(self, code_insee,source): self.code_insee = code_insee self.source = source - self.liste = [] + self.liste = set() self.index_voie = defaultdict(list) self.noms_de_voies = set() self.triplets_nom_fantoir_source = set() @@ -106,7 +112,7 @@ def _print(self,pattern=None): def add_adresse(self,ad): """ une adresses est considérée dans la commune si sans Fantoir ou avec un Fantoir de la commune""" # if (ad.fantoir == None or hp.is_valid_fantoir(ad.fantoir, self.code_insee)) and hp.is_valid_housenumber(ad.numero): - self.liste.append(ad) + self.liste.add(ad) self.index_voie[ad.voie].append(len(self.liste)-1) self.noms_de_voies.add(ad.voie) @@ -120,7 +126,28 @@ def charge_numeros_ban(self): self.add_adresse(Adresse(self.code_insee,lon,lat,numero,'BAN',voie=voie,code_postal=code_postal,sous_commune_code=code_insee_ac,sous_commune_nom=nom_ac)) def charge_numeros_osm(self): - data = sql_get_data('charge_numeros_bbox_OSM',dict(code_insee=self.code_insee),db.bano_sources) + data = sql_get_data('charge_numeros_OSM',dict(code_insee=self.code_insee),db.bano_sources)+sql_get_data('charge_numeros_bbox_OSM',dict(code_insee=self.code_insee),db.bano_sources) + # print(data) + for lon, lat, provenance, numero, voie, place, tags, suffixe, code_postal, code_insee_ac, nom_ac in data: + if provenance in (1,2,): + self.add_adresse(Adresse(self.code_insee,lon,lat,numero,'OSM',voie=voie,place=place,code_postal=code_postal,sous_commune_code=code_insee_ac,sous_commune_nom=nom_ac)) + if provenance in (3,4,) and tags.get('name'): + self.add_adresse(Adresse(self.code_insee,lon,lat,numero,'OSM',voie=tags['name'],place=None,code_postal=code_postal,sous_commune_code=code_insee_ac,sous_commune_nom=nom_ac)) + if provenance in (5,6,) and tags.get('name') and tags.get('ref:FR:FANTOIR'): + if tags['ref:FR:FANTOIR'][0:5] == self.code_insee: + self.add_adresse(Adresse(self.code_insee,lon,lat,numero,'OSM',voie=tags['name'],place=None,code_postal=code_postal,sous_commune_code=code_insee_ac,sous_commune_nom=nom_ac)) + + + # if not voie: + # print(lon, lat, provenance, numero, voie, place, tags, suffixe, code_postal, code_insee_ac, nom_ac) + # data = sql_get_data('charge_numeros_bbox_OSM',dict(code_insee=self.code_insee),db.bano_sources) + # for d in data: + # print(d) + # for lon, lat, provenance, numero, voie, place, tags, suffixe, code_postal, code_insee_ac, nom_ac in data: + # self.add_adresse(Adresse(self.code_insee,lon,lat,numero,'OSM',voie=voie,code_postal=code_postal,sous_commune_code=code_insee_ac,sous_commune_nom=nom_ac)) + # if not voie: + # print(lon, lat, provenance, numero, voie, place, tags, suffixe, code_postal, code_insee_ac, nom_ac) + def charge_noms_osm(self): # data = sql_get_data('charge_noms_voies_lieux-dits_OSM',dict(code_insee=self.code_insee),db.bano_sources) diff --git a/bano/sql/charge_numeros_OSM.sql b/bano/sql/charge_numeros_OSM.sql index 3a25bba..e9341ee 100644 --- a/bano/sql/charge_numeros_OSM.sql +++ b/bano/sql/charge_numeros_OSM.sql @@ -1,41 +1,42 @@ -select ST_X(pt_way), - ST_Y(pt_way), +SELECT ST_X(way), + ST_Y(way), provenance, hsnr, street_name, + place_name, tags, h.libelle_suffixe, - insee_com + code_postal, + code_insee_a9, + nom_a9 FROM --- point avec addr:street +-- point avec addr:street ou addr:place (SELECT 1 provenance, pt.way, - pt."addr:housenumber" hsnr, - pt."addr:street" street_name, 
- pt."addr:place" place_name, - null::hstore tags, - p."ref:INSEE" insee_com - FROM planet_osm_polygon p - JOIN planet_osm_point pt - ON ST_Intersects(pt.way, p.way) - WHERE p."ref:INSEE" = '__com__' AND - pt."addr:housenumber" != '' AND - pt."addr:street" != '' + pt."addr:housenumber" AS hsnr, + pt."addr:street" AS street_name, + pt."addr:place" AS place_name, + null::hstore AS tags, + pt."addr:postcode" AS code_postal + FROM (SELECT geometrie FROM polygones_insee WHERE admin_level = 8 AND code_insee = '__code_insee__') p + JOIN planet_osm_point pt + ON ST_Intersects(pt.way, p.geometrie) + WHERE pt."addr:housenumber" != '' AND + pt."addr:street"||pt."addr:place" != '' UNION ALL --- way avec addr:street +-- way avec addr:street ou addr:place SELECT 2, ST_Centroid(w.way), w."addr:housenumber", w."addr:street", - pt."addr:place", + w."addr:place", null::hstore tags, - p."ref:INSEE" - FROM planet_osm_polygon p - JOIN planet_osm_polygon w - ON ST_Intersects(w.way, p.way) - WHERE p."ref:INSEE" = '__com__' AND - w."addr:housenumber" != '' AND - w."addr:street" != '' + w."addr:postcode" + FROM (SELECT geometrie FROM polygones_insee WHERE admin_level = 8 AND code_insee = '__code_insee__') p + JOIN planet_osm_polygon w + ON ST_Intersects(w.way, p.geometrie) + WHERE w."addr:housenumber" != '' AND + w."addr:street"||w."addr:place" != '' UNION ALL -- point dans relation associatedStreet SELECT 3, @@ -44,14 +45,13 @@ FROM null, null, r.tags, - p."ref:INSEE" - FROM planet_osm_polygon p - JOIN planet_osm_point pt - ON ST_Intersects(pt.way, p.way) + pt."addr:postcode" + FROM (SELECT geometrie FROM polygones_insee WHERE admin_level = 8 AND code_insee = '__code_insee__') p + JOIN planet_osm_point pt + ON ST_Intersects(pt.way, p.geometrie) JOIN planet_osm_rels r ON r.osm_id = pt.osm_id - WHERE p."ref:INSEE" = '__com__' AND - pt."addr:housenumber" != '' + WHERE pt."addr:housenumber" != '' UNION ALL -- way dans relation associatedStreet SELECT 4, @@ -60,14 +60,15 @@ FROM null, null, r.tags, - p."ref:INSEE" - FROM planet_osm_polygon p + w."addr:postcode" + FROM (SELECT geometrie FROM polygones_insee WHERE admin_level = 8 AND code_insee = '__code_insee__') p JOIN planet_osm_polygon w - ON ST_Intersects(w.way, p.way) + ON ST_Intersects(w.way, p.geometrie) JOIN planet_osm_rels r ON r.osm_id = w.osm_id - WHERE p."ref:INSEE" = '__com__' AND - w."addr:housenumber" != '' + WHERE w."addr:housenumber" != '' )a -LEFT OUTER JOIN suffixe h -ON ST_Intersects(a.way, h.geometrie); +LEFT OUTER JOIN (SELECT * FROM suffixe WHERE code_insee = '__code_insee__') h +ON ST_Intersects(a.way, h.geometrie) +LEFT OUTER JOIN (SELECT geometrie, code_insee code_insee_a9,nom nom_a9 FROM polygones_insee_a9 WHERE insee_a8 = '__code_insee__') a9 +ON ST_Intersects(a.way, a9.geometrie); diff --git a/bano/sql/charge_numeros_bbox_OSM.sql b/bano/sql/charge_numeros_bbox_OSM.sql index 6dd34cf..dad3f96 100644 --- a/bano/sql/charge_numeros_bbox_OSM.sql +++ b/bano/sql/charge_numeros_bbox_OSM.sql @@ -1,44 +1,46 @@ -SELECT ST_X(pt_geo), - ST_Y(pt_geo), +select ST_X(way), + ST_Y(way), provenance, - osm_id, hsnr, street_name, + null::text place_name, tags, - ''::text suffixe, - insee_com + h.libelle_suffixe, + code_postal, + code_insee_a9, + nom_a9 FROM -- point dans relation associatedStreet - (SELECT 3 provenance, + (SELECT 5 provenance, pt.way, pt."addr:housenumber" hsnr, null::text street_name, r.tags tags, - p."ref:INSEE" insee_com - FROM (SELECT ST_Extent(way) way, "ref:INSEE" FROM planet_osm_polygon WHERE "ref:INSEE" = '__com__' GROUP BY 2) p + 
pt."addr:postcode" AS code_postal + FROM (SELECT ST_SetSRID(ST_Extent(geometrie),4326) way, code_insee FROM polygones_insee WHERE code_insee = '__code_insee__' GROUP BY 2) p JOIN planet_osm_point pt ON ST_Intersects(pt.way,p.way) JOIN planet_osm_rels r ON r.osm_id = pt.osm_id - WHERE p."ref:INSEE" = '__com__' AND - pt."addr:housenumber" != '' + WHERE pt."addr:housenumber" != '' UNION -- way dans relation associatedStreet - SELECT 4, + SELECT 6, ST_Centroid(w.way), - w.osm_id, w."addr:housenumber", null, r.tags, - p."ref:INSEE" --- FROM planet_osm_polygon p - FROM (SELECT ST_Extent(way) way, "ref:INSEE" FROM planet_osm_polygon WHERE "ref:INSEE" = '__com__' GROUP BY 2) p + w."addr:postcode" + FROM (SELECT ST_SetSRID(ST_Extent(geometrie),4326) way, code_insee FROM polygones_insee WHERE code_insee = '__code_insee__' GROUP BY 2) p JOIN planet_osm_polygon w ON ST_Intersects(w.way, p.way) JOIN planet_osm_rels r ON r.osm_id = w.osm_id - WHERE p."ref:INSEE" = '__com__' AND - w."addr:housenumber" != '' -)a; + WHERE w."addr:housenumber" != '')a +LEFT OUTER JOIN (SELECT * FROM suffixe WHERE code_insee = '__code_insee__') h +ON ST_Intersects(a.way, h.geometrie) +LEFT OUTER JOIN (SELECT geometrie, code_insee code_insee_a9,nom nom_a9 FROM polygones_insee_a9 WHERE insee_a8 = '__code_insee__') a9 +ON ST_Intersects(a.way, a9.geometrie); + \ No newline at end of file From 06c5d2f79c22b836383840bc2d09c91fed8cb683 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Sun, 2 Oct 2022 21:38:40 +0000 Subject: [PATCH 032/163] =?UTF-8?q?homog=C3=A9n=C3=A9isation=20:=20code=5F?= =?UTF-8?q?insee?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- bano/sql/create_table_polygones_communes.sql | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/bano/sql/create_table_polygones_communes.sql b/bano/sql/create_table_polygones_communes.sql index 2c80826..a56db84 100644 --- a/bano/sql/create_table_polygones_communes.sql +++ b/bano/sql/create_table_polygones_communes.sql @@ -1,10 +1,10 @@ CREATE TABLE IF NOT EXISTS polygones_insee ( geometrie geometry (Geometry, 4326), - insee_com character(5), + code_insee character(5), nom text, admin_level integer); CREATE INDEX IF NOT EXISTS gidx_polygones_insee ON polygones_insee USING GIST (geometrie); -CREATE INDEX IF NOT EXISTS idx_polygones_insee_insee_com ON polygones_insee(insee_com); +CREATE INDEX IF NOT EXISTS idx_polygones_insee_code_insee ON polygones_insee(code_insee); TRUNCATE TABLE polygones_insee; INSERT INTO polygones_insee @@ -19,19 +19,19 @@ WHERE boundary='administrative' AND CREATE TABLE IF NOT EXISTS polygones_insee_a9 ( geometrie geometry (Geometry, 4326), - insee_com character(5), + code_insee character(5), nom text, insee_a8 character(5)); CREATE INDEX IF NOT EXISTS gidx_polygones_insee_a9 ON polygones_insee_a9 USING GIST (geometrie); -CREATE INDEX IF NOT EXISTS idx_polygones_insee_a9_insee_com ON polygones_insee_a9(insee_com); +CREATE INDEX IF NOT EXISTS idx_polygones_insee_a9_code_insee ON polygones_insee_a9(code_insee); CREATE INDEX IF NOT EXISTS idx_polygones_insee_a9_insee_a8 ON polygones_insee_a9(insee_a8); TRUNCATE TABLE polygones_insee_a9; INSERT INTO polygones_insee_a9 SELECT a9.geometrie, - a9.insee_com, + a9.code_insee, a9.nom, - a8.insee_com + a8.code_insee FROM (SELECT * FROM polygones_insee WHERE admin_level = 9) a9 JOIN (SELECT * FROM polygones_insee WHERE admin_level = 8) a8 ON ST_Contains(a8.geometrie,a9.geometrie); From 
c682042f4b9a3e7bf6b98a1336dfe34ab9479c45 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Mon, 3 Oct 2022 21:21:18 +0000 Subject: [PATCH 033/163] nature du nom --- bano/models.py | 31 ++++++++++------------------- bano/sql/create_table_base_bano.sql | 2 ++ 2 files changed, 13 insertions(+), 20 deletions(-) diff --git a/bano/models.py b/bano/models.py index 15eea99..feaa189 100644 --- a/bano/models.py +++ b/bano/models.py @@ -11,21 +11,22 @@ from .sql import sql_get_data,sql_process class Nom: - def __init__(self,nom,fantoir,source,code_insee): + def __init__(self,nom,fantoir,nature,source,code_insee): self.code_insee = code_insee self.nom = nom - self.source = source self.fantoir = fantoir + self.nature = nature + self.source = source self.nom_normalise = hp.normalize(nom) def __eq__(self,other): - return self.nom == other.nom and self.fantoir == other.fantoir and self.source == other.source and self.code_insee == other.code_insee + return self.nom == other.nom and self.fantoir == other.fantoir and self.nature == other.nature and self.source == other.source and self.code_insee == other.code_insee def __hash__(self): - return hash((self.nom,self.fantoir,self.source,self.code_insee)) + return hash((self.nom,self.fantoir,self.source,self.nature,self.code_insee)) def _as_csv_format_bano(self): - return f"{self.fantoir}${hp.escape_quotes(self.nom)}${self.code_insee}${self.source}" + return f"{self.fantoir}${hp.escape_quotes(self.nom)}${self.nature}${self.code_insee}${self.source}" def add_fantoir(self,topo): if not self.fantoir: @@ -52,7 +53,7 @@ def enregistre(self): io_in_csv.write(t._as_csv_format_bano()+'\n') io_in_csv.seek(0) with db.bano.cursor() as cur_insert: - cur_insert.copy_from(io_in_csv, "nom_fantoir", sep='$',null='',columns=('fantoir','nom','code_insee','source')) + cur_insert.copy_from(io_in_csv, "nom_fantoir", sep='$',null='',columns=('fantoir','nom','nature','code_insee','source')) class Adresse: def __init__(self, code_insee, x, y, num, source, voie=None, place=None, fantoir=None, code_postal=None, sous_commune_code=None, sous_commune_nom=None): @@ -77,7 +78,7 @@ def __eq__(self,other): return (self.code_insee == other.code_insee and self.source == other.source and self.numero == other.numero and self.voie == other.voie and self.place == other.place and self.sous_commune_code == other.sous_commune_code) def _as_csv_format_bano(self): - return f"{self.fantoir}${self.x}${self.y}${self.numero}${hp.escape_quotes(self.voie)}${self.code_postal}${self.code_insee}${self.sous_commune_code if self.sous_commune_code else ''}${self.source}" + return f"{self.fantoir}${self.x}${self.y}${self.numero}${hp.escape_quotes(self.voie) if self.voie else ''}${hp.escape_quotes(self.place) if self.place else ''}${self.code_postal}${self.code_insee}${self.sous_commune_code if self.sous_commune_code else ''}${self.source}" def _as_string(self): return (f"source : {self.source}, numero : {self.numero}, voie : {self.voie} ({self.voie_normalisee}), place : {self.place}, fantoir : {self.fantoir}, code_postal:{self.code_postal}, sous_commune : {self.sous_commune_code} - {self.sous_commune_nom}") @@ -138,16 +139,6 @@ def charge_numeros_osm(self): self.add_adresse(Adresse(self.code_insee,lon,lat,numero,'OSM',voie=tags['name'],place=None,code_postal=code_postal,sous_commune_code=code_insee_ac,sous_commune_nom=nom_ac)) - # if not voie: - # print(lon, lat, provenance, numero, voie, place, tags, suffixe, code_postal, code_insee_ac, nom_ac) - # data = 
sql_get_data('charge_numeros_bbox_OSM',dict(code_insee=self.code_insee),db.bano_sources) - # for d in data: - # print(d) - # for lon, lat, provenance, numero, voie, place, tags, suffixe, code_postal, code_insee_ac, nom_ac in data: - # self.add_adresse(Adresse(self.code_insee,lon,lat,numero,'OSM',voie=voie,code_postal=code_postal,sous_commune_code=code_insee_ac,sous_commune_nom=nom_ac)) - # if not voie: - # print(lon, lat, provenance, numero, voie, place, tags, suffixe, code_postal, code_insee_ac, nom_ac) - def charge_noms_osm(self): # data = sql_get_data('charge_noms_voies_lieux-dits_OSM',dict(code_insee=self.code_insee),db.bano_sources) @@ -162,9 +153,9 @@ def charge_noms_osm(self): def noms_des_adresses(self,noms): for a in self: if a.voie: - noms.triplets_nom_fantoir_source.add(Nom(a.voie,a.fantoir,a.source,self.code_insee)) + noms.triplets_nom_fantoir_source.add(Nom(a.voie,a.fantoir,'voie',a.source,self.code_insee)) if a.place: - noms.triplets_nom_fantoir_source.add(Nom(a.place,a.fantoir,a.source,self.code_insee)) + noms.triplets_nom_fantoir_source.add(Nom(a.place,a.fantoir,'place',a.source,self.code_insee)) def enregistre(self): sql_process('suppression_adresses_commune_source',dict(code_insee=self.code_insee,source=self.source),db.bano) @@ -173,7 +164,7 @@ def enregistre(self): io_in_csv.write(a._as_csv_format_bano()+'\n') # separateur $ car on trouve des virgules dans le contenu io_in_csv.seek(0) with db.bano.cursor() as cur_insert: - cur_insert.copy_from(io_in_csv, "bano_adresses", sep='$',null='',columns=('fantoir','lon','lat','numero','nom_voie','code_postal','code_insee','code_insee_ancienne_commune','source')) + cur_insert.copy_from(io_in_csv, "bano_adresses", sep='$',null='',columns=('fantoir','lon','lat','numero','nom_voie','nom_place','code_postal','code_insee','code_insee_ancienne_commune','source')) class Topo: diff --git a/bano/sql/create_table_base_bano.sql b/bano/sql/create_table_base_bano.sql index 34abf2a..aa877c9 100644 --- a/bano/sql/create_table_base_bano.sql +++ b/bano/sql/create_table_base_bano.sql @@ -4,6 +4,7 @@ CREATE TABLE IF NOT EXISTS bano_adresses ( lat float, numero text, nom_voie text, + nom_place text, code_postal text, code_insee text, code_insee_ancienne_commune text, @@ -19,6 +20,7 @@ CREATE TABLE IF NOT EXISTS nom_fantoir ( fantoir text, nom text, code_insee text, + nature text, source text); CREATE INDEX IF NOT EXISTS idx_nom_fantoir_code_insee ON nom_fantoir (code_insee); From 512be04f89a4e0e7f58fd929ffe0124677da2eb9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Mon, 3 Oct 2022 21:22:17 +0000 Subject: [PATCH 034/163] insee_com -> code_insee --- bano/sql/charge_noms_voies_relation_OSM.sql | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/bano/sql/charge_noms_voies_relation_OSM.sql b/bano/sql/charge_noms_voies_relation_OSM.sql index 4f8bedb..e3ee7d7 100644 --- a/bano/sql/charge_noms_voies_relation_OSM.sql +++ b/bano/sql/charge_noms_voies_relation_OSM.sql @@ -5,7 +5,7 @@ AS l.way, r.tags, -- h.libelle_suffixe, --- a9.insee_com, +-- a9.code_insee, -- a9.nom, 'voie'::text FROM (SELECT way FROM planet_osm_polygon WHERE "ref:INSEE" = '__code_insee__') p @@ -13,7 +13,7 @@ JOIN (SELECT name,tags,osm_id,way FROM planet_osm_line WHERE highway != '' AND n ON ST_Intersects(l.way, p.way) JOIN planet_osm_rels r ON r.osm_id = l.osm_id), -/*LEFT OUTER JOIN (SELECT * FROM suffixe WHERE insee_com = '__code_insee__') h +/*LEFT OUTER JOIN (SELECT * FROM suffixe WHERE code_insee = '__code_insee__') 
h ON ST_Intersects(l.way, h.geometrie) LEFT OUTER JOIN (SELECT * FROM polygones_insee_a9 where insee_a8 = '__code_insee__') a9 ON ST_Contains(a9.geometrie,l.way)*/ @@ -23,7 +23,7 @@ as l.way, r.tags, -- h.libelle_suffixe, --- a9.insee_com, +-- a9.code_insee, -- a9.nom, 'voie'::text FROM (SELECT way FROM planet_osm_polygon WHERE "ref:INSEE" = '__code_insee__') p @@ -34,22 +34,24 @@ ON r.osm_id = l.osm_id) select a.name, a.tags, h.libelle_suffixe, - a9.insee_com, + a9.code_insee, a9.nom from a -LEFT OUTER JOIN (SELECT * FROM suffixe WHERE insee_com = '__code_insee__') h +LEFT OUTER JOIN (SELECT * FROM suffixe WHERE code_insee = '__code_insee__') h ON ST_Intersects(way, h.geometrie) LEFT OUTER JOIN (SELECT * FROM polygones_insee_a9 where insee_a8 = '__code_insee__') a9 ON ST_Contains(a9.geometrie,way) +WHERE a.name IS NOT NULL union select b.name, b.tags, h.libelle_suffixe, - a9.insee_com, + a9.code_insee, a9.nom from b -LEFT OUTER JOIN (SELECT * FROM suffixe WHERE insee_com = '__code_insee__') h +LEFT OUTER JOIN (SELECT * FROM suffixe WHERE code_insee = '__code_insee__') h ON ST_Intersects(way, h.geometrie) LEFT OUTER JOIN (SELECT * FROM polygones_insee_a9 where insee_a8 = '__code_insee__') a9 ON ST_Contains(a9.geometrie,way) +WHERE b.name IS NOT NULL ; \ No newline at end of file From c5208c753bb6f02264086f17eb0366c846aba5ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Fri, 14 Oct 2022 22:32:23 +0000 Subject: [PATCH 035/163] =?UTF-8?q?WIP=20rapprochements,=20requetes,=20mod?= =?UTF-8?q?ele=20de=20donn=C3=A9es?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- bano/helpers.py | 2 +- bano/models.py | 82 +++++++++------- bano/rapprochement.py | 4 +- bano/sql/charge_ban_commune.sql | 6 +- bano/sql/charge_noms_voies_lieux-dits_OSM.sql | 96 ++++++++----------- bano/sql/charge_noms_voies_relation_OSM.sql | 84 ++++++---------- .../charge_noms_voies_relation_bbox_OSM.sql | 60 ++++++------ bano/sql/charge_numeros_OSM.sql | 4 +- bano/sql/create_table_base_bano.sql | 2 + 9 files changed, 158 insertions(+), 182 deletions(-) diff --git a/bano/helpers.py b/bano/helpers.py index 770ef24..762c90d 100644 --- a/bano/helpers.py +++ b/bano/helpers.py @@ -172,7 +172,7 @@ def replace_type_voie(s,nb): return s -def is_valid_fantoir(f, insee): +def fantoir_valide(f, insee): return (len(f) == 10 and f[0:5] == insee); def display_insee_commune(code_insee, nom_commune): diff --git a/bano/models.py b/bano/models.py index feaa189..aa3fc64 100644 --- a/bano/models.py +++ b/bano/models.py @@ -11,8 +11,9 @@ from .sql import sql_get_data,sql_process class Nom: - def __init__(self,nom,fantoir,nature,source,code_insee): + def __init__(self,nom,fantoir,nature,source,code_insee,code_insee_ancienne_commune): self.code_insee = code_insee + self.code_insee_ancienne_commune = code_insee_ancienne_commune self.nom = nom self.fantoir = fantoir self.nature = nature @@ -20,13 +21,15 @@ def __init__(self,nom,fantoir,nature,source,code_insee): self.nom_normalise = hp.normalize(nom) def __eq__(self,other): - return self.nom == other.nom and self.fantoir == other.fantoir and self.nature == other.nature and self.source == other.source and self.code_insee == other.code_insee + return (self.nom == other.nom and self.fantoir == other.fantoir and self.nature == other.nature and self.source == other.source and self.code_insee == other.code_insee) + # return (self.nom == other.nom and self.fantoir == other.fantoir) def __hash__(self): return 
hash((self.nom,self.fantoir,self.source,self.nature,self.code_insee)) + # return hash((self.nom,self.fantoir)) def _as_csv_format_bano(self): - return f"{self.fantoir}${hp.escape_quotes(self.nom)}${self.nature}${self.code_insee}${self.source}" + return f"{self.fantoir}${hp.escape_quotes(self.nom)}${self.nature}${self.code_insee}${self.code_insee_ancienne_commune}${self.source}" def add_fantoir(self,topo): if not self.fantoir: @@ -36,10 +39,21 @@ class Noms: def __init__(self, code_insee,source): self.code_insee = code_insee self.source = source - self.triplets_nom_fantoir_source = set() - + self.triplets_nom_fantoir_source = [] + + # On ne charge pas les noms des numeros OSM. Ils sont ajoutés via Adresses.nom_des_adresses + def charge_noms_osm_hors_numeros(self): + data = sql_get_data('charge_noms_voies_lieux-dits_OSM',dict(code_insee=self.code_insee),db.bano_sources)+sql_get_data('charge_noms_voies_relation_bbox_OSM',dict(code_insee=self.code_insee),db.bano_sources)+sql_get_data('charge_noms_voies_relation_OSM',dict(code_insee=self.code_insee),db.bano_sources) + for provenance,name,tags,libelle_suffixe,ac_code_insee,ac_nom,nature in data: + if provenance in (1,2,3,4,5): + self.add_nom(Nom(name,tags.get('ref:FR:FANTOIR'),nature,'OSM',self.code_insee,ac_code_insee)) + if provenance in (6,7) and tags.get('ref:FR:FANTOIR'): + self.add_nom(Nom(name,tags['ref:FR:FANTOIR'],nature,'OSM',self.code_insee,ac_code_insee)) + + # On ajoute un nom s'il n'a pas de FANTOIR ou si son FANTOIR appartient à la commune def add_nom(self,nom=Nom): - self.triplets_nom_fantoir_source.add(nom) + if not nom.fantoir or nom.fantoir[0:5] == self.code_insee: + self.triplets_nom_fantoir_source.append(nom) def add_fantoir(self,topo): for t in self.triplets_nom_fantoir_source: @@ -48,12 +62,12 @@ def add_fantoir(self,topo): def enregistre(self): sql_process('suppression_noms_commune_source',dict(code_insee=self.code_insee,source=self.source),db.bano) io_in_csv = io.StringIO() - for t in self.triplets_nom_fantoir_source: + for t in set(self.triplets_nom_fantoir_source): if t.fantoir: io_in_csv.write(t._as_csv_format_bano()+'\n') io_in_csv.seek(0) with db.bano.cursor() as cur_insert: - cur_insert.copy_from(io_in_csv, "nom_fantoir", sep='$',null='',columns=('fantoir','nom','nature','code_insee','source')) + cur_insert.copy_from(io_in_csv, "nom_fantoir", sep='$',null='',columns=('fantoir','nom','nature','code_insee','code_insee_ancienne_commune','source')) class Adresse: def __init__(self, code_insee, x, y, num, source, voie=None, place=None, fantoir=None, code_postal=None, sous_commune_code=None, sous_commune_nom=None): @@ -78,7 +92,7 @@ def __eq__(self,other): return (self.code_insee == other.code_insee and self.source == other.source and self.numero == other.numero and self.voie == other.voie and self.place == other.place and self.sous_commune_code == other.sous_commune_code) def _as_csv_format_bano(self): - return f"{self.fantoir}${self.x}${self.y}${self.numero}${hp.escape_quotes(self.voie) if self.voie else ''}${hp.escape_quotes(self.place) if self.place else ''}${self.code_postal}${self.code_insee}${self.sous_commune_code if self.sous_commune_code else ''}${self.source}" + return f"{self.fantoir if self.fantoir else ''}${self.x}${self.y}${self.numero}${hp.escape_quotes(self.voie) if self.voie else ''}${hp.escape_quotes(self.place) if self.place else ''}${self.code_postal}${self.code_insee}${self.sous_commune_code if self.sous_commune_code else ''}${self.source}" def _as_string(self): return (f"source : {self.source}, 
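Aside on the Noms change above: the collection becomes a list and deduplication now happens in enregistre(), where set(self.triplets_nom_fantoir_source) relies on Nom.__eq__ and Nom.__hash__ agreeing on the same fields. A minimal standalone illustration of that contract (this is not the project's Nom class, just the same pattern):

class NomSketch:
    def __init__(self, nom, fantoir, nature, source, code_insee):
        # Same fields that Nom.__eq__/__hash__ compare; equal keys collapse in a set.
        self.key = (nom, fantoir, nature, source, code_insee)

    def __eq__(self, other):
        return self.key == other.key

    def __hash__(self):
        return hash(self.key)

a = NomSketch("Rue des Lilas", "721810100U", "voie", "OSM", "72181")
b = NomSketch("Rue des Lilas", "721810100U", "voie", "OSM", "72181")
assert a == b and len({a, b}) == 1  # duplicates are written only once by enregistre()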
numero : {self.numero}, voie : {self.voie} ({self.voie_normalisee}), place : {self.place}, fantoir : {self.fantoir}, code_postal:{self.code_postal}, sous_commune : {self.sous_commune_code} - {self.sous_commune_nom}") @@ -90,7 +104,6 @@ def __init__(self, code_insee,source): self.liste = set() self.index_voie = defaultdict(list) self.noms_de_voies = set() - self.triplets_nom_fantoir_source = set() # def __contains__(self, item): # return item in self.a @@ -117,45 +130,44 @@ def add_adresse(self,ad): self.index_voie[ad.voie].append(len(self.liste)-1) self.noms_de_voies.add(ad.voie) - def add_fantoir(self): - for nom in self.noms: - fantoir = topo.topo.get(a.voie_normalisee) + # def add_fantoir(self): + # for nom in self.noms: + # fantoir = topo.topo.get(a.voie_normalisee) - def charge_numeros_ban(self): + def charge_numeros_ban(self,topo): data = sql_get_data('charge_ban_commune',dict(code_insee=self.code_insee),db.bano_sources) - for numero, voie, lon, lat, code_postal, code_insee_ac, nom_ac in data: - self.add_adresse(Adresse(self.code_insee,lon,lat,numero,'BAN',voie=voie,code_postal=code_postal,sous_commune_code=code_insee_ac,sous_commune_nom=nom_ac)) + for id_fantoir, numero, voie, lon, lat, code_postal, code_insee_ac, nom_ac in data: + if id_fantoir: + fantoir9 = f"{id_fantoir[0:5]}{id_fantoir[6:10]}" + fantoir = topo.code_fantoir9_vers_fantoir10.get(fantoir9) + else: + fantoir = None + self.add_adresse(Adresse(self.code_insee,lon,lat,numero,'BAN',voie=voie,fantoir=fantoir,code_postal=code_postal,sous_commune_code=code_insee_ac,sous_commune_nom=nom_ac)) def charge_numeros_osm(self): data = sql_get_data('charge_numeros_OSM',dict(code_insee=self.code_insee),db.bano_sources)+sql_get_data('charge_numeros_bbox_OSM',dict(code_insee=self.code_insee),db.bano_sources) - # print(data) + for lon, lat, provenance, numero, voie, place, tags, suffixe, code_postal, code_insee_ac, nom_ac in data: + + fantoir = tags.get('ref:FR:FANTOIR') + if fantoir and not hp.fantoir_valide(fantoir,self.code_insee): + print("fantoir invalide",fantoir,self.code_insee) + continue + if provenance in (1,2,): - self.add_adresse(Adresse(self.code_insee,lon,lat,numero,'OSM',voie=voie,place=place,code_postal=code_postal,sous_commune_code=code_insee_ac,sous_commune_nom=nom_ac)) + self.add_adresse(Adresse(self.code_insee,lon,lat,numero,'OSM',voie=voie,place=place,fantoir=fantoir,code_postal=code_postal,sous_commune_code=code_insee_ac,sous_commune_nom=nom_ac)) if provenance in (3,4,) and tags.get('name'): - self.add_adresse(Adresse(self.code_insee,lon,lat,numero,'OSM',voie=tags['name'],place=None,code_postal=code_postal,sous_commune_code=code_insee_ac,sous_commune_nom=nom_ac)) + self.add_adresse(Adresse(self.code_insee,lon,lat,numero,'OSM',voie=tags['name'],place=None,fantoir=fantoir,code_postal=code_postal,sous_commune_code=code_insee_ac,sous_commune_nom=nom_ac)) if provenance in (5,6,) and tags.get('name') and tags.get('ref:FR:FANTOIR'): if tags['ref:FR:FANTOIR'][0:5] == self.code_insee: - self.add_adresse(Adresse(self.code_insee,lon,lat,numero,'OSM',voie=tags['name'],place=None,code_postal=code_postal,sous_commune_code=code_insee_ac,sous_commune_nom=nom_ac)) - - - - def charge_noms_osm(self): - # data = sql_get_data('charge_noms_voies_lieux-dits_OSM',dict(code_insee=self.code_insee),db.bano_sources) - # data = sql_get_data('charge_noms_voies_relation_bbox_OSM',dict(code_insee=self.code_insee),db.bano_sources) - data = sql_get_data('charge_noms_voies_relation_OSM',dict(code_insee=self.code_insee),db.bano_sources) - - for d in 
data: - print(d) - print(hp.normalize(d[0])) - return None + self.add_adresse(Adresse(self.code_insee,lon,lat,numero,'OSM',voie=tags['name'],place=None,fantoir=tags['ref:FR:FANTOIR'],code_postal=code_postal,sous_commune_code=code_insee_ac,sous_commune_nom=nom_ac)) def noms_des_adresses(self,noms): for a in self: if a.voie: - noms.triplets_nom_fantoir_source.add(Nom(a.voie,a.fantoir,'voie',a.source,self.code_insee)) + noms.add_nom(Nom(a.voie,a.fantoir,'voie',a.source,self.code_insee,a.sous_commune_code)) if a.place: - noms.triplets_nom_fantoir_source.add(Nom(a.place,a.fantoir,'place',a.source,self.code_insee)) + noms.add_nom(Nom(a.place,a.fantoir,'place',a.source,self.code_insee,a.sous_commune_code)) def enregistre(self): sql_process('suppression_adresses_commune_source',dict(code_insee=self.code_insee,source=self.source),db.bano) @@ -171,6 +183,7 @@ class Topo: def __init__(self, code_insee): self.code_insee = code_insee self.topo = OrderedDict() + self.code_fantoir9_vers_fantoir10 = {} # self.index_by_nom_normalise = defaultdict(list) @@ -195,4 +208,5 @@ def charge_topo(self): for fantoir,nom in data: nom = hp.normalize(' '.join(nom.replace('-',' ').split())) self.topo[fantoir] = nom + self.code_fantoir9_vers_fantoir10[fantoir[0:9]] = fantoir self.topo[nom] = fantoir diff --git a/bano/rapprochement.py b/bano/rapprochement.py index 83f2612..90000bc 100644 --- a/bano/rapprochement.py +++ b/bano/rapprochement.py @@ -9,11 +9,11 @@ def process(source,code_insee,dept,**kwargs): noms = Noms(code_insee,source) if source == 'OSM': - adresses.charge_noms_osm() + noms.charge_noms_osm_hors_numeros() adresses.charge_numeros_osm() if source == 'BAN': - adresses.charge_numeros_ban() + adresses.charge_numeros_ban(topo) adresses.noms_des_adresses(noms) noms.add_fantoir(topo) diff --git a/bano/sql/charge_ban_commune.sql b/bano/sql/charge_ban_commune.sql index 83b33a3..37ace48 100644 --- a/bano/sql/charge_ban_commune.sql +++ b/bano/sql/charge_ban_commune.sql @@ -1,7 +1,8 @@ WITH j AS -(SELECT numero, +(SELECT id_fantoir, + numero, nom_voie, lon, lat, @@ -23,7 +24,8 @@ FROM ban_odbl b LEFT OUTER JOIN rep_b_as_bis r USING (id_fantoir,numero) WHERE code_insee = '__code_insee__') -SELECT TRIM (BOTH FROM (numero||' '||COALESCE(rep_bis,rep,''))), +SELECT id_fantoir, + TRIM (BOTH FROM (numero||' '||COALESCE(rep_bis,rep,''))), nom_voie, lon, lat, diff --git a/bano/sql/charge_noms_voies_lieux-dits_OSM.sql b/bano/sql/charge_noms_voies_lieux-dits_OSM.sql index ef3fe86..ccb04a2 100644 --- a/bano/sql/charge_noms_voies_lieux-dits_OSM.sql +++ b/bano/sql/charge_noms_voies_lieux-dits_OSM.sql @@ -1,55 +1,43 @@ -SELECT pl.name, - pl."ref:FR:FANTOIR" f, - '' fl, - '' fr, - h.libelle_suffixe, - p."ref:INSEE", - CASE - WHEN pl.place='' THEN 'voie'::text - ELSE 'lieudit' - END AS nature -FROM planet_osm_polygon p -JOIN planet_osm_point pl -ON pl.way && p.way AND - ST_Intersects(pl.way, p.way) +SELECT DISTINCT provenance, + name, + tags, + libelle_suffixe, + a9.code_insee, + a9.nom, + nature +FROM (SELECT 1::integer AS provenance, + pt.way, + UNNEST(ARRAY[pt.name,pt.tags->'alt_name',pt.tags->'old_name']) as name, + tags, + CASE + WHEN pt.place='' THEN 'voie'::text + ELSE 'place' + END AS nature + FROM (SELECT way FROM planet_osm_polygon WHERE "ref:INSEE" = '__code_insee__') p + JOIN (SELECT * FROM planet_osm_point WHERE ("ref:FR:FANTOIR" !='' OR place != '') AND name != '') pt + ON pt.way && p.way AND + ST_Intersects(pt.way, p.way) + UNION ALL + SELECT 2, + l.way, + UNNEST(ARRAY[l.name,l.tags->'alt_name',l.tags->'old_name']) as 
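Note on charge_numeros_ban() and the new code_fantoir9_vers_fantoir10 mapping above: BAN's id_fantoir is presumably of the form '<code INSEE>_<code voie>' (for example '72181_0100'); removing the separator gives the 9-character key, and Topo.charge_topo() maps it back to the full 10-character FANTOIR code that ends with its check letter. Worked example with made-up values; only the slicing mirrors the hunk:

id_fantoir = "72181_0100"                       # assumed BAN format, hypothetical value
fantoir9 = f"{id_fantoir[0:5]}{id_fantoir[6:10]}"
assert fantoir9 == "721810100"
code_fantoir9_vers_fantoir10 = {"721810100": "721810100U"}   # normally built by Topo.charge_topo()
fantoir = code_fantoir9_vers_fantoir10.get(fantoir9)          # None when TOPO has no match
assert fantoir == "721810100U"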
name, + tags, + 'voie' + FROM (SELECT way FROM planet_osm_polygon WHERE "ref:INSEE" = '__code_insee__') p + JOIN (SELECT * FROM planet_osm_line WHERE highway != '' AND name != '') l + ON p.way && l.way AND ST_Contains(p.way, l.way) + UNION ALL + SELECT 3, + pl.way, + UNNEST(ARRAY[pl.name,pl.tags->'alt_name',pl.tags->'old_name']) as name, + tags, + 'voie' + FROM (SELECT way FROM planet_osm_polygon WHERE "ref:INSEE" = '__code_insee__') p + JOIN (SELECT * FROM planet_osm_polygon WHERE (highway||"ref:FR:FANTOIR" != '' OR landuse = 'residential' OR amenity = 'parking') AND name != '') pl + ON pl.way && p.way AND + ST_Intersects(pl.way, p.way)) l LEFT OUTER JOIN suffixe h -ON ST_Intersects(pl.way, h.geometrie) -WHERE p."ref:INSEE" = '__code_insee__' AND - (pl."ref:FR:FANTOIR" !='' OR pl.place != '') AND - pl.name != '' -UNION -SELECT l.name, - l.tags->'ref:FR:FANTOIR' f, - l.tags->'ref:FR:FANTOIR:left' fl, - l.tags->'ref:FR:FANTOIR:right' fr, - h.libelle_suffixe, - p."ref:INSEE", - 'voie' -FROM planet_osm_polygon p -JOIN planet_osm_line l -ON ST_Intersects(l.way, p.way) -LEFT OUTER JOIN suffixe h -ON ST_Intersects(l.way, h.geometrie) -WHERE p."ref:INSEE" = '__code_insee__' AND - l.highway != '' AND - l.name != '' -UNION -SELECT pl.name, - pl."ref:FR:FANTOIR" f, - pl."ref:FR:FANTOIR:left" fl, - pl."ref:FR:FANTOIR:right" fr, - h.libelle_suffixe, - p."ref:INSEE", - 'voie' -FROM planet_osm_polygon p -JOIN planet_osm_polygon pl -ON pl.way && p.way AND - ST_Intersects(pl.way, p.way) -LEFT OUTER JOIN suffixe h -ON ST_Intersects(pl.way, h.geometrie) -WHERE p."ref:INSEE" = '__code_insee__' AND - (pl.highway||pl."ref:FR:FANTOIR" != '' OR - pl.landuse = 'residential' OR - pl.amenity = 'parking') AND - pl.name != ''; - +ON ST_Intersects(l.way, h.geometrie) +LEFT OUTER JOIN (SELECT * FROM polygones_insee_a9 where insee_a8 = '__code_insee__') a9 +ON ST_Contains(a9.geometrie,way) +WHERE l.name IS NOT NULL; \ No newline at end of file diff --git a/bano/sql/charge_noms_voies_relation_OSM.sql b/bano/sql/charge_noms_voies_relation_OSM.sql index e3ee7d7..0f386ab 100644 --- a/bano/sql/charge_noms_voies_relation_OSM.sql +++ b/bano/sql/charge_noms_voies_relation_OSM.sql @@ -1,57 +1,31 @@ -WITH -a -AS -(SELECT DISTINCT unnest(array[l.name,l.tags->'alt_name',l.tags->'old_name']) as name, - l.way, - r.tags, --- h.libelle_suffixe, --- a9.code_insee, --- a9.nom, - 'voie'::text -FROM (SELECT way FROM planet_osm_polygon WHERE "ref:INSEE" = '__code_insee__') p -JOIN (SELECT name,tags,osm_id,way FROM planet_osm_line WHERE highway != '' AND name != '') l -ON ST_Intersects(l.way, p.way) -JOIN planet_osm_rels r -ON r.osm_id = l.osm_id), -/*LEFT OUTER JOIN (SELECT * FROM suffixe WHERE code_insee = '__code_insee__') h -ON ST_Intersects(l.way, h.geometrie) -LEFT OUTER JOIN (SELECT * FROM polygones_insee_a9 where insee_a8 = '__code_insee__') a9 -ON ST_Contains(a9.geometrie,l.way)*/ -b -as -(SELECT DISTINCT unnest(array[l.name,l.tags->'alt_name',l.tags->'old_name']) as name, - l.way, - r.tags, --- h.libelle_suffixe, --- a9.code_insee, --- a9.nom, - 'voie'::text -FROM (SELECT way FROM planet_osm_polygon WHERE "ref:INSEE" = '__code_insee__') p -JOIN (SELECT name,tags,osm_id,way FROM planet_osm_polygon WHERE highway != '' AND name != '') l -ON ST_Intersects(l.way, p.way) -JOIN planet_osm_rels r -ON r.osm_id = l.osm_id) -select a.name, - a.tags, - h.libelle_suffixe, - a9.code_insee, - a9.nom -from a -LEFT OUTER JOIN (SELECT * FROM suffixe WHERE code_insee = '__code_insee__') h -ON ST_Intersects(way, h.geometrie) -LEFT OUTER JOIN (SELECT * 
FROM polygones_insee_a9 where insee_a8 = '__code_insee__') a9 -ON ST_Contains(a9.geometrie,way) -WHERE a.name IS NOT NULL -union -select b.name, - b.tags, - h.libelle_suffixe, - a9.code_insee, - a9.nom -from b +SELECT DISTINCT provenance, + name, + tags, + libelle_suffixe, + a9.code_insee, + a9.nom, + 'voie'::text +FROM (SELECT 4::integer AS provenance, + UNNEST(ARRAY[l.name,l.tags->'alt_name',l.tags->'old_name']) as name, + l.way, + r.tags + FROM (SELECT way FROM planet_osm_polygon WHERE "ref:INSEE" = '__code_insee__') p + JOIN (SELECT name,tags,osm_id,way FROM planet_osm_line WHERE highway != '' AND name != '') l + ON p.way && l.way AND ST_Contains(p.way, l.way) + JOIN planet_osm_rels r + ON r.osm_id = l.osm_id + UNION ALL + SELECT 5, + UNNEST(ARRAY[l.name,l.tags->'alt_name',l.tags->'old_name']) as name, + l.way, + r.tags + FROM (SELECT way FROM planet_osm_polygon WHERE "ref:INSEE" = '__code_insee__') p + JOIN (SELECT name,tags,osm_id,way FROM planet_osm_polygon WHERE highway != '' AND name != '') l + ON p.way && l.way AND ST_Contains(p.way, l.way) + JOIN planet_osm_rels r + ON r.osm_id = l.osm_id) l LEFT OUTER JOIN (SELECT * FROM suffixe WHERE code_insee = '__code_insee__') h -ON ST_Intersects(way, h.geometrie) +ON ST_Intersects(way, h.geometrie) LEFT OUTER JOIN (SELECT * FROM polygones_insee_a9 where insee_a8 = '__code_insee__') a9 -ON ST_Contains(a9.geometrie,way) -WHERE b.name IS NOT NULL -; \ No newline at end of file +ON ST_Contains(a9.geometrie,way) +WHERE l.name IS NOT NULL; \ No newline at end of file diff --git a/bano/sql/charge_noms_voies_relation_bbox_OSM.sql b/bano/sql/charge_noms_voies_relation_bbox_OSM.sql index 267a90b..09724a9 100644 --- a/bano/sql/charge_noms_voies_relation_bbox_OSM.sql +++ b/bano/sql/charge_noms_voies_relation_bbox_OSM.sql @@ -1,34 +1,30 @@ -SELECT DISTINCT l.name, - r.tags, - h.libelle_suffixe, - '__code_insee__', - a9.insee_com, - a9.nom, - 'voie'::text -FROM (SELECT way FROM planet_osm_polygon WHERE "ref:INSEE" = '__code_insee__') p -JOIN (SELECT name,osm_id,way FROM planet_osm_line WHERE highway != '' AND name != '') l -ON l.way && p.way -JOIN planet_osm_rels r -ON r.osm_id = l.osm_id +SELECT DISTINCT provenance, + name, + tags, + libelle_suffixe, + a9.code_insee, + a9.nom, + 'voie'::text +FROM ( SELECT 6::integer AS provenance, + l.way, + l.name, + r.tags + FROM (SELECT way FROM planet_osm_polygon WHERE "ref:INSEE" = '__code_insee__') p + JOIN (SELECT name,osm_id,way FROM planet_osm_line WHERE highway != '' AND name != '') l + ON l.way && p.way + JOIN planet_osm_rels r + ON r.osm_id = l.osm_id + UNION ALL + SELECT 7, + l.way, + l.name, + r.tags + FROM (SELECT way FROM planet_osm_polygon WHERE "ref:INSEE" = '__code_insee__') p + JOIN (SELECT name,osm_id,way FROM planet_osm_polygon WHERE highway != '' AND name != '') l + ON l.way && p.way + JOIN planet_osm_rels r + ON r.osm_id = l.osm_id) l LEFT OUTER JOIN suffixe h -ON ST_Intersects(l.way, h.geometrie) +ON ST_Intersects(l.way, h.geometrie) LEFT OUTER JOIN (SELECT * FROM polygones_insee_a9 where insee_a8 = '__code_insee__') a9 -ON ST_Contains(a9.geometrie,l.way) -UNION ALL -SELECT DISTINCT l.name, - r.tags, - h.libelle_suffixe, - '__code_insee__', - a9.insee_com, - a9.nom, - 'voie'::text -FROM (SELECT way FROM planet_osm_polygon WHERE "ref:INSEE" = '__code_insee__') p -JOIN (SELECT name,osm_id,way FROM planet_osm_polygon WHERE highway != '' AND name != '') l -ON l.way && p.way -JOIN planet_osm_rels r -ON r.osm_id = l.osm_id -LEFT OUTER JOIN suffixe h -ON ST_Intersects(l.way, h.geometrie) -LEFT OUTER 
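For reference, the provenance codes emitted by the three charge_noms_* queries and consumed by Noms.charge_noms_osm_hors_numeros() in patch 035 above, summarised as a plain mapping (the descriptions paraphrase the SQL and are not part of any schema):

PROVENANCE_NOMS = {
    1: "named point with a place tag or ref:FR:FANTOIR, intersecting the commune polygon",
    2: "named highway line contained in the commune polygon",
    3: "named polygon (highway, ref:FR:FANTOIR, residential landuse or parking)",
    4: "highway line member of an associatedStreet relation, contained in the commune",
    5: "highway polygon member of an associatedStreet relation, contained in the commune",
    6: "relation member line matched only by bounding box",
    7: "relation member polygon matched only by bounding box",
}
# Codes 1 to 5 are accepted as-is; 6 and 7 are kept only when the relation carries a
# ref:FR:FANTOIR, and Noms.add_nom() then drops any FANTOIR from another commune.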
JOIN (SELECT * FROM polygones_insee_a9 where insee_a8 = '__code_insee__') a9 -ON ST_Contains(a9.geometrie,l.way); - +ON ST_Contains(a9.geometrie,l.way); diff --git a/bano/sql/charge_numeros_OSM.sql b/bano/sql/charge_numeros_OSM.sql index e9341ee..e09da72 100644 --- a/bano/sql/charge_numeros_OSM.sql +++ b/bano/sql/charge_numeros_OSM.sql @@ -16,7 +16,7 @@ FROM pt."addr:housenumber" AS hsnr, pt."addr:street" AS street_name, pt."addr:place" AS place_name, - null::hstore AS tags, + tags, pt."addr:postcode" AS code_postal FROM (SELECT geometrie FROM polygones_insee WHERE admin_level = 8 AND code_insee = '__code_insee__') p JOIN planet_osm_point pt @@ -30,7 +30,7 @@ FROM w."addr:housenumber", w."addr:street", w."addr:place", - null::hstore tags, + tags, w."addr:postcode" FROM (SELECT geometrie FROM polygones_insee WHERE admin_level = 8 AND code_insee = '__code_insee__') p JOIN planet_osm_polygon w diff --git a/bano/sql/create_table_base_bano.sql b/bano/sql/create_table_base_bano.sql index aa877c9..91eaf69 100644 --- a/bano/sql/create_table_base_bano.sql +++ b/bano/sql/create_table_base_bano.sql @@ -21,6 +21,8 @@ CREATE TABLE IF NOT EXISTS nom_fantoir ( nom text, code_insee text, nature text, + code_insee_ancienne_commune text, + nom_ancienne_commune text, source text); CREATE INDEX IF NOT EXISTS idx_nom_fantoir_code_insee ON nom_fantoir (code_insee); From af0527a2f37b99cefb84591f4953d8b2e3f0d3dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Tue, 18 Oct 2022 22:04:58 +0000 Subject: [PATCH 036/163] renommage ban_odbl -> ban --- bano/sources/ban.py | 32 +++++++++++++++++++------- bano/sql/charge_ban_commune.sql | 2 +- bano/sql/update_table_rep_b_as_bis.sql | 8 +++---- cron_bano.sh | 4 ++-- 4 files changed, 31 insertions(+), 15 deletions(-) diff --git a/bano/sources/ban.py b/bano/sources/ban.py index 05b8b02..51899d0 100644 --- a/bano/sources/ban.py +++ b/bano/sources/ban.py @@ -23,10 +23,10 @@ def process_ban(departements, **kwargs): raise ValueError(f"Départements inconnus : {depts_inconnus}") # um.set_csv_directory(um.get_directory_pathname()) for dept in sorted(departements): - print(f"Processing {dept}") + print(f"Département {dept}") status = download(source, dept) - if status: - import_to_pg(source, dept) + # if status: + import_to_pg(source, dept) def download(source, departement): destination = get_destination(departement) @@ -55,13 +55,29 @@ def import_to_pg(source, departement, **kwargs): f.readline() # skip CSV headers with bano_sources.cursor() as cur_insert: try: - cur_insert.execute(f"DELETE FROM ban_odbl WHERE code_insee LIKE '{departement+'%'}'") - cur_insert.copy_from(f, "ban_odbl", sep=';', null='') - # bano_sources.commit() + cur_insert.execute(f"DELETE FROM ban WHERE code_insee LIKE '{departement+'%'}'") + cur_insert.copy_from(f, "ban", sep=';', null='') b.batch_stop_log(id_batch,True) except psycopg2.DataError as e: - b.batch_stop_log(id_batch,False) - # bano_sources.reset() + print(f"Erreur au chargement de la BAN {departement}") + print(e) + print("Essai via shell") + try: + cur_insert.close() + bano_sources.reset() + ret = subprocess.run(["gzip","-cd",fichier_source],capture_output=True,text=True) + tmp_filename = Path(os.environ['BAN_CACHE_DIR']) / 'tmp.csv' + with open(tmp_filename,'w') as tmpfile: + tmpfile.write(ret.stdout) + + subprocess.run(["psql","-d","bano_sources","-U","cadastre","-1","-c",f"COPY ban FROM '{tmp_filename}' WITH CSV HEADER NULL '' DELIMITER ';'"]) + tmp_filename.unlink() + b.batch_stop_log(id_batch,True) + except 
e: + print(f"Erreur au chargement de la BAN {departement}") + print(f"Abandon du chargement de la BAN {departement}") + bano_sources.reset() + b.batch_stop_log(id_batch,False) def get_destination(departement): try: diff --git a/bano/sql/charge_ban_commune.sql b/bano/sql/charge_ban_commune.sql index 37ace48..979aad1 100644 --- a/bano/sql/charge_ban_commune.sql +++ b/bano/sql/charge_ban_commune.sql @@ -20,7 +20,7 @@ AS code_postal, code_insee_ancienne_commune, nom_ancienne_commune -FROM ban_odbl b +FROM ban b LEFT OUTER JOIN rep_b_as_bis r USING (id_fantoir,numero) WHERE code_insee = '__code_insee__') diff --git a/bano/sql/update_table_rep_b_as_bis.sql b/bano/sql/update_table_rep_b_as_bis.sql index f05bb2c..8705af1 100644 --- a/bano/sql/update_table_rep_b_as_bis.sql +++ b/bano/sql/update_table_rep_b_as_bis.sql @@ -3,27 +3,27 @@ CREATE TABLE rep_b_as_bis AS SELECT id_fantoir, numero -FROM ban_odbl +FROM ban WHERE rep = 'b' EXCEPT (SELECT id_fantoir, numero -FROM ban_odbl +FROM ban WHERE rep = 'a' UNION SELECT id_fantoir, numero -FROM ban_odbl +FROM ban WHERE rep = 'c' UNION SELECT id_fantoir, numero -FROM ban_odbl +FROM ban WHERE rep = 'd'); COMMIT; \ No newline at end of file diff --git a/cron_bano.sh b/cron_bano.sh index 40c4454..e9b18f1 100755 --- a/cron_bano.sh +++ b/cron_bano.sh @@ -38,8 +38,8 @@ psql -d osm -U cadastre -f sql/create_table_polygones_communes.sql psql -d osm -U cadastre -f sql/create_table_polygones_postaux.sql ./copy_table_from_osm_to_cadastre.sh polygones_postaux -./copy_table_from_osm_to_cadastre.sh ban_odbl -psql -d cadastre -U cadastre -f sql/post_copie_ban_odbl.sql +./copy_table_from_osm_to_cadastre.sh ban +psql -d cadastre -U cadastre -f sql/post_copie_ban.sql # exports cat deplist.txt | parallel -j 4 bano export {1} From 675b7c1ab52cdf92ff4340ad9a53f1d2b2b53e89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Tue, 18 Oct 2022 22:06:19 +0000 Subject: [PATCH 037/163] Usage du COG --- bano/db_helpers.py | 17 +++++++++ bano/sources/cog.py | 48 ++++++++++++------------- bano/sql/liste_communes_par_dept.sql | 9 +++++ bano/sql/nom_commune_par_code_insee.sql | 3 ++ 4 files changed, 53 insertions(+), 24 deletions(-) create mode 100644 bano/db_helpers.py create mode 100644 bano/sql/liste_communes_par_dept.sql create mode 100644 bano/sql/nom_commune_par_code_insee.sql diff --git a/bano/db_helpers.py b/bano/db_helpers.py new file mode 100644 index 0000000..314bdc2 --- /dev/null +++ b/bano/db_helpers.py @@ -0,0 +1,17 @@ +import time +import os + +from . 
import db +from .sql import sql_get_data + + +def liste_communes_par_dept(dept): + return sql_get_data('liste_communes_par_dept',dict(dept=dept)) + +def nom_commune(code_insee): + return sql_get_data('nom_commune_par_code_insee',dict(code_insee=code_insee)) + +def get_cadastre_format(insee_com): + with db.bano.cursor() as conn : + conn.execute(f"SELECT format_cadastre FROM code_cadastre WHERE insee_com = '{insee_com}';") + return conn.fetchone()[0] diff --git a/bano/sources/cog.py b/bano/sources/cog.py index 2a5cff7..539dc9c 100644 --- a/bano/sources/cog.py +++ b/bano/sources/cog.py @@ -1,5 +1,5 @@ # import csv -# import gzip +from zipfile import ZipFile import os # import subprocess # from datetime import datetime @@ -16,19 +16,18 @@ def process_cog(**kwargs): sql_process('create_table_cog',dict(),bano) - for fichier_cog in ['cog_commune']: - status = download(fichier_cog) - if status: - import_to_pg(fichier_cog) + zip = get_destination('cog_2022.zip') + status = download(zip) + if status: + import_to_pg(zip) -def download(fichier_cog): - destination = get_destination(fichier_cog) +def download(destination): headers = {} if destination.exists(): headers['If-Modified-Since'] = formatdate(destination.stat().st_mtime) - resp = requests.get(f'https://www.data.gouv.fr/fr/datasets/r/6c1db35c-3ee3-42f8-80fb-d366e451cc30', headers=headers) - id_batch = b.batch_start_log('download source', 'COG commune','France') + resp = requests.get(f'https://www.insee.fr/fr/statistiques/fichier/6051727/cog_ensemble_2022_csv.zip', headers=headers) + id_batch = b.batch_start_log('download source', 'COG ZIP','France') if resp.status_code == 200: with destination.open('wb') as f: f.write(resp.content) @@ -41,24 +40,25 @@ def download(fichier_cog): return False -def import_to_pg(fichier_cog): - id_batch = b.batch_start_log('import source', f'COG {fichier_cog}','France') - fichier_source = get_destination(fichier_cog) - with open(fichier_source, mode='rt') as f: - f.readline() # skip CSV headers - with bano.cursor() as cur_insert: - try: - cur_insert.execute(f"TRUNCATE {fichier_cog}") - cur_insert.copy_from(f,fichier_cog, sep=',', null='') - b.batch_stop_log(id_batch,True) - except psycopg2.DataError as e: - b.batch_stop_log(id_batch,False) +def import_to_pg(fichier_zip): + table = 'cog_commune' + id_batch = b.batch_start_log('import source', f'COG {table}','France') + with ZipFile(fichier_zip) as f: + with f.open('commune_2022.csv') as csv: + csv.readline() # skip CSV headers + with bano.cursor() as cur_insert: + try: + cur_insert.execute(f"TRUNCATE {table}") + cur_insert.copy_from(csv,table, sep=',', null='') + b.batch_stop_log(id_batch,True) + except psycopg2.DataError as e: + b.batch_stop_log(id_batch,False) def get_destination(fichier_cog): try: - cwd = Path(os.environ['CSV_DIR']) + cwd = Path(os.environ['COG_DIR']) except KeyError: - raise ValueError(f"La variable CSV n'est pas définie") + raise ValueError(f"La variable COG_DIR n'est pas définie") if not cwd.exists(): raise ValueError(f"Le répertoire {cwd} n'existe pas") - return cwd / f'{fichier_cog}.csv' + return cwd / f'{fichier_cog}' diff --git a/bano/sql/liste_communes_par_dept.sql b/bano/sql/liste_communes_par_dept.sql new file mode 100644 index 0000000..ded6ca4 --- /dev/null +++ b/bano/sql/liste_communes_par_dept.sql @@ -0,0 +1,9 @@ +SELECT com, + ncc +FROM cog_commune c +LEFT OUTER JOIN (SELECT comparent FROM cog_commune WHERE dep = '__dept__' AND typecom = 'ARM') p +ON (c.com = p.comparent) +WHERE c.dep = '__dept__' AND + c.typecom != 'COMD' AND + 
p.comparent IS NULL +ORDER BY 1 diff --git a/bano/sql/nom_commune_par_code_insee.sql b/bano/sql/nom_commune_par_code_insee.sql new file mode 100644 index 0000000..6f4e57a --- /dev/null +++ b/bano/sql/nom_commune_par_code_insee.sql @@ -0,0 +1,3 @@ +SELECT ncc +FROM cog_commune c +WHERE com = '__code_insee__'; From 017f7bad1e0c420b06a60e833293b492b1f17828 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Tue, 18 Oct 2022 22:07:35 +0000 Subject: [PATCH 038/163] Pas besoin de doubles quotes au chargement PG --- bano/models.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bano/models.py b/bano/models.py index aa3fc64..e7ecfae 100644 --- a/bano/models.py +++ b/bano/models.py @@ -29,7 +29,7 @@ def __hash__(self): # return hash((self.nom,self.fantoir)) def _as_csv_format_bano(self): - return f"{self.fantoir}${hp.escape_quotes(self.nom)}${self.nature}${self.code_insee}${self.code_insee_ancienne_commune}${self.source}" + return f"{self.fantoir}${self.nom}${self.nature}${self.code_insee}${self.code_insee_ancienne_commune}${self.source}" def add_fantoir(self,topo): if not self.fantoir: @@ -92,7 +92,7 @@ def __eq__(self,other): return (self.code_insee == other.code_insee and self.source == other.source and self.numero == other.numero and self.voie == other.voie and self.place == other.place and self.sous_commune_code == other.sous_commune_code) def _as_csv_format_bano(self): - return f"{self.fantoir if self.fantoir else ''}${self.x}${self.y}${self.numero}${hp.escape_quotes(self.voie) if self.voie else ''}${hp.escape_quotes(self.place) if self.place else ''}${self.code_postal}${self.code_insee}${self.sous_commune_code if self.sous_commune_code else ''}${self.source}" + return f"{self.fantoir if self.fantoir else ''}${self.x}${self.y}${self.numero}${self.voie if self.voie else ''}${self.place if self.place else ''}${self.code_postal}${self.code_insee}${self.sous_commune_code if self.sous_commune_code else ''}${self.source}" def _as_string(self): return (f"source : {self.source}, numero : {self.numero}, voie : {self.voie} ({self.voie_normalisee}), place : {self.place}, fantoir : {self.fantoir}, code_postal:{self.code_postal}, sous_commune : {self.sous_commune_code} - {self.sous_commune_nom}") From a5bfea1dfeb5e4e0dce39e4c93ee1b67dc1b6c6c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Tue, 18 Oct 2022 22:08:11 +0000 Subject: [PATCH 039/163] adaptation pour lancement par dept --- bano/rapprochement.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/bano/rapprochement.py b/bano/rapprochement.py index 90000bc..f7d559f 100644 --- a/bano/rapprochement.py +++ b/bano/rapprochement.py @@ -1,9 +1,11 @@ #!/usr/bin/env python # coding: UTF-8 +from . 
import db_helpers as h from .models import Noms, Adresses, Topo -def process(source,code_insee,dept,**kwargs): + +def process_unitaire(source,code_insee): topo = Topo(code_insee) adresses = Adresses(code_insee,source) noms = Noms(code_insee,source) @@ -19,3 +21,13 @@ def process(source,code_insee,dept,**kwargs): noms.add_fantoir(topo) adresses.enregistre() noms.enregistre() + +def process(source,code_insee,dept,**kwargs): + if dept: + liste_insee = h.liste_communes_par_dept(dept) + else: + liste_insee = ((code_insee,h.nom_commune(code_insee))) + for code_insee,nom in liste_insee: + if dept: + print(f"{code_insee} - {nom}") + process_unitaire(source,code_insee) From 08299e2cc46d14930c4bcc7b0f4f0e3dc97a8835 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Sat, 22 Oct 2022 21:55:57 +0000 Subject: [PATCH 040/163] Chargement BAN avec gestion des erreurs --- bano/sources/ban.py | 58 ++++++++++++++++++++++++++------------------- 1 file changed, 33 insertions(+), 25 deletions(-) diff --git a/bano/sources/ban.py b/bano/sources/ban.py index 51899d0..074609f 100644 --- a/bano/sources/ban.py +++ b/bano/sources/ban.py @@ -16,19 +16,25 @@ # from .. import update_manager as um def process_ban(departements, **kwargs): - source = 'BAN' departements = set(departements) depts_inconnus = departements - set(DEPARTEMENTS) if depts_inconnus: raise ValueError(f"Départements inconnus : {depts_inconnus}") - # um.set_csv_directory(um.get_directory_pathname()) + depts_en_echec = [] for dept in sorted(departements): print(f"Département {dept}") - status = download(source, dept) - # if status: - import_to_pg(source, dept) + status = download(dept) + if status: + if not (import_to_pg(dept)): + depts_en_echec.append(dept) + print('depts_en_echec',depts_en_echec) -def download(source, departement): + for dept in depts_en_echec: + print(f"Département {dept}") + import_to_pg_subp(dept) + + +def download(departement): destination = get_destination(departement) headers = {} if destination.exists(): @@ -47,37 +53,39 @@ def download(source, departement): b.batch_stop_log(id_batch,False) return False - -def import_to_pg(source, departement, **kwargs): +def import_to_pg(departement, **kwargs): id_batch = b.batch_start_log('import source', 'BAN',departement) fichier_source = get_destination(departement) with gzip.open(fichier_source, mode='rt') as f: f.readline() # skip CSV headers with bano_sources.cursor() as cur_insert: try: - cur_insert.execute(f"DELETE FROM ban WHERE code_insee LIKE '{departement+'%'}'") + cur_insert.execute(f"DELETE FROM ban WHERE code_insee LIKE '{departement}%'") cur_insert.copy_from(f, "ban", sep=';', null='') b.batch_stop_log(id_batch,True) + return True except psycopg2.DataError as e: print(f"Erreur au chargement de la BAN {departement}") print(e) - print("Essai via shell") - try: - cur_insert.close() - bano_sources.reset() - ret = subprocess.run(["gzip","-cd",fichier_source],capture_output=True,text=True) - tmp_filename = Path(os.environ['BAN_CACHE_DIR']) / 'tmp.csv' - with open(tmp_filename,'w') as tmpfile: - tmpfile.write(ret.stdout) + return False - subprocess.run(["psql","-d","bano_sources","-U","cadastre","-1","-c",f"COPY ban FROM '{tmp_filename}' WITH CSV HEADER NULL '' DELIMITER ';'"]) - tmp_filename.unlink() - b.batch_stop_log(id_batch,True) - except e: - print(f"Erreur au chargement de la BAN {departement}") - print(f"Abandon du chargement de la BAN {departement}") - bano_sources.reset() - b.batch_stop_log(id_batch,False) +def import_to_pg_subp(departement, 
**kwargs): + id_batch = b.batch_start_log('import source', 'BAN',departement) + print("Essai via shell") + try: + fichier_source = get_destination(departement) + ret = subprocess.run(["gzip","-cd",fichier_source],capture_output=True,text=True) + tmp_filename = Path(os.environ['BAN_CACHE_DIR']) / 'tmp.csv' + with open(tmp_filename,'w') as tmpfile: + tmpfile.write(ret.stdout) + + subprocess.run(["psql","-d","bano_sources","-U","cadastre","-1","-c",f"DELETE FROM ban WHERE code_insee LIKE '{departement}%';COPY ban FROM '{tmp_filename}' WITH CSV HEADER NULL '' DELIMITER ';'"]) + tmp_filename.unlink() + b.batch_stop_log(id_batch,True) + except e: + print(f"Erreur au chargement de la BAN {departement}") + print(f"Abandon du chargement de la BAN {departement}") + b.batch_stop_log(id_batch,False) def get_destination(departement): try: From acb3836b8c3f0cf990d6bee0242bc2b5bc872ebc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Sun, 23 Oct 2022 21:19:28 +0000 Subject: [PATCH 041/163] Rapprochement par lots - logs --- bano/db_helpers.py | 4 ++-- bano/rapprochement.py | 33 ++++++++++++++++++++------------- 2 files changed, 22 insertions(+), 15 deletions(-) diff --git a/bano/db_helpers.py b/bano/db_helpers.py index 314bdc2..c449412 100644 --- a/bano/db_helpers.py +++ b/bano/db_helpers.py @@ -6,10 +6,10 @@ def liste_communes_par_dept(dept): - return sql_get_data('liste_communes_par_dept',dict(dept=dept)) + return sql_get_data('liste_communes_par_dept',dict(dept=dept),db.bano) def nom_commune(code_insee): - return sql_get_data('nom_commune_par_code_insee',dict(code_insee=code_insee)) + return sql_get_data('nom_commune_par_code_insee',dict(code_insee=code_insee),db.bano)[0][0] def get_cadastre_format(insee_com): with db.bano.cursor() as conn : diff --git a/bano/rapprochement.py b/bano/rapprochement.py index f7d559f..53d9514 100644 --- a/bano/rapprochement.py +++ b/bano/rapprochement.py @@ -2,31 +2,38 @@ # coding: UTF-8 from . import db_helpers as h +from . 
import batch as b from .models import Noms, Adresses, Topo def process_unitaire(source,code_insee): - topo = Topo(code_insee) - adresses = Adresses(code_insee,source) - noms = Noms(code_insee,source) + id_batch = b.batch_start_log('rapprochement', source, code_insee) + try: + topo = Topo(code_insee) + adresses = Adresses(code_insee,source) + noms = Noms(code_insee,source) - if source == 'OSM': - noms.charge_noms_osm_hors_numeros() - adresses.charge_numeros_osm() + if source == 'OSM': + noms.charge_noms_osm_hors_numeros() + adresses.charge_numeros_osm() - if source == 'BAN': - adresses.charge_numeros_ban(topo) + if source == 'BAN': + adresses.charge_numeros_ban(topo) + + adresses.noms_des_adresses(noms) + noms.add_fantoir(topo) + adresses.enregistre() + noms.enregistre() + b.batch_stop_log(id_batch,True) + except: + b.batch_stop_log(id_batch,False) - adresses.noms_des_adresses(noms) - noms.add_fantoir(topo) - adresses.enregistre() - noms.enregistre() def process(source,code_insee,dept,**kwargs): if dept: liste_insee = h.liste_communes_par_dept(dept) else: - liste_insee = ((code_insee,h.nom_commune(code_insee))) + liste_insee = [(code_insee,h.nom_commune(code_insee))] for code_insee,nom in liste_insee: if dept: print(f"{code_insee} - {nom}") From 6c57ce2153c7ea3b53df06dfad87dd52cc59e129 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Sun, 23 Oct 2022 21:20:42 +0000 Subject: [PATCH 042/163] =?UTF-8?q?Format=20des=20donn=C3=A9es=20en=20sort?= =?UTF-8?q?ie?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- bano/models.py | 35 +++++++++++++++++------------------ 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/bano/models.py b/bano/models.py index e7ecfae..8fbab89 100644 --- a/bano/models.py +++ b/bano/models.py @@ -29,7 +29,7 @@ def __hash__(self): # return hash((self.nom,self.fantoir)) def _as_csv_format_bano(self): - return f"{self.fantoir}${self.nom}${self.nature}${self.code_insee}${self.code_insee_ancienne_commune}${self.source}" + return f"{self.fantoir}\t{self.nom}\t{self.nature}\t{self.code_insee}\t{self.code_insee_ancienne_commune if self.code_insee_ancienne_commune else ''}\t{self.source}" def add_fantoir(self,topo): if not self.fantoir: @@ -67,35 +67,35 @@ def enregistre(self): io_in_csv.write(t._as_csv_format_bano()+'\n') io_in_csv.seek(0) with db.bano.cursor() as cur_insert: - cur_insert.copy_from(io_in_csv, "nom_fantoir", sep='$',null='',columns=('fantoir','nom','nature','code_insee','code_insee_ancienne_commune','source')) + cur_insert.copy_from(io_in_csv, "nom_fantoir", null='',columns=('fantoir','nom','nature','code_insee','code_insee_ancienne_commune','source')) class Adresse: - def __init__(self, code_insee, x, y, num, source, voie=None, place=None, fantoir=None, code_postal=None, sous_commune_code=None, sous_commune_nom=None): + def __init__(self, code_insee, x, y, num, source, voie=None, place=None, fantoir=None, code_postal=None, code_insee_ancienne_commune=None, sous_commune_nom=None): self.code_insee = code_insee - self.x = x - self.y = y + self.x = round(x,6) + self.y = round(y,6) self.source = source self.numero = num self.voie = voie self.place = place self.fantoir = fantoir self.code_postal = code_postal - self.sous_commune_code = sous_commune_code + self.code_insee_ancienne_commune = code_insee_ancienne_commune self.sous_commune_nom = sous_commune_nom self.voie_normalisee = hp.normalize(self.voie) if self.voie else None self.place_normalisee = 
hp.format_toponyme(self.place) if self.place else None def __hash__(self): - return hash((self.code_insee,self.source,self.numero,self.voie,self.place,self.sous_commune_code)) + return hash((self.code_insee,self.source,self.numero,self.voie,self.place,self.code_insee_ancienne_commune)) def __eq__(self,other): - return (self.code_insee == other.code_insee and self.source == other.source and self.numero == other.numero and self.voie == other.voie and self.place == other.place and self.sous_commune_code == other.sous_commune_code) + return (self.code_insee == other.code_insee and self.source == other.source and self.numero == other.numero and self.voie == other.voie and self.place == other.place and self.code_insee_ancienne_commune == other.code_insee_ancienne_commune) def _as_csv_format_bano(self): - return f"{self.fantoir if self.fantoir else ''}${self.x}${self.y}${self.numero}${self.voie if self.voie else ''}${self.place if self.place else ''}${self.code_postal}${self.code_insee}${self.sous_commune_code if self.sous_commune_code else ''}${self.source}" + return f"{self.fantoir if self.fantoir else ''}\t{self.x}\t{self.y}\t{self.numero}\t{self.voie if self.voie else ''}\t{self.place if self.place else ''}\t{self.code_postal}\t{self.code_insee}\t{self.code_insee_ancienne_commune if self.code_insee_ancienne_commune else ''}\t{self.source}" def _as_string(self): - return (f"source : {self.source}, numero : {self.numero}, voie : {self.voie} ({self.voie_normalisee}), place : {self.place}, fantoir : {self.fantoir}, code_postal:{self.code_postal}, sous_commune : {self.sous_commune_code} - {self.sous_commune_nom}") + return (f"source : {self.source}, numero : {self.numero}, voie : {self.voie} ({self.voie_normalisee}), place : {self.place}, fantoir : {self.fantoir}, code_postal:{self.code_postal}, sous_commune : {self.code_insee_ancienne_commune} - {self.sous_commune_nom}") class Adresses: def __init__(self, code_insee,source): @@ -142,7 +142,7 @@ def charge_numeros_ban(self,topo): fantoir = topo.code_fantoir9_vers_fantoir10.get(fantoir9) else: fantoir = None - self.add_adresse(Adresse(self.code_insee,lon,lat,numero,'BAN',voie=voie,fantoir=fantoir,code_postal=code_postal,sous_commune_code=code_insee_ac,sous_commune_nom=nom_ac)) + self.add_adresse(Adresse(self.code_insee,lon,lat,numero,'BAN',voie=voie,fantoir=fantoir,code_postal=code_postal,code_insee_ancienne_commune=code_insee_ac,sous_commune_nom=nom_ac)) def charge_numeros_osm(self): data = sql_get_data('charge_numeros_OSM',dict(code_insee=self.code_insee),db.bano_sources)+sql_get_data('charge_numeros_bbox_OSM',dict(code_insee=self.code_insee),db.bano_sources) @@ -151,23 +151,22 @@ def charge_numeros_osm(self): fantoir = tags.get('ref:FR:FANTOIR') if fantoir and not hp.fantoir_valide(fantoir,self.code_insee): - print("fantoir invalide",fantoir,self.code_insee) continue if provenance in (1,2,): - self.add_adresse(Adresse(self.code_insee,lon,lat,numero,'OSM',voie=voie,place=place,fantoir=fantoir,code_postal=code_postal,sous_commune_code=code_insee_ac,sous_commune_nom=nom_ac)) + self.add_adresse(Adresse(self.code_insee,lon,lat,numero,'OSM',voie=voie,place=place,fantoir=fantoir,code_postal=code_postal,code_insee_ancienne_commune=code_insee_ac,sous_commune_nom=nom_ac)) if provenance in (3,4,) and tags.get('name'): - self.add_adresse(Adresse(self.code_insee,lon,lat,numero,'OSM',voie=tags['name'],place=None,fantoir=fantoir,code_postal=code_postal,sous_commune_code=code_insee_ac,sous_commune_nom=nom_ac)) + 
self.add_adresse(Adresse(self.code_insee,lon,lat,numero,'OSM',voie=tags['name'],place=None,fantoir=fantoir,code_postal=code_postal,code_insee_ancienne_commune=code_insee_ac,sous_commune_nom=nom_ac)) if provenance in (5,6,) and tags.get('name') and tags.get('ref:FR:FANTOIR'): if tags['ref:FR:FANTOIR'][0:5] == self.code_insee: - self.add_adresse(Adresse(self.code_insee,lon,lat,numero,'OSM',voie=tags['name'],place=None,fantoir=tags['ref:FR:FANTOIR'],code_postal=code_postal,sous_commune_code=code_insee_ac,sous_commune_nom=nom_ac)) + self.add_adresse(Adresse(self.code_insee,lon,lat,numero,'OSM',voie=tags['name'],place=None,fantoir=tags['ref:FR:FANTOIR'],code_postal=code_postal,code_insee_ancienne_commune=code_insee_ac,sous_commune_nom=nom_ac)) def noms_des_adresses(self,noms): for a in self: if a.voie: - noms.add_nom(Nom(a.voie,a.fantoir,'voie',a.source,self.code_insee,a.sous_commune_code)) + noms.add_nom(Nom(a.voie,a.fantoir,'voie',a.source,self.code_insee,a.code_insee_ancienne_commune)) if a.place: - noms.add_nom(Nom(a.place,a.fantoir,'place',a.source,self.code_insee,a.sous_commune_code)) + noms.add_nom(Nom(a.place,a.fantoir,'place',a.source,self.code_insee,a.code_insee_ancienne_commune)) def enregistre(self): sql_process('suppression_adresses_commune_source',dict(code_insee=self.code_insee,source=self.source),db.bano) @@ -176,7 +175,7 @@ def enregistre(self): io_in_csv.write(a._as_csv_format_bano()+'\n') # separateur $ car on trouve des virgules dans le contenu io_in_csv.seek(0) with db.bano.cursor() as cur_insert: - cur_insert.copy_from(io_in_csv, "bano_adresses", sep='$',null='',columns=('fantoir','lon','lat','numero','nom_voie','nom_place','code_postal','code_insee','code_insee_ancienne_commune','source')) + cur_insert.copy_from(io_in_csv, "bano_adresses",null='',columns=('fantoir','lon','lat','numero','nom_voie','nom_place','code_postal','code_insee','code_insee_ancienne_commune','source')) class Topo: From fd402f84b1c8199fd0a0c998e207a216bdf9cd1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Sun, 18 Dec 2022 21:33:11 +0000 Subject: [PATCH 043/163] WIP lieux-dits --- bano/bin.py | 16 +++++++++- bano/models.py | 8 +++++ bano/rapprochement.py | 2 ++ .../{cadastre_json.py => cadastre_ld.py} | 30 +++++++++---------- bano/sql/create_table_base_bano_sources.sql | 12 +++++++- 5 files changed, 51 insertions(+), 17 deletions(-) rename bano/sources/{cadastre_json.py => cadastre_ld.py} (74%) diff --git a/bano/bin.py b/bano/bin.py index 2c35fb6..85f4f50 100644 --- a/bano/bin.py +++ b/bano/bin.py @@ -8,7 +8,7 @@ from . import setup_db from . import rapprochement from . 
import boite_a_outils -from .sources import topo,ban,cog +from .sources import topo,ban,cog,cadastre_ld from .constants import DEPARTEMENTS @@ -55,6 +55,20 @@ def main(): ) subparser.set_defaults(func=ban.process_ban) + subparser = subparsers.add_parser( + "charge_ld_cadastre", + help="Charge une version des lieux-dits du Cadastre JSON", + description="Charge une version des lieux-dits du Cadastre JSON", + ) + subparser.add_argument( + "departements", + type=str, + help="Départements à traiter", + nargs="*", + default=DEPARTEMENTS, + ) + subparser.set_defaults(func=cadastre_ld.process) + subparser = subparsers.add_parser( "update_bis_table", help="Identifie les indices de répétition b,t,q assimilables à bis, ter, quater", diff --git a/bano/models.py b/bano/models.py index 8fbab89..b9dcefd 100644 --- a/bano/models.py +++ b/bano/models.py @@ -178,6 +178,14 @@ def enregistre(self): cur_insert.copy_from(io_in_csv, "bano_adresses",null='',columns=('fantoir','lon','lat','numero','nom_voie','nom_place','code_postal','code_insee','code_insee_ancienne_commune','source')) +class Lieux_dits: + def __init__(self,code_insee): + self.code_insee = code_insee + self.points = set() + + def charge_points_cadastre(self): + data = sql_get_data('charge_points_cadastre') + class Topo: def __init__(self, code_insee): self.code_insee = code_insee diff --git a/bano/rapprochement.py b/bano/rapprochement.py index 53d9514..b5a77e0 100644 --- a/bano/rapprochement.py +++ b/bano/rapprochement.py @@ -16,6 +16,7 @@ def process_unitaire(source,code_insee): if source == 'OSM': noms.charge_noms_osm_hors_numeros() adresses.charge_numeros_osm() + points_nommes.charge_points() if source == 'BAN': adresses.charge_numeros_ban(topo) @@ -24,6 +25,7 @@ def process_unitaire(source,code_insee): noms.add_fantoir(topo) adresses.enregistre() noms.enregistre() + b.batch_stop_log(id_batch,True) except: b.batch_stop_log(id_batch,False) diff --git a/bano/sources/cadastre_json.py b/bano/sources/cadastre_ld.py similarity index 74% rename from bano/sources/cadastre_json.py rename to bano/sources/cadastre_ld.py index aa17030..6b02826 100644 --- a/bano/sources/cadastre_json.py +++ b/bano/sources/cadastre_ld.py @@ -15,26 +15,26 @@ from .. 
import helpers as hp -def process(prefixe, departements, **kwargs): +def process(departements, **kwargs): departements = set(departements) depts_inconnus = departements - set(DEPARTEMENTS) if depts_inconnus: raise ValueError(f"Départements inconnus : {depts_inconnus}") for dept in sorted(departements): print(f"Processing {dept}") - status = download(prefixe, dept) + status = download(dept) if status: - import_to_pg(prefixe, dept) - post_process(prefixe, dept) + import_to_pg(dept) + post_process(dept) -def download(prefixe, departement): - destination = get_destination(prefixe, departement) +def download(departement): + destination = get_destination(departement) headers = {} if destination.exists(): headers['If-Modified-Since'] = formatdate(destination.stat().st_mtime) - resp = requests.get(f'https://cadastre.data.gouv.fr/data/etalab-cadastre/latest/geojson/departements/{departement}/cadastre-{departement}-{prefixe}.json.gz', headers=headers) + resp = requests.get(f'https://cadastre.data.gouv.fr/data/etalab-cadastre/latest/geojson/departements/{departement}/cadastre-{departement}-lieux_dits.json.gz', headers=headers) if resp.status_code == 200: with destination.open('wb') as f: f.write(resp.content) @@ -44,15 +44,15 @@ def download(prefixe, departement): return False -def import_to_pg(prefixe, departement, **kwargs): - fichier_source = get_destination(prefixe, departement) +def import_to_pg(departement, **kwargs): + fichier_source = get_destination(departement) with gzip.open(fichier_source, mode='rt') as f: json_source = json.load(f) with db.bano_cache.cursor() as cur_insert: try: - cur_insert.execute(f"DELETE FROM {prefixe} WHERE insee_com LIKE '{departement+'%'}';COMMIT;") + cur_insert.execute(f"DELETE FROM lieux_dits WHERE insee_com LIKE '{departement+'%'}';COMMIT;") a_values = [] - str_query = f"INSERT INTO {prefixe} VALUES " + str_query = f"INSERT INTO lieux_dits VALUES " for l in json_source['features']: a_values.append(f"('{l['properties']['commune']}','{hp.escape_quotes(l['properties']['nom'])}','{l['properties']['created']}','{l['properties']['updated']}',ST_SetSRID(ST_GeomFromGeoJSON('{hp.replace_single_quotes_with_double(str(l['geometry']))}'),4326))") if a_values: @@ -61,19 +61,19 @@ def import_to_pg(prefixe, departement, **kwargs): print(e) db.bano_cache.reset() -def post_process(prefixe, departement, **kwargs): - sqlfile = Path(__file__).parent.parent / 'sql' / f'{prefixe}_post_process.sql' +def post_process(departement, **kwargs): + sqlfile = Path(__file__).parent.parent / 'sql' / 'lieux_dits_post_process.sql' if sqlfile.exists(): with open(sqlfile,'r') as fq: with db.bano_cache.cursor() as cur_post_process: str_query = fq.read().replace('__dept__',departement) cur_post_process.execute(str_query) -def get_destination(prefixe, departement): +def get_destination(departement): try: cwd = Path(os.environ['CADASTRE_CACHE_DIR']) except KeyError: raise ValueError(f"La variable CADASTRE_CACHE_DIR n'est pas définie") if not cwd.exists(): raise ValueError(f"Le répertoire {cwd} n'existe pas") - return cwd / f'cadastre-{departement}-{prefixe}.json.gz' + return cwd / f'cadastre-{departement}-lieux_dits.json.gz' diff --git a/bano/sql/create_table_base_bano_sources.sql b/bano/sql/create_table_base_bano_sources.sql index c0d0c27..4977a9a 100644 --- a/bano/sql/create_table_base_bano_sources.sql +++ b/bano/sql/create_table_base_bano_sources.sql @@ -41,12 +41,22 @@ CREATE TABLE IF NOT EXISTS ban ( -- geometrie geometry (Point, 4326) DEFAULT (ST_Point(lon,lat))); CREATE INDEX IF NOT EXISTS 
idx_ban_code_insee ON ban(code_insee); +CREATE TABLE IF NOT EXISTS lieux_dits ( + insee_com character(5), + nom text, + created date, + updated date, + geometrie geometry(Polygon,4326)); + +CREATE INDEX IF NOT EXISTS gidx_lieux_dits ON lieux_dits USING gist(geometrie); +CREATE INDEX IF NOT EXISTS lieux_dits_insee_com ON lieux_dits (insee_com); + CREATE TABLE IF NOT EXISTS suffixe ( geometrie geometry, code_insee character(5), libelle_suffixe character varying(100) ); CREATE INDEX IF NOT EXISTS gidx_suffixe ON suffixe USING GIST(geometrie); -CREATE INDEX IF NOT EXISTS idx_suffixe ON suffixe USING GIST(code_insee); +CREATE INDEX IF NOT EXISTS idx_suffixe ON suffixe(code_insee); GRANT SELECT ON ALL TABLES IN SCHEMA public TO public; \ No newline at end of file From cd6de24538f7f980cf2c6e8a65ce92625a273dfd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Wed, 28 Dec 2022 23:08:05 +0000 Subject: [PATCH 044/163] centroides, ld, passe unique --- bano/models.py | 611 +++++++++++++++--- bano/rapprochement.py | 44 +- bano/sources/cadastre_ld.py | 57 +- .../charge_points_nommes_centroides_OSM.sql | 60 ++ ...arge_points_nommes_lieux-dits_CADASTRE.sql | 12 + bano/sql/charge_points_nommes_places_OSM.sql | 22 + bano/sql/create_table_base_bano.sql | 15 + bano/sql/create_table_base_bano_sources.sql | 10 +- bano/sql/suppression_adresses_commune.sql | 2 + .../suppression_adresses_commune_source.sql | 3 - bano/sql/suppression_noms_commune.sql | 2 + bano/sql/suppression_noms_commune_source.sql | 3 - .../sql/suppression_points_nommes_commune.sql | 2 + 13 files changed, 719 insertions(+), 124 deletions(-) create mode 100644 bano/sql/charge_points_nommes_centroides_OSM.sql create mode 100644 bano/sql/charge_points_nommes_lieux-dits_CADASTRE.sql create mode 100644 bano/sql/charge_points_nommes_places_OSM.sql create mode 100644 bano/sql/suppression_adresses_commune.sql delete mode 100644 bano/sql/suppression_adresses_commune_source.sql create mode 100644 bano/sql/suppression_noms_commune.sql delete mode 100644 bano/sql/suppression_noms_commune_source.sql create mode 100644 bano/sql/suppression_points_nommes_commune.sql diff --git a/bano/models.py b/bano/models.py index b9dcefd..7a33173 100644 --- a/bano/models.py +++ b/bano/models.py @@ -3,77 +3,203 @@ import io import json -from collections import defaultdict,OrderedDict +from collections import defaultdict, OrderedDict from . import db from . 
import helpers as hp + # from .sources import fantoir -from .sql import sql_get_data,sql_process +from .sql import sql_get_data, sql_process + class Nom: - def __init__(self,nom,fantoir,nature,source,code_insee,code_insee_ancienne_commune): + def __init__( + self, + nom, + fantoir, + nature, + source, + code_insee, + code_insee_ancienne_commune, + lon=None, + lat=None, + ): self.code_insee = code_insee - self.code_insee_ancienne_commune = code_insee_ancienne_commune + self.code_insee_ancienne_commune = str(code_insee_ancienne_commune) self.nom = nom self.fantoir = fantoir self.nature = nature self.source = source + self.lon = lon + self.lat = lat self.nom_normalise = hp.normalize(nom) - def __eq__(self,other): - return (self.nom == other.nom and self.fantoir == other.fantoir and self.nature == other.nature and self.source == other.source and self.code_insee == other.code_insee) - # return (self.nom == other.nom and self.fantoir == other.fantoir) + def __eq__(self, other): + return ( + self.nom == other.nom + and self.fantoir == other.fantoir + and self.nature == other.nature + and self.source == other.source + and self.code_insee == other.code_insee + and self.code_insee_ancienne_commune == other.code_insee_ancienne_commune + ) def __hash__(self): - return hash((self.nom,self.fantoir,self.source,self.nature,self.code_insee)) - # return hash((self.nom,self.fantoir)) + return hash( + ( + self.nom, + self.fantoir, + self.source, + self.nature, + self.code_insee, + self.code_insee_ancienne_commune, + ) + ) def _as_csv_format_bano(self): return f"{self.fantoir}\t{self.nom}\t{self.nature}\t{self.code_insee}\t{self.code_insee_ancienne_commune if self.code_insee_ancienne_commune else ''}\t{self.source}" - def add_fantoir(self,topo): + def add_fantoir(self, topo): if not self.fantoir: self.fantoir = topo.topo.get(self.nom_normalise) + class Noms: - def __init__(self, code_insee,source): + def __init__(self, code_insee, source): self.code_insee = code_insee self.source = source self.triplets_nom_fantoir_source = [] + self.fantoir_par_nom_sous_commune = {"RACINE": defaultdict(list)} # On ne charge pas les noms des numeros OSM. 
Ils sont ajoutés via Adresses.nom_des_adresses def charge_noms_osm_hors_numeros(self): - data = sql_get_data('charge_noms_voies_lieux-dits_OSM',dict(code_insee=self.code_insee),db.bano_sources)+sql_get_data('charge_noms_voies_relation_bbox_OSM',dict(code_insee=self.code_insee),db.bano_sources)+sql_get_data('charge_noms_voies_relation_OSM',dict(code_insee=self.code_insee),db.bano_sources) - for provenance,name,tags,libelle_suffixe,ac_code_insee,ac_nom,nature in data: - if provenance in (1,2,3,4,5): - self.add_nom(Nom(name,tags.get('ref:FR:FANTOIR'),nature,'OSM',self.code_insee,ac_code_insee)) - if provenance in (6,7) and tags.get('ref:FR:FANTOIR'): - self.add_nom(Nom(name,tags['ref:FR:FANTOIR'],nature,'OSM',self.code_insee,ac_code_insee)) + data = ( + sql_get_data( + "charge_noms_voies_lieux-dits_OSM", + dict(code_insee=self.code_insee), + db.bano_sources, + ) + + sql_get_data( + "charge_noms_voies_relation_bbox_OSM", + dict(code_insee=self.code_insee), + db.bano_sources, + ) + + sql_get_data( + "charge_noms_voies_relation_OSM", + dict(code_insee=self.code_insee), + db.bano_sources, + ) + ) + for ( + provenance, + name, + tags, + libelle_suffixe, + ac_code_insee, + ac_nom, + nature, + ) in data: + if provenance in (1, 2, 3, 4, 5): + self.add_nom( + Nom( + name, + tags.get("ref:FR:FANTOIR"), + nature, + "OSM", + self.code_insee, + ac_code_insee, + ) + ) + if provenance in (6, 7) and tags.get("ref:FR:FANTOIR"): + self.add_nom( + Nom( + name, + tags["ref:FR:FANTOIR"], + nature, + "OSM", + self.code_insee, + ac_code_insee, + ) + ) # On ajoute un nom s'il n'a pas de FANTOIR ou si son FANTOIR appartient à la commune - def add_nom(self,nom=Nom): + def add_nom(self, nom=Nom): if not nom.fantoir or nom.fantoir[0:5] == self.code_insee: self.triplets_nom_fantoir_source.append(nom) - def add_fantoir(self,topo): + def add_fantoir(self, topo): for t in self.triplets_nom_fantoir_source: t.add_fantoir(topo) + def remplit_fantoir_par_nom_sous_commune(self): + for t in self.triplets_nom_fantoir_source: + if t.code_insee_ancienne_commune: + if ( + not t.code_insee_ancienne_commune + in self.fantoir_par_nom_sous_commune + ): + self.fantoir_par_nom_sous_commune[ + t.code_insee_ancienne_commune + ] = {} + self.fantoir_par_nom_sous_commune[t.code_insee_ancienne_commune][ + t.nom + ] = t.fantoir + else: + self.fantoir_par_nom_sous_commune[t.nom] = t.fantoir + + # def affiche_fantoir_par_nom_sous_commune(self): + # for branche,noms_fantoir in self.fantoir_par_nom_sous_commune.items(): + # for nom,fantoir in noms_fantoir.items(): + # print(f"{branche} - {nom} : {fantoir}") + + # # print(f"{branche} - {nom}") + # # print(f"{branche} - {nom} > {self.fantoir_par_nom_sous_commune[branche][nom]}") + def enregistre(self): - sql_process('suppression_noms_commune_source',dict(code_insee=self.code_insee,source=self.source),db.bano) + sql_process( + "suppression_noms_commune", + dict(code_insee=self.code_insee, source=self.source), + db.bano, + ) io_in_csv = io.StringIO() for t in set(self.triplets_nom_fantoir_source): if t.fantoir: - io_in_csv.write(t._as_csv_format_bano()+'\n') + io_in_csv.write(t._as_csv_format_bano() + "\n") io_in_csv.seek(0) with db.bano.cursor() as cur_insert: - cur_insert.copy_from(io_in_csv, "nom_fantoir", null='',columns=('fantoir','nom','nature','code_insee','code_insee_ancienne_commune','source')) + cur_insert.copy_from( + io_in_csv, + "nom_fantoir", + null="", + columns=( + "fantoir", + "nom", + "nature", + "code_insee", + "code_insee_ancienne_commune", + "source", + ), + ) + class Adresse: - def 
__init__(self, code_insee, x, y, num, source, voie=None, place=None, fantoir=None, code_postal=None, code_insee_ancienne_commune=None, sous_commune_nom=None): + def __init__( + self, + code_insee, + x, + y, + num, + source, + voie=None, + place=None, + fantoir=None, + code_postal=None, + code_insee_ancienne_commune=None, + sous_commune_nom=None, + ): self.code_insee = code_insee - self.x = round(x,6) - self.y = round(y,6) + self.x = round(x, 6) + self.y = round(y, 6) self.source = source self.numero = num self.voie = voie @@ -86,19 +212,36 @@ def __init__(self, code_insee, x, y, num, source, voie=None, place=None, fantoir self.place_normalisee = hp.format_toponyme(self.place) if self.place else None def __hash__(self): - return hash((self.code_insee,self.source,self.numero,self.voie,self.place,self.code_insee_ancienne_commune)) - - def __eq__(self,other): - return (self.code_insee == other.code_insee and self.source == other.source and self.numero == other.numero and self.voie == other.voie and self.place == other.place and self.code_insee_ancienne_commune == other.code_insee_ancienne_commune) + return hash( + ( + self.code_insee, + self.source, + self.numero, + self.voie, + self.place, + self.code_insee_ancienne_commune, + ) + ) + + def __eq__(self, other): + return ( + self.code_insee == other.code_insee + and self.source == other.source + and self.numero == other.numero + and self.voie == other.voie + and self.place == other.place + and self.code_insee_ancienne_commune == other.code_insee_ancienne_commune + ) def _as_csv_format_bano(self): return f"{self.fantoir if self.fantoir else ''}\t{self.x}\t{self.y}\t{self.numero}\t{self.voie if self.voie else ''}\t{self.place if self.place else ''}\t{self.code_postal}\t{self.code_insee}\t{self.code_insee_ancienne_commune if self.code_insee_ancienne_commune else ''}\t{self.source}" def _as_string(self): - return (f"source : {self.source}, numero : {self.numero}, voie : {self.voie} ({self.voie_normalisee}), place : {self.place}, fantoir : {self.fantoir}, code_postal:{self.code_postal}, sous_commune : {self.code_insee_ancienne_commune} - {self.sous_commune_nom}") + return f"source : {self.source}, numero : {self.numero}, voie : {self.voie} ({self.voie_normalisee}), place : {self.place}, fantoir : {self.fantoir}, code_postal:{self.code_postal}, sous_commune : {self.code_insee_ancienne_commune} - {self.sous_commune_nom}" + class Adresses: - def __init__(self, code_insee,source): + def __init__(self, code_insee, source): self.code_insee = code_insee self.source = source self.liste = set() @@ -117,74 +260,396 @@ def __getitem__(self, key): def __iter__(self): return iter(self.liste) - def _print(self,pattern=None): + def _print(self, pattern=None): for a in self: if not pattern or pattern in a._as_string(): print(a._as_string()) - - def add_adresse(self,ad): - """ une adresses est considérée dans la commune si sans Fantoir ou avec un Fantoir de la commune""" + def add_adresse(self, ad): + """une adresses est considérée dans la commune si sans Fantoir ou avec un Fantoir de la commune""" # if (ad.fantoir == None or hp.is_valid_fantoir(ad.fantoir, self.code_insee)) and hp.is_valid_housenumber(ad.numero): self.liste.add(ad) - self.index_voie[ad.voie].append(len(self.liste)-1) + self.index_voie[ad.voie].append(len(self.liste) - 1) self.noms_de_voies.add(ad.voie) # def add_fantoir(self): # for nom in self.noms: - # fantoir = topo.topo.get(a.voie_normalisee) - - def charge_numeros_ban(self,topo): - data = 
sql_get_data('charge_ban_commune',dict(code_insee=self.code_insee),db.bano_sources) - for id_fantoir, numero, voie, lon, lat, code_postal, code_insee_ac, nom_ac in data: + # fantoir = topo.topo.get(a.voie_normalisee) + + def charge_numeros_ban(self, topo): + data = sql_get_data( + "charge_ban_commune", dict(code_insee=self.code_insee), db.bano_sources + ) + for ( + id_fantoir, + numero, + voie, + lon, + lat, + code_postal, + code_insee_ac, + nom_ac, + ) in data: if id_fantoir: fantoir9 = f"{id_fantoir[0:5]}{id_fantoir[6:10]}" fantoir = topo.code_fantoir9_vers_fantoir10.get(fantoir9) else: fantoir = None - self.add_adresse(Adresse(self.code_insee,lon,lat,numero,'BAN',voie=voie,fantoir=fantoir,code_postal=code_postal,code_insee_ancienne_commune=code_insee_ac,sous_commune_nom=nom_ac)) + self.add_adresse( + Adresse( + self.code_insee, + lon, + lat, + numero, + "BAN", + voie=voie, + fantoir=fantoir, + code_postal=code_postal, + code_insee_ancienne_commune=code_insee_ac, + sous_commune_nom=nom_ac, + ) + ) def charge_numeros_osm(self): - data = sql_get_data('charge_numeros_OSM',dict(code_insee=self.code_insee),db.bano_sources)+sql_get_data('charge_numeros_bbox_OSM',dict(code_insee=self.code_insee),db.bano_sources) - - for lon, lat, provenance, numero, voie, place, tags, suffixe, code_postal, code_insee_ac, nom_ac in data: - - fantoir = tags.get('ref:FR:FANTOIR') - if fantoir and not hp.fantoir_valide(fantoir,self.code_insee): + data = sql_get_data( + "charge_numeros_OSM", dict(code_insee=self.code_insee), db.bano_sources + ) + sql_get_data( + "charge_numeros_bbox_OSM", dict(code_insee=self.code_insee), db.bano_sources + ) + + for ( + lon, + lat, + provenance, + numero, + voie, + place, + tags, + suffixe, + code_postal, + code_insee_ac, + nom_ac, + ) in data: + + fantoir = tags.get("ref:FR:FANTOIR") + if fantoir and not hp.fantoir_valide(fantoir, self.code_insee): continue - if provenance in (1,2,): - self.add_adresse(Adresse(self.code_insee,lon,lat,numero,'OSM',voie=voie,place=place,fantoir=fantoir,code_postal=code_postal,code_insee_ancienne_commune=code_insee_ac,sous_commune_nom=nom_ac)) - if provenance in (3,4,) and tags.get('name'): - self.add_adresse(Adresse(self.code_insee,lon,lat,numero,'OSM',voie=tags['name'],place=None,fantoir=fantoir,code_postal=code_postal,code_insee_ancienne_commune=code_insee_ac,sous_commune_nom=nom_ac)) - if provenance in (5,6,) and tags.get('name') and tags.get('ref:FR:FANTOIR'): - if tags['ref:FR:FANTOIR'][0:5] == self.code_insee: - self.add_adresse(Adresse(self.code_insee,lon,lat,numero,'OSM',voie=tags['name'],place=None,fantoir=tags['ref:FR:FANTOIR'],code_postal=code_postal,code_insee_ancienne_commune=code_insee_ac,sous_commune_nom=nom_ac)) - - def noms_des_adresses(self,noms): + if provenance in ( + 1, + 2, + ): + self.add_adresse( + Adresse( + self.code_insee, + lon, + lat, + numero, + "OSM", + voie=voie, + place=place, + fantoir=fantoir, + code_postal=code_postal, + code_insee_ancienne_commune=code_insee_ac, + sous_commune_nom=nom_ac, + ) + ) + if provenance in ( + 3, + 4, + ) and tags.get("name"): + self.add_adresse( + Adresse( + self.code_insee, + lon, + lat, + numero, + "OSM", + voie=tags["name"], + place=None, + fantoir=fantoir, + code_postal=code_postal, + code_insee_ancienne_commune=code_insee_ac, + sous_commune_nom=nom_ac, + ) + ) + if ( + provenance + in ( + 5, + 6, + ) + and tags.get("name") + and tags.get("ref:FR:FANTOIR") + ): + if tags["ref:FR:FANTOIR"][0:5] == self.code_insee: + self.add_adresse( + Adresse( + self.code_insee, + lon, + lat, + 
numero, + "OSM", + voie=tags["name"], + place=None, + fantoir=tags["ref:FR:FANTOIR"], + code_postal=code_postal, + code_insee_ancienne_commune=code_insee_ac, + sous_commune_nom=nom_ac, + ) + ) + + def noms_des_adresses(self, noms): for a in self: if a.voie: - noms.add_nom(Nom(a.voie,a.fantoir,'voie',a.source,self.code_insee,a.code_insee_ancienne_commune)) + noms.add_nom( + Nom( + a.voie, + a.fantoir, + "voie", + a.source, + self.code_insee, + a.code_insee_ancienne_commune, + ) + ) if a.place: - noms.add_nom(Nom(a.place,a.fantoir,'place',a.source,self.code_insee,a.code_insee_ancienne_commune)) + noms.add_nom( + Nom( + a.place, + a.fantoir, + "place", + a.source, + self.code_insee, + a.code_insee_ancienne_commune, + ) + ) + + def complete_fantoir(self, noms): + for a in self: + if a.fantoir: + continue + nom = a.voie or a.place + if a.code_insee_ancienne_commune: + a.fantoir = noms.fantoir_par_nom_sous_commune.get( + a.code_insee_ancienne_commune + ).get(nom) + else: + a.fantoir = noms.fantoir_par_nom_sous_commune.get(nom) def enregistre(self): - sql_process('suppression_adresses_commune_source',dict(code_insee=self.code_insee,source=self.source),db.bano) + sql_process( + "suppression_adresses_commune_source", + dict(code_insee=self.code_insee, source=self.source), + db.bano, + ) io_in_csv = io.StringIO() for a in self: - io_in_csv.write(a._as_csv_format_bano()+'\n') # separateur $ car on trouve des virgules dans le contenu + io_in_csv.write( + a._as_csv_format_bano() + "\n" + ) # separateur $ car on trouve des virgules dans le contenu io_in_csv.seek(0) with db.bano.cursor() as cur_insert: - cur_insert.copy_from(io_in_csv, "bano_adresses",null='',columns=('fantoir','lon','lat','numero','nom_voie','nom_place','code_postal','code_insee','code_insee_ancienne_commune','source')) + cur_insert.copy_from( + io_in_csv, + "bano_adresses", + null="", + columns=( + "fantoir", + "lon", + "lat", + "numero", + "nom_voie", + "nom_place", + "code_postal", + "code_insee", + "code_insee_ancienne_commune", + "source", + ), + ) + + +class Point_nomme: + def __init__( + self, + code_insee, + source, + nature, + lon, + lat, + nom, + fantoir=None, + code_insee_ancienne_commune=None, + ): + self.code_insee = code_insee + self.source = source + self.lon = round(lon, 6) + self.lat = round(lat, 6) + self.nature = nature + self.nom = nom + self.fantoir = fantoir + self.code_insee_ancienne_commune = code_insee_ancienne_commune + def __hash__(self): + return hash( + (self.code_insee, self.source, self.nom, self.code_insee_ancienne_commune) + ) + + def __eq__(self, other): + return ( + self.code_insee == other.code_insee + and self.source == other.source + and self.nom == other.nom + and self.code_insee_ancienne_commune == other.code_insee_ancienne_commune + ) -class Lieux_dits: - def __init__(self,code_insee): + def _as_string(self): + return f"source : {self.source}, nom : {self.nom}, nature : {self.nature}, sous_commune : {self.code_insee_ancienne_commune}" + + def _as_csv_format_bano(self): + return f"{self.fantoir if self.fantoir else ''}\t{self.nom}\t{self.code_insee}\t{self.nature}\t{self.code_insee_ancienne_commune if self.code_insee_ancienne_commune else ''}\t{self.source}\t{self.lon}\t{self.lat}" + + +class Points_nommes: + def __init__(self, code_insee): self.code_insee = code_insee - self.points = set() + self.liste = set() + + def __getitem__(self, key): + return self.liste[key] + + def __iter__(self): + return iter(self.liste) + + def _print(self, pattern=None): + for a in self: + if not pattern or pattern 
in a._as_string(): + print(a._as_string()) + + def charge_points_nommes_lieux_dits_cadastre(self): + data = sql_get_data( + "charge_points_nommes_lieux-dits_CADASTRE", + dict(code_insee=self.code_insee), + db.bano_sources, + ) + for x, y, nom, code_insee_ac in data: + self.add_point_nomme( + Point_nomme( + self.code_insee, + "CADASTRE", + "lieu-dit", + x, + y, + nom, + code_insee_ancienne_commune=code_insee_ac, + ) + ) + + def charge_points_nommes_centroides_osm(self): + data = sql_get_data( + "charge_points_nommes_centroides_OSM", + dict(code_insee=self.code_insee), + db.bano_sources, + ) + for x, y, nom, code_insee_ac, fantoir in data: + self.add_point_nomme( + Point_nomme( + self.code_insee, + "OSM", + "centroide", + x, + y, + nom, + code_insee_ancienne_commune=code_insee_ac, + fantoir=fantoir, + ) + ) + + def charge_points_nommes_place_osm(self): + data = sql_get_data( + "charge_points_nommes_places_OSM", + dict(code_insee=self.code_insee), + db.bano_sources, + ) + for x, y, nom, code_insee_ac, fantoir in data: + self.add_point_nomme( + Point_nomme( + self.code_insee, + "OSM", + "place", + x, + y, + nom, + code_insee_ancienne_commune=code_insee_ac, + fantoir=fantoir, + ) + ) + + def add_point_nomme(self, ld): + self.liste.add(ld) + + def noms_des_points_nommes(self, noms): + for a in self: + if a.source == "CADASTRE": + noms.add_nom( + Nom( + hp.format_toponyme(a.nom), + a.fantoir, + a.nature, + a.source, + self.code_insee, + a.code_insee_ancienne_commune, + ) + ) + if a.source == "OSM": + noms.add_nom( + Nom( + a.nom, + a.fantoir, + a.nature, + a.source, + self.code_insee, + a.code_insee_ancienne_commune, + ) + ) + + def complete_fantoir(self, noms): + for a in self: + if a.fantoir: + continue + if a.code_insee_ancienne_commune: + a.fantoir = noms.fantoir_par_nom_sous_commune.get( + a.code_insee_ancienne_commune + ).get(a.nom) + else: + a.fantoir = noms.fantoir_par_nom_sous_commune.get(a.nom) + + def enregistre(self): + sql_process( + "suppression_points_nommes_commune", + dict(code_insee=self.code_insee), + db.bano, + ) + io_in_csv = io.StringIO() + for t in self: + io_in_csv.write(t._as_csv_format_bano() + "\n") + io_in_csv.seek(0) + with db.bano.cursor() as cur_insert: + cur_insert.copy_from( + io_in_csv, + "bano_points_nommes", + null="", + columns=( + "fantoir", + "nom", + "code_insee", + "nature", + "code_insee_ancienne_commune", + "source", + "lon", + "lat", + ), + ) - def charge_points_cadastre(self): - data = sql_get_data('charge_points_cadastre') class Topo: def __init__(self, code_insee): @@ -205,15 +670,17 @@ def __iter__(self): def __getitem__(self, key): return self.topo[key] - def _print(self,pattern=None): - for k,v in self: + def _print(self, pattern=None): + for k, v in self: if not pattern or pattern in v: print(f"{k} : {v}") def charge_topo(self): - data = sql_get_data('charge_topo_commune',dict(code_insee=self.code_insee),db.bano_sources) - for fantoir,nom in data: - nom = hp.normalize(' '.join(nom.replace('-',' ').split())) + data = sql_get_data( + "charge_topo_commune", dict(code_insee=self.code_insee), db.bano_sources + ) + for fantoir, nom in data: + nom = hp.normalize(" ".join(nom.replace("-", " ").split())) self.topo[fantoir] = nom self.code_fantoir9_vers_fantoir10[fantoir[0:9]] = fantoir self.topo[nom] = fantoir diff --git a/bano/rapprochement.py b/bano/rapprochement.py index b5a77e0..a565930 100644 --- a/bano/rapprochement.py +++ b/bano/rapprochement.py @@ -3,40 +3,48 @@ from . import db_helpers as h from . 
import batch as b -from .models import Noms, Adresses, Topo +from .models import Noms, Adresses, Topo, Points_nommes -def process_unitaire(source,code_insee): - id_batch = b.batch_start_log('rapprochement', source, code_insee) +def process_unitaire(source, code_insee): + id_batch = b.batch_start_log("rapprochement", source, code_insee) try: topo = Topo(code_insee) - adresses = Adresses(code_insee,source) - noms = Noms(code_insee,source) + adresses = Adresses(code_insee, source) + points_nommes = Points_nommes(code_insee) + noms = Noms(code_insee, source) - if source == 'OSM': - noms.charge_noms_osm_hors_numeros() - adresses.charge_numeros_osm() - points_nommes.charge_points() - - if source == 'BAN': - adresses.charge_numeros_ban(topo) + noms.charge_noms_osm_hors_numeros() + adresses.charge_numeros_osm() + adresses.charge_numeros_ban(topo) + points_nommes.charge_points_nommes_lieux_dits_cadastre() + points_nommes.charge_points_nommes_centroides_osm() + points_nommes.charge_points_nommes_place_osm() adresses.noms_des_adresses(noms) + points_nommes.noms_des_points_nommes(noms) + noms.add_fantoir(topo) + noms.remplit_fantoir_par_nom_sous_commune() + + points_nommes.complete_fantoir(noms) + adresses.complete_fantoir(noms) + adresses.enregistre() noms.enregistre() + points_nommes.enregistre() - b.batch_stop_log(id_batch,True) + b.batch_stop_log(id_batch, True) except: - b.batch_stop_log(id_batch,False) + b.batch_stop_log(id_batch, False) -def process(source,code_insee,dept,**kwargs): +def process(source, code_insee, dept, **kwargs): if dept: liste_insee = h.liste_communes_par_dept(dept) else: - liste_insee = [(code_insee,h.nom_commune(code_insee))] - for code_insee,nom in liste_insee: + liste_insee = [(code_insee, h.nom_commune(code_insee))] + for code_insee, nom in liste_insee: if dept: print(f"{code_insee} - {nom}") - process_unitaire(source,code_insee) + process_unitaire(source, code_insee) diff --git a/bano/sources/cadastre_ld.py b/bano/sources/cadastre_ld.py index 6b02826..8e902db 100644 --- a/bano/sources/cadastre_ld.py +++ b/bano/sources/cadastre_ld.py @@ -17,28 +17,31 @@ def process(departements, **kwargs): departements = set(departements) - depts_inconnus = departements - set(DEPARTEMENTS) + depts_inconnus = departements - set(DEPARTEMENTS) if depts_inconnus: raise ValueError(f"Départements inconnus : {depts_inconnus}") for dept in sorted(departements): print(f"Processing {dept}") status = download(dept) - if status: - import_to_pg(dept) - post_process(dept) - + # if status: + import_to_pg(dept) + post_process(dept) + def download(departement): destination = get_destination(departement) headers = {} if destination.exists(): - headers['If-Modified-Since'] = formatdate(destination.stat().st_mtime) + headers["If-Modified-Since"] = formatdate(destination.stat().st_mtime) - resp = requests.get(f'https://cadastre.data.gouv.fr/data/etalab-cadastre/latest/geojson/departements/{departement}/cadastre-{departement}-lieux_dits.json.gz', headers=headers) + resp = requests.get( + f"https://cadastre.data.gouv.fr/data/etalab-cadastre/latest/geojson/departements/{departement}/cadastre-{departement}-lieux_dits.json.gz", + headers=headers, + ) if resp.status_code == 200: - with destination.open('wb') as f: + with destination.open("wb") as f: f.write(resp.content) - mtime = parsedate_to_datetime(resp.headers['Last-Modified']).timestamp() + mtime = parsedate_to_datetime(resp.headers["Last-Modified"]).timestamp() os.utime(destination, (mtime, mtime)) return True return False @@ -46,34 +49,40 @@ def 
download(departement): def import_to_pg(departement, **kwargs): fichier_source = get_destination(departement) - with gzip.open(fichier_source, mode='rt') as f: + with gzip.open(fichier_source, mode="rt") as f: json_source = json.load(f) - with db.bano_cache.cursor() as cur_insert: + with db.bano_sources.cursor() as cur_insert: try: - cur_insert.execute(f"DELETE FROM lieux_dits WHERE insee_com LIKE '{departement+'%'}';COMMIT;") + cur_insert.execute( + f"DELETE FROM lieux_dits WHERE code_insee LIKE '{departement+'%'}';COMMIT;" + ) a_values = [] str_query = f"INSERT INTO lieux_dits VALUES " - for l in json_source['features']: - a_values.append(f"('{l['properties']['commune']}','{hp.escape_quotes(l['properties']['nom'])}','{l['properties']['created']}','{l['properties']['updated']}',ST_SetSRID(ST_GeomFromGeoJSON('{hp.replace_single_quotes_with_double(str(l['geometry']))}'),4326))") + for l in json_source["features"]: + a_values.append( + f"('{l['properties']['commune']}','{hp.escape_quotes(l['properties']['nom'])}','{l['properties']['created']}','{l['properties']['updated']}',ST_SetSRID(ST_GeomFromGeoJSON('{hp.replace_single_quotes_with_double(str(l['geometry']))}'),4326))" + ) if a_values: - cur_insert.execute(str_query+','.join(a_values)+';COMMIT;') + cur_insert.execute(str_query + ",".join(a_values) + ";COMMIT;") except psycopg2.DataError as e: print(e) - db.bano_cache.reset() - + db.bano_sources.reset() + + def post_process(departement, **kwargs): - sqlfile = Path(__file__).parent.parent / 'sql' / 'lieux_dits_post_process.sql' + sqlfile = Path(__file__).parent.parent / "sql" / "lieux_dits_post_process.sql" if sqlfile.exists(): - with open(sqlfile,'r') as fq: - with db.bano_cache.cursor() as cur_post_process: - str_query = fq.read().replace('__dept__',departement) + with open(sqlfile, "r") as fq: + with db.bano_sources.cursor() as cur_post_process: + str_query = fq.read().replace("__dept__", departement) cur_post_process.execute(str_query) - + + def get_destination(departement): try: - cwd = Path(os.environ['CADASTRE_CACHE_DIR']) + cwd = Path(os.environ["CADASTRE_CACHE_DIR"]) except KeyError: raise ValueError(f"La variable CADASTRE_CACHE_DIR n'est pas définie") if not cwd.exists(): raise ValueError(f"Le répertoire {cwd} n'existe pas") - return cwd / f'cadastre-{departement}-lieux_dits.json.gz' + return cwd / f"cadastre-{departement}-lieux_dits.json.gz" diff --git a/bano/sql/charge_points_nommes_centroides_OSM.sql b/bano/sql/charge_points_nommes_centroides_OSM.sql new file mode 100644 index 0000000..418fc9b --- /dev/null +++ b/bano/sql/charge_points_nommes_centroides_OSM.sql @@ -0,0 +1,60 @@ +WITH +lignes_brutes +AS +(SELECT l.way, + unnest(array[l.name,l.tags->'alt_name',l.tags->'old_name']) AS name, + a9.code_insee as insee_ac, + unnest(array["ref:FR:FANTOIR","ref:FR:FANTOIR:left","ref:FR:FANTOIR:right"]) AS fantoir, + ST_Within(l.way,p.way)::integer as within +FROM (SELECT way FROM planet_osm_polygon WHERE "ref:INSEE" = '__code_insee__') p +JOIN planet_osm_line l +ON ST_Intersects(l.way, p.way) +LEFT OUTER JOIN (SELECT * FROM polygones_insee_a9 WHERE insee_a8 = '__code_insee__') a9 +ON ST_Intersects(l.way, a9.geometrie) +WHERE (l.highway != '' OR + l.waterway = 'dam') AND + l.name != ''), +lignes_noms +AS +(SELECT * +FROM lignes_brutes +WHERE name IS NOT NULL AND + (fantoir LIKE '__code_insee__%' OR fantoir = '')), +lignes_noms_rang +AS +(SELECT *, + RANK() OVER(PARTITION BY name,insee_ac ORDER BY within DESC) rang +FROM lignes_noms), +lignes_agregees +AS +(SELECT 
ST_LineMerge(ST_Collect(way)) way, + name, + insee_ac, + fantoir +FROM lignes_noms_rang +WHERE rang = 1 +GROUP BY 2,3,4), +centroide_lignes_agregees +AS +(SELECT ST_Centroid(ST_LineMerge(ST_Collect(way))) way, + name, + insee_ac, + fantoir +FROM lignes_noms_rang +WHERE rang = 1 +GROUP BY 2,3,4), +resultat +AS +(SELECT ST_SetSRID(ST_ClosestPoint(lignes_agregees.way,centroide_lignes_agregees.way),4326) point, + lignes_agregees.name, + lignes_agregees.insee_ac, + lignes_agregees.fantoir +FROM lignes_agregees +JOIN centroide_lignes_agregees +USING (name,insee_ac)) +SELECT ST_x(point), + ST_y(point), + name, + insee_ac, + fantoir +FROM resultat; diff --git a/bano/sql/charge_points_nommes_lieux-dits_CADASTRE.sql b/bano/sql/charge_points_nommes_lieux-dits_CADASTRE.sql new file mode 100644 index 0000000..160e6bc --- /dev/null +++ b/bano/sql/charge_points_nommes_lieux-dits_CADASTRE.sql @@ -0,0 +1,12 @@ +SELECT ST_X(geom_centroid), + ST_Y(geom_centroid), + regexp_replace(regexp_replace(ld.nom,' ',' ','g'),' ',' ','g') AS nom, + a9.code_insee +FROM (SELECT * + FROM lieux_dits + WHERE code_insee = '__code_insee__' AND + nom IS NOT NULL) ld +LEFT OUTER JOIN suffixe h +ON ST_Intersects(ld.geom_centroid, h.geometrie) +LEFT OUTER JOIN (SELECT * FROM polygones_insee_a9 where insee_a8 = '__code_insee__') a9 +ON ST_Contains(a9.geometrie,ld.geom_centroid); \ No newline at end of file diff --git a/bano/sql/charge_points_nommes_places_OSM.sql b/bano/sql/charge_points_nommes_places_OSM.sql new file mode 100644 index 0000000..02cf7ce --- /dev/null +++ b/bano/sql/charge_points_nommes_places_OSM.sql @@ -0,0 +1,22 @@ +WITH +pts +AS +(SELECT pt.way, + UNNEST(ARRAY[pt.name,pt.tags->'alt_name',pt.tags->'old_name']) as name, + tags, + place, + a9.code_insee AS insee_ac, + "ref:FR:FANTOIR" AS fantoir +FROM (SELECT way FROM planet_osm_polygon WHERE "ref:INSEE" = '__code_insee__') p +JOIN (SELECT * FROM planet_osm_point WHERE place != '' AND name != '') pt +ON pt.way && p.way AND + ST_Intersects(pt.way, p.way) +LEFT OUTER JOIN (SELECT * FROM polygones_insee_a9 WHERE insee_a8 = '__code_insee__') a9 +ON ST_Intersects(pt.way, a9.geometrie)) +SELECT ST_x(way), + ST_y(way), + name, + insee_ac, + fantoir +FROM pts +WHERE name IS NOT NULL; diff --git a/bano/sql/create_table_base_bano.sql b/bano/sql/create_table_base_bano.sql index 91eaf69..6c513c0 100644 --- a/bano/sql/create_table_base_bano.sql +++ b/bano/sql/create_table_base_bano.sql @@ -16,6 +16,21 @@ CREATE TABLE IF NOT EXISTS bano_adresses ( CREATE INDEX IF NOT EXISTS gidx_bano_adresses ON bano_adresses USING GIST(geometrie); CREATE INDEX IF NOT EXISTS idx_bano_adresses_code_insee ON bano_adresses (code_insee); +CREATE TABLE IF NOT EXISTS bano_points_nommes ( + fantoir text, + nom text, + code_insee text, + nature text, + code_insee_ancienne_commune text, + nom_ancienne_commune text, + source text, + lon float, + lat float, + geometrie geometry (Point, 4326) GENERATED ALWAYS AS (ST_Point(lon,lat)) STORED); + +CREATE INDEX IF NOT EXISTS idx_bano_points_nommes_code_insee ON bano_points_nommes (code_insee); +CREATE INDEX IF NOT EXISTS idx_bano_points_nommes_code_insee_source ON bano_points_nommes (code_insee,source); + CREATE TABLE IF NOT EXISTS nom_fantoir ( fantoir text, nom text, diff --git a/bano/sql/create_table_base_bano_sources.sql b/bano/sql/create_table_base_bano_sources.sql index 4977a9a..293a028 100644 --- a/bano/sql/create_table_base_bano_sources.sql +++ b/bano/sql/create_table_base_bano_sources.sql @@ -42,14 +42,16 @@ CREATE TABLE IF NOT EXISTS ban ( CREATE INDEX 
IF NOT EXISTS idx_ban_code_insee ON ban(code_insee); CREATE TABLE IF NOT EXISTS lieux_dits ( - insee_com character(5), + code_insee character(5), nom text, created date, updated date, - geometrie geometry(Polygon,4326)); + geometrie geometry(Polygon,4326), + geom_centroid geometry (Point, 4326) GENERATED ALWAYS AS (ST_Centroid(geometrie)) STORED); +); -CREATE INDEX IF NOT EXISTS gidx_lieux_dits ON lieux_dits USING gist(geometrie); -CREATE INDEX IF NOT EXISTS lieux_dits_insee_com ON lieux_dits (insee_com); +CREATE INDEX IF NOT EXISTS gidx__centroid_lieux_dits ON lieux_dits USING gist(geom_centroid); +CREATE INDEX IF NOT EXISTS lieux_dits_code_insee ON lieux_dits (code_insee); CREATE TABLE IF NOT EXISTS suffixe ( geometrie geometry, diff --git a/bano/sql/suppression_adresses_commune.sql b/bano/sql/suppression_adresses_commune.sql new file mode 100644 index 0000000..3e94ff0 --- /dev/null +++ b/bano/sql/suppression_adresses_commune.sql @@ -0,0 +1,2 @@ +DELETE FROM bano_adresses +WHERE code_insee = '__code_insee__'; \ No newline at end of file diff --git a/bano/sql/suppression_adresses_commune_source.sql b/bano/sql/suppression_adresses_commune_source.sql deleted file mode 100644 index f842f23..0000000 --- a/bano/sql/suppression_adresses_commune_source.sql +++ /dev/null @@ -1,3 +0,0 @@ -DELETE FROM bano_adresses -WHERE code_insee = '__code_insee__' AND - source = '__source__'; \ No newline at end of file diff --git a/bano/sql/suppression_noms_commune.sql b/bano/sql/suppression_noms_commune.sql new file mode 100644 index 0000000..e3a5023 --- /dev/null +++ b/bano/sql/suppression_noms_commune.sql @@ -0,0 +1,2 @@ +DELETE FROM nom_fantoir +WHERE code_insee = '__code_insee__'; \ No newline at end of file diff --git a/bano/sql/suppression_noms_commune_source.sql b/bano/sql/suppression_noms_commune_source.sql deleted file mode 100644 index 76a79e9..0000000 --- a/bano/sql/suppression_noms_commune_source.sql +++ /dev/null @@ -1,3 +0,0 @@ -DELETE FROM nom_fantoir -WHERE code_insee = '__code_insee__' AND - source = '__source__'; \ No newline at end of file diff --git a/bano/sql/suppression_points_nommes_commune.sql b/bano/sql/suppression_points_nommes_commune.sql new file mode 100644 index 0000000..ac15428 --- /dev/null +++ b/bano/sql/suppression_points_nommes_commune.sql @@ -0,0 +1,2 @@ +DELETE FROM bano_points_nommes +WHERE code_insee = '__code_insee__'; \ No newline at end of file From 8b3812660a76164090155c450c59c25c35f721c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Sat, 31 Dec 2022 15:38:31 +0000 Subject: [PATCH 045/163] =?UTF-8?q?m=C3=A9nage?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- bano/db_helpers.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/bano/db_helpers.py b/bano/db_helpers.py index c449412..6643319 100644 --- a/bano/db_helpers.py +++ b/bano/db_helpers.py @@ -6,12 +6,10 @@ def liste_communes_par_dept(dept): - return sql_get_data('liste_communes_par_dept',dict(dept=dept),db.bano) + return sql_get_data("liste_communes_par_dept", dict(dept=dept), db.bano) -def nom_commune(code_insee): - return sql_get_data('nom_commune_par_code_insee',dict(code_insee=code_insee),db.bano)[0][0] -def get_cadastre_format(insee_com): - with db.bano.cursor() as conn : - conn.execute(f"SELECT format_cadastre FROM code_cadastre WHERE insee_com = '{insee_com}';") - return conn.fetchone()[0] +def nom_commune(code_insee): + return sql_get_data( + "nom_commune_par_code_insee", 
dict(code_insee=code_insee), db.bano + )[0][0] From f3a8ca5aff333550115b590360737ac578b26f5d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Sat, 31 Dec 2022 15:40:38 +0000 Subject: [PATCH 046/163] =?UTF-8?q?points=20nomm=C3=A9s?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- bano/models.py | 8 ++++---- bano/sql/charge_points_nommes_centroides_OSM.sql | 11 +++++++---- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/bano/models.py b/bano/models.py index 7a33173..da7b246 100644 --- a/bano/models.py +++ b/bano/models.py @@ -25,7 +25,7 @@ def __init__( lat=None, ): self.code_insee = code_insee - self.code_insee_ancienne_commune = str(code_insee_ancienne_commune) + self.code_insee_ancienne_commune = code_insee_ancienne_commune self.nom = nom self.fantoir = fantoir self.nature = nature @@ -438,8 +438,8 @@ def complete_fantoir(self, noms): def enregistre(self): sql_process( - "suppression_adresses_commune_source", - dict(code_insee=self.code_insee, source=self.source), + "suppression_adresses_commune", + dict(code_insee=self.code_insee), db.bano, ) io_in_csv = io.StringIO() @@ -614,7 +614,7 @@ def noms_des_points_nommes(self, noms): def complete_fantoir(self, noms): for a in self: - if a.fantoir: + if a.fantoir and a.fantoir[0:5] == self.code_insee: continue if a.code_insee_ancienne_commune: a.fantoir = noms.fantoir_par_nom_sous_commune.get( diff --git a/bano/sql/charge_points_nommes_centroides_OSM.sql b/bano/sql/charge_points_nommes_centroides_OSM.sql index 418fc9b..7d034af 100644 --- a/bano/sql/charge_points_nommes_centroides_OSM.sql +++ b/bano/sql/charge_points_nommes_centroides_OSM.sql @@ -3,7 +3,8 @@ lignes_brutes AS (SELECT l.way, unnest(array[l.name,l.tags->'alt_name',l.tags->'old_name']) AS name, - a9.code_insee as insee_ac, + COALESCE(a9.code_insee,'xxxxx') as insee_jointure, + a9.code_insee insee_ac, unnest(array["ref:FR:FANTOIR","ref:FR:FANTOIR:left","ref:FR:FANTOIR:right"]) AS fantoir, ST_Within(l.way,p.way)::integer as within FROM (SELECT way FROM planet_osm_polygon WHERE "ref:INSEE" = '__code_insee__') p @@ -30,19 +31,21 @@ AS (SELECT ST_LineMerge(ST_Collect(way)) way, name, insee_ac, + insee_jointure, fantoir FROM lignes_noms_rang WHERE rang = 1 -GROUP BY 2,3,4), +GROUP BY 2,3,4,5), centroide_lignes_agregees AS (SELECT ST_Centroid(ST_LineMerge(ST_Collect(way))) way, name, insee_ac, + insee_jointure, fantoir FROM lignes_noms_rang WHERE rang = 1 -GROUP BY 2,3,4), +GROUP BY 2,3,4,5), resultat AS (SELECT ST_SetSRID(ST_ClosestPoint(lignes_agregees.way,centroide_lignes_agregees.way),4326) point, @@ -51,7 +54,7 @@ AS lignes_agregees.fantoir FROM lignes_agregees JOIN centroide_lignes_agregees -USING (name,insee_ac)) +USING (name,insee_jointure)) SELECT ST_x(point), ST_y(point), name, From 7ad30347991c5c3dee851a41e9fbf7f2a359687e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Sat, 31 Dec 2022 16:17:37 +0000 Subject: [PATCH 047/163] Suppression de code_cadastre --- bano/sources/cadastre_gouv.py | 47 ----------------------------------- 1 file changed, 47 deletions(-) delete mode 100755 bano/sources/cadastre_gouv.py diff --git a/bano/sources/cadastre_gouv.py b/bano/sources/cadastre_gouv.py deleted file mode 100755 index c29530d..0000000 --- a/bano/sources/cadastre_gouv.py +++ /dev/null @@ -1,47 +0,0 @@ -#!/usr/bin/env python - -import os -from pathlib import Path -import time - -from bs4 import BeautifulSoup -import requests - -from ..constants import 
DEPARTEMENTS, DEPARTEMENTS_3CHAR -from .. import db - - -def read_from_cadastre_gouv_to_file(): - with requests.Session() as s: - reponse = s.get('https://www.cadastre.gouv.fr/scpc/rechercherPlan.do') - token = reponse.text.split('CSRF_TOKEN=')[1].split('"')[0] - - with open(Path(os.environ['DATA_DIR']) / 'code_cadastre.csv', 'w') as output: - for index, dept in enumerate(DEPARTEMENTS_3CHAR): - time.sleep(1) - reponse = BeautifulSoup(s.get(f"https://www.cadastre.gouv.fr/scpc/listerCommune.do?CSRF_TOKEN={token}&codeDepartement={dept}&libelle=&keepVolatileSession=&offset=5000").text, "lxml") - for e in reponse.find_all(attrs={"class": "parcelles"}): - y = e.find(title="Ajouter au panier") - if y is None: - continue - - split = y.get('onclick').split("'") - code_commune = split[1] - format_type = split[3] - - commune_cp = e.strong.string - nom_commune = commune_cp[:-9] - cp = commune_cp[-7:-2] - output.write(f'{DEPARTEMENTS[index]},{dept},{nom_commune},{cp},{code_commune},{format_type}\n') - - -def update_table_in_db(): - # print((Path(__file__)).parent.parent / 'sql' / 'load_code_cadastre.sql') - with open((Path(__file__)).parent.parent / 'sql' / 'load_code_cadastre.sql', 'r') as sqlfile: - cur_update = db.bano.cursor() - cur_update.execute(sqlfile.read().replace('./code_cadastre.csv', str(Path(os.environ['DATA_DIR']) / 'code_cadastre.csv'))) - - -def process(**kwargs): - read_from_cadastre_gouv_to_file() - update_table_in_db() From c33b533acc6a1abef9f430ecd1292979c88064dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Sun, 1 Jan 2023 17:16:32 +0000 Subject: [PATCH 048/163] plus de rapprochement par source --- bano/bin.py | 14 ++++++-------- bano/models.py | 20 +++++++++++++------- bano/rapprochement.py | 13 ++++++------- 3 files changed, 25 insertions(+), 22 deletions(-) diff --git a/bano/bin.py b/bano/bin.py index 85f4f50..31f10da 100644 --- a/bano/bin.py +++ b/bano/bin.py @@ -8,7 +8,7 @@ from . import setup_db from . import rapprochement from . 
import boite_a_outils -from .sources import topo,ban,cog,cadastre_ld +from .sources import topo, ban, cog, cadastre_ld from .constants import DEPARTEMENTS @@ -98,17 +98,15 @@ def main(): "rapprochement", help="Effectue l'appariement entre sources OSM ou BAN et TOPO", ) - subparser.add_argument( - "--source", - choices=["OSM", "BAN"], - type=str, - help="Source des données à traiter", - ) group = subparser.add_mutually_exclusive_group(required=True) group.add_argument( "--code_insee", type=str, help="Code INSEE de la commune à traiter" ) - group.add_argument("--dept", type=str, help="Département à traiter (toutes les communes du dept sont traitées une par une)") + group.add_argument( + "--dept", + type=str, + help="Département à traiter (toutes les communes du dept sont traitées une par une)", + ) subparser.set_defaults(func=rapprochement.process) args = parser.parse_args() diff --git a/bano/models.py b/bano/models.py index da7b246..6bdceb3 100644 --- a/bano/models.py +++ b/bano/models.py @@ -65,12 +65,19 @@ def add_fantoir(self, topo): class Noms: - def __init__(self, code_insee, source): + def __init__(self, code_insee): self.code_insee = code_insee - self.source = source self.triplets_nom_fantoir_source = [] self.fantoir_par_nom_sous_commune = {"RACINE": defaultdict(list)} + def __iter__(self): + return iter(self.triplets_nom_fantoir_source) + + def _print(self, pattern=None): + for a in self: + if not pattern or pattern in a._as_csv_format_bano(): + print(a._as_csv_format_bano()) + # On ne charge pas les noms des numeros OSM. Ils sont ajoutés via Adresses.nom_des_adresses def charge_noms_osm_hors_numeros(self): data = ( @@ -158,7 +165,7 @@ def remplit_fantoir_par_nom_sous_commune(self): def enregistre(self): sql_process( "suppression_noms_commune", - dict(code_insee=self.code_insee, source=self.source), + dict(code_insee=self.code_insee), db.bano, ) io_in_csv = io.StringIO() @@ -241,9 +248,8 @@ def _as_string(self): class Adresses: - def __init__(self, code_insee, source): + def __init__(self, code_insee): self.code_insee = code_insee - self.source = source self.liste = set() self.index_voie = defaultdict(list) self.noms_de_voies = set() @@ -539,7 +545,7 @@ def charge_points_nommes_lieux_dits_cadastre(self): "lieu-dit", x, y, - nom, + hp.format_toponyme(nom), code_insee_ancienne_commune=code_insee_ac, ) ) @@ -592,7 +598,7 @@ def noms_des_points_nommes(self, noms): if a.source == "CADASTRE": noms.add_nom( Nom( - hp.format_toponyme(a.nom), + a.nom, a.fantoir, a.nature, a.source, diff --git a/bano/rapprochement.py b/bano/rapprochement.py index a565930..ec22c7d 100644 --- a/bano/rapprochement.py +++ b/bano/rapprochement.py @@ -6,13 +6,13 @@ from .models import Noms, Adresses, Topo, Points_nommes -def process_unitaire(source, code_insee): - id_batch = b.batch_start_log("rapprochement", source, code_insee) +def process_unitaire(code_insee): + id_batch = b.batch_start_log("rapprochement", 'toutes', code_insee) try: topo = Topo(code_insee) - adresses = Adresses(code_insee, source) + adresses = Adresses(code_insee) points_nommes = Points_nommes(code_insee) - noms = Noms(code_insee, source) + noms = Noms(code_insee) noms.charge_noms_osm_hors_numeros() adresses.charge_numeros_osm() @@ -26,7 +26,6 @@ def process_unitaire(source, code_insee): noms.add_fantoir(topo) noms.remplit_fantoir_par_nom_sous_commune() - points_nommes.complete_fantoir(noms) adresses.complete_fantoir(noms) @@ -39,7 +38,7 @@ def process_unitaire(source, code_insee): b.batch_stop_log(id_batch, False) -def process(source, 
code_insee, dept, **kwargs): +def process(code_insee, dept, **kwargs): if dept: liste_insee = h.liste_communes_par_dept(dept) else: @@ -47,4 +46,4 @@ def process(source, code_insee, dept, **kwargs): for code_insee, nom in liste_insee: if dept: print(f"{code_insee} - {nom}") - process_unitaire(source, code_insee) + process_unitaire(code_insee) From 3af7da3d9fd670411a92b1dde6f17b5543ca56f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Sun, 1 Jan 2023 17:17:53 +0000 Subject: [PATCH 049/163] centroides complementaires --- .../charge_points_nommes_centroides_OSM.sql | 40 ++++++++++++++++++- 1 file changed, 38 insertions(+), 2 deletions(-) diff --git a/bano/sql/charge_points_nommes_centroides_OSM.sql b/bano/sql/charge_points_nommes_centroides_OSM.sql index 7d034af..a3442e8 100644 --- a/bano/sql/charge_points_nommes_centroides_OSM.sql +++ b/bano/sql/charge_points_nommes_centroides_OSM.sql @@ -54,10 +54,46 @@ AS lignes_agregees.fantoir FROM lignes_agregees JOIN centroide_lignes_agregees -USING (name,insee_jointure)) +USING (name,insee_jointure)), +complement +AS +(SELECT c.*, + a9.code_insee AS insee_ac +FROM (SELECT pl.way point, + pl.name, + pl."ref:FR:FANTOIR" fantoir + FROM (SELECT way FROM planet_osm_polygon WHERE "ref:INSEE" = '__code_insee__') p + JOIN planet_osm_point pl + ON pl.way && p.way AND + ST_Intersects(pl.way, p.way) + WHERE (pl."ref:FR:FANTOIR" != '' OR + pl.junction != '') AND + pl.name != '' + UNION + SELECT ST_Centroid(pl.way), + pl.name, + pl."ref:FR:FANTOIR" f + FROM (SELECT way FROM planet_osm_polygon WHERE "ref:INSEE" = '__code_insee__') p + JOIN planet_osm_polygon pl + ON pl.way && p.way AND + ST_Intersects(pl.way, p.way) + WHERE ( pl.highway||pl."ref:FR:FANTOIR" != '' OR + pl.landuse = 'residential' OR + pl.place = 'square' OR + pl.amenity = 'school') AND + pl.name != '')c +LEFT OUTER JOIN (SELECT * FROM polygones_insee_a9 WHERE insee_a8 = '__code_insee__') a9 +ON ST_Intersects(c.point, a9.geometrie)) +SELECT ST_x(point), + ST_y(point), + name, + insee_ac, + fantoir +FROM resultat +UNION ALL SELECT ST_x(point), ST_y(point), name, insee_ac, fantoir -FROM resultat; +FROM complement; From 4094395601bde3a2daa134aa036dcf365dc69f84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Sun, 1 Jan 2023 20:55:29 +0000 Subject: [PATCH 050/163] Base unique - creation de la BD --- bano/core_place.py | 109 -------------------- bano/setup_db.py | 10 +- bano/sql/create_base.sql | 2 + bano/sql/create_table_base_bano_cibles.sql | 46 +++++++++ bano/sql/create_table_base_bano_cog.sql | 59 +++++++++++ bano/sql/create_table_base_bano_outils.sql | 27 +++++ bano/sql/create_table_base_bano_sources.sql | 6 +- create_base.sh | 12 +-- init_base.sh | 5 + sql/create_base.sql | 2 + 10 files changed, 155 insertions(+), 123 deletions(-) delete mode 100755 bano/core_place.py create mode 100644 bano/sql/create_base.sql create mode 100644 bano/sql/create_table_base_bano_cibles.sql create mode 100644 bano/sql/create_table_base_bano_cog.sql create mode 100644 bano/sql/create_table_base_bano_outils.sql create mode 100755 init_base.sh create mode 100644 sql/create_base.sql diff --git a/bano/core_place.py b/bano/core_place.py deleted file mode 100755 index ee71dfb..0000000 --- a/bano/core_place.py +++ /dev/null @@ -1,109 +0,0 @@ -#!/usr/bin/env python -# coding: UTF-8 - -# Place : ID = Fantoir quand présent, sinon nom normalisé -# 1- chargement Fantoir -# 2- chargement cadastre, rapprochement Fantoir, géométrie Cadastre calculée des 
parcelles -# 3- chargement OSM, rapprochement Fantoir, géométrie OSM -# 4- - -import sys -import time -import os - -from . import constants, db -from . import helpers as hp -from . import db_helpers as dbhp -from .sources import fantoir -from .models import Fantoir, Cadastre, Osm, Place, Places - -from .outils_de_gestion import batch_start_log, batch_end_log - -def get_data_from_pg(query_name,insee_com): - with db.bano_cache.cursor() as conn: - with open(os.path.join(os.path.dirname(os.path.abspath(__file__)),'sql/{:s}.sql'.format(query_name)),'r') as fq: - conn.execute(fq.read().replace('__com__',insee_com)) - res = [] - for l in conn : - res.append(list(l)) - return res - - -def load_fantoir(liste_fantoir): - for c in liste_fantoir: - places.add_place(Place(0,0,'',fantoir.mapping.fantoir[c]['nom'],'','',c,fantoir.mapping.fantoir[c]['ld_bati'], c[0:5])) - - -def load_cadastre(code_insee): - data = get_data_from_pg('cadastre_2_place',code_insee) - for lon, lat, name, fantoir, *others in data: - targets = places.match_name(name,'FANTOIR') - if targets: - for t in targets: - places.p[t].update_cadastre(lon,lat,name) - else: - places.add_place(Place(lon,lat,'','',name,'',fantoir,-1,code_insee)) - - - -def load_osm(code_insee): - data = get_data_from_pg('place_insee',code_insee) - for lon, lat, place, name, fantoir, ld_bati, tags, *others in data: - targets_fantoir = places.match_fantoir(fantoir) - targets_name = places.match_name(name,'FANTOIR') - if targets_fantoir: - for t in targets_fantoir: - places.p[t].update_osm(lon, lat, place, name, fantoir if hp.is_valid_fantoir(fantoir,code_insee) else '') - elif targets_name: - for t in targets_name: - places.p[t].update_osm(lon, lat, place, name, fantoir if hp.is_valid_fantoir(fantoir,code_insee) else '') - else: - places.add_place(Place(lon, lat, place,'','',name, fantoir if hp.is_valid_fantoir(fantoir,code_insee) else '',-1, code_insee)) - -def load_to_db(places, code_insee): - with db.bano.cursor() as conn: - conn.execute(f"DELETE FROM cumul_places WHERE insee_com = '{code_insee}'") - - sload = "INSERT INTO cumul_places (geometrie,libelle_cadastre,libelle_osm,libelle_fantoir,fantoir,insee_com,dept,code_postal,source,ld_bati,ld_osm) VALUES" - a_values = places.as_SQL_Cadastre_array() - nb_rec = len(a_values) - if nb_rec>0: - conn.execute(sload+','.join(a_values)) - a_values = places.as_SQL_OSM_array() - if len(a_values)>0: - conn.execute(sload+','.join(a_values)) - nb_rec+=len(a_values) - return(nb_rec) - -def place_2_db(code_insee): - global fantoir,places - format_cadastre = dbhp.get_cadastre_format(code_insee) - places = Places() - - fantoir.mapping.reset() - fantoir.mapping.load_lieux_dits(code_insee) - load_fantoir(fantoir.mapping.fantoir) - - batch_id_osm = batch_start_log('OSM','cumulPlaces',code_insee) - - if format_cadastre == 'VECT': - batch_id_cadastre = batch_start_log('CADASTRE','cumulPlaces',code_insee) - load_cadastre(code_insee) - load_osm(code_insee) - - nb_rec = load_to_db(places, code_insee) - batch_end_log(nb_rec,batch_id_osm) - if format_cadastre == 'VECT': - batch_end_log(nb_rec,batch_id_cadastre) - -def process(code_insee, depts, France, **kwargs): - liste_codes_insee = [] - if code_insee: - liste_codes_insee = dbhp.get_insee_name(code_insee) - if not liste_codes_insee: - for d in (depts or France): - liste_codes_insee += dbhp.get_insee_name_list_by_dept(d) - for code_insee, nom in liste_codes_insee: - # print(f"{code_insee} - {nom}") - place_2_db(code_insee) - diff --git a/bano/setup_db.py b/bano/setup_db.py index 
e77e25f..b4431dc 100644 --- a/bano/setup_db.py +++ b/bano/setup_db.py @@ -1,11 +1,13 @@ #!/usr/bin/env python # coding: UTF-8 -from . import db +from .db import bano_db from .sql import sql_process -def setup_bano_sources(**kwargs): - sql_process('create_table_base_bano_sources',{},db.bano_sources) def setup_bano(**kwargs): - sql_process('create_table_base_bano',{},db.bano) \ No newline at end of file + sql_process("create_base", {}, bano_db) + sql_process("create_table_base_bano_outils", {}, bano_db) + sql_process("create_table_base_bano_sources", {}, bano_db) + sql_process("create_table_base_bano_cog", {}, bano_db) + sql_process("create_table_base_bano_cibles", {}, bano_db) diff --git a/bano/sql/create_base.sql b/bano/sql/create_base.sql new file mode 100644 index 0000000..67a0700 --- /dev/null +++ b/bano/sql/create_base.sql @@ -0,0 +1,2 @@ +CREATE EXTENSION IF NOT EXISTS postgis; +CREATE EXTENSION IF NOT EXISTS hstore; diff --git a/bano/sql/create_table_base_bano_cibles.sql b/bano/sql/create_table_base_bano_cibles.sql new file mode 100644 index 0000000..3e84e0e --- /dev/null +++ b/bano/sql/create_table_base_bano_cibles.sql @@ -0,0 +1,46 @@ +CREATE TABLE IF NOT EXISTS bano_adresses ( + fantoir text, + lon float, + lat float, + numero text, + nom_voie text, + nom_place text, + code_postal text, + code_insee text, + code_insee_ancienne_commune text, + nom_ancienne_commune text, + source text, + certification_commune integer, + geometrie geometry (Point, 4326) GENERATED ALWAYS AS (ST_Point(lon,lat)) STORED); + +CREATE INDEX IF NOT EXISTS gidx_bano_adresses ON bano_adresses USING GIST(geometrie); +CREATE INDEX IF NOT EXISTS idx_bano_adresses_code_insee ON bano_adresses (code_insee); + +CREATE TABLE IF NOT EXISTS bano_points_nommes ( + fantoir text, + nom text, + code_insee text, + nature text, + code_insee_ancienne_commune text, + nom_ancienne_commune text, + source text, + lon float, + lat float, + geometrie geometry (Point, 4326) GENERATED ALWAYS AS (ST_Point(lon,lat)) STORED); + +CREATE INDEX IF NOT EXISTS idx_bano_points_nommes_code_insee ON bano_points_nommes (code_insee); +CREATE INDEX IF NOT EXISTS idx_bano_points_nommes_code_insee_source ON bano_points_nommes (code_insee,source); + +CREATE TABLE IF NOT EXISTS nom_fantoir ( + fantoir text, + nom text, + code_insee text, + nature text, + code_insee_ancienne_commune text, + nom_ancienne_commune text, + source text); + +CREATE INDEX IF NOT EXISTS idx_nom_fantoir_code_insee ON nom_fantoir (code_insee); +CREATE INDEX IF NOT EXISTS idx_nom_fantoir_code_insee_source ON nom_fantoir (code_insee,source); + +GRANT SELECT ON ALL TABLES IN SCHEMA public TO public; \ No newline at end of file diff --git a/bano/sql/create_table_base_bano_cog.sql b/bano/sql/create_table_base_bano_cog.sql new file mode 100644 index 0000000..09fa34e --- /dev/null +++ b/bano/sql/create_table_base_bano_cog.sql @@ -0,0 +1,59 @@ +CREATE TABLE IF NOT EXISTS cog_commune ( + typecom character(4), + com character(5), + reg character(2), + dep character varying(3), + ctcd character(4), + arr character(4), + tncc character(1), + ncc text, + nccenr text, + libelle text, + can character(5), + comparent character(5)); +CREATE INDEX IF NOT EXISTS idx_cog_commune_com ON cog_commune(com); + +CREATE TABLE IF NOT EXISTS cog_canton ( + can character(5), + dep character varying(3), + reg character(2), + compct character(1), + burcentral character(5), + tncc character(1), + ncc text, + nccenr text, + libelle text, + typect character(1)); +CREATE INDEX IF NOT EXISTS idx_cog_canton_can ON 
cog_canton(can); + +CREATE TABLE IF NOT EXISTS cog_arrondissement ( + arr character(4), + dep character varying(3), + reg character(2), + cheflieu character(5), + tncc character(1), + ncc text, + nccenr text, + libelle text); +CREATE INDEX IF NOT EXISTS idx_cog_arrondissement_arr ON cog_arrondissement(arr); + +CREATE TABLE IF NOT EXISTS cog_departement ( + dep character varying(3), + reg character(2), + cheflieu character(5), + tncc character(1), + ncc text, + nccenr text, + libelle text); +CREATE INDEX IF NOT EXISTS idx_cog_departement_dep ON cog_departement(dep); + +CREATE TABLE IF NOT EXISTS cog_region ( + reg character(2), + cheflieu character(5), + tncc character(1), + ncc text, + nccenr text, + libelle text); +CREATE INDEX IF NOT EXISTS idx_cog_region_reg ON cog_region(reg); + +GRANT SELECT ON ALL TABLES IN SCHEMA public TO public; \ No newline at end of file diff --git a/bano/sql/create_table_base_bano_outils.sql b/bano/sql/create_table_base_bano_outils.sql new file mode 100644 index 0000000..c8be3e9 --- /dev/null +++ b/bano/sql/create_table_base_bano_outils.sql @@ -0,0 +1,27 @@ + +CREATE TABLE IF NOT EXISTS batch ( + id_batch serial, + etape text, + source text, + timestamp_debut bigint, + date_debut text, + date_fin text, + duree integer, + code_zone text, + nom_zone text, + ok boolean, + CONSTRAINT batch_pkey PRIMARY KEY (id_batch)); + +CREATE TABLE IF NOT EXISTS batch_historique( + id_batch integer, + etape text, + source text, + timestamp_debut bigint, + date_debut text, + date_fin text, + duree integer, + code_zone text, + nom_zone text, + ok boolean); + +GRANT SELECT ON ALL TABLES IN SCHEMA public TO public; \ No newline at end of file diff --git a/bano/sql/create_table_base_bano_sources.sql b/bano/sql/create_table_base_bano_sources.sql index 293a028..fa047a9 100644 --- a/bano/sql/create_table_base_bano_sources.sql +++ b/bano/sql/create_table_base_bano_sources.sql @@ -37,8 +37,9 @@ CREATE TABLE IF NOT EXISTS ban ( source_position text, source_nom_voie text, certification_commune integer, - cad_parcelles text); --- geometrie geometry (Point, 4326) DEFAULT (ST_Point(lon,lat))); + cad_parcelles text, + geometrie geometry (Point, 4326) GENERATED ALWAYS AS (ST_Point(lon,lat)) STORED); + CREATE INDEX IF NOT EXISTS idx_ban_code_insee ON ban(code_insee); CREATE TABLE IF NOT EXISTS lieux_dits ( @@ -48,7 +49,6 @@ CREATE TABLE IF NOT EXISTS lieux_dits ( updated date, geometrie geometry(Polygon,4326), geom_centroid geometry (Point, 4326) GENERATED ALWAYS AS (ST_Centroid(geometrie)) STORED); -); CREATE INDEX IF NOT EXISTS gidx__centroid_lieux_dits ON lieux_dits USING gist(geom_centroid); CREATE INDEX IF NOT EXISTS lieux_dits_code_insee ON lieux_dits (code_insee); diff --git a/create_base.sh b/create_base.sh index 443ccd8..7640af3 100755 --- a/create_base.sh +++ b/create_base.sh @@ -1,8 +1,6 @@ -sudo -u postgres -s +#!/bin/bash + +set -e + createuser -s -d cadastre -createdb -E UTF-8 -T template0 -O cadastre cadastre -exit -psql -d cadastre -U cadastre -f sql/create_base.sql -psql -d cadastre -U cadastre -f sql/create_table_base_bano.sql -psql -d osm -U cadastre -f sql/create_base.sql -psql -d osm -U cadastre -f sql/create_table_base_osm.sql +createdb -E UTF-8 -T template0 -O cadastre bano diff --git a/init_base.sh b/init_base.sh new file mode 100755 index 0000000..9b667b9 --- /dev/null +++ b/init_base.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +set -e + +psql -d bano -U cadastre -v ON_ERROR_STOP=1 -f sql/create_base.sql diff --git a/sql/create_base.sql b/sql/create_base.sql new file mode 100644 
index 0000000..67a0700 --- /dev/null +++ b/sql/create_base.sql @@ -0,0 +1,2 @@ +CREATE EXTENSION IF NOT EXISTS postgis; +CREATE EXTENSION IF NOT EXISTS hstore; From 9e65dba2455c10dd7722f7893592a00a6d5c45bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Sun, 1 Jan 2023 20:57:26 +0000 Subject: [PATCH 051/163] menage --- bano/sources/fantoir.py | 59 ----------------------- bano/sql/create_base.sql | 2 - bano/sql/create_table_base_bano.sql | 72 ----------------------------- bano/sql/create_table_cog.sql | 57 ----------------------- 4 files changed, 190 deletions(-) delete mode 100644 bano/sources/fantoir.py delete mode 100644 bano/sql/create_base.sql delete mode 100644 bano/sql/create_table_base_bano.sql delete mode 100644 bano/sql/create_table_cog.sql diff --git a/bano/sources/fantoir.py b/bano/sources/fantoir.py deleted file mode 100644 index 26e2b8d..0000000 --- a/bano/sources/fantoir.py +++ /dev/null @@ -1,59 +0,0 @@ -from bano import db -from ..helpers import normalize - -class Mapping: - def __init__(self): - self.fantoir = {} - self.code_fantoir_vers_nom_fantoir = {} - self.code_fantoir_vers_noms = {} - - def reset(self): - self.fantoir = {} - self.code_fantoir_vers_nom_fantoir = {} - self.code_fantoir_vers_noms = {} - - def load(self,insee): - str_query = ("""SELECT * - FROM (SELECT code_insee||id_voie||cle_rivoli, - nature_voie||' '||libelle_voie, - rank() OVER(PARTITION BY nature_voie||' '||libelle_voie ORDER BY type_voie,id_voie,cle_rivoli) rang - FROM fantoir_voie - WHERE code_insee = '%s' AND - caractere_annul NOT IN ('O','Q')) a - WHERE rang = 1;""" % insee) - with db.bano_cache.cursor() as cur_fantoir: - cur_fantoir.execute(str_query) - for c in cur_fantoir: - self.code_fantoir_vers_nom_fantoir[c[0]] = c[1] - cle = ' '.join(c[1].replace('-',' ').split()) - cle = normalize(cle) - self.fantoir[cle] = c[0] - - def load_lieux_dits(self,insee): - str_query = f"""SELECT fantoir10, - TRIM(BOTH FROM nature_voie||' '||libelle_voie), - ld_bati - FROM fantoir_voie - WHERE code_insee = '{insee}' AND - -- type_voie = '3' AND - COALESCE(caractere_annul,'') = '';""" - with db.bano_cache.cursor() as conn: - conn.execute(str_query) - for c in conn: - self.fantoir[c[0]] = {"nom":c[1], "ld_bati":c[2].strip()} - - def add_fantoir_name(self,fantoir,name,source): - if not fantoir in self.code_fantoir_vers_noms: - self.code_fantoir_vers_noms[fantoir] = {} - if not source in self.code_fantoir_vers_noms[fantoir]: - self.code_fantoir_vers_noms[fantoir][source] = name - - def get_fantoir_name(self,fantoir,source): - res = '' - if fantoir in self.code_fantoir_vers_noms: - if source in self.code_fantoir_vers_noms[fantoir]: - res = self.code_fantoir_vers_noms[fantoir][source] - return res - - -mapping = Mapping() diff --git a/bano/sql/create_base.sql b/bano/sql/create_base.sql deleted file mode 100644 index 67a0700..0000000 --- a/bano/sql/create_base.sql +++ /dev/null @@ -1,2 +0,0 @@ -CREATE EXTENSION IF NOT EXISTS postgis; -CREATE EXTENSION IF NOT EXISTS hstore; diff --git a/bano/sql/create_table_base_bano.sql b/bano/sql/create_table_base_bano.sql deleted file mode 100644 index 6c513c0..0000000 --- a/bano/sql/create_table_base_bano.sql +++ /dev/null @@ -1,72 +0,0 @@ -CREATE TABLE IF NOT EXISTS bano_adresses ( - fantoir text, - lon float, - lat float, - numero text, - nom_voie text, - nom_place text, - code_postal text, - code_insee text, - code_insee_ancienne_commune text, - nom_ancienne_commune text, - source text, - certification_commune integer, - geometrie geometry 
(Point, 4326) GENERATED ALWAYS AS (ST_Point(lon,lat)) STORED); - -CREATE INDEX IF NOT EXISTS gidx_bano_adresses ON bano_adresses USING GIST(geometrie); -CREATE INDEX IF NOT EXISTS idx_bano_adresses_code_insee ON bano_adresses (code_insee); - -CREATE TABLE IF NOT EXISTS bano_points_nommes ( - fantoir text, - nom text, - code_insee text, - nature text, - code_insee_ancienne_commune text, - nom_ancienne_commune text, - source text, - lon float, - lat float, - geometrie geometry (Point, 4326) GENERATED ALWAYS AS (ST_Point(lon,lat)) STORED); - -CREATE INDEX IF NOT EXISTS idx_bano_points_nommes_code_insee ON bano_points_nommes (code_insee); -CREATE INDEX IF NOT EXISTS idx_bano_points_nommes_code_insee_source ON bano_points_nommes (code_insee,source); - -CREATE TABLE IF NOT EXISTS nom_fantoir ( - fantoir text, - nom text, - code_insee text, - nature text, - code_insee_ancienne_commune text, - nom_ancienne_commune text, - source text); - -CREATE INDEX IF NOT EXISTS idx_nom_fantoir_code_insee ON nom_fantoir (code_insee); -CREATE INDEX IF NOT EXISTS idx_nom_fantoir_code_insee_source ON nom_fantoir (code_insee,source); - - -CREATE TABLE IF NOT EXISTS batch ( - id_batch serial, - etape text, - source text, - timestamp_debut bigint, - date_debut text, - date_fin text, - duree integer, - code_zone text, - nom_zone text, - ok boolean, - CONSTRAINT batch_pkey PRIMARY KEY (id_batch)); - -CREATE TABLE IF NOT EXISTS batch_historique( - id_batch integer, - etape text, - source text, - timestamp_debut bigint, - date_debut text, - date_fin text, - duree integer, - code_zone text, - nom_zone text, - ok boolean); - -GRANT SELECT ON ALL TABLES IN SCHEMA public TO public; \ No newline at end of file diff --git a/bano/sql/create_table_cog.sql b/bano/sql/create_table_cog.sql deleted file mode 100644 index d7eab67..0000000 --- a/bano/sql/create_table_cog.sql +++ /dev/null @@ -1,57 +0,0 @@ -CREATE TABLE IF NOT EXISTS cog_commune ( - typecom character(4), - com character(5), - reg character(2), - dep character varying(3), - ctcd character(4), - arr character(4), - tncc character(1), - ncc text, - nccenr text, - libelle text, - can character(5), - comparent character(5)); -CREATE INDEX IF NOT EXISTS idx_cog_commune_com ON cog_commune(com); - -CREATE TABLE IF NOT EXISTS cog_canton ( - can character(5), - dep character varying(3), - reg character(2), - compct character(1), - burcentral character(5), - tncc character(1), - ncc text, - nccenr text, - libelle text, - typect character(1)); -CREATE INDEX IF NOT EXISTS idx_cog_canton_can ON cog_canton(can); - -CREATE TABLE IF NOT EXISTS cog_arrondissement ( - arr character(4), - dep character varying(3), - reg character(2), - cheflieu character(5), - tncc character(1), - ncc text, - nccenr text, - libelle text); -CREATE INDEX IF NOT EXISTS idx_cog_arrondissement_arr ON cog_arrondissement(arr); - -CREATE TABLE IF NOT EXISTS cog_departement ( - dep character varying(3), - reg character(2), - cheflieu character(5), - tncc character(1), - ncc text, - nccenr text, - libelle text); -CREATE INDEX IF NOT EXISTS idx_cog_departement_dep ON cog_departement(dep); - -CREATE TABLE IF NOT EXISTS cog_region ( - reg character(2), - cheflieu character(5), - tncc character(1), - ncc text, - nccenr text, - libelle text); -CREATE INDEX IF NOT EXISTS idx_cog_region_reg ON cog_region(reg); \ No newline at end of file From 8e44a0090ca4fa39197761fcbab1f3746c1b6ef3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Sun, 1 Jan 2023 21:05:21 +0000 Subject: [PATCH 052/163] 
Base unique - propagation des modifs --- bano/batch.py | 30 ++++--- bano/bin.py | 8 +- bano/boite_a_outils.py | 10 +-- bano/constants.py | 11 ++- bano/db.py | 8 +- bano/db_helpers.py | 10 +-- bano/helpers.py | 170 +++++++++++++++++++----------------- bano/models.py | 31 ++----- bano/pre_process_suffixe.py | 74 +++++++++------- bano/rapprochement.py | 2 +- bano/sources/ban.py | 8 +- bano/sources/cadastre_ld.py | 6 +- bano/sources/cog.py | 7 +- bano/sources/topo.py | 6 +- bano/sql.py | 29 +++--- 15 files changed, 210 insertions(+), 200 deletions(-) diff --git a/bano/batch.py b/bano/batch.py index a5574ea..782cb93 100644 --- a/bano/batch.py +++ b/bano/batch.py @@ -2,15 +2,15 @@ # coding: UTF-8 import time -from . import db -from .sql import sql_get_data,sql_process +from .sql import sql_get_data, sql_process -def batch_start_log(etape,source=None,code_zone=None,nom_zone=None): + +def batch_start_log(etape, source=None, code_zone=None, nom_zone=None): t = time.localtime() - date_debut = time.strftime('%d-%m-%Y %H:%M:%S',t) - timestamp_debut = round(time.mktime(t),0) + date_debut = time.strftime("%d-%m-%Y %H:%M:%S", t) + timestamp_debut = round(time.mktime(t), 0) - champs = 'etape,timestamp_debut,date_debut' + champs = "etape,timestamp_debut,date_debut" values = f"'{etape}',{timestamp_debut},'{date_debut}'" if source: champs = f"{champs},source" @@ -21,13 +21,21 @@ def batch_start_log(etape,source=None,code_zone=None,nom_zone=None): if nom_zone: champs = f"{champs},nom_zone" values = f"{values},'{nom_zone}'" - return sql_get_data('batch_start_log',dict(champs=champs,values=values),db.bano)[0][0] + return sql_get_data("batch_start_log", dict(champs=champs, values=values))[0][0] -def batch_stop_log(id_batch,status): +def batch_stop_log(id_batch, status): t = time.localtime() - date_fin = time.strftime('%d-%m-%Y %H:%M:%S',t) - timestamp_fin = round(time.mktime(t),0) - sql_process('batch_stop_log',dict(id_batch=str(id_batch),date_fin=str(date_fin),timestamp_fin=str(timestamp_fin),status=str(status)),db.bano) + date_fin = time.strftime("%d-%m-%Y %H:%M:%S", t) + timestamp_fin = round(time.mktime(t), 0) + sql_process( + "batch_stop_log", + dict( + id_batch=str(id_batch), + date_fin=str(date_fin), + timestamp_fin=str(timestamp_fin), + status=str(status), + ), + ) if not status: print(f"Erreur pendant le processus {id_batch}") diff --git a/bano/bin.py b/bano/bin.py index 31f10da..42eb1ff 100644 --- a/bano/bin.py +++ b/bano/bin.py @@ -19,13 +19,7 @@ def main(): subparsers = parser.add_subparsers(help="Commandes disponibles") subparser = subparsers.add_parser( - "setup_db_bano_sources", - help="Initialisation de la BD des sources : OSM, BAN, TOPO", - ) - subparser.set_defaults(func=setup_db.setup_bano_sources) - - subparser = subparsers.add_parser( - "setup_db_bano", + "setup_db", help="Initialisation de la BD BANO", ) subparser.set_defaults(func=setup_db.setup_bano) diff --git a/bano/boite_a_outils.py b/bano/boite_a_outils.py index e03b518..6234900 100755 --- a/bano/boite_a_outils.py +++ b/bano/boite_a_outils.py @@ -1,14 +1,14 @@ #!/usr/bin/env python # coding: UTF-8 -from . import db from .sql import sql_process from . 
import batch as b + def maj_table_communes(**kwargs): - batch_id = b.batch_start_log('maj_table_communes','France','France') + batch_id = b.batch_start_log("maj_table_communes", "France", "France") try: - sql_process('create_table_polygones_communes',dict(),db.bano_sources) - b.batch_stop_log(batch_id,True) + sql_process("create_table_polygones_communes", dict()) + b.batch_stop_log(batch_id, True) except: - b.batch_stop_log(batch_id,False) + b.batch_stop_log(batch_id, False) diff --git a/bano/constants.py b/bano/constants.py index 5d63f28..d2b8f95 100644 --- a/bano/constants.py +++ b/bano/constants.py @@ -9,22 +9,25 @@ def load_pairs_from_file(basename): with (Path(__file__).parent / "dictionnaires" / f"{basename}.txt").open() as f: return [l[:-1].split("\t") for l in f.readlines() if not l.startswith("#")] + def load_json_from_file(json_filename): with (Path(__file__).parent / "data" / json_filename).open() as f: return json.loads(f.read()) + def get_const_code_dir(): - return load_json_from_file('code_dir.json') + return load_json_from_file("code_dir.json") + DEPARTEMENTS = [ f"{n:>02}" for n in [*range(1, 20), "2A", "2B", *range(21, 96), *range(971, 975), "976"] ] -DEPARTEMENTS_3CHAR = [('0'+s)[-3:] for s in DEPARTEMENTS] +DEPARTEMENTS_3CHAR = [("0" + s)[-3:] for s in DEPARTEMENTS] LETTRE_A_LETTRE = { - "A": ["Â", "À", "Á","Ã"], + "A": ["Â", "À", "Á", "Ã"], "C": ["Ç"], "E": ["È", "Ê", "É", "Ë"], "I": ["Ï", "Î", "Í"], @@ -81,6 +84,6 @@ def get_const_code_dir(): "trunk", "motorway", ] -HIGHWAY_TYPES_INDEX = {e: 2 ** i for i, e in enumerate(HIGHWAY_TYPES)} +HIGHWAY_TYPES_INDEX = {e: 2**i for i, e in enumerate(HIGHWAY_TYPES)} CODE_VOIE_FANTOIR = "0123456789ABCDEFGHIJKLMNOPQRSTVWXYZ" diff --git a/bano/db.py b/bano/db.py index 57fb5c6..3986009 100644 --- a/bano/db.py +++ b/bano/db.py @@ -3,8 +3,6 @@ import psycopg2 import psycopg2.extras -bano = psycopg2.connect(os.environ.get("BANO_PG", "dbname='bano' user='cadastre'")) -bano.autocommit = True -bano_sources = psycopg2.connect(os.environ.get("BANO_PG_CACHE", "dbname='bano_sources' user='cadastre'")) -bano_sources.autocommit = True -psycopg2.extras.register_hstore(bano_sources) +bano_db = psycopg2.connect(os.environ.get("BANO_PG", "dbname='bano' user='cadastre'")) +bano_db.autocommit = True +psycopg2.extras.register_hstore(bano_db) diff --git a/bano/db_helpers.py b/bano/db_helpers.py index 6643319..5b24db4 100644 --- a/bano/db_helpers.py +++ b/bano/db_helpers.py @@ -1,15 +1,15 @@ import time import os -from . 
import db from .sql import sql_get_data def liste_communes_par_dept(dept): - return sql_get_data("liste_communes_par_dept", dict(dept=dept), db.bano) + return sql_get_data("liste_communes_par_dept", dict(dept=dept)) def nom_commune(code_insee): - return sql_get_data( - "nom_commune_par_code_insee", dict(code_insee=code_insee), db.bano - )[0][0] + res = sql_get_data("nom_commune_par_code_insee", dict(code_insee=code_insee)) + if res and len(res) > 0 and len(res[0]) > 0: + return res[0][0] + return f"Commune inconnue (INSEE {code_insee})" diff --git a/bano/helpers.py b/bano/helpers.py index 762c90d..a26a1ed 100644 --- a/bano/helpers.py +++ b/bano/helpers.py @@ -4,32 +4,37 @@ def find_cp_in_tags(tags): - return tags.get('addr:postcode') or tags.get('postal_code') or '' + return tags.get("addr:postcode") or tags.get("postal_code") or "" + def escape_quotes(s): - return s.replace('\'','\'\'') + return s.replace("'", "''") + def remove_quotes(s): - return s.replace('\'','') + return s.replace("'", "") + def remove_quotes_on_null(s): - return s.replace("'null'","null") + return s.replace("'null'", "null") + def replace_single_quotes_with_double(s): - return s.replace('\'','"') + return s.replace("'", '"') + def format_toponyme(s): - a_s = s.replace('\'',' ').split(' ') + a_s = s.replace("'", " ").split(" ") # Accents dic_replace_accents = {} - dic_replace_accents['DERRIERE'] = u'DERRIÈRE' - dic_replace_accents['EGLISE'] = u'ÉGLISE' - dic_replace_accents['ILE'] = u'ÎLE' - dic_replace_accents['ILOT'] = u'ÎLOT' - dic_replace_accents['PRE'] = u'PRÉ' + dic_replace_accents["DERRIERE"] = "DERRIÈRE" + dic_replace_accents["EGLISE"] = "ÉGLISE" + dic_replace_accents["ILE"] = "ÎLE" + dic_replace_accents["ILOT"] = "ÎLOT" + dic_replace_accents["PRE"] = "PRÉ" - for m in range(0,len(a_s)): + for m in range(0, len(a_s)): if a_s[m] in dic_replace_accents: a_s[m] = dic_replace_accents[a_s[m]] @@ -38,48 +43,49 @@ def format_toponyme(s): # Minuscules dic_replace_hors_premier_mot = {} - dic_replace_hors_premier_mot['Au'] = 'au' - dic_replace_hors_premier_mot['Aux'] = 'aux' - dic_replace_hors_premier_mot['D'] = 'd\'' - dic_replace_hors_premier_mot['De'] = 'de' - dic_replace_hors_premier_mot['Des'] = 'des' - dic_replace_hors_premier_mot['Du'] = 'du' - dic_replace_hors_premier_mot['Et'] = 'et' - dic_replace_hors_premier_mot['L'] = 'l\'' - dic_replace_hors_premier_mot['La'] = 'la' - dic_replace_hors_premier_mot['Le'] = 'le' - dic_replace_hors_premier_mot['Les'] = 'les' - dic_replace_hors_premier_mot['Un'] = 'un' - dic_replace_hors_premier_mot['Une'] = 'une' + dic_replace_hors_premier_mot["Au"] = "au" + dic_replace_hors_premier_mot["Aux"] = "aux" + dic_replace_hors_premier_mot["D"] = "d'" + dic_replace_hors_premier_mot["De"] = "de" + dic_replace_hors_premier_mot["Des"] = "des" + dic_replace_hors_premier_mot["Du"] = "du" + dic_replace_hors_premier_mot["Et"] = "et" + dic_replace_hors_premier_mot["L"] = "l'" + dic_replace_hors_premier_mot["La"] = "la" + dic_replace_hors_premier_mot["Le"] = "le" + dic_replace_hors_premier_mot["Les"] = "les" + dic_replace_hors_premier_mot["Un"] = "un" + dic_replace_hors_premier_mot["Une"] = "une" if len(a_s) > 1: - for m in range(1,len(a_s)): + for m in range(1, len(a_s)): if a_s[m] in dic_replace_hors_premier_mot: a_s[m] = dic_replace_hors_premier_mot[a_s[m]] # Appostrophes initiale dic_ajoute_apostrophe = {} - dic_ajoute_apostrophe['d'] = 'd\'' - dic_ajoute_apostrophe['D'] = 'D\'' - dic_ajoute_apostrophe['l'] = 'l\'' - dic_ajoute_apostrophe['L'] = 'L\'' + dic_ajoute_apostrophe["d"] = "d'" + 
dic_ajoute_apostrophe["D"] = "D'" + dic_ajoute_apostrophe["l"] = "l'" + dic_ajoute_apostrophe["L"] = "L'" if a_s[0] in dic_ajoute_apostrophe: a_s[0] = dic_ajoute_apostrophe[a_s[0]] - s = ' '.join(a_s).replace('\' ','\'') - if len(s.strip())>1 and s.strip()[-1] == '\'': + s = " ".join(a_s).replace("' ", "'") + if len(s.strip()) > 1 and s.strip()[-1] == "'": s = s.strip()[0:-1] return s + def get_nb_parts(s): return len(s.split()) -def get_part_debut(s,nb_parts): - resp = '' +def get_part_debut(s, nb_parts): + resp = "" if get_nb_parts(s) > nb_parts: - resp = ' '.join(s.split()[0:nb_parts]) + resp = " ".join(s.split()[0:nb_parts]) return resp @@ -88,92 +94,98 @@ def is_valid_housenumber(hsnr): return len(hsnr) <= 11 return False + def is_valid_dept(dept): return dept in constants.DEPARTEMENTS + def get_code_dept_from_insee(code_insee): code_dept = code_insee[0:2] - if code_dept == '97': + if code_dept == "97": code_dept = code_insee[0:3] return code_dept + def get_sql_like_dept_string(dept): - return (dept+'___')[0:5] + return (dept + "___")[0:5] + def normalize(s): - s = s.upper() # tout en majuscules + s = s.upper() # tout en majuscules # s = s.split(' (')[0] # parenthèses : on coupe avant - s = s.replace('(','').replace(')','') # parenthèses : on supprime cf Fantoir pour les anciennes communes en suffixe - s = s.replace('-',' ') # separateur espace - s = s.replace('\'',' ') # separateur espace - s = s.replace('’',' ') # separateur espace - s = s.replace('/',' ') # separateur espace - s = s.replace(':',' ') # separateur deux points - s = ' '.join(s.split()) # separateur : 1 espace + s = s.replace("(", "").replace( + ")", "" + ) # parenthèses : on supprime cf Fantoir pour les anciennes communes en suffixe + s = s.replace("-", " ") # separateur espace + s = s.replace("'", " ") # separateur espace + s = s.replace("’", " ") # separateur espace + s = s.replace("/", " ") # separateur espace + s = s.replace(":", " ") # separateur deux points + s = " ".join(s.split()) # separateur : 1 espace for l in iter(constants.LETTRE_A_LETTRE): for ll in constants.LETTRE_A_LETTRE[l]: - s = s.replace(ll,l) - + s = s.replace(ll, l) -# type de voie + # type de voie abrev_trouvee = False p = 5 while (not abrev_trouvee) and p > -1: - p-= 1 - if get_part_debut(s,p) in constants.ABREV_TYPE_VOIE: - s = replace_type_voie(s,p) + p -= 1 + if get_part_debut(s, p) in constants.ABREV_TYPE_VOIE: + s = replace_type_voie(s, p) abrev_trouvee = True -# ordinal - s = s.replace(' EME ','EME ') - s = s.replace(' 1ERE',' PREMIERE') - s = s.replace(' 1ER',' PREMIER') + # ordinal + s = s.replace(" EME ", "EME ") + s = s.replace(" 1ERE", " PREMIERE") + s = s.replace(" 1ER", " PREMIER") -# chiffres + # chiffres for c in constants.CHIFFRES: - s = s.replace(c[0],c[1]) + s = s.replace(c[0], c[1]) -# titres, etc. + # titres, etc. 
for r in constants.EXPAND_NOMS: - s = s.replace(' '+r[0]+' ',' '+r[1]+' ') - if s[-len(r[0]):] == r[0]: - s = s.replace(' '+r[0],' '+r[1]) + s = s.replace(" " + r[0] + " ", " " + r[1] + " ") + if s[-len(r[0]) :] == r[0]: + s = s.replace(" " + r[0], " " + r[1]) for r in constants.EXPAND_TITRES: - s = s.replace(' '+r[0]+' ',' '+r[1]+' ') - if s[-len(r[0]):] == r[0]: - s = s.replace(' '+r[0],' '+r[1]) + s = s.replace(" " + r[0] + " ", " " + r[1] + " ") + if s[-len(r[0]) :] == r[0]: + s = s.replace(" " + r[0], " " + r[1]) for r in constants.ABREV_TITRES: - s = s.replace(' '+r[0]+' ',' '+r[1]+' ') - if s[-len(r[0]):] == r[0]: - s = s.replace(' '+r[0],' '+r[1]) + s = s.replace(" " + r[0] + " ", " " + r[1] + " ") + if s[-len(r[0]) :] == r[0]: + s = s.replace(" " + r[0], " " + r[1]) -# articles + # articles for c in constants.MOT_A_BLANC: - s = s.replace(' '+c+' ',' ') + s = s.replace(" " + c + " ", " ") -# chiffres romains + # chiffres romains sp = s.split() - if len(sp)>0 and sp[-1] in constants.CHIFFRES_ROMAINS: + if len(sp) > 0 and sp[-1] in constants.CHIFFRES_ROMAINS: sp[-1] = constants.CHIFFRES_ROMAINS[sp[-1]] - s = ' '.join(sp) + s = " ".join(sp) -# substitution complete + # substitution complete if s in constants.SUBSTITUTION_COMPLETE: s = constants.SUBSTITUTION_COMPLETE[s] return s[0:30] -def replace_type_voie(s,nb): +def replace_type_voie(s, nb): sp = s.split() - spd = ' '.join(sp[0:nb]) - spf = ' '.join(sp[nb:len(sp)]) - s = constants.ABREV_TYPE_VOIE[spd]+' '+spf + spd = " ".join(sp[0:nb]) + spf = " ".join(sp[nb : len(sp)]) + s = constants.ABREV_TYPE_VOIE[spd] + " " + spf return s def fantoir_valide(f, insee): - return (len(f) == 10 and f[0:5] == insee); + return len(f) == 10 and f[0:5] == insee + def display_insee_commune(code_insee, nom_commune): - print(f"{code_insee} - {nom_commune}") \ No newline at end of file + print(f"{code_insee} - {nom_commune}") diff --git a/bano/models.py b/bano/models.py index 6bdceb3..aa5bca6 100644 --- a/bano/models.py +++ b/bano/models.py @@ -5,7 +5,7 @@ import json from collections import defaultdict, OrderedDict -from . import db +from .db import bano_db from . 
import helpers as hp # from .sources import fantoir @@ -84,17 +84,14 @@ def charge_noms_osm_hors_numeros(self): sql_get_data( "charge_noms_voies_lieux-dits_OSM", dict(code_insee=self.code_insee), - db.bano_sources, ) + sql_get_data( "charge_noms_voies_relation_bbox_OSM", dict(code_insee=self.code_insee), - db.bano_sources, ) + sql_get_data( "charge_noms_voies_relation_OSM", dict(code_insee=self.code_insee), - db.bano_sources, ) ) for ( @@ -166,14 +163,13 @@ def enregistre(self): sql_process( "suppression_noms_commune", dict(code_insee=self.code_insee), - db.bano, ) io_in_csv = io.StringIO() for t in set(self.triplets_nom_fantoir_source): if t.fantoir: io_in_csv.write(t._as_csv_format_bano() + "\n") io_in_csv.seek(0) - with db.bano.cursor() as cur_insert: + with bano_db.cursor() as cur_insert: cur_insert.copy_from( io_in_csv, "nom_fantoir", @@ -283,9 +279,7 @@ def add_adresse(self, ad): # fantoir = topo.topo.get(a.voie_normalisee) def charge_numeros_ban(self, topo): - data = sql_get_data( - "charge_ban_commune", dict(code_insee=self.code_insee), db.bano_sources - ) + data = sql_get_data("charge_ban_commune", dict(code_insee=self.code_insee)) for ( id_fantoir, numero, @@ -318,10 +312,8 @@ def charge_numeros_ban(self, topo): def charge_numeros_osm(self): data = sql_get_data( - "charge_numeros_OSM", dict(code_insee=self.code_insee), db.bano_sources - ) + sql_get_data( - "charge_numeros_bbox_OSM", dict(code_insee=self.code_insee), db.bano_sources - ) + "charge_numeros_OSM", dict(code_insee=self.code_insee) + ) + sql_get_data("charge_numeros_bbox_OSM", dict(code_insee=self.code_insee)) for ( lon, @@ -446,7 +438,6 @@ def enregistre(self): sql_process( "suppression_adresses_commune", dict(code_insee=self.code_insee), - db.bano, ) io_in_csv = io.StringIO() for a in self: @@ -454,7 +445,7 @@ def enregistre(self): a._as_csv_format_bano() + "\n" ) # separateur $ car on trouve des virgules dans le contenu io_in_csv.seek(0) - with db.bano.cursor() as cur_insert: + with bano_db.cursor() as cur_insert: cur_insert.copy_from( io_in_csv, "bano_adresses", @@ -535,7 +526,6 @@ def charge_points_nommes_lieux_dits_cadastre(self): data = sql_get_data( "charge_points_nommes_lieux-dits_CADASTRE", dict(code_insee=self.code_insee), - db.bano_sources, ) for x, y, nom, code_insee_ac in data: self.add_point_nomme( @@ -554,7 +544,6 @@ def charge_points_nommes_centroides_osm(self): data = sql_get_data( "charge_points_nommes_centroides_OSM", dict(code_insee=self.code_insee), - db.bano_sources, ) for x, y, nom, code_insee_ac, fantoir in data: self.add_point_nomme( @@ -574,7 +563,6 @@ def charge_points_nommes_place_osm(self): data = sql_get_data( "charge_points_nommes_places_OSM", dict(code_insee=self.code_insee), - db.bano_sources, ) for x, y, nom, code_insee_ac, fantoir in data: self.add_point_nomme( @@ -633,13 +621,12 @@ def enregistre(self): sql_process( "suppression_points_nommes_commune", dict(code_insee=self.code_insee), - db.bano, ) io_in_csv = io.StringIO() for t in self: io_in_csv.write(t._as_csv_format_bano() + "\n") io_in_csv.seek(0) - with db.bano.cursor() as cur_insert: + with bano_db.cursor() as cur_insert: cur_insert.copy_from( io_in_csv, "bano_points_nommes", @@ -682,9 +669,7 @@ def _print(self, pattern=None): print(f"{k} : {v}") def charge_topo(self): - data = sql_get_data( - "charge_topo_commune", dict(code_insee=self.code_insee), db.bano_sources - ) + data = sql_get_data("charge_topo_commune", dict(code_insee=self.code_insee)) for fantoir, nom in data: nom = hp.normalize(" ".join(nom.replace("-", " 
").split())) self.topo[fantoir] = nom diff --git a/bano/pre_process_suffixe.py b/bano/pre_process_suffixe.py index e504c83..9124c1c 100644 --- a/bano/pre_process_suffixe.py +++ b/bano/pre_process_suffixe.py @@ -4,41 +4,42 @@ import re import sys import time -import os,os.path +import os, os.path from . import batch as b -from . import db +from .db import bano_db from . import helpers as hp from . import db_helpers as dh from .models import Adresses + def name_frequency(adresses): freq = {} noms_hors_1ere_passe = set() for nom in adresses.noms_de_voies: s = nom.split() # noms avec suffixe entre () quelle que soit leur longueur - if '(' in nom and nom[-1] == ')': + if "(" in nom and nom[-1] == ")": k = f"({nom.split('(')[1]}" if k not in freq: - freq[k] = {'nombre':1,'liste':set(nom)} + freq[k] = {"nombre": 1, "liste": set(nom)} else: - freq[k]['nombre'] +=1 - freq[k]['liste'].add(nom) - elif len(s)>4: - k = ' '.join(s[-2:]) + freq[k]["nombre"] += 1 + freq[k]["liste"].add(nom) + elif len(s) > 4: + k = " ".join(s[-2:]) if k not in freq: - freq[k] = {'nombre':1,'liste':set(nom)} + freq[k] = {"nombre": 1, "liste": set(nom)} else: - freq[k]['nombre'] +=1 - freq[k]['liste'].add(nom) - elif len(s)>3: + freq[k]["nombre"] += 1 + freq[k]["liste"].add(nom) + elif len(s) > 3: k = nom.split()[-1] if k not in freq: - freq[k] = {'nombre':1,'liste':set(nom)} + freq[k] = {"nombre": 1, "liste": set(nom)} else: - freq[k]['nombre'] +=1 - freq[k]['liste'].add(nom) + freq[k]["nombre"] += 1 + freq[k]["liste"].add(nom) else: noms_hors_1ere_passe.add(nom) @@ -48,72 +49,79 @@ def name_frequency(adresses): if len(s) > 1 and len(s) < 4: k = nom.split()[-1] if k in freq: - freq[k]['nombre'] +=1 - freq[k]['liste'].add(nom) + freq[k]["nombre"] += 1 + freq[k]["liste"].add(nom) return freq + def select_street_names_by_name(freq): sel = {} mots = {} for k in freq: ks = k.split() - if freq[k]['nombre'] > 5 and len(ks) > 1: + if freq[k]["nombre"] > 5 and len(ks) > 1: mots[ks[0]] = 1 mots[ks[1]] = 1 sel[k] = freq[k] for k in freq: ks = k.split() # un suffixe ne peut pas être un numero seul, cas dans les arrdts parisiens - if freq[k]['nombre'] > 5 and len(ks) == 1 and not k.isdigit() and not k in mots : + if freq[k]["nombre"] > 5 and len(ks) == 1 and not k.isdigit() and not k in mots: sel[k] = freq[k] return sel + def collect_adresses_points(selection, adresses): kres = {} for k in selection: kres[k] = [] - for nom_voie in selection[k]['liste']: + for nom_voie in selection[k]["liste"]: s = 0 max = 2 for i in adresses.index_voie[nom_voie]: add = adresses[i] - suffixe = k.replace("'","''") - kres[k].append(f"SELECT '{suffixe}' AS libelle_suffixe,'{adresses.code_insee}' AS code_insee,ST_BUFFER(ST_PointFromText('POINT({add.x} {add.y})',4326),0.0003,2) as g") - s+=1 - if s == max: break + suffixe = k.replace("'", "''") + kres[k].append( + f"SELECT '{suffixe}' AS libelle_suffixe,'{adresses.code_insee}' AS code_insee,ST_BUFFER(ST_PointFromText('POINT({add.x} {add.y})',4326),0.0003,2) as g" + ) + s += 1 + if s == max: + break return kres + def load_suffixe_2_db(adds, code_insee, nom_commune): - with db.bano_sources.cursor() as cur: + with bano_db.cursor() as cur: for h in adds: # Agde (34003): detection de 'Mer' abusif, pas d'autres suffixes dans la commune - if code_insee == '34003': + if code_insee == "34003": continue print(f"{code_insee} - {nom_commune}......... 
{h}") str_query = f"INSERT INTO suffixe SELECT ST_SetSRID((ST_Dump(gu)).geom,4326),code_insee,libelle_suffixe FROM (SELECT ST_Union(g) gu,code_insee,libelle_suffixe FROM({' UNION ALL '.join(adds[h])})a GROUP BY 2,3)a;" cur.execute(str_query) + def process(departements, **kwargs): for dept in departements: if hp.is_valid_dept(dept): print(f"Traitement du dept {dept}") - with db.bano_sources.cursor() as cur: + with bano_db.cursor() as cur: str_query = f"DELETE FROM suffixe WHERE insee_com LIKE '{dept}%';" cur.execute(str_query) for code_insee, nom_commune in dh.get_insee_name_list_by_dept(dept): - # for code_insee, nom_commune in [['49244','Mauges']]: + # for code_insee, nom_commune in [['49244','Mauges']]: debut_total = time.time() # hp.display_insee_commune(code_insee, nom_commune) - adresses = Adresses(code_insee) - batch_id = b.batch_start_log('detecte suffixe',code_insee,nom_commune) + adresses = Adresses(code_insee) + batch_id = b.batch_start_log("detecte suffixe", code_insee, nom_commune) try: adresses.charge_numeros_ban() freq = name_frequency(adresses) selection = select_street_names_by_name(freq) adds = collect_adresses_points(selection, adresses) load_suffixe_2_db(adds, code_insee, nom_commune) - b.batch_stop_log(batch_id,True) - except(e): + b.batch_stop_log(batch_id, True) + except (e): print(e) - b.batch_stop_log(batch_id,False) + b.batch_stop_log(batch_id, False) diff --git a/bano/rapprochement.py b/bano/rapprochement.py index ec22c7d..9dcd400 100644 --- a/bano/rapprochement.py +++ b/bano/rapprochement.py @@ -7,7 +7,7 @@ def process_unitaire(code_insee): - id_batch = b.batch_start_log("rapprochement", 'toutes', code_insee) + id_batch = b.batch_start_log("rapprochement", "toutes", code_insee) try: topo = Topo(code_insee) adresses = Adresses(code_insee) diff --git a/bano/sources/ban.py b/bano/sources/ban.py index 074609f..690f8e1 100644 --- a/bano/sources/ban.py +++ b/bano/sources/ban.py @@ -10,7 +10,7 @@ import psycopg2 from ..constants import DEPARTEMENTS -from ..db import bano_sources +from ..db import bano_db from ..sql import sql_process from .. import batch as b # from .. 
import update_manager as um @@ -58,7 +58,7 @@ def import_to_pg(departement, **kwargs): fichier_source = get_destination(departement) with gzip.open(fichier_source, mode='rt') as f: f.readline() # skip CSV headers - with bano_sources.cursor() as cur_insert: + with bano_db.cursor() as cur_insert: try: cur_insert.execute(f"DELETE FROM ban WHERE code_insee LIKE '{departement}%'") cur_insert.copy_from(f, "ban", sep=';', null='') @@ -79,7 +79,7 @@ def import_to_pg_subp(departement, **kwargs): with open(tmp_filename,'w') as tmpfile: tmpfile.write(ret.stdout) - subprocess.run(["psql","-d","bano_sources","-U","cadastre","-1","-c",f"DELETE FROM ban WHERE code_insee LIKE '{departement}%';COPY ban FROM '{tmp_filename}' WITH CSV HEADER NULL '' DELIMITER ';'"]) + subprocess.run(["psql","-d","bano_db","-U","cadastre","-1","-c",f"DELETE FROM ban WHERE code_insee LIKE '{departement}%';COPY ban FROM '{tmp_filename}' WITH CSV HEADER NULL '' DELIMITER ';'"]) tmp_filename.unlink() b.batch_stop_log(id_batch,True) except e: @@ -97,4 +97,4 @@ def get_destination(departement): return cwd / f'adresses-{departement}.csv.gz' def update_bis_table(**kwargs): - sql_process('update_table_rep_b_as_bis',dict(),bano_sources) \ No newline at end of file + sql_process('update_table_rep_b_as_bis',dict(),bano_db) \ No newline at end of file diff --git a/bano/sources/cadastre_ld.py b/bano/sources/cadastre_ld.py index 8e902db..1b03eaa 100644 --- a/bano/sources/cadastre_ld.py +++ b/bano/sources/cadastre_ld.py @@ -51,7 +51,7 @@ def import_to_pg(departement, **kwargs): fichier_source = get_destination(departement) with gzip.open(fichier_source, mode="rt") as f: json_source = json.load(f) - with db.bano_sources.cursor() as cur_insert: + with db.bano_db.cursor() as cur_insert: try: cur_insert.execute( f"DELETE FROM lieux_dits WHERE code_insee LIKE '{departement+'%'}';COMMIT;" @@ -66,14 +66,14 @@ def import_to_pg(departement, **kwargs): cur_insert.execute(str_query + ",".join(a_values) + ";COMMIT;") except psycopg2.DataError as e: print(e) - db.bano_sources.reset() + db.bano_db.reset() def post_process(departement, **kwargs): sqlfile = Path(__file__).parent.parent / "sql" / "lieux_dits_post_process.sql" if sqlfile.exists(): with open(sqlfile, "r") as fq: - with db.bano_sources.cursor() as cur_post_process: + with db.bano_db.cursor() as cur_post_process: str_query = fq.read().replace("__dept__", departement) cur_post_process.execute(str_query) diff --git a/bano/sources/cog.py b/bano/sources/cog.py index 539dc9c..43d21b3 100644 --- a/bano/sources/cog.py +++ b/bano/sources/cog.py @@ -9,13 +9,12 @@ import requests # import psycopg2 -from ..db import bano +from ..db import bano_db from ..sql import sql_process from .. import batch as b -# from .. 
import update_manager as um def process_cog(**kwargs): - sql_process('create_table_cog',dict(),bano) + sql_process('create_table_cog',dict(),bano_db) zip = get_destination('cog_2022.zip') status = download(zip) if status: @@ -46,7 +45,7 @@ def import_to_pg(fichier_zip): with ZipFile(fichier_zip) as f: with f.open('commune_2022.csv') as csv: csv.readline() # skip CSV headers - with bano.cursor() as cur_insert: + with bano_db.cursor() as cur_insert: try: cur_insert.execute(f"TRUNCATE {table}") cur_insert.copy_from(csv,table, sep=',', null='') diff --git a/bano/sources/topo.py b/bano/sources/topo.py index 647c4af..2032b06 100644 --- a/bano/sources/topo.py +++ b/bano/sources/topo.py @@ -6,7 +6,7 @@ from ..constants import get_const_code_dir,CODE_VOIE_FANTOIR -from ..db import bano_sources +from ..db import bano_db from .. import helpers as h from .. import batch as b @@ -65,10 +65,10 @@ def import_to_pg(): # if i > 20: # break io_in_csv.seek(0) - with bano_sources.cursor() as cur_insert: + with bano_db.cursor() as cur_insert: cur_insert.execute("TRUNCATE topo") cur_insert.copy_from(io_in_csv, "topo", sep='$',null='') - bano_sources.commit() + bano_db.commit() def process_topo(**kwargs): id_batch = b.batch_start_log('import source', 'TOPO','France','France') diff --git a/bano/sql.py b/bano/sql.py index 1dc1476..abc2f75 100644 --- a/bano/sql.py +++ b/bano/sql.py @@ -3,27 +3,30 @@ from pathlib import Path -SQLDIR = Path(__file__).parent / 'sql' +from .db import bano_db +SQLDIR = Path(__file__).parent / "sql" -def sql_process(sqlfile,args,conn): - sqlfile = (Path(SQLDIR) / sqlfile).with_suffix('.sql') - with open (sqlfile) as s: + +def sql_process(sqlfile, args): + sqlfile = (Path(SQLDIR) / sqlfile).with_suffix(".sql") + with open(sqlfile) as s: q = s.read() - for k,v in args.items(): - q=q.replace(f'__{k}__',v) + for k, v in args.items(): + q = q.replace(f"__{k}__", v) - with conn.cursor() as cur: + with bano_db.cursor() as cur: cur.execute(q) -def sql_get_data(sqlfile,args,conn): - sqlfile = (Path(SQLDIR) / sqlfile).with_suffix('.sql') - with open (sqlfile) as s: + +def sql_get_data(sqlfile, args): + sqlfile = (Path(SQLDIR) / sqlfile).with_suffix(".sql") + with open(sqlfile) as s: q = s.read() - for k,v in args.items(): - q=q.replace(f'__{k}__',v) + for k, v in args.items(): + q = q.replace(f"__{k}__", v) - with conn.cursor() as cur: + with bano_db.cursor() as cur: cur.execute(q) return cur.fetchall() From 3ea9c699aa19386e66753d2c26fe16b915717c13 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Sun, 26 Feb 2023 21:59:17 +0000 Subject: [PATCH 053/163] Calcul des suffixes dans l'import BAN --- bano/models.py | 4 +- bano/pre_process_suffixe.py | 45 +++++++++++++++------ bano/sources/ban.py | 79 ++++++++++++++++++++++++------------- 3 files changed, 87 insertions(+), 41 deletions(-) diff --git a/bano/models.py b/bano/models.py index aa5bca6..8454f55 100644 --- a/bano/models.py +++ b/bano/models.py @@ -246,7 +246,7 @@ def _as_string(self): class Adresses: def __init__(self, code_insee): self.code_insee = code_insee - self.liste = set() + self.liste = [] self.index_voie = defaultdict(list) self.noms_de_voies = set() @@ -270,7 +270,7 @@ def _print(self, pattern=None): def add_adresse(self, ad): """une adresses est considérée dans la commune si sans Fantoir ou avec un Fantoir de la commune""" # if (ad.fantoir == None or hp.is_valid_fantoir(ad.fantoir, self.code_insee)) and hp.is_valid_housenumber(ad.numero): - self.liste.add(ad) + self.liste.append(ad) 
self.index_voie[ad.voie].append(len(self.liste) - 1) self.noms_de_voies.add(ad.voie) diff --git a/bano/pre_process_suffixe.py b/bano/pre_process_suffixe.py index 9124c1c..cc8a0a5 100644 --- a/bano/pre_process_suffixe.py +++ b/bano/pre_process_suffixe.py @@ -7,36 +7,55 @@ import os, os.path from . import batch as b -from .db import bano_db +from . import db from . import helpers as hp from . import db_helpers as dh -from .models import Adresses +from .models import Adresses, Topo def name_frequency(adresses): freq = {} noms_hors_1ere_passe = set() + # for nom in ['rue du Pont - Pont Augan']: # adresses.noms_de_voies: for nom in adresses.noms_de_voies: s = nom.split() # noms avec suffixe entre () quelle que soit leur longueur if "(" in nom and nom[-1] == ")": k = f"({nom.split('(')[1]}" if k not in freq: - freq[k] = {"nombre": 1, "liste": set(nom)} + freq[k] = {"nombre": 1, "liste": {nom}} + else: + freq[k]["nombre"] += 1 + freq[k]["liste"].add(nom) + # noms avec suffixe après un trait d'union quelle que soit leur longueur + elif "-" in nom: + k = f"{nom.split('-')[-1]}" + # print(k) + if k not in freq: + freq[k] = {"nombre": 1, "liste": {nom}} + else: + freq[k]["nombre"] += 1 + freq[k]["liste"].add(nom) + # noms avec suffixe après une virgule quelle que soit leur longueur + elif "," in nom: + k = f"{nom.split(',')[-1]}" + # print(k) + if k not in freq: + freq[k] = {"nombre": 1, "liste": {nom}} else: freq[k]["nombre"] += 1 freq[k]["liste"].add(nom) elif len(s) > 4: k = " ".join(s[-2:]) if k not in freq: - freq[k] = {"nombre": 1, "liste": set(nom)} + freq[k] = {"nombre": 1, "liste": {nom}} else: freq[k]["nombre"] += 1 freq[k]["liste"].add(nom) elif len(s) > 3: k = nom.split()[-1] if k not in freq: - freq[k] = {"nombre": 1, "liste": set(nom)} + freq[k] = {"nombre": 1, "liste": {nom}} else: freq[k]["nombre"] += 1 freq[k]["liste"].add(nom) @@ -92,12 +111,12 @@ def collect_adresses_points(selection, adresses): def load_suffixe_2_db(adds, code_insee, nom_commune): - with bano_db.cursor() as cur: + with db.bano_db.cursor() as cur: for h in adds: # Agde (34003): detection de 'Mer' abusif, pas d'autres suffixes dans la commune if code_insee == "34003": continue - print(f"{code_insee} - {nom_commune}......... {h}") + print(f"......... 
{h}") str_query = f"INSERT INTO suffixe SELECT ST_SetSRID((ST_Dump(gu)).geom,4326),code_insee,libelle_suffixe FROM (SELECT ST_Union(g) gu,code_insee,libelle_suffixe FROM({' UNION ALL '.join(adds[h])})a GROUP BY 2,3)a;" cur.execute(str_query) @@ -106,17 +125,19 @@ def process(departements, **kwargs): for dept in departements: if hp.is_valid_dept(dept): print(f"Traitement du dept {dept}") - with bano_db.cursor() as cur: - str_query = f"DELETE FROM suffixe WHERE insee_com LIKE '{dept}%';" + with db.bano_db.cursor() as cur: + str_query = f"DELETE FROM suffixe WHERE code_insee LIKE '{dept}%';" cur.execute(str_query) - for code_insee, nom_commune in dh.get_insee_name_list_by_dept(dept): + for code_insee, nom_commune in dh.liste_communes_par_dept(dept): + # for code_insee, nom_commune in [['56188','Quistinic']]: # for code_insee, nom_commune in [['49244','Mauges']]: debut_total = time.time() - # hp.display_insee_commune(code_insee, nom_commune) + print(code_insee, nom_commune) adresses = Adresses(code_insee) + topo = Topo(code_insee) batch_id = b.batch_start_log("detecte suffixe", code_insee, nom_commune) try: - adresses.charge_numeros_ban() + adresses.charge_numeros_ban(topo) freq = name_frequency(adresses) selection = select_street_names_by_name(freq) adds = collect_adresses_points(selection, adresses) diff --git a/bano/sources/ban.py b/bano/sources/ban.py index 690f8e1..95428fd 100644 --- a/bano/sources/ban.py +++ b/bano/sources/ban.py @@ -13,11 +13,14 @@ from ..db import bano_db from ..sql import sql_process from .. import batch as b +from .. import pre_process_suffixe + # from .. import update_manager as um + def process_ban(departements, **kwargs): departements = set(departements) - depts_inconnus = departements - set(DEPARTEMENTS) + depts_inconnus = departements - set(DEPARTEMENTS) if depts_inconnus: raise ValueError(f"Départements inconnus : {depts_inconnus}") depts_en_echec = [] @@ -27,74 +30,96 @@ def process_ban(departements, **kwargs): if status: if not (import_to_pg(dept)): depts_en_echec.append(dept) - print('depts_en_echec',depts_en_echec) - + print("depts_en_echec", depts_en_echec) for dept in depts_en_echec: print(f"Département {dept}") import_to_pg_subp(dept) + pre_process_suffixe.process(departements) def download(departement): destination = get_destination(departement) headers = {} if destination.exists(): - headers['If-Modified-Since'] = formatdate(destination.stat().st_mtime) + headers["If-Modified-Since"] = formatdate(destination.stat().st_mtime) - resp = requests.get(f'https://adresse.data.gouv.fr/data/ban/adresses-odbl/latest/csv/adresses-{departement}.csv.gz', headers=headers) - id_batch = b.batch_start_log('download source', 'BAN',departement) + resp = requests.get( + f"https://adresse.data.gouv.fr/data/ban/adresses-odbl/latest/csv/adresses-{departement}.csv.gz", + headers=headers, + ) + id_batch = b.batch_start_log("download source", "BAN", departement) if resp.status_code == 200: - with destination.open('wb') as f: + with destination.open("wb") as f: f.write(resp.content) - mtime = parsedate_to_datetime(resp.headers['Last-Modified']).timestamp() + mtime = parsedate_to_datetime(resp.headers["Last-Modified"]).timestamp() os.utime(destination, (mtime, mtime)) - b.batch_stop_log(id_batch,True) + b.batch_stop_log(id_batch, True) return True print(resp.status_code) - b.batch_stop_log(id_batch,False) + b.batch_stop_log(id_batch, False) return False + def import_to_pg(departement, **kwargs): - id_batch = b.batch_start_log('import source', 'BAN',departement) + id_batch = 
b.batch_start_log("import source", "BAN", departement) fichier_source = get_destination(departement) - with gzip.open(fichier_source, mode='rt') as f: + with gzip.open(fichier_source, mode="rt") as f: f.readline() # skip CSV headers - with bano_db.cursor() as cur_insert: + with bano_db.cursor() as cur_insert: try: - cur_insert.execute(f"DELETE FROM ban WHERE code_insee LIKE '{departement}%'") - cur_insert.copy_from(f, "ban", sep=';', null='') - b.batch_stop_log(id_batch,True) + cur_insert.execute( + f"DELETE FROM ban WHERE code_insee LIKE '{departement}%'" + ) + cur_insert.copy_from(f, "ban", sep=";", null="") + b.batch_stop_log(id_batch, True) return True except psycopg2.DataError as e: print(f"Erreur au chargement de la BAN {departement}") print(e) return False + def import_to_pg_subp(departement, **kwargs): - id_batch = b.batch_start_log('import source', 'BAN',departement) + id_batch = b.batch_start_log("import source", "BAN", departement) print("Essai via shell") try: fichier_source = get_destination(departement) - ret = subprocess.run(["gzip","-cd",fichier_source],capture_output=True,text=True) - tmp_filename = Path(os.environ['BAN_CACHE_DIR']) / 'tmp.csv' - with open(tmp_filename,'w') as tmpfile: + ret = subprocess.run( + ["gzip", "-cd", fichier_source], capture_output=True, text=True + ) + tmp_filename = Path(os.environ["BAN_CACHE_DIR"]) / "tmp.csv" + with open(tmp_filename, "w") as tmpfile: tmpfile.write(ret.stdout) - subprocess.run(["psql","-d","bano_db","-U","cadastre","-1","-c",f"DELETE FROM ban WHERE code_insee LIKE '{departement}%';COPY ban FROM '{tmp_filename}' WITH CSV HEADER NULL '' DELIMITER ';'"]) + subprocess.run( + [ + "psql", + "-d", + "bano", + "-U", + "cadastre", + "-1", + "-c", + f"DELETE FROM ban WHERE code_insee LIKE '{departement}%';COPY ban FROM '{tmp_filename}' WITH CSV HEADER NULL '' DELIMITER ';'", + ] + ) tmp_filename.unlink() - b.batch_stop_log(id_batch,True) + b.batch_stop_log(id_batch, True) except e: print(f"Erreur au chargement de la BAN {departement}") print(f"Abandon du chargement de la BAN {departement}") - b.batch_stop_log(id_batch,False) - + b.batch_stop_log(id_batch, False) + + def get_destination(departement): try: - cwd = Path(os.environ['BAN_CACHE_DIR']) + cwd = Path(os.environ["BAN_CACHE_DIR"]) except KeyError: raise ValueError(f"La variable BAN_CACHE_DIR n'est pas définie") if not cwd.exists(): raise ValueError(f"Le répertoire {cwd} n'existe pas") - return cwd / f'adresses-{departement}.csv.gz' + return cwd / f"adresses-{departement}.csv.gz" + def update_bis_table(**kwargs): - sql_process('update_table_rep_b_as_bis',dict(),bano_db) \ No newline at end of file + sql_process("update_table_rep_b_as_bis", dict()) From 34e3b2484ee5c8766cd730295efb00d676b40428 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Sun, 26 Feb 2023 22:01:31 +0000 Subject: [PATCH 054/163] Une seule base --- bano/setup_db.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/bano/setup_db.py b/bano/setup_db.py index b4431dc..ae36a9f 100644 --- a/bano/setup_db.py +++ b/bano/setup_db.py @@ -6,8 +6,7 @@ def setup_bano(**kwargs): - sql_process("create_base", {}, bano_db) - sql_process("create_table_base_bano_outils", {}, bano_db) - sql_process("create_table_base_bano_sources", {}, bano_db) - sql_process("create_table_base_bano_cog", {}, bano_db) - sql_process("create_table_base_bano_cibles", {}, bano_db) + sql_process("create_table_base_bano_outils", {}) + sql_process("create_table_base_bano_sources", {}) + 
sql_process("create_table_base_bano_cog", {}) + sql_process("create_table_base_bano_cibles", {}) From ef464f7f25dda1ab721f6e9438dbcebd6797f06d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Sun, 26 Feb 2023 22:02:00 +0000 Subject: [PATCH 055/163] Index geom BAN --- bano/sql/create_table_base_bano_sources.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/bano/sql/create_table_base_bano_sources.sql b/bano/sql/create_table_base_bano_sources.sql index fa047a9..61aac3b 100644 --- a/bano/sql/create_table_base_bano_sources.sql +++ b/bano/sql/create_table_base_bano_sources.sql @@ -41,6 +41,7 @@ CREATE TABLE IF NOT EXISTS ban ( geometrie geometry (Point, 4326) GENERATED ALWAYS AS (ST_Point(lon,lat)) STORED); CREATE INDEX IF NOT EXISTS idx_ban_code_insee ON ban(code_insee); +CREATE INDEX IF NOT EXISTS gidx_ban ON ban(geometrie); CREATE TABLE IF NOT EXISTS lieux_dits ( code_insee character(5), From 9283c449d5a0d44494e905703528cfed03dfcef1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Sun, 26 Feb 2023 22:02:49 +0000 Subject: [PATCH 056/163] Autocommit --- bano/sql/update_table_rep_b_as_bis.sql | 1 - 1 file changed, 1 deletion(-) diff --git a/bano/sql/update_table_rep_b_as_bis.sql b/bano/sql/update_table_rep_b_as_bis.sql index 8705af1..4986222 100644 --- a/bano/sql/update_table_rep_b_as_bis.sql +++ b/bano/sql/update_table_rep_b_as_bis.sql @@ -26,4 +26,3 @@ SELECT id_fantoir, numero FROM ban WHERE rep = 'd'); -COMMIT; \ No newline at end of file From 5d77237f854042c3a10239e7e1173bbb5ebddc59 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Fri, 10 Mar 2023 21:33:10 +0000 Subject: [PATCH 057/163] Base unique --- bano/setup_db.py | 1 + bano/sql/create_base.sql | 5 +++++ imposm.config | 2 +- load_osm_france_db.sh | 20 +++++++++++++------- sql/create_base.sql | 2 -- 5 files changed, 20 insertions(+), 10 deletions(-) create mode 100644 bano/sql/create_base.sql delete mode 100644 sql/create_base.sql diff --git a/bano/setup_db.py b/bano/setup_db.py index ae36a9f..ecbe65c 100644 --- a/bano/setup_db.py +++ b/bano/setup_db.py @@ -6,6 +6,7 @@ def setup_bano(**kwargs): + sql_process("create_base", {}) sql_process("create_table_base_bano_outils", {}) sql_process("create_table_base_bano_sources", {}) sql_process("create_table_base_bano_cog", {}) diff --git a/bano/sql/create_base.sql b/bano/sql/create_base.sql new file mode 100644 index 0000000..1c443ca --- /dev/null +++ b/bano/sql/create_base.sql @@ -0,0 +1,5 @@ +CREATE EXTENSION IF NOT EXISTS postgis; +CREATE EXTENSION IF NOT EXISTS hstore; + +# tables Imposm dans le schema osm +ALTER ROLE cadastre IN DATABASE bano SET search_path TO public,osm; \ No newline at end of file diff --git a/imposm.config b/imposm.config index 1ceb367..b80215e 100644 --- a/imposm.config +++ b/imposm.config @@ -1,7 +1,7 @@ { "cachedir": "/data/bano_imposm_cache_v3", "diffdir": "/data/download_v3", - "connection": "postgis://cadastre@localhost/bano_sources?prefix=NONE", + "connection": "postgis://cadastre@localhost/bano?prefix=NONE", "mapping": "/data/project/bano_v3/bano.yml", "srid":4326 } diff --git a/load_osm_france_db.sh b/load_osm_france_db.sh index fd0f10a..6c5c73e 100755 --- a/load_osm_france_db.sh +++ b/load_osm_france_db.sh @@ -1,23 +1,29 @@ #!/bin/bash +set -e + SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" source $SCRIPT_DIR/config -lockfile=${SCRIPT_DIR}/imposm.lock 
+PBF_URL=${1:-http://download.openstreetmap.fr/extracts/merge/france_metro_dom_com_nc.osm.pbf} +PBF_FILE=$(basename "$PBF_URL") + +lockfile=${DATA_DIR}/imposm.lock if test -f ${lockfile} then - echo `date`" : Process deja en cours" >> $SCRIPT_DIR/cron.log - exit 0 + echo `date`" : Process deja en cours" + exit 1 fi touch ${lockfile} +mkdir -p $DOWNLOAD_DIR cd $DOWNLOAD_DIR -wget -NS http://download.openstreetmap.fr/extracts/merge/france_metro_dom_com_nc.osm.pbf -imposm import -config $SCRIPT_DIR/imposm.config -read $DOWNLOAD_DIR/france_metro_dom_com_nc.osm.pbf -overwritecache -diff -write -dbschema-import public +wget -NS $PBF_URL +imposm import -config $SCRIPT_DIR/imposm.config -read $DOWNLOAD_DIR/france_metro_dom_com_nc.osm.pbf -overwritecache -diff -write -dbschema-import osm -psql -d bano_sources -U cadastre -f $BANO_DIR/sql/finalisation.sql +psql -d bano -U cadastre -f $SCRIPT_DIR/sql/finalisation.sql -#cp $DOWNLOAD_DIR/last.state.txt $DOWNLOAD_DIR/state.txt +cp $DOWNLOAD_DIR/last.state.txt $DOWNLOAD_DIR/state.txt rm ${lockfile} diff --git a/sql/create_base.sql b/sql/create_base.sql deleted file mode 100644 index 67a0700..0000000 --- a/sql/create_base.sql +++ /dev/null @@ -1,2 +0,0 @@ -CREATE EXTENSION IF NOT EXISTS postgis; -CREATE EXTENSION IF NOT EXISTS hstore; From 92cd8abc2f44ebc643d3618b2710c8b48b06d473 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Fri, 10 Mar 2023 21:36:05 +0000 Subject: [PATCH 058/163] COG communes --- bano/sources/cog.py | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/bano/sources/cog.py b/bano/sources/cog.py index 43d21b3..5a428d1 100644 --- a/bano/sources/cog.py +++ b/bano/sources/cog.py @@ -14,8 +14,7 @@ from .. import batch as b def process_cog(**kwargs): - sql_process('create_table_cog',dict(),bano_db) - zip = get_destination('cog_2022.zip') + zip = get_destination('cog.zip') status = download(zip) if status: import_to_pg(zip) @@ -25,7 +24,7 @@ def download(destination): if destination.exists(): headers['If-Modified-Since'] = formatdate(destination.stat().st_mtime) - resp = requests.get(f'https://www.insee.fr/fr/statistiques/fichier/6051727/cog_ensemble_2022_csv.zip', headers=headers) + resp = requests.get(get_COG_URL(), headers=headers) id_batch = b.batch_start_log('download source', 'COG ZIP','France') if resp.status_code == 200: with destination.open('wb') as f: @@ -43,7 +42,7 @@ def import_to_pg(fichier_zip): table = 'cog_commune' id_batch = b.batch_start_log('import source', f'COG {table}','France') with ZipFile(fichier_zip) as f: - with f.open('commune_2022.csv') as csv: + with f.open(get_COG_CSV()) as csv: csv.readline() # skip CSV headers with bano_db.cursor() as cur_insert: try: @@ -61,3 +60,17 @@ def get_destination(fichier_cog): if not cwd.exists(): raise ValueError(f"Le répertoire {cwd} n'existe pas") return cwd / f'{fichier_cog}' + +def get_COG_URL(): + try: + url = os.environ['COG_URL'] + except KeyError: + raise ValueError(f"La variable COG_URL n'est pas définie") + return url + +def get_COG_CSV(): + try: + csv = os.environ['COG_CSV_COMMUNE'] + except KeyError: + raise ValueError(f"La variable COG_CSV_COMMUNE n'est pas définie") + return csv From 3131269e298f714e3debc6edecc0736ea0901d5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Fri, 10 Mar 2023 21:41:19 +0000 Subject: [PATCH 059/163] README en cours --- README.md | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 62 insertions(+), 1 
deletion(-)

diff --git a/README.md b/README.md
index 982829d..7a95532 100644
--- a/README.md
+++ b/README.md
@@ -7,4 +7,65 @@ v3 à venir :
 - unification des adresses voies & lieux-dits
 - gestion des communes fusionnées
 - remplacement de FANTOIR par TOPO
-- etc
\ No newline at end of file
+- etc
+
+## Dépendances
+
+Il faut pour alimenter la base OSM locale dans laquelle puise BANO :
+* [imposm](https://github.com/omniscale/imposm3) pour le chargement. Par défaut la dernière version.
+* [osmosis](https://github.com/openstreetmap/osmosis) pour la mise à jour incrémentale. Par défaut la dernière version.
+
+Autres outils : parallel.
+
+## Configuration
+
+Première étape avant de lancer les chargements de données : il faut adapter le fichier `config` à votre environnement, en déclarant différents chemins. Ce fichier est utilisé au début de plusieurs scripts pour connaître le chemin de différents répertoires.
+
+### Création des répertoires
+Une fois le fichier `config` rempli, lancer la création des répertoires avec :
+```
+arborescence.sh
+```
+
+### Création de la base de données
+
+La base de données qui accueille toutes les données BANO (les sources et les données produites) s'appelle 'bano' et doit être créée en début d'installation. C'est l'utilisateur 'postgres' qui doit exécuter les scripts de création de la base.
+
+```
+sudo -u postgres -s "./create_base.sh"
+```
+On utilise ensuite le module python 'bano' pour terminer la configuration de la BD :
+```
+bano setup_db
+```
+À l'issue de cette étape toutes les tables nécessaires existent dans la BD. Elles sont toutes vides.
+
+## Chargement des données OSM
+### Chargement initial
+D'abord renseigner le fichier imposm.config, puis lancer :
+```
+./load_osm_france_db.sh
+```
+À l'issue, les tables du schéma osm sont remplies.
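As a quick sanity check after this initial import, row counts in the Imposm tables can be inspected. The snippet below is a minimal sketch, not part of the patch series itself: it assumes the local connection settings used throughout these patches (database `bano`, role `cadastre`, whose `search_path` set by `create_base.sql` includes the `osm` schema) and the `planet_osm_*` table names referenced by the SQL files.

```
# Illustrative sketch only: verify that load_osm_france_db.sh populated the osm schema.
# Assumes database "bano" and role "cadastre" as configured elsewhere in these patches;
# the bare table names resolve to the osm schema via the role's search_path.
import psycopg2

TABLES = ("planet_osm_point", "planet_osm_line", "planet_osm_polygon")

with psycopg2.connect(dbname="bano", user="cadastre") as conn:
    with conn.cursor() as cur:
        for table in TABLES:
            # Table names are fixed constants above, so direct interpolation is safe here.
            cur.execute(f"SELECT count(*) FROM {table}")
            print(f"{table}: {cur.fetchone()[0]} lignes")
```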
+ +### Mise à jour en continu + +## Chargement des autres données sources +Chaque source a sa commande de chargement +### TOPO (ex FANTOIR) +``` +bano charge_topo +``` +### BAN +``` +bano charge_ban +bano update_bis_table +``` +### COG +``` +bano charge_cog +``` +### Lieux-dits du Cadastre +``` +bano charge_ld_cadastre +``` From 5610997d097948fe2a7fcc9c0ac1fd32bebec18e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Mon, 13 Mar 2023 21:04:32 +0000 Subject: [PATCH 060/163] Fantoir sans cle (WIP) --- bano/sources/topo.py | 2 +- bano/sql/create_table_base_bano_sources.sql | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/bano/sources/topo.py b/bano/sources/topo.py index 2032b06..690001f 100644 --- a/bano/sources/topo.py +++ b/bano/sources/topo.py @@ -45,7 +45,7 @@ def topo_voie_to_csv(ligne_brute): # code dept champs.insert(0,h.get_code_dept_from_insee(champs[0])) # fantoir - champs[2] = fantoir9_vers_fantoir10(champs[2]) + # champs[2] = fantoir9_vers_fantoir10(champs[2]) return champs diff --git a/bano/sql/create_table_base_bano_sources.sql b/bano/sql/create_table_base_bano_sources.sql index 61aac3b..8d7601f 100644 --- a/bano/sql/create_table_base_bano_sources.sql +++ b/bano/sql/create_table_base_bano_sources.sql @@ -1,7 +1,7 @@ CREATE TABLE IF NOT EXISTS topo ( code_dep character(3), code_insee character(5), - fantoir10 character(10), + fantoir character(9), nature_voie text, libelle_voie text, caractere_voie character(1), From 7c2000c5febf625fb75052bed32981fd9f1f1fd1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Fri, 17 Mar 2023 12:09:05 +0000 Subject: [PATCH 061/163] Fantoir sans cle (WIP) --- bano/helpers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bano/helpers.py b/bano/helpers.py index a26a1ed..5fa089e 100644 --- a/bano/helpers.py +++ b/bano/helpers.py @@ -184,7 +184,7 @@ def replace_type_voie(s, nb): def fantoir_valide(f, insee): - return len(f) == 10 and f[0:5] == insee + return len(f) == 9 and f[0:5] == insee def display_insee_commune(code_insee, nom_commune): From 28d6cfafb42be9873774e35df08c466e089efb0b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Fri, 17 Mar 2023 12:10:37 +0000 Subject: [PATCH 062/163] Base unique --- bano/sql/create_base.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bano/sql/create_base.sql b/bano/sql/create_base.sql index 1c443ca..de1c23b 100644 --- a/bano/sql/create_base.sql +++ b/bano/sql/create_base.sql @@ -1,5 +1,5 @@ CREATE EXTENSION IF NOT EXISTS postgis; CREATE EXTENSION IF NOT EXISTS hstore; -# tables Imposm dans le schema osm +-- tables Imposm dans le schema osm ALTER ROLE cadastre IN DATABASE bano SET search_path TO public,osm; \ No newline at end of file From bb3ab1e3cb8bb27c9578f12f417c17b52673f6cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Fri, 17 Mar 2023 12:12:48 +0000 Subject: [PATCH 063/163] Fantoir sans cle (WIP) --- bano/sql/charge_topo_commune.sql | 8 ++++---- bano/sql/create_table_base_bano_sources.sql | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/bano/sql/charge_topo_commune.sql b/bano/sql/charge_topo_commune.sql index 7a826dc..bcd3a64 100644 --- a/bano/sql/charge_topo_commune.sql +++ b/bano/sql/charge_topo_commune.sql @@ -1,18 +1,18 @@ WITH t AS -(SELECT fantoir10, +(SELECT fantoir, TRIM (BOTH FROM (COALESCE(nature_voie,'')||' '||libelle_voie)) AS nom FROM topo WHERE code_insee = 
'__code_insee__' AND caractere_annul IS NULL), tr AS -(SELECT fantoir10, +(SELECT fantoir, nom, - rank() OVER (PARTITION BY nom ORDER BY fantoir10) rang + rank() OVER (PARTITION BY nom ORDER BY fantoir) rang FROM t) -SELECT fantoir10, +SELECT fantoir, nom FROM tr WHERE rang = 1 diff --git a/bano/sql/create_table_base_bano_sources.sql b/bano/sql/create_table_base_bano_sources.sql index 8d7601f..bb5d22b 100644 --- a/bano/sql/create_table_base_bano_sources.sql +++ b/bano/sql/create_table_base_bano_sources.sql @@ -12,7 +12,7 @@ CREATE TABLE IF NOT EXISTS topo ( mot_classant character varying(8)); CREATE INDEX IF NOT EXISTS idx_topo_dep ON topo(code_dep); CREATE INDEX IF NOT EXISTS idx_topo_code_insee ON topo(code_insee); -CREATE INDEX IF NOT EXISTS idx_topo_fantoir10 ON topo(fantoir10); +CREATE INDEX IF NOT EXISTS idx_topo_fantoir ON topo(fantoir); CREATE TABLE IF NOT EXISTS ban ( id text, From 17a5ac814938a951fa4d0da0f29e410d1540cfa3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Fri, 17 Mar 2023 12:13:22 +0000 Subject: [PATCH 064/163] Table statut_numero --- bano/sql/create_table_base_bano_cibles.sql | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/bano/sql/create_table_base_bano_cibles.sql b/bano/sql/create_table_base_bano_cibles.sql index 3e84e0e..c51c3c9 100644 --- a/bano/sql/create_table_base_bano_cibles.sql +++ b/bano/sql/create_table_base_bano_cibles.sql @@ -43,4 +43,14 @@ CREATE TABLE IF NOT EXISTS nom_fantoir ( CREATE INDEX IF NOT EXISTS idx_nom_fantoir_code_insee ON nom_fantoir (code_insee); CREATE INDEX IF NOT EXISTS idx_nom_fantoir_code_insee_source ON nom_fantoir (code_insee,source); -GRANT SELECT ON ALL TABLES IN SCHEMA public TO public; \ No newline at end of file +CREATE TABLE IF NOT EXISTS statut_numero ( + numero text , + fantoir character(9) , + source text , + id_statut integer, + timestamp_statut double precision, + insee_com character(5)); + +CREATE INDEX IF NOT EXISTS idx_statut_numero_fantoir ON statut_numero (fantoir, numero); + +GRANT SELECT ON ALL TABLES IN SCHEMA public TO public; From 782f2f5fb898951d71b00b1b72e3586410fd9923 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Fri, 17 Mar 2023 12:29:04 +0000 Subject: [PATCH 065/163] Table statut_fantoir --- bano/sql/create_table_base_bano_cibles.sql | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/bano/sql/create_table_base_bano_cibles.sql b/bano/sql/create_table_base_bano_cibles.sql index c51c3c9..d0779ca 100644 --- a/bano/sql/create_table_base_bano_cibles.sql +++ b/bano/sql/create_table_base_bano_cibles.sql @@ -43,6 +43,16 @@ CREATE TABLE IF NOT EXISTS nom_fantoir ( CREATE INDEX IF NOT EXISTS idx_nom_fantoir_code_insee ON nom_fantoir (code_insee); CREATE INDEX IF NOT EXISTS idx_nom_fantoir_code_insee_source ON nom_fantoir (code_insee,source); + +CREATE TABLE IF NOT EXISTS statut_fantoir ( + fantoir character varying(9), + id_statut integer, + timestamp_statut double precision, + insee_com character(5)); + +CREATE INDEX IF NOT EXISTS idx_statut_fantoir_fantoir ON statut_fantoir (fantoir); +CREATE INDEX IF NOT EXISTS idx_statut_fantoir_insee ON statut_fantoir (insee_com); + CREATE TABLE IF NOT EXISTS statut_numero ( numero text , fantoir character(9) , From 1351054718bff8a5c9cca0e0c201c894503b0421 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Fri, 17 Mar 2023 12:32:08 +0000 Subject: [PATCH 066/163] Renommage de champ --- bano/sql/create_table_base_bano_cibles.sql | 
6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bano/sql/create_table_base_bano_cibles.sql b/bano/sql/create_table_base_bano_cibles.sql index d0779ca..b3bcfab 100644 --- a/bano/sql/create_table_base_bano_cibles.sql +++ b/bano/sql/create_table_base_bano_cibles.sql @@ -48,10 +48,10 @@ CREATE TABLE IF NOT EXISTS statut_fantoir ( fantoir character varying(9), id_statut integer, timestamp_statut double precision, - insee_com character(5)); + code_insee character(5)); CREATE INDEX IF NOT EXISTS idx_statut_fantoir_fantoir ON statut_fantoir (fantoir); -CREATE INDEX IF NOT EXISTS idx_statut_fantoir_insee ON statut_fantoir (insee_com); +CREATE INDEX IF NOT EXISTS idx_statut_fantoir_insee ON statut_fantoir (code_insee); CREATE TABLE IF NOT EXISTS statut_numero ( numero text , @@ -59,7 +59,7 @@ CREATE TABLE IF NOT EXISTS statut_numero ( source text , id_statut integer, timestamp_statut double precision, - insee_com character(5)); + code_insee character(5)); CREATE INDEX IF NOT EXISTS idx_statut_numero_fantoir ON statut_numero (fantoir, numero); From 1939c3f3be78fe31cf5012952cd7e185f3add686 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Sun, 23 Apr 2023 18:26:43 +0000 Subject: [PATCH 067/163] Tables de statut pour Pifometre --- bano/setup_db.py | 1 + bano/sql/create_table_base_bano_pifometre.sql | 61 +++++++++++++++++++ 2 files changed, 62 insertions(+) create mode 100644 bano/sql/create_table_base_bano_pifometre.sql diff --git a/bano/setup_db.py b/bano/setup_db.py index ecbe65c..98175d3 100644 --- a/bano/setup_db.py +++ b/bano/setup_db.py @@ -11,3 +11,4 @@ def setup_bano(**kwargs): sql_process("create_table_base_bano_sources", {}) sql_process("create_table_base_bano_cog", {}) sql_process("create_table_base_bano_cibles", {}) + sql_process("create_table_base_bano_pifometre", {}) diff --git a/bano/sql/create_table_base_bano_pifometre.sql b/bano/sql/create_table_base_bano_pifometre.sql new file mode 100644 index 0000000..b00727b --- /dev/null +++ b/bano/sql/create_table_base_bano_pifometre.sql @@ -0,0 +1,61 @@ +CREATE TABLE IF NOT EXISTS statut_fantoir ( + fantoir character varying(10), + id_statut integer, + timestamp_statut double precision, + insee_com character(5)); + +CREATE INDEX IF NOT EXISTS idx_statut_fantoir_insee ON statut_fantoir(insee_com); +CREATE INDEX IF NOT EXISTS idx_statut_fantoir_fantoir ON statut_fantoir(fantoir); + +CREATE TABLE IF NOT EXISTS labels_statuts_fantoir( + id_statut integer primary key, + tri integer default 0, + label_statut character varying(200) +); +TRUNCATE labels_statuts_fantoir; +INSERT INTO labels_statuts_fantoir (id_statut,tri,label_statut) +VALUES (0,0,'Ok'), +(1,1,'Erreur d''orthographe'), +(2,2,'Divergence d''orthographe'), +(3,3,'Nom différent'), +(4,4,'Type de voie différent'), +(5,5,'Voie doublon et type de voie différent'), +(6,6,'Voie doublon avec orthographe différente'), +(7,8,'Répétition du type de voie'), +(8,9,'Nom introuvable sur le terrain'), +(9,10,'Ancien nom supprimé sur le terrain'), +(10,99,'Erreurs combinées'), +(11,15,'Adresses hors périmètre'), +(12,12,'Voie détruite'), +(13,13,'Voie incorporée à une autre'), +(14,14,'Voie inexistante'), +(15,7,'Voie doublon (même type et même nom)'), +(16,11,'Nom tronqué'), +(17,16,'Erreur de commune'), +(18,17,'FANTOIR annulé non remplacé'), +(19,18,'Point cardinal superflu'), +(20,19,'Voie en projet'); + +CREATE TABLE IF NOT EXISTS statut_numero ( + numero text, + fantoir character (10), + source text, + id_statut integer, + timestamp_statut 
double precision, + insee_com character(5)); +CREATE INDEX IF NOT EXISTS idx_statut_numero_fantoir ON statut_numero(fantoir,numero); + +CREATE TABLE IF NOT EXISTS labels_statuts_numero( + id_statut integer primary key, + tri integer default 0, + label_statut character varying(200) +); +TRUNCATE labels_statuts_numero; +INSERT INTO labels_statuts_numero (id_statut,tri,label_statut) +VALUES (0,0,'Ok'), +(1,1,'Adresse fictive en 5xxx'), +(2,2,'Adresse fictive en 9xxx'), +(3,3,'Adresse invisible sur le terrain'), +(4,4,'Emplacement sur une autre voie'), +(5,5,'Adresse en dehors de la commune'), +(6,6,'Adresse doublon'); From 47106ecb9cbfec270118e589b5d9bb2d58429fad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Mon, 24 Apr 2023 21:48:09 +0000 Subject: [PATCH 068/163] indexes --- bano/sql/create_table_base_bano_cibles.sql | 3 +++ bano/sql/create_table_base_bano_pifometre.sql | 6 +++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/bano/sql/create_table_base_bano_cibles.sql b/bano/sql/create_table_base_bano_cibles.sql index b3bcfab..4765a4f 100644 --- a/bano/sql/create_table_base_bano_cibles.sql +++ b/bano/sql/create_table_base_bano_cibles.sql @@ -15,6 +15,8 @@ CREATE TABLE IF NOT EXISTS bano_adresses ( CREATE INDEX IF NOT EXISTS gidx_bano_adresses ON bano_adresses USING GIST(geometrie); CREATE INDEX IF NOT EXISTS idx_bano_adresses_code_insee ON bano_adresses (code_insee); +CREATE INDEX IF NOT EXISTS idx_bano_adresses_fantoir ON bano_adresses (fantoir); +CREATE INDEX IF NOT EXISTS idx_bano_adresses_pifo_code_insee_source ON bano_adresses (code_insee,source); CREATE TABLE IF NOT EXISTS bano_points_nommes ( fantoir text, @@ -41,6 +43,7 @@ CREATE TABLE IF NOT EXISTS nom_fantoir ( source text); CREATE INDEX IF NOT EXISTS idx_nom_fantoir_code_insee ON nom_fantoir (code_insee); +CREATE INDEX IF NOT EXISTS idx_nom_fantoir_fantoir ON nom_fantoir (fantoir); CREATE INDEX IF NOT EXISTS idx_nom_fantoir_code_insee_source ON nom_fantoir (code_insee,source); diff --git a/bano/sql/create_table_base_bano_pifometre.sql b/bano/sql/create_table_base_bano_pifometre.sql index b00727b..8da082b 100644 --- a/bano/sql/create_table_base_bano_pifometre.sql +++ b/bano/sql/create_table_base_bano_pifometre.sql @@ -2,9 +2,9 @@ CREATE TABLE IF NOT EXISTS statut_fantoir ( fantoir character varying(10), id_statut integer, timestamp_statut double precision, - insee_com character(5)); + code_insee character(5)); -CREATE INDEX IF NOT EXISTS idx_statut_fantoir_insee ON statut_fantoir(insee_com); +CREATE INDEX IF NOT EXISTS idx_statut_fantoir_insee ON statut_fantoir(code_insee); CREATE INDEX IF NOT EXISTS idx_statut_fantoir_fantoir ON statut_fantoir(fantoir); CREATE TABLE IF NOT EXISTS labels_statuts_fantoir( @@ -42,7 +42,7 @@ CREATE TABLE IF NOT EXISTS statut_numero ( source text, id_statut integer, timestamp_statut double precision, - insee_com character(5)); + code_insee character(5)); CREATE INDEX IF NOT EXISTS idx_statut_numero_fantoir ON statut_numero(fantoir,numero); CREATE TABLE IF NOT EXISTS labels_statuts_numero( From 50d170554a2a4ae7bdaaed39ea6413d5457667f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Tue, 25 Apr 2023 19:16:46 +0000 Subject: [PATCH 069/163] complement centroides --- .../charge_points_nommes_centroides_OSM.sql | 41 +++++++++++++++++-- 1 file changed, 37 insertions(+), 4 deletions(-) diff --git a/bano/sql/charge_points_nommes_centroides_OSM.sql b/bano/sql/charge_points_nommes_centroides_OSM.sql index a3442e8..b988462 
100644 --- a/bano/sql/charge_points_nommes_centroides_OSM.sql +++ b/bano/sql/charge_points_nommes_centroides_OSM.sql @@ -14,21 +14,54 @@ LEFT OUTER JOIN (SELECT * FROM polygones_insee_a9 WHERE insee_a8 = '__code_insee ON ST_Intersects(l.way, a9.geometrie) WHERE (l.highway != '' OR l.waterway = 'dam') AND + l.name != '' +UNION ALL +SELECT ST_PointOnSurface(l.way), + unnest(array[l.name,l.tags->'alt_name',l.tags->'old_name']) AS name, + COALESCE(a9.code_insee,'xxxxx') as insee_jointure, + a9.code_insee insee_ac, + "ref:FR:FANTOIR" AS fantoir, + ST_Within(l.way,p.geometrie)::integer as within +FROM (SELECT geometrie FROM polygones_insee WHERE code_insee = '__code_insee__') p +JOIN planet_osm_polygon l +ON ST_Intersects(l.way, p.geometrie) +LEFT OUTER JOIN (SELECT * FROM polygones_insee_a9 WHERE insee_a8 = '__code_insee__') a9 +ON ST_Intersects(l.way, a9.geometrie) +WHERE (l.highway||"ref:FR:FANTOIR" != '' OR l.landuse = 'residential' OR l.amenity = 'parking') AND + l.name != '' +UNION ALL +SELECT l.way, + unnest(array[l.name,l.tags->'alt_name',l.tags->'old_name']) AS name, + COALESCE(a9.code_insee,'xxxxx') as insee_jointure, + a9.code_insee insee_ac, + "ref:FR:FANTOIR" AS fantoir, + ST_Within(l.way,p.way)::integer as within +FROM (SELECT way FROM planet_osm_polygon WHERE "ref:INSEE" = '__code_insee__') p +JOIN planet_osm_rels l +ON ST_Intersects(l.way, p.way) +LEFT OUTER JOIN (SELECT * FROM polygones_insee_a9 WHERE insee_a8 = '__code_insee__') a9 +ON ST_Intersects(l.way, a9.geometrie) +WHERE l.member_role = 'street' AND l.name != ''), lignes_noms AS -(SELECT * +(SELECT CASE + WHEN GeometryType(way) = 'POINT' THEN ST_MakeLine(ST_Translate(way,-0.000001,-0.000001),ST_Translate(way,0.000001,0.000001)) + WHEN GeometryType(way) LIKE '%POLYGON' THEN ST_ExteriorRing(way) + ELSE way + END AS way_line, + * FROM lignes_brutes WHERE name IS NOT NULL AND (fantoir LIKE '__code_insee__%' OR fantoir = '')), lignes_noms_rang AS (SELECT *, - RANK() OVER(PARTITION BY name,insee_ac ORDER BY within DESC) rang + RANK() OVER(PARTITION BY name,insee_ac ORDER BY within DESC, fantoir DESC) rang FROM lignes_noms), lignes_agregees AS -(SELECT ST_LineMerge(ST_Collect(way)) way, +(SELECT ST_LineMerge(ST_Collect(way_line)) way, name, insee_ac, insee_jointure, @@ -38,7 +71,7 @@ WHERE rang = 1 GROUP BY 2,3,4,5), centroide_lignes_agregees AS -(SELECT ST_Centroid(ST_LineMerge(ST_Collect(way))) way, +(SELECT ST_Centroid(ST_LineMerge(ST_Collect(way_line))) way, name, insee_ac, insee_jointure, From f5e931b4cc744f8cf2eccef84b87d9e5ed437b31 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Mon, 1 May 2023 18:03:00 +0000 Subject: [PATCH 070/163] indexes --- bano/sql/create_table_base_bano_cibles.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/bano/sql/create_table_base_bano_cibles.sql b/bano/sql/create_table_base_bano_cibles.sql index 4765a4f..34bc9b5 100644 --- a/bano/sql/create_table_base_bano_cibles.sql +++ b/bano/sql/create_table_base_bano_cibles.sql @@ -31,6 +31,7 @@ CREATE TABLE IF NOT EXISTS bano_points_nommes ( geometrie geometry (Point, 4326) GENERATED ALWAYS AS (ST_Point(lon,lat)) STORED); CREATE INDEX IF NOT EXISTS idx_bano_points_nommes_code_insee ON bano_points_nommes (code_insee); +CREATE INDEX IF NOT EXISTS idx_bano_points_nommes_fantoir ON bano_points_nommes (fantoir); CREATE INDEX IF NOT EXISTS idx_bano_points_nommes_code_insee_source ON bano_points_nommes (code_insee,source); CREATE TABLE IF NOT EXISTS nom_fantoir ( From 88b34b6616b1b61c888e49016fe744bf5af2ed95 Mon Sep 17 
00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Mon, 1 May 2023 20:54:00 +0000 Subject: [PATCH 071/163] indexes --- bano/sql/create_table_base_bano_cibles.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/bano/sql/create_table_base_bano_cibles.sql b/bano/sql/create_table_base_bano_cibles.sql index 34bc9b5..dddc7a9 100644 --- a/bano/sql/create_table_base_bano_cibles.sql +++ b/bano/sql/create_table_base_bano_cibles.sql @@ -33,6 +33,7 @@ CREATE TABLE IF NOT EXISTS bano_points_nommes ( CREATE INDEX IF NOT EXISTS idx_bano_points_nommes_code_insee ON bano_points_nommes (code_insee); CREATE INDEX IF NOT EXISTS idx_bano_points_nommes_fantoir ON bano_points_nommes (fantoir); CREATE INDEX IF NOT EXISTS idx_bano_points_nommes_code_insee_source ON bano_points_nommes (code_insee,source); +CREATE INDEX IF NOT EXISTS idx_bano_points_nommes_code_insee_nature ON bano_points_nommes (code_insee,nature); CREATE TABLE IF NOT EXISTS nom_fantoir ( fantoir text, From 6814e250ea61674ac753b85afdcfd151e097c2cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Mon, 1 May 2023 21:05:49 +0000 Subject: [PATCH 072/163] indexes --- bano/sql/create_table_base_bano_outils.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bano/sql/create_table_base_bano_outils.sql b/bano/sql/create_table_base_bano_outils.sql index c8be3e9..45782fa 100644 --- a/bano/sql/create_table_base_bano_outils.sql +++ b/bano/sql/create_table_base_bano_outils.sql @@ -12,6 +12,8 @@ CREATE TABLE IF NOT EXISTS batch ( ok boolean, CONSTRAINT batch_pkey PRIMARY KEY (id_batch)); +CREATE INDEX IF NOT EXISTS idx_batch_zone ON batch (code_zone); + CREATE TABLE IF NOT EXISTS batch_historique( id_batch integer, etape text, From fb911c2613957e9d5f5ac04564f7ef9880ab2af1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Sun, 14 May 2023 09:05:06 +0000 Subject: [PATCH 073/163] cron osm v3 en 4326 --- cron_osm.sh | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/cron_osm.sh b/cron_osm.sh index 6eccbce..9334225 100755 --- a/cron_osm.sh +++ b/cron_osm.sh @@ -1,5 +1,7 @@ #!/bin/bash +set -e + SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" source $SCRIPT_DIR/config @@ -16,9 +18,9 @@ echo debut >> $SCRIPT_DIR/cron.log touch ${lockfile} -osmosis --rri workingDirectory=/data/download --wxc /data/download/changes.osc.gz -imposm diff -mapping $SCRIPT_DIR/bano.yml -cachedir /data/bano_imposm_cache -dbschema-production public -diffdir /data/bano_imposm_diff -connection 'postgis://cadastre@localhost/osm'?prefix=NONE -expiretiles-dir $EXPIRE_TILES_DIR -expiretiles-zoom 16 /data/download/changes.osc.gz -$SCRIPT_DIR/update_table_infos_communes.sh +osmosis --rri workingDirectory=${DOWNLOAD_DIR} --wxc ${DOWNLOAD_DIR}/changes.osc.gz +imposm diff -config $SCRIPT_DIR/imposm.config -dbschema-production osm -expiretiles-dir $EXPIRE_TILES_DIR -expiretiles-zoom 16 ${DOWNLOAD_DIR}/changes.osc.gz +#$SCRIPT_DIR/update_table_infos_communes.sh rm ${lockfile} From a5aacfd6e0c6c43a892b7eb998c5fe1fc274c8e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Sat, 20 May 2023 20:45:30 +0000 Subject: [PATCH 074/163] batch vers batch_historique --- bano/batch.py | 2 +- bano/sql/batch_start_log.sql | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/bano/batch.py b/bano/batch.py index 782cb93..d480780 100644 --- a/bano/batch.py +++ b/bano/batch.py @@ -21,7 +21,7 @@ def batch_start_log(etape, source=None, 
code_zone=None, nom_zone=None): if nom_zone: champs = f"{champs},nom_zone" values = f"{values},'{nom_zone}'" - return sql_get_data("batch_start_log", dict(champs=champs, values=values))[0][0] + return sql_get_data("batch_start_log", dict(etape=etape, code_zone=code_zone, champs=champs, values=values))[0][0] def batch_stop_log(id_batch, status): diff --git a/bano/sql/batch_start_log.sql b/bano/sql/batch_start_log.sql index 135123e..878456b 100644 --- a/bano/sql/batch_start_log.sql +++ b/bano/sql/batch_start_log.sql @@ -1,3 +1,11 @@ +INSERT INTO batch_historique +SELECT * +FROM batch +WHERE etape = '__etape__' AND + code_zone = '__code_zone__'; +DELETE FROM batch +WHERE etape = '__etape__' AND + code_zone = '__code_zone__'; INSERT INTO batch(__champs__) VALUES (__values__) RETURNING id_batch; \ No newline at end of file From 4cd90b5b1f405c53a406db099133792c5307aab6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Mon, 22 May 2023 20:23:28 +0000 Subject: [PATCH 075/163] batch index --- bano/sql/create_table_base_bano_outils.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/bano/sql/create_table_base_bano_outils.sql b/bano/sql/create_table_base_bano_outils.sql index 45782fa..537bf30 100644 --- a/bano/sql/create_table_base_bano_outils.sql +++ b/bano/sql/create_table_base_bano_outils.sql @@ -13,6 +13,7 @@ CREATE TABLE IF NOT EXISTS batch ( CONSTRAINT batch_pkey PRIMARY KEY (id_batch)); CREATE INDEX IF NOT EXISTS idx_batch_zone ON batch (code_zone); +CREATE INDEX IF NOT EXISTS idx_batch_zone_etape ON batch (code_zone,etape); CREATE TABLE IF NOT EXISTS batch_historique( id_batch integer, From f2356b27548c255c0df35f2a041ef604b4f4e666 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Mon, 22 May 2023 21:48:49 +0000 Subject: [PATCH 076/163] noms anciennes communes, fantoir sur 9 --- bano/models.py | 93 +++++++++++-------- .../charge_points_nommes_centroides_OSM.sql | 31 ++++--- ...arge_points_nommes_lieux-dits_CADASTRE.sql | 5 +- bano/sql/charge_points_nommes_places_OSM.sql | 6 +- 4 files changed, 80 insertions(+), 55 deletions(-) diff --git a/bano/models.py b/bano/models.py index 8454f55..4af5c75 100644 --- a/bano/models.py +++ b/bano/models.py @@ -21,17 +21,15 @@ def __init__( source, code_insee, code_insee_ancienne_commune, - lon=None, - lat=None, + nom_ancienne_commune, ): self.code_insee = code_insee self.code_insee_ancienne_commune = code_insee_ancienne_commune self.nom = nom - self.fantoir = fantoir + self.nom_ancienne_commune = nom_ancienne_commune + self.fantoir = fantoir[0:9] if fantoir else None self.nature = nature self.source = source - self.lon = lon - self.lat = lat self.nom_normalise = hp.normalize(nom) def __eq__(self, other): @@ -57,13 +55,12 @@ def __hash__(self): ) def _as_csv_format_bano(self): - return f"{self.fantoir}\t{self.nom}\t{self.nature}\t{self.code_insee}\t{self.code_insee_ancienne_commune if self.code_insee_ancienne_commune else ''}\t{self.source}" + return f"{self.fantoir}\t{self.nom}\t{self.nature}\t{self.code_insee}\t{self.code_insee_ancienne_commune if self.code_insee_ancienne_commune else ''}\t{self.nom_ancienne_commune if self.nom_ancienne_commune else ''}\t{self.source}" def add_fantoir(self, topo): if not self.fantoir: self.fantoir = topo.topo.get(self.nom_normalise) - class Noms: def __init__(self, code_insee): self.code_insee = code_insee @@ -99,8 +96,8 @@ def charge_noms_osm_hors_numeros(self): name, tags, libelle_suffixe, - ac_code_insee, - ac_nom, + code_insee_ancienne_commune, + 
nom_ancienne_commune, nature, ) in data: if provenance in (1, 2, 3, 4, 5): @@ -111,7 +108,8 @@ def charge_noms_osm_hors_numeros(self): nature, "OSM", self.code_insee, - ac_code_insee, + code_insee_ancienne_commune, + nom_ancienne_commune, ) ) if provenance in (6, 7) and tags.get("ref:FR:FANTOIR"): @@ -122,7 +120,8 @@ def charge_noms_osm_hors_numeros(self): nature, "OSM", self.code_insee, - ac_code_insee, + code_insee_ancienne_commune, + nom_ancienne_commune, ) ) @@ -180,6 +179,7 @@ def enregistre(self): "nature", "code_insee", "code_insee_ancienne_commune", + "nom_ancienne_commune", "source", ), ) @@ -198,7 +198,7 @@ def __init__( fantoir=None, code_postal=None, code_insee_ancienne_commune=None, - sous_commune_nom=None, + nom_ancienne_commune=None, ): self.code_insee = code_insee self.x = round(x, 6) @@ -207,10 +207,10 @@ def __init__( self.numero = num self.voie = voie self.place = place - self.fantoir = fantoir + self.fantoir = fantoir[0:9] if fantoir else None self.code_postal = code_postal self.code_insee_ancienne_commune = code_insee_ancienne_commune - self.sous_commune_nom = sous_commune_nom + self.nom_ancienne_commune = nom_ancienne_commune self.voie_normalisee = hp.normalize(self.voie) if self.voie else None self.place_normalisee = hp.format_toponyme(self.place) if self.place else None @@ -237,10 +237,10 @@ def __eq__(self, other): ) def _as_csv_format_bano(self): - return f"{self.fantoir if self.fantoir else ''}\t{self.x}\t{self.y}\t{self.numero}\t{self.voie if self.voie else ''}\t{self.place if self.place else ''}\t{self.code_postal}\t{self.code_insee}\t{self.code_insee_ancienne_commune if self.code_insee_ancienne_commune else ''}\t{self.source}" + return f"{self.fantoir if self.fantoir else ''}\t{self.x}\t{self.y}\t{self.numero}\t{self.voie if self.voie else ''}\t{self.place if self.place else ''}\t{self.code_postal}\t{self.code_insee}\t{self.code_insee_ancienne_commune if self.code_insee_ancienne_commune else ''}\t{self.nom_ancienne_commune if self.nom_ancienne_commune else ''}\t{self.source}" def _as_string(self): - return f"source : {self.source}, numero : {self.numero}, voie : {self.voie} ({self.voie_normalisee}), place : {self.place}, fantoir : {self.fantoir}, code_postal:{self.code_postal}, sous_commune : {self.code_insee_ancienne_commune} - {self.sous_commune_nom}" + return f"source : {self.source}, numero : {self.numero}, voie : {self.voie} ({self.voie_normalisee}), place : {self.place}, fantoir : {self.fantoir}, code_postal:{self.code_postal}, sous_commune : {self.code_insee_ancienne_commune} - {self.nom_ancienne_commune}" class Adresses: @@ -287,12 +287,12 @@ def charge_numeros_ban(self, topo): lon, lat, code_postal, - code_insee_ac, - nom_ac, + code_insee_ancienne_commune, + nom_ancienne_commune, ) in data: if id_fantoir: - fantoir9 = f"{id_fantoir[0:5]}{id_fantoir[6:10]}" - fantoir = topo.code_fantoir9_vers_fantoir10.get(fantoir9) + fantoir = f"{id_fantoir[0:5]}{id_fantoir[6:10]}" + # fantoir = topo.code_fantoir9_vers_fantoir10.get(fantoir9) else: fantoir = None self.add_adresse( @@ -305,8 +305,8 @@ def charge_numeros_ban(self, topo): voie=voie, fantoir=fantoir, code_postal=code_postal, - code_insee_ancienne_commune=code_insee_ac, - sous_commune_nom=nom_ac, + code_insee_ancienne_commune=code_insee_ancienne_commune, + nom_ancienne_commune=nom_ancienne_commune, ) ) @@ -325,11 +325,13 @@ def charge_numeros_osm(self): tags, suffixe, code_postal, - code_insee_ac, - nom_ac, + code_insee_ancienne_commune, + nom_ancienne_commune, ) in data: fantoir = 
tags.get("ref:FR:FANTOIR") + if fantoir : + fantoir = fantoir[0:9] if fantoir and not hp.fantoir_valide(fantoir, self.code_insee): continue @@ -348,8 +350,8 @@ def charge_numeros_osm(self): place=place, fantoir=fantoir, code_postal=code_postal, - code_insee_ancienne_commune=code_insee_ac, - sous_commune_nom=nom_ac, + code_insee_ancienne_commune=code_insee_ancienne_commune, + nom_ancienne_commune=nom_ancienne_commune, ) ) if provenance in ( @@ -367,8 +369,8 @@ def charge_numeros_osm(self): place=None, fantoir=fantoir, code_postal=code_postal, - code_insee_ancienne_commune=code_insee_ac, - sous_commune_nom=nom_ac, + code_insee_ancienne_commune=code_insee_ancienne_commune, + nom_ancienne_commune=nom_ancienne_commune, ) ) if ( @@ -392,8 +394,8 @@ def charge_numeros_osm(self): place=None, fantoir=tags["ref:FR:FANTOIR"], code_postal=code_postal, - code_insee_ancienne_commune=code_insee_ac, - sous_commune_nom=nom_ac, + code_insee_ancienne_commune=code_insee_ancienne_commune, + nom_ancienne_commune=nom_ancienne_commune, ) ) @@ -408,6 +410,7 @@ def noms_des_adresses(self, noms): a.source, self.code_insee, a.code_insee_ancienne_commune, + a.nom_ancienne_commune, ) ) if a.place: @@ -419,6 +422,7 @@ def noms_des_adresses(self, noms): a.source, self.code_insee, a.code_insee_ancienne_commune, + a.nom_ancienne_commune, ) ) @@ -460,6 +464,7 @@ def enregistre(self): "code_postal", "code_insee", "code_insee_ancienne_commune", + "nom_ancienne_commune", "source", ), ) @@ -476,6 +481,7 @@ def __init__( nom, fantoir=None, code_insee_ancienne_commune=None, + nom_ancienne_commune=None, ): self.code_insee = code_insee self.source = source @@ -483,8 +489,9 @@ def __init__( self.lat = round(lat, 6) self.nature = nature self.nom = nom - self.fantoir = fantoir + self.fantoir = fantoir[0:9] if fantoir else None self.code_insee_ancienne_commune = code_insee_ancienne_commune + self.nom_ancienne_commune = nom_ancienne_commune def __hash__(self): return hash( @@ -503,7 +510,7 @@ def _as_string(self): return f"source : {self.source}, nom : {self.nom}, nature : {self.nature}, sous_commune : {self.code_insee_ancienne_commune}" def _as_csv_format_bano(self): - return f"{self.fantoir if self.fantoir else ''}\t{self.nom}\t{self.code_insee}\t{self.nature}\t{self.code_insee_ancienne_commune if self.code_insee_ancienne_commune else ''}\t{self.source}\t{self.lon}\t{self.lat}" + return f"{self.fantoir if self.fantoir else ''}\t{self.nom}\t{self.code_insee}\t{self.nature}\t{self.code_insee_ancienne_commune if self.code_insee_ancienne_commune else ''}\t{self.nom_ancienne_commune if self.nom_ancienne_commune else ''}\t{self.source}\t{self.lon}\t{self.lat}" class Points_nommes: @@ -527,7 +534,7 @@ def charge_points_nommes_lieux_dits_cadastre(self): "charge_points_nommes_lieux-dits_CADASTRE", dict(code_insee=self.code_insee), ) - for x, y, nom, code_insee_ac in data: + for x, y, nom, code_insee_ancienne_commune,nom_ancienne_commune in data: self.add_point_nomme( Point_nomme( self.code_insee, @@ -536,7 +543,8 @@ def charge_points_nommes_lieux_dits_cadastre(self): x, y, hp.format_toponyme(nom), - code_insee_ancienne_commune=code_insee_ac, + code_insee_ancienne_commune=code_insee_ancienne_commune, + nom_ancienne_commune=nom_ancienne_commune, ) ) @@ -545,7 +553,7 @@ def charge_points_nommes_centroides_osm(self): "charge_points_nommes_centroides_OSM", dict(code_insee=self.code_insee), ) - for x, y, nom, code_insee_ac, fantoir in data: + for x, y, nom, code_insee_ancienne_commune, fantoir, nom_ancienne_commune in data: self.add_point_nomme( 
Point_nomme( self.code_insee, @@ -554,8 +562,9 @@ def charge_points_nommes_centroides_osm(self): x, y, nom, - code_insee_ancienne_commune=code_insee_ac, + code_insee_ancienne_commune=code_insee_ancienne_commune, fantoir=fantoir, + nom_ancienne_commune=nom_ancienne_commune, ) ) @@ -564,7 +573,7 @@ def charge_points_nommes_place_osm(self): "charge_points_nommes_places_OSM", dict(code_insee=self.code_insee), ) - for x, y, nom, code_insee_ac, fantoir in data: + for x, y, nom, code_insee_ancienne_commune, fantoir, nom_ancienne_commune in data: self.add_point_nomme( Point_nomme( self.code_insee, @@ -573,8 +582,9 @@ def charge_points_nommes_place_osm(self): x, y, nom, - code_insee_ancienne_commune=code_insee_ac, + code_insee_ancienne_commune=code_insee_ancienne_commune, fantoir=fantoir, + nom_ancienne_commune=nom_ancienne_commune, ) ) @@ -592,6 +602,7 @@ def noms_des_points_nommes(self, noms): a.source, self.code_insee, a.code_insee_ancienne_commune, + a.nom_ancienne_commune, ) ) if a.source == "OSM": @@ -603,6 +614,7 @@ def noms_des_points_nommes(self, noms): a.source, self.code_insee, a.code_insee_ancienne_commune, + a.nom_ancienne_commune, ) ) @@ -637,6 +649,7 @@ def enregistre(self): "code_insee", "nature", "code_insee_ancienne_commune", + "nom_ancienne_commune", "source", "lon", "lat", @@ -648,7 +661,7 @@ class Topo: def __init__(self, code_insee): self.code_insee = code_insee self.topo = OrderedDict() - self.code_fantoir9_vers_fantoir10 = {} + # self.code_fantoir9_vers_fantoir10 = {} # self.index_by_nom_normalise = defaultdict(list) @@ -673,5 +686,5 @@ def charge_topo(self): for fantoir, nom in data: nom = hp.normalize(" ".join(nom.replace("-", " ").split())) self.topo[fantoir] = nom - self.code_fantoir9_vers_fantoir10[fantoir[0:9]] = fantoir + # self.code_fantoir9_vers_fantoir10[fantoir[0:9]] = fantoir self.topo[nom] = fantoir diff --git a/bano/sql/charge_points_nommes_centroides_OSM.sql b/bano/sql/charge_points_nommes_centroides_OSM.sql index b988462..7277d39 100644 --- a/bano/sql/charge_points_nommes_centroides_OSM.sql +++ b/bano/sql/charge_points_nommes_centroides_OSM.sql @@ -6,7 +6,8 @@ AS COALESCE(a9.code_insee,'xxxxx') as insee_jointure, a9.code_insee insee_ac, unnest(array["ref:FR:FANTOIR","ref:FR:FANTOIR:left","ref:FR:FANTOIR:right"]) AS fantoir, - ST_Within(l.way,p.way)::integer as within + ST_Within(l.way,p.way)::integer as within, + a9.nom AS nom_ac FROM (SELECT way FROM planet_osm_polygon WHERE "ref:INSEE" = '__code_insee__') p JOIN planet_osm_line l ON ST_Intersects(l.way, p.way) @@ -21,7 +22,8 @@ SELECT ST_PointOnSurface(l.way), COALESCE(a9.code_insee,'xxxxx') as insee_jointure, a9.code_insee insee_ac, "ref:FR:FANTOIR" AS fantoir, - ST_Within(l.way,p.geometrie)::integer as within + ST_Within(l.way,p.geometrie)::integer as within, + a9.nom AS nom_ac FROM (SELECT geometrie FROM polygones_insee WHERE code_insee = '__code_insee__') p JOIN planet_osm_polygon l ON ST_Intersects(l.way, p.geometrie) @@ -35,7 +37,8 @@ SELECT l.way, COALESCE(a9.code_insee,'xxxxx') as insee_jointure, a9.code_insee insee_ac, "ref:FR:FANTOIR" AS fantoir, - ST_Within(l.way,p.way)::integer as within + ST_Within(l.way,p.way)::integer as within, + a9.nom AS nom_ac FROM (SELECT way FROM planet_osm_polygon WHERE "ref:INSEE" = '__code_insee__') p JOIN planet_osm_rels l ON ST_Intersects(l.way, p.way) @@ -65,33 +68,37 @@ AS name, insee_ac, insee_jointure, - fantoir + fantoir, + nom_ac FROM lignes_noms_rang WHERE rang = 1 -GROUP BY 2,3,4,5), +GROUP BY 2,3,4,5,6), centroide_lignes_agregees AS (SELECT 
ST_Centroid(ST_LineMerge(ST_Collect(way_line))) way, name, insee_ac, insee_jointure, - fantoir + fantoir, + nom_ac FROM lignes_noms_rang WHERE rang = 1 -GROUP BY 2,3,4,5), +GROUP BY 2,3,4,5,6), resultat AS (SELECT ST_SetSRID(ST_ClosestPoint(lignes_agregees.way,centroide_lignes_agregees.way),4326) point, lignes_agregees.name, lignes_agregees.insee_ac, - lignes_agregees.fantoir + lignes_agregees.fantoir, + lignes_agregees.nom_ac FROM lignes_agregees JOIN centroide_lignes_agregees USING (name,insee_jointure)), complement AS (SELECT c.*, - a9.code_insee AS insee_ac + a9.code_insee AS insee_ac, + a9.nom AS nom_ac FROM (SELECT pl.way point, pl.name, pl."ref:FR:FANTOIR" fantoir @@ -121,12 +128,14 @@ SELECT ST_x(point), ST_y(point), name, insee_ac, - fantoir + fantoir, + nom_ac FROM resultat UNION ALL SELECT ST_x(point), ST_y(point), name, insee_ac, - fantoir + fantoir, + nom_ac FROM complement; diff --git a/bano/sql/charge_points_nommes_lieux-dits_CADASTRE.sql b/bano/sql/charge_points_nommes_lieux-dits_CADASTRE.sql index 160e6bc..a605d5b 100644 --- a/bano/sql/charge_points_nommes_lieux-dits_CADASTRE.sql +++ b/bano/sql/charge_points_nommes_lieux-dits_CADASTRE.sql @@ -1,12 +1,13 @@ SELECT ST_X(geom_centroid), ST_Y(geom_centroid), regexp_replace(regexp_replace(ld.nom,' ',' ','g'),' ',' ','g') AS nom, - a9.code_insee + a9.code_insee, + a9.nom FROM (SELECT * FROM lieux_dits WHERE code_insee = '__code_insee__' AND nom IS NOT NULL) ld LEFT OUTER JOIN suffixe h ON ST_Intersects(ld.geom_centroid, h.geometrie) -LEFT OUTER JOIN (SELECT * FROM polygones_insee_a9 where insee_a8 = '__code_insee__') a9 +LEFT OUTER JOIN (SELECT * FROM polygones_insee_a9 WHERE insee_a8 = '__code_insee__') a9 ON ST_Contains(a9.geometrie,ld.geom_centroid); \ No newline at end of file diff --git a/bano/sql/charge_points_nommes_places_OSM.sql b/bano/sql/charge_points_nommes_places_OSM.sql index 02cf7ce..08a2bbb 100644 --- a/bano/sql/charge_points_nommes_places_OSM.sql +++ b/bano/sql/charge_points_nommes_places_OSM.sql @@ -6,7 +6,8 @@ AS tags, place, a9.code_insee AS insee_ac, - "ref:FR:FANTOIR" AS fantoir + "ref:FR:FANTOIR" AS fantoir, + a9.nom AS nom_ac FROM (SELECT way FROM planet_osm_polygon WHERE "ref:INSEE" = '__code_insee__') p JOIN (SELECT * FROM planet_osm_point WHERE place != '' AND name != '') pt ON pt.way && p.way AND @@ -17,6 +18,7 @@ SELECT ST_x(way), ST_y(way), name, insee_ac, - fantoir + fantoir, + nom_ac FROM pts WHERE name IS NOT NULL; From 8c407403d18f19c53a0a3232754f3312f77318c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Mon, 29 May 2023 10:39:03 +0000 Subject: [PATCH 077/163] Fantoirs fictifs de la BAN --- bano/bin.py | 11 ++++++- bano/rapprochement.py | 2 ++ bano/sources/ban2topo.py | 33 +++++++++++++++++++ bano/sql/add_noms_ban_dans_topo.sql | 2 ++ bano/sql/charge_topo_commune.sql | 2 +- .../noms_ban_non_rapproches_par_commune.sql | 15 +++++++++ bano/sql/purge_noms_ban_dans_topo.sql | 3 ++ 7 files changed, 66 insertions(+), 2 deletions(-) create mode 100644 bano/sources/ban2topo.py create mode 100644 bano/sql/add_noms_ban_dans_topo.sql create mode 100644 bano/sql/noms_ban_non_rapproches_par_commune.sql create mode 100644 bano/sql/purge_noms_ban_dans_topo.sql diff --git a/bano/bin.py b/bano/bin.py index 42eb1ff..68981ce 100644 --- a/bano/bin.py +++ b/bano/bin.py @@ -8,7 +8,7 @@ from . import setup_db from . import rapprochement from . 
import boite_a_outils -from .sources import topo, ban, cog, cadastre_ld +from .sources import topo, ban, cog, cadastre_ld, ban2topo from .constants import DEPARTEMENTS @@ -103,6 +103,15 @@ def main(): ) subparser.set_defaults(func=rapprochement.process) + subparser = subparsers.add_parser( + "ban2topo", + help="Ajoute dans TOPO des libellés BAN", + description="Ajoute dans TOPO des libellés BAN" + ) + subparser.add_argument( + "--code_insee", type=str, help="Code INSEE de la commune à traiter" + ) + subparser.set_defaults(func=ban2topo.process) args = parser.parse_args() try: diff --git a/bano/rapprochement.py b/bano/rapprochement.py index 9dcd400..a30ccc9 100644 --- a/bano/rapprochement.py +++ b/bano/rapprochement.py @@ -4,11 +4,13 @@ from . import db_helpers as h from . import batch as b from .models import Noms, Adresses, Topo, Points_nommes +from .sources import ban2topo def process_unitaire(code_insee): id_batch = b.batch_start_log("rapprochement", "toutes", code_insee) try: + ban2topo.process(code_insee) topo = Topo(code_insee) adresses = Adresses(code_insee) points_nommes = Points_nommes(code_insee) diff --git a/bano/sources/ban2topo.py b/bano/sources/ban2topo.py new file mode 100644 index 0000000..915a3ed --- /dev/null +++ b/bano/sources/ban2topo.py @@ -0,0 +1,33 @@ +from bano import db +from bano import models +from ..sql import sql_process,sql_get_data +from ..helpers import normalize,get_code_dept_from_insee +# from . import topo + + +def get_noms_ban(code_insee): + return sql_get_data('noms_ban_non_rapproches_par_commune',dict(code_insee=code_insee)) + +def purge_noms_ban_dans_topo(code_insee): + sql_process('purge_noms_ban_dans_topo',dict(code_insee=code_insee)) + +def add_noms_ban_dans_topo(code_insee,noms_ban): + sql_process('add_noms_ban_dans_topo',dict(code_insee=code_insee,noms_ban=noms_ban)) + +def pseudo_fantoir(index,code_insee): + return f"{code_insee}b{str(index).ljust(3,'b')}" + +def process(code_insee,**kwargs): + purge_noms_ban_dans_topo(code_insee) + noms_ban = get_noms_ban(code_insee) + if len(noms_ban) > 0: + noms_ban_norm = set() + topo = models.Topo(code_insee) + dept = get_code_dept_from_insee(code_insee) + for i,n in enumerate(noms_ban): + nom_norm = normalize(n[0]) + if not nom_norm in topo.topo: + noms_ban_norm.add(f"'{dept}','{code_insee}','{pseudo_fantoir(i,code_insee)}',' ','{nom_norm}','B','B','0000000'") + print(f"'{dept}','{code_insee}','{pseudo_fantoir(i,code_insee)}',' ','{nom_norm}','B','B','0000000'") + if len(noms_ban_norm)>0: + add_noms_ban_dans_topo(code_insee, f"({'),('.join(noms_ban_norm)})") diff --git a/bano/sql/add_noms_ban_dans_topo.sql b/bano/sql/add_noms_ban_dans_topo.sql new file mode 100644 index 0000000..b282dcf --- /dev/null +++ b/bano/sql/add_noms_ban_dans_topo.sql @@ -0,0 +1,2 @@ +INSERT INTO topo(code_dep,code_insee,fantoir,nature_voie,libelle_voie,type_voie,caractere_annul,date_annul) +VALUES __noms_ban__; diff --git a/bano/sql/charge_topo_commune.sql b/bano/sql/charge_topo_commune.sql index bcd3a64..6666d4c 100644 --- a/bano/sql/charge_topo_commune.sql +++ b/bano/sql/charge_topo_commune.sql @@ -5,7 +5,7 @@ AS TRIM (BOTH FROM (COALESCE(nature_voie,'')||' '||libelle_voie)) AS nom FROM topo WHERE code_insee = '__code_insee__' AND - caractere_annul IS NULL), + COALESCE(caractere_annul,'x') NOT IN ('O','Q')), tr AS (SELECT fantoir, diff --git a/bano/sql/noms_ban_non_rapproches_par_commune.sql b/bano/sql/noms_ban_non_rapproches_par_commune.sql new file mode 100644 index 0000000..c467473 --- /dev/null +++ 
b/bano/sql/noms_ban_non_rapproches_par_commune.sql @@ -0,0 +1,15 @@ +SELECT nom_voie +FROM ban +WHERE code_insee = '__code_insee__' AND + id_fantoir IS NULL +EXCEPT +SELECT nom +FROM (SELECT * + FROM nom_fantoir + WHERE code_insee = '__code_insee__' AND + source = 'BAN')n +JOIN (SELECT fantoir + FROM topo + WHERE code_insee = '__code_insee__' AND + caractere_annul = 'B') t +USING (fantoir); \ No newline at end of file diff --git a/bano/sql/purge_noms_ban_dans_topo.sql b/bano/sql/purge_noms_ban_dans_topo.sql new file mode 100644 index 0000000..4429f25 --- /dev/null +++ b/bano/sql/purge_noms_ban_dans_topo.sql @@ -0,0 +1,3 @@ +DELETE FROM topo +WHERE code_insee = '__code_insee__' AND + type_voie = 'B'; \ No newline at end of file From 8f5e9920fd517f3406876bde234d8d5ce7350f63 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Mon, 29 May 2023 10:39:49 +0000 Subject: [PATCH 078/163] caracteres parasite dans les LD --- bano/sql/charge_points_nommes_lieux-dits_CADASTRE.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bano/sql/charge_points_nommes_lieux-dits_CADASTRE.sql b/bano/sql/charge_points_nommes_lieux-dits_CADASTRE.sql index a605d5b..334f443 100644 --- a/bano/sql/charge_points_nommes_lieux-dits_CADASTRE.sql +++ b/bano/sql/charge_points_nommes_lieux-dits_CADASTRE.sql @@ -1,6 +1,6 @@ SELECT ST_X(geom_centroid), ST_Y(geom_centroid), - regexp_replace(regexp_replace(ld.nom,' ',' ','g'),' ',' ','g') AS nom, + regexp_replace(regexp_replace(regexp_replace(regexp_replace(ld.nom,' ',' ','g'),' ',' ','g'),' ',' ','g'),' ',' ','g') AS nom, a9.code_insee, a9.nom FROM (SELECT * From 3ebf74c8b73af1efc2936fe427a5312c00102982 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Mon, 29 May 2023 13:51:38 +0000 Subject: [PATCH 079/163] Fantoirs fictifs de la BAN - print en trop --- bano/sources/ban2topo.py | 1 - 1 file changed, 1 deletion(-) diff --git a/bano/sources/ban2topo.py b/bano/sources/ban2topo.py index 915a3ed..a2da3bf 100644 --- a/bano/sources/ban2topo.py +++ b/bano/sources/ban2topo.py @@ -28,6 +28,5 @@ def process(code_insee,**kwargs): nom_norm = normalize(n[0]) if not nom_norm in topo.topo: noms_ban_norm.add(f"'{dept}','{code_insee}','{pseudo_fantoir(i,code_insee)}',' ','{nom_norm}','B','B','0000000'") - print(f"'{dept}','{code_insee}','{pseudo_fantoir(i,code_insee)}',' ','{nom_norm}','B','B','0000000'") if len(noms_ban_norm)>0: add_noms_ban_dans_topo(code_insee, f"({'),('.join(noms_ban_norm)})") From 05cd35282086a4168ad78fcb9e753afc757570ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Mon, 29 May 2023 22:11:23 +0000 Subject: [PATCH 080/163] Statut pour les lieux-dits limitrophes --- bano/sql/create_table_base_bano_pifometre.sql | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bano/sql/create_table_base_bano_pifometre.sql b/bano/sql/create_table_base_bano_pifometre.sql index 8da082b..39bd8ca 100644 --- a/bano/sql/create_table_base_bano_pifometre.sql +++ b/bano/sql/create_table_base_bano_pifometre.sql @@ -34,7 +34,8 @@ VALUES (0,0,'Ok'), (17,16,'Erreur de commune'), (18,17,'FANTOIR annulé non remplacé'), (19,18,'Point cardinal superflu'), -(20,19,'Voie en projet'); +(20,19,'Voie en projet'), +(21,20,'Lieu-dit sur plusieurs communes'); CREATE TABLE IF NOT EXISTS statut_numero ( numero text, From aee41a1859b50c35076e55360d20232410ef9dc3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Tue, 30 May 2023 22:10:27 +0000 
Subject: [PATCH 081/163] +type voie GRAND RUE --- bano/dictionnaires/abrev_type_voie.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/bano/dictionnaires/abrev_type_voie.txt b/bano/dictionnaires/abrev_type_voie.txt index 73d554b..a5a82be 100644 --- a/bano/dictionnaires/abrev_type_voie.txt +++ b/bano/dictionnaires/abrev_type_voie.txt @@ -82,6 +82,7 @@ GRAND BOULEVARD GBD GRAND PLACE GPL GR GR GR GR GRANDE GR +GR GRAND GR GRAND RUE GR GRANDE RUE GR GROUPE SCOLAIRE GROUP SCOL From 757802a5086a56341fa3b40c0bd06cf0d2499d45 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Fri, 2 Jun 2023 22:19:37 +0000 Subject: [PATCH 082/163] dedoublonnage des numeros OSM --- bano/models.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/bano/models.py b/bano/models.py index 4af5c75..6544c63 100644 --- a/bano/models.py +++ b/bano/models.py @@ -125,7 +125,7 @@ def charge_noms_osm_hors_numeros(self): ) ) - # On ajoute un nom s'il n'a pas de FANTOIR ou si son FANTOIR appartient à la commune + # On ajoute un triplet nom s'il n'a pas de FANTOIR ou si son FANTOIR appartient à la commune def add_nom(self, nom=Nom): if not nom.fantoir or nom.fantoir[0:5] == self.code_insee: self.triplets_nom_fantoir_source.append(nom) @@ -444,7 +444,8 @@ def enregistre(self): dict(code_insee=self.code_insee), ) io_in_csv = io.StringIO() - for a in self: + + for a in set(self.liste): #passage en set pour dedoublonner les adresses de provenances multiples io_in_csv.write( a._as_csv_format_bano() + "\n" ) # separateur $ car on trouve des virgules dans le contenu @@ -489,25 +490,26 @@ def __init__( self.lat = round(lat, 6) self.nature = nature self.nom = nom + self.nom_normalise = hp.normalize(nom) self.fantoir = fantoir[0:9] if fantoir else None self.code_insee_ancienne_commune = code_insee_ancienne_commune self.nom_ancienne_commune = nom_ancienne_commune def __hash__(self): return hash( - (self.code_insee, self.source, self.nom, self.code_insee_ancienne_commune) + (self.code_insee, self.source, self.nom_normalise, self.code_insee_ancienne_commune) ) def __eq__(self, other): return ( self.code_insee == other.code_insee and self.source == other.source - and self.nom == other.nom + and self.nom_normalise == other.nom_normalise and self.code_insee_ancienne_commune == other.code_insee_ancienne_commune ) def _as_string(self): - return f"source : {self.source}, nom : {self.nom}, nature : {self.nature}, sous_commune : {self.code_insee_ancienne_commune}" + return f"source : {self.source}, nom : {self.nom} ({self.nom_normalise}), nature : {self.nature}, sous_commune : {self.code_insee_ancienne_commune}" def _as_csv_format_bano(self): return f"{self.fantoir if self.fantoir else ''}\t{self.nom}\t{self.code_insee}\t{self.nature}\t{self.code_insee_ancienne_commune if self.code_insee_ancienne_commune else ''}\t{self.nom_ancienne_commune if self.nom_ancienne_commune else ''}\t{self.source}\t{self.lon}\t{self.lat}" From b676ec87e027aac61b172784937e5a9110e4522c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Sun, 4 Jun 2023 14:25:02 +0000 Subject: [PATCH 083/163] conversion Fantoir -> Topo au besoin --- utils/fantoir2topo.sh | 5 +++++ utils/fantoir2topo.sql | 15 +++++++++++++++ 2 files changed, 20 insertions(+) create mode 100755 utils/fantoir2topo.sh create mode 100644 utils/fantoir2topo.sql diff --git a/utils/fantoir2topo.sh b/utils/fantoir2topo.sh new file mode 100755 index 0000000..ca385c5 --- /dev/null +++ b/utils/fantoir2topo.sh @@ -0,0 +1,5 
@@ +#!/bin/bash + +psql -d cadastre -U cadastre --csv -f fantoir2topo.sql > fantoir_202304.csv +cat fantoir_202304.csv|psql -d bano -U cadastre -c"TRUNCATE topo;COPY topo FROM STDIN WITH CSV HEADER" +rm fantoir_202304.csv \ No newline at end of file diff --git a/utils/fantoir2topo.sql b/utils/fantoir2topo.sql new file mode 100644 index 0000000..6ba8622 --- /dev/null +++ b/utils/fantoir2topo.sql @@ -0,0 +1,15 @@ +SELECT code_dept, + code_insee, + fantoir, + nature_voie, + libelle_voie, + caractere_voie, + TRIM (both FROM caractere_annul), + CASE date_annul + WHEN '0000000' then 0 + ELSE to_char(to_timestamp(date_annul::text, 'YYYYDDD'),'YYYYMMDD')::integer + END, + TO_CHAR(TO_TIMESTAMP(date_creation::text,'YYYYDDD'),'YYYYMMDD')::integer, + type_voie, + dernier_mot +FROM fantoir_voie; From 1ef22bb8c525a7d324f431afca3ae4e8a52197fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Sun, 4 Jun 2023 17:42:57 +0000 Subject: [PATCH 084/163] index --- bano/sql/create_table_base_bano_cibles.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/bano/sql/create_table_base_bano_cibles.sql b/bano/sql/create_table_base_bano_cibles.sql index dddc7a9..88c71a8 100644 --- a/bano/sql/create_table_base_bano_cibles.sql +++ b/bano/sql/create_table_base_bano_cibles.sql @@ -32,6 +32,7 @@ CREATE TABLE IF NOT EXISTS bano_points_nommes ( CREATE INDEX IF NOT EXISTS idx_bano_points_nommes_code_insee ON bano_points_nommes (code_insee); CREATE INDEX IF NOT EXISTS idx_bano_points_nommes_fantoir ON bano_points_nommes (fantoir); +CREATE INDEX IF NOT EXISTS idx_bano_points_nommes_fantoir_source ON bano_points_nommes (fantoir,source); CREATE INDEX IF NOT EXISTS idx_bano_points_nommes_code_insee_source ON bano_points_nommes (code_insee,source); CREATE INDEX IF NOT EXISTS idx_bano_points_nommes_code_insee_nature ON bano_points_nommes (code_insee,nature); From 1a588f164a46639da8fecc50b743d7ef6c310a7d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Mon, 5 Jun 2023 21:53:36 +0000 Subject: [PATCH 085/163] code dept + TAB dans les noms --- bano/models.py | 16 +++++++++++----- bano/sql/create_table_base_bano_cibles.sql | 6 ++++++ 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/bano/models.py b/bano/models.py index 6544c63..b7945f6 100644 --- a/bano/models.py +++ b/bano/models.py @@ -24,8 +24,9 @@ def __init__( nom_ancienne_commune, ): self.code_insee = code_insee + self.code_dept = hp.get_code_dept_from_insee(code_insee) self.code_insee_ancienne_commune = code_insee_ancienne_commune - self.nom = nom + self.nom = nom.replace('\t',' ') self.nom_ancienne_commune = nom_ancienne_commune self.fantoir = fantoir[0:9] if fantoir else None self.nature = nature @@ -55,7 +56,7 @@ def __hash__(self): ) def _as_csv_format_bano(self): - return f"{self.fantoir}\t{self.nom}\t{self.nature}\t{self.code_insee}\t{self.code_insee_ancienne_commune if self.code_insee_ancienne_commune else ''}\t{self.nom_ancienne_commune if self.nom_ancienne_commune else ''}\t{self.source}" + return f"{self.fantoir}\t{self.nom}\t{self.nature}\t{self.code_insee}\t{self.code_dept}\t{self.code_insee_ancienne_commune if self.code_insee_ancienne_commune else ''}\t{self.nom_ancienne_commune if self.nom_ancienne_commune else ''}\t{self.source}" def add_fantoir(self, topo): if not self.fantoir: @@ -178,6 +179,7 @@ def enregistre(self): "nom", "nature", "code_insee", + "code_dept", "code_insee_ancienne_commune", "nom_ancienne_commune", "source", @@ -201,6 +203,7 @@ def __init__( 
nom_ancienne_commune=None, ): self.code_insee = code_insee + self.code_dept = hp.get_code_dept_from_insee(code_insee) self.x = round(x, 6) self.y = round(y, 6) self.source = source @@ -237,7 +240,7 @@ def __eq__(self, other): ) def _as_csv_format_bano(self): - return f"{self.fantoir if self.fantoir else ''}\t{self.x}\t{self.y}\t{self.numero}\t{self.voie if self.voie else ''}\t{self.place if self.place else ''}\t{self.code_postal}\t{self.code_insee}\t{self.code_insee_ancienne_commune if self.code_insee_ancienne_commune else ''}\t{self.nom_ancienne_commune if self.nom_ancienne_commune else ''}\t{self.source}" + return f"{self.fantoir if self.fantoir else ''}\t{self.x}\t{self.y}\t{self.numero}\t{self.voie if self.voie else ''}\t{self.place if self.place else ''}\t{self.code_postal}\t{self.code_insee}\t{self.code_dept}\t{self.code_insee_ancienne_commune if self.code_insee_ancienne_commune else ''}\t{self.nom_ancienne_commune if self.nom_ancienne_commune else ''}\t{self.source}" def _as_string(self): return f"source : {self.source}, numero : {self.numero}, voie : {self.voie} ({self.voie_normalisee}), place : {self.place}, fantoir : {self.fantoir}, code_postal:{self.code_postal}, sous_commune : {self.code_insee_ancienne_commune} - {self.nom_ancienne_commune}" @@ -464,6 +467,7 @@ def enregistre(self): "nom_place", "code_postal", "code_insee", + "code_dept", "code_insee_ancienne_commune", "nom_ancienne_commune", "source", @@ -485,11 +489,12 @@ def __init__( nom_ancienne_commune=None, ): self.code_insee = code_insee + self.code_dept = hp.get_code_dept_from_insee(code_insee) self.source = source self.lon = round(lon, 6) self.lat = round(lat, 6) self.nature = nature - self.nom = nom + self.nom = nom.replace('\t',' ') self.nom_normalise = hp.normalize(nom) self.fantoir = fantoir[0:9] if fantoir else None self.code_insee_ancienne_commune = code_insee_ancienne_commune @@ -512,7 +517,7 @@ def _as_string(self): return f"source : {self.source}, nom : {self.nom} ({self.nom_normalise}), nature : {self.nature}, sous_commune : {self.code_insee_ancienne_commune}" def _as_csv_format_bano(self): - return f"{self.fantoir if self.fantoir else ''}\t{self.nom}\t{self.code_insee}\t{self.nature}\t{self.code_insee_ancienne_commune if self.code_insee_ancienne_commune else ''}\t{self.nom_ancienne_commune if self.nom_ancienne_commune else ''}\t{self.source}\t{self.lon}\t{self.lat}" + return f"{self.fantoir if self.fantoir else ''}\t{self.nom}\t{self.code_insee}\t{self.code_dept}\t{self.nature}\t{self.code_insee_ancienne_commune if self.code_insee_ancienne_commune else ''}\t{self.nom_ancienne_commune if self.nom_ancienne_commune else ''}\t{self.source}\t{self.lon}\t{self.lat}" class Points_nommes: @@ -649,6 +654,7 @@ def enregistre(self): "fantoir", "nom", "code_insee", + "code_dept", "nature", "code_insee_ancienne_commune", "nom_ancienne_commune", diff --git a/bano/sql/create_table_base_bano_cibles.sql b/bano/sql/create_table_base_bano_cibles.sql index 88c71a8..f30b966 100644 --- a/bano/sql/create_table_base_bano_cibles.sql +++ b/bano/sql/create_table_base_bano_cibles.sql @@ -7,6 +7,7 @@ CREATE TABLE IF NOT EXISTS bano_adresses ( nom_place text, code_postal text, code_insee text, + code_dept text, code_insee_ancienne_commune text, nom_ancienne_commune text, source text, @@ -15,6 +16,7 @@ CREATE TABLE IF NOT EXISTS bano_adresses ( CREATE INDEX IF NOT EXISTS gidx_bano_adresses ON bano_adresses USING GIST(geometrie); CREATE INDEX IF NOT EXISTS idx_bano_adresses_code_insee ON bano_adresses (code_insee); +CREATE INDEX IF NOT 
EXISTS idx_bano_adresses_code_dept ON bano_adresses (code_dept); CREATE INDEX IF NOT EXISTS idx_bano_adresses_fantoir ON bano_adresses (fantoir); CREATE INDEX IF NOT EXISTS idx_bano_adresses_pifo_code_insee_source ON bano_adresses (code_insee,source); @@ -22,6 +24,7 @@ CREATE TABLE IF NOT EXISTS bano_points_nommes ( fantoir text, nom text, code_insee text, + code_dept text, nature text, code_insee_ancienne_commune text, nom_ancienne_commune text, @@ -31,6 +34,7 @@ CREATE TABLE IF NOT EXISTS bano_points_nommes ( geometrie geometry (Point, 4326) GENERATED ALWAYS AS (ST_Point(lon,lat)) STORED); CREATE INDEX IF NOT EXISTS idx_bano_points_nommes_code_insee ON bano_points_nommes (code_insee); +CREATE INDEX IF NOT EXISTS idx_bano_points_nommes_code_dept ON bano_points_nommes (code_dept); CREATE INDEX IF NOT EXISTS idx_bano_points_nommes_fantoir ON bano_points_nommes (fantoir); CREATE INDEX IF NOT EXISTS idx_bano_points_nommes_fantoir_source ON bano_points_nommes (fantoir,source); CREATE INDEX IF NOT EXISTS idx_bano_points_nommes_code_insee_source ON bano_points_nommes (code_insee,source); @@ -40,12 +44,14 @@ CREATE TABLE IF NOT EXISTS nom_fantoir ( fantoir text, nom text, code_insee text, + code_dept text, nature text, code_insee_ancienne_commune text, nom_ancienne_commune text, source text); CREATE INDEX IF NOT EXISTS idx_nom_fantoir_code_insee ON nom_fantoir (code_insee); +CREATE INDEX IF NOT EXISTS idx_nom_fantoir_code_dept ON nom_fantoir (code_dept); CREATE INDEX IF NOT EXISTS idx_nom_fantoir_fantoir ON nom_fantoir (fantoir); CREATE INDEX IF NOT EXISTS idx_nom_fantoir_code_insee_source ON nom_fantoir (code_insee,source); From 96da3830a772c1e0d672a82b3a93dd5f5b16d814 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Tue, 6 Jun 2023 22:26:11 +0000 Subject: [PATCH 086/163] substitution de fantoir BAN par OSM --- bano/models.py | 53 ++++++++++++++++------ bano/rapprochement.py | 11 +++-- bano/sql/create_table_base_bano_cibles.sql | 7 +++ 3 files changed, 53 insertions(+), 18 deletions(-) diff --git a/bano/models.py b/bano/models.py index b7945f6..27d31a0 100644 --- a/bano/models.py +++ b/bano/models.py @@ -8,10 +8,8 @@ from .db import bano_db from . 
import helpers as hp -# from .sources import fantoir from .sql import sql_get_data, sql_process - class Nom: def __init__( self, @@ -55,8 +53,8 @@ def __hash__(self): ) ) - def _as_csv_format_bano(self): - return f"{self.fantoir}\t{self.nom}\t{self.nature}\t{self.code_insee}\t{self.code_dept}\t{self.code_insee_ancienne_commune if self.code_insee_ancienne_commune else ''}\t{self.nom_ancienne_commune if self.nom_ancienne_commune else ''}\t{self.source}" + def _as_csv_format_bano(self,correspondance): + return f"{correspondance.get(self.fantoir,self.fantoir)}\t{self.nom}\t{self.nature}\t{self.code_insee}\t{self.code_dept}\t{self.code_insee_ancienne_commune if self.code_insee_ancienne_commune else ''}\t{self.nom_ancienne_commune if self.nom_ancienne_commune else ''}\t{self.source}" def add_fantoir(self, topo): if not self.fantoir: @@ -159,7 +157,7 @@ def remplit_fantoir_par_nom_sous_commune(self): # # print(f"{branche} - {nom}") # # print(f"{branche} - {nom} > {self.fantoir_par_nom_sous_commune[branche][nom]}") - def enregistre(self): + def enregistre(self,correspondance): sql_process( "suppression_noms_commune", dict(code_insee=self.code_insee), @@ -167,7 +165,7 @@ def enregistre(self): io_in_csv = io.StringIO() for t in set(self.triplets_nom_fantoir_source): if t.fantoir: - io_in_csv.write(t._as_csv_format_bano() + "\n") + io_in_csv.write(t._as_csv_format_bano(correspondance) + "\n") io_in_csv.seek(0) with bano_db.cursor() as cur_insert: cur_insert.copy_from( @@ -239,8 +237,8 @@ def __eq__(self, other): and self.code_insee_ancienne_commune == other.code_insee_ancienne_commune ) - def _as_csv_format_bano(self): - return f"{self.fantoir if self.fantoir else ''}\t{self.x}\t{self.y}\t{self.numero}\t{self.voie if self.voie else ''}\t{self.place if self.place else ''}\t{self.code_postal}\t{self.code_insee}\t{self.code_dept}\t{self.code_insee_ancienne_commune if self.code_insee_ancienne_commune else ''}\t{self.nom_ancienne_commune if self.nom_ancienne_commune else ''}\t{self.source}" + def _as_csv_format_bano(self,correspondance): + return f"{correspondance.get(self.fantoir,self.fantoir) if self.fantoir else ''}\t{self.x}\t{self.y}\t{self.numero}\t{self.voie if self.voie else ''}\t{self.place if self.place else ''}\t{self.code_postal}\t{self.code_insee}\t{self.code_dept}\t{self.code_insee_ancienne_commune if self.code_insee_ancienne_commune else ''}\t{self.nom_ancienne_commune if self.nom_ancienne_commune else ''}\t{self.source}" def _as_string(self): return f"source : {self.source}, numero : {self.numero}, voie : {self.voie} ({self.voie_normalisee}), place : {self.place}, fantoir : {self.fantoir}, code_postal:{self.code_postal}, sous_commune : {self.code_insee_ancienne_commune} - {self.nom_ancienne_commune}" @@ -441,7 +439,7 @@ def complete_fantoir(self, noms): else: a.fantoir = noms.fantoir_par_nom_sous_commune.get(nom) - def enregistre(self): + def enregistre(self,correspondance): sql_process( "suppression_adresses_commune", dict(code_insee=self.code_insee), @@ -450,7 +448,7 @@ def enregistre(self): for a in set(self.liste): #passage en set pour dedoublonner les adresses de provenances multiples io_in_csv.write( - a._as_csv_format_bano() + "\n" + a._as_csv_format_bano(correspondance) + "\n" ) # separateur $ car on trouve des virgules dans le contenu io_in_csv.seek(0) with bano_db.cursor() as cur_insert: @@ -516,8 +514,8 @@ def __eq__(self, other): def _as_string(self): return f"source : {self.source}, nom : {self.nom} ({self.nom_normalise}), nature : {self.nature}, sous_commune : 
{self.code_insee_ancienne_commune}" - def _as_csv_format_bano(self): - return f"{self.fantoir if self.fantoir else ''}\t{self.nom}\t{self.code_insee}\t{self.code_dept}\t{self.nature}\t{self.code_insee_ancienne_commune if self.code_insee_ancienne_commune else ''}\t{self.nom_ancienne_commune if self.nom_ancienne_commune else ''}\t{self.source}\t{self.lon}\t{self.lat}" + def _as_csv_format_bano(self,correspondance): + return f"{correspondance.get(self.fantoir,self.fantoir) if self.fantoir else ''}\t{self.nom}\t{self.code_insee}\t{self.code_dept}\t{self.nature}\t{self.code_insee_ancienne_commune if self.code_insee_ancienne_commune else ''}\t{self.nom_ancienne_commune if self.nom_ancienne_commune else ''}\t{self.source}\t{self.lon}\t{self.lat}" class Points_nommes: @@ -636,14 +634,14 @@ def complete_fantoir(self, noms): else: a.fantoir = noms.fantoir_par_nom_sous_commune.get(a.nom) - def enregistre(self): + def enregistre(self,correspondance): sql_process( "suppression_points_nommes_commune", dict(code_insee=self.code_insee), ) io_in_csv = io.StringIO() for t in self: - io_in_csv.write(t._as_csv_format_bano() + "\n") + io_in_csv.write(t._as_csv_format_bano(correspondance) + "\n") io_in_csv.seek(0) with bano_db.cursor() as cur_insert: cur_insert.copy_from( @@ -696,3 +694,30 @@ def charge_topo(self): self.topo[fantoir] = nom # self.code_fantoir9_vers_fantoir10[fantoir[0:9]] = fantoir self.topo[nom] = fantoir + +class Correspondance_fantoir_ban_osm: + def __init__(self,code_insee): + self.dic_fantoir = {} + self.correspondance = {} + self.code_insee = code_insee + + def process(self,noms): + for n in noms: + niveau = n.code_insee_ancienne_commune if n.code_insee_ancienne_commune else 'RACINE' + cle = f"{niveau} - {n.nom_normalise}" + if n.fantoir and n.source in ('BAN','OSM'): + if not cle in self.dic_fantoir: + self.dic_fantoir[cle] = {} + if not n.source in self.dic_fantoir[cle]: + self.dic_fantoir[cle][n.source] = {} + self.dic_fantoir[cle][n.source] = n.fantoir + + # [n.code_insee_ancienne_commune if n.code_insee_ancienne_commune else 'RACINE'] = n.fantoir + for f in self.dic_fantoir: + if 'BAN' in self.dic_fantoir[f] and 'OSM' in self.dic_fantoir[f] and self.dic_fantoir[f]['BAN'] != self.dic_fantoir[f]['OSM']: + self.correspondance[self.dic_fantoir[f]['BAN']] = self.dic_fantoir[f]['OSM'] + + def enregistre(self): + return 0 + + # print(n.nom_normalise) diff --git a/bano/rapprochement.py b/bano/rapprochement.py index a30ccc9..ed5b7b2 100644 --- a/bano/rapprochement.py +++ b/bano/rapprochement.py @@ -3,7 +3,7 @@ from . import db_helpers as h from . 
import batch as b -from .models import Noms, Adresses, Topo, Points_nommes +from .models import Noms, Adresses, Topo, Points_nommes, Correspondance_fantoir_ban_osm from .sources import ban2topo @@ -15,6 +15,7 @@ def process_unitaire(code_insee): adresses = Adresses(code_insee) points_nommes = Points_nommes(code_insee) noms = Noms(code_insee) + correspondance_fantoir_ban_osm = Correspondance_fantoir_ban_osm(code_insee) noms.charge_noms_osm_hors_numeros() adresses.charge_numeros_osm() @@ -31,9 +32,11 @@ def process_unitaire(code_insee): points_nommes.complete_fantoir(noms) adresses.complete_fantoir(noms) - adresses.enregistre() - noms.enregistre() - points_nommes.enregistre() + correspondance_fantoir_ban_osm.process(noms) + + adresses.enregistre(correspondance_fantoir_ban_osm.correspondance) + noms.enregistre(correspondance_fantoir_ban_osm.correspondance) + points_nommes.enregistre(correspondance_fantoir_ban_osm.correspondance) b.batch_stop_log(id_batch, True) except: diff --git a/bano/sql/create_table_base_bano_cibles.sql b/bano/sql/create_table_base_bano_cibles.sql index f30b966..01a5e9c 100644 --- a/bano/sql/create_table_base_bano_cibles.sql +++ b/bano/sql/create_table_base_bano_cibles.sql @@ -55,6 +55,13 @@ CREATE INDEX IF NOT EXISTS idx_nom_fantoir_code_dept ON nom_fantoir (code_dept); CREATE INDEX IF NOT EXISTS idx_nom_fantoir_fantoir ON nom_fantoir (fantoir); CREATE INDEX IF NOT EXISTS idx_nom_fantoir_code_insee_source ON nom_fantoir (code_insee,source); +CREATE TABLE IF NOT EXISTS correspondance_fantoir_ban_osm( + code_insee text, + fantoir_ban text, + fantoir_osm text); + +CREATE INDEX IF NOT EXISTS idx_correspondance_fantoir_ban_osm_code_insee ON correspondance_fantoir_ban_osm (code_insee); +CREATE INDEX IF NOT EXISTS idx_correspondance_fantoir_ban_osm_fantoir_ban ON correspondance_fantoir_ban_osm (fantoir_ban); CREATE TABLE IF NOT EXISTS statut_fantoir ( fantoir character varying(9), From 0511e658f7d92a093570ffddd244f85f1c715ee9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Wed, 7 Jun 2023 20:15:11 +0000 Subject: [PATCH 087/163] Type d'anomalie d'adresse --- bano/sql/create_table_base_bano_pifometre.sql | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bano/sql/create_table_base_bano_pifometre.sql b/bano/sql/create_table_base_bano_pifometre.sql index 39bd8ca..2fd66ff 100644 --- a/bano/sql/create_table_base_bano_pifometre.sql +++ b/bano/sql/create_table_base_bano_pifometre.sql @@ -59,4 +59,5 @@ VALUES (0,0,'Ok'), (3,3,'Adresse invisible sur le terrain'), (4,4,'Emplacement sur une autre voie'), (5,5,'Adresse en dehors de la commune'), -(6,6,'Adresse doublon'); +(6,6,'Adresse doublon'), +(7,7,'Adresse aussi associée à une voie'); From 6bfe6f84730777e7cdfec52adda3a6a75050ad23 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Sat, 17 Jun 2023 16:23:11 +0000 Subject: [PATCH 088/163] remplacement des fantoirs BAN par sous-commune --- bano/models.py | 54 +++++++++++++++++++++++++++++--------------------- 1 file changed, 31 insertions(+), 23 deletions(-) diff --git a/bano/models.py b/bano/models.py index 27d31a0..9d1bcfc 100644 --- a/bano/models.py +++ b/bano/models.py @@ -30,6 +30,8 @@ def __init__( self.nature = nature self.source = source self.nom_normalise = hp.normalize(nom) + self.niveau = self.code_insee_ancienne_commune if self.code_insee_ancienne_commune else 'RACINE' + def __eq__(self, other): return ( @@ -54,7 +56,13 @@ def __hash__(self): ) def _as_csv_format_bano(self,correspondance): - 
return f"{correspondance.get(self.fantoir,self.fantoir)}\t{self.nom}\t{self.nature}\t{self.code_insee}\t{self.code_dept}\t{self.code_insee_ancienne_commune if self.code_insee_ancienne_commune else ''}\t{self.nom_ancienne_commune if self.nom_ancienne_commune else ''}\t{self.source}" + if self.source == 'BAN': + fantoir = remplace_fantoir_ban(correspondance,self.niveau,self.fantoir) + else: + fantoir = self.fantoir + if self.fantoir == '593507469' and self.code_insee_ancienne_commune == '59298': + print('test',self.fantoir,fantoir,self.source) + return f"{fantoir}\t{self.nom}\t{self.nature}\t{self.code_insee}\t{self.code_dept}\t{self.code_insee_ancienne_commune if self.code_insee_ancienne_commune else ''}\t{self.nom_ancienne_commune if self.nom_ancienne_commune else ''}\t{self.source}" def add_fantoir(self, topo): if not self.fantoir: @@ -149,14 +157,6 @@ def remplit_fantoir_par_nom_sous_commune(self): else: self.fantoir_par_nom_sous_commune[t.nom] = t.fantoir - # def affiche_fantoir_par_nom_sous_commune(self): - # for branche,noms_fantoir in self.fantoir_par_nom_sous_commune.items(): - # for nom,fantoir in noms_fantoir.items(): - # print(f"{branche} - {nom} : {fantoir}") - - # # print(f"{branche} - {nom}") - # # print(f"{branche} - {nom} > {self.fantoir_par_nom_sous_commune[branche][nom]}") - def enregistre(self,correspondance): sql_process( "suppression_noms_commune", @@ -214,6 +214,7 @@ def __init__( self.nom_ancienne_commune = nom_ancienne_commune self.voie_normalisee = hp.normalize(self.voie) if self.voie else None self.place_normalisee = hp.format_toponyme(self.place) if self.place else None + self.niveau = self.code_insee_ancienne_commune if self.code_insee_ancienne_commune else 'RACINE' def __hash__(self): return hash( @@ -238,7 +239,11 @@ def __eq__(self, other): ) def _as_csv_format_bano(self,correspondance): - return f"{correspondance.get(self.fantoir,self.fantoir) if self.fantoir else ''}\t{self.x}\t{self.y}\t{self.numero}\t{self.voie if self.voie else ''}\t{self.place if self.place else ''}\t{self.code_postal}\t{self.code_insee}\t{self.code_dept}\t{self.code_insee_ancienne_commune if self.code_insee_ancienne_commune else ''}\t{self.nom_ancienne_commune if self.nom_ancienne_commune else ''}\t{self.source}" + if self.source == 'BAN': + fantoir = remplace_fantoir_ban(correspondance,self.niveau,self.fantoir) + else: + fantoir = self.fantoir + return f"{fantoir if fantoir else ''}\t{self.x}\t{self.y}\t{self.numero}\t{self.voie if self.voie else ''}\t{self.place if self.place else ''}\t{self.code_postal}\t{self.code_insee}\t{self.code_dept}\t{self.code_insee_ancienne_commune if self.code_insee_ancienne_commune else ''}\t{self.nom_ancienne_commune if self.nom_ancienne_commune else ''}\t{self.source}" def _as_string(self): return f"source : {self.source}, numero : {self.numero}, voie : {self.voie} ({self.voie_normalisee}), place : {self.place}, fantoir : {self.fantoir}, code_postal:{self.code_postal}, sous_commune : {self.code_insee_ancienne_commune} - {self.nom_ancienne_commune}" @@ -702,22 +707,25 @@ def __init__(self,code_insee): self.code_insee = code_insee def process(self,noms): + niveaux = set() for n in noms: - niveau = n.code_insee_ancienne_commune if n.code_insee_ancienne_commune else 'RACINE' - cle = f"{niveau} - {n.nom_normalise}" + niveaux.add(n.niveau) if n.fantoir and n.source in ('BAN','OSM'): - if not cle in self.dic_fantoir: - self.dic_fantoir[cle] = {} - if not n.source in self.dic_fantoir[cle]: - self.dic_fantoir[cle][n.source] = {} - self.dic_fantoir[cle][n.source] 
= n.fantoir - - # [n.code_insee_ancienne_commune if n.code_insee_ancienne_commune else 'RACINE'] = n.fantoir - for f in self.dic_fantoir: - if 'BAN' in self.dic_fantoir[f] and 'OSM' in self.dic_fantoir[f] and self.dic_fantoir[f]['BAN'] != self.dic_fantoir[f]['OSM']: - self.correspondance[self.dic_fantoir[f]['BAN']] = self.dic_fantoir[f]['OSM'] + if not n.niveau in self.dic_fantoir: + self.dic_fantoir[n.niveau] = {} + if not n.nom_normalise in self.dic_fantoir[n.niveau]: + self.dic_fantoir[n.niveau][n.nom_normalise] = {} + self.dic_fantoir[n.niveau][n.nom_normalise][n.source] = n.fantoir + for n in niveaux: + self.correspondance[n] = {} + + for f in self.dic_fantoir[n]: + if 'BAN' in self.dic_fantoir[n][f] and 'OSM' in self.dic_fantoir[n][f] and self.dic_fantoir[n][f]['BAN'] != self.dic_fantoir[n][f]['OSM']: + self.correspondance[f"{n} {self.dic_fantoir[n][f]['BAN']}"] = self.dic_fantoir[n][f]['OSM'] def enregistre(self): return 0 - # print(n.nom_normalise) +def remplace_fantoir_ban(correspondance,niveau,fantoir): + cle = f"{niveau} {fantoir}" + return correspondance.get(cle,fantoir) From ca600219218c330fdf45f31e50b0ccffd5bd20c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Wed, 21 Jun 2023 20:26:26 +0000 Subject: [PATCH 089/163] +communes summary --- bano/bin.py | 9 ++++- bano/sources/datagouv_commune_summary.py | 42 +++++++++++++++++++++ bano/sql/create_table_base_bano_sources.sql | 20 ++++++++++ 3 files changed, 70 insertions(+), 1 deletion(-) create mode 100755 bano/sources/datagouv_commune_summary.py diff --git a/bano/bin.py b/bano/bin.py index 68981ce..c4c898b 100644 --- a/bano/bin.py +++ b/bano/bin.py @@ -8,7 +8,7 @@ from . import setup_db from . import rapprochement from . import boite_a_outils -from .sources import topo, ban, cog, cadastre_ld, ban2topo +from .sources import topo, ban, cog, cadastre_ld, ban2topo, datagouv_commune_summary as datagouv_cs from .constants import DEPARTEMENTS @@ -63,6 +63,13 @@ def main(): ) subparser.set_defaults(func=cadastre_ld.process) + subparser = subparsers.add_parser( + "download_commune_summary", + help="Met à jour les stats de BAL", + description="Met à jour les stats de BAL", + ) + subparser.set_defaults(func=datagouv_cs.process) + subparser = subparsers.add_parser( "update_bis_table", help="Identifie les indices de répétition b,t,q assimilables à bis, ter, quater", diff --git a/bano/sources/datagouv_commune_summary.py b/bano/sources/datagouv_commune_summary.py new file mode 100755 index 0000000..9599c20 --- /dev/null +++ b/bano/sources/datagouv_commune_summary.py @@ -0,0 +1,42 @@ +#!/usr/bin/env python + +import os +import requests + +from pathlib import Path +from email.utils import formatdate, parsedate_to_datetime + +from .. 
import batch as b +from ..db import bano_db + +def dowload(): + destination = get_destination() + headers = {} + if destination.exists(): + headers['If-Modified-Since'] = formatdate(destination.stat().st_mtime) + + resp = requests.get(f'https://plateforme.adresse.data.gouv.fr/api/communes-summary.csv', headers=headers) + id_batch = b.batch_start_log("download source", "CommuneSummary", 'FRA') + if resp.status_code == 200: + with destination.open('wb') as f: + f.write(resp.content) + b.batch_stop_log(id_batch, True) + return True + print(resp.status_code) + b.batch_stop_log(id_batch, False) + return False + + +def update_table_in_db(): + with open(get_destination()) as csv: + csv.readline() + with bano_db.cursor() as cur: + cur.execute("TRUNCATE communes_summary;") + cur.copy_from(csv,'communes_summary',sep=',',null='') + +def get_destination(): + return Path(os.environ['BAN_CACHE_DIR']) / 'communes-summary.csv' + +def process(**kwargs): + if dowload(): + update_table_in_db() diff --git a/bano/sql/create_table_base_bano_sources.sql b/bano/sql/create_table_base_bano_sources.sql index bb5d22b..782c388 100644 --- a/bano/sql/create_table_base_bano_sources.sql +++ b/bano/sql/create_table_base_bano_sources.sql @@ -62,4 +62,24 @@ CREATE TABLE IF NOT EXISTS suffixe ( CREATE INDEX IF NOT EXISTS gidx_suffixe ON suffixe USING GIST(geometrie); CREATE INDEX IF NOT EXISTS idx_suffixe ON suffixe(code_insee); +CREATE TABLE IF NOT EXISTS communes_summary ( + reg character varying(3), + dep character varying(3), + code_insee character(5), + libelle text, + population integer, + id_revision text, + date_revision text, + type_composition text, + nb_lieux_dits integer, + nb_voies integer, + nb_numeros integer, + nb_numeros_certifies integer, + analyse_adressage_nb_adresses_attendues integer, + analyse_adressage_ratio float, + analyse_adressage_deficit_adresses float, + composed_at text); + +CREATE INDEX IF NOT EXISTS communes_summary_code_insee ON communes_summary (code_insee); + GRANT SELECT ON ALL TABLES IN SCHEMA public TO public; \ No newline at end of file From 5de43e107b098491fa4ae11a89056de6383b152c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Sun, 25 Jun 2023 10:31:20 +0000 Subject: [PATCH 090/163] typologies d'adresses --- bano/sql/create_table_base_bano_pifometre.sql | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bano/sql/create_table_base_bano_pifometre.sql b/bano/sql/create_table_base_bano_pifometre.sql index 2fd66ff..8052010 100644 --- a/bano/sql/create_table_base_bano_pifometre.sql +++ b/bano/sql/create_table_base_bano_pifometre.sql @@ -60,4 +60,5 @@ VALUES (0,0,'Ok'), (4,4,'Emplacement sur une autre voie'), (5,5,'Adresse en dehors de la commune'), (6,6,'Adresse doublon'), -(7,7,'Adresse aussi associée à une voie'); +(7,7,'Adresse de lieu-dit aussi associée à une voie'), +(8,8,'Emplacement manifestement incohérent'); From 79a66c564e1568fa4b23a3a72768f2bc0b8c6912 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Tue, 4 Jul 2023 11:58:28 +0000 Subject: [PATCH 091/163] typologies d'adresses --- bano/sql/create_table_base_bano_pifometre.sql | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bano/sql/create_table_base_bano_pifometre.sql b/bano/sql/create_table_base_bano_pifometre.sql index 8052010..b6703ad 100644 --- a/bano/sql/create_table_base_bano_pifometre.sql +++ b/bano/sql/create_table_base_bano_pifometre.sql @@ -61,4 +61,5 @@ VALUES (0,0,'Ok'), (5,5,'Adresse en dehors de la commune'),
(6,6,'Adresse doublon'), (7,7,'Adresse de lieu-dit aussi associée à une voie'), -(8,8,'Emplacement manifestement incohérent'); +(8,8,'Emplacement manifestement incohérent'), +(9,9,'Ancienne numérotation plus en vigueur'); From a5cd9804b54a9060754100fd8446c7fbecd55025 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Sat, 8 Jul 2023 22:18:06 +0000 Subject: [PATCH 092/163] Adresses OSM sans rue ou place ou LD --- bano/models.py | 102 ++++++++++++------ .../complement_points_nommes_numeros_OSM.sql | 22 ++++ 2 files changed, 94 insertions(+), 30 deletions(-) create mode 100644 bano/sql/complement_points_nommes_numeros_OSM.sql diff --git a/bano/models.py b/bano/models.py index 9d1bcfc..f7a46a5 100644 --- a/bano/models.py +++ b/bano/models.py @@ -10,6 +10,7 @@ from .sql import sql_get_data, sql_process + class Nom: def __init__( self, @@ -24,14 +25,17 @@ def __init__( self.code_insee = code_insee self.code_dept = hp.get_code_dept_from_insee(code_insee) self.code_insee_ancienne_commune = code_insee_ancienne_commune - self.nom = nom.replace('\t',' ') + self.nom = nom.replace("\t", " ") self.nom_ancienne_commune = nom_ancienne_commune self.fantoir = fantoir[0:9] if fantoir else None self.nature = nature self.source = source self.nom_normalise = hp.normalize(nom) - self.niveau = self.code_insee_ancienne_commune if self.code_insee_ancienne_commune else 'RACINE' - + self.niveau = ( + self.code_insee_ancienne_commune + if self.code_insee_ancienne_commune + else "RACINE" + ) def __eq__(self, other): return ( @@ -55,19 +59,20 @@ def __hash__(self): ) ) - def _as_csv_format_bano(self,correspondance): - if self.source == 'BAN': - fantoir = remplace_fantoir_ban(correspondance,self.niveau,self.fantoir) + def _as_csv_format_bano(self, correspondance): + if self.source == "BAN": + fantoir = remplace_fantoir_ban(correspondance, self.niveau, self.fantoir) else: fantoir = self.fantoir - if self.fantoir == '593507469' and self.code_insee_ancienne_commune == '59298': - print('test',self.fantoir,fantoir,self.source) + if self.fantoir == "593507469" and self.code_insee_ancienne_commune == "59298": + print("test", self.fantoir, fantoir, self.source) return f"{fantoir}\t{self.nom}\t{self.nature}\t{self.code_insee}\t{self.code_dept}\t{self.code_insee_ancienne_commune if self.code_insee_ancienne_commune else ''}\t{self.nom_ancienne_commune if self.nom_ancienne_commune else ''}\t{self.source}" def add_fantoir(self, topo): if not self.fantoir: self.fantoir = topo.topo.get(self.nom_normalise) + class Noms: def __init__(self, code_insee): self.code_insee = code_insee @@ -157,7 +162,7 @@ def remplit_fantoir_par_nom_sous_commune(self): else: self.fantoir_par_nom_sous_commune[t.nom] = t.fantoir - def enregistre(self,correspondance): + def enregistre(self, correspondance): sql_process( "suppression_noms_commune", dict(code_insee=self.code_insee), @@ -214,7 +219,11 @@ def __init__( self.nom_ancienne_commune = nom_ancienne_commune self.voie_normalisee = hp.normalize(self.voie) if self.voie else None self.place_normalisee = hp.format_toponyme(self.place) if self.place else None - self.niveau = self.code_insee_ancienne_commune if self.code_insee_ancienne_commune else 'RACINE' + self.niveau = ( + self.code_insee_ancienne_commune + if self.code_insee_ancienne_commune + else "RACINE" + ) def __hash__(self): return hash( @@ -238,9 +247,9 @@ def __eq__(self, other): and self.code_insee_ancienne_commune == other.code_insee_ancienne_commune ) - def _as_csv_format_bano(self,correspondance): - if 
self.source == 'BAN': - fantoir = remplace_fantoir_ban(correspondance,self.niveau,self.fantoir) + def _as_csv_format_bano(self, correspondance): + if self.source == "BAN": + fantoir = remplace_fantoir_ban(correspondance, self.niveau, self.fantoir) else: fantoir = self.fantoir return f"{fantoir if fantoir else ''}\t{self.x}\t{self.y}\t{self.numero}\t{self.voie if self.voie else ''}\t{self.place if self.place else ''}\t{self.code_postal}\t{self.code_insee}\t{self.code_dept}\t{self.code_insee_ancienne_commune if self.code_insee_ancienne_commune else ''}\t{self.nom_ancienne_commune if self.nom_ancienne_commune else ''}\t{self.source}" @@ -336,7 +345,7 @@ def charge_numeros_osm(self): ) in data: fantoir = tags.get("ref:FR:FANTOIR") - if fantoir : + if fantoir: fantoir = fantoir[0:9] if fantoir and not hp.fantoir_valide(fantoir, self.code_insee): continue @@ -444,14 +453,16 @@ def complete_fantoir(self, noms): else: a.fantoir = noms.fantoir_par_nom_sous_commune.get(nom) - def enregistre(self,correspondance): + def enregistre(self, correspondance): sql_process( "suppression_adresses_commune", dict(code_insee=self.code_insee), ) io_in_csv = io.StringIO() - for a in set(self.liste): #passage en set pour dedoublonner les adresses de provenances multiples + for a in set( + self.liste + ): # passage en set pour dedoublonner les adresses de provenances multiples io_in_csv.write( a._as_csv_format_bano(correspondance) + "\n" ) # separateur $ car on trouve des virgules dans le contenu @@ -497,7 +508,7 @@ def __init__( self.lon = round(lon, 6) self.lat = round(lat, 6) self.nature = nature - self.nom = nom.replace('\t',' ') + self.nom = nom.replace("\t", " ") self.nom_normalise = hp.normalize(nom) self.fantoir = fantoir[0:9] if fantoir else None self.code_insee_ancienne_commune = code_insee_ancienne_commune @@ -505,7 +516,12 @@ def __init__( def __hash__(self): return hash( - (self.code_insee, self.source, self.nom_normalise, self.code_insee_ancienne_commune) + ( + self.code_insee, + self.source, + self.nom_normalise, + self.code_insee_ancienne_commune, + ) ) def __eq__(self, other): @@ -519,7 +535,7 @@ def __eq__(self, other): def _as_string(self): return f"source : {self.source}, nom : {self.nom} ({self.nom_normalise}), nature : {self.nature}, sous_commune : {self.code_insee_ancienne_commune}" - def _as_csv_format_bano(self,correspondance): + def _as_csv_format_bano(self, correspondance): return f"{correspondance.get(self.fantoir,self.fantoir) if self.fantoir else ''}\t{self.nom}\t{self.code_insee}\t{self.code_dept}\t{self.nature}\t{self.code_insee_ancienne_commune if self.code_insee_ancienne_commune else ''}\t{self.nom_ancienne_commune if self.nom_ancienne_commune else ''}\t{self.source}\t{self.lon}\t{self.lat}" @@ -544,7 +560,7 @@ def charge_points_nommes_lieux_dits_cadastre(self): "charge_points_nommes_lieux-dits_CADASTRE", dict(code_insee=self.code_insee), ) - for x, y, nom, code_insee_ancienne_commune,nom_ancienne_commune in data: + for x, y, nom, code_insee_ancienne_commune, nom_ancienne_commune in data: self.add_point_nomme( Point_nomme( self.code_insee, @@ -563,7 +579,14 @@ def charge_points_nommes_centroides_osm(self): "charge_points_nommes_centroides_OSM", dict(code_insee=self.code_insee), ) - for x, y, nom, code_insee_ancienne_commune, fantoir, nom_ancienne_commune in data: + for ( + x, + y, + nom, + code_insee_ancienne_commune, + fantoir, + nom_ancienne_commune, + ) in data: self.add_point_nomme( Point_nomme( self.code_insee, @@ -583,7 +606,14 @@ def charge_points_nommes_place_osm(self): 
"charge_points_nommes_places_OSM", dict(code_insee=self.code_insee), ) - for x, y, nom, code_insee_ancienne_commune, fantoir, nom_ancienne_commune in data: + for ( + x, + y, + nom, + code_insee_ancienne_commune, + fantoir, + nom_ancienne_commune, + ) in data: self.add_point_nomme( Point_nomme( self.code_insee, @@ -639,7 +669,7 @@ def complete_fantoir(self, noms): else: a.fantoir = noms.fantoir_par_nom_sous_commune.get(a.nom) - def enregistre(self,correspondance): + def enregistre(self, correspondance): sql_process( "suppression_points_nommes_commune", dict(code_insee=self.code_insee), @@ -666,6 +696,10 @@ def enregistre(self,correspondance): "lat", ), ) + sql_process( + "complement_points_nommes_numeros_OSM", + dict(code_insee=self.code_insee), + ) class Topo: @@ -700,17 +734,18 @@ def charge_topo(self): # self.code_fantoir9_vers_fantoir10[fantoir[0:9]] = fantoir self.topo[nom] = fantoir + class Correspondance_fantoir_ban_osm: - def __init__(self,code_insee): + def __init__(self, code_insee): self.dic_fantoir = {} self.correspondance = {} self.code_insee = code_insee - def process(self,noms): + def process(self, noms): niveaux = set() for n in noms: niveaux.add(n.niveau) - if n.fantoir and n.source in ('BAN','OSM'): + if n.fantoir and n.source in ("BAN", "OSM"): if not n.niveau in self.dic_fantoir: self.dic_fantoir[n.niveau] = {} if not n.nom_normalise in self.dic_fantoir[n.niveau]: @@ -720,12 +755,19 @@ def process(self,noms): self.correspondance[n] = {} for f in self.dic_fantoir[n]: - if 'BAN' in self.dic_fantoir[n][f] and 'OSM' in self.dic_fantoir[n][f] and self.dic_fantoir[n][f]['BAN'] != self.dic_fantoir[n][f]['OSM']: - self.correspondance[f"{n} {self.dic_fantoir[n][f]['BAN']}"] = self.dic_fantoir[n][f]['OSM'] + if ( + "BAN" in self.dic_fantoir[n][f] + and "OSM" in self.dic_fantoir[n][f] + and self.dic_fantoir[n][f]["BAN"] != self.dic_fantoir[n][f]["OSM"] + ): + self.correspondance[ + f"{n} {self.dic_fantoir[n][f]['BAN']}" + ] = self.dic_fantoir[n][f]["OSM"] def enregistre(self): return 0 -def remplace_fantoir_ban(correspondance,niveau,fantoir): + +def remplace_fantoir_ban(correspondance, niveau, fantoir): cle = f"{niveau} {fantoir}" - return correspondance.get(cle,fantoir) + return correspondance.get(cle, fantoir) diff --git a/bano/sql/complement_points_nommes_numeros_OSM.sql b/bano/sql/complement_points_nommes_numeros_OSM.sql new file mode 100644 index 0000000..4a0adf4 --- /dev/null +++ b/bano/sql/complement_points_nommes_numeros_OSM.sql @@ -0,0 +1,22 @@ +INSERT INTO bano_points_nommes (fantoir,nom,code_insee,code_dept,nature,code_insee_ancienne_commune,nom_ancienne_commune,source,lon,lat) +SELECT fantoir, + nom_voie, + code_insee, + code_dept, + 'numero', + code_insee_ancienne_commune, + nom_ancienne_commune, + 'OSM', + lon, + lat +FROM (SELECT a.*, + RANK() OVER (PARTITION BY fantoir ORDER BY numero) rang + FROM (SELECT fantoir FROM bano_adresses WHERE code_insee = '__code_insee__' AND source = 'OSM' + EXCEPT + SELECT fantoir from bano_points_nommes WHERE code_insee = '__code_insee__' AND source = 'OSM') f + JOIN (SELECT * + FROM bano_adresses + WHERE source = 'OSM' AND + code_insee = '__code_insee__')a + USING (fantoir))t +WHERE rang = 1; \ No newline at end of file From 25d722bf23081068c825d24c7d6f0e6e29cdf8ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Sat, 8 Jul 2023 22:18:53 +0000 Subject: [PATCH 093/163] Crons --- cron_bano.sh | 46 +++++++++++++++++----------------------------- cron_osm.sh | 2 +- 2 files changed, 18 insertions(+), 30 
deletions(-) diff --git a/cron_bano.sh b/cron_bano.sh index e9b18f1..cee6fb5 100755 --- a/cron_bano.sh +++ b/cron_bano.sh @@ -1,6 +1,6 @@ #!/bin/bash -source /data/work/vdct/bano_venv37/bin/activate +source /data/project/bano_v3/venv_v3/bin/activate SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" cd $SCRIPT_DIR @@ -12,44 +12,32 @@ source config pip install -e . # Sources -cat deplist.txt | parallel -j 4 export LANG=fr_FR.UTF-8\; bano download_bal CADASTRE {1} -cat deplist.txt | parallel -j 4 export LANG=fr_FR.UTF-8\; bano download_bal BAL {1} -cat deplist.txt | parallel -j 4 export LANG=fr_FR.UTF-8\; bano download_ban {1} -cat deplist.txt | parallel -j 4 export LANG=fr_FR.UTF-8\; bano download_cadastre lieux_dits {1} -bano update_code_cadastre -bano update_bis_table -./copy_table_from_osm_to_cadastre.sh rep_b_as_bis +cat deplist.txt | parallel -j 4 export LANG=fr_FR.UTF-8\; bano charge_ban {1} +cat deplist.txt | parallel -j 4 export LANG=fr_FR.UTF-8\; bano charge_ld_cadastre {1} + +# bano update_bis_table +# ./copy_table_from_osm_to_cadastre.sh rep_b_as_bis # BANO -bano update_insee_lists -cat insee_osm.csv | parallel -j 4 export LANG=fr_FR.UTF-8\; bano process_commune OSM --code_insee {1} -cat insee_locales.csv | parallel -j 4 export LANG=fr_FR.UTF-8\; bano process_commune BAL --code_insee {1} -cat deplist.txt | parallel -j 4 export LANG=fr_FR.UTF-8\; bano process_commune BAN --depts {1} -cat deplist.txt | parallel -j 4 export LANG=fr_FR.UTF-8\; bano process_commune_lieux-dits --depts {1} +# bano update_insee_lists +cat deplist.txt | parallel -j 4 export LANG=fr_FR.UTF-8\; bano rapprochement --dept {1} # Mise à jour quotidienne dans la base cadastre des couches des polygones postaux d'OSM et des statuts admin de communes en vue des exports -./copy_table_from_osm_to_cadastre.sh planet_osm_postal_code -./copy_table_from_osm_to_cadastre.sh infos_communes - -psql -d osm -U cadastre -f sql/create_table_polygones_communes.sql -./copy_table_from_osm_to_cadastre.sh polygones_insee -./copy_table_from_osm_to_cadastre.sh polygones_insee_geo -psql -d osm -U cadastre -f sql/create_table_polygones_postaux.sql -./copy_table_from_osm_to_cadastre.sh polygones_postaux -./copy_table_from_osm_to_cadastre.sh ban -psql -d cadastre -U cadastre -f sql/post_copie_ban.sql +# psql -d osm -U cadastre -f sql/create_table_polygones_communes.sql +# psql -d osm -U cadastre -f sql/create_table_polygones_postaux.sql +# psql -d cadastre -U cadastre -f sql/post_copie_ban.sql # exports -cat deplist.txt | parallel -j 4 bano export {1} +# cat deplist.txt | parallel -j 4 bano export {1} # copie+zip dans le dossier web -cat deplist.txt | parallel -j 4 bano publish {1} -bano publish_aggregate +# cat deplist.txt | parallel -j 4 bano publish {1} +# bano publish_aggregate # ménage PostgreSQL -psql -d cadastre -U cadastre -c "VACUUM cumul_adresses;" -psql -d cadastre -U cadastre -c "VACUUM cumul_voies;" -psql -d cadastre -U cadastre -c "VACUUM cumul_places;" +psql -d cadastre -U cadastre -c "VACUUM bano_adresses;" +psql -d cadastre -U cadastre -c "VACUUM bano_points_nommes;" +psql -d cadastre -U cadastre -c "VACUUM nom_fantoir;" psql -d cadastre -U cadastre -c "GRANT SELECT ON ALL TABLES IN SCHEMA PUBLIC TO PUBLIC"; \ No newline at end of file diff --git a/cron_osm.sh b/cron_osm.sh index 9334225..08d42a7 100755 --- a/cron_osm.sh +++ b/cron_osm.sh @@ -19,7 +19,7 @@ echo debut >> $SCRIPT_DIR/cron.log touch ${lockfile} osmosis --rri workingDirectory=${DOWNLOAD_DIR} --wxc ${DOWNLOAD_DIR}/changes.osc.gz -imposm diff -config 
$SCRIPT_DIR/imposm.config -dbschema-production osm -expiretiles-dir $EXPIRE_TILES_DIR -expiretiles-zoom 16 ${DOWNLOAD_DIR}/changes.osc.gz +imposm diff -config $SCRIPT_DIR/imposm.config -dbschema-production osm ${DOWNLOAD_DIR}/changes.osc.gz #$SCRIPT_DIR/update_table_infos_communes.sh rm ${lockfile} From 1a7d7f7b77c3ca93148bf2fb533a6336e40c527e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Sat, 8 Jul 2023 22:20:41 +0000 Subject: [PATCH 094/163] Gestion du 304 ban --- bano/sources/ban.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/bano/sources/ban.py b/bano/sources/ban.py index 95428fd..d82dcc8 100644 --- a/bano/sources/ban.py +++ b/bano/sources/ban.py @@ -31,10 +31,12 @@ def process_ban(departements, **kwargs): if not (import_to_pg(dept)): depts_en_echec.append(dept) print("depts_en_echec", depts_en_echec) + else: + pre_process_suffixe.process(dept) for dept in depts_en_echec: print(f"Département {dept}") import_to_pg_subp(dept) - pre_process_suffixe.process(departements) + pre_process_suffixe.process(dept) def download(departement): @@ -55,6 +57,9 @@ def download(departement): os.utime(destination, (mtime, mtime)) b.batch_stop_log(id_batch, True) return True + if resp.status_code == 304: + b.batch_stop_log(id_batch, True) + return False print(resp.status_code) b.batch_stop_log(id_batch, False) return False From db8ad5982acb3fb555c91ad67ede8b26ce59f2cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Sun, 9 Jul 2023 14:07:47 +0000 Subject: [PATCH 095/163] Index pour Pifometre --- bano/sql/create_table_base_bano_cibles.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/bano/sql/create_table_base_bano_cibles.sql b/bano/sql/create_table_base_bano_cibles.sql index 01a5e9c..99aac26 100644 --- a/bano/sql/create_table_base_bano_cibles.sql +++ b/bano/sql/create_table_base_bano_cibles.sql @@ -39,6 +39,7 @@ CREATE INDEX IF NOT EXISTS idx_bano_points_nommes_fantoir ON bano_points_nommes CREATE INDEX IF NOT EXISTS idx_bano_points_nommes_fantoir_source ON bano_points_nommes (fantoir,source); CREATE INDEX IF NOT EXISTS idx_bano_points_nommes_code_insee_source ON bano_points_nommes (code_insee,source); CREATE INDEX IF NOT EXISTS idx_bano_points_nommes_code_insee_nature ON bano_points_nommes (code_insee,nature); +CREATE INDEX IF NOT EXISTS idx_bano_points_nommes_code_dept_nature ON bano_points_nommes (code_dept,nature); CREATE TABLE IF NOT EXISTS nom_fantoir ( fantoir text, From 5d39cf7f3adee1ecacaf11336e0881579e5755ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Sun, 9 Jul 2023 14:13:43 +0000 Subject: [PATCH 096/163] Index pour Pifometre --- bano/sql/create_table_base_bano_cog.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/bano/sql/create_table_base_bano_cog.sql b/bano/sql/create_table_base_bano_cog.sql index 09fa34e..cbaf363 100644 --- a/bano/sql/create_table_base_bano_cog.sql +++ b/bano/sql/create_table_base_bano_cog.sql @@ -12,6 +12,7 @@ CREATE TABLE IF NOT EXISTS cog_commune ( can character(5), comparent character(5)); CREATE INDEX IF NOT EXISTS idx_cog_commune_com ON cog_commune(com); +CREATE INDEX IF NOT EXISTS idx_cog_commune_dep ON cog_commune(dep); CREATE TABLE IF NOT EXISTS cog_canton ( can character(5), From 3c8818b8e3e58261cc9112faf3df150ff4588f6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Sun, 9 Jul 2023 22:29:02 +0000 Subject: [PATCH 097/163] substitution & ET (#265) --- 
bano/dictionnaires/expand_noms.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/bano/dictionnaires/expand_noms.txt b/bano/dictionnaires/expand_noms.txt index 086d4a8..56499c2 100644 --- a/bano/dictionnaires/expand_noms.txt +++ b/bano/dictionnaires/expand_noms.txt @@ -1,6 +1,7 @@ # # Format des lignes : # +& ET B.C.P BATAILLON DE CHASSEURS A PIEDS CHARLES DE GAUL CHARLES DE GAULLE CHP CHAMP From c669e048e177c6537fd6a8aa0ad00106aecbd27b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Sun, 9 Jul 2023 22:29:40 +0000 Subject: [PATCH 098/163] Adresses OSM sans rue ou place ou LD - fallback des noms --- bano/sql/complement_points_nommes_numeros_OSM.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bano/sql/complement_points_nommes_numeros_OSM.sql b/bano/sql/complement_points_nommes_numeros_OSM.sql index 4a0adf4..563dc97 100644 --- a/bano/sql/complement_points_nommes_numeros_OSM.sql +++ b/bano/sql/complement_points_nommes_numeros_OSM.sql @@ -1,6 +1,6 @@ INSERT INTO bano_points_nommes (fantoir,nom,code_insee,code_dept,nature,code_insee_ancienne_commune,nom_ancienne_commune,source,lon,lat) SELECT fantoir, - nom_voie, + COALESCE(nom_voie,nom_place), code_insee, code_dept, 'numero', From a9fe576b3f74d552b1474c49492bc16d5af1a6bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Sat, 15 Jul 2023 15:58:15 +0000 Subject: [PATCH 099/163] =?UTF-8?q?Ajout=20des=20FANTOIR=20BAN=20seulement?= =?UTF-8?q?=20si=20pr=C3=A9sents=20dans=20TOPO?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- bano/models.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/bano/models.py b/bano/models.py index f7a46a5..5310e51 100644 --- a/bano/models.py +++ b/bano/models.py @@ -305,11 +305,11 @@ def charge_numeros_ban(self, topo): code_insee_ancienne_commune, nom_ancienne_commune, ) in data: - if id_fantoir: + if id_fantoir and f"{id_fantoir[0:5]}{id_fantoir[6:10]}" in topo.topo: fantoir = f"{id_fantoir[0:5]}{id_fantoir[6:10]}" - # fantoir = topo.code_fantoir9_vers_fantoir10.get(fantoir9) else: fantoir = None + self.add_adresse( Adresse( self.code_insee, @@ -734,7 +734,6 @@ def charge_topo(self): # self.code_fantoir9_vers_fantoir10[fantoir[0:9]] = fantoir self.topo[nom] = fantoir - class Correspondance_fantoir_ban_osm: def __init__(self, code_insee): self.dic_fantoir = {} From cf6f0742f95bfc3fbfd6eb53eae9df543f47c15a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Sat, 15 Jul 2023 16:41:25 +0000 Subject: [PATCH 100/163] colonne FANTOIR dans la source BAN --- bano/sql/create_table_base_bano_sources.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/bano/sql/create_table_base_bano_sources.sql b/bano/sql/create_table_base_bano_sources.sql index 782c388..8028c79 100644 --- a/bano/sql/create_table_base_bano_sources.sql +++ b/bano/sql/create_table_base_bano_sources.sql @@ -17,6 +17,7 @@ CREATE INDEX IF NOT EXISTS idx_topo_fantoir ON topo(fantoir); CREATE TABLE IF NOT EXISTS ban ( id text, id_fantoir text, + fantoir text GENERATED ALWAYS AS (substr(id_fantoir,0,6)||substr(id_fantoir,7,10)) STORED, numero text, rep text, nom_voie text, From f19559d03ef700a02d6911212e84d9c495a6808e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Sun, 16 Jul 2023 14:50:34 +0000 Subject: [PATCH 101/163] effacement auto du lock apres 4h de presence --- cron_osm.sh | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 
deletions(-) diff --git a/cron_osm.sh b/cron_osm.sh index 08d42a7..d003fba 100755 --- a/cron_osm.sh +++ b/cron_osm.sh @@ -9,8 +9,14 @@ lockfile=${SCRIPT_DIR}/imposm.lock if test -f ${lockfile} then - echo `date`" : Process deja en cours" >> $SCRIPT_DIR/cron.log - exit 0 + diff_age=$((`date +%s` - `stat -c %Y $lockfile`)) + # echo $diff_age + if [ $diff_age -gt 14400 ];then + rm imposm.lock + else + echo `date`" : Process deja en cours" + exit 0 + fi fi echo `date` > $SCRIPT_DIR/cron.log From 9c691cbdd8f984e31aa6a79d1758e100a857e3b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Sun, 16 Jul 2023 14:55:58 +0000 Subject: [PATCH 102/163] cron bano +table bis --- cron_bano.sh | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/cron_bano.sh b/cron_bano.sh index cee6fb5..7d3045f 100755 --- a/cron_bano.sh +++ b/cron_bano.sh @@ -14,10 +14,7 @@ pip install -e . # Sources cat deplist.txt | parallel -j 4 export LANG=fr_FR.UTF-8\; bano charge_ban {1} cat deplist.txt | parallel -j 4 export LANG=fr_FR.UTF-8\; bano charge_ld_cadastre {1} - -# bano update_bis_table -# ./copy_table_from_osm_to_cadastre.sh rep_b_as_bis - +bano update_bis_table # BANO # bano update_insee_lists @@ -37,7 +34,7 @@ cat deplist.txt | parallel -j 4 export LANG=fr_FR.UTF-8\; bano rapprochem # bano publish_aggregate # ménage PostgreSQL -psql -d cadastre -U cadastre -c "VACUUM bano_adresses;" -psql -d cadastre -U cadastre -c "VACUUM bano_points_nommes;" -psql -d cadastre -U cadastre -c "VACUUM nom_fantoir;" -psql -d cadastre -U cadastre -c "GRANT SELECT ON ALL TABLES IN SCHEMA PUBLIC TO PUBLIC"; \ No newline at end of file +psql -d bano -U cadastre -c "VACUUM bano_adresses;" +psql -d bano -U cadastre -c "VACUUM bano_points_nommes;" +psql -d bano -U cadastre -c "VACUUM nom_fantoir;" +psql -d bano -U cadastre -c "GRANT SELECT ON ALL TABLES IN SCHEMA PUBLIC TO PUBLIC"; From 6c4595102de2e8e0e47a9d4b1ee4959dcbd71d74 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Sun, 16 Jul 2023 14:57:28 +0000 Subject: [PATCH 103/163] Calcul du code FANTOIR au chargement BAN --- bano/models.py | 6 ++---- bano/sql/charge_ban_commune.sql | 6 +++--- bano/sql/update_table_rep_b_as_bis.sql | 10 +++++----- 3 files changed, 10 insertions(+), 12 deletions(-) diff --git a/bano/models.py b/bano/models.py index 5310e51..5b8753f 100644 --- a/bano/models.py +++ b/bano/models.py @@ -296,7 +296,7 @@ def add_adresse(self, ad): def charge_numeros_ban(self, topo): data = sql_get_data("charge_ban_commune", dict(code_insee=self.code_insee)) for ( - id_fantoir, + fantoir, numero, voie, lon, @@ -305,9 +305,7 @@ def charge_numeros_ban(self, topo): code_insee_ancienne_commune, nom_ancienne_commune, ) in data: - if id_fantoir and f"{id_fantoir[0:5]}{id_fantoir[6:10]}" in topo.topo: - fantoir = f"{id_fantoir[0:5]}{id_fantoir[6:10]}" - else: + if not (fantoir and fantoir in topo.topo): fantoir = None self.add_adresse( diff --git a/bano/sql/charge_ban_commune.sql b/bano/sql/charge_ban_commune.sql index 979aad1..3035ab7 100644 --- a/bano/sql/charge_ban_commune.sql +++ b/bano/sql/charge_ban_commune.sql @@ -1,7 +1,7 @@ WITH j AS -(SELECT id_fantoir, +(SELECT fantoir, numero, nom_voie, lon, @@ -22,9 +22,9 @@ AS nom_ancienne_commune FROM ban b LEFT OUTER JOIN rep_b_as_bis r -USING (id_fantoir,numero) +USING (fantoir,numero) WHERE code_insee = '__code_insee__') -SELECT id_fantoir, +SELECT fantoir, TRIM (BOTH FROM (numero||' '||COALESCE(rep_bis,rep,''))), nom_voie, lon, diff --git 
a/bano/sql/update_table_rep_b_as_bis.sql b/bano/sql/update_table_rep_b_as_bis.sql index 4986222..a813dd8 100644 --- a/bano/sql/update_table_rep_b_as_bis.sql +++ b/bano/sql/update_table_rep_b_as_bis.sql @@ -1,28 +1,28 @@ DROP TABLE IF EXISTS rep_b_as_bis CASCADE; CREATE TABLE rep_b_as_bis AS -SELECT id_fantoir, +SELECT fantoir, numero FROM ban WHERE rep = 'b' EXCEPT -(SELECT id_fantoir, - numero +(SELECT fantoir, + numero FROM ban WHERE rep = 'a' UNION -SELECT id_fantoir, +SELECT fantoir, numero FROM ban WHERE rep = 'c' UNION -SELECT id_fantoir, +SELECT fantoir, numero FROM ban WHERE rep = 'd'); From 1899c53efcdacb5c4aa1df25a0c9fa47e848dd16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Mon, 17 Jul 2023 20:05:08 +0000 Subject: [PATCH 104/163] Modifs pour Pifometre --- bano/sql/create_table_base_bano_pifometre.sql | 3 ++- bano/sql/create_table_base_bano_sources.sql | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/bano/sql/create_table_base_bano_pifometre.sql b/bano/sql/create_table_base_bano_pifometre.sql index b6703ad..0db9e2b 100644 --- a/bano/sql/create_table_base_bano_pifometre.sql +++ b/bano/sql/create_table_base_bano_pifometre.sql @@ -35,7 +35,8 @@ VALUES (0,0,'Ok'), (18,17,'FANTOIR annulé non remplacé'), (19,18,'Point cardinal superflu'), (20,19,'Voie en projet'), -(21,20,'Lieu-dit sur plusieurs communes'); +(21,20,'Lieu-dit sur plusieurs communes'), +(22,21,'Chaussée hors commune'); CREATE TABLE IF NOT EXISTS statut_numero ( numero text, diff --git a/bano/sql/create_table_base_bano_sources.sql b/bano/sql/create_table_base_bano_sources.sql index 8028c79..5d99fd1 100644 --- a/bano/sql/create_table_base_bano_sources.sql +++ b/bano/sql/create_table_base_bano_sources.sql @@ -42,6 +42,7 @@ CREATE TABLE IF NOT EXISTS ban ( geometrie geometry (Point, 4326) GENERATED ALWAYS AS (ST_Point(lon,lat)) STORED); CREATE INDEX IF NOT EXISTS idx_ban_code_insee ON ban(code_insee); +CREATE INDEX IF NOT EXISTS idx_ban_fantoir ON ban(fantoir); CREATE INDEX IF NOT EXISTS gidx_ban ON ban(geometrie); CREATE TABLE IF NOT EXISTS lieux_dits ( From b439b4ade2815821baf6b28663235fa3eae93ef7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Fri, 28 Jul 2023 08:41:14 +0000 Subject: [PATCH 105/163] messages cron --- cron_bano.sh | 7 +++++++ cron_osm.sh | 11 ++++++----- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/cron_bano.sh b/cron_bano.sh index 7d3045f..75c9bec 100755 --- a/cron_bano.sh +++ b/cron_bano.sh @@ -5,6 +5,8 @@ source /data/project/bano_v3/venv_v3/bin/activate SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" cd $SCRIPT_DIR +echo 'debut du cron BANO' >> $SCRIPT_DIR/cron.log + source config ./arborescence.sh @@ -16,10 +18,14 @@ cat deplist.txt | parallel -j 4 export LANG=fr_FR.UTF-8\; bano charge_ban {1} cat deplist.txt | parallel -j 4 export LANG=fr_FR.UTF-8\; bano charge_ld_cadastre {1} bano update_bis_table +echo 'sources ok' >> $SCRIPT_DIR/cron.log + # BANO # bano update_insee_lists cat deplist.txt | parallel -j 4 export LANG=fr_FR.UTF-8\; bano rapprochement --dept {1} +echo 'rapprochement ok' >> $SCRIPT_DIR/cron.log + # Mise à jour quotidienne dans la base cadastre des couches des polygones postaux d'OSM et des statuts admin de communes en vue des exports # psql -d osm -U cadastre -f sql/create_table_polygones_communes.sql @@ -38,3 +44,4 @@ psql -d bano -U cadastre -c "VACUUM bano_adresses;" psql -d bano -U cadastre -c "VACUUM bano_points_nommes;" psql -d bano -U cadastre -c "VACUUM 
nom_fantoir;" psql -d bano -U cadastre -c "GRANT SELECT ON ALL TABLES IN SCHEMA PUBLIC TO PUBLIC"; +echo 'fin du cron BANO' >> $SCRIPT_DIR/cron.log diff --git a/cron_osm.sh b/cron_osm.sh index d003fba..f1b7213 100755 --- a/cron_osm.sh +++ b/cron_osm.sh @@ -7,20 +7,21 @@ source $SCRIPT_DIR/config lockfile=${SCRIPT_DIR}/imposm.lock +echo `date`>> $SCRIPT_DIR/cron.log +echo debut >> $SCRIPT_DIR/cron.log + if test -f ${lockfile} then diff_age=$((`date +%s` - `stat -c %Y $lockfile`)) - # echo $diff_age if [ $diff_age -gt 14400 ];then - rm imposm.lock + echo "Effacement du lock" >> $SCRIPT_DIR/cron.log + rm ${lockfile} else - echo `date`" : Process deja en cours" + echo `date`" : Process deja en cours" >> $SCRIPT_DIR/cron.log exit 0 fi fi -echo `date` > $SCRIPT_DIR/cron.log -echo debut >> $SCRIPT_DIR/cron.log touch ${lockfile} From 7554a7d8f8cbb851bc7342f7375aaa8f7e270ff2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Fri, 28 Jul 2023 08:43:40 +0000 Subject: [PATCH 106/163] suppr test Lille --- bano/models.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/bano/models.py b/bano/models.py index 5b8753f..fe871c0 100644 --- a/bano/models.py +++ b/bano/models.py @@ -64,8 +64,6 @@ def _as_csv_format_bano(self, correspondance): fantoir = remplace_fantoir_ban(correspondance, self.niveau, self.fantoir) else: fantoir = self.fantoir - if self.fantoir == "593507469" and self.code_insee_ancienne_commune == "59298": - print("test", self.fantoir, fantoir, self.source) return f"{fantoir}\t{self.nom}\t{self.nature}\t{self.code_insee}\t{self.code_dept}\t{self.code_insee_ancienne_commune if self.code_insee_ancienne_commune else ''}\t{self.nom_ancienne_commune if self.nom_ancienne_commune else ''}\t{self.source}" def add_fantoir(self, topo): From cb28946082f77335e5584a6809769ab04217a208 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Fri, 4 Aug 2023 10:32:10 +0000 Subject: [PATCH 107/163] =?UTF-8?q?Suppression=20des=20noms=20issus=20des?= =?UTF-8?q?=20arr=C3=AAts=20de=20bus=20(#363)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- bano/sql/charge_points_nommes_centroides_OSM.sql | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/bano/sql/charge_points_nommes_centroides_OSM.sql b/bano/sql/charge_points_nommes_centroides_OSM.sql index 7277d39..153520c 100644 --- a/bano/sql/charge_points_nommes_centroides_OSM.sql +++ b/bano/sql/charge_points_nommes_centroides_OSM.sql @@ -15,6 +15,7 @@ LEFT OUTER JOIN (SELECT * FROM polygones_insee_a9 WHERE insee_a8 = '__code_insee ON ST_Intersects(l.way, a9.geometrie) WHERE (l.highway != '' OR l.waterway = 'dam') AND + l.highway != 'bus_stop' AND l.name != '' UNION ALL SELECT ST_PointOnSurface(l.way), @@ -30,6 +31,7 @@ ON ST_Intersects(l.way, p.geometrie) LEFT OUTER JOIN (SELECT * FROM polygones_insee_a9 WHERE insee_a8 = '__code_insee__') a9 ON ST_Intersects(l.way, a9.geometrie) WHERE (l.highway||"ref:FR:FANTOIR" != '' OR l.landuse = 'residential' OR l.amenity = 'parking') AND + l.highway != 'bus_stop' AND l.name != '' UNION ALL SELECT l.way, @@ -108,6 +110,7 @@ FROM (SELECT pl.way point, ST_Intersects(pl.way, p.way) WHERE (pl."ref:FR:FANTOIR" != '' OR pl.junction != '') AND + pl.highway != 'bus_stop' AND pl.name != '' UNION SELECT ST_Centroid(pl.way), @@ -121,6 +124,7 @@ FROM (SELECT pl.way point, pl.landuse = 'residential' OR pl.place = 'square' OR pl.amenity = 'school') AND + pl.highway != 'bus_stop' AND pl.name != '')c LEFT OUTER JOIN (SELECT * FROM 
polygones_insee_a9 WHERE insee_a8 = '__code_insee__') a9 ON ST_Intersects(c.point, a9.geometrie)) From 1cc00700b90a52f4954258c4cc93bcd33c32abcc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Fri, 4 Aug 2023 10:50:51 +0000 Subject: [PATCH 108/163] =?UTF-8?q?Suppression=20des=20noms=20issus=20des?= =?UTF-8?q?=20arr=C3=AAts=20de=20bus=20-=20platform=20(#363)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- bano/sql/charge_points_nommes_centroides_OSM.sql | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/bano/sql/charge_points_nommes_centroides_OSM.sql b/bano/sql/charge_points_nommes_centroides_OSM.sql index 153520c..8ef2af6 100644 --- a/bano/sql/charge_points_nommes_centroides_OSM.sql +++ b/bano/sql/charge_points_nommes_centroides_OSM.sql @@ -15,7 +15,7 @@ LEFT OUTER JOIN (SELECT * FROM polygones_insee_a9 WHERE insee_a8 = '__code_insee ON ST_Intersects(l.way, a9.geometrie) WHERE (l.highway != '' OR l.waterway = 'dam') AND - l.highway != 'bus_stop' AND + l.highway NOT IN ('bus_stop','platform') AND l.name != '' UNION ALL SELECT ST_PointOnSurface(l.way), @@ -31,7 +31,7 @@ ON ST_Intersects(l.way, p.geometrie) LEFT OUTER JOIN (SELECT * FROM polygones_insee_a9 WHERE insee_a8 = '__code_insee__') a9 ON ST_Intersects(l.way, a9.geometrie) WHERE (l.highway||"ref:FR:FANTOIR" != '' OR l.landuse = 'residential' OR l.amenity = 'parking') AND - l.highway != 'bus_stop' AND + l.highway NOT IN ('bus_stop','platform') AND l.name != '' UNION ALL SELECT l.way, @@ -110,7 +110,7 @@ FROM (SELECT pl.way point, ST_Intersects(pl.way, p.way) WHERE (pl."ref:FR:FANTOIR" != '' OR pl.junction != '') AND - pl.highway != 'bus_stop' AND + pl.highway NOT IN ('bus_stop','platform') AND pl.name != '' UNION SELECT ST_Centroid(pl.way), @@ -124,7 +124,7 @@ FROM (SELECT pl.way point, pl.landuse = 'residential' OR pl.place = 'square' OR pl.amenity = 'school') AND - pl.highway != 'bus_stop' AND + pl.highway NOT IN ('bus_stop','platform') AND pl.name != '')c LEFT OUTER JOIN (SELECT * FROM polygones_insee_a9 WHERE insee_a8 = '__code_insee__') a9 ON ST_Intersects(c.point, a9.geometrie)) From 5126b068632121b39c085ef97c8b5bbc64ec3f4f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Fri, 4 Aug 2023 13:06:16 +0000 Subject: [PATCH 109/163] Suppression des noms d'admin_centre pour les rapprochements (#364) --- bano/sql/charge_points_nommes_places_OSM.sql | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/bano/sql/charge_points_nommes_places_OSM.sql b/bano/sql/charge_points_nommes_places_OSM.sql index 08a2bbb..d6d9932 100644 --- a/bano/sql/charge_points_nommes_places_OSM.sql +++ b/bano/sql/charge_points_nommes_places_OSM.sql @@ -12,8 +12,11 @@ FROM (SELECT way FROM planet_osm_polygon WHERE "ref:INSEE" = '__code_insee__' JOIN (SELECT * FROM planet_osm_point WHERE place != '' AND name != '') pt ON pt.way && p.way AND ST_Intersects(pt.way, p.way) +LEFT OUTER JOIN (SELECT osm_id FROM planet_osm_communes_statut WHERE "ref:INSEE" = '__code_insee__' AND member_role = 'admin_centre') admin_centre +ON pt.osm_id = admin_centre.osm_id LEFT OUTER JOIN (SELECT * FROM polygones_insee_a9 WHERE insee_a8 = '__code_insee__') a9 -ON ST_Intersects(pt.way, a9.geometrie)) +ON ST_Intersects(pt.way, a9.geometrie) +WHERE admin_centre.osm_id IS NULL) SELECT ST_x(way), ST_y(way), name, From e0d5970935da2de9e4eabc29fe606c2c7784eb1e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= 
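Both charge_points_nommes_*.sql scripts touched here rely on a literal __code_insee__ marker that is substituted before execution. A hedged sketch of that convention, assuming a psycopg2 connection and the bano/sql layout; run_commune_script is a hypothetical helper that mirrors what sql_process appears to do:

from pathlib import Path
import psycopg2

def run_commune_script(conn, script_name, code_insee):
    # Read the script, fill in the __code_insee__ placeholder, execute it.
    sql = (Path("bano/sql") / f"{script_name}.sql").read_text()
    sql = sql.replace("__code_insee__", code_insee)
    with conn.cursor() as cur:
        cur.execute(sql)

conn = psycopg2.connect("dbname=cadastre user=cadastre")  # assumed DSN
run_commune_script(conn, "charge_points_nommes_places_OSM", "59350")
conn.commit()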
Date: Sun, 6 Aug 2023 22:11:01 +0000 Subject: [PATCH 110/163] +id_ban & fix correspondance fantoir BAN OSM --- bano/models.py | 9 ++++++++- bano/sql/charge_ban_commune.sql | 6 ++++-- bano/sql/create_table_base_bano_cibles.sql | 1 + 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/bano/models.py b/bano/models.py index fe871c0..5a4d4da 100644 --- a/bano/models.py +++ b/bano/models.py @@ -202,6 +202,7 @@ def __init__( code_postal=None, code_insee_ancienne_commune=None, nom_ancienne_commune=None, + id_ban=None ): self.code_insee = code_insee self.code_dept = hp.get_code_dept_from_insee(code_insee) @@ -222,6 +223,7 @@ def __init__( if self.code_insee_ancienne_commune else "RACINE" ) + self.id_ban = id_ban def __hash__(self): return hash( @@ -250,7 +252,7 @@ def _as_csv_format_bano(self, correspondance): fantoir = remplace_fantoir_ban(correspondance, self.niveau, self.fantoir) else: fantoir = self.fantoir - return f"{fantoir if fantoir else ''}\t{self.x}\t{self.y}\t{self.numero}\t{self.voie if self.voie else ''}\t{self.place if self.place else ''}\t{self.code_postal}\t{self.code_insee}\t{self.code_dept}\t{self.code_insee_ancienne_commune if self.code_insee_ancienne_commune else ''}\t{self.nom_ancienne_commune if self.nom_ancienne_commune else ''}\t{self.source}" + return f"{fantoir if fantoir else ''}\t{self.x}\t{self.y}\t{self.numero}\t{self.voie if self.voie else ''}\t{self.place if self.place else ''}\t{self.code_postal}\t{self.code_insee}\t{self.code_dept}\t{self.code_insee_ancienne_commune if self.code_insee_ancienne_commune else ''}\t{self.nom_ancienne_commune if self.nom_ancienne_commune else ''}\t{self.source}\t{self.id_ban if self.id_ban else ''}" def _as_string(self): return f"source : {self.source}, numero : {self.numero}, voie : {self.voie} ({self.voie_normalisee}), place : {self.place}, fantoir : {self.fantoir}, code_postal:{self.code_postal}, sous_commune : {self.code_insee_ancienne_commune} - {self.nom_ancienne_commune}" @@ -302,6 +304,7 @@ def charge_numeros_ban(self, topo): code_postal, code_insee_ancienne_commune, nom_ancienne_commune, + id_ban ) in data: if not (fantoir and fantoir in topo.topo): fantoir = None @@ -318,6 +321,7 @@ def charge_numeros_ban(self, topo): code_postal=code_postal, code_insee_ancienne_commune=code_insee_ancienne_commune, nom_ancienne_commune=nom_ancienne_commune, + id_ban=id_ban ) ) @@ -481,6 +485,7 @@ def enregistre(self, correspondance): "code_insee_ancienne_commune", "nom_ancienne_commune", "source", + "id_ban" ), ) @@ -739,6 +744,8 @@ def __init__(self, code_insee): def process(self, noms): niveaux = set() for n in noms: + if n.source not in ("BAN", "OSM") or not n.fantoir: + continue niveaux.add(n.niveau) if n.fantoir and n.source in ("BAN", "OSM"): if not n.niveau in self.dic_fantoir: diff --git a/bano/sql/charge_ban_commune.sql b/bano/sql/charge_ban_commune.sql index 3035ab7..aa5b31d 100644 --- a/bano/sql/charge_ban_commune.sql +++ b/bano/sql/charge_ban_commune.sql @@ -19,7 +19,8 @@ AS END as rep_bis, code_postal, code_insee_ancienne_commune, - nom_ancienne_commune + nom_ancienne_commune, + id FROM ban b LEFT OUTER JOIN rep_b_as_bis r USING (fantoir,numero) @@ -31,5 +32,6 @@ SELECT fantoir, lat, code_postal, code_insee_ancienne_commune, - nom_ancienne_commune + nom_ancienne_commune, + id FROM j; diff --git a/bano/sql/create_table_base_bano_cibles.sql b/bano/sql/create_table_base_bano_cibles.sql index 99aac26..9226db2 100644 --- a/bano/sql/create_table_base_bano_cibles.sql +++ b/bano/sql/create_table_base_bano_cibles.sql @@ -12,6 
+12,7 @@ CREATE TABLE IF NOT EXISTS bano_adresses ( nom_ancienne_commune text, source text, certification_commune integer, + id_ban text, geometrie geometry (Point, 4326) GENERATED ALWAYS AS (ST_Point(lon,lat)) STORED); CREATE INDEX IF NOT EXISTS gidx_bano_adresses ON bano_adresses USING GIST(geometrie); From 0e5314a848c9a22928bb1ccd0e510576376d0356 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Mon, 7 Aug 2023 10:02:30 +0000 Subject: [PATCH 111/163] =?UTF-8?q?m=C3=A0j=20en=20continu=20des=20polygon?= =?UTF-8?q?es=20admin?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cron_osm.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cron_osm.sh b/cron_osm.sh index f1b7213..2002167 100755 --- a/cron_osm.sh +++ b/cron_osm.sh @@ -27,7 +27,8 @@ touch ${lockfile} osmosis --rri workingDirectory=${DOWNLOAD_DIR} --wxc ${DOWNLOAD_DIR}/changes.osc.gz imposm diff -config $SCRIPT_DIR/imposm.config -dbschema-production osm ${DOWNLOAD_DIR}/changes.osc.gz -#$SCRIPT_DIR/update_table_infos_communes.sh + +bano update_table_communes rm ${lockfile} From ef19731a5046ba8e2cba340eb89ef2ae8ca89701 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Wed, 9 Aug 2023 21:22:18 +0000 Subject: [PATCH 112/163] ajout du setuppython dans le cron --- cron_osm.sh | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/cron_osm.sh b/cron_osm.sh index 2002167..e4a9e64 100755 --- a/cron_osm.sh +++ b/cron_osm.sh @@ -2,7 +2,13 @@ set -e +source /data/project/bano_v3/venv_v3/bin/activate + SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" + +cd $SCRIPT_DIR +pip install -e . + source $SCRIPT_DIR/config lockfile=${SCRIPT_DIR}/imposm.lock @@ -13,7 +19,7 @@ echo debut >> $SCRIPT_DIR/cron.log if test -f ${lockfile} then diff_age=$((`date +%s` - `stat -c %Y $lockfile`)) - if [ $diff_age -gt 14400 ];then + if [ $diff_age -gt 7200 ];then echo "Effacement du lock" >> $SCRIPT_DIR/cron.log rm ${lockfile} else @@ -27,7 +33,6 @@ touch ${lockfile} osmosis --rri workingDirectory=${DOWNLOAD_DIR} --wxc ${DOWNLOAD_DIR}/changes.osc.gz imposm diff -config $SCRIPT_DIR/imposm.config -dbschema-production osm ${DOWNLOAD_DIR}/changes.osc.gz - bano update_table_communes rm ${lockfile} From 682eed2c21c76a5f3c7ee482a2a37fe6a9e7a73b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Thu, 10 Aug 2023 19:25:17 +0000 Subject: [PATCH 113/163] =?UTF-8?q?m=C3=A0j=20quotidienne=20des=20polygone?= =?UTF-8?q?s=20de=20communes?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cron_bano.sh | 13 +++++++------ cron_osm.sh | 1 - 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/cron_bano.sh b/cron_bano.sh index 75c9bec..d2d8e4c 100755 --- a/cron_bano.sh +++ b/cron_bano.sh @@ -20,18 +20,19 @@ bano update_bis_table echo 'sources ok' >> $SCRIPT_DIR/cron.log +# Mise à jour quotidienne dans la base cadastre des couches des polygones postaux d'OSM et des statuts admin de communes en vue des exports +bano update_table_communes + +# psql -d osm -U cadastre -f sql/create_table_polygones_postaux.sql +# psql -d cadastre -U cadastre -f sql/post_copie_ban.sql +echo 'màj polygones ok' >> $SCRIPT_DIR/cron.log + # BANO # bano update_insee_lists cat deplist.txt | parallel -j 4 export LANG=fr_FR.UTF-8\; bano rapprochement --dept {1} echo 'rapprochement ok' >> $SCRIPT_DIR/cron.log -# Mise à jour quotidienne dans la base cadastre des 
couches des polygones postaux d'OSM et des statuts admin de communes en vue des exports - -# psql -d osm -U cadastre -f sql/create_table_polygones_communes.sql -# psql -d osm -U cadastre -f sql/create_table_polygones_postaux.sql -# psql -d cadastre -U cadastre -f sql/post_copie_ban.sql - # exports # cat deplist.txt | parallel -j 4 bano export {1} diff --git a/cron_osm.sh b/cron_osm.sh index e4a9e64..f54ad62 100755 --- a/cron_osm.sh +++ b/cron_osm.sh @@ -33,7 +33,6 @@ touch ${lockfile} osmosis --rri workingDirectory=${DOWNLOAD_DIR} --wxc ${DOWNLOAD_DIR}/changes.osc.gz imposm diff -config $SCRIPT_DIR/imposm.config -dbschema-production osm ${DOWNLOAD_DIR}/changes.osc.gz -bano update_table_communes rm ${lockfile} From 0e4c5179f683f8cf80403594290b30bf0c909795 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Sat, 7 Oct 2023 21:41:52 +0000 Subject: [PATCH 114/163] Abrev RTE RTE --- bano/dictionnaires/abrev_type_voie.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/bano/dictionnaires/abrev_type_voie.txt b/bano/dictionnaires/abrev_type_voie.txt index a5a82be..6078939 100644 --- a/bano/dictionnaires/abrev_type_voie.txt +++ b/bano/dictionnaires/abrev_type_voie.txt @@ -97,6 +97,7 @@ PETITE AVENUE PAE PETITE ROUTE PRT PETITE RUE PTR ROND POINT RPT +RTE RTE RTE RUE ALEZ ALEZ RUE HENT HENT RUE STRAED STRAED From 5d21e6839fd7094314c5984fffc241e28926ec13 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Sun, 15 Oct 2023 20:07:30 +0000 Subject: [PATCH 115/163] Table de stats par commune --- bano/models.py | 31 ++++++++++++++++--- bano/rapprochement.py | 17 ++++++++++ bano/sql/create_table_base_bano_cibles.sql | 11 +++++++ .../update_table_bano_stats_communales.sql | 8 +++++ 4 files changed, 62 insertions(+), 5 deletions(-) create mode 100644 bano/sql/update_table_bano_stats_communales.sql diff --git a/bano/models.py b/bano/models.py index 5a4d4da..488d2df 100644 --- a/bano/models.py +++ b/bano/models.py @@ -187,6 +187,17 @@ def enregistre(self, correspondance): ), ) + def stats_sources(self): + par_source = {"BAN": set(), "OSM": set(), "CADASTRE": set()} + for t in self: + if t.fantoir: + par_source[t.source].add(t.fantoir) + return [ + len(par_source["BAN"]), + len(par_source["CADASTRE"]), + len(par_source["OSM"]), + ] + class Adresse: def __init__( @@ -202,7 +213,7 @@ def __init__( code_postal=None, code_insee_ancienne_commune=None, nom_ancienne_commune=None, - id_ban=None + id_ban=None, ): self.code_insee = code_insee self.code_dept = hp.get_code_dept_from_insee(code_insee) @@ -304,7 +315,7 @@ def charge_numeros_ban(self, topo): code_postal, code_insee_ancienne_commune, nom_ancienne_commune, - id_ban + id_ban, ) in data: if not (fantoir and fantoir in topo.topo): fantoir = None @@ -321,7 +332,7 @@ def charge_numeros_ban(self, topo): code_postal=code_postal, code_insee_ancienne_commune=code_insee_ancienne_commune, nom_ancienne_commune=nom_ancienne_commune, - id_ban=id_ban + id_ban=id_ban, ) ) @@ -485,10 +496,17 @@ def enregistre(self, correspondance): "code_insee_ancienne_commune", "nom_ancienne_commune", "source", - "id_ban" + "id_ban", ), ) + def stats_sources(self): + par_source = {"BAN": set(), "OSM": set()} + for t in self: + if t.fantoir: + par_source[t.source].add(f"{t.numero}{t.fantoir}") + return [len(par_source["BAN"]), len(par_source["OSM"])] + class Point_nomme: def __init__( @@ -707,6 +725,7 @@ class Topo: def __init__(self, code_insee): self.code_insee = code_insee self.topo = OrderedDict() + self.nb_fantoirs = 0 # 
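The stats_sources() methods added to Noms and Adresses both apply the same counting rule: a FANTOIR code (or numero plus FANTOIR pair) is counted at most once per source, and entries without a FANTOIR are ignored. A standalone illustration of that rule, not the project code, with a defensive check on the source added:

from collections import namedtuple

Nom = namedtuple("Nom", "source fantoir")

def stats_noms(noms):
    par_source = {"BAN": set(), "OSM": set(), "CADASTRE": set()}
    for n in noms:
        if n.fantoir and n.source in par_source:
            par_source[n.source].add(n.fantoir)
    return [len(par_source["BAN"]), len(par_source["CADASTRE"]), len(par_source["OSM"])]

print(stats_noms([Nom("OSM", "59350A001X"), Nom("OSM", "59350A001X"), Nom("BAN", None)]))
# [0, 0, 1] : the duplicate and the entry without FANTOIR are not counted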
self.code_fantoir9_vers_fantoir10 = {} # self.index_by_nom_normalise = defaultdict(list) @@ -732,8 +751,10 @@ def charge_topo(self): for fantoir, nom in data: nom = hp.normalize(" ".join(nom.replace("-", " ").split())) self.topo[fantoir] = nom - # self.code_fantoir9_vers_fantoir10[fantoir[0:9]] = fantoir self.topo[nom] = fantoir + if not "b" in fantoir: + self.nb_fantoirs += 1 + class Correspondance_fantoir_ban_osm: def __init__(self, code_insee): diff --git a/bano/rapprochement.py b/bano/rapprochement.py index ed5b7b2..cd75453 100644 --- a/bano/rapprochement.py +++ b/bano/rapprochement.py @@ -3,6 +3,7 @@ from . import db_helpers as h from . import batch as b +from .sql import sql_process from .models import Noms, Adresses, Topo, Points_nommes, Correspondance_fantoir_ban_osm from .sources import ban2topo @@ -38,6 +39,22 @@ def process_unitaire(code_insee): noms.enregistre(correspondance_fantoir_ban_osm.correspondance) points_nommes.enregistre(correspondance_fantoir_ban_osm.correspondance) + nb_addr_ban, nb_addr_osm = adresses.stats_sources() + nb_noms_ban, nb_noms_cadastre, nb_noms_osm = noms.stats_sources() + nb_noms_topo = topo.nb_fantoirs + sql_process( + "update_table_bano_stats_communales", + dict( + code_insee=code_insee, + nb_adresses_osm=str(nb_addr_osm), + nb_adresses_ban=str(nb_addr_ban), + nb_noms_osm=str(nb_noms_osm), + nb_noms_ban=str(nb_noms_ban), + nb_noms_topo=str(nb_noms_topo), + nb_noms_cadastre=str(nb_noms_cadastre), + ), + ) + b.batch_stop_log(id_batch, True) except: b.batch_stop_log(id_batch, False) diff --git a/bano/sql/create_table_base_bano_cibles.sql b/bano/sql/create_table_base_bano_cibles.sql index 9226db2..2239cd4 100644 --- a/bano/sql/create_table_base_bano_cibles.sql +++ b/bano/sql/create_table_base_bano_cibles.sql @@ -85,3 +85,14 @@ CREATE TABLE IF NOT EXISTS statut_numero ( CREATE INDEX IF NOT EXISTS idx_statut_numero_fantoir ON statut_numero (fantoir, numero); GRANT SELECT ON ALL TABLES IN SCHEMA public TO public; + +CREATE TABLE IF NOT EXISTS bano_stats_communales( + code_insee text, + nb_adresses_osm integer, + nb_adresses_ban integer, + nb_nom_osm integer, + nb_nom_ban integer, + nb_nom_cadastre integer, + nb_nom_topo integer); + +CREATE INDEX IF NOT EXISTS idx_bano_stats_communales_code_insee ON bano_stats_communales (code_insee); diff --git a/bano/sql/update_table_bano_stats_communales.sql b/bano/sql/update_table_bano_stats_communales.sql new file mode 100644 index 0000000..ad32815 --- /dev/null +++ b/bano/sql/update_table_bano_stats_communales.sql @@ -0,0 +1,8 @@ +DELETE FROM bano_stats_communales WHERE code_insee = '__code_insee__'; +INSERT INTO bano_stats_communales VALUES ('__code_insee__', + __nb_adresses_osm__, + __nb_adresses_ban__, + __nb_noms_osm__, + __nb_noms_ban__, + __nb_noms_cadastre__, + __nb_noms_topo__); \ No newline at end of file From 294f7da1713d9fa18c5208e5976877f6ceeaec5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Wed, 18 Oct 2023 17:06:42 +0000 Subject: [PATCH 116/163] complement de code Fantoir pour les noms en privilegiant OSM --- bano/models.py | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/bano/models.py b/bano/models.py index 488d2df..32fa9b5 100644 --- a/bano/models.py +++ b/bano/models.py @@ -145,20 +145,26 @@ def add_fantoir(self, topo): t.add_fantoir(topo) def remplit_fantoir_par_nom_sous_commune(self): - for t in self.triplets_nom_fantoir_source: - if t.code_insee_ancienne_commune: - if ( - not t.code_insee_ancienne_commune 
- in self.fantoir_par_nom_sous_commune - ): - self.fantoir_par_nom_sous_commune[ - t.code_insee_ancienne_commune - ] = {} - self.fantoir_par_nom_sous_commune[t.code_insee_ancienne_commune][ - t.nom - ] = t.fantoir - else: - self.fantoir_par_nom_sous_commune[t.nom] = t.fantoir + # privilège pour la source OSM + for source in ['OSM','BAN','CADASTRE']: + for t in self.triplets_nom_fantoir_source: + if t.source != source: + continue + if t.code_insee_ancienne_commune: + if ( + not t.code_insee_ancienne_commune + in self.fantoir_par_nom_sous_commune + ): + self.fantoir_par_nom_sous_commune[ + t.code_insee_ancienne_commune + ] = {} + if not t.nom in self.fantoir_par_nom_sous_commune[t.code_insee_ancienne_commune]: + self.fantoir_par_nom_sous_commune[t.code_insee_ancienne_commune][ + t.nom + ] = t.fantoir + else: + if not t.nom in self.fantoir_par_nom_sous_commune: + self.fantoir_par_nom_sous_commune[t.nom] = t.fantoir def enregistre(self, correspondance): sql_process( From 672c21ee607439aa8c78ecb0dcc8a33df2d2b381 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Thu, 19 Oct 2023 21:13:29 +0000 Subject: [PATCH 117/163] centroides en Web Mercator pour le rendu Pifometre --- bano/sql/create_table_polygones_communes.sql | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/bano/sql/create_table_polygones_communes.sql b/bano/sql/create_table_polygones_communes.sql index a56db84..c5b48cb 100644 --- a/bano/sql/create_table_polygones_communes.sql +++ b/bano/sql/create_table_polygones_communes.sql @@ -2,8 +2,10 @@ CREATE TABLE IF NOT EXISTS polygones_insee ( geometrie geometry (Geometry, 4326), code_insee character(5), nom text, - admin_level integer); + admin_level integer, + geom_centroide_3857 geometry(Point,3857) GENERATED ALWAYS AS (ST_Transform(ST_Pointonsurface(geometrie),3857)) STORED); CREATE INDEX IF NOT EXISTS gidx_polygones_insee ON polygones_insee USING GIST (geometrie); +CREATE INDEX IF NOT EXISTS gidx_polygones_insee_centroide_3857 ON polygones_insee USING GIST (geom_centroide_3857); CREATE INDEX IF NOT EXISTS idx_polygones_insee_code_insee ON polygones_insee(code_insee); TRUNCATE TABLE polygones_insee; From 3432f8935e3e59f951d9f0d8cc362b7f64859c25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Wed, 25 Oct 2023 14:20:33 +0000 Subject: [PATCH 118/163] Table de stats par commune - date de maj pour cumul --- bano/sql/create_table_base_bano_cibles.sql | 3 ++- .../update_table_bano_stats_communales.sql | 22 ++++++++++++------- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/bano/sql/create_table_base_bano_cibles.sql b/bano/sql/create_table_base_bano_cibles.sql index 2239cd4..57320a3 100644 --- a/bano/sql/create_table_base_bano_cibles.sql +++ b/bano/sql/create_table_base_bano_cibles.sql @@ -93,6 +93,7 @@ CREATE TABLE IF NOT EXISTS bano_stats_communales( nb_nom_osm integer, nb_nom_ban integer, nb_nom_cadastre integer, - nb_nom_topo integer); + nb_nom_topo integer, + maj timestamp DEFAULT now()); CREATE INDEX IF NOT EXISTS idx_bano_stats_communales_code_insee ON bano_stats_communales (code_insee); diff --git a/bano/sql/update_table_bano_stats_communales.sql b/bano/sql/update_table_bano_stats_communales.sql index ad32815..0551c51 100644 --- a/bano/sql/update_table_bano_stats_communales.sql +++ b/bano/sql/update_table_bano_stats_communales.sql @@ -1,8 +1,14 @@ -DELETE FROM bano_stats_communales WHERE code_insee = '__code_insee__'; -INSERT INTO bano_stats_communales VALUES ('__code_insee__', - 
__nb_adresses_osm__, - __nb_adresses_ban__, - __nb_noms_osm__, - __nb_noms_ban__, - __nb_noms_cadastre__, - __nb_noms_topo__); \ No newline at end of file +INSERT INTO bano_stats_communales (code_insee, + nb_adresses_osm, + nb_adresses_ban, + nb_nom_osm, + nb_nom_ban, + nb_nom_cadastre, + nb_nom_topo) + VALUES ('__code_insee__', + __nb_adresses_osm__, + __nb_adresses_ban__, + __nb_noms_osm__, + __nb_noms_ban__, + __nb_noms_cadastre__, + __nb_noms_topo__); From beac888a688b3cf0f1f4104319286edbeaf1070a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Wed, 25 Oct 2023 21:10:29 +0000 Subject: [PATCH 119/163] Table de stats par commune - historisation --- bano/sql/create_table_base_bano_cibles.sql | 12 ++++++++++++ bano/sql/update_table_bano_stats_communales.sql | 7 +++++++ 2 files changed, 19 insertions(+) diff --git a/bano/sql/create_table_base_bano_cibles.sql b/bano/sql/create_table_base_bano_cibles.sql index 57320a3..4d07096 100644 --- a/bano/sql/create_table_base_bano_cibles.sql +++ b/bano/sql/create_table_base_bano_cibles.sql @@ -97,3 +97,15 @@ CREATE TABLE IF NOT EXISTS bano_stats_communales( maj timestamp DEFAULT now()); CREATE INDEX IF NOT EXISTS idx_bano_stats_communales_code_insee ON bano_stats_communales (code_insee); + +CREATE TABLE IF NOT EXISTS bano_stats_communales_cumul( + code_insee text, + nb_adresses_osm integer, + nb_adresses_ban integer, + nb_nom_osm integer, + nb_nom_ban integer, + nb_nom_cadastre integer, + nb_nom_topo integer, + maj timestamp); + +CREATE INDEX IF NOT EXISTS idx_bano_stats_communales_cumul_code_insee ON bano_stats_communales_cumul (code_insee); diff --git a/bano/sql/update_table_bano_stats_communales.sql b/bano/sql/update_table_bano_stats_communales.sql index 0551c51..126b424 100644 --- a/bano/sql/update_table_bano_stats_communales.sql +++ b/bano/sql/update_table_bano_stats_communales.sql @@ -1,3 +1,10 @@ +INSERT INTO bano_stats_communales_cumul +SELECT * +FROM bano_stats_communales +WHERE code_insee = '__code_insee__'; +DELETE +FROM bano_stats_communales +WHERE code_insee = '__code_insee__'; INSERT INTO bano_stats_communales (code_insee, nb_adresses_osm, nb_adresses_ban, From c51c5fc206dc41edc839e15bb15d524b9a061ca5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Sun, 5 Nov 2023 19:32:09 +0000 Subject: [PATCH 120/163] Nouvelle structure du CSV BAN --- bano/sql/create_table_base_bano_sources.sql | 3 +++ 1 file changed, 3 insertions(+) diff --git a/bano/sql/create_table_base_bano_sources.sql b/bano/sql/create_table_base_bano_sources.sql index 5d99fd1..b01e204 100644 --- a/bano/sql/create_table_base_bano_sources.sql +++ b/bano/sql/create_table_base_bano_sources.sql @@ -16,6 +16,9 @@ CREATE INDEX IF NOT EXISTS idx_topo_fantoir ON topo(fantoir); CREATE TABLE IF NOT EXISTS ban ( id text, + id_ban_adresse text, + id_ban_toponyme text, + id_ban_district text, id_fantoir text, fantoir text GENERATED ALWAYS AS (substr(id_fantoir,0,6)||substr(id_fantoir,7,10)) STORED, numero text, From 51910a7d0bdb9caa0a1d8a866a6a1a64e563f34c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Wed, 15 Nov 2023 22:22:48 +0000 Subject: [PATCH 121/163] Rapprochement KO sur les numeros OSM si nom different (#366) --- bano/models.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/bano/models.py b/bano/models.py index 32fa9b5..67fa84e 100644 --- a/bano/models.py +++ b/bano/models.py @@ -80,10 +80,10 @@ def __init__(self, code_insee): def __iter__(self): 
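Since bano_stats_communales_cumul receives the previous row each time a commune is reprocessed, it behaves as a per-commune time series. A hedged example of reading it back; table and column names come from this patch, connection parameters are assumed:

import psycopg2

conn = psycopg2.connect("dbname=cadastre user=cadastre")  # assumed DSN
with conn.cursor() as cur:
    cur.execute("""
        SELECT maj::date, nb_adresses_osm, nb_adresses_ban
        FROM bano_stats_communales_cumul
        WHERE code_insee = %s
        ORDER BY maj""", ("59350",))
    for maj, nb_osm, nb_ban in cur.fetchall():
        print(maj, nb_osm, nb_ban)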
return iter(self.triplets_nom_fantoir_source) - def _print(self, pattern=None): + def _print(self, correspondance, pattern=None): for a in self: - if not pattern or pattern in a._as_csv_format_bano(): - print(a._as_csv_format_bano()) + if not pattern or pattern in a._as_csv_format_bano(correspondance): + print(a._as_csv_format_bano(correspondance)) # On ne charge pas les noms des numeros OSM. Ils sont ajoutés via Adresses.nom_des_adresses def charge_noms_osm_hors_numeros(self): @@ -148,7 +148,7 @@ def remplit_fantoir_par_nom_sous_commune(self): # privilège pour la source OSM for source in ['OSM','BAN','CADASTRE']: for t in self.triplets_nom_fantoir_source: - if t.source != source: + if t.source != source or not t.fantoir: continue if t.code_insee_ancienne_commune: if ( From 01116821ab91f6d950b0897f46daeb7b940405f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Sun, 19 Nov 2023 23:30:23 +0000 Subject: [PATCH 122/163] COG 2023 --- bano/sources/cog.py | 98 ++++++++++++++++++++++----------------------- 1 file changed, 49 insertions(+), 49 deletions(-) diff --git a/bano/sources/cog.py b/bano/sources/cog.py index 5a428d1..b2ff40f 100644 --- a/bano/sources/cog.py +++ b/bano/sources/cog.py @@ -1,76 +1,76 @@ -# import csv -from zipfile import ZipFile import os -# import subprocess -# from datetime import datetime + from email.utils import formatdate, parsedate_to_datetime from pathlib import Path import requests -# import psycopg2 from ..db import bano_db -from ..sql import sql_process from .. import batch as b +DICT_COG = { + "commune": [ + "cog_commune", + "https://www.data.gouv.fr/fr/datasets/r/5d5c4384-b19a-4ddf-910b-6e9cbaa8e1d6", + ], + "canton": [ + "cog_canton", + "https://www.data.gouv.fr/fr/datasets/r/716922e8-2ab7-4806-94eb-21933c266ebf", + ], + "departement": [ + "cog_departement", + "https://www.data.gouv.fr/fr/datasets/r/c691a0b0-6e22-4d6f-9496-3406b017d709", + ], + "region": [ + "cog_region", + "https://www.data.gouv.fr/fr/datasets/r/6e40e6f3-7735-43a9-8f20-ce0ae53f74f7", + ], +} + + def process_cog(**kwargs): - zip = get_destination('cog.zip') - status = download(zip) - if status: - import_to_pg(zip) + for k,v in DICT_COG.items(): + print(f"Chargement de la source COG {k}") + table,url = v + csv = get_destination(f"{k}.csv") + status = download(csv,url) + if status: + import_to_pg(csv,table) -def download(destination): + +def download(destination,url): headers = {} if destination.exists(): - headers['If-Modified-Since'] = formatdate(destination.stat().st_mtime) + headers["If-Modified-Since"] = formatdate(destination.stat().st_mtime) - resp = requests.get(get_COG_URL(), headers=headers) - id_batch = b.batch_start_log('download source', 'COG ZIP','France') + resp = requests.get(url, headers=headers) + id_batch = b.batch_start_log("download source", f"COG {destination}", "France") if resp.status_code == 200: - with destination.open('wb') as f: + with destination.open("wb") as f: f.write(resp.content) - # mtime = parsedate_to_datetime(resp.headers['Last-Modified']).timestamp() - # os.utime(destination, (mtime, mtime)) - b.batch_stop_log(id_batch,True) + b.batch_stop_log(id_batch, True) return True print(resp.status_code) - b.batch_stop_log(id_batch,False) + b.batch_stop_log(id_batch, False) return False -def import_to_pg(fichier_zip): - table = 'cog_commune' - id_batch = b.batch_start_log('import source', f'COG {table}','France') - with ZipFile(fichier_zip) as f: - with f.open(get_COG_CSV()) as csv: - csv.readline() # skip CSV headers - with 
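The reworked cog.py loader keeps the conditional-download pattern: send If-Modified-Since based on the local file's mtime and only rewrite the file on a 200. A self-contained sketch of that pattern; the URL is the DICT_COG "commune" entry above, and the 304 branch is added here only for illustration:

from email.utils import formatdate
from pathlib import Path
import requests

url = "https://www.data.gouv.fr/fr/datasets/r/5d5c4384-b19a-4ddf-910b-6e9cbaa8e1d6"
destination = Path("commune.csv")

headers = {}
if destination.exists():
    headers["If-Modified-Since"] = formatdate(destination.stat().st_mtime)
resp = requests.get(url, headers=headers)
if resp.status_code == 200:
    destination.write_bytes(resp.content)
elif resp.status_code == 304:
    print("local copy is already up to date")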
bano_db.cursor() as cur_insert: - try: - cur_insert.execute(f"TRUNCATE {table}") - cur_insert.copy_from(csv,table, sep=',', null='') - b.batch_stop_log(id_batch,True) - except psycopg2.DataError as e: - b.batch_stop_log(id_batch,False) - +def import_to_pg(csv,table): + id_batch = b.batch_start_log("import source", f"COG {table}", "France") + with open(csv) as f: + with bano_db.cursor() as cur_insert: + try: + cur_insert.execute(f"TRUNCATE {table}") + cur_insert.copy_expert(f"COPY {table} FROM STDIN WITH CSV HEADER",f) + b.batch_stop_log(id_batch, True) + except psycopg2.DataError as e: + b.batch_stop_log(id_batch, False) + def get_destination(fichier_cog): try: - cwd = Path(os.environ['COG_DIR']) + cwd = Path(os.environ["COG_DIR"]) except KeyError: raise ValueError(f"La variable COG_DIR n'est pas définie") if not cwd.exists(): raise ValueError(f"Le répertoire {cwd} n'existe pas") - return cwd / f'{fichier_cog}' - -def get_COG_URL(): - try: - url = os.environ['COG_URL'] - except KeyError: - raise ValueError(f"La variable COG_URL n'est pas définie") - return url - -def get_COG_CSV(): - try: - csv = os.environ['COG_CSV_COMMUNE'] - except KeyError: - raise ValueError(f"La variable COG_CSV_COMMUNE n'est pas définie") - return csv + return cwd / f"{fichier_cog}" From 8c1d7e05a40fa4490812f12103d2e92432daa340 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Mon, 20 Nov 2023 22:41:12 +0000 Subject: [PATCH 123/163] COG 2023 --- bano/sources/cog.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/bano/sources/cog.py b/bano/sources/cog.py index b2ff40f..b9b3bce 100644 --- a/bano/sources/cog.py +++ b/bano/sources/cog.py @@ -17,6 +17,10 @@ "cog_canton", "https://www.data.gouv.fr/fr/datasets/r/716922e8-2ab7-4806-94eb-21933c266ebf", ], + "arrondissement": [ + "cog_arrondissement", + "https://www.data.gouv.fr/fr/datasets/r/9fdf41d6-2452-4f7c-a1e2-a7b104b38b77", + ], "departement": [ "cog_departement", "https://www.data.gouv.fr/fr/datasets/r/c691a0b0-6e22-4d6f-9496-3406b017d709", From 66dd2ab8feacded01df4517350152994cd4e308c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Mon, 20 Nov 2023 23:40:05 +0000 Subject: [PATCH 124/163] table infos_communes pour les exports JSON --- bano/bin.py | 7 +++ bano/boite_a_outils.py | 8 +++ bano/sql/create_table_base_bano_cibles.sql | 16 ++++++ bano/sql/update_table_infos_communes.sql | 66 ++++++++++++++++++++++ cron_bano.sh | 2 +- 5 files changed, 98 insertions(+), 1 deletion(-) create mode 100644 bano/sql/update_table_infos_communes.sql diff --git a/bano/bin.py b/bano/bin.py index c4c898b..46531e1 100644 --- a/bano/bin.py +++ b/bano/bin.py @@ -119,6 +119,13 @@ def main(): "--code_insee", type=str, help="Code INSEE de la commune à traiter" ) subparser.set_defaults(func=ban2topo.process) + + subparser = subparsers.add_parser( + "update_infos_communes", + help="Màj de la table infos_communes pour les exports JSON", + ) + subparser.set_defaults(func=boite_a_outils.update_infos_communes) + args = parser.parse_args() try: diff --git a/bano/boite_a_outils.py b/bano/boite_a_outils.py index 6234900..e1a4b9e 100755 --- a/bano/boite_a_outils.py +++ b/bano/boite_a_outils.py @@ -12,3 +12,11 @@ def maj_table_communes(**kwargs): b.batch_stop_log(batch_id, True) except: b.batch_stop_log(batch_id, False) + +def update_infos_communes(**kwargs): + batch_id = b.batch_start_log("maj_table_infos_communes", "France", "France") + try: + sql_process("update_table_infos_communes", dict()) + b.batch_stop_log(batch_id, 
True) + except: + b.batch_stop_log(batch_id, False) diff --git a/bano/sql/create_table_base_bano_cibles.sql b/bano/sql/create_table_base_bano_cibles.sql index 4d07096..42f3b60 100644 --- a/bano/sql/create_table_base_bano_cibles.sql +++ b/bano/sql/create_table_base_bano_cibles.sql @@ -109,3 +109,19 @@ CREATE TABLE IF NOT EXISTS bano_stats_communales_cumul( maj timestamp); CREATE INDEX IF NOT EXISTS idx_bano_stats_communales_cumul_code_insee ON bano_stats_communales_cumul (code_insee); + +CREATE TABLE IF NOT EXISTS infos_communes ( + dep character varying(3), + code_insee character(5), + name text, + adm_weight integer, + population integer, + population_milliers numeric, + type text, + lon numeric, + lat numeric, + geometrie geometry(Point, 4326) +); + +CREATE INDEX IF NOT EXISTS idx_infos_communes_insee ON infos_communes(code_insee); +CREATE INDEX IF NOT EXISTS gidx_infos_communes ON infos_communes USING GIST(geometrie); diff --git a/bano/sql/update_table_infos_communes.sql b/bano/sql/update_table_infos_communes.sql new file mode 100644 index 0000000..0ec5cfd --- /dev/null +++ b/bano/sql/update_table_infos_communes.sql @@ -0,0 +1,66 @@ +CREATE TEMP TABLE tmp_infos_communes +AS +WITH +statut +AS +(SELECT com code_insee, 1 AS statut FROM cog_commune +UNION ALL +SELECT burcentral, 2 AS statut FROM cog_canton +UNION ALL +SELECT cheflieu, 3 AS statut FROM cog_arrondissement +UNION ALL +SELECT cheflieu, 4 AS statut FROM cog_departement +UNION ALL +SELECT cheflieu, 5 AS statut FROM cog_region), +adm_weight +AS +(SELECT code_insee, max(statut) AS adm_weight +FROM statut +GROUP BY 1), +pop +AS +(SELECT osm_id, + name, + "ref:INSEE" code_insee, + COALESCE(population_rel,population_member,0) AS population, + RANK() OVER(PARTITION BY "ref:INSEE" ORDER BY admin_level) rang +FROM planet_osm_communes_statut +WHERE admin_level in (8,9) AND + boundary = 'administrative' AND + member_role = 'admin_centre'), +pp +AS +(SELECT osm_id, + ROUND(ST_X(way)::numeric,6) lon, + ROUND(ST_Y(way)::numeric,6) lat, + way AS geometrie +FROM planet_osm_point pp +WHERE place != '') +SELECT cc.dep, + adm_weight.code_insee, + pop.name, + adm_weight.adm_weight, + pop.population, + ROUND((pop.population::numeric/1000),1) AS population_milliers, + CASE + WHEN pop.population < 1000 THEN 'village' + WHEN pop.population < 10000 THEN 'town' + ELSE 'city' + END AS type, + pp.lon, + pp.lat, + pp.geometrie +FROM adm_weight +JOIN cog_commune cc +ON cc.com = code_insee +LEFT OUTER JOIN pop +USING (code_insee) +JOIN pp +USING (osm_id) +WHERE pop.rang = 1 AND + cc.typecom != 'COMD'; + +TRUNCATE TABLE infos_communes; +INSERT INTO infos_communes +SELECT * +FROM tmp_infos_communes; \ No newline at end of file diff --git a/cron_bano.sh b/cron_bano.sh index d2d8e4c..14a3ec5 100755 --- a/cron_bano.sh +++ b/cron_bano.sh @@ -22,13 +22,13 @@ echo 'sources ok' >> $SCRIPT_DIR/cron.log # Mise à jour quotidienne dans la base cadastre des couches des polygones postaux d'OSM et des statuts admin de communes en vue des exports bano update_table_communes +bano update_infos_communes # psql -d osm -U cadastre -f sql/create_table_polygones_postaux.sql # psql -d cadastre -U cadastre -f sql/post_copie_ban.sql echo 'màj polygones ok' >> $SCRIPT_DIR/cron.log # BANO -# bano update_insee_lists cat deplist.txt | parallel -j 4 export LANG=fr_FR.UTF-8\; bano rapprochement --dept {1} echo 'rapprochement ok' >> $SCRIPT_DIR/cron.log From 25e0e8a82c856e7fe2fcea374053b41caf6dc054 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Tue, 21 
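In update_table_infos_communes.sql the exported "type" depends only on the population picked up from planet_osm_communes_statut; restated in Python with the same thresholds:

def commune_type(population):
    if population < 1000:
        return "village"
    if population < 10000:
        return "town"
    return "city"

print(commune_type(650), commune_type(4200), commune_type(52000))
# village town city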
Nov 2023 23:17:34 +0000 Subject: [PATCH 125/163] Table des CPs pour les exports JSON --- bano/bin.py | 8 ++- bano/sources/datagouv_cp.py | 65 +++++++++++++++++++++ bano/sql/create_table_base_bano_sources.sql | 16 ++++- 3 files changed, 85 insertions(+), 4 deletions(-) create mode 100644 bano/sources/datagouv_cp.py diff --git a/bano/bin.py b/bano/bin.py index 46531e1..9a1c4b8 100644 --- a/bano/bin.py +++ b/bano/bin.py @@ -8,7 +8,7 @@ from . import setup_db from . import rapprochement from . import boite_a_outils -from .sources import topo, ban, cog, cadastre_ld, ban2topo, datagouv_commune_summary as datagouv_cs +from .sources import topo, ban, cog, cadastre_ld, ban2topo, datagouv_commune_summary as datagouv_cs,datagouv_cp from .constants import DEPARTEMENTS @@ -36,6 +36,12 @@ def main(): ) subparser.set_defaults(func=cog.process_cog) + subparser = subparsers.add_parser( + "charge_cp", + help="Charge une version du fichier Codes Postaux", + ) + subparser.set_defaults(func=datagouv_cp.process) + subparser = subparsers.add_parser( "charge_ban", help="Charge une version des fichiers BAN", diff --git a/bano/sources/datagouv_cp.py b/bano/sources/datagouv_cp.py new file mode 100644 index 0000000..e3096c0 --- /dev/null +++ b/bano/sources/datagouv_cp.py @@ -0,0 +1,65 @@ +import os + +from email.utils import formatdate, parsedate_to_datetime +from pathlib import Path + +import requests + +from ..db import bano_db +from .. import batch as b + +DICT_SOURCES = { + "codes_postaux": [ + "codes_postaux", + "https://www.data.gouv.fr/fr/datasets/r/5ed9b092-a25d-49e7-bdae-0152797c7577", + ], +} + + +def process(**kwargs): + for k,v in DICT_SOURCES.items(): + print(f"Chargement de la source {k}") + table,url = v + csv = get_destination(f"{k}.csv") + status = download(csv,url) + if status: + import_to_pg(csv,table) + + +def download(destination,url): + headers = {} + if destination.exists(): + headers["If-Modified-Since"] = formatdate(destination.stat().st_mtime) + + resp = requests.get(url, headers=headers) + id_batch = b.batch_start_log("download source", destination, "France") + if resp.status_code == 200: + with destination.open("w") as f: + f.write(resp.text) + b.batch_stop_log(id_batch, True) + return True + print(resp.status_code) + b.batch_stop_log(id_batch, False) + return False + + +def import_to_pg(csv,table): + id_batch = b.batch_start_log("import source", table, "France") + with open(csv) as f: + f.readline() + with bano_db.cursor() as cur_insert: + try: + cur_insert.execute(f"TRUNCATE {table}") + cur_insert.copy_expert(f"COPY {table} FROM STDIN WITH CSV HEADER DELIMITER ';'",f) + b.batch_stop_log(id_batch, True) + except psycopg2.DataError as e: + b.batch_stop_log(id_batch, False) + +def get_destination(fichier): + try: + cwd = Path(os.environ["DATA_DIR"]) + except KeyError: + raise ValueError(f"La variable DATA_DIR n'est pas définie") + if not cwd.exists(): + raise ValueError(f"Le répertoire {cwd} n'existe pas") + return cwd / f"{fichier}" diff --git a/bano/sql/create_table_base_bano_sources.sql b/bano/sql/create_table_base_bano_sources.sql index b01e204..32cdabf 100644 --- a/bano/sql/create_table_base_bano_sources.sql +++ b/bano/sql/create_table_base_bano_sources.sql @@ -16,9 +16,9 @@ CREATE INDEX IF NOT EXISTS idx_topo_fantoir ON topo(fantoir); CREATE TABLE IF NOT EXISTS ban ( id text, - id_ban_adresse text, - id_ban_toponyme text, - id_ban_district text, + -- id_ban_adresse text, + -- id_ban_toponyme text, + -- id_ban_district text, id_fantoir text, fantoir text GENERATED ALWAYS AS 
(substr(id_fantoir,0,6)||substr(id_fantoir,7,10)) STORED, numero text, @@ -87,4 +87,14 @@ CREATE TABLE IF NOT EXISTS communes_summary ( CREATE INDEX IF NOT EXISTS communes_summary_code_insee ON communes_summary (code_insee); +CREATE TABLE IF NOT EXISTS codes_postaux ( + insee text, + commune text, + cp text, + libelle text, + ligne_5 text); + +CREATE INDEX IF NOT EXISTS idx_codes_postaux_cp ON codes_postaux (cp); +CREATE INDEX IF NOT EXISTS idx_codes_postaux_insee ON codes_postaux (insee); + GRANT SELECT ON ALL TABLES IN SCHEMA public TO public; \ No newline at end of file From 5053d48df67a3fc953eeeba7479caeabf9e3e80d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Tue, 21 Nov 2023 23:24:50 +0000 Subject: [PATCH 126/163] Communes pour les exports JSON --- bano/sql/export_json_dept_communes.sql | 33 ++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 bano/sql/export_json_dept_communes.sql diff --git a/bano/sql/export_json_dept_communes.sql b/bano/sql/export_json_dept_communes.sql new file mode 100644 index 0000000..706b24c --- /dev/null +++ b/bano/sql/export_json_dept_communes.sql @@ -0,0 +1,33 @@ +WITH +rang_cp +AS +(SELECT *, + RANK() OVER(PARTITION BY insee ORDER BY ligne_5,cp) AS rang +FROM codes_postaux), +cp +AS +(SELECT insee AS code_insee, + cp +FROM rang_cp +WHERE rang = 1) +SELECT DISTINCT code_insee AS id, -- id + i.type, -- type + i.name, -- name + cp.cp, -- postcode + round(lat,6) AS lat, -- lat + round(lon,6) AS lon, -- lon + i.name , -- city + cd.libelle, -- departement + cr.libelle, -- region, + population, + adm_weight, + GREATEST(0.075,ROUND(LOG((adm_weight)+LOG(population+1)/3)::decimal,4)) AS importance +FROM infos_communes i +JOIN cp +USING (code_insee) +JOIN cog_departement cd +USING (dep) +JOIN cog_region cr +USING (reg) +WHERE i.dep = '__dept__' +ORDER BY code_insee; \ No newline at end of file From d1a61def303cb726506b40e144fdce4c18d0db34 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Sat, 25 Nov 2023 21:42:18 +0000 Subject: [PATCH 127/163] =?UTF-8?q?Point=20d'entr=C3=A9e=20des=20exports?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- bano/bin.py | 15 +++++ bano/export.py | 160 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 175 insertions(+) create mode 100644 bano/export.py diff --git a/bano/bin.py b/bano/bin.py index 9a1c4b8..f1a551b 100644 --- a/bano/bin.py +++ b/bano/bin.py @@ -8,6 +8,7 @@ from . import setup_db from . import rapprochement from . import boite_a_outils +from . 
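The importance value computed in export_json_dept_communes.sql combines adm_weight and population on a log scale with a floor of 0.075. The same formula in Python for illustration (PostgreSQL's LOG() is base 10, hence log10; the sample values are invented):

import math

def importance(adm_weight, population):
    return max(0.075, round(math.log10(adm_weight + math.log10(population + 1) / 3), 4))

print(importance(1, 800))      # small commune, about 0.294
print(importance(5, 472715))   # large prefecture, about 0.8383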
import export from .sources import topo, ban, cog, cadastre_ld, ban2topo, datagouv_commune_summary as datagouv_cs,datagouv_cp from .constants import DEPARTEMENTS @@ -132,6 +133,20 @@ def main(): ) subparser.set_defaults(func=boite_a_outils.update_infos_communes) + subparser = subparsers.add_parser( + "export", + help="Export par département dans différents formats", + description="Export par département dans différents formats", + ) + subparser.add_argument( + "departements", + type=str, + help="Départements à traiter", + nargs="*", + default=DEPARTEMENTS, + ) + subparser.set_defaults(func=export.process) + args = parser.parse_args() try: diff --git a/bano/export.py b/bano/export.py new file mode 100644 index 0000000..f594f32 --- /dev/null +++ b/bano/export.py @@ -0,0 +1,160 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import csv +import json +import os +import sys +import subprocess + +from pathlib import Path + +from .sql import sql_get_data,sql_get_dict_data +from . import constants +from . import helpers as hp + +class Dataset: + def __init__(self, dept): + self.dept = dept + self.csv_data = None + self.json_commune_data = None + self.json_voies_rapprochees_data = None + self.json_voies_rapprochees_sans_adresses_data = None + self.json_voies_non_rapprochees_data = None + self.json_lieux_dits_data = None + + def get_csv_data(self): + self.csv_data = sql_get_data('export_csv_dept',dict(dept=self.dept)) + + def get_json_commune_data(self): + self.json_commune_data = sql_get_dict_data('export_json_dept_communes',dict(dept=self.dept)) + # print(json.dumps(self.json_commune_data)) + + def get_json_voies_non_rapprochees_data(self): + return sql_get_data('export_json_dept_voies_non_rapprochees',dict(dept=self.dept)) + + def get_json_voies_rapprochees_data(self): + return sql_get_data('export_json_dept_voies_rapprochees',dict(dept=self.dept)) + + def get_json_voies_rapprochees_sans_adresses_data(self): + return sql_get_data('export_json_dept_voies_rapprochees_sans_adresses',dict(dept=self.dept)) + + def get_json_lieux_dits_data(self): + return sql_get_data('export_json_dept_lieux_dits',dict(dept=self.dept)) + + def get_target_filename(self,filetype): + return f'bano-{self.dept}.{filetype}' + + def get_sas_full_filename(self,filetype): + return Path(os.environ['EXPORT_SAS_DIR']) / self.get_target_filename(filetype) + + def get_webdir_full_filename(self,filetype): + return Path(os.environ['EXPORT_WEB_DIR']) / self.get_target_filename(filetype) + + def save_as_csv(self): + if not self.csv_data : + self.csv_data = self.get_csv_data() + with open(self.get_sas_full_filename('csv'),'w', newline='') as csvfile: + writer = csv.writer(csvfile,dialect='unix',quoting=csv.QUOTE_MINIMAL) + writer.writerows([l[0:-1] for l in self.csv_data]) + + def save_as_ttl(self): + if not self.csv_data : + self.csv_data = self.get_csv_data() + with open(self.get_sas_full_filename('ttl'),'w') as ttlfile: + ttlfile.write(f"""@prefix xsd: . +@prefix locn: . +@prefix gn: . +@prefix prov: . +@prefix gsp: . +@prefix geo: . +@prefix rdfs: . +@prefix dcat: . +@prefix foaf: . +@prefix dcterms: . 
+ + a dcat:Catalog ; +\tdcterms:title "Donnees des adresses du projet BANO (Base Adresse Nationale Ouverte) en RDF"@fr ; +\tdcterms:description "Le projet BANO en RDF de Base d\'Adresses Nationale Ouverte initie par OpenStreetMap France."@fr ; +\tfoaf:homepage ; +\tdcterms:language "fr" ; +\tdcterms:license ; +\tdcterms:publisher ; #url openstreetmap France +\tdcterms:issued "2014-05-14"^^xsd:date ; # data issued +\tdcterms:modified "2014-08-21"^^xsd:date ; #last modification +\tdcterms:spatial , ; # region/pays (France) +\t. +""") + for id,numero,voie,cp,ville,source,lat,lon,*others in self.csv_data: + ttlfile.write(f""" a locn:Address , gn:Feature ; +locn:fullAddress "{numero} {voie}, {cp} {ville}, FRANCE"; +locn:addressId "{id}" ; +locn:locatorDesignator "{numero}" ; +locn:thoroughfare "{voie}"@fr ; +locn:postalCode "{cp}" ; +locn:locatorName "{ville}"@fr ; +locn:adminUnitL1 "FR" ;""") +# traitement des arrondissements municipaux de Paris, Lyon, Marseille + if id[0:2] in '13 69 75' and (int(id[0:5]) in range(13201, 13217) or int(id[0:5]) in range(69381, 69370) or int(id[0:5]) in range(75101, 75121)): + ttlfile.write(f"locn:location ;") + else: + ttlfile.write(f"locn:location ;") + ttlfile.write(f"""locn:geometry ; +locn:geometry [a geo:Point ; geo:lat "{lat}" ; geo:long "{lon}" ] ; +locn:geometry [a gsp:Geometry; gsp:asWKT "POINT({lon} {lat})"^^gsp:wktLiteral ] ; +.""") + + + def save_as_shp(self): + subprocess.run(['ogr2ogr', '-f',"ESRI Shapefile", '-lco', 'ENCODING=UTF-8', '-s_srs', 'EPSG:4326', '-t_srs', 'EPSG:4326', '-overwrite', self.get_sas_full_filename('shp'), 'PG:dbname=cadastre user=cadastre', '-sql', f'{self.csv_query}']) + + def save_as_json(self): + with open(self.get_sas_full_filename('json'),'w') as jsonfile: + # if not self.json_commune_data : + # self.json_commune_data = se + for l in self.json_commune_data: + jsonfile.write(f"{json.dumps(l,ensure_ascii=False,separators=(',',':'))}\n") + # print(json.dumps(l)) + # for id,type,name,postcode,lat,lon,cityname,departement,region,population,adm_weight,importance,*others in self.json_commune_data: + # if ';' in postcode: + # print(postcode) + # postcode = postcode.split(';') + # jsonfile.write(f'{{"id":"{id}","type":"{type}", "name":"{name}", "postcode":{json.dumps(postcode)}, "lat":{lat}, "lon":{lon}, "city":"{cityname}", "departement":"{departement}", "region":"{region}", "population":{population}, "adm_weight":{adm_weight}, "importance":{importance}}}\n') + # if not self.json_voies_non_rapprochees_data : + # self.json_voies_non_rapprochees_data = self.get_json_voies_non_rapprochees_data() + # for fantoir,citycode,type,name,postcode,lat,lon,cityname,departement,region,importance,housenumbers,*others in self.json_voies_non_rapprochees_data: + # s_housenumbers = ','.join([f'"{s.split("$")[0]}":{{"lat":{s.split("$")[1]},"lon":{s.split("$")[2]}}}' for s in housenumbers.split('#') ]) + # if ';' in postcode: + # postcode = postcode.split(';') + # jsonfile.write(f'{{"id":"{fantoir}","citycode":"{citycode}","type":"{type}","name":"{name}","postcode":{json.dumps(postcode)},"lat":"{lat}","lon":"{lon}","city":"{cityname}","departement":"{departement}","region":"{region}","importance":{importance},"housenumbers":{{{s_housenumbers}}}}}\n') + # if not self.json_voies_rapprochees_data : + # self.json_voies_rapprochees_data = self.get_json_voies_rapprochees_data() + # for fantoir,citycode,type,name,postcode,lat,lon,cityname,departement,region,importance,housenumbers,*others in self.json_voies_rapprochees_data: + # s_housenumbers = 
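In save_as_ttl() the special case for municipal arrondissements relies on range(69381, 69370), which is empty, so Lyon arrondissements can never take that branch; only Marseille (13201-13216) and Paris (75101-75120) do. An equivalent predicate with the intended bounds could look like this, assuming 69381-69389 for Lyon's nine arrondissements:

def is_arrondissement_municipal(code_insee):
    n = int(code_insee[:5])
    return 13201 <= n <= 13216 or 69381 <= n <= 69389 or 75101 <= n <= 75120

print(is_arrondissement_municipal("69383"))  # True
print(is_arrondissement_municipal("69001"))  # False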
','.join([f'"{s.split("$")[0]}":{{"lat":{s.split("$")[1]},"lon":{s.split("$")[2]}}}' for s in housenumbers.split('#') ]) + # if ';' in postcode: + # postcode = postcode.split(';') + # jsonfile.write(f'{{"id":"{fantoir}","citycode":"{citycode}","type":"{type}","name":"{name}","postcode":{json.dumps(postcode)},"lat":"{lat}","lon":"{lon}","city":"{cityname}","departement":"{departement}","region":"{region}","importance":{importance},"housenumbers":{{{s_housenumbers}}}}}\n') + # if not self.json_voies_rapprochees_sans_adresses_data : + # self.json_voies_rapprochees_sans_adresses_data = self.get_json_voies_rapprochees_sans_adresses_data() + # for fantoir,citycode,type,name,postcode,lat,lon,cityname,departement,region,importance in self.json_voies_rapprochees_sans_adresses_data: + # if ';' in postcode: + # postcode = postcode.split(';') + # jsonfile.write(f'{{"id":"{fantoir}","citycode":"{citycode}","type":"{type}","name":"{name}","postcode":{json.dumps(postcode)},"lat":"{lat}","lon":"{lon}","city":"{cityname}","departement":"{departement}","region":"{region}","importance":{importance}}}\n') + # if not self.json_lieux_dits_data : + # self.json_lieux_dits_data = self.get_json_lieux_dits_data() + # for fantoir,citycode,type,name,postcode,lat,lon,cityname,departement,region,importance,*others in self.json_lieux_dits_data: + # if ';' in postcode: + # postcode = postcode.split(';') + # jsonfile.write(f'{{"id":"{fantoir}","citycode":"{citycode}","type":"{type}","name":"{name}","postcode":{json.dumps(postcode)},"lat":"{lat}","lon":"{lon}","city":"{cityname}","departement":"{departement}","region":"{region}","importance":{importance}}}\n') + +def process(departements, **kwargs): + for dept in departements: + if not hp.is_valid_dept(dept): + print(f"Code {dept} invalide pour un département - abandon") + continue + d = Dataset(dept) + d.get_json_commune_data() + # d.save_as_shp() + # d.save_as_csv() + # d.save_as_ttl() + d.save_as_json() From cc7a76692fb92103e8c1b12e993c381c71db5a03 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Sat, 25 Nov 2023 21:49:46 +0000 Subject: [PATCH 128/163] =?UTF-8?q?Pr=C3=A9paration=20des=20exports?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- bano/sql/table_numeros_export.sql | 54 +++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 bano/sql/table_numeros_export.sql diff --git a/bano/sql/table_numeros_export.sql b/bano/sql/table_numeros_export.sql new file mode 100644 index 0000000..18b3941 --- /dev/null +++ b/bano/sql/table_numeros_export.sql @@ -0,0 +1,54 @@ +CREATE TEMP TABLE sub_cp +AS +SELECT ST_Subdivide(way) AS way, + "ref:INSEE", + postal_code +FROM planet_osm_postal_code +WHERE COALESCE(postal_code,'') != ''; + +DROP TABLE IF EXISTS numeros_export CASCADE; +CREATE TABLE numeros_export +AS +WITH +cp_fantoir +AS +(SELECT fantoir, + MIN(code_postal) AS min_cp +FROM bano_adresses +GROUP BY 1), +num_norm +AS +(SELECT REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REGEXP_REPLACE(UPPER(numero), + '^0*',''),'BIS','B'),'TER','T'),'QUATER','Q'),'QUAT','Q'),' ',''),'à','-'),';',','),'"','') AS num, + * +FROM bano_adresses), +num_norm_id +AS +(SELECT fantoir||'-'||num AS id_add, + row_number() OVER (PARTITION BY fantoir||num ORDER BY CASE WHEN source = 'OSM' THEN 1 ELSE 2 END) AS rang, + * +FROM num_norm) +SELECT dep, + n.code_insee, + n.fantoir, + id_add, + numero, + nom_voie, + COALESCE(code_postal,cp.postal_code,min_cp) code_postal, + 
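The num_norm step of table_numeros_export.sql normalises house numbers before deduplication. The same chain of replacements written out in Python, applied in the order the SQL nests them (sample value invented):

import re

def normalise_numero(numero):
    num = re.sub(r"^0*", "", numero.upper())
    for old, new in (("BIS", "B"), ("TER", "T"), ("QUATER", "Q"), ("QUAT", "Q"),
                     (" ", ""), ("à", "-"), (";", ","), ('"', "")):
        num = num.replace(old, new)
    return num

print(normalise_numero("0012 bis"))  # 12B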
cn.libelle, + source, + lat, + lon, + geometrie +FROM num_norm_id n +JOIN cog_commune cn +ON (cn.com = code_insee) +LEFT OUTER JOIN sub_cp cp +ON (cp."ref:INSEE" = code_insee AND + ST_Contains(cp.way, geometrie)) +LEFT OUTER JOIN cp_fantoir +USING (fantoir) +WHERE rang = 1; + +CREATE INDEX idx_numeros_export_code_insee ON numeros_export(code_insee); +CREATE INDEX idx_numeros_export_dep ON numeros_export(dep); \ No newline at end of file From 1b52f7c1216263e75a55bb57b442b35232f84b6b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Mon, 1 Jan 2024 21:21:43 +0000 Subject: [PATCH 129/163] COG pyramide admin pour les exports --- bano/sources/cog.py | 3 ++- bano/sql/cog_pyramide_admin.sql | 13 +++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) create mode 100644 bano/sql/cog_pyramide_admin.sql diff --git a/bano/sources/cog.py b/bano/sources/cog.py index b9b3bce..860dd0b 100644 --- a/bano/sources/cog.py +++ b/bano/sources/cog.py @@ -6,6 +6,7 @@ import requests from ..db import bano_db +from ..sql import sql_process from .. import batch as b DICT_COG = { @@ -40,7 +41,7 @@ def process_cog(**kwargs): status = download(csv,url) if status: import_to_pg(csv,table) - + sql_process('cog_pyramide_admin',dict()) def download(destination,url): headers = {} diff --git a/bano/sql/cog_pyramide_admin.sql b/bano/sql/cog_pyramide_admin.sql new file mode 100644 index 0000000..2184a53 --- /dev/null +++ b/bano/sql/cog_pyramide_admin.sql @@ -0,0 +1,13 @@ +DROP TABLE IF EXISTS cog_pyramide_admin CASCADE; +CREATE TABLE cog_pyramide_admin +AS +SELECT com AS code_insee, + typecom, + c.libelle AS nom_com, + d.libelle AS nom_dep, + r.libelle AS nom_reg +FROM cog_commune c +JOIN (SELECT dep,libelle FROM cog_departement) d +USING (dep) +JOIN (SELECT reg,libelle FROM cog_region) r +USING (reg); \ No newline at end of file From e553e53391bcef4b1d805ed4466cb51f3997e229 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Mon, 1 Jan 2024 21:22:51 +0000 Subject: [PATCH 130/163] Table commune-codes postaux de source La Poste pour les exports --- bano/sources/datagouv_cp.py | 5 ++++- bano/sql/table_insee_cps.sql | 7 +++++++ 2 files changed, 11 insertions(+), 1 deletion(-) create mode 100644 bano/sql/table_insee_cps.sql diff --git a/bano/sources/datagouv_cp.py b/bano/sources/datagouv_cp.py index e3096c0..d44c8f0 100644 --- a/bano/sources/datagouv_cp.py +++ b/bano/sources/datagouv_cp.py @@ -6,12 +6,14 @@ import requests from ..db import bano_db +from ..sql import sql_process from .. 
import batch as b DICT_SOURCES = { "codes_postaux": [ "codes_postaux", "https://www.data.gouv.fr/fr/datasets/r/5ed9b092-a25d-49e7-bdae-0152797c7577", + "table_insee_cps", ], } @@ -19,11 +21,12 @@ def process(**kwargs): for k,v in DICT_SOURCES.items(): print(f"Chargement de la source {k}") - table,url = v + table,url,script_post_process = v csv = get_destination(f"{k}.csv") status = download(csv,url) if status: import_to_pg(csv,table) + sql_process(script_post_process,dict()) def download(destination,url): diff --git a/bano/sql/table_insee_cps.sql b/bano/sql/table_insee_cps.sql new file mode 100644 index 0000000..f50fb7c --- /dev/null +++ b/bano/sql/table_insee_cps.sql @@ -0,0 +1,7 @@ +DROP TABLE IF EXISTS insee_codes_postaux CASCADE; +CREATE UNLOGGED TABLE insee_codes_postaux +AS +SELECT insee AS code_insee, + STRING_AGG(cp,';') +FROM (SELECT DISTINCT insee, cp FROM codes_postaux ORDER BY 1,2) AS c +GROUP BY 1; \ No newline at end of file From a5344fa002e5108625233fbb9945397f3f2327b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Mon, 1 Jan 2024 21:37:38 +0000 Subject: [PATCH 131/163] =?UTF-8?q?Priorit=C3=A9=20aux=20lieux-dits=20sur?= =?UTF-8?q?=20les=20centro=C3=AFdes?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- bano/rapprochement.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bano/rapprochement.py b/bano/rapprochement.py index cd75453..549d43f 100644 --- a/bano/rapprochement.py +++ b/bano/rapprochement.py @@ -22,8 +22,9 @@ def process_unitaire(code_insee): adresses.charge_numeros_osm() adresses.charge_numeros_ban(topo) points_nommes.charge_points_nommes_lieux_dits_cadastre() - points_nommes.charge_points_nommes_centroides_osm() points_nommes.charge_points_nommes_place_osm() + # Les centroïdes viennent en dernier (fallback). Tout point déjà affecté comme lieu-dit OSM est inchangé dans l'étape charge_points_nommes_centroides_osm() + points_nommes.charge_points_nommes_centroides_osm() adresses.noms_des_adresses(noms) points_nommes.noms_des_points_nommes(noms) From f1c57c00bef863603de3a93026e73fdce20c0532 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Mon, 1 Jan 2024 21:48:49 +0000 Subject: [PATCH 132/163] WIP Export JSON --- bano/export.py | 56 +++---- bano/sql.py | 16 ++ bano/sql/export_json_dept_communes.sql | 20 +-- .../export_json_dept_voies_avec_adresses.sql | 35 ++++ bano/sql/table_numeros_export.sql | 54 ------- bano/sql/table_polygones_postaux.sql | 26 +++ bano/sql/tables_export.sql | 149 ++++++++++++++++++ 7 files changed, 260 insertions(+), 96 deletions(-) create mode 100644 bano/sql/export_json_dept_voies_avec_adresses.sql delete mode 100644 bano/sql/table_numeros_export.sql create mode 100644 bano/sql/table_polygones_postaux.sql create mode 100644 bano/sql/tables_export.sql diff --git a/bano/export.py b/bano/export.py index f594f32..ca8f8df 100644 --- a/bano/export.py +++ b/bano/export.py @@ -9,7 +9,7 @@ from pathlib import Path -from .sql import sql_get_data,sql_get_dict_data +from .sql import sql_get_data,sql_get_dict_data,sql_process from . import constants from . 
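A word on the insee_codes_postaux table introduced just above: it reduces the La Poste file to one row per commune, with all postcodes concatenated by ';'. The export code elsewhere in this series splits that value back into a JSON array when a commune has several postcodes and keeps a plain string otherwise. A minimal sketch of that convention (function name and sample codes are illustrative, not taken from the patches):

def postcode_field(cps):
    # '72181' stays a string; '13001;13002' becomes a list, as in save_as_json().
    return cps.split(';') if ';' in cps else cps

print(postcode_field('72181'))        # 72181
print(postcode_field('13001;13002'))  # ['13001', '13002']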
import helpers as hp @@ -30,14 +30,19 @@ def get_json_commune_data(self): self.json_commune_data = sql_get_dict_data('export_json_dept_communes',dict(dept=self.dept)) # print(json.dumps(self.json_commune_data)) - def get_json_voies_non_rapprochees_data(self): - return sql_get_data('export_json_dept_voies_non_rapprochees',dict(dept=self.dept)) + # def get_json_voies_non_rapprochees_data(self): + # return sql_get_data('export_json_dept_voies_non_rapprochees',dict(dept=self.dept)) - def get_json_voies_rapprochees_data(self): - return sql_get_data('export_json_dept_voies_rapprochees',dict(dept=self.dept)) + # def get_json_voies_rapprochees_data(self): + # return sql_get_data('export_json_dept_voies_rapprochees',dict(dept=self.dept)) - def get_json_voies_rapprochees_sans_adresses_data(self): - return sql_get_data('export_json_dept_voies_rapprochees_sans_adresses',dict(dept=self.dept)) + def get_json_voies_avec_adresses_data(self): + self.json_voies_avec_adresses_data = sql_get_dict_data('export_json_dept_voies_avec_adresses',dict(dept=self.dept)) + # for j in self.json_voies_avec_adresses_data: + # j['housenumbers'] = [{p.split('$')[0]:{'lat':p.split('$')[1],'lon':p.split('$')[2]}} for p in j['housenumbers'].split('@')] + # jl = json.dumps(j) + # print(jl) + # print(json.dumps(self.json_voies_avec_adresses_data)) def get_json_lieux_dits_data(self): return sql_get_data('export_json_dept_lieux_dits',dict(dept=self.dept)) @@ -110,32 +115,16 @@ def save_as_shp(self): def save_as_json(self): with open(self.get_sas_full_filename('json'),'w') as jsonfile: - # if not self.json_commune_data : - # self.json_commune_data = se for l in self.json_commune_data: jsonfile.write(f"{json.dumps(l,ensure_ascii=False,separators=(',',':'))}\n") - # print(json.dumps(l)) - # for id,type,name,postcode,lat,lon,cityname,departement,region,population,adm_weight,importance,*others in self.json_commune_data: - # if ';' in postcode: - # print(postcode) - # postcode = postcode.split(';') - # jsonfile.write(f'{{"id":"{id}","type":"{type}", "name":"{name}", "postcode":{json.dumps(postcode)}, "lat":{lat}, "lon":{lon}, "city":"{cityname}", "departement":"{departement}", "region":"{region}", "population":{population}, "adm_weight":{adm_weight}, "importance":{importance}}}\n') - # if not self.json_voies_non_rapprochees_data : - # self.json_voies_non_rapprochees_data = self.get_json_voies_non_rapprochees_data() - # for fantoir,citycode,type,name,postcode,lat,lon,cityname,departement,region,importance,housenumbers,*others in self.json_voies_non_rapprochees_data: - # s_housenumbers = ','.join([f'"{s.split("$")[0]}":{{"lat":{s.split("$")[1]},"lon":{s.split("$")[2]}}}' for s in housenumbers.split('#') ]) - # if ';' in postcode: - # postcode = postcode.split(';') - # jsonfile.write(f'{{"id":"{fantoir}","citycode":"{citycode}","type":"{type}","name":"{name}","postcode":{json.dumps(postcode)},"lat":"{lat}","lon":"{lon}","city":"{cityname}","departement":"{departement}","region":"{region}","importance":{importance},"housenumbers":{{{s_housenumbers}}}}}\n') - # if not self.json_voies_rapprochees_data : - # self.json_voies_rapprochees_data = self.get_json_voies_rapprochees_data() - # for fantoir,citycode,type,name,postcode,lat,lon,cityname,departement,region,importance,housenumbers,*others in self.json_voies_rapprochees_data: - # s_housenumbers = ','.join([f'"{s.split("$")[0]}":{{"lat":{s.split("$")[1]},"lon":{s.split("$")[2]}}}' for s in housenumbers.split('#') ]) - # if ';' in postcode: - # postcode = postcode.split(';') - # 
jsonfile.write(f'{{"id":"{fantoir}","citycode":"{citycode}","type":"{type}","name":"{name}","postcode":{json.dumps(postcode)},"lat":"{lat}","lon":"{lon}","city":"{cityname}","departement":"{departement}","region":"{region}","importance":{importance},"housenumbers":{{{s_housenumbers}}}}}\n') - # if not self.json_voies_rapprochees_sans_adresses_data : - # self.json_voies_rapprochees_sans_adresses_data = self.get_json_voies_rapprochees_sans_adresses_data() + for l in self.json_voies_avec_adresses_data: + dict_hsnr = {} + for p in l['housenumbers'].split('@'): + numero,lat,lon = p.split('$') + dict_hsnr[numero] = dict(lat=float(lat),lon=float(lon)) + l['housenumbers'] = dict_hsnr + jsonfile.write(f"{json.dumps(l,ensure_ascii=False,separators=(',',':'))}\n") + # for fantoir,citycode,type,name,postcode,lat,lon,cityname,departement,region,importance in self.json_voies_rapprochees_sans_adresses_data: # if ';' in postcode: # postcode = postcode.split(';') @@ -148,13 +137,16 @@ def save_as_json(self): # jsonfile.write(f'{{"id":"{fantoir}","citycode":"{citycode}","type":"{type}","name":"{name}","postcode":{json.dumps(postcode)},"lat":"{lat}","lon":"{lon}","city":"{cityname}","departement":"{departement}","region":"{region}","importance":{importance}}}\n') def process(departements, **kwargs): + sql_process('table_polygones_postaux',dict()) + sql_process('tables_export',dict()) for dept in departements: if not hp.is_valid_dept(dept): print(f"Code {dept} invalide pour un département - abandon") continue d = Dataset(dept) - d.get_json_commune_data() # d.save_as_shp() # d.save_as_csv() # d.save_as_ttl() + d.get_json_commune_data() + d.get_json_voies_avec_adresses_data() d.save_as_json() diff --git a/bano/sql.py b/bano/sql.py index abc2f75..249aae2 100644 --- a/bano/sql.py +++ b/bano/sql.py @@ -1,6 +1,8 @@ #!/usr/bin/env python # coding: UTF-8 +import psycopg2.extras + from pathlib import Path from .db import bano_db @@ -32,3 +34,17 @@ def sql_get_data(sqlfile, args): return cur.fetchall() return None + +def sql_get_dict_data(sqlfile, args): + sqlfile = (Path(SQLDIR) / sqlfile).with_suffix(".sql") + with open(sqlfile) as s: + q = s.read() + for k, v in args.items(): + q = q.replace(f"__{k}__", v) + + with bano_db.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur: + cur.execute(q) + + return cur.fetchall() + + return None diff --git a/bano/sql/export_json_dept_communes.sql b/bano/sql/export_json_dept_communes.sql index 706b24c..0bccfa3 100644 --- a/bano/sql/export_json_dept_communes.sql +++ b/bano/sql/export_json_dept_communes.sql @@ -10,18 +10,18 @@ AS cp FROM rang_cp WHERE rang = 1) -SELECT DISTINCT code_insee AS id, -- id - i.type, -- type - i.name, -- name - cp.cp, -- postcode - round(lat,6) AS lat, -- lat - round(lon,6) AS lon, -- lon - i.name , -- city - cd.libelle, -- departement - cr.libelle, -- region, +SELECT DISTINCT code_insee AS id, + i.type AS type, + i.name AS name, + cp.cp AS postcode, + round(lat,6)::float AS lat, + round(lon,6)::float AS lon, + i.name AS city, + cd.libelle AS departement, + cr.libelle AS region, population, adm_weight, - GREATEST(0.075,ROUND(LOG((adm_weight)+LOG(population+1)/3)::decimal,4)) AS importance + GREATEST(0.075,ROUND(LOG((adm_weight)+LOG(population+1)/3)::decimal,4))::float AS importance FROM infos_communes i JOIN cp USING (code_insee) diff --git a/bano/sql/export_json_dept_voies_avec_adresses.sql b/bano/sql/export_json_dept_voies_avec_adresses.sql new file mode 100644 index 0000000..0936c2d --- /dev/null +++ 
b/bano/sql/export_json_dept_voies_avec_adresses.sql @@ -0,0 +1,35 @@ +SELECT fantoir AS id, + code_insee AS citycode, + 'street' AS type, + REPLACE(REPLACE(REGEXP_REPLACE(nom_voie,'\t',' '),'"',chr(39)),'’',chr(39)) AS name, + code_postal AS postcode, + ROUND(lat_c::numeric,6)::float AS lat, + ROUND(lon_c::numeric,6)::float AS lon, + nom_com AS city, + nom_dep AS departement, + nom_reg AS region, + ROUND(LOG(c.adm_weight+LOG(c.population+1)/3)::numeric*LOG(1+LOG(nombre_adresses+1)+LOG(longueur_max+1)+LOG(CASE WHEN nom_voie like 'Boulevard%' THEN 4 WHEN nom_voie LIKE 'Place%' THEN 4 WHEN nom_voie LIKE 'Espl%' THEN 4 WHEN nom_voie LIKE 'Av%' THEN 3 WHEN nom_voie LIKE 'Rue %' THEN 2 ELSE 1 END))::numeric,4)::float AS importance, + string_agg(numero||'$'||ROUND(ne.lat::numeric,6)::text||'$'||ROUND(ne.lon::numeric,6)::text,'@' ORDER BY numero) AS housenumbers +FROM (SELECT * + FROM numeros_export + WHERE dep = '__dept__') ne +JOIN cog_pyramide_admin +USING (code_insee) +JOIN (SELECT fantoir, + lon AS lon_c, + lat AS lat_c + FROM bano_points_nommes + WHERE code_dept = '__dept__') pn +USING (fantoir) +JOIN infos_communes c +USING (code_insee) +JOIN (SELECT fantoir, + longueur_max, + nombre_adresses + FROM numeros_export_importance + WHERE dep = '__dept__') nimp +USING (fantoir) +GROUP BY 1,2,3,4,5,6,7,8,9,10,11 +ORDER BY 1 + +limit 50; \ No newline at end of file diff --git a/bano/sql/table_numeros_export.sql b/bano/sql/table_numeros_export.sql deleted file mode 100644 index 18b3941..0000000 --- a/bano/sql/table_numeros_export.sql +++ /dev/null @@ -1,54 +0,0 @@ -CREATE TEMP TABLE sub_cp -AS -SELECT ST_Subdivide(way) AS way, - "ref:INSEE", - postal_code -FROM planet_osm_postal_code -WHERE COALESCE(postal_code,'') != ''; - -DROP TABLE IF EXISTS numeros_export CASCADE; -CREATE TABLE numeros_export -AS -WITH -cp_fantoir -AS -(SELECT fantoir, - MIN(code_postal) AS min_cp -FROM bano_adresses -GROUP BY 1), -num_norm -AS -(SELECT REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REGEXP_REPLACE(UPPER(numero), - '^0*',''),'BIS','B'),'TER','T'),'QUATER','Q'),'QUAT','Q'),' ',''),'à','-'),';',','),'"','') AS num, - * -FROM bano_adresses), -num_norm_id -AS -(SELECT fantoir||'-'||num AS id_add, - row_number() OVER (PARTITION BY fantoir||num ORDER BY CASE WHEN source = 'OSM' THEN 1 ELSE 2 END) AS rang, - * -FROM num_norm) -SELECT dep, - n.code_insee, - n.fantoir, - id_add, - numero, - nom_voie, - COALESCE(code_postal,cp.postal_code,min_cp) code_postal, - cn.libelle, - source, - lat, - lon, - geometrie -FROM num_norm_id n -JOIN cog_commune cn -ON (cn.com = code_insee) -LEFT OUTER JOIN sub_cp cp -ON (cp."ref:INSEE" = code_insee AND - ST_Contains(cp.way, geometrie)) -LEFT OUTER JOIN cp_fantoir -USING (fantoir) -WHERE rang = 1; - -CREATE INDEX idx_numeros_export_code_insee ON numeros_export(code_insee); -CREATE INDEX idx_numeros_export_dep ON numeros_export(dep); \ No newline at end of file diff --git a/bano/sql/table_polygones_postaux.sql b/bano/sql/table_polygones_postaux.sql new file mode 100644 index 0000000..5a24383 --- /dev/null +++ b/bano/sql/table_polygones_postaux.sql @@ -0,0 +1,26 @@ +DROP TABLE IF EXISTS polygones_postaux CASCADE; +CREATE UNLOGGED TABLE polygones_postaux +AS +SELECT way AS geometrie, + CASE postal_code + WHEN '' THEN "addr:postcode" + ELSE postal_code + END AS code_postal +FROM planet_osm_postal_code +WHERE boundary = 'postal_code' AND + "addr:postcode"||postal_code != '' +ORDER BY ST_Area(way); +ALTER TABLE polygones_postaux ADD COLUMN id serial; + +INSERT INTO 
polygones_postaux +SELECT way, + CASE postal_code + WHEN '' THEN "addr:postcode" + ELSE postal_code + END AS code_postal +FROM planet_osm_postal_code +WHERE boundary = 'administrative' AND + "addr:postcode"||postal_code != '' +ORDER BY ST_Area(way); + +CREATE INDEX gidx_polygones_postaux ON polygones_postaux USING GIST(geometrie); \ No newline at end of file diff --git a/bano/sql/tables_export.sql b/bano/sql/tables_export.sql new file mode 100644 index 0000000..79aaba1 --- /dev/null +++ b/bano/sql/tables_export.sql @@ -0,0 +1,149 @@ +-- codes postaux +-- ne prendre dans planet_osm_postal_code que les CPs avec un boundary != '' et un ref:insee != '' +-- +-- construire la table des insee | [CPs] à partir de la table codes_postaux de La Poste +-- utiliser cette table pour l'export des communes +-- +-- s'appuyer sur cette table pour déterminer la géometrie infra communale des CPs des communes pluri-distribuées +-- utiliser les polygones issus de ce croisement en jointure ouverte pour les points nommes et les adresses, avec comme fallback la table des codes_postaux par commune si jointure nulle +-- possibilité : faire un cross join par commune pluri-distribuée et en sortir un distinct (geometrie,cp) + +CREATE TEMP TABLE sub_cp +AS +SELECT ST_Subdivide(way) AS way, + "ref:INSEE", + postal_code +FROM planet_osm_postal_code +WHERE COALESCE(postal_code,'') != ''; + +DROP TABLE IF EXISTS numeros_export CASCADE; +CREATE UNLOGGED TABLE numeros_export +AS +WITH +cp_fantoir +AS +(SELECT fantoir, + MIN(code_postal) AS min_cp +FROM bano_adresses +GROUP BY 1), +num_norm +AS +(SELECT REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REGEXP_REPLACE(UPPER(numero), + '^0*',''),'BIS','B'),'TER','T'),'QUATER','Q'),'QUAT','Q'),' ',''),'à','-'),';',','),'"','') AS num, + * +FROM bano_adresses), +num_norm_id +AS +(SELECT fantoir||'-'||num AS id_add, + row_number() OVER (PARTITION BY fantoir||num ORDER BY CASE WHEN source = 'OSM' THEN 1 ELSE 2 END) AS rang, + * +FROM num_norm) +SELECT dep, + n.code_insee, + n.fantoir, + id_add, + numero, + nom_voie, + COALESCE(code_postal,cp.postal_code,min_cp) code_postal, + cn.libelle, + source, + lat, + lon, + geometrie +FROM num_norm_id n +JOIN cog_commune cn +ON (cn.com = code_insee) +LEFT OUTER JOIN sub_cp cp +ON (cp."ref:INSEE" = code_insee AND + ST_Contains(cp.way, geometrie)) +LEFT OUTER JOIN cp_fantoir +USING (fantoir) +WHERE rang = 1; + +-- CREATE INDEX idx_numeros_export_code_insee ON numeros_export(code_insee); +CREATE INDEX idx_numeros_export_dep ON numeros_export(dep); + +DROP TABLE IF EXISTS numeros_export_importance CASCADE; +CREATE UNLOGGED TABLE numeros_export_importance +AS +SELECT dep, + fantoir, + ST_Length(ST_Transform(ST_Longestline(ST_Convexhull(ST_Collect(geometrie)),ST_Convexhull(ST_Collect(geometrie))),3857)) AS longueur_max, + count(*) AS nombre_adresses +FROM numeros_export +GROUP BY dep, + fantoir; + +CREATE INDEX idx_numeros_export_importance_dep ON numeros_export_importance(dep); + +DROP TABLE IF EXISTS export_voies_adresses_json CASCADE; +CREATE UNLOGGED TABLE export_voies_adresses_json +AS +SELECT fantoir AS id, + ne.code_insee AS citycode, + 'street' AS type, + REPLACE(REPLACE(REGEXP_REPLACE(nom_voie,'\t',' '),'"',chr(39)),'’',chr(39)) AS name, + code_postal AS postcode, + ROUND(pn.lat::numeric,6)::float AS lat, + ROUND(pn.lon::numeric,6)::float AS lon, + nom_com AS city, + nom_dep AS departement, + nom_reg AS region, + ROUND(LOG(c.adm_weight+LOG(c.population+1)/3)::numeric*LOG(1+LOG(nombre_adresses+1)+LOG(longueur_max+1)+LOG(CASE 
WHEN nom_voie like 'Boulevard%' THEN 4 WHEN nom_voie LIKE 'Place%' THEN 4 WHEN nom_voie LIKE 'Espl%' THEN 4 WHEN nom_voie LIKE 'Av%' THEN 3 WHEN nom_voie LIKE 'Rue %' THEN 2 ELSE 1 END))::numeric,4)::float AS importance, + string_agg(numero||'$'||ROUND(ne.lat::numeric,6)::text||'$'||ROUND(ne.lon::numeric,6)::text,'@' ORDER BY numero) AS housenumbers +FROM numeros_export ne +JOIN cog_pyramide_admin AS cog +USING (code_insee) +JOIN (SELECT fantoir, + lon, + lat + FROM bano_points_nommes) AS pn +USING (fantoir) +JOIN infos_communes c +USING (code_insee) +JOIN numeros_export_importance +USING (fantoir) +GROUP BY 1,2,3,4,5,6,7,8,9,10,11 +ORDER BY 1; + +CREATE INDEX idx_export_voies_adresses_json_dep ON export_voies_adresses_json(dep); + +DROP TABLE IF EXISTS export_voies_sans_adresses_json CASCADE; +CREATE UNLOGGED TABLE export_voies_sans_adresses_json +AS +WITH +set_fantoir +AS +(SELECT fantoir FROM bano_points_nommes +EXCEPT +SELECT fantoir FROM numeros_export) +SELECT pn.fantoir AS id, + pn.code_insee AS citycode, + nature, + CASE + WHEN nature = 'place' THEN 'place' + WHEN nature = 'lieu-dit' THEN 'place' + ELSE 'street' + END AS type, + REPLACE(REPLACE(REGEXP_REPLACE(nom,'\t',' '),'"',chr(39)),'’',chr(39)) AS name, +-- code_postal AS postcode, + ROUND(pn.lat::numeric,6)::float AS lat, + ROUND(pn.lon::numeric,6)::float AS lon, + nom_com AS city, + nom_dep AS departement, + nom_reg AS region, + CASE + WHEN nature IN ('place','lieu-dit') THEN 0.05 + ELSE ROUND(LOG(c.adm_weight+LOG(c.population+1)/3)::numeric*LOG(1+LOG(CASE WHEN nom like 'Boulevard%' THEN 4 WHEN nom LIKE 'Place%' THEN 4 WHEN nom LIKE 'Espl%' THEN 4 WHEN nom LIKE 'Av%' THEN 3 WHEN nom LIKE 'Rue %' THEN 2 ELSE 1 END))::numeric,4)::float + END AS importance, + source, + RANK() OVER (PARTITION BY fantoir ORDER BY CASE source WHEN 'OSM' THEN 1 ELSE 2 END, CASE nature WHEN 'centroide' THEN 2 ELSE 1 END) +FROM set_fantoir +--JOIN (select * from bano_points_nommes where code_insee = '85172') pn +JOIN bano_points_nommes AS pn +USING (fantoir) +JOIN cog_pyramide_admin AS cog +USING (code_insee) +JOIN infos_communes c +USING (code_insee) +ORDER BY 1 \ No newline at end of file From dde154be9783a282d8fa1967818d5b7811fcf66c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Mon, 1 Jan 2024 21:49:25 +0000 Subject: [PATCH 133/163] menage --- copy_table_from_osm_to_cadastre.sh | 12 ------------ 1 file changed, 12 deletions(-) delete mode 100755 copy_table_from_osm_to_cadastre.sh diff --git a/copy_table_from_osm_to_cadastre.sh b/copy_table_from_osm_to_cadastre.sh deleted file mode 100755 index 16e757c..0000000 --- a/copy_table_from_osm_to_cadastre.sh +++ /dev/null @@ -1,12 +0,0 @@ -#!/bin/bash - -SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" -source $SCRIPT_DIR/config - -set -e - -cd $TMP_DIR -pg_dump -t ${1} -U cadastre osm > ${1}_dump.sql -psql -d cadastre -U cadastre -c "DROP TABLE IF EXISTS ${1} CASCADE;" -psql -d cadastre -U cadastre -f ${1}_dump.sql -rm -f ${1}_dump.sql From dafdf085f6023d9780741cb2c2c4b6e055d08312 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Sun, 7 Jan 2024 22:23:45 +0000 Subject: [PATCH 134/163] WIP Export JSON --- ..._cps.sql => table_insee_codes_postaux.sql} | 2 +- bano/sql/tables_export.sql | 95 +++++++++++-------- 2 files changed, 55 insertions(+), 42 deletions(-) rename bano/sql/{table_insee_cps.sql => table_insee_codes_postaux.sql} (86%) diff --git a/bano/sql/table_insee_cps.sql b/bano/sql/table_insee_codes_postaux.sql 
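The importance expression used above for export_voies_adresses_json weighs the commune (adm_weight, population) against the street itself (number of addresses, longest extent, a type weight guessed from the name prefix). Below is a Python transcription of that formula, added only to make it easier to read; PostgreSQL's LOG() on numerics is base 10, hence math.log10, and the sample values are invented:

import math

def importance(adm_weight, population, nombre_adresses, longueur_max, nom_voie):
    # type weight from the name prefix, as in the SQL CASE expression
    if nom_voie.startswith(('Boulevard', 'Place', 'Espl')):
        poids = 4
    elif nom_voie.startswith('Av'):
        poids = 3
    elif nom_voie.startswith('Rue '):
        poids = 2
    else:
        poids = 1
    commune = math.log10(adm_weight + math.log10(population + 1) / 3)
    voie = math.log10(1 + math.log10(nombre_adresses + 1) + math.log10(longueur_max + 1) + math.log10(poids))
    return round(commune * voie, 4)

# importance(4, 15000, 120, 800, 'Avenue de la Gare') gives roughly 0.59.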
similarity index 86% rename from bano/sql/table_insee_cps.sql rename to bano/sql/table_insee_codes_postaux.sql index f50fb7c..9844767 100644 --- a/bano/sql/table_insee_cps.sql +++ b/bano/sql/table_insee_codes_postaux.sql @@ -2,6 +2,6 @@ DROP TABLE IF EXISTS insee_codes_postaux CASCADE; CREATE UNLOGGED TABLE insee_codes_postaux AS SELECT insee AS code_insee, - STRING_AGG(cp,';') + STRING_AGG(cp,';') AS cps FROM (SELECT DISTINCT insee, cp FROM codes_postaux ORDER BY 1,2) AS c GROUP BY 1; \ No newline at end of file diff --git a/bano/sql/tables_export.sql b/bano/sql/tables_export.sql index 79aaba1..298d4d3 100644 --- a/bano/sql/tables_export.sql +++ b/bano/sql/tables_export.sql @@ -8,13 +8,15 @@ -- utiliser les polygones issus de ce croisement en jointure ouverte pour les points nommes et les adresses, avec comme fallback la table des codes_postaux par commune si jointure nulle -- possibilité : faire un cross join par commune pluri-distribuée et en sortir un distinct (geometrie,cp) -CREATE TEMP TABLE sub_cp -AS -SELECT ST_Subdivide(way) AS way, - "ref:INSEE", - postal_code -FROM planet_osm_postal_code -WHERE COALESCE(postal_code,'') != ''; + +-- CREATE TEMP TABLE sub_cp +-- AS +-- SELECT ST_Subdivide(way) AS way, +-- "ref:INSEE", +-- postal_code +-- FROM planet_osm_postal_code +-- WHERE COALESCE(postal_code,'') != ''; + DROP TABLE IF EXISTS numeros_export CASCADE; CREATE UNLOGGED TABLE numeros_export @@ -37,49 +39,51 @@ AS (SELECT fantoir||'-'||num AS id_add, row_number() OVER (PARTITION BY fantoir||num ORDER BY CASE WHEN source = 'OSM' THEN 1 ELSE 2 END) AS rang, * -FROM num_norm) -SELECT dep, +FROM num_norm), +resultats_multi_cp +AS +(SELECT dep, n.code_insee, n.fantoir, id_add, numero, nom_voie, - COALESCE(code_postal,cp.postal_code,min_cp) code_postal, + COALESCE(n.code_postal,pp.code_postal,min_cp) code_postal, cn.libelle, source, lat, lon, - geometrie + n.geometrie, + RANK() OVER (PARTITION BY id_add ORDER BY pp.id) rang_postal FROM num_norm_id n JOIN cog_commune cn ON (cn.com = code_insee) -LEFT OUTER JOIN sub_cp cp -ON (cp."ref:INSEE" = code_insee AND - ST_Contains(cp.way, geometrie)) +LEFT OUTER JOIN polygones_postaux pp +ON ST_Contains(pp.geometrie, n.geometrie) LEFT OUTER JOIN cp_fantoir USING (fantoir) -WHERE rang = 1; +WHERE rang = 1) +SELECT * +FROM resultats_multi_cp +WHERE rang_postal = 1; --- CREATE INDEX idx_numeros_export_code_insee ON numeros_export(code_insee); CREATE INDEX idx_numeros_export_dep ON numeros_export(dep); DROP TABLE IF EXISTS numeros_export_importance CASCADE; CREATE UNLOGGED TABLE numeros_export_importance AS -SELECT dep, - fantoir, +SELECT fantoir, ST_Length(ST_Transform(ST_Longestline(ST_Convexhull(ST_Collect(geometrie)),ST_Convexhull(ST_Collect(geometrie))),3857)) AS longueur_max, count(*) AS nombre_adresses FROM numeros_export -GROUP BY dep, - fantoir; +GROUP BY fantoir; -CREATE INDEX idx_numeros_export_importance_dep ON numeros_export_importance(dep); DROP TABLE IF EXISTS export_voies_adresses_json CASCADE; CREATE UNLOGGED TABLE export_voies_adresses_json AS -SELECT fantoir AS id, +SELECT c.dep, + fantoir AS id, ne.code_insee AS citycode, 'street' AS type, REPLACE(REPLACE(REGEXP_REPLACE(nom_voie,'\t',' '),'"',chr(39)),'’',chr(39)) AS name, @@ -103,41 +107,44 @@ JOIN infos_communes c USING (code_insee) JOIN numeros_export_importance USING (fantoir) -GROUP BY 1,2,3,4,5,6,7,8,9,10,11 +GROUP BY 1,2,3,4,5,6,7,8,9,10,11,12 ORDER BY 1; CREATE INDEX idx_export_voies_adresses_json_dep ON export_voies_adresses_json(dep); -DROP TABLE IF EXISTS 
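Worth spelling out in the query above: polygones_postaux is filled smallest-area first (postal_code boundaries, then administrative fallbacks), so lower serial ids are expected to correspond to the more specific polygons, and RANK() ... ORDER BY pp.id in resultats_multi_cp keeps the most specific postal polygon containing each address, with the commune-level code as fallback. The selection rule in plain Python, with invented data structures, purely as an illustration:

def pick_postcode(candidates):
    # candidates: (polygon_id, code_postal) pairs for every postal polygon containing the point;
    # lower ids come from smaller polygons, so the minimum id is the most specific match.
    return min(candidates, key=lambda c: c[0])[1] if candidates else None

print(pick_postcode([(12, '72000'), (3, '72100')]))  # 72100
print(pick_postcode([]))                             # None, caller falls back to the commune code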
export_voies_sans_adresses_json CASCADE; -CREATE UNLOGGED TABLE export_voies_sans_adresses_json +DROP TABLE IF EXISTS export_voies_ld_sans_adresses_json CASCADE; +CREATE UNLOGGED TABLE export_voies_ld_sans_adresses_json AS WITH set_fantoir AS (SELECT fantoir FROM bano_points_nommes EXCEPT -SELECT fantoir FROM numeros_export) -SELECT pn.fantoir AS id, +SELECT fantoir FROM numeros_export), +resultats_multi_cp +AS +(SELECT pn.fantoir AS id, pn.code_insee AS citycode, - nature, - CASE - WHEN nature = 'place' THEN 'place' - WHEN nature = 'lieu-dit' THEN 'place' - ELSE 'street' - END AS type, + nature, + CASE + WHEN nature = 'place' THEN 'place' + WHEN nature = 'lieu-dit' THEN 'place' + ELSE 'street' + END AS type, REPLACE(REPLACE(REGEXP_REPLACE(nom,'\t',' '),'"',chr(39)),'’',chr(39)) AS name, --- code_postal AS postcode, + code_postal AS postcode, ROUND(pn.lat::numeric,6)::float AS lat, ROUND(pn.lon::numeric,6)::float AS lon, nom_com AS city, nom_dep AS departement, nom_reg AS region, - CASE - WHEN nature IN ('place','lieu-dit') THEN 0.05 - ELSE ROUND(LOG(c.adm_weight+LOG(c.population+1)/3)::numeric*LOG(1+LOG(CASE WHEN nom like 'Boulevard%' THEN 4 WHEN nom LIKE 'Place%' THEN 4 WHEN nom LIKE 'Espl%' THEN 4 WHEN nom LIKE 'Av%' THEN 3 WHEN nom LIKE 'Rue %' THEN 2 ELSE 1 END))::numeric,4)::float - END AS importance, - source, - RANK() OVER (PARTITION BY fantoir ORDER BY CASE source WHEN 'OSM' THEN 1 ELSE 2 END, CASE nature WHEN 'centroide' THEN 2 ELSE 1 END) + CASE + WHEN nature IN ('place','lieu-dit') THEN 0.05 + ELSE ROUND(LOG(c.adm_weight+LOG(c.population+1)/3)::numeric*LOG(1+LOG(CASE WHEN nom like 'Boulevard%' THEN 4 WHEN nom LIKE 'Place%' THEN 4 WHEN nom LIKE 'Espl%' THEN 4 WHEN nom LIKE 'Av%' THEN 3 WHEN nom LIKE 'Rue %' THEN 2 ELSE 1 END))::numeric,4)::float + END AS importance, + source, + RANK() OVER (PARTITION BY fantoir ORDER BY CASE source WHEN 'OSM' THEN 1 ELSE 2 END, CASE nature WHEN 'centroide' THEN 2 ELSE 1 END,pp.id) AS rang_par_fantoir, + c.dep FROM set_fantoir --JOIN (select * from bano_points_nommes where code_insee = '85172') pn JOIN bano_points_nommes AS pn @@ -146,4 +153,10 @@ JOIN cog_pyramide_admin AS cog USING (code_insee) JOIN infos_communes c USING (code_insee) -ORDER BY 1 \ No newline at end of file +JOIN polygones_postaux pp +ON ST_Contains(pp.geometrie, pn.geometrie)) +SELECT * +FROM resultats_multi_cp +WHERE rang_par_fantoir = 1; + +CREATE INDEX idx_export_voies_ld_sans_adresses_json_dep ON export_voies_ld_sans_adresses_json(dep); From 7672fe5a7b929e1fbe531ef2edfc42cb6d5c160a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Sun, 14 Jan 2024 22:46:27 +0000 Subject: [PATCH 135/163] WIP exports --- bano/bin.py | 29 ++++ bano/export.py | 152 +++++++----------- bano/sql.py | 8 + bano/sql/export_csv_dept.sql | 11 ++ bano/sql/export_json_dept_communes.sql | 16 +- .../export_json_dept_voies_avec_adresses.sql | 50 ++---- ...xport_json_dept_voies_ld_sans_adresses.sql | 14 ++ bano/sql/table_insee_codes_postaux.sql | 4 +- bano/sql/table_polygones_postaux.sql | 6 +- bano/sql/tables_export.sql | 3 + 10 files changed, 146 insertions(+), 147 deletions(-) create mode 100644 bano/sql/export_csv_dept.sql create mode 100644 bano/sql/export_json_dept_voies_ld_sans_adresses.sql diff --git a/bano/bin.py b/bano/bin.py index f1a551b..1cdb50a 100644 --- a/bano/bin.py +++ b/bano/bin.py @@ -9,6 +9,7 @@ from . import rapprochement from . import boite_a_outils from . import export +from . 
import publish from .sources import topo, ban, cog, cadastre_ld, ban2topo, datagouv_commune_summary as datagouv_cs,datagouv_cp from .constants import DEPARTEMENTS @@ -133,6 +134,13 @@ def main(): ) subparser.set_defaults(func=boite_a_outils.update_infos_communes) + subparser = subparsers.add_parser( + "prepare_export", + help="Calculs en prévision des exports", + description="Calculs en prévision des exports", + ) + subparser.set_defaults(func=export.prepare_export) + subparser = subparsers.add_parser( "export", help="Export par département dans différents formats", @@ -147,6 +155,27 @@ def main(): ) subparser.set_defaults(func=export.process) + subparser = subparsers.add_parser( + "publish", + help="Publication des exports dans le dossier web de https://bano.openstreetmap.fr/data - fichiers par département", + description="Publication des exports dans le dossier web de https://bano.openstreetmap.fr/data - fichiers par département", + ) + subparser.add_argument( + "departements", + type=str, + help="Départements à traiter", + nargs="*", + default=DEPARTEMENTS, + ) + subparser.set_defaults(func=publish.process) + + subparser = subparsers.add_parser( + "publish_aggregate", + help="Publication des exports dans un dossier web - fichiers France entière", + description="Publication des exports dans un dossier web - fichiers France entière", + ) + subparser.set_defaults(func=publish.process_full) + args = parser.parse_args() try: diff --git a/bano/export.py b/bano/export.py index ca8f8df..1295d87 100644 --- a/bano/export.py +++ b/bano/export.py @@ -9,65 +9,21 @@ from pathlib import Path -from .sql import sql_get_data,sql_get_dict_data,sql_process +from .sql import sql_get_data,sql_get_dict_data,sql_process,sql_query from . import constants from . import helpers as hp -class Dataset: - def __init__(self, dept): - self.dept = dept - self.csv_data = None - self.json_commune_data = None - self.json_voies_rapprochees_data = None - self.json_voies_rapprochees_sans_adresses_data = None - self.json_voies_non_rapprochees_data = None - self.json_lieux_dits_data = None +def get_csv_data(dept): + return sql_get_data('export_csv_dept',dict(dept=dept)) - def get_csv_data(self): - self.csv_data = sql_get_data('export_csv_dept',dict(dept=self.dept)) +def save_as_csv(dept,csv_data): + with open(get_sas_full_filename(dept,'csv'),'w', newline='') as csvfile: + writer = csv.writer(csvfile,dialect='unix',quoting=csv.QUOTE_MINIMAL) + writer.writerows([l[0:-1] for l in csv_data]) - def get_json_commune_data(self): - self.json_commune_data = sql_get_dict_data('export_json_dept_communes',dict(dept=self.dept)) - # print(json.dumps(self.json_commune_data)) - - # def get_json_voies_non_rapprochees_data(self): - # return sql_get_data('export_json_dept_voies_non_rapprochees',dict(dept=self.dept)) - - # def get_json_voies_rapprochees_data(self): - # return sql_get_data('export_json_dept_voies_rapprochees',dict(dept=self.dept)) - - def get_json_voies_avec_adresses_data(self): - self.json_voies_avec_adresses_data = sql_get_dict_data('export_json_dept_voies_avec_adresses',dict(dept=self.dept)) - # for j in self.json_voies_avec_adresses_data: - # j['housenumbers'] = [{p.split('$')[0]:{'lat':p.split('$')[1],'lon':p.split('$')[2]}} for p in j['housenumbers'].split('@')] - # jl = json.dumps(j) - # print(jl) - # print(json.dumps(self.json_voies_avec_adresses_data)) - - def get_json_lieux_dits_data(self): - return sql_get_data('export_json_dept_lieux_dits',dict(dept=self.dept)) - - def get_target_filename(self,filetype): - return 
f'bano-{self.dept}.{filetype}' - - def get_sas_full_filename(self,filetype): - return Path(os.environ['EXPORT_SAS_DIR']) / self.get_target_filename(filetype) - - def get_webdir_full_filename(self,filetype): - return Path(os.environ['EXPORT_WEB_DIR']) / self.get_target_filename(filetype) - - def save_as_csv(self): - if not self.csv_data : - self.csv_data = self.get_csv_data() - with open(self.get_sas_full_filename('csv'),'w', newline='') as csvfile: - writer = csv.writer(csvfile,dialect='unix',quoting=csv.QUOTE_MINIMAL) - writer.writerows([l[0:-1] for l in self.csv_data]) - - def save_as_ttl(self): - if not self.csv_data : - self.csv_data = self.get_csv_data() - with open(self.get_sas_full_filename('ttl'),'w') as ttlfile: - ttlfile.write(f"""@prefix xsd: . +def save_as_ttl(dept,csv_data): + with open(get_sas_full_filename(dept,'ttl'),'w') as ttlfile: + ttlfile.write(f"""@prefix xsd: . @prefix locn: . @prefix gn: . @prefix prov: . @@ -87,11 +43,11 @@ def save_as_ttl(self): \tdcterms:publisher ; #url openstreetmap France \tdcterms:issued "2014-05-14"^^xsd:date ; # data issued \tdcterms:modified "2014-08-21"^^xsd:date ; #last modification -\tdcterms:spatial , ; # region/pays (France) +\tdcterms:spatial , ; # region/pays (France) \t. """) - for id,numero,voie,cp,ville,source,lat,lon,*others in self.csv_data: - ttlfile.write(f""" a locn:Address , gn:Feature ; + for id,numero,voie,cp,ville,source,lat,lon,*others in csv_data: + ttlfile.write(f""" a locn:Address , gn:Feature ; locn:fullAddress "{numero} {voie}, {cp} {ville}, FRANCE"; locn:addressId "{id}" ; locn:locatorDesignator "{numero}" ; @@ -100,53 +56,57 @@ def save_as_ttl(self): locn:locatorName "{ville}"@fr ; locn:adminUnitL1 "FR" ;""") # traitement des arrondissements municipaux de Paris, Lyon, Marseille - if id[0:2] in '13 69 75' and (int(id[0:5]) in range(13201, 13217) or int(id[0:5]) in range(69381, 69370) or int(id[0:5]) in range(75101, 75121)): - ttlfile.write(f"locn:location ;") - else: - ttlfile.write(f"locn:location ;") - ttlfile.write(f"""locn:geometry ; + if id[0:2] in '13 69 75' and (int(id[0:5]) in range(13201, 13217) or int(id[0:5]) in range(69381, 69370) or int(id[0:5]) in range(75101, 75121)): + ttlfile.write(f"locn:location ;") + else: + ttlfile.write(f"locn:location ;") + ttlfile.write(f"""locn:geometry ; locn:geometry [a geo:Point ; geo:lat "{lat}" ; geo:long "{lon}" ] ; locn:geometry [a gsp:Geometry; gsp:asWKT "POINT({lon} {lat})"^^gsp:wktLiteral ] ; .""") - def save_as_shp(self): - subprocess.run(['ogr2ogr', '-f',"ESRI Shapefile", '-lco', 'ENCODING=UTF-8', '-s_srs', 'EPSG:4326', '-t_srs', 'EPSG:4326', '-overwrite', self.get_sas_full_filename('shp'), 'PG:dbname=cadastre user=cadastre', '-sql', f'{self.csv_query}']) - - def save_as_json(self): - with open(self.get_sas_full_filename('json'),'w') as jsonfile: - for l in self.json_commune_data: - jsonfile.write(f"{json.dumps(l,ensure_ascii=False,separators=(',',':'))}\n") - for l in self.json_voies_avec_adresses_data: - dict_hsnr = {} - for p in l['housenumbers'].split('@'): - numero,lat,lon = p.split('$') - dict_hsnr[numero] = dict(lat=float(lat),lon=float(lon)) - l['housenumbers'] = dict_hsnr - jsonfile.write(f"{json.dumps(l,ensure_ascii=False,separators=(',',':'))}\n") - - # for fantoir,citycode,type,name,postcode,lat,lon,cityname,departement,region,importance in self.json_voies_rapprochees_sans_adresses_data: - # if ';' in postcode: - # postcode = postcode.split(';') - # 
jsonfile.write(f'{{"id":"{fantoir}","citycode":"{citycode}","type":"{type}","name":"{name}","postcode":{json.dumps(postcode)},"lat":"{lat}","lon":"{lon}","city":"{cityname}","departement":"{departement}","region":"{region}","importance":{importance}}}\n') - # if not self.json_lieux_dits_data : - # self.json_lieux_dits_data = self.get_json_lieux_dits_data() - # for fantoir,citycode,type,name,postcode,lat,lon,cityname,departement,region,importance,*others in self.json_lieux_dits_data: - # if ';' in postcode: - # postcode = postcode.split(';') - # jsonfile.write(f'{{"id":"{fantoir}","citycode":"{citycode}","type":"{type}","name":"{name}","postcode":{json.dumps(postcode)},"lat":"{lat}","lon":"{lon}","city":"{cityname}","departement":"{departement}","region":"{region}","importance":{importance}}}\n') - -def process(departements, **kwargs): +def save_as_shp(dept): + # query = sql_query('export_csv_dept',dict(dept=dept)) + subprocess.run(['ogr2ogr', '-f',"ESRI Shapefile", '-lco', 'ENCODING=UTF-8', '-s_srs', 'EPSG:4326', '-t_srs', 'EPSG:4326', '-overwrite', get_sas_full_filename(dept,'shp'), 'PG:dbname=bano user=cadastre', '-sql', sql_query('export_csv_dept',dict(dept=dept))]) + +def save_as_json(dept): + with open(get_sas_full_filename(dept,'json'),'w') as jsonfile: + for l in sql_get_dict_data('export_json_dept_communes',dict(dept=dept)): + if ';' in l['postcode']: + l['postcode'] = l['postcode'].split(';') + print(l['postcode']) + jsonfile.write(f"{json.dumps(l,ensure_ascii=False,separators=(',',':'))}\n") + for l in sql_get_dict_data('export_json_dept_voies_avec_adresses',dict(dept=dept)): + dict_hsnr = {} + for p in l['housenumbers'].split('@'): + numero,lat,lon = p.split('$') + dict_hsnr[numero] = dict(lat=float(lat),lon=float(lon)) + l['housenumbers'] = dict_hsnr + jsonfile.write(f"{json.dumps(l,ensure_ascii=False,separators=(',',':'))}\n") + for l in sql_get_dict_data('export_json_dept_voies_ld_sans_adresses',dict(dept=dept)): + jsonfile.write(f"{json.dumps(l,ensure_ascii=False,separators=(',',':'))}\n") + +def get_target_filename(dept,filetype): + return f'bano-{dept}.{filetype}' + +def get_sas_full_filename(dept,filetype): + return Path(os.environ['EXPORT_SAS_DIR']) / get_target_filename(dept,filetype) + +def get_webdir_full_filename(dept,filetype): + return Path(os.environ['EXPORT_WEB_DIR']) / get_target_filename(dept,filetype) + +def prepare_export(**kwargs): sql_process('table_polygones_postaux',dict()) sql_process('tables_export',dict()) + +def process(departements, **kwargs): for dept in departements: if not hp.is_valid_dept(dept): print(f"Code {dept} invalide pour un département - abandon") continue - d = Dataset(dept) - # d.save_as_shp() - # d.save_as_csv() - # d.save_as_ttl() - d.get_json_commune_data() - d.get_json_voies_avec_adresses_data() - d.save_as_json() + save_as_shp(dept) + csv_data = get_csv_data(dept) + save_as_csv(dept,csv_data) + save_as_ttl(dept,csv_data) + # save_as_json(dept) diff --git a/bano/sql.py b/bano/sql.py index 249aae2..ceec38f 100644 --- a/bano/sql.py +++ b/bano/sql.py @@ -48,3 +48,11 @@ def sql_get_dict_data(sqlfile, args): return cur.fetchall() return None + +def sql_query(sqlfile, args): + sqlfile = (Path(SQLDIR) / sqlfile).with_suffix(".sql") + with open(sqlfile) as s: + q = s.read() + for k, v in args.items(): + q = q.replace(f"__{k}__", v) + return q \ No newline at end of file diff --git a/bano/sql/export_csv_dept.sql b/bano/sql/export_csv_dept.sql new file mode 100644 index 0000000..3ed0d5f --- /dev/null +++ b/bano/sql/export_csv_dept.sql @@ -0,0 
+1,11 @@ +SELECT id_add AS id, + numero, + nom_voie AS voie, + code_postal AS code_post, + libelle AS ville, + source, + lat, + lon, + geometrie AS geom +FROM numeros_export +WHERE dep = '__dept__' \ No newline at end of file diff --git a/bano/sql/export_json_dept_communes.sql b/bano/sql/export_json_dept_communes.sql index 0bccfa3..9545474 100644 --- a/bano/sql/export_json_dept_communes.sql +++ b/bano/sql/export_json_dept_communes.sql @@ -1,19 +1,7 @@ -WITH -rang_cp -AS -(SELECT *, - RANK() OVER(PARTITION BY insee ORDER BY ligne_5,cp) AS rang -FROM codes_postaux), -cp -AS -(SELECT insee AS code_insee, - cp -FROM rang_cp -WHERE rang = 1) SELECT DISTINCT code_insee AS id, i.type AS type, i.name AS name, - cp.cp AS postcode, + cp.cps AS postcode, round(lat,6)::float AS lat, round(lon,6)::float AS lon, i.name AS city, @@ -23,7 +11,7 @@ SELECT DISTINCT code_insee AS id, adm_weight, GREATEST(0.075,ROUND(LOG((adm_weight)+LOG(population+1)/3)::decimal,4))::float AS importance FROM infos_communes i -JOIN cp +JOIN insee_codes_postaux cp USING (code_insee) JOIN cog_departement cd USING (dep) diff --git a/bano/sql/export_json_dept_voies_avec_adresses.sql b/bano/sql/export_json_dept_voies_avec_adresses.sql index 0936c2d..168680d 100644 --- a/bano/sql/export_json_dept_voies_avec_adresses.sql +++ b/bano/sql/export_json_dept_voies_avec_adresses.sql @@ -1,35 +1,15 @@ -SELECT fantoir AS id, - code_insee AS citycode, - 'street' AS type, - REPLACE(REPLACE(REGEXP_REPLACE(nom_voie,'\t',' '),'"',chr(39)),'’',chr(39)) AS name, - code_postal AS postcode, - ROUND(lat_c::numeric,6)::float AS lat, - ROUND(lon_c::numeric,6)::float AS lon, - nom_com AS city, - nom_dep AS departement, - nom_reg AS region, - ROUND(LOG(c.adm_weight+LOG(c.population+1)/3)::numeric*LOG(1+LOG(nombre_adresses+1)+LOG(longueur_max+1)+LOG(CASE WHEN nom_voie like 'Boulevard%' THEN 4 WHEN nom_voie LIKE 'Place%' THEN 4 WHEN nom_voie LIKE 'Espl%' THEN 4 WHEN nom_voie LIKE 'Av%' THEN 3 WHEN nom_voie LIKE 'Rue %' THEN 2 ELSE 1 END))::numeric,4)::float AS importance, - string_agg(numero||'$'||ROUND(ne.lat::numeric,6)::text||'$'||ROUND(ne.lon::numeric,6)::text,'@' ORDER BY numero) AS housenumbers -FROM (SELECT * - FROM numeros_export - WHERE dep = '__dept__') ne -JOIN cog_pyramide_admin -USING (code_insee) -JOIN (SELECT fantoir, - lon AS lon_c, - lat AS lat_c - FROM bano_points_nommes - WHERE code_dept = '__dept__') pn -USING (fantoir) -JOIN infos_communes c -USING (code_insee) -JOIN (SELECT fantoir, - longueur_max, - nombre_adresses - FROM numeros_export_importance - WHERE dep = '__dept__') nimp -USING (fantoir) -GROUP BY 1,2,3,4,5,6,7,8,9,10,11 -ORDER BY 1 - -limit 50; \ No newline at end of file +SELECT id, + citycode, + type, + name, + postcode, + lat, + lon, + city, + departement, + region, + importance, + housenumbers +FROM export_voies_adresses_json +WHERE dep = '__dept__' +ORDER BY 1; \ No newline at end of file diff --git a/bano/sql/export_json_dept_voies_ld_sans_adresses.sql b/bano/sql/export_json_dept_voies_ld_sans_adresses.sql new file mode 100644 index 0000000..5c9b08b --- /dev/null +++ b/bano/sql/export_json_dept_voies_ld_sans_adresses.sql @@ -0,0 +1,14 @@ +SELECT id, + citycode, + type, + name, + postcode, + lat, + lon, + city, + departement, + region, + importance +FROM export_voies_ld_sans_adresses_json +WHERE dep = '__dept__' +ORDER BY 1; \ No newline at end of file diff --git a/bano/sql/table_insee_codes_postaux.sql b/bano/sql/table_insee_codes_postaux.sql index 9844767..228766d 100644 --- a/bano/sql/table_insee_codes_postaux.sql +++ 
b/bano/sql/table_insee_codes_postaux.sql @@ -4,4 +4,6 @@ AS SELECT insee AS code_insee, STRING_AGG(cp,';') AS cps FROM (SELECT DISTINCT insee, cp FROM codes_postaux ORDER BY 1,2) AS c -GROUP BY 1; \ No newline at end of file +GROUP BY 1; + +CREATE INDEX idx_insee_codes_postaux_code_insee ON insee_codes_postaux(code_insee); \ No newline at end of file diff --git a/bano/sql/table_polygones_postaux.sql b/bano/sql/table_polygones_postaux.sql index 5a24383..601582c 100644 --- a/bano/sql/table_polygones_postaux.sql +++ b/bano/sql/table_polygones_postaux.sql @@ -1,3 +1,5 @@ +BEGIN; + DROP TABLE IF EXISTS polygones_postaux CASCADE; CREATE UNLOGGED TABLE polygones_postaux AS @@ -23,4 +25,6 @@ WHERE boundary = 'administrative' AND "addr:postcode"||postal_code != '' ORDER BY ST_Area(way); -CREATE INDEX gidx_polygones_postaux ON polygones_postaux USING GIST(geometrie); \ No newline at end of file +CREATE INDEX gidx_polygones_postaux ON polygones_postaux USING GIST(geometrie); + +COMMIT; \ No newline at end of file diff --git a/bano/sql/tables_export.sql b/bano/sql/tables_export.sql index 298d4d3..a86da84 100644 --- a/bano/sql/tables_export.sql +++ b/bano/sql/tables_export.sql @@ -17,6 +17,7 @@ -- FROM planet_osm_postal_code -- WHERE COALESCE(postal_code,'') != ''; +BEGIN; DROP TABLE IF EXISTS numeros_export CASCADE; CREATE UNLOGGED TABLE numeros_export @@ -160,3 +161,5 @@ FROM resultats_multi_cp WHERE rang_par_fantoir = 1; CREATE INDEX idx_export_voies_ld_sans_adresses_json_dep ON export_voies_ld_sans_adresses_json(dep); + +COMMIT; \ No newline at end of file From b57cdad0c40262b7e43bb641af0b025c2161c253 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Wed, 17 Jan 2024 21:49:23 +0000 Subject: [PATCH 136/163] WIP exports --- bano/export.py | 79 +++++++++++++++---------- bano/publish.py | 117 +++++++++++++++++++++++++++++++++++++ bano/sql/tables_export.sql | 24 ++++++-- 3 files changed, 184 insertions(+), 36 deletions(-) create mode 100644 bano/publish.py diff --git a/bano/export.py b/bano/export.py index 1295d87..4720646 100644 --- a/bano/export.py +++ b/bano/export.py @@ -10,6 +10,7 @@ from pathlib import Path from .sql import sql_get_data,sql_get_dict_data,sql_process,sql_query +from . import batch as b from . import constants from . import helpers as hp @@ -17,13 +18,20 @@ def get_csv_data(dept): return sql_get_data('export_csv_dept',dict(dept=dept)) def save_as_csv(dept,csv_data): - with open(get_sas_full_filename(dept,'csv'),'w', newline='') as csvfile: - writer = csv.writer(csvfile,dialect='unix',quoting=csv.QUOTE_MINIMAL) - writer.writerows([l[0:-1] for l in csv_data]) + id_batch = b.batch_start_log("export CSV", "", dept) + try : + with open(get_sas_full_filename(dept,'csv'),'w', newline='') as csvfile: + writer = csv.writer(csvfile,dialect='unix',quoting=csv.QUOTE_MINIMAL) + writer.writerows([l[0:-1] for l in csv_data]) + b.batch_stop_log(id_batch, True) + except: + b.batch_stop_log(id_batch, False) def save_as_ttl(dept,csv_data): - with open(get_sas_full_filename(dept,'ttl'),'w') as ttlfile: - ttlfile.write(f"""@prefix xsd: . + id_batch = b.batch_start_log("export TTL", "", dept) + try: + with open(get_sas_full_filename(dept,'ttl'),'w') as ttlfile: + ttlfile.write(f"""@prefix xsd: . @prefix locn: . @prefix gn: . @prefix prov: . @@ -46,8 +54,8 @@ def save_as_ttl(dept,csv_data): \tdcterms:spatial , ; # region/pays (France) \t. 
""") - for id,numero,voie,cp,ville,source,lat,lon,*others in csv_data: - ttlfile.write(f""" a locn:Address , gn:Feature ; + for id,numero,voie,cp,ville,source,lat,lon,*others in csv_data: + ttlfile.write(f""" a locn:Address , gn:Feature ; locn:fullAddress "{numero} {voie}, {cp} {ville}, FRANCE"; locn:addressId "{id}" ; locn:locatorDesignator "{numero}" ; @@ -56,36 +64,47 @@ def save_as_ttl(dept,csv_data): locn:locatorName "{ville}"@fr ; locn:adminUnitL1 "FR" ;""") # traitement des arrondissements municipaux de Paris, Lyon, Marseille - if id[0:2] in '13 69 75' and (int(id[0:5]) in range(13201, 13217) or int(id[0:5]) in range(69381, 69370) or int(id[0:5]) in range(75101, 75121)): - ttlfile.write(f"locn:location ;") - else: - ttlfile.write(f"locn:location ;") - ttlfile.write(f"""locn:geometry ; + if id[0:2] in '13 69 75' and (int(id[0:5]) in range(13201, 13217) or int(id[0:5]) in range(69381, 69370) or int(id[0:5]) in range(75101, 75121)): + ttlfile.write(f"locn:location ;") + else: + ttlfile.write(f"locn:location ;") + ttlfile.write(f"""locn:geometry ; locn:geometry [a geo:Point ; geo:lat "{lat}" ; geo:long "{lon}" ] ; locn:geometry [a gsp:Geometry; gsp:asWKT "POINT({lon} {lat})"^^gsp:wktLiteral ] ; .""") + b.batch_stop_log(id_batch, True) + except: + b.batch_stop_log(id_batch, False) def save_as_shp(dept): - # query = sql_query('export_csv_dept',dict(dept=dept)) - subprocess.run(['ogr2ogr', '-f',"ESRI Shapefile", '-lco', 'ENCODING=UTF-8', '-s_srs', 'EPSG:4326', '-t_srs', 'EPSG:4326', '-overwrite', get_sas_full_filename(dept,'shp'), 'PG:dbname=bano user=cadastre', '-sql', sql_query('export_csv_dept',dict(dept=dept))]) + id_batch = b.batch_start_log("export SHP", "", dept) + try: + subprocess.run(['ogr2ogr', '-f',"ESRI Shapefile", '-lco', 'ENCODING=UTF-8', '-s_srs', 'EPSG:4326', '-t_srs', 'EPSG:4326', '-overwrite', get_sas_full_filename(dept,'shp'), 'PG:dbname=bano user=cadastre', '-sql', sql_query('export_csv_dept',dict(dept=dept))]) + b.batch_stop_log(id_batch, True) + except: + b.batch_stop_log(id_batch, False) def save_as_json(dept): - with open(get_sas_full_filename(dept,'json'),'w') as jsonfile: - for l in sql_get_dict_data('export_json_dept_communes',dict(dept=dept)): - if ';' in l['postcode']: - l['postcode'] = l['postcode'].split(';') - print(l['postcode']) - jsonfile.write(f"{json.dumps(l,ensure_ascii=False,separators=(',',':'))}\n") - for l in sql_get_dict_data('export_json_dept_voies_avec_adresses',dict(dept=dept)): - dict_hsnr = {} - for p in l['housenumbers'].split('@'): - numero,lat,lon = p.split('$') - dict_hsnr[numero] = dict(lat=float(lat),lon=float(lon)) - l['housenumbers'] = dict_hsnr - jsonfile.write(f"{json.dumps(l,ensure_ascii=False,separators=(',',':'))}\n") - for l in sql_get_dict_data('export_json_dept_voies_ld_sans_adresses',dict(dept=dept)): - jsonfile.write(f"{json.dumps(l,ensure_ascii=False,separators=(',',':'))}\n") + id_batch = b.batch_start_log("export JSON", "", dept) + try: + with open(get_sas_full_filename(dept,'json'),'w') as jsonfile: + for l in sql_get_dict_data('export_json_dept_communes',dict(dept=dept)): + if ';' in l['postcode']: + l['postcode'] = l['postcode'].split(';') + jsonfile.write(f"{json.dumps(l,ensure_ascii=False,separators=(',',':'))}\n") + for l in sql_get_dict_data('export_json_dept_voies_avec_adresses',dict(dept=dept)): + dict_hsnr = {} + for p in l['housenumbers'].split('@'): + numero,lat,lon = p.split('$') + dict_hsnr[numero] = dict(lat=float(lat),lon=float(lon)) + l['housenumbers'] = dict_hsnr + 
jsonfile.write(f"{json.dumps(l,ensure_ascii=False,separators=(',',':'))}\n") + for l in sql_get_dict_data('export_json_dept_voies_ld_sans_adresses',dict(dept=dept)): + jsonfile.write(f"{json.dumps(l,ensure_ascii=False,separators=(',',':'))}\n") + b.batch_stop_log(id_batch, True) + except: + b.batch_stop_log(id_batch, False) def get_target_filename(dept,filetype): return f'bano-{dept}.{filetype}' @@ -109,4 +128,4 @@ def process(departements, **kwargs): csv_data = get_csv_data(dept) save_as_csv(dept,csv_data) save_as_ttl(dept,csv_data) - # save_as_json(dept) + save_as_json(dept) diff --git a/bano/publish.py b/bano/publish.py new file mode 100644 index 0000000..cbf250e --- /dev/null +++ b/bano/publish.py @@ -0,0 +1,117 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import gzip +import tarfile +import os + +from glob import glob +from shutil import copy2 +from pathlib import Path + +from .constants import DEPARTEMENTS +from . import helpers as hp +from . import batch as b + +def get_source_dir(): + try: + cwd = Path(os.environ['EXPORT_SAS_DIR']) + except KeyError: + raise ValueError(f"La variable EXPORT_SAS_DIR n'est pas définie") + return cwd + +def get_dest_dir(): + try: + cwd = Path(os.environ['EXPORT_WEB_DIR']) + except KeyError: + raise ValueError(f"La variable EXPORT_WEB_DIR n'est pas définie") + return cwd + +def get_source_file(dept,extension): + return Path(get_source_dir()) / f'bano-{dept}.{extension}' + +def get_dest_file(dept,filetype,gzip=False,tar=False): + ext = ".tar" if tar else '' + ext = ext+".gz" if gzip else ext + return Path(get_dest_dir()) / f'bano-{dept}.{filetype}{ext}' + +def get_dest_file_full(filetype,gzip=False): + gz_ext = '.gz' if gzip else '' + return Path(get_dest_dir()) / f'full.{filetype}{gz_ext}' + +def publish_as_shp(dept): + id_batch = b.batch_start_log("publish SHP", "", dept) + try : + with tarfile.open(get_dest_file(dept, 'shp', True,True), "w:gz") as tar: + tar.add(get_source_file(dept,'shp'), arcname=f'bano-{dept}.shp') + tar.add(get_source_file(dept,'dbf'), arcname=f'bano-{dept}.dbf') + tar.add(get_source_file(dept,'shx'), arcname=f'bano-{dept}.shx') + tar.add(get_source_file(dept,'prj'), arcname=f'bano-{dept}.prj') + tar.add(get_source_file(dept,'cpg'), arcname=f'bano-{dept}.cpg') + b.batch_stop_log(id_batch, True) + except: + b.batch_stop_log(id_batch, False) + +def publish_as_csv(dept): + id_batch = b.batch_start_log("publish CSV", "", dept) + try : + copy2(get_source_file(dept,'csv'),get_dest_dir()) + b.batch_stop_log(id_batch, True) + except: + b.batch_stop_log(id_batch, False) + +def publish_as_full_csv(): + id_batch = b.batch_start_log("publish FULL CSV", "", 'France') + try: + with gzip.open(get_dest_file_full('csv',True),'wb') as gz: + for infile in sorted(glob(f'{get_source_dir()}/bano-*.csv')): + with open(infile,'rb') as js: + gz.write(js.read()) + b.batch_stop_log(id_batch, True) + except: + b.batch_stop_log(id_batch, False) + +def publish_as_ttl(dept): + id_batch = b.batch_start_log("publish TTL", "", dept) + try : + with gzip.open(get_dest_file(dept,'ttl',True,False),'wb') as gz: + with open(get_source_file(dept,'ttl'),'rb') as ttl: + gz.write(ttl.read()) + b.batch_stop_log(id_batch, True) + except: + b.batch_stop_log(id_batch, False) + +def publish_as_json(dept): + id_batch = b.batch_start_log("publish JSON", "", dept) + try : + with gzip.open(get_dest_file(dept,'json',True,False),'wb') as gz: + with open(get_source_file(dept,'json'),'rb') as js: + gz.write(js.read()) + b.batch_stop_log(id_batch, True) + except: + 
b.batch_stop_log(id_batch, False) + +def publish_as_full_json(): + id_batch = b.batch_start_log("publish FULL JSON", "", 'France') + try: + with gzip.open(get_dest_file_full('sjson',True),'wb') as gz: + for infile in sorted(glob(f'{get_source_dir()}/bano-*.json')): + with open(infile,'rb') as js: + gz.write(js.read()) + b.batch_stop_log(id_batch, True) + except: + b.batch_stop_log(id_batch, False) + +def process(departements, **kwargs): + for dept in departements: + if not hp.is_valid_dept(dept): + print(f"Code {dept} invalide pour un département - abandon") + continue + publish_as_shp(dept) + publish_as_csv(dept) + publish_as_ttl(dept) + publish_as_json(dept) + +def process_full(**kwargs): + publish_as_full_csv() + publish_as_full_json() diff --git a/bano/sql/tables_export.sql b/bano/sql/tables_export.sql index a86da84..5512c7b 100644 --- a/bano/sql/tables_export.sql +++ b/bano/sql/tables_export.sql @@ -20,7 +20,7 @@ BEGIN; DROP TABLE IF EXISTS numeros_export CASCADE; -CREATE UNLOGGED TABLE numeros_export +CREATE TABLE numeros_export AS WITH cp_fantoir @@ -41,6 +41,16 @@ AS row_number() OVER (PARTITION BY fantoir||num ORDER BY CASE WHEN source = 'OSM' THEN 1 ELSE 2 END) AS rang, * FROM num_norm), +nom_fantoir +AS +(SELECT fantoir, + nom +FROM (SELECT fantoir, + nom, + RANK() OVER (PARTITION BY fantoir ORDER BY CASE WHEN source = 'OSM' THEN 1 ELSE 2 END, CASE nature WHEN 'lieu-dit' THEN 1 WHEN 'place' THEN 1 WHEN 'voie' THEN 2 ELSE 3 END, nom ) AS rang + FROM nom_fantoir) n +WHERE rang = 1 +GROUP BY 1,2), resultats_multi_cp AS (SELECT dep, @@ -48,7 +58,7 @@ AS n.fantoir, id_add, numero, - nom_voie, + nf.nom AS nom_voie, COALESCE(n.code_postal,pp.code_postal,min_cp) code_postal, cn.libelle, source, @@ -57,7 +67,9 @@ AS n.geometrie, RANK() OVER (PARTITION BY id_add ORDER BY pp.id) rang_postal FROM num_norm_id n -JOIN cog_commune cn +JOIN nom_fantoir nf +USING (fantoir) +JOIN (SELECT dep, com, libelle FROM cog_commune WHERE typecom in ('ARM','COM')) cn ON (cn.com = code_insee) LEFT OUTER JOIN polygones_postaux pp ON ST_Contains(pp.geometrie, n.geometrie) @@ -71,7 +83,7 @@ WHERE rang_postal = 1; CREATE INDEX idx_numeros_export_dep ON numeros_export(dep); DROP TABLE IF EXISTS numeros_export_importance CASCADE; -CREATE UNLOGGED TABLE numeros_export_importance +CREATE TABLE numeros_export_importance AS SELECT fantoir, ST_Length(ST_Transform(ST_Longestline(ST_Convexhull(ST_Collect(geometrie)),ST_Convexhull(ST_Collect(geometrie))),3857)) AS longueur_max, @@ -81,7 +93,7 @@ GROUP BY fantoir; DROP TABLE IF EXISTS export_voies_adresses_json CASCADE; -CREATE UNLOGGED TABLE export_voies_adresses_json +CREATE TABLE export_voies_adresses_json AS SELECT c.dep, fantoir AS id, @@ -114,7 +126,7 @@ ORDER BY 1; CREATE INDEX idx_export_voies_adresses_json_dep ON export_voies_adresses_json(dep); DROP TABLE IF EXISTS export_voies_ld_sans_adresses_json CASCADE; -CREATE UNLOGGED TABLE export_voies_ld_sans_adresses_json +CREATE TABLE export_voies_ld_sans_adresses_json AS WITH set_fantoir From ae54248f01e01dc5b553f9b7dc93c5e6834de3ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Wed, 17 Jan 2024 21:50:18 +0000 Subject: [PATCH 137/163] import CPs : changement d'URL --- bano/sources/datagouv_cp.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bano/sources/datagouv_cp.py b/bano/sources/datagouv_cp.py index d44c8f0..a42a71b 100644 --- a/bano/sources/datagouv_cp.py +++ b/bano/sources/datagouv_cp.py @@ -12,8 +12,8 @@ DICT_SOURCES = { "codes_postaux": [ 
"codes_postaux", - "https://www.data.gouv.fr/fr/datasets/r/5ed9b092-a25d-49e7-bdae-0152797c7577", - "table_insee_cps", + "https://datanova.laposte.fr/data-fair/api/v1/datasets/laposte-hexasmal/raw", + "table_insee_codes_postaux", ], } @@ -35,7 +35,7 @@ def download(destination,url): headers["If-Modified-Since"] = formatdate(destination.stat().st_mtime) resp = requests.get(url, headers=headers) - id_batch = b.batch_start_log("download source", destination, "France") + id_batch = b.batch_start_log("download source", url, "France") if resp.status_code == 200: with destination.open("w") as f: f.write(resp.text) From 7c46098e3e1c3f54f1b79dd38500245ddd3d7c58 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Wed, 17 Jan 2024 21:51:45 +0000 Subject: [PATCH 138/163] nettoyage des TAB dans la BAN au chargement --- bano/sql/charge_ban_commune.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bano/sql/charge_ban_commune.sql b/bano/sql/charge_ban_commune.sql index aa5b31d..b93d883 100644 --- a/bano/sql/charge_ban_commune.sql +++ b/bano/sql/charge_ban_commune.sql @@ -3,7 +3,7 @@ j AS (SELECT fantoir, numero, - nom_voie, + regexp_replace(nom_voie,'\t','') AS nom_voie, lon, lat, rep, From e36206b5d1168f0c8e6be9950f4e74abfad3afd6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Wed, 17 Jan 2024 21:52:40 +0000 Subject: [PATCH 139/163] cron +exports --- cron_bano.sh | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/cron_bano.sh b/cron_bano.sh index 14a3ec5..a723557 100755 --- a/cron_bano.sh +++ b/cron_bano.sh @@ -33,12 +33,18 @@ cat deplist.txt | parallel -j 4 export LANG=fr_FR.UTF-8\; bano rapprochem echo 'rapprochement ok' >> $SCRIPT_DIR/cron.log +echo 'preparation export' >> $SCRIPT_DIR/cron.log +bano prepare_export +echo 'preparation export finie' >> $SCRIPT_DIR/cron.log + # exports -# cat deplist.txt | parallel -j 4 bano export {1} +echo 'export' >> $SCRIPT_DIR/cron.log +cat deplist.txt | parallel -j 4 bano export {1} +echo 'export fini' >> $SCRIPT_DIR/cron.log # copie+zip dans le dossier web -# cat deplist.txt | parallel -j 4 bano publish {1} -# bano publish_aggregate +cat deplist.txt | parallel -j 4 bano publish {1} +bano publish_aggregate # ménage PostgreSQL psql -d bano -U cadastre -c "VACUUM bano_adresses;" From 7109b4a76e178da779a1dca0f1e9e0170a7faf67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Wed, 17 Jan 2024 22:52:38 +0000 Subject: [PATCH 140/163] table de polygones postaux pour les exports --- bano/sql/table_polygones_postaux.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bano/sql/table_polygones_postaux.sql b/bano/sql/table_polygones_postaux.sql index 601582c..3715cc4 100644 --- a/bano/sql/table_polygones_postaux.sql +++ b/bano/sql/table_polygones_postaux.sql @@ -1,7 +1,7 @@ BEGIN; DROP TABLE IF EXISTS polygones_postaux CASCADE; -CREATE UNLOGGED TABLE polygones_postaux +CREATE TABLE polygones_postaux AS SELECT way AS geometrie, CASE postal_code From 19c748c385fed35fa00e716515314198b0b42aca Mon Sep 17 00:00:00 2001 From: vdct Date: Mon, 22 Jan 2024 13:17:42 +0100 Subject: [PATCH 141/163] Bano v3 dev (#374) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Fix version lxml * table insee_codes_postaux * Incohérence de rapprochement (#343) --- bano/rapprochement.py | 3 +++ bano/sql/table_insee_codes_postaux.sql | 4 ++-- setup.cfg | 2 +- 3 files changed, 6 insertions(+), 3 
deletions(-) diff --git a/bano/rapprochement.py b/bano/rapprochement.py index 549d43f..a5fd015 100644 --- a/bano/rapprochement.py +++ b/bano/rapprochement.py @@ -32,6 +32,9 @@ def process_unitaire(code_insee): noms.add_fantoir(topo) noms.remplit_fantoir_par_nom_sous_commune() points_nommes.complete_fantoir(noms) + # 2e passe suite au complément Fantoir + + points_nommes.noms_des_points_nommes(noms) adresses.complete_fantoir(noms) correspondance_fantoir_ban_osm.process(noms) diff --git a/bano/sql/table_insee_codes_postaux.sql b/bano/sql/table_insee_codes_postaux.sql index 228766d..77dfdf5 100644 --- a/bano/sql/table_insee_codes_postaux.sql +++ b/bano/sql/table_insee_codes_postaux.sql @@ -1,9 +1,9 @@ DROP TABLE IF EXISTS insee_codes_postaux CASCADE; -CREATE UNLOGGED TABLE insee_codes_postaux +CREATE TABLE insee_codes_postaux AS SELECT insee AS code_insee, STRING_AGG(cp,';') AS cps FROM (SELECT DISTINCT insee, cp FROM codes_postaux ORDER BY 1,2) AS c GROUP BY 1; -CREATE INDEX idx_insee_codes_postaux_code_insee ON insee_codes_postaux(code_insee); \ No newline at end of file +CREATE INDEX idx_insee_codes_postaux_code_insee ON insee_codes_postaux(code_insee); diff --git a/setup.cfg b/setup.cfg index 3768d7b..bc5d694 100644 --- a/setup.cfg +++ b/setup.cfg @@ -8,7 +8,7 @@ install_requires= beautifulsoup4==4.6.3 psycopg2-binary requests - lxml + lxml==4.9.1 [options.extras_require] test= From 3d37e6b7b92e2ce1a58b6760ab1a203155cca14c Mon Sep 17 00:00:00 2001 From: vdct Date: Tue, 30 Jan 2024 12:50:00 +0100 Subject: [PATCH 142/163] Bano v3 dev (#377) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Fix version lxml * table insee_codes_postaux * Incohérence de rapprochement (#343) * colonnes pour exports et rendu carto --- bano/sql/create_table_base_bano_cibles.sql | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/bano/sql/create_table_base_bano_cibles.sql b/bano/sql/create_table_base_bano_cibles.sql index 42f3b60..def4d16 100644 --- a/bano/sql/create_table_base_bano_cibles.sql +++ b/bano/sql/create_table_base_bano_cibles.sql @@ -1,5 +1,6 @@ CREATE TABLE IF NOT EXISTS bano_adresses ( fantoir text, + bano_id text GENERATED ALWAYS AS (fantoir||'_'|| REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REGEXP_REPLACE(UPPER(numero),'^0*',''),'BIS','B'),'TER','T'),'QUATER','Q'),'QUAT','Q'),' ',''),'à','-'),';',','),'"','')) STORED, lon float, lat float, numero text, @@ -13,9 +14,11 @@ CREATE TABLE IF NOT EXISTS bano_adresses ( source text, certification_commune integer, id_ban text, - geometrie geometry (Point, 4326) GENERATED ALWAYS AS (ST_Point(lon,lat)) STORED); + geometrie geometry (Point, 4326) GENERATED ALWAYS AS (ST_Point(lon,lat)) STORED, + geometrie_3857 geometry (Point, 3857) GENERATED ALWAYS AS (ST_Transform(ST_SetSRID(ST_Point(lon,lat),4326),3857)) STORED); CREATE INDEX IF NOT EXISTS gidx_bano_adresses ON bano_adresses USING GIST(geometrie); +CREATE INDEX IF NOT EXISTS gidx_bano_adresses_3857 ON bano_adresses USING GIST(geometrie_3857); CREATE INDEX IF NOT EXISTS idx_bano_adresses_code_insee ON bano_adresses (code_insee); CREATE INDEX IF NOT EXISTS idx_bano_adresses_code_dept ON bano_adresses (code_dept); CREATE INDEX IF NOT EXISTS idx_bano_adresses_fantoir ON bano_adresses (fantoir); @@ -32,8 +35,11 @@ CREATE TABLE IF NOT EXISTS bano_points_nommes ( source text, lon float, lat float, - geometrie geometry (Point, 4326) GENERATED ALWAYS AS (ST_Point(lon,lat)) STORED); + geometrie geometry (Point, 4326) GENERATED 
ALWAYS AS (ST_Point(lon,lat)) STORED, + geometrie_3857 geometry (Point, 3857) GENERATED ALWAYS AS (ST_Transform(ST_SetSRID(ST_Point(lon,lat),4326),3857)) STORED); +CREATE INDEX IF NOT EXISTS gidx_bano_points_nommes ON bano_points_nommes USING GIST(geometrie); +CREATE INDEX IF NOT EXISTS gidx_bano_points_nommes_3857 ON bano_points_nommes USING GIST(geometrie_3857); CREATE INDEX IF NOT EXISTS idx_bano_points_nommes_code_insee ON bano_points_nommes (code_insee); CREATE INDEX IF NOT EXISTS idx_bano_points_nommes_code_dept ON bano_points_nommes (code_dept); CREATE INDEX IF NOT EXISTS idx_bano_points_nommes_fantoir ON bano_points_nommes (fantoir); From c4d1807b4241838f73dcd2ab423169706af1ca9b Mon Sep 17 00:00:00 2001 From: vdct Date: Thu, 1 Feb 2024 20:07:30 +0100 Subject: [PATCH 143/163] Bano v3 dev (#379) * Nettoyage des retours chariot dans les noms de voies OSM (#378) --- bano/models.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/bano/models.py b/bano/models.py index 67fa84e..c447fd4 100644 --- a/bano/models.py +++ b/bano/models.py @@ -360,6 +360,9 @@ def charge_numeros_osm(self): code_insee_ancienne_commune, nom_ancienne_commune, ) in data: + # Fix issue #378 + if voie: + voie = voie.replace('\n',' ') fantoir = tags.get("ref:FR:FANTOIR") if fantoir: From f9c116e375d82d4f4406aae6076e57148a1f1b8e Mon Sep 17 00:00:00 2001 From: vdct Date: Sun, 4 Feb 2024 18:59:49 +0100 Subject: [PATCH 144/163] Bano v3 dev (#380) * fiabilisation des exports + ajout des points BAN --- bano/export.py | 16 ++++++---- bano/models.py | 30 +++++++++++++++++-- bano/rapprochement.py | 7 +++-- bano/sql/charge_points_nommes_numeros_BAN.sql | 22 ++++++++++++++ bano/sql/tables_export.sql | 29 ++++-------------- 5 files changed, 72 insertions(+), 32 deletions(-) create mode 100644 bano/sql/charge_points_nommes_numeros_BAN.sql diff --git a/bano/export.py b/bano/export.py index 4720646..6f8b839 100644 --- a/bano/export.py +++ b/bano/export.py @@ -95,15 +95,16 @@ def save_as_json(dept): jsonfile.write(f"{json.dumps(l,ensure_ascii=False,separators=(',',':'))}\n") for l in sql_get_dict_data('export_json_dept_voies_avec_adresses',dict(dept=dept)): dict_hsnr = {} - for p in l['housenumbers'].split('@'): - numero,lat,lon = p.split('$') + for p in l['housenumbers'].split('@@@'): + numero,lat,lon = p.split('$$$') dict_hsnr[numero] = dict(lat=float(lat),lon=float(lon)) l['housenumbers'] = dict_hsnr jsonfile.write(f"{json.dumps(l,ensure_ascii=False,separators=(',',':'))}\n") for l in sql_get_dict_data('export_json_dept_voies_ld_sans_adresses',dict(dept=dept)): jsonfile.write(f"{json.dumps(l,ensure_ascii=False,separators=(',',':'))}\n") b.batch_stop_log(id_batch, True) - except: + except Exception as e: + # print(p,l,e) b.batch_stop_log(id_batch, False) def get_target_filename(dept,filetype): @@ -116,8 +117,13 @@ def get_webdir_full_filename(dept,filetype): return Path(os.environ['EXPORT_WEB_DIR']) / get_target_filename(dept,filetype) def prepare_export(**kwargs): - sql_process('table_polygones_postaux',dict()) - sql_process('tables_export',dict()) + id_batch = b.batch_start_log("Preparation export", "", "") + try: + sql_process('table_polygones_postaux',dict()) + sql_process('tables_export',dict()) + b.batch_stop_log(id_batch, True) + except: + b.batch_stop_log(id_batch, False) def process(departements, **kwargs): for dept in departements: diff --git a/bano/models.py b/bano/models.py index c447fd4..df1901b 100644 --- a/bano/models.py +++ b/bano/models.py @@ -561,7 +561,7 @@ def __eq__(self, other): ) def _as_string(self): - 
return f"source : {self.source}, nom : {self.nom} ({self.nom_normalise}), nature : {self.nature}, sous_commune : {self.code_insee_ancienne_commune}" + return f"fantoir : {self.fantoir}, source : {self.source}, nom : {self.nom} ({self.nom_normalise}), nature : {self.nature}, sous_commune : {self.code_insee_ancienne_commune}" def _as_csv_format_bano(self, correspondance): return f"{correspondance.get(self.fantoir,self.fantoir) if self.fantoir else ''}\t{self.nom}\t{self.code_insee}\t{self.code_dept}\t{self.nature}\t{self.code_insee_ancienne_commune if self.code_insee_ancienne_commune else ''}\t{self.nom_ancienne_commune if self.nom_ancienne_commune else ''}\t{self.source}\t{self.lon}\t{self.lat}" @@ -656,6 +656,33 @@ def charge_points_nommes_place_osm(self): ) ) + def charge_points_nommes_numeros_ban(self): + data = sql_get_data( + "charge_points_nommes_numeros_BAN", + dict(code_insee=self.code_insee), + ) + for ( + x, + y, + nom, + code_insee_ancienne_commune, + fantoir, + nom_ancienne_commune, + ) in data: + self.add_point_nomme( + Point_nomme( + self.code_insee, + "BAN", + "numero", + x, + y, + nom, + code_insee_ancienne_commune=code_insee_ancienne_commune, + fantoir=fantoir, + nom_ancienne_commune=nom_ancienne_commune, + ) + ) + def add_point_nomme(self, ld): self.liste.add(ld) @@ -729,7 +756,6 @@ def enregistre(self, correspondance): dict(code_insee=self.code_insee), ) - class Topo: def __init__(self, code_insee): self.code_insee = code_insee diff --git a/bano/rapprochement.py b/bano/rapprochement.py index a5fd015..8ab93ff 100644 --- a/bano/rapprochement.py +++ b/bano/rapprochement.py @@ -25,6 +25,8 @@ def process_unitaire(code_insee): points_nommes.charge_points_nommes_place_osm() # Les centroïdes viennent en dernier (fallback). Tout point déjà affecté comme lieu-dit OSM est inchangé dans l'étape charge_points_nommes_centroides_osm() points_nommes.charge_points_nommes_centroides_osm() + points_nommes.charge_points_nommes_numeros_ban() + adresses.noms_des_adresses(noms) points_nommes.noms_des_points_nommes(noms) @@ -32,8 +34,8 @@ def process_unitaire(code_insee): noms.add_fantoir(topo) noms.remplit_fantoir_par_nom_sous_commune() points_nommes.complete_fantoir(noms) - # 2e passe suite au complément Fantoir + # 2e passe suite au complément Fantoir points_nommes.noms_des_points_nommes(noms) adresses.complete_fantoir(noms) @@ -60,7 +62,8 @@ def process_unitaire(code_insee): ) b.batch_stop_log(id_batch, True) - except: + except Exception as e: + # print(e) b.batch_stop_log(id_batch, False) diff --git a/bano/sql/charge_points_nommes_numeros_BAN.sql b/bano/sql/charge_points_nommes_numeros_BAN.sql new file mode 100644 index 0000000..2f56d81 --- /dev/null +++ b/bano/sql/charge_points_nommes_numeros_BAN.sql @@ -0,0 +1,22 @@ +WITH +duplicates +AS +(SELECT fantoir, + nom_voie, + code_insee_ancienne_commune, + nom_ancienne_commune, + lon, + lat, + RANK() OVER (PARTITION BY nom_voie ORDER BY numero) rang +FROM (SELECT fantoir,numero,nom_voie,code_insee,code_insee_ancienne_commune,nom_ancienne_commune,lon,lat + FROM ban + WHERE code_insee = '__code_insee__' + ) AS ban) +SELECT lon, + lat, + nom_voie, + code_insee_ancienne_commune, + fantoir, + nom_ancienne_commune +FROM duplicates +WHERE rang = 1; diff --git a/bano/sql/tables_export.sql b/bano/sql/tables_export.sql index 5512c7b..bd9e16a 100644 --- a/bano/sql/tables_export.sql +++ b/bano/sql/tables_export.sql @@ -1,22 +1,3 @@ --- codes postaux --- ne prendre dans planet_osm_postal_code que les CPs avec un boundary != '' et un ref:insee != '' --- --- 
construire la table des insee | [CPs] à partir de la table codes_postaux de La Poste --- utiliser cette table pour l'export des communes --- --- s'appuyer sur cette table pour déterminer la géometrie infra communale des CPs des communes pluri-distribuées --- utiliser les polygones issus de ce croisement en jointure ouverte pour les points nommes et les adresses, avec comme fallback la table des codes_postaux par commune si jointure nulle --- possibilité : faire un cross join par commune pluri-distribuée et en sortir un distinct (geometrie,cp) - - --- CREATE TEMP TABLE sub_cp --- AS --- SELECT ST_Subdivide(way) AS way, --- "ref:INSEE", --- postal_code --- FROM planet_osm_postal_code --- WHERE COALESCE(postal_code,'') != ''; - BEGIN; DROP TABLE IF EXISTS numeros_export CASCADE; @@ -107,19 +88,22 @@ SELECT c.dep, nom_dep AS departement, nom_reg AS region, ROUND(LOG(c.adm_weight+LOG(c.population+1)/3)::numeric*LOG(1+LOG(nombre_adresses+1)+LOG(longueur_max+1)+LOG(CASE WHEN nom_voie like 'Boulevard%' THEN 4 WHEN nom_voie LIKE 'Place%' THEN 4 WHEN nom_voie LIKE 'Espl%' THEN 4 WHEN nom_voie LIKE 'Av%' THEN 3 WHEN nom_voie LIKE 'Rue %' THEN 2 ELSE 1 END))::numeric,4)::float AS importance, - string_agg(numero||'$'||ROUND(ne.lat::numeric,6)::text||'$'||ROUND(ne.lon::numeric,6)::text,'@' ORDER BY numero) AS housenumbers + string_agg(numero||'$$$'||ROUND(ne.lat::numeric,6)::text||'$$$'||ROUND(ne.lon::numeric,6)::text,'@@@' ORDER BY numero) AS housenumbers FROM numeros_export ne JOIN cog_pyramide_admin AS cog USING (code_insee) JOIN (SELECT fantoir, lon, - lat - FROM bano_points_nommes) AS pn + lat, + RANK() OVER (PARTITION BY fantoir ORDER BY CASE source WHEN 'OSM' THEN 1 WHEN 'BAN' THEN 3 ELSE 2 END, CASE nature WHEN 'centroide' THEN 2 ELSE 1 END) AS rang_par_fantoir + FROM bano_points_nommes + WHERE fantoir IS NOT NULL) AS pn USING (fantoir) JOIN infos_communes c USING (code_insee) JOIN numeros_export_importance USING (fantoir) +WHERE pn.rang_par_fantoir = 1 GROUP BY 1,2,3,4,5,6,7,8,9,10,11,12 ORDER BY 1; @@ -159,7 +143,6 @@ AS RANK() OVER (PARTITION BY fantoir ORDER BY CASE source WHEN 'OSM' THEN 1 ELSE 2 END, CASE nature WHEN 'centroide' THEN 2 ELSE 1 END,pp.id) AS rang_par_fantoir, c.dep FROM set_fantoir ---JOIN (select * from bano_points_nommes where code_insee = '85172') pn JOIN bano_points_nommes AS pn USING (fantoir) JOIN cog_pyramide_admin AS cog From d36412667b3c612a017d1df74366143b918ae0eb Mon Sep 17 00:00:00 2001 From: vdct Date: Sun, 4 Feb 2024 19:27:18 +0100 Subject: [PATCH 145/163] Bano v3 dev (#381) * index sur bano_id * logging de l'import BAN --- bano/sources/ban.py | 1 + bano/sql/create_table_base_bano_cibles.sql | 1 + 2 files changed, 2 insertions(+) diff --git a/bano/sources/ban.py b/bano/sources/ban.py index d82dcc8..dfeaa7a 100644 --- a/bano/sources/ban.py +++ b/bano/sources/ban.py @@ -81,6 +81,7 @@ def import_to_pg(departement, **kwargs): except psycopg2.DataError as e: print(f"Erreur au chargement de la BAN {departement}") print(e) + b.batch_stop_log(id_batch, False) return False diff --git a/bano/sql/create_table_base_bano_cibles.sql b/bano/sql/create_table_base_bano_cibles.sql index def4d16..7a0d7fc 100644 --- a/bano/sql/create_table_base_bano_cibles.sql +++ b/bano/sql/create_table_base_bano_cibles.sql @@ -22,6 +22,7 @@ CREATE INDEX IF NOT EXISTS gidx_bano_adresses_3857 ON bano_adresses USING GIST(g CREATE INDEX IF NOT EXISTS idx_bano_adresses_code_insee ON bano_adresses (code_insee); CREATE INDEX IF NOT EXISTS idx_bano_adresses_code_dept ON bano_adresses (code_dept); CREATE 
INDEX IF NOT EXISTS idx_bano_adresses_fantoir ON bano_adresses (fantoir); +CREATE INDEX IF NOT EXISTS idx_bano_adresses_bano_id ON bano_adresses (bano_id); CREATE INDEX IF NOT EXISTS idx_bano_adresses_pifo_code_insee_source ON bano_adresses (code_insee,source); CREATE TABLE IF NOT EXISTS bano_points_nommes ( From eb8bcc0ff6af2cec0c5c2c64a5433bba28fc57a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20de=20Ch=C3=A2teau-Thierry?= Date: Wed, 7 Feb 2024 21:22:28 +0000 Subject: [PATCH 146/163] Gestion des CPs multiples dans les exports JSON (#222) --- bano/export.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/bano/export.py b/bano/export.py index 6f8b839..57de6ec 100644 --- a/bano/export.py +++ b/bano/export.py @@ -94,6 +94,8 @@ def save_as_json(dept): l['postcode'] = l['postcode'].split(';') jsonfile.write(f"{json.dumps(l,ensure_ascii=False,separators=(',',':'))}\n") for l in sql_get_dict_data('export_json_dept_voies_avec_adresses',dict(dept=dept)): + if ';' in l['postcode']: + l['postcode'] = l['postcode'].split(';') dict_hsnr = {} for p in l['housenumbers'].split('@@@'): numero,lat,lon = p.split('$$$') @@ -101,6 +103,8 @@ def save_as_json(dept): l['housenumbers'] = dict_hsnr jsonfile.write(f"{json.dumps(l,ensure_ascii=False,separators=(',',':'))}\n") for l in sql_get_dict_data('export_json_dept_voies_ld_sans_adresses',dict(dept=dept)): + if ';' in l['postcode']: + l['postcode'] = l['postcode'].split(';') jsonfile.write(f"{json.dumps(l,ensure_ascii=False,separators=(',',':'))}\n") b.batch_stop_log(id_batch, True) except Exception as e: From 18b2c9202131a67f8fc9ced9bffc832afa0d0a1f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Rodrigo?= Date: Thu, 15 Feb 2024 16:07:23 +0100 Subject: [PATCH 147/163] Fix ini_base script --- init_base.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/init_base.sh b/init_base.sh index 9b667b9..f0d51e2 100755 --- a/init_base.sh +++ b/init_base.sh @@ -2,4 +2,4 @@ set -e -psql -d bano -U cadastre -v ON_ERROR_STOP=1 -f sql/create_base.sql +psql -d bano -U cadastre -v ON_ERROR_STOP=1 -f bano/sql/create_base.sql From 35126d1abb21cff4641c76365f769df5969a09e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Rodrigo?= Date: Thu, 15 Feb 2024 19:10:57 +0100 Subject: [PATCH 148/163] Add missing COG_DIR to config --- arborescence.sh | 1 + config | 1 + 2 files changed, 2 insertions(+) diff --git a/arborescence.sh b/arborescence.sh index 0126609..b40b982 100755 --- a/arborescence.sh +++ b/arborescence.sh @@ -10,6 +10,7 @@ mkdir -p $BAL_CACHE_DIR mkdir -p $EXPIRE_TILES_DIR mkdir -p $DOWNLOAD_DIR mkdir -p $IMPOSM_CACHE_DIR +mkdir -p $COG_DIR mkdir -p $EXPORT_SAS_DIR mkdir -p $EXPORT_WEB_DIR mkdir -p $TMP_DIR \ No newline at end of file diff --git a/config b/config index 37515e0..a02eb2d 100644 --- a/config +++ b/config @@ -11,6 +11,7 @@ export CADASTRE_CACHE_DIR=$DATA_DIR/cadastre_cache export BAL_CACHE_DIR=$DATA_DIR/bal_cache export EXPIRE_TILES_DIR=$DATA_DIR/expire_tiles export CSV_DIR=$DATA_DIR/csv_insee +export COG_DIR=$DATA_DIR/cog export EXPORT_SAS_DIR=/data/sas_web export EXPORT_WEB_DIR=/data/work/bano.openstreetmap.fr/www/web/data/ export TMP_DIR=/data/tmp From 0f2a54498039c744e5f227f511440af1fbdd1c20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Rodrigo?= Date: Fri, 16 Feb 2024 11:59:53 +0100 Subject: [PATCH 149/163] Load topo only for configured deps --- bano/sources/topo.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/bano/sources/topo.py b/bano/sources/topo.py 
index 690001f..836583d 100644 --- a/bano/sources/topo.py +++ b/bano/sources/topo.py @@ -4,7 +4,7 @@ import gzip import io -from ..constants import get_const_code_dir,CODE_VOIE_FANTOIR +from ..constants import get_const_code_dir,CODE_VOIE_FANTOIR,DEPARTEMENTS from ..db import bano_db from .. import helpers as h @@ -60,8 +60,10 @@ def import_to_pg(): if line[16:18] != '14': continue # print(line) - topo_voie_to_csv(line) - io_in_csv.write('$'.join(topo_voie_to_csv(line))+'\n') # separateur $ car on trouve des virgules dans le contenu + champs = topo_voie_to_csv(line) + if champs[0] not in DEPARTEMENTS: + continue + io_in_csv.write('$'.join(champs)+'\n') # separateur $ car on trouve des virgules dans le contenu # if i > 20: # break io_in_csv.seek(0) From c9040c20a4f9c46d0a67f473724217b98c11f8ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Rodrigo?= Date: Fri, 16 Feb 2024 12:14:54 +0100 Subject: [PATCH 150/163] Does not fail on CP not modified file download --- bano/sources/datagouv_cp.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bano/sources/datagouv_cp.py b/bano/sources/datagouv_cp.py index a42a71b..8e6a020 100644 --- a/bano/sources/datagouv_cp.py +++ b/bano/sources/datagouv_cp.py @@ -41,6 +41,8 @@ def download(destination,url): f.write(resp.text) b.batch_stop_log(id_batch, True) return True + elif resp.status_code == 304: # Not Modified + return True print(resp.status_code) b.batch_stop_log(id_batch, False) return False From 237202af9d37d74bba93f75e177de6f1ee5a63c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Rodrigo?= Date: Thu, 15 Feb 2024 16:51:41 +0100 Subject: [PATCH 151/163] Load the download file with imposm --- load_osm_france_db.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/load_osm_france_db.sh b/load_osm_france_db.sh index 6c5c73e..72a3e8e 100755 --- a/load_osm_france_db.sh +++ b/load_osm_france_db.sh @@ -21,7 +21,7 @@ touch ${lockfile} mkdir -p $DOWNLOAD_DIR cd $DOWNLOAD_DIR wget -NS $PBF_URL -imposm import -config $SCRIPT_DIR/imposm.config -read $DOWNLOAD_DIR/france_metro_dom_com_nc.osm.pbf -overwritecache -diff -write -dbschema-import osm +imposm import -config $SCRIPT_DIR/imposm.config -read $DOWNLOAD_DIR/$PBF_FILE -overwritecache -diff -write -dbschema-import osm psql -d bano -U cadastre -f $SCRIPT_DIR/sql/finalisation.sql From a5c02d0b08758df72920b4a90e577d4a485387c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Rodrigo?= Date: Wed, 14 Feb 2024 17:18:18 +0100 Subject: [PATCH 152/163] Good practice, stop psql on first error when run sql script --- avancement_cumul.sh | 2 +- cron_bano.sh | 12 ++++++------ load_osm_france_db.sh | 2 +- stats.sh | 2 +- utils/fantoir2topo.sh | 6 +++--- utils/load_csv_export.sh | 25 +++++++++++++------------ 6 files changed, 25 insertions(+), 24 deletions(-) diff --git a/avancement_cumul.sh b/avancement_cumul.sh index d8155de..00b0de1 100755 --- a/avancement_cumul.sh +++ b/avancement_cumul.sh @@ -1,4 +1,4 @@ -psql -d cadastre -U cadastre -c "SELECT source,etape, date_debut,date_fin,dept,cadastre_com,nom_com,nombre_adresses FROM batch ORDER BY id_batch DESC LIMIT 10;" +psql -d cadastre -U cadastre -v ON_ERROR_STOP=1 -c "SELECT source,etape, date_debut,date_fin,dept,cadastre_com,nom_com,nombre_adresses FROM batch ORDER BY id_batch DESC LIMIT 10;" ps -eaf|grep 'bano' ps -eaf|grep cumul diff --git a/cron_bano.sh b/cron_bano.sh index a723557..7c13a25 100755 --- a/cron_bano.sh +++ b/cron_bano.sh @@ -24,8 +24,8 @@ echo 'sources ok' >> $SCRIPT_DIR/cron.log bano 
update_table_communes bano update_infos_communes -# psql -d osm -U cadastre -f sql/create_table_polygones_postaux.sql -# psql -d cadastre -U cadastre -f sql/post_copie_ban.sql +# psql -d osm -U cadastre -v ON_ERROR_STOP=1 -f sql/create_table_polygones_postaux.sql +# psql -d cadastre -U cadastre -v ON_ERROR_STOP=1 -f sql/post_copie_ban.sql echo 'màj polygones ok' >> $SCRIPT_DIR/cron.log # BANO @@ -47,8 +47,8 @@ cat deplist.txt | parallel -j 4 bano publish {1} bano publish_aggregate # ménage PostgreSQL -psql -d bano -U cadastre -c "VACUUM bano_adresses;" -psql -d bano -U cadastre -c "VACUUM bano_points_nommes;" -psql -d bano -U cadastre -c "VACUUM nom_fantoir;" -psql -d bano -U cadastre -c "GRANT SELECT ON ALL TABLES IN SCHEMA PUBLIC TO PUBLIC"; +psql -d bano -U cadastre -v ON_ERROR_STOP=1 -c "VACUUM bano_adresses;" +psql -d bano -U cadastre -v ON_ERROR_STOP=1 -c "VACUUM bano_points_nommes;" +psql -d bano -U cadastre -v ON_ERROR_STOP=1 -c "VACUUM nom_fantoir;" +psql -d bano -U cadastre -v ON_ERROR_STOP=1 -c "GRANT SELECT ON ALL TABLES IN SCHEMA PUBLIC TO PUBLIC"; echo 'fin du cron BANO' >> $SCRIPT_DIR/cron.log diff --git a/load_osm_france_db.sh b/load_osm_france_db.sh index 72a3e8e..b33e675 100755 --- a/load_osm_france_db.sh +++ b/load_osm_france_db.sh @@ -23,7 +23,7 @@ cd $DOWNLOAD_DIR wget -NS $PBF_URL imposm import -config $SCRIPT_DIR/imposm.config -read $DOWNLOAD_DIR/$PBF_FILE -overwritecache -diff -write -dbschema-import osm -psql -d bano -U cadastre -f $SCRIPT_DIR/sql/finalisation.sql +psql -d bano -U cadastre -v ON_ERROR_STOP=1 -f $SCRIPT_DIR/sql/finalisation.sql cp $DOWNLOAD_DIR/last.state.txt $DOWNLOAD_DIR/state.txt rm ${lockfile} diff --git a/stats.sh b/stats.sh index 2ebb25d..5ecd03b 100755 --- a/stats.sh +++ b/stats.sh @@ -1 +1 @@ -psql -d cadastre -f stats.sql -v dept=$1 +psql -d cadastre -v ON_ERROR_STOP=1 -f stats.sql -v dept=$1 diff --git a/utils/fantoir2topo.sh b/utils/fantoir2topo.sh index ca385c5..1f844ea 100755 --- a/utils/fantoir2topo.sh +++ b/utils/fantoir2topo.sh @@ -1,5 +1,5 @@ #!/bin/bash -psql -d cadastre -U cadastre --csv -f fantoir2topo.sql > fantoir_202304.csv -cat fantoir_202304.csv|psql -d bano -U cadastre -c"TRUNCATE topo;COPY topo FROM STDIN WITH CSV HEADER" -rm fantoir_202304.csv \ No newline at end of file +psql -d cadastre -U cadastre -v ON_ERROR_STOP=1 --csv -f fantoir2topo.sql > fantoir_202304.csv +cat fantoir_202304.csv| psql -d bano -U cadastre -v ON_ERROR_STOP=1 -c "TRUNCATE topo; COPY topo FROM STDIN WITH CSV HEADER" +rm fantoir_202304.csv diff --git a/utils/load_csv_export.sh b/utils/load_csv_export.sh index f32702a..41a5ffa 100755 --- a/utils/load_csv_export.sh +++ b/utils/load_csv_export.sh @@ -1,17 +1,18 @@ #!/bin/bash -psql -d cadastre -U cadastre -c "DROP TABLE IF EXISTS export_csv CASCADE; - CREATE TABLE export_csv( - id text, - numero text, - voie text, - cp text, - ville text, - source text, - lat decimal, - lon decimal);" +psql -d cadastre -U cadastre -v ON_ERROR_STOP=1 -c " + DROP TABLE IF EXISTS export_csv CASCADE; \ + CREATE TABLE export_csv( + id text, + numero text, + voie text, + cp text, + ville text, + source text, + lat decimal, + lon decimal);" for dep in {01..19} 2A 2B {21..95} {971..974} 976 do - cat /data/sas_web/bano-${dep}.csv|psql -d cadastre -U cadastre -c "COPY export_csv FROM STDIN WITH CSV" -done \ No newline at end of file + cat /data/sas_web/bano-${dep}.csv| psql -d cadastre -U cadastre -c "COPY export_csv FROM STDIN WITH CSV" +done From 75c95f39b0e0aa7e64b656dc75c1fde7727407d9 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Rodrigo?= Date: Wed, 14 Feb 2024 17:20:36 +0100 Subject: [PATCH 153/163] Stop all bash scripts on first error --- arborescence.sh | 4 +++- avancement_cumul.sh | 5 ++++- cron_bano.sh | 2 ++ stats.sh | 4 ++++ stats_export.sh | 6 +++++- utils/fantoir2topo.sh | 2 ++ utils/load_csv_export.sh | 2 ++ 7 files changed, 22 insertions(+), 3 deletions(-) diff --git a/arborescence.sh b/arborescence.sh index b40b982..03ac535 100755 --- a/arborescence.sh +++ b/arborescence.sh @@ -1,5 +1,7 @@ #!/bin/bash +set -e + source config mkdir -p $LOG_DIR mkdir -p $DATA_DIR @@ -13,4 +15,4 @@ mkdir -p $IMPOSM_CACHE_DIR mkdir -p $COG_DIR mkdir -p $EXPORT_SAS_DIR mkdir -p $EXPORT_WEB_DIR -mkdir -p $TMP_DIR \ No newline at end of file +mkdir -p $TMP_DIR diff --git a/avancement_cumul.sh b/avancement_cumul.sh index 00b0de1..4c3ca34 100755 --- a/avancement_cumul.sh +++ b/avancement_cumul.sh @@ -1,4 +1,7 @@ +#!/bin/bash + +set -e + psql -d cadastre -U cadastre -v ON_ERROR_STOP=1 -c "SELECT source,etape, date_debut,date_fin,dept,cadastre_com,nom_com,nombre_adresses FROM batch ORDER BY id_batch DESC LIMIT 10;" ps -eaf|grep 'bano' ps -eaf|grep cumul - diff --git a/cron_bano.sh b/cron_bano.sh index 7c13a25..819f8b2 100755 --- a/cron_bano.sh +++ b/cron_bano.sh @@ -1,5 +1,7 @@ #!/bin/bash +set -e + source /data/project/bano_v3/venv_v3/bin/activate SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" diff --git a/stats.sh b/stats.sh index 5ecd03b..ed5a5a3 100755 --- a/stats.sh +++ b/stats.sh @@ -1 +1,5 @@ +#!/bin/bash + +set -e + psql -d cadastre -v ON_ERROR_STOP=1 -f stats.sql -v dept=$1 diff --git a/stats_export.sh b/stats_export.sh index d0fd81a..d9187b5 100755 --- a/stats_export.sh +++ b/stats_export.sh @@ -1,3 +1,7 @@ +#!/bin/bash + +set -e + source config echo $EXPORT_SAS_DIR @@ -18,4 +22,4 @@ do echo $b $csvcount $csvcountunique $shpcount $status_csv $status_shp #|awk 'BEGIN{print $1 $2 $3 $4 $5};FS=" ";OFS="\t"' #awk 'BEGIN{print $1 $2 $3 $4 $5};FS=" ";OFS="\t"' "$b $csvcount $csvcountunique $shpcount $status_csv $status_shp" # wc -l $f; -done \ No newline at end of file +done diff --git a/utils/fantoir2topo.sh b/utils/fantoir2topo.sh index 1f844ea..c5f20ac 100755 --- a/utils/fantoir2topo.sh +++ b/utils/fantoir2topo.sh @@ -1,5 +1,7 @@ #!/bin/bash +set -e + psql -d cadastre -U cadastre -v ON_ERROR_STOP=1 --csv -f fantoir2topo.sql > fantoir_202304.csv cat fantoir_202304.csv| psql -d bano -U cadastre -v ON_ERROR_STOP=1 -c "TRUNCATE topo; COPY topo FROM STDIN WITH CSV HEADER" rm fantoir_202304.csv diff --git a/utils/load_csv_export.sh b/utils/load_csv_export.sh index 41a5ffa..be290f4 100755 --- a/utils/load_csv_export.sh +++ b/utils/load_csv_export.sh @@ -1,5 +1,7 @@ #!/bin/bash +set -e + psql -d cadastre -U cadastre -v ON_ERROR_STOP=1 -c " DROP TABLE IF EXISTS export_csv CASCADE; \ CREATE TABLE export_csv( From 1dba8205c566f82d41e3dca13b169cd85f25e7a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Rodrigo?= Date: Wed, 22 Feb 2023 22:13:26 +0100 Subject: [PATCH 154/163] Run scripts from project dir --- README.md | 1 + cron_osm.sh | 9 +++------ imposm.config | 2 +- load_osm_france_db.sh | 14 +++++++++----- 4 files changed, 14 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 7a95532..d9d389d 100644 --- a/README.md +++ b/README.md @@ -43,6 +43,7 @@ bano setup_db ## Chargement des données OSM ### Chargement initial D'abord renseigner le fichier imposm.config, puis lancer : + ``` ./load_osm_france_db.sh ``` diff --git a/cron_osm.sh b/cron_osm.sh index 
f54ad62..11b9dde 100755 --- a/cron_osm.sh +++ b/cron_osm.sh @@ -4,14 +4,11 @@ set -e source /data/project/bano_v3/venv_v3/bin/activate -SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" - -cd $SCRIPT_DIR pip install -e . -source $SCRIPT_DIR/config +source config -lockfile=${SCRIPT_DIR}/imposm.lock +lockfile=imposm.lock echo `date`>> $SCRIPT_DIR/cron.log echo debut >> $SCRIPT_DIR/cron.log @@ -32,7 +29,7 @@ fi touch ${lockfile} osmosis --rri workingDirectory=${DOWNLOAD_DIR} --wxc ${DOWNLOAD_DIR}/changes.osc.gz -imposm diff -config $SCRIPT_DIR/imposm.config -dbschema-production osm ${DOWNLOAD_DIR}/changes.osc.gz +imposm diff -config imposm.config -dbschema-production osm ${DOWNLOAD_DIR}/changes.osc.gz rm ${lockfile} diff --git a/imposm.config b/imposm.config index b80215e..79c94b9 100644 --- a/imposm.config +++ b/imposm.config @@ -2,6 +2,6 @@ "cachedir": "/data/bano_imposm_cache_v3", "diffdir": "/data/download_v3", "connection": "postgis://cadastre@localhost/bano?prefix=NONE", - "mapping": "/data/project/bano_v3/bano.yml", + "mapping": "bano.yml", "srid":4326 } diff --git a/load_osm_france_db.sh b/load_osm_france_db.sh index b33e675..3bd1978 100755 --- a/load_osm_france_db.sh +++ b/load_osm_france_db.sh @@ -2,8 +2,7 @@ set -e -SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" -source $SCRIPT_DIR/config +source config PBF_URL=${1:-http://download.openstreetmap.fr/extracts/merge/france_metro_dom_com_nc.osm.pbf} PBF_FILE=$(basename "$PBF_URL") @@ -19,9 +18,14 @@ fi touch ${lockfile} mkdir -p $DOWNLOAD_DIR -cd $DOWNLOAD_DIR -wget -NS $PBF_URL -imposm import -config $SCRIPT_DIR/imposm.config -read $DOWNLOAD_DIR/$PBF_FILE -overwritecache -diff -write -dbschema-import osm +wget --directory-prefix=$DOWNLOAD_DIR -NS $PBF_URL +imposm import \ + -config imposm.config \ + -read $DOWNLOAD_DIR/$PBF_FILE \ + -overwritecache \ + -diff \ + -write \ + -dbschema-import osm psql -d bano -U cadastre -v ON_ERROR_STOP=1 -f $SCRIPT_DIR/sql/finalisation.sql From 116a81587e340ed904c08841f06090f1dd6cf01a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Rodrigo?= Date: Fri, 16 Feb 2024 11:34:34 +0100 Subject: [PATCH 155/163] Make imposm follow config --- imposm.config | 2 -- load_osm_france_db.sh | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/imposm.config b/imposm.config index 79c94b9..cf4d5c4 100644 --- a/imposm.config +++ b/imposm.config @@ -1,7 +1,5 @@ { - "cachedir": "/data/bano_imposm_cache_v3", "diffdir": "/data/download_v3", - "connection": "postgis://cadastre@localhost/bano?prefix=NONE", "mapping": "bano.yml", "srid":4326 } diff --git a/load_osm_france_db.sh b/load_osm_france_db.sh index 3bd1978..97017e5 100755 --- a/load_osm_france_db.sh +++ b/load_osm_france_db.sh @@ -23,8 +23,10 @@ imposm import \ -config imposm.config \ -read $DOWNLOAD_DIR/$PBF_FILE \ -overwritecache \ + -cachedir $IMPOSM_CACHE_DIR \ -diff \ -write \ + -connection postgis://cadastre@localhost/bano?prefix=NONE \ -dbschema-import osm psql -d bano -U cadastre -v ON_ERROR_STOP=1 -f $SCRIPT_DIR/sql/finalisation.sql From 4d97598915d77acd36b69d398b19ea711c8c9bd4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Rodrigo?= Date: Wed, 14 Feb 2024 17:53:19 +0100 Subject: [PATCH 156/163] exit with 1 on error, does not log direcly from script --- cron_osm.sh | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/cron_osm.sh b/cron_osm.sh index 11b9dde..f35c6f0 100755 --- a/cron_osm.sh +++ b/cron_osm.sh @@ -10,18 +10,18 @@ source config lockfile=imposm.lock 
-echo `date`>> $SCRIPT_DIR/cron.log -echo debut >> $SCRIPT_DIR/cron.log +echo `date` +echo debut if test -f ${lockfile} then diff_age=$((`date +%s` - `stat -c %Y $lockfile`)) if [ $diff_age -gt 7200 ];then - echo "Effacement du lock" >> $SCRIPT_DIR/cron.log + echo "Effacement du lock" rm ${lockfile} else - echo `date`" : Process deja en cours" >> $SCRIPT_DIR/cron.log - exit 0 + echo `date`" : Process deja en cours" + exit 1 fi fi @@ -33,5 +33,5 @@ imposm diff -config imposm.config -dbschema-production osm ${DOWNLOAD_DIR}/chang rm ${lockfile} -echo `date` >> $SCRIPT_DIR/cron.log -echo fin >> $SCRIPT_DIR/cron.log +echo `date` +echo fin From f00ed5ca62d24906e90d6287a393ec41a21c80a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Rodrigo?= Date: Wed, 14 Feb 2024 17:56:22 +0100 Subject: [PATCH 157/163] Only one source for deps --- README.md | 4 ++++ bano/constants.py | 5 +---- utils/load_csv_export.sh | 2 +- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index d9d389d..69a1577 100644 --- a/README.md +++ b/README.md @@ -27,6 +27,10 @@ Une fois le fichier `config` rempli, lancer la création des répertoires avec : arborescence.sh ``` +### Liste des départements + +Les départements pris en compte sont listés dans `deplist.txt`. Cette liste peut être modifiée. + ### Création de la base de données La base de données qui accueille toutes les données BANO (les sources et les données produites) s'appelle 'bano' est doit être créée en début d'installation. C'est l'utilisateur 'postgres' qui doit exécuter les scripts de création de la base. diff --git a/bano/constants.py b/bano/constants.py index d2b8f95..1b1f2fd 100644 --- a/bano/constants.py +++ b/bano/constants.py @@ -19,10 +19,7 @@ def get_const_code_dir(): return load_json_from_file("code_dir.json") -DEPARTEMENTS = [ - f"{n:>02}" - for n in [*range(1, 20), "2A", "2B", *range(21, 96), *range(971, 975), "976"] -] +DEPARTEMENTS = [n.strip() for n in open('deplist.txt').readlines()] DEPARTEMENTS_3CHAR = [("0" + s)[-3:] for s in DEPARTEMENTS] diff --git a/utils/load_csv_export.sh b/utils/load_csv_export.sh index be290f4..feff1a5 100755 --- a/utils/load_csv_export.sh +++ b/utils/load_csv_export.sh @@ -14,7 +14,7 @@ psql -d cadastre -U cadastre -v ON_ERROR_STOP=1 -c " lat decimal, lon decimal);" -for dep in {01..19} 2A 2B {21..95} {971..974} 976 +for dep in `cat $BANO_DIR/deplist.txt` do cat /data/sas_web/bano-${dep}.csv| psql -d cadastre -U cadastre -c "COPY export_csv FROM STDIN WITH CSV" done From 5cbf8d5c62e2b1919556b3e3f3c897edeac5e3f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Rodrigo?= Date: Wed, 14 Feb 2024 17:59:09 +0100 Subject: [PATCH 158/163] Make LANG configurable, avoid tons of warning when remote postgres is not in French --- config | 2 +- cron_bano.sh | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/config b/config index a02eb2d..57ae1ed 100644 --- a/config +++ b/config @@ -16,4 +16,4 @@ export EXPORT_SAS_DIR=/data/sas_web export EXPORT_WEB_DIR=/data/work/bano.openstreetmap.fr/www/web/data/ export TMP_DIR=/data/tmp export SCHEMA_CIBLE=public -export LANG=fr_FR.UTF-8 +export LANG=${LANG:-fr_FR.UTF-8} diff --git a/cron_bano.sh b/cron_bano.sh index 819f8b2..45e8465 100755 --- a/cron_bano.sh +++ b/cron_bano.sh @@ -16,8 +16,8 @@ source config pip install -e . 
# Sources -cat deplist.txt | parallel -j 4 export LANG=fr_FR.UTF-8\; bano charge_ban {1} -cat deplist.txt | parallel -j 4 export LANG=fr_FR.UTF-8\; bano charge_ld_cadastre {1} +cat deplist.txt | parallel -j 4 export LANG=$LANG\; bano charge_ban {1} +cat deplist.txt | parallel -j 4 export LANG=$LANG\; bano charge_ld_cadastre {1} bano update_bis_table echo 'sources ok' >> $SCRIPT_DIR/cron.log @@ -31,7 +31,7 @@ bano update_infos_communes echo 'màj polygones ok' >> $SCRIPT_DIR/cron.log # BANO -cat deplist.txt | parallel -j 4 export LANG=fr_FR.UTF-8\; bano rapprochement --dept {1} +cat deplist.txt | parallel -j 4 export LANG=$LANG\; bano rapprochement --dept {1} echo 'rapprochement ok' >> $SCRIPT_DIR/cron.log From 4d339833c435d5609c4f6e831ab57a8e535b5e75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Rodrigo?= Date: Wed, 14 Feb 2024 18:14:50 +0100 Subject: [PATCH 159/163] Make postgres database configurable --- avancement_cumul.sh | 2 +- bano/db.py | 2 +- bano/export.py | 2 +- config | 17 +++++++++++++++++ cron_bano.sh | 12 ++++++------ init_base.sh | 2 +- load_osm_france_db.sh | 4 ++-- stats.sh | 2 +- utils/fantoir2topo.sh | 4 ++-- utils/load_csv_export.sh | 4 ++-- 10 files changed, 34 insertions(+), 17 deletions(-) diff --git a/avancement_cumul.sh b/avancement_cumul.sh index 4c3ca34..8e73c52 100755 --- a/avancement_cumul.sh +++ b/avancement_cumul.sh @@ -2,6 +2,6 @@ set -e -psql -d cadastre -U cadastre -v ON_ERROR_STOP=1 -c "SELECT source,etape, date_debut,date_fin,dept,cadastre_com,nom_com,nombre_adresses FROM batch ORDER BY id_batch DESC LIMIT 10;" +$pgsql_CADASTRE -c "SELECT source,etape, date_debut,date_fin,dept,cadastre_com,nom_com,nombre_adresses FROM batch ORDER BY id_batch DESC LIMIT 10;" ps -eaf|grep 'bano' ps -eaf|grep cumul diff --git a/bano/db.py b/bano/db.py index 3986009..9e19e1c 100644 --- a/bano/db.py +++ b/bano/db.py @@ -3,6 +3,6 @@ import psycopg2 import psycopg2.extras -bano_db = psycopg2.connect(os.environ.get("BANO_PG", "dbname='bano' user='cadastre'")) +bano_db = psycopg2.connect(os.environ.get("PG_BANO")) bano_db.autocommit = True psycopg2.extras.register_hstore(bano_db) diff --git a/bano/export.py b/bano/export.py index 57de6ec..eb08aaf 100644 --- a/bano/export.py +++ b/bano/export.py @@ -80,7 +80,7 @@ def save_as_ttl(dept,csv_data): def save_as_shp(dept): id_batch = b.batch_start_log("export SHP", "", dept) try: - subprocess.run(['ogr2ogr', '-f',"ESRI Shapefile", '-lco', 'ENCODING=UTF-8', '-s_srs', 'EPSG:4326', '-t_srs', 'EPSG:4326', '-overwrite', get_sas_full_filename(dept,'shp'), 'PG:dbname=bano user=cadastre', '-sql', sql_query('export_csv_dept',dict(dept=dept))]) + subprocess.run(['ogr2ogr', '-f',"ESRI Shapefile", '-lco', 'ENCODING=UTF-8', '-s_srs', 'EPSG:4326', '-t_srs', 'EPSG:4326', '-overwrite', get_sas_full_filename(dept,'shp'), 'PG:' + os.environ['PG_CADASTRE'], '-sql', sql_query('export_csv_dept',dict(dept=dept))]) b.batch_stop_log(id_batch, True) except: b.batch_stop_log(id_batch, False) diff --git a/config b/config index 57ae1ed..4cf631e 100644 --- a/config +++ b/config @@ -17,3 +17,20 @@ export EXPORT_WEB_DIR=/data/work/bano.openstreetmap.fr/www/web/data/ export TMP_DIR=/data/tmp export SCHEMA_CIBLE=public export LANG=${LANG:-fr_FR.UTF-8} + + +POSTGRES_USER=${POSTGRES_USER:-cadastre} +POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-} +POSTGRES_HOST=${POSTGRES_HOST:-localhost} +POSTGRES_PORT=${POSTGRES_PORT:-5432} + +POSTGRES_DB_BANO=${POSTGRES_DB:-bano} +POSTGRES_DB_CADASTRE=${POSTGRES_DB:-cadastre} + +export 
PGCON_BANO="$POSTGRES_USER${POSTGRES_PASSWORD:+:$POSTGRES_PASSWORD}@$POSTGRES_HOST:$POSTGRES_PORT/$POSTGRES_DB_BANO" +export pgsql_BANO="psql postgresql://${PGCON_BANO} -v ON_ERROR_STOP=1" +export PG_BANO="host='$POSTGRES_HOST' port=$POSTGRES_PORT dbname='$POSTGRES_DB_BANO' user='$POSTGRES_USER' ${POSTGRES_PASSWORD:+password='$POSTGRES_PASSWORD'}" + +export PGCON_CADASTRE="$POSTGRES_USER${POSTGRES_PASSWORD:+:$POSTGRES_PASSWORD}@$POSTGRES_HOST:$POSTGRES_PORT/$POSTGRES_DB_CADASTRE" +export pgsql_CADASTRE="psql postgresql://${PGCON_CADASTRE} -v ON_ERROR_STOP=1" +export PG_CADASTRE="host='$POSTGRES_HOST' port=$POSTGRES_PORT dbname='$POSTGRES_DB_CADASTRE' user='$POSTGRES_USER' ${POSTGRES_PASSWORD:+password='$POSTGRES_PASSWORD'}" diff --git a/cron_bano.sh b/cron_bano.sh index 45e8465..3fbd457 100755 --- a/cron_bano.sh +++ b/cron_bano.sh @@ -26,8 +26,8 @@ echo 'sources ok' >> $SCRIPT_DIR/cron.log bano update_table_communes bano update_infos_communes -# psql -d osm -U cadastre -v ON_ERROR_STOP=1 -f sql/create_table_polygones_postaux.sql -# psql -d cadastre -U cadastre -v ON_ERROR_STOP=1 -f sql/post_copie_ban.sql +# $pgsql_BANO -f sql/create_table_polygones_postaux.sql +# $pgsql_CADASTRE -f sql/post_copie_ban.sql echo 'màj polygones ok' >> $SCRIPT_DIR/cron.log # BANO @@ -49,8 +49,8 @@ cat deplist.txt | parallel -j 4 bano publish {1} bano publish_aggregate # ménage PostgreSQL -psql -d bano -U cadastre -v ON_ERROR_STOP=1 -c "VACUUM bano_adresses;" -psql -d bano -U cadastre -v ON_ERROR_STOP=1 -c "VACUUM bano_points_nommes;" -psql -d bano -U cadastre -v ON_ERROR_STOP=1 -c "VACUUM nom_fantoir;" -psql -d bano -U cadastre -v ON_ERROR_STOP=1 -c "GRANT SELECT ON ALL TABLES IN SCHEMA PUBLIC TO PUBLIC"; +$pgsql_BANO -c "VACUUM bano_adresses;" +$pgsql_BANO -c "VACUUM bano_points_nommes;" +$pgsql_BANO -c "VACUUM nom_fantoir;" +$pgsql_BANO -c "GRANT SELECT ON ALL TABLES IN SCHEMA PUBLIC TO PUBLIC"; echo 'fin du cron BANO' >> $SCRIPT_DIR/cron.log diff --git a/init_base.sh b/init_base.sh index f0d51e2..2d224b0 100755 --- a/init_base.sh +++ b/init_base.sh @@ -2,4 +2,4 @@ set -e -psql -d bano -U cadastre -v ON_ERROR_STOP=1 -f bano/sql/create_base.sql +$pgsql_BANO -f bano/sql/create_base.sql diff --git a/load_osm_france_db.sh b/load_osm_france_db.sh index 97017e5..1b3815b 100755 --- a/load_osm_france_db.sh +++ b/load_osm_france_db.sh @@ -26,10 +26,10 @@ imposm import \ -cachedir $IMPOSM_CACHE_DIR \ -diff \ -write \ - -connection postgis://cadastre@localhost/bano?prefix=NONE \ + -connection postgis://$PGCON_BANO?prefix=NONE \ -dbschema-import osm -psql -d bano -U cadastre -v ON_ERROR_STOP=1 -f $SCRIPT_DIR/sql/finalisation.sql +$pgsql_BANO -f $SCRIPT_DIR/sql/finalisation.sql cp $DOWNLOAD_DIR/last.state.txt $DOWNLOAD_DIR/state.txt rm ${lockfile} diff --git a/stats.sh b/stats.sh index ed5a5a3..189b1f5 100755 --- a/stats.sh +++ b/stats.sh @@ -2,4 +2,4 @@ set -e -psql -d cadastre -v ON_ERROR_STOP=1 -f stats.sql -v dept=$1 +$pgsql_CADASTRE -f stats.sql -v dept=$1 diff --git a/utils/fantoir2topo.sh b/utils/fantoir2topo.sh index c5f20ac..f4e1df8 100755 --- a/utils/fantoir2topo.sh +++ b/utils/fantoir2topo.sh @@ -2,6 +2,6 @@ set -e -psql -d cadastre -U cadastre -v ON_ERROR_STOP=1 --csv -f fantoir2topo.sql > fantoir_202304.csv -cat fantoir_202304.csv| psql -d bano -U cadastre -v ON_ERROR_STOP=1 -c "TRUNCATE topo; COPY topo FROM STDIN WITH CSV HEADER" +$pgsql_CADASTRE --csv -f fantoir2topo.sql > fantoir_202304.csv +$pgsql_BANO -v ON_ERROR_STOP=1 -c "TRUNCATE topo; COPY topo FROM STDIN WITH CSV HEADER" rm fantoir_202304.csv diff 
--git a/utils/load_csv_export.sh b/utils/load_csv_export.sh index feff1a5..9e96841 100755 --- a/utils/load_csv_export.sh +++ b/utils/load_csv_export.sh @@ -2,7 +2,7 @@ set -e -psql -d cadastre -U cadastre -v ON_ERROR_STOP=1 -c " +$pgsql_CADASTRE -c " DROP TABLE IF EXISTS export_csv CASCADE; \ CREATE TABLE export_csv( id text, @@ -16,5 +16,5 @@ psql -d cadastre -U cadastre -v ON_ERROR_STOP=1 -c " for dep in `cat $BANO_DIR/deplist.txt` do - cat /data/sas_web/bano-${dep}.csv| psql -d cadastre -U cadastre -c "COPY export_csv FROM STDIN WITH CSV" + cat /data/sas_web/bano-${dep}.csv| $pgsql_CADASTRE -c "COPY export_csv FROM STDIN WITH CSV" done From 824020daf3c08f514286115f09dcfa8aff7eda4f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Rodrigo?= Date: Wed, 14 Feb 2024 18:18:07 +0100 Subject: [PATCH 160/163] Does not log from script --- cron_bano.sh | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/cron_bano.sh b/cron_bano.sh index 3fbd457..ab021e4 100755 --- a/cron_bano.sh +++ b/cron_bano.sh @@ -4,10 +4,7 @@ set -e source /data/project/bano_v3/venv_v3/bin/activate -SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" -cd $SCRIPT_DIR - -echo 'debut du cron BANO' >> $SCRIPT_DIR/cron.log +echo 'debut du cron BANO' source config @@ -20,7 +17,7 @@ cat deplist.txt | parallel -j 4 export LANG=$LANG\; bano charge_ban {1} cat deplist.txt | parallel -j 4 export LANG=$LANG\; bano charge_ld_cadastre {1} bano update_bis_table -echo 'sources ok' >> $SCRIPT_DIR/cron.log +echo 'sources ok' # Mise à jour quotidienne dans la base cadastre des couches des polygones postaux d'OSM et des statuts admin de communes en vue des exports bano update_table_communes @@ -28,21 +25,21 @@ bano update_infos_communes # $pgsql_BANO -f sql/create_table_polygones_postaux.sql # $pgsql_CADASTRE -f sql/post_copie_ban.sql -echo 'màj polygones ok' >> $SCRIPT_DIR/cron.log +echo 'màj polygones ok' # BANO cat deplist.txt | parallel -j 4 export LANG=$LANG\; bano rapprochement --dept {1} -echo 'rapprochement ok' >> $SCRIPT_DIR/cron.log +echo 'rapprochement ok' -echo 'preparation export' >> $SCRIPT_DIR/cron.log +echo 'preparation export' bano prepare_export -echo 'preparation export finie' >> $SCRIPT_DIR/cron.log +echo 'preparation export finie' # exports -echo 'export' >> $SCRIPT_DIR/cron.log +echo 'export' cat deplist.txt | parallel -j 4 bano export {1} -echo 'export fini' >> $SCRIPT_DIR/cron.log +echo 'export fini' # copie+zip dans le dossier web cat deplist.txt | parallel -j 4 bano publish {1} @@ -53,4 +50,4 @@ $pgsql_BANO -c "VACUUM bano_adresses;" $pgsql_BANO -c "VACUUM bano_points_nommes;" $pgsql_BANO -c "VACUUM nom_fantoir;" $pgsql_BANO -c "GRANT SELECT ON ALL TABLES IN SCHEMA PUBLIC TO PUBLIC"; -echo 'fin du cron BANO' >> $SCRIPT_DIR/cron.log +echo 'fin du cron BANO' From 747837e02d9b826c20c22e7015c8391df0270615 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Rodrigo?= Date: Sat, 3 Dec 2022 17:51:28 +0100 Subject: [PATCH 161/163] Switch from python module to install to module to run --- README.md | 23 +++++++++++++++++++++++ bano/__init__.py | 0 bano/{bin.py => __main__.py} | 4 ++++ cron_bano.sh | 23 ++++++++++++----------- requirements.txt | 6 ++++++ setup.cfg | 18 ------------------ setup.py | 2 -- 7 files changed, 45 insertions(+), 31 deletions(-) delete mode 100644 bano/__init__.py rename bano/{bin.py => __main__.py} (99%) create mode 100644 requirements.txt delete mode 100644 setup.cfg delete mode 100644 setup.py diff --git a/README.md b/README.md 
index 69a1577..4bf7e1f 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,15 @@ Il faut pour alimenter la base OSM locale dans laquelle puise BANO : Autres outils : parallel. +## Installation + +Mettre en place un environnement virtuel python : +``` +virtualenv venv +source venv/bin/activate +pip install -r requirements.txt +``` + ## Configuration Première étape avant de lancer les chargements de données : il faut adapter le fichier `config` à votre environnement, en déclarant différents chemins. Ce fichier est utilisé en début de plusieurs scripts pour connaître le chemin de différents répertoires. @@ -74,3 +83,17 @@ bano charge_cog ``` bano charge_ld_cadastre ``` + +### Commande `bano` + +Activer l'environnement virtuel python (si pas déjà activé) : +``` +source venv/bin/activate +``` + +Pour connaître les commandes du module bano : +``` +python -m bano --help +``` + +Quasiment toutes les options sont utilisées dans le script `cron_bano`. diff --git a/bano/__init__.py b/bano/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/bano/bin.py b/bano/__main__.py similarity index 99% rename from bano/bin.py rename to bano/__main__.py index 1cdb50a..95a7147 100644 --- a/bano/bin.py +++ b/bano/__main__.py @@ -186,3 +186,7 @@ def main(): import ipdb ipdb.set_trace() + + +if __name__ == "__main__": + main() diff --git a/cron_bano.sh b/cron_bano.sh index ab021e4..3952cd8 100755 --- a/cron_bano.sh +++ b/cron_bano.sh @@ -10,40 +10,41 @@ source config ./arborescence.sh -pip install -e . + +bano="python -m bano" # Sources -cat deplist.txt | parallel -j 4 export LANG=$LANG\; bano charge_ban {1} -cat deplist.txt | parallel -j 4 export LANG=$LANG\; bano charge_ld_cadastre {1} -bano update_bis_table +cat deplist.txt | parallel -j 4 export LANG=$LANG\; $bano charge_ban {1} +cat deplist.txt | parallel -j 4 export LANG=$LANG\; $bano charge_ld_cadastre {1} +$bano update_bis_table echo 'sources ok' # Mise à jour quotidienne dans la base cadastre des couches des polygones postaux d'OSM et des statuts admin de communes en vue des exports -bano update_table_communes -bano update_infos_communes +$bano update_table_communes +$bano update_infos_communes # $pgsql_BANO -f sql/create_table_polygones_postaux.sql # $pgsql_CADASTRE -f sql/post_copie_ban.sql echo 'màj polygones ok' # BANO -cat deplist.txt | parallel -j 4 export LANG=$LANG\; bano rapprochement --dept {1} +cat deplist.txt | parallel -j 4 export LANG=$LANG\; $bano rapprochement --dept {1} echo 'rapprochement ok' echo 'preparation export' -bano prepare_export +$bano prepare_export echo 'preparation export finie' # exports echo 'export' -cat deplist.txt | parallel -j 4 bano export {1} +cat deplist.txt | parallel -j 4 $bano export {1} echo 'export fini' # copie+zip dans le dossier web -cat deplist.txt | parallel -j 4 bano publish {1} -bano publish_aggregate +cat deplist.txt | parallel -j 4 $bano publish {1} +$bano publish_aggregate # ménage PostgreSQL $pgsql_BANO -c "VACUUM bano_adresses;" diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..c4b3a43 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,6 @@ +beautifulsoup4==4.12.3 +psycopg2-binary==2.9.9 +requests==2.31.0 +lxml +ipdb==0.13.13 +urllib3==2.2.0 diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index bc5d694..0000000 --- a/setup.cfg +++ /dev/null @@ -1,18 +0,0 @@ -[metadata] -name=bano -version=3.0.0-alpha - -[options] -packages=find: -install_requires= - beautifulsoup4==4.6.3 - psycopg2-binary - requests - lxml==4.9.1 - 
-[options.extras_require] -test= - pytest - -[options.entry_points] -console_scripts=bano=bano.bin:main diff --git a/setup.py b/setup.py deleted file mode 100644 index 8ab824c..0000000 --- a/setup.py +++ /dev/null @@ -1,2 +0,0 @@ -from setuptools import setup -setup() \ No newline at end of file From f54e12a259e863c69dee4f933debae21001940fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Rodrigo?= Date: Fri, 16 Feb 2024 13:45:50 +0100 Subject: [PATCH 162/163] Does not source py env from script --- cron_bano.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/cron_bano.sh b/cron_bano.sh index 3952cd8..c1eba46 100755 --- a/cron_bano.sh +++ b/cron_bano.sh @@ -2,8 +2,6 @@ set -e -source /data/project/bano_v3/venv_v3/bin/activate - echo 'debut du cron BANO' source config From 2d045ce4f8488eda1f451da62a40ecb3ac2bb534 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Rodrigo?= Date: Wed, 22 Feb 2023 22:16:10 +0100 Subject: [PATCH 163/163] Add Docker support --- .dockerignore | 2 ++ Dockerfile | 18 ++++++++++++++++++ README.md | 43 +++++++++++++++++++++++++++++++++++++++++++ docker-compose.yml | 34 ++++++++++++++++++++++++++++++++++ 4 files changed, 97 insertions(+) create mode 100644 .dockerignore create mode 100644 Dockerfile create mode 100644 docker-compose.yml diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..86fff86 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,2 @@ +data +*.osm.pbf diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..3f8de5e --- /dev/null +++ b/Dockerfile @@ -0,0 +1,18 @@ +FROM python:3.12-bookworm + +RUN apt-get update && \ + apt-get install -y \ + gdal-bin \ + parallel \ + postgresql-client \ + python3-virtualenv + +WORKDIR /opt/imposm +RUN wget https://github.com/omniscale/imposm3/releases/download/v0.11.1/imposm-0.11.1-linux-x86-64.tar.gz && \ + tar -xvzf imposm-0.11.1-linux-x86-64.tar.gz && \ + ln -s /opt/imposm/imposm-0.11.1-linux-x86-64/imposm /usr/bin/imposm + +WORKDIR /opt/bano + +ADD requirements.txt . +RUN pip install -r requirements.txt diff --git a/README.md b/README.md index 4bf7e1f..7fd1092 100644 --- a/README.md +++ b/README.md @@ -97,3 +97,46 @@ python -m bano --help ``` Quasiment toutes les options sont utilisées dans le script `cron_bano`. + +## Docker + +### Configuration +Il ne faut pas modifier le fichier de configuration pour l'exécution avec docker. Pour changer le chemin ou sont stocké les données ajuster le volume data dans `docker-compose`. 
+ +``` +# Créer l'espace de travail +mkdir -p data +chmod a+s data +docker-compose run --rm tools ./arborescence.sh +``` + +### Initialisation +``` +# Démarre Postgres et attend un peu avant de l'utiliser +docker-compose up -d postgres && sleep 5 +docker-compose exec -u postgres postgres psql -c "DROP schema tiger CASCADE" +docker-compose run --rm tools bash -c "source config && ./init_base.sh" +docker-compose run --rm tools bash -c "source config && python -m bano setup_db" +``` + +Si besoin de se connecter sur la base de données : +``` +docker-compose exec -u postgres postgres psql +``` + +``` +# Charger les données OSM +docker-compose run --rm tools ./load_osm_france_db.sh http://download.openstreetmap.fr/extracts/europe/france/franche_comte/territoire_de_belfort.osm.pbf + +# Charger les autres données + +docker-compose run --rm tools bash -c "source config && python -m bano charge_topo" +docker-compose run --rm tools bash -c "source config && python -m bano charge_ban" +docker-compose run --rm tools bash -c "source config && python -m bano update_bis_table" +docker-compose run --rm tools bash -c "source config && python -m bano charge_cog" +docker-compose run --rm tools bash -c "source config && python -m bano charge_ld_cadastre" +docker-compose run --rm tools bash -c "source config && python -m bano charge_cp" +### Mise à jour +``` +docker-compose run --rm tools bash -c "source config && ./cron_bano.sh" +``` diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..79aa7de --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,34 @@ +version: '3.3' + +services: + postgres: + image: postgis/postgis:16-3.4-alpine + environment: + - POSTGRES_DB=postgres + - POSTGRES_USER=postgres + - POSTGRES_PASSWORD=postgres + - POSTGRES_HOST_AUTH_METHOD=trust + ports: + - "127.0.0.1:5432:5432" + + tools: + build: . + volumes: + - .:/opt/bano + - ./data:/data + environment: + # Postgres + - POSTGRES_DB=postgres + - POSTGRES_USER=postgres + - POSTGRES_PASSWORD=postgres + - POSTGRES_HOST=postgres + - POSTGRES_PORT=5432 + # BANO + - SRC_DIR=/opt/bano + - DATA_DIR=/data/bano + - DOWNLOAD_DIR=/data/download + - IMPOSM_CACHE_DIR=/data/bano_imposm_cache + - EXPORT_SAS_DIR=/data/export_sas + - EXPORT_WEB_DIR=/data/export + - TMP_DIR=/data/tmp + - LANG=C
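
For reference, a minimal sketch of how the `PG_BANO` variable introduced by the "Make postgres database configurable" patch is consumed once `config` has been sourced: `bano/db.py` now builds its connection from that environment variable alone. The snippet below only illustrates the expected shape of the DSN and a trivial query; the values shown are the defaults from `config` (user `cadastre`, database `bano` on `localhost:5432`), the `bano_adresses` table is the one defined in `create_table_base_bano_cibles.sql`, and none of this code is part of the patched repository itself.

```
import os
import psycopg2

# Same shape as the PG_BANO string assembled in `config`; these values are
# the config defaults and are assumptions made for the sake of the example.
os.environ.setdefault(
    "PG_BANO",
    "host='localhost' port=5432 dbname='bano' user='cadastre'",
)

# Essentially what bano/db.py does at import time after the PG_BANO change
# (hstore registration omitted here).
bano_db = psycopg2.connect(os.environ.get("PG_BANO"))
bano_db.autocommit = True

with bano_db.cursor() as cur:
    # bano_adresses is created by create_table_base_bano_cibles.sql
    cur.execute("SELECT COUNT(*) FROM bano_adresses;")
    print(cur.fetchone()[0])
```

Inside the `tools` container from `docker-compose.yml` the same sketch applies with `POSTGRES_HOST=postgres`, which `config` folds into `PG_BANO` automatically.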