From b221566c0d667f19aa56b538238f18afd1eb5420 Mon Sep 17 00:00:00 2001 From: Tim Howgego Date: Tue, 26 Jan 2021 22:07:07 +0000 Subject: [PATCH] Minor coding and usability enhancements --- README.md | 26 ++++++++++++++++++++++---- atcociftogtfs/__init__.py | 2 +- atcociftogtfs/atcocif.py | 28 +++++++++++++++------------- atcociftogtfs/loader.py | 28 ++++++++++++++++++---------- tests/test_loader.py | 4 ++-- 5 files changed, 58 insertions(+), 30 deletions(-) diff --git a/README.md b/README.md index 2098f85..848611f 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,10 @@ # ATCO-CIF To GTFS +[![Latest Version](https://img.shields.io/pypi/v/atcociftogtfs.svg)](https://pypi.org/project/atcociftogtfs/) [![Test Status](https://github.com/timhowgego/atcociftogtfs/workflows/test_atcociftogtfs/badge.svg)](https://github.com/timhowgego/atcociftogtfs/actions?query=workflow%3Atest_atcociftogtfs) + Converts ATCO.CIF (ATCO-CIF) public transport schedule files to [static GTFS format](https://gtfs.org/reference/static). ATCO (Association of Transport Coordinating Officers) CIF (Common Interface File) was the United Kingdom standard for bus schedule data transfer for the first decade of the 2000s, but has since been largely replaced by [TransXchange](https://www.gov.uk/government/collections/transxchange). ATCO-CIF differs from [the CIF format used by UK railways](https://wiki.openraildata.com/index.php/CIF_File_Format). -The converter supports ATCO.CIF version 5 (the only version ever deployed) but the current implementation focuses only on the core schedule/stop information that characterises most networks: There is no support for interchange (transfers), clustering (stop parents), journey associations (blocks), or most AIM data extensions (including hail-and-ride). 
By default, bank (public) holiday variations are ignored, and all dates are assumed to be in school term-time - but both assumptions can be overridden if the user provides bespoke lists of dates (via command line arguments `-b` and `-s`). Stop grid coordinate conversion is included, but the (EPSG) grid must be defined (via command line argument `-e`). +The converter supports ATCO-CIF version 5 (the only version ever deployed) but the current implementation focuses only on the core schedule/stop information that characterises most networks: There is no support for interchange (transfers), clustering (stop parents), journey associations (blocks), or most AIM data extensions (including hail-and-ride). By default, bank (public) holiday variations are ignored, and all dates are assumed to be in school term-time - but both assumptions can be overridden if the user provides bespoke lists of dates (via command line arguments `-b` and `-s`). Stop grid coordinate conversion is included, but the (EPSG) grid must be defined (via command line argument `-e`). ## Install @@ -18,7 +20,7 @@ The most basic usage is from the command prompt: followed by one or more space-separated ATCO.CIF data sources (ATCO.CIF file, directory or zip file containing ATCO.CIF files, or internet URL of the same). By default, the converter will output a `gtfs.zip` to your current directory. -If you do not understand the ATCO-CIF data you are importing, initially add two switches: `-u` (which protects against common _gotchas_, such as one bus operator with two identically numbered routes in different places) and `-v` (which gives feedback on processing and data). +If you do not understand the data you are importing, initially add two switches: `-u` (which protects against common _gotchas_, such as one bus operator with two identically numbered routes in different places) and `-v` (which gives feedback on processing and data). 
To output comprehensive GTFS information you will need to specify `-b` (with a file listing bank holidays), `-e` (`29903` in Ireland, `27700` in Great Britain), and `-s` (with a file listing school term time periods) - all detailed below. @@ -35,15 +37,31 @@ where `source` is one or more ATCO.CIF data sources: directory, cif, url, zip (m * `-d`, `--directional_routes`: Uniquely identify inbound and outbound directions as different routes. Optional, defaults to combining inbound and outbound into the same route. * `-e [EPSG]`, `--epsg [EPSG]`: EPSG Geodetic Parameter Dataset code. For Ireland, `29903`. For Great Britain, `27700`. Optional, but GTFS stop lat and lon will be 0 if argument is omitted. * `-f [FINAL_DATE]`, `--final_date [FINAL_DATE]`: Final `yyyymmdd` date of service, to replace ATCO-CIF's indefinite last date. Optional, defaults to conversion date +1 year. -* `-r [GRID_FIGURES]`, `--grid [GRID_FIGURES]`: Number of figures in each Northing or Easting grid reference value. ATCO-CIF should holds 8-figure grid references, but may contain less. Optional, defaults to best guess. +* `-r [GRID_FIGURES]`, `--grid [GRID_FIGURES]`: Number of figures in each Northing or Easting grid reference value. ATCO-CIF should hold 8-figure grid references, but may contain less. Optional, defaults to best fit. * `-g [GTFS_FILENAME]`, `--gtfs [GTFS_FILENAME]`: Output GTFS zip filename (directory optional). Optional, defaults in `gtfs.zip`. * `-l [LOG_FILENAME]`, `--log [LOG_FILENAME]`: Append feedback to this text filename (directory optional), not the console. Optional, defaults to console. * `-m [MODE]`, `--mode [MODE]`: GTFS mode integer code. Optional, defaults to `3` (bus). * `-u`, `--unique_ids`: Force IDs for operators, routes and stops to be unique to each ATCO-CIF file processed within a multi-file batch. Safely reconciles files from different sources, but creates data redundancies within the resulting GTFS file. 
Optional, defaults to the identifiers used in the original ATCO-CIF files. * `-v`, `--verbose`: Verbose feedback of all progress to log or console. Optional, defaults to warnings and errors only. +* `-V`, `--version`: Prints atcociftogtfs version and exits. * `-s [SCHOOL_TERM]`, `--school_term [SCHOOL_TERM]`: Filename (directory optional) for text file containing `yyyymmdd,yyyymmdd` (startdate,enddate) school term periods, one comma-separated pair of dates per line. Optional, defaults to treating all periods as school term-time. * `-t [TIMEZONE]`, `--timezone [TIMEZONE]`: Timezone in IANA TZ format. Optional, defaults to `Europe/London`. +## Module + +The converter can also be integrated into any Python script as a module, for example: + + from atcociftogtfs.atcocif import atcocif + my_instance = atcocif() # Initialise (optional args=Namespace, as below) + my_instance.file(filename="source.cif") # Process file source.cif + my_instance.file(filename="another.cif") # And so on, until + my_instance.dump(filename="output.zip") # Finally, create GTFS + del my_instance # Clean up temporary database + +Such an instance can be initialised with an `args` Namespace, in which values are keyed using the long-form command line argument (less its initial `--`). Alternatively, such arguments may be set or changed on an existing instance, for example `my_instance.epsg=29903`. + +The instance's internal SQLite database can be queried directly using a cursor created as `my_instance.db.cursor()`. The structure of this database mimics that of the GTFS output, except table names are filenames stripped of their `.txt` (detailed by `_gtfs_structure` in `atcocif.py`). + ## Bugs and Contributions -Error reports and code improvements/extensions [are welcome](https://github.com/timhowgego/atcociftogtfs/issues). The current code should be functional, but is far from optimal. Please attach a copy of the relevant ATCO-CIF source file to reports about unexpected errors. 
+Error reports and code improvements/extensions [are welcome](https://github.com/timhowgego/atcociftogtfs/issues). The current code should be functional, but is far from optimal. Please attach a copy of the relevant ATCO.CIF source file to reports about unexpected errors. diff --git a/atcociftogtfs/__init__.py b/atcociftogtfs/__init__.py index 87c97c9..a902fe2 100644 --- a/atcociftogtfs/__init__.py +++ b/atcociftogtfs/__init__.py @@ -1 +1 @@ -__version__ = "2021.1.24" +__version__ = "2021.1.26" diff --git a/atcociftogtfs/atcocif.py b/atcociftogtfs/atcocif.py index 501bbea..b1966b3 100644 --- a/atcociftogtfs/atcocif.py +++ b/atcociftogtfs/atcocif.py @@ -36,8 +36,8 @@ class atcocif: epsg = None # EPSG code (None = skip coordinate processing) file_num = 0 # Incrementing file counter final_date = None # Final yyyymmdd date of service (default via __init__) - grid_figures = None # Northing/Easting grid ref figures (None = guess) - gtfs_filename = None # GTFS output zip filename (None = fail dump) + grid = None # Northing/Easting grid ref figures (None = guess) + gtfs = None # GTFS output zip filename (None = fail dump) in_trip = False # Currently processing a trip_id last_hour = 0 # Hour of the last stop_time processed line_num = 0 # Incrementing file line counter @@ -68,7 +68,7 @@ class atcocif: _arg_vars = [ "bank_holidays", "epsg", "directional_routes", "final_date", - "grid_figures", "gtfs_filename", "mode", "unique_ids", + "grid", "gtfs", "mode", "unique_ids", "verbose", "school_term", "timezone" ] # These variables can be overwritten by arguments of the same name @@ -255,12 +255,12 @@ def date_years_hence(self, years_hence=1): def dump(self, filename=None): """Creates GTFS zip archive @param filename and writes in processed - data, @return 1 OK or 0 not.""" + data, @return 0 OK or 1 not.""" if filename is None: - if self.gtfs_filename is None: + if self.gtfs is None: return 1 - filename = self.gtfs_filename + filename = self.gtfs try: c = self.db.cursor() @@ 
-402,7 +402,7 @@ def file(self, filename=""): logging.getLogger(__name__).exception( "Error processing line %s of %s: %s", self.line_num, - os.path.basename(self.base_filename), + self.base_filename, e, ) return 1 @@ -445,7 +445,7 @@ def report(self, topic=None): "Check %s's stops.txt for details." ), fetched[0], - self.gtfs_filename, + self.gtfs, ) if topic is None or topic == "duplication": @@ -1377,11 +1377,13 @@ def stops(self): and "easting" in self.stop_cache[stop_id] and "northing" in self.stop_cache[stop_id] ): - if self.grid_figures is None: + if self.grid is None: # Assume accuracy of first applies to all - self.grid_figures = len( + self.grid = max(len( self.stop_cache[stop_id]["easting"].strip() - ) + ), len( + self.stop_cache[stop_id]["northing"].strip() + )) latlog = transformer.transform( self.sanitize_grid_ref( @@ -1436,7 +1438,7 @@ def stops(self): ), out_of_bounds, self.epsg, - self.grid_figures, + self.grid, ) if len(insert) > 0 or len(update) > 0: @@ -1614,7 +1616,7 @@ def sanitize_grid_ref(self, ref=""): try: return float("{}{}".format( ref.strip(), - "0" * max(8 - self.grid_figures, 0) + "0" * max(8 - self.grid, 0) )) / 100 except ValueError: diff --git a/atcociftogtfs/loader.py b/atcociftogtfs/loader.py index 4b293de..d5323ae 100644 --- a/atcociftogtfs/loader.py +++ b/atcociftogtfs/loader.py @@ -14,6 +14,7 @@ import zipfile from atcociftogtfs.atcocif import atcocif +from atcociftogtfs import __version__ def main(args=None): @@ -29,9 +30,9 @@ def main(args=None): else: logging_level = logging.WARNING - if hasattr(args, "log_filename") and args.log_filename is not None: + if hasattr(args, "log") and args.log is not None: logging.basicConfig( - filename=args.log_filename, + filename=args.log, level=logging_level, format="%(asctime)s:%(levelname)s:%(message)s", ) @@ -52,24 +53,24 @@ def main(args=None): if hasattr(args, "verbose") and args.verbose: processor.report(topic=None) - if not hasattr(args, "gtfs_filename"): + if not hasattr(args, 
"gtfs"): logging.error("No output file specified.") return 1 - status = processor.dump(filename=args.gtfs_filename) + status = processor.dump(filename=args.gtfs) if status == 0: if processor.file_num > 1: logging.info( "Completed %s from %s ATCO-CIF files. Finished in %ss.", - args.gtfs_filename, + args.gtfs, processor.file_num, round(time.time() - start_time), ) else: logging.info( "Completed %s. Finished in %ss.", - args.gtfs_filename, + args.gtfs, round(time.time() - start_time), ) @@ -86,6 +87,7 @@ def arguments(): description="Converts ATCO.CIF files into GTFS format.", prog="atcociftogtfs", ) + parser.version = __version__ parser.add_argument( "source", @@ -135,10 +137,10 @@ def arguments(): "-r", "--grid", nargs="?", - dest="grid_figures", + dest="grid", type=int, help="""Number of figures in each Northing or Easting grid reference - value. ATCO-CIF should holds 8 figure grid references, but may + value. ATCO-CIF should hold 8 figure grid references, but may contain less. Optional, defaults to best fit.""", ) parser.add_argument( @@ -146,7 +148,7 @@ def arguments(): "--gtfs", nargs="?", default="gtfs.zip", - dest="gtfs_filename", + dest="gtfs", help="""Output GTFS zip filename (directory optional). Optional, defaults in gtfs.zip.""", ) @@ -154,7 +156,7 @@ def arguments(): "-l", "--log", nargs="?", - dest="log_filename", + dest="log", help="""Append feedback to this text filename (directory optional), not the console. Optional, defaults to console.""", ) @@ -186,6 +188,12 @@ def arguments(): help="""Verbose feedback of all progress to log or console. 
Optional, defaults to warnings and errors only.""", ) + parser.add_argument( + "-V", + "--version", + action="version", + help="""Prints atcociftogtfs version and exits.""", + ) parser.add_argument( "-s", "--school_term", diff --git a/tests/test_loader.py b/tests/test_loader.py index b25824d..dc39ee6 100644 --- a/tests/test_loader.py +++ b/tests/test_loader.py @@ -17,8 +17,8 @@ def test_main(self): with tempfile.NamedTemporaryFile(delete=False) as gtfs: with tempfile.NamedTemporaryFile(delete=False) as log: args = types.SimpleNamespace( - gtfs_filename=gtfs.name, - log_filename=log.name, + gtfs=gtfs.name, + log=log.name, verbose=True, source=[source.name], ) # Log to silently tests verbose