-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
191 lines (147 loc) · 5.52 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
import pdfplumber
from ics import Calendar, Event
from datetime import datetime
import pytz
import pathlib
import argparse
def get_arguments() -> tuple:
    """
    Initializes the argparser and returns input and output filepaths

    Returns:
        A two-element tuple ``(input_path, output_path)``. Each element is the
        string given on the command line, or None when the option was omitted
        (neither option is marked as required).
    """
    parser = argparse.ArgumentParser(
        description="Export ics calendar event from intercity ticket in pdf file format.")
    parser.add_argument('-i',
                        '--input',
                        type=str,
                        metavar="ticket_filepath",
                        help="Path to the ticket pdf file (e.g., ~/Desktop/eic_151993996.pdf)")
    parser.add_argument('-o',
                        '--output',
                        metavar="exported_event_filepath",
                        type=str,
                        help="Path where ics calendar event file should be saved.")
    args = parser.parse_args()
    return args.input, args.output
def __validate_path__(filepath: str, is_input: bool = True) -> bool:
    """
    Checks if a string is usable as a file path, returns True if yes and False if not

    Args:
        filepath: Candidate path. Values that pathlib cannot interpret
            (e.g. None) are reported as invalid instead of raising.
        is_input: When True (default) the file must also be openable for
            reading. When False only the path itself is checked, because an
            output file does not have to exist yet.

    Returns:
        True when the path passes the checks above, False otherwise.
    """
    try:
        pathlib.Path(filepath).resolve()
    except (TypeError, ValueError, OSError, RuntimeError):
        # TypeError: not a path-like value; ValueError: e.g. embedded NUL;
        # OSError / RuntimeError: the path could not be resolved.
        return False

    if not is_input:
        return True

    try:
        # The context manager closes the probe handle right away; only the
        # ability to open the file matters here, not its contents.
        with open(filepath, 'r'):
            return True
    except (OSError, ValueError):
        return False
def validate_arguments(input_arg: str, output_arg) -> bool:
    """
    Validates script arguments and returns True if requirements are met

    Args:
        input_arg: Path of the ticket file; must be a string naming a file
            that can be opened for reading.
        output_arg: Path of the file to create, or None when no output path
            is available, in which case only the input path is validated.

    Returns:
        True when every supplied argument is a valid string path.
    """
    # Check if input arg is a valid string path
    input_arg_is_valid = isinstance(input_arg, str) and __validate_path__(input_arg)

    # Early return if there's no output arg
    if output_arg is None:
        return input_arg_is_valid

    # Check if output arg is a valid string path (the file need not exist yet)
    output_arg_is_valid = isinstance(output_arg, str) and __validate_path__(
        output_arg, is_input=False)

    # Return if both input and output args are valid
    return input_arg_is_valid and output_arg_is_valid
def extract_data(filepath: str) -> dict:
    """
    Extracts data from pdf file and returns a dictionary

    Only the first page of the document is read: the second line of its text
    supplies the origin and destination, and the second row of its table
    supplies the journey details.

    Args:
        filepath: Path to the ticket pdf file.

    Returns:
        A dictionary with the keys 'origin', 'destination' and 'info', where
        'info' is a dictionary with the keys 'departure_hour', 'arrival_hour',
        'date', 'carrier', 'train', 'wagon' and 'seats'.

    Raises:
        ValueError: If the first page does not contain the expected text line
            or table row.
    """
    def create_dict_from_table(row):
        # Column 1 is split on '-' into the departure and arrival parts
        hours = row[1].split('-')
        return {
            'departure_hour': hours[0].strip(),
            'arrival_hour': hours[1].strip(),
            'date': row[2],
            # Line breaks inside these cells are flattened to spaces
            'carrier': row[3].replace('\n', ' '),
            'train': row[4].replace('\n', ' '),
            'wagon': row[6],
            'seats': row[7]
        }

    with pdfplumber.open(filepath) as pdf:
        first_page = pdf.pages[0]
        # Extract the text a single time; both station names come from it
        text_lines = (first_page.extract_text() or '').split('\n')
        rows = first_page.extract_table()

    if len(text_lines) < 2:
        raise ValueError(f"No route line found on the first page of {filepath}")

    # NOTE(review): the first two space-separated words of the second line are
    # taken as origin and destination - presumably multi-word station names
    # would be cut short here; verify against real tickets.
    stations = text_lines[1].split(' ')
    if len(stations) < 2:
        raise ValueError(f"Cannot read origin and destination from {filepath}")
    origin = stations[0].strip()
    destination = stations[1].strip()

    # Row 0 is skipped (presumably the table header); row 1 carries the data
    if not rows or len(rows) < 2:
        raise ValueError(f"No journey table found on the first page of {filepath}")
    table = create_dict_from_table(rows[1])

    return {'origin': origin, 'destination': destination, 'info': table}
def localize_datetime(time: str, date: str) -> datetime:
    """
    Combines date and time strings into a datetime localized to Europe/Warsaw

    Args:
        time: Clock time in "%H:%M" format, e.g. "14:05".
        date: Calendar date in "%d.%m.%Y" format, e.g. "31.12.2023".

    Returns:
        A timezone-aware datetime in the Europe/Warsaw timezone.

    Raises:
        ValueError: If the strings do not match the expected formats.
    """
    local_timezone = pytz.timezone('Europe/Warsaw')
    naive_datetime = datetime.strptime(f"{date} {time}", "%d.%m.%Y %H:%M")
    # pytz timezones must be attached with localize() to get the right offset
    return local_timezone.localize(naive_datetime)
def create_calendar(data: dict):
    """
    Creates Calendar with new Event object

    Args:
        data: Dictionary with 'origin', 'destination' and 'info' keys, where
            'info' provides 'departure_hour', 'arrival_hour', 'date',
            'carrier', 'train', 'wagon' and 'seats'.

    Returns:
        A Calendar containing a single Event that describes the journey.
    """
    # Local import: the module only imports `datetime` from the datetime package
    from datetime import timedelta

    info = data['info']
    calendar = Calendar()
    event = Event()

    # eg. Kraków > Wrocław
    event.name = f"{data['origin']} > {data['destination']}"

    event_date = info['date']
    begin = localize_datetime(info['departure_hour'], event_date)
    end = localize_datetime(info['arrival_hour'], event_date)

    # Only one date is available for the journey. An arrival time earlier than
    # the departure time therefore means the train arrives after midnight, so
    # the end is moved to the following day (an event ending before it begins
    # is not a valid calendar entry).
    # NOTE(review): assumes the date refers to the departure day - confirm.
    if end < begin:
        next_day = datetime.strptime(event_date, "%d.%m.%Y") + timedelta(days=1)
        end = localize_datetime(info['arrival_hour'], next_day.strftime("%d.%m.%Y"))

    event.begin = begin
    event.end = end
    event.description = f"Pociąg {info['carrier']} {info['train']}.\nWagon {info['wagon']}, miejsce {info['seats']}"

    calendar.events.add(event)
    return calendar
def get_original_filename(path: str) -> str:
    """
    Extracts the bare file name (no directories, no extension) from a path string
    """
    # Whatever follows the last '/' is the file name itself
    _, _, basename = path.rpartition('/')
    # Whatever precedes the first '.' of that name is returned
    stem, _, _ = basename.partition('.')
    return stem
# TODO: validation
def save_calendar_to_file(calendar: Calendar, output_path: str):
    """
    Writes calendar event data to .ics file

    Args:
        calendar: Calendar whose serialized form is written.
        output_path: Path of the file to create or overwrite.

    A failure to open or write the file is not raised; a message is printed
    instead.
    """
    try:
        # utf-8: the event text contains non-ASCII characters, which the
        # platform default encoding may be unable to encode.
        # newline='': write the serialized text verbatim, without translating
        # line endings (presumably it already uses the CRLF line breaks of the
        # iCalendar format - confirm against the ics library).
        with open(output_path, 'w', encoding='utf-8', newline='') as file:
            file.write(calendar.serialize())
    except OSError:
        print("Exception while writing to file.")
def start():
    """
    Runs the script: reads the arguments, extracts the ticket data from the
    pdf file and saves it as an ics calendar event
    """
    input_path, output_path = get_arguments()

    # Abort execution if there's no input argument. This has to be checked
    # before input_path is used to derive the default output path.
    if input_path is None:
        print(
            "In order to process pdf ticket file, please add it using -i (or --input) [path/to/file.pdf] argument.")
        return

    # With output_path still None only the input path is validated here
    if not validate_arguments(input_path, output_path):
        print("Incorrect arguments were passed.")
        return

    # If there's no output path argument specified, use the input one with
    # corresponding extension. Replacing the suffix (rather than the text
    # ".pdf" anywhere in the path) guarantees the result differs from the
    # input path, so the ticket file is never overwritten.
    if output_path is None:
        output_path = str(pathlib.Path(input_path).with_suffix(".ics"))

    extracted_data = extract_data(input_path)
    calendar = create_calendar(extracted_data)
    # NOTE(review): save_calendar_to_file prints its own message on a write
    # error and does not raise, so the success message below is printed even
    # if writing failed.
    save_calendar_to_file(calendar, output_path)

    green_ansi = "\033[32m"
    reset_ansi = "\033[0m"
    print(
        f"{green_ansi}Calendar event successfully created: {output_path}{reset_ansi}")


if __name__ == "__main__":
    start()