From f83f215a61421970eb97553d13dcd46895abdb53 Mon Sep 17 00:00:00 2001 From: grindsa Date: Wed, 2 Oct 2024 08:25:55 +0100 Subject: [PATCH] [fix] #61 - sanitize filenames --- dkb_robo/api.py | 6 +++--- dkb_robo/utilities.py | 10 ++++++++++ test/test_utilities.py | 45 ++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 56 insertions(+), 5 deletions(-) diff --git a/dkb_robo/api.py b/dkb_robo/api.py index e01d706..f13fafc 100644 --- a/dkb_robo/api.py +++ b/dkb_robo/api.py @@ -11,7 +11,7 @@ import threading from typing import Dict, List, Tuple import requests -from dkb_robo.utilities import get_dateformat +from dkb_robo.utilities import get_dateformat, get_valid_filename from dkb_robo.legacy import Wrapper as Legacywrapper @@ -707,7 +707,7 @@ def _download_document(self, path: str, document: Dict[str, str]) -> str: if response.status_code == 200: self.logger.info('Saving %s/%s...', directories[1], document['filename']) - with open(f'{directories[1]}/{document["filename"]}', 'wb') as file: + with open(f'{directories[1]}/{get_valid_filename(document["filename"])}', 'wb') as file: file.write(response.content) if not document['read']: @@ -777,7 +777,7 @@ def _process_document(self, path: str, prepend_date: bool, document: Dict[str, s documentname_list.append(document['filename']) self.logger.debug('api.Wrapper._process_document() ended\n') - return documentname_list, f'{path}/{document["document_type"]}/{document["filename"]}', rcode + return documentname_list, f'{path}/{document["document_type"]}/{get_valid_filename(document["filename"])}', rcode def _filter_postbox(self, msg_dic: Dict[str, str], pb_dic: Dict[str, str], path: bool = None, download_all: bool = False, _archive: bool = False, prepend_date: bool = None) -> Dict[str, str]: """ filter postbox """ diff --git a/dkb_robo/utilities.py b/dkb_robo/utilities.py index 7c07ada..e4c6f4b 100644 --- a/dkb_robo/utilities.py +++ b/dkb_robo/utilities.py @@ -6,6 +6,7 @@ from typing import List, Tuple from datetime import datetime, timezone import time +import re def get_dateformat(): @@ -45,6 +46,15 @@ def generate_random_string(length: int) -> str: return ''.join(random.choice(char_set) for _ in range(length)) +def get_valid_filename(name): + """ sanitize filenames """ + s = str(name).strip().replace(" ", "_") + s = re.sub(r"(?u)[^-\w.]", "_", s) + if s in {"", ".", ".."}: + s = f'{generate_random_string(8)}.pdf' + return s + + def string2float(value: str) -> float: """ convert string to float value """ try: diff --git a/test/test_utilities.py b/test/test_utilities.py index 148a8df..c8485ed 100644 --- a/test/test_utilities.py +++ b/test/test_utilities.py @@ -17,13 +17,14 @@ class TestDKBRobo(unittest.TestCase): def setUp(self): self.dir_path = os.path.dirname(os.path.realpath(__file__)) - from dkb_robo.utilities import validate_dates, generate_random_string, logger_setup, string2float, _convert_date_format, get_dateformat + from dkb_robo.utilities import validate_dates, generate_random_string, logger_setup, string2float, _convert_date_format, get_dateformat, get_valid_filename self.validate_dates = validate_dates self.string2float = string2float self.generate_random_string = generate_random_string self.logger_setup = logger_setup self._convert_date_format = _convert_date_format self.get_dateformat = get_dateformat + self.get_valid_filename = get_valid_filename self.logger = logging.getLogger('dkb_robo') @patch('time.time') @@ -218,7 +219,47 @@ def test_030__convert_date_format(self): """ test _convert_date_format() no match """ self.assertEqual('wrong date', self._convert_date_format(self.logger, 'wrong date', ['%Y/%m/%d', '%Y-%m-%d'], '%d.%m.%Y')) - + def test_039__get_valid_filename(self): + """ test get_valid_filename """ + filename = 'test.pdf' + self.assertEqual('test.pdf', self.get_valid_filename(filename)) + + def test_040__get_valid_filename(self): + """ test get_valid_filename """ + filename = 'test test.pdf' + self.assertEqual('test_test.pdf', self.get_valid_filename(filename)) + + def test_041__get_valid_filename(self): + """ test get_valid_filename """ + filename = 'testötest.pdf' + self.assertEqual('testötest.pdf', self.get_valid_filename(filename)) + + def test_042__get_valid_filename(self): + """ test get_valid_filename """ + filename = 'test/test.pdf' + self.assertEqual('test_test.pdf', self.get_valid_filename(filename)) + + def test_043_get_valid_filename(self): + """ test get_valid_filename """ + filename = 'test\\test.pdf' + self.assertEqual('test_test.pdf', self.get_valid_filename(filename)) + + def test_044_get_valid_filename(self): + """ test get_valid_filename """ + filename = '.\test.pdf' + self.assertEqual('._est.pdf', self.get_valid_filename(filename)) + + def test_045_get_valid_filename(self): + """ test get_valid_filename """ + filename = '../test.pdf' + self.assertEqual('.._test.pdf', self.get_valid_filename(filename)) + + @patch('dkb_robo.utilities.generate_random_string') + def test_046_get_valid_filename(self, mock_rand): + """ test get_valid_filename """ + filename = '..' + mock_rand.return_value = 'random' + self.assertEqual('random.pdf', self.get_valid_filename(filename)) if __name__ == '__main__':