Skip to content

Commit

Permalink
[fix] #61 - sanitize filenames
Browse files Browse the repository at this point in the history
  • Loading branch information
grindsa committed Oct 3, 2024
1 parent 93454ec commit f83f215
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 5 deletions.
6 changes: 3 additions & 3 deletions dkb_robo/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import threading
from typing import Dict, List, Tuple
import requests
from dkb_robo.utilities import get_dateformat
from dkb_robo.utilities import get_dateformat, get_valid_filename
from dkb_robo.legacy import Wrapper as Legacywrapper


Expand Down Expand Up @@ -707,7 +707,7 @@ def _download_document(self, path: str, document: Dict[str, str]) -> str:

if response.status_code == 200:
self.logger.info('Saving %s/%s...', directories[1], document['filename'])
with open(f'{directories[1]}/{document["filename"]}', 'wb') as file:
with open(f'{directories[1]}/{get_valid_filename(document["filename"])}', 'wb') as file:
file.write(response.content)

if not document['read']:
Expand Down Expand Up @@ -777,7 +777,7 @@ def _process_document(self, path: str, prepend_date: bool, document: Dict[str, s
documentname_list.append(document['filename'])

self.logger.debug('api.Wrapper._process_document() ended\n')
return documentname_list, f'{path}/{document["document_type"]}/{document["filename"]}', rcode
return documentname_list, f'{path}/{document["document_type"]}/{get_valid_filename(document["filename"])}', rcode

def _filter_postbox(self, msg_dic: Dict[str, str], pb_dic: Dict[str, str], path: bool = None, download_all: bool = False, _archive: bool = False, prepend_date: bool = None) -> Dict[str, str]:
""" filter postbox """
Expand Down
10 changes: 10 additions & 0 deletions dkb_robo/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from typing import List, Tuple
from datetime import datetime, timezone
import time
import re


def get_dateformat():
Expand Down Expand Up @@ -45,6 +46,15 @@ def generate_random_string(length: int) -> str:
return ''.join(random.choice(char_set) for _ in range(length))


def get_valid_filename(name):
    """ turn an arbitrary value into a string usable as a file name

    The value is converted to str and surrounding whitespace is trimmed.
    Blanks become underscores and every character that is not a word
    character, a hyphen or a dot is replaced by an underscore as well.
    If the outcome is empty or one of the directory entries '.' / '..',
    a name built from generate_random_string(8) plus '.pdf' is returned.
    """
    trimmed = str(name).strip()
    underscored = trimmed.replace(" ", "_")
    sanitized = re.sub(r"(?u)[^-\w.]", "_", underscored)

    # anything but an empty result or a bare dot entry can be used as is
    if sanitized not in {"", ".", ".."}:
        return sanitized
    return f'{generate_random_string(8)}.pdf'


def string2float(value: str) -> float:
""" convert string to float value """
try:
Expand Down
45 changes: 43 additions & 2 deletions test/test_utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,14 @@ class TestDKBRobo(unittest.TestCase):

def setUp(self):
# Per-test fixture: remembers the directory of this test file, imports the
# utility functions under test and binds each of them to the test instance,
# then fetches the 'dkb_robo' logger.
self.dir_path = os.path.dirname(os.path.realpath(__file__))
# NOTE(review): the two consecutive import lines below look like the removed
# and added side of a diff hunk; the second one is a superset of the first
# (it additionally imports get_valid_filename) - confirm only one is intended.
from dkb_robo.utilities import validate_dates, generate_random_string, logger_setup, string2float, _convert_date_format, get_dateformat
from dkb_robo.utilities import validate_dates, generate_random_string, logger_setup, string2float, _convert_date_format, get_dateformat, get_valid_filename
self.validate_dates = validate_dates
self.string2float = string2float
self.generate_random_string = generate_random_string
self.logger_setup = logger_setup
self._convert_date_format = _convert_date_format
self.get_dateformat = get_dateformat
self.get_valid_filename = get_valid_filename
self.logger = logging.getLogger('dkb_robo')

@patch('time.time')
Expand Down Expand Up @@ -218,7 +219,47 @@ def test_030__convert_date_format(self):
""" test _convert_date_format() no match """
self.assertEqual('wrong date', self._convert_date_format(self.logger, 'wrong date', ['%Y/%m/%d', '%Y-%m-%d'], '%d.%m.%Y'))


def test_039__get_valid_filename(self):
    """ test get_valid_filename - an already valid name is returned unchanged """
    self.assertEqual('test.pdf', self.get_valid_filename('test.pdf'))

def test_040__get_valid_filename(self):
    """ test get_valid_filename - blank inside the name becomes an underscore """
    self.assertEqual('test_test.pdf', self.get_valid_filename('test test.pdf'))

def test_041__get_valid_filename(self):
    """ test get_valid_filename - umlaut is kept as is """
    self.assertEqual('testötest.pdf', self.get_valid_filename('testötest.pdf'))

def test_042__get_valid_filename(self):
    """ test get_valid_filename - forward slash becomes an underscore """
    self.assertEqual('test_test.pdf', self.get_valid_filename('test/test.pdf'))

def test_043_get_valid_filename(self):
    """ test get_valid_filename - backslash becomes an underscore """
    self.assertEqual('test_test.pdf', self.get_valid_filename('test\\test.pdf'))

def test_044_get_valid_filename(self):
    """ test get_valid_filename - tab character becomes an underscore """
    # '\t' in the literal below is a tab escape (not backslash + 't'), which
    # is why the expected name reads '._est.pdf'
    self.assertEqual('._est.pdf', self.get_valid_filename('.\test.pdf'))

def test_045_get_valid_filename(self):
    """ test get_valid_filename - parent directory prefix loses its slash """
    self.assertEqual('.._test.pdf', self.get_valid_filename('../test.pdf'))

@patch('dkb_robo.utilities.generate_random_string')
def test_046_get_valid_filename(self, mock_rand):
    """ test get_valid_filename - '..' is replaced by a generated name """
    mock_rand.return_value = 'random'
    self.assertEqual('random.pdf', self.get_valid_filename('..'))

if __name__ == '__main__':

Expand Down

0 comments on commit f83f215

Please sign in to comment.