diff --git a/.travis.yml b/.travis.yml
index b5ac7c9..d980530 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -45,7 +45,7 @@ script:
 
 after_success:
   - coveralls
-
+
 branches:
   only:
     - master
diff --git a/tests/conftest.py b/tests/conftest.py
index 4674207..57d05be 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -23,8 +23,10 @@ def app(tmpdir):
     return application.TimeGate(config=dict(
         HOST='http://localhost',
         BASE_URI='http://www.example.com/',
-        CACHE_USE=True,
-        CACHE_FILE=tmpdir.mkdir('cache').strpath,
+        CACHE_BACKEND='werkzeug.contrib.cache:FileSystemCache',
+        CACHE_OPTIONS={
+            'cache_dir': tmpdir.mkdir('cache').strpath,
+        },
     ))
 
 
diff --git a/timegate/application.py b/timegate/application.py
index 8cd2882..9f6cc9d 100644
--- a/timegate/application.py
+++ b/timegate/application.py
@@ -108,7 +108,7 @@ def __init__(self, config=None, cache=None):
         self.config.update(config or {})
         if cache:
             self.cache = cache
-        elif self.config['CACHE_USE']:
+        else:
             self._build_default_cache()
 
     @cached_property
@@ -143,10 +143,10 @@ def url_map(self):
     def _build_default_cache(self):
         """Build default cache object."""
         self.cache = Cache(
-            self.config['CACHE_FILE'],
-            self.config['CACHE_TOLERANCE'],
-            self.config['CACHE_EXP'],
-            self.config['CACHE_MAX_VALUES'],
+            self.config.get('CACHE_BACKEND',
+                            'werkzeug.contrib.cache.NullCache'),
+            cache_refresh_time=self.config.get('CACHE_REFRESH_TIME', 86400),
+            **self.config.get('CACHE_OPTIONS', {})
         )
 
     def __repr__(self):
diff --git a/timegate/cache.py b/timegate/cache.py
index 3d9ea05..cb234a7 100644
--- a/timegate/cache.py
+++ b/timegate/cache.py
@@ -14,65 +14,32 @@
 
 import logging
 import os
+import sys
 from datetime import datetime
 
 from dateutil.relativedelta import relativedelta
 from dateutil.tz import tzutc
 from werkzeug.contrib.cache import FileSystemCache, md5
-
-from . import utils as timegate_utils
-from .errors import CacheError
+from werkzeug.utils import import_string
 
 
 class Cache(object):
     """Base class for TimeGate caches."""
 
-    def __init__(self, path, tolerance, expiration, max_values,
-                 run_tests=True, max_file_size=0):
+    def __init__(self, cache_backend, cache_refresh_time=86400,
+                 max_file_size=0, **kwargs):
         """Constructor method.
 
-        :param path: The path of the cache database file.
-        :param tolerance: The tolerance, in seconds to which a TimeMap is
-        considered young enough to be used as is.
-        :param expiration: How long, in seconds, the cache entries are stored
-        every get will be a CACHE MISS.
-        :param max_values: The maximum number of TimeMaps stored in cache
-        before some are deleted
-        :param run_tests: (Optional) Tests the cache at initialization.
+        :param cache_backend: Importable string pointing to cache class.
         :param max_file_size: (Optional) The maximum size (in Bytes) for a
         TimeMap cache value. When max_file_size=0, there is no limit to
         a cache value. When max_file_size=X > 0, the cache will not
         store TimeMap that require more than X Bytes on disk.
         """
-        # Parameters Check
-        if tolerance <= 0 or expiration <= 0 or max_values <= 0:
-            raise CacheError('Cannot create cache: all parameters must be > 0')
-
-        self.tolerance = relativedelta(seconds=tolerance)
-        self.path = path.rstrip('/')
+        self.tolerance = relativedelta(seconds=cache_refresh_time)
         self.max_file_size = max(max_file_size, 0)
         self.CHECK_SIZE = self.max_file_size > 0
-        self.max_values = max_values
-        self.backend = FileSystemCache(path,
-                                       threshold=self.max_values,
-                                       default_timeout=expiration)
-
-        # Testing cache
-        if run_tests:
-            try:
-                key = b'1'
-                val = 1
-                self.backend.set(key, val)
-                assert (not self.CHECK_SIZE) or self._check_size(key) > 0
-                assert self.backend.get(key) == val
-                os.remove(os.path.join(self.path, md5(key).hexdigest()))
-            except Exception as e:
-                raise CacheError('Error testing cache: %s' % e)
-
-        logging.debug(
-            'Cache created. max_files = %d. Expiration = %d. '
-            'max_file_size = %d' % (
-                self.max_values, expiration, self.max_file_size))
+        self.backend = import_string(cache_backend)(**kwargs)
 
     def get_until(self, uri_r, date):
         """Returns the TimeMap (memento,datetime)-list for the requested
@@ -88,28 +55,11 @@ def get_until(self, uri_r, date):
         None otherwise.
         """
         # Query the backend for stored cache values to that memento
-        key = uri_r
-        try:
-            val = self.backend.get(key)
-        except Exception as e:
-            logging.error('Exception loading cache content: %s' % e)
-            return None
-
-        if val:
-            # There is a value in the cache
+        val = self.backend.get(uri_r)
+        if val:  # There is a value in the cache
             timestamp, timemap = val
-            logging.info('Cached value exists for %s' % uri_r)
-            if date > timestamp + self.tolerance:
-                logging.info('Cache MISS: value outdated for %s' % uri_r)
-                timemap = None
-            else:
-                logging.info('Cache HIT: found value for %s' % uri_r)
-        else:
-            # Cache MISS: No value
-            logging.info('Cache MISS: No cached value for %s' % uri_r)
-            timemap = None
-
-        return timemap
+            if date <= timestamp + self.tolerance:
+                return timemap
 
     def get_all(self, uri_r):
         """Request the whole TimeMap for that uri.
@@ -130,42 +80,21 @@ def set(self, uri_r, timemap):
         :param timemap: The value to cache.
         :return: The backend setter method return value.
         """
-        logging.info('Updating cache for %s' % uri_r)
         timestamp = datetime.utcnow().replace(tzinfo=tzutc())
         val = (timestamp, timemap)
-        key = uri_r
-        try:
-            self.backend.set(key, val)
-            if self.CHECK_SIZE:
-                self._check_size(uri_r)
-        except Exception as e:
-            logging.error('Error setting cache value: %s' % e)
+        if self._check_size(val):
+            self.backend.set(uri_r, val)
 
-    def _check_size(self, key, delete=True):
-        """Check the size that a specific TimeMap value is using on disk.
+    def _check_size(self, val):
+        """Check the size that a specific TimeMap value is using in memory.
 
         It deletes if it is more than the maximum size.
 
-        :param key: The TimeMap original resource.
-        :param delete: (Optional) When true, the value is deleted.
-        Else only a warning is raised.
-        :return: The size of the value on disk (0 if it was deleted).
+        :param val: The cached object.
+        :return: True if the value can be stored.
         """
-        try:
-            fname = md5(key).hexdigest()  # werkzeug key
-            fpath = self.path + '/' + fname
-            size = os.path.getsize(fpath)
-            if size > self.max_file_size and delete:
-                message = ('Cache value too big (%dB, max %dB) '
-                           'for the TimeMap of %s')
-                if delete:
-                    message += '. Deleting cached value.'
-                os.remove(fpath)
-                size = 0
-                logging.warning(message % (size, self.max_file_size, key))
-            return size
-        except Exception as e:
-            logging.error(
-                'Exception checking cache value size for TimeMap of %s '
-                'Exception: %s' % (key, e))
-            return 0
+        if self.CHECK_SIZE:
+            size = sys.getsizeof(val)
+            if size > self.max_file_size:
+                return False
+        return True
diff --git a/timegate/conf/config.ini b/timegate/conf/config.ini
index c71c4d9..ceaf171 100644
--- a/timegate/conf/config.ini
+++ b/timegate/conf/config.ini
@@ -41,23 +41,21 @@ base_uri = http://www.example.com/
 [cache]
 
-# cache_activated
-# When true, the cache stores TimeMaps from API that allows batch (get_all_mementos) requests, except for requests with `Cache-Control: no-cache` header, which will always return fresh Mementos.
-# When false, no cache file will be created
-# Default true
-cache_activated = false
+# cache_backend
+# For disabling cache use werkzeug.contrib.cache.NullCache
+cache_backend = werkzeug.contrib.cache:FileSystemCache
 
 # cache_refresh_time
 # Time in seconds, for which it is assumed that a TimeMap didn't change. Any TimeGate request for a datetime past this period (or any TimeMap request past this period) will trigger a refresh of the cached value.
 # Default 86400 (24h)
 cache_refresh_time = 86400
 
 # cache_directory
 # Cache directory relative path for data files. Make sure that this directory is empty or else the cache will start deleting random files.
 # Default cache/
-cache_directory = cache
+cache_dir = cache
 
-# cache_max_values
+# threshold
 # Maximum number of stored TimeMaps in the cache.
 # Tweak this depending on how big your TimeMaps can become (number of elements and length of URIs)
 # Default 250
-cache_max_values = 250
+threshold = 250
diff --git a/timegate/config.py b/timegate/config.py
index 8a051b3..59db557 100644
--- a/timegate/config.py
+++ b/timegate/config.py
@@ -56,18 +56,30 @@ def from_inifile(self, filename, silent=True):
             self['USE_TIMEMAPS'] = False
 
         # Cache
-        # When False, all cache requests will be cache MISS
-        self['CACHE_USE'] = conf.getboolean('cache', 'cache_activated')
+        self['CACHE_BACKEND'] = conf.get('cache', 'cache_backend')
         # Time window in which the cache value is considered young
         # enough to be valid
-        self['CACHE_TOLERANCE'] = conf.getint('cache', 'cache_refresh_time')
-        # Cache files paths
-        self['CACHE_DIRECTORY'] = conf.get(
-            'cache', 'cache_directory').rstrip('/')
-        # Maximum number of TimeMaps stored in cache
-        self['CACHE_MAX_VALUES'] = conf.getint('cache', 'cache_max_values')
-        # Cache files paths
-        self['CACHE_FILE'] = self['CACHE_DIRECTORY']  # + '/cache_data'
+        self['CACHE_REFRESH_TIME'] = conf.getint('cache', 'cache_refresh_time')
+
+        options = {
+            'cache_backend': None,
+            'cache_refresh_time': None,
+            'default_timeout': 'getint',
+            'mode': 'getint',
+            'port': 'getint',
+            'threshold': 'getint',
+        }
+        self.setdefault('CACHE_OPTIONS', {})
+
+        for key in conf.options('cache'):
+            if key in options:
+                getter = options[key]
+                if getter:
+                    self['CACHE_OPTIONS'][key] = getattr(conf, getter)(
+                        'cache', key
+                    )
+            else:
+                self['CACHE_OPTIONS'][key] = conf.get('cache', key)
 
     def from_object(self, obj):
         """Update config with values from given object.