Skip to content

Commit

Permalink
cache: configurable backends
Browse files Browse the repository at this point in the history
* Adds support for any Werkzeug-like cache backend. (closes mementoweb#15)

* Adds new configuration option `CACHE_BACKEND`. All options in
  INI file `[cache]` section are passed to the cache constructor.

Signed-off-by: Jiri Kuncar <[email protected]>
  • Loading branch information
jirikuncar committed May 17, 2016
1 parent 6542f35 commit 1dd75e2
Show file tree
Hide file tree
Showing 6 changed files with 60 additions and 119 deletions.
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ script:

after_success:
- coveralls

branches:
only:
- master
6 changes: 4 additions & 2 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,10 @@ def app(tmpdir):
return application.TimeGate(config=dict(
HOST='http://localhost',
BASE_URI='http://www.example.com/',
CACHE_USE=True,
CACHE_FILE=tmpdir.mkdir('cache').strpath,
CACHE_BACKEND='werkzeug.contrib.cache:FileSystemCache',
CACHE_OPTIONS={
'cache_dir': tmpdir.mkdir('cache').strpath,
},
))


Expand Down
10 changes: 5 additions & 5 deletions timegate/application.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ def __init__(self, config=None, cache=None):
self.config.update(config or {})
if cache:
self.cache = cache
elif self.config['CACHE_USE']:
else:
self._build_default_cache()

@cached_property
Expand Down Expand Up @@ -143,10 +143,10 @@ def url_map(self):
def _build_default_cache(self):
    """Build the default cache object from application configuration.

    Reads ``CACHE_BACKEND`` (an importable string, defaulting to the
    no-op ``werkzeug.contrib.cache.NullCache``), the refresh window
    ``CACHE_REFRESH_TIME`` (seconds, default one day), and forwards all
    entries of ``CACHE_OPTIONS`` verbatim to the backend constructor.
    """
    self.cache = Cache(
        self.config.get('CACHE_BACKEND',
                        'werkzeug.contrib.cache.NullCache'),
        cache_refresh_time=self.config.get('CACHE_REFRESH_TIME', 86400),
        **self.config.get('CACHE_OPTIONS', {})
    )

def __repr__(self):
Expand Down
115 changes: 22 additions & 93 deletions timegate/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,65 +14,32 @@

import logging
import os
import sys
from datetime import datetime

from dateutil.relativedelta import relativedelta
from dateutil.tz import tzutc
from werkzeug.contrib.cache import FileSystemCache, md5

from . import utils as timegate_utils
from .errors import CacheError
from werkzeug.utils import import_string


class Cache(object):
"""Base class for TimeGate caches."""

def __init__(self, cache_backend, cache_refresh_time=86400,
             max_file_size=0, **kwargs):
    """Constructor method.

    :param cache_backend: Importable string pointing to the cache class,
        e.g. ``'werkzeug.contrib.cache:FileSystemCache'``.
    :param cache_refresh_time: (Optional) Time window, in seconds, during
        which a cached TimeMap is considered young enough to be used as is.
    :param max_file_size: (Optional) The maximum size (in Bytes) for a
        TimeMap cache value. When max_file_size=0, there is no limit to
        a cache value. When max_file_size=X > 0, the cache will not
        store TimeMaps that require more than X Bytes in memory.
    :param kwargs: Extra keyword arguments passed verbatim to the
        backend constructor (e.g. ``cache_dir``, ``threshold``).
    """
    # Freshness window used by ``get_until`` when deciding cache hits.
    self.tolerance = relativedelta(seconds=cache_refresh_time)
    # Negative limits are meaningless; clamp to "no limit".
    self.max_file_size = max(max_file_size, 0)
    # Size checking is skipped entirely when no limit is configured.
    self.CHECK_SIZE = self.max_file_size > 0
    # Resolve the dotted/colon-separated import path and instantiate
    # the Werkzeug(-like) backend with the remaining options.
    self.backend = import_string(cache_backend)(**kwargs)

def get_until(self, uri_r, date):
    """Return the cached (memento, datetime)-list TimeMap for *uri_r*.

    The cached value is used only while it is fresh: the requested
    *date* must not lie past the cached timestamp plus the configured
    tolerance window.

    :param uri_r: The URI-R of the original resource.
    :param date: The target datetime of the request.
    :return: The cached TimeMap if a fresh value exists for *uri_r*,
        None otherwise (cache miss, or value outdated for *date*).
    """
    # Query the backend for a stored cache value for that resource.
    val = self.backend.get(uri_r)
    if val:  # There is a value in the cache
        timestamp, timemap = val
        if date <= timestamp + self.tolerance:
            return timemap
    # Falls through on miss or outdated value: implicit None.

def get_all(self, uri_r):
"""Request the whole TimeMap for that uri.
def set(self, uri_r, timemap):
    """Set the cached TimeMap for an original resource.

    The value is stored together with the current UTC timestamp so that
    ``get_until`` can later judge its freshness.  Values that exceed the
    configured size limit are discarded without being stored.

    :param uri_r: The URI-R of the original resource.
    :param timemap: The value to cache.
    :return: The backend setter method return value, or None when the
        value was rejected for being too large.
    """
    timestamp = datetime.utcnow().replace(tzinfo=tzutc())
    val = (timestamp, timemap)
    if self._check_size(val):
        # Return the backend's result as promised by the docstring.
        return self.backend.set(uri_r, val)

def _check_size(self, key, delete=True):
"""Check the size that a specific TimeMap value is using on disk.
def _check_size(self, val):
"""Check the size that a specific TimeMap value is using in memory.
It deletes if it is more than the maximum size.
:param key: The TimeMap original resource.
:param delete: (Optional) When true, the value is deleted.
Else only a warning is raised.
:return: The size of the value on disk (0 if it was deleted).
:param val: The cached object.
:return: The True if it can be stored.
"""
try:
fname = md5(key).hexdigest() # werkzeug key
fpath = self.path + '/' + fname
size = os.path.getsize(fpath)
if size > self.max_file_size and delete:
message = ('Cache value too big (%dB, max %dB) '
'for the TimeMap of %s')
if delete:
message += '. Deleting cached value.'
os.remove(fpath)
size = 0
logging.warning(message % (size, self.max_file_size, key))
return size
except Exception as e:
logging.error(
'Exception checking cache value size for TimeMap of %s '
'Exception: %s' % (key, e))
return 0
if self.CHECK_SIZE:
size = sys.getsizeof(val)
if size > self.max_file_size:
return False
return True
14 changes: 6 additions & 8 deletions timegate/conf/config.ini
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,9 @@ base_uri = http://www.example.com/

[cache]

# cache_activated
# When true, the cache stores TimeMaps from API that allows batch (get_all_mementos) requests, except for requests with `Cache-Control: no-cache` header, which will always return fresh Mementos.
# When false, no cache file will be created
# Default true
cache_activated = false
# cache_backend
# To disable caching, set this to werkzeug.contrib.cache.NullCache
cache_backend = werkzeug.contrib.cache:FileSystemCache

# cache_refresh_time
# Time in seconds, for which it is assumed that a TimeMap didn't change. Any TimeGate request for a datetime past this period (or any TimeMap request past this period) will trigger a refresh of the cached value.
Expand All @@ -55,10 +53,10 @@ cache_refresh_time = 86400
# cache_dir
# Relative path of the cache directory for data files. Make sure this directory is empty, or else the cache may delete unrelated files.
# Default cache/
cache_directory = cache
cache_dir = cache

# cache_max_values
# threshold
# Maximum number of stored TimeMaps in the cache.
# Tweak this depending on how big your TimeMaps can become (number of elements and length of URIs)
# Default 250
cache_max_values = 250
threshold = 250
32 changes: 22 additions & 10 deletions timegate/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,18 +56,30 @@ def from_inifile(self, filename, silent=True):
self['USE_TIMEMAPS'] = False

# Cache
# When False, all cache requests will be cache MISS
self['CACHE_USE'] = conf.getboolean('cache', 'cache_activated')
self['CACHE_BACKEND'] = conf.get('cache', 'cache_backend')
# Time window in which the cache value is considered young
# enough to be valid
self['CACHE_TOLERANCE'] = conf.getint('cache', 'cache_refresh_time')
# Cache files paths
self['CACHE_DIRECTORY'] = conf.get(
'cache', 'cache_directory').rstrip('/')
# Maximum number of TimeMaps stored in cache
self['CACHE_MAX_VALUES'] = conf.getint('cache', 'cache_max_values')
# Cache files paths
self['CACHE_FILE'] = self['CACHE_DIRECTORY'] # + '/cache_data'
self['CACHE_REFRESH_TIME'] = conf.getint('cache', 'cache_refresh_time')

options = {
'cache_backend': None,
'cache_refresh_time': None,
'default_timeout': 'getint',
'mode': 'getint',
'port': 'getint',
'threshold': 'getint',
}
self.setdefault('CACHE_OPTIONS', {})

for key in conf.options('cache'):
if key in options:
getter = options[key]
if getter:
self['CACHE_OPTIONS'][key] = getattr(conf, getter)(
'cache', key
)
else:
self['CACHE_OPTIONS'][key] = conf.get('cache', key)

def from_object(self, obj):
"""Update config with values from given object.
Expand Down

0 comments on commit 1dd75e2

Please sign in to comment.