Mercurial > moin > 1.9
changeset 3882:c8ffd029ab1f
action cache (and tests), backported from 1.8
author | Thomas Waldmann <tw AT waldmann-edv DOT de> |
---|---|
date | Sat, 19 Jul 2008 16:11:19 +0200 |
parents | 85cd05b8af42 |
children | 9e40b4ecf68f 085328cb4f4d |
files | MoinMoin/action/_tests/test_cache.py MoinMoin/action/cache.py docs/CHANGES |
diffstat | 3 files changed, 436 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
# -*- coding: iso-8859-1 -*-
"""
    MoinMoin - tests of cache action functions

    @copyright: 2008 MoinMoin:ThomasWaldmann
    @license: GNU GPL, see COPYING for details.
"""

import os, StringIO

from MoinMoin import caching
from MoinMoin.action import AttachFile, cache

from MoinMoin._tests import become_trusted, create_page, nuke_page

class TestSendCached:
    """ testing action cache """
    # NOTE(review): self.request is presumably injected by the MoinMoin test
    # framework before each test runs - confirm against the test runner.
    pagename = u"AutoCreatedSillyPageToTestAttachments"

    def test_cache_key_content(self):
        """Test cache.key() for content-based keys: keys must differ per secret
           and per content, and have a fixed length regardless of input size."""
        request = self.request
        result1 = cache.key(request, content='foo', secret='bar')
        result2 = cache.key(request, content='foo', secret='baz')
        assert result1  # not empty
        assert result1 != result2  # different for different secret
        result3 = cache.key(request, content='foofoo', secret='baz')
        assert result3 != result2  # different for different content
        result4 = cache.key(request, content='foo'*1000, secret='baz')
        assert len(result4) == len(result3)  # same length of key for different input lengths

    def test_cache_key_attachment(self):
        """Test cache.key() for attachment-metadata-based keys (wikiname/
           itemname/attachname plus filesystem metadata, no content hashing)."""
        request = self.request
        pagename = self.pagename
        attachname = 'foo.txt'

        become_trusted(request)
        create_page(request, pagename, u"Foo!")

        AttachFile.add_attachment(request, pagename, attachname, "Test content1", True)

        result1 = cache.key(request, itemname=pagename, attachname=attachname, secret='bar')
        result2 = cache.key(request, itemname=pagename, attachname=attachname, secret='baz')
        assert result1  # not empty
        assert result1 != result2  # different for different secret

        # test below does not work, because mtime is often same, inode can be
        # same due to how add_attachment works, file size is same, attachment
        # name is same, wikiname/pagename is same.
        # In practice, this should rather rarely cause problems:
        #AttachFile.add_attachment(request, pagename, attachname, "Test content2", True)
        #result3 = cache.key(request, itemname=pagename, attachname=attachname, secret='baz')
        #assert result3 != result2 # different for different content

        AttachFile.add_attachment(request, pagename, attachname, "Test content33333", True)
        result4 = cache.key(request, itemname=pagename, attachname=attachname, secret='baz')
        assert len(result4) == len(result2)  # same length of key for different input lengths
        nuke_page(request, pagename)

    def test_put_cache_minimal(self):
        """Test if put_cache() works"""
        request = self.request
        key = 'nooneknowsit'
        data = "dontcare"
        cache.put(request, key, data)
        url = cache.url(request, key)

        assert key in url
        # the meta cache entry stores the (last_modified, headers) tuple pickled
        meta_cache = caching.CacheEntry(request,
                                        arena=cache.cache_arena,
                                        scope=cache.cache_scope,
                                        key=key+'.meta', use_pickle=True)
        last_modified, headers = meta_cache.content()
        assert last_modified.endswith(' GMT')  # only a very rough check, it has used cache mtime as last_modified
        assert "Content-Type: application/octet-stream" in headers
        assert "Content-Length: %d" % len(data) in headers

    def test_put_cache_guess_ct_give_lm(self):
        """Test if put_cache() works, when we give filename (so it guesses content_type) and last_modified"""
        request = self.request
        key = 'nooneknowsit'
        filename = "test.png"
        data = "dontcare"
        cache.put(request, key, data, filename=filename, last_modified=1)
        url = cache.url(request, key)
        assert key in url

        meta_cache = caching.CacheEntry(request,
                                        arena=cache.cache_arena,
                                        scope=cache.cache_scope,
                                        key=key+'.meta', use_pickle=True)
        last_modified, headers = meta_cache.content()
        # last_modified=1 is one second after the epoch, rendered as HTTP date
        assert last_modified == 'Thu, 01 Jan 1970 00:00:01 GMT'
        assert "Content-Type: image/png" in headers
        assert "Content-Length: %d" % len(data) in headers

    def test_put_cache_file_like_data(self):
        """Test if put_cache() works when we give it a file like object for the content"""
        request = self.request
        key = 'nooneknowsit'
        filename = "test.png"
        data = "dontcareatall"
        data_file = StringIO.StringIO(data)
        cache.put(request, key, data_file)
        url = cache.url(request, key)

        assert key in url
        meta_cache = caching.CacheEntry(request,
                                        arena=cache.cache_arena,
                                        scope=cache.cache_scope,
                                        key=key+'.meta', use_pickle=True)
        last_modified, headers = meta_cache.content()
        assert last_modified.endswith(' GMT')  # only a very rough check, it has used cache mtime as last_modified
        assert "Content-Type: application/octet-stream" in headers
        assert "Content-Length: %d" % len(data) in headers

        # the raw bytes must round-trip through the data cache entry
        data_cache = caching.CacheEntry(request,
                                        arena=cache.cache_arena,
                                        scope=cache.cache_scope,
                                        key=key+'.data')
        cached = data_cache.content()
        assert data == cached

    def test_put_cache_complex(self):
        """Test if put_cache() works for a more complex, practical scenario:

           As 'source' we just use some random integer as count value.

           The 'rendered representation' of it is just the word "spam" repeated
           count times, which we cache.

           The cache key calculation (for the 'non-guessable' keys) is also
           rather simple.

           In real world, source would be likely some big image, rendered
           representation of it a thumbnail / preview of it. Or some LaTeX
           source and its rendered representation as png image.
           Key calculation could be some MAC or some other hard to guess and
           unique string.
        """
        import random
        request = self.request
        render = lambda data: "spam" * data
        secret = 4223
        keycalc = lambda data: str(data * secret)

        source = random.randint(1, 100)
        rendered1 = render(source)
        key1 = keycalc(source)

        cache.put(request, key1, rendered1)
        url1 = cache.url(request, key1)
        assert 'key=%s' % key1 in url1

        data_cache = caching.CacheEntry(request,
                                        arena=cache.cache_arena,
                                        scope=cache.cache_scope,
                                        key=key1+'.data')
        cached1 = data_cache.content()

        assert render(source) == cached1
        # if that succeeds, we have stored the rendered representation of source in the cache under key1

        # now we use some different source, render it and store it in the cache
        source = source * 2
        rendered2 = render(source)
        key2 = keycalc(source)

        cache.put(request, key2, rendered2)
        url2 = cache.url(request, key2)
        assert 'key=%s' % key2 in url2

        data_cache = caching.CacheEntry(request,
                                        arena=cache.cache_arena,
                                        scope=cache.cache_scope,
                                        key=key2+'.data')
        cached2 = data_cache.content()

        assert render(source) == cached2
        # if that succeeds, we have stored the rendered representation of updated source in the cache under key2

        assert url2 != url1  # URLs must be different for different source (implies different keys)


coverage_modules = ['MoinMoin.action.cache']
# -*- coding: iso-8859-1 -*-
"""
    MoinMoin - Send a raw object from the caching system (and offer utility
    functions to put data into cache, calculate cache key, etc.).

    Sample usage
    ------------
    Assume we have a big picture (bigpic) and we want to efficiently show some
    thumbnail (thumbpic) for it:

    # first calculate a (hard to guess) cache key (this key will change if the
    # original data (bigpic) changes):
    key = cache.key(..., attachname=bigpic, ...)

    # check if we don't have it in cache yet
    if not cache.exists(..., key):
        # if we don't have it in cache, we need to render it - this is an
        # expensive operation that we want to avoid by caching:
        thumbpic = render_thumb(bigpic)
        # put expensive operation's results into cache:
        cache.put(..., key, thumbpic, ...)

    url = cache.url(..., key)
    html = '<img src="%s">' % url

    @copyright: 2008 MoinMoin:ThomasWaldmann
    @license: GNU GPL, see COPYING for details.
"""

# NOTE(review): the `sha` module is the old Python 2 interface (deprecated
# since 2.5 in favour of hashlib); kept because this codebase targets Python 2.
import hmac, sha

from MoinMoin import log
logging = log.getLogger(__name__)

# keep both imports below as they are, order is important:
from MoinMoin import wikiutil
import mimetypes

from MoinMoin import config, caching
from MoinMoin.util import filesys
from MoinMoin.action import AttachFile

action_name = __name__.split('.')[-1]

# Do NOT get this directly from request.form or user would be able to read any cache!
cache_arena = 'sendcache'  # just using action_name is maybe rather confusing

# We maybe could use page local caching (not 'wiki' global) to have less directory entries.
# Local is easier to automatically cleanup if an item changes. Global is easier to manually cleanup.
# Local makes data_dir much larger, harder to backup.
cache_scope = 'wiki'

# whether CacheEntry operations below should take locks
do_locking = False

def key(request, wikiname=None, itemname=None, attachname=None, content=None, secret=None):
    """
    Calculate a (hard-to-guess) cache key.

    Important key properties:
    * The key must be hard to guess (this is because do=get does no ACL checks,
      so whoever got the key [e.g. from html rendering of an ACL protected wiki
      page], will be able to see the cached content.
    * The key must change if the (original) content changes. This is because
      ACLs on some item may change and even if somebody was allowed to see some
      revision of some item, it does not implicate that he is allowed to see
      any other revision also. There will be no harm if he can see exactly the
      same content again, but there could be harm if he could access a revision
      with different content.

    If content is supplied, we will calculate and return a hMAC of the content.

    If wikiname, itemname, attachname is given, we don't touch the content (nor do
    we read it ourselves from the attachment file), but we just calculate a key
    from the given metadata values and some metadata we get from the filesystem.

    Hint: if you need multiple cache objects for the same source content (e.g.
          thumbnails of different sizes for the same image), calculate the key
          only once and then add some different prefixes to it to get the final
          cache keys.

    @param request: the request object
    @param wikiname: the name of the wiki (if not given, will be read from cfg)
    @param itemname: the name of the page
    @param attachname: the filename of the attachment
    @param content: content data as unicode object (e.g. for page content or
                    parser section content)
    @param secret: secret for hMAC calculation (default: use secret from cfg)
    """
    if secret is None:
        # NOTE(review): assumes request.cfg.secrets is a str usable as an hmac
        # key here - confirm against the wiki configuration code.
        secret = request.cfg.secrets
    if content:
        hmac_data = content
    elif itemname is not None and attachname is not None:
        wikiname = wikiname or request.cfg.interwikiname or request.cfg.siteid
        # fuid: filesystem metadata of the attachment file, so the key changes
        # when the underlying file changes (see test note: mtime/inode/size can
        # collide in rare cases)
        fuid = filesys.fuid(AttachFile.getFilename(request, itemname, attachname))
        hmac_data = u''.join([wikiname, itemname, attachname, repr(fuid)])
    else:
        raise AssertionError('cache_key called with unsupported parameters')

    hmac_data = hmac_data.encode('utf-8')
    key = hmac.new(secret, hmac_data, sha).hexdigest()
    return key


def put(request, key, data,
        filename=None,
        content_type=None,
        content_disposition=None,
        content_length=None,
        last_modified=None):
    """
    Put an object into the cache to send it with cache action later.

    Stores two cache entries: <key>.data (the raw content) and <key>.meta
    (a pickled (last_modified, headers) tuple used by the 'get' action).

    @param request: the request object
    @param key: non-guessable key into cache (str)
    @param data: content data (str or open file-like obj)
    @param filename: filename for content-disposition header and for autodetecting
                     content_type (unicode, default: None)
    @param content_type: content-type header value (str, default: autodetect from filename)
    @param content_disposition: type for content-disposition header (str, default: None)
    @param content_length: data length for content-length header (int, default: autodetect)
    @param last_modified: last modified timestamp (int, default: autodetect)
    """
    import os.path
    from MoinMoin.util import timefuncs

    if filename:
        # make sure we just have a simple filename (without path)
        filename = os.path.basename(filename)

        if content_type is None:
            # try autodetect
            mt, enc = mimetypes.guess_type(filename)
            if mt:
                content_type = mt

    if content_type is None:
        content_type = 'application/octet-stream'

    data_cache = caching.CacheEntry(request, cache_arena, key+'.data', cache_scope, do_locking=do_locking)
    data_cache.update(data)
    # autodetect from the freshly written cache entry when not given explicitly
    content_length = content_length or data_cache.size()
    last_modified = last_modified or data_cache.mtime()

    last_modified = timefuncs.formathttpdate(int(last_modified))
    headers = ['Content-Type: %s' % content_type,
               'Last-Modified: %s' % last_modified,
               'Content-Length: %s' % content_length,
              ]
    if content_disposition and filename:
        # TODO: fix the encoding here, plain 8 bit is not allowed according to the RFCs
        # There is no solution that is compatible to IE except stripping non-ascii chars
        filename = filename.encode(config.charset)
        headers.append('Content-Disposition: %s; filename="%s"' % (content_disposition, filename))

    meta_cache = caching.CacheEntry(request, cache_arena, key+'.meta', cache_scope, do_locking=do_locking, use_pickle=True)
    meta_cache.update((last_modified, headers))


def exists(request, key, strict=False):
    """
    Check if a cached object for this key exists.

    @param request: the request object
    @param key: non-guessable key into cache (str)
    @param strict: if True, also check the data cache, not only meta (bool, default: False)
    @return: is object cached? (bool)
    """
    if strict:
        data_cache = caching.CacheEntry(request, cache_arena, key+'.data', cache_scope, do_locking=do_locking)
        data_cached = data_cache.exists()
    else:
        data_cached = True  # we assume data will be there if meta is there

    meta_cache = caching.CacheEntry(request, cache_arena, key+'.meta', cache_scope, do_locking=do_locking, use_pickle=True)
    meta_cached = meta_cache.exists()

    return meta_cached and data_cached


def remove(request, key):
    """ delete headers/data cache for key """
    meta_cache = caching.CacheEntry(request, cache_arena, key+'.meta', cache_scope, do_locking=do_locking, use_pickle=True)
    meta_cache.remove()
    data_cache = caching.CacheEntry(request, cache_arena, key+'.data', cache_scope, do_locking=do_locking)
    data_cache.remove()


def url(request, key, do='get'):
    """ return URL for the object cached for key """
    return "%s/?%s" % (
        request.getScriptname(),
        wikiutil.makeQueryString(dict(action=action_name, do=do, key=key), want_unicode=False))


def _get_headers(request, key):
    """ get last_modified and headers cached for key """
    meta_cache = caching.CacheEntry(request, cache_arena, key+'.meta', cache_scope, do_locking=do_locking, use_pickle=True)
    last_modified, headers = meta_cache.content()
    return last_modified, headers


def _get_datafile(request, key):
    """ get an open data file for the data cached for key """
    data_cache = caching.CacheEntry(request, cache_arena, key+'.data', cache_scope, do_locking=do_locking)
    data_cache.open(mode='r')
    return data_cache


def _do_get(request, key):
    """ send a complete http response with headers/data cached for key """
    try:
        last_modified, headers = _get_headers(request, key)
        if request.if_modified_since == last_modified:
            # client already has the current version, save the transfer
            request.emit_http_headers(["Status: 304 Not modified"])
        else:
            data_file = _get_datafile(request, key)
            request.emit_http_headers(headers)
            request.send_file(data_file)
    except caching.CacheError:
        # no (complete) cache entry for this key
        request.emit_http_headers(["Status: 404 Not found"])


def _do_remove(request, key):
    """ delete headers/data cache for key """
    remove(request, key)
    request.emit_http_headers(["Status: 200 OK"])


def _do(request, do, key):
    """ dispatch the 'do' sub-action; unknown values are silently ignored """
    if do == 'get':
        _do_get(request, key)
    elif do == 'remove':
        _do_remove(request, key)

def execute(pagename, request):
    """ action entry point: read do/key from the form and dispatch """
    do = request.form.get('do', [None])[0]
    key = request.form.get('key', [None])[0]
    _do(request, do, key)
--- a/docs/CHANGES Sat Jul 19 16:07:54 2008 +0200 +++ b/docs/CHANGES Sat Jul 19 16:11:19 2008 +0200 @@ -31,6 +31,12 @@ USE BOTH ON YOUR OWN RISK! Version 1.7.current: + New features: + * New 'cache' action (can be used to cache expensively rendered output, e.g. + scaled images, parsers that render text to images). Once put into the + cache, moin can emit an HTTP response for that content very fast and very + efficiently (including "304 not changed" handling). + Fixes: * Security fix: XSS fix for advanced search form * Avoid creation of new pagedirs with empty edit-log files by just @@ -57,6 +63,12 @@ * OpenID RP: make it compatible to python-openid 2.2.x * PackagePages.collectpackage: removed encoding from file name of zipfile + Developer notes: + * New file-like API in MoinMoin.caching (good for dealing with medium + to large files without consuming lots of memory). + * New MoinMoin.action.cache - fast/efficient serving of stuff you put + into the cache. + Version 1.7.0: Note: This is a reduced CHANGES, omitting details from rc/beta test and also less interesting minor changes and fixes. It shows changes