changeset 3882:c8ffd029ab1f

action cache (and tests), backported from 1.8
author Thomas Waldmann <tw AT waldmann-edv DOT de>
date Sat, 19 Jul 2008 16:11:19 +0200
parents 85cd05b8af42
children 9e40b4ecf68f 085328cb4f4d
files MoinMoin/action/_tests/test_cache.py MoinMoin/action/cache.py docs/CHANGES
diffstat 3 files changed, 436 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MoinMoin/action/_tests/test_cache.py	Sat Jul 19 16:11:19 2008 +0200
@@ -0,0 +1,184 @@
+# -*- coding: iso-8859-1 -*-
+"""
+    MoinMoin - tests of cache action functions
+
+    @copyright: 2008 MoinMoin:ThomasWaldmann
+    @license: GNU GPL, see COPYING for details.
+"""
+
+import os, StringIO
+
+from MoinMoin import caching
+from MoinMoin.action import AttachFile, cache
+
+from MoinMoin._tests import become_trusted, create_page, nuke_page
+
+class TestSendCached:
+    """ testing action cache """
+    pagename = u"AutoCreatedSillyPageToTestAttachments"
+
+    def test_cache_key_content(self):
+        request = self.request
+        result1 = cache.key(request, content='foo', secret='bar')
+        result2 = cache.key(request, content='foo', secret='baz')
+        assert result1  # not empty
+        assert result1 != result2  # different for different secret
+        result3 = cache.key(request, content='foofoo', secret='baz')
+        assert result3 != result2  # different for different content
+        result4 = cache.key(request, content='foo'*1000, secret='baz')
+        assert len(result4) == len(result3)  # same length of key for different input lengths
+
+    def test_cache_key_attachment(self):
+        request = self.request
+        pagename = self.pagename
+        attachname = 'foo.txt'
+
+        become_trusted(request)
+        create_page(request, pagename, u"Foo!")
+
+        AttachFile.add_attachment(request, pagename, attachname, "Test content1", True)
+
+        result1 = cache.key(request, itemname=pagename, attachname=attachname, secret='bar')
+        result2 = cache.key(request, itemname=pagename, attachname=attachname, secret='baz')
+        assert result1  # not empty
+        assert result1 != result2  # different for different secret
+
+        # test below does not work, because mtime is often same, inode can be same due to how add_attachment
+        # works, file size is same, attachment name is same, wikiname/pagename is same.
+        # In practice, this should rather rarely cause problems:
+        #AttachFile.add_attachment(request, pagename, attachname, "Test content2", True)
+        #result3 = cache.key(request, itemname=pagename, attachname=attachname, secret='baz')
+        #assert result3 != result2  # different for different content
+
+        AttachFile.add_attachment(request, pagename, attachname, "Test content33333", True)
+        result4 = cache.key(request, itemname=pagename, attachname=attachname, secret='baz')
+        assert len(result4) == len(result2)  # same length of key for different input lengths
+        nuke_page(request, pagename)
+
+    def test_put_cache_minimal(self):
+        """Test if put_cache() works"""
+        request = self.request
+        key = 'nooneknowsit'
+        data = "dontcare"
+        cache.put(request, key, data)
+        url = cache.url(request, key)
+
+        assert key in url
+        meta_cache = caching.CacheEntry(request,
+                                        arena=cache.cache_arena,
+                                        scope=cache.cache_scope,
+                                        key=key+'.meta', use_pickle=True)
+        last_modified, headers = meta_cache.content()
+        assert last_modified.endswith(' GMT') # only a very rough check, it has used cache mtime as last_modified
+        assert "Content-Type: application/octet-stream" in headers
+        assert "Content-Length: %d" % len(data) in headers
+
+    def test_put_cache_guess_ct_give_lm(self):
+        """Test if put_cache() works, when we give filename (so it guesses content_type) and last_modified"""
+        request = self.request
+        key = 'nooneknowsit'
+        filename = "test.png"
+        data = "dontcare"
+        cache.put(request, key, data, filename=filename, last_modified=1)
+        url = cache.url(request, key)
+        assert key in url
+
+        meta_cache = caching.CacheEntry(request,
+                                        arena=cache.cache_arena,
+                                        scope=cache.cache_scope,
+                                        key=key+'.meta', use_pickle=True)
+        last_modified, headers = meta_cache.content()
+        assert last_modified == 'Thu, 01 Jan 1970 00:00:01 GMT'
+        assert "Content-Type: image/png" in headers
+        assert "Content-Length: %d" % len(data) in headers
+
+    def test_put_cache_file_like_data(self):
+        """Test if put_cache() works when we give it a file like object for the content"""
+        request = self.request
+        key = 'nooneknowsit'
+        filename = "test.png"
+        data = "dontcareatall"
+        data_file = StringIO.StringIO(data)
+        cache.put(request, key, data_file)
+        url = cache.url(request, key)
+
+        assert key in url
+        meta_cache = caching.CacheEntry(request,
+                                        arena=cache.cache_arena,
+                                        scope=cache.cache_scope,
+                                        key=key+'.meta', use_pickle=True)
+        last_modified, headers = meta_cache.content()
+        assert last_modified.endswith(' GMT') # only a very rough check, it has used cache mtime as last_modified
+        assert "Content-Type: application/octet-stream" in headers
+        assert "Content-Length: %d" % len(data) in headers
+
+        data_cache = caching.CacheEntry(request,
+                                        arena=cache.cache_arena,
+                                        scope=cache.cache_scope,
+                                        key=key+'.data')
+        cached = data_cache.content()
+        assert data == cached
+
+    def test_put_cache_complex(self):
+        """Test if put_cache() works for a more complex, practical scenario:
+
+           As 'source' we just use some random integer as count value.
+
+           The 'rendered representation' of it is just the word "spam" repeated
+           count times, which we cache.
+
+           The cache key calculation (for the 'non-guessable' keys) is also
+           rather simple.
+
+           In real world, source would be likely some big image, rendered
+           representation of it a thumbnail / preview of it. Or some LaTeX
+           source and its rendered representation as png image.
+           Key calculation could be some MAC or some other hard to guess and
+           unique string.
+        """
+        import random
+        request = self.request
+        render = lambda data: "spam" * data
+        secret = 4223
+        keycalc = lambda data: str(data * secret)
+
+        source = random.randint(1, 100)
+        rendered1 = render(source)
+        key1 = keycalc(source)
+
+        cache.put(request, key1, rendered1)
+        url1 = cache.url(request, key1)
+        assert 'key=%s' % key1 in url1
+
+        data_cache = caching.CacheEntry(request,
+                                        arena=cache.cache_arena,
+                                        scope=cache.cache_scope,
+                                        key=key1+'.data')
+        cached1 = data_cache.content()
+
+        assert render(source) == cached1
+        # if that succeeds, we have stored the rendered representation of source in the cache under key1
+
+        # now we use some different source, render it and store it in the cache
+        source = source * 2
+        rendered2 = render(source)
+        key2 = keycalc(source)
+
+        cache.put(request, key2, rendered2)
+        url2 = cache.url(request, key2)
+        assert 'key=%s' % key2 in url2
+
+        data_cache = caching.CacheEntry(request,
+                                        arena=cache.cache_arena,
+                                        scope=cache.cache_scope,
+                                        key=key2+'.data')
+        cached2 = data_cache.content()
+
+        assert render(source) == cached2
+        # if that succeeds, we have stored the rendered representation of updated source in the cache under key2
+
+        assert url2 != url1  # URLs must be different for different source (implies different keys)
+
+
+coverage_modules = ['MoinMoin.action.cache']
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MoinMoin/action/cache.py	Sat Jul 19 16:11:19 2008 +0200
@@ -0,0 +1,240 @@
+# -*- coding: iso-8859-1 -*-
+"""
+    MoinMoin - Send a raw object from the caching system (and offer utility
+    functions to put data into cache, calculate cache key, etc.).
+
+    Sample usage
+    ------------
+    Assume we have a big picture (bigpic) and we want to efficiently show some
+    thumbnail (thumbpic) for it:
+
+    # first calculate a (hard to guess) cache key (this key will change if the
+    # original data (bigpic) changes):
+    key = cache.key(..., attachname=bigpic, ...)
+
+    # check if we don't have it in cache yet
+    if not cache.exists(..., key):
+        # if we don't have it in cache, we need to render it - this is an
+        # expensive operation that we want to avoid by caching:
+        thumbpic = render_thumb(bigpic)
+        # put expensive operation's results into cache:
+        cache.put(..., key, thumbpic, ...)
+
+    url = cache.url(..., key)
+    html = '<img src="%s">' % url
+
+    @copyright: 2008 MoinMoin:ThomasWaldmann
+    @license: GNU GPL, see COPYING for details.
+"""
+
+import hmac, sha
+
+from MoinMoin import log
+logging = log.getLogger(__name__)
+
+# keep both imports below as they are, order is important:
+from MoinMoin import wikiutil
+import mimetypes
+
+from MoinMoin import config, caching
+from MoinMoin.util import filesys
+from MoinMoin.action import AttachFile
+
+action_name = __name__.split('.')[-1]
+
+# Do NOT get this directly from request.form or a user would be able to read any cache!
+cache_arena = 'sendcache'  # just using action_name is maybe rather confusing
+
+# We maybe could use page local caching (not 'wiki' global) to have less directory entries.
+# Local is easier to automatically cleanup if an item changes. Global is easier to manually cleanup.
+# Local makes data_dir much larger, harder to backup.
+cache_scope = 'wiki'
+
+do_locking = False
+
+def key(request, wikiname=None, itemname=None, attachname=None, content=None, secret=None):
+    """
+    Calculate a (hard-to-guess) cache key.
+
+    Important key properties:
+    * The key must be hard to guess (this is because do=get does no ACL checks,
+      so whoever got the key [e.g. from html rendering of an ACL protected wiki
+      page], will be able to see the cached content.
+    * The key must change if the (original) content changes. This is because
+      ACLs on some item may change and even if somebody was allowed to see some
+      revision of some item, it does not imply that he is allowed to see
+      any other revision also. There will be no harm if he can see exactly the
+      same content again, but there could be harm if he could access a revision
+      with different content.
+
+    If content is supplied, we will calculate and return an HMAC of the content.
+
+    If wikiname, itemname, attachname is given, we don't touch the content (nor do
+    we read it ourselves from the attachment file), but we just calculate a key
+    from the given metadata values and some metadata we get from the filesystem.
+
+    Hint: if you need multiple cache objects for the same source content (e.g.
+          thumbnails of different sizes for the same image), calculate the key
+          only once and then add some different prefixes to it to get the final
+          cache keys.
+
+    @param request: the request object
+    @param wikiname: the name of the wiki (if not given, will be read from cfg)
+    @param itemname: the name of the page
+    @param attachname: the filename of the attachment
+    @param content: content data as unicode object (e.g. for page content or
+                    parser section content)
+    @param secret: secret for HMAC calculation (default: use secret from cfg)
+    """
+    if secret is None:
+        secret = request.cfg.secrets
+    if content:
+        hmac_data = content
+    elif itemname is not None and attachname is not None:
+        wikiname = wikiname or request.cfg.interwikiname or request.cfg.siteid
+        fuid = filesys.fuid(AttachFile.getFilename(request, itemname, attachname))
+        hmac_data = u''.join([wikiname, itemname, attachname, repr(fuid)])
+    else:
+        raise AssertionError('cache_key called with unsupported parameters')
+
+    hmac_data = hmac_data.encode('utf-8')
+    key = hmac.new(secret, hmac_data, sha).hexdigest()
+    return key
+
+
+def put(request, key, data,
+        filename=None,
+        content_type=None,
+        content_disposition=None,
+        content_length=None,
+        last_modified=None):
+    """
+    Put an object into the cache to send it with cache action later.
+
+    @param request: the request object
+    @param key: non-guessable key into cache (str)
+    @param data: content data (str or open file-like obj)
+    @param filename: filename for content-disposition header and for autodetecting
+                     content_type (unicode, default: None)
+    @param content_type: content-type header value (str, default: autodetect from filename)
+    @param content_disposition: type for content-disposition header (str, default: None)
+    @param content_length: data length for content-length header (int, default: autodetect)
+    @param last_modified: last modified timestamp (int, default: autodetect)
+    """
+    import os.path
+    from MoinMoin.util import timefuncs
+
+    if filename:
+        # make sure we just have a simple filename (without path)
+        filename = os.path.basename(filename)
+
+        if content_type is None:
+            # try autodetect
+            mt, enc = mimetypes.guess_type(filename)
+            if mt:
+                content_type = mt
+
+    if content_type is None:
+        content_type = 'application/octet-stream'
+
+    data_cache = caching.CacheEntry(request, cache_arena, key+'.data', cache_scope, do_locking=do_locking)
+    data_cache.update(data)
+    content_length = content_length or data_cache.size()
+    last_modified = last_modified or data_cache.mtime()
+
+    last_modified = timefuncs.formathttpdate(int(last_modified))
+    headers = ['Content-Type: %s' % content_type,
+               'Last-Modified: %s' % last_modified,
+               'Content-Length: %s' % content_length,
+              ]
+    if content_disposition and filename:
+        # TODO: fix the encoding here, plain 8 bit is not allowed according to the RFCs
+        # There is no solution that is compatible to IE except stripping non-ascii chars
+        filename = filename.encode(config.charset)
+        headers.append('Content-Disposition: %s; filename="%s"' % (content_disposition, filename))
+
+    meta_cache = caching.CacheEntry(request, cache_arena, key+'.meta', cache_scope, do_locking=do_locking, use_pickle=True)
+    meta_cache.update((last_modified, headers))
+
+
+def exists(request, key, strict=False):
+    """
+    Check if a cached object for this key exists.
+
+    @param request: the request object
+    @param key: non-guessable key into cache (str)
+    @param strict: if True, also check the data cache, not only meta (bool, default: False)
+    @return: is object cached? (bool)
+    """
+    if strict:
+        data_cache = caching.CacheEntry(request, cache_arena, key+'.data', cache_scope, do_locking=do_locking)
+        data_cached = data_cache.exists()
+    else:
+        data_cached = True  # we assume data will be there if meta is there
+
+    meta_cache = caching.CacheEntry(request, cache_arena, key+'.meta', cache_scope, do_locking=do_locking, use_pickle=True)
+    meta_cached = meta_cache.exists()
+
+    return meta_cached and data_cached
+
+
+def remove(request, key):
+    """ delete headers/data cache for key """
+    meta_cache = caching.CacheEntry(request, cache_arena, key+'.meta', cache_scope, do_locking=do_locking, use_pickle=True)
+    meta_cache.remove()
+    data_cache = caching.CacheEntry(request, cache_arena, key+'.data', cache_scope, do_locking=do_locking)
+    data_cache.remove()
+
+
+def url(request, key, do='get'):
+    """ return URL for the object cached for key """
+    return "%s/?%s" % (
+        request.getScriptname(),
+        wikiutil.makeQueryString(dict(action=action_name, do=do, key=key), want_unicode=False))
+
+
+def _get_headers(request, key):
+    """ get last_modified and headers cached for key """
+    meta_cache = caching.CacheEntry(request, cache_arena, key+'.meta', cache_scope, do_locking=do_locking, use_pickle=True)
+    last_modified, headers = meta_cache.content()
+    return last_modified, headers
+
+
+def _get_datafile(request, key):
+    """ get an open data file for the data cached for key """
+    data_cache = caching.CacheEntry(request, cache_arena, key+'.data', cache_scope, do_locking=do_locking)
+    data_cache.open(mode='r')
+    return data_cache
+
+
+def _do_get(request, key):
+    """ send a complete http response with headers/data cached for key """
+    try:
+        last_modified, headers = _get_headers(request, key)
+        if request.if_modified_since == last_modified:
+            request.emit_http_headers(["Status: 304 Not modified"])
+        else:
+            data_file = _get_datafile(request, key)
+            request.emit_http_headers(headers)
+            request.send_file(data_file)
+    except caching.CacheError:
+        request.emit_http_headers(["Status: 404 Not found"])
+
+
+def _do_remove(request, key):
+    """ delete headers/data cache for key """
+    remove(request, key)
+    request.emit_http_headers(["Status: 200 OK"])
+
+
+def _do(request, do, key):
+    if do == 'get':
+        _do_get(request, key)
+    elif do == 'remove':
+        _do_remove(request, key)
+
+def execute(pagename, request):
+    do = request.form.get('do', [None])[0]
+    key = request.form.get('key', [None])[0]
+    _do(request, do, key)
+
--- a/docs/CHANGES	Sat Jul 19 16:07:54 2008 +0200
+++ b/docs/CHANGES	Sat Jul 19 16:11:19 2008 +0200
@@ -31,6 +31,12 @@
     USE BOTH ON YOUR OWN RISK!
 
 Version 1.7.current:
+  New features:
+    * New 'cache' action (can be used to cache expensively rendered output, e.g.
+      scaled images, parsers that render text to images). Once put into the
+      cache, moin can emit an http response for that content very fast and
+      efficiently (including "304 not changed" handling).
+
   Fixes:
     * Security fix: XSS fix for advanced search form
     * Avoid creation of new pagedirs with empty edit-log files by just
@@ -57,6 +63,12 @@
     * OpenID RP: make it compatible to python-openid 2.2.x
     * PackagePages.collectpackage: removed encoding from file name of zipfile
 
+  Developer notes:
+    * New file-like API in MoinMoin.caching (good for dealing with medium
+      to large files without consuming lots of memory).
+    * New MoinMoin.action.cache - fast/efficient serving of stuff you put
+      into the cache.
+
 Version 1.7.0:
   Note: This is a reduced CHANGES, ommitting details from rc/beta test and
         also less interesting minor changes and fixes. It shows changes