changeset 975:a4d4c74721bc

Fixed the routing middleware: the routing middleware iterator now yields tuples of (mountpoint, revid). Some methods in the routing middleware now take (name, revid) so that they can determine the correct backend — with just the revid this is not easily possible (it would need an index lookup to find the name). The indexing tests now use the storage provided by the test framework instead of manually assembling their own. Most changes in this changeset are related to these changes. Additionally, I introduced a new test class attribute, "reinit_storage": if it is present and True, the test framework will recreate the WSGI app (and storage and index) for every test method call in that class.
author Thomas Waldmann <tw AT waldmann-edv DOT de>
date Thu, 27 Oct 2011 21:47:26 -0200
parents 67e6cfa6941f
children eaee846383e6
files MoinMoin/app.py MoinMoin/conftest.py MoinMoin/storage/middleware/_tests/test_indexing.py MoinMoin/storage/middleware/_tests/test_routing.py MoinMoin/storage/middleware/_tests/test_serialization.py MoinMoin/storage/middleware/indexing.py MoinMoin/storage/middleware/routing.py MoinMoin/storage/middleware/serialization.py
diffstat 8 files changed, 99 insertions(+), 79 deletions(-) [+]
line wrap: on
line diff
--- a/MoinMoin/app.py	Thu Oct 27 20:35:07 2011 -0200
+++ b/MoinMoin/app.py	Thu Oct 27 21:47:26 2011 -0200
@@ -168,7 +168,6 @@
     if app.cfg.create_storage:
         app.router.create()
     app.router.open()
-    app.router = app.router._get_backend('')[0] # XXX hack until router works correctly
     app.storage = indexing.IndexingMiddleware(app.cfg.index_dir, app.router,
                                               wiki_name=app.cfg.interwikiname,
                                               acl_rights_contents=app.cfg.acl_rights_contents)
--- a/MoinMoin/conftest.py	Thu Oct 27 20:35:07 2011 -0200
+++ b/MoinMoin/conftest.py	Thu Oct 27 21:47:26 2011 -0200
@@ -80,10 +80,11 @@
         if inspect.isclass(self.parent.obj.__class__):
             cls = self.parent.obj.__class__
             cfg = getattr(cls, 'Config', wikiconfig.Config)
-            if prev_cfg is not cfg and prev_app is not None:
+            reinit = getattr(cls, 'reinit_storage', False)
+            if (prev_cfg is not cfg or reinit) and prev_app is not None:
                 # other config, previous app exists, so deinit it:
                 deinit_test_app(prev_app, prev_ctx)
-            if prev_cfg is not cfg or prev_app is None:
+            if prev_cfg is not cfg or reinit or prev_app is None:
                 # other config or no app yet, init app:
                 self.app, self.ctx = init_test_app(cfg)
             else:
--- a/MoinMoin/storage/middleware/_tests/test_indexing.py	Thu Oct 27 20:35:07 2011 -0200
+++ b/MoinMoin/storage/middleware/_tests/test_indexing.py	Thu Oct 27 21:47:26 2011 -0200
@@ -13,33 +13,38 @@
 
 import pytest
 
+from flask import g as flaskg
+
 from MoinMoin.config import NAME, SIZE, ITEMID, REVID, DATAID, HASH_ALGORITHM, CONTENT, COMMENT, \
                             LATEST_REVS, ALL_REVS
 
 from ..indexing import IndexingMiddleware
 
+from MoinMoin.auth import GivenAuth
+from MoinMoin._tests import wikiconfig
 from MoinMoin.storage.backends.stores import MutableBackend
 from MoinMoin.storage.stores.memory import BytesStore as MemoryBytesStore
 from MoinMoin.storage.stores.memory import FileStore as MemoryFileStore
+from MoinMoin.storage import create_simple_mapping
+from MoinMoin.storage.middleware import routing
+
+
+def dumper(indexer, idx_name):
+    print "*** %s ***" % idx_name
+    for kvs in indexer.dump(idx_name=idx_name):
+        for k, v in kvs:
+            print k, repr(v)[:70]
+        print
 
 
 class TestIndexingMiddleware(object):
+    reinit_storage = True # cleanup after each test method
+
     def setup_method(self, method):
-        meta_store = MemoryBytesStore()
-        data_store = MemoryFileStore()
-        self.be = MutableBackend(meta_store, data_store)
-        self.be.create()
-        self.be.open()
-        index_dir = 'ix'
-        self.imw = IndexingMiddleware(index_dir=index_dir, backend=self.be)
-        self.imw.create()
-        self.imw.open()
+        self.imw = flaskg.unprotected_storage
 
     def teardown_method(self, method):
-        self.imw.close()
-        self.imw.destroy()
-        self.be.close()
-        self.be.destroy()
+        pass
 
     def test_nonexisting_item(self):
         item = self.imw[u'foo']
@@ -251,6 +256,9 @@
         expected_all_revids.append(r.revid)
         expected_latest_revids.append(r.revid)
 
+        dumper(self.imw, ALL_REVS)
+        dumper(self.imw, LATEST_REVS)
+
         # now build a fresh index at tmp location:
         self.imw.create(tmp=True)
         self.imw.rebuild(tmp=True)
@@ -278,6 +286,9 @@
         self.imw.move_index()
         self.imw.open()
 
+        dumper(self.imw, ALL_REVS)
+        dumper(self.imw, LATEST_REVS)
+
         # read the index contents we have now:
         all_revids = [doc[REVID] for doc in self.imw._documents(idx_name=ALL_REVS)]
         latest_revids = [doc[REVID] for doc in self.imw._documents()]
@@ -292,9 +303,12 @@
         self.imw.update()
         self.imw.open()
 
+        dumper(self.imw, ALL_REVS)
+        dumper(self.imw, LATEST_REVS)
+
         # read the index contents we have now:
-        all_revids = [rev.revid for rev in self.imw.documents(idx_name=ALL_REVS)]
-        latest_revids = [rev.revid for rev in self.imw.documents()]
+        all_revids = [doc[REVID] for doc in self.imw._documents(idx_name=ALL_REVS)]
+        latest_revids = [doc[REVID] for doc in self.imw._documents()]
 
         # now it should have the previously missing rev and all should be as expected:
         for missing_revid in missing_revids:
@@ -337,22 +351,17 @@
         assert unicode(data) == doc[CONTENT]
 
 class TestProtectedIndexingMiddleware(object):
+    reinit_storage = True # cleanup after each test method
+
+    class Config(wikiconfig.Config):
+        auth = [GivenAuth(user_name=u'joe'), ] # XXX does NOT login user joe
+
     def setup_method(self, method):
-        meta_store = MemoryBytesStore()
-        data_store = MemoryFileStore()
-        self.be = MutableBackend(meta_store, data_store)
-        self.be.create()
-        self.be.open()
-        index_dir = 'ix'
-        self.imw = IndexingMiddleware(index_dir=index_dir, backend=self.be, user_name=u'joe', acl_support=True)
-        self.imw.create()
-        self.imw.open()
+        flaskg.user.name = u'joe' # XXX hack because of GivenAuth does not work
+        self.imw = flaskg.unprotected_storage # XXX use .storage
 
     def teardown_method(self, method):
-        self.imw.close()
-        self.imw.destroy()
-        self.be.close()
-        self.be.destroy()
+        pass
 
     def test_documents(self):
         item_name = u'public'
--- a/MoinMoin/storage/middleware/_tests/test_routing.py	Thu Oct 27 20:35:07 2011 -0200
+++ b/MoinMoin/storage/middleware/_tests/test_routing.py	Thu Oct 27 21:47:26 2011 -0200
@@ -46,33 +46,26 @@
 
     return router
 
-def revid_split(revid):
-    # router revids are <backend_mountpoint>:<backend_revid>, split that:
-    return revid.rsplit(u':', 1)
-
 def test_store_get_del(router):
     root_name = u'foo'
     root_revid = router.store(dict(name=root_name), StringIO(''))
     sub_name = u'sub/bar'
     sub_revid = router.store(dict(name=sub_name), StringIO(''))
 
-    assert revid_split(root_revid)[0] == ''
-    assert revid_split(sub_revid)[0] == 'sub'
-
     # when going via the router backend, we get back fully qualified names:
-    root_meta, _ = router.retrieve(root_revid)
-    sub_meta, _ = router.retrieve(sub_revid)
+    root_meta, _ = router.retrieve(root_name, root_revid)
+    sub_meta, _ = router.retrieve(sub_name, sub_revid)
     assert root_name == root_meta[NAME]
     assert sub_name == sub_meta[NAME]
 
     # when looking into the storage backend, we see relative names (without mountpoint):
-    root_meta, _ = router.mapping[-1][1].retrieve(revid_split(root_revid)[1])
-    sub_meta, _ = router.mapping[0][1].retrieve(revid_split(sub_revid)[1])
+    root_meta, _ = router.mapping[-1][1].retrieve(root_revid)
+    sub_meta, _ = router.mapping[0][1].retrieve(sub_revid)
     assert root_name == root_meta[NAME]
     assert sub_name == 'sub' + '/' + sub_meta[NAME]
     # delete revs:
-    router.remove(root_revid)
-    router.remove(sub_revid)
+    router.remove(root_name, root_revid)
+    router.remove(sub_name, sub_revid)
 
 
 def test_store_readonly_fails(router):
@@ -100,8 +93,9 @@
 
 
 def test_iter(router):
-    existing = set(router)
+    existing_before = set([revid for mountpoint, revid in router])
     root_revid = router.store(dict(name=u'foo'), StringIO(''))
     sub_revid = router.store(dict(name=u'sub/bar'), StringIO(''))
-    assert set(router) == (set([root_revid, sub_revid])|existing)
+    existing_now = set([revid for mountpoint, revid in router])
+    assert existing_now == set([root_revid, sub_revid]) | existing_before
 
--- a/MoinMoin/storage/middleware/_tests/test_serialization.py	Thu Oct 27 20:35:07 2011 -0200
+++ b/MoinMoin/storage/middleware/_tests/test_serialization.py	Thu Oct 27 21:47:26 2011 -0200
@@ -11,6 +11,7 @@
 from StringIO import StringIO
 
 from ..indexing import IndexingMiddleware
+from ..routing import Backend as RoutingBackend
 from ..serialization import serialize, deserialize
 
 from MoinMoin.storage.backends.stores import MutableBackend
@@ -49,7 +50,9 @@
 
     meta_store = BytesStore()
     data_store = FileStore()
-    backend = MutableBackend(meta_store, data_store)
+    _backend = MutableBackend(meta_store, data_store)
+    mapping = [('', _backend)]
+    backend = RoutingBackend(mapping)
     backend.create()
     backend.open()
     request.addfinalizer(backend.destroy)
--- a/MoinMoin/storage/middleware/indexing.py	Thu Oct 27 20:35:07 2011 -0200
+++ b/MoinMoin/storage/middleware/indexing.py	Thu Oct 27 21:47:26 2011 -0200
@@ -379,21 +379,21 @@
             if docnum_remove is not None:
                 # we are removing a revid that is in latest revs index
                 try:
-                    latest_revids = self._find_latest_revids(self.ix[ALL_REVS], Term(ITEMID, itemid))
+                    latest_names_revids = self._find_latest_names_revids(self.ix[ALL_REVS], Term(ITEMID, itemid))
                 except AttributeError:
                     # workaround for bug #200 AttributeError: 'FieldCache' object has no attribute 'code'
-                    latest_revids = []
-                if latest_revids:
+                    latest_names_revids = []
+                if latest_names_revids:
                     # we have a latest revision, just update the document in the index:
-                    assert len(latest_revids) == 1 # this item must have only one latest revision
-                    latest_revid = latest_revids[0]
+                    assert len(latest_names_revids) == 1 # this item must have only one latest revision
+                    latest_name_revid = latest_names_revids[0]
                     # we must fetch from backend because schema for LATEST_REVS is different than for ALL_REVS
                     # (and we can't be sure we have all fields stored, too)
-                    meta, _ = self.backend.retrieve(latest_revid)
+                    meta, _ = self.backend.retrieve(*latest_name_revid)
                     # we only use meta (not data), because we do not want to transform data->content again (this
                     # is potentially expensive) as we already have the transformed content stored in ALL_REVS index:
                     with self.ix[ALL_REVS].searcher() as searcher:
-                        doc = searcher.document(revid=latest_revid)
+                        doc = searcher.document(revid=latest_name_revid[1])
                         content = doc[CONTENT]
                     doc = backend_to_index(meta, content, self.schemas[LATEST_REVS], self.wikiname)
                     writer.update_document(**doc)
@@ -415,9 +415,9 @@
         else:
             writer = MultiSegmentWriter(index, procs, limitmb)
         with writer as writer:
-            for revid in revids:
+            for mountpoint, revid in revids:
                 if mode in ['add', 'update', ]:
-                    meta, data = self.backend.retrieve(revid)
+                    meta, data = self.backend.retrieve(mountpoint, revid)
                     content = convert_to_indexable(meta, data, is_new=False)
                     doc = backend_to_index(meta, content, schema, wikiname)
                 if mode == 'update':
@@ -429,13 +429,13 @@
                 else:
                     raise ValueError("mode must be 'update', 'add' or 'delete', not '{0}'".format(mode))
 
-    def _find_latest_revids(self, index, query=None):
+    def _find_latest_names_revids(self, index, query=None):
         """
         find the latest revids using the all-revs index
 
         :param index: an up-to-date and open ALL_REVS index
         :param query: query to search only specific revisions (optional, default: all items/revisions)
-        :returns: a list of the latest revids
+        :returns: a list of tuples (name, latest revid)
         """
         if query is None:
             query = Every()
@@ -443,8 +443,10 @@
             result = searcher.search(query, groupedby=ITEMID, sortedby=FieldFacet(MTIME, reverse=True))
             by_item = result.groups(ITEMID)
             # values in v list are in same relative order as in results, so latest MTIME is first:
-            latest_revids = [searcher.stored_fields(v[0])[REVID] for v in by_item.values()]
-        return latest_revids
+            latest_names_revids = [(searcher.stored_fields(v[0])[NAME],
+                                    searcher.stored_fields(v[0])[REVID])
+                                   for v in by_item.values()]
+        return latest_names_revids
 
     def rebuild(self, tmp=False, procs=1, limitmb=256):
         """
@@ -461,13 +463,13 @@
             # build an index of all we have (so we know what we have)
             all_revids = self.backend # the backend is an iterator over all revids
             self._modify_index(index, self.schemas[ALL_REVS], self.wikiname, all_revids, 'add', procs, limitmb)
-            latest_revids = self._find_latest_revids(index)
+            latest_names_revids = self._find_latest_names_revids(index)
         finally:
             index.close()
         # now build the index of the latest revisions:
         index = open_dir(index_dir, indexname=LATEST_REVS)
         try:
-            self._modify_index(index, self.schemas[LATEST_REVS], self.wikiname, latest_revids, 'add', procs, limitmb)
+            self._modify_index(index, self.schemas[LATEST_REVS], self.wikiname, latest_names_revids, 'add', procs, limitmb)
         finally:
             index.close()
 
@@ -487,17 +489,24 @@
         index_dir = self.index_dir_tmp if tmp else self.index_dir
         index_all = open_dir(index_dir, indexname=ALL_REVS)
         try:
+            # NOTE: self.backend iterator gives (mountpoint, revid) tuples, which is NOT
+            # the same as (name, revid), thus we do the set operations just on the revids.
             # first update ALL_REVS index:
-            backend_revids = set(self.backend)
+            revids_mountpoints = dict((revid, mountpoint) for mountpoint, revid in self.backend)
+            backend_revids = set(revids_mountpoints)
             with index_all.searcher() as searcher:
-                ix_revids = set([doc[REVID] for doc in searcher.all_stored_fields()])
+                ix_revids_names = dict((doc[REVID], doc[NAME]) for doc in searcher.all_stored_fields())
+            revids_mountpoints.update(ix_revids_names) # this is needed for stuff that was deleted from storage
+            ix_revids = set(ix_revids_names)
             add_revids = backend_revids - ix_revids
             del_revids = ix_revids - backend_revids
             changed = add_revids or del_revids
+            add_revids = [(revids_mountpoints[revid], revid) for revid in add_revids]
+            del_revids = [(revids_mountpoints[revid], revid) for revid in del_revids]
             self._modify_index(index_all, self.schemas[ALL_REVS], self.wikiname, add_revids, 'add')
             self._modify_index(index_all, self.schemas[ALL_REVS], self.wikiname, del_revids, 'delete')
 
-            backend_latest_revids = set(self._find_latest_revids(index_all))
+            backend_latest_names_revids = set(self._find_latest_names_revids(index_all))
         finally:
             index_all.close()
         index_latest = open_dir(index_dir, indexname=LATEST_REVS)
@@ -505,7 +514,9 @@
             # now update LATEST_REVS index:
             with index_latest.searcher() as searcher:
                 ix_revids = set(doc[REVID] for doc in searcher.all_stored_fields())
+            backend_latest_revids = set(revid for name, revid in backend_latest_names_revids)
             upd_revids = backend_latest_revids - ix_revids
+            upd_revids = [(revids_mountpoints[revid], revid) for revid in upd_revids]
             self._modify_index(index_latest, self.schemas[LATEST_REVS], self.wikiname, upd_revids, 'update')
             self._modify_index(index_latest, self.schemas[LATEST_REVS], self.wikiname, del_revids, 'delete')
         finally:
@@ -754,6 +765,8 @@
         meta[ITEMID] = self.itemid
         if MTIME not in meta:
             meta[MTIME] = int(time.time())
+        #if CONTENTTYPE not in meta:
+        #    meta[CONTENTTYPE] = u'application/octet-stream'
         content = convert_to_indexable(meta, data, is_new=True)
         return meta, data, content
 
@@ -798,7 +811,8 @@
         """
         Destroy revision <revid>.
         """
-        self.backend.remove(revid)
+        rev = Revision(self, revid)
+        self.backend.remove(rev.name, revid)
         self.indexer.remove_revision(revid)
 
     def destroy_all_revisions(self):
@@ -840,7 +854,7 @@
         return self.meta.get(NAME, 'DoesNotExist')
 
     def _load(self):
-        meta, data = self.backend.retrieve(self.revid) # raises KeyError if rev does not exist
+        meta, data = self.backend.retrieve(self._doc[NAME], self.revid) # raises KeyError if rev does not exist
         self.meta = Meta(self, self._doc, meta)
         self._data = data
         return meta, data
--- a/MoinMoin/storage/middleware/routing.py	Thu Oct 27 20:35:07 2011 -0200
+++ b/MoinMoin/storage/middleware/routing.py	Thu Oct 27 21:47:26 2011 -0200
@@ -73,11 +73,10 @@
         #       can be given to get_revision and be routed to the right backend.
         for mountpoint, backend in self.mapping:
             for revid in backend:
-                yield u'{0}:{1}'.format(mountpoint, revid)
+                yield (mountpoint, revid)
 
-    def retrieve(self, revid):
-        mountpoint, revid = revid.rsplit(u':', 1)
-        backend = self._get_backend(mountpoint)[0]
+    def retrieve(self, name, revid):
+        backend, _, mountpoint = self._get_backend(name)
         meta, data = backend.retrieve(revid)
         if mountpoint:
             meta[NAME] = u'{0}/{1}'.format(mountpoint, meta[NAME])
@@ -97,16 +96,17 @@
             #XXX else: log info?
 
     def store(self, meta, data):
-        itemname = meta[NAME]
-        backend, itemname, mountpoint = self._get_backend(itemname)
+        mountpoint_itemname = meta[NAME]
+        backend, itemname, mountpoint = self._get_backend(mountpoint_itemname)
         if not isinstance(backend, MutableBackendBase):
             raise TypeError('backend {0!r} mounted at {1!r} is readonly'.format(backend, mountpoint))
         meta[NAME] = itemname
-        return u'{0}:{1}'.format(mountpoint, backend.store(meta, data))
+        revid = backend.store(meta, data)
+        meta[NAME] = mountpoint_itemname # restore the original name
+        return revid
 
-    def remove(self, revid):
-        mountpoint, revid = revid.rsplit(u':', 1)
-        backend = self._get_backend(mountpoint)[0]
+    def remove(self, name, revid):
+        backend, _, mountpoint = self._get_backend(name)
         if not isinstance(backend, MutableBackendBase):
             raise TypeError('backend {0!r} mounted at {1!r} is readonly'.format(backend, mountpoint))
         backend.remove(revid)
--- a/MoinMoin/storage/middleware/serialization.py	Thu Oct 27 20:35:07 2011 -0200
+++ b/MoinMoin/storage/middleware/serialization.py	Thu Oct 27 21:47:26 2011 -0200
@@ -45,8 +45,8 @@
             yield block
 
 def serialize_iter(backend):
-    for revid in backend:
-        meta, data = backend.retrieve(revid)
+    for mountpoint, revid in backend:
+        meta, data = backend.retrieve(mountpoint, revid)
         for data in serialize_rev(meta, data):
             yield data
     for data in serialize_rev(None, None):