changeset 916:ac600982e40a storage-ng

Fine-tune / optimize Revision object creation and MetaData access. Optimize Revision object creation if we already have the relevant whoosh doc. If we do not have the whoosh doc for the revision, load it from the index - this is good for: a) having it (avoids storage access); b) triggering a KeyError early if we do not have that revision. Do not fetch metadata from storage if the whoosh document has the data (or could have it, as the field is in the schema). Convert the MTIME datatype: whoosh: datetime -> storage: UNIX timestamp. Remove the .data-accessing hack that was used to trigger a KeyError if the revision does not exist.
author Thomas Waldmann <tw AT waldmann-edv DOT de>
date Sun, 02 Oct 2011 16:34:13 +0200
parents 3b9bef437212
children 772c4c8db164
files MoinMoin/storage/middleware/indexing.py
diffstat 1 files changed, 40 insertions(+), 23 deletions(-) [+]
line wrap: on
line diff
--- a/MoinMoin/storage/middleware/indexing.py	Sun Oct 02 02:13:35 2011 +0200
+++ b/MoinMoin/storage/middleware/indexing.py	Sun Oct 02 16:34:13 2011 +0200
@@ -74,6 +74,7 @@
                             ITEMID, REVID, CURRENT, PARENTID
 
 from MoinMoin.search.analyzers import item_name_analyzer, MimeTokenizer, AclTokenizer
+from MoinMoin.themes import utctimestamp
 from MoinMoin.util.crypto import make_uuid
 
 LATEST_REVS = 'latest_revs'
@@ -276,6 +277,10 @@
         self.schemas[ALL_REVS] = all_revisions_schema
         self.schemas[LATEST_REVS] = latest_revisions_schema
 
+        # what fields could whoosh result documents have (no matter whether all revs index
+        # or latest revs index):
+        self.common_fields = set(latest_revs_fields.keys()) & set(all_revs_fields.keys())
+
     def open(self):
         """
         Open all indexes.
@@ -578,7 +583,7 @@
                 doc = hit.fields()
                 latest_doc = not all_revs and doc or None
                 item = Item(self, latest_doc=latest_doc, itemid=doc[ITEMID])
-                yield item[doc[REVID]]
+                yield item.get_revision(doc[REVID], doc=doc)
 
     def search_page(self, q, all_revs=False, pagenum=1, pagelen=10, **kw):
         """
@@ -591,7 +596,7 @@
                 doc = hit.fields()
                 latest_doc = not all_revs and doc or None
                 item = Item(self, latest_doc=latest_doc, itemid=doc[ITEMID])
-                yield item[doc[REVID]]
+                yield item.get_revision(doc[REVID], doc=doc)
 
     def documents(self, all_revs=False, **kw):
         """
@@ -600,7 +605,7 @@
         for doc in self._documents(all_revs, **kw):
             latest_doc = not all_revs and doc or None
             item = Item(self, latest_doc=latest_doc, itemid=doc[ITEMID])
-            yield item[doc[REVID]]
+            yield item.get_revision(doc[REVID], doc=doc)
 
     def _documents(self, all_revs=False, **kw):
         """
@@ -625,7 +630,7 @@
         if doc:
             latest_doc = not all_revs and doc or None
             item = Item(self, latest_doc=latest_doc, itemid=doc[ITEMID])
-            return item[doc[REVID]]
+            return item.get_revision(doc[REVID], doc=doc)
 
     def _document(self, all_revs=False, **kw):
         """
@@ -742,19 +747,14 @@
         """
         Get Revision with revision id <revid>.
         """
-        if revid == CURRENT:
-            revid = self._current.get(REVID)
-            if revid is None:
-                raise KeyError
-        rev = Revision(self, revid)
-        rev.data # XXX trigger KeyError if rev does not exist
-        return rev
+        return Revision(self, revid)
 
-    def get_revision(self, revid):
+    def get_revision(self, revid, doc=None):
         """
-        Same as item[revid].
+        Similar to item[revid], but you can optionally give an already existing
+        whoosh result document for the given revid to avoid backend accesses for some use cases.
         """
-        return self[revid]
+        return Revision(self, revid, doc)
 
     def preprocess(self, meta, data):
         """
@@ -823,6 +823,18 @@
     An existing revision (exists in the backend).
     """
     def __init__(self, item, revid, doc=None):
+        is_current = revid == CURRENT
+        if doc is None:
+            if is_current:
+                doc = item._current
+            else:
+                doc = item.indexer._document(all_revs=not is_current, revid=revid)
+                if doc is None:
+                    raise KeyError
+        if is_current:
+            revid = doc.get(REVID)
+            if revid is None:
+                raise KeyError
         self.item = item
         self.revid = revid
         self.backend = item.backend
@@ -869,6 +881,7 @@
         self.revision = revision
         self._doc = doc or {}
         self._meta = meta or {}
+        self._common_fields = revision.item.indexer.common_fields
 
     def __contains__(self, key):
         try:
@@ -883,16 +896,20 @@
         return iter(self._meta)
 
     def __getitem__(self, key):
-        try:
+        if self._meta:
+            # we have real metadata (e.g. from storage)
             return self._meta[key]
-        except KeyError:
-            pass
-        try:
-            return self._doc[key]
-        except KeyError:
-            pass
-        self._meta, _ = self.revision._load()
-        return self._meta[key]
+        elif self._doc and key in self._common_fields:
+            # we have a result document from whoosh, which has quite a lot
+            # of the usually wanted metadata, avoid storage access, use this.
+            value = self._doc[key]
+            if key == MTIME:
+                # whoosh has a datetime object, but we want a UNIX timestamp
+                value = utctimestamp(value)
+            return value
+        else:
+            self._meta, _ = self.revision._load()
+            return self._meta[key]
 
     def __cmp__(self, other):
         if self[REVID] == other[REVID]: