changeset 770:a4ec2c6f0bb7 pytest2

history: do not yield Revision objects, but directly result documents from whoosh we have all that global and local history view (and also the atom feed) needs in the whoosh index. the whoosh documents yielded are dicts with all stored fields. by just using that, we do not need to access the backend storage any more for displaying history (which was one of the reasons for the slow global history for wikis with many revisions). one small difference is rev.timestamp (UNIX timestamp) vs. doc[MTIME] (datetime object). added size, action, comment fields to whoosh indexes (needed for history display and also useful for other reasons). adjusted the tests (pytest 2) TODO: ACL checks for history? (this is a general thing we need to be careful with: not to expose index data that should not be exposed). removed unused "mountpoint" history() param. add utctimestamp() == inverse of datetime.utcfromtimestamp()
author Thomas Waldmann <tw AT waldmann-edv DOT de>
date Thu, 25 Aug 2011 23:09:06 +0200
parents 813802d5072a
children e5b51d4304c8
files MoinMoin/apps/feed/views.py MoinMoin/apps/frontend/views.py MoinMoin/search/indexing.py MoinMoin/storage/_tests/test_backends_router.py MoinMoin/storage/backends/indexing.py MoinMoin/templates/global_history.html MoinMoin/templates/history.html MoinMoin/themes/__init__.py
diffstat 8 files changed, 90 insertions(+), 99 deletions(-) [+]
line wrap: on
line diff
--- a/MoinMoin/apps/feed/views.py	Wed Aug 24 18:19:30 2011 +0200
+++ b/MoinMoin/apps/feed/views.py	Thu Aug 25 23:09:06 2011 +0200
@@ -23,7 +23,7 @@
 from MoinMoin import wikiutil
 from MoinMoin.i18n import _, L_, N_
 from MoinMoin.apps.feed import feed
-from MoinMoin.config import NAME, ACL, ACTION, ADDRESS, HOSTNAME, USERID, COMMENT
+from MoinMoin.config import NAME, ACL, ACTION, ADDRESS, HOSTNAME, USERID, COMMENT, MTIME
 from MoinMoin.themes import get_editor_info
 from MoinMoin.items import Item
 from MoinMoin.util.crypto import cache_key
@@ -43,11 +43,11 @@
     if content is None:
         title = app.cfg.sitename
         feed = AtomFeed(title=title, feed_url=request.url, url=request.host_url)
-        for rev in flaskg.storage.history(item_name=item_name):
-            this_rev = rev
-            this_revno = rev.revno
-            item = rev.item
-            name = rev[NAME]
+        for doc in flaskg.storage.history(item_name=item_name):
+            name = doc[NAME]
+            this_revno = doc["rev_no"]
+            item = flaskg.storage.get_item(name)
+            this_rev = item.get_revision(this_revno)
             try:
                 hl_item = Item.create(name, rev_no=this_revno)
                 previous_revno = this_revno - 1
@@ -65,11 +65,11 @@
                 content = _(u'MoinMoin feels unhappy.')
                 content_type = 'text'
             feed.add(title=name, title_type='text',
-                     summary=rev.get(COMMENT, ''), summary_type='text',
+                     summary=doc.get(COMMENT, ''), summary_type='text',
                      content=content, content_type=content_type,
-                     author=get_editor_info(rev, external=True),
+                     author=get_editor_info(doc, external=True),
                      url=url_for_item(name, rev=this_revno, _external=True),
-                     updated=datetime.utcfromtimestamp(rev.timestamp),
+                     updated=doc[MTIME],
                     )
         content = feed.to_string()
         app.cache.set(cid, content)
--- a/MoinMoin/apps/frontend/views.py	Wed Aug 24 18:19:30 2011 +0200
+++ b/MoinMoin/apps/frontend/views.py	Thu Aug 25 23:09:06 2011 +0200
@@ -47,7 +47,7 @@
 from MoinMoin.items import Item, NonExistent
 from MoinMoin.items import ROWS_META, COLS, ROWS_DATA
 from MoinMoin import config, user, util, wikiutil
-from MoinMoin.config import ACTION, COMMENT, CONTENTTYPE, ITEMLINKS, ITEMTRANSCLUSIONS, NAME, CONTENTTYPE_GROUPS
+from MoinMoin.config import ACTION, COMMENT, CONTENTTYPE, ITEMLINKS, ITEMTRANSCLUSIONS, NAME, CONTENTTYPE_GROUPS, MTIME
 from MoinMoin.util import crypto
 from MoinMoin.util.interwiki import url_for_item
 from MoinMoin.security.textcha import TextCha, TextChaizedForm, TextChaValid
@@ -684,7 +684,6 @@
 @frontend.route('/+history/<itemname:item_name>')
 def history(item_name):
     history = flaskg.storage.history(item_name=item_name)
-
     offset = request.values.get('offset', 0)
     offset = max(int(offset), 0)
 
@@ -698,6 +697,7 @@
                            history_page=history_page,
                           )
 
+
 @frontend.route('/+history')
 def global_history():
     history = flaskg.storage.history(item_name='')
@@ -709,50 +709,51 @@
             bookmark_time = datetime.utcfromtimestamp(bm)
         results_per_page = flaskg.user.results_per_page # if it is 0, means no paging
     item_groups = OrderedDict()
-    for rev in history:
-        current_item_name = rev.item.name
-        if bookmark_time and rev.timestamp <= bookmark_time:
+    for doc in history:
+        current_item_name = doc[NAME]
+        if bookmark_time and doc[MTIME] <= bookmark_time:
             break
         elif current_item_name in item_groups:
-            latest_rev = item_groups[current_item_name][0]
-            tm_latest = datetime.utcfromtimestamp(latest_rev.timestamp)
-            tm_current = datetime.utcfromtimestamp(rev.timestamp)
+            latest_doc = item_groups[current_item_name][0]
+            tm_latest = latest_doc[MTIME]
+            tm_current = doc[MTIME]
             if format_date(tm_latest) == format_date(tm_current): # this change took place on the same day
-                item_groups[current_item_name].append(rev)
+                item_groups[current_item_name].append(doc)
         else:
-            item_groups[current_item_name] = [rev]
+            item_groups[current_item_name] = [doc]
 
     # Got the item dict, now doing grouping inside them
     editor_info = namedtuple('editor_info', ['editor', 'editor_revnos'])
-    for item_name, revs in item_groups.items():
+    for item_name, docs in item_groups.items():
         item_info = {}
         editors_info = OrderedDict()
         editors = []
         revnos = []
         comments = []
-        current_rev = revs[0]
+        current_doc = docs[0]
         item_info["item_name"] = item_name
-        item_info["timestamp"] = current_rev.timestamp
-        item_info["contenttype"] = current_rev.get(CONTENTTYPE)
-        item_info["action"] = current_rev.get(ACTION)
-        item_info["name"] = current_rev.get(NAME)
+        item_info["name"] = current_doc[NAME]
+        item_info["timestamp"] = current_doc[MTIME]
+        item_info["contenttype"] = current_doc[CONTENTTYPE]
+        item_info["action"] = current_doc[ACTION]
 
         # Aggregating comments, authors and revno
-        for rev in revs:
-            revnos.append(rev.revno)
-            comment = rev.get(COMMENT)
+        for doc in docs:
+            rev_no = doc["rev_no"]
+            revnos.append(rev_no)
+            comment = doc.get(COMMENT)
             if comment:
                 comment = "#%(revno)d %(comment)s" % {
-                          'revno': rev.revno,
+                          'revno': rev_no,
                           'comment': comment
                           }
                 comments.append(comment)
-            editor = get_editor_info(rev)
+            editor = get_editor_info(doc)
             editor_name = editor["name"]
             if editor_name in editors_info:
-                editors_info[editor_name].editor_revnos.append(rev.revno)
+                editors_info[editor_name].editor_revnos.append(rev_no)
             else:
-                editors_info[editor_name] = editor_info(editor, [rev.revno])
+                editors_info[editor_name] = editor_info(editor, [rev_no])
 
         if len(revnos) == 1:
             # there is only one change for this item in the history considered
@@ -784,7 +785,7 @@
     rev_tuple = namedtuple('rev_tuple', ['rev_date', 'item_revs'])
     rev_tuples = rev_tuple(prev_date, [])
     for item_group in item_groups.values():
-        tm = datetime.utcfromtimestamp(item_group["timestamp"])
+        tm = item_group["timestamp"]
         rev_date = format_date(tm)
         if revcount < offset:
             revcount += len(item_group["revnos"])
--- a/MoinMoin/search/indexing.py	Wed Aug 24 18:19:30 2011 +0200
+++ b/MoinMoin/search/indexing.py	Thu Aug 25 23:09:06 2011 +0200
@@ -74,6 +74,9 @@
             userid=ID(stored=True),
             address=ID(stored=True),
             hostname=ID(stored=True),
+            size=NUMERIC(stored=True),
+            action=ID(stored=True),
+            comment=TEXT(stored=True, multitoken_query="and"),
             content=TEXT(stored=True, multitoken_query="and"),
         )
 
--- a/MoinMoin/storage/_tests/test_backends_router.py	Wed Aug 24 18:19:30 2011 +0200
+++ b/MoinMoin/storage/_tests/test_backends_router.py	Thu Aug 25 23:09:06 2011 +0200
@@ -13,6 +13,7 @@
 
 from flask import current_app as app
 
+from MoinMoin.config import NAME
 from MoinMoin.error import ConfigurationError
 from MoinMoin.storage._tests.test_backends import BackendTest
 from MoinMoin.storage.backends.memory import MemoryBackend
@@ -172,19 +173,17 @@
             import time
             time.sleep(1)
 
-        for num, rev in enumerate(self.backend.history(reverse=False)):
+        for num, doc in enumerate(self.backend.history(reverse=False)):
             name, revno = order[num]
-            assert rev.item.name == name
-            assert rev.revno == revno
+            assert doc[NAME] == name
+            assert doc["rev_no"] == revno
 
         order.reverse()
-        for num, rev in enumerate(self.backend.history(reverse=True)):
+        for num, doc in enumerate(self.backend.history(reverse=True)):
             name, revno = order[num]
-            assert rev.item.name == name
-            assert rev.revno == revno
+            assert doc[NAME] == name
+            assert doc["rev_no"] == revno
 
-    # See history function in indexing.py for comments on why this test fails.
-    @pytest.mark.xfail
     def test_history_size_after_rename(self):
         item = self.backend.create_item(u'first')
         item.create_revision(0)
@@ -192,7 +191,7 @@
         item.rename(u'second')
         item.create_revision(1)
         item.commit()
-        assert len([rev for rev in self.backend.history()]) == 2
+        assert len(list(self.backend.history())) == 2
 
     def test_history_after_destroy_item(self):
         itemname = u"I will be completely destroyed"
@@ -204,16 +203,11 @@
 
         item.destroy()
 
-        all_rev_data = [rev.read() for rev in self.backend.history()]
-        assert not rev_data in all_rev_data
-
-        for rev in self.backend.history():
-            assert not rev.item.name == itemname
-        for rev in self.backend.history(reverse=False):
-            assert not rev.item.name == itemname
+        itemnames_history = [doc[NAME] for doc in self.backend.history()]
+        assert itemname not in itemnames_history
 
     def test_history_after_destroy_revision(self):
-        itemname = u"I will see my children die"    # removed the smiley ':-(' temporarily as it slows the test in addition with a failure
+        itemname = u"I will see my children die"
         rev_data = "I will die!"
         persistent_rev = "I will see my sibling die :-("
         item = self.backend.create_item(itemname)
@@ -227,8 +221,8 @@
         rev = item.get_revision(0)
         rev.destroy()
 
-        for rev in self.backend.history():
-            assert not (rev.item.name == itemname and rev.revno == 0)
+        itemnames_revs_history = [(doc[NAME], doc["rev_no"]) for doc in self.backend.history()]
+        assert (itemname, 0) not in itemnames_revs_history
 
     def test_history_item_names(self):
         item = self.backend.create_item(u'first')
@@ -237,9 +231,9 @@
         item.rename(u'second')
         item.create_revision(1)
         item.commit()
-        revs_in_create_order = [rev for rev in self.backend.history(reverse=False)]
-        assert revs_in_create_order[0].revno == 0
-        assert revs_in_create_order[0].item.name == u'second'
-        assert revs_in_create_order[1].revno == 1
-        assert revs_in_create_order[1].item.name == u'second'
+        docs_history = list(self.backend.history(reverse=False))
+        assert docs_history[0]["rev_no"] == 0
+        assert docs_history[0][NAME] == u'first'
+        assert docs_history[1]["rev_no"] == 1
+        assert docs_history[1][NAME] == u'second'
 
--- a/MoinMoin/storage/backends/indexing.py	Wed Aug 24 18:19:30 2011 +0200
+++ b/MoinMoin/storage/backends/indexing.py	Thu Aug 25 23:09:06 2011 +0200
@@ -61,27 +61,10 @@
         """
         History implementation using the index.
         """
-        for result in self._index.history(reverse=reverse, item_name=item_name, start=start, end=end):
-            # we currently create the item, the revision and yield it to stay
-            # compatible with storage api definition, but this could be changed to
-            # just return the data we get from the index (without accessing backend)
-            # TODO: A problem exists at item = self.get_item(name).
-            # In the history_size_after_rename test in test_backends.py,
-            # an item was created with the name "first" and then renamed to "second."
-            # When it runs through this history function and runs item = self.get_item("first"),
-            # it can't find it because it was already renamed to "second."
-            # Some suggested solutions are: using some neverchanging uuid to identify some specific item
-            # or continuing to use the name, but tracking name changes within the item's history.
-            rev_datetime, name, rev_no = result
-            try:
-                logging.debug("HISTORY: name %s revno %s" % (name, rev_no))
-                item = self.get_item(name)
-                yield item.get_revision(rev_no)
-            except AccessDeniedError as e:
-                # just skip items we may not access
-                pass
-            except (NoSuchItemError, NoSuchRevisionError) as e:
-                logging.exception("history processing catched exception")
+        for doc in self._index.history(reverse=reverse, item_name=item_name, start=start, end=end):
+            logging.debug("HISTORY: name %s revno %s" % (doc[NAME], doc["rev_no"]))
+            # XXX ACL checks?
+            yield doc
 
     def all_tags(self):
         """
@@ -291,9 +274,7 @@
                 logging.debug("Latest revisions: removing %d", latest_doc_number)
                 async_writer.delete_document(latest_doc_number)
 
-    def history(self, mountpoint=u'', item_name=u'', reverse=True, start=None, end=None):
-        if mountpoint:
-            mountpoint += '/'
+    def history(self, item_name=u'', reverse=True, start=None, end=None):
         with self.index_object.all_revisions_index.searcher() as all_revs_searcher:
             if item_name:
                 docs = all_revs_searcher.documents(name_exact=item_name,
@@ -304,7 +285,7 @@
             from operator import itemgetter
             # sort by mtime and rev_no do deal better with mtime granularity for fast item rev updates
             for doc in sorted(docs, key=itemgetter("mtime", "rev_no"), reverse=reverse)[start:end]:
-                yield (doc[MTIME], mountpoint + doc[NAME], doc["rev_no"])
+                yield doc
 
     def all_tags(self):
         with self.index_object.latest_revisions_index.searcher() as latest_revs_searcher:
--- a/MoinMoin/templates/global_history.html	Wed Aug 24 18:19:30 2011 +0200
+++ b/MoinMoin/templates/global_history.html	Thu Aug 25 23:09:06 2011 +0200
@@ -36,7 +36,7 @@
                     <span>
                         <h2>{{ rev_date }}</h2>
                         {% if user.valid %}
-                        <a class="bookmark-link" href="{{ url_for('frontend.bookmark', time=latest_timestamp) }}">{{ _("Set bookmark") }}</a>
+                        <a class="bookmark-link" href="{{ url_for('frontend.bookmark', time=utctimestamp(latest_timestamp)) }}">{{ _("Set bookmark") }}</a>
                         {% endif %}
                    </span>
                 </div>
--- a/MoinMoin/templates/history.html	Wed Aug 24 18:19:30 2011 +0200
+++ b/MoinMoin/templates/history.html	Thu Aug 25 23:09:06 2011 +0200
@@ -27,27 +27,27 @@
                 <th>{{ _("Comment") }}</th>
                 <th colspan="6">{{ _("Actions") }}</th>
             </tr>
-            {% for rev in history %}
+            {% for doc in history %}
             <tr>
-                <td class="moin-wordbreak">{{ rev.name }}</td>
-                <td class="moin-integer">{{ rev.revno }}</td>
-                <td>{{ rev.timestamp|datetimeformat }}</td>
-                <td class="moin-integer">{{ rev.size }}</td>
+                <td class="moin-wordbreak">{{ doc.name }}</td>
+                <td class="moin-integer">{{ doc.rev_no }}</td>
+                <td>{{ doc.mtime|datetimeformat }}</td>
+                <td class="moin-integer">{{ doc.size }}</td>
                 <td>
                     <div class="moin-hist-rev">
-                        <input type="radio" name="rev1" value="{{ rev.revno }}" />
-                        <input type="radio" name="rev2" value="{{ rev.revno }}" />
+                        <input type="radio" name="rev1" value="{{ doc.rev_no }}" />
+                        <input type="radio" name="rev2" value="{{ doc.rev_no }}" />
                     </div>
                 </td>
-                <td class="moin-wordbreak">{{ utils.editor_info(rev) }}</td>
-                <td class="moin-wordbreak">{{ rev.contenttype }}</td>
-                <td class="moin-wordbreak">{{ rev.comment }}</td>
-                <td><a href="{{ url_for('frontend.show_item', item_name=rev.item.name, rev=rev.revno) }}">{{ _('show') }}</a></td>
-                <td><a href="{{ url_for('frontend.show_item_meta', item_name=rev.item.name, rev=rev.revno) }}">{{ _('meta') }}</a></td>
-                <td><a href="{{ url_for('frontend.download_item', item_name=rev.item.name, rev=rev.revno) }}">{{ _('download') }}</a></td>
-                <td><a href="{{ url_for('frontend.highlight_item', item_name=rev.item.name, rev=rev.revno) }}">{{ _('highlight') }}</a></td>
-                <td><a href="{{ url_for('frontend.revert_item', item_name=rev.item.name, rev=rev.revno) }}">{{ _('revert') }}</a></td>
-                <td><a href="{{ url_for('frontend.destroy_item', item_name=rev.item.name, rev=rev.revno) }}">{{ _('destroy') }}</a></td>
+                <td class="moin-wordbreak">{{ utils.editor_info(doc) }}</td>
+                <td class="moin-wordbreak">{{ doc.contenttype }}</td>
+                <td class="moin-wordbreak">{{ doc.comment }}</td>
+                <td><a href="{{ url_for('frontend.show_item', item_name=doc.name, rev=doc.rev_no) }}">{{ _('show') }}</a></td>
+                <td><a href="{{ url_for('frontend.show_item_meta', item_name=doc.name, rev=doc.rev_no) }}">{{ _('meta') }}</a></td>
+                <td><a href="{{ url_for('frontend.download_item', item_name=doc.name, rev=doc.rev_no) }}">{{ _('download') }}</a></td>
+                <td><a href="{{ url_for('frontend.highlight_item', item_name=doc.name, rev=doc.rev_no) }}">{{ _('highlight') }}</a></td>
+                <td><a href="{{ url_for('frontend.revert_item', item_name=doc.name, rev=doc.rev_no) }}">{{ _('revert') }}</a></td>
+                <td><a href="{{ url_for('frontend.destroy_item', item_name=doc.name, rev=doc.rev_no) }}">{{ _('destroy') }}</a></td>
             </tr>
             {% endfor %}
         </table>
--- a/MoinMoin/themes/__init__.py	Wed Aug 24 18:19:30 2011 +0200
+++ b/MoinMoin/themes/__init__.py	Thu Aug 25 23:09:06 2011 +0200
@@ -366,6 +366,17 @@
     return 'moin-mime-%s' % cls
 
 
+def utctimestamp(dt):
+    """
+    convert a datetime object (UTC) to a UNIX timestamp (UTC)
+
+    Note: time library writers seem to have a distorted relationship to inverse
+          functions and also to UTC (see time.gmtime, see datetime.utcfromtimestamp).
+    """
+    from calendar import timegm
+    return timegm(dt.timetuple())
+
+
 def setup_jinja_env():
     app.jinja_env.filters['shorten_item_name'] = shorten_item_name
     app.jinja_env.filters['contenttype_to_class'] = contenttype_to_class
@@ -387,6 +398,7 @@
                             'item_name': 'handlers need to give it',
                             'url_for_item': url_for_item,
                             'get_editor_info': lambda rev: get_editor_info(rev),
+                            'utctimestamp': lambda dt: utctimestamp(dt),
                             'gen': make_generator(),
                             })