changeset 742:be9241eaa3c4 pytest2

merged with default
author pkumar <contactprashantat@gmail.com>
date Thu, 18 Aug 2011 13:02:23 +0530
parents 38f153a7c355 (current diff) 0f29cbb71223 (diff)
children 700c3f22ea19
files MoinMoin/app.py MoinMoin/conftest.py MoinMoin/items/_tests/test_Item.py MoinMoin/storage/__init__.py MoinMoin/storage/_tests/test_backends.py MoinMoin/storage/_tests/test_backends_fs2.py MoinMoin/storage/_tests/test_backends_router.py MoinMoin/util/kvstore.py setup.py
diffstat 38 files changed, 1363 insertions(+), 785 deletions(-)
--- a/MoinMoin/_tests/wikiconfig.py	Thu Aug 18 12:25:19 2011 +0530
+++ b/MoinMoin/_tests/wikiconfig.py	Thu Aug 18 13:02:23 2011 +0530
@@ -19,6 +19,8 @@
     _here = abspath(dirname(__file__))
     _root = abspath(join(_here, '..', '..'))
     data_dir = join(_here, 'wiki', 'data') # needed for plugins package TODO
+    index_dir = join(_here, 'wiki', 'index')
+    index_dir_tmp = join(_here, 'wiki', 'index_tmp')
     _test_items_xml = join(_here, 'testitems.xml')
     content_acl = None
     item_root = 'FrontPage'
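
For a real wiki instance, the two settings introduced above go into the
wikiconfig; a minimal sketch (paths are illustrative, the DefaultConfig base
class is assumed):

    from os.path import join
    from MoinMoin.config.default import DefaultConfig

    class Config(DefaultConfig):
        # live indexes are read from index_dir; "moin index --action build"
        # writes fresh indexes into index_dir_tmp, "move" swaps them over
        index_dir = join('/srv/wiki', 'index')
        index_dir_tmp = join('/srv/wiki', 'index_tmp')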
--- a/MoinMoin/app.py	Thu Aug 18 12:25:19 2011 +0530
+++ b/MoinMoin/app.py	Thu Aug 18 13:02:23 2011 +0530
@@ -133,10 +133,6 @@
     clock.start('create_app init backends')
     app.unprotected_storage, app.storage = init_backends(app)
     clock.stop('create_app init backends')
-    clock.start('create_app index rebuild')
-    if app.cfg.index_rebuild:
-        app.unprotected_storage.index_rebuild() # XXX run this from a script
-    clock.stop('create_app index rebuild')
     clock.start('create_app load/save xml')
     clock.stop('create_app load/save xml')
     clock.start('create_app flask-babel')
@@ -173,14 +169,13 @@
     # A ns_mapping consists of several lines, where each line is made up like this:
     # mountpoint, unprotected backend, protection to apply as a dict
     ns_mapping = app.cfg.namespace_mapping
-    index_uri = app.cfg.router_index_uri
     # Just initialize with unprotected backends.
     unprotected_mapping = [(ns, backend) for ns, backend, acls in ns_mapping]
-    unprotected_storage = router.RouterBackend(unprotected_mapping, index_uri=index_uri)
+    unprotected_storage = router.RouterBackend(unprotected_mapping, cfg=app.cfg)
     # Protect each backend with the acls provided for it in the mapping at position 2
     amw = acl.AclWrapperBackend
     protected_mapping = [(ns, amw(app.cfg, backend, **acls)) for ns, backend, acls in ns_mapping]
-    storage = router.RouterBackend(protected_mapping, index_uri=index_uri)
+    storage = router.RouterBackend(protected_mapping, cfg=app.cfg)
     return unprotected_storage, storage
 
 def deinit_backends(app):
@@ -194,8 +189,7 @@
     xmlfile = app.cfg.load_xml
     if xmlfile:
         app.cfg.load_xml = None
-        tmp_backend = router.RouterBackend([('/', memory.MemoryBackend())],
-                                           index_uri='sqlite://')
+        tmp_backend = router.RouterBackend([('/', memory.MemoryBackend())], cfg=app.cfg)
         unserialize(tmp_backend, xmlfile)
         # TODO optimize this, maybe unserialize could count items it processed
         item_count = 0
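
Stripped of the application plumbing, the new RouterBackend calling
convention shown in this hunk reduces to passing the wiki config instead of
an index URI (a sketch; cfg must provide index_dir and index_dir_tmp):

    from MoinMoin.storage.backends import memory, router

    # mount the unprotected memory backend at the root namespace
    backend = router.RouterBackend([('/', memory.MemoryBackend())], cfg=app.cfg)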
--- a/MoinMoin/apps/frontend/views.py	Thu Aug 18 12:25:19 2011 +0530
+++ b/MoinMoin/apps/frontend/views.py	Thu Aug 18 13:02:23 2011 +0530
@@ -124,12 +124,32 @@
 class SearchForm(Form):
     q = String.using(optional=False).with_properties(autofocus=True, placeholder=L_("Search Query"))
     submit = String.using(default=L_('Search'), optional=True)
+    pagelen = String.using(optional=False)
+    search_in_all = Boolean.using(label=L_('search also in non-current revisions'), optional=True)
 
     validators = [ValidSearch()]
 
 
-def _search(query):
-    return "searching not implemented yet, query: %r" % query
+def _search(search_form, item_name):
+    from MoinMoin.search.indexing import WhooshIndex
+    from whoosh.qparser import QueryParser, MultifieldParser
+    from MoinMoin.search.analyzers import item_name_analyzer
+    from whoosh import highlight
+    query = search_form['q'].value
+    pagenum = 1 # We start from first page
+    pagelen = search_form['pagelen'].value
+    index_object = WhooshIndex()
+    ix = index_object.all_revisions_index if request.values.get('search_in_all') else index_object.latest_revisions_index
+    with ix.searcher() as searcher:
+        mparser = MultifieldParser(["name_exact", "name", "content"], schema=ix.schema)
+        q = mparser.parse(query)
+        results = searcher.search_page(q, int(pagenum), pagelen=int(pagelen))
+        return render_template('search_results.html',
+                               results=results,
+                               query=query,
+                               medium_search_form=search_form,
+                               item_name=item_name,
+                              )
 
 
 @frontend.route('/<itemname:item_name>', defaults=dict(rev=-1), methods=['GET', 'POST'])
@@ -138,8 +158,7 @@
     # first check whether we have a valid search query:
     search_form = SearchForm.from_flat(request.values)
     if search_form.validate():
-        query = search_form['q'].value
-        return _search(query)
+        return _search(search_form, item_name)
     search_form['submit'].set_default() # XXX from_flat() kills all values
 
     flaskg.user.addTrail(item_name)
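
Outside of the request cycle, the query path used by _search() above boils
down to a few whoosh calls; a minimal sketch (the index directory and the
query string are illustrative):

    from whoosh.index import open_dir
    from whoosh.qparser import MultifieldParser

    ix = open_dir('wiki/index', indexname='latest_revisions_index')
    with ix.searcher() as searcher:
        parser = MultifieldParser(["name_exact", "name", "content"], schema=ix.schema)
        # page 1 with 25 hits per page, mirroring searcher.search_page() above
        for hit in searcher.search_page(parser.parse(u"wiki"), 1, pagelen=25):
            print hit["name"]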
--- a/MoinMoin/config/default.py	Thu Aug 18 12:25:19 2011 +0530
+++ b/MoinMoin/config/default.py	Thu Aug 18 13:02:23 2011 +0530
@@ -403,8 +403,6 @@
     ('namespace_mapping', None,
     "This needs to point to a (correctly ordered!) list of tuples, each tuple containing: Namespace identifier, backend, acl protection to be applied to that backend. " + \
     "E.g.: [('/', FSBackend('wiki/data'), dict(default='All:read,write,create')), ]. Please see HelpOnStorageConfiguration for further reference."),
-    ('index_rebuild', True,
-     'rebuild item index from scratch (you may set this to False to speedup startup once you have an index)'),
     ('load_xml', None,
      'If this points to an xml file, the file is loaded into the storage backend(s) upon first request.'),
     ('save_xml', None,
--- a/MoinMoin/conftest.py	Thu Aug 18 12:25:19 2011 +0530
+++ b/MoinMoin/conftest.py	Thu Aug 18 13:02:23 2011 +0530
@@ -55,10 +55,9 @@
     return prev_app, prev_ctx, prev_cls
 
 def init_test_app(given_config):
-    namespace_mapping, router_index_uri = create_simple_mapping("memory:", given_config.content_acl)
+    namespace_mapping = create_simple_mapping("memory:", given_config.content_acl)
     more_config = dict(
         namespace_mapping=namespace_mapping,
-        router_index_uri=router_index_uri,
     )
     app = create_app_ext(flask_config_dict=dict(SECRET_KEY='foobarfoobar'),
                          moin_config_class=given_config,
--- a/MoinMoin/converter/moinwiki_out.py	Thu Aug 18 12:25:19 2011 +0530
+++ b/MoinMoin/converter/moinwiki_out.py	Thu Aug 18 13:02:23 2011 +0530
@@ -173,6 +173,10 @@
         params['accesskey'] = elem.get(xlink.accesskey, None)
         params = u','.join([u'%s=%s' % (p, params[p]) for p in params if params[p]])
 
+        # XXX: We don't have Iri support for now
+        from MoinMoin.util.iri import Iri
+        if isinstance(href, Iri):
+            href = unicode(href)
         # TODO: this can be done using one regex, can it?
         href = href.split(u'?')
         args = u''
@@ -289,6 +293,10 @@
     def open_moinpage_object(self, elem):
         # TODO: this can be done with one regex:
         href = elem.get(xlink.href, u'')
+        # XXX: We don't have Iri support for now
+        from MoinMoin.util.iri import Iri
+        if isinstance(href, Iri):
+            href = unicode(href)
         href = href.split(u'?')
         args = u''
         if len(href) > 1:
@@ -494,7 +502,7 @@
         if table_cellstyle:
             attrib.append(u'style="%s"' % table_cellstyle)
         if number_rows_spanned:
-            attrib.append(u'|'+number_rows_spanned)
+            attrib.append(u'|'+unicode(number_rows_spanned))
 
         attrib = u' '.join(attrib)
 
--- a/MoinMoin/items/__init__.py	Thu Aug 18 12:25:19 2011 +0530
+++ b/MoinMoin/items/__init__.py	Thu Aug 18 13:02:23 2011 +0530
@@ -172,7 +172,7 @@
         if rev_no is None:
             rev_no = -1
         if contenttype is None:
-            contenttype = 'application/x-nonexistent'
+            contenttype = u'application/x-nonexistent'
 
         try:
             if item is None:
@@ -188,7 +188,7 @@
             logging.debug("Got item: %r" % name)
             try:
                 rev = item.get_revision(rev_no)
-                contenttype = 'application/octet-stream' # it exists
+                contenttype = u'application/octet-stream' # it exists
             except NoSuchRevisionError:
                 try:
                     rev = item.get_revision(-1) # fall back to current revision
@@ -430,7 +430,7 @@
                 data = self.data_form_to_internal(data)
                 data = self.data_internal_to_storage(data)
                 # we know it is text and utf-8 - XXX is there a way to get the charset of the form?
-                contenttype_guessed = 'text/plain;charset=utf-8'
+                contenttype_guessed = u'text/plain;charset=utf-8'
         # data might be None here, if we have a form with just the data_file field, no file was uploaded
         # and no data_text field. this can happen if just metadata of a non-text item is edited.
 
--- a/MoinMoin/items/_tests/test_Item.py	Thu Aug 18 12:25:19 2011 +0530
+++ b/MoinMoin/items/_tests/test_Item.py	Thu Aug 18 13:02:23 2011 +0530
@@ -256,14 +256,14 @@
         creates a container and tests the content saved to the container
         """
         item_name = u'ContainerItem1'
-        item = Item.create(item_name, contenttype='application/x-tar')
+        item = Item.create(item_name, contenttype=u'application/x-tar')
         filecontent = 'abcdefghij'
         content_length = len(filecontent)
         members = set(['example1.txt', 'example2.txt'])
         item.put_member('example1.txt', filecontent, content_length, expected_members=members)
         item.put_member('example2.txt', filecontent, content_length, expected_members=members)
 
-        item = Item.create(item_name, contenttype='application/x-tar')
+        item = Item.create(item_name, contenttype=u'application/x-tar')
         tf_names = set(item.list_members())
         assert tf_names == members
         assert item.get_member('example1.txt').read() == filecontent
@@ -273,7 +273,7 @@
         creates two revisions of a container item
         """
         item_name = u'ContainerItem2'
-        item = Item.create(item_name, contenttype='application/x-tar')
+        item = Item.create(item_name, contenttype=u'application/x-tar')
         filecontent = 'abcdefghij'
         content_length = len(filecontent)
         members = set(['example1.txt'])
@@ -285,7 +285,7 @@
         item = flaskg.storage.get_item(item_name)
         assert item.next_revno == 2
 
-        item = Item.create(item_name, contenttype='application/x-tar')
+        item = Item.create(item_name, contenttype=u'application/x-tar')
         assert item.get_member('example1.txt').read() == filecontent
 
 class TestZipMixin(object):
--- a/MoinMoin/script/__init__.py	Thu Aug 18 12:25:19 2011 +0530
+++ b/MoinMoin/script/__init__.py	Thu Aug 18 13:02:23 2011 +0530
@@ -20,6 +20,8 @@
     manager.add_option('-c', '--config', dest='config', required=False, default=wiki_config)
     manager.add_command("moin", Server(host='127.0.0.1', port=8080))
 
+    from MoinMoin.script.maint.index import IndexOperations
+    manager.add_command("index", IndexOperations())
     from MoinMoin.script.account.create import Create_User
     manager.add_command("account_create", Create_User())
     from MoinMoin.script.account.disable import Disable_User
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MoinMoin/script/maint/index.py	Thu Aug 18 13:02:23 2011 +0530
@@ -0,0 +1,235 @@
+# Copyright: 2011 MoinMoin:MichaelMayorov
+# License: GNU GPL v2 (or any later version), see LICENSE.txt for details.
+
+"""
+MoinMoin - Manage whoosh indexes
+"""
+
+import os, datetime
+
+from flask import current_app as app
+from flask import g as flaskg
+from flaskext.script import Command, Option
+from whoosh.filedb.multiproc import MultiSegmentWriter
+from whoosh.index import open_dir, create_in, exists_in
+from whoosh.index import EmptyIndexError
+
+from MoinMoin.search.indexing import WhooshIndex
+from MoinMoin.config import MTIME, NAME, CONTENTTYPE
+from MoinMoin.error import FatalError
+from MoinMoin.storage.error import NoSuchItemError, NoSuchRevisionError
+from MoinMoin.util.mime import Type
+from MoinMoin.search.indexing import backend_to_index
+from MoinMoin.converter import convert_to_indexable
+
+from MoinMoin import log
+logging = log.getLogger(__name__)
+
+# Information about index and schema for latest and all revisions
+latest_indexname_schema = ("latest_revisions_index", "latest_revisions_schema")
+all_indexname_schema = ("all_revisions_index", "all_revisions_schema")
+both_indexnames_schemas = [latest_indexname_schema, all_indexname_schema]
+
+
+class IndexOperations(Command):
+    description = 'Build indexes'
+
+    option_list = (
+        Option('--for', required=True, dest='indexname', type=str, choices=("all-revs", "latest-revs", "both"),
+            help='Which indexes the action applies to'),
+        Option('--action', required=True, dest='action', type=str, choices=("build", "update", "clean", "move", "show"),
+            help="""
+                  Action for given indexes:
+                  build -- Build in index_dir_tmp
+                  update -- Update in index_dir_tmp
+                  clean -- Clean index_dir
+                  move  -- Move index files from index_dir_tmp to index_dir
+                  show -- Show index contents for the given index.
+                 """
+               ),
+        Option('--procs', '-p', required=False, dest='procs', type=int, default=None,
+            help='Number of processors the writer will use.'),
+        Option('--limitmb', '-l', required=False, dest='limitmb', type=int, default=10,
+            help='Maximum memory (in megabytes) each index-writer will use for the indexing pool.'),
+                  )
+
+    def run(self, indexname, action, procs, limitmb):
+
+        def build_index(indexnames_schemas):
+            """
+            Build fresh indexes in app.cfg.index_dir_tmp
+            """
+            indexnames = [indexname for indexname, schema in indexnames_schemas]
+            with MultiSegmentWriter(all_rev_index, procs, limitmb) as all_rev_writer:
+                with MultiSegmentWriter(latest_rev_index, procs, limitmb) as latest_rev_writer:
+                    for item in backend.iter_items_noindex():
+                        try:
+                            rev_no = None
+                            if "all_revisions_index" in indexnames:
+                                for rev_no in item.list_revisions():
+                                    revision = item.get_revision(rev_no)
+                                    rev_content = convert_to_indexable(revision)
+                                    metadata = backend_to_index(revision, rev_no, all_rev_schema, rev_content, interwikiname)
+                                    all_rev_writer.add_document(**metadata)
+                            else:
+                                revision = item.get_revision(-1)
+                                rev_no = revision.revno
+                                rev_content = convert_to_indexable(revision)
+                        except NoSuchRevisionError: # item has no such revision
+                            continue
+                        # revision is now the latest revision of this item
+                        if "latest_revisions_index" in indexnames and rev_no:
+                            metadata = backend_to_index(revision, rev_no, latest_rev_schema, rev_content, interwikiname)
+                            latest_rev_writer.add_document(**metadata)
+
+        def update_index(indexnames_schemas):
+            """
+            Update the indexes in app.cfg.index_dir_tmp
+            """
+
+            indexnames = [indexname for indexname, schema in indexnames_schemas]
+            create_documents = []
+            delete_documents = []
+            latest_documents = []
+            for item in backend.iter_items_noindex():
+                backend_rev_list = item.list_revisions()
+                if not backend_rev_list: # item has no revisions, skip it
+                    continue
+                name = item.get_revision(-1)[NAME]
+                index_rev_list = item_index_revs(all_rev_searcher, name)
+                add_rev_nos = set(backend_rev_list) - set(index_rev_list)
+                if add_rev_nos:
+                    if "all_revisions_index" in indexnames:
+                        create_documents.append((item, add_rev_nos))
+                    if "latest_revisions_index" in indexnames:
+                        latest_documents.append((item, max(add_rev_nos))) # Add latest revision
+                remove_rev_nos = set(index_rev_list) - set(backend_rev_list)
+                if remove_rev_nos:
+                    if "all_revisions_index" in indexnames:
+                        delete_documents.append((item, remove_rev_nos))
+
+            if "latest_revisions_index" in indexnames and latest_documents:
+                with latest_rev_index.writer() as latest_rev_writer:
+                    for item, rev_no in latest_documents:
+                        revision = item.get_revision(rev_no)
+                        rev_content = convert_to_indexable(revision)
+                        converted_rev = backend_to_index(revision, rev_no, latest_rev_schema, rev_content, interwikiname)
+                        found = latest_rev_searcher.document(name_exact=item.name,
+                                                             wikiname=interwikiname
+                                                            )
+                        if not found:
+                            latest_rev_writer.add_document(**converted_rev)
+                        # The indexed revision is stale; replace it with the newer one
+                        elif found["rev_no"] < converted_rev["rev_no"]:
+                            doc_number = latest_rev_searcher.document_number(name_exact=item.name, wikiname=interwikiname)
+                            latest_rev_writer.delete_document(doc_number)
+                            latest_rev_writer.add_document(**converted_rev)
+
+            if "all_revisions_index" in indexnames and delete_documents:
+                with all_rev_index.writer() as all_rev_writer:
+                    for item, rev_nos in delete_documents:
+                        for rev_no in rev_nos:
+                            doc_number = all_rev_searcher.document_number(rev_no=rev_no,
+                                                                          name_exact=item.name,
+                                                                          wikiname=interwikiname
+                                                                         )
+                            if doc_number is not None: # 0 is a valid document number
+                                all_rev_writer.delete_document(doc_number)
+
+            if "all_revisions_index" in indexnames and create_documents:
+                with all_rev_index.writer() as all_rev_writer:
+                    for item, rev_nos in create_documents:
+                        for rev_no in rev_nos:
+                            revision = item.get_revision(rev_no)
+                            rev_content = convert_to_indexable(revision)
+                            converted_rev = backend_to_index(revision, rev_no, all_rev_schema, rev_content, interwikiname)
+                            all_rev_writer.add_document(**converted_rev)
+
+        def clean_index(indexnames_schemas):
+            """
+            Clean given index in app.cfg.index_dir
+            """
+            for indexname, schema in indexnames_schemas:
+                index_object.create_index(index_dir=app.cfg.index_dir,
+                                          indexname=indexname,
+                                          schema=schema
+                                         )
+
+        def move_index(indexnames_schemas):
+            """
+            Move given indexes from index_dir_tmp to index_dir
+            """
+            clean_index(indexnames_schemas)
+            for indexname, schema in indexnames_schemas:
+                if not exists_in(app.cfg.index_dir_tmp, indexname=indexname):
+                    raise FatalError(u"Can't find %s in %s" % (indexname, app.cfg.index_dir_tmp))
+                for filename in all_rev_index.storage.list():
+                    src_file = os.path.join(app.cfg.index_dir_tmp, filename)
+                    dst_file = os.path.join(app.cfg.index_dir, filename)
+                    if indexname in filename and os.path.exists(src_file):
+                        os.rename(src_file, dst_file)
+
+        def show_index(indexnames_schemas):
+            """
+            Print documents in given index to stdout
+            """
+
+            for indexname, schema in indexnames_schemas:
+                try:
+                    if indexname == "all_revisions_index":
+                        ix = open_dir(app.cfg.index_dir, indexname="all_revisions_index")
+                    elif indexname == "latest_revisions_index":
+                        ix = open_dir(app.cfg.index_dir, indexname="latest_revisions_index")
+                    print "*** Revisions in", indexname
+                    with ix.searcher() as searcher:
+                        for rev in searcher.all_stored_fields():
+                            name = rev.pop("name", u"")
+                            content = rev.pop("content", u"")
+                            for field, value in [("name", name), ] + sorted(rev.items()) + [("content", content), ]:
+                                print "%s: %s" % (field, repr(value)[:70])
+                            print "\n"
+                    ix.close()
+                except (IOError, OSError, EmptyIndexError) as err:
+                    raise FatalError("%s [Can not open %s index" % str(err), indexname)
+
+        def item_index_revs(searcher, name):
+            """
+            Return the list of revision numbers indexed for the given item name
+            """
+
+            revs_found = searcher.documents(name_exact=name, wikiname=interwikiname)
+            return [rev["rev_no"] for rev in revs_found]
+
+        def do_action(action, indexnames_schemas):
+            if action == "build":
+                build_index(indexnames_schemas)
+            elif action == "update":
+                update_index(indexnames_schemas)
+            elif action == "clean":
+                clean_index(indexnames_schemas)
+            elif action == "move":
+                move_index(indexnames_schemas)
+            elif action == "show":
+                show_index(indexnames_schemas)
+
+        backend = flaskg.unprotected_storage = app.unprotected_storage
+        # Compare the paths before creating anything; os.path.samefile() would
+        # fail if index_dir does not exist yet.
+        if os.path.realpath(app.cfg.index_dir_tmp) == os.path.realpath(app.cfg.index_dir):
+            raise FatalError(u"cfg.index_dir and cfg.index_dir_tmp must point to different directories.")
+        index_object = WhooshIndex(index_dir=app.cfg.index_dir_tmp)
+        interwikiname = app.cfg.interwikiname or u''
+
+        latest_rev_index = index_object.latest_revisions_index
+        all_rev_index = index_object.all_revisions_index
+
+        latest_rev_schema = latest_rev_index.schema
+        all_rev_schema = all_rev_index.schema
+
+        latest_rev_searcher = latest_rev_index.searcher()
+        all_rev_searcher = all_rev_index.searcher()
+
+        if indexname == "both":
+            do_action(action, both_indexnames_schemas)
+        elif indexname == "all-revs":
+            do_action(action, (all_indexname_schema, ))
+        elif indexname == "latest-revs":
+            do_action(action, (latest_indexname_schema, ))
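
With IndexOperations registered as "index" in MoinMoin/script/__init__.py
above, typical invocations look like this (a sketch; "moin" stands for the
instance's flask-script manage script):

    # build both indexes from scratch in cfg.index_dir_tmp, then move the
    # finished files over into cfg.index_dir:
    moin index --for both --action build --procs 2 --limitmb 64
    moin index --for both --action move

    # update only the latest-revisions index, or dump its contents:
    moin index --for latest-revs --action update
    moin index --for latest-revs --action show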
--- a/MoinMoin/script/maint/xml.py	Thu Aug 18 12:25:19 2011 +0530
+++ b/MoinMoin/script/maint/xml.py	Thu Aug 18 13:02:23 2011 +0530
@@ -64,11 +64,9 @@
         if moin19data:
             # this is for backend migration scenario from moin 1.9
             from MoinMoin.storage.backends import create_simple_mapping, router
-            namespace_mapping, router_index_uri = \
-                create_simple_mapping(backend_uri='fs19:%s' % moin19data)
+            namespace_mapping = create_simple_mapping(backend_uri='fs19:%s' % moin19data)
             storage = router.RouterBackend(
-                    [(ns, be) for ns, be, acls in namespace_mapping],
-                    index_uri=router_index_uri)
+                    [(ns, be) for ns, be, acls in namespace_mapping], cfg=app.cfg)
         else:
             # this deals with the normal storage
             storage = app.unprotected_storage
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MoinMoin/search/__init__.py	Thu Aug 18 13:02:23 2011 +0530
@@ -0,0 +1,7 @@
+# Copyright: 2011 MoinMoin:MichaelMayorov
+# License: GNU GPL v2 (or any later version), see LICENSE.txt for details.
+
+"""
+MoinMoin - MoinMoin search package
+"""
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MoinMoin/search/_tests/test_analyzers.py	Thu Aug 18 13:02:23 2011 +0530
@@ -0,0 +1,163 @@
+# Copyright: 2011 MoinMoin:MichaelMayorov
+# License: GNU GPL v2 (or any later version), see LICENSE.txt for details.
+
+"""
+ MoinMoin - MoinMoin.search.analyzers Tests
+"""
+
+
+import py
+from flask import current_app as app
+
+from MoinMoin.search.analyzers import *
+
+
+class TokenizerTestBase(object):
+
+    def testTokenizer(self):
+        """ analyzers: check what obtained tokens matched given """
+        tokenizer = self.make_tokenizer()
+        for value, expected_tokens in self.test_cases_query:
+            tokens = [token.text for token in tokenizer(value)]
+            assert set(expected_tokens) == set(tokens)
+
+
+class TestAclTokenizer(TokenizerTestBase):
+    """ analyzers: test ACL tokenizer """
+
+    test_cases_query = [
+        # (query, tokens)
+        (u'-MinusGuy:read', [u'MinusGuy:-read']),
+        (u'+PlusGuy:read', [u'PlusGuy:+read']),
+        (u'Admin3:read,write,admin',
+            [
+             u'Admin3:+read',
+             u'Admin3:+write',
+             u'Admin3:-create',
+             u'Admin3:+admin',
+             u'Admin3:-destroy',
+            ]
+        ),
+        (u'Admin1,Admin2:read,write,admin',
+            [
+             u'Admin1:+read',
+             u'Admin1:+write',
+             u'Admin1:-create',
+             u'Admin1:+admin',
+             u'Admin1:-destroy',
+             u'Admin2:+read',
+             u'Admin2:+write',
+             u'Admin2:-create',
+             u'Admin2:+admin',
+             u'Admin2:-destroy',
+            ]
+        ),
+        (u'JoeDoe:read,write',
+            [
+             u'JoeDoe:+read',
+             u'JoeDoe:+write',
+             u'JoeDoe:-create',
+             u'JoeDoe:-admin',
+             u'JoeDoe:-destroy',
+            ]
+        ),
+        (u'name with spaces,another one:read,write',
+            [
+             u'name with spaces:+read',
+             u'name with spaces:+write',
+             u'name with spaces:-create',
+             u'name with spaces:-admin',
+             u'name with spaces:-destroy',
+             u'another one:+read',
+             u'another one:+write',
+             u'another one:-create',
+             u'another one:-admin',
+             u'another one:-destroy',
+            ]
+        ),
+        (u'CamelCase,extended name:read,write',
+            [
+             u'CamelCase:+read',
+             u'CamelCase:+write',
+             u'CamelCase:-create',
+             u'CamelCase:-admin',
+             u'CamelCase:-destroy',
+             u'extended name:+read',
+             u'extended name:+write',
+             u'extended name:-create',
+             u'extended name:-admin',
+             u'extended name:-destroy',
+            ]
+        ),
+        (u'BadGuy:',
+            [
+             u'BadGuy:-read',
+             u'BadGuy:-write',
+             u'BadGuy:-create',
+             u'BadGuy:-admin',
+             u'BadGuy:-destroy',
+            ]
+        ),
+        (u'All:read',
+            [
+             u'All:+read',
+             u'All:-write',
+             u'All:-create',
+             u'All:-admin',
+             u'All:-destroy',
+            ]
+        )
+    ]
+
+    def make_tokenizer(self):
+        return AclTokenizer(app.cfg)
+
+
+class TestMimeTokenizer(TokenizerTestBase):
+    """ analyzers: test content type analyzer """
+
+
+    test_cases_query = [
+                  # (query, tokens)
+                  (u'text/plain', [u'text', u'plain']),
+                  (u'text/plain;charset=utf-8', [u'text', u'plain', u'charset=utf-8']),
+                  (u'text/html;value1=foo;value2=bar',
+                   [u'text', u'html', u'value1=foo', u'value2=bar'],
+                  ),
+                  (u'text/html;value1=foo;value1=bar', [u'text', u'html', u'value1=bar'])
+                 ]
+
+    def make_tokenizer(self):
+        return MimeTokenizer()
+
+
+class TestItemNameAnalyzer(TokenizerTestBase):
+    """ analyzers: test item_name analyzer """
+
+    test_cases_query = [
+                  # (query, tokens)
+                  (u'wifi', [u'wifi']),
+                  (u'WiFi', [u'wi', u'fi']),
+                  (u'Wi-Fi', [u'wi', u'fi']),
+                  (u'some item name', [u'some', u'item', u'name']),
+                  (u'SomeItem/SubItem', [u'some', u'item', u'sub', u'item']),
+                  (u'GSOC2011', [u'gsoc', u'2011'])
+                 ]
+
+    test_cases_index = [(u'some item name', [u'some', u'item', u'name']),
+                        (u'SomeItem/SubItem', [u'some', u'item', u'sub', u'item', u'someitemsubitem']),
+                        (u'GSOC2011', [u'gsoc', u'2011'])
+                       ]
+
+    def make_tokenizer(self):
+        return item_name_analyzer()
+
+    def testTokenizer(self):
+        """ analyzers: test item name analyzer with "query" and "index" mode """
+        tokenizer = self.make_tokenizer()
+        for value, expected_tokens in self.test_cases_query:
+            tokens = [token.text for token in tokenizer(value, mode="query")]
+            assert set(expected_tokens) == set(tokens)
+        for value, expected_tokens in self.test_cases_index:
+            tokens = [token.text for token in tokenizer(value, mode="index")]
+            assert set(expected_tokens) == set(tokens)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MoinMoin/search/_tests/test_indexing.py	Thu Aug 18 13:02:23 2011 +0530
@@ -0,0 +1,155 @@
+# Copyright: 2011 MoinMoin:MichaelMayorov
+# License: GNU GPL v2 (or any later version), see LICENSE.txt for details.
+
+"""
+MoinMoin - MoinMoin.search.indexing Tests
+"""
+
+
+import py
+
+import shutil, tempfile
+from datetime import datetime
+
+from whoosh.qparser import QueryParser
+
+from MoinMoin import log
+from MoinMoin.search.indexing import WhooshIndex
+
+# Documents that will be added to the indexes
+docs = {
+        u"Document One": [
+                         {
+                          "wikiname": u"Test",
+                          "name": u"Document One",
+                          "uuid": u"68054804bd7141609b7c441143adf83d",
+                          "rev_no": 0,
+                          "mtime":  datetime(2011, 6, 10, 2, 17, 5),
+                          "content": u"Wi-Fi",
+                          "contenttype": u"text/plain;charset=utf-8",
+                          "tags": [u"Rest", u"in", u"peace"],
+                          "itemlinks": [u"Home", u"Find"],
+                          "itemtransclusions": [u"Another", u"Stuff"],
+                          "acl": u"JoeDoe:read,write",
+                          "language": u"en",
+                          "userid": u"1307875904.23.55111",
+                          "address": u"127.0.0.1",
+                          "hostname": u"localhost",
+                        },
+                        {
+                          "wikiname": u"Test",
+                          "name": u"Document One",
+                          "uuid": u"68054804bd7141609b7c441143adf83d",
+                          "rev_no": 1,
+                          "mtime":  datetime(2011, 6, 12, 2, 17, 5),
+                          "content": u"Mo in Moin",
+                          "contenttype": u"text/plain;charset=utf-8",
+                          "tags": [u"first_tag", u"second_tag"],
+                          "itemlinks": [u"Home", u"Find"],
+                          "itemtransclusions": [u"Another", u"Stuff"],
+                          "acl": u"JoeDoe:read,write",
+                          "language": u"en",
+                          "address": u"195.54.14.254",
+                          "hostname": u"kb.csu.ru",
+                        },
+                       ],
+        u"Document Two": [
+                         {
+                          "wikiname": u"Test",
+                          "name": u"Document Two",
+                          "uuid": u"12354804bd7141609b7c441143adf83d",
+                          "rev_no": 0,
+                          "mtime":  datetime(2011, 6, 10, 1, 17, 5),
+                          "content": u"Hello document one",
+                          "contenttype": u"text/plain;charset=utf-8",
+                          "tags": [u"first_tag", u"tag"],
+                          "itemlinks": [u"Home", u"Find"],
+                          "itemtransclusions": [u"Another"],
+                          "acl": u"User:-write",
+                          "language": u"en",
+                          "userid": u"1307875904.23.55111",
+                          "address": u"123.213.132.231",
+                         },
+                         {
+                          "wikiname": u"Test",
+                          "name": u"Document Two",
+                          "uuid": u"12354804bd7141609b7c441143adf83d",
+                          "rev_no": 1,
+                          "mtime":  datetime(2011, 6, 12, 2, 20, 5),
+                          "content": u"Hello document two",
+                          "contenttype": u"text/plain;charset=utf-8",
+                          "tags": [u"tag", u"second_tag"],
+                          "itemlinks": [u"Home", u"Find"],
+                          "itemtransclusions": [u"Another"],
+                          "acl": u"User:read,write,admin",
+                          "language": u"en",
+                          "address": u"123.213.132.231",
+                         },
+                        ]
+       }
+
+# (field_name, search_string, expected_result_count_for_latest, expected_result_count_for_all)
+queries = [
+           (u"wikiname", u"Test", 2, 4),
+           (u"name", u"Document", 2, 4),
+           (u"uuid", u"68054804bd7141609b7c441143adf83d", 1, 2),
+           (u"rev_no", u"1", 2, 2),
+           (u"content", u"moin", 1, 1),
+           (u"contenttype", u"text/plain", 2, 4),
+           (u"tags", u"first_tag", 1, 2),
+           (u"itemlinks", u"Home", 2, None),
+           (u"itemtransclusions", u"Stuff", 1, None),
+           (u"acl", u"JoeDoe:+read", 1, None),
+           (u"acl", u"JoeDoe:+write", 1, None),
+           (u"language", u"en", 2, 4),
+           (u"userid", u"1307875904.23.55111", 0, 2),
+           (u"address", u"127.0.0.1", 0, 1),
+           (u"hostname", u"kb.csu.ru", 1, 1),
+          ]
+
+
+class TestWhooshIndex(object):
+
+    queries = []
+
+    def setup_method(self, method):
+        """ indexing: create temporary directory with indexes """
+
+        self.index_dir = tempfile.mkdtemp('', 'moin-')
+
+    def teardown_method(self, method):
+        """ indexing: delete temporary directory """
+
+        shutil.rmtree(self.index_dir)
+
+    def testIndexSchema(self):
+        """
+        indexing: create temporary directory with indexes, add documents from
+        "docs" to indexes, and check results using "queries"
+        """
+
+        index_object = WhooshIndex(index_dir=self.index_dir)
+        latest_revs_index = index_object.latest_revisions_index
+        all_revs_index = index_object.all_revisions_index
+
+        # Add docs to indexes
+        with all_revs_index.writer() as all_revs_writer:
+            for item_name, documents in docs.items():
+                for document in documents:
+                    with latest_revs_index.writer() as latest_revs_writer:
+                        latest_revs_writer.update_document(**document)
+                    all_revs_names = all_revs_index.schema.names()
+                    all_revs_doc = dict([(key, value)
+                                         for key, value in document.items()
+                                         if key in all_revs_names])
+
+                    all_revs_writer.add_document(**all_revs_doc)
+
+        # Check that all docs were added successfully
+        with latest_revs_index.searcher() as latest_revs_searcher:
+            with all_revs_index.searcher() as all_revs_searcher:
+                for field_name, query, latest_res_len, all_res_len in queries:
+                    query = QueryParser(field_name, latest_revs_index.schema).parse(query)
+                    assert len(latest_revs_searcher.search(query)) == latest_res_len
+                    if field_name in all_revs_index.schema.names():
+                        assert len(all_revs_searcher.search(query)) == all_res_len
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MoinMoin/search/analyzers.py	Thu Aug 18 13:02:23 2011 +0530
@@ -0,0 +1,127 @@
+# Copyright: 2011 MoinMoin:MichaelMayorov
+# License: GNU GPL v2 (or any later version), see LICENSE.txt for details.
+
+"""
+MoinMoin - Misc. tokenizers and analyzers for whoosh indexing
+"""
+
+from whoosh.analysis import MultiFilter, IntraWordFilter, LowercaseFilter
+from whoosh.analysis import Tokenizer, Token, RegexTokenizer
+
+from MoinMoin.util.mime import Type
+from MoinMoin.security import AccessControlList
+
+
+class MimeTokenizer(Tokenizer):
+    """ Content type tokenizer """
+
+    def __call__(self, value, start_pos=0, positions=False, **kwargs):
+        """
+        Tokenizer behaviour:
+
+        Input: u"text/x.moin.wiki;charset=utf-8"
+        Output: u"text", u"x.moin.wiki", u"charset=utf-8"
+
+        Input: u"application/pdf"
+        Output: u"application", u"pdf"
+
+        :param value: String for tokenization
+        :param start_pos: The position number of the first token. For example,
+            if you set start_pos=2, the tokens will be numbered 2,3,4,...
+            instead of 0,1,2,...
+        :param positions: Whether to record token positions in the token.
+        """
+        assert isinstance(value, unicode), "%r is not unicode" % value
+        if u'/' not in value: # append '/' if the user omitted it
+            value += u'/'
+        pos = start_pos
+        tk = Token()
+        tp = Type(value)
+        tk.text = tp.type
+        if positions:
+            tk.pos = pos
+            pos += 1
+        yield tk
+        if tp.subtype is not None:
+            tk.text = tp.subtype
+            if positions:
+                tk.pos = pos
+                pos += 1
+            yield tk
+        for key, value in tp.parameters.items():
+            tk.text = u"%s=%s" % (key, value)
+            if positions:
+                tk.pos = pos
+                pos += 1
+            yield tk
+
+
+class AclTokenizer(Tokenizer):
+    """ Access control list tokenizer """
+
+    def __init__(self, cfg):
+        """
+        :param cfg: wiki config
+        """
+        self._acl_rights_contents = cfg.acl_rights_contents
+
+    def __call__(self, value, start_pos=0, positions=False, mode=u'', **kwargs):
+        """
+        Calls AccessControlList for tokenization
+
+        Analyzer behaviour:
+
+        In index mode:
+            Input: u"JoeDoe,JaneDoe:admin,read,write,destroy +EditorGroup:write All:read"
+
+            Output: "u'JoeDoe:+read', u'JoeDoe:+write', u'JoeDoe:-create', u'JoeDoe:+admin',
+                     u'JoeDoe:+destroy', u'JaneDoe:+read', u'JaneDoe:+write', u'JaneDoe:-create',
+                     u'JaneDoe:+admin', u'JaneDoe:+destroy', u'EditorGroup:+write', u'All:+read',
+                     u'All:-write', u'All:-create', u'All:-admin', u'All:-destroy'
+
+        In query mode:
+            Input: u"JoeDoe:+write"
+
+            Output: u"JoeDoe:+write"
+
+        :param value: unicode string
+        :param positions: Whether to record token positions in the token.
+        :param start_pos: The position number of the first token. For example,
+            if you set start_pos=2, the tokens will be numbered 2,3,4,...
+            instead of 0,1,2,...
+        """
+        assert isinstance(value, unicode)
+        pos = start_pos
+        tk = Token()
+        tk.mode = mode
+        if mode == "query":
+            tk.text = value
+            if positions:
+                tk.pos = pos
+            yield tk
+        else:
+            acl = AccessControlList([value], valid=self._acl_rights_contents)
+            for name, permissions in acl.acl:
+                for permission in permissions:
+                    sign = "+" if permissions[permission] else "-"
+                    tk.text = u"%s:%s%s" % (name, sign, permission)
+                    if positions:
+                        tk.pos = pos
+                        pos += 1
+                    yield tk
+
+
+def item_name_analyzer():
+    """
+    Analyzer behaviour:
+
+    Input: u"some item name", u"SomeItem/SubItem", u"GSOC2011"
+
+    Output: u"some", u"item", u"name"; u"Some", u"Item", u"Sub", u"Item"; u"GSOC", u"2011"
+    """
+    iwf = MultiFilter(index=IntraWordFilter(mergewords=True, mergenums=True),
+                      query=IntraWordFilter(mergewords=False, mergenums=False)
+                     )
+    analyzer = RegexTokenizer(r"\S+") | iwf | LowercaseFilter()
+    return analyzer
+
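
The analyzer's behaviour can be checked interactively; this sketch mirrors
the expectations in test_analyzers.py above (the tests compare token sets):

    from MoinMoin.search.analyzers import item_name_analyzer

    analyzer = item_name_analyzer()
    # index mode additionally emits the merged word, query mode does not
    print [t.text for t in analyzer(u"SomeItem/SubItem", mode="index")]
    # -> [u'some', u'item', u'sub', u'item', u'someitemsubitem']
    print [t.text for t in analyzer(u"GSOC2011", mode="query")]
    # -> [u'gsoc', u'2011']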
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MoinMoin/search/indexing.py	Thu Aug 18 13:02:23 2011 +0530
@@ -0,0 +1,150 @@
+# Copyright: 2011 MoinMoin:MichaelMayorov
+# License: GNU GPL v2 (or any later version), see LICENSE.txt for details.
+
+"""
+MoinMoin - Whoosh index schemas / index management
+"""
+
+import os
+import datetime
+
+from flask import current_app as app
+
+from whoosh.fields import Schema, TEXT, ID, IDLIST, NUMERIC, DATETIME, KEYWORD, BOOLEAN
+from whoosh.index import open_dir, create_in, EmptyIndexError
+
+from MoinMoin.config import MTIME, NAME
+from MoinMoin.search.analyzers import *
+from MoinMoin.error import FatalError
+
+from MoinMoin import log
+logging = log.getLogger(__name__)
+
+
+def backend_to_index(backend_rev, rev_no, schema, content, wikiname=u''):
+    """
+    Convert fields from backend format to whoosh schema
+
+    :param backend_rev: MoinMoin backend revision
+    :param rev_no: Revision number
+    :param schema: whoosh schema to filter the revision metadata against
+    :param content: indexable content of the revision
+    :param wikiname: interwiki name of this wiki (default u'')
+    :returns: document to put into whoosh index
+    """
+
+    doc = dict([(str(key), value)
+                for key, value in backend_rev.items()
+                if key in schema])
+    doc[MTIME] = datetime.datetime.fromtimestamp(backend_rev[MTIME])
+    doc["name_exact"] = backend_rev[NAME]
+    doc["rev_no"] = rev_no
+    doc["wikiname"] = wikiname
+    doc["content"] = content
+    return doc
+
+
+class WhooshIndex(object):
+    """
+    Managing whoosh indexes
+    """
+
+    # Index names, schemas
+    _indexes = {'latest_revisions_index': 'latest_revisions_schema',
+                'all_revisions_index': 'all_revisions_schema',
+               }
+
+    def __init__(self, index_dir=None, cfg=None):
+        """
+        Create and open indexes in index_dir
+
+        :param index_dir: Directory where whoosh indexes will be created, default None
+        :param cfg: Application config (app.cfg), default None
+        """
+        self._cfg = cfg or app.cfg
+        self._index_dir = index_dir or self._cfg.index_dir
+
+        common_fields = dict(
+            wikiname=ID(stored=True),
+            name=TEXT(stored=True, multitoken_query="and", analyzer=item_name_analyzer(), field_boost=2.0),
+            name_exact=ID(field_boost=3.0),
+            rev_no=NUMERIC(stored=True),
+            mtime=DATETIME(stored=True),
+            contenttype=TEXT(stored=True, multitoken_query="and", analyzer=MimeTokenizer()),
+            tags=ID(stored=True),
+            language=ID(stored=True),
+            userid=ID(stored=True),
+            address=ID(stored=True),
+            hostname=ID(stored=True),
+            content=TEXT(stored=True, multitoken_query="and"),
+        )
+
+        self.latest_revisions_schema = Schema(uuid=ID(unique=True, stored=True),
+                                              itemlinks=ID(stored=True),
+                                              itemtransclusions=ID(stored=True),
+                                              acl=TEXT(analyzer=AclTokenizer(self._cfg), multitoken_query="and", stored=True),
+                                              **common_fields)
+
+        self.all_revisions_schema = Schema(uuid=ID(stored=True),
+                                           **common_fields)
+
+        # Define dynamic fields
+        dynamic_fields = [("*_id", ID),
+                          ("*_text", TEXT),
+                          ("*_keyword", KEYWORD),
+                          ("*_numeric", NUMERIC),
+                          ("*_datetime", DATETIME),
+                          ("*_boolean", BOOLEAN)
+                         ]
+
+        # Adding dynamic fields to schemas
+        for glob, field_type in dynamic_fields:
+            self.latest_revisions_schema.add(glob, field_type, glob=True)
+            self.all_revisions_schema.add(glob, field_type, glob=True)
+
+        for index_name, index_schema in self._indexes.items():
+            self.open_index(index_name, index_schema, create=True, index_dir=self._index_dir)
+
+    def open_index(self, indexname, schema, create=False, index_dir=None):
+        """
+        Open index <indexname> in <index_dir>. If opening fails and <create>
+        is True, create the index and retry opening it. The opened index is
+        stored on this object as attribute <indexname>.
+
+        :param index_dir: Directory where whoosh indexes will be created
+        :param indexname: Name of created index
+        :param schema: which schema applies
+        """
+        index_dir = index_dir or self._cfg.index_dir
+        try:
+            index = open_dir(index_dir, indexname=indexname)
+            setattr(self, indexname, index)
+        except (IOError, OSError, EmptyIndexError) as err:
+            if create:
+                self.create_index(index_dir, indexname, schema)
+                try:
+                    index = open_dir(index_dir, indexname=indexname)
+                    setattr(self, indexname, index)
+                except (IOError, OSError, EmptyIndexError):
+                    # if we get here, it failed without recovery
+                    raise FatalError("can't open whoosh index")
+            else:
+                raise FatalError("can't open whoosh index")
+
+    def create_index(self, index_dir, indexname, schema):
+        """
+        Create <indexname> in <index_dir>
+
+        :param index_dir: Directory where whoosh indexes will be created
+        :param indexname: Name of created index
+        :param schema: which schema applies
+        """
+        try:
+            os.mkdir(index_dir)
+        except OSError:
+            # ignore exception, we'll get another exception below
+            # in case there are problems with the index_dir
+            pass
+        try:
+            create_in(index_dir, getattr(self, schema), indexname=indexname)
+        except (IOError, OSError) as err:
+            logging.error(u"%s [while trying to create index '%s' in '%s']" % (str(err), indexname, index_dir))
+
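
Putting indexing.py together, creating an index and adding one document by
hand comes down to the following (a sketch; it must run inside an app
context because WhooshIndex falls back to app.cfg for the ACL tokenizer,
and all field values are made up):

    from datetime import datetime
    from MoinMoin.search.indexing import WhooshIndex

    index_object = WhooshIndex(index_dir='/tmp/moin-index')
    ix = index_object.latest_revisions_index
    with ix.writer() as writer:
        # update_document() replaces an existing doc with the same unique uuid
        writer.update_document(wikiname=u'Test', name=u'Home', name_exact=u'Home',
                               uuid=u'0' * 32, rev_no=0,
                               mtime=datetime(2011, 8, 18, 13, 2, 23),
                               content=u'hello world')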
--- a/MoinMoin/storage/__init__.py	Thu Aug 18 12:25:19 2011 +0530
+++ b/MoinMoin/storage/__init__.py	Thu Aug 18 13:02:23 2011 +0530
@@ -165,34 +165,6 @@
         """
         raise NotImplementedError()
 
-    def history(self, reverse=True):
-        """
-        Returns an iterator over ALL revisions of ALL items stored in the backend.
-
-        If reverse is True (default), give history in reverse revision timestamp
-        order, otherwise in revision timestamp order.
-
-        Note: some functionality (e.g. completely cloning one storage into
-              another) requires that the iterator goes over really every
-              revision we have.
-
-        :type reverse: bool
-        :param reverse: Indicate whether the iterator should go in reverse order.
-        :rtype: iterator of revision objects
-        """
-        # generic and slow history implementation
-        revs = []
-        for item in self.iteritems():
-            for revno in item.list_revisions():
-                rev = item.get_revision(revno)
-                revs.append((rev.timestamp, rev.revno, item.name, ))
-        revs.sort() # from oldest to newest
-        if reverse:
-            revs.reverse()
-        for ts, revno, name in revs:
-            item = self.get_item(name)
-            yield item.get_revision(revno)
-
     def _get_revision(self, item, revno):
         """
         For a given item and revision number, return the corresponding revision
--- a/MoinMoin/storage/_tests/test_backends.py	Thu Aug 18 12:25:19 2011 +0530
+++ b/MoinMoin/storage/_tests/test_backends.py	Thu Aug 18 13:02:23 2011 +0530
@@ -590,9 +590,6 @@
         rev = item.get_revision(0)
         assert rev[SIZE] == 8
 
-        for nrev in self.backend.history():
-            assert nrev[SIZE] == 8
-
     def test_size_2(self):
         item = self.backend.create_item(u'size2')
         rev0 = item.create_revision(0)
@@ -620,57 +617,6 @@
                                        ('1', 1, 1L, 1+0j, (1, ), ), u'ąłć', (u'ó', u'żźć'), )):
             yield test_value, value, no
 
-    def test_history(self):
-        order = [(u'first', 0, ), (u'second', 0, ), (u'first', 1, ), (u'a', 0), (u'child/my_subitem', 0) ]
-        for name, revno in order:
-            if revno == 0:
-                item = self.backend.create_item(name)
-            else:
-                item = self.backend.get_item(name)
-            item.create_revision(revno)
-            item.commit()
-
-            from MoinMoin.storage.backends import router, acl
-            if isinstance(self.backend, (router.RouterBackend, acl.AclWrapperBackend)):
-                # Revisions are created too fast for the rev's timestamp's granularity.
-                # This only affects the RouterBackend because there several different
-                # backends are used and no means for storing simultaneously created revs
-                # in the correct order exists between backends. It affects AclWrapperBackend
-                # tests as well because those use a RouterBackend internally for real-world-likeness.
-
-                # XXX XXX
-                # You may have realized that all the items above belong to the same backend so this shouldn't actually matter.
-                # It does matter, however, once you consider that the RouterBackend uses the generic, slow history implementation.
-                # This one uses iteritems and then sorts all the revisions itself, hence discarding any information of ordering
-                # for simultaneously created revisions. If we just call history of that single backend directly, it works without
-                # time.sleep. For n backends, however, you'd have to somehow merge the revisions into one generator again, thus
-                # discarding that information again. Besides, that would be a costly operation. The ordering for simultaneosly
-                # created revisions remains the same since it's based on tuple ordering. Better proposals welcome.
-                import time
-                time.sleep(1)
-
-        for num, rev in enumerate(self.backend.history(reverse=False)):
-            name, revno = order[num]
-            assert rev.item.name == name
-            assert rev.revno == revno
-
-        order.reverse()
-        for num, rev in enumerate(self.backend.history()):
-            name, revno = order[num]
-            assert rev.item.name == name
-            assert rev.revno == revno
-
-    # See history function in indexing.py for comments on why this test fails.
-    @pytest.mark.xfail
-    def test_history_size_after_rename(self):
-        item = self.backend.create_item(u'first')
-        item.create_revision(0)
-        item.commit()
-        item.rename(u'second')
-        item.create_revision(1)
-        item.commit()
-        assert len([rev for rev in self.backend.history()]) == 2
-
     def test_destroy_item(self):
         itemname = u"I will be completely destroyed"
         rev_data = "I will be completely destroyed, too, hopefully"
@@ -683,14 +629,6 @@
         assert not self.backend.has_item(itemname)
         item_names = [item.name for item in self.backend.iteritems()]
         assert not itemname in item_names
-        all_rev_data = [rev.read() for rev in self.backend.history()]
-        assert not rev_data in all_rev_data
-
-        for rev in self.backend.history():
-            assert not rev.item.name == itemname
-        for rev in self.backend.history(reverse=False):
-            assert not rev.item.name == itemname
-
 
     def test_destroy_revision(self):
         itemname = u"I will see my children die :-("
@@ -726,9 +664,6 @@
         assert last_data != third
         assert last_data == persistent_rev
 
-        for rev in self.backend.history():
-            assert not (rev.item.name == itemname and rev.revno == 2)
-
     def test_clone_backend(self):
         src = flaskg.storage
         dst = memory.MemoryBackend()
@@ -749,7 +684,6 @@
         dst.clone(src, verbose=False)
 
         assert len(list(dst.iteritems())) == 2
-        assert len(list(dst.history())) == 1
         assert dst.has_item(dollys_name)
         rev = dst.get_item(dollys_name).get_revision(0)
         data = rev.read()
@@ -793,17 +727,3 @@
         item.destroy()
         assert len([item for item in self.backend.iteritems()]) == 0
 
-    def test_history_item_names(self):
-        item = self.backend.create_item(u'first')
-        item.create_revision(0)
-        item.commit()
-        item.rename(u'second')
-        item.create_revision(1)
-        item.commit()
-        revs_in_create_order = [rev for rev in self.backend.history(reverse=False)]
-        assert revs_in_create_order[0].revno == 0
-        assert revs_in_create_order[0].item.name == u'second'
-        assert revs_in_create_order[1].revno == 1
-        assert revs_in_create_order[1].item.name == u'second'
-
-
--- a/MoinMoin/storage/_tests/test_backends_fs2.py	Thu Aug 18 12:25:19 2011 +0530
+++ b/MoinMoin/storage/_tests/test_backends_fs2.py	Thu Aug 18 13:02:23 2011 +0530
@@ -5,8 +5,9 @@
     MoinMoin - Test - FS2Backend
 """
 
+import py, os, tempfile, shutil
 
-import py, os, tempfile, shutil
+from flask import current_app as app
 
 from MoinMoin.storage._tests.test_backends import BackendTest
 from MoinMoin.storage.backends.fs2 import FS2Backend
--- a/MoinMoin/storage/_tests/test_backends_router.py	Thu Aug 18 12:25:19 2011 +0530
+++ b/MoinMoin/storage/_tests/test_backends_router.py	Thu Aug 18 13:02:23 2011 +0530
@@ -29,19 +29,19 @@
         self.child = MemoryBackend()
         self.other = MemoryBackend()
         self.mapping = [('child', self.child), ('other/', self.other), (self.ns_user_profile, self.users), ('/', self.root)]
-        return RouterBackend(self.mapping, index_uri='sqlite://')
+        return RouterBackend(self.mapping, cfg=app.cfg)
 
     def kill_backend(self):
         pass
 
 
     def test_correct_backend(self):
-        mymap = {'rootitem': self.root,         # == /rootitem
-                 'child/joe': self.child,       # Direct child of namespace.
-                 'other/jane': self.other,      # Direct child of namespace.
-                 'child/': self.child,          # Root of namespace itself (!= root)
-                 'other/': self.other,          # Root of namespace
-                 '': self.root,                 # Due to lack of any namespace info
+        mymap = {u'rootitem': self.root,         # == /rootitem
+                 u'child/joe': self.child,       # Direct child of namespace.
+                 u'other/jane': self.other,      # Direct child of namespace.
+                 u'child/': self.child,          # Root of namespace itself (!= root)
+                 u'other/': self.other,          # Root of namespace
+                 u'': self.root,                 # Due to lack of any namespace info
                 }
 
         assert not (self.root is self.child is self.other)
@@ -65,12 +65,12 @@
         assert item.name == itemname
 
     def test_traversal(self):
-        mymap = {'rootitem': self.root,         # == /rootitem
-                 'child/joe': self.child,       # Direct child of namespace.
-                 'other/jane': self.other,      # Direct child of namespace.
-                 'child/': self.child,          # Root of namespace itself (!= root)
-                 'other/': self.other,          # Root of namespace
-                 '': self.root,                 # Due to lack of any namespace info
+        mymap = {u'rootitem': self.root,         # == /rootitem
+                 u'child/joe': self.child,       # Direct child of namespace.
+                 u'other/jane': self.other,      # Direct child of namespace.
+                 u'child/': self.child,          # Root of namespace itself (!= root)
+                 u'other/': self.other,          # Root of namespace
+                 u'': self.root,                 # Due to lack of any namespace info
                 }
 
         items_in = []
@@ -131,3 +131,103 @@
         assert backend is self.child
         assert name == ''
         assert mountpoint == 'child'
+
+
+    def test_history(self):
+        order = [(u'first', 0), (u'second', 0), (u'first', 1), (u'a', 0), (u'child/my_subitem', 0)]
+        for name, revno in order:
+            if revno == 0:
+                item = self.backend.create_item(name)
+            else:
+                item = self.backend.get_item(name)
+            item.create_revision(revno)
+            item.commit()
+
+            # Revisions are created too fast for the granularity of the revs' timestamps.
+            # This only affects the RouterBackend because there, several different
+            # backends are used and no means exists between backends for storing
+            # simultaneously created revs in the correct order. It affects the
+            # AclWrapperBackend tests as well because those use a RouterBackend
+            # internally for real-world-likeness.
+
+            # XXX XXX
+            # You may have realized that all the items above belong to the same backend, so this shouldn't actually matter.
+            # It does matter, however, once you consider that the RouterBackend uses the generic, slow history implementation.
+            # That implementation uses iteritems and then sorts all the revisions itself, hence discarding any ordering
+            # information for simultaneously created revisions. If we just call history on that single backend directly, it
+            # works without time.sleep. For n backends, however, you'd have to somehow merge the revisions into one generator
+            # again, thus discarding that information again. Besides, that would be a costly operation. The ordering for
+            # simultaneously created revisions remains stable since it's based on tuple ordering. Better proposals welcome.
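+            #
+            # For illustration (hypothetical equal timestamps): the index sorts
+            # history by the tuple (mtime, rev_no), so if (u'first', 1) and (u'a', 0)
+            # got the same mtime, u'a' (rev_no 0) would sort before u'first' (rev_no 1)
+            # even though u'a' was created later. Hence the sleep below.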
+            import time
+            time.sleep(1)
+
+        for num, rev in enumerate(self.backend.history(reverse=False)):
+            name, revno = order[num]
+            assert rev.item.name == name
+            assert rev.revno == revno
+
+        order.reverse()
+        for num, rev in enumerate(self.backend.history(reverse=True)):
+            name, revno = order[num]
+            assert rev.item.name == name
+            assert rev.revno == revno
+
+    # See history function in indexing.py for comments on why this test fails.
+    @py.test.mark.xfail
+    def test_history_size_after_rename(self):
+        item = self.backend.create_item(u'first')
+        item.create_revision(0)
+        item.commit()
+        item.rename(u'second')
+        item.create_revision(1)
+        item.commit()
+        assert len([rev for rev in self.backend.history()]) == 2
+
+    def test_history_after_destroy_item(self):
+        itemname = u"I will be completely destroyed"
+        rev_data = "I will be completely destroyed, too, hopefully"
+        item = self.backend.create_item(itemname)
+        rev = item.create_revision(0)
+        rev.write(rev_data)
+        item.commit()
+
+        item.destroy()
+
+        all_rev_data = [rev.read() for rev in self.backend.history()]
+        assert not rev_data in all_rev_data
+
+        for rev in self.backend.history():
+            assert not rev.item.name == itemname
+        for rev in self.backend.history(reverse=False):
+            assert not rev.item.name == itemname
+
+    def test_history_after_destroy_revision(self):
+        itemname = u"I will see my children die :-("
+        rev_data = "I will die!"
+        persistent_rev = "I will see my sibling die :-("
+        item = self.backend.create_item(itemname)
+        rev = item.create_revision(0)
+        rev.write(rev_data)
+        item.commit()
+        rev = item.create_revision(1)
+        rev.write(persistent_rev)
+        item.commit()
+
+        rev = item.get_revision(0)
+        rev.destroy()
+
+        for rev in self.backend.history():
+            assert not (rev.item.name == itemname and rev.revno == 0)
+
+    def test_history_item_names(self):
+        item = self.backend.create_item(u'first')
+        item.create_revision(0)
+        item.commit()
+        item.rename(u'second')
+        item.create_revision(1)
+        item.commit()
+        revs_in_create_order = [rev for rev in self.backend.history(reverse=False)]
+        assert revs_in_create_order[0].revno == 0
+        assert revs_in_create_order[0].item.name == u'second'
+        assert revs_in_create_order[1].revno == 1
+        assert revs_in_create_order[1].item.name == u'second'
+
--- a/MoinMoin/storage/backends/__init__.py	Thu Aug 18 12:25:19 2011 +0530
+++ b/MoinMoin/storage/backends/__init__.py	Thu Aug 18 13:02:23 2011 +0530
@@ -11,6 +11,7 @@
 """
 
 
+from flask import current_app as app
 from flask import g as flaskg
 
 from MoinMoin.storage.serialization import unserialize
@@ -61,43 +62,37 @@
             hierarchic=False,
         )
 
-    def _create_backends(BackendClass, backend_uri, index_uri):
+    def _create_backends(BackendClass, backend_uri):
         backends = []
         for name in [CONTENT, USERPROFILES, TRASH, ]:
             parms = dict(nsname=name)
             backend = BackendClass(backend_uri % parms)
             backends.append(backend)
-        router_index_uri = index_uri % dict(nsname='ROUTER')
-        return backends + [router_index_uri]
+        return backends
 
     if backend_uri.startswith(FS_PREFIX):
         instance_uri = backend_uri[len(FS_PREFIX):]
-        index_uri = 'sqlite:///%s_index.sqlite' % instance_uri
-        content, userprofile, trash, router_index_uri = _create_backends(fs.FSBackend, instance_uri, index_uri)
+        content, userprofile, trash = _create_backends(fs.FSBackend, instance_uri)
 
     elif backend_uri.startswith(FS2_PREFIX):
         instance_uri = backend_uri[len(FS2_PREFIX):]
-        index_uri = 'sqlite:///%s_index.sqlite' % instance_uri
-        content, userprofile, trash, router_index_uri = _create_backends(fs2.FS2Backend, instance_uri, index_uri)
+        content, userprofile, trash = _create_backends(fs2.FS2Backend, instance_uri)
 
     elif backend_uri.startswith(HG_PREFIX):
         # Due to external dependency that may not always be present, import hg backend here:
         from MoinMoin.storage.backends import hg
         instance_uri = backend_uri[len(HG_PREFIX):]
-        index_uri = 'sqlite:///%s_index.sqlite' % instance_uri
-        content, userprofile, trash, router_index_uri = _create_backends(hg.MercurialBackend, instance_uri, index_uri)
+        content, userprofile, trash = _create_backends(hg.MercurialBackend, instance_uri)
 
     elif backend_uri.startswith(SQLA_PREFIX):
         # XXX Move this import to the module level if we depend on sqlalchemy and it is in sys.path
         from MoinMoin.storage.backends import sqla
         instance_uri = backend_uri[len(SQLA_PREFIX):]
-        index_uri = '%s_index' % instance_uri
-        content, userprofile, trash, router_index_uri = _create_backends(sqla.SQLAlchemyBackend, instance_uri, index_uri)
+        content, userprofile, trash = _create_backends(sqla.SQLAlchemyBackend, instance_uri)
 
     elif backend_uri == MEMORY_PREFIX:
         instance_uri = ''
-        index_uri = 'sqlite://' # default is memory
-        content, userprofile, trash, router_index_uri = _create_backends(memory.MemoryBackend, instance_uri, index_uri)
+        content, userprofile, trash = _create_backends(memory.MemoryBackend, instance_uri)
 
     elif backend_uri.startswith(FS19_PREFIX):
         # special case: old moin19 stuff
@@ -121,15 +116,14 @@
                     (ns_content, content, content_acl),
     ]
 
-    return namespace_mapping, router_index_uri
+    return namespace_mapping
 
 
 def upgrade_sysitems(xmlfile):
     """
     Upgrade the wiki's system pages from an XML file.
     """
-    tmp_backend = router.RouterBackend([('/', memory.MemoryBackend())],
-                                       index_uri='sqlite://')
+    tmp_backend = router.RouterBackend([('/', memory.MemoryBackend())], cfg=app.cfg)
     unserialize(tmp_backend, xmlfile)
 
     # clone to real backend from config WITHOUT checking ACLs!
--- a/MoinMoin/storage/backends/acl.py	Thu Aug 18 12:25:19 2011 +0530
+++ b/MoinMoin/storage/backends/acl.py	Thu Aug 18 13:02:23 2011 +0530
@@ -146,20 +146,6 @@
 
     iteritems = iter_items_noindex
 
-    def history(self, reverse=True):
-        """
-        @see: Backend.history.__doc__
-        """
-        for revision in self.backend.history(reverse):
-            if self._may(revision.item.name, READ):
-                # The revisions returned here should only be StoredRevisions.
-                # We wrap them nevertheless to be sure. Esp. revision.item
-                # would otherwise give access to an unwrapped item.
-                item = revision.item
-                item = AclWrapperItem(item, self)
-                revision = AclWrapperRevision(revision, item)
-                yield revision
-
     def _get_acl(self, itemname):
         """
         Get ACL strings from the last revision's metadata and return ACL object.
--- a/MoinMoin/storage/backends/flatfile.py	Thu Aug 18 12:25:19 2011 +0530
+++ b/MoinMoin/storage/backends/flatfile.py	Thu Aug 18 13:02:23 2011 +0530
@@ -63,13 +63,6 @@
         revpath = self._rev_path(name)
         return os.path.exists(revpath)
 
-    def history(self, reverse=True):
-        rev_list = [i.get_revision(-1) for i in self.iteritems()]
-        rev_list.sort(lambda x, y: cmp(x.timestamp, y.timestamp))
-        if reverse:
-            rev_list.reverse()
-        return iter(rev_list)
-
     def get_item(self, itemname):
         if not self._exists(itemname):
             raise NoSuchItemError("No such item, %r" % (itemname))
--- a/MoinMoin/storage/backends/hg.py	Thu Aug 18 12:25:19 2011 +0530
+++ b/MoinMoin/storage/backends/hg.py	Thu Aug 18 13:02:23 2011 +0530
@@ -156,39 +156,6 @@
 
     iteritems = iter_items_noindex
 
-    def history(self, reverse=True):
-        """
-        Return generator for iterating in given direction over Item Revisions
-        with timestamp order preserved.
-        Yields MercurialStoredRevision objects.
-        """
-        def restore_revision(name, id):
-            item = Item(self, name)
-            item._id = id
-            rev = MercurialStoredRevision(item, revno)
-            rev._item_id = item._id
-            return rev
-
-        # this is costly operation, but no better idea now how to do it and not
-        # break pull/merge stuff
-        renamed_items = {}
-        for ctx in self._iter_changelog(filter_meta='renamed_to'):
-            meta = self._decode_metadata(ctx.extra(), BACKEND_METADATA_PREFIX)
-            oldid, renamed_to = meta['renamed_id'], meta['renamed_to']
-            renamed_items.setdefault(oldid, []).append(renamed_to)
-
-        for ctx in self._iter_changelog(reverse=reverse):
-            meta = self._decode_metadata(ctx.extra(), BACKEND_METADATA_PREFIX)
-            revno, oldid, oldname = meta['rev'], meta['id'], meta['name']
-            try:
-                for (id, name) in renamed_items[oldid]:
-                    # consider you have backend merged from two instances,
-                    # where there was item A renamed to B in first, and the same A
-                    # renamed to C in second
-                    yield restore_revision(name, id)
-            except KeyError:
-                yield restore_revision(oldname, oldid)
-
     def _get_revision(self, item, revno):
         """
         Return given Revision of an Item. Raise NoSuchRevisionError
--- a/MoinMoin/storage/backends/indexing.py	Thu Aug 18 12:25:19 2011 +0530
+++ b/MoinMoin/storage/backends/indexing.py	Thu Aug 18 13:02:23 2011 +0530
@@ -1,4 +1,5 @@
-# Copyright: 2010 MoinMoin:ThomasWaldmann
+# Copyright: 2010-2011 MoinMoin:ThomasWaldmann
+# Copyright: 2011 MoinMoin:MichaelMayorov
 # License: GNU GPL v2 (or any later version), see LICENSE.txt for details.
 
 """
@@ -8,11 +9,7 @@
     Item, Revision classes to support flexible metadata indexing and querying
     for wiki items / revisions
 
-    Wiki items are identified by a UUID (in the index, it is internally mapped
-    to an integer for more efficient processing).
-    Revisions of an item are identified by a integer revision number (and the
-    parent item).
-
+    Wiki items and revisions of the same item are identified by the same UUID.
     The wiki item name is contained in the item revision's metadata.
     If you rename an item, this is done by creating a new revision with a different
     (new) name in its revision metadata.
@@ -25,22 +22,23 @@
 from uuid import uuid4
 make_uuid = lambda: unicode(uuid4().hex)
 
-from MoinMoin import log
-logging = log.getLogger(__name__)
-
 from MoinMoin.storage.error import NoSuchItemError, NoSuchRevisionError, \
                                    AccessDeniedError
 from MoinMoin.config import ACL, CONTENTTYPE, UUID, NAME, NAME_OLD, MTIME, TAGS
+from MoinMoin.search.indexing import backend_to_index
+from MoinMoin.converter import convert_to_indexable
 
+from MoinMoin import log
+logging = log.getLogger(__name__)
 
 class IndexingBackendMixin(object):
     """
-    Backend indexing support
+    Backend indexing support / functionality using the index.
     """
     def __init__(self, *args, **kw):
-        index_uri = kw.pop('index_uri', None)
+        cfg = kw.pop('cfg')
         super(IndexingBackendMixin, self).__init__(*args, **kw)
-        self._index = ItemIndex(index_uri)
+        self._index = ItemIndex(cfg)
 
     def close(self):
         self._index.close()
@@ -59,9 +57,6 @@
         item.publish_metadata()
         return item
 
-    def index_rebuild(self):
-        return self._index.index_rebuild(self)
-
     def history(self, reverse=True, item_name=u'', start=None, end=None):
         """
         History implementation using the index.
@@ -77,8 +72,9 @@
             # it can't find it because it was already renamed to "second."
             # Some suggested solutions are: using some neverchanging uuid to identify some specific item
             # or continuing to use the name, but tracking name changes within the item's history.
-            rev_datetime, name, rev_no, rev_metas = result
+            rev_datetime, name, rev_no = result
             try:
+                logging.debug("HISTORY: name %s revno %s" % (name, rev_no))
                 item = self.get_item(name)
                 yield item.get_revision(rev_no)
             except AccessDeniedError as e:
@@ -89,8 +85,7 @@
 
     def all_tags(self):
         """
-        Return a unsorted list of tuples (count, tag, tagged_itemnames) for all
-        tags.
+        Return an unsorted list of tuples (count, tag, tagged_itemnames) for all tags.
         """
         return self._index.all_tags()
 
@@ -104,8 +99,6 @@
 class IndexingItemMixin(object):
     """
     Item indexing support
-
-    When a commit happens, index stuff.
     """
     def __init__(self, backend, *args, **kw):
         super(IndexingItemMixin, self).__init__(backend, *args, **kw)
@@ -180,7 +173,7 @@
         if UUID not in self:
             self[UUID] = uuid # do we want the item's uuid in the rev's metadata?
         if CONTENTTYPE not in self:
-            self[CONTENTTYPE] = 'application/octet-stream'
+            self[CONTENTTYPE] = u'application/octet-stream'
         metas = self
         logging.debug("item %r revno %d update index:" % (name, revno))
         for k, v in metas.items():
@@ -196,239 +189,137 @@
         revno = self.revno
         metas = self
         logging.debug("item %r revno %d remove index!" % (name, revno))
-        self._index.remove_rev(uuid, revno)
+        self._index.remove_rev(metas[UUID], revno)
 
     # TODO maybe use this class later for data indexing also,
     # TODO by intercepting write() to index data written to a revision
 
-from MoinMoin.util.kvstore import KVStoreMeta, KVStore
-
-from sqlalchemy import Table, Column, Integer, String, Unicode, DateTime, PickleType, MetaData, ForeignKey
-from sqlalchemy import create_engine, select
-from sqlalchemy.sql import and_, exists, asc, desc
+from whoosh.writing import AsyncWriter
+from MoinMoin.search.indexing import WhooshIndex
 
 class ItemIndex(object):
     """
     Index for Items/Revisions
     """
-    def __init__(self, index_uri):
-        metadata = MetaData()
-        metadata.bind = create_engine(index_uri, echo=False)
-
-        # for sqlite, lengths are not needed, but for other SQL DBs:
-        UUID_LEN = 32
-        VALUE_LEN = KVStoreMeta.VALUE_LEN # we duplicate values from there to our table
-
-        # items have a persistent uuid
-        self.item_table = Table('item_table', metadata,
-            Column('id', Integer, primary_key=True), # item's internal uuid
-            # reference to current revision:
-            Column('current', ForeignKey('rev_table.id', name="current", use_alter=True), type_=Integer),
-            # some important stuff duplicated here for easy availability:
-            # from item metadata:
-            Column('uuid', String(UUID_LEN), index=True, unique=True), # item's official persistent uuid
-            # from current revision's metadata:
-            Column('name', Unicode(VALUE_LEN), index=True, unique=True),
-            Column('contenttype', Unicode(VALUE_LEN), index=True),
-            Column('acl', Unicode(VALUE_LEN)),
-            Column('tags', Unicode(VALUE_LEN)),
-        )
-
-        # revisions have a revno and a parent item
-        self.rev_table = Table('rev_table', metadata,
-            Column('id', Integer, primary_key=True),
-            Column('item_id', ForeignKey('item_table.id')),
-            Column('revno', Integer),
-            # some important stuff duplicated here for easy availability:
-            Column('datetime', DateTime, index=True),
-        )
-
-        item_kvmeta = KVStoreMeta('item', metadata, Integer)
-        rev_kvmeta = KVStoreMeta('rev', metadata, Integer)
-        metadata.create_all()
-        self.metadata = metadata
-        self.item_kvstore = KVStore(item_kvmeta)
-        self.rev_kvstore = KVStore(rev_kvmeta)
+    def __init__(self, cfg):
+        self.wikiname = cfg.interwikiname or u''
+        self.index_object = WhooshIndex(cfg=cfg)
 
     def close(self):
-        engine = self.metadata.bind
-        engine.dispose()
-
-    def index_rebuild(self, backend):
-        self.metadata.drop_all()
-        self.metadata.create_all()
-        for item in backend.iter_items_noindex():
-            item.update_index()
-            for revno in item.list_revisions():
-                rev = item.get_revision(revno)
-                logging.debug("rebuild %s %d" % (rev[NAME], revno))
-                rev.update_index()
-
-    def get_item_id(self, uuid):
-        """
-        return the internal item id for some item with uuid or
-        None, if not found.
-        """
-        item_table = self.item_table
-        result = select([item_table.c.id],
-                        item_table.c.uuid == uuid
-                       ).execute().fetchone()
-        if result:
-            return result[0]
+        self.index_object.all_revisions_index.close()
+        self.index_object.latest_revisions_index.close()
 
     def update_item(self, metas):
         """
-        update an item with item-level metadata <metas>
-
-        note: if item does not exist already, it is added
+        update item (not revision!) metadata
         """
-        name = metas.get(NAME, '') # item name (if revisioned: same as current revision's name)
-        uuid = metas.get(UUID, '') # item uuid (never changes)
-        item_table = self.item_table
-        item_id = self.get_item_id(uuid)
-        if item_id is None:
-            res = item_table.insert().values(uuid=uuid, name=name).execute()
-            item_id = res.inserted_primary_key[0]
-        self.item_kvstore.store_kv(item_id, metas)
-        return item_id
-
-    def cache_in_item(self, item_id, rev_id, rev_metas):
-        """
-        cache some important values from current revision into item for easy availability
-        """
-        item_table = self.item_table
-        item_table.update().where(item_table.c.id == item_id).values(
-            current=rev_id,
-            name=rev_metas[NAME],
-            contenttype=rev_metas[CONTENTTYPE],
-            acl=rev_metas.get(ACL, ''),
-            tags=u'|' + u'|'.join(rev_metas.get(TAGS, [])) + u'|',
-        ).execute()
+        # XXX we do not have an index for item metadata yet!
 
     def remove_item(self, metas):
         """
-        remove an item
-
-        note: does not remove revisions, these should be removed first
+        remove all data related to this item and all its revisions from the index
         """
-        item_table = self.item_table
-        name = metas.get(NAME, '') # item name (if revisioned: same as current revision's name)
-        uuid = metas.get(UUID, '') # item uuid (never changes)
-        item_id = self.get_item_id(uuid)
-        if item_id is not None:
-            self.item_kvstore.store_kv(item_id, {})
-            item_table.delete().where(item_table.c.id == item_id).execute()
+        with self.index_object.latest_revisions_index.searcher() as latest_revs_searcher:
+            doc_number = latest_revs_searcher.document_number(uuid=metas[UUID],
+                                                              name_exact=metas[NAME],
+                                                              wikiname=self.wikiname
+                                                             )
+        if doc_number is not None:
+            with AsyncWriter(self.index_object.latest_revisions_index) as async_writer:
+                async_writer.delete_document(doc_number)
 
-    def add_rev(self, uuid, revno, metas):
+        with self.index_object.all_revisions_index.searcher() as all_revs_searcher:
+            doc_numbers = list(all_revs_searcher.document_numbers(uuid=metas[UUID],
+                                                                  name_exact=metas[NAME],
+                                                                  wikiname=self.wikiname
+                                                                 ))
+        if doc_numbers:
+            with AsyncWriter(self.index_object.all_revisions_index) as async_writer:
+                for doc_number in doc_numbers:
+                    async_writer.delete_document(doc_number)
+
+    def add_rev(self, uuid, revno, rev):
         """
         add a new revision <revno> for item <uuid> with metadata <metas>
-
-        currently assumes that added revision will be latest/current revision (not older/non-current)
         """
-        rev_table = self.rev_table
-        item_metas = dict(uuid=uuid, name=metas[NAME])
-        item_id = self.update_item(item_metas)
-
-        # get (or create) the revision entry
-        result = select([rev_table.c.id],
-                        and_(rev_table.c.revno == revno,
-                             rev_table.c.item_id == item_id)
-                       ).execute().fetchone()
-        if result:
-            rev_id = result[0]
-        else:
-            dt = datetime.datetime.utcfromtimestamp(metas[MTIME])
-            res = rev_table.insert().values(revno=revno, item_id=item_id, datetime=dt).execute()
-            rev_id = res.inserted_primary_key[0]
-
-        self.rev_kvstore.store_kv(rev_id, metas)
-
-        self.cache_in_item(item_id, rev_id, metas)
-        return rev_id
+        with self.index_object.all_revisions_index.searcher() as all_revs_searcher:
+            all_found_document = all_revs_searcher.document(uuid=rev[UUID],
+                                                            rev_no=revno,
+                                                            wikiname=self.wikiname
+                                                           )
+        with self.index_object.latest_revisions_index.searcher() as latest_revs_searcher:
+            latest_found_document = latest_revs_searcher.document(uuid=rev[UUID],
+                                                                  wikiname=self.wikiname
+                                                                 )
+        logging.debug("Processing: name %s revno %s" % (rev[NAME], revno))
+        rev.seek(0) # for a new revision, file pointer points to EOF, rewind first
+        rev_content = convert_to_indexable(rev)
+        logging.debug("Indexable content: %r" % (rev_content[:250], ))
+        if not all_found_document:
+            schema = self.index_object.all_revisions_index.schema
+            with AsyncWriter(self.index_object.all_revisions_index) as async_writer:
+                converted_rev = backend_to_index(rev, revno, schema, rev_content, self.wikiname)
+                logging.debug("All revisions: adding %s %s", converted_rev[NAME], converted_rev["rev_no"])
+                async_writer.add_document(**converted_rev)
+        if not latest_found_document or int(revno) > latest_found_document["rev_no"]:
+            schema = self.index_object.latest_revisions_index.schema
+            with AsyncWriter(self.index_object.latest_revisions_index) as async_writer:
+                converted_rev = backend_to_index(rev, revno, schema, rev_content, self.wikiname)
+                logging.debug("Latest revisions: updating %s %s", converted_rev[NAME], converted_rev["rev_no"])
+                async_writer.update_document(**converted_rev)
 
     def remove_rev(self, uuid, revno):
         """
         remove a revision <revno> of item <uuid>
-
-        Note:
-
-        * does not update metadata values cached in item (this is only a
-          problem if you delete latest revision AND you don't delete the
-          whole item anyway)
         """
-        item_id = self.get_item_id(uuid)
-        assert item_id is not None
-
-        # get the revision entry
-        rev_table = self.rev_table
-        result = select([rev_table.c.id],
-                        and_(rev_table.c.revno == revno,
-                             rev_table.c.item_id == item_id)
-                       ).execute().fetchone()
-        if result:
-            rev_id = result[0]
-            self.rev_kvstore.store_kv(rev_id, {})
-            rev_table.delete().where(rev_table.c.id == rev_id).execute()
-
-    def get_uuid_revno_name(self, rev_id):
-        """
-        get item uuid and revision number by rev_id
-        """
-        item_table = self.item_table
-        rev_table = self.rev_table
-        result = select([item_table.c.uuid, rev_table.c.revno, item_table.c.name],
-                        and_(rev_table.c.id == rev_id,
-                             item_table.c.id == rev_table.c.item_id)
-                       ).execute().fetchone()
-        return result
+        with self.index_object.latest_revisions_index.searcher() as latest_revs_searcher:
+            latest_doc_number = latest_revs_searcher.document_number(uuid=uuid,
+                                                                     rev_no=revno,
+                                                                     wikiname=self.wikiname
+                                                                    )
+        with self.index_object.all_revisions_index.searcher() as all_revs_searcher:
+            doc_number = all_revs_searcher.document_number(uuid=uuid,
+                                                           rev_no=revno,
+                                                           wikiname=self.wikiname
+                                                          )
+        if doc_number is not None:
+            with AsyncWriter(self.index_object.all_revisions_index) as async_writer:
+                logging.debug("All revisions: removing %d", doc_number)
+                async_writer.delete_document(doc_number)
+        if latest_doc_number is not None:
+            with AsyncWriter(self.index_object.latest_revisions_index) as async_writer:
+                logging.debug("Latest revisions: removing %d", latest_doc_number)
+                async_writer.delete_document(latest_doc_number)
 
     def history(self, mountpoint=u'', item_name=u'', reverse=True, start=None, end=None):
-        """
-        Yield ready-to-use history raw data for this backend.
-        """
         if mountpoint:
             mountpoint += '/'
-
-        item_table = self.item_table
-        rev_table = self.rev_table
-
-        selection = [rev_table.c.datetime, item_table.c.name, rev_table.c.revno, rev_table.c.id, ]
-
-        if reverse:
-            order_attr = desc(rev_table.c.datetime)
-        else:
-            order_attr = asc(rev_table.c.datetime)
-
-        if not item_name:
-            # empty item_name = all items
-            condition = item_table.c.id == rev_table.c.item_id
-        else:
-            condition = and_(item_table.c.id == rev_table.c.item_id,
-                             item_table.c.name == item_name)
-
-        query = select(selection, condition).order_by(order_attr)
-        if start is not None:
-            query = query.offset(start)
-            if end is not None:
-                query = query.limit(end-start)
-
-        for rev_datetime, name, revno, rev_id in query.execute().fetchall():
-            rev_metas = self.rev_kvstore.retrieve_kv(rev_id)
-            yield (rev_datetime, mountpoint + name, revno, rev_metas)
+        with self.index_object.all_revisions_index.searcher() as all_revs_searcher:
+            if item_name:
+                docs = all_revs_searcher.documents(name_exact=item_name,
+                                                   wikiname=self.wikiname
+                                                  )
+            else:
+                docs = all_revs_searcher.documents(wikiname=self.wikiname)
+            from operator import itemgetter
+            # sort by mtime and rev_no to deal better with mtime granularity for fast item rev updates
+            for doc in sorted(docs, key=itemgetter("mtime", "rev_no"), reverse=reverse)[start:end]:
+                yield (doc[MTIME], mountpoint + doc[NAME], doc["rev_no"])
 
     def all_tags(self):
-        item_table = self.item_table
-        result = select([item_table.c.name, item_table.c.tags],
-                        item_table.c.tags != u'||').execute().fetchall()
-        tags_names = {}
-        for name, tags in result:
-            for tag in tags.split(u'|')[1:-1]:
-                tags_names.setdefault(tag, []).append(name)
-        counts_tags_names = [(len(names), tag, names) for tag, names in tags_names.items()]
-        return counts_tags_names
+        with self.index_object.latest_revisions_index.searcher() as latest_revs_searcher:
+            docs = latest_revs_searcher.documents(wikiname=self.wikiname)
+            tags_names = {}
+            for doc in docs:
+                tags = doc.get(TAGS, [])
+                logging.debug("name %s rev %s tags %s" % (doc[NAME], doc["rev_no"], tags))
+                for tag in tags:
+                    tags_names.setdefault(tag, []).append(doc[NAME])
+            counts_tags_names = [(len(names), tag, names) for tag, names in tags_names.items()]
+            return counts_tags_names
 
     def tagged_items(self, tag):
-        item_table = self.item_table
-        result = select([item_table.c.name],
-                        item_table.c.tags.like('%%|%s|%%' % tag)).execute().fetchall()
-        return [row[0] for row in result]
+        with self.index_object.latest_revisions_index.searcher() as latest_revs_searcher:
+            docs = latest_revs_searcher.documents(tags=tag, wikiname=self.wikiname)
+            return [doc[NAME] for doc in docs]
+
--- a/MoinMoin/storage/backends/memory.py	Thu Aug 18 12:25:19 2011 +0530
+++ b/MoinMoin/storage/backends/memory.py	Thu Aug 18 13:02:23 2011 +0530
@@ -66,16 +66,6 @@
         self._item_metadata = {}            # {id : {metadata}}
         self._item_revisions = {}           # {id : {revision_id : (revision_data, {revision_metadata})}}
         self._item_metadata_lock = {}       # {id : Lockobject}
-        self._revision_history = []
-
-    def history(self, reverse=True):
-        """
-        @see: Backend.history.__doc__
-        """
-        if reverse:
-            return iter(self._revision_history[::-1])
-        else:
-            return iter(self._revision_history)
 
     def get_item(self, itemname):
         """
@@ -139,11 +129,6 @@
             except KeyError:
                 pass
 
-        # Create a new revision_history list first and then swap that atomically with
-        # the old one (that still contains the item's revs).
-        rev_hist = [rev for rev in self._revision_history if rev.item.name != item.name]
-        self._revision_history = rev_hist
-
     def iter_items_noindex(self):
         """
         @see: Backend.iter_items_noindex.__doc__
@@ -216,10 +201,6 @@
             # The revision has already been destroyed by someone else. No need to make our hands dirty.
             return
 
-        # Remove the rev from history
-        rev_history = [rev for rev in self._revision_history if (rev.item.name != revision.item.name or rev.revno != revision.revno)]
-        self._revision_history = rev_history
-
     def _rename_item(self, item, newname):
         """
         @see: Backend._rename_item.__doc__
@@ -273,8 +254,6 @@
         if revision._metadata is None:
             revision._metadata = {}
         self._item_revisions[item._item_id][revision.revno] = (revision._data.getvalue(), revision._metadata.copy())
-        revision = item.get_revision(revision.revno)
-        self._revision_history.append(revision)
 
     def _rollback_item(self, rev):
         """
--- a/MoinMoin/storage/backends/sqla.py	Thu Aug 18 12:25:19 2011 +0530
+++ b/MoinMoin/storage/backends/sqla.py	Thu Aug 18 13:02:23 2011 +0530
@@ -177,20 +177,6 @@
         item = SQLAItem(self, itemname)
         return item
 
-    def history(self, reverse=True):
-        """
-        @see: Backend.history.__doc__
-        """
-        session = self.Session()
-        col = SQLARevision.id
-        if reverse:
-            col = col.desc()
-        for rev in session.query(SQLARevision).order_by(col).yield_per(1):
-            # yield_per(1) says: Don't load them into memory all at once.
-            rev.setup(self)
-            yield rev
-        session.close()
-
     def iter_items_noindex(self):
         """
         Returns an iterator over all items available in this backend.
--- a/MoinMoin/templates/base.html	Thu Aug 18 12:25:19 2011 +0530
+++ b/MoinMoin/templates/base.html	Thu Aug 18 13:02:23 2011 +0530
@@ -50,15 +50,6 @@
     <script src="{{ url_for('serve.files', name='jquery', filename='jquery.min.js') }}"></script>
     <script src="{{ url_for('serve.files', name='svgweb', filename='svg.js') }}"></script>
     <script src="{{ url_for('static', filename='js/common.js') }}"></script>
-    <script type="text/x-mathjax-config">
-  MathJax.Hub.Config({
-    extensions: ["tex2jax.js"],
-    jax: ["input/TeX","output/HTML-CSS"],
-    tex2jax: {inlineMath: [["$","$"],["\\(","\\)"]]}
-  });
-</script>
-<script src="{{ url_for('serve.files', name='mathjax', filename='MathJax.js') }}"> </script>
- 
     {{ scripts }}
     <!--[if lt IE 9]>
         {# TODO: use a local copy later #}
--- a/MoinMoin/templates/forms.html	Thu Aug 18 12:25:19 2011 +0530
+++ b/MoinMoin/templates/forms.html	Thu Aug 18 13:02:23 2011 +0530
@@ -54,3 +54,9 @@
     </dd>
     {% endif %}
 {% endmacro %}
+
+{% macro render_field_without_markup(gen, field, field_type) %}
+    {{ gen.input(field, type=field_type) }}
+    {{ gen.label(field) }}
+    {{ render_errors(field) }}
+{% endmacro %}
--- a/MoinMoin/templates/layout.html	Thu Aug 18 12:25:19 2011 +0530
+++ b/MoinMoin/templates/layout.html	Thu Aug 18 13:02:23 2011 +0530
@@ -23,6 +23,7 @@
         <div>
             {{ gen.input(search_form['q'], type='search', id='moin-search-query', size='30') }}
             {{ gen.input(search_form['submit'], type='submit') }}
+            {{ gen.input(search_form['pagelen'], type='hidden', value='25') }}
             {{ forms.render_errors(search_form) }}
         </div>
     {{ gen.form.close() }}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MoinMoin/templates/search_results.html	Thu Aug 18 13:02:23 2011 +0530
@@ -0,0 +1,61 @@
+{% extends theme("layout.html") %}
+{% import "utils.html" as utils %}
+{% block content %}
+    {% if results %}
+    <p class="searchstats">
+        {{ _("Result: Page %(start_page)d of %(end_page)d.
+              Showing results %(start_result)d - %(end_result)d of %(result_len)d (%(runtime).3f secs)",
+              start_page=results.pagenum, end_page=results.pagecount,
+              start_result=results.offset + 1, end_result=results.offset + results.pagelen,
+              result_len=results|length, runtime=results.results.runtime
+            )
+        }}
+    {% endif %}
+    {{ gen.form.open(medium_search_form, id='moin-long-searchform', method='get', action=url_for('frontend.show_item', item_name=item_name)) }}
+        <div>
+            {{ gen.input(medium_search_form['q'], type='search', id='moin-search-query') }}
+            {{ gen.input(medium_search_form['submit'], type='submit') }}
+            {{ gen.input(medium_search_form['pagelen'], type='hidden', value='25') }}
+            {{ forms.render_field_without_markup(gen, medium_search_form['search_in_all'], 'checkbox') }}
+            {{ forms.render_errors(medium_search_form) }}
+        </div>
+    {{ gen.form.close() }}
+    </p>
+        {% if results %}
+        <div class="searchresults">
+        <table>
+            {% for result in results %}
+                {% if result['wikiname'] == cfg.interwikiname %}
+                    <tr>
+                        <td class="moin-wordbreak">{{ result.pos + 1 }}
+                        <a href="{{ url_for_item(item_name=result['name'], wiki_name='Self', rev=result['rev_no']) }}"><b>{{ result['name'] }}</b></a>
+                        </td>
+                    </tr>
+                    <tr>
+                        <td>
+                            <p class="info searchhitinfobar">{{ _("Revision: %(rev_no)d Last Change: %(mtime)s", rev_no=result['rev_no'], mtime=result['mtime']|datetimeformat) }}</p>
+                        </td>
+                    </tr>
+                    <tr>
+                        <td>
+                            {% if user.may.read(result['name']) %}
+                                <p class="info foundtext">{{ result.highlights('content')|safe }}</p>
+                            {% else %}
+                                <p class="info foundtext">{{ _("You don't have read permission for this item.") }}</p>
+                            {% endif %}
+                        </td>
+                    </tr>
+                {% else %}
+                    <tr>
+                        <td class="moin-wordbreak">{{ result.pos + 1 }}
+                        <a class="moin-interwiki" href="{{ url_for_item(item_name=result['name'], wiki_name=result['wikiname'], rev=result['rev_no']) }}"><b>{{ "%s:%s" % (result['wikiname'], result['name']) }}</b></a>
+                        </td>
+                    </tr>
+                {% endif %}
+            {% endfor %}
+        </table>
+        </div>
+    {% else %}
+        <h1>{{ _("No results for '%(query)s'", query=query) }}</h1>
+    {% endif %}
+{% endblock %}
--- a/MoinMoin/themes/modernized/static/css/common.css	Thu Aug 18 12:25:19 2011 +0530
+++ b/MoinMoin/themes/modernized/static/css/common.css	Thu Aug 18 13:02:23 2011 +0530
@@ -175,9 +175,12 @@
 .searchresults dt { margin-top: 1em; font-weight: normal; }
 .searchresults dd,
 .searchresults p { font-size: 0.85em; }
+.searchresults td { border-width: 0; }
+.searchresults p.info { margin-left: 2%; }
 .searchresults .searchhitinfobar { color: #008000; margin-left: 15px; margin-top: 0; }
-p.searchstats { font-size: 0.8em; text-align: right; width: 100%; background-color: #E6EAF0;
-            border-top: 1px solid #9088DC; padding: 2px; }
+.searchresults .foundtext { margin-left: 15px; margin-top: 0; }
+p.searchstats { font-size: 0.8em; text-align: right; width: 100%;
+             padding: 2px; margin-top: -15px; }
 p.searchhint { background-color: #E6EAF0; border: 1px solid #9088DC; padding: 2px; }
 .searchpages { margin-left: auto; margin-right: auto; }
 .searchpages tr,
@@ -471,6 +474,11 @@
 #moin-searchform ul { border: 1px solid #A4B9DF; margin: 0; padding: 0; background-color: #F3F7FD; }
 #moin-searchform li { list-style:none; }
 
+/* moin-extended searchform */
+#moin-long-searchform { padding: 0; font-size: 0.82em; text-align: left; }
+#moin-long-searchform div { margin: 0; }
+#moin-search-query { width: 50%; }
+
 /* moin-header moin-logo -- logos may be text only */
 #moin-logo { float: left; margin: 5px 10px; padding: 0; font-size: 1.4em; line-height: 1em; font-weight: bold; }
 #moin-logo img { vertical-align: middle; }
--- a/MoinMoin/util/kvstore.py	Thu Aug 18 12:25:19 2011 +0530
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,289 +0,0 @@
-#!/usr/bin/env python
-# Copyright: 2010 by Thomas Waldmann
-# License: GNU GPL v2 (or any later version), see LICENSE.txt for details.
-
-"""
-kvstore - a flexible key/value store using sqlalchemy
-"""
-
-
-from UserDict import DictMixin
-
-from sqlalchemy import MetaData, Table, Column
-from sqlalchemy import Integer, String, Unicode, DateTime, PickleType, ForeignKey
-from sqlalchemy import select
-from sqlalchemy.sql import and_, exists
-
-
-class KVStoreMeta(object):
-    """
-    Key/Value Store sqlalchemy metadata - defining DB tables and columns.
-    """
-    # for sqlite, lengths are not needed, but for other SQL DBs
-    # for key_table we try to maybe have row length of 64 Bytes:
-    TYPE_LEN = 16 # max. length of type names
-    KEY_LEN = 64 - TYPE_LEN - 4 # max. length of key names
-    # for value_table, we try to maybe have a row length of 4096 Bytes:
-    VALUE_LEN = 4096-4 # max. length of values (used for str and unicode types)
-
-    PICKLE_VALUE_TYPE = 'pickle'
-
-    @classmethod
-    def create_fact_table(cls, name, metadata, ref_type):
-        """
-        create a fact table that associates some outside item identified by
-        ref_id (type ref_type) with a kvstore key/value pair identified byi
-        key_id/value_id
-        """
-        return Table(name + '_fact', metadata,
-            Column('ref_id', ref_type, primary_key=True), # ForeignKey into some table of user
-            Column('key_id', ForeignKey('%s_key.id' % name), primary_key=True),
-            Column('value_id', Integer, primary_key=True), # this is a ForeignKey into some value_table
-        )
-
-    @classmethod
-    def create_key_table(cls, name, metadata):
-        """
-        create a key table that stores key names and value types (each key
-        name is only paired with values of 1 specific type)
-        """
-        return Table(name + '_key', metadata,
-            Column('id', Integer, primary_key=True),
-            Column('name', Unicode(cls.KEY_LEN), index=True, unique=True),
-            Column('value_type', String(cls.TYPE_LEN)), # key into value_tables
-        )
-
-    @classmethod
-    def create_value_tables(cls, name, metadata):
-        """
-        create multiple value tables - each stores values of some specific type.
-        we use one table (not: one column) per type to save some space.
-        dict keys for value_tables are Python's __class__.__name__.
-        """
-        value_tables = {
-            'unicode': Table(name + '_value_unicode', metadata,
-                Column('id', Integer, primary_key=True),
-                Column('value', Unicode(cls.VALUE_LEN), index=True),
-            ),
-            'str': Table(name + '_value_str', metadata,
-                Column('id', Integer, primary_key=True),
-                Column('value', String(cls.VALUE_LEN), index=True),
-            ),
-            'int': Table(name + '_value_int', metadata,
-                Column('id', Integer, primary_key=True),
-                Column('value', Integer, index=True),
-            ),
-            'datetime': Table(name + '_value_datetime', metadata,
-                Column('id', Integer, primary_key=True),
-                Column('value', DateTime, index=True),
-            ),
-            cls.PICKLE_VALUE_TYPE: Table(name + '_value_' + cls.PICKLE_VALUE_TYPE, metadata,
-                Column('id', Integer, primary_key=True),
-                Column('value', PickleType),
-            ),
-        }
-        supported_value_types = [key for key in value_tables if key != cls.PICKLE_VALUE_TYPE]
-        return value_tables, supported_value_types
-
-    def __init__(self, name, metadata, ref_type):
-        """
-        Initialize the KV store metadata
-        """
-        self.name = name
-        self.fact_table = self.create_fact_table(name, metadata, ref_type)
-        self.key_table = self.create_key_table(name, metadata)
-        self.value_tables, self.supported_value_types = self.create_value_tables(name, metadata)
-
-
-class KVStore(object):
-    """
-    A flexible Key/Value store
-
-    It can store arbitrary key names (unicode), new key names are added
-    automatically.
-
-    Each key name is associated with 1 value type, which is defined when first
-    key/value pair is added to the store. See KVStoreMeta for details about
-    value types.
-
-    A key name is stored only once and referred to by its key_id primary key.
-    A value is stored only once and referred to by its value_id primary key.
-
-    When retrieving key/value pairs, you just give the same reference id
-    (ref_id) that you used for storing those key/value pairs that belong to
-    that reference id.
-    """
-    def __init__(self, meta):
-        # note: for ease of use, we use implicit execution. it requires that
-        # you have bound an engine to the metadata: metadata.bind = engine
-        self.fact_table = meta.fact_table
-        self.key_table = meta.key_table
-        self.value_tables = meta.value_tables
-        self.supported_value_types = meta.supported_value_types
-        self.PICKLE_VALUE_TYPE = meta.PICKLE_VALUE_TYPE
-
-    def _get_value_type(self, value):
-        """
-        get the type string we use for this value.
-
-        For directly supported value types, it is the python class name,
-        otherwise we use pickle.
-        """
-        value_type = value.__class__.__name__
-        if value_type not in self.supported_value_types:
-            value_type = self.PICKLE_VALUE_TYPE
-        return value_type
-
-    def _get_key_id(self, name, value):
-        """
-        get key_id for <name> (create new entry for <name> if there is none yet)
-        """
-        key_table = self.key_table
-        name = unicode(name)
-        value_type = self._get_value_type(value)
-        result = select([key_table.c.id, key_table.c.value_type],
-                        key_table.c.name == name
-                       ).execute().fetchone()
-        if result:
-            key_id, wanted_value_type = result
-            assert wanted_value_type == value_type, "wanted: %r have: %r name: %r value: %r" % (
-                   wanted_value_type, value_type, name, value)
-        else:
-            res = key_table.insert().values(name=name, value_type=value_type).execute()
-            key_id = res.inserted_primary_key[0]
-        return key_id
-
-    def _get_value_id(self, value):
-        """
-        get value_id for value (create new entry for <value> if there is none yet)
-        """
-        value_type = self._get_value_type(value)
-        value_table = self.value_tables[value_type]
-        result = select([value_table.c.id],
-                        value_table.c.value == value
-                       ).execute().fetchone()
-        if result:
-            value_id = result[0]
-        else:
-            res = value_table.insert().values(value=value).execute()
-            value_id = res.inserted_primary_key[0]
-        return value_id
-
-    def _associate(self, ref_id, key_id, value_id):
-        """
-        associate a k/v pair identified by (key_id, value_id) with some entity identified by ref_id
-        """
-        fact_table = self.fact_table
-        result = select(['*'],
-                        and_(fact_table.c.ref_id == ref_id,
-                             fact_table.c.key_id == key_id)
-                       ).execute().fetchone()
-        if result:
-            res = fact_table.update().where(
-                      and_(fact_table.c.ref_id == ref_id,
-                           fact_table.c.key_id == key_id)
-                  ).values(value_id=value_id).execute()
-        else:
-            res = fact_table.insert().values(ref_id=ref_id, key_id=key_id, value_id=value_id).execute()
-
-    def _unassociate_all(self, ref_id):
-        """
-        unassociate all k/v pairs that are associated with some entity identified by ref_id
-        """
-        fact_table = self.fact_table
-        fact_table.delete().where(fact_table.c.ref_id == ref_id).execute()
-
-    def store(self, ref_id, name, value):
-        """
-        store a pair name:value and associate it with ref_id
-        """
-        key_id = self._get_key_id(name, value)
-        value_id = self._get_value_id(value)
-        self._associate(ref_id, key_id, value_id)
-
-    def retrieve(self, ref_id, name):
-        """
-        retrieve a value of a name:value pair associated with ref_id
-        """
-        fact_table = self.fact_table
-        key_table = self.key_table
-        value_tables = self.value_tables
-        name = unicode(name)
-        value_type, value_id = select([key_table.c.value_type, fact_table.c.value_id],
-                                      and_(fact_table.c.ref_id == ref_id,
-                                           fact_table.c.key_id == key_table.c.id,
-                                           key_table.c.name == name)
-                                     ).execute().fetchone()
-        value_table = value_tables[value_type]
-        value = select([value_table.c.value],
-                       value_table.c.id == value_id
-                      ).execute().fetchone()[0]
-        return value
-
-    def store_kv(self, ref_id, kvs):
-        """
-        store k/v pairs from kvs dict and associate them with ref_id
-        """
-        self._unassociate_all(ref_id)
-        for k, v in kvs.items():
-            self.store(ref_id, k, v)
-
-    def retrieve_kv(self, ref_id):
-        """
-        get all k/v pairs associated with ref_id
-        """
-        fact_table = self.fact_table
-        key_table = self.key_table
-        value_tables = self.value_tables
-        results = select([key_table.c.name, key_table.c.value_type, fact_table.c.value_id],
-                         and_(fact_table.c.ref_id == ref_id,
-                              fact_table.c.key_id == key_table.c.id)
-                        ).execute().fetchall()
-        result_dict = {}
-        for name, value_type, value_id in results:
-            value_table = value_tables[value_type]
-            value = select([value_table.c.value],
-                           value_table.c.id == value_id
-                          ).execute().fetchone()[0]
-            result_dict[name] = value
-        return result_dict
-
-    def has_kv(self, ref_table, **kvs):
-        """
-        return a conditional that can be used to select ref_table entries that
-        have all given k/v pairs associated with them
-        """
-        fact_table = self.fact_table
-        key_table = self.key_table
-        value_tables = self.value_tables
-        terms = []
-        for name, value in kvs.items():
-            name = unicode(name)
-            value_type = self._get_value_type(value)
-            # XXX does the comparison below work for pickle?
-            value_table = value_tables[value_type]
-            terms.append(exists().where(and_(
-                  key_table.c.name == name,
-                  value_table.c.value == value,
-                  fact_table.c.key_id == key_table.c.id,
-                  fact_table.c.value_id == value_table.c.id,
-                  ref_table.c.id == fact_table.c.ref_id)))
-        return and_(*terms)
-
-
-class KVItem(object, DictMixin):
-    """
-    Provides dict-like access to key/values related to one item referenced
-    by ref_id that is stored in a KVStore.
-    """
-    def __init__(self, kvstore, ref_id):
-        self.kvstore = kvstore
-        self.ref_id = ref_id
-
-    def __getitem__(self, name):
-        return self.kvstore.retrieve(self.ref_id, name)
-
-    def __setitem__(self, name, value):
-        self.kvstore.store(self.ref_id, name, value)
-
-
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/docs/admin/index.rst	Thu Aug 18 13:02:23 2011 +0530
@@ -0,0 +1,129 @@
+====================
+Working with indexes
+====================
+
+Configuration
+=============
+For the index scripts to work correctly, you need ``index_dir`` and
+``index_dir_tmp`` in your wiki config. Both have default values that you most
+likely don't need to change.
+
+If you do want to change them, try something like::
+
+      index_dir = "/path/to/moin-2.0/wiki/index/"
+      index_dir_tmp = "/path/to/moin-2.0/wiki/tmp_build/"
+
+**Note:** Paths MUST BE absolute.
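+
+If you prefer not to hardcode absolute paths, a small sketch for a
+``wikiconfig.py`` (assuming the config file sits next to your ``wiki``
+directory) could compute them::
+
+      import os
+
+      # sketch: compute absolute paths relative to this config file
+      _here = os.path.abspath(os.path.dirname(__file__))
+      index_dir = os.path.join(_here, 'wiki', 'index')
+      index_dir_tmp = os.path.join(_here, 'wiki', 'tmp_build')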
+
+To share one index between multiple wikis (a wiki farm), you must set the
+``interwikiname`` parameter in your wiki config:
+
+Example::
+
+        interwikiname = u'MyWiki'
+
+**Note:** To work correctly, ``interwikiname`` must be unique for each wiki.
+
+Offline index manipulation
+==========================
+Offline index manipulation lets the wiki admin build, update, clean, move and
+monitor the state of the indexes.
+
+MoinMoin uses two indexes: ``latest-revs`` (stores only the current revisions)
+and ``all-revs`` (stores all revisions).
+
+**Note:** Where you see ``<indexname>`` below, use one of ``latest-revs``,
+``all-revs`` or ``both``.
+
+Let's see what you can do with the indexes.
+
+Build
+-----
+Build fresh indexes from the moin backend.
+
+Example::
+
+    moin index --for <indexname> --action build
+
+Indexes are built under ``index_dir_tmp``, so building happens without
+affecting the indexes your wiki engine is currently using.
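+
+A typical full rebuild of a single wiki is therefore a ``build`` followed by a
+``move`` (see below)::
+
+    moin index --for both --action build  # build fresh indexes into index_dir_tmp
+    moin index --for both --action move   # replace the live indexes in index_dir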
+
+Update
+------
+Update the indexes to reflect the current backend contents: add new revisions,
+remove outdated ones.
+
+Example::
+
+    moin index --for <indexname> --action update
+
+Move
+----
+Move the indexes from ``index_dir_tmp`` to ``index_dir``.
+
+Example::
+
+    moin index --for <indexname> --action move
+
+Clean
+-----
+Create an empty index in ``index_dir`` for the given index (the previous index
+will be erased).
+
+Example::
+
+    moin index --for <indexname> --action clean
+
+Show
+----
+Show the contents of the index files in human-readable form.
+
+**Note:** Field length is limited to 40 chars.
+
+**Note:** Fields without the attribute ``stored=True`` are not displayed.
+
+Example::
+
+    moin index --for <indexname> --action show
+
+Building wiki farm
+==================
+A wiki farm allows admins to create several wikis that share one index, so
+users searching in one wiki also see results from the others.
+
+Before you start, you must prepare your wiki configs.
+
+For example, suppose you have 3 wikis: ``Advertising``, ``Sales``, ``Engineering``.
+
+The wiki configs would then look like this.
+
+wikiconfig.py for ``Advertising``::
+
+      index_dir = "/path/to/wiki/index/"
+      index_dir_tmp = "/path/to/wiki/tmp_build/"
+      interwikiname = u"Advertising"
+
+wikiconfig.py for ``Sales``::
+
+      index_dir = "/path/to/wiki/index/"
+      index_dir_tmp = "/path/to/wiki/tmp_build/"
+      interwikiname = u"Sales"
+
+wikiconfig.py for ``Engineering``::
+
+      index_dir = "/path/to/wiki/index/"
+      index_dir_tmp = "/path/to/wiki/tmp_build/"
+      interwikiname = u"Engineering"
+
+After you have defined the configs, you can start building the indexes.
+
+**Note:** Do not build indexes for two or more wikis in parallel; you will
+damage the index or get a traceback.
+
+You must build the index for each wiki in turn, each in its appropriate
+virtual env, and then move the indexes from ``index_dir_tmp`` to ``index_dir``::
+
+     moin index --for both --action build # in Advertising virtual env
+     moin index --for both --action build # in Sales virtual env
+     moin index --for both --action build # in Engineering virtual env
+     moin index --for both --action move # you can run it from any virtual env
+
+After that, just run moin and try searching for something.
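+
+Since the three configs above differ only in ``interwikiname``, one way to
+avoid repeating the shared paths is a small helper module that each
+wikiconfig imports. This is just a sketch (``farmconfig`` is not part of
+moin; the module name and paths are made up here)::
+
+      # farmconfig.py -- shared index settings for all farm wikis
+      index_dir = "/path/to/wiki/index/"
+      index_dir_tmp = "/path/to/wiki/tmp_build/"
+
+      # in each wiki's wikiconfig.py
+      from farmconfig import index_dir, index_dir_tmp
+      interwikiname = u"Sales"  # must stay unique per wiki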
--- a/docs/admin/install.rst	Thu Aug 18 12:25:19 2011 +0530
+++ b/docs/admin/install.rst	Thu Aug 18 13:02:23 2011 +0530
@@ -7,10 +7,14 @@
 For moin2, there is currently no packaged download available, you have to get
 it from the repository:
 
-Alternative 1 (using Mercurial DVCS)::
+Alternative 1a (using Mercurial DVCS)::
 
  $ hg clone http://hg.moinmo.in/moin/2.0 moin-2.0
 
+Alternative 1b (using Mercurial DVCS)::
+
+ $ hg clone http://bitbucket.org/thomaswaldmann/moin-2.0 moin-2.0
+
 Alternative 2:
 Visit http://hg.moinmo.in/moin/2.0 with your web browser, download the tgz
 and unpack it.
@@ -23,9 +27,9 @@
 -----------------
 Please make sure you have `virtualenv` installed (it includes `pip`).
 
-If you just want to run moin in-place in your mercurial workdir, run this
-from your mercurial moin2 work dir (you should do this using your normal
-user login, no root or Administrator privileges needed)::
+If you just want to run moin in-place in your mercurial workdir, with your
+normal system Python, run this from your mercurial moin2 work dir (you should
+do this using your normal user login, no root or Administrator privileges needed)::
 
  ./quickinstall  # for linux (or other posix OSes)
  # or
@@ -51,6 +55,24 @@
 directory, it will run everything from your work dir, so you can modify code
 and directly try it out (you only need to do this installation procedure once).
 
+Using a different Python or a different virtualenv directory
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+See the `quickinstall` script and just modify these lines as needed before
+running it::
+
+    DIR=env
+    PYTHON=python
+
+E.g. if you want to use `pypy` and name the virtualenv directory `env-pypy`,
+use::
+
+    DIR=env-pypy
+    PYTHON=/opt/pypy/bin/pypy
+
+That way, you can have all sorts of Pythons in different virtualenv directories
+within your moin2 workdir.
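+
+To work with one of these virtualenvs later, activate it before invoking
+moin (assuming the standard virtualenv layout on a posix OS)::
+
+    . env-pypy/bin/activate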
+
 Installing PIL
 ~~~~~~~~~~~~~~
 For some image processing functions (like resizing, rotating) of moin, you
--- a/docs/index.rst	Thu Aug 18 12:25:19 2011 +0530
+++ b/docs/index.rst	Thu Aug 18 13:02:23 2011 +0530
@@ -35,6 +35,7 @@
    admin/changes
    admin/upgrade
    admin/backup
+   admin/index
    man/moin
 
 Getting Support for and Contributing to MoinMoin
--- a/setup.py	Thu Aug 18 12:25:19 2011 +0530
+++ b/setup.py	Thu Aug 18 13:02:23 2011 +0530
@@ -88,6 +88,7 @@
         'sqlalchemy>=0.7.1', # metadata index and other stuff
         'Werkzeug>=0.6.2', # wsgi toolkit
         'pytest', # pytest is needed by unit tests
+        'whoosh>=2.1.0', # needed for indexed search
         'sphinx', # needed to build the docs
         'pdfminer', # pdf -> text/plain conversion
         'XStatic>=0.0.2',
--- a/wikiconfig.py	Thu Aug 18 12:25:19 2011 +0530
+++ b/wikiconfig.py	Thu Aug 18 13:02:23 2011 +0530
@@ -12,7 +12,7 @@
     # vvv DON'T TOUCH THIS EXCEPT IF YOU KNOW WHAT YOU DO vvv
     # Directory containing THIS wikiconfig:
     wikiconfig_dir = os.path.abspath(os.path.dirname(__file__))
-
+    interwikiname = u'MoinMoin2'
     # We assume this structure for a simple "unpack and run" scenario:
     # wikiconfig.py
     # wiki/
@@ -23,7 +23,8 @@
     # If that's not true, feel free to adjust the pathes.
     instance_dir = os.path.join(wikiconfig_dir, 'wiki')
     data_dir = os.path.join(instance_dir, 'data') # Note: this used to have a trailing / in the past
-
+    index_dir = os.path.join(instance_dir, "index")
+    index_dir_tmp = os.path.join(instance_dir, "index_tmp")
     # This puts the contents from the specified xml file (a serialized backend) into your
     # backend(s). You can remove this after the first request to your wiki or
     # from the beginning if you don't want to use this feature at all.
@@ -34,7 +35,7 @@
     # 'fs:' indicates that you want to use the filesystem backend. You can also use
     # 'hg:' instead to indicate that you want to use the mercurial backend.
     # Alternatively you can set up the mapping yourself (see HelpOnStorageConfiguration).
-    namespace_mapping, router_index_uri = create_simple_mapping(
+    namespace_mapping = create_simple_mapping(
                             backend_uri='fs2:%s/%%(nsname)s' % data_dir,
                             # XXX we use rather relaxed ACLs for the development wiki:
                             content_acl=dict(before=u'',
@@ -64,7 +65,7 @@
     # we slowly migrate all stuff from above (old) method, to xstatic (new) method,
     # see https://bitbucket.org/thomaswaldmann/xstatic for details:
     from xstatic.main import XStatic
-    mod_names = ['jquery', 'jquery_file_upload', 'ckeditor', 'svgweb', 'svgedit_moin', 'twikidraw_moin', 'anywikidraw', 'mathjax', ]
+    mod_names = ['jquery', 'jquery_file_upload', 'ckeditor', 'svgweb', 'svgedit_moin', 'twikidraw_moin', 'anywikidraw', ]
     pkg = __import__('xstatic.pkg', fromlist=mod_names)
     for mod_name in mod_names:
         mod = getattr(pkg, mod_name)