changeset 4967:ffa42d34b210

Xapian2009: the xappy library was added to MoinMoin.support.
author Dmitrijs Milajevs <dimazest@gmail.com>
date Mon, 27 Jul 2009 17:35:13 +0200
parents a8a777074233
children b0afbf750a24
files MoinMoin/support/xappy/__init__.py MoinMoin/support/xappy/_checkxapian.py MoinMoin/support/xappy/datastructures.py MoinMoin/support/xappy/datastructures_doctest1.txt MoinMoin/support/xappy/errors.py MoinMoin/support/xappy/errors_doctest1.txt MoinMoin/support/xappy/fieldactions.py MoinMoin/support/xappy/fieldmappings.py MoinMoin/support/xappy/fieldmappings_doctest1.txt MoinMoin/support/xappy/highlight.py MoinMoin/support/xappy/highlight_doctest1.txt MoinMoin/support/xappy/indexerconnection.py MoinMoin/support/xappy/indexerconnection_doctest1.txt MoinMoin/support/xappy/indexerconnection_doctest2.txt MoinMoin/support/xappy/indexerconnection_doctest3.txt MoinMoin/support/xappy/marshall.py MoinMoin/support/xappy/marshall_doctest1.txt MoinMoin/support/xappy/marshall_doctest2.txt MoinMoin/support/xappy/memutils.py MoinMoin/support/xappy/parsedate.py MoinMoin/support/xappy/parsedate_doctest1.txt MoinMoin/support/xappy/replaylog.py MoinMoin/support/xappy/schema.py MoinMoin/support/xappy/searchconnection.py MoinMoin/support/xappy/searchconnection_doctest1.txt MoinMoin/support/xappy/searchconnection_doctest2.txt MoinMoin/support/xappy/searchconnection_doctest3.txt
diffstat 27 files changed, 6368 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MoinMoin/support/xappy/__init__.py	Mon Jul 27 17:35:13 2009 +0200
@@ -0,0 +1,35 @@
+#!/usr/bin/env python
+#
+# Copyright (C) 2007 Lemur Consulting Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+"""Xappy.
+
+See the accompanying documentation for details.  In particular, there should be
+an accompanying file "introduction.html" (or "introduction.rst") which gives
+details of how to use the xappy package.
+
+"""
+__docformat__ = "restructuredtext en"
+
+__version__ = '0.5'
+
+import _checkxapian
+from datastructures import Field, UnprocessedDocument, ProcessedDocument
+from errors import *
+from fieldactions import FieldActions
+from indexerconnection import IndexerConnection
+from searchconnection import SearchConnection
+from replaylog import set_replay_path
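A minimal, illustrative sketch of how the public API re-exported by this __init__.py is typically used; the 'testdb' path and the field names are assumptions made for the example, not part of this changeset:

    import xappy

    # Create (or open) an index and declare how each field is handled.
    conn = xappy.IndexerConnection('testdb')
    conn.add_field_action('title', xappy.FieldActions.STORE_CONTENT)
    conn.add_field_action('title', xappy.FieldActions.INDEX_FREETEXT, weight=5)
    conn.add_field_action('text', xappy.FieldActions.INDEX_FREETEXT)

    # Build an unprocessed document and let the indexer process and store it.
    doc = xappy.UnprocessedDocument()
    doc.fields.append(xappy.Field('title', 'A sample document'))
    doc.fields.append(xappy.Field('text', 'Some body text to be indexed.'))
    conn.add(doc)
    conn.flush()
    conn.close()

    # Search the same database.
    sconn = xappy.SearchConnection('testdb')
    results = sconn.search(sconn.query_parse('sample'), 0, 10)
    for result in results:
        print result.id, result.data['title'][0]
    sconn.close()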
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MoinMoin/support/xappy/_checkxapian.py	Mon Jul 27 17:35:13 2009 +0200
@@ -0,0 +1,44 @@
+# Copyright (C) 2008 Lemur Consulting Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+r"""_checkxapian.py: Check the version of xapian used.
+
+Raises an ImportError on import if the version used is too old to be used at
+all.
+
+"""
+__docformat__ = "restructuredtext en"
+
+# The minimum version of xapian required to work at all.
+min_xapian_version = (1, 0, 6)
+
+# Dictionary of features we can't support due to them being missing from the
+# available version of xapian.
+missing_features = {}
+
+import xapian
+
+versions = xapian.major_version(), xapian.minor_version(), xapian.revision()
+
+
+if versions < min_xapian_version:
+    raise ImportError("""
+        Xapian Python bindings installed, but need at least version %d.%d.%d - got %s
+        """.strip() % tuple(list(min_xapian_version) + [xapian.version_string()]))
+
+if not hasattr(xapian, 'TermCountMatchSpy'):
+    missing_features['tags'] = 1
+if not hasattr(xapian, 'CategorySelectMatchSpy'):
+    missing_features['facets'] = 1
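A small sketch of how other modules consult the feature flags set above (fieldactions.py, later in this changeset, performs the same check); purely illustrative:

    from xappy import _checkxapian

    # 'tags' and 'facets' are the keys populated by this module when the
    # installed xapian lacks the corresponding match spies.
    if 'facets' in _checkxapian.missing_features:
        print 'facet support unavailable with this xapian version'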
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MoinMoin/support/xappy/datastructures.py	Mon Jul 27 17:35:13 2009 +0200
@@ -0,0 +1,238 @@
+#!/usr/bin/env python
+#
+# Copyright (C) 2007 Lemur Consulting Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+r"""datastructures.py: Datastructures for search engine core.
+
+"""
+__docformat__ = "restructuredtext en"
+
+import errors
+from replaylog import log
+import xapian
+import cPickle
+
+class Field(object):
+    # Use __slots__ because we're going to have very many Field objects in
+    # typical usage.
+    __slots__ = 'name', 'value'
+
+    def __init__(self, name, value):
+        self.name = name
+        self.value = value
+
+    def __repr__(self):
+        return 'Field(%r, %r)' % (self.name, self.value)
+
+class UnprocessedDocument(object):
+    """A unprocessed document to be passed to the indexer.
+
+    This represents an item to be processed and stored in the search engine.
+    Each document will be processed by the indexer to generate a
+    ProcessedDocument, which can then be stored in the search engine index.
+
+    Note that some information in an UnprocessedDocument will not be
+    represented in the ProcessedDocument: therefore, it is not possible to
+    retrieve an UnprocessedDocument from the search engine index.
+
+    An unprocessed document is a simple container with two attributes:
+
+     - `fields` is a list of Field objects, or an iterator returning Field
+       objects.
+     - `id` is a string holding a unique identifier for the document (or
+       None to get the database to allocate a unique identifier automatically
+       when the document is added).
+
+    """
+
+    __slots__ = 'id', 'fields',
+    def __init__(self, id=None, fields=None):
+        self.id = id
+        if fields is None:
+            self.fields = []
+        else:
+            self.fields = fields
+
+    def __repr__(self):
+        return 'UnprocessedDocument(%r, %r)' % (self.id, self.fields)
+
+class ProcessedDocument(object):
+    """A processed document, as stored in the index.
+
+    This represents an item which is ready to be stored in the search engine,
+    or which has been returned by the search engine.
+
+    """
+
+    __slots__ = '_doc', '_fieldmappings', '_data',
+    def __init__(self, fieldmappings, xapdoc=None):
+        """Create a ProcessedDocument.
+
+        `fieldmappings` is the configuration from a database connection, used to
+        look up the configuration for storing each field.
+    
+        If supplied, `xapdoc` is a Xapian document to store in the processed
+        document.  Otherwise, a new Xapian document is created.
+
+        """
+        if xapdoc is None:
+            self._doc = log(xapian.Document)
+        else:
+            self._doc = xapdoc
+        self._fieldmappings = fieldmappings
+        self._data = None
+
+    def add_term(self, field, term, wdfinc=1, positions=None):
+        """Add a term to the document.
+
+        Terms are the main unit of information used for performing searches.
+
+        - `field` is the field to add the term to.
+        - `term` is the term to add.
+        - `wdfinc` is the value to increase the within-document-frequency
+          measure for the term by.
+        - `positions` is the positional information to add for the term.
+          This may be None to indicate that there is no positional information,
+          or may be an integer to specify one position, or may be a sequence of
+          integers to specify several positions.  (Note that the wdf is not
+          increased automatically for each position: if you add a term at 7
+          positions, and the wdfinc value is 2, the total wdf for the term will
+          only be increased by 2, not by 14.)
+
+        """
+        prefix = self._fieldmappings.get_prefix(field)
+        if len(term) > 0:
+            # We use the following check, rather than "isupper()" to ensure
+            # that we match the check performed by the queryparser, regardless
+            # of our locale.
+            if ord(term[0]) >= ord('A') and ord(term[0]) <= ord('Z'):
+                prefix = prefix + ':'
+
+        # Note - xapian currently restricts term lengths to about 248
+        # characters - except that zero bytes are encoded in two bytes, so
+        # in practice a term of length 125 characters could be too long.
+        # Xapian will give an error when commit() is called after such
+        # documents have been added to the database.
+        # As a simple workaround, we give an error here for terms over 220
+        # characters, which will catch most occurrences of the error early.
+        #
+        # In future, it might be good to change to a hashing scheme in this
+        # situation (or for terms over, say, 64 characters), where the
+        # characters after position 64 are hashed (we obviously need to do this
+        # hashing at search time, too).
+        if len(prefix + term) > 220:
+            raise errors.IndexerError("Field %r is too long: maximum length "
+                                       "220 - was %d (%r)" %
+                                       (field, len(prefix + term),
+                                        prefix + term))
+
+        if positions is None:
+            self._doc.add_term(prefix + term, wdfinc)
+        elif isinstance(positions, int):
+            self._doc.add_posting(prefix + term, positions, wdfinc)
+        else:
+            self._doc.add_term(prefix + term, wdfinc)
+            for pos in positions:
+                self._doc.add_posting(prefix + term, pos, 0)
+
+    def add_value(self, field, value, purpose=''):
+        """Add a value to the document.
+
+        Values are additional units of information used when performing
+        searches.  Note that values are _not_ intended to be used to store
+        information for display in the search results - use the document data
+        for that.  The intention is that as little information as possible is
+        stored in values, so that they can be accessed as quickly as possible
+        during the search operation.
+        
+        Unlike terms, each document may have at most one value in each field
+        (whereas there may be an arbitrary number of terms in a given field).
+        If an attempt to add multiple values to a single field is made, only
+        the last value added will be stored.
+
+        """
+        slot = self._fieldmappings.get_slot(field, purpose)
+        self._doc.add_value(slot, value)
+
+    def get_value(self, field, purpose=''):
+        """Get a value from the document.
+
+        """
+        slot = self._fieldmappings.get_slot(field, purpose)
+        return self._doc.get_value(slot)
+
+    def prepare(self):
+        """Prepare the document for adding to a xapian database.
+
+        This updates the internal xapian document with any changes which have
+        been made, and then returns it.
+
+        """
+        if self._data is not None:
+            self._doc.set_data(cPickle.dumps(self._data, 2))
+            self._data = None
+        return self._doc
+
+    def _get_data(self):
+        if self._data is None:
+            rawdata = self._doc.get_data()
+            if rawdata == '':
+                self._data = {}
+            else:
+                self._data = cPickle.loads(rawdata)
+        return self._data
+    def _set_data(self, data):
+        if not isinstance(data, dict):
+            raise TypeError("Cannot set data to any type other than a dict")
+        self._data = data
+    data = property(_get_data, _set_data, doc=
+    """The data stored in this processed document.
+
+    This data is a dictionary of entries, where the key is a fieldname, and the
+    value is a list of strings.
+
+    """)
+
+    def _get_id(self):
+        tl = self._doc.termlist()
+        try:
+            term = tl.skip_to('Q').term
+            if len(term) == 0 or term[0] != 'Q':
+                return None
+        except StopIteration:
+            return None
+        return term[1:]
+    def _set_id(self, id):
+        tl = self._doc.termlist()
+        try:
+            term = tl.skip_to('Q').term
+        except StopIteration:
+            term = ''
+        if len(term) != 0 and term[0] == 'Q':
+            self._doc.remove_term(term)
+        if id is not None:
+            self._doc.add_term('Q' + id, 0)
+    id = property(_get_id, _set_id, doc=
+    """The unique ID for this document.
+
+    """)
+
+    def __repr__(self):
+        return '<ProcessedDocument(%r)>' % (self.id)
+
+if __name__ == '__main__':
+    import doctest, sys
+    doctest.testmod (sys.modules[__name__])
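An illustrative sketch of the two document classes defined above, assuming the xappy package is importable; the field names and id are made up for the example:

    from xappy.datastructures import Field, UnprocessedDocument, ProcessedDocument
    from xappy.fieldmappings import FieldMappings

    # An unprocessed document is just a container of Field objects plus an id.
    udoc = UnprocessedDocument(id='page-1')
    udoc.fields.append(Field('title', 'Hello world'))
    udoc.fields.append(Field('text', 'Body text for the page.'))

    # A processed document wraps a xapian.Document; terms, values and data
    # are addressed by field name through a FieldMappings object.
    maps = FieldMappings()
    maps.add_prefix('title')
    pdoc = ProcessedDocument(maps)
    pdoc.id = 'page-1'
    pdoc.add_term('title', 'hello')
    pdoc.data['title'] = ['Hello world']
    xapdoc = pdoc.prepare()   # the underlying xapian.Document, ready to store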
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MoinMoin/support/xappy/datastructures_doctest1.txt	Mon Jul 27 17:35:13 2009 +0200
@@ -0,0 +1,30 @@
+
+>>> from fieldmappings import FieldMappings
+>>> maps = FieldMappings()
+
+Make a processed document.
+>>> doc = ProcessedDocument(maps)
+
+>>> print doc.id
+None
+>>> doc.id = '1'
+>>> print doc.id
+1
+>>> doc.id = '_'
+>>> print doc.id
+_
+
+>>> print repr(doc.data)
+{}
+>>> doc.data['foo'] = ['1', '2']
+>>> print repr(doc.data)
+{'foo': ['1', '2']}
+
+
+Adding terms which are too long gives an error straight-away.
+
+>>> maps.add_prefix('foo')
+>>> doc.add_term('foo', 'a' * 250)
+Traceback (most recent call last):
+...
+IndexerError: Field 'foo' is too long: maximum length 220 - was 252 ('XAaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa')
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MoinMoin/support/xappy/errors.py	Mon Jul 27 17:35:13 2009 +0200
@@ -0,0 +1,87 @@
+#!/usr/bin/env python
+#
+# Copyright (C) 2007 Lemur Consulting Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+r"""errors.py: Exceptions for the search engine core.
+
+"""
+__docformat__ = "restructuredtext en"
+
+class SearchEngineError(Exception):
+    r"""Base class for exceptions thrown by the search engine.
+
+    Any errors generated by xappy itself, or by xapian, will be instances of
+    this class or its subclasses.
+
+    """
+
+class IndexerError(SearchEngineError):
+    r"""Class used to report errors relating to the indexing API.
+
+    """
+
+class SearchError(SearchEngineError):
+    r"""Class used to report errors relating to the search API.
+
+    """
+
+
+class XapianError(SearchEngineError):
+    r"""Base class for exceptions thrown by the xapian.
+
+    Any errors generated by xapian will be instances of this class or its
+    subclasses.
+
+    """
+
+def _rebase_xapian_exceptions():
+    """Add new base classes for all the xapian exceptions.
+
+    """
+    import xapian
+    for name in (
+                 'AssertionError',
+                 'DatabaseCorruptError',
+                 'DatabaseCreateError',
+                 'DatabaseError',
+                 'DatabaseLockError',
+                 'DatabaseModifiedError',
+                 'DatabaseOpeningError',
+                 'DatabaseVersionError',
+                 'DocNotFoundError',
+                 # We skip 'Error' because it inherits directly from Exception
+                 # and this causes problems with method resolution order.
+                 # However, we probably don't need it anyway, because it's
+                 # just a base class, and shouldn't ever actually be raised.
+                 # Users can catch xappy.XapianError instead.
+                 'FeatureUnavailableError',
+                 'InternalError',
+                 'InvalidArgumentError',
+                 'InvalidOperationError',
+                 'LogicError',
+                 'NetworkError',
+                 'NetworkTimeoutError',
+                 'QueryParserError',
+                 'RangeError',
+                 'RuntimeError',
+                 'UnimplementedError',
+                 ):
+        xapian_exception = getattr(xapian, name, None)
+        if xapian_exception is not None:
+            xapian_exception.__bases__ += (XapianError, )
+            globals()['Xapian' + name] = xapian_exception
+
+_rebase_xapian_exceptions()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MoinMoin/support/xappy/errors_doctest1.txt	Mon Jul 27 17:35:13 2009 +0200
@@ -0,0 +1,34 @@
+Xappy exports all the xapian errors as "XapianFooError", corresponding to
+xapian.FooError.  Firstly, we need to test that we can catch one of these
+errors.  Let's play with DatabaseLockError because it's easy to generate.
+
+>>> import xappy
+
+>>> db1 = xappy.IndexerConnection('foo')
+
+>>> try:
+...     db2 = xappy.IndexerConnection('foo')
+... except xappy.XapianDatabaseLockError:
+...     print "Got XapianDatabaseLockError"
+Got XapianDatabaseLockError
+
+
+Xappy also modifies all the Xapian errors so that they inherit from
+xappy.XapianError, so we can catch all Xapian errors this way:
+
+>>> try:
+...     db2 = xappy.IndexerConnection('foo')
+... except xappy.XapianError:
+...     print "Got XapianError"
+Got XapianError
+
+
+xappy.XapianError is a subclass of xappy.SearchEngineError, so all errors from
+xappy can be caught using xappy.SearchEngineError:
+
+>>> try:
+...     db2 = xappy.IndexerConnection('foo')
+... except xappy.SearchEngineError:
+...     print "Got SearchEngineError"
+Got SearchEngineError
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MoinMoin/support/xappy/fieldactions.py	Mon Jul 27 17:35:13 2009 +0200
@@ -0,0 +1,432 @@
+#!/usr/bin/env python
+#
+# Copyright (C) 2007 Lemur Consulting Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+r"""fieldactions.py: Definitions and implementations of field actions.
+
+"""
+__docformat__ = "restructuredtext en"
+
+import _checkxapian
+import errors
+import marshall
+from replaylog import log
+import xapian
+import parsedate
+
+def _act_store_content(fieldname, doc, value, context):
+    """Perform the STORE_CONTENT action.
+    
+    """
+    try:
+        fielddata = doc.data[fieldname]
+    except KeyError:
+        fielddata = []
+        doc.data[fieldname] = fielddata
+    fielddata.append(value)
+
+def _act_index_exact(fieldname, doc, value, context):
+    """Perform the INDEX_EXACT action.
+    
+    """
+    doc.add_term(fieldname, value, 0)
+
+def _act_tag(fieldname, doc, value, context):
+    """Perform the TAG action.
+    
+    """
+    doc.add_term(fieldname, value.lower(), 0)
+
+def _act_facet(fieldname, doc, value, context, type=None):
+    """Perform the FACET action.
+    
+    """
+    if type is None or type == 'string':
+        value = value.lower()
+        doc.add_term(fieldname, value, 0)
+        serialiser = log(xapian.StringListSerialiser,
+                          doc.get_value(fieldname, 'facet'))
+        serialiser.append(value)
+        doc.add_value(fieldname, serialiser.get(), 'facet')
+    else:
+        marshaller = SortableMarshaller()
+        fn = marshaller.get_marshall_function(fieldname, type)
+        doc.add_value(fieldname, fn(fieldname, value), 'facet')
+
+def _act_index_freetext(fieldname, doc, value, context, weight=1, 
+                        language=None, stop=None, spell=False,
+                        nopos=False,
+                        allow_field_specific=True,
+                        search_by_default=True):
+    """Perform the INDEX_FREETEXT action.
+    
+    """
+    termgen = log(xapian.TermGenerator)
+    if language is not None:
+        termgen.set_stemmer(log(xapian.Stem, language))
+        
+    if stop is not None:
+        stopper = log(xapian.SimpleStopper)
+        for term in stop:
+            stopper.add (term)
+        termgen.set_stopper (stopper)
+
+    if spell:
+        termgen.set_database(context.index)
+        termgen.set_flags(termgen.FLAG_SPELLING)
+    
+    termgen.set_document(doc._doc)
+
+    if search_by_default:
+        termgen.set_termpos(context.current_position)
+        # Store a copy of the field without a prefix, for non-field-specific
+        # searches.
+        if nopos:
+            termgen.index_text_without_positions(value, weight, '')
+        else:
+            termgen.index_text(value, weight, '')
+
+    if allow_field_specific:
+        # Store a second copy of the term with a prefix, for field-specific
+        # searches.
+        prefix = doc._fieldmappings.get_prefix(fieldname)
+        if len(prefix) != 0:
+            termgen.set_termpos(context.current_position)
+            if nopos:
+                termgen.index_text_without_positions(value, weight, prefix)
+            else:
+                termgen.index_text(value, weight, prefix)
+
+    # Add a gap between each field instance, so that phrase searches don't
+    # match across instances.
+    termgen.increase_termpos(10)
+    context.current_position = termgen.get_termpos()
+
+class SortableMarshaller(object):
+    """Implementation of marshalling for sortable values.
+
+    """
+    def __init__(self, indexing=True):
+        if indexing:
+            self._err = errors.IndexerError
+        else:
+            self._err = errors.SearchError
+
+    def marshall_string(self, fieldname, value):
+        """Marshall a value for sorting in lexicograpical order.
+
+        This returns the input as the output, since strings already sort in
+        lexicographical order.
+
+        """
+        return value
+
+    def marshall_float(self, fieldname, value):
+        """Marshall a value for sorting as a floating point value.
+
+        """
+        # convert the value to a float
+        try:
+            value = float(value)
+        except ValueError:
+            raise self._err("Value supplied to field %r must be a "
+                            "valid floating point number: was %r" %
+                            (fieldname, value))
+        return marshall.float_to_string(value)
+
+    def marshall_date(self, fieldname, value):
+        """Marshall a value for sorting as a date.
+
+        """
+        try:
+            value = parsedate.date_from_string(value)
+        except ValueError, e:
+            raise self._err("Value supplied to field %r must be a "
+                            "valid date: was %r: error is '%s'" %
+                            (fieldname, value, str(e)))
+        return marshall.date_to_string(value)
+
+    def get_marshall_function(self, fieldname, sorttype):
+        """Get a function used to marshall values of a given sorttype.
+
+        """
+        try:
+            return {
+                None: self.marshall_string,
+                'string': self.marshall_string,
+                'float': self.marshall_float,
+                'date': self.marshall_date,
+            }[sorttype]
+        except KeyError:
+            raise self._err("Unknown sort type %r for field %r" %
+                            (sorttype, fieldname))
+
+
+def _act_sort_and_collapse(fieldname, doc, value, context, type=None):
+    """Perform the SORTABLE action.
+
+    """
+    marshaller = SortableMarshaller()
+    fn = marshaller.get_marshall_function(fieldname, type)
+    value = fn(fieldname, value)
+    doc.add_value(fieldname, value, 'collsort')
+
+class ActionContext(object):
+    """The context in which an action is performed.
+
+    This is just used to pass term generators, word positions, and the like
+    around.
+
+    """
+    def __init__(self, index):
+        self.current_language = None
+        self.current_position = 0
+        self.index = index
+
+class FieldActions(object):
+    """An object describing the actions to be performed on a field.
+
+    The supported actions are:
+    
+    - `STORE_CONTENT`: store the unprocessed content of the field in the search
+      engine database.  All fields which need to be displayed or used when
+      displaying the search results need to be given this action.
+
+    - `INDEX_EXACT`: index the exact content of the field as a single search
+      term.  Fields whose contents need to be searchable as an "exact match"
+      need to be given this action.
+
+    - `INDEX_FREETEXT`: index the content of this field as text.  The content
+      will be split into terms, allowing free text searching of the field.
+      Several optional parameters may be supplied:
+
+      - 'weight' is a multiplier to apply to the importance of the field.  This
+        must be an integer, and the default value is 1.
+      - 'language' is the language to use when processing the field.  This can
+        be expressed as an ISO 2-letter language code.  The supported languages
+        are those supported by the xapian core in use.
+      - 'stop' is an iterable of stopwords to filter out of the generated
+        terms.  Note that due to Xapian design, only non-positional terms are
+        affected, so this is of limited use.
+      - 'spell' is a boolean flag - if true, the contents of the field will be
+        used for spelling correction.
+      - 'nopos' is a boolean flag - if true, positional information is not
+        stored.
+      - 'allow_field_specific' is a boolean flag - if False, prevents terms with the field
+        prefix being generated.  This means that searches specific to this
+        field will not work, and thus should only be used when only non-field
+        specific searches are desired.  Defaults to True.
+      - 'search_by_default' is a boolean flag - if False, the field will not be
+        searched by non-field specific searches.  If True, or omitted, the
+        field will be included in searches for non field-specific searches.
+
+    - `SORTABLE`: index the content of the field such that it can be used to
+      sort result sets.  It also allows result sets to be restricted to those
+      documents with a field value in a given range.  One optional parameter
+      may be supplied:
+
+      - 'type' is a value indicating how to sort the field.  It has several
+        possible values:
+
+        - 'string' - sort in lexicographic (ie, alphabetical) order.
+          This is the default, used if no type is set.
+        - 'float' - treat the values as (decimal representations of) floating
+          point numbers, and sort in numerical order.  The values in the field
+          must be valid floating point numbers (according to Python's float()
+          function).
+        - 'date' - sort in date order.  The values must be valid dates (either
+          Python datetime.date objects, or ISO 8601 format (ie, YYYYMMDD or
+          YYYY-MM-DD)).
+
+    - `COLLAPSE`: index the content of the field such that it can be used to
+      "collapse" result sets, such that only the highest result with each value
+      of the field will be returned.
+
+    - `TAG`: the field contains tags; these are strings, which will be matched
+      in a case insensitive way, but otherwise must be exact matches.  Tag
+      fields can be searched for by making an explicit query (ie, using
+      query_field(), but not with query_parse()).  A list of the most frequent
+      tags in a result set can also be accessed easily.
+
+    - `FACET`: the field represents a classification facet; these are strings
+      which will be matched exactly, but a list of all the facets present in
+      the result set can also be accessed easily - in addition, a suitable
+      subset of the facets, and a selection of the facet values, present in the
+      result set can be calculated.  One optional parameter may be supplied:
+
+      - 'type' is a value indicating the type of facet contained in the field:
+
+        - 'string' - the facet values are exact binary strings.
+        - 'float' - the facet values are floating point numbers.
+
+    """
+
+    # See the class docstring for the meanings of the following constants.
+    STORE_CONTENT = 1
+    INDEX_EXACT = 2
+    INDEX_FREETEXT = 3
+    SORTABLE = 4 
+    COLLAPSE = 5
+    TAG = 6
+    FACET = 7
+
+    # Sorting and collapsing store the data in a value, but the format depends
+    # on the sort type.  Easiest way to implement is to treat them as the same
+    # action.
+    SORT_AND_COLLAPSE = -1
+
+    _unsupported_actions = []
+
+    if 'tags' in _checkxapian.missing_features:
+        _unsupported_actions.append(TAG)
+    if 'facets' in _checkxapian.missing_features:
+        _unsupported_actions.append(FACET)
+
+    def __init__(self, fieldname):
+        # Dictionary of actions, keyed by type.
+        self._actions = {}
+        self._fieldname = fieldname
+
+    def add(self, field_mappings, action, **kwargs):
+        """Add an action to perform on a field.
+
+        """
+        if action in self._unsupported_actions:
+            raise errors.IndexerError("Action unsupported with this release of xapian")
+
+        if action not in (FieldActions.STORE_CONTENT,
+                          FieldActions.INDEX_EXACT,
+                          FieldActions.INDEX_FREETEXT,
+                          FieldActions.SORTABLE,
+                          FieldActions.COLLAPSE,
+                          FieldActions.TAG,
+                          FieldActions.FACET,
+                         ):
+            raise errors.IndexerError("Unknown field action: %r" % action)
+
+        info = self._action_info[action]
+
+        # Check parameter names
+        for key in kwargs.keys():
+            if key not in info[1]:
+                raise errors.IndexerError("Unknown parameter name for action %r: %r" % (info[0], key))
+
+        # Fields cannot be indexed both with "EXACT" and "FREETEXT": whilst we
+        # could implement this, the query parser wouldn't know what to do with
+        # searches.
+        if action == FieldActions.INDEX_EXACT:
+            if FieldActions.INDEX_FREETEXT in self._actions:
+                raise errors.IndexerError("Field %r is already marked for indexing "
+                                   "as free text: cannot mark for indexing "
+                                   "as exact text as well" % self._fieldname)
+        if action == FieldActions.INDEX_FREETEXT:
+            if FieldActions.INDEX_EXACT in self._actions:
+                raise errors.IndexerError("Field %r is already marked for indexing "
+                                   "as exact text: cannot mark for indexing "
+                                   "as free text as well" % self._fieldname)
+
+        # Fields cannot be indexed as more than one type for "SORTABLE": to
+        # implement this, we'd need to use a different prefix for each sortable
+        # type, but even then the search end wouldn't know what to sort on when
+        # searching.  Also, if they're indexed as "COLLAPSE", the value must be
+        # stored in the right format for the type "SORTABLE".
+        if action == FieldActions.SORTABLE or action == FieldActions.COLLAPSE:
+            if action == FieldActions.COLLAPSE:
+                sorttype = None
+            else:
+                try:
+                    sorttype = kwargs['type']
+                except KeyError:
+                    sorttype = 'string'
+            kwargs['type'] = sorttype
+            action = FieldActions.SORT_AND_COLLAPSE
+
+            try:
+                oldsortactions = self._actions[FieldActions.SORT_AND_COLLAPSE]
+            except KeyError:
+                oldsortactions = ()
+
+            if len(oldsortactions) > 0:
+                for oldsortaction in oldsortactions:
+                    oldsorttype = oldsortaction['type']
+
+                if sorttype == oldsorttype or oldsorttype is None:
+                    # Use new type
+                    self._actions[action] = []
+                elif sorttype is None:
+                    # Use old type
+                    return
+                else:
+                    raise errors.IndexerError("Field %r is already marked for "
+                                               "sorting, with a different "
+                                               "sort type" % self._fieldname)
+
+        if 'prefix' in info[3]:
+            field_mappings.add_prefix(self._fieldname)
+        if 'slot' in info[3]:
+            purposes = info[3]['slot']
+            if isinstance(purposes, basestring):
+                field_mappings.add_slot(self._fieldname, purposes)
+            else:
+                slotnum = None
+                for purpose in purposes:
+                    slotnum = field_mappings.get_slot(self._fieldname, purpose)
+                    if slotnum is not None:
+                        break
+                for purpose in purposes:
+                    field_mappings.add_slot(self._fieldname, purpose, slotnum=slotnum)
+
+        # Make an entry for the action
+        if action not in self._actions:
+            self._actions[action] = []
+
+        # Check for repetitions of actions
+        for old_action in self._actions[action]:
+            if old_action == kwargs:
+                return
+
+        # Append the action to the list of actions
+        self._actions[action].append(kwargs)
+
+    def perform(self, doc, value, context):
+        """Perform the actions on the field.
+
+        - `doc` is a ProcessedDocument to store the result of the actions in.
+        - `value` is a string holding the value of the field.
+        - `context` is an ActionContext object used to keep state in.
+
+        """
+        for type, actionlist in self._actions.iteritems():
+            info = self._action_info[type]            
+            for kwargs in actionlist:
+                info[2](self._fieldname, doc, value, context, **kwargs)
+
+    _action_info = {
+        STORE_CONTENT: ('STORE_CONTENT', (), _act_store_content, {}, ),
+        INDEX_EXACT: ('INDEX_EXACT', (), _act_index_exact, {'prefix': True}, ),
+        INDEX_FREETEXT: ('INDEX_FREETEXT', ('weight', 'language', 'stop', 'spell', 'nopos', 'allow_field_specific', 'search_by_default', ), 
+            _act_index_freetext, {'prefix': True, }, ),
+        SORTABLE: ('SORTABLE', ('type', ), None, {'slot': 'collsort',}, ),
+        COLLAPSE: ('COLLAPSE', (), None, {'slot': 'collsort',}, ),
+        TAG: ('TAG', (), _act_tag, {'prefix': True,}, ),
+        FACET: ('FACET', ('type', ), _act_facet, {'prefix': True, 'slot': 'facet',}, ),
+
+        SORT_AND_COLLAPSE: ('SORT_AND_COLLAPSE', ('type', ), _act_sort_and_collapse, {'slot': 'collsort',}, ),
+    }
+
+if __name__ == '__main__':
+    import doctest, sys
+    doctest.testmod (sys.modules[__name__])
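The actions documented in the FieldActions docstring above are normally attached through IndexerConnection.add_field_action(); a hedged sketch with invented field names (note that INDEX_EXACT and INDEX_FREETEXT are mutually exclusive on one field, and TAG/FACET raise IndexerError when listed in _checkxapian.missing_features):

    import xappy

    conn = xappy.IndexerConnection('testdb')
    conn.add_field_action('title', xappy.FieldActions.STORE_CONTENT)
    conn.add_field_action('title', xappy.FieldActions.INDEX_FREETEXT,
                          weight=5, language='en', spell=True)
    conn.add_field_action('category', xappy.FieldActions.INDEX_EXACT)
    conn.add_field_action('date', xappy.FieldActions.SORTABLE, type='date')
    conn.add_field_action('author', xappy.FieldActions.COLLAPSE)
    conn.add_field_action('tag', xappy.FieldActions.TAG)
    conn.add_field_action('colour', xappy.FieldActions.FACET, type='string')
    conn.close()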
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MoinMoin/support/xappy/fieldmappings.py	Mon Jul 27 17:35:13 2009 +0200
@@ -0,0 +1,152 @@
+#!/usr/bin/env python
+#
+# Copyright (C) 2007 Lemur Consulting Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+r"""fieldmappings.py: Mappings from field names to term prefixes, etc.
+
+"""
+__docformat__ = "restructuredtext en"
+
+import cPickle as _cPickle
+
+class FieldMappings(object):
+    """Mappings from field names to term prefixes, slot values, etc.
+
+    The following mappings are maintained:
+
+    - a mapping from field name to the string prefix to insert at the start of
+      terms.
+    - a mapping from field name to the slot numbers to store the field contents
+      in.
+
+    """
+    __slots__ = '_prefixes', '_prefixcount', '_slots', '_slotcount', 
+
+    def __init__(self, serialised=None):
+        """Create a new field mapping object, or unserialise a saved one.
+
+        """
+        if serialised is not None:
+            (self._prefixes, self._prefixcount,
+             self._slots, self._slotcount) = _cPickle.loads(serialised)
+        else:
+            self._prefixes = {}
+            self._prefixcount = 0
+            self._slots = {}
+            self._slotcount = 0
+
+    def _genPrefix(self):
+        """Generate a previously unused prefix.
+
+        Prefixes are uppercase letters, and start with 'X' (this is a Xapian
+        convention, for compatibility with other Xapian tools: other starting
+        letters are reserved for special meanings):
+
+        >>> maps = FieldMappings()
+        >>> maps._genPrefix()
+        'XA'
+        >>> maps._genPrefix()
+        'XB'
+        >>> [maps._genPrefix() for i in xrange(60)]
+        ['XC', 'XD', 'XE', 'XF', 'XG', 'XH', 'XI', 'XJ', 'XK', 'XL', 'XM', 'XN', 'XO', 'XP', 'XQ', 'XR', 'XS', 'XT', 'XU', 'XV', 'XW', 'XX', 'XY', 'XZ', 'XAA', 'XBA', 'XCA', 'XDA', 'XEA', 'XFA', 'XGA', 'XHA', 'XIA', 'XJA', 'XKA', 'XLA', 'XMA', 'XNA', 'XOA', 'XPA', 'XQA', 'XRA', 'XSA', 'XTA', 'XUA', 'XVA', 'XWA', 'XXA', 'XYA', 'XZA', 'XAB', 'XBB', 'XCB', 'XDB', 'XEB', 'XFB', 'XGB', 'XHB', 'XIB', 'XJB']
+        >>> maps = FieldMappings()
+        >>> [maps._genPrefix() for i in xrange(27*26 + 5)][-10:]
+        ['XVZ', 'XWZ', 'XXZ', 'XYZ', 'XZZ', 'XAAA', 'XBAA', 'XCAA', 'XDAA', 'XEAA']
+        """
+        res = []
+        self._prefixcount += 1
+        num = self._prefixcount
+        while num != 0:
+            ch = (num - 1) % 26
+            res.append(chr(ch + ord('A')))
+            num -= ch
+            num = num // 26
+        return 'X' + ''.join(res)
+
+    def get_fieldname_from_prefix(self, prefix):
+        """Get a fieldname from a prefix.
+
+        If the prefix is not found, return None.
+
+        """
+        for key, val in self._prefixes.iteritems():
+            if val == prefix:
+                return key
+        return None
+
+    def get_prefix(self, fieldname):
+        """Get the prefix used for a given field name.
+
+        """
+        return self._prefixes[fieldname]
+
+    def get_slot(self, fieldname, purpose):
+        """Get the slot number used for a given field name and purpose.
+
+        """
+        return self._slots[(fieldname, purpose)]
+
+    def add_prefix(self, fieldname):
+        """Allocate a prefix for the given field.
+
+        If a prefix is already allocated for this field, this has no effect.
+
+        """
+        if fieldname in self._prefixes:
+            return
+        self._prefixes[fieldname] = self._genPrefix()
+
+    def add_slot(self, fieldname, purpose, slotnum=None):
+        """Allocate a slot number for the given field and purpose.
+
+        If a slot number is already allocated for this field and purpose, this
+        has no effect.
+
+        Returns the slot number allocated for the field and purpose (whether
+        newly allocated, or previously allocated).
+
+        If `slotnum` is supplied, the number contained in it is used to
+        allocate the new slot, instead of allocating a new number.  No checks
+        will be made to ensure that the slot number doesn't collide with
+        existing (or later allocated) numbers: the main purpose of this
+        parameter is to share allocations - ie, to collide deliberately.
+
+        """
+        try:
+            return self._slots[(fieldname, purpose)]
+        except KeyError:
+            pass
+
+        if slotnum is None:
+            self._slots[(fieldname, purpose)] = self._slotcount
+            self._slotcount += 1
+            return self._slotcount - 1
+        else:
+            self._slots[(fieldname, purpose)] = slotnum
+            return slotnum
+
+    def serialise(self):
+        """Serialise the field mappings to a string.
+
+        This can be unserialised by passing the result of this method to the
+        constructor of a new FieldMappings object.
+
+        """
+        return _cPickle.dumps((self._prefixes,
+                               self._prefixcount,
+                               self._slots,
+                               self._slotcount,
+                              ), 2)
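A short sketch of the FieldMappings behaviour described above, including the deliberate slot sharing enabled by the slotnum parameter; field names are illustrative:

    from xappy.fieldmappings import FieldMappings

    maps = FieldMappings()
    maps.add_prefix('title')                        # allocates prefix 'XA'
    slot = maps.add_slot('price', 'collsort')       # allocates slot 0
    maps.add_slot('price', 'facet', slotnum=slot)   # share the same slot

    # Mappings survive a round trip through serialise().
    maps2 = FieldMappings(maps.serialise())
    assert maps2.get_prefix('title') == maps.get_prefix('title')
    assert maps2.get_slot('price', 'facet') == slot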
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MoinMoin/support/xappy/fieldmappings_doctest1.txt	Mon Jul 27 17:35:13 2009 +0200
@@ -0,0 +1,9 @@
+
+
+Test basic workings of the FieldMappings.
+
+>>> maps = FieldMappings()
+>>> maps.get_fieldname_from_prefix('XA')
+>>> maps.add_prefix('foo')
+>>> maps.get_fieldname_from_prefix('XA')
+'foo'
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MoinMoin/support/xappy/highlight.py	Mon Jul 27 17:35:13 2009 +0200
@@ -0,0 +1,314 @@
+#!/usr/bin/env python
+#
+# Copyright (C) 2007 Lemur Consulting Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+r"""highlight.py: Highlight and summarise text.
+
+"""
+__docformat__ = "restructuredtext en"
+
+import re
+import xapian
+
+class Highlighter(object):
+    """Class for highlighting text and creating contextual summaries.
+
+    >>> hl = Highlighter("en")
+    >>> hl.makeSample('Hello world.', ['world'])
+    'Hello world.'
+    >>> hl.highlight('Hello world', ['world'], ('<', '>'))
+    'Hello <world>'
+
+    """
+
+    # split string into words, spaces, punctuation and markup tags
+    _split_re = re.compile(r'<\w+[^>]*>|</\w+>|[\w\']+|\s+|[^\w\'\s<>/]+')
+
+    def __init__(self, language_code='en', stemmer=None):
+        """Create a new highlighter for the specified language.
+
+        """
+        if stemmer is not None:
+            self.stem = stemmer
+        else:
+            self.stem = xapian.Stem(language_code)
+
+    def _split_text(self, text, strip_tags=False):
+        """Split some text into words and non-words.
+
+        - `text` is the text to process.  It may be a unicode object or a utf-8
+          encoded simple string.
+        - `strip_tags` is a flag - False to keep tags, True to strip all tags
+          from the output.
+
+        Returns a list of utf-8 encoded simple strings.
+
+        """
+        if isinstance(text, unicode):
+            text = text.encode('utf-8')
+
+        words = self._split_re.findall(text)
+        if strip_tags:
+            return [w for w in words if w[0] != '<']
+        else:
+            return words
+
+    def _strip_prefix(self, term):
+        """Strip the prefix off a term.
+
+        Prefixes are any initial capital letters, with the exception that R always
+        ends a prefix, even if followed by capital letters.
+
+        >>> hl = Highlighter("en")
+        >>> print hl._strip_prefix('hello')
+        hello
+        >>> print hl._strip_prefix('Rhello')
+        hello
+        >>> print hl._strip_prefix('XARHello')
+        Hello
+        >>> print hl._strip_prefix('XAhello')
+        hello
+        >>> print hl._strip_prefix('XAh')
+        h
+        >>> print hl._strip_prefix('XA')
+        <BLANKLINE>
+
+        """
+        for p in xrange(len(term)):
+            if term[p].islower():
+                return term[p:]
+            elif term[p] == 'R':
+                return term[p+1:]
+        return ''
+
+    def _query_to_stemmed_words(self, query):
+        """Convert a query to a list of stemmed words.
+
+        - `query` is the query to parse: it may be a xapian.Query object, or a
+          sequence of terms.
+
+        """
+        if isinstance(query, xapian.Query):
+            return [self._strip_prefix(t) for t in query]
+        else:
+            return [self.stem(q.lower()) for q in query]
+
+
+    def makeSample(self, text, query, maxlen=600, hl=None):
+        """Make a contextual summary from the supplied text.
+
+        This basically works by splitting the text into phrases, counting the query
+        terms in each, and keeping those with the most.
+
+        Any markup tags in the text will be stripped.
+
+        `text` is the source text to summarise.
+        `query` is either a Xapian query object or a list of (unstemmed) term strings.
+        `maxlen` is the maximum length of the generated summary.
+        `hl` is a pair of strings to insert around highlighted terms, e.g. ('<b>', '</b>')
+
+        """
+
+        # coerce maxlen into an int, otherwise truncation doesn't happen
+        maxlen = int(maxlen)
+
+        words = self._split_text(text, True)
+        terms = self._query_to_stemmed_words(query)
+        
+        # build blocks delimited by punctuation, and count matching words in each block
+        # blocks[n] is a block [firstword, endword, charcount, termcount, selected]
+        blocks = []
+        start = end = count = blockchars = 0
+
+        while end < len(words):
+            blockchars += len(words[end])
+            if words[end].isalnum():
+                if self.stem(words[end].lower()) in terms:
+                    count += 1
+                end += 1
+            elif words[end] in ',.;:?!\n':
+                end += 1
+                blocks.append([start, end, blockchars, count, False])
+                start = end
+                blockchars = 0
+                count = 0
+            else:
+                end += 1
+        if start != end:
+            blocks.append([start, end, blockchars, count, False])
+        if len(blocks) == 0:
+            return ''
+
+        # select high-scoring blocks first, down to zero-scoring
+        chars = 0
+        for count in xrange(3, -1, -1):
+            for b in blocks:
+                if b[3] >= count:
+                    b[4] = True
+                    chars += b[2]
+                    if chars >= maxlen: break
+            if chars >= maxlen: break
+
+        # assemble summary
+        words2 = []
+        lastblock = -1
+        for i, b in enumerate(blocks):
+            if b[4]:
+                if i != lastblock + 1:
+                    words2.append('..')
+                words2.extend(words[b[0]:b[1]])
+                lastblock = i
+
+        if not blocks[-1][4]:
+            words2.append('..')
+
+        # trim down to maxlen
+        l = 0
+        for i in xrange (len (words2)):
+            l += len (words2[i])
+            if l >= maxlen:
+                words2[i:] = ['..']
+                break
+
+        if hl is None:
+            return ''.join(words2)
+        else:
+            return self._hl(words2, terms, hl)
+
+    def highlight(self, text, query, hl, strip_tags=False):
+        """Add highlights (string prefix/postfix) to a string.
+
+        `text` is the source to highlight.
+        `query` is either a Xapian query object or a list of (unstemmed) term strings.
+        `hl` is a pair of highlight strings, e.g. ('<i>', '</i>')
+        `strip_tags` strips HTML markup if True
+
+        >>> hl = Highlighter()
+        >>> qp = xapian.QueryParser()
+        >>> q = qp.parse_query('cat dog')
+        >>> tags = ('[[', ']]')
+        >>> hl.highlight('The cat went Dogging; but was <i>dog tired</i>.', q, tags)
+        'The [[cat]] went [[Dogging]]; but was <i>[[dog]] tired</i>.'
+
+        """
+        words = self._split_text(text, strip_tags)
+        terms = self._query_to_stemmed_words(query)
+        return self._hl(words, terms, hl)
+
+    def _hl(self, words, terms, hl):
+        """Add highlights to a list of words.
+        
+        `words` is the list of words and non-words to be highlighted.
+        `terms` is the list of stemmed words to look for.
+
+        """
+        for i, w in enumerate(words):
+            # HACK - more forgiving about stemmed terms 
+            wl = w.lower()
+            if wl in terms or self.stem (wl) in terms:
+                words[i] = ''.join((hl[0], w, hl[1]))
+
+        return ''.join(words)
+
+
+__test__ = {
+    'no_punc': r'''
+
+    Test the highlighter's behaviour when there is no punctuation in the sample
+    text (regression test - used to return no output):
+    >>> hl = Highlighter("en")
+    >>> hl.makeSample('Hello world', ['world'])
+    'Hello world'
+
+    ''',
+
+    'stem_levels': r'''
+
+    Test highlighting of words, and how it works with stemming:
+    >>> hl = Highlighter("en")
+
+    # "word" and "wording" stem to "word", so the following 4 calls all return
+    # the same thing
+    >>> hl.makeSample('Hello. word. wording. wordinging.', ['word'], hl='<>')
+    'Hello. <word>. <wording>. wordinging.'
+    >>> hl.highlight('Hello. word. wording. wordinging.', ['word'], '<>')
+    'Hello. <word>. <wording>. wordinging.'
+    >>> hl.makeSample('Hello. word. wording. wordinging.', ['wording'], hl='<>')
+    'Hello. <word>. <wording>. wordinging.'
+    >>> hl.highlight('Hello. word. wording. wordinging.', ['wording'], '<>')
+    'Hello. <word>. <wording>. wordinging.'
+
+    # "wordinging" stems to "wording", so only the last two words are
+    # highlighted for this one.
+    >>> hl.makeSample('Hello. word. wording. wordinging.', ['wordinging'], hl='<>')
+    'Hello. word. <wording>. <wordinging>.'
+    >>> hl.highlight('Hello. word. wording. wordinging.', ['wordinging'], '<>')
+    'Hello. word. <wording>. <wordinging>.'
+    ''',
+
+    'supplied_stemmer': r'''
+
+    Test behaviour if we pass in our own stemmer:
+    >>> stem = xapian.Stem('en')
+    >>> hl = Highlighter(stemmer=stem)
+    >>> hl.highlight('Hello. word. wording. wordinging.', ['word'], '<>')
+    'Hello. <word>. <wording>. wordinging.'
+
+    ''',
+
+    'unicode': r'''
+
+    Test behaviour if we pass in unicode input:
+    >>> hl = Highlighter('en')
+    >>> hl.highlight(u'Hello\xf3. word. wording. wordinging.', ['word'], '<>')
+    'Hello\xc3\xb3. <word>. <wording>. wordinging.'
+
+    ''',
+
+    'no_sample': r'''
+
+    Test behaviour if we pass in an empty input string:
+    >>> hl = Highlighter('en')
+    >>> hl.makeSample(u'', ['word'])
+    ''
+
+    ''',
+
+    'short_samples': r'''
+
+    >>> hl = Highlighter('en')
+    >>> hl.makeSample("A boring start.  Hello world indeed.  A boring end.", ['hello'], 20, ('<', '>'))
+    '..  <Hello> world ..'
+    >>> hl.makeSample("A boring start.  Hello world indeed.  A boring end.", ['hello'], 40, ('<', '>'))
+    'A boring start.  <Hello> world indeed...'
+    >>> hl.makeSample("A boring start.  Hello world indeed.  A boring end.", ['boring'], 40, ('<', '>'))
+    'A <boring> start...  A <boring> end.'
+
+    ''',
+
+    'apostrophes': r'''
+
+    >>> hl = Highlighter('en')
+    >>> hl.makeSample("A boring start.  Hello world's indeed.  A boring end.", ['world'], 40, ('<', '>'))
+    "A boring start.  Hello <world's> indeed..."
+
+    ''',
+
+}
+
+if __name__ == '__main__':
+    import doctest, sys
+    doctest.testmod (sys.modules[__name__])
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MoinMoin/support/xappy/highlight_doctest1.txt	Mon Jul 27 17:35:13 2009 +0200
@@ -0,0 +1,3 @@
+>>> teststr = r''''Python Tutorial Previous: 9. Classes Up: Python Tutorial Next: 11. Brief Tour of Subsections 10.1 Operating System Interface 10.2 File Wildcards 10.3 Command Line Arguments 10.4 Error Output Redirection and Program Termination 10.5 String Pattern Matching 10.6 Mathematics 10.7 Internet Access 10.8 Dates and Times 10.9 Data Compression 10.10 Performance Measurement 10.11 Quality Control 10.12 Batteries Included 10. Brief Tour of the Standard Library 10.1 Operating System Interface The os module provides dozens of functions for interacting with the operating system: >>> import os >>> os.system(\'time 0:02\') 0 >>> os.getcwd() # Return the current working directory \'C:\\\\Python25\' >>> os.chdir(\'/server/accesslogs\') Be sure to use the "import os" style instead of "from os import *". This will keep os.open() from shadowing the builtin open() function which operates much differently. The builtin dir() and help() functions are useful as interactive aids for working with large modules like os: >>> import os >>> dir(os) <returns a list of all module functions> >>> help(os) <returns an extensive manual page created from the module\'s docstrings> For daily file and directory management tasks, the shutil module provides a higher level interface that is easier to use: >>> import shutil >>> shutil.copyfile(\'data.db\', \'archive.db\') >>> shutil.move(\'/build/executables\', \'installdir\') 10.2 File Wildcards The glob module provides a function for making file lists from directory wildcard searches: >>> import glob >>> glob.glob(\'*.py\') [\'primes.py\', \'random.py\', \'quote.py\'] 10.3 Command Line Arguments Common utility scripts often need to process command line arguments. These arguments are stored in the sys module\'s argv attribute as a list. For instance the following output results from running "python demo.py one two three" at the command line: >>> import sys >>> print sys.argv [\'demo.py\', \'one\', \'two\', \'three\'] The getopt module processes sys.argv using the conventions of the Unix getopt() function. More powerful and flexible command line processing is provided by the optparse module. 10.4 Error Output Redirection and Program Termination The sys module also has attributes for stdin, stdout, and stderr. The latter is useful for emitting warnings and error messages to make them visible even when stdout has been redirected: >>> sys.stderr.write(\'Warning, log file not found starting a new one\\n\') Warning, log file not found starting a new one The most direct way to terminate a script is to use "sys.exit()". 10.5 String Pattern Matching The re module provides regular expression tools for advanced string processing. 
For complex matching and manipulation, regular expressions offer succinct, optimized solutions: >>> import re >>> re.findall(r\'\\bf[a-z]*\', \'which foot or hand fell fastest\') [\'foot\', \'fell\', \'fastest\'] >>> re.sub(r\'(\\b[a-z]+) \\1\', r\'\\1\', \'cat in the the hat\') \'cat in the hat\' When only simple capabilities are needed, string methods are preferred because they are easier to read and debug: >>> \'tea for too\'.replace(\'too\', \'two\') \'tea for two\' 10.6 Mathematics The math module gives access to the underlying C library functions for floating point math: >>> import math >>> math.cos(math.pi / 4.0) 0.70710678118654757 >>> math.log(1024, 2) 10.0 The random module provides tools for making random selections: >>> import random >>> random.choice([\'apple\', \'pear\', \'banana\']) \'apple\' >>> random.sample(xrange(100), 10) # sampling without replacement [30, 83, 16, 4, 8, 81, 41, 50, 18, 33] >>> random.random() # random float 0.17970987693706186 >>> random.randrange(6) # random integer chosen from range(6) 4 10.7 Internet Access There are a number of modules for accessing the internet and processing internet protocols. Two of the simplest are urllib2 for retrieving data from urls and smtplib for sending mail: >>> import urllib2 >>> for line in urllib2.urlopen(\'http://tycho.usno.navy.mil/cgi-bin/timer.pl\'): ... if \'EST\' in line or \'EDT\' in line: # look for Eastern Time ... print line <BR>Nov. 25, 09:43:32 PM EST >>> import smtplib >>> server = smtplib.SMTP(\'localhost\') >>> server.sendmail(\'soothsayer@example.org\', \'jcaesar@example.org\', """To: jcaesar@example.org From: soothsayer@example.org Beware the Ides of March. """) >>> server.quit() 10.8 Dates and Times The datetime module supplies classes for manipulating dates and times in both simple and complex ways. While date and time arithmetic is supported, the focus of the implementation is on efficient member extraction for output formatting and manipulation. The module also supports objects that are timezone aware. # dates are easily constructed and formatted >>> from datetime import date >>> now = date.today() >>> now datetime.date(2003, 12, 2) >>> now.strftime("%m-%d-%y. %d %b %Y is a %A on the %d day of %B.") \'12-02-03. 02 Dec 2003 is a Tuesday on the 02 day of December.\' # dates support calendar arithmetic >>> birthday = date(1964, 7, 31) >>> age = now - birthday >>> age.days 14368 10.9 Data Compression Common data archiving and compression formats are directly supported by modules including: zlib, gzip, bz2, zipfile, and tarfile. >>> import zlib >>> s = \'witch which has which witches wrist watch\' >>> len(s) 41 >>> t = zlib.compress(s) >>> len(t) 37 >>> zlib.decompress(t) \'witch which has which witches wrist watch\' >>> zlib.crc32(s) 226805979 10.10 Performance Measurement Some Python users develop a deep interest in knowing the relative performance of different approaches to the same problem. Python provides a measurement tool that answers those questions immediately. For example, it may be tempting to use the tuple packing and unpacking feature instead of the traditional approach to swapping arguments. The timeit module quickly demonstrates a modest performance advantage: >>> from timeit import Timer >>> Timer(\'t=a; a=b; b=t\', \'a=1; b=2\').timeit() 0.57535828626024577 >>> Timer(\'a,b = b,a\', \'a=1; b=2\').timeit() 0.54962537085770791 In contrast to timeit\'s fine level of granularity, the profile and pstats modules provide tools for identifying time critical sections in larger blocks of code. 
10.11 Quality Control One approach for developing high quality software is to write tests for each function as it is developed and to run those tests frequently during the development process. The doctest module provides a tool for scanning a module and validating tests embedded in a program\'s docstrings. Test construction is as simple as cutting-and-pasting a typical call along with its results into the docstring. This improves the documentation by providing the user with an example and it allows the doctest module to make sure the code remains true to the documentation: def average(values): """Computes the arithmetic mean of a list of numbers. >>> print average([20, 30, 70]) 40.0 """ return sum(values, 0.0) / len(values) import doctest doctest.testmod() # automatically validate the embedded tests The unittest module is not as effortless as the doctest module, but it allows a more comprehensive set of tests to be maintained in a separate file: import unittest class TestStatisticalFunctions(unittest.TestCase): def test_average(self): self.assertEqual(average([20, 30, 70]), 40.0) self.assertEqual(round(average([1, 5, 7]), 1), 4.3) self.assertRaises(ZeroDivisionError, average, []) self.assertRaises(TypeError, average, 20, 30, 70) unittest.main() # Calling from the command line invokes all tests 10.12 Batteries Included Python has a ``batteries included\'\' philosophy. This is best seen through the sophisticated and robust capabilities of its larger packages. For example: The xmlrpclib and SimpleXMLRPCServer modules make implementing remote procedure calls into an almost trivial task. Despite the modules names, no direct knowledge or handling of XML is needed. The email package is a library for managing email messages, including MIME and other RFC 2822-based message documents. Unlike smtplib and poplib which actually send and receive messages, the email package has a complete toolset for building or decoding complex message structures (including attachments) and for implementing internet encoding and header protocols. The xml.dom and xml.sax packages provide robust support for parsing this popular data interchange format. Likewise, the csv module supports direct reads and writes in a common database format. Together, these modules and packages greatly simplify data interchange between python applications and other tools. Internationalization is supported by a number of modules including gettext, locale, and the codecs package. Python Tutorial Previous: 9. Classes Up: Python Tutorial Next: 11. Br'''
+>>> hl = Highlighter()
+>>> out = hl.highlight(teststr, ('print',), ('<i>', '</i>'))
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MoinMoin/support/xappy/indexerconnection.py	Mon Jul 27 17:35:13 2009 +0200
@@ -0,0 +1,823 @@
+#!/usr/bin/env python
+#
+# Copyright (C) 2007 Lemur Consulting Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+r"""indexerconnection.py: A connection to the search engine for indexing.
+
+"""
+__docformat__ = "restructuredtext en"
+
+import _checkxapian
+import cPickle
+import xapian
+
+from datastructures import *
+import errors
+from fieldactions import *
+import fieldmappings
+import memutils
+from replaylog import log
+
+class IndexerConnection(object):
+    """A connection to the search engine for indexing.
+
+    """
+
+    def __init__(self, indexpath):
+        """Create a new connection to the index.
+
+        Only one indexer connection may be open for a particular database at a
+        given time.  Therefore, if an indexer connection to the database is
+        already open, this will raise a xapian.DatabaseLockError.
+
+        If the database doesn't already exist, it will be created.
+
+        """
+        self._index = log(xapian.WritableDatabase, indexpath, xapian.DB_CREATE_OR_OPEN)
+        self._indexpath = indexpath
+
+        # Read existing actions.
+        self._field_actions = {}
+        self._field_mappings = fieldmappings.FieldMappings()
+        self._facet_hierarchy = {}
+        self._facet_query_table = {}
+        self._next_docid = 0
+        self._config_modified = False
+        self._load_config()
+
+        # Set management of the memory used.
+        # This can be removed once Xapian implements this itself.
+        self._mem_buffered = 0
+        self.set_max_mem_use()
+
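+    # A minimal usage sketch of the connection lifecycle described above (the
+    # database path 'exampledb' is hypothetical); close() flushes any pending
+    # changes:
+    #
+    #   conn = IndexerConnection('exampledb')   # created if it doesn't exist
+    #   try:
+    #       pass  # add(), replace() or delete() documents here
+    #   finally:
+    #       conn.close()
+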
+    def set_max_mem_use(self, max_mem=None, max_mem_proportion=None):
+        """Set the maximum memory to use.
+
+        This call sets the amount of memory used to buffer changes before they
+        are flushed.  This will affect the speed of indexing, but should not
+        result in other changes to the indexing.
+
+        Note: this is an approximate measure - the actual amount of memory used
+        may exceed the specified amount.  Also, note that future versions of
+        xapian are likely to implement this differently, so this setting may be
+        entirely ignored.
+
+        The absolute amount of memory to use (in bytes) may be set by setting
+        max_mem.  Alternatively, the proportion of the available memory may be
+        set by setting max_mem_proportion (this should be a value between 0 and
+        1).
+
+        Setting too low a value will result in excessive flushing, and very
+        slow indexing.  Setting too high a value will result in excessive
+        buffering, leading to swapping, and very slow indexing.
+
+        A reasonable default for max_mem_proportion for a system which is
+        dedicated to indexing is probably 0.5: if other tasks are also being
+        performed on the system, the value should be lowered.
+
+        """
+        if self._index is None:
+            raise errors.IndexerError("IndexerConnection has been closed")
+        if max_mem is not None and max_mem_proportion is not None:
+            raise errors.IndexerError("Only one of max_mem and "
+                                       "max_mem_proportion may be specified")
+
+        if max_mem is None and max_mem_proportion is None:
+            self._max_mem = None
+
+        if max_mem_proportion is not None:
+            physmem = memutils.get_physical_memory()
+            if physmem is not None:
+                max_mem = int(physmem * max_mem_proportion)
+
+        self._max_mem = max_mem
+
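+    # A minimal sketch of the two ways to bound indexing memory described in
+    # set_max_mem_use() above ('conn' is a hypothetical open connection):
+    #
+    #   conn.set_max_mem_use(max_mem_proportion=0.5)    # half of physical RAM
+    #   conn.set_max_mem_use(max_mem=64 * 1024 * 1024)  # or an absolute 64MB
+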
+    def _store_config(self):
+        """Store the configuration for the database.
+
+        Currently, this stores the configuration in a file in the database
+        directory, so changes to it are not protected by transactions.  When
+        support is available in xapian for storing metadata associated with
+        databases, this will be used instead of a file.
+
+        """
+        assert self._index is not None
+
+        config_str = cPickle.dumps((
+                                     self._field_actions,
+                                     self._field_mappings.serialise(),
+                                     self._facet_hierarchy,
+                                     self._facet_query_table,
+                                     self._next_docid,
+                                    ), 2)
+        log(self._index.set_metadata, '_xappy_config', config_str)
+
+        self._config_modified = False
+
+    def _load_config(self):
+        """Load the configuration for the database.
+
+        """
+        assert self._index is not None
+
+        config_str = log(self._index.get_metadata, '_xappy_config')
+        if len(config_str) == 0:
+            return
+
+        try:
+            (self._field_actions, mappings, self._facet_hierarchy, self._facet_query_table, self._next_docid) = cPickle.loads(config_str)
+        except ValueError:
+            # Backwards compatibility - configuration used to lack _facet_hierarchy and _facet_query_table
+            (self._field_actions, mappings, self._next_docid) = cPickle.loads(config_str)
+            self._facet_hierarchy = {}
+            self._facet_query_table = {}
+        self._field_mappings = fieldmappings.FieldMappings(mappings)
+
+        self._config_modified = False
+
+    def _allocate_id(self):
+        """Allocate a new ID.
+
+        """
+        while True:
+            idstr = "%x" % self._next_docid
+            self._next_docid += 1
+            if not self._index.term_exists('Q' + idstr):
+                break
+        self._config_modified = True
+        return idstr
+
+    def add_field_action(self, fieldname, fieldtype, **kwargs):
+        """Add an action to be performed on a field.
+
+        Note that this change to the configuration will not be preserved on
+        disk until the next call to flush().
+
+        """
+        if self._index is None:
+            raise errors.IndexerError("IndexerConnection has been closed")
+        if fieldname in self._field_actions:
+            actions = self._field_actions[fieldname]
+        else:
+            actions = FieldActions(fieldname)
+            self._field_actions[fieldname] = actions
+        actions.add(self._field_mappings, fieldtype, **kwargs)
+        self._config_modified = True
+
+    def clear_field_actions(self, fieldname):
+        """Clear all actions for the specified field.
+
+        This does not report an error if there are already no actions for the
+        specified field.
+
+        Note that this change to the configuration will not be preserved on
+        disk until the next call to flush().
+
+        """
+        if self._index is None:
+            raise errors.IndexerError("IndexerConnection has been closed")
+        if fieldname in self._field_actions:
+            del self._field_actions[fieldname]
+            self._config_modified = True
+
+    def get_fields_with_actions(self):
+        """Get a list of field names which have actions defined.
+
+        """
+        if self._index is None:
+            raise errors.IndexerError("IndexerConnection has been closed")
+        return self._field_actions.keys()
+
+    def process(self, document):
+        """Process an UnprocessedDocument with the settings in this database.
+
+        The resulting ProcessedDocument is returned.
+
+        Note that this processing will be automatically performed if an
+        UnprocessedDocument is supplied to the add() or replace() methods of
+        IndexerConnection.  This method is exposed to allow the processing to
+        be performed separately, which may be desirable if you wish to manually
+        modify the processed document before adding it to the database, or if
+        you want to split processing of documents from adding documents to the
+        database for performance reasons.
+
+        """
+        if self._index is None:
+            raise errors.IndexerError("IndexerConnection has been closed")
+        result = ProcessedDocument(self._field_mappings)
+        result.id = document.id
+        context = ActionContext(self._index)
+
+        for field in document.fields:
+            try:
+                actions = self._field_actions[field.name]
+            except KeyError:
+                # If no actions are defined, just ignore the field.
+                continue
+            actions.perform(result, field.value, context)
+
+        return result
+
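+    # A minimal sketch of processing documents separately from adding them, as
+    # described in process() above ('conn' and 'mydoc' are hypothetical):
+    #
+    #   pdoc = conn.process(mydoc)   # returns a ProcessedDocument
+    #   ...                          # optionally adjust pdoc before indexing
+    #   conn.add(pdoc)               # add the already-processed document
+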
+    def _get_bytes_used_by_doc_terms(self, xapdoc):
+        """Get an estimate of the bytes used by the terms in a document.
+
+        (This is a very rough estimate.)
+
+        """
+        count = 0
+        for item in xapdoc.termlist():
+            # The term may also be stored in the spelling correction table, so
+            # double the amount used.
+            count += len(item.term) * 2
+
+            # Add a few more bytes for holding the wdf, and other bits and
+            # pieces.
+            count += 8
+
+        # Empirical observations indicate that about 5 times as much memory as
+        # the above calculation predicts is used for buffering in practice.
+        return count * 5
+
+    def add(self, document):
+        """Add a new document to the search engine index.
+
+        If the document has an id set, and the id already exists in
+        the database, an exception will be raised.  Use the replace() method
+        instead if you wish to overwrite documents.
+
+        Returns the id of the newly added document (making up a new
+        unique ID if no id was set).
+
+        The supplied document may be an instance of UnprocessedDocument, or an
+        instance of ProcessedDocument.
+
+        """
+        if self._index is None:
+            raise errors.IndexerError("IndexerConnection has been closed")
+        if not hasattr(document, '_doc'):
+            # It's not a processed document.
+            document = self.process(document)
+
+        # Ensure that we have an id
+        orig_id = document.id
+        if orig_id is None:
+            id = self._allocate_id()
+            document.id = id
+        else:
+            id = orig_id
+            if self._index.term_exists('Q' + id):
+                raise errors.IndexerError("Document ID of document supplied to add() is not unique.")
+            
+        # Add the document.
+        xapdoc = document.prepare()
+        self._index.add_document(xapdoc)
+
+        if self._max_mem is not None:
+            self._mem_buffered += self._get_bytes_used_by_doc_terms(xapdoc)
+            if self._mem_buffered > self._max_mem:
+                self.flush()
+
+        if id is not orig_id:
+            document.id = orig_id
+        return id
+
+    def replace(self, document):
+        """Replace a document in the search engine index.
+
+        If the document does not have an id set, an exception will be
+        raised.
+
+        If the document has an id set, and the id does not already
+        exist in the database, this method will have the same effect as add().
+
+        """
+        if self._index is None:
+            raise errors.IndexerError("IndexerConnection has been closed")
+        if not hasattr(document, '_doc'):
+            # It's not a processed document.
+            document = self.process(document)
+
+        # Ensure that we have an id
+        id = document.id
+        if id is None:
+            raise errors.IndexerError("No document ID set for document supplied to replace().")
+
+        xapdoc = document.prepare()
+        self._index.replace_document('Q' + id, xapdoc)
+
+        if self._max_mem is not None:
+            self._mem_buffered += self._get_bytes_used_by_doc_terms(xapdoc)
+            if self._mem_buffered > self._max_mem:
+                self.flush()
+
+    def _make_synonym_key(self, original, field):
+        """Make a synonym key (ie, the term or group of terms to store in
+        xapian).
+
+        """
+        if field is not None:
+            prefix = self._field_mappings.get_prefix(field)
+        else:
+            prefix = ''
+        original = original.lower()
+        # Add the prefix to the start of each word.
+        return ' '.join((prefix + word for word in original.split(' ')))
+
+    def add_synonym(self, original, synonym, field=None,
+                    original_field=None, synonym_field=None):
+        """Add a synonym to the index.
+
+         - `original` is the word or words which will be synonym expanded in
+           searches (if multiple words are specified, each word should be
+           separated by a single space).
+         - `synonym` is a synonym for `original`.
+         - `field` is the field which the synonym is specific to.  If no field
+           is specified, the synonym will be used for searches which are not
+           specific to any particular field.
+
+        """
+        if self._index is None:
+            raise errors.IndexerError("IndexerConnection has been closed")
+        if original_field is None:
+            original_field = field
+        if synonym_field is None:
+            synonym_field = field
+        key = self._make_synonym_key(original, original_field)
+        # FIXME - this only works for exact fields which have no upper case
+        # characters, or single words
+        value = self._make_synonym_key(synonym, synonym_field)
+        self._index.add_synonym(key, value)
+
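+    # A minimal sketch of managing synonyms ('conn' is hypothetical, and
+    # 'title' is assumed to be a configured field); see also remove_synonym()
+    # and clear_synonyms() below:
+    #
+    #   conn.add_synonym('foo', 'bar')                  # unfielded synonym
+    #   conn.add_synonym('foo', 'baz', field='title')   # field-specific
+    #   conn.remove_synonym('foo', 'bar')
+    #   conn.clear_synonyms('foo')                      # drop all for 'foo'
+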
+    def remove_synonym(self, original, synonym, field=None):
+        """Remove a synonym from the index.
+
+         - `original` is the word or words which will be synonym expanded in
+           searches (if multiple words are specified, each word should be
+           separated by a single space).
+         - `synonym` is a synonym for `original`.
+         - `field` is the field which this synonym is specific to.  If no field
+           is specified, the synonym will be used for searches which are not
+           specific to any particular field.
+
+        """
+        if self._index is None:
+            raise errors.IndexerError("IndexerConnection has been closed")
+        key = self._make_synonym_key(original, field)
+        self._index.remove_synonym(key, synonym.lower())
+
+    def clear_synonyms(self, original, field=None):
+        """Remove all synonyms for a word (or phrase).
+
+         - `field` is the field which this synonym is specific to.  If no field
+           is specified, the synonym will be used for searches which are not
+           specific to any particular field.
+
+        """
+        if self._index is None:
+            raise errors.IndexerError("IndexerConnection has been closed")
+        key = self._make_synonym_key(original, field)
+        self._index.clear_synonyms(key)
+
+    def _assert_facet(self, facet):
+        """Raise an error if facet is not a declared facet field.
+
+        """
+        for action in self._field_actions[facet]._actions:
+            if action == FieldActions.FACET:
+                return
+        raise errors.IndexerError("Field %r is not indexed as a facet" % facet)
+
+    def add_subfacet(self, subfacet, facet):
+        """Add a subfacet-facet relationship to the facet hierarchy.
+        
+        Any existing relationship for that subfacet is replaced.
+
+        Raises a KeyError if either facet or subfacet is not a field,
+        and an IndexerError if either facet or subfacet is not a facet field.
+        """
+        if self._index is None:
+            raise errors.IndexerError("IndexerConnection has been closed")
+        self._assert_facet(facet)
+        self._assert_facet(subfacet)
+        self._facet_hierarchy[subfacet] = facet
+        self._config_modified = True
+
+    def remove_subfacet(self, subfacet):
+        """Remove any existing facet hierarchy relationship for a subfacet.
+
+        """
+        if self._index is None:
+            raise errors.IndexerError("IndexerConnection has been closed")
+        if subfacet in self._facet_hierarchy:
+            del self._facet_hierarchy[subfacet]
+            self._config_modified = True
+
+    def get_subfacets(self, facet):
+        """Get a list of subfacets of a facet.
+
+        """
+        if self._index is None:
+            raise errors.IndexerError("IndexerConnection has been closed")
+        return [k for k, v in self._facet_hierarchy.iteritems() if v == facet] 
+
+    FacetQueryType_Preferred = 1
+    FacetQueryType_Never = 2
+    def set_facet_for_query_type(self, query_type, facet, association):
+        """Set the association between a query type and a facet.
+
+        The value of `association` must be one of
+        IndexerConnection.FacetQueryType_Preferred,
+        IndexerConnection.FacetQueryType_Never or None. A value of None removes
+        any previously set association.
+
+        """
+        if self._index is None:
+            raise errors.IndexerError("IndexerConnection has been closed")
+        if query_type is None:
+            raise errors.IndexerError("Cannot set query type information for None")
+        self._assert_facet(facet)
+        if query_type not in self._facet_query_table:
+            self._facet_query_table[query_type] = {}
+        if association is None:
+            if facet in self._facet_query_table[query_type]:
+                del self._facet_query_table[query_type][facet]
+        else:
+            self._facet_query_table[query_type][facet] = association
+        if self._facet_query_table[query_type] == {}:
+            del self._facet_query_table[query_type]
+        self._config_modified = True
+
+    def get_facets_for_query_type(self, query_type, association):
+        """Get the set of facets associated with a query type.
+
+        Only those facets associated with the query type in the specified
+        manner are returned; `association` must be one of
+        IndexerConnection.FacetQueryType_Preferred or
+        IndexerConnection.FacetQueryType_Never.
+
+        If the query type has no facets associated with it, None is returned.
+
+        """
+        if self._index is None:
+            raise errors.IndexerError("IndexerConnection has been closed")
+        if query_type not in self._facet_query_table:
+            return None
+        facet_dict = self._facet_query_table[query_type]
+        return set([facet for facet, assoc in facet_dict.iteritems() if assoc == association])
+
+    def set_metadata(self, key, value):
+        """Set an item of metadata stored in the connection.
+
+        The value supplied will be returned by subsequent calls to
+        get_metadata() which use the same key.
+
+        Keys with a leading underscore are reserved for internal use - you
+        should not use such keys unless you really know what you are doing.
+
+        This will store the value supplied in the database.  It will not be
+        visible to readers (ie, search connections) until after the next flush.
+
+        The key is limited to about 200 characters (the same length as a term
+        is limited to).  The value can be several megabytes in size.
+
+        To remove an item of metadata, simply call this with a `value`
+        parameter containing an empty string.
+
+        """
+        if self._index is None:
+            raise errors.IndexerError("IndexerConnection has been closed")
+        if not hasattr(self._index, 'set_metadata'):
+            raise errors.IndexerError("Version of xapian in use does not support metadata")
+        log(self._index.set_metadata, key, value)
+
+    def get_metadata(self, key):
+        """Get an item of metadata stored in the connection.
+
+        This returns a value stored by a previous call to set_metadata.
+
+        If the value is not found, this will return the empty string.
+
+        """
+        if self._index is None:
+            raise errors.IndexerError("IndexerConnection has been closed")
+        if not hasattr(self._index, 'get_metadata'):
+            raise errors.IndexerError("Version of xapian in use does not support metadata")
+        return log(self._index.get_metadata, key)
+
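+    # A minimal sketch of storing connection-level metadata ('conn' is
+    # hypothetical); values only become visible to readers after a flush:
+    #
+    #   conn.set_metadata('schema_version', '1')
+    #   conn.flush()
+    #   conn.get_metadata('schema_version')      # -> '1'
+    #   conn.set_metadata('schema_version', '')  # an empty value removes it
+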
+    def delete(self, id):
+        """Delete a document from the search engine index.
+
+        If the id does not already exist in the database, this method
+        will have no effect (and will not report an error).
+
+        """
+        if self._index is None:
+            raise errors.IndexerError("IndexerConnection has been closed")
+        self._index.delete_document('Q' + id)
+
+    def flush(self):
+        """Apply recent changes to the database.
+
+        If an exception occurs, any changes since the last call to flush() may
+        be lost.
+
+        """
+        if self._index is None:
+            raise errors.IndexerError("IndexerConnection has been closed")
+        if self._config_modified:
+            self._store_config()
+        self._index.flush()
+        self._mem_buffered = 0
+
+    def close(self):
+        """Close the connection to the database.
+
+        It is important to call this method before allowing the class to be
+        garbage collected, because it will ensure that any un-flushed changes
+        will be flushed.  It also ensures that the connection is cleaned up
+        promptly.
+
+        No other methods may be called on the connection after this has been
+        called.  (It is permissible to call close() multiple times, but
+        only the first call will have any effect.)
+
+        If an exception occurs, the database will be closed, but changes since
+        the last call to flush may be lost.
+
+        """
+        if self._index is None:
+            return
+        try:
+            self.flush()
+        finally:
+            # There is currently no "close()" method for xapian databases, so
+            # we have to rely on the garbage collector.  Since we never copy
+            # the _index property out of this class, there should be no cycles,
+            # so the standard python implementation should garbage collect
+            # _index straight away.  A close() method is planned to be added to
+            # xapian at some point - when it is, we should call it here to make
+            # the code more robust.
+            self._index = None
+            self._indexpath = None
+            self._field_actions = None
+            self._config_modified = False
+
+    def get_doccount(self):
+        """Count the number of documents in the database.
+
+        This count reflects changes (documents added or removed) which have
+        not yet been flushed.
+
+        """
+        if self._index is None:
+            raise errors.IndexerError("IndexerConnection has been closed")
+        return self._index.get_doccount()
+
+    def iterids(self):
+        """Get an iterator which returns all the ids in the database.
+
+        The unique IDs are currently returned in binary lexicographical sort
+        order, but this should not be relied on.
+
+        """
+        if self._index is None:
+            raise errors.IndexerError("IndexerConnection has been closed")
+        return PrefixedTermIter('Q', self._index.allterms())
+
+    def get_document(self, id):
+        """Get the document with the specified unique ID.
+
+        Raises a KeyError if there is no such document.  Otherwise, it returns
+        a ProcessedDocument.
+
+        """
+        if self._index is None:
+            raise errors.IndexerError("IndexerConnection has been closed")
+        postlist = self._index.postlist('Q' + id)
+        try:
+            plitem = postlist.next()
+        except StopIteration:
+            # Unique ID not found
+            raise KeyError('Unique ID %r not found' % id)
+        try:
+            postlist.next()
+            raise errors.IndexerError("Multiple documents " #pragma: no cover
+                                       "found with same unique ID")
+        except StopIteration:
+            # Only one instance of the unique ID found, as it should be.
+            pass
+
+        result = ProcessedDocument(self._field_mappings)
+        result.id = id
+        result._doc = self._index.get_document(plitem.docid)
+        return result
+
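+    # A minimal sketch of fetching a stored document by its unique ID ('conn'
+    # is hypothetical); an unknown ID raises KeyError, as described above:
+    #
+    #   pdoc = conn.get_document('SomeID')
+    #   pdoc.data                            # the stored field contents
+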
+    def iter_synonyms(self, prefix=""):
+        """Get an iterator over the synonyms.
+
+         - `prefix`: if specified, only synonym keys with this prefix will be
+           returned.
+
+        The iterator returns 2-tuples, in which the first item is the key (ie,
+        a 2-tuple holding the term or terms which will be synonym expanded,
+        followed by the fieldname specified (or None if no fieldname)), and the
+        second item is a tuple of strings holding the synonyms for the first
+        item.
+
+        These return values are suitable for the dict() builtin, so you can
+        write things like:
+
+         >>> conn = IndexerConnection('foo')
+         >>> conn.add_synonym('foo', 'bar')
+         >>> conn.add_synonym('foo bar', 'baz')
+         >>> conn.add_synonym('foo bar', 'foo baz')
+         >>> dict(conn.iter_synonyms())
+         {('foo', None): ('bar',), ('foo bar', None): ('baz', 'foo baz')}
+
+        """
+        if self._index is None:
+            raise errors.IndexerError("IndexerConnection has been closed")
+        return SynonymIter(self._index, self._field_mappings, prefix)
+
+    def iter_subfacets(self):
+        """Get an iterator over the facet hierarchy.
+
+        The iterator returns 2-tuples, in which the first item is the
+        subfacet and the second item is its parent facet.
+
+        The return values are suitable for the dict() builtin, for example:
+
+         >>> conn = IndexerConnection('db')
+         >>> conn.add_field_action('foo', FieldActions.FACET)
+         >>> conn.add_field_action('bar', FieldActions.FACET)
+         >>> conn.add_field_action('baz', FieldActions.FACET)
+         >>> conn.add_subfacet('foo', 'bar')
+         >>> conn.add_subfacet('baz', 'bar')
+         >>> dict(conn.iter_subfacets())
+         {'foo': 'bar', 'baz': 'bar'}
+
+        """
+        if self._index is None:
+            raise errors.IndexerError("IndexerConnection has been closed")
+        if 'facets' in _checkxapian.missing_features:
+            raise errors.IndexerError("Facets unsupported with this release of xapian")
+        return self._facet_hierarchy.iteritems()
+
+    def iter_facet_query_types(self, association):
+        """Get an iterator over query types and their associated facets.
+
+        Only facets associated with the query types in the specified manner
+        are returned; `association` must be one of IndexerConnection.FacetQueryType_Preferred
+        or IndexerConnection.FacetQueryType_Never.
+
+        The iterator returns 2-tuples, in which the first item is the query
+        type and the second item is the associated set of facets.
+
+        The return values are suitable for the dict() builtin, for example:
+
+         >>> conn = IndexerConnection('db')
+         >>> conn.add_field_action('foo', FieldActions.FACET)
+         >>> conn.add_field_action('bar', FieldActions.FACET)
+         >>> conn.add_field_action('baz', FieldActions.FACET)
+         >>> conn.set_facet_for_query_type('type1', 'foo', conn.FacetQueryType_Preferred)
+         >>> conn.set_facet_for_query_type('type1', 'bar', conn.FacetQueryType_Never)
+         >>> conn.set_facet_for_query_type('type1', 'baz', conn.FacetQueryType_Never)
+         >>> conn.set_facet_for_query_type('type2', 'bar', conn.FacetQueryType_Preferred)
+         >>> dict(conn.iter_facet_query_types(conn.FacetQueryType_Preferred))
+         {'type1': set(['foo']), 'type2': set(['bar'])}
+         >>> dict(conn.iter_facet_query_types(conn.FacetQueryType_Never))
+         {'type1': set(['bar', 'baz'])}
+
+        """
+        if self._index is None:
+            raise errors.IndexerError("IndexerConnection has been closed")
+        if 'facets' in _checkxapian.missing_features:
+            raise errors.IndexerError("Facets unsupported with this release of xapian")
+        return FacetQueryTypeIter(self._facet_query_table, association)
+
+class PrefixedTermIter(object):
+    """Iterate through all the terms with a given prefix.
+
+    """
+    def __init__(self, prefix, termiter):
+        """Initialise the prefixed term iterator.
+
+        - `prefix` is the prefix to return terms for.
+        - `termiter` is a xapian TermIterator, which should be at its start.
+
+        """
+
+        # The algorithm used in next() currently only works for single
+        # character prefixes, so assert that the prefix is single character.
+        # To deal with multicharacter prefixes, we need to check for terms
+        # which have a starting prefix equal to that given, but then have a
+        # following uppercase alphabetic character, indicating that the actual
+        # prefix is longer than the target prefix.  We then need to skip over
+        # these.  Not too hard to implement, but we don't need it yet.
+        assert(len(prefix) == 1)
+
+        self._started = False
+        self._prefix = prefix
+        self._prefixlen = len(prefix)
+        self._termiter = termiter
+
+    def __iter__(self):
+        return self
+
+    def next(self):
+        """Get the next term with the specified prefix.
+
+        """
+        if not self._started:
+            term = self._termiter.skip_to(self._prefix).term
+            self._started = True
+        else:
+            term = self._termiter.next().term
+        if len(term) < self._prefixlen or term[:self._prefixlen] != self._prefix:
+            raise StopIteration
+        return term[self._prefixlen:]
+
+
+class SynonymIter(object):
+    """Iterate through a list of synonyms.
+
+    """
+    def __init__(self, index, field_mappings, prefix):
+        """Initialise the synonym iterator.
+
+         - `index` is the index to get the synonyms from.
+         - `field_mappings` is the FieldMappings object for the iterator.
+         - `prefix` is the prefix to restrict the returned synonyms to.
+
+        """
+        self._index = index
+        self._field_mappings = field_mappings
+        self._syniter = self._index.synonym_keys(prefix)
+
+    def __iter__(self):
+        return self
+
+    def next(self):
+        """Get the next synonym.
+
+        """
+        synkey = self._syniter.next()
+        pos = 0
+        for char in synkey:
+            if char.isupper(): pos += 1
+            else: break
+        if pos == 0:
+            fieldname = None
+            terms = synkey
+        else:
+            prefix = synkey[:pos]
+            fieldname = self._field_mappings.get_fieldname_from_prefix(prefix)
+            terms = ' '.join((term[pos:] for term in synkey.split(' ')))
+        synval = tuple(self._index.synonyms(synkey))
+        return ((terms, fieldname), synval)
+
+class FacetQueryTypeIter(object):
+    """Iterate through all the query types and their associated facets.
+
+    """
+    def __init__(self, facet_query_table, association):
+        """Initialise the query type facet iterator.
+
+        Only facets associated with each query type in the specified
+        manner are returned (`association` must be one of
+        IndexerConnection.FacetQueryType_Preferred or
+        IndexerConnection.FacetQueryType_Never).
+
+        """
+        self._table_iter = facet_query_table.iteritems()
+        self._association = association
+
+    def __iter__(self):
+        return self
+
+    def next(self):
+        """Get the next (query type, facet set) 2-tuple.
+
+        """
+        query_type, facet_dict = self._table_iter.next()
+        facet_list = [facet for facet, association in facet_dict.iteritems() if association == self._association]
+        if len(facet_list) == 0:
+            return self.next()
+        return (query_type, set(facet_list))
+
+if __name__ == '__main__':
+    import doctest, sys
+    doctest.testmod(sys.modules[__name__])
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MoinMoin/support/xappy/indexerconnection_doctest1.txt	Mon Jul 27 17:35:13 2009 +0200
@@ -0,0 +1,259 @@
+
+>>> from datastructures import *
+>>> from fieldactions import *
+
+
+
+Open a connection for indexing:
+>>> conn = IndexerConnection('foo')
+
+There can only be one IndexerConnection in existence for a given path at a
+time:
+>>> conn = IndexerConnection('foo') #doctest:+ELLIPSIS
+Traceback (most recent call last):
+...
+DatabaseLockError: Unable to acquire database write lock on foo...
+
+
+We should have no documents in the database yet:
+>>> conn.get_doccount()
+0
+
+
+
+Add some field actions to the database:
+>>> conn.add_field_action('author', FieldActions.STORE_CONTENT)
+>>> conn.add_field_action('author', FieldActions.INDEX_EXACT)
+>>> conn.add_field_action('title', FieldActions.INDEX_FREETEXT)
+
+
+We can't index as both EXACT and FREETEXT:
+>>> conn.add_field_action('author', FieldActions.INDEX_FREETEXT, weight=5, language='en')
+Traceback (most recent call last):
+...
+IndexerError: Field 'author' is already marked for indexing as exact text: cannot mark for indexing as free text as well
+>>> conn.add_field_action('title', FieldActions.INDEX_EXACT)
+Traceback (most recent call last):
+...
+IndexerError: Field 'title' is already marked for indexing as free text: cannot mark for indexing as exact text as well
+
+We can add multiple STORE_CONTENT actions though (subsequent ones have no
+further effect).
+>>> conn.add_field_action('author', FieldActions.STORE_CONTENT)
+
+
+Field actions are checked for basic validity:
+>>> conn.add_field_action('author', None)
+Traceback (most recent call last):
+...
+IndexerError: Unknown field action: None
+>>> conn.add_field_action('author', FieldActions.STORE_CONTENT, foo=1)
+Traceback (most recent call last):
+...
+IndexerError: Unknown parameter name for action 'STORE_CONTENT': 'foo'
+
+
+We can ensure there are no actions for a given field by using
+clear_field_actions():
+>>> conn.clear_field_actions('title')
+
+This doesn't complain even if we've never mentioned the field before:
+>>> conn.clear_field_actions('foo')
+
+Then we can add a field action back again:
+>>> conn.add_field_action('title', FieldActions.INDEX_FREETEXT, weight=10, language='en')
+
+
+We have to wipe out any old actions on the field to change the actions:
+>>> conn.clear_field_actions('author')
+>>> conn.add_field_action('author', FieldActions.STORE_CONTENT)
+>>> conn.add_field_action('author', FieldActions.INDEX_FREETEXT, weight=5, language='en')
+>>> conn.clear_field_actions('title')
+>>> conn.add_field_action('title', FieldActions.INDEX_EXACT)
+
+
+
+
+We should have no documents in the database yet:
+>>> conn.get_doccount()
+0
+
+
+Build up a document:
+>>> doc = UnprocessedDocument()
+
+We can add field instances.  Multiple instances of a field are valid.
+>>> doc.fields.append(Field('author', 'Richard Boulton'))
+>>> doc.fields.append(Field('author', 'Charlie Hull'))
+>>> doc.fields.append(Field('title', 'Test document'))
+
+We can get a vaguely pretty display of the contents of an
+UnprocessedDocument():
+>>> print doc
+UnprocessedDocument(None, [Field('author', 'Richard Boulton'), Field('author', 'Charlie Hull'), Field('title', 'Test document')])
+
+
+We can process a document explicitly, if we want to.
+>>> pdoc = conn.process(doc)
+
+Only the "author" field appears in the output, because only it was given the
+action STORE_CONTENT.
+>>> pdoc.data
+{'author': ['Richard Boulton', 'Charlie Hull']}
+
+We can access the xapian document representation of the processed document:
+>>> xdoc = pdoc.prepare()
+>>> import cPickle
+>>> cPickle.loads(xdoc.get_data())
+{'author': ['Richard Boulton', 'Charlie Hull']}
+
+>>> [(term.term, term.wdf, [pos for pos in term.positer]) for term in xdoc.termlist()]
+[('XAboulton', 5, [2]), ('XAcharlie', 5, [13]), ('XAhull', 5, [14]), ('XArichard', 5, [1]), ('XB:Test document', 0, []), ('ZXAboulton', 5, []), ('ZXAcharli', 5, []), ('ZXAhull', 5, []), ('ZXArichard', 5, []), ('Zboulton', 5, []), ('Zcharli', 5, []), ('Zhull', 5, []), ('Zrichard', 5, []), ('boulton', 5, [2]), ('charlie', 5, [13]), ('hull', 5, [14]), ('richard', 5, [1])]
+
+
+Adding the same document multiple times is fine if it doesn't have an id
+assigned to it: a new ID will be allocated for each addition:
+>>> conn.add(doc)
+'0'
+>>> conn.add(doc)
+'1'
+>>> conn.add(doc)
+'2'
+>>> conn.get_doccount()
+3
+
+We can set the unique ID ourselves, if we want:
+>>> print repr(doc.id)
+None
+>>> doc.id = '4'
+>>> print repr(doc.id)
+'4'
+>>> conn.add(doc)
+'4'
+>>> conn.get_doccount()
+4
+
+
+If we try adding a document with a unique ID which already exists we get an
+error:
+>>> doc.id = '1'
+>>> print repr(doc.id)
+'1'
+>>> conn.add(doc)
+Traceback (most recent call last):
+...
+IndexerError: Document ID of document supplied to add() is not unique.
+>>> conn.get_doccount()
+4
+
+
+If we remove the id, it works again:
+>>> doc.id = None
+>>> print repr(doc.id)
+None
+>>> conn.add(doc)
+'3'
+>>> conn.get_doccount()
+5
+
+But it skips ID 4 because we manually added a document with that ID.
+>>> conn.add(doc)
+'5'
+>>> conn.get_doccount()
+6
+
+Unique IDs don't have to be numbers: we can set them to anything we like.
+>>> doc.id = 'SuperFoo'
+>>> print repr(doc.id)
+'SuperFoo'
+>>> conn.add(doc)
+'SuperFoo'
+>>> conn.get_doccount()
+7
+
+We can delete documents by specifying the unique ID.
+>>> conn.delete('5')
+>>> conn.get_doccount()
+6
+
+
+Finally, we have to flush to apply the changes:
+>>> conn.flush()
+
+We can add more documents after the flush:
+>>> doc.id = None
+>>> conn.add(doc)
+'6'
+>>> conn.get_doccount()
+7
+
+We can add fields which don't have any configuration.  These will be ignored.
+>>> doc.fields.append(Field('text', 'Some boring text'))
+>>> conn.add(doc)
+'7'
+>>> conn.get_doccount()
+8
+
+We can also supply fields as an iterator instead of a list:
+>>> fieldlist = [Field('author', 'Richard Boulton')]
+>>> doc.fields = iter(fieldlist)
+>>> conn.add(doc)
+'8'
+>>> conn.get_doccount()
+9
+
+Calling close() will automatically call flush(), too:
+>>> conn.close()
+
+After calling close(), no other methods are valid:
+
+>>> conn.add_field_action('title', FieldActions.INDEX_FREETEXT, weight=10, language='en')
+Traceback (most recent call last):
+...
+IndexerError: IndexerConnection has been closed
+>>> conn.clear_field_actions('author')
+Traceback (most recent call last):
+...
+IndexerError: IndexerConnection has been closed
+>>> conn.process(doc)
+Traceback (most recent call last):
+...
+IndexerError: IndexerConnection has been closed
+>>> conn.add(doc)
+Traceback (most recent call last):
+...
+IndexerError: IndexerConnection has been closed
+>>> conn.replace(doc)
+Traceback (most recent call last):
+...
+IndexerError: IndexerConnection has been closed
+>>> conn.delete('1')
+Traceback (most recent call last):
+...
+IndexerError: IndexerConnection has been closed
+>>> conn.flush()
+Traceback (most recent call last):
+...
+IndexerError: IndexerConnection has been closed
+>>> conn.get_doccount()
+Traceback (most recent call last):
+...
+IndexerError: IndexerConnection has been closed
+>>> conn.iterids()
+Traceback (most recent call last):
+...
+IndexerError: IndexerConnection has been closed
+>>> conn.get_document('1')
+Traceback (most recent call last):
+...
+IndexerError: IndexerConnection has been closed
+
+
+But calling close() multiple times is okay:
+>>> conn.close()
+
+
+Now that we've closed the connection, we can open a new one:
+>>> conn = IndexerConnection('foo')
+>>> conn.get_doccount()
+9
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MoinMoin/support/xappy/indexerconnection_doctest2.txt	Mon Jul 27 17:35:13 2009 +0200
@@ -0,0 +1,435 @@
+
+>>> from datastructures import *
+>>> from fieldactions import *
+>>> from searchconnection import *
+
+
+Open a connection for indexing:
+>>> iconn = IndexerConnection('foo')
+
+We should have no documents in the database yet:
+>>> iconn.get_doccount()
+0
+
+Add some field actions to the database:
+>>> iconn.add_field_action('author', FieldActions.STORE_CONTENT)
+>>> iconn.add_field_action('title', FieldActions.STORE_CONTENT)
+>>> iconn.add_field_action('title', FieldActions.INDEX_FREETEXT, weight=5, language='en')
+>>> iconn.add_field_action('category', FieldActions.INDEX_EXACT)
+>>> iconn.add_field_action('category', FieldActions.SORTABLE)
+>>> iconn.add_field_action('category', FieldActions.COLLAPSE)
+>>> iconn.add_field_action('text', FieldActions.INDEX_FREETEXT, language='en')
+>>> iconn.add_field_action('other', FieldActions.INDEX_FREETEXT)
+
+Build up a document:
+>>> doc = UnprocessedDocument()
+
+We can add field instances.  Multiple instances of a field are valid.
+>>> doc.fields.append(Field('author', 'Richard Boulton'))
+>>> doc.fields.append(Field('category', 'Test document'))
+>>> doc.fields.append(Field('title', 'Test document 1'))
+>>> doc.fields.append(Field('text', 'This document is a basic test document.'))
+
+We can process a document explicitly, if we want to.
+>>> pdoc = iconn.process(doc)
+
+>>> pdoc.data
+{'title': ['Test document 1'], 'author': ['Richard Boulton']}
+
+We can access the Xapian document representation of the processed document to
+double check that this document has been indexed as we wanted:
+>>> xdoc = pdoc.prepare()
+>>> import cPickle
+>>> cPickle.loads(xdoc.get_data()) == pdoc.data
+True
+>>> [(term.term, term.wdf, [pos for pos in term.positer]) for term in xdoc.termlist()]
+[('1', 5, [3]), ('XA1', 5, [3]), ('XAdocument', 5, [2]), ('XAtest', 5, [1]), ('XB:Test document', 0, []), ('XCa', 1, [17]), ('XCbasic', 1, [18]), ('XCdocument', 2, [15, 20]), ('XCis', 1, [16]), ('XCtest', 1, [19]), ('XCthis', 1, [14]), ('ZXAdocument', 5, []), ('ZXAtest', 5, []), ('ZXCa', 1, []), ('ZXCbasic', 1, []), ('ZXCdocument', 2, []), ('ZXCis', 1, []), ('ZXCtest', 1, []), ('ZXCthis', 1, []), ('Za', 1, []), ('Zbasic', 1, []), ('Zdocument', 7, []), ('Zis', 1, []), ('Ztest', 6, []), ('Zthis', 1, []), ('a', 1, [17]), ('basic', 1, [18]), ('document', 7, [2, 15, 20]), ('is', 1, [16]), ('test', 6, [1, 19]), ('this', 1, [14])]
+
+>>> [(value.num, value.value) for value in xdoc.values()]
+[(0, 'Test document')]
+
+>>> ','.join(iconn.iterids())
+''
+>>> iconn.add(pdoc)
+'0'
+>>> sconn1 = SearchConnection('foo')
+>>> ','.join(iconn.iterids())
+'0'
+
+Regression test: if we called add with a ProcessedDocument which didn't have a
+unique ID, the generated ID used to get assigned to the ProcessedDocument.
+This shouldn't happen.
+>>> print pdoc.id
+None
+>>> iconn.add(pdoc)
+'1'
+>>> pdoc.id = 'B'
+>>> iconn.add(pdoc)
+'B'
+>>> iconn.get_doccount()
+3
+
+
+Add some more documents:
+
+>>> doc = UnprocessedDocument(fields=(Field('author', 'Charlie Hull'),
+...                                   Field('category', 'Silly Document'),
+...                                   Field('text', 'Charlie is a juggler'),
+...                                   Field('other', 'Some other content.'),
+...                                   ))
+>>> iconn.add(doc)
+'2'
+
+>>> doc = UnprocessedDocument(fields=(Field('author', 'Charlie Hull'),
+...                                   Field('category', 'Juggling'),
+...                                   Field('text', '5 clubs is quite hard.'),
+...                                   ))
+>>> iconn.add(doc)
+'3'
+
+>>> doc = UnprocessedDocument(fields=(Field('author', 'Charlie Hull'),
+...                                   Field('category', 'Juggling'),
+...                                   Field('text', 'Good toilets are important at juggling festivals'),
+...                                   ))
+>>> iconn.add(doc)
+'4'
+>>> iconn.get_doccount()
+6
+
+
+Now, try searching it:
+
+There's nothing in the database, because the changes haven't been flushed.
+>>> sconn1.get_doccount()
+0
+
+The iconn can access the same documents before and after a flush:
+>>> ','.join(iconn.iterids())
+'0,1,2,3,4,B'
+>>> iconn.flush()
+>>> ','.join(iconn.iterids())
+'0,1,2,3,4,B'
+
+
+The open connection still accesses the same revision, so there are still no
+documents visible:
+>>> sconn1.get_doccount()
+0
+
+A new connection can see the documents, though:
+>>> sconn2 = SearchConnection('foo')
+>>> sconn2.get_doccount()
+6
+
+
+>>> doc = UnprocessedDocument(fields=(Field('author', 'Richard Boulton'),
+...                                   Field('category', 'Gardening'),
+...                                   Field('text', 'Clematis grows very fast, and may smother other plants'),
+...                                   ))
+>>> iconn.add(doc)
+'5'
+>>> iconn.get_doccount()
+7
+
+The current search connection can't see the new document:
+>>> sconn2.get_doccount()
+6
+
+After a flush, the old connections still can't see the new document:
+>>> iconn.flush()
+>>> sconn1.get_doccount()
+0
+>>> sconn2.get_doccount()
+6
+
+A new connection can see the new document:
+>>> sconn3 = SearchConnection('foo')
+>>> sconn3.get_doccount()
+7
+
+Let's try deleting a document:
+>>> iconn.delete('5')
+>>> iconn.get_doccount()
+6
+
+After a flush, a new connection can see the change:
+>>> iconn.flush()
+>>> sconn4 = SearchConnection('foo')
+>>> sconn1.get_doccount()
+0
+>>> sconn2.get_doccount()
+6
+>>> sconn3.get_doccount()
+7
+>>> sconn4.get_doccount()
+6
+
+If we reopen the connection, we can see the latest changes:
+>>> sconn1.reopen()
+>>> sconn1.get_doccount()
+6
+
+
+We can parse some queries:
+>>> str(sconn4.query_parse('test'))
+'Xapian::Query((Ztest:(pos=1) AND_MAYBE test:(pos=1)))'
+>>> str(sconn4.query_parse('title:test'))
+'Xapian::Query((ZXAtest:(pos=1) AND_MAYBE XAtest:(pos=1)))'
+>>> str(sconn4.query_parse('title:Test'))
+'Xapian::Query((XAtest:(pos=1) AND_MAYBE XAtest:(pos=1)))'
+
+Xapian needs a patch to support exact prefixes.  When this is applied, the
+following test will pass.
+>> str(sconn4.query_parse('title:Test category:Te/st'))
+'Xapian::Query((XAtest:(pos=1) AND XB:Te/st:(pos=2)))'
+
+For now, the output is approximately right, and good enough to be going on
+with:
+>>> str(sconn4.query_parse('title:Test category:Te/st'))
+'Xapian::Query(((XAtest:(pos=1) AND (XBte:(pos=2) PHRASE 2 XBst:(pos=3))) AND_MAYBE (XAtest:(pos=1) AND (XBte:(pos=2) PHRASE 2 XBst:(pos=3)))))'
+
+>>> q1 = sconn4.query_parse('text:(clematis)')
+>>> q2 = sconn4.query_parse('title:Test')
+>>> str(sconn4.query_filter(q1, q2))
+'Xapian::Query(((ZXCclemati:(pos=1) AND_MAYBE XCclematis:(pos=1)) FILTER (XAtest:(pos=1) AND_MAYBE XAtest:(pos=1))))'
+
+>>> str(sconn4.query_filter(q1, "filter"))
+Traceback (most recent call last):
+...
+SearchError: Filter must be a Xapian Query object
+
+If we only allow a limited set of fields, other field specifications will be
+considered as plain text:
+>>> str(sconn4.query_parse("text:clematis title:Test"))
+'Xapian::Query(((ZXCclemati:(pos=1) AND XAtest:(pos=2)) AND_MAYBE (XCclematis:(pos=1) AND XAtest:(pos=2))))'
+>>> str(sconn4.query_parse("text:clematis title:Test", allow=("text",)))
+'Xapian::Query(((ZXCclemati:(pos=1) AND (title:(pos=2) PHRASE 2 test:(pos=3))) AND_MAYBE (XCclematis:(pos=1) AND (title:(pos=2) PHRASE 2 test:(pos=3)))))'
+>>> str(sconn4.query_parse("text:clematis title:Test", deny=("title",)))
+'Xapian::Query(((ZXCclemati:(pos=1) AND (title:(pos=2) PHRASE 2 test:(pos=3))) AND_MAYBE (XCclematis:(pos=1) AND (title:(pos=2) PHRASE 2 test:(pos=3)))))'
+>>> str(sconn4.query_parse("text:clematis title:Test", allow=("text",), deny=("title",)))
+Traceback (most recent call last):
+...
+SearchError: Cannot specify both `allow` and `deny` (got ('text',) and ('title',))
+
+
+We can parse queries which don't specify a field explicitly, too:
+>>> str(sconn4.query_parse("clematis Test"))
+'Xapian::Query(((Zclemati:(pos=1) AND test:(pos=2)) AND_MAYBE (clematis:(pos=1) AND test:(pos=2))))'
+
+We can generate a query for an individual field:
+>>> str(sconn4.query_field('text', "clematis Test"))
+'Xapian::Query(((ZXCclemati:(pos=1) AND XCtest:(pos=2)) AND_MAYBE (XCclematis:(pos=1) AND XCtest:(pos=2))))'
+
+If we generate a query for a field with no language set, it won't be stemmed:
+>>> str(sconn4.query_field('other', "clematis Test"))
+'Xapian::Query(((XDclematis:(pos=1) AND XDtest:(pos=2)) AND_MAYBE (XDclematis:(pos=1) AND XDtest:(pos=2))))'
+
+If the field is an exact text field, the query will contain a single term:
+>>> str(sconn4.query_field('category', "Clematis Test"))
+'Xapian::Query(XB:Clematis Test)'
+
+If the field isn't known, we get an empty query:
+>>> q2 = sconn4.query_field('unknown', "clematis Test")
+>>> str(q2)
+'Xapian::Query()'
+
+If we filter a query with an empty query, we get another empty query:
+>>> str(sconn4.query_filter(q1, q2))
+'Xapian::Query()'
+
+
+>>> q = sconn4.query_parse('title:Test')
+>>> str(q)
+'Xapian::Query((XAtest:(pos=1) AND_MAYBE XAtest:(pos=1)))'
+>>> res = sconn4.search(q, 0, 10)
+>>> res.matches_lower_bound
+3
+>>> res.matches_upper_bound
+3
+>>> res.matches_estimated
+3
+>>> res.estimate_is_exact
+True
+>>> res.more_matches
+False
+>>> str(res)
+'<SearchResults(startrank=0, endrank=3, more_matches=False, matches_lower_bound=3, matches_upper_bound=3, matches_estimated=3, estimate_is_exact=True)>'
+
+If we ask for fewer results, we get them:
+>>> res = sconn4.search(q, 0, 2)
+>>> str(res)
+'<SearchResults(startrank=0, endrank=2, more_matches=True, matches_lower_bound=3, matches_upper_bound=3, matches_estimated=3, estimate_is_exact=True)>'
+>>> res = sconn4.search(q, 0, 3)
+>>> str(res)
+'<SearchResults(startrank=0, endrank=3, more_matches=False, matches_lower_bound=3, matches_upper_bound=3, matches_estimated=3, estimate_is_exact=True)>'
+
+Multiword queries use AND to combine terms, by default:
+>>> q1 = sconn4.query_parse('text:(important plants)')
+>>> str(q1)
+'Xapian::Query(((ZXCimport:(pos=1) AND ZXCplant:(pos=2)) AND_MAYBE (XCimportant:(pos=1) AND XCplants:(pos=2))))'
+
+But we can set the default operator to OR if we want:
+>>> q1 = sconn4.query_parse('text:(important plants)', default_op=sconn4.OP_OR)
+>>> str(q1)
+'Xapian::Query(((ZXCimport:(pos=1) OR ZXCplant:(pos=2)) AND_MAYBE (XCimportant:(pos=1) OR XCplants:(pos=2))))'
+
+We can combine queries:
+>>> q2 = sconn4.query_parse('title:test')
+>>> q = sconn4.query_composite(sconn4.OP_OR, (q1, q2))
+>>> str(q)
+'Xapian::Query((((ZXCimport:(pos=1) OR ZXCplant:(pos=2)) AND_MAYBE (XCimportant:(pos=1) OR XCplants:(pos=2))) OR (ZXAtest:(pos=1) AND_MAYBE XAtest:(pos=1))))'
+
+
+>>> doc = UnprocessedDocument(fields=(Field('author', 'Richard Boulton'),
+...                                   Field('category', 'Gardening'),
+...                                   Field('text', 'Clematis grows very fast, and may smother other plants'),
+...                                   ))
+>>> for i in xrange(100):
+...     id = iconn.add(doc)
+>>> iconn.flush()
+>>> sconn1.reopen()
+>>> sconn2.reopen()
+>>> sconn1.search(q, 0, 3)
+<SearchResults(startrank=0, endrank=3, more_matches=True, matches_lower_bound=100, matches_upper_bound=104, matches_estimated=100, estimate_is_exact=False)>
+
+We can perform the same search again after more modifications have been made,
+and we get the same result:
+>>> for i in xrange(100):
+...     id = iconn.add(doc)
+>>> iconn.flush()
+>>> results1 = sconn1.search(q, 0, 3)
+>>> results1
+<SearchResults(startrank=0, endrank=3, more_matches=True, matches_lower_bound=100, matches_upper_bound=104, matches_estimated=100, estimate_is_exact=False)>
+
+But if further modifications have been made, the searcher has to be reopened,
+so a different result set is returned.
+>>> for i in xrange(100):
+...     id = iconn.add(doc)
+>>> iconn.flush()
+>>> results2 = sconn1.search(q, 0, 50)
+>>> results2
+<SearchResults(startrank=0, endrank=50, more_matches=True, matches_lower_bound=304, matches_upper_bound=304, matches_estimated=304, estimate_is_exact=True)>
+
+We can get the details of the hit at a given rank:
+>>> hit = results1.get_hit(2)
+>>> hit.rank
+2
+>>> hit.id
+'B'
+>>> hit.data
+{'title': ['Test document 1'], 'author': ['Richard Boulton']}
+>>> str(hit)
+"<SearchResult(rank=2, id='B', data={'title': ['Test document 1'], 'author': ['Richard Boulton']})>"
+>>> str(results2.get_hit(2))
+"<SearchResult(rank=2, id='B', data={'title': ['Test document 1'], 'author': ['Richard Boulton']})>"
+>>> str(results2.get_hit(49))
+"<SearchResult(rank=49, id='33', data={'author': ['Richard Boulton']})>"
+
+We can change a document in the index, and the old result is still available:
+>>> newdoc = UnprocessedDocument(fields=(Field('author', 'Fred Bloggs'),
+...                                      Field('category', 'Sleeping'),
+...                                      Field('text', 'This is different text to before'),),
+...                              id=results2.get_hit(49).id)
+>>> iconn.replace(newdoc)
+
+If we don't set an ID, we get an error:
+>>> newdoc = UnprocessedDocument(fields=(Field('author', 'Freda Bloggs'),
+...                                      Field('category', 'Sleeping'),
+...                                      Field('text', 'This is different text to before'),))
+>>> iconn.replace(newdoc)
+Traceback (most recent call last):
+...
+IndexerError: No document ID set for document supplied to replace().
+
+>>> iconn.flush()
+>>> str(results2.get_hit(49))
+"<SearchResult(rank=49, id='33', data={'author': ['Richard Boulton']})>"
+
+But on a newly reopened connection, the result is gone (note the different id):
+>>> sconn2.reopen()
+>>> results3 = sconn2.search(q, 0, 50)
+>>> str(results3.get_hit(49))
+"<SearchResult(rank=49, id='34', data={'author': ['Richard Boulton']})>"
+
+We can get a list of the current document IDs:
+>>> print [id for id in iconn.iterids()][:10]
+['0', '1', '10', '100', '101', '102', '103', '104', '105', '106']
+>>> pdoc = iconn.get_document('0')
+>>> print pdoc.data
+{'title': ['Test document 1'], 'author': ['Richard Boulton']}
+
+If we perform major changes on the database, the results of a search might
+become unavailable:
+>>> sconn1.reopen()
+>>> results4 = sconn1.search(q, 0, 100)
+>>> for id in iconn.iterids():
+...     iconn.delete(id)
+>>> iconn.get_doccount()
+0
+>>> iconn.flush()
+>>> iconn.get_doccount()
+0
+>>> for i in xrange(100):
+...     id = iconn.add(doc)
+>>> iconn.flush()
+>>> for i in xrange(100):
+...     id = iconn.add(doc)
+>>> iconn.flush()
+>>> for hit in results4: pass
+Traceback (most recent call last):
+...
+DatabaseModifiedError: The revision being read has been discarded - you should call Xapian::Database::reopen() and retry the operation
+
+
+When we're finished with the connection, we can close it to release the
+resources:
+>>> sconn1.close()
+
+Repeated closing is okay:
+>>> sconn1.close()
+
+After closing, no other methods should be called:
+>>> sconn1.reopen()
+Traceback (most recent call last):
+...
+SearchError: SearchConnection has been closed
+>>> sconn1.get_doccount()
+Traceback (most recent call last):
+...
+SearchError: SearchConnection has been closed
+>>> sconn1.query_composite(sconn1.OP_AND, 'foo')
+Traceback (most recent call last):
+...
+SearchError: SearchConnection has been closed
+>>> sconn1.query_filter(q, q)
+Traceback (most recent call last):
+...
+SearchError: SearchConnection has been closed
+>>> sconn1.query_range('date', '19991212', '20000101')
+Traceback (most recent call last):
+...
+SearchError: SearchConnection has been closed
+>>> sconn1.query_parse('hello')
+Traceback (most recent call last):
+...
+SearchError: SearchConnection has been closed
+>>> sconn1.query_field('author', 'richard')
+Traceback (most recent call last):
+...
+SearchError: SearchConnection has been closed
+>>> sconn1.query_facet('author', 'richard')
+Traceback (most recent call last):
+...
+SearchError: SearchConnection has been closed
+>>> sconn1.search(q, 0, 10)
+Traceback (most recent call last):
+...
+SearchError: SearchConnection has been closed
+>>> sconn1.get_document('1')
+Traceback (most recent call last):
+...
+SearchError: SearchConnection has been closed
+
+
+But calling close() multiple times is okay:
+>>> sconn1.close()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MoinMoin/support/xappy/indexerconnection_doctest3.txt	Mon Jul 27 17:35:13 2009 +0200
@@ -0,0 +1,86 @@
+
+>>> from datastructures import *
+>>> from fieldactions import *
+>>> from searchconnection import *
+
+
+Open a connection for indexing:
+>>> iconn = IndexerConnection('foo')
+
+Set up the field actions to be used for indexing:
+>>> iconn.add_field_action('author', FieldActions.STORE_CONTENT)
+>>> iconn.add_field_action('title', FieldActions.STORE_CONTENT)
+>>> iconn.add_field_action('title', FieldActions.INDEX_FREETEXT, weight=5, language='en', nopos=True)
+>>> iconn.add_field_action('category', FieldActions.INDEX_EXACT)
+>>> iconn.add_field_action('category', FieldActions.SORTABLE)
+>>> iconn.add_field_action('category', FieldActions.COLLAPSE)
+>>> iconn.add_field_action('text', FieldActions.INDEX_FREETEXT, language='en')
+>>> iconn.add_field_action('other', FieldActions.INDEX_FREETEXT)
+>>> iconn.add_field_action('date', FieldActions.STORE_CONTENT)
+>>> iconn.add_field_action('date', FieldActions.SORTABLE, type='date')
+>>> iconn.add_field_action('date', FieldActions.COLLAPSE)
+>>> iconn.add_field_action('price', FieldActions.STORE_CONTENT)
+>>> iconn.add_field_action('price', FieldActions.SORTABLE, type='float')
+>>> iconn.add_field_action('price', FieldActions.COLLAPSE)
+
+Build up a document:
+>>> doc = UnprocessedDocument()
+>>> doc.fields.append(Field('author', 'Richard Boulton'))
+>>> doc.fields.append(Field('category', 'Test document'))
+>>> doc.fields.append(Field('title', 'Test document 1'))
+>>> doc.fields.append(Field('text', 'This document is a basic test document.'))
+
+Process it:
+>>> pdoc = iconn.process(doc)
+>>> pdoc.data
+{'title': ['Test document 1'], 'author': ['Richard Boulton']}
+
+
+If we add a field which is specified as a SORTABLE date but doesn't contain
+a valid date, an error will be raised when we try to process the document:
+>>> doc.fields.append(Field('date', 'An invalid date - this will generate an error when processed.'))
+>>> iconn.process(doc)
+Traceback (most recent call last):
+...
+IndexerError: Value supplied to field 'date' must be a valid date: was 'An invalid date - this will generate an error when processed.': error is 'Unrecognised date format'
+
+
+Similarly, if we add a field which is specified as a SORTABLE float but doesn't
+contain a valid floating point number, an error will be raised when we try to
+process the document:
+>>> doc.fields[-1] = Field('price', 'An invalid float - this will generate an error when processed.')
+>>> iconn.process(doc)
+Traceback (most recent call last):
+...
+IndexerError: Value supplied to field 'price' must be a valid floating point number: was 'An invalid float - this will generate an error when processed.'
+
+
+We can access the Xapian document representation of the processed document to
+double check that this document has been indexed as we wanted:
+>>> xdoc = pdoc.prepare()
+>>> import cPickle
+>>> cPickle.loads(xdoc.get_data()) == pdoc.data
+True
+>>> [(term.term, term.wdf, [pos for pos in term.positer]) for term in xdoc.termlist()]
+[('1', 5, []), ('XA1', 5, []), ('XAdocument', 5, []), ('XAtest', 5, []), ('XB:Test document', 0, []), ('XCa', 1, [14]), ('XCbasic', 1, [15]), ('XCdocument', 2, [12, 17]), ('XCis', 1, [13]), ('XCtest', 1, [16]), ('XCthis', 1, [11]), ('ZXAdocument', 5, []), ('ZXAtest', 5, []), ('ZXCa', 1, []), ('ZXCbasic', 1, []), ('ZXCdocument', 2, []), ('ZXCis', 1, []), ('ZXCtest', 1, []), ('ZXCthis', 1, []), ('Za', 1, []), ('Zbasic', 1, []), ('Zdocument', 7, []), ('Zis', 1, []), ('Ztest', 6, []), ('Zthis', 1, []), ('a', 1, [14]), ('basic', 1, [15]), ('document', 7, [12, 17]), ('is', 1, [13]), ('test', 6, [16]), ('this', 1, [11])]
+>>> [(value.num, value.value) for value in xdoc.values()]
+[(0, 'Test document')]
+
+We can add terms directly to the processed document, specifying the wdf and position:
+>>> pdoc.add_term('text', 'newterm1', wdfinc=17, positions=200)
+>>> pdoc.add_term('text', 'newterm2', wdfinc=17, positions=(201, 202))
+>>> [(term.term, term.wdf, [pos for pos in term.positer]) for term in xdoc.termlist()]
+[('1', 5, []), ('XA1', 5, []), ('XAdocument', 5, []), ('XAtest', 5, []), ('XB:Test document', 0, []), ('XCa', 1, [14]), ('XCbasic', 1, [15]), ('XCdocument', 2, [12, 17]), ('XCis', 1, [13]), ('XCnewterm1', 17, [200]), ('XCnewterm2', 17, [201, 202]), ('XCtest', 1, [16]), ('XCthis', 1, [11]), ('ZXAdocument', 5, []), ('ZXAtest', 5, []), ('ZXCa', 1, []), ('ZXCbasic', 1, []), ('ZXCdocument', 2, []), ('ZXCis', 1, []), ('ZXCtest', 1, []), ('ZXCthis', 1, []), ('Za', 1, []), ('Zbasic', 1, []), ('Zdocument', 7, []), ('Zis', 1, []), ('Ztest', 6, []), ('Zthis', 1, []), ('a', 1, [14]), ('basic', 1, [15]), ('document', 7, [12, 17]), ('is', 1, [13]), ('test', 6, [16]), ('this', 1, [11])]
+
+We can set the data directly too, as long as we set it to a dictionary:
+>>> pdoc.data = {'Empty': 'nothing'}
+>>> pdoc.data
+{'Empty': 'nothing'}
+>>> pdoc.data = None
+Traceback (most recent call last):
+...
+TypeError: Cannot set data to any type other than a dict
+
+We can also get a representation of a processed document (though it only tells us the ID number):
+>>> pdoc
+<ProcessedDocument(None)>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MoinMoin/support/xappy/marshall.py	Mon Jul 27 17:35:13 2009 +0200
@@ -0,0 +1,38 @@
+#!/usr/bin/env python
+#
+# Copyright (C) 2007 Lemur Consulting Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+r"""marshall.py: Marshal values into strings
+
+"""
+__docformat__ = "restructuredtext en"
+
+import math
+import xapian
+from replaylog import log as _log
+
+def float_to_string(value):
+    """Marshall a floating point number to a string which sorts in the
+    appropriate manner.
+
+    """
+    return _log(xapian.sortable_serialise, value)
+
+def date_to_string(date):
+    """Marshall a date to a string which sorts in the appropriate manner.
+
+    """
+    return '%04d%02d%02d' % (date.year, date.month, date.day)
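+
+if __name__ == '__main__':
+    # Minimal illustrative check (a sketch; the accompanying doctest files are
+    # the real tests): marshalled values compare in the same order as the
+    # original values.
+    import datetime
+    assert date_to_string(datetime.date(1999, 12, 13)) == '19991213'
+    assert float_to_string(0.5) < float_to_string(1.5)
+    print 'marshall examples passed'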
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MoinMoin/support/xappy/marshall_doctest1.txt	Mon Jul 27 17:35:13 2009 +0200
@@ -0,0 +1,40 @@
+
+
+>>> float_to_string(0)
+'\x80'
+>>> float_to_string(-0)
+'\x80'
+
+>>> float_to_string(2 ** -1023)
+'\x8f\xe4'
+>>> float_to_string(2 ** -1024)
+'\x8f\xe0'
+>>> float_to_string(2 ** -1074)
+'\x8f\x18'
+>>> float_to_string(2 ** -1075)
+'\x80'
+
+>>> float_to_string(-(2 ** -1024))
+'p\x1e'
+>>> float_to_string(-(2 ** -1023))
+'p\x1a'
+>>> float_to_string(-(2 ** -1074))
+'p\xe6'
+>>> float_to_string(-(2 ** -1075))
+'\x80'
+
+Check that the values in pos_test_vals, together with their negations, marshal
+to strings which sort in the same order as the values themselves.
+>>> pos_test_vals = (0, 2 ** -1075, 2 ** -1074, 2 ** -1023, 0.000001, 0.000002, 0.000005, 0.1, 0.2, 0.5, 1, 1.1, 1.8, 2, 1024.5, 2 ** 1022)
+>>> test_vals = [-val for val in pos_test_vals]
+>>> test_vals.reverse()
+>>> test_vals.extend(pos_test_vals)
+>>> prev_val = test_vals[0]
+>>> for val in test_vals:
+...     m_prev_val = float_to_string(prev_val)
+...     m_val = float_to_string(val)
+...     if val == prev_val:
+...         assert(m_val == m_prev_val)
+...     else:
+...         assert(val > prev_val)
+...         assert(m_val > m_prev_val)
+...     prev_val = val
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MoinMoin/support/xappy/marshall_doctest2.txt	Mon Jul 27 17:35:13 2009 +0200
@@ -0,0 +1,20 @@
+
+
+>>> import datetime, parsedate
+>>> date_to_string(datetime.date(1999, 12, 13))
+'19991213'
+
+>>> test_date_inputs=['1066.11.05', '19700211', '19991213', '20070513']
+>>> test_dates = []
+>>> for date in test_date_inputs:
+...     test_dates.append(parsedate.date_from_string(date))
+>>> prev_val = test_dates[0]
+>>> for val in test_dates:
+...     m_prev_val = date_to_string(prev_val)
+...     m_val = date_to_string(val)
+...     if val == prev_val:
+...         assert(m_val == m_prev_val)
+...     else:
+...         assert(val > prev_val)
+...         assert(m_val > m_prev_val)
+...     prev_val = val
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MoinMoin/support/xappy/memutils.py	Mon Jul 27 17:35:13 2009 +0200
@@ -0,0 +1,89 @@
+#!/usr/bin/env python
+#
+# Copyright (C) 2007 Lemur Consulting Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+r"""memutils.py: Memory handling utilities.
+
+"""
+__docformat__ = "restructuredtext en"
+
+import os
+
+def _get_physical_mem_sysconf():
+    """Try getting a value for the physical memory using os.sysconf().
+
+    Returns None if no value can be obtained - otherwise, returns a value in
+    bytes.
+
+    """
+    if getattr(os, 'sysconf', None) is None:
+        return None
+
+    try:
+        pagesize = os.sysconf('SC_PAGESIZE')
+    except ValueError:
+        try:
+            pagesize = os.sysconf('SC_PAGE_SIZE')
+        except ValueError:
+            return None
+
+    try:
+        pagecount = os.sysconf('SC_PHYS_PAGES')
+    except ValueError:
+        return None
+
+    return pagesize * pagecount
+
+def _get_physical_mem_win32():
+    """Try getting a value for the physical memory using GlobalMemoryStatus.
+
+    This is a windows specific method.  Returns None if no value can be
+    obtained (eg, not running on windows) - otherwise, returns a value in
+    bytes.
+
+    """
+    try:
+        import ctypes
+        import ctypes.wintypes as wintypes
+    except (ValueError, ImportError):
+        return None
+    
+    class MEMORYSTATUS(ctypes.Structure):
+        _fields_ = [
+            ('dwLength', wintypes.DWORD),
+            ('dwMemoryLoad', wintypes.DWORD),
+            ('dwTotalPhys', wintypes.DWORD),
+            ('dwAvailPhys', wintypes.DWORD),
+            ('dwTotalPageFile', wintypes.DWORD),
+            ('dwAvailPageFile', wintypes.DWORD),
+            ('dwTotalVirtual', wintypes.DWORD),
+            ('dwAvailVirtual', wintypes.DWORD),
+        ]
+
+    m = MEMORYSTATUS()
+    ctypes.windll.kernel32.GlobalMemoryStatus(ctypes.byref(m))
+    return m.dwTotalPhys
+
+def get_physical_memory():
+    """Get the amount of physical memory in the system, in bytes.
+
+    If this can't be obtained, returns None.
+
+    """
+    result = _get_physical_mem_sysconf()
+    if result is not None:
+        return result
+    return _get_physical_mem_win32()
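+
+if __name__ == '__main__':
+    # Minimal illustrative check (a sketch, not part of the library's tests):
+    # report the detected physical memory, or note that it couldn't be found.
+    mem = get_physical_memory()
+    if mem is None:
+        print "physical memory size could not be determined"
+    else:
+        print "physical memory: %d bytes" % mem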
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MoinMoin/support/xappy/parsedate.py	Mon Jul 27 17:35:13 2009 +0200
@@ -0,0 +1,56 @@
+#!/usr/bin/env python
+#
+# Copyright (C) 2007 Lemur Consulting Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+r"""parsedate.py: Parse date strings.
+
+"""
+__docformat__ = "restructuredtext en"
+
+import datetime
+import re
+
+yyyymmdd_re = re.compile(r'(?P<year>[0-9]{4})(?P<month>[0-9]{2})(?P<day>[0-9]{2})$')
+yyyy_mm_dd_re = re.compile(r'(?P<year>[0-9]{4})([-/.])(?P<month>[0-9]{2})\2(?P<day>[0-9]{2})$')
+
+def date_from_string(value):
+    """Parse a string into a date.
+
+    If the value supplied is already a date-like object (ie, has 'year',
+    'month' and 'day' attributes), it is returned without processing.
+
+    Supported date formats are:
+
+     - YYYYMMDD
+     - YYYY-MM-DD 
+     - YYYY/MM/DD 
+     - YYYY.MM.DD 
+
+    """
+    if (hasattr(value, 'year')
+        and hasattr(value, 'month')
+        and hasattr(value, 'day')):
+        return value
+
+    mg = yyyymmdd_re.match(value)
+    if mg is None:
+        mg = yyyy_mm_dd_re.match(value)
+
+    if mg is not None:
+        year, month, day = (int(i) for i in mg.group('year', 'month', 'day'))
+        return datetime.date(year, month, day)
+
+    raise ValueError('Unrecognised date format')
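+
+if __name__ == '__main__':
+    # Minimal illustrative check (a sketch; the accompanying doctest file is
+    # the real test): every supported format parses to the same date.
+    expected = datetime.date(1999, 2, 1)
+    for text in ('19990201', '1999-02-01', '1999/02/01', '1999.02.01'):
+        assert date_from_string(text) == expected
+    print 'parsedate examples passed'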
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MoinMoin/support/xappy/parsedate_doctest1.txt	Mon Jul 27 17:35:13 2009 +0200
@@ -0,0 +1,50 @@
+General tests of the parsedate module.
+
+Dates may be supplied as YYYYMMDD:
+
+>>> date_from_string('19990201')
+datetime.date(1999, 2, 1)
+>>> date_from_string('19690201')
+datetime.date(1969, 2, 1)
+>>> date_from_string('20000228')
+datetime.date(2000, 2, 28)
+>>> date_from_string('20000229')
+datetime.date(2000, 2, 29)
+
+
+Dates may also be supplied as YYYY-MM-DD, YYYY/MM/DD or YYYY.MM.DD:
+
+>>> date_from_string('1999-02-01')
+datetime.date(1999, 2, 1)
+>>> date_from_string('1999/02/01')
+datetime.date(1999, 2, 1)
+>>> date_from_string('1999.02.01')
+datetime.date(1999, 2, 1)
+
+
+Out of range dates cause a ValueError:
+
+>>> date_from_string('19000229')
+Traceback (most recent call last):
+...
+ValueError: day is out of range for month
+
+>>> date_from_string('20000001')
+Traceback (most recent call last):
+...
+ValueError: month must be in 1..12
+
+
+If we pass a datetime.date object (or something which looks similar) it is
+returned unchanged:
+
+>>> date_from_string(datetime.date(2001, 7, 11))
+datetime.date(2001, 7, 11)
+
+
+If we pass something unrecognisable, we get a ValueError:
+>>> date_from_string('hello world')
+Traceback (most recent call last):
+...
+ValueError: Unrecognised date format
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MoinMoin/support/xappy/replaylog.py	Mon Jul 27 17:35:13 2009 +0200
@@ -0,0 +1,433 @@
+#!/usr/bin/env python
+#
+# Copyright (C) 2007 Lemur Consulting Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+r"""replaylog.py: Log all xapian calls to a file, so that they can be replayed.
+
+"""
+__docformat__ = "restructuredtext en"
+
+import datetime
+import sys
+import thread
+import threading
+import time
+import traceback
+import types
+import weakref
+import xapian
+
+from pprint import pprint
+
+# The logger in use.
+_replay_log = None
+
+# True if a replay log has ever been in use since import time.
+_had_replay_log = False
+
+class NotifyingDeleteObject(object):
+    """An wrapping for an object which calls a callback when its deleted.
+
+    Note that the callback will be called from a __del__ method, so shouldn't
+    raise any exceptions, and probably shouldn't make new references to the
+    object supplied to it.
+
+    """
+    def __init__(self, obj, callback):
+        self.obj = obj
+        self.callback = callback
+
+    def __del__(self):
+        self.callback(self.obj)
+
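+# Illustrative usage of NotifyingDeleteObject above (a sketch, not part of the
+# library): wrap a value so that a message is produced when the wrapper is
+# garbage collected.
+#
+#     def gone(num):
+#         print "object %d deleted" % num
+#     tracked = NotifyingDeleteObject(42, gone)
+#     del tracked        # prints "object 42 deleted"
+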
+class ReplayLog(object):
+    """Log of xapian calls, to be replayed.
+
+    """
+
+    def __init__(self, logpath):
+        """Create a new replay log.
+
+        """
+        # Mutex used to protect all access to _fd
+        self._fd_mutex = threading.Lock()
+        self._fd = file(logpath, 'wb')
+
+        # Mutex used to protect all access to members other than _fd
+        self._mutex = threading.Lock()
+        self._next_call = 1
+
+        self._next_thread = 0
+        self._thread_ids = {}
+
+        self._objs = weakref.WeakKeyDictionary()
+        self._next_num = 1
+
+        self._xapian_classes = {}
+        self._xapian_functions = {}
+        self._xapian_methods = {}
+        for name in dir(xapian):
+            item = getattr(xapian, name)
+            has_members = False
+            for membername in dir(item):
+                member = getattr(item, membername)
+                if isinstance(member, types.MethodType):
+                    self._xapian_methods[member.im_func] = (name, membername)
+                    has_members = True
+            if has_members:
+                self._xapian_classes[item] = name
+            if isinstance(item, types.BuiltinFunctionType):
+                self._xapian_functions[item] = name
+
+    def _get_obj_num(self, obj, maybe_new):
+        """Get the number associated with an object.
+
+        If maybe_new is False, a value of 0 will be supplied if the object
+        hasn't already been seen.  Otherwise, a new (and previously unused)
+        value will be allocated to the object.
+
+        The mutex should be held when this is called.
+
+        """
+        try:
+            num = self._objs[obj]
+            return num.obj
+        except KeyError:
+            pass
+
+        if not maybe_new:
+            return 0
+
+        self._objs[obj] = NotifyingDeleteObject(self._next_num, self._obj_gone)
+        self._next_num += 1
+        return self._next_num - 1
+
+    def _is_xap_obj(self, obj):
+        """Return True iff an object is an instance of a xapian object.
+
+        (Also returns true if the object is an instance of a subclass of a
+        xapian object.)
+
+        The mutex should be held when this is called.
+
+        """
+        # Check for xapian classes.
+        classname = self._xapian_classes.get(type(obj), None)
+        if classname is not None:
+            return True
+        # Check for subclasses of xapian classes.
+        for classobj, classname in self._xapian_classes.iteritems():
+            if isinstance(obj, classobj):
+                return True
+        # Not a xapian class or subclass.
+        return False
+
+    def _get_xap_name(self, obj, maybe_new=False):
+        """Get the name of a xapian class or method.
+
+        The mutex should be held when this is called.
+
+        """
+        # Check if it's a xapian class, or subclass.
+        if isinstance(obj, types.TypeType):
+            classname = self._xapian_classes.get(obj, None)
+            if classname is not None:
+                return classname
+
+            for classobj, classname in self._xapian_classes.iteritems():
+                if issubclass(obj, classobj):
+                    return "subclassof_%s" % (classname, )
+
+            return None
+
+        # Check if it's a xapian function.
+        if isinstance(obj, types.BuiltinFunctionType):
+            funcname = self._xapian_functions.get(obj, None)
+            if funcname is not None:
+                return funcname
+
+        # Check if it's a proxied object.
+        if isinstance(obj, LoggedProxy):
+            classname = self._xapian_classes.get(obj.__class__, None)
+            if classname is not None:
+                objnum = self._get_obj_num(obj, maybe_new=maybe_new)
+                return "%s#%d" % (classname, objnum)
+
+        # Check if it's a proxied method.
+        if isinstance(obj, LoggedProxyMethod):
+            classname, methodname = self._xapian_methods[obj.real.im_func]
+            objnum = self._get_obj_num(obj.proxyobj, maybe_new=maybe_new)
+            return "%s#%d.%s" % (classname, objnum, methodname)
+
+        # Check if it's a subclass of a xapian class.  Note: this will only
+        # pick up subclasses, because the original classes are filtered out
+        # higher up.
+        for classobj, classname in self._xapian_classes.iteritems():
+            if isinstance(obj, classobj):
+                objnum = self._get_obj_num(obj, maybe_new=maybe_new)
+                return "subclassof_%s#%d" % (classname, objnum)
+
+        return None
+
+    def _log(self, msg):
+        self._fd_mutex.acquire()
+        try:
+#            msg = '%s,%s' % (
+#                datetime.datetime.fromtimestamp(time.time()).isoformat(),
+#                msg,
+#            )
+            self._fd.write(msg)
+            self._fd.flush()
+        finally:
+            self._fd_mutex.release()
+
+    def _repr_arg(self, arg):
+        """Return a representation of an argument.
+
+        The mutex should be held when this is called.
+
+        """
+
+        xapargname = self._get_xap_name(arg)
+        if xapargname is not None:
+            return xapargname
+
+        if isinstance(arg, basestring):
+            if isinstance(arg, unicode):
+                arg = arg.encode('utf-8')
+            return 'str(%d,%s)' % (len(arg), arg)
+
+        if isinstance(arg, long):
+            try:
+                arg = int(arg)
+            except OverflowError:
+                pass
+
+        if isinstance(arg, long):
+            return 'long(%d)' % arg
+
+        if isinstance(arg, int):
+            return 'int(%d)' % arg
+
+        if isinstance(arg, float):
+            return 'float(%f)' % arg
+
+        if arg is None:
+            return 'None'
+
+        if hasattr(arg, '__iter__'):
+            seq = []
+            for item in arg:
+                seq.append(self._repr_arg(item))
+            return 'list(%s)' % ','.join(seq)
+
+        return 'UNKNOWN:' + str(arg)
+
+    def _repr_args(self, args):
+        """Return a representation of a list of arguments.
+
+        The mutex should be held when this is called.
+
+        """
+        logargs = []
+        for arg in args:
+            logargs.append(self._repr_arg(arg))
+        return ','.join(logargs)
+
+    def _get_call_id(self):
+        """Get an ID string for a call.
+
+        The mutex should be held when this is called.
+
+        """
+        call_num = self._next_call
+        self._next_call += 1
+
+        thread_id = thread.get_ident()
+        try:
+            thread_num = self._thread_ids[thread_id]
+        except KeyError:
+            thread_num = self._next_thread
+            self._thread_ids[thread_id] = thread_num
+            self._next_thread += 1
+
+        if thread_num == 0:
+            return "%s" % call_num
+        return "%dT%d" % (call_num, thread_num)
+
+    def log_call(self, call, *args):
+        """Add a log message about a call.
+
+        Returns a number for the call, so it can be tied to a particular
+        result.
+
+        """
+        self._mutex.acquire()
+        try:
+            logargs = self._repr_args(args)
+            xapobjname = self._get_xap_name(call)
+            call_id = self._get_call_id()
+        finally:
+            self._mutex.release()
+
+        if xapobjname is not None:
+            self._log("CALL%s:%s(%s)\n" % (call_id, xapobjname, logargs))
+        else:
+            self._log("CALL%s:UNKNOWN:%r(%s)\n" % (call_id, call, logargs))
+        return call_id
+
+    def log_except(self, (etype, value, tb), call_id):
+        """Log an exception which has occurred.
+
+        """
+        # No access to any members, so no need to acquire the mutex.
+        exc = traceback.format_exception_only(etype, value)
+        self._log("EXCEPT%s:%s\n" % (call_id, ''.join(exc).strip()))
+
+    def log_retval(self, ret, call_id):
+        """Log a return value.
+
+        """
+        if ret is None:
+            self._log("RET%s:None\n" % call_id)
+            return
+
+        self._mutex.acquire()
+        try:
+            # If it's a xapian object, return a proxy for it.
+            if self._is_xap_obj(ret):
+                ret = LoggedProxy(ret)
+                xapobjname = self._get_xap_name(ret, maybe_new=True)
+            msg = "RET%s:%s\n" % (call_id, self._repr_arg(ret))
+        finally:
+            self._mutex.release()
+
+        # Log the return value (which may now be a proxy) and return it.
+        self._log(msg)
+        return ret
+
+    def _obj_gone(self, num):
+        """Log that an object has been deleted.
+
+        """
+        self._log('DEL:#%d\n' % num)
+
+class LoggedProxy(object):
+    """A proxy for a xapian object, which logs all calls made on the object.
+
+    """
+    def __init__(self, obj):
+        self.__obj = obj
+
+    def __getattribute__(self, name):
+        obj = object.__getattribute__(self, '_LoggedProxy__obj')
+        if name == '__obj':
+            return obj
+        real = getattr(obj, name)
+        if not isinstance(real, types.MethodType):
+            return real
+        return LoggedProxyMethod(real, self)
+
+    def __iter__(self):
+        obj = object.__getattribute__(self, '_LoggedProxy__obj')
+        return obj.__iter__()
+
+    def __len__(self):
+        obj = object.__getattribute__(self, '_LoggedProxy__obj')
+        return obj.__len__()
+
+    def __repr__(self):
+        obj = object.__getattribute__(self, '_LoggedProxy__obj')
+        return '<LoggedProxy of %s >' % obj.__repr__()
+
+    def __str__(self):
+        obj = object.__getattribute__(self, '_LoggedProxy__obj')
+        return obj.__str__()
+
+class LoggedProxyMethod(object):
+    """A proxy for a xapian method, which logs all calls made on the method.
+
+    """
+    def __init__(self, real, proxyobj):
+        """Make a proxy for the method.
+
+        """
+        self.real = real
+        self.proxyobj = proxyobj
+
+    def __call__(self, *args):
+        """Call the proxied method, logging the call.
+
+        """
+        return log(self, *args)
+
+def set_replay_path(logpath):
+    """Set the path for the replay log.
+
+    """
+    global _replay_log
+    global _had_replay_log
+    if logpath is None:
+        _replay_log = None
+    else:
+        _had_replay_log = True
+        _replay_log = ReplayLog(logpath)
+
+def _unproxy_call_and_args(call, args):
+    """Convert a call and list of arguments to unproxied form.
+
+    """
+    if isinstance(call, LoggedProxyMethod):
+        realcall = call.real
+    else:
+        realcall = call
+
+    realargs = []
+    for arg in args:
+        if isinstance(arg, LoggedProxy):
+            arg = arg.__obj
+        realargs.append(arg)
+
+    return realcall, realargs
+
+def log(call, *args):
+    """Make a call to xapian, and log it.
+
+    """
+    # If we've never had a replay log in force, no need to unproxy objects.
+    global _had_replay_log
+    if not _had_replay_log:
+        return call(*args)
+
+    # Get unproxied versions of the call and arguments.
+    realcall, realargs = _unproxy_call_and_args(call, args)
+
+    # If we have no replay log currently, just do the call.
+    global _replay_log
+    replay_log = _replay_log
+    if replay_log is None:
+        return realcall(*realargs)
+
+    # We have a replay log: do a logged version of the call.
+    call_id = replay_log.log_call(call, *args)
+    try:
+        ret = realcall(*realargs)
+    except:
+        replay_log.log_except(sys.exc_info(), call_id)
+        raise
+    return replay_log.log_retval(ret, call_id)
+
+#set_replay_path('replay.log')
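+
+# Illustrative usage (a sketch, not part of the library's tests): other xappy
+# modules route their xapian calls through log(), for example marshall.py does
+#
+#     from replaylog import log as _log
+#     _log(xapian.sortable_serialise, value)
+#
+# When set_replay_path() has been given a filename, each such call, its
+# arguments and its return value are appended to that file; otherwise the call
+# is made directly, with minimal overhead.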
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MoinMoin/support/xappy/schema.py	Mon Jul 27 17:35:13 2009 +0200
@@ -0,0 +1,33 @@
+#!/usr/bin/env python
+#
+# Copyright (C) 2008 Lemur Consulting Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+r"""schema.py: xdefinitions and implementations of field actions.
+
+"""
+__docformat__ = "restructuredtext en"
+
+import errors as _errors
+from replaylog import log as _log
+import parsedate as _parsedate
+
+class Schema(object):
+    def __init__(self):
+        pass
+
+if __name__ == '__main__':
+    import doctest, sys
+    doctest.testmod(sys.modules[__name__])
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MoinMoin/support/xappy/searchconnection.py	Mon Jul 27 17:35:13 2009 +0200
@@ -0,0 +1,1873 @@
+#!/usr/bin/env python
+#
+# Copyright (C) 2007 Lemur Consulting Ltd
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+r"""searchconnection.py: A connection to the search engine for searching.
+
+"""
+__docformat__ = "restructuredtext en"
+
+import _checkxapian
+import os as _os
+import cPickle as _cPickle
+import math
+
+import xapian as _xapian
+from datastructures import *
+from fieldactions import *
+import fieldmappings as _fieldmappings
+import highlight as _highlight 
+import errors as _errors
+import indexerconnection as _indexerconnection
+import re as _re
+from replaylog import log as _log
+
+class SearchResult(ProcessedDocument):
+    """A result from a search.
+
+    As well as being a ProcessedDocument representing the document in the
+    database, the result has several members which may be used to get
+    information about how well the document matches the search:
+
+     - `rank`: The rank of the document in the search results, starting at 0
+       (ie, 0 is the "top" result, 1 is the second result, etc).
+
+     - `weight`: A floating point number indicating the weight of the result
+       document.  The value is only meaningful relative to other results for a
+       given search - a different search, or the same search with a different
+       database, may give an entirely different scale to the weights.  This
+       should not usually be displayed to users, but may be useful if trying to
+       perform advanced reweighting operations on search results.
+
+     - `percent`: A percentage value for the weight of a document.  This is
+       just a rescaled form of the `weight` member.  It doesn't represent any
+       kind of probability value; the only real meaning of the numbers is that,
+       within a single set of results, a document with a higher percentage
+       corresponds to a better match.  Because the percentage doesn't really
+       represent a probability, or a confidence value, it is probably unhelpful
+       to display it to most users, since they tend to place an over-emphasis
+       on its meaning.  However, it is included because it may be useful
+       occasionally.
+
+    """
+    def __init__(self, msetitem, results):
+        ProcessedDocument.__init__(self, results._fieldmappings, msetitem.document)
+        self.rank = msetitem.rank
+        self.weight = msetitem.weight
+        self.percent = msetitem.percent
+        self._results = results
+
+    def _get_language(self, field):
+        """Get the language that should be used for a given field.
+
+        Raises a KeyError if the field is not known.
+
+        """
+        actions = self._results._conn._field_actions[field]._actions
+        for action, kwargslist in actions.iteritems():
+            if action == FieldActions.INDEX_FREETEXT:
+                for kwargs in kwargslist:
+                    try:
+                        return kwargs['language']
+                    except KeyError:
+                        pass
+        return 'none'
+
+    def summarise(self, field, maxlen=600, hl=('<b>', '</b>'), query=None):
+        """Return a summarised version of the field specified.
+
+        This will return a summary of the contents of the field stored in the
+        search result, with words which match the query highlighted.
+
+        The maximum length of the summary (in characters) may be set using the
+        maxlen parameter.
+
+        The return value will be a string holding the summary, with
+        highlighting applied.  If there are multiple instances of the field in
+        the document, the instances will be joined with a newline character.
+        
+        To turn off highlighting, set hl to None.  Each highlight will consist
+        of the first entry in the `hl` list being placed before the word, and
+        the second entry in the `hl` list being placed after the word.
+
+        Any XML or HTML style markup tags in the field will be stripped before
+        the summarisation algorithm is applied.
+
+        If `query` is supplied, it should contain a Query object, as returned
+        from SearchConnection.query_parse() or related methods, which will be
+        used as the basis of the summarisation and highlighting rather than the
+        query which was used for the search.
+
+        Raises KeyError if the field is not known.
+
+        """
+        highlighter = _highlight.Highlighter(language_code=self._get_language(field))
+        field = self.data[field]
+        results = []
+        text = '\n'.join(field)
+        if query is None:
+            query = self._results._query
+        return highlighter.makeSample(text, query, maxlen, hl)
+
+    def highlight(self, field, hl=('<b>', '</b>'), strip_tags=False, query=None):
+        """Return a highlighted version of the field specified.
+
+        This will return all the contents of the field stored in the search
+        result, with words which match the query highlighted.
+
+        The return value will be a list of strings (corresponding to the list
+        of strings which is the raw field data).
+
+        Each highlight will consist of the first entry in the `hl` list being
+        placed before the word, and the second entry in the `hl` list being
+        placed after the word.
+
+        If `strip_tags` is True, any XML or HTML style markup tags in the field
+        will be stripped before highlighting is applied.
+
+        If `query` is supplied, it should contain a Query object, as returned
+        from SearchConnection.query_parse() or related methods, which will be
+        used as the basis of the summarisation and highlighting rather than the
+        query which was used for the search.
+
+        Raises KeyError if the field is not known.
+
+        """
+        highlighter = _highlight.Highlighter(language_code=self._get_language(field))
+        field = self.data[field]
+        results = []
+        if query is None:
+            query = self._results._query
+        for text in field:
+            results.append(highlighter.highlight(text, query, hl, strip_tags))
+        return results
+
+    def __repr__(self):
+        return ('<SearchResult(rank=%d, id=%r, data=%r)>' %
+                (self.rank, self.id, self.data))
+
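+# Illustrative usage (a sketch, not part of the shipped doctests): reading the
+# match information described above for each result of a search.  This assumes
+# a field named 'text' was configured with both INDEX_FREETEXT and
+# STORE_CONTENT, so that summarise() can find stored content for it.
+#
+#     results = sconn.search(query, 0, 10)
+#     for result in results:
+#         print result.rank, result.percent, result.data
+#         print result.summarise('text', maxlen=300)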
+
+class SearchResultIter(object):
+    """An iterator over a set of results from a search.
+
+    """
+    def __init__(self, results, order):
+        self._results = results
+        self._order = order
+        if self._order is None:
+            self._iter = iter(results._mset)
+        else:
+            self._iter = iter(self._order)
+
+    def next(self):
+        if self._order is None:
+            msetitem = self._iter.next()
+        else:
+            index = self._iter.next()
+            msetitem = self._results._mset.get_hit(index)
+        return SearchResult(msetitem, self._results)
+
+
+def _get_significant_digits(value, lower, upper):
+    """Get the significant digits of value which are constrained by the
+    (inclusive) lower and upper bounds.
+
+    If there are no significant digits which are definitely within the
+    bounds, exactly one significant digit will be returned in the result.
+
+    >>> _get_significant_digits(15,15,15)
+    15
+    >>> _get_significant_digits(15,15,17)
+    20
+    >>> _get_significant_digits(4777,208,6000)
+    5000
+    >>> _get_significant_digits(4777,4755,4790)
+    4800
+    >>> _get_significant_digits(4707,4695,4710)
+    4700
+    >>> _get_significant_digits(4719,4717,4727)
+    4720
+    >>> _get_significant_digits(0,0,0)
+    0
+    >>> _get_significant_digits(9,9,10)
+    9
+    >>> _get_significant_digits(9,9,100)
+    9
+
+    """
+    assert(lower <= value)
+    assert(value <= upper)
+    diff = upper - lower
+
+    # Get the first power of 10 greater than the difference.
+    # This corresponds to the magnitude of the smallest significant digit.
+    if diff == 0:
+        pos_pow_10 = 1
+    else:
+        pos_pow_10 = int(10 ** math.ceil(math.log10(diff)))
+
+    # Special case for situation where we don't have any significant digits:
+    # get the magnitude of the most significant digit in value.
+    if pos_pow_10 > value:
+        if value == 0:
+            pos_pow_10 = 1
+        else:
+            pos_pow_10 = int(10 ** math.floor(math.log10(value)))
+
+    # Return the value, rounded to the nearest multiple of pos_pow_10
+    return ((value + pos_pow_10 // 2) // pos_pow_10) * pos_pow_10
+
+class SearchResults(object):
+    """A set of results of a search.
+
+    """
+    def __init__(self, conn, enq, query, mset, fieldmappings, tagspy,
+                 tagfields, facetspy, facetfields, facethierarchy,
+                 facetassocs):
+        self._conn = conn
+        self._enq = enq
+        self._query = query
+        self._mset = mset
+        self._mset_order = None
+        self._fieldmappings = fieldmappings
+        self._tagspy = tagspy
+        if tagfields is None:
+            self._tagfields = None
+        else:
+            self._tagfields = set(tagfields)
+        self._facetspy = facetspy
+        self._facetfields = facetfields
+        self._facethierarchy = facethierarchy
+        self._facetassocs = facetassocs
+        self._numeric_ranges_built = {}
+
+    def _cluster(self, num_clusters, maxdocs, fields=None):
+        """Cluster results based on similarity.
+
+        Note: this method is experimental, and will probably disappear or
+        change in the future.
+
+        The number of clusters is specified by num_clusters: unless there are
+        too few results, there will be exactly this number of clusters in the
+        result.
+
+        """
+        clusterer = _xapian.ClusterSingleLink()
+        xapclusters = _xapian.ClusterAssignments()
+        docsim = _xapian.DocSimCosine()
+        source = _xapian.MSetDocumentSource(self._mset, maxdocs)
+
+        if fields is None:
+            clusterer.cluster(self._conn._index, xapclusters, docsim, source, num_clusters)
+        else:
+            decider = self._make_expand_decider(fields)
+            clusterer.cluster(self._conn._index, xapclusters, docsim, source, decider, num_clusters)
+
+        newid = 0
+        idmap = {}
+        clusters = {}
+        for item in self._mset:
+            docid = item.docid
+            clusterid = xapclusters.cluster(docid)
+            if clusterid not in idmap:
+                idmap[clusterid] = newid
+                newid += 1
+            clusterid = idmap[clusterid]
+            if clusterid not in clusters:
+                clusters[clusterid] = []
+            clusters[clusterid].append(item.rank)
+        return clusters
+
+    def _reorder_by_clusters(self, clusters):
+        """Reorder the mset based on some clusters.
+
+        """
+        if self.startrank != 0:
+            raise _errors.SearchError("startrank must be zero to reorder by clusters")
+        reordered = False
+        tophits = []
+        nottophits = []
+
+        clusterstarts = dict(((c[0], None) for c in clusters.itervalues()))
+        for i in xrange(self.endrank):
+            if i in clusterstarts:
+                tophits.append(i)
+            else:
+                nottophits.append(i)
+        self._mset_order = tophits
+        self._mset_order.extend(nottophits)
+
+    def _make_expand_decider(self, fields):
+        """Make an expand decider which accepts only terms in the specified
+        field.
+
+        """
+        prefixes = {}
+        if isinstance(fields, basestring):
+            fields = [fields]
+        for field in fields:
+            try:
+                actions = self._conn._field_actions[field]._actions
+            except KeyError:
+                continue
+            for action, kwargslist in actions.iteritems():
+                if action == FieldActions.INDEX_FREETEXT:
+                    prefix = self._conn._field_mappings.get_prefix(field)
+                    prefixes[prefix] = None
+                    prefixes['Z' + prefix] = None
+                if action in (FieldActions.INDEX_EXACT,
+                              FieldActions.TAG,
+                              FieldActions.FACET,):
+                    prefix = self._conn._field_mappings.get_prefix(field)
+                    prefixes[prefix] = None
+        prefix_re = _re.compile('|'.join([_re.escape(x) + '[^A-Z]' for x in prefixes.keys()]))
+        class decider(_xapian.ExpandDecider):
+            def __call__(self, term):
+                return prefix_re.match(term) is not None
+        return decider()
+
+    def _reorder_by_similarity(self, count, maxcount, max_similarity,
+                               fields=None):
+        """Reorder results based on similarity.
+
+        The top `count` documents will be chosen such that they are relatively
+        dissimilar.  `maxcount` documents will be considered for moving around,
+        and `max_similarity` is a value between 0 and 1 indicating the maximum
+        similarity to the previous document before a document is moved down the
+        result set.
+
+        Note: this method is experimental, and will probably disappear or
+        change in the future.
+
+        """
+        if self.startrank != 0:
+            raise _errors.SearchError("startrank must be zero to reorder by similiarity")
+        ds = _xapian.DocSimCosine()
+        ds.set_termfreqsource(_xapian.DatabaseTermFreqSource(self._conn._index))
+
+        if fields is not None:
+            ds.set_expand_decider(self._make_expand_decider(fields))
+
+        tophits = []
+        nottophits = []
+        full = False
+        reordered = False
+
+        sim_count = 0
+        new_order = []
+        end = min(self.endrank, maxcount)
+        for i in xrange(end):
+            if full:
+                new_order.append(i)
+                continue
+            hit = self._mset.get_hit(i)
+            if len(tophits) == 0:
+                tophits.append(hit)
+                continue
+
+            # Compare each incoming hit to tophits
+            maxsim = 0.0
+            for tophit in tophits[-1:]:
+                sim_count += 1
+                sim = ds.similarity(hit.document, tophit.document)
+                if sim > maxsim:
+                    maxsim = sim
+
+            # If it's not similar to an existing hit, add to tophits.
+            if maxsim < max_similarity:
+                tophits.append(hit)
+            else:
+                nottophits.append(hit)
+                reordered = True
+
+            # If we're full of hits, append to the end.
+            if len(tophits) >= count:
+                for hit in tophits:
+                    new_order.append(hit.rank)
+                for hit in nottophits:
+                    new_order.append(hit.rank)
+                full = True
+        if not full:
+            for hit in tophits:
+                new_order.append(hit.rank)
+            for hit in nottophits:
+                new_order.append(hit.rank)
+        if end != self.endrank:
+            new_order.extend(range(end, self.endrank))
+        assert len(new_order) == self.endrank
+        if reordered:
+            self._mset_order = new_order
+        else:
+            assert new_order == range(self.endrank)
+
+    def __repr__(self):
+        return ("<SearchResults(startrank=%d, "
+                "endrank=%d, "
+                "more_matches=%s, "
+                "matches_lower_bound=%d, "
+                "matches_upper_bound=%d, "
+                "matches_estimated=%d, "
+                "estimate_is_exact=%s)>" %
+                (
+                 self.startrank,
+                 self.endrank,
+                 self.more_matches,
+                 self.matches_lower_bound,
+                 self.matches_upper_bound,
+                 self.matches_estimated,
+                 self.estimate_is_exact,
+                ))
+
+    def _get_more_matches(self):
+        # This check relies on us having asked for at least one more result
+        # than retrieved to be checked.
+        return (self.matches_lower_bound > self.endrank)
+    more_matches = property(_get_more_matches, doc=
+    """Check whether there are further matches after those in this result set.
+
+    """)
+
+    def _get_startrank(self):
+        return self._mset.get_firstitem()
+    startrank = property(_get_startrank, doc=
+    """Get the rank of the first item in the search results.
+
+    This corresponds to the "startrank" parameter passed to the search() method.
+
+    """)
+
+    def _get_endrank(self):
+        return self._mset.get_firstitem() + len(self._mset)
+    endrank = property(_get_endrank, doc=
+    """Get the rank of the item after the end of the search results.
+
+    If there are sufficient results in the index, this corresponds to the
+    "endrank" parameter passed to the search() method.
+
+    """)
+
+    def _get_lower_bound(self):
+        return self._mset.get_matches_lower_bound()
+    matches_lower_bound = property(_get_lower_bound, doc=
+    """Get a lower bound on the total number of matching documents.
+
+    """)
+
+    def _get_upper_bound(self):
+        return self._mset.get_matches_upper_bound()
+    matches_upper_bound = property(_get_upper_bound, doc=
+    """Get an upper bound on the total number of matching documents.
+
+    """)
+
+    def _get_human_readable_estimate(self):
+        lower = self._mset.get_matches_lower_bound()
+        upper = self._mset.get_matches_upper_bound()
+        est = self._mset.get_matches_estimated()
+        return _get_significant_digits(est, lower, upper)
+    matches_human_readable_estimate = property(_get_human_readable_estimate,
+                                               doc=
+    """Get a human readable estimate of the number of matching documents.
+
+    This consists of the value returned by the "matches_estimated" property,
+    rounded to an appropriate number of significant digits (as determined by
+    the values of the "matches_lower_bound" and "matches_upper_bound"
+    properties).
+
+    """)
+
+    def _get_estimated(self):
+        return self._mset.get_matches_estimated()
+    matches_estimated = property(_get_estimated, doc=
+    """Get an estimate for the total number of matching documents.
+
+    """)
+
+    def _estimate_is_exact(self):
+        return self._mset.get_matches_lower_bound() == \
+               self._mset.get_matches_upper_bound()
+    estimate_is_exact = property(_estimate_is_exact, doc=
+    """Check whether the estimated number of matching documents is exact.
+
+    If this returns true, the estimate given by the `matches_estimated`
+    property is guaranteed to be correct.
+
+    If this returns false, it is possible that the actual number of matching
+    documents is different from the number given by the `matches_estimated`
+    property.
+
+    """)
+
+    def get_hit(self, index):
+        """Get the hit with a given index.
+
+        """
+        if self._mset_order is None:
+            msetitem = self._mset.get_hit(index)
+        else:
+            msetitem = self._mset.get_hit(self._mset_order[index])
+        return SearchResult(msetitem, self)
+    __getitem__ = get_hit
+
+    def __iter__(self):
+        """Get an iterator over the hits in the search result.
+
+        The iterator returns the results in increasing order of rank.
+
+        """
+        return SearchResultIter(self, self._mset_order)
+
+    def __len__(self):
+        """Get the number of hits in the search result.
+
+        Note that this is not (usually) the number of matching documents for
+        the search.  If startrank is non-zero, it's not even the rank of the
+        last document in the search result.  It's simply the number of hits
+        stored in the search result.
+
+        It is, however, the number of items returned by the iterator produced
+        by calling iter() on this SearchResults object.
+
+        """
+        return len(self._mset)
+
+    def get_top_tags(self, field, maxtags):
+        """Get the most frequent tags in a given field.
+
+         - `field` - the field to get tags for.  This must have been specified
+           in the "gettags" argument of the search() call.
+         - `maxtags` - the maximum number of tags to return.
+
+        Returns a sequence of 2-item tuples, in which the first item in the
+        tuple is the tag, and the second is the frequency of the tag in the
+        matches seen (as an integer).
+
+        """
+        if 'tags' in _checkxapian.missing_features:
+            raise errors.SearchError("Tags unsupported with this release of xapian")
+        if self._tagspy is None or field not in self._tagfields:
+            raise _errors.SearchError("Field %r was not specified for getting tags" % field)
+        prefix = self._conn._field_mappings.get_prefix(field)
+        return self._tagspy.get_top_terms(prefix, maxtags)
+
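+    # Illustrative sketch of fetching the most frequent tags for a field.  It
+    # assumes a hypothetical field named "tag" which was indexed with the TAG
+    # action, and which was named in the "gettags" argument of the search()
+    # call that produced "results":
+    #
+    #   for tag, freq in results.get_top_tags('tag', 5):
+    #       print tag, freq
+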
+    def get_suggested_facets(self, maxfacets=5, desired_num_of_categories=7,
+                             required_facets=None):
+        """Get a suggested set of facets, to present to the user.
+
+        This returns a list, in descending order of the usefulness of the
+        facet, in which each item is a tuple holding:
+
+         - fieldname of facet.
+         - sequence of 2-tuples holding the suggested values or ranges for that
+           field:
+
+           For facets of type 'string', the first item in the 2-tuple will
+           simply be the string supplied when the facet value was added to its
+           document.  For facets of type 'float', it will be a 2-tuple, holding
+           floats giving the start and end of the suggested value range.
+
+           The second item in the 2-tuple will be the frequency of the facet
+           value or range in the result set.
+
+        If required_facets is not None, it must be a field name, or a sequence
+        of field names.  Any field names mentioned in required_facets will be
+        returned if there are any facet values at all in the search results for
+        that field.  The facet will only be omitted if there are no facet
+        values at all for the field.
+
+        The value of maxfacets will be respected as far as possible; the
+        exception is that if there are too many fields listed in
+        required_facets with at least one value in the search results, extra
+        facets will be returned (ie, obeying the required_facets parameter is
+        considered more important than the maxfacets parameter).
+
+        If facet_hierarchy was indicated when search() was called, and the
+        query included facets, then only subfacets of those query facets and
+        top-level facets will be included in the returned list. Furthermore
+        top-level facets will only be returned if there are remaining places
+        in the list after it has been filled with subfacets. Note that
+        required_facets is still respected regardless of the facet hierarchy.
+
+        If a query type was specified when search() was called, and the query
+        included facets, then facets with an association of Never to the
+        query type are never returned, even if mentioned in required_facets.
+        Facets with an association of Preferred are listed before others in
+        the returned list.
+
+        """
+        if 'facets' in _checkxapian.missing_features:
+            raise errors.SearchError("Facets unsupported with this release of xapian")
+        if self._facetspy is None:
+            raise _errors.SearchError("Facet selection wasn't enabled when the search was run")
+        if isinstance(required_facets, basestring):
+            required_facets = [required_facets]
+        scores = []
+        facettypes = {}
+        for field, slot, kwargslist in self._facetfields:
+            type = None
+            for kwargs in kwargslist:
+                type = kwargs.get('type', None)
+                if type is not None: break
+            if type is None: type = 'string'
+
+            if type == 'float':
+                if field not in self._numeric_ranges_built:
+                    self._facetspy.build_numeric_ranges(slot, desired_num_of_categories)
+                    self._numeric_ranges_built[field] = None
+            facettypes[field] = type
+            score = self._facetspy.score_categorisation(slot, desired_num_of_categories)
+            scores.append((score, field, slot))
+
+        # Sort so that subfacets come ahead of top-level facets, and facets
+        # preferred for the query type come ahead of everything else; the
+        # score is used to order facets within each group.
+        if self._facethierarchy:
+            # Note: item[-2] is the value of 'field' in a scores tuple
+            scores = [(item[-2] not in self._facethierarchy,) + item for item in scores]
+        if self._facetassocs:
+            preferred = _indexerconnection.IndexerConnection.FacetQueryType_Preferred
+            scores = [(self._facetassocs.get(item[-2]) != preferred,) + item for item in scores]
+        scores.sort()
+        if self._facethierarchy:
+            index = 1
+        else:
+            index = 0
+        if self._facetassocs:
+            index += 1
+        if index > 0:
+            scores = [tuple[index:] for tuple in scores]
+
+        results = []
+        required_results = []
+        for score, field, slot in scores:
+            # Check if the facet is required
+            required = False
+            if required_facets is not None:
+                required = field in required_facets
+
+            # If we've got enough facets, and the field isn't required, skip it
+            if not required and len(results) + len(required_results) >= maxfacets:
+                continue
+
+            # Get the values
+            values = self._facetspy.get_values_as_dict(slot)
+            if field in self._numeric_ranges_built:
+                if '' in values:
+                    del values['']
+
+            # Required facets must occur at least once, other facets must occur
+            # at least twice.
+            if required:
+                if len(values) < 1:
+                    continue
+            else:
+                if len(values) <= 1:
+                    continue
+
+            newvalues = []
+            if facettypes[field] == 'float':
+                # Convert numbers to python numbers, and number ranges to a
+                # python tuple of two numbers.
+                for value, frequency in values.iteritems():
+                    if len(value) <= 9:
+                        value1 = _log(_xapian.sortable_unserialise, value)
+                        value2 = value1
+                    else:
+                        value1 = _log(_xapian.sortable_unserialise, value[:9])
+                        value2 = _log(_xapian.sortable_unserialise, value[9:])
+                    newvalues.append(((value1, value2), frequency))
+            else:
+                for value, frequency in values.iteritems():
+                    newvalues.append((value, frequency))
+
+            newvalues.sort()
+            if required:
+                required_results.append((score, field, newvalues))
+            else:
+                results.append((score, field, newvalues))
+
+        # Throw away any excess results if we have more required_results to
+        # insert.
+        maxfacets = maxfacets - len(required_results)
+        if maxfacets <= 0:
+            results = required_results
+        else:
+            results = results[:maxfacets]
+            results.extend(required_results)
+            results.sort()
+
+        # Throw away the scores because they're not meaningful outside this
+        # algorithm.
+        results = [(field, newvalues) for (score, field, newvalues) in results]
+        return results
+
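+    # Illustrative sketch of presenting suggested facets, assuming the search
+    # was run with getfacets=True and that a hypothetical "price" field was
+    # indexed with the FACET action and type 'float':
+    #
+    #   for fieldname, values in results.get_suggested_facets(maxfacets=3):
+    #       print fieldname
+    #       for value, freq in values:
+    #           # "value" is a string, or a (start, end) tuple for 'float'
+    #           # facets.
+    #           print "  %r (%d)" % (value, freq)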
+
+class SearchConnection(object):
+    """A connection to the search engine for searching.
+
+    The connection will access a view of the database.
+
+    """
+    _qp_flags_base = _xapian.QueryParser.FLAG_LOVEHATE
+    _qp_flags_phrase = _xapian.QueryParser.FLAG_PHRASE
+    _qp_flags_synonym = (_xapian.QueryParser.FLAG_AUTO_SYNONYMS |
+                         _xapian.QueryParser.FLAG_AUTO_MULTIWORD_SYNONYMS)
+    _qp_flags_bool = _xapian.QueryParser.FLAG_BOOLEAN
+
+    _index = None
+
+    def __init__(self, indexpath):
+        """Create a new connection to the index for searching.
+
+        Any number of search connections may be open on a particular database
+        at a given time (regardless of whether a connection for indexing is
+        open as well).
+
+        If the database doesn't exist, an exception will be raised.
+
+        """
+        self._index = _log(_xapian.Database, indexpath)
+        self._indexpath = indexpath
+
+        # Read the actions.
+        self._load_config()
+
+        self._close_handlers = []
+
+    def __del__(self):
+        self.close()
+
+    def append_close_handler(self, handler, userdata=None):
+        """Append a callback to the list of close handlers.
+
+        These will be called when the SearchConnection is closed.  This happens
+        when the close() method is called, or when the SearchConnection object
+        is deleted.  The callback will be passed two arguments: the path to the
+        SearchConnection object, and the userdata supplied to this method.
+
+        The handlers will be called in the order in which they were added.
+
+        The handlers will be called after the connection has been closed, so
+        they cannot prevent it from closing: their return values will be
+        ignored.  In addition, they should not raise any exceptions.
+
+        """
+        self._close_handlers.append((handler, userdata))
+
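+    # Illustrative sketch of registering a close handler; the callback name
+    # and its behaviour are hypothetical:
+    #
+    #   def report_close(indexpath, userdata):
+    #       print "closed connection to %s (%r)" % (indexpath, userdata)
+    #
+    #   conn = SearchConnection('foo')
+    #   conn.append_close_handler(report_close, userdata='cleanup')
+    #   conn.close()
+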
+    def _get_sort_type(self, field):
+        """Get the sort type that should be used for a given field.
+
+        """
+        try:
+            actions = self._field_actions[field]._actions
+        except KeyError:
+            actions = {}
+        for action, kwargslist in actions.iteritems():
+            if action == FieldActions.SORT_AND_COLLAPSE:
+                for kwargs in kwargslist:
+                    return kwargs['type']
+
+    def _load_config(self):
+        """Load the configuration for the database.
+
+        """
+        # Note: this code is basically duplicated in the IndexerConnection
+        # class.  Move it to a shared location.
+        assert self._index is not None
+
+        config_str = _log(self._index.get_metadata, '_xappy_config')
+        if len(config_str) == 0:
+            self._field_actions = {}
+            self._field_mappings = _fieldmappings.FieldMappings()
+            self._facet_hierarchy = {}
+            self._facet_query_table = {}
+            return
+
+        try:
+            (self._field_actions, mappings, self._facet_hierarchy, self._facet_query_table, self._next_docid) = _cPickle.loads(config_str)
+        except ValueError:
+            # Backwards compatibility - configuration used to lack _facet_hierarchy and _facet_query_table
+            (self._field_actions, mappings, self._next_docid) = _cPickle.loads(config_str)
+            self._facet_hierarchy = {}
+            self._facet_query_table = {}
+        self._field_mappings = _fieldmappings.FieldMappings(mappings)
+
+    def reopen(self):
+        """Reopen the connection.
+
+        This updates the revision of the index which the connection references
+        to the latest flushed revision.
+
+        """
+        if self._index is None:
+            raise _errors.SearchError("SearchConnection has been closed")
+        self._index.reopen()
+        # Re-read the actions.
+        self._load_config()
+        
+    def close(self):
+        """Close the connection to the database.
+
+        It is important to call this method before allowing the class to be
+        garbage collected to ensure that the connection is cleaned up promptly.
+
+        No other methods may be called on the connection after this has been
+        called.  (It is permissible to call close() multiple times, but
+        only the first call will have any effect.)
+
+        If an exception occurs, the database will be closed, but changes since
+        the last call to flush may be lost.
+
+        """
+        if self._index is None:
+            return
+
+        # Remember the index path
+        indexpath = self._indexpath
+
+        # There is currently no "close()" method for xapian databases, so
+        # we have to rely on the garbage collector.  Since we never copy
+        # the _index property out of this class, there should be no cycles,
+        # so the standard python implementation should garbage collect
+        # _index straight away.  A close() method is planned to be added to
+        # xapian at some point - when it is, we should call it here to make
+        # the code more robust.
+        self._index = None
+        self._indexpath = None
+        self._field_actions = None
+        self._field_mappings = None
+
+        # Call the close handlers.
+        for handler, userdata in self._close_handlers:
+            try:
+                handler(indexpath, userdata)
+            except Exception, e:
+                import sys, traceback
+                print >>sys.stderr, "WARNING: unhandled exception in handler called by SearchConnection.close(): %s" % traceback.format_exception_only(type(e), e)
+
+    def get_doccount(self):
+        """Count the number of documents in the database.
+
+        This count will include documents which have been added or removed but
+        not yet flushed().
+
+        """
+        if self._index is None:
+            raise _errors.SearchError("SearchConnection has been closed")
+        return self._index.get_doccount()
+
+    OP_AND = _xapian.Query.OP_AND
+    OP_OR = _xapian.Query.OP_OR
+    def query_composite(self, operator, queries):
+        """Build a composite query from a list of queries.
+
+        The queries are combined with the supplied operator, which is either
+        SearchConnection.OP_AND or SearchConnection.OP_OR.
+
+        """
+        if self._index is None:
+            raise _errors.SearchError("SearchConnection has been closed")
+        return _log(_xapian.Query, operator, list(queries))
+
+    def query_multweight(self, query, multiplier):
+        """Build a query which modifies the weights of a subquery.
+
+        This produces a query which returns the same documents as the subquery,
+        and in the same order, but with the weights assigned to each document
+        multiplied by the value of "multiplier".  "multiplier" may be any floating
+        point value, but negative values will be clipped to 0, since Xapian
+        doesn't support negative weights.
+
+        This can be useful when producing queries to be combined with
+        query_composite, because it allows the relative importance of parts of
+        the query to be adjusted.
+
+        """
+        return _log(_xapian.Query, _xapian.Query.OP_SCALE_WEIGHT, query, multiplier)
+
+    def query_filter(self, query, filter, exclude=False):
+        """Filter a query with another query.
+
+        If exclude is False (or not specified), documents will only match the
+        resulting query if they match both the first and the second query: the
+        results of the first query are "filtered" to only include those which
+        also match the second query.
+
+        If exclude is True, documents will only match the resulting query if
+        they match the first query, but not the second query: the results of
+        the first query are "filtered" to only include those which do not match
+        the second query.
+        
+        Documents will always be weighted according to only the first query.
+
+        - `query`: The query to filter.
+        - `filter`: The filter to apply to the query.
+        - `exclude`: If True, the sense of the filter is reversed - only
+          documents which do not match the second query will be returned. 
+
+        """
+        if self._index is None:
+            raise _errors.SearchError("SearchConnection has been closed")
+        if not isinstance(filter, _xapian.Query):
+            raise _errors.SearchError("Filter must be a Xapian Query object")
+        if exclude:
+            return _log(_xapian.Query, _xapian.Query.OP_AND_NOT, query, filter)
+        else:
+            return _log(_xapian.Query, _xapian.Query.OP_FILTER, query, filter)
+
+    def query_adjust(self, primary, secondary):
+        """Adjust the weights of one query with a secondary query.
+
+        Documents will be returned from the resulting query if and only if they
+        match the primary query (specified by the "primary" parameter).
+        However, the weights (and hence, the relevance rankings) of the
+        documents will be adjusted by adding weights from the secondary query
+        (specified by the "secondary" parameter).
+
+        """
+        if self._index is None:
+            raise _errors.SearchError("SearchConnection has been closed")
+        return _log(_xapian.Query, _xapian.Query.OP_AND_MAYBE, primary, secondary)
+
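+    # Illustrative sketch of combining the query builders above, assuming an
+    # index built as in the accompanying doctests (fields "title", "text" and
+    # "category"):
+    #
+    #   conn = SearchConnection('foo')
+    #   q_text = conn.query_parse('test document', allow=['title', 'text'])
+    #   q_cat = conn.query_field('category', 'Cat 6')
+    #   # Restrict the results to the category, weighting by the text query.
+    #   q = conn.query_filter(q_text, q_cat)
+    #   # Boost (but do not require) matches on the title.
+    #   q = conn.query_adjust(q, conn.query_multweight(
+    #           conn.query_field('title', 'test'), 2.0))
+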
+    def query_range(self, field, begin, end):
+        """Create a query for a range search.
+        
+        This creates a query which matches only those documents which have a
+        field value in the specified range.
+
+        Begin and end must be appropriate values for the field, according to
+        the 'type' parameter supplied to the SORTABLE action for the field.
+
+        The begin and end values are both inclusive - any documents with a
+        value equal to begin or end will be returned (unless end is less than
+        begin, in which case no documents will be returned).
+
+        Begin or end may be set to None in order to create an open-ended
+        range.  (They may also both be set to None, which will generate a query
+        which matches all documents containing any value for the field.)
+
+        """
+        if self._index is None:
+            raise _errors.SearchError("SearchConnection has been closed")
+
+        if begin is None and end is None:
+            # Return a "match everything" query
+            return _log(_xapian.Query, '')
+
+        try:
+            slot = self._field_mappings.get_slot(field, 'collsort')
+        except KeyError:
+            # Return a "match nothing" query
+            return _log(_xapian.Query)
+
+        sorttype = self._get_sort_type(field)
+        marshaller = SortableMarshaller(False)
+        fn = marshaller.get_marshall_function(field, sorttype)
+
+        if begin is not None:
+            begin = fn(field, begin)
+        if end is not None:
+            end = fn(field, end)
+
+        if begin is None:
+            return _log(_xapian.Query, _xapian.Query.OP_VALUE_LE, slot, end)
+
+        if end is None:
+            return _log(_xapian.Query, _xapian.Query.OP_VALUE_GE, slot, begin)
+
+        return _log(_xapian.Query, _xapian.Query.OP_VALUE_RANGE, slot, begin, end)
+
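+    # Illustrative sketch of a range query over a hypothetical "price" field
+    # which was indexed with the SORTABLE action and type 'float' (no such
+    # field exists in the accompanying doctests):
+    #
+    #   q = conn.query_range('price', 10.0, 20.0)       # 10.0 <= price <= 20.0
+    #   q_open = conn.query_range('price', None, 20.0)  # price <= 20.0
+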
+    def query_facet(self, field, val):
+        """Create a query for a facet value.
+        
+        This creates a query which matches only those documents which have a
+        facet value in the specified range.
+
+        For a numeric range facet, val should be a tuple holding the start and
+        end of the range, or a comma separated string holding two floating
+        point values.  For other facets, val should be the value to look
+        for.
+
+        The start and end values are both inclusive - any documents with a
+        value equal to start or end will be returned (unless end is less than
+        start, in which case no documents will be returned).
+
+        """
+        if self._index is None:
+            raise _errors.SearchError("SearchConnection has been closed")
+        if 'facets' in _checkxapian.missing_features:
+            raise errors.SearchError("Facets unsupported with this release of xapian")
+
+        try:
+            actions = self._field_actions[field]._actions
+        except KeyError:
+            actions = {}
+        facettype = None
+        for action, kwargslist in actions.iteritems():
+            if action == FieldActions.FACET:
+                for kwargs in kwargslist:
+                    facettype = kwargs.get('type', None)
+                    if facettype is not None:
+                        break
+            if facettype is not None:
+                break
+
+        if facettype == 'float':
+            if isinstance(val, basestring):
+                val = [float(v) for v in val.split(',', 2)]
+            assert(len(val) == 2)
+            try:
+                slot = self._field_mappings.get_slot(field, 'facet')
+            except KeyError:
+                return _log(_xapian.Query)
+            # FIXME - check that sorttype == self._get_sort_type(field)
+            sorttype = 'float'
+            marshaller = SortableMarshaller(False)
+            fn = marshaller.get_marshall_function(field, sorttype)
+            begin = fn(field, val[0])
+            end = fn(field, val[1])
+            return _log(_xapian.Query, _xapian.Query.OP_VALUE_RANGE, slot, begin, end)
+        else:
+            assert(facettype == 'string' or facettype is None)
+            prefix = self._field_mappings.get_prefix(field)
+            return _log(_xapian.Query, prefix + val.lower())
+
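+    # Illustrative sketch of facet queries, assuming hypothetical fields
+    # "colour" (a 'string' facet) and "price" (a 'float' facet):
+    #
+    #   q_colour = conn.query_facet('colour', 'red')
+    #   q_price = conn.query_facet('price', (10.0, 20.0))   # or "10.0,20.0"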
+
+    def _prepare_queryparser(self, allow, deny, default_op, default_allow,
+                             default_deny):
+        """Prepare (and return) a query parser using the specified fields and
+        operator.
+
+        """
+        if self._index is None:
+            raise _errors.SearchError("SearchConnection has been closed")
+
+        if isinstance(allow, basestring):
+            allow = (allow, )
+        if isinstance(deny, basestring):
+            deny = (deny, )
+        if allow is not None and len(allow) == 0:
+            allow = None
+        if deny is not None and len(deny) == 0:
+            deny = None
+        if allow is not None and deny is not None:
+            raise _errors.SearchError("Cannot specify both `allow` and `deny` "
+                                      "(got %r and %r)" % (allow, deny))
+
+        if isinstance(default_allow, basestring):
+            default_allow = (default_allow, )
+        if isinstance(default_deny, basestring):
+            default_deny = (default_deny, )
+        if default_allow is not None and len(default_allow) == 0:
+            default_allow = None
+        if default_deny is not None and len(default_deny) == 0:
+            default_deny = None
+        if default_allow is not None and default_deny is not None:
+            raise _errors.SearchError("Cannot specify both `default_allow` and `default_deny` "
+                                      "(got %r and %r)" % (default_allow, default_deny))
+
+        qp = _log(_xapian.QueryParser)
+        qp.set_database(self._index)
+        qp.set_default_op(default_op)
+
+        if allow is None:
+            allow = [key for key in self._field_actions]
+        if deny is not None:
+            allow = [key for key in allow if key not in deny]
+
+        for field in allow:
+            try:
+                actions = self._field_actions[field]._actions
+            except KeyError:
+                actions = {}
+            for action, kwargslist in actions.iteritems():
+                if action == FieldActions.INDEX_EXACT:
+                    # FIXME - need patched version of xapian to add exact prefixes
+                    #qp.add_exact_prefix(field, self._field_mappings.get_prefix(field))
+                    qp.add_prefix(field, self._field_mappings.get_prefix(field))
+                if action == FieldActions.INDEX_FREETEXT:
+                    # Field-specific searching is allowed unless every set of
+                    # kwargs for the action explicitly turns it off.
+                    allow_field_specific = False
+                    for kwargs in kwargslist:
+                        allow_field_specific = allow_field_specific or kwargs.get('allow_field_specific', True)
+                    if not allow_field_specific:
+                        continue
+                    qp.add_prefix(field, self._field_mappings.get_prefix(field))
+                    for kwargs in kwargslist:
+                        try:
+                            lang = kwargs['language']
+                            my_stemmer = _log(_xapian.Stem, lang)
+                            qp.my_stemmer = my_stemmer
+                            qp.set_stemmer(my_stemmer)
+                            qp.set_stemming_strategy(qp.STEM_SOME)
+                        except KeyError:
+                            pass
+
+        if default_allow is not None or default_deny is not None:
+            if default_allow is None:
+                default_allow = [key for key in self._field_actions]
+            if default_deny is not None:
+                default_allow = [key for key in default_allow if key not in default_deny]
+            for field in default_allow:
+                try:
+                    actions = self._field_actions[field]._actions
+                except KeyError:
+                    actions = {}
+                for action, kwargslist in actions.iteritems():
+                    if action == FieldActions.INDEX_FREETEXT:
+                        qp.add_prefix('', self._field_mappings.get_prefix(field))
+                        # FIXME - set stemming options for the default prefix
+
+        return qp
+
+    def _query_parse_with_prefix(self, qp, string, flags, prefix):
+        """Parse a query, with an optional prefix.
+
+        """
+        if prefix is None:
+            return qp.parse_query(string, flags)
+        else:
+            return qp.parse_query(string, flags, prefix)
+
+    def _query_parse_with_fallback(self, qp, string, prefix=None):
+        """Parse a query with various flags.
+        
+        If the initial boolean pass fails, fall back to not using boolean
+        operators.
+
+        """
+        try:
+            q1 = self._query_parse_with_prefix(qp, string,
+                                               self._qp_flags_base |
+                                               self._qp_flags_phrase |
+                                               self._qp_flags_synonym |
+                                               self._qp_flags_bool,
+                                               prefix)
+        except _xapian.QueryParserError, e:
+            # If we got a parse error, retry without boolean operators (since
+            # these are the usual cause of the parse error).
+            q1 = self._query_parse_with_prefix(qp, string,
+                                               self._qp_flags_base |
+                                               self._qp_flags_phrase |
+                                               self._qp_flags_synonym,
+                                               prefix)
+
+        qp.set_stemming_strategy(qp.STEM_NONE)
+        try:
+            q2 = self._query_parse_with_prefix(qp, string,
+                                               self._qp_flags_base |
+                                               self._qp_flags_bool,
+                                               prefix)
+        except _xapian.QueryParserError, e:
+            # If we got a parse error, retry without boolean operators (since
+            # these are the usual cause of the parse error).
+            q2 = self._query_parse_with_prefix(qp, string,
+                                               self._qp_flags_base,
+                                               prefix)
+
+        return _log(_xapian.Query, _xapian.Query.OP_AND_MAYBE, q1, q2)
+
+    def query_parse(self, string, allow=None, deny=None, default_op=OP_AND,
+                    default_allow=None, default_deny=None):
+        """Parse a query string.
+
+        This is intended for parsing queries entered by a user.  If you wish to
+        combine structured queries, it is generally better to use the other
+        query building methods, such as `query_composite` (though you may wish
+        to create parts of the query to combine with such methods with this
+        method).
+
+        The string passed to this method can have various operators in it.  In
+        particular, it may contain field specifiers (ie, field names, followed
+        by a colon, followed by some text to search for in that field).  For
+        example, if "author" is a field in the database, the search string
+        could contain "author:richard", and this would be interpreted as
+        "search for richard in the author field".  By default, any fields in
+        the database which are indexed with INDEX_EXACT or INDEX_FREETEXT will
+        be available for field specific searching in this way - however, this
+        can be modified using the "allow" or "deny" parameters, and also by the
+        allow_field_specific tag on INDEX_FREETEXT fields.
+
+        Any text which isn't prefixed by a field specifier is used to search
+        the "default set" of fields.  By default, this is the full set of
+        fields in the database which are indexed with INDEX_FREETEXT and for
+        which the search_by_default flag is set (ie, if the text is found in any
+        of those fields, the query will match).  However, this may be modified
+        with the "default_allow" and "default_deny" parameters.  (Note that
+        fields which are indexed with INDEX_EXACT aren't allowed to be used in
+        the default list of fields.)
+
+        - `string`: The string to parse.
+        - `allow`: A list of fields to allow in the query.
+        - `deny`: A list of fields not to allow in the query.
+        - `default_op`: The default operator to combine query terms with.
+        - `default_allow`: A list of fields to search for by default.
+        - `default_deny`: A list of fields not to search for by default.
+
+        Only one of `allow` and `deny` may be specified.
+
+        Only one of `default_allow` and `default_deny` may be specified.
+
+        If any of the entries in `allow` are not present in the configuration
+        for the database, or are not specified for indexing (either as
+        INDEX_EXACT or INDEX_FREETEXT), they will be ignored.  If any of the
+        entries in `deny` are not present in the configuration for the
+        database, they will be ignored.
+
+        Returns a Query object, which may be passed to the search() method, or
+        combined with other queries.
+
+        """
+        qp = self._prepare_queryparser(allow, deny, default_op, default_allow,
+                                       default_deny)
+        return self._query_parse_with_fallback(qp, string)
+
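+    # Illustrative sketch of parsing a user-entered query which mixes default
+    # fields with a field-specific term, using the fields defined in the
+    # accompanying doctests:
+    #
+    #   conn = SearchConnection('foo')
+    #   q = conn.query_parse('test document author:richard', deny=['text'])
+    #   results = conn.search(q, 0, 10)
+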
+    def query_field(self, field, value, default_op=OP_AND):
+        """A query for a single field.
+
+        """
+        if self._index is None:
+            raise _errors.SearchError("SearchConnection has been closed")
+        try:
+            actions = self._field_actions[field]._actions
+        except KeyError:
+            actions = {}
+
+        # need to check on field type, and stem / split as appropriate
+        for action, kwargslist in actions.iteritems():
+            if action in (FieldActions.INDEX_EXACT,
+                          FieldActions.TAG,
+                          FieldActions.FACET,):
+                prefix = self._field_mappings.get_prefix(field)
+                if len(value) > 0:
+                    chval = ord(value[0])
+                    if chval >= ord('A') and chval <= ord('Z'):
+                        prefix = prefix + ':'
+                return _log(_xapian.Query, prefix + value)
+            if action == FieldActions.INDEX_FREETEXT:
+                qp = _log(_xapian.QueryParser)
+                qp.set_default_op(default_op)
+                prefix = self._field_mappings.get_prefix(field)
+                for kwargs in kwargslist:
+                    try:
+                        lang = kwargs['language']
+                        qp.set_stemmer(_log(_xapian.Stem, lang))
+                        qp.set_stemming_strategy(qp.STEM_SOME)
+                    except KeyError:
+                        pass
+                return self._query_parse_with_fallback(qp, value, prefix)
+
+        return _log(_xapian.Query)
+
+    def query_similar(self, ids, allow=None, deny=None, simterms=10):
+        """Get a query which returns documents which are similar to others.
+
+        The list of document IDs to base the similarity search on is given in
+        `ids`.  This should be an iterable, holding a list of strings.  If
+        any of the supplied IDs cannot be found in the database, they will be
+        ignored.  (If no IDs can be found in the database, the resulting query
+        will not match any documents.)
+
+        By default, all fields which have been indexed for freetext searching
+        will be used for the similarity calculation.  The list of fields used
+        for this can be customised using the `allow` and `deny` parameters
+        (only one of which may be specified):
+
+        - `allow`: A list of fields to base the similarity calculation on.
+        - `deny`: A list of fields not to base the similarity calculation on.
+        - `simterms`: Number of terms to use for the similarity calculation.
+
+        For convenience, any of `ids`, `allow`, or `deny` may be strings, which
+        will be treated the same as a list of length 1.
+
+        Regardless of the setting of `allow` and `deny`, only fields which have
+        been indexed for freetext searching will be used for the similarity
+        measure - all other fields will always be ignored for this purpose.
+
+        """
+        eterms, prefixes = self._get_eterms(ids, allow, deny, simterms)
+
+        # Use the "elite set" operator, which chooses the terms with the
+        # highest query weight to use.
+        q = _log(_xapian.Query, _xapian.Query.OP_ELITE_SET, eterms, simterms)
+        return q
+
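+    # Illustrative sketch of finding documents similar to a known document,
+    # using a unique ID as assigned by IndexerConnection.add() in the
+    # accompanying doctests:
+    #
+    #   q = conn.query_similar('1', allow=['text', 'title'])
+    #   similar = conn.search(q, 0, 10)
+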
+    def significant_terms(self, ids, maxterms=10, allow=None, deny=None):
+        """Get a set of "significant" terms for a document, or documents.
+
+        This has a similar interface to query_similar(): it takes a list of
+        ids, and an optional specification of a set of fields to consider.
+        Instead of returning a query, it returns a list of terms from the
+        document (or documents), which appear "significant".  Roughly,
+        in this situation significant means that the terms occur more
+        frequently in the specified document than in the rest of the corpus.
+
+        The list is in decreasing order of "significance".
+
+        By default, all terms related to fields which have been indexed for
+        freetext searching will be considered for the list of significant
+        terms.  The list of fields used for this can be customised using the
+        `allow` and `deny` parameters (only one of which may be specified):
+
+        - `allow`: A list of fields to consider.
+        - `deny`: A list of fields not to consider.
+
+        For convenience, any of `ids`, `allow`, or `deny` may be strings, which
+        will be treated the same as a list of length 1.
+
+        Regardless of the setting of `allow` and `deny`, only fields which have
+        been indexed for freetext searching will be considered - all other
+        fields will always be ignored for this purpose.
+
+        The maximum number of terms to return may be specified by the maxterms
+        parameter.
+
+        """
+        eterms, prefixes = self._get_eterms(ids, allow, deny, maxterms)
+        terms = []
+        for term in eterms:
+            pos = 0
+            for char in term:
+                if not char.isupper():
+                    break
+                pos += 1
+            field = prefixes[term[:pos]]
+            value = term[pos:]
+            terms.append((field, value))
+        return terms
+
+    def _get_eterms(self, ids, allow, deny, simterms):
+        """Get a set of terms for an expand
+
+        """
+        if self._index is None:
+            raise _errors.SearchError("SearchConnection has been closed")
+        if allow is not None and deny is not None:
+            raise _errors.SearchError("Cannot specify both `allow` and `deny`")
+
+        if isinstance(ids, basestring):
+            ids = (ids, )
+        if isinstance(allow, basestring):
+            allow = (allow, )
+        if isinstance(deny, basestring):
+            deny = (deny, )
+
+        # Set "allow" to contain a list of all the fields to use.
+        if allow is None:
+            allow = [key for key in self._field_actions]
+        if deny is not None:
+            allow = [key for key in allow if key not in deny]
+
+        # Set "prefixes" to contain a list of all the prefixes to use.
+        prefixes = {}
+        for field in allow:
+            try:
+                actions = self._field_actions[field]._actions
+            except KeyError:
+                actions = {}
+            for action, kwargslist in actions.iteritems():
+                if action == FieldActions.INDEX_FREETEXT:
+                    prefixes[self._field_mappings.get_prefix(field)] = field
+
+        # Repeat the expand until we don't get a DatabaseModifiedError
+        while True:
+            try:
+                eterms = self._perform_expand(ids, prefixes, simterms)
+                break
+            except _xapian.DatabaseModifiedError, e:
+                self.reopen()
+        return eterms, prefixes
+
+    class ExpandDecider(_xapian.ExpandDecider):
+        def __init__(self, prefixes):
+            _xapian.ExpandDecider.__init__(self)
+            self._prefixes = prefixes
+
+        def __call__(self, term):
+            pos = 0
+            for char in term:
+                if not char.isupper():
+                    break
+                pos += 1
+            if term[:pos] in self._prefixes:
+                return True
+            return False
+
+    def _perform_expand(self, ids, prefixes, simterms):
+        """Perform an expand operation to get the terms for a similarity
+        search, given a set of ids (and a set of prefixes to restrict the
+        similarity operation to).
+
+        """
+        # Set idquery to be a query which returns the documents listed in
+        # "ids".
+        idquery = _log(_xapian.Query, _xapian.Query.OP_OR, ['Q' + id for id in ids])
+
+        enq = _log(_xapian.Enquire, self._index)
+        enq.set_query(idquery)
+        rset = _log(_xapian.RSet)
+        for id in ids:
+            pl = self._index.postlist('Q' + id)
+            try:
+                xapid = pl.next()
+                rset.add_document(xapid.docid)
+            except StopIteration:
+                pass
+
+        expanddecider = _log(self.ExpandDecider, prefixes)
+        eset = enq.get_eset(simterms, rset, 0, 1.0, expanddecider)
+        return [term.term for term in eset]
+
+    def query_all(self):
+        """A query which matches all the documents in the database.
+
+        """
+        return _log(_xapian.Query, '')
+
+    def query_none(self):
+        """A query which matches no documents in the database.
+
+        This may be useful as a placeholder in various situations.
+
+        """
+        return _log(_xapian.Query)
+
+    def spell_correct(self, querystr, allow=None, deny=None, default_op=OP_AND,
+                      default_allow=None, default_deny=None):
+        """Correct a query spelling.
+
+        This returns a version of the query string with any misspelt words
+        corrected.
+
+        - `allow`: A list of fields to allow in the query.
+        - `deny`: A list of fields not to allow in the query.
+        - `default_op`: The default operator to combine query terms with.
+        - `default_allow`: A list of fields to search for by default.
+        - `default_deny`: A list of fields not to search for by default.
+
+        Only one of `allow` and `deny` may be specified.
+
+        Only one of `default_allow` and `default_deny` may be specified.
+
+        If any of the entries in `allow` are not present in the configuration
+        for the database, or are not specified for indexing (either as
+        INDEX_EXACT or INDEX_FREETEXT), they will be ignored.  If any of the
+        entries in `deny` are not present in the configuration for the
+        database, they will be ignored.
+
+        Note that it is possible that the resulting spell-corrected query will
+        still match no documents - the user should usually check that some
+        documents are matched by the corrected query before suggesting it to
+        users.
+
+        """
+        qp = self._prepare_queryparser(allow, deny, default_op, default_allow,
+                                       default_deny)
+        try:
+            qp.parse_query(querystr,
+                           self._qp_flags_base |
+                           self._qp_flags_phrase |
+                           self._qp_flags_synonym |
+                           self._qp_flags_bool |
+                           qp.FLAG_SPELLING_CORRECTION)
+        except _xapian.QueryParserError:
+            qp.parse_query(querystr,
+                           self._qp_flags_base |
+                           self._qp_flags_phrase |
+                           self._qp_flags_synonym |
+                           qp.FLAG_SPELLING_CORRECTION)
+        corrected = qp.get_corrected_query_string()
+        if len(corrected) == 0:
+            if isinstance(querystr, unicode):
+                # Encode as UTF-8 for consistency - this happens automatically
+                # to values passed to Xapian.
+                return querystr.encode('utf-8')
+            return querystr
+        return corrected
+
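+    # Illustrative sketch of offering a "did you mean" suggestion.  This only
+    # produces a different string if spelling data was generated when the
+    # index was built; otherwise the original string comes back unchanged:
+    #
+    #   corrected = conn.spell_correct('docuemnt')
+    #   if corrected != 'docuemnt':
+    #       print "Did you mean %r?" % corrected
+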
+    def can_collapse_on(self, field):
+        """Check if this database supports collapsing on a specified field.
+
+        """
+        if self._index is None:
+            raise _errors.SearchError("SearchConnection has been closed")
+        try:
+            self._field_mappings.get_slot(field, 'collsort')
+        except KeyError:
+            return False
+        return True
+
+    def can_sort_on(self, field):
+        """Check if this database supports sorting on a specified field.
+
+        """
+        if self._index is None:
+            raise _errors.SearchError("SearchConnection has been closed")
+        try:
+            self._field_mappings.get_slot(field, 'collsort')
+        except KeyError:
+            return False
+        return True
+        
+    def _get_prefix_from_term(self, term):
+        """Get the prefix of a term.
+   
+        Prefixes are any initial capital letters, with the exception that R always
+        ends a prefix, even if followed by capital letters.
+        
+        """
+        for p in xrange(len(term)):
+            if term[p].islower():
+                return term[:p]
+            elif term[p] == 'R':
+                return term[:p+1]
+        return term
+
+    def _facet_query_never(self, facet, query_type):
+        """Check if a facet must never be returned by a particular query type.
+
+        Returns True if the facet must never be returned.
+
+        Returns False if the facet may be returned - either because there is no
+        entry for the query type, or because the entry is not
+        FacetQueryType_Never.
+
+        """
+        if query_type is None:
+            return False
+        if query_type not in self._facet_query_table:
+            return False
+        if facet not in self._facet_query_table[query_type]:
+            return False
+        return self._facet_query_table[query_type][facet] == _indexerconnection.IndexerConnection.FacetQueryType_Never
+
+    def search(self, query, startrank, endrank,
+               checkatleast=0, sortby=None, collapse=None,
+               gettags=None,
+               getfacets=None, allowfacets=None, denyfacets=None, usesubfacets=None,
+               percentcutoff=None, weightcutoff=None,
+               query_type=None):
+        """Perform a search, for documents matching a query.
+
+        - `query` is the query to perform.
+        - `startrank` is the rank of the start of the range of matching
+          documents to return (ie, the result with this rank will be returned).
+          Ranks start at 0, which represents the "best" matching document.
+        - `endrank` is the rank at the end of the range of matching documents
+          to return.  This is exclusive, so the result with this rank will not
+          be returned.
+        - `checkatleast` is the minimum number of results to check for: the
+          estimate of the total number of matches will always be exact if
+          the number of matches is less than `checkatleast`.  A value of ``-1``
+          can be specified for the checkatleast parameter - this has the
+          special meaning of "check all matches", and is equivalent to passing
+          the result of get_doccount().
+        - `sortby` is the name of a field to sort by.  It may be preceded by a
+          '+' or a '-' to indicate ascending or descending order
+          (respectively).  If the first character is neither '+' nor '-', the
+          sort will be in ascending order.
+        - `collapse` is the name of a field to collapse the result documents
+          on.  If this is specified, there will be at most one result in the
+          result set for each value of the field.
+        - `gettags` is the name of a field to count tag occurrences in, or a
+          list of fields to do so.
+        - `getfacets` is a boolean - if True, the matching documents will be
+          examined to build up a list of the facet values contained in them.
+        - `allowfacets` is a list of the fieldnames of facets to consider.
+        - `denyfacets` is a list of fieldnames of facets which will not be
+          considered.
+        - `usesubfacets` is a boolean - if True, only top-level facets and
+          subfacets of facets appearing in the query are considered (taking
+          precedence over `allowfacets` and `denyfacets`).
+        - `percentcutoff` is the minimum percentage a result must have to be
+          returned.
+        - `weightcutoff` is the minimum weight a result must have to be
+          returned.
+        - `query_type` is a value indicating the type of query being
+          performed. If not None, the value is used to influence which facets
+          are returned by the get_suggested_facets() function. If the
+          value of `getfacets` is False, it has no effect.
+
+        If neither 'allowfacets' nor 'denyfacets' is specified, all fields
+        holding facets will be considered (but see 'usesubfacets').
+
+        """
+        if self._index is None:
+            raise _errors.SearchError("SearchConnection has been closed")
+        if 'facets' in _checkxapian.missing_features:
+            if getfacets is not None or \
+               allowfacets is not None or \
+               denyfacets is not None or \
+               usesubfacets is not None or \
+               query_type is not None:
+                raise errors.SearchError("Facets unsupported with this release of xapian")
+        if 'tags' in _checkxapian.missing_features:
+            if gettags is not None:
+                raise errors.SearchError("Tags unsupported with this release of xapian")
+        if checkatleast == -1:
+            checkatleast = self._index.get_doccount()
+
+        enq = _log(_xapian.Enquire, self._index)
+        enq.set_query(query)
+
+        if sortby is not None:
+            asc = True
+            if sortby[0] == '-':
+                asc = False
+                sortby = sortby[1:]
+            elif sortby[0] == '+':
+                sortby = sortby[1:]
+
+            try:
+                slotnum = self._field_mappings.get_slot(sortby, 'collsort')
+            except KeyError:
+                raise _errors.SearchError("Field %r was not indexed for sorting" % sortby)
+
+            # Note: we invert the "asc" parameter, because xapian treats
+            # "ascending" as meaning "higher values are better"; in other
+            # words, it considers "ascending" to mean return results in
+            # descending order.
+            enq.set_sort_by_value_then_relevance(slotnum, not asc)
+
+        if collapse is not None:
+            try:
+                slotnum = self._field_mappings.get_slot(collapse, 'collsort')
+            except KeyError:
+                raise _errors.SearchError("Field %r was not indexed for collapsing" % collapse)
+            enq.set_collapse_key(slotnum)
+
+        maxitems = max(endrank - startrank, 0)
+        # Always check for at least one more result, so we can report whether
+        # there are more matches.
+        checkatleast = max(checkatleast, endrank + 1)
+
+        # Build the matchspy.
+        matchspies = []
+
+        # First, add a matchspy for any gettags fields
+        if isinstance(gettags, basestring):
+            if len(gettags) != 0:
+                gettags = [gettags]
+        tagspy = None
+        if gettags is not None and len(gettags) != 0:
+            tagspy = _log(_xapian.TermCountMatchSpy)
+            for field in gettags:
+                try:
+                    prefix = self._field_mappings.get_prefix(field)
+                    tagspy.add_prefix(prefix)
+                except KeyError:
+                    raise _errors.SearchError("Field %r was not indexed for tagging" % field)
+            matchspies.append(tagspy)
+
+
+        # add a matchspy for facet selection here.
+        facetspy = None
+        facetfields = []
+        if getfacets:
+            if allowfacets is not None and denyfacets is not None:
+                raise _errors.SearchError("Cannot specify both `allowfacets` and `denyfacets`")
+            if allowfacets is None:
+                allowfacets = [key for key in self._field_actions]
+            if denyfacets is not None:
+                allowfacets = [key for key in allowfacets if key not in denyfacets]
+
+            # include None in queryfacets so a top-level facet will
+            # satisfy self._facet_hierarchy.get(field) in queryfacets
+            # (i.e. always include top-level facets)
+            queryfacets = set([None])
+            if usesubfacets:
+                # add facets used in the query to queryfacets
+                termsiter = query.get_terms_begin()
+                termsend = query.get_terms_end()
+                while termsiter != termsend:
+                    prefix = self._get_prefix_from_term(termsiter.get_term())
+                    field = self._field_mappings.get_fieldname_from_prefix(prefix)
+                    if field and FieldActions.FACET in self._field_actions[field]._actions:
+                        queryfacets.add(field)
+                    termsiter.next()
+
+            for field in allowfacets:
+                try:
+                    actions = self._field_actions[field]._actions
+                except KeyError:
+                    actions = {}
+                for action, kwargslist in actions.iteritems():
+                    if action == FieldActions.FACET:
+                        # filter out non-top-level facets that aren't subfacets
+                        # of a facet in the query
+                        if usesubfacets and self._facet_hierarchy.get(field) not in queryfacets:
+                            continue
+                        # filter out facets that should never be returned for the query type
+                        if self._facet_query_never(field, query_type):
+                            continue
+                        slot = self._field_mappings.get_slot(field, 'facet')
+                        if facetspy is None:
+                            facetspy = _log(_xapian.CategorySelectMatchSpy)
+                        facettype = None
+                        for kwargs in kwargslist:
+                            facettype = kwargs.get('type', None)
+                            if facettype is not None:
+                                break
+                        if facettype is None or facettype == 'string':
+                            facetspy.add_slot(slot, True)
+                        else:
+                            facetspy.add_slot(slot)
+                        facetfields.append((field, slot, kwargslist))
+
+            if facetspy is None:
+                # Set facetspy to False, to distinguish from no facet
+                # calculation being performed.  (This will prevent an
+                # error being thrown when the list of suggested facets is
+                # requested - instead, an empty list will be returned.)
+                facetspy = False
+            else:
+                matchspies.append(facetspy)
+
+
+        # Finally, build a single matchspy to pass to get_mset().
+        if len(matchspies) == 0:
+            matchspy = None
+        elif len(matchspies) == 1:
+            matchspy = matchspies[0]
+        else:
+            matchspy = _log(_xapian.MultipleMatchDecider)
+            for spy in matchspies:
+                matchspy.append(spy)
+
+        enq.set_docid_order(enq.DONT_CARE)
+
+        # Set percentage and weight cutoffs
+        if percentcutoff is not None or weightcutoff is not None:
+            if percentcutoff is None:
+                percentcutoff = 0
+            if weightcutoff is None:
+                weightcutoff = 0
+            enq.set_cutoff(percentcutoff, weightcutoff)
+
+        # Repeat the search until we don't get a DatabaseModifiedError
+        while True:
+            try:
+                if matchspy is None:
+                    mset = enq.get_mset(startrank, maxitems, checkatleast)
+                else:
+                    mset = enq.get_mset(startrank, maxitems, checkatleast,
+                                        None, None, matchspy)
+                break
+            except _xapian.DatabaseModifiedError, e:
+                self.reopen()
+        facet_hierarchy = None
+        if usesubfacets:
+            facet_hierarchy = self._facet_hierarchy
+            
+        return SearchResults(self, enq, query, mset, self._field_mappings,
+                             tagspy, gettags, facetspy, facetfields,
+                             facet_hierarchy,
+                             self._facet_query_table.get(query_type))
+
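+    # Illustrative sketch of a typical search() call, using the fields from
+    # the accompanying doctests ("category" is indexed with SORTABLE and
+    # COLLAPSE, so it can be used for sortby and collapse):
+    #
+    #   conn = SearchConnection('foo')
+    #   q = conn.query_parse('test document')
+    #   results = conn.search(q, 0, 10, sortby='-category',
+    #                         collapse='category', checkatleast=-1)
+    #   print results.matches_estimated, results.estimate_is_exact
+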
+    def iterids(self):
+        """Get an iterator which returns all the ids in the database.
+
+        The unique IDs are currently returned in binary lexicographical sort
+        order, but this should not be relied on.
+
+        Note that the iterator returned by this method may raise a
+        xapian.DatabaseModifiedError exception if modifications are committed
+        to the database while the iteration is in progress.  If this happens,
+        the search connection must be reopened (by calling reopen) and the
+        iteration restarted.
+
+        """
+        if self._index is None:
+            raise _errors.SearchError("SearchConnection has been closed")
+        return _indexerconnection.PrefixedTermIter('Q', self._index.allterms())
+
+    def get_document(self, id):
+        """Get the document with the specified unique ID.
+
+        Raises a KeyError if there is no such document.  Otherwise, it returns
+        a ProcessedDocument.
+
+        """
+        if self._index is None:
+            raise _errors.SearchError("SearchConnection has been closed")
+        while True:
+            try:
+                postlist = self._index.postlist('Q' + id)
+                try:
+                    plitem = postlist.next()
+                except StopIteration:
+                    # Unique ID not found
+                    raise KeyError('Unique ID %r not found' % id)
+                try:
+                    postlist.next()
+                    raise _errors.IndexerError("Multiple documents " #pragma: no cover
+                                               "found with same unique ID")
+                except StopIteration:
+                    # Only one instance of the unique ID found, as it should be.
+                    pass
+
+                result = ProcessedDocument(self._field_mappings)
+                result.id = id
+                result._doc = self._index.get_document(plitem.docid)
+                return result
+            except _xapian.DatabaseModifiedError, e:
+                self.reopen()
+
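+    # Illustrative sketch of looking up a stored document by its unique ID
+    # (IDs as assigned by IndexerConnection.add() in the accompanying
+    # doctests):
+    #
+    #   doc = conn.get_document('1')
+    #   print doc.data['category']
+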
+    def iter_synonyms(self, prefix=""):
+        """Get an iterator over the synonyms.
+
+         - `prefix`: if specified, only synonym keys with this prefix will be
+           returned.
+
+        The iterator returns 2-tuples.  The first item is the key: itself a
+        2-tuple holding the term or terms which will be synonym expanded,
+        followed by the fieldname specified (or None if no fieldname was
+        specified).  The second item is a tuple of strings holding the
+        synonyms for that key.
+
+        These return values are suitable for the dict() builtin, so you can
+        write things like:
+
+         >>> conn = _indexerconnection.IndexerConnection('foo')
+         >>> conn.add_synonym('foo', 'bar')
+         >>> conn.add_synonym('foo bar', 'baz')
+         >>> conn.add_synonym('foo bar', 'foo baz')
+         >>> conn.flush()
+         >>> conn = SearchConnection('foo')
+         >>> dict(conn.iter_synonyms())
+         {('foo', None): ('bar',), ('foo bar', None): ('baz', 'foo baz')}
+
+        """
+        if self._index is None:
+            raise _errors.SearchError("SearchConnection has been closed")
+        return _indexerconnection.SynonymIter(self._index, self._field_mappings, prefix)
+
+    def get_metadata(self, key):
+        """Get an item of metadata stored in the connection.
+
+        This returns a value stored by a previous call to
+        IndexerConnection.set_metadata.
+
+        If the value is not found, this will return the empty string.
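+
+        A minimal usage sketch (``conn`` is an assumed, already-open
+        SearchConnection, and ``'revision'`` a hypothetical key)::
+
+            value = conn.get_metadata('revision')
+            if not value:
+                pass    # nothing stored under this key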
+
+        """
+        if self._index is None:
+            raise _errors.IndexerError("SearchConnection has been closed")
+        if not hasattr(self._index, 'get_metadata'):
+            raise _errors.IndexerError("Version of xapian in use does not support metadata")
+        return _log(self._index.get_metadata, key)
+
+if __name__ == '__main__':
+    import doctest, sys
+    doctest.testmod(sys.modules[__name__])
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MoinMoin/support/xappy/searchconnection_doctest1.txt	Mon Jul 27 17:35:13 2009 +0200
@@ -0,0 +1,131 @@
+
+>>> from datastructures import *
+>>> from fieldactions import *
+>>> from indexerconnection import *
+
+
+Open a connection for indexing:
+>>> iconn = IndexerConnection('foo')
+
+>>> iconn.add_field_action('author', FieldActions.STORE_CONTENT)
+>>> iconn.add_field_action('title', FieldActions.STORE_CONTENT)
+>>> iconn.add_field_action('category', FieldActions.STORE_CONTENT)
+>>> iconn.add_field_action('text', FieldActions.STORE_CONTENT)
+
+>>> iconn.add_field_action('author', FieldActions.INDEX_FREETEXT, weight=2)
+>>> iconn.add_field_action('title', FieldActions.INDEX_FREETEXT, weight=5)
+>>> iconn.add_field_action('category', FieldActions.INDEX_EXACT)
+>>> iconn.add_field_action('category', FieldActions.SORTABLE)
+>>> iconn.add_field_action('category', FieldActions.COLLAPSE)
+>>> iconn.add_field_action('text', FieldActions.INDEX_FREETEXT, language='en')
+
+Add a set of documents:
+
+>>> for i in xrange(200):
+...     doc = UnprocessedDocument()
+...     doc.fields.append(Field('author', 'Richard Boulton'))
+...     doc.fields.append(Field('category', 'Cat %d' % ((i + 5) % 20)))
+...     doc.fields.append(Field('text', 'This document is a basic test document.'))
+...     doc.fields.append(Field('title', 'Test document %d' % i))
+...     doc.fields.append(Field('text', 'More test text about this document.'))
+...     id = iconn.add(doc)
+
+We can get a document from the indexer connection, even before flushing, by
+using the get_document method.  If the id specified is not found, an error is
+raised.
+>>> iconn.get_document('1').data['category']
+['Cat 6']
+>>> print iconn.get_document('1000').data['category']
+Traceback (most recent call last):
+...
+KeyError: "Unique ID '1000' not found"
+
+If we open a search connection for a database which doesn't exist, we get an
+exception:
+>>> sconn = SearchConnection('notpresent')
+Traceback (most recent call last):
+...
+DatabaseOpeningError: Couldn't detect type of database
+
+If we open a search connection before flushing, we can't see the recent
+modifications:
+>>> sconn = SearchConnection('foo')
+>>> sconn.get_document('1').data['category']
+Traceback (most recent call last):
+...
+KeyError: "Unique ID '1' not found"
+
+
+
+Finally, we get round to flushing the indexer:
+>>> iconn.flush()
+
+We still can't see the document from the search connection.
+>>> sconn.get_document('1').data['category']
+Traceback (most recent call last):
+...
+KeyError: "Unique ID '1' not found"
+
+Now, open a new search connection - we can see the document:
+>>> sconn = SearchConnection('foo')
+>>> sconn.get_document('1').data['category']
+['Cat 6']
+
+>>> q = sconn.query_parse('document')
+>>> results = sconn.search(q, 0, 30)
+>>> len(results)
+30
+>>> [result.id for result in results]
+['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '1a', '1b', '1c', '1d']
+
+>>> result = results.get_hit(0)
+>>> result.data['text']
+['This document is a basic test document.', 'More test text about this document.']
+>>> result.highlight('text')
+['This <b>document</b> is a basic test <b>document</b>.', 'More test text about this <b>document</b>.']
+>>> result.summarise('text')
+'This <b>document</b> is a basic test <b>document</b>.\nMore test text about this <b>document</b>.'
+>>> result.summarise('text', maxlen=20)
+'This <b>document</b> is a ..'
+>>> result.summarise('title', maxlen=20)
+'Test <b>document</b> 0'
+
+
+If we collapse on categories, we just get the top result in each category:
+>>> [result.id for result in sconn.search(q, 0, 30, collapse='category')]
+['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f', '10', '11', '12', '13']
+
+We can't collapse on fields which weren't indexed for collapsing:
+>>> [result.id for result in sconn.search(q, 0, 30, collapse='author')]
+Traceback (most recent call last):
+...
+SearchError: Field 'author' was not indexed for collapsing
+
+If we sort by category, we get a different order of results:
+>>> [':'.join((result.id, result.data['category'][0])) for result in sconn.search(q, 0, 30, sortby='-category')]
+['4:Cat 9', '18:Cat 9', '2c:Cat 9', '40:Cat 9', '54:Cat 9', '68:Cat 9', '7c:Cat 9', '90:Cat 9', 'a4:Cat 9', 'b8:Cat 9', '3:Cat 8', '17:Cat 8', '2b:Cat 8', '3f:Cat 8', '53:Cat 8', '67:Cat 8', '7b:Cat 8', '8f:Cat 8', 'a3:Cat 8', 'b7:Cat 8', '2:Cat 7', '16:Cat 7', '2a:Cat 7', '3e:Cat 7', '52:Cat 7', '66:Cat 7', '7a:Cat 7', '8e:Cat 7', 'a2:Cat 7', 'b6:Cat 7']
+
+We can sort in ascending order instead:
+>>> [':'.join((result.id, result.data['category'][0])) for result in sconn.search(q, 0, 30, sortby='+category')]
+['f:Cat 0', '23:Cat 0', '37:Cat 0', '4b:Cat 0', '5f:Cat 0', '73:Cat 0', '87:Cat 0', '9b:Cat 0', 'af:Cat 0', 'c3:Cat 0', '10:Cat 1', '24:Cat 1', '38:Cat 1', '4c:Cat 1', '60:Cat 1', '74:Cat 1', '88:Cat 1', '9c:Cat 1', 'b0:Cat 1', 'c4:Cat 1', '5:Cat 10', '19:Cat 10', '2d:Cat 10', '41:Cat 10', '55:Cat 10', '69:Cat 10', '7d:Cat 10', '91:Cat 10', 'a5:Cat 10', 'b9:Cat 10']
+
+Ascending order is the default, so we don't actually need the '+':
+>>> [':'.join((result.id, result.data['category'][0])) for result in sconn.search(q, 0, 30, sortby='category')]
+['f:Cat 0', '23:Cat 0', '37:Cat 0', '4b:Cat 0', '5f:Cat 0', '73:Cat 0', '87:Cat 0', '9b:Cat 0', 'af:Cat 0', 'c3:Cat 0', '10:Cat 1', '24:Cat 1', '38:Cat 1', '4c:Cat 1', '60:Cat 1', '74:Cat 1', '88:Cat 1', '9c:Cat 1', 'b0:Cat 1', 'c4:Cat 1', '5:Cat 10', '19:Cat 10', '2d:Cat 10', '41:Cat 10', '55:Cat 10', '69:Cat 10', '7d:Cat 10', '91:Cat 10', 'a5:Cat 10', 'b9:Cat 10']
+
+
+
+We can't sort on fields which weren't indexed for sorting:
+>>> [result.id for result in sconn.search(q, 0, 30, sortby='author')]
+Traceback (most recent call last):
+...
+SearchError: Field 'author' was not indexed for sorting
+
+
+We can collapse and sort in a single search:
+>>> [':'.join((result.id, result.data['category'][0])) for result in sconn.search(q, 0, 30, collapse="category", sortby='-category')]
+['4:Cat 9', '3:Cat 8', '2:Cat 7', '1:Cat 6', '0:Cat 5', '13:Cat 4', '12:Cat 3', '11:Cat 2', 'e:Cat 19', 'd:Cat 18', 'c:Cat 17', 'b:Cat 16', 'a:Cat 15', '9:Cat 14', '8:Cat 13', '7:Cat 12', '6:Cat 11', '5:Cat 10', '10:Cat 1', 'f:Cat 0']
+
+
+Tidy up after ourselves:
+>>> sconn.close()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MoinMoin/support/xappy/searchconnection_doctest2.txt	Mon Jul 27 17:35:13 2009 +0200
@@ -0,0 +1,506 @@
+
+>>> from datastructures import *
+>>> from fieldactions import *
+>>> from indexerconnection import *
+
+
+Open a connection for indexing:
+>>> iconn = IndexerConnection('foo')
+
+>>> iconn.add_field_action('author', FieldActions.STORE_CONTENT)
+>>> iconn.add_field_action('title', FieldActions.STORE_CONTENT)
+>>> iconn.add_field_action('category', FieldActions.STORE_CONTENT)
+>>> iconn.add_field_action('text', FieldActions.STORE_CONTENT)
+
+>>> iconn.add_field_action('author', FieldActions.INDEX_FREETEXT, weight=2)
+>>> iconn.add_field_action('title', FieldActions.INDEX_FREETEXT, weight=5)
+>>> iconn.add_field_action('category', FieldActions.INDEX_EXACT)
+>>> iconn.add_field_action('category', FieldActions.SORTABLE)
+>>> iconn.add_field_action('category', FieldActions.COLLAPSE)
+>>> iconn.add_field_action('category', FieldActions.FACET)
+>>> iconn.add_field_action('text', FieldActions.INDEX_FREETEXT, language='en',
+...                        spell=True, stop=('basic',))
+
+>>> iconn.add_field_action('date', FieldActions.STORE_CONTENT)
+>>> iconn.add_field_action('date', FieldActions.COLLAPSE)
+>>> iconn.add_field_action('date', FieldActions.SORTABLE, type='date')
+>>> iconn.add_field_action('date', FieldActions.COLLAPSE)
+>>> iconn.add_field_action('price', FieldActions.STORE_CONTENT)
+>>> iconn.add_field_action('price', FieldActions.SORTABLE, type='float')
+>>> iconn.add_field_action('price', FieldActions.COLLAPSE)
+>>> iconn.add_field_action('price', FieldActions.FACET, type='float')
+>>> iconn.add_field_action('price3', FieldActions.SORTABLE, type='float')
+>>> iconn.add_field_action('price3', FieldActions.FACET, type='float')
+>>> iconn.add_field_action('price3', FieldActions.STORE_CONTENT)
+
+>>> iconn.add_field_action('facet1', FieldActions.FACET)
+>>> iconn.add_field_action('facet2', FieldActions.FACET)
+>>> iconn.add_field_action('facet3', FieldActions.FACET)
+>>> iconn.add_field_action('facet4', FieldActions.FACET, type='float')
+>>> iconn.add_field_action('facet5', FieldActions.FACET)
+>>> iconn.add_field_action('facet6', FieldActions.FACET)
+>>> iconn.add_field_action('facet7', FieldActions.FACET)
+>>> iconn.add_field_action('facet8', FieldActions.FACET, type='float')
+
+>>> iconn.add_field_action('tag', FieldActions.TAG)
+
+# Add this, for a regression test.
+>>> iconn.add_field_action('facet9', FieldActions.FACET, type='float')
+>>> iconn.add_field_action('facet9', FieldActions.SORTABLE)
+
+
+A field can only be sorted according to one type:
+>>> iconn.add_field_action('date', FieldActions.SORTABLE, type='float')
+Traceback (most recent call last):
+...
+IndexerError: Field 'date' is already marked for sorting, with a different sort type
+
+
+If we set the sort type to an unknown value, we get errors when it is used:
+
+>>> iconn.add_field_action('price2', FieldActions.SORTABLE, type='unknown')
+>>> doc = UnprocessedDocument()
+>>> doc.fields.append(Field('price2', '1.0'))
+>>> iconn.process(doc)
+Traceback (most recent call last):
+...
+IndexerError: Unknown sort type 'unknown' for field 'price2'
+
+
+Make another database which doesn't have any facet fields::
+
+>>> iconn2 = IndexerConnection('foo2')
+>>> iconn2.add_field_action('author', FieldActions.STORE_CONTENT)
+
+
+Add a set of documents, with dates and prices, to test sorting:
+
+>>> for i in xrange(200):
+...     doc = UnprocessedDocument()
+...     doc.fields.append(Field('author', 'Richard Boulton'))
+...     doc.fields.append(Field('category', 'Cat %d' % ((i + 5) % 20)))
+...     doc.fields.append(Field('text', 'This document is a basic test document.'))
+...     doc.fields.append(Field('title', 'Test document %d' % i))
+...     doc.fields.append(Field('text', 'More test text about this document.'))
+...     doc.fields.append(Field('date', '2007%02d%02d' % (i % 12 + 1, i // 12 + 1)))
+...     doc.fields.append(Field('price', '%f' % ((float(i) / 7) % 10)))
+...     doc.fields.append(Field('price3', '%f' % ((float(i) * 6.7))))
+...     doc.fields.append(Field('facet1', '%d' % (i // 40)))
+...     doc.fields.append(Field('facet2', '%d' % (i // 20)))
+...     doc.fields.append(Field('facet3', '%d' % (i // 12)))
+...     doc.fields.append(Field('facet4', '%d' % (i // 8)))
+...     doc.fields.append(Field('facet5', '%d' % (i // 5)))
+...     doc.fields.append(Field('facet6', '0'))
+...     doc.fields.append(Field('facet7', '2000'))
+...     doc.fields.append(Field('facet7', '2001'))
+...     doc.fields.append(Field('facet7', '%d' % (i % 2)))
+...     doc.fields.append(Field('facet8', '2000'))
+...     doc.fields.append(Field('facet8', '2001'))
+...     doc.fields.append(Field('facet8', '%d' % (i % 2)))
+...     doc.fields.append(Field('facet9', '%d' % (i // 5)))
+...     doc.fields.append(Field('tag', '%d' % (i % 5)))
+...     doc.fields.append(Field('tag', '%d' % (i % 9)))
+...     doc.fields.append(Field('tag', '%d' % (i // 5)))
+...     id = iconn.add(doc)
+...     id = iconn2.add(doc)
+
+
+Add some synonyms:
+
+>>> iconn.add_synonym('document', 'record')
+>>> iconn.add_synonym('basic test', 'exam', original_field='text')
+>>> iconn.add_synonym('document', 'notrecord')
+>>> iconn.add_synonym('documents', 'notrecord')
+>>> iconn.remove_synonym('document', 'notrecord')
+>>> iconn.clear_synonyms('documents')
+
+>>> iconn.flush()
+
+>>> dict(iconn.iter_synonyms())
+{('document', None): ('record',), ('basic test', 'text'): ('exam',)}
+>>> dict(iconn.iter_synonyms('doc'))
+{('document', None): ('record',)}
+>>> dict(iconn.iter_synonyms('toc'))
+{}
+
+
+
+
+Now, open a search connection:
+>>> sconn = SearchConnection('foo')
+>>> sconn2 = SearchConnection('foo2')
+
+We can append a close handler to notify us when the connection is closed.
+>>> def closehandler(path, userdata):
+...     print "Closing connection at path %s: %s" % (path, userdata)
+>>> sconn.append_close_handler(closehandler, "Conn1")
+>>> sconn2.append_close_handler(closehandler, "Conn2")
+
+First, check the fallback handling for queries with invalid boolean
+operations:
+>>> q = sconn.query_parse('AND document')
+>>> str(q)
+'Xapian::Query(((and:(pos=1) AND (Zdocument:(pos=2) SYNONYM record:(pos=2))) AND_MAYBE (and:(pos=1) AND document:(pos=2))))'
+
+Check that spelling correction works:
+>>> sconn.spell_correct('docment')
+'document'
+>>> sconn.spell_correct('document')
+'document'
+>>> sconn.spell_correct(u'docment')
+'document'
+>>> sconn.spell_correct(u'document')
+'document'
+
+Check that stopwording worked:
+>>> q = sconn.query_parse('basic')
+>>> results = sconn.search(q, 0, 30)
+>>> [result.id for result in results]
+[]
+
+Check that synonyms work:
+>>> dict(sconn.iter_synonyms())
+{('document', None): ('record',), ('basic test', 'text'): ('exam',)}
+>>> q = sconn.query_parse('document')
+>>> str(q)
+'Xapian::Query(((Zdocument:(pos=1) SYNONYM record:(pos=1)) AND_MAYBE document:(pos=1)))'
+
+
+Remove the synonyms for the remaining tests:
+>>> iconn.clear_synonyms('document')
+>>> iconn.clear_synonyms('basic test', field='text')
+>>> iconn.flush()
+>>> sconn.reopen()
+>>> dict(sconn.iter_synonyms())
+{}
+
+Now, parse a simple query.
+>>> q = sconn.query_parse('document')
+>>> str(q)
+'Xapian::Query((Zdocument:(pos=1) AND_MAYBE document:(pos=1)))'
+>>> results = sconn.search(q, 0, 30)
+>>> [result.id for result in results]
+['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '1a', '1b', '1c', '1d']
+
+>>> results = sconn.search(q, 0, 30, sortby="price")
+>>> prev_price = results[0].data['price']
+>>> for price in (result.data['price'] for result in results):
+...     assert(price >= prev_price)
+...     prev_price = price
+>>> [int(result.id, 16) for result in results]
+[0, 70, 140, 1, 71, 141, 2, 72, 142, 3, 73, 143, 4, 74, 144, 5, 75, 145, 6, 76, 146, 7, 77, 147, 8, 78, 148, 9, 79, 149]
+>>> [result.data['price'] for result in results]
+[['0.000000'], ['0.000000'], ['0.000000'], ['0.142857'], ['0.142857'], ['0.142857'], ['0.285714'], ['0.285714'], ['0.285714'], ['0.428571'], ['0.428571'], ['0.428571'], ['0.571429'], ['0.571429'], ['0.571429'], ['0.714286'], ['0.714286'], ['0.714286'], ['0.857143'], ['0.857143'], ['0.857143'], ['1.000000'], ['1.000000'], ['1.000000'], ['1.142857'], ['1.142857'], ['1.142857'], ['1.285714'], ['1.285714'], ['1.285714']]
+
+>>> results = sconn.search(q, 0, 30, sortby="-price")
+>>> prev_price = results[0].data['price']
+>>> for price in (result.data['price'] for result in results):
+...     assert(price <= prev_price)
+...     prev_price = price
+>>> [int(result.id, 16) for result in results]
+[69, 139, 68, 138, 67, 137, 66, 136, 65, 135, 64, 134, 63, 133, 62, 132, 61, 131, 60, 130, 59, 129, 199, 58, 128, 198, 57, 127, 197, 56]
+
+
+>>> results = sconn.search(q, 0, 30, sortby="date")
+>>> prev_date = results[0].data['date']
+>>> for date in (result.data['date'] for result in results):
+...     assert(date >= prev_date)
+...     prev_date = date
+>>> [int(result.id, 16) for result in results]
+[0, 12, 24, 36, 48, 60, 72, 84, 96, 108, 120, 132, 144, 156, 168, 180, 192, 1, 13, 25, 37, 49, 61, 73, 85, 97, 109, 121, 133, 145]
+
+>>> results = sconn.search(q, 0, 30, sortby="-date")
+>>> prev_date = results[0].data['date']
+>>> for date in (result.data['date'] for result in results):
+...     assert(date <= prev_date)
+...     prev_date = date
+>>> [int(result.id, 16) for result in results]
+[191, 179, 167, 155, 143, 131, 119, 107, 95, 83, 71, 59, 47, 35, 23, 11, 190, 178, 166, 154, 142, 130, 118, 106, 94, 82, 70, 58, 46, 34]
+
+
+
+Get a list of the facets and tags relevant for the search
+>>> results2 = sconn.search(sconn.query_all(), 0, 30, checkatleast=200,
+...                         sortby="-date", gettags=('tag'), getfacets=True)
+>>> [int(result.id, 16) for result in results2]
+[191, 179, 167, 155, 143, 131, 119, 107, 95, 83, 71, 59, 47, 35, 23, 11, 190, 178, 166, 154, 142, 130, 118, 106, 94, 82, 70, 58, 46, 34]
+>>> results2.get_top_tags('tag', 8)
+[('0', 62), ('1', 62), ('3', 61), ('2', 60), ('4', 60), ('5', 27), ('7', 27), ('6', 26)]
+
+>>> [(facet[0], len(facet[1])) for facet in results2.get_suggested_facets(maxfacets=10)]
+[('price3', 7), ('facet1', 5), ('facet4', 5), ('facet2', 10), ('facet9', 4), ('price', 4), ('facet8', 2), ('facet3', 17), ('category', 20), ('facet5', 40)]
+>>> [(facet[0], facet[1]) for facet in results2.get_suggested_facets(maxfacets=5)]
+[('price3', [((0.0, 194.30000000000001), 30), ((201.0, 395.30000000000001), 30), ((402.0, 596.29999999999995), 30), ((603.0, 797.29999999999995), 30), ((804.0, 998.29999999999995), 30), ((1005.0, 1199.3), 30), ((1206.0, 1333.3), 20)]), ('facet1', [('0', 40), ('1', 40), ('2', 40), ('3', 40), ('4', 40)]), ('facet4', [((0.0, 4.0), 40), ((5.0, 9.0), 40), ((10.0, 14.0), 40), ((15.0, 19.0), 40), ((20.0, 24.0), 40)]), ('facet2', [('0', 20), ('1', 20), ('2', 20), ('3', 20), ('4', 20), ('5', 20), ('6', 20), ('7', 20), ('8', 20), ('9', 20)]), ('facet9', [((0.0, 9.0), 50), ((10.0, 19.0), 50), ((20.0, 29.0), 50), ((30.0, 39.0), 50)])]
+
+>>> [(facet[0], len(facet[1])) for facet in results2.get_suggested_facets(maxfacets=5, required_facets='price3')]
+[('price3', 7), ('facet1', 5), ('facet4', 5), ('facet2', 10), ('facet9', 4)]
+
+>>> [(facet[0], len(facet[1])) for facet in results2.get_suggested_facets(maxfacets=5, required_facets='facet8')]
+[('price3', 7), ('facet1', 5), ('facet4', 5), ('facet2', 10), ('facet8', 2)]
+
+>>> [(facet[0], len(facet[1])) for facet in results2.get_suggested_facets(maxfacets=5, required_facets=('facet8', 'price', 'facet7'))]
+[('price3', 7), ('facet1', 5), ('price', 4), ('facet8', 2), ('facet7', 4)]
+
+>>> [(facet[0], len(facet[1])) for facet in results2.get_suggested_facets(maxfacets=5, required_facets=('facet8', 'price', 'facet7', 'price3', 'facet1'))]
+[('price3', 7), ('facet1', 5), ('price', 4), ('facet8', 2), ('facet7', 4)]
+
+>>> [(facet[0], len(facet[1])) for facet in results2.get_suggested_facets(maxfacets=5, required_facets=('facet8', 'price', 'facet7', 'price3', 'facet1', 'facet4'))]
+[('price3', 7), ('facet1', 5), ('facet4', 5), ('price', 4), ('facet8', 2), ('facet7', 4)]
+
+
+We can use a facet to restrict the search results:
+
+>>> results3 = sconn.search(sconn.query_facet('price3', (0.0, 200.0)), 0, 30,
+...                         checkatleast=200, getfacets=True)
+
+
+Check that the restriction was satisfied by all the results:
+
+>>> False in [float(result.data['price3'][0]) <= 200 for result in results3]
+False
+
+
+Getting the list of facets when there is a facet restriction in place will
+return a different selection (based on the documents satisfying the
+restriction):
+
+>>> [(facet[0], len(facet[1])) for facet in results3.get_suggested_facets(maxfacets=5)]
+[('facet5', 6), ('facet9', 6), ('price', 5), ('price3', 4), ('facet4', 4)]
+
+The suggestions for the facet we've already restricted by are for sub-values
+within the range:
+>>> results3.get_suggested_facets(maxfacets=5)[3]
+('price3', [((0.0, 46.899999999999999), 8), ((53.600000000000001, 93.799999999999997), 7), ((100.5, 147.40000000000001), 8), ((154.09999999999999, 194.30000000000001), 7)])
+
+
+Regression test: this used to give an error
+>>> results3 = sconn.search(sconn.query_facet('facet9', (0.0, 5.0)), 0, 30,
+...                         checkatleast=200, getfacets=True)
+
+
+A facet which only contains one value in the matching documents will never be
+returned as a suggestion::
+
+>>> results3 = sconn.search(sconn.query_facet('facet5', '5'), 0, 30,
+...                         checkatleast=200, getfacets=True,
+...                         allowfacets=('facet5', 'facet6'))
+>>> results3.matches_estimated
+5
+>>> results3.get_suggested_facets()
+[]
+
+
+Facet fields may contain multiple values in a single document, unless the type
+is "float" (in which case, only the final value specified in a given document
+will be stored).  Therefore, we expect facet8 to _not_ include the 2000 and
+2001 values, but facet7 should include them:
+
+>>> results3 = sconn.search(sconn.query_all(), 0, 30,
+...                         checkatleast=200, getfacets=True,
+...                         allowfacets=('facet7', 'facet8'))
+>>> results3.get_suggested_facets()
+[('facet8', [((0.0, 0.0), 100), ((1.0, 1.0), 100)]), ('facet7', [('0', 100), ('1', 100), ('2000', 200), ('2001', 200)])]
+
+
+Even if the database doesn't contain any facets, getting the list of suggested
+facets should return an empty list (this is a regression test - this used to
+raise an exception).
+
+>>> results3 = sconn2.search(sconn2.query_all(), 0, 30,
+...                          checkatleast=200, getfacets=True)
+>>> results3.get_suggested_facets()
+[]
+
+
+We can also filter the results by a range of the sortable values - for
+example, dates:
+
+>>> fq = sconn.query_filter(q, sconn.query_range('date', '20070205', '20070207'))
+>>> results = sconn.search(fq, 0, 30, sortby="date")
+>>> [int(result.id, 16) for result in results]
+[49, 61, 73]
+>>> for result in results:
+...     print "%r,%r" % (result.data['date'], result.get_value('date', 'collsort'))
+['20070205'],'20070205'
+['20070206'],'20070206'
+['20070207'],'20070207'
+
+
+We can specify semi-infinite ranges by specifying None for one of the
+endpoints:
+>>> fq = sconn.query_filter(q, sconn.query_range('date', None, '20070104'))
+>>> results = sconn.search(fq, 0, 30, sortby="date")
+>>> for result in results:
+...     print "%r,%r" % (result.data['date'], result.get_value('date', 'collsort'))
+['20070101'],'20070101'
+['20070102'],'20070102'
+['20070103'],'20070103'
+['20070104'],'20070104'
+
+
+>>> fq = sconn.query_filter(q, sconn.query_range('date', '20071214', None))
+>>> results = sconn.search(fq, 0, 30, sortby="date")
+>>> for result in results:
+...     print "%r,%r" % (result.data['date'], result.get_value('date', 'collsort'))
+['20071214'],'20071214'
+['20071215'],'20071215'
+['20071216'],'20071216'
+
+
+We can use a filter to exclude results which match a particular sub-query,
+rather than including only those which match.
+
+>>> fq = sconn.query_filter(q, sconn.query_range('date', '20070105', '20071214'), exclude=True)
+>>> results = sconn.search(fq, 0, 30, sortby="date")
+>>> [int(result.id, 16) for result in results]
+[0, 12, 24, 36, 179, 191]
+>>> for result in results:
+...     print "%r,%r" % (result.data['date'], result.get_value('date', 'collsort'))
+['20070101'],'20070101'
+['20070102'],'20070102'
+['20070103'],'20070103'
+['20070104'],'20070104'
+['20071215'],'20071215'
+['20071216'],'20071216'
+
+
+Or we can restrict by numerical range:
+>>> fq = sconn.query_filter(q, sconn.query_range('price', '0.1428', '0.5'))
+>>> results = sconn.search(fq, 0, 30, sortby="date")
+>>> [int(result.id, 16) for result in results]
+[72, 1, 73, 2, 3, 141, 142, 71, 143]
+>>> [(result.data['price'][0]) for result in results]
+['0.285714', '0.142857', '0.428571', '0.285714', '0.428571', '0.142857', '0.285714', '0.142857', '0.428571']
+
+>>> fq = sconn.query_range('price', '0.1428', '0.5')
+>>> results = sconn.search(fq, 0, 30, sortby="date")
+>>> [int(result.id, 16) for result in results]
+[72, 1, 73, 2, 3, 141, 142, 71, 143]
+
+
+If the end of the range is lower than the start, no results can match
+>>> fq = sconn.query_filter(q, sconn.query_range('price', '0.5', '0.1428'))
+>>> results = sconn.search(fq, 0, 30, sortby="date")
+>>> [int(result.id, 16) for result in results]
+[]
+
+
+
+We can also adjust the weights of one query using a second query:
+>>> q = sconn.query_adjust(q, sconn.query_parse('cat'))
+>>> str(q)
+'Xapian::Query(((Zdocument:(pos=1) AND_MAYBE document:(pos=1)) AND_MAYBE (Zcat:(pos=1) AND_MAYBE cat:(pos=1))))'
+
+
+
+If invalid values are supplied to query_range, a SearchError is raised
+>>> sconn.query_range('date', '0.1428', '0.5')
+Traceback (most recent call last):
+...
+SearchError: Value supplied to field 'date' must be a valid date: was '0.1428': error is 'Unrecognised date format'
+
+
+Do a search which matches all documents:
+>>> q = sconn.query_all()
+>>> str(q)
+'Xapian::Query(<alldocuments>)'
+>>> results = sconn.search(q, 0, 30)
+>>> len(results)
+30
+>>> results
+<SearchResults(startrank=0, endrank=30, more_matches=True, matches_lower_bound=200, matches_upper_bound=200, matches_estimated=200, estimate_is_exact=True)>
+
+
+Do a search which uses a restricted set of default fields:
+>>> q = sconn.query_parse('richard', default_allow='author')
+>>> str(q)
+'Xapian::Query((ZXArichard:(pos=1) AND_MAYBE XArichard:(pos=1)))'
+>>> q = sconn.query_parse('richard', default_deny='category')
+>>> str(q)
+'Xapian::Query(((ZXArichard:(pos=1) OR ZXBrichard:(pos=1) OR ZXDrichard:(pos=1)) AND_MAYBE (XArichard:(pos=1) OR XBrichard:(pos=1) OR XDrichard:(pos=1))))'
+
+
+Do a search which multiplies the weights by 2:
+>>> q = sconn.query_multweight(sconn.query_parse('richard', default_allow='author'), 2)
+>>> str(q)
+'Xapian::Query(2 * (ZXArichard:(pos=1) AND_MAYBE XArichard:(pos=1)))'
+>>> q2 = sconn.query_parse('richard', default_deny='author')
+>>> q = sconn.query_composite(sconn.OP_OR, (q, q2))
+>>> str(q)
+'Xapian::Query((2 * (ZXArichard:(pos=1) AND_MAYBE XArichard:(pos=1)) OR ((ZXBrichard:(pos=1) OR ZXDrichard:(pos=1)) AND_MAYBE (XBrichard:(pos=1) OR XDrichard:(pos=1)))))'
+
+Do a similarity search:
+>>> q = sconn.query_parse('document (2 OR 5 OR 8)')
+>>> results = sconn.search(q, 0, 5, sortby="date")
+>>> len(results)
+3
+>>> ids = [result.id for result in results]
+>>> len(ids)
+3
+>>> sconn.significant_terms(ids, maxterms=5)
+[('title', '8'), ('title', '5'), ('title', '2'), ('title', 'test'), ('title', 'document')]
+>>> q2 = sconn.query_similar(ids, simterms=5)
+>>> str(q2)
+'Xapian::Query((XB8 ELITE_SET 5 XB5 ELITE_SET 5 XB2 ELITE_SET 5 XBtest ELITE_SET 5 XBdocument))'
+>>> q2 = sconn.query_similar(ids, simterms=5, allow='text')
+>>> str(q2)
+'Xapian::Query((XDdocument ELITE_SET 5 XDthis ELITE_SET 5 XDtest ELITE_SET 5 XDtext ELITE_SET 5 XDmore))'
+
+Try a search with various weight cutoff restrictions:
+>>> results = sconn.search(sconn.query_parse('richard OR 7 OR 7 OR 8'), 0, 5, sortby="date")
+>>> [(result.id, result.percent, int(result.weight * 10)) for result in results]
+[('7', 55, 326), ('8', 27, 163)]
+>>> results = sconn.search(sconn.query_parse('richard OR 7 OR 7 OR 8'), 0, 5, sortby="date", percentcutoff=30)
+>>> [(result.id, result.percent, int(result.weight * 10)) for result in results]
+[('7', 55, 326)]
+>>> results = sconn.search(sconn.query_parse('richard OR 7 OR 7 OR 8'), 0, 5, sortby="date", weightcutoff=20)
+>>> [(result.id, result.percent, int(result.weight * 10)) for result in results]
+[('7', 55, 326)]
+>>> results = sconn.search(sconn.query_parse('richard OR 7 OR 7 OR 8'), 0, 5, sortby="date", percentcutoff=56)
+>>> [(result.id, result.percent, int(result.weight * 10)) for result in results]
+[]
+>>> results = sconn.search(sconn.query_parse('richard OR 7 OR 7 OR 8'), 0, 5, sortby="date", weightcutoff=33)
+>>> [(result.id, result.percent, int(result.weight * 10)) for result in results]
+[]
+
+Do a similarity search with an ID which isn't in the database:
+>>> q2 = sconn.query_similar('foo', simterms=5)
+>>> str(q2)
+'Xapian::Query()'
+
+
+Check the expand decider used by similarity reordering of queries.
+>>> res = sconn.search(q2, 0, 5)
+>>> ed = res._make_expand_decider('title')
+>>> ed('foo')
+False
+>>> ed('XBA:foo')
+False
+>>> ed('XBAfoo')
+False
+>>> ed('XB:foo')
+True
+>>> ed('XBfoo')
+True
+>>> ed('ZXBfoo')
+True
+>>> ed('ZXBfoo')
+True
+
+
+Find the interesting terms in a set of documents:
+>>> [(docid, sconn.significant_terms(docid, 3)) for docid in sconn.iterids()][:3]
+[('0', [('title', '0'), ('title', 'test'), ('title', 'document')]), ('1', [('title', '1'), ('title', 'test'), ('title', 'document')]), ('10', [('title', '16'), ('title', 'test'), ('title', 'document')])]
+
+
+Tidy up after ourselves:
+>>> sconn.close()
+Closing connection at path foo: Conn1
+
+>>> del sconn
+>>> del sconn2
+>>> del result
+>>> del results
+>>> del results2
+>>> del results3
+Closing connection at path foo2: Conn2
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MoinMoin/support/xappy/searchconnection_doctest3.txt	Mon Jul 27 17:35:13 2009 +0200
@@ -0,0 +1,118 @@
+
+This file mainly contains tests for error handling conditions.
+
+
+>>> from datastructures import *
+>>> from fieldactions import *
+>>> from indexerconnection import *
+
+
+Open a connection for indexing:
+>>> iconn = IndexerConnection('foo')
+
+>>> iconn.add_field_action('title', FieldActions.STORE_CONTENT)
+>>> iconn.add_field_action('text', FieldActions.STORE_CONTENT)
+
+>>> iconn.add_field_action('text', FieldActions.INDEX_FREETEXT, language='en',
+...                        spell=True, stop=('basic',))
+
+>>> iconn.add_field_action('tag', FieldActions.TAG)
+>>> iconn.add_field_action('tag2', FieldActions.TAG)
+
+
+
+>>> for i in xrange(20):
+...     doc = UnprocessedDocument()
+...     doc.fields.append(Field('text', 'This is basic test document %d.' % i))
+...     doc.fields.append(Field('title', 'Test document %d' % i))
+...     id = iconn.add(doc)
+
+Test getting and setting metadata:
+>>> iconn.get_metadata('foo')
+''
+>>> iconn.set_metadata('foo', 'bar')
+>>> iconn.get_metadata('foo')
+'bar'
+
+>>> iconn.flush()
+
+Now, open a search connection:
+>>> sconn = SearchConnection('foo')
+
+Now, parse a simple query.
+>>> q = sconn.query_parse('document')
+>>> results = sconn.search(q, 0, 20)
+>>> [result.id for result in results]
+['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f', '10', '11', '12', '13']
+
+>>> result = results.get_hit(0)
+>>> result.summarise('text')
+'This is basic test <b>document</b> 0.'
+>>> result.summarise('title')
+'Test <b>document</b> 0'
+
+The maxlen (in characters) can be specified:
+>>> result.summarise('text', 5)
+'This..'
+
+If it's specified as a string (eg, unconverted output from a webapp) it should
+still work:
+>>> result.summarise('text', '5')
+'This..'
+
+
+Asking for a summary of a field which isn't known will raise a KeyError.
+
+>>> result.summarise('titl')
+Traceback (most recent call last):
+...
+KeyError: 'titl'
+
+
+Asking for the top tags of a field when no fields were specified for counting
+tags will raise a SearchError.
+
+>>> results.get_top_tags('title', 100)
+Traceback (most recent call last):
+...
+SearchError: Field 'title' was not specified for getting tags
+
+
+Asking for tags in a field which wasn't indexed for tagging will raise an
+error:
+
+>>> results = sconn.search(q, 0, 20, gettags='title')
+Traceback (most recent call last):
+...
+SearchError: Field 'title' was not indexed for tagging
+
+
+Asking for the top tags of a field which wasn't specified for counting tags
+will raise a SearchError.
+
+>>> results = sconn.search(q, 0, 20, gettags='tag')
+>>> results.get_top_tags('tag', 100)
+[]
+>>> results.get_top_tags('tag2', 100)
+Traceback (most recent call last):
+...
+SearchError: Field 'tag2' was not specified for getting tags
+>>> results.get_top_tags('text', 100)
+Traceback (most recent call last):
+...
+SearchError: Field 'text' was not specified for getting tags
+
+
+Asking for suggested facets if none were calculated raises a SearchError:
+>>> results.get_suggested_facets('text')
+Traceback (most recent call last):
+...
+SearchError: Facet selection wasn't enabled when the search was run
+
+
+Test getting metadata:
+>>> sconn.get_metadata('foo1')
+''
+>>> sconn.get_metadata('foo')
+'bar'
+