changeset 1467:26c8ab85dc86

completed code documentation for MoinMoin.search.builtin
author Franz Pletz <fpletz AT franz-pletz DOT org>
date Wed, 23 Aug 2006 18:57:30 +0200
parents 500e043cf7cd
children a0af7d5778de
files MoinMoin/search/builtin.py
diffstat 1 files changed, 45 insertions(+), 15 deletions(-) [+]
line wrap: on
line diff
--- a/MoinMoin/search/builtin.py	Wed Aug 23 16:15:08 2006 +0200
+++ b/MoinMoin/search/builtin.py	Wed Aug 23 18:57:30 2006 +0200
@@ -28,6 +28,7 @@
         self.readLock = lock.ReadLock(lock_dir, timeout=10.0)
 
     def exists(self):
+        """ Checks if the queue exists on the filesystem """
         return os.path.exists(self.file)
 
     def append(self, pagename):
@@ -207,7 +208,7 @@
         
         Can be called only from a script. To index pages during a user
         request, use indexPagesInNewThread.
-        @arg files: iterator or list of files to index additionally
+        @keyword files: iterator or list of files to index additionally
         @keyword mode: set the mode of indexing the pages, either 'update', 'add' or 'rebuild'
         """
         if not self.lock.acquire(1.0):
@@ -258,6 +259,11 @@
 
         When called in a new thread, lock is acquired before the call,
         and this method must release it when it finishes or fails.
+
+        @param request: current request
+        @keyword files: iterator or list of files to index additionally
+        @keyword mode: set the mode of indexing the pages, either 'update',
+        'add' or 'rebuild'
         """
         raise NotImplemented('...')
 
@@ -302,6 +308,7 @@
         raise NotImplemented('...')
 
     def optimize(self):
+        """ Optimize the index if possible """
         raise NotImplemented('...')
 
     def contentfilter(self, filename):
@@ -326,15 +333,14 @@
             request.log("Filter %s threw error '%s' for file %s" % (modulename, str(err), filename))
         return mt.mime_type(), data
 
-    def test(self, request):
-        raise NotImplemented('...')
-
     def _indexingRequest(self, request):
         """ Return a new request that can be used for index building.
         
         This request uses a security policy that lets the current user
         read any page. Without this policy some pages will not render,
-        which will create broken pagelinks index.        
+        which will create broken pagelinks index.
+
+        @param request: current request
         """
         from MoinMoin.request.CLI import Request
         from MoinMoin.security import Permissions
@@ -410,6 +416,10 @@
     # Private!
 
     def _xapianIndex(request):
+        """ Get the xapian index if possible
+
+        @param request: current request
+        """
         try:
             from MoinMoin.search.Xapian import Index
             index = Index(request)
@@ -425,15 +435,17 @@
         """ Search using Xapian
         
         Get a list of pages using fast xapian search and
-        return moin search in those pages.
+        return moin search in those pages if needed.
         """
         clock = self.request.clock
         pages = None
         index = self._xapianIndex(self.request)
+
         if index and self.query.xapian_wanted():
             clock.start('_xapianSearch')
             try:
                 from MoinMoin.support import xapwrap
+
                 clock.start('_xapianQuery')
                 query = self.query.xapian_term(self.request, index.allterms)
                 self.request.log("xapianSearch: query = %r" %
@@ -442,16 +454,16 @@
                 enq, mset, hits = index.search(query, sort=self.sort,
                         historysearch=self.historysearch)
                 clock.stop('_xapianQuery')
+
                 #self.request.log("xapianSearch: finds: %r" % hits)
                 def dict_decode(d):
                     """ decode dict values to unicode """
                     for k, v in d.items():
                         d[k] = d[k].decode(config.charset)
                     return d
-                #pages = [{'uid': hit['uid'], 'values': dict_decode(hit['values'])}
-                #        for hit in hits]
                 pages = [dict_decode(hit['values']) for hit in hits]
                 self.request.log("xapianSearch: finds pages: %r" % pages)
+                
                 self._xapianEnquire = enq
                 self._xapianMset = mset
                 self._xapianIndex = index
@@ -461,6 +473,7 @@
             #    pages = []
 
             try:
+                # xapian handled the full query
                 if not self.query.xapian_need_postproc():
                     clock.start('_xapianProcess')
                     try:
@@ -473,21 +486,30 @@
             # we didn't use xapian in this request
             self.request.cfg.xapian_search = 0
         
+        # some postprocessing by _moinSearch is required
         return self._moinSearch(pages)
 
     def _xapianMatchDecider(self, term, pos):
+        """ Returns correct Match object for a Xapian match
+        
+        @param term: the term as string
+        @param pos: starting position of the match
+        """
         if term[0] == 'S':      # TitleMatch
             return TitleMatch(start=pos, end=pos+len(term)-1)
         else:                   # TextMatch (incl. headers)
             return TextMatch(start=pos, end=pos+len(term))
         
-    def _xapianMatch(self, page, uid):
-        """ Get all relevant Xapian matches per document id """
+    def _xapianMatch(self, uid, page=None):
+        """ Get all relevant Xapian matches per document id
+        
+        @param uid: the id of the document in the xapian index
+        """
         positions = {}
         term = self._xapianEnquire.get_matching_terms_begin(uid)
         while term != self._xapianEnquire.get_matching_terms_end(uid):
             term_name = term.get_term()
-            for pos in self._xapianIndex.termpositions(uid,term.get_term()):
+            for pos in self._xapianIndex.termpositions(uid, term.get_term()):
                 if pos not in positions or \
                         len(positions[pos]) < len(term_name):
                     positions[pos] = term_name
@@ -505,6 +527,8 @@
         
         Return list of tuples (page, match). The list may contain
         deleted pages or pages the user may not read.
+
+        @keyword pages: optional list of pages to search in
         """
         self.request.clock.start('_moinSearch')
         from MoinMoin.Page import Page
@@ -516,8 +540,11 @@
         self.request.clock.stop('_moinSearch')
         return hits
     
-    def _moinMatch(self, page, uid):
-        """ Just kick off regular moinSearch """
+    def _moinMatch(self, page, uid=None):
+        """ Get all matches from regular moinSearch
+        
+        @param page: the current page instance
+        """
         return self.query.search(page)
 
     def _getHits(self, pages, matchSearchFunction):
@@ -554,7 +581,7 @@
                     else:
                         hits.append((wikiname, page, attachment, None))
                 else:
-                    matches = matchSearchFunction(page, uid)
+                    matches = matchSearchFunction(page=page, uid=uid)
                     if matches:
                         if not self.historysearch and \
                                 pagename in revisionCache and \
@@ -584,7 +611,10 @@
             return self.request.rootpage.getPageList(user='', exists=0)
         
     def _filter(self, hits):
-        """ Filter out deleted or acl protected pages """
+        """ Filter out deleted or acl protected pages
+        
+        @param hits: list of hits
+        """
         userMayRead = self.request.user.may.read
         fs_rootpage = self.fs_rootpage + "/"
         thiswiki = (self.request.cfg.interwikiname, 'Self')