changeset 1496:70e94a679c47

cleanup whitespace, add/fix comments
author Thomas Waldmann <tw AT waldmann-edv DOT de>
date Sat, 26 Aug 2006 17:37:17 +0200
parents f3df2ca6658f
children ed3845759431
files MoinMoin/search/Xapian.py MoinMoin/search/__init__.py MoinMoin/search/builtin.py MoinMoin/search/queryparser.py MoinMoin/search/results.py
diffstat 5 files changed, 136 insertions(+), 144 deletions(-)
--- a/MoinMoin/search/Xapian.py	Fri Aug 25 14:44:03 2006 +0200
+++ b/MoinMoin/search/Xapian.py	Sat Aug 26 17:37:17 2006 +0200
@@ -49,7 +49,7 @@
 ##############################################################################
 
 def getWikiAnalyzerFactory(request=None, language='en'):
-    """ Returns a WikiAnalyer instance
+    """ Returns a WikiAnalyzer instance
 
     @keyword request: current request object
     @keyword language: stemming language iso code, defaults to 'en'
@@ -57,6 +57,7 @@
     return (lambda: WikiAnalyzer(request, language))
 
 class WikiAnalyzer:
+    # TODO docstring
     singleword = r"[%(u)s][%(l)s]+" % {
                      'u': config.chars_upper,
                      'l': config.chars_lower,
@@ -76,7 +77,7 @@
 
     dot_re = re.compile(r"[-_/,.]")
     mail_re = re.compile(r"[-_/,.]|(@)")
-    
+
     # XXX limit stuff above to xapdoc.MAX_KEY_LEN
     # WORD_RE = re.compile('\\w{1,%i}' % MAX_KEY_LEN, re.U)
 
@@ -96,7 +97,7 @@
             """ 'encode' unicode results into whatever xapian wants """
             lower = uc.lower()
             return lower
-            
+
         if isinstance(value, list): # used for page links
             for v in value:
                 yield (enc(v), 0)
@@ -137,10 +138,9 @@
 
         @param value: string to split, must be a unicode object or a list of
                       unicode objects
-        @keyword flat_stemming: whether to yield stemmed terms
-                                automatically with the natural forms
-                                (True) or yield both at once as a tuple
-                                (False)
+        @keyword flat_stemming: whether to yield stemmed terms automatically
+                                with the natural forms (True) or
+                                yield both at once as a tuple (False)
         """
         for word, pos in self.raw_tokenize(value):
             if flat_stemming:
@@ -169,31 +169,31 @@
     prefixMap = {
         # http://svn.xapian.org/*checkout*/trunk/xapian-applications/omega/docs/termprefixes.txt
         'author': 'A',
-        'date':   'D', # numeric format: YYYYMMDD or "latest" - e.g. D20050224 or Dlatest
-                       #G   newsGroup (or similar entity - e.g. a web forum name)
+        'date': 'D',              # numeric format: YYYYMMDD or "latest" - e.g. D20050224 or Dlatest
+                                  #G   newsGroup (or similar entity - e.g. a web forum name)
         'hostname': 'H',
         'keyword': 'K',
-        'lang': 'L',   # ISO Language code
-                       #M   Month (numeric format: YYYYMM)
-                       #N   ISO couNtry code (or domaiN name)
-                       #P   Pathname
-                       #Q   uniQue id
-        'raw':  'R',   # Raw (i.e. unstemmed) term
-        'title': 'S',  # Subject (or title)
+        'lang': 'L',              # ISO Language code
+                                  #M   Month (numeric format: YYYYMM)
+                                  #N   ISO couNtry code (or domaiN name)
+                                  #P   Pathname
+                                  #Q   uniQue id
+        'raw': 'R',               # Raw (i.e. unstemmed) term
+        'title': 'S',             # Subject (or title)
         'mimetype': 'T',
-        'url': 'U',    # full URL of indexed document - if the resulting term would be > 240
-                       # characters, a hashing scheme is used to prevent overflowing
-                       # the Xapian term length limit (see omindex for how to do this).
-                       #W   "weak" (approximately 10 day intervals, taken as YYYYMMD from
-                       #  the D term, and changing the last digit to a '2' if it's a '3')
-                       #X   longer prefix for user-defined use
-        'linkto': 'XLINKTO', # this document links to that document
+        'url': 'U',               # full URL of indexed document - if the resulting term would be > 240
+                                  # characters, a hashing scheme is used to prevent overflowing
+                                  # the Xapian term length limit (see omindex for how to do this).
+                                  #W   "weak" (approximately 10 day intervals, taken as YYYYMMD from
+                                  #  the D term, and changing the last digit to a '2' if it's a '3')
+                                  #X   longer prefix for user-defined use
+        'linkto': 'XLINKTO',      # this document links to that document
         'stem_lang': 'XSTEMLANG', # ISO Language code this document was stemmed in
-        'category': 'XCAT', # category this document belongs to
-        'fulltitle': 'XFT', # full title
-        'domain': 'XDOMAIN', # standard or underlay
-        'revision': 'XREV', # revision of page
-                       #Y   year (four digits)
+        'category': 'XCAT',       # category this document belongs to
+        'fulltitle': 'XFT',       # full title
+        'domain': 'XDOMAIN',      # standard or underlay
+        'revision': 'XREV',       # revision of page
+                                  #Y   year (four digits)
     }
 
     def __init__(self, request):
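
Each prefix in this map is prepended to the indexed value to form the actual Xapian term. A minimal sketch of that construction; the ':' separator rule for values starting with an upper-case letter or colon is the usual Xapian convention and an assumption here, not something shown in this changeset:

    def make_term(prefix, value):
        # e.g. ('L', 'en') -> 'Len'
        #      ('XLINKTO', 'FrontPage') -> 'XLINKTO:FrontPage'
        if value[:1].isupper() or value[:1] == ':':
            return '%s:%s' % (prefix, value)
        return prefix + value
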
@@ -206,11 +206,12 @@
 
     def _check_version(self):
         """ Checks if the correct version of Xapian is installed """
+        # XXX this check is asking for trouble in the future!
         if xapian.xapian_major_version() == 0 and \
                 xapian.xapian_minor_version() == 9 \
                 and xapian.xapian_revision() >= 6:
             return
-        
+
         from MoinMoin.error import ConfigurationError
         raise ConfigurationError('MoinMoin needs at least Xapian version '
                 '0.9.6 to work correctly. Either disable Xapian '
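
The XXX note is justified: the test above only accepts 0.9.x with revision >= 6, so a future 0.10 or 1.0 release would be rejected even though it is new enough. A forward-compatible sketch using tuple comparison (illustrative; error message shortened):

    def _check_version(self):
        """ Checks if at least Xapian 0.9.6 is installed """
        version = (xapian.xapian_major_version(),
                   xapian.xapian_minor_version(),
                   xapian.xapian_revision())
        if version < (0, 9, 6):
            from MoinMoin.error import ConfigurationError
            raise ConfigurationError('MoinMoin needs at least Xapian 0.9.6')
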
@@ -230,6 +231,7 @@
         return BaseIndex.exists(self) and os.listdir(self.dir)
 
     def _search(self, query, sort=None, historysearch=0):
+        ### XXX docstring
         while True:
             try:
                 searcher, timestamp = self.request.cfg.xapian_searchers.pop()
@@ -242,7 +244,7 @@
                 searcher.configure(self.prefixMap, self.indexValueMap)
                 timestamp = self.mtime()
                 break
-        
+
         kw = {}
         if sort == 'weight':
             # XXX: we need real weight here, like _moinSearch
@@ -256,7 +258,7 @@
             'attachment', 'mtime', 'wikiname', 'revision'], **kw)
         self.request.cfg.xapian_searchers.append((searcher, timestamp))
         return hits
-    
+
     def _do_queued_updates(self, request, amount=5):
         """ Assumes that the write lock is acquired """
         self.touch()
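
The loop at the top of _search above implements a small searcher pool: pop a cached (searcher, timestamp) pair, discard it if the index changed since it was opened, and push it back after the query. The shape of that pattern in isolation, with hypothetical helpers pool, open_searcher and index_mtime:

    while True:
        try:
            searcher, timestamp = pool.pop()
            if timestamp != index_mtime():
                continue                        # stale searcher: drop it
        except IndexError:                      # pool empty: open a fresh one
            searcher, timestamp = open_searcher(), index_mtime()
        break
    try:
        hits = searcher.search(query)
    finally:
        pool.append((searcher, timestamp))      # return it for reuse
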
@@ -269,8 +271,7 @@
             p = Page(request, name)
             if request.cfg.xapian_index_history:
                 for rev in p.getRevList():
-                    self._index_page(writer, Page(request, name, rev=rev),
-                            mode='update')
+                    self._index_page(writer, Page(request, name, rev=rev), mode='update')
             else:
                 self._index_page(writer, p, mode='update')
             self.update_queue.remove([name])
@@ -385,7 +386,7 @@
                         return (lang, lang)
                 elif not line.startswith('#'):
                     break
-        
+
         if not lang:
             # no lang found at all.. fallback to default language
             lang = default_lang
@@ -582,7 +583,7 @@
             enq, mset, docs = writer.search(query, valuesWanted=['pagename',
                 'attachment', ])
             if docs:
-                doc = docs[0]   # there should be only one
+                doc = docs[0] # there should be only one
                 writer.delete_document(doc['uid'])
                 request.log('attachment %s from %s removed from index' %
                     (doc['values']['attachment'], doc['values']['pagename']))
--- a/MoinMoin/search/__init__.py	Fri Aug 25 14:44:03 2006 +0200
+++ b/MoinMoin/search/__init__.py	Sat Aug 26 17:37:17 2006 +0200
@@ -13,8 +13,7 @@
 from MoinMoin.search.queryparser import QueryParser
 from MoinMoin.search.builtin import Search
 
-def searchPages(request, query, sort='weight', mtime=None,
-        historysearch=None, **kw):
+def searchPages(request, query, sort='weight', mtime=None, historysearch=None, **kw):
     """ Search the text of all pages for query.
     
     @param request: current request
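
For orientation, a hedged usage sketch of this entry point; it assumes, as the QueryParser import above suggests, that a plain unicode query string is parsed internally:

    from MoinMoin.search import searchPages
    results = searchPages(request, u'title:FrontPage', sort='weight')
    # results wraps the hits for rendering; request comes from the caller
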
--- a/MoinMoin/search/builtin.py	Fri Aug 25 14:44:03 2006 +0200
+++ b/MoinMoin/search/builtin.py	Sat Aug 26 17:37:17 2006 +0200
@@ -1,4 +1,5 @@
 # -*- coding: iso-8859-1 -*-
+# XXX docstring incorrect
 """
     MoinMoin - search engine
     
@@ -22,6 +23,7 @@
 ##############################################################################
 
 class UpdateQueue:
+    # XXX docstring
     def __init__(self, file, lock_dir):
         self.file = file
         self.writeLock = lock.WriteLock(lock_dir, timeout=10.0)
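
UpdateQueue serializes index updates through a plain file guarded by these locks. A hedged sketch of what the append side of such a queue looks like (hypothetical method body; assumes MoinMoin's lock API plus codecs/config as used elsewhere in this module):

    def append(self, pagename):
        """ Append a page name to the queue file, under the write lock """
        if not self.writeLock.acquire(60.0):
            return                          # could not lock, give up
        try:
            f = codecs.open(self.file, 'a', config.charset)
            try:
                f.write(pagename + "\n")
            finally:
                f.close()
        finally:
            self.writeLock.release()
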
@@ -153,8 +155,7 @@
         filesys.makeDirs(self.dir)
         self.sig_file = os.path.join(main_dir, 'complete')
         lock_dir = os.path.join(main_dir, 'index-lock')
-        self.lock = lock.WriteLock(lock_dir,
-                                   timeout=3600.0, readlocktimeout=60.0)
+        self.lock = lock.WriteLock(lock_dir, timeout=3600.0, readlocktimeout=60.0)
         #self.read_lock = lock.ReadLock(lock_dir, timeout=3600.0)
         self.update_queue = UpdateQueue(os.path.join(main_dir, 'update-queue'),
                                 os.path.join(main_dir, 'update-queue-lock'))
@@ -170,9 +171,9 @@
         raise NotImplementedError('...')
 
     def exists(self):
-        """ Check if index exists """        
+        """ Check if index exists """
         return os.path.exists(self.sig_file)
-                
+
     def mtime(self):
         """ Modification time of the index """
         return os.path.getmtime(self.dir)
@@ -180,8 +181,9 @@
     def touch(self):
         """ Touch the index """
         os.utime(self.dir, None)
-    
+
     def _search(self, query):
+        # XXX docstring
         raise NotImplementedError('...')
 
     def search(self, query, **kw):
@@ -252,7 +254,7 @@
         from threading import Thread
         indexThread = Thread(target=self._index_pages, args=(files, mode))
         indexThread.setDaemon(True)
-        
+
         # Join the index thread after the current request finishes, to
         # prevent Apache CGI from killing the process.
         def joinDecorator(finish):
@@ -313,7 +315,7 @@
                     target=lockedDecorator(self._do_queued_updates),
                     args=(self._indexingRequest(self.request),))
             indexThread.setDaemon(True)
-            
+
             # Join the index thread after the current request finishes, to
             # prevent Apache CGI from killing the process.
             def joinDecorator(finish):
@@ -321,7 +323,7 @@
                     finish()
                     indexThread.join()
                 return func
-                
+
             self.request.finish = joinDecorator(self.request.finish)
             indexThread.start()
         except:
@@ -329,10 +331,11 @@
             raise
 
     def _do_queued_updates(self, request, amount=5):
+        # XXX docstring
         raise NotImplementedError('...')
 
     def optimize(self):
-        """ Optimize the the index if possible """
+        """ Optimize the index if possible """
         raise NotImplementedError('...')
 
     def contentfilter(self, filename):
@@ -371,7 +374,7 @@
         request = Request(request.url)
         class SecurityPolicy(Permissions):
             def read(*args, **kw):
-                return True        
+                return True
         request.user.may = SecurityPolicy(request.user)
         return request
 
@@ -391,13 +394,14 @@
         finally:
             f.close()
 
+
 ##############################################################################
 ### Searching
 ##############################################################################
 
 class Search:
     """ A search run """
-    
+
     def __init__(self, request, query, sort='weight', mtime=None,
             historysearch=0):
         self.request = request
@@ -415,7 +419,7 @@
             hits = self._xapianSearch()
         else:
             hits = self._moinSearch()
-            
+
         # important - filter deleted pages or pages the user may not read!
         if not self.filtered:
             hits = self._filter(hits)
@@ -427,7 +431,7 @@
             self.sort = None
             mset = self._xapianMset
             estimated_hits = (
-                (mset.get_matches_estimated() == mset.get_matches_upper_bound() 
+                (mset.get_matches_estimated() == mset.get_matches_upper_bound()
                     and
                  mset.get_matches_estimated() == mset.get_matches_lower_bound())
                 and '' or 'about',
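
The (condition and '' or 'about') construction above is the pre-Python-2.5 conditional idiom; written out with the same Xapian MSet calls:

    exact = (mset.get_matches_estimated() == mset.get_matches_upper_bound()
             and
             mset.get_matches_estimated() == mset.get_matches_lower_bound())
    if exact:
        qualifier = ''       # the bounds agree, the count is exact
    else:
        qualifier = 'about'  # Xapian could only estimate the count
    estimated_hits = (qualifier, mset.get_matches_estimated())
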
@@ -437,7 +441,6 @@
 
         return getSearchResults(self.request, self.query, hits, start,
                 self.sort, estimated_hits)
-        
 
     # ----------------------------------------------------------------
     # Private!
@@ -490,7 +493,7 @@
                     return d
                 pages = [dict_decode(hit['values']) for hit in hits]
                 self.request.log("xapianSearch: finds pages: %r" % pages)
-                
+
                 self._xapianEnquire = enq
                 self._xapianMset = mset
                 self._xapianIndex = index
@@ -512,7 +515,7 @@
         else:
             # we didn't use xapian in this request
             self.request.cfg.xapian_search = 0
-        
+
         # some postprocessing by _moinSearch is required
         return self._moinSearch(pages)
 
@@ -522,11 +525,11 @@
         @param term: the term as string
         @param pos: starting position of the match
         """
-        if term[0] == 'S':      # TitleMatch
+        if term[0] == 'S': # TitleMatch
             return TitleMatch(start=pos, end=pos+len(term)-1)
-        else:                   # TextMatch (incl. headers)
+        else: # TextMatch (incl. headers)
             return TextMatch(start=pos, end=pos+len(term))
-        
+
     def _xapianMatch(self, uid, page=None):
         """ Get all relevant Xapian matches per document id
         
@@ -545,7 +548,7 @@
             in positions.iteritems()]
 
         if not matches:
-            return [Match()]    # dummy for metadata, we got a match!
+            return [Match()] # dummy for metadata, we got a match!
 
         return matches
 
@@ -566,7 +569,7 @@
         hits = self._getHits(pages, self._moinMatch)
         self.request.clock.stop('_moinSearch')
         return hits
-    
+
     def _moinMatch(self, page, uid=None):
         """ Get all matches from regular moinSearch
         
@@ -636,7 +639,7 @@
             return self.request.rootpage.getPageList(filter=filter_)
         else:
             return self.request.rootpage.getPageList(user='', exists=0)
-        
+
     def _filter(self, hits):
         """ Filter out deleted or acl protected pages
         
@@ -650,7 +653,6 @@
                     if (not wikiname in thiswiki or
                        page.exists() and userMayRead(page.page_name) or
                        page.page_name.startswith(fs_rootpage)) and
-                       (not self.mtime or 
-                           self.mtime <= page.mtime_usecs()/1000000)]
+                       (not self.mtime or self.mtime <= page.mtime_usecs()/1000000)]
         return filtered
 
--- a/MoinMoin/search/queryparser.py	Fri Aug 25 14:44:03 2006 +0200
+++ b/MoinMoin/search/queryparser.py	Sat Aug 26 17:37:17 2006 +0200
@@ -26,7 +26,7 @@
 
 class BaseExpression:
     """ Base class for all search terms """
-    
+
     def __init__(self):
         self.negated = 0
 
@@ -35,7 +35,7 @@
 
     def negate(self):
         """ Negate the result of this term """
-        self.negated = 1 
+        self.negated = 1
 
     def pageFilter(self):
         """ Return a page filtering function
@@ -61,13 +61,13 @@
             return [Match()]
         else:
             return None
-    
+
     def costs(self):
         """ Return estimated time to calculate this term
         
         Number is relative to other terms and has no real unit.
         It allows doing the fast searches first.
-        """ 
+        """
         return 0
 
     def highlight_re(self):
@@ -117,7 +117,7 @@
 
     def subterms(self):
         return self._subterms
-    
+
     def costs(self):
         return self._costs
 
@@ -147,7 +147,7 @@
                         return False
                 return True
             return filter
-        
+
         return None
 
     def sortByCost(self):
@@ -171,7 +171,7 @@
         for s in self._subterms:
             highlight_re = s.highlight_re()
             if highlight_re: result.append(highlight_re)
-            
+
         return '|'.join(result)
 
     def xapian_wanted(self):
@@ -206,7 +206,7 @@
         if not not_terms:
             # no, just return query for not negated terms
             return t1
-        
+
         # yes, link not negated and negated terms' query with an AND_NOT query
         if len(not_terms) == 1:
             t2 = Query(not_terms[0])
@@ -218,7 +218,7 @@
 
 class OrExpression(AndExpression):
     """ A term connecting several sub terms with a logical OR """
-    
+
     operator = ' or '
 
     def search(self, page):
@@ -246,7 +246,7 @@
     Both page content and the page title are searched, using an
     additional TitleSearch term.
     """
-    
+
     def __init__(self, pattern, use_re=False, case=False):
         """ Init a text search
 
@@ -260,10 +260,10 @@
         self.case = case
         self._build_re(self._pattern, use_re=use_re, case=case)
         self.titlesearch = TitleSearch(self._pattern, use_re=use_re, case=case)
-        
+
     def costs(self):
         return 10000
-    
+
     def __unicode__(self):
         neg = self.negated and '-' or ''
         return u'%s"%s"' % (neg, unicode(self._pattern))
@@ -320,14 +320,12 @@
     def xapian_term(self, request, allterms):
         if self.use_re:
             # basic regex matching per term
-            terms = [term for term in allterms() if
-                    self.search_re.match(term)]
+            terms = [term for term in allterms() if self.search_re.match(term)]
             if not terms:
                 return Query()
             queries = [Query(Query.OP_OR, terms)]
         else:
-            analyzer = Xapian.WikiAnalyzer(request=request,
-                    language=request.cfg.language_default)
+            analyzer = Xapian.WikiAnalyzer(request=request, language=request.cfg.language_default)
             terms = self._pattern.split()
 
             # all parsed wikiwords, AND'ed
@@ -349,8 +347,7 @@
             if not self.case and stemmed:
                 new_pat = ' '.join(stemmed)
                 self._pattern = new_pat
-                self._build_re(new_pat, use_re=False, case=self.case,
-                        stemmed=True)
+                self._build_re(new_pat, use_re=False, case=self.case, stemmed=True)
 
         # titlesearch OR parsed wikiwords
         return Query(Query.OP_OR,
@@ -372,7 +369,7 @@
         self.use_re = use_re
         self.case = case
         self._build_re(self._pattern, use_re=use_re, case=case)
-        
+
     def costs(self):
         return 100
 
@@ -392,7 +389,7 @@
                 return False
             return True
         return filter
-            
+
     def search(self, page):
         # Get matches in page name
         matches = []
@@ -415,7 +412,7 @@
                         end=match.end()+post))
             else:
                 matches.append(TitleMatch(re_match=match))
-        
+
         if ((self.negated and matches) or
             (not self.negated and not matches)):
             return None
@@ -425,7 +422,7 @@
             return []
 
     def xapian_wanted(self):
-        return True             # only easy regexps possible
+        return True # only easy regexps possible
 
     def xapian_need_postproc(self):
         return self.case
@@ -451,7 +448,7 @@
             terms = self._pattern.split()
             terms = [[w for w, pos in analyzer.raw_tokenize(t)] for t in terms]
 
-            # all parsed wikiwords, AND'ed
+            # all parsed wikiwords, ANDed
             queries = []
             stemmed = []
             for t in terms:
@@ -478,8 +475,7 @@
             if not self.case and stemmed:
                 new_pat = ' '.join(stemmed)
                 self._pattern = new_pat
-                self._build_re(new_pat, use_re=False, case=self.case,
-                        stemmed=True)
+                self._build_re(new_pat, use_re=False, case=self.case, stemmed=True)
 
         return Query(Query.OP_AND, queries)
 
@@ -513,7 +509,7 @@
         else:
             self.pattern = pattern
             self.static = True
-        
+
     def costs(self):
         return 5000 # cheaper than a TextSearch
 
@@ -529,7 +525,7 @@
         matches = []
 
         Found = True
-        
+
         for link in page.getPageLinks(page.request):
             if ((self.static and self.pattern == link) or
                 (not self.static and self.search_re.match(link))):
@@ -555,7 +551,7 @@
             return []
 
     def xapian_wanted(self):
-        return True             # only easy regexps possible
+        return True # only easy regexps possible
 
     def xapian_need_postproc(self):
         return self.case
@@ -613,7 +609,7 @@
     def search(self, page):
         match = False
         body = page.getPageHeader()
-        
+
         if re.findall('#language %s' % self.pattern, body):
             match = True
 
@@ -626,10 +622,10 @@
             return []
 
     def xapian_wanted(self):
-        return True             # only easy regexps possible
+        return True # only easy regexps possible
 
     def xapian_need_postproc(self):
-        return False            # case-sensitivity would make no sense
+        return False # case-sensitivity would make no sense
 
     def xapian_term(self, request, allterms):
         self.xapian_called = True
@@ -678,7 +674,7 @@
         return u'(Category%s)' % self._pattern
 
     def xapian_wanted(self):
-        return True             # only easy regexps possible
+        return True # only easy regexps possible
 
     def xapian_need_postproc(self):
         return self.case
@@ -720,7 +716,7 @@
         self._pattern = pattern.lower()
         self.negated = 0
         self.use_re = use_re
-        self.case = False       # not case-sensitive!
+        self.case = False # not case-sensitive!
         self.xapian_called = False
         self._build_re(self._pattern, use_re=use_re, case=case)
 
@@ -738,10 +734,10 @@
         return None
 
     def xapian_wanted(self):
-        return True             # only easy regexps possible
+        return True # only easy regexps possible
 
     def xapian_need_postproc(self):
-        return False            # case-sensitivity would make no sense
+        return False # case-sensitivity would make no sense
 
     def xapian_term(self, request, allterms):
         self.xapian_called = True
@@ -780,7 +776,7 @@
         self._pattern = pattern.lower()
         self.negated = 0
         self.use_re = use_re
-        self.case = False       # not case-sensitive!
+        self.case = False # not case-sensitive!
         self.xapian_called = False
         self._build_re(self._pattern, use_re=use_re, case=case)
 
@@ -814,10 +810,10 @@
             return []
 
     def xapian_wanted(self):
-        return True             # only easy regexps possible
+        return True # only easy regexps possible
 
     def xapian_need_postproc(self):
-        return False            # case-sensitivity would make no sense
+        return False # case-sensitivity would make no sense
 
     def xapian_term(self, request, allterms):
         self.xapian_called = True
@@ -882,7 +878,7 @@
             if q:
                 result.append(q)
         return result
-            
+
     def _and_expression(self):
         result = None
         while not result and self._query:
@@ -897,13 +893,13 @@
             result.append(term)
             term = self._single_term()
         return result
-                                
+
     def _single_term(self):
         regex = (r'(?P<NEG>-?)\s*(' +              # leading '-'
                  r'(?P<OPS>\(|\)|(or\b(?!$)))|' +  # or, (, )
                  r'(?P<MOD>(\w+:)*)' +
                  r'(?P<TERM>("[^"]+")|' +
-                 r"('[^']+')|([^\s\)]+)))")             # search word itself
+                 r"('[^']+')|([^\s\)]+)))")        # search word itself
         self._query = self._query.strip()
         match = re.match(regex, self._query, re.U)
         if not match:
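
What the term regex assembled above captures for a typical query fragment (groups as named in the pattern; illustrative):

    import re
    m = re.match(regex, u'-title:"Front Page" or WikiWord', re.U)
    m.group('NEG')   # u'-'              leading negation
    m.group('MOD')   # u'title:'         modifier chain, (\w+:)* may repeat
    m.group('TERM')  # u'"Front Page"'   the search word itself, quotes kept
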
@@ -950,7 +946,7 @@
             elif "domain".startswith(m):
                 domain = True
 
-        # oh, let's better call xapian if we encouter this nasty regexp ;)
+        # oh, let's better call xapian if we encounter this nasty regexp ;)
         if not category:
             cat_re = re.compile(r'----\(-\*\)\(\\r\)\?\\n\)\(\.\*\)Category(.*)\\b', re.U)
             cat_match = cat_re.search(text)
@@ -986,4 +982,3 @@
                 text.startswith("'") and text.endswith("'"))
 
 
-
--- a/MoinMoin/search/results.py	Fri Aug 25 14:44:03 2006 +0200
+++ b/MoinMoin/search/results.py	Sat Aug 26 17:37:17 2006 +0200
@@ -25,7 +25,7 @@
     """
     # Default match weight
     _weight = 1.0
-    
+
     def __init__(self, start=0, end=0, re_match=None):
         self.re_match = re_match
         if not re_match:
@@ -42,7 +42,7 @@
                  self.start == other.start and
                  self.end == other.end)
         return equal
-        
+
     def __ne__(self, other):
         return not self.__eq__(other)
 
@@ -64,7 +64,7 @@
 
     # object properties
     start = property(_get_start)
-    end   = property(_get_end)
+    end = property(_get_end)
 
 
 class TextMatch(Match):
@@ -75,7 +75,7 @@
 class TitleMatch(Match):
     """ Represents a match in the page title
     
-    Has more weight as a match in the page content.
+    Has more weight than a match in the page content.
     """
     # Matches in titles are much more important in wikis. This setting
     # seems to make all pages that have matches in the title appear
@@ -139,19 +139,16 @@
         else:
             matches = self._matches
 
-        # Filter by type and sort by sort using fast schwartzian
-        # transform.
+        # Filter by type and sort by the 'sort' key using a fast schwartzian transform.
         if sort == 'start':
-            tmp = [(match.start, match) for match in matches
-                   if instance(match, type)]
+            tmp = [(match.start, match) for match in matches if isinstance(match, type)]
         else:
-            tmp = [(match.weight(), match) for match in matches
-                   if instance(match, type)]
+            tmp = [(match.weight(), match) for match in matches if isinstance(match, type)]
         tmp.sort()
         if sort == 'weight':
             tmp.reverse()
         matches = [item[1] for item in tmp]
-        
+
         return matches
 
     def _unique_matches(self, type=Match):
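
Both sort branches use decorate-sort-undecorate (the "schwartzian transform" the comments refer to); the idiom in isolation:

    items = [u'WikiWord', u'apage', u'Index']
    tmp = [(item.lower(), item) for item in items]  # decorate with a sort key
    tmp.sort()                                      # tuples compare key-first
    items = [item for key, item in tmp]             # undecorate
    # -> [u'apage', u'Index', u'WikiWord']
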
@@ -164,10 +161,8 @@
         @rtype: list
         @return: list of matches of type, sorted by match.start
         """
-        # Filter by type and sort by match.start using fast schwartzian
-        # transform.
-        tmp = [(match.start, match) for match in self._matches
-               if isinstance(match, type)]
+        # Filter by type and sort by match.start using fast schwartzian transform.
+        tmp = [(match.start, match) for match in self._matches if isinstance(match, type)]
         tmp.sort()
 
         if not len(tmp):
@@ -183,11 +178,11 @@
             matches.append(item[1])
 
         return matches
-    
+
 
 class FoundAttachment(FoundPage):
-    """ Represent an attachment in search results """
-    
+    """ Represents an attachment in search results """
+
     def __init__(self, page_name, attachment, matches=None, page=None):
         self.page_name = page_name
         self.attachment = attachment
@@ -207,8 +202,8 @@
 
 
 class FoundRemote(FoundPage):
-    """ Represent an attachment in search results """
-    
+    """ Represents an attachment in search results """
+
     def __init__(self, wikiname, page_name, attachment, matches=None, page=None):
         self.wikiname = wikiname
         self.page_name = page_name
@@ -243,7 +238,7 @@
     by name and then by rank.
     """
     # Public functions --------------------------------------------------
-    
+
     def __init__(self, query, hits, pages, elapsed, sort, estimated_hits):
         self.query = query # the query
         self.hits = hits # hits list
@@ -263,13 +258,13 @@
         tmp.sort()
         tmp.reverse()
         self.hits = [item[2] for item in tmp]
-        
+
     def _sortByPagename(self):
         """ Sorts a list of found pages alphabetical by page name """
         tmp = [(hit.page_name, hit) for hit in self.hits]
         tmp.sort()
         self.hits = [item[1] for item in tmp]
-        
+
     def stats(self, request, formatter, hitsFrom):
         """ Return search statistics, formatted with formatter
 
@@ -331,7 +326,7 @@
         # Add pages formatted as list
         if self.hits:
             write(list(1))
-            
+
             # XXX: Do some xapian magic here
             if paging:
                 hitsTo = hitsFrom + request.cfg.search_results_per_page
@@ -353,7 +348,7 @@
                 else:
                     querydict = None
                 querystr = self.querystring(querydict)
-            
+
                 matchInfo = ''
                 if info:
                     matchInfo = self.formatInfo(f, page)
@@ -401,7 +396,7 @@
 
         if paging and len(self.hits) <= request.cfg.search_results_per_page:
             paging = False
-        
+
         # Add pages formatted as definition list
         if self.hits:
             write(f.definition_list(1))
@@ -454,14 +449,14 @@
                 write(self.formatPageLinks(hitsFrom=hitsFrom,
                     hitsPerPage=request.cfg.search_results_per_page,
                     hitsNum=len(self.hits)))
-        
+
         return self.getvalue()
 
     # Private -----------------------------------------------------------
 
     # These methods are not meant to be used by clients and may change
     # without notice.
-    
+
     def formatContext(self, page, context, maxlines):
         """ Format search context for each matched page
 
@@ -474,7 +469,7 @@
         last = len(body) - 1
         lineCount = 0
         output = []
-        
+
         # Get unique text matches sorted by match.start, try to ignore
         # matches in page header, and show the first maxlines matches.
         # TODO: when we implement weight algorithm for text matches, we
@@ -486,7 +481,7 @@
         # Format context
         while i < len(matches) and lineCount < maxlines:
             match = matches[i]
-            
+
             # Get context range for this match
             start, end = self.contextRange(context, match, start, last)
 
@@ -496,7 +491,7 @@
             # same match again on a separate line.
 
             output.append(f.text(u'...'))
-            
+
             # Get the index of the first match completely within the
             # context.
             for j in xrange(0, len(matches)):
@@ -540,9 +535,9 @@
                 # This is a page with no text, only header, for example,
                 # a redirect page.
                 output = f.text(page.page.getPageHeader(length=context))
-        
+
         return output
-        
+
     def firstInterestingMatch(self, page, matches):
         """ Return the first interesting match
 
@@ -587,7 +582,7 @@
         if cstart < start:
             cend += start - cstart
             cstart = start
-            
+
         # But if end is after last, give back context to start
         if cend > last:
             cstart -= cend - last
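
The two clamps in contextRange keep the context window inside the page body by shifting any lost width to the other side; the logic in isolation (names assumed from the surrounding code):

    def clamp(cstart, cend, start, last):
        if cstart < start:           # window begins before the text:
            cend += start - cstart   # push the missing width to the right
            cstart = start
        if cend > last:              # window ends after the text:
            cstart -= cend - last    # give the excess back to the left
            cend = last
        return max(cstart, start), cend

    clamp(-5, 15, 0, 100)  # -> (0, 20)
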
@@ -609,7 +604,7 @@
         """
         # Get unique title matches sorted by match.start
         matches = page.get_matches(unique=1, sort='start', type=TitleMatch)
-        
+
         # Format
         pagename = page.page_name
         f = self.formatter
@@ -627,7 +622,7 @@
         # Add text after match
         if start < len(pagename):
             output.append(f.text(pagename[start:]))
-        
+
         if page.attachment: # show the attachment that matched
             output.extend([
                     " ",
@@ -649,7 +644,7 @@
         @param location: current location in text
         @rtype: unicode
         @return: formatted match or empty string
-        """        
+        """
         start = max(location, match.start)
         if start < match.end:
             f = self.formatter
@@ -678,12 +673,12 @@
             querydict.update({'from': n * hitsPerPage})
             return self.request.page.url(self.request, querydict,
                     escape=0, relative=False)
-        
+
         pages = float(hitsNum) / hitsPerPage
         if pages - int(pages) > 0.0:
             pages = int(pages) + 1
         cur_page = hitsFrom / hitsPerPage
-        l = []
+        l = [] # XXX do not use single letter names, esp. not "l"
 
         # previous page available
         if cur_page > 0:
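
The float arithmetic above is just a ceiling division, ceil(hitsNum / hitsPerPage); an equivalent integer form for comparison:

    pages = (hitsNum + hitsPerPage - 1) // hitsPerPage
    cur_page = hitsFrom // hitsPerPage
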
@@ -741,7 +736,7 @@
             f.paragraph(1, attr={'class': 'searchhitinfobar'}),
             f.text('%.1fk - ' % (page.page.size()/1024.0)),
             f.text('rev: %d %s- ' % (rev,
-                rev == page.page.getRevList()[0] and 
+                rev == page.page.getRevList()[0] and
                 '(%s) ' % _('current') or '')),
             f.text('last modified: %s' % page.page.mtime_printable(request)),
             # XXX: proper metadata