Mercurial > moin > 1.9
view MoinMoin/support/lupy/search/indexsearcher.py @ 0:77665d8e2254
tag of nonpublic@localhost--archive/moin--enterprise--1.5--base-0
(automatically generated log message)
imported from: moin--main--1.5--base-0
author | Thomas Waldmann <tw-public@gmx.de> |
---|---|
date | Thu, 22 Sep 2005 15:09:50 +0000 |
parents | |
children |
line wrap: on
line source
# This module is part of the Lupy project and is Copyright 2003 Amir
# Bakhtiar (amir@divmod.org). This is free software; you can redistribute
# it and/or modify it under the terms of version 2.1 of the GNU Lesser
# General Public License as published by the Free Software Foundation.

import math, itertools

import similarity, hits
from bisect import insort
from MoinMoin.support.lupy.index import segment, segmentmerger
from MoinMoin.support.lupy import store


def openDir(directory):
    """Open a reader over the index stored in C{directory}.

    The segment infos are read from C{directory}.  An index with exactly
    one segment is served by a single C{SegmentReader}; any other index is
    served by a C{SegmentsReader} wrapping one C{SegmentReader} per segment
    (an empty list when the index has no segments).
    """
    infos = segment.SegmentInfos()
    infos.read(directory)

    if len(infos) == 1:
        # index is optimized
        return segmentmerger.SegmentReader(infos[0], True)

    if len(infos) == 0:
        readers = []
    else:
        # Only the reader of the last segment is created with True.
        # NOTE(review): presumably this flag makes the reader responsible
        # for closing the directory -- confirm against SegmentReader.
        readers = [segmentmerger.SegmentReader(info, False)
                   for info in infos[:-1]]
        readers.append(segmentmerger.SegmentReader(infos[-1], True))
    return segmentmerger.SegmentsReader(directory, readers)


def open(path):
    """Returns an IndexReader reading the index in an FSDirectory in
    the named path.

    Note that inside this module the name C{open} refers to this function,
    not to the builtin.
    """
    # NOTE(review): the False argument is presumably "do not create the
    # directory" -- confirm against store.getDirectory.
    return openDir(store.getDirectory(path, False))


def _passingHits(scorer, maxDoc, bits):
    """Yield C{(doc, score)} for every hit of C{scorer} that has a
    positive score and, when C{bits} is not None, for which
    C{bits.get(doc)} is true."""
    for doc, score in scorer.score(maxDoc):
        if score > 0.0 and (bits is None or bits.get(doc)):
            yield doc, score


def _docNumbers(scoredHits):
    """Yield only the document number of each C{(doc, score)} pair."""
    for doc, _score in scoredHits:
        yield doc


class IndexSearcher:
    """The base class for search implementations.

    Implements search over a single index.  Subclasses may implement
    search over multiple indices, and over indices on remote servers.
    """

    def __init__(self, dirOrPath):
        """Creates a searcher searching the provided index.

        C{dirOrPath} is either a path string, opened with L{open}, or a
        directory object, opened with L{openDir}.
        """
        if isinstance(dirOrPath, basestring):
            self.reader = open(dirOrPath)
        else:
            self.reader = openDir(dirOrPath)

    def close(self):
        """Frees resources associated with this Searcher."""
        self.reader.close()

    def docFreq(self, term):
        """Return C{self.reader.docFreq(term)}."""
        return self.reader.docFreq(term)

    def maxDoc(self):
        """Return C{self.reader.maxDoc()}."""
        return self.reader.maxDoc()

    def doc(self, i):
        """For use by L{lupy.search.hits.Hits}."""
        return self.reader.document(i)

    def searchAll(self, query, filter):
        """Lower-level search API.

        Returns an iterator that yields the document numbers of all
        non-zero scoring documents for this query that pass the filter.
        The iterator is empty when the query provides no scorer.

        Applications should only use this if they need I{all} of the
        matching documents.  The high-level search API (L{search}) is
        usually more efficient, as it skips non-high-scoring hits.

         - C{query} to match documents
         - C{filter} if not None, its C{bits(reader)} result is used to
           eliminate some documents
        """
        scorer = getScorer(query, self, self.reader)
        if scorer is None:
            # Nothing can match: hand back an empty iterator so callers
            # can always iterate over the result.
            return iter(())

        if filter is not None:
            bits = filter.bits(self.reader)
        else:
            bits = None
        return _docNumbers(_passingHits(scorer, self.reader.maxDoc(), bits))

    def search(self, query, filter=None, nDocs=None):
        """Search this index for documents matching C{query} and
        (optionally) passing the C{filter} bitvector.

        If C{nDocs} is None a L{hits.Hits} object is returned.  Otherwise
        a L{TopDocs} is returned whose C{totalHits} counts every passing
        document and whose C{scoreDocs} holds at most C{nDocs} of the
        best L{ScoreDoc}s, sorted ascending by C{ScoreDoc} ordering
        (lowest score first).
        """
        if nDocs is None:
            return hits.Hits(self, query, filter)

        scorer = getScorer(query, self, self.reader)
        if scorer is None:
            return TopDocs(0, [])

        if filter is not None:
            bits = filter.bits(self.reader)
        else:
            bits = None

        scoreDocs = []      # hit queue, kept sorted with the lowest first
        totalHits = 0
        minScore = 0.0
        # zeroed buckets and docs not in bits are skipped by _passingHits
        for doc, scr in _passingHits(scorer, self.reader.maxDoc(), bits):
            totalHits += 1
            if scr >= minScore:
                # update hit queue
                insort(scoreDocs, ScoreDoc(doc, scr))
                if len(scoreDocs) > nDocs:
                    # hit queue overfull: remove the lowest entry, which
                    # sits at the front of the ascending list
                    scoreDocs.pop(0)
                    # reset minimum score (queue is empty if nDocs <= 0)
                    if scoreDocs:
                        minScore = scoreDocs[0].score
        return TopDocs(totalHits, scoreDocs)

    def fieldNames(self):
        """Return C{self.reader.fieldNames()}."""
        # Experimental for auto queries
        return self.reader.fieldNames()


def getScorer(query, searcher, reader):
    """Normalize C{query} and return C{query.scorer(reader)}.

    The normalization factor is the inverse square root of
    C{query.sumOfSquaredWeights(searcher)}; a zero root is replaced by
    1.0 to avoid a division by zero.
    """
    sumOfSquares = query.sumOfSquaredWeights(searcher)
    norm = 1.0 / (math.sqrt(sumOfSquares) or 1.0)
    query.normalize(norm)
    return query.scorer(reader)


class ScoreDoc(object):
    """A document number together with its score.

    Only C{<} is defined: a lower score sorts first, and among equal
    scores the higher document number sorts first.
    """

    def __init__(self, d, s):
        self.doc = d
        self.score = s

    def __lt__(self, other):
        if self.score == other.score:
            return self.doc > other.doc
        return self.score < other.score


class TopDocs(object):
    """Result holder: total hit count plus a list of L{ScoreDoc}s."""

    def __init__(self, th, sds):
        self.totalHits = th
        self.scoreDocs = sds