comparison MoinMoin/items/__init__.py @ 1958:70c534eb5596

use whoosh for index filtering
author Cheer Xiao <xiaqqaix@gmail.com>
date Fri, 08 Feb 2013 21:17:59 +0800
parents f0b346ac6338
children 5d672f909f27 f05af7eb6f50
comparison
equal deleted inserted replaced
1931:7dc5085f4960 1958:70c534eb5596
30 30
31 from flatland import Form 31 from flatland import Form
32 32
33 from jinja2 import Markup 33 from jinja2 import Markup
34 34
35 from whoosh.query import Term, And, Prefix 35 from whoosh.query import Term, Prefix, And, Or, Not
36 36
37 from MoinMoin import log 37 from MoinMoin import log
38 logging = log.getLogger(__name__) 38 logging = log.getLogger(__name__)
39 39
40 from MoinMoin.security.textcha import TextCha, TextChaizedForm 40 from MoinMoin.security.textcha import TextCha, TextChaizedForm
197 form = cls.from_flat(request.form.items() + request.files.items()) 197 form = cls.from_flat(request.form.items() + request.files.items())
198 TextCha(form).amend_form() 198 TextCha(form).amend_form()
199 return form 199 return form
200 200
201 201
202 UNKNOWN_ITEM_GROUP = "unknown items"
203
204 def _build_contenttype_query(groups):
205 """
206 Build a Whoosh query from a list of contenttype groups.
207 """
208 queries = []
209 for g in groups:
210 for e in content_registry.groups[g]:
211 ct_unicode = unicode(e.content_type)
212 queries.append(Term(CONTENTTYPE, ct_unicode))
213 queries.append(Prefix(CONTENTTYPE, ct_unicode + u';'))
214 return Or(queries)
215
202 IndexEntry = namedtuple('IndexEntry', 'relname meta') 216 IndexEntry = namedtuple('IndexEntry', 'relname meta')
203 217
204 MixedIndexEntry = namedtuple('MixedIndexEntry', 'relname meta hassubitems') 218 MixedIndexEntry = namedtuple('MixedIndexEntry', 'relname meta hassubitems')
205 219
206 class Item(object): 220 class Item(object):
506 else: 520 else:
507 files.append(IndexEntry(relname, rev.meta)) 521 files.append(IndexEntry(relname, rev.meta))
508 522
509 return dirs, files 523 return dirs, files
510 524
511 @timed() 525 def build_index_query(self, startswith=None, selected_groups=None):
512 def filter_index(self, index, startswith=None, selected_groups=None): 526 prefix = self.subitems_prefix
513 """ 527 if startswith:
514 Filter a list of IndexEntry. 528 query = Prefix(NAME_EXACT, prefix + startswith) | Prefix(NAME_EXACT, prefix + startswith.swapcase())
515 529 else:
516 :param startswith: if set, only items whose names start with startswith 530 query = Prefix(NAME_EXACT, prefix)
517 are selected. 531
518 :param selected_groups: if set, only items whose contentypes belong to 532 if selected_groups:
519 the selected contenttype_groups are selected. 533 selected_groups = set(selected_groups)
520 """ 534 has_unknown = UNKNOWN_ITEM_GROUP in selected_groups
521 if startswith is not None: 535 if has_unknown:
522 index = [e for e in index 536 selected_groups.remove(UNKNOWN_ITEM_GROUP)
523 if e.relname.startswith((startswith, startswith.swapcase()))] 537 ct_query = _build_contenttype_query(selected_groups)
524 538 if has_unknown:
525 def build_contenttypes(groups): 539 ct_query |= Not(_build_contenttype_query(content_registry.groups))
526 contenttypes = [] 540 query &= ct_query
527 for g in groups: 541
528 entries = content_registry.groups.get(g, []) # .get is a temporary workaround for "unknown items" group 542 return query
529 contenttypes.extend([e.content_type for e in entries])
530 return contenttypes
531
532 def contenttype_match(tested, cts):
533 for ct in cts:
534 if ct.issupertype(tested):
535 return True
536 return False
537
538 if selected_groups is not None:
539 selected_contenttypes = build_contenttypes(selected_groups)
540 filtered_index = [e for e in index if contenttype_match(Type(e.meta[CONTENTTYPE]), selected_contenttypes)]
541
542 unknown_item_group = "unknown items"
543 if unknown_item_group in selected_groups:
544 all_contenttypes = build_contenttypes(content_registry.group_names)
545 filtered_index.extend([e for e in index
546 if not contenttype_match(Type(e.meta[CONTENTTYPE]), all_contenttypes)])
547
548 index = filtered_index
549 return index
550 543
551 def get_index(self, startswith=None, selected_groups=None): 544 def get_index(self, startswith=None, selected_groups=None):
552 dirs, files = self.make_flat_index(self.get_subitem_revs()) 545 query = Term(WIKINAME, app.cfg.interwikiname) & self.build_index_query(startswith, selected_groups)
553 return dirs, self.filter_index(files, startswith, selected_groups) 546 revs = flaskg.storage.search(query, sortedby=NAME_EXACT, limit=None)
547 return self.make_flat_index(revs)
554 548
555 def get_mixed_index(self): 549 def get_mixed_index(self):
556 dirs, files = self.make_flat_index(self.get_subitem_revs()) 550 dirs, files = self.make_flat_index(self.get_subitem_revs())
557 dirs_dict = dict([(e.relname, MixedIndexEntry(*e, hassubitems=True)) for e in dirs]) 551 dirs_dict = dict([(e.relname, MixedIndexEntry(*e, hassubitems=True)) for e in dirs])
558 index_dict = dict([(e.relname, MixedIndexEntry(*e, hassubitems=False)) for e in files]) 552 index_dict = dict([(e.relname, MixedIndexEntry(*e, hassubitems=False)) for e in files])