# -*- coding: iso-8859-1 -*-
2 """ 
    MoinMoin - MoinMoin.search Tests
4 
5 We exclude underlay/system pages for some search tests to save time. 
6 
    @copyright: 2005 by Nir Soffer <nirs@freeshell.org>,
                2007-2010 by MoinMoin:ThomasWaldmann
9 @license: GNU GPL, see COPYING for details. 
10 """ 
11 
12 
13 import os, StringIO, time 
14 
15 import py 
16 
17 from MoinMoin.search import QueryError, _get_searcher 
18 from MoinMoin.search.queryparser import QueryParser 
19 from MoinMoin.search.builtin import MoinSearch 
20 from MoinMoin._tests import nuke_xapian_index, wikiconfig, become_trusted, create_page, nuke_page, append_page 
21 from MoinMoin.wikiutil import Version 
22 from MoinMoin.action import AttachFile 
23 
# Oldest py library release this module can be collected with.
PY_MIN_VERSION = '1.0.0'
_py_is_too_old = Version(version=py.version) < Version(version=PY_MIN_VERSION)
if _py_is_too_old:
    # Some tests below are generative (they yield their test calls), which
    # does not work with older versions of py.
    # XXX These tests should be refactored so that they also run with older
    # versions of py.
    py.test.skip('Currently py version %s is needed' % PY_MIN_VERSION)
29 
30 
class TestQueryParsing(object):
    """ search: query parser tests """

    def testQueryParser(self):
        """ search: test the query parser

        Every query of the table is parsed and the string representation of
        the parse result is compared with the expected string.
        """
        parser = QueryParser()
        for query, wanted in [
            # Even a single term is a and expression (this is needed for xapian because it
            # only has AND_NOT, but not a simple NOT). This is why we have many many brackets here.
            #
            # NOTE(review): this table used to contain exact duplicates of the
            # ("a", ...) and ("(HelpOn) (Administration)", ...) entries; they
            # were dropped as redundant. No negated query (leading '-') is
            # covered here - possibly the duplicates were meant to be negation
            # cases. Confirm against upstream and add them if so.
            ("a", '["a"]'),
            ("a b", '["a" "b"]'),
            ("a b c", '["a" "b" "c"]'),
            ("aaa bbb ccc", '["aaa" "bbb" "ccc"]'),
            ("title:aaa title:bbb title:ccc", '[title:"aaa" title:"bbb" title:"ccc"]'),
            ("title:case:aaa title:re:bbb title:re:case:ccc", '[title:case:"aaa" title:re:"bbb" title:re:case:"ccc"]'),
            ("linkto:aaa", '[linkto:"aaa"]'),
            ("category:aaa", '[category:"aaa"]'),
            ("domain:aaa", '[domain:"aaa"]'),
            ("re:case:title:aaa", '[title:re:case:"aaa"]'),
            ("(aaa or bbb) and (ccc or ddd)", '[[[["aaa"] or ["bbb"]]] [[["ccc"] or ["ddd"]]]]'),
            ("(aaa or bbb) (ccc or ddd)", '[[[["aaa"] or ["bbb"]]] [[["ccc"] or ["ddd"]]]]'),
            ("aaa or bbb", '[[["aaa"] or ["bbb"]]]'),
            ("aaa or bbb or ccc", '[[["aaa"] or [[["bbb"] or ["ccc"]]]]]'),
            ("aaa or bbb and ccc", '[[["aaa"] or ["bbb" "ccc"]]]'),
            ("aaa and bbb or ccc", '[[["aaa" "bbb"] or ["ccc"]]]'),
            ("aaa and bbb and ccc", '["aaa" "bbb" "ccc"]'),
            ("aaa or bbb and ccc or ddd", '[[["aaa"] or [[["bbb" "ccc"] or ["ddd"]]]]]'),
            ("aaa or bbb ccc or ddd", '[[["aaa"] or [[["bbb" "ccc"] or ["ddd"]]]]]'),
            ("(HelpOn) (Administration)", '[["HelpOn"] ["Administration"]]'),
            ("(HelpOn) and (Administration)", '[["HelpOn"] ["Administration"]]'),
            ("(HelpOn) and (Administration) or (Configuration)", '[[[["HelpOn"] ["Administration"]] or [["Configuration"]]]]'),
            ("(a) and (b) or (c) or d", '[[[["a"] ["b"]] or [[[["c"]] or ["d"]]]]]'),
            ("a b c d e or f g h", '[[["a" "b" "c" "d" "e"] or ["f" "g" "h"]]]'),
            # unbalanced quotes are kept as part of the term
            ('"no', '[""no"]'),
            ('no"', '["no""]'),
            ("'no", "[\"'no\"]"),
            ("no'", "[\"no'\"]"),
            ('"no\'', '[""no\'"]')]:
            result = parser.parse_query(query)
            assert str(result) == wanted

    def testQueryParserExceptions(self):
        """ search: test that the query parser raises QueryError for malformed queries """
        parser = QueryParser()

        def _test(q):
            py.test.raises(QueryError, parser.parse_query, q)

        # generative test: one test call is yielded per malformed query
        for query in ['""', '(', ')', '(a or b']:
            yield _test, query
83 
84 
85 class BaseSearchTest(object): 
86 """ search: test search """ 
87 doesnotexist = u'jfhsdaASDLASKDJ' 
88 
89 # key  page name, value  page content. If value is None page 
90 # will not be created but will be used for a search. None should 
91 # be used for pages which already exist. 
92 pages = {u'SearchTestPage': u'this is a test page', 
93 u'SearchTestLinks': u'SearchTestPage', 
94 u'SearchTestLinksLowerCase': u'searchtestpage', 
95 u'SearchTestOtherLinks': u'SearchTestLinks', 
96 u'TestEdit': u'TestEdit', 
97 u'TestOnEditing': u'another test page', 
98 u'ContentSearchUpper': u'Find the NEEDLE in the haystack.', 
99 u'ContentSearchLower': u'Find the needle in the haystack.', 
100 u'LanguageSetup': None, 
101 u'CategoryHomepage': None, 
102 u'HomePageWiki': None, 
103 u'FrontPage': None, 
104 u'RecentChanges': None, 
105 u'HelpOnCreoleSyntax': None, 
106 u'HelpIndex': None, 
107 } 
108 
109 searcher_class = None 
110 
111 def _index_update(self): 
112 pass 
113 
114 @classmethod 
115 def setup_class(cls): 
116 request = cls.request 
117 become_trusted(request) 
118 
119 for page, text in cls.pages.iteritems(): 
120 if text: 
121 create_page(request, page, text) 
122 
123 def teardown_class(self): 
124 for page, text in self.pages.iteritems(): 
125 if text: 
126 nuke_page(self.request, page) 
127 
128 def get_searcher(self, query): 
129 raise NotImplementedError 
130 
131 def search(self, query): 
132 if isinstance(query, str) or isinstance(query, unicode): 
133 query = QueryParser().parse_query(query) 
134 
135 return self.get_searcher(query).run() 
136 
137 def test_title_search_simple(self): 
138 searches = {u'title:SearchTestPage': 1, 
139 u'title:LanguageSetup': 1, 
140 u'title:HelpIndex': 1, 
141 u'title:Help': 2, 
142 u'title:TestOn': 1, 
143 u'title:SearchTestNotExisting': 0, 
144 u'title:FrontPage': 1, 
145 u'title:TestOnEditing': 1, 
146 } 
147 
148 def test(query, res_count): 
149 result = self.search(query) 
150 test_result = len(result.hits) 
151 assert test_result == res_count 
152 
153 for query, res_count in searches.iteritems(): 
154 yield query, test, query, res_count 
155 
156 def test_title_search_re(self): 
157 expected_pages = set([u'SearchTestPage', u'SearchTestLinks', u'SearchTestLinksLowerCase', u'SearchTestOtherLinks', ]) 
158 result = self.search(ur'domain:underlay domain:system title:re:\bSearchTest') 
159 found_pages = set([hit.page_name for hit in result.hits]) 
160 assert found_pages == expected_pages 
161 
162 result = self.search(ur'domain:underlay domain:system title:re:\bSearchTest\b') 
163 found_pages = set([hit.page_name for hit in result.hits]) 
164 assert not found_pages 
165 
166 def test_title_search_case(self): 
167 expected_pages = set([u'SearchTestPage', ]) 
168 result = self.search(u'domain:underlay domain:system title:case:SearchTestPage') 
169 found_pages = set([hit.page_name for hit in result.hits]) 
170 assert found_pages == expected_pages 
171 
172 result = self.search(u'domain:underlay domain:system title:case:searchtestpage') 
173 found_pages = set([hit.page_name for hit in result.hits]) 
174 assert not found_pages 
175 
176 def test_title_search_case_re(self): 
177 expected_pages = set([u'SearchTestPage', ]) 
178 result = self.search(ur'domain:underlay domain:system title:case:re:\bSearchTestPage\b') 
179 found_pages = set([hit.page_name for hit in result.hits]) 
180 assert found_pages == expected_pages 
181 
182 result = self.search(ur'domain:underlay domain:system title:case:re:\bsearchtestpage\b') 
183 found_pages = set([hit.page_name for hit in result.hits]) 
184 assert not found_pages 
185 
186 def test_linkto_search_simple(self): 
187 expected_pages = set([u'SearchTestLinks', ]) 
188 result = self.search(u'domain:underlay domain:system linkto:SearchTestPage') 
189 found_pages = set([hit.page_name for hit in result.hits]) 
190 assert found_pages == expected_pages 
191 
192 result = self.search(u'domain:underlay domain:system linkto:SearchTestNotExisting') 
193 found_pages = set([hit.page_name for hit in result.hits]) 
194 assert not found_pages 
195 
196 def test_linkto_search_re(self): 
197 expected_pages = set([u'SearchTestLinks', u'SearchTestOtherLinks', ]) 
198 result = self.search(ur'domain:underlay domain:system linkto:re:\bSearchTest') 
199 found_pages = set([hit.page_name for hit in result.hits]) 
200 assert found_pages == expected_pages 
201 
202 result = self.search(ur'domain:underlay domain:system linkto:re:\bSearchTest\b') 
203 found_pages = set([hit.page_name for hit in result.hits]) 
204 assert not found_pages 
205 
206 def test_linkto_search_case(self): 
207 expected_pages = set([u'SearchTestLinks', ]) 
208 result = self.search(u'domain:underlay domain:system linkto:case:SearchTestPage') 
209 found_pages = set([hit.page_name for hit in result.hits]) 
210 assert found_pages == expected_pages 
211 
212 result = self.search(u'domain:underlay domain:system linkto:case:searchtestpage') 
213 found_pages = set([hit.page_name for hit in result.hits]) 
214 assert not found_pages 
215 
216 def test_linkto_search_case_re(self): 
217 expected_pages = set([u'SearchTestLinks', ]) 
218 result = self.search(ur'domain:underlay domain:system linkto:case:re:\bSearchTestPage\b') 
219 found_pages = set([hit.page_name for hit in result.hits]) 
220 assert found_pages == expected_pages 
221 
222 result = self.search(ur'domain:underlay domain:system linkto:case:re:\bsearchtestpage\b') 
223 found_pages = set([hit.page_name for hit in result.hits]) 
224 assert not found_pages 
225 
226 def test_category_search_simple(self): 
227 expected_pages = set([u'HomePageWiki', ]) 
228 result = self.search(u'category:CategoryHomepage') 
229 found_pages = set([hit.page_name for hit in result.hits]) 
230 assert found_pages == expected_pages 
231 
232 result = self.search(u'category:CategorySearchTestNotExisting') 
233 found_pages = set([hit.page_name for hit in result.hits]) 
234 assert not found_pages 
235 
236 def test_category_search_re(self): 
237 expected_pages = set([u'HomePageWiki', ]) 
238 result = self.search(ur'category:re:\bCategoryHomepage\b') 
239 found_pages = set([hit.page_name for hit in result.hits]) 
240 assert found_pages == expected_pages 
241 
242 result = self.search(ur'category:re:\bCategoryHomepa\b') 
243 found_pages = set([hit.page_name for hit in result.hits]) 
244 assert not found_pages 
245 
246 def test_category_search_case(self): 
247 expected_pages = set([u'HomePageWiki', ]) 
248 result = self.search(u'category:case:CategoryHomepage') 
249 found_pages = set([hit.page_name for hit in result.hits]) 
250 assert found_pages == expected_pages 
251 
252 result = self.search(u'category:case:categoryhomepage') 
253 found_pages = set([hit.page_name for hit in result.hits]) 
254 assert not found_pages 
255 
256 def test_category_search_case_re(self): 
257 expected_pages = set([u'HomePageWiki', ]) 
258 result = self.search(ur'category:case:re:\bCategoryHomepage\b') 
259 found_pages = set([hit.page_name for hit in result.hits]) 
260 assert found_pages == expected_pages 
261 
262 result = self.search(ur'category:case:re:\bcategoryhomepage\b') 
263 found_pages = set([hit.page_name for hit in result.hits]) 
264 assert not found_pages 
265 
266 def test_mimetype_search_simple(self): 
267 result = self.search(u'mimetype:text/wiki') 
268 test_result = len(result.hits) 
269 assert test_result == 14 
270 
271 def test_mimetype_search_re(self): 
272 result = self.search(ur'mimetype:re:\btext/wiki\b') 
273 test_result = len(result.hits) 
274 assert test_result == 14 
275 
276 result = self.search(ur'category:re:\bCategoryHomepa\b') 
277 found_pages = set([hit.page_name for hit in result.hits]) 
278 assert not found_pages 
279 
280 def test_language_search_simple(self): 
281 result = self.search(u'language:en') 
282 test_result = len(result.hits) 
283 assert test_result == 14 
284 
285 def test_domain_search_simple(self): 
286 result = self.search(u'domain:system') 
287 assert result.hits 
288 
289 def test_search_and(self): 
290 """ search: title search with AND expression """ 
291 expected_pages = set([u'HelpOnCreoleSyntax', ]) 
292 result = self.search(u"title:HelpOnCreoleSyntax lang:en") 
293 found_pages = set([hit.page_name for hit in result.hits]) 
294 assert found_pages == expected_pages 
295 
296 result = self.search(u"title:HelpOnCreoleSyntax lang:de") 
297 found_pages = set([hit.page_name for hit in result.hits]) 
298 assert not found_pages 
299 
300 result = self.search(u"title:Help title:%s" % self.doesnotexist) 
301 found_pages = set([hit.page_name for hit in result.hits]) 
302 assert not found_pages 
303 
304 def testTitleSearchOR(self): 
305 """ search: title search with OR expression """ 
306 expected_pages = set([u'FrontPage', u'RecentChanges', ]) 
307 result = self.search(u"title:FrontPage or title:RecentChanges") 
308 found_pages = set([hit.page_name for hit in result.hits]) 
309 assert found_pages == expected_pages 
310 
311 def testTitleSearchNegatedFindAll(self): 
312 """ search: negated title search for some pagename that does not exist results in all pagenames """ 
313 result = self.search(u"title:%s" % self.doesnotexist) 
314 n_pages = len(self.pages) 
315 test_result = len(result.hits) 
316 assert test_result == n_pages 
317 
318 def testTitleSearchNegativeTerm(self): 
319 """ search: title search for a AND expression with a negative term """ 
320 result = self.search(u"title:FrontPage") 
321 found_pages = set([hit.page_name for hit in result.hits]) 
322 assert u'FrontPage' not in found_pages 
323 test_result = len(result.hits) 
324 n_pages = len(self.pages)  1 
325 assert test_result == n_pages 
326 
327 result = self.search(u"title:HelpOn") 
328 test_result = len(result.hits) 
329 n_pages = len(self.pages)  1 
330 assert test_result == n_pages 
331 
332 def testFullSearchNegatedFindAll(self): 
333 """ search: negated full search for some string that does not exist results in all pages """ 
334 result = self.search(u"%s" % self.doesnotexist) 
335 test_result = len(result.hits) 
336 n_pages = len(self.pages) 
337 assert test_result == n_pages 
338 
339 def testFullSearchRegexCaseInsensitive(self): 
340 """ search: full search for regular expression (case insensitive) """ 
341 search_re = 'ne{2}dle' # matches 'NEEDLE' or 'needle' or ... 
342 expected_pages = set(['ContentSearchUpper', 'ContentSearchLower', ]) 
343 result = self.search(u'domain:underlay domain:system re:%s' % search_re) 
344 found_pages = set([hit.page_name for hit in result.hits]) 
345 assert found_pages == expected_pages 
346 
347 def testFullSearchRegexCaseSensitive(self): 
348 """ search: full search for regular expression (case sensitive) """ 
349 search_re = 'ne{2}dle' # matches 'needle' 
350 expected_pages = set(['ContentSearchLower', ]) 
351 result = self.search(u'domain:underlay domain:system re:case:%s' % search_re) 
352 found_pages = set([hit.page_name for hit in result.hits]) 
353 assert found_pages == expected_pages 
354 
355 def test_title_search(self): 
356 expected_pages = set(['FrontPage', ]) 
357 query = QueryParser(titlesearch=True).parse_query('FrontPage') 
358 result = self.search(query) 
359 found_pages = set([hit.page_name for hit in result.hits]) 
360 assert found_pages == expected_pages 
361 
362 def test_create_page(self): 
363 expected_pages = set([u'TestCreatePage', ]) 
364 self.pages['TestCreatePage'] = 'some text' # Moin search must search this page 
365 try: 
366 create_page(self.request, 'TestCreatePage', self.pages['TestCreatePage']) 
367 self._index_update() 
368 result = self.search(u'domain:underlay domain:system TestCreatePage') 
369 found_pages = set([hit.page_name for hit in result.hits]) 
370 assert found_pages == expected_pages 
371 finally: 
372 nuke_page(self.request, 'TestCreatePage') 
373 self._index_update() 
374 del self.pages['TestCreatePage'] 
375 result = self.search(u'domain:underlay domain:system TestCreatePage') 
376 found_pages = set([hit.page_name for hit in result.hits]) 
377 assert not found_pages 
378 
379 def test_attachment(self): 
380 page_name = u'TestAttachment' 
381 self.pages[page_name] = 'some text' # Moin search must search this page 
382 
383 filename = "AutoCreatedSillyAttachmentForSearching.png" 
384 data = "Test content" 
385 filecontent = StringIO.StringIO(data) 
386 
387 result = self.search(filename) 
388 found_attachments = set([(hit.page_name, hit.attachment) for hit in result.hits]) 
389 assert not found_attachments 
390 
391 try: 
392 create_page(self.request, page_name, self.pages[page_name]) 
393 AttachFile.add_attachment(self.request, page_name, filename, filecontent, True) 
394 append_page(self.request, page_name, '[[attachment:%s]]' % filename) 
395 self._index_update() 
396 result = self.search(filename) 
397 found_attachments = set([(hit.page_name, hit.attachment) for hit in result.hits]) 
398 assert (page_name, '') in found_attachments 
399 assert 1 <= len(found_attachments) <= 2 
400 # Note: moin search returns (page_name, '') as only result 
401 # xapian search returns 2 results: (page_name, '') and (page_name, filename) 
402 # TODO: make behaviour the same, if possible 
403 finally: 
404 nuke_page(self.request, page_name) 
405 del self.pages[page_name] 
406 self._index_update() 
407 result = self.search(filename) 
408 found_attachments = set([(hit.page_name, hit.attachment) for hit in result.hits]) 
409 assert not found_attachments 
410 
411 def test_get_searcher(self): 
412 assert isinstance(_get_searcher(self.request, ''), self.searcher_class) 
413 
414 
class TestMoinSearch(BaseSearchTest):
    """ search: run the shared search tests with MoinSearch """
    searcher_class = MoinSearch

    def get_searcher(self, query):
        """ Return a MoinSearch for query that gets all names of self.pages as pages to search. """
        page_specs = []
        for name in self.pages:
            page_specs.append({'pagename': name, 'attachment': '', 'wikiname': 'Self', })
        return MoinSearch(self.request, query, pages=page_specs)

    def test_stemming(self):
        """ search: title:edit finds TestEdit and TestOnEditing, title:editing only TestOnEditing """
        checks = [
            (u"title:edit", set([u'TestEdit', u'TestOnEditing', ])),
            (u"title:editing", set([u'TestOnEditing', ])),
        ]
        for query, wanted in checks:
            hits = self.search(query).hits
            names = set([hit.page_name for hit in hits])
            assert names == wanted
433 
434 
435 class TestXapianSearch(BaseSearchTest): 
436 """ search: test Xapian indexing / search """ 
437 
438 class Config(wikiconfig.Config): 
439 xapian_search = True 
440 
441 def _index_update(self): 
442 # for xapian, we queue index updates so they can get indexed later. 
443 # here we make sure the queue will be processed completely, 
444 # before we continue: 
445 from MoinMoin.search.Xapian import XapianIndex 
446 XapianIndex(self.request).do_queued_updates() 
447 
448 def get_searcher(self, query): 
449 from MoinMoin.search.Xapian.search import XapianSearch 
450 return XapianSearch(self.request, query) 
451 
452 def get_moin_search_connection(self): 
453 from MoinMoin.search.Xapian import XapianIndex 
454 return XapianIndex(self.request).get_search_connection() 
455 
456 def setup_class(self): 
457 try: 
458 from MoinMoin.search.Xapian import XapianIndex 
459 from MoinMoin.search.Xapian.search import XapianSearch 
460 self.searcher_class = XapianSearch 
461 
462 except ImportError, error: 
463 if not str(error).startswith('Xapian '): 
464 raise 
465 py.test.skip('xapian is not installed') 
466 
467 nuke_xapian_index(self.request) 
468 index = XapianIndex(self.request) 
469 # Additionally, pages which were not created but supposed to be searched 
470 # are indexed. 
471 pages_to_index = [page for page in self.pages if not self.pages[page]] 
472 index.indexPages(mode='add', pages=pages_to_index) 
473 
474 super(TestXapianSearch, self).setup_class() 
475 
476 def teardown_class(self): 
477 nuke_xapian_index(self.request) 
478 
479 def test_get_all_documents(self): 
480 connection = self.get_moin_search_connection() 
481 documents = connection.get_all_documents() 
482 n_pages = len(self.pages) 
483 test_result = len(documents) 
484 assert test_result == n_pages 
485 for document in documents: 
486 assert document.data['pagename'][0] in self.pages.keys() 
487 
488 def test_xapian_term(self): 
489 parser = QueryParser() 
490 connection = self.get_moin_search_connection() 
491 
492 prefixes = {u'': ([u'', u're:', u'case:', u'case:re:'], u'SearchTestPage'), 
493 u'title:': ([u'', u're:', u'case:', u'case:re:'], u'SearchTestPage'), 
494 u'linkto:': ([u'', u're:', u'case:', u'case:re:'], u'FrontPage'), 
495 u'category:': ([u'', u're:', u'case:', u'case:re:'], u'CategoryHomepage'), 
496 u'mimetype:': ([u'', u're:'], u'text/wiki'), 
497 u'language:': ([u''], u'en'), 
498 u'domain:': ([u''], u'system'), 
499 } 
500 
501 def test_query(query): 
502 query_ = parser.parse_query(query).xapian_term(self.request, connection) 
503 print str(query_) 
504 assert not query_.empty() 
505 
506 for prefix, data in prefixes.iteritems(): 
507 modifiers, term = data 
508 for modifier in modifiers: 
509 query = ''.join([prefix, modifier, term]) 
510 yield query, test_query, query 
511 
512 def test_stemming(self): 
513 expected_pages = set([u'TestEdit', ]) 
514 result = self.search(u"title:edit") 
515 found_pages = set([hit.page_name for hit in result.hits]) 
516 assert found_pages == expected_pages 
517 
518 expected_pages = set([u'TestOnEditing', ]) 
519 result = self.search(u"title:editing") 
520 found_pages = set([hit.page_name for hit in result.hits]) 
521 assert found_pages == expected_pages 
522 
523 
class TestXapianSearchStemmed(TestXapianSearch):
    """ search: test Xapian indexing / search - with stemming enabled """

    class Config(wikiconfig.Config):
        xapian_search = True
        xapian_stemming = True

    def test_stemming(self):
        """ search: title:edit and title:editing both find TestEdit and TestOnEditing (skipped for now) """
        py.test.skip("TODO fix TestXapianSearchStemmed  strange effects with stemming")

        # The skip above is unconditional, so the checks below do not run
        # yet. They are kept as description of the wanted behaviour.
        wanted = set([u'TestEdit', u'TestOnEditing', ])
        for query in (u"title:edit", u"title:editing"):
            hits = self.search(query).hits
            names = set([hit.page_name for hit in hits])
            assert names == wanted
543 
544 
class TestGetSearcher(object):
    """ search: _get_searcher() with xapian_search configured, without setting up an index here """

    class Config(wikiconfig.Config):
        xapian_search = True

    def test_get_searcher(self):
        """ search: MoinSearch is expected as searcher for this configuration """
        searcher = _get_searcher(self.request, '')
        assert isinstance(searcher, MoinSearch), 'Xapian index is not created, despite the configuration, MoinSearch must be used!'
552 
# NOTE(review): not referenced anywhere in this file; presumably read by the
# test runner to decide for which modules coverage gets reported - confirm.
coverage_modules = ['MoinMoin.search']
554 