changeset 792:3847b31a690f

wikiutil.mimetype2modulename, xapian indexes mimetype, xapwrap uses prefixMap on indexing (not only for queryparser)
author Thomas Waldmann <tw AT waldmann-edv DOT de>
date Mon, 01 May 2006 19:26:01 +0200
parents 50f4d23d2e37
children a465544cff9a
files MoinMoin/Xapian.py MoinMoin/support/xapwrap/document.py MoinMoin/support/xapwrap/index.py MoinMoin/wikiaction.py MoinMoin/wikiutil.py
diffstat 5 files changed, 29 insertions(+), 21 deletions(-) [+]
line wrap: on
line diff
--- a/MoinMoin/Xapian.py	Mon May 01 17:41:05 2006 +0200
+++ b/MoinMoin/Xapian.py	Mon May 01 19:26:01 2006 +0200
@@ -383,11 +383,8 @@
 
     def contentfilter(self, filename):
         """ Get a filter for content of filename and return unicode content. """
-        
-        def mt2mn(mt): # mimetype to modulename
-            return mt.replace("/", "_").replace("-","_").replace(".", "_")
-
         request = self.request
+        mt2mn = wikiutil.mimetype2modulename
         mimetype, encoding = wikiutil.guess_type(filename)
         if mimetype is None:
             mimetype = 'application/octet-stream'
@@ -410,7 +407,7 @@
         except (OSError, IOError), err:
             data = ''
             request.log("Filter %s threw error '%s' for file %s" % (_filter, str(err), filename))
-        return data
+        return mimetype, data
    
     def test(self, request):
         idx = xapidx.ReadOnlyIndex(self.dir)
@@ -446,14 +443,15 @@
                 updated = True
             if debug: request.log("%s %r" % (filename, updated))
             if updated:
-                file_content = self.contentfilter(filename)
+                mimetype, file_content = self.contentfilter(filename)
                 pname = xapdoc.SortKey('pagename', fs_rootpage)
                 attachment = xapdoc.SortKey('attachment', filename) # XXX we should treat files like real pages, not attachments
                 mtime = xapdoc.SortKey('mtime', mtime)
                 title = " ".join(os.path.join(fs_rootpage, filename).split("/"))
                 title = xapdoc.Keyword('title', title)
+                mimetype = xapdoc.TextField('mimetype', mimetype, True)
                 content = xapdoc.TextField('content', file_content)
-                doc = xapdoc.Document(textFields=(content,),
+                doc = xapdoc.Document(textFields=(content, mimetype, ),
                                       keywords=(title, ),
                                       sortFields=(pname, attachment, mtime,),
                                      )
@@ -550,9 +548,10 @@
                 attachment = xapdoc.SortKey('attachment', att) # this is an attachment, store its filename
                 mtime = xapdoc.SortKey('mtime', mtime)
                 title = xapdoc.Keyword('title', '%s/%s' % (pagename, att))
-                att_content = self.contentfilter(filename)
+                mimetype, att_content = self.contentfilter(filename)
+                mimetype = xapdoc.TextField('mimetype', mimetype, True)
                 content = xapdoc.TextField('content', att_content)
-                doc = xapdoc.Document(textFields=(content,),
+                doc = xapdoc.Document(textFields=(content, mimetype, ),
                                       keywords=(title, ),
                                       sortFields=(pname, attachment, mtime,),
                                      )
--- a/MoinMoin/support/xapwrap/document.py	Mon May 01 17:41:05 2006 +0200
+++ b/MoinMoin/support/xapwrap/document.py	Mon May 01 19:26:01 2006 +0200
@@ -142,7 +142,7 @@
 
         return length
 
-    def toXapianDocument(self, indexValueMap):
+    def toXapianDocument(self, indexValueMap, prefixMap=None):
         d = xapian.Document()
         position = 1
         analyzer = self.analyzerFactory()
@@ -165,14 +165,14 @@
                     # XXX FIXME: slight loss of efficiency here: token is
                     # already known to be in UTF-8 and we convert it
                     # back to unicode and then back to UTF-8 again...
-                    term = makePairForWrite(prefix, token)
+                    term = makePairForWrite(prefix, token, prefixMap)
                     d.add_posting(term, position)
                     position += 1
                 position += INTER_FIELD_POSITION_GAP
 
         # add keyword fields
         for field in self.keywords:
-            term = makePairForWrite(field.name, field.value)
+            term = makePairForWrite(field.name, field.value, prefixMap)
             d.add_term(term)
 
         # add non positional terms
@@ -281,7 +281,7 @@
         return originalText.encode(UNICODE_ENCODING, UNICODE_ERROR_POLICY)
 
 
-def makePairForWrite(prefix, token):
+def makePairForWrite(prefix, token, prefixMap=None):
     # prefixes must be uppercase; if the prefix given to us is a str
     # that happens to be UTF-8 encoded, bad things will happen when we
     # uppercase it, so we convert everything to unicode first
@@ -290,7 +290,12 @@
     if isinstance(token, str):
         token = token.decode(UNICODE_ENCODING, UNICODE_ERROR_POLICY) # XXX hardcoded UTF-8, make param
 
-    result = prefix.upper() + token
+    if prefixMap is None:
+        prefix = prefix.upper()
+    else: # we have a map, so first translate it using the map (e.g. 'title' -> 'S')
+        prefix = prefixMap.get(prefix, prefix.upper())
+
+    result = prefix + token
     # since return value is going into the db, it must be encoded as UTF-8
     result = result.encode(UNICODE_ENCODING, UNICODE_ERROR_POLICY)
     return checkKeyLen(result)
--- a/MoinMoin/support/xapwrap/index.py	Mon May 01 17:41:05 2006 +0200
+++ b/MoinMoin/support/xapwrap/index.py	Mon May 01 19:26:01 2006 +0200
@@ -809,7 +809,7 @@
             uid = int(doc.uid)
             doc.sortFields.append(SortKey('uid', uid))
             doc.keywords.append(Keyword('uid', str(uid)))
-            xapDoc = doc.toXapianDocument(self.indexValueMap)
+            xapDoc = doc.toXapianDocument(self.indexValueMap, self.prefixMap)
             self.replace_document(uid, xapDoc)
         else:
             # We need to know the uid of the doc we're going to add
@@ -823,12 +823,12 @@
             uid = self.get_lastdocid() + 1
             doc.sortFields.append(SortKey('uid', uid))
             doc.keywords.append(Keyword('uid', str(uid)))
-            xapDoc = doc.toXapianDocument(self.indexValueMap)
+            xapDoc = doc.toXapianDocument(self.indexValueMap, self.prefixMap)
             newUID = self.add_document(xapDoc)
             if newUID != uid:
                 doc.sortFields.append(SortKey('uid', newUID))
                 doc.keywords.append(Keyword('uid', str(newUID)))
-                xapDoc = doc.toXapianDocument(self.indexValueMap)
+                xapDoc = doc.toXapianDocument(self.indexValueMap, self.prefixMap)
                 self.replace_document(newUID, xapDoc)
 
             # a simpler alternative would be to add an empty document
--- a/MoinMoin/wikiaction.py	Mon May 01 17:41:05 2006 +0200
+++ b/MoinMoin/wikiaction.py	Mon May 01 19:26:01 2006 +0200
@@ -806,14 +806,14 @@
         mimetype = u"text/plain"
 
     # try to load the formatter
-    formatterName = mimetype.translate({ord(u'/'): u'_', ord(u'.'): u'_'})
+    formatterName = wikiutil.mimetype2modulename(mimetype)
     try:
-        Formatter = wikiutil.importPlugin(request.cfg, "formatter",
-                                          formatterName, "Formatter")
+        Formatter = wikiutil.importPlugin(request.cfg, "formatter", formatterName, "Formatter")
     except wikiutil.PluginMissingError:
         # default to plain text formatter
         mimetype = "text/plain"
-        from MoinMoin.formatter.text_plain import Formatter
+        formatterName = wikiutil.mimetype2modulename(mimetype)
+        Formatter = wikiutil.importPlugin(request.cfg, "formatter", formatterName, "Formatter")
 
     if "xml" in mimetype:
         mimetype = "text/xml"
--- a/MoinMoin/wikiutil.py	Mon May 01 17:41:05 2006 +0200
+++ b/MoinMoin/wikiutil.py	Mon May 01 19:26:01 2006 +0200
@@ -48,6 +48,10 @@
 }
 [mimetypes.add_type(mimetype, ext, True) for ext, mimetype in _our_types.items()]
 
+def mimetype2modulename(mimetype):
+    """ Convert a mimetype to a string usable as a Python module name: '/', '-' and '.' become '_' (e.g. text/x-python -> text_x_python). """
+    return mimetype.replace("/", "_").replace("-","_").replace(".", "_")
+
 #############################################################################
 ### Getting data from user/Sending data to user
 #############################################################################