changeset 649:4b31e0f1bcc8

macro: PdfList based on pyPdf.PdfFileReader, reads Author and Title from each attached PDF file and outputs a table for easy access
author Reimar Bauer <rb.proj AT googlemail DOT com>
date Fri, 02 Oct 2015 08:44:15 +0200
parents 2f092fcc3999
children cd3309f76490
files data/plugin/macro/PdfList.py
diffstat 1 files changed, 151 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data/plugin/macro/PdfList.py	Fri Oct 02 08:44:15 2015 +0200
@@ -0,0 +1,151 @@
+"""
+    MoinMoin - PdfList Macro
+
+    A macro that lists the PDF files attached to a page in a table, showing
+    the author and title read from each file's metadata.
+
+    Usage: <<PdfList([pagename])>>
+
+    If pagename isn't set, the current pagename is used.
+
+
+    @copyright: 2004 Jacob Cohen, Nigel Metheringham,
+                2006-2015 MoinMoin:ReimarBauer
+    @license: GNU GPL, see COPYING for details.
+"""
+
+import os
+import re
+import mimetypes
+from MoinMoin.action.AttachFile import getAttachDir, getAttachUrl, _get_files
+from MoinMoin import wikiutil
+from MoinMoin import config
+
+# ToDo change to pyPDF2
+try:
+    from pyPdf import PdfFileReader
+except ImportError:
+    try:
+        from MoinMoin.support.pyPdf import PdfFileReader
+    except ImportError:
+        PdfFileReader = None
+
+def _build_filelist(request, pagename, showheader, readonly, mime_type='application/pdf', filterfn=None):
+
+    _ = request.getText
+
+    fmt = request.html_formatter
+
+    # access directory
+    attach_dir = getAttachDir(request, pagename)
+    files = _get_files(request, pagename)
+
+    if mime_type != '*':
+        files = [fname for fname in files if mime_type == mimetypes.guess_type(fname)[0]]
+    if filterfn is not None:
+        files = [fname for fname in files if filterfn(fname)]
+
+    html = []
+    if files:
+        if showheader:
+            html.append(fmt.rawHTML(_(
+                "To refer to attachments on a page, use '''{{{attachment:filename}}}''', \n"
+                "as shown below in the list of files. \n"
+                "Do '''NOT''' use the URL of the {{{[get]}}} link, \n"
+                "since this is subject to change and can break easily.",
+                wiki=True
+            )))
+
+        label_get = _("get")
+        label_edit = _("edit")
+        label_view = _("view")
+        may_write = request.user.may.write(pagename)
+
+
+        html.append(u"""\
+<script>
+function checkAll(bx, targets_name) {
+  var cbs = document.getElementsByTagName('input');
+  for(var i=0; i < cbs.length; i++) {
+    if(cbs[i].type == 'checkbox' && cbs[i].name == targets_name) {
+      cbs[i].checked = bx.checked;
+    }
+  }
+}
+</script>
+<form method="POST">
+<input type="hidden" name="action" value="AttachFile">
+<input type="hidden" name="do" value="multifile">
+""")
+
+
+        # ToDo use to DataBrowserWidget
+        html.append('<table>')
+        html.append('<tr><td><b>Author</b></td><td><b>Title</b></td><td></td><td></td></tr>')
+        for file in files:
+            html.append('<tr>')
+            html.append('<td>')
+
+            fullpath = os.path.join(attach_dir, file).encode(config.charset)
+            pdf = PdfFileReader(open(fullpath, 'rb'))
+            pdf_meta = pdf.documentInfo
+            xmp_meta = pdf.xmpMetadata
+            try:
+                pdf_meta = {"/Title": wikiutil.escape(xmp_meta.dc_title['x-default']),
+                            "/Author": wikiutil.escape(pdf_meta['/Author'])}
+            except AttributeError:
+                pass
+
+
+            links = []
+
+            links.append(fmt.url(1, getAttachUrl(pagename, file, request)) +
+                         fmt.text(label_get) +
+                         fmt.url(0))
+
+            links.append(fmt.url(1, getAttachUrl(pagename, file, request, do='view')) +
+                         fmt.text(label_view) +
+                         fmt.url(0))
+
+            if may_write and not readonly:
+                edit_url = getAttachUrl(pagename, file, request, do='modify')
+                if edit_url:
+                    links.append(fmt.url(1, edit_url) +
+                                 fmt.text(label_edit) +
+                                 fmt.url(0))
+
+
+            try:
+                html.append("%(/Author)s</td><td>%(/Title)s</td><td>" % {"/Title": wikiutil.escape(pdf_meta['/Title']),
+                                                                         "/Author": wikiutil.escape(pdf_meta['/Author'])})
+            except KeyError:
+                html.append('</td><td>%(filename)s</td><td>' % {"filename": file})
+
+            html.append("%s" % "&nbsp;</td><td> ".join(links))
+            html.append('</td>')
+            html.append('</tr>')
+
+
+        html.append('</table>')
+        html.append("</form>")
+        html.append("...based on PDF Meta information")
+
+    else:
+        if showheader:
+            html.append(fmt.paragraph(1))
+            html.append(fmt.text(_("No attachments stored for %(pagename)s") % {
+                                   'pagename': pagename}))
+            html.append(fmt.paragraph(0))
+
+    html.append(fmt.table(0))
+    return ''.join(html)
+
+def macro_PdfList(macro, pagename=None, mime_type=u'*', search_term=u'.+'):
+    # defaults if we don't get anything better
+    if PdfFileReader is None:
+        return u"please install pyPdf"
+    if not pagename:
+        pagename = macro.formatter.page.page_name
+    filterfn = re.compile(search_term, re.U).search
+    return _build_filelist(macro.request, pagename, 0, 1, mime_type='application/pdf', filterfn=filterfn)
+
+