changeset 3700:9426f34f07b9

AttachFile: refactored unzip subaction code: * simplified code * fix total size/count prediction (considering overwrite mode) * fix common zip member file path prefix removal * remove special-casing of zip in zip (we happily unpack those files as long as they are within the given limits) * if a zip exceeds some limit or is invalid because it contains stuff in different directories, completely reject it (do not partially unpack it) * 'overwrite' parameter (no UI yet, though) * better error msgs * usage of add_attachment() to avoid code duplication
author Thomas Waldmann <tw AT waldmann-edv DOT de>
date Wed, 11 Jun 2008 12:52:22 +0200
parents 7b4a32481d1e
children 882fbf5a3ba8
files MoinMoin/action/AttachFile.py
diffstat 1 files changed, 76 insertions(+), 94 deletions(-) [+]
line wrap: on
line diff
--- a/MoinMoin/action/AttachFile.py	Wed Jun 11 09:51:10 2008 +0200
+++ b/MoinMoin/action/AttachFile.py	Wed Jun 11 12:52:22 2008 +0200
@@ -376,28 +376,6 @@
     return _build_filelist(request, pagename, 1, 0)
 
 
-def _subdir_exception(zf):
-    """
-    Checks for the existance of one common subdirectory shared among
-    all files in the zip file. If this is the case, returns a dict of
-    original names to modified names so that such files can be unpacked
-    as the user would expect.
-    """
-
-    b = zf.namelist()
-    if not '/' in b[0]:
-        return False # no directory
-    slashoffset = b[0].index('/')
-    directory = b[0][:slashoffset]
-    for origname in b:
-        if origname.rfind('/') != slashoffset or origname[:slashoffset] != directory:
-            return False # multiple directories or different directory
-    names = {}
-    for origname in b:
-        names[origname] = origname[slashoffset+1:]
-    return names # returns dict of {origname: safename}
-
-
 def error_msg(pagename, request, msg):
     request.theme.add_msg(msg, "error")
     Page(request, pagename).send_page()
@@ -861,88 +839,92 @@
     upload_form(pagename, request, msg=msg)
 
 
-def _do_unzip(pagename, request):
+def _do_unzip(pagename, request, overwrite=False):
     _ = request.getText
-    valid_pathname = lambda name: ('/' not in name) and ('\\' not in name)
+    pagename, filename, fpath = _access_file(pagename, request)
 
-    pagename, filename, fpath = _access_file(pagename, request)
     if not (request.user.may.delete(pagename) and request.user.may.read(pagename) and request.user.may.write(pagename)):
         return _('You are not allowed to unzip attachments of this page.')
+
     if not filename:
         return # error msg already sent in _access_file
 
-    single_file_size = request.cfg.unzip_single_file_size
-    attachments_file_space = request.cfg.unzip_attachments_space
-    attachments_file_count = request.cfg.unzip_attachments_count
-
-    files = _get_files(request, pagename)
-
-    msg = ""
-    if files:
-        fsize = 0.0
-        fcount = 0
-        for f in files:
-            fsize += float(size(request, pagename, f))
-            fcount += 1
-
-        available_attachments_file_space = attachments_file_space - fsize
-        available_attachments_file_count = attachments_file_count - fcount
-
-        if zipfile.is_zipfile(fpath):
-            zf = zipfile.ZipFile(fpath)
-            sum_size_over_all_valid_files = 0.0
-            count_valid_files = 0
-            namelist = _subdir_exception(zf)
-            if not namelist: # if it's not handled by _subdir_exception()
-                # convert normal zf.namelist() to {origname:finalname} dict
-                namelist = {}
-                for name in zf.namelist():
-                    namelist[name] = name
-            for (origname, finalname) in namelist.iteritems():
-                if valid_pathname(finalname):
-                    sum_size_over_all_valid_files += zf.getinfo(origname).file_size
-                    count_valid_files += 1
+    if not zipfile.is_zipfile(fpath):
+        return _('The file %(filename)s is not a .zip file.') % {'filename': filename}
 
-            if sum_size_over_all_valid_files > available_attachments_file_space:
-                msg = _("Attachment '%(filename)s' could not be unzipped because"
-                        " the resulting files would be too large (%(space)d kB"
-                        " missing).") % {
-                            'filename': filename,
-                            'space': (sum_size_over_all_valid_files -
-                                available_attachments_file_space) / 1000 }
-            elif count_valid_files > available_attachments_file_count:
-                msg = _("Attachment '%(filename)s' could not be unzipped because"
-                        " the resulting files would be too many (%(count)d "
-                        "missing).") % {
-                            'filename': filename,
-                            'count': (count_valid_files -
-                                available_attachments_file_count) }
-            else:
-                valid_name = False
-                for (origname, finalname) in namelist.iteritems():
-                    if valid_pathname(finalname):
-                        zi = zf.getinfo(origname)
-                        if zi.file_size < single_file_size:
-                            new_file = getFilename(request, pagename, finalname)
-                            if not os.path.exists(new_file):
-                                outfile = open(new_file, 'wb')
-                                outfile.write(zf.read(origname))
-                                outfile.close()
-                                # it's not allowed to zip a zip file so it is dropped
-                                if zipfile.is_zipfile(new_file):
-                                    os.unlink(new_file)
-                                else:
-                                    valid_name = True
-                                    _addLogEntry(request, 'ATTNEW', pagename, finalname)
+    # determine which attachment names we have and how much space each is occupying
+    curr_fsizes = dict([(f, size(request, pagename, f)) for f in _get_files(request, pagename)])
 
-                if valid_name:
-                    msg = _("Attachment '%(filename)s' unzipped.") % {'filename': filename}
-                else:
-                    msg = _("Attachment '%(filename)s' not unzipped because the "
-                            "files are too big, .zip files only, exist already or "
-                            "reside in folders.") % {'filename': filename}
+    # Checks for the existence of one common prefix path shared among
+    # all files in the zip file. If this is the case, remove the common prefix.
+    # We also prepare a dict of the new filenames->filesizes.
+    zip_path_sep = '/'  # we assume '/' is as zip standard suggests
+    fname_index = None
+    mapping = []
+    new_fsizes = {}
+    zf = zipfile.ZipFile(fpath)
+    for zi in zf.infolist():
+        name = zi.filename
+        if not name.endswith(zip_path_sep):  # a file (not a directory)
+            if fname_index is None:
+                fname_index = name.rfind(zip_path_sep) + 1
+                path = name[:fname_index]
+            if (name.rfind(zip_path_sep) + 1 != fname_index  # different prefix len
+                or
+                name[:fname_index] != path): # same len, but still different
+                mapping = []  # zip is not acceptable
+                break
+            if zi.file_size >= request.cfg.unzip_single_file_size:  # file too big
+                mapping = []  # zip is not acceptable
+                break
+            finalname = name[fname_index:]  # remove common path prefix
+            mapping.append((name, finalname))
+            new_fsizes[finalname] = zi.file_size
+
+    # now we either have an empty mapping (if the zip is not acceptable),
+    # an identity mapping (no subdirs in zip, just all flat), or
+    # a mapping (origname, finalname) where origname is the zip member filename
+    # (including some prefix path) and finalname is a simple filename.
+
+    # calculate resulting total file size / count after unzipping:
+    if overwrite:
+        curr_fsizes.update(new_fsizes)
+        total = curr_fsizes
+    else:
+        new_fsizes.update(curr_fsizes)
+        total = new_fsizes
+    total_count = len(total)
+    total_size = sum(total.values())
+
+    if not mapping:
+        msg = _("Attachment '%(filename)s' not unzipped because some files in the zip "
+                "are either not in the same directory or exceeded the single file size limit (%(maxsize_file)d kB)."
+               ) % {'filename': filename,
+                    'maxsize_file': request.cfg.unzip_single_file_size / 1000, }
+    elif total_size > request.cfg.unzip_attachments_space:
+        msg = _("Attachment '%(filename)s' not unzipped because it would have exceeded "
+                "the per page attachment storage size limit (%(size)d kB).") % {
+                    'filename': filename,
+                    'size': request.cfg.unzip_attachments_space / 1000, }
+    elif total_count > request.cfg.unzip_attachments_count:
+        msg = _("Attachment '%(filename)s' not unzipped because it would have exceeded "
+                "the per page attachment count limit (%(count)d).") % {
+                    'filename': filename,
+                    'count': request.cfg.unzip_attachments_count, }
+    else:
+        not_overwritten = []
+        for origname, finalname in mapping:
+            try:
+                # Note: reads complete zip member file into memory. ZipFile does not offer block-wise reading:
+                add_attachment(request, pagename, finalname, zf.read(origname), overwrite)
+            except AttachmentAlreadyExists:
+                not_overwritten.append(finalname)
+        if not_overwritten:
+            msg = _("Attachment '%(filename)s' partially unzipped (did not overwrite: %(filelist)s).") % {
+                    'filename': filename,
+                    'filelist': ', '.join(not_overwritten), }
         else:
-            msg = _('The file %(filename)s is not a .zip file.') % {'filename': filename}
+            msg = _("Attachment '%(filename)s' unzipped.") % {'filename': filename}
 
     upload_form(pagename, request, msg=wikiutil.escape(msg))