changeset 3635:75db5e7c09e2

csv parser: fix delimiter sniffing troubles, fix tests
author Thomas Waldmann <tw AT waldmann-edv DOT de>
date Fri, 23 May 2008 21:44:42 +0200
parents de4a7a1c89dd
children 4a1813f1cb61
files MoinMoin/parser/_tests/test_text_csv.py MoinMoin/parser/text_csv.py
diffstat 2 files changed, 14 insertions(+), 3 deletions(-) [+]
line wrap: on
line diff
--- a/MoinMoin/parser/_tests/test_text_csv.py	Thu May 22 22:42:32 2008 +0200
+++ b/MoinMoin/parser/_tests/test_text_csv.py	Fri May 23 21:44:42 2008 +0200
@@ -55,9 +55,9 @@
         assert '<tbody><tr></tr>\n</tbody>' in  result
 
     def testnodelimiter(self):
-        """ parser.text_csv: empty line """
+        """ parser.text_csv: line without delimiter """
         result = self.parse('ABCDEFGHIJ')
-        assert '<td><strong>ABCDEFGHI</strong></td>' in  result
+        assert '<td><strong>ABCDEFGHIJ</strong></td>' in  result
 
 coverage_modules = ['MoinMoin.parser.text_csv']
 
--- a/MoinMoin/parser/text_csv.py	Thu May 22 22:42:32 2008 +0200
+++ b/MoinMoin/parser/text_csv.py	Fri May 23 21:44:42 2008 +0200
@@ -29,6 +29,7 @@
 """
 
 from csv import reader, QUOTE_NONE, QUOTE_MINIMAL, Sniffer
+from _csv import Error
 
 from MoinMoin.util.dataset import TupleDataset, Column
 from MoinMoin.widget.browser import DataBrowserWidget
@@ -55,8 +56,18 @@
         # workaround csv.reader deficiency by encoding to utf-8
         data = raw.encode('utf-8').split('\n')
         delimiter = ';'
+        # Previous versions of this parser used only the delimiter ";" (by default).
+        # This version tries to sniff the delimiter from the list preferred_delimiters.
+        # The Python csv sniffer changed considerably from py 2.3 to 2.5.1, so we guard
+        # against the case where it does not find a delimiter in the given data:
+        # newer versions of the sniffer raise an _csv.Error, while older versions
+        # return a whitespace as the delimiter.
         if data[0]:
-            delimiter = Sniffer().sniff(data[0]).delimiter
+            try:
+                preferred_delimiters =  [',', '\t', ';', ' ', ':']
+                delimiter = Sniffer().sniff(data[0], preferred_delimiters).delimiter or ';'
+            except Error:
+                pass
 
         visible = None
         hiddenindexes = []