comparison MoinMoin/support/parsedatetime/parsedatetime_consts.py @ 5185:0a6fe22644e3

updated parsedatetime to 0.8.7
author Thomas Waldmann <tw AT waldmann-edv DOT de>
date Sun, 04 Oct 2009 19:32:01 +0200
parents 62177a952833
children
comparison
equal deleted inserted replaced
5184:0970ed47d2cd 5185:0a6fe22644e3
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 2
3 """ 3 """
4 The Constants class defines all constants used by parsedatetime.py. 4 parsedatetime constants and helper functions to determine
5 regex values from Locale information if present.
6
7 Also contains the internal Locale classes to give some sane
8 defaults if PyICU is not found.
5 """ 9 """
6 10
7 __license__ = """Copyright (c) 2004-2006 Mike Taylor, All rights reserved. 11 __license__ = """
12 Copyright (c) 2004-2008 Mike Taylor
13 Copyright (c) 2006-2008 Darshana Chhajed
14 Copyright (c) 2007 Bernd Zeimetz <bzed@debian.org>
15 All rights reserved.
8 16
9 Licensed under the Apache License, Version 2.0 (the "License"); 17 Licensed under the Apache License, Version 2.0 (the "License");
10 you may not use this file except in compliance with the License. 18 you may not use this file except in compliance with the License.
11 You may obtain a copy of the License at 19 You may obtain a copy of the License at
12 20
16 distributed under the License is distributed on an "AS IS" BASIS, 24 distributed under the License is distributed on an "AS IS" BASIS,
17 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 25 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 See the License for the specific language governing permissions and 26 See the License for the specific language governing permissions and
19 limitations under the License. 27 limitations under the License.
20 """ 28 """
21 __author__ = 'Mike Taylor <http://code-bear.com>'
22 __contributors__ = [ 'Darshana Chhajed <mailto://darshana@osafoundation.org>',
23 ]
24
25 29
26 try: 30 try:
27 import PyICU as pyicu 31 import PyICU as pyicu
28 except: 32 except:
29 pyicu = None 33 pyicu = None
30 34
31 35
32 import string 36 import datetime
33 import datetime, time 37 import calendar
38 import time
39 import re
34 40
35 41
36 class pdtLocale_en: 42 class pdtLocale_en:
37 """ 43 """
38 en_US Locale constants 44 en_US Locale constants
39 45
40 This class will be used to initialize C{Constants} if PyICU is not located. 46 This class will be used to initialize L{Constants} if PyICU is not located.
41 47
42 Defined as class variables are the lists and strings needed by parsedatetime 48 Defined as class variables are the lists and strings needed by parsedatetime
43 to evaluate strings in English (US) 49 to evaluate strings for USA
44 """ 50 """
45 51
46 localeID = 'en_US' # don't use a unicode string 52 localeID = 'en_US' # don't use a unicode string
47 dateSep = u'/' 53 dateSep = [ u'/', u'.' ]
48 timeSep = u':' 54 timeSep = [ u':' ]
49 meridian = [ u'AM', u'PM' ] 55 meridian = [ u'AM', u'PM' ]
50 usesMeridian = True 56 usesMeridian = True
51 uses24 = False 57 uses24 = False
52 58
53 Weekdays = [ u'sunday', u'monday', u'tuesday', 59 Weekdays = [ u'monday', u'tuesday', u'wednesday',
54 u'wednesday', u'thursday', u'friday', u'saturday', 60 u'thursday', u'friday', u'saturday', u'sunday',
55 ] 61 ]
56 shortWeekdays = [ u'sun', u'mon', u'tues', 62 shortWeekdays = [ u'mon', u'tues', u'wed',
57 u'wed', u'thu', u'fri', u'sat', 63 u'thu', u'fri', u'sat', u'sun',
58 ] 64 ]
59 Months = [ u'january', u'february', u'march', 65 Months = [ u'january', u'february', u'march',
60 u'april', u'may', u'june', 66 u'april', u'may', u'june',
61 u'july', u'august', u'september', 67 u'july', u'august', u'september',
62 u'october', u'november', u'december', 68 u'october', u'november', u'december',
74 timeFormats = { 'full': 'h:mm:ss a z', 80 timeFormats = { 'full': 'h:mm:ss a z',
75 'long': 'h:mm:ss a z', 81 'long': 'h:mm:ss a z',
76 'medium': 'h:mm:ss a', 82 'medium': 'h:mm:ss a',
77 'short': 'h:mm a', 83 'short': 'h:mm a',
78 } 84 }
85
86 dp_order = [ u'm', u'd', u'y' ]
79 87
80 # this will be added to re_consts later 88 # this will be added to re_consts later
81 units = { 'seconds': [ 'second', 'sec' ], 89 units = { 'seconds': [ 'second', 'sec' ],
82 'minutes': [ 'minute', 'min' ], 90 'minutes': [ 'minute', 'min' ],
83 'hours': [ 'hour', 'hr' ], 91 'hours': [ 'hour', 'hr' ],
86 'months': [ 'month', 'mth' ], 94 'months': [ 'month', 'mth' ],
87 'years': [ 'year', 'yr' ], 95 'years': [ 'year', 'yr' ],
88 } 96 }
89 97
90 # text constants to be used by regex's later 98 # text constants to be used by regex's later
91 re_consts = { 'specials': 'in|on|of|at', 99 re_consts = { 'specials': 'in|on|of|at',
92 'timeseperator': ':', 100 'timeseperator': ':',
93 'daysuffix': 'rd|st|nd|th', 101 'rangeseperator': '-',
94 'meridian': 'am|pm|a.m.|p.m.|a|p', 102 'daysuffix': 'rd|st|nd|th',
95 'qunits': 'h|m|s|d|w|m|y', 103 'meridian': 'am|pm|a.m.|p.m.|a|p',
96 'now': [ 'now' ], 104 'qunits': 'h|m|s|d|w|m|y',
105 'now': [ 'now' ],
97 } 106 }
98 107
99 # Used to adjust the returned date before/after the source 108 # Used to adjust the returned date before/after the source
100 modifiers = { 'from': 1, 109 modifiers = { 'from': 1,
101 'before': -1, 110 'before': -1,
102 'after': 1, 111 'after': 1,
103 'ago': 1, 112 'ago': -1,
104 'prior': -1, 113 'prior': -1,
105 'prev': -1, 114 'prev': -1,
106 'last': -1, 115 'last': -1,
107 'next': 1, 116 'next': 1,
108 'this': 0,
109 'previous': -1, 117 'previous': -1,
110 'in a': 2, 118 'in a': 2,
111 'end of': 0, 119 'end of': 0,
112 'eo': 0, 120 'eod': 0,
121 'eo': 0
113 } 122 }
114 123
115 dayoffsets = { 'tomorrow': 1, 124 dayoffsets = { 'tomorrow': 1,
116 'today': 0, 125 'today': 0,
117 'yesterday': -1, 126 'yesterday': -1,
128 'dinner': { 'hr': 19, 'mn': 0, 'sec': 0 }, 137 'dinner': { 'hr': 19, 'mn': 0, 'sec': 0 },
129 'evening': { 'hr': 18, 'mn': 0, 'sec': 0 }, 138 'evening': { 'hr': 18, 'mn': 0, 'sec': 0 },
130 'midnight': { 'hr': 0, 'mn': 0, 'sec': 0 }, 139 'midnight': { 'hr': 0, 'mn': 0, 'sec': 0 },
131 'night': { 'hr': 21, 'mn': 0, 'sec': 0 }, 140 'night': { 'hr': 21, 'mn': 0, 'sec': 0 },
132 'tonight': { 'hr': 21, 'mn': 0, 'sec': 0 }, 141 'tonight': { 'hr': 21, 'mn': 0, 'sec': 0 },
133 } 142 'eod': { 'hr': 17, 'mn': 0, 'sec': 0 },
134 143 }
135 144
136 class pdtLocale_es: 145
137 """ 146 class pdtLocale_au:
138 es Locale constants 147 """
139 148 en_AU Locale constants
140 This class will be used to initialize C{Constants} if PyICU is not located. 149
150 This class will be used to initialize L{Constants} if PyICU is not located.
141 151
142 Defined as class variables are the lists and strings needed by parsedatetime 152 Defined as class variables are the lists and strings needed by parsedatetime
143 to evaluate strings in Spanish 153 to evaluate strings for Australia
144 154 """
145 Note that I don't speak Spanish so many of the items below are still in English 155
146 """ 156 localeID = 'en_AU' # don't use a unicode string
147 157 dateSep = [ u'-', u'/' ]
148 localeID = 'es' # don't use a unicode string 158 timeSep = [ u':' ]
149 dateSep = u'/' 159 meridian = [ u'AM', u'PM' ]
150 timeSep = u':' 160 usesMeridian = True
151 meridian = [] 161 uses24 = False
152 usesMeridian = False 162
153 uses24 = True 163 Weekdays = [ u'monday', u'tuesday', u'wednesday',
154 164 u'thursday', u'friday', u'saturday', u'sunday',
155 Weekdays = [ u'domingo', u'lunes', u'martes', 165 ]
156 u'mi\xe9rcoles', u'jueves', u'viernes', u's\xe1bado', 166 shortWeekdays = [ u'mon', u'tues', u'wed',
157 ] 167 u'thu', u'fri', u'sat', u'sun',
158 shortWeekdays = [ 'dom', u'lun', u'mar', 168 ]
159 u'mi\xe9', u'jue', u'vie', u's\xe1b', 169 Months = [ u'january', u'february', u'march',
160 ] 170 u'april', u'may', u'june',
161 Months = [ u'enero', u'febrero', u'marzo', 171 u'july', u'august', u'september',
162 u'abril', u'mayo', u'junio', 172 u'october', u'november', u'december',
163 u'julio', u'agosto', u'septiembre', 173 ]
164 u'octubre', u'noviembre', u'diciembre' 174 shortMonths = [ u'jan', u'feb', u'mar',
165 ] 175 u'apr', u'may', u'jun',
166 shortMonths = [ u'ene', u'feb', u'mar', 176 u'jul', u'aug', u'sep',
167 u'abr', u'may', u'jun', 177 u'oct', u'nov', u'dec',
168 u'jul', u'ago', u'sep', 178 ]
169 u'oct', u'nov', u'dic' 179 dateFormats = { 'full': 'EEEE, d MMMM yyyy',
170 ] 180 'long': 'd MMMM yyyy',
171 dateFormats = { 'full': "EEEE d' de 'MMMM' de 'yyyy", 181 'medium': 'dd/MM/yyyy',
172 'long': "d' de 'MMMM' de 'yyyy", 182 'short': 'd/MM/yy',
173 'medium': "dd-MMM-yy", 183 }
174 'short': "d/MM/yy", 184 timeFormats = { 'full': 'h:mm:ss a z',
175 } 185 'long': 'h:mm:ss a',
176 timeFormats = { 'full': "HH'H'mm' 'ss z", 186 'medium': 'h:mm:ss a',
177 'long': "HH:mm:ss z", 187 'short': 'h:mm a',
178 'medium': "HH:mm:ss", 188 }
179 'short': "HH:mm", 189
180 } 190 dp_order = [ u'd', u'm', u'y' ]
181 191
182 # this will be added to re_consts later 192 # this will be added to re_consts later
183 units = { 'seconds': [ 'second', 'sec' ], 193 units = { 'seconds': [ 'second', 'sec' ],
184 'minutes': [ 'minute', 'min' ], 194 'minutes': [ 'minute', 'min' ],
185 'hours': [ 'hour', 'hr' ], 195 'hours': [ 'hour', 'hr' ],
188 'months': [ 'month', 'mth' ], 198 'months': [ 'month', 'mth' ],
189 'years': [ 'year', 'yr' ], 199 'years': [ 'year', 'yr' ],
190 } 200 }
191 201
192 # text constants to be used by regex's later 202 # text constants to be used by regex's later
193 re_consts = { 'specials': 'in|on|of|at', 203 re_consts = { 'specials': 'in|on|of|at',
194 'timeseperator': timeSep, 204 'timeseperator': ':',
195 'dateseperator': dateSep, 205 'rangeseperator': '-',
196 'daysuffix': 'rd|st|nd|th', 206 'daysuffix': 'rd|st|nd|th',
197 'qunits': 'h|m|s|d|w|m|y', 207 'meridian': 'am|pm|a.m.|p.m.|a|p',
198 'now': [ 'now' ], 208 'qunits': 'h|m|s|d|w|m|y',
209 'now': [ 'now' ],
199 } 210 }
200 211
201 # Used to adjust the returned date before/after the source 212 # Used to adjust the returned date before/after the source
202 modifiers = { 'from': 1, 213 modifiers = { 'from': 1,
203 'before': -1, 214 'before': -1,
205 'ago': 1, 216 'ago': 1,
206 'prior': -1, 217 'prior': -1,
207 'prev': -1, 218 'prev': -1,
208 'last': -1, 219 'last': -1,
209 'next': 1, 220 'next': 1,
210 'this': 0,
211 'previous': -1, 221 'previous': -1,
212 'in a': 2, 222 'in a': 2,
213 'end of': 0, 223 'end of': 0,
214 'eo': 0, 224 'eo': 0,
215 } 225 }
221 231
222 # special day and/or times, i.e. lunch, noon, evening 232 # special day and/or times, i.e. lunch, noon, evening
223 # each element in the dictionary is a dictionary that is used 233 # each element in the dictionary is a dictionary that is used
224 # to fill in any value to be replace - the current date/time will 234 # to fill in any value to be replace - the current date/time will
225 # already have been populated by the method buildSources 235 # already have been populated by the method buildSources
226 re_sources = { 'noon': { 'hr': 12, 'mn': 0, 'sec': 0 }, 236 re_sources = { 'noon': { 'hr': 12, 'mn': 0, 'sec': 0 },
227 'lunch': { 'hr': 12, 'mn': 0, 'sec': 0 }, 237 'lunch': { 'hr': 12, 'mn': 0, 'sec': 0 },
228 'morning': { 'hr': 6, 'mn': 0, 'sec': 0 }, 238 'morning': { 'hr': 6, 'mn': 0, 'sec': 0 },
229 'breakfast': { 'hr': 8, 'mn': 0, 'sec': 0 }, 239 'breakfast': { 'hr': 8, 'mn': 0, 'sec': 0 },
230 'dinner': { 'hr': 19, 'mn': 0, 'sec': 0 }, 240 'dinner': { 'hr': 19, 'mn': 0, 'sec': 0 },
231 'evening': { 'hr': 18, 'mn': 0, 'sec': 0 }, 241 'evening': { 'hr': 18, 'mn': 0, 'sec': 0 },
232 'midnight': { 'hr': 0, 'mn': 0, 'sec': 0 }, 242 'midnight': { 'hr': 0, 'mn': 0, 'sec': 0 },
233 'night': { 'hr': 21, 'mn': 0, 'sec': 0 }, 243 'night': { 'hr': 21, 'mn': 0, 'sec': 0 },
234 'tonight': { 'hr': 21, 'mn': 0, 'sec': 0 }, 244 'tonight': { 'hr': 21, 'mn': 0, 'sec': 0 },
245 'eod': { 'hr': 17, 'mn': 0, 'sec': 0 },
246 }
247
248
249 class pdtLocale_es:
250 """
251 es Locale constants
252
253 This class will be used to initialize L{Constants} if PyICU is not located.
254
255 Defined as class variables are the lists and strings needed by parsedatetime
256 to evaluate strings in Spanish
257
258 Note that I don't speak Spanish so many of the items below are still in English
259 """
260
261 localeID = 'es' # don't use a unicode string
262 dateSep = [ u'/' ]
263 timeSep = [ u':' ]
264 meridian = []
265 usesMeridian = False
266 uses24 = True
267
268 Weekdays = [ u'lunes', u'martes', u'mi\xe9rcoles',
269 u'jueves', u'viernes', u's\xe1bado', u'domingo',
270 ]
271 shortWeekdays = [ u'lun', u'mar', u'mi\xe9',
272 u'jue', u'vie', u's\xe1b', u'dom',
273 ]
274 Months = [ u'enero', u'febrero', u'marzo',
275 u'abril', u'mayo', u'junio',
276 u'julio', u'agosto', u'septiembre',
277 u'octubre', u'noviembre', u'diciembre'
278 ]
279 shortMonths = [ u'ene', u'feb', u'mar',
280 u'abr', u'may', u'jun',
281 u'jul', u'ago', u'sep',
282 u'oct', u'nov', u'dic'
283 ]
284 dateFormats = { 'full': "EEEE d' de 'MMMM' de 'yyyy",
285 'long': "d' de 'MMMM' de 'yyyy",
286 'medium': "dd-MMM-yy",
287 'short': "d/MM/yy",
288 }
289 timeFormats = { 'full': "HH'H'mm' 'ss z",
290 'long': "HH:mm:ss z",
291 'medium': "HH:mm:ss",
292 'short': "HH:mm",
293 }
294
295 dp_order = [ u'd', u'm', u'y' ]
296
297 # this will be added to re_consts later
298 units = { 'seconds': [ 'second', 'sec' ],
299 'minutes': [ 'minute', 'min' ],
300 'hours': [ 'hour', 'hr' ],
301 'days': [ 'day', 'dy' ],
302 'weeks': [ 'week', 'wk' ],
303 'months': [ 'month', 'mth' ],
304 'years': [ 'year', 'yr' ],
305 }
306
307 # text constants to be used by regex's later
308 re_consts = { 'specials': 'in|on|of|at',
309 'timeseperator': timeSep,
310 'dateseperator': dateSep,
311 'rangeseperator': '-',
312 'daysuffix': 'rd|st|nd|th',
313 'qunits': 'h|m|s|d|w|m|y',
314 'now': [ 'now' ],
315 }
316
317 # Used to adjust the returned date before/after the source
318 modifiers = { 'from': 1,
319 'before': -1,
320 'after': 1,
321 'ago': 1,
322 'prior': -1,
323 'prev': -1,
324 'last': -1,
325 'next': 1,
326 'previous': -1,
327 'in a': 2,
328 'end of': 0,
329 'eo': 0,
330 }
331
332 dayoffsets = { 'tomorrow': 1,
333 'today': 0,
334 'yesterday': -1,
335 }
336
337 # special day and/or times, i.e. lunch, noon, evening
338 # each element in the dictionary is a dictionary that is used
339 # to fill in any value to be replace - the current date/time will
340 # already have been populated by the method buildSources
341 re_sources = { 'noon': { 'hr': 12, 'mn': 0, 'sec': 0 },
342 'lunch': { 'hr': 12, 'mn': 0, 'sec': 0 },
343 'morning': { 'hr': 6, 'mn': 0, 'sec': 0 },
344 'breakfast': { 'hr': 8, 'mn': 0, 'sec': 0 },
345 'dinner': { 'hr': 19, 'mn': 0, 'sec': 0 },
346 'evening': { 'hr': 18, 'mn': 0, 'sec': 0 },
347 'midnight': { 'hr': 0, 'mn': 0, 'sec': 0 },
348 'night': { 'hr': 21, 'mn': 0, 'sec': 0 },
349 'tonight': { 'hr': 21, 'mn': 0, 'sec': 0 },
350 'eod': { 'hr': 17, 'mn': 0, 'sec': 0 },
351 }
352
353
354 class pdtLocale_de:
355 """
356 de_DE Locale constants
357
358 This class will be used to initialize L{Constants} if PyICU is not located.
359
360 Contributed by Debian parsedatetime package maintainer Bernd Zeimetz <bzed@debian.org>
361
362 Defined as class variables are the lists and strings needed by parsedatetime
363 to evaluate strings for German
364 """
365
366 localeID = 'de_DE' # don't use a unicode string
367 dateSep = [ u'.' ]
368 timeSep = [ u':' ]
369 meridian = [ ]
370 usesMeridian = False
371 uses24 = True
372
373 Weekdays = [ u'montag', u'dienstag', u'mittwoch',
374 u'donnerstag', u'freitag', u'samstag', u'sonntag',
375 ]
376 shortWeekdays = [ u'mo', u'di', u'mi',
377 u'do', u'fr', u'sa', u'so',
378 ]
379 Months = [ u'januar', u'februar', u'm\xe4rz',
380 u'april', u'mai', u'juni',
381 u'juli', u'august', u'september',
382 u'oktober', u'november', u'dezember',
383 ]
384 shortMonths = [ u'jan', u'feb', u'mrz',
385 u'apr', u'mai', u'jun',
386 u'jul', u'aug', u'sep',
387 u'okt', u'nov', u'dez',
388 ]
389 dateFormats = { 'full': u'EEEE, d. MMMM yyyy',
390 'long': u'd. MMMM yyyy',
391 'medium': u'dd.MM.yyyy',
392 'short': u'dd.MM.yy'
393 }
394
395 timeFormats = { 'full': u'HH:mm:ss v',
396 'long': u'HH:mm:ss z',
397 'medium': u'HH:mm:ss',
398 'short': u'HH:mm'
399 }
400
401 dp_order = [ u'd', u'm', u'y' ]
402
403 # this will be added to re_consts later
404 units = { 'seconds': [ 'sekunden', 'sek', 's' ],
405 'minutes': [ 'minuten', 'min' , 'm' ],
406 'hours': [ 'stunden', 'std', 'h' ],
407 'days': [ 'tage', 't' ],
408 'weeks': [ 'wochen', 'w' ],
409 'months': [ 'monate' ], #the short version would be a capital M,
410 #as I understand it we can't distinguis
411 #between m for minutes and M for months.
412 'years': [ 'jahre', 'j' ],
413 }
414
415 # text constants to be used by regex's later
416 re_consts = { 'specials': 'am|dem|der|im|in|den|zum',
417 'timeseperator': ':',
418 'rangeseperator': '-',
419 'daysuffix': '',
420 'qunits': 'h|m|s|t|w|m|j',
421 'now': [ 'jetzt' ],
422 }
423
424 # Used to adjust the returned date before/after the source
425 #still looking for insight on how to translate all of them to german.
426 modifiers = { u'from': 1,
427 u'before': -1,
428 u'after': 1,
429 u'vergangener': -1,
430 u'vorheriger': -1,
431 u'prev': -1,
432 u'letzter': -1,
433 u'n\xe4chster': 1,
434 u'dieser': 0,
435 u'previous': -1,
436 u'in a': 2,
437 u'end of': 0,
438 u'eod': 0,
439 u'eo': 0,
440 }
441
442 #morgen/abermorgen does not work, see http://code.google.com/p/parsedatetime/issues/detail?id=19
443 dayoffsets = { u'morgen': 1,
444 u'heute': 0,
445 u'gestern': -1,
446 u'vorgestern': -2,
447 u'\xfcbermorgen': 2,
448 }
449
450 # special day and/or times, i.e. lunch, noon, evening
451 # each element in the dictionary is a dictionary that is used
452 # to fill in any value to be replace - the current date/time will
453 # already have been populated by the method buildSources
454 re_sources = { u'mittag': { 'hr': 12, 'mn': 0, 'sec': 0 },
455 u'mittags': { 'hr': 12, 'mn': 0, 'sec': 0 },
456 u'mittagessen': { 'hr': 12, 'mn': 0, 'sec': 0 },
457 u'morgen': { 'hr': 6, 'mn': 0, 'sec': 0 },
458 u'morgens': { 'hr': 6, 'mn': 0, 'sec': 0 },
459 u'fr\e4hst\xe4ck': { 'hr': 8, 'mn': 0, 'sec': 0 },
460 u'abendessen': { 'hr': 19, 'mn': 0, 'sec': 0 },
461 u'abend': { 'hr': 18, 'mn': 0, 'sec': 0 },
462 u'abends': { 'hr': 18, 'mn': 0, 'sec': 0 },
463 u'mitternacht': { 'hr': 0, 'mn': 0, 'sec': 0 },
464 u'nacht': { 'hr': 21, 'mn': 0, 'sec': 0 },
465 u'nachts': { 'hr': 21, 'mn': 0, 'sec': 0 },
466 u'heute abend': { 'hr': 21, 'mn': 0, 'sec': 0 },
467 u'heute nacht': { 'hr': 21, 'mn': 0, 'sec': 0 },
468 u'feierabend': { 'hr': 17, 'mn': 0, 'sec': 0 },
235 } 469 }
236 470
237 471
238 pdtLocales = { 'en_US': pdtLocale_en, 472 pdtLocales = { 'en_US': pdtLocale_en,
239 'es': pdtLocale_es, 473 'en_AU': pdtLocale_au,
474 'es_ES': pdtLocale_es,
475 'de_DE': pdtLocale_de,
240 } 476 }
241 477
242 478
243 def _initLocale(ptc): 479 def _initLocale(ptc):
244 """ 480 """
245 Helper function to initialize the different lists and strings 481 Helper function to initialize the different lists and strings
246 from either PyICU or one of the locale pdt Locales and store 482 from either PyICU or one of the internal pdt Locales and store
247 them into ptc. 483 them into ptc.
248 """ 484 """
485
486 def lcase(x):
487 return x.lower()
488
249 if pyicu and ptc.usePyICU: 489 if pyicu and ptc.usePyICU:
250 ptc.icuLocale = pyicu.Locale(ptc.localeID) 490 ptc.icuLocale = None
251 491
252 if not ptc.icuLocale: 492 if ptc.localeID is not None:
253 ptc.icuLocale = pyicu.Locale('en_US') 493 ptc.icuLocale = pyicu.Locale(ptc.localeID)
254 494
255 ptc.icuSymbols = pyicu.DateFormatSymbols(ptc.icuLocale) 495 if ptc.icuLocale is None:
256 496 for id in range(0, len(ptc.fallbackLocales)):
257 ptc.Weekdays = map(string.lower, ptc.icuSymbols.getWeekdays()[1:]) 497 ptc.localeID = ptc.fallbackLocales[id]
258 ptc.shortWeekdays = map(string.lower, ptc.icuSymbols.getShortWeekdays()[1:]) 498 ptc.icuLocale = pyicu.Locale(ptc.localeID)
259 ptc.Months = map(string.lower, ptc.icuSymbols.getMonths()) 499
260 ptc.shortMonths = map(string.lower, ptc.icuSymbols.getShortMonths()) 500 if ptc.icuLocale is not None:
501 break
502
503 ptc.icuSymbols = pyicu.DateFormatSymbols(ptc.icuLocale)
504
505 # grab ICU list of weekdays, skipping first entry which
506 # is always blank
507 wd = map(lcase, ptc.icuSymbols.getWeekdays()[1:])
508 swd = map(lcase, ptc.icuSymbols.getShortWeekdays()[1:])
509
510 # store them in our list with Monday first (ICU puts Sunday first)
511 ptc.Weekdays = wd[1:] + wd[0:1]
512 ptc.shortWeekdays = swd[1:] + swd[0:1]
513 ptc.Months = map(lcase, ptc.icuSymbols.getMonths())
514 ptc.shortMonths = map(lcase, ptc.icuSymbols.getShortMonths())
261 515
262 # not quite sure how to init this so for now 516 # not quite sure how to init this so for now
263 # set it to none so it will be set to the en_US defaults for now 517 # set it to none so it will be set to the en_US defaults for now
264 ptc.re_consts = None 518 ptc.re_consts = None
265 519 ptc.icu_df = { 'full': pyicu.DateFormat.createDateInstance(pyicu.DateFormat.kFull, ptc.icuLocale),
266 ptc.icu_df = { 'full': pyicu.DateFormat.createDateInstance(pyicu.DateFormat.kFull, ptc.icuLocale), 520 'long': pyicu.DateFormat.createDateInstance(pyicu.DateFormat.kLong, ptc.icuLocale),
267 'long': pyicu.DateFormat.createDateInstance(pyicu.DateFormat.kLong, ptc.icuLocale), 521 'medium': pyicu.DateFormat.createDateInstance(pyicu.DateFormat.kMedium, ptc.icuLocale),
268 'medium': pyicu.DateFormat.createDateInstance(pyicu.DateFormat.kMedium, ptc.icuLocale), 522 'short': pyicu.DateFormat.createDateInstance(pyicu.DateFormat.kShort, ptc.icuLocale),
269 'short': pyicu.DateFormat.createDateInstance(pyicu.DateFormat.kShort, ptc.icuLocale), 523 }
270 } 524 ptc.icu_tf = { 'full': pyicu.DateFormat.createTimeInstance(pyicu.DateFormat.kFull, ptc.icuLocale),
271 ptc.icu_tf = { 'full': pyicu.DateFormat.createTimeInstance(pyicu.DateFormat.kFull, ptc.icuLocale), 525 'long': pyicu.DateFormat.createTimeInstance(pyicu.DateFormat.kLong, ptc.icuLocale),
272 'long': pyicu.DateFormat.createTimeInstance(pyicu.DateFormat.kLong, ptc.icuLocale), 526 'medium': pyicu.DateFormat.createTimeInstance(pyicu.DateFormat.kMedium, ptc.icuLocale),
273 'medium': pyicu.DateFormat.createTimeInstance(pyicu.DateFormat.kMedium, ptc.icuLocale), 527 'short': pyicu.DateFormat.createTimeInstance(pyicu.DateFormat.kShort, ptc.icuLocale),
274 'short': pyicu.DateFormat.createTimeInstance(pyicu.DateFormat.kShort, ptc.icuLocale), 528 }
275 } 529 ptc.dateFormats = { 'full': ptc.icu_df['full'].toPattern(),
276 530 'long': ptc.icu_df['long'].toPattern(),
277 ptc.dateFormats = { 'full': ptc.icu_df['full'].toPattern(), 531 'medium': ptc.icu_df['medium'].toPattern(),
278 'long': ptc.icu_df['long'].toPattern(), 532 'short': ptc.icu_df['short'].toPattern(),
279 'medium': ptc.icu_df['medium'].toPattern(), 533 }
280 'short': ptc.icu_df['short'].toPattern(), 534 ptc.timeFormats = { 'full': ptc.icu_tf['full'].toPattern(),
281 } 535 'long': ptc.icu_tf['long'].toPattern(),
282 ptc.timeFormats = { 'full': ptc.icu_tf['full'].toPattern(), 536 'medium': ptc.icu_tf['medium'].toPattern(),
283 'long': ptc.icu_tf['long'].toPattern(), 537 'short': ptc.icu_tf['short'].toPattern(),
284 'medium': ptc.icu_tf['medium'].toPattern(), 538 }
285 'short': ptc.icu_tf['short'].toPattern(),
286 }
287 else: 539 else:
288 if not ptc.localeID in pdtLocales: 540 if not ptc.localeID in pdtLocales:
289 ptc.localeID = 'en_US' 541 for id in range(0, len(ptc.fallbackLocales)):
290 542 ptc.localeID = ptc.fallbackLocales[id]
291 ptc.locale = pdtLocales[ptc.localeID] 543
544 if ptc.localeID in pdtLocales:
545 break
546
547 ptc.locale = pdtLocales[ptc.localeID]
548 ptc.usePyICU = False
292 549
293 ptc.Weekdays = ptc.locale.Weekdays 550 ptc.Weekdays = ptc.locale.Weekdays
294 ptc.shortWeekdays = ptc.locale.shortWeekdays 551 ptc.shortWeekdays = ptc.locale.shortWeekdays
295 ptc.Months = ptc.locale.Months 552 ptc.Months = ptc.locale.Months
296 ptc.shortMonths = ptc.locale.shortMonths 553 ptc.shortMonths = ptc.locale.shortMonths
297 ptc.dateFormats = ptc.locale.dateFormats 554 ptc.dateFormats = ptc.locale.dateFormats
298 ptc.timeFormats = ptc.locale.timeFormats 555 ptc.timeFormats = ptc.locale.timeFormats
299
300 556
301 # these values are used to setup the various bits 557 # these values are used to setup the various bits
302 # of the regex values used to parse 558 # of the regex values used to parse
303 # 559 #
304 # check if a local set of constants has been 560 # check if a local set of constants has been
321 ptc.re_values = pdtLocales['en_US'].re_consts 577 ptc.re_values = pdtLocales['en_US'].re_consts
322 ptc.Modifiers = pdtLocales['en_US'].modifiers 578 ptc.Modifiers = pdtLocales['en_US'].modifiers
323 ptc.dayOffsets = pdtLocales['en_US'].dayoffsets 579 ptc.dayOffsets = pdtLocales['en_US'].dayoffsets
324 units = pdtLocales['en_US'].units 580 units = pdtLocales['en_US'].units
325 581
326 ptc.re_values['months'] = '%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s' % tuple(ptc.Months) 582 # escape any regex special characters that may be found
327 ptc.re_values['shortmonths'] = '%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s' % tuple(ptc.shortMonths) 583 wd = tuple(map(re.escape, ptc.Weekdays))
328 ptc.re_values['days'] = '%s|%s|%s|%s|%s|%s|%s' % tuple(ptc.Weekdays) 584 swd = tuple(map(re.escape, ptc.shortWeekdays))
329 ptc.re_values['shortdays'] = '%s|%s|%s|%s|%s|%s|%s' % tuple(ptc.shortWeekdays) 585 mth = tuple(map(re.escape, ptc.Months))
586 smth = tuple(map(re.escape, ptc.shortMonths))
587
588 ptc.re_values['months'] = '%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s' % mth
589 ptc.re_values['shortmonths'] = '%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s' % smth
590 ptc.re_values['days'] = '%s|%s|%s|%s|%s|%s|%s' % wd
591 ptc.re_values['shortdays'] = '%s|%s|%s|%s|%s|%s|%s' % swd
330 592
331 l = [] 593 l = []
332 for unit in units: 594 for unit in units:
333 l.append('|'.join(units[unit])) 595 l.append('|'.join(units[unit]))
334 596
339 def _initSymbols(ptc): 601 def _initSymbols(ptc):
340 """ 602 """
341 Helper function to initialize the single character constants 603 Helper function to initialize the single character constants
342 and other symbols needed. 604 and other symbols needed.
343 """ 605 """
344 ptc.timeSep = u':' 606 ptc.timeSep = [ u':' ]
345 ptc.dateSep = u'/' 607 ptc.dateSep = [ u'/' ]
346 ptc.meridian = [ u'AM', u'PM' ] 608 ptc.meridian = [ u'AM', u'PM' ]
347 609
348 ptc.usesMeridian = True 610 ptc.usesMeridian = True
349 ptc.uses24 = False 611 ptc.uses24 = False
350 612
351 if pyicu: 613 if pyicu and ptc.usePyICU:
352 am = u'' 614 am = u''
353 pm = u'' 615 pm = u''
354 616 ts = ''
355 # ICU doesn't seem to provide directly the 617
356 # date or time seperator - so we have to 618 # ICU doesn't seem to provide directly the
357 # figure it out 619 # date or time seperator - so we have to
358 620 # figure it out
359 p = pyicu.FieldPosition(pyicu.DateFormat.AM_PM_FIELD)
360 o = ptc.icu_tf['short'] 621 o = ptc.icu_tf['short']
361
362 s = ptc.timeFormats['short'] 622 s = ptc.timeFormats['short']
363 623
364 ptc.usesMeridian = u'a' in s 624 ptc.usesMeridian = u'a' in s
365 ptc.uses24 = u'H' in s 625 ptc.uses24 = u'H' in s
366 626
367 s = o.format(datetime.datetime(2003, 10, 30, 11, 45)) # '11:45 AM' or '11:45' 627 # '11:45 AM' or '11:45'
368 628 s = o.format(datetime.datetime(2003, 10, 30, 11, 45))
369 s = s.replace('11', '').replace('45', '') # ': AM' or ':' 629
630 # ': AM' or ':'
631 s = s.replace('11', '').replace('45', '')
370 632
371 if len(s) > 0: 633 if len(s) > 0:
372 ptc.timeSep = s[0] 634 ts = s[0]
373 635
374 if ptc.usesMeridian: 636 if ptc.usesMeridian:
375 am = s[1:].strip() # 'AM' 637 # '23:45 AM' or '23:45'
376 638 am = s[1:].strip()
377 s = o.format(datetime.datetime(2003, 10, 30, 23, 45)) # '23:45 AM' or '23:45' 639 s = o.format(datetime.datetime(2003, 10, 30, 23, 45))
378 640
379 if ptc.uses24: 641 if ptc.uses24:
380 s = s.replace('23', '') 642 s = s.replace('23', '')
381 else: 643 else:
382 s = s.replace('11', '') 644 s = s.replace('11', '')
383 645
384 pm = s.replace('45', '').replace(ptc.timeSep, '').strip() # 'PM' or '' 646 # 'PM' or ''
385 647 pm = s.replace('45', '').replace(ts, '').strip()
648
649 ptc.timeSep = [ ts ]
386 ptc.meridian = [ am, pm ] 650 ptc.meridian = [ am, pm ]
387 651
652 o = ptc.icu_df['short']
653 s = o.format(datetime.datetime(2003, 10, 30, 11, 45))
654 s = s.replace('10', '').replace('30', '').replace('03', '').replace('2003', '')
655
656 if len(s) > 0:
657 ds = s[0]
658 else:
659 ds = '/'
660
661 ptc.dateSep = [ ds ]
662 s = ptc.dateFormats['short']
663 l = s.lower().split(ds)
664 dp_order = []
665
666 for s in l:
667 if len(s) > 0:
668 dp_order.append(s[:1])
669
670 ptc.dp_order = dp_order
388 else: 671 else:
389 ptc.timeSep = ptc.locale.timeSep 672 ptc.timeSep = ptc.locale.timeSep
390 ptc.dateSep = ptc.locale.dateSep 673 ptc.dateSep = ptc.locale.dateSep
391 ptc.meridian = ptc.locale.meridian 674 ptc.meridian = ptc.locale.meridian
392 ptc.usesMeridian = ptc.locale.usesMeridian 675 ptc.usesMeridian = ptc.locale.usesMeridian
393 ptc.uses24 = ptc.locale.uses24 676 ptc.uses24 = ptc.locale.uses24
677 ptc.dp_order = ptc.locale.dp_order
394 678
395 # build am and pm lists to contain 679 # build am and pm lists to contain
396 # original case, lowercase and first-char 680 # original case, lowercase and first-char
397 # versions of the meridian text 681 # versions of the meridian text
398 682
429 create the regex strings. 713 create the regex strings.
430 """ 714 """
431 # TODO add code to parse the date formats and build the regexes up from sub-parts 715 # TODO add code to parse the date formats and build the regexes up from sub-parts
432 # TODO find all hard-coded uses of date/time seperators 716 # TODO find all hard-coded uses of date/time seperators
433 717
434 ptc.RE_DATE3 = r'(?P<date>((?P<mthname>(%(months)s|%(shortmonths)s))\s?((?P<day>\d\d?)(\s|%(daysuffix)s|,|$)+)?(?P<year>\d\d\d\d)?))' % ptc.re_values 718 ptc.RE_DATE4 = r'''(?P<date>(((?P<day>\d\d?)(?P<suffix>%(daysuffix)s)?(,)?(\s)?)
435 ptc.RE_MONTH = r'(?P<month>((?P<mthname>(%(months)s|%(shortmonths)s))(\s?(?P<year>(\d\d\d\d)))?))' % ptc.re_values 719 (?P<mthname>(%(months)s|%(shortmonths)s))\s?
436 ptc.RE_WEEKDAY = r'(?P<weekday>(%(days)s|%(shortdays)s))' % ptc.re_values 720 (?P<year>\d\d(\d\d)?)?
721 )
722 )''' % ptc.re_values
723
724 # I refactored DATE3 to fix Issue 16 http://code.google.com/p/parsedatetime/issues/detail?id=16
725 # I suspect the final line was for a trailing time - but testing shows it's not needed
726 # ptc.RE_DATE3 = r'''(?P<date>((?P<mthname>(%(months)s|%(shortmonths)s))\s?
727 # ((?P<day>\d\d?)(\s?|%(daysuffix)s|$)+)?
728 # (,\s?(?P<year>\d\d(\d\d)?))?))
729 # (\s?|$|[^0-9a-zA-Z])''' % ptc.re_values
730 ptc.RE_DATE3 = r'''(?P<date>(
731 (((?P<mthname>(%(months)s|%(shortmonths)s))|
732 ((?P<day>\d\d?)(?P<suffix>%(daysuffix)s)?))(\s)?){1,2}
733 ((,)?(\s)?(?P<year>\d\d(\d\d)?))?
734 )
735 )''' % ptc.re_values
736 ptc.RE_MONTH = r'''(\s?|^)
737 (?P<month>(
738 (?P<mthname>(%(months)s|%(shortmonths)s))
739 (\s?(?P<year>(\d\d\d\d)))?
740 ))
741 (\s?|$|[^0-9a-zA-Z])''' % ptc.re_values
742 ptc.RE_WEEKDAY = r'''(\s?|^)
743 (?P<weekday>(%(days)s|%(shortdays)s))
744 (\s?|$|[^0-9a-zA-Z])''' % ptc.re_values
437 745
438 ptc.RE_SPECIAL = r'(?P<special>^[%(specials)s]+)\s+' % ptc.re_values 746 ptc.RE_SPECIAL = r'(?P<special>^[%(specials)s]+)\s+' % ptc.re_values
439 ptc.RE_UNITS = r'(?P<qty>(-?\d+\s*(?P<units>((%(units)s)s?))))' % ptc.re_values 747 ptc.RE_UNITS = r'''(?P<qty>(-?\d+\s*
440 ptc.RE_QUNITS = r'(?P<qty>(-?\d+\s?(?P<qunits>%(qunits)s)(\s|,|$)))' % ptc.re_values 748 (?P<units>((%(units)s)s?))
441 ptc.RE_MODIFIER = r'(?P<modifier>(previous|prev|last|next|this|eo|(end\sof)|(in\sa)))' % ptc.re_values 749 ))''' % ptc.re_values
442 ptc.RE_MODIFIER2 = r'(?P<modifier>(from|before|after|ago|prior))' % ptc.re_values 750 ptc.RE_QUNITS = r'''(?P<qty>(-?\d+\s?
443 ptc.RE_TIMEHMS = r'(?P<hours>\d\d?)(?P<tsep>%(timeseperator)s|)(?P<minutes>\d\d)(?:(?P=tsep)(?P<seconds>\d\d(?:[.,]\d+)?))?' % ptc.re_values 751 (?P<qunits>%(qunits)s)
444 752 (\s?|,|$)
445 ptc.RE_TIMEHMS2 = r'(?P<hours>(\d\d?))((?P<tsep>%(timeseperator)s|)(?P<minutes>(\d\d?))(?:(?P=tsep)(?P<seconds>\d\d?(?:[.,]\d+)?))?)?' % ptc.re_values 753 ))''' % ptc.re_values
754 ptc.RE_MODIFIER = r'''(\s?|^)
755 (?P<modifier>
756 (previous|prev|last|next|eod|eo|(end\sof)|(in\sa)))''' % ptc.re_values
757 ptc.RE_MODIFIER2 = r'''(\s?|^)
758 (?P<modifier>
759 (from|before|after|ago|prior))
760 (\s?|$|[^0-9a-zA-Z])''' % ptc.re_values
761 ptc.RE_TIMEHMS = r'''(\s?|^)
762 (?P<hours>\d\d?)
763 (?P<tsep>%(timeseperator)s|)
764 (?P<minutes>\d\d)
765 (?:(?P=tsep)(?P<seconds>\d\d(?:[.,]\d+)?))?''' % ptc.re_values
766 ptc.RE_TIMEHMS2 = r'''(?P<hours>(\d\d?))
767 ((?P<tsep>%(timeseperator)s|)
768 (?P<minutes>(\d\d?))
769 (?:(?P=tsep)
770 (?P<seconds>\d\d?
771 (?:[.,]\d+)?))?)?''' % ptc.re_values
446 772
447 if 'meridian' in ptc.re_values: 773 if 'meridian' in ptc.re_values:
448 ptc.RE_TIMEHMS2 += r'\s?(?P<meridian>(%(meridian)s))' % ptc.re_values 774 ptc.RE_TIMEHMS2 += r'\s?(?P<meridian>(%(meridian)s))' % ptc.re_values
449 775
450 ptc.RE_DATE = r'(?P<date>\d+([/.\\]\d+)+)' 776 dateSeps = ''.join(ptc.dateSep) + '.'
451 ptc.RE_DATE2 = r'[/.\\]' 777
452 ptc.RE_DAY = r'(?P<day>(today|tomorrow|yesterday))' % ptc.re_values 778 ptc.RE_DATE = r'''(\s?|^)
453 ptc.RE_TIME = r'\s*(?P<time>(morning|breakfast|noon|lunch|evening|midnight|tonight|dinner|night|now))' % ptc.re_values 779 (?P<date>(\d\d?[%s]\d\d?([%s]\d\d(\d\d)?)?))
780 (\s?|$|[^0-9a-zA-Z])''' % (dateSeps, dateSeps)
781 ptc.RE_DATE2 = r'[%s]' % dateSeps
782 ptc.RE_DAY = r'''(\s?|^)
783 (?P<day>(today|tomorrow|yesterday))
784 (\s?|$|[^0-9a-zA-Z])''' % ptc.re_values
785 ptc.RE_DAY2 = r'''(?P<day>\d\d?)|(?P<suffix>%(daysuffix)s)
786 ''' % ptc.re_values
787 ptc.RE_TIME = r'''(\s?|^)
788 (?P<time>(morning|breakfast|noon|lunch|evening|midnight|tonight|dinner|night|now))
789 (\s?|$|[^0-9a-zA-Z])''' % ptc.re_values
454 ptc.RE_REMAINING = r'\s+' 790 ptc.RE_REMAINING = r'\s+'
455 791
456 # Regex for date/time ranges 792 # Regex for date/time ranges
457 793 ptc.RE_RTIMEHMS = r'''(\s?|^)
458 ptc.RE_RTIMEHMS = r'(\d\d?)%(timeseperator)s(\d\d)(%(timeseperator)s(\d\d))?' % ptc.re_values 794 (\d\d?)%(timeseperator)s
459 795 (\d\d)
460 ptc.RE_RTIMEHMS2 = r'(\d\d?)(%(timeseperator)s(\d\d?))?(%(timeseperator)s(\d\d?))?' % ptc.re_values 796 (%(timeseperator)s(\d\d))?
797 (\s?|$)''' % ptc.re_values
798 ptc.RE_RTIMEHMS2 = r'''(\s?|^)
799 (\d\d?)
800 (%(timeseperator)s(\d\d?))?
801 (%(timeseperator)s(\d\d?))?''' % ptc.re_values
461 802
462 if 'meridian' in ptc.re_values: 803 if 'meridian' in ptc.re_values:
463 ptc.RE_RTIMEHMS2 += r'\s?(%(meridian)s)' % ptc.re_values 804 ptc.RE_RTIMEHMS2 += r'\s?(%(meridian)s)' % ptc.re_values
464 805
465 ptc.RE_RDATE = r'(\d+([/.\\]\d+)+)' 806 ptc.RE_RDATE = r'(\d+([%s]\d+)+)' % dateSeps
466 ptc.RE_RDATE3 = r'((((%(months)s))\s?((\d\d?)(\s|%(daysuffix)s|,|$)+)?(\d\d\d\d)?))' % ptc.re_values 807 ptc.RE_RDATE3 = r'''((((%(months)s))\s?
467 ptc.DATERNG1 = ptc.RE_RDATE + r'\s?-\s?' + ptc.RE_RDATE # "06/07/06 - 08/09/06" 808 ((\d\d?)
468 ptc.DATERNG2 = ptc.RE_RDATE3 + r'\s?-\s?' + ptc.RE_RDATE3 # "march 31 - june 1st, 2006" 809 (\s?|%(daysuffix)s|$)+)?
469 ptc.DATERNG3 = ptc.RE_RDATE3 + r'\s?' + r'-' + r'\s?(\d\d?)\s?(rd|st|nd|th)?' % ptc.re_values # "march 1rd -13th" 810 (,\s?\d\d\d\d)?))''' % ptc.re_values
470 ptc.TIMERNG1 = ptc.RE_RTIMEHMS2 + r'\s?-\s?'+ ptc.RE_RTIMEHMS2 # "4:00:55 pm - 5:90:44 am",'4p-5p' 811
471 ptc.TIMERNG2 = ptc.RE_RTIMEHMS + r'\s?-\s?'+ ptc.RE_RTIMEHMS # "4:00 - 5:90 ","4:55:55-3:44:55" 812 # "06/07/06 - 08/09/06"
472 ptc.TIMERNG3 = r'\d\d?\s?-\s?'+ ptc.RE_RTIMEHMS2 # "4-5pm " 813 ptc.DATERNG1 = ptc.RE_RDATE + r'\s?%(rangeseperator)s\s?' + ptc.RE_RDATE
814 ptc.DATERNG1 = ptc.DATERNG1 % ptc.re_values
815
816 # "march 31 - june 1st, 2006"
817 ptc.DATERNG2 = ptc.RE_RDATE3 + r'\s?%(rangeseperator)s\s?' + ptc.RE_RDATE3
818 ptc.DATERNG2 = ptc.DATERNG2 % ptc.re_values
819
820 # "march 1rd -13th"
821 ptc.DATERNG3 = ptc.RE_RDATE3 + r'\s?%(rangeseperator)s\s?(\d\d?)\s?(rd|st|nd|th)?'
822 ptc.DATERNG3 = ptc.DATERNG3 % ptc.re_values
823
824 # "4:00:55 pm - 5:90:44 am", '4p-5p'
825 ptc.TIMERNG1 = ptc.RE_RTIMEHMS2 + r'\s?%(rangeseperator)s\s?' + ptc.RE_RTIMEHMS2
826 ptc.TIMERNG1 = ptc.TIMERNG1 % ptc.re_values
827
828 # "4:00 - 5:90 ", "4:55:55-3:44:55"
829 ptc.TIMERNG2 = ptc.RE_RTIMEHMS + r'\s?%(rangeseperator)s\s?' + ptc.RE_RTIMEHMS
830 ptc.TIMERNG2 = ptc.TIMERNG2 % ptc.re_values
831
832 # "4-5pm "
833 ptc.TIMERNG3 = r'\d\d?\s?%(rangeseperator)s\s?' + ptc.RE_RTIMEHMS2
834 ptc.TIMERNG3 = ptc.TIMERNG3 % ptc.re_values
835
836 # "4:30-5pm "
837 ptc.TIMERNG4 = ptc.RE_RTIMEHMS + r'\s?%(rangeseperator)s\s?' + ptc.RE_RTIMEHMS2
838 ptc.TIMERNG4 = ptc.TIMERNG4 % ptc.re_values
473 839
474 840
475 def _initConstants(ptc): 841 def _initConstants(ptc):
476 """ 842 """
477 Create localized versions of the units, week and month names 843 Create localized versions of the units, week and month names
478 """ 844 """
479 # build weekday offsets - yes, it assumes the Weekday and shortWeekday 845 # build weekday offsets - yes, it assumes the Weekday and shortWeekday
480 # lists are in the same order and Sun..Sat 846 # lists are in the same order and Mon..Sun (Python style)
481 ptc.WeekdayOffsets = {} 847 ptc.WeekdayOffsets = {}
482 848
483 o = 0 849 o = 0
484 for key in ptc.Weekdays: 850 for key in ptc.Weekdays:
485 ptc.WeekdayOffsets[key] = o 851 ptc.WeekdayOffsets[key] = o
490 o += 1 856 o += 1
491 857
492 # build month offsets - yes, it assumes the Months and shortMonths 858 # build month offsets - yes, it assumes the Months and shortMonths
493 # lists are in the same order and Jan..Dec 859 # lists are in the same order and Jan..Dec
494 ptc.MonthOffsets = {} 860 ptc.MonthOffsets = {}
495 ptc.DaysInMonth = {}
496 861
497 o = 1 862 o = 1
498 for key in ptc.Months: 863 for key in ptc.Months:
499 ptc.MonthOffsets[key] = o 864 ptc.MonthOffsets[key] = o
500 ptc.DaysInMonth[key] = ptc.DaysInMonthList[o - 1]
501 o += 1 865 o += 1
502 o = 1 866 o = 1
503 for key in ptc.shortMonths: 867 for key in ptc.shortMonths:
504 ptc.MonthOffsets[key] = o 868 ptc.MonthOffsets[key] = o
505 ptc.DaysInMonth[key] = ptc.DaysInMonthList[o - 1]
506 o += 1 869 o += 1
507 870
871 # ptc.DaySuffixes = ptc.re_consts['daysuffix'].split('|')
872
508 873
509 class Constants: 874 class Constants:
510 """ 875 """
511 Default set of constants for parsedatetime. 876 Default set of constants for parsedatetime.
512 877
513 If PyICU is present, then the class will initialize itself to 878 If PyICU is present, then the class will first try to get PyICU
514 the current default locale or to the locale specified by C{localeID}. 879 to return a locale specified by C{localeID}. If either C{localeID} is
515 880 None or if the locale does not exist within PyICU, then each of the
516 If PyICU is not present then the class will initialize itself to 881 locales defined in C{fallbackLocales} is tried in order.
517 en_US locale or if C{localeID} is passed in and the value matches one 882
518 of the defined pdtLocales then that will be used. 883 If PyICU is not present or none of the specified locales can be used,
519 """ 884 then the class will initialize itself to the en_US locale.
520 def __init__(self, localeID=None, usePyICU=True): 885
521 if localeID is None: 886 if PyICU is not present or not requested, only the locales defined by
522 self.localeID = 'en_US' 887 C{pdtLocales} will be searched.
523 else: 888 """
def __init__(self, localeID=None, usePyICU=True, fallbackLocales=None):
    """
    Set up the locale-independent defaults, then run the module-level
    initialisers (C{_initLocale}, C{_initConstants}, C{_initSymbols},
    C{_initPatterns}) that fill in the locale-specific values and regex
    source strings.

    @type  localeID:        string or None
    @param localeID:        requested locale identifier, stored as-is
    @type  usePyICU:        boolean
    @param usePyICU:        stored as C{self.usePyICU}
    @type  fallbackLocales: sequence of strings or None
    @param fallbackLocales: locale identifiers to fall back on; C{None}
                            means C{['en_US']}.  C{'en_US'} is always
                            appended when missing.
    """
    self.localeID = localeID

    # Work on a private copy: the caller's list (or a shared default)
    # must not grow an extra 'en_US' entry as a side effect of
    # constructing a Constants instance.
    self.fallbackLocales = list(fallbackLocales or [])

    if 'en_US' not in self.fallbackLocales:
        self.fallbackLocales.append('en_US')

    # define non-locale specific constants

    self.locale = None
    self.usePyICU = usePyICU

    # starting cache of leap years
    # daysInMonth will add to this if during
    # runtime it gets a request for a year not found
    # (every fourth year from 1904 through 2096; 2000 is a leap year)
    self._leapYears = list(range(1904, 2100, 4))

    # durations expressed in seconds
    self.Second = 1
    self.Minute = 60 * self.Second
    self.Hour = 60 * self.Minute
    self.Day = 24 * self.Hour
    self.Week = 7 * self.Day
    self.Month = 30 * self.Day
    self.Year = 365 * self.Day

    self.rangeSep = u'-'

    # days per month for a non-leap year, January first
    self._DaysInMonthList = (31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)

    self.BirthdayEpoch = 50

    # DOWParseStyle controls how we parse "Tuesday"
    # If the current day was Thursday and the text to parse is "Tuesday"
    # then the following table shows how each style would be returned
    # -1, 0, +1
    #
    # Current day marked as ***
    #
    #           Sun Mon Tue Wed Thu Fri Sat
    # week -1
    # current           -1,0    ***
    # week +1           +1
    #
    # If the current day was Monday and the text to parse is "Tuesday"
    # then the following table shows how each style would be returned
    # -1, 0, +1
    #
    #           Sun Mon Tue Wed Thu Fri Sat
    # week -1           -1
    # current       *** 0,+1
    # week +1

    self.DOWParseStyle = 1

    # CurrentDOWParseStyle controls how we parse "Friday"
    # If the current day was Friday and the text to parse is "Friday"
    # then the following table shows how each style would be returned
    # True/False. This also depends on DOWParseStyle.
    #
    # Current day marked as ***
    #
    # DOWParseStyle = 0
    #           Sun Mon Tue Wed Thu Fri Sat
    # week -1
    # current                       T,F
    # week +1
    #
    # DOWParseStyle = -1
    #           Sun Mon Tue Wed Thu Fri Sat
    # week -1                       F
    # current                       T
    # week +1
    #
    # DOWParseStyle = +1
    #
    #           Sun Mon Tue Wed Thu Fri Sat
    # week -1
    # current                       T
    # week +1                       F

    self.CurrentDOWParseStyle = False

    # initialize attributes to empty values to ensure
    # they are defined
    self.re_sources = None
    self.re_values = None
    self.Modifiers = None
    self.dayOffsets = None
    self.WeekdayOffsets = None
    self.MonthOffsets = None
    self.dateSep = None
    self.timeSep = None
    self.am = None
    self.pm = None
    self.meridian = None
    self.usesMeridian = None
    self.uses24 = None
    self.dp_order = None

    self.RE_DATE4 = r''
    self.RE_DATE3 = r''
    self.RE_MONTH = r''
    self.RE_WEEKDAY = r''
    self.RE_SPECIAL = r''
    self.RE_UNITS = r''
    self.RE_QUNITS = r''
    self.RE_MODIFIER = r''
    self.RE_MODIFIER2 = r''
    self.RE_TIMEHMS = r''
    self.RE_TIMEHMS2 = r''
    self.RE_DATE = r''
    self.RE_DATE2 = r''
    self.RE_DAY = r''
    self.RE_DAY2 = r''
    self.RE_TIME = r''
    self.RE_REMAINING = r''
    self.RE_RTIMEHMS = r''
    self.RE_RTIMEHMS2 = r''
    self.RE_RDATE = r''
    self.RE_RDATE3 = r''
    self.DATERNG1 = r''
    self.DATERNG2 = r''
    self.DATERNG3 = r''
    self.TIMERNG1 = r''
    self.TIMERNG2 = r''
    self.TIMERNG3 = r''
    self.TIMERNG4 = r''

    # the order of these calls is kept exactly as in the original
    _initLocale(self)
    _initConstants(self)
    _initSymbols(self)
    _initPatterns(self)

    # The regex sources are only compiled on first access to the matching
    # CRE_* attribute (see __getattr__); here we just record which source
    # string belongs to which compiled-pattern name.
    self.re_option = re.IGNORECASE | re.VERBOSE
    self.cre_source = { 'CRE_SPECIAL':   self.RE_SPECIAL,
                        'CRE_UNITS':     self.RE_UNITS,
                        'CRE_QUNITS':    self.RE_QUNITS,
                        'CRE_MODIFIER':  self.RE_MODIFIER,
                        'CRE_MODIFIER2': self.RE_MODIFIER2,
                        'CRE_TIMEHMS':   self.RE_TIMEHMS,
                        'CRE_TIMEHMS2':  self.RE_TIMEHMS2,
                        'CRE_DATE':      self.RE_DATE,
                        'CRE_DATE2':     self.RE_DATE2,
                        'CRE_DATE3':     self.RE_DATE3,
                        'CRE_DATE4':     self.RE_DATE4,
                        'CRE_MONTH':     self.RE_MONTH,
                        'CRE_WEEKDAY':   self.RE_WEEKDAY,
                        'CRE_DAY':       self.RE_DAY,
                        'CRE_DAY2':      self.RE_DAY2,
                        'CRE_TIME':      self.RE_TIME,
                        'CRE_REMAINING': self.RE_REMAINING,
                        'CRE_RTIMEHMS':  self.RE_RTIMEHMS,
                        'CRE_RTIMEHMS2': self.RE_RTIMEHMS2,
                        'CRE_RDATE':     self.RE_RDATE,
                        'CRE_RDATE3':    self.RE_RDATE3,
                        'CRE_TIMERNG1':  self.TIMERNG1,
                        'CRE_TIMERNG2':  self.TIMERNG2,
                        'CRE_TIMERNG3':  self.TIMERNG3,
                        'CRE_TIMERNG4':  self.TIMERNG4,
                        'CRE_DATERNG1':  self.DATERNG1,
                        'CRE_DATERNG2':  self.DATERNG2,
                        'CRE_DATERNG3':  self.DATERNG3,
                      }
    self.cre_keys = self.cre_source.keys()
1057
1058
def __getattr__(self, name):
    """
    Lazily compile the regex behind a C{CRE_*} attribute.

    Only called when normal attribute lookup fails.  If C{name} is one of
    the keys in C{self.cre_keys}, the matching source string from
    C{self.cre_source} is compiled with C{self.re_option}, stored on the
    instance (so later lookups bypass this method) and returned.

    @raise AttributeError: for any other name
    """
    # Read cre_keys straight from the instance dict: if it has not been
    # set yet, "self.cre_keys" would re-enter __getattr__ and recurse
    # until the interpreter's recursion limit is hit.
    known = self.__dict__.get('cre_keys', ())

    if name not in known:
        # call form is valid in both Python 2 and Python 3
        raise AttributeError(name)

    value = re.compile(self.cre_source[name], self.re_option)
    setattr(self, name, value)
    return value
1066
def daysInMonth(self, month, year):
    """
    Return the number of days in C{month} (1-12) of the 4 digit C{year},
    with February adjusted for leap years.

    C{None} is returned when C{month} is outside 1-12.  A leap year not
    yet present in C{self._leapYears} is added to that cache.
    """
    if not (0 < month <= 12):
        return None

    days = self._DaysInMonthList[month - 1]

    # only February depends on the year
    if month != 2:
        return days

    if year not in self._leapYears:
        if not calendar.isleap(year):
            return days
        # remember newly seen leap years for later calls
        self._leapYears.append(year)

    return days + 1
546 1086
547 def buildSources(self, sourceTime=None): 1087 def buildSources(self, sourceTime=None):
548 """ 1088 """
549 Return a dictionary of date/time tuples based on the keys 1089 Return a dictionary of date/time tuples based on the keys
550 found in self.re_sources. 1090 found in self.re_sources.
561 sources = {} 1101 sources = {}
562 defaults = { 'yr': yr, 'mth': mth, 'dy': dy, 1102 defaults = { 'yr': yr, 'mth': mth, 'dy': dy,
563 'hr': hr, 'mn': mn, 'sec': sec, } 1103 'hr': hr, 'mn': mn, 'sec': sec, }
564 1104
565 for item in self.re_sources: 1105 for item in self.re_sources:
566 values = self.re_sources[item] 1106 values = {}
1107 source = self.re_sources[item]
567 1108
568 for key in defaults.keys(): 1109 for key in defaults.keys():
569 if not key in values: 1110 if key in source:
1111 values[key] = source[key]
1112 else:
570 values[key] = defaults[key] 1113 values[key] = defaults[key]
571 1114
572 sources[item] = ( values['yr'], values['mth'], values['dy'], 1115 sources[item] = ( values['yr'], values['mth'], values['dy'],
573 values['hr'], values['mn'], values['sec'], wd, yd, isdst ) 1116 values['hr'], values['mn'], values['sec'], wd, yd, isdst )
574 1117