comparison MoinMoin/support/parsedatetime/__init__.py @ 6098:83b1bc99457c

upgrade parsedatetime from 0.8.7 to 2.1
author Thomas Waldmann <tw AT waldmann-edv DOT de>
date Tue, 06 Sep 2016 00:09:31 +0200
parents 0a6fe22644e3
children
comparison
equal deleted inserted replaced
6097:815981fad7fd 6098:83b1bc99457c
1 # -*- coding: utf-8 -*-
2 #
3 # vim: sw=2 ts=2 sts=2
4 #
5 # Copyright 2004-2016 Mike Taylor
6 #
7 # Licensed under the Apache License, Version 2.0 (the "License");
8 # you may not use this file except in compliance with the License.
9 # You may obtain a copy of the License at
10 #
11 # http://www.apache.org/licenses/LICENSE-2.0
12 #
13 # Unless required by applicable law or agreed to in writing, software
14 # distributed under the License is distributed on an "AS IS" BASIS,
15 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 # See the License for the specific language governing permissions and
17 # limitations under the License.
18
19 """parsedatetime
20
21 Parse human-readable date/time text.
22
23 Requires Python 2.6 or later
1 """ 24 """
2 parsedatetime.py contains the C{Calendar} class where the C{parse()} 25
3 method can be found. 26 from __future__ import with_statement, absolute_import, unicode_literals
4 27
5 parsedatetime_consts.py contains the C{Constants} class that builds the 28 import re
6 various regex values using locale information if available. 29 import time
7 """ 30 import logging
8 31 import warnings
9 version = '0.8.7' 32 import datetime
10 author = 'Mike Taylor and Darshana Chhajed' 33 import calendar
11 license = """ 34 import contextlib
12 Copyright (c) 2004-2008 Mike Taylor 35 import email.utils
13 Copyright (c) 2006-2008 Darshana Chhajed 36
14 All rights reserved. 37 from .pdt_locales import (locales as _locales,
15 38 get_icu, load_locale)
16 Licensed under the Apache License, Version 2.0 (the "License"); 39 from .context import pdtContext, pdtContextStack
17 you may not use this file except in compliance with the License. 40 from .warns import pdt20DeprecationWarning
18 You may obtain a copy of the License at 41
19 42
20 http://www.apache.org/licenses/LICENSE-2.0 43 __author__ = 'Mike Taylor'
21 44 __email__ = 'bear@bear.im'
22 Unless required by applicable law or agreed to in writing, software 45 __copyright__ = 'Copyright (c) 2016 Mike Taylor'
23 distributed under the License is distributed on an "AS IS" BASIS, 46 __license__ = 'Apache License 2.0'
24 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 47 __version__ = '2.1'
25 See the License for the specific language governing permissions and 48 __url__ = 'https://github.com/bear/parsedatetime'
26 limitations under the License. 49 __download_url__ = 'https://pypi.python.org/pypi/parsedatetime'
27 """ 50 __description__ = 'Parse human-readable date/time text.'
28 51
52 # as a library, do *not* setup logging
53 # see docs.python.org/2/howto/logging.html#configuring-logging-for-a-library
54 # Set default logging handler to avoid "No handler found" warnings.
55
# As a library, parsedatetime must not configure logging itself; it only
# attaches a do-nothing handler so that applications which have not set up
# logging do not see "No handler found" warnings.
try:  # Python 2.7+
    from logging import NullHandler
except ImportError:
    # Older interpreters: provide an equivalent handler that drops records.
    class NullHandler(logging.Handler):

        def emit(self, record):
            pass

log = logging.getLogger(__name__)
log.addHandler(NullHandler())

# Module-wide switch read by the ``debug and log.debug(...)`` statements.
debug = False

# Maps every locale name listed in ``_locales`` to its loaded locale object.
pdtLocales = dict((name, load_locale(name)) for name in _locales)
70
71
72 # Copied from feedparser.py
73 # Universal Feedparser
74 # Copyright (c) 2002-2006, Mark Pilgrim, All rights reserved.
75 # Originally a def inside of _parse_date_w3dtf()
def _extract_date(m):
    """
    Build a C{(year, month, day)} tuple from a W3DTF date regex match.

    The match must expose the named groups C{year}, C{julian}, C{month}
    and C{day}.  A two digit year is placed in the current century.
    When the C{julian} group (day-of-year) is present it is converted by
    plain calendar arithmetic; values outside the year roll over into the
    neighbouring year.

    @type  m: re match object
    @param m: match carrying the date groups

    @rtype:  tuple
    @return: C{(year, month, day)}; C{(0, 0, 0)} when the year is below
             1000 or the ordinal date cannot be represented
    """
    # local import so the block only relies on the standard library
    import datetime

    year = int(m.group('year'))
    if year < 100:
        year = 100 * (time.gmtime()[0] // 100) + year
    if year < 1000:
        return 0, 0, 0

    julian = m.group('julian')
    if julian:
        # Day-of-year form (YYYY-DDD).  The previous implementation guessed
        # a month with float division and then iterated through
        # mktime()/gmtime(), which mixes local time with UTC and fails on
        # Python 3; date arithmetic gives the answer directly.
        try:
            ordinal = (datetime.date(year, 1, 1) +
                       datetime.timedelta(days=int(julian) - 1))
        except OverflowError:
            # rolled past the range supported by datetime.date
            return 0, 0, 0
        return ordinal.year, ordinal.month, ordinal.day

    month = m.group('month')
    if month is None:
        # only a year was given
        return year, 1, 1

    day = m.group('day')
    if day:
        day = int(day)
    else:
        day = 1
    return year, int(month), day
116
117
118 # Copied from feedparser.py
119 # Universal Feedparser
120 # Copyright (c) 2002-2006, Mark Pilgrim, All rights reserved.
121 # Originally a def inside of _parse_date_w3dtf()
def _extract_time(m):
    """
    Build an C{(hours, minutes, seconds)} tuple from a time regex match.

    The match must expose the named groups C{hours}, C{minutes} and
    C{seconds}.  Any fractional part of the seconds is discarded.

    @type  m: re match object or None
    @param m: match carrying the time groups

    @rtype:  tuple
    @return: C{(hours, minutes, seconds)}; C{(0, 0, 0)} when there is no
             match or the C{hours} group is empty
    """
    midnight = (0, 0, 0)
    if not m:
        return midnight

    hourText = m.group('hours')
    if not hourText:
        return midnight

    hh = int(hourText)
    mm = int(m.group('minutes'))

    secText = m.group('seconds')
    if not secText:
        return hh, mm, 0

    # accept either ',' or '.' as the decimal mark and keep the whole part
    wholeSeconds = secText.replace(',', '.').split('.', 1)[0]
    return hh, mm, int(wholeSeconds)
137
138
def _pop_time_accuracy(m, ctx):
    """
    Record on C{ctx} which time components were present in match C{m}.

    For each of the named groups C{hours}, C{minutes} and C{seconds} that
    matched non-empty text, C{ctx.updateAccuracy()} is called with the
    corresponding C{ACU_HOUR}, C{ACU_MIN} or C{ACU_SEC} attribute of
    C{ctx}.  Nothing happens when C{m} is falsy.

    @type  m:   re match object or None
    @param m:   match carrying the time groups
    @type  ctx: object
    @param ctx: context receiving the accuracy updates
    """
    if not m:
        return

    groupToFlag = (('hours', 'ACU_HOUR'),
                   ('minutes', 'ACU_MIN'),
                   ('seconds', 'ACU_SEC'))
    for groupName, flagName in groupToFlag:
        if m.group(groupName):
            ctx.updateAccuracy(getattr(ctx, flagName))
148
149
150 # Copied from feedparser.py
151 # Universal Feedparser
152 # Copyright (c) 2002-2006, Mark Pilgrim, All rights reserved.
153 # Modified to return a tuple instead of mktime
154 #
155 # Original comment:
156 # W3DTF-style date parsing adapted from PyXML xml.utils.iso8601, written by
157 # Drake and licensed under the Python license. Removed all range checking
158 # for month, day, hour, minute, and second, since mktime will normalize
159 # these later
def __closure_parse_date_w3dtf():
    """
    Build and return the W3DTF parser.

    The regular expressions are compiled once inside this closure and the
    returned function is the only thing that escapes it.
    """
    # the __extract_date and __extract_time methods were
    # copied-out so they could be used by my code --bear
    dateRe = (r'(?P<year>\d\d\d\d)'
              r'(?:(?P<dsep>-|)'
              r'(?:(?P<julian>\d\d\d)'
              r'|(?P<month>\d\d)(?:(?P=dsep)(?P<day>\d\d))?))?')
    tzdRe = r'(?P<tzd>[-+](?P<tzdhours>\d\d)(?::?(?P<tzdminutes>\d\d))|Z)'
    # note: the time zone designator is a mandatory part of the time pattern
    timeRe = (r'(?P<hours>\d\d)(?P<tsep>:|)(?P<minutes>\d\d)'
              r'(?:(?P=tsep)(?P<seconds>\d\d(?:[.,]\d+)?))?' +
              tzdRe)
    datetimeRx = re.compile('%s(?:T%s)?' % (dateRe, timeRe))

    def __extract_tzd(m):
        '''Return the Time Zone Designator as an offset in seconds from UTC.'''
        # Not called by the parser below (the returned tuple carries no
        # offset); kept from the feedparser original.
        if not m:
            return 0
        tzd = m.group('tzd')
        if not tzd or tzd == 'Z':
            return 0
        hours = int(m.group('tzdhours'))
        minuteText = m.group('tzdminutes')
        minutes = int(minuteText) if minuteText else 0
        seconds = (hours * 60 + minutes) * 60
        # zones east of UTC ('+') yield a negative correction
        return -seconds if tzd[0] == '+' else seconds

    def _parse_date_w3dtf(dateString):
        """
        Parse a W3DTF string into a 9-item time tuple.

        Returns C{None} unless the pattern consumes the whole string.  The
        last three items of the tuple are always zero.
        """
        m = datetimeRx.match(dateString)
        if m is not None and m.group() == dateString:
            return _extract_date(m) + _extract_time(m) + (0, 0, 0)
        return None

    return _parse_date_w3dtf


# expose the parser and drop the factory from the module namespace
_parse_date_w3dtf = __closure_parse_date_w3dtf()
del __closure_parse_date_w3dtf
206
_monthnames = set([
    'jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
    'aug', 'sep', 'oct', 'nov', 'dec',
    'january', 'february', 'march', 'april', 'may', 'june', 'july',
    'august', 'september', 'october', 'november', 'december'])
_daynames = set(['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun'])


# Copied from feedparser.py
# Universal Feedparser
# Copyright (c) 2002-2006, Mark Pilgrim, All rights reserved.
# Modified to return a tuple instead of mktime
def _parse_date_rfc822(dateString):
    '''
    Parse an RFC822, RFC1123, RFC2822, or asctime-style date.

    A leading day name (or any first word ending in "," or ".") is
    ignored.  When the text has no time part, " 00:00:00 GMT" is appended
    before handing it to C{email.utils.parsedate_tz()}.

    @type  dateString: string
    @param dateString: text to parse

    @rtype:  tuple or None
    @return: the 10-item tuple produced by C{email.utils.parsedate_tz()},
             or C{None} when the text is empty or cannot be parsed
    '''
    data = dateString.split()
    if not data:
        # empty or whitespace-only text: there is no first word to inspect
        return None

    if data[0][-1] in (',', '.') or data[0].lower() in _daynames:
        del data[0]

    if len(data) == 4:
        # the fourth word may be "time+zone" glued together; split it
        timeAndZone = data[3].split('+', 1)
        if len(timeAndZone) == 2:
            data[3:] = timeAndZone
        else:
            data.append('')
        dateString = " ".join(data)

    if len(data) < 5:
        dateString += ' 00:00:00 GMT'

    return email.utils.parsedate_tz(dateString)
235
236
237 # rfc822.py defines several time zones, but we define some extra ones.
238 # 'ET' is equivalent to 'EST', etc.
239 # _additional_timezones = {'AT': -400, 'ET': -500,
240 # 'CT': -600, 'MT': -700,
241 # 'PT': -800}
242 # email.utils._timezones.update(_additional_timezones)
243
# Style versions accepted by Calendar(version=...):
# 1 is the flag style, 2 is the context style.
VERSION_FLAG_STYLE, VERSION_CONTEXT_STYLE = 1, 2
246
247
248 class Calendar(object):
249
250 """
251 A collection of routines to input, parse and manipulate date and times.
252 The text can either be 'normal' date values or it can be human readable.
253 """
254
def __init__(self, constants=None, version=VERSION_FLAG_STYLE):
    """
    Default constructor for the L{Calendar} class.

    @type  constants: object
    @param constants: Instance of the class L{Constants}; a default
                      instance is created when omitted
    @type  version:   integer
    @param version:   Default style version of current Calendar instance.
                      Valid value can be 1 (L{VERSION_FLAG_STYLE}) or
                      2 (L{VERSION_CONTEXT_STYLE}). See L{parse()}.

    @rtype:  object
    @return: L{Calendar} instance
    """
    # fall back to the default constants when none were handed in
    self.ptc = Constants() if constants is None else constants
    self.version = version

    if version == VERSION_FLAG_STYLE:
        message = (
            'Flag style will be deprecated in parsedatetime 2.0. '
            'Instead use the context style by instantiating `Calendar()` '
            'with argument `version=parsedatetime.VERSION_CONTEXT_STYLE`.')
        warnings.warn(message, pdt20DeprecationWarning)

    # contexts opened through context() are kept on this stack
    self._ctxStack = pdtContextStack()
283
@contextlib.contextmanager
def context(self):
    """
    Open a fresh parsing context for the duration of a C{with} block.

    A new C{pdtContext} is pushed on the context stack and yielded.  When
    the block ends the context is popped again and, unless it was the
    outermost one, merged into the context that is now on top of the
    stack.  The pop happens even when the block raises, so a failed parse
    does not leave a stale context on the stack.
    """
    ctx = pdtContext()
    self._ctxStack.push(ctx)
    try:
        yield ctx
    finally:
        ctx = self._ctxStack.pop()
        if not self._ctxStack.isEmpty():
            self.currentContext.update(ctx)
292
@property
def currentContext(self):
    """
    The context currently on top of this calendar's context stack, as
    reported by the stack's C{last()} method.
    """
    stack = self._ctxStack
    return stack.last()
296
def _convertUnitAsWords(self, unitText):
    """
    Converts text units into their number value.

    The text is split on commas, whitespace and hyphens.  Words found in
    C{self.ptc.small} are summed, "hundred" multiplies the running group
    by 100, words found in C{self.ptc.magnitude} close the running group,
    and words in C{self.ptc.ignore} are skipped.  Empty fragments caused
    by leading or trailing separators are skipped as well.

    @type  unitText: string
    @param unitText: number text to convert

    @rtype:  integer
    @return: numerical value of unitText

    @raise ValueError: when a word is not a known number word
    """
    total = 0   # value of the groups already closed by a magnitude word
    group = 0   # value of the group currently being read

    for word in re.split(r"[,\s-]+", unitText):
        if not word:
            # re.split() yields '' for leading/trailing separators
            continue

        small = self.ptc.small.get(word)
        if small is not None:
            group += small
            continue

        if word == "hundred":
            group *= 100
            continue

        scale = self.ptc.magnitude.get(word)
        if scale is not None:
            total += group * scale
            group = 0
        elif word not in self.ptc.ignore:
            raise ValueError("Unknown number: " + word)

    return group + total
324
def _buildTime(self, source, quantity, modifier, units):
    """
    Convert the C{quantity}, C{modifier} and C{units} strings into values
    and apply the resulting offset to C{source}.

    @type  source:   time
    @param source:   time to use as the base (or source); the current
                     local time is used when C{None}
    @type  quantity: string
    @param quantity: quantity string
    @type  modifier: string
    @param modifier: how quantity and units modify the source time
    @type  units:    string
    @param units:    unit of the quantity (i.e. hours, days, months, etc)

    @rtype:  struct_time
    @return: C{struct_time} of the calculated time
    """
    ctx = self.currentContext
    debug and log.debug('_buildTime: [%s][%s][%s]',
                        quantity, modifier, units)

    if source is None:
        source = time.localtime()

    quantity = '' if quantity is None else quantity.strip()
    qty = self._quantityToReal(quantity)

    if modifier in self.ptc.Modifiers:
        qty = qty * self.ptc.Modifiers[modifier]

    if units is None or units == '':
        units = 'dy'

    # plurals are handled by regex's (could be a bug tho)

    (year, month, day, hour, minute, second, _, _, _) = source
    start = datetime.datetime(year, month, day, hour, minute, second)

    # map the unit text to the name of the unit family that lists it;
    # unknown text is kept as it is
    realunit = next((family
                     for family, spellings in self.ptc.units.items()
                     if units in spellings), units)

    debug and log.debug('units %s --> realunit %s (qty=%s)',
                        units, realunit, qty)

    target = start
    try:
        if realunit in ('years', 'months'):
            # inc() takes the singular keyword: year=... / month=...
            target = self.inc(start, **{realunit[:-1]: qty})
        elif realunit in ('days', 'hours', 'minutes', 'seconds', 'weeks'):
            target = start + datetime.timedelta(**{realunit: qty})
    except OverflowError:
        # raised when the result would leave the supported year range;
        # the unmodified start time is returned in that case
        pass
    else:
        ctx.updateAccuracy(realunit)

    return target.timetuple()
383 elif realunit in ('days', 'hours', 'minutes', 'seconds', 'weeks'):
384 delta = datetime.timedelta(**{realunit: qty})
385 target = start + delta
386 except OverflowError:
387 # OverflowError is raise when target.year larger than 9999
388 pass
389 else:
390 ctx.updateAccuracy(realunit)
391
392 return target.timetuple()
393
def parseDate(self, dateString, sourceTime=None):
    """
    Parse short-form date strings::

        '05/28/2006' or '04.21'

    @type  dateString: string
    @param dateString: text to convert to a C{datetime}
    @type  sourceTime: struct_time
    @param sourceTime: C{struct_time} value to use as the base

    @rtype:  struct_time
    @return: calculated C{struct_time} value of dateString
    """
    base = time.localtime() if sourceTime is None else sourceTime
    yr, mth, dy, hr, mn, sec, wd, yd, isdst = base

    # The numbers pulled out of the text are stored here and later mapped
    # to month/day/year according to the locale.  -1 marks "not given" so
    # that a literal zero is passed through and rejected further down.
    fields = [-1, -1, -1]
    accuracy = []

    rest = dateString
    sep = self.ptc.CRE_DATE2.search(rest)
    if sep is not None:
        cut = sep.start()
        fields[0] = int(rest[:cut])
        rest = rest[cut + 1:]

    sep = self.ptc.CRE_DATE2.search(rest)
    if sep is not None:
        cut = sep.start()
        fields[1] = int(rest[:cut])
        fields[2] = int(rest[cut + 1:])
    else:
        fields[1] = int(rest.strip())

    parts = {'m': mth, 'd': dy, 'y': yr}

    # a first number above 31 can only be a year: yyyy/mm/dd format
    if fields[0] <= 31:
        dp_order = self.ptc.dp_order
    else:
        dp_order = ['y', 'm', 'd']

    flagFor = {'m': pdtContext.ACU_MONTH,
               'd': pdtContext.ACU_DAY,
               'y': pdtContext.ACU_YEAR}
    for position, number in enumerate(fields):
        code = dp_order[position]
        if number >= 0:
            parts[code] = number
            accuracy.append(flagFor[code])

    # if the year is not specified and the date has already
    # passed, increment the year
    yearOmitted = fields[2] == -1
    if yearOmitted and (mth, dy) > (parts['m'], parts['d']):
        yr = parts['y'] + self.ptc.YearParseStyle
    else:
        yr = parts['y']

    mth = parts['m']
    dy = parts['d']

    # birthday epoch constraint
    if yr < self.ptc.BirthdayEpoch:
        yr += 2000
    elif yr < 100:
        yr += 1900

    daysInCurrentMonth = self.ptc.daysInMonth(mth, yr)
    debug and log.debug('parseDate: %s %s %s %s',
                        yr, mth, dy, daysInCurrentMonth)

    with self.context() as ctx:
        if 0 < mth <= 12 and 0 < dy <= daysInCurrentMonth:
            sourceTime = (yr, mth, dy, hr, mn, sec, wd, yd, isdst)
            ctx.updateAccuracy(*accuracy)
        else:
            # return current time if date string is invalid
            sourceTime = time.localtime()

    return sourceTime
482
def parseDateText(self, dateString, sourceTime=None):
    """
    Parse long-form date strings::

        'May 31st, 2006'
        'Jan 1st'
        'July 2006'

    Text that does not contain a long-form date, or that names a day
    which does not exist in the month, yields the current local time.

    @type  dateString: string
    @param dateString: text to convert to a datetime
    @type  sourceTime: struct_time
    @param sourceTime: C{struct_time} value to use as the base

    @rtype:  struct_time
    @return: calculated C{struct_time} value of dateString
    """
    if sourceTime is None:
        yr, mth, dy, hr, mn, sec, wd, yd, isdst = time.localtime()
    else:
        yr, mth, dy, hr, mn, sec, wd, yd, isdst = sourceTime

    currentMth = mth
    currentDy = dy
    accuracy = []

    debug and log.debug('parseDateText currentMth %s currentDy %s',
                        mth, dy)

    s = dateString.lower()
    m = self.ptc.CRE_DATE3.search(s)
    if m is None:
        # no month name found: treat like any other invalid date string
        # instead of failing on m.group()
        return time.localtime()

    mth = self.ptc.MonthOffsets[m.group('mthname')]
    accuracy.append('month')

    if m.group('day') is not None:
        dy = int(m.group('day'))
        accuracy.append('day')
    else:
        dy = 1

    if m.group('year') is not None:
        yr = int(m.group('year'))
        accuracy.append('year')

        # birthday epoch constraint
        if yr < self.ptc.BirthdayEpoch:
            yr += 2000
        elif yr < 100:
            yr += 1900

    elif (mth < currentMth) or (mth == currentMth and dy < currentDy):
        # if that day and month have already passed in this year,
        # then increment the year by 1
        yr += self.ptc.YearParseStyle

    with self.context() as ctx:
        if dy > 0 and dy <= self.ptc.daysInMonth(mth, yr):
            sourceTime = (yr, mth, dy, hr, mn, sec, wd, yd, isdst)
            ctx.updateAccuracy(*accuracy)
        else:
            # Return current time if date string is invalid
            sourceTime = time.localtime()

    debug and log.debug('parseDateText returned '
                        'mth %d dy %d yr %d sourceTime %s',
                        mth, dy, yr, sourceTime)

    return sourceTime
551
def evalRanges(self, datetimeString, sourceTime=None):
    """
    Evaluate the C{datetimeString} text and determine if
    it represents a date or time range.

    @type  datetimeString: string
    @param datetimeString: datetime text to evaluate
    @type  sourceTime:     struct_time
    @param sourceTime:     C{struct_time} value to use as the base

    @rtype:  tuple
    @return: tuple of: start datetime, end datetime and the invalid flag
             (0 = no range found, 1 = date range, 2 = time range)
    """
    rangeFlag = retFlag = 0
    startStr = endStr = ''
    rangeSep = self.ptc.rangeSep

    def splitAtSep(text):
        # Split at the first separator, treating it as plain text (the
        # same way the ``in`` test and replace() below do) rather than as
        # a regular expression of width one.
        head, _, tail = text.partition(rangeSep)
        return head, tail

    s = datetimeString.strip().lower()

    if rangeSep in s:
        # pad the separator with blanks, then collapse the double blanks
        # this creates when it was already padded
        s = s.replace(rangeSep, ' %s ' % rangeSep)
        s = s.replace('  ', ' ')

    for cre, rflag in [(self.ptc.CRE_TIMERNG1, 1),
                       (self.ptc.CRE_TIMERNG2, 2),
                       (self.ptc.CRE_TIMERNG4, 7),
                       (self.ptc.CRE_TIMERNG3, 3),
                       (self.ptc.CRE_DATERNG1, 4),
                       (self.ptc.CRE_DATERNG2, 5),
                       (self.ptc.CRE_DATERNG3, 6)]:
        m = cre.search(s)
        if m is not None:
            rangeFlag = rflag
            break

    debug and log.debug('evalRanges: rangeFlag = %s [%s]', rangeFlag, s)

    if m is not None:
        if (m.group() != s):
            # capture remaining string
            parseStr = m.group()
            chunk1 = s[:m.start()]
            chunk2 = s[m.end():]
            s = '%s %s' % (chunk1, chunk2)

            # whatever surrounds the range becomes the base time
            sourceTime, ctx = self.parse(s, sourceTime,
                                         VERSION_CONTEXT_STYLE)

            if not ctx.hasDateOrTime:
                sourceTime = None
        else:
            parseStr = s

    if rangeFlag in (1, 2):
        startStr, endStr = splitAtSep(parseStr)
        retFlag = 2

    elif rangeFlag in (3, 7):
        head, endStr = splitAtSep(parseStr)
        # capturing the meridian from the end time
        if self.ptc.usesMeridian:
            ampm = re.search(self.ptc.am[0], parseStr)

            # appending the meridian to the start time
            if ampm is not None:
                startStr = head + self.ptc.meridian[0]
            else:
                startStr = head + self.ptc.meridian[1]
        else:
            startStr = head

        retFlag = 2

    elif rangeFlag == 4:
        startStr, endStr = splitAtSep(parseStr)
        retFlag = 1

    elif rangeFlag == 5:
        head, endStr = splitAtSep(parseStr)

        # capturing the year from the end date
        date = self.ptc.CRE_DATE3.search(endStr)
        endYear = date.group('year')

        # appending the year to the start date if the start date
        # does not have year information and the end date does.
        # eg : "Aug 21 - Sep 4, 2007"
        if endYear is not None:
            startStr = head.strip()
            date = self.ptc.CRE_DATE3.search(startStr)
            startYear = date.group('year')

            if startYear is None:
                startStr = startStr + ', ' + endYear
        else:
            startStr = head

        retFlag = 1

    elif rangeFlag == 6:
        startStr, tail = splitAtSep(parseStr)

        # capturing the month from the start date
        mth = self.ptc.CRE_DATE3.search(startStr)
        mth = mth.group('mthname')

        # appending the month name to the end date
        endStr = mth + tail

        retFlag = 1

    else:
        # if range is not found
        startDT = endDT = time.localtime()

    if retFlag:
        startDT, sctx = self.parse(startStr, sourceTime,
                                   VERSION_CONTEXT_STYLE)
        endDT, ectx = self.parse(endStr, sourceTime,
                                 VERSION_CONTEXT_STYLE)

        # both ends must have parsed to something for a valid range
        if not sctx.hasDateOrTime or not ectx.hasDateOrTime:
            retFlag = 0

    return startDT, endDT, retFlag
684
def _CalculateDOWDelta(self, wd, wkdy, offset, style, currentDayStyle):
    """
    Work out how many days lie between the current day and the parsed
    day-of-week, honouring C{style} and C{currentDayStyle}.

    @type  wd:              integer
    @param wd:              day-of-week value for the current day
    @type  wkdy:            integer
    @param wkdy:            day-of-week value for the parsed day
    @type  offset:          integer
    @param offset:          offset direction for any modifiers (-1, 0, 1);
                            2 means that no modifier was present
    @type  style:           integer
    @param style:           normally the value
                            set in C{Constants.DOWParseStyle}
    @type  currentDayStyle: integer
    @param currentDayStyle: normally the value
                            set in C{Constants.CurrentDOWParseStyle}

    @rtype:  integer
    @return: calculated day-of-week delta, in days
    """
    requestedOffset = offset
    weeks = offset

    if requestedOffset == 2:
        # no modifier is present.
        # i.e. string to be parsed is just DOW
        inCurrentWeek = (wkdy * style > wd * style or
                         currentDayStyle and wkdy == wd)
        if inCurrentWeek:
            weeks = 0
        elif style in (-1, 1):
            # wkdy located in last (-1) or next (1) week
            weeks = style
        else:
            # invalid style, or should raise error?
            weeks = 0

    # weeks = -1 means last week
    # weeks = 0 means current week
    # weeks = 1 means next week
    diff = (wkdy - wd) + 7 * weeks
    if style == 1 and diff < -7:
        diff += 7
    elif style == -1 and diff > 7:
        diff -= 7

    debug and log.debug("wd %s, wkdy %s, offset %d, "
                        "style %d, currentDayStyle %d",
                        wd, wkdy, requestedOffset, style, currentDayStyle)

    return diff
737
def _quantityToReal(self, quantity):
    """
    Convert a quantity, either spelled-out or numeric, to a float.

    A comma is accepted as the decimal mark.  Text that is not a number
    is looked up in C{self.ptc.numbers}.

    @type  quantity: string
    @param quantity: quantity to parse to float
    @rtype:  float
    @return: the quantity as a float; 1.0 for an empty quantity and 0.0
             when the text is neither numeric nor a known number word
    """
    if not quantity:
        return 1.0

    value = None
    try:
        value = float(quantity.replace(',', '.'))
    except ValueError:
        # not a plain number; try the spelled-out numbers next
        pass

    if value is None:
        try:
            value = float(self.ptc.numbers[quantity])
        except KeyError:
            value = 0.0

    return value
761
def _evalModifier(self, modifier, chunk1, chunk2, sourceTime):
    """
    Evaluate the C{modifier} string and following text (passed in
    as C{chunk1} and C{chunk2}) and if they match any known modifiers
    calculate the delta and apply it to C{sourceTime}.

    @type  modifier:   string
    @param modifier:   modifier text to apply to sourceTime
    @type  chunk1:     string
    @param chunk1:     text chunk that preceded modifier (if any)
    @type  chunk2:     string
    @param chunk2:     text chunk that followed modifier (if any)
    @type  sourceTime: struct_time
    @param sourceTime: C{struct_time} value to use as the base

    @rtype:  tuple
    @return: tuple of: remaining text and the modified sourceTime
    """
    ctx = self.currentContext
    offset = self.ptc.Modifiers[modifier]

    # baseTime is never None, so it can be handed to time.mktime() below
    if sourceTime is not None:
        baseTime = sourceTime
    else:
        baseTime = time.localtime()
    (yr, mth, dy, hr, mn, sec, wd, yd, isdst) = baseTime

    if self.ptc.StartTimeFromSourceTime:
        startHour = hr
        startMinute = mn
        startSecond = sec
    else:
        startHour = 9
        startMinute = 0
        startSecond = 0

    # capture the units after the modifier and the remaining
    # string after the unit
    m = self.ptc.CRE_REMAINING.search(chunk2)
    if m is not None:
        index = m.start() + 1
        unit = chunk2[:m.start()]
        chunk2 = chunk2[index:]
    else:
        unit = chunk2
        chunk2 = ''

    debug and log.debug("modifier [%s] chunk1 [%s] "
                        "chunk2 [%s] unit [%s]",
                        modifier, chunk1, chunk2, unit)

    if unit in self.ptc.units['months']:
        currentDaysInMonth = self.ptc.daysInMonth(mth, yr)
        if offset == 0:
            dy = currentDaysInMonth
            sourceTime = (yr, mth, dy, startHour, startMinute,
                          startSecond, wd, yd, isdst)
        elif offset == 2:
            if dy == currentDaysInMonth:
                # The last day of this month maps to the last day of the
                # next month.  The target is built directly in the next
                # month (rolling over the year after December) because
                # that day number may not exist in the current month.
                if mth == 12:
                    nextYr, nextMth = yr + 1, 1
                else:
                    nextYr, nextMth = yr, mth + 1
                target = datetime.datetime(
                    nextYr, nextMth,
                    self.ptc.daysInMonth(nextMth, nextYr),
                    startHour, startMinute, startSecond)
            else:
                start = datetime.datetime(yr, mth, dy, startHour,
                                          startMinute, startSecond)
                target = self.inc(start, month=1)
            sourceTime = target.timetuple()
        else:
            start = datetime.datetime(yr, mth, 1, startHour,
                                      startMinute, startSecond)
            target = self.inc(start, month=offset)
            sourceTime = target.timetuple()
        ctx.updateAccuracy(ctx.ACU_MONTH)

    elif unit in self.ptc.units['weeks']:
        if offset == 0:
            start = datetime.datetime(yr, mth, dy, 17, 0, 0)
            target = start + datetime.timedelta(days=(4 - wd))
            sourceTime = target.timetuple()
        elif offset == 2:
            start = datetime.datetime(yr, mth, dy, startHour,
                                      startMinute, startSecond)
            target = start + datetime.timedelta(days=7)
            sourceTime = target.timetuple()
        else:
            start = datetime.datetime(yr, mth, dy, startHour,
                                      startMinute, startSecond)
            target = start + offset * datetime.timedelta(weeks=1)
            sourceTime = target.timetuple()
        ctx.updateAccuracy(ctx.ACU_WEEK)

    elif unit in self.ptc.units['days']:
        if offset == 0:
            sourceTime = (yr, mth, dy, 17, 0, 0, wd, yd, isdst)
            ctx.updateAccuracy(ctx.ACU_HALFDAY)
        elif offset == 2:
            start = datetime.datetime(yr, mth, dy, hr, mn, sec)
            target = start + datetime.timedelta(days=1)
            sourceTime = target.timetuple()
        else:
            start = datetime.datetime(yr, mth, dy, startHour,
                                      startMinute, startSecond)
            target = start + datetime.timedelta(days=offset)
            sourceTime = target.timetuple()
        ctx.updateAccuracy(ctx.ACU_DAY)

    elif unit in self.ptc.units['hours']:
        if offset == 0:
            sourceTime = (yr, mth, dy, hr, 0, 0, wd, yd, isdst)
        else:
            start = datetime.datetime(yr, mth, dy, hr, 0, 0)
            target = start + datetime.timedelta(hours=offset)
            sourceTime = target.timetuple()
        ctx.updateAccuracy(ctx.ACU_HOUR)

    elif unit in self.ptc.units['years']:
        if offset == 0:
            sourceTime = (yr, 12, 31, hr, mn, sec, wd, yd, isdst)
        elif offset == 2:
            sourceTime = (yr + 1, mth, dy, hr, mn, sec, wd, yd, isdst)
        else:
            sourceTime = (yr + offset, 1, 1, startHour, startMinute,
                          startSecond, wd, yd, isdst)
        ctx.updateAccuracy(ctx.ACU_YEAR)

    elif modifier == 'eom':
        dy = self.ptc.daysInMonth(mth, yr)
        sourceTime = (yr, mth, dy, startHour, startMinute,
                      startSecond, wd, yd, isdst)
        ctx.updateAccuracy(ctx.ACU_DAY)

    elif modifier == 'eoy':
        mth = 12
        dy = self.ptc.daysInMonth(mth, yr)
        sourceTime = (yr, mth, dy, startHour, startMinute,
                      startSecond, wd, yd, isdst)
        ctx.updateAccuracy(ctx.ACU_MONTH)

    elif self.ptc.CRE_WEEKDAY.match(unit):
        m = self.ptc.CRE_WEEKDAY.match(unit)
        debug and log.debug('CRE_WEEKDAY matched')
        wkdy = m.group()

        if modifier == 'eod':
            ctx.updateAccuracy(ctx.ACU_HOUR)
            # Calculate the upcoming weekday
            sourceTime, subctx = self.parse(wkdy, sourceTime,
                                            VERSION_CONTEXT_STYLE)
            sTime = self.ptc.getSource(modifier, sourceTime)
            if sTime is not None:
                sourceTime = sTime
                ctx.updateAccuracy(ctx.ACU_HALFDAY)
        else:
            # unless one of these modifiers is being applied to the
            # day-of-week, we want to start with target as the day
            # in the current week.
            dowOffset = offset
            if modifier not in ['next', 'last', 'prior', 'previous']:
                dowOffset = 0

            wkdy = self.ptc.WeekdayOffsets[wkdy]
            diff = self._CalculateDOWDelta(
                wd, wkdy, dowOffset, self.ptc.DOWParseStyle,
                self.ptc.CurrentDOWParseStyle)
            start = datetime.datetime(yr, mth, dy, startHour,
                                      startMinute, startSecond)
            target = start + datetime.timedelta(days=diff)

            if chunk1 != '':
                # consider "one day before thursday": we need to parse
                # chunk1 ("one day") and apply according to the offset
                # ("before"), rather than allowing the remaining parse
                # step to apply "one day" without the offset direction.
                # baseTime is used on both sides so that the delta is
                # well defined even when no sourceTime was passed in.
                t, subctx = self.parse(chunk1, baseTime,
                                       VERSION_CONTEXT_STYLE)
                if subctx.hasDateOrTime:
                    delta = time.mktime(t) - time.mktime(baseTime)
                    target = (start +
                              datetime.timedelta(days=diff) +
                              datetime.timedelta(seconds=delta * offset))
                    chunk1 = ''

            sourceTime = target.timetuple()
        ctx.updateAccuracy(ctx.ACU_DAY)

    elif self.ptc.CRE_TIME.match(unit):
        debug and log.debug('CRE_TIME matched')
        (yr, mth, dy, hr, mn, sec, wd, yd, isdst), subctx = \
            self.parse(unit, None, VERSION_CONTEXT_STYLE)

        start = datetime.datetime(yr, mth, dy, hr, mn, sec)
        target = start + datetime.timedelta(days=offset)
        sourceTime = target.timetuple()

    else:
        # check if the remaining text is parsable and if so,
        # use it as the base time for the modifier source time

        debug and log.debug('check for modifications '
                            'to source time [%s] [%s]',
                            chunk1, unit)

        unit = unit.strip()
        if unit:
            s = '%s %s' % (unit, chunk2)
            t, subctx = self.parse(s, sourceTime, VERSION_CONTEXT_STYLE)

            if subctx.hasDate:  # working with dates
                u = unit.lower()
                if u in self.ptc.Months or \
                        u in self.ptc.shortMonths:
                    yr, mth, dy, hr, mn, sec, wd, yd, isdst = t
                    start = datetime.datetime(
                        yr, mth, dy, hr, mn, sec)
                    t = self.inc(start, year=offset).timetuple()
                elif u in self.ptc.Weekdays:
                    # t is a struct_time; go through datetime to be able
                    # to shift it by whole weeks
                    shifted = (datetime.datetime(*t[:6]) +
                               datetime.timedelta(weeks=offset))
                    t = shifted.timetuple()

            if subctx.hasDateOrTime:
                sourceTime = t
                chunk2 = ''

        chunk1 = chunk1.strip()

        # if the word after next is a number, the string is more than
        # likely to be "next 4 hrs" which we will have to combine the
        # units with the rest of the string
        if chunk1:
            try:
                m = list(self.ptc.CRE_NUMBER.finditer(chunk1))[-1]
            except IndexError:
                pass
            else:
                debug and log.debug('CRE_NUMBER matched')
                # sign the last number in chunk1 with the modifier offset
                qty = self._quantityToReal(m.group()) * offset
                chunk1 = '%s%s%s' % (chunk1[:m.start()],
                                     qty, chunk1[m.end():])
            t, subctx = self.parse(chunk1, sourceTime,
                                   VERSION_CONTEXT_STYLE)

            chunk1 = ''

            if subctx.hasDateOrTime:
                sourceTime = t

        debug and log.debug('looking for modifier %s', modifier)
        sTime = self.ptc.getSource(modifier, sourceTime)
        if sTime is not None:
            debug and log.debug('modifier found in sources')
            sourceTime = sTime
            ctx.updateAccuracy(ctx.ACU_HALFDAY)

    debug and log.debug('returning chunk = "%s %s" and sourceTime = %s',
                        chunk1, chunk2, sourceTime)

    return '%s %s' % (chunk1, chunk2), sourceTime
1015
def _evalDT(self, datetimeString, sourceTime):
    """
    Calculate the datetime from known format like RFC822 or W3CDTF

    Examples handled::
        RFC822, W3CDTF formatted dates
        HH:MM[:SS][ am/pm]
        MM/DD/YYYY
        DD MMMM YYYY

    The text is only examined when C{sourceTime} is C{None}; otherwise
    C{sourceTime} is returned unchanged.

    @type  datetimeString: string
    @param datetimeString: text to try and parse as more "traditional"
                           date/time text
    @type  sourceTime:     struct_time
    @param sourceTime:     C{struct_time} value to use as the base

    @rtype:  struct_time
    @return: calculated C{struct_time} value or current C{struct_time}
             if not parsed
    """
    ctx = self.currentContext
    s = datetimeString.strip()

    # Given string date is a RFC822 date.  Empty text is skipped: there
    # is nothing for the RFC822 parser to look at.
    if sourceTime is None and s:
        sourceTime = _parse_date_rfc822(s)
        debug and log.debug(
            'attempt to parse as rfc822 - %s', str(sourceTime))

        if sourceTime is not None:
            (yr, mth, dy, hr, mn, sec, wd, yd, isdst, _) = sourceTime
            ctx.updateAccuracy(ctx.ACU_YEAR, ctx.ACU_MONTH, ctx.ACU_DAY)

            # any non-zero component means a time of day was present
            # (date-only text is parsed with 00:00:00 appended)
            if hr != 0 or mn != 0 or sec != 0:
                ctx.updateAccuracy(ctx.ACU_HOUR, ctx.ACU_MIN, ctx.ACU_SEC)

            # drop the trailing time zone offset item
            sourceTime = (yr, mth, dy, hr, mn, sec, wd, yd, isdst)

    # Given string date is a W3CDTF date
    if sourceTime is None:
        sourceTime = _parse_date_w3dtf(s)

        if sourceTime is not None:
            ctx.updateAccuracy(ctx.ACU_YEAR, ctx.ACU_MONTH, ctx.ACU_DAY,
                               ctx.ACU_HOUR, ctx.ACU_MIN, ctx.ACU_SEC)

    if sourceTime is None:
        sourceTime = time.localtime()

    return sourceTime
1066
1067 def _evalUnits(self, datetimeString, sourceTime):
1068 """
1069 Evaluate text passed by L{_partialParseUnits()}
1070 """
1071 s = datetimeString.strip()
1072 sourceTime = self._evalDT(datetimeString, sourceTime)
1073
1074 # Given string is a time string with units like "5 hrs 30 min"
1075 modifier = '' # TODO
1076
1077 m = self.ptc.CRE_UNITS.search(s)
1078 if m is not None:
1079 units = m.group('units')
1080 quantity = s[:m.start('units')]
1081
1082 sourceTime = self._buildTime(sourceTime, quantity, modifier, units)
1083 return sourceTime
1084
1085 def _evalQUnits(self, datetimeString, sourceTime):
1086 """
1087 Evaluate text passed by L{_partialParseQUnits()}
1088 """
1089 s = datetimeString.strip()
1090 sourceTime = self._evalDT(datetimeString, sourceTime)
1091
1092 # Given string is a time string with single char units like "5 h 30 m"
1093 modifier = '' # TODO
1094
1095 m = self.ptc.CRE_QUNITS.search(s)
1096 if m is not None:
1097 units = m.group('qunits')
1098 quantity = s[:m.start('qunits')]
1099
1100 sourceTime = self._buildTime(sourceTime, quantity, modifier, units)
1101 return sourceTime
1102
def _evalDateStr(self, datetimeString, sourceTime):
    """
    Evaluate text passed by L{_partialParseDateStr()}

    Resolves the base time through L{_evalDT()} and delegates the
    stripped text (e.g. "May 23rd, 2005") to C{parseDateText()}.

    @return: whatever C{parseDateText()} returns
    """
    dateText = datetimeString.strip()
    baseTime = self._evalDT(datetimeString, sourceTime)

    # Given string is in the format "May 23rd, 2005"
    if debug:
        log.debug('checking for MMM DD YYYY')
    return self.parseDateText(dateText, baseTime)
1113
1114 def _evalDateStd(self, datetimeString, sourceTime):
1115 """
1116 Evaluate text passed by L{_partialParseDateStd()}
1117 """
1118 s = datetimeString.strip()
1119 sourceTime = self._evalDT(datetimeString, sourceTime)
1120
1121 # Given string is in the format 07/21/2006
1122 return self.parseDate(s, sourceTime)
1123
def _evalDayStr(self, datetimeString, sourceTime):
    """
    Evaluate text passed by L{_partialParseDaystr()}

    Handles natural language day words such as today or tomorrow by
    shifting the base date by the matching entry of C{ptc.dayOffsets}
    (0 days when the word is not listed).

    @rtype:  struct_time
    @return: shifted date; the clock part is copied from the base time
             when C{ptc.StartTimeFromSourceTime} is set, otherwise it
             is 09:00:00
    """
    dayWord = datetimeString.strip()
    sourceTime = self._evalDT(datetimeString, sourceTime)

    # Given string is a natural language date string like today, tomorrow..
    (yr, mth, dy, hr, mn, sec, wd, yd, isdst) = sourceTime

    try:
        dayShift = self.ptc.dayOffsets[dayWord]
    except KeyError:
        dayShift = 0

    # either keep the clock of the base time or start at 9 o'clock
    if self.ptc.StartTimeFromSourceTime:
        clock = (hr, mn, sec)
    else:
        clock = (9, 0, 0)

    self.currentContext.updateAccuracy(pdtContext.ACU_DAY)
    begin = datetime.datetime(yr, mth, dy, *clock)
    return (begin + datetime.timedelta(days=dayShift)).timetuple()
1153
def _evalWeekday(self, datetimeString, sourceTime):
    """
    Evaluate text passed by L{_partialParseWeekday()}

    Looks the weekday name up in C{ptc.WeekdayOffsets} and moves the
    base date by the number of days C{_CalculateDOWDelta()} reports.

    @type  datetimeString: string
    @param datetimeString: weekday name; must be a key of
                           C{ptc.WeekdayOffsets} (C{KeyError} otherwise)
    @type  sourceTime:     struct_time
    @param sourceTime:     C{struct_time} value to use as the base

    @rtype:  struct_time
    @return: base time shifted to the requested weekday
    """
    s = datetimeString.strip()
    sourceTime = self._evalDT(datetimeString, sourceTime)

    # Given string is a weekday
    yr, mth, dy, hr, mn, sec, wd, yd, isdst = sourceTime

    start = datetime.datetime(yr, mth, dy, hr, mn, sec)
    wkdy = self.ptc.WeekdayOffsets[s]

    # The former "wkdy > wd" test selected between two identical calls,
    # so the delta never depended on it; a single call is equivalent.
    qty = self._CalculateDOWDelta(wd, wkdy, 2,
                                  self.ptc.DOWParseStyle,
                                  self.ptc.CurrentDOWParseStyle)

    self.currentContext.updateAccuracy(pdtContext.ACU_DAY)
    target = start + datetime.timedelta(days=qty)
    return target.timetuple()
1179
def _evalTimeStr(self, datetimeString, sourceTime):
    """
    Evaluate text passed by L{_partialParseTimeStr()}

    A "now" word only records C{ACU_NOW} accuracy. Any other text is
    looked up through C{ptc.getSource()} (natural language times such
    as lunch or midnight) and records C{ACU_HALFDAY} accuracy.

    @return: the time from C{ptc.getSource()} when it is truthy,
             otherwise the base time from L{_evalDT()}
    """
    phrase = datetimeString.strip()
    sourceTime = self._evalDT(datetimeString, sourceTime)

    if phrase in self.ptc.re_values['now']:
        self.currentContext.updateAccuracy(pdtContext.ACU_NOW)
        return sourceTime

    # Given string is a natural language time string like
    # lunch, midnight, etc
    resolved = self.ptc.getSource(phrase, sourceTime)
    if resolved:
        sourceTime = resolved
    self.currentContext.updateAccuracy(pdtContext.ACU_HALFDAY)

    return sourceTime
1198
def _evalMeridian(self, datetimeString, sourceTime):
    """
    Evaluate text passed by L{_partialParseMeridian()}

    Expects text in the format HH:MM(:SS)(am/pm). The hour is folded
    into 24 hour form (24 becomes 0, 12am becomes 0, pm hours below 12
    gain 12) and the result replaces the clock part of the base time
    when it passes the range check.

    @return: 9-item tuple with the new clock values, or the base time
             from L{_evalDT()} when the values are out of range
    """
    text = datetimeString.strip()
    sourceTime = self._evalDT(datetimeString, sourceTime)

    # Given string is in the format HH:MM(:SS)(am/pm)
    yr, mth, dy, hr, mn, sec, wd, yd, isdst = sourceTime

    match = self.ptc.CRE_TIMEHMS2.search(text)
    if match is not None:
        clock = text[:match.start('meridian')].strip()
        if len(clock) > 2:
            hr, mn, sec = _extract_time(match)
        else:
            # only an hour was given, e.g. "5" or "12"
            hr, mn, sec = int(clock), 0, 0

        if hr == 24:
            hr = 0

        meridian = match.group('meridian').lower()

        # if 'am' found and hour is 12 - force hour to 0 (midnight)
        if meridian in self.ptc.am and hr == 12:
            hr = 0

        # if 'pm' found and hour < 12, add 12 to shift to evening
        if meridian in self.ptc.pm and hr < 12:
            hr += 12

    # time validation
    inRange = hr < 24 and mn < 60 and sec < 60
    if inRange:
        sourceTime = (yr, mth, dy, hr, mn, sec, wd, yd, isdst)
        _pop_time_accuracy(match, self.currentContext)

    return sourceTime
1238
def _evalTimeStd(self, datetimeString, sourceTime):
    """
    Evaluate text passed by L{_partialParseTimeStd()}

    Expects text in the format HH:MM(:SS). An hour of 24 is treated as
    0 and the result replaces the clock part of the base time when it
    passes the range check.

    @return: 9-item tuple with the new clock values, or the base time
             from L{_evalDT()} when the values are out of range
    """
    text = datetimeString.strip()
    sourceTime = self._evalDT(datetimeString, sourceTime)

    # Given string is in the format HH:MM(:SS)
    yr, mth, dy, hr, mn, sec, wd, yd, isdst = sourceTime

    match = self.ptc.CRE_TIMEHMS.search(text)
    if match is not None:
        hr, mn, sec = _extract_time(match)
    if hr == 24:
        hr = 0

    # time validation
    inRange = hr < 24 and mn < 60 and sec < 60
    if inRange:
        sourceTime = (yr, mth, dy, hr, mn, sec, wd, yd, isdst)
        _pop_time_accuracy(match, self.currentContext)

    return sourceTime
1261
1262 def _UnitsTrapped(self, s, m, key):
1263 # check if a day suffix got trapped by a unit match
1264 # for example Dec 31st would match for 31s (aka 31 seconds)
1265 # Dec 31st
1266 # ^ ^
1267 # | +-- m.start('units')
1268 # | and also m2.start('suffix')
1269 # +---- m.start('qty')
1270 # and also m2.start('day')
1271 m2 = self.ptc.CRE_DAY2.search(s)
1272 if m2 is not None:
1273 t = '%s%s' % (m2.group('day'), m.group(key))
1274 if m.start(key) == m2.start('suffix') and \
1275 m.start('qty') == m2.start('day') and \
1276 m.group('qty') == t:
1277 return True
1278 else:
1279 return False
1280 else:
1281 return False
1282
def _partialParseModifier(self, s, sourceTime):
    """
    Test whether C{s} matches CRE_MODIFIER; used by L{parse()}

    @type  s:          string
    @param s:          date/time text to evaluate
    @type  sourceTime: struct_time
    @param sourceTime: C{struct_time} value to use as the base

    @rtype:  tuple
    @return: tuple of the remaining date/time text, the (possibly
             updated) time value and a boolean telling whether the
             pattern matched
    """
    # Modifier like next/prev/from/after/prior..
    match = self.ptc.CRE_MODIFIER.search(s)
    if match is None:
        return s, sourceTime, False

    # When the modifier is the whole string both slices are empty, so
    # one code path covers the "whole" and the "partial" match.
    parseStr = match.group()
    chunk1 = s[:match.start()].strip()
    chunk2 = s[match.end():].strip()

    if parseStr:
        if debug:
            log.debug('found (modifier) [%s][%s][%s]',
                      parseStr, chunk1, chunk2)
        s, sourceTime = self._evalModifier(parseStr, chunk1,
                                           chunk2, sourceTime)

    return s, sourceTime, bool(parseStr)
1318
def _partialParseUnits(self, s, sourceTime):
    """
    Test whether C{s} matches CRE_UNITS; used by L{parse()}

    @type  s:          string
    @param s:          date/time text to evaluate
    @type  sourceTime: struct_time
    @param sourceTime: C{struct_time} value to use as the base

    @rtype:  tuple
    @return: tuple of the remaining date/time text, the (possibly
             updated) time value and a boolean telling whether the
             pattern matched
    """
    parseStr = None
    chunk1 = chunk2 = ''

    # Quantity + Units
    match = self.ptc.CRE_UNITS.search(s)
    if match is not None:
        if debug:
            log.debug('CRE_UNITS matched')

        if self._UnitsTrapped(s, match, 'units'):
            if debug:
                log.debug('day suffix trapped by unit match')
        elif match.group('qty') == s:
            # the whole string is the quantity + units
            parseStr, s = s, ''
        else:
            # capture remaining string
            parseStr = match.group('qty')
            chunk1 = s[:match.start('qty')].strip()
            chunk2 = s[match.end('qty'):].strip()

            # a dash right before the quantity is a sign: move it over
            if chunk1.endswith('-'):
                parseStr = '-%s' % parseStr
                chunk1 = chunk1[:-1]

            s = '%s %s' % (chunk1, chunk2)

    if parseStr:
        if debug:
            log.debug('found (units) [%s][%s][%s]',
                      parseStr, chunk1, chunk2)
        sourceTime = self._evalUnits(parseStr, sourceTime)

    return s, sourceTime, bool(parseStr)
1364
def _partialParseQUnits(self, s, sourceTime):
    """
    Test whether C{s} matches CRE_QUNITS; used by L{parse()}

    @type  s:          string
    @param s:          date/time text to evaluate
    @type  sourceTime: struct_time
    @param sourceTime: C{struct_time} value to use as the base

    @rtype:  tuple
    @return: tuple of the remaining date/time text, the (possibly
             updated) time value and a boolean telling whether the
             pattern matched
    """
    parseStr = None
    chunk1 = chunk2 = ''

    # Quantity + Units
    match = self.ptc.CRE_QUNITS.search(s)
    if match is not None:
        if debug:
            log.debug('CRE_QUNITS matched')

        if self._UnitsTrapped(s, match, 'qunits'):
            if debug:
                log.debug('day suffix trapped by qunit match')
        elif match.group('qty') == s:
            # the whole string is the quantity + units
            parseStr, s = s, ''
        else:
            # capture remaining string
            parseStr = match.group('qty')
            chunk1 = s[:match.start('qty')].strip()
            chunk2 = s[match.end('qty'):].strip()

            # a dash right before the quantity is a sign: move it over
            if chunk1.endswith('-'):
                parseStr = '-%s' % parseStr
                chunk1 = chunk1[:-1]

            s = '%s %s' % (chunk1, chunk2)

    if parseStr:
        if debug:
            log.debug('found (qunits) [%s][%s][%s]',
                      parseStr, chunk1, chunk2)
        sourceTime = self._evalQUnits(parseStr, sourceTime)

    return s, sourceTime, bool(parseStr)
1411
def _partialParseDateStr(self, s, sourceTime):
    """
    Test whether C{s} matches CRE_DATE3; used by L{parse()}

    When the date is only part of C{s}, the end of the date text may be
    pulled back to the start of a following time expression (see the
    comments in the body) before the date text is cut out and passed to
    L{_evalDateStr()}.

    @type  s:          string
    @param s:          date/time text to evaluate
    @type  sourceTime: struct_time
    @param sourceTime: C{struct_time} value to use as the base

    @rtype:  tuple
    @return: tuple of the remaining date/time text, the (possibly
             updated) time value and a boolean telling whether the
             pattern matched
    """
    parseStr = None
    chunk1 = chunk2 = ''

    m = self.ptc.CRE_DATE3.search(s)
    # NOTE: an earlier version looped over finditer() here and only
    # accepted matches that had a 'mthname' group, to keep
    # "HH:MM(:SS)" time strings from triggering this pattern. That is
    # no longer needed, the regexp handles the month name now.

    # String date format
    if m is not None:

        if (m.group('date') != s):
            # capture remaining string
            mStart = m.start('date')
            mEnd = m.end('date')

            # we need to check that anything following the parsed
            # date is a time expression because it is often picked
            # up as a valid year if the hour is 2 digits
            fTime = False
            mm = self.ptc.CRE_TIMEHMS2.search(s)
            # "February 24th 1PM" doesn't get caught
            # "February 24th 12PM" does
            mYear = m.group('year')
            if mm is not None and mYear is not None:
                fTime = True
            else:
                # "February 24th 12:00"
                mm = self.ptc.CRE_TIMEHMS.search(s)
                if mm is not None and mYear is None:
                    fTime = True
            if fTime:
                hoursStart = mm.start('hours')

                # the time starts inside what was taken as the year:
                # end the date text where the hours begin
                if hoursStart < m.end('year'):
                    mEnd = hoursStart

            parseStr = s[mStart:mEnd]
            chunk1 = s[:mStart]
            chunk2 = s[mEnd:]

            s = '%s %s' % (chunk1, chunk2)
        else:
            # the whole string is the date
            parseStr = s
            s = ''

    if parseStr:
        debug and log.debug(
            'found (date3) [%s][%s][%s]', parseStr, chunk1, chunk2)
        sourceTime = self._evalDateStr(parseStr, sourceTime)

    return s, sourceTime, bool(parseStr)
1485
def _partialParseDateStd(self, s, sourceTime):
    """
    Test whether C{s} matches CRE_DATE; used by L{parse()}

    @type  s:          string
    @param s:          date/time text to evaluate
    @type  sourceTime: struct_time
    @param sourceTime: C{struct_time} value to use as the base

    @rtype:  tuple
    @return: tuple of the remaining date/time text, the (possibly
             updated) time value and a boolean telling whether the
             pattern matched
    """
    # Standard date format
    match = self.ptc.CRE_DATE.search(s)
    if match is None:
        return s, sourceTime, False

    chunk1 = chunk2 = ''
    dateText = match.group('date')
    if dateText == s:
        # the whole string is the date
        parseStr, s = s, ''
    else:
        # capture remaining string
        parseStr = dateText
        chunk1 = s[:match.start('date')]
        chunk2 = s[match.end('date'):]
        s = '%s %s' % (chunk1, chunk2)

    if parseStr:
        if debug:
            log.debug('found (date) [%s][%s][%s]',
                      parseStr, chunk1, chunk2)
        sourceTime = self._evalDateStd(parseStr, sourceTime)

    return s, sourceTime, bool(parseStr)
1523
def _partialParseDayStr(self, s, sourceTime):
    """
    Test whether C{s} matches CRE_DAY; used by L{parse()}

    @type  s:          string
    @param s:          date/time text to evaluate
    @type  sourceTime: struct_time
    @param sourceTime: C{struct_time} value to use as the base

    @rtype:  tuple
    @return: tuple of the remaining date/time text, the (possibly
             updated) time value and a boolean telling whether the
             pattern matched
    """
    # Natural language day strings
    match = self.ptc.CRE_DAY.search(s)
    if match is None:
        return s, sourceTime, False

    chunk1 = chunk2 = ''
    dayText = match.group()
    if dayText == s:
        # the whole string is the day word
        parseStr, s = s, ''
    else:
        # capture remaining string
        parseStr = dayText
        chunk1 = s[:match.start()]
        chunk2 = s[match.end():]
        s = '%s %s' % (chunk1, chunk2)

    if parseStr:
        if debug:
            log.debug('found (day) [%s][%s][%s]',
                      parseStr, chunk1, chunk2)
        sourceTime = self._evalDayStr(parseStr, sourceTime)

    return s, sourceTime, bool(parseStr)
1561
def _partialParseWeekday(self, s, sourceTime):
    """
    Test whether C{s} matches CRE_WEEKDAY; used by L{parse()}

    A string that is itself a key of C{ptc.dayOffsets} is left alone
    even when the weekday pattern matches it.

    @type  s:          string
    @param s:          date/time text to evaluate
    @type  sourceTime: struct_time
    @param sourceTime: C{struct_time} value to use as the base

    @rtype:  tuple
    @return: tuple of the remaining date/time text, the (possibly
             updated) time value and a boolean telling whether the
             pattern matched
    """
    parseStr = None
    chunk1 = chunk2 = ''

    # Weekday
    match = self.ptc.CRE_WEEKDAY.search(s)
    if match is not None:
        weekdayText = match.group()
        if s not in self.ptc.dayOffsets:
            if weekdayText == s:
                # the whole string is the weekday
                parseStr, s = s, ''
            else:
                # capture remaining string
                parseStr = weekdayText
                chunk1 = s[:match.start()]
                chunk2 = s[match.end():]
                s = '%s %s' % (chunk1, chunk2)

    if parseStr:
        if debug:
            log.debug('found (weekday) [%s][%s][%s]',
                      parseStr, chunk1, chunk2)
        sourceTime = self._evalWeekday(parseStr, sourceTime)

    return s, sourceTime, bool(parseStr)
1601
def _partialParseTimeStr(self, s, sourceTime):
    """
    Test whether C{s} matches CRE_TIME; used by L{parse()}

    A string listed in C{ptc.re_values['now']} is accepted as a whole
    even when CRE_TIME itself does not match.

    @type  s:          string
    @param s:          date/time text to evaluate
    @type  sourceTime: struct_time
    @param sourceTime: C{struct_time} value to use as the base

    @rtype:  tuple
    @return: tuple of the remaining date/time text, the (possibly
             updated) time value and a boolean telling whether the
             pattern matched
    """
    # Natural language time strings
    match = self.ptc.CRE_TIME.search(s)
    if match is None and s not in self.ptc.re_values['now']:
        return s, sourceTime, False

    chunk1 = chunk2 = ''
    if match and match.group() != s:
        # capture remaining string
        parseStr = match.group()
        chunk1 = s[:match.start()]
        chunk2 = s[match.end():]
        s = '%s %s' % (chunk1, chunk2)
    else:
        # whole string matched, or it is a "now" word
        parseStr, s = s, ''

    if parseStr:
        if debug:
            log.debug('found (time) [%s][%s][%s]',
                      parseStr, chunk1, chunk2)
        sourceTime = self._evalTimeStr(parseStr, sourceTime)

    return s, sourceTime, bool(parseStr)
1639
def _partialParseMeridian(self, s, sourceTime):
    """
    Test whether C{s} matches CRE_TIMEHMS2; used by L{parse()}

    The matched time is rebuilt as "HH[:MM[:SS]] meridian" before it is
    passed to L{_evalMeridian()}.

    @type  s:          string
    @param s:          date/time text to evaluate
    @type  sourceTime: struct_time
    @param sourceTime: C{struct_time} value to use as the base

    @rtype:  tuple
    @return: tuple of the remaining date/time text, the (possibly
             updated) time value and a boolean telling whether the
             pattern matched
    """
    # HH:MM(:SS) am/pm time strings
    match = self.ptc.CRE_TIMEHMS2.search(s)
    if match is None:
        return s, sourceTime, False

    # seconds only count when minutes are present as well
    pieces = [match.group('hours')]
    if match.group('minutes') is not None:
        pieces.append(match.group('minutes'))
        if match.group('seconds') is not None:
            pieces.append(match.group('seconds'))
    parseStr = ':'.join(pieces) + ' ' + match.group('meridian')

    chunk1 = s[:match.start()]
    chunk2 = s[match.end():]
    s = '%s %s' % (chunk1, chunk2)

    if parseStr:
        if debug:
            log.debug('found (meridian) [%s][%s][%s]',
                      parseStr, chunk1, chunk2)
        sourceTime = self._evalMeridian(parseStr, sourceTime)

    return s, sourceTime, bool(parseStr)
1684
def _partialParseTimeStd(self, s, sourceTime):
    """
    Test whether C{s} matches CRE_TIMEHMS; used by L{parse()}

    The matched time is rebuilt as "HH:MM[:SS]" before it is passed to
    L{_evalTimeStd()}.

    @type  s:          string
    @param s:          date/time text to evaluate
    @type  sourceTime: struct_time
    @param sourceTime: C{struct_time} value to use as the base

    @rtype:  tuple
    @return: tuple of the remaining date/time text, the (possibly
             updated) time value and a boolean telling whether the
             pattern matched
    """
    # HH:MM(:SS) time strings
    match = self.ptc.CRE_TIMEHMS.search(s)
    if match is None:
        return s, sourceTime, False

    fields = ['hours', 'minutes']
    if match.group('seconds') is not None:
        fields.append('seconds')

    parseStr = ':'.join(['%s' % match.group(name) for name in fields])
    # the remainder starts after the last field that was used
    chunk1 = s[:match.start('hours')]
    chunk2 = s[match.end(fields[-1]):]
    s = '%s %s' % (chunk1, chunk2)

    if parseStr:
        if debug:
            log.debug('found (hms) [%s][%s][%s]',
                      parseStr, chunk1, chunk2)
        sourceTime = self._evalTimeStd(parseStr, sourceTime)

    return s, sourceTime, bool(parseStr)
1726
def parseDT(self, datetimeString, sourceTime=None,
            tzinfo=None, version=None):
    """
    Same as L{parse()}, but returns a C{datetime} instead of a
    C{struct_time}.

    C{sourceTime} may additionally be any object offering a
    C{timetuple()} method (such as C{datetime} or C{date}); any other
    value is passed to L{parse()} unchanged. C{tzinfo} accepts a tzinfo
    object. It is advisable to use pytz.

    @type  datetimeString: string
    @param datetimeString: date/time text to evaluate
    @type  sourceTime:     struct_time, datetime, date
    @param sourceTime:     time value to use as the base
    @type  tzinfo:         tzinfo
    @param tzinfo:         Timezone to apply to generated datetime objs.
    @type  version:        integer
    @param version:        style version, default will use L{Calendar}
                           parameter version value

    @rtype:  tuple
    @return: tuple of: C{datetime} built from the parsed time and the
             result flag/context

    see .parse for return code details.
    """
    # If sourceTime has a timetuple method, use that; otherwise pass the
    # value through to parse() as it is and leave validation to it.
    try:
        asTimetuple = sourceTime.timetuple
    except AttributeError:
        pass
    else:
        sourceTime = asTimetuple()

    # Use the zone's localize() when it has one (pytz does); otherwise
    # attach tzinfo directly. None is a valid tzinfo for the fallback.
    try:
        localize = tzinfo.localize
    except AttributeError:
        def localize(dt):
            return dt.replace(tzinfo=tzinfo)

    # Punt
    time_struct, ret_code = self.parse(
        datetimeString,
        sourceTime=sourceTime,
        version=version)

    # Same return signature as parse(), except that the first item is a
    # datetime object instead of a time_struct.
    return localize(datetime.datetime(*time_struct[:6])), ret_code
1774
def parse(self, datetimeString, sourceTime=None, version=None):
    """
    Splits the given C{datetimeString} into tokens, finds the regex
    patterns that match and then calculates a C{struct_time} value from
    the chunks.

    If C{sourceTime} is given then the C{struct_time} value will be
    calculated from that value, otherwise from the current date/time.
    A C{datetime} is converted with C{timetuple()}; a falsy value is
    treated as "not given".

    If the C{datetimeString} is parsed and date/time value found, then::

        If C{version} equals to L{VERSION_FLAG_STYLE}, the second item of
        the returned tuple will be a flag to let you know what kind of
        C{struct_time} value is being returned::

            0 = not parsed at all
            1 = parsed as a C{date}
            2 = parsed as a C{time}
            3 = parsed as a C{datetime}

        If C{version} equals to L{VERSION_CONTEXT_STYLE}, the second value
        will be an instance of L{pdtContext}

    @type  datetimeString: string
    @param datetimeString: date/time text to evaluate
    @type  sourceTime:     struct_time, tuple or datetime
    @param sourceTime:     time value to use as the base
    @type  version:        integer
    @param version:        style version, default will use L{Calendar}
                           parameter version value

    @rtype:  tuple
    @return: tuple of: modified C{sourceTime} and the result flag/context
    @raise ValueError: if C{sourceTime} is truthy but neither a
                       C{datetime}, a C{struct_time} nor a tuple
    """
    debug and log.debug('parse()')

    # drop a period that sits between a word character and whitespace,
    # and replace a quote character next to a word with a space
    datetimeString = re.sub(r'(\w)\.(\s)', r'\1\2', datetimeString)
    datetimeString = re.sub(r'(\w)[\'"](\s|$)', r'\1 \2', datetimeString)
    datetimeString = re.sub(r'(\s|^)[\'"](\w)', r'\1 \2', datetimeString)

    if sourceTime:
        if isinstance(sourceTime, datetime.datetime):
            debug and log.debug('coercing datetime to timetuple')
            sourceTime = sourceTime.timetuple()
        else:
            if not isinstance(sourceTime, time.struct_time) and \
                    not isinstance(sourceTime, tuple):
                raise ValueError('sourceTime is not a struct_time')
    else:
        sourceTime = time.localtime()

    with self.context() as ctx:
        s = datetimeString.lower().strip()
        debug and log.debug('remainedString (before parsing): [%s]', s)

        # Each pass hands the remaining text to the partial parsers in
        # this fixed order; the first one that reports a match consumes
        # its part and the loop starts over with what is left.
        while s:
            for parseMeth in (self._partialParseModifier,
                              self._partialParseUnits,
                              self._partialParseQUnits,
                              self._partialParseDateStr,
                              self._partialParseDateStd,
                              self._partialParseDayStr,
                              self._partialParseWeekday,
                              self._partialParseTimeStr,
                              self._partialParseMeridian,
                              self._partialParseTimeStd):
                retS, retTime, matched = parseMeth(s, sourceTime)
                if matched:
                    s, sourceTime = retS.strip(), retTime
                    break
            else:
                # nothing matched: discard the rest to end the loop
                s = ''

            debug and log.debug('hasDate: [%s], hasTime: [%s]',
                                ctx.hasDate, ctx.hasTime)
            debug and log.debug('remainedString: [%s]', s)

        # String is not parsed at all
        if sourceTime is None:
            debug and log.debug('not parsed [%s]', str(sourceTime))
            sourceTime = time.localtime()

    # the evaluators may hand back plain tuples; normalise the result
    if not isinstance(sourceTime, time.struct_time):
        sourceTime = time.struct_time(sourceTime)

    # an explicit version argument overrides the instance default
    version = self.version if version is None else version
    if version == VERSION_CONTEXT_STYLE:
        return sourceTime, ctx
    else:
        return sourceTime, ctx.dateTimeFlag
1866
def inc(self, source, month=None, year=None):
    """
    Takes the given C{source} date and increments it according to the
    values passed in by month and/or year.

    This routine is needed because Python's C{timedelta()} function
    does not allow for month or year increments.

    Years are folded into months (12 each). The whole months are
    applied first, with the day clamped to the length of the target
    month; a fractional rest is then added as that fraction of the
    target month's days.

    @type  source: datetime
    @param source: value to increment; its C{year}, C{month}, C{day}
                   attributes and C{replace()} method are used
    @type  month:  float or integer
    @param month:  optional number of months to increment
    @type  year:   float or integer
    @param year:   optional number of years to increment

    @rtype:  datetime
    @return: C{source} incremented by the number of months and/or years
    @raise OverflowError: if the resulting year is outside
                          C{datetime.MINYEAR}..C{datetime.MAXYEAR}
    """
    def asNumber(value):
        # values that cannot be converted count as "no increment"
        try:
            return float(value)
        except (TypeError, ValueError):
            return 0

    newYear, newMonth, newDay = source.year, source.month, source.day
    totalMonths = asNumber(month)
    totalMonths += asNumber(year) * 12

    fraction = 0.0
    monthLength = 0
    if totalMonths:
        wholeMonths = int(totalMonths)
        fraction = totalMonths - wholeMonths

        yearShift = int(wholeMonths / 12.0)
        newMonth += wholeMonths - yearShift * 12

        if newMonth < 1:
            # crossed the start of the year
            yearShift -= 1
            newMonth += 12
        elif newMonth > 12:
            # crossed the end of the year
            yearShift += 1
            newMonth -= 12

        newYear += yearShift

        # if the day ends up past the last day of
        # the new month, set it to the last day
        monthLength = self.ptc.daysInMonth(newMonth, newYear)
        newDay = min(newDay, monthLength) if newDay > monthLength else newDay

    if not (datetime.MINYEAR <= newYear <= datetime.MAXYEAR):
        raise OverflowError('year is out of range')

    moved = source.replace(year=newYear, month=newMonth, day=newDay)
    if fraction:
        moved += datetime.timedelta(days=fraction * monthLength)

    # expressed as source + delta, exactly like the arithmetic the
    # result has always been produced with
    return source + (moved - source)
1935
1936 def nlp(self, inputString, sourceTime=None, version=None):
1937 """Utilizes parse() after making judgements about what datetime
1938 information belongs together.
1939
1940 It makes logical groupings based on proximity and returns a parsed
1941 datetime for each matched grouping of datetime text, along with
1942 location info within the given inputString.
1943
1944 @type inputString: string
1945 @param inputString: natural language text to evaluate
1946 @type sourceTime: struct_time
1947 @param sourceTime: C{struct_time} value to use as the base
1948 @type version: integer
1949 @param version: style version, default will use L{Calendar}
1950 parameter version value
1951
1952 @rtype: tuple or None
1953 @return: tuple of tuples in the format (parsed_datetime as
1954 datetime.datetime, flags as int, start_pos as int,
1955 end_pos as int, matched_text as string) or None if there
1956 were no matches
1957 """
1958
1959 orig_inputstring = inputString
1960
1961 # replace periods at the end of sentences w/ spaces
1962 # opposed to removing them altogether in order to
1963 # retain relative positions (identified by alpha, period, space).
1964 # this is required for some of the regex patterns to match
1965 inputString = re.sub(r'(\w)(\.)(\s)', r'\1 \3', inputString).lower()
1966 inputString = re.sub(r'(\w)(\'|")(\s|$)', r'\1 \3', inputString)
1967 inputString = re.sub(r'(\s|^)(\'|")(\w)', r'\1 \3', inputString)
1968
1969 startpos = 0 # the start position in the inputString during the loop
1970
1971 # list of lists in format:
1972 # [startpos, endpos, matchedstring, flags, type]
1973 matches = []
1974
1975 while startpos < len(inputString):
1976
1977 # empty match
1978 leftmost_match = [0, 0, None, 0, None]
1979
1980 # Modifier like next\prev..
1981 m = self.ptc.CRE_MODIFIER.search(inputString[startpos:])
1982 if m is not None:
1983 if leftmost_match[1] == 0 or \
1984 leftmost_match[0] > m.start() + startpos:
1985 leftmost_match[0] = m.start() + startpos
1986 leftmost_match[1] = m.end() + startpos
1987 leftmost_match[2] = m.group()
1988 leftmost_match[3] = 0
1989 leftmost_match[4] = 'modifier'
1990
1991 # Quantity + Units
1992 m = self.ptc.CRE_UNITS.search(inputString[startpos:])
1993 if m is not None:
1994 debug and log.debug('CRE_UNITS matched')
1995 if self._UnitsTrapped(inputString[startpos:], m, 'units'):
1996 debug and log.debug('day suffix trapped by unit match')
1997 else:
1998
1999 if leftmost_match[1] == 0 or \
2000 leftmost_match[0] > m.start('qty') + startpos:
2001 leftmost_match[0] = m.start('qty') + startpos
2002 leftmost_match[1] = m.end('qty') + startpos
2003 leftmost_match[2] = m.group('qty')
2004 leftmost_match[3] = 3
2005 leftmost_match[4] = 'units'
2006
2007 if m.start('qty') > 0 and \
2008 inputString[m.start('qty') - 1] == '-':
2009 leftmost_match[0] = leftmost_match[0] - 1
2010 leftmost_match[2] = '-' + leftmost_match[2]
2011
2012 # Quantity + Units
2013 m = self.ptc.CRE_QUNITS.search(inputString[startpos:])
2014 if m is not None:
2015 debug and log.debug('CRE_QUNITS matched')
2016 if self._UnitsTrapped(inputString[startpos:], m, 'qunits'):
2017 debug and log.debug('day suffix trapped by qunit match')
2018 else:
2019 if leftmost_match[1] == 0 or \
2020 leftmost_match[0] > m.start('qty') + startpos:
2021 leftmost_match[0] = m.start('qty') + startpos
2022 leftmost_match[1] = m.end('qty') + startpos
2023 leftmost_match[2] = m.group('qty')
2024 leftmost_match[3] = 3
2025 leftmost_match[4] = 'qunits'
2026
2027 if m.start('qty') > 0 and \
2028 inputString[m.start('qty') - 1] == '-':
2029 leftmost_match[0] = leftmost_match[0] - 1
2030 leftmost_match[2] = '-' + leftmost_match[2]
2031
2032 m = self.ptc.CRE_DATE3.search(inputString[startpos:])
2033 # NO LONGER NEEDED, THE REGEXP HANDLED MTHNAME NOW
2034 # for match in self.ptc.CRE_DATE3.finditer(inputString[startpos:]):
2035 # to prevent "HH:MM(:SS) time strings" expressions from
2036 # triggering this regex, we checks if the month field exists
2037 # in the searched expression, if it doesn't exist, the date
2038 # field is not valid
2039 # if match.group('mthname'):
2040 # m = self.ptc.CRE_DATE3.search(inputString[startpos:],
2041 # match.start())
2042 # break
2043
2044 # String date format
2045 if m is not None:
2046 if leftmost_match[1] == 0 or \
2047 leftmost_match[0] > m.start('date') + startpos:
2048 leftmost_match[0] = m.start('date') + startpos
2049 leftmost_match[1] = m.end('date') + startpos
2050 leftmost_match[2] = m.group('date')
2051 leftmost_match[3] = 1
2052 leftmost_match[4] = 'dateStr'
2053
2054 # Standard date format
2055 m = self.ptc.CRE_DATE.search(inputString[startpos:])
2056 if m is not None:
2057 if leftmost_match[1] == 0 or \
2058 leftmost_match[0] > m.start('date') + startpos:
2059 leftmost_match[0] = m.start('date') + startpos
2060 leftmost_match[1] = m.end('date') + startpos
2061 leftmost_match[2] = m.group('date')
2062 leftmost_match[3] = 1
2063 leftmost_match[4] = 'dateStd'
2064
2065 # Natural language day strings
2066 m = self.ptc.CRE_DAY.search(inputString[startpos:])
2067 if m is not None:
2068 if leftmost_match[1] == 0 or \
2069 leftmost_match[0] > m.start() + startpos:
2070 leftmost_match[0] = m.start() + startpos
2071 leftmost_match[1] = m.end() + startpos
2072 leftmost_match[2] = m.group()
2073 leftmost_match[3] = 1
2074 leftmost_match[4] = 'dayStr'
2075
2076 # Weekday
2077 m = self.ptc.CRE_WEEKDAY.search(inputString[startpos:])
2078 if m is not None:
2079 if inputString[startpos:] not in self.ptc.dayOffsets:
2080 if leftmost_match[1] == 0 or \
2081 leftmost_match[0] > m.start() + startpos:
2082 leftmost_match[0] = m.start() + startpos
2083 leftmost_match[1] = m.end() + startpos
2084 leftmost_match[2] = m.group()
2085 leftmost_match[3] = 1
2086 leftmost_match[4] = 'weekdy'
2087
2088 # Natural language time strings
2089 m = self.ptc.CRE_TIME.search(inputString[startpos:])
2090 if m is not None:
2091 if leftmost_match[1] == 0 or \
2092 leftmost_match[0] > m.start() + startpos:
2093 leftmost_match[0] = m.start() + startpos
2094 leftmost_match[1] = m.end() + startpos
2095 leftmost_match[2] = m.group()
2096 leftmost_match[3] = 2
2097 leftmost_match[4] = 'timeStr'
2098
2099 # HH:MM(:SS) am/pm time strings
2100 m = self.ptc.CRE_TIMEHMS2.search(inputString[startpos:])
2101 if m is not None:
2102 if leftmost_match[1] == 0 or \
2103 leftmost_match[0] > m.start('hours') + startpos:
2104 leftmost_match[0] = m.start('hours') + startpos
2105 leftmost_match[1] = m.end('meridian') + startpos
2106 leftmost_match[2] = inputString[leftmost_match[0]:
2107 leftmost_match[1]]
2108 leftmost_match[3] = 2
2109 leftmost_match[4] = 'meridian'
2110
2111 # HH:MM(:SS) time strings
2112 m = self.ptc.CRE_TIMEHMS.search(inputString[startpos:])
2113 if m is not None:
2114 if leftmost_match[1] == 0 or \
2115 leftmost_match[0] > m.start('hours') + startpos:
2116 leftmost_match[0] = m.start('hours') + startpos
2117 if m.group('seconds') is not None:
2118 leftmost_match[1] = m.end('seconds') + startpos
2119 else:
2120 leftmost_match[1] = m.end('minutes') + startpos
2121 leftmost_match[2] = inputString[leftmost_match[0]:
2122 leftmost_match[1]]
2123 leftmost_match[3] = 2
2124 leftmost_match[4] = 'timeStd'
2125
2126 # Units only; must be preceded by a modifier
2127 if len(matches) > 0 and matches[-1][3] == 0:
2128 m = self.ptc.CRE_UNITS_ONLY.search(inputString[startpos:])
2129 # Ensure that any match is immediately proceded by the
2130 # modifier. "Next is the word 'month'" should not parse as a
2131 # date while "next month" should
2132 if m is not None and \
2133 inputString[startpos:startpos +
2134 m.start()].strip() == '':
2135 debug and log.debug('CRE_UNITS_ONLY matched [%s]',
2136 m.group())
2137 if leftmost_match[1] == 0 or \
2138 leftmost_match[0] > m.start() + startpos:
2139 leftmost_match[0] = m.start() + startpos
2140 leftmost_match[1] = m.end() + startpos
2141 leftmost_match[2] = m.group()
2142 leftmost_match[3] = 3
2143 leftmost_match[4] = 'unitsOnly'
2144
2145 # set the start position to the end pos of the leftmost match
2146 startpos = leftmost_match[1]
2147
2148 # nothing was detected
2149 # so break out of the loop
2150 if startpos == 0:
2151 startpos = len(inputString)
2152 else:
2153 if leftmost_match[3] > 0:
2154 m = self.ptc.CRE_NLP_PREFIX.search(
2155 inputString[:leftmost_match[0]] +
2156 ' ' + str(leftmost_match[3]))
2157 if m is not None:
2158 leftmost_match[0] = m.start('nlp_prefix')
2159 leftmost_match[2] = inputString[leftmost_match[0]:
2160 leftmost_match[1]]
2161 matches.append(leftmost_match)
2162
2163 # find matches in proximity with one another and
2164 # return all the parsed values
2165 proximity_matches = []
2166 if len(matches) > 1:
2167 combined = ''
2168 from_match_index = 0
2169 date = matches[0][3] == 1
2170 time = matches[0][3] == 2
2171 units = matches[0][3] == 3
2172 for i in range(1, len(matches)):
2173
2174 # test proximity (are there characters between matches?)
2175 endofprevious = matches[i - 1][1]
2176 begofcurrent = matches[i][0]
2177 if orig_inputstring[endofprevious:
2178 begofcurrent].lower().strip() != '':
2179 # this one isn't in proximity, but maybe
2180 # we have enough to make a datetime
2181 # TODO: make sure the combination of
2182 # formats (modifier, dateStd, etc) makes logical sense
2183 # before parsing together
2184 if date or time or units:
2185 combined = orig_inputstring[matches[from_match_index]
2186 [0]:matches[i - 1][1]]
2187 parsed_datetime, flags = self.parse(combined,
2188 sourceTime,
2189 version)
2190 proximity_matches.append((
2191 datetime.datetime(*parsed_datetime[:6]),
2192 flags,
2193 matches[from_match_index][0],
2194 matches[i - 1][1],
2195 combined))
2196 # not in proximity, reset starting from current
2197 from_match_index = i
2198 date = matches[i][3] == 1
2199 time = matches[i][3] == 2
2200 units = matches[i][3] == 3
2201 continue
2202 else:
2203 if matches[i][3] == 1:
2204 date = True
2205 if matches[i][3] == 2:
2206 time = True
2207 if matches[i][3] == 3:
2208 units = True
2209
2210 # check last
2211 # we have enough to make a datetime
2212 if date or time or units:
2213 combined = orig_inputstring[matches[from_match_index][0]:
2214 matches[len(matches) - 1][1]]
2215 parsed_datetime, flags = self.parse(combined, sourceTime,
2216 version)
2217 proximity_matches.append((
2218 datetime.datetime(*parsed_datetime[:6]),
2219 flags,
2220 matches[from_match_index][0],
2221 matches[len(matches) - 1][1],
2222 combined))
2223
2224 elif len(matches) == 0:
2225 return None
2226 else:
2227 if matches[0][3] == 0: # not enough info to parse
2228 return None
2229 else:
2230 combined = orig_inputstring[matches[0][0]:matches[0][1]]
2231 parsed_datetime, flags = self.parse(matches[0][2], sourceTime,
2232 version)
2233 proximity_matches.append((
2234 datetime.datetime(*parsed_datetime[:6]),
2235 flags,
2236 matches[0][0],
2237 matches[0][1],
2238 combined))
2239
2240 return tuple(proximity_matches)
2241
2242
def _initSymbols(ptc):
    """
    Populate C{ptc.am} and C{ptc.pm} with the spellings of the meridian text.

    Both attributes are first reset to C{['', '']}.  Then, for each of the
    first two entries of C{ptc.locale.meridian} (index 0 is am, index 1 is
    pm), the matching attribute is replaced by a list holding, in order:
    the original text, its first character, its dotted form built from the
    first two characters (e.g. C{A.M.}), and the same three forms of the
    lower-cased text.  An empty entry produces a list containing only that
    empty entry.

    @type  ptc: object
    @param ptc: constants object exposing C{locale.meridian}; it is
                modified in place
    """
    def _dotted(text):
        # "AM" -> "A.M."; only the first two characters are used
        return '{0}.{1}.'.format(*text)

    ptc.am = ['', '']
    ptc.pm = ['', '']

    for attr_name, meridian in zip(('am', 'pm'), ptc.locale.meridian[:2]):
        variants = [meridian]
        setattr(ptc, attr_name, variants)
        if not meridian:
            continue

        lowered = meridian.lower()
        variants += [meridian[0], _dotted(meridian),
                     lowered, lowered[0], _dotted(lowered)]
2262
2263
class Constants(object):

    """
    Default set of constants for parsedatetime.

    If PyICU is present, then the class will first try to get PyICU
    to return a locale specified by C{localeID}.  If either C{localeID} is
    None or if the locale does not exist within PyICU, then each of the
    locales defined in C{fallbackLocales} is tried in order.

    If PyICU is not present or none of the specified locales can be used,
    then the class will initialize itself to the en_US locale.

    If PyICU is not present or not requested, only the locales defined by
    C{pdtLocales} will be searched.

    The C{RE_*}, C{DATERNG*} and C{TIMERNG*} attributes hold regex source
    text.  The matching C{CRE_*} compiled patterns are created lazily, on
    first access, by C{__getattr__}; attributes named in the locale's
    C{locale_keys} are delegated to the locale object the same way.
    """

    def __init__(self, localeID=None, usePyICU=True,
                 fallbackLocales=['en_US']):
        """
        Select a locale and build every locale dependent regex source.

        @type  localeID:        string
        @param localeID:        locale identifier to try first, or None
        @type  usePyICU:        boolean
        @param usePyICU:        when True, C{get_icu()} is asked for the
                                locale first; C{self.usePyICU} is reset to
                                False if that yields no ICU data
        @type  fallbackLocales: list
        @param fallbackLocales: locale identifiers tried, in order, when
                                C{localeID} is not in C{pdtLocales};
                                'en_US' is always appended if missing
        """
        self.localeID = localeID
        # work on a copy: the caller's list (and the shared default list)
        # must not be modified by the append below
        self.fallbackLocales = fallbackLocales[:]

        if 'en_US' not in self.fallbackLocales:
            self.fallbackLocales.append('en_US')

        # define non-locale specific constants
        self.locale = None
        self.usePyICU = usePyICU

        # starting cache of leap years
        # daysInMonth will add to this if during
        # runtime it gets a request for a year not found
        self._leapYears = list(range(1904, 2097, 4))

        # durations expressed in seconds
        self.Second = 1
        self.Minute = 60  # 60 * self.Second
        self.Hour = 3600  # 60 * self.Minute
        self.Day = 86400  # 24 * self.Hour
        self.Week = 604800  # 7 * self.Day
        self.Month = 2592000  # 30 * self.Day
        self.Year = 31536000  # 365 * self.Day

        # days per month for a non-leap year, January first
        self._DaysInMonthList = (31, 28, 31, 30, 31, 30,
                                 31, 31, 30, 31, 30, 31)
        self.rangeSep = '-'
        self.BirthdayEpoch = 50

        # When True the starting time for all relative calculations will come
        # from the given SourceTime, otherwise it will be 9am

        self.StartTimeFromSourceTime = False

        # YearParseStyle controls how we parse "Jun 12", i.e. dates that do
        # not have a year present.  The default is to compare the date given
        # to the current date, and if prior, then assume the next year.
        # Setting this to 0 will prevent that.

        self.YearParseStyle = 1

        # DOWParseStyle controls how we parse "Tuesday"
        # If the current day was Thursday and the text to parse is "Tuesday"
        # then the following table shows how each style would be returned
        # -1, 0, +1
        #
        # Current day marked as ***
        #
        #          Sun Mon Tue Wed Thu Fri Sat
        # week -1
        # current          -1,0    ***
        # week +1          +1
        #
        # If the current day was Monday and the text to parse is "Tuesday"
        # then the following table shows how each style would be returned
        # -1, 0, +1
        #
        #          Sun Mon Tue Wed Thu Fri Sat
        # week -1          -1
        # current      *** 0,+1
        # week +1

        self.DOWParseStyle = 1

        # CurrentDOWParseStyle controls how we parse "Friday"
        # If the current day was Friday and the text to parse is "Friday"
        # then the following table shows how each style would be returned
        # True/False.  This also depends on DOWParseStyle.
        #
        # Current day marked as ***
        #
        # DOWParseStyle = 0
        #          Sun Mon Tue Wed Thu Fri Sat
        # week -1
        # current                      T,F
        # week +1
        #
        # DOWParseStyle = -1
        #          Sun Mon Tue Wed Thu Fri Sat
        # week -1                       F
        # current                       T
        # week +1
        #
        # DOWParseStyle = +1
        #
        #          Sun Mon Tue Wed Thu Fri Sat
        # week -1
        # current                       T
        # week +1                       F

        self.CurrentDOWParseStyle = False

        if self.usePyICU:
            self.locale = get_icu(self.localeID)

            # no usable ICU data: fall through to the bundled locales
            if self.locale.icu is None:
                self.usePyICU = False
                self.locale = None

        if self.locale is None:
            if self.localeID not in pdtLocales:
                # take the first fallback that is known; self.localeID is
                # left at the last fallback tried when none of them is
                for candidate in self.fallbackLocales:
                    self.localeID = candidate
                    if candidate in pdtLocales:
                        break

            self.locale = pdtLocales[self.localeID]

        if self.locale is not None:

            def _getLocaleDataAdjusted(localeData):
                """
                If localeData is defined as ["mon|mnd", 'tu|tues'...] then this
                function splits those definitions on |
                """
                adjusted = []
                for entry in localeData:
                    # split() returns the entry unchanged when there is no |
                    adjusted += entry.split('|')
                return adjusted

            def re_join(group):
                # regex alternation of the items, each escaped so that any
                # regex special characters are matched literally
                return '|'.join(re.escape(item) for item in group)

            mths = _getLocaleDataAdjusted(self.locale.Months)
            smths = _getLocaleDataAdjusted(self.locale.shortMonths)
            swds = _getLocaleDataAdjusted(self.locale.shortWeekdays)
            wds = _getLocaleDataAdjusted(self.locale.Weekdays)

            self.locale.re_values['months'] = re_join(mths)
            self.locale.re_values['shortmonths'] = re_join(smths)
            self.locale.re_values['days'] = re_join(wds)
            self.locale.re_values['shortdays'] = re_join(swds)
            self.locale.re_values['dayoffsets'] = \
                re_join(self.locale.dayOffsets)
            self.locale.re_values['numbers'] = \
                re_join(self.locale.numbers)
            self.locale.re_values['decimal_mark'] = \
                re.escape(self.locale.decimal_mark)

            units = [unit for group in self.locale.units.values()
                     for unit in group]  # flatten
            units.sort(key=len, reverse=True)  # longest first
            self.locale.re_values['units'] = re_join(units)
            self.locale.re_values['modifiers'] = re_join(self.locale.Modifiers)
            self.locale.re_values['sources'] = re_join(self.locale.re_sources)

            # For distinguishing numeric dates from times, look for timeSep
            # and meridian, if specified in the locale
            self.locale.re_values['timecomponents'] = \
                re_join(self.locale.timeSep + self.locale.meridian)

            # build weekday offsets - yes, it assumes the Weekday and
            # shortWeekday lists are in the same order and Mon..Sun
            # (Python style)
            def _buildOffsets(offsetDict, localeData, indexStart):
                # every |-separated alias of an entry gets the same offset
                for offset, key in enumerate(localeData, indexStart):
                    for alias in key.split('|'):
                        offsetDict[alias] = offset

            _buildOffsets(self.locale.WeekdayOffsets,
                          self.locale.Weekdays, 0)
            _buildOffsets(self.locale.WeekdayOffsets,
                          self.locale.shortWeekdays, 0)

            # build month offsets - yes, it assumes the Months and shortMonths
            # lists are in the same order and Jan..Dec
            _buildOffsets(self.locale.MonthOffsets,
                          self.locale.Months, 1)
            _buildOffsets(self.locale.MonthOffsets,
                          self.locale.shortMonths, 1)

        _initSymbols(self)

        # TODO: add code to parse the date formats and build the regexes up
        # from sub-parts, find all hard-coded uses of date/time separators

        # NOTE: the placeholders {daysuffix}, {qunits}, {timeseparator},
        # {rangeseparator}, {specials} and {meridian} used below are not set
        # in this method; presumably the locale supplies them in re_values.
        # All sources are compiled with re.VERBOSE (see re_option), so the
        # layout whitespace inside the patterns is not significant.

        # not being used in code, but kept in case others are manually
        # utilizing this regex for their own purposes
        self.RE_DATE4 = r'''(?P<date>
                                (
                                    (
                                        (?P<day>\d\d?)
                                        (?P<suffix>{daysuffix})?
                                        (,)?
                                        (\s)?
                                    )
                                    (?P<mthname>
                                        \b({months}|{shortmonths})\b
                                    )\s?
                                    (?P<year>\d\d
                                        (\d\d)?
                                    )?
                                )
                            )'''.format(**self.locale.re_values)

        # still not completely sure of the behavior of the regex and
        # whether it would be best to consume all possible irrelevant
        # characters before the option groups (but within the {1,3} repetition
        # group or inside of each option group, as it currently does
        # however, right now, all tests are passing that were,
        # including fixing the bug of matching a 4-digit year as ddyy
        # when the day is absent from the string
        self.RE_DATE3 = r'''(?P<date>
                                (?:
                                    (?:^|\s)
                                    (?P<mthname>
                                        {months}|{shortmonths}
                                    )\b
                                    |
                                    (?:^|\s)
                                    (?P<day>[1-9]|[012]\d|3[01])
                                    (?P<suffix>{daysuffix}|)\b
                                    (?!\s*(?:{timecomponents}))
                                    |
                                    ,?\s
                                    (?P<year>\d\d(?:\d\d|))\b
                                    (?!\s*(?:{timecomponents}))
                                ){{1,3}}
                                (?(mthname)|$-^)
                            )'''.format(**self.locale.re_values)

        # not being used in code, but kept in case others are manually
        # utilizing this regex for their own purposes
        self.RE_MONTH = r'''(\s|^)
                            (?P<month>
                                (
                                    (?P<mthname>
                                        \b({months}|{shortmonths})\b
                                    )
                                    (\s?
                                        (?P<year>(\d{{4}}))
                                    )?
                                )
                            )
                            (?=\s|$|[^\w])'''.format(**self.locale.re_values)

        self.RE_WEEKDAY = r'''\b
                              (?:
                                  {days}|{shortdays}
                              )
                              \b'''.format(**self.locale.re_values)

        self.RE_NUMBER = (r'(\b(?:{numbers})\b|\d+(?:{decimal_mark}\d+|))'
                          .format(**self.locale.re_values))

        self.RE_SPECIAL = (r'(?P<special>^[{specials}]+)\s+'
                           .format(**self.locale.re_values))

        self.RE_UNITS_ONLY = (r'''\b({units})\b'''
                              .format(**self.locale.re_values))

        self.RE_UNITS = r'''\b(?P<qty>
                                -?
                                (?:\d+(?:{decimal_mark}\d+|)|(?:{numbers})\b)\s*
                                (?P<units>{units})
                            )\b'''.format(**self.locale.re_values)

        # NOTE(review): the literal "s" after (?:{numbers}) requires a
        # number word to be followed by the letter s; it looks like "\s"
        # may have been intended - confirm before changing the pattern.
        self.RE_QUNITS = r'''\b(?P<qty>
                                 -?
                                 (?:\d+(?:{decimal_mark}\d+|)|(?:{numbers})s)\s?
                                 (?P<qunits>{qunits})
                             )\b'''.format(**self.locale.re_values)

        self.RE_MODIFIER = r'''\b(?:
                                   {modifiers}
                               )\b'''.format(**self.locale.re_values)

        self.RE_TIMEHMS = r'''([\s(\["'-]|^)
                              (?P<hours>\d\d?)
                              (?P<tsep>{timeseparator}|)
                              (?P<minutes>\d\d)
                              (?:(?P=tsep)
                                  (?P<seconds>\d\d
                                      (?:[\.,]\d+)?
                                  )
                              )?\b'''.format(**self.locale.re_values)

        self.RE_TIMEHMS2 = r'''([\s(\["'-]|^)
                               (?P<hours>\d\d?)
                               (?:
                                   (?P<tsep>{timeseparator}|)
                                   (?P<minutes>\d\d?)
                                   (?:(?P=tsep)
                                       (?P<seconds>\d\d?
                                           (?:[\.,]\d+)?
                                       )
                                   )?
                               )?'''.format(**self.locale.re_values)

        # 1, 2, and 3 here refer to the type of match date, time, or units
        self.RE_NLP_PREFIX = r'''\b(?P<nlp_prefix>
                                     (on)
                                     (\s)+1
                                     |
                                     (at|in)
                                     (\s)+2
                                     |
                                     (in)
                                     (\s)+3
                                 )'''

        # the am/pm time pattern ends with the meridian text when the
        # locale defines one, otherwise with a plain word boundary
        if 'meridian' in self.locale.re_values:
            self.RE_TIMEHMS2 += (r'\s?(?P<meridian>{meridian})\b'
                                 .format(**self.locale.re_values))
        else:
            self.RE_TIMEHMS2 += r'\b'

        # Always support common . and - separators
        dateSeps = ''.join(re.escape(s)
                           for s in self.locale.dateSep + ['-', '.'])

        self.RE_DATE = r'''([\s(\["'-]|^)
                           (?P<date>
                                \d\d?[{0}]\d\d?(?:[{0}]\d\d(?:\d\d)?)?
                                |
                                \d{{4}}[{0}]\d\d?[{0}]\d\d?
                            )
                           \b'''.format(dateSeps)

        self.RE_DATE2 = r'[{0}]'.format(dateSeps)

        assert 'dayoffsets' in self.locale.re_values

        self.RE_DAY = r'''\b
                          (?:
                              {dayoffsets}
                          )
                          \b'''.format(**self.locale.re_values)

        self.RE_DAY2 = r'''(?P<day>\d\d?)
                           (?P<suffix>{daysuffix})?
                       '''.format(**self.locale.re_values)

        self.RE_TIME = r'''\b
                           (?:
                               {sources}
                           )
                           \b'''.format(**self.locale.re_values)

        self.RE_REMAINING = r'\s+'

        # Regex for date/time ranges
        self.RE_RTIMEHMS = r'''(\s?|^)
                               (\d\d?){timeseparator}
                               (\d\d)
                               ({timeseparator}(\d\d))?
                               (\s?|$)'''.format(**self.locale.re_values)

        self.RE_RTIMEHMS2 = (r'''(\s?|^)
                                 (\d\d?)
                                 ({timeseparator}(\d\d?))?
                                 ({timeseparator}(\d\d?))?'''
                             .format(**self.locale.re_values))

        if 'meridian' in self.locale.re_values:
            self.RE_RTIMEHMS2 += (r'\s?({meridian})'
                                  .format(**self.locale.re_values))

        self.RE_RDATE = r'(\d+([%s]\d+)+)' % dateSeps
        self.RE_RDATE3 = r'''(
                                (
                                    (
                                        \b({months})\b
                                    )\s?
                                    (
                                        (\d\d?)
                                        (\s?|{daysuffix}|$)+
                                    )?
                                    (,\s?\d{{4}})?
                                )
                            )'''.format(**self.locale.re_values)

        # "06/07/06 - 08/09/06"
        self.DATERNG1 = (r'{0}\s?{rangeseparator}\s?{0}'
                         .format(self.RE_RDATE, **self.locale.re_values))

        # "march 31 - june 1st, 2006"
        self.DATERNG2 = (r'{0}\s?{rangeseparator}\s?{0}'
                         .format(self.RE_RDATE3, **self.locale.re_values))

        # "march 1rd -13th"
        # NOTE(review): the day suffixes here are hard-coded English
        # (rd|st|nd|th) while the other patterns use {daysuffix}.
        self.DATERNG3 = (r'{0}\s?{rangeseparator}\s?(\d\d?)\s?(rd|st|nd|th)?'
                         .format(self.RE_RDATE3, **self.locale.re_values))

        # "4:00:55 pm - 5:90:44 am", '4p-5p'
        self.TIMERNG1 = (r'{0}\s?{rangeseparator}\s?{0}'
                         .format(self.RE_RTIMEHMS2, **self.locale.re_values))

        self.TIMERNG2 = (r'{0}\s?{rangeseparator}\s?{0}'
                         .format(self.RE_RTIMEHMS, **self.locale.re_values))

        # "4-5pm "
        self.TIMERNG3 = (r'\d\d?\s?{rangeseparator}\s?{0}'
                         .format(self.RE_RTIMEHMS2, **self.locale.re_values))

        # "4:30-5pm "
        self.TIMERNG4 = (r'{0}\s?{rangeseparator}\s?{1}'
                         .format(self.RE_RTIMEHMS, self.RE_RTIMEHMS2,
                                 **self.locale.re_values))

        # flags shared by every pattern compiled in __getattr__
        self.re_option = re.IGNORECASE | re.VERBOSE

        # CRE_* attribute name -> regex source; compiled on first access
        self.cre_source = {'CRE_SPECIAL': self.RE_SPECIAL,
                           'CRE_NUMBER': self.RE_NUMBER,
                           'CRE_UNITS': self.RE_UNITS,
                           'CRE_UNITS_ONLY': self.RE_UNITS_ONLY,
                           'CRE_QUNITS': self.RE_QUNITS,
                           'CRE_MODIFIER': self.RE_MODIFIER,
                           'CRE_TIMEHMS': self.RE_TIMEHMS,
                           'CRE_TIMEHMS2': self.RE_TIMEHMS2,
                           'CRE_DATE': self.RE_DATE,
                           'CRE_DATE2': self.RE_DATE2,
                           'CRE_DATE3': self.RE_DATE3,
                           'CRE_DATE4': self.RE_DATE4,
                           'CRE_MONTH': self.RE_MONTH,
                           'CRE_WEEKDAY': self.RE_WEEKDAY,
                           'CRE_DAY': self.RE_DAY,
                           'CRE_DAY2': self.RE_DAY2,
                           'CRE_TIME': self.RE_TIME,
                           'CRE_REMAINING': self.RE_REMAINING,
                           'CRE_RTIMEHMS': self.RE_RTIMEHMS,
                           'CRE_RTIMEHMS2': self.RE_RTIMEHMS2,
                           'CRE_RDATE': self.RE_RDATE,
                           'CRE_RDATE3': self.RE_RDATE3,
                           'CRE_TIMERNG1': self.TIMERNG1,
                           'CRE_TIMERNG2': self.TIMERNG2,
                           'CRE_TIMERNG3': self.TIMERNG3,
                           'CRE_TIMERNG4': self.TIMERNG4,
                           'CRE_DATERNG1': self.DATERNG1,
                           'CRE_DATERNG2': self.DATERNG2,
                           'CRE_DATERNG3': self.DATERNG3,
                           'CRE_NLP_PREFIX': self.RE_NLP_PREFIX}
        self.cre_keys = set(self.cre_source.keys())

    def __getattr__(self, name):
        """
        Resolve attributes that are not found by the normal lookup.

        A C{CRE_*} name listed in C{cre_keys} is compiled from
        C{cre_source} with C{re_option}, stored on the instance (so this
        method is not called again for it) and returned.  A name listed in
        the locale's C{locale_keys} is read from the locale object.

        @type  name: string
        @param name: attribute being looked up

        @raise AttributeError: for any other name, including every name
                               requested before C{__init__} has run
        """
        # Go through the instance __dict__ rather than self.<attr>: on an
        # instance that has not been initialised yet (e.g. the empty object
        # copy/pickle create before restoring state) self.cre_keys would
        # re-enter this method and recurse until RecursionError.
        state = self.__dict__

        if name in state.get('cre_keys', ()):
            value = re.compile(self.cre_source[name], self.re_option)
            setattr(self, name, value)
            return value

        locale = state.get('locale')
        if locale is not None and name in locale.locale_keys:
            return getattr(locale, name)

        raise AttributeError(name)

    def daysInMonth(self, month, year):
        """
        Take the given month (1-12) and a given year (4 digit) return
        the number of days in the month adjusting for leap year as needed

        @type  month: integer
        @param month: month number, 1 (January) to 12 (December)
        @type  year:  integer
        @param year:  year used for the February leap year check

        @rtype:  integer
        @return: number of days in the month, or None when C{month} is
                 outside 1-12
        """
        result = None
        debug and log.debug('daysInMonth(%s, %s)', month, year)
        if 0 < month <= 12:
            result = self._DaysInMonthList[month - 1]

            if month == 2:
                if year in self._leapYears:
                    result += 1
                elif calendar.isleap(year):
                    # remember leap years outside the initial cache
                    self._leapYears.append(year)
                    result += 1

        return result

    def getSource(self, sourceKey, sourceTime=None):
        """
        Return a date/time tuple based on the given source key
        and the corresponding entry found in C{self.re_sources}.

        The source time (the current local time when none is given) is
        used as the default; any of the fields yr, mth, dy, hr, mn and sec
        present in the C{re_sources} entry replaces the matching value.

        @type  sourceKey:  string
        @param sourceKey:  key to look up in C{self.re_sources}
        @type  sourceTime: struct_time
        @param sourceTime: 9 item time tuple supplying the default values

        @rtype:  tuple
        @return: 9 item tuple (yr, mth, dy, hr, mn, sec, wd, yd, isdst)
                 where wd, yd and isdst are passed through unchanged, or
                 None when C{sourceKey} is not in C{self.re_sources}
        """
        sources = self.re_sources
        if sourceKey not in sources:
            return None

        if sourceTime is None:
            sourceTime = time.localtime()
        (yr, mth, dy, hr, mn, sec, wd, yd, isdst) = sourceTime

        source = sources[sourceKey]

        return (source.get('yr', yr), source.get('mth', mth),
                source.get('dy', dy), source.get('hr', hr),
                source.get('mn', mn), source.get('sec', sec),
                wd, yd, isdst)