Package parsedatetime
[hide private]
[frames] | no frames]

Source Code for Package parsedatetime

   1  # -*- coding: utf-8 -*- 
   2  # 
   3  # vim: sw=2 ts=2 sts=2 
   4  # 
   5  # Copyright 2004-2016 Mike Taylor 
   6  # 
   7  # Licensed under the Apache License, Version 2.0 (the "License"); 
   8  # you may not use this file except in compliance with the License. 
   9  # You may obtain a copy of the License at 
  10  # 
  11  #     http://www.apache.org/licenses/LICENSE-2.0 
  12  # 
  13  # Unless required by applicable law or agreed to in writing, software 
  14  # distributed under the License is distributed on an "AS IS" BASIS, 
  15  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
  16  # See the License for the specific language governing permissions and 
  17  # limitations under the License. 
  18   
  19  """parsedatetime 
  20   
  21  Parse human-readable date/time text. 
  22   
  23  Requires Python 2.6 or later 
  24  """ 
  25   
  26  from __future__ import with_statement, absolute_import, unicode_literals 
  27   
  28  import re 
  29  import time 
  30  import logging 
  31  import warnings 
  32  import datetime 
  33  import calendar 
  34  import contextlib 
  35  import email.utils 
  36   
  37  from .pdt_locales import (locales as _locales, 
  38                            get_icu, load_locale) 
  39  from .context import pdtContext, pdtContextStack 
  40  from .warns import pdt20DeprecationWarning 
  41   
  42   
  43  __author__ = 'Mike Taylor' 
  44  __email__ = 'bear@bear.im' 
  45  __copyright__ = 'Copyright (c) 2017 Mike Taylor' 
  46  __license__ = 'Apache License 2.0' 
  47  __version__ = '2.4' 
  48  __url__ = 'https://github.com/bear/parsedatetime' 
  49  __download_url__ = 'https://pypi.python.org/pypi/parsedatetime' 
  50  __description__ = 'Parse human-readable date/time text.' 
  51   
  52  # as a library, do *not* setup logging 
  53  # see docs.python.org/2/howto/logging.html#configuring-logging-for-a-library 
  54  # Set default logging handler to avoid "No handler found" warnings. 
  55   
  56  try:  # Python 2.7+ 
  57      from logging import NullHandler 
  58  except ImportError: 
59 - class NullHandler(logging.Handler):
60
61 - def emit(self, record):
62 pass
63 64 log = logging.getLogger(__name__) 65 log.addHandler(NullHandler()) 66 67 debug = False 68 69 pdtLocales = dict([(x, load_locale(x)) for x in _locales])
# Copied from feedparser.py
# Universal Feedparser
# Copyright (c) 2002-2006, Mark Pilgrim, All rights reserved.
# Originally a def inside of _parse_date_w3dtf()
def _extract_date(m):
    """
    Pull the calendar date out of a W3DTF regex match.

    @type  m: match object
    @param m: match exposing the named groups C{year}, C{julian},
              C{month} and C{day}

    @rtype:  tuple
    @return: C{(year, month, day)}; C{(0, 0, 0)} when the year is below
             1000 or the day-of-year cannot be represented as a date
    """
    year = int(m.group('year'))
    if year < 100:
        # two-digit year: place it in the current century
        year = 100 * (time.gmtime()[0] // 100) + year
    if year < 1000:
        return 0, 0, 0

    julian = m.group('julian')
    if julian:
        # Day-of-year form. Pure date arithmetic keeps the result
        # independent of the local timezone and always yields integers;
        # out-of-range ordinals roll over into the neighbouring year.
        try:
            resolved = (datetime.date(year, 1, 1) +
                        datetime.timedelta(days=int(julian) - 1))
        except OverflowError:
            return 0, 0, 0
        return resolved.year, resolved.month, resolved.day

    month = m.group('month')
    day = 1
    if month is None:
        month = 1
    else:
        month = int(month)
        day = m.group('day')
        if day:
            day = int(day)
        else:
            day = 1
    return year, month, day
116
# Copied from feedparser.py
# Universal Feedparser
# Copyright (c) 2002-2006, Mark Pilgrim, All rights reserved.
# Originally a def inside of _parse_date_w3dtf()
def _extract_time(m):
    """
    Pull the clock time out of a W3DTF regex match.

    @type  m: match object or None
    @param m: match exposing the named groups C{hours}, C{minutes}
              and C{seconds}

    @rtype:  tuple
    @return: C{(hours, minutes, seconds)} as integers; C{(0, 0, 0)} when
             there is no match or no hours group
    """
    if not m:
        return 0, 0, 0

    hour_text = m.group('hours')
    if not hour_text:
        return 0, 0, 0

    hh = int(hour_text)
    mm = int(m.group('minutes'))

    sec_text = m.group('seconds')
    # any fractional part (after '.' or ',') is dropped, not rounded
    ss = int(sec_text.replace(',', '.').split('.', 1)[0]) if sec_text else 0
    return hh, mm, ss
137
def _pop_time_accuracy(m, ctx):
    """
    Record on C{ctx} which time components were present in match C{m}.

    @type  m:   match object or None
    @param m:   match exposing the named groups C{hours}, C{minutes}
                and C{seconds}
    @type  ctx: object
    @param ctx: object providing C{updateAccuracy()} and the C{ACU_HOUR},
                C{ACU_MIN} and C{ACU_SEC} attributes
    """
    if not m:
        return

    # checked in hours -> minutes -> seconds order; the ACU_* attribute
    # is only read when the matching group is non-empty
    for group_name, flag_name in (('hours', 'ACU_HOUR'),
                                  ('minutes', 'ACU_MIN'),
                                  ('seconds', 'ACU_SEC')):
        if m.group(group_name):
            ctx.updateAccuracy(getattr(ctx, flag_name))
148
# Copied from feedparser.py
# Universal Feedparser
# Copyright (c) 2002-2006, Mark Pilgrim, All rights reserved.
# Modified to return a tuple instead of mktime
#
# Original comment:
# W3DTF-style date parsing adapted from PyXML xml.utils.iso8601, written by
# Drake and licensed under the Python license.  Removed all range checking
# for month, day, hour, minute, and second, since mktime will normalize
# these later
def __closure_parse_date_w3dtf():
    """Build and return the W3DTF parser, keeping its regexes private."""
    date_pattern = (r'(?P<year>\d\d\d\d)'
                    r'(?:(?P<dsep>-|)'
                    r'(?:(?P<julian>\d\d\d)'
                    r'|(?P<month>\d\d)(?:(?P=dsep)(?P<day>\d\d))?))?')
    tzd_pattern = (r'(?P<tzd>[-+](?P<tzdhours>\d\d)'
                   r'(?::?(?P<tzdminutes>\d\d))|Z)')
    time_pattern = (r'(?P<hours>\d\d)(?P<tsep>:|)(?P<minutes>\d\d)'
                    r'(?:(?P=tsep)(?P<seconds>\d\d(?:[.,]\d+)?))?' +
                    tzd_pattern)
    datetime_rx = re.compile('%s(?:T%s)?' % (date_pattern, time_pattern))

    # the __extract_date and __extract_time methods were
    # copied-out so they could be used by my code --bear
    def __extract_tzd(m):
        '''Return the Time Zone Designator as an offset in seconds from UTC.'''
        # kept from the feedparser original; nothing in this closure
        # calls it
        if not m:
            return 0
        tzd = m.group('tzd')
        if not tzd or tzd == 'Z':
            return 0
        hours = int(m.group('tzdhours'))
        minute_text = m.group('tzdminutes')
        minutes = int(minute_text) if minute_text else 0
        offset = (hours * 60 + minutes) * 60
        return -offset if tzd[0] == '+' else offset

    def _parse_date_w3dtf(dateString):
        """
        Parse a W3DTF date/time string.

        Returns a 9-item tuple (date, time, then three zeros), or None
        unless the pattern matches the whole of C{dateString}.
        """
        found = datetime_rx.match(dateString)
        if found is None or found.group() != dateString:
            return
        return _extract_date(found) + _extract_time(found) + (0, 0, 0)

    return _parse_date_w3dtf


_parse_date_w3dtf = __closure_parse_date_w3dtf()
del __closure_parse_date_w3dtf

_monthnames = set([
    'jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
    'aug', 'sep', 'oct', 'nov', 'dec',
    'january', 'february', 'march', 'april', 'may', 'june', 'july',
    'august', 'september', 'october', 'november', 'december'])
_daynames = set(['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun'])
# Copied from feedparser.py
# Universal Feedparser
# Copyright (c) 2002-2006, Mark Pilgrim, All rights reserved.
# Modified to return a tuple instead of mktime
def _parse_date_rfc822(dateString):
    '''
    Parse an RFC822, RFC1123, RFC2822, or asctime-style date

    @type  dateString: string
    @param dateString: text to hand to C{email.utils.parsedate_tz}

    @rtype:  tuple or None
    @return: whatever C{email.utils.parsedate_tz} returns for the
             normalised text; C{None} for empty or whitespace-only input
    '''
    data = dateString.split()
    if not data:
        # nothing to index into below and nothing to parse
        return None

    # drop a leading day name ("Thu," / "thu")
    if data[0][-1] in (',', '.') or data[0].lower() in _daynames:
        del data[0]

    if len(data) == 4:
        # "dd mon yyyy hh:mm:ss+zzzz": split the zone off the time field
        s = data[3]
        s = s.split('+', 1)
        if len(s) == 2:
            data[3:] = s
        else:
            data.append('')
        dateString = " ".join(data)

    if len(data) < 5:
        # date only: supply a midnight GMT time so parsedate_tz accepts it
        dateString += ' 00:00:00 GMT'
    return email.utils.parsedate_tz(dateString)
# rfc822.py defines several time zones, but we define some extra ones.
# 'ET' is equivalent to 'EST', etc.
# _additional_timezones = {'AT': -400, 'ET': -500,
#                          'CT': -600, 'MT': -700,
#                          'PT': -800}
# email.utils._timezones.update(_additional_timezones)

# Values accepted by the ``version`` argument of Calendar(); the flag
# style is the one Calendar.__init__ emits a deprecation warning for.
VERSION_FLAG_STYLE = 1
VERSION_CONTEXT_STYLE = 2
246 247 248 -class Calendar(object):
249 250 """ 251 A collection of routines to input, parse and manipulate date and times. 252 The text can either be 'normal' date values or it can be human readable. 253 """ 254
def __init__(self, constants=None, version=VERSION_FLAG_STYLE):
    """
    Default constructor for the L{Calendar} class.

    @type  constants: object
    @param constants: Instance of the class L{Constants}; a fresh
                      C{Constants()} is created when omitted
    @type  version:   integer
    @param version:   Default style version of current Calendar instance.
                      Valid value can be 1 (L{VERSION_FLAG_STYLE}) or
                      2 (L{VERSION_CONTEXT_STYLE}). See L{parse()}.

    @rtype:  object
    @return: L{Calendar} instance
    """
    # fall back to default constants when none are supplied
    self.ptc = Constants() if constants is None else constants
    self.version = version

    if version == VERSION_FLAG_STYLE:
        message = (
            'Flag style will be deprecated in parsedatetime 2.0. '
            'Instead use the context style by instantiating `Calendar()` '
            'with argument `version=parsedatetime.VERSION_CONTEXT_STYLE`.')
        warnings.warn(message, pdt20DeprecationWarning)

    self._ctxStack = pdtContextStack()
@contextlib.contextmanager
def context(self):
    """
    Push a fresh L{pdtContext} for the duration of a C{with} block.

    The new context is yielded to the caller. On exit it is always
    popped again, even when the block raises, so the stack stays
    balanced; on a normal exit its contents are also merged into the
    enclosing context, if there is one.
    """
    ctx = pdtContext()
    self._ctxStack.push(ctx)
    try:
        yield ctx
    finally:
        # pop unconditionally so a failure does not leave a stale context
        ctx = self._ctxStack.pop()
    # only reached when the with-block completed without raising
    if not self._ctxStack.isEmpty():
        self.currentContext.update(ctx)
@property
def currentContext(self):
    """The entry returned by C{last()} on this calendar's context stack."""
    stack = self._ctxStack
    return stack.last()
296
def _convertUnitAsWords(self, unitText):
    """
    Converts text units into their number value.

    @type  unitText: string
    @param unitText: number text to convert

    @rtype:  integer
    @return: numerical value of unitText

    @raise ValueError: when a word is neither a known number, a known
                       magnitude, "hundred", nor an ignorable word
    """
    pending = 0    # value accumulated since the last magnitude word
    total = 0      # sum of the groups already closed by a magnitude
    for word in re.split(r"[,\s-]+", unitText):
        if not word:
            # leading/trailing separators yield empty tokens
            continue
        small = self.ptc.small.get(word)
        if small is not None:
            pending += small
        elif word == "hundred":
            pending *= 100
        else:
            scale = self.ptc.magnitude.get(word)
            if scale is not None:
                total += pending * scale
                pending = 0
            elif word in self.ptc.ignore:
                pass
            else:
                raise ValueError("Unknown number: " + word)
    return pending + total
324
def _buildTime(self, source, quantity, modifier, units):
    """
    Take C{quantity}, C{modifier} and C{unit} strings and convert them
    into values. After converting, calculate the time and return the
    adjusted sourceTime.

    @type  source:   time
    @param source:   time to use as the base (or source)
    @type  quantity: string
    @param quantity: quantity string
    @type  modifier: string
    @param modifier: how quantity and units modify the source time
    @type  units:    string
    @param units:    unit of the quantity (i.e. hours, days, months, etc)

    @rtype:  struct_time
    @return: C{struct_time} of the calculated time
    """
    ctx = self.currentContext
    debug and log.debug('_buildTime: [%s][%s][%s]',
                        quantity, modifier, units)

    if source is None:
        source = time.localtime()

    quantity = '' if quantity is None else quantity.strip()
    qty = self._quantityToReal(quantity)

    if modifier in self.ptc.Modifiers:
        qty = qty * self.ptc.Modifiers[modifier]

    if units in (None, ''):
        units = 'dy'

    # plurals are handled by regex's (could be a bug tho)

    (yr, mth, dy, hr, mn, sec, _, _, _) = source
    start = datetime.datetime(yr, mth, dy, hr, mn, sec)

    # map the unit text onto the key of self.ptc.units that lists it;
    # unknown text is kept unchanged
    realunit = next((key for key, values in self.ptc.units.items()
                     if units in values), units)

    debug and log.debug('units %s --> realunit %s (qty=%s)',
                        units, realunit, qty)

    target = start
    try:
        if realunit in ('years', 'months'):
            # self.inc() takes the singular keyword: year= / month=
            target = self.inc(start, **{realunit[:-1]: qty})
        elif realunit in ('days', 'hours', 'minutes', 'seconds', 'weeks'):
            target = start + datetime.timedelta(**{realunit: qty})
    except OverflowError:
        # OverflowError is raised when target.year is larger than 9999;
        # the unmodified start time is returned in that case
        pass
    else:
        ctx.updateAccuracy(realunit)

    return target.timetuple()
393
def parseDate(self, dateString, sourceTime=None):
    """
    Parse short-form date strings::

        '05/28/2006' or '04.21'

    The numeric fields are split on matches of C{self.ptc.CRE_DATE2} and
    assigned to month/day/year following C{self.ptc.dp_order}, unless
    the first field is greater than 31, in which case year/month/day
    order is used.

    @type  dateString: string
    @param dateString: text to convert to a C{datetime}
    @type  sourceTime: struct_time
    @param sourceTime: C{struct_time} value to use as the base

    @rtype:  struct_time
    @return: calculated C{struct_time} value of dateString; the current
             local time when the resulting month or day is out of range
    """
    if sourceTime is None:
        yr, mth, dy, hr, mn, sec, wd, yd, isdst = time.localtime()
    else:
        yr, mth, dy, hr, mn, sec, wd, yd, isdst = sourceTime

    # values pulled from regex's will be stored here and later
    # assigned to mth, dy, yr based on information from the locale
    # -1 is used as the marker value because we want zero values
    # to be passed thru so they can be flagged as errors later
    v1 = -1
    v2 = -1
    v3 = -1
    accuracy = []

    # first separator: everything before it is the first field
    s = dateString
    m = self.ptc.CRE_DATE2.search(s)
    if m is not None:
        index = m.start()
        v1 = int(s[:index])
        s = s[index + 1:]

    # second separator (if any) splits the remainder into two fields
    m = self.ptc.CRE_DATE2.search(s)
    if m is not None:
        index = m.start()
        v2 = int(s[:index])
        v3 = int(s[index + 1:])
    else:
        v2 = int(s.strip())

    v = [v1, v2, v3]
    d = {'m': mth, 'd': dy, 'y': yr}

    # yyyy/mm/dd format
    dp_order = self.ptc.dp_order if v1 <= 31 else ['y', 'm', 'd']

    # copy each field that was present into its slot and remember
    # which components were given
    for i in range(0, 3):
        n = v[i]
        c = dp_order[i]
        if n >= 0:
            d[c] = n
            accuracy.append({'m': pdtContext.ACU_MONTH,
                             'd': pdtContext.ACU_DAY,
                             'y': pdtContext.ACU_YEAR}[c])

    # if the year is not specified and the date has already
    # passed, increment the year
    if v3 == -1 and ((mth > d['m']) or (mth == d['m'] and dy > d['d'])):
        yr = d['y'] + self.ptc.YearParseStyle
    else:
        yr = d['y']

    mth = d['m']
    dy = d['d']

    # birthday epoch constraint
    if yr < self.ptc.BirthdayEpoch:
        yr += 2000
    elif yr < 100:
        yr += 1900

    daysInCurrentMonth = self.ptc.daysInMonth(mth, yr)
    debug and log.debug('parseDate: %s %s %s %s',
                        yr, mth, dy, daysInCurrentMonth)

    with self.context() as ctx:
        # the month is range-checked first, so daysInCurrentMonth is only
        # compared once the month is known to be 1..12
        if mth > 0 and mth <= 12 and dy > 0 and \
           dy <= daysInCurrentMonth:
            sourceTime = (yr, mth, dy, hr, mn, sec, wd, yd, isdst)
            ctx.updateAccuracy(*accuracy)
        else:
            # return current time if date string is invalid
            sourceTime = time.localtime()

    return sourceTime
482
def parseDateText(self, dateString, sourceTime=None):
    """
    Parse long-form date strings::

        'May 31st, 2006'
        'Jan 1st'
        'July 2006'

    @type  dateString: string
    @param dateString: text to convert to a datetime
    @type  sourceTime: struct_time
    @param sourceTime: C{struct_time} value to use as the base

    @rtype:  struct_time
    @return: calculated C{struct_time} value of dateString; the current
             local time when C{dateString} does not match
             C{self.ptc.CRE_DATE3} or the day is invalid for the month
    """
    if sourceTime is None:
        yr, mth, dy, hr, mn, sec, wd, yd, isdst = time.localtime()
    else:
        yr, mth, dy, hr, mn, sec, wd, yd, isdst = sourceTime

    currentMth = mth
    currentDy = dy
    accuracy = []

    debug and log.debug('parseDateText currentMth %s currentDy %s',
                        mth, dy)

    s = dateString.lower()
    m = self.ptc.CRE_DATE3.search(s)
    if m is None:
        # no month name found: treat like any other invalid date string
        # instead of failing on m.group()
        debug and log.debug('parseDateText: no date found in [%s]', s)
        return time.localtime()

    mth = m.group('mthname')
    mth = self.ptc.MonthOffsets[mth]
    accuracy.append('month')

    if m.group('day') is not None:
        dy = int(m.group('day'))
        accuracy.append('day')
    else:
        dy = 1

    if m.group('year') is not None:
        yr = int(m.group('year'))
        accuracy.append('year')

        # birthday epoch constraint
        if yr < self.ptc.BirthdayEpoch:
            yr += 2000
        elif yr < 100:
            yr += 1900

    elif (mth < currentMth) or (mth == currentMth and dy < currentDy):
        # if that day and month have already passed in this year,
        # then increment the year by 1
        yr += self.ptc.YearParseStyle

    with self.context() as ctx:
        if dy > 0 and dy <= self.ptc.daysInMonth(mth, yr):
            sourceTime = (yr, mth, dy, hr, mn, sec, wd, yd, isdst)
            ctx.updateAccuracy(*accuracy)
        else:
            # Return current time if date string is invalid
            sourceTime = time.localtime()

    debug and log.debug('parseDateText returned '
                        'mth %d dy %d yr %d sourceTime %s',
                        mth, dy, yr, sourceTime)

    return sourceTime
551
def evalRanges(self, datetimeString, sourceTime=None):
    """
    Evaluate the C{datetimeString} text and determine if
    it represents a date or time range.

    @type  datetimeString: string
    @param datetimeString: datetime text to evaluate
    @type  sourceTime:     struct_time
    @param sourceTime:     C{struct_time} value to use as the base

    @rtype:  tuple
    @return: tuple of: start datetime, end datetime and the invalid flag
             (2 for the C{CRE_TIMERNG*} patterns, 1 for the
             C{CRE_DATERNG*} patterns, 0 when no usable range was found)
    """
    rangeFlag = retFlag = 0
    startStr = endStr = ''

    s = datetimeString.strip().lower()

    if self.ptc.rangeSep in s:
        # pad the separator with spaces, then collapse the doubled
        # spaces that the padding produces around an already-spaced one
        s = s.replace(self.ptc.rangeSep, ' %s ' % self.ptc.rangeSep)
        s = s.replace('  ', ' ')

    for cre, rflag in [(self.ptc.CRE_TIMERNG1, 1),
                       (self.ptc.CRE_TIMERNG2, 2),
                       (self.ptc.CRE_TIMERNG4, 7),
                       (self.ptc.CRE_TIMERNG3, 3),
                       (self.ptc.CRE_DATERNG1, 4),
                       (self.ptc.CRE_DATERNG2, 5),
                       (self.ptc.CRE_DATERNG3, 6)]:
        m = cre.search(s)
        if m is not None:
            rangeFlag = rflag
            break

    debug and log.debug('evalRanges: rangeFlag = %s [%s]', rangeFlag, s)

    if m is not None:
        if (m.group() != s):
            # capture remaining string
            parseStr = m.group()
            chunk1 = s[:m.start()]
            chunk2 = s[m.end():]
            s = '%s %s' % (chunk1, chunk2)

            # the text around the range becomes the base time, if parsable
            sourceTime, ctx = self.parse(s, sourceTime,
                                         VERSION_CONTEXT_STYLE)

            if not ctx.hasDateOrTime:
                sourceTime = None
        else:
            parseStr = s

    if rangeFlag:
        # split the matched range on the separator; using the match end
        # (not start + 1) keeps this right for multi-character separators
        sep = re.search(self.ptc.rangeSep, parseStr)
        head = parseStr[:sep.start()]
        tail = parseStr[sep.end():]

    if rangeFlag in (1, 2):
        startStr = head
        endStr = tail
        retFlag = 2

    elif rangeFlag in (3, 7):
        # capturing the meridian from the end time
        if self.ptc.usesMeridian:
            ampm = re.search(self.ptc.am[0], parseStr)

            # appending the meridian to the start time
            if ampm is not None:
                startStr = head + self.ptc.meridian[0]
            else:
                startStr = head + self.ptc.meridian[1]
        else:
            startStr = head

        endStr = tail
        retFlag = 2

    elif rangeFlag == 4:
        startStr = head
        endStr = tail
        retFlag = 1

    elif rangeFlag == 5:
        endStr = tail

        # capturing the year from the end date
        date = self.ptc.CRE_DATE3.search(endStr)
        endYear = date.group('year')

        # appending the year to the start date if the start date
        # does not have year information and the end date does.
        # eg : "Aug 21 - Sep 4, 2007"
        if endYear is not None:
            startStr = head.strip()
            date = self.ptc.CRE_DATE3.search(startStr)
            startYear = date.group('year')

            if startYear is None:
                startStr = startStr + ', ' + endYear
        else:
            startStr = head

        retFlag = 1

    elif rangeFlag == 6:
        startStr = head

        # capturing the month from the start date
        mth = self.ptc.CRE_DATE3.search(startStr)
        mth = mth.group('mthname')

        # appending the month name to the end date
        endStr = mth + tail

        retFlag = 1

    else:
        # if range is not found
        startDT = endDT = time.localtime()

    if retFlag:
        startDT, sctx = self.parse(startStr, sourceTime,
                                   VERSION_CONTEXT_STYLE)
        endDT, ectx = self.parse(endStr, sourceTime,
                                 VERSION_CONTEXT_STYLE)

        # both ends must parse for the range to count as valid
        if not sctx.hasDateOrTime or not ectx.hasDateOrTime:
            retFlag = 0

    return startDT, endDT, retFlag
684
def _CalculateDOWDelta(self, wd, wkdy, offset, style, currentDayStyle):
    """
    Based on the C{style} and C{currentDayStyle} determine what
    day-of-week value is to be returned.

    @type  wd:              integer
    @param wd:              day-of-week value for the current day
    @type  wkdy:            integer
    @param wkdy:            day-of-week value for the parsed day
    @type  offset:          integer
    @param offset:          offset direction for any modifiers (-1, 0, 1);
                            2 means no modifier was given
    @type  style:           integer
    @param style:           normally the value
                            set in C{Constants.DOWParseStyle}
    @type  currentDayStyle: integer
    @param currentDayStyle: normally the value
                            set in C{Constants.CurrentDOWParseStyle}

    @rtype:  integer
    @return: number of days from C{wd} to the requested weekday
    """
    base_delta = wkdy - wd

    if offset == 2:
        # no modifier is present.
        # i.e. string to be parsed is just DOW
        in_current_week = (wkdy * style > wd * style or
                           currentDayStyle and wkdy == wd)
        if in_current_week:
            week_shift = 0
        elif style in (-1, 1):
            # wkdy located in last (-1) or next (1) week
            week_shift = style
        else:
            # invalid style, or should raise error?
            week_shift = 0
    else:
        # -1 means last week, 0 current week, 1 next week
        week_shift = offset

    diff = base_delta + 7 * week_shift
    if style == 1 and diff < -7:
        diff += 7
    elif style == -1 and diff > 7:
        diff -= 7

    debug and log.debug("wd %s, wkdy %s, offset %d, "
                        "style %d, currentDayStyle %d",
                        wd, wkdy, offset, style, currentDayStyle)

    return diff
737
def _quantityToReal(self, quantity):
    """
    Convert a quantity, either spelled-out or numeric, to a float

    @type  quantity: string
    @param quantity: quantity to parse to float
    @rtype:  float
    @return: the quantity as a float: 1.0 for an empty quantity, 0.0
             when it is neither numeric nor a key of
             C{self.ptc.numbers}
    """
    if not quantity:
        return 1.0

    try:
        # a comma is accepted as the decimal separator
        value = float(quantity.replace(',', '.'))
    except ValueError:
        try:
            value = float(self.ptc.numbers[quantity])
        except KeyError:
            value = 0.0
    return value
761
def _evalModifier(self, modifier, chunk1, chunk2, sourceTime):
    """
    Evaluate the C{modifier} string and following text (passed in
    as C{chunk1} and C{chunk2}) and if they match any known modifiers
    calculate the delta and apply it to C{sourceTime}.

    @type  modifier:   string
    @param modifier:   modifier text to apply to sourceTime
    @type  chunk1:     string
    @param chunk1:     text chunk that preceded modifier (if any)
    @type  chunk2:     string
    @param chunk2:     text chunk that followed modifier (if any)
    @type  sourceTime: struct_time
    @param sourceTime: C{struct_time} value to use as the base

    @rtype:  tuple
    @return: tuple of: remaining text and the modified sourceTime
    """
    ctx = self.currentContext
    offset = self.ptc.Modifiers[modifier]

    if sourceTime is not None:
        (yr, mth, dy, hr, mn, sec, wd, yd, isdst) = sourceTime
    else:
        (yr, mth, dy, hr, mn, sec, wd, yd, isdst) = time.localtime()

    if self.ptc.StartTimeFromSourceTime:
        startHour = hr
        startMinute = mn
        startSecond = sec
    else:
        startHour = 9
        startMinute = 0
        startSecond = 0

    # capture the units after the modifier and the remaining
    # string after the unit
    m = self.ptc.CRE_REMAINING.search(chunk2)
    if m is not None:
        index = m.start() + 1
        unit = chunk2[:m.start()]
        chunk2 = chunk2[index:]
    else:
        unit = chunk2
        chunk2 = ''

    debug and log.debug("modifier [%s] chunk1 [%s] "
                        "chunk2 [%s] unit [%s]",
                        modifier, chunk1, chunk2, unit)

    if unit in self.ptc.units['months']:
        currentDaysInMonth = self.ptc.daysInMonth(mth, yr)
        if offset == 0:
            dy = currentDaysInMonth
            sourceTime = (yr, mth, dy, startHour, startMinute,
                          startSecond, wd, yd, isdst)
        elif offset == 2:
            # if day is the last day of the month, calculate the last day
            # of the next month
            # NOTE(review): dy becomes the length of month mth + 1 but is
            # then used to build a date in month mth, and mth + 1 is 13
            # in December -- confirm this branch behaves as intended
            if dy == currentDaysInMonth:
                dy = self.ptc.daysInMonth(mth + 1, yr)

            start = datetime.datetime(yr, mth, dy, startHour,
                                      startMinute, startSecond)
            target = self.inc(start, month=1)
            sourceTime = target.timetuple()
        else:
            start = datetime.datetime(yr, mth, 1, startHour,
                                      startMinute, startSecond)
            target = self.inc(start, month=offset)
            sourceTime = target.timetuple()
        ctx.updateAccuracy(ctx.ACU_MONTH)

    elif unit in self.ptc.units['weeks']:
        if offset == 0:
            start = datetime.datetime(yr, mth, dy, 17, 0, 0)
            target = start + datetime.timedelta(days=(4 - wd))
            sourceTime = target.timetuple()
        elif offset == 2:
            start = datetime.datetime(yr, mth, dy, startHour,
                                      startMinute, startSecond)
            target = start + datetime.timedelta(days=7)
            sourceTime = target.timetuple()
        else:
            start = datetime.datetime(yr, mth, dy, startHour,
                                      startMinute, startSecond)
            target = start + offset * datetime.timedelta(weeks=1)
            sourceTime = target.timetuple()
        ctx.updateAccuracy(ctx.ACU_WEEK)

    elif unit in self.ptc.units['days']:
        if offset == 0:
            sourceTime = (yr, mth, dy, 17, 0, 0, wd, yd, isdst)
            ctx.updateAccuracy(ctx.ACU_HALFDAY)
        elif offset == 2:
            start = datetime.datetime(yr, mth, dy, hr, mn, sec)
            target = start + datetime.timedelta(days=1)
            sourceTime = target.timetuple()
        else:
            start = datetime.datetime(yr, mth, dy, startHour,
                                      startMinute, startSecond)
            target = start + datetime.timedelta(days=offset)
            sourceTime = target.timetuple()
        ctx.updateAccuracy(ctx.ACU_DAY)

    elif unit in self.ptc.units['hours']:
        if offset == 0:
            sourceTime = (yr, mth, dy, hr, 0, 0, wd, yd, isdst)
        else:
            start = datetime.datetime(yr, mth, dy, hr, 0, 0)
            target = start + datetime.timedelta(hours=offset)
            sourceTime = target.timetuple()
        ctx.updateAccuracy(ctx.ACU_HOUR)

    elif unit in self.ptc.units['years']:
        if offset == 0:
            sourceTime = (yr, 12, 31, hr, mn, sec, wd, yd, isdst)
        elif offset == 2:
            sourceTime = (yr + 1, mth, dy, hr, mn, sec, wd, yd, isdst)
        else:
            sourceTime = (yr + offset, 1, 1, startHour, startMinute,
                          startSecond, wd, yd, isdst)
        ctx.updateAccuracy(ctx.ACU_YEAR)

    elif modifier == 'eom':
        dy = self.ptc.daysInMonth(mth, yr)
        sourceTime = (yr, mth, dy, startHour, startMinute,
                      startSecond, wd, yd, isdst)
        ctx.updateAccuracy(ctx.ACU_DAY)

    elif modifier == 'eoy':
        mth = 12
        dy = self.ptc.daysInMonth(mth, yr)
        sourceTime = (yr, mth, dy, startHour, startMinute,
                      startSecond, wd, yd, isdst)
        ctx.updateAccuracy(ctx.ACU_MONTH)

    elif self.ptc.CRE_WEEKDAY.match(unit):
        m = self.ptc.CRE_WEEKDAY.match(unit)
        debug and log.debug('CRE_WEEKDAY matched')
        wkdy = m.group()

        if modifier == 'eod':
            ctx.updateAccuracy(ctx.ACU_HOUR)
            # Calculate the upcoming weekday
            sourceTime, subctx = self.parse(wkdy, sourceTime,
                                            VERSION_CONTEXT_STYLE)
            sTime = self.ptc.getSource(modifier, sourceTime)
            if sTime is not None:
                sourceTime = sTime
                ctx.updateAccuracy(ctx.ACU_HALFDAY)
        else:
            # unless one of these modifiers is being applied to the
            # day-of-week, we want to start with target as the day
            # in the current week.
            dowOffset = offset
            relativeModifier = modifier not in [
                'this', 'next', 'last', 'prior', 'previous']
            if relativeModifier:
                dowOffset = 0

            wkdy = self.ptc.WeekdayOffsets[wkdy]
            diff = self._CalculateDOWDelta(
                wd, wkdy, dowOffset, self.ptc.DOWParseStyle,
                self.ptc.CurrentDOWParseStyle)
            start = datetime.datetime(yr, mth, dy, startHour,
                                      startMinute, startSecond)
            target = start + datetime.timedelta(days=diff)

            if chunk1 != '' and relativeModifier:
                # consider "one day before thursday": we need to parse
                # chunk1 ("one day") and apply according to the offset
                # ("before"), rather than allowing the remaining parse
                # step to apply "one day" without the offset direction.
                t, subctx = self.parse(chunk1, sourceTime,
                                       VERSION_CONTEXT_STYLE)
                if subctx.hasDateOrTime:
                    delta = time.mktime(t) - time.mktime(sourceTime)
                    target = (start + datetime.timedelta(days=diff) +
                              datetime.timedelta(seconds=delta * offset))
                    chunk1 = ''

            sourceTime = target.timetuple()
        ctx.updateAccuracy(ctx.ACU_DAY)

    elif chunk1 == '' and chunk2 == '' and self.ptc.CRE_TIME.match(unit):
        m = self.ptc.CRE_TIME.match(unit)
        debug and log.debug('CRE_TIME matched')
        (yr, mth, dy, hr, mn, sec, wd, yd, isdst), subctx = \
            self.parse(unit, None, VERSION_CONTEXT_STYLE)

        start = datetime.datetime(yr, mth, dy, hr, mn, sec)
        target = start + datetime.timedelta(days=offset)
        sourceTime = target.timetuple()

    else:
        # check if the remaining text is parsable and if so,
        # use it as the base time for the modifier source time

        debug and log.debug('check for modifications '
                            'to source time [%s] [%s]',
                            chunk1, unit)

        unit = unit.strip()
        if unit:
            s = '%s %s' % (unit, chunk2)
            t, subctx = self.parse(s, sourceTime, VERSION_CONTEXT_STYLE)

            if subctx.hasDate:  # working with dates
                u = unit.lower()
                if u in self.ptc.Months or \
                   u in self.ptc.shortMonths:
                    yr, mth, dy, hr, mn, sec, wd, yd, isdst = t
                    start = datetime.datetime(
                        yr, mth, dy, hr, mn, sec)
                    t = self.inc(start, year=offset).timetuple()
                elif u in self.ptc.Weekdays:
                    # t is a struct_time, which cannot be added to a
                    # timedelta directly: shift it through datetime
                    shifted = (datetime.datetime(*t[:6]) +
                               datetime.timedelta(weeks=offset))
                    t = shifted.timetuple()

            if subctx.hasDateOrTime:
                sourceTime = t
                chunk2 = ''

        chunk1 = chunk1.strip()

        # if the word after next is a number, the string is more than
        # likely to be "next 4 hrs" which we will have to combine the
        # units with the rest of the string
        if chunk1:
            try:
                m = list(self.ptc.CRE_NUMBER.finditer(chunk1))[-1]
            except IndexError:
                pass
            else:
                debug and log.debug('CRE_NUMBER matched')
                # sign the last number in chunk1 with the modifier offset
                qty = self._quantityToReal(m.group()) * offset
                chunk1 = '%s%s%s' % (chunk1[:m.start()],
                                     qty, chunk1[m.end():])
            t, subctx = self.parse(chunk1, sourceTime,
                                   VERSION_CONTEXT_STYLE)

            chunk1 = ''

            if subctx.hasDateOrTime:
                sourceTime = t

        debug and log.debug('looking for modifier %s', modifier)
        sTime = self.ptc.getSource(modifier, sourceTime)
        if sTime is not None:
            debug and log.debug('modifier found in sources')
            sourceTime = sTime
            ctx.updateAccuracy(ctx.ACU_HALFDAY)

    debug and log.debug('returning chunk = "%s %s" and sourceTime = %s',
                        chunk1, chunk2, sourceTime)

    return '%s %s' % (chunk1, chunk2), sourceTime
1016
def _evalDT(self, datetimeString, sourceTime):
    """
    Calculate the datetime from known format like RFC822 or W3CDTF

    Examples handled::
        RFC822, W3CDTF formatted dates
        HH:MM[:SS][ am/pm]
        MM/DD/YYYY
        DD MMMM YYYY

    @type  datetimeString: string
    @param datetimeString: text to try and parse as more "traditional"
                           date/time text
    @type  sourceTime:     struct_time
    @param sourceTime:     C{struct_time} value to use as the base; it is
                           returned unchanged when not C{None}

    @rtype:  struct_time
    @return: calculated C{struct_time} value or current C{struct_time}
             if not parsed
    """
    ctx = self.currentContext
    text = datetimeString.strip()

    if sourceTime is not None:
        return sourceTime

    # Given string date is a RFC822 date
    parsed = _parse_date_rfc822(text)
    debug and log.debug(
        'attempt to parse as rfc822 - %s', str(parsed))

    if parsed is not None:
        (yr, mth, dy, hr, mn, sec, wd, yd, isdst, _) = parsed
        ctx.updateAccuracy(ctx.ACU_YEAR, ctx.ACU_MONTH, ctx.ACU_DAY)

        # time accuracy is only recorded when no clock field is zero
        if not (hr == 0 or mn == 0 or sec == 0):
            ctx.updateAccuracy(ctx.ACU_HOUR, ctx.ACU_MIN, ctx.ACU_SEC)

        # drop the trailing timezone offset entry
        return (yr, mth, dy, hr, mn, sec, wd, yd, isdst)

    # Given string date is a W3CDTF date
    parsed = _parse_date_w3dtf(text)
    if parsed is not None:
        ctx.updateAccuracy(ctx.ACU_YEAR, ctx.ACU_MONTH, ctx.ACU_DAY,
                           ctx.ACU_HOUR, ctx.ACU_MIN, ctx.ACU_SEC)
        return parsed

    return time.localtime()
1067
1068 - def _evalUnits(self, datetimeString, sourceTime):
1069 """ 1070 Evaluate text passed by L{_partialParseUnits()} 1071 """ 1072 s = datetimeString.strip() 1073 sourceTime = self._evalDT(datetimeString, sourceTime) 1074 1075 # Given string is a time string with units like "5 hrs 30 min" 1076 modifier = '' # TODO 1077 1078 m = self.ptc.CRE_UNITS.search(s) 1079 if m is not None: 1080 units = m.group('units') 1081 quantity = s[:m.start('units')] 1082 1083 sourceTime = self._buildTime(sourceTime, quantity, modifier, units) 1084 return sourceTime
1085
1086 - def _evalQUnits(self, datetimeString, sourceTime):
1087 """ 1088 Evaluate text passed by L{_partialParseQUnits()} 1089 """ 1090 s = datetimeString.strip() 1091 sourceTime = self._evalDT(datetimeString, sourceTime) 1092 1093 # Given string is a time string with single char units like "5 h 30 m" 1094 modifier = '' # TODO 1095 1096 m = self.ptc.CRE_QUNITS.search(s) 1097 if m is not None: 1098 units = m.group('qunits') 1099 quantity = s[:m.start('qunits')] 1100 1101 sourceTime = self._buildTime(sourceTime, quantity, modifier, units) 1102 return sourceTime
1103
def _evalDateStr(self, datetimeString, sourceTime):
    """
    Evaluate text passed by L{_partialParseDateStr()}

    @type  datetimeString: string
    @param datetimeString: date text such as "May 23rd, 2005"
    @type  sourceTime:     struct_time
    @param sourceTime:     C{struct_time} value to use as the base

    @rtype:  struct_time
    @return: whatever L{parseDateText()} returns for the stripped text
    """
    text = datetimeString.strip()
    base = self._evalDT(datetimeString, sourceTime)

    # Given string is in the format "May 23rd, 2005"
    if debug:
        log.debug('checking for MMM DD YYYY')

    return self.parseDateText(text, base)
1114
1115 - def _evalDateStd(self, datetimeString, sourceTime):
1116 """ 1117 Evaluate text passed by L{_partialParseDateStd()} 1118 """ 1119 s = datetimeString.strip() 1120 sourceTime = self._evalDT(datetimeString, sourceTime) 1121 1122 # Given string is in the format 07/21/2006 1123 return self.parseDate(s, sourceTime)
1124
def _evalDayStr(self, datetimeString, sourceTime):
    """
    Evaluate text passed by L{_partialParseDayStr()}

    The stripped text is looked up in C{self.ptc.dayOffsets} to get a
    day offset (0 when the text is not a known key).  The time of day
    is taken from the base time when
    C{self.ptc.StartTimeFromSourceTime} is set, otherwise 09:00:00.

    @type  datetimeString: string
    @param datetimeString: natural language day text (today, tomorrow..)
    @type  sourceTime:     struct_time
    @param sourceTime:     C{struct_time} value to use as the base

    @rtype:  struct_time
    @return: base date shifted by the day offset
    """
    key = datetimeString.strip()
    base = self._evalDT(datetimeString, sourceTime)

    # Given string is a natural language date string like today, tomorrow..
    (yr, mth, dy, hr, mn, sec, wd, yd, isdst) = base

    try:
        dayShift = self.ptc.dayOffsets[key]
    except KeyError:
        dayShift = 0

    if self.ptc.StartTimeFromSourceTime:
        clock = (hr, mn, sec)
    else:
        clock = (9, 0, 0)

    self.currentContext.updateAccuracy(pdtContext.ACU_DAY)

    anchor = datetime.datetime(yr, mth, dy, *clock)
    shifted = anchor + datetime.timedelta(days=dayShift)
    return shifted.timetuple()
1154
def _evalWeekday(self, datetimeString, sourceTime):
    """
    Evaluate text passed by L{_partialParseWeekday()}

    The stripped text is looked up in C{self.ptc.WeekdayOffsets} and the
    number of days to move is computed by L{_CalculateDOWDelta()}.

    @type  datetimeString: string
    @param datetimeString: weekday text
    @type  sourceTime:     struct_time
    @param sourceTime:     C{struct_time} value to use as the base

    @rtype:  struct_time
    @return: base date/time shifted to the requested weekday
    """
    name = datetimeString.strip()
    base = self._evalDT(datetimeString, sourceTime)

    # Given string is a weekday
    yr, mth, dy, hr, mn, sec, wd, yd, isdst = base

    anchor = datetime.datetime(yr, mth, dy, hr, mn, sec)
    targetWd = self.ptc.WeekdayOffsets[name]

    # The delta is computed with the same arguments regardless of
    # whether the target weekday lies before or after the current one,
    # so a single call covers both cases.
    dayDelta = self._CalculateDOWDelta(wd, targetWd, 2,
                                       self.ptc.DOWParseStyle,
                                       self.ptc.CurrentDOWParseStyle)

    self.currentContext.updateAccuracy(pdtContext.ACU_DAY)

    shifted = anchor + datetime.timedelta(days=dayDelta)
    return shifted.timetuple()
1180
def _evalTimeStr(self, datetimeString, sourceTime):
    """
    Evaluate text passed by L{_partialParseTimeStr()}

    Text listed in C{self.ptc.re_values['now']} leaves the base time
    untouched; any other text is looked up with
    C{self.ptc.getSource()} and replaces the base time when a value is
    found.

    @type  datetimeString: string
    @param datetimeString: natural language time text (lunch, midnight..)
    @type  sourceTime:     struct_time
    @param sourceTime:     C{struct_time} value to use as the base

    @rtype:  struct_time
    @return: the base time, or the value found for the text
    """
    text = datetimeString.strip()
    result = self._evalDT(datetimeString, sourceTime)

    if text in self.ptc.re_values['now']:
        self.currentContext.updateAccuracy(pdtContext.ACU_NOW)
        return result

    # Given string is a natural language time string like
    # lunch, midnight, etc
    found = self.ptc.getSource(text, result)
    if found:
        result = found
    self.currentContext.updateAccuracy(pdtContext.ACU_HALFDAY)

    return result
1199
def _evalMeridian(self, datetimeString, sourceTime):
    """
    Evaluate text passed by L{_partialParseMeridian()}

    Handles text in the format HH:MM(:SS)(am/pm).  An hour of 24 is
    treated as 0, 12am becomes hour 0 and pm hours below 12 are shifted
    by 12.  The base time is only replaced when the resulting hour,
    minute and second are in range.

    @type  datetimeString: string
    @param datetimeString: time text with a meridian marker
    @type  sourceTime:     struct_time
    @param sourceTime:     C{struct_time} value to use as the base

    @rtype:  struct_time or tuple
    @return: base time with hour/minute/second replaced, or the base
             time unchanged when the values are out of range
    """
    text = datetimeString.strip()
    sourceTime = self._evalDT(datetimeString, sourceTime)

    # Given string is in the format HH:MM(:SS)(am/pm)
    yr, mth, dy, hr, mn, sec, wd, yd, isdst = sourceTime

    found = self.ptc.CRE_TIMEHMS2.search(text)
    if found is not None:
        clock = text[:found.start('meridian')].strip()
        if len(clock) > 2:
            hr, mn, sec = _extract_time(found)
        else:
            # one or two characters can only be a bare hour
            hr, mn, sec = int(clock), 0, 0

        if hr == 24:
            hr = 0

        marker = found.group('meridian').lower()

        # if 'am' found and hour is 12 - force hour to 0 (midnight)
        if (marker in self.ptc.am) and hr == 12:
            hr = 0

        # if 'pm' found and hour < 12, add 12 to shift to evening
        if (marker in self.ptc.pm) and hr < 12:
            hr += 12

    # time validation
    inRange = hr < 24 and mn < 60 and sec < 60
    if inRange:
        sourceTime = (yr, mth, dy, hr, mn, sec, wd, yd, isdst)
        _pop_time_accuracy(found, self.currentContext)

    return sourceTime
1239
def _evalTimeStd(self, datetimeString, sourceTime):
    """
    Evaluate text passed by L{_partialParseTimeStd()}

    Handles text in the format HH:MM(:SS).  An hour of 24 is treated as
    0.  The base time is only replaced when the resulting hour, minute
    and second are in range.

    @type  datetimeString: string
    @param datetimeString: time text
    @type  sourceTime:     struct_time
    @param sourceTime:     C{struct_time} value to use as the base

    @rtype:  struct_time or tuple
    @return: base time with hour/minute/second replaced, or the base
             time unchanged when the values are out of range
    """
    text = datetimeString.strip()
    sourceTime = self._evalDT(datetimeString, sourceTime)

    # Given string is in the format HH:MM(:SS)
    yr, mth, dy, hr, mn, sec, wd, yd, isdst = sourceTime

    found = self.ptc.CRE_TIMEHMS.search(text)
    if found is not None:
        hr, mn, sec = _extract_time(found)
    if hr == 24:
        hr = 0

    # time validation
    inRange = hr < 24 and mn < 60 and sec < 60
    if inRange:
        sourceTime = (yr, mth, dy, hr, mn, sec, wd, yd, isdst)
        _pop_time_accuracy(found, self.currentContext)

    return sourceTime
1262
1263 - def _UnitsTrapped(self, s, m, key):
1264 # check if a day suffix got trapped by a unit match 1265 # for example Dec 31st would match for 31s (aka 31 seconds) 1266 # Dec 31st 1267 # ^ ^ 1268 # | +-- m.start('units') 1269 # | and also m2.start('suffix') 1270 # +---- m.start('qty') 1271 # and also m2.start('day') 1272 m2 = self.ptc.CRE_DAY2.search(s) 1273 if m2 is not None: 1274 t = '%s%s' % (m2.group('day'), m.group(key)) 1275 if m.start(key) == m2.start('suffix') and \ 1276 m.start('qty') == m2.start('day') and \ 1277 m.group('qty') == t: 1278 return True 1279 else: 1280 return False 1281 else: 1282 return False
1283
1284 - def _partialParseModifier(self, s, sourceTime):
1285 """ 1286 test if giving C{s} matched CRE_MODIFIER, used by L{parse()} 1287 1288 @type s: string 1289 @param s: date/time text to evaluate 1290 @type sourceTime: struct_time 1291 @param sourceTime: C{struct_time} value to use as the base 1292 1293 @rtype: tuple 1294 @return: tuple of remained date/time text, datetime object and 1295 an boolean value to describ if matched or not 1296 1297 """ 1298 parseStr = None 1299 chunk1 = chunk2 = '' 1300 1301 # Modifier like next/prev/from/after/prior.. 1302 m = self.ptc.CRE_MODIFIER.search(s) 1303 if m is not None: 1304 if m.group() != s: 1305 # capture remaining string 1306 parseStr = m.group() 1307 chunk1 = s[:m.start()].strip() 1308 chunk2 = s[m.end():].strip() 1309 else: 1310 parseStr = s 1311 1312 if parseStr: 1313 debug and log.debug('found (modifier) [%s][%s][%s]', 1314 parseStr, chunk1, chunk2) 1315 s, sourceTime = self._evalModifier(parseStr, chunk1, 1316 chunk2, sourceTime) 1317 1318 return s, sourceTime, bool(parseStr)
1319
1320 - def _partialParseUnits(self, s, sourceTime):
1321 """ 1322 test if giving C{s} matched CRE_UNITS, used by L{parse()} 1323 1324 @type s: string 1325 @param s: date/time text to evaluate 1326 @type sourceTime: struct_time 1327 @param sourceTime: C{struct_time} value to use as the base 1328 1329 @rtype: tuple 1330 @return: tuple of remained date/time text, datetime object and 1331 an boolean value to describ if matched or not 1332 1333 """ 1334 parseStr = None 1335 chunk1 = chunk2 = '' 1336 1337 # Quantity + Units 1338 m = self.ptc.CRE_UNITS.search(s) 1339 if m is not None: 1340 debug and log.debug('CRE_UNITS matched') 1341 if self._UnitsTrapped(s, m, 'units'): 1342 debug and log.debug('day suffix trapped by unit match') 1343 else: 1344 if (m.group('qty') != s): 1345 # capture remaining string 1346 parseStr = m.group('qty') 1347 chunk1 = s[:m.start('qty')].strip() 1348 chunk2 = s[m.end('qty'):].strip() 1349 1350 if chunk1[-1:] == '-': 1351 parseStr = '-%s' % parseStr 1352 chunk1 = chunk1[:-1] 1353 1354 s = '%s %s' % (chunk1, chunk2) 1355 else: 1356 parseStr = s 1357 s = '' 1358 1359 if parseStr: 1360 debug and log.debug('found (units) [%s][%s][%s]', 1361 parseStr, chunk1, chunk2) 1362 sourceTime = self._evalUnits(parseStr, sourceTime) 1363 1364 return s, sourceTime, bool(parseStr)
1365
1366 - def _partialParseQUnits(self, s, sourceTime):
1367 """ 1368 test if giving C{s} matched CRE_QUNITS, used by L{parse()} 1369 1370 @type s: string 1371 @param s: date/time text to evaluate 1372 @type sourceTime: struct_time 1373 @param sourceTime: C{struct_time} value to use as the base 1374 1375 @rtype: tuple 1376 @return: tuple of remained date/time text, datetime object and 1377 an boolean value to describ if matched or not 1378 1379 """ 1380 parseStr = None 1381 chunk1 = chunk2 = '' 1382 1383 # Quantity + Units 1384 m = self.ptc.CRE_QUNITS.search(s) 1385 if m is not None: 1386 debug and log.debug('CRE_QUNITS matched') 1387 if self._UnitsTrapped(s, m, 'qunits'): 1388 debug and log.debug( 1389 'day suffix trapped by qunit match') 1390 else: 1391 if (m.group('qty') != s): 1392 # capture remaining string 1393 parseStr = m.group('qty') 1394 chunk1 = s[:m.start('qty')].strip() 1395 chunk2 = s[m.end('qty'):].strip() 1396 1397 if chunk1[-1:] == '-': 1398 parseStr = '-%s' % parseStr 1399 chunk1 = chunk1[:-1] 1400 1401 s = '%s %s' % (chunk1, chunk2) 1402 else: 1403 parseStr = s 1404 s = '' 1405 1406 if parseStr: 1407 debug and log.debug('found (qunits) [%s][%s][%s]', 1408 parseStr, chunk1, chunk2) 1409 sourceTime = self._evalQUnits(parseStr, sourceTime) 1410 1411 return s, sourceTime, bool(parseStr)
1412
def _partialParseDateStr(self, s, sourceTime):
    """
    Test if C{s} matches CRE_DATE3, used by L{parse()}

    When the matched date is only part of C{s}, the text is also
    searched for a time expression so that the hours of a following
    time are not swallowed as the year of the date.

    @type  s:          string
    @param s:          date/time text to evaluate
    @type  sourceTime: struct_time
    @param sourceTime: C{struct_time} value to use as the base

    @rtype:  tuple
    @return: tuple of the remaining date/time text, the time value and
             a boolean describing whether a match was found

    """
    parseStr = None
    chunk1 = chunk2 = ''

    m = self.ptc.CRE_DATE3.search(s)
    # NO LONGER NEEDED, THE REGEXP HANDLED MTHNAME NOW
    # for match in self.ptc.CRE_DATE3.finditer(s):
    # to prevent "HH:MM(:SS) time strings" expressions from
    # triggering this regex, we checks if the month field
    # exists in the searched expression, if it doesn't exist,
    # the date field is not valid
    #     if match.group('mthname'):
    #         m = self.ptc.CRE_DATE3.search(s, match.start())
    #         valid_date = True
    #         break

    # String date format
    if m is not None:

        if (m.group('date') != s):
            # capture remaining string
            mStart = m.start('date')
            mEnd = m.end('date')

            # we need to check that anything following the parsed
            # date is a time expression because it is often picked
            # up as a valid year if the hour is 2 digits
            fTime = False
            mm = self.ptc.CRE_TIMEHMS2.search(s)
            # "February 24th 1PM" doesn't get caught
            # "February 24th 12PM" does
            mYear = m.group('year')
            if mm is not None and mYear is not None:
                fTime = True
            else:
                # "February 24th 12:00"
                mm = self.ptc.CRE_TIMEHMS.search(s)
                if mm is not None and mYear is None:
                    fTime = True
            if fTime:
                hoursStart = mm.start('hours')

                # cut the date text short when the time's hours begin
                # inside what was captured as the year
                # NOTE(review): when the 'year' group did not take part
                # in the match, m.end('year') is -1, so this test looks
                # like it can never succeed for the mYear-is-None case
                # above - confirm
                if hoursStart < m.end('year'):
                    mEnd = hoursStart

            parseStr = s[mStart:mEnd]
            chunk1 = s[:mStart]
            chunk2 = s[mEnd:]

            s = '%s %s' % (chunk1, chunk2)
        else:
            # the whole text is the date
            parseStr = s
            s = ''

    if parseStr:
        debug and log.debug(
            'found (date3) [%s][%s][%s]', parseStr, chunk1, chunk2)
        sourceTime = self._evalDateStr(parseStr, sourceTime)

    return s, sourceTime, bool(parseStr)
1486
1487 - def _partialParseDateStd(self, s, sourceTime):
1488 """ 1489 test if giving C{s} matched CRE_DATE, used by L{parse()} 1490 1491 @type s: string 1492 @param s: date/time text to evaluate 1493 @type sourceTime: struct_time 1494 @param sourceTime: C{struct_time} value to use as the base 1495 1496 @rtype: tuple 1497 @return: tuple of remained date/time text, datetime object and 1498 an boolean value to describ if matched or not 1499 1500 """ 1501 parseStr = None 1502 chunk1 = chunk2 = '' 1503 1504 # Standard date format 1505 m = self.ptc.CRE_DATE.search(s) 1506 if m is not None: 1507 1508 if (m.group('date') != s): 1509 # capture remaining string 1510 parseStr = m.group('date') 1511 chunk1 = s[:m.start('date')] 1512 chunk2 = s[m.end('date'):] 1513 s = '%s %s' % (chunk1, chunk2) 1514 else: 1515 parseStr = s 1516 s = '' 1517 1518 if parseStr: 1519 debug and log.debug( 1520 'found (date) [%s][%s][%s]', parseStr, chunk1, chunk2) 1521 sourceTime = self._evalDateStd(parseStr, sourceTime) 1522 1523 return s, sourceTime, bool(parseStr)
1524
1525 - def _partialParseDayStr(self, s, sourceTime):
1526 """ 1527 test if giving C{s} matched CRE_DAY, used by L{parse()} 1528 1529 @type s: string 1530 @param s: date/time text to evaluate 1531 @type sourceTime: struct_time 1532 @param sourceTime: C{struct_time} value to use as the base 1533 1534 @rtype: tuple 1535 @return: tuple of remained date/time text, datetime object and 1536 an boolean value to describ if matched or not 1537 1538 """ 1539 parseStr = None 1540 chunk1 = chunk2 = '' 1541 1542 # Natural language day strings 1543 m = self.ptc.CRE_DAY.search(s) 1544 if m is not None: 1545 1546 if (m.group() != s): 1547 # capture remaining string 1548 parseStr = m.group() 1549 chunk1 = s[:m.start()] 1550 chunk2 = s[m.end():] 1551 s = '%s %s' % (chunk1, chunk2) 1552 else: 1553 parseStr = s 1554 s = '' 1555 1556 if parseStr: 1557 debug and log.debug( 1558 'found (day) [%s][%s][%s]', parseStr, chunk1, chunk2) 1559 sourceTime = self._evalDayStr(parseStr, sourceTime) 1560 1561 return s, sourceTime, bool(parseStr)
1562
def _partialParseWeekday(self, s, sourceTime):
    """
    Test if C{s} matches CRE_WEEKDAY, used by L{parse()}

    The weekday text is consumed from C{s} whenever it is found, but it
    is only evaluated when the current context does not already hold a
    date.

    @type  s:          string
    @param s:          date/time text to evaluate
    @type  sourceTime: struct_time
    @param sourceTime: C{struct_time} value to use as the base

    @rtype:  tuple
    @return: tuple of the remaining date/time text, the time value and
             a boolean describing whether a match was found

    """
    parseStr = None
    chunk1 = chunk2 = ''

    ctx = self.currentContext
    # guarded by the debug flag like every other log call in this class
    debug and log.debug('eval %s with context - %s, %s',
                        s, ctx.hasDate, ctx.hasTime)

    # Weekday
    m = self.ptc.CRE_WEEKDAY.search(s)
    if m is not None:
        gv = m.group()
        # text that is itself a day offset key is left alone here
        if s not in self.ptc.dayOffsets:

            if (gv != s):
                # capture remaining string
                parseStr = gv
                chunk1 = s[:m.start()]
                chunk2 = s[m.end():]
                s = '%s %s' % (chunk1, chunk2)
            else:
                parseStr = s
                s = ''

    # a weekday never overrides a date that was already parsed
    if parseStr and not ctx.hasDate:
        debug and log.debug(
            'found (weekday) [%s][%s][%s]', parseStr, chunk1, chunk2)
        sourceTime = self._evalWeekday(parseStr, sourceTime)

    return s, sourceTime, bool(parseStr)
1605
1606 - def _partialParseTimeStr(self, s, sourceTime):
1607 """ 1608 test if giving C{s} matched CRE_TIME, used by L{parse()} 1609 1610 @type s: string 1611 @param s: date/time text to evaluate 1612 @type sourceTime: struct_time 1613 @param sourceTime: C{struct_time} value to use as the base 1614 1615 @rtype: tuple 1616 @return: tuple of remained date/time text, datetime object and 1617 an boolean value to describ if matched or not 1618 1619 """ 1620 parseStr = None 1621 chunk1 = chunk2 = '' 1622 1623 # Natural language time strings 1624 m = self.ptc.CRE_TIME.search(s) 1625 if m is not None or s in self.ptc.re_values['now']: 1626 1627 if (m and m.group() != s): 1628 # capture remaining string 1629 parseStr = m.group() 1630 chunk1 = s[:m.start()] 1631 chunk2 = s[m.end():] 1632 s = '%s %s' % (chunk1, chunk2) 1633 else: 1634 parseStr = s 1635 s = '' 1636 1637 if parseStr: 1638 debug and log.debug( 1639 'found (time) [%s][%s][%s]', parseStr, chunk1, chunk2) 1640 sourceTime = self._evalTimeStr(parseStr, sourceTime) 1641 1642 return s, sourceTime, bool(parseStr)
1643
1644 - def _partialParseMeridian(self, s, sourceTime):
1645 """ 1646 test if giving C{s} matched CRE_TIMEHMS2, used by L{parse()} 1647 1648 @type s: string 1649 @param s: date/time text to evaluate 1650 @type sourceTime: struct_time 1651 @param sourceTime: C{struct_time} value to use as the base 1652 1653 @rtype: tuple 1654 @return: tuple of remained date/time text, datetime object and 1655 an boolean value to describ if matched or not 1656 1657 """ 1658 parseStr = None 1659 chunk1 = chunk2 = '' 1660 1661 # HH:MM(:SS) am/pm time strings 1662 m = self.ptc.CRE_TIMEHMS2.search(s) 1663 if m is not None: 1664 1665 if m.group('minutes') is not None: 1666 if m.group('seconds') is not None: 1667 parseStr = '%s:%s:%s' % (m.group('hours'), 1668 m.group('minutes'), 1669 m.group('seconds')) 1670 else: 1671 parseStr = '%s:%s' % (m.group('hours'), 1672 m.group('minutes')) 1673 else: 1674 parseStr = m.group('hours') 1675 parseStr += ' ' + m.group('meridian') 1676 1677 chunk1 = s[:m.start()] 1678 chunk2 = s[m.end():] 1679 1680 s = '%s %s' % (chunk1, chunk2) 1681 1682 if parseStr: 1683 debug and log.debug('found (meridian) [%s][%s][%s]', 1684 parseStr, chunk1, chunk2) 1685 sourceTime = self._evalMeridian(parseStr, sourceTime) 1686 1687 return s, sourceTime, bool(parseStr)
1688
1689 - def _partialParseTimeStd(self, s, sourceTime):
1690 """ 1691 test if giving C{s} matched CRE_TIMEHMS, used by L{parse()} 1692 1693 @type s: string 1694 @param s: date/time text to evaluate 1695 @type sourceTime: struct_time 1696 @param sourceTime: C{struct_time} value to use as the base 1697 1698 @rtype: tuple 1699 @return: tuple of remained date/time text, datetime object and 1700 an boolean value to describ if matched or not 1701 1702 """ 1703 parseStr = None 1704 chunk1 = chunk2 = '' 1705 1706 # HH:MM(:SS) time strings 1707 m = self.ptc.CRE_TIMEHMS.search(s) 1708 if m is not None: 1709 1710 if m.group('seconds') is not None: 1711 parseStr = '%s:%s:%s' % (m.group('hours'), 1712 m.group('minutes'), 1713 m.group('seconds')) 1714 chunk1 = s[:m.start('hours')] 1715 chunk2 = s[m.end('seconds'):] 1716 else: 1717 parseStr = '%s:%s' % (m.group('hours'), 1718 m.group('minutes')) 1719 chunk1 = s[:m.start('hours')] 1720 chunk2 = s[m.end('minutes'):] 1721 1722 s = '%s %s' % (chunk1, chunk2) 1723 1724 if parseStr: 1725 debug and log.debug( 1726 'found (hms) [%s][%s][%s]', parseStr, chunk1, chunk2) 1727 sourceTime = self._evalTimeStd(parseStr, sourceTime) 1728 1729 return s, sourceTime, bool(parseStr)
1730
def parseDT(self, datetimeString, sourceTime=None,
            tzinfo=None, version=None):
    """
    C{datetimeString} is as C{.parse}, C{sourceTime} has the same semantic
    meaning as C{.parse}, but now also accepts datetime objects.  C{tzinfo}
    accepts a tzinfo object.  It is advisable to use pytz.

    @type  datetimeString: string
    @param datetimeString: date/time text to evaluate
    @type  sourceTime:     struct_time, datetime, date, time
    @param sourceTime:     time value to use as the base
    @type  tzinfo:         tzinfo
    @param tzinfo:         Timezone to apply to generated datetime objs.
    @type  version:        integer
    @param version:        style version, default will use L{Calendar}
                           parameter version value

    @rtype:  tuple
    @return: tuple of: the parsed value as a C{datetime} and the result
             flag/context

    see .parse for return code details.
    """
    # Anything offering a timetuple() method is converted with it; any
    # other value is handed to parse() as it is.
    if hasattr(sourceTime, 'timetuple'):
        sourceTime = sourceTime.timetuple()

    def attachTzinfo(naive):
        # fallback for tzinfo objects without localize(); None is a
        # valid tzinfo value here and yields a naive datetime
        return naive.replace(tzinfo=tzinfo)

    # Prefer the tzinfo's own localize() (as offered by pytz) when it
    # is available.
    localize = getattr(tzinfo, 'localize', attachTzinfo)

    # Punt
    timeStruct, retCode = self.parse(datetimeString,
                                     sourceTime=sourceTime,
                                     version=version)

    # Same return signature as parse(), except that a datetime object
    # takes the place of the time struct.
    naive = datetime.datetime(*timeStruct[:6])
    return localize(naive), retCode
1778
def parse(self, datetimeString, sourceTime=None, version=None):
    """
    Splits the given C{datetimeString} into tokens, finds the regex
    patterns that match and then calculates a C{struct_time} value from
    the chunks.

    If C{sourceTime} is given then the C{struct_time} value will be
    calculated from that value, otherwise from the current date/time.

    If the C{datetimeString} is parsed and date/time value found, then::

        If C{version} equals to L{VERSION_FLAG_STYLE}, the second item of
        the returned tuple will be a flag to let you know what kind of
        C{struct_time} value is being returned::

            0 = not parsed at all
            1 = parsed as a C{date}
            2 = parsed as a C{time}
            3 = parsed as a C{datetime}

        If C{version} equals to L{VERSION_CONTEXT_STYLE}, the second value
        will be an instance of L{pdtContext}

    @type  datetimeString: string
    @param datetimeString: date/time text to evaluate
    @type  sourceTime:     struct_time, tuple or datetime
    @param sourceTime:     time value to use as the base
    @type  version:        integer
    @param version:        style version, default will use L{Calendar}
                           parameter version value

    @rtype:  tuple
    @return: tuple of: modified C{sourceTime} and the result flag/context

    @raise ValueError: when C{sourceTime} is given but is not a
                       datetime, C{struct_time} or tuple
    """
    if debug:
        log.debug('parse()')

    # strip periods that follow a word and quotes that wrap a word
    for pattern, replacement in ((r'(\w)\.(\s)', r'\1\2'),
                                 (r'(\w)[\'"](\s|$)', r'\1 \2'),
                                 (r'(\s|^)[\'"](\w)', r'\1 \2')):
        datetimeString = re.sub(pattern, replacement, datetimeString)

    if not sourceTime:
        sourceTime = time.localtime()
    elif isinstance(sourceTime, datetime.datetime):
        if debug:
            log.debug('coercing datetime to timetuple')
        sourceTime = sourceTime.timetuple()
    elif not isinstance(sourceTime, (time.struct_time, tuple)):
        raise ValueError('sourceTime is not a struct_time')

    with self.context() as ctx:
        remaining = datetimeString.lower().strip()
        if debug:
            log.debug('remainedString (before parsing): [%s]', remaining)

        while remaining:
            # the parsers are tried in this fixed order; the first one
            # that matches consumes its text and the scan starts over
            for parser in (self._partialParseModifier,
                           self._partialParseUnits,
                           self._partialParseQUnits,
                           self._partialParseDateStr,
                           self._partialParseDateStd,
                           self._partialParseDayStr,
                           self._partialParseWeekday,
                           self._partialParseTimeStr,
                           self._partialParseMeridian,
                           self._partialParseTimeStd):
                leftover, newTime, matched = parser(remaining, sourceTime)
                if matched:
                    remaining, sourceTime = leftover.strip(), newTime
                    break
            else:
                # nothing matched
                remaining = ''

            if debug:
                log.debug('hasDate: [%s], hasTime: [%s]',
                          ctx.hasDate, ctx.hasTime)
                log.debug('remainedString: [%s]', remaining)

        # String is not parsed at all
        if sourceTime is None:
            if debug:
                log.debug('not parsed [%s]', str(sourceTime))
            sourceTime = time.localtime()

    if not isinstance(sourceTime, time.struct_time):
        sourceTime = time.struct_time(sourceTime)

    if version is None:
        version = self.version

    if version == VERSION_CONTEXT_STYLE:
        return sourceTime, ctx
    return sourceTime, ctx.dateTimeFlag
1870
def inc(self, source, month=None, year=None):
    """
    Takes the given C{source} date, or current date if none is
    passed, and increments it according to the values passed in
    by month and/or year.

    This routine is needed because Python's C{timedelta()} function
    does not allow for month or year increments.

    Years are folded into months.  A fractional month is applied as that
    fraction of the number of days in the resulting month.

    @type  source: datetime
    @param source: C{datetime} value to increment; C{None} means the
                   current local date/time
    @type  month:  float or integer
    @param month:  optional number of months to increment
    @type  year:   float or integer
    @param year:   optional number of years to increment

    @rtype:  datetime
    @return: C{source} incremented by the number of months and/or years

    @raise OverflowError: when the resulting year is outside
                          C{datetime.MINYEAR}..C{datetime.MAXYEAR}
    """
    if source is None:
        # honour the documented fallback instead of failing on
        # source.year
        source = datetime.datetime.now()

    yr = source.year
    mth = source.month
    dy = source.day

    # values that cannot be read as numbers count as "no increment"
    try:
        month = float(month)
    except (TypeError, ValueError):
        month = 0

    try:
        year = float(year)
    except (TypeError, ValueError):
        year = 0

    # fold the years into months so only one unit has to be applied
    month += year * 12

    subMi = 0.0
    maxDay = 0
    if month:
        mi = int(month)
        subMi = month - mi      # fractional part of the month count

        y = int(mi / 12.0)
        m = mi - y * 12

        mth = mth + m
        if mth < 1:             # cross start-of-year?
            y -= 1              # yes - decrement year
            mth += 12           # and fix month
        elif mth > 12:          # cross end-of-year?
            y += 1              # yes - increment year
            mth -= 12           # and fix month

        yr += y

        # if the day ends up past the last day of
        # the new month, set it to the last day
        maxDay = self.ptc.daysInMonth(mth, yr)
        if dy > maxDay:
            dy = maxDay

    if yr > datetime.MAXYEAR or yr < datetime.MINYEAR:
        raise OverflowError('year is out of range')

    d = source.replace(year=yr, month=mth, day=dy)
    if subMi:
        d += datetime.timedelta(days=subMi * maxDay)

    # NOTE(review): this appears to be equivalent to returning d - the
    # original form is kept as is; confirm before simplifying
    return source + (d - source)
1939
1940 - def nlp(self, inputString, sourceTime=None, version=None):
1941 """Utilizes parse() after making judgements about what datetime 1942 information belongs together. 1943 1944 It makes logical groupings based on proximity and returns a parsed 1945 datetime for each matched grouping of datetime text, along with 1946 location info within the given inputString. 1947 1948 @type inputString: string 1949 @param inputString: natural language text to evaluate 1950 @type sourceTime: struct_time 1951 @param sourceTime: C{struct_time} value to use as the base 1952 @type version: integer 1953 @param version: style version, default will use L{Calendar} 1954 parameter version value 1955 1956 @rtype: tuple or None 1957 @return: tuple of tuples in the format (parsed_datetime as 1958 datetime.datetime, flags as int, start_pos as int, 1959 end_pos as int, matched_text as string) or None if there 1960 were no matches 1961 """ 1962 1963 orig_inputstring = inputString 1964 1965 # replace periods at the end of sentences w/ spaces 1966 # opposed to removing them altogether in order to 1967 # retain relative positions (identified by alpha, period, space). 1968 # this is required for some of the regex patterns to match 1969 inputString = re.sub(r'(\w)(\.)(\s)', r'\1 \3', inputString).lower() 1970 inputString = re.sub(r'(\w)(\'|")(\s|$)', r'\1 \3', inputString) 1971 inputString = re.sub(r'(\s|^)(\'|")(\w)', r'\1 \3', inputString) 1972 1973 startpos = 0 # the start position in the inputString during the loop 1974 1975 # list of lists in format: 1976 # [startpos, endpos, matchedstring, flags, type] 1977 matches = [] 1978 1979 while startpos < len(inputString): 1980 1981 # empty match 1982 leftmost_match = [0, 0, None, 0, None] 1983 1984 # Modifier like next\prev.. 
1985 m = self.ptc.CRE_MODIFIER.search(inputString[startpos:]) 1986 if m is not None: 1987 if leftmost_match[1] == 0 or \ 1988 leftmost_match[0] > m.start() + startpos: 1989 leftmost_match[0] = m.start() + startpos 1990 leftmost_match[1] = m.end() + startpos 1991 leftmost_match[2] = m.group() 1992 leftmost_match[3] = 0 1993 leftmost_match[4] = 'modifier' 1994 1995 # Quantity + Units 1996 m = self.ptc.CRE_UNITS.search(inputString[startpos:]) 1997 if m is not None: 1998 debug and log.debug('CRE_UNITS matched') 1999 if self._UnitsTrapped(inputString[startpos:], m, 'units'): 2000 debug and log.debug('day suffix trapped by unit match') 2001 else: 2002 2003 if leftmost_match[1] == 0 or \ 2004 leftmost_match[0] > m.start('qty') + startpos: 2005 leftmost_match[0] = m.start('qty') + startpos 2006 leftmost_match[1] = m.end('qty') + startpos 2007 leftmost_match[2] = m.group('qty') 2008 leftmost_match[3] = 3 2009 leftmost_match[4] = 'units' 2010 2011 if m.start('qty') > 0 and \ 2012 inputString[m.start('qty') - 1] == '-': 2013 leftmost_match[0] = leftmost_match[0] - 1 2014 leftmost_match[2] = '-' + leftmost_match[2] 2015 2016 # Quantity + Units 2017 m = self.ptc.CRE_QUNITS.search(inputString[startpos:]) 2018 if m is not None: 2019 debug and log.debug('CRE_QUNITS matched') 2020 if self._UnitsTrapped(inputString[startpos:], m, 'qunits'): 2021 debug and log.debug('day suffix trapped by qunit match') 2022 else: 2023 if leftmost_match[1] == 0 or \ 2024 leftmost_match[0] > m.start('qty') + startpos: 2025 leftmost_match[0] = m.start('qty') + startpos 2026 leftmost_match[1] = m.end('qty') + startpos 2027 leftmost_match[2] = m.group('qty') 2028 leftmost_match[3] = 3 2029 leftmost_match[4] = 'qunits' 2030 2031 if m.start('qty') > 0 and \ 2032 inputString[m.start('qty') - 1] == '-': 2033 leftmost_match[0] = leftmost_match[0] - 1 2034 leftmost_match[2] = '-' + leftmost_match[2] 2035 2036 m = self.ptc.CRE_DATE3.search(inputString[startpos:]) 2037 # NO LONGER NEEDED, THE REGEXP HANDLED 
MTHNAME NOW 2038 # for match in self.ptc.CRE_DATE3.finditer(inputString[startpos:]): 2039 # to prevent "HH:MM(:SS) time strings" expressions from 2040 # triggering this regex, we checks if the month field exists 2041 # in the searched expression, if it doesn't exist, the date 2042 # field is not valid 2043 # if match.group('mthname'): 2044 # m = self.ptc.CRE_DATE3.search(inputString[startpos:], 2045 # match.start()) 2046 # break 2047 2048 # String date format 2049 if m is not None: 2050 if leftmost_match[1] == 0 or \ 2051 leftmost_match[0] > m.start('date') + startpos: 2052 leftmost_match[0] = m.start('date') + startpos 2053 leftmost_match[1] = m.end('date') + startpos 2054 leftmost_match[2] = m.group('date') 2055 leftmost_match[3] = 1 2056 leftmost_match[4] = 'dateStr' 2057 2058 # Standard date format 2059 m = self.ptc.CRE_DATE.search(inputString[startpos:]) 2060 if m is not None: 2061 if leftmost_match[1] == 0 or \ 2062 leftmost_match[0] > m.start('date') + startpos: 2063 leftmost_match[0] = m.start('date') + startpos 2064 leftmost_match[1] = m.end('date') + startpos 2065 leftmost_match[2] = m.group('date') 2066 leftmost_match[3] = 1 2067 leftmost_match[4] = 'dateStd' 2068 2069 # Natural language day strings 2070 m = self.ptc.CRE_DAY.search(inputString[startpos:]) 2071 if m is not None: 2072 if leftmost_match[1] == 0 or \ 2073 leftmost_match[0] > m.start() + startpos: 2074 leftmost_match[0] = m.start() + startpos 2075 leftmost_match[1] = m.end() + startpos 2076 leftmost_match[2] = m.group() 2077 leftmost_match[3] = 1 2078 leftmost_match[4] = 'dayStr' 2079 2080 # Weekday 2081 m = self.ptc.CRE_WEEKDAY.search(inputString[startpos:]) 2082 if m is not None: 2083 if inputString[startpos:] not in self.ptc.dayOffsets: 2084 if leftmost_match[1] == 0 or \ 2085 leftmost_match[0] > m.start() + startpos: 2086 leftmost_match[0] = m.start() + startpos 2087 leftmost_match[1] = m.end() + startpos 2088 leftmost_match[2] = m.group() 2089 leftmost_match[3] = 1 2090 leftmost_match[4] 
= 'weekdy' 2091 2092 # Natural language time strings 2093 m = self.ptc.CRE_TIME.search(inputString[startpos:]) 2094 if m is not None: 2095 if leftmost_match[1] == 0 or \ 2096 leftmost_match[0] > m.start() + startpos: 2097 leftmost_match[0] = m.start() + startpos 2098 leftmost_match[1] = m.end() + startpos 2099 leftmost_match[2] = m.group() 2100 leftmost_match[3] = 2 2101 leftmost_match[4] = 'timeStr' 2102 2103 # HH:MM(:SS) am/pm time strings 2104 m = self.ptc.CRE_TIMEHMS2.search(inputString[startpos:]) 2105 if m is not None: 2106 if leftmost_match[1] == 0 or \ 2107 leftmost_match[0] > m.start('hours') + startpos: 2108 leftmost_match[0] = m.start('hours') + startpos 2109 leftmost_match[1] = m.end('meridian') + startpos 2110 leftmost_match[2] = inputString[leftmost_match[0]: 2111 leftmost_match[1]] 2112 leftmost_match[3] = 2 2113 leftmost_match[4] = 'meridian' 2114 2115 # HH:MM(:SS) time strings 2116 m = self.ptc.CRE_TIMEHMS.search(inputString[startpos:]) 2117 if m is not None: 2118 if leftmost_match[1] == 0 or \ 2119 leftmost_match[0] > m.start('hours') + startpos: 2120 leftmost_match[0] = m.start('hours') + startpos 2121 if m.group('seconds') is not None: 2122 leftmost_match[1] = m.end('seconds') + startpos 2123 else: 2124 leftmost_match[1] = m.end('minutes') + startpos 2125 leftmost_match[2] = inputString[leftmost_match[0]: 2126 leftmost_match[1]] 2127 leftmost_match[3] = 2 2128 leftmost_match[4] = 'timeStd' 2129 2130 # Units only; must be preceded by a modifier 2131 if len(matches) > 0 and matches[-1][3] == 0: 2132 m = self.ptc.CRE_UNITS_ONLY.search(inputString[startpos:]) 2133 # Ensure that any match is immediately proceded by the 2134 # modifier. 
"Next is the word 'month'" should not parse as a 2135 # date while "next month" should 2136 if m is not None and \ 2137 inputString[startpos:startpos + 2138 m.start()].strip() == '': 2139 debug and log.debug('CRE_UNITS_ONLY matched [%s]', 2140 m.group()) 2141 if leftmost_match[1] == 0 or \ 2142 leftmost_match[0] > m.start() + startpos: 2143 leftmost_match[0] = m.start() + startpos 2144 leftmost_match[1] = m.end() + startpos 2145 leftmost_match[2] = m.group() 2146 leftmost_match[3] = 3 2147 leftmost_match[4] = 'unitsOnly' 2148 2149 # set the start position to the end pos of the leftmost match 2150 startpos = leftmost_match[1] 2151 2152 # nothing was detected 2153 # so break out of the loop 2154 if startpos == 0: 2155 startpos = len(inputString) 2156 else: 2157 if leftmost_match[3] > 0: 2158 m = self.ptc.CRE_NLP_PREFIX.search( 2159 inputString[:leftmost_match[0]] + 2160 ' ' + str(leftmost_match[3])) 2161 if m is not None: 2162 leftmost_match[0] = m.start('nlp_prefix') 2163 leftmost_match[2] = inputString[leftmost_match[0]: 2164 leftmost_match[1]] 2165 matches.append(leftmost_match) 2166 2167 # find matches in proximity with one another and 2168 # return all the parsed values 2169 proximity_matches = [] 2170 if len(matches) > 1: 2171 combined = '' 2172 from_match_index = 0 2173 date = matches[0][3] == 1 2174 time = matches[0][3] == 2 2175 units = matches[0][3] == 3 2176 for i in range(1, len(matches)): 2177 2178 # test proximity (are there characters between matches?) 
2179 endofprevious = matches[i - 1][1] 2180 begofcurrent = matches[i][0] 2181 if orig_inputstring[endofprevious: 2182 begofcurrent].lower().strip() != '': 2183 # this one isn't in proximity, but maybe 2184 # we have enough to make a datetime 2185 # TODO: make sure the combination of 2186 # formats (modifier, dateStd, etc) makes logical sense 2187 # before parsing together 2188 if date or time or units: 2189 combined = orig_inputstring[matches[from_match_index] 2190 [0]:matches[i - 1][1]] 2191 parsed_datetime, flags = self.parse(combined, 2192 sourceTime, 2193 version) 2194 proximity_matches.append(( 2195 datetime.datetime(*parsed_datetime[:6]), 2196 flags, 2197 matches[from_match_index][0], 2198 matches[i - 1][1], 2199 combined)) 2200 # not in proximity, reset starting from current 2201 from_match_index = i 2202 date = matches[i][3] == 1 2203 time = matches[i][3] == 2 2204 units = matches[i][3] == 3 2205 continue 2206 else: 2207 if matches[i][3] == 1: 2208 date = True 2209 if matches[i][3] == 2: 2210 time = True 2211 if matches[i][3] == 3: 2212 units = True 2213 2214 # check last 2215 # we have enough to make a datetime 2216 if date or time or units: 2217 combined = orig_inputstring[matches[from_match_index][0]: 2218 matches[len(matches) - 1][1]] 2219 parsed_datetime, flags = self.parse(combined, sourceTime, 2220 version) 2221 proximity_matches.append(( 2222 datetime.datetime(*parsed_datetime[:6]), 2223 flags, 2224 matches[from_match_index][0], 2225 matches[len(matches) - 1][1], 2226 combined)) 2227 2228 elif len(matches) == 0: 2229 return None 2230 else: 2231 if matches[0][3] == 0: # not enough info to parse 2232 return None 2233 else: 2234 combined = orig_inputstring[matches[0][0]:matches[0][1]] 2235 parsed_datetime, flags = self.parse(matches[0][2], sourceTime, 2236 version) 2237 proximity_matches.append(( 2238 datetime.datetime(*parsed_datetime[:6]), 2239 flags, 2240 matches[0][0], 2241 matches[0][1], 2242 combined)) 2243 2244 return tuple(proximity_matches)
2245
def _initSymbols(ptc):
    """
    Initialize symbols and single character constants.

    Builds C{ptc.am} and C{ptc.pm} from the first two entries of
    C{ptc.locale.meridian}.  For a non-empty meridian text each list
    holds, in order: the original text, its first character, its dotted
    form (first two characters, each followed by a period), then the same
    three variants of the lowercased text.

    A meridian text of a single character has no dotted form; that
    variant is left out instead of raising C{IndexError}.

    @type  ptc: object
    @param ptc: object exposing C{locale.meridian}; receives the C{am}
                and C{pm} attributes
    """
    def _shortForms(text):
        # first character, plus the dotted form when two characters exist
        forms = [text[0]]
        if len(text) > 1:
            forms.append('{0}.{1}.'.format(*text))
        return forms

    # defaults used when the locale supplies fewer than two meridians
    ptc.am = ['', '']
    ptc.pm = ['', '']

    # index 0 is am, index 1 is pm
    for attr, xm in zip(('am', 'pm'), ptc.locale.meridian[:2]):
        variants = [xm]
        if xm:
            lxm = xm.lower()
            variants.extend(_shortForms(xm))
            variants.append(lxm)
            variants.extend(_shortForms(lxm))
        setattr(ptc, attr, variants)
2266
2267 2268 -class Constants(object):
2269 2270 """ 2271 Default set of constants for parsedatetime. 2272 2273 If PyICU is present, then the class will first try to get PyICU 2274 to return a locale specified by C{localeID}. If either C{localeID} is 2275 None or if the locale does not exist within PyICU, then each of the 2276 locales defined in C{fallbackLocales} is tried in order. 2277 2278 If PyICU is not present or none of the specified locales can be used, 2279 then the class will initialize itself to the en_US locale. 2280 2281 if PyICU is not present or not requested, only the locales defined by 2282 C{pdtLocales} will be searched. 2283 """ 2284
2285 - def __init__(self, localeID=None, usePyICU=True, 2286 fallbackLocales=['en_US']):
2287 self.localeID = localeID 2288 self.fallbackLocales = fallbackLocales[:] 2289 2290 if 'en_US' not in self.fallbackLocales: 2291 self.fallbackLocales.append('en_US') 2292 2293 # define non-locale specific constants 2294 self.locale = None 2295 self.usePyICU = usePyICU 2296 2297 # starting cache of leap years 2298 # daysInMonth will add to this if during 2299 # runtime it gets a request for a year not found 2300 self._leapYears = list(range(1904, 2097, 4)) 2301 2302 self.Second = 1 2303 self.Minute = 60 # 60 * self.Second 2304 self.Hour = 3600 # 60 * self.Minute 2305 self.Day = 86400 # 24 * self.Hour 2306 self.Week = 604800 # 7 * self.Day 2307 self.Month = 2592000 # 30 * self.Day 2308 self.Year = 31536000 # 365 * self.Day 2309 2310 self._DaysInMonthList = (31, 28, 31, 30, 31, 30, 2311 31, 31, 30, 31, 30, 31) 2312 self.rangeSep = '-' 2313 self.BirthdayEpoch = 50 2314 2315 # When True the starting time for all relative calculations will come 2316 # from the given SourceTime, otherwise it will be 9am 2317 2318 self.StartTimeFromSourceTime = False 2319 2320 # YearParseStyle controls how we parse "Jun 12", i.e. dates that do 2321 # not have a year present. The default is to compare the date given 2322 # to the current date, and if prior, then assume the next year. 2323 # Setting this to 0 will prevent that. 
2324 2325 self.YearParseStyle = 1 2326 2327 # DOWParseStyle controls how we parse "Tuesday" 2328 # If the current day was Thursday and the text to parse is "Tuesday" 2329 # then the following table shows how each style would be returned 2330 # -1, 0, +1 2331 # 2332 # Current day marked as *** 2333 # 2334 # Sun Mon Tue Wed Thu Fri Sat 2335 # week -1 2336 # current -1,0 *** 2337 # week +1 +1 2338 # 2339 # If the current day was Monday and the text to parse is "Tuesday" 2340 # then the following table shows how each style would be returned 2341 # -1, 0, +1 2342 # 2343 # Sun Mon Tue Wed Thu Fri Sat 2344 # week -1 -1 2345 # current *** 0,+1 2346 # week +1 2347 2348 self.DOWParseStyle = 1 2349 2350 # CurrentDOWParseStyle controls how we parse "Friday" 2351 # If the current day was Friday and the text to parse is "Friday" 2352 # then the following table shows how each style would be returned 2353 # True/False. This also depends on DOWParseStyle. 2354 # 2355 # Current day marked as *** 2356 # 2357 # DOWParseStyle = 0 2358 # Sun Mon Tue Wed Thu Fri Sat 2359 # week -1 2360 # current T,F 2361 # week +1 2362 # 2363 # DOWParseStyle = -1 2364 # Sun Mon Tue Wed Thu Fri Sat 2365 # week -1 F 2366 # current T 2367 # week +1 2368 # 2369 # DOWParseStyle = +1 2370 # 2371 # Sun Mon Tue Wed Thu Fri Sat 2372 # week -1 2373 # current T 2374 # week +1 F 2375 2376 self.CurrentDOWParseStyle = False 2377 2378 if self.usePyICU: 2379 self.locale = get_icu(self.localeID) 2380 2381 if self.locale.icu is None: 2382 self.usePyICU = False 2383 self.locale = None 2384 2385 if self.locale is None: 2386 if self.localeID not in pdtLocales: 2387 for localeId in range(0, len(self.fallbackLocales)): 2388 self.localeID = self.fallbackLocales[localeId] 2389 if self.localeID in pdtLocales: 2390 break 2391 2392 self.locale = pdtLocales[self.localeID] 2393 2394 if self.locale is not None: 2395 2396 def _getLocaleDataAdjusted(localeData): 2397 """ 2398 If localeData is defined as ["mon|mnd", 'tu|tues'...] 
then this 2399 function splits those definitions on | 2400 """ 2401 adjusted = [] 2402 for d in localeData: 2403 if '|' in d: 2404 adjusted += d.split("|") 2405 else: 2406 adjusted.append(d) 2407 return adjusted
2408 2409 def re_join(g): 2410 return '|'.join(re.escape(i) for i in g)
2411 2412 mths = _getLocaleDataAdjusted(self.locale.Months) 2413 smths = _getLocaleDataAdjusted(self.locale.shortMonths) 2414 swds = _getLocaleDataAdjusted(self.locale.shortWeekdays) 2415 wds = _getLocaleDataAdjusted(self.locale.Weekdays) 2416 2417 # escape any regex special characters that may be found 2418 self.locale.re_values['months'] = re_join(mths) 2419 self.locale.re_values['shortmonths'] = re_join(smths) 2420 self.locale.re_values['days'] = re_join(wds) 2421 self.locale.re_values['shortdays'] = re_join(swds) 2422 self.locale.re_values['dayoffsets'] = \ 2423 re_join(self.locale.dayOffsets) 2424 self.locale.re_values['numbers'] = \ 2425 re_join(self.locale.numbers) 2426 self.locale.re_values['decimal_mark'] = \ 2427 re.escape(self.locale.decimal_mark) 2428 2429 units = [unit for units in self.locale.units.values() 2430 for unit in units] # flatten 2431 units.sort(key=len, reverse=True) # longest first 2432 self.locale.re_values['units'] = re_join(units) 2433 self.locale.re_values['modifiers'] = re_join(self.locale.Modifiers) 2434 self.locale.re_values['sources'] = re_join(self.locale.re_sources) 2435 2436 # For distinguishing numeric dates from times, look for timeSep 2437 # and meridian, if specified in the locale 2438 self.locale.re_values['timecomponents'] = \ 2439 re_join(self.locale.timeSep + self.locale.meridian) 2440 2441 # build weekday offsets - yes, it assumes the Weekday and 2442 # shortWeekday lists are in the same order and Mon..Sun 2443 # (Python style) 2444 def _buildOffsets(offsetDict, localeData, indexStart): 2445 o = indexStart 2446 for key in localeData: 2447 if '|' in key: 2448 for k in key.split('|'): 2449 offsetDict[k] = o 2450 else: 2451 offsetDict[key] = o 2452 o += 1 2453 2454 _buildOffsets(self.locale.WeekdayOffsets, 2455 self.locale.Weekdays, 0) 2456 _buildOffsets(self.locale.WeekdayOffsets, 2457 self.locale.shortWeekdays, 0) 2458 2459 # build month offsets - yes, it assumes the Months and shortMonths 2460 # lists are in the same 
order and Jan..Dec 2461 _buildOffsets(self.locale.MonthOffsets, 2462 self.locale.Months, 1) 2463 _buildOffsets(self.locale.MonthOffsets, 2464 self.locale.shortMonths, 1) 2465 2466 _initSymbols(self) 2467 2468 # TODO: add code to parse the date formats and build the regexes up 2469 # from sub-parts, find all hard-coded uses of date/time separators 2470 2471 # not being used in code, but kept in case others are manually 2472 # utilizing this regex for their own purposes 2473 self.RE_DATE4 = r'''(?P<date> 2474 ( 2475 ( 2476 (?P<day>\d\d?) 2477 (?P<suffix>{daysuffix})? 2478 (,)? 2479 (\s)* 2480 ) 2481 (?P<mthname> 2482 \b({months}|{shortmonths})\b 2483 )\s* 2484 (?P<year>\d\d 2485 (\d\d)? 2486 )? 2487 ) 2488 )'''.format(**self.locale.re_values) 2489 2490 # still not completely sure of the behavior of the regex and 2491 # whether it would be best to consume all possible irrelevant 2492 # characters before the option groups (but within the {1,3} repetition 2493 # group or inside of each option group, as it currently does 2494 # however, right now, all tests are passing that were, 2495 # including fixing the bug of matching a 4-digit year as ddyy 2496 # when the day is absent from the string 2497 self.RE_DATE3 = r'''(?P<date> 2498 (?: 2499 (?:^|\s+) 2500 (?P<mthname> 2501 {months}|{shortmonths} 2502 )\b 2503 | 2504 (?:^|\s+) 2505 (?P<day>[1-9]|[012]\d|3[01]) 2506 (?P<suffix>{daysuffix}|)\b 2507 (?!\s*(?:{timecomponents})) 2508 | 2509 ,?\s+ 2510 (?P<year>\d\d(?:\d\d|))\b 2511 (?!\s*(?:{timecomponents})) 2512 ){{1,3}} 2513 (?(mthname)|$-^) 2514 )'''.format(**self.locale.re_values) 2515 2516 # not being used in code, but kept in case others are manually 2517 # utilizing this regex for their own purposes 2518 self.RE_MONTH = r'''(\s+|^) 2519 (?P<month> 2520 ( 2521 (?P<mthname> 2522 \b({months}|{shortmonths})\b 2523 ) 2524 (\s* 2525 (?P<year>(\d{{4}})) 2526 )? 
2527 ) 2528 ) 2529 (?=\s+|$|[^\w])'''.format(**self.locale.re_values) 2530 2531 self.RE_WEEKDAY = r'''\b 2532 (?: 2533 {days}|{shortdays} 2534 ) 2535 \b'''.format(**self.locale.re_values) 2536 2537 self.RE_NUMBER = (r'(\b(?:{numbers})\b|\d+(?:{decimal_mark}\d+|))' 2538 .format(**self.locale.re_values)) 2539 2540 self.RE_SPECIAL = (r'(?P<special>^[{specials}]+)\s+' 2541 .format(**self.locale.re_values)) 2542 2543 self.RE_UNITS_ONLY = (r'''\b({units})\b''' 2544 .format(**self.locale.re_values)) 2545 2546 self.RE_UNITS = r'''\b(?P<qty> 2547 -? 2548 (?:\d+(?:{decimal_mark}\d+|)|(?:{numbers})\b)\s* 2549 (?P<units>{units}) 2550 )\b'''.format(**self.locale.re_values) 2551 2552 self.RE_QUNITS = r'''\b(?P<qty> 2553 -? 2554 (?:\d+(?:{decimal_mark}\d+|)|(?:{numbers})\s+)\s* 2555 (?P<qunits>{qunits}) 2556 )\b'''.format(**self.locale.re_values) 2557 2558 self.RE_MODIFIER = r'''\b(?: 2559 {modifiers} 2560 )\b'''.format(**self.locale.re_values) 2561 2562 self.RE_TIMEHMS = r'''([\s(\["'-]|^) 2563 (?P<hours>\d\d?) 2564 (?P<tsep>{timeseparator}|) 2565 (?P<minutes>\d\d) 2566 (?:(?P=tsep) 2567 (?P<seconds>\d\d 2568 (?:[\.,]\d+)? 2569 ) 2570 )?\b'''.format(**self.locale.re_values) 2571 2572 self.RE_TIMEHMS2 = r'''([\s(\["'-]|^) 2573 (?P<hours>\d\d?) 2574 (?: 2575 (?P<tsep>{timeseparator}|) 2576 (?P<minutes>\d\d?) 2577 (?:(?P=tsep) 2578 (?P<seconds>\d\d? 2579 (?:[\.,]\d+)? 2580 ) 2581 )? 2582 )?'''.format(**self.locale.re_values) 2583 2584 # 1, 2, and 3 here refer to the type of match date, time, or units 2585 self.RE_NLP_PREFIX = r'''\b(?P<nlp_prefix> 2586 (on) 2587 (\s)+1 2588 | 2589 (at|in) 2590 (\s)+2 2591 | 2592 (in) 2593 (\s)+3 2594 )''' 2595 2596 if 'meridian' in self.locale.re_values: 2597 self.RE_TIMEHMS2 += (r'\s*(?P<meridian>{meridian})\b' 2598 .format(**self.locale.re_values)) 2599 else: 2600 self.RE_TIMEHMS2 += r'\b' 2601 2602 # Always support common . 
and - separators 2603 dateSeps = ''.join(re.escape(s) 2604 for s in self.locale.dateSep + ['-', '.']) 2605 2606 self.RE_DATE = r'''([\s(\["'-]|^) 2607 (?P<date> 2608 \d\d?[{0}]\d\d?(?:[{0}]\d\d(?:\d\d)?)? 2609 | 2610 \d{{4}}[{0}]\d\d?[{0}]\d\d? 2611 ) 2612 \b'''.format(dateSeps) 2613 2614 self.RE_DATE2 = r'[{0}]'.format(dateSeps) 2615 2616 assert 'dayoffsets' in self.locale.re_values 2617 2618 self.RE_DAY = r'''\b 2619 (?: 2620 {dayoffsets} 2621 ) 2622 \b'''.format(**self.locale.re_values) 2623 2624 self.RE_DAY2 = r'''(?P<day>\d\d?) 2625 (?P<suffix>{daysuffix})? 2626 '''.format(**self.locale.re_values) 2627 2628 self.RE_TIME = r'''\b 2629 (?: 2630 {sources} 2631 ) 2632 \b'''.format(**self.locale.re_values) 2633 2634 self.RE_REMAINING = r'\s+' 2635 2636 # Regex for date/time ranges 2637 self.RE_RTIMEHMS = r'''(\s*|^) 2638 (\d\d?){timeseparator} 2639 (\d\d) 2640 ({timeseparator}(\d\d))? 2641 (\s*|$)'''.format(**self.locale.re_values) 2642 2643 self.RE_RTIMEHMS2 = (r'''(\s*|^) 2644 (\d\d?) 2645 ({timeseparator}(\d\d?))? 2646 ({timeseparator}(\d\d?))?''' 2647 .format(**self.locale.re_values)) 2648 2649 if 'meridian' in self.locale.re_values: 2650 self.RE_RTIMEHMS2 += (r'\s*({meridian})' 2651 .format(**self.locale.re_values)) 2652 2653 self.RE_RDATE = r'(\d+([%s]\d+)+)' % dateSeps 2654 self.RE_RDATE3 = r'''( 2655 ( 2656 ( 2657 \b({months})\b 2658 )\s* 2659 ( 2660 (\d\d?) 2661 (\s?|{daysuffix}|$)+ 2662 )? 2663 (,\s*\d{{4}})? 2664 ) 2665 )'''.format(**self.locale.re_values) 2666 2667 # "06/07/06 - 08/09/06" 2668 self.DATERNG1 = (r'{0}\s*{rangeseparator}\s*{0}' 2669 .format(self.RE_RDATE, **self.locale.re_values)) 2670 2671 # "march 31 - june 1st, 2006" 2672 self.DATERNG2 = (r'{0}\s*{rangeseparator}\s*{0}' 2673 .format(self.RE_RDATE3, **self.locale.re_values)) 2674 2675 # "march 1rd -13th" 2676 self.DATERNG3 = (r'{0}\s*{rangeseparator}\s*(\d\d?)\s*(rd|st|nd|th)?' 
2677 .format(self.RE_RDATE3, **self.locale.re_values)) 2678 2679 # "4:00:55 pm - 5:90:44 am", '4p-5p' 2680 self.TIMERNG1 = (r'{0}\s*{rangeseparator}\s*{0}' 2681 .format(self.RE_RTIMEHMS2, **self.locale.re_values)) 2682 2683 self.TIMERNG2 = (r'{0}\s*{rangeseparator}\s*{0}' 2684 .format(self.RE_RTIMEHMS, **self.locale.re_values)) 2685 2686 # "4-5pm " 2687 self.TIMERNG3 = (r'\d\d?\s*{rangeseparator}\s*{0}' 2688 .format(self.RE_RTIMEHMS2, **self.locale.re_values)) 2689 2690 # "4:30-5pm " 2691 self.TIMERNG4 = (r'{0}\s*{rangeseparator}\s*{1}' 2692 .format(self.RE_RTIMEHMS, self.RE_RTIMEHMS2, 2693 **self.locale.re_values)) 2694 2695 self.re_option = re.IGNORECASE + re.VERBOSE 2696 self.cre_source = {'CRE_SPECIAL': self.RE_SPECIAL, 2697 'CRE_NUMBER': self.RE_NUMBER, 2698 'CRE_UNITS': self.RE_UNITS, 2699 'CRE_UNITS_ONLY': self.RE_UNITS_ONLY, 2700 'CRE_QUNITS': self.RE_QUNITS, 2701 'CRE_MODIFIER': self.RE_MODIFIER, 2702 'CRE_TIMEHMS': self.RE_TIMEHMS, 2703 'CRE_TIMEHMS2': self.RE_TIMEHMS2, 2704 'CRE_DATE': self.RE_DATE, 2705 'CRE_DATE2': self.RE_DATE2, 2706 'CRE_DATE3': self.RE_DATE3, 2707 'CRE_DATE4': self.RE_DATE4, 2708 'CRE_MONTH': self.RE_MONTH, 2709 'CRE_WEEKDAY': self.RE_WEEKDAY, 2710 'CRE_DAY': self.RE_DAY, 2711 'CRE_DAY2': self.RE_DAY2, 2712 'CRE_TIME': self.RE_TIME, 2713 'CRE_REMAINING': self.RE_REMAINING, 2714 'CRE_RTIMEHMS': self.RE_RTIMEHMS, 2715 'CRE_RTIMEHMS2': self.RE_RTIMEHMS2, 2716 'CRE_RDATE': self.RE_RDATE, 2717 'CRE_RDATE3': self.RE_RDATE3, 2718 'CRE_TIMERNG1': self.TIMERNG1, 2719 'CRE_TIMERNG2': self.TIMERNG2, 2720 'CRE_TIMERNG3': self.TIMERNG3, 2721 'CRE_TIMERNG4': self.TIMERNG4, 2722 'CRE_DATERNG1': self.DATERNG1, 2723 'CRE_DATERNG2': self.DATERNG2, 2724 'CRE_DATERNG3': self.DATERNG3, 2725 'CRE_NLP_PREFIX': self.RE_NLP_PREFIX} 2726 self.cre_keys = set(self.cre_source.keys()) 2727
2728 - def __getattr__(self, name):
2729 if name in self.cre_keys: 2730 value = re.compile(self.cre_source[name], self.re_option) 2731 setattr(self, name, value) 2732 return value 2733 elif name in self.locale.locale_keys: 2734 return getattr(self.locale, name) 2735 else: 2736 raise AttributeError(name)
2737
2738 - def daysInMonth(self, month, year):
2739 """ 2740 Take the given month (1-12) and a given year (4 digit) return 2741 the number of days in the month adjusting for leap year as needed 2742 """ 2743 result = None 2744 debug and log.debug('daysInMonth(%s, %s)', month, year) 2745 if month > 0 and month <= 12: 2746 result = self._DaysInMonthList[month - 1] 2747 2748 if month == 2: 2749 if year in self._leapYears: 2750 result += 1 2751 else: 2752 if calendar.isleap(year): 2753 self._leapYears.append(year) 2754 result += 1 2755 2756 return result
2757
2758 - def getSource(self, sourceKey, sourceTime=None):
2759 """ 2760 GetReturn a date/time tuple based on the giving source key 2761 and the corresponding key found in self.re_sources. 2762 2763 The current time is used as the default and any specified 2764 item found in self.re_sources is inserted into the value 2765 and the generated dictionary is returned. 2766 """ 2767 if sourceKey not in self.re_sources: 2768 return None 2769 2770 if sourceTime is None: 2771 (yr, mth, dy, hr, mn, sec, wd, yd, isdst) = time.localtime() 2772 else: 2773 (yr, mth, dy, hr, mn, sec, wd, yd, isdst) = sourceTime 2774 2775 defaults = {'yr': yr, 'mth': mth, 'dy': dy, 2776 'hr': hr, 'mn': mn, 'sec': sec} 2777 2778 source = self.re_sources[sourceKey] 2779 2780 values = {} 2781 2782 for key, default in defaults.items(): 2783 values[key] = source.get(key, default) 2784 2785 return (values['yr'], values['mth'], values['dy'], 2786 values['hr'], values['mn'], values['sec'], 2787 wd, yd, isdst)
2788