From 02193f39d0671022b57b5dcbae03549b163e8ef7 Mon Sep 17 00:00:00 2001 From: rkscv Date: Fri, 27 Sep 2024 05:44:04 +0800 Subject: [PATCH] Reimplement date parser (#611) --- date.go | 94 +--- date_parser.go | 1165 ++++++++++++++----------------------------- date_parser_test.go | 31 -- date_test.go | 12 +- 4 files changed, 394 insertions(+), 908 deletions(-) delete mode 100644 date_parser_test.go diff --git a/date.go b/date.go index ee29a21e..917c9292 100644 --- a/date.go +++ b/date.go @@ -25,80 +25,34 @@ type dateObject struct { msec int64 } -type dateLayoutDesc struct { - layout string - dateOnly bool -} - -var ( - dateLayoutsNumeric = []dateLayoutDesc{ - {layout: "2006-01-02T15:04:05Z0700"}, - {layout: "2006-01-02T15:04:05"}, - {layout: "2006-01-02", dateOnly: true}, - {layout: "2006-01-02 15:04:05"}, - - {layout: "2006", dateOnly: true}, - {layout: "2006-01", dateOnly: true}, - - {layout: "2006T15:04"}, - {layout: "2006-01T15:04"}, - {layout: "2006-01-02T15:04"}, - - {layout: "2006T15:04:05"}, - {layout: "2006-01T15:04:05"}, - - {layout: "2006T15:04Z0700"}, - {layout: "2006-01T15:04Z0700"}, - {layout: "2006-01-02T15:04Z0700"}, - - {layout: "2006T15:04:05Z0700"}, - {layout: "2006-01T15:04:05Z0700"}, +func dateParse(date string) (t time.Time, ok bool) { + d, ok := parseDateISOString(date) + if !ok { + d, ok = parseDateOtherString(date) } - - dateLayoutsAlpha = []dateLayoutDesc{ - {layout: time.RFC1123}, - {layout: time.RFC1123Z}, - {layout: dateTimeLayout}, - {layout: time.UnixDate}, - {layout: time.ANSIC}, - {layout: time.RubyDate}, - {layout: "Mon, _2 Jan 2006 15:04:05 GMT-0700 (MST)"}, - {layout: "Mon, _2 Jan 2006 15:04:05 -0700 (MST)"}, - {layout: "Jan _2, 2006", dateOnly: true}, - } -) - -func dateParse(date string) (time.Time, bool) { - var t time.Time - var err error - var layouts []dateLayoutDesc - if len(date) > 0 { - first := date[0] - if first <= '9' && (first >= '0' || first == '-' || first == '+') { - layouts = dateLayoutsNumeric - } else { - layouts = dateLayoutsAlpha - } - } else { - return time.Time{}, false + if !ok { + return } - for _, desc := range layouts { - var defLoc *time.Location - if desc.dateOnly { - defLoc = time.UTC - } else { - defLoc = time.Local - } - t, err = parseDate(desc.layout, date, defLoc) - if err == nil { - break - } + if d.month > 12 || + d.day > 31 || + d.hour > 24 || + d.min > 59 || + d.sec > 59 || + // special case 24:00:00.000 + (d.hour == 24 && (d.min != 0 || d.sec != 0 || d.msec != 0)) { + ok = false + return } - if err != nil { - return time.Time{}, false + var loc *time.Location + if d.isLocal { + loc = time.Local + } else { + loc = time.FixedZone("", d.timeZoneOffset*60) } - unix := timeToMsec(t) - return t, unix >= -maxTime && unix <= maxTime + t = time.Date(d.year, time.Month(d.month), d.day, d.hour, d.min, d.sec, d.msec*1e6, loc) + unixMilli := t.UnixMilli() + ok = unixMilli >= -maxTime && unixMilli <= maxTime + return } func (r *Runtime) newDateObject(t time.Time, isSet bool, proto *Object) *Object { diff --git a/date_parser.go b/date_parser.go index 762888c2..971f1eed 100644 --- a/date_parser.go +++ b/date_parser.go @@ -1,869 +1,426 @@ package goja -// This is a slightly modified version of the standard Go parser to make it more compatible with ECMAScript 5.1 -// Changes: -// - 6-digit extended years are supported in place of long year (2006) in the form of +123456 -// - Timezone formats tolerate colons, e.g. -0700 will parse -07:00 -// - Short week day will also parse long week day -// - Short month ("Jan") will also parse long month ("January") -// - Long day ("02") will also parse short day ("2"). -// - Timezone in brackets, "(MST)", will match any string in brackets (e.g. "(GMT Standard Time)") -// - If offset is not set and timezone name is unknown, an error is returned -// - If offset and timezone name are both set the offset takes precedence and the resulting Location will be FixedZone("", offset) - -// Original copyright message: - -// Copyright 2010 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - import ( - "errors" - "time" -) - -const ( - _ = iota - stdLongMonth = iota + stdNeedDate // "January" - stdMonth // "Jan" - stdNumMonth // "1" - stdZeroMonth // "01" - stdLongWeekDay // "Monday" - stdWeekDay // "Mon" - stdDay // "2" - stdUnderDay // "_2" - stdZeroDay // "02" - stdHour = iota + stdNeedClock // "15" - stdHour12 // "3" - stdZeroHour12 // "03" - stdMinute // "4" - stdZeroMinute // "04" - stdSecond // "5" - stdZeroSecond // "05" - stdLongYear = iota + stdNeedDate // "2006" - stdYear // "06" - stdPM = iota + stdNeedClock // "PM" - stdpm // "pm" - stdTZ = iota // "MST" - stdBracketTZ // "(MST)" - stdISO8601TZ // "Z0700" // prints Z for UTC - stdISO8601SecondsTZ // "Z070000" - stdISO8601ShortTZ // "Z07" - stdISO8601ColonTZ // "Z07:00" // prints Z for UTC - stdISO8601ColonSecondsTZ // "Z07:00:00" - stdNumTZ // "-0700" // always numeric - stdNumSecondsTz // "-070000" - stdNumShortTZ // "-07" // always numeric - stdNumColonTZ // "-07:00" // always numeric - stdNumColonSecondsTZ // "-07:00:00" - stdFracSecond0 // ".0", ".00", ... , trailing zeros included - stdFracSecond9 // ".9", ".99", ..., trailing zeros omitted - - stdNeedDate = 1 << 8 // need month, day, year - stdNeedClock = 2 << 8 // need hour, minute, second - stdArgShift = 16 // extra argument in high bits, above low stdArgShift - stdMask = 1<= 69 { // Unix time starts Dec 31 1969 in some time zones - year += 1900 - } else { - year += 2000 - } - case stdLongYear: - if len(value) >= 7 && (value[0] == '-' || value[0] == '+') { // extended year - neg := value[0] == '-' - p, value = value[1:7], value[7:] - year, err = atoi(p) - if neg { - if year == 0 { - err = errBad - break - } - year = -year - } - } else { - if len(value) < 4 || !isDigit(value, 0) { - err = errBad - break - } - p, value = value[0:4], value[4:] - year, err = atoi(p) - } - - case stdMonth: - month, value, err = lookup(longMonthNames, value) - if err != nil { - month, value, err = lookup(shortMonthNames, value) - } - month++ - case stdLongMonth: - month, value, err = lookup(longMonthNames, value) - month++ - case stdNumMonth, stdZeroMonth: - month, value, err = getnum(value, std == stdZeroMonth) - if month <= 0 || 12 < month { - rangeErrString = "month" - } - case stdWeekDay: - // Ignore weekday except for error checking. - _, value, err = lookup(longDayNames, value) - if err != nil { - _, value, err = lookup(shortDayNames, value) - } - case stdLongWeekDay: - _, value, err = lookup(longDayNames, value) - case stdDay, stdUnderDay, stdZeroDay: - if std == stdUnderDay && len(value) > 0 && value[0] == ' ' { - value = value[1:] - } - day, value, err = getnum(value, false) - if day < 0 { - // Note that we allow any one- or two-digit day here. - rangeErrString = "day" - } - case stdHour: - hour, value, err = getnum(value, false) - if hour < 0 || 24 <= hour { - rangeErrString = "hour" - } - case stdHour12, stdZeroHour12: - hour, value, err = getnum(value, std == stdZeroHour12) - if hour < 0 || 12 < hour { - rangeErrString = "hour" - } - case stdMinute, stdZeroMinute: - min, value, err = getnum(value, std == stdZeroMinute) - if min < 0 || 60 <= min { - rangeErrString = "minute" - } - case stdSecond, stdZeroSecond: - sec, value, err = getnum(value, std == stdZeroSecond) - if sec < 0 || 60 <= sec { - rangeErrString = "second" - break - } - // Special case: do we have a fractional second but no - // fractional second in the format? - if len(value) >= 2 && value[0] == '.' && isDigit(value, 1) { - _, std, _ = nextStdChunk(layout) - std &= stdMask - if std == stdFracSecond0 || std == stdFracSecond9 { - // Fractional second in the layout; proceed normally - break - } - // No fractional second in the layout but we have one in the input. - n := 2 - for ; n < len(value) && isDigit(value, n); n++ { - } - nsec, rangeErrString, err = parseNanoseconds(value, n) - value = value[n:] - } - case stdPM: - if len(value) < 2 { - err = errBad - break - } - p, value = value[0:2], value[2:] - switch p { - case "PM": - pmSet = true - case "AM": - amSet = true - default: - err = errBad - } - case stdpm: - if len(value) < 2 { - err = errBad - break - } - p, value = value[0:2], value[2:] - switch p { - case "pm": - pmSet = true - case "am": - amSet = true - default: - err = errBad - } - case stdISO8601TZ, stdISO8601ColonTZ, stdISO8601SecondsTZ, stdISO8601ShortTZ, stdISO8601ColonSecondsTZ, stdNumTZ, stdNumShortTZ, stdNumColonTZ, stdNumSecondsTz, stdNumColonSecondsTZ: - if (std == stdISO8601TZ || std == stdISO8601ShortTZ || std == stdISO8601ColonTZ || - std == stdISO8601SecondsTZ || std == stdISO8601ColonSecondsTZ) && len(value) >= 1 && value[0] == 'Z' { - - value = value[1:] - z = time.UTC - break - } - var sign, hour, min, seconds string - if std == stdISO8601ColonTZ || std == stdNumColonTZ || std == stdNumTZ || std == stdISO8601TZ { - if len(value) < 4 { - err = errBad - break - } - if value[3] != ':' { - if std == stdNumColonTZ || std == stdISO8601ColonTZ || len(value) < 5 { - err = errBad - break - } - sign, hour, min, seconds, value = value[0:1], value[1:3], value[3:5], "00", value[5:] - } else { - if len(value) < 6 { - err = errBad - break - } - sign, hour, min, seconds, value = value[0:1], value[1:3], value[4:6], "00", value[6:] - } - } else if std == stdNumShortTZ || std == stdISO8601ShortTZ { - if len(value) < 3 { - err = errBad - break - } - sign, hour, min, seconds, value = value[0:1], value[1:3], "00", "00", value[3:] - } else if std == stdISO8601ColonSecondsTZ || std == stdNumColonSecondsTZ || std == stdISO8601SecondsTZ || std == stdNumSecondsTz { - if len(value) < 7 { - err = errBad - break - } - if value[3] != ':' || value[6] != ':' { - if std == stdISO8601ColonSecondsTZ || std == stdNumColonSecondsTZ || len(value) < 7 { - err = errBad - break - } - sign, hour, min, seconds, value = value[0:1], value[1:3], value[3:5], value[5:7], value[7:] - } else { - if len(value) < 9 { - err = errBad - break - } - sign, hour, min, seconds, value = value[0:1], value[1:3], value[4:6], value[7:9], value[9:] - } - } - var hr, mm, ss int - hr, err = atoi(hour) - if err == nil { - mm, err = atoi(min) - } - if err == nil { - ss, err = atoi(seconds) - } - zoneOffset = (hr*60+mm)*60 + ss // offset is in seconds - switch sign[0] { - case '+': - case '-': - zoneOffset = -zoneOffset - default: - err = errBad - } - case stdTZ: - // Does it look like a time zone? - if len(value) >= 3 && value[0:3] == "UTC" { - z = time.UTC - value = value[3:] - break - } - n, ok := parseTimeZone(value) - if !ok { - err = errBad - break - } - zoneName, value = value[:n], value[n:] - case stdBracketTZ: - if len(value) < 3 || value[0] != '(' { - err = errBad - break - } - i := 1 - for ; ; i++ { - if i >= len(value) { - err = errBad - break - } - if value[i] == ')' { - zoneName, value = value[1:i], value[i+1:] - break - } - } - - case stdFracSecond0: - // stdFracSecond0 requires the exact number of digits as specified in - // the layout. - ndigit := 1 + (std >> stdArgShift) - if len(value) < ndigit { - err = errBad - break - } - nsec, rangeErrString, err = parseNanoseconds(value, ndigit) - value = value[ndigit:] +type date struct { + year, month, day int + hour, min, sec, msec int + timeZoneOffset int // time zone offset in minutes + isLocal bool +} - case stdFracSecond9: - if len(value) < 2 || value[0] != '.' || value[1] < '0' || '9' < value[1] { - // Fractional second omitted. - break - } - // Take any number of digits, even more than asked for, - // because it is what the stdSecond case would do. - i := 0 - for i < 9 && i+1 < len(value) && '0' <= value[i+1] && value[i+1] <= '9' { - i++ - } - nsec, rangeErrString, err = parseNanoseconds(value, 1+i) - value = value[1+i:] - } - if rangeErrString != "" { - return time.Time{}, &time.ParseError{Layout: alayout, Value: avalue, LayoutElem: stdstr, ValueElem: value, Message: ": " + rangeErrString + " out of range"} - } - if err != nil { - return time.Time{}, &time.ParseError{Layout: alayout, Value: avalue, LayoutElem: stdstr, ValueElem: value} - } +func skip(s string, c byte) (string, bool) { + if len(s) > 0 && s[0] == c { + return s[1:], true } - if pmSet && hour < 12 { - hour += 12 - } else if amSet && hour == 12 { - hour = 0 + return s, false +} + +func skipSpaces(s string) string { + for len(s) > 0 && s[0] == ' ' { + s = s[1:] } + return s +} - // Validate the day of the month. - if day < 1 || day > daysIn(time.Month(month), year) { - return time.Time{}, &time.ParseError{Layout: alayout, Value: avalue, ValueElem: value, Message: ": day out of range"} +func skipUntil(s string, stopList string) string { + for len(s) > 0 && !strings.ContainsRune(stopList, rune(s[0])) { + s = s[1:] } + return s +} - if z == nil { - if zoneOffset == -1 { - if zoneName != "" { - if z1, err := time.LoadLocation(zoneName); err == nil { - z = z1 - } else { - return time.Time{}, &time.ParseError{Layout: alayout, Value: avalue, ValueElem: value, Message: ": unknown timezone"} - } - } else { - z = defaultLocation +func match(s string, lower string) (string, bool) { + if len(s) < len(lower) { + return s, false + } + for i := 0; i < len(lower); i++ { + c1 := s[i] + c2 := lower[i] + if c1 != c2 { + // switch to lower-case; 'a'-'A' is known to be a single bit + c1 |= 'a' - 'A' + if c1 != c2 || c1 < 'a' || c1 > 'z' { + return s, false } - } else if zoneOffset == 0 { - z = time.UTC - } else { - z = time.FixedZone("", zoneOffset) } } - - return time.Date(year, time.Month(month), day, hour, min, sec, nsec, z), nil + return s[len(lower):], true } -var errLeadingInt = errors.New("time: bad [0-9]*") // never printed - -func signedLeadingInt(s string) (x int64, rem string, err error) { - neg := false - if s != "" && (s[0] == '-' || s[0] == '+') { - neg = s[0] == '-' - s = s[1:] +func getDigits(s string, minDigits, maxDigits int) (int, string, bool) { + var i, v int + for i < len(s) && i < maxDigits && s[i] >= '0' && s[i] <= '9' { + v = v*10 + int(s[i]-'0') + i++ } - x, rem, err = leadingInt(s) - if err != nil { - return + if i < minDigits { + return 0, s, false } - - if neg { - x = -x - } - return + return v, s[i:], true } -// leadingInt consumes the leading [0-9]* from s. -func leadingInt(s string) (x int64, rem string, err error) { - i := 0 - for ; i < len(s); i++ { - c := s[i] - if c < '0' || c > '9' { - break - } - if x > (1<<63-1)/10 { - // overflow - return 0, "", errLeadingInt +func getMilliseconds(s string) (int, string) { + mul, v := 100, 0 + if len(s) > 0 && (s[0] == '.' || s[0] == ',') { + const I_START = 1 + i := I_START + for i < len(s) && i-I_START < 9 && s[i] >= '0' && s[i] <= '9' { + v += int(s[i]-'0') * mul + mul /= 10 + i++ } - x = x*10 + int64(c) - '0' - if x < 0 { - // overflow - return 0, "", errLeadingInt + if i > I_START { + // only consume the separator if digits are present + return v, s[i:] } } - return x, s[i:], nil + return 0, s } -// nextStdChunk finds the first occurrence of a std string in -// layout and returns the text before, the std string, and the text after. -func nextStdChunk(layout string) (prefix string, std int, suffix string) { - for i := 0; i < len(layout); i++ { - switch c := int(layout[i]); c { - case 'J': // January, Jan - if len(layout) >= i+3 && layout[i:i+3] == "Jan" { - if len(layout) >= i+7 && layout[i:i+7] == "January" { - return layout[0:i], stdLongMonth, layout[i+7:] - } - if !startsWithLowerCase(layout[i+3:]) { - return layout[0:i], stdMonth, layout[i+3:] - } - } - - case 'M': // Monday, Mon, MST - if len(layout) >= i+3 { - if layout[i:i+3] == "Mon" { - if len(layout) >= i+6 && layout[i:i+6] == "Monday" { - return layout[0:i], stdLongWeekDay, layout[i+6:] - } - if !startsWithLowerCase(layout[i+3:]) { - return layout[0:i], stdWeekDay, layout[i+3:] - } - } - if layout[i:i+3] == "MST" { - return layout[0:i], stdTZ, layout[i+3:] - } - } - - case '0': // 01, 02, 03, 04, 05, 06 - if len(layout) >= i+2 && '1' <= layout[i+1] && layout[i+1] <= '6' { - return layout[0:i], std0x[layout[i+1]-'1'], layout[i+2:] - } - - case '1': // 15, 1 - if len(layout) >= i+2 && layout[i+1] == '5' { - return layout[0:i], stdHour, layout[i+2:] - } - return layout[0:i], stdNumMonth, layout[i+1:] - - case '2': // 2006, 2 - if len(layout) >= i+4 && layout[i:i+4] == "2006" { - return layout[0:i], stdLongYear, layout[i+4:] - } - return layout[0:i], stdDay, layout[i+1:] - - case '_': // _2, _2006 - if len(layout) >= i+2 && layout[i+1] == '2' { - //_2006 is really a literal _, followed by stdLongYear - if len(layout) >= i+5 && layout[i+1:i+5] == "2006" { - return layout[0 : i+1], stdLongYear, layout[i+5:] - } - return layout[0:i], stdUnderDay, layout[i+2:] - } - - case '3': - return layout[0:i], stdHour12, layout[i+1:] - - case '4': - return layout[0:i], stdMinute, layout[i+1:] - - case '5': - return layout[0:i], stdSecond, layout[i+1:] - - case 'P': // PM - if len(layout) >= i+2 && layout[i+1] == 'M' { - return layout[0:i], stdPM, layout[i+2:] - } - - case 'p': // pm - if len(layout) >= i+2 && layout[i+1] == 'm' { - return layout[0:i], stdpm, layout[i+2:] - } - - case '-': // -070000, -07:00:00, -0700, -07:00, -07 - if len(layout) >= i+7 && layout[i:i+7] == "-070000" { - return layout[0:i], stdNumSecondsTz, layout[i+7:] - } - if len(layout) >= i+9 && layout[i:i+9] == "-07:00:00" { - return layout[0:i], stdNumColonSecondsTZ, layout[i+9:] - } - if len(layout) >= i+5 && layout[i:i+5] == "-0700" { - return layout[0:i], stdNumTZ, layout[i+5:] - } - if len(layout) >= i+6 && layout[i:i+6] == "-07:00" { - return layout[0:i], stdNumColonTZ, layout[i+6:] - } - if len(layout) >= i+3 && layout[i:i+3] == "-07" { - return layout[0:i], stdNumShortTZ, layout[i+3:] - } - - case 'Z': // Z070000, Z07:00:00, Z0700, Z07:00, - if len(layout) >= i+7 && layout[i:i+7] == "Z070000" { - return layout[0:i], stdISO8601SecondsTZ, layout[i+7:] - } - if len(layout) >= i+9 && layout[i:i+9] == "Z07:00:00" { - return layout[0:i], stdISO8601ColonSecondsTZ, layout[i+9:] - } - if len(layout) >= i+5 && layout[i:i+5] == "Z0700" { - return layout[0:i], stdISO8601TZ, layout[i+5:] - } - if len(layout) >= i+6 && layout[i:i+6] == "Z07:00" { - return layout[0:i], stdISO8601ColonTZ, layout[i+6:] - } - if len(layout) >= i+3 && layout[i:i+3] == "Z07" { - return layout[0:i], stdISO8601ShortTZ, layout[i+3:] - } - - case '.': // .000 or .999 - repeated digits for fractional seconds. - if i+1 < len(layout) && (layout[i+1] == '0' || layout[i+1] == '9') { - ch := layout[i+1] - j := i + 1 - for j < len(layout) && layout[j] == ch { - j++ - } - // String of digits must end here - only fractional second is all digits. - if !isDigit(layout, j) { - std := stdFracSecond0 - if layout[i+1] == '9' { - std = stdFracSecond9 - } - std |= (j - (i + 1)) << stdArgShift - return layout[0:i], std, layout[j:] - } - } - case '(': - if len(layout) >= i+5 && layout[i:i+5] == "(MST)" { - return layout[0:i], stdBracketTZ, layout[i+5:] +// [+-]HH:mm or [+-]HHmm or Z +func getTimeZoneOffset(s string, strict bool) (int, string, bool) { + if len(s) == 0 { + return 0, s, false + } + sign := s[0] + if sign == '+' || sign == '-' { + var hh, mm, v int + var ok bool + t := s[1:] + n := len(t) + if hh, t, ok = getDigits(t, 1, 9); !ok { + return 0, s, false + } + n -= len(t) + if strict && n != 2 && n != 4 { + return 0, s, false + } + for n > 4 { + n -= 2 + hh /= 100 + } + if n > 2 { + mm = hh % 100 + hh = hh / 100 + } else if t, ok = skip(t, ':'); ok { + if mm, t, ok = getDigits(t, 2, 2); !ok { + return 0, s, false } } + if hh > 23 || mm > 59 { + return 0, s, false + } + v = hh*60 + mm + if sign == '-' { + v = -v + } + return v, t, true + } else if sign == 'Z' { + return 0, s[1:], true } - return layout, 0, "" -} - -var longDayNames = []string{ - "Sunday", - "Monday", - "Tuesday", - "Wednesday", - "Thursday", - "Friday", - "Saturday", -} - -var shortDayNames = []string{ - "Sun", - "Mon", - "Tue", - "Wed", - "Thu", - "Fri", - "Sat", -} - -var shortMonthNames = []string{ - "Jan", - "Feb", - "Mar", - "Apr", - "May", - "Jun", - "Jul", - "Aug", - "Sep", - "Oct", - "Nov", - "Dec", + return 0, s, false } -var longMonthNames = []string{ - "January", - "February", - "March", - "April", - "May", - "June", - "July", - "August", - "September", - "October", - "November", - "December", +var tzAbbrs = []struct { + nameLower string + offset int +}{ + {"gmt", 0}, // Greenwich Mean Time + {"utc", 0}, // Coordinated Universal Time + {"ut", 0}, // Universal Time + {"z", 0}, // Zulu Time + {"edt", -4 * 60}, // Eastern Daylight Time + {"est", -5 * 60}, // Eastern Standard Time + {"cdt", -5 * 60}, // Central Daylight Time + {"cst", -6 * 60}, // Central Standard Time + {"mdt", -6 * 60}, // Mountain Daylight Time + {"mst", -7 * 60}, // Mountain Standard Time + {"pdt", -7 * 60}, // Pacific Daylight Time + {"pst", -8 * 60}, // Pacific Standard Time + {"wet", +0 * 60}, // Western European Time + {"west", +1 * 60}, // Western European Summer Time + {"cet", +1 * 60}, // Central European Time + {"cest", +2 * 60}, // Central European Summer Time + {"eet", +2 * 60}, // Eastern European Time + {"eest", +3 * 60}, // Eastern European Summer Time } -// isDigit reports whether s[i] is in range and is a decimal digit. -func isDigit(s string, i int) bool { - if len(s) <= i { - return false - } - c := s[i] - return '0' <= c && c <= '9' -} - -// getnum parses s[0:1] or s[0:2] (fixed forces the latter) -// as a decimal integer and returns the integer and the -// remainder of the string. -func getnum(s string, fixed bool) (int, string, error) { - if !isDigit(s, 0) { - return 0, s, errBad - } - if !isDigit(s, 1) { - if fixed { - return 0, s, errBad +func getTimeZoneAbbr(s string) (int, string, bool) { + for _, tzAbbr := range tzAbbrs { + if s, ok := match(s, tzAbbr.nameLower); ok { + return tzAbbr.offset, s, true } - return int(s[0] - '0'), s[1:], nil } - return int(s[0]-'0')*10 + int(s[1]-'0'), s[2:], nil + return 0, s, false } -func cutspace(s string) string { - for len(s) > 0 && s[0] == ' ' { - s = s[1:] - } - return s +var monthNamesLower = []string{ + "jan", + "feb", + "mar", + "apr", + "may", + "jun", + "jul", + "aug", + "sep", + "oct", + "nov", + "dec", } -// skip removes the given prefix from value, -// treating runs of space characters as equivalent. -func skip(value, prefix string) (string, error) { - for len(prefix) > 0 { - if prefix[0] == ' ' { - if len(value) > 0 && value[0] != ' ' { - return value, errBad - } - prefix = cutspace(prefix) - value = cutspace(value) - continue +func getMonth(s string) (int, string, bool) { + for i, monthNameLower := range monthNamesLower { + if s, ok := match(s, monthNameLower); ok { + return i + 1, s, true } - if len(value) == 0 || value[0] != prefix[0] { - return value, errBad - } - prefix = prefix[1:] - value = value[1:] } - return value, nil + return 0, s, false } -// Never printed, just needs to be non-nil for return by atoi. -var atoiError = errors.New("time: invalid number") - -// Duplicates functionality in strconv, but avoids dependency. -func atoi(s string) (x int, err error) { - q, rem, err := signedLeadingInt(s) - x = int(q) - if err != nil || rem != "" { - return 0, atoiError +func parseDateISOString(s string) (date, bool) { + if len(s) == 0 { + return date{}, false } - return x, nil -} + var d = date{month: 1, day: 1} + var ok bool -// match reports whether s1 and s2 match ignoring case. -// It is assumed s1 and s2 are the same length. -func match(s1, s2 string) bool { - for i := 0; i < len(s1); i++ { - c1 := s1[i] - c2 := s2[i] - if c1 != c2 { - // Switch to lower-case; 'a'-'A' is known to be a single bit. - c1 |= 'a' - 'A' - c2 |= 'a' - 'A' - if c1 != c2 || c1 < 'a' || c1 > 'z' { - return false + // year is either yyyy digits or [+-]yyyyyy + sign := s[0] + if sign == '-' || sign == '+' { + s = s[1:] + if d.year, s, ok = getDigits(s, 6, 6); !ok { + return date{}, false + } + if sign == '-' { + if d.year == 0 { + // reject -000000 + return date{}, false } + d.year = -d.year } + } else if d.year, s, ok = getDigits(s, 4, 4); !ok { + return date{}, false } - return true -} - -func lookup(tab []string, val string) (int, string, error) { - for i, v := range tab { - if len(val) >= len(v) && match(val[0:len(v)], v) { - return i, val[len(v):], nil + if s, ok = skip(s, '-'); ok { + if d.month, s, ok = getDigits(s, 2, 2); !ok || d.month < 1 { + return date{}, false + } + if s, ok = skip(s, '-'); ok { + if d.day, s, ok = getDigits(s, 2, 2); !ok || d.day < 1 { + return date{}, false + } } } - return -1, val, errBad -} - -// daysBefore[m] counts the number of days in a non-leap year -// before month m begins. There is an entry for m=12, counting -// the number of days before January of next year (365). -var daysBefore = [...]int32{ - 0, - 31, - 31 + 28, - 31 + 28 + 31, - 31 + 28 + 31 + 30, - 31 + 28 + 31 + 30 + 31, - 31 + 28 + 31 + 30 + 31 + 30, - 31 + 28 + 31 + 30 + 31 + 30 + 31, - 31 + 28 + 31 + 30 + 31 + 30 + 31 + 31, - 31 + 28 + 31 + 30 + 31 + 30 + 31 + 31 + 30, - 31 + 28 + 31 + 30 + 31 + 30 + 31 + 31 + 30 + 31, - 31 + 28 + 31 + 30 + 31 + 30 + 31 + 31 + 30 + 31 + 30, - 31 + 28 + 31 + 30 + 31 + 30 + 31 + 31 + 30 + 31 + 30 + 31, -} - -func isLeap(year int) bool { - return year%4 == 0 && (year%100 != 0 || year%400 == 0) -} - -func daysIn(m time.Month, year int) int { - if m == time.February && isLeap(year) { - return 29 - } - return int(daysBefore[m] - daysBefore[m-1]) -} - -// parseTimeZone parses a time zone string and returns its length. Time zones -// are human-generated and unpredictable. We can't do precise error checking. -// On the other hand, for a correct parse there must be a time zone at the -// beginning of the string, so it's almost always true that there's one -// there. We look at the beginning of the string for a run of upper-case letters. -// If there are more than 5, it's an error. -// If there are 4 or 5 and the last is a T, it's a time zone. -// If there are 3, it's a time zone. -// Otherwise, other than special cases, it's not a time zone. -// GMT is special because it can have an hour offset. -func parseTimeZone(value string) (length int, ok bool) { - if len(value) < 3 { - return 0, false - } - // Special case 1: ChST and MeST are the only zones with a lower-case letter. - if len(value) >= 4 && (value[:4] == "ChST" || value[:4] == "MeST") { - return 4, true - } - // Special case 2: GMT may have an hour offset; treat it specially. - if value[:3] == "GMT" { - length = parseGMT(value) - return length, true - } - // Special Case 3: Some time zones are not named, but have +/-00 format - if value[0] == '+' || value[0] == '-' { - length = parseSignedOffset(value) - return length, true - } - // How many upper-case letters are there? Need at least three, at most five. - var nUpper int - for nUpper = 0; nUpper < 6; nUpper++ { - if nUpper >= len(value) { - break + if s, ok = skip(s, 'T'); ok { + if d.hour, s, ok = getDigits(s, 2, 2); !ok { + return date{}, false } - if c := value[nUpper]; c < 'A' || 'Z' < c { - break + if s, ok = skip(s, ':'); !ok { + return date{}, false } - } - switch nUpper { - case 0, 1, 2, 6: - return 0, false - case 5: // Must end in T to match. - if value[4] == 'T' { - return 5, true + if d.min, s, ok = getDigits(s, 2, 2); !ok { + return date{}, false } - case 4: - // Must end in T, except one special case. - if value[3] == 'T' || value[:4] == "WITA" { - return 4, true + if s, ok = skip(s, ':'); ok { + if d.sec, s, ok = getDigits(s, 2, 2); !ok { + return date{}, false + } + d.msec, s = getMilliseconds(s) } - case 3: - return 3, true + d.isLocal = true } - return 0, false -} - -// parseGMT parses a GMT time zone. The input string is known to start "GMT". -// The function checks whether that is followed by a sign and a number in the -// range -14 through 12 excluding zero. -func parseGMT(value string) int { - value = value[3:] - if len(value) == 0 { - return 3 + // parse the time zone offset if present + if len(s) > 0 { + if d.timeZoneOffset, s, ok = getTimeZoneOffset(s, true); !ok { + return date{}, false + } + d.isLocal = false } - - return 3 + parseSignedOffset(value) + // error if extraneous characters + return d, len(s) == 0 } -// parseSignedOffset parses a signed timezone offset (e.g. "+03" or "-04"). -// The function checks for a signed number in the range -14 through +12 excluding zero. -// Returns length of the found offset string or 0 otherwise -func parseSignedOffset(value string) int { - sign := value[0] - if sign != '-' && sign != '+' { - return 0 +func parseDateOtherString(s string) (date, bool) { + var d = date{ + year: 2001, + month: 1, + day: 1, + isLocal: true, } - x, rem, err := leadingInt(value[1:]) - if err != nil { - return 0 - } - if sign == '-' { - x = -x - } - if x == 0 || x < -14 || 12 < x { - return 0 - } - return len(value) - len(rem) -} - -func parseNanoseconds(value string, nbytes int) (ns int, rangeErrString string, err error) { - if value[0] != '.' { - err = errBad - return + var nums [3]int + var numIndex int + var hasYear, hasMon, hasTime, ok bool + for { + s = skipSpaces(s) + if len(s) == 0 { + break + } + c := s[0] + n, val := len(s), 0 + if c == '+' || c == '-' { + if hasTime { + if val, s, ok = getTimeZoneOffset(s, false); ok { + d.timeZoneOffset = val + d.isLocal = false + } + } + if !hasTime || !ok { + s = s[1:] + if val, s, ok = getDigits(s, 1, 9); ok { + d.year = val + if c == '-' { + if d.year == 0 { + return date{}, false + } + d.year = -d.year + } + hasYear = true + } + } + } else if val, s, ok = getDigits(s, 1, 9); ok { + if s, ok = skip(s, ':'); ok { + // time part + d.hour = val + if d.min, s, ok = getDigits(s, 1, 2); !ok { + return date{}, false + } + if s, ok = skip(s, ':'); ok { + if d.sec, s, ok = getDigits(s, 1, 2); !ok { + return date{}, false + } + d.msec, s = getMilliseconds(s) + } + hasTime = true + if t := skipSpaces(s); len(t) > 0 { + if t, ok = match(t, "pm"); ok { + if d.hour < 12 { + d.hour += 12 + } + s = t + continue + } else if t, ok = match(t, "am"); ok { + if d.hour == 12 { + d.hour = 0 + } + s = t + continue + } + } + } else if n-len(s) > 2 { + d.year = val + hasYear = true + } else if val < 1 || val > 31 { + d.year = val + if val < 100 { + d.year += 1900 + } + if val < 50 { + d.year += 100 + } + hasYear = true + } else { + if numIndex == 3 { + return date{}, false + } + nums[numIndex] = val + numIndex++ + } + } else if val, s, ok = getMonth(s); ok { + d.month = val + hasMon = true + s = skipUntil(s, "0123456789 -/(") + } else if val, s, ok = getTimeZoneAbbr(s); ok { + d.timeZoneOffset = val + if len(s) > 0 { + if c := s[0]; (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') { + return date{}, false + } + } + d.isLocal = false + continue + } else if c == '(' { + // skip parenthesized phrase + level := 1 + s = s[1:] + for len(s) > 0 && level != 0 { + if s[0] == '(' { + level++ + } else if s[0] == ')' { + level-- + } + s = s[1:] + } + if level > 0 { + return date{}, false + } + } else if c == ')' { + return date{}, false + } else { + if hasYear || hasMon || hasTime || numIndex > 0 { + return date{}, false + } + // skip a word + s = skipUntil(s, " -/(") + } + for len(s) > 0 && strings.ContainsRune("-/.,", rune(s[0])) { + s = s[1:] + } } - if ns, err = atoi(value[1:nbytes]); err != nil { - return + n := numIndex + if hasYear { + n++ } - if ns < 0 || 1e9 <= ns { - rangeErrString = "fractional second" - return + if hasMon { + n++ } - // We need nanoseconds, which means scaling by the number - // of missing digits in the format, maximum length 10. If it's - // longer than 10, we won't scale. - scaleDigits := 10 - nbytes - for i := 0; i < scaleDigits; i++ { - ns *= 10 + if n > 3 { + return date{}, false } - return -} - -// std0x records the std values for "01", "02", ..., "06". -var std0x = [...]int{stdZeroMonth, stdZeroDay, stdZeroHour12, stdZeroMinute, stdZeroSecond, stdYear} -// startsWithLowerCase reports whether the string has a lower-case letter at the beginning. -// Its purpose is to prevent matching strings like "Month" when looking for "Mon". -func startsWithLowerCase(str string) bool { - if len(str) == 0 { - return false + switch numIndex { + case 0: + if !hasYear { + return date{}, false + } + case 1: + if hasMon { + d.day = nums[0] + } else { + d.month = nums[0] + } + case 2: + if hasYear { + d.month = nums[0] + d.day = nums[1] + } else if hasMon { + d.year = nums[1] + if nums[1] < 100 { + d.year += 1900 + } + if nums[1] < 50 { + d.year += 100 + } + d.day = nums[0] + } else { + d.month = nums[0] + d.day = nums[1] + } + case 3: + d.year = nums[2] + if nums[2] < 100 { + d.year += 1900 + } + if nums[2] < 50 { + d.year += 100 + } + d.month = nums[0] + d.day = nums[1] + default: + return date{}, false } - c := str[0] - return 'a' <= c && c <= 'z' + return d, d.month > 0 && d.day > 0 } diff --git a/date_parser_test.go b/date_parser_test.go deleted file mode 100644 index c9a5446b..00000000 --- a/date_parser_test.go +++ /dev/null @@ -1,31 +0,0 @@ -package goja - -import ( - "testing" - "time" -) - -func TestParseDate(t *testing.T) { - - tst := func(layout, value string, expectedTs int64) func(t *testing.T) { - return func(t *testing.T) { - t.Parallel() - tm, err := parseDate(layout, value, time.UTC) - if err != nil { - t.Fatal(err) - } - if tm.Unix() != expectedTs { - t.Fatal(tm) - } - } - } - - t.Run("1", tst("2006-01-02T15:04:05.000Z070000", "2006-01-02T15:04:05.000+07:00:00", 1136189045)) - t.Run("2", tst("2006-01-02T15:04:05.000Z07:00:00", "2006-01-02T15:04:05.000+07:00:00", 1136189045)) - t.Run("3", tst("2006-01-02T15:04:05.000Z07:00", "2006-01-02T15:04:05.000+07:00", 1136189045)) - t.Run("4", tst("2006-01-02T15:04:05.000Z070000", "2006-01-02T15:04:05.000+070000", 1136189045)) - t.Run("5", tst("2006-01-02T15:04:05.000Z070000", "2006-01-02T15:04:05.000Z", 1136214245)) - t.Run("6", tst("2006-01-02T15:04:05.000Z0700", "2006-01-02T15:04:05.000Z", 1136214245)) - t.Run("7", tst("2006-01-02T15:04:05.000Z07", "2006-01-02T15:04:05.000Z", 1136214245)) - -} diff --git a/date_test.go b/date_test.go index fdaa0e73..6f6d6e66 100644 --- a/date_test.go +++ b/date_test.go @@ -201,19 +201,25 @@ func TestDateParse(t *testing.T) { testParse("Mon Jan 02 2006 15:04:05 GMT-0700 (GMT Standard Time)", 1136239445000); testParse("Mon Jan 2 15:04:05 MST 2006", 1136239445000); testParse("Mon Jan 02 15:04:05 MST 2006", 1136239445000); + testParse("Mon Jan 2 15:04:05 2006", 1136232245000); testParse("Mon Jan 02 15:04:05 -0700 2006", 1136239445000); + testParse("Mon Jan 02 3:4 PM -0700 2006", 1136239440000); - testParse("December 04, 1986", 534038400000); - testParse("Dec 04, 1986", 534038400000); - testParse("Dec 4, 1986", 534038400000); + testParse("December 04, 1986", 534056400000); + testParse("Dec 04, 1986", 534056400000); + testParse("Dec 4, 1986", 534056400000); testParse("2006-01-02T15:04:05.000Z", 1136214245000); testParse("2006-06-02T15:04:05.000", 1149275045000); testParse("2006-01-02T15:04:05", 1136232245000); + testParse("2006-01-02 15:04:05.123", 1136232245123); testParse("2006-01-02", 1136160000000); testParse("2006T15:04-0700", 1136153040000); + testParse("2006T15:04+07:00", 1136102640000); testParse("2006T15:04Z", 1136127840000); testParse("2019-01-01T12:00:00.52Z", 1546344000520); + testParse("2019-01T12:00:00.52Z", 1546344000520); + testParse("+002019-01-01T12:00:00.52Z", 1546344000520); var d = new Date("Mon, 02 Jan 2006 15:04:05 MST");