Skip to content
This repository has been archived by the owner on Nov 22, 2024. It is now read-only.

Commit

Permalink
improve xsd::dateTime implementation
Browse files Browse the repository at this point in the history
- use chrono instead of old crate datetime
- do not use an implicit timezone for timezone-less dateTimes
  (see w3c/sparql-query#116)
  • Loading branch information
pchampin committed Sep 19, 2024
1 parent aef6782 commit df4f8f8
Show file tree
Hide file tree
Showing 6 changed files with 188 additions and 28 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ spargebra = { version = "0.2.8", features = ["rdf-star"] }
thiserror = "1.0.44"
bigdecimal = "0.4.3"
num-bigint = "0.4.4"
datetime = "0.5.2"
lazy_static = "1.4.0"
chrono = { version = "0.4.38", features = ["alloc"] }

[dev-dependencies]
test-case = "3.3.1"
2 changes: 0 additions & 2 deletions examples/so_sparql.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
mod datetime;

use std::{env::args, sync::Arc};

use sophia::{
Expand Down
3 changes: 1 addition & 2 deletions src/stash.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
use std::sync::Arc;

use bigdecimal::BigDecimal;
use datetime::ISO;
use sophia::{
api::{
ns::xsd,
Expand Down Expand Up @@ -53,7 +52,7 @@ pub fn value_ref_to_arcterm<F: FnMut(&str) -> Arc<str>>(
SparqlValue::Boolean(None) => (factory("ill-formed"), xsd::boolean),
SparqlValue::Boolean(Some(b)) => (factory(if *b { "true" } else { "false" }), xsd::boolean),
SparqlValue::DateTime(None) => (factory("ill-formed"), xsd::dateTime),
SparqlValue::DateTime(Some(d)) => (factory(&d.iso().to_string()), xsd::dateTime),
SparqlValue::DateTime(Some(d)) => (factory(&d.to_string()), xsd::dateTime),
SparqlValue::String(lex, None) => (lex.clone(), xsd::string),
SparqlValue::String(lex, Some(tag)) => {
return ArcTerm::Literal(GenericLiteral::LanguageString(lex.clone(), tag.clone()));
Expand Down
27 changes: 17 additions & 10 deletions src/test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -259,19 +259,34 @@ fn test_expr(expr: &str, result: &str) -> TestResult {
#[test_case("true", "false", Some(false))]
#[test_case(
"\"2024-03-25T00:00:00\"^^xsd:dateTime",
"\"2024-03-25T00:00:00+00:00\"^^xsd:dateTime",
None
)]
#[test_case(
"\"2024-03-25T00:00:00Z\"^^xsd:dateTime",
"\"2024-03-25T00:00:00+00:00\"^^xsd:dateTime",
Some(true)
)]
#[test_case(
"\"2024-03-25T01:00:00\"^^xsd:dateTime",
"\"2024-03-25T00:00:00+0100\"^^xsd:dateTime",
"\"2024-03-25T00:00:00Z\"^^xsd:dateTime",
"\"2024-03-25T01:00:00+01:00\"^^xsd:dateTime",
Some(true)
)]
#[test_case(
"\"2024-03-25T00:00:00\"^^xsd:dateTime",
"\"2024-03-25T00:00:01\"^^xsd:dateTime",
Some(false)
)]
#[test_case(
"\"2024-03-25T00:00:00Z\"^^xsd:dateTime",
"\"2024-03-25T00:00:01Z\"^^xsd:dateTime",
Some(false)
)]
#[test_case(
"\"2024-03-25T00:00:00Z\"^^xsd:dateTime",
"\"2024-03-25T00:00:00+01:00\"^^xsd:dateTime",
Some(false)
)]
#[test_case("<tag:x>", "<tag:y>", Some(false))]
#[test_case("\"a\"^^<tag:x>", "\"a\"^^<tag:y>", None)]
#[test_case("\"a\"^^<tag:x>", "\"b\"^^<tag:x>", None)]
Expand Down Expand Up @@ -327,14 +342,6 @@ fn test_expr_eq(expr1: &str, expr2: &str, exp: Option<bool>) -> TestResult {
#[test_case("\"a\"@en", "\"b\"@en")]
#[test_case("\"10\"@en", "\"b\"@en")]
#[test_case("false", "true")]
#[test_case(
"\"2024-03-25T00:00:00\"^^xsd:dateTime",
"\"2024-03-25T00:00:01Z\"^^xsd:dateTime"
)]
#[test_case(
"\"2024-03-25T01:00:00\"^^xsd:dateTime",
"\"2024-03-25T00:00:01+0100\"^^xsd:dateTime"
)]
#[test_case(
"\"2024-03-25T00:00:00Z\"^^xsd:dateTime",
"\"2024-03-25T00:00:01Z\"^^xsd:dateTime"
Expand Down
20 changes: 7 additions & 13 deletions src/value.rs
Original file line number Diff line number Diff line change
@@ -1,20 +1,22 @@
use std::{cmp::Ordering, sync::Arc};

use bigdecimal::BigDecimal;
use datetime::{DatePiece, LocalDate, LocalDateTime, LocalTime, OffsetDateTime, TimePiece};
use sophia::{
api::term::LanguageTag,
term::{ArcTerm, GenericLiteral},
};

use crate::number::SparqlNumber;

mod xsd_date_time;
use xsd_date_time::XsdDateTime;

#[derive(Clone, Debug)]
pub enum SparqlValue {
Number(SparqlNumber),
String(Arc<str>, Option<LanguageTag<Arc<str>>>),
Boolean(Option<bool>),
DateTime(Option<LocalDateTime>),
DateTime(Option<XsdDateTime>),
}

impl SparqlValue {
Expand Down Expand Up @@ -43,9 +45,7 @@ impl SparqlValue {
"double" => Some(Self::Number(SparqlNumber::parse::<f64>(lex))),
"string" => Some(Self::String(lex.clone(), None)),
"boolean" => Some(Self::Boolean(lex.parse().ok())),
"dateTime" => Some(Self::DateTime(
lex.parse::<OffsetDateTime>().ok().map(adjust_to_offset),
)),
"dateTime" => Some(Self::DateTime(lex.parse().ok())),
"nonPositiveInteger" => Some(Self::Number(
SparqlNumber::parse_integer(lex).check(|n| !n.is_positive()),
)),
Expand Down Expand Up @@ -88,7 +88,7 @@ impl SparqlValue {
(String(s1, None), String(s2, None)) => Some(s1 == s2),
(String(s1, Some(t1)), String(s2, Some(t2))) => Some(t1 == t2 && s1 == s2),
(Boolean(b1), Boolean(b2)) => Some(b1 == b2),
(DateTime(d1), DateTime(d2)) => Some(d1 == d2),
(DateTime(d1), DateTime(d2)) => d1.partial_cmp(d2).map(|o| o == Ordering::Equal),
_ => None,
}
}
Expand Down Expand Up @@ -122,16 +122,10 @@ impl PartialOrd for SparqlValue {
Some(t1.cmp(t2).then_with(|| s1.cmp(s2)))
}
(Boolean(Some(b1)), Boolean(Some(b2))) => Some(b1.cmp(b2)),
(DateTime(Some(d1)), DateTime(Some(d2))) => Some(d1.cmp(d2)),
(DateTime(Some(d1)), DateTime(Some(d2))) => d1.partial_cmp(d2),
_ => None,
}
}
}

fn adjust_to_offset(odt: OffsetDateTime) -> LocalDateTime {
let d = LocalDate::ymd(odt.year(), odt.month(), odt.day()).unwrap();
let t = LocalTime::hms_ms(odt.hour(), odt.minute(), odt.second(), odt.millisecond()).unwrap();
LocalDateTime::new(d, t)
}

const XSD: &str = "http://www.w3.org/2001/XMLSchema#";
162 changes: 162 additions & 0 deletions src/value/xsd_date_time.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
//! An `XsdDateTime` is a dateTime with or without a timezone.
//! See <https://www.w3.org/TR/xmlschema-2/#dt-dateTime>.
//!
//! # Comparing dateTimes
//!
//! According to [Section 3.2.7.4 Order relation on dateTime of XML Schema Part 2](https://www.w3.org/TR/xmlschema-2/#dateTime-order),
//! some pairs of dateTimes are incomparable
//! (this can only happen when exactly one of the two has a timezone).
//! However, the XPath functions [`op:date-equal`](https://www.w3.org/TR/xpath-functions/#func-date-equal)
//! and [`op:date-less-than`](https://www.w3.org/TR/xpath-functions/#func-date-less-than),
//! which the SPARQL specification refers to,
//! resolve this ambiguity by applying an *implicit timezone* to timezone-less values,
//! a choice which is [controversial](https://github.com/w3c/sparql-query/issues/116).
//!
//! This implementation uses no implicit timezone:
//! incomparable dateTimes stay incomparable.
use std::{cmp::Ordering, fmt::Display, str::FromStr};

use chrono::{format::ParseErrorKind, DateTime, FixedOffset, NaiveDateTime};

/// An `xsd:dateTime` value, which either carries a timezone or does not.
///
/// The derived `PartialEq` compares values variant by variant, so a
/// [`Naive`](Self::Naive) value is never equal to a
/// [`Timezoned`](Self::Timezoned) one.
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum XsdDateTime {
/// A dateTime without any timezone information.
Naive(NaiveDateTime),
/// A dateTime with an explicit offset from UTC.
Timezoned(DateTime<FixedOffset>),
}

impl FromStr for XsdDateTime {
type Err = chrono::ParseError;

fn from_str(s: &str) -> Result<Self, Self::Err> {
DateTime::parse_from_rfc3339(s)
.map(Self::Timezoned)
.or_else(|e| {
if e.kind() == ParseErrorKind::TooShort {
s.parse().map(Self::Naive)
} else {
Err(e)
}
})
}
}

impl Display for XsdDateTime {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(
f,
"{}",
match self {
XsdDateTime::Naive(d) => {
let mut out = d.and_utc().to_rfc3339();
out.truncate(out.len() - 6); // truncate timezone
out
}
XsdDateTime::Timezoned(d) => {
d.to_rfc3339()
}
}
)
}
}

impl PartialOrd for XsdDateTime {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
match (self, other) {
(XsdDateTime::Naive(d1), XsdDateTime::Naive(d2)) => d1.partial_cmp(d2),
(XsdDateTime::Naive(d1), XsdDateTime::Timezoned(d2)) => {
heterogeneous_cmp(d2, d1).map(Ordering::reverse)
}
(XsdDateTime::Timezoned(d1), XsdDateTime::Naive(d2)) => heterogeneous_cmp(d1, d2),
(XsdDateTime::Timezoned(d1), XsdDateTime::Timezoned(d2)) => d1.partial_cmp(d2),
}
}
}

/// Compares the timezoned dateTime `d1` with the timezone-less dateTime `d2`,
/// as specified by <https://www.w3.org/TR/xmlschema-2/#dateTime-order>.
///
/// A timezone-less value may denote any instant between its reading at
/// `+14:00` (the earliest) and its reading at `-14:00` (the latest), so:
///
/// * `Some(Ordering::Less)` is returned if `d1` precedes `d2` read at `+14:00`;
/// * `Some(Ordering::Greater)` is returned if `d1` follows `d2` read at `-14:00`;
/// * `None` is returned otherwise (the values are incomparable).
///
/// `Some(Ordering::Equal)` is never returned.
fn heterogeneous_cmp(d1: &DateTime<FixedOffset>, d2: &NaiveDateTime) -> Option<Ordering> {
    // When a bound cannot be represented, it lies beyond the range of
    // instants that `d1` itself can take, so `d1` cannot be past it:
    // the corresponding test is simply false.
    if naive_to_fixed(d2, 14).map_or(false, |earliest| d1 < &earliest) {
        Some(Ordering::Less)
    } else if naive_to_fixed(d2, -14).map_or(false, |latest| d1 > &latest) {
        Some(Ordering::Greater)
    } else {
        None
    }
}

/// Interprets `d` as a local time in the timezone `offset` hours east of UTC.
///
/// Returns `None` when the resulting instant is not representable, which
/// happens when `d` is so close to the limits of the supported range that
/// applying the offset overflows.
///
/// # Panics
///
/// Panics if `offset` does not denote a valid fixed offset; callers are
/// expected to pass a value in `-14..=14`.
fn naive_to_fixed(d: &NaiveDateTime, offset: i8) -> Option<DateTime<FixedOffset>> {
    debug_assert!((-14..=14).contains(&offset));
    let fixed_offset = FixedOffset::east_opt(i32::from(offset) * 3600)
        .expect("a whole-hour offset within -14..=14 is a valid FixedOffset");
    // A fixed offset has no fold or gap, so the only way not to get a single
    // result is an out-of-range instant, which `single()` maps to `None`.
    d.and_local_timezone(fixed_offset).single()
}

#[cfg(test)]
mod test {
    use super::*;
    use test_case::test_case;
    use Ordering::*;

    /// Parses `lexical` as an `XsdDateTime`, panicking if it is ill-formed.
    fn dt(lexical: &str) -> XsdDateTime {
        lexical.parse::<XsdDateTime>().unwrap()
    }

    // A lexical form without timezone parses to the `Naive` variant.
    #[test_case("2024-09-17T12:00:00"; "no timezone")]
    fn naive(d: &str) {
        let value = dt(d);
        assert!(matches!(value, XsdDateTime::Naive(_)));
    }

    // A lexical form with any timezone parses to the `Timezoned` variant.
    #[test_case("2024-09-17T12:00:00Z"; "z")]
    #[test_case("2024-09-17T12:00:00+00:00"; "plus 0")]
    #[test_case("2024-09-17T12:00:00-00:00"; "minus 0")]
    #[test_case("2024-09-17T12:00:00+01:59"; "plus 1:59")]
    #[test_case("2024-09-17T12:00:00-01:59"; "minus 1:59")]
    #[test_case("2024-09-17T12:00:00+14:00"; "plus 14")]
    #[test_case("2024-09-17T12:00:00-14:00"; "minus 14")]
    fn timezoned(d: &str) {
        let value = dt(d);
        assert!(matches!(value, XsdDateTime::Timezoned(_)));
    }

    // Parsing then formatting gives back the original lexical form.
    #[test_case("2024-09-17T12:00:00"; "no timezone")]
    #[test_case("2024-09-17T12:00:00+00:00"; "plus 0")]
    #[test_case("2024-09-17T12:00:00+01:59"; "plus 1:59")]
    #[test_case("2024-09-17T12:00:00-01:59"; "minus 1:59")]
    #[test_case("2024-09-17T12:00:00+14:00"; "plus 14")]
    #[test_case("2024-09-17T12:00:00-14:00"; "minus 14")]
    #[test_case("2024-09-17T12:00:00.123-14:00"; "with subsec")]
    fn to_string(d: &str) {
        let round_tripped = dt(d).to_string();
        assert_eq!(round_tripped, d);
    }

    // `==` holds for timezoned values denoting the same instant,
    // and never holds between a timezone-less and a timezoned value.
    #[test_case("2024-09-17T12:00:00Z", "2024-09-17T12:00:00Z" => true; "12z 12z")]
    #[test_case("2024-09-17T12:00:00Z", "2024-09-17T12:00:00+00:00" => true; "12z 12p0")]
    #[test_case("2024-09-17T12:00:00Z", "2024-09-17T13:00:00+01:00" => true; "12z 13p1")]
    #[test_case("2024-09-17T12:00:00", "2024-09-17T12:00:00Z" => false; "12 12z")]
    #[test_case("2024-09-17T12:00:00", "2024-09-17T13:00:00+01:00" => false; "12 13p1")]
    fn equal(d1: &str, d2: &str) -> bool {
        let lhs = dt(d1);
        let rhs = dt(d2);
        lhs == rhs
    }

    // The ordering is checked in both directions: swapping the operands
    // must reverse the expected result (or leave it `None`).
    #[test_case("2024-09-17T12:00:00", "2024-09-17T12:00:00", Some(Equal); "12 12")]
    #[test_case("2024-09-17T12:00:00", "2024-09-17T12:00:00Z", None; "12 12z")]
    #[test_case("2024-09-17T12:00:00", "2024-09-17T13:00:00+01:00", None; "12 13p1")]
    #[test_case("2024-09-17T12:00:00Z", "2024-09-17T12:00:00Z", Some(Equal); "12z 12z")]
    #[test_case("2024-09-17T12:00:00Z", "2024-09-17T12:00:00+00:00", Some(Equal); "12z 12p0")]
    #[test_case("2024-09-17T12:00:00Z", "2024-09-17T13:00:00+01:00", Some(Equal); "12z 13p1")]
    #[test_case("2024-09-17T12:00:00Z", "2024-09-17T13:00:00+02:00", Some(Greater); "12z 13p2")]
    #[test_case("2024-09-17T12:00:00Z", "2024-09-17T11:00:00-02:00", Some(Less); "12z 11m2")]
    #[test_case("2024-09-17T06:00:00", "2024-09-17T19:59:00Z", None; "6 19z")]
    #[test_case("2024-09-17T06:00:00", "2024-09-17T20:00:00Z", None; "6 20z")]
    #[test_case("2024-09-17T06:00:00", "2024-09-17T20:01:00Z", Some(Less); "6 20:01z")]
    #[test_case("2024-09-17T06:00:00Z", "2024-09-17T19:59:00", None; "6z 19")]
    #[test_case("2024-09-17T06:00:00Z", "2024-09-17T20:00:00", None; "6z 20")]
    #[test_case("2024-09-17T06:00:00Z", "2024-09-17T20:01:00", Some(Less); "6z 20:01")]
    fn partial_cmp(d1: &str, d2: &str, exp: Option<Ordering>) {
        let lhs = dt(d1);
        let rhs = dt(d2);
        let forward = lhs.partial_cmp(&rhs);
        assert_eq!(forward, exp);
        let backward = rhs.partial_cmp(&lhs);
        assert_eq!(backward, exp.map(Ordering::reverse));
    }
}

0 comments on commit df4f8f8

Please sign in to comment.