diff --git a/NEWS.md b/NEWS.md index d87656c9f..45a36dcf0 100644 --- a/NEWS.md +++ b/NEWS.md @@ -54,6 +54,8 @@ 10. `setkey()`, `setindex()`, `CJ()`, and `setnames()` now prevent creating ambiguous keys from duplicated column names, and keyed joins now error on existing duplicated key columns rather than silently giving incorrect results, [#4888](https://github.com/Rdatatable/data.table/issues/4888) and [#4891](https://github.com/Rdatatable/data.table/issues/4891). Thanks @magerton and @MichaelChirico for the reports, and @ben-schwen for the fix. +11. `between()` now supports `Date` and `IDate` bounds with default `NAbounds=TRUE`, avoiding errors like "Not yet implemented NAbounds=TRUE for this non-numeric and non-character type" when date bounds contain `NA`, [#7281](https://github.com/Rdatatable/data.table/issues/7281). Thanks @grcatlin for the report and fix, and @ben-schwen and @aitap for assistance. + ### Notes 1. {data.table} now depends on R 3.5.0 (2018). diff --git a/R/between.R b/R/between.R index 63877313d..49c65be3e 100644 --- a/R/between.R +++ b/R/between.R @@ -5,6 +5,7 @@ between = function(x, lower, upper, incbounds=TRUE, NAbounds=TRUE, check=FALSE, if (is.logical(upper)) upper = as.integer(upper) # typically NA (which is logical type) is.px = function(x) inherits(x, "POSIXct") is.i64 = function(x) inherits(x, "integer64") # this is true for nanotime too + is.Date = function(x) inherits(x, "Date") # POSIXct special handling to auto coerce character if (is.px(x) && (is.character(lower) || is.character(upper))) { tz = attr(x, "tzone", exact=TRUE) @@ -33,7 +34,14 @@ between = function(x, lower, upper, incbounds=TRUE, NAbounds=TRUE, check=FALSE, if (!is.i64(lower) && (is.integer(lower) || fitsInInt64(lower))) lower = bit64::as.integer64(lower) if (!is.i64(upper) && (is.integer(upper) || fitsInInt64(upper))) upper = bit64::as.integer64(upper) } - is.supported = function(x) is.numeric(x) || is.character(x) || is.px(x) + # Date special handling #7281 + if (is.Date(x)) { + if (is.character(lower)) lower = tryCatch(as.Date(lower), error = function(e) stopf( + "The 'x' argument of the 'between' function is Date while '%s' was not, coercion to Date failed with: %s", 'lower', conditionMessage(e))) + if (is.character(upper)) upper = tryCatch(as.Date(upper), error = function(e) stopf( + "The 'x' argument of the 'between' function is Date while '%s' was not, coercion to Date failed with: %s", 'upper', conditionMessage(e))) + } + is.supported = function(x) is.numeric(x) || is.character(x) || is.px(x) || is.Date(x) if (is.supported(x) && is.supported(lower) && is.supported(upper)) { # faster parallelised version for int/double/character # Cbetween supports length(lower)==1 (recycled) and (from v1.12.0) length(lower)==length(x). diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 59c2218e6..6db8f25bb 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -14688,6 +14688,28 @@ test(2038.34, between(1:10, -Inf, Inf), rep(TRUE,10), output="between test(2038.35, between(as.double(1:10), -Inf, Inf), rep(TRUE,10), output="between parallel processing of double with closed bounds") options(old) +# `between` supports Date/IDate with missing bounds, #7281 +x = data.table( + Date = as.Date("2020-01-01") + 0:5, + Active = c(TRUE, FALSE, TRUE, FALSE, TRUE, FALSE), + InRange = c(TRUE, NA, FALSE, NA, TRUE, NA), + RangeBegin = as.Date("2020-01-01") + c(-1L, NA, 3L, NA, 3L, NA), + RangeEnd = as.Date("2020-01-01") + c( 1L, NA, 4L, NA, 5L, NA) +) +x[Active == TRUE, InRange := c(TRUE, FALSE, TRUE)] +x[InRange == TRUE, c("RangeBegin", "RangeEnd") := list(Date - 1L, Date + 1L)] +x[InRange == FALSE, c("RangeBegin", "RangeEnd") := list(Date + 1L, Date + 2L)] +test(2038.36, x[Active == TRUE & between(Date, RangeBegin, RangeEnd), Date], as.Date(c("2020-01-01", "2020-01-05"))) +# Date character bounds coercion +test(2038.37, between(x$Date, "2020-01-02", "2020-01-04"), c(FALSE, TRUE, TRUE, TRUE, FALSE, FALSE)) +x[, c("Date", "RangeBegin", "RangeEnd") := lapply(.SD, as.IDate), .SDcols = c("Date", "RangeBegin", "RangeEnd")] +test(2038.38, x[Active == TRUE & between(Date, RangeBegin, RangeEnd), Date], as.IDate(c("2020-01-01", "2020-01-05"))) + +# errors name the bound that failed coercion +x = as.Date("2020-01-01") + 0:4 +test(2038.39, between(x, "not-a-date", "2020-01-04"), error="Date while 'lower' was not, coercion to Date failed") +test(2038.40, between(x, "2020-01-02", "not-a-date"), error="Date while 'upper' was not, coercion to Date failed") + # between int64 support if (test_bit64) { as.i64 = bit64::as.integer64