Replace regex-based date/time parsing with manual parser (#520)

* Replace regex-based date/time string parsing with manually written parsing code.

* Add date/time parser tests.
This commit is contained in:
MeanSquaredError
2023-09-07 07:23:44 +03:00
committed by GitHub
parent 1cd47c77dd
commit 25bca54ba7
6 changed files with 544 additions and 111 deletions

View File

@@ -27,100 +27,237 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
#include <regex>
#include <sqlpp11/chrono.h>
#include <cctype>
namespace sqlpp
{
namespace detail
{
// Parse a date string formatted as YYYY-MM-DD
//
inline bool parse_string_date(::sqlpp::chrono::day_point& value, const char* date_string)
// Reads exactly `length` decimal digits starting at `input` and accumulates
// them into `value` (which is reset to 0 first).
// Returns true and advances `input` past the digits on success. On the first
// non-digit character it returns false and leaves `input` unchanged; `value`
// may then hold a partially accumulated number.
// NOTE(review): in this rendering, lines of the removed regex-based
// implementation (the ones referring to `rx`, `mr` and `date_string`) appear
// interleaved with this helper's body.
inline bool parse_unsigned(int& value, const char*& input, int length)
{
static const std::regex rx{"(\\d{4})-(\\d{2})-(\\d{2})"};
std::cmatch mr;
if (std::regex_match(date_string, mr, rx) == false)
value = 0;
// Work on a copy so that `input` is only moved once all digits were read.
auto new_input = input;
while (length--)
{
return false;
auto ch = *new_input++;
// NOTE(review): `ch` is a plain char; std::isdigit is only defined for values
// representable as unsigned char (or EOF), so a cast to unsigned char looks
// advisable here.
if (std::isdigit(ch) == false)
{
return false;
}
value = value * 10 + ch - '0';
}
value = ::sqlpp::chrono::day_point{
::date::year{std::atoi(date_string + mr.position(1))} / // Year
std::atoi(date_string + mr.position(2)) / // Month
std::atoi(date_string + mr.position(3)) // Day of month
};
input = new_input;
return true;
}
// Parse a date string formatted as YYYY-MM-DD HH:MM:SS.US TZ
// .US are optional fractional seconds, up to 6 digits in length
// TZ is an optional time zone offset formatted as +HH[:MM] or -HH[:MM]
//
inline bool parse_string_date_time(::sqlpp::chrono::microsecond_point& value, const char* date_time_string)
// Consumes a single expected character: when `*input` equals `ch` the pointer
// is advanced by one and true is returned; otherwise false is returned and
// `input` is not moved.
// NOTE(review): in this rendering, lines of the removed regex-based
// implementation (the ones referring to `rx`, `mr` and `date_time_string`)
// appear interleaved with this helper's body.
inline bool parse_character(const char*& input, char ch)
{
static const std::regex rx{
"(\\d{4})-(\\d{2})-(\\d{2}) "
"(\\d{2}):(\\d{2}):(\\d{2})(?:\\.(\\d{1,6}))?"
"(?:([+-])(\\d{2})(?::(\\d{2})(?::(\\d{2}))?)?)?"
};
std::cmatch mr;
if (std::regex_match(date_time_string, mr, rx) == false)
if (*input != ch)
{
return false;
}
value =
::sqlpp::chrono::day_point{
::date::year{std::atoi(date_time_string + mr.position(1))} / // Year
std::atoi(date_time_string + mr.position(2)) / // Month
std::atoi(date_time_string + mr.position(3)) // Day of month
} +
std::chrono::hours{std::atoi(date_time_string + mr.position(4))} + // Hour
std::chrono::minutes{std::atoi(date_time_string + mr.position(5))} + // Minute
std::chrono::seconds{std::atoi(date_time_string + mr.position(6))} + // Second
::std::chrono::microseconds{ // Second fraction
mr[7].matched ? std::stoi((mr[7].str() + "000000").substr(0, 6)) : 0
};
if (mr[8].matched)
++input;
return true;
}
// Parses a date formatted as YYYY-MM-DD: a 4-digit year, a 2-digit month and a
// 2-digit day separated by '-'.
// On success the date is stored in `dp`, `input` is advanced past the parsed
// text and true is returned. On failure false is returned and neither `dp` nor
// `input` is modified.
// The numeric fields are not range-checked in this function.
// NOTE(review): in this rendering, lines of the removed regex-based
// implementation (the time zone handling that refers to `mr` and
// `date_time_string`) appear interleaved with this function's body.
inline bool parse_yyyy_mm_dd(sqlpp::chrono::day_point& dp, const char*& input)
{
// Work on a copy so that `input` is only moved when the whole date matched.
auto new_input = input;
int year, month, day;
if ((parse_unsigned(year, new_input, 4) == false) || (parse_character(new_input, '-') == false) ||
(parse_unsigned(month, new_input, 2) == false) || (parse_character(new_input, '-') == false) ||
(parse_unsigned(day, new_input, 2) == false))
{
const auto tz_sign = (date_time_string[mr.position(8)] == '+') ? 1 : -1;
const auto tz_offset =
std::chrono::hours{std::atoi(date_time_string + mr.position(9))} +
std::chrono::minutes{mr[10].matched ? std::atoi(date_time_string + mr.position(10)) : 0} +
std::chrono::seconds{mr[11].matched ? std::atoi(date_time_string + mr.position(11)) : 0};
value -= tz_sign * tz_offset;
return false;
}
dp = ::date::year{year} / month / day;
input = new_input;
return true;
}
// Parses a time of day formatted as HH:MM:SS (two digits per field, separated
// by ':').
// On success the total duration is stored in `us`, `input` is advanced past the
// parsed text and true is returned. On failure false is returned and neither
// `us` nor `input` is modified.
// The fields are not range-checked: out-of-range hour, minute or second values
// are accepted and simply added up.
inline bool parse_hh_mm_ss(std::chrono::microseconds& us, const char*& input)
{
  const char* cursor = input;
  int hh, mm, ss;
  // Short-circuit evaluation keeps the strict left-to-right field order.
  const bool matched = parse_unsigned(hh, cursor, 2) && parse_character(cursor, ':') &&
                       parse_unsigned(mm, cursor, 2) && parse_character(cursor, ':') &&
                       parse_unsigned(ss, cursor, 2);
  if (!matched)
  {
    return false;
  }
  us = std::chrono::hours{hh} + std::chrono::minutes{mm} + std::chrono::seconds{ss};
  input = cursor;
  return true;
}
// Parses fractional seconds formatted as '.' followed by 1 to 6 decimal digits
// and converts them to microseconds (".5" yields 500000us, ".000001" yields 1us).
// On success the value is stored in `us`, `input` is advanced past the consumed
// characters and true is returned. When there is no leading '.' or no digit
// follows it, false is returned and neither `us` nor `input` is modified.
// At most 6 digits are consumed; any further digits are left in `input` for the
// caller to deal with.
inline bool parse_ss_fraction(std::chrono::microseconds& us, const char*& input)
{
  const int max_digits = 6;
  const char* cursor = input;
  if (*cursor != '.')
  {
    return false;
  }
  ++cursor;
  int value = 0;
  int digits = 0;
  // std::isdigit is undefined for negative arguments other than EOF, so the
  // (possibly signed) char must be converted to unsigned char first.
  while ((digits < max_digits) && (std::isdigit(static_cast<unsigned char>(*cursor)) != 0))
  {
    value = value * 10 + (*cursor - '0');
    ++cursor;
    ++digits;
  }
  if (digits == 0)
  {
    return false;
  }
  // Scale a short fraction up to microseconds, e.g. ".12" -> 120000.
  for (; digits < max_digits; ++digits)
  {
    value *= 10;
  }
  us = std::chrono::microseconds{value};
  input = cursor;
  return true;
}
// Parses a time zone offset formatted as +HH[:MM[:SS]] or -HH[:MM[:SS]].
// The sign and the two-digit hour are mandatory; when either is missing false
// is returned and neither `offset` nor `input` is modified.
// The ":MM" and ":SS" parts are optional. Each one is only consumed when it
// matches completely; an incomplete part is left in `input` and the function
// still returns true with the offset parsed so far.
inline bool parse_tz(std::chrono::microseconds& offset, const char*& input)
{
  const char* cursor = input;
  int sign = 0;
  if (parse_character(cursor, '+'))
  {
    sign = 1;
  }
  else if (parse_character(cursor, '-'))
  {
    sign = -1;
  }
  if (sign == 0)
  {
    return false;
  }

  int hh;
  if (!parse_unsigned(hh, cursor, 2))
  {
    return false;
  }
  offset = sign * std::chrono::hours{hh};
  input = cursor;

  int mm;
  if (parse_character(cursor, ':') && parse_unsigned(mm, cursor, 2))
  {
    offset += sign * std::chrono::minutes{mm};
    input = cursor;

    // Seconds are only looked for after a complete minutes part.
    int ss;
    if (parse_character(cursor, ':') && parse_unsigned(ss, cursor, 2))
    {
      offset += sign * std::chrono::seconds{ss};
      input = cursor;
    }
  }
  return true;
}
// Parses HH:MM:SS followed by optional fractional seconds and an optional time
// zone offset, each handled by the corresponding helper.
// The fraction is added to `us` and the time zone offset is subtracted from it.
// Returns false only when the mandatory HH:MM:SS part does not match; `input`
// is advanced past everything that was consumed.
inline bool parse_hh_mm_ss_us_tz(std::chrono::microseconds& us, const char*& input)
{
  if (!parse_hh_mm_ss(us, input))
  {
    return false;
  }
  // Scratch value shared by both optional parts; it is only read after the
  // respective helper reported success and therefore assigned it.
  std::chrono::microseconds part;
  if (parse_ss_fraction(part, input))
  {
    us += part;
  }
  if (parse_tz(part, input))
  {
    us -= part;
  }
  return true;
}
// Parse a time string formatted as HH:MM:SS[.US][ TZ]
// .US is up to 6 digits in length
// TZ is an optional time zone offset formatted as +HH[:MM] or -HH[:MM]
// Parse timestamp formatted as YYYY-MM-DD HH:MM:SS.U+HH:MM:SS
// The microseconds and timezone offset are optional
//
inline bool parse_string_time_of_day(::std::chrono::microseconds& value, const char* time_string)
// Parses a complete timestamp string: a date, a single space, and a time of day
// with optional fraction and time zone offset.
// On success `tp` is set to the sum of the parsed date and time of day and true
// is returned. False is returned when any part fails to parse or when
// characters remain after the time of day; `tp` is not assigned in that case.
// NOTE(review): in this rendering, lines of the removed regex-based
// implementation (the ones referring to `rx`, `mr`, `time_string` and `value`)
// appear interleaved with this function's body.
inline bool parse_timestamp(sqlpp::chrono::microsecond_point& tp, const char* date_time_string)
{
static const std::regex rx{
"(\\d{2}):(\\d{2}):(\\d{2})(?:\\.(\\d{1,6}))?"
"(?:([+-])(\\d{2})(?::(\\d{2})(?::(\\d{2}))?)?)?"
};
std::cmatch mr;
if (std::regex_match (time_string, mr, rx) == false)
sqlpp::chrono::day_point parsed_ymd;
std::chrono::microseconds parsed_tod;
if ((parse_yyyy_mm_dd(parsed_ymd, date_time_string) == false) ||
(parse_character(date_time_string, ' ') == false) ||
(parse_hh_mm_ss_us_tz(parsed_tod, date_time_string) == false))
{
return false;
}
value =
std::chrono::hours{std::atoi(time_string + mr.position(1))} + // Hour
std::chrono::minutes{std::atoi(time_string + mr.position(2))} + // Minute
std::chrono::seconds{std::atoi(time_string + mr.position(3))} + // Second
::std::chrono::microseconds{ // Second fraction
mr[4].matched ? std::stoi((mr[4].str() + "000000").substr(0, 6)) : 0
};
if (mr[5].matched)
// Anything left after the time of day makes the whole string invalid.
if (*date_time_string)
{
const auto tz_sign = (time_string[mr.position(5)] == '+') ? 1 : -1;
const auto tz_offset =
std::chrono::hours{std::atoi(time_string + mr.position(6))} +
std::chrono::minutes{mr[7].matched ? std::atoi(time_string + mr.position(7)) : 0} +
std::chrono::seconds{mr[8].matched ? std::atoi(time_string + mr.position(8)) : 0};
value -= tz_sign * tz_offset;
return false;
}
tp = parsed_ymd + parsed_tod;
return true;
}
// Parse a complete date string formatted as YYYY-MM-DD.
// Returns true only when the date matches and nothing follows it. When the
// date matches but trailing characters remain, false is returned although
// `dp` has already been assigned.
//
inline bool parse_date(sqlpp::chrono::day_point& dp, const char* date_string)
{
  return parse_yyyy_mm_dd(dp, date_string) && (*date_string == '\0');
}
// Parse a string that holds either a date (YYYY-MM-DD) or a full timestamp
// (YYYY-MM-DD HH:MM:SS.U+HH:MM:SS).
// The whole time-of-day part is optional; within it the fractional seconds and
// the time zone offset are optional as well.
// Returns false, leaving `tp` unassigned, when the string does not match or
// has trailing characters.
//
inline bool parse_date_or_timestamp(sqlpp::chrono::microsecond_point& tp, const char* date_time_string)
{
  sqlpp::chrono::day_point date_part;
  if (!parse_yyyy_mm_dd(date_part, date_time_string))
  {
    return false;
  }
  // A bare date is accepted as is.
  if (*date_time_string == '\0')
  {
    tp = date_part;
    return true;
  }
  std::chrono::microseconds time_part;
  const bool time_matched =
      parse_character(date_time_string, ' ') && parse_hh_mm_ss_us_tz(time_part, date_time_string);
  if (!time_matched || (*date_time_string != '\0'))
  {
    return false;
  }
  tp = date_part + time_part;
  return true;
}
// Parse a complete time-of-day string formatted as HH:MM:SS.U+HH:MM:SS.
// The fractional seconds and the time zone offset are optional.
// Returns true only when the time matches and nothing follows it. When the
// time matches but trailing characters remain, false is returned although
// `us` has already been assigned.
//
inline bool parse_time_of_day(std::chrono::microseconds& us, const char* time_string)
{
  return parse_hh_mm_ss_us_tz(us, time_string) && (*time_string == '\0');
}

View File

@@ -152,7 +152,7 @@ namespace sqlpp
if (_handle->debug)
std::cerr << "MySQL debug: date string: " << date_string << std::endl;
if (::sqlpp::detail::parse_string_date(*value, date_string) == false)
if (::sqlpp::detail::parse_date(*value, date_string) == false)
{
if (_handle->debug)
std::cerr << "MySQL debug: invalid date result: " << date_string << std::endl;
@@ -175,7 +175,7 @@ namespace sqlpp
if (_handle->debug)
std::cerr << "MySQL debug: date_time string: " << date_time_string << std::endl;
if (::sqlpp::detail::parse_string_date_time(*value, date_time_string) == false)
if (::sqlpp::detail::parse_timestamp(*value, date_time_string) == false)
{
if (_handle->debug)
std::cerr << "MySQL debug: invalid date_time result: " << date_time_string << std::endl;

View File

@@ -239,7 +239,7 @@ namespace sqlpp
{
std::cerr << "PostgreSQL debug: date string: " << date_string << std::endl;
}
if (::sqlpp::detail::parse_string_date(*value, date_string) == false)
if (::sqlpp::detail::parse_date(*value, date_string) == false)
{
if (_handle->debug())
{
@@ -269,7 +269,7 @@ namespace sqlpp
{
std::cerr << "PostgreSQL debug: got date_time string: " << date_string << std::endl;
}
if (::sqlpp::detail::parse_string_date_time(*value, date_string) == false)
if (::sqlpp::detail::parse_timestamp(*value, date_string) == false)
{
if (_handle->debug())
{
@@ -301,7 +301,7 @@ namespace sqlpp
std::cerr << "PostgreSQL debug: got time string: " << time_string << std::endl;
}
if (::sqlpp::detail::parse_string_time_of_day(*value, time_string) == false)
if (::sqlpp::detail::parse_time_of_day(*value, time_string) == false)
{
if (_handle->debug()) {
std::cerr << "PostgreSQL debug: got invalid time '" << time_string << "'" << std::endl;

View File

@@ -34,7 +34,6 @@
#include <iostream>
#include <memory>
#include <regex>
#ifdef _MSC_VER
#include <iso646.h>
@@ -46,40 +45,6 @@ namespace sqlpp
{
namespace sqlite3
{
namespace detail
{
  // Parse a date string formatted as YYYY-MM-DD[ HH:MM:SS[.US]]
  // The time-of-day part is optional and .US is 1 to 6 fractional-second digits.
  // Returns false, leaving `value` untouched, when the whole string does not
  // match that format.
  //
  inline bool parse_string_date_opt_time(::sqlpp::chrono::microsecond_point& value, const char* date_time_string)
  {
    static const std::regex pattern{
      "(\\d{4})-(\\d{2})-(\\d{2})"
      "(?: (\\d{2}):(\\d{2}):(\\d{2})(?:\\.(\\d{1,6}))?)?"
    };
    std::cmatch match;
    if (!std::regex_match(date_time_string, match, pattern))
    {
      return false;
    }
    // Reads the integer that starts at capture group `group`; std::atoi stops
    // at the first non-digit, so exactly one field is read per call.
    const auto field = [&](std::cmatch::size_type group) {
      return std::atoi(date_time_string + match.position(group));
    };
    value = ::sqlpp::chrono::day_point{
      ::date::year{field(1)} / // Year
      field(2) /               // Month
      field(3)                 // Day of month
    };
    if (!match[4].matched)
    {
      // Date only, no time of day present.
      return true;
    }
    // Right-pad the fraction with zeros to 6 digits to get microseconds.
    const int fraction_us = match[7].matched ? std::stoi((match[7].str() + "000000").substr(0, 6)) : 0;
    value +=
      std::chrono::hours{field(4)} +   // Hour
      std::chrono::minutes{field(5)} + // Minute
      std::chrono::seconds{field(6)} + // Second
      ::std::chrono::microseconds{fraction_us}; // Second fraction
    return true;
  }
}  // namespace detail
class SQLPP11_SQLITE3_EXPORT bind_result_t
{
std::shared_ptr<detail::prepared_statement_handle_t> _handle;
@@ -208,7 +173,7 @@ namespace sqlpp
reinterpret_cast<const char*>(sqlite3_column_text(_handle->sqlite_statement, static_cast<int>(index)));
if (_handle->debug)
std::cerr << "Sqlite3 debug: date string: " << date_string << std::endl;
if (::sqlpp::detail::parse_string_date(*value, date_string) == false)
if (::sqlpp::detail::parse_date(*value, date_string) == false)
{
if (_handle->debug)
std::cerr << "Sqlite3 debug: invalid date result: " << date_string << std::endl;
@@ -232,7 +197,7 @@ namespace sqlpp
if (_handle->debug)
std::cerr << "Sqlite3 debug: date_time string: " << date_time_string << std::endl;
// We treat DATETIME fields as containing either date+time or just date.
if (detail::parse_string_date_opt_time(*value, date_time_string) == false)
if (::sqlpp::detail::parse_date_or_timestamp(*value, date_time_string) == false)
{
if (_handle->debug)
std::cerr << "Sqlite3 debug: invalid date_time result: " << date_time_string << std::endl;