mirror of
https://github.com/Kitware/CMake.git
synced 2026-01-06 13:51:33 -06:00
string: Allow zero-length matches in all REGEX subcommands
The semantics mimic other languages like Python, Java, JS, etc. To advance past a zero-length match, the search algorithm first tries to find a non-zero-length match at the same offset via an alternative branch. If that fails, it force-advances by 1 character. Fixes: #13790, #13792, #18690, #26629
This commit is contained in:
@@ -122,6 +122,9 @@ Search and Replace With Regular Expressions
|
||||
string instead of the beginning of each repeated search.
|
||||
See policy :policy:`CMP0186`.
|
||||
|
||||
Zero-length matches are allowed in ``MATCH``, ``MATCHALL``, and ``REPLACE``.
|
||||
Previously, they triggered an error.
|
||||
|
||||
The replacement expression may contain references to subexpressions that
|
||||
didn't match anything. Previously, such references triggered an error.
|
||||
|
||||
|
||||
@@ -6,3 +6,5 @@ regex-fixes
|
||||
|
||||
* References to unmatched groups are allowed; they are replaced with empty
|
||||
strings.
|
||||
|
||||
* Zero-length matches are always allowed.
|
||||
|
||||
@@ -251,15 +251,7 @@ bool RegexMatch(std::vector<std::string> const& args,
|
||||
std::string output;
|
||||
if (re.find(input)) {
|
||||
status.GetMakefile().StoreMatches(re);
|
||||
std::string::size_type l = re.start();
|
||||
std::string::size_type r = re.end();
|
||||
if (r - l == 0) {
|
||||
std::string e = "sub-command REGEX, mode MATCH regex \"" + regex +
|
||||
"\" matched an empty string.";
|
||||
status.SetError(e);
|
||||
return false;
|
||||
}
|
||||
output = input.substr(l, r - l);
|
||||
output = re.match();
|
||||
}
|
||||
|
||||
// Store the output in the provided variable.
|
||||
@@ -298,22 +290,24 @@ bool RegexMatchAll(std::vector<std::string> const& args,
|
||||
// Scan through the input for all matches.
|
||||
std::string output;
|
||||
std::string::size_type base = 0;
|
||||
while (re.find(input, base, optAnchor)) {
|
||||
unsigned optNonEmpty = 0;
|
||||
while (re.find(input, base, optAnchor | optNonEmpty)) {
|
||||
status.GetMakefile().ClearMatches();
|
||||
status.GetMakefile().StoreMatches(re);
|
||||
std::string::size_type l = re.start();
|
||||
std::string::size_type r = re.end();
|
||||
if (r - l == 0) {
|
||||
std::string e = "sub-command REGEX, mode MATCHALL regex \"" + regex +
|
||||
"\" matched an empty string.";
|
||||
status.SetError(e);
|
||||
return false;
|
||||
}
|
||||
if (!output.empty()) {
|
||||
if (!output.empty() || optNonEmpty) {
|
||||
output += ";";
|
||||
}
|
||||
output += re.match();
|
||||
base = r;
|
||||
base = re.end();
|
||||
|
||||
if (re.start() == input.length()) {
|
||||
break;
|
||||
}
|
||||
if (re.start() == re.end()) {
|
||||
optNonEmpty = cmsys::RegularExpression::NONEMPTY_AT_OFFSET;
|
||||
} else {
|
||||
optNonEmpty = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// Store the output in the provided variable.
|
||||
|
||||
@@ -33,25 +33,17 @@ bool cmStringReplaceHelper::Replace(std::string const& input,
|
||||
}
|
||||
|
||||
// Scan through the input for all matches.
|
||||
auto& re = this->RegularExpression;
|
||||
std::string::size_type base = 0;
|
||||
while (this->RegularExpression.find(input, base, optAnchor)) {
|
||||
unsigned optNonEmpty = 0;
|
||||
while (re.find(input, base, optAnchor | optNonEmpty)) {
|
||||
if (this->Makefile) {
|
||||
this->Makefile->ClearMatches();
|
||||
this->Makefile->StoreMatches(this->RegularExpression);
|
||||
this->Makefile->StoreMatches(re);
|
||||
}
|
||||
auto l2 = this->RegularExpression.start();
|
||||
auto r = this->RegularExpression.end();
|
||||
|
||||
// Concatenate the part of the input that was not matched.
|
||||
output += input.substr(base, l2 - base);
|
||||
|
||||
// Make sure the match had some text.
|
||||
if (r - l2 == 0) {
|
||||
std::ostringstream error;
|
||||
error << "regex \"" << this->RegExString << "\" matched an empty string";
|
||||
this->ErrorString = error.str();
|
||||
return false;
|
||||
}
|
||||
output += input.substr(base, re.start() - base);
|
||||
|
||||
// Concatenate the replacement for the match.
|
||||
for (auto const& replacement : this->Replacements) {
|
||||
@@ -61,7 +53,7 @@ bool cmStringReplaceHelper::Replace(std::string const& input,
|
||||
} else {
|
||||
// Replace with part of the match.
|
||||
auto n = replacement.Number;
|
||||
if (n > this->RegularExpression.num_groups()) {
|
||||
if (n > re.num_groups()) {
|
||||
std::ostringstream error;
|
||||
error << "replace expression \"" << this->ReplaceExpression
|
||||
<< "\" contains an out-of-range escape for regex \""
|
||||
@@ -69,12 +61,21 @@ bool cmStringReplaceHelper::Replace(std::string const& input,
|
||||
this->ErrorString = error.str();
|
||||
return false;
|
||||
}
|
||||
output += this->RegularExpression.match(n);
|
||||
output += re.match(n);
|
||||
}
|
||||
}
|
||||
|
||||
// Move past the match.
|
||||
base = r;
|
||||
base = re.end();
|
||||
|
||||
if (re.start() == input.length()) {
|
||||
break;
|
||||
}
|
||||
if (re.start() == re.end()) {
|
||||
optNonEmpty = cmsys::RegularExpression::NONEMPTY_AT_OFFSET;
|
||||
} else {
|
||||
optNonEmpty = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// Concatenate the text after the last match.
|
||||
|
||||
@@ -84,7 +84,7 @@ check_cmake_test(String
|
||||
# Execute each test listed in StringTestScript.cmake:
|
||||
#
|
||||
set(scriptname "@CMAKE_CURRENT_SOURCE_DIR@/StringTestScript.cmake")
|
||||
set(number_of_tests_expected 73)
|
||||
set(number_of_tests_expected 70)
|
||||
|
||||
include("@CMAKE_CURRENT_SOURCE_DIR@/ExecuteScriptTests.cmake")
|
||||
execute_all_script_tests(${scriptname} number_of_tests_executed)
|
||||
|
||||
@@ -73,9 +73,6 @@ elseif(testname STREQUAL regex_match_multiple_inputs) # pass
|
||||
elseif(testname STREQUAL regex_match_bad_regex) # fail
|
||||
string(REGEX MATCH "(.*" v input)
|
||||
|
||||
elseif(testname STREQUAL regex_match_empty_string) # fail
|
||||
string(REGEX MATCH "x*" v "")
|
||||
|
||||
elseif(testname STREQUAL regex_match_no_match) # pass
|
||||
string(REGEX MATCH "xyz" v "abc")
|
||||
message(STATUS "v='${v}'")
|
||||
@@ -87,9 +84,6 @@ elseif(testname STREQUAL regex_matchall_multiple_inputs) # pass
|
||||
elseif(testname STREQUAL regex_matchall_bad_regex) # fail
|
||||
string(REGEX MATCHALL "(.*" v input)
|
||||
|
||||
elseif(testname STREQUAL regex_matchall_empty_string) # fail
|
||||
string(REGEX MATCHALL "x*" v "")
|
||||
|
||||
elseif(testname STREQUAL regex_replace_ends_with_backslash) # fail
|
||||
string(REGEX REPLACE "input" "output\\" v input1 input2 input3 input4)
|
||||
|
||||
@@ -107,9 +101,6 @@ elseif(testname STREQUAL regex_replace_has_bogus_escape) # fail
|
||||
elseif(testname STREQUAL regex_replace_bad_regex) # fail
|
||||
string(REGEX REPLACE "this (.*" "with that" v input)
|
||||
|
||||
elseif(testname STREQUAL regex_replace_empty_string) # fail
|
||||
string(REGEX REPLACE "x*" "that" v "")
|
||||
|
||||
elseif(testname STREQUAL regex_replace_index_too_small) # fail
|
||||
string(REGEX REPLACE "^this (.*)$" "with \\1 \\-1" v "this input")
|
||||
|
||||
|
||||
143
Tests/RunCMake/string/RegexEmptyMatch.cmake
Normal file
143
Tests/RunCMake/string/RegexEmptyMatch.cmake
Normal file
@@ -0,0 +1,143 @@
|
||||
cmake_policy(SET CMP0186 NEW)
|
||||
|
||||
# Verify that the variable named by `name` holds exactly `expected`.
#   name     - name of the variable to inspect; it is dereferenced here
#   expected - string the variable's value is compared against
# Returns quietly when the two are equal; otherwise reports the variable
# name, its actual value, and the expected value through FATAL_ERROR.
function(check_output name expected)
  set(output "${${name}}")
  # Guard clause: nothing to report when the value matches.
  if(output STREQUAL expected)
    return()
  endif()
  message(FATAL_ERROR "\"string(REGEX)\" set ${name} to \"${output}\", expected \"${expected}\"")
endfunction()
|
||||
|
||||
# Zero-length matches in REGEX MATCH
|
||||
|
||||
string(REGEX MATCH "" out "")
|
||||
check_output(out "")
|
||||
|
||||
string(REGEX MATCH "" out "a")
|
||||
check_output(out "")
|
||||
|
||||
string(REGEX MATCH "a*" out "")
|
||||
check_output(out "")
|
||||
|
||||
string(REGEX MATCH "a*" out "a")
|
||||
check_output(out "a")
|
||||
|
||||
string(REGEX MATCH "a*" out "b")
|
||||
check_output(out "")
|
||||
|
||||
string(REGEX MATCH "a*" out "ba")
|
||||
check_output(out "")
|
||||
|
||||
# Zero-length matches in REGEX MATCHALL
|
||||
|
||||
string(REGEX MATCHALL "" out "")
|
||||
check_output(out "")
|
||||
|
||||
string(REGEX MATCHALL "" out "ab")
|
||||
check_output(out ";;")
|
||||
|
||||
string(REGEX MATCHALL "^" out "ab")
|
||||
check_output(out "")
|
||||
|
||||
string(REGEX MATCHALL "(^|,)" out "a,b")
|
||||
check_output(out ";,")
|
||||
|
||||
string(REGEX MATCHALL "(,|^)" out "a,b")
|
||||
check_output(out ";,")
|
||||
|
||||
string(REGEX MATCHALL "(^|)" out "")
|
||||
check_output(out "")
|
||||
|
||||
string(REGEX MATCHALL "(^|)" out "ab")
|
||||
check_output(out ";;")
|
||||
|
||||
string(REGEX MATCHALL "a|^" out "ab")
|
||||
check_output(out "a")
|
||||
|
||||
string(REGEX MATCHALL "$" out "ab")
|
||||
check_output(out "")
|
||||
|
||||
string(REGEX MATCHALL "($|,)" out "a,b")
|
||||
check_output(out ",;")
|
||||
|
||||
string(REGEX MATCHALL "(,|$)" out "a,b")
|
||||
check_output(out ",;")
|
||||
|
||||
string(REGEX MATCHALL "(|$)" out "")
|
||||
check_output(out "")
|
||||
|
||||
string(REGEX MATCHALL "(|$)" out "ab")
|
||||
check_output(out ";;")
|
||||
|
||||
string(REGEX MATCHALL "(b|)" out "abc")
|
||||
check_output(out ";b;;")
|
||||
|
||||
string(REGEX MATCHALL "(|b)" out "abc")
|
||||
check_output(out ";;b;;")
|
||||
|
||||
string(REGEX MATCHALL "a*" out "aaa")
|
||||
check_output(out "aaa;")
|
||||
|
||||
string(REGEX MATCHALL "(a)?(b)?" out "")
|
||||
check_output(out "")
|
||||
|
||||
string(REGEX MATCHALL "(a)?(b)?" out "abba")
|
||||
check_output(out "ab;b;a;")
|
||||
|
||||
# Zero-length matches in REGEX REPLACE
|
||||
|
||||
string(REGEX REPLACE "" "" out "")
|
||||
check_output(out "")
|
||||
|
||||
string(REGEX REPLACE "" "x" out "")
|
||||
check_output(out "x")
|
||||
|
||||
string(REGEX REPLACE "" "x" out "ab")
|
||||
check_output(out "xaxbx")
|
||||
|
||||
string(REGEX REPLACE "^" "x" out "ab")
|
||||
check_output(out "xab")
|
||||
|
||||
string(REGEX REPLACE "(^|,)" "x" out "a,b")
|
||||
check_output(out "xaxb")
|
||||
|
||||
string(REGEX REPLACE "(,|^)" "x" out "a,b")
|
||||
check_output(out "xaxb")
|
||||
|
||||
string(REGEX REPLACE "(^|)" "x" out "")
|
||||
check_output(out "x")
|
||||
|
||||
string(REGEX REPLACE "(^|)" "x" out "ab")
|
||||
check_output(out "xaxbx")
|
||||
|
||||
string(REGEX REPLACE "a|^" "x" out "ab")
|
||||
check_output(out "xb")
|
||||
|
||||
string(REGEX REPLACE "$" "x" out "ab")
|
||||
check_output(out "abx")
|
||||
|
||||
string(REGEX REPLACE "($|,)" "x" out "a,b")
|
||||
check_output(out "axbx")
|
||||
|
||||
string(REGEX REPLACE "(,|$)" "x" out "a,b")
|
||||
check_output(out "axbx")
|
||||
|
||||
string(REGEX REPLACE "(|$)" "x" out "")
|
||||
check_output(out "x")
|
||||
|
||||
string(REGEX REPLACE "(|$)" "x" out "ab")
|
||||
check_output(out "xaxbx")
|
||||
|
||||
string(REGEX REPLACE "(b|)" "x" out "abc")
|
||||
check_output(out "xaxxcx")
|
||||
|
||||
string(REGEX REPLACE "(|b)" "x" out "abc")
|
||||
check_output(out "xaxxxcx")
|
||||
|
||||
string(REGEX REPLACE "a*" "x" out "aaa")
|
||||
check_output(out "xx")
|
||||
|
||||
string(REGEX REPLACE "(a)?(b)?" "x" out "")
|
||||
check_output(out "x")
|
||||
|
||||
string(REGEX REPLACE "(a)?(b)?" "x" out "abba")
|
||||
check_output(out "xxxx")
|
||||
@@ -35,6 +35,7 @@ run_cmake(UuidBadType)
|
||||
|
||||
run_cmake(RegexClear)
|
||||
run_cmake(RegexMultiMatchClear)
|
||||
run_cmake(RegexEmptyMatch)
|
||||
run_cmake(CMP0186)
|
||||
|
||||
run_cmake(UTF-16BE)
|
||||
|
||||
Reference in New Issue
Block a user