[libc++][regex] Validate backreferences in the constructor.

This patch makes the constructor throw a regex_error with error_backref
for invalid backreferences when using the basic, extended, grep, or egrep grammar.

This fixes bug 34297 (https://bugs.llvm.org/show_bug.cgi?id=34297).

Differential Revision: https://reviews.llvm.org/D62453
This commit is contained in:
Mark de Wever 2020-02-20 18:13:38 -05:00 committed by Louis Dionne
parent e5782377f3
commit 72ce0c8073
2 changed files with 53 additions and 5 deletions

View File

@ -4661,6 +4661,8 @@ basic_regex<_CharT, _Traits>::__test_back_ref(_CharT c)
unsigned __val = __traits_.value(c, 10);
if (__val >= 1 && __val <= 9)
{
if (__val > mark_count())
__throw_regex_error<regex_constants::error_backref>();
__push_back_ref(__val);
return true;
}

View File

@ -18,11 +18,11 @@
#include <cassert>
#include "test_macros.h"
static bool error_badbackref_thrown(const char *pat)
static bool error_badbackref_thrown(const char *pat, std::regex::flag_type f)
{
bool result = false;
try {
std::regex re(pat);
std::regex re(pat, f);
} catch (const std::regex_error &ex) {
result = (ex.code() == std::regex_constants::error_backref);
}
@ -31,9 +31,25 @@ static bool error_badbackref_thrown(const char *pat)
int main(int, char**)
{
assert(error_badbackref_thrown("\\1abc")); // no references
assert(error_badbackref_thrown("ab(c)\\2def")); // only one reference
assert(error_badbackref_thrown("\\800000000000000000000000000000")); // overflows
// no references
assert(error_badbackref_thrown("\\1abc", std::regex_constants::ECMAScript));
assert(error_badbackref_thrown("\\1abd", std::regex::basic));
assert(error_badbackref_thrown("\\1abd", std::regex::extended));
assert(error_badbackref_thrown("\\1abd", std::regex::awk) == false);
assert(error_badbackref_thrown("\\1abd", std::regex::grep));
assert(error_badbackref_thrown("\\1abd", std::regex::egrep));
// only one reference
assert(error_badbackref_thrown("ab(c)\\2def", std::regex_constants::ECMAScript));
assert(error_badbackref_thrown("ab\\(c\\)\\2def", std::regex_constants::basic));
assert(error_badbackref_thrown("ab(c)\\2def", std::regex_constants::extended));
assert(error_badbackref_thrown("ab\\(c\\)\\2def", std::regex_constants::awk) == false);
assert(error_badbackref_thrown("ab(c)\\2def", std::regex_constants::awk) == false);
assert(error_badbackref_thrown("ab\\(c\\)\\2def", std::regex_constants::grep));
assert(error_badbackref_thrown("ab(c)\\2def", std::regex_constants::egrep));
assert(error_badbackref_thrown("\\800000000000000000000000000000", std::regex_constants::ECMAScript)); // overflows
// this should NOT throw, because we only should look at the '1'
// See https://bugs.llvm.org/show_bug.cgi?id=31387
@ -42,5 +58,35 @@ int main(int, char**)
std::regex re(pat1, pat1 + 7); // extra chars after the end.
}
// reference before group
assert(error_badbackref_thrown("\\1(abc)", std::regex_constants::ECMAScript));
assert(error_badbackref_thrown("\\1\\(abd\\)", std::regex::basic));
assert(error_badbackref_thrown("\\1(abd)", std::regex::extended));
assert(error_badbackref_thrown("\\1(abd)", std::regex::awk) == false);
assert(error_badbackref_thrown("\\1\\(abd\\)", std::regex::awk) == false);
assert(error_badbackref_thrown("\\1\\(abd\\)", std::regex::grep));
assert(error_badbackref_thrown("\\1(abd)", std::regex::egrep));
// reference limit
assert(error_badbackref_thrown("(cat)\\10", std::regex::ECMAScript));
assert(error_badbackref_thrown("\\(cat\\)\\10", std::regex::basic) == false);
assert(error_badbackref_thrown("(cat)\\10", std::regex::extended) == false);
assert(error_badbackref_thrown("\\(cat\\)\\10", std::regex::awk) == false);
assert(error_badbackref_thrown("(cat)\\10", std::regex::awk) == false);
assert(error_badbackref_thrown("\\(cat\\)\\10", std::regex::grep) == false);
assert(error_badbackref_thrown("(cat)\\10", std::regex::egrep) == false);
// https://bugs.llvm.org/show_bug.cgi?id=34297
assert(error_badbackref_thrown("(cat)\\1", std::regex::basic));
assert(error_badbackref_thrown("\\(cat\\)\\1", std::regex::basic) == false);
assert(error_badbackref_thrown("(cat)\\1", std::regex::extended) == false);
assert(error_badbackref_thrown("\\(cat\\)\\1", std::regex::extended));
assert(error_badbackref_thrown("(cat)\\1", std::regex::awk) == false);
assert(error_badbackref_thrown("\\(cat\\)\\1", std::regex::awk) == false);
assert(error_badbackref_thrown("(cat)\\1", std::regex::grep));
assert(error_badbackref_thrown("\\(cat\\)\\1", std::regex::grep) == false);
assert(error_badbackref_thrown("(cat)\\1", std::regex::egrep) == false);
assert(error_badbackref_thrown("\\(cat\\)\\1", std::regex::egrep));
return 0;
}