mirror of https://github.com/microsoft/clang.git
Implement warning for non-wide string literals with an unexpected encoding. Downgrade error for non-wide character literals with an unexpected encoding to a warning for compatibility with gcc and older versions of clang. <rdar://problem/10837678>.
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@150295 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
d747efaad8
commit
91359302b8
|
@ -136,9 +136,16 @@ def err_unsupported_string_concat : Error<
|
|||
"unsupported non-standard concatenation of string literals">;
|
||||
def err_bad_string_encoding : Error<
|
||||
"illegal character encoding in string literal">;
|
||||
def warn_bad_string_encoding : ExtWarn<
|
||||
"illegal character encoding in string literal">,
|
||||
InGroup<DiagGroup<"invalid-source-encoding">>;
|
||||
def err_bad_character_encoding : Error<
|
||||
"illegal character encoding in character literal">;
|
||||
|
||||
def warn_bad_character_encoding : ExtWarn<
|
||||
"illegal character encoding in character literal">,
|
||||
InGroup<DiagGroup<"invalid-source-encoding">>;
|
||||
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// PTH Diagnostics
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
|
|
@ -199,6 +199,7 @@ public:
|
|||
private:
|
||||
void init(const Token *StringToks, unsigned NumStringToks);
|
||||
bool CopyStringFragment(StringRef Fragment);
|
||||
bool DiagnoseBadString(const Token& Tok);
|
||||
};
|
||||
|
||||
} // end namespace clang
|
||||
|
|
|
@ -822,17 +822,32 @@ CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
|
|||
++begin;
|
||||
} while (begin != end && *begin != '\\');
|
||||
|
||||
uint32_t *tmp_begin = buffer_begin;
|
||||
char const *tmp_in_start = start;
|
||||
uint32_t *tmp_out_start = buffer_begin;
|
||||
ConversionResult res =
|
||||
ConvertUTF8toUTF32(reinterpret_cast<UTF8 const **>(&start),
|
||||
reinterpret_cast<UTF8 const *>(begin),
|
||||
&buffer_begin,buffer_end,strictConversion);
|
||||
if (res!=conversionOK) {
|
||||
PP.Diag(Loc, diag::err_bad_character_encoding);
|
||||
HadError = true;
|
||||
// If we see bad encoding for unprefixed character literals, warn and
|
||||
// simply copy the byte values, for compatibility with gcc and
|
||||
// older versions of clang.
|
||||
bool NoErrorOnBadEncoding = isAscii();
|
||||
unsigned Msg = diag::err_bad_character_encoding;
|
||||
if (NoErrorOnBadEncoding)
|
||||
Msg = diag::warn_bad_character_encoding;
|
||||
PP.Diag(Loc, Msg);
|
||||
if (NoErrorOnBadEncoding) {
|
||||
start = tmp_in_start;
|
||||
buffer_begin = tmp_out_start;
|
||||
for ( ; start != begin; ++start, ++buffer_begin)
|
||||
*buffer_begin = static_cast<uint8_t>(*start);
|
||||
} else {
|
||||
HadError = true;
|
||||
}
|
||||
} else {
|
||||
for (; tmp_begin<buffer_begin; ++tmp_begin) {
|
||||
if (*tmp_begin > largest_character_for_kind) {
|
||||
for (; tmp_out_start <buffer_begin; ++tmp_out_start) {
|
||||
if (*tmp_out_start > largest_character_for_kind) {
|
||||
HadError = true;
|
||||
PP.Diag(Loc, diag::err_character_too_large);
|
||||
}
|
||||
|
@ -1097,10 +1112,8 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){
|
|||
// Copy the string over
|
||||
if (CopyStringFragment(StringRef(ThisTokBuf,ThisTokEnd-ThisTokBuf)))
|
||||
{
|
||||
if (Diags)
|
||||
Diags->Report(FullSourceLoc(StringToks[i].getLocation(), SM),
|
||||
diag::err_bad_string_encoding);
|
||||
hadError = true;
|
||||
if (DiagnoseBadString(StringToks[i]))
|
||||
hadError = true;
|
||||
}
|
||||
|
||||
} else {
|
||||
|
@ -1131,10 +1144,8 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){
|
|||
// Copy the character span over.
|
||||
if (CopyStringFragment(StringRef(InStart,ThisTokBuf-InStart)))
|
||||
{
|
||||
if (Diags)
|
||||
Diags->Report(FullSourceLoc(StringToks[i].getLocation(), SM),
|
||||
diag::err_bad_string_encoding);
|
||||
hadError = true;
|
||||
if (DiagnoseBadString(StringToks[i]))
|
||||
hadError = true;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
@ -1219,6 +1230,9 @@ bool StringLiteralParser::CopyStringFragment(StringRef Fragment) {
|
|||
ConversionResult result = conversionOK;
|
||||
// Copy the character span over.
|
||||
if (CharByteWidth == 1) {
|
||||
if (!isLegalUTF8Sequence(reinterpret_cast<const UTF8*>(Fragment.begin()),
|
||||
reinterpret_cast<const UTF8*>(Fragment.end())))
|
||||
result = sourceIllegal;
|
||||
memcpy(ResultPtr, Fragment.data(), Fragment.size());
|
||||
ResultPtr += Fragment.size();
|
||||
} else if (CharByteWidth == 2) {
|
||||
|
@ -1226,7 +1240,7 @@ bool StringLiteralParser::CopyStringFragment(StringRef Fragment) {
|
|||
// FIXME: Make the type of the result buffer correct instead of
|
||||
// using reinterpret_cast.
|
||||
UTF16 *targetStart = reinterpret_cast<UTF16*>(ResultPtr);
|
||||
ConversionFlags flags = lenientConversion;
|
||||
ConversionFlags flags = strictConversion;
|
||||
result = ConvertUTF8toUTF16(
|
||||
&sourceStart,sourceStart + Fragment.size(),
|
||||
&targetStart,targetStart + 2*Fragment.size(),flags);
|
||||
|
@ -1237,7 +1251,7 @@ bool StringLiteralParser::CopyStringFragment(StringRef Fragment) {
|
|||
// FIXME: Make the type of the result buffer correct instead of
|
||||
// using reinterpret_cast.
|
||||
UTF32 *targetStart = reinterpret_cast<UTF32*>(ResultPtr);
|
||||
ConversionFlags flags = lenientConversion;
|
||||
ConversionFlags flags = strictConversion;
|
||||
result = ConvertUTF8toUTF32(
|
||||
&sourceStart,sourceStart + Fragment.size(),
|
||||
&targetStart,targetStart + 4*Fragment.size(),flags);
|
||||
|
@ -1249,6 +1263,17 @@ bool StringLiteralParser::CopyStringFragment(StringRef Fragment) {
|
|||
return result != conversionOK;
|
||||
}
|
||||
|
||||
/// DiagnoseBadString - Report an illegal-character-encoding diagnostic at the
/// location of Tok.  The diagnostic is a warning when isAscii() is true and an
/// error otherwise.  Returns true only when the error form was selected, so
/// callers can use the result to decide whether to set their error flag.
bool StringLiteralParser::DiagnoseBadString(const Token &Tok) {
|
||||
// Bad encoding in an unprefixed string literal is only warned about, for
|
||||
// compatibility with gcc and older versions of clang.  This function only
|
||||
// selects and emits the diagnostic; it does not touch the literal's bytes.
|
||||
bool NoErrorOnBadEncoding = isAscii();
|
||||
unsigned Msg = NoErrorOnBadEncoding ? diag::warn_bad_string_encoding :
|
||||
diag::err_bad_string_encoding;
|
||||
// With no diagnostics engine nothing is reported, but the return value below
// is computed the same way either way.
if (Diags)
|
||||
Diags->Report(FullSourceLoc(Tok.getLocation(), SM), Msg);
|
||||
return !NoErrorOnBadEncoding;
|
||||
}
|
||||
|
||||
/// getOffsetOfStringByte - This function returns the offset of the
|
||||
/// specified byte of the string data represented by Token. This handles
|
||||
|
|
|
@ -3,8 +3,13 @@
|
|||
// This file is encoded using ISO-8859-1
|
||||
|
||||
int main() {
|
||||
'é'; // expected-error {{illegal character encoding in character literal}}
|
||||
u'é'; // expected-error {{illegal character encoding in character literal}}
|
||||
U'é'; // expected-error {{illegal character encoding in character literal}}
|
||||
L'é'; // expected-error {{illegal character encoding in character literal}}
|
||||
(void)'é'; // expected-warning {{illegal character encoding in character literal}}
|
||||
(void)u'é'; // expected-error {{illegal character encoding in character literal}}
|
||||
(void)U'é'; // expected-error {{illegal character encoding in character literal}}
|
||||
(void)L'é'; // expected-error {{illegal character encoding in character literal}}
|
||||
|
||||
// For narrow character literals, since there is no error, make sure the
|
||||
// encoding is correct
|
||||
static_assert((unsigned char)'é' == 0xE9, ""); // expected-warning {{illegal character encoding in character literal}}
|
||||
static_assert('éé' == 0xE9E9, ""); // expected-warning {{illegal character encoding in character literal}} expected-warning {{multi-character character constant}}
|
||||
}
|
||||
|
|
|
@ -12,4 +12,7 @@ void f() {
|
|||
wchar_t const *d = LR"(Àéîõü)"; // expected-error {{illegal character encoding in string literal}}
|
||||
char16_t const *e = uR"(Àéîõü)"; // expected-error {{illegal character encoding in string literal}}
|
||||
char32_t const *f = UR"(Àéîõü)"; // expected-error {{illegal character encoding in string literal}}
|
||||
|
||||
char const *g = "Àéîõü"; // expected-warning {{illegal character encoding in string literal}}
|
||||
char const *h = u8"Àéîõü"; // expected-error {{illegal character encoding in string literal}}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue