Factor out repeated code parsing and concatenating header-names from tokens.

We now actually form an angled_string_literal token for a header name by
concatenation rather than just working out what its contents would be.
This substantially simplifies downstream processing and is necessary for
C++20 header unit imports.

llvm-svn: 356433
This commit is contained in:
Richard Smith 2019-03-19 01:51:19 +00:00
parent 5a8ea4ca94
commit b9b05100c5
9 changed files with 130 additions and 206 deletions

View File

@ -1273,6 +1273,9 @@ public:
/// Lex the next token for this preprocessor.
void Lex(Token &Result);
/// Lex a token, forming a header-name token if possible.
bool LexHeaderName(Token &Result, bool AllowConcatenation = true);
void LexAfterModuleImport(Token &Result);
void makeModuleVisible(Module *M, SourceLocation Loc);
@ -1866,22 +1869,6 @@ public:
/// Return true if we're in the top-level file, not in a \#include.
bool isInPrimaryFile() const;
/// Handle cases where the \#include name is expanded
/// from a macro as multiple tokens, which need to be glued together.
///
/// This occurs for code like:
/// \code
/// \#define FOO <x/y.h>
/// \#include FOO
/// \endcode
/// because in this case, "<x/y.h>" is returned as 7 tokens, not one.
///
/// This code concatenates and consumes tokens up to the '>' token. It
/// returns false if the > was found, otherwise it returns true if it finds
/// and consumes the EOD marker.
bool ConcatenateIncludeName(SmallString<128> &FilenameBuffer,
SourceLocation &End);
/// Lex an on-off-switch (C99 6.10.6p2) and verify that it is
/// followed by EOD. Return true if the token is not a valid on-off-switch.
bool LexOnOffSwitch(tok::OnOffSwitch &Result);

View File

@ -129,11 +129,7 @@ public:
//===--------------------------------------------------------------------===//
// Misc. lexing methods.
/// After the preprocessor has parsed a \#include, lex and
/// (potentially) macro expand the filename.
///
/// If the sequence parsed is not lexically legal, emit a diagnostic and
/// return a result EOD token.
/// Lex a token, producing a header-name token if possible.
void LexIncludeFilename(Token &FilenameTok);
/// Inform the lexer whether or not we are currently lexing a

View File

@ -1480,67 +1480,6 @@ bool Preprocessor::GetIncludeFilenameSpelling(SourceLocation Loc,
return isAngled;
}
// Glue together an \#include name that a macro expansion delivered as several
// separate tokens.
//
// Example:
// \code
// \#define FOO <a/b.h>
// \#include FOO
// \endcode
// Here "<a/b.h>" reaches the preprocessor as 7 tokens rather than one.
//
// Tokens are consumed and their spellings appended to FilenameBuffer up to
// and including the '>' token; End is updated to the location of each token
// consumed. Returns false once '>' has been appended. If the EOD marker is
// reached first, it is consumed, a diagnostic is emitted, and true is
// returned.
bool Preprocessor::ConcatenateIncludeName(SmallString<128> &FilenameBuffer,
                                          SourceLocation &End) {
  Token Piece;
  for (Lex(Piece); !Piece.is(tok::eod); Lex(Piece)) {
    End = Piece.getLocation();

    // FIXME: Provide code completion for #includes.
    if (Piece.is(tok::code_completion)) {
      setCodeCompletionReached();
      continue;
    }

    // A token that was preceded by whitespace contributes a single space
    // ahead of its spelling.
    if (Piece.hasLeadingSpace())
      FilenameBuffer.push_back(' ');

    // Make room for the token and ask for its spelling, offering the freshly
    // reserved tail of FilenameBuffer as the destination.
    const size_t Offset = FilenameBuffer.size();
    const unsigned ReservedLen = Piece.getLength();
    FilenameBuffer.resize(Offset + ReservedLen);
    char *const Dest = &FilenameBuffer[Offset];
    const char *Spelling = Dest;
    const unsigned SpelledLen = getSpelling(Piece, Spelling);

    // getSpelling may hand back a pointer to storage elsewhere; in that case
    // the characters still have to be copied into place.
    if (Spelling != Dest)
      memcpy(Dest, Spelling, SpelledLen);

    // Trim the buffer when the spelling differs in length from the
    // reservation.
    if (SpelledLen != ReservedLen)
      FilenameBuffer.resize(Offset + SpelledLen);

    // The closing '>' completes the name.
    if (Piece.is(tok::greater))
      return false;
  }

  // Ran into the EOD marker: report it and return true so the caller knows
  // the EOD has already been read.
  Diag(Piece.getLocation(), diag::err_pp_expects_filename);
  return true;
}
/// Push a token onto the token stream containing an annotation.
void Preprocessor::EnterAnnotationToken(SourceRange Range,
tok::TokenKind Kind,
@ -1671,43 +1610,23 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
const FileEntry *LookupFromFile,
bool isImport) {
Token FilenameTok;
CurPPLexer->LexIncludeFilename(FilenameTok);
// Reserve a buffer to get the spelling.
SmallString<128> FilenameBuffer;
StringRef Filename;
SourceLocation End;
SourceLocation CharEnd; // the end of this directive, in characters
switch (FilenameTok.getKind()) {
case tok::eod:
// If the token kind is EOD, the error has already been diagnosed.
if (LexHeaderName(FilenameTok))
return;
case tok::angle_string_literal:
case tok::string_literal:
Filename = getSpelling(FilenameTok, FilenameBuffer);
End = FilenameTok.getLocation();
CharEnd = End.getLocWithOffset(FilenameTok.getLength());
break;
case tok::less:
// This could be a <foo/bar.h> file coming from a macro expansion. In this
// case, glue the tokens together into FilenameBuffer and interpret those.
FilenameBuffer.push_back('<');
if (ConcatenateIncludeName(FilenameBuffer, End))
return; // Found <eod> but no ">"? Diagnostic already emitted.
Filename = FilenameBuffer;
CharEnd = End.getLocWithOffset(1);
break;
default:
if (!FilenameTok.isOneOf(tok::angle_string_literal, tok::string_literal)) {
Diag(FilenameTok.getLocation(), diag::err_pp_expects_filename);
DiscardUntilEndOfDirective();
if (FilenameTok.isNot(tok::eod))
DiscardUntilEndOfDirective();
return;
}
SmallString<128> FilenameBuffer;
StringRef Filename = getSpelling(FilenameTok, FilenameBuffer);
SourceLocation CharEnd = FilenameTok.getEndLoc();
CharSourceRange FilenameRange
= CharSourceRange::getCharRange(FilenameTok.getLocation(), CharEnd);
SourceRange DirectiveRange(HashLoc, FilenameTok.getLocation());
StringRef OriginalFilename = Filename;
bool isAngled =
GetIncludeFilenameSpelling(FilenameTok.getLocation(), Filename);
@ -1808,10 +1727,11 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
Callbacks ? &RelativePath : nullptr, &SuggestedModule, &IsMapped,
/*IsFrameworkFound=*/nullptr);
if (File) {
SourceRange Range(FilenameTok.getLocation(), CharEnd);
Diag(FilenameTok, diag::err_pp_file_not_found_angled_include_not_fatal) <<
Filename <<
FixItHint::CreateReplacement(Range, "\"" + Filename.str() + "\"");
Diag(FilenameTok,
diag::err_pp_file_not_found_angled_include_not_fatal)
<< Filename
<< FixItHint::CreateReplacement(FilenameRange,
"\"" + Filename.str() + "\"");
}
}
@ -1845,12 +1765,12 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
Callbacks ? &RelativePath : nullptr, &SuggestedModule, &IsMapped,
/*IsFrameworkFound=*/nullptr);
if (File) {
SourceRange Range(FilenameTok.getLocation(), CharEnd);
auto Hint = isAngled
? FixItHint::CreateReplacement(
Range, "<" + TypoCorrectionName.str() + ">")
: FixItHint::CreateReplacement(
Range, "\"" + TypoCorrectionName.str() + "\"");
auto Hint =
isAngled
? FixItHint::CreateReplacement(
FilenameRange, "<" + TypoCorrectionName.str() + ">")
: FixItHint::CreateReplacement(
FilenameRange, "\"" + TypoCorrectionName.str() + "\"");
Diag(FilenameTok, diag::err_pp_file_not_found_typo_not_fatal)
<< OriginalFilename << TypoCorrectionName << Hint;
// We found the file, so set the Filename to the name after typo
@ -2035,9 +1955,8 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
// For other system headers, we don't. They can be controlled separately.
auto DiagId = (FileCharacter == SrcMgr::C_User || warnByDefaultOnWrongCase(Name)) ?
diag::pp_nonportable_path : diag::pp_nonportable_system_path;
SourceRange Range(FilenameTok.getLocation(), CharEnd);
Diag(FilenameTok, DiagId) << Path <<
FixItHint::CreateReplacement(Range, Path);
FixItHint::CreateReplacement(FilenameRange, Path);
}
}
@ -2058,8 +1977,7 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
if (IncludeTok.getIdentifierInfo()->getPPKeywordID() !=
tok::pp___include_macros)
EnterAnnotationToken(SourceRange(HashLoc, End),
tok::annot_module_include, M);
EnterAnnotationToken(DirectiveRange, tok::annot_module_include, M);
}
return;
}
@ -2072,7 +1990,7 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
}
// Look up the file, create a File ID for it.
SourceLocation IncludePos = End;
SourceLocation IncludePos = FilenameTok.getLocation();
// If the filename string was the result of macro expansions, set the include
// position on the file where it will be included and after the expansions.
if (IncludePos.isMacroID())
@ -2114,7 +2032,7 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
// submodule.
// FIXME: There's no point doing this if we're handling a #__include_macros
// directive.
EnterAnnotationToken(SourceRange(HashLoc, End), tok::annot_module_begin, M);
EnterAnnotationToken(DirectiveRange, tok::annot_module_begin, M);
}
}

View File

@ -1153,8 +1153,11 @@ static bool EvaluateHasIncludeCommon(Token &Tok,
return false;
}
// Get '('.
PP.LexNonComment(Tok);
// Get '('. If we don't have a '(', try to form a header-name token.
do {
if (PP.LexHeaderName(Tok))
return false;
} while (Tok.getKind() == tok::comment);
// Ensure we have a '('.
if (Tok.isNot(tok::l_paren)) {
@ -1163,58 +1166,27 @@ static bool EvaluateHasIncludeCommon(Token &Tok,
PP.Diag(LParenLoc, diag::err_pp_expected_after) << II << tok::l_paren;
// If the next token looks like a filename or the start of one,
// assume it is and process it as such.
if (!Tok.is(tok::angle_string_literal) && !Tok.is(tok::string_literal) &&
!Tok.is(tok::less))
if (!Tok.is(tok::angle_string_literal) && !Tok.is(tok::string_literal))
return false;
} else {
// Save '(' location for possible missing ')' message.
LParenLoc = Tok.getLocation();
if (PP.LexHeaderName(Tok))
return false;
}
if (PP.getCurrentLexer()) {
// Get the file name.
PP.getCurrentLexer()->LexIncludeFilename(Tok);
} else {
// We're in a macro, so we can't use LexIncludeFilename; just
// grab the next token.
PP.Lex(Tok);
}
if (!Tok.isOneOf(tok::angle_string_literal, tok::string_literal)) {
PP.Diag(Tok.getLocation(), diag::err_pp_expects_filename);
return false;
}
// Reserve a buffer to get the spelling.
SmallString<128> FilenameBuffer;
StringRef Filename;
SourceLocation EndLoc;
switch (Tok.getKind()) {
case tok::eod:
// If the token kind is EOD, the error has already been diagnosed.
bool Invalid = false;
StringRef Filename = PP.getSpelling(Tok, FilenameBuffer, &Invalid);
if (Invalid)
return false;
case tok::angle_string_literal:
case tok::string_literal: {
bool Invalid = false;
Filename = PP.getSpelling(Tok, FilenameBuffer, &Invalid);
if (Invalid)
return false;
break;
}
case tok::less:
// This could be a <foo/bar.h> file coming from a macro expansion. In this
// case, glue the tokens together into FilenameBuffer and interpret those.
FilenameBuffer.push_back('<');
if (PP.ConcatenateIncludeName(FilenameBuffer, EndLoc)) {
// Let the caller know a <eod> was found by changing the Token kind.
Tok.setKind(tok::eod);
return false; // Found <eod> but no ">"? Diagnostic already emitted.
}
Filename = FilenameBuffer;
break;
default:
PP.Diag(Tok.getLocation(), diag::err_pp_expects_filename);
return false;
}
SourceLocation FilenameLoc = Tok.getLocation();
// Get ')'.

View File

@ -482,12 +482,15 @@ void Preprocessor::HandlePragmaSystemHeader(Token &SysHeaderTok) {
/// HandlePragmaDependency - Handle \#pragma GCC dependency "foo" blah.
void Preprocessor::HandlePragmaDependency(Token &DependencyTok) {
Token FilenameTok;
CurPPLexer->LexIncludeFilename(FilenameTok);
// If the token kind is EOD, the error has already been diagnosed.
if (FilenameTok.is(tok::eod))
if (LexHeaderName(FilenameTok, /*AllowConcatenation*/false))
return;
// If the next token wasn't a header-name, diagnose the error.
if (!FilenameTok.isOneOf(tok::angle_string_literal, tok::string_literal)) {
Diag(FilenameTok.getLocation(), diag::err_pp_expects_filename);
return;
}
// Reserve a buffer to get the spelling.
SmallString<128> FilenameBuffer;
bool Invalid = false;
@ -662,24 +665,14 @@ void Preprocessor::HandlePragmaIncludeAlias(Token &Tok) {
// We expect either a quoted string literal, or a bracketed name
Token SourceFilenameTok;
CurPPLexer->LexIncludeFilename(SourceFilenameTok);
if (SourceFilenameTok.is(tok::eod)) {
// The diagnostic has already been handled
if (LexHeaderName(SourceFilenameTok))
return;
}
StringRef SourceFileName;
SmallString<128> FileNameBuffer;
if (SourceFilenameTok.is(tok::string_literal) ||
SourceFilenameTok.is(tok::angle_string_literal)) {
SourceFileName = getSpelling(SourceFilenameTok, FileNameBuffer);
} else if (SourceFilenameTok.is(tok::less)) {
// This could be a path instead of just a name
FileNameBuffer.push_back('<');
SourceLocation End;
if (ConcatenateIncludeName(FileNameBuffer, End))
return; // Diagnostic already emitted
SourceFileName = FileNameBuffer;
} else {
Diag(Tok, diag::warn_pragma_include_alias_expected_filename);
return;
@ -694,23 +687,13 @@ void Preprocessor::HandlePragmaIncludeAlias(Token &Tok) {
}
Token ReplaceFilenameTok;
CurPPLexer->LexIncludeFilename(ReplaceFilenameTok);
if (ReplaceFilenameTok.is(tok::eod)) {
// The diagnostic has already been handled
if (LexHeaderName(ReplaceFilenameTok))
return;
}
StringRef ReplaceFileName;
if (ReplaceFilenameTok.is(tok::string_literal) ||
ReplaceFilenameTok.is(tok::angle_string_literal)) {
ReplaceFileName = getSpelling(ReplaceFilenameTok, FileNameBuffer);
} else if (ReplaceFilenameTok.is(tok::less)) {
// This could be a path instead of just a name
FileNameBuffer.push_back('<');
SourceLocation End;
if (ConcatenateIncludeName(FileNameBuffer, End))
return; // Diagnostic already emitted
ReplaceFileName = FileNameBuffer;
} else {
Diag(Tok, diag::warn_pragma_include_alias_expected_filename);
return;

View File

@ -895,6 +895,80 @@ void Preprocessor::Lex(Token &Result) {
LastTokenWasAt = Result.is(tok::at);
}
/// Lex a header-name token (including one formed from header-name-tokens if
/// \p AllowConcatenation is \c true).
///
/// \param FilenameTok Filled in with the next token. On success, this will
///        be either an angle_string_literal or a string_literal token. On
///        failure, it will be whatever other token was found instead.
/// \param AllowConcatenation If \c true, allow a < token, followed by other
///        tokens and finally a > token, to form a single header-name token.
/// \return \c true if we reached EOD or EOF while looking for a > token in
///         a concatenated header name and diagnosed it. \c false otherwise.
///         Note that \c false does not by itself mean a header-name was
///         formed; callers must still check the kind of \p FilenameTok.
bool Preprocessor::LexHeaderName(Token &FilenameTok, bool AllowConcatenation) {
  // Lex using header-name tokenization rules if tokens are being lexed from
  // a file. Just grab a token normally if we're in a macro expansion.
  if (CurPPLexer)
    CurPPLexer->LexIncludeFilename(FilenameTok);
  else
    Lex(FilenameTok);

  // This could be a <foo/bar.h> file coming from a macro expansion. In this
  // case, glue the tokens together into an angle_string_literal token.
  if (FilenameTok.is(tok::less) && AllowConcatenation) {
    SmallString<128> FilenameBuffer;
    SourceLocation Start = FilenameTok.getLocation();
    SourceLocation End;
    FilenameBuffer.push_back('<');

    // Consume tokens until we find a '>'. Each iteration lexes exactly one
    // token, so every token is checked for EOD/EOF and for '>'.
    while (FilenameTok.isNot(tok::greater)) {
      Lex(FilenameTok);
      if (FilenameTok.isOneOf(tok::eod, tok::eof)) {
        Diag(FilenameTok.getLocation(), diag::err_expected) << tok::greater;
        Diag(Start, diag::note_matching) << tok::less;
        return true;
      }

      End = FilenameTok.getLocation();

      // FIXME: Provide code completion for #includes.
      if (FilenameTok.is(tok::code_completion)) {
        setCodeCompletionReached();
        // Do not lex here: the top of the loop fetches the next token.
        // Lexing a second time would discard the token that follows the
        // code-completion point without appending it or testing it for
        // '>' / EOD.
        continue;
      }

      // Append the spelling of this token to the buffer. If there was a space
      // before it, add it now.
      if (FilenameTok.hasLeadingSpace())
        FilenameBuffer.push_back(' ');

      // Get the spelling of the token, directly into FilenameBuffer if
      // possible.
      size_t PreAppendSize = FilenameBuffer.size();
      FilenameBuffer.resize(PreAppendSize + FilenameTok.getLength());

      const char *BufPtr = &FilenameBuffer[PreAppendSize];
      unsigned ActualLen = getSpelling(FilenameTok, BufPtr);

      // If the token was spelled somewhere else, copy it into FilenameBuffer.
      if (BufPtr != &FilenameBuffer[PreAppendSize])
        memcpy(&FilenameBuffer[PreAppendSize], BufPtr, ActualLen);

      // Resize FilenameBuffer to the correct size.
      if (FilenameTok.getLength() != ActualLen)
        FilenameBuffer.resize(PreAppendSize + ActualLen);
    }

    // Replace the final '>' token with a single angle_string_literal token
    // whose text is the concatenated name, spanning Start..End.
    FilenameTok.startToken();
    FilenameTok.setKind(tok::angle_string_literal);
    CreateString(FilenameBuffer, FilenameTok, Start, End);
  }

  return false;
}
/// Lex a token following the 'import' contextual keyword.
///
void Preprocessor::LexAfterModuleImport(Token &Result) {

View File

@ -30,9 +30,7 @@ PreprocessorLexer::PreprocessorLexer(Preprocessor *pp, FileID fid)
/// Lex a token, producing a header-name token if possible.
void PreprocessorLexer::LexIncludeFilename(Token &FilenameTok) {
assert(ParsingPreprocessorDirective &&
ParsingFilename == false &&
"Must be in a preprocessing directive!");
assert(ParsingFilename == false && "reentered LexIncludeFilename");
// We are now parsing a filename!
ParsingFilename = true;
@ -45,10 +43,6 @@ void PreprocessorLexer::LexIncludeFilename(Token &FilenameTok) {
// We should have obtained the filename now.
ParsingFilename = false;
// No filename?
if (FilenameTok.is(tok::eod))
PP->Diag(FilenameTok.getLocation(), diag::err_pp_expects_filename);
}
/// getFileEntry - Return the FileEntry corresponding to this FileID. Like

View File

@ -4,5 +4,5 @@
// This file intentionally ends without a \n on the last line. Make sure your
// editor doesn't add one.
// expected-error@+1{{expected "FILENAME" or <FILENAME>}}
#include <\
// expected-error@+1{{expected '>'}} expected-note@+1{{to match this '<'}}
#include <\

View File

@ -179,7 +179,7 @@ __has_include
#if __has_include(<stdint.h>
#endif
// expected-error@+1 {{expected "FILENAME" or <FILENAME>}} // expected-error@+1 {{expected value in expression}}
// expected-error@+1 {{expected '>'}} expected-note@+1 {{to match this '<'}} // expected-error@+1 {{expected value in expression}}
#if __has_include(<stdint.h)
#endif