cmListFileLexer: Allow a leading UTF-8 Byte-Order-Mark (#11137)

Teach the lexer to read a UTF-8, UTF-16 BE/LE, or UTF-32 BE/LE
Byte-Order-Mark from the start of a file if any is present.  Report an
error on files using UTF-16 or UTF-32 and accept a UTF-8 or missing BOM.
This commit is contained in:
Brad King
2013-10-14 15:13:11 -04:00
parent 56457837e2
commit dbd933365e
19 changed files with 168 additions and 10 deletions

View File

@@ -57,13 +57,26 @@ cmListFileParser::~cmListFileParser()
bool cmListFileParser::ParseFile()
{
// Open the file.
if(!cmListFileLexer_SetFileName(this->Lexer, this->FileName))
cmListFileLexer_BOM bom;
if(!cmListFileLexer_SetFileName(this->Lexer, this->FileName, &bom))
{
cmSystemTools::Error("cmListFileCache: error can not open file ",
this->FileName);
return false;
}
// Verify the Byte-Order-Mark, if any.
if(bom != cmListFileLexer_BOM_None &&
bom != cmListFileLexer_BOM_UTF8)
{
cmListFileLexer_SetFileName(this->Lexer, 0, 0);
cmOStringStream m;
m << "File\n " << this->FileName << "\n"
<< "starts with a Byte-Order-Mark that is not UTF-8.";
this->Makefile->IssueMessage(cmake::FATAL_ERROR, m.str());
return false;
}
// Use a simple recursive-descent parser to process the token
// stream.
bool haveNewline = true;