mirror of
https://github.com/Kitware/CMake.git
synced 2026-01-06 05:40:54 -06:00
cmListFileLexer: Do not require files to be seekable
Read the BOM sequentially and store the bytes that were read for later use if they do not correspond to a BOM. This allows a FIFO to be used as input, e.g., for piping input or Bash process substitution.
This commit is contained in:
@@ -773,6 +773,9 @@ struct cmListFileLexer_s
|
||||
size_t size;
|
||||
FILE* file;
|
||||
size_t cr;
|
||||
char read_buffer[4];
|
||||
size_t read_size;
|
||||
size_t read_position;
|
||||
char* string_buffer;
|
||||
char* string_position;
|
||||
size_t string_left;
|
||||
@@ -2626,9 +2629,26 @@ static int cmListFileLexerInput(cmListFileLexer* lexer, char* buffer,
|
||||
does not convert newlines on all platforms. Move any
|
||||
trailing CR to the start of the buffer for the next read. */
|
||||
size_t cr = lexer->cr;
|
||||
size_t n;
|
||||
size_t n = 0;
|
||||
buffer[0] = '\r';
|
||||
n = fread(buffer + cr, 1, bufferSize - cr, lexer->file);
|
||||
|
||||
size_t actualBufferSize = bufferSize - cr;
|
||||
char* p = buffer + cr;
|
||||
size_t readLeft = lexer->read_size - lexer->read_position;
|
||||
|
||||
/* Absorb the bytes that were read during BOM detection, if any. */
|
||||
if (readLeft > 0) {
|
||||
size_t actualReadSize =
|
||||
actualBufferSize >= readLeft ? readLeft : actualBufferSize;
|
||||
memcpy(p, lexer->read_buffer + lexer->read_position, actualReadSize);
|
||||
lexer->read_position += actualReadSize;
|
||||
p += actualReadSize;
|
||||
n += actualReadSize;
|
||||
actualBufferSize -= actualReadSize;
|
||||
}
|
||||
|
||||
n += fread(p, 1, actualBufferSize, lexer->file);
|
||||
|
||||
if (n) {
|
||||
char* o = buffer;
|
||||
const char* i = buffer;
|
||||
@@ -2682,6 +2702,11 @@ static void cmListFileLexerDestroy(cmListFileLexer* lexer)
|
||||
fclose(lexer->file);
|
||||
lexer->file = 0;
|
||||
}
|
||||
if (lexer->read_size != 0) {
|
||||
memset(lexer->read_buffer, 0, sizeof(lexer->read_buffer));
|
||||
lexer->read_size = 0;
|
||||
lexer->read_position = 0;
|
||||
}
|
||||
if (lexer->string_buffer) {
|
||||
free(lexer->string_buffer);
|
||||
lexer->string_buffer = 0;
|
||||
@@ -2712,45 +2737,66 @@ void cmListFileLexer_Delete(cmListFileLexer* lexer)
|
||||
}
|
||||
|
||||
/*--------------------------------------------------------------------------*/
|
||||
static cmListFileLexer_BOM cmListFileLexer_ReadBOM(FILE* f)
|
||||
static cmListFileLexer_BOM cmListFileLexer_ReadBOM(FILE* f,
|
||||
unsigned char readBuffer[4],
|
||||
size_t* readSize)
|
||||
{
|
||||
unsigned char b[2];
|
||||
size_t n;
|
||||
if (fread(b, 1, 2, f) == 2) {
|
||||
/* Read the up to four bytes that might correspond to a BOM. In case these
|
||||
bytes turn out not to represent a BOM, save them for later consumption in
|
||||
order to avoid seeking the file (which might not be seekable, e.g., if
|
||||
it's a pipe). */
|
||||
unsigned char* b = readBuffer;
|
||||
|
||||
size_t n = fread(b, 1, 2, f);
|
||||
*readSize = n; /* Initialize first and then accumulate */
|
||||
|
||||
if (n == 2) {
|
||||
if (b[0] == 0xEF && b[1] == 0xBB) {
|
||||
if (fread(b, 1, 1, f) == 1 && b[0] == 0xBF) {
|
||||
return cmListFileLexer_BOM_UTF8;
|
||||
n = fread(b + 2, 1, 1, f);
|
||||
*readSize += n;
|
||||
|
||||
if (n == 1) {
|
||||
if (b[2] == 0xBF) {
|
||||
*readSize = 0; /* We consumed the BOM: discard it */
|
||||
return cmListFileLexer_BOM_UTF8;
|
||||
}
|
||||
}
|
||||
} else if (b[0] == 0xFE && b[1] == 0xFF) {
|
||||
*readSize = 0; /* We consumed the BOM: discard it */
|
||||
/* UTF-16 BE */
|
||||
return cmListFileLexer_BOM_UTF16BE;
|
||||
} else if (b[0] == 0 && b[1] == 0) {
|
||||
if (fread(b, 1, 2, f) == 2 && b[0] == 0xFE && b[1] == 0xFF) {
|
||||
return cmListFileLexer_BOM_UTF32BE;
|
||||
n = fread(b + 2, 1, 2, f);
|
||||
*readSize += n;
|
||||
|
||||
if (n == 2) {
|
||||
if (b[2] == 0xFE && b[3] == 0xFF) {
|
||||
*readSize = 0; /* We consumed the BOM: discard it */
|
||||
return cmListFileLexer_BOM_UTF32BE;
|
||||
}
|
||||
}
|
||||
} else if (b[0] == 0xFF && b[1] == 0xFE) {
|
||||
fpos_t p;
|
||||
fgetpos(f, &p);
|
||||
n = fread(b, 1, 2, f);
|
||||
if (n == 2 && b[0] == 0 && b[1] == 0) {
|
||||
n = fread(b + 2, 1, 2, f);
|
||||
*readSize += n;
|
||||
|
||||
if (n == 2 && b[2] == 0 && b[3] == 0) {
|
||||
*readSize = 0; /* We consumed the BOM: discard it */
|
||||
return cmListFileLexer_BOM_UTF32LE;
|
||||
}
|
||||
if (fsetpos(f, &p) != 0) {
|
||||
return cmListFileLexer_BOM_Broken;
|
||||
}
|
||||
|
||||
/* In case we were able to subsequently read only a single byte out of two
|
||||
(i.e., three in total), the file must be corrupt and the BOM cannot
|
||||
represent a UTF-16-LE BOM since each code unit must consist of two
|
||||
bytes. This avoids incorrectly detecting an incomplete UTF-32-LE BOM as
|
||||
UTF-16-LE input. */
|
||||
if (n % 2 == 0) {
|
||||
*readSize = n; /* We consumed the read bytes as BOM only partially */
|
||||
memmove(b, b + 2, n);
|
||||
return cmListFileLexer_BOM_UTF16LE;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (fseek(f, 0, SEEK_SET) != 0) {
|
||||
return cmListFileLexer_BOM_Broken;
|
||||
}
|
||||
|
||||
return cmListFileLexer_BOM_None;
|
||||
}
|
||||
|
||||
@@ -2770,7 +2816,13 @@ int cmListFileLexer_SetFileName(cmListFileLexer* lexer, const char* name,
|
||||
#endif
|
||||
if (lexer->file) {
|
||||
if (bom) {
|
||||
*bom = cmListFileLexer_ReadBOM(lexer->file);
|
||||
*bom = cmListFileLexer_ReadBOM(
|
||||
lexer->file, (unsigned char*)lexer->read_buffer, &lexer->read_size);
|
||||
lexer->read_position = 0;
|
||||
} else {
|
||||
memset(lexer->read_buffer, 0, sizeof(lexer->read_buffer));
|
||||
lexer->read_size = 0;
|
||||
lexer->read_position = 0;
|
||||
}
|
||||
} else {
|
||||
result = 0;
|
||||
@@ -2789,10 +2841,15 @@ int cmListFileLexer_SetString(cmListFileLexer* lexer, char const* text,
|
||||
/* text might be not NULL while length is 0. However, on some platforms
|
||||
malloc(0) will return NULL. To avoid signaling an error to the caller in
|
||||
such cases, ensure nonzero length. */
|
||||
if (length > 0) {
|
||||
lexer->string_buffer = (char*)malloc(length);
|
||||
size_t read_size = lexer->read_size - lexer->read_position;
|
||||
size_t string_size = read_size + length;
|
||||
if (string_size > 0) {
|
||||
lexer->string_buffer = (char*)malloc(string_size);
|
||||
if (lexer->string_buffer) {
|
||||
memcpy(lexer->string_buffer, text, length);
|
||||
memcpy(lexer->string_buffer, lexer->read_buffer + lexer->read_position,
|
||||
read_size);
|
||||
memcpy(lexer->string_buffer + read_size, text, length);
|
||||
lexer->read_position += read_size;
|
||||
lexer->string_position = lexer->string_buffer;
|
||||
lexer->string_left = length;
|
||||
} else {
|
||||
|
||||
@@ -39,6 +39,9 @@ struct cmListFileLexer_s
|
||||
size_t size;
|
||||
FILE* file;
|
||||
size_t cr;
|
||||
char read_buffer[4];
|
||||
size_t read_size;
|
||||
size_t read_position;
|
||||
char* string_buffer;
|
||||
char* string_position;
|
||||
size_t string_left;
|
||||
@@ -353,9 +356,26 @@ static int cmListFileLexerInput(cmListFileLexer* lexer, char* buffer,
|
||||
does not convert newlines on all platforms. Move any
|
||||
trailing CR to the start of the buffer for the next read. */
|
||||
size_t cr = lexer->cr;
|
||||
size_t n;
|
||||
size_t n = 0;
|
||||
buffer[0] = '\r';
|
||||
n = fread(buffer + cr, 1, bufferSize - cr, lexer->file);
|
||||
|
||||
size_t actualBufferSize = bufferSize - cr;
|
||||
char* p = buffer + cr;
|
||||
size_t readLeft = lexer->read_size - lexer->read_position;
|
||||
|
||||
/* Absorb the bytes that were read during BOM detection, if any. */
|
||||
if (readLeft > 0) {
|
||||
size_t actualReadSize =
|
||||
actualBufferSize >= readLeft ? readLeft : actualBufferSize;
|
||||
memcpy(p, lexer->read_buffer + lexer->read_position, actualReadSize);
|
||||
lexer->read_position += actualReadSize;
|
||||
p += actualReadSize;
|
||||
n += actualReadSize;
|
||||
actualBufferSize -= actualReadSize;
|
||||
}
|
||||
|
||||
n += fread(p, 1, actualBufferSize, lexer->file);
|
||||
|
||||
if (n) {
|
||||
char* o = buffer;
|
||||
const char* i = buffer;
|
||||
@@ -409,6 +429,11 @@ static void cmListFileLexerDestroy(cmListFileLexer* lexer)
|
||||
fclose(lexer->file);
|
||||
lexer->file = 0;
|
||||
}
|
||||
if (lexer->read_size != 0) {
|
||||
memset(lexer->read_buffer, 0, sizeof(lexer->read_buffer));
|
||||
lexer->read_size = 0;
|
||||
lexer->read_position = 0;
|
||||
}
|
||||
if (lexer->string_buffer) {
|
||||
free(lexer->string_buffer);
|
||||
lexer->string_buffer = 0;
|
||||
@@ -439,45 +464,66 @@ void cmListFileLexer_Delete(cmListFileLexer* lexer)
|
||||
}
|
||||
|
||||
/*--------------------------------------------------------------------------*/
|
||||
static cmListFileLexer_BOM cmListFileLexer_ReadBOM(FILE* f)
|
||||
static cmListFileLexer_BOM cmListFileLexer_ReadBOM(FILE* f,
|
||||
unsigned char readBuffer[4],
|
||||
size_t* readSize)
|
||||
{
|
||||
unsigned char b[2];
|
||||
size_t n;
|
||||
if (fread(b, 1, 2, f) == 2) {
|
||||
/* Read the up to four bytes that might correspond to a BOM. In case these
|
||||
bytes turn out not to represent a BOM, save them for later consumption in
|
||||
order to avoid seeking the file (which might not be seekable, e.g., if
|
||||
it's a pipe). */
|
||||
unsigned char* b = readBuffer;
|
||||
|
||||
size_t n = fread(b, 1, 2, f);
|
||||
*readSize = n; /* Initialize first and then accumulate */
|
||||
|
||||
if (n == 2) {
|
||||
if (b[0] == 0xEF && b[1] == 0xBB) {
|
||||
if (fread(b, 1, 1, f) == 1 && b[0] == 0xBF) {
|
||||
return cmListFileLexer_BOM_UTF8;
|
||||
n = fread(b + 2, 1, 1, f);
|
||||
*readSize += n;
|
||||
|
||||
if (n == 1) {
|
||||
if (b[2] == 0xBF) {
|
||||
*readSize = 0; /* We consumed the BOM: discard it */
|
||||
return cmListFileLexer_BOM_UTF8;
|
||||
}
|
||||
}
|
||||
} else if (b[0] == 0xFE && b[1] == 0xFF) {
|
||||
*readSize = 0; /* We consumed the BOM: discard it */
|
||||
/* UTF-16 BE */
|
||||
return cmListFileLexer_BOM_UTF16BE;
|
||||
} else if (b[0] == 0 && b[1] == 0) {
|
||||
if (fread(b, 1, 2, f) == 2 && b[0] == 0xFE && b[1] == 0xFF) {
|
||||
return cmListFileLexer_BOM_UTF32BE;
|
||||
n = fread(b + 2, 1, 2, f);
|
||||
*readSize += n;
|
||||
|
||||
if (n == 2) {
|
||||
if (b[2] == 0xFE && b[3] == 0xFF) {
|
||||
*readSize = 0; /* We consumed the BOM: discard it */
|
||||
return cmListFileLexer_BOM_UTF32BE;
|
||||
}
|
||||
}
|
||||
} else if (b[0] == 0xFF && b[1] == 0xFE) {
|
||||
fpos_t p;
|
||||
fgetpos(f, &p);
|
||||
n = fread(b, 1, 2, f);
|
||||
if (n == 2 && b[0] == 0 && b[1] == 0) {
|
||||
n = fread(b + 2, 1, 2, f);
|
||||
*readSize += n;
|
||||
|
||||
if (n == 2 && b[2] == 0 && b[3] == 0) {
|
||||
*readSize = 0; /* We consumed the BOM: discard it */
|
||||
return cmListFileLexer_BOM_UTF32LE;
|
||||
}
|
||||
if (fsetpos(f, &p) != 0) {
|
||||
return cmListFileLexer_BOM_Broken;
|
||||
}
|
||||
|
||||
/* In case we were able to subsequently read only a single byte out of two
|
||||
(i.e., three in total), the file must be corrupt and the BOM cannot
|
||||
represent a UTF-16-LE BOM since each code unit must consist of two
|
||||
bytes. This avoids incorrectly detecting an incomplete UTF-32-LE BOM as
|
||||
UTF-16-LE input. */
|
||||
if (n % 2 == 0) {
|
||||
*readSize = n; /* We consumed the read bytes as BOM only partially */
|
||||
memmove(b, b + 2, n);
|
||||
return cmListFileLexer_BOM_UTF16LE;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (fseek(f, 0, SEEK_SET) != 0) {
|
||||
return cmListFileLexer_BOM_Broken;
|
||||
}
|
||||
|
||||
return cmListFileLexer_BOM_None;
|
||||
}
|
||||
|
||||
@@ -497,7 +543,13 @@ int cmListFileLexer_SetFileName(cmListFileLexer* lexer, const char* name,
|
||||
#endif
|
||||
if (lexer->file) {
|
||||
if (bom) {
|
||||
*bom = cmListFileLexer_ReadBOM(lexer->file);
|
||||
*bom = cmListFileLexer_ReadBOM(
|
||||
lexer->file, (unsigned char*)lexer->read_buffer, &lexer->read_size);
|
||||
lexer->read_position = 0;
|
||||
} else {
|
||||
memset(lexer->read_buffer, 0, sizeof(lexer->read_buffer));
|
||||
lexer->read_size = 0;
|
||||
lexer->read_position = 0;
|
||||
}
|
||||
} else {
|
||||
result = 0;
|
||||
@@ -516,10 +568,15 @@ int cmListFileLexer_SetString(cmListFileLexer* lexer, char const* text,
|
||||
/* text might be not NULL while length is 0. However, on some platforms
|
||||
malloc(0) will return NULL. To avoid signaling an error to the caller in
|
||||
such cases, ensure nonzero length. */
|
||||
if (length > 0) {
|
||||
lexer->string_buffer = (char*)malloc(length);
|
||||
size_t read_size = lexer->read_size - lexer->read_position;
|
||||
size_t string_size = read_size + length;
|
||||
if (string_size > 0) {
|
||||
lexer->string_buffer = (char*)malloc(string_size);
|
||||
if (lexer->string_buffer) {
|
||||
memcpy(lexer->string_buffer, text, length);
|
||||
memcpy(lexer->string_buffer, lexer->read_buffer + lexer->read_position,
|
||||
read_size);
|
||||
memcpy(lexer->string_buffer + read_size, text, length);
|
||||
lexer->read_position += read_size;
|
||||
lexer->string_position = lexer->string_buffer;
|
||||
lexer->string_left = length;
|
||||
} else {
|
||||
|
||||
@@ -126,13 +126,6 @@ bool cmListFileParser::ParseFile(char const* filename)
|
||||
return false;
|
||||
}
|
||||
|
||||
if (bom == cmListFileLexer_BOM_Broken) {
|
||||
cmListFileLexer_SetFileName(this->Lexer.get(), nullptr, nullptr);
|
||||
this->IssueFileOpenError("Error while reading Byte-Order-Mark. "
|
||||
"File not seekable?");
|
||||
return false;
|
||||
}
|
||||
|
||||
// Verify the Byte-Order-Mark, if any.
|
||||
if (bom != cmListFileLexer_BOM_None && bom != cmListFileLexer_BOM_UTF8) {
|
||||
cmListFileLexer_SetFileName(this->Lexer.get(), nullptr, nullptr);
|
||||
|
||||
@@ -40,7 +40,6 @@ struct cmListFileLexer_Token_s
|
||||
enum cmListFileLexer_BOM_e
|
||||
{
|
||||
cmListFileLexer_BOM_None,
|
||||
cmListFileLexer_BOM_Broken,
|
||||
cmListFileLexer_BOM_UTF8,
|
||||
cmListFileLexer_BOM_UTF16BE,
|
||||
cmListFileLexer_BOM_UTF16LE,
|
||||
|
||||
@@ -1100,15 +1100,19 @@ set(CMAKE_RELATIVE_PATH_TOP_BINARY \"${RunCMake_TEST_BINARY_DIR}\")
|
||||
endfunction()
|
||||
run_cmake_depends()
|
||||
|
||||
function(reject_fifo)
|
||||
function(accept_fifo)
|
||||
find_program(BASH_EXECUTABLE bash)
|
||||
if(BASH_EXECUTABLE)
|
||||
set(BASH_COMMAND_ARGUMENT "'${CMAKE_COMMAND}' -P <(echo 'return()')")
|
||||
run_cmake_command(reject_fifo ${BASH_EXECUTABLE} -c ${BASH_COMMAND_ARGUMENT})
|
||||
run_cmake_command(accept_fifo ${BASH_EXECUTABLE} -c ${BASH_COMMAND_ARGUMENT})
|
||||
|
||||
set(source_dir ${RunCMake_SOURCE_DIR}/Toolchain)
|
||||
run_cmake_command(fifo_empty_initial_cache_process_substitution ${BASH_EXECUTABLE}
|
||||
-c "\"${CMAKE_COMMAND}\" -C <(echo) -S \"${source_dir}\" -B \"${RunCMake_BINARY_DIR}/fifo-empty-initial-cache\"")
|
||||
endif()
|
||||
endfunction()
|
||||
if(CMAKE_HOST_UNIX AND NOT CMAKE_SYSTEM_NAME STREQUAL "CYGWIN" AND NOT CMAKE_SYSTEM_NAME STREQUAL "MSYS")
|
||||
reject_fifo()
|
||||
accept_fifo()
|
||||
run_cmake_command(closed_stdin sh -c "\"${CMAKE_COMMAND}\" --version <&-")
|
||||
run_cmake_command(closed_stdout sh -c "\"${CMAKE_COMMAND}\" --version >&-")
|
||||
run_cmake_command(closed_stderr sh -c "\"${CMAKE_COMMAND}\" --version 2>&-")
|
||||
|
||||
1
Tests/RunCMake/CommandLine/accept_fifo-result.txt
Normal file
1
Tests/RunCMake/CommandLine/accept_fifo-result.txt
Normal file
@@ -0,0 +1 @@
|
||||
0
|
||||
1
Tests/RunCMake/CommandLine/accept_fifo-stderr.txt
Normal file
1
Tests/RunCMake/CommandLine/accept_fifo-stderr.txt
Normal file
@@ -0,0 +1 @@
|
||||
^$
|
||||
@@ -1,2 +0,0 @@
|
||||
CMake Error in .*
|
||||
Error while reading Byte-Order-Mark\. File not seekable\?
|
||||
1
Tests/RunCMake/Syntax/.gitattributes
vendored
1
Tests/RunCMake/Syntax/.gitattributes
vendored
@@ -1,3 +1,4 @@
|
||||
CommandTabs.cmake whitespace=-tab-in-indent
|
||||
StringCRLF.cmake eol=crlf
|
||||
BracketCRLF.cmake eol=crlf
|
||||
OneCharacter.cmake binary
|
||||
|
||||
4
Tests/RunCMake/Syntax/OneCharacter-stderr.txt
Normal file
4
Tests/RunCMake/Syntax/OneCharacter-stderr.txt
Normal file
@@ -0,0 +1,4 @@
|
||||
CMake Error at OneCharacter.cmake:1:
|
||||
Unexpected end of file.
|
||||
|
||||
Parse error. Function missing opening "\(".
|
||||
1
Tests/RunCMake/Syntax/OneCharacter.cmake
Normal file
1
Tests/RunCMake/Syntax/OneCharacter.cmake
Normal file
@@ -0,0 +1 @@
|
||||
a
|
||||
@@ -7,6 +7,7 @@ run_cmake(BOM-UTF-32-LE)
|
||||
run_cmake(BOM-UTF-32-BE)
|
||||
run_cmake(Broken-BOM-UTF-32-LE)
|
||||
run_cmake(Broken-BOM-UTF-32-BE)
|
||||
run_cmake(OneCharacter)
|
||||
run_cmake(CommandSpaces)
|
||||
run_cmake(CommandTabs)
|
||||
run_cmake(CommandNewlines)
|
||||
|
||||
Reference in New Issue
Block a user