Files
sqlitebrowser/libs/qscintilla/lexers/LexJSON.cpp
Martin Kleusberg 90a15fde1a Revert "libs: Update bundled QScintilla to version 2.13.3"
This reverts commit 6ddd4f019f since it
breaks AppImage builds. The newer version of QScintilla depends on Qt
5.11 which is not available in the Ubuntu 18.04 the AppImages use. We
also cannot just use the version of QScintilla available in the OS
instead of the bundled one for the AppImages as that is too old for our
purposes. It is probably best to just update to Ubuntu 20.04 for the
AppImage builds in April 2023 when 18.04 reaches its end of life.
2023-02-18 16:16:58 +01:00

499 lines
13 KiB
C++

// Scintilla source code edit control
/**
* @file LexJSON.cxx
* @date February 19, 2016
* @brief Lexer for JSON and JSON-LD formats
* @author nkmathew
*
* The License.txt file describes the conditions under which this software may
* be distributed.
*
*/
#include <cstdlib>
#include <cassert>
#include <cctype>
#include <cstdio>
#include <string>
#include <vector>
#include <map>
#include "ILexer.h"
#include "Scintilla.h"
#include "SciLexer.h"
#include "WordList.h"
#include "LexAccessor.h"
#include "StyleContext.h"
#include "CharacterSet.h"
#include "LexerModule.h"
#include "OptionSet.h"
#include "DefaultLexer.h"
using namespace Scintilla;
static const char *const JSONWordListDesc[] = {
"JSON Keywords",
"JSON-LD Keywords",
0
};
/**
* Used to detect compact IRI/URLs in JSON-LD without first looking ahead for the
* colon separating the prefix and suffix
*
* https://www.w3.org/TR/json-ld/#dfn-compact-iri
*/
struct CompactIRI {
int colonCount;
bool foundInvalidChar;
CharacterSet setCompactIRI;
CompactIRI() {
colonCount = 0;
foundInvalidChar = false;
setCompactIRI = CharacterSet(CharacterSet::setAlpha, "$_-");
}
void resetState() {
colonCount = 0;
foundInvalidChar = false;
}
void checkChar(int ch) {
if (ch == ':') {
colonCount++;
} else {
foundInvalidChar |= !setCompactIRI.Contains(ch);
}
}
bool shouldHighlight() const {
return !foundInvalidChar && colonCount == 1;
}
};
/**
* Keeps track of escaped characters in strings as per:
*
* https://tools.ietf.org/html/rfc7159#section-7
*/
struct EscapeSequence {
int digitsLeft;
CharacterSet setHexDigits;
CharacterSet setEscapeChars;
EscapeSequence() {
digitsLeft = 0;
setHexDigits = CharacterSet(CharacterSet::setDigits, "ABCDEFabcdef");
setEscapeChars = CharacterSet(CharacterSet::setNone, "\\\"tnbfru/");
}
// Returns true if the following character is a valid escaped character
bool newSequence(int nextChar) {
digitsLeft = 0;
if (nextChar == 'u') {
digitsLeft = 5;
} else if (!setEscapeChars.Contains(nextChar)) {
return false;
}
return true;
}
bool atEscapeEnd() const {
return digitsLeft <= 0;
}
bool isInvalidChar(int currChar) const {
return !setHexDigits.Contains(currChar);
}
};
struct OptionsJSON {
bool foldCompact;
bool fold;
bool allowComments;
bool escapeSequence;
OptionsJSON() {
foldCompact = false;
fold = false;
allowComments = false;
escapeSequence = false;
}
};
struct OptionSetJSON : public OptionSet<OptionsJSON> {
OptionSetJSON() {
DefineProperty("lexer.json.escape.sequence", &OptionsJSON::escapeSequence,
"Set to 1 to enable highlighting of escape sequences in strings");
DefineProperty("lexer.json.allow.comments", &OptionsJSON::allowComments,
"Set to 1 to enable highlighting of line/block comments in JSON");
DefineProperty("fold.compact", &OptionsJSON::foldCompact);
DefineProperty("fold", &OptionsJSON::fold);
DefineWordListSets(JSONWordListDesc);
}
};
class LexerJSON : public DefaultLexer {
OptionsJSON options;
OptionSetJSON optSetJSON;
EscapeSequence escapeSeq;
WordList keywordsJSON;
WordList keywordsJSONLD;
CharacterSet setOperators;
CharacterSet setURL;
CharacterSet setKeywordJSONLD;
CharacterSet setKeywordJSON;
CompactIRI compactIRI;
static bool IsNextNonWhitespace(LexAccessor &styler, Sci_Position start, char ch) {
Sci_Position i = 0;
while (i < 50) {
i++;
char curr = styler.SafeGetCharAt(start+i, '\0');
char next = styler.SafeGetCharAt(start+i+1, '\0');
bool atEOL = (curr == '\r' && next != '\n') || (curr == '\n');
if (curr == ch) {
return true;
} else if (!isspacechar(curr) || atEOL) {
return false;
}
}
return false;
}
/**
* Looks for the colon following the end quote
*
* Assumes property names of lengths no longer than a 100 characters.
* The colon is also expected to be less than 50 spaces after the end
* quote for the string to be considered a property name
*/
static bool AtPropertyName(LexAccessor &styler, Sci_Position start) {
Sci_Position i = 0;
bool escaped = false;
while (i < 100) {
i++;
char curr = styler.SafeGetCharAt(start+i, '\0');
if (escaped) {
escaped = false;
continue;
}
escaped = curr == '\\';
if (curr == '"') {
return IsNextNonWhitespace(styler, start+i, ':');
} else if (!curr) {
return false;
}
}
return false;
}
static bool IsNextWordInList(WordList &keywordList, CharacterSet wordSet,
StyleContext &context, LexAccessor &styler) {
char word[51];
Sci_Position currPos = (Sci_Position) context.currentPos;
int i = 0;
while (i < 50) {
char ch = styler.SafeGetCharAt(currPos + i);
if (!wordSet.Contains(ch)) {
break;
}
word[i] = ch;
i++;
}
word[i] = '\0';
return keywordList.InList(word);
}
public:
LexerJSON() :
setOperators(CharacterSet::setNone, "[{}]:,"),
setURL(CharacterSet::setAlphaNum, "-._~:/?#[]@!$&'()*+,),="),
setKeywordJSONLD(CharacterSet::setAlpha, ":@"),
setKeywordJSON(CharacterSet::setAlpha, "$_") {
}
virtual ~LexerJSON() {}
int SCI_METHOD Version() const override {
return lvOriginal;
}
void SCI_METHOD Release() override {
delete this;
}
const char *SCI_METHOD PropertyNames() override {
return optSetJSON.PropertyNames();
}
int SCI_METHOD PropertyType(const char *name) override {
return optSetJSON.PropertyType(name);
}
const char *SCI_METHOD DescribeProperty(const char *name) override {
return optSetJSON.DescribeProperty(name);
}
Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override {
if (optSetJSON.PropertySet(&options, key, val)) {
return 0;
}
return -1;
}
Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override {
WordList *wordListN = 0;
switch (n) {
case 0:
wordListN = &keywordsJSON;
break;
case 1:
wordListN = &keywordsJSONLD;
break;
}
Sci_Position firstModification = -1;
if (wordListN) {
WordList wlNew;
wlNew.Set(wl);
if (*wordListN != wlNew) {
wordListN->Set(wl);
firstModification = 0;
}
}
return firstModification;
}
void *SCI_METHOD PrivateCall(int, void *) override {
return 0;
}
static ILexer *LexerFactoryJSON() {
return new LexerJSON;
}
const char *SCI_METHOD DescribeWordListSets() override {
return optSetJSON.DescribeWordListSets();
}
void SCI_METHOD Lex(Sci_PositionU startPos,
Sci_Position length,
int initStyle,
IDocument *pAccess) override;
void SCI_METHOD Fold(Sci_PositionU startPos,
Sci_Position length,
int initStyle,
IDocument *pAccess) override;
};
void SCI_METHOD LexerJSON::Lex(Sci_PositionU startPos,
Sci_Position length,
int initStyle,
IDocument *pAccess) {
LexAccessor styler(pAccess);
StyleContext context(startPos, length, initStyle, styler);
int stringStyleBefore = SCE_JSON_STRING;
while (context.More()) {
switch (context.state) {
case SCE_JSON_BLOCKCOMMENT:
if (context.Match("*/")) {
context.Forward();
context.ForwardSetState(SCE_JSON_DEFAULT);
}
break;
case SCE_JSON_LINECOMMENT:
if (context.atLineEnd) {
context.SetState(SCE_JSON_DEFAULT);
}
break;
case SCE_JSON_STRINGEOL:
if (context.atLineStart) {
context.SetState(SCE_JSON_DEFAULT);
}
break;
case SCE_JSON_ESCAPESEQUENCE:
escapeSeq.digitsLeft--;
if (!escapeSeq.atEscapeEnd()) {
if (escapeSeq.isInvalidChar(context.ch)) {
context.SetState(SCE_JSON_ERROR);
}
break;
}
if (context.ch == '"') {
context.SetState(stringStyleBefore);
context.ForwardSetState(SCE_C_DEFAULT);
} else if (context.ch == '\\') {
if (!escapeSeq.newSequence(context.chNext)) {
context.SetState(SCE_JSON_ERROR);
}
context.Forward();
} else {
context.SetState(stringStyleBefore);
if (context.atLineEnd) {
context.ChangeState(SCE_JSON_STRINGEOL);
}
}
break;
case SCE_JSON_PROPERTYNAME:
case SCE_JSON_STRING:
if (context.ch == '"') {
if (compactIRI.shouldHighlight()) {
context.ChangeState(SCE_JSON_COMPACTIRI);
context.ForwardSetState(SCE_JSON_DEFAULT);
compactIRI.resetState();
} else {
context.ForwardSetState(SCE_JSON_DEFAULT);
}
} else if (context.atLineEnd) {
context.ChangeState(SCE_JSON_STRINGEOL);
} else if (context.ch == '\\') {
stringStyleBefore = context.state;
if (options.escapeSequence) {
context.SetState(SCE_JSON_ESCAPESEQUENCE);
if (!escapeSeq.newSequence(context.chNext)) {
context.SetState(SCE_JSON_ERROR);
}
}
context.Forward();
} else if (context.Match("https://") ||
context.Match("http://") ||
context.Match("ssh://") ||
context.Match("git://") ||
context.Match("svn://") ||
context.Match("ftp://") ||
context.Match("mailto:")) {
// Handle most common URI schemes only
stringStyleBefore = context.state;
context.SetState(SCE_JSON_URI);
} else if (context.ch == '@') {
// https://www.w3.org/TR/json-ld/#dfn-keyword
if (IsNextWordInList(keywordsJSONLD, setKeywordJSONLD, context, styler)) {
stringStyleBefore = context.state;
context.SetState(SCE_JSON_LDKEYWORD);
}
} else {
compactIRI.checkChar(context.ch);
}
break;
case SCE_JSON_LDKEYWORD:
case SCE_JSON_URI:
if ((!setKeywordJSONLD.Contains(context.ch) &&
(context.state == SCE_JSON_LDKEYWORD)) ||
(!setURL.Contains(context.ch))) {
context.SetState(stringStyleBefore);
}
if (context.ch == '"') {
context.ForwardSetState(SCE_JSON_DEFAULT);
} else if (context.atLineEnd) {
context.ChangeState(SCE_JSON_STRINGEOL);
}
break;
case SCE_JSON_OPERATOR:
case SCE_JSON_NUMBER:
context.SetState(SCE_JSON_DEFAULT);
break;
case SCE_JSON_ERROR:
if (context.atLineEnd) {
context.SetState(SCE_JSON_DEFAULT);
}
break;
case SCE_JSON_KEYWORD:
if (!setKeywordJSON.Contains(context.ch)) {
context.SetState(SCE_JSON_DEFAULT);
}
break;
}
if (context.state == SCE_JSON_DEFAULT) {
if (context.ch == '"') {
compactIRI.resetState();
context.SetState(SCE_JSON_STRING);
Sci_Position currPos = static_cast<Sci_Position>(context.currentPos);
if (AtPropertyName(styler, currPos)) {
context.SetState(SCE_JSON_PROPERTYNAME);
}
} else if (setOperators.Contains(context.ch)) {
context.SetState(SCE_JSON_OPERATOR);
} else if (options.allowComments && context.Match("/*")) {
context.SetState(SCE_JSON_BLOCKCOMMENT);
context.Forward();
} else if (options.allowComments && context.Match("//")) {
context.SetState(SCE_JSON_LINECOMMENT);
} else if (setKeywordJSON.Contains(context.ch)) {
if (IsNextWordInList(keywordsJSON, setKeywordJSON, context, styler)) {
context.SetState(SCE_JSON_KEYWORD);
}
}
bool numberStart =
IsADigit(context.ch) && (context.chPrev == '+'||
context.chPrev == '-' ||
context.atLineStart ||
IsASpace(context.chPrev) ||
setOperators.Contains(context.chPrev));
bool exponentPart =
tolower(context.ch) == 'e' &&
IsADigit(context.chPrev) &&
(IsADigit(context.chNext) ||
context.chNext == '+' ||
context.chNext == '-');
bool signPart =
(context.ch == '-' || context.ch == '+') &&
((tolower(context.chPrev) == 'e' && IsADigit(context.chNext)) ||
((IsASpace(context.chPrev) || setOperators.Contains(context.chPrev))
&& IsADigit(context.chNext)));
bool adjacentDigit =
IsADigit(context.ch) && IsADigit(context.chPrev);
bool afterExponent = IsADigit(context.ch) && tolower(context.chPrev) == 'e';
bool dotPart = context.ch == '.' &&
IsADigit(context.chPrev) &&
IsADigit(context.chNext);
bool afterDot = IsADigit(context.ch) && context.chPrev == '.';
if (numberStart ||
exponentPart ||
signPart ||
adjacentDigit ||
dotPart ||
afterExponent ||
afterDot) {
context.SetState(SCE_JSON_NUMBER);
} else if (context.state == SCE_JSON_DEFAULT && !IsASpace(context.ch)) {
context.SetState(SCE_JSON_ERROR);
}
}
context.Forward();
}
context.Complete();
}
void SCI_METHOD LexerJSON::Fold(Sci_PositionU startPos,
Sci_Position length,
int,
IDocument *pAccess) {
if (!options.fold) {
return;
}
LexAccessor styler(pAccess);
Sci_PositionU currLine = styler.GetLine(startPos);
Sci_PositionU endPos = startPos + length;
int currLevel = SC_FOLDLEVELBASE;
if (currLine > 0)
currLevel = styler.LevelAt(currLine - 1) >> 16;
int nextLevel = currLevel;
int visibleChars = 0;
for (Sci_PositionU i = startPos; i < endPos; i++) {
char curr = styler.SafeGetCharAt(i);
char next = styler.SafeGetCharAt(i+1);
bool atEOL = (curr == '\r' && next != '\n') || (curr == '\n');
if (styler.StyleAt(i) == SCE_JSON_OPERATOR) {
if (curr == '{' || curr == '[') {
nextLevel++;
} else if (curr == '}' || curr == ']') {
nextLevel--;
}
}
if (atEOL || i == (endPos-1)) {
int level = currLevel | nextLevel << 16;
if (!visibleChars && options.foldCompact) {
level |= SC_FOLDLEVELWHITEFLAG;
} else if (nextLevel > currLevel) {
level |= SC_FOLDLEVELHEADERFLAG;
}
if (level != styler.LevelAt(currLine)) {
styler.SetLevel(currLine, level);
}
currLine++;
currLevel = nextLevel;
visibleChars = 0;
}
if (!isspacechar(curr)) {
visibleChars++;
}
}
}
LexerModule lmJSON(SCLEX_JSON,
LexerJSON::LexerFactoryJSON,
"json",
JSONWordListDesc);