Fix a parser bug where tokens are misidentified as commas. (#1502)

* Fix a parser bug where tokens are misidentified as commas.

In both the old and new readers (`Reader` and `OurReader`), the object parser
accepts a comment where it expects a `,` or `}`, and then silently consumes the
token that follows the comment; the net effect is that a comment followed by
any non-`}` token is treated as a comma.
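
As a minimal illustration (a sketch using the public CharReader API; the helper
name parsesOk is ours, not part of this change), the malformed document added
below as a failure test must now be rejected, whereas the pre-fix readers
accepted it because the comment let the stray `{` pass for the missing comma:

  // Sketch: parse with default settings (comments allowed).
  // Before this fix the malformed input below parsed successfully;
  // with the fix, parse() reports "Missing ',' or '}' in object declaration".
  #include <json/json.h>
  #include <memory>
  #include <string>

  bool parsesOk(const std::string& doc) {
    Json::CharReaderBuilder builder; // defaults allow comments
    std::unique_ptr<Json::CharReader> reader(builder.newCharReader());
    Json::Value root;
    Json::String errs;
    return reader->parse(doc.data(), doc.data() + doc.size(), &root, &errs);
  }

  // parsesOk("{\"one\": 1 /* } */ { \"two\" : 2 }") now returns false.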

The new unit test required changing the runjsontests.py
flag regime so that failure tests could be run with default settings.

* Honor allowComments==false mode.

Much of the comment handling in the parsers is bespoke and does not
honor this flag.  Unifying it under a common API (readTokenSkippingComments)
simplifies the parser, and strict mode now correctly rejects comments.
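
For instance (a sketch, not code from this change; it assumes the standard
CharReaderBuilder settings API and that <json/json.h> and <memory> are
included), disabling comments now causes input like the new strict-mode
failure tests to be rejected:

  // Sketch: allowComments=false is now honored on the unified
  // readTokenSkippingComments() path, so commented input fails to parse.
  Json::CharReaderBuilder builder;
  builder["allowComments"] = false; // strict: comments are errors
  std::unique_ptr<Json::CharReader> reader(builder.newCharReader());

  const char doc[] = "{ \"a\": \"aaa\" // comment\n}";
  Json::Value root;
  Json::String errs;
  bool ok = reader->parse(doc, doc + sizeof(doc) - 1, &root, &errs);
  // ok == false; previously a comment in this position slipped through
  // even with allowComments disabled.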

Note that allowComments mode still does not permit comments in
arbitrary locations; they are accepted only in certain positions.
Rectifying this is a bigger effort, since collectComments mode requires
storing the comments somewhere, and it's not immediately clear
where in the DOM all such comments should live.
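
For context, the existing placement model (unchanged by this commit) attaches
comments to individual values in one of three slots; a rough sketch of that
API (assumes <json/json.h> and <iostream>):

  // One comment string per placement slot per node.
  Json::Value v = 42;
  v.setComment(Json::String("// the answer"), Json::commentBefore);
  if (v.hasComment(Json::commentBefore))
    std::cout << v.getComment(Json::commentBefore) << "\n";
  // Other slots: Json::commentAfterOnSameLine, Json::commentAfter.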

---------

Co-authored-by: Jordan Bayles <bayles.jordan@gmail.com>
Authored by vslashg on 2024-09-09 20:30:16 -04:00; committed by GitHub
parent c3a986600f
commit 0a9b9d9c6e
8 changed files with 49 additions and 55 deletions


@@ -190,6 +190,7 @@ private:
   using Errors = std::deque<ErrorInfo>;
 
   bool readToken(Token& token);
+  bool readTokenSkippingComments(Token& token);
   void skipSpaces();
   bool match(const Char* pattern, int patternLength);
   bool readComment();
@@ -221,7 +222,6 @@ private:
                                 int& column) const;
   String getLocationLineAndColumn(Location location) const;
   void addComment(Location begin, Location end, CommentPlacement placement);
-  void skipCommentTokens(Token& token);
   static bool containsNewLine(Location begin, Location end);
   static String normalizeEOL(Location begin, Location end);
 


@@ -240,11 +240,14 @@ static int parseCommandLine(int argc, const char* argv[], Options* opts) {
     return printUsage(argv);
   }
   int index = 1;
-  if (Json::String(argv[index]) == "--json-checker") {
-    opts->features = Json::Features::strictMode();
+  if (Json::String(argv[index]) == "--parse-only") {
     opts->parseOnly = true;
     ++index;
   }
+  if (Json::String(argv[index]) == "--strict") {
+    opts->features = Json::Features::strictMode();
+    ++index;
+  }
   if (Json::String(argv[index]) == "--json-config") {
     printConfig();
     return 3;


@@ -129,7 +129,7 @@ bool Reader::parse(const char* beginDoc, const char* endDoc, Value& root,
 
   bool successful = readValue();
   Token token;
-  skipCommentTokens(token);
+  readTokenSkippingComments(token);
   if (collectComments_ && !commentsBefore_.empty())
     root.setComment(commentsBefore_, commentAfter);
   if (features_.strictRoot_) {
@@ -157,7 +157,7 @@ bool Reader::readValue() {
     throwRuntimeError("Exceeded stackLimit in readValue().");
 
   Token token;
-  skipCommentTokens(token);
+  readTokenSkippingComments(token);
   bool successful = true;
 
   if (collectComments_ && !commentsBefore_.empty()) {
@@ -225,14 +225,14 @@ bool Reader::readValue() {
   return successful;
 }
 
-void Reader::skipCommentTokens(Token& token) {
+bool Reader::readTokenSkippingComments(Token& token) {
+  bool success = readToken(token);
   if (features_.allowComments_) {
-    do {
-      readToken(token);
-    } while (token.type_ == tokenComment);
-  } else {
-    readToken(token);
+    while (success && token.type_ == tokenComment) {
+      success = readToken(token);
+    }
   }
+  return success;
 }
 
 bool Reader::readToken(Token& token) {
@@ -446,12 +446,7 @@ bool Reader::readObject(Token& token) {
   Value init(objectValue);
   currentValue().swapPayload(init);
   currentValue().setOffsetStart(token.start_ - begin_);
-  while (readToken(tokenName)) {
-    bool initialTokenOk = true;
-    while (tokenName.type_ == tokenComment && initialTokenOk)
-      initialTokenOk = readToken(tokenName);
-    if (!initialTokenOk)
-      break;
+  while (readTokenSkippingComments(tokenName)) {
     if (tokenName.type_ == tokenObjectEnd && name.empty()) // empty object
       return true;
     name.clear();
@@ -480,15 +475,11 @@ bool Reader::readObject(Token& token) {
       return recoverFromError(tokenObjectEnd);
 
     Token comma;
-    if (!readToken(comma) ||
-        (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator &&
-         comma.type_ != tokenComment)) {
+    if (!readTokenSkippingComments(comma) ||
+        (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator)) {
       return addErrorAndRecover("Missing ',' or '}' in object declaration",
                                 comma, tokenObjectEnd);
     }
-    bool finalizeTokenOk = true;
-    while (comma.type_ == tokenComment && finalizeTokenOk)
-      finalizeTokenOk = readToken(comma);
     if (comma.type_ == tokenObjectEnd)
       return true;
   }
@@ -518,10 +509,7 @@ bool Reader::readArray(Token& token) {
 
     Token currentToken;
     // Accept Comment after last item in the array.
-    ok = readToken(currentToken);
-    while (currentToken.type_ == tokenComment && ok) {
-      ok = readToken(currentToken);
-    }
+    ok = readTokenSkippingComments(currentToken);
     bool badTokenType = (currentToken.type_ != tokenArraySeparator &&
                          currentToken.type_ != tokenArrayEnd);
     if (!ok || badTokenType) {
@@ -943,6 +931,7 @@ private:
   using Errors = std::deque<ErrorInfo>;
 
   bool readToken(Token& token);
+  bool readTokenSkippingComments(Token& token);
   void skipSpaces();
   void skipBom(bool skipBom);
   bool match(const Char* pattern, int patternLength);
@@ -976,7 +965,6 @@ private:
                                 int& column) const;
   String getLocationLineAndColumn(Location location) const;
   void addComment(Location begin, Location end, CommentPlacement placement);
-  void skipCommentTokens(Token& token);
   static String normalizeEOL(Location begin, Location end);
   static bool containsNewLine(Location begin, Location end);
 
@@ -1030,7 +1018,7 @@ bool OurReader::parse(const char* beginDoc, const char* endDoc, Value& root,
   bool successful = readValue();
   nodes_.pop();
   Token token;
-  skipCommentTokens(token);
+  readTokenSkippingComments(token);
   if (features_.failIfExtra_ && (token.type_ != tokenEndOfStream)) {
     addError("Extra non-whitespace after JSON value.", token);
     return false;
@@ -1058,7 +1046,7 @@ bool OurReader::readValue() {
   if (nodes_.size() > features_.stackLimit_)
     throwRuntimeError("Exceeded stackLimit in readValue().");
   Token token;
-  skipCommentTokens(token);
+  readTokenSkippingComments(token);
   bool successful = true;
 
   if (collectComments_ && !commentsBefore_.empty()) {
@@ -1145,14 +1133,14 @@ bool OurReader::readValue() {
   return successful;
 }
 
-void OurReader::skipCommentTokens(Token& token) {
+bool OurReader::readTokenSkippingComments(Token& token) {
+  bool success = readToken(token);
   if (features_.allowComments_) {
-    do {
-      readToken(token);
-    } while (token.type_ == tokenComment);
-  } else {
-    readToken(token);
+    while (success && token.type_ == tokenComment) {
+      success = readToken(token);
+    }
   }
+  return success;
 }
 
 bool OurReader::readToken(Token& token) {
@@ -1449,12 +1437,7 @@ bool OurReader::readObject(Token& token) {
   Value init(objectValue);
   currentValue().swapPayload(init);
   currentValue().setOffsetStart(token.start_ - begin_);
-  while (readToken(tokenName)) {
-    bool initialTokenOk = true;
-    while (tokenName.type_ == tokenComment && initialTokenOk)
-      initialTokenOk = readToken(tokenName);
-    if (!initialTokenOk)
-      break;
+  while (readTokenSkippingComments(tokenName)) {
     if (tokenName.type_ == tokenObjectEnd &&
         (name.empty() ||
          features_.allowTrailingCommas_)) // empty object or trailing comma
@@ -1491,15 +1474,11 @@ bool OurReader::readObject(Token& token) {
       return recoverFromError(tokenObjectEnd);
 
     Token comma;
-    if (!readToken(comma) ||
-        (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator &&
-         comma.type_ != tokenComment)) {
+    if (!readTokenSkippingComments(comma) ||
+        (comma.type_ != tokenObjectEnd && comma.type_ != tokenArraySeparator)) {
      return addErrorAndRecover("Missing ',' or '}' in object declaration",
                                 comma, tokenObjectEnd);
     }
-    bool finalizeTokenOk = true;
-    while (comma.type_ == tokenComment && finalizeTokenOk)
-      finalizeTokenOk = readToken(comma);
     if (comma.type_ == tokenObjectEnd)
       return true;
   }
@@ -1533,10 +1512,7 @@ bool OurReader::readArray(Token& token) {
 
     Token currentToken;
     // Accept Comment after last item in the array.
-    ok = readToken(currentToken);
-    while (currentToken.type_ == tokenComment && ok) {
-      ok = readToken(currentToken);
-    }
+    ok = readTokenSkippingComments(currentToken);
     bool badTokenType = (currentToken.type_ != tokenArraySeparator &&
                          currentToken.type_ != tokenArrayEnd);
     if (!ok || badTokenType) {


@@ -0,0 +1,4 @@
+{
+"a": "aaa",
+"b": "bbb" // comments not allowed in strict mode
+}


@@ -0,0 +1,4 @@
+{
+"a": "aaa", // comments not allowed in strict mode
+"b": "bbb"
+}


@@ -0,0 +1,3 @@
+{
+"array" : [1, 2, 3 /* comments not allowed in strict mode */]
+}


@@ -0,0 +1 @@
+{"one": 1 /* } */ { "two" : 2 }


@@ -97,14 +97,17 @@ def runAllTests(jsontest_executable_path, input_dir = None,
     valgrind_path = use_valgrind and VALGRIND_CMD or ''
     for input_path in tests + test_jsonchecker:
         expect_failure = os.path.basename(input_path).startswith('fail')
-        is_json_checker_test = (input_path in test_jsonchecker) or expect_failure
+        is_json_checker_test = input_path in test_jsonchecker
+        is_parse_only = is_json_checker_test or expect_failure
+        is_strict_test = ('_strict_' in os.path.basename(input_path)) or is_json_checker_test
         print('TESTING:', input_path, end=' ')
-        options = is_json_checker_test and '--json-checker' or ''
+        options = is_parse_only and '--parse-only' or ''
+        options += is_strict_test and ' --strict' or ''
         options += ' --json-writer %s'%writerClass
         cmd = '%s%s %s "%s"' % ( valgrind_path, jsontest_executable_path, options,
                                  input_path)
         status, process_output = getStatusOutput(cmd)
-        if is_json_checker_test:
+        if is_parse_only:
            if expect_failure:
                if not status:
                    print('FAILED')