Skip to content

Commit 98f263e

Browse files
authored
Support validating multi-doc YAML files (#577)
Fixes: #553
Fixes: #556
Signed-off-by: Juan Cruz Viotti <[email protected]>
1 parent b7c1821 commit 98f263e

10 files changed

+379
-3
lines changed

src/command_validate.cc

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -288,7 +288,9 @@ auto sourcemeta::jsonschema::validate(const sourcemeta::core::Options &options)
288288
"given a single instance"};
289289
}
290290
if (std::filesystem::is_directory(instance_path) ||
291-
instance_path.extension() == ".jsonl") {
291+
instance_path.extension() == ".jsonl" ||
292+
instance_path.extension() == ".yaml" ||
293+
instance_path.extension() == ".yml") {
292294
for (const auto &entry : for_each_json({instance_path_view}, options)) {
293295
std::ostringstream error;
294296
sourcemeta::blaze::SimpleOutput output{entry.second};

src/input.h

Lines changed: 47 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -153,14 +153,59 @@ handle_json_entry(const std::filesystem::path &entry_path,
153153
if (index == 0) {
154154
LOG_WARNING() << "The JSONL file is empty\n";
155155
}
156+
} else if (canonical.extension() == ".yaml" ||
157+
canonical.extension() == ".yml") {
158+
if (std::filesystem::is_empty(canonical)) {
159+
return;
160+
}
161+
auto stream{sourcemeta::core::read_file(canonical)};
162+
std::vector<std::pair<sourcemeta::core::JSON,
163+
sourcemeta::core::PointerPositionTracker>>
164+
documents;
165+
std::uint64_t line_offset{0};
166+
std::uint64_t max_line{0};
167+
while (stream.peek() != std::char_traits<char>::eof()) {
168+
sourcemeta::core::PointerPositionTracker positions;
169+
const std::uint64_t current_offset{line_offset};
170+
max_line = 0;
171+
auto callback = [&positions, current_offset, &max_line](
172+
const sourcemeta::core::JSON::ParsePhase phase,
173+
const sourcemeta::core::JSON::Type type,
174+
const std::uint64_t line,
175+
const std::uint64_t column,
176+
const sourcemeta::core::JSON &value) {
177+
max_line = std::max(max_line, line);
178+
positions(phase, type, line + current_offset, column, value);
179+
};
180+
documents.emplace_back(sourcemeta::core::parse_yaml(stream, callback),
181+
std::move(positions));
182+
// The YAML parser reports the line of the next document separator,
183+
// so we subtract 1 to get the actual lines consumed by this document
184+
line_offset += max_line > 0 ? max_line - 1 : 0;
185+
}
186+
187+
if (documents.size() > 1) {
188+
LOG_VERBOSE(options) << "Interpreting input as YAML multi-document: "
189+
<< canonical.string() << "\n";
190+
std::size_t index{0};
191+
for (auto &entry : documents) {
192+
result.push_back({canonical, std::move(entry.first),
193+
std::move(entry.second), index, true});
194+
index += 1;
195+
}
196+
} else if (documents.size() == 1) {
197+
result.push_back({std::move(canonical),
198+
std::move(documents.front().first),
199+
std::move(documents.front().second)});
200+
}
156201
} else {
157202
if (std::filesystem::is_empty(canonical)) {
158203
return;
159204
}
160205
sourcemeta::core::PointerPositionTracker positions;
161206
// TODO: Print a verbose message for what is getting parsed
162-
auto contents{sourcemeta::core::read_yaml_or_json(canonical,
163-
std::ref(positions))};
207+
auto contents{
208+
sourcemeta::core::read_json(canonical, std::ref(positions))};
164209
result.push_back(
165210
{std::move(canonical), std::move(contents), std::move(positions)});
166211
}

test/CMakeLists.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,13 @@ add_jsonschema_test_unix(validate/pass_many_verbose)
141141
add_jsonschema_test_unix(validate/fail_many)
142142
add_jsonschema_test_unix(validate/fail_many_verbose)
143143
add_jsonschema_test_unix(validate/fail_yaml)
144+
add_jsonschema_test_unix(validate/pass_yaml_multi)
145+
add_jsonschema_test_unix(validate/pass_yaml_multi_verbose)
146+
add_jsonschema_test_unix(validate/pass_yaml_multi_json)
147+
add_jsonschema_test_unix(validate/fail_yaml_multi_one)
148+
add_jsonschema_test_unix(validate/fail_yaml_multi_one_verbose)
149+
add_jsonschema_test_unix(validate/fail_yaml_multi_one_json)
150+
add_jsonschema_test_unix(validate/fail_yaml_multi_blank_lines)
144151
add_jsonschema_test_unix(validate/pass_json_ref_yaml)
145152
add_jsonschema_test_unix(validate/pass_process_substitution)
146153
add_jsonschema_test_unix(validate/pass_2020_12_fast_with_template)

test/validate/fail_yaml_multi_blank_lines.sh

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
#!/bin/sh
2+
3+
set -o errexit
4+
set -o nounset
5+
6+
TMP="$(mktemp -d)"
7+
clean() { rm -rf "$TMP"; }
8+
trap clean EXIT
9+
10+
cat << 'EOF' > "$TMP/schema.json"
11+
{
12+
"$schema": "http://json-schema.org/draft-04/schema#",
13+
"type": "object"
14+
}
15+
EOF
16+
17+
# YAML multi-document with blank lines around separators
18+
cat << 'EOF' > "$TMP/instance.yaml"
19+
---
20+
foo: first
21+
22+
---
23+
24+
- bar: second
25+
---
26+
baz: third
27+
EOF
28+
29+
"$1" validate "$TMP/schema.json" "$TMP/instance.yaml" --verbose 2>"$TMP/stderr.txt" \
30+
&& EXIT_CODE="$?" || EXIT_CODE="$?"
31+
test "$EXIT_CODE" = "2" || exit 1
32+
33+
cat << EOF > "$TMP/expected.txt"
34+
Interpreting input as YAML multi-document: $(realpath "$TMP")/instance.yaml
35+
ok: $(realpath "$TMP")/instance.yaml (entry #1)
36+
matches $(realpath "$TMP")/schema.json
37+
fail: $(realpath "$TMP")/instance.yaml (entry #2)
38+
39+
[
40+
{
41+
"bar": "second"
42+
}
43+
]
44+
45+
error: Schema validation failure
46+
The value was expected to be of type object but it was of type array
47+
at instance location "" (line 6, column 1)
48+
at evaluate path "/type"
49+
EOF
50+
51+
diff "$TMP/stderr.txt" "$TMP/expected.txt"

test/validate/fail_yaml_multi_one.sh

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
#!/bin/sh
2+
3+
set -o errexit
4+
set -o nounset
5+
6+
TMP="$(mktemp -d)"
7+
clean() { rm -rf "$TMP"; }
8+
trap clean EXIT
9+
10+
cat << 'EOF' > "$TMP/schema.json"
11+
{
12+
"$schema": "http://json-schema.org/draft-04/schema#",
13+
"type": "object"
14+
}
15+
EOF
16+
17+
cat << 'EOF' > "$TMP/instance.yaml"
18+
---
19+
foo: 1
20+
---
21+
- foo: 2
22+
---
23+
foo: 3
24+
EOF
25+
26+
"$1" validate "$TMP/schema.json" "$TMP/instance.yaml" 2>"$TMP/stderr.txt" \
27+
&& EXIT_CODE="$?" || EXIT_CODE="$?"
28+
test "$EXIT_CODE" = "2" || exit 1
29+
30+
cat << EOF > "$TMP/expected.txt"
31+
fail: $(realpath "$TMP")/instance.yaml (entry #2)
32+
33+
[
34+
{
35+
"foo": 2
36+
}
37+
]
38+
39+
error: Schema validation failure
40+
The value was expected to be of type object but it was of type array
41+
at instance location "" (line 4, column 1)
42+
at evaluate path "/type"
43+
EOF
44+
45+
diff "$TMP/stderr.txt" "$TMP/expected.txt"

test/validate/fail_yaml_multi_one_json.sh

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
#!/bin/sh
2+
3+
set -o errexit
4+
set -o nounset
5+
6+
TMP="$(mktemp -d)"
7+
clean() { rm -rf "$TMP"; }
8+
trap clean EXIT
9+
10+
cat << 'EOF' > "$TMP/schema.json"
11+
{
12+
"$schema": "http://json-schema.org/draft-04/schema#",
13+
"type": "object"
14+
}
15+
EOF
16+
17+
cat << 'EOF' > "$TMP/instance.yaml"
18+
---
19+
foo: 1
20+
---
21+
- foo: 2
22+
---
23+
foo: 3
24+
EOF
25+
26+
"$1" validate "$TMP/schema.json" "$TMP/instance.yaml" --json > "$TMP/stdout.txt" \
27+
&& EXIT_CODE="$?" || EXIT_CODE="$?"
28+
test "$EXIT_CODE" = "2" || exit 1
29+
30+
cat << EOF > "$TMP/expected.txt"
31+
{
32+
"valid": true
33+
}
34+
{
35+
"valid": false,
36+
"errors": [
37+
{
38+
"keywordLocation": "/type",
39+
"absoluteKeywordLocation": "file://$(realpath "$TMP")/schema.json#/type",
40+
"instanceLocation": "",
41+
"instancePosition": [ 4, 1, 5, 0 ],
42+
"error": "The value was expected to be of type object but it was of type array"
43+
}
44+
]
45+
}
46+
EOF
47+
48+
diff "$TMP/stdout.txt" "$TMP/expected.txt"

test/validate/fail_yaml_multi_one_verbose.sh

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
#!/bin/sh
2+
3+
set -o errexit
4+
set -o nounset
5+
6+
TMP="$(mktemp -d)"
7+
clean() { rm -rf "$TMP"; }
8+
trap clean EXIT
9+
10+
cat << 'EOF' > "$TMP/schema.json"
11+
{
12+
"$schema": "http://json-schema.org/draft-04/schema#",
13+
"type": "object"
14+
}
15+
EOF
16+
17+
cat << 'EOF' > "$TMP/instance.yaml"
18+
---
19+
foo: 1
20+
---
21+
- foo: 2
22+
---
23+
foo: 3
24+
EOF
25+
26+
"$1" validate "$TMP/schema.json" "$TMP/instance.yaml" --verbose 2>"$TMP/stderr.txt" \
27+
&& EXIT_CODE="$?" || EXIT_CODE="$?"
28+
test "$EXIT_CODE" = "2" || exit 1
29+
30+
cat << EOF > "$TMP/expected.txt"
31+
Interpreting input as YAML multi-document: $(realpath "$TMP")/instance.yaml
32+
ok: $(realpath "$TMP")/instance.yaml (entry #1)
33+
matches $(realpath "$TMP")/schema.json
34+
fail: $(realpath "$TMP")/instance.yaml (entry #2)
35+
36+
[
37+
{
38+
"foo": 2
39+
}
40+
]
41+
42+
error: Schema validation failure
43+
The value was expected to be of type object but it was of type array
44+
at instance location "" (line 4, column 1)
45+
at evaluate path "/type"
46+
EOF
47+
48+
diff "$TMP/stderr.txt" "$TMP/expected.txt"

test/validate/pass_yaml_multi.sh

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
#!/bin/sh
2+
3+
set -o errexit
4+
set -o nounset
5+
6+
TMP="$(mktemp -d)"
7+
clean() { rm -rf "$TMP"; }
8+
trap clean EXIT
9+
10+
cat << 'EOF' > "$TMP/schema.json"
11+
{
12+
"$schema": "http://json-schema.org/draft-04/schema#",
13+
"properties": {
14+
"foo": {
15+
"type": "string"
16+
}
17+
}
18+
}
19+
EOF
20+
21+
cat << 'EOF' > "$TMP/instance.yaml"
22+
---
23+
foo: first
24+
---
25+
foo: second
26+
---
27+
foo: third
28+
EOF
29+
30+
"$1" validate "$TMP/schema.json" "$TMP/instance.yaml" 2> "$TMP/output.txt" 1>&2
31+
32+
cat << EOF > "$TMP/expected.txt"
33+
EOF
34+
35+
diff "$TMP/output.txt" "$TMP/expected.txt"

test/validate/pass_yaml_multi_json.sh

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
#!/bin/sh
2+
3+
set -o errexit
4+
set -o nounset
5+
6+
TMP="$(mktemp -d)"
7+
clean() { rm -rf "$TMP"; }
8+
trap clean EXIT
9+
10+
cat << 'EOF' > "$TMP/schema.json"
11+
{
12+
"$schema": "http://json-schema.org/draft-04/schema#",
13+
"properties": {
14+
"foo": {
15+
"type": "string"
16+
}
17+
}
18+
}
19+
EOF
20+
21+
cat << 'EOF' > "$TMP/instance.yaml"
22+
---
23+
foo: first
24+
---
25+
foo: second
26+
---
27+
foo: third
28+
EOF
29+
30+
"$1" validate "$TMP/schema.json" "$TMP/instance.yaml" --json > "$TMP/output.json" 2>&1
31+
32+
cat << EOF > "$TMP/expected.json"
33+
{
34+
"valid": true
35+
}
36+
{
37+
"valid": true
38+
}
39+
{
40+
"valid": true
41+
}
42+
EOF
43+
44+
diff "$TMP/output.json" "$TMP/expected.json"

0 commit comments

Comments
 (0)