stedolan-jq/tests/onig.test

# match builtin
# "( )*" can match the empty string: a global match on "abc" reports one
# zero-length match at every offset 0..3, each carrying an empty capture.
[match("( )*"; "g")]
"abc"
[{"offset":0,"length":0,"string":"","captures":[{"offset":0,"string":"","length":0,"name":null}]},{"offset":1,"length":0,"string":"","captures":[{"offset":1,"string":"","length":0,"name":null}]},{"offset":2,"length":0,"string":"","captures":[{"offset":2,"string":"","length":0,"name":null}]},{"offset":3,"length":0,"string":"","captures":[{"offset":3,"string":"","length":0,"name":null}]}]

# Same pattern and input with the extra "n" flag: no matches are reported.
[match("( )*"; "gn")]
"abc"
[]

# An empty regex matches at offsets 0, 1 and 2 of "ab"; with no groups in the
# pattern, every match has an empty captures array.
[match(""; "g")]
"ab"
[{"offset":0,"length":0,"string":"","captures":[]},{"offset":1,"length":0,"string":"","captures":[]},{"offset":2,"length":0,"string":"","captures":[]}]

# Case-insensitive "a" is expected not to match any of these accented letters.
[match("a"; "gi")]
"āáàä"
[]

# match also accepts its regex wrapped in an array; the unnamed group "(bar)"
# is reported as a capture whose name is null.
[match(["(bar)"])]
"foo bar"
[{"offset": 4, "length": 3, "string": "bar", "captures":[{"offset": 4, "length": 3, "string": "bar", "name": null}]}]

# offsets account for combining codepoints and multi-byte UTF-8
[match("bar")]
"ā bar with a combining codepoint U+0304"
[{"offset": 3, "length": 3, "string": "bar", "captures":[]}]

# matches with combining codepoints still count them in their length
[match("bār")]
"a bār"
[{"offset": 2, "length": 4, "string": "bār", "captures":[]}]

# a lazy ".+?" up to the first word boundary returns the base letter together
# with its combining mark: a single match of length 2
[match(".+?\\b")]
"ā two-codepoint grapheme"
[{"offset": 0, "length": 2, "string": "ā", "captures":[]}]

# Array form carrying both the regex and the flags ("ig"). In the second match
# the optional named group did not participate, so its capture is reported
# with offset -1, length 0 and a null string.
[match(["foo (?<bar123>bar)? foo", "ig"])]
"foo bar foo foo  foo"
[{"offset": 0, "length": 11, "string": "foo bar foo", "captures":[{"offset": 4, "length": 3, "string": "bar", "name": "bar123"}]},{"offset":12, "length": 8, "string": "foo  foo", "captures":[{"offset": -1, "length": 0, "string": null, "name": "bar123"}]}]

# test builtin
# "( )*" with flags "gn" on "abc": test reports false.
[test("( )*"; "gn")]
"abc"
[false]

# A non-ASCII pattern matches the identical non-ASCII input.
[test("ā")]
"ā"
[true]

# capture returns an object keyed by the names of the named groups.
capture("(?<a>[a-z]+)-(?<n>[0-9]+)")
"xyzzy-14"
{"a":"xyzzy","n":"14"}


# jq-coded utilities built on match:
#
# The second element of each input array in these tests covers the case
# where the pattern being replaced (the "from" string) matches at both the
# head and the tail of the string
# sub replaces only the first ", " in each input string.
[.[] | sub(", "; ":")]
["a,b, c, d, e,f", ", a,b, c, d, e,f, "]
["a,b:c, d, e,f",":a,b, c, d, e,f, "]

# The replacement string can interpolate a named capture (.head); only the
# matched first character is replaced and the rest of the input is kept.
sub("^(?<head>.)"; "Head=\(.head) Tail=")
"abcdef"
"Head=a Tail=bcdef"

# gsub replaces every ", ", including those at the very start and end.
[.[] | gsub(", "; ":")]
["a,b, c, d, e,f",", a,b, c, d, e,f, "]
["a,b:c:d:e,f",":a,b:c:d:e,f:"]

# Each digit is rewritten using its named capture .d.
gsub("(?<d>\\d)"; ":\(.d);")
"a1b2"
"a:1;b:2;"

# Every one of the adjacent matches is replaced.
gsub("a";"b")
"aaaaa"
"bbbbb"

# Empty input with the "x" flag: the result stays empty.
gsub("(.*)"; ""; "x")
""
""

# An empty pattern on the empty string inserts the replacement exactly once.
gsub(""; "a"; "g")
""
"a"

# Replacing the start anchor with "" leaves the input unchanged.
gsub("^"; ""; "g")
"a"
"a"

# An empty pattern on a one-character input: "a" becomes "aaa".
gsub(""; "a"; "g")
"a"
"aaa"

# "$" matches only at the end: a single "a" is appended.
gsub("$"; "a"; "g")
"a"
"aa"

# "^" matches in the empty string, so the replacement is inserted once.
gsub("^"; "a")
""
"a"

# A zero-width lookahead inserts text before "u" without consuming it.
gsub("(?=u)"; "u")
"qux"
"quux"

# Greedy "^.*a" consumes all of "aaa" in a single match.
gsub("^.*a"; "b")
"aaa"
"b"

# Lazy "^.*?a" matches only the first "a"; the rest of the input is kept.
gsub("^.*?a"; "b")
"aaa"
"baa"

# The following is for regression testing and should not be construed as a requirement:
# a replacement expression that produces two outputs yields two results.
[gsub("a"; "b", "c")]
"a"
["b","c"]

# scan emits each matched substring; the outer array collects the matches
# from both input strings.
[.[] | scan(", ")]
["a,b, c, d, e,f",", a,b, c, d, e,f, "]
[", ",", ",", ",", ",", ",", ",", ",", "]

# ", *" (comma, optional spaces): sub, gsub and scan side by side per input.
[.[]|[[sub(", *";":")], [gsub(", *";":")], [scan(", *")]]]
["a,b, c, d, e,f",", a,b, c, d, e,f, "]
[[["a:b, c, d, e,f"],["a:b:c:d:e:f"],[",",", ",", ",", ",","]],[[":a,b, c, d, e,f, "],[":a:b:c:d:e:f:"],[", ",",",", ",", ",", ",",",", "]]]

# ", +" (comma, at least one space): bare commas are left alone.
[.[]|[[sub(", +";":")], [gsub(", +";":")], [scan(", +")]]]
["a,b, c, d, e,f",", a,b, c, d, e,f, "]
[[["a,b:c, d, e,f"],["a,b:c:d:e,f"],[", ",", ",", "]],[[":a,b, c, d, e,f, "],[":a,b:c:d:e,f:"],[", ",", ",", ",", ",", "]]]

# Case-insensitive scan over several inputs; the empty string contributes
# no matches.
[.[] | scan("b+"; "i")]
["","bBb","abcABBBCabbbc"]
["bBb","b","BBB","bbb"]

# reference to named captures
# Each match is replaced by its first character (.x) wrapped in "+" and "-".
gsub("(?<x>.)[^a]*"; "+\(.x)-")
"Abcabc"
"+A-+a-"

# A filter can be applied to a capture inside the replacement; "CD" contains
# no digit and is left untouched.
gsub("(?<x>.)(?<y>[0-9])"; "\(.x|ascii_downcase)\(.y)")
"A1 B2 CD"
"a1 b2 CD"

# The character following each word boundary is passed through
# ascii_downcase.
gsub("\\b(?<x>.)"; "\(.x|ascii_downcase)")
"ABC DEF"
"aBC dEF"

# The non-letter prefix of each match is dropped; the trailing "Z" shows an
# additional empty match at the end of the input.
gsub("[^a-z]*(?<x>[a-z]*)"; "Z\(.x)")
"123foo456bar"
"ZfooZbarZ"

# utf-8
# A non-ASCII character is captured and re-emitted intact.
sub("(?<x>.)"; "\(.x)!")
"’"
"’!"

# sub with a replacement that produces two outputs yields two results.
[sub("a"; "b", "c")]
"a"
["b","c"]

# Three replacement outputs: sub emits one result per output, each changing
# only the first character.
[sub("(?<a>.)"; "\(.a|ascii_upcase)", "\(.a|ascii_downcase)", "c")]
"aB"
["AB","aB","cB"]

# Same with gsub: each result applies one of the outputs to every character.
[gsub("(?<a>.)"; "\(.a|ascii_upcase)", "\(.a|ascii_downcase)", "c")]
"aB"
["AB","ab","cc"]

# splits and _nwise
# Splitting on the empty pattern yields empty strings at both ends.
[splits("")]
"ab"
["","a","b",""]