mirror of
https://github.com/stedolan/jq.git
synced 2024-05-11 05:55:39 +00:00
Instead of just using {"captures":[]}. The sub functions use captures for replacement expressions. If we don't populate captures for empty matches, the replacement expression is run with an empty object as input instead of an object containing the named captures with "" as value: * before: $ jq -n '"123foo456bar" | gsub("[^a-z]*(?<x>[a-z]*)"; "Z\(.x)")' "ZfooZbarZnull" * after: $ jq -n '"123foo456bar" | gsub("[^a-z]*(?<x>[a-z]*)"; "Z\(.x)")' "ZfooZbarZ" --- I also removed a redundant result = NULL; if (result) { ... }
174 lines
3.8 KiB
Plaintext
174 lines
3.8 KiB
Plaintext
# match builtin
|
||
[match("( )*"; "g")]
|
||
"abc"
|
||
[{"offset":0,"length":0,"string":"","captures":[{"offset":0,"string":"","length":0,"name":null}]},{"offset":1,"length":0,"string":"","captures":[{"offset":1,"string":"","length":0,"name":null}]},{"offset":2,"length":0,"string":"","captures":[{"offset":2,"string":"","length":0,"name":null}]},{"offset":3,"length":0,"string":"","captures":[{"offset":3,"string":"","length":0,"name":null}]}]
|
||
|
||
[match("( )*"; "gn")]
|
||
"abc"
|
||
[]
|
||
|
||
[match(""; "g")]
|
||
"ab"
|
||
[{"offset":0,"length":0,"string":"","captures":[]},{"offset":1,"length":0,"string":"","captures":[]},{"offset":2,"length":0,"string":"","captures":[]}]
|
||
|
||
[match("a"; "gi")]
|
||
"āáàä"
|
||
[]
|
||
|
||
[match(["(bar)"])]
|
||
"foo bar"
|
||
[{"offset": 4, "length": 3, "string": "bar", "captures":[{"offset": 4, "length": 3, "string": "bar", "name": null}]}]
|
||
|
||
# offsets account for combining codepoints and multi-byte UTF-8
|
||
[match("bar")]
|
||
"ā bar with a combining codepoint U+0304"
|
||
[{"offset": 3, "length": 3, "string": "bar", "captures":[]}]
|
||
|
||
# matches with combining codepoints still count them in their length
|
||
[match("bār")]
|
||
"a bār"
|
||
[{"offset": 2, "length": 4, "string": "bār", "captures":[]}]
|
||
|
||
[match(".+?\\b")]
|
||
"ā two-codepoint grapheme"
|
||
[{"offset": 0, "length": 2, "string": "ā", "captures":[]}]
|
||
|
||
[match(["foo (?<bar123>bar)? foo", "ig"])]
|
||
"foo bar foo foo  foo"
|
||
[{"offset": 0, "length": 11, "string": "foo bar foo", "captures":[{"offset": 4, "length": 3, "string": "bar", "name": "bar123"}]},{"offset":12, "length": 8, "string": "foo  foo", "captures":[{"offset": -1, "length": 0, "string": null, "name": "bar123"}]}]
|
||
|
||
# test builtin
|
||
[test("( )*"; "gn")]
|
||
"abc"
|
||
[false]
|
||
|
||
[test("ā")]
|
||
"ā"
|
||
[true]
|
||
|
||
capture("(?<a>[a-z]+)-(?<n>[0-9]+)")
|
||
"xyzzy-14"
|
||
{"a":"xyzzy","n":"14"}
|
||
|
||
|
||
# jq-coded utilities built on match:
|
||
#
|
||
# The second element in these tests' inputs tests the case where the
|
||
# fromstring matches both the head and tail of the string
|
||
[.[] | sub(", "; ":")]
|
||
["a,b, c, d, e,f", ", a,b, c, d, e,f, "]
|
||
["a,b:c, d, e,f",":a,b, c, d, e,f, "]
|
||
|
||
sub("^(?<head>.)"; "Head=\(.head) Tail=")
|
||
"abcdef"
|
||
"Head=a Tail=bcdef"
|
||
|
||
[.[] | gsub(", "; ":")]
|
||
["a,b, c, d, e,f",", a,b, c, d, e,f, "]
|
||
["a,b:c:d:e,f",":a,b:c:d:e,f:"]
|
||
|
||
gsub("(?<d>\\d)"; ":\(.d);")
|
||
"a1b2"
|
||
"a:1;b:2;"
|
||
|
||
gsub("a";"b")
|
||
"aaaaa"
|
||
"bbbbb"
|
||
|
||
gsub("(.*)"; ""; "x")
|
||
""
|
||
""
|
||
|
||
gsub(""; "a"; "g")
|
||
""
|
||
"a"
|
||
|
||
gsub("^"; ""; "g")
|
||
"a"
|
||
"a"
|
||
|
||
gsub(""; "a"; "g")
|
||
"a"
|
||
"aaa"
|
||
|
||
gsub("$"; "a"; "g")
|
||
"a"
|
||
"aa"
|
||
|
||
gsub("^"; "a")
|
||
""
|
||
"a"
|
||
|
||
gsub("(?=u)"; "u")
|
||
"qux"
|
||
"quux"
|
||
|
||
gsub("^.*a"; "b")
|
||
"aaa"
|
||
"b"
|
||
|
||
gsub("^.*?a"; "b")
|
||
"aaa"
|
||
"baa"
|
||
|
||
# The following is for regression testing and should not be construed as a requirement:
|
||
[gsub("a"; "b", "c")]
|
||
"a"
|
||
["b","c"]
|
||
|
||
[.[] | scan(", ")]
|
||
["a,b, c, d, e,f",", a,b, c, d, e,f, "]
|
||
[", ",", ",", ",", ",", ",", ",", ",", "]
|
||
|
||
[.[]|[[sub(", *";":")], [gsub(", *";":")], [scan(", *")]]]
|
||
["a,b, c, d, e,f",", a,b, c, d, e,f, "]
|
||
[[["a:b, c, d, e,f"],["a:b:c:d:e:f"],[",",", ",", ",", ",","]],[[":a,b, c, d, e,f, "],[":a:b:c:d:e:f:"],[", ",",",", ",", ",", ",",",", "]]]
|
||
|
||
[.[]|[[sub(", +";":")], [gsub(", +";":")], [scan(", +")]]]
|
||
["a,b, c, d, e,f",", a,b, c, d, e,f, "]
|
||
[[["a,b:c, d, e,f"],["a,b:c:d:e,f"],[", ",", ",", "]],[[":a,b, c, d, e,f, "],[":a,b:c:d:e,f:"],[", ",", ",", ",", ",", "]]]
|
||
|
||
[.[] | scan("b+"; "i")]
|
||
["","bBb","abcABBBCabbbc"]
|
||
["bBb","b","BBB","bbb"]
|
||
|
||
# reference to named captures
|
||
gsub("(?<x>.)[^a]*"; "+\(.x)-")
|
||
"Abcabc"
|
||
"+A-+a-"
|
||
|
||
gsub("(?<x>.)(?<y>[0-9])"; "\(.x|ascii_downcase)\(.y)")
|
||
"A1 B2 CD"
|
||
"a1 b2 CD"
|
||
|
||
gsub("\\b(?<x>.)"; "\(.x|ascii_downcase)")
|
||
"ABC DEF"
|
||
"aBC dEF"
|
||
|
||
gsub("[^a-z]*(?<x>[a-z]*)"; "Z\(.x)")
|
||
"123foo456bar"
|
||
"ZfooZbarZ"
|
||
|
||
# utf-8
|
||
sub("(?<x>.)"; "\(.x)!")
|
||
"’"
|
||
"’!"
|
||
|
||
[sub("a"; "b", "c")]
|
||
"a"
|
||
["b","c"]
|
||
|
||
[sub("(?<a>.)"; "\(.a|ascii_upcase)", "\(.a|ascii_downcase)", "c")]
|
||
"aB"
|
||
["AB","aB","cB"]
|
||
|
||
[gsub("(?<a>.)"; "\(.a|ascii_upcase)", "\(.a|ascii_downcase)", "c")]
|
||
"aB"
|
||
["AB","ab","cc"]
|
||
|
||
# splits and _nwise
|
||
[splits("")]
|
||
"ab"
|
||
["","a","b",""]
|
||
|