1
0
mirror of https://github.com/stedolan/jq.git synced 2024-05-11 05:55:39 +00:00
stedolan-jq/tests/onig.test
Emanuele Torre 01dfd8b86d Populate captures also for zero-width matches
Instead of just using {"captures":[]}.

The sub functions use captures for replacement expressions.
If we don't populate captures for empty matches, the replacement
expression is run with an empty object as input instead of an object
containing the named captures with "" as value:

* before:

  $ jq -n '"123foo456bar" | gsub("[^a-z]*(?<x>[a-z]*)"; "Z\(.x)")'
  "ZfooZbarZnull"

* after:

  $ jq -n '"123foo456bar" | gsub("[^a-z]*(?<x>[a-z]*)"; "Z\(.x)")'
  "ZfooZbarZ"

---

I also removed a redundant

  result = NULL;
  if (result) {
    ...
  }
2023-07-18 12:12:50 -05:00

174 lines
3.8 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# match builtin
[match("( )*"; "g")]
"abc"
[{"offset":0,"length":0,"string":"","captures":[{"offset":0,"string":"","length":0,"name":null}]},{"offset":1,"length":0,"string":"","captures":[{"offset":1,"string":"","length":0,"name":null}]},{"offset":2,"length":0,"string":"","captures":[{"offset":2,"string":"","length":0,"name":null}]},{"offset":3,"length":0,"string":"","captures":[{"offset":3,"string":"","length":0,"name":null}]}]
[match("( )*"; "gn")]
"abc"
[]
[match(""; "g")]
"ab"
[{"offset":0,"length":0,"string":"","captures":[]},{"offset":1,"length":0,"string":"","captures":[]},{"offset":2,"length":0,"string":"","captures":[]}]
[match("a"; "gi")]
"āáàä"
[]
[match(["(bar)"])]
"foo bar"
[{"offset": 4, "length": 3, "string": "bar", "captures":[{"offset": 4, "length": 3, "string": "bar", "name": null}]}]
# offsets account for combining codepoints and multi-byte UTF-8
[match("bar")]
"ā bar with a combining codepoint U+0304"
[{"offset": 3, "length": 3, "string": "bar", "captures":[]}]
# matches with combining codepoints still count them in their length
[match("bār")]
"a bār"
[{"offset": 2, "length": 4, "string": "bār", "captures":[]}]
[match(".+?\\b")]
"ā two-codepoint grapheme"
[{"offset": 0, "length": 2, "string": "ā", "captures":[]}]
[match(["foo (?<bar123>bar)? foo", "ig"])]
"foo bar foo foo foo"
[{"offset": 0, "length": 11, "string": "foo bar foo", "captures":[{"offset": 4, "length": 3, "string": "bar", "name": "bar123"}]},{"offset":12, "length": 8, "string": "foo foo", "captures":[{"offset": -1, "length": 0, "string": null, "name": "bar123"}]}]
# test builtin
[test("( )*"; "gn")]
"abc"
[false]
[test("ā")]
"ā"
[true]
capture("(?<a>[a-z]+)-(?<n>[0-9]+)")
"xyzzy-14"
{"a":"xyzzy","n":"14"}
# jq-coded utilities built on match:
#
# The second element of each input array in these tests covers the case
# where the pattern matches at both the head and the tail of the string
[.[] | sub(", "; ":")]
["a,b, c, d, e,f", ", a,b, c, d, e,f, "]
["a,b:c, d, e,f",":a,b, c, d, e,f, "]
sub("^(?<head>.)"; "Head=\(.head) Tail=")
"abcdef"
"Head=a Tail=bcdef"
[.[] | gsub(", "; ":")]
["a,b, c, d, e,f",", a,b, c, d, e,f, "]
["a,b:c:d:e,f",":a,b:c:d:e,f:"]
gsub("(?<d>\\d)"; ":\(.d);")
"a1b2"
"a:1;b:2;"
gsub("a";"b")
"aaaaa"
"bbbbb"
gsub("(.*)"; ""; "x")
""
""
gsub(""; "a"; "g")
""
"a"
gsub("^"; ""; "g")
"a"
"a"
gsub(""; "a"; "g")
"a"
"aaa"
gsub("$"; "a"; "g")
"a"
"aa"
gsub("^"; "a")
""
"a"
gsub("(?=u)"; "u")
"qux"
"quux"
gsub("^.*a"; "b")
"aaa"
"b"
gsub("^.*?a"; "b")
"aaa"
"baa"
# The following is for regression testing and should not be construed as a requirement:
[gsub("a"; "b", "c")]
"a"
["b","c"]
[.[] | scan(", ")]
["a,b, c, d, e,f",", a,b, c, d, e,f, "]
[", ",", ",", ",", ",", ",", ",", ",", "]
[.[]|[[sub(", *";":")], [gsub(", *";":")], [scan(", *")]]]
["a,b, c, d, e,f",", a,b, c, d, e,f, "]
[[["a:b, c, d, e,f"],["a:b:c:d:e:f"],[",",", ",", ",", ",","]],[[":a,b, c, d, e,f, "],[":a:b:c:d:e:f:"],[", ",",",", ",", ",", ",",",", "]]]
[.[]|[[sub(", +";":")], [gsub(", +";":")], [scan(", +")]]]
["a,b, c, d, e,f",", a,b, c, d, e,f, "]
[[["a,b:c, d, e,f"],["a,b:c:d:e,f"],[", ",", ",", "]],[[":a,b, c, d, e,f, "],[":a,b:c:d:e,f:"],[", ",", ",", ",", ",", "]]]
[.[] | scan("b+"; "i")]
["","bBb","abcABBBCabbbc"]
["bBb","b","BBB","bbb"]
# reference to named captures
gsub("(?<x>.)[^a]*"; "+\(.x)-")
"Abcabc"
"+A-+a-"
gsub("(?<x>.)(?<y>[0-9])"; "\(.x|ascii_downcase)\(.y)")
"A1 B2 CD"
"a1 b2 CD"
gsub("\\b(?<x>.)"; "\(.x|ascii_downcase)")
"ABC DEF"
"aBC dEF"
gsub("[^a-z]*(?<x>[a-z]*)"; "Z\(.x)")
"123foo456bar"
"ZfooZbarZ"
# utf-8
sub("(?<x>.)"; "\(.x)!")
"’"
"’!"
[sub("a"; "b", "c")]
"a"
["b","c"]
[sub("(?<a>.)"; "\(.a|ascii_upcase)", "\(.a|ascii_downcase)", "c")]
"aB"
["AB","aB","cB"]
[gsub("(?<a>.)"; "\(.a|ascii_upcase)", "\(.a|ascii_downcase)", "c")]
"aB"
["AB","ab","cc"]
# splits and _nwise
[splits("")]
"ab"
["","a","b",""]