1
0
mirror of https://github.com/stedolan/jq.git synced 2024-05-11 05:55:39 +00:00

utf8bytelength: count UTF8 string bytelength

[Builtin name changed, and it only works on string inputs. -Nico]
This commit is contained in:
Joel Nothman
2015-09-08 12:51:42 +10:00
committed by Nicolas Williams
parent 63dd033da7
commit 83e8ec587f
3 changed files with 28 additions and 0 deletions

View File

@@ -662,6 +662,18 @@ sections:
input: '[[1,2], "string", {"a":2}, null]'
output: [2, 6, 1, 0]
- title: "`utf8bytelength`"
body: |
The builtin function `utf8bytelength` outputs the number of bytes
used to encode a string in UTF-8; non-string inputs raise an error.
examples:
- program: 'utf8bytelength'
input: '"\u03bc"'
output: [2]
- title: "`keys`, `keys_unsorted`"
body: |

View File

@@ -367,6 +367,12 @@ static jv f_tostring(jq_state *jq, jv input) {
}
}
/*
 * Builtin `utf8bytelength`: for a string input, yields the number of
 * bytes used to encode that string in UTF-8, as a jq number.
 *
 * Any input that is not a string (array, object, number, boolean, ...)
 * yields a type error with the message
 * "only strings have UTF-8 byte length".
 */
static jv f_utf8bytelength(jq_state *jq, jv input) {
  if (jv_get_kind(input) == JV_KIND_STRING) {
    return jv_number(jv_string_length_bytes(input));
  }
  return type_error(input, "only strings have UTF-8 byte length");
}
#define CHARS_ALPHANUM "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
static jv escape_string(jv input, const char* escapings) {
@@ -1273,6 +1279,7 @@ static const struct cfunction function_list[] = {
{(cfunction_ptr)f_greatereq, "_greatereq", 3},
{(cfunction_ptr)f_contains, "contains", 2},
{(cfunction_ptr)f_length, "length", 1},
{(cfunction_ptr)f_utf8bytelength, "utf8bytelength", 1},
{(cfunction_ptr)f_type, "type", 1},
{(cfunction_ptr)f_isinfinite, "isinfinite", 1},
{(cfunction_ptr)f_isnan, "isnan", 1},

View File

@@ -525,6 +525,15 @@ null
[[], {}, [1,2], {"a":42}, "asdf", "\u03bc"]
[0, 0, 2, 1, 4, 1]
utf8bytelength
"asdf\u03bc"
6
[.[] | try utf8bytelength catch .]
[[], {}, [1,2], 55, true, false]
["array ([]) only strings have UTF-8 byte length","object ({}) only strings have UTF-8 byte length","array ([1,2]) only strings have UTF-8 byte length","number (55) only strings have UTF-8 byte length","boolean (true) only strings have UTF-8 byte length","boolean (false) only strings have UTF-8 byte length"]
map(keys)
[{}, {"abcd":1,"abc":2,"abcde":3}, {"x":1, "z": 3, "y":2}]
[[], ["abc","abcd","abcde"], ["x","y","z"]]