mirror of
https://github.com/stedolan/jq.git
synced 2024-05-11 05:55:39 +00:00
'length' function now measures string length in codepoints, not bytes.
This commit is contained in:
@@ -187,7 +187,7 @@ static jv f_length(jv input) {
|
||||
} else if (jv_get_kind(input) == JV_KIND_OBJECT) {
|
||||
return jv_number(jv_object_length(input));
|
||||
} else if (jv_get_kind(input) == JV_KIND_STRING) {
|
||||
return jv_number(jv_string_length(input));
|
||||
return jv_number(jv_string_length_codepoints(input));
|
||||
} else if (jv_get_kind(input) == JV_KIND_NULL) {
|
||||
jv_free(input);
|
||||
return jv_number(0);
|
||||
@@ -220,7 +220,7 @@ static jv escape_string(jv input, const char* escapings) {
|
||||
|
||||
jv ret = jv_string("");
|
||||
const char* i = jv_string_value(input);
|
||||
const char* end = i + jv_string_length(jv_copy(input));
|
||||
const char* end = i + jv_string_length_bytes(jv_copy(input));
|
||||
const char* cstart;
|
||||
int c = 0;
|
||||
while ((i = jvp_utf8_next((cstart = i), end, &c))) {
|
||||
@@ -299,7 +299,7 @@ static jv f_format(jv input, jv fmt) {
|
||||
|
||||
jv line = jv_string("");
|
||||
const char* s = jv_string_value(input);
|
||||
for (int i=0; i<jv_string_length(jv_copy(input)); i++) {
|
||||
for (int i=0; i<jv_string_length_bytes(jv_copy(input)); i++) {
|
||||
unsigned ch = (unsigned)(unsigned char)*s;
|
||||
if (ch < 128 && unreserved[ch]) {
|
||||
line = jv_string_append_buf(line, s, 1);
|
||||
@@ -346,7 +346,7 @@ static jv f_format(jv input, jv fmt) {
|
||||
jv line = jv_string("");
|
||||
const char b64[64 + 1] = CHARS_ALPHANUM "+/";
|
||||
const char* data = jv_string_value(input);
|
||||
int len = jv_string_length(jv_copy(input));
|
||||
int len = jv_string_length_bytes(jv_copy(input));
|
||||
for (int i=0; i<len; i+=3) {
|
||||
uint32_t code = 0;
|
||||
int n = len - i >= 3 ? 3 : len-i;
|
||||
|
@@ -63,7 +63,7 @@ static void run_jq_tests(FILE *testdata) {
|
||||
pass = 0;
|
||||
}
|
||||
jv as_string = jv_dump_string(jv_copy(expected), rand() & ~JV_PRINT_COLOUR);
|
||||
jv reparsed = jv_parse_sized(jv_string_value(as_string), jv_string_length(jv_copy(as_string)));
|
||||
jv reparsed = jv_parse_sized(jv_string_value(as_string), jv_string_length_bytes(jv_copy(as_string)));
|
||||
assert(jv_equal(jv_copy(expected), jv_copy(reparsed)));
|
||||
jv_free(as_string);
|
||||
jv_free(reparsed);
|
||||
@@ -191,8 +191,8 @@ static void jv_test() {
|
||||
assert(jv_equal(jv_string("foo"), jv_string_sized("foo", 3)));
|
||||
char nasty[] = "foo\0";
|
||||
jv shortstr = jv_string(nasty), longstr = jv_string_sized(nasty, sizeof(nasty));
|
||||
assert(jv_string_length(shortstr) == (int)strlen(nasty));
|
||||
assert(jv_string_length(longstr) == (int)sizeof(nasty));
|
||||
assert(jv_string_length_bytes(shortstr) == (int)strlen(nasty));
|
||||
assert(jv_string_length_bytes(longstr) == (int)sizeof(nasty));
|
||||
|
||||
|
||||
char a1s[] = "hello", a2s[] = "hello", bs[] = "goodbye";
|
||||
@@ -213,7 +213,7 @@ static void jv_test() {
|
||||
for (int i=0; i<(int)sizeof(big); i++) big[i] = 'a';
|
||||
big[sizeof(big)-1] = 0;
|
||||
jv str = jv_string_fmt("%s", big);
|
||||
assert(jv_string_length(jv_copy(str)) == sizeof(big) - 1);
|
||||
assert(jv_string_length_bytes(jv_copy(str)) == sizeof(big) - 1);
|
||||
assert(!strcmp(big, jv_string_value(str)));
|
||||
jv_free(str);
|
||||
}
|
||||
|
13
jv.c
13
jv.c
@@ -8,6 +8,7 @@
|
||||
|
||||
#include "jv_alloc.h"
|
||||
#include "jv.h"
|
||||
#include "jv_unicode.h"
|
||||
|
||||
/*
|
||||
* Internal refcounting helpers
|
||||
@@ -530,13 +531,23 @@ jv jv_string(const char* str) {
|
||||
return jv_string_sized(str, strlen(str));
|
||||
}
|
||||
|
||||
int jv_string_length(jv j) {
|
||||
/*
 * Returns the length that jvp_string_length reports for the string held
 * in j (per this function's name, a count of bytes rather than characters).
 *
 * j must be a JV_KIND_STRING; this is enforced with assert().
 * The function calls jv_free(j) before returning, so the reference passed
 * in is released here. Callers that still need the value afterwards pass
 * jv_copy(value) instead of the value itself.
 */
int jv_string_length_bytes(jv j) {
|
||||
assert(jv_get_kind(j) == JV_KIND_STRING);
|
||||
int r = jvp_string_length(jvp_string_ptr(&j.val.nontrivial));
|
||||
/* Read the length first: j is released on the next statement. */
jv_free(j);
|
||||
return r;
|
||||
}
|
||||
|
||||
/*
 * Returns the number of times jvp_utf8_next can be advanced across the
 * bytes of the string held in j, i.e. one count per decoded unit
 * (presumably one per Unicode codepoint -- confirm against jvp_utf8_next).
 *
 * j must be a JV_KIND_STRING; this is enforced with assert().
 * The function calls jv_free(j) before returning, so the reference passed
 * in is released here.
 */
int jv_string_length_codepoints(jv j) {
|
||||
assert(jv_get_kind(j) == JV_KIND_STRING);
|
||||
const char* i = jv_string_value(j);
|
||||
/* jv_string_length_bytes frees its argument, so give it a copy and keep j alive. */
const char* end = i + jv_string_length_bytes(jv_copy(j));
|
||||
/* c is only an out-parameter for jvp_utf8_next; its value is never read here. */
int c = 0, len = 0;
|
||||
/* Advance until jvp_utf8_next returns a null pointer, counting each step. */
while ((i = jvp_utf8_next(i, end, &c))) len++;
|
||||
jv_free(j);
|
||||
return len;
|
||||
}
|
||||
|
||||
uint32_t jv_string_hash(jv j) {
|
||||
assert(jv_get_kind(j) == JV_KIND_STRING);
|
||||
uint32_t hash = jvp_string_hash(jvp_string_ptr(&j.val.nontrivial));
|
||||
|
3
jv.h
3
jv.h
@@ -82,7 +82,8 @@ jv jv_array_slice(jv, int, int);
|
||||
|
||||
jv jv_string(const char*);
|
||||
jv jv_string_sized(const char*, int);
|
||||
int jv_string_length(jv);
|
||||
int jv_string_length_bytes(jv);
|
||||
int jv_string_length_codepoints(jv);
|
||||
uint32_t jv_string_hash(jv);
|
||||
const char* jv_string_value(jv);
|
||||
jv jv_string_concat(jv, jv);
|
||||
|
4
jv_aux.c
4
jv_aux.c
@@ -380,8 +380,8 @@ jv jv_delpaths(jv object, jv paths) {
|
||||
static int string_cmp(const void* pa, const void* pb){
|
||||
const jv* a = pa;
|
||||
const jv* b = pb;
|
||||
int lena = jv_string_length(jv_copy(*a));
|
||||
int lenb = jv_string_length(jv_copy(*b));
|
||||
int lena = jv_string_length_bytes(jv_copy(*a));
|
||||
int lenb = jv_string_length_bytes(jv_copy(*b));
|
||||
int minlen = lena < lenb ? lena : lenb;
|
||||
int r = memcmp(jv_string_value(*a), jv_string_value(*b), minlen);
|
||||
if (r == 0) r = lena - lenb;
|
||||
|
@@ -45,7 +45,7 @@ static void put_space(int n, FILE* fout, jv* strout) {
|
||||
static void jvp_dump_string(jv str, int ascii_only, FILE* F, jv* S) {
|
||||
assert(jv_get_kind(str) == JV_KIND_STRING);
|
||||
const char* i = jv_string_value(str);
|
||||
const char* end = i + jv_string_length(jv_copy(str));
|
||||
const char* end = i + jv_string_length_bytes(jv_copy(str));
|
||||
const char* cstart;
|
||||
int c = 0;
|
||||
char buf[32];
|
||||
|
2
lexer.l
2
lexer.l
@@ -93,7 +93,7 @@ struct lexer_param;
|
||||
(\\[^u(]|\\u[a-zA-Z0-9]{0,4})+ {
|
||||
/* pass escapes to the json parser */
|
||||
jv escapes = jv_string_fmt("\"%.*s\"", yyleng, yytext);
|
||||
yylval->literal = jv_parse_sized(jv_string_value(escapes), jv_string_length(jv_copy(escapes)));
|
||||
yylval->literal = jv_parse_sized(jv_string_value(escapes), jv_string_length_bytes(jv_copy(escapes)));
|
||||
jv_free(escapes);
|
||||
return QQSTRING_TEXT;
|
||||
}
|
||||
|
2
main.c
2
main.c
@@ -69,7 +69,7 @@ static void process(jv value, int flags) {
|
||||
jv result;
|
||||
while (jv_is_valid(result = jq_next(jq))) {
|
||||
if ((options & RAW_OUTPUT) && jv_get_kind(result) == JV_KIND_STRING) {
|
||||
fwrite(jv_string_value(result), 1, jv_string_length(jv_copy(result)), stdout);
|
||||
fwrite(jv_string_value(result), 1, jv_string_length_bytes(jv_copy(result)), stdout);
|
||||
jv_free(result);
|
||||
} else {
|
||||
int dumpopts;
|
||||
|
@@ -302,8 +302,8 @@ null
|
||||
[false, false, false, false, false, false, false, false, true ]
|
||||
|
||||
[.[] | length]
|
||||
[[], {}, [1,2], {"a":42}, "asdf"]
|
||||
[0, 0, 2, 1, 4]
|
||||
[[], {}, [1,2], {"a":42}, "asdf", "\u03bc"]
|
||||
[0, 0, 2, 1, 4, 1]
|
||||
|
||||
map(keys)
|
||||
[{}, {"abcd":1,"abc":2,"abcde":3}, {"x":1, "z": 3, "y":2}]
|
||||
|
Reference in New Issue
Block a user