mirror of
https://github.com/stedolan/jq.git
synced 2024-05-11 05:55:39 +00:00
Add string slicing
This commit is contained in:
57
jv.c
57
jv.c
@@ -687,6 +687,63 @@ const char* jv_string_value(jv j) {
|
|||||||
return jvp_string_ptr(&j.val.nontrivial)->data;
|
return jvp_string_ptr(&j.val.nontrivial)->data;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Return the substring of the string `j` that lies between the codepoint
 * indices `start` (inclusive) and `end` (exclusive).
 *
 * Negative indices count backwards from the end of the string, measured in
 * codepoints. Indices that fall outside the string are clamped, and an `end`
 * that precedes `start` yields an empty slice.
 *
 * `j` must be a string (asserted) and is consumed: it is freed on every
 * return path. The result is a newly allocated string, or an invalid value
 * carrying the message "Invalid UTF-8 string" when the decoder reports a
 * codepoint of -1 while scanning.
 */
jv jv_string_slice(jv j, int start, int end) {
  assert(jv_get_kind(j) == JV_KIND_STRING);
  const char *s = jv_string_value(j);
  int len = jv_string_length_bytes(jv_copy(j));
  int i;
  const char *p, *e;
  int c;
  jv res;

  /*
   * start/end are codepoint indices, so a negative index has to be resolved
   * against the codepoint count; using the byte length would land on the
   * wrong character for any string containing multi-byte sequences. The
   * count is only computed when it is actually needed.
   */
  if (start < 0 || end < 0) {
    int ncodepoints = jv_string_length_codepoints(jv_copy(j));
    if (start < 0) start = ncodepoints + start;
    if (end < 0) end = ncodepoints + end;
  }

  /*
   * Clamp against the byte length: it is an upper bound on the number of
   * codepoints, and the scanning loops below stop early if the indices run
   * past the real end of the string.
   */
  if (start < 0) start = 0;
  if (start > len) start = len;
  if (end > len) end = len;
  if (end < start) end = start;

  /* Defensive check; cannot trigger after the clamping above. */
  if (start < 0 || start > end || end > len) {
    jv_free(j);  /* j is consumed on every path, including this one */
    return jv_invalid_with_msg(jv_string("Invalid string slice indices"));
  }
  assert(0 <= start && start <= end && end <= len);

  /* Look for byte offset corresponding to start codepoints */
  for (p = s, i = 0; i < start; i++) {
    p = jvp_utf8_next(p, s + len, &c);
    if (p == NULL) {
      /*
       * NULL is treated as "ran off the end of the input": the slice starts
       * past the last codepoint, so the result is an empty string.
       */
      jv_free(j);
      return jv_string_empty(16);
    }
    if (c == -1) {
      jv_free(j);
      return jv_invalid_with_msg(jv_string("Invalid UTF-8 string"));
    }
  }

  /* Look for byte offset corresponding to end codepoints */
  for (e = p; e != NULL && i < end; i++) {
    e = jvp_utf8_next(e, s + len, &c);
    if (e == NULL) {
      /* end lies past the last codepoint: slice to the end of the string */
      e = s + len;
      break;
    }
    if (c == -1) {
      jv_free(j);
      return jv_invalid_with_msg(jv_string("Invalid UTF-8 string"));
    }
  }

  /*
   * NOTE: Ideally we should do here what jvp_array_slice() does instead
   * of allocating a new string as we do!  However, we assume NUL-
   * terminated strings all over, and in the jv API, so for now we copy
   * the selected bytes into a fresh string.
   */
  res = jv_string_sized(p, e - p);
  jv_free(j);
  return res;
}
|
||||||
|
|
||||||
jv jv_string_concat(jv a, jv b) {
|
jv jv_string_concat(jv a, jv b) {
|
||||||
jvp_string* sb = jvp_string_ptr(&b.val.nontrivial);
|
jvp_string* sb = jvp_string_ptr(&b.val.nontrivial);
|
||||||
jvp_string_append(&a.val.nontrivial, sb->data, jvp_string_length(sb));
|
jvp_string_append(&a.val.nontrivial, sb->data, jvp_string_length(sb));
|
||||||
|
|||||||
1
jv.h
1
jv.h
@@ -82,6 +82,7 @@ int jv_string_length_bytes(jv);
|
|||||||
int jv_string_length_codepoints(jv);
|
int jv_string_length_codepoints(jv);
|
||||||
unsigned long jv_string_hash(jv);
|
unsigned long jv_string_hash(jv);
|
||||||
const char* jv_string_value(jv);
|
const char* jv_string_value(jv);
|
||||||
|
jv jv_string_slice(jv j, int start, int end);
|
||||||
jv jv_string_concat(jv, jv);
|
jv jv_string_concat(jv, jv);
|
||||||
jv jv_string_fmt(const char*, ...);
|
jv jv_string_fmt(const char*, ...);
|
||||||
jv jv_string_append_codepoint(jv a, uint32_t c);
|
jv jv_string_append_codepoint(jv a, uint32_t c);
|
||||||
|
|||||||
16
jv_aux.c
16
jv_aux.c
@@ -3,15 +3,19 @@
|
|||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
#include "jv_alloc.h"
|
#include "jv_alloc.h"
|
||||||
|
|
||||||
static int parse_slice(jv array, jv slice, int* pstart, int* pend) {
|
static int parse_slice(jv j, jv slice, int* pstart, int* pend) {
|
||||||
// Array slices
|
// Array slices
|
||||||
int len = jv_array_length(jv_copy(array));
|
|
||||||
jv start_jv = jv_object_get(jv_copy(slice), jv_string("start"));
|
jv start_jv = jv_object_get(jv_copy(slice), jv_string("start"));
|
||||||
jv end_jv = jv_object_get(slice, jv_string("end"));
|
jv end_jv = jv_object_get(slice, jv_string("end"));
|
||||||
if (jv_get_kind(start_jv) == JV_KIND_NULL) {
|
if (jv_get_kind(start_jv) == JV_KIND_NULL) {
|
||||||
jv_free(start_jv);
|
jv_free(start_jv);
|
||||||
start_jv = jv_number(0);
|
start_jv = jv_number(0);
|
||||||
}
|
}
|
||||||
|
int len;
|
||||||
|
if (jv_get_kind(j) == JV_KIND_ARRAY)
|
||||||
|
len = jv_array_length(jv_copy(j));
|
||||||
|
else
|
||||||
|
len = jv_string_length_codepoints(jv_copy(j));
|
||||||
if (jv_get_kind(end_jv) == JV_KIND_NULL) {
|
if (jv_get_kind(end_jv) == JV_KIND_NULL) {
|
||||||
jv_free(end_jv);
|
jv_free(end_jv);
|
||||||
end_jv = jv_number(len);
|
end_jv = jv_number(len);
|
||||||
@@ -61,6 +65,14 @@ jv jv_get(jv t, jv k) {
|
|||||||
v = jv_invalid_with_msg(jv_string_fmt("Start and end indices of an array slice must be numbers"));
|
v = jv_invalid_with_msg(jv_string_fmt("Start and end indices of an array slice must be numbers"));
|
||||||
jv_free(t);
|
jv_free(t);
|
||||||
}
|
}
|
||||||
|
} else if (jv_get_kind(t) == JV_KIND_STRING && jv_get_kind(k) == JV_KIND_OBJECT) {
|
||||||
|
int start, end;
|
||||||
|
if (parse_slice(t, k, &start, &end)) {
|
||||||
|
v = jv_string_slice(t, start, end);
|
||||||
|
} else {
|
||||||
|
v = jv_invalid_with_msg(jv_string_fmt("Start and end indices of an string slice must be numbers"));
|
||||||
|
jv_free(t);
|
||||||
|
}
|
||||||
} else if (jv_get_kind(t) == JV_KIND_NULL &&
|
} else if (jv_get_kind(t) == JV_KIND_NULL &&
|
||||||
(jv_get_kind(k) == JV_KIND_STRING ||
|
(jv_get_kind(k) == JV_KIND_STRING ||
|
||||||
jv_get_kind(k) == JV_KIND_NUMBER ||
|
jv_get_kind(k) == JV_KIND_NUMBER ||
|
||||||
|
|||||||
Reference in New Issue
Block a user