2015-03-08 18:56:51 -05:00
# define _BSD_SOURCE
2015-03-06 00:14:15 -06:00
# define _XOPEN_SOURCE
2015-03-08 18:56:51 -05:00
# include <sys/time.h>
# include <alloca.h>
2014-06-13 17:51:41 -05:00
# include <assert.h>
2015-03-06 00:14:15 -06:00
# include <ctype.h>
2014-06-13 17:51:41 -05:00
# include <limits.h>
2013-06-17 20:50:45 -05:00
# include <math.h>
2015-02-14 13:31:34 -06:00
# ifdef HAVE_ONIGURUMA
2014-06-18 19:49:38 -04:00
# include <oniguruma.h>
2015-02-14 13:31:34 -06:00
# endif
2013-05-17 03:03:42 +10:00
# include <stdlib.h>
2012-09-18 10:17:38 +01:00
# include <string.h>
2015-03-06 00:14:15 -06:00
# include <time.h>
2012-08-16 01:00:30 +01:00
# include "builtin.h"
2012-09-17 20:59:34 +01:00
# include "compile.h"
2012-11-28 01:08:23 -06:00
# include "jq_parser.h"
2013-06-21 12:06:28 +01:00
# include "bytecode.h"
2014-08-10 16:52:03 -05:00
# include "linker.h"
2012-09-18 10:17:38 +01:00
# include "locfile.h"
2012-12-27 20:49:34 +00:00
# include "jv_unicode.h"
2012-09-18 10:17:38 +01:00
2012-12-02 22:12:08 +00:00
/*
 * Build an error value whose message reads "<kind of bad> <msg>".
 *
 * bad: offending value; only its kind name is used, then it is freed here.
 * msg: description appended after the kind name.
 * Returns the value produced by jv_invalid_with_msg().
 */
static jv type_error(jv bad, const char *msg) {
  /* No padding inside the format: the message must start with the kind name. */
  jv err = jv_invalid_with_msg(jv_string_fmt("%s %s",
                                             jv_kind_name(jv_get_kind(bad)),
                                             msg));
  jv_free(bad);
  return err;
}
/*
 * Two-operand variant of type_error(): the message reads
 * "<kind of bad1> and <kind of bad2> <msg>".
 *
 * Both operands are freed here after their kind names have been read.
 */
static jv type_error2(jv bad1, jv bad2, const char *msg) {
  /* No padding inside the format: the message must start with the kind name. */
  jv err = jv_invalid_with_msg(jv_string_fmt("%s and %s %s",
                                             jv_kind_name(jv_get_kind(bad1)),
                                             jv_kind_name(jv_get_kind(bad2)),
                                             msg));
  jv_free(bad1);
  jv_free(bad2);
  return err;
}
2014-07-07 22:49:46 -05:00
/*
 * Implements `a + b`. `input` is not used and is freed immediately.
 *
 *   null   + x      -> x        (and x + null -> x)
 *   number + number -> arithmetic sum
 *   string + string -> jv_string_concat
 *   array  + array  -> jv_array_concat
 *   object + object -> jv_object_merge
 *
 * Any other pairing yields a type error naming both kinds.
 */
static jv f_plus(jq_state *jq, jv input, jv a, jv b) {
  jv_free(input);

  jv_kind ak = jv_get_kind(a);
  jv_kind bk = jv_get_kind(b);

  if (ak == JV_KIND_NULL) {
    jv_free(a);
    return b;
  }
  if (bk == JV_KIND_NULL) {
    jv_free(b);
    return a;
  }
  if (ak == JV_KIND_NUMBER && bk == JV_KIND_NUMBER) {
    jv sum = jv_number(jv_number_value(a) + jv_number_value(b));
    /* Release the operands once read, as f_negate does with its input. */
    jv_free(a);
    jv_free(b);
    return sum;
  }
  if (ak == JV_KIND_STRING && bk == JV_KIND_STRING)
    return jv_string_concat(a, b);
  if (ak == JV_KIND_ARRAY && bk == JV_KIND_ARRAY)
    return jv_array_concat(a, b);
  if (ak == JV_KIND_OBJECT && bk == JV_KIND_OBJECT)
    return jv_object_merge(a, b);

  return type_error2(a, b, "cannot be added");
}
2014-02-21 08:46:56 +00:00
/*
 * Generator for one-argument math builtins. Each LIBM_DD(name) expansion
 * defines f_<name>, which applies name() to a numeric input and returns a
 * type error for any other kind. The expansions themselves presumably come
 * from "libm.h", which is included while the macro is defined.
 */
#define LIBM_DD(name) \
static jv f_ ## name(jq_state *jq, jv input) { \
  if (jv_get_kind(input) != JV_KIND_NUMBER) { \
    return type_error(input, "number required"); \
  } \
  jv ret = jv_number(name(jv_number_value(input))); \
  jv_free(input); \
  return ret; \
}
#include "libm.h"
#undef LIBM_DD
2013-09-11 20:22:56 -04:00
2014-07-07 22:49:46 -05:00
/*
 * Unary minus. Returns the negated number, or a type error
 * ("<kind> cannot be negated") when the input is not a number.
 */
static jv f_negate(jq_state *jq, jv input) {
  if (jv_get_kind(input) != JV_KIND_NUMBER)
    return type_error(input, "cannot be negated");

  jv negated = jv_number(-jv_number_value(input));
  jv_free(input);
  return negated;
}
2014-07-07 22:49:46 -05:00
/*
 * Byte-wise prefix test: true when string `a` begins with string `b`.
 *
 * Returns an error value if either operand is not a string.
 * Both operands are released on every path, including the error path,
 * which previously returned without freeing them.
 */
static jv f_startswith(jq_state *jq, jv a, jv b) {
  if (jv_get_kind(a) != JV_KIND_STRING || jv_get_kind(b) != JV_KIND_STRING) {
    jv_free(a);
    jv_free(b);
    return jv_invalid_with_msg(jv_string("startswith() requires string inputs"));
  }

  int alen = jv_string_length_bytes(jv_copy(a));
  int blen = jv_string_length_bytes(jv_copy(b));

  /* The length check comes first so memcmp never reads past the end of a. */
  jv ret = jv_bool(blen <= alen &&
                   memcmp(jv_string_value(a), jv_string_value(b), blen) == 0);

  jv_free(a);
  jv_free(b);
  return ret;
}
2014-07-07 22:49:46 -05:00
/*
 * Byte-wise suffix test: true when string `a` ends with string `b`.
 *
 * Returns an error value if either operand is not a string.
 * Both operands are released on every path, including the error path,
 * which previously returned without freeing them.
 */
static jv f_endswith(jq_state *jq, jv a, jv b) {
  if (jv_get_kind(a) != JV_KIND_STRING || jv_get_kind(b) != JV_KIND_STRING) {
    jv_free(a);
    jv_free(b);
    return jv_invalid_with_msg(jv_string("endswith() requires string inputs"));
  }

  const char *astr = jv_string_value(a);
  const char *bstr = jv_string_value(b);
  size_t alen = jv_string_length_bytes(jv_copy(a));
  size_t blen = jv_string_length_bytes(jv_copy(b));

  /* alen >= blen is checked first so that alen - blen cannot wrap around. */
  jv ret = jv_bool(alen >= blen &&
                   memcmp(astr + (alen - blen), bstr, blen) == 0);

  jv_free(a);
  jv_free(b);
  return ret;
}
2014-07-07 22:49:46 -05:00
/*
 * Remove the prefix `left` from `input` when input starts with it.
 * In every other case (including non-string operands, for which
 * f_startswith reports an error) `input` is returned unchanged.
 */
static jv f_ltrimstr(jq_state *jq, jv input, jv left) {
  jv has_prefix = f_startswith(jq, jv_copy(input), jv_copy(left));
  jv_kind verdict = jv_get_kind(has_prefix);
  /* The probe result can be an error value carrying a message; release it. */
  jv_free(has_prefix);

  if (verdict != JV_KIND_TRUE) {
    jv_free(left);
    return input;
  }
  /*
   * FIXME It'd be better to share the suffix with the original input --
   * that we could do, we just can't share prefixes.
   */
  int prefixlen = jv_string_length_bytes(left);
  jv res = jv_string_sized(jv_string_value(input) + prefixlen,
                           jv_string_length_bytes(jv_copy(input)) - prefixlen);
  jv_free(input);
  return res;
}
2014-07-07 22:49:46 -05:00
/*
 * Remove the suffix `right` from `input` when input ends with it.
 * In every other case (including non-string operands, for which
 * f_endswith reports an error) `input` is returned unchanged.
 */
static jv f_rtrimstr(jq_state *jq, jv input, jv right) {
  jv has_suffix = f_endswith(jq, jv_copy(input), jv_copy(right));
  jv_kind verdict = jv_get_kind(has_suffix);
  /* The probe result can be an error value carrying a message; release it. */
  jv_free(has_suffix);

  if (verdict == JV_KIND_TRUE) {
    jv res = jv_string_sized(jv_string_value(input),
                             jv_string_length_bytes(jv_copy(input)) -
                               jv_string_length_bytes(right));
    jv_free(input);
    return res;
  }
  jv_free(right);
  return input;
}
2014-07-07 22:49:46 -05:00
/*
 * Implements `a - b`. `input` is not used and is freed immediately.
 *
 *   number - number -> arithmetic difference
 *   array  - array  -> the elements of a, in order, that are not equal
 *                      (per jv_equal) to any element of b
 *
 * Any other pairing yields a type error naming both kinds.
 */
static jv f_minus(jq_state *jq, jv input, jv a, jv b) {
  jv_free(input);

  if (jv_get_kind(a) == JV_KIND_NUMBER && jv_get_kind(b) == JV_KIND_NUMBER) {
    jv diff = jv_number(jv_number_value(a) - jv_number_value(b));
    /* Release the operands once read, as f_negate does with its input. */
    jv_free(a);
    jv_free(b);
    return diff;
  }

  if (jv_get_kind(a) == JV_KIND_ARRAY && jv_get_kind(b) == JV_KIND_ARRAY) {
    jv out = jv_array();
    jv_array_foreach(a, i, x) {
      int include = 1;
      jv_array_foreach(b, j, y) {
        /* y is handed to jv_equal; x is copied because it is still needed. */
        if (jv_equal(jv_copy(x), y)) {
          include = 0;
          break;
        }
      }
      if (include)
        out = jv_array_append(out, jv_copy(x));
      jv_free(x);
    }
    jv_free(a);
    jv_free(b);
    return out;
  }

  return type_error2(a, b, "cannot be subtracted");
}
2014-07-07 22:49:46 -05:00
/*
 * Implements `a * b`. `input` is not used and is freed immediately.
 *
 *   number * number -> arithmetic product
 *   string * number -> (either order) the string repeated; with
 *                      n = (int)(number - 1): n < 0 gives null, otherwise
 *                      the result holds n + 1 copies of the string
 *   object * object -> jv_object_merge_recursive
 *
 * Any other pairing yields a type error naming both kinds.
 */
static jv f_multiply(jq_state *jq, jv input, jv a, jv b) {
  jv_kind ak = jv_get_kind(a);
  jv_kind bk = jv_get_kind(b);
  jv_free(input);

  if (ak == JV_KIND_NUMBER && bk == JV_KIND_NUMBER) {
    jv product = jv_number(jv_number_value(a) * jv_number_value(b));
    /* Release the operands once read, as f_negate does with its input. */
    jv_free(a);
    jv_free(b);
    return product;
  }

  if ((ak == JV_KIND_STRING && bk == JV_KIND_NUMBER) ||
      (ak == JV_KIND_NUMBER && bk == JV_KIND_STRING)) {
    jv str = a;
    jv num = b;
    if (ak == JV_KIND_NUMBER) {
      str = b;
      num = a;
    }

    int n = jv_number_value(num) - 1;
    jv_free(num);
    if (n < 0) {
      jv_free(str);
      return jv_null();
    }

    size_t alen = jv_string_length_bytes(jv_copy(str));
    /*
     * Accumulate into a separate reference. Previously the accumulator
     * aliased `str`, so once an append replaced the buffer the next
     * iteration read the source bytes through a stale handle.
     */
    jv res = jv_copy(str);
    for (; n > 0; n--)
      res = jv_string_append_buf(res, jv_string_value(str), alen);
    jv_free(str);
    return res;
  }

  if (ak == JV_KIND_OBJECT && bk == JV_KIND_OBJECT)
    return jv_object_merge_recursive(a, b);

  return type_error2(a, b, "cannot be multiplied");
}
2014-07-07 22:49:46 -05:00
/*
 * Implements `a / b`. `input` is not used and is freed immediately.
 *
 *   number / number -> floating-point quotient (no zero check is made here)
 *   string / string -> jv_string_split(a, b)
 *
 * Any other pairing yields a type error naming both kinds.
 */
static jv f_divide(jq_state *jq, jv input, jv a, jv b) {
  jv_free(input);

  if (jv_get_kind(a) == JV_KIND_NUMBER && jv_get_kind(b) == JV_KIND_NUMBER) {
    jv quotient = jv_number(jv_number_value(a) / jv_number_value(b));
    /* Release the operands once read, as f_negate does with its input. */
    jv_free(a);
    jv_free(b);
    return quotient;
  }
  if (jv_get_kind(a) == JV_KIND_STRING && jv_get_kind(b) == JV_KIND_STRING)
    return jv_string_split(a, b);

  return type_error2(a, b, "cannot be divided");
}
2014-07-07 22:49:46 -05:00
/*
 * Implements `a % b` on numbers. Both operands are truncated to intmax_t
 * before the remainder is taken. `input` is not used and is freed.
 *
 * Returns an error value when the truncated divisor is zero, and a type
 * error when either operand is not a number.
 *
 * NOTE(review): converting a NaN or out-of-range double to intmax_t is
 * still undefined; that pre-existing caveat is not addressed here.
 */
static jv f_mod(jq_state *jq, jv input, jv a, jv b) {
  jv_free(input);

  if (jv_get_kind(a) != JV_KIND_NUMBER || jv_get_kind(b) != JV_KIND_NUMBER)
    return type_error2(a, b, "cannot be divided");

  intmax_t divisor = (intmax_t)jv_number_value(b);
  if (divisor == 0) {
    jv_free(a);
    jv_free(b);
    return jv_invalid_with_msg(jv_string("Cannot mod by zero."));
  }

  intmax_t dividend = (intmax_t)jv_number_value(a);
  jv_free(a);
  jv_free(b);

  /*
   * x % -1 is 0 for every x, but evaluating it overflows (undefined
   * behaviour) when x is INTMAX_MIN, so answer directly.
   */
  if (divisor == -1)
    return jv_number(0);

  return jv_number(dividend % divisor);
}
2014-07-07 22:49:46 -05:00
/* Implements `a == b` via jv_equal; `input` is unused and freed. */
static jv f_equal(jq_state *jq, jv input, jv a, jv b) {
  jv_free(input);
  int same = jv_equal(a, b);
  return jv_bool(same);
}
2014-07-07 22:49:46 -05:00
/* Implements `a != b` as the negation of jv_equal; `input` is unused and freed. */
static jv f_notequal(jq_state *jq, jv input, jv a, jv b) {
  jv_free(input);
  int differs = !jv_equal(a, b);
  return jv_bool(differs);
}
2012-12-02 22:12:08 +00:00
/* Selects which ordering relation order_cmp() evaluates on a jv_cmp() result. */
enum cmp_op {
CMP_OP_LESS ,       /* r <  0 */
CMP_OP_GREATER ,    /* r >  0 */
CMP_OP_LESSEQ ,     /* r <= 0 */
CMP_OP_GREATEREQ    /* r >= 0 */
} ;
/*
 * Shared body of the four ordering operators: compare a and b with
 * jv_cmp() and report whether the relation selected by `op` holds.
 * `input` is unused and freed. An unrecognised `op` yields false.
 */
static jv order_cmp(jv input, jv a, jv b, enum cmp_op op) {
  jv_free(input);
  int r = jv_cmp(a, b);

  int holds;
  switch (op) {
  case CMP_OP_LESS:
    holds = r < 0;
    break;
  case CMP_OP_LESSEQ:
    holds = r <= 0;
    break;
  case CMP_OP_GREATEREQ:
    holds = r >= 0;
    break;
  case CMP_OP_GREATER:
    holds = r > 0;
    break;
  default:
    holds = 0;
    break;
  }
  return jv_bool(holds);
}
2014-07-07 22:49:46 -05:00
/* Implements `a < b`; forwards input, a and b to order_cmp(). */
static jv f_less(jq_state *jq, jv input, jv a, jv b) {
  const enum cmp_op relation = CMP_OP_LESS;
  return order_cmp(input, a, b, relation);
}
2014-07-07 22:49:46 -05:00
/* Implements `a > b`; forwards input, a and b to order_cmp(). */
static jv f_greater(jq_state *jq, jv input, jv a, jv b) {
  const enum cmp_op relation = CMP_OP_GREATER;
  return order_cmp(input, a, b, relation);
}
2014-07-07 22:49:46 -05:00
/* Implements `a <= b`; forwards input, a and b to order_cmp(). */
static jv f_lesseq(jq_state *jq, jv input, jv a, jv b) {
  const enum cmp_op relation = CMP_OP_LESSEQ;
  return order_cmp(input, a, b, relation);
}
2014-07-07 22:49:46 -05:00
/* Implements `a >= b`; forwards input, a and b to order_cmp(). */
static jv f_greatereq(jq_state *jq, jv input, jv a, jv b) {
  const enum cmp_op relation = CMP_OP_GREATEREQ;
  return order_cmp(input, a, b, relation);
}
2014-07-07 22:49:46 -05:00
/*
 * Containment test. When a and b have the same kind the answer comes from
 * jv_contains(a, b); operands of different kinds yield a type error.
 */
static jv f_contains(jq_state *jq, jv a, jv b) {
  if (jv_get_kind(a) != jv_get_kind(b))
    return type_error2(a, b, "cannot have their containment checked");

  return jv_bool(jv_contains(a, b));
}
2014-07-07 22:49:46 -05:00
/* Serialise the input with jv_dump_string(), passing 0 as the flags argument. */
static jv f_dump(jq_state *jq, jv input) {
  const int dump_flags = 0;
  return jv_dump_string(input, dump_flags);
}
2014-07-07 22:49:46 -05:00
/*
 * Parse a string input as JSON text using its full byte length.
 * Non-string inputs yield a type error; otherwise the result is whatever
 * jv_parse_sized() returns.
 */
static jv f_json_parse(jq_state *jq, jv input) {
  if (jv_get_kind(input) != JV_KIND_STRING)
    return type_error(input, "only strings can be parsed");

  const char *text = jv_string_value(input);
  int nbytes = jv_string_length_bytes(jv_copy(input));
  jv res = jv_parse_sized(text, nbytes);
  jv_free(input);
  return res;
}
2014-07-07 22:49:46 -05:00
/*
 * Convert the input to a number.
 *
 *   number -> returned unchanged
 *   string -> parsed with jv_parse(); a numeric result or a parse error
 *             is returned as-is
 *   other  -> type error
 *
 * A string that parses to some non-numeric value (for example "[1]") also
 * yields the type error; the parsed value is now released in that case
 * instead of being leaked.
 */
static jv f_tonumber(jq_state *jq, jv input) {
  if (jv_get_kind(input) == JV_KIND_NUMBER)
    return input;

  if (jv_get_kind(input) == JV_KIND_STRING) {
    jv parsed = jv_parse(jv_string_value(input));
    if (!jv_is_valid(parsed) || jv_get_kind(parsed) == JV_KIND_NUMBER) {
      jv_free(input);
      return parsed;
    }
    jv_free(parsed);
  }

  return type_error(input, "cannot be parsed as a number");
}
2014-07-07 22:49:46 -05:00
/*
 * Implements `length`.
 *
 *   array  -> jv_array_length
 *   object -> jv_object_length
 *   string -> jv_string_length_codepoints
 *   number -> absolute value (fabs)
 *   null   -> 0
 *   other  -> type error "<kind> has no length"
 */
static jv f_length(jq_state *jq, jv input) {
  switch (jv_get_kind(input)) {
  case JV_KIND_ARRAY:
    return jv_number(jv_array_length(input));
  case JV_KIND_OBJECT:
    return jv_number(jv_object_length(input));
  case JV_KIND_STRING:
    return jv_number(jv_string_length_codepoints(input));
  case JV_KIND_NUMBER: {
    jv magnitude = jv_number(fabs(jv_number_value(input)));
    /* Release the input once read, as f_negate does. */
    jv_free(input);
    return magnitude;
  }
  case JV_KIND_NULL:
    jv_free(input);
    return jv_number(0);
  default:
    return type_error(input, "has no length");
  }
}
2014-07-07 22:49:46 -05:00
/*
 * Implements `tostring`: strings pass through untouched, every other kind
 * is serialised with jv_dump_string(input, 0).
 */
static jv f_tostring(jq_state *jq, jv input) {
  if (jv_get_kind(input) != JV_KIND_STRING)
    return jv_dump_string(input, 0);
  return input;
}
2012-12-27 20:49:34 +00:00
/*
 * Upper-case letters, lower-case letters, then digits (62 characters).
 * Used through string-literal concatenation to build larger character sets.
 */
# define CHARS_ALPHANUM "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
/*
 * Return a copy of the string `input` in which selected ASCII characters
 * are replaced by escape sequences. `input` is freed.
 *
 * escapings: a packed table of entries, each being one character to
 *            replace followed by its NUL-terminated replacement text; the
 *            table ends with an empty entry (an extra NUL).
 *            Example: "<&lt;\0>&gt;\0".
 *
 * Code points >= 128 and characters without an entry are copied verbatim.
 */
static jv escape_string(jv input, const char *escapings) {
  assert(jv_get_kind(input) == JV_KIND_STRING);

  /* lookup[c] is the replacement text for ASCII character c, or NULL. */
  const char *lookup[128] = {0};
  const char *p = escapings;
  while (*p) {
    lookup[(int)*p] = p + 1;
    p++;            /* step over the key character     */
    p += strlen(p); /* step over the replacement text  */
    p++;            /* step over its terminating NUL   */
  }

  /* Start from an empty string so nothing precedes the first character. */
  jv ret = jv_string("");
  const char *i = jv_string_value(input);
  const char *end = i + jv_string_length_bytes(jv_copy(input));
  const char *cstart;
  int c = 0;
  while ((i = jvp_utf8_next((cstart = i), end, &c))) {
    assert(c > 0);
    if (c < 128 && lookup[c]) {
      ret = jv_string_append_str(ret, lookup[c]);
    } else {
      /* Copy the original encoded bytes of this code point. */
      ret = jv_string_append_buf(ret, cstart, i - cstart);
    }
  }
  jv_free(input);
  return ret;
}
2014-07-07 22:49:46 -05:00
/*
 * Implements the `@name` formatters. `fmt` selects the format by name:
 *
 *   json    serialise with jv_dump_string
 *   text    f_tostring
 *   csv     array -> comma-separated row; strings double-quoted with
 *           embedded quotes doubled
 *   tsv     array -> tab-separated row; tabs inside strings become \t
 *   html    f_tostring, then & < > ' " replaced by entities
 *   uri     f_tostring, then every byte outside the unreserved set
 *           percent-encoded as %XX
 *   sh      value (or each array element) rendered for a POSIX shell;
 *           strings single-quoted
 *   base64  f_tostring, then base64 with '=' padding
 *
 * Returns an error value when `fmt` is not a string, names no known
 * format, or the input does not suit the chosen format.
 */
static jv f_format(jq_state *jq, jv input, jv fmt) {
  if (jv_get_kind(fmt) != JV_KIND_STRING) {
    jv_free(input);
    return type_error(fmt, "is not a valid format");
  }
  const char *fmt_s = jv_string_value(fmt);

  if (!strcmp(fmt_s, "json")) {
    jv_free(fmt);
    return jv_dump_string(input, 0);

  } else if (!strcmp(fmt_s, "text")) {
    jv_free(fmt);
    return f_tostring(jq, input);

  } else if (!strcmp(fmt_s, "csv") || !strcmp(fmt_s, "tsv")) {
    const char *quotes, *sep, *escapings;
    if (!strcmp(fmt_s, "csv")) {
      quotes = "\"";
      sep = ",";
      escapings = "\"\"\"\0";   /* "  ->  ""  */
    } else {
      assert(!strcmp(fmt_s, "tsv"));
      quotes = "";
      sep = "\t";
      escapings = "\t\\t\0";    /* TAB  ->  \t  */
    }
    jv_free(fmt);
    if (jv_get_kind(input) != JV_KIND_ARRAY)
      return type_error(input, "cannot be csv-formatted, only array");
    jv line = jv_string("");
    jv_array_foreach(input, i, x) {
      if (i) line = jv_string_append_str(line, sep);
      switch (jv_get_kind(x)) {
      case JV_KIND_NULL:
        /* null rendered as empty string */
        jv_free(x);
        break;
      case JV_KIND_TRUE:
      case JV_KIND_FALSE:
        line = jv_string_concat(line, jv_dump_string(x, 0));
        break;
      case JV_KIND_NUMBER:
        if (jv_number_value(x) != jv_number_value(x)) {
          /* NaN, render as empty string */
          jv_free(x);
        } else {
          line = jv_string_concat(line, jv_dump_string(x, 0));
        }
        break;
      case JV_KIND_STRING: {
        line = jv_string_append_str(line, quotes);
        line = jv_string_concat(line, escape_string(x, escapings));
        line = jv_string_append_str(line, quotes);
        break;
      }
      default:
        jv_free(input);
        jv_free(line);
        return type_error(x, "is not valid in a csv row");
      }
    }
    jv_free(input);
    return line;

  } else if (!strcmp(fmt_s, "html")) {
    jv_free(fmt);
    return escape_string(f_tostring(jq, input),
                         "&&amp;\0<&lt;\0>&gt;\0'&apos;\0\"&quot;\0");

  } else if (!strcmp(fmt_s, "uri")) {
    jv_free(fmt);
    input = f_tostring(jq, input);

    /* unreserved[c] != 0 for ASCII characters that are emitted unescaped. */
    int unreserved[128] = {0};
    const char *p = CHARS_ALPHANUM "-_.!~*'()";
    while (*p) unreserved[(int)*p++] = 1;

    jv line = jv_string("");
    const char *s = jv_string_value(input);
    /* The length cannot change inside the loop, so read it once. */
    int len = jv_string_length_bytes(jv_copy(input));
    for (int i = 0; i < len; i++) {
      unsigned ch = (unsigned)(unsigned char)*s;
      if (ch < 128 && unreserved[ch]) {
        line = jv_string_append_buf(line, s, 1);
      } else {
        line = jv_string_concat(line, jv_string_fmt("%%%02X", ch));
      }
      s++;
    }
    jv_free(input);
    return line;

  } else if (!strcmp(fmt_s, "sh")) {
    jv_free(fmt);
    /* A non-array value is treated as a one-element array. */
    if (jv_get_kind(input) != JV_KIND_ARRAY)
      input = jv_array_set(jv_array(), 0, input);
    jv line = jv_string("");
    jv_array_foreach(input, i, x) {
      /* Elements become separate shell words, separated by one space. */
      if (i) line = jv_string_append_str(line, " ");
      switch (jv_get_kind(x)) {
      case JV_KIND_NULL:
      case JV_KIND_TRUE:
      case JV_KIND_FALSE:
      case JV_KIND_NUMBER:
        line = jv_string_concat(line, jv_dump_string(x, 0));
        break;
      case JV_KIND_STRING: {
        line = jv_string_append_str(line, "'");
        /* '  ->  '\''  (close the quote, escaped quote, reopen) */
        line = jv_string_concat(line, escape_string(x, "''\\''\0"));
        line = jv_string_append_str(line, "'");
        break;
      }
      default:
        jv_free(input);
        jv_free(line);
        return type_error(x, "can not be escaped for shell");
      }
    }
    jv_free(input);
    return line;

  } else if (!strcmp(fmt_s, "base64")) {
    jv_free(fmt);
    input = f_tostring(jq, input);
    jv line = jv_string("");
    const char b64[64 + 1] = CHARS_ALPHANUM "+/";
    const unsigned char *data = (const unsigned char *)jv_string_value(input);
    int len = jv_string_length_bytes(jv_copy(input));
    for (int i = 0; i < len; i += 3) {
      /* Pack up to three input bytes into 24 bits, zero-filling the tail. */
      uint32_t code = 0;
      int n = len - i >= 3 ? 3 : len - i;
      for (int j = 0; j < 3; j++) {
        code <<= 8;
        code |= j < n ? (unsigned)data[i + j] : 0;
      }
      /* Emit four 6-bit groups, most significant first. */
      char buf[4];
      for (int j = 0; j < 4; j++) {
        buf[j] = b64[(code >> (18 - j * 6)) & 0x3f];
      }
      /* Groups that hold only fill bits are written as padding. */
      if (n < 3) buf[3] = '=';
      if (n < 2) buf[2] = '=';
      line = jv_string_append_buf(line, buf, sizeof(buf));
    }
    jv_free(input);
    return line;

  } else {
    jv_free(input);
    /* Message is "<fmt> is not a valid format"; the leading blank separates them. */
    return jv_invalid_with_msg(jv_string_concat(fmt, jv_string(" is not a valid format")));
  }
}
2014-07-07 22:49:46 -05:00
/*
 * Implements `keys`: delegates to jv_keys() for objects and arrays and
 * returns a type error ("<kind> has no keys") for every other kind.
 */
static jv f_keys(jq_state *jq, jv input) {
  jv_kind kind = jv_get_kind(input);
  if (kind != JV_KIND_OBJECT && kind != JV_KIND_ARRAY)
    return type_error(input, "has no keys");
  return jv_keys(input);
}
2014-09-30 21:49:37 -05:00
/*
 * Implements `keys_unsorted`: delegates to jv_keys_unsorted() for objects
 * and arrays and returns a type error ("<kind> has no keys") otherwise.
 */
static jv f_keys_unsorted(jq_state *jq, jv input) {
  jv_kind kind = jv_get_kind(input);
  if (kind != JV_KIND_OBJECT && kind != JV_KIND_ARRAY)
    return type_error(input, "has no keys");
  return jv_keys_unsorted(input);
}
2014-07-07 22:49:46 -05:00
/*
 * Implements `sort`: sorts an array using its own elements as the sort
 * keys (jv_sort(input, copy of input)). Non-arrays yield a type error.
 */
static jv f_sort(jq_state *jq, jv input) {
  if (jv_get_kind(input) != JV_KIND_ARRAY)
    return type_error(input, "cannot be sorted, as it is not an array");

  jv keys = jv_copy(input);
  return jv_sort(input, keys);
}
2014-07-07 22:49:46 -05:00
/*
 * Backend of `sort_by`: sorts `input` by the parallel array `keys`.
 * Both must be arrays of equal length; otherwise a type error naming
 * both kinds is returned.
 */
static jv f_sort_by_impl(jq_state *jq, jv input, jv keys) {
  int usable = jv_get_kind(input) == JV_KIND_ARRAY &&
               jv_get_kind(keys) == JV_KIND_ARRAY &&
               jv_array_length(jv_copy(input)) == jv_array_length(jv_copy(keys));
  if (!usable)
    return type_error2(input, keys, "cannot be sorted, as they are not both arrays");

  return jv_sort(input, keys);
}
2014-07-07 22:49:46 -05:00
/*
 * Backend of `group_by`: groups `input` by the parallel array `keys`
 * via jv_group(). Both must be arrays of equal length; otherwise a type
 * error naming both kinds is returned.
 */
static jv f_group_by_impl(jq_state *jq, jv input, jv keys) {
  int usable = jv_get_kind(input) == JV_KIND_ARRAY &&
               jv_get_kind(keys) == JV_KIND_ARRAY &&
               jv_array_length(jv_copy(input)) == jv_array_length(jv_copy(keys));
  if (!usable) {
    /* The message names the operation actually attempted (grouping). */
    return type_error2(input, keys, "cannot be grouped, as they are not both arrays");
  }

  return jv_group(input, keys);
}
2015-02-14 13:31:34 -06:00
# ifdef HAVE_ONIGURUMA
2014-06-18 19:49:38 -04:00
/*
 * Callback for onig_foreach_name(): records the name of a named group on
 * the capture objects it refers to.
 *
 * name, name_end: bounds of the group name
 * ngroups, groups: group numbers carrying that name (1-based; capture
 *                  slot is groups[i] - 1)
 * arg:            pointer to the jv array of capture objects, updated in
 *                 place
 *
 * Slots that do not hold an object are left alone. Always returns 0.
 */
static int f_match_name_iter(const UChar *name, const UChar *name_end, int ngroups,
                             int *groups, regex_t *reg, void *arg) {
  jv captures = *(jv *)arg;
  for (int i = 0; i < ngroups; ++i) {
    int slot = groups[i] - 1;
    jv cap = jv_array_get(jv_copy(captures), slot);
    if (jv_get_kind(cap) == JV_KIND_OBJECT) {
      cap = jv_object_set(cap, jv_string("name"),
                          jv_string_sized((const char *)name, name_end - name));
      captures = jv_array_set(captures, slot, cap);
    } else {
      jv_free(cap);
    }
  }
  *(jv *)arg = captures;
  return 0;
}
2014-07-07 22:49:46 -05:00
/*
 * Regular-expression matching backed by Oniguruma.
 *
 * input:     string to search
 * regex:     pattern string (UTF-8 encoding, ONIG_SYNTAX_PERL_NG)
 * modifiers: null, or a string of flag characters:
 *              g  find all matches, not just the first
 *              i  ONIG_OPTION_IGNORECASE
 *              x  ONIG_OPTION_EXTEND
 *              m  ONIG_OPTION_MULTILINE
 *              s  ONIG_OPTION_SINGLELINE
 *              p  MULTILINE and SINGLELINE together
 *              l  ONIG_OPTION_FIND_LONGEST
 *              n  ONIG_OPTION_FIND_NOT_EMPTY
 * testmode:  when equal to true, only report whether a match exists
 *
 * Returns true/false in test mode. Otherwise returns an array of match
 * objects with keys "offset" and "length" (counted in code points),
 * "string" and "captures"; each capture additionally has "name".
 * Returns an error value for wrong argument kinds, unknown flags, or a
 * failure reported by Oniguruma.
 */
static jv f_match(jq_state *jq, jv input, jv regex, jv modifiers, jv testmode) {
  int test = jv_equal(testmode, jv_true());
  int onigret;
  int global = 0;
  regex_t *reg;
  OnigErrorInfo einfo;
  OnigRegion *region;

  if (jv_get_kind(input) != JV_KIND_STRING) {
    jv_free(regex);
    jv_free(modifiers);
    return type_error(input, "cannot be matched, as it is not a string");
  }

  if (jv_get_kind(regex) != JV_KIND_STRING) {
    jv_free(input);
    jv_free(modifiers);
    return type_error(regex, "is not a string");
  }

  OnigOptionType options = ONIG_OPTION_CAPTURE_GROUP;

  if (jv_get_kind(modifiers) == JV_KIND_STRING) {
    /* Walk the flag string one code point at a time. */
    jv modarray = jv_string_explode(jv_copy(modifiers));
    jv_array_foreach(modarray, i, mod) {
      switch ((int)jv_number_value(mod)) {
        case 'g':
          global = 1;
          break;
        case 'i':
          options |= ONIG_OPTION_IGNORECASE;
          break;
        case 'x':
          options |= ONIG_OPTION_EXTEND;
          break;
        case 'm':
          options |= ONIG_OPTION_MULTILINE;
          break;
        case 's':
          options |= ONIG_OPTION_SINGLELINE;
          break;
        case 'p':
          options |= ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE;
          break;
        case 'l':
          options |= ONIG_OPTION_FIND_LONGEST;
          break;
        case 'n':
          options |= ONIG_OPTION_FIND_NOT_EMPTY;
          break;
        default:
          jv_free(input);
          jv_free(regex);
          jv_free(modarray);
          /* Message is "<modifiers> is not a valid modifier string". */
          return jv_invalid_with_msg(jv_string_concat(modifiers,
                jv_string(" is not a valid modifier string")));
      }
    }
    jv_free(modarray);
  } else if (jv_get_kind(modifiers) != JV_KIND_NULL) {
    // If it isn't a string or null, then it is the wrong type...
    jv_free(input);
    jv_free(regex);
    return type_error(modifiers, "is not a string");
  }

  jv_free(modifiers);

  onigret = onig_new(&reg, (const UChar *)jv_string_value(regex),
      (const UChar *)(jv_string_value(regex) + jv_string_length_bytes(jv_copy(regex))),
      options, ONIG_ENCODING_UTF8, ONIG_SYNTAX_PERL_NG, &einfo);
  if (onigret != ONIG_NORMAL) {
    UChar ebuf[ONIG_MAX_ERROR_MESSAGE_LEN];
    onig_error_code_to_str(ebuf, onigret, &einfo);
    jv_free(input);
    jv_free(regex);
    return jv_invalid_with_msg(jv_string_concat(jv_string("Regex failure: "),
          jv_string((char *)ebuf)));
  }

  /*
   * Always give `result` a value: the search-error path below frees it,
   * and in test mode it was previously still uninitialised at that point.
   */
  jv result = test ? jv_false() : jv_array();

  const char *input_string = jv_string_value(input);
  const UChar *start = (const UChar *)jv_string_value(input);
  const unsigned long length = jv_string_length_bytes(jv_copy(input));
  const UChar *end = start + length;
  region = onig_region_new();

  do {
    onigret = onig_search(reg,
        (const UChar *)jv_string_value(input), end, /* string boundaries */
        start, end,                                 /* search boundaries */
        region, ONIG_OPTION_NONE);

    if (onigret >= 0) {
      if (test) {
        result = jv_true();
        break;
      }

      // Zero-width match
      if (region->end[0] == region->beg[0]) {
        unsigned long idx;
        const char *fr = (const char *)input_string;
        /* Convert the byte offset of the match into a code point index. */
        for (idx = 0; fr != input_string + region->beg[0]; idx++) {
          fr += jvp_utf8_decode_length(*fr);
        }
        jv match = jv_object_set(jv_object(), jv_string("offset"), jv_number(idx));
        match = jv_object_set(match, jv_string("length"), jv_number(0));
        match = jv_object_set(match, jv_string("string"), jv_string(""));
        match = jv_object_set(match, jv_string("captures"), jv_array());
        result = jv_array_append(result, match);
        /* Step forward one byte so the next search cannot match here again. */
        start += 1;
        continue;
      }

      unsigned long idx;
      unsigned long len;
      const char *fr = (const char *)input_string;

      /*
       * One pass over the code points up to the end of the match: `len`
       * counts code points and is restarted at the match start, where
       * the count so far becomes the offset `idx`.
       */
      for (idx = len = 0; fr < input_string + region->end[0]; len++) {
        if (fr == input_string + region->beg[0]) idx = len, len = 0;
        fr += jvp_utf8_decode_length(*fr);
      }

      jv match = jv_object_set(jv_object(), jv_string("offset"), jv_number(idx));

      unsigned long blen = region->end[0] - region->beg[0];
      match = jv_object_set(match, jv_string("length"), jv_number(len));
      match = jv_object_set(match, jv_string("string"),
                            jv_string_sized(input_string + region->beg[0], blen));

      jv captures = jv_array();
      for (int i = 1; i < region->num_regs; ++i) {
        // Empty capture.
        if (region->beg[i] == region->end[i]) {
          // Didn't match.
          jv cap;
          if (region->beg[i] == -1) {
            cap = jv_object_set(jv_object(), jv_string("offset"), jv_number(-1));
            cap = jv_object_set(cap, jv_string("string"), jv_null());
          } else {
            fr = input_string;
            for (idx = 0; fr != input_string + region->beg[i]; idx++) {
              fr += jvp_utf8_decode_length(*fr);
            }
            cap = jv_object_set(jv_object(), jv_string("offset"), jv_number(idx));
            cap = jv_object_set(cap, jv_string("string"), jv_string(""));
          }
          cap = jv_object_set(cap, jv_string("length"), jv_number(0));
          cap = jv_object_set(cap, jv_string("name"), jv_null());
          captures = jv_array_append(captures, cap);
          continue;
        }

        fr = input_string;
        for (idx = len = 0; fr != input_string + region->end[i]; len++) {
          if (fr == input_string + region->beg[i]) idx = len, len = 0;
          fr += jvp_utf8_decode_length(*fr);
        }

        blen = region->end[i] - region->beg[i];
        jv cap = jv_object_set(jv_object(), jv_string("offset"), jv_number(idx));
        cap = jv_object_set(cap, jv_string("length"), jv_number(len));
        cap = jv_object_set(cap, jv_string("string"),
                            jv_string_sized(input_string + region->beg[i], blen));
        cap = jv_object_set(cap, jv_string("name"), jv_null());
        captures = jv_array_append(captures, cap);
      }

      /* Fill in the "name" field for captures that belong to named groups. */
      onig_foreach_name(reg, f_match_name_iter, &captures);
      match = jv_object_set(match, jv_string("captures"), captures);
      result = jv_array_append(result, match);

      /* Resume searching right after this match. */
      start = (const UChar *)(input_string + region->end[0]);
      onig_region_free(region, 0);

    } else if (onigret == ONIG_MISMATCH) {
      if (test)
        result = jv_false();
      break;

    } else { /* Error */
      UChar ebuf[ONIG_MAX_ERROR_MESSAGE_LEN];
      /* The variadic argument must be a pointer to the error info. */
      onig_error_code_to_str(ebuf, onigret, &einfo);
      jv_free(result);
      result = jv_invalid_with_msg(jv_string_concat(jv_string("Regex failure: "),
            jv_string((char *)ebuf)));
      break;
    }
  } while (global && start != end);

  onig_region_free(region, 1);
  onig_free(reg);
  jv_free(input);
  jv_free(regex);
  return result;
}
2015-02-14 13:31:34 -06:00
# endif /* HAVE_ONIGURUMA */
2014-06-18 19:49:38 -04:00
2012-12-04 22:45:03 +00:00
/*
 * Pick the element of `values` whose entry in the parallel array `keys`
 * is extreme under jv_cmp().
 *
 * is_min == 1: a candidate wins only with a strictly smaller key, so the
 *              first of several equal minima is returned.
 * otherwise:   a candidate wins whenever its key is not smaller, so the
 *              last of several equal maxima is returned.
 *
 * Returns null for empty arrays and a type error when either argument is
 * not an array or the lengths differ. Both arrays are freed.
 */
static jv minmax_by(jv values, jv keys, int is_min) {
  if (jv_get_kind(values) != JV_KIND_ARRAY ||
      jv_get_kind(keys) != JV_KIND_ARRAY)
    return type_error2(values, keys, "cannot be iterated over");

  int count = jv_array_length(jv_copy(values));
  if (count != jv_array_length(jv_copy(keys)))
    return type_error2(values, keys, "have wrong length");

  if (count == 0) {
    jv_free(values);
    jv_free(keys);
    return jv_null();
  }

  jv ret = jv_array_get(jv_copy(values), 0);
  jv retkey = jv_array_get(jv_copy(keys), 0);
  for (int i = 1; i < count; i++) {
    jv item = jv_array_get(jv_copy(keys), i);
    int cmp = jv_cmp(jv_copy(item), jv_copy(retkey));
    if ((cmp < 0) == (is_min == 1)) {
      /* New best: adopt this key and the value stored at the same index. */
      jv_free(retkey);
      retkey = item;
      jv_free(ret);
      ret = jv_array_get(jv_copy(values), i);
    } else {
      jv_free(item);
    }
  }

  jv_free(values);
  jv_free(keys);
  jv_free(retkey);
  return ret;
}
2014-07-07 22:49:46 -05:00
/* Implements `min`: minmax_by() with the array serving as its own key list. */
static jv f_min(jq_state *jq, jv x) {
  jv keys = jv_copy(x);
  return minmax_by(x, keys, 1);
}
2014-07-07 22:49:46 -05:00
/* Implements `max`: minmax_by() with the array serving as its own key list. */
static jv f_max(jq_state *jq, jv x) {
  jv keys = jv_copy(x);
  return minmax_by(x, keys, 0);
}
2014-07-07 22:49:46 -05:00
/* Backend of `min_by`: x holds the values, y the parallel keys. */
static jv f_min_by_impl(jq_state *jq, jv x, jv y) {
  const int want_min = 1;
  return minmax_by(x, y, want_min);
}
2014-07-07 22:49:46 -05:00
/* Backend of `max_by`: x holds the values, y the parallel keys. */
static jv f_max_by_impl(jq_state *jq, jv x, jv y) {
  const int want_min = 0;
  return minmax_by(x, y, want_min);
}
2014-07-07 22:49:46 -05:00
/* Implements `type`: returns the input's kind name as a string; the input is freed. */
static jv f_type(jq_state *jq, jv input) {
  const char *kind_name = jv_kind_name(jv_get_kind(input));
  jv out = jv_string(kind_name);
  jv_free(input);
  return out;
}
2014-07-07 22:49:46 -05:00
/* Implements `error(msg)`: discards the input and wraps msg in an error value. */
static jv f_error(jq_state *jq, jv input, jv msg) {
  jv_free(input);
  jv failure = jv_invalid_with_msg(msg);
  return failure;
}
2014-06-16 22:33:24 -05:00
// FIXME Should autoconf check for this!
# ifndef WIN32
2014-06-13 17:51:41 -05:00
extern const char * * environ ;
2014-06-16 22:33:24 -05:00
# endif
2014-06-13 17:51:41 -05:00
2014-07-07 22:49:46 -05:00
static jv f_env ( jq_state * jq , jv input ) {
2014-06-13 17:51:41 -05:00
jv_free ( input ) ;
jv env = jv_object ( ) ;
const char * var , * val ;
for ( const char * * e = environ ; * e ! = NULL ; e + + ) {
var = e [ 0 ] ;
val = strchr ( e [ 0 ] , ' = ' ) ;
if ( val = = NULL )
env = jv_object_set ( env , jv_string ( var ) , jv_null ( ) ) ;
else if ( var - val < INT_MAX )
env = jv_object_set ( env , jv_string_sized ( var , val - var ) , jv_string ( val + 1 ) ) ;
}
return env ;
}
2014-12-30 13:13:30 -06:00
/* get_search_list: discards the input and returns jq_get_lib_dirs(jq). */
static jv f_get_search_list(jq_state *jq, jv input) {
  jv_free(input);
  jv dirs = jq_get_lib_dirs(jq);
  return dirs;
}
/* get_prog_origin: discards the input and returns jq_get_prog_origin(jq). */
static jv f_get_prog_origin(jq_state *jq, jv input) {
  jv_free(input);
  jv origin = jq_get_prog_origin(jq);
  return origin;
}
/* get_jq_origin: discards the input and returns jq_get_jq_origin(jq). */
static jv f_get_jq_origin(jq_state *jq, jv input) {
  jv_free(input);
  jv origin = jq_get_jq_origin(jq);
  return origin;
}
2014-08-21 00:04:38 -05:00
/*
 * split: a is the input and b the separator.  Both must be strings;
 * otherwise both are released and an error value is returned.
 */
static jv f_string_split(jq_state *jq, jv a, jv b) {
  int both_strings = jv_get_kind(a) == JV_KIND_STRING &&
                     jv_get_kind(b) == JV_KIND_STRING;
  if (both_strings)
    return jv_string_split(a, b);

  jv_free(a);
  jv_free(b);
  return jv_invalid_with_msg(jv_string(" split input and separator must be strings "));
}
/*
 * explode: forwards a string input to jv_string_explode(); any other
 * kind is released and reported as an error.
 */
static jv f_string_explode(jq_state *jq, jv a) {
  if (jv_get_kind(a) == JV_KIND_STRING)
    return jv_string_explode(a);

  jv_free(a);
  return jv_invalid_with_msg(jv_string(" explode input must be a string "));
}
2014-12-30 11:17:58 -06:00
/* _strindices: forwards both operands unchanged to jv_string_indexes(). */
static jv f_string_indexes(jq_state *jq, jv a, jv b) {
  jv found = jv_string_indexes(a, b);
  return found;
}
2014-08-21 00:04:38 -05:00
/*
 * implode: forwards an array input to jv_string_implode(); any other
 * kind is released and reported as an error.
 */
static jv f_string_implode(jq_state *jq, jv a) {
  if (jv_get_kind(a) == JV_KIND_ARRAY)
    return jv_string_implode(a);

  jv_free(a);
  return jv_invalid_with_msg(jv_string(" implode input must be an array "));
}
2014-07-07 22:49:46 -05:00
/* setpath: thin adapter that forwards its three operands to jv_setpath(). */
static jv f_setpath(jq_state *jq, jv a, jv b, jv c) {
  jv updated = jv_setpath(a, b, c);
  return updated;
}
/* getpath: thin adapter that forwards its two operands to jv_getpath(). */
static jv f_getpath(jq_state *jq, jv a, jv b) {
  jv found = jv_getpath(a, b);
  return found;
}
/* delpaths: thin adapter that forwards its two operands to jv_delpaths(). */
static jv f_delpaths(jq_state *jq, jv a, jv b) {
  jv remaining = jv_delpaths(a, b);
  return remaining;
}
/* has: thin adapter that forwards its two operands to jv_has(). */
static jv f_has(jq_state *jq, jv a, jv b) {
  jv answer = jv_has(a, b);
  return answer;
}
2014-08-21 00:04:38 -05:00
/*
 * modulemeta: the input must be a string (the module name), which is
 * passed on to load_module_meta(); any other kind is released and
 * reported as an error.
 */
static jv f_modulemeta(jq_state *jq, jv a) {
  if (jv_get_kind(a) == JV_KIND_STRING)
    return load_module_meta(jq, a);

  jv_free(a);
  return jv_invalid_with_msg(jv_string(" modulemeta input module name must be a string "));
}
Add Streaming parser (--stream)
Streaming means that outputs are produced as soon as possible. With the
`foreach` syntax one can write programs which reduce portions of the
streaming parse of a large input (reduce into proper JSON values, for
example), and discard the rest, processing incrementally.
This:
$ jq -c --stream .
should produce the same output as this:
$ jq -c '. as $dot | path(..) as $p | $dot | getpath($p) | [$p,.]'
The output of `jq --stream .` should be a sequence of `[[<path>],<leaf>]`
and `[[<path>]]` values. The latter indicate that the array/object at
that path ended.
Scalars and empty arrays and objects are leaf values for this purpose.
For example, a truncated input produces a path as soon as possible, then
later the error:
$ printf '[0,\n'|./jq -c --stream .
[[0],0]
parse error: Unfinished JSON term at EOF at line 3, column 0
$
2014-12-22 23:06:27 -06:00
/*
 * _input: discards the current input and asks the registered input
 * callback for the next value.
 *
 * A valid value, or an invalid value that carries a message, is
 * returned as-is.  When no callback is registered, or the callback
 * yields an invalid value without a message, the result is an error
 * whose message is the "break" marker string below.
 */
static jv f_input(jq_state *jq, jv input) {
  jq_input_cb cb;
  void *data;

  jv_free(input);
  jq_get_input_cb(jq, &cb, &data);
  if (cb != NULL) {
    jv v = cb(jq, data);
    if (jv_is_valid(v) || jv_invalid_has_msg(jv_copy(v)))
      return v;
  }
  return jv_invalid_with_msg(jv_string(" break "));
}
2014-12-27 17:15:56 -06:00
/*
 * debug: hands a copy of the input to the registered debug callback,
 * if there is one, and returns the input unchanged.
 */
static jv f_debug(jq_state *jq, jv input) {
  jq_msg_cb cb;
  void *data;

  jq_get_debug_cb(jq, &cb, &data);
  if (cb == NULL)
    return input;

  cb(data, jv_copy(input));
  return input;
}
Add Streaming parser (--stream)
Streaming means that outputs are produced as soon as possible. With the
`foreach` syntax one can write programs which reduce portions of the
streaming parse of a large input (reduce into proper JSON values, for
example), and discard the rest, processing incrementally.
This:
$ jq -c --stream .
should produce the same output as this:
$ jq -c '. as $dot | path(..) as $p | $dot | getpath($p) | [$p,.]'
The output of `jq --stream .` should be a sequence of `[[<path>],<leaf>]`
and `[[<path>]]` values. The latter indicate that the array/object at
that path ended.
Scalars and empty arrays and objects are leaf values for this purpose.
For example, a truncated input produces a path as soon as possible, then
later the error:
$ printf '[0,\n'|./jq -c --stream .
[[0],0]
parse error: Unfinished JSON term at EOF at line 3, column 0
$
2014-12-22 23:06:27 -06:00
2014-12-27 17:15:56 -06:00
/*
 * stderr: dumps a copy of the input to the standard error stream with
 * jv_dumpf() (flags 0), writes a trailing separator, and returns the
 * input unchanged.
 */
static jv f_stderr(jq_state *jq, jv input) {
  jv shown = jv_copy(input);
  jv_dumpf(shown, stderr, 0);
  fprintf(stderr, " \n ");
  return input;
}
2015-03-08 18:56:51 -05:00
/*
 * Converts a broken-down time into an eight-element array:
 * [year, month, mday, hour, min, sec, wday, yday].
 * The year is the full year (tm_year + 1900); every other field is
 * copied from struct tm as-is.
 */
static jv tm2jv(struct tm *tm) {
  jv year = jv_number(tm->tm_year + 1900);
  jv month = jv_number(tm->tm_mon);
  jv mday = jv_number(tm->tm_mday);
  jv hour = jv_number(tm->tm_hour);
  jv minute = jv_number(tm->tm_min);
  jv second = jv_number(tm->tm_sec);
  jv wday = jv_number(tm->tm_wday);
  jv yday = jv_number(tm->tm_yday);

  return JV_ARRAY(year, month, mday, hour, minute, second, wday, yday);
}
2015-03-31 10:03:35 -05:00
/*
 * mktime() has side effects and, in any case, interprets its argument
 * in the local timezone, not UTC.  We want timegm(), which isn't
 * standard.
 *
 * To make things worse, mktime() tells you what the timezone
 * adjustment is, but you have to #define _BSD_SOURCE to get this
 * field of struct tm on some systems.
 *
 * This wrapper uses timegm() if available; otherwise it calls mktime()
 * and adds the GMT offset that mktime() stored in *tm (tm_gmtoff or
 * __tm_gmtoff, whichever the platform provides).
 *
 * Returns (time_t)-1 if mktime() fails, and (time_t)-2 if mktime()'s
 * timezone adjustment cannot be corrected on this platform.
 */
static time_t my_mktime(struct tm *tm) {
#ifdef HAVE_TIMEGM
  return timegm(tm);
#else /* HAVE_TIMEGM */
  /* tm is already a pointer: pass it through, do not take its address */
  time_t t = mktime(tm);
  if (t == (time_t)-1)
    return t;
#if defined(HAVE_TM_TM_GMT_OFF)
  return t + tm->tm_gmtoff;
#elif defined(HAVE_TM___TM_GMT_OFF)
  return t + tm->__tm_gmtoff;
#else
  return (time_t)-2; /* Not supported */
#endif
#endif /* !HAVE_TIMEGM */
}
2015-03-06 00:14:15 -06:00
#ifdef HAVE_STRPTIME
/*
 * strptime(fmt): parses the input string a according to the format
 * string b.
 *
 * Returns the parsed time as produced by tm2jv().  If the match stopped
 * at whitespace before the end of the input, the unparsed remainder is
 * appended to the array as a string.  Returns an error value when
 * either operand is not a string, when the input does not match the
 * format, or when my_mktime() reports that it is unsupported.
 *
 * Both a and b are released on every path.
 */
static jv f_strptime(jq_state *jq, jv a, jv b) {
  if (jv_get_kind(a) != JV_KIND_STRING || jv_get_kind(b) != JV_KIND_STRING) {
    jv_free(a);
    jv_free(b);
    return jv_invalid_with_msg(jv_string(" strptime/1 requires string inputs and arguments "));
  }

  struct tm tm;
  memset(&tm, 0, sizeof(tm));
  const char *input = jv_string_value(a);
  const char *fmt = jv_string_value(b);
  const char *end = strptime(input, fmt, &tm);

  /* <ctype.h> functions need an unsigned char value, not a plain char */
  if (end == NULL || (*end != '\0' && !isspace((unsigned char)*end))) {
    jv e = jv_invalid_with_msg(jv_string_fmt(" date \" %s \" does not match format \" %s \" ", input, fmt));
    jv_free(a);
    jv_free(b);
    return e;
  }
  jv_free(b);

  /*
   * tm was zeroed above, so tm_wday and tm_yday both still being 0 is
   * taken to mean strptime() did not fill them in; my_mktime() is then
   * called, presumably so the underlying mktime()/timegm() can
   * normalize them.
   */
  if (tm.tm_wday == 0 && tm.tm_yday == 0 && my_mktime(&tm) == (time_t)-2) {
    jv_free(a);
    return jv_invalid_with_msg(jv_string(" strptime/1 not supported on this platform "));
  }

  jv r = tm2jv(&tm);
  if (*end != '\0')
    r = jv_array_append(r, jv_string(end));
  /* `end` points into the string obtained from a, so a is freed last */
  jv_free(a);
  return r;
}
#else
/* Fallback when strptime() is unavailable: always returns an error. */
static jv f_strptime(jq_state *jq, jv a, jv b) {
  jv_free(a);
  jv_free(b);
  return jv_invalid_with_msg(jv_string(" strptime/1 not implemented on this platform "));
}
#endif
2015-03-08 18:56:51 -05:00
# define TO_TM_FIELD(t, j, i) \
2015-03-06 00:14:15 -06:00
do { \
jv n = jv_array_get ( jv_copy ( j ) , ( i ) ) ; \
2015-03-08 18:56:51 -05:00
if ( jv_get_kind ( n ) ! = ( JV_KIND_NUMBER ) ) \
return 0 ; \
2015-03-06 00:14:15 -06:00
t = jv_number_value ( n ) ; \
jv_free ( n ) ; \
} while ( 0 )
2015-03-08 18:56:51 -05:00
static int jv2tm ( jv a , struct tm * tm ) {
memset ( tm , 0 , sizeof ( * tm ) ) ;
TO_TM_FIELD ( tm - > tm_year , a , 0 ) ;
TO_TM_FIELD ( tm - > tm_mon , a , 1 ) ;
TO_TM_FIELD ( tm - > tm_mday , a , 2 ) ;
TO_TM_FIELD ( tm - > tm_hour , a , 3 ) ;
TO_TM_FIELD ( tm - > tm_min , a , 4 ) ;
TO_TM_FIELD ( tm - > tm_sec , a , 5 ) ;
tm - > tm_year - = 1900 ;
jv_free ( a ) ;
return 1 ;
}
# undef TO_TM_FIELD
2015-03-06 00:14:15 -06:00
/*
 * mktime: converts an array of at least six elements (see jv2tm()) to
 * a number of seconds via my_mktime().
 *
 * Returns an error value if the input is not an array, has fewer than
 * six elements, contains non-numeric fields, or if my_mktime() fails
 * or is unsupported on this platform.
 */
static jv f_mktime(jq_state *jq, jv a) {
  if (jv_get_kind(a) != JV_KIND_ARRAY) {
    jv_free(a);
    return jv_invalid_with_msg(jv_string(" mktime requires array inputs "));
  }
  if (jv_array_length(jv_copy(a)) < 6) {
    jv_free(a);
    return jv_invalid_with_msg(jv_string(" mktime requires parsed datetime inputs "));
  }

  struct tm tm;
  /* ownership of a passes to jv2tm() here; it must not be freed again */
  if (!jv2tm(a, &tm))
    return jv_invalid_with_msg(jv_string(" mktime requires parsed datetime inputs "));

  time_t t = my_mktime(&tm);
  if (t == (time_t)-1)
    return jv_invalid_with_msg(jv_string(" invalid gmtime representation "));
  if (t == (time_t)-2)
    return jv_invalid_with_msg(jv_string(" mktime not supported on this platform "));
  return jv_number(t);
}
2015-03-08 18:56:51 -05:00
/*
 * gmtime: converts a number of seconds since the epoch into the array
 * produced by tm2jv().  The fractional part of the input is added back
 * to the seconds element (index 5), since time_t conversion drops it.
 *
 * Returns an error value if the input is not a number or if the
 * conversion fails.  The input is released on every path.
 */
#ifdef HAVE_GMTIME_R
static jv f_gmtime(jq_state *jq, jv a) {
  if (jv_get_kind(a) != JV_KIND_NUMBER) {
    jv_free(a);
    return jv_invalid_with_msg(jv_string(" gmtime() requires numeric inputs "));
  }
  struct tm tm, *tmp;
  memset(&tm, 0, sizeof(tm));
  double fsecs = jv_number_value(a);
  time_t secs = fsecs;
  jv_free(a);
  tmp = gmtime_r(&secs, &tm);
  if (tmp == NULL)
    return jv_invalid_with_msg(jv_string(" errror converting number of seconds since epoch to datetime "));
  a = tm2jv(tmp);
  return jv_array_set(a, 5, jv_number(jv_number_value(jv_array_get(jv_copy(a), 5)) + (fsecs - floor(fsecs))));
}
#elif defined HAVE_GMTIME
static jv f_gmtime(jq_state *jq, jv a) {
  if (jv_get_kind(a) != JV_KIND_NUMBER) {
    jv_free(a);
    return jv_invalid_with_msg(jv_string(" gmtime requires numeric inputs "));
  }
  /* gmtime() returns a pointer to its own storage; no local struct tm */
  struct tm *tmp;
  double fsecs = jv_number_value(a);
  time_t secs = fsecs;
  jv_free(a);
  tmp = gmtime(&secs);
  if (tmp == NULL)
    return jv_invalid_with_msg(jv_string(" errror converting number of seconds since epoch to datetime "));
  a = tm2jv(tmp);
  return jv_array_set(a, 5, jv_number(jv_number_value(jv_array_get(jv_copy(a), 5)) + (fsecs - floor(fsecs))));
}
#else
static jv f_gmtime(jq_state *jq, jv a) {
  jv_free(a);
  return jv_invalid_with_msg(jv_string(" gmtime not implemented on this platform "));
}
#endif
#ifdef HAVE_STRFTIME
/*
 * strftime(fmt): formats the input a according to the format string b.
 *
 * a may be a number, which is first converted with f_gmtime(), or an
 * array as accepted by jv2tm().  Returns the formatted string, or an
 * error value if a or b has the wrong kind, if the f_gmtime()
 * conversion fails (its error is propagated), or if strftime() itself
 * produces no output.
 */
static jv f_strftime(jq_state *jq, jv a, jv b) {
  if (jv_get_kind(a) == JV_KIND_NUMBER) {
    a = f_gmtime(jq, a);
    if (!jv_is_valid(a)) {
      /* propagate the conversion error instead of treating it as an array */
      jv_free(b);
      return a;
    }
  } else if (jv_get_kind(a) != JV_KIND_ARRAY) {
    jv_free(a);
    jv_free(b);
    return jv_invalid_with_msg(jv_string(" strftime/1 requires parsed datetime inputs "));
  }
  if (jv_get_kind(b) != JV_KIND_STRING) {
    jv_free(a);
    jv_free(b);
    return jv_invalid_with_msg(jv_string("strftime/1 requires a string format"));
  }

  struct tm tm;
  /* ownership of a passes to jv2tm() here; it must not be freed again */
  if (!jv2tm(a, &tm)) {
    jv_free(b);
    return jv_invalid_with_msg(jv_string(" strftime/1 requires parsed datetime inputs "));
  }

  const char *fmt = jv_string_value(b);
  /* heuristic: assume the output is at most 100 bytes longer than the format */
  size_t alloced = strlen(fmt) + 100;
  char *buf = alloca(alloced);
  size_t n = strftime(buf, alloced, fmt, &tm);
  jv_free(b);
  /* POSIX doesn't provide errno values for strftime() failures; weird */
  if (n == 0 || n > alloced)
    return jv_invalid_with_msg(jv_string(" strftime/1: unknown system failure "));
  return jv_string(buf);
}
#else
/* Fallback when strftime() is unavailable: always returns an error. */
static jv f_strftime(jq_state *jq, jv a, jv b) {
  jv_free(a);
  jv_free(b);
  return jv_invalid_with_msg(jv_string(" strftime/1 not implemented on this platform "));
}
#endif
#ifdef HAVE_GETTIMEOFDAY
/*
 * now: discards the input and returns the current time in seconds as
 * a number, including the microsecond fraction from gettimeofday().
 * Falls back to time() if gettimeofday() fails.
 */
static jv f_now(jq_state *jq, jv a) {
  struct timeval tv;

  jv_free(a);
  if (gettimeofday(&tv, NULL) != -1)
    return jv_number(tv.tv_sec + tv.tv_usec / 1000000.0);
  return jv_number(time(NULL));
}
#else
/* now: discards the input and returns the value of time(NULL). */
static jv f_now(jq_state *jq, jv a) {
  jv_free(a);
  time_t current = time(NULL);
  return jv_number(current);
}
#endif
2015-03-06 00:14:15 -06:00
2014-08-10 16:52:03 -05:00
2014-02-21 08:46:56 +00:00
/*
 * Table of the builtins implemented in C.
 *
 * Each entry is { function pointer cast to cfunction_ptr, name string,
 * integer }.  For the functions whose definitions are visible in this
 * file the integer matches the number of jv parameters the function
 * takes (the input counts as one) -- presumably it is the arity used
 * when binding the builtin; confirm against struct cfunction.
 *
 * While the table is being built, LIBM_DD(name) is defined so that
 * every LIBM_DD entry pulled in from "libm.h" expands to a table row
 * that registers f_<name> under the name "_<name>" with the integer 1.
 * The macro is undefined again right after the table.
 */
# define LIBM_DD(name) \
{ ( cfunction_ptr ) f_ # # name , " _ " # name , 1 } ,
2013-05-05 22:37:46 +01:00
static const struct cfunction function_list [ ] = {
2014-02-21 08:46:56 +00:00
# include "libm.h"
2012-12-02 22:12:08 +00:00
{ ( cfunction_ptr ) f_plus , " _plus " , 3 } ,
2013-01-03 12:53:06 +00:00
{ ( cfunction_ptr ) f_negate , " _negate " , 1 } ,
2012-12-02 22:12:08 +00:00
{ ( cfunction_ptr ) f_minus , " _minus " , 3 } ,
{ ( cfunction_ptr ) f_multiply , " _multiply " , 3 } ,
{ ( cfunction_ptr ) f_divide , " _divide " , 3 } ,
2013-06-17 20:21:37 -05:00
{ ( cfunction_ptr ) f_mod , " _mod " , 3 } ,
2013-11-29 16:16:52 -06:00
{ ( cfunction_ptr ) f_dump , " tojson " , 1 } ,
{ ( cfunction_ptr ) f_json_parse , " fromjson " , 1 } ,
2012-12-02 22:12:08 +00:00
{ ( cfunction_ptr ) f_tonumber , " tonumber " , 1 } ,
{ ( cfunction_ptr ) f_tostring , " tostring " , 1 } ,
{ ( cfunction_ptr ) f_keys , " keys " , 1 } ,
2014-09-30 21:49:37 -05:00
{ ( cfunction_ptr ) f_keys_unsorted , " keys_unsorted " , 1 } ,
2013-11-28 15:17:32 -06:00
{ ( cfunction_ptr ) f_startswith , " startswith " , 2 } ,
{ ( cfunction_ptr ) f_endswith , " endswith " , 2 } ,
2013-11-30 00:44:14 -06:00
{ ( cfunction_ptr ) f_ltrimstr , " ltrimstr " , 2 } ,
{ ( cfunction_ptr ) f_rtrimstr , " rtrimstr " , 2 } ,
2014-07-07 22:49:46 -05:00
{ ( cfunction_ptr ) f_string_split , " split " , 2 } ,
{ ( cfunction_ptr ) f_string_explode , " explode " , 1 } ,
{ ( cfunction_ptr ) f_string_implode , " implode " , 1 } ,
2014-12-30 11:17:58 -06:00
{ ( cfunction_ptr ) f_string_indexes , " _strindices " , 2 } ,
2014-07-07 22:49:46 -05:00
{ ( cfunction_ptr ) f_setpath , " setpath " , 3 } , // FIXME typechecking
{ ( cfunction_ptr ) f_getpath , " getpath " , 2 } ,
{ ( cfunction_ptr ) f_delpaths , " delpaths " , 2 } ,
{ ( cfunction_ptr ) f_has , " has " , 2 } ,
2012-12-02 22:12:08 +00:00
{ ( cfunction_ptr ) f_equal , " _equal " , 3 } ,
{ ( cfunction_ptr ) f_notequal , " _notequal " , 3 } ,
{ ( cfunction_ptr ) f_less , " _less " , 3 } ,
{ ( cfunction_ptr ) f_greater , " _greater " , 3 } ,
{ ( cfunction_ptr ) f_lesseq , " _lesseq " , 3 } ,
{ ( cfunction_ptr ) f_greatereq , " _greatereq " , 3 } ,
2012-12-02 22:24:02 +00:00
{ ( cfunction_ptr ) f_contains , " contains " , 2 } ,
2012-12-02 22:12:08 +00:00
{ ( cfunction_ptr ) f_length , " length " , 1 } ,
{ ( cfunction_ptr ) f_type , " type " , 1 } ,
{ ( cfunction_ptr ) f_sort , " sort " , 1 } ,
2012-12-02 23:22:15 +00:00
{ ( cfunction_ptr ) f_sort_by_impl , " _sort_by_impl " , 2 } ,
{ ( cfunction_ptr ) f_group_by_impl , " _group_by_impl " , 2 } ,
2012-12-04 22:45:03 +00:00
{ ( cfunction_ptr ) f_min , " min " , 1 } ,
{ ( cfunction_ptr ) f_max , " max " , 1 } ,
{ ( cfunction_ptr ) f_min_by_impl , " _min_by_impl " , 2 } ,
{ ( cfunction_ptr ) f_max_by_impl , " _max_by_impl " , 2 } ,
2012-12-10 22:30:09 +00:00
{ ( cfunction_ptr ) f_error , " error " , 2 } ,
2012-12-27 20:49:34 +00:00
{ ( cfunction_ptr ) f_format , " format " , 2 } ,
2014-06-13 17:51:41 -05:00
{ ( cfunction_ptr ) f_env , " env " , 1 } ,
2014-12-30 13:13:30 -06:00
{ ( cfunction_ptr ) f_get_search_list , " get_search_list " , 1 } ,
{ ( cfunction_ptr ) f_get_prog_origin , " get_prog_origin " , 1 } ,
{ ( cfunction_ptr ) f_get_jq_origin , " get_jq_origin " , 1 } ,
2015-02-14 13:31:34 -06:00
# ifdef HAVE_ONIGURUMA
2014-06-18 19:49:38 -04:00
{ ( cfunction_ptr ) f_match , " _match_impl " , 4 } ,
2015-02-14 13:31:34 -06:00
# endif
2014-08-10 16:52:03 -05:00
{ ( cfunction_ptr ) f_modulemeta , " modulemeta " , 1 } ,
Add Streaming parser (--stream)
Streaming means that outputs are produced as soon as possible. With the
`foreach` syntax one can write programs which reduce portions of the
streaming parse of a large input (reduce into proper JSON values, for
example), and discard the rest, processing incrementally.
This:
$ jq -c --stream .
should produce the same output as this:
$ jq -c '. as $dot | path(..) as $p | $dot | getpath($p) | [$p,.]'
The output of `jq --stream .` should be a sequence of`[[<path>],<leaf>]`
and `[[<path>]]` values. The latter indicate that the array/object at
that path ended.
Scalars and empty arrays and objects are leaf values for this purpose.
For example, a truncated input produces a path as soon as possible, then
later the error:
$ printf '[0,\n'|./jq -c --stream .
[[0],0]
parse error: Unfinished JSON term at EOF at line 3, column 0
$
2014-12-22 23:06:27 -06:00
{ ( cfunction_ptr ) f_input , " _input " , 1 } ,
2014-12-27 17:15:56 -06:00
{ ( cfunction_ptr ) f_debug , " debug " , 1 } ,
2014-12-27 17:15:56 -06:00
{ ( cfunction_ptr ) f_stderr , " stderr " , 1 } ,
2015-03-06 00:14:15 -06:00
{ ( cfunction_ptr ) f_strptime , " strptime " , 2 } ,
2015-03-08 18:56:51 -05:00
{ ( cfunction_ptr ) f_strftime , " strftime " , 2 } ,
2015-03-06 00:14:15 -06:00
{ ( cfunction_ptr ) f_mktime , " mktime " , 1 } ,
2015-03-08 18:56:51 -05:00
{ ( cfunction_ptr ) f_gmtime , " gmtime " , 1 } ,
{ ( cfunction_ptr ) f_now , " now " , 1 } ,
2012-08-16 01:00:30 +01:00
} ;
2014-02-21 08:46:56 +00:00
# undef LIBM_DD
2012-09-17 20:59:34 +01:00
2012-11-26 18:53:47 +00:00
/* A builtin defined directly as a block of bytecode: its name and body. */
struct bytecoded_builtin { const char * name ; block code ; } ;
/*
 * Builds the builtins that are written directly as bytecode blocks and
 * binds them to the program block b.
 *
 * Three groups are generated, in this order:
 *   - zero-parameter builtins: "empty" (a single BACKTRACK) and "not"
 *     (a conditional branch between the constants false and true);
 *   - one-parameter builtins, whose parameter is named "arg": "path",
 *     which wraps a call to arg between PATH_BEGIN and PATH_END;
 *   - "range", taking parameters "start" and "end".
 *
 * Returns the result of block_bind_referenced(builtins, b,
 * OP_IS_CALL_PSEUDO).
 */
static block bind_bytecoded_builtins ( block b ) {
block builtins = gen_noop ( ) ;
2012-12-28 16:08:29 +00:00
/* Builtins without parameters. */
{
struct bytecoded_builtin builtin_defs [ ] = {
{ " empty " , gen_op_simple ( BACKTRACK ) } ,
{ " not " , gen_condbranch ( gen_const ( jv_false ( ) ) ,
gen_const ( jv_true ( ) ) ) }
} ;
for ( unsigned i = 0 ; i < sizeof ( builtin_defs ) / sizeof ( builtin_defs [ 0 ] ) ; i + + ) {
builtins = BLOCK ( builtins , gen_function ( builtin_defs [ i ] . name , gen_noop ( ) ,
builtin_defs [ i ] . code ) ) ;
}
}
/* Builtins with a single parameter, always named "arg". */
{
struct bytecoded_builtin builtin_def_1arg [ ] = {
2013-05-13 15:00:05 +01:00
{ " path " , BLOCK ( gen_op_simple ( PATH_BEGIN ) ,
2012-12-28 16:08:29 +00:00
gen_call ( " arg " , gen_noop ( ) ) ,
2013-05-13 15:00:05 +01:00
gen_op_simple ( PATH_END ) ) } ,
2012-12-28 16:08:29 +00:00
} ;
for ( unsigned i = 0 ; i < sizeof ( builtin_def_1arg ) / sizeof ( builtin_def_1arg [ 0 ] ) ; i + + ) {
builtins = BLOCK ( builtins , gen_function ( builtin_def_1arg [ i ] . name ,
2013-06-18 01:07:18 +01:00
gen_param ( " arg " ) ,
2012-12-28 16:08:29 +00:00
builtin_def_1arg [ i ] . code ) ) ;
}
2012-11-26 18:53:47 +00:00
}
2013-05-14 16:09:10 +01:00
/*
 * range(start; end): DUP, call start and store the result in a fresh
 * variable "rangevar"; then call end and emit RANGE bound to that variable.
 */
{
2014-07-02 21:22:53 -05:00
// Note that we can now define `range` as a jq-coded function
2013-06-18 01:07:18 +01:00
block rangevar = gen_op_var_fresh ( STOREV , " rangevar " ) ;
2013-05-14 16:09:10 +01:00
block init = BLOCK ( gen_op_simple ( DUP ) , gen_call ( " start " , gen_noop ( ) ) , rangevar ) ;
block range = BLOCK ( init ,
gen_call ( " end " , gen_noop ( ) ) ,
2013-06-18 01:07:18 +01:00
gen_op_bound ( RANGE , rangevar ) ) ;
2013-05-14 16:09:10 +01:00
builtins = BLOCK ( builtins , gen_function ( " range " ,
2013-06-18 01:07:18 +01:00
BLOCK ( gen_param ( " start " ) , gen_param ( " end " ) ) ,
2013-05-14 16:09:10 +01:00
range ) ) ;
}
2012-12-28 16:08:29 +00:00
2013-05-15 01:37:15 +01:00
return block_bind_referenced ( builtins , b , OP_IS_CALL_PSEUDO ) ;
2012-11-26 18:53:47 +00:00
}
2012-09-17 20:59:34 +01:00
2014-02-21 08:46:56 +00:00
# define LIBM_DD(name) "def " #name ": _" #name ";",
2013-05-05 22:37:46 +01:00
static const char * const jq_builtins [ ] = {
2014-12-26 03:31:01 -06:00
" def error: error(.); " ,
2012-09-18 12:58:39 +01:00
" def map(f): [.[] | f]; " ,
" def select(f): if f then . else empty end; " ,
2014-10-03 17:14:30 -05:00
" def sort_by(f): _sort_by_impl(map([f])); " ,
" def group_by(f): _group_by_impl(map([f])); " ,
" def unique: group_by(.) | map(.[0]); " ,
" def unique_by(f): group_by(f) | map(.[0]); " ,
" def max_by(f): _max_by_impl(map([f])); " ,
" def min_by(f): _min_by_impl(map([f])); " ,
2014-02-21 08:46:56 +00:00
# include "libm.h"
2013-05-16 15:07:53 +01:00
" def add: reduce .[] as $x (null; . + $x); " ,
2012-12-29 16:13:06 +00:00
" def del(f): delpaths([path(f)]); " ,
2013-05-16 15:07:53 +01:00
" def _assign(paths; value): value as $v | reduce path(paths) as $p (.; setpath($p; $v)); " ,
" def _modify(paths; update): reduce path(paths) as $p (.; setpath($p; getpath($p) | update)); " ,
2015-02-18 10:21:09 -06:00
" def map_values(f): .[] |= f; " ,
2014-08-03 19:49:02 -04:00
// recurse
" def recurse(f): def r: ., (f | select(. != null) | r); r; " ,
" def recurse(f; cond): def r: ., (f | select(cond) | r); r; " ,
2014-06-18 00:15:22 +02:00
" def recurse: recurse(.[]?); " ,
" def recurse_down: recurse; " ,
2014-08-03 19:49:02 -04:00
2014-09-30 21:49:37 -05:00
" def to_entries: [keys_unsorted[] as $k | {key: $k, value: .[$k]}]; " ,
2015-02-10 08:33:56 -05:00
" def from_entries: map({(.key // .Key // .Name): (.value // .Value)}) | add | .//={}; " ,
2013-05-13 16:15:49 +01:00
" def with_entries(f): to_entries | map(f) | from_entries; " ,
2013-05-16 14:25:28 +01:00
" def reverse: [.[length - 1 - range(0;length)]]; " ,
2014-12-30 11:17:58 -06:00
" def indices($i): if type == \" array \" and ($i|type) == \" array \" then .[$i] "
" elif type == \" array \" then .[[$i]] "
" elif type == \" string \" and ($i|type) == \" string \" then _strindices($i) "
" else .[$i] end; " ,
" def index($i): indices($i) | .[0]; " , // TODO: optimize
" def rindex($i): indices($i) | .[-1:][0]; " , // TODO: optimize
2014-02-05 15:23:32 -06:00
" def paths: path(recurse(if (type|. == \" array \" or . == \" object \" ) then .[] else empty end))|select(length > 0); " ,
2014-06-18 00:15:22 +02:00
" def paths(node_filter): . as $dot|paths|select(. as $p|$dot|getpath($p)|node_filter); " ,
2014-07-07 22:25:34 -05:00
" def any(generator; condition): "
2015-03-23 14:57:35 -05:00
" [label $out | foreach generator as $i "
2014-07-07 22:25:34 -05:00
" (false; "
2015-03-23 14:57:35 -05:00
" if . then break $out elif $i | condition then true else . end; "
2014-10-21 13:00:38 +02:00
" if . then . else empty end)] | length == 1; " ,
2014-10-20 18:16:03 +02:00
" def any(condition): any(.[]; condition); " ,
" def any: any(.); " ,
2014-07-07 22:25:34 -05:00
" def all(generator; condition): "
2015-03-23 14:57:35 -05:00
" [label $out | foreach generator as $i "
2014-07-07 22:25:34 -05:00
" (true; "
2015-03-23 14:57:35 -05:00
" if .|not then break $out elif $i | condition then . else false end; "
2014-10-21 13:00:38 +02:00
" if .|not then . else empty end)] | length == 0; " ,
2014-10-20 18:16:03 +02:00
" def all(condition): all(.[]; condition); " ,
" def all: all(.); " ,
2014-03-10 01:35:44 +01:00
" def arrays: select(type == \" array \" ); " ,
" def objects: select(type == \" object \" ); " ,
" def iterables: arrays, objects; " ,
" def booleans: select(type == \" boolean \" ); " ,
" def numbers: select(type == \" number \" ); " ,
" def strings: select(type == \" string \" ); " ,
" def nulls: select(type == \" null \" ); " ,
2014-12-23 23:22:43 -06:00
" def values: select(. != null); " ,
2014-03-13 00:16:33 -05:00
" def scalars: select(. == null or . == true or . == false or type == \" number \" or type == \" string \" ); " ,
Add Streaming parser (--stream)
Streaming means that outputs are produced as soon as possible. With the
`foreach` syntax one can write programs which reduce portions of the
streaming parse of a large input (reduce into proper JSON values, for
example), and discard the rest, processing incrementally.
This:
$ jq -c --stream .
should produce the same output as this:
$ jq -c '. as $dot | path(..) as $p | $dot | getpath($p) | [$p,.]'
The output of `jq --stream .` should be a sequence of`[[<path>],<leaf>]`
and `[[<path>]]` values. The latter indicate that the array/object at
that path ended.
Scalars and empty arrays and objects are leaf values for this purpose.
For example, a truncated input produces a path as soon as possible, then
later the error:
$ printf '[0,\n'|./jq -c --stream .
[[0],0]
parse error: Unfinished JSON term at EOF at line 3, column 0
$
2014-12-22 23:06:27 -06:00
" def scalars_or_empty: select(. == null or . == true or . == false or type == \" number \" or type == \" string \" or ((type== \" array \" or type== \" object \" ) and length==0)); " ,
2014-06-18 00:15:22 +02:00
" def leaf_paths: paths(scalars); " ,
2015-01-14 12:24:27 +01:00
" def join($x): reduce .[] as $i (null; (.// \" \" ) + (if . == null then $i else $x + $i end))// \" \" ; " ,
2014-06-16 02:34:18 +02:00
" def flatten: reduce .[] as $i ([]; if $i | type == \" array \" then . + ($i | flatten) else . + [$i] end); " ,
2014-08-08 19:23:46 -05:00
" def flatten($x): reduce .[] as $i ([]; if $i | type == \" array \" and $x > 0 then . + ($i | flatten($x-1)) else . + [$i] end); " ,
" def range($x): range(0;$x); " ,
2015-03-08 18:56:51 -05:00
" def fromdateiso8601: strptime( \" %Y-%m-%dT%H:%M:%SZ \" )|mktime; " ,
" def todateiso8601: strftime( \" %Y-%m-%dT%H:%M:%SZ \" ); " ,
" def fromdate: fromdateiso8601; " ,
" def todate: todateiso8601; " ,
2015-02-14 13:31:34 -06:00
# ifdef HAVE_ONIGURUMA
2014-06-18 19:49:38 -04:00
" def match(re; mode): _match_impl(re; mode; false)|.[]; " ,
2014-08-08 19:31:11 -05:00
" def match($val): ($val|type) as $vt | if $vt == \" string \" then match($val; null) "
" elif $vt == \" array \" and ($val | length) > 1 then match($val[0]; $val[1]) "
" elif $vt == \" array \" and ($val | length) > 0 then match($val[0]; null) "
2014-07-31 20:32:44 -04:00
" else error( $vt + \" not a string or array \" ) end; " ,
2014-06-18 19:49:38 -04:00
" def test(re; mode): _match_impl(re; mode; true); " ,
2014-08-08 19:31:11 -05:00
" def test($val): ($val|type) as $vt | if $vt == \" string \" then test($val; null) "
" elif $vt == \" array \" and ($val | length) > 1 then test($val[0]; $val[1]) "
" elif $vt == \" array \" and ($val | length) > 0 then test($val[0]; null) "
2014-07-31 20:32:44 -04:00
" else error( $vt + \" not a string or array \" ) end; " ,
2014-07-19 01:08:38 -04:00
" def capture(re; mods): match(re; mods) | reduce ( .captures | .[] | select(.name != null) | { (.name) : .string } ) as $pair ({}; . + $pair); " ,
2014-08-08 19:31:11 -05:00
" def capture($val): ($val|type) as $vt | if $vt == \" string \" then capture($val; null) "
" elif $vt == \" array \" and ($val | length) > 1 then capture($val[0]; $val[1]) "
" elif $vt == \" array \" and ($val | length) > 0 then capture($val[0]; null) "
2014-07-31 20:32:44 -04:00
" else error( $vt + \" not a string or array \" ) end; " ,
" def scan(re): "
" match(re; \" g \" ) "
" | if (.captures|length > 0) "
" then [ .captures | .[] | .string ] "
" else .string "
" end ; " ,
//
// If input is an array, then emit a stream of successive subarrays of length n (or less),
// and similarly for strings.
2014-08-08 19:36:43 -05:00
" def _nwise(a; $n): if a|length <= $n then a else a[0:$n] , _nwise(a[$n:]; $n) end; " ,
" def _nwise($n): _nwise(.; $n); " ,
2014-07-31 20:32:44 -04:00
//
// splits/1 produces a stream; split/1 is retained for backward compatibility.
2014-08-08 19:31:11 -05:00
" def splits($re; flags): . as $s "
2014-07-31 20:32:44 -04:00
// # multiple occurrences of "g" are acceptable
2014-08-08 19:31:11 -05:00
" | [ match($re; \" g \" + flags) | (.offset, .offset + .length) ] "
2014-07-31 20:32:44 -04:00
" | [0] + . +[$s|length] "
2014-08-08 19:36:43 -05:00
" | _nwise(2) "
2014-07-31 20:32:44 -04:00
" | $s[.[0]:.[1] ] ; " ,
2014-08-08 19:31:11 -05:00
" def splits($re): splits($re; null); " ,
2014-07-31 20:32:44 -04:00
//
// split emits an array for backward compatibility
2014-08-08 19:31:11 -05:00
" def split($re; flags): [ splits($re; flags) ]; " ,
2014-07-31 20:32:44 -04:00
//
// If s contains capture variables, then create a capture object and pipe it to s
2014-08-08 19:31:11 -05:00
" def sub($re; s): "
2014-07-31 20:32:44 -04:00
" . as $in "
2014-08-08 19:31:11 -05:00
" | [match($re)] "
2014-10-06 14:37:57 -04:00
" | if length == 0 then $in "
" else .[0] "
" | . as $r "
// # create the \"capture\" object:
" | reduce ( $r | .captures | .[] | select(.name != null) | { (.name) : .string } ) as $pair "
" ({}; . + $pair) "
2014-11-12 20:46:34 -05:00
" | $in[0:$r.offset] + s + $in[$r.offset+$r.length:] "
2014-07-31 20:32:44 -04:00
" end ; " ,
//
// repeated substitution of re (which may contain named captures)
2014-10-06 14:37:57 -04:00
" def gsub($re; s; flags): "
2014-07-31 20:32:44 -04:00
// # _stredit(edits;s) - s is the \"to\" string, which might contain capture variables,
// # so if an edit contains captures, then create the capture object and pipe it to s
" def _stredit(edits; s): "
" if (edits|length) == 0 then . "
" else . as $in "
" | (edits|length -1) as $l "
" | (edits[$l]) as $edit "
// # create the \"capture\" object:
" | ($edit | reduce ( $edit | .captures | .[] | select(.name != null) | { (.name) : .string } ) as $pair "
" ({}; . + $pair) ) "
" | if . == {} then $in | .[0:$edit.offset]+s+.[$edit.offset+$edit.length:] | _stredit(edits[0:$l]; s) "
" else (if $l == 0 then \" \" else ($in | _stredit(edits[0:$l]; s)) end) + (. | s) "
" end "
" end ; "
2014-10-06 14:37:57 -04:00
" [match($re; flags + \" g \" )] as $edits | _stredit($edits; s) ; " ,
2015-02-14 13:31:34 -06:00
" def gsub($re; s): gsub($re; s; \" \" ); " ,
# endif /* HAVE_ONIGURUMA */
2014-07-31 20:32:44 -04:00
//#######################################################################
2014-07-02 21:22:53 -05:00
// range/3, with a `by` expression argument
2014-08-08 19:31:11 -05:00
" def range($init; $upto; $by): "
2014-08-01 00:20:30 -05:00
" def _range: "
" if ($by > 0 and . < $upto) or ($by < 0 and . > $upto) then ., ((.+$by)|_range) else . end; "
" if $by == 0 then $init else $init|_range end | select(($by > 0 and . < $upto) or ($by < 0 and . > $upto)); " ,
2014-07-02 21:45:49 -05:00
// generic iterator/generator
" def while(cond; update): "
" def _while: "
" if cond then ., (update | _while) else empty end; "
2014-12-28 00:32:06 -06:00
" _while; " ,
2014-12-12 17:35:59 -06:00
" def until(cond; next): "
" def _until: "
" if cond then . else (next|_until) end; "
" _until; " ,
2015-03-23 14:57:35 -05:00
" def limit($n; exp): if $n < 0 then exp else label $out | foreach exp as $item ([$n, null]; if .[0] < 1 then break $out else [.[0] -1, $item] end; .[1]) end; " ,
" def first(g): label $out | foreach g as $item ([false, null]; if .[0]==true then break $out else [true, $item] end; .[1]); " ,
2014-07-28 13:18:58 -05:00
" def last(g): reduce g as $item (null; $item); " ,
2014-08-08 19:23:46 -05:00
" def nth($n; g): if $n < 0 then error( \" nth doesn't support negative indices \" ) else last(limit($n + 1; g)) end; " ,
2014-07-28 13:18:58 -05:00
" def first: .[0]; " ,
" def last: .[-1]; " ,
2014-08-08 19:23:46 -05:00
" def nth($n): .[$n]; " ,
2014-10-06 14:37:57 -04:00
// # transpose a possibly jagged matrix, quickly;
// # rows are padded with nulls so the result is always rectangular.
" def transpose: "
" if . == [] then [] "
" else . as $in "
" | (map(length) | max) as $max "
" | length as $length "
" | reduce range(0; $max) as $j "
" ([]; . + [reduce range(0;$length) as $i ([]; . + [ $in[$i][$j] ] )] ) "
" end; " ,
2014-12-26 22:49:32 -06:00
" def in(xs): . as $x | xs | has($x); " ,
" def inside(xs): . as $x | xs | contains($x); " ,
2014-12-28 00:32:06 -06:00
" def input: _input; " ,
2014-12-26 22:49:32 -06:00
" def repeat(exp): "
" def _repeat: "
" exp, _repeat; "
2014-12-28 00:32:06 -06:00
" _repeat; " ,
" def inputs: try repeat(_input) catch if .== \" break \" then empty else .|error end; " ,
2014-10-06 22:51:13 -04:00
// # like ruby's downcase - only characters A to Z are affected
" def ascii_downcase: "
" explode | map( if 65 <= . and . <= 90 then . + 32 else . end) | implode; " ,
// # like ruby's upcase - only characters a to z are affected
" def ascii_upcase: "
" explode | map( if 97 <= . and . <= 122 then . - 32 else . end) | implode; " ,
2014-10-07 09:43:11 -04:00
// # Assuming the input array is sorted, bsearch/1 returns
// # the index of the target if the target is in the input array; and otherwise
// # (-1 - ix), where ix is the insertion point that would leave the array sorted.
// # If the input is not sorted, bsearch will terminate but with irrelevant results.
" def bsearch(target): "
" if length == 0 then -1 "
" elif length == 1 then "
" if target == .[0] then 0 elif target < .[0] then -1 else -2 end "
" else . as $in "
" " // # state variable: [start, end, answer]
" " // # where start and end are the lower and upper offsets to use.
" | last( [0, length-1, null] "
" | while( .[0] <= .[1] ; "
" (if .[2] != null then (.[1] = -1) " // # i.e. break
" else "
" ( ( (.[1] + .[0]) / 2 ) | floor ) as $mid "
" | $in[$mid] as $monkey "
" | if $monkey == target then (.[2] = $mid) " // # success
" elif .[0] == .[1] then (.[1] = -1) " // # failure
" elif $monkey < target then (.[0] = ($mid + 1)) "
" else (.[1] = ($mid - 1)) "
" end "
" end ))) "
" | if .[2] == null then " // # compute the insertion point
" if $in[ .[0] ] < target then (-2 -.[0]) "
" else (-1 -.[0]) "
" end "
" else .[2] "
" end "
" end; " ,
2012-09-18 10:17:38 +01:00
} ;
2014-02-21 08:46:56 +00:00
# undef LIBM_DD
2012-09-18 10:17:38 +01:00
2012-09-17 20:59:34 +01:00
2013-11-30 02:05:42 -06:00
/*
 * Parse one chunk of jq library source text and, if it parses cleanly,
 * bind its definitions into *bb.
 *
 *   jq   - interpreter state handed to locfile_init().
 *   bb   - in/out block; replaced by the result of block_bind_referenced()
 *          only when parsing reports zero errors, otherwise left untouched.
 *   code - NUL-terminated library source; its length is taken with strlen().
 *
 * Returns the error count reported by jq_parse_library() (0 on success).
 * The temporary locfile is released on every path.
 */
static int builtins_bind_one(jq_state *jq, block *bb, const char *code) {
  block defs;
  struct locfile *loc = locfile_init(jq, " <builtin> ", code, strlen(code));
  const int parse_errors = jq_parse_library(loc, &defs);

  /* Only touch the caller's block when the library parsed without errors. */
  if (!parse_errors)
    *bb = block_bind_referenced(defs, *bb, OP_IS_CALL_PSEUDO);

  locfile_free(loc);
  return parse_errors;
}
2013-11-30 02:05:42 -06:00
/*
 * Load the per-user jq startup file, located under the directory returned
 * by getenv(), and bind its definitions into *bb via builtins_bind_one().
 *
 * Nothing is bound, and 0 is returned, when the environment variable is
 * unset or when jv_load_file() yields an invalid value. Otherwise the
 * return value is the error count from builtins_bind_one().
 */
static int slurp_lib(jq_state *jq, block *bb) {
  const char *home_dir = getenv(" HOME ");
  if (home_dir == NULL)
    return 0;  // silently ignore no $HOME

  jv rc_path = jv_string_append_str(jv_string(home_dir), " /.jq ");
  /* NOTE(review): the second argument (1) presumably requests the raw file
   * text as a single string -- confirm against jv_load_file(). */
  jv rc_text = jv_load_file(jv_string_value(rc_path), 1);

  /* An invalid load result is deliberately ignored rather than reported. */
  int nerrors = jv_is_valid(rc_text)
                  ? builtins_bind_one(jq, bb, jv_string_value(rc_text))
                  : 0;

  jv_free(rc_path);
  jv_free(rc_text);
  return nerrors;
}
2013-11-30 02:05:42 -06:00
/*
 * Bind every builtin definition into the program block *bb.
 *
 * Order of work:
 *   1. slurp_lib()               - the user's startup file, if any;
 *   2. jq_builtins[]             - the jq-coded builtins, bound from the last
 *                                  array entry down to the first;
 *   3. bind_bytecoded_builtins() - the bytecoded builtins;
 *   4. gen_cbinding()            - the C-coded builtins in function_list.
 *
 * Returns 0 on success. On failure returns the non-zero error count, and
 * *bb has been released with block_free(), so the caller must not use it.
 */
int builtins_bind(jq_state *jq, block *bb) {
  int nerrors = slurp_lib(jq, bb);
  if (nerrors) {
    block_free(*bb);
    return nerrors;
  }

  for (int i = (int)(sizeof(jq_builtins) / sizeof(jq_builtins[0])) - 1; i >= 0; i--) {
    nerrors = builtins_bind_one(jq, bb, jq_builtins[i]);
    /* A compiled-in builtin that fails to parse is a programming error. */
    assert(!nerrors);
    if (nerrors) {
      /*
       * Reached only when assert() is compiled out (NDEBUG). Without this
       * check the loop would carry on with a partially bound builtin set and
       * the failure would be lost unless it happened on the final iteration.
       * Fail the same way the slurp_lib() error path above does.
       */
      block_free(*bb);
      return nerrors;
    }
  }

  *bb = bind_bytecoded_builtins(*bb);
  *bb = gen_cbinding(function_list, sizeof(function_list) / sizeof(function_list[0]), *bb);
  return nerrors;
}