1
0
mirror of https://github.com/stedolan/jq.git synced 2024-05-11 05:55:39 +00:00

Move some unicode handling stuff to a separate file.

This commit is contained in:
Stephen Dolan
2012-09-04 20:34:43 +01:00
parent 328c4a13f1
commit c5ab3b2336
4 changed files with 40 additions and 23 deletions

View File

@ -17,7 +17,7 @@ parser.tab.c: parser.y lexer.yy.h
bison -W -d parser.y -v --report-file=parser.info
parser.tab.h: parser.tab.c
parsertest: parser.tab.c lexer.yy.c main.c opcode.c bytecode.c compile.c execute.c builtin.c jv.c jv_parse.c jv_print.c jv_dtoa.c
parsertest: parser.tab.c lexer.yy.c main.c opcode.c bytecode.c compile.c execute.c builtin.c jv.c jv_parse.c jv_print.c jv_dtoa.c jv_unicode.c
$(CC) -o $@ $^
jv_test: jv_test.c jv.c jv_print.c jv_dtoa.c

View File

@ -4,6 +4,7 @@
#include "jv.h"
#include "jv_dtoa.h"
#include "jv_parse.h"
#include "jv_unicode.h"
typedef const char* presult;
@ -157,27 +158,6 @@ static int unhex4(char* hex) {
return r;
}
/*
 * Write the UTF-8 encoding of `codepoint` to `out`.
 *
 * codepoint: value in [0, 0x10FFFF]; checked with assert().
 * out:       destination buffer; 1 to 4 bytes are written and no
 *            terminator is appended.
 *
 * Returns the number of bytes written.
 */
static int utf8_encode(int codepoint, char* out) {
  assert(codepoint >= 0 && codepoint <= 0x10FFFF);

  /* One byte: the value is stored as-is. */
  if (codepoint <= 0x7F) {
    out[0] = codepoint;
    return 1;
  }

  /* Two bytes: 110xxxxx 10xxxxxx */
  if (codepoint <= 0x7FF) {
    out[0] = 0xC0 | ((codepoint >> 6) & 0x1F);
    out[1] = 0x80 | (codepoint & 0x3F);
    return 2;
  }

  /* Three bytes: 1110xxxx 10xxxxxx 10xxxxxx */
  if (codepoint <= 0xFFFF) {
    out[0] = 0xE0 | ((codepoint >> 12) & 0x0F);
    out[1] = 0x80 | ((codepoint >> 6) & 0x3F);
    out[2] = 0x80 | (codepoint & 0x3F);
    return 3;
  }

  /* Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
  out[0] = 0xF0 | ((codepoint >> 18) & 0x07);
  out[1] = 0x80 | ((codepoint >> 12) & 0x3F);
  out[2] = 0x80 | ((codepoint >> 6) & 0x3F);
  out[3] = 0x80 | (codepoint & 0x3F);
  return 4;
}
static pfunc found_string(struct jv_parser* p) {
char* in = p->tokenbuf;
char* out = p->tokenbuf;
@ -217,7 +197,7 @@ static pfunc found_string(struct jv_parser* p) {
|(surrogate - 0xDC00));
}
// FIXME assert valid codepoint
out += utf8_encode(codepoint, out);
out += jvp_utf8_encode(codepoint, out);
break;
default:

30
c/jv_unicode.c Normal file
View File

@ -0,0 +1,30 @@
#include <assert.h>
#include "jv_unicode.h"
/*
 * Number of bytes the UTF-8 encoding of `codepoint` occupies.
 *
 * The result starts at 1 and grows by one for each size threshold
 * (0x7F, 0x7FF, 0xFFFF) that the code point exceeds, giving 1..4.
 * No range validation is performed here.
 */
int jvp_utf8_encode_length(int codepoint) {
  return 1
       + (codepoint > 0x7F)
       + (codepoint > 0x7FF)
       + (codepoint > 0xFFFF);
}
/*
 * Encode `codepoint` as UTF-8 into the buffer `out`.
 *
 * codepoint: value in [0, 0x10FFFF]; checked with assert().
 * out:       destination buffer; 1 to 4 bytes are written and no
 *            terminator is appended.
 *
 * Returns the number of bytes written. An assert verifies that this
 * count agrees with jvp_utf8_encode_length(codepoint).
 */
int jvp_utf8_encode(int codepoint, char* out) {
  assert(codepoint >= 0 && codepoint <= 0x10FFFF);

  int nbytes;
  if (codepoint <= 0x7F) {
    /* Single byte: stored unchanged. */
    out[0] = codepoint;
    nbytes = 1;
  } else {
    int lead;       /* marker bits of the first byte */
    int lead_mask;  /* payload bits available in the first byte */
    if (codepoint <= 0x7FF) {
      nbytes = 2; lead = 0xC0; lead_mask = 0x1F;
    } else if (codepoint <= 0xFFFF) {
      nbytes = 3; lead = 0xE0; lead_mask = 0x0F;
    } else {
      nbytes = 4; lead = 0xF0; lead_mask = 0x07;
    }

    /* Fill continuation bytes back to front, six payload bits each. */
    int rest = codepoint;
    for (int i = nbytes - 1; i > 0; i--) {
      out[i] = 0x80 + (rest & 0x3F);
      rest >>= 6;
    }
    /* The remaining high bits go into the lead byte. */
    out[0] = lead + (rest & lead_mask);
  }

  assert(nbytes == jvp_utf8_encode_length(codepoint));
  return nbytes;
}

7
c/jv_unicode.h Normal file
View File

@ -0,0 +1,7 @@
/* UTF-8 helper declarations (jvp_utf8_*). */
#ifndef JV_UNICODE_H
#define JV_UNICODE_H
/*
 * NOTE(review): no definition of this function is visible alongside this
 * header; presumably it reports the length of a UTF-8 sequence given its
 * first byte -- confirm against the implementation.
 */
int jvp_utf8_decode_length(char startchar);
/* Number of bytes (1..4) that the UTF-8 encoding of `codepoint` occupies. */
int jvp_utf8_encode_length(int codepoint);
/*
 * Write the UTF-8 encoding of `codepoint` (asserted to be in
 * [0, 0x10FFFF]) to `out` and return the number of bytes written.
 */
int jvp_utf8_encode(int codepoint, char* out);
#endif