geojson2dm

Convert GeoJSON to a format suitable for input to datamaps
git clone git://git.sikmir.ru/geojson2dm
Log | Files | Refs | README | LICENSE

json.c (7842B)


      1 #include <ctype.h>
      2 #include <errno.h>
      3 #include <stdint.h>
      4 #include <stdio.h>
      5 #include <stdlib.h>
      6 #include <string.h>
      7 
      8 #ifndef GETNEXT
      9 #define GETNEXT getchar_unlocked
     10 #endif
     11 
     12 #include "json.h"
     13 
     14 static int
     15 codepointtoutf8(long r, char *s)
     16 {
     17 	if (r == 0) {
     18 		return 0; /* NUL byte */
     19 	} else if (r <= 0x7F) {
     20 		/* 1 byte: 0aaaaaaa */
     21 		s[0] = r;
     22 		return 1;
     23 	} else if (r <= 0x07FF) {
     24 		/* 2 bytes: 00000aaa aabbbbbb */
     25 		s[0] = 0xC0 | ((r & 0x0007C0) >>  6); /* 110aaaaa */
     26 		s[1] = 0x80 |  (r & 0x00003F);        /* 10bbbbbb */
     27 		return 2;
     28 	} else if (r <= 0xFFFF) {
     29 		/* 3 bytes: aaaabbbb bbcccccc */
     30 		s[0] = 0xE0 | ((r & 0x00F000) >> 12); /* 1110aaaa */
     31 		s[1] = 0x80 | ((r & 0x000FC0) >>  6); /* 10bbbbbb */
     32 		s[2] = 0x80 |  (r & 0x00003F);        /* 10cccccc */
     33 		return 3;
     34 	} else {
     35 		/* 4 bytes: 000aaabb bbbbcccc ccdddddd */
     36 		s[0] = 0xF0 | ((r & 0x1C0000) >> 18); /* 11110aaa */
     37 		s[1] = 0x80 | ((r & 0x03F000) >> 12); /* 10bbbbbb */
     38 		s[2] = 0x80 | ((r & 0x000FC0) >>  6); /* 10cccccc */
     39 		s[3] = 0x80 |  (r & 0x00003F);        /* 10dddddd */
     40 		return 4;
     41 	}
     42 }
     43 
     44 static int
     45 hexdigit(int c)
     46 {
     47 	if (c >= '0' && c <= '9')
     48 		return c - '0';
     49 	else if (c >= 'a' && c <= 'f')
     50 		return 10 + (c - 'a');
     51 	else if (c >= 'A' && c <= 'F')
     52 		return 10 + (c - 'A');
     53 	return 0;
     54 }
     55 
     56 static int
     57 capacity(char **value, size_t *sz, size_t cur, size_t inc)
     58 {
     59 	size_t need, newsiz;
     60 	char *newp;
     61 
     62 	/* check for addition overflow */
     63 	if (cur > SIZE_MAX - inc) {
     64 		errno = EOVERFLOW;
     65 		return -1;
     66 	}
     67 	need = cur + inc;
     68 
     69 	if (need > *sz) {
     70 		if (need > SIZE_MAX / 2) {
     71 			newsiz = SIZE_MAX;
     72 		} else {
     73 			for (newsiz = *sz < 64 ? 64 : *sz; newsiz <= need; newsiz *= 2)
     74 				;
     75 		}
     76 		if (!(newp = realloc(*value, newsiz)))
     77 			return -1; /* up to caller to free *value */
     78 		*value = newp;
     79 		*sz = newsiz;
     80 	}
     81 	return 0;
     82 }
     83 
     84 #define EXPECT_VALUE         "{[\"-0123456789tfn"
     85 #define EXPECT_STRING        "\""
     86 #define EXPECT_END           "}],"
     87 #define EXPECT_OBJECT_STRING EXPECT_STRING "}"
     88 #define EXPECT_OBJECT_KEY    ":"
     89 #define EXPECT_ARRAY_VALUE   EXPECT_VALUE "]"
     90 
     91 #define JSON_INVALID()       do { ret = JSON_ERROR_INVALID; goto end; } while (0);
     92 
     93 int
     94 parsejson(void (*cb)(struct json_node *, size_t, const char *))
     95 {
     96 	struct json_node nodes[JSON_MAX_NODE_DEPTH] = { { 0 } };
     97 	size_t depth = 0, p = 0, len, sz = 0;
     98 	long cp, hi, lo;
     99 	char pri[128], *str = NULL;
    100 	int c, i, escape, iskey = 0, ret = JSON_ERROR_MEM;
    101 	const char *expect = EXPECT_VALUE;
    102 
    103 	if (capacity(&(nodes[0].name), &(nodes[0].namesiz), 0, 1) == -1)
    104 		goto end;
    105 	nodes[0].name[0] = '\0';
    106 
    107 	while (1) {
    108 		c = GETNEXT();
    109 handlechr:
    110 		if (c == EOF)
    111 			break;
    112 
    113 		/* skip JSON white-space, (NOTE: no \v, \f, \b etc) */
    114 		if (c == ' ' || c == '\t' || c == '\n' || c == '\r')
    115 			continue;
    116 
    117 		if (!c || !strchr(expect, c))
    118 			JSON_INVALID();
    119 
    120 		switch (c) {
    121 		case ':':
    122 			iskey = 0;
    123 			expect = EXPECT_VALUE;
    124 			break;
    125 		case '"':
    126 			nodes[depth].type = JSON_TYPE_STRING;
    127 			escape = 0;
    128 			len = 0;
    129 			while (1) {
    130 				c = GETNEXT();
    131 chr:
    132 				/* EOF or control char: 0x7f is not defined as a control char in RFC8259 */
    133 				if (c < 0x20)
    134 					JSON_INVALID();
    135 
    136 				if (escape) {
    137 escchr:
    138 					escape = 0;
    139 					switch (c) {
    140 					case '"': /* FALLTHROUGH */
    141 					case '\\':
    142 					case '/': break;
    143 					case 'b': c = '\b'; break;
    144 					case 'f': c = '\f'; break;
    145 					case 'n': c = '\n'; break;
    146 					case 'r': c = '\r'; break;
    147 					case 't': c = '\t'; break;
    148 					case 'u': /* hex hex hex hex */
    149 						if (capacity(&str, &sz, len, 4) == -1)
    150 							goto end;
    151 						for (i = 12, cp = 0; i >= 0; i -= 4) {
    152 							if ((c = GETNEXT()) == EOF || !isxdigit(c))
    153 								JSON_INVALID(); /* invalid code point */
    154 							cp |= (hexdigit(c) << i);
    155 						}
    156 						/* RFC8259 - 7. Strings - surrogates.
    157 						 * 0xd800 - 0xdbff - high surrogates */
    158 						if (cp >= 0xd800 && cp <= 0xdbff) {
    159 							if ((c = GETNEXT()) != '\\') {
    160 								len += codepointtoutf8(cp, &str[len]);
    161 								goto chr;
    162 							}
    163 							if ((c = GETNEXT()) != 'u') {
    164 								len += codepointtoutf8(cp, &str[len]);
    165 								goto escchr;
    166 							}
    167 							for (hi = cp, i = 12, lo = 0; i >= 0; i -= 4) {
    168 								if ((c = GETNEXT()) == EOF || !isxdigit(c))
    169 									JSON_INVALID(); /* invalid code point */
    170 								lo |= (hexdigit(c) << i);
    171 							}
    172 							/* 0xdc00 - 0xdfff - low surrogates */
    173 							if (lo >= 0xdc00 && lo <= 0xdfff) {
    174 								cp = (hi << 10) + lo - 56613888; /* - offset */
    175 							} else {
    176 								/* handle graceful: raw invalid output bytes */
    177 								len += codepointtoutf8(hi, &str[len]);
    178 								if (capacity(&str, &sz, len, 4) == -1)
    179 									goto end;
    180 								len += codepointtoutf8(lo, &str[len]);
    181 								continue;
    182 							}
    183 						}
    184 						len += codepointtoutf8(cp, &str[len]);
    185 						continue;
    186 					default:
    187 						JSON_INVALID(); /* invalid escape char */
    188 					}
    189 					if (capacity(&str, &sz, len, 1) == -1)
    190 						goto end;
    191 					str[len++] = c;
    192 				} else if (c == '\\') {
    193 					escape = 1;
    194 				} else if (c == '"') {
    195 					if (capacity(&str, &sz, len, 1) == -1)
    196 						goto end;
    197 					str[len++] = '\0';
    198 
    199 					if (iskey) {
    200 						/* copy string as key, including NUL byte */
    201 						if (capacity(&(nodes[depth].name), &(nodes[depth].namesiz), len, 1) == -1)
    202 							goto end;
    203 						memcpy(nodes[depth].name, str, len);
    204 					} else {
    205 						cb(nodes, depth + 1, str);
    206 					}
    207 					break;
    208 				} else {
    209 					if (capacity(&str, &sz, len, 1) == -1)
    210 						goto end;
    211 					str[len++] = c;
    212 				}
    213 			}
    214 			if (iskey)
    215 				expect = EXPECT_OBJECT_KEY;
    216 			else
    217 				expect = EXPECT_END;
    218 			break;
    219 		case '[':
    220 		case '{':
    221 			if (depth + 1 >= JSON_MAX_NODE_DEPTH)
    222 				JSON_INVALID(); /* too deep */
    223 
    224 			nodes[depth].index = 0;
    225 			if (c == '[') {
    226 				nodes[depth].type = JSON_TYPE_ARRAY;
    227 				expect = EXPECT_ARRAY_VALUE;
    228 			} else if (c == '{') {
    229 				iskey = 1;
    230 				nodes[depth].type = JSON_TYPE_OBJECT;
    231 				expect = EXPECT_OBJECT_STRING;
    232 			}
    233 
    234 			cb(nodes, depth + 1, "");
    235 
    236 			depth++;
    237 			nodes[depth].index = 0;
    238 			if (capacity(&(nodes[depth].name), &(nodes[depth].namesiz), 0, 1) == -1)
    239 				goto end;
    240 			nodes[depth].name[0] = '\0';
    241 			break;
    242 		case ']':
    243 		case '}':
    244 			if (!depth ||
    245 			   (c == ']' && nodes[depth - 1].type != JSON_TYPE_ARRAY) ||
    246 			   (c == '}' && nodes[depth - 1].type != JSON_TYPE_OBJECT))
    247 				JSON_INVALID(); /* unbalanced nodes */
    248 
    249 			nodes[--depth].index++;
    250 			expect = EXPECT_END;
    251 			break;
    252 		case ',':
    253 			if (!depth)
    254 				JSON_INVALID(); /* unbalanced nodes */
    255 
    256 			nodes[depth - 1].index++;
    257 			if (nodes[depth - 1].type == JSON_TYPE_OBJECT) {
    258 				iskey = 1;
    259 				expect = EXPECT_STRING;
    260 			} else {
    261 				expect = EXPECT_VALUE;
    262 			}
    263 			break;
    264 		case 't': /* true */
    265 			if (GETNEXT() != 'r' || GETNEXT() != 'u' || GETNEXT() != 'e')
    266 				JSON_INVALID();
    267 			nodes[depth].type = JSON_TYPE_BOOL;
    268 			cb(nodes, depth + 1, "true");
    269 			expect = EXPECT_END;
    270 			break;
    271 		case 'f': /* false */
    272 			if (GETNEXT() != 'a' || GETNEXT() != 'l' || GETNEXT() != 's' ||
    273 			    GETNEXT() != 'e')
    274 				JSON_INVALID();
    275 			nodes[depth].type = JSON_TYPE_BOOL;
    276 			cb(nodes, depth + 1, "false");
    277 			expect = EXPECT_END;
    278 			break;
    279 		case 'n': /* null */
    280 			if (GETNEXT() != 'u' || GETNEXT() != 'l' || GETNEXT() != 'l')
    281 				JSON_INVALID();
    282 			nodes[depth].type = JSON_TYPE_NULL;
    283 			cb(nodes, depth + 1, "null");
    284 			expect = EXPECT_END;
    285 			break;
    286 		default: /* number */
    287 			nodes[depth].type = JSON_TYPE_NUMBER;
    288 			p = 0;
    289 			pri[p++] = c;
    290 			expect = EXPECT_END;
    291 			while (1) {
    292 				c = GETNEXT();
    293 				if (c == EOF ||
    294 				    !c || !strchr("0123456789eE+-.", c) ||
    295 				    p + 1 >= sizeof(pri)) {
    296 					pri[p] = '\0';
    297 					cb(nodes, depth + 1, pri);
    298 					goto handlechr; /* do not read next char, handle this */
    299 				} else {
    300 					pri[p++] = c;
    301 				}
    302 			}
    303 		}
    304 	}
    305 	if (depth)
    306 		JSON_INVALID(); /* unbalanced nodes */
    307 
    308 	ret = 0; /* success */
    309 end:
    310 	for (depth = 0; depth < sizeof(nodes) / sizeof(nodes[0]); depth++)
    311 		free(nodes[depth].name);
    312 	free(str);
    313 
    314 	return ret;
    315 }