json.c (7842B)
1 #include <ctype.h> 2 #include <errno.h> 3 #include <stdint.h> 4 #include <stdio.h> 5 #include <stdlib.h> 6 #include <string.h> 7 8 #ifndef GETNEXT 9 #define GETNEXT getchar_unlocked 10 #endif 11 12 #include "json.h" 13 14 static int 15 codepointtoutf8(long r, char *s) 16 { 17 if (r == 0) { 18 return 0; /* NUL byte */ 19 } else if (r <= 0x7F) { 20 /* 1 byte: 0aaaaaaa */ 21 s[0] = r; 22 return 1; 23 } else if (r <= 0x07FF) { 24 /* 2 bytes: 00000aaa aabbbbbb */ 25 s[0] = 0xC0 | ((r & 0x0007C0) >> 6); /* 110aaaaa */ 26 s[1] = 0x80 | (r & 0x00003F); /* 10bbbbbb */ 27 return 2; 28 } else if (r <= 0xFFFF) { 29 /* 3 bytes: aaaabbbb bbcccccc */ 30 s[0] = 0xE0 | ((r & 0x00F000) >> 12); /* 1110aaaa */ 31 s[1] = 0x80 | ((r & 0x000FC0) >> 6); /* 10bbbbbb */ 32 s[2] = 0x80 | (r & 0x00003F); /* 10cccccc */ 33 return 3; 34 } else { 35 /* 4 bytes: 000aaabb bbbbcccc ccdddddd */ 36 s[0] = 0xF0 | ((r & 0x1C0000) >> 18); /* 11110aaa */ 37 s[1] = 0x80 | ((r & 0x03F000) >> 12); /* 10bbbbbb */ 38 s[2] = 0x80 | ((r & 0x000FC0) >> 6); /* 10cccccc */ 39 s[3] = 0x80 | (r & 0x00003F); /* 10dddddd */ 40 return 4; 41 } 42 } 43 44 static int 45 hexdigit(int c) 46 { 47 if (c >= '0' && c <= '9') 48 return c - '0'; 49 else if (c >= 'a' && c <= 'f') 50 return 10 + (c - 'a'); 51 else if (c >= 'A' && c <= 'F') 52 return 10 + (c - 'A'); 53 return 0; 54 } 55 56 static int 57 capacity(char **value, size_t *sz, size_t cur, size_t inc) 58 { 59 size_t need, newsiz; 60 char *newp; 61 62 /* check for addition overflow */ 63 if (cur > SIZE_MAX - inc) { 64 errno = EOVERFLOW; 65 return -1; 66 } 67 need = cur + inc; 68 69 if (need > *sz) { 70 if (need > SIZE_MAX / 2) { 71 newsiz = SIZE_MAX; 72 } else { 73 for (newsiz = *sz < 64 ? 64 : *sz; newsiz <= need; newsiz *= 2) 74 ; 75 } 76 if (!(newp = realloc(*value, newsiz))) 77 return -1; /* up to caller to free *value */ 78 *value = newp; 79 *sz = newsiz; 80 } 81 return 0; 82 } 83 84 #define EXPECT_VALUE "{[\"-0123456789tfn" 85 #define EXPECT_STRING "\"" 86 #define EXPECT_END "}]," 87 #define EXPECT_OBJECT_STRING EXPECT_STRING "}" 88 #define EXPECT_OBJECT_KEY ":" 89 #define EXPECT_ARRAY_VALUE EXPECT_VALUE "]" 90 91 #define JSON_INVALID() do { ret = JSON_ERROR_INVALID; goto end; } while (0); 92 93 int 94 parsejson(void (*cb)(struct json_node *, size_t, const char *)) 95 { 96 struct json_node nodes[JSON_MAX_NODE_DEPTH] = { { 0 } }; 97 size_t depth = 0, p = 0, len, sz = 0; 98 long cp, hi, lo; 99 char pri[128], *str = NULL; 100 int c, i, escape, iskey = 0, ret = JSON_ERROR_MEM; 101 const char *expect = EXPECT_VALUE; 102 103 if (capacity(&(nodes[0].name), &(nodes[0].namesiz), 0, 1) == -1) 104 goto end; 105 nodes[0].name[0] = '\0'; 106 107 while (1) { 108 c = GETNEXT(); 109 handlechr: 110 if (c == EOF) 111 break; 112 113 /* skip JSON white-space, (NOTE: no \v, \f, \b etc) */ 114 if (c == ' ' || c == '\t' || c == '\n' || c == '\r') 115 continue; 116 117 if (!c || !strchr(expect, c)) 118 JSON_INVALID(); 119 120 switch (c) { 121 case ':': 122 iskey = 0; 123 expect = EXPECT_VALUE; 124 break; 125 case '"': 126 nodes[depth].type = JSON_TYPE_STRING; 127 escape = 0; 128 len = 0; 129 while (1) { 130 c = GETNEXT(); 131 chr: 132 /* EOF or control char: 0x7f is not defined as a control char in RFC8259 */ 133 if (c < 0x20) 134 JSON_INVALID(); 135 136 if (escape) { 137 escchr: 138 escape = 0; 139 switch (c) { 140 case '"': /* FALLTHROUGH */ 141 case '\\': 142 case '/': break; 143 case 'b': c = '\b'; break; 144 case 'f': c = '\f'; break; 145 case 'n': c = '\n'; break; 146 case 'r': c = '\r'; break; 147 case 't': c = '\t'; break; 148 case 'u': /* hex hex hex hex */ 149 if (capacity(&str, &sz, len, 4) == -1) 150 goto end; 151 for (i = 12, cp = 0; i >= 0; i -= 4) { 152 if ((c = GETNEXT()) == EOF || !isxdigit(c)) 153 JSON_INVALID(); /* invalid code point */ 154 cp |= (hexdigit(c) << i); 155 } 156 /* RFC8259 - 7. Strings - surrogates. 157 * 0xd800 - 0xdbff - high surrogates */ 158 if (cp >= 0xd800 && cp <= 0xdbff) { 159 if ((c = GETNEXT()) != '\\') { 160 len += codepointtoutf8(cp, &str[len]); 161 goto chr; 162 } 163 if ((c = GETNEXT()) != 'u') { 164 len += codepointtoutf8(cp, &str[len]); 165 goto escchr; 166 } 167 for (hi = cp, i = 12, lo = 0; i >= 0; i -= 4) { 168 if ((c = GETNEXT()) == EOF || !isxdigit(c)) 169 JSON_INVALID(); /* invalid code point */ 170 lo |= (hexdigit(c) << i); 171 } 172 /* 0xdc00 - 0xdfff - low surrogates */ 173 if (lo >= 0xdc00 && lo <= 0xdfff) { 174 cp = (hi << 10) + lo - 56613888; /* - offset */ 175 } else { 176 /* handle graceful: raw invalid output bytes */ 177 len += codepointtoutf8(hi, &str[len]); 178 if (capacity(&str, &sz, len, 4) == -1) 179 goto end; 180 len += codepointtoutf8(lo, &str[len]); 181 continue; 182 } 183 } 184 len += codepointtoutf8(cp, &str[len]); 185 continue; 186 default: 187 JSON_INVALID(); /* invalid escape char */ 188 } 189 if (capacity(&str, &sz, len, 1) == -1) 190 goto end; 191 str[len++] = c; 192 } else if (c == '\\') { 193 escape = 1; 194 } else if (c == '"') { 195 if (capacity(&str, &sz, len, 1) == -1) 196 goto end; 197 str[len++] = '\0'; 198 199 if (iskey) { 200 /* copy string as key, including NUL byte */ 201 if (capacity(&(nodes[depth].name), &(nodes[depth].namesiz), len, 1) == -1) 202 goto end; 203 memcpy(nodes[depth].name, str, len); 204 } else { 205 cb(nodes, depth + 1, str); 206 } 207 break; 208 } else { 209 if (capacity(&str, &sz, len, 1) == -1) 210 goto end; 211 str[len++] = c; 212 } 213 } 214 if (iskey) 215 expect = EXPECT_OBJECT_KEY; 216 else 217 expect = EXPECT_END; 218 break; 219 case '[': 220 case '{': 221 if (depth + 1 >= JSON_MAX_NODE_DEPTH) 222 JSON_INVALID(); /* too deep */ 223 224 nodes[depth].index = 0; 225 if (c == '[') { 226 nodes[depth].type = JSON_TYPE_ARRAY; 227 expect = EXPECT_ARRAY_VALUE; 228 } else if (c == '{') { 229 iskey = 1; 230 nodes[depth].type = JSON_TYPE_OBJECT; 231 expect = EXPECT_OBJECT_STRING; 232 } 233 234 cb(nodes, depth + 1, ""); 235 236 depth++; 237 nodes[depth].index = 0; 238 if (capacity(&(nodes[depth].name), &(nodes[depth].namesiz), 0, 1) == -1) 239 goto end; 240 nodes[depth].name[0] = '\0'; 241 break; 242 case ']': 243 case '}': 244 if (!depth || 245 (c == ']' && nodes[depth - 1].type != JSON_TYPE_ARRAY) || 246 (c == '}' && nodes[depth - 1].type != JSON_TYPE_OBJECT)) 247 JSON_INVALID(); /* unbalanced nodes */ 248 249 nodes[--depth].index++; 250 expect = EXPECT_END; 251 break; 252 case ',': 253 if (!depth) 254 JSON_INVALID(); /* unbalanced nodes */ 255 256 nodes[depth - 1].index++; 257 if (nodes[depth - 1].type == JSON_TYPE_OBJECT) { 258 iskey = 1; 259 expect = EXPECT_STRING; 260 } else { 261 expect = EXPECT_VALUE; 262 } 263 break; 264 case 't': /* true */ 265 if (GETNEXT() != 'r' || GETNEXT() != 'u' || GETNEXT() != 'e') 266 JSON_INVALID(); 267 nodes[depth].type = JSON_TYPE_BOOL; 268 cb(nodes, depth + 1, "true"); 269 expect = EXPECT_END; 270 break; 271 case 'f': /* false */ 272 if (GETNEXT() != 'a' || GETNEXT() != 'l' || GETNEXT() != 's' || 273 GETNEXT() != 'e') 274 JSON_INVALID(); 275 nodes[depth].type = JSON_TYPE_BOOL; 276 cb(nodes, depth + 1, "false"); 277 expect = EXPECT_END; 278 break; 279 case 'n': /* null */ 280 if (GETNEXT() != 'u' || GETNEXT() != 'l' || GETNEXT() != 'l') 281 JSON_INVALID(); 282 nodes[depth].type = JSON_TYPE_NULL; 283 cb(nodes, depth + 1, "null"); 284 expect = EXPECT_END; 285 break; 286 default: /* number */ 287 nodes[depth].type = JSON_TYPE_NUMBER; 288 p = 0; 289 pri[p++] = c; 290 expect = EXPECT_END; 291 while (1) { 292 c = GETNEXT(); 293 if (c == EOF || 294 !c || !strchr("0123456789eE+-.", c) || 295 p + 1 >= sizeof(pri)) { 296 pri[p] = '\0'; 297 cb(nodes, depth + 1, pri); 298 goto handlechr; /* do not read next char, handle this */ 299 } else { 300 pri[p++] = c; 301 } 302 } 303 } 304 } 305 if (depth) 306 JSON_INVALID(); /* unbalanced nodes */ 307 308 ret = 0; /* success */ 309 end: 310 for (depth = 0; depth < sizeof(nodes) / sizeof(nodes[0]); depth++) 311 free(nodes[depth].name); 312 free(str); 313 314 return ret; 315 }