geojson2dm

Convert GeoJSON to a format suitable for input to datamaps
Log | Files | Refs | README | LICENSE

commit 64357dfb409f891be1afd5261b918b8bb9987774
Author: Nikolay Korotkiy <sikmir@gmail.com>
Date:   Wed, 25 Aug 2021 09:50:08 +0300

initial repo

Diffstat:
A.gitignore | 2++
ALICENSE | 15+++++++++++++++
AMakefile | 16++++++++++++++++
AREADME.md | 14++++++++++++++
Ageojson2dm.c | 65+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ajson.c | 315+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ajson.h | 30++++++++++++++++++++++++++++++
Autil.c | 35+++++++++++++++++++++++++++++++++++
Autil.h | 2++
9 files changed, 494 insertions(+), 0 deletions(-)

diff --git a/.gitignore b/.gitignore @@ -0,0 +1,2 @@ +*.o +geojson2dm diff --git a/LICENSE b/LICENSE @@ -0,0 +1,15 @@ +ISC License + +Copyright (c) 2021 Nikolay Korotkiy <sikmir@disroot.org> + +Permission to use, copy, modify, and/or distribute this software for any +purpose with or without fee is hereby granted, provided that the above +copyright notice and this permission notice appear in all copies. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. diff --git a/Makefile b/Makefile @@ -0,0 +1,16 @@ +PREFIX = /usr/local + +BIN = geojson2dm + +all: build + +build: clean + $(CC) -c json.c util.c geojson2dm.c ${CFLAGS} + $(CC) -o $(BIN) json.o util.o geojson2dm.o ${LDFLAGS} + +clean: + rm -f $(BIN) *.o + +install: all + mkdir -p "${DESTDIR}${PREFIX}/bin" + cp -f ${BIN} "${DESTDIR}${PREFIX}/bin" diff --git a/README.md b/README.md @@ -0,0 +1,14 @@ +geojson2dm +---------- + +Convert GeoJSON to format suitable for input to datamaps. + +Caveats +------- + +It is not a full-featured GeoJSON converter. + +License +------- + +ISC, see LICENSE file. 
diff --git a/geojson2dm.c b/geojson2dm.c @@ -0,0 +1,65 @@ +#include <ctype.h> +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "util.h" +#include "json.h" + +struct pos { + char lat[16]; + char lon[16]; +}; + +static struct pos pos; + +void +processnode(struct json_node *nodes, size_t depth, const char *value) +{ + if (depth != 7 || + nodes[4].type != JSON_TYPE_ARRAY || + nodes[5].type != JSON_TYPE_ARRAY || + strcmp(nodes[4].name, "coordinates") != 0) + return; + + switch (nodes[5].index) { + case 0: + switch (nodes[4].index) { + case 0: + memset(&pos, 0, sizeof(pos)); + break; + default: + printf("%s,%s", pos.lat, pos.lon); + break; + } + strlcpy(pos.lon, value, sizeof(pos.lon)); + break; + case 1: + strlcpy(pos.lat, value, sizeof(pos.lat)); + switch (nodes[4].index) { + case 0: + // do nothing + break; + default: + printf(" %s,%s\n", pos.lat, pos.lon); + break; + } + break; + } +} + +int +main(void) +{ + switch (parsejson(processnode)) { + case JSON_ERROR_MEM: + fputs("error: cannot allocate enough memory\n", stderr); + return 2; + case JSON_ERROR_INVALID: + fputs("error: invalid JSON\n", stderr); + return 1; + } + + return 0; +} diff --git a/json.c b/json.c @@ -0,0 +1,315 @@ +#include <ctype.h> +#include <errno.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#ifndef GETNEXT +#define GETNEXT getchar_unlocked +#endif + +#include "json.h" + +static int +codepointtoutf8(long r, char *s) +{ + if (r == 0) { + return 0; /* NUL byte */ + } else if (r <= 0x7F) { + /* 1 byte: 0aaaaaaa */ + s[0] = r; + return 1; + } else if (r <= 0x07FF) { + /* 2 bytes: 00000aaa aabbbbbb */ + s[0] = 0xC0 | ((r & 0x0007C0) >> 6); /* 110aaaaa */ + s[1] = 0x80 | (r & 0x00003F); /* 10bbbbbb */ + return 2; + } else if (r <= 0xFFFF) { + /* 3 bytes: aaaabbbb bbcccccc */ + s[0] = 0xE0 | ((r & 0x00F000) >> 12); /* 1110aaaa */ + s[1] = 0x80 | ((r & 0x000FC0) >> 6); /* 10bbbbbb */ + s[2] = 0x80 | (r & 0x00003F); /* 
10cccccc */ + return 3; + } else { + /* 4 bytes: 000aaabb bbbbcccc ccdddddd */ + s[0] = 0xF0 | ((r & 0x1C0000) >> 18); /* 11110aaa */ + s[1] = 0x80 | ((r & 0x03F000) >> 12); /* 10bbbbbb */ + s[2] = 0x80 | ((r & 0x000FC0) >> 6); /* 10cccccc */ + s[3] = 0x80 | (r & 0x00003F); /* 10dddddd */ + return 4; + } +} + +static int +hexdigit(int c) +{ + if (c >= '0' && c <= '9') + return c - '0'; + else if (c >= 'a' && c <= 'f') + return 10 + (c - 'a'); + else if (c >= 'A' && c <= 'F') + return 10 + (c - 'A'); + return 0; +} + +static int +capacity(char **value, size_t *sz, size_t cur, size_t inc) +{ + size_t need, newsiz; + char *newp; + + /* check for addition overflow */ + if (cur > SIZE_MAX - inc) { + errno = EOVERFLOW; + return -1; + } + need = cur + inc; + + if (need > *sz) { + if (need > SIZE_MAX / 2) { + newsiz = SIZE_MAX; + } else { + for (newsiz = *sz < 64 ? 64 : *sz; newsiz <= need; newsiz *= 2) + ; + } + if (!(newp = realloc(*value, newsiz))) + return -1; /* up to caller to free *value */ + *value = newp; + *sz = newsiz; + } + return 0; +} + +#define EXPECT_VALUE "{[\"-0123456789tfn" +#define EXPECT_STRING "\"" +#define EXPECT_END "}]," +#define EXPECT_OBJECT_STRING EXPECT_STRING "}" +#define EXPECT_OBJECT_KEY ":" +#define EXPECT_ARRAY_VALUE EXPECT_VALUE "]" + +#define JSON_INVALID() do { ret = JSON_ERROR_INVALID; goto end; } while (0); + +int +parsejson(void (*cb)(struct json_node *, size_t, const char *)) +{ + struct json_node nodes[JSON_MAX_NODE_DEPTH] = { { 0 } }; + size_t depth = 0, p = 0, len, sz = 0; + long cp, hi, lo; + char pri[128], *str = NULL; + int c, i, escape, iskey = 0, ret = JSON_ERROR_MEM; + const char *expect = EXPECT_VALUE; + + if (capacity(&(nodes[0].name), &(nodes[0].namesiz), 0, 1) == -1) + goto end; + nodes[0].name[0] = '\0'; + + while (1) { + c = GETNEXT(); +handlechr: + if (c == EOF) + break; + + /* skip JSON white-space, (NOTE: no \v, \f, \b etc) */ + if (c == ' ' || c == '\t' || c == '\n' || c == '\r') + continue; + + if (!c || 
!strchr(expect, c)) + JSON_INVALID(); + + switch (c) { + case ':': + iskey = 0; + expect = EXPECT_VALUE; + break; + case '"': + nodes[depth].type = JSON_TYPE_STRING; + escape = 0; + len = 0; + while (1) { + c = GETNEXT(); +chr: + /* EOF or control char: 0x7f is not defined as a control char in RFC8259 */ + if (c < 0x20) + JSON_INVALID(); + + if (escape) { +escchr: + escape = 0; + switch (c) { + case '"': /* FALLTHROUGH */ + case '\\': + case '/': break; + case 'b': c = '\b'; break; + case 'f': c = '\f'; break; + case 'n': c = '\n'; break; + case 'r': c = '\r'; break; + case 't': c = '\t'; break; + case 'u': /* hex hex hex hex */ + if (capacity(&str, &sz, len, 4) == -1) + goto end; + for (i = 12, cp = 0; i >= 0; i -= 4) { + if ((c = GETNEXT()) == EOF || !isxdigit(c)) + JSON_INVALID(); /* invalid code point */ + cp |= (hexdigit(c) << i); + } + /* RFC8259 - 7. Strings - surrogates. + * 0xd800 - 0xdbff - high surrogates */ + if (cp >= 0xd800 && cp <= 0xdbff) { + if ((c = GETNEXT()) != '\\') { + len += codepointtoutf8(cp, &str[len]); + goto chr; + } + if ((c = GETNEXT()) != 'u') { + len += codepointtoutf8(cp, &str[len]); + goto escchr; + } + for (hi = cp, i = 12, lo = 0; i >= 0; i -= 4) { + if ((c = GETNEXT()) == EOF || !isxdigit(c)) + JSON_INVALID(); /* invalid code point */ + lo |= (hexdigit(c) << i); + } + /* 0xdc00 - 0xdfff - low surrogates */ + if (lo >= 0xdc00 && lo <= 0xdfff) { + cp = (hi << 10) + lo - 56613888; /* - offset */ + } else { + /* handle graceful: raw invalid output bytes */ + len += codepointtoutf8(hi, &str[len]); + if (capacity(&str, &sz, len, 4) == -1) + goto end; + len += codepointtoutf8(lo, &str[len]); + continue; + } + } + len += codepointtoutf8(cp, &str[len]); + continue; + default: + JSON_INVALID(); /* invalid escape char */ + } + if (capacity(&str, &sz, len, 1) == -1) + goto end; + str[len++] = c; + } else if (c == '\\') { + escape = 1; + } else if (c == '"') { + if (capacity(&str, &sz, len, 1) == -1) + goto end; + str[len++] = '\0'; + + if 
(iskey) { + /* copy string as key, including NUL byte */ + if (capacity(&(nodes[depth].name), &(nodes[depth].namesiz), len, 1) == -1) + goto end; + memcpy(nodes[depth].name, str, len); + } else { + cb(nodes, depth + 1, str); + } + break; + } else { + if (capacity(&str, &sz, len, 1) == -1) + goto end; + str[len++] = c; + } + } + if (iskey) + expect = EXPECT_OBJECT_KEY; + else + expect = EXPECT_END; + break; + case '[': + case '{': + if (depth + 1 >= JSON_MAX_NODE_DEPTH) + JSON_INVALID(); /* too deep */ + + nodes[depth].index = 0; + if (c == '[') { + nodes[depth].type = JSON_TYPE_ARRAY; + expect = EXPECT_ARRAY_VALUE; + } else if (c == '{') { + iskey = 1; + nodes[depth].type = JSON_TYPE_OBJECT; + expect = EXPECT_OBJECT_STRING; + } + + cb(nodes, depth + 1, ""); + + depth++; + nodes[depth].index = 0; + if (capacity(&(nodes[depth].name), &(nodes[depth].namesiz), 0, 1) == -1) + goto end; + nodes[depth].name[0] = '\0'; + break; + case ']': + case '}': + if (!depth || + (c == ']' && nodes[depth - 1].type != JSON_TYPE_ARRAY) || + (c == '}' && nodes[depth - 1].type != JSON_TYPE_OBJECT)) + JSON_INVALID(); /* unbalanced nodes */ + + nodes[--depth].index++; + expect = EXPECT_END; + break; + case ',': + if (!depth) + JSON_INVALID(); /* unbalanced nodes */ + + nodes[depth - 1].index++; + if (nodes[depth - 1].type == JSON_TYPE_OBJECT) { + iskey = 1; + expect = EXPECT_STRING; + } else { + expect = EXPECT_VALUE; + } + break; + case 't': /* true */ + if (GETNEXT() != 'r' || GETNEXT() != 'u' || GETNEXT() != 'e') + JSON_INVALID(); + nodes[depth].type = JSON_TYPE_BOOL; + cb(nodes, depth + 1, "true"); + expect = EXPECT_END; + break; + case 'f': /* false */ + if (GETNEXT() != 'a' || GETNEXT() != 'l' || GETNEXT() != 's' || + GETNEXT() != 'e') + JSON_INVALID(); + nodes[depth].type = JSON_TYPE_BOOL; + cb(nodes, depth + 1, "false"); + expect = EXPECT_END; + break; + case 'n': /* null */ + if (GETNEXT() != 'u' || GETNEXT() != 'l' || GETNEXT() != 'l') + JSON_INVALID(); + nodes[depth].type = 
JSON_TYPE_NULL; + cb(nodes, depth + 1, "null"); + expect = EXPECT_END; + break; + default: /* number */ + nodes[depth].type = JSON_TYPE_NUMBER; + p = 0; + pri[p++] = c; + expect = EXPECT_END; + while (1) { + c = GETNEXT(); + if (c == EOF || + !c || !strchr("0123456789eE+-.", c) || + p + 1 >= sizeof(pri)) { + pri[p] = '\0'; + cb(nodes, depth + 1, pri); + goto handlechr; /* do not read next char, handle this */ + } else { + pri[p++] = c; + } + } + } + } + if (depth) + JSON_INVALID(); /* unbalanced nodes */ + + ret = 0; /* success */ +end: + for (depth = 0; depth < sizeof(nodes) / sizeof(nodes[0]); depth++) + free(nodes[depth].name); + free(str); + + return ret; +} diff --git a/json.h b/json.h @@ -0,0 +1,30 @@ +#ifndef _JSON_H_ +#define _JSON_H_ + +#include <stddef.h> + +enum JSONType { + JSON_TYPE_ARRAY = 'a', + JSON_TYPE_OBJECT = 'o', + JSON_TYPE_STRING = 's', + JSON_TYPE_BOOL = 'b', + JSON_TYPE_NULL = '?', + JSON_TYPE_NUMBER = 'n' +}; + +enum JSONError { + JSON_ERROR_MEM = -2, + JSON_ERROR_INVALID = -1 +}; + +#define JSON_MAX_NODE_DEPTH 64 + +struct json_node { + enum JSONType type; + char *name; + size_t namesiz; + size_t index; /* count/index for array or object type */ +}; + +int parsejson(void (*cb)(struct json_node *, size_t, const char *)); +#endif diff --git a/util.c b/util.c @@ -0,0 +1,35 @@ +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <sys/types.h> + +#include "util.h" + +/* + * Taken from OpenBSD. + * Copy src to string dst of size siz. At most siz-1 characters + * will be copied. Always NUL terminates (unless siz == 0). + * Returns strlen(src); if retval >= siz, truncation occurred. 
+ */ +size_t +strlcpy(char *dst, const char *src, size_t siz) { + char *d = dst; + const char *s = src; + size_t n = siz; + + /* copy as many bytes as will fit */ + if (n != 0) { + while (--n != 0) { + if ((*d++ = *s++) == '\0') + break; + } + } + /* not enough room in dst, add NUL and traverse rest of src */ + if (n == 0) { + if (siz != 0) + *d = '\0'; /* NUL-terminate dst */ + while (*s++) + ; + } + return(s - src - 1); /* count does not include NUL */ +} diff --git a/util.h b/util.h @@ -0,0 +1,2 @@ +#undef strlcpy +size_t strlcpy(char *, const char *, size_t);