initial repo - geojson2dm - Convert GeoJSON to format suitable for input to datamaps

commit 64357dfb409f891be1afd5261b918b8bb9987774
Author: Nikolay Korotkiy <sikmir@gmail.com>
Date:   Wed, 25 Aug 2021 09:50:08 +0300

initial repo

Diffstat:
A .gitignore  | 2 ++
A LICENSE  | 15 +++++++++++++++
A Makefile  | 16 ++++++++++++++++
A README.md  | 14 ++++++++++++++
A geojson2dm.c  | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A json.c  | 315 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A json.h  | 30 ++++++++++++++++++++++++++++++
A util.c  | 35 +++++++++++++++++++++++++++++++++++
A util.h  | 2 ++

9 files changed, 494 insertions(+), 0 deletions(-)
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,2 @@
+*.o
+geojson2dm
diff --git a/LICENSE b/LICENSE
@@ -0,0 +1,15 @@
+ISC License
+
+Copyright (c) 2021 Nikolay Korotkiy <sikmir@disroot.org>
+
+Permission to use, copy, modify, and/or distribute this software for any
+purpose with or without fee is hereby granted, provided that the above
+copyright notice and this permission notice appear in all copies.
+
+THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
diff --git a/Makefile b/Makefile
@@ -0,0 +1,16 @@
+PREFIX = /usr/local
+
+BIN = geojson2dm
+
+all: build # default goal: only delegates to build
+
+build: clean # clean is a prerequisite, so every build recompiles all three sources
+	$(CC) -c json.c util.c geojson2dm.c ${CFLAGS}
+	$(CC) -o $(BIN) json.o util.o geojson2dm.o ${LDFLAGS}
+
+clean: # remove the binary and every object file
+	rm -f $(BIN) *.o
+
+install: all # copy the binary into ${DESTDIR}${PREFIX}/bin, creating the directory first
+	mkdir -p "${DESTDIR}${PREFIX}/bin"
+	cp -f ${BIN} "${DESTDIR}${PREFIX}/bin"
diff --git a/README.md b/README.md
@@ -0,0 +1,14 @@
+geojson2dm
+----------
+
+Convert GeoJSON to a format suitable for input to datamaps.
+
+Caveats
+-------
+
+It is not a full-featured GeoJSON converter.
+
+License
+-------
+
+ISC, see LICENSE file.
diff --git a/geojson2dm.c b/geojson2dm.c
@@ -0,0 +1,65 @@
+#include <ctype.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "util.h"
+#include "json.h"
+
+struct pos { /* one coordinate pair, kept as the JSON value text exactly as parsed */
+	char lat[128]; /* 128 = size of the parser's number buffer (pri[] in json.c), so a number is never cut off */
+	char lon[128]; /* 16 bytes used to truncate long values, e.g. dropping the exponent of 1.2345678901234567e-7 */
+};
+
+static struct pos pos; /* most recent coordinate pair; persists between processnode() calls */
+
+void /* parser callback: prints every two consecutive points as one "lat,lon lat,lon" line */
+processnode(struct json_node *nodes, size_t depth, const char *value)
+{
+	if (depth != 7 || /* the reported value must be nodes[6], i.e. exactly seven levels deep */
+	    nodes[4].type != JSON_TYPE_ARRAY ||
+	    nodes[5].type != JSON_TYPE_ARRAY ||
+	    strcmp(nodes[4].name, "coordinates") != 0)
+		return; /* not an element of an array nested directly in a "coordinates" array */
+
+	switch (nodes[5].index) { /* position of value inside its own (inner) array */
+	case 0: /* first element: stored as the longitude */
+		switch (nodes[4].index) { /* position of the inner array inside "coordinates" */
+		case 0:
+			memset(&pos, 0, sizeof(pos)); /* first point: there is no previous point to print */
+			break;
+		default:
+			printf("%s,%s", pos.lat, pos.lon); /* previous point opens the output line */
+			break;
+		}
+		strlcpy(pos.lon, value, sizeof(pos.lon)); /* return value unchecked: an over-long value is truncated silently */
+		break;
+	case 1: /* second element: stored as the latitude */
+		strlcpy(pos.lat, value, sizeof(pos.lat));
+		switch (nodes[4].index) {
+		case 0:
+			/* first point is only remembered, never printed on its own */
+			break;
+		default:
+			printf(" %s,%s\n", pos.lat, pos.lon); /* current point closes the output line */
+			break;
+		}
+		break;
+	} /* a third or later element of the inner array is ignored */
+}
+
+int /* exit status: 0 on success, 1 for invalid JSON, 2 when memory ran out */
+main(void)
+{
+	switch (parsejson(processnode)) { /* parse the input; processnode() prints while parsing runs */
+	case JSON_ERROR_MEM:
+		fputs("error: cannot allocate enough memory\n", stderr);
+		return 2;
+	case JSON_ERROR_INVALID:
+		fputs("error: invalid JSON\n", stderr);
+		return 1;
+	}
+
+	return 0; /* parsejson() reported neither error */
+}
diff --git a/json.c b/json.c
@@ -0,0 +1,315 @@
+#include <ctype.h>
+#include <errno.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#ifndef GETNEXT
+#define GETNEXT getchar_unlocked
+#endif
+
+#include "json.h"
+
+static int /* encodes code point r as UTF-8 into s; returns the number of bytes written (0..4) */
+codepointtoutf8(long r, char *s) /* no NUL is appended; callers in this file reserve 4 bytes first */
+{
+	if (r == 0) {
+		return 0; /* NUL byte: nothing is written */
+	} else if (r <= 0x7F) {
+		/* 1 byte: 0aaaaaaa */
+		s[0] = r;
+		return 1;
+	} else if (r <= 0x07FF) {
+		/* 2 bytes, input bits: 00000aaa aabbbbbb */
+		s[0] = 0xC0 | ((r & 0x0007C0) >>  6); /* 110aaaaa */
+		s[1] = 0x80 |  (r & 0x00003F);        /* 10bbbbbb */
+		return 2;
+	} else if (r <= 0xFFFF) {
+		/* 3 bytes, input bits: aaaabbbb bbcccccc */
+		s[0] = 0xE0 | ((r & 0x00F000) >> 12); /* 1110aaaa */
+		s[1] = 0x80 | ((r & 0x000FC0) >>  6); /* 10bbbbbb */
+		s[2] = 0x80 |  (r & 0x00003F);        /* 10cccccc */
+		return 3;
+	} else {
+		/* 4 bytes, input bits: 000aaabb bbbbcccc ccdddddd (only the low 21 bits are used) */
+		s[0] = 0xF0 | ((r & 0x1C0000) >> 18); /* 11110aaa */
+		s[1] = 0x80 | ((r & 0x03F000) >> 12); /* 10bbbbbb */
+		s[2] = 0x80 | ((r & 0x000FC0) >>  6); /* 10cccccc */
+		s[3] = 0x80 |  (r & 0x00003F);        /* 10dddddd */
+		return 4;
+	}
+}
+
+static int /* numeric value (0..15) of the hexadecimal digit c */
+hexdigit(int c)
+{
+	if (c >= '0' && c <= '9')
+		return c - '0';
+	else if (c >= 'a' && c <= 'f')
+		return 10 + (c - 'a');
+	else if (c >= 'A' && c <= 'F')
+		return 10 + (c - 'A');
+	return 0; /* not a hex digit; the callers in this file test isxdigit() first */
+}
+
+static int /* makes sure *value can hold cur + inc bytes; returns 0 on success, -1 on failure */
+capacity(char **value, size_t *sz, size_t cur, size_t inc)
+{
+	size_t need, newsiz;
+	char *newp;
+
+	/* check for addition overflow */
+	if (cur > SIZE_MAX - inc) {
+		errno = EOVERFLOW;
+		return -1;
+	}
+	need = cur + inc;
+
+	if (need > *sz) { /* grow only when the current allocation (*sz bytes) is too small */
+		if (need > SIZE_MAX / 2) {
+			newsiz = SIZE_MAX; /* doubling could wrap around, so ask for the maximum */
+		} else {
+			for (newsiz = *sz < 64 ? 64 : *sz; newsiz <= need; newsiz *= 2)
+				; /* start at 64 bytes at least and double until strictly larger than need */
+		}
+		if (!(newp = realloc(*value, newsiz)))
+			return -1; /* up to caller to free *value */
+		*value = newp;
+		*sz = newsiz;
+	}
+	return 0;
+}
+
+#define EXPECT_VALUE         "{[\"-0123456789tfn" /* first character of any JSON value */
+#define EXPECT_STRING        "\""
+#define EXPECT_END           "}],"
+#define EXPECT_OBJECT_STRING EXPECT_STRING "}" /* after '{': a key or an immediate '}' */
+#define EXPECT_OBJECT_KEY    ":" /* after a key: only the separator */
+#define EXPECT_ARRAY_VALUE   EXPECT_VALUE "]" /* after '[': a value or an immediate ']' */
+/* Abort parsing as invalid: uses the caller's `ret` and `end` label. No trailing ';' so it is one statement. */
+#define JSON_INVALID()       do { ret = JSON_ERROR_INVALID; goto end; } while (0)
+
+int /* streaming JSON parser: returns 0 on success, JSON_ERROR_INVALID or JSON_ERROR_MEM on failure */
+parsejson(void (*cb)(struct json_node *, size_t, const char *)) /* cb(nodes, depth, value) runs for every value; keys go to nodes[].name */
+{
+	struct json_node nodes[JSON_MAX_NODE_DEPTH] = { { 0 } }; /* path from the root; nodes[depth] is the current value */
+	size_t depth = 0, p = 0, len, sz = 0;
+	long cp, hi, lo;
+	char pri[128], *str = NULL; /* pri: text of a number, str: growable buffer (sz bytes) for a string */
+	int c, i, escape, iskey = 0, ret = JSON_ERROR_MEM; /* ret: every plain "goto end" is an allocation failure */
+	const char *expect = EXPECT_VALUE; /* characters allowed to start the next token */
+
+	if (capacity(&(nodes[0].name), &(nodes[0].namesiz), 0, 1) == -1)
+		goto end;
+	nodes[0].name[0] = '\0';
+
+	while (1) {
+		c = GETNEXT();
+handlechr:
+		if (c == EOF)
+			break;
+
+		/* skip JSON white-space, (NOTE: no \v, \f, \b etc) */
+		if (c == ' ' || c == '\t' || c == '\n' || c == '\r')
+			continue;
+
+		if (!c || !strchr(expect, c))
+			JSON_INVALID();
+
+		switch (c) {
+		case ':':
+			iskey = 0;
+			expect = EXPECT_VALUE;
+			break;
+		case '"':
+			nodes[depth].type = JSON_TYPE_STRING;
+			escape = 0;
+			len = 0;
+			while (1) {
+				c = GETNEXT();
+chr:
+				/* EOF or control char: 0x7f is not defined as a control char in RFC8259 */
+				if (c < 0x20)
+					JSON_INVALID();
+
+				if (escape) {
+escchr:
+					escape = 0;
+					switch (c) {
+					case '"': /* FALLTHROUGH */
+					case '\\':
+					case '/': break;
+					case 'b': c = '\b'; break;
+					case 'f': c = '\f'; break;
+					case 'n': c = '\n'; break;
+					case 'r': c = '\r'; break;
+					case 't': c = '\t'; break;
+					case 'u': /* hex hex hex hex */
+						if (capacity(&str, &sz, len, 4) == -1)
+							goto end;
+						for (i = 12, cp = 0; i >= 0; i -= 4) {
+							if ((c = GETNEXT()) == EOF || !isxdigit(c))
+								JSON_INVALID(); /* invalid code point */
+							cp |= (hexdigit(c) << i);
+						}
+						/* RFC8259 - 7. Strings - surrogates.
+						 * 0xd800 - 0xdbff - high surrogates */
+						if (cp >= 0xd800 && cp <= 0xdbff) {
+							if ((c = GETNEXT()) != '\\') {
+								len += codepointtoutf8(cp, &str[len]);
+								goto chr; /* lone high surrogate: emit it, then handle c as a normal character */
+							}
+							if ((c = GETNEXT()) != 'u') {
+								len += codepointtoutf8(cp, &str[len]);
+								goto escchr; /* a different escape follows: emit the surrogate, then process it */
+							}
+							for (hi = cp, i = 12, lo = 0; i >= 0; i -= 4) {
+								if ((c = GETNEXT()) == EOF || !isxdigit(c))
+									JSON_INVALID(); /* invalid code point */
+								lo |= (hexdigit(c) << i);
+							}
+							/* 0xdc00 - 0xdfff - low surrogates */
+							if (lo >= 0xdc00 && lo <= 0xdfff) {
+								cp = (hi << 10) + lo - 56613888; /* - offset */
+							} else {
+								/* handle graceful: raw invalid output bytes */
+								len += codepointtoutf8(hi, &str[len]);
+								if (capacity(&str, &sz, len, 4) == -1)
+									goto end;
+								len += codepointtoutf8(lo, &str[len]);
+								continue;
+							}
+						}
+						len += codepointtoutf8(cp, &str[len]);
+						continue;
+					default:
+						JSON_INVALID(); /* invalid escape char */
+					}
+					if (capacity(&str, &sz, len, 1) == -1)
+						goto end;
+					str[len++] = c;
+				} else if (c == '\\') {
+					escape = 1;
+				} else if (c == '"') {
+					if (capacity(&str, &sz, len, 1) == -1)
+						goto end;
+					str[len++] = '\0';
+
+					if (iskey) {
+						/* copy string as key, including NUL byte */
+						if (capacity(&(nodes[depth].name), &(nodes[depth].namesiz), len, 1) == -1)
+							goto end;
+						memcpy(nodes[depth].name, str, len);
+					} else {
+						cb(nodes, depth + 1, str); /* string value; depth + 1 = number of nodes in use */
+					}
+					break;
+				} else {
+					if (capacity(&str, &sz, len, 1) == -1)
+						goto end;
+					str[len++] = c;
+				}
+			}
+			if (iskey)
+				expect = EXPECT_OBJECT_KEY;
+			else
+				expect = EXPECT_END;
+			break;
+		case '[':
+		case '{':
+			if (depth + 1 >= JSON_MAX_NODE_DEPTH)
+				JSON_INVALID(); /* too deep */
+
+			nodes[depth].index = 0;
+			if (c == '[') {
+				nodes[depth].type = JSON_TYPE_ARRAY;
+				expect = EXPECT_ARRAY_VALUE;
+			} else if (c == '{') {
+				iskey = 1;
+				nodes[depth].type = JSON_TYPE_OBJECT;
+				expect = EXPECT_OBJECT_STRING;
+			}
+
+			cb(nodes, depth + 1, ""); /* containers are reported once, when they open, with an empty value */
+
+			depth++;
+			nodes[depth].index = 0;
+			if (capacity(&(nodes[depth].name), &(nodes[depth].namesiz), 0, 1) == -1)
+				goto end;
+			nodes[depth].name[0] = '\0';
+			break;
+		case ']':
+		case '}':
+			if (!depth ||
+			   (c == ']' && nodes[depth - 1].type != JSON_TYPE_ARRAY) ||
+			   (c == '}' && nodes[depth - 1].type != JSON_TYPE_OBJECT))
+				JSON_INVALID(); /* unbalanced nodes */
+			iskey = 0; /* "{}" sets iskey at '{' and never reaches the ':' that clears it */
+			nodes[--depth].index++;
+			expect = EXPECT_END;
+			break;
+		case ',':
+			if (!depth)
+				JSON_INVALID(); /* unbalanced nodes */
+
+			nodes[depth - 1].index++;
+			if (nodes[depth - 1].type == JSON_TYPE_OBJECT) {
+				iskey = 1;
+				expect = EXPECT_STRING;
+			} else {
+				expect = EXPECT_VALUE;
+			}
+			break;
+		case 't': /* true */
+			if (GETNEXT() != 'r' || GETNEXT() != 'u' || GETNEXT() != 'e')
+				JSON_INVALID();
+			nodes[depth].type = JSON_TYPE_BOOL;
+			cb(nodes, depth + 1, "true");
+			expect = EXPECT_END;
+			break;
+		case 'f': /* false */
+			if (GETNEXT() != 'a' || GETNEXT() != 'l' || GETNEXT() != 's' ||
+			    GETNEXT() != 'e')
+				JSON_INVALID();
+			nodes[depth].type = JSON_TYPE_BOOL;
+			cb(nodes, depth + 1, "false");
+			expect = EXPECT_END;
+			break;
+		case 'n': /* null */
+			if (GETNEXT() != 'u' || GETNEXT() != 'l' || GETNEXT() != 'l')
+				JSON_INVALID();
+			nodes[depth].type = JSON_TYPE_NULL;
+			cb(nodes, depth + 1, "null");
+			expect = EXPECT_END;
+			break;
+		default: /* number */
+			nodes[depth].type = JSON_TYPE_NUMBER;
+			p = 0;
+			pri[p++] = c;
+			expect = EXPECT_END;
+			while (1) {
+				c = GETNEXT();
+				if (c == EOF || /* the number ends at EOF, at a non-number character, or when pri is full */
+				    !c || !strchr("0123456789eE+-.", c) ||
+				    p + 1 >= sizeof(pri)) {
+					pri[p] = '\0';
+					cb(nodes, depth + 1, pri);
+					goto handlechr; /* do not read next char, handle this */
+				} else {
+					pri[p++] = c;
+				}
+			}
+		}
+	}
+	if (depth)
+		JSON_INVALID(); /* unbalanced nodes */
+
+	ret = 0; /* success */
+end:
+	for (depth = 0; depth < sizeof(nodes) / sizeof(nodes[0]); depth++)
+		free(nodes[depth].name); /* unused entries are still NULL, which free() accepts */
+	free(str);
+
+	return ret;
+}
diff --git a/json.h b/json.h
@@ -0,0 +1,30 @@
+#ifndef _JSON_H_
+#define _JSON_H_
+
+#include <stddef.h>
+
+enum JSONType { /* kind of value held by a json_node; every tag is a character constant */
+	JSON_TYPE_ARRAY  = 'a',
+	JSON_TYPE_OBJECT = 'o',
+	JSON_TYPE_STRING = 's',
+	JSON_TYPE_BOOL   = 'b', /* used for both true and false */
+	JSON_TYPE_NULL   = '?',
+	JSON_TYPE_NUMBER = 'n'
+};
+
+enum JSONError { /* negative return values of parsejson(); 0 means success */
+	JSON_ERROR_MEM     = -2, /* a buffer could not be allocated or grown */
+	JSON_ERROR_INVALID = -1 /* malformed input, or nesting deeper than JSON_MAX_NODE_DEPTH allows */
+};
+
+#define JSON_MAX_NODE_DEPTH 64 /* size of the parser's node array; deeper nesting is rejected as invalid */
+
+struct json_node { /* one level of the path from the document root to the current value */
+	enum JSONType type;
+	char *name; /* NUL-terminated object key; "" unless a key was parsed for this node */
+	size_t namesiz; /* number of bytes allocated for name */
+	size_t index; /* count/index for array or object type */
+};
+
+int parsejson(void (*cb)(struct json_node *, size_t, const char *)); /* cb(nodes, depth, value); returns 0 or a JSONError */
+#endif
diff --git a/util.c b/util.c
@@ -0,0 +1,35 @@
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <sys/types.h>
+
+#include "util.h"
+
+/*
+ * Taken from OpenBSD.
+ * Copy src to string dst of size siz.  At most siz-1 characters
+ * will be copied.  Always NUL terminates (unless siz == 0).
+ * Returns strlen(src); if retval >= siz, truncation occurred.
+ */
+size_t
+strlcpy(char *dst, const char *src, size_t siz) {
+	char *d = dst;
+	const char *s = src;
+	size_t n = siz; /* room left in dst, including the slot for the NUL */
+
+	/* copy as many bytes as will fit */
+	if (n != 0) {
+		while (--n != 0) {
+			if ((*d++ = *s++) == '\0')
+				break; /* all of src copied, NUL included; n is still non-zero here */
+		}
+	}
+	/* not enough room in dst, add NUL and traverse rest of src */
+	if (n == 0) {
+		if (siz != 0)
+				*d = '\0'; /* NUL-terminate dst */
+		while (*s++)
+			; /* walk s past the NUL of src so the result below equals strlen(src) */
+	}
+	return(s - src - 1); /* count does not include NUL */
+}
diff --git a/util.h b/util.h
@@ -0,0 +1,2 @@
+#undef strlcpy /* remove any macro named strlcpy so the declaration below is taken literally */
+size_t strlcpy(char *, const char *, size_t); /* defined in util.c; this header includes nothing, so size_t must already be declared */

	geojson2dm Convert GeoJSON to format suitable for input to datamaps
	git clone git://git.sikmir.ru/geojson2dm
	Log \| Files \| Refs \| README \| LICENSE

A	.gitignore	\|	2	++
A	LICENSE	\|	15	+++++++++++++++
A	Makefile	\|	16	++++++++++++++++
A	README.md	\|	14	++++++++++++++
A	geojson2dm.c	\|	65	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	json.c	\|	315	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	json.h	\|	30	++++++++++++++++++++++++++++++
A	util.c	\|	35	+++++++++++++++++++++++++++++++++++
A	util.h	\|	2	++