https://github.com/mapbox/tippecanoe
Raw File
Tip revision: 8cc844c9dd7cf788078e9f2f0bf77c83ea9aa787 authored by Eric Fischer on 31 March 2016, 21:33:11 UTC
Merge pull request #201 from mapbox/llong_max
Tip revision: 8cc844c
jsonpull.c
#define _GNU_SOURCE  // for asprintf()
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#include <stdarg.h>
#include "jsonpull.h"

// Size in bytes of the parser's input buffer, and the maximum number of
// bytes requested from the read callback on each refill.
#define BUFFER 10000

json_pull *json_begin(ssize_t (*read)(struct json_pull *, char *buffer, size_t n), void *source) {
	json_pull *j = malloc(sizeof(json_pull));
	if (j == NULL) {
		perror("Out of memory");
		exit(EXIT_FAILURE);
	}

	j->error = NULL;
	j->line = 1;
	j->container = NULL;
	j->root = NULL;

	j->read = read;
	j->source = source;
	j->buffer_head = 0;
	j->buffer_tail = 0;

	j->buffer = malloc(BUFFER);
	if (j->buffer == NULL) {
		perror("Out of memory");
		exit(EXIT_FAILURE);
	}

	return j;
}

// Return the next input byte without consuming it, or EOF when no more
// input is available.
//
// When the buffered window is exhausted, the buffer is refilled by calling
// j->read for up to BUFFER bytes; a refill that yields nothing is reported
// as EOF.
//
// The byte is returned as an unsigned char value (0..255). Returning it
// through a signed char would make a 0xFF input byte compare equal to EOF
// and make every byte >= 0x80 negative. (Assumes j->buffer holds plain char,
// as the read callback's `char *buffer` parameter suggests.)
static inline int peek(json_pull *j) {
	if (j->buffer_head >= j->buffer_tail) {
		j->buffer_head = 0;
		j->buffer_tail = j->read(j, j->buffer, BUFFER);
		if (j->buffer_head >= j->buffer_tail) {
			return EOF;
		}
	}

	return (unsigned char) j->buffer[j->buffer_head];
}

// Consume and return the next input byte, or return EOF when no more input
// is available.
//
// When the buffered window is exhausted, the buffer is refilled by calling
// j->read for up to BUFFER bytes; a refill that yields nothing is reported
// as EOF.
//
// The byte is returned as an unsigned char value (0..255). Returning it
// through a signed char would make a 0xFF input byte compare equal to EOF
// and make every byte >= 0x80 negative. (Assumes j->buffer holds plain char,
// as the read callback's `char *buffer` parameter suggests.)
static inline int next(json_pull *j) {
	if (j->buffer_head >= j->buffer_tail) {
		j->buffer_head = 0;
		j->buffer_tail = j->read(j, j->buffer, BUFFER);
		if (j->buffer_head >= j->buffer_tail) {
			return EOF;
		}
	}

	return (unsigned char) j->buffer[j->buffer_head++];
}

// Read callback for stream-backed parsers: treats j->source as the stream to
// fread() from, stores up to `n` bytes in `buffer`, and returns the number of
// bytes fread() reported.
static ssize_t read_file(json_pull *j, char *buffer, size_t n) {
	FILE *stream = j->source;
	size_t got = fread(buffer, 1, n, stream);

	return got;
}

// Create a pull parser whose input is read from the stdio stream `f`.
json_pull *json_begin_file(FILE *f) {
	json_pull *parser = json_begin(read_file, f);

	return parser;
}

// Read callback for string-backed parsers.
//
// Treats j->source as a pointer into a NUL-terminated string, copies up to
// `n` bytes (stopping before the terminator) into `buffer`, advances
// j->source past the copied bytes, and returns how many bytes were copied.
// A return of 0 means the terminator has been reached.
static ssize_t read_string(json_pull *j, char *buffer, size_t n) {
	char *cp = j->source;
	// size_t matches the type of the limit `n`, avoiding a signed/unsigned
	// comparison and overflow of an int counter for very large requests.
	size_t out = 0;

	while (out < n && cp[out] != '\0') {
		buffer[out] = cp[out];
		out++;
	}

	j->source = cp + out;
	return out;
}

// Create a pull parser whose input is the NUL-terminated string `s`.
// The parser stores the pointer itself, not a copy of the text.
json_pull *json_begin_string(char *s) {
	json_pull *parser = json_begin(read_string, s);

	return parser;
}

// Release a parser created by json_begin() and its input buffer.
//
// Only the parser structure and its buffer are freed here; objects that were
// read through the parser and whatever p->source refers to are not touched.
// Passing NULL is a no-op, matching json_free().
void json_end(json_pull *p) {
	if (p == NULL) {
		return;
	}

	free(p->buffer);
	free(p);
}

// Consume one input byte via next() and keep the parser's line counter in
// step: every newline consumed bumps j->line by one. Returns the byte (or
// EOF) unchanged.
static inline int read_wrap(json_pull *j) {
	const int ch = next(j);

	if (ch != '\n') {
		return ch;
	}

	j->line++;
	return ch;
}

// Number of bytes needed to hold `i` elements of `size` bytes each, with the
// element count rounded up to the next multiple of 32 so that arrays grow in
// steps of 32 slots. Both arguments are parenthesized so that expression
// arguments (e.g. `a + b`) expand with the intended precedence.
#define SIZE_FOR(i, size) ((size_t)((((i) + 31) & ~31) * (size)))

// Allocate a json_object of the given type whose parent pointer is `parent`.
//
// The object starts with no array, keys or values and a length of 0; it is
// NOT linked into the parent's storage here. Fields not listed below (such
// as expect, string and number) are left for the caller to fill in.
// Exits the process if memory cannot be allocated.
static json_object *fabricate_object(json_object *parent, json_type type) {
	json_object *node = malloc(sizeof *node);
	if (!node) {
		perror("Out of memory");
		exit(EXIT_FAILURE);
	}

	node->parent = parent;
	node->type = type;

	node->length = 0;
	node->values = NULL;
	node->keys = NULL;
	node->array = NULL;

	return node;
}

// Create an object of `type` and attach it to the container currently being
// read (j->container), enforcing what that container expects next.
//
//  - No open container: the object becomes j->root.
//  - Array: allowed only when an item is expected; the object is appended
//    and the array then expects a comma.
//  - Hash expecting a value: the object fills the value slot of the most
//    recently added key; the hash then expects a comma.
//  - Hash expecting a key: the object must be a JSON_STRING; it is stored as
//    a new key with a NULL value, and the hash then expects a colon.
//
// On a sequencing error j->error is set, the new object is freed and NULL is
// returned. Allocation failure or size overflow exits the process.
static json_object *add_object(json_pull *j, json_type type) {
	json_object *parent = j->container;
	json_object *node = fabricate_object(parent, type);

	if (parent == NULL) {
		j->root = node;
		return node;
	}

	if (parent->type == JSON_ARRAY) {
		if (parent->expect != JSON_ITEM) {
			j->error = "Expected a comma, not a list item";
			free(node);
			return NULL;
		}

		// Storage grows in SIZE_FOR steps; reallocate only when one more
		// element crosses into the next step.
		size_t have = SIZE_FOR(parent->length, sizeof(json_object *));
		size_t want = SIZE_FOR(parent->length + 1, sizeof(json_object *));

		if (want != have) {
			if (want < have) {
				fprintf(stderr, "Array size overflow\n");
				exit(EXIT_FAILURE);
			}

			parent->array = realloc(parent->array, want);
			if (parent->array == NULL) {
				perror("Out of memory");
				exit(EXIT_FAILURE);
			}
		}

		parent->array[parent->length++] = node;
		parent->expect = JSON_COMMA;
		return node;
	}

	if (parent->type == JSON_HASH) {
		if (parent->expect == JSON_VALUE) {
			// The key was stored by the previous call; fill in its value.
			parent->values[parent->length - 1] = node;
			parent->expect = JSON_COMMA;
			return node;
		}

		if (parent->expect != JSON_KEY) {
			j->error = "Expected a comma or colon";
			free(node);
			return NULL;
		}

		if (type != JSON_STRING) {
			j->error = "Hash key is not a string";
			free(node);
			return NULL;
		}

		size_t have = SIZE_FOR(parent->length, sizeof(json_object *));
		size_t want = SIZE_FOR(parent->length + 1, sizeof(json_object *));

		if (want != have) {
			if (want < have) {
				fprintf(stderr, "Hash size overflow\n");
				exit(EXIT_FAILURE);
			}

			parent->keys = realloc(parent->keys, want);
			parent->values = realloc(parent->values, want);
			if (parent->keys == NULL || parent->values == NULL) {
				perror("Out of memory");
				exit(EXIT_FAILURE);
			}
		}

		parent->keys[parent->length] = node;
		parent->values[parent->length] = NULL;
		parent->length++;
		parent->expect = JSON_COLON;
		return node;
	}

	return node;
}

// Look up the value stored under the string key `s` in hash `o`.
//
// Returns the value of the first entry whose key is a JSON_STRING equal to
// `s`. Returns NULL when `o` is NULL, is not a JSON_HASH, or has no such
// key. The returned value itself may be NULL if the matching entry has no
// value stored.
json_object *json_hash_get(json_object *o, const char *s) {
	size_t idx = 0;

	if (o == NULL || o->type != JSON_HASH) {
		return NULL;
	}

	while (idx < o->length) {
		json_object *key = o->keys[idx];

		if (key != NULL && key->type == JSON_STRING && strcmp(key->string, s) == 0) {
			return o->values[idx];
		}

		idx++;
	}

	return NULL;
}

// Growable byte buffer used to accumulate token text and serialized output.
// The string_* helpers below keep buf NUL-terminated after every change.
struct string {
	char *buf;	// heap storage obtained from malloc()/realloc()
	size_t n;	// number of bytes stored, not counting the terminating NUL
	size_t nalloc;	// number of bytes currently allocated for buf
};

// Prepare `s` as an empty string backed by a freshly allocated 500-byte
// buffer. Exits the process if the allocation fails.
static void string_init(struct string *s) {
	const size_t initial = 500;
	char *fresh = malloc(initial);

	if (fresh == NULL) {
		perror("Out of memory");
		exit(EXIT_FAILURE);
	}

	fresh[0] = '\0';

	s->buf = fresh;
	s->nalloc = initial;
	s->n = 0;
}

// Append the single byte `c` to `s`, growing the buffer by 500 bytes when
// there is no longer room for the byte plus the terminating NUL.
// Exits the process on size overflow or allocation failure.
static void string_append(struct string *s, char c) {
	if (s->n + 2 >= s->nalloc) {
		size_t grown = s->nalloc + 500;

		// Unsigned wrap-around shows up as a size that did not increase.
		if (grown <= s->nalloc) {
			fprintf(stderr, "String size overflowed\n");
			exit(EXIT_FAILURE);
		}

		s->nalloc = grown;
		s->buf = realloc(s->buf, grown);
		if (s->buf == NULL) {
			perror("Out of memory");
			exit(EXIT_FAILURE);
		}
	}

	s->buf[s->n] = c;
	s->n++;
	s->buf[s->n] = '\0';
}

// Append the NUL-terminated text `add` to `s`, growing the buffer by the
// length of `add` plus 500 bytes when it would not otherwise fit together
// with the terminating NUL. `add` must not point into s->buf.
// Exits the process on size overflow or allocation failure.
static void string_append_string(struct string *s, char *add) {
	size_t extra = strlen(add);

	if (s->n + extra + 1 >= s->nalloc) {
		size_t grown = s->nalloc + (500 + extra);

		// Unsigned wrap-around shows up as a size that did not increase.
		if (grown <= s->nalloc) {
			fprintf(stderr, "String size overflowed\n");
			exit(EXIT_FAILURE);
		}

		s->nalloc = grown;
		s->buf = realloc(s->buf, grown);
		if (s->buf == NULL) {
			perror("Out of memory");
			exit(EXIT_FAILURE);
		}
	}

	memcpy(s->buf + s->n, add, extra);
	s->n += extra;
	s->buf[s->n] = '\0';
}

// Release the heap buffer owned by `s`. The struct itself is not freed and
// its fields are left as they were.
static void string_free(struct string *s) {
	char *storage = s->buf;

	free(storage);
}

// Append the UTF-8 encoding of code point `cp` to `s`: one to three bytes
// for values up to 0xFFFF, four bytes for larger values.
static void string_append_utf8(struct string *s, unsigned long cp) {
	if (cp <= 0x7F) {
		string_append(s, (char) cp);
	} else if (cp <= 0x7FF) {
		string_append(s, (char) (0xC0 | (cp >> 6)));
		string_append(s, (char) (0x80 | (cp & 0x3F)));
	} else if (cp <= 0xFFFF) {
		string_append(s, (char) (0xE0 | (cp >> 12)));
		string_append(s, (char) (0x80 | ((cp >> 6) & 0x3F)));
		string_append(s, (char) (0x80 | (cp & 0x3F)));
	} else {
		string_append(s, (char) (0xF0 | (cp >> 18)));
		string_append(s, (char) (0x80 | ((cp >> 12) & 0x3F)));
		string_append(s, (char) (0x80 | ((cp >> 6) & 0x3F)));
		string_append(s, (char) (0x80 | (cp & 0x3F)));
	}
}

// Read input until one JSON value has been completed and return it.
//
// Scalars (null, true, false, numbers, strings) are returned as soon as they
// are read; an array or hash is returned when its closing bracket is read.
// Opening brackets, commas and colons update the parser state and reading
// continues. If `cb` is not NULL it is called as cb(type, j, state) after
// each '[' (JSON_ARRAY), '{' (JSON_HASH), ',' (JSON_COMMA) and
// ':' (JSON_COLON).
//
// Returns NULL at end of input or on a syntax error. On a syntax error, or
// if input ends while a container is still open, j->error points to a
// message; j->line counts the newlines consumed so far.
//
// Number text is kept verbatim in ->string (with its length in ->length) and
// also converted into ->number. String escapes are decoded; \uXXXX escapes
// are stored as UTF-8, with a high/low surrogate pair combined into a single
// four-byte sequence.
json_object *json_read_separators(json_pull *j, json_separator_callback cb, void *state) {
	int c;

	// In case there is an error at the top level
	if (j->container == NULL) {
		j->root = NULL;
	}

again:
	/////////////////////////// Whitespace

	do {
		c = read_wrap(j);
		if (c == EOF) {
			if (j->container != NULL) {
				j->error = "Reached EOF without all containers being closed";
			}

			return NULL;
		}
	} while (c == ' ' || c == '\t' || c == '\r' || c == '\n');

	/////////////////////////// Arrays

	if (c == '[') {
		json_object *o = add_object(j, JSON_ARRAY);
		if (o == NULL) {
			return NULL;
		}
		j->container = o;
		j->container->expect = JSON_ITEM;

		if (cb != NULL) {
			cb(JSON_ARRAY, j, state);
		}

		goto again;
	} else if (c == ']') {
		if (j->container == NULL) {
			j->error = "Found ] at top level";
			return NULL;
		}

		if (j->container->type != JSON_ARRAY) {
			j->error = "Found ] not in an array";
			return NULL;
		}

		// A closing bracket is valid after an element, or immediately
		// after the opening bracket of an empty array.
		if (j->container->expect != JSON_COMMA) {
			if (!(j->container->expect == JSON_ITEM && j->container->length == 0)) {
				j->error = "Found ] without final element";
				return NULL;
			}
		}

		json_object *ret = j->container;
		j->container = ret->parent;
		return ret;
	}

	/////////////////////////// Hashes

	if (c == '{') {
		json_object *o = add_object(j, JSON_HASH);
		if (o == NULL) {
			return NULL;
		}
		j->container = o;
		j->container->expect = JSON_KEY;

		if (cb != NULL) {
			cb(JSON_HASH, j, state);
		}

		goto again;
	} else if (c == '}') {
		if (j->container == NULL) {
			j->error = "Found } at top level";
			return NULL;
		}

		if (j->container->type != JSON_HASH) {
			j->error = "Found } not in a hash";
			return NULL;
		}

		// A closing brace is valid after a value, or immediately after
		// the opening brace of an empty hash.
		if (j->container->expect != JSON_COMMA) {
			if (!(j->container->expect == JSON_KEY && j->container->length == 0)) {
				j->error = "Found } without final element";
				return NULL;
			}
		}

		json_object *ret = j->container;
		j->container = ret->parent;
		return ret;
	}

	/////////////////////////// Null

	if (c == 'n') {
		if (read_wrap(j) != 'u' || read_wrap(j) != 'l' || read_wrap(j) != 'l') {
			j->error = "Found misspelling of null";
			return NULL;
		}

		return add_object(j, JSON_NULL);
	}

	/////////////////////////// True

	if (c == 't') {
		if (read_wrap(j) != 'r' || read_wrap(j) != 'u' || read_wrap(j) != 'e') {
			j->error = "Found misspelling of true";
			return NULL;
		}

		return add_object(j, JSON_TRUE);
	}

	/////////////////////////// False

	if (c == 'f') {
		if (read_wrap(j) != 'a' || read_wrap(j) != 'l' || read_wrap(j) != 's' || read_wrap(j) != 'e') {
			j->error = "Found misspelling of false";
			return NULL;
		}

		return add_object(j, JSON_FALSE);
	}

	/////////////////////////// Comma

	if (c == ',') {
		// A comma outside any container is passed over without a check.
		if (j->container != NULL) {
			if (j->container->expect != JSON_COMMA) {
				j->error = "Found unexpected comma";
				return NULL;
			}

			if (j->container->type == JSON_HASH) {
				j->container->expect = JSON_KEY;
			} else {
				j->container->expect = JSON_ITEM;
			}
		}

		if (cb != NULL) {
			cb(JSON_COMMA, j, state);
		}

		goto again;
	}

	/////////////////////////// Colon

	if (c == ':') {
		if (j->container == NULL) {
			j->error = "Found colon at top level";
			return NULL;
		}

		if (j->container->expect != JSON_COLON) {
			j->error = "Found unexpected colon";
			return NULL;
		}

		j->container->expect = JSON_VALUE;

		if (cb != NULL) {
			cb(JSON_COLON, j, state);
		}

		goto again;
	}

	/////////////////////////// Numbers

	if (c == '-' || (c >= '0' && c <= '9')) {
		struct string val;
		string_init(&val);

		if (c == '-') {
			string_append(&val, c);
			c = read_wrap(j);
		}

		// The characters after the first digit are inspected with peek()
		// so that whatever ends the number stays in the input.
		if (c == '0') {
			string_append(&val, c);
		} else if (c >= '1' && c <= '9') {
			string_append(&val, c);
			c = peek(j);

			while (c >= '0' && c <= '9') {
				string_append(&val, read_wrap(j));
				c = peek(j);
			}
		} else {
			// Only reachable after a '-': the character that followed it
			// is not a digit, so there is no number here.
			j->error = "Found minus sign without a following digit";
			string_free(&val);
			return NULL;
		}

		if (peek(j) == '.') {
			string_append(&val, read_wrap(j));

			c = peek(j);
			if (c < '0' || c > '9') {
				j->error = "Decimal point without digits";
				string_free(&val);
				return NULL;
			}
			while (c >= '0' && c <= '9') {
				string_append(&val, read_wrap(j));
				c = peek(j);
			}
		}

		c = peek(j);
		if (c == 'e' || c == 'E') {
			string_append(&val, read_wrap(j));

			c = peek(j);
			if (c == '+' || c == '-') {
				string_append(&val, read_wrap(j));
			}

			c = peek(j);
			if (c < '0' || c > '9') {
				j->error = "Exponent without digits";
				string_free(&val);
				return NULL;
			}
			while (c >= '0' && c <= '9') {
				string_append(&val, read_wrap(j));
				c = peek(j);
			}
		}

		json_object *n = add_object(j, JSON_NUMBER);
		if (n != NULL) {
			// The object takes ownership of the text buffer.
			n->number = atof(val.buf);
			n->string = val.buf;
			n->length = val.n;
		} else {
			string_free(&val);
		}
		return n;
	}

	/////////////////////////// Strings

	if (c == '"') {
		struct string val;
		string_init(&val);

		// High surrogate (0xD800-0xDBFF) from a \u escape that is still
		// waiting for its low half; -1 when there is none.
		long pending = -1;
		int closed = 0;

		while ((c = read_wrap(j)) != EOF) {
			int escaped = (c == '\\');

			if (escaped) {
				c = read_wrap(j);

				if (c == 'u') {
					char hex[5] = "aaaa";
					int i;
					for (i = 0; i < 4; i++) {
						int digit = read_wrap(j);
						if (digit == EOF || !isxdigit((unsigned char) digit)) {
							j->error = "Found \\u escape without four hex digits";
							string_free(&val);
							return NULL;
						}
						hex[i] = (char) digit;
					}
					unsigned long ch = strtoul(hex, NULL, 16);

					if (pending >= 0) {
						if (ch >= 0xDC00 && ch <= 0xDFFF) {
							// Surrogate pair: combine both halves into
							// one code point above 0xFFFF.
							unsigned long high = (unsigned long) pending - 0xD800;
							string_append_utf8(&val, 0x10000 + (high << 10) + (ch - 0xDC00));
							pending = -1;
							continue;
						}

						// Unpaired high surrogate: store it on its own.
						string_append_utf8(&val, (unsigned long) pending);
						pending = -1;
					}

					if (ch >= 0xD800 && ch <= 0xDBFF) {
						pending = (long) ch;
					} else {
						string_append_utf8(&val, ch);
					}
					continue;
				}
			}

			// Anything other than a \u escape ends a pending surrogate.
			if (pending >= 0) {
				string_append_utf8(&val, (unsigned long) pending);
				pending = -1;
			}

			if (!escaped) {
				if (c == '"') {
					closed = 1;
					break;
				}

				string_append(&val, c);
				continue;
			}

			if (c == '"') {
				string_append(&val, '"');
			} else if (c == '\\') {
				string_append(&val, '\\');
			} else if (c == '/') {
				string_append(&val, '/');
			} else if (c == 'b') {
				string_append(&val, '\b');
			} else if (c == 'f') {
				string_append(&val, '\f');
			} else if (c == 'n') {
				string_append(&val, '\n');
			} else if (c == 'r') {
				string_append(&val, '\r');
			} else if (c == 't') {
				string_append(&val, '\t');
			} else {
				j->error = "Found backslash followed by unknown character";
				string_free(&val);
				return NULL;
			}
		}

		if (!closed) {
			// Input ended before the closing quote.
			j->error = "Reached EOF inside a string";
			string_free(&val);
			return NULL;
		}

		json_object *s = add_object(j, JSON_STRING);
		if (s != NULL) {
			// The object takes ownership of the text buffer.
			s->string = val.buf;
			s->length = val.n;
		} else {
			string_free(&val);
		}
		return s;
	}

	j->error = "Found unexpected character";
	return NULL;
}

// Read the next completed JSON value from `j` without any separator
// callback. Returns NULL at end of input or on error.
json_object *json_read(json_pull *j) {
	json_separator_callback no_callback = NULL;

	return json_read_separators(j, no_callback, NULL);
}

// Keep reading values from `p` until one with no parent has been completed,
// and return it. Returns NULL if json_read() reports end of input or an
// error first.
json_object *json_read_tree(json_pull *p) {
	for (;;) {
		json_object *done = json_read(p);

		if (done == NULL) {
			return NULL;
		}

		if (done->parent == NULL) {
			return done;
		}
	}
}

// Free `o` together with everything it owns, and unlink it from its parent.
//
// Arrays free each element, hashes free each key and value, and strings and
// numbers free their text buffer. Passing NULL is a no-op.
void json_free(json_object *o) {
	size_t idx;

	if (o == NULL) {
		return;
	}

	if (o->type == JSON_STRING || o->type == JSON_NUMBER) {
		free(o->string);
	} else if (o->type == JSON_ARRAY) {
		json_object **items = o->array;
		size_t count = o->length;

		// Detach the storage first so that each child's json_disconnect()
		// sees an empty parent and does not search or shift the array
		// while it is being torn down.
		o->array = NULL;
		o->length = 0;

		for (idx = 0; idx < count; idx++) {
			json_free(items[idx]);
		}

		free(items);
	} else if (o->type == JSON_HASH) {
		json_object **names = o->keys;
		json_object **vals = o->values;
		size_t count = o->length;

		// Same detachment as for arrays, for the same reason.
		o->keys = NULL;
		o->values = NULL;
		o->length = 0;

		for (idx = 0; idx < count; idx++) {
			json_free(names[idx]);
			json_free(vals[idx]);
		}

		free(names);
		free(vals);
	}

	// Remove this object from whatever container still refers to it.
	json_disconnect(o);

	free(o);
}

// Remove every reference to `o` from its parent container and clear
// o->parent. `o` itself is not freed.
//
//  - Array parent: the element is removed and the following elements are
//    shifted down by one slot.
//  - Hash parent: the key or value slot that held `o` is replaced by a
//    JSON_NULL placeholder. If the key and the value of that entry are then
//    both JSON_NULL objects, both are freed and the entry is removed.
void json_disconnect(json_object *o) {
	// Expunge references to this as an array element
	// or a hash key or value.

	if (o->parent != NULL) {
		if (o->parent->type == JSON_ARRAY) {
			size_t i;

			for (i = 0; i < o->parent->length; i++) {
				if (o->parent->array[i] == o) {
					break;
				}
			}

			if (i < o->parent->length) {
				// memmove() takes a byte count, so the number of
				// trailing elements is scaled by the element size.
				memmove(o->parent->array + i, o->parent->array + i + 1,
					(o->parent->length - i - 1) * sizeof(o->parent->array[0]));
				o->parent->length--;
			}
		}

		if (o->parent->type == JSON_HASH) {
			size_t i;

			for (i = 0; i < o->parent->length; i++) {
				if (o->parent->keys[i] == o) {
					o->parent->keys[i] = fabricate_object(o->parent, JSON_NULL);
					break;
				}
				if (o->parent->values[i] == o) {
					o->parent->values[i] = fabricate_object(o->parent, JSON_NULL);
					break;
				}
			}

			if (i < o->parent->length) {
				if (o->parent->keys[i] != NULL && o->parent->keys[i]->type == JSON_NULL) {
					if (o->parent->values[i] != NULL && o->parent->values[i]->type == JSON_NULL) {
						free(o->parent->keys[i]);
						free(o->parent->values[i]);

						// Byte counts, as above.
						memmove(o->parent->keys + i, o->parent->keys + i + 1,
							(o->parent->length - i - 1) * sizeof(o->parent->keys[0]));
						memmove(o->parent->values + i, o->parent->values + i + 1,
							(o->parent->length - i - 1) * sizeof(o->parent->values[0]));
						o->parent->length--;
					}
				}
			}
		}
	}

	o->parent = NULL;
}

// Append the text form of a single object to `val`, without descending into
// containers.
//
//  - NULL pointer: the literal text NULL.
//  - String: quoted, with backslash and double quote escaped by a backslash
//    and bytes below 0x20 written as \u00XX.
//  - Number: its original text.
//  - null / true / false: the corresponding keyword.
//  - Hash / array: only the closing '}' or ']'.
static void json_print_one(struct string *val, json_object *o) {
	if (o == NULL) {
		string_append_string(val, "NULL");
	} else if (o->type == JSON_STRING) {
		string_append(val, '\"');

		char *cp;
		for (cp = o->string; *cp != '\0'; cp++) {
			if (*cp == '\\' || *cp == '"') {
				string_append(val, '\\');
				string_append(val, *cp);
			} else if (*cp >= 0 && *cp < ' ') {
				// Six characters plus the terminator always fit, so the
				// escape is formatted on the stack: there is no allocation
				// that could fail and silently drop the character.
				char esc[7];
				snprintf(esc, sizeof esc, "\\u%04x", (unsigned) *cp);
				string_append_string(val, esc);
			} else {
				string_append(val, *cp);
			}
		}

		string_append(val, '\"');
	} else if (o->type == JSON_NUMBER) {
		string_append_string(val, o->string);
	} else if (o->type == JSON_NULL) {
		string_append_string(val, "null");
	} else if (o->type == JSON_TRUE) {
		string_append_string(val, "true");
	} else if (o->type == JSON_FALSE) {
		string_append_string(val, "false");
	} else if (o->type == JSON_HASH) {
		string_append(val, '}');
	} else if (o->type == JSON_ARRAY) {
		string_append(val, ']');
	}
}

// Append the text form of `o`, including all nested content, to `val`.
//
// Hashes are written as {key:value,...} and arrays as [item,...] with no
// whitespace; everything else is delegated to json_print_one(). A NULL
// object (for example the value of a hash entry that has not been read yet)
// is written as the literal text NULL.
static void json_print(struct string *val, json_object *o) {
	size_t idx;

	if (o == NULL) {
		// Hash value in incompletely read hash
		string_append_string(val, "NULL");
		return;
	}

	if (o->type != JSON_HASH && o->type != JSON_ARRAY) {
		json_print_one(val, o);
		return;
	}

	if (o->type == JSON_ARRAY) {
		string_append(val, '[');

		for (idx = 0; idx < o->length; idx++) {
			// Separator goes between elements, never after the last.
			if (idx > 0) {
				string_append(val, ',');
			}
			json_print(val, o->array[idx]);
		}

		string_append(val, ']');
		return;
	}

	string_append(val, '{');

	for (idx = 0; idx < o->length; idx++) {
		if (idx > 0) {
			string_append(val, ',');
		}
		json_print(val, o->keys[idx]);
		string_append(val, ':');
		json_print(val, o->values[idx]);
	}

	string_append(val, '}');
}

// Serialize `o` and everything beneath it to a newly allocated,
// NUL-terminated string. The buffer comes from malloc()/realloc() and is
// handed to the caller, who is responsible for free()ing it.
char *json_stringify(json_object *o) {
	struct string out;
	char *text;

	string_init(&out);
	json_print(&out, o);

	text = out.buf;
	return text;
}
back to top