/**
 * Find the first zero bit in a bitmap.
 *
 * Bits are numbered LSB-first inside each byte, so bit `n` lives in
 * map[n / 8] under mask (1 << (n % 8)).
 *
 * @param map    bitmap storage
 * @param length size of the bitmap in BYTES
 * @return index of the first clear bit, or -1 when every bit is set
 *         (or when length is 0)
 */
long long bitmap_find_first(uint8_t* map, size_t length)
{
    /* size_t index: the previous `unsigned`/`int` counters could wrap
     * before reaching `length` on large maps. */
    for (size_t i = 0; i < length; i++) {
        if (map[i] == 0xFF) {
            continue;
        }

        for (unsigned bit = 0; bit < 8; bit++) {
            if (!(map[i] & (1u << bit))) {
                return (long long)i * 8 + bit;
            }
        }
    }

    return -1;
}

/**
 * Test a single bit.
 *
 * @return 1 when bit `pos` is set, 0 otherwise. The value is normalised:
 *         returning the raw mask in a `char` is implementation-defined
 *         for bit 7 (0x80 does not fit a signed char).
 */
char bitmap_get(uint8_t* map, size_t pos)
{
    return (char)((map[pos / 8] >> (pos % 8)) & 1u);
}

/** Set bit `pos`. */
void bitmap_set(uint8_t* map, size_t pos)
{
    map[pos / 8] |= (uint8_t)(1u << (pos % 8));
}

/** Clear bit `pos`. */
void bitmap_unset(uint8_t* map, size_t pos)
{
    map[pos / 8] &= (uint8_t)~(1u << (pos % 8));
}
+#include + +verbosity_t verbosity = VERBOSITY_NORMAL; + +void printfv(verbosity_t level, const char* format, ...) +{ + va_list args; + va_start(args, format); + + if (verbosity >= level) { + vprintf(format, args); + } + + va_end(args); +} + +void hexdump(const void* data, size_t length) +{ + for (size_t read = 0; read < length; read += 16) { + printf("%08zx", read); + + size_t to_read = length - read > 16 ? 16 : length - read; + for (size_t byte = 0; byte < to_read; byte++) { + printf(" %02X", ((unsigned char*)data)[read + byte]); + } + + printf("\n"); + } +} + diff --git a/common.h b/common.h new file mode 100644 index 0000000..649a13f --- /dev/null +++ b/common.h @@ -0,0 +1,20 @@ +#ifndef COMMON_H_ +#define COMMON_H_ + +#define VERBOSITY_QUIET -1 +#define VERBOSITY_NORMAL 0 +#define VERBOSITY_VERBOSE 1 +#define VERBOSITY_DEBUG 2 + +#include +#include "optparse.h" + +typedef int verbosity_t; +typedef struct optparse optparse_t; + +extern verbosity_t verbosity; + +void printfv(verbosity_t verbosity, const char* format, ...); +void hexdump(const void* data, size_t length); + +#endif diff --git a/index.c b/index.c new file mode 100644 index 0000000..3b33467 --- /dev/null +++ b/index.c @@ -0,0 +1,607 @@ +#include +#include +#include "index.h" +#include "io.h" +#include "common.h" +#include "bitmap.h" + +#define BTREE_ERR_CANNOT_OPEN_FILE -1 +#define BTREE_ERR_PAGE_SIZE_DIFFERENT -2 + +#define SWAP(type, x, y) do { type tmp; tmp = y; y = x; x = tmp; } while (0) + +/* private functions */ +void _btree_insert_into_node(btree_t *tree, btree_node_t *node, btree_entry_t *entry); +void _btree_remove_from_node(btree_t *tree, btree_node_t *node, unsigned index); + +int _btree_save_header(btree_t *tree) +{ + return file_write(tree->file, 0, &tree->header, sizeof(btree_header_t)); +} + +page_t _btree_alloc(btree_t *tree) +{ + page_t page = 2; + long long bit; + uint8_t bitmap[PAGE_SIZE]; + + file_read(tree->file, 1, bitmap, PAGE_SIZE); + bit = bitmap_find_first(bitmap, 
PAGE_SIZE); + bitmap_set(bitmap, bit); + file_write(tree->file, 1, bitmap, PAGE_SIZE); + + page += bit; + + printfv(VERBOSITY_DEBUG, "[alloc] allocated page %zu.\n", page); + + return page; +} + +void _btree_page_free(btree_t *tree, page_t page) +{ + printfv(VERBOSITY_DEBUG, "[alloc] freeing page %zu.\n", page); + + uint8_t bitmap[PAGE_SIZE]; + file_read(tree->file, 1, bitmap, PAGE_SIZE); + bitmap_unset(bitmap, page - 2); + file_write(tree->file, 1, bitmap, PAGE_SIZE); +} + +btree_node_t* _btree_load_node(btree_node_t *node, btree_t *tree, page_t page) +{ + char buffer[PAGE_SIZE]; + file_read(tree->file, page, buffer, PAGE_SIZE); + + memcpy(&node->header, buffer, sizeof(btree_node_header_t)); + memcpy(node->entries, buffer + sizeof(btree_node_header_t), NODE_SIZE_MAX); + + node->page = page; + + return node; +} + +void _btree_write_node(btree_node_t *node, btree_t *tree, page_t page) +{ + char buffer[PAGE_SIZE] = {}; + + node->page = page; + + memcpy(buffer, &node->header, sizeof(btree_node_header_t)); + memcpy(buffer + sizeof(btree_node_header_t), node->entries, NODE_SIZE(node) - sizeof(btree_node_header_t)); + + file_write(tree->file, page, buffer, NODE_SIZE(node)); +} +bool _btree_stack_empty(btree_t *tree) +{ + return tree->trace.current < tree->trace.stack; +} + +void _btree_stack_push(btree_t *tree, btree_node_t *node, unsigned n) +{ + printfv(VERBOSITY_DEBUG, "[btree] Pushing %zu page on parent stack, entry index %u\n", node->page, n); + tree->trace.current++; + tree->trace.current->node = *node; + tree->trace.current->entry = btree_get_entry(&tree->trace.current->node, n); + tree->trace.current->position = n; +} + + +btree_stack_elem_t* _btree_stack_pop(btree_t *tree) +{ + if (_btree_stack_empty(tree)) { + return NULL; + } + + return tree->trace.current--; +} + +void _btree_stack_clear(btree_t *tree) +{ + tree->trace.current = tree->trace.stack - 1; +} + +unsigned _btree_concat_entries(void* current, size_t na, void* added, size_t nb) +{ + // allocate 
temporary buffer on a stack + void* buffer = alloca(SIZEOF_ENTRIES(na + nb)); + unsigned k = 0; + page_t last = PAGE_NONE; + for (unsigned i = 0, j = 0; i < na || j < nb; k++) { + btree_entry_t *dest = NODE_ENTRY(buffer, k); + + if (i >= na) { + *dest = *NODE_ENTRY(added, j++); + } else if (j >= nb) { + *dest = *NODE_ENTRY(current, i++); + } else { + btree_entry_t *a = NODE_ENTRY(current, i); + btree_entry_t *b = NODE_ENTRY(added, j); + + if (a->key < b->key) { + *dest = *a; + i++; + } else { + *dest = *b; + j++; + } + } + + if (last) { + dest->left = last; + } + last = dest->right; + } + + // copy from buffer to first operand + memcpy(current, buffer, SIZEOF_ENTRIES(na + nb)); + return k; +} + +void _btree_node_insert_entry(btree_node_t* node, btree_entry_t entry) +{ + printfv(VERBOSITY_DEBUG, "[btree] inserting { %zu < %zu > %zu } on %zu\n", entry.left, entry.key, entry.right, node->page); + + _btree_concat_entries(node->entries, node->header.entries, &entry, 1); + node->header.entries++; +} + +void _btree_init(btree_t *tree) +{ + _btree_stack_clear(tree); +} + +int btree_init(btree_t *tree, char* filename, size_t d) +{ + printfv(VERBOSITY_DEBUG, "[btree] Initializing btree in file %s d=%zu.\n", filename, d); + tree->file = file_open(filename, "w+", PAGE_SIZE); + + if (!tree->file) { + fprintf(stderr, "Cannot open file %s.\n", filename); + return BTREE_ERR_CANNOT_OPEN_FILE; + } + + tree->header.d = d; + tree->header.page_size = PAGE_SIZE; + tree->header.root = _btree_alloc(tree); + + _btree_save_header(tree); + + btree_node_t root; + root.header.flags = NODE_IS_ROOT | NODE_IS_LEAF; + root.header.entries = 0; + + _btree_write_node(&root, tree, tree->header.root); + _btree_init(tree); + + return 0; +} + +int btree_open(btree_t *tree, char* filename) +{ + printfv(VERBOSITY_DEBUG, "[btree] Trying to open btree index in file %s\n", filename); + + tree->file = file_open(filename, "r+", PAGE_SIZE); + + if (!tree->file) { + fprintf(stderr, "Cannot open file %s.\n", 
filename); + return BTREE_ERR_PAGE_SIZE_DIFFERENT; + } + + file_read(tree->file, 0, &tree->header, sizeof(btree_header_t)); + + printfv(VERBOSITY_DEBUG, "[btree] Found BTREE d=%zu, ps=%zu, root=%" PRIu64 "\n", tree->header.d, tree->header.page_size, tree->header.root); + + if (tree->header.page_size != PAGE_SIZE) { + fprintf(stderr, "BTree page size mismatch, expecting %d got %zu, closing.\n", PAGE_SIZE, tree->header.page_size); + file_close(tree->file); + return BTREE_ERR_PAGE_SIZE_DIFFERENT; + } + + _btree_init(tree); + + return 0; +} + +btree_siblings_t _btree_get_siblings(btree_t *tree, page_t page) +{ + btree_siblings_t result = { PAGE_NONE, PAGE_NONE, NULL, NULL }; + + if (_btree_stack_empty(tree)) { + return result; + } + + btree_node_t *parent = &tree->trace.current->node; + + size_t total = parent->header.entries + 1; + for (unsigned i = 0; i < total; i++) { + page_t current = *(page_t*)NODE_ENTRY(parent->entries, i); + + if (current == page) { + if (i > 0) { + result.left = *(page_t*)NODE_ENTRY(parent->entries, i - 1); + result.left_entry = NODE_ENTRY(parent->entries, i - 1); + } + + if (i < total) { + result.right = *(page_t*)NODE_ENTRY(parent->entries, i + 1); + result.right_entry = NODE_ENTRY(parent->entries, i); + } + + break; + } + } + + return result; +} + +void _btree_rebalance(btree_t *tree, btree_node_t *left, btree_node_t *right, btree_entry_t* parent) +{ + btree_node_t *pnode = &tree->trace.current->node; + + size_t nl = left->header.entries; + size_t nr = right->header.entries; + size_t n = nl; + + btree_entry_t rotated = *parent; + rotated.left = NODE_ENTRY(left->entries, nl - 1)->right; + rotated.right = NODE_ENTRY(right->entries, 0)->left; + + void* buffer = alloca(SIZEOF_ENTRIES(nl + nr + 1)); + memcpy(buffer, left->entries, SIZEOF_ENTRIES(nl)); + n = _btree_concat_entries(buffer, n, right->entries, nr); + n = _btree_concat_entries(buffer, n, &rotated, 1); + + size_t pivot = n / 2; + + left->header.entries = 0; + right->header.entries = 0; 
/*
 * Try to resolve an overflow of `old` without splitting it.
 *
 * Looks up the siblings of `old` (left first, then right). The first one
 * holding fewer than 2*d entries receives `entry`, after which the two
 * nodes are rebalanced around the parent entry separating them.
 *
 * Returns true when a sibling absorbed the overflow, false when no sibling
 * exists or both are full (the caller then has to split).
 */
bool _btree_compensate_insert(btree_t *tree, btree_node_t *old, btree_entry_t *entry)
{
    btree_siblings_t siblings = _btree_get_siblings(tree, old->page);
    btree_node_t other;

    if (siblings.left) {
        _btree_load_node(&other, tree, siblings.left);
        if (other.header.entries < 2*tree->header.d) {
            /* the new entry goes to the sibling; rebalancing redistributes
             * all entries of both nodes afterwards */
            _btree_node_insert_entry(&other, *entry);
            _btree_rebalance(tree, &other, old, siblings.left_entry);
            return true;
        }
    }

    if (siblings.right) {
        /* `other` is reused: the left sibling (if any) was full */
        _btree_load_node(&other, tree, siblings.right);
        if (other.header.entries < 2*tree->header.d) {
            _btree_node_insert_entry(&other, *entry);
            _btree_rebalance(tree, old, &other, siblings.right_entry);
            return true;
        }
    }

    return false;
}
NODE_IS_ROOT, + .entries = 0, + }; + parent->page = _btree_alloc(tree); + + tree->header.root = parent->page; + _btree_save_header(tree); + + printfv(VERBOSITY_DEBUG, "[btree] designated new root %zu.\n", parent->page); + }; + + printfv(VERBOSITY_DEBUG, "[btree] spliting node %zu.\n", old->page); + + btree_entry_t parent_entry = *NODE_ENTRY(buffer, half); + + parent_entry.left = left.page; + parent_entry.right = right.page; + + _btree_insert_into_node(tree, parent, &parent_entry); + + for (unsigned int i = 0; i < half; i++) { + _btree_node_insert_entry(&left, *NODE_ENTRY(buffer, i)); + _btree_node_insert_entry(&right, *NODE_ENTRY(buffer, half + i + 1)); + } + + _btree_write_node(&left, tree, left.page); + _btree_write_node(&right, tree, right.page); +} + +void _btree_insert_into_node(btree_t *tree, btree_node_t *node, btree_entry_t *entry) +{ + printfv(VERBOSITY_DEBUG, "[btree] Inserting onto page %zu.\n", node->page); + + if (node->header.entries < 2 * tree->header.d) { + _btree_node_insert_entry(node, *entry); + _btree_write_node(node, tree, node->page); + return; + } + + printfv(VERBOSITY_DEBUG, "[btree] %zu Overflow!\n", node->page); + if (_btree_compensate_insert(tree, node, entry)) { + return; + } + + printfv(VERBOSITY_DEBUG, "[btree] Unable to compensate %zu!\n", node->page); + _btree_split_node(tree, node, entry); +} + +page_t btree_find(btree_t *tree, record_key_t key, btree_entry_t* dest, btree_node_t *node, unsigned *index) +{ + if (!node) { + node = alloca(sizeof(btree_node_t)); + } + + page_t page = tree->header.root; + btree_entry_t *entry; + + _btree_stack_clear(tree); + + do { + _btree_load_node(node, tree, page); + + for(unsigned i = 0; i < node->header.entries; i++) { + entry = btree_get_entry(node, i); + + if (entry->key == key) { + if (dest) memcpy(dest, entry, sizeof(btree_entry_t)); + if (index) *index = i; + + return page; + } + + if (entry->key > key && entry->left) { + page = entry->left; + _btree_stack_push(tree, node, i); + break; + } + } 
+ + if (page == node->page && ~node->header.flags & NODE_IS_LEAF) { + page = entry->right; + _btree_stack_push(tree, node, node->header.entries - 1); + } + } while (page && ~node->header.flags & NODE_IS_LEAF); + + return PAGE_NONE; +} + +page_t btree_insert(btree_t *tree, record_t record) +{ + record_key_t key = record.key; + btree_node_t node; + + if (btree_find(tree, key, NULL, &node, NULL) != PAGE_NONE) { + printfv(VERBOSITY_DEBUG, "[btree] record with key %zu already exists.\n", key); + return PAGE_NONE; + } + btree_entry_t entry = { PAGE_NONE, key, 2137, PAGE_NONE }; + + _btree_insert_into_node(tree, &node, &entry); + return node.page; +} + +void _btree_merge(btree_t *tree, btree_node_t* left, btree_node_t* right) +{ + printfv(VERBOSITY_DEBUG, "[btree] merging %zu with %zu.\n", left->page, right->page); + + size_t nl = left->header.entries, + nr = right->header.entries, + n = 0; + + btree_stack_elem_t *parent = _btree_stack_pop(tree); + btree_entry_t middle = *parent->entry; + middle.left = NODE_ENTRY(left->entries, nl - 1)->right; + middle.right = NODE_ENTRY(right->entries, 0)->left; + + void *buffer = alloca(SIZEOF_ENTRIES(nl + nr + 1)); + n = _btree_concat_entries(buffer, n, left->entries, nl); + n = _btree_concat_entries(buffer, n, &middle, 1); + n = _btree_concat_entries(buffer, n, right->entries, nr); + + memcpy(left->entries, buffer, SIZEOF_ENTRIES(n)); + left->header.entries = n; + + _btree_page_free(tree, right->page); + + parent->entry->left = left->page; + parent->entry->right = left->page; + + _btree_remove_from_node(tree, &parent->node, parent->position); + + if (parent->node.header.flags & NODE_IS_ROOT && parent->node.header.entries == 0) { + _btree_page_free(tree, parent->node.page); + left->header.flags |= NODE_IS_ROOT; + tree->header.root = left->page; + + printfv(VERBOSITY_DEBUG, "[btree] designated new root %zu.\n", left->page); + } else { + _btree_write_node(&parent->node, tree, parent->node.page); + } + + _btree_write_node(left, tree, 
left->page); +} + +void _btree_fix_underflow(btree_t *tree, btree_node_t *node) { + btree_node_t sibling; + btree_siblings_t siblings = _btree_get_siblings(tree, node->page); + btree_entry_t *entry; + + if (siblings.left != PAGE_NONE) { + _btree_load_node(&sibling, tree, siblings.left); + entry = siblings.left_entry; + if (sibling.header.entries + node->header.entries >= 2*tree->header.d) { + printfv(VERBOSITY_DEBUG, "[btree] rebalancing with left sibling %zu.\n", siblings.left); + _btree_rebalance(tree, &sibling, node, siblings.left_entry); + return; + } + } + + if (siblings.right != PAGE_NONE) { + _btree_load_node(&sibling, tree, siblings.right); + entry = siblings.right_entry; + if (sibling.header.entries + node->header.entries >= 2*tree->header.d) { + printfv(VERBOSITY_DEBUG, "[btree] rebalancing with right sibling %zu.\n", siblings.right); + _btree_rebalance(tree, node, &sibling, siblings.right_entry); + return; + } + } + + printfv(VERBOSITY_DEBUG, "[btree] unable to rebalance.\n"); + + if (siblings.right != PAGE_NONE) { + _btree_merge(tree, node, &sibling); + } else { + _btree_merge(tree, &sibling, node); + } +} + +void _btree_remove_from_node(btree_t *tree, btree_node_t *node, unsigned index) +{ + printfv(VERBOSITY_DEBUG, "[btree] Removing %u entry from %zu.\n", index, node->page); + size_t n = node->header.entries; + void *buffer = alloca(SIZEOF_ENTRIES(n)); + + btree_entry_t* current; + unsigned m = 0; + for (unsigned i = 0; i < n; i++) { + current = NODE_ENTRY(node->entries, i); + + if (i != index) { + m = _btree_concat_entries(buffer, m, current, 1); + } + } + + memcpy(node->entries, buffer, SIZEOF_ENTRIES(node->header.entries)); + node->header.entries = m; + + if ((~node->header.flags & NODE_IS_ROOT) && node->header.entries < tree->header.d) { + printfv(VERBOSITY_DEBUG, "[btree] underflow in %zu.\n", node->page); + _btree_fix_underflow(tree, node); + } else { + _btree_write_node(node, tree, node->page); + } +} + + +page_t btree_remove(btree_t *tree, 
record_key_t key) +{ + btree_node_t node; + btree_entry_t *entry; + unsigned index; + + if (btree_find(tree, key, NULL, &node, &index) == PAGE_NONE) { + printfv(VERBOSITY_DEBUG, "[btree] record with key %zu does not exist.\n", key); + // 404 + return PAGE_NONE; + } + + entry = btree_get_entry(&node, index); + + if (node.header.flags & NODE_IS_LEAF) { + printfv(VERBOSITY_DEBUG, "[btree] removing record with key %zu from leaf %zu.\n", entry->key, node.page); + _btree_remove_from_node(tree, &node, index); + _btree_write_node(&node, tree, node.page); + } else { + printfv(VERBOSITY_DEBUG, "[btree] removing record with key %zu from node %zu.\n", entry->key, node.page); + + btree_node_t *replacement; + btree_entry_t *replaced; + + _btree_stack_push(tree, &node, index); + replacement = &tree->trace.current->node; + _btree_load_node(&node, tree, entry->right); + + while (~node.header.flags & NODE_IS_LEAF) { + _btree_stack_push(tree, &node, 0); + replaced = NODE_ENTRY(node.entries, 0); + _btree_load_node(&node, tree, replaced->left); + } + + replaced = NODE_ENTRY(node.entries, 0); + entry = btree_get_entry(replacement, index); + + printfv(VERBOSITY_DEBUG, "[btree] exchanging %zu with %zu.\n", entry->key, replaced->key); + SWAP(record_key_t, replaced->key, entry->key); + SWAP(offset_t, replaced->location, entry->location); + _btree_write_node(replacement, tree, replacement->page); + + _btree_remove_from_node(tree, &node, 0); + } + + return node.page; +} + +btree_entry_t *btree_get_entry(btree_node_t* node, unsigned n) +{ + if (n > node->header.entries) { + return NULL; + } + + return (btree_entry_t*)(node->entries + NODE_ENTRY_OFFSET(n)); +} + +void btree_close(btree_t* tree) +{ + printfv(VERBOSITY_DEBUG, "[btree] Closing index %s.\n", tree->file->filename); + file_close(tree->file); +} diff --git a/index.h b/index.h new file mode 100644 index 0000000..deb2e15 --- /dev/null +++ b/index.h @@ -0,0 +1,77 @@ +#ifndef INDEX_H +#define INDEX_H + +#include "io.h" +#include 
/*
 * On-page node layout helpers.
 *
 * Entries overlap: the `right` pointer of entry i shares its storage with
 * the `left` pointer of entry i + 1, so every entry advances the offset by
 * sizeof(btree_entry_t) - sizeof(page_t) and one extra page_t closes the
 * sequence.
 */

/* Bytes taken by a node header followed by n overlapping entries. */
#define SIZEOF_ENTRIES(n) (sizeof(btree_node_header_t) + (sizeof(btree_entry_t) - sizeof(page_t))*(n) + ((n) > 0 ? 1 : 0) * sizeof(page_t))
/* Serialised size of `node` (a pointer expression). */
#define NODE_SIZE(node) SIZEOF_ENTRIES((node)->header.entries)
/* Capacity of the entries area of a node. */
#define NODE_SIZE_MAX (PAGE_SIZE - sizeof(btree_node_header_t))
/* Byte offset of entry n; fully parenthesised so it composes in any expression. */
#define NODE_ENTRY_OFFSET(n) ((sizeof(btree_entry_t) - sizeof(page_t))*(n))

/* Pointer to entry n inside an entries buffer. */
#define NODE_ENTRY(buffer, n) ((btree_entry_t*)((char*)(buffer) + NODE_ENTRY_OFFSET(n)))
/* Index of `entry` inside `buffer`; inverse of NODE_ENTRY (operands were swapped before). */
#define ENTRY_INDEX(buffer, entry) ((size_t)((char*)(entry) - (char*)(buffer)) / (sizeof(btree_entry_t) - sizeof(page_t)))

/* Node header flags. */
#define NODE_IS_ROOT 1
#define NODE_IS_LEAF 2
0000000..ecd2c29 --- /dev/null +++ b/io.c @@ -0,0 +1,60 @@ +#include "io.h" +#include "common.h" + +#include +#include +#include + +file_t* file_open(const char* filename, const char* mode, size_t page_size) +{ + FILE* handle = fopen(filename, mode); + + if (!handle) { + printfv(VERBOSITY_NORMAL, "Can't open file %s.\n", filename); + return NULL; + } + + printfv(VERBOSITY_DEBUG, "File %s opened in %s with page size %zu.\n", filename, mode, page_size); + file_t* result = malloc(sizeof(file_t)); + + result->page_size = page_size; + result->file = handle; + result->filename = malloc(strlen(filename) + 1); + result->buffer = malloc(page_size); + + strcpy(result->filename, filename); + + return result; +} + +void file_close(file_t* file) +{ + printfv(VERBOSITY_DEBUG, "Closing file %s.\n", file->filename); + fclose(file->file); + + free(file->filename); + free(file->buffer); + free(file); +} + +size_t file_read(file_t* file, unsigned block, void* buffer, size_t length) +{ + memset(file->buffer, 0, file->page_size); + + printfv(VERBOSITY_DEBUG, "Reading page %u of %s.\n", block, file->filename); + + fseek(file->file, block * file->page_size, SEEK_SET); + int read = fread(file->buffer, 1, file->page_size, file->file); + memcpy(buffer, file->buffer, length); + return read; +} + +size_t file_write(file_t* file, unsigned block, const void* buffer, size_t length) +{ + memset(file->buffer, 0, file->page_size); + memcpy(file->buffer, buffer, length); + + printfv(VERBOSITY_DEBUG, "Writing page %u to %s. 
(%zu bytes)\n", block, file->filename, length); + fseek(file->file, block * file->page_size, SEEK_SET); + return fwrite(file->buffer, 1, file->page_size, file->file); +} diff --git a/io.h b/io.h new file mode 100644 index 0000000..14a6e73 --- /dev/null +++ b/io.h @@ -0,0 +1,28 @@ +#ifndef IO_H +#define IO_H + +#define PAGE_SIZE 512 +#define PAGE_NONE 0 + +#include +#include + +typedef struct { + FILE* file; + + char* filename; + size_t page_size; + + void* buffer; +} file_t; + +typedef uint64_t page_t; +typedef uint64_t offset_t; + +file_t* file_open(const char* filename, const char* mode, size_t page_size); +void file_close(file_t* file); + +size_t file_read(file_t* file, unsigned block, void* buffer, size_t size); +size_t file_write(file_t* file, unsigned block, const void* buffer, size_t size); + +#endif diff --git a/makeidx.c b/makeidx.c new file mode 100644 index 0000000..8736452 --- /dev/null +++ b/makeidx.c @@ -0,0 +1,68 @@ +#include +#define OPTPARSE_IMPLEMENTATION +#define OPTPARSE_API static +#include "common.h" +#include "io.h" +#include "index.h" + +typedef struct { + char* index; + unsigned d; + unsigned argc; +} opts_t; + +opts_t options; + +void init_args(int args, char* argv[]) +{ + optparse_t opts; + optparse_init(&opts, argv); + + for (char opt; opt != -1; opt = optparse(&opts, "qv")) { + switch (opt) { + case 'q': + verbosity--; + break; + case 'v': + verbosity++; + break; + } + } + + char* argument; + + options.index = optparse_arg(&opts); + + if ((argument = optparse_arg(&opts))) { + options.d = strtoul(argument, NULL, 0); + } else { + options.d = 10; + } + + options.argc = opts.optind; +} + +void help(const char* name) { + printf( + "Usage:\n" + "\t %s index [d]\n" + , + name + ); +} + +int main(int argc, char* argv[]) +{ + init_args(argc, argv); + + if (options.argc < 2) { + help(argv[0]); + return 0; + } + + btree_t tree; + btree_init(&tree, options.index, options.d); + btree_close(&tree); + + return EXIT_SUCCESS; +} diff --git a/openidx.c 
b/openidx.c new file mode 100644 index 0000000..73d54e8 --- /dev/null +++ b/openidx.c @@ -0,0 +1,309 @@ +#include +#include +#include +#include +#include +#include + +#define OPTPARSE_IMPLEMENTATION +#define OPTPARSE_API static +#include "common.h" +#include "index.h" + +#define RESULT_EXIT -1 +#define RESULT_OK 0 + +typedef int result_t; + +typedef struct { + const char* command; + const char* help; + result_t (*function)(const char* command, char* argline); +} command_t; + +typedef struct { + char* index; +} opts_t; + +opts_t options; +btree_t tree; + +void init_args(int args, char* argv[]) +{ + optparse_t opts; + optparse_init(&opts, argv); + + for (char opt; opt != -1; opt = optparse(&opts, "qv")) { + switch (opt) { + case 'q': + verbosity--; + break; + case 'v': + verbosity++; + break; + } + } + + options.index = optparse_arg(&opts); +} + +void help(const char* name) { + printf( + "%s - some help" + , + name + ); +} + +result_t help_command(const char* command, char* args); +result_t exit_command(const char* command, char* args); +result_t insert_command(const char* command, char* args); +result_t dump_command(const char* command, char* args); +result_t print_command(const char* command, char* args); +result_t records_command(const char* command, char* args); +result_t find_command(const char* command, char* args); +result_t delete_command(const char* command, char* args); +result_t verbosity_command(const char* command, char* args); + +static command_t commands[] = { + { "help", "Prints out help", help_command }, + { "exit", "Self explanatory", exit_command }, + { "insert", "Adds record to index", insert_command }, + { "dump", "Dumps given page", dump_command }, + { "print", "Prints tree", print_command }, + { "records", "Prints records of given page", records_command }, + { "find", "Finds record", find_command }, + { "delete", "Deletes record", delete_command }, + { "verbosity", "Changes the verbosity", verbosity_command }, +}; + +result_t exit_command(const 
char* command, char* args) +{ + return RESULT_EXIT; +} + +result_t verbosity_command(const char* command, char* args) +{ + verbosity_t v; + char buffer[PAGE_SIZE]; + + if (sscanf(args, "%d", &v) == 1) { + verbosity = v; + } else { + printf("Usage: verbosity new\n"); + } + + return RESULT_OK; +} + +result_t dump_command(const char* command, char* args) +{ + page_t page; + char buffer[PAGE_SIZE]; + + if (sscanf(args, "%zu", &page) == 1) { + file_read(tree.file, page, buffer, PAGE_SIZE); + hexdump(buffer, PAGE_SIZE); + } else { + printf("Usage: dump page\n"); + } + + return RESULT_OK; +} + +result_t insert_command(const char* command, char* args) +{ + record_t record; + + if (sscanf(args, "%u %lf %lf", &record.key, &record.x, &record.y) == 3) { + btree_insert(&tree, record); + } else { + printf("usage: insert key x y\n"); + } + + return RESULT_OK; +} + +result_t find_command(const char* command, char* args) +{ + record_key_t key; + + if (sscanf(args, "%u", &key) == 1) { + btree_entry_t entry; + page_t page; + + if ((page = btree_find(&tree, key, &entry, NULL, NULL))) { + printf("Record %u found on page %zu, offset: %lu\n", key, page, entry.location); + } else { + printf("404 Not found\n"); + } + } else { + printf("Usage: find key\n"); + } + + return RESULT_OK; +} + +result_t delete_command(const char* command, char* args) +{ + record_key_t key; + + if (sscanf(args, "%u", &key) == 1) { + page_t page; + + if ((page = btree_remove(&tree, key))) { + printf("Record %u removed from page %zu\n", key, page); + } else { + printf("404 Not found\n"); + } + } else { + printf("Usage: find key\n"); + } + + return RESULT_OK; +} + +result_t records_command(const char* command, char* args) +{ + page_t page; + char buffer[PAGE_SIZE]; + + if (sscanf(args, "%zu", &page) == 1) { + file_read(tree.file, page, buffer, PAGE_SIZE); + + btree_node_t node; + memcpy(&node.header, buffer, sizeof(node.header)); + memcpy(&node.entries, buffer + sizeof(node.header), NODE_SIZE_MAX); + + printf( + 
"Node %zu, entries: %u, flags: [%c%c]\n", + page, node.header.entries, + node.header.flags & NODE_IS_ROOT ? 'R' : ' ', + node.header.flags & NODE_IS_LEAF ? 'L' : ' ' + ); + + printf("Records in node (%u):\n", node.header.entries); + for (int i = 0; i < node.header.entries; i++) { + btree_entry_t *entry = btree_get_entry(&node, i); + printf(" %zu < %u [0x%zx] > %zu\n", entry->left, entry->key, entry->location, entry->right); + } + } else { + printf("Usage: print page\n"); + } + + return RESULT_OK; +} + +void print_page(page_t page, unsigned depth, unsigned current) +{ + char buffer[PAGE_SIZE]; + char prefix[1024] = {}; + memset(prefix, ' ', current * 2); + + if (!depth || !page) return; + + file_read(tree.file, page, buffer, PAGE_SIZE); + + btree_node_t node; + memcpy(&node.header, buffer, sizeof(node.header)); + memcpy(&node.entries, buffer + sizeof(node.header), NODE_SIZE_MAX); + + if (node.header.entries == 0) { + printf("%sempty\n", prefix); + return; + } + + btree_entry_t *entry; + for (int i = 0; i < node.header.entries; i++) { + entry = btree_get_entry(&node, i); + print_page(entry->left, depth - 1, current + 1); + printf("%s%zu < %u [0x%zx] > %zu\n", prefix, entry->left, entry->key, entry->location, entry->right); + } + print_page(entry->right, depth - 1, current + 1); +} + +result_t print_command(const char* command, char* args) +{ + page_t page = 0; + unsigned depth = 4; + + if (sscanf(args, "%zu %u", &page, &depth) >= 1) { + print_page(page ? 
page : tree.header.root, depth, 0); + } else { + printf("Usage: print page\n"); + } + + return RESULT_OK; +} + +result_t help_command(const char* command, char* args) +{ + size_t count = sizeof(commands) / sizeof(command_t); + printf("Available commands (%zu): \n", count); + + for (size_t i = 0; i < count; i++) { + printf("\t%s - %s\n", commands[i].command, commands[i].help); + } + + return RESULT_OK; +} + +command_t* get_command(char* argline) +{ + char command[128]; + argline = strtok(argline, " \t\n\r"); + + if (!argline) { + return NULL; + } + + strcpy(command, argline); + + size_t count = sizeof(commands) / sizeof(command_t); + for (size_t i = 0; i < count; i++) { + if (strcmp(commands[i].command, command) == 0) { + return commands + i; + } + } + + return NULL; +} + +void handle_ctrlc(int sig) +{ + printf("\nCtrl-C - exiting gracefully...\n"); + btree_close(&tree); + exit(0); +} + +int main(int argc, char* argv[]) +{ + char *line, prompt[1024]; + command_t* command; + + signal(SIGINT, handle_ctrlc); + + init_args(argc, argv); + + if (btree_open(&tree, options.index) == 0) { + sprintf(prompt, "%s> ", options.index); + + while ((line = readline(prompt))) { + command = get_command(line); + + if (!command) { + printf("Unknown command!\n"); + continue; + } + + if (command->function(line, line + strlen(line) + 1) == RESULT_EXIT) { + break; + } + } + + btree_close(&tree); + return EXIT_SUCCESS; + } + + return -1; +} diff --git a/optparse.h b/optparse.h new file mode 100644 index 0000000..3a577a7 --- /dev/null +++ b/optparse.h @@ -0,0 +1,403 @@ +/* Optparse --- portable, reentrant, embeddable, getopt-like option parser + * + * This is free and unencumbered software released into the public domain. + * + * To get the implementation, define OPTPARSE_IMPLEMENTATION. + * Optionally define OPTPARSE_API to control the API's visibility + * and/or linkage (static, __attribute__, __declspec). + * + * The POSIX getopt() option parser has three fatal flaws. 
These flaws + * are solved by Optparse. + * + * 1) Parser state is stored entirely in global variables, some of + * which are static and inaccessible. This means only one thread can + * use getopt(). It also means it's not possible to recursively parse + * nested sub-arguments while in the middle of argument parsing. + * Optparse fixes this by storing all state on a local struct. + * + * 2) The POSIX standard provides no way to properly reset the parser. + * This means for portable code that getopt() is only good for one + * run, over one argv with one option string. It also means subcommand + * options cannot be processed with getopt(). Most implementations + * provide a method to reset the parser, but it's not portable. + * Optparse provides an optparse_arg() function for stepping over + * subcommands and continuing parsing of options with another option + * string. The Optparse struct itself can be passed around to + * subcommand handlers for additional subcommand option parsing. A + * full reset can be achieved with an additional optparse_init(). + * + * 3) Error messages are printed to stderr. This can be disabled with + * opterr, but the messages themselves are still inaccessible. + * Optparse solves this by writing an error message in its errmsg + * field. The downside to Optparse is that this error message will + * always be in English rather than the current locale. + * + * Optparse should be familiar to anyone accustomed to getopt(), and + * it could be a nearly drop-in replacement. The option string is the + * same and the fields have the same names as the getopt() global + * variables (optarg, optind, optopt). + * + * Optparse also supports GNU-style long options with optparse_long(). + * The interface is slightly different and simpler than getopt_long(). + * + * By default, argv is permuted as it is parsed, moving non-option + * arguments to the end. This can be disabled by setting the `permute` + * field to 0 after initialization. 
+ */
+#ifndef OPTPARSE_H
+#define OPTPARSE_H
+/* Prefix placed before every public function; expands to nothing unless the includer defines it first. */
+#ifndef OPTPARSE_API
+# define OPTPARSE_API
+#endif
+/* Complete parser state. Set it up with optparse_init() before the first optparse()/optparse_long() call. */
+struct optparse {
+    char **argv;      /* argument vector being parsed, as passed to optparse_init() */
+    int permute;      /* nonzero: non-option arguments are moved behind the options (optparse_init() sets 1) */
+    int optind;       /* index in argv of the next argument to examine (optparse_init() sets 1) */
+    int optopt;       /* option character of the most recent option; for a long option its shortname */
+    char *optarg;     /* argument of the most recent option, or 0 when it has none */
+    char errmsg[64];  /* text of the last error; empty string when the last call did not fail */
+    int subopt;       /* number of characters already consumed inside a group of short options such as -abc */
+};
+/* How many arguments an option takes; the values equal the number of ':' after the option in a getopt() string. */
+enum optparse_argtype {
+    OPTPARSE_NONE,      /* the option takes no argument */
+    OPTPARSE_REQUIRED,  /* an argument must follow, attached or as the next argv element */
+    OPTPARSE_OPTIONAL   /* an argument is accepted only when attached (-ovalue, --name=value) */
+};
+/* One row of the table given to optparse_long(); the table ends with a row whose longname and shortname are both zero. */
+struct optparse_long {
+    const char *longname;           /* name matched after "--" */
+    int shortname;                  /* equivalent short option character; also the value optparse_long() returns for this option */
+    enum optparse_argtype argtype;  /* whether the option takes an argument */
+};
+
+/**
+ * Initializes the parser state.
+ */
+OPTPARSE_API
+void optparse_init(struct optparse *options, char **argv);
+
+/**
+ * Read the next option in the argv array.
+ * @param optstring a getopt()-formatted option string.
+ * @return the next option character, -1 for done, or '?' for error
+ *
+ * Just like getopt(), a character followed by no colons means no
+ * argument. One colon means the option has a required argument. Two
+ * colons means the option takes an optional argument.
+ */
+OPTPARSE_API
+int optparse(struct optparse *options, const char *optstring);
+
+/**
+ * Handles GNU-style long options in addition to getopt() options.
+ * This works a lot like GNU's getopt_long(). The last option in
+ * longopts must be all zeros, marking the end of the array. The
+ * longindex argument may be NULL.
+ */
+OPTPARSE_API
+int optparse_long(struct optparse *options,
+                  const struct optparse_long *longopts,
+                  int *longindex);
+
+/**
+ * Used for stepping over non-option arguments.
+ * @return the next non-option argument, or NULL for no more arguments
+ *
+ * Argument parsing can continue with optparse() after using this
+ * function. That would be used to parse the options for the
+ * subcommand returned by optparse_arg(). This function allows you to
+ * ignore the value of optind.
+ */
+OPTPARSE_API
+char *optparse_arg(struct optparse *options);
+
+/* Implementation: compiled only where OPTPARSE_IMPLEMENTATION is defined before this header is included. */
+#ifdef OPTPARSE_IMPLEMENTATION
+/* Message prefixes that optparse_error() writes into errmsg. */
+#define OPTPARSE_MSG_INVALID "invalid option"
+#define OPTPARSE_MSG_MISSING "option requires an argument"
+#define OPTPARSE_MSG_TOOMANY "option takes no arguments"
+/* Stores "<msg> -- '<data>'" in options->errmsg and returns '?'. Only data is truncated to fit; msg and the separator are copied unchecked, so msg has to be short (callers pass the OPTPARSE_MSG_* strings). */
+static int
+optparse_error(struct optparse *options, const char *msg, const char *data)
+{
+    unsigned p = 0;
+    const char *sep = " -- '";
+    while (*msg)
+        options->errmsg[p++] = *msg++;
+    while (*sep)
+        options->errmsg[p++] = *sep++;
+    while (p < sizeof(options->errmsg) - 2 && *data) /* keep two bytes for the closing quote and the NUL */
+        options->errmsg[p++] = *data++;
+    options->errmsg[p++] = '\'';
+    options->errmsg[p++] = '\0';
+    return '?';
+}
+/* Resets the parser: parsing starts at argv[1], permutation is on, no option group, argument or error is pending. optopt is not touched here. */
+OPTPARSE_API
+void
+optparse_init(struct optparse *options, char **argv)
+{
+    options->argv = argv;
+    options->permute = 1;
+    options->optind = 1;
+    options->subopt = 0;
+    options->optarg = 0;
+    options->errmsg[0] = '\0';
+}
+/* Nonzero when arg is exactly "--". */
+static int
+optparse_is_dashdash(const char *arg)
+{
+    return arg != 0 && arg[0] == '-' && arg[1] == '-' && arg[2] == '\0';
+}
+/* Nonzero when arg is '-' followed by a character other than '-'; a lone "-" does not qualify. */
+static int
+optparse_is_shortopt(const char *arg)
+{
+    return arg != 0 && arg[0] == '-' && arg[1] != '-' && arg[1] != '\0';
+}
+/* Nonzero when arg starts with "--" and has at least one more character. */
+static int
+optparse_is_longopt(const char *arg)
+{
+    return arg != 0 && arg[0] == '-' && arg[1] == '-' && arg[2] != '\0';
+}
+/* Moves argv[index] to argv[optind - 1] and shifts the elements in between one slot towards the front. */
+static void
+optparse_permute(struct optparse *options, int index)
+{
+    char *nonoption = options->argv[index];
+    int i;
+    for (i = index; i < options->optind - 1; i++)
+        options->argv[i] = options->argv[i + 1];
+    options->argv[options->optind - 1] = nonoption;
+}
+/* Looks c up in optstring. Returns -1 when c is ':' or does not occur, otherwise the number of ':' that follow it (0, 1 or 2, i.e. an enum optparse_argtype value). */
+static int
+optparse_argtype(const char *optstring, char c)
+{
+    int count = OPTPARSE_NONE;
+    if (c == ':')
+        return -1;
+    for (; *optstring && c != *optstring; optstring++); /* empty body: only advances to c or to the end */
+    if (!*optstring)
+        return -1;
+    if (optstring[1] == ':')
+        count += optstring[2] == ':' ? 2 : 1;
+    return count;
+}
+/* Parses the next short option; see the prototype for the contract. Clears errmsg, optopt and optarg on entry. */
+OPTPARSE_API
+int
+optparse(struct optparse *options, const char *optstring)
+{
+    int type;
+    char *next;
+    char *option = options->argv[options->optind];
+    options->errmsg[0] = '\0';
+    options->optopt = 0;
+    options->optarg = 0;
+    if (option == 0) {
+        return -1;
+    } else if (optparse_is_dashdash(option)) {
+        options->optind++; /* consume "--" */
+        return -1;
+    } else if (!optparse_is_shortopt(option)) {
+        if (options->permute) {
+            int index = options->optind++; /* step over the non-option ... */
+            int r = optparse(options, optstring); /* ... parse whatever follows it ... */
+            optparse_permute(options, index); /* ... and move it behind what was just parsed */
+            options->optind--; /* optind now refers to the moved non-option */
+            return r;
+        } else {
+            return -1;
+        }
+    }
+    option += options->subopt + 1; /* skip the '-' and the characters of this group handled by earlier calls */
+    options->optopt = option[0];
+    type = optparse_argtype(optstring, option[0]);
+    next = options->argv[options->optind + 1]; /* used as the argument of a REQUIRED option when none is attached */
+    switch (type) {
+    case -1: { /* character is not in optstring */
+        char str[2] = {0, 0};
+        str[0] = option[0];
+        options->optind++;
+        return optparse_error(options, OPTPARSE_MSG_INVALID, str);
+    }
+    case OPTPARSE_NONE:
+        if (option[1]) {
+            options->subopt++; /* more characters follow in the same argv element */
+        } else {
+            options->subopt = 0;
+            options->optind++;
+        }
+        return option[0];
+    case OPTPARSE_REQUIRED:
+        options->subopt = 0;
+        options->optind++;
+        if (option[1]) {
+            options->optarg = option + 1; /* attached argument: -ovalue */
+        } else if (next != 0) {
+            options->optarg = next; /* argument is the following argv element */
+            options->optind++;
+        } else {
+            char str[2] = {0, 0};
+            str[0] = option[0];
+            options->optarg = 0;
+            return optparse_error(options, OPTPARSE_MSG_MISSING, str);
+        }
+        return option[0];
+    case OPTPARSE_OPTIONAL: /* only an attached argument counts; the next argv element is never consumed */
+        options->subopt = 0;
+        options->optind++;
+        if (option[1])
+            options->optarg = option + 1;
+        else
+            options->optarg = 0;
+        return option[0];
+    }
+    return 0; /* not reached: every value optparse_argtype() returns has a case above */
+}
+/* Returns argv[optind] and steps past it unless it is the terminating null pointer; also ends any short option group in progress. */
+OPTPARSE_API
+char *
+optparse_arg(struct optparse *options)
+{
+    char *option = options->argv[options->optind];
+    options->subopt = 0;
+    if (option != 0)
+        options->optind++;
+    return option;
+}
+/* Nonzero when longopts[i] is the terminating row: neither longname nor shortname is set. */
+static int
+optparse_longopts_end(const struct optparse_long *longopts, int i)
+{
+    return !longopts[i].longname && !longopts[i].shortname;
+}
+
+static void +optparse_from_long(const struct optparse_long *longopts, char *optstring) +{ + char *p = optstring; + int i; + for (i = 0; !optparse_longopts_end(longopts, i); i++) { + if (longopts[i].shortname) { + int a; + *p++ = longopts[i].shortname; + for (a = 0; a < (int)longopts[i].argtype; a++) + *p++ = ':'; + } + } + *p = '\0'; +} + +/* Unlike strcmp(), handles options containing "=". */ +static int +optparse_longopts_match(const char *longname, const char *option) +{ + const char *a = option, *n = longname; + if (longname == 0) + return 0; + for (; *a && *n && *a != '='; a++, n++) + if (*a != *n) + return 0; + return *n == '\0' && (*a == '\0' || *a == '='); +} + +/* Return the part after "=", or NULL. */ +static char * +optparse_longopts_arg(char *option) +{ + for (; *option && *option != '='; option++); + if (*option == '=') + return option + 1; + else + return 0; +} + +static int +optparse_long_fallback(struct optparse *options, + const struct optparse_long *longopts, + int *longindex) +{ + int result; + char optstring[96 * 3 + 1]; /* 96 ASCII printable characters */ + optparse_from_long(longopts, optstring); + result = optparse(options, optstring); + if (longindex != 0) { + *longindex = -1; + if (result != -1) { + int i; + for (i = 0; !optparse_longopts_end(longopts, i); i++) + if (longopts[i].shortname == options->optopt) + *longindex = i; + } + } + return result; +} + +OPTPARSE_API +int +optparse_long(struct optparse *options, + const struct optparse_long *longopts, + int *longindex) +{ + int i; + char *option = options->argv[options->optind]; + if (option == 0) { + return -1; + } else if (optparse_is_dashdash(option)) { + options->optind++; /* consume "--" */ + return -1; + } else if (optparse_is_shortopt(option)) { + return optparse_long_fallback(options, longopts, longindex); + } else if (!optparse_is_longopt(option)) { + if (options->permute) { + int index = options->optind++; + int r = optparse_long(options, longopts, longindex); + 
optparse_permute(options, index); + options->optind--; + return r; + } else { + return -1; + } + } + + /* Parse as long option. */ + options->errmsg[0] = '\0'; + options->optopt = 0; + options->optarg = 0; + option += 2; /* skip "--" */ + options->optind++; + for (i = 0; !optparse_longopts_end(longopts, i); i++) { + const char *name = longopts[i].longname; + if (optparse_longopts_match(name, option)) { + char *arg; + if (longindex) + *longindex = i; + options->optopt = longopts[i].shortname; + arg = optparse_longopts_arg(option); + if (longopts[i].argtype == OPTPARSE_NONE && arg != 0) { + return optparse_error(options, OPTPARSE_MSG_TOOMANY, name); + } if (arg != 0) { + options->optarg = arg; + } else if (longopts[i].argtype == OPTPARSE_REQUIRED) { + options->optarg = options->argv[options->optind]; + if (options->optarg == 0) + return optparse_error(options, OPTPARSE_MSG_MISSING, name); + else + options->optind++; + } + return options->optopt; + } + } + return optparse_error(options, OPTPARSE_MSG_INVALID, option); +} + +#endif /* OPTPARSE_IMPLEMENTATION */ +#endif /* OPTPARSE_H */ diff --git a/record.h b/record.h new file mode 100644 index 0000000..aa1d0ac --- /dev/null +++ b/record.h @@ -0,0 +1,15 @@ +#ifndef RECORD_H +#define RECORD_H + +#include + +typedef uint32_t record_key_t; + +typedef struct { + record_key_t key; + double x; + double y; +} record_t; + +#endif /* RECORD_H */ +