okurwatodziala

This commit is contained in:
Kacper Donat 2018-12-08 21:03:55 +01:00
commit d018819188
15 changed files with 1685 additions and 0 deletions

7
.gitignore vendored Normal file
View File

@ -0,0 +1,7 @@
*
!report/
!*.c
!*.h
!*.tex
!Makefile
!.gitignore

17
Makefile Normal file
View File

@ -0,0 +1,17 @@
# Toolchain and flags.
# Libraries live in LDLIBS (not LDFLAGS) so that make's built-in link rule
# places them AFTER the object files, which is required by linkers that
# resolve symbols left-to-right.
CC=gcc
CFLAGS=-Wall -O0 -g
LDLIBS=-lm -lreadline

.PHONY: all clean

all: makeidx openidx

# Both tools are linked from objects only; bitmap.o is built by the .c.o rule
# below so it is compiled with the same CFLAGS as everything else.
makeidx: makeidx.o io.o common.o index.o bitmap.o
openidx: openidx.o io.o common.o index.o bitmap.o

# Pull in the header dependency files generated by -MMD (if any exist yet).
-include $(wildcard *.d)

.c.o:
	$(CC) $(CFLAGS) -MMD -c $< -o $@

# -f keeps `make clean` from failing on an already-clean tree.
clean:
	rm -f *.o *.d

30
bitmap.c Normal file
View File

@ -0,0 +1,30 @@
#include "bitmap.h"
#include <stdlib.h>
#include <inttypes.h>
/**
 * Find the position of the first cleared (0) bit in a bitmap.
 *
 * Bits are numbered from the least significant bit of map[0] upwards.
 *
 * @param map    bitmap bytes
 * @param length number of bytes in map
 * @return bit position of the first zero bit, or -1 when every bit is set
 *         (or length is 0)
 */
long long bitmap_find_first(uint8_t* map, size_t length)
{
    /* size_t index: an unsigned/int counter could wrap before reaching length */
    for (size_t i = 0; i < length; i++) {
        if (map[i] == 0xFF) continue;

        /* Invert within 8 bits so only this byte's free bits are candidates;
         * the value is non-zero because the byte is not 0xFF. */
        unsigned free_bits = (uint8_t)~map[i];
        return (long long)i * 8 + __builtin_ctz(free_bits);
    }
    return -1;
}
/**
 * Test a single bit of the bitmap.
 *
 * @return non-zero when bit `pos` is set, 0 otherwise. The value is the
 *         masked byte itself (not normalised to 1).
 */
char bitmap_get(uint8_t* map, size_t pos)
{
    const size_t byte_index = pos >> 3;
    const int mask = 1 << (pos & 7);

    return map[byte_index] & mask;
}
/**
 * Set bit `pos` of the bitmap to 1. Other bits are left untouched.
 */
void bitmap_set(uint8_t* map, size_t pos)
{
    uint8_t *cell = &map[pos >> 3];

    *cell = (uint8_t)(*cell | (1 << (pos & 7)));
}
/**
 * Clear bit `pos` of the bitmap to 0. Other bits are left untouched.
 */
void bitmap_unset(uint8_t* map, size_t pos)
{
    uint8_t *cell = map + (pos >> 3);
    const int mask = 1 << (pos & 7);

    *cell = (uint8_t)(*cell & ~mask);
}

13
bitmap.h Normal file
View File

@ -0,0 +1,13 @@
#ifndef BITMAP_H
#define BITMAP_H
#include <stdlib.h>
#include <inttypes.h>
/* Returns the position of the first zero bit in the `length`-byte bitmap,
 * or -1 when all bits are set. */
long long bitmap_find_first(uint8_t* map, size_t length);
/* Returns non-zero when bit `pos` is set, 0 otherwise. */
char bitmap_get(uint8_t* map, size_t pos);
/* Sets bit `pos` to 1. */
void bitmap_set(uint8_t* map, size_t pos);
/* Clears bit `pos` to 0. */
void bitmap_unset(uint8_t* map, size_t pos);
#endif /* BITMAP_H */

0
btree.h Normal file
View File

31
common.c Normal file
View File

@ -0,0 +1,31 @@
#include "common.h"
#include <stdarg.h>
verbosity_t verbosity = VERBOSITY_NORMAL;
/**
 * printf() gated by verbosity.
 *
 * The message is printed to stdout only when the global `verbosity` is at
 * least `level`; otherwise the call does nothing.
 *
 * @param level  minimum verbosity required for the message to appear
 * @param format printf-style format string, followed by its arguments
 */
void printfv(verbosity_t level, const char* format, ...)
{
    if (verbosity < level) {
        return;
    }

    va_list ap;
    va_start(ap, format);
    vprintf(format, ap);
    va_end(ap);
}
/**
 * Print a hex dump of `length` bytes starting at `data` to stdout.
 *
 * Each output row starts with the row's byte offset (8 hex digits) followed
 * by up to 16 bytes as two-digit upper-case hex values.
 */
void hexdump(const void* data, size_t length)
{
    const unsigned char *bytes = data;
    size_t offset = 0;

    while (offset < length) {
        const size_t remaining = length - offset;
        const size_t row = remaining > 16 ? 16 : remaining;

        printf("%08zx", offset);
        for (size_t col = 0; col < row; col++) {
            printf(" %02X", bytes[offset + col]);
        }
        printf("\n");

        offset += 16;
    }
}

20
common.h Normal file
View File

@ -0,0 +1,20 @@
#ifndef COMMON_H_
#define COMMON_H_
/* Verbosity levels; a message is printed when the global verbosity is
 * greater than or equal to the level passed to printfv(). */
#define VERBOSITY_QUIET -1
#define VERBOSITY_NORMAL 0
#define VERBOSITY_VERBOSE 1
#define VERBOSITY_DEBUG 2
#include <stdio.h>
#include "optparse.h"
/* Verbosity level, one of the VERBOSITY_* values (plain int so it can be
 * incremented/decremented by command-line flags). */
typedef int verbosity_t;
/* Shorthand for the option parser state from optparse.h. */
typedef struct optparse optparse_t;
/* Current global verbosity; defined in common.c. */
extern verbosity_t verbosity;
/* printf() that only prints when the global verbosity >= `verbosity`. */
void printfv(verbosity_t verbosity, const char* format, ...);
/* Prints `length` bytes of `data` as hex, 16 bytes per row. */
void hexdump(const void* data, size_t length);
#endif

607
index.c Normal file
View File

@ -0,0 +1,607 @@
#include <memory.h>
#include <stdbool.h>
#include "index.h"
#include "io.h"
#include "common.h"
#include "bitmap.h"
/* Error codes returned by btree_init() / btree_open(). */
#define BTREE_ERR_CANNOT_OPEN_FILE -1
#define BTREE_ERR_PAGE_SIZE_DIFFERENT -2
/* Swap two lvalues of the given type. Both x and y are evaluated twice,
 * so do not pass expressions with side effects. */
#define SWAP(type, x, y) do { type tmp; tmp = y; y = x; x = tmp; } while (0)
/* private functions */
/* Forward declarations: these two are called before their definitions
 * (from _btree_split_node and _btree_merge respectively). */
void _btree_insert_into_node(btree_t *tree, btree_node_t *node, btree_entry_t *entry);
void _btree_remove_from_node(btree_t *tree, btree_node_t *node, unsigned index);
/**
 * Write the in-memory tree header to page 0 of the index file.
 *
 * @return the value returned by file_write().
 */
int _btree_save_header(btree_t *tree)
{
    const btree_header_t *header = &tree->header;

    return file_write(tree->file, 0, header, sizeof *header);
}
/**
 * Allocate a free page in the index file.
 *
 * Page 1 holds the allocation bitmap; bit N of the bitmap describes page
 * N + 2. The first clear bit is marked as used and written back.
 *
 * @return number of the newly allocated page, or PAGE_NONE when the bitmap
 *         has no free bit left.
 */
page_t _btree_alloc(btree_t *tree)
{
    uint8_t bitmap[PAGE_SIZE];
    file_read(tree->file, 1, bitmap, PAGE_SIZE);

    long long bit = bitmap_find_first(bitmap, PAGE_SIZE);
    if (bit < 0) {
        /* Full bitmap: setting bit -1 would write outside the buffer. */
        fprintf(stderr, "[alloc] no free pages left.\n");
        return PAGE_NONE;
    }

    bitmap_set(bitmap, (size_t)bit);
    file_write(tree->file, 1, bitmap, PAGE_SIZE);

    /* pages 0 (header) and 1 (bitmap) are reserved, hence the offset of 2 */
    page_t page = 2 + (page_t)bit;
    printfv(VERBOSITY_DEBUG, "[alloc] allocated page %" PRIu64 ".\n", page);
    return page;
}
/**
 * Mark a page as free in the allocation bitmap stored on page 1.
 *
 * Pages 0 and 1 are reserved (header and bitmap) and page numbers beyond
 * what the bitmap can describe are rejected; in both cases nothing is
 * changed.
 */
void _btree_page_free(btree_t *tree, page_t page)
{
    if (page < 2 || page - 2 >= (page_t)PAGE_SIZE * 8) {
        /* page - 2 would underflow or index past the bitmap */
        printfv(VERBOSITY_DEBUG, "[alloc] refusing to free page %" PRIu64 ".\n", page);
        return;
    }

    printfv(VERBOSITY_DEBUG, "[alloc] freeing page %" PRIu64 ".\n", page);

    uint8_t bitmap[PAGE_SIZE];
    file_read(tree->file, 1, bitmap, PAGE_SIZE);
    bitmap_unset(bitmap, (size_t)(page - 2));
    file_write(tree->file, 1, bitmap, PAGE_SIZE);
}
/**
 * Read page `page` from the index file into `node`.
 *
 * The page starts with the node header, followed by the packed entries.
 * `node->page` is set to the page number the node was read from.
 *
 * @return `node`, for call chaining.
 */
btree_node_t* _btree_load_node(btree_node_t *node, btree_t *tree, page_t page)
{
    char raw[PAGE_SIZE];
    const char *payload = raw + sizeof(btree_node_header_t);

    file_read(tree->file, page, raw, PAGE_SIZE);

    node->page = page;
    memcpy(&node->header, raw, sizeof(btree_node_header_t));
    memcpy(node->entries, payload, NODE_SIZE_MAX);

    return node;
}
/**
 * Serialise `node` (header followed by its used entries) and write it to
 * page `page`. Also records `page` in `node->page`.
 *
 * Only NODE_SIZE(node) bytes are handed to file_write(); the rest of the
 * staging buffer is zero.
 */
void _btree_write_node(btree_node_t *node, btree_t *tree, page_t page)
{
    const size_t header_size = sizeof(btree_node_header_t);
    const size_t total = NODE_SIZE(node);
    char image[PAGE_SIZE];

    memset(image, 0, sizeof image);
    memcpy(image, &node->header, header_size);
    memcpy(image + header_size, node->entries, total - header_size);

    node->page = page;
    file_write(tree->file, page, image, total);
}
/**
 * Tell whether the parent trace stack is empty, i.e. whether the current
 * pointer sits below the first stack slot.
 */
bool _btree_stack_empty(btree_t *tree)
{
    const btree_stack_elem_t *bottom = tree->trace.stack;

    return tree->trace.current < bottom;
}
/**
 * Push a copy of `node` onto the parent trace stack together with the index
 * `n` of the entry that was followed. The stored entry pointer refers to the
 * copy kept on the stack, not to the caller's node.
 */
void _btree_stack_push(btree_t *tree, btree_node_t *node, unsigned n)
{
    printfv(VERBOSITY_DEBUG, "[btree] Pushing %zu page on parent stack, entry index %u\n", node->page, n);

    btree_stack_elem_t *top = ++tree->trace.current;

    top->node = *node;
    top->entry = btree_get_entry(&top->node, n);
    top->position = n;
}
/**
 * Pop the top element of the parent trace stack.
 *
 * @return pointer to the popped element (it stays valid inside the stack
 *         array until overwritten by a later push), or NULL when the stack
 *         is empty.
 */
btree_stack_elem_t* _btree_stack_pop(btree_t *tree)
{
    btree_stack_elem_t *top = NULL;

    if (!_btree_stack_empty(tree)) {
        top = tree->trace.current;
        tree->trace.current = top - 1;
    }
    return top;
}
/**
 * Empty the parent trace stack by parking the current pointer one slot
 * below the first element.
 */
void _btree_stack_clear(btree_t *tree)
{
    btree_stack_elem_t *const bottom = tree->trace.stack;

    tree->trace.current = bottom - 1;
}
/*
 * Merge the `nb` entries of `added` into the `na` entries of `current`,
 * taking the smaller key first, and store the result back in `current`.
 *
 * Entries are packed with NODE_ENTRY(): consecutive entries are placed
 * sizeof(btree_entry_t) - sizeof(page_t) bytes apart, so the `right` field
 * of entry k shares storage with the `left` field of entry k + 1.
 *
 * `current` must be able to hold SIZEOF_ENTRIES(na + nb) bytes, because
 * that many bytes are copied back into it.
 *
 * Returns the number of entries written (na + nb).
 */
unsigned _btree_concat_entries(void* current, size_t na, void* added, size_t nb)
{
// merge into a temporary buffer allocated on the stack, so that `current`
// can be read while the result is being built
void* buffer = alloca(SIZEOF_ENTRIES(na + nb));
unsigned k = 0;
// `right` child of the previously emitted entry (PAGE_NONE before the first)
page_t last = PAGE_NONE;
for (unsigned i = 0, j = 0; i < na || j < nb; k++) {
btree_entry_t *dest = NODE_ENTRY(buffer, k);
if (i >= na) {
// `current` exhausted, drain `added`
*dest = *NODE_ENTRY(added, j++);
} else if (j >= nb) {
// `added` exhausted, drain `current`
*dest = *NODE_ENTRY(current, i++);
} else {
btree_entry_t *a = NODE_ENTRY(current, i);
btree_entry_t *b = NODE_ENTRY(added, j);
// on equal keys the entry from `added` is emitted first
if (a->key < b->key) {
*dest = *a;
i++;
} else {
*dest = *b;
j++;
}
}
// keep the shared child pointer consistent: a non-zero `right` of the
// previous entry overrides this entry's `left`
if (last) {
dest->left = last;
}
last = dest->right;
}
// copy from buffer to first operand
memcpy(current, buffer, SIZEOF_ENTRIES(na + nb));
return k;
}
/**
 * Insert `entry` into `node` at its key-ordered position (in memory only;
 * the node is not written to disk here) and bump the node's entry count.
 */
void _btree_node_insert_entry(btree_node_t* node, btree_entry_t entry)
{
    printfv(VERBOSITY_DEBUG, "[btree] inserting { %zu < %zu > %zu } on %zu\n", entry.left, entry.key, entry.right, node->page);

    const size_t before = node->header.entries;

    _btree_concat_entries(node->entries, before, &entry, 1);
    node->header.entries = (uint16_t)(before + 1);
}
/**
 * Common in-memory initialisation shared by btree_init() and btree_open():
 * resets the parent trace stack to empty.
 */
void _btree_init(btree_t *tree)
{
    /* same effect as _btree_stack_clear(): park `current` below slot 0 */
    tree->trace.current = tree->trace.stack - 1;
}
/**
 * Create a new, empty B-tree index in `filename` (the file is opened with
 * mode "w+").
 *
 * Writes the tree header to page 0, allocates a page for the root and
 * writes an empty node flagged as both root and leaf.
 *
 * @param d tree order; nodes accept new entries while they hold fewer than
 *          2*d of them
 * @return 0 on success, BTREE_ERR_CANNOT_OPEN_FILE when the file cannot be
 *         opened.
 */
int btree_init(btree_t *tree, char* filename, size_t d)
{
    printfv(VERBOSITY_DEBUG, "[btree] Initializing btree in file %s d=%zu.\n", filename, d);

    file_t *handle = file_open(filename, "w+", PAGE_SIZE);
    tree->file = handle;
    if (handle == NULL) {
        fprintf(stderr, "Cannot open file %s.\n", filename);
        return BTREE_ERR_CANNOT_OPEN_FILE;
    }

    /* header first: the root page number comes from the allocator */
    tree->header.page_size = PAGE_SIZE;
    tree->header.d = d;
    tree->header.root = _btree_alloc(tree);
    _btree_save_header(tree);

    /* an empty root is a leaf as well */
    btree_node_t root;
    root.header.entries = 0;
    root.header.flags = NODE_IS_ROOT | NODE_IS_LEAF;
    _btree_write_node(&root, tree, tree->header.root);

    _btree_stack_clear(tree);
    return 0;
}
/**
 * Open an existing B-tree index stored in `filename` (mode "r+").
 *
 * Reads the tree header from page 0 and verifies that the file was created
 * with the same PAGE_SIZE this build uses.
 *
 * @return 0 on success,
 *         BTREE_ERR_CANNOT_OPEN_FILE when the file cannot be opened,
 *         BTREE_ERR_PAGE_SIZE_DIFFERENT when the stored page size differs
 *         (the file is closed again and tree->file is reset to NULL).
 */
int btree_open(btree_t *tree, char* filename)
{
    printfv(VERBOSITY_DEBUG, "[btree] Trying to open btree index in file %s\n", filename);

    tree->file = file_open(filename, "r+", PAGE_SIZE);
    if (!tree->file) {
        fprintf(stderr, "Cannot open file %s.\n", filename);
        /* an unopenable file is not a page size problem */
        return BTREE_ERR_CANNOT_OPEN_FILE;
    }

    file_read(tree->file, 0, &tree->header, sizeof(btree_header_t));
    printfv(VERBOSITY_DEBUG, "[btree] Found BTREE d=%zu, ps=%zu, root=%" PRIu64 "\n", tree->header.d, tree->header.page_size, tree->header.root);

    if (tree->header.page_size != PAGE_SIZE) {
        fprintf(stderr, "BTree page size mismatch, expecting %d got %zu, closing.\n", PAGE_SIZE, tree->header.page_size);
        file_close(tree->file);
        /* do not leave a dangling handle behind */
        tree->file = NULL;
        return BTREE_ERR_PAGE_SIZE_DIFFERENT;
    }

    _btree_init(tree);
    return 0;
}
/**
 * Look up the left and right siblings of `page` in its parent, which is
 * taken from the top of the parent trace stack.
 *
 * A parent with N entries has N + 1 children; child i is the `left` pointer
 * of (packed) entry i, and the last child is the slot right after the last
 * entry. The separator between child i - 1 and child i is entry i - 1.
 *
 * @return sibling page numbers and pointers to the separating parent
 *         entries; a missing sibling is reported as PAGE_NONE / NULL. All
 *         fields are empty when the stack is empty or `page` is not a child
 *         of the parent.
 */
btree_siblings_t _btree_get_siblings(btree_t *tree, page_t page)
{
    btree_siblings_t result = { PAGE_NONE, PAGE_NONE, NULL, NULL };

    if (_btree_stack_empty(tree)) {
        return result;
    }

    btree_node_t *parent = &tree->trace.current->node;
    size_t total = (size_t)parent->header.entries + 1;

    for (size_t i = 0; i < total; i++) {
        page_t current = *(page_t*)NODE_ENTRY(parent->entries, i);
        if (current != page) {
            continue;
        }

        if (i > 0) {
            result.left = *(page_t*)NODE_ENTRY(parent->entries, i - 1);
            result.left_entry = NODE_ENTRY(parent->entries, i - 1);
        }

        /* The last child (i == total - 1) has no right sibling; reading
         * slot i + 1 there would run past the parent's child pointers. */
        if (i + 1 < total) {
            result.right = *(page_t*)NODE_ENTRY(parent->entries, i + 1);
            result.right_entry = NODE_ENTRY(parent->entries, i);
        }
        break;
    }

    return result;
}
/*
 * Redistribute entries evenly between two adjacent siblings.
 *
 * All entries of `left`, all entries of `right` and a copy of the separator
 * `parent` are merged into one key-ordered buffer. The first half goes back
 * to `left`, the middle entry becomes the new separator (only its key and
 * location are stored into *parent, the child pointers of the separator are
 * kept), and the rest goes to `right`. All three nodes are written to disk.
 *
 * NOTE(review): `parent` is expected to point into the node on top of the
 * trace stack, since that node (pnode) is what gets written back - confirm
 * against callers.
 */
void _btree_rebalance(btree_t *tree, btree_node_t *left, btree_node_t *right, btree_entry_t* parent)
{
btree_node_t *pnode = &tree->trace.current->node;
size_t nl = left->header.entries;
size_t nr = right->header.entries;
size_t n = nl;
// the separator moves down between the two halves, so it adopts the
// innermost children of both siblings
btree_entry_t rotated = *parent;
rotated.left = NODE_ENTRY(left->entries, nl - 1)->right;
rotated.right = NODE_ENTRY(right->entries, 0)->left;
// gather left + right + separator in key order
void* buffer = alloca(SIZEOF_ENTRIES(nl + nr + 1));
memcpy(buffer, left->entries, SIZEOF_ENTRIES(nl));
n = _btree_concat_entries(buffer, n, right->entries, nr);
n = _btree_concat_entries(buffer, n, &rotated, 1);
size_t pivot = n / 2;
// both siblings are refilled from scratch
left->header.entries = 0;
right->header.entries = 0;
for (unsigned i = 0; i < n; i++) {
btree_entry_t* entry = NODE_ENTRY(buffer, i);
if (i < pivot) {
_btree_node_insert_entry(left, *entry);
} else if (i == pivot) {
// new separator: only key/location change in the parent
parent->key = entry->key;
parent->location = entry->location;
} else {
_btree_node_insert_entry(right, *entry);
}
}
_btree_write_node(left, tree, left->page);
_btree_write_node(right, tree, right->page);
_btree_write_node(pnode, tree, pnode->page);
}
/**
 * Try to make room for `entry` in the overflowing node `old` by shifting
 * entries into a sibling that still has space (fewer than 2*d entries).
 *
 * The left sibling is tried first, then the right one. The new entry is
 * put into the sibling and both nodes are then rebalanced through their
 * separator in the parent.
 *
 * @return true when the entry was stored this way, false when neither
 *         sibling exists or both are full.
 */
bool _btree_compensate_insert(btree_t *tree, btree_node_t *old, btree_entry_t *entry)
{
    const btree_siblings_t around = _btree_get_siblings(tree, old->page);
    const size_t capacity = 2 * tree->header.d;
    btree_node_t neighbour;

    if (around.left != PAGE_NONE) {
        _btree_load_node(&neighbour, tree, around.left);
        if (neighbour.header.entries < capacity) {
            _btree_node_insert_entry(&neighbour, *entry);
            _btree_rebalance(tree, &neighbour, old, around.left_entry);
            return true;
        }
    }

    if (around.right != PAGE_NONE) {
        _btree_load_node(&neighbour, tree, around.right);
        if (neighbour.header.entries < capacity) {
            _btree_node_insert_entry(&neighbour, *entry);
            _btree_rebalance(tree, old, &neighbour, around.right_entry);
            return true;
        }
    }

    return false;
}
/*
 * Split the full node `old` while inserting `entry`.
 *
 * The node's entries plus the new one are merged into a key-ordered buffer.
 * The middle entry is pushed up into the parent (popped from the trace
 * stack, or a freshly allocated new root when the stack is empty); the
 * entries before it stay on the old page, the entries after it go to a
 * newly allocated page.
 *
 * Inserting into the parent goes through _btree_insert_into_node(), so the
 * split may propagate further up the tree.
 */
void _btree_split_node(btree_t *tree, btree_node_t *old, btree_entry_t *entry)
{
btree_node_t *parent = NULL;
size_t n = old->header.entries;
size_t half = n / 2;
// n existing entries + the new one, in key order
void *buffer = alloca(SIZEOF_ENTRIES(n + 1));
memcpy(buffer, old->entries, SIZEOF_ENTRIES(n));
_btree_concat_entries(buffer, n, entry, 1);
if (!_btree_stack_empty(tree)) {
btree_stack_elem_t *trace = _btree_stack_pop(tree);
parent = &trace->node;
}
// left half reuses the old page; only the leaf flag is inherited, so a
// split root loses its root flag
btree_node_t left = {
.header = {
.flags = old->header.flags & NODE_IS_LEAF,
.entries = 0,
},
.page = old->page,
};
// right half gets a brand new page
btree_node_t right = {
.header = {
.flags = old->header.flags & NODE_IS_LEAF,
.entries = 0,
},
.page = _btree_alloc(tree),
};
if (!parent) {
// no parent on the trace: the split node was the root, so create a new
// root above it and persist the new root page in the file header
parent = alloca(sizeof(btree_node_t));
parent->header = (btree_node_header_t){
.flags = NODE_IS_ROOT,
.entries = 0,
};
parent->page = _btree_alloc(tree);
tree->header.root = parent->page;
_btree_save_header(tree);
printfv(VERBOSITY_DEBUG, "[btree] designated new root %zu.\n", parent->page);
};
printfv(VERBOSITY_DEBUG, "[btree] spliting node %zu.\n", old->page);
// the middle entry moves up and separates the two halves
btree_entry_t parent_entry = *NODE_ENTRY(buffer, half);
parent_entry.left = left.page;
parent_entry.right = right.page;
_btree_insert_into_node(tree, parent, &parent_entry);
// entries [0, half) go left, entries (half, 2*half] go right
for (unsigned int i = 0; i < half; i++) {
_btree_node_insert_entry(&left, *NODE_ENTRY(buffer, i));
_btree_node_insert_entry(&right, *NODE_ENTRY(buffer, half + i + 1));
}
_btree_write_node(&left, tree, left.page);
_btree_write_node(&right, tree, right.page);
}
/**
 * Insert `entry` into `node`, handling overflow.
 *
 * When the node holds fewer than 2*d entries the entry is stored and the
 * node written out. Otherwise compensation with a sibling is attempted,
 * and if that fails the node is split.
 */
void _btree_insert_into_node(btree_t *tree, btree_node_t *node, btree_entry_t *entry)
{
    printfv(VERBOSITY_DEBUG, "[btree] Inserting onto page %zu.\n", node->page);

    const bool has_room = node->header.entries < 2 * tree->header.d;
    if (!has_room) {
        printfv(VERBOSITY_DEBUG, "[btree] %zu Overflow!\n", node->page);
        if (!_btree_compensate_insert(tree, node, entry)) {
            printfv(VERBOSITY_DEBUG, "[btree] Unable to compensate %zu!\n", node->page);
            _btree_split_node(tree, node, entry);
        }
        return;
    }

    _btree_node_insert_entry(node, *entry);
    _btree_write_node(node, tree, node->page);
}
/*
 * Search the tree for `key`, starting at the root.
 *
 * The parent trace stack is cleared first and every node that is descended
 * through is pushed onto it together with the index of the entry followed.
 *
 * dest  - if not NULL, receives a copy of the matching entry
 * node  - if not NULL, receives the last node loaded (on a miss this is the
 *         node where the search stopped; btree_insert() inserts into it);
 *         when NULL a temporary node is allocated on the stack
 * index - if not NULL, receives the position of the match within its node
 *
 * Returns the page holding the key, or PAGE_NONE when it is not present.
 */
page_t btree_find(btree_t *tree, record_key_t key, btree_entry_t* dest, btree_node_t *node, unsigned *index)
{
if (!node) {
node = alloca(sizeof(btree_node_t));
}
page_t page = tree->header.root;
btree_entry_t *entry;
_btree_stack_clear(tree);
do {
_btree_load_node(node, tree, page);
for(unsigned i = 0; i < node->header.entries; i++) {
entry = btree_get_entry(node, i);
if (entry->key == key) {
if (dest) memcpy(dest, entry, sizeof(btree_entry_t));
if (index) *index = i;
return page;
}
// first entry with a larger key: descend into its left child, if it has
// one (entries without a left child are simply skipped)
if (entry->key > key && entry->left) {
page = entry->left;
_btree_stack_push(tree, node, i);
break;
}
}
// `page` unchanged means the loop did not descend; in an inner node the
// key can then only be under the right child of the last entry examined
// NOTE(review): `entry` is uninitialised here if an inner node has zero
// entries
if (page == node->page && ~node->header.flags & NODE_IS_LEAF) {
page = entry->right;
_btree_stack_push(tree, node, node->header.entries - 1);
}
// stop at a leaf, or when there is no child page to follow
} while (page && ~node->header.flags & NODE_IS_LEAF);
return PAGE_NONE;
}
/**
 * Insert the key of `record` into the index.
 *
 * The entry is created without children and with the fixed location value
 * 2137; only `record.key` is used.
 *
 * @return `page` of the node the search stopped at (where insertion
 *         started), or PAGE_NONE when the key already exists.
 */
page_t btree_insert(btree_t *tree, record_t record)
{
    btree_node_t leaf;
    const record_key_t key = record.key;

    const page_t existing = btree_find(tree, key, NULL, &leaf, NULL);
    if (existing != PAGE_NONE) {
        printfv(VERBOSITY_DEBUG, "[btree] record with key %zu already exists.\n", key);
        return PAGE_NONE;
    }

    btree_entry_t fresh = {
        .left = PAGE_NONE,
        .key = key,
        .location = 2137,
        .right = PAGE_NONE,
    };
    _btree_insert_into_node(tree, &leaf, &fresh);

    return leaf.page;
}
/**
 * Merge two adjacent siblings into `left` and release `right`'s page.
 *
 * The separator between them is taken from the parent on top of the trace
 * stack (which is popped), pulled down between the two entry runs, and then
 * removed from the parent. When that empties the root, `left` becomes the
 * new root and the file header is updated so the change survives a reopen.
 */
void _btree_merge(btree_t *tree, btree_node_t* left, btree_node_t* right)
{
    printfv(VERBOSITY_DEBUG, "[btree] merging %" PRIu64 " with %" PRIu64 ".\n", left->page, right->page);

    size_t nl = left->header.entries,
           nr = right->header.entries,
           n = 0;

    btree_stack_elem_t *parent = _btree_stack_pop(tree);
    if (!parent) {
        /* without a parent there is no separator to pull down */
        fprintf(stderr, "[btree] cannot merge pages without a parent on the trace stack.\n");
        return;
    }

    /* the separator adopts the innermost children of both siblings */
    btree_entry_t middle = *parent->entry;
    middle.left = NODE_ENTRY(left->entries, nl - 1)->right;
    middle.right = NODE_ENTRY(right->entries, 0)->left;

    /* left entries + separator + right entries, in key order */
    void *buffer = alloca(SIZEOF_ENTRIES(nl + nr + 1));
    n = _btree_concat_entries(buffer, n, left->entries, nl);
    n = _btree_concat_entries(buffer, n, &middle, 1);
    n = _btree_concat_entries(buffer, n, right->entries, nr);

    memcpy(left->entries, buffer, SIZEOF_ENTRIES(n));
    left->header.entries = n;

    _btree_page_free(tree, right->page);

    /* both children of the separator now refer to the merged node, so the
     * neighbours keep a valid child pointer once the separator is removed */
    parent->entry->left = left->page;
    parent->entry->right = left->page;
    _btree_remove_from_node(tree, &parent->node, parent->position);

    if ((parent->node.header.flags & NODE_IS_ROOT) && parent->node.header.entries == 0) {
        _btree_page_free(tree, parent->node.page);
        left->header.flags |= NODE_IS_ROOT;
        tree->header.root = left->page;
        /* persist the new root; otherwise the on-disk header keeps
         * pointing at the page that was just freed */
        _btree_save_header(tree);
        printfv(VERBOSITY_DEBUG, "[btree] designated new root %" PRIu64 ".\n", left->page);
    } else {
        _btree_write_node(&parent->node, tree, parent->node.page);
    }

    _btree_write_node(left, tree, left->page);
}
/**
 * Repair a node that dropped below d entries.
 *
 * First tries to rebalance with the left, then with the right sibling; a
 * sibling qualifies when both nodes together hold at least 2*d entries.
 * If neither qualifies the node is merged with a sibling (the right one
 * when it exists, otherwise the left one).
 */
void _btree_fix_underflow(btree_t *tree, btree_node_t *node) {
    btree_node_t sibling;
    btree_siblings_t siblings = _btree_get_siblings(tree, node->page);

    if (siblings.left == PAGE_NONE && siblings.right == PAGE_NONE) {
        /* Nothing to borrow from or merge with; merging would operate on
         * an uninitialised sibling. Keep the node as it is. */
        printfv(VERBOSITY_DEBUG, "[btree] no siblings to fix underflow with.\n");
        _btree_write_node(node, tree, node->page);
        return;
    }

    if (siblings.left != PAGE_NONE) {
        _btree_load_node(&sibling, tree, siblings.left);
        if (sibling.header.entries + node->header.entries >= 2*tree->header.d) {
            printfv(VERBOSITY_DEBUG, "[btree] rebalancing with left sibling %" PRIu64 ".\n", siblings.left);
            _btree_rebalance(tree, &sibling, node, siblings.left_entry);
            return;
        }
    }

    if (siblings.right != PAGE_NONE) {
        _btree_load_node(&sibling, tree, siblings.right);
        if (sibling.header.entries + node->header.entries >= 2*tree->header.d) {
            printfv(VERBOSITY_DEBUG, "[btree] rebalancing with right sibling %" PRIu64 ".\n", siblings.right);
            _btree_rebalance(tree, node, &sibling, siblings.right_entry);
            return;
        }
    }

    printfv(VERBOSITY_DEBUG, "[btree] unable to rebalance.\n");

    /* `sibling` holds the right sibling when there is one (it was loaded
     * last), otherwise the left one */
    if (siblings.right != PAGE_NONE) {
        _btree_merge(tree, node, &sibling);
    } else {
        _btree_merge(tree, &sibling, node);
    }
}
/*
 * Remove the entry at position `index` from `node`.
 *
 * The remaining entries are re-packed one by one through
 * _btree_concat_entries(), which also re-links the child pointer shared by
 * the neighbours of the removed entry. Afterwards a non-root node with
 * fewer than d entries is handed to _btree_fix_underflow(); otherwise the
 * node is written to disk.
 */
void _btree_remove_from_node(btree_t *tree, btree_node_t *node, unsigned index)
{
printfv(VERBOSITY_DEBUG, "[btree] Removing %u entry from %zu.\n", index, node->page);
size_t n = node->header.entries;
void *buffer = alloca(SIZEOF_ENTRIES(n));
btree_entry_t* current;
// number of entries kept so far
unsigned m = 0;
for (unsigned i = 0; i < n; i++) {
current = NODE_ENTRY(node->entries, i);
if (i != index) {
m = _btree_concat_entries(buffer, m, current, 1);
}
}
// copies the size of the OLD entry count; the bytes past the kept entries
// are whatever the temporary buffer contained
memcpy(node->entries, buffer, SIZEOF_ENTRIES(node->header.entries));
node->header.entries = m;
// the root is allowed to hold fewer than d entries
if ((~node->header.flags & NODE_IS_ROOT) && node->header.entries < tree->header.d) {
printfv(VERBOSITY_DEBUG, "[btree] underflow in %zu.\n", node->page);
_btree_fix_underflow(tree, node);
} else {
_btree_write_node(node, tree, node->page);
}
}
/*
 * Remove `key` from the index.
 *
 * A key found in a leaf is removed directly. A key found in an inner node
 * is first exchanged with the first entry of the leftmost leaf of its right
 * subtree, and that leaf entry is removed instead; the nodes walked through
 * on the way down are pushed onto the parent trace stack.
 *
 * Returns the page of the node the entry was finally removed from, or
 * PAGE_NONE when the key does not exist.
 */
page_t btree_remove(btree_t *tree, record_key_t key)
{
btree_node_t node;
btree_entry_t *entry;
unsigned index;
if (btree_find(tree, key, NULL, &node, &index) == PAGE_NONE) {
printfv(VERBOSITY_DEBUG, "[btree] record with key %zu does not exist.\n", key);
// 404
return PAGE_NONE;
}
entry = btree_get_entry(&node, index);
if (node.header.flags & NODE_IS_LEAF) {
printfv(VERBOSITY_DEBUG, "[btree] removing record with key %zu from leaf %zu.\n", entry->key, node.page);
_btree_remove_from_node(tree, &node, index);
_btree_write_node(&node, tree, node.page);
} else {
printfv(VERBOSITY_DEBUG, "[btree] removing record with key %zu from node %zu.\n", entry->key, node.page);
btree_node_t *replacement;
btree_entry_t *replaced;
// keep the inner node on the trace stack; `replacement` refers to that
// stack copy because `node` is reused for the descent below
_btree_stack_push(tree, &node, index);
replacement = &tree->trace.current->node;
// step into the right subtree, then keep going left down to a leaf
_btree_load_node(&node, tree, entry->right);
while (~node.header.flags & NODE_IS_LEAF) {
_btree_stack_push(tree, &node, 0);
replaced = NODE_ENTRY(node.entries, 0);
_btree_load_node(&node, tree, replaced->left);
}
replaced = NODE_ENTRY(node.entries, 0);
entry = btree_get_entry(replacement, index);
printfv(VERBOSITY_DEBUG, "[btree] exchanging %zu with %zu.\n", entry->key, replaced->key);
// only key and location are exchanged; child pointers stay in place
SWAP(record_key_t, replaced->key, entry->key);
SWAP(offset_t, replaced->location, entry->location);
_btree_write_node(replacement, tree, replacement->page);
// the key to delete now sits at position 0 of the leaf
_btree_remove_from_node(tree, &node, 0);
}
return node.page;
}
/**
 * Get a pointer to the n-th packed entry of `node`.
 *
 * @return pointer into node->entries, or NULL when n is greater than the
 *         node's entry count (n equal to the count is still accepted).
 */
btree_entry_t *btree_get_entry(btree_node_t* node, unsigned n)
{
    const bool in_range = n <= node->header.entries;

    return in_range ? NODE_ENTRY(node->entries, n) : NULL;
}
/**
 * Close the index file backing `tree`.
 *
 * Safe to call on a tree whose file was never opened (or was already
 * closed): in that case nothing happens. After closing, tree->file is NULL.
 */
void btree_close(btree_t* tree)
{
    if (!tree || !tree->file) {
        /* e.g. btree_init()/btree_open() failed earlier */
        return;
    }

    printfv(VERBOSITY_DEBUG, "[btree] Closing index %s.\n", tree->file->filename);
    file_close(tree->file);
    tree->file = NULL;
}

77
index.h Normal file
View File

@ -0,0 +1,77 @@
#ifndef INDEX_H
#define INDEX_H
#include "io.h"
#include "record.h"
#include <inttypes.h>
/*
 * Entry packing helpers.
 *
 * Entries are stored overlapped: consecutive entries are placed
 * sizeof(btree_entry_t) - sizeof(page_t) bytes apart, so the `right` child
 * of one entry shares storage with the `left` child of the next one.
 */
/* Bytes taken by a node header plus n packed entries (the trailing child
 * pointer is only counted when there is at least one entry). */
#define SIZEOF_ENTRIES(n) (sizeof(btree_node_header_t) + (sizeof(btree_entry_t) - sizeof(page_t))*(n) + ((n) > 0 ? 1 : 0) * sizeof(page_t))
/* Serialised size of a node, header included. */
#define NODE_SIZE(node) SIZEOF_ENTRIES((node)->header.entries)
/* Bytes available for packed entries within one page. */
#define NODE_SIZE_MAX (PAGE_SIZE - sizeof(btree_node_header_t))
/* Byte offset of the n-th packed entry from the start of the entry area. */
#define NODE_ENTRY_OFFSET(n) ((sizeof(btree_entry_t) - sizeof(page_t))*(n))
/* Pointer to the n-th packed entry in `buffer`. */
#define NODE_ENTRY(buffer, n) ((btree_entry_t*)((char*)(buffer) + NODE_ENTRY_OFFSET(n)))
/* Index of `entry` within `buffer` (inverse of NODE_ENTRY). */
#define ENTRY_INDEX(buffer, entry) ((size_t)((char*)(entry) - (char*)(buffer)) / (sizeof(btree_entry_t) - sizeof(page_t)))
/* Node header flags. */
#define NODE_IS_ROOT 1
#define NODE_IS_LEAF 2
/* Tree header, stored at the start of page 0 of the index file. */
typedef struct {
size_t d; /* 8 bytes long */
size_t page_size; /* 8 bytes long */
page_t root; /* 8 bytes long */
} btree_header_t; /* 24 bytes long */
/* Header stored at the start of every node page. */
typedef struct {
/* combination of NODE_IS_ROOT / NODE_IS_LEAF */
uint16_t flags;
/* number of entries currently stored in the node */
uint16_t entries;
} btree_node_header_t;
/* In-memory image of a node. */
typedef struct {
btree_node_header_t header;
/* packed entries, addressed with NODE_ENTRY() / btree_get_entry() */
char entries[NODE_SIZE_MAX];
/* page the node was loaded from / last written to (not stored on disk) */
page_t page;
} btree_node_t;
/* One key with its two child pages. When packed inside a node, `right`
 * shares storage with the `left` of the following entry. */
typedef struct {
page_t left;
record_key_t key;
offset_t location;
page_t right;
} btree_entry_t;
/* One level of the parent trace recorded while descending the tree. */
typedef struct {
/* copy of the node that was descended through */
btree_node_t node;
/* entry that was followed; points into `node` above */
btree_entry_t *entry;
/* index of that entry within the node */
unsigned position;
} btree_stack_elem_t;
/* An open B-tree index. */
typedef struct {
file_t *file;
btree_header_t header;
/* parent trace stack; `current` points at the top element and sits one
 * slot below `stack` when the stack is empty */
struct {
btree_stack_elem_t *current;
btree_stack_elem_t stack[100];
} trace;
} btree_t;
/* Result of a sibling lookup: neighbouring pages (PAGE_NONE when absent)
 * and the parent entries separating them from the node. */
typedef struct {
page_t left;
page_t right;
btree_entry_t *left_entry;
btree_entry_t *right_entry;
} btree_siblings_t;
/* Creates a new index file; returns 0 on success, negative on error. */
int btree_init(btree_t *tree, char* filename, size_t d);
/* Opens an existing index file; returns 0 on success, negative on error. */
int btree_open(btree_t *tree, char* filename);
/* Inserts record.key; returns PAGE_NONE when the key already exists. */
page_t btree_insert(btree_t *tree, record_t record);
/* Removes a key; returns PAGE_NONE when the key does not exist. */
page_t btree_remove(btree_t *tree, record_key_t key);
/* Finds a key; returns its page or PAGE_NONE. entry/node/index are optional
 * outputs and may be NULL. */
page_t btree_find(btree_t *tree, record_key_t key, btree_entry_t *entry, btree_node_t *node, unsigned *index);
/* Returns the n-th packed entry of a node, or NULL when n exceeds the
 * node's entry count. */
btree_entry_t *btree_get_entry(btree_node_t* node, unsigned n);
/* Closes the index file. */
void btree_close(btree_t* tree);
#endif /* INDEX_H */

60
io.c Normal file
View File

@ -0,0 +1,60 @@
#include "io.h"
#include "common.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/**
 * Open `filename` with the given fopen() mode as a paged file.
 *
 * @param page_size size in bytes of one page; a staging buffer of this size
 *                  is allocated for reads and writes
 * @return a new file_t owned by the caller (release with file_close()), or
 *         NULL when the file cannot be opened or memory runs out.
 */
file_t* file_open(const char* filename, const char* mode, size_t page_size)
{
    FILE* handle = fopen(filename, mode);
    if (!handle) {
        printfv(VERBOSITY_NORMAL, "Can't open file %s.\n", filename);
        return NULL;
    }
    printfv(VERBOSITY_DEBUG, "File %s opened in %s with page size %zu.\n", filename, mode, page_size);

    file_t* result = malloc(sizeof *result);
    char* name = malloc(strlen(filename) + 1);
    void* buffer = malloc(page_size);

    if (!result || !name || !buffer) {
        /* release whatever was obtained, including the stream itself */
        printfv(VERBOSITY_NORMAL, "Out of memory while opening file %s.\n", filename);
        free(buffer);
        free(name);
        free(result);
        fclose(handle);
        return NULL;
    }

    strcpy(name, filename);

    result->page_size = page_size;
    result->file = handle;
    result->filename = name;
    result->buffer = buffer;
    return result;
}
/**
 * Close a paged file and release everything file_open() allocated for it.
 * `file` must not be used afterwards.
 */
void file_close(file_t* file)
{
    printfv(VERBOSITY_DEBUG, "Closing file %s.\n", file->filename);

    FILE *stream = file->file;
    fclose(stream);

    /* members first, then the descriptor that owns them */
    free(file->filename);
    free(file->buffer);
    free(file);
}
/**
 * Read page number `block` and copy its first `length` bytes to `buffer`.
 *
 * The staging buffer is zeroed first, so bytes that could not be read from
 * the file (short read, page past end of file, failed seek) come out as 0.
 * `length` is capped at the page size.
 *
 * @return number of bytes actually read from the file (0 when seeking
 *         failed).
 */
size_t file_read(file_t* file, unsigned block, void* buffer, size_t length)
{
    if (length > file->page_size) {
        /* never copy more than the staging buffer holds */
        length = file->page_size;
    }

    memset(file->buffer, 0, file->page_size);
    printfv(VERBOSITY_DEBUG, "Reading page %u of %s.\n", block, file->filename);

    size_t got = 0;
    if (fseek(file->file, (long)(block * file->page_size), SEEK_SET) == 0) {
        got = fread(file->buffer, 1, file->page_size, file->file);
    }

    memcpy(buffer, file->buffer, length);
    return got;
}
/**
 * Write `length` bytes from `buffer` to page number `block`.
 *
 * A whole page is always written: the data is followed by zero padding up
 * to the page size. `length` is capped at the page size.
 *
 * @return number of bytes written to the file (0 when seeking failed).
 */
size_t file_write(file_t* file, unsigned block, const void* buffer, size_t length)
{
    if (length > file->page_size) {
        /* never copy more than the staging buffer holds */
        length = file->page_size;
    }

    memset(file->buffer, 0, file->page_size);
    memcpy(file->buffer, buffer, length);
    printfv(VERBOSITY_DEBUG, "Writing page %u to %s. (%zu bytes)\n", block, file->filename, length);

    if (fseek(file->file, (long)(block * file->page_size), SEEK_SET) != 0) {
        /* writing at the wrong offset would corrupt another page */
        return 0;
    }
    return fwrite(file->buffer, 1, file->page_size, file->file);
}

28
io.h Normal file
View File

@ -0,0 +1,28 @@
#ifndef IO_H
#define IO_H
#define PAGE_SIZE 512
#define PAGE_NONE 0
#include <stdio.h>
#include <inttypes.h>
typedef struct {
FILE* file;
char* filename;
size_t page_size;
void* buffer;
} file_t;
typedef uint64_t page_t;
typedef uint64_t offset_t;
file_t* file_open(const char* filename, const char* mode, size_t page_size);
void file_close(file_t* file);
size_t file_read(file_t* file, unsigned block, void* buffer, size_t size);
size_t file_write(file_t* file, unsigned block, const void* buffer, size_t size);
#endif

68
makeidx.c Normal file
View File

@ -0,0 +1,68 @@
#include <stdlib.h>
#define OPTPARSE_IMPLEMENTATION
#define OPTPARSE_API static
#include "common.h"
#include "io.h"
#include "index.h"
/* Command-line settings of makeidx, filled in by init_args(). */
typedef struct {
/* path of the index file to create (first positional argument) */
char* index;
/* tree order (second positional argument, 10 when omitted) */
unsigned d;
/* parser's optind after all arguments were consumed */
unsigned argc;
} opts_t;
opts_t options;
/**
 * Parse the command line of makeidx into the global `options`.
 *
 * Flags: -q lowers and -v raises the global verbosity (both repeatable).
 * Positional arguments: the index file name, then optionally the tree order
 * d (defaults to 10).
 */
void init_args(int args, char* argv[])
{
    (void)args; /* optparse works from the NULL-terminated argv alone */

    optparse_t opts;
    optparse_init(&opts, argv);

    /* int, not char: optparse() returns -1 when done, and the value must be
     * fetched before it is first tested */
    int opt;
    while ((opt = optparse(&opts, "qv")) != -1) {
        switch (opt) {
        case 'q':
            verbosity--;
            break;
        case 'v':
            verbosity++;
            break;
        default:
            /* unknown options are ignored */
            break;
        }
    }

    char* argument;
    options.index = optparse_arg(&opts);
    if ((argument = optparse_arg(&opts))) {
        options.d = strtoul(argument, NULL, 0);
    } else {
        options.d = 10;
    }
    options.argc = opts.optind;
}
/**
 * Print the usage text of makeidx to stdout.
 *
 * @param name program name to show in the usage line
 */
void help(const char* name) {
    printf("Usage:\n"
           "\t %s index [d]\n",
           name);
}
/**
 * makeidx entry point: creates an empty B-tree index file.
 *
 * Prints the usage text (and exits with 0) when no index file was given.
 * Exits with EXIT_FAILURE when the index cannot be created.
 */
int main(int argc, char* argv[])
{
    init_args(argc, argv);

    if (options.argc < 2) {
        help(argv[0]);
        return 0;
    }

    btree_t tree;
    if (btree_init(&tree, options.index, options.d) != 0) {
        /* nothing was opened, so there is nothing to close */
        return EXIT_FAILURE;
    }

    btree_close(&tree);
    return EXIT_SUCCESS;
}

309
openidx.c Normal file
View File

@ -0,0 +1,309 @@
#include <stdlib.h>
#include <stdio.h>
#include <stdbool.h>
#include <sys/signal.h>
#include <readline/readline.h>
#include <readline/history.h>
#define OPTPARSE_IMPLEMENTATION
#define OPTPARSE_API static
#include "common.h"
#include "index.h"
/* Command handler results: RESULT_EXIT ends the interactive loop. */
#define RESULT_EXIT -1
#define RESULT_OK 0
typedef int result_t;
/* One interactive command: its name, help text and handler. The handler
 * receives the command name and the rest of the input line. */
typedef struct {
const char* command;
const char* help;
result_t (*function)(const char* command, char* argline);
} command_t;
/* Command-line settings of openidx, filled in by init_args(). */
typedef struct {
/* path of the index file to open (first positional argument) */
char* index;
} opts_t;
opts_t options;
/* The index all commands operate on. */
btree_t tree;
/**
 * Parse the command line of openidx into the global `options`.
 *
 * Flags: -q lowers and -v raises the global verbosity (both repeatable).
 * Positional argument: the index file name (NULL when missing).
 */
void init_args(int args, char* argv[])
{
    (void)args; /* optparse works from the NULL-terminated argv alone */

    optparse_t opts;
    optparse_init(&opts, argv);

    /* int, not char: optparse() returns -1 when done, and the value must be
     * fetched before it is first tested */
    int opt;
    while ((opt = optparse(&opts, "qv")) != -1) {
        switch (opt) {
        case 'q':
            verbosity--;
            break;
        case 'v':
            verbosity++;
            break;
        default:
            /* unknown options are ignored */
            break;
        }
    }

    options.index = optparse_arg(&opts);
}
/**
 * Print the (placeholder) help line of openidx to stdout. No trailing
 * newline is emitted.
 *
 * @param name program name to show
 */
void help(const char* name) {
    printf("%s - some help", name);
}
/* Handlers of the interactive commands; all share the command_t.function
 * signature (command name, rest of the input line). */
result_t help_command(const char* command, char* args);
result_t exit_command(const char* command, char* args);
result_t insert_command(const char* command, char* args);
result_t dump_command(const char* command, char* args);
result_t print_command(const char* command, char* args);
result_t records_command(const char* command, char* args);
result_t find_command(const char* command, char* args);
result_t delete_command(const char* command, char* args);
result_t verbosity_command(const char* command, char* args);
/* Command table searched by get_command() and listed by help_command(). */
static command_t commands[] = {
{ "help", "Prints out help", help_command },
{ "exit", "Self explanatory", exit_command },
{ "insert", "Adds record to index", insert_command },
{ "dump", "Dumps given page", dump_command },
{ "print", "Prints tree", print_command },
{ "records", "Prints records of given page", records_command },
{ "find", "Finds record", find_command },
{ "delete", "Deletes record", delete_command },
{ "verbosity", "Changes the verbosity", verbosity_command },
};
/**
 * "exit" command: asks the interactive loop to stop.
 *
 * @return always RESULT_EXIT
 */
result_t exit_command(const char* command, char* args)
{
    /* neither argument is needed */
    (void)command;
    (void)args;

    return RESULT_EXIT;
}
/**
 * "verbosity" command: sets the global verbosity to the integer given as
 * argument, or prints a usage line when the argument is not a number.
 *
 * @return always RESULT_OK
 */
result_t verbosity_command(const char* command, char* args)
{
    (void)command;

    verbosity_t v;
    if (sscanf(args, "%d", &v) == 1) {
        verbosity = v;
    } else {
        printf("Usage: verbosity new\n");
    }
    return RESULT_OK;
}
/**
 * "dump" command: prints a hex dump of the page whose number is given as
 * argument, or a usage line when the argument is not a number.
 *
 * @return always RESULT_OK
 */
result_t dump_command(const char* command, char* args)
{
    (void)command;

    page_t page;
    char buffer[PAGE_SIZE];

    /* page_t is uint64_t, so it needs the matching scan conversion */
    if (sscanf(args, "%" SCNu64, &page) == 1) {
        file_read(tree.file, page, buffer, PAGE_SIZE);
        hexdump(buffer, PAGE_SIZE);
    } else {
        printf("Usage: dump page\n");
    }
    return RESULT_OK;
}
/**
 * "insert" command: parses "key x y" from the argument line and inserts the
 * record into the index; prints a usage line when parsing fails.
 *
 * @return always RESULT_OK
 */
result_t insert_command(const char* command, char* args)
{
    record_t record;
    const int parsed = sscanf(args, "%u %lf %lf", &record.key, &record.x, &record.y);

    if (parsed != 3) {
        printf("usage: insert key x y\n");
        return RESULT_OK;
    }

    btree_insert(&tree, record);
    return RESULT_OK;
}
/**
 * "find" command: looks the given key up in the index and prints the page
 * and stored location it was found at, or "404 Not found".
 *
 * @return always RESULT_OK
 */
result_t find_command(const char* command, char* args)
{
    (void)command;

    record_key_t key;
    if (sscanf(args, "%u", &key) != 1) {
        printf("Usage: find key\n");
        return RESULT_OK;
    }

    btree_entry_t entry;
    page_t page = btree_find(&tree, key, &entry, NULL, NULL);
    if (page) {
        /* page_t / offset_t are uint64_t: use the matching conversions */
        printf("Record %u found on page %" PRIu64 ", offset: %" PRIu64 "\n", key, page, entry.location);
    } else {
        printf("404 Not found\n");
    }
    return RESULT_OK;
}
/**
 * "delete" command: removes the given key from the index and prints the
 * page it was removed from, or "404 Not found".
 *
 * @return always RESULT_OK
 */
result_t delete_command(const char* command, char* args)
{
    (void)command;

    record_key_t key;
    if (sscanf(args, "%u", &key) != 1) {
        /* usage must name this command, not "find" */
        printf("Usage: delete key\n");
        return RESULT_OK;
    }

    page_t page = btree_remove(&tree, key);
    if (page) {
        printf("Record %u removed from page %" PRIu64 "\n", key, page);
    } else {
        printf("404 Not found\n");
    }
    return RESULT_OK;
}
result_t records_command(const char* command, char* args)
{
page_t page;
char buffer[PAGE_SIZE];
if (sscanf(args, "%zu", &page) == 1) {
file_read(tree.file, page, buffer, PAGE_SIZE);
btree_node_t node;
memcpy(&node.header, buffer, sizeof(node.header));
memcpy(&node.entries, buffer + sizeof(node.header), NODE_SIZE_MAX);
printf(
"Node %zu, entries: %u, flags: [%c%c]\n",
page, node.header.entries,
node.header.flags & NODE_IS_ROOT ? 'R' : ' ',
node.header.flags & NODE_IS_LEAF ? 'L' : ' '
);
printf("Records in node (%u):\n", node.header.entries);
for (int i = 0; i < node.header.entries; i++) {
btree_entry_t *entry = btree_get_entry(&node, i);
printf(" %zu < %u [0x%zx] > %zu\n", entry->left, entry->key, entry->location, entry->right);
}
} else {
printf("Usage: print page\n");
}
return RESULT_OK;
}
/**
 * Recursively print the subtree rooted at `page` in key order.
 *
 * Each entry is printed on its own line, indented by two spaces per level;
 * the left child of every entry is printed before the entry and the right
 * child of the last entry after it.
 *
 * @param page    page to print (nothing is printed for PAGE_NONE)
 * @param depth   how many more levels to descend (0 prints nothing)
 * @param current nesting level of `page`, used for indentation
 */
void print_page(page_t page, unsigned depth, unsigned current)
{
    if (!depth || !page) return;

    /* indentation is produced by printf's field width instead of a fixed
     * prefix buffer, so deep nesting cannot overflow anything */
    const int indent = (int)(current * 2);

    char buffer[PAGE_SIZE];
    file_read(tree.file, page, buffer, PAGE_SIZE);

    btree_node_t node;
    memcpy(&node.header, buffer, sizeof(node.header));
    memcpy(&node.entries, buffer + sizeof(node.header), NODE_SIZE_MAX);

    if (node.header.entries == 0) {
        printf("%*sempty\n", indent, "");
        return;
    }

    /* the count comes from disk: never walk past what fits into a node */
    const size_t capacity = (NODE_SIZE_MAX - sizeof(page_t)) / NODE_ENTRY_OFFSET(1);
    size_t shown = node.header.entries;
    if (shown > capacity) {
        shown = capacity;
    }

    btree_entry_t *entry = NULL;
    for (size_t i = 0; i < shown; i++) {
        entry = btree_get_entry(&node, (unsigned)i);
        print_page(entry->left, depth - 1, current + 1);
        printf("%*s%" PRIu64 " < %u [0x%" PRIx64 "] > %" PRIu64 "\n", indent, "", entry->left, entry->key, entry->location, entry->right);
    }
    print_page(entry->right, depth - 1, current + 1);
}
/**
 * "print" command: prints the tree below a page.
 *
 * Arguments: "page [depth]". Page 0 stands for the tree root; depth
 * defaults to 4. A usage line is printed when no page number is given.
 *
 * @return always RESULT_OK
 */
result_t print_command(const char* command, char* args)
{
    (void)command;

    page_t page = 0;
    unsigned depth = 4;

    /* page_t is uint64_t, so it needs the matching scan conversion */
    if (sscanf(args, "%" SCNu64 " %u", &page, &depth) >= 1) {
        print_page(page ? page : tree.header.root, depth, 0);
    } else {
        printf("Usage: print page\n");
    }
    return RESULT_OK;
}
result_t help_command(const char* command, char* args)
{
size_t count = sizeof(commands) / sizeof(command_t);
printf("Available commands (%zu): \n", count);
for (size_t i = 0; i < count; i++) {
printf("\t%s - %s\n", commands[i].command, commands[i].help);
}
return RESULT_OK;
}
command_t* get_command(char* argline)
{
char command[128];
argline = strtok(argline, " \t\n\r");
if (!argline) {
return NULL;
}
strcpy(command, argline);
size_t count = sizeof(commands) / sizeof(command_t);
for (size_t i = 0; i < count; i++) {
if (strcmp(commands[i].command, command) == 0) {
return commands + i;
}
}
return NULL;
}
/**
 * SIGINT handler: announces the shutdown, closes the index and exits with
 * status 0.
 *
 * NOTE(review): stdio output, btree_close() and exit() are not
 * async-signal-safe; consider setting a flag here and shutting down from
 * the main loop instead.
 */
void handle_ctrlc(int sig)
{
    (void)sig;

    fputs("\nCtrl-C - exiting gracefully...\n", stdout);
    btree_close(&tree);
    exit(0);
}
int main(int argc, char* argv[])
{
char *line, prompt[1024];
command_t* command;
signal(SIGINT, handle_ctrlc);
init_args(argc, argv);
if (btree_open(&tree, options.index) == 0) {
sprintf(prompt, "%s> ", options.index);
while ((line = readline(prompt))) {
command = get_command(line);
if (!command) {
printf("Unknown command!\n");
continue;
}
if (command->function(line, line + strlen(line) + 1) == RESULT_EXIT) {
break;
}
}
btree_close(&tree);
return EXIT_SUCCESS;
}
return -1;
}

403
optparse.h Normal file
View File

@ -0,0 +1,403 @@
/* Optparse --- portable, reentrant, embeddable, getopt-like option parser
*
* This is free and unencumbered software released into the public domain.
*
* To get the implementation, define OPTPARSE_IMPLEMENTATION.
* Optionally define OPTPARSE_API to control the API's visibility
* and/or linkage (static, __attribute__, __declspec).
*
* The POSIX getopt() option parser has three fatal flaws. These flaws
* are solved by Optparse.
*
* 1) Parser state is stored entirely in global variables, some of
* which are static and inaccessible. This means only one thread can
* use getopt(). It also means it's not possible to recursively parse
* nested sub-arguments while in the middle of argument parsing.
* Optparse fixes this by storing all state on a local struct.
*
* 2) The POSIX standard provides no way to properly reset the parser.
* This means for portable code that getopt() is only good for one
* run, over one argv with one option string. It also means subcommand
* options cannot be processed with getopt(). Most implementations
* provide a method to reset the parser, but it's not portable.
* Optparse provides an optparse_arg() function for stepping over
* subcommands and continuing parsing of options with another option
* string. The Optparse struct itself can be passed around to
* subcommand handlers for additional subcommand option parsing. A
* full reset can be achieved with an additional optparse_init().
*
* 3) Error messages are printed to stderr. This can be disabled with
* opterr, but the messages themselves are still inaccessible.
* Optparse solves this by writing an error message in its errmsg
* field. The downside to Optparse is that this error message will
* always be in English rather than the current locale.
*
* Optparse should be familiar to anyone accustomed to getopt(), and
* it could be a nearly drop-in replacement. The option string is the
* same and the fields have the same names as the getopt() global
* variables (optarg, optind, optopt).
*
* Optparse also supports GNU-style long options with optparse_long().
* The interface is slightly different and simpler than getopt_long().
*
* By default, argv is permuted as it is parsed, moving non-option
* arguments to the end. This can be disabled by setting the `permute`
* field to 0 after initialization.
*/
#ifndef OPTPARSE_H
#define OPTPARSE_H
#ifndef OPTPARSE_API
# define OPTPARSE_API
#endif
/**
 * Complete parser state. All Optparse functions operate on one of these
 * instead of on globals; set it up with optparse_init() before use.
 */
struct optparse {
/* Argument vector being parsed; entries are reordered in place when
 * permute is non-zero. The parser stops at the first null entry. */
char **argv;
/* Non-zero (the optparse_init() default) moves non-option arguments
 * behind the options as parsing proceeds; set to 0 to stop at the
 * first non-option instead. */
int permute;
/* Index into argv of the next element to examine. */
int optind;
/* Option character of the most recently examined option (for long
 * options, the matching entry's shortname). */
int optopt;
/* Argument of the most recently parsed option, or null if none. */
char *optarg;
/* Message describing the most recent '?' result; cleared at the start
 * of every parse call. */
char errmsg[64];
/* Offset of the next character inside a cluster of short options
 * such as "-abc"; 0 when not in the middle of a cluster. */
int subopt;
};
/**
 * Whether an option takes an argument. The numeric values (0, 1, 2)
 * matter: they equal the number of ':' characters that follow the
 * option character in a getopt()-style option string.
 */
enum optparse_argtype {
/* The option takes no argument. */
OPTPARSE_NONE,
/* The option must be given an argument. */
OPTPARSE_REQUIRED,
/* The option may be given an argument. */
OPTPARSE_OPTIONAL
};
/**
 * One entry of the option table passed to optparse_long(). The table is
 * terminated by an entry whose longname is null and whose shortname is 0.
 */
struct optparse_long {
/* Name matched against "--name" or "--name=value"; may be null. */
const char *longname;
/* Value returned by optparse_long() when this entry matches; a non-zero
 * value is also used as the option's short ("-x") character. */
int shortname;
/* Whether the option takes an argument. */
enum optparse_argtype argtype;
};
/**
 * Initializes the parser state.
 * @param options the state to initialize
 * @param argv    the argument vector to parse; it must end with a null
 *                entry, and parsing starts at argv[1]
 *
 * Permutation is enabled by default. Calling this again on the same
 * struct fully resets the parser.
 */
OPTPARSE_API
void optparse_init(struct optparse *options, char **argv);
/**
 * Read the next option in the argv array.
 * @param options   parser state set up by optparse_init()
 * @param optstring a getopt()-formatted option string.
 * @return the next option character, -1 for done, or '?' for error
 *
 * Just like getopt(), a character followed by no colons means no
 * argument. One colon means the option has a required argument. Two
 * colons means the option takes an optional argument.
 *
 * After a successful return, the optarg field holds the option's
 * argument (or null). After '?', the errmsg field describes the problem
 * and the optopt field holds the offending option character.
 */
OPTPARSE_API
int optparse(struct optparse *options, const char *optstring);
/**
 * Handles GNU-style long options in addition to getopt() options.
 * This works a lot like GNU's getopt_long(). The last option in
 * longopts must be all zeros, marking the end of the array. The
 * longindex argument may be NULL.
 *
 * @param options   parser state set up by optparse_init()
 * @param longopts  zero-terminated table of recognized options
 * @param longindex if not NULL, receives the index of the matched table
 *                  entry; for a short option with no matching entry it
 *                  is set to -1
 * @return the shortname of the matched entry, -1 for done, or '?' for
 *         error (with the errmsg field filled in)
 */
OPTPARSE_API
int optparse_long(struct optparse *options,
const struct optparse_long *longopts,
int *longindex);
/**
 * Used for stepping over non-option arguments.
 * @param options parser state set up by optparse_init()
 * @return the next non-option argument, or NULL for no more arguments
 *
 * Argument parsing can continue with optparse() after using this
 * function. That would be used to parse the options for the
 * subcommand returned by optparse_arg(). This function allows you to
 * ignore the value of optind.
 *
 * The returned element is whatever argv[optind] currently holds; optind
 * is advanced past it unless it is the terminating null entry.
 */
OPTPARSE_API
char *optparse_arg(struct optparse *options);
/* Implementation */
#ifdef OPTPARSE_IMPLEMENTATION
/*
 * Message prefixes written to the errmsg field by optparse_error(); the
 * offending option name is appended as " -- '<name>'".
 */
#define OPTPARSE_MSG_INVALID "invalid option"
#define OPTPARSE_MSG_MISSING "option requires an argument"
#define OPTPARSE_MSG_TOOMANY "option takes no arguments"
/*
 * Stores "<msg> -- '<data>'" in options->errmsg and returns '?', so a
 * caller can write `return optparse_error(...)`.
 *
 * Only the data part is truncated to fit; msg is expected to be one of
 * the short OPTPARSE_MSG_* strings and is copied without a bounds check.
 */
static int
optparse_error(struct optparse *options, const char *msg, const char *data)
{
    static const char separator[] = " -- '";
    char *out = options->errmsg;
    const char *src;
    unsigned len = 0;

    for (src = msg; *src != '\0'; src++)
        out[len++] = *src;
    for (src = separator; *src != '\0'; src++)
        out[len++] = *src;
    /* Leave room for the closing quote and the terminator. */
    for (src = data; len < sizeof(options->errmsg) - 2 && *src != '\0'; src++)
        out[len++] = *src;
    out[len++] = '\'';
    out[len++] = '\0';
    return '?';
}
/*
 * Resets `options` so that parsing of `argv` starts from scratch:
 * no pending error message, option argument or short-option cluster,
 * permutation enabled, and argv[1] as the first element to examine.
 */
OPTPARSE_API
void
optparse_init(struct optparse *options, char **argv)
{
    options->errmsg[0] = '\0';
    options->optarg = 0;
    options->subopt = 0;
    options->optind = 1;
    options->permute = 1;
    options->argv = argv;
}
/* Returns 1 if arg is exactly "--" (the end-of-options marker), else 0. */
static int
optparse_is_dashdash(const char *arg)
{
    if (arg == 0)
        return 0;
    if (arg[0] != '-' || arg[1] != '-')
        return 0;
    return arg[2] == '\0';
}
/*
 * Returns 1 if arg looks like a short option or cluster ("-x", "-xyz"):
 * a single dash followed by at least one character that is not a dash.
 * A lone "-" and anything starting with "--" yield 0.
 */
static int
optparse_is_shortopt(const char *arg)
{
    if (arg == 0 || arg[0] != '-')
        return 0;
    if (arg[1] == '-')
        return 0;
    return arg[1] != '\0';
}
/*
 * Returns 1 if arg looks like a long option: "--" followed by at least
 * one more character. The bare "--" marker yields 0.
 */
static int
optparse_is_longopt(const char *arg)
{
    if (arg == 0)
        return 0;
    if (arg[0] != '-' || arg[1] != '-')
        return 0;
    return arg[2] != '\0';
}
/*
 * Moves the non-option at argv[index] to position optind - 1, shifting
 * the elements in between one slot towards the front. This places the
 * non-option directly behind the arguments that were just parsed.
 */
static void
optparse_permute(struct optparse *options, int index)
{
    char **argv = options->argv;
    int last = options->optind - 1;
    char *moved = argv[index];
    int pos = index;

    while (pos < last) {
        argv[pos] = argv[pos + 1];
        pos++;
    }
    argv[last] = moved;
}
/*
 * Looks up option character c in a getopt()-style optstring.
 *
 * Returns -1 if c is ':' or does not occur in optstring; otherwise the
 * number of ':' characters (0, 1 or 2) that follow c, which corresponds
 * to OPTPARSE_NONE, OPTPARSE_REQUIRED and OPTPARSE_OPTIONAL.
 */
static int
optparse_argtype(const char *optstring, char c)
{
    const char *spec;

    if (c == ':')
        return -1;
    spec = optstring;
    while (*spec != '\0' && *spec != c)
        spec++;
    if (*spec == '\0')
        return -1;
    if (spec[1] != ':')
        return OPTPARSE_NONE;
    return OPTPARSE_NONE + (spec[2] == ':' ? 2 : 1);
}
/*
 * Parses the next short option from options->argv.
 *
 * optstring uses getopt() syntax: "x" takes no argument, "x:" requires
 * one (attached as "-xARG" or given as the following argv element) and
 * "x::" takes an optional one (only when attached as "-xARG").
 *
 * Returns the option character on success. Returns -1 when argv is
 * exhausted, when "--" has been consumed, or, with permutation disabled,
 * when a non-option argument is reached. Returns '?' on error, in which
 * case errmsg describes the problem and optopt holds the offending
 * character.
 *
 * With permutation enabled, a non-option argument is skipped, the
 * remainder is parsed, and the non-option is then moved behind the
 * arguments that were consumed, so that after -1 is returned optind
 * refers to the first non-option.
 */
OPTPARSE_API
int
optparse(struct optparse *options, const char *optstring)
{
    int type;
    char *next;
    char *option = options->argv[options->optind];
    options->errmsg[0] = '\0';
    options->optopt = 0;
    options->optarg = 0;
    if (option == 0) {
        return -1;
    } else if (optparse_is_dashdash(option)) {
        options->optind++; /* consume "--" */
        return -1;
    } else if (!optparse_is_shortopt(option)) {
        if (options->permute) {
            /* Step over the non-option, parse what follows, then move
             * the non-option behind whatever was consumed. */
            int index = options->optind++;
            int r = optparse(options, optstring);
            optparse_permute(options, index);
            options->optind--;
            return r;
        } else {
            return -1;
        }
    }
    /* Skip the leading '-' and any cluster characters already handled. */
    option += options->subopt + 1;
    options->optopt = option[0];
    type = optparse_argtype(optstring, option[0]);
    next = options->argv[options->optind + 1];
    switch (type) {
    case -1: {
        char str[2] = {0, 0};
        str[0] = option[0];
        /* The rest of this argv element is abandoned, so the cluster
         * offset must be dropped as well; otherwise it would be applied
         * to the next element and could index past its terminator. */
        options->subopt = 0;
        options->optind++;
        return optparse_error(options, OPTPARSE_MSG_INVALID, str);
    }
    case OPTPARSE_NONE:
        if (option[1]) {
            /* More characters follow in the same cluster ("-abc"). */
            options->subopt++;
        } else {
            options->subopt = 0;
            options->optind++;
        }
        return option[0];
    case OPTPARSE_REQUIRED:
        options->subopt = 0;
        options->optind++;
        if (option[1]) {
            /* Attached argument: "-xARG". */
            options->optarg = option + 1;
        } else if (next != 0) {
            /* Separate argument: "-x ARG". */
            options->optarg = next;
            options->optind++;
        } else {
            char str[2] = {0, 0};
            str[0] = option[0];
            options->optarg = 0;
            return optparse_error(options, OPTPARSE_MSG_MISSING, str);
        }
        return option[0];
    case OPTPARSE_OPTIONAL:
        options->subopt = 0;
        options->optind++;
        /* An optional argument is only recognized when attached. */
        if (option[1])
            options->optarg = option + 1;
        else
            options->optarg = 0;
        return option[0];
    }
    return 0;
}
/*
 * Returns argv[optind] and steps past it, without interpreting it as an
 * option. Returns null (leaving optind unchanged) when the end of argv
 * has been reached. Any pending short-option cluster offset is dropped.
 */
OPTPARSE_API
char *
optparse_arg(struct optparse *options)
{
    char *current = options->argv[options->optind];

    options->subopt = 0;
    if (current == 0)
        return 0;
    options->optind++;
    return current;
}
/*
 * Returns non-zero if longopts[i] is the table terminator, i.e. it has
 * neither a long name nor a short name.
 */
static int
optparse_longopts_end(const struct optparse_long *longopts, int i)
{
    const struct optparse_long *entry = &longopts[i];

    if (entry->longname)
        return 0;
    return !entry->shortname;
}
/*
 * Builds a getopt()-style option string from a long-option table, so
 * that single-dash arguments can be handed to optparse().
 *
 * Each usable entry contributes its short option character followed by
 * one ':' per argtype level (none = 0, required = 1, optional = 2).
 * Entries with a shortname of 0 or of 127 and above are skipped: such
 * values only identify long-only options and cannot be typed as a short
 * option. Storing them would truncate them to an unrelated character
 * (possibly '\0', cutting the string short) and could overrun the
 * caller's buffer, which is sized for the printable ASCII range.
 *
 * optstring is supplied by the caller and must be large enough for the
 * result including the terminator.
 */
static void
optparse_from_long(const struct optparse_long *longopts, char *optstring)
{
    char *p = optstring;
    int i;
    for (i = 0; !optparse_longopts_end(longopts, i); i++) {
        if (longopts[i].shortname && longopts[i].shortname < 127) {
            int a;
            *p++ = longopts[i].shortname;
            for (a = 0; a < (int)longopts[i].argtype; a++)
                *p++ = ':';
        }
    }
    *p = '\0';
}
/*
 * Returns 1 if `option` (the text after "--") names `longname`, else 0.
 * Unlike strcmp(), the comparison stops at an '=' in option, so both
 * "name" and "name=value" match the long name "name". A null longname
 * never matches.
 */
static int
optparse_longopts_match(const char *longname, const char *option)
{
    const char *name;
    const char *given;

    if (longname == 0)
        return 0;
    name = longname;
    given = option;
    while (*given != '\0' && *name != '\0' && *given != '=') {
        if (*given != *name)
            return 0;
        given++;
        name++;
    }
    /* The whole long name must have been consumed... */
    if (*name != '\0')
        return 0;
    /* ...and the option must end here or continue with "=value". */
    return *given == '\0' || *given == '=';
}
/*
 * Returns a pointer to the text following the first '=' in option, or
 * null if option contains no '='. The result points into option itself.
 */
static char *
optparse_longopts_arg(char *option)
{
    char *cursor = option;

    while (*cursor != '\0') {
        if (*cursor == '=')
            return cursor + 1;
        cursor++;
    }
    return 0;
}
/*
 * Handles a single-dash argument on behalf of optparse_long(): derives a
 * getopt()-style option string from the long-option table and lets
 * optparse() do the parsing.
 *
 * If longindex is not null it is set to the index of the table entry
 * whose shortname equals the parsed option character (the last such
 * entry if there are several), or to -1 if there is none or parsing is
 * finished. Returns whatever optparse() returned.
 */
static int
optparse_long_fallback(struct optparse *options,
                       const struct optparse_long *longopts,
                       int *longindex)
{
    char optstring[96 * 3 + 1]; /* 96 ASCII printable characters */
    int result;
    int i;

    optparse_from_long(longopts, optstring);
    result = optparse(options, optstring);
    if (longindex == 0)
        return result;

    *longindex = -1;
    if (result == -1)
        return result;
    for (i = 0; !optparse_longopts_end(longopts, i); i++) {
        if (longopts[i].shortname == options->optopt)
            *longindex = i;
    }
    return result;
}
/*
 * Parses the next option, accepting both short options and GNU-style
 * long options ("--name", "--name=value", "--name value").
 *
 * Short options are delegated to optparse_long_fallback(). Non-option
 * arguments are handled as in optparse(): moved behind the options when
 * permutation is enabled, otherwise they end parsing.
 *
 * Returns the matching entry's shortname, -1 when parsing is finished,
 * or '?' on error with errmsg filled in. If longindex is not null it
 * receives the index of the matched long option.
 */
OPTPARSE_API
int
optparse_long(struct optparse *options,
              const struct optparse_long *longopts,
              int *longindex)
{
    const struct optparse_long *entry;
    char *current = options->argv[options->optind];
    char *text;
    char *value;
    int i;

    if (current == 0)
        return -1;
    if (optparse_is_dashdash(current)) {
        options->optind++; /* consume "--" */
        return -1;
    }
    if (optparse_is_shortopt(current))
        return optparse_long_fallback(options, longopts, longindex);
    if (!optparse_is_longopt(current)) {
        int skipped;
        int result;
        if (!options->permute)
            return -1;
        /* Step over the non-option, parse what follows, then move the
         * non-option behind whatever was consumed. */
        skipped = options->optind++;
        result = optparse_long(options, longopts, longindex);
        optparse_permute(options, skipped);
        options->optind--;
        return result;
    }

    /* Parse as long option. */
    options->errmsg[0] = '\0';
    options->optopt = 0;
    options->optarg = 0;
    text = current + 2; /* skip "--" */
    options->optind++;

    for (i = 0; !optparse_longopts_end(longopts, i); i++) {
        entry = &longopts[i];
        if (!optparse_longopts_match(entry->longname, text))
            continue;
        if (longindex)
            *longindex = i;
        options->optopt = entry->shortname;
        value = optparse_longopts_arg(text);
        if (value != 0) {
            /* "--name=value" form. */
            if (entry->argtype == OPTPARSE_NONE)
                return optparse_error(options, OPTPARSE_MSG_TOOMANY,
                                      entry->longname);
            options->optarg = value;
        } else if (entry->argtype == OPTPARSE_REQUIRED) {
            /* "--name value" form: take the next argv element. */
            options->optarg = options->argv[options->optind];
            if (options->optarg == 0)
                return optparse_error(options, OPTPARSE_MSG_MISSING,
                                      entry->longname);
            options->optind++;
        }
        return options->optopt;
    }
    return optparse_error(options, OPTPARSE_MSG_INVALID, text);
}
#endif /* OPTPARSE_IMPLEMENTATION */
#endif /* OPTPARSE_H */

15
record.h Normal file
View File

@ -0,0 +1,15 @@
#ifndef RECORD_H
#define RECORD_H
#include <inttypes.h>
/* Key type identifying a record: an unsigned 32-bit integer. */
typedef uint32_t record_key_t;
/*
 * A single record: a key plus two double-precision values.
 * NOTE(review): x and y look like coordinates of a point; confirm against
 * the code that reads and writes records.
 */
typedef struct {
/* Key of this record. */
record_key_t key;
/* First value stored with the key. */
double x;
/* Second value stored with the key. */
double y;
} record_t;
#endif /* RECORD_H */