SBDP02/index.c
2018-12-15 18:41:29 +01:00

743 lines
22 KiB
C

#include <memory.h>
#include <stdbool.h>
#include "index.h"
#include "io.h"
#include "common.h"
#include "bitmap.h"
#include <libgen.h>
#include <linux/limits.h>
#include <unistd.h>
#define SWAP(type, x, y) do { type __tmp__; __tmp__ = y; y = x; x = __tmp__; } while (0)
/* private functions */
void _btree_insert_into_node(btree_t *tree, btree_node_t *node, btree_entry_t *entry);
void _btree_remove_from_node(btree_t *tree, btree_node_t *node, unsigned index);
void _btree_optimize_if_needed(btree_t *tree) ;
int _btree_save_header(btree_t *tree)
{
return file_write(tree->file, 0, &tree->header, sizeof(btree_header_t));
}
page_t _btree_alloc(btree_t *tree)
{
page_t page = 1, bitmap_page = 1;
long long bit;
uint8_t bitmap[PAGE_SIZE];
do {
file_read(tree->file, bitmap_page, bitmap, PAGE_SIZE);
bit = bitmap_find_first(bitmap, PAGE_SIZE);
if (bit == -1) {
bitmap_page += PAGE_SIZE * 8 + 1;
page += PAGE_SIZE * 8 + 1;
}
} while (bit == -1);
bitmap_set(bitmap, bit);
file_write(tree->file, bitmap_page, bitmap, PAGE_SIZE);
page += bit + 1;
printfv(VERBOSITY_VERBOSE, "[alloc] allocated page %zu.\n", page);
return page;
}
void _btree_page_free(btree_t *tree, page_t page)
{
printfv(VERBOSITY_VERBOSE, "[alloc] freeing page %zu.\n", page);
uint8_t bitmap[PAGE_SIZE];
file_read(tree->file, 1 + (page / (PAGE_SIZE * 8)) * PAGE_SIZE, bitmap, PAGE_SIZE);
bitmap_unset(bitmap, page - 2);
file_write(tree->file, 1, bitmap, PAGE_SIZE);
}
btree_node_t* _btree_load_node(btree_node_t *node, btree_t *tree, page_t page)
{
char buffer[PAGE_SIZE];
file_read(tree->file, page, buffer, PAGE_SIZE);
memcpy(&node->header, buffer, sizeof(btree_node_header_t));
memcpy(node->entries, buffer + sizeof(btree_node_header_t), NODE_SIZE_MAX);
node->page = page;
return node;
}
void _btree_write_node(btree_node_t *node, btree_t *tree, page_t page)
{
char buffer[PAGE_SIZE] = {};
node->page = page;
memcpy(buffer, &node->header, sizeof(btree_node_header_t));
memcpy(buffer + sizeof(btree_node_header_t), node->entries, NODE_SIZE(node) - sizeof(btree_node_header_t));
file_write(tree->file, page, buffer, NODE_SIZE(node));
}
bool _btree_stack_empty(btree_t *tree)
{
return tree->trace.current < tree->trace.stack;
}
void _btree_stack_push(btree_t *tree, btree_node_t *node, unsigned n)
{
printfv(VERBOSITY_VERBOSE, "[btree] Pushing %zu page on parent stack, entry index %u\n", node->page, n);
tree->trace.current++;
tree->trace.current->node = *node;
tree->trace.current->entry = btree_get_entry(&tree->trace.current->node, n);
tree->trace.current->position = n;
}
btree_stack_elem_t* _btree_stack_pop(btree_t *tree)
{
if (_btree_stack_empty(tree)) {
return NULL;
}
return tree->trace.current--;
}
void _btree_stack_clear(btree_t *tree)
{
tree->trace.current = tree->trace.stack - 1;
}
unsigned _btree_concat_entries(void* current, size_t na, void* added, size_t nb)
{
// allocate temporary buffer on a stack
void* buffer = alloca(SIZEOF_ENTRIES(na + nb));
unsigned k = 0;
page_t last = PAGE_NONE;
for (unsigned i = 0, j = 0; i < na || j < nb; k++) {
btree_entry_t *dest = NODE_ENTRY(buffer, k);
if (i >= na) {
*dest = *NODE_ENTRY(added, j++);
} else if (j >= nb) {
*dest = *NODE_ENTRY(current, i++);
} else {
btree_entry_t *a = NODE_ENTRY(current, i);
btree_entry_t *b = NODE_ENTRY(added, j);
if (a->key < b->key) {
*dest = *a;
i++;
} else {
*dest = *b;
j++;
}
}
if (last) {
dest->left = last;
}
last = dest->right;
}
// copy from buffer to first operand
memcpy(current, buffer, SIZEOF_ENTRIES(na + nb));
return k;
}
void _btree_node_insert_entry(btree_node_t* node, btree_entry_t entry)
{
printfv(VERBOSITY_VERBOSE, "[btree] inserting { %zu < %zu > %zu } on %zu\n", entry.left, entry.key, entry.right, node->page);
_btree_concat_entries(node->entries, node->header.entries, &entry, 1);
node->header.entries++;
}
void _btree_init(btree_t *tree, char* mode)
{
char path[PATH_MAX];
strcpy(path, tree->header.main);
char *dir = dirname(path);
sprintf(path, "%s/%s", dir, tree->header.main);
_btree_stack_clear(tree);
tree->main = tape_open(path, mode);
}
int btree_init(btree_t *tree, char* filename, size_t d)
{
char path[PATH_MAX];
strcpy(path, filename);
char* ext = strrchr(path, '.');
if (ext) *ext = 0;
sprintf(tree->header.main, "%s.dat", path);
printfv(VERBOSITY_DEBUG, "[btree] Initializing btree in file %s d=%zu, main=%s.\n", filename, d, tree->header.main);
tree->file = file_open(filename, "w+");
if (!tree->file) {
fprintf(stderr, "Cannot open file %s.\n", filename);
return BTREE_ERR_CANNOT_OPEN_FILE;
}
tree->header.d = d;
tree->header.page_size = PAGE_SIZE;
tree->header.root = _btree_alloc(tree);
tree->header.records = 0;
tree->header.changes = 0;
_btree_save_header(tree);
btree_node_t root;
root.header.flags = NODE_IS_ROOT | NODE_IS_LEAF;
root.header.entries = 0;
_btree_write_node(&root, tree, tree->header.root);
_btree_init(tree, "w");
return 0;
}
int btree_open(btree_t *tree, char* filename)
{
printfv(VERBOSITY_DEBUG, "[btree] Trying to open btree index in file %s\n", filename);
tree->file = file_open(filename, "r+");
if (!tree->file) {
fprintf(stderr, "Cannot open file %s.\n", filename);
return BTREE_ERR_PAGE_SIZE_DIFFERENT;
}
file_read(tree->file, 0, &tree->header, sizeof(btree_header_t));
printfv(VERBOSITY_DEBUG, "[btree] Found BTREE d=%zu, ps=%zu, root=%" PRIu64 ", mainfile=%s\n", tree->header.d, tree->header.page_size, tree->header.root, tree->header.main);
if (tree->header.page_size != PAGE_SIZE) {
fprintf(stderr, "BTree page size mismatch, expecting %d got %zu, closing.\n", PAGE_SIZE, tree->header.page_size);
file_close(tree->file);
return BTREE_ERR_PAGE_SIZE_DIFFERENT;
}
_btree_init(tree, "rb+");
return 0;
}
btree_siblings_t _btree_get_siblings(btree_t *tree, page_t page)
{
btree_siblings_t result = { PAGE_NONE, PAGE_NONE, NULL, NULL };
if (_btree_stack_empty(tree)) {
return result;
}
btree_node_t *parent = &tree->trace.current->node;
size_t total = parent->header.entries + 1;
for (unsigned i = 0; i < total; i++) {
page_t current = *(page_t*)NODE_ENTRY(parent->entries, i);
if (current == page) {
if (i > 0) {
result.left = *(page_t*)NODE_ENTRY(parent->entries, i - 1);
result.left_entry = NODE_ENTRY(parent->entries, i - 1);
}
if (i < total) {
result.right = *(page_t*)NODE_ENTRY(parent->entries, i + 1);
result.right_entry = NODE_ENTRY(parent->entries, i);
}
break;
}
}
return result;
}
void _btree_rebalance(btree_t *tree, btree_node_t *left, btree_node_t *right, btree_entry_t* parent)
{
btree_node_t *pnode = &tree->trace.current->node;
size_t nl = left->header.entries;
size_t nr = right->header.entries;
size_t n = nl;
btree_entry_t rotated = *parent;
rotated.left = NODE_ENTRY(left->entries, nl - 1)->right;
rotated.right = NODE_ENTRY(right->entries, 0)->left;
void* buffer = alloca(SIZEOF_ENTRIES(nl + nr + 1));
memcpy(buffer, left->entries, SIZEOF_ENTRIES(nl));
n = _btree_concat_entries(buffer, n, right->entries, nr);
n = _btree_concat_entries(buffer, n, &rotated, 1);
size_t pivot = n / 2;
left->header.entries = 0;
right->header.entries = 0;
for (unsigned i = 0; i < n; i++) {
btree_entry_t* entry = NODE_ENTRY(buffer, i);
if (i < pivot) {
_btree_node_insert_entry(left, *entry);
} else if (i == pivot) {
parent->key = entry->key;
parent->location = entry->location;
} else {
_btree_node_insert_entry(right, *entry);
}
}
_btree_write_node(left, tree, left->page);
_btree_write_node(right, tree, right->page);
_btree_write_node(pnode, tree, pnode->page);
}
bool _btree_compensate_insert(btree_t *tree, btree_node_t *old, btree_entry_t *entry)
{
btree_siblings_t siblings = _btree_get_siblings(tree, old->page);
btree_node_t other;
if (siblings.left) {
_btree_load_node(&other, tree, siblings.left);
if (other.header.entries < 2*tree->header.d) {
_btree_node_insert_entry(old, *entry);
_btree_rebalance(tree, &other, old, siblings.left_entry);
return true;
}
}
if (siblings.right) {
_btree_load_node(&other, tree, siblings.right);
if (other.header.entries < 2*tree->header.d) {
_btree_node_insert_entry(old, *entry);
_btree_rebalance(tree, old, &other, siblings.right_entry);
return true;
}
}
return false;
}
void _btree_split_node(btree_t *tree, btree_node_t *old, btree_entry_t *entry)
{
btree_node_t *parent = NULL;
size_t n = old->header.entries;
size_t half = n / 2;
void *buffer = alloca(SIZEOF_ENTRIES(n + 1));
memcpy(buffer, old->entries, SIZEOF_ENTRIES(n));
_btree_concat_entries(buffer, n, entry, 1);
if (!_btree_stack_empty(tree)) {
btree_stack_elem_t *trace = _btree_stack_pop(tree);
parent = &trace->node;
}
btree_node_t left = {
.header = {
.flags = old->header.flags & NODE_IS_LEAF,
.entries = 0,
},
.page = old->page,
};
btree_node_t right = {
.header = {
.flags = old->header.flags & NODE_IS_LEAF,
.entries = 0,
},
.page = _btree_alloc(tree),
};
if (!parent) {
parent = alloca(sizeof(btree_node_t));
parent->header = (btree_node_header_t){
.flags = NODE_IS_ROOT,
.entries = 0,
};
parent->page = _btree_alloc(tree);
tree->header.root = parent->page;
_btree_save_header(tree);
printfv(VERBOSITY_DEBUG, "[btree] Designated new root %zu.\n", parent->page);
};
printfv(VERBOSITY_DEBUG, "[btree] Spliting node %zu.\n", old->page);
btree_entry_t parent_entry = *NODE_ENTRY(buffer, half);
parent_entry.left = left.page;
parent_entry.right = right.page;
_btree_insert_into_node(tree, parent, &parent_entry);
for (unsigned int i = 0; i < half; i++) {
_btree_node_insert_entry(&left, *NODE_ENTRY(buffer, i));
_btree_node_insert_entry(&right, *NODE_ENTRY(buffer, half + i + 1));
}
_btree_write_node(&left, tree, left.page);
_btree_write_node(&right, tree, right.page);
}
void _btree_insert_into_node(btree_t *tree, btree_node_t *node, btree_entry_t *entry)
{
printfv(VERBOSITY_DEBUG, "[btree] Inserting onto page %zu.\n", node->page);
if (node->header.entries < 2 * tree->header.d) {
_btree_node_insert_entry(node, *entry);
_btree_write_node(node, tree, node->page);
return;
}
printfv(VERBOSITY_DEBUG, "[btree] %zu Overflow!\n", node->page);
if (_btree_compensate_insert(tree, node, entry)) {
return;
}
printfv(VERBOSITY_DEBUG, "[btree] Unable to compensate %zu!\n", node->page);
_btree_split_node(tree, node, entry);
}
page_t btree_find(btree_t *tree, record_key_t key, btree_entry_t* dest, btree_node_t *node, unsigned *index)
{
if (!node) {
node = alloca(sizeof(btree_node_t));
}
page_t page = tree->header.root;
btree_entry_t *entry;
_btree_stack_clear(tree);
do {
_btree_load_node(node, tree, page);
for(unsigned i = 0; i < node->header.entries; i++) {
entry = btree_get_entry(node, i);
if (entry->key == key) {
if (dest) memcpy(dest, entry, sizeof(btree_entry_t));
if (index) *index = i;
return page;
}
if (entry->key > key && entry->left) {
page = entry->left;
_btree_stack_push(tree, node, i);
break;
}
}
if (page == node->page && ~node->header.flags & NODE_IS_LEAF) {
page = entry->right;
_btree_stack_push(tree, node, node->header.entries - 1);
}
} while (page && ~node->header.flags & NODE_IS_LEAF);
return PAGE_NONE;
}
page_t btree_insert(btree_t *tree, record_t record)
{
record_key_t key = record.key;
btree_node_t node;
if (btree_find(tree, key, NULL, &node, NULL) != PAGE_NONE) {
printfv(VERBOSITY_DEBUG, "[btree] Record with key %zu already exists.\n", key);
return PAGE_NONE;
}
offset_t offset = tape_append(tree->main, &record, sizeof(record));
btree_entry_t entry = { PAGE_NONE, key, offset, PAGE_NONE };
_btree_insert_into_node(tree, &node, &entry);
tree->header.changes++;
tree->header.records++;
_btree_optimize_if_needed(tree);
return node.page;
}
void _btree_merge(btree_t *tree, btree_node_t* left, btree_node_t* right)
{
printfv(VERBOSITY_DEBUG, "[btree] Merging %zu with %zu.\n", left->page, right->page);
size_t nl = left->header.entries,
nr = right->header.entries,
n = 0;
btree_stack_elem_t *parent = _btree_stack_pop(tree);
btree_entry_t middle = *parent->entry;
middle.left = NODE_ENTRY(left->entries, nl - 1)->right;
middle.right = NODE_ENTRY(right->entries, 0)->left;
void *buffer = alloca(SIZEOF_ENTRIES(nl + nr + 1));
n = _btree_concat_entries(buffer, n, left->entries, nl);
n = _btree_concat_entries(buffer, n, &middle, 1);
n = _btree_concat_entries(buffer, n, right->entries, nr);
memcpy(left->entries, buffer, SIZEOF_ENTRIES(n));
left->header.entries = n;
_btree_page_free(tree, right->page);
parent->entry->left = left->page;
parent->entry->right = left->page;
_btree_remove_from_node(tree, &parent->node, parent->position);
if (parent->node.header.flags & NODE_IS_ROOT && parent->node.header.entries == 0) {
_btree_page_free(tree, parent->node.page);
left->header.flags |= NODE_IS_ROOT;
tree->header.root = left->page;
printfv(VERBOSITY_DEBUG, "[btree] Designated new root %zu.\n", left->page);
} else {
_btree_write_node(&parent->node, tree, parent->node.page);
}
_btree_write_node(left, tree, left->page);
}
void _btree_fix_underflow(btree_t *tree, btree_node_t *node) {
btree_node_t sibling;
btree_siblings_t siblings = _btree_get_siblings(tree, node->page);
btree_entry_t *entry;
if (siblings.left != PAGE_NONE) {
_btree_load_node(&sibling, tree, siblings.left);
entry = siblings.left_entry;
if (sibling.header.entries + node->header.entries >= 2*tree->header.d) {
printfv(VERBOSITY_VERBOSE, "[btree] rebalancing with left sibling %zu.\n", siblings.left);
_btree_rebalance(tree, &sibling, node, siblings.left_entry);
return;
}
}
if (siblings.right != PAGE_NONE) {
_btree_load_node(&sibling, tree, siblings.right);
entry = siblings.right_entry;
if (sibling.header.entries + node->header.entries >= 2*tree->header.d) {
printfv(VERBOSITY_VERBOSE, "[btree] rebalancing with right sibling %zu.\n", siblings.right);
_btree_rebalance(tree, node, &sibling, siblings.right_entry);
return;
}
}
printfv(VERBOSITY_DEBUG, "[btree] Enable to rebalance.\n");
if (siblings.right != PAGE_NONE) {
_btree_merge(tree, node, &sibling);
} else {
_btree_merge(tree, &sibling, node);
}
}
void _btree_remove_from_node(btree_t *tree, btree_node_t *node, unsigned index)
{
printfv(VERBOSITY_DEBUG, "[btree] Removing %u entry from %zu.\n", index, node->page);
size_t n = node->header.entries;
void *buffer = alloca(SIZEOF_ENTRIES(n));
btree_entry_t* current;
unsigned m = 0;
for (unsigned i = 0; i < n; i++) {
current = NODE_ENTRY(node->entries, i);
if (i != index) {
m = _btree_concat_entries(buffer, m, current, 1);
}
}
memcpy(node->entries, buffer, SIZEOF_ENTRIES(node->header.entries));
node->header.entries = m;
if ((~node->header.flags & NODE_IS_ROOT) && node->header.entries < tree->header.d) {
printfv(VERBOSITY_DEBUG, "[btree] Underflow in %zu.\n", node->page);
_btree_fix_underflow(tree, node);
} else {
_btree_write_node(node, tree, node->page);
}
}
page_t btree_remove(btree_t *tree, record_key_t key)
{
btree_node_t node;
btree_entry_t *entry;
unsigned index;
if (btree_find(tree, key, NULL, &node, &index) == PAGE_NONE) {
printfv(VERBOSITY_DEBUG, "[btree] Record with key %zu does not exist.\n", key);
// 404
return PAGE_NONE;
}
entry = btree_get_entry(&node, index);
if (node.header.flags & NODE_IS_LEAF) {
printfv(VERBOSITY_DEBUG, "[btree] Removing record with key %zu from leaf %zu.\n", entry->key, node.page);
_btree_remove_from_node(tree, &node, index);
_btree_write_node(&node, tree, node.page);
} else {
printfv(VERBOSITY_DEBUG, "[btree] Removing record with key %zu from node %zu.\n", entry->key, node.page);
btree_node_t *replacement;
btree_entry_t *replaced;
_btree_stack_push(tree, &node, index);
replacement = &tree->trace.current->node;
_btree_load_node(&node, tree, entry->right);
while (~node.header.flags & NODE_IS_LEAF) {
_btree_stack_push(tree, &node, 0);
replaced = NODE_ENTRY(node.entries, 0);
_btree_load_node(&node, tree, replaced->left);
}
replaced = NODE_ENTRY(node.entries, 0);
entry = btree_get_entry(replacement, index);
printfv(VERBOSITY_DEBUG, "[btree] Exchanging %zu with %zu.\n", entry->key, replaced->key);
SWAP(record_key_t, replaced->key, entry->key);
SWAP(offset_t, replaced->location, entry->location);
_btree_write_node(replacement, tree, replacement->page);
_btree_remove_from_node(tree, &node, 0);
}
tree->header.changes++;
tree->header.records--;
_btree_optimize_if_needed(tree);
return node.page;
}
bool btree_update(btree_t *tree, record_key_t key, record_t record)
{
if (record.key != key) {
printfv(VERBOSITY_DEBUG, "[btree] Update key mismatch, removing %zu and adding %zu.\n", key, record.key);
btree_remove(tree, key);
return btree_insert(tree, record) != PAGE_NONE;
}
btree_entry_t entry;
if (btree_find(tree, key, &entry, NULL, NULL) == PAGE_NONE) {
printfv(VERBOSITY_DEBUG, "[btree] Record with key %zu not found.\n", key, record.key);
return false;
}
tape_write(tree->main, entry.location, &record, sizeof(record));
return true;
}
btree_entry_t *btree_get_entry(btree_node_t* node, unsigned n)
{
if (n > node->header.entries) {
return NULL;
}
return (btree_entry_t*)(node->entries + NODE_ENTRY_OFFSET(n));
}
page_t btree_read_record(btree_t *tree, record_key_t key, record_t *record)
{
btree_entry_t entry;
page_t page = btree_find(tree, key, &entry, NULL, NULL);
if (page == PAGE_NONE) {
return PAGE_NONE;
}
tape_read(tree->main, entry.location, record, sizeof(record_t));
return page;
}
void btree_close(btree_t* tree)
{
printfv(VERBOSITY_DEBUG, "[btree] Closing index %s.\n", tree->file->filename);
_btree_save_header(tree);
file_close(tree->file);
tape_close(tree->main);
}
void btree_flush(btree_t* tree)
{
file_flush(tree->file);
file_flush(tree->main->file);
}
void _btree_optimize_if_needed(btree_t *tree)
{
double ratio = (double)tree->header.changes / (double)tree->header.records;
if (tree->header.records > BTREE_OPTIMIZE_RECORDS_MIN && ratio > BTREE_OPTIMIZE_THRESHOLD) {
printfv(VERBOSITY_DEBUG, "[btree] Auto optimization triggered.\n");
btree_optimize(tree);
}
}
void _btree_optimize_page(btree_t* tree, tape_t* tmp, page_t page)
{
if (!page) return;
btree_node_t node;
btree_entry_t *entry;
record_t record;
_btree_load_node(&node, tree, page);
for (int i = 0; i < node.header.entries; i++) {
entry = btree_get_entry(&node, i);
_btree_optimize_page(tree, tmp, entry->left);
tape_read(tree->main, entry->location, &record, sizeof(record_t));
entry->location = tape_append(tmp, &record, sizeof(record_t));
}
_btree_write_node(&node, tree, node.page);
_btree_optimize_page(tree, tmp, entry->right);
}
void btree_optimize(btree_t *tree)
{
char tmp[PATH_MAX], path[PATH_MAX - 10], backup[PATH_MAX];
strcpy(path, tree->main->file->filename);
sprintf(tmp, "%s.tmp", path);
sprintf(backup, "%s.bckp", path);
printfv(VERBOSITY_DEBUG, "[btree] Optimizing btree using temp file %s.\n", tmp);
tape_t *temp = tape_open(tmp, "wb");
_btree_optimize_page(tree, temp, tree->header.root);
tape_close(temp);
tape_close(tree->main);
printfv(VERBOSITY_DEBUG, "[btree] Renaming %s to %s.\n", path, backup);
rename(path, backup);
printfv(VERBOSITY_DEBUG, "[btree] Renaming %s to %s.\n", tmp, path);
if (rename(tmp, path) == 0) {
tree->main = tape_open(path, "rb+");
tree->header.changes = 0;
}
}