From 4c83ba30367cc1974af6e592c3b70abc16d9afda Mon Sep 17 00:00:00 2001 From: Kacper Donat Date: Fri, 14 Dec 2018 19:57:49 +0100 Subject: [PATCH] cacheowanie --- Makefile | 7 ++-- btree.h | 0 index.c | 83 +++++++++++++++++++++++++++++++++++++---------- index.h | 8 ++++- io.c | 95 +++++++++++++++++++++++++++++++++++++++++------------- io.h | 41 ++++++++++++++++-------- openidx.c | 27 +++++++++++++++- readidx.c | 83 +++++++++++++++++++++++++++++++++++++++++++++++ tape.c | 96 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ tape.h | 20 ++++++++++++ 10 files changed, 402 insertions(+), 58 deletions(-) delete mode 100644 btree.h create mode 100644 readidx.c create mode 100644 tape.c create mode 100644 tape.h diff --git a/Makefile b/Makefile index b005f15..8380f1e 100644 --- a/Makefile +++ b/Makefile @@ -2,10 +2,11 @@ CC=gcc CFLAGS=-Wall -O0 -g LDFLAGS=-lm -lreadline -all: makeidx openidx +all: makeidx openidx readidx -makeidx: makeidx.o io.o common.o index.o bitmap.c -openidx: openidx.o io.o common.o index.o bitmap.c +makeidx: makeidx.o io.o common.o index.o bitmap.o tape.o +readidx: readidx.o io.o common.o index.o bitmap.o tape.o +openidx: openidx.o io.o common.o index.o bitmap.o tape.o -include $(wildcard *.d) diff --git a/btree.h b/btree.h deleted file mode 100644 index e69de29..0000000 diff --git a/index.c b/index.c index 3b33467..56cce17 100644 --- a/index.c +++ b/index.c @@ -4,11 +4,16 @@ #include "io.h" #include "common.h" #include "bitmap.h" +#include +#include #define BTREE_ERR_CANNOT_OPEN_FILE -1 #define BTREE_ERR_PAGE_SIZE_DIFFERENT -2 -#define SWAP(type, x, y) do { type tmp; tmp = y; y = x; x = tmp; } while (0) +#define SWAP(type, x, y) do { type __tmp__; __tmp__ = y; y = x; x = __tmp__; } while (0) + +unsigned reads = 0; +unsigned writes = 0; /* private functions */ void _btree_insert_into_node(btree_t *tree, btree_node_t *node, btree_entry_t *entry); @@ -21,16 +26,24 @@ int _btree_save_header(btree_t *tree) page_t _btree_alloc(btree_t *tree) { - page_t page = 2; + page_t page = 1, bitmap_page = 1; long long bit; uint8_t bitmap[PAGE_SIZE]; - file_read(tree->file, 1, bitmap, PAGE_SIZE); - bit = bitmap_find_first(bitmap, PAGE_SIZE); - bitmap_set(bitmap, bit); - file_write(tree->file, 1, bitmap, PAGE_SIZE); + do { + file_read(tree->file, bitmap_page, bitmap, PAGE_SIZE); + bit = bitmap_find_first(bitmap, PAGE_SIZE); - page += bit; + if (bit == -1) { + bitmap_page += PAGE_SIZE * 8 + 1; + page += PAGE_SIZE * 8 + 1; + } + } while (bit == -1); + + bitmap_set(bitmap, bit); + file_write(tree->file, bitmap_page, bitmap, PAGE_SIZE); + + page += bit + 1; printfv(VERBOSITY_DEBUG, "[alloc] allocated page %zu.\n", page); @@ -42,7 +55,7 @@ void _btree_page_free(btree_t *tree, page_t page) printfv(VERBOSITY_DEBUG, "[alloc] freeing page %zu.\n", page); uint8_t bitmap[PAGE_SIZE]; - file_read(tree->file, 1, bitmap, PAGE_SIZE); + file_read(tree->file, 1 + (page / (PAGE_SIZE * 8)) * PAGE_SIZE, bitmap, PAGE_SIZE); bitmap_unset(bitmap, page - 2); file_write(tree->file, 1, bitmap, PAGE_SIZE); } @@ -145,15 +158,31 @@ void _btree_node_insert_entry(btree_node_t* node, btree_entry_t entry) node->header.entries++; } -void _btree_init(btree_t *tree) +void _btree_init(btree_t *tree, char* mode) { + char path[PATH_MAX]; + strcpy(path, tree->header.main); + + char *dir = dirname(path); + sprintf(path, "%s/%s", dir, tree->header.main); + _btree_stack_clear(tree); + + tree->main = tape_open(path, mode); } int btree_init(btree_t *tree, char* filename, size_t d) { - printfv(VERBOSITY_DEBUG, "[btree] Initializing btree in file %s d=%zu.\n", filename, d); - tree->file = file_open(filename, "w+", PAGE_SIZE); + char path[PATH_MAX]; + strcpy(path, filename); + + char* ext = strrchr(path, '.'); + if (ext) *ext = 0; + + sprintf(tree->header.main, "%s.dat", path); + + printfv(VERBOSITY_DEBUG, "[btree] Initializing btree in file %s d=%zu, main=%s.\n", filename, d, tree->header.main); + tree->file = file_open(filename, "w+"); if (!tree->file) { fprintf(stderr, "Cannot open file %s.\n", filename); @@ -171,7 +200,7 @@ int btree_init(btree_t *tree, char* filename, size_t d) root.header.entries = 0; _btree_write_node(&root, tree, tree->header.root); - _btree_init(tree); + _btree_init(tree, "w"); return 0; } @@ -180,7 +209,7 @@ int btree_open(btree_t *tree, char* filename) { printfv(VERBOSITY_DEBUG, "[btree] Trying to open btree index in file %s\n", filename); - tree->file = file_open(filename, "r+", PAGE_SIZE); + tree->file = file_open(filename, "r+"); if (!tree->file) { fprintf(stderr, "Cannot open file %s.\n", filename); @@ -189,7 +218,7 @@ int btree_open(btree_t *tree, char* filename) file_read(tree->file, 0, &tree->header, sizeof(btree_header_t)); - printfv(VERBOSITY_DEBUG, "[btree] Found BTREE d=%zu, ps=%zu, root=%" PRIu64 "\n", tree->header.d, tree->header.page_size, tree->header.root); + printfv(VERBOSITY_DEBUG, "[btree] Found BTREE d=%zu, ps=%zu, root=%" PRIu64 ", mainfile=%s\n", tree->header.d, tree->header.page_size, tree->header.root, tree->header.main); if (tree->header.page_size != PAGE_SIZE) { fprintf(stderr, "BTree page size mismatch, expecting %d got %zu, closing.\n", PAGE_SIZE, tree->header.page_size); @@ -197,7 +226,7 @@ int btree_open(btree_t *tree, char* filename) return BTREE_ERR_PAGE_SIZE_DIFFERENT; } - _btree_init(tree); + _btree_init(tree, "rb+"); return 0; } @@ -427,14 +456,16 @@ page_t btree_find(btree_t *tree, record_key_t key, btree_entry_t* dest, btree_no page_t btree_insert(btree_t *tree, record_t record) { - record_key_t key = record.key; + record_key_t key = record.key; btree_node_t node; if (btree_find(tree, key, NULL, &node, NULL) != PAGE_NONE) { printfv(VERBOSITY_DEBUG, "[btree] record with key %zu already exists.\n", key); return PAGE_NONE; } - btree_entry_t entry = { PAGE_NONE, key, 2137, PAGE_NONE }; + + offset_t offset = tape_append(tree->main, &record, sizeof(record)); + btree_entry_t entry = { PAGE_NONE, key, offset, PAGE_NONE }; _btree_insert_into_node(tree, &node, &entry); return node.page; @@ -591,6 +622,23 @@ page_t btree_remove(btree_t *tree, record_key_t key) return node.page; } +bool btree_update(btree_t *tree, record_key_t key, record_t record) +{ + if (record.key != key) { + printfv(VERBOSITY_DEBUG, "[btree] update key mismatch, removing %zu and adding %zu.\n", key, record.key); + btree_remove(tree, key); + return btree_insert(tree, record) != PAGE_NONE; + } + + btree_entry_t entry; + if (btree_find(tree, key, &entry, NULL, NULL) == PAGE_NONE) { + printfv(VERBOSITY_DEBUG, "[btree] Record with key %zu not found.\n", key, record.key); + return false; + } + + return true; +} + btree_entry_t *btree_get_entry(btree_node_t* node, unsigned n) { if (n > node->header.entries) { @@ -604,4 +652,5 @@ void btree_close(btree_t* tree) { printfv(VERBOSITY_DEBUG, "[btree] Closing index %s.\n", tree->file->filename); file_close(tree->file); + tape_close(tree->main); } diff --git a/index.h b/index.h index deb2e15..a98abdc 100644 --- a/index.h +++ b/index.h @@ -3,6 +3,7 @@ #include "io.h" #include "record.h" +#include "tape.h" #include #define SIZEOF_ENTRIES(n) (sizeof(btree_node_header_t) + (sizeof(btree_entry_t) - sizeof(page_t))*(n) + ((n) > 0 ? 1 : 0) * sizeof(page_t)) @@ -16,11 +17,15 @@ #define NODE_IS_ROOT 1 #define NODE_IS_LEAF 2 +extern unsigned reads; +extern unsigned writes; + typedef struct { size_t d; /* 8 bytes long */ size_t page_size; /* 8 bytes long */ page_t root; /* 8 bytes long */ -} btree_header_t; /* 24 bytes long */ + char main[256]; /* 256 bytes long */ +} btree_header_t; /* 280 bytes long */ typedef struct { uint16_t flags; @@ -48,6 +53,7 @@ typedef struct { typedef struct { file_t *file; + tape_t *main; btree_header_t header; struct { btree_stack_elem_t *current; diff --git a/io.c b/io.c index ecd2c29..fed6371 100644 --- a/io.c +++ b/io.c @@ -5,22 +5,23 @@ #include #include -file_t* file_open(const char* filename, const char* mode, size_t page_size) +page_cache_entry_t *_file_load_page(file_t* file, page_t page); +void _file_flush_page(file_t *file, page_cache_entry_t *entry); + +file_t* file_open(const char* filename, const char* mode) { FILE* handle = fopen(filename, mode); if (!handle) { - printfv(VERBOSITY_NORMAL, "Can't open file %s.\n", filename); + printfv(VERBOSITY_NORMAL, "[io] Can't open file %s.\n", filename); return NULL; } - printfv(VERBOSITY_DEBUG, "File %s opened in %s with page size %zu.\n", filename, mode, page_size); + printfv(VERBOSITY_DEBUG, "[io] File %s opened in %s with page size %zu.\n", filename, mode, PAGE_SIZE); file_t* result = malloc(sizeof(file_t)); - result->page_size = page_size; - result->file = handle; - result->filename = malloc(strlen(filename) + 1); - result->buffer = malloc(page_size); + result->file = handle; + result->filename = malloc(strlen(filename) + 1); strcpy(result->filename, filename); @@ -29,32 +30,80 @@ file_t* file_open(const char* filename, const char* mode, size_t page_size) void file_close(file_t* file) { - printfv(VERBOSITY_DEBUG, "Closing file %s.\n", file->filename); + file_flush(file); + + printfv(VERBOSITY_DEBUG, "[io] Closing file %s.\n", file->filename); fclose(file->file); free(file->filename); - free(file->buffer); free(file); } -size_t file_read(file_t* file, unsigned block, void* buffer, size_t length) +size_t file_read(file_t* file, page_t page, void* buffer, size_t length) { - memset(file->buffer, 0, file->page_size); + page_cache_entry_t *entry = _file_load_page(file, page); + memcpy(buffer, entry->data, length); - printfv(VERBOSITY_DEBUG, "Reading page %u of %s.\n", block, file->filename); - - fseek(file->file, block * file->page_size, SEEK_SET); - int read = fread(file->buffer, 1, file->page_size, file->file); - memcpy(buffer, file->buffer, length); - return read; + return entry->size; } -size_t file_write(file_t* file, unsigned block, const void* buffer, size_t length) +size_t file_write(file_t* file, page_t page, const void* buffer, size_t length) { - memset(file->buffer, 0, file->page_size); - memcpy(file->buffer, buffer, length); + page_cache_entry_t *entry = _file_load_page(file, page); - printfv(VERBOSITY_DEBUG, "Writing page %u to %s. (%zu bytes)\n", block, file->filename, length); - fseek(file->file, block * file->page_size, SEEK_SET); - return fwrite(file->buffer, 1, file->page_size, file->file); + memset(entry->data, 0, PAGE_SIZE); + memcpy(entry->data, buffer, length); + + entry->flags |= PAGE_DIRTY; + entry->size = length; + + return entry->size; +} + +void file_flush(file_t* file) +{ + for (unsigned i = 0; i < CACHE_ENTRIES; i++) { + _file_flush_page(file, file->cache + i); + } +} + +page_cache_entry_t *_file_load_page(file_t* file, page_t page) +{ + page_cache_entry_t *entry = file->cache + (page % CACHE_ENTRIES); + + if (entry->page == page && entry->flags & PAGE_PRESENT) { + // already in cache + return entry; + } + + _file_flush_page(file, entry); + + int result = fseek(file->file, page * PAGE_SIZE, SEEK_SET); + + memset(entry->data, 0, PAGE_SIZE); + entry->size = fread(entry->data, 1, PAGE_SIZE, file->file); + if (entry->size) { + printfv(VERBOSITY_DEBUG, "[io] Loading page %u of %s (%zu bytes).\n", page, file->filename, entry->size); + reads++; + } + + entry->page = page; + entry->flags |= PAGE_PRESENT; + + return entry; +} + +void _file_flush_page(file_t *file, page_cache_entry_t *entry) +{ + if (entry->flags & PAGE_PRESENT && entry->flags & PAGE_DIRTY) { + printfv(VERBOSITY_DEBUG, "[io] Flushing page %u to %s (%zu bytes).\n", entry->page, file->filename, entry->size); + + fseek(file->file, entry->page * PAGE_SIZE, SEEK_SET); + fwrite(entry->data, 1, entry->size, file->file); + + // disable dirty flag + entry->flags &= ~PAGE_DIRTY; + + writes++; + } } diff --git a/io.h b/io.h index 14a6e73..0c33ada 100644 --- a/io.h +++ b/io.h @@ -1,28 +1,43 @@ #ifndef IO_H #define IO_H -#define PAGE_SIZE 512 -#define PAGE_NONE 0 +#define PAGE_SIZE 512 +#define PAGE_NONE 0 + +#define PAGE_PRESENT 1 +#define PAGE_DIRTY 2 + +#define CACHE_ENTRIES 16 #include #include -typedef struct { - FILE* file; - - char* filename; - size_t page_size; - - void* buffer; -} file_t; +extern unsigned reads; +extern unsigned writes; typedef uint64_t page_t; typedef uint64_t offset_t; -file_t* file_open(const char* filename, const char* mode, size_t page_size); +typedef struct { + page_t page; + size_t size; + char data[PAGE_SIZE]; + uint8_t flags; +} page_cache_entry_t; + +typedef struct { + FILE* file; + char* filename; + + page_cache_entry_t cache[CACHE_ENTRIES]; +} file_t; + +file_t* file_open(const char* filename, const char* mode); void file_close(file_t* file); -size_t file_read(file_t* file, unsigned block, void* buffer, size_t size); -size_t file_write(file_t* file, unsigned block, const void* buffer, size_t size); +size_t file_read(file_t* file, page_t page, void* buffer, size_t size); +size_t file_write(file_t* file, page_t page, const void* buffer, size_t size); + +void file_flush(file_t* file); #endif diff --git a/openidx.c b/openidx.c index 73d54e8..db757cc 100644 --- a/openidx.c +++ b/openidx.c @@ -62,6 +62,7 @@ result_t dump_command(const char* command, char* args); result_t print_command(const char* command, char* args); result_t records_command(const char* command, char* args); result_t find_command(const char* command, char* args); +result_t read_command(const char* command, char* args); result_t delete_command(const char* command, char* args); result_t verbosity_command(const char* command, char* args); @@ -73,6 +74,7 @@ static command_t commands[] = { { "print", "Prints tree", print_command }, { "records", "Prints records of given page", records_command }, { "find", "Finds record", find_command }, + { "read", "Reads record", read_command }, { "delete", "Deletes record", delete_command }, { "verbosity", "Changes the verbosity", verbosity_command }, }; @@ -144,6 +146,29 @@ result_t find_command(const char* command, char* args) return RESULT_OK; } +result_t read_command(const char* command, char* args) +{ + record_key_t key; + + if (sscanf(args, "%u", &key) == 1) { + btree_entry_t entry; + page_t page; + record_t record; + + if ((page = btree_find(&tree, key, &entry, NULL, NULL))) { + printf("Record %u found on page %zu, offset: %lu\n", key, page, entry.location); + tape_read(tree.main, entry.location, &record, sizeof(record_t)); + printf("PK: %u, x: %lf, y: %lf\n", record.key, record.x, record.y); + } else { + printf("404 Not found\n"); + } + } else { + printf("Usage: read key\n"); + } + + return RESULT_OK; +} + result_t delete_command(const char* command, char* args) { record_key_t key; @@ -217,7 +242,7 @@ void print_page(page_t page, unsigned depth, unsigned current) for (int i = 0; i < node.header.entries; i++) { entry = btree_get_entry(&node, i); print_page(entry->left, depth - 1, current + 1); - printf("%s%zu < %u [0x%zx] > %zu\n", prefix, entry->left, entry->key, entry->location, entry->right); + printf("%s%zu < %u [0x%08zx] > %zu\n", prefix, entry->left, entry->key, entry->location, entry->right); } print_page(entry->right, depth - 1, current + 1); } diff --git a/readidx.c b/readidx.c new file mode 100644 index 0000000..dfaec00 --- /dev/null +++ b/readidx.c @@ -0,0 +1,83 @@ +#include +#include + +#define OPTPARSE_IMPLEMENTATION +#define OPTPARSE_API static +#include "common.h" +#include "index.h" + +typedef struct { + char* index; +} opts_t; + +opts_t options; +btree_t tree; + +void init_args(int args, char* argv[]) +{ + optparse_t opts; + optparse_init(&opts, argv); + + for (char opt; opt != -1; opt = optparse(&opts, "qv")) { + switch (opt) { + case 'q': + verbosity--; + break; + case 'v': + verbosity++; + break; + } + } + + options.index = optparse_arg(&opts); +} + +void help(const char* name) { + printf( + "%s - some help" + , + name + ); +} + +void print_page(page_t page) +{ + if (!page) return; + + char buffer[PAGE_SIZE]; + char prefix[1024] = {}; + + file_read(tree.file, page, buffer, PAGE_SIZE); + + btree_node_t node; + memcpy(&node.header, buffer, sizeof(node.header)); + memcpy(&node.entries, buffer + sizeof(node.header), NODE_SIZE_MAX); + + btree_entry_t *entry; + for (int i = 0; i < node.header.entries; i++) { + entry = btree_get_entry(&node, i); + print_page(entry->left); + printf("%u\n", entry->key); + } + print_page(entry->right); +} + +int main(int argc, char* argv[]) +{ + if (argc < 2) { + help(argv[0]); + return 0; + } + + init_args(argc, argv); + + if (btree_open(&tree, options.index) != 0) { + fprintf(stderr, "Cannot open indes %s for reading.", options.index); + return 1; + } + + print_page(tree.header.root); + + return EXIT_SUCCESS; +} + diff --git a/tape.c b/tape.c new file mode 100644 index 0000000..c438b25 --- /dev/null +++ b/tape.c @@ -0,0 +1,96 @@ +#include "tape.h" +#include "common.h" +#include +#include +#include + +tape_t* tape_open(const char* filename, const char* mode) +{ + file_t *file = file_open(filename, mode); + printfv(VERBOSITY_VERBOSE, "Opening file %s in mode %s.\n", filename, mode); + + if (!file) { + fprintf(stderr, "Cannot open %s in %s mode.", filename, mode); + return NULL; + } + + tape_t* tape = malloc(sizeof(tape_t)); + + fseek(file->file, 0, SEEK_END); + + tape->file = file; + tape->end = ftell(file->file); + + return tape; +} + +void tape_close(tape_t* tape) +{ + file_close(tape->file); +} + +void* tape_read(tape_t* tape, offset_t offset, void* record, size_t size) +{ + char buffer[PAGE_SIZE]; + + page_t page = offset / PAGE_SIZE; + offset = offset % PAGE_SIZE; + + offset_t written = 0; + file_read(tape->file, page, buffer, PAGE_SIZE); + + while (size > PAGE_SIZE - offset) { + memcpy(record + written, buffer + offset, PAGE_SIZE - offset); + + size -= PAGE_SIZE - offset; + written += PAGE_SIZE - offset; + + page++; + offset = 0; + + file_read(tape->file, page, buffer, PAGE_SIZE); + } + + memcpy(record + written, buffer + offset, size); + + return record; +} + +offset_t tape_write(tape_t* tape, offset_t location, void* record, size_t size) +{ + char buffer[PAGE_SIZE]; + + page_t page = location / PAGE_SIZE; + offset_t offset = location % PAGE_SIZE; + + offset_t written = 0; + size_t original = file_read(tape->file, page, buffer, PAGE_SIZE); + while (size > PAGE_SIZE - offset) { + memcpy(buffer + offset, record + written, PAGE_SIZE - offset); + + size -= PAGE_SIZE - offset; + written += PAGE_SIZE - offset; + + file_write(tape->file, page, buffer, PAGE_SIZE); + + page++; + offset = 0; + + file_read(tape->file, page, buffer, PAGE_SIZE); + } + + memcpy(buffer + offset, record + written, size); + file_write(tape->file, page, buffer, offset + size > original ? offset + size : original); + + written += size; + if (location + written > tape->end) { + tape->end = location + written; + } + + return location; +} + +offset_t tape_append(tape_t* tape, void* record, size_t size) +{ + return tape_write(tape, tape->end, record, size); +} diff --git a/tape.h b/tape.h new file mode 100644 index 0000000..7c76786 --- /dev/null +++ b/tape.h @@ -0,0 +1,20 @@ +#ifndef TAPE_H_ +#define TAPE_H_ + +#include +#include "record.h" +#include "io.h" + +typedef struct { + file_t *file; + offset_t end; +} tape_t; + +tape_t* tape_open(const char* filename, const char* mode); +void tape_close(tape_t* tape); + +void* tape_read(tape_t* tape, offset_t offset, void* record, size_t size); +offset_t tape_write(tape_t* tape, offset_t offet, void* record, size_t size); +offset_t tape_append(tape_t* tape, void* record, size_t size); + +#endif