From bea9f4b3bbf11398119fb53481357f4fe0c545b6 Mon Sep 17 00:00:00 2001
From: Kacper Donat
Date: Sun, 21 Oct 2018 13:04:41 +0200
Subject: [PATCH] initial commit

---
 .gitignore  |   5 +
 Makefile    |  21 +++
 appender.c  |  91 +++++++++
 common.c    |  24 +++
 common.h    |  23 +++
 generator.c |  30 ++++
 heap.c      | 144 ++++++++++++
 heap.h      |  32 ++++
 optparse.h  | 403 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 reader.c    | 100 +++++++++++++
 record.c    |  12 ++
 record.h    |  13 ++
 sorter.c    | 238 +++++++++++++++++++++++++++++++
 tape.c      | 157 ++++++++++++++++++++
 tape.h      |  36 +++++
 15 files changed, 1329 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 Makefile
 create mode 100644 appender.c
 create mode 100644 common.c
 create mode 100644 common.h
 create mode 100644 generator.c
 create mode 100644 heap.c
 create mode 100644 heap.h
 create mode 100644 optparse.h
 create mode 100644 reader.c
 create mode 100644 record.c
 create mode 100644 record.h
 create mode 100644 sorter.c
 create mode 100644 tape.c
 create mode 100644 tape.h

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..3610c07
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,5 @@
+*
+!*.c
+!*.h
+!Makefile
+!.gitignore
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..6431be6
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,21 @@
+CC=gcc
+CFLAGS=-Wall -O0 -g
+# Libraries must follow the objects on the link line, hence LDLIBS and not LDFLAGS.
+LDLIBS=-lm
+
+.PHONY: all clean
+
+all: appender reader generator sorter
+
+appender: appender.o tape.o record.o common.o
+reader: reader.o tape.o record.o common.o
+generator: generator.o
+sorter: sorter.o tape.o record.o heap.o common.o
+
+-include $(wildcard *.d)
+
+.c.o:
+	$(CC) $(CFLAGS) -MMD -c $< -o $@
+
+clean:
+	rm -f *.o *.d
diff --git a/appender.c b/appender.c
new file mode 100644
index 0000000..37311f3
--- /dev/null
+++ b/appender.c
@@ -0,0 +1,91 @@
+#include <stdio.h>
+#include "tape.h"
+#include "record.h"
+#include "common.h"
+
+#define WANT_IOSTAT 1
+
+char* filename;
+char flags;
+
+/* Parses -q/-v/-i and stores the first positional argument in `filename`. */
+void init_args(int args, char* argv[])
+{
+    optparse_t options;
+    optparse_init(&options, argv);
+
+    /* optparse() returns an int and signals the end of options with -1. */
+    int opt;
+    while ((opt = optparse(&options, "qvi")) != -1) {
+        switch (opt) {
+            case 'q':
+                verbosity--;
+                break;
+            case 'v':
+                verbosity++;
+                break;
+            case 'i':
+                flags |= WANT_IOSTAT;
+                break;
+            default:
+                /* '?': optparse() left a description in errmsg. */
+                fprintf(stderr, "%s: %s\n", argv[0], options.errmsg);
+                break;
+        }
+    }
+
+    filename = optparse_arg(&options);
+}
+
+/* Prints the usage text; `name` is the program name. */
+void help(const char* name) {
+    printf(
+        "Append record to given tape at the end.\n"
+        "Usage:\n"
+        "\t%s [options] <tape>\n",
+        name
+    );
+}
+
+/* Reads "x y" pairs from stdin and appends each pair as a record to the tape. */
+int main(int argc, char* argv[])
+{
+    if (argc < 2) {
+        help(argv[0]);
+        return 0;
+    }
+
+    init_args(argc, argv);
+    if (!filename) {
+        /* Only options were given, no tape name. */
+        help(argv[0]);
+        return 0;
+    }
+
+    tape_t* tape = tape_open(filename, TAPE_APPEND);
+    if (!tape) {
+        /* tape_open() has already reported the reason. */
+        return 1;
+    }
+
+    record_t record;
+    size_t records = 0;
+
+    /* A record counts only when both coordinates were parsed. */
+    while (scanf("%lf %lf", &record.x, &record.y) == 2) {
+        tape_write(tape, &record, sizeof(record_t));
+        printfv(VERBOSITY_NORMAL, "Record created: r = (%lf, %lf) |r| = %lf\n", record.x, record.y, record_length(record));
+        records++;
+    }
+
+    printfv(VERBOSITY_NORMAL, "Appended %zu record to %s.\n", records, filename);
+
+    /* Close before reporting so the final flush is part of the IO statistics. */
+    tape_close(tape);
+
+    if (flags & WANT_IOSTAT) {
+        iostats();
+    }
+
+    return 0;
+}
diff --git a/common.c b/common.c
new file mode 100644
index 0000000..b3152ad
--- /dev/null
+++ b/common.c
@@ -0,0 +1,24 @@
+#include "common.h"
+#include "tape.h"
+#include <stdarg.h>
+
+verbosity_t verbosity = VERBOSITY_NORMAL;
+
+/* printf() wrapper: prints only when the global verbosity is at least `level`. */
+void printfv(verbosity_t level, const char* format, ...)
+{
+    if (verbosity < level) {
+        return;
+    }
+
+    va_list args;
+    va_start(args, format);
+    vprintf(format, args);
+    va_end(args);
+}
+
+/* Prints the block read/write counters from tape.c at VERBOSITY_QUIET level. */
+void iostats(void)
+{
+    printfv(VERBOSITY_QUIET, "IO R: %u W: %u\n", reads, writes);
+}
diff --git a/common.h b/common.h
new file mode 100644
index 0000000..36aca66
--- /dev/null
+++ b/common.h
@@ -0,0 +1,23 @@
+#ifndef COMMON_H_
+#define COMMON_H_
+
+#define VERBOSITY_QUIET (-1)
+#define VERBOSITY_NORMAL 0
+#define VERBOSITY_VERBOSE 1
+#define VERBOSITY_DEBUG 2
+
+#include <stdio.h>
+#define OPTPARSE_IMPLEMENTATION
+#define OPTPARSE_API static
+#include "optparse.h"
+
+/* Signed on purpose: levels go below zero and plain char may be unsigned. */
+typedef int verbosity_t;
+typedef struct optparse optparse_t;
+
+extern verbosity_t verbosity;
+
+void printfv(verbosity_t verbosity, const char* format, ...);
+void iostats(void);
+
+#endif
diff --git a/generator.c b/generator.c
new file mode 100644
index 0000000..e32098c
--- /dev/null
+++ b/generator.c
@@ -0,0 +1,30 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+
+/* Returns a pseudo-random double between `from` and `to`, driven by rand(). */
+double range_rand(double from, double to)
+{
+    return (double)rand() / RAND_MAX * (to - from) + from;
+}
+
+/* Prints `count` random "x y" records with coordinates between min and max. */
+int main(int argc, const char* argv[])
+{
+    if (argc < 4) {
+        printf("usage: %s min max count\n", argv[0]);
+        return -1;
+    }
+
+    double from = atof(argv[1]);
+    double to = atof(argv[2]);
+    int count = atoi(argv[3]);
+
+    srand(time(0));
+    /* `> 0` stops a negative count from running practically forever. */
+    while (count-- > 0) {
+        printf("%lf %lf\n", range_rand(from, to), range_rand(from, to));
+    }
+
+    return 0;
+}
diff --git a/heap.c b/heap.c
new file mode 100644
index 0000000..b833abf
--- /dev/null
+++ b/heap.c
@@ -0,0 +1,144 @@
+#include "heap.h"
+#include <stdio.h>
+#include <stdlib.h>
+#define SWAP(type, x, y) do {type tmp = x; x = y; y = tmp;} while(0)
+
+/* Index arithmetic for a binary heap stored in an array rooted at index 0. */
+unsigned heap_left(unsigned node) { return 2*node + 1; }
+unsigned heap_right(unsigned node) { return 2*(node + 1); }
+unsigned heap_parent(unsigned node) { return (node - 1) / 2; }
+
+/* Moves the record at `pos` up while it sorts before its parent; returns its final index. */
+static unsigned heap_sift_up(heap_t* heap, unsigned pos)
+{
+    while (pos) {
+        unsigned parent = heap_parent(pos);
+
+        if (heap->compare(heap->records[pos], heap->records[parent]) >= 0) {
+            break;
+        }
+
+        SWAP(void*, heap->records[pos], heap->records[parent]);
+        pos = parent;
+    }
+
+    return pos;
+}
+
+/* Moves the record at `pos` down while one of its children sorts before it. */
+static void heap_sift_down(heap_t* heap, unsigned pos)
+{
+    while (1) {
+        unsigned left = heap_left(pos);
+        unsigned right = heap_right(pos);
+        unsigned compared = left;
+
+        if (left >= heap->current) {
+            break;
+        }
+
+        if (right < heap->current) {
+            compared = heap->compare(heap->records[left], heap->records[right]) < 0 ? left : right;
+        }
+
+        if (heap->compare(heap->records[compared], heap->records[pos]) >= 0) {
+            break;
+        }
+
+        SWAP(void*, heap->records[compared], heap->records[pos]);
+        pos = compared;
+    }
+}
+
+/*
+ * Prepares an empty min-heap with room for `max` pointers. `compare` returns
+ * a negative value when `a` sorts before `b`. Release with heap_free().
+ */
+void heap_init(heap_t* heap, size_t max, int (*compare)(const void* a, const void* b))
+{
+    heap->records = malloc(max * sizeof(void*));
+    heap->max = heap->records ? max : 0;
+    heap->current = 0;
+    heap->compare = compare;
+}
+
+/* Releases the pointer array; the records it pointed to are not freed. */
+void heap_free(heap_t* heap)
+{
+    free(heap->records);
+    heap->records = NULL;
+    heap->max = 0;
+    heap->current = 0;
+}
+
+/* Returns the smallest record without removing it, or NULL when the heap is empty. */
+void* heap_min(heap_t* heap)
+{
+    if (!heap->current) {
+        return NULL;
+    }
+
+    return heap->records[0];
+}
+
+/* Removes and returns the smallest record, or NULL when the heap is empty. */
+void* heap_pop(heap_t* heap) {
+    void* result = heap_min(heap);
+
+    if (result) {
+        heap_remove(heap, 0);
+    }
+
+    return result;
+}
+
+/* Adds `record`, doubling the array when it is full; aborts if that fails. */
+void heap_insert(heap_t* heap, void* record)
+{
+    if (heap->current >= heap->max) {
+        size_t grown = heap->max ? 2 * heap->max : 1;
+        void** records = realloc(heap->records, grown * sizeof(void*));
+
+        if (!records) {
+            fprintf(stderr, "Cannot grow heap to %zu records.\n", grown);
+            abort();
+        }
+
+        heap->records = records;
+        heap->max = grown;
+    }
+
+    unsigned pos = heap->current++;
+
+    heap->records[pos] = record;
+    heap_sift_up(heap, pos);
+}
+
+/* Removes the record at index `pos`; an index past the last record is ignored. */
+void heap_remove(heap_t* heap, unsigned pos)
+{
+    if (pos >= heap->current) {
+        return;
+    }
+
+    heap->records[pos] = heap->records[--heap->current];
+    if (pos >= heap->current) {
+        /* The last slot itself was removed; nothing was moved. */
+        return;
+    }
+
+    /* The moved record may belong above or below the slot it landed in. */
+    if (heap_sift_up(heap, pos) == pos) {
+        heap_sift_down(heap, pos);
+    }
+}
+
+/* Prints every stored record in array order using `print`. */
+void heap_print(heap_t* heap, void (*print)(void* record))
+{
+    printf("[ ");
+    for (unsigned i = 0; i < heap->current; i++) {
+        print(heap->records[i]);
+    }
+    printf("]\n");
+}
diff --git a/heap.h b/heap.h
new file mode 100644
index 0000000..807b17e
--- /dev/null
+++ b/heap.h
@@ -0,0 +1,32 @@
+#ifndef HEAP_H
+#define HEAP_H
+
+#include "record.h"
+#include <stdlib.h>
+
+/* Array-backed binary min-heap of opaque record pointers. */
+typedef struct {
+    void** records;  /* heap array; the smallest record is at index 0 */
+
+    size_t max;      /* number of allocated slots */
+    size_t current;  /* number of slots in use */
+
+    int (*compare)(const void* a, const void* b);
+} heap_t;
+
+unsigned heap_left(unsigned node);
+unsigned heap_right(unsigned node);
+unsigned heap_parent(unsigned node);
+
+void heap_init(heap_t* heap, size_t size, int (*compare)(const void* a, const void *b));
+void heap_free(heap_t* heap);
+
+void heap_insert(heap_t* heap, void* record);
+void heap_remove(heap_t* heap, unsigned n);
+
+void* heap_min(heap_t* heap);
+void* heap_pop(heap_t* heap);
+
+void heap_print(heap_t* heap, void (*print)(void* record));
+
+#endif
diff --git a/optparse.h b/optparse.h
new file mode 100644
index 0000000..3a577a7
--- /dev/null
+++ b/optparse.h
@@ -0,0 +1,403 @@
+/* Optparse --- portable, reentrant, embeddable, getopt-like option parser
+ *
+ * This is free and unencumbered software released into the public domain.
+ *
+ * To get the implementation, define OPTPARSE_IMPLEMENTATION.
+ * Optionally define OPTPARSE_API to control the API's visibility
+ * and/or linkage (static, __attribute__, __declspec).
+ *
+ * The POSIX getopt() option parser has three fatal flaws. These flaws
+ * are solved by Optparse.
+ *
+ * 1) Parser state is stored entirely in global variables, some of
+ *    which are static and inaccessible. This means only one thread can
+ *    use getopt(). 
It also means it's not possible to recursively parse + * nested sub-arguments while in the middle of argument parsing. + * Optparse fixes this by storing all state on a local struct. + * + * 2) The POSIX standard provides no way to properly reset the parser. + * This means for portable code that getopt() is only good for one + * run, over one argv with one option string. It also means subcommand + * options cannot be processed with getopt(). Most implementations + * provide a method to reset the parser, but it's not portable. + * Optparse provides an optparse_arg() function for stepping over + * subcommands and continuing parsing of options with another option + * string. The Optparse struct itself can be passed around to + * subcommand handlers for additional subcommand option parsing. A + * full reset can be achieved with an additional optparse_init(). + * + * 3) Error messages are printed to stderr. This can be disabled with + * opterr, but the messages themselves are still inaccessible. + * Optparse solves this by writing an error message in its errmsg + * field. The downside to Optparse is that this error message will + * always be in English rather than the current locale. + * + * Optparse should be familiar to anyone accustomed to getopt(), and + * it could be a nearly drop-in replacement. The option string is the + * same and the fields have the same names as the getopt() global + * variables (optarg, optind, optopt). + * + * Optparse also supports GNU-style long options with optparse_long(). + * The interface is slightly different and simpler than getopt_long(). + * + * By default, argv is permuted as it is parsed, moving non-option + * arguments to the end. This can be disabled by setting the `permute` + * field to 0 after initialization. 
+ */ +#ifndef OPTPARSE_H +#define OPTPARSE_H + +#ifndef OPTPARSE_API +# define OPTPARSE_API +#endif + +struct optparse { + char **argv; /* argument vector being parsed; reordered in place while permute is set */ + int permute; /* nonzero: non-option arguments are moved behind the options (set to 1 by optparse_init) */ + int optind; /* index into argv of the next argument to examine */ + int optopt; /* option character handled by the most recent call */ + char *optarg; /* argument of that option, or 0 when it has none */ + char errmsg[64]; /* description of the last error; reset to an empty string at the start of each parse */ + int subopt; /* number of characters already consumed inside a group of short options */ +}; + +enum optparse_argtype { /* each value equals the number of colons that follow the option in an option string */ + OPTPARSE_NONE, + OPTPARSE_REQUIRED, + OPTPARSE_OPTIONAL +}; + +struct optparse_long { + const char *longname; + int shortname; + enum optparse_argtype argtype; +}; + +/** + * Initializes the parser state. + */ +OPTPARSE_API +void optparse_init(struct optparse *options, char **argv); + +/** + * Read the next option in the argv array. + * @param optstring a getopt()-formatted option string. + * @return the next option character, -1 for done, or '?' for error + * + * Just like getopt(), a character followed by no colons means no + * argument. One colon means the option has a required argument. Two + * colons means the option takes an optional argument. + */ +OPTPARSE_API +int optparse(struct optparse *options, const char *optstring); + +/** + * Handles GNU-style long options in addition to getopt() options. + * This works a lot like GNU's getopt_long(). The last option in + * longopts must be all zeros, marking the end of the array. The + * longindex argument may be NULL. + */ +OPTPARSE_API +int optparse_long(struct optparse *options, + const struct optparse_long *longopts, + int *longindex); + +/** + * Used for stepping over non-option arguments. + * @return the next non-option argument, or NULL for no more arguments + * + * Argument parsing can continue with optparse() after using this + * function. That would be used to parse the options for the + * subcommand returned by optparse_arg(). This function allows you to + * ignore the value of optind. 
+ */ +OPTPARSE_API +char *optparse_arg(struct optparse *options); + +/* Implementation */ +#ifdef OPTPARSE_IMPLEMENTATION + +#define OPTPARSE_MSG_INVALID "invalid option" +#define OPTPARSE_MSG_MISSING "option requires an argument" +#define OPTPARSE_MSG_TOOMANY "option takes no arguments" + +static int /* stores msg, a separator and the quoted data in options->errmsg (data is cut to fit) and returns the question-mark error code */ +optparse_error(struct optparse *options, const char *msg, const char *data) +{ + unsigned p = 0; + const char *sep = " -- '"; + while (*msg) + options->errmsg[p++] = *msg++; + while (*sep) + options->errmsg[p++] = *sep++; + while (p < sizeof(options->errmsg) - 2 && *data) + options->errmsg[p++] = *data++; + options->errmsg[p++] = '\''; + options->errmsg[p++] = '\0'; + return '?'; +} + +OPTPARSE_API +void +optparse_init(struct optparse *options, char **argv) +{ + options->argv = argv; + options->permute = 1; + options->optind = 1; + options->subopt = 0; + options->optarg = 0; + options->errmsg[0] = '\0'; +} + +static int /* nonzero only for the argument that consists of exactly two dashes */ +optparse_is_dashdash(const char *arg) +{ + return arg != 0 && arg[0] == '-' && arg[1] == '-' && arg[2] == '\0'; +} + +static int /* nonzero for a dash followed by at least one character that is not a dash */ +optparse_is_shortopt(const char *arg) +{ + return arg != 0 && arg[0] == '-' && arg[1] != '-' && arg[1] != '\0'; +} + +static int /* nonzero for two dashes followed by at least one more character */ +optparse_is_longopt(const char *arg) +{ + return arg != 0 && arg[0] == '-' && arg[1] == '-' && arg[2] != '\0'; +} + +static void /* moves argv[index] to slot optind - 1 and shifts the arguments in between one slot down */ +optparse_permute(struct optparse *options, int index) +{ + char *nonoption = options->argv[index]; + int i; + for (i = index; i < options->optind - 1; i++) + options->argv[i] = options->argv[i + 1]; + options->argv[options->optind - 1] = nonoption; +} + +static int /* looks c up in optstring: -1 when it is a colon or absent, otherwise the number of colons (0 to 2) after it */ +optparse_argtype(const char *optstring, char c) +{ + int count = OPTPARSE_NONE; + if (c == ':') + return -1; + for (; *optstring && c != *optstring; optstring++); + if (!*optstring) + return -1; + if (optstring[1] == ':') + count += optstring[2] == ':' ? 
2 : 1; + return count; +} + +OPTPARSE_API +int +optparse(struct optparse *options, const char *optstring) +{ + int type; + char *next; + char *option = options->argv[options->optind]; + options->errmsg[0] = '\0'; + options->optopt = 0; + options->optarg = 0; + if (option == 0) { + return -1; + } else if (optparse_is_dashdash(option)) { + options->optind++; /* consume "--" */ + return -1; + } else if (!optparse_is_shortopt(option)) { + if (options->permute) { + int index = options->optind++; + int r = optparse(options, optstring); + optparse_permute(options, index); + options->optind--; + return r; + } else { + return -1; + } + } + option += options->subopt + 1; /* skip the dash and the grouped options handled by earlier calls */ + options->optopt = option[0]; + type = optparse_argtype(optstring, option[0]); + next = options->argv[options->optind + 1]; /* used as the argument when a required one is not attached */ + switch (type) { + case -1: { + char str[2] = {0, 0}; + str[0] = option[0]; + options->optind++; + return optparse_error(options, OPTPARSE_MSG_INVALID, str); + } + case OPTPARSE_NONE: + if (option[1]) { + options->subopt++; + } else { + options->subopt = 0; + options->optind++; + } + return option[0]; + case OPTPARSE_REQUIRED: + options->subopt = 0; + options->optind++; + if (option[1]) { + options->optarg = option + 1; + } else if (next != 0) { + options->optarg = next; + options->optind++; + } else { + char str[2] = {0, 0}; + str[0] = option[0]; + options->optarg = 0; + return optparse_error(options, OPTPARSE_MSG_MISSING, str); + } + return option[0]; + case OPTPARSE_OPTIONAL: + options->subopt = 0; + options->optind++; + if (option[1]) + options->optarg = option + 1; + else + options->optarg = 0; + return option[0]; + } + return 0; +} + +OPTPARSE_API +char * +optparse_arg(struct optparse *options) +{ + char *option = options->argv[options->optind]; + options->subopt = 0; + if (option != 0) + options->optind++; + return option; +} + +static int /* nonzero at the entry whose longname and shortname are both zero, which terminates longopts */ +optparse_longopts_end(const struct optparse_long *longopts, int i) +{ + return !longopts[i].longname && !longopts[i].shortname; +} + 
+static void
+optparse_from_long(const struct optparse_long *longopts, char *optstring)
+{
+    char *p = optstring;
+    int i;
+    for (i = 0; !optparse_longopts_end(longopts, i); i++) {
+        if (longopts[i].shortname) {
+            int a;
+            *p++ = longopts[i].shortname;
+            for (a = 0; a < (int)longopts[i].argtype; a++)
+                *p++ = ':';
+        }
+    }
+    *p = '\0';
+}
+
+/* Unlike strcmp(), handles options containing "=". */
+static int
+optparse_longopts_match(const char *longname, const char *option)
+{
+    const char *a = option, *n = longname;
+    if (longname == 0)
+        return 0;
+    for (; *a && *n && *a != '='; a++, n++)
+        if (*a != *n)
+            return 0;
+    return *n == '\0' && (*a == '\0' || *a == '=');
+}
+
+/* Return the part after "=", or NULL. */
+static char *
+optparse_longopts_arg(char *option)
+{
+    for (; *option && *option != '='; option++);
+    if (*option == '=')
+        return option + 1;
+    else
+        return 0;
+}
+
+static int
+optparse_long_fallback(struct optparse *options,
+                       const struct optparse_long *longopts,
+                       int *longindex)
+{
+    int result;
+    char optstring[96 * 3 + 1]; /* 96 ASCII printable characters */
+    optparse_from_long(longopts, optstring);
+    result = optparse(options, optstring);
+    if (longindex != 0) {
+        *longindex = -1;
+        if (result != -1) {
+            int i;
+            for (i = 0; !optparse_longopts_end(longopts, i); i++)
+                if (longopts[i].shortname == options->optopt)
+                    *longindex = i;
+        }
+    }
+    return result;
+}
+
+OPTPARSE_API
+int
+optparse_long(struct optparse *options,
+              const struct optparse_long *longopts,
+              int *longindex)
+{
+    int i;
+    char *option = options->argv[options->optind];
+    if (option == 0) {
+        return -1;
+    } else if (optparse_is_dashdash(option)) {
+        options->optind++; /* consume "--" */
+        return -1;
+    } else if (optparse_is_shortopt(option)) {
+        return optparse_long_fallback(options, longopts, longindex);
+    } else if (!optparse_is_longopt(option)) {
+        if (options->permute) {
+            int index = options->optind++;
+            int r = optparse_long(options, longopts, longindex);
+            optparse_permute(options, index);
+            options->optind--;
+            return r;
+        } else {
+            return -1;
+        }
+    }
+
+    /* Parse as long option. */
+    options->errmsg[0] = '\0';
+    options->optopt = 0;
+    options->optarg = 0;
+    option += 2; /* skip "--" */
+    options->optind++;
+    for (i = 0; !optparse_longopts_end(longopts, i); i++) {
+        const char *name = longopts[i].longname;
+        if (optparse_longopts_match(name, option)) {
+            char *arg;
+            if (longindex)
+                *longindex = i;
+            options->optopt = longopts[i].shortname;
+            arg = optparse_longopts_arg(option);
+            if (longopts[i].argtype == OPTPARSE_NONE && arg != 0) {
+                return optparse_error(options, OPTPARSE_MSG_TOOMANY, name);
+            } if (arg != 0) {
+                options->optarg = arg;
+            } else if (longopts[i].argtype == OPTPARSE_REQUIRED) {
+                options->optarg = options->argv[options->optind];
+                if (options->optarg == 0)
+                    return optparse_error(options, OPTPARSE_MSG_MISSING, name);
+                else
+                    options->optind++;
+            }
+            return options->optopt;
+        }
+    }
+    return optparse_error(options, OPTPARSE_MSG_INVALID, option);
+}
+
+#endif /* OPTPARSE_IMPLEMENTATION */
+#endif /* OPTPARSE_H */
diff --git a/reader.c b/reader.c
new file mode 100644
index 0000000..bbe9342
--- /dev/null
+++ b/reader.c
@@ -0,0 +1,121 @@
+#include <stdio.h>
+#include "tape.h"
+#include "record.h"
+#include "common.h"
+
+#define WANT_SUMMARY 1
+#define WANT_IOSTAT 2
+#define WANT_EXPORT 4
+
+char* filename;
+char flags;
+
+/* Prints the usage text; `name` is the program name. */
+void help(const char* name)
+{
+    printf(
+        "Reads records from tape.\n"
+        "Usage:\n"
+        "\t%s [options] <tape>\n"
+        "Options:\n"
+        "\t-q|v|vv - verbosity level, q for quiet, v for verbose, vv for debug\n"
+        "\t-s - summary of records (R) and runs (S)\n"
+        "\t-i - summary IO stats\n"
+        "\t-e - data in export format\n"
+        , name
+    );
+}
+
+/* Parses -q/-v/-i/-s/-e and stores the first positional argument in `filename`. */
+void init_args(int args, char* argv[])
+{
+    optparse_t options;
+    optparse_init(&options, argv);
+
+    /* optparse() returns an int and signals the end of options with -1. */
+    int opt;
+    while ((opt = optparse(&options, "qvise")) != -1) {
+        switch (opt) {
+            case 'q':
+                verbosity--;
+                break;
+            case 'v':
+                verbosity++;
+                break;
+            case 'i':
+                flags |= WANT_IOSTAT;
+                break;
+            case 'e':
+                flags |= WANT_EXPORT;
+                break;
+            case 's':
+                flags |= WANT_SUMMARY;
+                break;
+            default:
+                /* '?': optparse() left a description in errmsg. */
+                fprintf(stderr, "%s: %s\n", argv[0], options.errmsg);
+                break;
+        }
+    }
+
+    filename = optparse_arg(&options);
+}
+
+/* Prints every record of the tape and counts the records and sorted runs. */
+int main(int argc, char* argv[])
+{
+    if (argc < 2) {
+        help(argv[0]);
+        return 0;
+    }
+
+    init_args(argc, argv);
+    if (!filename) {
+        /* Only options were given, no tape name. */
+        help(argv[0]);
+        return 0;
+    }
+
+    tape_t* tape = tape_open(filename, TAPE_READ);
+    if (!tape) {
+        /* tape_open() has already reported the reason. */
+        return 1;
+    }
+
+    record_t record, last = { 0.0, 0.0 };
+    size_t records = 0, series = 1, current = 0;
+
+    while (tape_read(tape, &record, sizeof(record_t)) != NULL) {
+        /* A record shorter than its predecessor starts a new run. */
+        if (record_compare(last, record) > 0) {
+            printfv(VERBOSITY_VERBOSE, "R: %zu -------------------------------------\n", current);
+            series++;
+            current = 0;
+        }
+
+        if (flags & WANT_EXPORT) {
+            /* One record per line so the output can be parsed back. */
+            printfv(VERBOSITY_NORMAL, "%lf %lf\n", record.x, record.y);
+        } else {
+            printfv(VERBOSITY_NORMAL, "|r| = %lf r = (%lf, %lf)\n", record_length(record), record.x, record.y);
+        }
+
+        records++;
+        current++;
+
+        last = record;
+    }
+
+    tape_close(tape);
+
+    if (flags & WANT_SUMMARY) {
+        /* An empty tape holds no runs at all. */
+        printf("R: %zu S: %zu\n", records, records ? series : (size_t)0);
+    }
+
+    if (flags & WANT_IOSTAT) {
+        iostats();
+    }
+
+    return 0;
+}
diff --git a/record.c b/record.c
new file mode 100644
index 0000000..0ff72d8
--- /dev/null
+++ b/record.c
@@ -0,0 +1,14 @@
+#include "record.h"
+#include <math.h>
+
+/* Euclidean length of the vector (x, y). */
+double record_length(const record_t a)
+{
+    return sqrt(a.x * a.x + a.y * a.y);
+}
+
+/* Length of a minus length of b: negative when a sorts before b. */
+double record_compare(const record_t a, const record_t b)
+{
+    return record_length(a) - record_length(b);
+}
diff --git a/record.h b/record.h
new file mode 100644
index 0000000..eace15f
--- /dev/null
+++ b/record.h
@@ -0,0 +1,13 @@
+#ifndef RECORD_H_
+#define RECORD_H_
+
+typedef struct {
+    double x;
+    double y;
+} record_t;
+
+
+double record_length(const record_t a);
+double record_compare(const record_t a, const record_t b);
+
+#endif
diff --git a/sorter.c b/sorter.c
new file mode 100644
index 0000000..3badc9f
--- /dev/null
+++ b/sorter.c
@@ -0,0 +1,297 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include "tape.h"
+#include "record.h"
+#include "heap.h"
+#include "common.h"
+
+#define SGN(x) (((x) > 0) - ((x) < 0))
+
+char* tmp_format = "tmp.%s.tape";
+
+/* Heap entry: the head record of a tape together with the tape it came from. */
+typedef struct {
+    record_t record;
+    tape_t* tape;
+} entry_t;
+
+/* malloc() that reports the failure and exits instead of returning NULL. */
+static void* must_alloc(size_t size) {
+    void* memory = malloc(size ? size : 1);
+
+    if (!memory) {
+        fprintf(stderr, "Cannot allocate %zu bytes.\n", size);
+        exit(EXIT_FAILURE);
+    }
+
+    return memory;
+}
+
+/* Maps a base-62 digit (taken modulo 62) to a-z, A-Z, 0-9. */
+char chartosymbol(unsigned id) {
+    // normalize id
+    id = id % 62;
+
+    if (id < 26) {
+        return 'a' + id;
+    } else if (id < 52) {
+        return 'A' + id - 26;
+    } else if (id < 62) {
+        return '0' + id - 52;
+    } else {
+        return '?';
+    }
+}
+
+/* Writes `id` in base 62, least significant digit first, as a string. */
+void identifier(unsigned id, char* string) {
+    // 26 letters 26 capitals 10 digits
+    const unsigned base = 26 + 26 + 10;
+
+    unsigned reminder, i = 0;
+    do {
+        reminder = id % base;
+        string[i++] = chartosymbol(reminder);
+    } while (id /= base);
+
+    string[i] = 0;
+}
+
+/* Opens temporary tape number `id`; `format` must contain exactly one %s. */
+tape_t* tape_tmp(unsigned id, const char* mode, const char* format) {
+    char tmpname[256], ident[20];
+
+    identifier(id, ident);
+    snprintf(tmpname, sizeof(tmpname), format, ident);
+
+    return tape_open(tmpname, mode);
+}
+
+/* tape_tmp() with the default name format; exits when the tape cannot be opened. */
+static tape_t* must_open_tmp(unsigned id, const char* mode) {
+    tape_t* tape = tape_tmp(id, mode, tmp_format);
+
+    if (!tape) {
+        /* tape_open() has already reported the reason. */
+        exit(EXIT_FAILURE);
+    }
+
+    return tape;
+}
+
+/* Prints the usage text; `name` is the program name. */
+void help(const char* name) {
+    printf(
+        "Sorts records from tape.\n"
+        "Usage:\n"
+        "\t%s <in> <out>\n",
+        name
+    );
+}
+
+/* qsort()-style comparator ordering record_t values by vector length. */
+int compare_records(const void* a, const void* b) {
+    double result = record_compare(*(const record_t*)a, *(const record_t*)b);
+    return SGN(result);
+}
+
+/* Heap comparator ordering entry_t values by their record. */
+int compare_entries(const void* a, const void* b) {
+    const entry_t* lhs = a;
+    const entry_t* rhs = b;
+
+    return compare_records(&lhs->record, &rhs->record);
+}
+
+/* Sorts the first `n` records of `buffer` in place and writes them to `tape`. */
+void save_sorted(tape_t* tape, record_t* buffer, size_t n) {
+    qsort(buffer, n, sizeof(record_t), compare_records);
+
+    for (size_t j = 0; j < n; j++) {
+        tape_write(tape, buffer + j, sizeof(record_t));
+    }
+}
+
+/*
+ * Cuts `in` into sorted runs of at most PAGE_SIZE * (n + 1) bytes and deals
+ * them round-robin onto temporary tapes 0..n-1. Returns the number of runs.
+ */
+size_t make_series(tape_t* in, size_t n) {
+    const size_t max = PAGE_SIZE / sizeof(record_t) * (n + 1);
+
+    size_t series = 0, i = 0;
+    record_t* buffer = must_alloc(PAGE_SIZE * (n + 1));
+
+    while (1) {
+        int more = tape_read(in, buffer + i, sizeof(record_t)) != NULL;
+
+        if (more) {
+            i++;
+        }
+
+        /* Flush when the buffer is full, and once more for the last partial run. */
+        if (i >= max || (!more && i > 0)) {
+            /* The first n runs create their tape, later ones append to it. */
+            tape_t* tmp = must_open_tmp(series % n, series >= n ? "ab" : "wb");
+            save_sorted(tmp, buffer, i);
+            tape_close(tmp);
+
+            i = 0;
+            series++;
+        }
+
+        if (!more) {
+            break;
+        }
+    }
+
+    free(buffer);
+    return series;
+}
+
+/*
+ * Merges the next sorted run of each of the `n` tapes into one run on `out`.
+ * Returns the number of records written; 0 means all tapes are exhausted.
+ */
+unsigned join_tapes(tape_t** tapes, size_t n, tape_t* out) {
+    heap_t heap;
+    unsigned records = 0;
+
+    heap_init(&heap, n, compare_entries);
+
+    // initial distribution
+    for (unsigned i = 0; i < n; i++) {
+        record_t record;
+
+        if (tape_read(tapes[i], &record, sizeof(record_t))) {
+            entry_t* entry = must_alloc(sizeof(entry_t));
+            entry->tape = tapes[i];
+            entry->record = record;
+
+            heap_insert(&heap, entry);
+            records++;
+        }
+    }
+
+    // merge into 1 tape
+    entry_t* current;
+    while ((current = heap_pop(&heap))) {
+        tape_write(out, &current->record, sizeof(record_t));
+        record_t record;
+
+        if (tape_read(current->tape, &record, sizeof(record_t))) {
+            if (record_compare(record, current->record) >= 0) {
+                current->record = record;
+                heap_insert(&heap, current);
+                records++;
+                continue;
+            }
+
+            /* The record opens the tape's next run: push it back for the next call. */
+            tape_rewind(current->tape, 1, sizeof(record_t));
+        }
+
+        free(current);
+    }
+
+    /* Reached for an empty merge as well, so the heap array is always released. */
+    heap_free(&heap);
+
+    return records;
+}
+
+/* heap_print() callback: prints the length of the entry's record. */
+void print_entry(void* e)
+{
+    entry_t* entry = e;
+    printf("%lf ", record_length(entry->record));
+}
+
+/*
+ * One merge pass. Pass `i` reads the n tapes starting at (i % 2) * n and
+ * writes merged runs round-robin onto the other n tapes. Returns the number
+ * of runs written.
+ */
+unsigned iteration(size_t i, size_t n)
+{
+    size_t in_offset = (i % 2) * n;
+    size_t out_offset = ((i+1) % 2) * n;
+
+    unsigned series = 0;
+
+    tape_t** in_tapes = must_alloc(n * sizeof(tape_t*));
+    tape_t** out_tapes = must_alloc(n * sizeof(tape_t*));
+
+    for (unsigned t = 0; t < n; t++) {
+        in_tapes[t] = must_open_tmp(in_offset + t, "rb");
+        out_tapes[t] = must_open_tmp(out_offset + t, "wb");
+    }
+
+    while (join_tapes(in_tapes, n, out_tapes[series % n]) > 0) {
+        series++;
+    }
+
+    for (unsigned t = 0; t < n; t++) {
+        tape_close(in_tapes[t]);
+        tape_close(out_tapes[t]);
+    }
+
+    free(in_tapes);
+    free(out_tapes);
+
+    return series;
+}
+
+/* External merge sort of the tape argv[1] into the tape argv[2]. */
+int main(int argc, const char* argv[])
+{
+    if (argc < 3) {
+        help(argv[0]);
+        return 0;
+    }
+
+    size_t n = 100;
+
+    tape_t* in = tape_open(argv[1], "rb");
+    if (!in) {
+        return 1;
+    }
+
+    tape_t* out = tape_open(argv[2], "wb");
+    if (!out) {
+        tape_close(in);
+        return 1;
+    }
+
+    size_t series = make_series(in, n);
+    printf("Created %zu series.\n", series);
+
+    unsigned i = 0;
+    while (series > n) {
+        printf("Iteration %u.\n", i);
+        series = iteration(i++, n);
+    }
+
+    printf("Final iteration.\n");
+    size_t offset = (i % 2) * n;
+    tape_t** in_tapes = must_alloc(series * sizeof(tape_t*));
+    for (unsigned t = 0; t < series; t++) {
+        in_tapes[t] = must_open_tmp(offset + t, "rb");
+    }
+
+    join_tapes(in_tapes, series, out);
+
+    for (unsigned t = 0; t < series; t++) {
+        tape_close(in_tapes[t]);
+    }
+    free(in_tapes);
+
+    printf("Sorted file %s into %s in %u iterations.\n", in->name, out->name, i + 1);
+
+    tape_close(in);
+    tape_close(out);
+
+    printf("%u IO R: %u W: %u\n", reads + writes, reads, writes);
+
+    return 0;
+}
diff --git a/tape.c b/tape.c
new file mode 100644
index 0000000..5453653
--- /dev/null
+++ b/tape.c
@@ -0,0 +1,217 @@
+#include "tape.h"
+#include "common.h"
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+
+unsigned reads = 0;
+unsigned writes = 0;
+
+static void tape_load_block(tape_t* tape, unsigned n);
+static void tape_flush_block(tape_t* tape);
+
+/*
+ * Opens `filename` as a tape in mode TAPE_READ, TAPE_WRITE or TAPE_APPEND.
+ * `mode` is stored by pointer and must stay valid until tape_close().
+ * Returns NULL, after printing the reason to stderr, on failure.
+ */
+tape_t* tape_open(const char* filename, const char* mode)
+{
+    printfv(VERBOSITY_VERBOSE, "Opening tape %s in mode %s.\n", filename, mode);
+
+    int reading = strcmp(mode, TAPE_READ) == 0;
+    if (!reading && strcmp(mode, TAPE_WRITE) != 0 && strcmp(mode, TAPE_APPEND) != 0) {
+        fprintf(stderr, "Mode %s is unknown for tapes.\n", mode);
+        return NULL;
+    }
+
+    FILE* file = fopen(filename, mode);
+    if (!file) {
+        fprintf(stderr, "Cannot open %s in %s mode.\n", filename, mode);
+        return NULL;
+    }
+
+    tape_t* tape = malloc(sizeof(tape_t));
+    /* +1 for the terminating NUL copied by strcpy() below. */
+    char* name = malloc(strlen(filename) + 1);
+    void* buffer = malloc(PAGE_SIZE);
+
+    if (!tape || !name || !buffer) {
+        fprintf(stderr, "Out of memory while opening %s.\n", filename);
+        free(buffer);
+        free(name);
+        free(tape);
+        fclose(file);
+        return NULL;
+    }
+
+    strcpy(name, filename);
+
+    tape->file = file;
+    tape->mode = mode;
+    tape->name = name;
+    tape->buffer = buffer;
+    tape->read = 0;
+    tape->offset = 0;
+    /*
+     * A reader starts before block 0 so that its first tape_read() loads it.
+     * A writer starts with an empty buffer; in append mode the stream itself
+     * places every flushed block at the end of the file.
+     */
+    tape->block = reading ? -1 : 0;
+
+    return tape;
+}
+
+/* Flushes pending output, closes the file and frees the tape; NULL is ignored. */
+void tape_close(tape_t* tape)
+{
+    if (!tape) {
+        return;
+    }
+
+    printfv(VERBOSITY_VERBOSE, "Closing tape %s.\n", tape->name);
+    if (strcmp(tape->mode, TAPE_WRITE) == 0 || strcmp(tape->mode, TAPE_APPEND) == 0) {
+        tape_flush_block(tape);
+    }
+
+    fclose(tape->file);
+    free(tape->name);
+    free(tape->buffer);
+    free(tape);
+}
+
+/* Loads block `n` into the buffer and counts one read. */
+static void tape_load_block(tape_t* tape, unsigned n)
+{
+    printfv(VERBOSITY_DEBUG, "Loading block %u of %s.\n", n, tape->name);
+    fseek(tape->file, (long)n * PAGE_SIZE, SEEK_SET);
+
+    tape->offset = 0;
+    tape->read = fread(tape->buffer, 1, PAGE_SIZE, tape->file);
+    tape->block = n;
+
+    reads++;
+}
+
+/* Writes the buffered bytes as block `tape->block` and counts one write. */
+static void tape_flush_block(tape_t* tape)
+{
+    if (!tape->offset) {
+        /* Nothing buffered: do not count a write that never happens. */
+        return;
+    }
+
+    printfv(VERBOSITY_DEBUG, "Flushing block %d (%u bytes) of %s.\n", tape->block, tape->offset, tape->name);
+    fseek(tape->file, (long)tape->block * PAGE_SIZE, SEEK_SET);
+
+    fwrite(tape->buffer, 1, tape->offset, tape->file);
+    tape->offset = 0;
+    tape->block++;
+
+    writes++;
+}
+
+/*
+ * Copies the next `size` bytes of the tape into `record`.
+ * Returns `record`, or NULL when fewer than `size` bytes are left.
+ */
+void* tape_read(tape_t* tape, void* record, size_t size)
+{
+    if (tape->offset >= tape->read) {
+        if (tape->block >= 0 && tape->read < PAGE_SIZE) {
+            /* A short block is the last one; do not count further reads. */
+            return NULL;
+        }
+
+        // load next block
+        tape_load_block(tape, tape->block + 1);
+    }
+
+    size_t available = tape->read - tape->offset;
+
+    if (available >= size) {
+        memcpy(record, (char*)tape->buffer + tape->offset, size);
+        tape->offset += size;
+        return record;
+    }
+
+    if (tape->read < PAGE_SIZE) {
+        /* The file ends here: at most a truncated record is left. */
+        return NULL;
+    }
+
+    /* The record straddles two blocks: take the tail of this one first. */
+    size_t missing = size - available;
+
+    memcpy(record, (char*)tape->buffer + tape->offset, available);
+    tape_load_block(tape, tape->block + 1);
+
+    if (tape->read < missing) {
+        /* The file ends in the middle of the record. */
+        tape->offset = tape->read;
+        return NULL;
+    }
+
+    memcpy((char*)record + available, tape->buffer, missing);
+    tape->offset = missing;
+
+    return record;
+}
+
+/*
+ * Moves the read position back by `n` records of `size` bytes, reloading
+ * earlier blocks when needed and stopping at the start of the tape.
+ * Returns the index of the block that is loaded afterwards.
+ */
+int tape_rewind(tape_t* tape, size_t n, size_t size)
+{
+    long bytes = (long)(n * size);
+    long take = (long)tape->offset < bytes ? (long)tape->offset : bytes;
+
+    bytes -= take;
+    tape->offset -= take;
+
+    /* Step back block by block until the target lies inside the loaded block. */
+    while (bytes > 0 && tape->block > 0) {
+        tape_load_block(tape, tape->block - 1);
+        bytes -= PAGE_SIZE;
+    }
+
+    if (bytes < 0) {
+        /* Stepped back too far by -bytes: the target is that far into the block. */
+        tape->offset = (unsigned)-bytes;
+    }
+
+    return tape->block;
+}
+
+/*
+ * Appends `size` bytes from `record` to the tape buffer, flushing each block
+ * as it fills up. Returns the number of bytes accepted.
+ */
+int tape_write(tape_t* tape, const void* record, size_t size)
+{
+    const char* source = record;
+    size_t written = 0;
+
+    while (tape->offset + size > PAGE_SIZE) {
+        size_t available = PAGE_SIZE - tape->offset;
+
+        memcpy((char*)tape->buffer + tape->offset, source + written, available);
+
+        tape->offset += available;
+        size -= available;
+        written += available;
+
+        tape_flush_block(tape);
+    }
+
+    if (size) {
+        memcpy((char*)tape->buffer + tape->offset, source + written, size);
+        tape->offset += size;
+        written += size;
+    }
+
+    return (int)written;
+}
diff --git a/tape.h b/tape.h
new file mode 100644
index 0000000..f493d2a
--- /dev/null
+++ b/tape.h
@@ -0,0 +1,38 @@
+#ifndef TAPE_H_
+#define TAPE_H_
+
+#include <stdio.h>
+#include "record.h"
+
+#define PAGE_SIZE 4096
+
+#define TAPE_READ "rb"
+#define TAPE_WRITE "wb"
+#define TAPE_APPEND "ab"
+
+/* Block transfers performed so far, counted across all tapes. */
+extern unsigned reads;
+extern unsigned writes;
+
+/* A file accessed sequentially through one PAGE_SIZE buffer. */
+typedef struct {
+    FILE* file;
+    char* name;        /* private copy of the file name */
+    const char* mode;  /* the mode string given to tape_open() */
+
+    void* buffer;      /* PAGE_SIZE bytes */
+
+    unsigned offset;   /* position inside the buffer */
+    size_t read;       /* bytes the last block load delivered */
+    int block;         /* index of the current block; -1 before the first load */
+} tape_t;
+
+tape_t* tape_open(const char* filename, const char* mode);
+void tape_close(tape_t* tape);
+
+void* tape_read(tape_t* tape, void* record, size_t size);
+int tape_write(tape_t* tape, const void* record, size_t size);
+
+int tape_rewind(tape_t* tape, size_t count, size_t size);
+
+#endif