commit bea9f4b3bbf11398119fb53481357f4fe0c545b6
Author: Kacper Donat
Date:   Sun Oct 21 13:04:41 2018 +0200

    initial commit

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..3610c07
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,5 @@
+*
+!*.c
+!*.h
+!Makefile
+!.gitignore
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..6431be6
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,22 @@
+CC=gcc
+CFLAGS=-Wall -O0 -g
+# Libraries go in LDLIBS: the built-in link rule places it after the objects.
+LDLIBS=-lm
+
+.PHONY: all clean
+
+all: appender reader generator sorter
+
+appender: appender.o tape.o record.o common.o
+reader: reader.o tape.o record.o common.o
+generator: generator.o
+sorter: sorter.o tape.o record.o heap.o common.o
+
+-include $(wildcard *.d)
+
+.c.o:
+	$(CC) $(CFLAGS) -MMD -c $< -o $@
+
+clean:
+	rm -f *.o
+	rm -f *.d
diff --git a/appender.c b/appender.c
new file mode 100644
index 0000000..37311f3
--- /dev/null
+++ b/appender.c
@@ -0,0 +1,97 @@
+#include <stdio.h>
+#include "tape.h"
+#include "record.h"
+#include "common.h"
+
+#define WANT_IOSTAT 1
+
+char* filename;
+char flags;
+
+/*
+ * Parses the command line: -q / -v lower / raise the verbosity, -i requests
+ * the I/O summary. The first non-option argument ends up in `filename`
+ * (NULL when there is none).
+ */
+void init_args(int args, char* argv[])
+{
+    optparse_t options;
+    optparse_init(&options, argv);
+
+    /* optparse() returns an int and signals the end of options with -1. */
+    int opt;
+    while ((opt = optparse(&options, "qvi")) != -1) {
+        switch (opt) {
+            case 'q':
+                verbosity--;
+                break;
+            case 'v':
+                verbosity++;
+                break;
+            case 'i':
+                flags |= WANT_IOSTAT;
+                break;
+            default:
+                /* '?': optparse stored the reason in errmsg. */
+                fprintf(stderr, "%s: %s\n", argv[0], options.errmsg);
+                break;
+        }
+    }
+
+    filename = optparse_arg(&options);
+}
+
+/* Prints the usage text; `name` is the program name shown in it. */
+void help(const char* name) {
+    printf(
+        "Append record to given tape at the end.\n"
+        "Usage:\n"
+        "\t%s [options] <tape>\n",
+        name
+    );
+}
+
+/*
+ * Reads "x y" pairs from stdin and appends each one as a record to the tape
+ * named on the command line. Returns 0 on success, 1 on a usage or open error.
+ */
+int main(int argc, char* argv[])
+{
+    if (argc < 2) {
+        help(argv[0]);
+        return 0;
+    }
+
+    init_args(argc, argv);
+
+    if (!filename) {
+        help(argv[0]);
+        return 1;
+    }
+
+    tape_t* tape = tape_open(filename, TAPE_APPEND);
+    if (!tape) {
+        return 1;
+    }
+
+    record_t record;
+    size_t records = 0;
+
+    /* Require both coordinates so a half-parsed pair is never stored. */
+    while (scanf("%lf %lf", &record.x, &record.y) == 2) {
+        tape_write(tape, &record, sizeof(record_t));
+        printfv(VERBOSITY_NORMAL, "Record created: r = (%lf, %lf) |r| = %lf\n", record.x, record.y, record_length(record));
+        records++;
+    }
+
+    printfv(VERBOSITY_NORMAL, "Appended %zu record to %s.\n", records, filename);
+
+    /* Close before reporting: tape_close() flushes the buffered page. */
+    tape_close(tape);
+
+    if (flags & WANT_IOSTAT) {
+        iostats();
+    }
+
+    return 0;
+}
diff --git a/common.c b/common.c
new file mode 100644
index 0000000..b3152ad
--- /dev/null
+++ b/common.c
@@ -0,0 +1,24 @@
+#include "common.h"
+#include "tape.h"
+#include <stdarg.h>
+
+verbosity_t verbosity = VERBOSITY_NORMAL;
+
+/* printf() that only prints when the global verbosity is at least `level`. */
+void printfv(verbosity_t level, const char* format, ...)
+{
+    va_list args;
+    va_start(args, format);
+
+    if (verbosity >= level) {
+        vprintf(format, args);
+    }
+
+    va_end(args);
+}
+
+/* Prints the global tape read / write counters at VERBOSITY_QUIET level. */
+void iostats()
+{
+    printfv(VERBOSITY_QUIET, "IO R: %u W: %u\n", reads, writes);
+}
diff --git a/common.h b/common.h
new file mode 100644
index 0000000..36aca66
--- /dev/null
+++ b/common.h
@@ -0,0 +1,22 @@
+#ifndef COMMON_H_
+#define COMMON_H_
+
+#define VERBOSITY_QUIET (-1)
+#define VERBOSITY_NORMAL 0
+#define VERBOSITY_VERBOSE 1
+#define VERBOSITY_DEBUG 2
+
+#include <stdio.h>
+#define OPTPARSE_IMPLEMENTATION
+#define OPTPARSE_API static
+#include "optparse.h"
+
+typedef signed char verbosity_t; /* signed: must be able to hold VERBOSITY_QUIET (-1) */
+typedef struct optparse optparse_t;
+
+extern verbosity_t verbosity;
+
+void printfv(verbosity_t verbosity, const char* format, ...);
+void iostats();
+
+#endif
diff --git a/generator.c b/generator.c
new file mode 100644
index 0000000..e32098c
--- /dev/null
+++ b/generator.c
@@ -0,0 +1,30 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+
+/* Returns a pseudo-random double in [from, to], driven by rand(). */
+double range_rand(double from, double to)
+{
+    return (double)rand() / RAND_MAX * (to - from) + from;
+}
+
+/* Prints `count` random "x y" lines with both coordinates in [min, max]. */
+int main(int argc, const char* argv[])
+{
+    if (argc < 4) {
+        printf("usage: %s min max count\n", argv[0]);
+        return -1;
+    }
+
+    double from = atof(argv[1]);
+    double to = atof(argv[2]);
+    int count = atoi(argv[3]);
+
+    srand(time(0));
+    /* `> 0` so that a negative count prints nothing instead of running away. */
+    while (count-- > 0) {
+        printf("%lf %lf\n", range_rand(from, to), range_rand(from, to));
+    }
+
+    return 0;
+}
diff --git a/heap.c b/heap.c
new file mode 100644
index 0000000..b833abf
--- /dev/null
+++ b/heap.c
@@ -0,0 +1,88 @@
+#include "heap.h"
+#include <stdio.h>
+#define SWAP(type, x, y) do {type tmp = x; x = y; y = tmp;} while(0)
+
+unsigned heap_left(unsigned node) { return 2*node + 1; } /* index of the left child */
+unsigned heap_right(unsigned node) { return 2*(node + 1); } /* index of the right child */
+unsigned heap_parent(unsigned node) { return (node - 1) / 2; } /* node 0 has no parent: 0 - 1 wraps */
+
+void heap_init(heap_t* heap, size_t max, int (*compare)(const void* a, const void* b))
+{
+    heap->records = malloc(max * sizeof(void*)); /* result is not checked for NULL */
+    heap->max = max;
+    heap->current = 0;
+    heap->compare = compare;
+}
+
+void heap_free(heap_t* heap)
+{
+    free(heap->records); /* only the pointer array; the elements belong to the caller */
+}
+
+void* heap_min(heap_t* heap)
+{
+    if (!heap->current) {
+        return NULL;
+    }
+
+    return heap->records[0]; /* root = smallest element according to compare */
+}
+
+void* heap_pop(heap_t* heap) {
+    void* result = heap_min(heap);
+
+    if (result) {
+        heap_remove(heap, 0);
+    }
+
+    return result;
+}
+
+void heap_insert(heap_t* heap, void* record)
+{
+    unsigned pos = heap->current++; /* no check against heap->max: the caller must leave room */
+    unsigned parent;
+
+    heap->records[pos] = record;
+
+    while (pos && heap->compare(record, heap->records[parent = heap_parent(pos)]) < 0) { /* sift up */
+        SWAP(void*, heap->records[pos], heap->records[parent]);
+        pos = parent;
+    }
+}
+
+void heap_remove(heap_t* heap, unsigned pos)
+{
+    heap->records[pos] = heap->records[--heap->current]; /* move the last element into the hole */
+    while (1) { /* sift down */
+        unsigned left = heap_left(pos);
+        unsigned right = heap_right(pos);
+
+        unsigned compared = left;
+
+        if (left >= heap->current) {
+            break;
+        }
+
+        if (right < heap->current) {
+            compared = heap->compare(heap->records[left], heap->records[right]) < 0 ? left : right;
+        }
+
+        if (heap->compare(heap->records[compared], heap->records[pos]) < 0) {
+            SWAP(void*, heap->records[compared], heap->records[pos]);
+            pos = compared;
+        } else {
+            break;
+        }
+    }
+}
+
+void heap_print(heap_t* heap, void (*print)(void* record))
+{
+    printf("[ ");
+    for (unsigned i = 0; i < heap->current; i++) { /* array order, not sorted order */
+        void* current = heap->records[i];
+        print(current);
+    }
+    printf("]\n");
+}
diff --git a/heap.h b/heap.h
new file mode 100644
index 0000000..807b17e
--- /dev/null
+++ b/heap.h
@@ -0,0 +1,31 @@
+#ifndef HEAP_H
+#define HEAP_H
+
+#include "record.h"
+#include <stdlib.h>
+
+typedef struct {
+    void** records; /* array of `max` element pointers, allocated by heap_init() */
+
+    size_t max; /* capacity passed to heap_init() */
+    size_t current; /* number of elements currently stored */
+
+    int (*compare)(const void* a, const void* b); /* < 0 when a sorts before b */
+} heap_t;
+
+unsigned heap_left(unsigned node);
+unsigned heap_right(unsigned node);
+unsigned heap_parent(unsigned node);
+
+void heap_init(heap_t* heap, size_t size, int (*compare)(const void* a, const void *b));
+void heap_free(heap_t* heap);
+
+void heap_insert(heap_t* heap, void* record);
+void heap_remove(heap_t* heap, unsigned n);
+
+void* heap_min(heap_t* heap);
+void* heap_pop(heap_t* heap);
+
+void heap_print(heap_t* heap, void (*print)(void* record));
+
+#endif
diff --git a/optparse.h b/optparse.h
new file mode 100644
index 0000000..3a577a7
--- /dev/null
+++ b/optparse.h
@@ -0,0 +1,403 @@
+/* Optparse --- portable, reentrant, embeddable, getopt-like option parser
+ *
+ * This is free and unencumbered software released into the public domain.
+ *
+ * To get the implementation, define OPTPARSE_IMPLEMENTATION.
+ * Optionally define OPTPARSE_API to control the API's visibility
+ * and/or linkage (static, __attribute__, __declspec).
+ *
+ * The POSIX getopt() option parser has three fatal flaws. These flaws
+ * are solved by Optparse.
+ *
+ * 1) Parser state is stored entirely in global variables, some of
+ * which are static and inaccessible. This means only one thread can
+ * use getopt(). It also means it's not possible to recursively parse
+ * nested sub-arguments while in the middle of argument parsing.
+ * Optparse fixes this by storing all state on a local struct.
+ *
+ * 2) The POSIX standard provides no way to properly reset the parser.
+ * This means for portable code that getopt() is only good for one
+ * run, over one argv with one option string. It also means subcommand
+ * options cannot be processed with getopt(). Most implementations
+ * provide a method to reset the parser, but it's not portable.
+ * Optparse provides an optparse_arg() function for stepping over
+ * subcommands and continuing parsing of options with another option
+ * string. The Optparse struct itself can be passed around to
+ * subcommand handlers for additional subcommand option parsing. A
+ * full reset can be achieved by with an additional optparse_init().
+ *
+ * 3) Error messages are printed to stderr. This can be disabled with
+ * opterr, but the messages themselves are still inaccessible.
+ * Optparse solves this by writing an error message in its errmsg
+ * field. The downside to Optparse is that this error message will
+ * always be in English rather than the current locale.
+ *
+ * Optparse should be familiar with anyone accustomed to getopt(), and
+ * it could be a nearly drop-in replacement. The option string is the
+ * same and the fields have the same names as the getopt() global
+ * variables (optarg, optind, optopt).
+ *
+ * Optparse also supports GNU-style long options with optparse_long().
+ * The interface is slightly different and simpler than getopt_long().
+ *
+ * By default, argv is permuted as it is parsed, moving non-option
+ * arguments to the end. This can be disabled by setting the `permute`
+ * field to 0 after initialization.
+ */
+#ifndef OPTPARSE_H
+#define OPTPARSE_H
+
+#ifndef OPTPARSE_API
+#  define OPTPARSE_API
+#endif
+
+struct optparse {
+    char **argv;
+    int permute;
+    int optind;
+    int optopt;
+    char *optarg;
+    char errmsg[64];
+    int subopt;
+};
+
+enum optparse_argtype {
+    OPTPARSE_NONE,
+    OPTPARSE_REQUIRED,
+    OPTPARSE_OPTIONAL
+};
+
+struct optparse_long {
+    const char *longname;
+    int shortname;
+    enum optparse_argtype argtype;
+};
+
+/**
+ * Initializes the parser state.
+ */
+OPTPARSE_API
+void optparse_init(struct optparse *options, char **argv);
+
+/**
+ * Read the next option in the argv array.
+ * @param optstring a getopt()-formatted option string.
+ * @return the next option character, -1 for done, or '?' for error
+ *
+ * Just like getopt(), a character followed by no colons means no
+ * argument. One colon means the option has a required argument. Two
+ * colons means the option takes an optional argument.
+ */
+OPTPARSE_API
+int optparse(struct optparse *options, const char *optstring);
+
+/**
+ * Handles GNU-style long options in addition to getopt() options.
+ * This works a lot like GNU's getopt_long(). The last option in
+ * longopts must be all zeros, marking the end of the array. The
+ * longindex argument may be NULL.
+ */
+OPTPARSE_API
+int optparse_long(struct optparse *options,
+                  const struct optparse_long *longopts,
+                  int *longindex);
+
+/**
+ * Used for stepping over non-option arguments.
+ * @return the next non-option argument, or NULL for no more arguments
+ *
+ * Argument parsing can continue with optparse() after using this
+ * function. That would be used to parse the options for the
+ * subcommand returned by optparse_arg(). This function allows you to
+ * ignore the value of optind.
+ */
+OPTPARSE_API
+char *optparse_arg(struct optparse *options);
+
+/* Implementation */
+#ifdef OPTPARSE_IMPLEMENTATION
+
+#define OPTPARSE_MSG_INVALID "invalid option"
+#define OPTPARSE_MSG_MISSING "option requires an argument"
+#define OPTPARSE_MSG_TOOMANY "option takes no arguments"
+
+static int
+optparse_error(struct optparse *options, const char *msg, const char *data)
+{
+    unsigned p = 0;
+    const char *sep = " -- '";
+    while (*msg)
+        options->errmsg[p++] = *msg++;
+    while (*sep)
+        options->errmsg[p++] = *sep++;
+    while (p < sizeof(options->errmsg) - 2 && *data)
+        options->errmsg[p++] = *data++;
+    options->errmsg[p++] = '\'';
+    options->errmsg[p++] = '\0';
+    return '?';
+}
+
+OPTPARSE_API
+void
+optparse_init(struct optparse *options, char **argv)
+{
+    options->argv = argv;
+    options->permute = 1;
+    options->optind = 1;
+    options->subopt = 0;
+    options->optarg = 0;
+    options->errmsg[0] = '\0';
+}
+
+static int
+optparse_is_dashdash(const char *arg)
+{
+    return arg != 0 && arg[0] == '-' && arg[1] == '-' && arg[2] == '\0';
+}
+
+static int
+optparse_is_shortopt(const char *arg)
+{
+    return arg != 0 && arg[0] == '-' && arg[1] != '-' && arg[1] != '\0';
+}
+
+static int
+optparse_is_longopt(const char *arg)
+{
+    return arg != 0 && arg[0] == '-' && arg[1] == '-' && arg[2] != '\0';
+}
+
+static void
+optparse_permute(struct optparse *options, int index)
+{
+    char *nonoption = options->argv[index];
+    int i;
+    for (i = index; i < options->optind - 1; i++)
+        options->argv[i] = options->argv[i + 1];
+    options->argv[options->optind - 1] = nonoption;
+}
+
+static int
+optparse_argtype(const char *optstring, char c)
+{
+    int count = OPTPARSE_NONE;
+    if (c == ':')
+        return -1;
+    for (; *optstring && c != *optstring; optstring++);
+    if (!*optstring)
+        return -1;
+    if (optstring[1] == ':')
+        count += optstring[2] == ':' ? 2 : 1;
+    return count;
+}
+
+OPTPARSE_API
+int
+optparse(struct optparse *options, const char *optstring)
+{
+    int type;
+    char *next;
+    char *option = options->argv[options->optind];
+    options->errmsg[0] = '\0';
+    options->optopt = 0;
+    options->optarg = 0;
+    if (option == 0) {
+        return -1;
+    } else if (optparse_is_dashdash(option)) {
+        options->optind++; /* consume "--" */
+        return -1;
+    } else if (!optparse_is_shortopt(option)) {
+        if (options->permute) {
+            int index = options->optind++;
+            int r = optparse(options, optstring);
+            optparse_permute(options, index);
+            options->optind--;
+            return r;
+        } else {
+            return -1;
+        }
+    }
+    option += options->subopt + 1;
+    options->optopt = option[0];
+    type = optparse_argtype(optstring, option[0]);
+    next = options->argv[options->optind + 1];
+    switch (type) {
+    case -1: {
+        char str[2] = {0, 0};
+        str[0] = option[0];
+        options->optind++;
+        return optparse_error(options, OPTPARSE_MSG_INVALID, str);
+    }
+    case OPTPARSE_NONE:
+        if (option[1]) {
+            options->subopt++;
+        } else {
+            options->subopt = 0;
+            options->optind++;
+        }
+        return option[0];
+    case OPTPARSE_REQUIRED:
+        options->subopt = 0;
+        options->optind++;
+        if (option[1]) {
+            options->optarg = option + 1;
+        } else if (next != 0) {
+            options->optarg = next;
+            options->optind++;
+        } else {
+            char str[2] = {0, 0};
+            str[0] = option[0];
+            options->optarg = 0;
+            return optparse_error(options, OPTPARSE_MSG_MISSING, str);
+        }
+        return option[0];
+    case OPTPARSE_OPTIONAL:
+        options->subopt = 0;
+        options->optind++;
+        if (option[1])
+            options->optarg = option + 1;
+        else
+            options->optarg = 0;
+        return option[0];
+    }
+    return 0;
+}
+
+OPTPARSE_API
+char *
+optparse_arg(struct optparse *options)
+{
+    char *option = options->argv[options->optind];
+    options->subopt = 0;
+    if (option != 0)
+        options->optind++;
+    return option;
+}
+
+static int
+optparse_longopts_end(const struct optparse_long *longopts, int i)
+{
+    return !longopts[i].longname && !longopts[i].shortname;
+}
+
+static void
+optparse_from_long(const struct optparse_long *longopts, char *optstring)
+{
+    char *p = optstring;
+    int i;
+    for (i = 0; !optparse_longopts_end(longopts, i); i++) {
+        if (longopts[i].shortname) {
+            int a;
+            *p++ = longopts[i].shortname;
+            for (a = 0; a < (int)longopts[i].argtype; a++)
+                *p++ = ':';
+        }
+    }
+    *p = '\0';
+}
+
+/* Unlike strcmp(), handles options containing "=". */
+static int
+optparse_longopts_match(const char *longname, const char *option)
+{
+    const char *a = option, *n = longname;
+    if (longname == 0)
+        return 0;
+    for (; *a && *n && *a != '='; a++, n++)
+        if (*a != *n)
+            return 0;
+    return *n == '\0' && (*a == '\0' || *a == '=');
+}
+
+/* Return the part after "=", or NULL. */
+static char *
+optparse_longopts_arg(char *option)
+{
+    for (; *option && *option != '='; option++);
+    if (*option == '=')
+        return option + 1;
+    else
+        return 0;
+}
+
+static int
+optparse_long_fallback(struct optparse *options,
+                       const struct optparse_long *longopts,
+                       int *longindex)
+{
+    int result;
+    char optstring[96 * 3 + 1]; /* 96 ASCII printable characters */
+    optparse_from_long(longopts, optstring);
+    result = optparse(options, optstring);
+    if (longindex != 0) {
+        *longindex = -1;
+        if (result != -1) {
+            int i;
+            for (i = 0; !optparse_longopts_end(longopts, i); i++)
+                if (longopts[i].shortname == options->optopt)
+                    *longindex = i;
+        }
+    }
+    return result;
+}
+
+OPTPARSE_API
+int
+optparse_long(struct optparse *options,
+              const struct optparse_long *longopts,
+              int *longindex)
+{
+    int i;
+    char *option = options->argv[options->optind];
+    if (option == 0) {
+        return -1;
+    } else if (optparse_is_dashdash(option)) {
+        options->optind++; /* consume "--" */
+        return -1;
+    } else if (optparse_is_shortopt(option)) {
+        return optparse_long_fallback(options, longopts, longindex);
+    } else if (!optparse_is_longopt(option)) {
+        if (options->permute) {
+            int index = options->optind++;
+            int r = optparse_long(options, longopts, longindex);
+            optparse_permute(options, index);
+            options->optind--;
+            return r;
+        } else {
+            return -1;
+        }
+    }
+
+    /* Parse as long option. */
+    options->errmsg[0] = '\0';
+    options->optopt = 0;
+    options->optarg = 0;
+    option += 2; /* skip "--" */
+    options->optind++;
+    for (i = 0; !optparse_longopts_end(longopts, i); i++) {
+        const char *name = longopts[i].longname;
+        if (optparse_longopts_match(name, option)) {
+            char *arg;
+            if (longindex)
+                *longindex = i;
+            options->optopt = longopts[i].shortname;
+            arg = optparse_longopts_arg(option);
+            if (longopts[i].argtype == OPTPARSE_NONE && arg != 0) {
+                return optparse_error(options, OPTPARSE_MSG_TOOMANY, name);
+            } if (arg != 0) {
+                options->optarg = arg;
+            } else if (longopts[i].argtype == OPTPARSE_REQUIRED) {
+                options->optarg = options->argv[options->optind];
+                if (options->optarg == 0)
+                    return optparse_error(options, OPTPARSE_MSG_MISSING, name);
+                else
+                    options->optind++;
+            }
+            return options->optopt;
+        }
+    }
+    return optparse_error(options, OPTPARSE_MSG_INVALID, option);
+}
+
+#endif /* OPTPARSE_IMPLEMENTATION */
+#endif /* OPTPARSE_H */
diff --git a/reader.c b/reader.c
new file mode 100644
index 0000000..bbe9342
--- /dev/null
+++ b/reader.c
@@ -0,0 +1,129 @@
+#include <stdio.h>
+#include "tape.h"
+#include "record.h"
+#include "common.h"
+
+#define WANT_SUMMARY 1
+#define WANT_IOSTAT 2
+#define WANT_EXPORT 4
+
+char* filename;
+char flags;
+
+/* Prints the usage text; `name` is the program name shown in it. */
+void help(const char* name)
+{
+    printf(
+        "Reads records from tape.\n"
+        "Usage:\n"
+        "\t%s [options] <tape>\n"
+        "Options:\n"
+        "\t-q|v|vv - verbosity level, q for quiet, v for verbose, vv for debug\n"
+        "\t-s - summary of records (R) and runs (S)\n"
+        "\t-i - summary IO stats\n"
+        "\t-e - data in export format\n"
+        , name
+    );
+}
+
+/*
+ * Parses the command line: -q / -v adjust the verbosity, -i / -s / -e set the
+ * matching WANT_* bit in `flags`. The first non-option argument ends up in
+ * `filename` (NULL when there is none).
+ */
+void init_args(int args, char* argv[])
+{
+    optparse_t options;
+    optparse_init(&options, argv);
+
+    /* optparse() returns an int and signals the end of options with -1. */
+    int opt;
+    while ((opt = optparse(&options, "qvise")) != -1) {
+        switch (opt) {
+            case 'q':
+                verbosity--;
+                break;
+            case 'v':
+                verbosity++;
+                break;
+            case 'i':
+                flags |= WANT_IOSTAT;
+                break;
+            case 'e':
+                flags |= WANT_EXPORT;
+                break;
+            case 's':
+                flags |= WANT_SUMMARY;
+                break;
+            default:
+                /* '?': optparse stored the reason in errmsg. */
+                fprintf(stderr, "%s: %s\n", argv[0], options.errmsg);
+                break;
+        }
+    }
+
+    filename = optparse_arg(&options);
+}
+
+/*
+ * Dumps every record of the tape and counts records and runs (a new run
+ * starts whenever a record is shorter than its predecessor). Returns 0 on
+ * success, 1 on a usage or open error.
+ */
+int main(int argc, char* argv[])
+{
+    if (argc < 2) {
+        help(argv[0]);
+        return 0;
+    }
+
+    init_args(argc, argv);
+
+    if (!filename) {
+        help(argv[0]);
+        return 1;
+    }
+
+    tape_t* tape = tape_open(filename, TAPE_READ);
+    if (!tape) {
+        return 1;
+    }
+
+    record_t record, last = { 0.0, 0.0 };
+    size_t records = 0, series = 0, current = 0;
+
+    while (tape_read(tape, &record, sizeof(record_t)) != NULL) {
+        if (records == 0) {
+            /* The first record opens the first run. */
+            series = 1;
+        } else if (record_compare(last, record) > 0) {
+            printfv(VERBOSITY_VERBOSE, "R: %zu -------------------------------------\n", current);
+            series++;
+            current = 0;
+        }
+
+        if (flags & WANT_EXPORT) {
+            /* One "x y" pair per line so consecutive records stay separable. */
+            printfv(VERBOSITY_NORMAL, "%lf %lf\n", record.x, record.y);
+        } else {
+            printfv(VERBOSITY_NORMAL, "|r| = %lf r = (%lf, %lf)\n", record_length(record), record.x, record.y);
+        }
+
+        records++;
+        current++;
+
+        last = record;
+    }
+
+    tape_close(tape);
+
+    if (flags & WANT_SUMMARY) {
+        printf("R: %zu S: %zu\n", records, series);
+    }
+
+    if (flags & WANT_IOSTAT) {
+        iostats();
+    }
+
+    return 0;
+}
diff --git a/record.c b/record.c
new file mode 100644
index 0000000..0ff72d8
--- /dev/null
+++ b/record.c
@@ -0,0 +1,14 @@
+#include "record.h"
+#include <math.h>
+
+/* Euclidean length of the vector (a.x, a.y). */
+double record_length(const record_t a)
+{
+    return sqrt(a.x * a.x + a.y * a.y);
+}
+
+/* Length of a minus length of b: negative when a is the shorter record. */
+double record_compare(const record_t a, const record_t b)
+{
+    return record_length(a) - record_length(b);
+}
diff --git a/record.h b/record.h
new file mode 100644
index 0000000..eace15f
--- /dev/null
+++ b/record.h
@@ -0,0 +1,13 @@
+#ifndef RECORD_H_
+#define RECORD_H_
+
+typedef struct {
+    double x;
+    double y;
+} record_t;
+
+
+double record_length(const record_t a);
+double record_compare(const record_t a, const record_t b);
+
+#endif
diff --git a/sorter.c b/sorter.c
new file mode 100644
index 0000000..3badc9f
--- /dev/null
+++ b/sorter.c
@@ -0,0 +1,315 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include "tape.h"
+#include "record.h"
+#include "heap.h"
+#include "common.h"
+
+#define SGN(x) (((x) > 0) - ((x) < 0))
+
+char* tmp_format = "tmp.%s.tape";
+
+typedef struct {
+    record_t record;
+    tape_t* tape;
+} entry_t;
+
+/* Maps id (taken modulo 62) to one of a-z, A-Z, 0-9. */
+char chartosymbol(unsigned id) {
+    // normalize id
+    id = id % 62;
+
+    if (id < 26) {
+        return 'a' + id;
+    } else if (id < 52) {
+        return 'A' + id - 26;
+    } else if (id < 62) {
+        return '0' + id - 52;
+    } else {
+        return '?';
+    }
+}
+
+/* Writes id as a NUL-terminated base-62 string, least significant digit first. */
+void identifier(unsigned id, char* string) {
+    // 26 letters 26 capitals 10 digits
+    const unsigned base = 26 + 26 + 10;
+
+    unsigned reminder, i = 0;
+    do {
+        reminder = id % base;
+        string[i++] = chartosymbol(reminder);
+    } while (id /= base);
+
+    string[i] = 0;
+}
+
+/*
+ * Opens temporary tape number `id`; its file name is `format` with the
+ * base-62 form of `id` substituted for %s. Exits the program when the tape
+ * cannot be opened, so the result is never NULL.
+ */
+tape_t* tape_tmp(unsigned id, const char* mode, const char* format) {
+    char tmpname[256], ident[20];
+
+    identifier(id, ident);
+    snprintf(tmpname, sizeof(tmpname), format, ident);
+
+    tape_t* tape = tape_open(tmpname, mode);
+    if (!tape) {
+        /* tape_open() already reported the reason on stderr. */
+        exit(EXIT_FAILURE);
+    }
+
+    return tape;
+}
+
+/* Prints the usage text; `name` is the program name shown in it. */
+void help(const char* name) {
+    printf(
+        "Sorts records from tape.\n"
+        "Usage:\n"
+        "\t%s <input> <output>\n",
+        name
+    );
+}
+
+/* qsort()-style comparator ordering records by length; returns -1, 0 or 1. */
+int compare_records(const void* a, const void* b) {
+    double result = record_compare(*(const record_t*)a, *(const record_t*)b);
+    return SGN(result);
+}
+
+/* Heap comparator: orders entries by the length of the record they hold. */
+int compare_entries(const void* a, const void* b) {
+    const entry_t* lhs = a;
+    const entry_t* rhs = b;
+
+    return compare_records(&lhs->record, &rhs->record);
+}
+
+/* Sorts buffer[0..n) in place by record length and writes it to `tape`. */
+void save_sorted(tape_t* tape, record_t* buffer, size_t n) {
+    qsort(buffer, n, sizeof(record_t), compare_records);
+
+    for (size_t j = 0; j < n; j++) {
+        tape_write(tape, buffer + j, sizeof(record_t));
+    }
+}
+
+/*
+ * Splits `in` into sorted runs of at most (n + 1) pages of records and
+ * deals them round-robin onto temporary tapes 0..n-1; from the second round
+ * on a run is appended to its tape. Returns the number of runs written.
+ */
+size_t make_series(tape_t* in, size_t n) {
+    const size_t max = PAGE_SIZE / sizeof(record_t) * (n + 1);
+
+    size_t series = 0, i = 0;
+    record_t* buffer = malloc(PAGE_SIZE * (n + 1));
+    if (!buffer) {
+        fprintf(stderr, "Out of memory for the sort buffer.\n");
+        exit(EXIT_FAILURE);
+    }
+
+    tape_t* tmp;
+    while (tape_read(in, buffer + i, sizeof(record_t)) != NULL) {
+        i++;
+
+        if (i >= max) {
+            tmp = tape_tmp(series % n, series >= n ? "ab" : "wb", tmp_format);
+            save_sorted(tmp, buffer, i);
+            tape_close(tmp);
+
+            i = 0;
+            series++;
+        }
+    }
+
+    /* Whatever is left over forms the last, shorter run. */
+    if (i > 0) {
+        tmp = tape_tmp(series % n, series >= n ? "ab" : "wb", tmp_format);
+        save_sorted(tmp, buffer, i);
+        tape_close(tmp);
+        series++;
+    }
+
+    free(buffer);
+    return series;
+}
+
+/*
+ * Merges one run from each of the `n` input tapes into `out` through a
+ * min-heap keyed on record length. A tape leaves the merge when it is
+ * exhausted or when its next record is shorter than the one just written,
+ * i.e. belongs to its next run; that record is handed back with
+ * tape_rewind(). Returns the number of records merged, 0 when every input
+ * tape was already exhausted.
+ */
+unsigned join_tapes(tape_t** tapes, size_t n, tape_t* out) {
+    heap_t heap;
+    unsigned records = 0;
+
+    heap_init(&heap, n, compare_entries);
+
+    // initial distribution
+    for (unsigned i = 0; i < n; i++) {
+        record_t record;
+
+        if (tape_read(tapes[i], &record, sizeof(record_t))) {
+            entry_t* entry = malloc(sizeof(entry_t));
+            if (!entry) {
+                fprintf(stderr, "Out of memory for a merge entry.\n");
+                exit(EXIT_FAILURE);
+            }
+
+            entry->tape = tapes[i];
+            entry->record = record;
+
+            heap_insert(&heap, entry);
+            records++;
+        }
+    }
+
+    if (!records) {
+        /* Nothing to merge; still release the heap storage. */
+        heap_free(&heap);
+        return records;
+    }
+
+    // merge into 1 tape
+    entry_t* current;
+    while ((current = heap_pop(&heap))) {
+        tape_write(out, &current->record, sizeof(record_t));
+        record_t record;
+        record_t* result = tape_read(current->tape, &record, sizeof(record_t));
+
+        if (result) {
+            if (record_compare(record, current->record) >= 0) {
+                current->record = record;
+                heap_insert(&heap, current);
+                records++;
+                continue;
+            } else {
+                tape_rewind(current->tape, 1, sizeof(record_t));
+            }
+        }
+
+        free(current);
+    }
+
+    heap_free(&heap);
+
+    return records;
+}
+
+/* Prints the length of the entry's record; usable as a heap_print() callback. */
+void print_entry(void* e)
+{
+    entry_t* entry = e;
+    printf("%lf ", record_length(entry->record));
+}
+
+/*
+ * Runs merge pass number `i`: repeatedly merges one run from each of the n
+ * input tapes into the output tapes, chosen round-robin. Passes alternate
+ * between temporary tapes [0, n) and [n, 2n) as input and output.
+ * Returns the number of runs produced.
+ */
+unsigned iteration(size_t i, size_t n)
+{
+    size_t in_offset = (i % 2) * n;
+    size_t out_offset = ((i+1) % 2) * n;
+
+    unsigned series = 0;
+
+    tape_t** in_tapes = malloc(n * sizeof(tape_t*));
+    tape_t** out_tapes = malloc(n * sizeof(tape_t*));
+    if (!in_tapes || !out_tapes) {
+        fprintf(stderr, "Out of memory for the tape tables.\n");
+        exit(EXIT_FAILURE);
+    }
+
+    for (unsigned k = 0; k < n; k++) {
+        in_tapes[k] = tape_tmp(in_offset + k, "rb", tmp_format);
+        out_tapes[k] = tape_tmp(out_offset + k, "wb", tmp_format);
+    }
+
+    while (join_tapes(in_tapes, n, out_tapes[series % n]) > 0) {
+        series++;
+    }
+
+    for (unsigned k = 0; k < n; k++) {
+        tape_close(in_tapes[k]);
+        tape_close(out_tapes[k]);
+    }
+
+    free(in_tapes);
+    free(out_tapes);
+
+    return series;
+}
+
+/*
+ * Sorts the tape argv[1] into argv[2] by record length: builds the initial
+ * runs, runs merge passes until at most n runs remain, then merges those
+ * into the output. Returns 0 on success and 1 when the input or output
+ * tape cannot be opened.
+ */
+int main(int argc, const char* argv[])
+{
+    if (argc < 3) {
+        help(argv[0]);
+        return 0;
+    }
+
+    size_t n = 100;
+
+    tape_t* in = tape_open(argv[1], TAPE_READ);
+    if (!in) {
+        return 1;
+    }
+
+    tape_t* out = tape_open(argv[2], TAPE_WRITE);
+    if (!out) {
+        tape_close(in);
+        return 1;
+    }
+
+    size_t series = make_series(in, n);
+    printf("Created %zu series.\n", series);
+
+    unsigned i = 0;
+    while (series > n) {
+        printf("Iteration %u.\n", i);
+        series = iteration(i++, n);
+    }
+
+    printf("Final iteration.\n");
+    size_t offset = (i % 2) * n;
+    tape_t** in_tapes = malloc(series * sizeof(tape_t*));
+    if (series && !in_tapes) {
+        fprintf(stderr, "Out of memory for the tape table.\n");
+        exit(EXIT_FAILURE);
+    }
+
+    for (unsigned k = 0; k < series; k++) {
+        in_tapes[k] = tape_tmp(offset + k, "rb", tmp_format);
+    }
+
+    join_tapes(in_tapes, series, out);
+
+    for (unsigned k = 0; k < series; k++) {
+        tape_close(in_tapes[k]);
+    }
+    free(in_tapes);
+
+    printf("Sorted file %s into %s in %u iterations.\n", in->name, out->name, i + 1);
+
+    tape_close(in);
+    tape_close(out);
+
+    printf("%u IO R: %u W: %u\n", reads + writes, reads, writes);
+
+    return 0;
+}
diff --git a/tape.c b/tape.c
new file mode 100644
index 0000000..5453653
--- /dev/null
+++ b/tape.c
@@ -0,0 +1,234 @@
+#include "tape.h"
+#include "common.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+unsigned reads = 0;
+unsigned writes = 0;
+
+void _tape_load_block(tape_t* tape, unsigned n);
+void _tape_flush(tape_t* tape);
+
+/*
+ * Opens `filename` as a tape in TAPE_READ, TAPE_WRITE or TAPE_APPEND mode.
+ * Returns a tape that must be released with tape_close(), or NULL (after a
+ * message on stderr) when the file cannot be opened, the mode is not one of
+ * the three, or memory runs out. `mode` is stored by pointer, not copied.
+ */
+tape_t* tape_open(const char* filename, const char* mode)
+{
+    FILE* file = fopen(filename, mode);
+    printfv(VERBOSITY_VERBOSE, "Opening tape %s in mode %s.\n", filename, mode);
+
+    if (!file) {
+        fprintf(stderr, "Cannot open %s in %s mode.\n", filename, mode);
+        return NULL;
+    }
+
+    tape_t* tape = malloc(sizeof(tape_t));
+    if (!tape) {
+        fprintf(stderr, "Out of memory while opening %s.\n", filename);
+        fclose(file);
+        return NULL;
+    }
+
+    tape->file = file;
+    tape->mode = mode;
+    /* +1 for the terminating NUL that strcpy() writes. */
+    tape->name = malloc(strlen(filename) + 1);
+    tape->buffer = malloc(PAGE_SIZE);
+
+    if (!tape->name || !tape->buffer) {
+        fprintf(stderr, "Out of memory while opening %s.\n", filename);
+        goto fail;
+    }
+
+    strcpy(tape->name, filename);
+
+    if (strcmp(mode, TAPE_READ) == 0) {
+        tape->read = 0;
+        tape->offset = 0;
+        tape->block = -1;
+    } else if (strcmp(mode, TAPE_WRITE) == 0) {
+        tape->read = 0;
+        tape->offset = 0;
+        tape->block = 0;
+    } else if (strcmp(mode, TAPE_APPEND) == 0) {
+        /*
+         * An "ab" stream is write-only and every write lands at the end of
+         * the file, so nothing is read back: start with an empty page
+         * buffer and only note which page the new data continues in.
+         */
+        long pos = 0;
+        if (fseek(tape->file, 0, SEEK_END) == 0) {
+            pos = ftell(tape->file);
+        }
+
+        tape->read = 0;
+        tape->offset = 0;
+        tape->block = pos > 0 ? (int)(pos / PAGE_SIZE) : 0;
+    } else {
+        fprintf(stderr, "Mode %s is unknown for tapes.\n", mode);
+        goto fail;
+    }
+
+    return tape;
+
+fail:
+    fclose(file);
+    free(tape->name);
+    free(tape->buffer);
+    free(tape);
+    return NULL;
+}
+
+/*
+ * Writes out whatever is still buffered on a write / append tape, closes the
+ * file and frees the tape. Passing NULL is a no-op.
+ */
+void tape_close(tape_t* tape)
+{
+    if (!tape) {
+        return;
+    }
+
+    printfv(VERBOSITY_VERBOSE, "Closing tape %s.\n", tape->name);
+
+    int writable = strcmp(tape->mode, TAPE_WRITE) == 0 || strcmp(tape->mode, TAPE_APPEND) == 0;
+    /* An empty buffer has nothing to write and must not count as a write. */
+    if (writable && tape->offset > 0) {
+        _tape_flush(tape);
+    }
+
+    fclose(tape->file);
+    free(tape->name);
+    free(tape->buffer);
+    free(tape);
+}
+
+/* Reads page `n` of the file into the buffer and counts one read. */
+void _tape_load_block(tape_t* tape, unsigned n)
+{
+    printfv(VERBOSITY_DEBUG, "Loading block %u of %s.\n", n, tape->name);
+    fseek(tape->file, (long)n * PAGE_SIZE, SEEK_SET);
+
+    tape->offset = 0;
+    tape->read = fread(tape->buffer, 1, PAGE_SIZE, tape->file);
+    tape->block = n;
+
+    reads++;
+}
+
+/* Writes the buffered bytes as page `block`, then moves on to the next page. */
+void _tape_flush(tape_t* tape)
+{
+    printfv(VERBOSITY_DEBUG, "Flushing block %d (%u bytes) of %s.\n", tape->block, tape->offset, tape->name);
+    fseek(tape->file, (long)tape->block * PAGE_SIZE, SEEK_SET);
+
+    if (fwrite(tape->buffer, 1, tape->offset, tape->file) != tape->offset) {
+        fprintf(stderr, "Short write to %s.\n", tape->name);
+    }
+    tape->offset = 0;
+    tape->block++;
+
+    writes++;
+}
+
+/*
+ * Copies the next `size` bytes of the tape into `record`, loading further
+ * pages as needed. Returns `record`, or NULL once the end of the file is
+ * reached before `size` bytes are available.
+ */
+void* tape_read(tape_t* tape, void* record, size_t size)
+{
+    /* Byte pointers: arithmetic on void* is not standard C. */
+    char* dest = record;
+    const char* page = tape->buffer;
+
+    if (tape->offset >= tape->read) {
+        if (feof(tape->file)) {
+            /* Buffer drained and the file has ended: no page left to load. */
+            return NULL;
+        }
+
+        // load next block
+        _tape_load_block(tape, tape->block + 1);
+    }
+
+    if (tape->offset + size > tape->read && feof(tape->file)) {
+        return NULL;
+    }
+
+    if (tape->offset + size > tape->read) {
+        /* The record straddles two pages: copy the tail of this one first. */
+        size_t rest = tape->read - tape->offset;
+        memcpy(dest, page + tape->offset, rest);
+        _tape_load_block(tape, tape->block + 1);
+        memcpy(dest + rest, page + tape->offset, size - rest);
+
+        // move tape offset
+        tape->offset += size - rest;
+    } else {
+        memcpy(dest, page + tape->offset, size);
+        tape->offset += size;
+    }
+
+    return record;
+}
+
+/*
+ * Moves the read position back by `n` items of `size` bytes, reloading
+ * earlier pages when the step crosses the start of the buffered page.
+ * Returns the number of the page that is buffered afterwards.
+ */
+int tape_rewind(tape_t* tape, size_t n, size_t size)
+{
+    int bytes = n * size;
+
+    /* First consume what can be undone inside the buffered page. */
+    int take = (int)tape->offset < bytes ? (int)tape->offset : bytes;
+
+    bytes -= take;
+    tape->offset -= take;
+
+    while (bytes > 0) {
+        _tape_load_block(tape, tape->block - 1);
+        bytes -= PAGE_SIZE;
+    }
+
+    /* bytes <= 0 here: the loop stepped back whole pages, so move forward by the overshoot. */
+    tape->offset -= bytes;
+    return tape->block;
+}
+
+/*
+ * Appends `size` bytes from `record` to the page buffer, flushing every page
+ * that fills up on the way. Returns the number of bytes buffered.
+ */
+int tape_write(tape_t* tape, void* record, size_t size)
+{
+    const char* src = record;
+    char* page = tape->buffer;
+    size_t written = 0;
+
+    while (tape->offset + size > PAGE_SIZE) {
+        size_t available = PAGE_SIZE - tape->offset;
+
+        memcpy(page + tape->offset, src + written, available);
+
+        tape->offset += available;
+        size -= available;
+        written += available;
+
+        _tape_flush(tape);
+    }
+
+    if (size) {
+        memcpy(page + tape->offset, src + written, size);
+        tape->offset += size;
+        written += size;
+    }
+
+    return written;
+}
diff --git a/tape.h b/tape.h
new file mode 100644
index 0000000..f493d2a
--- /dev/null
+++ b/tape.h
@@ -0,0 +1,36 @@
+#ifndef TAPE_H_
+#define TAPE_H_
+
+#include <stdio.h>
+#include "record.h"
+
+#define PAGE_SIZE 4096
+
+#define TAPE_READ "rb"
+#define TAPE_WRITE "wb"
+#define TAPE_APPEND "ab"
+
+extern unsigned reads;
+extern unsigned writes;
+
+typedef struct {
+    FILE* file;
+    char* name; /* copy of the file name, freed by tape_close() */
+    const char* mode; /* mode string passed to tape_open(), not copied */
+
+    void* buffer; /* one page (PAGE_SIZE bytes) of data */
+
+    unsigned offset; /* position inside the buffer, in bytes */
+    size_t read; /* bytes the last page load put into the buffer */
+    int block; /* page number of the buffer; -1 on a read tape before the first load */
+} tape_t;
+
+tape_t* tape_open(const char* filename, const char* mode);
+void tape_close(tape_t* tape);
+
+void* tape_read(tape_t* tape, void* record, size_t size);
+int tape_write(tape_t* tape, void* record, size_t size);
+
+int tape_rewind(tape_t* tape, size_t count, size_t size);
+
+#endif