SBDP01/sorter.c
2018-10-21 13:51:28 +02:00

292 lines
7.0 KiB
C

#include <math.h>
#include <stdlib.h>
#include "tape.h"
#include "record.h"
#include "heap.h"
#include "common.h"
#define SGN(x) ((x > 0) - (x < 0))
#define WANT_SUMMARY 1
#define WANT_IOSTAT 2
#define WANT_STOP 4
char *in_filename, *out_filename;
char flags;
char *tmp_format = "tmp.%s.tape";
size_t n = 100;
typedef struct {
record_t record;
tape_t* tape;
} entry_t;
void init_args(int args, char* argv[])
{
optparse_t options;
optparse_init(&options, argv);
for (char opt; opt != -1; opt = optparse(&options, "qvipsn:t:")) {
switch (opt) {
case 'q':
verbosity--;
break;
case 'v':
verbosity++;
break;
case 'i':
flags |= WANT_IOSTAT;
break;
case 's':
flags |= WANT_SUMMARY;
break;
case 'p':
flags |= WANT_STOP;
break;
case 'n':
sscanf(options.optarg, "%zu", &n);
break;
case 't':
tmp_format = options.optarg;
break;
}
}
in_filename = optparse_arg(&options);
out_filename = optparse_arg(&options);
}
char chartosymbol(unsigned id) {
// normalize id
id = id % 62;
if (id < 26) {
return 'a' + id;
} else if (id < 52) {
return 'A' + id - 26;
} else if (id < 62) {
return '0' + id - 52;
} else {
return '?';
}
}
void identifier(unsigned id, char* string) {
// 26 letters 26 capitals 10 digits
const unsigned base = 26 + 26 + 10;
unsigned reminder, i = 0;
do {
reminder = id % base;
string[i++] = chartosymbol(reminder);
} while (id /= base);
string[i] = 0;
}
tape_t* tape_tmp(unsigned id, const char* mode, const char* format) {
char tmpname[256], ident[20];
identifier(id, ident);
sprintf(tmpname, format, ident);
return tape_open(tmpname, mode);
}
void help(const char* name) {
printf(
"Sorts reocrds from tape.\n"
"Usage:\n"
"\t%s [options] <in-tape> <out-tape>\n"
"Options:\n"
"\t-q|v|vv - verbosity level, q for quiet, v for vervose vv for debug\n"
"\t-s - summary of records (R) and runs (S)\n"
"\t-i - summary IO stats\n"
"\t-p - pause after each iteration\n"
"\t-n buffers - number of buffers, must be greater than 2 (default: 100)\n"
"\t-t format - temporary tape file name format, must include %%s identifier (default: tmp.%%s.tape)\n"
,
name
);
}
int compare_records(const void* a, const void* b) {
double result = record_compare(*(record_t*)a, *(record_t*)b);
return SGN(result);
}
int compare_entries(const void* a, const void* b) {
const entry_t* lhs = a;
const entry_t* rhs = b;
return compare_records(&lhs->record, &rhs->record);
}
void save_sorted(tape_t* tape, record_t* buffer, size_t n) {
qsort(buffer, n, sizeof(record_t), compare_records);
for (size_t j = 0; j < n; j++) {
tape_write(tape, buffer + j, sizeof(record_t));
}
}
size_t make_series(tape_t* in, size_t n) {
const size_t max = PAGE_SIZE / sizeof(record_t) * (n + 1);
size_t series = 0, i = 0;
record_t* buffer = malloc(PAGE_SIZE * (n + 1));
tape_t* tmp;
while (tape_read(in, buffer + i++, sizeof(record_t)) > 0) {
if (i >= max) {
tmp = tape_tmp(series % n, series >= n ? "ab" : "wb", tmp_format);
save_sorted(tmp, buffer, i);
tape_close(tmp);
i = 0;
series++;
}
}
if (i > 1) {
tmp = tape_tmp(series % n, series >= n ? "ab" : "wb", tmp_format);
save_sorted(tmp, buffer, i - 1);
tape_close(tmp);
series++;
}
free(buffer);
return series;
}
unsigned join_tapes(tape_t** tapes, size_t n, tape_t* out) {
heap_t heap;
unsigned records = 0;
heap_init(&heap, n, compare_entries);
// initial distribution
for (unsigned i = 0; i < n; i++) {
record_t record;
if (tape_read(tapes[i], &record, sizeof(record_t))) {
entry_t* entry = malloc(sizeof(entry_t));
entry->tape = tapes[i];
entry->record = record;
heap_insert(&heap, entry);
records++;
}
}
if (!records) {
return records;
}
// merge into 1 tape
entry_t* current;
while ((current = heap_pop(&heap))) {
tape_write(out, &current->record, sizeof(record_t));
record_t record;
record_t* result = tape_read(current->tape, &record, sizeof(record_t));
if (result) {
if (record_compare(record, current->record) >= 0) {
current->record = record;
heap_insert(&heap, current);
records++;
continue;
} else {
tape_rewind(current->tape, 1, sizeof(record_t));
}
}
free(current);
}
heap_free(&heap);
return records;
}
unsigned iteration(size_t i, size_t n)
{
size_t in_offset = (i % 2) * n;
size_t out_offset = ((i+1) % 2) * n;
unsigned series = 0;
tape_t** in_tapes = malloc(n * sizeof(tape_t*));
tape_t** out_tapes = malloc(n * sizeof(tape_t*));
heap_t heap;
heap_init(&heap, n, compare_entries);
for (unsigned i = 0; i < n; i++) {
in_tapes[i] = tape_tmp(in_offset + i, "rb", tmp_format);
out_tapes[i] = tape_tmp(out_offset + i, "wb", tmp_format);
}
while (join_tapes(in_tapes, n, out_tapes[series % n]) > 0) {
series++;
}
for (unsigned i = 0; i < n; i++) {
tape_close(in_tapes[i]);
tape_close(out_tapes[i]);
}
free(in_tapes);
free(out_tapes);
return series;
}
int main(int argc, char* argv[])
{
if (argc < 3) {
help(argv[0]);
return 0;
}
init_args(argc, argv);
tape_t* in = tape_open(in_filename, "rb");
tape_t* out = tape_open(out_filename, "wb");
size_t series = make_series(in, n);
printfv(VERBOSITY_NORMAL, "Created %zu series using %zu buffers.\n", series, n);
unsigned i = 0;
while (series > n) {
printfv(VERBOSITY_NORMAL, "Iteration %u.\n", i);
series = iteration(i++, n);
if (flags & WANT_STOP) {
printf("Look at the files and press ENTER to continue...");
getchar();
}
}
printfv(VERBOSITY_NORMAL, "Final iteration.\n");
size_t offset = (i % 2) * n;
tape_t** in_tapes = malloc(series * sizeof(tape_t*));
for (unsigned i = 0; i < series; i++) {
in_tapes[i] = tape_tmp(offset + i, "rb", tmp_format);
}
join_tapes(in_tapes, series, out);
tape_close(in);
tape_close(out);
if (flags & WANT_SUMMARY) {
printf("Sorted file %s into %s in %u iterations.\n", in_filename, out_filename, i + 1);
}
if (flags & WANT_IOSTAT) {
iostats();
}
return EXIT_SUCCESS;
}