292 lines
7.0 KiB
C
292 lines
7.0 KiB
C
#include <math.h>
|
|
#include <stdlib.h>
|
|
#include "tape.h"
|
|
#include "record.h"
|
|
#include "heap.h"
|
|
#include "common.h"
|
|
|
|
#define SGN(x) ((x > 0) - (x < 0))
|
|
|
|
#define WANT_SUMMARY 1
|
|
#define WANT_IOSTAT 2
|
|
#define WANT_STOP 4
|
|
|
|
char *in_filename, *out_filename;
|
|
char flags;
|
|
char *tmp_format = "tmp.%s.tape";
|
|
size_t n = 100;
|
|
|
|
typedef struct {
|
|
record_t record;
|
|
tape_t* tape;
|
|
} entry_t;
|
|
|
|
void init_args(int args, char* argv[])
|
|
{
|
|
optparse_t options;
|
|
optparse_init(&options, argv);
|
|
|
|
for (char opt; opt != -1; opt = optparse(&options, "qvipsn:t:")) {
|
|
switch (opt) {
|
|
case 'q':
|
|
verbosity--;
|
|
break;
|
|
case 'v':
|
|
verbosity++;
|
|
break;
|
|
case 'i':
|
|
flags |= WANT_IOSTAT;
|
|
break;
|
|
case 's':
|
|
flags |= WANT_SUMMARY;
|
|
break;
|
|
case 'p':
|
|
flags |= WANT_STOP;
|
|
break;
|
|
case 'n':
|
|
sscanf(options.optarg, "%zu", &n);
|
|
break;
|
|
case 't':
|
|
tmp_format = options.optarg;
|
|
break;
|
|
}
|
|
}
|
|
|
|
in_filename = optparse_arg(&options);
|
|
out_filename = optparse_arg(&options);
|
|
}
|
|
|
|
char chartosymbol(unsigned id) {
|
|
// normalize id
|
|
id = id % 62;
|
|
|
|
if (id < 26) {
|
|
return 'a' + id;
|
|
} else if (id < 52) {
|
|
return 'A' + id - 26;
|
|
} else if (id < 62) {
|
|
return '0' + id - 52;
|
|
} else {
|
|
return '?';
|
|
}
|
|
}
|
|
|
|
void identifier(unsigned id, char* string) {
|
|
// 26 letters 26 capitals 10 digits
|
|
const unsigned base = 26 + 26 + 10;
|
|
|
|
unsigned reminder, i = 0;
|
|
do {
|
|
reminder = id % base;
|
|
string[i++] = chartosymbol(reminder);
|
|
} while (id /= base);
|
|
|
|
string[i] = 0;
|
|
}
|
|
|
|
tape_t* tape_tmp(unsigned id, const char* mode, const char* format) {
|
|
char tmpname[256], ident[20];
|
|
|
|
identifier(id, ident);
|
|
sprintf(tmpname, format, ident);
|
|
|
|
return tape_open(tmpname, mode);
|
|
}
|
|
|
|
void help(const char* name) {
|
|
printf(
|
|
"Sorts reocrds from tape.\n"
|
|
"Usage:\n"
|
|
"\t%s [options] <in-tape> <out-tape>\n"
|
|
"Options:\n"
|
|
"\t-q|v|vv - verbosity level, q for quiet, v for vervose vv for debug\n"
|
|
"\t-s - summary of records (R) and runs (S)\n"
|
|
"\t-i - summary IO stats\n"
|
|
"\t-p - pause after each iteration\n"
|
|
"\t-n buffers - number of buffers, must be greater than 2 (default: 100)\n"
|
|
"\t-t format - temporary tape file name format, must include %%s identifier (default: tmp.%%s.tape)\n"
|
|
,
|
|
name
|
|
);
|
|
}
|
|
|
|
int compare_records(const void* a, const void* b) {
|
|
double result = record_compare(*(record_t*)a, *(record_t*)b);
|
|
return SGN(result);
|
|
}
|
|
|
|
int compare_entries(const void* a, const void* b) {
|
|
const entry_t* lhs = a;
|
|
const entry_t* rhs = b;
|
|
|
|
return compare_records(&lhs->record, &rhs->record);
|
|
}
|
|
|
|
void save_sorted(tape_t* tape, record_t* buffer, size_t n) {
|
|
qsort(buffer, n, sizeof(record_t), compare_records);
|
|
|
|
for (size_t j = 0; j < n; j++) {
|
|
tape_write(tape, buffer + j, sizeof(record_t));
|
|
}
|
|
}
|
|
|
|
size_t make_series(tape_t* in, size_t n) {
|
|
const size_t max = PAGE_SIZE / sizeof(record_t) * (n + 1);
|
|
|
|
size_t series = 0, i = 0;
|
|
record_t* buffer = malloc(PAGE_SIZE * (n + 1));
|
|
|
|
tape_t* tmp;
|
|
while (tape_read(in, buffer + i++, sizeof(record_t)) > 0) {
|
|
if (i >= max) {
|
|
tmp = tape_tmp(series % n, series >= n ? "ab" : "wb", tmp_format);
|
|
save_sorted(tmp, buffer, i);
|
|
tape_close(tmp);
|
|
|
|
i = 0;
|
|
series++;
|
|
}
|
|
}
|
|
|
|
if (i > 1) {
|
|
tmp = tape_tmp(series % n, series >= n ? "ab" : "wb", tmp_format);
|
|
save_sorted(tmp, buffer, i - 1);
|
|
tape_close(tmp);
|
|
series++;
|
|
}
|
|
|
|
free(buffer);
|
|
return series;
|
|
}
|
|
|
|
unsigned join_tapes(tape_t** tapes, size_t n, tape_t* out) {
|
|
heap_t heap;
|
|
unsigned records = 0;
|
|
|
|
heap_init(&heap, n, compare_entries);
|
|
|
|
// initial distribution
|
|
for (unsigned i = 0; i < n; i++) {
|
|
record_t record;
|
|
|
|
if (tape_read(tapes[i], &record, sizeof(record_t))) {
|
|
entry_t* entry = malloc(sizeof(entry_t));
|
|
entry->tape = tapes[i];
|
|
entry->record = record;
|
|
|
|
heap_insert(&heap, entry);
|
|
records++;
|
|
}
|
|
}
|
|
|
|
if (!records) {
|
|
return records;
|
|
}
|
|
|
|
// merge into 1 tape
|
|
entry_t* current;
|
|
while ((current = heap_pop(&heap))) {
|
|
tape_write(out, ¤t->record, sizeof(record_t));
|
|
record_t record;
|
|
record_t* result = tape_read(current->tape, &record, sizeof(record_t));
|
|
|
|
if (result) {
|
|
if (record_compare(record, current->record) >= 0) {
|
|
current->record = record;
|
|
heap_insert(&heap, current);
|
|
records++;
|
|
continue;
|
|
} else {
|
|
tape_rewind(current->tape, 1, sizeof(record_t));
|
|
}
|
|
}
|
|
|
|
free(current);
|
|
}
|
|
|
|
heap_free(&heap);
|
|
|
|
return records;
|
|
}
|
|
|
|
unsigned iteration(size_t i, size_t n)
|
|
{
|
|
size_t in_offset = (i % 2) * n;
|
|
size_t out_offset = ((i+1) % 2) * n;
|
|
|
|
unsigned series = 0;
|
|
|
|
tape_t** in_tapes = malloc(n * sizeof(tape_t*));
|
|
tape_t** out_tapes = malloc(n * sizeof(tape_t*));
|
|
heap_t heap;
|
|
heap_init(&heap, n, compare_entries);
|
|
|
|
for (unsigned i = 0; i < n; i++) {
|
|
in_tapes[i] = tape_tmp(in_offset + i, "rb", tmp_format);
|
|
out_tapes[i] = tape_tmp(out_offset + i, "wb", tmp_format);
|
|
}
|
|
|
|
while (join_tapes(in_tapes, n, out_tapes[series % n]) > 0) {
|
|
series++;
|
|
}
|
|
|
|
for (unsigned i = 0; i < n; i++) {
|
|
tape_close(in_tapes[i]);
|
|
tape_close(out_tapes[i]);
|
|
}
|
|
|
|
free(in_tapes);
|
|
free(out_tapes);
|
|
|
|
return series;
|
|
}
|
|
|
|
int main(int argc, char* argv[])
|
|
{
|
|
if (argc < 3) {
|
|
help(argv[0]);
|
|
return 0;
|
|
}
|
|
|
|
init_args(argc, argv);
|
|
|
|
tape_t* in = tape_open(in_filename, "rb");
|
|
tape_t* out = tape_open(out_filename, "wb");
|
|
|
|
size_t series = make_series(in, n);
|
|
printfv(VERBOSITY_NORMAL, "Created %zu series using %zu buffers.\n", series, n);
|
|
|
|
unsigned i = 0;
|
|
while (series > n) {
|
|
printfv(VERBOSITY_NORMAL, "Iteration %u.\n", i);
|
|
series = iteration(i++, n);
|
|
|
|
if (flags & WANT_STOP) {
|
|
printf("Look at the files and press ENTER to continue...");
|
|
getchar();
|
|
}
|
|
}
|
|
|
|
printfv(VERBOSITY_NORMAL, "Final iteration.\n");
|
|
size_t offset = (i % 2) * n;
|
|
tape_t** in_tapes = malloc(series * sizeof(tape_t*));
|
|
for (unsigned i = 0; i < series; i++) {
|
|
in_tapes[i] = tape_tmp(offset + i, "rb", tmp_format);
|
|
}
|
|
|
|
join_tapes(in_tapes, series, out);
|
|
|
|
tape_close(in);
|
|
tape_close(out);
|
|
|
|
if (flags & WANT_SUMMARY) {
|
|
printf("Sorted file %s into %s in %u iterations.\n", in_filename, out_filename, i + 1);
|
|
}
|
|
|
|
if (flags & WANT_IOSTAT) {
|
|
iostats();
|
|
}
|
|
|
|
return EXIT_SUCCESS;
|
|
}
|