239 lines
5.4 KiB
C
239 lines
5.4 KiB
C
#include <math.h>
#include <stdio.h>
#include <stdlib.h>

#include "tape.h"
#include "record.h"
#include "heap.h"
#include "common.h"
|
|
|
|
// Sign of x: 1 when positive, -1 when negative, 0 otherwise.
// The argument is parenthesized so that expressions containing low-precedence
// operators (e.g. `a & b`, `c ? x : y`) expand correctly.
// Note: x is evaluated twice, so do not pass expressions with side effects.
#define SGN(x) (((x) > 0) - ((x) < 0))
|
|
|
|
// printf-style template for temporary tape file names; "%s" receives the
// textual tape identifier.
char *tmp_format = "tmp.%s.tape";
|
|
|
|
typedef struct {
|
|
record_t record;
|
|
tape_t* tape;
|
|
} entry_t;
|
|
|
|
// Maps an index to one symbol of the 62-character alphabet a-z, A-Z, 0-9.
// The index is reduced modulo the alphabet size first, so every input yields
// an alphanumeric character (0 -> 'a', 26 -> 'A', 52 -> '0', 62 -> 'a', ...).
char chartosymbol(unsigned id) {
    // A lookup table avoids relying on letters being contiguous in the
    // execution character set and removes the unreachable fallback branch.
    static const char symbols[] =
        "abcdefghijklmnopqrstuvwxyz"
        "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
        "0123456789";

    // sizeof includes the terminating NUL, hence the -1 (62 symbols).
    return symbols[id % (sizeof symbols - 1)];
}
|
|
|
|
// Writes a textual identifier for `id` into `string` as a NUL-terminated
// base-62 number (26 letters, 26 capitals, 10 digits). Digits are emitted
// least-significant first; at least one symbol is always written.
// The caller must supply a buffer large enough for all digits plus the NUL.
void identifier(unsigned id, char* string) {
    enum { BASE = 26 + 26 + 10 };

    char *out = string;
    for (;;) {
        *out++ = chartosymbol(id % BASE);
        id /= BASE;
        if (id == 0) {
            break;
        }
    }

    *out = '\0';
}
|
|
|
|
// Opens the temporary tape number `id`.
//
// The file name is produced by formatting `format` (a printf-style template
// with a single "%s") with the textual identifier of `id`; the tape is then
// opened with tape_open() using `mode`.
//
// Returns the result of tape_open(), or NULL when the generated name does not
// fit into the internal name buffer.
tape_t* tape_tmp(unsigned id, const char* mode, const char* format) {
    char ident[20];
    char tmpname[256];

    identifier(id, ident);

    // Bounded formatting: never write past tmpname, and refuse to open a
    // silently truncated (i.e. wrong) file name.
    int written = snprintf(tmpname, sizeof tmpname, format, ident);
    if (written < 0 || (size_t)written >= sizeof tmpname) {
        return NULL;
    }

    return tape_open(tmpname, mode);
}
|
|
|
|
// Prints a short usage message to standard output.
// `name` is the program name shown in the usage line.
void help(const char* name) {
    printf(
        "Sorts records from tape.\n"
        "Usage:\n"
        "\t%s <in-tape> <out-tape>\n",
        name
    );
}
|
|
|
|
int compare_records(const void* a, const void* b) {
|
|
double result = record_compare(*(record_t*)a, *(record_t*)b);
|
|
return SGN(result);
|
|
}
|
|
|
|
int compare_entries(const void* a, const void* b) {
|
|
const entry_t* lhs = a;
|
|
const entry_t* rhs = b;
|
|
|
|
return compare_records(&lhs->record, &rhs->record);
|
|
}
|
|
|
|
// Sorts the first `n` records of `buffer` in place (using compare_records)
// and writes them to `tape` one record at a time, in sorted order.
void save_sorted(tape_t* tape, record_t* buffer, size_t n) {
    qsort(buffer, n, sizeof *buffer, compare_records);

    size_t remaining = n;
    for (record_t *rec = buffer; remaining > 0; rec++, remaining--) {
        tape_write(tape, rec, sizeof *rec);
    }
}
|
|
|
|
size_t make_series(tape_t* in, size_t n) {
|
|
const size_t max = PAGE_SIZE / sizeof(record_t) * (n + 1);
|
|
|
|
size_t series = 0, i = 0;
|
|
record_t* buffer = malloc(PAGE_SIZE * (n + 1));
|
|
|
|
tape_t* tmp;
|
|
while (tape_read(in, buffer + i++, sizeof(record_t)) > 0) {
|
|
if (i >= max) {
|
|
tmp = tape_tmp(series % n, series >= n ? "ab" : "wb", tmp_format);
|
|
save_sorted(tmp, buffer, i);
|
|
tape_close(tmp);
|
|
|
|
i = 0;
|
|
series++;
|
|
}
|
|
}
|
|
|
|
if (i > 1) {
|
|
tmp = tape_tmp(series % n, series >= n ? "ab" : "wb", tmp_format);
|
|
save_sorted(tmp, buffer, i - 1);
|
|
tape_close(tmp);
|
|
series++;
|
|
}
|
|
|
|
free(buffer);
|
|
return series;
|
|
}
|
|
|
|
// Merges one series from each of the `n` input tapes into `out`.
//
// The first record of every tape that still has data is placed on a heap
// ordered by compare_entries. Entries are then popped one by one, written to
// `out`, and replaced by the next record of the same tape for as long as that
// record does not compare lower than the one just written. A lower record
// marks the start of the tape's next series: it is pushed back with
// tape_rewind() and the tape drops out of this merge.
//
// Returns the number of records taken from the input tapes; 0 means that no
// tape had any data left. Terminates the program if an entry cannot be
// allocated.
unsigned join_tapes(tape_t** tapes, size_t n, tape_t* out) {
    heap_t heap;
    unsigned records = 0;

    heap_init(&heap, n, compare_entries);

    // initial distribution
    for (size_t i = 0; i < n; i++) {
        record_t record;

        if (!tape_read(tapes[i], &record, sizeof(record_t))) {
            continue;  // this tape is exhausted
        }

        entry_t *entry = malloc(sizeof *entry);
        if (entry == NULL) {
            perror("join_tapes: malloc");
            exit(EXIT_FAILURE);
        }
        entry->tape = tapes[i];
        entry->record = record;

        heap_insert(&heap, entry);
        records++;
    }

    // merge into 1 tape
    // When nothing was read the heap is empty, the loop body never runs and
    // the heap is still released below (no early return that would leak it).
    entry_t *current;
    while ((current = heap_pop(&heap))) {
        tape_write(out, &current->record, sizeof(record_t));

        record_t record;
        if (tape_read(current->tape, &record, sizeof(record_t))) {
            if (record_compare(record, current->record) >= 0) {
                // Still the same series: reuse the entry for the new record.
                current->record = record;
                heap_insert(&heap, current);
                records++;
                continue;
            }

            // The record belongs to the next series; un-read it.
            tape_rewind(current->tape, 1, sizeof(record_t));
        }

        free(current);
    }

    heap_free(&heap);

    return records;
}
|
|
|
|
void print_entry(void* e)
|
|
{
|
|
entry_t* entry = e;
|
|
printf("%lf ", record_length(entry->record));
|
|
}
|
|
|
|
unsigned iteration(size_t i, size_t n)
|
|
{
|
|
size_t in_offset = (i % 2) * n;
|
|
size_t out_offset = ((i+1) % 2) * n;
|
|
|
|
unsigned series = 0;
|
|
|
|
tape_t** in_tapes = malloc(n * sizeof(tape_t*));
|
|
tape_t** out_tapes = malloc(n * sizeof(tape_t*));
|
|
heap_t heap;
|
|
heap_init(&heap, n, compare_entries);
|
|
|
|
for (unsigned i = 0; i < n; i++) {
|
|
in_tapes[i] = tape_tmp(in_offset + i, "rb", tmp_format);
|
|
out_tapes[i] = tape_tmp(out_offset + i, "wb", tmp_format);
|
|
}
|
|
|
|
while (join_tapes(in_tapes, n, out_tapes[series % n]) > 0) {
|
|
series++;
|
|
}
|
|
|
|
for (unsigned i = 0; i < n; i++) {
|
|
tape_close(in_tapes[i]);
|
|
tape_close(out_tapes[i]);
|
|
}
|
|
|
|
free(in_tapes);
|
|
free(out_tapes);
|
|
|
|
return series;
|
|
}
|
|
|
|
int main(int argc, const char* argv[])
|
|
{
|
|
if (argc < 3) {
|
|
help(argv[0]);
|
|
return 0;
|
|
}
|
|
|
|
size_t n = 100;
|
|
|
|
tape_t* in = tape_open(argv[1], "rb");
|
|
tape_t* out = tape_open(argv[2], "wb");
|
|
|
|
size_t series = make_series(in, n);
|
|
printf("Created %zu series.\n", series);
|
|
|
|
unsigned i = 0;
|
|
while (series > n) {
|
|
printf("Iteration %u.\n", i);
|
|
series = iteration(i++, n);
|
|
}
|
|
|
|
printf("Final iteration.\n");
|
|
size_t offset = (i % 2) * n;
|
|
tape_t** in_tapes = malloc(series * sizeof(tape_t*));
|
|
for (unsigned i = 0; i < series; i++) {
|
|
in_tapes[i] = tape_tmp(offset + i, "rb", tmp_format);
|
|
}
|
|
|
|
join_tapes(in_tapes, series, out);
|
|
|
|
printf("Sorted file %s into %s in %u iterations.\n", in->name, out->name, i + 1);
|
|
|
|
tape_close(in);
|
|
tape_close(out);
|
|
|
|
printf("%u IO R: %u W: %u\n", reads + writes, reads, writes);
|
|
|
|
return 1;
|
|
}
|