#include #include #include "tape.h" #include "record.h" #include "heap.h" #include "common.h" #define SGN(x) ((x > 0) - (x < 0)) char* tmp_format = "tmp.%s.tape"; typedef struct { record_t record; tape_t* tape; } entry_t; char chartosymbol(unsigned id) { // normalize id id = id % 62; if (id < 26) { return 'a' + id; } else if (id < 52) { return 'A' + id - 26; } else if (id < 62) { return '0' + id - 52; } else { return '?'; } } void identifier(unsigned id, char* string) { // 26 letters 26 capitals 10 digits const unsigned base = 26 + 26 + 10; unsigned reminder, i = 0; do { reminder = id % base; string[i++] = chartosymbol(reminder); } while (id /= base); string[i] = 0; } tape_t* tape_tmp(unsigned id, const char* mode, const char* format) { char tmpname[256], ident[20]; identifier(id, ident); sprintf(tmpname, format, ident); return tape_open(tmpname, mode); } void help(const char* name) { printf( "Sorts reocrds from tape.\n" "Usage:\n" "\t%s \n", name ); } int compare_records(const void* a, const void* b) { double result = record_compare(*(record_t*)a, *(record_t*)b); return SGN(result); } int compare_entries(const void* a, const void* b) { const entry_t* lhs = a; const entry_t* rhs = b; return compare_records(&lhs->record, &rhs->record); } void save_sorted(tape_t* tape, record_t* buffer, size_t n) { qsort(buffer, n, sizeof(record_t), compare_records); for (size_t j = 0; j < n; j++) { tape_write(tape, buffer + j, sizeof(record_t)); } } size_t make_series(tape_t* in, size_t n) { const size_t max = PAGE_SIZE / sizeof(record_t) * (n + 1); size_t series = 0, i = 0; record_t* buffer = malloc(PAGE_SIZE * (n + 1)); tape_t* tmp; while (tape_read(in, buffer + i++, sizeof(record_t)) > 0) { if (i >= max) { tmp = tape_tmp(series % n, series >= n ? "ab" : "wb", tmp_format); save_sorted(tmp, buffer, i); tape_close(tmp); i = 0; series++; } } if (i > 1) { tmp = tape_tmp(series % n, series >= n ? "ab" : "wb", tmp_format); save_sorted(tmp, buffer, i - 1); tape_close(tmp); series++; } free(buffer); return series; } unsigned join_tapes(tape_t** tapes, size_t n, tape_t* out) { heap_t heap; unsigned records = 0; heap_init(&heap, n, compare_entries); // initial distribution for (unsigned i = 0; i < n; i++) { record_t record; if (tape_read(tapes[i], &record, sizeof(record_t))) { entry_t* entry = malloc(sizeof(entry_t)); entry->tape = tapes[i]; entry->record = record; heap_insert(&heap, entry); records++; } } if (!records) { return records; } // merge into 1 tape entry_t* current; while ((current = heap_pop(&heap))) { tape_write(out, ¤t->record, sizeof(record_t)); record_t record; record_t* result = tape_read(current->tape, &record, sizeof(record_t)); if (result) { if (record_compare(record, current->record) >= 0) { current->record = record; heap_insert(&heap, current); records++; continue; } else { tape_rewind(current->tape, 1, sizeof(record_t)); } } free(current); } heap_free(&heap); return records; } void print_entry(void* e) { entry_t* entry = e; printf("%lf ", record_length(entry->record)); } unsigned iteration(size_t i, size_t n) { size_t in_offset = (i % 2) * n; size_t out_offset = ((i+1) % 2) * n; unsigned series = 0; tape_t** in_tapes = malloc(n * sizeof(tape_t*)); tape_t** out_tapes = malloc(n * sizeof(tape_t*)); heap_t heap; heap_init(&heap, n, compare_entries); for (unsigned i = 0; i < n; i++) { in_tapes[i] = tape_tmp(in_offset + i, "rb", tmp_format); out_tapes[i] = tape_tmp(out_offset + i, "wb", tmp_format); } while (join_tapes(in_tapes, n, out_tapes[series % n]) > 0) { series++; } for (unsigned i = 0; i < n; i++) { tape_close(in_tapes[i]); tape_close(out_tapes[i]); } free(in_tapes); free(out_tapes); return series; } int main(int argc, const char* argv[]) { if (argc < 3) { help(argv[0]); return 0; } size_t n = 100; tape_t* in = tape_open(argv[1], "rb"); tape_t* out = tape_open(argv[2], "wb"); size_t series = make_series(in, n); printf("Created %zu series.\n", series); unsigned i = 0; while (series > n) { printf("Iteration %u.\n", i); series = iteration(i++, n); } printf("Final iteration.\n"); size_t offset = (i % 2) * n; tape_t** in_tapes = malloc(series * sizeof(tape_t*)); for (unsigned i = 0; i < series; i++) { in_tapes[i] = tape_tmp(offset + i, "rb", tmp_format); } join_tapes(in_tapes, series, out); printf("Sorted file %s into %s in %u iterations.\n", in->name, out->name, i + 1); tape_close(in); tape_close(out); printf("%u IO R: %u W: %u\n", reads + writes, reads, writes); return 1; }