#include #include #include "tape.h" #include "record.h" #include "heap.h" #include "common.h" #define SGN(x) ((x > 0) - (x < 0)) #define WANT_SUMMARY 1 #define WANT_IOSTAT 2 #define WANT_STOP 4 char *in_filename, *out_filename; char flags; char *tmp_format = "tmp.%s.tape"; size_t n = 100; typedef struct { record_t record; tape_t* tape; } entry_t; void init_args(int args, char* argv[]) { optparse_t options; optparse_init(&options, argv); for (char opt; opt != -1; opt = optparse(&options, "qvipsn:t:")) { switch (opt) { case 'q': verbosity--; break; case 'v': verbosity++; break; case 'i': flags |= WANT_IOSTAT; break; case 's': flags |= WANT_SUMMARY; break; case 'p': flags |= WANT_STOP; break; case 'n': sscanf(options.optarg, "%zu", &n); break; case 't': tmp_format = options.optarg; break; } } in_filename = optparse_arg(&options); out_filename = optparse_arg(&options); } char chartosymbol(unsigned id) { // normalize id id = id % 62; if (id < 26) { return 'a' + id; } else if (id < 52) { return 'A' + id - 26; } else if (id < 62) { return '0' + id - 52; } else { return '?'; } } void identifier(unsigned id, char* string) { // 26 letters 26 capitals 10 digits const unsigned base = 26 + 26 + 10; unsigned reminder, i = 0; do { reminder = id % base; string[i++] = chartosymbol(reminder); } while (id /= base); string[i] = 0; } tape_t* tape_tmp(unsigned id, const char* mode, const char* format) { char tmpname[256], ident[20]; identifier(id, ident); sprintf(tmpname, format, ident); return tape_open(tmpname, mode); } void help(const char* name) { printf( "Sorts reocrds from tape.\n" "Usage:\n" "\t%s [options] \n" "Options:\n" "\t-q|v|vv - verbosity level, q for quiet, v for vervose vv for debug\n" "\t-s - summary of records (R) and runs (S)\n" "\t-i - summary IO stats\n" "\t-p - pause after each iteration\n" "\t-n buffers - number of buffers, must be greater than 2 (default: 100)\n" "\t-t format - temporary tape file name format, must include %%s identifier (default: tmp.%%s.tape)\n" , name ); } int compare_records(const void* a, const void* b) { double result = record_compare(*(record_t*)a, *(record_t*)b); return SGN(result); } int compare_entries(const void* a, const void* b) { const entry_t* lhs = a; const entry_t* rhs = b; return compare_records(&lhs->record, &rhs->record); } void save_sorted(tape_t* tape, record_t* buffer, size_t n) { qsort(buffer, n, sizeof(record_t), compare_records); for (size_t j = 0; j < n; j++) { tape_write(tape, buffer + j, sizeof(record_t)); } } size_t make_series(tape_t* in, size_t n) { const size_t max = PAGE_SIZE / sizeof(record_t) * (n + 1); size_t series = 0, i = 0; record_t* buffer = malloc(PAGE_SIZE * (n + 1)); tape_t* tmp; while (tape_read(in, buffer + i++, sizeof(record_t)) > 0) { if (i >= max) { tmp = tape_tmp(series % n, series >= n ? "ab" : "wb", tmp_format); save_sorted(tmp, buffer, i); tape_close(tmp); i = 0; series++; } } if (i > 1) { tmp = tape_tmp(series % n, series >= n ? "ab" : "wb", tmp_format); save_sorted(tmp, buffer, i - 1); tape_close(tmp); series++; } free(buffer); return series; } unsigned join_tapes(tape_t** tapes, size_t n, tape_t* out) { heap_t heap; unsigned records = 0; heap_init(&heap, n, compare_entries); // initial distribution for (unsigned i = 0; i < n; i++) { record_t record; if (tape_read(tapes[i], &record, sizeof(record_t))) { entry_t* entry = malloc(sizeof(entry_t)); entry->tape = tapes[i]; entry->record = record; heap_insert(&heap, entry); records++; } } if (!records) { return records; } // merge into 1 tape entry_t* current; while ((current = heap_pop(&heap))) { tape_write(out, ¤t->record, sizeof(record_t)); record_t record; record_t* result = tape_read(current->tape, &record, sizeof(record_t)); if (result) { if (record_compare(record, current->record) >= 0) { current->record = record; heap_insert(&heap, current); records++; continue; } else { tape_rewind(current->tape, 1, sizeof(record_t)); } } free(current); } heap_free(&heap); return records; } unsigned iteration(size_t i, size_t n) { size_t in_offset = (i % 2) * n; size_t out_offset = ((i+1) % 2) * n; unsigned series = 0; tape_t** in_tapes = malloc(n * sizeof(tape_t*)); tape_t** out_tapes = malloc(n * sizeof(tape_t*)); heap_t heap; heap_init(&heap, n, compare_entries); for (unsigned i = 0; i < n; i++) { in_tapes[i] = tape_tmp(in_offset + i, "rb", tmp_format); out_tapes[i] = tape_tmp(out_offset + i, "wb", tmp_format); } while (join_tapes(in_tapes, n, out_tapes[series % n]) > 0) { series++; } for (unsigned i = 0; i < n; i++) { tape_close(in_tapes[i]); tape_close(out_tapes[i]); } free(in_tapes); free(out_tapes); return series; } int main(int argc, char* argv[]) { if (argc < 3) { help(argv[0]); return 0; } init_args(argc, argv); tape_t* in = tape_open(in_filename, "rb"); tape_t* out = tape_open(out_filename, "wb"); size_t series = make_series(in, n); printfv(VERBOSITY_NORMAL, "Created %zu series using %zu buffers.\n", series, n); unsigned i = 0; while (series > n) { printfv(VERBOSITY_NORMAL, "Iteration %u.\n", i); series = iteration(i++, n); if (flags & WANT_STOP) { printf("Look at the files and press ENTER to continue..."); getchar(); } } printfv(VERBOSITY_NORMAL, "Final iteration.\n"); size_t offset = (i % 2) * n; tape_t** in_tapes = malloc(series * sizeof(tape_t*)); for (unsigned i = 0; i < series; i++) { in_tapes[i] = tape_tmp(offset + i, "rb", tmp_format); } join_tapes(in_tapes, series, out); tape_close(in); tape_close(out); if (flags & WANT_SUMMARY) { printf("Sorted file %s into %s in %u iterations.\n", in_filename, out_filename, i + 1); } if (flags & WANT_IOSTAT) { iostats(); } return EXIT_SUCCESS; }