# -*- coding: utf-8 -*-
"""
Basic command-line tool used to transform .wav files.

Two transformations are available, both operating on 16-bit PCM data:

* ``-p`` shifts the spectrum of the file by a number of Hz (see shift_pitch),
* ``-a`` multiplies the amplitude by a factor (see multiply_amplitude).

@author: Szymon Szczyrbak
"""

import struct
import wave
from argparse import ArgumentParser

import numpy as np

parser = ArgumentParser(description="Preform transformation on wav files.")
parser.add_argument('--input', '-i', dest='input_path', help="Input wav file path.")
parser.add_argument('--output', '-o', dest='output_path', help="Output wav file path.")
parser.add_argument('-p', dest='pitch_shift_value', type=int, help="Pitch shift value.")
parser.add_argument('-a', dest='amplitude_multiplier_value', type=float, help="Amplitude multiplier value.")

# Both transformations interpret the sample data as signed 16-bit integers.
_SAMPLE_WIDTH_BYTES = 2
_INT16_INFO = np.iinfo(np.int16)


def _check_sample_width(reader):
    """Raise ValueError unless the opened wav file stores 16-bit samples."""
    width = reader.getsampwidth()
    if width != _SAMPLE_WIDTH_BYTES:
        raise ValueError(
            "Only 16-bit wav files are supported (got %d-bit samples)." % (8 * width))


def _to_int16_bytes(values):
    """Round values, saturate them to the int16 range and return the raw bytes.

    A plain ``astype(np.int16)`` wraps around on overflow, which turns loud
    peaks into full-scale noise; clipping first makes them saturate instead.
    """
    clipped = np.clip(np.rint(values), _INT16_INFO.min, _INT16_INFO.max)
    return clipped.astype(np.int16).tobytes()


# https://stackoverflow.com/questions/43963982/python-change-pitch-of-wav-file
def shift_pitch(input_path, output_path, pitch_shift_value):
    """Shift the spectrum of a 16-bit wav file and write the result.

    The input is processed in chunks of 1/20 second. Each chunk is
    transformed with a real FFT, its frequency bins are moved up (positive
    value) or down (negative value), and the chunk is transformed back.

    Args:
        input_path: Path of the wav file to read.
        output_path: Path of the wav file to write.
        pitch_shift_value: Shift in Hz. Because a chunk lasts 1/20 second the
            FFT bins are about 20 Hz apart, so the shift is applied in whole
            20 Hz steps, truncated toward zero.

    Raises:
        ValueError: If the input file does not use 16-bit samples.
    """
    # Chunks per second. A chunk of 1/20 s gives FFT bins ~20 Hz apart, so
    # this is also the size of one bin (in Hz) used to convert the shift.
    chunks_per_second = 20

    with wave.open(input_path, 'rb') as reader:
        _check_sample_width(reader)
        n_channels = reader.getnchannels()
        # Number of frames processed in one loop iteration (at least one).
        chunk_frames = max(1, reader.getframerate() // chunks_per_second)
        # Truncate toward zero so that +N and -N move by the same bin count.
        shift = int(pitch_shift_value / chunks_per_second)

        with wave.open(output_path, 'wb') as writer:
            # The number of frames will be set by writeframes.
            writer.setparams(reader.getparams()._replace(nframes=0))

            while True:
                raw = reader.readframes(chunk_frames)
                if not raw:
                    break
                # One row per frame, one column per channel.
                frames = np.frombuffer(raw, dtype=np.int16).reshape(-1, n_channels)
                frames_read = len(frames)
                if frames_read < chunk_frames:
                    # Zero-pad the final partial chunk so it has the same bin
                    # spacing as the others instead of being dropped.
                    padding = ((0, chunk_frames - frames_read), (0, 0))
                    frames = np.pad(frames, padding, mode='constant')

                # Extract the frequencies of every channel and roll the bins.
                spectrum = np.roll(np.fft.rfft(frames, axis=0), shift, axis=0)
                # Bins pushed past one end wrap around to the other. Zero 'em.
                if shift > 0:
                    spectrum[:shift] = 0
                elif shift < 0:
                    spectrum[shift:] = 0

                # Passing n keeps odd-length chunks at their original length.
                shifted = np.fft.irfft(spectrum, n=chunk_frames, axis=0)
                writer.writeframes(_to_int16_bytes(shifted[:frames_read]))


# https://stackoverflow.com/questions/13329617/change-the-volume-of-a-wav-file-in-python
def multiply_amplitude(input_path, output_path, amplitude_multiplier_value):
    """Multiply every sample of a 16-bit wav file by a factor.

    Results that do not fit into 16 bits are clipped to the int16 range
    rather than wrapped around.

    Args:
        input_path: Path of the wav file to read.
        output_path: Path of the wav file to write.
        amplitude_multiplier_value: Factor applied to every sample.

    Raises:
        ValueError: If the input file does not use 16-bit samples.
    """
    with wave.open(input_path, 'rb') as reader:
        _check_sample_width(reader)
        params = reader.getparams()
        raw = reader.readframes(reader.getnframes())

    # Scale in floating point so the product cannot overflow before clipping.
    scaled = np.frombuffer(raw, dtype=np.int16).astype(np.float64) * amplitude_multiplier_value

    with wave.open(output_path, 'wb') as writer:
        # The number of frames will be set by writeframes.
        writer.setparams(params._replace(nframes=0))
        writer.writeframes(_to_int16_bytes(scaled))


def main():
    """Parse the command line and run the requested transformation.

    Only one transformation is performed per run; when both -p and -a are
    given the pitch shift takes precedence.
    """
    args = parser.parse_args()
    if args.input_path is None or args.output_path is None:
        parser.error("both --input and --output are required")

    if args.pitch_shift_value is not None:
        shift_pitch(args.input_path, args.output_path, args.pitch_shift_value)
    elif args.amplitude_multiplier_value is not None:
        multiply_amplitude(args.input_path, args.output_path, args.amplitude_multiplier_value)
    else:
        parser.error("no transformation requested, use -p or -a")
    # TODO: More transformations. Should only one transformation be performed in one run?


if __name__ == "__main__":
    main()