# -*- coding: utf-8 -*-
"""
Basic command-line tool used to transform .wav files.

Three independent effects can be applied to 16-bit PCM audio: a small
pitch shift, a Perlin-noise amplitude modulation and additive pink noise.

@author: Szymon Szczyrbak
@author: Kacper Donat
"""

import struct
import wave
from argparse import ArgumentParser

import numpy as np
import resampy
from colorednoise import powerlaw_psd_gaussian
from noise import pnoise1

# The input is processed in slices of 1 / _CHUNKS_PER_SECOND seconds.
_CHUNKS_PER_SECOND = 5

_INT16_LIMITS = np.iinfo(np.int16)


def _to_int16(values):
    """Convert *values* to int16, saturating instead of wrapping around.

    A bare ``astype(np.int16)`` wraps out-of-range values (e.g. 40000
    becomes -25536), which is audible as harsh distortion; clipping first
    keeps loud peaks at full scale instead.
    """
    return np.clip(values, _INT16_LIMITS.min, _INT16_LIMITS.max).astype(np.int16)


# https://stackoverflow.com/questions/43963982/python-change-pitch-of-wav-file
def shift_pitch(da, wr, pitch_shift_value):
    """Shift the pitch of one chunk of interleaved int16 samples.

    Each channel is transformed with a real FFT, its spectrum is stretched
    by resampling, and the result is transformed back.

    :param da: 1-D array of interleaved samples holding whole frames.
    :param wr: open ``wave`` reader; only its channel count is used.
    :param pitch_shift_value: shift amount, where 1.0 means "no change".
    :return: interleaved int16 samples; the number of frames may differ
        from the input because the spectrum length changes.
    """
    multiplier = 1.0 + ((pitch_shift_value - 1.0) * (2 ** (1 / 12) - 1.0)) * 0.1

    channels = wr.getnchannels()
    # One column per channel (for stereo: column 0 = left, column 1 = right).
    frames = da.reshape(-1, channels)
    if frames.shape[0] < 2:
        # A single frame yields a one-bin spectrum that irfft cannot invert.
        return da.astype(np.int16)

    shifted = []
    for channel in range(channels):
        # Extract the frequencies using FFT.
        spectrum = np.fft.rfft(frames[:, channel])
        size = len(spectrum)
        new_size = int(size * multiplier)
        spectrum = resampy.resample(spectrum, size, new_size)
        # Inverse FFT.
        shifted.append(np.fft.irfft(spectrum))

    # Re-interleave the channels.
    return _to_int16(np.column_stack(shifted).ravel())


# https://stackoverflow.com/questions/13329617/change-the-volume-of-a-wav-file-in-python
def multiply_amplitude(samples, wr, i, amplitude_multiplier_value):
    """Modulate the amplitude of a chunk with slowly varying Perlin noise.

    :param samples: 1-D array of samples.
    :param wr: open ``wave`` reader; only its frame rate is used.
    :param i: offset of this chunk in the input, so that the modulation
        continues from chunk to chunk instead of restarting.
    :param amplitude_multiplier_value: modulation depth.
    :return: modulated samples as int16, saturated at full scale.
    """
    depth = amplitude_multiplier_value * 2
    rate = wr.getframerate()  # hoisted: constant for the whole chunk

    def gain(x):
        return (1.0 - depth / 2) + pnoise1((x + i) * 0.05 / rate, octaves=5) * depth

    # otypes lets np.vectorize cope with an empty chunk as well.
    modulate = np.fromfunction(
        np.vectorize(gain, otypes=[float]), samples.shape, dtype=float
    )
    return _to_int16(samples * modulate)


def add_noise(samples, wr, amplitude=0.1):
    """Add pink (1/f) noise scaled relative to the chunk's mean power.

    :param samples: 1-D array of samples.
    :param wr: open ``wave`` reader (unused, kept for a uniform signature).
    :param amplitude: noise power as a fraction of the signal's mean power.
    :return: noisy samples as int16, saturated at full scale.
    """
    if samples.size == 0:
        return samples.astype(np.int16)

    noise = powerlaw_psd_gaussian(1, samples.shape)
    # Square in float64: int16 ** 2 wraps around for |sample| > 181, which
    # could make the mean negative and turn the square root into NaN.
    mean_power = np.mean(samples.astype(np.float64) ** 2)
    noise_scale = np.sqrt(mean_power * amplitude)
    return _to_int16(samples + noise * noise_scale)


def transform_file(input_path, output_path, pitch_shift_value=None,
                   amplitude_multiplier_value=None, noise_amplitude=None):
    """Apply the selected effects to a WAV file and write the result.

    The input is processed chunk by chunk, including the final partial
    chunk. An effect is applied only when its argument is a float above
    the effect's threshold.

    :param input_path: path of the 16-bit PCM WAV file to read.
    :param output_path: path of the WAV file to write.
    :param pitch_shift_value: pitch shift; applied when it exceeds 1.001.
    :param amplitude_multiplier_value: modulation depth; applied when it
        exceeds 0.01.
    :param noise_amplitude: relative noise power; applied when positive.
    :raises ValueError: if the input does not use 16-bit samples.
    """
    # Context managers close both files even when an effect raises.
    with wave.open(input_path, 'r') as wr:
        if wr.getsampwidth() != 2:
            # Samples are decoded as int16 below; any other width would be
            # misinterpreted, so fail before the output file is created.
            raise ValueError(
                "Only 16-bit WAV files are supported, got %d-bit samples."
                % (8 * wr.getsampwidth())
            )

        with wave.open(output_path, 'w') as ww:
            # Same parameters as the input; the frame count starts at 0 and
            # is set by writeframes.
            ww.setparams(wr.getparams()._replace(nframes=0))

            # At least one frame per chunk, even for very low frame rates.
            per_loop = max(1, wr.getframerate() // _CHUNKS_PER_SECOND)
            offset = 0
            while True:
                raw = wr.readframes(per_loop)
                if not raw:
                    break
                ns = np.frombuffer(raw, dtype=np.int16)

                if isinstance(pitch_shift_value, float) and pitch_shift_value - 1.0 > 0.001:
                    ns = shift_pitch(ns, wr, pitch_shift_value)

                if isinstance(amplitude_multiplier_value, float) and amplitude_multiplier_value > 0.01:
                    ns = multiply_amplitude(ns, wr, offset, amplitude_multiplier_value)

                if isinstance(noise_amplitude, float) and noise_amplitude > 0.0:
                    ns = add_noise(ns, wr, noise_amplitude)

                ww.writeframes(ns.tobytes())
                offset += per_loop


parser = ArgumentParser(description="Perform transformation on wav files.")
parser.add_argument('--input', '-i', dest='input_path', required=True,
                    help="Input wav file path.")
parser.add_argument('--output', '-o', dest='output_path', required=True,
                    help="Output wav file path.")
parser.add_argument('-p', dest='pitch_shift_value', type=float,
                    help="Pitch shift value.")
parser.add_argument('-a', dest='amplitude_multiplier_value', type=float,
                    help="Amplitude multiplier value.")
parser.add_argument('-n', dest='noise_amplitude', type=float,
                    help="Amplitude of noise.")


def main():
    """Parse the command line and run the transformation."""
    args = parser.parse_args()

    transform_file(
        args.input_path,
        args.output_path,
        pitch_shift_value=args.pitch_shift_value,
        amplitude_multiplier_value=args.amplitude_multiplier_value,
        noise_amplitude=args.noise_amplitude,
    )


if __name__ == "__main__":
    main()