Add wav transform script.

2020-05-26 22:28:18 +02:00 · 2020-05-26 22:28:18 +02:00 · 22726834f3
commit 22726834f3
parent 7c2485ce9b
1 changed files with 93 additions and 0 deletions
--- a/transform_wav.py
+++ b/transform_wav.py
@ -0,0 +1,93 @@
+# -*- coding: utf-8 -*-
+"""
+Basic command-line tool used to transform .wav files.
+
+@author: Szymon Szczyrbak
+"""
+
+import wave
+import numpy as np
+import struct
+from argparse import ArgumentParser
+
+# Command-line interface: an input file, an output file and the parameters
+# of the transformation to apply. -p and -a are optional and are None when
+# they are omitted; the two paths are mandatory so that a missing path is
+# reported as a usage error instead of failing later inside wave.open().
+parser = ArgumentParser(description="Perform transformation on wav files.")
+parser.add_argument('--input', '-i', dest='input_path', required=True,
+                    help="Input wav file path.")
+parser.add_argument('--output', '-o', dest='output_path', required=True,
+                    help="Output wav file path.")
+parser.add_argument('-p', dest='pitch_shift_value', type=int,
+                    help="Pitch shift value (negative values shift down).")
+parser.add_argument('-a', dest='amplitude_multiplier_value', type=float,
+                    help="Amplitude multiplier value.")
+
+# https://stackoverflow.com/questions/43963982/python-change-pitch-of-wav-file
+def shift_pitch(input_path, output_path, pitch_shift_value):
+    """Shift the spectrum of a 16-bit PCM wav file and write the result.
+
+    The audio is processed in chunks of 1/20 s. Each chunk is transformed
+    with a real FFT, its frequency bins are rolled up (positive value) or
+    down (negative value), the bins that wrapped around the end of the
+    spectrum are zeroed, and the chunk is transformed back.
+
+    Note that every frequency is moved by the same amount, so this is a
+    linear frequency shift rather than a musical (ratio-preserving) one.
+
+    Args:
+        input_path: Path of the wav file to read.
+        output_path: Path of the wav file to write.
+        pitch_shift_value: Integer shift in Hz; it is rounded down to a
+            whole number of FFT bins (about 20 Hz each for a full chunk).
+
+    Raises:
+        ValueError: If the input file does not use 16-bit samples.
+    """
+    chunks_per_second = 20  # Chunk length is 1/20 s => ~20 Hz per FFT bin.
+
+    with wave.open(input_path, 'rb') as wr:
+        if wr.getsampwidth() != 2:
+            raise ValueError("Only 16-bit PCM wav files are supported.")
+        n_channels = wr.getnchannels()
+        framerate = wr.getframerate()
+        # Number of frames processed in one loop iteration.
+        chunk_frames = max(1, framerate // chunks_per_second)
+
+        with wave.open(output_path, 'wb') as ww:
+            # The number of frames is filled in by writeframes().
+            ww.setparams(wr.getparams()._replace(nframes=0))
+
+            while True:
+                raw = wr.readframes(chunk_frames)
+                if not raw:
+                    break
+                # One row per frame, one column per channel.
+                samples = np.frombuffer(raw, dtype='<i2')
+                samples = samples.reshape(-1, n_channels)
+                n_frames = samples.shape[0]
+                # Bin width is framerate / n_frames, so scaling by the
+                # actual chunk length keeps the shift in Hz constant for
+                # the (shorter) final chunk as well.
+                shift = int(pitch_shift_value * n_frames // framerate)
+
+                spectrum = np.fft.rfft(samples, axis=0)
+                spectrum = np.roll(spectrum, shift, axis=0)
+                # Zero the bins that wrapped around to the other end.
+                if shift > 0:
+                    spectrum[:shift] = 0
+                elif shift < 0:
+                    spectrum[shift:] = 0
+
+                # n= keeps odd-length chunks at their original length.
+                shifted = np.fft.irfft(spectrum, n=n_frames, axis=0)
+                # Round and clip so the cast to int16 cannot wrap around.
+                shifted = np.clip(np.rint(shifted), -32768, 32767)
+                ww.writeframes(shifted.astype('<i2').tobytes())
+
+
+# https://stackoverflow.com/questions/13329617/change-the-volume-of-a-wav-file-in-python
+def multiply_amplitude(input_path, output_path, amplitude_multiplier_value):
+    """Scale every sample of a 16-bit PCM wav file by a constant factor.
+
+    All channels are scaled by the same factor. Results that do not fit
+    into a 16-bit sample are clipped to the nearest representable value;
+    without clipping they would wrap around and produce loud noise.
+
+    Args:
+        input_path: Path of the wav file to read.
+        output_path: Path of the wav file to write.
+        amplitude_multiplier_value: Factor applied to every sample.
+
+    Raises:
+        ValueError: If the input file does not use 16-bit samples.
+    """
+    # Read the whole input and close it before the output is opened.
+    with wave.open(input_path, 'rb') as wr:
+        if wr.getsampwidth() != 2:
+            raise ValueError("Only 16-bit PCM wav files are supported.")
+        params = wr.getparams()
+        samples = np.frombuffer(wr.readframes(wr.getnframes()), dtype='<i2')
+
+    scaled = samples.astype(np.float64) * amplitude_multiplier_value
+    # Clip before the cast so that overflowing samples saturate.
+    scaled = np.clip(scaled, -32768, 32767).astype('<i2')
+
+    with wave.open(output_path, 'wb') as ww:
+        # The number of frames is filled in by writeframes().
+        ww.setparams(params._replace(nframes=0))
+        ww.writeframes(scaled.tobytes())
+
+
+def main():
+    """Parse the command line and run the requested transformation(s).
+
+    With only -p the file is pitch shifted; with only -a its amplitude is
+    scaled. When both are given, the pitch shift is applied first and the
+    amplitude multiplier second, with a temporary wav file in between.
+    Exits with a usage error when a path is missing or when neither -p
+    nor -a is given.
+    """
+    # Function-scope imports: only needed for the intermediate file.
+    import os
+    import tempfile
+
+    args = parser.parse_args()
+    input_path = args.input_path
+    output_path = args.output_path
+    pitch_shift_value = args.pitch_shift_value
+    amplitude_multiplier_value = args.amplitude_multiplier_value
+
+    if input_path is None or output_path is None:
+        parser.error("both --input and --output are required")
+    if pitch_shift_value is None and amplitude_multiplier_value is None:
+        parser.error("nothing to do: pass -p and/or -a")
+
+    if amplitude_multiplier_value is None:
+        shift_pitch(input_path, output_path, pitch_shift_value)
+    elif pitch_shift_value is None:
+        # TODO: Weird noises for amplitude x5 (likely int16 overflow - confirm).
+        multiply_amplitude(input_path, output_path, amplitude_multiplier_value)
+    else:
+        # Both requested: chain them through a temporary file so neither
+        # option is silently ignored.
+        fd, tmp_path = tempfile.mkstemp(suffix='.wav')
+        os.close(fd)
+        try:
+            shift_pitch(input_path, tmp_path, pitch_shift_value)
+            multiply_amplitude(tmp_path, output_path,
+                               amplitude_multiplier_value)
+        finally:
+            os.remove(tmp_path)
+    # TODO: More transformations.
+
+if __name__ == "__main__":
+    main()