Source code for zounds.synthesize.synthesize



import numpy as np
from scipy.fftpack import dct, idct
from scipy.signal import resample

from zounds.core import ArrayWithUnits, IdentityDimension
from zounds.spectral import DCTIV, LinearScale
from zounds.spectral import FrequencyDimension
from zounds.spectral.sliding_window import \
    IdentityWindowingFunc, OggVorbisWindowingFunc
from zounds.timeseries import \
    nearest_audio_sample_rate, Seconds, AudioSamples, TimeDimension


class ShortTimeTransformSynthesizer(object):
    def __init__(self):
        super(ShortTimeTransformSynthesizer, self).__init__()

    def _transform(self, frames):
        return frames

    def _windowing_function(self):
        return IdentityWindowingFunc()

    def _overlap_add(self, frames):
        time_dim = frames.dimensions[0]

        # the time span covered by a single audio sample
        sample_freq = time_dim.duration / frames.shape[-1]

        # window and hop sizes expressed in audio samples
        windowsize = int(np.round(time_dim.duration / sample_freq))
        hopsize = int(np.round(time_dim.frequency / sample_freq))

        # create an empty array of audio samples
        arr = np.zeros(int(time_dim.end / sample_freq))
        windowed_frames = self._windowing_function() * frames

        # overlap-add each windowed frame into the output buffer
        for i, f in enumerate(windowed_frames):
            start = i * hopsize
            stop = start + windowsize
            l = len(arr[start:stop])
            arr[start:stop] += f[:l]

        sr = nearest_audio_sample_rate(Seconds(1) / sample_freq)
        return AudioSamples(arr, sr)
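
    # A worked sketch of the arithmetic above (hypothetical numbers, not tied
    # to any particular zounds windowing scheme): if each frame spans 1024
    # audio samples (the time dimension's duration) and consecutive frames
    # start 512 samples apart (its frequency), then windowsize == 1024,
    # hopsize == 512, and frame i is windowed and summed into
    # arr[i * 512 : i * 512 + 1024], reconstructing the signal by overlap-add.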

    def synthesize(self, frames):
        audio = self._transform(frames)
        ts = ArrayWithUnits(audio, [frames.dimensions[0], IdentityDimension()])
        return self._overlap_add(ts)


class WindowedAudioSynthesizer(ShortTimeTransformSynthesizer):
    def __init__(self):
        super(WindowedAudioSynthesizer, self).__init__()


class FFTSynthesizer(ShortTimeTransformSynthesizer):
    """
    Inverts the short-time Fourier transform, e.g. the output of the
    :class:`~zounds.spectral.FFT` processing node.

    Here's an example that extracts a short-time Fourier transform, and then
    inverts it.

    .. code:: python

        import zounds

        STFT = zounds.stft(
            resample_to=zounds.SR11025(),
            store_fft=True)

        @zounds.simple_in_memory_settings
        class Sound(STFT):
            pass

        # produce some additive sine waves
        sine_synth = zounds.SineSynthesizer(zounds.SR22050())
        samples = sine_synth.synthesize(
            zounds.Seconds(4), freqs_in_hz=[220, 400, 880])

        # process the sound, including a short-time Fourier transform feature
        _id = Sound.process(meta=samples.encode())
        snd = Sound(_id)

        # invert the frequency-domain feature to recover the original audio
        fft_synth = zounds.FFTSynthesizer()
        recon = fft_synth.synthesize(snd.fft)
        print(recon.__class__)  # AudioSamples instance with reconstructed audio

    See Also:
        :class:`~zounds.spectral.FFT`
    """
    def __init__(self):
        super(FFTSynthesizer, self).__init__()

    def _windowing_function(self):
        return OggVorbisWindowingFunc()

    def _transform(self, frames):
        return np.fft.irfft(frames, norm='ortho')


class DCTSynthesizer(ShortTimeTransformSynthesizer):
    """
    Inverts the short-time discrete cosine transform (type II), e.g., the
    output of the :class:`~zounds.spectral.DCT` processing node.

    Here's an example that extracts a short-time discrete cosine transform,
    and then inverts it.

    .. code:: python

        import zounds

        Resampled = zounds.resampled(resample_to=zounds.SR11025())

        @zounds.simple_in_memory_settings
        class Sound(Resampled):
            windowed = zounds.ArrayWithUnitsFeature(
                zounds.SlidingWindow,
                needs=Resampled.resampled,
                wscheme=zounds.HalfLapped(),
                wfunc=zounds.OggVorbisWindowingFunc(),
                store=False)

            dct = zounds.ArrayWithUnitsFeature(
                zounds.DCT,
                needs=windowed,
                store=True)

        # produce some additive sine waves
        sine_synth = zounds.SineSynthesizer(zounds.SR22050())
        samples = sine_synth.synthesize(
            zounds.Seconds(4), freqs_in_hz=[220, 400, 880])

        # process the sound, including a short-time DCT feature
        _id = Sound.process(meta=samples.encode())
        snd = Sound(_id)

        # invert the frequency-domain feature to recover the original audio
        dct_synth = zounds.DCTSynthesizer()
        recon = dct_synth.synthesize(snd.dct)
        print(recon.__class__)  # AudioSamples instance with reconstructed audio

    See Also:
        :class:`~zounds.spectral.DCT`
    """
    def __init__(self, windowing_func=IdentityWindowingFunc()):
        super(DCTSynthesizer, self).__init__()
        self.windowing_func = windowing_func

    def _windowing_function(self):
        return self.windowing_func

    def _transform(self, frames):
        return idct(frames, norm='ortho')


class DCTIVSynthesizer(ShortTimeTransformSynthesizer):
    """
    Inverts the short-time discrete cosine transform (type IV), e.g., the
    output of the :class:`~zounds.spectral.DCTIV` processing node.

    Here's an example that extracts a short-time DCT-IV transform, and
    inverts it.

    .. code:: python

        import zounds

        Resampled = zounds.resampled(resample_to=zounds.SR11025())

        @zounds.simple_in_memory_settings
        class Sound(Resampled):
            windowed = zounds.ArrayWithUnitsFeature(
                zounds.SlidingWindow,
                needs=Resampled.resampled,
                wscheme=zounds.HalfLapped(),
                wfunc=zounds.OggVorbisWindowingFunc(),
                store=False)

            dct = zounds.ArrayWithUnitsFeature(
                zounds.DCTIV,
                needs=windowed,
                store=True)

        # produce some additive sine waves
        sine_synth = zounds.SineSynthesizer(zounds.SR22050())
        samples = sine_synth.synthesize(
            zounds.Seconds(4), freqs_in_hz=[220, 400, 880])

        # process the sound, including a short-time DCT-IV feature
        _id = Sound.process(meta=samples.encode())
        snd = Sound(_id)

        # invert the frequency-domain feature to recover the original audio
        dct_synth = zounds.DCTIVSynthesizer()
        recon = dct_synth.synthesize(snd.dct)
        print(recon.__class__)  # AudioSamples instance with reconstructed audio

    See Also:
        :class:`~zounds.spectral.DCTIV`
    """
    def __init__(self, windowing_func=IdentityWindowingFunc()):
        super(DCTIVSynthesizer, self).__init__()
        self.windowing_func = windowing_func

    def _windowing_function(self):
        return self.windowing_func

    def _transform(self, frames):
        return list(DCTIV()._process(frames))[0]


class MDCTSynthesizer(ShortTimeTransformSynthesizer):
    """
    Inverts the modified discrete cosine transform, e.g., the output of the
    :class:`~zounds.spectral.MDCT` processing node.

    Here's an example that extracts a short-time MDCT transform, and
    inverts it.

    .. code:: python

        import zounds

        Resampled = zounds.resampled(resample_to=zounds.SR11025())

        @zounds.simple_in_memory_settings
        class Sound(Resampled):
            windowed = zounds.ArrayWithUnitsFeature(
                zounds.SlidingWindow,
                needs=Resampled.resampled,
                wscheme=zounds.HalfLapped(),
                wfunc=zounds.OggVorbisWindowingFunc(),
                store=False)

            mdct = zounds.ArrayWithUnitsFeature(
                zounds.MDCT,
                needs=windowed,
                store=True)

        # produce some additive sine waves
        sine_synth = zounds.SineSynthesizer(zounds.SR22050())
        samples = sine_synth.synthesize(
            zounds.Seconds(4), freqs_in_hz=[220, 400, 880])

        # process the sound, including a short-time MDCT feature
        _id = Sound.process(meta=samples.encode())
        snd = Sound(_id)

        # invert the frequency-domain feature to recover the original audio
        mdct_synth = zounds.MDCTSynthesizer()
        recon = mdct_synth.synthesize(snd.mdct)
        print(recon.__class__)  # AudioSamples instance with reconstructed audio

    See Also:
        :class:`~zounds.spectral.MDCT`
    """
    def __init__(self):
        super(MDCTSynthesizer, self).__init__()

    def _windowing_function(self):
        return OggVorbisWindowingFunc()

    def _transform(self, frames):
        # invert the MDCT by pre-twiddling the coefficients, taking a
        # length-2l FFT, and post-twiddling, yielding 2l time-domain samples
        # per frame
        l = frames.shape[1]
        t = np.arange(0, 2 * l)
        f = np.arange(0, l)
        cpi = -1j * np.pi
        a = frames * np.exp(cpi * (f + 0.5) * (l + 1) / 2 / l)
        b = np.fft.fft(a, 2 * l)
        return np.sqrt(2. / l) * np.real(b * np.exp(cpi * t / 2 / l))


class FrequencyDecompositionSynthesizer(object):
    def __init__(self, samplerate, output_size):
        super(FrequencyDecompositionSynthesizer, self).__init__()
        self.output_size = output_size
        self.samplerate = samplerate

    def synthesize(self, x, bands=None):
        output = ArrayWithUnits(
            np.zeros((len(x), self.output_size)),
            dimensions=[x.time_dimension, TimeDimension(*self.samplerate)])

        # resample each frequency band to the output size and sum the bands
        for i, band in enumerate(x.scale):
            if bands and i not in bands:
                continue
            output += resample(x[:, band], self.output_size, axis=-1)

        return output


class BaseFrequencyAdaptiveSynthesizer(object):
    def __init__(
            self,
            scale,
            band_transform,
            short_time_synth,
            samplerate,
            coeffs_dtype,
            scale_slices_always_even):
        super(BaseFrequencyAdaptiveSynthesizer, self).__init__()
        self.scale_slices_always_even = scale_slices_always_even
        self.coeffs_dtype = coeffs_dtype
        self.scale = scale
        self.samplerate = samplerate
        self.short_time_synth = short_time_synth
        self.band_transform = band_transform

    def _n_linear_scale_bands(self, frequency_adaptive_coeffs):
        raise NotImplementedError()

    def synthesize(self, freq_adaptive_coeffs):
        fac = freq_adaptive_coeffs

        linear_scale = LinearScale.from_sample_rate(
            self.samplerate,
            self._n_linear_scale_bands(fac),
            always_even=self.scale_slices_always_even)

        frequency_dimension = FrequencyDimension(linear_scale)

        coeffs = ArrayWithUnits(
            np.zeros(
                (len(fac), linear_scale.n_bands),
                dtype=self.coeffs_dtype),
            dimensions=[fac.dimensions[0], frequency_dimension])

        # map each frequency-adaptive band back onto the fixed linear scale
        # before running the short-time inverse transform
        for band in self.scale:
            coeffs[:, band] += self.band_transform(fac[:, band], norm='ortho')

        return self.short_time_synth.synthesize(coeffs)
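

# Note (a descriptive sketch, not part of the public API): the two classes
# above are internal helpers. BaseFrequencyAdaptiveSynthesizer is not meant to
# be used directly; the FrequencyAdaptiveDCTSynthesizer and
# FrequencyAdaptiveFFTSynthesizer subclasses below supply the per-band
# transform (scipy's dct or np.fft.rfft), the matching short-time synthesizer,
# and the number of linear-scale bands. A hypothetical usage sketch, assuming
# `fa` is a frequency-adaptive feature computed over `scale` at `samplerate`:
#
#     synth = FrequencyAdaptiveDCTSynthesizer(scale, samplerate)
#     samples = synth.synthesize(fa)  # returns an AudioSamples instance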


class FrequencyAdaptiveDCTSynthesizer(BaseFrequencyAdaptiveSynthesizer):
    """
    Invert a frequency-adaptive transform, e.g., one produced by the
    :class:`zounds.spectral.FrequencyAdaptiveTransform` processing node
    which has used a discrete cosine transform in its `transform` parameter.

    Args:
        scale (FrequencyScale): The scale used to produce the
            frequency-adaptive transform
        samplerate (SampleRate): The audio samplerate of the audio that was
            originally transformed

    Here's an example of how you might first extract a frequency-adaptive
    representation, and then invert it:

    .. code:: python

        import zounds
        import scipy
        import numpy as np

        samplerate = zounds.SR11025()
        Resampled = zounds.resampled(resample_to=samplerate)

        scale = zounds.GeometricScale(
            100, 5000, bandwidth_ratio=0.089, n_bands=100)
        scale.ensure_overlap_ratio(0.5)

        @zounds.simple_in_memory_settings
        class Sound(Resampled):
            long_windowed = zounds.ArrayWithUnitsFeature(
                zounds.SlidingWindow,
                wscheme=zounds.SampleRate(
                    frequency=zounds.Milliseconds(500),
                    duration=zounds.Seconds(1)),
                wfunc=zounds.OggVorbisWindowingFunc(),
                needs=Resampled.resampled)

            dct = zounds.ArrayWithUnitsFeature(
                zounds.DCT,
                scale_always_even=True,
                needs=long_windowed)

            freq_adaptive = zounds.FrequencyAdaptiveFeature(
                zounds.FrequencyAdaptiveTransform,
                transform=scipy.fftpack.idct,
                window_func=np.hanning,
                scale=scale,
                needs=dct,
                store=True)

        # produce some additive sine waves
        sine_synth = zounds.SineSynthesizer(zounds.SR22050())
        samples = sine_synth.synthesize(
            zounds.Seconds(10), freqs_in_hz=[220, 440, 880])

        # process the sound, including the frequency-adaptive feature
        _id = Sound.process(meta=samples.encode())
        snd = Sound(_id)

        # invert the sound
        synth = zounds.FrequencyAdaptiveDCTSynthesizer(scale, samplerate)
        recon = synth.synthesize(snd.freq_adaptive)
        print(recon)  # AudioSamples instance with the reconstructed sound

    See Also:
        :class:`~zounds.spectral.DCT`
        :class:`~zounds.spectral.FrequencyAdaptive`
        :class:`~zounds.spectral.FrequencyAdaptiveTransform`
    """
    def __init__(self, scale, samplerate):
        super(FrequencyAdaptiveDCTSynthesizer, self).__init__(
            scale,
            dct,
            DCTSynthesizer(),
            samplerate,
            np.float64,
            scale_slices_always_even=True)

    def _n_linear_scale_bands(self, frequency_adaptive_coeffs):
        fac = frequency_adaptive_coeffs.dimensions[0]
        return int(fac.duration / self.samplerate.frequency)


class FrequencyAdaptiveFFTSynthesizer(BaseFrequencyAdaptiveSynthesizer):
    """
    Invert a frequency-adaptive transform, e.g., one produced by the
    :class:`zounds.spectral.FrequencyAdaptiveTransform` processing node
    which has used a fast Fourier transform in its `transform` parameter.

    Args:
        scale (FrequencyScale): The scale used to produce the
            frequency-adaptive transform
        samplerate (SampleRate): The audio samplerate of the audio that was
            originally transformed

    Here's an example of how you might first extract a frequency-adaptive
    representation, and then invert it:

    .. code:: python

        import zounds
        import numpy as np

        samplerate = zounds.SR11025()
        Resampled = zounds.resampled(resample_to=samplerate)

        scale = zounds.GeometricScale(
            100, 5000, bandwidth_ratio=0.089, n_bands=100)
        scale.ensure_overlap_ratio(0.5)

        @zounds.simple_in_memory_settings
        class Sound(Resampled):
            long_windowed = zounds.ArrayWithUnitsFeature(
                zounds.SlidingWindow,
                wscheme=zounds.SampleRate(
                    frequency=zounds.Milliseconds(500),
                    duration=zounds.Seconds(1)),
                wfunc=zounds.OggVorbisWindowingFunc(),
                needs=Resampled.resampled)

            fft = zounds.ArrayWithUnitsFeature(
                zounds.FFT,
                needs=long_windowed)

            freq_adaptive = zounds.FrequencyAdaptiveFeature(
                zounds.FrequencyAdaptiveTransform,
                transform=np.fft.irfft,
                window_func=np.hanning,
                scale=scale,
                needs=fft,
                store=True)

        # produce some additive sine waves
        sine_synth = zounds.SineSynthesizer(zounds.SR22050())
        samples = sine_synth.synthesize(
            zounds.Seconds(10), freqs_in_hz=[220, 440, 880])

        # process the sound, including the frequency-adaptive feature
        _id = Sound.process(meta=samples.encode())
        snd = Sound(_id)

        # invert the sound
        synth = zounds.FrequencyAdaptiveFFTSynthesizer(scale, samplerate)
        recon = synth.synthesize(snd.freq_adaptive)
        print(recon)  # AudioSamples instance with the reconstructed sound

    See Also:
        :class:`~zounds.spectral.FFT`
        :class:`~zounds.spectral.FrequencyAdaptive`
        :class:`~zounds.spectral.FrequencyAdaptiveTransform`
    """
    def __init__(self, scale, samplerate):
        super(FrequencyAdaptiveFFTSynthesizer, self).__init__(
            scale,
            np.fft.rfft,
            FFTSynthesizer(),
            samplerate,
            np.complex128,
            scale_slices_always_even=False)

    def _n_linear_scale_bands(self, frequency_adaptive_coeffs):
        # https://docs.scipy.org/doc/numpy/reference/generated/numpy.fft.rfft.html#numpy.fft.rfft
        fac = frequency_adaptive_coeffs.dimensions[0]
        raw_samples = int(fac.duration / self.samplerate.frequency)
        return int(raw_samples // 2) + 1


class SineSynthesizer(object):
    """
    Synthesize sine waves

    Args:
        samplerate (SampleRate): the samplerate at which the sine waves
            should be synthesized

    Examples:
        >>> import zounds
        >>> synth = zounds.SineSynthesizer(zounds.SR22050())
        >>> samples = synth.synthesize( \
                zounds.Seconds(1), freqs_in_hz=[220., 440.])
        >>> samples
        AudioSamples([ 0.        ,  0.09384942,  0.18659419, ...,
               -0.27714552, -0.18659419, -0.09384942])
        >>> len(samples)
        22050

    See Also:
        :class:`TickSynthesizer`
        :class:`NoiseSynthesizer`
        :class:`SilenceSynthesizer`
    """
    def __init__(self, samplerate):
        super(SineSynthesizer, self).__init__()
        self.samplerate = samplerate

    def synthesize(self, duration, freqs_in_hz=[440.]):
        """
        Synthesize one or more sine waves

        Args:
            duration (numpy.timedelta64): The duration of the sound to be
                synthesized
            freqs_in_hz (list of float): Numbers representing the
                frequencies in hz that should be synthesized
        """
        freqs = np.array(freqs_in_hz)
        scaling = 1. / len(freqs)
        sr = int(self.samplerate)
        cps = freqs / sr
        ts = (duration / Seconds(1)) * sr
        ranges = np.array([np.arange(0, ts * c, c) for c in cps])
        raw = (np.sin(ranges * (2 * np.pi)) * scaling).sum(axis=0)
        return AudioSamples(raw, self.samplerate)
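
    # A worked sketch of the arithmetic above (hypothetical values): for
    # SR22050 and freqs_in_hz=[220., 440.], scaling is 0.5, cps is roughly
    # [0.00998, 0.01995] cycles per sample, and a one-second duration gives
    # ts == 22050, so each row of `ranges` holds one phase value per output
    # sample, which is turned into a sine, scaled, and summed across rows.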


class TickSynthesizer(object):
    """
    Synthesize short, percussive, periodic "ticks"

    Args:
        samplerate (SampleRate): the samplerate at which the ticks should be
            synthesized

    Examples:
        >>> import zounds
        >>> synth = zounds.TickSynthesizer(zounds.SR22050())
        >>> samples = synth.synthesize( \
                duration=zounds.Seconds(3),
                tick_frequency=zounds.Milliseconds(100))
        >>> samples
        AudioSamples([ -3.91624993e-01,  -8.96939666e-01,   4.18165378e-01, ...,
               -4.08054347e-04,  -2.32257899e-04,   0.00000000e+00])

    See Also:
        :class:`SineSynthesizer`
        :class:`NoiseSynthesizer`
        :class:`SilenceSynthesizer`
    """
    def __init__(self, samplerate):
        super(TickSynthesizer, self).__init__()
        self.samplerate = samplerate

    def synthesize(self, duration, tick_frequency):
        """
        Synthesize periodic "ticks", generated from white noise and an
        envelope

        Args:
            duration (numpy.timedelta64): The total duration of the sound to
                be synthesized
            tick_frequency (numpy.timedelta64): The frequency of the ticking
                sound
        """
        sr = self.samplerate.samples_per_second

        # create a short tick sound
        tick = np.random.uniform(low=-1., high=1., size=int(sr * .1))
        tick *= np.linspace(1, 0, len(tick))

        # create silence
        samples = np.zeros(int(sr * (duration / Seconds(1))))

        ticks_per_second = Seconds(1) / tick_frequency

        # introduce periodic ticking sound
        step = int(sr // ticks_per_second)
        for i in range(0, len(samples), step):
            size = len(samples[i:i + len(tick)])
            samples[i:i + len(tick)] += tick[:size]
        return AudioSamples(samples, self.samplerate)
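
    # A worked sketch of the arithmetic above: at SR22050 with
    # tick_frequency=zounds.Milliseconds(100), ticks_per_second is 10, so
    # step == 22050 // 10 == 2205 samples, and the same 2205-sample
    # (0.1 second) noise burst with a linear fade-out is added every 2205
    # samples until the silent buffer is filled.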


class NoiseSynthesizer(object):
    """
    Synthesize white noise

    Args:
        samplerate (SampleRate): the samplerate at which the noise should be
            synthesized

    Examples:
        >>> import zounds
        >>> synth = zounds.NoiseSynthesizer(zounds.SR44100())
        >>> samples = synth.synthesize(zounds.Seconds(2))
        >>> samples
        AudioSamples([ 0.1137964 , -0.02613194,  0.30963904, ...,
               -0.71398137, -0.99840281,  0.74310827])

    See Also:
        :class:`SineSynthesizer`
        :class:`TickSynthesizer`
        :class:`SilenceSynthesizer`
    """
    def __init__(self, samplerate):
        super(NoiseSynthesizer, self).__init__()
        self.samplerate = samplerate

    def synthesize(self, duration):
        """
        Synthesize white noise

        Args:
            duration (numpy.timedelta64): The duration of the synthesized
                sound
        """
        sr = self.samplerate.samples_per_second
        seconds = duration / Seconds(1)
        samples = np.random.uniform(
            low=-1., high=1., size=int(sr * seconds))
        return AudioSamples(samples, self.samplerate)


class SilenceSynthesizer(object):
    """
    Synthesize silence

    Args:
        samplerate (SampleRate): the samplerate at which the silence should
            be synthesized

    Examples:
        >>> import zounds
        >>> synth = zounds.SilenceSynthesizer(zounds.SR11025())
        >>> samples = synth.synthesize(zounds.Seconds(5))
        >>> samples
        AudioSamples([ 0.,  0.,  0., ...,  0.,  0.,  0.])
    """
    def __init__(self, samplerate):
        super(SilenceSynthesizer, self).__init__()
        self.samplerate = samplerate

    def synthesize(self, duration):
        """
        Synthesize silence

        Args:
            duration (numpy.timedelta64): The duration of the synthesized
                sound
        """
        return AudioSamples.silence(self.samplerate, duration)