import numpy as np
from scipy.fftpack import dct, idct
from scipy.signal import resample
from zounds.core import ArrayWithUnits, IdentityDimension
from zounds.spectral import DCTIV, LinearScale
from zounds.spectral import FrequencyDimension
from zounds.spectral.sliding_window import \
IdentityWindowingFunc, OggVorbisWindowingFunc
from zounds.timeseries import \
nearest_audio_sample_rate, Seconds, AudioSamples, TimeDimension
class ShortTimeTransformSynthesizer(object):
    """
    Base class for synthesizers that rebuild audio from a sequence of
    short-time frames by overlap-adding them.

    Two hooks shape the reconstruction:

    * :meth:`_transform` maps the incoming frames back to the time domain.
      Here it returns the frames unchanged.
    * :meth:`_windowing_function` supplies the window that is multiplied with
      the frames before they are summed.  Here it is an
      ``IdentityWindowingFunc``.
    """

    def __init__(self):
        super(ShortTimeTransformSynthesizer, self).__init__()

    def _windowing_function(self):
        """Return the window applied to every frame prior to overlap-add."""
        return IdentityWindowingFunc()

    def _transform(self, frames):
        """Return ``frames`` as-is; subclasses substitute an inverse transform."""
        return frames

    def _overlap_add(self, frames):
        """
        Sum windowed frames into one contiguous signal.

        Args:
            frames (ArrayWithUnits): two-dimensional array whose first
                dimension exposes ``duration``, ``frequency`` and ``end``
                (presumably a time dimension -- confirm against callers)

        Returns:
            AudioSamples: the overlap-added signal, tagged with the audio
            samplerate nearest to the one implied by the frame layout
        """
        frame_dim = frames.dimensions[0]
        # time covered by one sample: frame duration split across the samples
        # that make up a frame
        sample_period = frame_dim.duration / frames.shape[-1]
        frame_len = int(np.round(frame_dim.duration / sample_period))
        hop_len = int(np.round(frame_dim.frequency / sample_period))

        # blank output buffer spanning the full extent of the frames
        output = np.zeros(int(frame_dim.end / sample_period))
        windowed = self._windowing_function() * frames

        for index, frame in enumerate(windowed):
            begin = index * hop_len
            end = begin + frame_len
            # the last frame(s) may run past the buffer, so only add the
            # part that actually fits
            available = len(output[begin:end])
            output[begin:end] += frame[:available]

        samplerate = nearest_audio_sample_rate(Seconds(1) / sample_period)
        return AudioSamples(output, samplerate)

    def synthesize(self, frames):
        """
        Invert ``frames`` with :meth:`_transform` and overlap-add the result.

        Args:
            frames (ArrayWithUnits): short-time frames; the first dimension
                is carried over to the transformed array

        Returns:
            AudioSamples: the reconstructed signal
        """
        time_domain = self._transform(frames)
        dimensions = [frames.dimensions[0], IdentityDimension()]
        return self._overlap_add(ArrayWithUnits(time_domain, dimensions))
class WindowedAudioSynthesizer(ShortTimeTransformSynthesizer):
    """
    Overlap-add synthesizer that overrides none of the
    :class:`ShortTimeTransformSynthesizer` hooks, so frames are used exactly
    as given (no inverse transform, identity window).
    """

    def __init__(self):
        # nothing to configure; defer entirely to the base class
        super(WindowedAudioSynthesizer, self).__init__()
class FFTSynthesizer(ShortTimeTransformSynthesizer):
    """
    Inverts the short-time fourier transform, e.g. the output of the
    :class:`~zounds.spectral.FFT` processing node.

    Each frame is passed through ``numpy.fft.irfft`` with ``norm='ortho'``
    and multiplied by an ``OggVorbisWindowingFunc`` before overlap-add.

    Here's an example that extracts a short-time fourier transform, and then
    inverts it.

    .. code:: python

        import zounds

        STFT = zounds.stft(
            resample_to=zounds.SR11025(),
            store_fft=True)


        @zounds.simple_in_memory_settings
        class Sound(STFT):
            pass

        # produce some additive sine waves
        sine_synth = zounds.SineSynthesizer(zounds.SR22050())
        samples = sine_synth.synthesize(
            zounds.Seconds(4), freqs_in_hz=[220, 400, 880])

        # process the sound, including a short-time fourier transform feature
        _id = Sound.process(meta=samples.encode())
        snd = Sound(_id)

        # invert the frequency-domain feature to recover the original audio
        fft_synth = zounds.FFTSynthesizer()
        recon = fft_synth.synthesize(snd.fft)
        print(recon.__class__)  # AudioSamples instance with reconstructed audio

    See Also:
        :class:`~zounds.spectral.FFT`
    """

    def __init__(self):
        super(FFTSynthesizer, self).__init__()

    def _transform(self, frames):
        """Return the orthonormal inverse real FFT of ``frames``."""
        time_domain = np.fft.irfft(frames, norm='ortho')
        return time_domain

    def _windowing_function(self):
        """Return the window applied to each inverted frame."""
        window = OggVorbisWindowingFunc()
        return window
class DCTSynthesizer(ShortTimeTransformSynthesizer):
    """
    Inverts the short-time discrete cosine transform (type II), e.g., the
    output of the :class:`~zounds.spectral.DCT` processing node.

    Args:
        windowing_func: window multiplied with the inverted frames before
            overlap-add; defaults to an ``IdentityWindowingFunc``

    Here's an example that extracts a short-time discrete cosine transform,
    and then inverts it.

    .. code:: python

        import zounds

        Resampled = zounds.resampled(resample_to=zounds.SR11025())


        @zounds.simple_in_memory_settings
        class Sound(Resampled):
            windowed = zounds.ArrayWithUnitsFeature(
                zounds.SlidingWindow,
                needs=Resampled.resampled,
                wscheme=zounds.HalfLapped(),
                wfunc=zounds.OggVorbisWindowingFunc(),
                store=False)

            dct = zounds.ArrayWithUnitsFeature(
                zounds.DCT,
                needs=windowed,
                store=True)

        # produce some additive sine waves
        sine_synth = zounds.SineSynthesizer(zounds.SR22050())
        samples = sine_synth.synthesize(
            zounds.Seconds(4), freqs_in_hz=[220, 400, 880])

        # process the sound, including the short-time DCT feature
        _id = Sound.process(meta=samples.encode())
        snd = Sound(_id)

        # invert the frequency-domain feature to recover the original audio
        dct_synth = zounds.DCTSynthesizer()
        recon = dct_synth.synthesize(snd.dct)
        print(recon.__class__)  # AudioSamples instance with reconstructed audio

    See Also:
        :class:`~zounds.spectral.DCT`
    """

    def __init__(self, windowing_func=IdentityWindowingFunc()):
        super(DCTSynthesizer, self).__init__()
        # kept on the instance so _windowing_function can hand it back
        self.windowing_func = windowing_func

    def _transform(self, frames):
        """Return the orthonormal inverse DCT of ``frames``."""
        time_domain = idct(frames, norm='ortho')
        return time_domain

    def _windowing_function(self):
        """Return the window supplied at construction time."""
        return self.windowing_func
class DCTIVSynthesizer(ShortTimeTransformSynthesizer):
    """
    Inverts the short-time discrete cosine transform (type IV), e.g., the
    output of the :class:`~zounds.spectral.DCTIV` processing node.

    The inversion is carried out by running the frames through a ``DCTIV``
    node once more and taking the first item it produces.

    Args:
        windowing_func: window multiplied with the inverted frames before
            overlap-add; defaults to an ``IdentityWindowingFunc``

    Here's an example that extracts a short-time DCT-IV transform, and
    inverts it.

    .. code:: python

        import zounds

        Resampled = zounds.resampled(resample_to=zounds.SR11025())


        @zounds.simple_in_memory_settings
        class Sound(Resampled):
            windowed = zounds.ArrayWithUnitsFeature(
                zounds.SlidingWindow,
                needs=Resampled.resampled,
                wscheme=zounds.HalfLapped(),
                wfunc=zounds.OggVorbisWindowingFunc(),
                store=False)

            dct = zounds.ArrayWithUnitsFeature(
                zounds.DCTIV,
                needs=windowed,
                store=True)

        # produce some additive sine waves
        sine_synth = zounds.SineSynthesizer(zounds.SR22050())
        samples = sine_synth.synthesize(
            zounds.Seconds(4), freqs_in_hz=[220, 400, 880])

        # process the sound, including the short-time DCT-IV feature
        _id = Sound.process(meta=samples.encode())
        snd = Sound(_id)

        # invert the frequency-domain feature to recover the original audio
        dct_synth = zounds.DCTIVSynthesizer()
        recon = dct_synth.synthesize(snd.dct)
        print(recon.__class__)  # AudioSamples instance with reconstructed audio

    See Also:
        :class:`~zounds.spectral.DCTIV`
    """

    def __init__(self, windowing_func=IdentityWindowingFunc()):
        super(DCTIVSynthesizer, self).__init__()
        # kept on the instance so _windowing_function can hand it back
        self.windowing_func = windowing_func

    def _transform(self, frames):
        """Run ``frames`` through a fresh ``DCTIV`` node and return its first output."""
        node = DCTIV()
        # materialise everything the node yields, then keep the first item
        outputs = list(node._process(frames))
        return outputs[0]

    def _windowing_function(self):
        """Return the window supplied at construction time."""
        return self.windowing_func
class MDCTSynthesizer(ShortTimeTransformSynthesizer):
    """
    Inverts the modified discrete cosine transform, e.g., the output of the
    :class:`~zounds.spectral.MDCT` processing node.

    Each frame of ``N`` coefficients is expanded to ``2 * N`` time-domain
    samples, multiplied by an ``OggVorbisWindowingFunc`` and overlap-added.

    Here's an example that extracts a short-time MDCT transform, and inverts
    it.

    .. code:: python

        import zounds

        Resampled = zounds.resampled(resample_to=zounds.SR11025())


        @zounds.simple_in_memory_settings
        class Sound(Resampled):
            windowed = zounds.ArrayWithUnitsFeature(
                zounds.SlidingWindow,
                needs=Resampled.resampled,
                wscheme=zounds.HalfLapped(),
                wfunc=zounds.OggVorbisWindowingFunc(),
                store=False)

            mdct = zounds.ArrayWithUnitsFeature(
                zounds.MDCT,
                needs=windowed,
                store=True)

        # produce some additive sine waves
        sine_synth = zounds.SineSynthesizer(zounds.SR22050())
        samples = sine_synth.synthesize(
            zounds.Seconds(4), freqs_in_hz=[220, 400, 880])

        # process the sound, including the short-time MDCT feature
        _id = Sound.process(meta=samples.encode())
        snd = Sound(_id)

        # invert the frequency-domain feature to recover the original audio
        mdct_synth = zounds.MDCTSynthesizer()
        recon = mdct_synth.synthesize(snd.mdct)
        print(recon.__class__)  # AudioSamples instance with reconstructed audio

    See Also:
        :class:`~zounds.spectral.MDCT`
    """

    def __init__(self):
        super(MDCTSynthesizer, self).__init__()

    def _windowing_function(self):
        """Return the window applied to each inverted frame."""
        return OggVorbisWindowingFunc()

    def _transform(self, frames):
        """
        Compute the inverse MDCT of every frame with a single FFT.

        Args:
            frames: two-dimensional array of MDCT coefficients, one frame per
                row (``frames.shape[1]`` coefficients each)

        Returns:
            Real array whose last axis has ``2 * frames.shape[1]`` samples
        """
        n_coeffs = frames.shape[1]
        t = np.arange(0, 2 * n_coeffs)
        f = np.arange(0, n_coeffs)
        cpi = -1j * np.pi
        # pre-twiddle the coefficients, zero-pad to twice the length inside
        # the FFT, then post-twiddle and keep the real part
        a = frames * np.exp(cpi * (f + 0.5) * (n_coeffs + 1) / 2 / n_coeffs)
        b = np.fft.fft(a, 2 * n_coeffs)
        # 2.0 forces true division: with two ints and Python 2 semantics the
        # quotient would truncate to 0 and silence the whole output
        scale = np.sqrt(2.0 / n_coeffs)
        return scale * np.real(b * np.exp(cpi * t / 2 / n_coeffs))
class FrequencyDecompositionSynthesizer(object):
    """
    Rebuild a signal from a per-band decomposition by resampling every band
    to a common length and summing the results.

    Args:
        samplerate: unpacked into ``TimeDimension(*samplerate)`` to describe
            the second dimension of the output
        output_size (int): number of samples every band is resampled to
    """

    def __init__(self, samplerate, output_size):
        super(FrequencyDecompositionSynthesizer, self).__init__()
        self.samplerate = samplerate
        self.output_size = output_size

    def synthesize(self, x, bands=None):
        """
        Sum the resampled bands of ``x``.

        Args:
            x: decomposition exposing ``scale`` (iterable of bands usable as
                a column index into ``x``) and ``time_dimension``
            bands: optional collection of band indices to include; when
                falsy (``None`` or empty) every band is used

        Returns:
            ArrayWithUnits: array of shape ``(len(x), output_size)``
        """
        accumulator = np.zeros((len(x), self.output_size))
        dimensions = [x.time_dimension, TimeDimension(*self.samplerate)]
        output = ArrayWithUnits(accumulator, dimensions=dimensions)

        for index, band in enumerate(x.scale):
            if not bands or index in bands:
                output += resample(x[:, band], self.output_size, axis=-1)

        return output
class BaseFrequencyAdaptiveSynthesizer(object):
    """
    Shared machinery for inverting a frequency-adaptive representation.

    Every band of the representation is pushed through ``band_transform`` and
    accumulated into a linear-scale coefficient array, which is then handed
    to ``short_time_synth`` to produce the final signal.

    Args:
        scale: iterable of bands used to index both the input and the
            linear-scale coefficient array
        band_transform (callable): called as ``band_transform(x, norm='ortho')``
            on each band
        short_time_synth: object whose ``synthesize`` method turns the
            linear-scale coefficients into audio
        samplerate: samplerate passed to ``LinearScale.from_sample_rate``
        coeffs_dtype: dtype of the linear-scale coefficient array
        scale_slices_always_even (bool): forwarded as ``always_even`` to
            ``LinearScale.from_sample_rate``
    """

    def __init__(
            self,
            scale,
            band_transform,
            short_time_synth,
            samplerate,
            coeffs_dtype,
            scale_slices_always_even):
        super(BaseFrequencyAdaptiveSynthesizer, self).__init__()
        self.scale = scale
        self.band_transform = band_transform
        self.short_time_synth = short_time_synth
        self.samplerate = samplerate
        self.coeffs_dtype = coeffs_dtype
        self.scale_slices_always_even = scale_slices_always_even

    def _n_linear_scale_bands(self, frequency_adaptive_coeffs):
        """Return the number of linear-scale bands; subclasses must implement."""
        raise NotImplementedError()

    def synthesize(self, freq_adaptive_coeffs):
        """
        Invert ``freq_adaptive_coeffs`` and return whatever
        ``short_time_synth.synthesize`` produces for the rebuilt coefficients.
        """
        n_bands = self._n_linear_scale_bands(freq_adaptive_coeffs)
        linear_scale = LinearScale.from_sample_rate(
            self.samplerate,
            n_bands,
            always_even=self.scale_slices_always_even)

        # empty linear-scale canvas, one row per input frame
        blank = np.zeros(
            (len(freq_adaptive_coeffs), linear_scale.n_bands),
            dtype=self.coeffs_dtype)
        dimensions = [
            freq_adaptive_coeffs.dimensions[0],
            FrequencyDimension(linear_scale)
        ]
        linear_coeffs = ArrayWithUnits(blank, dimensions=dimensions)

        # bands may overlap, so contributions are accumulated, not assigned
        for band in self.scale:
            linear_coeffs[:, band] += self.band_transform(
                freq_adaptive_coeffs[:, band], norm='ortho')

        return self.short_time_synth.synthesize(linear_coeffs)
class FrequencyAdaptiveDCTSynthesizer(BaseFrequencyAdaptiveSynthesizer):
    """
    Invert a frequency-adaptive transform, e.g., one produced by the
    :class:`zounds.spectral.FrequencyAdaptiveTransform` processing node which
    has used a discrete cosine transform in its `transform` parameter.

    Args:
        scale (FrequencyScale): The scale used to produce the
            frequency-adaptive transform
        samplerate (SampleRate): The audio samplerate of the audio that was
            originally transformed

    Here's an example of how you might first extract a frequency-adaptive
    representation, and then invert it:

    .. code:: python

        import zounds
        import scipy
        import numpy as np

        samplerate = zounds.SR11025()
        Resampled = zounds.resampled(resample_to=samplerate)

        scale = zounds.GeometricScale(
            100, 5000, bandwidth_ratio=0.089, n_bands=100)
        scale.ensure_overlap_ratio(0.5)


        @zounds.simple_in_memory_settings
        class Sound(Resampled):
            long_windowed = zounds.ArrayWithUnitsFeature(
                zounds.SlidingWindow,
                wscheme=zounds.SampleRate(
                    frequency=zounds.Milliseconds(500),
                    duration=zounds.Seconds(1)),
                wfunc=zounds.OggVorbisWindowingFunc(),
                needs=Resampled.resampled)

            dct = zounds.ArrayWithUnitsFeature(
                zounds.DCT,
                scale_always_even=True,
                needs=long_windowed)

            freq_adaptive = zounds.FrequencyAdaptiveFeature(
                zounds.FrequencyAdaptiveTransform,
                transform=scipy.fftpack.idct,
                window_func=np.hanning,
                scale=scale,
                needs=dct,
                store=True)

        # produce some additive sine waves
        sine_synth = zounds.SineSynthesizer(zounds.SR22050())
        samples = sine_synth.synthesize(
            zounds.Seconds(10), freqs_in_hz=[220, 440, 880])

        # process the sound, including the frequency-adaptive feature
        _id = Sound.process(meta=samples.encode())
        snd = Sound(_id)

        # invert the sound
        synth = zounds.FrequencyAdaptiveDCTSynthesizer(scale, samplerate)
        recon = synth.synthesize(snd.freq_adaptive)
        print(recon)  # AudioSamples instance with the reconstructed sound

    See Also:
        :class:`~zounds.spectral.DCT`
        :class:`~zounds.spectral.FrequencyAdaptive`
        :class:`~zounds.spectral.FrequencyAdaptiveTransform`
    """

    def __init__(self, scale, samplerate):
        # bands are transformed with a forward DCT, accumulated as real
        # coefficients, and finally inverted by a DCTSynthesizer
        super(FrequencyAdaptiveDCTSynthesizer, self).__init__(
            scale,
            dct,
            DCTSynthesizer(),
            samplerate,
            np.float64,
            scale_slices_always_even=True)

    def _n_linear_scale_bands(self, frequency_adaptive_coeffs):
        """
        Return the frame duration divided by ``samplerate.frequency``,
        truncated to an int.
        """
        frame_dim = frequency_adaptive_coeffs.dimensions[0]
        samples_per_frame = frame_dim.duration / self.samplerate.frequency
        return int(samples_per_frame)
class FrequencyAdaptiveFFTSynthesizer(BaseFrequencyAdaptiveSynthesizer):
    """
    Invert a frequency-adaptive transform, e.g., one produced by the
    :class:`zounds.spectral.FrequencyAdaptiveTransform` processing node which
    has used a fast fourier transform in its `transform` parameter.

    Args:
        scale (FrequencyScale): The scale used to produce the
            frequency-adaptive transform
        samplerate (SampleRate): The audio samplerate of the audio that was
            originally transformed

    Here's an example of how you might first extract a frequency-adaptive
    representation, and then invert it:

    .. code:: python

        import zounds
        import numpy as np

        samplerate = zounds.SR11025()
        Resampled = zounds.resampled(resample_to=samplerate)

        scale = zounds.GeometricScale(
            100, 5000, bandwidth_ratio=0.089, n_bands=100)
        scale.ensure_overlap_ratio(0.5)


        @zounds.simple_in_memory_settings
        class Sound(Resampled):
            long_windowed = zounds.ArrayWithUnitsFeature(
                zounds.SlidingWindow,
                wscheme=zounds.SampleRate(
                    frequency=zounds.Milliseconds(500),
                    duration=zounds.Seconds(1)),
                wfunc=zounds.OggVorbisWindowingFunc(),
                needs=Resampled.resampled)

            fft = zounds.ArrayWithUnitsFeature(
                zounds.FFT,
                needs=long_windowed)

            freq_adaptive = zounds.FrequencyAdaptiveFeature(
                zounds.FrequencyAdaptiveTransform,
                transform=np.fft.irfft,
                window_func=np.hanning,
                scale=scale,
                needs=fft,
                store=True)

        # produce some additive sine waves
        sine_synth = zounds.SineSynthesizer(zounds.SR22050())
        samples = sine_synth.synthesize(
            zounds.Seconds(10), freqs_in_hz=[220, 440, 880])

        # process the sound, including the frequency-adaptive feature
        _id = Sound.process(meta=samples.encode())
        snd = Sound(_id)

        # invert the sound
        synth = zounds.FrequencyAdaptiveFFTSynthesizer(scale, samplerate)
        recon = synth.synthesize(snd.freq_adaptive)
        print(recon)  # AudioSamples instance with the reconstructed sound

    See Also:
        :class:`~zounds.spectral.FFT`
        :class:`~zounds.spectral.FrequencyAdaptive`
        :class:`~zounds.spectral.FrequencyAdaptiveTransform`
    """

    def __init__(self, scale, samplerate):
        # bands are transformed with a forward real FFT, accumulated as
        # complex coefficients, and finally inverted by an FFTSynthesizer
        super(FrequencyAdaptiveFFTSynthesizer, self).__init__(
            scale,
            np.fft.rfft,
            FFTSynthesizer(),
            samplerate,
            np.complex128,
            scale_slices_always_even=False)

    def _n_linear_scale_bands(self, frequency_adaptive_coeffs):
        """
        Return ``n // 2 + 1`` where ``n`` is the frame duration divided by
        ``samplerate.frequency``, i.e. the length of a real FFT of ``n``
        samples.
        """
        # https://docs.scipy.org/doc/numpy/reference/generated/numpy.fft.rfft.html#numpy.fft.rfft
        frame_dim = frequency_adaptive_coeffs.dimensions[0]
        samples_per_frame = int(
            frame_dim.duration / self.samplerate.frequency)
        half = samples_per_frame // 2
        return int(half) + 1
class SineSynthesizer(object):
    """
    Synthesize sine waves

    Args:
        samplerate (Samplerate): the samplerate at which the sine waves should
            be synthesized

    Examples:
        >>> import zounds
        >>> synth = zounds.SineSynthesizer(zounds.SR22050())
        >>> samples = synth.synthesize(
        ...     zounds.Seconds(1), freqs_in_hz=[220., 440.])
        >>> samples
        AudioSamples([ 0.        ,  0.09384942,  0.18659419, ..., -0.27714552,
               -0.18659419, -0.09384942])
        >>> len(samples)
        22050

    See Also:
        :class:`TickSynthesizer`
        :class:`NoiseSynthesizer`
        :class:`SilenceSynthesizer`
    """

    def __init__(self, samplerate):
        super(SineSynthesizer, self).__init__()
        self.samplerate = samplerate

    def synthesize(self, duration, freqs_in_hz=[440.]):
        """
        Synthesize one or more sine waves

        The individual sines are scaled by ``1 / len(freqs_in_hz)`` and
        summed into a single signal.

        Args:
            duration (numpy.timedelta64): The duration of the sound to be
                synthesized
            freqs_in_hz (list of float): Numbers representing the frequencies
                in hz that should be synthesized

        Returns:
            AudioSamples: the mixed sine waves at this synthesizer's
            samplerate
        """
        # force floating point so that integer frequencies (e.g. [220, 440])
        # are not subject to integer division below
        freqs = np.array(freqs_in_hz, dtype=np.float64)
        scaling = 1.0 / len(freqs)
        sr = int(self.samplerate)
        # cycles advanced per sample, one entry per frequency
        cps = freqs / sr
        ts = (duration / Seconds(1)) * sr
        # Use one shared integer sample index for every frequency.  Building
        # each row with np.arange(0, ts * c, c) lets floating point rounding
        # decide the row length, so rows could differ by one sample and the
        # stacked array would be ragged.
        n_samples = int(np.ceil(ts))
        phases = np.arange(n_samples) * cps[:, None]
        raw = (np.sin(phases * (2 * np.pi)) * scaling).sum(axis=0)
        return AudioSamples(raw, self.samplerate)
class TickSynthesizer(object):
    """
    Synthesize short, percussive, periodic "ticks"

    Args:
        samplerate (SampleRate): the samplerate at which the ticks should be
            synthesized

    Examples:
        >>> import zounds
        >>> synth = zounds.TickSynthesizer(zounds.SR22050())
        >>> samples = synth.synthesize(
        ...     duration=zounds.Seconds(3),
        ...     tick_frequency=zounds.Milliseconds(100))
        >>> samples
        AudioSamples([ -3.91624993e-01,  -8.96939666e-01,   4.18165378e-01, ...,
                -4.08054347e-04,  -2.32257899e-04,   0.00000000e+00])

    See Also:
        :class:`SineSynthesizer`
        :class:`NoiseSynthesizer`
        :class:`SilenceSynthesizer`
    """

    def __init__(self, samplerate):
        super(TickSynthesizer, self).__init__()
        self.samplerate = samplerate

    def synthesize(self, duration, tick_frequency):
        """
        Synthesize periodic "ticks", generated from white noise and an
        envelope

        Args:
            duration (numpy.timedelta64): The total duration of the sound to
                be synthesized
            tick_frequency (numpy.timedelta64): The interval between the
                starts of consecutive ticks

        Returns:
            AudioSamples: silence with the same tick added at every interval
        """
        rate = self.samplerate.samples_per_second

        # one tick: a tenth of a second of uniform noise under a linearly
        # decaying envelope
        noise = np.random.uniform(low=-1., high=1., size=int(rate * .1))
        burst = noise * np.linspace(1, 0, len(noise))
        burst_len = len(burst)

        # silent canvas for the whole duration
        canvas = np.zeros(int(rate * (duration / Seconds(1))))
        total = len(canvas)

        ticks_per_second = Seconds(1) / tick_frequency
        hop = int(rate // ticks_per_second)

        for onset in range(0, total, hop):
            # clip the final tick if it would run past the end of the canvas
            end = min(onset + burst_len, total)
            canvas[onset:end] += burst[:end - onset]

        return AudioSamples(canvas, self.samplerate)
class NoiseSynthesizer(object):
    """
    Synthesize white noise

    Args:
        samplerate (SampleRate): the samplerate at which the noise should be
            synthesized

    Examples:
        >>> import zounds
        >>> synth = zounds.NoiseSynthesizer(zounds.SR44100())
        >>> samples = synth.synthesize(zounds.Seconds(2))
        >>> samples
        AudioSamples([ 0.1137964 , -0.02613194,  0.30963904, ..., -0.71398137,
               -0.99840281,  0.74310827])

    See Also:
        :class:`SineSynthesizer`
        :class:`TickSynthesizer`
        :class:`SilenceSynthesizer`
    """

    def __init__(self, samplerate):
        super(NoiseSynthesizer, self).__init__()
        self.samplerate = samplerate

    def synthesize(self, duration):
        """
        Synthesize white noise

        Args:
            duration (numpy.timedelta64): The duration of the synthesized
                sound

        Returns:
            AudioSamples: samples drawn uniformly from ``[-1, 1)``
        """
        rate = self.samplerate.samples_per_second
        n_samples = int(rate * (duration / Seconds(1)))
        noise = np.random.uniform(low=-1., high=1., size=n_samples)
        return AudioSamples(noise, self.samplerate)
class SilenceSynthesizer(object):
    """
    Synthesize silence

    Args:
        samplerate (SampleRate): the samplerate at which the silence should
            be synthesized

    Examples:
        >>> import zounds
        >>> synth = zounds.SilenceSynthesizer(zounds.SR11025())
        >>> samples = synth.synthesize(zounds.Seconds(5))
        >>> samples
        AudioSamples([ 0.,  0.,  0., ...,  0.,  0.,  0.])
    """

    def __init__(self, samplerate):
        super(SilenceSynthesizer, self).__init__()
        self.samplerate = samplerate

    def synthesize(self, duration):
        """
        Synthesize silence

        Args:
            duration (numpy.timedelta64): The duration of the synthesized
                sound

        Returns:
            The result of ``AudioSamples.silence`` for this synthesizer's
            samplerate and the requested duration
        """
        silent = AudioSamples.silence(self.samplerate, duration)
        return silent