Source code for zounds.spectral.spectral


import numpy as np
from featureflow import Node
from scipy.fftpack import dct
from scipy.stats.mstats import gmean

from .functional import fft, mdct
from .frequencyscale import LinearScale, ChromaScale, BarkScale
from .weighting import AWeighting
from .tfrepresentation import FrequencyDimension
from .frequencyadaptive import FrequencyAdaptive
from zounds.core import ArrayWithUnits, IdentityDimension
from zounds.nputil import safe_log
from zounds.timeseries import audio_sample_rate
from .sliding_window import HanningWindowingFunc


class FrequencyWeighting(Node):
    """
    `FrequencyWeighting` is a processing node that expects to be passed an
    :class:`~zounds.core.ArrayWithUnits` instance whose last dimension is a
    :class:`~zounds.spectral.FrequencyDimension`

    Args:
        weighting (FrequencyWeighting): the frequency weighting to apply
        needs (Node): a processing node on which this node depends, whose
            last dimension is a :class:`~zounds.spectral.FrequencyDimension`
    """

    def __init__(self, weighting=None, needs=None):
        super(FrequencyWeighting, self).__init__(needs=needs)
        self.weighting = weighting

    def _process(self, data):
        yield data * self.weighting
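
# Illustrative sketch (not part of the original module): frequency weighting
# just scales each bin of a magnitude spectrum by a weighting curve. The
# curve below is the standard IEC A-weighting magnitude response (without
# the usual +2 dB normalization at 1 kHz); zounds' AWeighting additionally
# knows how to align itself with an ArrayWithUnits frequency dimension. The
# frequencies and magnitudes here are made-up example data.
def _a_weighting_sketch():
    frequencies = np.linspace(20, 20000, 1024)  # bin center frequencies, Hz
    f2 = frequencies ** 2
    curve = (12194.0 ** 2 * f2 ** 2) / (
        (f2 + 20.6 ** 2)
        * np.sqrt((f2 + 107.7 ** 2) * (f2 + 737.9 ** 2))
        * (f2 + 12194.0 ** 2))
    magnitudes = np.random.rand(10, 1024)       # (frames, bins), fake data
    return magnitudes * curve[None, :]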
class FFT(Node):
    """
    A processing node that performs an FFT of a real-valued signal

    Args:
        axis (int): The axis over which the FFT should be computed
        padding_samples (int): number of zero samples to pad each window with
            before applying the FFT
        needs (Node): a processing node on which this one depends

    See Also:
        :class:`~zounds.synthesize.FFTSynthesizer`
    """

    def __init__(self, needs=None, axis=-1, padding_samples=0):
        super(FFT, self).__init__(needs=needs)
        self._axis = axis
        self._padding_samples = padding_samples

    def _process(self, data):
        yield fft(
            data, axis=self._axis, padding_samples=self._padding_samples)
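
# Illustrative sketch (not part of the original module): what a zero-padded
# FFT over a batch of real-valued windows looks like in plain numpy. The
# functional.fft helper used above also attaches frequency metadata; this
# only reproduces the numerics.
def _padded_fft_sketch():
    frames = np.random.randn(10, 512)           # (windows, samples), fake data
    padded = np.pad(frames, ((0, 0), (0, 512))) # zero-pad each window
    spectrum = np.fft.rfft(padded, axis=-1, norm='ortho')
    return spectrum                             # shape (10, 513): n // 2 + 1 bins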
class DCT(Node):
    """
    A processing node that performs a Type II Discrete Cosine Transform
    (https://en.wikipedia.org/wiki/Discrete_cosine_transform#DCT-II) of the
    input

    Args:
        axis (int): The axis over which to perform the DCT transform
        needs (Node): a processing node on which this one depends

    See Also:
        :class:`~zounds.synthesize.DctSynthesizer`
    """

    def __init__(self, axis=-1, scale_always_even=False, needs=None):
        super(DCT, self).__init__(needs=needs)
        self.scale_always_even = scale_always_even
        self._axis = axis

    def _process(self, data):
        transformed = dct(data, norm='ortho', axis=self._axis)

        sr = audio_sample_rate(
            int(data.shape[1] / data.dimensions[0].duration_in_seconds))
        scale = LinearScale.from_sample_rate(
            sr, transformed.shape[-1], always_even=self.scale_always_even)
        yield ArrayWithUnits(
            transformed, [data.dimensions[0], FrequencyDimension(scale)])
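
# Illustrative sketch (not part of the original module): the orthonormal
# DCT-II used above forms a documented inverse pair with the orthonormal
# DCT-III, so the transform is losslessly invertible.
def _dct_round_trip_sketch():
    from scipy.fftpack import idct
    x = np.random.randn(4, 256)
    coeffs = dct(x, norm='ortho', axis=-1)
    recovered = idct(coeffs, norm='ortho', axis=-1)
    assert np.allclose(x, recovered)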
class DCTIV(Node):
    """
    A processing node that performs a Type IV Discrete Cosine Transform
    (https://en.wikipedia.org/wiki/Discrete_cosine_transform#DCT-IV) of the
    input

    Args:
        needs (Node): a processing node on which this one depends

    See Also:
        :class:`~zounds.synthesize.DCTIVSynthesizer`
    """

    def __init__(self, scale_always_even=False, needs=None):
        super(DCTIV, self).__init__(needs=needs)
        self.scale_always_even = scale_always_even

    def _process_raw(self, data):
        # compute the DCT-IV of each row via a single complex FFT of twice
        # the window length, with pre- and post-twiddle factors
        l = data.shape[1]
        tf = np.arange(0, l)
        # the pre-twiddled values must stay complex; taking their real part
        # here would discard the phase information the post-twiddle needs
        z = np.zeros((len(data), l * 2), dtype=np.complex128)
        z[:, :l] = data * np.exp(-1j * np.pi * tf / (2 * l))
        z = np.fft.fft(z)[:, :l]
        raw = np.sqrt(2. / l) * \
            (z * np.exp(-1j * np.pi * (tf + 0.5) / (2 * l))).real
        return raw

    def _process(self, data):
        raw = self._process_raw(data)
        sr = audio_sample_rate(
            int(data.shape[1] / data.dimensions[0].duration_in_seconds))
        scale = LinearScale.from_sample_rate(
            sr, data.shape[1], always_even=self.scale_always_even)
        yield ArrayWithUnits(
            raw, [data.dimensions[0], FrequencyDimension(scale)])
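
# Illustrative sketch (not part of the original module): the FFT-based fast
# path in _process_raw agrees with the naive O(n^2) orthonormal DCT-IV
# definition, X[k] = sqrt(2/N) * sum_n x[n] * cos(pi/N * (n + 0.5) * (k + 0.5)).
def _dct_iv_check_sketch():
    x = np.random.randn(3, 64)
    n = np.arange(x.shape[-1])
    basis = np.cos(np.pi / len(n) * np.outer(n + 0.5, n + 0.5))
    naive = np.sqrt(2.0 / len(n)) * (x @ basis)
    # same steps as DCTIV._process_raw above
    l = x.shape[-1]
    z = np.zeros((len(x), l * 2), dtype=np.complex128)
    z[:, :l] = x * np.exp(-1j * np.pi * n / (2 * l))
    z = np.fft.fft(z)[:, :l]
    fast = np.sqrt(2.0 / l) * \
        (z * np.exp(-1j * np.pi * (n + 0.5) / (2 * l))).real
    assert np.allclose(naive, fast)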
class MDCT(Node):
    """
    A processing node that performs a modified discrete cosine transform
    (https://en.wikipedia.org/wiki/Modified_discrete_cosine_transform) of the
    input. This is really just a lapped version of the DCT-IV transform.

    Args:
        needs (Node): a processing node on which this one depends

    See Also:
        :class:`~zounds.synthesize.MDCTSynthesizer`
    """

    def __init__(self, needs=None):
        super(MDCT, self).__init__(needs=needs)

    def _process(self, data):
        transformed = mdct(data)

        sr = audio_sample_rate(data.dimensions[1].samples_per_second)
        scale = LinearScale.from_sample_rate(sr, transformed.shape[1])
        yield ArrayWithUnits(
            transformed, [data.dimensions[0], FrequencyDimension(scale)])
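
# Illustrative sketch (not part of the original module): the MDCT maps a
# window of 2N samples to N coefficients; with 50%-overlapped windows the
# aliasing between neighboring frames cancels on resynthesis. Below is the
# naive textbook definition for a single frame (functional.mdct is the real,
# lapped implementation, and its normalization may differ).
def _mdct_frame_sketch(frame):
    n_out = len(frame) // 2
    n = np.arange(len(frame))
    k = np.arange(n_out)
    basis = np.cos(np.pi / n_out * np.outer(n + 0.5 + n_out / 2.0, k + 0.5))
    return frame @ basis                        # (2N,) -> (N,) coefficients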
class FrequencyAdaptiveTransform(Node):
    """
    A processing node that expects to receive the input from a frequency
    domain transformation (e.g. :class:`~zounds.spectral.FFT`), and produces
    a :class:`~zounds.spectral.FrequencyAdaptive` instance where time
    resolution can vary by frequency. This is similar to, but not precisely
    the same as ideas introduced in:

    * `A quasi-orthogonal, invertible, and perceptually relevant time-frequency
      transform for audio coding <https://hal-amu.archives-ouvertes.fr/hal-01194806/document>`_
    * `A FRAMEWORK FOR INVERTIBLE, REAL-TIME CONSTANT-Q TRANSFORMS
      <http://www.univie.ac.at/nonstatgab/pdf_files/dogrhove12_amsart.pdf>`_

    Args:
        transform (function): the transform to be applied to each frequency
            band
        scale (FrequencyScale): the scale used to take frequency band slices
        window_func (numpy.ndarray): the windowing function to apply to each
            band before the transform is applied
        check_scale_overlap_ratio (bool): if this feature is to be used for
            resynthesis later, ensure that each frequency band overlaps with
            the previous one by at least half, to ensure artifact-free
            synthesis

    See Also:
        :class:`~zounds.spectral.FrequencyAdaptive`
        :class:`~zounds.synthesize.FrequencyAdaptiveDCTSynthesizer`
        :class:`~zounds.synthesize.FrequencyAdaptiveFFTSynthesizer`
    """

    def __init__(
            self,
            transform=None,
            scale=None,
            window_func=None,
            check_scale_overlap_ratio=False,
            needs=None):
        super(FrequencyAdaptiveTransform, self).__init__(needs=needs)

        if check_scale_overlap_ratio:
            try:
                scale.ensure_overlap_ratio(0.5)
            except AssertionError as e:
                raise ValueError(*e.args)

        self._window_func = window_func or np.ones
        self._scale = scale
        self._transform = transform

    def _process_band(self, data, band):
        try:
            raw_coeffs = data[:, band]
        except IndexError:
            raise ValueError(
                'data must have FrequencyDimension as its last dimension, '
                'but it was {dim}'.format(dim=data.dimensions[-1]))
        window = self._window_func(raw_coeffs.shape[1])
        return self._transform(raw_coeffs * window[None, :], norm='ortho')

    def _process(self, data):
        yield FrequencyAdaptive(
            [self._process_band(data, band) for band in self._scale],
            data.dimensions[0],
            self._scale)
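
# Illustrative sketch (not part of the original module): the essence of the
# node above, with plain numpy and made-up band edges. Each (possibly
# overlapping) frequency band is sliced out of a (frames, bins) spectrogram,
# windowed across its bins, and transformed per band, so wider bands yield
# more coefficients per frame than narrow ones.
def _frequency_adaptive_sketch():
    spectrogram = np.random.randn(16, 512)      # (frames, fft_bins), fake data
    band_slices = [slice(0, 8), slice(4, 32), slice(16, 512)]
    bands = []
    for band in band_slices:
        coeffs = spectrogram[:, band]
        window = np.hanning(coeffs.shape[1])
        bands.append(dct(coeffs * window[None, :], norm='ortho'))
    return bands                                # per-band coefficient arrays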
class BaseScaleApplication(Node):
    def __init__(self, scale, window, needs=None):
        super(BaseScaleApplication, self).__init__(needs=needs)
        self.window = window
        self.scale = scale

    def _new_dim(self):
        return FrequencyDimension(self.scale)

    def _preprocess(self, data):
        return data

    def _process(self, data):
        x = self._preprocess(data)
        x = self.scale.apply(x, self.window)
        yield ArrayWithUnits(
            x, data.dimensions[:-1] + (self._new_dim(),))
class Chroma(BaseScaleApplication):
    def __init__(
            self, frequency_band, window=HanningWindowingFunc(), needs=None):
        super(Chroma, self).__init__(
            ChromaScale(frequency_band), window, needs=needs)

    def _new_dim(self):
        return IdentityDimension()

    def _preprocess(self, data):
        return np.abs(data) * AWeighting()
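
# Illustrative sketch (not part of the original module): the chroma idea,
# reduced to a hard bin-to-pitch-class assignment. Each FFT bin's center
# frequency is mapped to a semitone relative to A440 and folded onto 12
# pitch classes; ChromaScale does this with smooth, windowed bands instead.
def _chroma_sketch():
    sr, n_bins = 44100, 1024
    freqs = np.fft.rfftfreq((n_bins - 1) * 2, d=1.0 / sr)  # bin frequencies
    magnitudes = np.random.rand(10, n_bins)                # fake spectrogram
    valid = freqs > 0
    pitch_class = (np.round(
        12 * np.log2(freqs[valid] / 440.0)) % 12).astype(int)
    chroma = np.zeros((len(magnitudes), 12))
    for pc in range(12):
        chroma[:, pc] = magnitudes[:, valid][:, pitch_class == pc].sum(axis=1)
    return chroma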
class BarkBands(BaseScaleApplication):
    def __init__(
            self,
            frequency_band,
            n_bands=100,
            window=HanningWindowingFunc(),
            needs=None):
        super(BarkBands, self).__init__(
            BarkScale(frequency_band, n_bands), window, needs=needs)

    def _preprocess(self, data):
        return np.abs(data)
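
# Illustrative sketch (not part of the original module): what applying a
# scale boils down to. Each band slices the magnitude spectrum and pools it
# through a window; the logarithmically spaced edges below merely stand in
# for real Bark band edges.
def _band_pooling_sketch():
    magnitudes = np.random.rand(10, 1024)       # (frames, fft_bins), fake data
    edges = np.unique(np.geomspace(1, 1024, num=25).astype(int))
    return np.column_stack([
        (magnitudes[:, lo:hi] * np.hanning(hi - lo)[None, :]).sum(axis=1)
        for lo, hi in zip(edges[:-1], edges[1:])])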
class SpectralCentroid(Node):
    """
    Indicates where the "center of mass" of the spectrum is. Perceptually,
    it has a robust connection with the impression of "brightness" of a
    sound. It is calculated as the weighted mean of the frequencies
    present in the signal, determined using a Fourier transform, with
    their magnitudes as the weights...

    -- http://en.wikipedia.org/wiki/Spectral_centroid
    """

    def __init__(self, needs=None):
        super(SpectralCentroid, self).__init__(needs=needs)

    def _first_chunk(self, data):
        self._bins = np.arange(1, data.shape[-1] + 1)
        self._bins_sum = np.sum(self._bins)
        return data

    def _process(self, data):
        data = np.abs(data)
        # note that this normalizes by the (constant) sum of bin indices
        # rather than by the per-frame magnitude sum, so it is a scaled
        # variant of the textbook weighted-mean centroid
        yield (data * self._bins).sum(axis=1) / self._bins_sum
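
# Illustrative sketch (not part of the original module): the textbook
# centroid from the docstring's definition, i.e. a weighted mean of bin
# indices with the per-frame magnitudes as the weights.
def _textbook_centroid_sketch():
    magnitudes = np.abs(np.random.randn(10, 1024))  # (frames, bins), fake data
    bins = np.arange(1, magnitudes.shape[-1] + 1)
    return (magnitudes * bins).sum(axis=1) / magnitudes.sum(axis=1)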
class SpectralFlatness(Node):
    """
    Spectral flatness or tonality coefficient, also known as Wiener
    entropy, is a measure used in digital signal processing to characterize an
    audio spectrum. Spectral flatness is typically measured in decibels, and
    provides a way to quantify how tone-like a sound is, as opposed to being
    noise-like. The meaning of tonal in this context is in the sense of the
    amount of peaks or resonant structure in a power spectrum, as opposed to
    the flat spectrum of white noise. A high spectral flatness indicates that
    the spectrum has a similar amount of power in all spectral bands - this
    would sound similar to white noise, and the graph of the spectrum would
    appear relatively flat and smooth. A low spectral flatness indicates that
    the spectral power is concentrated in a relatively small number of bands -
    this would typically sound like a mixture of sine waves, and the spectrum
    would appear "spiky"...

    -- http://en.wikipedia.org/wiki/Spectral_flatness
    """

    def __init__(self, needs=None):
        super(SpectralFlatness, self).__init__(needs=needs)

    def _process(self, data):
        data = np.abs(data)
        mean = data.mean(axis=1)
        # guard against division by zero for silent frames
        mean[mean == 0] = -1e5
        flatness = gmean(data, axis=1) / mean
        yield ArrayWithUnits(flatness, data.dimensions[:1])
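
# Illustrative sketch (not part of the original module): flatness is the
# ratio of the geometric mean to the arithmetic mean of the spectrum. It is
# exactly 1.0 for a perfectly flat (noise-like) spectrum and approaches 0.0
# as energy concentrates into a few bins (tone-like).
def _flatness_extremes_sketch():
    flat = np.ones(1024)                    # perfectly flat spectrum
    spiky = np.full(1024, 1e-6)
    spiky[42] = 1.0                         # nearly all energy in one bin
    return (gmean(flat) / flat.mean(),      # -> 1.0
            gmean(spiky) / spiky.mean())    # -> ~0.001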
class BFCC(Node):
    """
    Bark frequency cepstral coefficients
    """

    def __init__(self, needs=None, n_coeffs=13, exclude=1):
        super(BFCC, self).__init__(needs=needs)
        self._n_coeffs = n_coeffs
        self._exclude = exclude

    def _process(self, data):
        data = np.abs(data)
        coeffs = dct(safe_log(data), axis=1)
        bfcc = coeffs[:, self._exclude: self._exclude + self._n_coeffs]
        yield ArrayWithUnits(
            bfcc.copy(), [data.dimensions[0], IdentityDimension()])
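
# Illustrative sketch (not part of the original module): the cepstral recipe
# in miniature. Log-compress band energies, decorrelate with a DCT, and keep
# the first handful of coefficients, dropping the 0th, which only tracks
# overall energy. MFCCs follow the same recipe with mel bands in place of
# Bark bands.
def _cepstral_sketch():
    band_energies = np.abs(np.random.randn(10, 100))  # (frames, bands), fake data
    log_energies = np.log(band_energies + 1e-12)      # stands in for safe_log
    return dct(log_energies, axis=1)[:, 1:14]         # 13 coefficients, skip 0th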