Source code for zounds.spectral.frequencyscale


import numpy as np
import bisect


class Hertz(float):
    def __init__(self, hz):
        try:
            self.hz = hz.hz
        except AttributeError:
            self.hz = hz

    def __neg__(self):
        return Hertz(-self.hz)

    def __add__(self, other):
        try:
            other = other.hz
        except AttributeError:
            pass
        return Hertz(self.hz + other)

    def __float__(self):
        return self.hz


Hz = Hertz


# TODO: What commonalities can be factored out of this class and TimeSlice?
[docs]class FrequencyBand(object): """ Represents an interval, or band of frequencies in hertz (cycles per second) Args: start_hz (float): The lower bound of the frequency band in hertz stop_hz (float): The upper bound of the frequency band in hertz Examples:: >>> import zounds >>> band = zounds.FrequencyBand(500, 1000) >>> band.center_frequency 750.0 >>> band.bandwidth 500 """ def __init__(self, start_hz, stop_hz): super(FrequencyBand, self).__init__() if stop_hz <= start_hz: raise ValueError('stop_hz must be greater than start_hz') self.stop_hz = stop_hz self.start_hz = start_hz def __eq__(self, other): try: return \ self.start_hz == other.start_hz \ and self.stop_hz == other.stop_hz except AttributeError: return super(FrequencyBand, self).__eq__(other) def __hash__(self): return (self.__class__.__name__, self.start_hz, self.stop_hz).__hash__()
[docs] def intersect(self, other): """ Return the intersection between this frequency band and another. Args: other (FrequencyBand): the instance to intersect with Examples:: >>> import zounds >>> b1 = zounds.FrequencyBand(500, 1000) >>> b2 = zounds.FrequencyBand(900, 2000) >>> intersection = b1.intersect(b2) >>> intersection.start_hz, intersection.stop_hz (900, 1000) """ lowest_stop = min(self.stop_hz, other.stop_hz) highest_start = max(self.start_hz, other.start_hz) return FrequencyBand(highest_start, lowest_stop)
@classmethod def audible_range(cls, samplerate): return FrequencyBand(Hz(20), Hz(samplerate.nyquist)) def bandwidth_ratio(self, other): return other.bandwidth / self.bandwidth def intersection_ratio(self, other): intersection = self.intersect(other) return self.bandwidth_ratio(intersection)
[docs] @staticmethod def from_start(start_hz, bandwidth_hz): """ Produce a :class:`FrequencyBand` instance from a lower bound and bandwidth Args: start_hz (float): the lower bound of the desired FrequencyBand bandwidth_hz (float): the bandwidth of the desired FrequencyBand """ return FrequencyBand(start_hz, start_hz + bandwidth_hz)
@staticmethod def from_center(center_hz, bandwidth_hz): half_bandwidth = bandwidth_hz / 2 return FrequencyBand( center_hz - half_bandwidth, center_hz + half_bandwidth) @property def bandwidth(self): """ The span of this frequency band, in hertz """ return self.stop_hz - self.start_hz @property def center_frequency(self): return self.start_hz + (self.bandwidth / 2) def __repr__(self): return '''FrequencyBand( start_hz={start_hz}, stop_hz={stop_hz}, center={center}, bandwidth={bandwidth})'''.format( start_hz=self.start_hz, stop_hz=self.stop_hz, center=self.center_frequency, bandwidth=self.bandwidth)
[docs]class FrequencyScale(object): """ Represents a set of frequency bands with monotonically increasing start frequencies Args: frequency_band (FrequencyBand): A band representing the entire span of this scale. E.g., one might want to generate a scale spanning the entire range of human hearing by starting with :code:`FrequencyBand(20, 20000)` n_bands (int): The number of bands in this scale always_even (bool): when converting frequency slices to integer indices that numpy can understand, should the slice size always be even? See Also: :class:`~zounds.spectral.LinearScale` :class:`~zounds.spectral.GeometricScale` """ def __init__(self, frequency_band, n_bands, always_even=False): super(FrequencyScale, self).__init__() self.always_even = always_even self.n_bands = n_bands self.frequency_band = frequency_band self._bands = None self._starts = None self._stops = None @property def bands(self): """ An iterable of all bands in this scale """ if self._bands is None: self._bands = self._compute_bands() return self._bands @property def band_starts(self): if self._starts is None: self._starts = [b.start_hz for b in self.bands] return self._starts @property def band_stops(self): if self._stops is None: self._stops = [b.stop_hz for b in self.bands] return self._stops def _compute_bands(self): raise NotImplementedError() def __len__(self): return self.n_bands @property def center_frequencies(self): """ An iterable of the center frequencies of each band in this scale """ return (band.center_frequency for band in self) @property def bandwidths(self): """ An iterable of the bandwidths of each band in this scale """ return (band.bandwidth for band in self)
[docs] def ensure_overlap_ratio(self, required_ratio=0.5): """ Ensure that every adjacent pair of frequency bands meets the overlap ratio criteria. This can be helpful in scenarios where a scale is being used in an invertible transform, and something like the `constant overlap add constraint <https://ccrma.stanford.edu/~jos/sasp/Constant_Overlap_Add_COLA_Cases.html>`_ must be met in order to not introduce artifacts in the reconstruction. Args: required_ratio (float): The required overlap ratio between all adjacent frequency band pairs Raises: AssertionError: when the overlap ratio for one or more adjacent frequency band pairs is not met """ msg = \ 'band {i}: ratio must be at least {required_ratio} but was {ratio}' for i in range(0, len(self) - 1): b1 = self[i] b2 = self[i + 1] try: ratio = b1.intersection_ratio(b2) except ValueError: ratio = 0 if ratio < required_ratio: raise AssertionError(msg.format(**locals()))
@property def Q(self): """ The quality factor of the scale, or, the ratio of center frequencies to bandwidths """ return np.array(list(self.center_frequencies)) \ / np.array(list(self.bandwidths)) @property def start_hz(self): """ The lower bound of this frequency scale """ return self.frequency_band.start_hz @property def stop_hz(self): """ The upper bound of this frequency scale """ return self.frequency_band.stop_hz def _basis(self, other_scale, window): weights = np.zeros((len(self), len(other_scale))) for i, band in enumerate(self): band_slice = other_scale.get_slice(band) slce = weights[i, band_slice] slce[:] = window * np.ones(len(slce)) return weights def apply(self, time_frequency_repr, window): basis = self._basis(time_frequency_repr.dimensions[-1].scale, window) transformed = np.dot(basis, time_frequency_repr.T).T return transformed def __eq__(self, other): return \ self.__class__ == other.__class__ \ and self.frequency_band == other.frequency_band \ and self.n_bands == other.n_bands def __iter__(self): return iter(self.bands) def _construct_scale_from_slice(self, bands): freq_band = FrequencyBand(bands[0].start_hz, bands[-1].stop_hz) return self.__class__(freq_band, len(bands))
[docs] def get_slice(self, frequency_band): """ Given a frequency band, and a frequency dimension comprised of n_samples, return a slice using integer indices that may be used to extract only the frequency samples that intersect with the frequency band """ index = frequency_band if isinstance(index, slice): types = { index.start.__class__, index.stop.__class__, index.step.__class__ } if Hertz not in types: return index try: start = Hertz(0) if index.start is None else index.start if start < Hertz(0): start = self.stop_hz + start stop = self.stop_hz if index.stop is None else index.stop if stop < Hertz(0): stop = self.stop_hz + stop frequency_band = FrequencyBand(start, stop) except (ValueError, TypeError): pass start_index = bisect.bisect_left( self.band_stops, frequency_band.start_hz) stop_index = bisect.bisect_left( self.band_starts, frequency_band.stop_hz) if self.always_even and (stop_index - start_index) % 2: # KLUDGE: This is simple, but it may make sense to choose move the # upper *or* lower bound, based on which one introduces a lower # error stop_index += 1 return slice(start_index, stop_index)
def __getitem__(self, index): try: # index is an integer or slice bands = self.bands[index] except TypeError: # index is a frequency band bands = self.bands[self.get_slice(index)] if isinstance(bands, FrequencyBand): return bands return self._construct_scale_from_slice(bands) def __str__(self): cls = self.__class__.__name__ return '{cls}(band={self.frequency_band}, n_bands={self.n_bands})' \ .format(**locals()) def __repr__(self): return self.__str__()
[docs]class LinearScale(FrequencyScale): """ A linear frequency scale with constant bandwidth. Appropriate for use with transforms whose coefficients also lie on a linear frequency scale, e.g. the FFT or DCT transforms. Args: frequency_band (FrequencyBand): A band representing the entire span of this scale. E.g., one might want to generate a scale spanning the entire range of human hearing by starting with :code:`FrequencyBand(20, 20000)` n_bands (int): The number of bands in this scale always_even (bool): when converting frequency slices to integer indices that numpy can understand, should the slice size always be even? Examples: >>> from zounds import FrequencyBand, LinearScale >>> scale = LinearScale(FrequencyBand(20, 20000), 10) >>> scale LinearScale(band=FrequencyBand( start_hz=20, stop_hz=20000, center=10010.0, bandwidth=19980), n_bands=10) >>> scale.Q array([ 0.51001001, 1.51001001, 2.51001001, 3.51001001, 4.51001001, 5.51001001, 6.51001001, 7.51001001, 8.51001001, 9.51001001]) """ def __init__(self, frequency_band, n_bands, always_even=False): super(LinearScale, self).__init__(frequency_band, n_bands, always_even)
[docs] @staticmethod def from_sample_rate(sample_rate, n_bands, always_even=False): """ Return a :class:`~zounds.spectral.LinearScale` instance whose upper frequency bound is informed by the nyquist frequency of the sample rate. Args: sample_rate (SamplingRate): the sample rate whose nyquist frequency will serve as the upper frequency bound of this scale n_bands (int): the number of evenly-spaced frequency bands """ fb = FrequencyBand(0, sample_rate.nyquist) return LinearScale(fb, n_bands, always_even=always_even)
def _compute_bands(self): freqs = np.linspace( self.start_hz, self.stop_hz, self.n_bands, endpoint=False) # constant, non-overlapping bandwidth bandwidth = freqs[1] - freqs[0] return tuple(FrequencyBand(f, f + bandwidth) for f in freqs)
# class LogScale(FrequencyScale): # def __init__(self, frequency_band, n_bands, always_even=False): # super(LogScale, self).__init__( # frequency_band, n_bands, always_even=always_even) # # def _compute_bands(self): # center_freqs = np.logspace( # np.log10(self.start_hz), # np.log10(self.stop_hz), # self.n_bands + 1) # # variable bandwidth # bandwidths = np.diff(center_freqs) # return tuple(FrequencyBand.from_center(cf, bw) # for (cf, bw) in zip(center_freqs[:-1], bandwidths))
[docs]class GeometricScale(FrequencyScale): """ A constant-Q scale whose center frequencies progress geometrically rather than linearly Args: start_center_hz (int): the center frequency of the first band in the scale stop_center_hz (int): the center frequency of the last band in the scale bandwidth_ratio (float): the center frequency to bandwidth ratio n_bands (int): the total number of bands Examples: >>> from zounds import GeometricScale >>> scale = GeometricScale(20, 20000, 0.05, 10) >>> scale GeometricScale(band=FrequencyBand( start_hz=19.5, stop_hz=20500.0, center=10259.75, bandwidth=20480.5), n_bands=10) >>> scale.Q array([ 20., 20., 20., 20., 20., 20., 20., 20., 20., 20.]) >>> list(scale.center_frequencies) [20.000000000000004, 43.088693800637671, 92.831776672255558, 200.00000000000003, 430.88693800637651, 928.31776672255558, 2000.0000000000005, 4308.8693800637648, 9283.1776672255564, 20000.000000000004] """ def __init__( self, start_center_hz, stop_center_hz, bandwidth_ratio, n_bands, always_even=False): self.__bands = [ FrequencyBand.from_center(cf, cf * bandwidth_ratio) for cf in np.geomspace(start_center_hz, stop_center_hz, num=n_bands) ] band = FrequencyBand(self.__bands[0].start_hz, self.__bands[-1].stop_hz) super(GeometricScale, self).__init__( band, n_bands, always_even=always_even) self.start_center_hz = start_center_hz self.stop_center_hz = stop_center_hz self.bandwidth_ratio = bandwidth_ratio def _construct_scale_from_slice(self, bands): return ExplicitScale(bands) def __eq__(self, other): return \ super(GeometricScale, self).__eq__(other) \ and self.start_center_hz == other.start_center_hz \ and self.stop_center_hz == other.stop_center_hz \ and self.bandwidth_ratio == other.bandwidth_ratio def _compute_bands(self): return self.__bands
[docs]class ExplicitScale(FrequencyScale): """ A scale where the frequency bands are provided explicitly, rather than computed Args: bands (list of FrequencyBand): The explicit bands used by this scale See Also: :class:`~zounds.spectral.FrequencyAdaptive` """ def __init__(self, bands): bands = list(bands) frequency_band = FrequencyBand(bands[0].start_hz, bands[-1].stop_hz) super(ExplicitScale, self).__init__( frequency_band, len(bands), always_even=False) self._bands = bands def _construct_scale_from_slice(self, bands): return ExplicitScale(bands) def _compute_bands(self): return self._bands def __eq__(self, other): return all([a == b for (a, b) in zip(self, other)])
class Bark(Hertz): def __init__(self, bark): self.bark = bark super(Bark, self).__init__(Bark.to_hz(bark)) @staticmethod def to_hz(bark): return 300. * ((np.e ** (bark / 6.0)) - (np.e ** (-bark / 6.))) @staticmethod def to_bark(hz): return 6. * np.log((hz / 600.) + np.sqrt((hz / 600.) ** 2 + 1)) def equivalent_rectangular_bandwidth(hz): return (0.108 * hz) + 24.7 class BarkScale(FrequencyScale): def __init__(self, frequency_band, n_bands): super(BarkScale, self).__init__(frequency_band, n_bands) def _compute_bands(self): start = Bark.to_bark(self.frequency_band.start_hz) stop = Bark.to_bark(self.frequency_band.stop_hz) barks = np.linspace(start, stop, self.n_bands) center_frequencies_hz = Bark.to_hz(barks) bandwidths = equivalent_rectangular_bandwidth(center_frequencies_hz) return [ FrequencyBand.from_center(c, b) for c, b in zip(center_frequencies_hz, bandwidths)] class Mel(Hertz): def __init__(self, mel): self.mel = mel super(Mel, self).__init__(Mel.to_hz(mel)) @staticmethod def to_hz(mel): return 700 * ((np.e ** (mel / 1127)) - 1) @staticmethod def to_mel(hz): return 1127 * np.log(1 + (hz / 700)) class MelScale(FrequencyScale): def __init__(self, frequency_band, n_bands): super(MelScale, self).__init__(frequency_band, n_bands) def _compute_bands(self): start = Mel.to_mel(self.frequency_band.start_hz) stop = Mel.to_mel(self.frequency_band.stop_hz) mels = np.linspace(start, stop, self.n_bands) center_frequencies_hz = Mel.to_hz(mels) bandwidths = equivalent_rectangular_bandwidth(center_frequencies_hz) return [ FrequencyBand.from_center(c, b) for c, b in zip(center_frequencies_hz, bandwidths)] class ChromaScale(FrequencyScale): def __init__(self, frequency_band): self._a440 = 440. self._a = 2 ** (1 / 12.) super(ChromaScale, self).__init__(frequency_band, n_bands=12) def _compute_bands(self): raise NotImplementedError() def get_slice(self, frequency_band): raise NotImplementedError() def _semitones_to_hz(self, semitone): return self._a440 * (self._a ** semitone) def _hz_to_semitones(self, hz): """ Convert hertz into a number of semitones above or below some reference value, in this case, A440 """ return np.log(hz / self._a440) / np.log(self._a) def _basis(self, other_scale, window): basis = np.zeros((self.n_bands, len(other_scale))) # for each tone in the twelve-tone scale, generate narrow frequency # bands for every octave of that note that falls within the frequency # band. start_semitones = \ int(np.round(self._hz_to_semitones(self.frequency_band.start_hz))) stop_semitones = \ int(np.round(self._hz_to_semitones(self.frequency_band.stop_hz))) semitones = np.arange(start_semitones - 1, stop_semitones) hz = self._semitones_to_hz(semitones) bands = [] for i in range(0, len(semitones) - 2): fh, mh, lh = hz[i: i + 3] bands.append(FrequencyBand(fh, lh)) for semitone, band in zip(semitones, bands): slce = other_scale.get_slice(band) chroma_index = semitone % self.n_bands slce = basis[chroma_index, slce] slce[:] += np.ones(len(slce)) * window return basis