masks (np.array): A NumPy array containing the lists of Binary Mask objects for each channel.
def __init__(self, input_audio_signal, num_sources, num_templates=50, distance_measure="euclidean",
             num_iterations=50, random_seed=None, kmeans_kwargs=None, convert_to_mono=False,
             mask_type=mask_separation_base.MaskSeparationBase.BINARY_MASK, mfcc_range=(1, 14), n_mfcc=20):
    """Store the separation settings and build the K-Means clusterer.

    Args:
        input_audio_signal: The signal to separate. It is forwarded to the base class
            constructor and, when ``convert_to_mono`` is true, converted in place via
            ``to_mono(overwrite=True, remove_channels=False)``.
        num_sources: Number of clusters given to ``sklearn.cluster.KMeans`` (``n_clusters``).
        num_templates: Stored on the instance; not consumed in this constructor
            (presumably the number of NMF templates -- confirm against ``run``).
        distance_measure: Stored on the instance; not consumed in this constructor.
        num_iterations: Stored on the instance; not consumed in this constructor.
        random_seed: Stored on the instance. Also used as the K-Means ``random_state``
            unless ``kmeans_kwargs`` supplies its own ``"random_state"``.
        kmeans_kwargs: Optional dict of extra keyword arguments for
            ``sklearn.cluster.KMeans``. A ``"random_state"`` entry takes precedence over
            ``random_seed``. The caller's dict is never modified; a shallow copy without
            the ``"random_state"`` entry is kept as ``self.kmeans_kwargs``. Any non-dict
            value is stored as-is and ignored when building the clusterer.
        convert_to_mono: When true, the input signal is converted to mono in place.
        mask_type: Mask type forwarded to the base class constructor.
        mfcc_range: Either an int smaller than ``n_mfcc`` (taken as the end of the range,
            with the start fixed at 1) or a 2-element tuple/list ``(start, end)``.
        n_mfcc: Stored on the instance; also the exclusive upper bound for an int
            ``mfcc_range``.

    Raises:
        ValueError: If ``mfcc_range`` is neither an int smaller than ``n_mfcc`` nor a
            tuple/list of length 2.
    """
    super(NMF_MFCC, self).__init__(input_audio_signal=input_audio_signal, mask_type=mask_type)

    # Work on a shallow copy: "random_state" is popped below, and popping from the
    # caller's own dict would silently change it for any later reuse.
    if isinstance(kmeans_kwargs, dict):
        kmeans_kwargs = dict(kmeans_kwargs)

    self.num_sources = num_sources
    self.num_templates = num_templates
    self.distance_measure = distance_measure
    self.num_iterations = num_iterations
    self.random_seed = random_seed
    self.kmeans_kwargs = kmeans_kwargs
    self.convert_to_mono = convert_to_mono
    self.n_mfcc = n_mfcc
    self.mask_type = mask_type
    self.signal_stft = None
    self.input_audio_signal = input_audio_signal
    self.labeled_templates = None
    self.sources = []
    self.masks = []

    # Convert the stereo signal to mono if indicated
    if self.convert_to_mono:
        self.input_audio_signal.to_mono(overwrite=True, remove_channels=False)

    # Set the MFCC range
    if isinstance(mfcc_range, int) and mfcc_range < n_mfcc:
        self.mfcc_start, self.mfcc_end = 1, mfcc_range
    elif isinstance(mfcc_range, (tuple, list)) and len(mfcc_range) == 2:
        self.mfcc_start, self.mfcc_end = mfcc_range[0], mfcc_range[1]
    else:
        raise ValueError("mfcc_range is not set correctly! Must be a tuple or list with min and max, or int (max)")

    # If kmeans_kwargs includes "random_state", that value wins; otherwise (or when
    # kmeans_kwargs is not a dict) fall back to random_seed.
    if isinstance(kmeans_kwargs, dict):
        self.kmeans_random_seed = kmeans_kwargs.pop("random_state", random_seed)
        extra_kmeans_kwargs = kmeans_kwargs
    else:
        self.kmeans_random_seed = random_seed
        extra_kmeans_kwargs = {}

    # Initialize the K-Means clusterer
    self.clusterer = sklearn.cluster.KMeans(n_clusters=self.num_sources, random_state=self.kmeans_random_seed,
                                            **extra_kmeans_kwargs)
def run(self):
This function calls TransformerNMF on the magnitude spectrogram of each channel in the input audio signal.
The templates and activation matrices returned are clustered using K-Means clustering. These clusters are used