
Before Change

# Pre-change version: pick the third panel of a 3-row, 1-column pyplot grid,
# then render the foreground spectrogram in dB relative to its own peak.
# No ax= is passed, so specshow is left to choose the axes it draws on.
plt.subplot(3, 1, 3)
foreground_db = librosa.amplitude_to_db(S_foreground[:, idx], ref=np.max)
librosa.display.specshow(foreground_db, sr=sr, x_axis="time", y_axis="log")

After Change

# Plot a 5-second slice of the spectrum (seconds 10 through 15).
# time_to_frames converts the two time stamps into frame indices, which are
# unpacked into a slice so the same window can be reused for later plots.
idx = slice(*librosa.time_to_frames([10, 15], sr=sr))
fig, ax = plt.subplots()
img = librosa.display.specshow(librosa.amplitude_to_db(S_full[:, idx], ref=np.max),
                               y_axis="log", x_axis="time", sr=sr, ax=ax)
fig.colorbar(img, ax=ax)

# The wiggly lines above are due to the vocal component.
# Our goal is to separate them from the accompanying
# instrumentation.

# We'll compare frames using cosine similarity, and aggregate similar frames
# by taking their (per-frequency) median value.
# To avoid being biased by local continuity, we constrain similar frames to be
# separated by at least 2 seconds.
# This suppresses sparse/non-repetitive deviations from the average spectrum,
# and works well to discard vocal elements.

# aggregate and metric are passed explicitly so the call actually performs the
# median / cosine-similarity filtering described above; without them nn_filter
# falls back to its own defaults.
S_filter = librosa.decompose.nn_filter(S_full,
                                       aggregate=np.median,
                                       metric="cosine",
                                       width=int(librosa.time_to_frames(2, sr=sr)))

# The output of the filter shouldn't be greater than the input
# if we assume signals are additive.  Taking the pointwise minimum
# with the input spectrum forces this.
S_filter = np.minimum(S_full, S_filter)

# The raw filter output can be used as a mask,
# but it sounds better if we use soft-masking.

# We can also use a margin to reduce bleed between the vocals and instrumentation masks.
# Note: the margins need not be equal for foreground and background separation
margin_i, margin_v = 2, 10
power = 2

# Instrumental (background) mask: the filtered spectrum is weighed against the
# margin-scaled residual.
mask_i = librosa.util.softmask(S_filter,
                               margin_i * (S_full - S_filter),
                               power=power)

# Vocal (foreground) mask: the residual is weighed against the margin-scaled
# filtered spectrum.
mask_v = librosa.util.softmask(S_full - S_filter,
                               margin_v * S_filter,
                               power=power)

# Once we have the masks, simply multiply them with the input spectrum
# to separate the components

S_foreground = mask_v * S_full
S_background = mask_i * S_full

# Plot the same slice, but separated into its foreground and background

# sphinx_gallery_thumbnail_number = 2

# Three stacked panels sharing both axes: full mix, background, foreground.
fig, ax = plt.subplots(nrows=3, sharex=True, sharey=True)
img = librosa.display.specshow(librosa.amplitude_to_db(S_full[:, idx], ref=np.max),
                               y_axis="log", x_axis="time", sr=sr, ax=ax[0])
ax[0].set(title="Full spectrum")

librosa.display.specshow(librosa.amplitude_to_db(S_background[:, idx], ref=np.max),
                         y_axis="log", x_axis="time", sr=sr, ax=ax[1])
# Title the remaining panels too, so each one can be identified at a glance.
ax[1].set(title="Background")

librosa.display.specshow(librosa.amplitude_to_db(S_foreground[:, idx], ref=np.max),
                         y_axis="log", x_axis="time", sr=sr, ax=ax[2])
ax[2].set(title="Foreground")

# One colorbar for the whole figure, built from the full-spectrum image.
fig.colorbar(img, ax=ax)
Italian Trulli

Frequency: 3

Non-data size: 13


Project Name: librosa/librosa
Commit Name: 24d6f5e42d775949e191122091826eefa7bf1246
Time: 2020-06-27
Author: bmcfee@users.noreply.github.com
File Name: docs/examples/plot_vocal_separation.py
Class Name:
Method Name:

Project Name: librosa/librosa
Commit Name: 24d6f5e42d775949e191122091826eefa7bf1246
Time: 2020-06-27
Author: bmcfee@users.noreply.github.com
File Name: docs/examples/plot_presets.py
Class Name:
Method Name:

Project Name: librosa/librosa
Commit Name: 24d6f5e42d775949e191122091826eefa7bf1246
Time: 2020-06-27
Author: bmcfee@users.noreply.github.com
File Name: docs/examples/plot_pcen_stream.py
Class Name:
Method Name:

Project Name: librosa/librosa
Commit Name: 24d6f5e42d775949e191122091826eefa7bf1246
Time: 2020-06-27
Author: bmcfee@users.noreply.github.com
File Name: docs/examples/plot_vocal_separation.py
Class Name:
Method Name: