我可以信任哪一种工具?
问题描述:
我似乎已经确定我可以信任的工具的问题...我可以信任哪一种工具?
我一直在测试的工具是Librosa和Kaldi在的audio file的40个滤波器能 地块的可视化数据集的创建。
使用卡尔迪中的这些配置来提取滤波器组能量。
fbank.conf
--htk-compat=false
--window-type=hamming
--sample-frequency=16000
--num-mel-bins=40
--use-log-fbank=true
提取的数据是使用librosa
情节作图。 Librosa
利用matplotlib
pcolormesh
,这意味着不应该有任何区别,除了librosa
提供了一个更容易使用的API。
print static.shape
print type(static)
print np.min(static)
print np.max(static)
fig = plt.figure()
librosa.display.specshow(static.T,sr=16000,x_axis='frames',y_axis='mel',hop_length=160,cmap=cm.jet)
#plt.axis('off')
plt.title("log mel power spectrum of " + name)
plt.colorbar(format='%+02.0f dB')
plt.tight_layout()
plt.savefig(plot+"/"+name+"_plot_static_conv.png")
plt.show()
输出:
(474, 40)
<type 'numpy.ndarray'>
-1.828067
22.70058
Got bus address: "unix:abstract=/tmp/dbus-aYbBS1JWyw,guid=17dd413abcda54272e1d93d159174cdf"
Connected to accessibility bus at: "unix:abstract=/tmp/dbus-aYbBS1JWyw,guid=17dd413abcda54272e1d93d159174cdf"
Registered DEC: true
Registered event listener change listener: true
类似的情节在Librosa创建为这样:
audio_path="../../../../Dropbox/SI1392.wav"
#audio_path = librosa.util.example_audio_file()
print "Example audio found"
y, sr = librosa.load(audio_path)
print "Example audio loaded"
specto = librosa.feature.melspectrogram(y, sr=sr, n_fft=400, hop_length=160, n_mels=40)
print "Example audio spectogram"
log_specto = librosa.core.logamplitude(specto)
print "min and max"
print np.min(log_specto)
print np.max(log_specto)
print "Example audio log specto"
plt.figure(figsize=(12,4))
librosa.display.specshow(log_specto,sr=sr,x_axis='frames', y_axis='mel', hop_length=160,cmap=cm.jet)
plt.title('mel power spectrogram')
plt.colorbar(format='%+02.0f dB')
plt.tight_layout()
print "See"
print specto.shape
print log_specto.shape
plt.show()
输出该:
libraries loaded!
Example audio found
Example audio loaded
Example audio spectogram
min and max
-84.6796661558
-4.67966615584
Example audio log specto
See
(40, 657)
(40, 657)
尽管有颜色,但两者都显示类似的图,但能量范围似乎有点不同。
Kaldi有-1.828067/22.70058
最小/ MAX和Librosa具有最小/最大-84.6796661558/-4.67966615584
问题是我想保存这些地块作为numpy的阵列,用于进一步处理。
这似乎创造一个不同的情节.. 使用Librosa数据,我创建的情节一样:
plt.figure()
min_max_scaled_log_specto = min_max_scaler.fit_transform(log_specto)
convert = plt.get_cmap(cm.jet)
numpy_static = convert(min_max_scaled_log_specto)
plt.imshow(np.flipud(log_specto), aspect='auto')
plt.colorbar()
print "Sooo?"
plt.show()
这是完美的......它类似于原始数据集..
但随着Kaldi我从这个代码这个情节:
convert = plt.get_cmap(cm.jet)
numpy_output_static = convert(np.flipud(static.T))
plt.imshow(numpy_output_static,aspect = 'auto')
plt.show()
raw_input("sadas")
我从以前的帖子,对于红色发生的历史的原因可能是由于范围和标准化前,将有助于发现 - 但这导致此:
min_max_scaler = preprocessing.MinMaxScaler(feature_range=(0,1))
convert = plt.get_cmap(cm.jet)
numpy_output_static = convert(min_max_scaler.fit_transform(np.flipud(static.T)))
plt.imshow(numpy_output_static,aspect = 'auto')
plt.show()
但是,这绝不可能与Kaldi阴谋的原始阴谋有关...那么,为什么它看起来像这样?为什么我能够用从Librosa提取的能量而不是从Kaldi提取的能量来绘制它?对于Librosa
最小工作示例:与kaldi
#
# Minimal example of Librosa plot example.
# Made for testing the plot, and test for accurat
# Conversion between the two parts.
#
import os
import sys
from os import listdir
from os.path import isfile, join
import numpy as np
import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.colors import Normalize
import matplotlib
from PIL import Image
import librosa
import colormaps as cmaps
import librosa.display
import ast
from scipy.misc import toimage
from matplotlib import cm
from sklearn import preprocessing
print "libraries loaded!"
min_max_scaler = preprocessing.MinMaxScaler(feature_range=(0,1))
audio_path="../../../../Dropbox/SI1392.wav"
#audio_path = librosa.util.example_audio_file()
print "Example audio found"
y, sr = librosa.load(audio_path)
print "Example audio loaded"
specto = librosa.feature.melspectrogram(y, sr=sr, n_fft=400, hop_length=160, n_mels=40)
print "Example audio spectogram"
log_specto = librosa.core.logamplitude(specto)
print "min and max"
print np.min(log_specto)
print np.max(log_specto)
print "Example audio log specto"
plt.figure(figsize=(12,4))
librosa.display.specshow(log_specto,sr=sr,x_axis='frames', y_axis='mel', hop_length=160,cmap=cm.jet)
plt.title('mel power spectrogram')
plt.colorbar(format='%+02.0f dB')
plt.tight_layout()
print "See"
#plt.show()
print specto.shape
print log_specto.shape
plt.figure()
min_max_scaled_log_specto = min_max_scaler.fit_transform(log_specto)
convert = plt.get_cmap(cm.jet)
numpy_static = convert(min_max_scaled_log_specto)
plt.imshow(np.flipud(log_specto), aspect='auto')
plt.colorbar()
print "Sooo?"
plt.show()
最小工作示例 - (真实数据):
#
# Extracted version:
#
#
#
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
from PIL import Image
import librosa
import librosa.display
from matplotlib import cm
from sklearn import preprocessing
import ast
import urllib
import os
import sys
from os import listdir
from os.path import isfile, join
min_max_scaler = preprocessing.MinMaxScaler(feature_range=(0,1))
def make_plot_store_data(name,interweaved,static,delta,delta_delta,isTrain,isTest,isDev):
print static.shape
print type(static)
print np.min(static)
print np.max(static)
fig = plt.figure()
librosa.display.specshow(static.T,sr=16000,x_axis='frames',y_axis='mel',hop_length=160,cmap=cm.jet)
#plt.axis('off')
plt.title("log mel power spectrum of " + name)
plt.colorbar(format='%+02.0f dB')
plt.tight_layout()
#plt.show()
#plt.close()
#raw_input("asd")
if isTrain == True:
plt.figure()
convert = plt.get_cmap(cm.jet)
numpy_output_static = convert(min_max_scaler.fit_transform(np.flipud(static.T)))
plt.imshow(numpy_output_static,aspect = 'auto')
plt.show()
raw_input("sadas")
link = "https://gist.githubusercontent.com/Miail/51311b34f5e5333bbddf9cb17c737ea4/raw/786b72477190023e93b9dd0cbbb43284ab59921b/feature.txt"
f = urllib.urlopen(link)
temp_list = []
for line in f:
entries = 0
data_splitted = line.split()
if len(data_splitted) == 2:
file_name = data_splitted[0]
else:
entries = 1+entries
if data_splitted[-1] == ']':
temp_list.extend([ast.literal_eval(i) for i in data_splitted[:-1]])
else:
temp_list.extend([ast.literal_eval(i) for i in data_splitted])
dimension = 120
entries = len(temp_list)/dimension
data = np.array(temp_list)
interweaved = data.reshape(entries,dimension)
static =interweaved[:,:-80]
delta =interweaved[:,40:-40]
delta_delta =interweaved[:,80:]
plot_interweaved = data.reshape(entries*3,dimension/3)
print static.shape
print delta.shape
print delta_delta.shape
make_plot_store_data(file_name,plot_interweaved,static,delta,delta_delta,True,False,False)
答
我似乎找到了答案从另一个post类似于此..
问题是我正常化..所以,而不是做
numpy_output_static = convert(min_max_scaler.fit_transform(np.flipud(static.T)))
我应该做的
norm_static = matplotlib.colors.Normalize(vmin=static.min(),vmax=static.max())
numpy_output_static = convert(norm_static(np.flipud(static.T)))