Это мой подробный код, который я написал для классификатора Байеса. Я столкнулся с набором ошибок во время выполнения частей постановки задачи 7 и 8. Это постановка задачи, и я закомментировал каждый раздел своего кода на основе постановки задачи.
Вот мой код:
# Import necessary libraries
import IPython . display as display
import librosa
import librosa . display
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
!pip install seglearn # to install the seglearn package
from seglearn . transform import Segment
from scipy . fft import dct , idct
from scipy.stats import multivariate_normal
# Problem 1: Segment the input sequence {s[n]} into N vectors
# Load the audio file
sample_audio = 'LinkinPark.wav'
x, sr = librosa.load(sample_audio, sr=44100)
# Slice the audio into two 30-second segments
x_v = x[:sr*30]
x_g = x[sr*30:sr*60]
X = np.stack((x_v, x_g))
# Labels for the two classes
y = np.array([0, 1])
# Compute the number of samples per frame for 0.02 seconds
samples_per_frame = int(sr * 0.02)
# Segment the audio into fixed-width frames
segmenter = Segment(width=samples_per_frame, overlap=0).fit(X, y)
s_d = segmenter.transform(X, y)
print("Problem 1: Size of each segmented vector:", s_d[0].shape)
# Problem 2: Compute the DCT on each D-dimensional vector
X_d = dct(s_d[0], axis=1)
print("Problem 2: Size of generated feature vector:", X_d.shape)
# Problem 3: Visualize the generated dataset on 2D space
plt.scatter(X_d[:, 0], X_d[:, 1], c=y[s_d[1]], cmap='viridis', label='Classes')
plt.title('2D Visualization of Dataset')
plt.xlabel('DCT Coefficient 1')
plt.ylabel('DCT Coefficient 2')
plt.legend()
plt.show()
# Problem 4: Compute the mean vector and covariance matrix for each class
unique_classes = np.unique(s_d[1])
mean_vectors = []
cov_matrices = []
for cls in unique_classes:
class_samples = X_d[s_d[1] == cls]
mean_vectors.append(np.mean(class_samples, axis=0))
cov_matrices.append(np.cov(class_samples, rowvar=False))
print("Problem 4: Size of mean vector:", np.array(mean_vectors).shape)
print("Size of covariance matrix:", np.array(cov_matrices).shape)
# Problem 5: Compute likelihood, prior probabilities, and P(x)
def compute_likelihood(x, mean, cov):
d = mean.shape[0]
# Calculate the determinant and inverse of the covariance matrix using SVD
# This helps in handling ill-conditioned matrices that might cause overflow
U, S, V = np.linalg.svd(cov)
det_cov = np.prod(S)
inv_cov = V.T @ np.diag(1/S) @ U.T
# Compute the exponent term directly
exponent = -0.5 * (x - mean).T @ inv_cov @ (x - mean)
# Use log to avoid overflow
log_likelihood = exponent - 0.5 * (d * np.log(2 * np.pi) + np.log(det_cov))
# Return the log likelihood
return log_likelihood
priors = np.array([np.sum(s_d[1] == cls) / len(s_d[1]) for cls in unique_classes])
print("Problem 5: Prior probabilities:", priors)
# Problem 6: Create the test set using the audio file and show dimensions
x_test, sr = librosa.load(sample_audio, sr=44100)
x_test_segmented = x_test[sr*60:sr*90] # Segment for test (e.g., 60-90 seconds)
y_test_segment = np.array([1])
test_segmenter = Segment(width=samples_per_frame, overlap=0).fit(np.array([x_test_segmented]), y_test_segment)
test_s_d = test_segmenter.transform(np.array([x_test_segmented]), y_test_segment)
X_test_d = dct(test_s_d[0], axis=1)
y_test = test_s_d[1]
print("Problem 6: Dimensions of test feature vector:", X_test_d.shape)
# Problem 7: Compute posterior probabilities and classification accuracy
posterior_probs = []
class_predictions = []
for test_vector in X_test_d:
class_posteriors = []
max_log_posterior = -np.inf # Track maximum log-posterior for stability
for cls in unique_classes:
log_likelihood = compute_likelihood(test_vector, mean_vectors[cls], cov_matrices[cls])
log_posterior = np.log(priors[cls]) + log_likelihood
class_posteriors.append(log_posterior)
max_log_posterior = max(max_log_posterior, log_posterior)
# Normalize to avoid overflow using log-sum-exp trick
exp_posteriors = np.exp(np.array(class_posteriors) - max_log_posterior)
normalized_posteriors = exp_posteriors / np.sum(exp_posteriors)
posterior_probs.append(normalized_posteriors)
class_predictions.append(np.argmax(normalized_posteriors))
posterior_probs = np.array(posterior_probs)
class_predictions = np.array(class_predictions)
accuracy = np.mean(class_predictions == y_test)
print("Problem 7: Classification accuracy:", accuracy)
# Problem 8: Bayes classifier using original signal vectors
def bayes_classifier(input_vector, mean_vectors, cov_matrices, priors):
posteriors = []
max_log_posterior = -np.inf # Track maximum log-posterior for stability
for cls in range(len(mean_vectors)):
log_likelihood = compute_likelihood(input_vector, mean_vectors[cls], cov_matrices[cls])
log_posterior = np.log(priors[cls]) + log_likelihood
posteriors.append(log_posterior)
max_log_posterior = max(max_log_posterior, log_posterior)
# Normalize to avoid overflow using log-sum-exp trick
exp_posteriors = np.exp(np.array(posteriors) - max_log_posterior)
normalized_posteriors = exp_posteriors / np.sum(exp_posteriors)
return np.argmax(normalized_posteriors)
original_signal_prediction = bayes_classifier(X_test_d[0], mean_vectors, cov_matrices, priors)
print("Problem 8: Predicted class for the original signal vector:", original_signal_prediction)
Я получаю сообщение об ошибке для этой части (Задача 7. Вычисление апостериорных вероятностей и точности классификации).
:110: RuntimeWarning: в вычитании обнаружено недопустимое значение
exp_posteriors = np.exp(np.array(class_posteriors) - max_log_posterior)
Может ли кто-нибудь написать правильную часть кода для задачи 7 и задачи 8 ?
Это мой подробный код, который я написал для классификатора Байеса. Я столкнулся с набором ошибок во время выполнения частей постановки задачи 7 и 8. Это постановка задачи, и я закомментировал каждый раздел своего кода на основе постановки задачи. Вот мой код: [code]# Import necessary libraries import IPython . display as display import librosa import librosa . display import numpy as np import matplotlib.pyplot as plt from sklearn.decomposition import PCA
!pip install seglearn # to install the seglearn package from seglearn . transform import Segment from scipy . fft import dct , idct from scipy.stats import multivariate_normal
# Problem 1: Segment the input sequence {s[n]} into N vectors # Load the audio file sample_audio = 'LinkinPark.wav' x, sr = librosa.load(sample_audio, sr=44100)
# Slice the audio into two 30-second segments x_v = x[:sr*30] x_g = x[sr*30:sr*60] X = np.stack((x_v, x_g))
# Labels for the two classes y = np.array([0, 1])
# Compute the number of samples per frame for 0.02 seconds samples_per_frame = int(sr * 0.02)
# Segment the audio into fixed-width frames segmenter = Segment(width=samples_per_frame, overlap=0).fit(X, y) s_d = segmenter.transform(X, y)
print("Problem 1: Size of each segmented vector:", s_d[0].shape)
# Problem 2: Compute the DCT on each D-dimensional vector X_d = dct(s_d[0], axis=1)
print("Problem 2: Size of generated feature vector:", X_d.shape)
# Problem 3: Visualize the generated dataset on 2D space plt.scatter(X_d[:, 0], X_d[:, 1], c=y[s_d[1]], cmap='viridis', label='Classes') plt.title('2D Visualization of Dataset') plt.xlabel('DCT Coefficient 1') plt.ylabel('DCT Coefficient 2') plt.legend() plt.show()
# Problem 4: Compute the mean vector and covariance matrix for each class unique_classes = np.unique(s_d[1]) mean_vectors = [] cov_matrices = []
for cls in unique_classes: class_samples = X_d[s_d[1] == cls] mean_vectors.append(np.mean(class_samples, axis=0)) cov_matrices.append(np.cov(class_samples, rowvar=False))
print("Problem 4: Size of mean vector:", np.array(mean_vectors).shape) print("Size of covariance matrix:", np.array(cov_matrices).shape)
# Problem 5: Compute likelihood, prior probabilities, and P(x) def compute_likelihood(x, mean, cov): d = mean.shape[0]
# Calculate the determinant and inverse of the covariance matrix using SVD # This helps in handling ill-conditioned matrices that might cause overflow U, S, V = np.linalg.svd(cov) det_cov = np.prod(S) inv_cov = V.T @ np.diag(1/S) @ U.T
# Compute the exponent term directly exponent = -0.5 * (x - mean).T @ inv_cov @ (x - mean) # Use log to avoid overflow log_likelihood = exponent - 0.5 * (d * np.log(2 * np.pi) + np.log(det_cov))
# Return the log likelihood return log_likelihood
priors = np.array([np.sum(s_d[1] == cls) / len(s_d[1]) for cls in unique_classes])
print("Problem 5: Prior probabilities:", priors)
# Problem 6: Create the test set using the audio file and show dimensions x_test, sr = librosa.load(sample_audio, sr=44100) x_test_segmented = x_test[sr*60:sr*90] # Segment for test (e.g., 60-90 seconds) y_test_segment = np.array([1])
# Problem 8: Bayes classifier using original signal vectors def bayes_classifier(input_vector, mean_vectors, cov_matrices, priors): posteriors = [] max_log_posterior = -np.inf # Track maximum log-posterior for stability for cls in range(len(mean_vectors)): log_likelihood = compute_likelihood(input_vector, mean_vectors[cls], cov_matrices[cls]) log_posterior = np.log(priors[cls]) + log_likelihood posteriors.append(log_posterior) max_log_posterior = max(max_log_posterior, log_posterior)
# Normalize to avoid overflow using log-sum-exp trick exp_posteriors = np.exp(np.array(posteriors) - max_log_posterior) normalized_posteriors = exp_posteriors / np.sum(exp_posteriors) return np.argmax(normalized_posteriors)
original_signal_prediction = bayes_classifier(X_test_d[0], mean_vectors, cov_matrices, priors) print("Problem 8: Predicted class for the original signal vector:", original_signal_prediction) [/code] Я получаю сообщение об ошибке для этой части (Задача 7. Вычисление апостериорных вероятностей и точности классификации). :110: RuntimeWarning: в вычитании обнаружено недопустимое значение exp_posteriors = np.exp(np.array(class_posteriors) - max_log_posterior) Может ли кто-нибудь написать правильную часть кода для задачи 7 и задачи 8 ?