Ошибка кода байесовского классификатора. Необходимо исправить ошибку времени выполнения в проблемах 7 и 8 [закрыто]

Ошибка кода байесовского классификатора. Необходимо исправить ошибку времени выполнения в проблемах 7 и 8 [закрыто] ⇐ Python

1 сообщение • Страница 1 из 1

Anonymous

Ошибка кода байесовского классификатора. Необходимо исправить ошибку времени выполнения в проблемах 7 и 8 [закрыто]

Цитата

Сообщение Anonymous » 16 янв 2025, 00:10

Это мой подробный код, который я написал для классификатора Байеса. Я столкнулся с набором ошибок во время выполнения частей постановки задачи 7 и 8. Это постановка задачи, и я закомментировал каждый раздел своего кода на основе постановки задачи.
Вот мой код:

Код: Выделить всё

# Import necessary libraries
import IPython . display as display
import librosa
import librosa . display
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA

!pip install seglearn # to install the seglearn package
from seglearn . transform import Segment
from scipy .  fft import dct , idct
from scipy.stats import multivariate_normal

# Problem 1: Segment the input sequence {s[n]} into N vectors
# Load the audio file
sample_audio = 'LinkinPark.wav'
x, sr = librosa.load(sample_audio, sr=44100)

# Slice the audio into two 30-second segments
x_v = x[:sr*30]
x_g = x[sr*30:sr*60]
X = np.stack((x_v, x_g))

# Labels for the two classes
y = np.array([0, 1])

# Compute the number of samples per frame for 0.02 seconds
samples_per_frame = int(sr * 0.02)

# Segment the audio into fixed-width frames
segmenter = Segment(width=samples_per_frame, overlap=0).fit(X, y)
s_d = segmenter.transform(X, y)

print("Problem 1: Size of each segmented vector:", s_d[0].shape)

# Problem 2: Compute the DCT on each D-dimensional vector
X_d = dct(s_d[0], axis=1)

print("Problem 2: Size of generated feature vector:", X_d.shape)

# Problem 3: Visualize the generated dataset on 2D space
plt.scatter(X_d[:, 0], X_d[:, 1], c=y[s_d[1]], cmap='viridis', label='Classes')
plt.title('2D Visualization of Dataset')
plt.xlabel('DCT Coefficient 1')
plt.ylabel('DCT Coefficient 2')
plt.legend()
plt.show()

# Problem 4: Compute the mean vector and covariance matrix for each class
unique_classes = np.unique(s_d[1])
mean_vectors = []
cov_matrices = []

for cls in unique_classes:
class_samples = X_d[s_d[1] == cls]
mean_vectors.append(np.mean(class_samples, axis=0))
cov_matrices.append(np.cov(class_samples, rowvar=False))

print("Problem 4: Size of mean vector:", np.array(mean_vectors).shape)
print("Size of covariance matrix:", np.array(cov_matrices).shape)

# Problem 5: Compute likelihood, prior probabilities, and P(x)
def compute_likelihood(x, mean, cov):
d = mean.shape[0]

# Calculate the determinant and inverse of the covariance matrix using SVD
# This helps in handling ill-conditioned matrices that might cause overflow
U, S, V = np.linalg.svd(cov)
det_cov = np.prod(S)
inv_cov = V.T @ np.diag(1/S) @ U.T

# Compute the exponent term directly
exponent = -0.5 * (x - mean).T @ inv_cov @ (x - mean)
# Use log to avoid overflow
log_likelihood = exponent - 0.5 * (d * np.log(2 * np.pi) + np.log(det_cov))

# Return the log likelihood
return log_likelihood

priors = np.array([np.sum(s_d[1] == cls) / len(s_d[1]) for cls in unique_classes])

print("Problem 5: Prior probabilities:", priors)

# Problem 6: Create the test set using the audio file and show dimensions
x_test, sr = librosa.load(sample_audio, sr=44100)
x_test_segmented = x_test[sr*60:sr*90]  # Segment for test (e.g., 60-90 seconds)
y_test_segment = np.array([1])

test_segmenter = Segment(width=samples_per_frame, overlap=0).fit(np.array([x_test_segmented]), y_test_segment)
test_s_d = test_segmenter.transform(np.array([x_test_segmented]), y_test_segment)
X_test_d = dct(test_s_d[0], axis=1)

y_test = test_s_d[1]
print("Problem 6: Dimensions of test feature vector:", X_test_d.shape)

# Problem 7: Compute posterior probabilities and classification accuracy
posterior_probs = []
class_predictions = []

for test_vector in X_test_d:
class_posteriors = []
max_log_posterior = -np.inf  # Track maximum log-posterior for stability
for cls in unique_classes:
log_likelihood = compute_likelihood(test_vector, mean_vectors[cls], cov_matrices[cls])
log_posterior = np.log(priors[cls]) + log_likelihood
class_posteriors.append(log_posterior)
max_log_posterior = max(max_log_posterior, log_posterior)

# Normalize to avoid overflow using log-sum-exp trick
exp_posteriors = np.exp(np.array(class_posteriors) - max_log_posterior)
normalized_posteriors = exp_posteriors / np.sum(exp_posteriors)
posterior_probs.append(normalized_posteriors)
class_predictions.append(np.argmax(normalized_posteriors))

posterior_probs = np.array(posterior_probs)
class_predictions = np.array(class_predictions)

accuracy = np.mean(class_predictions == y_test)
print("Problem 7: Classification accuracy:", accuracy)

# Problem 8:  Bayes classifier using original signal vectors
def bayes_classifier(input_vector, mean_vectors, cov_matrices, priors):
posteriors = []
max_log_posterior = -np.inf  # Track maximum log-posterior for stability
for cls in range(len(mean_vectors)):
log_likelihood = compute_likelihood(input_vector, mean_vectors[cls], cov_matrices[cls])
log_posterior = np.log(priors[cls]) + log_likelihood
posteriors.append(log_posterior)
max_log_posterior = max(max_log_posterior, log_posterior)

# Normalize to avoid overflow using log-sum-exp trick
exp_posteriors = np.exp(np.array(posteriors) - max_log_posterior)
normalized_posteriors = exp_posteriors / np.sum(exp_posteriors)
return np.argmax(normalized_posteriors)

original_signal_prediction = bayes_classifier(X_test_d[0], mean_vectors, cov_matrices, priors)
print("Problem 8: Predicted class for the original signal vector:", original_signal_prediction)

Я получаю сообщение об ошибке для этой части (Задача 7. Вычисление апостериорных вероятностей и точности классификации).
:110: RuntimeWarning: в вычитании обнаружено недопустимое значение
exp_posteriors = np.exp(np.array(class_posteriors) - max_log_posterior)
Может ли кто-нибудь написать правильную часть кода для задачи 7 и задачи 8 ?

Подробнее здесь: https://stackoverflow.com/questions/793 ... 7-and-prob

1736975420

Anonymous

Это мой подробный код, который я написал для классификатора Байеса. Я столкнулся с набором ошибок во время выполнения частей постановки задачи 7 и 8. Это постановка задачи, и я закомментировал каждый раздел своего кода на основе постановки задачи.
Вот мой код:
[code]# Import necessary libraries
import IPython . display as display
import librosa
import librosa . display
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA

!pip install seglearn # to install the seglearn package
from seglearn . transform import Segment
from scipy .  fft import dct , idct
from scipy.stats import multivariate_normal

# Problem 1: Segment the input sequence {s[n]} into N vectors
# Load the audio file
sample_audio = 'LinkinPark.wav'
x, sr = librosa.load(sample_audio, sr=44100)

# Slice the audio into two 30-second segments
x_v = x[:sr*30]
x_g = x[sr*30:sr*60]
X = np.stack((x_v, x_g))

# Labels for the two classes
y = np.array([0, 1])

# Compute the number of samples per frame for 0.02 seconds
samples_per_frame = int(sr * 0.02)

# Segment the audio into fixed-width frames
segmenter = Segment(width=samples_per_frame, overlap=0).fit(X, y)
s_d = segmenter.transform(X, y)

print("Problem 1: Size of each segmented vector:", s_d[0].shape)

# Problem 2: Compute the DCT on each D-dimensional vector
X_d = dct(s_d[0], axis=1)

print("Problem 2: Size of generated feature vector:", X_d.shape)

# Problem 3: Visualize the generated dataset on 2D space
plt.scatter(X_d[:, 0], X_d[:, 1], c=y[s_d[1]], cmap='viridis', label='Classes')
plt.title('2D Visualization of Dataset')
plt.xlabel('DCT Coefficient 1')
plt.ylabel('DCT Coefficient 2')
plt.legend()
plt.show()

# Problem 4: Compute the mean vector and covariance matrix for each class
unique_classes = np.unique(s_d[1])
mean_vectors = []
cov_matrices = []

for cls in unique_classes:
class_samples = X_d[s_d[1] == cls]
mean_vectors.append(np.mean(class_samples, axis=0))
cov_matrices.append(np.cov(class_samples, rowvar=False))

print("Problem 4: Size of mean vector:", np.array(mean_vectors).shape)
print("Size of covariance matrix:", np.array(cov_matrices).shape)

# Problem 5: Compute likelihood, prior probabilities, and P(x)
def compute_likelihood(x, mean, cov):
d = mean.shape[0]

# Calculate the determinant and inverse of the covariance matrix using SVD
# This helps in handling ill-conditioned matrices that might cause overflow
U, S, V = np.linalg.svd(cov)
det_cov = np.prod(S)
inv_cov = V.T @ np.diag(1/S) @ U.T

# Compute the exponent term directly
exponent = -0.5 * (x - mean).T @ inv_cov @ (x - mean)
# Use log to avoid overflow
log_likelihood = exponent - 0.5 * (d * np.log(2 * np.pi) + np.log(det_cov))

# Return the log likelihood
return log_likelihood

priors = np.array([np.sum(s_d[1] == cls) / len(s_d[1]) for cls in unique_classes])

print("Problem 5: Prior probabilities:", priors)

# Problem 6: Create the test set using the audio file and show dimensions
x_test, sr = librosa.load(sample_audio, sr=44100)
x_test_segmented = x_test[sr*60:sr*90]  # Segment for test (e.g., 60-90 seconds)
y_test_segment = np.array([1])

test_segmenter = Segment(width=samples_per_frame, overlap=0).fit(np.array([x_test_segmented]), y_test_segment)
test_s_d = test_segmenter.transform(np.array([x_test_segmented]), y_test_segment)
X_test_d = dct(test_s_d[0], axis=1)

y_test = test_s_d[1]
print("Problem 6: Dimensions of test feature vector:", X_test_d.shape)

# Problem 7: Compute posterior probabilities and classification accuracy
posterior_probs = []
class_predictions = []

for test_vector in X_test_d:
class_posteriors = []
max_log_posterior = -np.inf  # Track maximum log-posterior for stability
for cls in unique_classes:
log_likelihood = compute_likelihood(test_vector, mean_vectors[cls], cov_matrices[cls])
log_posterior = np.log(priors[cls]) + log_likelihood
class_posteriors.append(log_posterior)
max_log_posterior = max(max_log_posterior, log_posterior)

# Normalize to avoid overflow using log-sum-exp trick
exp_posteriors = np.exp(np.array(class_posteriors) - max_log_posterior)
normalized_posteriors = exp_posteriors / np.sum(exp_posteriors)
posterior_probs.append(normalized_posteriors)
class_predictions.append(np.argmax(normalized_posteriors))

posterior_probs = np.array(posterior_probs)
class_predictions = np.array(class_predictions)

accuracy = np.mean(class_predictions == y_test)
print("Problem 7: Classification accuracy:", accuracy)

# Problem 8:  Bayes classifier using original signal vectors
def bayes_classifier(input_vector, mean_vectors, cov_matrices, priors):
posteriors = []
max_log_posterior = -np.inf  # Track maximum log-posterior for stability
for cls in range(len(mean_vectors)):
log_likelihood = compute_likelihood(input_vector, mean_vectors[cls], cov_matrices[cls])
log_posterior = np.log(priors[cls]) + log_likelihood
posteriors.append(log_posterior)
max_log_posterior = max(max_log_posterior, log_posterior)

# Normalize to avoid overflow using log-sum-exp trick
exp_posteriors = np.exp(np.array(posteriors) - max_log_posterior)
normalized_posteriors = exp_posteriors / np.sum(exp_posteriors)
return np.argmax(normalized_posteriors)

original_signal_prediction = bayes_classifier(X_test_d[0], mean_vectors, cov_matrices, priors)
print("Problem 8: Predicted class for the original signal vector:", original_signal_prediction)
[/code]
Я получаю сообщение об ошибке для этой части (Задача 7. Вычисление апостериорных вероятностей и точности классификации).
:110: RuntimeWarning: в вычитании обнаружено недопустимое значение
exp_posteriors = np.exp(np.array(class_posteriors) - max_log_posterior)
Может ли кто-нибудь написать правильную часть кода для задачи 7 и задачи 8 ? 

Подробнее здесь: [url]https://stackoverflow.com/questions/79359212/bayes-classifier-code-error-need-fix-for-a-runtime-error-in-problem-7-and-prob[/url]