Обработка сигналов выходит за рамки моей компетенции, поэтому я хотел бы получить обратную связь:
- Правильный ли этот подход или в моем анализе есть очевидные ошибки?
- Есть ли более быстрые/эффективные способы сделать это? (Без распараллеливания)
- Есть ли крайние случаи, которые этот подход пропускает?
Код: Выделить всё
# Analyzes the frequency content of audio files to verify whether
# the reported bitrate reflects the actual audio quality, or whether
# the file has been transcoded from a lower-quality source.
import os
import time
import subprocess
import numpy as np
# Directory holding the `ffmpeg` and `ffprobe` executables this script runs.
# It is a relative path, so it is resolved against the current working directory.
LIBS_PATH = "../libs"
def generate_spectrogram(path: str):
    """Render a spectrogram picture of an audio file with ffmpeg.

    The picture is written to the current working directory as
    ``<input file name without extension>_spectrogram.png``; an existing
    file with that name is overwritten.

    Args:
        path: Path of the audio file to visualize.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    base = os.path.splitext(os.path.basename(path))[0]
    output_path = f"{base}_spectrogram.png"
    subprocess.run(
        [
            f"{LIBS_PATH}/ffmpeg",
            "-v", "error",  # Suppress all non-error messages
            "-y",  # Overwrite the output file if it exists
            "-i", path,  # Input file
            # Settings for the spectrogram. ffmpeg applies an option to the
            # next file named on the command line, so the filter has to come
            # BEFORE the output path; placed after it, it is a trailing
            # option that is not applied to the output.
            "-lavfi", "showspectrumpic=s=400x250:legend=1:scale=log:color=intensity",
            output_path,  # Output file
        ],
        check=True,
    )
def read_audio_chunk(path: str, start_at_sec: float):
    """Decode a short mono excerpt of an audio file into float32 samples.

    ffmpeg is asked to seek to ``start_at_sec``, decode two seconds of audio,
    downmix it to a single channel and write headerless little-endian float32
    samples to stdout. Those bytes are reinterpreted as a NumPy array.

    ffmpeg's exit status is not inspected here: if decoding fails, the result
    is simply whatever was written to stdout, which may be an empty array.

    Args:
        path: Path of the audio file to decode.
        start_at_sec: Offset into the track, in seconds, where the excerpt
            starts.

    Returns:
        A 1-D ``np.float32`` array built from ffmpeg's stdout.
    """
    chunk_seconds = 2

    command = [
        f"{LIBS_PATH}/ffmpeg",
        "-v", "error",  # Only report errors
        "-ss", str(start_at_sec),  # Given before -i, so the input is seeked before decoding
        "-i", path,  # Input file
        "-t", str(chunk_seconds),  # Stop after this many seconds
        "-ac", "1",  # One channel keeps the analysis simple
        "-f", "f32le",  # Raw little-endian float32, no container
        "-",  # Send the samples to stdout
    ]
    completed = subprocess.run(command, capture_output=True)

    raw_pcm = completed.stdout
    return np.frombuffer(raw_pcm, dtype=np.float32)
def _probe_audio_stream(path: str):
    """Return ``(codec, sample_rate, duration_sec, bitrate_bps)`` for the first audio stream of ``path``."""
    probe = subprocess.run(
        [
            f"{LIBS_PATH}/ffprobe",
            "-v", "error",  # Suppress all non-error messages
            "-select_streams", "a:0",  # Target only the first audio stream, ignoring video streams
            "-show_entries", "stream=codec_name,sample_rate,duration,bit_rate",  # Fields to extract
            "-of", "default=noprint_wrappers=1",  # One `key=value` pair per line, no section headers
            path,  # Input file
        ],
        capture_output=True,
        text=True,
    )

    # Look the values up by key rather than by line position: the order in
    # which ffprobe prints fields is not the order they are requested in.
    fields = {}
    for line in probe.stdout.splitlines():
        key, separator, value = line.partition("=")
        if separator:
            fields[key.strip()] = value.strip()

    missing = [
        key
        for key in ("codec_name", "sample_rate", "duration", "bit_rate")
        if key not in fields
    ]
    if missing:
        raise ValueError(
            f"ffprobe did not report {', '.join(missing)} for {path!r}: "
            f"{probe.stderr.strip()}"
        )

    # int()/float() raise ValueError when ffprobe reports a field as "N/A".
    return (
        fields["codec_name"],
        int(fields["sample_rate"]),
        float(fields["duration"]),
        int(fields["bit_rate"]),
    )


def _chunk_cutoff_hz(chunk, sample_rate, noise_floor_dbfs=-100.0, band_size=50):
    """Return the highest non-silent frequency (Hz) in one chunk of samples, or ``None`` if there is none."""
    n_samples = len(chunk)
    if n_samples == 0:
        return None  # Nothing was decoded (e.g. the seek landed past the end)

    # Apply a Hann window to reduce spectral leakage, then take the
    # magnitude of the real FFT.
    magnitudes = np.abs(np.fft.rfft(chunk * np.hanning(n_samples)))
    peak = magnitudes.max()
    if peak == 0 or len(magnitudes) < band_size:
        return None  # Silent chunk, or too short to form a single band

    # A bin is "active" when it is louder than `noise_floor_dbfs` relative to
    # the loudest bin. Comparing in the linear domain is equivalent to
    # converting to dB first, and avoids taking log10 of zero-valued bins.
    active = magnitudes > peak * 10.0 ** (noise_floor_dbfs / 20.0)

    # counts[j] = number of active bins in magnitudes[j : j + band_size],
    # obtained for every j at once from a cumulative sum.
    running = np.concatenate(([0], np.cumsum(active)))
    counts = running[band_size:] - running[:-band_size]

    # A band with at least half of its bins active is treated as real
    # content; the highest such band marks the cutoff.
    content_bands = np.flatnonzero(2 * counts >= band_size)
    if content_bands.size == 0:
        return None

    top_bin = int(content_bands[-1]) + band_size - 1  # Upper edge of that band
    bin_resolution_hz = sample_rate / n_samples  # Hz per bin
    return top_bin * bin_resolution_hz


def find_frequency_cutoff(path: str):
    """Find the frequency cutoff of an audio file.

    Four two-second chunks are decoded at 25 %, 40 %, 60 % and 75 % of the
    track duration. Each chunk is windowed and transformed with a real FFT,
    and the spectrum is scanned for the highest 50-bin band in which at least
    half of the bins are louder than -100 dB relative to the chunk's loudest
    bin. The upper edge of that band is the chunk's cutoff; the largest
    cutoff over all chunks is reported.

    Bin frequencies are derived from the sample rate reported by ffprobe.
    NOTE(review): this assumes the decoded chunk keeps the stream's sample
    rate (no resampling is requested from ffmpeg) - confirm.

    Args:
        path: Path of the audio file to analyze.

    Returns:
        A tuple ``(path, codec, bitrate_bps, max_cutoff_hz)``. ``max_cutoff_hz``
        is a float in Hz, or ``None`` when no chunk contained a band above
        the noise floor (the file is likely silent or corrupt).

    Raises:
        ValueError: If ffprobe does not report one of the required stream
            fields, or reports a non-numeric value for one of them.
    """
    codec, sample_rate, duration_sec, bitrate_bps = _probe_audio_stream(path)

    max_cutoff_hz = None
    for pct in (0.25, 0.4, 0.6, 0.75):
        chunk = read_audio_chunk(path, duration_sec * pct)
        cutoff_hz = _chunk_cutoff_hz(chunk, sample_rate)
        if cutoff_hz is None:
            continue  # Empty or silent chunk, or nothing above the noise floor
        if max_cutoff_hz is None or cutoff_hz > max_cutoff_hz:
            max_cutoff_hz = cutoff_hz

    return path, codec, bitrate_bps, max_cutoff_hz
# =====
# Analyze every MP3 file in ../downloads and print one table row per file.
FILES = sorted(
    os.path.join("../downloads", f)
    for f in os.listdir("../downloads")
    if f.lower().endswith(".mp3")  # Also pick up ".MP3" and similar
)

# Column widths for: file name, codec, bitrate, cutoff frequency.
COL_SIZE = [20, 6, 10, 10]
COL_TITLES = ["File", "Codec", "Bitrate", "Cutoff"]

header = " ".join(
    f"{title:{width}}" for title, width in zip(COL_TITLES, COL_SIZE)
)
print(header)
print("-" * len(header))

started_at = time.perf_counter()  # Monotonic clock, suited to measuring elapsed time
for p in FILES:
    path, codec, bitrate, cutoff_hz = find_frequency_cutoff(p)

    cutoff_str = "N/A" if cutoff_hz is None else f"{cutoff_hz:.0f} Hz"
    # The bitrate arrives in bits per second, so the unit is kilobits ("kbps").
    bitrate_str = f"{bitrate / 1000:.0f} kbps"

    # Shorten long file names so the columns stay aligned.
    name = os.path.basename(p)
    if len(name) > COL_SIZE[0]:
        name = name[:COL_SIZE[0] - 4] + "..."

    print(
        f"{name:{COL_SIZE[0]}} "
        f"{codec:{COL_SIZE[1]}} "
        f"{bitrate_str:{COL_SIZE[2]}} "
        f"{cutoff_str:{COL_SIZE[3]}}"
    )

elapsed_sec = time.perf_counter() - started_at
print(f"\nAnalyzed {len(FILES)} files in {elapsed_sec:.1f} seconds")
Мобильная версия