From d93fb443562fb1588a3056a21987213bff926a6a Mon Sep 17 00:00:00 2001 From: jo Date: Fri, 21 Jan 2022 09:09:42 +0100 Subject: [PATCH] feat(analyzer): analyze cuepoint using ffmpeg - store cue(in|out) as strings - reraise when executable was not found BREAKING CHANGE: The analyzer requires 'ffmpeg'. The 'silan' system dependency can be removed. --- analyzer/libretime_analyzer/ffmpeg.py | 56 +++++++- .../steps/analyze_cuepoint.py | 129 +++++++----------- analyzer/packages.ini | 5 +- analyzer/tests/ffmpeg_test.py | 84 +++++++++++- analyzer/tests/steps/analyze_cuepoint_test.py | 47 ++----- 5 files changed, 200 insertions(+), 121 deletions(-) diff --git a/analyzer/libretime_analyzer/ffmpeg.py b/analyzer/libretime_analyzer/ffmpeg.py index 943aecd73..31ca69640 100644 --- a/analyzer/libretime_analyzer/ffmpeg.py +++ b/analyzer/libretime_analyzer/ffmpeg.py @@ -1,6 +1,7 @@ import re +from math import inf from pathlib import Path -from typing import Optional +from typing import List, Optional, Tuple from .utils import run_ @@ -54,3 +55,56 @@ def compute_replaygain(filepath: Path) -> Optional[float]: if track_gain_match: return float(track_gain_match.group(1)) + + +_SILENCE_DETECT_RE = re.compile( + r"\[silencedetect.*\] silence_(start|end): (-?\d+(?:\.\d+)?)(?: \| silence_duration: (\d+(?:\.\d+)?))?" +) + + +def compute_silences(filepath: Path) -> List[Tuple[float, float]]: + """ + Compute silence will analyse the given audio file and return a list of silences. + """ + cmd = _ffmpeg( + *("-i", filepath), + "-vn", + *("-filter", "highpass=frequency=1000"), + *("-filter", "silencedetect=noise=0.15:duration=1"), + ) + + starts, ends = [], [] + for line in cmd.stderr.splitlines(): + match = _SILENCE_DETECT_RE.search(line) + if match is None: + continue + + kind = match.group(1) + if kind == "start": + start = float(match.group(2)) + start = max(start, 0.0) + starts.append(start) + elif kind == "end": + end = float(match.group(2)) + ends.append(end) + + # ffmpeg v3 (bionic) does not warn about silence end when the track ends. + # Set the last silence ending to infinity, and clamp it to the track duration before + # using this value. + if len(starts) - 1 == len(ends): + ends.append(inf) + + return list(zip(starts, ends)) + + +def probe_duration(filepath: Path) -> float: + """ + Probe duration will probe the given audio file and return the duration. + """ + cmd = _ffprobe( + *("-i", filepath), + *("-show_entries", "format=duration"), + *("-v", "quiet"), + *("-of", "csv=p=0"), + ) + return float(cmd.stdout.strip("\n")) diff --git a/analyzer/libretime_analyzer/steps/analyze_cuepoint.py b/analyzer/libretime_analyzer/steps/analyze_cuepoint.py index 0ee481cee..0e73a27f7 100644 --- a/analyzer/libretime_analyzer/steps/analyze_cuepoint.py +++ b/analyzer/libretime_analyzer/steps/analyze_cuepoint.py @@ -1,94 +1,69 @@ -import datetime -import json -import subprocess +from datetime import timedelta +from math import isclose +from subprocess import CalledProcessError from typing import Any, Dict from loguru import logger -SILAN_EXECUTABLE = "silan" +from ..ffmpeg import compute_silences, probe_duration -def analyze_cuepoint(filename: str, metadata: Dict[str, Any]): - """Extracts the cue-in and cue-out times along and sets the file duration based on that. - The cue points are there to skip the silence at the start and end of a track, and are determined - using "silan", which analyzes the loudness in a track. - :param filename: The full path to the file to analyzer - :param metadata: A metadata dictionary where the results will be put - :return: The metadata dictionary +def analyze_cuepoint(filepath: str, metadata: Dict[str, Any]) -> Dict[str, Any]: """ - """ The silan -F 0.99 parameter tweaks the highpass filter. The default is 0.98, but at that setting, - the unit test on the short m4a file fails. With the new setting, it gets the correct cue-in time and - all the unit tests pass. + Extracts the cuein and cueout times along and sets the file duration using ffmpeg. """ - command = [ - SILAN_EXECUTABLE, - "-b", - "-F", - "0.99", - "-f", - "JSON", - "-t", - "1.0", - filename, - ] + try: - results_json = subprocess.check_output( - command, stderr=subprocess.STDOUT, close_fds=True - ) - try: - results_json = results_json.decode() - except (UnicodeDecodeError, AttributeError): - pass - silan_results = json.loads(results_json) + duration = probe_duration(filepath) - # Defensive coding against Silan wildly miscalculating the cue in and out times: - silan_length_seconds = float(silan_results["file duration"]) - silan_cuein = format(silan_results["sound"][0][0], "f") - silan_cueout = format(silan_results["sound"][0][1], "f") + if "length_seconds" in metadata and not isclose( + metadata["length_seconds"], + duration, + abs_tol=0.1, + ): + logger.warning( + f"existing duration {metadata['length_seconds']} differs " + f"from the probed duration {duration}." + ) - # Sanity check the results against any existing metadata passed to us (presumably extracted by Mutagen): - if "length_seconds" in metadata: - # Silan has a rare bug where it can massively overestimate the length or cue out time sometimes. - if (silan_length_seconds - metadata["length_seconds"] > 3) or ( - float(silan_cueout) - metadata["length_seconds"] > 2 + metadata["length_seconds"] = duration + metadata["length"] = str(timedelta(seconds=duration)) + metadata["cuein"] = 0.0 + metadata["cueout"] = duration + + silences = compute_silences(filepath) + + if len(silences) > 2: + # Only keep first and last silence + silences = silences[:: len(silences) - 1] + + for silence in silences: + # Sanity check + if silence[0] >= silence[1]: + raise ValueError( + f"silence starts ({silence[0]}) after ending ({silence[1]})" + ) + + # Is this really the first silence ? + if isclose( + 0.0, + max(0.0, silence[0]), # Clamp negative value + abs_tol=0.1, ): - # Don't trust anything silan says then... - raise Exception( - "Silan cue out {0} or length {1} differs too much from the Mutagen length {2}. Ignoring Silan values.".format( - silan_cueout, - silan_length_seconds, - metadata["length_seconds"], - ) - ) - # Don't allow silan to trim more than the greater of 3 seconds or 5% off the start of a track - if float(silan_cuein) > max(silan_length_seconds * 0.05, 3): - raise Exception( - "Silan cue in time {0} too big, ignoring.".format(silan_cuein) - ) - else: - # Only use the Silan track length in the worst case, where Mutagen didn't give us one for some reason. - # (This is mostly to make the unit tests still pass.) - # Convert the length into a formatted time string. - metadata["length_seconds"] = silan_length_seconds # - track_length = datetime.timedelta(seconds=metadata["length_seconds"]) - metadata["length"] = str(track_length) + metadata["cuein"] = max(0.0, silence[1]) - """ XXX: I've commented out the track_length stuff below because Mutagen seems more accurate than silan - as of Mutagen version 1.31. We are always going to use Mutagen's length now because Silan's - length can be off by a few seconds reasonably often. - """ + # Is this really the last silence ? + elif isclose( + min(silence[1], duration), # Clamp infinity value + duration, + abs_tol=0.1, + ): + metadata["cueout"] = min(silence[0], duration) - metadata["cuein"] = silan_cuein - metadata["cueout"] = silan_cueout + metadata["cuein"] = format(metadata["cuein"], "f") + metadata["cueout"] = format(metadata["cueout"], "f") - except OSError as e: # silan was not found - logger.warning( - "Failed to run: %s - %s. %s" - % (command[0], e.strerror, "Do you have silan installed?") - ) - except subprocess.CalledProcessError as e: # silan returned an error code - logger.warning("%s %s %s", e.cmd, e.output, e.returncode) - except Exception as e: - logger.warning(e) + except (CalledProcessError, OSError): + pass return metadata diff --git a/analyzer/packages.ini b/analyzer/packages.ini index 409aa0641..3c00717bc 100644 --- a/analyzer/packages.ini +++ b/analyzer/packages.ini @@ -27,12 +27,9 @@ liquidsoap = buster, bullseye, bionic, focal python3-pika = buster, bullseye, bionic, focal [ffmpeg] -# Detect replaygain +# Detect duration, silences and replaygain ffmpeg = buster, bullseye, bionic, focal -[silan] -silan = buster, bullseye, bionic, focal - [=development] # Generate fixtures ffmpeg = buster, bullseye, bionic, focal diff --git a/analyzer/tests/ffmpeg_test.py b/analyzer/tests/ffmpeg_test.py index c17669c8d..47ab63236 100644 --- a/analyzer/tests/ffmpeg_test.py +++ b/analyzer/tests/ffmpeg_test.py @@ -1,7 +1,15 @@ +from math import inf + import distro import pytest -from libretime_analyzer.ffmpeg import compute_replaygain, probe_replaygain +from libretime_analyzer.ffmpeg import ( + _SILENCE_DETECT_RE, + compute_replaygain, + compute_silences, + probe_duration, + probe_replaygain, +) from .fixtures import FILES @@ -28,3 +36,77 @@ def test_compute_replaygain(filepath, replaygain): tolerance = 5 assert compute_replaygain(filepath) == pytest.approx(replaygain, abs=tolerance) + + +# Be sure to test a matrix of integer / float, positive / negative values +SILENCE_DETECT_RE_RAW = """ +[silencedetect @ 0x563121aee500] silence_start: -0.00154195 +[silencedetect @ 0x563121aee500] silence_end: 0.998458 | silence_duration: 1 +[silencedetect @ 0x563121aee500] silence_start: 2.99383 +[silencedetect @ 0x563121aee500] silence_end: 4.99229 | silence_duration: 1.99846 +[silencedetect @ 0x563121aee500] silence_start: 6.98766 +[silencedetect @ 0x563121aee500] silence_end: 8.98612 | silence_duration: 1.99846 +[silencedetect @ 0x563121aee500] silence_start: 12 +[silencedetect @ 0x563121aee500] silence_end: 13 | silence_duration: 1 +""" + +SILENCE_DETECT_RE_EXPECTED = [ + ("start", -0.00154195), + ("end", 0.998458), + ("start", 2.99383), + ("end", 4.99229), + ("start", 6.98766), + ("end", 8.98612), + ("start", 12.0), + ("end", 13.0), +] + + +@pytest.mark.parametrize( + "line,expected", + zip( + SILENCE_DETECT_RE_RAW.strip().splitlines(), + SILENCE_DETECT_RE_EXPECTED, + ), +) +def test_silence_detect_re(line, expected): + match = _SILENCE_DETECT_RE.search(line) + assert match is not None + assert match.group(1) == expected[0] + assert float(match.group(2)) == expected[1] + + +@pytest.mark.parametrize( + "filepath,length,cuein,cueout", + map( + lambda i: pytest.param(i.path, i.length, i.cuein, i.cueout, id=i.path.name), + FILES, + ), +) +def test_compute_silences(filepath, length, cuein, cueout): + result = compute_silences(filepath) + + if cuein != 0.0: + assert len(result) > 0 + first = result.pop(0) + assert first[0] == pytest.approx(0.0, abs=0.1) + assert first[1] == pytest.approx(cuein, abs=1) + + if cueout != length: + # ffmpeg v3 (bionic) does not warn about silence end when the track ends. + # Check for infinity on last silence ending + if distro.codename() == "bionic": + length = inf + + assert len(result) > 0 + last = result.pop() + assert last[0] == pytest.approx(cueout, abs=1) + assert last[1] == pytest.approx(length, abs=0.1) + + +@pytest.mark.parametrize( + "filepath,length", + map(lambda i: pytest.param(i.path, i.length, id=i.path.name), FILES), +) +def test_probe_duration(filepath, length): + assert probe_duration(filepath) == pytest.approx(length, abs=0.05) diff --git a/analyzer/tests/steps/analyze_cuepoint_test.py b/analyzer/tests/steps/analyze_cuepoint_test.py index c6e5240ea..46151425d 100644 --- a/analyzer/tests/steps/analyze_cuepoint_test.py +++ b/analyzer/tests/steps/analyze_cuepoint_test.py @@ -1,51 +1,22 @@ -from unittest.mock import patch - -import distro import pytest from libretime_analyzer.steps.analyze_cuepoint import analyze_cuepoint -from ..fixtures import FILE_INVALID_DRM, FILES +from ..fixtures import FILES @pytest.mark.parametrize( "filepath,length,cuein,cueout", - map(lambda i: (str(i.path), i.length, i.cuein, i.cueout), FILES), + map( + lambda i: pytest.param( + str(i.path), i.length, i.cuein, i.cueout, id=i.path.name + ), + FILES, + ), ) def test_analyze_cuepoint(filepath, length, cuein, cueout): metadata = analyze_cuepoint(filepath, dict()) assert metadata["length_seconds"] == pytest.approx(length, abs=0.1) - - # Silan does not work with m4a files yet - if filepath.endswith("m4a"): - return - - # Silan does not work with mp3 on buster, bullseye, focal - if filepath.endswith("mp3") and distro.codename() in ( - "buster", - "bullseye", - "focal", - ): - return - - assert float(metadata["cuein"]) == pytest.approx(cuein, abs=0.5) - assert float(metadata["cueout"]) == pytest.approx(cueout, abs=0.5) - - -def test_analyze_cuepoint_missing_silan(): - with patch( - "libretime_analyzer.steps.analyze_cuepoint.SILAN_EXECUTABLE", - "foobar", - ): - analyze_cuepoint(str(FILES[0].path), dict()) - - -def test_analyze_cuepoint_invalid_filepath(): - with pytest.raises(KeyError): - test_analyze_cuepoint("non-existent-file", None, None, None) - - -def test_analyze_cuepoint_invalid_wma(): - with pytest.raises(KeyError): - test_analyze_cuepoint(FILE_INVALID_DRM, None, None, None) + assert float(metadata["cuein"]) == pytest.approx(float(cuein), abs=1) + assert float(metadata["cueout"]) == pytest.approx(float(cueout), abs=1)