feat(analyzer): analyze cuepoint using ffmpeg

- store cue(in|out) as strings - reraise when executable was not found BREAKING CHANGE: The analyzer requires 'ffmpeg'. The 'silan' system dependency can be removed.
2022-01-21 09:09:42 +01:00 · 2022-01-21 09:09:42 +01:00 · d93fb44356
parent ceab19271d
commit d93fb44356
5 changed files with 200 additions and 121 deletions
--- a/analyzer/libretime_analyzer/ffmpeg.py
+++ b/analyzer/libretime_analyzer/ffmpeg.py
@ -1,6 +1,7 @@
 import re
+from math import inf
 from pathlib import Path
-from typing import Optional
+from typing import List, Optional, Tuple

 from .utils import run_

@ -54,3 +55,56 @@ def compute_replaygain(filepath: Path) -> Optional[float]:

    if track_gain_match:
        return float(track_gain_match.group(1))
+
+
+_SILENCE_DETECT_RE = re.compile(
+    r"\[silencedetect.*\] silence_(start|end): (-?\d+(?:\.\d+)?)(?: \| silence_duration: (\d+(?:\.\d+)?))?"
+)
+
+
+def compute_silences(filepath: Path) -> List[Tuple[float, float]]:
+    """
+    Compute silence will analyse the given audio file and return a list of silences.
+    """
+    cmd = _ffmpeg(
+        *("-i", filepath),
+        "-vn",
+        *("-filter", "highpass=frequency=1000"),
+        *("-filter", "silencedetect=noise=0.15:duration=1"),
+    )
+
+    starts, ends = [], []
+    for line in cmd.stderr.splitlines():
+        match = _SILENCE_DETECT_RE.search(line)
+        if match is None:
+            continue
+
+        kind = match.group(1)
+        if kind == "start":
+            start = float(match.group(2))
+            start = max(start, 0.0)
+            starts.append(start)
+        elif kind == "end":
+            end = float(match.group(2))
+            ends.append(end)
+
+    # ffmpeg v3 (bionic) does not warn about silence end when the track ends.
+    # Set the last silence ending to infinity, and clamp it to the track duration before
+    # using this value.
+    if len(starts) - 1 == len(ends):
+        ends.append(inf)
+
+    return list(zip(starts, ends))
+
+
+def probe_duration(filepath: Path) -> float:
+    """
+    Probe duration will probe the given audio file and return the duration.
+    """
+    cmd = _ffprobe(
+        *("-i", filepath),
+        *("-show_entries", "format=duration"),
+        *("-v", "quiet"),
+        *("-of", "csv=p=0"),
+    )
+    return float(cmd.stdout.strip("\n"))
--- a/analyzer/libretime_analyzer/steps/analyze_cuepoint.py
+++ b/analyzer/libretime_analyzer/steps/analyze_cuepoint.py
@ -1,94 +1,69 @@
-import datetime
-import json
-import subprocess
+from datetime import timedelta
+from math import isclose
+from subprocess import CalledProcessError
 from typing import Any, Dict

 from loguru import logger

-SILAN_EXECUTABLE = "silan"
+from ..ffmpeg import compute_silences, probe_duration


-def analyze_cuepoint(filename: str, metadata: Dict[str, Any]):
-    """Extracts the cue-in and cue-out times along and sets the file duration based on that.
-        The cue points are there to skip the silence at the start and end of a track, and are determined
-        using "silan", which analyzes the loudness in a track.
-    :param filename: The full path to the file to analyzer
-    :param metadata: A metadata dictionary where the results will be put
-    :return: The metadata dictionary
+def analyze_cuepoint(filepath: str, metadata: Dict[str, Any]) -> Dict[str, Any]:
    """
-    """ The silan -F 0.99 parameter tweaks the highpass filter. The default is 0.98, but at that setting,
-        the unit test on the short m4a file fails. With the new setting, it gets the correct cue-in time and
-        all the unit tests pass.
+    Extracts the cuein and cueout times along and sets the file duration using ffmpeg.
    """
-    command = [
-        SILAN_EXECUTABLE,
-        "-b",
-        "-F",
-        "0.99",
-        "-f",
-        "JSON",
-        "-t",
-        "1.0",
-        filename,
-    ]
+
    try:
-        results_json = subprocess.check_output(
-            command, stderr=subprocess.STDOUT, close_fds=True
-        )
-        try:
-            results_json = results_json.decode()
-        except (UnicodeDecodeError, AttributeError):
-            pass
-        silan_results = json.loads(results_json)
+        duration = probe_duration(filepath)

-        # Defensive coding against Silan wildly miscalculating the cue in and out times:
-        silan_length_seconds = float(silan_results["file duration"])
-        silan_cuein = format(silan_results["sound"][0][0], "f")
-        silan_cueout = format(silan_results["sound"][0][1], "f")
+        if "length_seconds" in metadata and not isclose(
+            metadata["length_seconds"],
+            duration,
+            abs_tol=0.1,
+        ):
+            logger.warning(
+                f"existing duration {metadata['length_seconds']} differs "
+                f"from the probed duration {duration}."
+            )

-        # Sanity check the results against any existing metadata passed to us (presumably extracted by Mutagen):
-        if "length_seconds" in metadata:
-            # Silan has a rare bug where it can massively overestimate the length or cue out time sometimes.
-            if (silan_length_seconds - metadata["length_seconds"] > 3) or (
-                float(silan_cueout) - metadata["length_seconds"] > 2
+        metadata["length_seconds"] = duration
+        metadata["length"] = str(timedelta(seconds=duration))
+        metadata["cuein"] = 0.0
+        metadata["cueout"] = duration
+
+        silences = compute_silences(filepath)
+
+        if len(silences) > 2:
+            # Only keep first and last silence
+            silences = silences[:: len(silences) - 1]
+
+        for silence in silences:
+            # Sanity check
+            if silence[0] >= silence[1]:
+                raise ValueError(
+                    f"silence starts ({silence[0]}) after ending ({silence[1]})"
+                )
+
+            # Is this really the first silence ?
+            if isclose(
+                0.0,
+                max(0.0, silence[0]),  # Clamp negative value
+                abs_tol=0.1,
            ):
-                # Don't trust anything silan says then...
-                raise Exception(
-                    "Silan cue out {0} or length {1} differs too much from the Mutagen length {2}. Ignoring Silan values.".format(
-                        silan_cueout,
-                        silan_length_seconds,
-                        metadata["length_seconds"],
-                    )
-                )
-            # Don't allow silan to trim more than the greater of 3 seconds or 5% off the start of a track
-            if float(silan_cuein) > max(silan_length_seconds * 0.05, 3):
-                raise Exception(
-                    "Silan cue in time {0} too big, ignoring.".format(silan_cuein)
-                )
-        else:
-            # Only use the Silan track length in the worst case, where Mutagen didn't give us one for some reason.
-            # (This is mostly to make the unit tests still pass.)
-            # Convert the length into a formatted time string.
-            metadata["length_seconds"] = silan_length_seconds  #
-            track_length = datetime.timedelta(seconds=metadata["length_seconds"])
-            metadata["length"] = str(track_length)
+                metadata["cuein"] = max(0.0, silence[1])

-        """ XXX: I've commented out the track_length stuff below because Mutagen seems more accurate than silan
-                    as of Mutagen version 1.31. We are always going to use Mutagen's length now because Silan's
-                    length can be off by a few seconds reasonably often.
-        """
+            # Is this really the last silence ?
+            elif isclose(
+                min(silence[1], duration),  # Clamp infinity value
+                duration,
+                abs_tol=0.1,
+            ):
+                metadata["cueout"] = min(silence[0], duration)

-        metadata["cuein"] = silan_cuein
-        metadata["cueout"] = silan_cueout
+        metadata["cuein"] = format(metadata["cuein"], "f")
+        metadata["cueout"] = format(metadata["cueout"], "f")

-    except OSError as e:  # silan was not found
-        logger.warning(
-            "Failed to run: %s - %s. %s"
-            % (command[0], e.strerror, "Do you have silan installed?")
-        )
-    except subprocess.CalledProcessError as e:  # silan returned an error code
-        logger.warning("%s %s %s", e.cmd, e.output, e.returncode)
-    except Exception as e:
-        logger.warning(e)
+    except (CalledProcessError, OSError):
+        pass

    return metadata
--- a/analyzer/packages.ini
+++ b/analyzer/packages.ini
@ -27,12 +27,9 @@ liquidsoap = buster, bullseye, bionic, focal
 python3-pika = buster, bullseye, bionic, focal

 [ffmpeg]
-# Detect replaygain
+# Detect duration, silences and replaygain
 ffmpeg = buster, bullseye, bionic, focal

-[silan]
-silan = buster, bullseye, bionic, focal
-
 [=development]
 # Generate fixtures
 ffmpeg = buster, bullseye, bionic, focal
--- a/analyzer/tests/ffmpeg_test.py
+++ b/analyzer/tests/ffmpeg_test.py
@ -1,7 +1,15 @@
+from math import inf
+
 import distro
 import pytest

-from libretime_analyzer.ffmpeg import compute_replaygain, probe_replaygain
+from libretime_analyzer.ffmpeg import (
+    _SILENCE_DETECT_RE,
+    compute_replaygain,
+    compute_silences,
+    probe_duration,
+    probe_replaygain,
+)

 from .fixtures import FILES

@ -28,3 +36,77 @@ def test_compute_replaygain(filepath, replaygain):
        tolerance = 5

    assert compute_replaygain(filepath) == pytest.approx(replaygain, abs=tolerance)
+
+
+# Be sure to test a matrix of integer / float, positive / negative values
+SILENCE_DETECT_RE_RAW = """
+[silencedetect @ 0x563121aee500] silence_start: -0.00154195
+[silencedetect @ 0x563121aee500] silence_end: 0.998458 | silence_duration: 1
+[silencedetect @ 0x563121aee500] silence_start: 2.99383
+[silencedetect @ 0x563121aee500] silence_end: 4.99229 | silence_duration: 1.99846
+[silencedetect @ 0x563121aee500] silence_start: 6.98766
+[silencedetect @ 0x563121aee500] silence_end: 8.98612 | silence_duration: 1.99846
+[silencedetect @ 0x563121aee500] silence_start: 12
+[silencedetect @ 0x563121aee500] silence_end: 13 | silence_duration: 1
+"""
+
+SILENCE_DETECT_RE_EXPECTED = [
+    ("start", -0.00154195),
+    ("end", 0.998458),
+    ("start", 2.99383),
+    ("end", 4.99229),
+    ("start", 6.98766),
+    ("end", 8.98612),
+    ("start", 12.0),
+    ("end", 13.0),
+]
+
+
+@pytest.mark.parametrize(
+    "line,expected",
+    zip(
+        SILENCE_DETECT_RE_RAW.strip().splitlines(),
+        SILENCE_DETECT_RE_EXPECTED,
+    ),
+)
+def test_silence_detect_re(line, expected):
+    match = _SILENCE_DETECT_RE.search(line)
+    assert match is not None
+    assert match.group(1) == expected[0]
+    assert float(match.group(2)) == expected[1]
+
+
+@pytest.mark.parametrize(
+    "filepath,length,cuein,cueout",
+    map(
+        lambda i: pytest.param(i.path, i.length, i.cuein, i.cueout, id=i.path.name),
+        FILES,
+    ),
+)
+def test_compute_silences(filepath, length, cuein, cueout):
+    result = compute_silences(filepath)
+
+    if cuein != 0.0:
+        assert len(result) > 0
+        first = result.pop(0)
+        assert first[0] == pytest.approx(0.0, abs=0.1)
+        assert first[1] == pytest.approx(cuein, abs=1)
+
+    if cueout != length:
+        # ffmpeg v3 (bionic) does not warn about silence end when the track ends.
+        # Check for infinity on last silence ending
+        if distro.codename() == "bionic":
+            length = inf
+
+        assert len(result) > 0
+        last = result.pop()
+        assert last[0] == pytest.approx(cueout, abs=1)
+        assert last[1] == pytest.approx(length, abs=0.1)
+
+
+@pytest.mark.parametrize(
+    "filepath,length",
+    map(lambda i: pytest.param(i.path, i.length, id=i.path.name), FILES),
+)
+def test_probe_duration(filepath, length):
+    assert probe_duration(filepath) == pytest.approx(length, abs=0.05)
--- a/analyzer/tests/steps/analyze_cuepoint_test.py
+++ b/analyzer/tests/steps/analyze_cuepoint_test.py
@ -1,51 +1,22 @@
-from unittest.mock import patch
-
-import distro
 import pytest

 from libretime_analyzer.steps.analyze_cuepoint import analyze_cuepoint

-from ..fixtures import FILE_INVALID_DRM, FILES
+from ..fixtures import FILES


@pytest.mark.parametrize(
    "filepath,length,cuein,cueout",
-    map(lambda i: (str(i.path), i.length, i.cuein, i.cueout), FILES),
+    map(
+        lambda i: pytest.param(
+            str(i.path), i.length, i.cuein, i.cueout, id=i.path.name
+        ),
+        FILES,
+    ),
 )
 def test_analyze_cuepoint(filepath, length, cuein, cueout):
    metadata = analyze_cuepoint(filepath, dict())

    assert metadata["length_seconds"] == pytest.approx(length, abs=0.1)
-
-    # Silan does not work with m4a files yet
-    if filepath.endswith("m4a"):
-        return
-
-    # Silan does not work with mp3 on buster, bullseye, focal
-    if filepath.endswith("mp3") and distro.codename() in (
-        "buster",
-        "bullseye",
-        "focal",
-    ):
-        return
-
-    assert float(metadata["cuein"]) == pytest.approx(cuein, abs=0.5)
-    assert float(metadata["cueout"]) == pytest.approx(cueout, abs=0.5)
-
-
-def test_analyze_cuepoint_missing_silan():
-    with patch(
-        "libretime_analyzer.steps.analyze_cuepoint.SILAN_EXECUTABLE",
-        "foobar",
-    ):
-        analyze_cuepoint(str(FILES[0].path), dict())
-
-
-def test_analyze_cuepoint_invalid_filepath():
-    with pytest.raises(KeyError):
-        test_analyze_cuepoint("non-existent-file", None, None, None)
-
-
-def test_analyze_cuepoint_invalid_wma():
-    with pytest.raises(KeyError):
-        test_analyze_cuepoint(FILE_INVALID_DRM, None, None, None)
+    assert float(metadata["cuein"]) == pytest.approx(float(cuein), abs=1)
+    assert float(metadata["cueout"]) == pytest.approx(float(cueout), abs=1)