refactor(analyzer): redefine *_analyzer into steps

- update imports and names
- define step as a protocol
- extract analyzer function from classes
This commit is contained in:
jo 2022-01-17 20:31:43 +01:00 committed by Kyle Robbertze
parent f6a52c8324
commit 2cae31a97a
14 changed files with 452 additions and 480 deletions

View File

@ -7,7 +7,7 @@ import time
import pika import pika
from loguru import logger from loguru import logger
from .analyzer_pipeline import AnalyzerPipeline from .pipeline import Pipeline
from .status_reporter import StatusReporter from .status_reporter import StatusReporter
EXCHANGE = "airtime-uploads" EXCHANGE = "airtime-uploads"
@ -265,7 +265,7 @@ class MessageListener:
q = queue.Queue() q = queue.Queue()
try: try:
AnalyzerPipeline.run_analysis( Pipeline.run_analysis(
q, q,
audio_file_path, audio_file_path,
import_directory, import_directory,
@ -276,7 +276,7 @@ class MessageListener:
metadata = q.get() metadata = q.get()
except Exception as e: except Exception as e:
logger.error("Analyzer pipeline exception: %s" % str(e)) logger.error("Analyzer pipeline exception: %s" % str(e))
metadata["import_status"] = AnalyzerPipeline.IMPORT_STATUS_FAILED metadata["import_status"] = Pipeline.IMPORT_STATUS_FAILED
# Ensure our queue doesn't fill up and block due to unexpected behaviour. Defensive code. # Ensure our queue doesn't fill up and block due to unexpected behaviour. Defensive code.
while not q.empty(): while not q.empty():

View File

@ -4,14 +4,14 @@ from queue import Queue
from loguru import logger from loguru import logger
from .cuepoint_analyzer import CuePointAnalyzer from .steps.analyze_cuepoint import analyze_cuepoint
from .filemover_analyzer import FileMoverAnalyzer from .steps.analyze_metadata import analyze_metadata
from .metadata_analyzer import MetadataAnalyzer from .steps.analyze_playability import UnplayableFileError, analyze_playability
from .playability_analyzer import PlayabilityAnalyzer, UnplayableFileError from .steps.analyze_replaygain import analyze_replaygain
from .replaygain_analyzer import ReplayGainAnalyzer from .steps.organise_file import organise_file
class AnalyzerPipeline: class Pipeline:
"""Analyzes and imports an audio file into the Airtime library. """Analyzes and imports an audio file into the Airtime library.
This currently performs metadata extraction (eg. gets the ID3 tags from an MP3), This currently performs metadata extraction (eg. gets the ID3 tags from an MP3),
@ -80,12 +80,12 @@ class AnalyzerPipeline:
metadata = dict() metadata = dict()
metadata["file_prefix"] = file_prefix metadata["file_prefix"] = file_prefix
metadata = MetadataAnalyzer.analyze(audio_file_path, metadata) metadata = analyze_metadata(audio_file_path, metadata)
metadata = CuePointAnalyzer.analyze(audio_file_path, metadata) metadata = analyze_cuepoint(audio_file_path, metadata)
metadata = ReplayGainAnalyzer.analyze(audio_file_path, metadata) metadata = analyze_replaygain(audio_file_path, metadata)
metadata = PlayabilityAnalyzer.analyze(audio_file_path, metadata) metadata = analyze_playability(audio_file_path, metadata)
metadata = FileMoverAnalyzer.move( metadata = organise_file(
audio_file_path, import_directory, original_filename, metadata audio_file_path, import_directory, original_filename, metadata
) )
@ -99,7 +99,7 @@ class AnalyzerPipeline:
queue.put(metadata) queue.put(metadata)
except UnplayableFileError as e: except UnplayableFileError as e:
logger.exception(e) logger.exception(e)
metadata["import_status"] = AnalyzerPipeline.IMPORT_STATUS_FAILED metadata["import_status"] = Pipeline.IMPORT_STATUS_FAILED
metadata["reason"] = "The file could not be played." metadata["reason"] = "The file could not be played."
raise e raise e
except Exception as e: except Exception as e:

View File

@ -1,19 +1,14 @@
import datetime import datetime
import json import json
import subprocess import subprocess
from typing import Any, Dict
from loguru import logger from loguru import logger
from .analyzer import Analyzer SILAN_EXECUTABLE = "silan"
class CuePointAnalyzer(Analyzer): def analyze_cuepoint(filename: str, metadata: Dict[str, Any]):
"""This class extracts the cue-in time, cue-out time, and length of a track using silan."""
SILAN_EXECUTABLE = "silan"
@staticmethod
def analyze(filename, metadata):
"""Extracts the cue-in and cue-out times and sets the file duration based on them. """Extracts the cue-in and cue-out times and sets the file duration based on them.
The cue points are there to skip the silence at the start and end of a track, and are determined The cue points are there to skip the silence at the start and end of a track, and are determined
using "silan", which analyzes the loudness in a track. using "silan", which analyzes the loudness in a track.
@ -26,7 +21,7 @@ class CuePointAnalyzer(Analyzer):
all the unit tests pass. all the unit tests pass.
""" """
command = [ command = [
CuePointAnalyzer.SILAN_EXECUTABLE, SILAN_EXECUTABLE,
"-b", "-b",
"-F", "-F",
"0.99", "0.99",

View File

@ -2,17 +2,14 @@ import datetime
import hashlib import hashlib
import os import os
import wave import wave
from typing import Any, Dict
import magic import magic
import mutagen import mutagen
from loguru import logger from loguru import logger
from .analyzer import Analyzer
def analyze_metadata(filename: str, metadata: Dict[str, Any]):
class MetadataAnalyzer(Analyzer):
@staticmethod
def analyze(filename, metadata):
"""Extract audio metadata from tags embedded in the file (eg. ID3 tags) """Extract audio metadata from tags embedded in the file (eg. ID3 tags)
Keyword arguments: Keyword arguments:
@ -54,7 +51,7 @@ class MetadataAnalyzer(Analyzer):
mime_check = ms.buffer(fh.read(2014)) mime_check = ms.buffer(fh.read(2014))
metadata["mime"] = mime_check metadata["mime"] = mime_check
if mime_check == "audio/x-wav": if mime_check == "audio/x-wav":
return MetadataAnalyzer._analyze_wave(filename, metadata) return _analyze_wave(filename, metadata)
# Extract metadata from an audio file using mutagen # Extract metadata from an audio file using mutagen
audio_file = mutagen.File(filename, easy=True) audio_file = mutagen.File(filename, easy=True)
@ -174,8 +171,8 @@ class MetadataAnalyzer(Analyzer):
return metadata return metadata
@staticmethod
def _analyze_wave(filename, metadata): def _analyze_wave(filename, metadata):
try: try:
reader = wave.open(filename, "rb") reader = wave.open(filename, "rb")
metadata["channels"] = reader.getnchannels() metadata["channels"] = reader.getnchannels()

View File

@ -1,30 +1,26 @@
__author__ = "asantoni" __author__ = "asantoni"
import subprocess import subprocess
from typing import Any, Dict
from loguru import logger from loguru import logger
from .analyzer import Analyzer
class UnplayableFileError(Exception): class UnplayableFileError(Exception):
pass pass
class PlayabilityAnalyzer(Analyzer): LIQUIDSOAP_EXECUTABLE = "liquidsoap"
"""This class checks if a file can actually be played with Liquidsoap."""
LIQUIDSOAP_EXECUTABLE = "liquidsoap"
@staticmethod def analyze_playability(filename: str, metadata: Dict[str, Any]):
def analyze(filename, metadata):
"""Checks if a file can be played by Liquidsoap. """Checks if a file can be played by Liquidsoap.
:param filename: The full path to the file to analyze :param filename: The full path to the file to analyze
:param metadata: A metadata dictionary where the results will be put :param metadata: A metadata dictionary where the results will be put
:return: The metadata dictionary :return: The metadata dictionary
""" """
command = [ command = [
PlayabilityAnalyzer.LIQUIDSOAP_EXECUTABLE, LIQUIDSOAP_EXECUTABLE,
"-v", "-v",
"-c", "-c",
"output.dummy(audio_to_stereo(single(argv(1))))", "output.dummy(audio_to_stereo(single(argv(1))))",

View File

@ -1,18 +1,13 @@
import re import re
import subprocess import subprocess
from typing import Any, Dict
from loguru import logger from loguru import logger
from .analyzer import Analyzer REPLAYGAIN_EXECUTABLE = "replaygain" # From the rgain3 python package
class ReplayGainAnalyzer(Analyzer): def analyze_replaygain(filename: str, metadata: Dict[str, Any]):
"""This class extracts the ReplayGain using a tool from the python-rgain package."""
REPLAYGAIN_EXECUTABLE = "replaygain" # From the rgain3 python package
@staticmethod
def analyze(filename, metadata):
"""Extracts the Replaygain loudness normalization factor of a track. """Extracts the Replaygain loudness normalization factor of a track.
:param filename: The full path to the file to analyze :param filename: The full path to the file to analyze
:param metadata: A metadata dictionary where the results will be put :param metadata: A metadata dictionary where the results will be put
@ -20,7 +15,7 @@ class ReplayGainAnalyzer(Analyzer):
""" """
""" The -d flag means do a dry-run, ie. don't modify the file directly. """ The -d flag means do a dry-run, ie. don't modify the file directly.
""" """
command = [ReplayGainAnalyzer.REPLAYGAIN_EXECUTABLE, "-d", filename] command = [REPLAYGAIN_EXECUTABLE, "-d", filename]
try: try:
results = subprocess.check_output( results = subprocess.check_output(
command, command,

View File

@ -6,11 +6,11 @@ import uuid
from loguru import logger from loguru import logger
from .analyzer import Analyzer
def organise_file(audio_file_path, import_directory, original_filename, metadata):
"""Move the file at audio_file_path over into the import_directory/import,
renaming it to original_filename.
class FileMoverAnalyzer(Analyzer):
"""
This analyzer copies a file over from a temporary directory (stor/organize) This analyzer copies a file over from a temporary directory (stor/organize)
into the Airtime library (stor/imported). into the Airtime library (stor/imported).
@ -18,17 +18,6 @@ class FileMoverAnalyzer(Analyzer):
- The filename of the first file is preserved. - The filename of the first file is preserved.
- The filename of the second file has the timestamp attached to it. - The filename of the second file has the timestamp attached to it.
- The filename of the third file has a UUID placed after the timestamp, but ONLY IF it's imported within 1 second of the second file (ie. if the timestamp is the same). - The filename of the third file has a UUID placed after the timestamp, but ONLY IF it's imported within 1 second of the second file (ie. if the timestamp is the same).
"""
@staticmethod
def analyze(audio_file_path, metadata):
"""Dummy method because we need more info than analyze gets passed to it"""
raise Exception("Use FileMoverAnalyzer.move() instead.")
@staticmethod
def move(audio_file_path, import_directory, original_filename, metadata):
"""Move the file at audio_file_path over into the import_directory/import,
renaming it to original_filename.
Keyword arguments: Keyword arguments:
audio_file_path: Path to the file to be imported. audio_file_path: Path to the file to be imported.
@ -74,9 +63,7 @@ class FileMoverAnalyzer(Analyzer):
if "album_title" in metadata: if "album_title" in metadata:
final_file_path += "/" + metadata["album_title"][0:max_dir_len] final_file_path += "/" + metadata["album_title"][0:max_dir_len]
# Note that orig_file_extension includes the "." already # Note that orig_file_extension includes the "." already
final_file_path += ( final_file_path += "/" + orig_file_basename[0:max_file_len] + orig_file_extension
"/" + orig_file_basename[0:max_file_len] + orig_file_extension
)
# Ensure any redundant slashes are stripped # Ensure any redundant slashes are stripped
final_file_path = os.path.normpath(final_file_path) final_file_path = os.path.normpath(final_file_path)

View File

@ -1,9 +1,7 @@
# TODO: use an abstract base class (ie. import from abc ...) once we have python >=3.3 that supports @staticmethod with @abstractmethod from typing import Any, Dict, Protocol
class Analyzer: class Step(Protocol):
"""Abstract base class for all "analyzers"."""
@staticmethod @staticmethod
def analyze(filename, metadata): def __call__(filename: str, metadata: Dict[str, Any]):
raise NotImplementedError ...

View File

@ -5,14 +5,14 @@ from queue import Queue
import pytest import pytest
from libretime_analyzer.analyzer_pipeline import AnalyzerPipeline from libretime_analyzer.pipeline import Pipeline
from .conftest import AUDIO_FILENAME, AUDIO_IMPORT_DEST from .conftest import AUDIO_FILENAME, AUDIO_IMPORT_DEST
def test_run_analysis(src_dir, dest_dir): def test_run_analysis(src_dir, dest_dir):
queue = Queue() queue = Queue()
AnalyzerPipeline.run_analysis( Pipeline.run_analysis(
queue, queue,
os.path.join(src_dir, AUDIO_FILENAME), os.path.join(src_dir, AUDIO_FILENAME),
dest_dir, dest_dir,
@ -46,4 +46,4 @@ def test_run_analysis(src_dir, dest_dir):
) )
def test_run_analysis_wrong_params(params, exception): def test_run_analysis_wrong_params(params, exception):
with pytest.raises(exception): with pytest.raises(exception):
AnalyzerPipeline.run_analysis(*params) Pipeline.run_analysis(*params)

View File

@ -1,9 +1,11 @@
from unittest.mock import patch
import distro import distro
import pytest import pytest
from libretime_analyzer.cuepoint_analyzer import CuePointAnalyzer from libretime_analyzer.steps.analyze_cuepoint import analyze_cuepoint
from .fixtures import FILE_INVALID_DRM, FILES, Fixture from ..fixtures import FILE_INVALID_DRM, FILES, Fixture
@pytest.mark.parametrize( @pytest.mark.parametrize(
@ -11,7 +13,7 @@ from .fixtures import FILE_INVALID_DRM, FILES, Fixture
map(lambda i: (str(i.path), i.length, i.cuein, i.cueout), FILES), map(lambda i: (str(i.path), i.length, i.cuein, i.cueout), FILES),
) )
def test_analyze(filepath, length, cuein, cueout): def test_analyze(filepath, length, cuein, cueout):
metadata = CuePointAnalyzer.analyze(filepath, dict()) metadata = analyze_cuepoint(filepath, dict())
assert metadata["length_seconds"] == pytest.approx(length, abs=0.1) assert metadata["length_seconds"] == pytest.approx(length, abs=0.1)
@ -32,10 +34,11 @@ def test_analyze(filepath, length, cuein, cueout):
def test_analyze_missing_silan(): def test_analyze_missing_silan():
old = CuePointAnalyzer.SILAN_EXECUTABLE with patch(
CuePointAnalyzer.SILAN_EXECUTABLE = "foobar" "libretime_analyzer.steps.analyze_cuepoint.SILAN_EXECUTABLE",
CuePointAnalyzer.analyze(str(FILES[0].path), dict()) "foobar",
CuePointAnalyzer.SILAN_EXECUTABLE = old ):
analyze_cuepoint(str(FILES[0].path), dict())
def test_analyze_invalid_filepath(): def test_analyze_invalid_filepath():

View File

@ -4,9 +4,9 @@ from unittest import mock
import mutagen import mutagen
import pytest import pytest
from libretime_analyzer.metadata_analyzer import MetadataAnalyzer from libretime_analyzer.steps.analyze_metadata import analyze_metadata
from .fixtures import FILE_INVALID_DRM, FILE_INVALID_TXT, FILES_TAGGED, FixtureMeta from ..fixtures import FILE_INVALID_DRM, FILE_INVALID_TXT, FILES_TAGGED, FixtureMeta
@pytest.mark.parametrize( @pytest.mark.parametrize(
@ -18,7 +18,7 @@ from .fixtures import FILE_INVALID_DRM, FILE_INVALID_TXT, FILES_TAGGED, FixtureM
) )
def test_analyze_wrong_params(params, exception): def test_analyze_wrong_params(params, exception):
with pytest.raises(exception): with pytest.raises(exception):
MetadataAnalyzer.analyze(*params) analyze_metadata(*params)
@pytest.mark.parametrize( @pytest.mark.parametrize(
@ -26,7 +26,7 @@ def test_analyze_wrong_params(params, exception):
map(lambda i: (str(i.path), i.metadata), FILES_TAGGED), map(lambda i: (str(i.path), i.metadata), FILES_TAGGED),
) )
def test_analyze(filepath: str, metadata: dict): def test_analyze(filepath: str, metadata: dict):
found = MetadataAnalyzer.analyze(filepath, dict()) found = analyze_metadata(filepath, dict())
# Mutagen does not support wav files yet # Mutagen does not support wav files yet
if filepath.endswith("wav"): if filepath.endswith("wav"):
@ -50,12 +50,12 @@ def test_analyze(filepath: str, metadata: dict):
def test_invalid_wma(): def test_invalid_wma():
metadata = MetadataAnalyzer.analyze(str(FILE_INVALID_DRM), dict()) metadata = analyze_metadata(str(FILE_INVALID_DRM), dict())
assert metadata["mime"] == "audio/x-ms-wma" assert metadata["mime"] == "audio/x-ms-wma"
def test_unparsable_file(): def test_unparsable_file():
metadata = MetadataAnalyzer.analyze(str(FILE_INVALID_TXT), dict()) metadata = analyze_metadata(str(FILE_INVALID_TXT), dict())
assert metadata == { assert metadata == {
"filesize": 10, "filesize": 10,
"ftype": "audioclip", "ftype": "audioclip",

View File

@ -1,12 +1,14 @@
from unittest.mock import patch
import distro import distro
import pytest import pytest
from libretime_analyzer.playability_analyzer import ( from libretime_analyzer.steps.analyze_playability import (
PlayabilityAnalyzer,
UnplayableFileError, UnplayableFileError,
analyze_playability,
) )
from .fixtures import FILE_INVALID_DRM, FILES, Fixture from ..fixtures import FILE_INVALID_DRM, FILES, Fixture
@pytest.mark.parametrize( @pytest.mark.parametrize(
@ -14,14 +16,15 @@ from .fixtures import FILE_INVALID_DRM, FILES, Fixture
map(lambda i: str(i.path), FILES), map(lambda i: str(i.path), FILES),
) )
def test_analyze(filepath): def test_analyze(filepath):
PlayabilityAnalyzer.analyze(filepath, dict()) analyze_playability(filepath, dict())
def test_analyze_missing_liquidsoap(): def test_analyze_missing_liquidsoap():
old = PlayabilityAnalyzer.LIQUIDSOAP_EXECUTABLE with patch(
PlayabilityAnalyzer.LIQUIDSOAP_EXECUTABLE = "foobar" "libretime_analyzer.steps.analyze_playability.LIQUIDSOAP_EXECUTABLE",
PlayabilityAnalyzer.analyze(str(FILES[0].path), dict()) "foobar",
PlayabilityAnalyzer.LIQUIDSOAP_EXECUTABLE = old ):
analyze_playability(str(FILES[0].path), dict())
def test_analyze_invalid_filepath(): def test_analyze_invalid_filepath():

View File

@ -1,8 +1,10 @@
from unittest.mock import patch
import pytest import pytest
from libretime_analyzer.replaygain_analyzer import ReplayGainAnalyzer from libretime_analyzer.steps.analyze_replaygain import analyze_replaygain
from .fixtures import FILE_INVALID_DRM, FILES, Fixture from ..fixtures import FILE_INVALID_DRM, FILES, Fixture
@pytest.mark.parametrize( @pytest.mark.parametrize(
@ -10,15 +12,16 @@ from .fixtures import FILE_INVALID_DRM, FILES, Fixture
map(lambda i: (str(i.path), i.replaygain), FILES), map(lambda i: (str(i.path), i.replaygain), FILES),
) )
def test_analyze(filepath, replaygain): def test_analyze(filepath, replaygain):
metadata = ReplayGainAnalyzer.analyze(filepath, dict()) metadata = analyze_replaygain(filepath, dict())
assert metadata["replay_gain"] == pytest.approx(replaygain, abs=0.6) assert metadata["replay_gain"] == pytest.approx(replaygain, abs=0.6)
def test_analyze_missing_replaygain(): def test_analyze_missing_replaygain():
old = ReplayGainAnalyzer.REPLAYGAIN_EXECUTABLE with patch(
ReplayGainAnalyzer.REPLAYGAIN_EXECUTABLE = "foobar" "libretime_analyzer.steps.analyze_replaygain.REPLAYGAIN_EXECUTABLE",
ReplayGainAnalyzer.analyze(str(FILES[0].path), dict()) "foobar",
ReplayGainAnalyzer.REPLAYGAIN_EXECUTABLE = old ):
analyze_replaygain(str(FILES[0].path), dict())
def test_analyze_invalid_filepath(): def test_analyze_invalid_filepath():

View File

@ -6,14 +6,9 @@ from unittest import mock
import pytest import pytest
from libretime_analyzer.filemover_analyzer import FileMoverAnalyzer from libretime_analyzer.steps.organise_file import organise_file
from .conftest import AUDIO_FILENAME from ..conftest import AUDIO_FILENAME
def test_analyze():
with pytest.raises(Exception):
FileMoverAnalyzer.analyze("foo", dict())
@pytest.mark.parametrize( @pytest.mark.parametrize(
@ -27,11 +22,11 @@ def test_analyze():
) )
def test_move_wrong_params(params, exception): def test_move_wrong_params(params, exception):
with pytest.raises(exception): with pytest.raises(exception):
FileMoverAnalyzer.move(*params) organise_file(*params)
def test_move(src_dir, dest_dir): def test_organise_file(src_dir, dest_dir):
FileMoverAnalyzer.move( organise_file(
os.path.join(src_dir, AUDIO_FILENAME), os.path.join(src_dir, AUDIO_FILENAME),
dest_dir, dest_dir,
AUDIO_FILENAME, AUDIO_FILENAME,
@ -40,8 +35,8 @@ def test_move(src_dir, dest_dir):
assert os.path.exists(os.path.join(dest_dir, AUDIO_FILENAME)) assert os.path.exists(os.path.join(dest_dir, AUDIO_FILENAME))
def test_move_samefile(src_dir): def test_organise_file_samefile(src_dir):
FileMoverAnalyzer.move( organise_file(
os.path.join(src_dir, AUDIO_FILENAME), os.path.join(src_dir, AUDIO_FILENAME),
src_dir, src_dir,
AUDIO_FILENAME, AUDIO_FILENAME,
@ -52,11 +47,11 @@ def test_move_samefile(src_dir):
def import_and_restore(src_dir, dest_dir) -> dict: def import_and_restore(src_dir, dest_dir) -> dict:
""" """
Small helper to test the FileMoverAnalyzer.move function. Small helper to test the organise_file function.
Move the file and restore it back to its origin. Move the file and restore it back to its origin.
""" """
# Import the file # Import the file
metadata = FileMoverAnalyzer.move( metadata = organise_file(
os.path.join(src_dir, AUDIO_FILENAME), os.path.join(src_dir, AUDIO_FILENAME),
dest_dir, dest_dir,
AUDIO_FILENAME, AUDIO_FILENAME,
@ -88,7 +83,7 @@ def test_move_triplicate_file(src_dir, dest_dir):
# Here we use mock to patch out the time.localtime() function so that it # Here we use mock to patch out the time.localtime() function so that it
# always returns the same value. This allows us to consistently simulate the test case # always returns the same value. This allows us to consistently simulate the test case
# where the last two of the three files are imported at the same time as the timestamp. # where the last two of the three files are imported at the same time as the timestamp.
with mock.patch("libretime_analyzer.filemover_analyzer.time") as mock_time: with mock.patch("libretime_analyzer.steps.organise_file.time") as mock_time:
mock_time.localtime.return_value = time.localtime() # date(2010, 10, 8) mock_time.localtime.return_value = time.localtime() # date(2010, 10, 8)
mock_time.side_effect = time.time mock_time.side_effect = time.time
@ -113,7 +108,7 @@ def test_move_triplicate_file(src_dir, dest_dir):
def test_move_bad_permissions_dest_dir(src_dir): def test_move_bad_permissions_dest_dir(src_dir):
with pytest.raises(OSError): with pytest.raises(OSError):
# /sys is using sysfs on Linux, which is unwritable # /sys is using sysfs on Linux, which is unwritable
FileMoverAnalyzer.move( organise_file(
os.path.join(src_dir, AUDIO_FILENAME), os.path.join(src_dir, AUDIO_FILENAME),
"/sys/foobar", "/sys/foobar",
AUDIO_FILENAME, AUDIO_FILENAME,