diff --git a/analyzer/libretime_analyzer/pipeline/analyze_metadata.py b/analyzer/libretime_analyzer/pipeline/analyze_metadata.py
index 8e725bdd5..b0576ba4b 100644
--- a/analyzer/libretime_analyzer/pipeline/analyze_metadata.py
+++ b/analyzer/libretime_analyzer/pipeline/analyze_metadata.py
@@ -5,10 +5,33 @@ from typing import Any, Dict
 
 import mutagen
 from libretime_shared.files import compute_md5
+from mutagen.easyid3 import EasyID3
 
 logger = logging.getLogger(__name__)
 
 
+def flatten(xss):
+    """
+    Flatten one level of nesting, e.g. [[1], [2, 3]] becomes [1, 2, 3].
+    """
+    return [x for xs in xss for x in xs]
+
+
+def comment_get(id3, _):
+    """
+    EasyID3 getter returning the text of every comment frame as a flat list.
+
+    A frame is selected when its key contains "COMM" or "comment".
+    """
+    comments = [v.text for k, v in id3.items() if "COMM" in k or "comment" in k]
+
+    return flatten(comments)
+
+
+# Only a getter is registered for the "comment" key.
+EasyID3.RegisterKey("comment", comment_get)
+
+
 def analyze_metadata(filepath_: str, metadata: Dict[str, Any]):
     """
     Extract audio metadata from tags embedded in the file using mutagen.
@@ -71,34 +94,37 @@ def analyze_metadata(filepath_: str, metadata: Dict[str, Any]):
     except (AttributeError, KeyError, IndexError):
         pass
 
-    extracted_tags_mapping = {
-        "title": "track_title",
-        "artist": "artist_name",
-        "album": "album_title",
-        "bpm": "bpm",
-        "composer": "composer",
-        "conductor": "conductor",
-        "copyright": "copyright",
-        "comment": "comment",
-        "encoded_by": "encoder",
-        "genre": "genre",
-        "isrc": "isrc",
-        "label": "label",
-        "organization": "label",
-        # "length": "length",
-        "language": "language",
-        "last_modified": "last_modified",
-        "mood": "mood",
-        "bit_rate": "bit_rate",
-        "replay_gain": "replaygain",
-        # "tracknumber": "track_number",
-        # "track_total": "track_total",
-        "website": "website",
-        "date": "year",
-        # "mime_type": "mime",
-    }
+    # List of pairs (not a dict): one extracted key may fill several metadata keys.
+    extracted_tags_mapping = [
+        ("title", "track_title"),
+        ("artist", "artist_name"),
+        ("album", "album_title"),
+        ("bpm", "bpm"),
+        ("composer", "composer"),
+        ("conductor", "conductor"),
+        ("copyright", "copyright"),
+        ("comment", "comment"),
+        ("comment", "comments"),
+        ("comment", "description"),
+        ("encoded_by", "encoder"),
+        ("genre", "genre"),
+        ("isrc", "isrc"),
+        ("label", "label"),
+        ("organization", "label"),
+        # ("length", "length"),
+        ("language", "language"),
+        ("last_modified", "last_modified"),
+        ("mood", "mood"),
+        ("bit_rate", "bit_rate"),
+        ("replay_gain", "replaygain"),
+        # ("tracknumber", "track_number"),
+        # ("track_total", "track_total"),
+        ("website", "website"),
+        ("date", "year"),
+        # ("mime_type", "mime"),
+    ]
 
-    for extracted_key, metadata_key in extracted_tags_mapping.items():
+    for extracted_key, metadata_key in extracted_tags_mapping:
         try:
             metadata[metadata_key] = extracted[extracted_key]
             if isinstance(metadata[metadata_key], list):
diff --git a/analyzer/tests/fixtures/__init__.py b/analyzer/tests/fixtures/__init__.py
index a778930dc..44f2d644d 100644
--- a/analyzer/tests/fixtures/__init__.py
+++ b/analyzer/tests/fixtures/__init__.py
@@ -96,12 +96,18 @@ tags = {
     "comment": "Test Comment",
 }
 
+tags_with_comments = {
+    **tags,
+    "comments": tags["comment"],
+    "description": tags["comment"],
+}
+
 FILES_TAGGED = [
     FixtureMeta(
         here / "s1-jointstereo-tagged.mp3",
         {
             **meta,
-            **tags,
+            **tags_with_comments,
             "bit_rate": approx(128000, abs=1e2),
             "channels": 2,
             "mime": "audio/mp3",
@@ -111,7 +117,7 @@ FILES_TAGGED = [
         here / "s1-mono-tagged.mp3",
         {
             **meta,
-            **tags,
+            **tags_with_comments,
             "bit_rate": approx(64000, abs=1e2),
             "channels": 1,
             "mime": "audio/mp3",
@@ -121,7 +127,7 @@ FILES_TAGGED = [
         here / "s1-stereo-tagged.mp3",
         {
             **meta,
-            **tags,
+            **tags_with_comments,
             "bit_rate": approx(128000, abs=1e2),
             "channels": 2,
             "mime": "audio/mp3",
@@ -151,7 +157,7 @@ FILES_TAGGED = [
         here / "s1-mono-tagged.m4a",
         {
             **meta,
-            **tags,
+            **tags_with_comments,
             "bit_rate": approx(65000, abs=5e4),
             "channels": 2,  # Weird
             "mime": "audio/mp4",
@@ -161,7 +167,7 @@ FILES_TAGGED = [
         here / "s1-stereo-tagged.m4a",
         {
             **meta,
-            **tags,
+            **tags_with_comments,
             "bit_rate": approx(128000, abs=1e5),
             "channels": 2,
             "mime": "audio/mp4",
@@ -228,12 +234,18 @@ tags = {
     "comment": "Ł Ą Ż Ę Ć Ń Ś Ź",
 }
 
+tags_with_comments = {
+    **tags,
+    "comments": tags["comment"],
+    "description": tags["comment"],
+}
+
 FILES_TAGGED += [
     FixtureMeta(
         here / "s1-jointstereo-tagged-utf8.mp3",
         {
             **meta,
-            **tags,
+            **tags_with_comments,
             "bit_rate": approx(128000, abs=1e2),
             "channels": 2,
             "mime": "audio/mp3",
@@ -243,7 +255,7 @@ FILES_TAGGED += [
         here / "s1-mono-tagged-utf8.mp3",
         {
             **meta,
-            **tags,
+            **tags_with_comments,
             "bit_rate": approx(64000, abs=1e2),
             "channels": 1,
             "mime": "audio/mp3",
@@ -253,7 +265,7 @@ FILES_TAGGED += [
         here / "s1-stereo-tagged-utf8.mp3",
         {
             **meta,
-            **tags,
+            **tags_with_comments,
             "bit_rate": approx(128000, abs=1e2),
             "channels": 2,
             "mime": "audio/mp3",
@@ -283,7 +295,7 @@ FILES_TAGGED += [
         here / "s1-mono-tagged-utf8.m4a",
         {
             **meta,
-            **tags,
+            **tags_with_comments,
             "bit_rate": approx(65000, abs=5e4),
             "channels": 2,  # Weird
             "mime": "audio/mp4",
@@ -293,7 +305,7 @@ FILES_TAGGED += [
         here / "s1-stereo-tagged-utf8.m4a",
         {
             **meta,
-            **tags,
+            **tags_with_comments,
             "bit_rate": approx(128000, abs=1e5),
             "channels": 2,
             "mime": "audio/mp4",
diff --git a/analyzer/tests/pipeline/analyze_metadata_test.py b/analyzer/tests/pipeline/analyze_metadata_test.py
index f0598d672..ec735f696 100644
--- a/analyzer/tests/pipeline/analyze_metadata_test.py
+++ b/analyzer/tests/pipeline/analyze_metadata_test.py
@@ -27,8 +27,8 @@ def test_analyze_metadata(filepath: Path, metadata: dict):
     del metadata["length"]
     del found["length"]
 
-    # mp3,ogg,flac files does not support comments yet
-    if not filepath.suffix == ".m4a":
+    # ogg and flac files do not support comments yet
+    if filepath.suffix not in (".m4a", ".mp3"):
         if "comment" in metadata:
             del metadata["comment"]
 