sintonia/analyzer/airtime_analyzer/metadata_analyzer.py

import datetime
import hashlib
import logging
import os
import time
import wave

import magic
import mutagen

from .analyzer import Analyzer


class MetadataAnalyzer(Analyzer):
    @staticmethod
    def analyze(filename, metadata):
        """Extract audio metadata from tags embedded in the file (eg. ID3 tags)

        Keyword arguments:
            filename: The path to the audio file to extract metadata from.
            metadata: A dictionary that the extracted metadata will be added to.
        """
        if not isinstance(filename, str):
            raise TypeError(
                "filename must be string. Was of type " + type(filename).__name__
            )
        if not isinstance(metadata, dict):
            raise TypeError(
                "metadata must be a dict. Was of type " + type(metadata).__name__
            )
        if not os.path.exists(filename):
            raise FileNotFoundError("audio file not found: {}".format(filename))

        # Airtime <= 2.5.x nonsense:
        metadata["ftype"] = "audioclip"
        # Other fields we'll want to set for Airtime:
        metadata["hidden"] = False

        # Get file size and md5 hash of the file
        metadata["filesize"] = os.path.getsize(filename)

        with open(filename, "rb") as fh:
            m = hashlib.md5()
            while True:
                data = fh.read(8192)
                if not data:
                    break
                m.update(data)
            metadata["md5"] = m.hexdigest()

        # Mutagen doesn't handle WAVE files so we use a different package
        ms = magic.open(magic.MIME_TYPE)
        ms.load()
        with open(filename, "rb") as fh:
            mime_check = ms.buffer(fh.read(2014))
        metadata["mime"] = mime_check
        if mime_check == "audio/x-wav":
            return MetadataAnalyzer._analyze_wave(filename, metadata)

        # Extract metadata from an audio file using mutagen
        audio_file = mutagen.File(filename, easy=True)

        # Bail if the file couldn't be parsed. The title should stay as the filename
        # inside Airtime.
        if (
            audio_file == None
        ):  # Don't use "if not" here. It is wrong due to mutagen's design.
            return metadata
        # Note that audio_file can equal {} if the file is valid but there's no metadata tags.
        # We can still try to grab the info variables below.

        # Grab other file information that isn't encoded in a tag, but instead usually
        # in the file header. Mutagen breaks that out into a separate "info" object:
        info = audio_file.info
        if hasattr(info, "sample_rate"):  # Mutagen is annoying and inconsistent
            metadata["sample_rate"] = info.sample_rate
        if hasattr(info, "length"):
            metadata["length_seconds"] = info.length
            # Converting the length in seconds (float) to a formatted time string
            track_length = datetime.timedelta(seconds=info.length)
            metadata["length"] = str(
                track_length
            )  # time.strftime("%H:%M:%S.%f", track_length)
            # Other fields for Airtime
            metadata["cueout"] = metadata["length"]

        # Set a default cue in time in seconds
        metadata["cuein"] = 0.0

        if hasattr(info, "bitrate"):
            metadata["bit_rate"] = info.bitrate

        # Use the mutagen to get the MIME type, if it has one. This is more reliable and
        # consistent for certain types of MP3s or MPEG files than the MIMEs returned by magic.
        if audio_file.mime:
            metadata["mime"] = audio_file.mime[0]

        # Try to get the number of channels if mutagen can...
        try:
            # Special handling for getting the # of channels from MP3s. It's in the "mode" field
            # which is 0=Stereo, 1=Joint Stereo, 2=Dual Channel, 3=Mono. Part of the ID3 spec...
            if metadata["mime"] in ["audio/mpeg", "audio/mp3"]:
                if info.mode == 3:
                    metadata["channels"] = 1
                else:
                    metadata["channels"] = 2
            else:
                metadata["channels"] = info.channels
        except (AttributeError, KeyError):
            # If mutagen can't figure out the number of channels, we'll just leave it out...
            pass

        # Try to extract the number of tracks on the album if we can (the "track total")
        try:
            track_number = audio_file["tracknumber"]
            if isinstance(track_number, list):  # Sometimes tracknumber is a list, ugh
                track_number = track_number[0]
            track_number_tokens = track_number
            if "/" in track_number:
                track_number_tokens = track_number.split("/")
                track_number = track_number_tokens[0]
            elif "-" in track_number:
                track_number_tokens = track_number.split("-")
                track_number = track_number_tokens[0]
            metadata["track_number"] = track_number
            track_total = track_number_tokens[1]
            metadata["track_total"] = track_total
        except (AttributeError, KeyError, IndexError):
            # If we couldn't figure out the track_number or track_total, just ignore it...
            pass

        # We normalize the mutagen tags slightly here, so in case mutagen changes,
        # we find the
        mutagen_to_airtime_mapping = {
            "title": "track_title",
            "artist": "artist_name",
            "album": "album_title",
            "bpm": "bpm",
            "composer": "composer",
            "conductor": "conductor",
            "copyright": "copyright",
            "comment": "comment",
            "encoded_by": "encoder",
            "genre": "genre",
            "isrc": "isrc",
            "label": "label",
            "organization": "label",
            #'length':       'length',
            "language": "language",
            "last_modified": "last_modified",
            "mood": "mood",
            "bit_rate": "bit_rate",
            "replay_gain": "replaygain",
            #'tracknumber':  'track_number',
            #'track_total':  'track_total',
            "website": "website",
            "date": "year",
            #'mime_type':    'mime',
        }

        for mutagen_tag, airtime_tag in mutagen_to_airtime_mapping.items():
            try:
                metadata[airtime_tag] = audio_file[mutagen_tag]

                # Some tags are returned as lists because there could be multiple values.
                # This is unusual so we're going to always just take the first item in the list.
                if isinstance(metadata[airtime_tag], list):
                    if metadata[airtime_tag]:
                        metadata[airtime_tag] = metadata[airtime_tag][0]
                    else:  # Handle empty lists
                        metadata[airtime_tag] = ""

            except KeyError:
                continue

        return metadata

    @staticmethod
    def _analyze_wave(filename, metadata):
        try:
            reader = wave.open(filename, "rb")
            metadata["channels"] = reader.getnchannels()
            metadata["sample_rate"] = reader.getframerate()
            length_seconds = float(reader.getnframes()) / float(metadata["sample_rate"])
            # Converting the length in seconds (float) to a formatted time string
            track_length = datetime.timedelta(seconds=length_seconds)
            metadata["length"] = str(
                track_length
            )  # time.strftime("%H:%M:%S.%f", track_length)
            metadata["length_seconds"] = length_seconds
            metadata["cueout"] = metadata["length"]
        except wave.Error as ex:
            logging.error("Invalid WAVE file: {}".format(str(ex)))
            raise
        return metadata