184 lines
7.6 KiB
Python
184 lines
7.6 KiB
Python
import time
|
|
import datetime
|
|
import mutagen
|
|
import magic
|
|
import wave
|
|
import logging
|
|
import os
|
|
import hashlib
|
|
from .analyzer import Analyzer
|
|
|
|
class MetadataAnalyzer(Analyzer):
|
|
|
|
@staticmethod
|
|
def analyze(filename, metadata):
|
|
''' Extract audio metadata from tags embedded in the file (eg. ID3 tags)
|
|
|
|
Keyword arguments:
|
|
filename: The path to the audio file to extract metadata from.
|
|
metadata: A dictionary that the extracted metadata will be added to.
|
|
'''
|
|
if not isinstance(filename, str):
|
|
raise TypeError("filename must be string. Was of type " + type(filename).__name__)
|
|
if not isinstance(metadata, dict):
|
|
raise TypeError("metadata must be a dict. Was of type " + type(metadata).__name__)
|
|
if not os.path.exists(filename):
|
|
raise FileNotFoundError("audio file not found: {}".format(filename))
|
|
|
|
#Airtime <= 2.5.x nonsense:
|
|
metadata["ftype"] = "audioclip"
|
|
#Other fields we'll want to set for Airtime:
|
|
metadata["hidden"] = False
|
|
|
|
# Get file size and md5 hash of the file
|
|
metadata["filesize"] = os.path.getsize(filename)
|
|
|
|
with open(filename, 'rb') as fh:
|
|
m = hashlib.md5()
|
|
while True:
|
|
data = fh.read(8192)
|
|
if not data:
|
|
break
|
|
m.update(data)
|
|
metadata["md5"] = m.hexdigest()
|
|
|
|
# Mutagen doesn't handle WAVE files so we use a different package
|
|
ms = magic.open(magic.MIME_TYPE)
|
|
ms.load()
|
|
with open(filename, 'rb') as fh:
|
|
mime_check = ms.buffer(fh.read(2014))
|
|
metadata["mime"] = mime_check
|
|
if mime_check == 'audio/x-wav':
|
|
return MetadataAnalyzer._analyze_wave(filename, metadata)
|
|
|
|
#Extract metadata from an audio file using mutagen
|
|
audio_file = mutagen.File(filename, easy=True)
|
|
|
|
#Bail if the file couldn't be parsed. The title should stay as the filename
|
|
#inside Airtime.
|
|
if audio_file == None: # Don't use "if not" here. It is wrong due to mutagen's design.
|
|
return metadata
|
|
# Note that audio_file can equal {} if the file is valid but there's no metadata tags.
|
|
# We can still try to grab the info variables below.
|
|
|
|
#Grab other file information that isn't encoded in a tag, but instead usually
|
|
#in the file header. Mutagen breaks that out into a separate "info" object:
|
|
info = audio_file.info
|
|
if hasattr(info, "sample_rate"): # Mutagen is annoying and inconsistent
|
|
metadata["sample_rate"] = info.sample_rate
|
|
if hasattr(info, "length"):
|
|
metadata["length_seconds"] = info.length
|
|
#Converting the length in seconds (float) to a formatted time string
|
|
track_length = datetime.timedelta(seconds=info.length)
|
|
metadata["length"] = str(track_length) #time.strftime("%H:%M:%S.%f", track_length)
|
|
# Other fields for Airtime
|
|
metadata["cueout"] = metadata["length"]
|
|
|
|
# Set a default cue in time in seconds
|
|
metadata["cuein"] = 0.0;
|
|
|
|
if hasattr(info, "bitrate"):
|
|
metadata["bit_rate"] = info.bitrate
|
|
|
|
# Use the mutagen to get the MIME type, if it has one. This is more reliable and
|
|
# consistent for certain types of MP3s or MPEG files than the MIMEs returned by magic.
|
|
if audio_file.mime:
|
|
metadata["mime"] = audio_file.mime[0]
|
|
|
|
#Try to get the number of channels if mutagen can...
|
|
try:
|
|
#Special handling for getting the # of channels from MP3s. It's in the "mode" field
|
|
#which is 0=Stereo, 1=Joint Stereo, 2=Dual Channel, 3=Mono. Part of the ID3 spec...
|
|
if metadata["mime"] in ["audio/mpeg", 'audio/mp3']:
|
|
if info.mode == 3:
|
|
metadata["channels"] = 1
|
|
else:
|
|
metadata["channels"] = 2
|
|
else:
|
|
metadata["channels"] = info.channels
|
|
except (AttributeError, KeyError):
|
|
#If mutagen can't figure out the number of channels, we'll just leave it out...
|
|
pass
|
|
|
|
#Try to extract the number of tracks on the album if we can (the "track total")
|
|
try:
|
|
track_number = audio_file["tracknumber"]
|
|
if isinstance(track_number, list): # Sometimes tracknumber is a list, ugh
|
|
track_number = track_number[0]
|
|
track_number_tokens = track_number
|
|
if '/' in track_number:
|
|
track_number_tokens = track_number.split('/')
|
|
track_number = track_number_tokens[0]
|
|
elif '-' in track_number:
|
|
track_number_tokens = track_number.split('-')
|
|
track_number = track_number_tokens[0]
|
|
metadata["track_number"] = track_number
|
|
track_total = track_number_tokens[1]
|
|
metadata["track_total"] = track_total
|
|
except (AttributeError, KeyError, IndexError):
|
|
#If we couldn't figure out the track_number or track_total, just ignore it...
|
|
pass
|
|
|
|
#We normalize the mutagen tags slightly here, so in case mutagen changes,
|
|
#we find the
|
|
mutagen_to_airtime_mapping = {
|
|
'title': 'track_title',
|
|
'artist': 'artist_name',
|
|
'album': 'album_title',
|
|
'bpm': 'bpm',
|
|
'composer': 'composer',
|
|
'conductor': 'conductor',
|
|
'copyright': 'copyright',
|
|
'comment': 'comment',
|
|
'encoded_by': 'encoder',
|
|
'genre': 'genre',
|
|
'isrc': 'isrc',
|
|
'label': 'label',
|
|
'organization': 'label',
|
|
#'length': 'length',
|
|
'language': 'language',
|
|
'last_modified':'last_modified',
|
|
'mood': 'mood',
|
|
'bit_rate': 'bit_rate',
|
|
'replay_gain': 'replaygain',
|
|
#'tracknumber': 'track_number',
|
|
#'track_total': 'track_total',
|
|
'website': 'website',
|
|
'date': 'year',
|
|
#'mime_type': 'mime',
|
|
}
|
|
|
|
for mutagen_tag, airtime_tag in mutagen_to_airtime_mapping.items():
|
|
try:
|
|
metadata[airtime_tag] = audio_file[mutagen_tag]
|
|
|
|
# Some tags are returned as lists because there could be multiple values.
|
|
# This is unusual so we're going to always just take the first item in the list.
|
|
if isinstance(metadata[airtime_tag], list):
|
|
if metadata[airtime_tag]:
|
|
metadata[airtime_tag] = metadata[airtime_tag][0]
|
|
else: # Handle empty lists
|
|
metadata[airtime_tag] = ""
|
|
|
|
except KeyError:
|
|
continue
|
|
|
|
return metadata
|
|
|
|
@staticmethod
|
|
def _analyze_wave(filename, metadata):
|
|
try:
|
|
reader = wave.open(filename, 'rb')
|
|
metadata["channels"] = reader.getnchannels()
|
|
metadata["sample_rate"] = reader.getframerate()
|
|
length_seconds = float(reader.getnframes()) / float(metadata["sample_rate"])
|
|
#Converting the length in seconds (float) to a formatted time string
|
|
track_length = datetime.timedelta(seconds=length_seconds)
|
|
metadata["length"] = str(track_length) #time.strftime("%H:%M:%S.%f", track_length)
|
|
metadata["length_seconds"] = length_seconds
|
|
metadata["cueout"] = metadata["length"]
|
|
except wave.Error as ex:
|
|
logging.error("Invalid WAVE file: {}".format(str(ex)))
|
|
raise
|
|
return metadata
|