150 lines
5.6 KiB
Python
150 lines
5.6 KiB
Python
import cgi
|
|
import json
|
|
import os
|
|
import posixpath
|
|
import shutil
|
|
import tempfile
|
|
import traceback
|
|
from contextlib import closing
|
|
from io import StringIO
|
|
from urllib.parse import urlsplit
|
|
|
|
import mutagen
|
|
import requests
|
|
from celery import Celery
|
|
from celery.utils.log import get_task_logger
|
|
|
|
# Celery application instance, created without arguments here; the tasks in
# this module register themselves on it through the @celery.task decorator.
celery = Celery()
|
|
# Celery task logger named after this module; shared by the task and its helpers.
logger = get_task_logger(__name__)
|
|
|
|
|
|
@celery.task(name="podcast-download", acks_late=True)
def podcast_download(
    id, url, callback_url, api_key, podcast_name, album_override, track_title
):
    """
    Download a podcast episode, update its tags and upload it to the callback URL

    :param id: episode unique ID
    :param url: download url for the episode
    :param callback_url: callback URL to send the downloaded file to
    :param api_key: API key for callback authentication (sent as the HTTP basic
                    auth user name with an empty password)
    :param podcast_name: Name of podcast to be added to id3 metadata for smartblock
    :param album_override: Passing whether to override the album id3 even if it exists
    :param track_title: Passing the title of the episode from feed to override the metadata

    :return: JSON formatted string of a dictionary holding "episodeid", "status"
             (1 on success, 0 on failure), "fileid" (successful uploads only)
             and "error" (failures only)
    :rtype: string
    """
    # Object to store file IDs, episode IDs, and download status
    # (important if there's an error before the file is posted)
    obj = {"episodeid": id}
    # Path of the temporary download; stays None until the file has been created
    tmp_path = None
    try:
        with closing(requests.get(url, stream=True)) as r:
            # Fail early on HTTP errors instead of tagging and uploading an error page
            r.raise_for_status()
            filename = get_filename(r)
            with tempfile.NamedTemporaryFile(mode="wb+", delete=False) as audiofile:
                tmp_path = audiofile.name
                r.raw.decode_content = True
                shutil.copyfileobj(r.raw, audiofile)
        # The temporary file is closed (and therefore flushed) at this point, so
        # reading it back by path sees the complete download

        # mutagen should be able to guess the right file type
        metadata_audiofile = mutagen.File(tmp_path, easy=True)
        # if mutagen couldn't guess the type, treat the file as an mp3 when it
        # has an mp3 file extension (any letter case) and hope for the best
        if metadata_audiofile is None and filename.lower().endswith("mp3"):
            metadata_audiofile = mutagen.mp3.MP3(
                tmp_path, ID3=mutagen.easyid3.EasyID3
            )
        if metadata_audiofile is None:
            raise ValueError(
                "Unable to determine the audio file type of {}".format(filename)
            )

        # replace track metadata as indicated by album_override setting
        # replace album title as needed
        metadata_audiofile = podcast_override_metadata(
            metadata_audiofile, podcast_name, album_override, track_title
        )
        metadata_audiofile.save()
        filetypeinfo = metadata_audiofile.pprint()
        # Non-ASCII characters are dropped from the logged text; decoding back
        # to str keeps the message free of a b'...' wrapper
        logger.info(
            "filetypeinfo is %s",
            filetypeinfo.encode("ascii", "ignore").decode("ascii"),
        )

        # Keep the upload handle in a with-block so it is closed after the POST
        with open(tmp_path, "rb") as upload:
            upload_response = requests.post(
                callback_url,
                files={"file": (filename, upload)},
                auth=requests.auth.HTTPBasicAuth(api_key, ""),
            )
        upload_response.raise_for_status()
        # Read the response from the media API to get the file id
        # (json.loads accepts the raw bytes directly)
        f = json.loads(upload_response.content)
        obj["fileid"] = f["id"]
        obj["status"] = 1
    except Exception as e:
        # Task boundary: every failure is reported through the returned JSON
        # rather than raised. Exceptions have no .message attribute on Python 3.
        obj["error"] = str(e)
        logger.info(f"Error during file download: {e}")
        logger.debug("Original Traceback: %s", traceback.format_exc())
        obj["status"] = 0
    finally:
        # The temporary file was created with delete=False, so remove it here
        if tmp_path is not None:
            try:
                os.remove(tmp_path)
            except OSError as cleanup_error:
                logger.warning(
                    "Could not remove temporary file %s: %s", tmp_path, cleanup_error
                )
    return json.dumps(obj)
|
|
|
|
|
|
def podcast_override_metadata(m, podcast_name, override, track_title):
    """
    Set m['album'] to the podcast name if the tag is missing or forced with override arg

    :param m: tag mapping of the audio file; it is modified in place
    :param podcast_name: name written to the album tag (and to the artist tag
                         when override is set)
    :param override: only the literal True forces album, title and artist to
                     be replaced; any other value leaves existing tags alone
    :param track_title: title written to the title tag when override is set

    :return: the same mapping m that was passed in
    """
    # if the album override option is enabled replace the album id3 tag with
    # the podcast name even if the album tag contains data
    if override is True:
        # Non-ASCII characters are dropped from the logged name; decoding back
        # to str keeps the message free of a b'...' wrapper
        logger.debug(
            "overriding album name to %s in podcast",
            podcast_name.encode("ascii", "ignore").decode("ascii"),
        )
        m["album"] = podcast_name
        m["title"] = track_title
        m["artist"] = podcast_name
        return m

    # otherwise only fill in the album tag when the lookup reports it missing
    try:
        m["album"]
    except KeyError:
        logger.debug(
            "setting new album name to %s in podcast",
            podcast_name.encode("ascii", "ignore").decode("ascii"),
        )
        m["album"] = podcast_name
    return m
|
|
|
|
|
|
def get_filename(r):
    """
    Given a response object for a file resource, get the name of the file to be
    downloaded by parsing either the content disposition or the request URL

    :param r: response object; only its ``headers`` mapping and ``url`` are read

    :return: the file name (empty if neither the header nor the URL path
             provides one)
    :rtype: string
    """
    # Imported locally so this function does not rely on the deprecated cgi
    # module; email.message parses the same "value; key=value" header syntax
    from email.message import Message

    filename = ""
    # Try to get the filename from the content disposition
    disposition = r.headers.get("Content-Disposition")
    if disposition:
        try:
            header = Message()
            header["Content-Disposition"] = disposition
            # Returns None when the header carries no filename parameter;
            # RFC 2231 encoded values (filename*=...) are decoded as well
            filename = header.get_filename() or ""
        except (TypeError, ValueError, LookupError):
            # The header comes from the remote server; a malformed value must
            # not abort the download, so fall back to the URL instead
            filename = ""
        if not filename:
            logger.warning(
                "Couldn't find file name in Content-Disposition header, using url"
            )
    if not filename:
        # Since we don't necessarily get the filename back in the response headers,
        # parse the URL and get the filename and extension
        path = urlsplit(r.url).path
        filename = posixpath.basename(path)
    return filename
|