fix(worker): rewrite podcast download task
- Fixes bad exception handling when facing an invalid podcast episode file. Fix #2083
This commit is contained in:
parent
2afb766b41
commit
08a44186aa
|
@ -181,11 +181,11 @@ class Application_Service_PodcastEpisodeService extends Application_Service_Thir
|
||||||
private function _download($id, $url, $title, $album_override, $track_title = null)
|
private function _download($id, $url, $title, $album_override, $track_title = null)
|
||||||
{
|
{
|
||||||
$data = [
|
$data = [
|
||||||
'id' => $id,
|
'episode_id' => $id,
|
||||||
'url' => $url,
|
'episode_url' => $url,
|
||||||
|
'episode_title' => $track_title,
|
||||||
'podcast_name' => $title,
|
'podcast_name' => $title,
|
||||||
'album_override' => $album_override,
|
'override_album' => $album_override,
|
||||||
'track_title' => $track_title,
|
|
||||||
];
|
];
|
||||||
$task = $this->_executeTask(static::$_CELERY_TASKS[self::DOWNLOAD], $data);
|
$task = $this->_executeTask(static::$_CELERY_TASKS[self::DOWNLOAD], $data);
|
||||||
// Get the created ThirdPartyTaskReference and set the episode ID so
|
// Get the created ThirdPartyTaskReference and set the episode ID so
|
||||||
|
|
|
@ -1,18 +1,16 @@
|
||||||
import json
|
import json
|
||||||
import shutil
|
|
||||||
import tempfile
|
|
||||||
import traceback
|
|
||||||
from cgi import parse_header
|
from cgi import parse_header
|
||||||
from contextlib import closing
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Optional
|
from tempfile import NamedTemporaryFile
|
||||||
|
from typing import Any, Dict, Optional
|
||||||
from urllib.parse import urlsplit
|
from urllib.parse import urlsplit
|
||||||
|
|
||||||
import mutagen
|
import mutagen
|
||||||
import requests
|
import requests
|
||||||
from celery import Celery
|
from celery import Celery
|
||||||
from celery.utils.log import get_task_logger
|
from celery.utils.log import get_task_logger
|
||||||
from requests import Response
|
from mutagen import MutagenError
|
||||||
|
from requests import RequestException, Response
|
||||||
|
|
||||||
from .config import config
|
from .config import config
|
||||||
|
|
||||||
|
@ -22,108 +20,95 @@ logger = get_task_logger(__name__)
|
||||||
|
|
||||||
@worker.task(name="podcast-download", acks_late=True)
|
@worker.task(name="podcast-download", acks_late=True)
|
||||||
def podcast_download(
|
def podcast_download(
|
||||||
id: int,
|
episode_id: int,
|
||||||
url: str,
|
episode_url: str,
|
||||||
|
episode_title: Optional[str],
|
||||||
podcast_name: str,
|
podcast_name: str,
|
||||||
album_override: bool,
|
override_album: bool,
|
||||||
track_title: Optional[str],
|
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Download a podcast episode.
|
Download a podcast episode.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
id: Episode ID.
|
episode_id: Episode ID.
|
||||||
url: Episode download url.
|
episode_url: Episode download url.
|
||||||
|
episode_title: Episode title to override the title metadata.
|
||||||
podcast_name: Podcast name to save to the metadata.
|
podcast_name: Podcast name to save to the metadata.
|
||||||
album_override: Whether to override the album metadata.
|
override_album: Whether to override the album metadata.
|
||||||
track_title: Episode title to override the title metadata.
|
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Status of the podcast download as JSON string.
|
Status of the podcast download as JSON string.
|
||||||
"""
|
"""
|
||||||
# Object to store file IDs, episode IDs, and download status
|
result: Dict[str, Any] = {"episodeid": episode_id}
|
||||||
# (important if there's an error before the file is posted)
|
|
||||||
obj = {"episodeid": id}
|
|
||||||
try:
|
try:
|
||||||
re = None
|
# Download podcast episode file
|
||||||
with closing(requests.get(url, stream=True)) as r:
|
|
||||||
filename = extract_filename(r)
|
|
||||||
with tempfile.NamedTemporaryFile(mode="wb+", delete=False) as audiofile:
|
|
||||||
r.raw.decode_content = True
|
|
||||||
shutil.copyfileobj(r.raw, audiofile)
|
|
||||||
# mutagen should be able to guess the write file type
|
|
||||||
metadata_audiofile = mutagen.File(audiofile.name, easy=True)
|
|
||||||
# if for some reason this should fail lets try it as a mp3 specific code
|
|
||||||
if metadata_audiofile == None:
|
|
||||||
# if this happens then mutagen couldn't guess what type of file it is
|
|
||||||
mp3suffix = ("mp3", "MP3", "Mp3", "mP3")
|
|
||||||
# so we treat it like a mp3 if it has a mp3 file extension and hope for the best
|
|
||||||
if filename.endswith(mp3suffix):
|
|
||||||
metadata_audiofile = mutagen.mp3.MP3(
|
|
||||||
audiofile.name, ID3=mutagen.easyid3.EasyID3
|
|
||||||
)
|
|
||||||
# replace track metadata as indicated by album_override setting
|
|
||||||
# replace album title as needed
|
|
||||||
metadata_audiofile = podcast_override_metadata(
|
|
||||||
metadata_audiofile, podcast_name, album_override, track_title
|
|
||||||
)
|
|
||||||
metadata_audiofile.save()
|
|
||||||
filetypeinfo = metadata_audiofile.pprint()
|
|
||||||
logger.info(
|
|
||||||
"filetypeinfo is {}".format(filetypeinfo.encode("ascii", "ignore"))
|
|
||||||
)
|
|
||||||
callback_url = f"{config.general.public_url}/rest/media"
|
|
||||||
callback_api_key = config.general.api_key
|
|
||||||
|
|
||||||
re = requests.post(
|
|
||||||
callback_url,
|
|
||||||
files={"file": (filename, open(audiofile.name, "rb"))},
|
|
||||||
auth=requests.auth.HTTPBasicAuth(callback_api_key, ""),
|
|
||||||
)
|
|
||||||
re.raise_for_status()
|
|
||||||
try:
|
try:
|
||||||
response = re.content.decode()
|
with requests.get(episode_url, stream=True, timeout=30) as resp:
|
||||||
except (UnicodeDecodeError, AttributeError):
|
resp.raise_for_status()
|
||||||
response = re.content
|
|
||||||
f = json.loads(
|
|
||||||
response
|
|
||||||
) # Read the response from the media API to get the file id
|
|
||||||
obj["fileid"] = f["id"]
|
|
||||||
obj["status"] = 1
|
|
||||||
except Exception as e:
|
|
||||||
obj["error"] = e.message
|
|
||||||
logger.info(f"Error during file download: {e}")
|
|
||||||
logger.debug("Original Traceback: %s" % (traceback.format_exc(e)))
|
|
||||||
obj["status"] = 0
|
|
||||||
return json.dumps(obj)
|
|
||||||
|
|
||||||
|
filename = extract_filename(resp)
|
||||||
|
|
||||||
def podcast_override_metadata(m, podcast_name, override, track_title):
|
# The filename extension helps to determine the file type using mutagen
|
||||||
"""
|
with NamedTemporaryFile(suffix=filename, delete=False) as tmp_file:
|
||||||
Override m['album'] if empty or forced with override arg
|
for chunk in resp.iter_content(chunk_size=2048):
|
||||||
"""
|
tmp_file.write(chunk)
|
||||||
# if the album override option is enabled replace the album id3 tag with the podcast name even if the album tag contains data
|
|
||||||
if override is True:
|
except RequestException as exception:
|
||||||
logger.debug(
|
logger.exception(f"could not download podcast episode {episode_id}")
|
||||||
"overriding album name to {} in podcast".format(
|
raise exception
|
||||||
podcast_name.encode("ascii", "ignore")
|
|
||||||
)
|
# Save metadata to podcast episode file
|
||||||
)
|
|
||||||
m["album"] = podcast_name
|
|
||||||
m["title"] = track_title
|
|
||||||
m["artist"] = podcast_name
|
|
||||||
else:
|
|
||||||
# replace the album id3 tag with the podcast name if the album tag is empty
|
|
||||||
try:
|
try:
|
||||||
m["album"]
|
metadata = mutagen.File(tmp_file.name, easy=True)
|
||||||
except KeyError:
|
if metadata is None:
|
||||||
logger.debug(
|
raise MutagenError(
|
||||||
"setting new album name to {} in podcast".format(
|
f"could not determine episode {episode_id} file type"
|
||||||
podcast_name.encode("ascii", "ignore")
|
|
||||||
)
|
)
|
||||||
)
|
|
||||||
m["album"] = podcast_name
|
if override_album:
|
||||||
return m
|
logger.debug(f"overriding album name with podcast name {podcast_name}")
|
||||||
|
metadata["artist"] = podcast_name
|
||||||
|
metadata["album"] = podcast_name
|
||||||
|
metadata["title"] = episode_title
|
||||||
|
|
||||||
|
elif "album" not in metadata:
|
||||||
|
logger.debug(f"setting album name to podcast name {podcast_name}")
|
||||||
|
metadata["album"] = podcast_name
|
||||||
|
|
||||||
|
metadata.save()
|
||||||
|
logger.debug(f"saved metadata {metadata}")
|
||||||
|
|
||||||
|
except MutagenError as exception:
|
||||||
|
logger.exception(f"could not save episode {episode_id} metadata")
|
||||||
|
raise exception
|
||||||
|
|
||||||
|
# Upload podcast episode file
|
||||||
|
try:
|
||||||
|
with requests.post(
|
||||||
|
f"{config.general.public_url}/rest/media",
|
||||||
|
files={"file": (filename, open(tmp_file.name, "rb"))},
|
||||||
|
auth=(config.general.api_key, ""),
|
||||||
|
timeout=30,
|
||||||
|
) as upload_resp:
|
||||||
|
upload_resp.raise_for_status()
|
||||||
|
upload_payload = upload_resp.json()
|
||||||
|
|
||||||
|
result["fileid"] = upload_payload["id"]
|
||||||
|
result["status"] = 1
|
||||||
|
|
||||||
|
except RequestException as exception:
|
||||||
|
logger.exception(f"could not upload episode {episode_id}")
|
||||||
|
raise exception
|
||||||
|
|
||||||
|
except (RequestException, MutagenError) as exception:
|
||||||
|
result["status"] = 0
|
||||||
|
result["error"] = str(exception)
|
||||||
|
|
||||||
|
if tmp_file is not None:
|
||||||
|
Path(tmp_file.name).unlink()
|
||||||
|
|
||||||
|
return json.dumps(result)
|
||||||
|
|
||||||
|
|
||||||
def extract_filename(response: Response) -> str:
|
def extract_filename(response: Response) -> str:
|
||||||
|
|
|
@ -22,6 +22,7 @@ setup(
|
||||||
],
|
],
|
||||||
extras_require={
|
extras_require={
|
||||||
"dev": [
|
"dev": [
|
||||||
|
"requests-mock",
|
||||||
"types-requests",
|
"types-requests",
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
|
|
|
@ -0,0 +1,6 @@
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
here = Path(__file__).parent
|
||||||
|
fixtures_path = here
|
||||||
|
|
||||||
|
# malformed.mp3 is related to https://github.com/libretime/libretime/issues/668!!
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -1,7 +1,60 @@
|
||||||
|
import json
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from requests import Response
|
from requests import Response
|
||||||
|
|
||||||
from libretime_worker.tasks import extract_filename
|
from libretime_worker.tasks import extract_filename, podcast_download
|
||||||
|
|
||||||
|
from .fixtures import fixtures_path
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
"file",
|
||||||
|
[
|
||||||
|
("s1-stereo.ogg"),
|
||||||
|
("s1-stereo-tagged.mp3"),
|
||||||
|
("malformed.mp3"),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
@pytest.mark.parametrize("override_album", [(True), (False)])
|
||||||
|
def test_podcast_download(requests_mock, file, override_album):
|
||||||
|
episode_url = f"https://remote.example.org/{file}"
|
||||||
|
episode_filepath = fixtures_path / file
|
||||||
|
|
||||||
|
requests_mock.get(episode_url, content=episode_filepath.read_bytes())
|
||||||
|
requests_mock.post("http://localhost/rest/media", json={"id": 1})
|
||||||
|
|
||||||
|
result = podcast_download(
|
||||||
|
episode_id=1,
|
||||||
|
episode_url=episode_url,
|
||||||
|
episode_title="My episode",
|
||||||
|
podcast_name="My podcast!",
|
||||||
|
override_album=override_album,
|
||||||
|
)
|
||||||
|
assert json.loads(result) == {
|
||||||
|
"episodeid": 1,
|
||||||
|
"fileid": 1,
|
||||||
|
"status": 1,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def test_podcast_download_invalid_file(requests_mock):
|
||||||
|
episode_url = "https://remote.example.org/invalid"
|
||||||
|
requests_mock.get(episode_url, content=b"some invalid content")
|
||||||
|
requests_mock.post("http://localhost/rest/media", json={"id": 1})
|
||||||
|
|
||||||
|
result = podcast_download(
|
||||||
|
episode_id=1,
|
||||||
|
episode_url=episode_url,
|
||||||
|
episode_title="My episode",
|
||||||
|
podcast_name="My podcast!",
|
||||||
|
override_album=False,
|
||||||
|
)
|
||||||
|
assert json.loads(result) == {
|
||||||
|
"episodeid": 1,
|
||||||
|
"status": 0,
|
||||||
|
"error": "could not determine episode 1 file type",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
|
|
Loading…
Reference in New Issue