diff --git a/legacy/application/services/PodcastEpisodeService.php b/legacy/application/services/PodcastEpisodeService.php index c7f60b251..3187b3048 100644 --- a/legacy/application/services/PodcastEpisodeService.php +++ b/legacy/application/services/PodcastEpisodeService.php @@ -181,11 +181,11 @@ class Application_Service_PodcastEpisodeService extends Application_Service_Thir private function _download($id, $url, $title, $album_override, $track_title = null) { $data = [ - 'id' => $id, - 'url' => $url, + 'episode_id' => $id, + 'episode_url' => $url, + 'episode_title' => $track_title, 'podcast_name' => $title, - 'album_override' => $album_override, - 'track_title' => $track_title, + 'override_album' => $album_override, ]; $task = $this->_executeTask(static::$_CELERY_TASKS[self::DOWNLOAD], $data); // Get the created ThirdPartyTaskReference and set the episode ID so diff --git a/worker/libretime_worker/tasks.py b/worker/libretime_worker/tasks.py index bea4b1ebe..1a07ef1d0 100644 --- a/worker/libretime_worker/tasks.py +++ b/worker/libretime_worker/tasks.py @@ -1,18 +1,16 @@ import json -import shutil -import tempfile -import traceback from cgi import parse_header -from contextlib import closing from pathlib import Path -from typing import Optional +from tempfile import NamedTemporaryFile +from typing import Any, Dict, Optional from urllib.parse import urlsplit import mutagen import requests from celery import Celery from celery.utils.log import get_task_logger -from requests import Response +from mutagen import MutagenError +from requests import RequestException, Response from .config import config @@ -22,108 +20,95 @@ logger = get_task_logger(__name__) @worker.task(name="podcast-download", acks_late=True) def podcast_download( - id: int, - url: str, + episode_id: int, + episode_url: str, + episode_title: Optional[str], podcast_name: str, - album_override: bool, - track_title: Optional[str], + override_album: bool, ): """ Download a podcast episode. Args: - id: Episode ID. - url: Episode download url. + episode_id: Episode ID. + episode_url: Episode download url. + episode_title: Episode title to override the title metadata. podcast_name: Podcast name to save to the metadata. - album_override: Whether to override the album metadata. - track_title: Episode title to override the title metadata. + override_album: Whether to override the album metadata. Returns: Status of the podcast download as JSON string. """ - # Object to store file IDs, episode IDs, and download status - # (important if there's an error before the file is posted) - obj = {"episodeid": id} + result: Dict[str, Any] = {"episodeid": episode_id} + try: - re = None - with closing(requests.get(url, stream=True)) as r: - filename = extract_filename(r) - with tempfile.NamedTemporaryFile(mode="wb+", delete=False) as audiofile: - r.raw.decode_content = True - shutil.copyfileobj(r.raw, audiofile) - # mutagen should be able to guess the write file type - metadata_audiofile = mutagen.File(audiofile.name, easy=True) - # if for some reason this should fail lets try it as a mp3 specific code - if metadata_audiofile == None: - # if this happens then mutagen couldn't guess what type of file it is - mp3suffix = ("mp3", "MP3", "Mp3", "mP3") - # so we treat it like a mp3 if it has a mp3 file extension and hope for the best - if filename.endswith(mp3suffix): - metadata_audiofile = mutagen.mp3.MP3( - audiofile.name, ID3=mutagen.easyid3.EasyID3 - ) - # replace track metadata as indicated by album_override setting - # replace album title as needed - metadata_audiofile = podcast_override_metadata( - metadata_audiofile, podcast_name, album_override, track_title - ) - metadata_audiofile.save() - filetypeinfo = metadata_audiofile.pprint() - logger.info( - "filetypeinfo is {}".format(filetypeinfo.encode("ascii", "ignore")) - ) - callback_url = f"{config.general.public_url}/rest/media" - callback_api_key = config.general.api_key - - re = requests.post( - callback_url, - files={"file": (filename, open(audiofile.name, "rb"))}, - auth=requests.auth.HTTPBasicAuth(callback_api_key, ""), - ) - re.raise_for_status() + # Download podcast episode file try: - response = re.content.decode() - except (UnicodeDecodeError, AttributeError): - response = re.content - f = json.loads( - response - ) # Read the response from the media API to get the file id - obj["fileid"] = f["id"] - obj["status"] = 1 - except Exception as e: - obj["error"] = e.message - logger.info(f"Error during file download: {e}") - logger.debug("Original Traceback: %s" % (traceback.format_exc(e))) - obj["status"] = 0 - return json.dumps(obj) + with requests.get(episode_url, stream=True, timeout=30) as resp: + resp.raise_for_status() + filename = extract_filename(resp) -def podcast_override_metadata(m, podcast_name, override, track_title): - """ - Override m['album'] if empty or forced with override arg - """ - # if the album override option is enabled replace the album id3 tag with the podcast name even if the album tag contains data - if override is True: - logger.debug( - "overriding album name to {} in podcast".format( - podcast_name.encode("ascii", "ignore") - ) - ) - m["album"] = podcast_name - m["title"] = track_title - m["artist"] = podcast_name - else: - # replace the album id3 tag with the podcast name if the album tag is empty + # The filename extension helps to determine the file type using mutagen + with NamedTemporaryFile(suffix=filename, delete=False) as tmp_file: + for chunk in resp.iter_content(chunk_size=2048): + tmp_file.write(chunk) + + except RequestException as exception: + logger.exception(f"could not download podcast episode {episode_id}") + raise exception + + # Save metadata to podcast episode file try: - m["album"] - except KeyError: - logger.debug( - "setting new album name to {} in podcast".format( - podcast_name.encode("ascii", "ignore") + metadata = mutagen.File(tmp_file.name, easy=True) + if metadata is None: + raise MutagenError( + f"could not determine episode {episode_id} file type" ) - ) - m["album"] = podcast_name - return m + + if override_album: + logger.debug(f"overriding album name with podcast name {podcast_name}") + metadata["artist"] = podcast_name + metadata["album"] = podcast_name + metadata["title"] = episode_title + + elif "album" not in metadata: + logger.debug(f"setting album name to podcast name {podcast_name}") + metadata["album"] = podcast_name + + metadata.save() + logger.debug(f"saved metadata {metadata}") + + except MutagenError as exception: + logger.exception(f"could not save episode {episode_id} metadata") + raise exception + + # Upload podcast episode file + try: + with requests.post( + f"{config.general.public_url}/rest/media", + files={"file": (filename, open(tmp_file.name, "rb"))}, + auth=(config.general.api_key, ""), + timeout=30, + ) as upload_resp: + upload_resp.raise_for_status() + upload_payload = upload_resp.json() + + result["fileid"] = upload_payload["id"] + result["status"] = 1 + + except RequestException as exception: + logger.exception(f"could not upload episode {episode_id}") + raise exception + + except (RequestException, MutagenError) as exception: + result["status"] = 0 + result["error"] = str(exception) + + if tmp_file is not None: + Path(tmp_file.name).unlink() + + return json.dumps(result) def extract_filename(response: Response) -> str: diff --git a/worker/setup.py b/worker/setup.py index 4cc1c1b9d..d2e03a5ad 100644 --- a/worker/setup.py +++ b/worker/setup.py @@ -22,6 +22,7 @@ setup( ], extras_require={ "dev": [ + "requests-mock", "types-requests", ], }, diff --git a/worker/tests/__init__.py b/worker/tests/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/worker/tests/fixtures/__init__.py b/worker/tests/fixtures/__init__.py new file mode 100644 index 000000000..c6f334193 --- /dev/null +++ b/worker/tests/fixtures/__init__.py @@ -0,0 +1,6 @@ +from pathlib import Path + +here = Path(__file__).parent +fixtures_path = here + +# malformed.mp3 is related to https://github.com/libretime/libretime/issues/668!! diff --git a/worker/tests/fixtures/malformed.mp3 b/worker/tests/fixtures/malformed.mp3 new file mode 100644 index 000000000..34cc25444 Binary files /dev/null and b/worker/tests/fixtures/malformed.mp3 differ diff --git a/worker/tests/fixtures/s1-stereo-tagged.mp3 b/worker/tests/fixtures/s1-stereo-tagged.mp3 new file mode 100644 index 000000000..fdffce97e Binary files /dev/null and b/worker/tests/fixtures/s1-stereo-tagged.mp3 differ diff --git a/worker/tests/fixtures/s1-stereo.ogg b/worker/tests/fixtures/s1-stereo.ogg new file mode 100644 index 000000000..dbe1bf245 Binary files /dev/null and b/worker/tests/fixtures/s1-stereo.ogg differ diff --git a/worker/tests/tasks_test.py b/worker/tests/tasks_test.py index 8f5e0b973..caf356951 100644 --- a/worker/tests/tasks_test.py +++ b/worker/tests/tasks_test.py @@ -1,7 +1,60 @@ +import json + import pytest from requests import Response -from libretime_worker.tasks import extract_filename +from libretime_worker.tasks import extract_filename, podcast_download + +from .fixtures import fixtures_path + + +@pytest.mark.parametrize( + "file", + [ + ("s1-stereo.ogg"), + ("s1-stereo-tagged.mp3"), + ("malformed.mp3"), + ], +) +@pytest.mark.parametrize("override_album", [(True), (False)]) +def test_podcast_download(requests_mock, file, override_album): + episode_url = f"https://remote.example.org/{file}" + episode_filepath = fixtures_path / file + + requests_mock.get(episode_url, content=episode_filepath.read_bytes()) + requests_mock.post("http://localhost/rest/media", json={"id": 1}) + + result = podcast_download( + episode_id=1, + episode_url=episode_url, + episode_title="My episode", + podcast_name="My podcast!", + override_album=override_album, + ) + assert json.loads(result) == { + "episodeid": 1, + "fileid": 1, + "status": 1, + } + + +def test_podcast_download_invalid_file(requests_mock): + episode_url = "https://remote.example.org/invalid" + requests_mock.get(episode_url, content=b"some invalid content") + requests_mock.post("http://localhost/rest/media", json={"id": 1}) + + result = podcast_download( + episode_id=1, + episode_url=episode_url, + episode_title="My episode", + podcast_name="My podcast!", + override_album=False, + ) + assert json.loads(result) == { + "episodeid": 1, + "status": 0, + "error": "could not determine episode 1 file type", + } @pytest.mark.parametrize(