refactor(worker): rewrite extract_filename from download

This commit is contained in:
jo 2022-09-09 11:51:16 +02:00 committed by Kyle Robbertze
parent 4daa0aaca0
commit ab6cebb6ed
1 changed file with 16 additions and 27 deletions

View File

@@ -1,16 +1,17 @@
import cgi
import json import json
import posixpath
import shutil import shutil
import tempfile import tempfile
import traceback import traceback
from cgi import parse_header
from contextlib import closing from contextlib import closing
from pathlib import Path
from urllib.parse import urlsplit from urllib.parse import urlsplit
import mutagen import mutagen
import requests import requests
from celery import Celery from celery import Celery
from celery.utils.log import get_task_logger from celery.utils.log import get_task_logger
from requests import Response
from .config import config from .config import config
@@ -45,7 +46,7 @@ def podcast_download(
try: try:
re = None re = None
with closing(requests.get(url, stream=True)) as r: with closing(requests.get(url, stream=True)) as r:
filename = get_filename(r) filename = extract_filename(r)
with tempfile.NamedTemporaryFile(mode="wb+", delete=False) as audiofile: with tempfile.NamedTemporaryFile(mode="wb+", delete=False) as audiofile:
r.raw.decode_content = True r.raw.decode_content = True
shutil.copyfileobj(r.raw, audiofile) shutil.copyfileobj(r.raw, audiofile)
@@ -124,31 +125,19 @@ def podcast_override_metadata(m, podcast_name, override, track_title):
return m return m
def get_filename(r): def extract_filename(response: Response) -> str:
""" """
Given a request object to a file resource, get the name of the file to be downloaded Extract the filename from a download request.
by parsing either the content disposition or the request URL
:param r: request object Args:
response: Download request response.
:return: the file name Returns:
:rtype: string Extracted filename.
""" """
# Try to get the filename from the content disposition if "Content-Disposition" in response.headers:
d = r.headers.get("Content-Disposition") _, params = parse_header(response.headers["Content-Disposition"])
filename = "" if "filename" in params:
if d: return params["filename"]
try:
_, params = cgi.parse_header(d) return Path(urlsplit(response.url).path).name
filename = params["filename"]
except Exception as e:
# We end up here if we get a Content-Disposition header with no filename
logger.warning(
"Couldn't find file name in Content-Disposition header, using url"
)
if not filename:
# Since we don't necessarily get the filename back in the response headers,
# parse the URL and get the filename and extension
path = urlsplit(r.url).path
filename = posixpath.basename(path)
return filename