sintonia/analyzer/libretime_analyzer/status_reporter.py

285 lines
12 KiB
Python
Raw Normal View History

import collections
import json
import logging
import pickle
2020-01-16 15:32:51 +01:00
import queue
import threading
import time
import traceback
2020-01-16 15:32:51 +01:00
from urllib.parse import urlparse
import requests
# Disable urllib3 warnings because these can cause a rare deadlock due to Python 2's crappy internal non-reentrant locking
# around POSIX stuff. See SAAS-714. The hasattr() is for compatibility with older versions of requests.
2021-05-27 16:23:02 +02:00
if hasattr(requests, "packages"):
requests.packages.urllib3.disable_warnings()
2021-05-27 16:23:02 +02:00
class PicklableHttpRequest:
def __init__(self, method, url, data, api_key):
self.method = method
self.url = url
self.data = data
self.api_key = api_key
def create_request(self):
2021-05-27 16:23:02 +02:00
return requests.Request(
method=self.method,
url=self.url,
data=self.data,
auth=requests.auth.HTTPBasicAuth(self.api_key, ""),
)
def process_http_requests(ipc_queue, http_retry_queue_path):
2021-05-27 16:23:02 +02:00
"""Runs in a separate thread and performs all the HTTP requests where we're
reporting extracted audio file metadata or errors back to the Airtime web application.
2021-05-27 16:23:02 +02:00
This process also checks every 5 seconds if there's failed HTTP requests that we
need to retry. We retry failed HTTP requests so that we don't lose uploads if the
web server is temporarily down.
2021-05-27 16:23:02 +02:00
"""
# Store any failed requests (eg. due to web server errors or downtime) to be
# retried later:
retry_queue = collections.deque()
shutdown = False
# Unpickle retry_queue from disk so that we won't have lost any uploads
# if airtime_analyzer is shut down while the web server is down or unreachable,
# and there were failed HTTP requests pending, waiting to be retried.
try:
2021-05-27 16:23:02 +02:00
with open(http_retry_queue_path, "rb") as pickle_file:
retry_queue = pickle.load(pickle_file)
except IOError as e:
if e.errno == 2:
pass
else:
raise e
except Exception as e:
# If we fail to unpickle a saved queue of failed HTTP requests, then we'll just log an error
# and continue because those HTTP requests are lost anyways. The pickled file will be
# overwritten the next time the analyzer is shut down too.
logging.error("Failed to unpickle %s. Continuing..." % http_retry_queue_path)
pass
while True:
try:
while not shutdown:
try:
request = ipc_queue.get(block=True, timeout=5)
2021-05-27 16:23:02 +02:00
if (
isinstance(request, str) and request == "shutdown"
): # Bit of a cheat
shutdown = True
break
if not isinstance(request, PicklableHttpRequest):
2021-05-27 16:23:02 +02:00
raise TypeError(
"request must be a PicklableHttpRequest. Was of type "
+ type(request).__name__
)
2020-01-16 15:32:51 +01:00
except queue.Empty:
request = None
# If there's no new HTTP request we need to execute, let's check our "retry
# queue" and see if there's any failed HTTP requests we can retry:
if request:
send_http_request(request, retry_queue)
else:
# Using a for loop instead of while so we only iterate over all the requests once!
for i in range(len(retry_queue)):
request = retry_queue.popleft()
send_http_request(request, retry_queue)
logging.info("Shutting down status_reporter")
# Pickle retry_queue to disk so that we don't lose uploads if we're shut down while
# while the web server is down or unreachable.
2021-05-27 16:23:02 +02:00
with open(http_retry_queue_path, "wb") as pickle_file:
pickle.dump(retry_queue, pickle_file)
return
2021-05-27 16:23:02 +02:00
except Exception as e: # Terrible top-level exception handler to prevent the thread from dying, just in case.
if shutdown:
return
logging.exception("Unhandled exception in StatusReporter")
logging.exception(e)
logging.info("Restarting StatusReporter thread")
2021-05-27 16:23:02 +02:00
time.sleep(2) # Throttle it
def send_http_request(picklable_request, retry_queue):
if not isinstance(picklable_request, PicklableHttpRequest):
2021-05-27 16:23:02 +02:00
raise TypeError(
"picklable_request must be a PicklableHttpRequest. Was of type "
+ type(picklable_request).__name__
)
try:
bare_request = picklable_request.create_request()
s = requests.Session()
prepared_request = s.prepare_request(bare_request)
2021-05-27 16:23:02 +02:00
r = s.send(
prepared_request, timeout=StatusReporter._HTTP_REQUEST_TIMEOUT, verify=False
) # SNI is a pain in the ass
r.raise_for_status() # Raise an exception if there was an http error code returned
logging.info("HTTP request sent successfully.")
except requests.exceptions.HTTPError as e:
if e.response.status_code == 422:
# Do no retry the request if there was a metadata validation error
2021-05-27 16:23:02 +02:00
logging.error(
"HTTP request failed due to an HTTP exception. Exception was: %s"
% str(e)
)
else:
# The request failed with an error 500 probably, so let's check if Airtime and/or
# the web server are broken. If not, then our request was probably causing an
# error 500 in the media API (ie. a bug), so there's no point in retrying it.
logging.error("HTTP request failed. Exception was: %s" % str(e))
parsed_url = urlparse(e.response.request.url)
if is_web_server_broken(parsed_url.scheme + "://" + parsed_url.netloc):
# If the web server is having problems, retry the request later:
retry_queue.append(picklable_request)
# Otherwise, if the request was bad, the request is never retried.
# You will have to find these bad requests in logs or you'll be
# notified by sentry.
except requests.exceptions.ConnectionError as e:
2021-05-27 16:23:02 +02:00
logging.error(
"HTTP request failed due to a connection error. Retrying later. %s" % str(e)
)
retry_queue.append(picklable_request) # Retry it later
except Exception as e:
logging.error("HTTP request failed with unhandled exception. %s" % str(e))
logging.error(traceback.format_exc())
# Don't put the request into the retry queue, just give up on this one.
# I'm doing this to protect against us getting some pathological request
# that breaks our code. I don't want us pickling data that potentially
# breaks airtime_analyzer.
2021-05-27 16:23:02 +02:00
def is_web_server_broken(url):
2021-05-27 16:23:02 +02:00
"""Do a naive test to check if the web server we're trying to access is down.
We use this to try to differentiate between error 500s that are coming
from (for example) a bug in the Airtime Media REST API and error 500s
caused by Airtime or the webserver itself being broken temporarily.
"""
try:
test_req = requests.get(url, verify=False)
test_req.raise_for_status()
except Exception as e:
2014-10-22 01:23:48 +02:00
return True
else:
# The request worked fine, so the web server and Airtime are still up.
2021-05-27 16:23:02 +02:00
return False
2014-10-22 01:23:48 +02:00
return False
2021-05-27 16:23:02 +02:00
class StatusReporter:
"""Reports the extracted audio file metadata and job status back to the
Airtime web application.
"""
_HTTP_REQUEST_TIMEOUT = 30
2021-05-27 16:23:02 +02:00
""" We use multiprocessing.Process again here because we need a thread for this stuff
anyways, and Python gives us process isolation for free (crash safety).
2021-05-27 16:23:02 +02:00
"""
2020-01-16 15:32:51 +01:00
_ipc_queue = queue.Queue()
2021-05-27 16:23:02 +02:00
# _http_thread = multiprocessing.Process(target=process_http_requests,
# args=(_ipc_queue,))
2015-05-19 18:02:19 +02:00
_http_thread = None
@classmethod
def start_thread(self, http_retry_queue_path):
2021-05-27 16:23:02 +02:00
StatusReporter._http_thread = threading.Thread(
target=process_http_requests,
args=(StatusReporter._ipc_queue, http_retry_queue_path),
)
2015-05-19 18:02:19 +02:00
StatusReporter._http_thread.start()
@classmethod
def stop_thread(self):
logging.info("Terminating status_reporter process")
2021-05-27 16:23:02 +02:00
# StatusReporter._http_thread.terminate() # Triggers SIGTERM on the child process
StatusReporter._ipc_queue.put("shutdown") # Special trigger
2015-05-19 18:02:19 +02:00
StatusReporter._http_thread.join()
@classmethod
def _send_http_request(self, request):
StatusReporter._ipc_queue.put(request)
@classmethod
def report_success_to_callback_url(self, callback_url, api_key, audio_metadata):
2021-05-27 16:23:02 +02:00
"""Report the extracted metadata and status of the successfully imported file
to the callback URL (which should be the Airtime File Upload API)
"""
put_payload = json.dumps(audio_metadata)
2021-05-27 16:23:02 +02:00
# r = requests.Request(method='PUT', url=callback_url, data=put_payload,
# auth=requests.auth.HTTPBasicAuth(api_key, ''))
2021-05-27 16:23:02 +02:00
"""
2021-05-27 16:33:20 +02:00
r = requests.Request(method='PUT', url=callback_url, data=put_payload,
auth=requests.auth.HTTPBasicAuth(api_key, ''))
StatusReporter._send_http_request(r)
2021-05-27 16:23:02 +02:00
"""
2021-05-27 16:23:02 +02:00
StatusReporter._send_http_request(
PicklableHttpRequest(
method="PUT", url=callback_url, data=put_payload, api_key=api_key
)
)
2021-05-27 16:23:02 +02:00
"""
try:
r.raise_for_status() # Raise an exception if there was an http error code returned
except requests.exceptions.RequestException:
StatusReporter._ipc_queue.put(r.prepare())
2021-05-27 16:23:02 +02:00
"""
2021-05-27 16:33:20 +02:00
"""
# Encode the audio metadata as json and post it back to the callback_url
put_payload = json.dumps(audio_metadata)
logging.debug("sending http put with payload: " + put_payload)
2021-05-27 16:33:20 +02:00
r = requests.put(callback_url, data=put_payload,
auth=requests.auth.HTTPBasicAuth(api_key, ''),
timeout=StatusReporter._HTTP_REQUEST_TIMEOUT)
logging.debug("HTTP request returned status: " + str(r.status_code))
logging.debug(r.text) # log the response body
#TODO: queue up failed requests and try them again later.
r.raise_for_status() # Raise an exception if there was an http error code returned
2021-05-27 16:23:02 +02:00
"""
@classmethod
2021-05-27 16:23:02 +02:00
def report_failure_to_callback_url(
self, callback_url, api_key, import_status, reason
):
if not isinstance(import_status, int):
raise TypeError(
"import_status must be an integer. Was of type "
+ type(import_status).__name__
)
logging.debug("Reporting import failure to Airtime REST API...")
audio_metadata = dict()
audio_metadata["import_status"] = import_status
audio_metadata["comment"] = reason # hack attack
put_payload = json.dumps(audio_metadata)
2021-05-27 16:23:02 +02:00
# logging.debug("sending http put with payload: " + put_payload)
"""
2021-05-27 16:33:20 +02:00
r = requests.put(callback_url, data=put_payload,
auth=requests.auth.HTTPBasicAuth(api_key, ''),
timeout=StatusReporter._HTTP_REQUEST_TIMEOUT)
2021-05-27 16:23:02 +02:00
"""
StatusReporter._send_http_request(
PicklableHttpRequest(
method="PUT", url=callback_url, data=put_payload, api_key=api_key
)
)
"""
logging.debug("HTTP request returned status: " + str(r.status_code))
logging.debug(r.text) # log the response body
#TODO: queue up failed requests and try them again later.
r.raise_for_status() # raise an exception if there was an http error code returned
2021-05-27 16:23:02 +02:00
"""