From 9eb0f2f3b33256f5d46c659e3d8390affa4155ad Mon Sep 17 00:00:00 2001 From: drigato Date: Mon, 26 May 2014 17:13:45 -0400 Subject: [PATCH 1/2] CC-5859: Airtime Analyzer: format fields before writing to db --- airtime_mvc/application/forms/EditAudioMD.php | 165 +++++++++++------- .../rest/controllers/MediaController.php | 35 ++-- 2 files changed, 124 insertions(+), 76 deletions(-) diff --git a/airtime_mvc/application/forms/EditAudioMD.php b/airtime_mvc/application/forms/EditAudioMD.php index cbb45af02..69ddbccde 100644 --- a/airtime_mvc/application/forms/EditAudioMD.php +++ b/airtime_mvc/application/forms/EditAudioMD.php @@ -15,39 +15,62 @@ class Application_Form_EditAudioMD extends Zend_Form 'value' => $p_id )); // Add title field - $this->addElement('text', 'track_title', array( + /*$this->addElement('text', 'track_title', array( 'label' => _('Title:'), 'class' => 'input_text', 'filters' => array('StringTrim'), - )); + ));*/ + $track_title = new Zend_Form_Element_Text('track_title'); + $track_title->class = 'input_text'; + $track_title->setLabel(_('Title:')) + ->setFilters(array('StringTrim')) + ->setValidators(array( + new Zend_Validate_StringLength(array('max' => 512)) + )); + $this->addElement($track_title); // Add artist field - $this->addElement('text', 'artist_name', array( + /*$this->addElement('text', 'artist_name', array( 'label' => _('Creator:'), 'class' => 'input_text', 'filters' => array('StringTrim'), - )); + ));*/ + $artist_name = new Zend_Form_Element_Text('artist_name'); + $artist_name->class = 'input_text'; + $artist_name->setLabel(_('Creator:')) + ->setFilters(array('StringTrim')) + ->setValidators(array( + new Zend_Validate_StringLength(array('max' => 512)) + )); + $this->addElement($artist_name); // Add album field - $this->addElement('text', 'album_title', array( - 'label' => _('Album:'), - 'class' => 'input_text', - 'filters' => array('StringTrim') - )); + $album_title = new Zend_Form_Element_Text('album_title'); + $album_title->class = 'input_text'; + $album_title->setLabel(_('Album:')) + ->setFilters(array('StringTrim')) + ->setValidators(array( + new Zend_Validate_StringLength(array('max' => 512)) + ));; + $this->addElement($album_title); // Add track number field - $this->addElement('text', 'track_number', array( - 'label' => _('Track:'), - 'class' => 'input_text', - 'filters' => array('StringTrim'), - )); + $track_number = new Zend_Form_Element('track_number'); + $track_number->class = 'input_text'; + $track_number->setLabel('Track Number:') + ->setFilters(array('StringTrim')) + ->setValidators(array(new Zend_Validate_Digits())); + $this->addElement($track_number); // Add genre field - $this->addElement('text', 'genre', array( - 'label' => _('Genre:'), - 'class' => 'input_text', - 'filters' => array('StringTrim') - )); + $genre = new Zend_Form_Element('genre'); + $genre->class = 'input_text'; + $genre->setLabel(_('Genre:')) + ->setFilters(array('StringTrim')) + ->setValidators(array( + new Zend_Validate_StringLength(array('max' => 64)) + )); + $this->addElement($genre); // Add year field $year = new Zend_Form_Element_Text('year'); @@ -63,32 +86,44 @@ class Application_Form_EditAudioMD extends Zend_Form $this->addElement($year); // Add label field - $this->addElement('text', 'label', array( - 'label' => _('Label:'), - 'class' => 'input_text', - 'filters' => array('StringTrim') - )); + $label = new Zend_Form_Element('label'); + $label->class = 'input_text'; + $label->setLabel(_('Label:')) + ->setFilters(array('StringTrim')) + ->setValidators(array( + new Zend_Validate_StringLength(array('max' => 512)) + )); + $this->addElement($label); // Add composer field - $this->addElement('text', 'composer', array( - 'label' => _('Composer:'), - 'class' => 'input_text', - 'filters' => array('StringTrim') - )); + $composer = new Zend_Form_Element('composer'); + $composer->class = 'input_text'; + $composer->setLabel(_('Composer:')) + ->setFilters(array('StringTrim')) + ->setValidators(array( + new Zend_Validate_StringLength(array('max' => 512)) + )); + $this->addElement($composer); // Add conductor field - $this->addElement('text', 'conductor', array( - 'label' => _('Conductor:'), - 'class' => 'input_text', - 'filters' => array('StringTrim') - )); + $conductor = new Zend_Form_Element('conductor'); + $conductor->class = 'input_text'; + $conductor->setLabel(_('Conductor:')) + ->setFilters(array('StringTrim')) + ->setValidators(array( + new Zend_Validate_StringLength(array('max' => 512)) + )); + $this->addElement($conductor); // Add mood field - $this->addElement('text', 'mood', array( - 'label' => _('Mood:'), - 'class' => 'input_text', - 'filters' => array('StringTrim') - )); + $mood = new Zend_Form_Element('mood'); + $mood->class = 'input_text'; + $mood->setLabel(_('Mood:')) + ->setFilters(array('StringTrim')) + ->setValidators(array( + new Zend_Validate_StringLength(array('max' => 64)) + )); + $this->addElement($mood); // Add bmp field $bpm = new Zend_Form_Element_Text('bpm'); @@ -101,32 +136,44 @@ class Application_Form_EditAudioMD extends Zend_Form $this->addElement($bpm); // Add copyright field - $this->addElement('text', 'copyright', array( - 'label' => _('Copyright:'), - 'class' => 'input_text', - 'filters' => array('StringTrim') - )); + $copyright = new Zend_Form_Element('copyright'); + $copyright->class = 'input_text'; + $copyright->setLabel(_('Copyright:')) + ->setFilters(array('StringTrim')) + ->setValidators(array( + new Zend_Validate_StringLength(array('max' => 512)) + )); + $this->addElement($copyright); // Add isrc number field - $this->addElement('text', 'isrc_number', array( - 'label' => _('ISRC Number:'), - 'class' => 'input_text', - 'filters' => array('StringTrim') - )); + $isrc_number = new Zend_Form_Element('isrc_number'); + $isrc_number->class = 'input_text'; + $isrc_number->setLabel(_('ISRC Number:')) + ->setFilters(array('StringTrim')) + ->setValidators(array( + new Zend_Validate_StringLength(array('max' => 512)) + )); + $this->addElement($isrc_number); // Add website field - $this->addElement('text', 'info_url', array( - 'label' => _('Website:'), - 'class' => 'input_text', - 'filters' => array('StringTrim') - )); + $info_url = new Zend_Form_Element('info_url'); + $info_url->class = 'input_text'; + $info_url->setLabel(_('Website:')) + ->setFilters(array('StringTrim')) + ->setValidators(array( + new Zend_Validate_StringLength(array('max' => 512)) + )); + $this->addElement($info_url); // Add language field - $this->addElement('text', 'language', array( - 'label' => _('Language:'), - 'class' => 'input_text', - 'filters' => array('StringTrim') - )); + $language = new Zend_Form_Element('language'); + $language->class = 'input_text'; + $language->setLabel(_('Language:')) + ->setFilters(array('StringTrim')) + ->setValidators(array( + new Zend_Validate_StringLength(array('max' => 512)) + )); + $this->addElement($language); // Add the submit button $this->addElement('button', 'editmdsave', array( diff --git a/airtime_mvc/application/modules/rest/controllers/MediaController.php b/airtime_mvc/application/modules/rest/controllers/MediaController.php index e4f78652f..704ee2052 100644 --- a/airtime_mvc/application/modules/rest/controllers/MediaController.php +++ b/airtime_mvc/application/modules/rest/controllers/MediaController.php @@ -222,7 +222,6 @@ class Rest_MediaController extends Zend_Rest_Controller $requestData = json_decode($this->getRequest()->getRawBody(), true); $whiteList = $this->removeBlacklistedFieldsFromRequestData($requestData); $whiteList = $this->stripTimeStampFromYearTag($whiteList); - $whiteList = $this->truncateGenreTag($whiteList); if (!$this->validateRequestData($file, $whiteList)) { $file->save(); @@ -382,12 +381,29 @@ class Rest_MediaController extends Zend_Rest_Controller $resp->appendBody("ERROR: Invalid data"); } - private function validateRequestData($file, $whiteList) + private function validateRequestData($file, &$whiteList) { // EditAudioMD form is used here for validation $fileForm = new Application_Form_EditAudioMD(); $fileForm->startForm($file->getDbId()); $fileForm->populate($whiteList); + + /* + * Here we are truncating metadata of any characters greater than the + * max string length set in the database. In the rare case a track's + * genre is more than 64 chars, for example, we don't want to reject + * tracks for that reason + */ + foreach($whiteList as $tag => &$value) { + if ($fileForm->getElement($tag)) { + $stringLengthValidator = $fileForm->getElement($tag)->getValidator('StringLength'); + //$stringLengthValidator will be false if the StringLength validator doesn't exist on the current element + //in which case we don't have to truncate the extra characters + if ($stringLengthValidator) { + $value = substr($value, 0, $stringLengthValidator->getMax()); + } + } + } if (!$fileForm->isValidPartial($whiteList)) { $file->setDbImportStatus(2); @@ -510,20 +526,5 @@ class Rest_MediaController extends Zend_Rest_Controller } return $metadata; } - - /** The genre tag in our cc_files schema is currently a varchar(64). It's possible for MP3 genre tags - * to be longer than that, so we have to truncate longer genres. (We've seen ridiculously long genre tags.) - * @param string array $metadata - */ - private function truncateGenreTag($metadata) - { - if (isset($metadata["genre"])) - { - if (strlen($metadata["genre"]) >= 64) { - $metadata["genre"] = substr($metadata["genre"], 0, 64); - } - } - return $metadata; - } } From d21cb596cd8dd4bf956249f0d8a2b56c755d4b0c Mon Sep 17 00:00:00 2001 From: Albert Santoni Date: Mon, 26 May 2014 18:59:28 -0400 Subject: [PATCH 2/2] CC-5860: Analyzer HTTP requests can hang indefinitely * Log a backtrace when sent the USR2 signal (kill -SIGUSR2 ) * Rigged up up something to strace and restart when we see a request hanging --- .../airtime_analyzer/airtime_analyzer.py | 26 +++++-- .../airtime_analyzer/status_reporter.py | 72 +++++++++++++++++-- 2 files changed, 89 insertions(+), 9 deletions(-) diff --git a/python_apps/airtime_analyzer/airtime_analyzer/airtime_analyzer.py b/python_apps/airtime_analyzer/airtime_analyzer/airtime_analyzer.py index 39f3039f4..56739f200 100644 --- a/python_apps/airtime_analyzer/airtime_analyzer/airtime_analyzer.py +++ b/python_apps/airtime_analyzer/airtime_analyzer/airtime_analyzer.py @@ -4,6 +4,8 @@ import ConfigParser import logging import logging.handlers import sys +import signal +import traceback from functools import partial from metadata_analyzer import MetadataAnalyzer from replaygain_analyzer import ReplayGainAnalyzer @@ -23,6 +25,9 @@ class AirtimeAnalyzerServer: def __init__(self, rmq_config_path, http_retry_queue_path, debug=False): + # Debug console. Access with 'kill -SIGUSR2 ' + signal.signal(signal.SIGUSR2, lambda sig, frame: AirtimeAnalyzerServer.dump_stacktrace()) + # Configure logging self.setup_logging(debug) @@ -30,13 +35,13 @@ class AirtimeAnalyzerServer: rabbitmq_config = self.read_config_file(rmq_config_path) # Start up the StatusReporter process - StatusReporter.start_child_process(http_retry_queue_path) + StatusReporter.start_thread(http_retry_queue_path) # Start listening for RabbitMQ messages telling us about newly - # uploaded files. + # uploaded files. This blocks until we recieve a shutdown signal. self._msg_listener = MessageListener(rabbitmq_config) - StatusReporter.stop_child_process() + StatusReporter.stop_thread() def setup_logging(self, debug): @@ -81,4 +86,17 @@ class AirtimeAnalyzerServer: exit(-1) return config - + + @classmethod + def dump_stacktrace(stack): + ''' Dump a stacktrace for all threads ''' + code = [] + for threadId, stack in sys._current_frames().items(): + code.append("\n# ThreadID: %s" % threadId) + for filename, lineno, name, line in traceback.extract_stack(stack): + code.append('File: "%s", line %d, in %s' % (filename, lineno, name)) + if line: + code.append(" %s" % (line.strip())) + logging.info('\n'.join(code)) + + diff --git a/python_apps/airtime_analyzer/airtime_analyzer/status_reporter.py b/python_apps/airtime_analyzer/airtime_analyzer/status_reporter.py index 0a69461ba..a93ab1f8a 100644 --- a/python_apps/airtime_analyzer/airtime_analyzer/status_reporter.py +++ b/python_apps/airtime_analyzer/airtime_analyzer/status_reporter.py @@ -3,8 +3,12 @@ import json import logging import collections import Queue -import signal +import subprocess import multiprocessing +import time +import sys +import traceback +import os import pickle import threading @@ -84,10 +88,13 @@ def send_http_request(picklable_request, retry_queue): if not isinstance(picklable_request, PicklableHttpRequest): raise TypeError("picklable_request must be a PicklableHttpRequest. Was of type " + type(picklable_request).__name__) try: - prepared_request = picklable_request.create_request() - prepared_request = prepared_request.prepare() + t = threading.Timer(60, alert_hung_request) + t.start() + bare_request = picklable_request.create_request() s = requests.Session() + prepared_request = s.prepare_request(bare_request) r = s.send(prepared_request, timeout=StatusReporter._HTTP_REQUEST_TIMEOUT) + t.cancel() # Watchdog no longer needed. r.raise_for_status() # Raise an exception if there was an http error code returned logging.info("HTTP request sent successfully.") except requests.exceptions.RequestException as e: @@ -105,6 +112,61 @@ def send_http_request(picklable_request, retry_queue): # that breaks our code. I don't want us pickling data that potentially # breaks airtime_analyzer. +def alert_hung_request(): + ''' Temporary function to alert our Airtime developers when we have a request that's + blocked indefinitely. We're working with the python requests developers to figure this + one out. (We need to strace airtime_analyzer to figure out where exactly it's blocked.) + There's some weird circumstance where this can happen, even though we specify a timeout. + ''' + pid = os.getpid() + + # Capture a list of the open file/socket handles so we can interpret the strace log + lsof_log = subprocess.check_output(["lsof -p %s" % str(pid)], shell=True) + + strace_log = "" + # Run strace on us for 10 seconds + try: + subprocess.check_output(["timeout 10 strace -p %s -s 1000 -f -v -o /var/log/airtime/airtime_analyzer_strace.log -ff " % str(pid)], + shell=True) + except subprocess.CalledProcessError as e: # When the timeout fires, it returns a crazy code + strace_log = e.output + pass + + + # Dump a traceback + code = [] + for threadId, stack in sys._current_frames().items(): + code.append("\n# ThreadID: %s" % threadId) + for filename, lineno, name, line in traceback.extract_stack(stack): + code.append('File: "%s", line %d, in %s' % (filename, lineno, name)) + if line: + code.append(" %s" % (line.strip())) + stack_trace = ('\n'.join(code)) + + logging.critical(stack_trace) + logging.critical(strace_log) + logging.critical(lsof_log) + # Exit the program so that upstart respawns us + #sys.exit(-1) #deadlocks :( + subprocess.check_output(["kill -9 %s" % str(pid)], shell=True) # Ugh, avert your eyes + +''' +request_running = False +request_running_lock = threading.Lock() +def is_request_running(): + request_running_lock.acquire() + rr = request_running + request_running_lock.release() + return rr +def set_request_running(is_running): + request_running_lock.acquire() + request_running = is_running + request_running_lock.release() +def is_request_hung(): +''' + + + class StatusReporter(): @@ -122,13 +184,13 @@ class StatusReporter(): _request_process = None @classmethod - def start_child_process(self, http_retry_queue_path): + def start_thread(self, http_retry_queue_path): StatusReporter._request_process = threading.Thread(target=process_http_requests, args=(StatusReporter._ipc_queue,http_retry_queue_path)) StatusReporter._request_process.start() @classmethod - def stop_child_process(self): + def stop_thread(self): logging.info("Terminating status_reporter process") #StatusReporter._request_process.terminate() # Triggers SIGTERM on the child process StatusReporter._ipc_queue.put("shutdown") # Special trigger