Initial work on automatic ingest for imported podcasts

This commit is contained in:
Duncan Sommerville 2015-10-15 14:44:17 -04:00
parent a2d725f2b9
commit bddc121c2d
7 changed files with 157 additions and 20 deletions

View file

@ -28,6 +28,7 @@ require_once "GoogleAnalytics.php";
require_once "Timezone.php";
require_once "CeleryManager.php";
require_once "TaskManager.php";
require_once "PodcastManager.php";
require_once "UsabilityHints.php";
require_once __DIR__.'/models/formatters/LengthFormatter.php';
require_once __DIR__.'/common/widgets/Table.php';

View file

@ -17,6 +17,11 @@ class CeleryManager {
*/
private static $_CELERY_RESULTS_EXCHANGE = 'celeryresults';
/**
* @var PropelCollection cache of any pending CeleryTasks results for a service or task
*/
private static $_pendingTasks;
/**
* Connect to the Celery daemon via amqp
*
@ -79,7 +84,7 @@ class CeleryManager {
// If the message isn't ready yet (Celery hasn't finished the task), throw an exception.
if ($message == FALSE) {
if (self::_checkMessageTimeout($task)) {
if (static::_checkMessageTimeout($task)) {
// If the task times out, mark it as failed. We don't want to remove the
// track reference here in case it was a deletion that failed, for example.
$task->setDbStatus(CELERY_FAILED_STATUS)->save();
@ -104,8 +109,8 @@ class CeleryManager {
* @return bool true if there are no pending tasks, otherwise false
*/
public static function isBrokerTaskQueueEmpty($taskName = "", $serviceName = "") {
$pendingTasks = self::_getPendingTasks($taskName, $serviceName);
return empty($pendingTasks);
self::$_pendingTasks = static::_getPendingTasks($taskName, $serviceName);
return empty(self::$_pendingTasks);
}
/**
@ -119,11 +124,12 @@ class CeleryManager {
* @param string $serviceName the name of the service to poll for
*/
public static function pollBrokerTaskQueue($taskName = "", $serviceName = "") {
$pendingTasks = self::_getPendingTasks($taskName, $serviceName);
$pendingTasks = empty(self::$_pendingTasks) ? static::_getPendingTasks($taskName, $serviceName)
: self::$_pendingTasks;
foreach ($pendingTasks as $task) {
try {
$message = self::_getTaskMessage($task);
self::_processTaskMessage($task, $message);
$message = static::_getTaskMessage($task);
static::_processTaskMessage($task, $message);
} catch (CeleryTimeoutException $e) {
Logging::warn($e->getMessage());
} catch (Exception $e) {

View file

@ -0,0 +1,74 @@
<?php
/**
 * Class PodcastManager
 *
 * Static helpers for the periodic automatic ingest of imported podcasts: decides
 * whether a poll is due and queues the newest episode of every podcast flagged
 * for automatic ingest.
 */
class PodcastManager {

    /**
     * @var int how often, in seconds, to check for and ingest new podcast episodes
     */
    private static $_PODCAST_POLL_INTERVAL_SECONDS = 3600;  // 1 hour

    /**
     * Check whether $_PODCAST_POLL_INTERVAL_SECONDS have passed since the last call to
     * downloadNewestEpisodes
     *
     * @return bool true if $_PODCAST_POLL_INTERVAL_SECONDS has passed since the last check,
     *              or if no previous check has been recorded
     */
    public static function hasPodcastPollIntervalPassed() {
        $lastPolled = Application_Model_Preference::getPodcastPollLock();
        return empty($lastPolled) || (microtime(true) > $lastPolled + self::$_PODCAST_POLL_INTERVAL_SECONDS);
    }

    /**
     * Find all podcasts flagged for automatic ingest whose most recent episode has
     * yet to be downloaded, store a placeholder for that episode and pass the
     * placeholders to Application_Service_PodcastEpisodeService::downloadEpisodes()
     *
     * The poll timestamp is only recorded once every podcast has been processed.
     *
     * @throws InvalidPodcastException
     * @throws PodcastNotFoundException
     */
    public static function downloadNewestEpisodes() {
        $autoIngestPodcasts = static::_getAutoIngestPodcasts();
        $service = new Application_Service_PodcastEpisodeService();
        $episodes = array();
        foreach ($autoIngestPodcasts as $podcast) {
            /** @var ImportedPodcast $podcast */
            $podcastArray = Application_Service_PodcastService::getPodcastById($podcast->getDbId());
            // A feed without episodes has nothing to ingest; bail out before indexing
            // into the list so we never look up a GUID on a missing entry
            if (empty($podcastArray["episodes"])) {
                continue;
            }
            // A bit hacky... sort the episodes by publication date to get the most recent
            usort($podcastArray["episodes"], array(static::class, "_sortByEpisodePubDate"));
            $episodeData = $podcastArray["episodes"][0];
            if (empty($episodeData)) {
                continue;
            }
            $episode = PodcastEpisodesQuery::create()->findOneByDbEpisodeGuid($episodeData["guid"]);
            // Make sure there's no existing episode placeholder or import
            if (empty($episode)) {
                $placeholder = $service->addPodcastEpisodePlaceholder($podcast->getDbId(), $episodeData);
                array_push($episodes, $placeholder);
            }
        }

        $service->downloadEpisodes($episodes);
        Application_Model_Preference::setPodcastPollLock(microtime(true));
    }

    /**
     * Find all podcasts flagged for automatic ingest
     *
     * @return PropelObjectCollection collection of ImportedPodcast objects
     *                                flagged for automatic ingest
     */
    protected static function _getAutoIngestPodcasts() {
        return ImportedPodcastQuery::create()
            ->filterByDbAutoIngest(true)
            ->find();
    }

    /**
     * Custom sort function for podcast episodes (newest first)
     *
     * Publication dates are compared chronologically whenever both can be read as a
     * point in time; comparing textual dates such as "Thu, 15 Oct 2015 ..." as plain
     * strings would order them alphabetically instead. If either date cannot be
     * interpreted, the raw values are compared as before.
     *
     * @param array $a first episode array to compare
     * @param array $b second episode array to compare
     *
     * @return int 0 if both dates are equal, 1 if $a is older than $b, -1 otherwise
     */
    protected static function _sortByEpisodePubDate($a, $b) {
        $aDate = isset($a["pub_date"]) ? $a["pub_date"] : null;
        $bDate = isset($b["pub_date"]) ? $b["pub_date"] : null;

        $aTime = self::_pubDateToTimestamp($aDate);
        $bTime = self::_pubDateToTimestamp($bDate);
        if ($aTime !== null && $bTime !== null) {
            $aDate = $aTime;
            $bDate = $bTime;
        }

        if ($aDate == $bDate) return 0;
        return ($aDate < $bDate) ? 1 : -1;  // Descending order
    }

    /**
     * Convert a publication date to a numeric timestamp where possible
     *
     * @param mixed $pubDate numeric timestamp or textual date
     *
     * @return int|float|null the timestamp, or null if $pubDate cannot be interpreted
     */
    private static function _pubDateToTimestamp($pubDate) {
        if (is_int($pubDate) || is_float($pubDate)) {
            return $pubDate;
        }
        if (!is_string($pubDate) || $pubDate === "") {
            return null;
        }
        if (is_numeric($pubDate)) {
            return $pubDate + 0;
        }
        $timestamp = strtotime($pubDate);
        return ($timestamp === false) ? null : $timestamp;
    }

}

View file

@ -149,7 +149,7 @@ final class TaskManager {
}
/**
* Interface AirtimeTask Interface for task operations - also acts as task type ENUM
* Interface AirtimeTask Interface for task operations
*/
interface AirtimeTask {
@ -215,6 +215,29 @@ class CeleryTask implements AirtimeTask {
}
/**
* Class PodcastTask
*/
class PodcastTask implements AirtimeTask {

    /**
     * Report whether a podcast poll is currently due
     *
     * @return bool the result of PodcastManager::hasPodcastPollIntervalPassed()
     */
    public function shouldBeRun() {
        $pollIsDue = PodcastManager::hasPodcastPollIntervalPassed();
        return $pollIsDue;
    }

    /**
     * Perform the podcast ingest by delegating to
     * PodcastManager::downloadNewestEpisodes()
     */
    public function run() {
        PodcastManager::downloadNewestEpisodes();
    }

}
/**
* Class TaskFactory Factory class to abstract task instantiation
*/
@ -227,6 +250,7 @@ class TaskFactory {
const UPGRADE = "upgrade";
const CELERY = "celery";
const PODCAST = "podcast";
/**
* @var array map of arbitrary identifiers to class names to be instantiated reflectively
@ -234,6 +258,7 @@ class TaskFactory {
public static $tasks = array(
"upgrade" => "UpgradeTask",
"celery" => "CeleryTask",
"podcast" => "PodcastTask",
);
/**

View file

@ -1502,4 +1502,12 @@ class Application_Model_Preference
{
self::setValue("whats_new_dialog_viewed", $value, true);
}
/**
 * Read the value stored under the "podcast_poll_lock" preference key
 *
 * @return mixed whatever self::getValue() yields for that key
 */
public static function getPodcastPollLock() {
    $pollLock = self::getValue("podcast_poll_lock");
    return $pollLock;
}
/**
 * Store a value under the "podcast_poll_lock" preference key
 *
 * @param mixed $value the value to hand to self::setValue()
 */
public static function setPodcastPollLock($value) {
    $preferenceKey = "podcast_poll_lock";
    self::setValue($preferenceKey, $value);
}
}

View file

@ -123,7 +123,8 @@ class Application_Service_PodcastEpisodeService extends Application_Service_Thir
}
/**
* Given an array of episodes, extract the download URLs and send them to Celery
* Given an array of episodes, store them in the database as placeholder objects until
* they can be processed by Celery
*
* @param int $podcastId Podcast object identifier
* @param array $episodes array of podcast episodes
@ -133,17 +134,38 @@ class Application_Service_PodcastEpisodeService extends Application_Service_Thir
public function addPodcastEpisodePlaceholders($podcastId, $episodes) {
$storedEpisodes = array();
foreach ($episodes as $episode) {
$e = new PodcastEpisodes();
$e->setDbPodcastId($podcastId);
$e->setDbDownloadUrl($episode["enclosure"]["link"]);
$e->setDbEpisodeGuid($episode["guid"]);
$e->setDbPublicationDate($episode["pub_date"]);
$e->save();
$e = $this->addPodcastEpisodePlaceholder($podcastId, $episode);
array_push($storedEpisodes, $e);
}
return $storedEpisodes;
}
/**
* Given an episode, store it in the database as a placeholder object until
* it can be processed by Celery
*
* @param int $podcastId Podcast object identifier
* @param array $episode array of podcast episode data
*
* @return PodcastEpisodes the stored PodcastEpisodes object
*/
public function addPodcastEpisodePlaceholder($podcastId, $episode) {
    // The enclosure is either a SimplePie_Enclosure (episode parsed directly from the
    // SimplePie feed object) or a plain array (episode passed in as json), so the
    // download link has to be read differently for each case
    $downloadUrl = ($episode["enclosure"] instanceof SimplePie_Enclosure)
        ? $episode["enclosure"]->get_link()
        : $episode["enclosure"]["link"];

    // Populate and persist the placeholder row, then hand it back to the caller
    $placeholder = new PodcastEpisodes();
    $placeholder->setDbPodcastId($podcastId);
    $placeholder->setDbDownloadUrl($downloadUrl);
    $placeholder->setDbEpisodeGuid($episode["guid"]);
    $placeholder->setDbPublicationDate($episode["pub_date"]);
    $placeholder->save();

    return $placeholder;
}
/**
* Given an array of episodes, extract the IDs and download URLs and send them to Celery
*
@ -156,6 +178,7 @@ class Application_Service_PodcastEpisodeService extends Application_Service_Thir
array_push($episodeUrls, array("id" => $episode->getDbId(),
"url" => $episode->getDbDownloadUrl()));
}
if (empty($episodeUrls)) return;
$this->_download($episodeUrls);
}

View file

@ -37,7 +37,7 @@ class Application_Service_PodcastService
/**
* Returns parsed rss feed, or false if the given URL cannot be downloaded
*
* @param $podcastUrl String containing the podcast feed URL
* @param string $feedUrl String containing the podcast feed URL
*
* @return mixed
*/
@ -57,9 +57,9 @@ class Application_Service_PodcastService
/** Creates a Podcast object from the given podcast URL.
* This is used by our Podcast REST API
*
* @param $feedUrl Podcast RSS Feed Url
* @param string $feedUrl Podcast RSS Feed Url
*
* @return array - Podcast Array with a full list of episodes
* @return array Podcast Array with a full list of episodes
* @throws Exception
* @throws InvalidPodcastException
* @throws PodcastLimitReachedException