From 4d08953d88f69dc0a1e92bf2c1c0b2be76d5c301 Mon Sep 17 00:00:00 2001 From: helpfulrobot Date: Sat, 26 Dec 2015 09:37:25 +1300 Subject: [PATCH] Converted to PSR-2 --- code/controllers/CMSExternalLinks.php | 97 ++--- code/jobs/CheckExternalLinksJob.php | 50 +-- code/model/BrokenExternalLink.php | 111 +++--- code/model/BrokenExternalPageTrack.php | 38 +- code/model/BrokenExternalPageTrackStatus.php | 220 ++++++----- code/reports/BrokenExternalLinksReport.php | 140 +++---- code/tasks/CheckExternalLinksTask.php | 395 ++++++++++--------- code/tasks/CurlLinkChecker.php | 83 ++-- code/tasks/LinkChecker.php | 19 +- tests/ExternalLinksTest.php | 311 ++++++++------- 10 files changed, 771 insertions(+), 693 deletions(-) diff --git a/code/controllers/CMSExternalLinks.php b/code/controllers/CMSExternalLinks.php index 2b0efb6..fe0be6b 100644 --- a/code/controllers/CMSExternalLinks.php +++ b/code/controllers/CMSExternalLinks.php @@ -1,54 +1,61 @@ response); - $this->response - ->addHeader('Content-Type', 'application/json') - ->addHeader('Content-Encoding', 'UTF-8') - ->addHeader('X-Content-Type-Options', 'nosniff'); + /* + * Respond to Ajax requests for info on a running job + * + * @return string JSON string detailing status of the job + */ + public function getJobStatus() + { + // Set headers + HTTP::set_cache_age(0); + HTTP::add_cache_headers($this->response); + $this->response + ->addHeader('Content-Type', 'application/json') + ->addHeader('Content-Encoding', 'UTF-8') + ->addHeader('X-Content-Type-Options', 'nosniff'); - // Format status - $track = BrokenExternalPageTrackStatus::get_latest(); - if($track) return json_encode(array( - 'TrackID' => $track->ID, - 'Status' => $track->Status, - 'Completed' => $track->getCompletedPages(), - 'Total' => $track->getTotalPages() - )); - } + // Format status + $track = BrokenExternalPageTrackStatus::get_latest(); + if ($track) { + return json_encode(array( + 'TrackID' => $track->ID, + 'Status' => $track->Status, + 'Completed' => $track->getCompletedPages(), + 'Total' => $track->getTotalPages() + )); + } + } - /* - * Starts a broken external link check - */ - public function start() { - // return if the a job is already running - $status = BrokenExternalPageTrackStatus::get_latest(); - if ($status && $status->Status == 'Running') return; + /* + * Starts a broken external link check + */ + public function start() + { + // return if the a job is already running + $status = BrokenExternalPageTrackStatus::get_latest(); + if ($status && $status->Status == 'Running') { + return; + } - // Create a new job - if (class_exists('QueuedJobService')) { - // Force the creation of a new run - BrokenExternalPageTrackStatus::create_status(); - $checkLinks = new CheckExternalLinksJob(); - singleton('QueuedJobService')->queueJob($checkLinks); - } else { - //TODO this hangs as it waits for the connection to be released - // should return back and continue processing - // http://us3.php.net/manual/en/features.connection-handling.php - $task = CheckExternalLinksTask::create(); - $task->runLinksCheck(); - } - } + // Create a new job + if (class_exists('QueuedJobService')) { + // Force the creation of a new run + BrokenExternalPageTrackStatus::create_status(); + $checkLinks = new CheckExternalLinksJob(); + singleton('QueuedJobService')->queueJob($checkLinks); + } else { + //TODO this hangs as it waits for the connection to be released + // should return back and continue processing + // http://us3.php.net/manual/en/features.connection-handling.php + $task = CheckExternalLinksTask::create(); + $task->runLinksCheck(); + } + } } diff --git a/code/jobs/CheckExternalLinksJob.php b/code/jobs/CheckExternalLinksJob.php index 3f4311a..a8c6cdb 100644 --- a/code/jobs/CheckExternalLinksJob.php +++ b/code/jobs/CheckExternalLinksJob.php @@ -1,34 +1,40 @@ runLinksCheck(1); - $this->currentStep = $track->CompletedPages; - $this->totalSteps = $track->TotalPages; - $this->isComplete = $track->Status === 'Completed'; - } + public function getSignature() + { + return md5(get_class($this)); + } + /** + * Check an individual page + */ + public function process() + { + $task = CheckExternalLinksTask::create(); + $track = $task->runLinksCheck(1); + $this->currentStep = $track->CompletedPages; + $this->totalSteps = $track->TotalPages; + $this->isComplete = $track->Status === 'Completed'; + } } diff --git a/code/model/BrokenExternalLink.php b/code/model/BrokenExternalLink.php index 18c484f..6214f0c 100644 --- a/code/model/BrokenExternalLink.php +++ b/code/model/BrokenExternalLink.php @@ -6,66 +6,69 @@ * @method BrokenExternalPageTrack Track() * @method BrokenExternalPageTrackStatus Status() */ -class BrokenExternalLink extends DataObject { +class BrokenExternalLink extends DataObject +{ - private static $db = array( - 'Link' => 'Varchar(2083)', // 2083 is the maximum length of a URL in Internet Explorer. - 'HTTPCode' =>'Int' - ); + private static $db = array( + 'Link' => 'Varchar(2083)', // 2083 is the maximum length of a URL in Internet Explorer. + 'HTTPCode' =>'Int' + ); - private static $has_one = array( - 'Track' => 'BrokenExternalPageTrack', - 'Status' => 'BrokenExternalPageTrackStatus' - ); + private static $has_one = array( + 'Track' => 'BrokenExternalPageTrack', + 'Status' => 'BrokenExternalPageTrackStatus' + ); - private static $summary_fields = array( - 'Created' => 'Checked', - 'Link' => 'External Link', - 'HTTPCodeDescription' => 'HTTP Error Code', - 'Page.Title' => 'Page link is on' - ); + private static $summary_fields = array( + 'Created' => 'Checked', + 'Link' => 'External Link', + 'HTTPCodeDescription' => 'HTTP Error Code', + 'Page.Title' => 'Page link is on' + ); - private static $searchable_fields = array( - 'HTTPCode' => array('title' => 'HTTP Code') - ); + private static $searchable_fields = array( + 'HTTPCode' => array('title' => 'HTTP Code') + ); - /** - * @return SiteTree - */ - public function Page() { - return $this->Track()->Page(); - } + /** + * @return SiteTree + */ + public function Page() + { + return $this->Track()->Page(); + } - public function canEdit($member = false) { - return false; - } + public function canEdit($member = false) + { + return false; + } - public function canView($member = false) { - $member = $member ? $member : Member::currentUser(); - $codes = array('content-authors', 'administrators'); - return Permission::checkMember($member, $codes); - } + public function canView($member = false) + { + $member = $member ? $member : Member::currentUser(); + $codes = array('content-authors', 'administrators'); + return Permission::checkMember($member, $codes); + } - /** - * Retrieve a human readable description of a response code - * - * @return string - */ - public function getHTTPCodeDescription() { - $code = $this->HTTPCode; - if(empty($code)) { - // Assume that $code = 0 means there was no response - $description = _t('BrokenExternalLink.NOTAVAILABLE', 'Server Not Available'); - } elseif( - ($descriptions = Config::inst()->get('SS_HTTPResponse', 'status_codes')) - && isset($descriptions[$code]) - ) { - $description = $descriptions[$code]; - } else { - $description = _t('BrokenExternalLink.UNKNOWNRESPONSE', 'Unknown Response Code'); - } - return sprintf("%d (%s)", $code, $description); - } + /** + * Retrieve a human readable description of a response code + * + * @return string + */ + public function getHTTPCodeDescription() + { + $code = $this->HTTPCode; + if (empty($code)) { + // Assume that $code = 0 means there was no response + $description = _t('BrokenExternalLink.NOTAVAILABLE', 'Server Not Available'); + } elseif ( + ($descriptions = Config::inst()->get('SS_HTTPResponse', 'status_codes')) + && isset($descriptions[$code]) + ) { + $description = $descriptions[$code]; + } else { + $description = _t('BrokenExternalLink.UNKNOWNRESPONSE', 'Unknown Response Code'); + } + return sprintf("%d (%s)", $code, $description); + } } - - diff --git a/code/model/BrokenExternalPageTrack.php b/code/model/BrokenExternalPageTrack.php index 828b541..db80eb9 100644 --- a/code/model/BrokenExternalPageTrack.php +++ b/code/model/BrokenExternalPageTrack.php @@ -3,26 +3,28 @@ /** * Represents a track for a single page */ -class BrokenExternalPageTrack extends DataObject { +class BrokenExternalPageTrack extends DataObject +{ - private static $db = array( - 'Processed' => 'Boolean' - ); + private static $db = array( + 'Processed' => 'Boolean' + ); - private static $has_one = array( - 'Page' => 'SiteTree', - 'Status' => 'BrokenExternalPageTrackStatus' - ); + private static $has_one = array( + 'Page' => 'SiteTree', + 'Status' => 'BrokenExternalPageTrackStatus' + ); - private static $has_many = array( - 'BrokenLinks' => 'BrokenExternalLink' - ); + private static $has_many = array( + 'BrokenLinks' => 'BrokenExternalLink' + ); - /** - * @return SiteTree - */ - public function Page() { - return Versioned::get_by_stage('SiteTree', 'Stage') - ->byID($this->PageID); - } + /** + * @return SiteTree + */ + public function Page() + { + return Versioned::get_by_stage('SiteTree', 'Stage') + ->byID($this->PageID); + } } diff --git a/code/model/BrokenExternalPageTrackStatus.php b/code/model/BrokenExternalPageTrackStatus.php index 141b996..abe9e87 100644 --- a/code/model/BrokenExternalPageTrackStatus.php +++ b/code/model/BrokenExternalPageTrackStatus.php @@ -8,121 +8,133 @@ * @property int $TotalPages Get total pages count * @property int $CompletedPages Get completed pages count */ -class BrokenExternalPageTrackStatus extends DataObject { +class BrokenExternalPageTrackStatus extends DataObject +{ - private static $db = array( - 'Status' => 'Enum("Completed, Running", "Running")', - 'JobInfo' => 'Varchar(255)' - ); + private static $db = array( + 'Status' => 'Enum("Completed, Running", "Running")', + 'JobInfo' => 'Varchar(255)' + ); - private static $has_many = array( - 'TrackedPages' => 'BrokenExternalPageTrack', - 'BrokenLinks' => 'BrokenExternalLink' - ); + private static $has_many = array( + 'TrackedPages' => 'BrokenExternalPageTrack', + 'BrokenLinks' => 'BrokenExternalLink' + ); - /** - * Get the latest track status - * - * @return self - */ - public static function get_latest() { - return self::get() - ->sort('ID', 'DESC') - ->first(); - } + /** + * Get the latest track status + * + * @return self + */ + public static function get_latest() + { + return self::get() + ->sort('ID', 'DESC') + ->first(); + } - /** - * Gets the list of Pages yet to be checked - * - * @return DataList - */ - public function getIncompletePageList() { - $pageIDs = $this - ->getIncompleteTracks() - ->column('PageID'); - if($pageIDs) return Versioned::get_by_stage('SiteTree', 'Stage') - ->byIDs($pageIDs); - } + /** + * Gets the list of Pages yet to be checked + * + * @return DataList + */ + public function getIncompletePageList() + { + $pageIDs = $this + ->getIncompleteTracks() + ->column('PageID'); + if ($pageIDs) { + return Versioned::get_by_stage('SiteTree', 'Stage') + ->byIDs($pageIDs); + } + } - /** - * Get the list of incomplete BrokenExternalPageTrack - * - * @return DataList - */ - public function getIncompleteTracks() { - return $this - ->TrackedPages() - ->filter('Processed', 0); - } + /** + * Get the list of incomplete BrokenExternalPageTrack + * + * @return DataList + */ + public function getIncompleteTracks() + { + return $this + ->TrackedPages() + ->filter('Processed', 0); + } - /** - * Get total pages count - */ - public function getTotalPages() { - return $this->TrackedPages()->count(); - } + /** + * Get total pages count + */ + public function getTotalPages() + { + return $this->TrackedPages()->count(); + } - /** - * Get completed pages count - */ - public function getCompletedPages() { - return $this - ->TrackedPages() - ->filter('Processed', 1) - ->count(); - } + /** + * Get completed pages count + */ + public function getCompletedPages() + { + return $this + ->TrackedPages() + ->filter('Processed', 1) + ->count(); + } - /** - * Returns the latest run, or otherwise creates a new one - * - * @return self - */ - public static function get_or_create() { - // Check the current status - $status = self::get_latest(); - if ($status && $status->Status == 'Running') { - $status->updateStatus(); - return $status; - } + /** + * Returns the latest run, or otherwise creates a new one + * + * @return self + */ + public static function get_or_create() + { + // Check the current status + $status = self::get_latest(); + if ($status && $status->Status == 'Running') { + $status->updateStatus(); + return $status; + } - return self::create_status(); - } + return self::create_status(); + } - /* - * Create and prepare a new status - * - * @return self - */ - public static function create_status() { - // If the script is to be started create a new status - $status = self::create(); - $status->updateJobInfo('Creating new tracking object'); + /* + * Create and prepare a new status + * + * @return self + */ + public static function create_status() + { + // If the script is to be started create a new status + $status = self::create(); + $status->updateJobInfo('Creating new tracking object'); - // Setup all pages to test - $pageIDs = Versioned::get_by_stage('SiteTree', 'Stage') - ->column('ID'); - foreach ($pageIDs as $pageID) { - $trackPage = BrokenExternalPageTrack::create(); - $trackPage->PageID = $pageID; - $trackPage->StatusID = $status->ID; - $trackPage->write(); - } + // Setup all pages to test + $pageIDs = Versioned::get_by_stage('SiteTree', 'Stage') + ->column('ID'); + foreach ($pageIDs as $pageID) { + $trackPage = BrokenExternalPageTrack::create(); + $trackPage->PageID = $pageID; + $trackPage->StatusID = $status->ID; + $trackPage->write(); + } - return $status; - } + return $status; + } - public function updateJobInfo($message) { - $this->JobInfo = $message; - $this->write(); - } + public function updateJobInfo($message) + { + $this->JobInfo = $message; + $this->write(); + } - /** - * Self check status - */ - public function updateStatus() { - if ($this->CompletedPages == $this->TotalPages) { - $this->Status = 'Completed'; - $this->updateJobInfo('Setting to completed'); - } - } -} \ No newline at end of file + /** + * Self check status + */ + public function updateStatus() + { + if ($this->CompletedPages == $this->TotalPages) { + $this->Status = 'Completed'; + $this->updateJobInfo('Setting to completed'); + } + } +} diff --git a/code/reports/BrokenExternalLinksReport.php b/code/reports/BrokenExternalLinksReport.php index 233d898..6b1be33 100644 --- a/code/reports/BrokenExternalLinksReport.php +++ b/code/reports/BrokenExternalLinksReport.php @@ -6,78 +6,86 @@ * @subpackage content */ -class BrokenExternalLinksReport extends SS_Report { +class BrokenExternalLinksReport extends SS_Report +{ - /** - * Returns the report title - * - * @return string - */ - public function title() { - return _t('ExternalBrokenLinksReport.EXTERNALBROKENLINKS', "External broken links report"); - } + /** + * Returns the report title + * + * @return string + */ + public function title() + { + return _t('ExternalBrokenLinksReport.EXTERNALBROKENLINKS', "External broken links report"); + } - public function columns() { - return array( - "Created" => "Checked", - 'Link' => array( - 'title' => 'External Link', - 'formatting' => function($value, $item) { - return sprintf( - '%s', - Convert::raw2att($item->Link), - Convert::raw2xml($item->Link) - ); - } - ), - 'HTTPCodeDescription' => 'HTTP Error Code', - "Title" => array( - "title" => 'Page link is on', - 'formatting' => function($value, $item) { - $page = $item->Page(); - return sprintf( - '%s', - Convert::raw2att($page->CMSEditLink()), - Convert::raw2xml($page->Title) - ); - } - ) - ); - } + public function columns() + { + return array( + "Created" => "Checked", + 'Link' => array( + 'title' => 'External Link', + 'formatting' => function ($value, $item) { + return sprintf( + '%s', + Convert::raw2att($item->Link), + Convert::raw2xml($item->Link) + ); + } + ), + 'HTTPCodeDescription' => 'HTTP Error Code', + "Title" => array( + "title" => 'Page link is on', + 'formatting' => function ($value, $item) { + $page = $item->Page(); + return sprintf( + '%s', + Convert::raw2att($page->CMSEditLink()), + Convert::raw2xml($page->Title) + ); + } + ) + ); + } - /** - * Alias of columns(), to support the export to csv action - * in {@link GridFieldExportButton} generateExportFileData method. - * @return array - */ - public function getColumns() { - return $this->columns(); - } + /** + * Alias of columns(), to support the export to csv action + * in {@link GridFieldExportButton} generateExportFileData method. + * @return array + */ + public function getColumns() + { + return $this->columns(); + } - public function sourceRecords() { - $track = BrokenExternalPageTrackStatus::get_latest(); - if ($track) return $track->BrokenLinks(); - return new ArrayList(); - } + public function sourceRecords() + { + $track = BrokenExternalPageTrackStatus::get_latest(); + if ($track) { + return $track->BrokenLinks(); + } + return new ArrayList(); + } - public function getCMSFields() { - Requirements::javascript('externallinks/javascript/BrokenExternalLinksReport.js'); - $fields = parent::getCMSFields(); + public function getCMSFields() + { + Requirements::javascript('externallinks/javascript/BrokenExternalLinksReport.js'); + $fields = parent::getCMSFields(); - $reportResultSpan = '

'; - $reportResult = new LiteralField('ResultTitle', $reportResultSpan); - $fields->push($reportResult); + $reportResultSpan = '

'; + $reportResult = new LiteralField('ResultTitle', $reportResultSpan); + $fields->push($reportResult); - $button = ''; - $runReportButton = new LiteralField( - 'runReport', - sprintf( - $button, - _t('ExternalBrokenLinksReport.RUNREPORT', 'Create new report') - ) - ); - $fields->push($runReportButton); + $button = ''; + $runReportButton = new LiteralField( + 'runReport', + sprintf( + $button, + _t('ExternalBrokenLinksReport.RUNREPORT', 'Create new report') + ) + ); + $fields->push($runReportButton); - return $fields; - } + return $fields; + } } diff --git a/code/tasks/CheckExternalLinksTask.php b/code/tasks/CheckExternalLinksTask.php index 6c3fadf..4b0f258 100644 --- a/code/tasks/CheckExternalLinksTask.php +++ b/code/tasks/CheckExternalLinksTask.php @@ -1,188 +1,213 @@ '%$LinkChecker' - ); - - /** - * @var bool - */ - protected $silent = false; - - /** - * @var LinkChecker - */ - protected $linkChecker; - - protected $title = 'Checking broken External links in the SiteTree'; - - protected $description = 'A task that records external broken links in the SiteTree'; - - protected $enabled = true; - - /** - * Log a message - * - * @param string $message - */ - protected function log($message) { - if(!$this->silent) Debug::message($message); - } - - public function run($request) { - $this->runLinksCheck(); - } - /** - * Turn on or off message output - * - * @param bool $silent - */ - public function setSilent($silent) { - $this->silent = $silent; - } - - /** - * @param LinkChecker $linkChecker - */ - public function setLinkChecker(LinkChecker $linkChecker) { - $this->linkChecker = $linkChecker; - } - - /** - * @return LinkChecker - */ - public function getLinkChecker() { - return $this->linkChecker; - } - - /** - * Check the status of a single link on a page - * - * @param BrokenExternalPageTrack $pageTrack - * @param DOMNode $link - */ - protected function checkPageLink(BrokenExternalPageTrack $pageTrack, DOMNode $link) { - $class = $link->getAttribute('class'); - $href = $link->getAttribute('href'); - $markedBroken = preg_match('/\b(ss-broken)\b/', $class); - - // Check link - $httpCode = $this->linkChecker->checkLink($href); - if($httpCode === null) return; // Null link means uncheckable, such as an internal link - - // If this code is broken then mark as such - if($foundBroken = $this->isCodeBroken($httpCode)) { - // Create broken record - $brokenLink = new BrokenExternalLink(); - $brokenLink->Link = $href; - $brokenLink->HTTPCode = $httpCode; - $brokenLink->TrackID = $pageTrack->ID; - $brokenLink->StatusID = $pageTrack->StatusID; // Slight denormalisation here for performance reasons - $brokenLink->write(); - } - - // Check if we need to update CSS class, otherwise return - if($markedBroken == $foundBroken) return; - if($foundBroken) { - $class .= ' ss-broken'; - } else { - $class = preg_replace('/\s*\b(ss-broken)\b\s*/', ' ', $class); - } - $link->setAttribute('class', trim($class)); - } - - /** - * Determine if the given HTTP code is "broken" - * - * @param int $httpCode - * @return bool True if this is a broken code - */ - protected function isCodeBroken($httpCode) { - // Null represents no request attempted - if($httpCode === null) return false; - - // do we have any whitelisted codes - $ignoreCodes = Config::inst()->get('CheckExternalLinks', 'IgnoreCodes'); - if(is_array($ignoreCodes) && in_array($httpCode, $ignoreCodes)) return false; - - // Check if code is outside valid range - return $httpCode < 200 || $httpCode > 302; - } - - /** - * Runs the links checker and returns the track used - * - * @param int $limit Limit to number of pages to run, or null to run all - * @return BrokenExternalPageTrackStatus - */ - public function runLinksCheck($limit = null) { - // Check the current status - $status = BrokenExternalPageTrackStatus::get_or_create(); - - // Calculate pages to run - $pageTracks = $status->getIncompleteTracks(); - if($limit) $pageTracks = $pageTracks->limit($limit); - - // Check each page - foreach ($pageTracks as $pageTrack) { - // Flag as complete - $pageTrack->Processed = 1; - $pageTrack->write(); - - // Check value of html area - $page = $pageTrack->Page(); - $this->log("Checking {$page->Title}"); - $htmlValue = Injector::inst()->create('HTMLValue', $page->Content); - if (!$htmlValue->isValid()) continue; - - // Check each link - $links = $htmlValue->getElementsByTagName('a'); - foreach($links as $link) { - $this->checkPageLink($pageTrack, $link); - } - - // Update content of page based on link fixes / breakages - $htmlValue->saveHTML(); - $page->Content = $htmlValue->getContent(); - $page->write(); - - // Once all links have been created for this page update HasBrokenLinks - $count = $pageTrack->BrokenLinks()->count(); - $this->log("Found {$count} broken links"); - if($count) { - // Bypass the ORM as syncLinkTracking does not allow you to update HasBrokenLink to true - DB::query(sprintf( - 'UPDATE "SiteTree" SET "HasBrokenLink" = 1 WHERE "ID" = \'%d\'', - intval($pageTrack->ID) - )); - } - } - - $status->updateJobInfo('Updating completed pages'); - $status->updateStatus(); - return $status; - } - - private function updateCompletedPages($trackID = 0) { - $noPages = BrokenExternalPageTrack::get() - ->filter(array( - 'TrackID' => $trackID, - 'Processed' => 1 - )) - ->count(); - $track = BrokenExternalPageTrackStatus::get_latest(); - $track->CompletedPages = $noPages; - $track->write(); - return $noPages; - } - - private function updateJobInfo($message) { - $track = BrokenExternalPageTrackStatus::get_latest(); - if($track) { - $track->JobInfo = $message; - $track->write(); - } - } +class CheckExternalLinksTask extends BuildTask +{ + + private static $dependencies = array( + 'LinkChecker' => '%$LinkChecker' + ); + + /** + * @var bool + */ + protected $silent = false; + + /** + * @var LinkChecker + */ + protected $linkChecker; + + protected $title = 'Checking broken External links in the SiteTree'; + + protected $description = 'A task that records external broken links in the SiteTree'; + + protected $enabled = true; + + /** + * Log a message + * + * @param string $message + */ + protected function log($message) + { + if (!$this->silent) { + Debug::message($message); + } + } + + public function run($request) + { + $this->runLinksCheck(); + } + /** + * Turn on or off message output + * + * @param bool $silent + */ + public function setSilent($silent) + { + $this->silent = $silent; + } + + /** + * @param LinkChecker $linkChecker + */ + public function setLinkChecker(LinkChecker $linkChecker) + { + $this->linkChecker = $linkChecker; + } + + /** + * @return LinkChecker + */ + public function getLinkChecker() + { + return $this->linkChecker; + } + + /** + * Check the status of a single link on a page + * + * @param BrokenExternalPageTrack $pageTrack + * @param DOMNode $link + */ + protected function checkPageLink(BrokenExternalPageTrack $pageTrack, DOMNode $link) + { + $class = $link->getAttribute('class'); + $href = $link->getAttribute('href'); + $markedBroken = preg_match('/\b(ss-broken)\b/', $class); + + // Check link + $httpCode = $this->linkChecker->checkLink($href); + if ($httpCode === null) { + return; + } // Null link means uncheckable, such as an internal link + + // If this code is broken then mark as such + if ($foundBroken = $this->isCodeBroken($httpCode)) { + // Create broken record + $brokenLink = new BrokenExternalLink(); + $brokenLink->Link = $href; + $brokenLink->HTTPCode = $httpCode; + $brokenLink->TrackID = $pageTrack->ID; + $brokenLink->StatusID = $pageTrack->StatusID; // Slight denormalisation here for performance reasons + $brokenLink->write(); + } + + // Check if we need to update CSS class, otherwise return + if ($markedBroken == $foundBroken) { + return; + } + if ($foundBroken) { + $class .= ' ss-broken'; + } else { + $class = preg_replace('/\s*\b(ss-broken)\b\s*/', ' ', $class); + } + $link->setAttribute('class', trim($class)); + } + + /** + * Determine if the given HTTP code is "broken" + * + * @param int $httpCode + * @return bool True if this is a broken code + */ + protected function isCodeBroken($httpCode) + { + // Null represents no request attempted + if ($httpCode === null) { + return false; + } + + // do we have any whitelisted codes + $ignoreCodes = Config::inst()->get('CheckExternalLinks', 'IgnoreCodes'); + if (is_array($ignoreCodes) && in_array($httpCode, $ignoreCodes)) { + return false; + } + + // Check if code is outside valid range + return $httpCode < 200 || $httpCode > 302; + } + + /** + * Runs the links checker and returns the track used + * + * @param int $limit Limit to number of pages to run, or null to run all + * @return BrokenExternalPageTrackStatus + */ + public function runLinksCheck($limit = null) + { + // Check the current status + $status = BrokenExternalPageTrackStatus::get_or_create(); + + // Calculate pages to run + $pageTracks = $status->getIncompleteTracks(); + if ($limit) { + $pageTracks = $pageTracks->limit($limit); + } + + // Check each page + foreach ($pageTracks as $pageTrack) { + // Flag as complete + $pageTrack->Processed = 1; + $pageTrack->write(); + + // Check value of html area + $page = $pageTrack->Page(); + $this->log("Checking {$page->Title}"); + $htmlValue = Injector::inst()->create('HTMLValue', $page->Content); + if (!$htmlValue->isValid()) { + continue; + } + + // Check each link + $links = $htmlValue->getElementsByTagName('a'); + foreach ($links as $link) { + $this->checkPageLink($pageTrack, $link); + } + + // Update content of page based on link fixes / breakages + $htmlValue->saveHTML(); + $page->Content = $htmlValue->getContent(); + $page->write(); + + // Once all links have been created for this page update HasBrokenLinks + $count = $pageTrack->BrokenLinks()->count(); + $this->log("Found {$count} broken links"); + if ($count) { + // Bypass the ORM as syncLinkTracking does not allow you to update HasBrokenLink to true + DB::query(sprintf( + 'UPDATE "SiteTree" SET "HasBrokenLink" = 1 WHERE "ID" = \'%d\'', + intval($pageTrack->ID) + )); + } + } + + $status->updateJobInfo('Updating completed pages'); + $status->updateStatus(); + return $status; + } + + private function updateCompletedPages($trackID = 0) + { + $noPages = BrokenExternalPageTrack::get() + ->filter(array( + 'TrackID' => $trackID, + 'Processed' => 1 + )) + ->count(); + $track = BrokenExternalPageTrackStatus::get_latest(); + $track->CompletedPages = $noPages; + $track->write(); + return $noPages; + } + + private function updateJobInfo($message) + { + $track = BrokenExternalPageTrackStatus::get_latest(); + if ($track) { + $track->JobInfo = $message; + $track->write(); + } + } } diff --git a/code/tasks/CurlLinkChecker.php b/code/tasks/CurlLinkChecker.php index a84439d..0600fb7 100644 --- a/code/tasks/CurlLinkChecker.php +++ b/code/tasks/CurlLinkChecker.php @@ -3,47 +3,54 @@ /** * Check links using curl */ -class CurlLinkChecker implements LinkChecker { +class CurlLinkChecker implements LinkChecker +{ - /** - * Return cache - * - * @return Zend_Cache_Frontend - */ - protected function getCache() { - return SS_Cache::factory( - __CLASS__, - 'Output', - array('automatic_serialization' => true) - ); - } + /** + * Return cache + * + * @return Zend_Cache_Frontend + */ + protected function getCache() + { + return SS_Cache::factory( + __CLASS__, + 'Output', + array('automatic_serialization' => true) + ); + } - /** - * Determine the http status code for a given link - * - * @param string $href URL to check - * @return int HTTP status code, or null if not checkable (not a link) - */ - public function checkLink($href) { - // Skip non-external links - if(!preg_match('/^https?[^:]*:\/\//', $href)) return null; + /** + * Determine the http status code for a given link + * + * @param string $href URL to check + * @return int HTTP status code, or null if not checkable (not a link) + */ + public function checkLink($href) + { + // Skip non-external links + if (!preg_match('/^https?[^:]*:\/\//', $href)) { + return null; + } - // Check if we have a cached result - $cacheKey = md5($href); - $result = $this->getCache()->load($cacheKey); - if($result !== false) return $result; + // Check if we have a cached result + $cacheKey = md5($href); + $result = $this->getCache()->load($cacheKey); + if ($result !== false) { + return $result; + } - // No cached result so just request - $handle = curl_init($href); - curl_setopt($handle, CURLOPT_RETURNTRANSFER, TRUE); - curl_setopt($handle, CURLOPT_CONNECTTIMEOUT, 5); - curl_setopt($handle, CURLOPT_TIMEOUT, 10); - curl_exec($handle); - $httpCode = curl_getinfo($handle, CURLINFO_HTTP_CODE); - curl_close($handle); + // No cached result so just request + $handle = curl_init($href); + curl_setopt($handle, CURLOPT_RETURNTRANSFER, true); + curl_setopt($handle, CURLOPT_CONNECTTIMEOUT, 5); + curl_setopt($handle, CURLOPT_TIMEOUT, 10); + curl_exec($handle); + $httpCode = curl_getinfo($handle, CURLINFO_HTTP_CODE); + curl_close($handle); - // Cache result - $this->getCache()->save($httpCode, $cacheKey); - return $httpCode; - } + // Cache result + $this->getCache()->save($httpCode, $cacheKey); + return $httpCode; + } } diff --git a/code/tasks/LinkChecker.php b/code/tasks/LinkChecker.php index 629f177..10c9995 100644 --- a/code/tasks/LinkChecker.php +++ b/code/tasks/LinkChecker.php @@ -3,13 +3,14 @@ /** * Provides an interface for checking that a link is valid */ -interface LinkChecker { - - /** - * Determine the http status code for a given link - * - * @param string $href URL to check - * @return int HTTP status code, or null if not checkable (not a link) - */ - public function checkLink($href); +interface LinkChecker +{ + + /** + * Determine the http status code for a given link + * + * @param string $href URL to check + * @return int HTTP status code, or null if not checkable (not a link) + */ + public function checkLink($href); } diff --git a/tests/ExternalLinksTest.php b/tests/ExternalLinksTest.php index 754aeaf..bf4b54f 100644 --- a/tests/ExternalLinksTest.php +++ b/tests/ExternalLinksTest.php @@ -1,157 +1,164 @@ skipTest = true; - return $this->markTestSkipped("These tests need the Phockito module installed to run"); - } - - // Mock link checker - $checker = Phockito::mock('LinkChecker'); - Phockito::when($checker) - ->checkLink('http://www.working.com') - ->return(200); - - Phockito::when($checker) - ->checkLink('http://www.broken.com/url/thing') // 404 on working site - ->return(404); - - Phockito::when($checker) - ->checkLink('http://www.broken.com') // 403 on working site - ->return(403); - - Phockito::when($checker) - ->checkLink('http://www.nodomain.com') // no ping - ->return(0); - - Phockito::when($checker) - ->checkLink('/internal/link') - ->return(null); - - Phockito::when($checker) - ->checkLink('[sitetree_link,id=9999]') - ->return(null); - - Phockito::when($checker) - ->checkLink('home') - ->return(null); - - Phockito::when($checker) - ->checkLink('broken-internal') - ->return(null); - - Phockito::when($checker) - ->checkLink('[sitetree_link,id=1]') - ->return(null); - - Phockito::when($checker) - ->checkLink(Hamcrest_Matchers::anything()) // anything else is 404 - ->return(404); - - Injector::inst()->registerService($checker, 'LinkChecker'); - } - - public function tearDown() { - Injector::unnest(); - parent::tearDown(); - } - - public function testLinks() { - // Run link checker - $task = CheckExternalLinksTask::create(); - $task->setSilent(true); // Be quiet during the test! - $task->runLinksCheck(); - - // Get all links checked - $status = BrokenExternalPageTrackStatus::get_latest(); - $this->assertEquals('Completed', $status->Status); - $this->assertEquals(5, $status->TotalPages); - $this->assertEquals(5, $status->CompletedPages); - - // Check all pages have had the correct HTML adjusted - for($i = 1; $i <= 5; $i++) { - $page = $this->objFromFixture('ExternalLinksTest_Page', 'page'.$i); - $this->assertNotEmpty($page->Content); - $this->assertEquals( - $page->ExpectedContent, - $page->Content, - "Assert that the content of page{$i} has been updated" - ); - } - - // Check that the correct report of broken links is generated - $links = $status - ->BrokenLinks() - ->sort('Link'); - - $this->assertEquals(4, $links->count()); - $this->assertEquals( - array( - 'http://www.broken.com', - 'http://www.broken.com/url/thing', - 'http://www.broken.com/url/thing', - 'http://www.nodomain.com' - ), - array_values($links->map('ID', 'Link')->toArray()) - ); - - // Check response codes are correct - $expected = array( - 'http://www.broken.com' => 403, - 'http://www.broken.com/url/thing' => 404, - 'http://www.nodomain.com' => 0 - ); - $actual = $links->map('Link', 'HTTPCode')->toArray(); - $this->assertEquals($expected, $actual); - - // Check response descriptions are correct - i18n::set_locale('en_NZ'); - $expected = array( - 'http://www.broken.com' => '403 (Forbidden)', - 'http://www.broken.com/url/thing' => '404 (Not Found)', - 'http://www.nodomain.com' => '0 (Server Not Available)' - ); - $actual = $links->map('Link', 'HTTPCodeDescription')->toArray(); - $this->assertEquals($expected, $actual); - } - - /** - * Test that broken links appears in the reports list - */ - public function testReportExists() { - $reports = SS_Report::get_reports(); - $reportNames = array(); - foreach($reports as $report) { - $reportNames[] = $report->class; - } - $this->assertContains('BrokenExternalLinksReport',$reportNames, - 'BrokenExternalLinksReport is in reports list'); - } +class ExternalLinksTest extends SapphireTest +{ + + protected static $fixture_file = 'ExternalLinksTest.yml'; + + protected $extraDataObjects = array( + 'ExternalLinksTest_Page' + ); + + public function setUpOnce() + { + if (class_exists('Phockito')) { + Phockito::include_hamcrest(false); + } + + parent::setUpOnce(); + } + + public function setUp() + { + parent::setUp(); + + Injector::nest(); + + // Check dependencies + if (!class_exists('Phockito')) { + $this->skipTest = true; + return $this->markTestSkipped("These tests need the Phockito module installed to run"); + } + + // Mock link checker + $checker = Phockito::mock('LinkChecker'); + Phockito::when($checker) + ->checkLink('http://www.working.com') + ->return(200); + + Phockito::when($checker) + ->checkLink('http://www.broken.com/url/thing') // 404 on working site + ->return(404); + + Phockito::when($checker) + ->checkLink('http://www.broken.com') // 403 on working site + ->return(403); + + Phockito::when($checker) + ->checkLink('http://www.nodomain.com') // no ping + ->return(0); + + Phockito::when($checker) + ->checkLink('/internal/link') + ->return(null); + + Phockito::when($checker) + ->checkLink('[sitetree_link,id=9999]') + ->return(null); + + Phockito::when($checker) + ->checkLink('home') + ->return(null); + + Phockito::when($checker) + ->checkLink('broken-internal') + ->return(null); + + Phockito::when($checker) + ->checkLink('[sitetree_link,id=1]') + ->return(null); + + Phockito::when($checker) + ->checkLink(Hamcrest_Matchers::anything()) // anything else is 404 + ->return(404); + + Injector::inst()->registerService($checker, 'LinkChecker'); + } + + public function tearDown() + { + Injector::unnest(); + parent::tearDown(); + } + + public function testLinks() + { + // Run link checker + $task = CheckExternalLinksTask::create(); + $task->setSilent(true); // Be quiet during the test! + $task->runLinksCheck(); + + // Get all links checked + $status = BrokenExternalPageTrackStatus::get_latest(); + $this->assertEquals('Completed', $status->Status); + $this->assertEquals(5, $status->TotalPages); + $this->assertEquals(5, $status->CompletedPages); + + // Check all pages have had the correct HTML adjusted + for ($i = 1; $i <= 5; $i++) { + $page = $this->objFromFixture('ExternalLinksTest_Page', 'page'.$i); + $this->assertNotEmpty($page->Content); + $this->assertEquals( + $page->ExpectedContent, + $page->Content, + "Assert that the content of page{$i} has been updated" + ); + } + + // Check that the correct report of broken links is generated + $links = $status + ->BrokenLinks() + ->sort('Link'); + + $this->assertEquals(4, $links->count()); + $this->assertEquals( + array( + 'http://www.broken.com', + 'http://www.broken.com/url/thing', + 'http://www.broken.com/url/thing', + 'http://www.nodomain.com' + ), + array_values($links->map('ID', 'Link')->toArray()) + ); + + // Check response codes are correct + $expected = array( + 'http://www.broken.com' => 403, + 'http://www.broken.com/url/thing' => 404, + 'http://www.nodomain.com' => 0 + ); + $actual = $links->map('Link', 'HTTPCode')->toArray(); + $this->assertEquals($expected, $actual); + + // Check response descriptions are correct + i18n::set_locale('en_NZ'); + $expected = array( + 'http://www.broken.com' => '403 (Forbidden)', + 'http://www.broken.com/url/thing' => '404 (Not Found)', + 'http://www.nodomain.com' => '0 (Server Not Available)' + ); + $actual = $links->map('Link', 'HTTPCodeDescription')->toArray(); + $this->assertEquals($expected, $actual); + } + + /** + * Test that broken links appears in the reports list + */ + public function testReportExists() + { + $reports = SS_Report::get_reports(); + $reportNames = array(); + foreach ($reports as $report) { + $reportNames[] = $report->class; + } + $this->assertContains('BrokenExternalLinksReport', $reportNames, + 'BrokenExternalLinksReport is in reports list'); + } } -class ExternalLinksTest_Page extends Page implements TestOnly { - private static $db = array( - 'ExpectedContent' => 'HTMLText' - ); +class ExternalLinksTest_Page extends Page implements TestOnly +{ + private static $db = array( + 'ExpectedContent' => 'HTMLText' + ); }