From 138db7691d12aead24eca5eb36f5c9bf520e1cca Mon Sep 17 00:00:00 2001 From: ejb Date: Wed, 15 Apr 2015 16:44:02 +0100 Subject: [PATCH] Simplified proxy configuration --- Readme.md | 6 +++++- api/scraper.php | 21 +++++++-------------- config_examples/HOW TO CONFIGURE.md | 2 +- config_examples/proxy.example.php | 18 ++++++++---------- index.html | 2 +- 5 files changed, 22 insertions(+), 27 deletions(-) diff --git a/Readme.md b/Readme.md index 8615a61..7437b31 100644 --- a/Readme.md +++ b/Readme.md @@ -102,10 +102,14 @@ Then in `schema.json`, specify the function's name in the *pattern* field of you ## Running behind a proxy -Add a file called *proxy.php* to the config directory with a function called `file_get_contents_with_proxy`. This function should accept a URL as an argument, and return the HTML of the specified URL. +Add a file called *proxy.php* to the config directory. Within the file, use [`stream_context_set_default`](http://php.net/manual/en/function.stream-context-set-default.php) to configure the proxy. ## Changelog +### 2.0.0 + +- Simplified proxy configuration + ### 1.0.1 - New content types: 'url' and 'strict-url' diff --git a/api/scraper.php b/api/scraper.php index 3acccac..1befbee 100644 --- a/api/scraper.php +++ b/api/scraper.php @@ -10,7 +10,6 @@ include '../config/proxy.php'; } - function checkUrl( $url ) { if ( file_exists('../config/custom.php') ) { @@ -21,11 +20,7 @@ function checkUrl( $url ) { return false; } - if (function_exists('file_get_contents_with_proxy')) { - $file = file_get_contents_with_proxy( $url ); - } else { - $file = file_get_contents( $url ); - } + $file = file_get_contents( $url ); if (!$file) { return false; @@ -105,15 +100,13 @@ function checkUrl( $url ) { $strict = true; } $item_url = $schema[$i]['contents']; - if (function_exists('file_get_contents_with_proxy')) { - file_get_contents_with_proxy( $item_url ); - } else { + if (!empty($item_url)) { file_get_contents( $item_url ); - } - $headerStatus = checkHeaderStatus($http_response_header, $strict); - $schema[$i]['_requestHeader'] = $http_response_header; - if ( !is_null($http_response_header) && ($headerStatus === false) ) { - $schema[$i]['ok'] = false; + $headerStatus = checkHeaderStatus($http_response_header, $strict); + $schema[$i]['_requestHeader'] = $http_response_header; + if ( !is_null($http_response_header) && ($headerStatus === false) ) { + $schema[$i]['ok'] = false; + } } } diff --git a/config_examples/HOW TO CONFIGURE.md b/config_examples/HOW TO CONFIGURE.md index e6f1cf1..79e7cd8 100644 --- a/config_examples/HOW TO CONFIGURE.md +++ b/config_examples/HOW TO CONFIGURE.md @@ -12,4 +12,4 @@ Used to specify custom selectors and validators. See Readme for more info. ## proxy.php -Add a file called *proxy.php* to the config directory with a function called `file_get_contents_with_proxy`. This function should accept a URL as an argument, and return the HTML of the specified URL. +Add a file called *proxy.php* to the config directory. Within the file, use [`stream_context_set_default`](http://php.net/manual/en/function.stream-context-set-default.php) to configure the proxy. diff --git a/config_examples/proxy.example.php b/config_examples/proxy.example.php index 0f33b78..2c7f0b2 100644 --- a/config_examples/proxy.example.php +++ b/config_examples/proxy.example.php @@ -4,15 +4,13 @@ This is an example of how one might use the meta checker with a proxy. */ -if (file_exists('../my/internal/proxy.php')) { - include_once('../my/internal/proxy.php'); - function file_get_contents_with_proxy($url) { - return myInternalProxy($url); - } -} else { - function file_get_contents_with_proxy($url){ - return file_get_contents($url); - } -} +stream_context_set_default( + array( + 'http' => array( + 'proxy' => "tcp://proxy.mycompany.net:80", + 'request_fulluri' => true, + ) + ) +); ?> \ No newline at end of file diff --git a/index.html b/index.html index 23223b2..224fad5 100644 --- a/index.html +++ b/index.html @@ -54,7 +54,7 @@

{{group}}

- +