Changeset 168

Show
Ignore:
Timestamp:
03/12/08 23:23:54 (5 years ago)
Author:
Slynderdale
Message:

Added a couple of bug fixes and improvements. When someone posts a URL using the https scheme, the plugin now checks whether the openssl extension is loaded before processing that URL; if it is not loaded, the URL is skipped. Previously, if someone posted a URL that was already in the cache, processing halted at that URL even if other URLs followed it in the same message; now cached URLs are simply skipped. The URL is also now cleaned (stripped of its scheme and anchor) before it is checked against and stored in the cache.

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • trunk/Phergie/Plugin/Url.php

    r165 r168  
    7878            foreach ($matches as $m) { 
    7979                $url = rtrim($m[1], '), ].?!'); 
     80                $scheme = strtolower(parse_url($url, PHP_URL_SCHEME)); 
     81                if ($scheme == 'https' && !extension_loaded('openssl')) 
     82                        continue; 
     83 
    8084                // Convert url 
    8185                $tinyUrl = $this->tinyUrl($url); 
     
    8387                                // Prevent spamfest 
    8488                                if($this->checkURLCache($url, $tinyUrl)) 
    85                                         return; 
     89                                        continue; 
    8690 
    8791                $opts = array('http' => 
     
    9498                $context = stream_context_create($opts); 
    9599 
     100                $this->debug("Opening Stream for $url"); 
    96101                                $previousHandler = set_error_handler(array($this, 'errorHandler')); 
    97102                if ($page = fopen($url, 'r', false, $context)) { 
     103                        $this->debug("Opened Stream for $url"); 
    98104                    $data = stream_get_meta_data($page); 
    99105                    foreach ($data['wrapper_data'] as $header) { 
     
    181187        $source = $this->event->getSource(); 
    182188 
    183         // Transform the URL and TinyURL into a HEX CRC32 checksum to prevent potential problems 
    184         // and minimize the size of the cache for less cache bloat. 
    185         $url = dechex(crc32($url)); $tiny = dechex(crc32($tiny)); 
     189        /** 
     190        * Transform the URL and TinyURL into a HEX CRC32 checksum to prevent potential problems 
     191        * and minimize the size of the cache for less cache bloat. 
     192        */ 
     193        $url = $this->getURLChecksum($url);  $tiny = $this->getURLChecksum($tiny); 
    186194        $cache = array 
    187195        ( 
     
    191199 
    192200        $expire = $this->expire; 
    193         // If cache expiration is enabled, check to see if the given url has expired in the cache 
    194         // If expire is disabled, simply check to see if the url is listed 
     201        /** 
     202        * If cache expiration is enabled, check to see if the given url has expired in the cache 
     203        * If expire is disabled, simply check to see if the url is listed 
     204        */ 
    195205        if (($expire > 0 && (($cache['url'] + $expire) > time() || ($cache['tiny'] + $expire) > time())) || 
    196206            ($expire <= 0 && (isset($cache['url']) || isset($cache['tiny'])))) { 
     
    213223        $source = $this->event->getSource(); 
    214224 
    215         // Transform the URL and TinyURL into a HEX CRC32 checksum to prevent potential problems 
    216         // and minimize the size of the cache for less cache bloat. 
    217         $url = dechex(crc32($url)); $tiny = dechex(crc32($tiny)); 
     225        /** 
     226        * Transform the URL and TinyURL into a HEX CRC32 checksum to prevent potential problems 
     227        * and minimize the size of the cache for less cache bloat. 
     228        */ 
     229        $url = $this->getURLChecksum($url);  $tiny = $this->getURLChecksum($tiny); 
    218230        $time = time(); 
    219231 
     
    252264 
    253265        /** 
    254          * Custom error handler meant to handle 404 errors and such 
    255          */ 
     266        * Custom error handler meant to handle 404 errors and such 
     267        */ 
    256268        public function errorHandler($errno, $errstr, $errfile, $errline) 
    257269        { 
     
    263275            return false; 
    264276        } 
     277 
     278        /** 
     279        * Takes a url, parses and cleans the URL without of all the junk 
     280        * and then return the hex checksum of the url. 
     281        */ 
     282        public function getURLChecksum($url) { 
     283                $parsed = parse_url($url); 
     284                if (is_array($parsed)) { 
     285                    $url = $parsed['host']; 
     286                    $url .= isset($parsed['port']) ? ':'.$parsed['port'] : ''; 
     287                    if(isset($parsed['path'])) 
     288                    { 
     289                        $url .= (substr($parsed['path'], 0, 1) == '/') ? $parsed['path'] : ('/'.$parsed['path']); 
     290                    } 
     291                    $url .= isset($parsed['query']) ? '?'.$parsed['query'] : ''; 
     292                } 
     293                return dechex(crc32($url)); 
     294        } 
    265295}