root/trunk/Phergie/Plugin/Url.php @ 145

Revision 145, 7.4 KB (checked in by Slynderdale, 5 years ago)

Updated the URL plugin so that it both limits the number of URLs that are cached and expires them after a certain period of time. The URL plugin also now uses per-channel caching instead of a global cache.

Line 
1<?php
2
3/**
4* @see Phergie_Plugin_Abstract_Base
5*/
6require_once 'Phergie/Plugin/Abstract/Base.php';
7
8/**
9* Monitors incoming messages for instances of URLs and responds with messages
10* containing relevant information about detected URLs.
11*/
class Phergie_Plugin_Url extends Phergie_Plugin_Abstract_Base
{
    /**
     * Links output format
     *
     * Can use the variables %title% and %link% in it to display page titles
     * and links
     *
     * @var string
     */
    protected $format = '%title% [ %link% ]';

    /**
     * Per-source cache of recently announced URLs, used to prevent spamming,
     * especially with multiple bots on the same channel.
     *
     * Shape: source => (hex CRC32 of the URL => UNIX timestamp of last post)
     *
     * @var array
     */
    protected $urlCache = array();

    /**
     * Per-source cache of recently announced TinyURLs.
     *
     * Shape: source => (hex CRC32 of the TinyURL => UNIX timestamp of last post)
     *
     * @var array
     */
    protected $tinyCache = array();

    /**
     * The time in seconds to store the cached entries
     * Setting it to 0 or below disables the cache expiration
     *
     * @var int
     */
    protected $expire = 1800;

    /**
     * The number of entries to keep in the cache at one time per channel
     * Setting it to 0 or below disables the cache limit
     *
     * @var int
     */
    protected $limit = 10;

    /**
     * Initializes settings
     *
     * Overrides the default output format with the plugin's "format" setting
     * when that setting has a truthy value.
     *
     * @return void
     */
    public function init()
    {
        $format = $this->getPluginIni('format');
        if ($format) {
            $this->format = $format;
        }
    }

    /**
     * Checks an incoming message for the presence of URLs and, for each one
     * that was not recently announced on the same source, responds with its
     * title if it is an HTML document (or its MIME type otherwise) together
     * with the result of tinyUrl() for that URL.
     *
     * A URL that is skipped (cached, or nothing useful to report) does not
     * prevent the remaining URLs of the same message from being processed.
     *
     * @return void
     */
    public function onPrivmsg()
    {
        $pattern = '#(https?://(?:[a-z0-9_-]+\.)+[a-z0-9]{1,6}[^\s]*)#is';
        if (!preg_match_all($pattern, $this->event->getArgument(1), $matches, PREG_SET_ORDER)) {
            return;
        }

        $titleLength = $this->getPluginIni('title_length');

        // The stream context does not depend on the URL, so build it once.
        $context = stream_context_create(
            array('http' =>
                array(
                    'timeout' => 4,
                    'method' => 'GET',
                    'user_agent' => 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.12) Gecko/20080201 Firefox/2.0.0.12'
                )
            )
        );

        foreach ($matches as $urlMatch) {
            // Strip punctuation that usually belongs to the sentence, not the URL
            $url = rtrim($urlMatch[1], '), ].?!');
            $tinyUrl = $this->tinyUrl($url);

            // Prevent spamfest: skip this URL only, later URLs are still handled
            if ($this->checkURLCache($url, $tinyUrl)) {
                continue;
            }

            $title = null;

            // Best-effort fetch: an unreachable URL simply yields no title
            $page = @fopen($url, 'r', false, $context);
            if ($page) {
                $meta = stream_get_meta_data($page);
                $headers = array();
                if (isset($meta['wrapper_data']) && is_array($meta['wrapper_data'])) {
                    $headers = $meta['wrapper_data'];
                }

                // When redirects are followed the header list contains every
                // response, so the last Content-Type header is the one that
                // describes the document actually being read. Header names
                // are case-insensitive.
                $contentType = null;
                foreach ($headers as $header) {
                    if (is_string($header)
                        && preg_match('/^Content-Type:\s*([^;]+)/i', $header, $found)) {
                        $contentType = trim($found[1]);
                    }
                }

                // The "+" must be escaped, otherwise it is a quantifier and
                // "application/xhtml+xml" never matches.
                $isHtml = $contentType === null
                    || preg_match('#^(text/x?html|application/xhtml\+xml)$#i', $contentType);

                if (!$isHtml) {
                    // Non-HTML resources are announced by their MIME type
                    $title = $contentType;
                } else {
                    $content = '';
                    while (($chunk = fread($page, 512)) !== false && $chunk !== '') {
                        $content .= $chunk;
                        if (preg_match('#<title[^>]*>([^<]*)#is', $content)) {
                            // Read a little more in case the title text was
                            // cut off at the chunk boundary, then re-match.
                            $content .= fread($page, 256);
                            preg_match('#<title[^>]*>([^<]*)#is', $content, $found);
                            $title = preg_replace('#\s+#', ' ', $found[1]);
                            $title = $this->decode($title, $titleLength);
                            break;
                        }
                        // No title can follow once the head is closed
                        if (preg_match('#</head>|<body#i', $content)) {
                            break;
                        }
                    }
                }
                fclose($page);
            }

            if ($title === null) {
                // Nothing to add: no title and the link was not changed by tinyUrl()
                if ($tinyUrl === $url) {
                    continue;
                }
                $title = '[ No Title ]';
            }

            $this->doPrivmsg(
                $this->event->getSource(),
                str_replace(
                    array('%title%', '%link%'),
                    array($title, $tinyUrl),
                    $this->format
                )
            );

            // Update cache
            $this->updateURLCache($url, $tinyUrl);
        }
    }

    /**
     * Checks a given URL and TinyURL against the cache to verify if they were
     * previously posted on the channel.
     *
     * With expiration enabled, an entry only counts while it is younger than
     * $expire seconds; with expiration disabled, any cached entry counts.
     *
     * @param string $url The URL to check against
     * @param string $tiny The TinyURL to check against
     * @return bool TRUE if either value is cached (and not expired)
     */
    protected function checkURLCache($url, $tiny)
    {
        $source = $this->event->getSource();

        // Transform the URL and TinyURL into a HEX CRC32 checksum to prevent potential problems
        // and minimize the size of the cache for less cache bloat.
        $url = dechex(crc32($url));
        $tiny = dechex(crc32($tiny));

        // Collect the timestamps that exist; isset() avoids undefined index
        // notices for sources or URLs that have never been seen.
        $stamps = array();
        if (isset($this->urlCache[$source][$url])) {
            $stamps[] = $this->urlCache[$source][$url];
        }
        if (isset($this->tinyCache[$source][$tiny])) {
            $stamps[] = $this->tinyCache[$source][$tiny];
        }

        if (!$stamps) {
            return false;
        }

        // Expiration disabled: being listed at all is enough
        if ($this->expire <= 0) {
            return true;
        }

        $now = time();
        foreach ($stamps as $stamp) {
            if ($stamp + $this->expire > $now) {
                return true;
            }
        }
        return false;
    }

    /**
     * Updates the cache and adds the given URL and TinyURL to the cache. When
     * the per-source limit is enabled and exceeded, the oldest entry of the
     * affected cache is removed.
     *
     * @param string $url The URL to add to the cache
     * @param string $tiny The TinyURL to add to the cache
     * @return void
     */
    protected function updateURLCache($url, $tiny)
    {
        $source = $this->event->getSource();

        // Transform the URL and TinyURL into a HEX CRC32 checksum to prevent potential problems
        // and minimize the size of the cache for less cache bloat.
        $url = dechex(crc32($url));
        $tiny = dechex(crc32($tiny));
        $time = time();

        // Handle the URL cache and remove the oldest entry if the limit is surpassed.
        // The oldest key is removed with unset() rather than array_shift():
        // checksums consisting only of decimal digits are stored as integer
        // keys, and array_shift() would renumber those keys.
        $this->urlCache[$source][$url] = $time;
        if ($this->limit > 0 && count($this->urlCache[$source]) > $this->limit) {
            asort($this->urlCache[$source], SORT_NUMERIC);
            reset($this->urlCache[$source]);
            unset($this->urlCache[$source][key($this->urlCache[$source])]);
        }

        // Handle the TinyURL cache the same way, keyed by the TinyURL checksum
        // so that checkURLCache() can actually find the entry.
        $this->tinyCache[$source][$tiny] = $time;
        if ($this->limit > 0 && count($this->tinyCache[$source]) > $this->limit) {
            asort($this->tinyCache[$source], SORT_NUMERIC);
            reset($this->tinyCache[$source]);
            unset($this->tinyCache[$source][key($this->tinyCache[$source])]);
        }
    }

    /**
     * Passes a string through decodeTranslit() and, if a positive maximum
     * length is given, truncates the result to that length and appends an
     * ellipsis when something was cut off.
     *
     * @param string $str String to decode
     * @param int $trim Maximum string length, optional
     * @return string
     */
    protected function decode($str, $trim = null)
    {
        $out = $this->decodeTranslit($str);
        if ($trim > 0) {
            $out = substr($out, 0, $trim) . (strlen($out) > $trim ? '...' : '');
        }
        return $out;
    }
}
Note: See TracBrowser for help on using the browser.