root/trunk/Phergie/Plugin/FeedTicker.php @ 138

Revision 138, 8.8 KB (checked in by Slynderdale, 5 years ago)

Fixes #32, adds transliteration support to Phergie's base, and makes URL and FeedTicker use the new transliterate function.

Line 
1<?php
2
3/**
4* @see Phergie_Plugin_Abstract_Cron
5*/
6require_once 'Phergie/Plugin/Abstract/Cron.php';
7
/**
 * Sporadically syndicates items from a given set of feeds to the channel.
 *
 * Settings read through getPluginIni() in init():
 *  - feed0 .. feed9            URL of each feed
 *  - chans0 .. chans9          channels (whitespace or comma separated) that
 *                              receive the items of the matching feed
 *  - filter_title[N]           regular expression; items whose FEED title
 *                              matches are dropped
 *  - filter_url[N]             regular expression; items whose link matches
 *                              are dropped
 *  - filter_article[N]         regular expression; items whose ARTICLE title
 *                              matches are dropped
 *  - format                    output format, see $format
 *
 * A filter setting without a numeric suffix applies to every feed and is
 * combined (as a regex alternation) with the per-feed one.
 */
class Phergie_Plugin_FeedTicker extends Phergie_Plugin_Abstract_Cron
{
    /**
     * Delay in seconds for syndicating feeds, set to 30 minutes
     *
     * @var int
     */
    protected $defaultDelay = 1800;

    /**
     * Feed data, one array("feed url", array("chan1", "chan2")) per feed;
     * populated by init()
     *
     * @see run()
     * @var array
     */
    protected $feeds = null;

    /**
     * Feed title filter patterns, indexed like $feeds
     *
     * @see run()
     * @var array
     */
    protected $filterTitle = null;

    /**
     * URL filter patterns, indexed like $feeds
     *
     * @see run()
     * @var array
     */
    protected $filterUrl = null;

    /**
     * Article title filter patterns, indexed like $feeds
     *
     * @see run()
     * @var array
     */
    protected $filterArticle = null;

    /**
     * Items seen during the previous run, keyed by feed id; stays null until
     * the first run has completed
     *
     * @see run()
     * @var array
     */
    protected $cache = null;

    /**
     * Queue of items to be dispatched
     *
     * @see checkQueue()
     * @see run()
     * @var array
     */
    protected $queue = array();

    /**
     * Time at which the checkQueue method will be allowed to dispatch
     * another item
     *
     * @see checkQueue()
     * @var int
     */
    protected $nextOutput = null;

    /**
     * Feed output format; can use the variables %title%, %link% and %feed% to
     * display article titles, links and feed titles
     *
     * @var string
     */
    protected $format = '%title% [ %link% ]';

    /**
     * Processes necessary configuration setting values.
     *
     * Rebuilds the feed list and the three per-feed filter lists from
     * scratch, so the lists always share the same indexes even when this
     * method is called more than once.
     *
     * @return void
     */
    public function init()
    {
        // Global feed title, URL and article title filters
        $globalTitle = trim((string) $this->getPluginIni('filter_title'));
        $globalUrl = trim((string) $this->getPluginIni('filter_url'));
        $globalArticle = trim((string) $this->getPluginIni('filter_article'));

        // Characters stripped from both ends of a combined pattern: the '|'
        // left over when one side of the alternation is empty, ASCII
        // whitespace, NUL and the 0xA0 byte. The escapes are spelled \x0B and
        // \xA0 on purpose: "\0xa0" would be parsed as NUL followed by the
        // literal characters 'x', 'a' and '0', which would eat those letters
        // from the edges of a pattern.
        // NOTE(review): trim() works on bytes, so a pattern ending in a
        // multi-byte character whose last byte is 0xA0 would be clipped.
        $filterTrim = "| \t\n\r\0\x0B\xA0";

        $this->feeds = array();
        $this->filterTitle = array();
        $this->filterUrl = array();
        $this->filterArticle = array();

        // Settings are looked up for the slots 0 to 9
        for ($i = 0; $i < 10; $i++) {
            $feed = $this->getPluginIni('feed' . $i);
            $chans = $this->getPluginIni('chans' . $i);

            // A slot is only used when both its feed and its channels are set
            if (empty($feed) || empty($chans)) {
                continue;
            }

            // PREG_SPLIT_NO_EMPTY keeps a leading/trailing separator from
            // producing an empty channel name
            $this->feeds[] = array(
                $feed,
                preg_split('#[\s\r\n,]+#', $chans, -1, PREG_SPLIT_NO_EMPTY)
            );

            // Global and per-feed patterns are joined as "global|feed"
            $this->filterTitle[] = trim(
                $globalTitle . '|' . $this->getPluginIni('filter_title' . $i),
                $filterTrim
            );
            $this->filterUrl[] = trim(
                $globalUrl . '|' . $this->getPluginIni('filter_url' . $i),
                $filterTrim
            );
            $this->filterArticle[] = trim(
                $globalArticle . '|' . $this->getPluginIni('filter_article' . $i),
                $filterTrim
            );
        }

        $format = $this->getPluginIni('format');
        if ($format != null) {
            $this->format = $format;
        }
    }

    /**
     * Overrides Cron method to add checkQueue to this, so that whenever a user
     * sends a message to a channel in which the bot is present, the queue is
     * checked. This behavior attempts to prevent the bot from spamming the
     * channel if no users are conversing and everything is retained in the
     * queue until channel activity is detected.
     *
     * @return void
     */
    public function onPrivmsg()
    {
        parent::onPrivmsg();

        $this->checkQueue();
    }

    /**
     * Checks the queue for new items and sends out one if the necessary time
     * limit has elapsed since the last item was sent. Uses the format setting
     * to format new items.
     *
     * @return void
     */
    protected function checkQueue()
    {
        if (empty($this->queue) || time() <= $this->nextOutput) {
            return;
        }

        list($title, $url, $chans, $feedTitle) = array_shift($this->queue);

        // The text is the same for every channel, so build it only once
        $message = str_replace(
            array('%title%', '%link%', '%feed%'),
            array($title, $url, $feedTitle),
            $this->format
        );
        foreach ($chans as $chan) {
            $this->doPrivmsg($chan, $message);
        }

        // At most one item per minute
        $this->nextOutput = time() + 60;
    }

    /**
     * Retrieves feeds and fills the queue with new items that were not
     * previously in the cache.
     *
     * Technical data if you want to extend this method to parse another type
     * of source differently :
     *
     * Feeds are arrays such as: array("feed url", array("chan1", "chan2"))
     *
     * Cache management is up to you and is only internally used by this
     * method.
     *
     * The queue is an array of items to be dispatched, these items are as
     * such :
     *   array("item title", "item url", array("chan1", "chan2"), "feed title")
     *
     * A feed that cannot be downloaded, recognized or parsed is reported
     * through debug() and skipped; the remaining feeds are still processed.
     *
     * @return void
     */
    protected function run()
    {
        $retrieved = array();

        $context = stream_context_create(array('http' => array(
            'timeout' => 5,
            'user_agent' => 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9b3) Gecko/2008020514 Firefox/3.0b3'
        )));

        // Retrieve each feed
        foreach ($this->feeds as $id => $feed) {
            list($url, $chans) = $feed;

            $content = @file_get_contents($url, false, $context);
            if (empty($content)) {
                $this->debug('Feed empty: ' . $url);
                continue;
            }

            // Sniff the format from the raw document before parsing it
            $isRss = strpos($content, '<rss version=') !== false
                || strpos($content, '<rdf') !== false;
            $isAtom = !$isRss && strpos($content, '/Atom') !== false;
            if (!$isRss && !$isAtom) {
                $this->debug('Feed format unrecognized: ' . $url);
                continue;
            }

            // SimpleXMLElement throws when the document is not well-formed;
            // a single broken feed must not abort the whole run
            try {
                $xml = new SimpleXMLElement($content);
            } catch (Exception $e) {
                $this->debug('Feed unparsable: ' . $url . ' (' . $e->getMessage() . ')');
                continue;
            }

            $entries = array();
            if ($isRss) {
                $feedTitle = (string) $xml->channel->title;
                foreach ($xml->channel->item as $item) {
                    $entries[] = array((string) $item->title, (string) $item->link, $chans, $feedTitle);
                }
                if (empty($entries)) {
                    // RSS 1.0 (RDF) documents place <item> next to <channel>
                    // instead of inside it
                    foreach ($xml->item as $item) {
                        $entries[] = array((string) $item->title, (string) $item->link, $chans, $feedTitle);
                    }
                }
            } else {
                $feedTitle = (string) $xml->title;
                foreach ($xml->entry as $item) {
                    // The href of the entry's first <link> is used as its URL
                    $entries[] = array((string) $item->title, (string) $item->link[0]['href'], $chans, $feedTitle);
                }
            }

            // Feeds without any item are left out, exactly like failed ones
            if (!empty($entries)) {
                $retrieved[$id] = $entries;
            }
        }

        // First run, fill cache and don't output anything
        if ($this->cache === null) {
            $this->cache = $retrieved;
            return;
        }

        // Latter run, compare retrieved data to cache and queue new items
        foreach ($retrieved as $id => $articles) {
            // Queue in reverse document order, so that (for feeds listing the
            // newest item first) older items are announced first
            $articles = array_reverse($articles);
            $known = isset($this->cache[$id]) ? $this->cache[$id] : array();

            foreach ($articles as $article) {
                // Strict comparison: numeric-looking titles or links must not
                // be considered equal just because they compare equal loosely
                if (array_search($article, $known, true) !== false) {
                    continue;
                }

                // filterCheck() expects feed title, URL, article title; the
                // URL handed over is the item link
                if (!$this->filterCheck($id, $article[3], $article[1], $article[0])) {
                    continue;
                }

                // Decode and trim article title
                $article[0] = $this->decode($article[0], 40);
                // Convert link with TinyURL if required
                $article[1] = $this->tinyUrl($article[1]);
                // Decode and trim feed title
                $article[3] = $this->decode($article[3], 20);
                $this->queue[] = $article;
            }

            // Cache current (raw, unmodified) data for next run
            $this->cache[$id] = $articles;
        }
    }

    /**
     * Checks a feed title, a URL and an article title against the filters
     * configured for a feed.
     *
     * Each non-empty filter is used as a case-insensitive, multi-line regular
     * expression delimited by braces.
     *
     * @param int $id Index of the feed in the feed list
     * @param string $title Feed title, checked against the feed title filter
     * @param string $url URL, checked against the URL filter
     * @param string $article Article title, checked against the article
     *        title filter
     * @return bool FALSE if any filter matches (the item must be dropped),
     *         TRUE otherwise
     */
    protected function filterCheck($id, $title, $url, $article)
    {
        // Pattern / subject pairs; a missing filter entry counts as "no filter"
        $checks = array(
            array(isset($this->filterTitle[$id]) ? $this->filterTitle[$id] : '', $title),
            array(isset($this->filterUrl[$id]) ? $this->filterUrl[$id] : '', $url),
            array(isset($this->filterArticle[$id]) ? $this->filterArticle[$id] : '', $article),
        );

        foreach ($checks as $check) {
            list($pattern, $subject) = $check;
            if ($pattern && preg_match('{' . $pattern . '}im', $subject)) {
                return false;
            }
        }

        return true;
    }

    /**
     * Passes a string through decodeTranslit() (defined outside this file;
     * described as transliterating UTF-8 into corresponding ASCII characters)
     * and, when a maximum length is given and exceeded, truncates the result
     * and appends an ellipsis.
     *
     * @param string $str String to decode
     * @param int $trim Maximum string length, optional; no truncation is
     *        done when it is not greater than zero
     * @return string
     */
    protected function decode($str, $trim = null)
    {
        $out = $this->decodeTranslit($str);

        if ($trim > 0 && strlen($out) > $trim) {
            $out = substr($out, 0, $trim) . '...';
        }

        return $out;
    }
}
Note: See TracBrowser for help on using the browser.