| 1 | <?php
|
|---|
| 2 | /**
|
|---|
| 3 | * A collection of functions for manipulating strings
|
|---|
| 4 | *
|
|---|
| 5 | * PHP version 5
|
|---|
| 6 | *
|
|---|
| 7 | * LICENSE: Hotaru CMS is free software: you can redistribute it and/or
|
|---|
| 8 | * modify it under the terms of the GNU General Public License as
|
|---|
| 9 | * published by the Free Software Foundation, either version 3 of
|
|---|
| 10 | * the License, or (at your option) any later version.
|
|---|
| 11 | *
|
|---|
| 12 | * Hotaru CMS is distributed in the hope that it will be useful, but WITHOUT
|
|---|
| 13 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|---|
| 14 | * FITNESS FOR A PARTICULAR PURPOSE.
|
|---|
| 15 | *
|
|---|
| 16 | * You should have received a copy of the GNU General Public License along
|
|---|
| 17 | * with Hotaru CMS. If not, see http://www.gnu.org/licenses/.
|
|---|
| 18 | *
|
|---|
| 19 | * @category Content Management System
|
|---|
| 20 | * @package HotaruCMS
|
|---|
| 21 | * @author Nick Ramsay <admin@hotarucms.org>
|
|---|
| 22 | * @copyright Copyright (c) 2009, Hotaru CMS
|
|---|
| 23 | * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License
|
|---|
| 24 | * @link http://www.hotarucms.org/
|
|---|
| 25 | */
|
|---|
| 26 |
|
|---|
/**
 * Truncate a string to a maximum number of characters
 *
 * HTML tags are stripped before cutting so the result never contains a
 * broken tag. The ellipsis is only appended when text was actually cut off.
 *
 * @param string $string text to shorten (may contain HTML)
 * @param int $chars truncate to X characters
 * @param bool $dot adds ... if true and the text was shortened
 * @return string
 */
function truncate($string, $chars = 0, $dot = true)
{
    // Strip tags first so both the cut and the length check see the same text
    $plain = strip_tags((string) $string);

    // Prefer the multibyte functions so a UTF-8 character is never cut in half
    if (function_exists('mb_substr')) {
        $length = mb_strlen($plain, 'UTF-8');
        $truncated = (string) mb_substr($plain, 0, $chars, 'UTF-8');
    } else {
        $length = strlen($plain);
        $truncated = (string) substr($plain, 0, $chars);
    }

    // Strictly greater: a string that fits exactly has lost nothing
    if ($dot && ($length > $chars)) {
        $truncated .= '...';
    }

    return $truncated;
}
|
|---|
| 44 |
|
|---|
| 45 |
|
|---|
/**
 * Strip a string from the end of a string
 *
 * Every trailing repetition of $remove is removed, then trailing whitespace
 * is trimmed. With no $remove, only trailing whitespace is trimmed.
 *
 * @param string $str
 * @param string $remove part of the string to strip
 * @return string
 */
function rstrtrim($str, $remove = null)
{
    $str = (string) $str;
    $remove = (string) $remove;

    // Compare against '' rather than empty(): '0' is a legitimate suffix
    if ($remove === '') {
        return rtrim($str);
    }

    $len = strlen($remove);

    // Peel the suffix off repeatedly, including when it is all that is left
    while (strlen($str) >= $len && substr($str, -$len) === $remove) {
        $str = (string) substr($str, 0, strlen($str) - $len);
    }

    return rtrim($str);
}
|
|---|
| 73 |
|
|---|
| 74 |
|
|---|
/**
 * Changes 'plugin_name' into 'Plugin Name'
 *
 * @param string $string e.g. a plugin folder name
 * @param string $delim the separator between words in $string; each one becomes a space
 * @param bool $caps capitalise every word when true, only the first word when false
 * @return string
 */
function make_name($string, $delim = '_', $caps = true)
{
    $words = explode($delim, trim($string));

    if (!$caps) {
        // Only the very first letter of the whole name is upper-cased
        return ucfirst(implode(' ', $words));
    }

    return implode(' ', array_map('ucfirst', $words));
}
|
|---|
| 95 |
|
|---|
| 96 |
|
|---|
/**
 * Generates a random alphanumeric string
 *
 * NOTE(review): mt_rand() is not cryptographically secure. If callers use
 * this for passwords or tokens, consider random_int() (PHP 7+) instead.
 *
 * @param int $length number of characters to generate
 * @return string exactly $length characters from A-Z, a-z and 0-9
 * @link http://us2.php.net/manual/en/ref.strings.php (Moe 10-July-2007)
 */
function random_string($length = 8)
{
    $chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";

    // mt_rand()'s upper bound is inclusive, so stop at the last valid index
    $last_index = strlen($chars) - 1;

    $string = '';
    for ($i = 0; $i < $length; $i++) {
        $string .= $chars[mt_rand(0, $last_index)];
    }

    return $string;
}
|
|---|
| 114 |
|
|---|
| 115 |
|
|---|
/**
 * Sanitize input
 *
 * @param string $var the string to sanitize
 * @param string $santype type of sanitation:
 *                        'all'  - strip tags, then convert to HTML entities
 *                        'tags' - strip tags only
 *                        'ents' - convert to HTML entities only
 * @param string $allowable_tags tags that strip_tags() should keep
 * @return string|false the sanitized string, or false for an unknown $santype
 *
 * Note: Borrowed from SWCMS
 */
function sanitize($var, $santype = 'all', $allowable_tags = '')
{
    switch ($santype) {
        case 'all':
            $clean = htmlentities(strip_tags($var, $allowable_tags), ENT_QUOTES, 'UTF-8');
            break;

        case 'tags':
            $clean = strip_tags($var, $allowable_tags);
            break;

        case 'ents':
            $clean = htmlentities($var, ENT_QUOTES, 'UTF-8');
            break;

        default:
            return false;
    }

    // Magic quotes only exist before PHP 5.4 and get_magic_quotes_gpc() is
    // gone in PHP 8, so the function is only called where it can be true.
    $magic_quotes = version_compare(PHP_VERSION, '5.4.0', '<') && get_magic_quotes_gpc();

    return $magic_quotes ? stripslashes($clean) : $clean;
}
|
|---|
| 161 |
|
|---|
| 162 |
|
|---|
/**
 * Make a url friendly - a dash-separated string
 *
 * Symbols are simplified with replace_symbols(), the text is cut to 240
 * characters and lower-cased, then passed through
 * sanitize_title_with_dashes() and urldecode().
 *
 * @param string $input url to format
 * @return string|false the slug, or false when nothing usable is left
 *
 * Note: These functions seem to overlap each other a bit...
 */
function make_url_friendly($input)
{
    $output = replace_symbols($input);

    // Name the encoding explicitly, as sanitize_title_with_dashes() does
    $output = mb_substr($output, 0, 240, 'UTF-8');
    $output = mb_strtolower($output, 'UTF-8');
    $output = trim($output);

    //From Wordpress and http://www.bernzilla.com/item.php?id=1007
    $output = sanitize_title_with_dashes($output);

    $output = urldecode($output);

    // Compare with '' instead of relying on truthiness: '0' is a valid slug
    if ((string) $output !== '') {
        return $output;
    }

    return false;
}
|
|---|
| 185 |
|
|---|
| 186 |
|
|---|
/**
 * Replace symbols and ascii characters with simpler alternatives
 *
 * Whitespace and a few separator characters become hyphens, most other
 * punctuation is removed, and runs of hyphens are collapsed to one.
 *
 * @param string $input
 * @return string
 *
 * Note: Adapted from SWCMS
 */
function replace_symbols($input)
{
    // FOR THIS TO WORK, THIS FUNCS.STRINGS.PHP FILE MUST BE SAVED
    // IN UTF-8 CHARACTER ENCODING !!!

    // Characters that turn into a hyphen
    $to_hyphen = array('.', '=', '>', '<', '–', '—');

    // Characters that are dropped entirely
    $to_remove = array(
        '/', '\\', '\'', ',', ';', ':', '?', '+', '$', '&', '!',
        '*', ')', '(', '[', ']', '^', '%', '#', '@', '`',
        '‘', '’', '“', '”', '~', '"', '|',
        '«', '»', '‹', '›', '…',
    );

    // Replace spaces with hyphens
    $output = preg_replace('/\s+/', '-', $input);

    // Replace other characters
    $output = str_replace($to_hyphen, '-', $output);
    $output = str_replace($to_remove, '', $output);

    // Collapse last, so hyphens created by any step above are merged too
    $output = preg_replace('/-{2,}/', '-', $output);

    return $output;
}
|
|---|
| 251 |
|
|---|
| 252 |
|
|---|
/**
 * Get rid of any dangerous or unwanted characters
 *
 * Produces a lower-case, hyphen-separated string containing only
 * a-z, 0-9, '_', '-' and percent-encoded octets.
 *
 * @param string $title
 * @return string
 *
 * Note: Borrowed from Wordpress
 */
function sanitize_title_with_dashes($title)
{
    $title = strip_tags($title);

    // Shield real %XX octets behind a marker, drop every remaining percent
    // sign, then turn the markers back into octets.
    $title = preg_replace('|%([a-fA-F0-9][a-fA-F0-9])|', '---$1---', $title);
    $title = str_replace('%', '', $title);
    $title = preg_replace('|---([a-fA-F0-9][a-fA-F0-9])---|', '%$1', $title);

    $title = remove_accents($title);

    $is_utf8 = seems_utf8($title);
    if ($is_utf8 && function_exists('mb_strtolower')) {
        $title = mb_strtolower($title, 'UTF-8');
    }
    if ($is_utf8) {
        $title = utf8_uri_encode($title, 200);
    }

    // Applied in order: kill entities, drop disallowed characters,
    // whitespace to hyphen, collapse hyphen runs.
    $patterns = array('/&.+?;/', '/[^%a-z0-9 _-]/', '/\s+/', '|-+|');
    $replacements = array('', '', '-', '-');
    $title = preg_replace($patterns, $replacements, strtolower($title));

    return trim($title, '-');
}
|
|---|
| 291 |
|
|---|
| 292 |
|
|---|
/**
 * Remove accents from characters
 *
 * Strings without any byte >= 0x80 are returned untouched. Otherwise the
 * string is treated as UTF-8 when seems_utf8() says so, and as ISO-8859-1
 * if not, and accented letters are replaced by plain ASCII equivalents.
 *
 * @param string $string
 * @return string
 *
 * Note: Borrowed from Wordpress
 */
function remove_accents($string)
{
    // Pure ASCII: nothing to do
    if (!preg_match('/[\x80-\xff]/', $string)) {
        return $string;
    }

    if (seems_utf8($string)) {
        $chars = array(
            // Decompositions for Latin-1 Supplement
            chr(195).chr(128) => 'A', chr(195).chr(129) => 'A',
            chr(195).chr(130) => 'A', chr(195).chr(131) => 'A',
            chr(195).chr(132) => 'A', chr(195).chr(133) => 'A',
            chr(195).chr(135) => 'C', chr(195).chr(136) => 'E',
            chr(195).chr(137) => 'E', chr(195).chr(138) => 'E',
            chr(195).chr(139) => 'E', chr(195).chr(140) => 'I',
            chr(195).chr(141) => 'I', chr(195).chr(142) => 'I',
            chr(195).chr(143) => 'I', chr(195).chr(145) => 'N',
            chr(195).chr(146) => 'O', chr(195).chr(147) => 'O',
            chr(195).chr(148) => 'O', chr(195).chr(149) => 'O',
            chr(195).chr(150) => 'O',
            // O with stroke (U+00D8), matching chr(216) in the ISO-8859-1 branch
            chr(195).chr(152) => 'O',
            chr(195).chr(153) => 'U',
            chr(195).chr(154) => 'U', chr(195).chr(155) => 'U',
            chr(195).chr(156) => 'U', chr(195).chr(157) => 'Y',
            chr(195).chr(159) => 's', chr(195).chr(160) => 'a',
            chr(195).chr(161) => 'a', chr(195).chr(162) => 'a',
            chr(195).chr(163) => 'a', chr(195).chr(164) => 'a',
            chr(195).chr(165) => 'a', chr(195).chr(167) => 'c',
            chr(195).chr(168) => 'e', chr(195).chr(169) => 'e',
            chr(195).chr(170) => 'e', chr(195).chr(171) => 'e',
            chr(195).chr(172) => 'i', chr(195).chr(173) => 'i',
            chr(195).chr(174) => 'i', chr(195).chr(175) => 'i',
            chr(195).chr(177) => 'n', chr(195).chr(178) => 'o',
            chr(195).chr(179) => 'o', chr(195).chr(180) => 'o',
            chr(195).chr(181) => 'o', chr(195).chr(182) => 'o',
            // o with stroke (U+00F8); this slot used to repeat chr(182)
            chr(195).chr(184) => 'o', chr(195).chr(185) => 'u',
            chr(195).chr(186) => 'u', chr(195).chr(187) => 'u',
            chr(195).chr(188) => 'u', chr(195).chr(189) => 'y',
            chr(195).chr(191) => 'y',
            // Decompositions for Latin Extended-A
            chr(196).chr(128) => 'A', chr(196).chr(129) => 'a',
            chr(196).chr(130) => 'A', chr(196).chr(131) => 'a',
            chr(196).chr(132) => 'A', chr(196).chr(133) => 'a',
            chr(196).chr(134) => 'C', chr(196).chr(135) => 'c',
            chr(196).chr(136) => 'C', chr(196).chr(137) => 'c',
            chr(196).chr(138) => 'C', chr(196).chr(139) => 'c',
            chr(196).chr(140) => 'C', chr(196).chr(141) => 'c',
            chr(196).chr(142) => 'D', chr(196).chr(143) => 'd',
            chr(196).chr(144) => 'D', chr(196).chr(145) => 'd',
            chr(196).chr(146) => 'E', chr(196).chr(147) => 'e',
            chr(196).chr(148) => 'E', chr(196).chr(149) => 'e',
            chr(196).chr(150) => 'E', chr(196).chr(151) => 'e',
            chr(196).chr(152) => 'E', chr(196).chr(153) => 'e',
            chr(196).chr(154) => 'E', chr(196).chr(155) => 'e',
            chr(196).chr(156) => 'G', chr(196).chr(157) => 'g',
            chr(196).chr(158) => 'G', chr(196).chr(159) => 'g',
            chr(196).chr(160) => 'G', chr(196).chr(161) => 'g',
            chr(196).chr(162) => 'G', chr(196).chr(163) => 'g',
            chr(196).chr(164) => 'H', chr(196).chr(165) => 'h',
            chr(196).chr(166) => 'H', chr(196).chr(167) => 'h',
            chr(196).chr(168) => 'I', chr(196).chr(169) => 'i',
            chr(196).chr(170) => 'I', chr(196).chr(171) => 'i',
            chr(196).chr(172) => 'I', chr(196).chr(173) => 'i',
            chr(196).chr(174) => 'I', chr(196).chr(175) => 'i',
            chr(196).chr(176) => 'I', chr(196).chr(177) => 'i',
            chr(196).chr(178) => 'IJ', chr(196).chr(179) => 'ij',
            chr(196).chr(180) => 'J', chr(196).chr(181) => 'j',
            chr(196).chr(182) => 'K', chr(196).chr(183) => 'k',
            chr(196).chr(184) => 'k', chr(196).chr(185) => 'L',
            chr(196).chr(186) => 'l', chr(196).chr(187) => 'L',
            chr(196).chr(188) => 'l', chr(196).chr(189) => 'L',
            chr(196).chr(190) => 'l', chr(196).chr(191) => 'L',
            chr(197).chr(128) => 'l', chr(197).chr(129) => 'L',
            chr(197).chr(130) => 'l', chr(197).chr(131) => 'N',
            chr(197).chr(132) => 'n', chr(197).chr(133) => 'N',
            chr(197).chr(134) => 'n', chr(197).chr(135) => 'N',
            // U+0149 is a lower-case letter with no capital form, so the
            // upper/lower alternation shifts by one for U+0149..U+014B.
            chr(197).chr(136) => 'n', chr(197).chr(137) => 'n',
            chr(197).chr(138) => 'N', chr(197).chr(139) => 'n',
            chr(197).chr(140) => 'O', chr(197).chr(141) => 'o',
            chr(197).chr(142) => 'O', chr(197).chr(143) => 'o',
            chr(197).chr(144) => 'O', chr(197).chr(145) => 'o',
            chr(197).chr(146) => 'OE', chr(197).chr(147) => 'oe',
            chr(197).chr(148) => 'R', chr(197).chr(149) => 'r',
            chr(197).chr(150) => 'R', chr(197).chr(151) => 'r',
            chr(197).chr(152) => 'R', chr(197).chr(153) => 'r',
            chr(197).chr(154) => 'S', chr(197).chr(155) => 's',
            chr(197).chr(156) => 'S', chr(197).chr(157) => 's',
            chr(197).chr(158) => 'S', chr(197).chr(159) => 's',
            chr(197).chr(160) => 'S', chr(197).chr(161) => 's',
            chr(197).chr(162) => 'T', chr(197).chr(163) => 't',
            chr(197).chr(164) => 'T', chr(197).chr(165) => 't',
            chr(197).chr(166) => 'T', chr(197).chr(167) => 't',
            chr(197).chr(168) => 'U', chr(197).chr(169) => 'u',
            chr(197).chr(170) => 'U', chr(197).chr(171) => 'u',
            chr(197).chr(172) => 'U', chr(197).chr(173) => 'u',
            chr(197).chr(174) => 'U', chr(197).chr(175) => 'u',
            chr(197).chr(176) => 'U', chr(197).chr(177) => 'u',
            chr(197).chr(178) => 'U', chr(197).chr(179) => 'u',
            chr(197).chr(180) => 'W', chr(197).chr(181) => 'w',
            chr(197).chr(182) => 'Y', chr(197).chr(183) => 'y',
            chr(197).chr(184) => 'Y', chr(197).chr(185) => 'Z',
            chr(197).chr(186) => 'z', chr(197).chr(187) => 'Z',
            chr(197).chr(188) => 'z', chr(197).chr(189) => 'Z',
            chr(197).chr(190) => 'z', chr(197).chr(191) => 's',
            // Euro Sign
            chr(226).chr(130).chr(172) => 'E',
            // GBP (Pound) Sign
            chr(194).chr(163) => '');

        $string = strtr($string, $chars);
    } else {
        // Assume ISO-8859-1 if not UTF-8
        $chars = array();
        $chars['in'] = chr(128).chr(131).chr(138).chr(142).chr(154).chr(158)
            .chr(159).chr(162).chr(165).chr(181).chr(192).chr(193).chr(194)
            .chr(195).chr(196).chr(197).chr(199).chr(200).chr(201).chr(202)
            .chr(203).chr(204).chr(205).chr(206).chr(207).chr(209).chr(210)
            .chr(211).chr(212).chr(213).chr(214).chr(216).chr(217).chr(218)
            .chr(219).chr(220).chr(221).chr(224).chr(225).chr(226).chr(227)
            .chr(228).chr(229).chr(231).chr(232).chr(233).chr(234).chr(235)
            .chr(236).chr(237).chr(238).chr(239).chr(241).chr(242).chr(243)
            .chr(244).chr(245).chr(246).chr(248).chr(249).chr(250).chr(251)
            .chr(252).chr(253).chr(255);

        // One output byte per input byte above (both are 65 bytes long)
        $chars['out'] = "EfSZszYcYuAAAAAACEEEEIIIINOOOOOOUUUUYaaaaaaceeeeiiiinoooooouuuuyy";

        $string = strtr($string, $chars['in'], $chars['out']);

        // Single bytes that expand to two letters
        $double_chars = array();
        $double_chars['in'] = array(chr(140), chr(156), chr(198), chr(208), chr(222), chr(223), chr(230), chr(240), chr(254));
        $double_chars['out'] = array('OE', 'oe', 'AE', 'DH', 'TH', 'ss', 'ae', 'dh', 'th');
        $string = str_replace($double_chars['in'], $double_chars['out'], $string);
    }

    return $string;
}
|
|---|
| 432 |
|
|---|
| 433 |
|
|---|
/**
 * Determine if the string is utf8
 *
 * Checks structure only: every lead byte must be followed by the number of
 * 10bbbbbb continuation bytes that it announces.
 *
 * @param string $str
 * @return bool
 *
 * Note: Borrowed from Wordpress (by bmorel at ssi dot fr )
 */
function seems_utf8($str)
{
    $total = strlen($str);
    $pos = 0;

    while ($pos < $total) {
        $lead = ord($str[$pos]);

        // Number of continuation bytes announced by the lead byte
        if ($lead < 0x80) {
            $trailing = 0; // 0bbbbbbb
        } elseif (($lead & 0xE0) == 0xC0) {
            $trailing = 1; // 110bbbbb
        } elseif (($lead & 0xF0) == 0xE0) {
            $trailing = 2; // 1110bbbb
        } elseif (($lead & 0xF8) == 0xF0) {
            $trailing = 3; // 11110bbb
        } elseif (($lead & 0xFC) == 0xF8) {
            $trailing = 4; // 111110bb
        } elseif (($lead & 0xFE) == 0xFC) {
            $trailing = 5; // 1111110b
        } else {
            return false; // Does not match any model
        }

        // Each announced byte must exist and look like 10bbbbbb
        for ($k = 1; $k <= $trailing; $k++) {
            if ($pos + $k >= $total) {
                return false;
            }
            if ((ord($str[$pos + $k]) & 0xC0) != 0x80) {
                return false;
            }
        }

        $pos += $trailing + 1;
    }

    return true;
}
|
|---|
| 479 |
|
|---|
| 480 |
|
|---|
/**
 * Encodes a utf8 string
 *
 * ASCII bytes are copied as they are; every multibyte character is written
 * as %xx octets (lower-case hex). Characters of 2, 3 and 4 bytes are handled.
 *
 * @param string $utf8_string
 * @param int $length maximum length of the encoded result; 0 means no limit
 * @return string
 *
 * Note: Borrowed from Wordpress
 */
function utf8_uri_encode($utf8_string, $length = 0)
{
    $unicode = '';
    $values = array();
    $num_octets = 1;
    $unicode_length = 0;

    $string_length = strlen($utf8_string);
    for ($i = 0; $i < $string_length; $i++) {
        $value = ord($utf8_string[$i]);

        if ($value < 128) {
            if ($length && ($unicode_length >= $length)) {
                break;
            }
            $unicode .= chr($value);
            $unicode_length++;
            continue;
        }

        // First byte of a character: the lead byte tells how many octets it has
        if (count($values) == 0) {
            if ($value < 224) {
                $num_octets = 2;
            } elseif ($value < 240) {
                $num_octets = 3;
            } else {
                $num_octets = 4;
            }
        }

        $values[] = $value;

        // Stop before a character whose encoded form (3 chars per octet) would not fit
        if ($length && ($unicode_length + ($num_octets * 3)) > $length) {
            break;
        }

        if (count($values) == $num_octets) {
            foreach ($values as $octet) {
                $unicode .= '%' . dechex($octet);
            }
            $unicode_length += $num_octets * 3;

            $values = array();
            $num_octets = 1;
        }
    }

    return $unicode;
}
|
|---|
| 540 |
|
|---|
| 541 |
|
|---|
/**
 * Strip domain from url
 *
 * @param string $url
 * @return string|false $domain - scheme plus host, e.g. http://example.com,
 *                      or false when the url has no scheme or no host
 */
function get_domain($url = '')
{
    $parsed = parse_url($url);

    // parse_url() gives false for badly malformed urls, and a url such as
    // "mailto:someone@example.com" has a scheme but no host.
    if (!is_array($parsed) || !isset($parsed['scheme'], $parsed['host'])) {
        return false;
    }

    return $parsed['scheme'] . "://" . $parsed['host'];
}
|
|---|
| 558 |
|
|---|
| 559 |
|
|---|
/**
 * Strip foreign characters from latin1/utf8 database yuckiness
 *
 * Replaces a fixed list of garbled character sequences with plain ASCII
 * punctuation. Entries are applied one after another in the order listed.
 *
 * @param string $str
 * @return string
 */
function strip_foreign_characters($str)
{
    // The bare 'â€' entry is a prefix of the longer ones, so it stays last
    $garbled = array('Â', '’', '–', '“', 'â€');
    $plain = array('', '\'', '-', '"', '"');

    return str_replace($garbled, $plain, $str);
}
|
|---|
| 575 |
|
|---|
| 576 |
|
|---|
/**
 * Count urls within a block of text
 *
 * Counts occurrences of "href" (HTML links) and "[url" (BBCode links),
 * ignoring case. Bare "http" occurrences are not counted.
 *
 * @param string $text
 * @return int
 * @link http://www.liamdelahunty.com/tips/php_url_count_check_for_comment_spam.php
 */
function countUrls($text = '')
{
    // HTML attribute names and BBCode tags are case-insensitive, so fold
    // case before counting; otherwise "HREF" or "[URL" would be missed.
    $haystack = strtolower((string) $text);

    $href = substr_count($haystack, "href");
    $url = substr_count($haystack, "[url");

    return $href + $url;
}
|
|---|
| 591 | ?> |
|---|