text_helper.php 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567
  1. <?php
  2. /**
  3. * CodeIgniter
  4. *
  5. * An open source application development framework for PHP
  6. *
  7. * This content is released under the MIT License (MIT)
  8. *
  9. * Copyright (c) 2014 - 2019, British Columbia Institute of Technology
  10. *
  11. * Permission is hereby granted, free of charge, to any person obtaining a copy
  12. * of this software and associated documentation files (the "Software"), to deal
  13. * in the Software without restriction, including without limitation the rights
  14. * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  15. * copies of the Software, and to permit persons to whom the Software is
  16. * furnished to do so, subject to the following conditions:
  17. *
  18. * The above copyright notice and this permission notice shall be included in
  19. * all copies or substantial portions of the Software.
  20. *
  21. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  22. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  23. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  24. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  25. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  26. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  27. * THE SOFTWARE.
  28. *
  29. * @package CodeIgniter
  30. * @author EllisLab Dev Team
  31. * @copyright Copyright (c) 2008 - 2014, EllisLab, Inc. (https://ellislab.com/)
  32. * @copyright Copyright (c) 2014 - 2019, British Columbia Institute of Technology (https://bcit.ca/)
  33. * @license https://opensource.org/licenses/MIT MIT License
  34. * @link https://codeigniter.com
  35. * @since Version 1.0.0
  36. * @filesource
  37. */
  38. defined('BASEPATH') OR exit('No direct script access allowed');
  39. /**
  40. * CodeIgniter Text Helpers
  41. *
  42. * @package CodeIgniter
  43. * @subpackage Helpers
  44. * @category Helpers
  45. * @author EllisLab Dev Team
  46. * @link https://codeigniter.com/user_guide/helpers/text_helper.html
  47. */
  48. // ------------------------------------------------------------------------
  49. if ( ! function_exists('word_limiter'))
  50. {
  51. /**
  52. * Word Limiter
  53. *
  54. * Limits a string to X number of words.
  55. *
  56. * @param string
  57. * @param int
  58. * @param string the end character. Usually an ellipsis
  59. * @return string
  60. */
  61. function word_limiter($str, $limit = 100, $end_char = '&#8230;')
  62. {
  63. if (trim($str) === '')
  64. {
  65. return $str;
  66. }
  67. preg_match('/^\s*+(?:\S++\s*+){1,'.(int) $limit.'}/', $str, $matches);
  68. if (strlen($str) === strlen($matches[0]))
  69. {
  70. $end_char = '';
  71. }
  72. return rtrim($matches[0]).$end_char;
  73. }
  74. }
  75. // ------------------------------------------------------------------------
  76. if ( ! function_exists('character_limiter'))
  77. {
  78. /**
  79. * Character Limiter
  80. *
  81. * Limits the string based on the character count. Preserves complete words
  82. * so the character count may not be exactly as specified.
  83. *
  84. * @param string
  85. * @param int
  86. * @param string the end character. Usually an ellipsis
  87. * @return string
  88. */
  89. function character_limiter($str, $n = 500, $end_char = '&#8230;')
  90. {
  91. if (mb_strlen($str) < $n)
  92. {
  93. return $str;
  94. }
  95. // a bit complicated, but faster than preg_replace with \s+
  96. $str = preg_replace('/ {2,}/', ' ', str_replace(array("\r", "\n", "\t", "\v", "\f"), ' ', $str));
  97. if (mb_strlen($str) <= $n)
  98. {
  99. return $str;
  100. }
  101. $out = '';
  102. foreach (explode(' ', trim($str)) as $val)
  103. {
  104. $out .= $val.' ';
  105. if (mb_strlen($out) >= $n)
  106. {
  107. $out = trim($out);
  108. return (mb_strlen($out) === mb_strlen($str)) ? $out : $out.$end_char;
  109. }
  110. }
  111. }
  112. }
  113. // ------------------------------------------------------------------------
  114. if ( ! function_exists('ascii_to_entities'))
  115. {
  116. /**
  117. * High ASCII to Entities
  118. *
  119. * Converts high ASCII text and MS Word special characters to character entities
  120. *
  121. * @param string $str
  122. * @return string
  123. */
  124. function ascii_to_entities($str)
  125. {
  126. $out = '';
  127. $length = defined('MB_OVERLOAD_STRING')
  128. ? mb_strlen($str, '8bit') - 1
  129. : strlen($str) - 1;
  130. for ($i = 0, $count = 1, $temp = array(); $i <= $length; $i++)
  131. {
  132. $ordinal = ord($str[$i]);
  133. if ($ordinal < 128)
  134. {
  135. /*
  136. If the $temp array has a value but we have moved on, then it seems only
  137. fair that we output that entity and restart $temp before continuing. -Paul
  138. */
  139. if (count($temp) === 1)
  140. {
  141. $out .= '&#'.array_shift($temp).';';
  142. $count = 1;
  143. }
  144. $out .= $str[$i];
  145. }
  146. else
  147. {
  148. if (count($temp) === 0)
  149. {
  150. $count = ($ordinal < 224) ? 2 : 3;
  151. }
  152. $temp[] = $ordinal;
  153. if (count($temp) === $count)
  154. {
  155. $number = ($count === 3)
  156. ? (($temp[0] % 16) * 4096) + (($temp[1] % 64) * 64) + ($temp[2] % 64)
  157. : (($temp[0] % 32) * 64) + ($temp[1] % 64);
  158. $out .= '&#'.$number.';';
  159. $count = 1;
  160. $temp = array();
  161. }
  162. // If this is the last iteration, just output whatever we have
  163. elseif ($i === $length)
  164. {
  165. $out .= '&#'.implode(';', $temp).';';
  166. }
  167. }
  168. }
  169. return $out;
  170. }
  171. }
  172. // ------------------------------------------------------------------------
  173. if ( ! function_exists('entities_to_ascii'))
  174. {
  175. /**
  176. * Entities to ASCII
  177. *
  178. * Converts character entities back to ASCII
  179. *
  180. * @param string
  181. * @param bool
  182. * @return string
  183. */
  184. function entities_to_ascii($str, $all = TRUE)
  185. {
  186. if (preg_match_all('/\&#(\d+)\;/', $str, $matches))
  187. {
  188. for ($i = 0, $s = count($matches[0]); $i < $s; $i++)
  189. {
  190. $digits = $matches[1][$i];
  191. $out = '';
  192. if ($digits < 128)
  193. {
  194. $out .= chr($digits);
  195. }
  196. elseif ($digits < 2048)
  197. {
  198. $out .= chr(192 + (($digits - ($digits % 64)) / 64)).chr(128 + ($digits % 64));
  199. }
  200. else
  201. {
  202. $out .= chr(224 + (($digits - ($digits % 4096)) / 4096))
  203. .chr(128 + ((($digits % 4096) - ($digits % 64)) / 64))
  204. .chr(128 + ($digits % 64));
  205. }
  206. $str = str_replace($matches[0][$i], $out, $str);
  207. }
  208. }
  209. if ($all)
  210. {
  211. return str_replace(
  212. array('&amp;', '&lt;', '&gt;', '&quot;', '&apos;', '&#45;'),
  213. array('&', '<', '>', '"', "'", '-'),
  214. $str
  215. );
  216. }
  217. return $str;
  218. }
  219. }
  220. // ------------------------------------------------------------------------
  221. if ( ! function_exists('word_censor'))
  222. {
  223. /**
  224. * Word Censoring Function
  225. *
  226. * Supply a string and an array of disallowed words and any
  227. * matched words will be converted to #### or to the replacement
  228. * word you've submitted.
  229. *
  230. * @param string the text string
  231. * @param string the array of censored words
  232. * @param string the optional replacement value
  233. * @return string
  234. */
  235. function word_censor($str, $censored, $replacement = '')
  236. {
  237. if ( ! is_array($censored))
  238. {
  239. return $str;
  240. }
  241. $str = ' '.$str.' ';
  242. // \w, \b and a few others do not match on a unicode character
  243. // set for performance reasons. As a result words like über
  244. // will not match on a word boundary. Instead, we'll assume that
  245. // a bad word will be bookeneded by any of these characters.
  246. $delim = '[-_\'\"`(){}<>\[\]|!?@#%&,.:;^~*+=\/ 0-9\n\r\t]';
  247. foreach ($censored as $badword)
  248. {
  249. $badword = str_replace('\*', '\w*?', preg_quote($badword, '/'));
  250. if ($replacement !== '')
  251. {
  252. $str = preg_replace(
  253. "/({$delim})(".$badword.")({$delim})/i",
  254. "\\1{$replacement}\\3",
  255. $str
  256. );
  257. }
  258. elseif (preg_match_all("/{$delim}(".$badword."){$delim}/i", $str, $matches, PREG_PATTERN_ORDER | PREG_OFFSET_CAPTURE))
  259. {
  260. $matches = $matches[1];
  261. for ($i = count($matches) - 1; $i >= 0; $i--)
  262. {
  263. $length = strlen($matches[$i][0]);
  264. $str = substr_replace(
  265. $str,
  266. str_repeat('#', $length),
  267. $matches[$i][1],
  268. $length
  269. );
  270. }
  271. }
  272. }
  273. return trim($str);
  274. }
  275. }
  276. // ------------------------------------------------------------------------
  277. if ( ! function_exists('highlight_code'))
  278. {
  279. /**
  280. * Code Highlighter
  281. *
  282. * Colorizes code strings
  283. *
  284. * @param string the text string
  285. * @return string
  286. */
  287. function highlight_code($str)
  288. {
  289. /* The highlight string function encodes and highlights
  290. * brackets so we need them to start raw.
  291. *
  292. * Also replace any existing PHP tags to temporary markers
  293. * so they don't accidentally break the string out of PHP,
  294. * and thus, thwart the highlighting.
  295. */
  296. $str = str_replace(
  297. array('&lt;', '&gt;', '<?', '?>', '<%', '%>', '\\', '</script>'),
  298. array('<', '>', 'phptagopen', 'phptagclose', 'asptagopen', 'asptagclose', 'backslashtmp', 'scriptclose'),
  299. $str
  300. );
  301. // The highlight_string function requires that the text be surrounded
  302. // by PHP tags, which we will remove later
  303. $str = highlight_string('<?php '.$str.' ?>', TRUE);
  304. // Remove our artificially added PHP, and the syntax highlighting that came with it
  305. $str = preg_replace(
  306. array(
  307. '/<span style="color: #([A-Z0-9]+)">&lt;\?php(&nbsp;| )/i',
  308. '/(<span style="color: #[A-Z0-9]+">.*?)\?&gt;<\/span>\n<\/span>\n<\/code>/is',
  309. '/<span style="color: #[A-Z0-9]+"\><\/span>/i'
  310. ),
  311. array(
  312. '<span style="color: #$1">',
  313. "$1</span>\n</span>\n</code>",
  314. ''
  315. ),
  316. $str
  317. );
  318. // Replace our markers back to PHP tags.
  319. return str_replace(
  320. array('phptagopen', 'phptagclose', 'asptagopen', 'asptagclose', 'backslashtmp', 'scriptclose'),
  321. array('&lt;?', '?&gt;', '&lt;%', '%&gt;', '\\', '&lt;/script&gt;'),
  322. $str
  323. );
  324. }
  325. }
  326. // ------------------------------------------------------------------------
  327. if ( ! function_exists('highlight_phrase'))
  328. {
  329. /**
  330. * Phrase Highlighter
  331. *
  332. * Highlights a phrase within a text string
  333. *
  334. * @param string $str the text string
  335. * @param string $phrase the phrase you'd like to highlight
  336. * @param string $tag_open the openging tag to precede the phrase with
  337. * @param string $tag_close the closing tag to end the phrase with
  338. * @return string
  339. */
  340. function highlight_phrase($str, $phrase, $tag_open = '<mark>', $tag_close = '</mark>')
  341. {
  342. return ($str !== '' && $phrase !== '')
  343. ? preg_replace('/('.preg_quote($phrase, '/').')/i'.(UTF8_ENABLED ? 'u' : ''), $tag_open.'\\1'.$tag_close, $str)
  344. : $str;
  345. }
  346. }
  347. // ------------------------------------------------------------------------
  348. if ( ! function_exists('convert_accented_characters'))
  349. {
  350. /**
  351. * Convert Accented Foreign Characters to ASCII
  352. *
  353. * @param string $str Input string
  354. * @return string
  355. */
  356. function convert_accented_characters($str)
  357. {
  358. static $array_from, $array_to;
  359. if ( ! is_array($array_from))
  360. {
  361. if (file_exists(APPPATH.'config/foreign_chars.php'))
  362. {
  363. include(APPPATH.'config/foreign_chars.php');
  364. }
  365. if (file_exists(APPPATH.'config/'.ENVIRONMENT.'/foreign_chars.php'))
  366. {
  367. include(APPPATH.'config/'.ENVIRONMENT.'/foreign_chars.php');
  368. }
  369. if (empty($foreign_characters) OR ! is_array($foreign_characters))
  370. {
  371. $array_from = array();
  372. $array_to = array();
  373. return $str;
  374. }
  375. $array_from = array_keys($foreign_characters);
  376. $array_to = array_values($foreign_characters);
  377. }
  378. return preg_replace($array_from, $array_to, $str);
  379. }
  380. }
  381. // ------------------------------------------------------------------------
  382. if ( ! function_exists('word_wrap'))
  383. {
  384. /**
  385. * Word Wrap
  386. *
  387. * Wraps text at the specified character. Maintains the integrity of words.
  388. * Anything placed between {unwrap}{/unwrap} will not be word wrapped, nor
  389. * will URLs.
  390. *
  391. * @param string $str the text string
  392. * @param int $charlim = 76 the number of characters to wrap at
  393. * @return string
  394. */
  395. function word_wrap($str, $charlim = 76)
  396. {
  397. // Set the character limit
  398. is_numeric($charlim) OR $charlim = 76;
  399. // Reduce multiple spaces
  400. $str = preg_replace('| +|', ' ', $str);
  401. // Standardize newlines
  402. if (strpos($str, "\r") !== FALSE)
  403. {
  404. $str = str_replace(array("\r\n", "\r"), "\n", $str);
  405. }
  406. // If the current word is surrounded by {unwrap} tags we'll
  407. // strip the entire chunk and replace it with a marker.
  408. $unwrap = array();
  409. if (preg_match_all('|\{unwrap\}(.+?)\{/unwrap\}|s', $str, $matches))
  410. {
  411. for ($i = 0, $c = count($matches[0]); $i < $c; $i++)
  412. {
  413. $unwrap[] = $matches[1][$i];
  414. $str = str_replace($matches[0][$i], '{{unwrapped'.$i.'}}', $str);
  415. }
  416. }
  417. // Use PHP's native function to do the initial wordwrap.
  418. // We set the cut flag to FALSE so that any individual words that are
  419. // too long get left alone. In the next step we'll deal with them.
  420. $str = wordwrap($str, $charlim, "\n", FALSE);
  421. // Split the string into individual lines of text and cycle through them
  422. $output = '';
  423. foreach (explode("\n", $str) as $line)
  424. {
  425. // Is the line within the allowed character count?
  426. // If so we'll join it to the output and continue
  427. if (mb_strlen($line) <= $charlim)
  428. {
  429. $output .= $line."\n";
  430. continue;
  431. }
  432. $temp = '';
  433. while (mb_strlen($line) > $charlim)
  434. {
  435. // If the over-length word is a URL we won't wrap it
  436. if (preg_match('!\[url.+\]|://|www\.!', $line))
  437. {
  438. break;
  439. }
  440. // Trim the word down
  441. $temp .= mb_substr($line, 0, $charlim - 1);
  442. $line = mb_substr($line, $charlim - 1);
  443. }
  444. // If $temp contains data it means we had to split up an over-length
  445. // word into smaller chunks so we'll add it back to our current line
  446. if ($temp !== '')
  447. {
  448. $output .= $temp."\n".$line."\n";
  449. }
  450. else
  451. {
  452. $output .= $line."\n";
  453. }
  454. }
  455. // Put our markers back
  456. if (count($unwrap) > 0)
  457. {
  458. foreach ($unwrap as $key => $val)
  459. {
  460. $output = str_replace('{{unwrapped'.$key.'}}', $val, $output);
  461. }
  462. }
  463. return $output;
  464. }
  465. }
  466. // ------------------------------------------------------------------------
  467. if ( ! function_exists('ellipsize'))
  468. {
  469. /**
  470. * Ellipsize String
  471. *
  472. * This function will strip tags from a string, split it at its max_length and ellipsize
  473. *
  474. * @param string string to ellipsize
  475. * @param int max length of string
  476. * @param mixed int (1|0) or float, .5, .2, etc for position to split
  477. * @param string ellipsis ; Default '...'
  478. * @return string ellipsized string
  479. */
  480. function ellipsize($str, $max_length, $position = 1, $ellipsis = '&hellip;')
  481. {
  482. // Strip tags
  483. $str = trim(strip_tags($str));
  484. // Is the string long enough to ellipsize?
  485. if (mb_strlen($str) <= $max_length)
  486. {
  487. return $str;
  488. }
  489. $beg = mb_substr($str, 0, floor($max_length * $position));
  490. $position = ($position > 1) ? 1 : $position;
  491. if ($position === 1)
  492. {
  493. $end = mb_substr($str, 0, -($max_length - mb_strlen($beg)));
  494. }
  495. else
  496. {
  497. $end = mb_substr($str, -($max_length - mb_strlen($beg)));
  498. }
  499. return $beg.$ellipsis.$end;
  500. }
  501. }