Security.php 28 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091
  1. <?php
  2. /**
  3. * CodeIgniter
  4. *
  5. * An open source application development framework for PHP
  6. *
  7. * This content is released under the MIT License (MIT)
  8. *
  9. * Copyright (c) 2014 - 2019, British Columbia Institute of Technology
  10. *
  11. * Permission is hereby granted, free of charge, to any person obtaining a copy
  12. * of this software and associated documentation files (the "Software"), to deal
  13. * in the Software without restriction, including without limitation the rights
  14. * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  15. * copies of the Software, and to permit persons to whom the Software is
  16. * furnished to do so, subject to the following conditions:
  17. *
  18. * The above copyright notice and this permission notice shall be included in
  19. * all copies or substantial portions of the Software.
  20. *
  21. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  22. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  23. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  24. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  25. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  26. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  27. * THE SOFTWARE.
  28. *
  29. * @package CodeIgniter
  30. * @author EllisLab Dev Team
  31. * @copyright Copyright (c) 2008 - 2014, EllisLab, Inc. (https://ellislab.com/)
  32. * @copyright Copyright (c) 2014 - 2019, British Columbia Institute of Technology (https://bcit.ca/)
  33. * @license https://opensource.org/licenses/MIT MIT License
  34. * @link https://codeigniter.com
  35. * @since Version 1.0.0
  36. * @filesource
  37. */
  38. defined('BASEPATH') OR exit('No direct script access allowed');
  39. /**
  40. * Security Class
  41. *
  42. * @package CodeIgniter
  43. * @subpackage Libraries
  44. * @category Security
  45. * @author EllisLab Dev Team
  46. * @link https://codeigniter.com/user_guide/libraries/security.html
  47. */
  48. class CI_Security {
  /**
   * List of sanitize filename strings
   *
   * Character sequences that sanitize_filename() strips from a file name.
   * The list mixes literal characters with URL-encoded and, for a few
   * entries, double URL-encoded forms ("%25" is an encoded "%").
   *
   * @var array
   */
  public $filename_bad_chars = array(
      '../', '<!--', '-->', '<', '>',
      "'", '"', '&', '$', '#',
      '{', '}', '[', ']', '=',
      ';', '?', '%20', '%22',
      '%3c', // <
      '%253c', // < (double-encoded: decodes to "%3c")
      '%3e', // >
      '%0e', // 0x0E (Shift Out control character); note ">" is '%3e' above
      '%28', // (
      '%29', // )
      '%2528', // ( (double-encoded: decodes to "%28")
      '%26', // &
      '%24', // $
      '%3f', // ?
      '%3b', // ;
      '%3d' // =
  );
  /**
   * Character set
   *
   * Will be overridden by the constructor, which stores the upper-cased
   * 'charset' config item here. Used as the default character set by
   * entity_decode().
   *
   * @var string
   */
  public $charset = 'UTF-8';

  /**
   * XSS Hash
   *
   * Random hash, generated lazily on the first call to xss_hash()
   * and reused afterwards.
   *
   * @var string
   */
  protected $_xss_hash;

  /**
   * CSRF Hash
   *
   * Random hash for Cross Site Request Forgery protection cookie.
   * Sent as the cookie value by csrf_set_cookie() and exposed
   * through get_csrf_hash().
   *
   * @var string
   */
  protected $_csrf_hash;

  /**
   * CSRF Expire time
   *
   * Expiration time for Cross Site Request Forgery protection cookie.
   * Defaults to two hours (in seconds). Replaced by the 'csrf_expire'
   * config item when CSRF protection is enabled.
   *
   * @var int
   */
  protected $_csrf_expire = 7200;

  /**
   * CSRF Token name
   *
   * Name of the POST field that csrf_verify() compares against the
   * CSRF cookie. Replaced by the 'csrf_token_name' config item when
   * CSRF protection is enabled.
   *
   * @var string
   */
  protected $_csrf_token_name = 'ci_csrf_token';

  /**
   * CSRF Cookie name
   *
   * Cookie name for Cross Site Request Forgery protection cookie.
   * Replaced by the 'csrf_cookie_name' config item and prepended with
   * the 'cookie_prefix' config item when CSRF protection is enabled.
   *
   * @var string
   */
  protected $_csrf_cookie_name = 'ci_csrf_token';
  /**
   * List of never allowed strings
   *
   * Maps a literal string (key) to the text that replaces it (value).
   * NOTE(review): presumably consumed by _do_never_allowed(), which is
   * not part of this excerpt - confirm there how matching is performed.
   *
   * @var array
   */
  protected $_never_allowed_str = array(
      'document.cookie' => '[removed]',
      '(document).cookie' => '[removed]',
      'document.write' => '[removed]',
      '(document).write' => '[removed]',
      '.parentNode' => '[removed]',
      '.innerHTML' => '[removed]',
      '-moz-binding' => '[removed]',
      '<!--' => '&lt;!--',
      '-->' => '--&gt;',
      '<![CDATA[' => '&lt;![CDATA[',
      '<comment>' => '&lt;comment&gt;',
      '<%' => '&lt;&#37;'
  );

  /**
   * List of never allowed regex replacements
   *
   * Each entry is a pattern fragment without delimiters or modifiers.
   * NOTE(review): delimiters, modifiers and the replacement text are
   * presumably supplied by _do_never_allowed() (not shown here).
   *
   * @var array
   */
  protected $_never_allowed_regex = array(
      'javascript\s*:',
      '(\(?document\)?|\(?window\)?(\.document)?)\.(location|on\w*)',
      'expression\s*(\(|&\#40;)', // CSS and IE
      'vbscript\s*:', // IE, surprise!
      'wscript\s*:', // IE
      'jscript\s*:', // IE
      'vbs\s*:', // IE
      'Redirect\s+30\d',
      "([\"'])?data\s*:[^\\1]*?base64[^\\1]*?,[^\\1]*?\\1?"
  );
  /**
   * Class constructor
   *
   * When the 'csrf_protection' config item is enabled, the CSRF settings
   * are loaded from the configuration, the cookie prefix is applied to the
   * CSRF cookie name and the CSRF hash is initialised. The character set
   * is always taken from the 'charset' config item.
   *
   * @return void
   */
  public function __construct()
  {
      // Is CSRF protection enabled?
      if (config_item('csrf_protection'))
      {
          // Override the built-in CSRF defaults with any configured values
          foreach (array('csrf_expire', 'csrf_token_name', 'csrf_cookie_name') as $key)
          {
              if (NULL !== ($val = config_item($key)))
              {
                  $this->{'_'.$key} = $val;
              }
          }

          // Append application specific cookie prefix
          if ($cookie_prefix = config_item('cookie_prefix'))
          {
              $this->_csrf_cookie_name = $cookie_prefix.$this->_csrf_cookie_name;
          }

          // Set the CSRF hash
          $this->_csrf_set_hash();
      }

      // config_item() can yield NULL (see the NULL check above); passing NULL
      // to strtoupper() is deprecated as of PHP 8.1, so cast explicitly.
      // The resulting value is unchanged: an empty string in that case.
      $this->charset = strtoupper((string) config_item('charset'));

      log_message('info', 'Security Class Initialized');
  }
  185. // --------------------------------------------------------------------
  /**
   * CSRF Verify
   *
   * For non-POST requests only the CSRF cookie is (re)sent. For POST
   * requests the submitted token is compared with the cookie value,
   * unless the current URI matches one of the 'csrf_exclude_uris'
   * patterns. On a mismatch csrf_show_error() is invoked, after the
   * hash and cookie have been refreshed.
   *
   * @return CI_Security|bool The result of csrf_set_cookie() for non-POST
   *                          requests (which may be FALSE), $this otherwise
   */
  public function csrf_verify()
  {
      // Anything other than POST just gets the CSRF cookie set
      $request_method = strtoupper($_SERVER['REQUEST_METHOD']);
      if ($request_method !== 'POST')
      {
          return $this->csrf_set_cookie();
      }

      // URIs whitelisted in the config skip the check entirely
      $exclude_uris = config_item('csrf_exclude_uris');
      if ($exclude_uris)
      {
          $uri = load_class('URI', 'core');
          $modifiers = 'i'.(UTF8_ENABLED ? 'u' : '');

          foreach ($exclude_uris as $excluded)
          {
              if (preg_match('#^'.$excluded.'$#'.$modifiers, $uri->uri_string()))
              {
                  return $this;
              }
          }
      }

      $token_name = $this->_csrf_token_name;
      $cookie_name = $this->_csrf_cookie_name;

      // Determine validity now, but report a mismatch only after the
      // hash and cookie have been regenerated below
      $valid = FALSE;
      if (isset($_POST[$token_name], $_COOKIE[$cookie_name]))
      {
          $submitted = $_POST[$token_name];
          $expected = $_COOKIE[$cookie_name];

          $valid = is_string($submitted)
              && is_string($expected)
              && hash_equals($submitted, $expected);
      }

      // The token has served its purpose; keep it out of the _POST array
      unset($_POST[$token_name]);

      // Regenerate on every submission?
      if (config_item('csrf_regenerate'))
      {
          // Nothing should last forever
          unset($_COOKIE[$cookie_name]);
          $this->_csrf_hash = NULL;
      }

      $this->_csrf_set_hash();
      $this->csrf_set_cookie();

      if ( ! $valid)
      {
          $this->csrf_show_error();
      }

      log_message('info', 'CSRF token verified');

      return $this;
  }
  232. // --------------------------------------------------------------------
  /**
   * CSRF Set Cookie
   *
   * Sends the CSRF cookie carrying the current CSRF hash. Nothing is
   * sent when the 'cookie_secure' config item is enabled but the
   * request is not served over HTTPS.
   *
   * @codeCoverageIgnore
   * @return CI_Security|bool FALSE when a secure cookie cannot be sent, $this otherwise
   */
  public function csrf_set_cookie()
  {
      $secure_cookie = (bool) config_item('cookie_secure');

      // A secure-only cookie is not sent over a plain HTTP connection
      if ($secure_cookie && ! is_https())
      {
          return FALSE;
      }

      $expires_at = time() + $this->_csrf_expire;

      setcookie(
          $this->_csrf_cookie_name,
          $this->_csrf_hash,
          $expires_at,
          config_item('cookie_path'),
          config_item('cookie_domain'),
          $secure_cookie,
          config_item('cookie_httponly')
      );

      log_message('info', 'CSRF cookie sent');

      return $this;
  }
  259. // --------------------------------------------------------------------
  /**
   * Show CSRF Error
   *
   * Delegates to show_error() with a fixed message and HTTP status 403.
   *
   * @return void
   */
  public function csrf_show_error()
  {
      $message = 'The action you have requested is not allowed.';
      $status_code = 403;

      show_error($message, $status_code);
  }
  269. // --------------------------------------------------------------------
  /**
   * Get CSRF Hash
   *
   * Accessor for the hash that is sent as the CSRF cookie value.
   *
   * @see CI_Security::$_csrf_hash
   * @return string CSRF hash
   */
  public function get_csrf_hash()
  {
      return $this->_csrf_hash;
  }
  280. // --------------------------------------------------------------------
  /**
   * Get CSRF Token Name
   *
   * Accessor for the name of the POST field that carries the CSRF token.
   *
   * @see CI_Security::$_csrf_token_name
   * @return string CSRF token name
   */
  public function get_csrf_token_name()
  {
      return $this->_csrf_token_name;
  }
  291. // --------------------------------------------------------------------
  /**
   * XSS Clean
   *
   * Sanitizes data so that Cross Site Scripting Hacks can be
   * prevented. This method does a fair amount of work but
   * it is extremely thorough, designed to prevent even the
   * most obscure XSS attempts. Nothing is ever 100% foolproof,
   * of course, but I haven't been able to get anything past
   * the filter.
   *
   * Note: Should only be used to deal with data upon submission.
   * It's not something that should be used for general
   * runtime processing.
   *
   * Arrays are cleaned element by element (recursively); note that
   * $is_image is not forwarded to those recursive calls.
   *
   * @link http://channel.bitflux.ch/wiki/XSS_Prevention
   * Based in part on some code and ideas from Bitflux.
   *
   * @link http://ha.ckers.org/xss.html
   * To help develop this script I used this great list of
   * vulnerabilities along with a few other hacks I've
   * harvested from examining vulnerabilities in other programs.
   *
   * @param string|string[] $str Input data
   * @param bool $is_image Whether the input is an image
   * @return string|string[]|bool The cleaned string (or array of cleaned values);
   *                              when $is_image is TRUE, a boolean telling whether
   *                              the data was left untouched by the filters
   */
  public function xss_clean($str, $is_image = FALSE)
  {
      // Is the string an array?
      if (is_array($str))
      {
          // Iterate by value: each element is written back through
          // $str[$key], so no by-reference loop variable is needed
          // (and none is left dangling afterwards).
          foreach ($str as $key => $value)
          {
              $str[$key] = $this->xss_clean($value);
          }

          return $str;
      }

      // Remove Invisible Characters
      $str = remove_invisible_characters($str);

      /*
       * URL Decode
       *
       * Just in case stuff like this is submitted:
       *
       * <a href="http://%77%77%77%2E%67%6F%6F%67%6C%65%2E%63%6F%6D">Google</a>
       *
       * Note: Use rawurldecode() so it does not remove plus signs
       */
      if (strpos($str, '%') !== FALSE)
      {
          // Decode repeatedly so multiply-encoded input is fully unwrapped
          do
          {
              $oldstr = $str;
              $str = rawurldecode($str);
              $str = preg_replace_callback('#%(?:\s*[0-9a-f]){2,}#i', array($this, '_urldecodespaces'), $str);
          }
          while ($oldstr !== $str);
          unset($oldstr);
      }

      /*
       * Convert character entities to ASCII
       *
       * This permits our tests below to work reliably.
       * We only convert entities that are within tags since
       * these are the ones that will pose security problems.
       */
      $str = preg_replace_callback("/[^a-z0-9>]+[a-z0-9]+=([\'\"]).*?\\1/si", array($this, '_convert_attribute'), $str);
      $str = preg_replace_callback('/<\w+.*/si', array($this, '_decode_entity'), $str);

      // Remove Invisible Characters Again!
      $str = remove_invisible_characters($str);

      /*
       * Convert all tabs to spaces
       *
       * This prevents strings like this: ja<tab>vascript
       * NOTE: we deal with spaces between characters later.
       * NOTE: preg_replace was found to be amazingly slow here on
       * large blocks of data, so we use str_replace.
       */
      $str = str_replace("\t", ' ', $str);

      // Capture converted string for later comparison
      $converted_string = $str;

      // Remove Strings that are never allowed
      $str = $this->_do_never_allowed($str);

      /*
       * Makes PHP tags safe
       *
       * Note: XML tags are inadvertently replaced too:
       *
       * <?xml
       *
       * But it doesn't seem to pose a problem.
       */
      if ($is_image === TRUE)
      {
          // Images have a tendency to have the PHP short opening and
          // closing tags every so often so we skip those and only
          // do the long opening tags.
          $str = preg_replace('/<\?(php)/i', '&lt;?\\1', $str);
      }
      else
      {
          $str = str_replace(array('<?', '?'.'>'), array('&lt;?', '?&gt;'), $str);
      }

      /*
       * Compact any exploded words
       *
       * This corrects words like: j a v a s c r i p t
       * These words are compacted back to their correct state.
       */
      $words = array(
          'javascript', 'expression', 'vbscript', 'jscript', 'wscript',
          'vbs', 'script', 'base64', 'applet', 'alert', 'document',
          'write', 'cookie', 'window', 'confirm', 'prompt', 'eval'
      );

      foreach ($words as $word)
      {
          $word = implode('\s*', str_split($word)).'\s*';

          // We only want to do this when it is followed by a non-word character
          // That way valid stuff like "dealer to" does not become "dealerto"
          $str = preg_replace_callback('#('.substr($word, 0, -3).')(\W)#is', array($this, '_compact_exploded_words'), $str);
      }

      /*
       * Remove disallowed Javascript in links or img tags
       * We used to do some version comparisons and use of stripos(),
       * but it is dog slow compared to these simplified non-capturing
       * preg_match(), especially if the pattern exists in the string
       *
       * Note: It was reported that not only space characters, but all in
       * the following pattern can be parsed as separators between a tag name
       * and its attributes: [\d\s"\'`;,\/\=\(\x00\x0B\x09\x0C]
       * ... however, remove_invisible_characters() above already strips the
       * hex-encoded ones, so we'll skip them below.
       */
      do
      {
          $original = $str;

          if (preg_match('/<a/i', $str))
          {
              $str = preg_replace_callback('#<a(?:rea)?[^a-z0-9>]+([^>]*?)(?:>|$)#si', array($this, '_js_link_removal'), $str);
          }

          if (preg_match('/<img/i', $str))
          {
              $str = preg_replace_callback('#<img[^a-z0-9]+([^>]*?)(?:\s?/?>|$)#si', array($this, '_js_img_removal'), $str);
          }

          if (preg_match('/script|xss/i', $str))
          {
              $str = preg_replace('#</*(?:script|xss).*?>#si', '[removed]', $str);
          }
      }
      while ($original !== $str);
      unset($original);

      /*
       * Sanitize naughty HTML elements
       *
       * If a tag containing any of the words in the list
       * below is found, the tag gets converted to entities.
       *
       * So this: <blink>
       * Becomes: &lt;blink&gt;
       */
      $pattern = '#'
          .'<((?<slash>/*\s*)((?<tagName>[a-z0-9]+)(?=[^a-z0-9]|$)|.+)' // tag start and name, followed by a non-tag character
          .'[^\s\042\047a-z0-9>/=]*' // a valid attribute character immediately after the tag would count as a separator
          // optional attributes
          .'(?<attributes>(?:[\s\042\047/=]*' // non-attribute characters, excluding > (tag close) for obvious reasons
          .'[^\s\042\047>/=]+' // attribute characters
          // optional attribute-value
          .'(?:\s*=' // attribute-value separator
          .'(?:[^\s\042\047=><`]+|\s*\042[^\042]*\042|\s*\047[^\047]*\047|\s*(?U:[^\s\042\047=><`]*))' // single, double or non-quoted value
          .')?' // end optional attribute-value group
          .')*)' // end optional attributes group
          .'[^>]*)(?<closeTag>\>)?#isS';

      // Note: It would be nice to optimize this for speed, BUT
      // only matching the naughty elements here results in
      // false positives and in turn - vulnerabilities!
      do
      {
          $old_str = $str;
          $str = preg_replace_callback($pattern, array($this, '_sanitize_naughty_html'), $str);
      }
      while ($old_str !== $str);
      unset($old_str);

      /*
       * Sanitize naughty scripting elements
       *
       * Similar to above, only instead of looking for
       * tags it looks for PHP and JavaScript commands
       * that are disallowed. Rather than removing the
       * code, it simply converts the parenthesis to entities
       * rendering the code un-executable.
       *
       * For example: eval('some code')
       * Becomes: eval&#40;'some code'&#41;
       */
      $str = preg_replace(
          '#(alert|prompt|confirm|cmd|passthru|eval|exec|expression|system|fopen|fsockopen|file|file_get_contents|readfile|unlink)(\s*)\((.*?)\)#si',
          '\\1\\2&#40;\\3&#41;',
          $str
      );

      // Same thing, but for "tag functions" (e.g. eval`some code`)
      // See https://github.com/bcit-ci/CodeIgniter/issues/5420
      $str = preg_replace(
          '#(alert|prompt|confirm|cmd|passthru|eval|exec|expression|system|fopen|fsockopen|file|file_get_contents|readfile|unlink)(\s*)`(.*?)`#si',
          '\\1\\2&#96;\\3&#96;',
          $str
      );

      // Final clean up
      // This adds a bit of extra precaution in case
      // something got through the above filters
      $str = $this->_do_never_allowed($str);

      /*
       * Images are Handled in a Special Way
       * - Essentially, we want to know that after all of the character
       * conversion is done whether any unwanted, likely XSS, code was found.
       * If not, we return TRUE, as the image is clean.
       * However, if the string post-conversion does not match the
       * string post-removal of XSS, then it fails, as there was unwanted XSS
       * code found and removed/changed during processing.
       */
      if ($is_image === TRUE)
      {
          return ($str === $converted_string);
      }

      return $str;
  }
  517. // --------------------------------------------------------------------
  /**
   * XSS Hash
   *
   * Returns the XSS hash, creating it on first use. The hash is the
   * hexadecimal form of 16 random bytes from get_random_bytes(); when
   * that source is unavailable an md5() of uniqid() is used instead.
   *
   * @see CI_Security::$_xss_hash
   * @return string XSS hash
   */
  public function xss_hash()
  {
      // Already generated during this request? Reuse it.
      if ($this->_xss_hash !== NULL)
      {
          return $this->_xss_hash;
      }

      $bytes = $this->get_random_bytes(16);

      if ($bytes === FALSE)
      {
          $this->_xss_hash = md5(uniqid(mt_rand(), TRUE));
      }
      else
      {
          $this->_xss_hash = bin2hex($bytes);
      }

      return $this->_xss_hash;
  }
  537. // --------------------------------------------------------------------
  /**
   * Get random bytes
   *
   * Tries, in order: random_bytes(), mcrypt_create_iv(), /dev/urandom
   * and openssl_random_pseudo_bytes(). When random_bytes() exists it is
   * the only source consulted; its failure is logged and reported as FALSE.
   *
   * @param int $length Output length; must be a positive integer
   *                    (or a string consisting of digits only)
   * @return string|bool The random bytes, or FALSE on invalid input
   *                     or when no source could deliver
   */
  public function get_random_bytes($length)
  {
      if (empty($length) OR ! ctype_digit((string) $length))
      {
          return FALSE;
      }

      // Normalise once so that every source below receives a real integer
      // (random_bytes() would otherwise raise a TypeError on strings)
      $length = (int) $length;

      if (function_exists('random_bytes'))
      {
          try
          {
              return random_bytes($length);
          }
          catch (Exception $e)
          {
              // If random_bytes() can't do the job, we can't either ...
              // There's no point in using fallbacks.
              log_message('error', $e->getMessage());
              return FALSE;
          }
      }

      // Unfortunately, none of the following PRNGs is guaranteed to exist ...
      if (defined('MCRYPT_DEV_URANDOM') && ($output = mcrypt_create_iv($length, MCRYPT_DEV_URANDOM)) !== FALSE)
      {
          return $output;
      }

      if (is_readable('/dev/urandom') && ($fp = fopen('/dev/urandom', 'rb')) !== FALSE)
      {
          // Try not to waste entropy ...
          is_php('5.4') && stream_set_chunk_size($fp, $length);
          $output = fread($fp, $length);
          fclose($fp);

          // fread() may legitimately return fewer bytes than requested;
          // accept the result only if it is complete. The byte-offset
          // check is used instead of strlen() so the test counts bytes.
          if ($output !== FALSE && isset($output[$length - 1]))
          {
              return $output;
          }
      }

      if (function_exists('openssl_random_pseudo_bytes'))
      {
          return openssl_random_pseudo_bytes($length);
      }

      return FALSE;
  }
  587. // --------------------------------------------------------------------
  /**
   * HTML Entities Decode
   *
   * A replacement for html_entity_decode()
   *
   * The reason we are not using html_entity_decode() by itself is because
   * while it is not technically correct to leave out the semicolon
   * at the end of an entity most browsers will still interpret the entity
   * correctly. html_entity_decode() does not convert entities without
   * semicolons, so we are left with our own little solution here. Bummer.
   *
   * Decoding is repeated until the string no longer changes, so nested
   * encodings are unwrapped as well.
   *
   * @link http://php.net/html-entity-decode
   *
   * @param string $str Input
   * @param string $charset Character set; defaults to CI_Security::$charset
   * @return string
   */
  public function entity_decode($str, $charset = NULL)
  {
      // Nothing to decode without an ampersand
      if (strpos($str, '&') === FALSE)
      {
          return $str;
      }

      // Translation table, built once and shared by all calls
      static $_entities;

      isset($charset) OR $charset = $this->charset;
      $flag = is_php('5.4')
          ? ENT_COMPAT | ENT_HTML5
          : ENT_COMPAT;

      if ( ! isset($_entities))
      {
          $_entities = array_map('strtolower', get_html_translation_table(HTML_ENTITIES, $flag, $charset));

          // If we're not on PHP 5.4+, add the possibly dangerous HTML 5
          // entities to the array manually
          if ($flag === ENT_COMPAT)
          {
              $_entities[':'] = '&colon;';
              $_entities['('] = '&lpar;';
              $_entities[')'] = '&rpar;';
              $_entities["\n"] = '&NewLine;';
              $_entities["\t"] = '&Tab;';
          }
      }

      do
      {
          $str_compare = $str;

          // Decode standard entities, avoiding false positives
          if (preg_match_all('/&[a-z]{2,}(?![a-z;])/i', $str, $matches))
          {
              $replace = array();
              $matches = array_unique(array_map('strtolower', $matches[0]));

              // Iterate by value: the entries are only read, and a
              // by-reference variable would stay bound to the last element
              // while $matches is reassigned on the next pass of the loop.
              foreach ($matches as $match)
              {
                  if (($char = array_search($match.';', $_entities, TRUE)) !== FALSE)
                  {
                      $replace[$match] = $char;
                  }
              }

              $str = str_replace(array_keys($replace), array_values($replace), $str);
          }

          // Decode numeric & UTF16 two byte entities
          // (a missing trailing semicolon is added first so that
          // html_entity_decode() recognises them)
          $str = html_entity_decode(
              preg_replace('/(&#(?:x0*[0-9a-f]{2,5}(?![0-9a-f;])|(?:0*\d{2,4}(?![0-9;]))))/iS', '$1;', $str),
              $flag,
              $charset
          );

          if ($flag === ENT_COMPAT)
          {
              $str = str_replace(array_values($_entities), array_keys($_entities), $str);
          }
      }
      while ($str_compare !== $str);

      return $str;
  }
  661. // --------------------------------------------------------------------
  /**
   * Sanitize Filename
   *
   * Removes invisible characters and every sequence listed in
   * CI_Security::$filename_bad_chars from the given name. Unless
   * $relative_path is set, './' and '/' are removed as well.
   *
   * @param string $str Input file name
   * @param bool $relative_path Whether to preserve paths
   * @return string
   */
  public function sanitize_filename($str, $relative_path = FALSE)
  {
      $blacklist = $this->filename_bad_chars;

      if ( ! $relative_path)
      {
          array_push($blacklist, './', '/');
      }

      $str = remove_invisible_characters($str, FALSE);

      // Keep stripping until a pass changes nothing: removing one
      // sequence can join its neighbours into another listed sequence
      while (($stripped = str_replace($blacklist, '', $str)) !== $str)
      {
          $str = $stripped;
      }

      return stripslashes($str);
  }
  686. // ----------------------------------------------------------------
  /**
   * Strip Image Tags
   *
   * Replaces each <img> tag with the value of its src attribute.
   *
   * @param string $str
   * @return string
   */
  public function strip_image_tags($str)
  {
      // First pattern handles a quoted src value, second an unquoted one
      $patterns = array(
          '#<img[\s/]+.*?src\s*=\s*(["\'])([^\\1]+?)\\1.*?\>#i',
          '#<img[\s/]+.*?src\s*=\s*?(([^\s"\'=<>`]+)).*?\>#i'
      );

      // In both patterns the second capture group holds the src value
      return preg_replace($patterns, '\\2', $str);
  }
  704. // ----------------------------------------------------------------
  /**
   * URL-decode taking spaces into account
   *
   * Callback for preg_replace_callback(): when the matched sequence
   * contains whitespace, the whitespace is removed and the remainder
   * is URL-decoded; otherwise the match is returned unchanged.
   *
   * @see https://github.com/bcit-ci/CodeIgniter/issues/4877
   * @param array $matches
   * @return string
   */
  protected function _urldecodespaces($matches)
  {
      $sequence = $matches[0];
      $compacted = preg_replace('#\s+#', '', $sequence);

      // No whitespace inside the match: leave it as it is
      if ($compacted === $sequence)
      {
          return $sequence;
      }

      return rawurldecode($compacted);
  }
  720. // ----------------------------------------------------------------
  /**
   * Compact Exploded Words
   *
   * Callback method for xss_clean() that strips all whitespace from the
   * matched word (e.g. 'j a v a s c r i p t') and re-appends the
   * character that followed it.
   *
   * @used-by CI_Security::xss_clean()
   * @param array $matches
   * @return string
   */
  protected function _compact_exploded_words($matches)
  {
      $compacted = preg_replace('/\s+/s', '', $matches[1]);

      return $compacted.$matches[2];
  }
  735. // --------------------------------------------------------------------
  /**
   * Sanitize Naughty HTML
   *
   * Callback method for xss_clean() to remove naughty HTML elements.
   *
   * Handles one matched tag:
   * - a tag without a closing '>' gets its opening bracket escaped;
   * - a tag whose name is in the naughty list is escaped entirely;
   * - any other tag is rebuilt from its name and attributes, with every
   *   blacklisted or value-less attribute replaced by 'xss=removed'.
   *
   * @used-by CI_Security::xss_clean()
   * @param array $matches Match data; the named groups 'slash', 'tagName',
   *                       'attributes' and 'closeTag' are read
   * @return string
   */
  protected function _sanitize_naughty_html($matches)
  {
      static $naughty_tags = array(
          'alert', 'area', 'prompt', 'confirm', 'applet', 'audio', 'basefont', 'base', 'behavior', 'bgsound',
          'blink', 'body', 'embed', 'expression', 'form', 'frameset', 'frame', 'head', 'html', 'ilayer',
          'iframe', 'input', 'button', 'select', 'isindex', 'layer', 'link', 'meta', 'keygen', 'object',
          'plaintext', 'style', 'script', 'textarea', 'title', 'math', 'video', 'svg', 'xml', 'xss'
      );

      // Entries are regex fragments; they are joined into one anchored pattern below
      static $evil_attributes = array(
          'on\w+', 'style', 'xmlns', 'formaction', 'form', 'xlink:href', 'FSCommand', 'seekSegmentTime'
      );

      // First, escape unclosed tags
      if (empty($matches['closeTag']))
      {
          return '&lt;'.$matches[1];
      }
      // Is the element that we caught naughty? If so, escape it
      elseif (in_array(strtolower($matches['tagName']), $naughty_tags, TRUE))
      {
          return '&lt;'.$matches[1].'&gt;';
      }
      // For other tags, see if their attributes are "evil" and strip those
      elseif (isset($matches['attributes']))
      {
          // We'll store the already filtered attributes here
          $attributes = array();

          // Attribute-catching pattern
          $attributes_pattern = '#'
              .'(?<name>[^\s\042\047>/=]+)' // attribute characters
              // optional attribute-value
              .'(?:\s*=(?<value>[^\s\042\047=><`]+|\s*\042[^\042]*\042|\s*\047[^\047]*\047|\s*(?U:[^\s\042\047=><`]*)))' // attribute-value separator
              .'#i';

          // Blacklist pattern for evil attribute names
          $is_evil_pattern = '#^('.implode('|', $evil_attributes).')$#i';

          // Each iteration filters a single attribute
          do
          {
              // Strip any non-alpha characters that may precede an attribute.
              // Browsers often parse these incorrectly and that has been a
              // source of numerous XSS issues we've had.
              $matches['attributes'] = preg_replace('#^[^a-z]+#i', '', $matches['attributes']);

              if ( ! preg_match($attributes_pattern, $matches['attributes'], $attribute, PREG_OFFSET_CAPTURE))
              {
                  // No (valid) attribute found? Discard everything else inside the tag
                  break;
              }

              if (
                  // Is it indeed an "evil" attribute?
                  preg_match($is_evil_pattern, $attribute['name'][0])
                  // Or does it have an equals sign, but no value and not quoted? Strip that too!
                  OR (trim($attribute['value'][0]) === '')
              )
              {
                  $attributes[] = 'xss=removed';
              }
              else
              {
                  $attributes[] = $attribute[0][0];
              }

              // Continue right after the attribute that was just processed
              // (with PREG_OFFSET_CAPTURE each entry is a [text, offset] pair)
              $matches['attributes'] = substr($matches['attributes'], $attribute[0][1] + strlen($attribute[0][0]));
          }
          while ($matches['attributes'] !== '');

          $attributes = empty($attributes)
              ? ''
              : ' '.implode(' ', $attributes);

          return '<'.$matches['slash'].$matches['tagName'].$attributes.'>';
      }

      return $matches[0];
  }
  814. // --------------------------------------------------------------------
  /**
   * JS Link Removal
   *
   * Callback method for xss_clean() to sanitize links: the attribute
   * part of the matched tag is passed through _filter_attributes() and
   * any href that leads up to a disallowed construct is stripped.
   *
   * This limits the PCRE backtracks, making it more performance friendly
   * and prevents PREG_BACKTRACK_LIMIT_ERROR from being triggered in
   * PHP 5.2+ on link-heavy strings.
   *
   * @used-by CI_Security::xss_clean()
   * @param array $match
   * @return string
   */
  protected function _js_link_removal($match)
  {
      $attributes = $match[1];

      $cleaned = preg_replace(
          '#href=.*?(?:(?:alert|prompt|confirm)(?:\(|&\#40;|`|&\#96;)|javascript:|livescript:|mocha:|charset=|window\.|\(?document\)?\.|\.cookie|<script|<xss|d\s*a\s*t\s*a\s*:)#si',
          '',
          $this->_filter_attributes($attributes)
      );

      // Swap the raw attribute text inside the full match for the cleaned one
      return str_replace($attributes, $cleaned, $match[0]);
  }
  840. // --------------------------------------------------------------------
  /**
   * JS Image Removal
   *
   * Callback used by xss_clean() to neutralise scripting inside image tags.
   *
   * The attribute string captured in $match[1] is first reduced by
   * _filter_attributes(); then everything from "src=" up to and
   * including the first dangerous token (alert(, eval(, javascript:,
   * base64, ...) is deleted. The cleaned string finally replaces
   * $match[1] inside the full match $match[0].
   *
   * Handling each image tag in its own callback is meant to keep PCRE
   * backtracking low, so PREG_BACKTRACK_LIMIT_ERROR is not triggered
   * in PHP 5.2+ on image tag heavy strings.
   *
   * @used-by	CI_Security::xss_clean()
   * @param	array	$match	Regex match: [0] whole match, [1] captured part to clean
   * @return	string	$match[0] with $match[1] swapped for its cleaned form
   */
  protected function _js_img_removal($match)
  {
      // Only quoted name="value" pairs survive this step.
      $attributes = $this->_filter_attributes($match[1]);

      // Drop "src=" together with whatever leads up to a dangerous token.
      $cleaned = preg_replace(
          '#src=.*?(?:(?:alert|prompt|confirm|eval)(?:\(|&\#40;|`|&\#96;)|javascript:|livescript:|mocha:|charset=|window\.|\(?document\)?\.|\.cookie|<script|<xss|base64\s*,)#si',
          '',
          $attributes
      );

      return str_replace($match[1], $cleaned, $match[0]);
  }
  866. // --------------------------------------------------------------------
  /**
   * Attribute Conversion
   *
   * Callback that escapes characters in the matched text: angle
   * brackets become HTML entities and every backslash is doubled.
   *
   * @used-by	CI_Security::xss_clean()
   * @param	array	$match	Regex match; only $match[0] (the whole match) is used
   * @return	string	The escaped match
   */
  protected function _convert_attribute($match)
  {
      // Parallel lists: $search[i] is replaced by $replace[i], in order.
      $search  = array('>', '<', '\\');
      $replace = array('&gt;', '&lt;', '\\\\');

      return str_replace($search, $replace, $match[0]);
  }
  878. // --------------------------------------------------------------------
  /**
   * Filter Attributes
   *
   * Collects every quoted name="value" / name='value' pair found in
   * $str, strips C-style comments from each pair and concatenates
   * the results. Anything that is not such a quoted pair (for example
   * unquoted attribute values) is left out of the returned string.
   *
   * @used-by	CI_Security::_js_img_removal()
   * @used-by	CI_Security::_js_link_removal()
   * @param	string	$str	Raw attribute text
   * @return	string	Concatenated, comment-free quoted attributes ('' if none)
   */
  protected function _filter_attributes($str)
  {
      // NOTE(review): inside a character class "\1" is not a back-reference,
      // so [^\\1] effectively means "any byte but \x01"; the lazy quantifier
      // plus the trailing back-reference is what stops at the closing quote.
      if ( ! preg_match_all('#\s*[a-z\-]+\s*=\s*(\042|\047)([^\\1]*?)\\1#is', $str, $found))
      {
          return '';
      }

      $filtered = '';
      foreach ($found[0] as $attribute)
      {
          // Remove /* ... */ comments embedded in the attribute.
          $filtered .= preg_replace('#/\*.*?\*/#s', '', $attribute);
      }

      return $filtered;
  }
  901. // --------------------------------------------------------------------
  /**
   * HTML Entity Decode Callback
   *
   * Decodes HTML entities in the matched text while shielding URL
   * query-string pairs: the "&" of each "&name=value" sequence is
   * swapped for the xss_hash() marker before decoding and swapped
   * back to "&" afterwards.
   *
   * @used-by	CI_Security::xss_clean()
   * @param	array	$match	Regex match; only $match[0] (the whole match) is used
   * @return	string	The decoded text
   */
  protected function _decode_entity($match)
  {
      // Protect GET variables in URLs
      $shielded = preg_replace(
          '|\&([a-z\_0-9\-]+)\=([a-z\_0-9\-/]+)|i',
          $this->xss_hash().'\\1=\\2',
          $match[0]
      );

      // Decode, then un-protect URL GET vars
      $marker  = $this->xss_hash();
      $decoded = $this->entity_decode($shielded, $this->charset);

      return str_replace($marker, '&', $decoded);
  }
  921. // --------------------------------------------------------------------
  /**
   * Do Never Allowed
   *
   * Applies the unconditional blacklists to a string: first the literal
   * substitutions from $this->_never_allowed_str (key => replacement),
   * then each pattern in $this->_never_allowed_regex, whose matches are
   * replaced with "[removed]" (case-insensitive, dot matches newlines).
   *
   * @used-by	CI_Security::xss_clean()
   * @param	string	$str	Input string
   * @return	string	The string after all replacements
   */
  protected function _do_never_allowed($str)
  {
      // Literal replacements: keys are searched, values are substituted.
      $needles = array_keys($this->_never_allowed_str);
      $str = str_replace($needles, $this->_never_allowed_str, $str);

      // Pattern replacements, applied one after another.
      foreach ($this->_never_allowed_regex as $pattern)
      {
          $delimited = '#'.$pattern.'#is';
          $str = preg_replace($delimited, '[removed]', $str);
      }

      return $str;
  }
  938. // --------------------------------------------------------------------
  /**
   * Set CSRF Hash
   *
   * Returns the CSRF hash for the current request, initialising
   * $this->_csrf_hash on first use:
   *
   *  - if the CSRF cookie holds exactly 32 hexadecimal characters,
   *    that value is reused;
   *  - otherwise a new hash is built from 16 bytes returned by
   *    get_random_bytes(), falling back to md5(uniqid(mt_rand(), TRUE))
   *    when that call returns FALSE.
   *
   * This method only populates the hash; it does not send a cookie.
   *
   * @return	string	The 32-character CSRF hash
   */
  protected function _csrf_set_hash()
  {
      // Already initialised during this request.
      if ($this->_csrf_hash !== NULL)
      {
          return $this->_csrf_hash;
      }

      // If the cookie exists we will use its value.
      // We don't necessarily want to regenerate it with
      // each page load since a page could contain embedded
      // sub-pages causing this feature to fail.
      //
      // The pattern is anchored with \z rather than $: without the D
      // modifier, $ also matches before a trailing newline, which would
      // let a 33-byte "hash\n" cookie value through.
      if (isset($_COOKIE[$this->_csrf_cookie_name]) && is_string($_COOKIE[$this->_csrf_cookie_name])
          && preg_match('#^[0-9a-f]{32}\z#iS', $_COOKIE[$this->_csrf_cookie_name]) === 1)
      {
          return $this->_csrf_hash = $_COOKIE[$this->_csrf_cookie_name];
      }

      $rand = $this->get_random_bytes(16);

      // Weaker fallback, used only when no random-byte source is available.
      $this->_csrf_hash = ($rand === FALSE)
          ? md5(uniqid(mt_rand(), TRUE))
          : bin2hex($rand);

      return $this->_csrf_hash;
  }
  964. }