<?php
  /**
   * Base class for all validating attribute definitions.
   *
   * This family of classes forms the core for not only HTML attribute validation,
   * but also any sort of string that needs to be validated or cleaned (which
   * means CSS properties and composite definitions are defined here too).
   * Besides defining (through code) what precisely makes the string valid,
   * subclasses are also responsible for cleaning the code if possible.
   */
  abstract class HTMLPurifier_AttrDef
  {
      /**
       * Tells us whether or not an HTML attribute is minimized.
       * Has no meaning in other contexts.
       * @type bool
       */
      public $minimized = false;

      /**
       * Tells us whether or not an HTML attribute is required.
       * Has no meaning in other contexts.
       * @type bool
       */
      public $required = false;

      /**
       * Validates and cleans passed string according to a definition.
       *
       * @param string $string String to be validated and cleaned.
       * @param HTMLPurifier_Config $config Mandatory HTMLPurifier_Config object.
       * @param HTMLPurifier_Context $context Mandatory HTMLPurifier_Context object.
       */
      abstract public function validate($string, $config, $context);

      /**
       * Convenience method that parses a string as if it were CDATA.
       *
       * This method processes a string roughly in the manner specified at
       * <http://www.w3.org/TR/html4/types.html#h-6.2>: leading and trailing
       * whitespace is removed with trim(), then every line feed, carriage
       * return and tab is replaced with a single space. While most useful
       * for HTML attributes specified as CDATA, it can also be applied to
       * most CSS values.
       *
       * @note The referenced section says line feeds should be ignored;
       *       this implementation replaces them with a space instead. That
       *       behavior is kept as-is because callers may depend on it.
       *
       * @note This method is not entirely standards compliant, as trim() removes
       *       more types of whitespace than specified in the spec. In practice,
       *       this is rarely a problem, as those extra characters usually have
       *       already been removed by HTMLPurifier_Encoder.
       *
       * @warning This processing is inconsistent with XML's whitespace handling
       *          as specified by section 3.3.3 and referenced XHTML 1.0 section
       *          4.7. However, note that we are NOT necessarily
       *          parsing XML, thus, this behavior may still be correct. We
       *          assume that newlines have been normalized.
       *
       * @param string $string String to be processed.
       * @return string Trimmed string with "\n", "\t" and "\r" turned into spaces.
       */
      public function parseCDATA($string)
      {
          $string = trim($string);
          $string = str_replace(array("\n", "\t", "\r"), ' ', $string);
          return $string;
      }

      /**
       * Factory method for creating this class from a string.
       * @param string $string String construction info
       * @return HTMLPurifier_AttrDef Created AttrDef object corresponding to $string
       */
      public function make($string)
      {
          // default implementation, return a flyweight of this object.
          // If $string has an effect on the returned object (i.e. you
          // need to overload this method), it is best
          // to clone or instantiate new copies. (Instantiation is safer.)
          return $this;
      }

      /**
       * Removes spaces from rgb(0, 0, 0) so that shorthand CSS properties work
       * properly. THIS IS A HACK!
       *
       * Both the three-component forms (rgb/hsl) and the four-component forms
       * (rgba/hsla) are normalized, including when they appear together in
       * the same string.
       *
       * @param string $string a CSS colour definition
       * @return string
       */
      protected function mungeRgb($string)
      {
          // One colour component: digits, optional fraction, optional "%",
          // with optional surrounding whitespace. Each use adds three capture
          // groups, which is why the replacements reference \2, \5, \8, \11.
          $p = '\s*(\d+(\.\d+)?([%]?))\s*';

          // Four-component forms first. The three-component pattern below
          // requires "(" directly after "rgb"/"hsl", so it can never match
          // "rgba(" / "hsla(" and the two passes do not interfere.
          $string = preg_replace(
              '/(rgba|hsla)\('.$p.','.$p.','.$p.','.$p.'\)/',
              '\1(\2,\5,\8,\11)',
              $string
          );

          return preg_replace('/(rgb|hsl)\('.$p.','.$p.','.$p.'\)/', '\1(\2,\5,\8)', $string);
      }

      /**
       * Parses a possibly escaped CSS string and returns the "pure"
       * version of it.
       *
       * Handled forms:
       *  - backslash + 1 to 6 hex digits: replaced by the character for that
       *    code point; one directly following whitespace character (as
       *    recognised by trim()) is consumed as the escape terminator;
       *  - backslash + line feed: both are dropped;
       *  - backslash + any other character: that character is kept literally;
       *  - a lone trailing backslash: kept as-is.
       *
       * Hex escapes whose decoded character is rejected by
       * HTMLPurifier_Encoder::cleanUTF8() (it returns an empty string) are
       * dropped, without discarding the character that follows them.
       *
       * @param string $string CSS string, possibly containing escapes
       * @return string String with the escapes expanded
       */
      protected function expandCSSEscape($string)
      {
          // flexibly parse it
          $ret = '';
          for ($i = 0, $c = strlen($string); $i < $c; $i++) {
              if ($string[$i] === '\\') {
                  $i++;
                  if ($i >= $c) {
                      // Backslash was the last byte; keep it verbatim.
                      $ret .= '\\';
                      break;
                  }
                  if (ctype_xdigit($string[$i])) {
                      // Collect up to six hex digits in total.
                      $code = $string[$i];
                      for ($a = 1, $i++; $i < $c && $a < 6; $i++, $a++) {
                          if (!ctype_xdigit($string[$i])) {
                              break;
                          }
                          $code .= $string[$i];
                      }
                      // $i now points at the first byte after the digits.
                      // The outer loop's $i++ will skip that byte, which is
                      // only correct when it is the optional whitespace
                      // terminator. Otherwise step back so it is processed
                      // normally. This must happen before the validity check
                      // so that an invalid escape does not swallow the
                      // character following it.
                      if ($i < $c && trim($string[$i]) !== '') {
                          $i--;
                      }
                      // We have to be extremely careful when adding
                      // new characters, to make sure we're not breaking
                      // the encoding.
                      $char = HTMLPurifier_Encoder::unichr(hexdec($code));
                      if (HTMLPurifier_Encoder::cleanUTF8($char) !== '') {
                          $ret .= $char;
                      }
                      continue;
                  }
                  if ($string[$i] === "\n") {
                      // Escaped line feed: drop both bytes.
                      continue;
                  }
              }
              $ret .= $string[$i];
          }
          return $ret;
      }
  }
// vim: et sw=4 sts=4