Tidy.php 7.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228
  1. <?php
  2. /**
  3. * Abstract class for a set of proprietary modules that clean up (tidy)
  4. * poorly written HTML.
  5. * @todo Figure out how to protect some of these methods/properties
  6. */
  7. class HTMLPurifier_HTMLModule_Tidy extends HTMLPurifier_HTMLModule
  8. {
  /**
   * Names of the tidy levels this module understands, lowest first.
   * Position zero ('none') is the special level that enables no fixes.
   * @type array
   */
  public $levels = array('none', 'light', 'medium', 'heavy');

  /**
   * Level that receives every fix when makeFixesForLevel() runs.
   * While this is null, makeFixesForLevel() does nothing.
   * @type string|null
   */
  public $defaultLevel;

  /**
   * Fix names grouped by level, read by getFixesForLevel().
   * Shape:
   *      $fixesForLevel[$level] = array('fix-1', 'fix-2');
   * @type array
   */
  public $fixesForLevel = array(
      'light'  => array(),
      'medium' => array(),
      'heavy'  => array(),
  );
  /**
   * Lazily builds the module: creates every available fix, works out
   * which of them the configuration asks for, and passes those on to
   * populate().
   *
   * A fix is kept when it is listed in HTML.TidyAdd or belongs to the
   * level selected by HTML.TidyLevel, unless it is also listed in
   * HTML.TidyRemove (removal takes precedence).
   * @param HTMLPurifier_Config $config
   * @todo Wildcard matching and error reporting when an added or
   *       subtracted fix has no effect.
   */
  public function setup($config)
  {
      // build the complete set of fixes and let it seed fixesForLevel
      $available = $this->makeFixes();
      $this->makeFixesForLevel($available);

      // fixes implied by the configured level
      $by_level = $this->getFixesForLevel($config->get('HTML.TidyLevel'));

      // explicit per-fix overrides; both are consulted by key
      $forced_on = $config->get('HTML.TidyAdd');
      $forced_off = $config->get('HTML.TidyRemove');

      $selected = array();
      foreach ($available as $name => $fix) {
          // exact-name matching only; globbing is not implemented
          if (isset($forced_off[$name])) {
              continue;
          }
          if (isset($forced_on[$name]) || isset($by_level[$name])) {
              $selected[$name] = $fix;
          }
      }

      // install whatever survived the filtering
      $this->populate($selected);
  }
  /**
   * Collects the fixes enabled by a level. Levels are cumulative: the
   * result covers the requested level and every level listed before it
   * in $levels (the special level at index zero enables nothing).
   *
   * An unrecognized level raises an E_USER_WARNING and yields no fixes.
   * @param string $level level identifier, see $levels for valid values
   * @return array Lookup table whose keys are fix names (values are true)
   */
  public function getFixesForLevel($level)
  {
      // the "no fixes" level short-circuits everything
      if ($level == $this->levels[0]) {
          return array();
      }

      // walk upwards from the first real level, remembering each one
      // passed, and stop as soon as the requested level is reached
      $active = array();
      $total = count($this->levels);
      $pos = 1;
      while ($pos < $total) {
          $candidate = $this->levels[$pos];
          $active[] = $candidate;
          if ($candidate == $level) {
              break;
          }
          $pos++;
      }

      // the index only reaches the count when no level matched
      if ($pos == $total) {
          trigger_error(
              'Tidy level ' . htmlspecialchars($level) . ' not recognized',
              E_USER_WARNING
          );
          return array();
      }

      // merge the fix names of every activated level into one lookup
      $lookup = array();
      foreach ($active as $active_level) {
          foreach ($this->fixesForLevel[$active_level] as $fix_name) {
              $lookup[$fix_name] = true;
          }
      }
      return $lookup;
  }
  /**
   * Fills $fixesForLevel from the fixes array by assigning every fix
   * name to $defaultLevel. Does nothing while $defaultLevel is null,
   * and raises an E_USER_ERROR when $defaultLevel has no entry in
   * $fixesForLevel. Subclasses may override this, rely on
   * $defaultLevel, or bypass it by filling $fixesForLevel themselves.
   * @param array $fixes Fixes keyed by fix name
   */
  public function makeFixesForLevel($fixes)
  {
      // no default level configured: leave fixesForLevel untouched
      if (!isset($this->defaultLevel)) {
          return;
      }

      $target = $this->defaultLevel;
      if (isset($this->fixesForLevel[$target])) {
          // only the names matter here; the implementations stay in $fixes
          $this->fixesForLevel[$target] = array_keys($fixes);
          return;
      }

      trigger_error(
          'Default level ' . $target . ' does not exist',
          E_USER_ERROR
      );
  }
  /**
   * Installs each supplied fix into the slot that its name designates,
   * as decoded by getFixType():
   *  - attr_transform_pre / attr_transform_post: stored under the
   *    attribute name, on the named element's definition or, when the
   *    fix names no element, on this module's info_attr_transform_*
   *    array;
   *  - tag_transform: stored in info_tag_transform under the element;
   *  - child / content_model_type: assigned to the property of that
   *    name on the element's definition.
   * Element definitions missing from $this->info are created with
   * addBlankElement(). Any other fix type raises an E_USER_ERROR.
   * @param array $fixes Lookup table of fix name => fix implementation
   */
  public function populate($fixes)
  {
      foreach ($fixes as $name => $fix) {
          // the name encodes both the kind of fix and where it belongs
          list($type, $params) = $this->getFixType($name);

          if ($type == 'attr_transform_pre' || $type == 'attr_transform_post') {
              $attr = $params['attr'];
              if (isset($params['element'])) {
                  // element-specific transform lives on the element definition
                  $element = $params['element'];
                  $holder = empty($this->info[$element])
                      ? $this->addBlankElement($element)
                      : $this->info[$element];
                  $slot = $type;
              } else {
                  // global transform lives on the module itself
                  $holder = $this;
                  $slot = "info_$type";
              }
              $holder->{$slot}[$attr] = $fix;
          } elseif ($type == 'tag_transform') {
              $this->info_tag_transform[$params['element']] = $fix;
          } elseif ($type == 'child' || $type == 'content_model_type') {
              $element = $params['element'];
              $holder = empty($this->info[$element])
                  ? $this->addBlankElement($element)
                  : $this->info[$element];
              // the fix type doubles as the property name to overwrite
              $holder->$type = $fix;
          } else {
              trigger_error("Fix type $type not supported", E_USER_ERROR);
          }
      }
  }
  /**
   * Decodes a fix name of the form "element@attr#property" (every part
   * optional) into a fix type plus the parameters it carries.
   *
   * Resulting types:
   *  - an attr is present: 'attr_transform_' . property, with the
   *    property defaulting to 'pre' (e.g. 'a@href' gives
   *    'attr_transform_pre', '@lang#post' gives 'attr_transform_post');
   *  - no attr and no property: 'tag_transform' (e.g. 'font');
   *  - no attr but a property: the property itself
   *    (e.g. 'blockquote#content_model_type').
   * @param string $name Name of fix
   * @return array(string $fix_type, array $fix_parameters)
   * @note $fix_parameters is type dependent ('element' and/or 'attr'
   *       keys), see populate() for usage of these parameters
   */
  public function getFixType($name)
  {
      $property = null;
      $attr = null;

      // peel off "#property" first, then "@attr", leaving the element
      if (strpos($name, '#') !== false) {
          $pieces = explode('#', $name);
          $name = $pieces[0];
          $property = $pieces[1];
      }
      if (strpos($name, '@') !== false) {
          $pieces = explode('@', $name);
          $name = $pieces[0];
          $attr = $pieces[1];
      }

      // an empty element part means the fix is not tied to an element
      $params = array();
      if ($name !== '') {
          $params['element'] = $name;
      }

      // any fix naming an attribute is an attribute transform
      if ($attr !== null) {
          $params['attr'] = $attr;
          $stage = ($property === null) ? 'pre' : $property;
          return array('attr_transform_' . $stage, $params);
      }

      // a bare element name is a tag transform; otherwise the property
      // is the fix type
      $type = ($property === null) ? 'tag_transform' : $property;
      return array($type, $params);
  }
  /**
   * Declares every fix this module can perform as a compact
   * associative array of fix name => fix implementation. This base
   * implementation declares none; see getFixType() for how the names
   * are interpreted.
   * @return array
   */
  public function makeFixes()
  {
      $fixes = array();
      return $fixes;
  }
  208. }
  209. // vim: et sw=4 sts=4