<?php

/**
 * Definition of the purified HTML that describes allowed children,
 * attributes, and many other things.
 *
 * Conventions:
 *
 * All member variables that are prefixed with info
 * (including the main $info array) are used by HTML Purifier internals
 * and should not be directly edited when customizing the HTMLDefinition.
 * They can usually be set via configuration directives or custom
 * modules.
 *
 * On the other hand, member variables without the info prefix are used
 * internally by the HTMLDefinition and MUST NOT be used by other HTML
 * Purifier internals. Many of them, however, are public, and may be
 * edited by userspace code to tweak the behavior of HTMLDefinition.
 *
 * @note This class is inspected by Printer_HTMLDefinition; please
 *       update that class if things here change.
 *
 * @warning Directives that change this object's structure must be in
 *          the HTML or Attr namespace!
 */
class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition
{

    // FULLY-PUBLIC VARIABLES ---------------------------------------------

    /**
     * Associative array of element names to HTMLPurifier_ElementDef.
     * @type HTMLPurifier_ElementDef[]
     */
    public $info = array();

    /**
     * Associative array of global attribute name to attribute definition.
     * @type array
     */
    public $info_global_attr = array();

    /**
     * String name of parent element HTML will be going into.
     * @type string
     */
    public $info_parent = 'div';

    /**
     * Definition for parent element, allows parent element to be a
     * tag that's not allowed inside the HTML fragment.
     * @type HTMLPurifier_ElementDef
     */
    public $info_parent_def;

    /**
     * String name of element used to wrap inline elements in block context.
     * @type string
     * @note This is rarely used except for BLOCKQUOTEs in strict mode
     */
    public $info_block_wrapper = 'p';

    /**
     * Associative array of deprecated tag name to HTMLPurifier_TagTransform.
     * @type array
     */
    public $info_tag_transform = array();

    /**
     * Indexed list of HTMLPurifier_AttrTransform to be performed before validation.
     * @type HTMLPurifier_AttrTransform[]
     */
    public $info_attr_transform_pre = array();

    /**
     * Indexed list of HTMLPurifier_AttrTransform to be performed after validation.
     * @type HTMLPurifier_AttrTransform[]
     */
    public $info_attr_transform_post = array();

    /**
     * Nested lookup array of content set name (Block, Inline) to
     * element name to whether or not it belongs in that content set.
     * @type array
     */
    public $info_content_sets = array();

    /**
     * Indexed list of HTMLPurifier_Injector to be used.
     * @type HTMLPurifier_Injector[]
     */
    public $info_injector = array();

    /**
     * Doctype object
     * @type HTMLPurifier_Doctype
     */
    public $doctype;

    // RAW CUSTOMIZATION STUFF --------------------------------------------

    /**
     * Adds a custom attribute to a pre-existing element
     * @note This is strictly convenience, and does not have a corresponding
     *       method in HTMLPurifier_HTMLModule
     * @param string $element_name Element name to add attribute to
     * @param string $attr_name Name of attribute
     * @param mixed $def Attribute definition, can be string or object, see
     *             HTMLPurifier_AttrTypes for details
     */
    public function addAttribute($element_name, $attr_name, $def)
    {
        $module = $this->getAnonymousModule();
        // Reuse the element already registered on the anonymous module, or
        // register a blank one so there is something to hang the attribute on.
        if (!isset($module->info[$element_name])) {
            $element = $module->addBlankElement($element_name);
        } else {
            $element = $module->info[$element_name];
        }
        $element->attr[$attr_name] = $def;
    }

    /**
     * Adds a custom element to your HTML definition
     *
     * All arguments are forwarded unchanged to the anonymous module's
     * addElement() method, and its return value is returned as-is.
     *
     * @param mixed $element_name
     * @param mixed $type
     * @param mixed $contents
     * @param mixed $attr_collections
     * @param mixed $attributes Optional, defaults to an empty array
     * @return mixed Whatever HTMLPurifier_HTMLModule::addElement() returns
     * @see HTMLPurifier_HTMLModule::addElement() for detailed
     *       parameter and return value descriptions.
     */
    public function addElement($element_name, $type, $contents, $attr_collections, $attributes = array())
    {
        $module = $this->getAnonymousModule();
        // assume that if the user is calling this, the element
        // is safe. This may not be a good idea
        $element = $module->addElement($element_name, $type, $contents, $attr_collections, $attributes);
        return $element;
    }

    /**
     * Adds a blank element to your HTML definition, for overriding
     * existing behavior
     * @param string $element_name
     * @return HTMLPurifier_ElementDef
     * @see HTMLPurifier_HTMLModule::addBlankElement() for detailed
     *       parameter and return value descriptions.
     */
    public function addBlankElement($element_name)
    {
        $module  = $this->getAnonymousModule();
        $element = $module->addBlankElement($element_name);
        return $element;
    }

    /**
     * Retrieves a reference to the anonymous module, so you can
     * bust out advanced features without having to make your own
     * module.
     * @return HTMLPurifier_HTMLModule
     */
    public function getAnonymousModule()
    {
        // empty() instead of a plain read: processModules() unset()s this
        // property, and reading an unset property raises an
        // "Undefined property" diagnostic. The outcome is otherwise the same.
        if (empty($this->_anonModule)) {
            $this->_anonModule = new HTMLPurifier_HTMLModule();
            $this->_anonModule->name = 'Anonymous';
        }
        return $this->_anonModule;
    }

    /**
     * Lazily created module that collects userspace customizations.
     * Created by getAnonymousModule(); handed to the module manager and
     * then unset by processModules().
     * @type HTMLPurifier_HTMLModule|null
     */
    private $_anonModule = null;

    // PUBLIC BUT INTERNAL VARIABLES --------------------------------------

    /**
     * @type string
     */
    public $type = 'HTML';

    /**
     * @type HTMLPurifier_HTMLModuleManager
     */
    public $manager;

    /**
     * Performs low-cost, preliminary initialization.
     */
    public function __construct()
    {
        $this->manager = new HTMLPurifier_HTMLModuleManager();
    }

    /**
     * Builds the definition: pulls the data out of the module manager,
     * applies configuration, then discards the manager and the
     * content_model / content_model_type members of every element
     * definition.
     * @param HTMLPurifier_Config $config
     */
    protected function doSetup($config)
    {
        $this->processModules($config);
        $this->setupConfigStuff($config);

        unset($this->manager);

        // cleanup some of the element definitions
        foreach ($this->info as $k => $v) {
            unset($this->info[$k]->content_model);
            unset($this->info[$k]->content_model_type);
        }
    }

    /**
     * Extract out the information from the manager
     * @param HTMLPurifier_Config $config
     */
    protected function processModules($config)
    {
        if (!empty($this->_anonModule)) {
            // for user specific changes
            // this is late-loaded so we don't have to deal with PHP4
            // reference wonky-ness
            $this->manager->addModule($this->_anonModule);
            unset($this->_anonModule);
        }

        $this->manager->setup($config);
        $this->doctype = $this->manager->doctype;

        // Each of these per-module arrays is merged into the property of
        // the same name on this object. A value of exactly false removes
        // the key; any other value overwrites it.
        $merged_properties = array(
            'info_tag_transform',
            'info_attr_transform_pre',
            'info_attr_transform_post',
            'info_injector',
        );
        foreach ($this->manager->modules as $module) {
            foreach ($merged_properties as $property) {
                foreach ($module->{$property} as $k => $v) {
                    if ($v === false) {
                        unset($this->{$property}[$k]);
                    } else {
                        $this->{$property}[$k] = $v;
                    }
                }
            }
        }

        $this->info = $this->manager->getElements();
        $this->info_content_sets = $this->manager->contentSets->lookup;
    }

    /**
     * Sets up stuff based on config. We need a better way of doing this.
     *
     * In order: picks the block wrapper (HTML.BlockWrapper) and parent
     * element (HTML.Parent), restricts elements and attributes to the
     * allowed lists (HTML.AllowedElements / HTML.AllowedAttributes, or
     * HTML.Allowed when neither of those is an array), removes forbidden
     * elements and attributes (HTML.ForbiddenElements /
     * HTML.ForbiddenAttributes), and finally drops injectors whose
     * checkNeeded() does not return false.
     *
     * @param HTMLPurifier_Config $config
     */
    protected function setupConfigStuff($config)
    {
        $block_wrapper = $config->get('HTML.BlockWrapper');
        if (isset($this->info_content_sets['Block'][$block_wrapper])) {
            $this->info_block_wrapper = $block_wrapper;
        } else {
            trigger_error(
                'Cannot use non-block element as block wrapper',
                E_USER_ERROR
            );
        }

        $parent = $config->get('HTML.Parent');
        $def = $this->manager->getElement($parent, true);
        if ($def) {
            $this->info_parent = $parent;
            $this->info_parent_def = $def;
        } else {
            trigger_error(
                'Cannot use unrecognized element as parent',
                E_USER_ERROR
            );
            // Fall back to the definition of the current $info_parent.
            $this->info_parent_def = $this->manager->getElement($this->info_parent, true);
        }

        // support template text
        $support = "(for information on implementing this, see the support forums) ";

        // setup allowed elements -----------------------------------------

        $allowed_elements = $config->get('HTML.AllowedElements');
        $allowed_attributes = $config->get('HTML.AllowedAttributes'); // retrieve early

        // HTML.Allowed is only consulted when neither explicit list is set.
        if (!is_array($allowed_elements) && !is_array($allowed_attributes)) {
            $allowed = $config->get('HTML.Allowed');
            if (is_string($allowed)) {
                list($allowed_elements, $allowed_attributes) = $this->parseTinyMCEAllowedList($allowed);
            }
        }

        if (is_array($allowed_elements)) {
            foreach ($this->info as $name => $d) {
                if (!isset($allowed_elements[$name])) {
                    unset($this->info[$name]);
                }
                // Whatever is left in $allowed_elements afterwards was
                // requested but is not a known element.
                unset($allowed_elements[$name]);
            }
            // emit errors
            foreach ($allowed_elements as $element => $d) {
                $element = htmlspecialchars($element); // PHP doesn't escape errors, be careful!
                trigger_error("Element '$element' is not supported $support", E_USER_WARNING);
            }
        }

        // setup allowed attributes ---------------------------------------

        // Entries are removed from this copy as they are matched; the
        // leftovers are reported as unsupported below.
        $allowed_attributes_mutable = $allowed_attributes; // by copy!
        if (is_array($allowed_attributes)) {
            // This actually doesn't do anything, since we went away from
            // global attributes. It's possible that userland code uses
            // it, but HTMLModuleManager doesn't!
            foreach ($this->info_global_attr as $attr => $x) {
                $keys = array($attr, "*@$attr", "*.$attr");
                $delete = true;
                foreach ($keys as $key) {
                    if ($delete && isset($allowed_attributes[$key])) {
                        $delete = false;
                    }
                    if (isset($allowed_attributes_mutable[$key])) {
                        unset($allowed_attributes_mutable[$key]);
                    }
                }
                if ($delete) {
                    unset($this->info_global_attr[$attr]);
                }
            }

            foreach ($this->info as $tag => $info) {
                foreach ($info->attr as $attr => $x) {
                    // Every spelling under which this attribute may have
                    // been allowed.
                    $keys = array("$tag@$attr", $attr, "*@$attr", "$tag.$attr", "*.$attr");
                    $delete = true;
                    foreach ($keys as $key) {
                        if ($delete && isset($allowed_attributes[$key])) {
                            $delete = false;
                        }
                        if (isset($allowed_attributes_mutable[$key])) {
                            unset($allowed_attributes_mutable[$key]);
                        }
                    }
                    if ($delete) {
                        if ($this->info[$tag]->attr[$attr]->required) {
                            trigger_error(
                                "Required attribute '$attr' in element '$tag' " .
                                "was not allowed, which means '$tag' will not be allowed either",
                                E_USER_WARNING
                            );
                        }
                        unset($this->info[$tag]->attr[$attr]);
                    }
                }
            }

            // emit errors
            foreach ($allowed_attributes_mutable as $elattr => $d) {
                $bits = preg_split('/[.@]/', $elattr, 2);
                $c = count($bits);
                switch ($c) {
                    case 2:
                        if ($bits[0] !== '*') {
                            $element = htmlspecialchars($bits[0]);
                            $attribute = htmlspecialchars($bits[1]);
                            if (!isset($this->info[$element])) {
                                // NOTE(review): no error level is passed
                                // here, so this one is raised at
                                // trigger_error()'s default level
                                // (E_USER_NOTICE), unlike its siblings.
                                trigger_error(
                                    "Cannot allow attribute '$attribute' if element " .
                                    "'$element' is not allowed/supported $support"
                                );
                            } else {
                                trigger_error(
                                    "Attribute '$attribute' in element '$element' not supported $support",
                                    E_USER_WARNING
                                );
                            }
                            break;
                        }
                        // otherwise fall through
                    case 1:
                        $attribute = htmlspecialchars($bits[0]);
                        trigger_error(
                            "Global attribute '$attribute' is not ".
                            "supported in any elements $support",
                            E_USER_WARNING
                        );
                        break;
                }
            }
        }

        // setup forbidden elements ---------------------------------------

        $forbidden_elements   = $config->get('HTML.ForbiddenElements');
        $forbidden_attributes = $config->get('HTML.ForbiddenAttributes');

        foreach ($this->info as $tag => $info) {
            if (isset($forbidden_elements[$tag])) {
                unset($this->info[$tag]);
                continue;
            }
            foreach ($info->attr as $attr => $x) {
                if (isset($forbidden_attributes["$tag@$attr"]) ||
                    isset($forbidden_attributes["*@$attr"]) ||
                    isset($forbidden_attributes[$attr])
                ) {
                    unset($this->info[$tag]->attr[$attr]);
                    continue;
                } elseif (isset($forbidden_attributes["$tag.$attr"])) { // this segment might get removed eventually
                    // $tag.$attr are not user supplied, so no worries!
                    trigger_error(
                        "Error with $tag.$attr: tag.attr syntax not supported for " .
                        "HTML.ForbiddenAttributes; use tag@attr instead",
                        E_USER_WARNING
                    );
                }
            }
        }

        // Warn about "*.attr" keys, which are not honoured above.
        foreach ($forbidden_attributes as $key => $v) {
            if (strlen($key) < 2) {
                continue;
            }
            if ($key[0] != '*') {
                continue;
            }
            if ($key[1] == '.') {
                trigger_error(
                    "Error with $key: *.attr syntax not supported for HTML.ForbiddenAttributes; use attr instead",
                    E_USER_WARNING
                );
            }
        }

        // setup injectors -----------------------------------------------------
        foreach ($this->info_injector as $i => $injector) {
            if ($injector->checkNeeded($config) !== false) {
                // remove injector that does not have its required
                // elements/attributes present, and is thus not needed.
                unset($this->info_injector[$i]);
            }
        }
    }

    /**
     * Parses a TinyMCE-flavored Allowed Elements and Attributes list into
     * separate lists for processing. Format is element[attr1|attr2],element2...
     *
     * Spaces and tabs are stripped first; chunks are separated by commas or
     * line breaks. An element of "*" contributes attributes only
     * (as "*.attr") and is not added to the element list.
     *
     * @warning Although it's largely drawn from TinyMCE's implementation,
     *      it is different, and you'll probably have to modify your lists
     * @param string $list String list to parse
     * @return array Two-item array: lookup of element name => true, and
     *      lookup of "element.attr" => true
     * @todo Give this its own class, probably static interface
     */
    public function parseTinyMCEAllowedList($list)
    {
        $list = str_replace(array(' ', "\t"), '', $list);

        $elements = array();
        $attributes = array();

        $chunks = preg_split('/(,|[\n\r]+)/', $list);
        foreach ($chunks as $chunk) {
            if (empty($chunk)) {
                continue;
            }
            // Split "element[attr1|attr2]" into its element and attribute
            // parts.
            // NOTE(review): strpos() returns 0 for a chunk that starts with
            // '[', so such a chunk is treated as a bare element name too.
            if (!strpos($chunk, '[')) {
                $element = $chunk;
                $attr = false;
            } else {
                list($element, $attr) = explode('[', $chunk);
            }
            if ($element !== '*') {
                $elements[$element] = true;
            }
            if (!$attr) {
                continue;
            }
            // Strip the closing bracket only when it is actually present,
            // so a malformed chunk such as "a[href" does not lose the last
            // character of its final attribute name.
            if (substr($attr, -1) === ']') {
                $attr = substr($attr, 0, -1);
            }
            $attr = explode('|', $attr);
            foreach ($attr as $key) {
                $attributes["$element.$key"] = true;
            }
        }
        return array($elements, $attributes);
    }
}

// vim: et sw=4 sts=4