<?php

/**
 * Percent-encoding helper for URI components.
 *
 * An instance carries a lookup table of byte values that are left literal.
 * The table always contains the URI "unreserved" characters (digits,
 * ASCII letters, "-", ".", "_" and "~") and may be extended through the
 * constructor.
 *
 * @warning
 *      Be careful when reusing instances of PercentEncoder. The object
 *      you use for normalize() SHOULD NOT be used for encode(), or
 *      vice-versa, because both methods consult the same table.
 */
class HTMLPurifier_PercentEncoder
{

    /**
     * Lookup table of byte values that must not be percent-encoded,
     * keyed by ordinal value; every stored value is true.
     * @type array
     */
    protected $preserve = array();

    /**
     * Builds the table of bytes that are kept literal.
     *
     * @param string|bool $preserve Extra characters to keep literal, given
     *      as a string in which every byte is added to the table, or false
     *      (the default) to keep only the unreserved characters.
     */
    public function __construct($preserve = false)
    {
        // Unreserved characters, registered in the same order as before:
        // digits, upper-case, lower-case, then "-", ".", "_" and "~".
        $unreserved = array_merge(
            range(48, 57),
            range(65, 90),
            range(97, 122),
            array(45, 46, 95, 126)
        );
        foreach ($unreserved as $ordinal) {
            $this->preserve[$ordinal] = true;
        }

        // Nothing more to do unless the caller supplied extra characters.
        if ($preserve === false) {
            return;
        }

        $count = strlen($preserve);
        for ($offset = 0; $offset < $count; $offset++) {
            $this->preserve[ord($preserve[$offset])] = true;
        }
    }

    /**
     * Replacement for urlencode(): percent-encodes every byte that is not
     * in the preserve table, emitting upper-case hexadecimal digits.
     *
     * @note
     *      Assumes that the string has already been normalized, making any
     *      and all percent escape sequences valid. A "%" is always copied
     *      through untouched, whether or not it is in the preserve table.
     * @param string $string String to be encoded
     * @return string Encoded string.
     */
    public function encode($string)
    {
        $encoded = '';
        $length = strlen($string);

        for ($pos = 0; $pos < $length; $pos++) {
            $char = $string[$pos];

            // Existing escapes are trusted, so "%" is never re-escaped.
            if ($char === '%') {
                $encoded .= $char;
                continue;
            }

            $ordinal = ord($char);
            if (isset($this->preserve[$ordinal])) {
                $encoded .= $char;
                continue;
            }

            $encoded .= '%' . sprintf('%02X', $ordinal);
        }

        return $encoded;
    }

    /**
     * Repairs and canonicalizes the percent-encoding of a string.
     *
     * For each "%" in the input:
     *  - if it is not followed by two hexadecimal digits, the "%" itself
     *    is escaped as "%25" and the following text is kept as-is;
     *  - if the escape denotes a byte in the preserve table, it is
     *    replaced by that literal byte;
     *  - otherwise the escape is kept with its hex digits upper-cased.
     *
     * @warning This function is affected by $preserve, even though the
     *          usual desired behavior is for this not to preserve those
     *          characters. Be careful when reusing instances of PercentEncoder!
     * @param string $string String to normalize
     * @return string
     */
    public function normalize($string)
    {
        if ($string == '') {
            return '';
        }

        $segments = explode('%', $string);
        // Text ahead of the first "%" contains no escapes; copy it through.
        $normalized = array_shift($segments);

        foreach ($segments as $segment) {
            $isEscape = strlen($segment) >= 2
                && ctype_xdigit(substr($segment, 0, 2));

            if (!$isEscape) {
                // Stray percent sign: escape it and keep what followed.
                $normalized .= '%25' . $segment;
                continue;
            }

            $hex = substr($segment, 0, 2);
            $tail = substr($segment, 2);
            $ordinal = hexdec($hex);

            if (isset($this->preserve[$ordinal])) {
                // Needlessly escaped byte: emit it literally.
                $normalized .= chr($ordinal) . $tail;
            } else {
                $normalized .= '%' . strtoupper($hex) . $tail;
            }
        }

        return $normalized;
    }
}

// vim: et sw=4 sts=4