<?php

/**
 * Parses a URI into the components and fragment identifier as specified
 * by RFC 3986.
 */
class HTMLPurifier_URIParser
{

    /**
     * Instance of HTMLPurifier_PercentEncoder to do normalization with.
     */
    protected $percentEncoder;

    /**
     * Creates the parser together with the percent encoder it uses to
     * normalize URIs before splitting them.
     */
    public function __construct()
    {
        $this->percentEncoder = new HTMLPurifier_PercentEncoder();
    }

    /**
     * Parses a URI.
     *
     * The URI is first passed through the percent encoder's normalize()
     * and then split into scheme, authority, path, query and fragment;
     * the authority is further split into userinfo, host and port.
     *
     * @param $uri string URI to parse
     * @return HTMLPurifier_URI representation of URI, or false if the
     *         top-level regexp did not match. This representation has
     *         not been validated yet and may not conform to RFC.
     */
    public function parse($uri)
    {
        $uri = $this->percentEncoder->normalize($uri);

        // Regexp is as per Appendix B.
        // Note that ["<>] are an addition to the RFC's recommended
        // characters, because they represent external delimiters.
        $r_URI = '!'.
            '(([a-zA-Z0-9\.\+\-]+):)?'. // 2. Scheme
            '(//([^/?#"<>]*))?'. // 4. Authority
            '([^?#"<>]*)'.       // 5. Path
            '(\?([^#"<>]*))?'.   // 7. Query
            '(#([^"<>]*))?'.     // 8. Fragment
            '!';

        $matches = array();
        $result = preg_match($r_URI, $uri, $matches);

        if (!$result) {
            return false; // *really* invalid URI
        }

        // Separate out parts. The outer groups (1, 3, 6, 8) include their
        // delimiter (":", "//", "?", "#"), so they are non-empty exactly
        // when the component is present, even if its value is "" or "0".
        $scheme     = !empty($matches[1]) ? $matches[2] : null;
        $authority  = !empty($matches[3]) ? $matches[4] : null;
        $path       = $matches[5]; // always present, can be empty
        $query      = !empty($matches[6]) ? $matches[7] : null;
        $fragment   = !empty($matches[8]) ? $matches[9] : null;

        // further parse authority
        if ($authority !== null) {
            $r_authority = "/^((.+?)@)?(\[[^\]]+\]|[^:]*)(:(\d*))?/";
            $matches = array();
            preg_match($r_authority, $authority, $matches);

            // Group 1 includes the trailing "@", so it is safe to test
            // with empty().
            $userinfo = !empty($matches[1]) ? $matches[2] : null;

            // Do not use empty() here: it treats the string "0" as empty,
            // which would silently discard the host in "http://0/".
            $host = isset($matches[3]) ? $matches[3] : '';

            // A colon with no digits ("example.com:") carries no port
            // number; report it as absent rather than casting "" to 0.
            if (isset($matches[5]) && $matches[5] !== '') {
                $port = (int) $matches[5];
            } else {
                $port = null;
            }
        } else {
            $port = $host = $userinfo = null;
        }

        return new HTMLPurifier_URI(
            $scheme, $userinfo, $host, $port, $path, $query, $fragment);
    }

}

// vim: et sw=4 sts=4