<?php
 
/**
 * Parses a URI into the components and fragment identifier as specified
 * by RFC 3986.
 */
class HTMLPurifier_URIParser
{

    /**
     * Instance of HTMLPurifier_PercentEncoder to do normalization with.
     */
    protected $percentEncoder;

    /**
     * Creates the parser together with the percent-encoder it uses to
     * normalize every URI before splitting it.
     */
    public function __construct()
    {
        $this->percentEncoder = new HTMLPurifier_PercentEncoder();
    }

    /**
     * Parses a URI.
     *
     * The URI is first normalized by the percent-encoder, then split into
     * scheme, authority, path, query and fragment; a present authority is
     * further split into userinfo, host and port.
     *
     * @param $uri string URI to parse
     * @return HTMLPurifier_URI|false Representation of URI, or false when the
     *         top-level regular expression does not match. This
     *         representation has not been validated yet and may not conform
     *         to RFC.
     */
    public function parse($uri)
    {
        $uri = $this->percentEncoder->normalize($uri);

        // Regexp is as per RFC 3986, Appendix B.
        // Note that ["<>] are an addition to the RFC's recommended
        // characters, because they represent external delimiters.
        $r_URI = '!'.
            '(([a-zA-Z0-9\.\+\-]+):)?'. // 2. Scheme
            '(//([^/?#"<>]*))?'. // 4. Authority
            '([^?#"<>]*)'.       // 5. Path
            '(\?([^#"<>]*))?'.   // 7. Query
            '(#([^"<>]*))?'.     // 8. Fragment
            '!';

        $matches = array();
        $result = preg_match($r_URI, $uri, $matches);

        if (!$result) {
            return false; // *really* invalid URI
        }

        // Separate out the parts. The odd-numbered groups include their
        // delimiter (":", "//", "?", "#"), so they are non-empty -- and never
        // the string "0" -- whenever the component is present at all. That
        // makes empty() a safe presence test here, and lets an empty-but-
        // present component (e.g. "?" with no query) come back as ''.
        $scheme     = !empty($matches[1]) ? $matches[2] : null;
        $authority  = !empty($matches[3]) ? $matches[4] : null;
        $path       = $matches[5]; // always present, can be empty
        $query      = !empty($matches[6]) ? $matches[7] : null;
        $fragment   = !empty($matches[8]) ? $matches[9] : null;

        // Further parse the authority into userinfo, host and port.
        if ($authority !== null) {
            $r_authority = "/^((.+?)@)?(\[[^\]]+\]|[^:]*)(:(\d*))?/";
            $matches = array();
            preg_match($r_authority, $authority, $matches);

            $userinfo   = !empty($matches[1]) ? $matches[2] : null;
            // The host group carries no delimiter, so it must not be tested
            // with empty(): that treats the legitimate host "0" as missing
            // and would silently drop it.
            $host       = isset($matches[3]) ? $matches[3] : '';
            $port       = !empty($matches[4]) ? (int) $matches[5] : null;
        } else {
            $port = $host = $userinfo = null;
        }

        return new HTMLPurifier_URI(
            $scheme, $userinfo, $host, $port, $path, $query, $fragment);
    }

}
 
// vim: et sw=4 sts=4