Uri.php 21 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733
  1. <?php
  2. declare(strict_types=1);
  3. namespace GuzzleHttp\Psr7;
  4. use GuzzleHttp\Psr7\Exception\MalformedUriException;
  5. use Psr\Http\Message\UriInterface;
  /**
   * PSR-7 URI implementation.
   *
   * Instances are treated as immutable value objects: every `with*()` method
   * returns the current instance when nothing changes and a modified clone
   * otherwise.
   *
   * @author Michael Dowling
   * @author Tobias Schultze
   * @author Matthew Weier O'Phinney
   */
  class Uri implements UriInterface
  {
      /**
       * Absolute http and https URIs require a host per RFC 7230 Section 2.7
       * but in generic URIs the host can be empty. So for http(s) URIs
       * we apply this default host when no host is given yet to form a
       * valid URI.
       */
      private const HTTP_DEFAULT_HOST = 'localhost';

      /**
       * Default port per scheme. A port equal to the default of the current
       * scheme is normalized to null (see removeDefaultPort()).
       */
      private const DEFAULT_PORTS = [
          'http' => 80,
          'https' => 443,
          'ftp' => 21,
          'gopher' => 70,
          'nntp' => 119,
          'news' => 119,
          'telnet' => 23,
          'tn3270' => 23,
          'imap' => 143,
          'pop' => 110,
          'ldap' => 389,
      ];

      /**
       * Unreserved characters for use in a regex.
       *
       * @link https://tools.ietf.org/html/rfc3986#section-2.3
       */
      private const CHAR_UNRESERVED = 'a-zA-Z0-9_\-\.~';

      /**
       * Sub-delims for use in a regex.
       *
       * @link https://tools.ietf.org/html/rfc3986#section-2.2
       */
      private const CHAR_SUB_DELIMS = '!\$&\'\(\)\*\+,;=';

      /**
       * Percent-encoded replacements for the query separators, applied to keys
       * and values by generateQueryString().
       */
      private const QUERY_SEPARATORS_REPLACEMENT = ['=' => '%3D', '&' => '%26'];

      /** @var string Uri scheme. */
      private $scheme = '';

      /** @var string Uri user info. */
      private $userInfo = '';

      /** @var string Uri host. */
      private $host = '';

      /** @var int|null Uri port. */
      private $port;

      /** @var string Uri path. */
      private $path = '';

      /** @var string Uri query string. */
      private $query = '';

      /** @var string Uri fragment. */
      private $fragment = '';

      /** @var string|null Cached string representation; reset to null whenever a component changes. */
      private $composedComponents;

      /**
       * Builds a URI from its string form. An empty string yields an empty URI.
       *
       * @param string $uri URI to parse.
       *
       * @throws MalformedUriException If the URI cannot be parsed.
       */
      public function __construct(string $uri = '')
      {
          if ($uri !== '') {
              $parts = self::parse($uri);
              if ($parts === false) {
                  throw new MalformedUriException("Unable to parse URI: $uri");
              }
              $this->applyParts($parts);
          }
      }

      /**
       * UTF-8 aware \parse_url() replacement.
       *
       * The internal function produces broken output for non ASCII domain names
       * (IDN) when used with locales other than "C".
       *
       * On the other hand, cURL understands IDN correctly only when UTF-8 locale
       * is configured ("C.UTF-8", "en_US.UTF-8", etc.).
       *
       * @see https://bugs.php.net/bug.php?id=52923
       * @see https://www.php.net/manual/en/function.parse-url.php#114817
       * @see https://curl.haxx.se/libcurl/c/CURLOPT_URL.html#ENCODING
       *
       * @return array|false The URL-decoded parse_url() components, or false when parse_url() fails.
       */
      private static function parse(string $url)
      {
          // A bracketed IPv6 literal must reach parse_url() untouched, so it is split
          // off before the remainder gets percent-encoded below. Hex digits are matched
          // in both cases, since RFC 3986 does not restrict them to lowercase.
          $prefix = '';
          if (preg_match('%^(.*://\[[0-9:a-fA-F]+\])(.*?)$%', $url, $matches)) {
              /** @var array{0:string, 1:string, 2:string} $matches */
              $prefix = $matches[1];
              $url = $matches[2];
          }

          // Percent-encode every run of characters between URI delimiters so that
          // parse_url() only ever sees ASCII input.
          /** @var string */
          $encodedUrl = preg_replace_callback(
              '%[^:/@?&=#]+%usD',
              static function ($matches) {
                  return urlencode($matches[0]);
              },
              $url
          );

          $result = parse_url($prefix . $encodedUrl);

          if ($result === false) {
              return false;
          }

          // Undo the temporary encoding on each parsed component.
          return array_map('urldecode', $result);
      }

      /**
       * Returns the URI as a string, composing it on first use and caching the result.
       */
      public function __toString(): string
      {
          if ($this->composedComponents === null) {
              $this->composedComponents = self::composeComponents(
                  $this->scheme,
                  $this->getAuthority(),
                  $this->path,
                  $this->query,
                  $this->fragment
              );
          }

          return $this->composedComponents;
      }

      /**
       * Composes a URI reference string from its various components.
       *
       * Usually this method does not need to be called manually but instead is used indirectly via
       * `Psr\Http\Message\UriInterface::__toString`.
       *
       * PSR-7 UriInterface treats an empty component the same as a missing component as
       * getQuery(), getFragment() etc. always return a string. This explains the slight
       * difference to RFC 3986 Section 5.3.
       *
       * Another adjustment is that the authority separator is added even when the authority is missing/empty
       * for the "file" scheme. This is because PHP stream functions like `file_get_contents` only work with
       * `file:///myfile` but not with `file:/myfile` although they are equivalent according to RFC 3986. But
       * `file:///` is the more common syntax for the file scheme anyway (Chrome for example redirects to
       * that format).
       *
       * @link https://tools.ietf.org/html/rfc3986#section-5.3
       */
      public static function composeComponents(?string $scheme, ?string $authority, string $path, ?string $query, ?string $fragment): string
      {
          $uri = '';

          // The loose comparisons below are deliberate: they treat null exactly
          // like an empty string, so a missing component is simply omitted.
          if ($scheme != '') {
              $uri .= $scheme . ':';
          }

          if ($authority != '' || $scheme === 'file') {
              $uri .= '//' . $authority;
          }

          $uri .= $path;

          if ($query != '') {
              $uri .= '?' . $query;
          }

          if ($fragment != '') {
              $uri .= '#' . $fragment;
          }

          return $uri;
      }

      /**
       * Whether the URI has the default port of the current scheme.
       *
       * `Psr\Http\Message\UriInterface::getPort` may return null or the standard port. This method can be used
       * independently of the implementation.
       */
      public static function isDefaultPort(UriInterface $uri): bool
      {
          return $uri->getPort() === null
              || (isset(self::DEFAULT_PORTS[$uri->getScheme()]) && $uri->getPort() === self::DEFAULT_PORTS[$uri->getScheme()]);
      }

      /**
       * Whether the URI is absolute, i.e. it has a scheme.
       *
       * An instance of UriInterface can either be an absolute URI or a relative reference. This method returns true
       * if it is the former. An absolute URI has a scheme. A relative reference is used to express a URI relative
       * to another URI, the base URI. Relative references can be divided into several forms:
       * - network-path references, e.g. '//example.com/path'
       * - absolute-path references, e.g. '/path'
       * - relative-path references, e.g. 'subpath'
       *
       * @see Uri::isNetworkPathReference
       * @see Uri::isAbsolutePathReference
       * @see Uri::isRelativePathReference
       * @link https://tools.ietf.org/html/rfc3986#section-4
       */
      public static function isAbsolute(UriInterface $uri): bool
      {
          return $uri->getScheme() !== '';
      }

      /**
       * Whether the URI is a network-path reference.
       *
       * A relative reference that begins with two slash characters is termed a network-path reference.
       *
       * @link https://tools.ietf.org/html/rfc3986#section-4.2
       */
      public static function isNetworkPathReference(UriInterface $uri): bool
      {
          return $uri->getScheme() === '' && $uri->getAuthority() !== '';
      }

      /**
       * Whether the URI is an absolute-path reference.
       *
       * A relative reference that begins with a single slash character is termed an absolute-path reference.
       *
       * @link https://tools.ietf.org/html/rfc3986#section-4.2
       */
      public static function isAbsolutePathReference(UriInterface $uri): bool
      {
          return $uri->getScheme() === ''
              && $uri->getAuthority() === ''
              && isset($uri->getPath()[0])
              && $uri->getPath()[0] === '/';
      }

      /**
       * Whether the URI is a relative-path reference.
       *
       * A relative reference that does not begin with a slash character is termed a relative-path reference.
       *
       * @link https://tools.ietf.org/html/rfc3986#section-4.2
       */
      public static function isRelativePathReference(UriInterface $uri): bool
      {
          return $uri->getScheme() === ''
              && $uri->getAuthority() === ''
              && (!isset($uri->getPath()[0]) || $uri->getPath()[0] !== '/');
      }

      /**
       * Whether the URI is a same-document reference.
       *
       * A same-document reference refers to a URI that is, aside from its fragment
       * component, identical to the base URI. When no base URI is given, only an empty
       * URI reference (apart from its fragment) is considered a same-document reference.
       *
       * @param UriInterface      $uri  The URI to check
       * @param UriInterface|null $base An optional base URI to compare against
       *
       * @link https://tools.ietf.org/html/rfc3986#section-4.4
       */
      public static function isSameDocumentReference(UriInterface $uri, ?UriInterface $base = null): bool
      {
          if ($base !== null) {
              // Resolve against the base first so relative references are compared
              // in their absolute form.
              $uri = UriResolver::resolve($base, $uri);

              return ($uri->getScheme() === $base->getScheme())
                  && ($uri->getAuthority() === $base->getAuthority())
                  && ($uri->getPath() === $base->getPath())
                  && ($uri->getQuery() === $base->getQuery());
          }

          return $uri->getScheme() === '' && $uri->getAuthority() === '' && $uri->getPath() === '' && $uri->getQuery() === '';
      }

      /**
       * Creates a new URI with a specific query string value removed.
       *
       * Any existing query string values that exactly match the provided key are
       * removed.
       *
       * @param UriInterface $uri URI to use as a base.
       * @param string       $key Query string key to remove.
       */
      public static function withoutQueryValue(UriInterface $uri, string $key): UriInterface
      {
          $result = self::getFilteredQueryString($uri, [$key]);

          return $uri->withQuery(implode('&', $result));
      }

      /**
       * Creates a new URI with a specific query string value.
       *
       * Any existing query string values that exactly match the provided key are
       * removed and replaced with the given key value pair.
       *
       * A value of null will set the query string key without a value, e.g. "key"
       * instead of "key=value".
       *
       * @param UriInterface $uri   URI to use as a base.
       * @param string       $key   Key to set.
       * @param string|null  $value Value to set
       */
      public static function withQueryValue(UriInterface $uri, string $key, ?string $value): UriInterface
      {
          $result = self::getFilteredQueryString($uri, [$key]);

          $result[] = self::generateQueryString($key, $value);

          return $uri->withQuery(implode('&', $result));
      }

      /**
       * Creates a new URI with multiple specific query string values.
       *
       * It has the same behavior as withQueryValue() but for an associative array of key => value.
       *
       * @param UriInterface               $uri           URI to use as a base.
       * @param array<string, string|null> $keyValueArray Associative array of key and values
       */
      public static function withQueryValues(UriInterface $uri, array $keyValueArray): UriInterface
      {
          $result = self::getFilteredQueryString($uri, array_keys($keyValueArray));

          foreach ($keyValueArray as $key => $value) {
              // Keys and non-null values are cast to string before being encoded.
              $result[] = self::generateQueryString((string) $key, $value !== null ? (string) $value : null);
          }

          return $uri->withQuery(implode('&', $result));
      }

      /**
       * Creates a URI from a hash of `parse_url` components.
       *
       * @link http://php.net/manual/en/function.parse-url.php
       *
       * @throws MalformedUriException If the components do not form a valid URI.
       */
      public static function fromParts(array $parts): UriInterface
      {
          $uri = new self();
          $uri->applyParts($parts);
          $uri->validateState();

          return $uri;
      }

      /**
       * Returns the scheme, or an empty string when none is set.
       */
      public function getScheme(): string
      {
          return $this->scheme;
      }

      /**
       * Returns the authority in "[user-info@]host[:port]" form.
       *
       * The user info is prepended only when non-empty and the port appended
       * only when it is not null.
       */
      public function getAuthority(): string
      {
          $authority = $this->host;
          if ($this->userInfo !== '') {
              $authority = $this->userInfo . '@' . $authority;
          }

          if ($this->port !== null) {
              $authority .= ':' . $this->port;
          }

          return $authority;
      }

      /**
       * Returns the user info ("user" or "user:password"), or an empty string.
       */
      public function getUserInfo(): string
      {
          return $this->userInfo;
      }

      /**
       * Returns the host, or an empty string when none is set.
       */
      public function getHost(): string
      {
          return $this->host;
      }

      /**
       * Returns the port, or null when none is set.
       */
      public function getPort(): ?int
      {
          return $this->port;
      }

      /**
       * Returns the path, or an empty string when none is set.
       */
      public function getPath(): string
      {
          return $this->path;
      }

      /**
       * Returns the query string, or an empty string when none is set.
       */
      public function getQuery(): string
      {
          return $this->query;
      }

      /**
       * Returns the fragment, or an empty string when none is set.
       */
      public function getFragment(): string
      {
          return $this->fragment;
      }

      /**
       * Returns an instance with the given scheme.
       *
       * The scheme is lowercased; a port that is the default for the new scheme
       * is dropped.
       *
       * @param mixed $scheme
       *
       * @throws \InvalidArgumentException If the scheme is not a string.
       * @throws MalformedUriException     If the resulting URI is invalid.
       */
      public function withScheme($scheme): UriInterface
      {
          $scheme = $this->filterScheme($scheme);

          if ($this->scheme === $scheme) {
              return $this;
          }

          $new = clone $this;
          $new->scheme = $scheme;
          $new->composedComponents = null;
          $new->removeDefaultPort();
          $new->validateState();

          return $new;
      }

      /**
       * Returns an instance with the given user info.
       *
       * @param mixed $user
       * @param mixed $password Appended as ":password" when not null.
       *
       * @throws \InvalidArgumentException If user or password is not a string.
       * @throws MalformedUriException     If the resulting URI is invalid.
       */
      public function withUserInfo($user, $password = null): UriInterface
      {
          $info = $this->filterUserInfoComponent($user);
          if ($password !== null) {
              $info .= ':' . $this->filterUserInfoComponent($password);
          }

          if ($this->userInfo === $info) {
              return $this;
          }

          $new = clone $this;
          $new->userInfo = $info;
          $new->composedComponents = null;
          $new->validateState();

          return $new;
      }

      /**
       * Returns an instance with the given host (lowercased).
       *
       * @param mixed $host
       *
       * @throws \InvalidArgumentException If the host is not a string.
       * @throws MalformedUriException     If the resulting URI is invalid.
       */
      public function withHost($host): UriInterface
      {
          $host = $this->filterHost($host);

          if ($this->host === $host) {
              return $this;
          }

          $new = clone $this;
          $new->host = $host;
          $new->composedComponents = null;
          $new->validateState();

          return $new;
      }

      /**
       * Returns an instance with the given port.
       *
       * A port that is the default for the current scheme is stored as null.
       *
       * @param mixed $port
       *
       * @throws \InvalidArgumentException If the port is out of range.
       * @throws MalformedUriException     If the resulting URI is invalid.
       */
      public function withPort($port): UriInterface
      {
          $port = $this->filterPort($port);

          if ($this->port === $port) {
              return $this;
          }

          $new = clone $this;
          $new->port = $port;
          $new->composedComponents = null;
          $new->removeDefaultPort();
          $new->validateState();

          return $new;
      }

      /**
       * Returns an instance with the given path, percent-encoded where necessary.
       *
       * @param mixed $path
       *
       * @throws \InvalidArgumentException If the path is not a string.
       * @throws MalformedUriException     If the resulting URI is invalid.
       */
      public function withPath($path): UriInterface
      {
          $path = $this->filterPath($path);

          if ($this->path === $path) {
              return $this;
          }

          $new = clone $this;
          $new->path = $path;
          $new->composedComponents = null;
          $new->validateState();

          return $new;
      }

      /**
       * Returns an instance with the given query string, percent-encoded where necessary.
       *
       * @param mixed $query
       *
       * @throws \InvalidArgumentException If the query is not a string.
       */
      public function withQuery($query): UriInterface
      {
          $query = $this->filterQueryAndFragment($query);

          if ($this->query === $query) {
              return $this;
          }

          $new = clone $this;
          $new->query = $query;
          $new->composedComponents = null;

          return $new;
      }

      /**
       * Returns an instance with the given fragment, percent-encoded where necessary.
       *
       * @param mixed $fragment
       *
       * @throws \InvalidArgumentException If the fragment is not a string.
       */
      public function withFragment($fragment): UriInterface
      {
          $fragment = $this->filterQueryAndFragment($fragment);

          if ($this->fragment === $fragment) {
              return $this;
          }

          $new = clone $this;
          $new->fragment = $fragment;
          $new->composedComponents = null;

          return $new;
      }

      /**
       * Apply parse_url parts to a URI.
       *
       * Every component is run through its filter; components missing from
       * $parts are reset to their empty value.
       *
       * @param array $parts Array of parse_url parts to apply.
       */
      private function applyParts(array $parts): void
      {
          $this->scheme = isset($parts['scheme'])
              ? $this->filterScheme($parts['scheme'])
              : '';
          $this->userInfo = isset($parts['user'])
              ? $this->filterUserInfoComponent($parts['user'])
              : '';
          $this->host = isset($parts['host'])
              ? $this->filterHost($parts['host'])
              : '';
          $this->port = isset($parts['port'])
              ? $this->filterPort($parts['port'])
              : null;
          $this->path = isset($parts['path'])
              ? $this->filterPath($parts['path'])
              : '';
          $this->query = isset($parts['query'])
              ? $this->filterQueryAndFragment($parts['query'])
              : '';
          $this->fragment = isset($parts['fragment'])
              ? $this->filterQueryAndFragment($parts['fragment'])
              : '';
          if (isset($parts['pass'])) {
              $this->userInfo .= ':' . $this->filterUserInfoComponent($parts['pass']);
          }

          $this->removeDefaultPort();
      }

      /**
       * Lowercases the ASCII letters of a scheme.
       *
       * @param mixed $scheme
       *
       * @throws \InvalidArgumentException If the scheme is invalid.
       */
      private function filterScheme($scheme): string
      {
          if (!is_string($scheme)) {
              throw new \InvalidArgumentException('Scheme must be a string');
          }

          return \strtr($scheme, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz');
      }

      /**
       * Percent-encodes the characters of a user info component (user or password)
       * that are neither unreserved nor sub-delims, leaving existing percent-encoded
       * triplets alone.
       *
       * @param mixed $component
       *
       * @throws \InvalidArgumentException If the user info is invalid.
       */
      private function filterUserInfoComponent($component): string
      {
          if (!is_string($component)) {
              throw new \InvalidArgumentException('User info must be a string');
          }

          return preg_replace_callback(
              '/(?:[^%' . self::CHAR_UNRESERVED . self::CHAR_SUB_DELIMS . ']+|%(?![A-Fa-f0-9]{2}))/',
              [$this, 'rawurlencodeMatchZero'],
              $component
          );
      }

      /**
       * Lowercases the ASCII letters of a host.
       *
       * @param mixed $host
       *
       * @throws \InvalidArgumentException If the host is invalid.
       */
      private function filterHost($host): string
      {
          if (!is_string($host)) {
              throw new \InvalidArgumentException('Host must be a string');
          }

          return \strtr($host, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz');
      }

      /**
       * Casts a port to int and checks that it lies within 0..65535.
       *
       * @param mixed $port
       *
       * @return int|null Null is passed through unchanged.
       *
       * @throws \InvalidArgumentException If the port is invalid.
       */
      private function filterPort($port): ?int
      {
          if ($port === null) {
              return null;
          }

          $port = (int) $port;
          if (0 > $port || 0xffff < $port) {
              throw new \InvalidArgumentException(
                  sprintf('Invalid port: %d. Must be between 0 and 65535', $port)
              );
          }

          return $port;
      }

      /**
       * Returns the "&"-separated parts of the URI's query whose key is not one of $keys.
       *
       * Keys are compared in their raw-URL-decoded form. The original array
       * indexes of the surviving parts are preserved.
       *
       * @param string[] $keys
       *
       * @return string[]
       */
      private static function getFilteredQueryString(UriInterface $uri, array $keys): array
      {
          $current = $uri->getQuery();

          if ($current === '') {
              return [];
          }

          $decodedKeys = array_map('rawurldecode', $keys);

          return array_filter(explode('&', $current), static function ($part) use ($decodedKeys) {
              return !in_array(rawurldecode(explode('=', $part)[0]), $decodedKeys, true);
          });
      }

      /**
       * Builds a single "key" or "key=value" query part.
       *
       * @param string      $key   Query key.
       * @param string|null $value Query value; null produces the key without "=".
       */
      private static function generateQueryString(string $key, ?string $value): string
      {
          // Query string separators ("=", "&") within the key or value need to be encoded
          // (while preventing double-encoding) before setting the query string. All other
          // chars that need percent-encoding will be encoded by withQuery().
          $queryString = strtr($key, self::QUERY_SEPARATORS_REPLACEMENT);

          if ($value !== null) {
              $queryString .= '=' . strtr($value, self::QUERY_SEPARATORS_REPLACEMENT);
          }

          return $queryString;
      }

      /**
       * Resets the port to null when it is the default port of the current scheme.
       */
      private function removeDefaultPort(): void
      {
          if ($this->port !== null && self::isDefaultPort($this)) {
              $this->port = null;
          }
      }

      /**
       * Filters the path of a URI
       *
       * Percent-encodes every character that is not unreserved, a sub-delim,
       * "%", ":", "@" or "/", as well as any "%" that does not start a valid
       * percent-encoded triplet.
       *
       * @param mixed $path
       *
       * @throws \InvalidArgumentException If the path is invalid.
       */
      private function filterPath($path): string
      {
          if (!is_string($path)) {
              throw new \InvalidArgumentException('Path must be a string');
          }

          return preg_replace_callback(
              '/(?:[^' . self::CHAR_UNRESERVED . self::CHAR_SUB_DELIMS . '%:@\/]++|%(?![A-Fa-f0-9]{2}))/',
              [$this, 'rawurlencodeMatchZero'],
              $path
          );
      }

      /**
       * Filters the query string or fragment of a URI.
       *
       * Same encoding rules as filterPath(), except that "?" is also left as is.
       *
       * @param mixed $str
       *
       * @throws \InvalidArgumentException If the query or fragment is invalid.
       */
      private function filterQueryAndFragment($str): string
      {
          if (!is_string($str)) {
              throw new \InvalidArgumentException('Query and fragment must be a string');
          }

          return preg_replace_callback(
              '/(?:[^' . self::CHAR_UNRESERVED . self::CHAR_SUB_DELIMS . '%:@\/\?]++|%(?![A-Fa-f0-9]{2}))/',
              [$this, 'rawurlencodeMatchZero'],
              $str
          );
      }

      /**
       * preg_replace_callback() callback that raw-URL-encodes the whole match.
       *
       * @param array $match Regex match; only index 0 is used.
       */
      private function rawurlencodeMatchZero(array $match): string
      {
          return rawurlencode($match[0]);
      }

      /**
       * Normalizes and validates the combination of components.
       *
       * Applies the default host to http(s) URIs without one, then rejects
       * scheme/authority/path combinations that would not form a valid URI.
       *
       * @throws MalformedUriException If the components do not form a valid URI.
       */
      private function validateState(): void
      {
          if ($this->host === '' && ($this->scheme === 'http' || $this->scheme === 'https')) {
              $this->host = self::HTTP_DEFAULT_HOST;
          }

          if ($this->getAuthority() === '') {
              if (0 === strpos($this->path, '//')) {
                  throw new MalformedUriException('The path of a URI without an authority must not start with two slashes "//"');
              }
              // Without a scheme, a colon in the first path segment would be
              // read as a scheme delimiter.
              if ($this->scheme === '' && false !== strpos(explode('/', $this->path, 2)[0], ':')) {
                  throw new MalformedUriException('A relative URI must not have a path beginning with a segment containing a colon');
              }
          } elseif (isset($this->path[0]) && $this->path[0] !== '/') {
              throw new MalformedUriException('The path of a URI with an authority must start with a slash "/" or be empty');
          }
      }
  }