1<?php
2
3// SPDX-FileCopyrightText: 2004-2023 Ryan Parman, Sam Sneddon, Ryan McCue
4// SPDX-License-Identifier: BSD-3-Clause
5
6declare(strict_types=1);
7
8namespace SimplePie;
9
10use SimplePie\HTTP\Response;
11
12/**
13 * Used for fetching remote files and reading local files
14 *
15 * Supports HTTP 1.0 via cURL or fsockopen, with spotty HTTP 1.1 support
16 *
17 * This class can be overloaded with {@see \SimplePie\SimplePie::set_file_class()}
18 *
19 * @todo Move to properly supporting RFC2616 (HTTP/1.1)
20 */
class File implements Response
{
    /**
     * @var string The final URL after following all redirects
     * @deprecated Use `get_final_requested_uri()` method.
     */
    public $url;

    /**
     * @var ?string User agent to use in requests
     * @deprecated Set the user agent in constructor.
     */
    public $useragent;

    /** @var bool Whether the fetch succeeded; set to false whenever `$error` is populated */
    public $success = true;

    /** @var array<string, non-empty-array<string>> Canonical representation of headers */
    private $parsed_headers = [];
    /** @var array<string, string> Last known value of $headers property (used to detect external modification) */
    private $last_headers = [];
    /**
     * @var array<string, string> Headers as string for BC
     * @deprecated Use `get_headers()` method.
     */
    public $headers = [];

    /**
     * @var ?string Body of the HTTP response
     * @deprecated Use `get_body_content()` method.
     */
    public $body;

    /**
     * @var int Status code of the HTTP response
     * @deprecated Use `get_status_code()` method.
     */
    public $status_code = 0;

    /** @var non-negative-int Number of redirects that were already performed during this request sequence. */
    public $redirects = 0;

    /** @var ?string Human readable description of the failure, if any */
    public $error;

    /**
     * @var int-mask-of<SimplePie::FILE_SOURCE_*> Bit mask representing the method used to fetch the file and whether it is a local file or remote file obtained over HTTP.
     * @deprecated Backend is implementation detail which you should not care about; to see if the file was retrieved over HTTP, check if `get_final_requested_uri()` with `Misc::is_remote_uri()`.
     */
    public $method = \SimplePie\SimplePie::FILE_SOURCE_NONE;

    /**
     * @var string The permanent URL or the resource (first URL after the prefix of (only) permanent redirects)
     * @deprecated Use `get_permanent_uri()` method.
     */
    public $permanent_url;
    /** @var bool Whether the permanent URL is still writeable (prefix of permanent redirects has not ended) */
    private $permanentUrlMutable = true;

    /**
     * Fetch a remote resource over HTTP(S) or read a local file.
     *
     * URLs starting with `http://` or `https://` are fetched with cURL when it
     * is available and `$force_fsockopen` is false, otherwise with a raw
     * `fsockopen()` request. Anything else is read with `file_get_contents()`.
     *
     * Redirects are followed by re-invoking the constructor on the same
     * instance, so `$this->redirects` and the permanent URL state carry over
     * between hops. Failures are reported through `$success` and `$error`
     * rather than exceptions, except for the URL validation noted below.
     *
     * @param string $url URL or local path of the resource
     * @param int $timeout Timeout in seconds (connect and transfer)
     * @param int $redirects Maximum number of redirects to follow
     * @param ?array<string, string> $headers Extra request headers (name => value)
     * @param ?string $useragent User agent; for remote URLs `null` falls back to the `user_agent` ini setting
     * @param bool $force_fsockopen Use fsockopen even when cURL is available
     * @param array<int, mixed> $curl_options Additional cURL options (CURLOPT_* => value)
     *
     * @throws \InvalidArgumentException In the fsockopen backend, when the URL is malformed or has no host.
     */
    public function __construct(string $url, int $timeout = 10, int $redirects = 5, ?array $headers = null, ?string $useragent = null, bool $force_fsockopen = false, array $curl_options = [])
    {
        // Convert a non-ASCII (IDN) authority to its ASCII form when the intl extension is present.
        if (function_exists('idn_to_ascii')) {
            $parsed = \SimplePie\Misc::parse_url($url);
            if ($parsed['authority'] !== '' && !ctype_print($parsed['authority'])) {
                $authority = (string) \idn_to_ascii($parsed['authority'], \IDNA_NONTRANSITIONAL_TO_ASCII, \INTL_IDNA_VARIANT_UTS46);
                $url = \SimplePie\Misc::compress_parse_url($parsed['scheme'], $authority, $parsed['path'], $parsed['query'], null);
            }
        }
        $this->url = $url;
        // The permanent URL follows the chain only while every redirect so far was permanent.
        if ($this->permanentUrlMutable) {
            $this->permanent_url = $url;
        }
        $this->useragent = $useragent;
        if (preg_match('/^http(s)?:\/\//i', $url)) {
            if ($useragent === null) {
                $useragent = (string) ini_get('user_agent');
                $this->useragent = $useragent;
            }
            if (!is_array($headers)) {
                $headers = [];
            }
            if (!$force_fsockopen && function_exists('curl_exec')) {
                $this->method = \SimplePie\SimplePie::FILE_SOURCE_REMOTE | \SimplePie\SimplePie::FILE_SOURCE_CURL;
                $fp = curl_init();
                $headers2 = [];
                foreach ($headers as $key => $value) {
                    $headers2[] = "$key: $value";
                }
                // Merge caller-supplied CURLOPT_HTTPHEADER into our own list instead of letting it replace it.
                if (isset($curl_options[CURLOPT_HTTPHEADER])) {
                    if (is_array($curl_options[CURLOPT_HTTPHEADER])) {
                        $headers2 = array_merge($headers2, $curl_options[CURLOPT_HTTPHEADER]);
                    }
                    unset($curl_options[CURLOPT_HTTPHEADER]);
                }
                if (version_compare(\SimplePie\Misc::get_curl_version(), '7.10.5', '>=')) {
                    curl_setopt($fp, CURLOPT_ENCODING, '');
                }
                curl_setopt($fp, CURLOPT_URL, $url);
                curl_setopt($fp, CURLOPT_HEADER, 1);
                curl_setopt($fp, CURLOPT_RETURNTRANSFER, 1);
                curl_setopt($fp, CURLOPT_FAILONERROR, 1);
                curl_setopt($fp, CURLOPT_TIMEOUT, $timeout);
                curl_setopt($fp, CURLOPT_CONNECTTIMEOUT, $timeout);
                curl_setopt($fp, CURLOPT_REFERER, \SimplePie\Misc::url_remove_credentials($url));
                curl_setopt($fp, CURLOPT_USERAGENT, $useragent);
                curl_setopt($fp, CURLOPT_HTTPHEADER, $headers2);
                // Caller options are applied last so they can override the defaults above.
                foreach ($curl_options as $curl_param => $curl_value) {
                    curl_setopt($fp, $curl_param, $curl_value);
                }

                $responseHeaders = curl_exec($fp);
                // Retry once without content encoding when decoding the response failed.
                if (curl_errno($fp) === CURLE_WRITE_ERROR || curl_errno($fp) === CURLE_BAD_CONTENT_ENCODING) {
                    curl_setopt($fp, CURLOPT_ENCODING, 'none');
                    $responseHeaders = curl_exec($fp);
                }
                $this->status_code = curl_getinfo($fp, CURLINFO_HTTP_CODE);
                if (curl_errno($fp)) {
                    $this->error = 'cURL error ' . curl_errno($fp) . ': ' . curl_error($fp);
                    $this->success = false;
                    // Release the handle on the failure path as well.
                    if (\PHP_VERSION_ID < 80000) {
                        curl_close($fp);
                    }
                } else {
                    // Use the updated url provided by curl_getinfo after any redirects.
                    if ($info = curl_getinfo($fp)) {
                        $this->url = $info['url'];
                    }
                    // For PHPStan: We already checked that error did not occur.
                    assert(is_array($info) && $info['redirect_count'] >= 0);
                    if (\PHP_VERSION_ID < 80000) {
                        curl_close($fp);
                    }
                    $responseHeaders = \SimplePie\HTTP\Parser::prepareHeaders((string) $responseHeaders, $info['redirect_count'] + 1);
                    $parser = new \SimplePie\HTTP\Parser($responseHeaders, true);
                    if ($parser->parse()) {
                        $this->set_headers($parser->headers);
                        $this->body = $parser->body;
                        $this->status_code = $parser->status_code;
                        if ((in_array($this->status_code, [300, 301, 302, 303, 307]) || $this->status_code > 307 && $this->status_code < 400) && ($locationHeader = $this->get_header_line('location')) !== '' && $this->redirects < $redirects) {
                            $this->redirects++;
                            $location = \SimplePie\Misc::absolutize_url($locationHeader, $url);
                            if ($location === false) {
                                $this->error = "Invalid redirect location, trying to base “{$locationHeader}” onto “{$url}”";
                                $this->success = false;
                                return;
                            }
                            // Only 301 and 308 keep the permanent URL moving along the chain.
                            $this->permanentUrlMutable = $this->permanentUrlMutable && ($this->status_code == 301 || $this->status_code == 308);
                            $this->__construct($location, $timeout, $redirects, $headers, $useragent, $force_fsockopen, $curl_options);
                            return;
                        }
                    }
                }
            } else {
                $this->method = \SimplePie\SimplePie::FILE_SOURCE_REMOTE | \SimplePie\SimplePie::FILE_SOURCE_FSOCKOPEN;
                if (($url_parts = parse_url($url)) === false) {
                    throw new \InvalidArgumentException('Malformed URL: ' . $url);
                }
                if (!isset($url_parts['host'])) {
                    throw new \InvalidArgumentException('Missing hostname: ' . $url);
                }
                $socket_host = $url_parts['host'];
                $is_https = isset($url_parts['scheme']) && strtolower($url_parts['scheme']) === 'https';
                if ($is_https) {
                    $socket_host = 'ssl://' . $socket_host;
                }
                // Honour an explicit port from the URL; only fall back to the scheme default when none is given.
                if (!isset($url_parts['port'])) {
                    $url_parts['port'] = $is_https ? 443 : 80;
                }
                $fp = @fsockopen($socket_host, $url_parts['port'], $errno, $errstr, $timeout);
                if (!$fp) {
                    $this->error = 'fsockopen error: ' . $errstr;
                    $this->success = false;
                } else {
                    stream_set_timeout($fp, $timeout);
                    if (isset($url_parts['path'])) {
                        if (isset($url_parts['query'])) {
                            $get = "$url_parts[path]?$url_parts[query]";
                        } else {
                            $get = $url_parts['path'];
                        }
                    } else {
                        $get = '/';
                    }
                    $out = "GET $get HTTP/1.1\r\n";
                    $out .= "Host: $url_parts[host]\r\n";
                    $out .= "User-Agent: $useragent\r\n";
                    if (extension_loaded('zlib')) {
                        $out .= "Accept-Encoding: x-gzip,gzip,deflate\r\n";
                    }

                    if (isset($url_parts['user']) && isset($url_parts['pass'])) {
                        $out .= "Authorization: Basic " . base64_encode("$url_parts[user]:$url_parts[pass]") . "\r\n";
                    }
                    foreach ($headers as $key => $value) {
                        $out .= "$key: $value\r\n";
                    }
                    $out .= "Connection: Close\r\n\r\n";
                    fwrite($fp, $out);

                    $info = stream_get_meta_data($fp);

                    $responseHeaders = '';
                    while (!$info['eof'] && !$info['timed_out']) {
                        $responseHeaders .= fread($fp, 1160);
                        $info = stream_get_meta_data($fp);
                    }
                    // The whole response is buffered now; close the socket before any
                    // early return below (redirect handling) so it is never leaked.
                    fclose($fp);

                    if (!$info['timed_out']) {
                        $parser = new \SimplePie\HTTP\Parser($responseHeaders, true);
                        if ($parser->parse()) {
                            $this->set_headers($parser->headers);
                            $this->body = $parser->body;
                            $this->status_code = $parser->status_code;
                            if ((in_array($this->status_code, [300, 301, 302, 303, 307]) || $this->status_code > 307 && $this->status_code < 400) && ($locationHeader = $this->get_header_line('location')) !== '' && $this->redirects < $redirects) {
                                $this->redirects++;
                                $location = \SimplePie\Misc::absolutize_url($locationHeader, $url);
                                if ($location === false) {
                                    $this->error = "Invalid redirect location, trying to base “{$locationHeader}” onto “{$url}”";
                                    $this->success = false;
                                    return;
                                }
                                // Only 301 and 308 keep the permanent URL moving along the chain.
                                $this->permanentUrlMutable = $this->permanentUrlMutable && ($this->status_code == 301 || $this->status_code == 308);
                                $this->__construct($location, $timeout, $redirects, $headers, $useragent, $force_fsockopen, $curl_options);
                                return;
                            }
                            if (($contentEncodingHeader = $this->get_header_line('content-encoding')) !== '') {
                                // Hey, we act dumb elsewhere, so let's do that here too
                                switch (strtolower(trim($contentEncodingHeader, "\x09\x0A\x0D\x20"))) {
                                    case 'gzip':
                                    case 'x-gzip':
                                        if (($decompressed = gzdecode($this->body)) === false) {
                                            $this->error = 'Unable to decode HTTP "gzip" stream';
                                            $this->success = false;
                                        } else {
                                            $this->body = $decompressed;
                                        }
                                        break;

                                    case 'deflate':
                                        // Try raw deflate, then zlib, then gzip framing, in that order.
                                        if (($decompressed = gzinflate($this->body)) !== false) {
                                            $this->body = $decompressed;
                                        } elseif (($decompressed = gzuncompress($this->body)) !== false) {
                                            $this->body = $decompressed;
                                        } elseif (($decompressed = gzdecode($this->body)) !== false) {
                                            $this->body = $decompressed;
                                        } else {
                                            $this->error = 'Unable to decode HTTP "deflate" stream';
                                            $this->success = false;
                                        }
                                        break;

                                    default:
                                        $this->error = 'Unknown content coding';
                                        $this->success = false;
                                }
                            }
                        }
                    } else {
                        $this->error = 'fsocket timed out';
                        $this->success = false;
                    }
                }
            }
        } else {
            $this->method = \SimplePie\SimplePie::FILE_SOURCE_LOCAL | \SimplePie\SimplePie::FILE_SOURCE_FILE_GET_CONTENTS;
            if (empty($url) || !is_readable($url) || false === $filebody = file_get_contents($url)) {
                $this->body = '';
                $this->error = sprintf('file "%s" is not readable', $url);
                $this->success = false;
            } else {
                $this->body = $filebody;
                $this->status_code = 200;
            }
        }
        if ($this->success) {
            assert($this->body !== null); // For PHPStan
            // Leading whitespace may cause XML parsing errors (XML declaration cannot be preceded by anything other than BOM) so we trim it.
            // Note that unlike built-in `trim` function’s default settings, we do not trim `\x00` to avoid breaking characters in UTF-16 or UTF-32 encoded strings.
            // We also only do that when the whitespace is followed by `<`, so that we do not break e.g. UTF-16LE encoded whitespace like `\n\x00` in half.
            $trimmed = preg_replace('/^[ \n\r\t\v]+</', '<', $this->body);
            // preg_replace() returns null on a PCRE error; keep the untouched body in that case.
            if ($trimmed !== null) {
                $this->body = $trimmed;
            }
        }
    }

    /**
     * Return the value of the `$permanent_url` property as a string.
     */
    public function get_permanent_uri(): string
    {
        return (string) $this->permanent_url;
    }

    /**
     * Return the value of the `$url` property as a string.
     */
    public function get_final_requested_uri(): string
    {
        return (string) $this->url;
    }

    /**
     * Return the value of the `$status_code` property as an integer.
     */
    public function get_status_code(): int
    {
        return (int) $this->status_code;
    }

    /**
     * Return all headers in canonical form (name => list of values).
     *
     * Picks up external modifications of the legacy `$headers` property first.
     */
    public function get_headers(): array
    {
        $this->maybe_update_headers();
        return $this->parsed_headers;
    }

    /**
     * Tell whether at least one value is stored for the given header name.
     */
    public function has_header(string $name): bool
    {
        $this->maybe_update_headers();
        return $this->get_header($name) !== [];
    }

    /**
     * Return the list of values for a header, looked up by lower-cased name.
     *
     * An empty array is returned when the header is not present.
     */
    public function get_header(string $name): array
    {
        $this->maybe_update_headers();
        return $this->parsed_headers[strtolower($name)] ?? [];
    }

    /**
     * Return a clone of this file with the given header set.
     *
     * The header is stored under the lower-cased name and takes precedence
     * over an existing entry with that key; this instance is left untouched.
     *
     * @param string $name Header name
     * @param string|array<string> $value Header value(s); a scalar is wrapped into an array
     */
    public function with_header(string $name, $value)
    {
        $this->maybe_update_headers();
        $new = clone $this;

        $newHeader = [
            strtolower($name) => (array) $value,
        ];
        // Array union: keys from $newHeader win over the existing headers.
        $new->set_headers($newHeader + $this->get_headers());

        return $new;
    }

    /**
     * Return all values of a header joined with ", " (empty string when absent).
     */
    public function get_header_line(string $name): string
    {
        $this->maybe_update_headers();
        return implode(', ', $this->get_header($name));
    }

    /**
     * Return the value of the `$body` property as a string (empty string when unset).
     */
    public function get_body_content(): string
    {
        return (string) $this->body;
    }

    /**
     * Check if the $headers property was changed and update the internal state accordingly.
     *
     * Each legacy header line is split on commas into individual trimmed values.
     */
    private function maybe_update_headers(): void
    {
        if ($this->headers === $this->last_headers) {
            return;
        }

        $this->parsed_headers = array_map(
            function (string $header_line): array {
                if (strpos($header_line, ',') === false) {
                    return [$header_line];
                }

                return array_map('trim', explode(',', $header_line));
            },
            $this->headers
        );
        $this->last_headers = $this->headers;
    }

    /**
     * Sets headers internally.
     *
     * Keeps the canonical headers, the legacy `$headers` property and the
     * change-detection snapshot in sync.
     *
     * @param array<string, non-empty-array<string>> $headers
     */
    private function set_headers(array $headers): void
    {
        $this->parsed_headers = $headers;
        $this->headers = $this->flatten_headers($headers);
        $this->last_headers = $this->headers;
    }

    /**
     * Converts PSR-7 compatible headers into a legacy format.
     *
     * Multiple values of one header are joined with a comma.
     *
     * @param array<string, non-empty-array<string>> $headers
     *
     * @return array<string, string>
     */
    private function flatten_headers(array $headers): array
    {
        return array_map(function (array $values): string {
            return implode(',', $values);
        }, $headers);
    }

    /**
     * Create a File instance from another Response
     *
     * For BC reasons in some places there MUST be a `File` instance
     * instead of a `Response` implementation
     *
     * The instance is created without running the constructor, so no
     * request is performed; all state is copied from `$response`.
     *
     * @see Locator::__construct()
     * @internal
     */
    final public static function fromResponse(Response $response): self
    {
        $parsedHeaders = $response->get_headers();
        $headers = [];

        foreach ($parsedHeaders as $name => $header) {
            $headers[$name] = implode(', ', $header);
        }

        /** @var File */
        $file = (new \ReflectionClass(File::class))->newInstanceWithoutConstructor();

        $file->url = $response->get_final_requested_uri();
        $file->useragent = null;
        // Seed the canonical headers and the change-detection snapshot together with the
        // legacy property. Otherwise the first header access would re-split the joined
        // lines on commas and mangle values that legitimately contain commas (e.g. dates).
        $file->parsed_headers = $parsedHeaders;
        $file->headers = $headers;
        $file->last_headers = $headers;
        $file->body = $response->get_body_content();
        $file->status_code = $response->get_status_code();
        $file->permanent_url = $response->get_permanent_uri();

        return $file;
    }
}
443
// Also expose this class under the un-namespaced name `SimplePie_File`.
class_alias(File::class, 'SimplePie_File');
445