run:R W Run
7.09 KB
2026-03-11 16:18:52
R W Run
2.71 KB
2026-03-11 16:18:52
R W Run
16.3 KB
2026-03-11 16:18:52
R W Run
24.79 KB
2026-03-11 16:18:52
R W Run
21.95 KB
2026-03-11 16:18:52
R W Run
11.07 KB
2026-03-11 16:18:52
R W Run
208.44 KB
2026-03-11 16:18:52
R W Run
1.07 KB
2026-03-11 16:18:52
R W Run
1.6 KB
2026-03-11 16:18:52
R W Run
147.75 KB
2026-03-11 16:18:52
R W Run
1.38 KB
2026-03-11 16:18:52
R W Run
3.33 KB
2026-03-11 16:18:52
R W Run
3.52 KB
2026-03-11 16:18:52
R W Run
78.28 KB
2026-03-11 16:18:52
R W Run
error_log
📄class-wp-html-doctype-info.php
1<?php
2/**
3 * HTML API: WP_HTML_Doctype_Info class
4 *
5 * @package WordPress
6 * @subpackage HTML-API
7 * @since 6.7.0
8 */
9
10/**
11 * Core class used by the HTML API to represent a DOCTYPE declaration.
12 *
13 * This class parses DOCTYPE tokens for the full parser in the HTML Processor.
14 * Most code interacting with HTML won't need to parse DOCTYPE declarations;
15 * the HTML Processor is one exception. Consult the HTML Processor for proper
16 * parsing of an HTML document.
17 *
18 * A DOCTYPE declaration may indicate its document compatibility mode, which impacts
19 * the structure of the following HTML as well as the behavior of CSS class selectors.
20 * There are three possible modes:
21 *
22 * - "no-quirks" and "limited-quirks" modes (also called "standards mode").
23 * - "quirks" mode.
24 *
25 * These modes mostly determine whether CSS class name selectors match values in the
26 * HTML `class` attribute in an ASCII-case-insensitive way (quirks mode), or whether
27 * they match only when byte-for-byte identical (no-quirks mode).
28 *
29 * All HTML documents should start with the standard HTML5 DOCTYPE: `<!DOCTYPE html>`.
30 *
31 * > DOCTYPEs are required for legacy reasons. When omitted, browsers tend to use a different
32 * > rendering mode that is incompatible with some specifications. Including the DOCTYPE in a
33 * > document ensures that the browser makes a best-effort attempt at following the
34 * > relevant specifications.
35 *
36 * @see https://html.spec.whatwg.org/#the-doctype
37 *
38 * DOCTYPE declarations comprise four properties: a name, public identifier, system identifier,
39 * and an indication of which document compatibility mode they would imply if an HTML parser
40 * hadn't already determined it from other information.
41 *
42 * @see https://html.spec.whatwg.org/#the-initial-insertion-mode
43 *
44 * Historically, the DOCTYPE declaration was used in SGML documents to instruct a parser how
45 * to interpret the various tags and entities within a document. Its role in HTML diverged
46 * from how it was used in SGML and no meaning should be back-read into HTML based on how it
47 * is used in SGML, XML, or XHTML documents.
48 *
49 * @see https://www.iso.org/standard/16387.html
50 *
51 * @since 6.7.0
52 *
53 * @access private
54 *
55 * @see WP_HTML_Processor
56 */
class WP_HTML_Doctype_Info {
	/**
	 * Lower-cased public identifiers which indicate quirks mode when matched in full.
	 *
	 * @since 6.7.0
	 *
	 * @var string[]
	 */
	private const QUIRKS_PUBLIC_IDENTIFIERS = array(
		'-//w3o//dtd w3 html strict 3.0//en//',
		'-/w3c/dtd html 4.0 transitional/en',
		'html',
	);

	/**
	 * Lower-cased public identifier prefixes which indicate quirks mode.
	 *
	 * Only a single DOCTYPE token is normally parsed per document, and the
	 * normative `<!DOCTYPE html>` never reaches this list, so a linear scan
	 * is sufficient.
	 *
	 * @since 6.7.0
	 *
	 * @var string[]
	 */
	private const QUIRKS_PUBLIC_IDENTIFIER_PREFIXES = array(
		'+//silmaril//dtd html pro v0r11 19970101//',
		'-//as//dtd html 3.0 aswedit + extensions//',
		'-//advasoft ltd//dtd html 3.0 aswedit + extensions//',
		'-//ietf//dtd html 2.0 level 1//',
		'-//ietf//dtd html 2.0 level 2//',
		'-//ietf//dtd html 2.0 strict level 1//',
		'-//ietf//dtd html 2.0 strict level 2//',
		'-//ietf//dtd html 2.0 strict//',
		'-//ietf//dtd html 2.0//',
		'-//ietf//dtd html 2.1e//',
		'-//ietf//dtd html 3.0//',
		'-//ietf//dtd html 3.2 final//',
		'-//ietf//dtd html 3.2//',
		'-//ietf//dtd html 3//',
		'-//ietf//dtd html level 0//',
		'-//ietf//dtd html level 1//',
		'-//ietf//dtd html level 2//',
		'-//ietf//dtd html level 3//',
		'-//ietf//dtd html strict level 0//',
		'-//ietf//dtd html strict level 1//',
		'-//ietf//dtd html strict level 2//',
		'-//ietf//dtd html strict level 3//',
		'-//ietf//dtd html strict//',
		'-//ietf//dtd html//',
		'-//metrius//dtd metrius presentational//',
		'-//microsoft//dtd internet explorer 2.0 html strict//',
		'-//microsoft//dtd internet explorer 2.0 html//',
		'-//microsoft//dtd internet explorer 2.0 tables//',
		'-//microsoft//dtd internet explorer 3.0 html strict//',
		'-//microsoft//dtd internet explorer 3.0 html//',
		'-//microsoft//dtd internet explorer 3.0 tables//',
		'-//netscape comm. corp.//dtd html//',
		'-//netscape comm. corp.//dtd strict html//',
		"-//o'reilly and associates//dtd html 2.0//",
		"-//o'reilly and associates//dtd html extended 1.0//",
		"-//o'reilly and associates//dtd html extended relaxed 1.0//",
		'-//sq//dtd html 2.0 hotmetal + extensions//',
		'-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//',
		'-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//',
		'-//spyglass//dtd html 2.0 extended//',
		'-//sun microsystems corp.//dtd hotjava html//',
		'-//sun microsystems corp.//dtd hotjava strict html//',
		'-//w3c//dtd html 3 1995-03-24//',
		'-//w3c//dtd html 3.2 draft//',
		'-//w3c//dtd html 3.2 final//',
		'-//w3c//dtd html 3.2//',
		'-//w3c//dtd html 3.2s draft//',
		'-//w3c//dtd html 4.0 frameset//',
		'-//w3c//dtd html 4.0 transitional//',
		'-//w3c//dtd html experimental 19960712//',
		'-//w3c//dtd html experimental 970421//',
		'-//w3c//dtd w3 html//',
		'-//w3o//dtd w3 html 3.0//',
		'-//webtechs//dtd mozilla html 2.0//',
		'-//webtechs//dtd mozilla html//',
	);

	/**
	 * Name of the DOCTYPE, lower-cased by the parser; "html" for HTML documents.
	 *
	 * Treat as read-only. `null` means that the DOCTYPE token carried no name,
	 * as in `<!DOCTYPE>`.
	 *
	 * @see https://html.spec.whatwg.org/#tokenization
	 *
	 * @since 6.7.0
	 *
	 * @var string|null
	 */
	public $name = null;

	/**
	 * Public identifier of the DOCTYPE, exactly as quoted in the token.
	 *
	 * Treat as read-only. `null` means that no public identifier was present,
	 * which is distinct from an empty one (`PUBLIC ""`).
	 *
	 *     <!DOCTYPE html PUBLIC "public id goes here in quotes">
	 *
	 * @see https://html.spec.whatwg.org/#tokenization
	 *
	 * @since 6.7.0
	 *
	 * @var string|null
	 */
	public $public_identifier = null;

	/**
	 * System identifier of the DOCTYPE, exactly as quoted in the token.
	 *
	 * Treat as read-only. `null` means that no system identifier was present,
	 * which is distinct from an empty one (`SYSTEM ""`).
	 *
	 *     <!DOCTYPE html SYSTEM "system id goes here in quotes">
	 *     <!DOCTYPE html PUBLIC "public id goes here in quotes" "system id goes here in quotes">
	 *
	 * @see https://html.spec.whatwg.org/#tokenization
	 *
	 * @since 6.7.0
	 *
	 * @var string|null
	 */
	public $system_identifier = null;

	/**
	 * Document compatibility mode which this DOCTYPE declaration indicates.
	 *
	 * Treat as read-only. The value is derived once, in the constructor, from
	 * the name, the identifiers, and the force-quirks flag of the token:
	 *
	 *  - "no-quirks" and "limited-quirks" (together known as "standards" mode,
	 *    reported by browsers as `CSS1Compat`).
	 *  - "quirks" (reported by browsers as `BackCompat`).
	 *
	 * A parser only honors this for a DOCTYPE found in the "initial" insertion
	 * mode, before the HTML element or any other DOCTYPE token has been seen.
	 *
	 * @see https://html.spec.whatwg.org/#the-initial-insertion-mode
	 *
	 * @since 6.7.0
	 *
	 * @var string One of "no-quirks", "limited-quirks", or "quirks".
	 */
	public $indicated_compatibility_mode;

	/**
	 * Constructor.
	 *
	 * Not meant to be called directly: use {@see self::from_doctype_token} instead.
	 * The arguments mirror the fields of a "DOCTYPE token" in the HTML specification,
	 * where a missing name or identifier (`null`) is distinct from an empty string.
	 *
	 * @see https://html.spec.whatwg.org/multipage/parsing.html#tokenization
	 *
	 * @since 6.7.0
	 *
	 * @param string|null $name              Name of the DOCTYPE.
	 * @param string|null $public_identifier Public identifier of the DOCTYPE.
	 * @param string|null $system_identifier System identifier of the DOCTYPE.
	 * @param bool        $force_quirks_flag Whether the force-quirks flag is set for the token.
	 */
	private function __construct(
		?string $name,
		?string $public_identifier,
		?string $system_identifier,
		bool $force_quirks_flag
	) {
		$this->name              = $name;
		$this->public_identifier = $public_identifier;
		$this->system_identifier = $system_identifier;

		$this->indicated_compatibility_mode = self::infer_compatibility_mode(
			$name,
			$public_identifier,
			$system_identifier,
			$force_quirks_flag
		);
	}

	/**
	 * Applies the "initial" insertion mode rules to the fields of a DOCTYPE token.
	 *
	 * @see https://html.spec.whatwg.org/#the-initial-insertion-mode
	 *
	 * @since 6.7.0
	 *
	 * @param string|null $name              Name of the DOCTYPE, already lower-cased.
	 * @param string|null $public_identifier Public identifier, or null if missing.
	 * @param string|null $system_identifier System identifier, or null if missing.
	 * @param bool        $force_quirks_flag Whether the force-quirks flag is on.
	 * @return string One of "no-quirks", "limited-quirks", or "quirks".
	 */
	private static function infer_compatibility_mode(
		?string $name,
		?string $public_identifier,
		?string $system_identifier,
		bool $force_quirks_flag
	): string {
		// > The force-quirks flag is set to on.
		if ( $force_quirks_flag ) {
			return 'quirks';
		}

		// > The name is not "html". (The tokenizer reports the name in lower case.)
		if ( 'html' !== $name ) {
			return 'quirks';
		}

		$public_is_missing = null === $public_identifier;
		$system_is_missing = null === $system_identifier;

		// The normative `<!DOCTYPE html>` needs no further inspection.
		if ( $public_is_missing && $system_is_missing ) {
			return 'no-quirks';
		}

		/*
		 * Missing identifiers compare as empty strings, and all comparisons
		 * are made ASCII case-insensitively by lower-casing both sides.
		 * Whether the system identifier was missing is remembered separately
		 * because an empty system identifier does not count as missing.
		 */
		$public_id = strtolower( $public_identifier ?? '' );
		$system_id = strtolower( $system_identifier ?? '' );

		// > The public identifier is set to…
		if ( in_array( $public_id, self::QUIRKS_PUBLIC_IDENTIFIERS, true ) ) {
			return 'quirks';
		}

		// > The system identifier is set to…
		if ( 'http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd' === $system_id ) {
			return 'quirks';
		}

		// Every remaining rule matches on a public identifier prefix.
		if ( '' === $public_id ) {
			return 'no-quirks';
		}

		// > The public identifier starts with…
		foreach ( self::QUIRKS_PUBLIC_IDENTIFIER_PREFIXES as $quirks_prefix ) {
			if ( str_starts_with( $public_id, $quirks_prefix ) ) {
				return 'quirks';
			}
		}

		$is_loose_html_401 =
			str_starts_with( $public_id, '-//w3c//dtd html 4.01 frameset//' ) ||
			str_starts_with( $public_id, '-//w3c//dtd html 4.01 transitional//' );

		// > The system identifier is missing and the public identifier starts with…
		if ( $is_loose_html_401 && $system_is_missing ) {
			return 'quirks';
		}

		// Limited-quirks: > The public identifier starts with…
		if (
			str_starts_with( $public_id, '-//w3c//dtd xhtml 1.0 frameset//' ) ||
			str_starts_with( $public_id, '-//w3c//dtd xhtml 1.0 transitional//' )
		) {
			return 'limited-quirks';
		}

		/*
		 * Limited-quirks: > The system identifier is not missing and the public
		 * identifier starts with… (the missing case has already returned above).
		 */
		if ( $is_loose_html_401 ) {
			return 'limited-quirks';
		}

		return 'no-quirks';
	}

	/**
	 * Creates a WP_HTML_Doctype_Info instance by parsing a raw DOCTYPE declaration token.
	 *
	 * The input must parse as a single, complete DOCTYPE token, though it need not
	 * be a valid DOCTYPE: malformed declarations still produce an instance, usually
	 * one indicating "quirks" mode.
	 *
	 * Example:
	 *
	 *     // Normative HTML DOCTYPE declaration.
	 *     $doctype = WP_HTML_Doctype_Info::from_doctype_token( '<!DOCTYPE html>' );
	 *     'no-quirks' === $doctype->indicated_compatibility_mode;
	 *
	 *     // A nonsensical DOCTYPE still parses, and indicates "quirks" mode.
	 *     $doctype = WP_HTML_Doctype_Info::from_doctype_token( '<!doctypeJSON SILLY "nonsense\'>' );
	 *     'quirks' === $doctype->indicated_compatibility_mode;
	 *
	 *     // Whitespace, including newlines, around the name is ignored.
	 *     $doctype = WP_HTML_Doctype_Info::from_doctype_token( "<!DOCTYPE\nhtml\n>" );
	 *     'no-quirks' === $doctype->indicated_compatibility_mode;
	 *
	 *     // Anything other than one complete DOCTYPE token fails to parse.
	 *     null === WP_HTML_Doctype_Info::from_doctype_token( ' <!DOCTYPE>' );
	 *     null === WP_HTML_Doctype_Info::from_doctype_token( '<!DOCTYPE ><p>' );
	 *     null === WP_HTML_Doctype_Info::from_doctype_token( '<!TYPEDOC>' );
	 *     null === WP_HTML_Doctype_Info::from_doctype_token( 'html' );
	 *     null === WP_HTML_Doctype_Info::from_doctype_token( '<?xml version="1.0" encoding="UTF-8" ?>' );
	 *
	 * @see https://html.spec.whatwg.org/#doctype-state
	 *
	 * @since 6.7.0
	 *
	 * @param string $doctype_html The complete raw DOCTYPE HTML string, e.g. `<!DOCTYPE html>`.
	 *
	 * @return WP_HTML_Doctype_Info|null An instance when the input is one complete DOCTYPE
	 *                                   token, otherwise null.
	 */
	public static function from_doctype_token( string $doctype_html ): ?self {
		$whitespace = " \t\n\f\r";
		$end        = strlen( $doctype_html ) - 1;

		/*
		 * The shortest complete token is `<!DOCTYPE>`, and every token must
		 * open with an ASCII case-insensitive `<!DOCTYPE`.
		 */
		if ( $end < 9 || 0 !== substr_compare( $doctype_html, '<!DOCTYPE', 0, 9, true ) ) {
			return null;
		}

		// The first `>` after the opener must also be the final byte.
		if ( strpos( $doctype_html, '>', 9 ) !== $end ) {
			return null;
		}

		/*
		 * Normalize newlines as the HTML input stream preprocessor would,
		 * then recompute the offset of the closing `>`.
		 *
		 * @see https://infra.spec.whatwg.org/#normalize-newlines
		 */
		$html = str_replace( array( "\r\n", "\r" ), "\n", $doctype_html );
		$end  = strlen( $html ) - 1;

		// "Before DOCTYPE name state": skip whitespace; no name forces quirks.
		$at = 9 + strspn( $html, $whitespace, 9 );
		if ( $at >= $end ) {
			return new self( null, null, null, true );
		}

		// "DOCTYPE name state": the name runs up to whitespace or the closing `>`.
		$name_length = strcspn( $html, $whitespace, $at, $end - $at );
		$name        = str_replace( "\0", "\u{FFFD}", strtolower( substr( $html, $at, $name_length ) ) );

		$at += $name_length;
		$at += strspn( $html, $whitespace, $at, $end - $at );
		if ( $at >= $end ) {
			return new self( $name, null, null, false );
		}

		/*
		 * "After DOCTYPE name state": only a PUBLIC or SYSTEM keyword followed
		 * by something more may appear here; all else forces quirks.
		 */
		if ( $at + 6 >= $end ) {
			return new self( $name, null, null, true );
		}

		$has_public_keyword = 0 === substr_compare( $html, 'PUBLIC', $at, 6, true );
		if ( ! $has_public_keyword && 0 !== substr_compare( $html, 'SYSTEM', $at, 6, true ) ) {
			return new self( $name, null, null, true );
		}

		$at += 6;
		$at += strspn( $html, $whitespace, $at, $end - $at );
		if ( $at >= $end ) {
			return new self( $name, null, null, true );
		}

		$public_id = null;
		if ( $has_public_keyword ) {
			list( $public_id, $at, $was_closed ) = self::scan_quoted_identifier( $html, $at, $end );
			if ( ! $was_closed ) {
				return new self( $name, $public_id, null, true );
			}

			// "Between DOCTYPE public and system identifiers state".
			$at += strspn( $html, $whitespace, $at, $end - $at );
			if ( $at >= $end ) {
				return new self( $name, $public_id, null, false );
			}
		}

		/*
		 * Whatever follows a properly closed system identifier is ignored
		 * without forcing quirks mode, so only the scan result matters here.
		 */
		$system_scan = self::scan_quoted_identifier( $html, $at, $end );

		return new self( $name, $public_id, $system_scan[0], ! $system_scan[2] );
	}

	/**
	 * Scans a single- or double-quoted identifier starting at a given offset.
	 *
	 * @since 6.7.0
	 *
	 * @param string $html Newline-normalized DOCTYPE token.
	 * @param int    $at   Offset where the opening quote is expected; must be before `$end`.
	 * @param int    $end  Offset of the closing `>` of the token.
	 * @return array {
	 *     @type string|null $0 Identifier with NULL bytes replaced by U+FFFD, or null
	 *                          when no opening quote was found at `$at`.
	 *     @type int         $1 Offset after the closing quote, or where scanning stopped.
	 *     @type bool        $2 Whether a matching closing quote was found before `$end`.
	 * }
	 */
	private static function scan_quoted_identifier( string $html, int $at, int $end ): array {
		$quote = $html[ $at ];
		if ( '"' !== $quote && "'" !== $quote ) {
			return array( null, $at, false );
		}

		$start      = $at + 1;
		$length     = strcspn( $html, $quote, $start, $end - $start );
		$identifier = str_replace( "\0", "\u{FFFD}", substr( $html, $start, $length ) );
		$stop       = $start + $length;

		if ( $stop >= $end || $quote !== $html[ $stop ] ) {
			return array( $identifier, $stop, false );
		}

		return array( $identifier, $stop + 1, true );
	}
}
619