1<?php
2/**
3 * HTML API: WP_HTML_Processor_State class
4 *
5 * @package WordPress
6 * @subpackage HTML-API
7 * @since 6.4.0
8 */
9
/**
 * Holds the internal parsing state that the HTML processor
 * tracks while it parses an HTML document.
 *
 * This class exists for internal use by the HTML processor only.
 *
 * @since 6.4.0
 *
 * @access private
 *
 * @see WP_HTML_Processor
 */
class WP_HTML_Processor_State {
	/*
	 * Insertion mode constants.
	 *
	 * Naming each supported insertion mode as a constant makes the
	 * modes easy to find and to recognize wherever the parser uses them.
	 *
	 * This list only contains the insertion modes the parser supports,
	 * not every mode which exists. Whenever the parser gains support for
	 * another mode, add it here and keep to the same pattern for the
	 * constant name and its value.
	 *
	 * @see https://html.spec.whatwg.org/#the-insertion-mode
	 */

	/**
	 * Full HTML parser insertion mode: "initial".
	 *
	 * This is the default value of the `$insertion_mode` property.
	 *
	 * @since 6.4.0
	 *
	 * @see https://html.spec.whatwg.org/#the-initial-insertion-mode
	 * @see WP_HTML_Processor_State::$insertion_mode
	 *
	 * @var string
	 */
	const INSERTION_MODE_INITIAL = 'insertion-mode-initial';

	/**
	 * Full HTML parser insertion mode: "before html".
	 *
	 * @since 6.7.0
	 *
	 * @see https://html.spec.whatwg.org/#the-before-html-insertion-mode
	 * @see WP_HTML_Processor_State::$insertion_mode
	 *
	 * @var string
	 */
	const INSERTION_MODE_BEFORE_HTML = 'insertion-mode-before-html';

	/**
	 * Full HTML parser insertion mode: "before head".
	 *
	 * @since 6.7.0
	 *
	 * @see https://html.spec.whatwg.org/#parsing-main-beforehead
	 * @see WP_HTML_Processor_State::$insertion_mode
	 *
	 * @var string
	 */
	const INSERTION_MODE_BEFORE_HEAD = 'insertion-mode-before-head';

	/**
	 * Full HTML parser insertion mode: "in head".
	 *
	 * @since 6.7.0
	 *
	 * @see https://html.spec.whatwg.org/#parsing-main-inhead
	 * @see WP_HTML_Processor_State::$insertion_mode
	 *
	 * @var string
	 */
	const INSERTION_MODE_IN_HEAD = 'insertion-mode-in-head';

	/**
	 * Full HTML parser insertion mode: "in head noscript".
	 *
	 * @since 6.7.0
	 *
	 * @see https://html.spec.whatwg.org/#parsing-main-inheadnoscript
	 * @see WP_HTML_Processor_State::$insertion_mode
	 *
	 * @var string
	 */
	const INSERTION_MODE_IN_HEAD_NOSCRIPT = 'insertion-mode-in-head-noscript';

	/**
	 * Full HTML parser insertion mode: "after head".
	 *
	 * @since 6.7.0
	 *
	 * @see https://html.spec.whatwg.org/#parsing-main-afterhead
	 * @see WP_HTML_Processor_State::$insertion_mode
	 *
	 * @var string
	 */
	const INSERTION_MODE_AFTER_HEAD = 'insertion-mode-after-head';

	/**
	 * Full HTML parser insertion mode: "in body".
	 *
	 * @since 6.4.0
	 *
	 * @see https://html.spec.whatwg.org/#parsing-main-inbody
	 * @see WP_HTML_Processor_State::$insertion_mode
	 *
	 * @var string
	 */
	const INSERTION_MODE_IN_BODY = 'insertion-mode-in-body';

	/**
	 * Full HTML parser insertion mode: "in table".
	 *
	 * @since 6.7.0
	 *
	 * @see https://html.spec.whatwg.org/#parsing-main-intable
	 * @see WP_HTML_Processor_State::$insertion_mode
	 *
	 * @var string
	 */
	const INSERTION_MODE_IN_TABLE = 'insertion-mode-in-table';

	/**
	 * Full HTML parser insertion mode: "in table text".
	 *
	 * @since 6.7.0
	 *
	 * @see https://html.spec.whatwg.org/#parsing-main-intabletext
	 * @see WP_HTML_Processor_State::$insertion_mode
	 *
	 * @var string
	 */
	const INSERTION_MODE_IN_TABLE_TEXT = 'insertion-mode-in-table-text';

	/**
	 * Full HTML parser insertion mode: "in caption".
	 *
	 * @since 6.7.0
	 *
	 * @see https://html.spec.whatwg.org/#parsing-main-incaption
	 * @see WP_HTML_Processor_State::$insertion_mode
	 *
	 * @var string
	 */
	const INSERTION_MODE_IN_CAPTION = 'insertion-mode-in-caption';

	/**
	 * Full HTML parser insertion mode: "in column group".
	 *
	 * @since 6.7.0
	 *
	 * @see https://html.spec.whatwg.org/#parsing-main-incolumngroup
	 * @see WP_HTML_Processor_State::$insertion_mode
	 *
	 * @var string
	 */
	const INSERTION_MODE_IN_COLUMN_GROUP = 'insertion-mode-in-column-group';

	/**
	 * Full HTML parser insertion mode: "in table body".
	 *
	 * @since 6.7.0
	 *
	 * @see https://html.spec.whatwg.org/#parsing-main-intablebody
	 * @see WP_HTML_Processor_State::$insertion_mode
	 *
	 * @var string
	 */
	const INSERTION_MODE_IN_TABLE_BODY = 'insertion-mode-in-table-body';

	/**
	 * Full HTML parser insertion mode: "in row".
	 *
	 * @since 6.7.0
	 *
	 * @see https://html.spec.whatwg.org/#parsing-main-inrow
	 * @see WP_HTML_Processor_State::$insertion_mode
	 *
	 * @var string
	 */
	const INSERTION_MODE_IN_ROW = 'insertion-mode-in-row';

	/**
	 * Full HTML parser insertion mode: "in cell".
	 *
	 * @since 6.7.0
	 *
	 * @see https://html.spec.whatwg.org/#parsing-main-incell
	 * @see WP_HTML_Processor_State::$insertion_mode
	 *
	 * @var string
	 */
	const INSERTION_MODE_IN_CELL = 'insertion-mode-in-cell';

	/**
	 * Full HTML parser insertion mode: "in select".
	 *
	 * @since 6.7.0
	 *
	 * @see https://html.spec.whatwg.org/#parsing-main-inselect
	 * @see WP_HTML_Processor_State::$insertion_mode
	 *
	 * @var string
	 */
	const INSERTION_MODE_IN_SELECT = 'insertion-mode-in-select';

	/**
	 * Full HTML parser insertion mode: "in select in table".
	 *
	 * @since 6.7.0
	 *
	 * @see https://html.spec.whatwg.org/#parsing-main-inselectintable
	 * @see WP_HTML_Processor_State::$insertion_mode
	 *
	 * @var string
	 */
	const INSERTION_MODE_IN_SELECT_IN_TABLE = 'insertion-mode-in-select-in-table';

	/**
	 * Full HTML parser insertion mode: "in template".
	 *
	 * @since 6.7.0
	 *
	 * @see https://html.spec.whatwg.org/#parsing-main-intemplate
	 * @see WP_HTML_Processor_State::$insertion_mode
	 *
	 * @var string
	 */
	const INSERTION_MODE_IN_TEMPLATE = 'insertion-mode-in-template';

	/**
	 * Full HTML parser insertion mode: "after body".
	 *
	 * @since 6.7.0
	 *
	 * @see https://html.spec.whatwg.org/#parsing-main-afterbody
	 * @see WP_HTML_Processor_State::$insertion_mode
	 *
	 * @var string
	 */
	const INSERTION_MODE_AFTER_BODY = 'insertion-mode-after-body';

	/**
	 * Full HTML parser insertion mode: "in frameset".
	 *
	 * @since 6.7.0
	 *
	 * @see https://html.spec.whatwg.org/#parsing-main-inframeset
	 * @see WP_HTML_Processor_State::$insertion_mode
	 *
	 * @var string
	 */
	const INSERTION_MODE_IN_FRAMESET = 'insertion-mode-in-frameset';

	/**
	 * Full HTML parser insertion mode: "after frameset".
	 *
	 * @since 6.7.0
	 *
	 * @see https://html.spec.whatwg.org/#parsing-main-afterframeset
	 * @see WP_HTML_Processor_State::$insertion_mode
	 *
	 * @var string
	 */
	const INSERTION_MODE_AFTER_FRAMESET = 'insertion-mode-after-frameset';

	/**
	 * Full HTML parser insertion mode: "after after body".
	 *
	 * @since 6.7.0
	 *
	 * @see https://html.spec.whatwg.org/#the-after-after-body-insertion-mode
	 * @see WP_HTML_Processor_State::$insertion_mode
	 *
	 * @var string
	 */
	const INSERTION_MODE_AFTER_AFTER_BODY = 'insertion-mode-after-after-body';

	/**
	 * Full HTML parser insertion mode: "after after frameset".
	 *
	 * @since 6.7.0
	 *
	 * @see https://html.spec.whatwg.org/#the-after-after-frameset-insertion-mode
	 * @see WP_HTML_Processor_State::$insertion_mode
	 *
	 * @var string
	 */
	const INSERTION_MODE_AFTER_AFTER_FRAMESET = 'insertion-mode-after-after-frameset';

	/**
	 * Stack of template insertion modes; starts out empty.
	 *
	 * @since 6.7.0
	 *
	 * @see https://html.spec.whatwg.org/#the-insertion-mode:stack-of-template-insertion-modes
	 *
	 * @var array<string>
	 */
	public $stack_of_template_insertion_modes = array();

	/**
	 * Stack of open elements, tracked while scanning HTML.
	 *
	 * The constructor assigns a new instance to this property,
	 * so it is not expected to be null.
	 *
	 * @since 6.4.0
	 *
	 * @see https://html.spec.whatwg.org/#stack-of-open-elements
	 *
	 * @var WP_HTML_Open_Elements
	 */
	public $stack_of_open_elements;

	/**
	 * List of open formatting elements, which is used for
	 * handling formatting element tags that are mis-nested.
	 *
	 * The constructor assigns a new instance to this property,
	 * so it is not expected to be null.
	 *
	 * @since 6.4.0
	 *
	 * @see https://html.spec.whatwg.org/#list-of-active-formatting-elements
	 *
	 * @var WP_HTML_Active_Formatting_Elements
	 */
	public $active_formatting_elements;

	/**
	 * The currently-matched tag, or null if there isn't one.
	 *
	 * @since 6.4.0
	 *
	 * @var WP_HTML_Token|null
	 */
	public $current_token = null;

	/**
	 * Insertion mode used during tree construction.
	 *
	 * Defaults to the "initial" insertion mode.
	 *
	 * @since 6.4.0
	 *
	 * @see https://html.spec.whatwg.org/#insertion-mode
	 *
	 * @var string
	 */
	public $insertion_mode = self::INSERTION_MODE_INITIAL;

	/**
	 * Context node that initialized the fragment parser,
	 * for when the parser was created as a fragment parser.
	 *
	 * @since 6.4.0
	 * @deprecated 6.8.0 WP_HTML_Processor tracks the context_node internally.
	 *
	 * @var null
	 */
	public $context_node = null;

	/**
	 * Encoding recognized for the input byte stream, if any.
	 *
	 * > The stream of code points that comprises the input to the tokenization
	 * > stage will be initially seen by the user agent as a stream of bytes
	 * > (typically coming over the network or from the local file system).
	 * > The bytes encode the actual characters according to a particular character
	 * > encoding, which the user agent uses to decode the bytes into characters.
	 *
	 * @since 6.7.0
	 *
	 * @var string|null
	 */
	public $encoding = null;

	/**
	 * How confident the parser is in the input encoding.
	 *
	 * > When the HTML parser is decoding an input byte stream, it uses a character
	 * > encoding and a confidence. The confidence is either tentative, certain, or
	 * > irrelevant. The encoding used, and whether the confidence in that encoding
	 * > is tentative or certain, is used during the parsing to determine whether to
	 * > change the encoding. If no encoding is necessary, e.g. because the parser is
	 * > operating on a Unicode stream and doesn't have to use a character encoding
	 * > at all, then the confidence is irrelevant.
	 *
	 * @since 6.7.0
	 *
	 * @var string
	 */
	public $encoding_confidence = 'tentative';

	/**
	 * Pointer to the HEAD element.
	 *
	 * @since 6.7.0
	 *
	 * @see https://html.spec.whatwg.org/multipage/parsing.html#head-element-pointer
	 *
	 * @var WP_HTML_Token|null
	 */
	public $head_element = null;

	/**
	 * Pointer to the FORM element.
	 *
	 * > points to the last form element that was opened and whose end tag has
	 * > not yet been seen. It is used to make form controls associate with
	 * > forms in the face of dramatically bad markup, for historical reasons.
	 * > It is ignored inside template elements.
	 *
	 * @todo This may be invalidated by a seek operation.
	 *
	 * @since 6.7.0
	 *
	 * @see https://html.spec.whatwg.org/#form-element-pointer
	 *
	 * @var WP_HTML_Token|null
	 */
	public $form_element = null;

	/**
	 * Whether a `FRAMESET` element is allowed in the current state (the frameset-ok flag).
	 *
	 * > The frameset-ok flag is set to "ok" when the parser is created. It is set to "not ok" after certain tokens are seen.
	 *
	 * @since 6.4.0
	 *
	 * @see https://html.spec.whatwg.org/#frameset-ok-flag
	 *
	 * @var bool
	 */
	public $frameset_ok = true;

	/**
	 * Constructor: builds a new, empty state value.
	 *
	 * Creates the stack of open elements and the list of
	 * active formatting elements; every other property
	 * keeps its declared default.
	 *
	 * @since 6.4.0
	 *
	 * @see WP_HTML_Processor
	 */
	public function __construct() {
		$this->stack_of_open_elements     = new WP_HTML_Open_Elements();
		$this->active_formatting_elements = new WP_HTML_Active_Formatting_Elements();
	}
}
455