run:R W Run
DIR
2026-03-11 16:18:51
R W Run
DIR
2026-03-11 16:18:52
R W Run
DIR
2026-03-11 16:18:51
R W Run
DIR
2026-03-11 16:18:52
R W Run
DIR
2026-03-11 16:18:52
R W Run
DIR
2026-03-11 16:18:51
R W Run
DIR
2026-03-11 16:18:51
R W Run
DIR
2026-03-11 16:18:51
R W Run
DIR
2026-03-11 16:18:51
R W Run
DIR
2026-03-11 16:18:52
R W Run
DIR
2026-03-11 16:18:52
R W Run
DIR
2026-03-11 16:18:52
R W Run
DIR
2026-03-11 16:18:51
R W Run
DIR
2026-03-11 16:18:52
R W Run
DIR
2026-03-11 16:18:52
R W Run
DIR
2026-03-11 16:18:51
R W Run
DIR
2026-03-11 16:18:52
R W Run
DIR
2026-03-11 16:18:51
R W Run
DIR
2026-03-11 16:18:51
R W Run
DIR
2026-03-11 16:18:51
R W Run
DIR
2026-03-11 16:18:52
R W Run
DIR
2026-03-11 16:18:52
R W Run
DIR
2026-03-11 16:18:52
R W Run
DIR
2026-03-11 16:18:52
R W Run
DIR
2026-03-11 16:18:51
R W Run
DIR
2026-03-11 16:18:52
R W Run
DIR
2026-03-11 16:18:52
R W Run
DIR
2026-03-11 16:18:51
R W Run
DIR
2026-03-11 16:18:51
R W Run
23.8 KB
2026-03-11 16:18:51
R W Run
7.8 KB
2026-03-11 16:18:52
R W Run
36.1 KB
2026-03-11 16:18:51
R W Run
11.9 KB
2026-03-11 16:18:52
R W Run
18.94 KB
2026-03-11 16:18:52
R W Run
7.35 KB
2026-03-11 16:18:52
R W Run
28.6 KB
2026-03-11 16:18:51
R W Run
316 By
2026-03-11 16:18:51
R W Run
12.9 KB
2026-03-11 16:18:51
R W Run
61.02 KB
2026-03-11 16:18:52
R W Run
15 KB
2026-03-11 16:18:51
R W Run
112.05 KB
2026-03-11 16:18:51
R W Run
12.47 KB
2026-03-11 16:18:51
R W Run
15.07 KB
2026-03-11 16:18:52
R W Run
9.84 KB
2026-03-11 16:18:52
R W Run
13.17 KB
2026-03-11 16:18:52
R W Run
33.83 KB
2026-03-11 16:18:51
R W Run
42.63 KB
2026-03-11 16:18:51
R W Run
55.71 KB
2026-03-11 16:18:52
R W Run
12.53 KB
2026-03-11 16:18:51
R W Run
2.55 KB
2026-03-11 16:18:52
R W Run
28.92 KB
2026-03-11 16:18:52
R W Run
539 By
2026-03-11 16:18:51
R W Run
367 By
2026-03-11 16:18:52
R W Run
42.65 KB
2026-03-11 16:18:51
R W Run
401 By
2026-03-11 16:18:51
R W Run
6.61 KB
2026-03-11 16:18:51
R W Run
664 By
2026-03-11 16:18:52
R W Run
20.63 KB
2026-03-11 16:18:51
R W Run
2.18 KB
2026-03-11 16:18:52
R W Run
453 By
2026-03-11 16:18:52
R W Run
457 By
2026-03-11 16:18:51
R W Run
36.83 KB
2026-03-11 16:18:52
R W Run
2.41 KB
2026-03-11 16:18:52
R W Run
8.28 KB
2026-03-11 16:18:51
R W Run
13.89 KB
2026-03-11 16:18:51
R W Run
11.76 KB
2026-03-11 16:18:51
R W Run
2.65 KB
2026-03-11 16:18:51
R W Run
7.43 KB
2026-03-11 16:18:51
R W Run
17.46 KB
2026-03-11 16:18:51
R W Run
5.14 KB
2026-03-11 16:18:52
R W Run
16.7 KB
2026-03-11 16:18:51
R W Run
8.28 KB
2026-03-11 16:18:52
R W Run
2.92 KB
2026-03-11 16:18:52
R W Run
1.32 KB
2026-03-11 16:18:51
R W Run
4.6 KB
2026-03-11 16:18:52
R W Run
11.62 KB
2026-03-11 16:18:52
R W Run
2.5 KB
2026-03-11 16:18:51
R W Run
1.97 KB
2026-03-11 16:18:51
R W Run
11.25 KB
2026-03-11 16:18:52
R W Run
5.32 KB
2026-03-11 16:18:51
R W Run
10.99 KB
2026-03-11 16:18:52
R W Run
68.32 KB
2026-03-11 16:18:51
R W Run
6.34 KB
2026-03-11 16:18:51
R W Run
5.49 KB
2026-03-11 16:18:51
R W Run
1.99 KB
2026-03-11 16:18:52
R W Run
7.02 KB
2026-03-11 16:18:51
R W Run
4.91 KB
2026-03-11 16:18:52
R W Run
16.86 KB
2026-03-11 16:18:51
R W Run
24.23 KB
2026-03-11 16:18:51
R W Run
3.97 KB
2026-03-11 16:18:51
R W Run
47.66 KB
2026-03-11 16:18:51
R W Run
9.22 KB
2026-03-11 16:18:51
R W Run
25.51 KB
2026-03-11 16:18:51
R W Run
198.38 KB
2026-03-11 16:18:52
R W Run
56.65 KB
2026-03-11 16:18:51
R W Run
10.46 KB
2026-03-11 16:18:51
R W Run
10.95 KB
2026-03-11 16:18:52
R W Run
29.26 KB
2026-03-11 16:18:51
R W Run
70.91 KB
2026-03-11 16:18:52
R W Run
35.3 KB
2026-03-11 16:18:52
R W Run
16.61 KB
2026-03-11 16:18:52
R W Run
2.57 KB
2026-03-11 16:18:52
R W Run
39.83 KB
2026-03-11 16:18:51
R W Run
70.64 KB
2026-03-11 16:18:51
R W Run
15.56 KB
2026-03-11 16:18:52
R W Run
7.33 KB
2026-03-11 16:18:52
R W Run
253 By
2026-03-11 16:18:51
R W Run
7.96 KB
2026-03-11 16:18:52
R W Run
3.23 KB
2026-03-11 16:18:52
R W Run
969 By
2026-03-11 16:18:52
R W Run
16.28 KB
2026-03-11 16:18:51
R W Run
7.22 KB
2026-03-11 16:18:51
R W Run
12.95 KB
2026-03-11 16:18:51
R W Run
6.53 KB
2026-03-11 16:18:51
R W Run
3.42 KB
2026-03-11 16:18:52
R W Run
5.84 KB
2026-03-11 16:18:51
R W Run
1.97 KB
2026-03-11 16:18:51
R W Run
4.3 KB
2026-03-11 16:18:52
R W Run
2.91 KB
2026-03-11 16:18:51
R W Run
16.46 KB
2026-03-11 16:18:52
R W Run
40.6 KB
2026-03-11 16:18:51
R W Run
20.22 KB
2026-03-11 16:18:51
R W Run
36.11 KB
2026-03-11 16:18:52
R W Run
17.01 KB
2026-03-11 16:18:51
R W Run
7.27 KB
2026-03-11 16:18:52
R W Run
6.62 KB
2026-03-11 16:18:52
R W Run
16.49 KB
2026-03-11 16:18:52
R W Run
1.79 KB
2026-03-11 16:18:52
R W Run
29.82 KB
2026-03-11 16:18:51
R W Run
6.67 KB
2026-03-11 16:18:52
R W Run
8.98 KB
2026-03-11 16:18:52
R W Run
19.42 KB
2026-03-11 16:18:51
R W Run
12.01 KB
2026-03-11 16:18:51
R W Run
17.11 KB
2026-03-11 16:18:51
R W Run
6.74 KB
2026-03-11 16:18:52
R W Run
30.93 KB
2026-03-11 16:18:51
R W Run
4.99 KB
2026-03-11 16:18:51
R W Run
4.25 KB
2026-03-11 16:18:51
R W Run
24.72 KB
2026-03-11 16:18:51
R W Run
29.96 KB
2026-03-11 16:18:52
R W Run
6.41 KB
2026-03-11 16:18:51
R W Run
160 KB
2026-03-11 16:18:51
R W Run
6.72 KB
2026-03-11 16:18:52
R W Run
10.92 KB
2026-03-11 16:18:51
R W Run
4.77 KB
2026-03-11 16:18:51
R W Run
3.38 KB
2026-03-11 16:18:51
R W Run
11.18 KB
2026-03-11 16:18:51
R W Run
62.19 KB
2026-03-11 16:18:51
R W Run
2.46 KB
2026-03-11 16:18:51
R W Run
9.17 KB
2026-03-11 16:18:51
R W Run
32.15 KB
2026-03-11 16:18:51
R W Run
34.05 KB
2026-03-11 16:18:52
R W Run
7.15 KB
2026-03-11 16:18:51
R W Run
3.47 KB
2026-03-11 16:18:52
R W Run
1.87 KB
2026-03-11 16:18:52
R W Run
30.91 KB
2026-03-11 16:18:51
R W Run
7.29 KB
2026-03-11 16:18:52
R W Run
7.35 KB
2026-03-11 16:18:51
R W Run
12.54 KB
2026-03-11 16:18:51
R W Run
19.12 KB
2026-03-11 16:18:51
R W Run
18.12 KB
2026-03-11 16:18:52
R W Run
39.99 KB
2026-03-11 16:18:52
R W Run
5.17 KB
2026-03-11 16:18:52
R W Run
979 By
2026-03-11 16:18:51
R W Run
18.44 KB
2026-03-11 16:18:52
R W Run
10.24 KB
2026-03-11 16:18:51
R W Run
1.77 KB
2026-03-11 16:18:52
R W Run
34.9 KB
2026-03-11 16:18:51
R W Run
7.19 KB
2026-03-11 16:18:52
R W Run
160.5 KB
2026-03-11 16:18:51
R W Run
64.27 KB
2026-03-11 16:18:51
R W Run
27.95 KB
2026-03-11 16:18:51
R W Run
4.69 KB
2026-03-11 16:18:51
R W Run
2.94 KB
2026-03-11 16:18:51
R W Run
43.13 KB
2026-03-11 16:18:52
R W Run
2.25 KB
2026-03-11 16:18:52
R W Run
22.5 KB
2026-03-11 16:18:51
R W Run
13.01 KB
2026-03-11 16:18:52
R W Run
3.27 KB
2026-03-11 16:18:51
R W Run
18 KB
2026-03-11 16:18:51
R W Run
210.4 KB
2026-03-11 16:18:52
R W Run
25.86 KB
2026-03-11 16:18:52
R W Run
115.85 KB
2026-03-11 16:18:51
R W Run
373 By
2026-03-11 16:18:52
R W Run
343 By
2026-03-11 16:18:52
R W Run
338 By
2026-03-11 16:18:51
R W Run
100.73 KB
2026-03-11 16:18:52
R W Run
130.93 KB
2026-03-11 16:18:51
R W Run
19.1 KB
2026-03-11 16:18:51
R W Run
17.41 KB
2026-03-11 16:18:52
R W Run
41.98 KB
2026-03-11 16:18:52
R W Run
400 By
2026-03-11 16:18:52
R W Run
11.1 KB
2026-03-11 16:18:52
R W Run
37.02 KB
2026-03-11 16:18:51
R W Run
2.24 KB
2026-03-11 16:18:51
R W Run
188.13 KB
2026-03-11 16:18:51
R W Run
338 By
2026-03-11 16:18:51
R W Run
38 KB
2026-03-11 16:18:51
R W Run
4.02 KB
2026-03-11 16:18:52
R W Run
5.38 KB
2026-03-11 16:18:51
R W Run
3.05 KB
2026-03-11 16:18:52
R W Run
2.61 KB
2026-03-11 16:18:51
R W Run
1.16 KB
2026-03-11 16:18:52
R W Run
4.04 KB
2026-03-11 16:18:51
R W Run
3.71 KB
2026-03-11 16:18:51
R W Run
24.6 KB
2026-03-11 16:18:51
R W Run
9.56 KB
2026-03-11 16:18:51
R W Run
346.43 KB
2026-03-11 16:18:52
R W Run
281.84 KB
2026-03-11 16:18:52
R W Run
14.95 KB
2026-03-11 16:18:51
R W Run
8.44 KB
2026-03-11 16:18:52
R W Run
168.95 KB
2026-03-11 16:18:52
R W Run
20.71 KB
2026-03-11 16:18:52
R W Run
25.27 KB
2026-03-11 16:18:51
R W Run
5.72 KB
2026-03-11 16:18:51
R W Run
4.63 KB
2026-03-11 16:18:52
R W Run
81.73 KB
2026-03-11 16:18:51
R W Run
67.18 KB
2026-03-11 16:18:51
R W Run
156.36 KB
2026-03-11 16:18:52
R W Run
55.19 KB
2026-03-11 16:18:51
R W Run
162 By
2026-03-11 16:18:51
R W Run
61.72 KB
2026-03-11 16:18:51
R W Run
216.06 KB
2026-03-11 16:18:52
R W Run
65.09 KB
2026-03-11 16:18:51
R W Run
25.24 KB
2026-03-11 16:18:52
R W Run
4.81 KB
2026-03-11 16:18:51
R W Run
6.48 KB
2026-03-11 16:18:52
R W Run
21.25 KB
2026-03-11 16:18:51
R W Run
2.79 KB
2026-03-11 16:18:52
R W Run
89.69 KB
2026-03-11 16:18:52
R W Run
19.42 KB
2026-03-11 16:18:52
R W Run
3.69 KB
2026-03-11 16:18:52
R W Run
4.11 KB
2026-03-11 16:18:51
R W Run
40.74 KB
2026-03-11 16:18:51
R W Run
25.38 KB
2026-03-11 16:18:51
R W Run
43.31 KB
2026-03-11 16:18:52
R W Run
102.57 KB
2026-03-11 16:18:52
R W Run
6.18 KB
2026-03-11 16:18:51
R W Run
124.47 KB
2026-03-11 16:18:52
R W Run
35.65 KB
2026-03-11 16:18:52
R W Run
6.94 KB
2026-03-11 16:18:52
R W Run
67.04 KB
2026-03-11 16:18:52
R W Run
10.62 KB
2026-03-11 16:18:51
R W Run
289.35 KB
2026-03-11 16:18:52
R W Run
36.23 KB
2026-03-11 16:18:51
R W Run
200 By
2026-03-11 16:18:52
R W Run
200 By
2026-03-11 16:18:52
R W Run
98.29 KB
2026-03-11 16:18:52
R W Run
30.02 KB
2026-03-11 16:18:52
R W Run
19.03 KB
2026-03-11 16:18:52
R W Run
5.06 KB
2026-03-11 16:18:52
R W Run
255 By
2026-03-11 16:18:51
R W Run
22.66 KB
2026-03-11 16:18:52
R W Run
154.63 KB
2026-03-11 16:18:51
R W Run
9.68 KB
2026-03-11 16:18:51
R W Run
258 By
2026-03-11 16:18:51
R W Run
23.49 KB
2026-03-11 16:18:51
R W Run
3.16 KB
2026-03-11 16:18:51
R W Run
8.4 KB
2026-03-11 16:18:52
R W Run
441 By
2026-03-11 16:18:51
R W Run
7.39 KB
2026-03-11 16:18:51
R W Run
173 KB
2026-03-11 16:18:52
R W Run
544 By
2026-03-11 16:18:52
R W Run
4.17 KB
2026-03-11 16:18:51
R W Run
35.97 KB
2026-03-11 16:18:52
R W Run
1.69 KB
2026-03-11 16:18:51
R W Run
2.84 KB
2026-03-11 16:18:52
R W Run
6.09 KB
2026-03-11 16:18:51
R W Run
8.71 KB
2026-03-11 16:18:51
R W Run
131.84 KB
2026-03-11 16:18:51
R W Run
37.45 KB
2026-03-11 16:18:51
R W Run
173.89 KB
2026-03-11 16:18:51
R W Run
7.09 KB
2026-03-11 16:18:51
R W Run
6.41 KB
2026-03-11 16:18:51
R W Run
1.08 KB
2026-03-11 16:18:51
R W Run
69.46 KB
2026-03-11 16:18:52
R W Run
445 By
2026-03-11 16:18:51
R W Run
799 By
2026-03-11 16:18:52
R W Run
error_log
πŸ“„class-wp-token-map.php
1<?php
2
3/**
4 * Class for efficiently looking up and mapping string keys to string values, with limits.
5 *
6 * @package WordPress
7 * @since 6.6.0
8 */
9
10/**
11 * WP_Token_Map class.
12 *
13 * Use this class in specific circumstances with a static set of lookup keys which map to
14 * a static set of transformed values. For example, this class is used to map HTML named
15 * character references to their equivalent UTF-8 values.
16 *
17 * This class works differently than code calling `in_array()` and other methods. It
18 * internalizes lookup logic and provides helper interfaces to optimize lookup and
19 * transformation. It provides a method for precomputing the lookup tables and storing
20 * them as PHP source code.
21 *
22 * All tokens and substitutions must be shorter than 256 bytes.
23 *
24 * Example:
25 *
26 * $smilies = WP_Token_Map::from_array( array(
27 * '8O' => '😯',
28 * ':(' => '🙁',
29 * ':)' => '🙂',
30 * ':?' => '😕',
31 * ) );
32 *
33 * true === $smilies->contains( ':)' );
34 * false === $smilies->contains( 'simile' );
35 *
36 * '😕' === $smilies->read_token( 'Not sure :?.', 9, $length_of_smily_syntax );
37 * 2 === $length_of_smily_syntax;
38 *
39 * ## Precomputing the Token Map.
40 *
41 * Creating the class involves some work sorting and organizing the tokens and their
42 * replacement values. In order to skip this, it's possible for the class to export
43 * its state and be used as actual PHP source code.
44 *
45 * Example:
46 *
47 * // Export with four spaces as the indent, only for the sake of this docblock.
48 * // The default indent is a tab character.
49 * $indent = ' ';
50 * echo $smilies->precomputed_php_source_table( $indent );
51 *
52 * // Output, to be pasted into a PHP source file:
53 * WP_Token_Map::from_precomputed_table(
54 * array(
55 * "storage_version" => "6.6.0-trunk",
56 * "key_length" => 2,
57 * "groups" => "",
58 * "large_words" => array(),
59 * "small_words" => "8O\x00:)\x00:(\x00:?\x00",
60 * "small_mappings" => array( "😯", "🙂", "🙁", "😕" )
61 * )
62 * );
63 *
64 * ## Large vs. small words.
65 *
66 * This class uses a short prefix called the "key" to optimize lookup of its tokens.
67 * This means that some tokens may be shorter than or equal in length to that key.
68 * Those words that are longer than the key are called "large" while those shorter
69 * than or equal to the key length are called "small."
70 *
71 * This separation of large and small words is incidental to the way this class
72 * optimizes lookup, and should be considered an internal implementation detail
73 * of the class. It may still be important to be aware of it, however.
74 *
75 * ## Determining Key Length.
76 *
77 * The choice of the size of the key length should be based on the data being stored in
78 * the token map. It should divide the data as evenly as possible, but should not create
79 * so many groups that a large fraction of the groups only contain a single token.
80 *
81 * For the HTML5 named character references, a key length of 2 was found to provide a
82 * sufficient spread and should be a good default for relatively large sets of tokens.
83 *
84 * However, for some data sets this might be too long. For example, a list of smilies
85 * may be too small for a key length of 2. Perhaps 1 would be more appropriate. It's
86 * best to experiment and determine empirically which values are appropriate.
87 *
88 * ## Generate Pre-Computed Source Code.
89 *
90 * Since the `WP_Token_Map` is designed for relatively static lookups, it can be
91 * advantageous to precompute the values and instantiate a table that has already
92 * sorted and grouped the tokens and built the lookup strings.
93 *
94 * This can be done with `WP_Token_Map::precomputed_php_source_table()`.
95 *
96 * Note that if there is a leading character that all tokens need, such as `&` for
97 * HTML named character references, it can be beneficial to exclude this from the
98 * token map. Instead, find occurrences of the leading character and then use the
99 * token map to see if the following characters complete the token.
100 *
101 * Example:
102 *
103 * $map = WP_Token_Map::from_array( array( 'simple_smile:' => '🙂', 'sob:' => '😭', 'soba:' => '🍜' ) );
104 * echo $map->precomputed_php_source_table();
105 * // Output
106 * WP_Token_Map::from_precomputed_table(
107 * array(
108 * "storage_version" => "6.6.0-trunk",
109 * "key_length" => 2,
110 * "groups" => "si\x00so\x00",
111 * "large_words" => array(
112 * // simple_smile:[🙂].
113 * "\x0bmple_smile:\x04🙂",
114 * // soba:[🍜] sob:[😭].
115 * "\x03ba:\x04🍜\x02b:\x04😭",
116 * ),
117 * "small_words" => "",
118 * "small_mappings" => array()
119 * )
120 * );
121 *
122 * This precomputed value can be stored directly in source code and will skip the
123 * startup cost of generating the lookup strings. See `$html5_named_character_entities`.
124 *
125 * Note that any updates to the precomputed format should update the storage version
126 * constant. It would also be best to provide an update function to take older known
127 * versions and upgrade them in place when loading into `from_precomputed_table()`.
128 *
129 * ## Future Direction.
130 *
131 * It may be viable to dynamically increase the length limits such that there's no need to impose them.
132 * The limit appears because of the packing structure, which indicates how many bytes each segment of
133 * text in the lookup tables spans. If, however, care were taken to track the longest word length, then
134 * the packing structure could change its representation to allow for that. Each additional byte storing
135 * length, however, increases the memory overhead and lookup runtime.
136 *
137 * An alternative approach could be to borrow the UTF-8 variable-length encoding and store lengths of less
138 * than 127 as a single byte with the high bit unset, storing longer lengths as the combination of
139 * continuation bytes.
140 *
141 * Since it has not been shown during the development of this class that longer strings are required, this
142 * update is deferred until such a need is clear.
143 *
144 * @since 6.6.0
145 */
146class WP_Token_Map {
147 /**
148 * Denotes the version of the code which produces pre-computed source tables.
149 *
150 * This version will be used not only to verify pre-computed data, but also
151 * to upgrade pre-computed data from older versions. Choosing a name that
152 * corresponds to the WordPress release will help people identify where an
153 * old copy of data came from.
154 */
155 const STORAGE_VERSION = '6.6.0-trunk';
156
157 /**
158 * Maximum length for each key and each transformed value in the table (in bytes).
159 *
160 * @since 6.6.0
161 */
162 const MAX_LENGTH = 256;
163
164 /**
165 * How many bytes of each key are used to form a group key for lookup.
166 * This also determines whether a word is considered short or long.
167 *
168 * @since 6.6.0
169 *
170 * @var int
171 */
172 private $key_length = 2;
173
174 /**
175 * Stores an optimized form of the word set, where words are grouped
176 * by a prefix of the `$key_length` and then collapsed into a string.
177 *
178 * In each group, the keys and lookups form a packed data structure.
179 * The keys in the string are stripped of their "group key," which is
180 * the prefix of length `$this->key_length` shared by all of the items
181 * in the group. Each word in the string is prefixed by a single byte
182 * whose raw unsigned integer value represents how many bytes follow.
183 *
184 * ┌────────────────┬───────────────┬─────────────────┬────────┐
185 * │ Length of rest │ Rest of key   │ Length of value │ Value  │
186 * │ of key (bytes) │               │ (bytes)         │        │
187 * ├────────────────┼───────────────┼─────────────────┼────────┤
188 * │ 0x08           │ nterDot;      │ 0x02            │ ·      │
189 * └────────────────┴───────────────┴─────────────────┴────────┘
190 *
191 * In this example, the key `CenterDot;` has a group key `Ce`, leaving
192 * eight bytes for the rest of the key, `nterDot;`, and two bytes for
193 * the transformed value `·` (or U+B7 or "\xC2\xB7").
194 *
195 * Example:
196 *
197 * // Stores array( 'CenterDot;' => '·', 'Cedilla;' => '¸' ).
198 * $groups = "Ce\x00";
199 * $large_words = array( "\x08nterDot;\x02·\x06dilla;\x02¸" )
200 *
201 * The prefixes appear in the `$groups` string, each followed by a null
202 * byte. This makes for quick lookup of where in the group string the key
203 * is found, and then a simple division converts that offset into the index
204 * in the `$large_words` array where the group string is to be found.
205 *
206 * This lookup data structure is designed to optimize cache locality and
207 * minimize indirect memory reads when matching strings in the set.
208 *
209 * @since 6.6.0
210 *
211 * @var array
212 */
213 private $large_words = array();
214
215 /**
216 * Stores the group keys for sequential string lookup.
217 *
218 * The offset into this string where the group key appears corresponds with the index
219 * into the group array where the rest of the group string appears. This is an optimization
220 * to improve cache locality while searching and minimize indirect memory accesses.
221 *
222 * @since 6.6.0
223 *
224 * @var string
225 */
226 private $groups = '';
227
228 /**
229 * Stores an optimized row of small words, where every entry is
230 * `$this->key_length + 1` bytes long and zero-extended.
231 *
232 * This packing allows for direct lookup of a short word followed
233 * by the null byte, if extended to `$this->key_length + 1`.
234 *
235 * Example:
236 *
237 * // Stores array( 'GT', 'LT', 'gt', 'lt' ).
238 * "GT\x00LT\x00gt\x00lt\x00"
239 *
240 * @since 6.6.0
241 *
242 * @var string
243 */
244 private $small_words = '';
245
246 /**
247 * Replacements for the small words, in the same order they appear.
248 *
249 * With the position of a small word it's possible to index the translation
250 * directly, as its position in the `$small_words` string corresponds to
251 * the index of the replacement in the `$small_mappings` array.
252 *
253 * Example:
254 *
255 * array( '>', '<', '>', '<' )
256 *
257 * @since 6.6.0
258 *
259 * @var string[]
260 */
261 private $small_mappings = array();
262
/**
 * Builds a token map from an associative array of lookup keys and replacement values.
 *
 * Example:
 *
 *     $smilies = WP_Token_Map::from_array( array(
 *         '8O' => '😯',
 *         ':(' => '🙁',
 *         ':)' => '🙂',
 *         ':?' => '😕',
 *     ) );
 *
 * @since 6.6.0
 *
 * @param array $mappings   Lookup keys mapped to their replacement values. Both are strings,
 *                          and each must be shorter than `self::MAX_LENGTH` bytes.
 * @param int   $key_length Optional. How many leading bytes of each key form its group key.
 *                          Leave at the default unless there's an empirical reason to
 *                          change it. Default 2.
 *
 * @return WP_Token_Map|null Token map, or `null` when a key or value is too long.
 */
public static function from_array( array $mappings, int $key_length = 2 ): ?WP_Token_Map {
	$map             = new WP_Token_Map();
	$map->key_length = $key_length;

	/*
	 * First pass: validate every pair and separate the small words (no longer
	 * than the group key) from the large words, which are bucketed by group key.
	 */
	$small_tokens = array();
	$buckets      = array();

	foreach ( $mappings as $token => $replacement ) {
		$token_length = strlen( $token );

		if ( $token_length >= self::MAX_LENGTH || strlen( $replacement ) >= self::MAX_LENGTH ) {
			_doing_it_wrong(
				__METHOD__,
				sprintf(
					/* translators: 1: maximum byte length (a count) */
					__( 'Token Map tokens and substitutions must all be shorter than %1$d bytes.' ),
					self::MAX_LENGTH
				),
				'6.6.0'
			);
			return null;
		}

		if ( $token_length <= $key_length ) {
			$small_tokens[] = $token;
			continue;
		}

		// Appending creates the bucket on first use; only the part after the group key is stored.
		$buckets[ substr( $token, 0, $key_length ) ][] = array( substr( $token, $key_length ), $replacement );
	}

	/*
	 * Second pass: order the words so that a shorter token can never mask a longer
	 * one sharing its prefix, e.g. `Cap` must not match before `CapitalDifferentialD`.
	 */
	usort( $small_tokens, 'WP_Token_Map::longest_first_then_alphabetical' );

	foreach ( array_keys( $buckets ) as $prefix ) {
		usort(
			$buckets[ $prefix ],
			static function ( array $left, array $right ): int {
				return self::longest_first_then_alphabetical( $left[0], $right[0] );
			}
		);
	}

	// Third pass: pack everything into the lookup strings.

	foreach ( $small_tokens as $token ) {
		// Each small word occupies a fixed-width, null-padded slot.
		$map->small_words     .= str_pad( $token, $key_length + 1, "\x00", STR_PAD_RIGHT );
		$map->small_mappings[] = $mappings[ $token ];
	}

	$prefixes = array_keys( $buckets );
	sort( $prefixes );

	foreach ( $prefixes as $prefix ) {
		$packed = '';

		foreach ( $buckets[ $prefix ] as list( $rest_of_token, $replacement ) ) {
			// Every segment is preceded by a single byte holding its length.
			$packed .= pack( 'C', strlen( $rest_of_token ) ) . $rest_of_token;
			$packed .= pack( 'C', strlen( $replacement ) ) . $replacement;
		}

		$map->groups       .= $prefix . "\x00";
		$map->large_words[] = $packed;
	}

	return $map;
}
365
/**
 * Loads a token map directly from pre-computed state, skipping the
 * cost of sorting and packing the tokens.
 *
 * Only pass data that was produced by
 * WP_Token_Map::precomputed_php_source_table().
 *
 * @since 6.6.0
 *
 * @param array $state {
 *     Stores pre-computed state for directly loading into a Token Map.
 *
 *     @type string $storage_version Which version of the code produced this state.
 *     @type int    $key_length      Group key length.
 *     @type string $groups          Group lookup index.
 *     @type array  $large_words     Large word groups and packed strings.
 *     @type string $small_words     Small words packed string.
 *     @type array  $small_mappings  Small word mappings.
 * }
 *
 * @return WP_Token_Map|null Map with precomputed data loaded, or `null` when a required
 *                           entry is missing or the storage version does not match.
 */
public static function from_precomputed_table( $state ): ?WP_Token_Map {
	$required_keys = array(
		'storage_version',
		'key_length',
		'groups',
		'large_words',
		'small_words',
		'small_mappings',
	);

	foreach ( $required_keys as $required_key ) {
		if ( isset( $state[ $required_key ] ) ) {
			continue;
		}

		_doing_it_wrong(
			__METHOD__,
			__( 'Missing required inputs to pre-computed WP_Token_Map.' ),
			'6.6.0'
		);
		return null;
	}

	// Only state written by this exact storage version is accepted.
	$loaded_version = $state['storage_version'];
	if ( self::STORAGE_VERSION !== $loaded_version ) {
		_doing_it_wrong(
			__METHOD__,
			/* translators: 1: version string, 2: version string. */
			sprintf( __( 'Loaded version \'%1$s\' incompatible with expected version \'%2$s\'.' ), $loaded_version, self::STORAGE_VERSION ),
			'6.6.0'
		);
		return null;
	}

	$map                 = new WP_Token_Map();
	$map->small_mappings = $state['small_mappings'];
	$map->small_words    = $state['small_words'];
	$map->large_words    = $state['large_words'];
	$map->groups         = $state['groups'];
	$map->key_length     = $state['key_length'];

	return $map;
}
427
/**
 * Indicates if a given word is a lookup key in the map.
 *
 * Example:
 *
 *     true  === $smilies->contains( ':)' );
 *     false === $smilies->contains( 'simile' );
 *
 * @since 6.6.0
 *
 * @param string $word             Determine if this word is a lookup key in the map.
 * @param string $case_sensitivity Optional. Pass 'ascii-case-insensitive' to ignore ASCII case when matching. Default 'case-sensitive'.
 * @return bool Whether there's an entry for the given word in the map.
 */
public function contains( string $word, string $case_sensitivity = 'case-sensitive' ): bool {
	$ignore_case = 'ascii-case-insensitive' === $case_sensitivity;
	$stride      = $this->key_length + 1;

	// Small words live in their own fixed-width, null-padded table.
	if ( $this->key_length >= strlen( $word ) ) {
		if ( 0 === strlen( $this->small_words ) ) {
			return false;
		}

		$term    = str_pad( $word, $stride, "\x00", STR_PAD_RIGHT );
		$word_at = $ignore_case ? stripos( $this->small_words, $term ) : strpos( $this->small_words, $term );

		return false !== $word_at;
	}

	$group_key     = substr( $word, 0, $this->key_length );
	$slug          = substr( $word, $this->key_length );
	$length        = strlen( $slug );
	$groups_length = strlen( $this->groups );
	$search_at     = 0;

	/*
	 * Walk every occurrence of the group key in the group index.
	 *
	 * - A hit that does not start on an entry boundary straddles two entries
	 *   (only possible when the word contains a null byte) and is not a group.
	 * - When ignoring case, several groups may differ only by letter case
	 *   (e.g. `Ab` and `ab`), and the word may live in any one of them.
	 */
	while ( $search_at < $groups_length ) {
		$group_at = $ignore_case
			? stripos( $this->groups, $group_key, $search_at )
			: strpos( $this->groups, $group_key, $search_at );

		if ( false === $group_at ) {
			return false;
		}

		$search_at = $group_at + 1;

		if ( 0 !== $group_at % $stride ) {
			continue;
		}

		$group        = $this->large_words[ intdiv( $group_at, $stride ) ];
		$group_length = strlen( $group );
		$at           = 0;

		while ( $at < $group_length ) {
			// Layout: [token length][token][mapping length][mapping].
			$token_length   = unpack( 'C', $group[ $at++ ] )[1];
			$token_at       = $at;
			$at            += $token_length;
			$mapping_length = unpack( 'C', $group[ $at++ ] )[1];
			$mapping_at     = $at;

			if ( $token_length === $length && 0 === substr_compare( $group, $slug, $token_at, $token_length, $ignore_case ) ) {
				return true;
			}

			$at = $mapping_at + $mapping_length;
		}

		// With exact matching only one group can carry this key.
		if ( ! $ignore_case ) {
			return false;
		}
	}

	return false;
}
486
/**
 * If the text starting at a given offset is a lookup key in the map,
 * return the corresponding transformation from the map, else `null`.
 *
 * This function returns the translated string, but accepts an optional
 * parameter `$matched_token_byte_length`, which communicates how many
 * bytes long the lookup key was, if it found one. This can be used to
 * advance a cursor in calling code if a lookup key was found.
 *
 * Example:
 *
 *     null === $smilies->read_token( 'Not sure :?.', 0, $token_byte_length );
 *     '😕' === $smilies->read_token( 'Not sure :?.', 9, $token_byte_length );
 *     2    === $token_byte_length;
 *
 * Example:
 *
 *     $output = '';
 *     $at     = 0;
 *     while ( $at < strlen( $input ) ) {
 *         $next_at = strpos( $input, ':', $at );
 *         if ( false === $next_at ) {
 *             break;
 *         }
 *
 *         $smily = $smilies->read_token( $input, $next_at, $token_byte_length );
 *         if ( null === $smily ) {
 *             // Not a smily: keep the text up to and including the colon.
 *             $output .= substr( $input, $at, $next_at + 1 - $at );
 *             $at      = $next_at + 1;
 *             continue;
 *         }
 *
 *         $output .= substr( $input, $at, $next_at - $at ) . $smily;
 *         $at      = $next_at + $token_byte_length;
 *     }
 *     $output .= substr( $input, $at );
 *
 * @since 6.6.0
 *
 * @param string   $text                       String in which to search for a lookup key.
 * @param int      $offset                     Optional. How many bytes into the string where the lookup key ought to start. Default 0.
 * @param int|null &$matched_token_byte_length Optional. Holds byte-length of found token matched, otherwise not set. Default null.
 * @param string   $case_sensitivity           Optional. Pass 'ascii-case-insensitive' to ignore ASCII case when matching. Default 'case-sensitive'.
 *
 * @return string|null Mapped value of lookup key if found, otherwise `null`.
 */
public function read_token( string $text, int $offset = 0, &$matched_token_byte_length = null, $case_sensitivity = 'case-sensitive' ): ?string {
	$ignore_case = 'ascii-case-insensitive' === $case_sensitivity;
	$text_length = strlen( $text );
	$stride      = $this->key_length + 1;

	/*
	 * Search for a long word first, and if that fails, a short one.
	 *
	 * A long word needs more than `key_length` bytes to remain after the offset.
	 * Measuring from the offset (rather than the whole text) guarantees a full-length
	 * group key and keeps the comparison offset below inside the text.
	 */
	if ( $text_length - $offset > $this->key_length ) {
		$group_key     = substr( $text, $offset, $this->key_length );
		$token_at      = $offset + $this->key_length;
		$groups_length = strlen( $this->groups );
		$search_at     = 0;

		/*
		 * Walk every occurrence of the group key in the group index.
		 *
		 * - A hit that does not start on an entry boundary straddles two entries
		 *   (only possible when the text contains a null byte) and is not a group.
		 * - When ignoring case, several groups may differ only by letter case
		 *   (e.g. `Ab` and `ab`), and the token may live in any one of them.
		 */
		while ( $search_at < $groups_length ) {
			$group_at = $ignore_case
				? stripos( $this->groups, $group_key, $search_at )
				: strpos( $this->groups, $group_key, $search_at );

			if ( false === $group_at ) {
				break;
			}

			$search_at = $group_at + 1;

			if ( 0 !== $group_at % $stride ) {
				continue;
			}

			$group        = $this->large_words[ intdiv( $group_at, $stride ) ];
			$group_length = strlen( $group );
			$at           = 0;

			while ( $at < $group_length ) {
				// Layout: [token length][token][mapping length][mapping].
				$token_length   = unpack( 'C', $group[ $at++ ] )[1];
				$token          = substr( $group, $at, $token_length );
				$at            += $token_length;
				$mapping_length = unpack( 'C', $group[ $at++ ] )[1];
				$mapping_at     = $at;

				if ( 0 === substr_compare( $text, $token, $token_at, $token_length, $ignore_case ) ) {
					$matched_token_byte_length = $this->key_length + $token_length;
					return substr( $group, $mapping_at, $mapping_length );
				}

				$at = $mapping_at + $mapping_length;
			}

			// With exact matching only one group can carry this key.
			if ( ! $ignore_case ) {
				break;
			}
		}
	}

	// Perhaps a short word then.
	return strlen( $this->small_words ) > 0
		? $this->read_small_token( $text, $offset, $matched_token_byte_length, $case_sensitivity )
		: null;
}
570
/**
 * Finds a match for a short word at the index.
 *
 * Short words are stored in fixed-width, null-padded slots of
 * `$this->key_length + 1` bytes. A slot matches when every byte of
 * its word, up to the padding, equals the text at the given offset.
 *
 * @since 6.6.0
 *
 * @param string   $text                       String in which to search for a lookup key.
 * @param int      $offset                     Optional. How many bytes into the string where the lookup key ought to start. Default 0.
 * @param int|null &$matched_token_byte_length Optional. Holds byte-length of found lookup key if matched, otherwise not set. Default null.
 * @param string   $case_sensitivity           Optional. Pass 'ascii-case-insensitive' to ignore ASCII case when matching. Default 'case-sensitive'.
 *
 * @return string|null Mapped value of lookup key if found, otherwise `null`.
 */
private function read_small_token( string $text, int $offset = 0, &$matched_token_byte_length = null, $case_sensitivity = 'case-sensitive' ): ?string {
	$ignore_case  = 'ascii-case-insensitive' === $case_sensitivity;
	$small_length = strlen( $this->small_words );
	$stride       = $this->key_length + 1;
	$search_text  = substr( $text, $offset, $this->key_length );

	// Nothing can match at or past the end of the text (`substr()` returns `false` there before PHP 8).
	if ( false === $search_text || '' === $search_text ) {
		return null;
	}

	if ( $ignore_case ) {
		$search_text = strtoupper( $search_text );
	}

	// May be shorter than the key length when the text ends early.
	$search_length = strlen( $search_text );
	$starting_char = $search_text[0];

	$at = 0;
	while ( $at < $small_length ) {
		$first_char = $this->small_words[ $at ];
		if (
			$starting_char !== $first_char &&
			( ! $ignore_case || strtoupper( $first_char ) !== $starting_char )
		) {
			$at += $stride;
			continue;
		}

		for ( $adjust = 1; $adjust < $this->key_length; $adjust++ ) {
			$token_char = $this->small_words[ $at + $adjust ];

			// Reaching the padding means the entire word has matched.
			if ( "\x00" === $token_char ) {
				$matched_token_byte_length = $adjust;
				return $this->small_mappings[ intdiv( $at, $stride ) ];
			}

			// The text ran out before the word did.
			if ( $adjust >= $search_length ) {
				$at += $stride;
				continue 2;
			}

			$text_char = $search_text[ $adjust ];
			if (
				$text_char !== $token_char &&
				( ! $ignore_case || strtoupper( $token_char ) !== $text_char )
			) {
				$at += $stride;
				continue 2;
			}
		}

		$matched_token_byte_length = $adjust;
		return $this->small_mappings[ intdiv( $at, $stride ) ];
	}

	return null;
}
623
/**
 * Exports the token map into an associative array of key/value pairs.
 *
 * Small words are read from their fixed-width, NUL-padded slots. Large words
 * are rebuilt by joining each group's prefix with the length-prefixed token
 * remainders stored inside that group.
 *
 * Example:
 *
 *     $smilies->to_array() === array(
 *         '8O' => '😯',
 *         ':(' => '🙁',
 *         ':)' => '🙂',
 *         ':?' => '😕',
 *     );
 *
 * @return array The lookup key/substitution values as an associative array.
 */
public function to_array(): array {
	$tokens     = array();
	$slot_width = $this->key_length + 1;

	// Small words: one slot per word, paired by position with its mapping.
	$at            = 0;
	$small_mapping = 0;
	$small_length  = strlen( $this->small_words );
	while ( $at < $small_length ) {
		$key            = rtrim( substr( $this->small_words, $at, $slot_width ), "\x00" );
		$value          = $this->small_mappings[ $small_mapping++ ];
		$tokens[ $key ] = $value;

		$at += $slot_width;
	}

	// Large words: each group holds `<token length><token><mapping length><mapping>` records.
	foreach ( $this->large_words as $index => $group ) {
		// The prefix spans the full key length, not a fixed two bytes.
		$prefix       = substr( $this->groups, $index * $slot_width, $this->key_length );
		$group_length = strlen( $group );
		$at           = 0;
		while ( $at < $group_length ) {
			$length = unpack( 'C', $group[ $at++ ] )[1];
			$key    = $prefix . substr( $group, $at, $length );

			$at    += $length;
			$length = unpack( 'C', $group[ $at++ ] )[1];
			$value  = substr( $group, $at, $length );

			$tokens[ $key ] = $value;
			$at            += $length;
		}
	}

	return $tokens;
}
671
/**
 * Export the token map for quick loading in PHP source code.
 *
 * This function has a specific purpose, to make loading of static token maps fast.
 * It's used to ensure that the HTML character reference lookups add a minimal cost
 * to initializing the PHP process.
 *
 * Every value is written inside a double-quoted PHP string literal, so control
 * bytes, double quotes, backslashes, and dollar signs are escaped to ensure that
 * the pasted source evaluates back to the exact same bytes.
 *
 * Example:
 *
 *     echo $smilies->precomputed_php_source_table();
 *
 *     // Output (whitespace abridged).
 *     WP_Token_Map::from_precomputed_table(
 *         array(
 *             "storage_version" => "6.6.0",
 *             "key_length" => 2,
 *             "groups" => "",
 *             "large_words" => array(),
 *             "small_words" => "8O\x00:)\x00:(\x00:?\x00",
 *             "small_mappings" => array( "😯", "🙂", "🙁", "😕" )
 *         )
 *     );
 *
 * @since 6.6.0
 *
 * @param string $indent Optional. Use this string for indentation, or rely on the default horizontal tab character. Default "\t".
 * @return string Value which can be pasted into a PHP source file for quick loading of table.
 */
public function precomputed_php_source_table( string $indent = "\t" ): string {
	$i1 = $indent;
	$i2 = $i1 . $indent;
	$i3 = $i2 . $indent;

	/*
	 * Escapes raw bytes for use inside a double-quoted PHP string literal.
	 *
	 * Control bytes always get two hex digits: PHP reads up to two digits
	 * after `\x`, so a one-digit escape followed by a literal hex digit
	 * would decode to the wrong byte. Dollar signs are escaped to prevent
	 * variable interpolation when the generated source is evaluated.
	 */
	$escape = static function ( string $raw ): string {
		return preg_replace_callback(
			'~[\x00-\x1f\x22\x24\x5c]~',
			static function ( $match_result ) {
				switch ( $match_result[0] ) {
					case '"':
						return '\\"';

					case '$':
						return '\\$';

					case '\\':
						return '\\\\';

					default:
						return sprintf( '\\x%02x', ord( $match_result[0] ) );
				}
			},
			$raw
		);
	};

	$class_version = self::STORAGE_VERSION;

	$output  = self::class . "::from_precomputed_table(\n";
	$output .= "{$i1}array(\n";
	$output .= "{$i2}\"storage_version\" => \"{$class_version}\",\n";
	$output .= "{$i2}\"key_length\" => {$this->key_length},\n";

	$group_line = $escape( $this->groups );
	$output    .= "{$i2}\"groups\" => \"{$group_line}\",\n";

	$output .= "{$i2}\"large_words\" => array(\n";

	// Group prefixes are NUL-terminated; the piece after the last NUL is empty.
	$prefixes = explode( "\x00", $this->groups );
	foreach ( $prefixes as $index => $prefix ) {
		if ( '' === $prefix ) {
			break;
		}
		$group          = $this->large_words[ $index ];
		$group_length   = strlen( $group );
		$comment_prefix = $escape( $prefix );
		$comment_line   = "{$i3}//";
		$data_line      = "{$i3}\"";
		$at             = 0;
		while ( $at < $group_length ) {
			// Each record is `<token length><token><mapping length><mapping>`.
			$token_length   = unpack( 'C', $group[ $at++ ] )[1];
			$token          = substr( $group, $at, $token_length );
			$at            += $token_length;
			$mapping_length = unpack( 'C', $group[ $at++ ] )[1];
			$mapping        = substr( $group, $at, $mapping_length );
			$at            += $mapping_length;

			// Length bytes describe the raw values, which the escaped text decodes back into.
			$token_digits   = sprintf( '%02x', $token_length );
			$mapping_digits = sprintf( '%02x', $mapping_length );

			$token   = $escape( $token );
			$mapping = $escape( $mapping );

			$comment_line .= " {$comment_prefix}{$token}[{$mapping}]";
			$data_line    .= "\\x{$token_digits}{$token}\\x{$mapping_digits}{$mapping}";
		}
		$comment_line .= ".\n";
		$data_line    .= "\",\n";

		$output .= $comment_line;
		$output .= $data_line;
	}

	$output .= "{$i2}),\n";

	$small_text = $escape( $this->small_words );
	$output    .= "{$i2}\"small_words\" => \"{$small_text}\",\n";

	$output .= "{$i2}\"small_mappings\" => array(\n";
	foreach ( $this->small_mappings as $mapping ) {
		$small_mapping = $escape( $mapping );
		$output       .= "{$i3}\"{$small_mapping}\",\n";
	}
	$output .= "{$i2})\n";
	$output .= "{$i1})\n";
	$output .= ')';

	return $output;
}
789
/**
 * Sort callback ordering strings from longest to shortest, breaking
 * ties between equally long strings with a byte-wise comparison.
 *
 * Placing longer strings first matters when building the token map:
 * a lookup must not stop at a key which is merely the beginning of
 * a longer key. For example, `Cap` must not be reported when the
 * text holds `CapitalDifferentialD`.
 *
 * @since 6.6.0
 *
 * @param string $a First string to compare.
 * @param string $b Second string to compare.
 * @return int Negative if `$a` sorts before `$b`, positive if `$a` sorts after `$b`, and 0 if they are equal.
 */
private static function longest_first_then_alphabetical( string $a, string $b ): int {
	// A positive difference means `$b` is longer and therefore sorts first.
	$length_difference = strlen( $b ) - strlen( $a );

	// Equal lengths fall through to `strcmp()`, which yields 0 only for identical strings.
	return 0 !== $length_difference
		? $length_difference
		: strcmp( $a, $b );
}
820}
821