TokenizerPatterns.php 3.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159
  1. <?php
  2. /*
  3. * This file is part of the Symfony package.
  4. *
  5. * (c) Fabien Potencier <fabien@symfony.com>
  6. *
  7. * For the full copyright and license information, please view the LICENSE
  8. * file that was distributed with this source code.
  9. */
  10. namespace Symfony\Component\CssSelector\Parser\Tokenizer;
  11. /**
  12. * CSS selector tokenizer patterns builder.
  13. *
  14. * This component is a port of the Python cssselect library,
  15. * which is copyright Ian Bicking, @see https://github.com/SimonSapin/cssselect.
  16. *
  17. * @author Jean-François Simon <jeanfrancois.simon@sensiolabs.com>
  18. *
  19. * @internal
  20. */
  class TokenizerPatterns
  {
      /**
       * Backslash followed by 1 to 6 hexadecimal digits (captured) and at most
       * one trailing whitespace sequence.
       *
       * @var string
       */
      private $unicodeEscapePattern;

      /**
       * Backslash followed by any single character (captured) that "." matches.
       *
       * @var string
       */
      private $simpleEscapePattern;

      /**
       * Backslash followed by a line break (LF, CRLF, CR or form feed).
       *
       * @var string
       */
      private $newLineEscapePattern;

      /**
       * Any escape usable inside identifiers: a unicode escape, or a backslash
       * followed by a character that is neither a line break nor a hex digit.
       *
       * @var string
       */
      private $escapePattern;

      /**
       * Any escape usable inside quoted strings: a new line escape or a
       * regular escape.
       *
       * @var string
       */
      private $stringEscapePattern;

      /**
       * A single byte outside the 0x00-0x7F range.
       *
       * @var string
       */
      private $nonAsciiPattern;

      /**
       * One "name" character: letter, digit, underscore, hyphen, escape or
       * non-ASCII byte.
       *
       * @var string
       */
      private $nmCharPattern;

      /**
       * One "name start" character: letter, underscore, escape or non-ASCII
       * byte (digits and hyphens are excluded).
       *
       * @var string
       */
      private $nmStartPattern;

      /**
       * An identifier: optional hyphen, one name start character, then any
       * number of name characters.
       *
       * @var string
       */
      private $identifierPattern;

      /**
       * A hash token: "#" followed by at least one name character; the name
       * (without the "#") is captured.
       *
       * @var string
       */
      private $numberPattern;

      /**
       * An optionally signed integer or decimal number.
       *
       * @var string
       */
      private $hashPattern;

      /**
       * sprintf() template for the content of a quoted string; its single
       * "%s" placeholder receives the quote character.
       *
       * @var string
       */
      private $quotedStringPattern;

      /**
       * Builds every pattern fragment once.
       *
       * Fragments are stored without delimiters, anchors or flags; the
       * getters add those. Compound fragments are assembled from the simpler
       * ones, so the assignment order below matters.
       */
      public function __construct()
      {
          // Escape sequences.
          $this->unicodeEscapePattern = '\\\\([0-9a-f]{1,6})(?:\r\n|[ \n\r\t\f])?';
          $this->simpleEscapePattern = '\\\\(.)';
          $this->newLineEscapePattern = '\\\\(?:\n|\r\n|\r|\f)';
          $this->escapePattern = $this->unicodeEscapePattern.'|\\\\[^\n\r\f0-9a-f]';
          $this->stringEscapePattern = $this->newLineEscapePattern.'|'.$this->escapePattern;

          // Character classes used by names.
          $this->nonAsciiPattern = '[^\x00-\x7F]';
          $this->nmCharPattern = '[_a-z0-9-]|'.$this->escapePattern.'|'.$this->nonAsciiPattern;
          $this->nmStartPattern = '[_a-z]|'.$this->escapePattern.'|'.$this->nonAsciiPattern;

          // Tokens.
          // CSS allows one leading hyphen in front of the name start character
          // (e.g. "-moz-box"). "-1" still does not match because a digit is not
          // a valid name start character.
          $this->identifierPattern = '-?(?:'.$this->nmStartPattern.')(?:'.$this->nmCharPattern.')*';
          $this->hashPattern = '#((?:'.$this->nmCharPattern.')+)';
          $this->numberPattern = '[+-]?(?:[0-9]*\.[0-9]+|[0-9]+)';

          // The backslash is excluded from the "plain character" class so that
          // it can only be consumed by an escape alternative. Otherwise the
          // first alternative swallows the backslash on its own and an escaped
          // quote (or an escaped line break) ends the match too early.
          $this->quotedStringPattern = '([^\n\r\f\\\\%s]|'.$this->stringEscapePattern.')*';
      }

      /**
       * Returns the anchored, case-sensitive regex matching an escaped line
       * break at the start of the subject.
       *
       * @return string
       */
      public function getNewLineEscapePattern()
      {
          return '~^'.$this->newLineEscapePattern.'~';
      }

      /**
       * Returns the anchored, case-sensitive regex matching a backslash and
       * the single character after it; that character is capture group 1.
       *
       * @return string
       */
      public function getSimpleEscapePattern()
      {
          return '~^'.$this->simpleEscapePattern.'~';
      }

      /**
       * Returns the anchored, case-insensitive regex matching a unicode
       * escape; the hexadecimal digits are capture group 1.
       *
       * @return string
       */
      public function getUnicodeEscapePattern()
      {
          return '~^'.$this->unicodeEscapePattern.'~i';
      }

      /**
       * Returns the anchored, case-insensitive regex matching an identifier
       * at the start of the subject.
       *
       * @return string
       */
      public function getIdentifierPattern()
      {
          return '~^'.$this->identifierPattern.'~i';
      }

      /**
       * Returns the anchored, case-insensitive regex matching a hash token;
       * the name following "#" is capture group 1.
       *
       * @return string
       */
      public function getHashPattern()
      {
          return '~^'.$this->hashPattern.'~i';
      }

      /**
       * Returns the anchored, case-sensitive regex matching a number at the
       * start of the subject.
       *
       * @return string
       */
      public function getNumberPattern()
      {
          return '~^'.$this->numberPattern.'~';
      }

      /**
       * Returns the anchored, case-insensitive regex matching the content of
       * a quoted string, starting right after the opening quote.
       *
       * The pattern stops before the first unescaped quote or line break and
       * also matches the empty string.
       *
       * @param string $quote The quote character delimiting the string. It is
       *                      inserted as-is into a regex character class, so
       *                      it is expected to be a plain "'" or '"'.
       *
       * @return string
       */
      public function getQuotedStringPattern($quote)
      {
          return '~^'.sprintf($this->quotedStringPattern, $quote).'~i';
      }
  }