EGOCMS  24.0
EGOTEC Content-Management-System
Ego_DomQuery.php
gehe zur Dokumentation dieser Datei
1 <?php
/**
 * Small DOM helper that parses an HTML fragment into a DOMDocument and offers
 * CSS-selector / XPath based querying and in-place modification of the tree.
 *
 * HTML is always parsed as UTF-8 without an implied <html>/<body> wrapper and
 * without a DOCTYPE.
 */
class Ego_DomQuery {
    /**
     * The parsed document all operations work on.
     *
     * @var DOMDocument
     */
    public $doc;

    /**
     * XPath evaluator bound to {@see $doc}.
     *
     * @var DOMXPath
     */
    public $xpath;

    /**
     * Template engine used to render wrappers and node templates; when null,
     * $GLOBALS['smarty'] is used instead. Only its fetch() method is called here.
     *
     * @var object|null
     */
    private $smarty = null;

    /**
     * Not read or written by any method of this class.
     *
     * @var bool
     */
    private $fetch = true;

    /**
     * Site used to resolve "file:" wrappers; when null, $GLOBALS['site'] is
     * used instead. Only its getSiteFile() method is called here.
     *
     * @var object|null
     */
    private $site = null;

    /**
     * Parses the given HTML.
     *
     * @param mixed            $content HTML markup (cast to string).
     * @param DOMDocument|null $doc     Optional document to load the markup into;
     *                                  a new DOMDocument is created when omitted.
     */
    public function __construct($content, &$doc = null)
    {
        $this->doc = $this->load($content, $doc ?: new DOMDocument());
        $this->xpath = new DOMXPath($this->doc);
    }

    /**
     * Sets the template engine used by modify().
     *
     * @param object $smarty Object exposing fetch($template).
     * @return void
     */
    public function setSmarty($smarty)
    {
        $this->smarty = $smarty;
    }

    /**
     * Sets the site used by modify() to resolve "file:" wrappers.
     *
     * @param object $site Object exposing getSiteFile($path).
     * @return void
     */
    public function setSite($site)
    {
        $this->site = $site;
    }

    /**
     * Loads HTML into a document as UTF-8.
     *
     * @param mixed            $content HTML markup (cast to string).
     * @param DOMDocument|null $doc     Target document; defaults to $this->doc.
     * @return DOMDocument The document that was loaded into.
     */
    private function load($content, $doc = null)
    {
        if (!$doc) {
            $doc = $this->doc;
        }

        // Collect parser diagnostics in libxml's internal buffer instead of
        // silencing them with "@"; the caller's previous setting is restored.
        $previous = libxml_use_internal_errors(true);
        try {
            // The leading XML declaration forces the parser to treat the input as UTF-8.
            // TODO: LIBXML_SCHEMA_CREATE is only available from PHP 5.5.2 on.
            $doc->loadHTML(
                '<?xml encoding="UTF-8">' . ((string) $content),
                LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD | LIBXML_PARSEHUGE
            );
        } finally {
            if (!$previous) {
                // Only discard diagnostics when nobody else is collecting them.
                libxml_clear_errors();
            }
            libxml_use_internal_errors($previous);
        }

        // Remove the helper declaration again. Iterate over a snapshot because
        // childNodes is a live list and removing while iterating skips siblings.
        foreach (iterator_to_array($doc->childNodes, false) as $item) {
            if ($item->nodeType == XML_PI_NODE) {
                $doc->removeChild($item);
            }
        }
        $doc->encoding = 'UTF-8';
        return $doc;
    }

    /**
     * Removes the closing tags of void elements from serialized HTML.
     *
     * @param string $html Serialized HTML.
     * @return string HTML without end tags such as "</br>" or "</img>".
     */
    public static function validHTML($html)
    {
        static $closingTags = null;
        if ($closingTags === null) {
            // Void elements have no end tag.
            $closingTags = array_map(
                static function ($tag) {
                    return "</$tag>";
                },
                explode(',', 'area,base,br,col,command,embed,hr,img,input,keygen,link,meta,param,source,track,wbr')
            );
        }
        return str_replace($closingTags, '', $html);
    }

    /**
     * Serializes the document (or a single node) to HTML.
     *
     * The contents of <code> elements are emitted entity-encoded. The document
     * itself is left unchanged, so repeated calls return the same string.
     *
     * @param DOMNode|null $node Node to serialize; the whole document when null.
     * @return string
     */
    public function getHTML($node = null)
    {
        // Entities inside "CODE" elements must always stay encoded. The elements
        // are rewritten temporarily and their original children are remembered.
        $restore = [];
        foreach (iterator_to_array($this->doc->getElementsByTagName('code'), false) as $code) {
            $restore[] = [$code, iterator_to_array($code->childNodes, false)];
            $html = self::validHTML($this->getInnerHTML($code));
            $this->setInnerHTML([$code], htmlentities($html));
        }

        // Make sure escaped "<" and ">" are still escaped after all entities have been decoded.
        $html = str_replace(['&lt;', '&gt;'], ['&amp;lt;', '&amp;gt;'], self::validHTML($this->doc->saveHTML($node)));

        // Put the original children back; otherwise every further call would
        // encode the already encoded content once more.
        foreach ($restore as $entry) {
            $code = $entry[0];
            while ($code->firstChild) {
                $code->removeChild($code->firstChild);
            }
            foreach ($entry[1] as $child) {
                $code->appendChild($child);
            }
        }

        return html_entity_decode($html, ENT_NOQUOTES | ENT_HTML5, 'UTF-8');
    }

    /**
     * Returns the serialized HTML of all children of a node.
     *
     * @param DOMNode $node
     * @return string
     */
    public function getInnerHTML($node)
    {
        $html = '';
        foreach ($node->childNodes as $child) {
            $html .= $node->ownerDocument->saveHTML($child);
        }
        return $html;
    }

    /**
     * Replaces the children of each node with parsed HTML.
     *
     * @param iterable             $nodes    Nodes to change.
     * @param string|callable|null $callback HTML string, or a callable that receives
     *                                       the node and returns the HTML; null empties the node.
     * @return iterable The nodes that were passed in.
     */
    public function setInnerHTML($nodes, $callback = null)
    {
        foreach ($nodes as $node) {
            // Walk backwards because childNodes is a live list.
            for ($i = $node->childNodes->length - 1; $i >= 0; $i--) {
                $node->removeChild($node->childNodes->item($i));
            }
            if (is_string($callback)) {
                $content = $callback;
            } else {
                $content = $callback ? $callback($node) : '';
            }

            $doc = $this->load("<html><body>$content</body></html>", new DOMDocument());
            $fragment = $node->ownerDocument->importNode($doc->documentElement, true);
            $parent = $fragment->childNodes->item(0); // the imported <body>
            if (!$parent) {
                continue;
            }
            // Move the parsed children over, last one first, each in front of the previous.
            for ($i = $parent->childNodes->length - 1; $i >= 0; $i--) {
                $node->insertBefore($parent->childNodes->item($i), $node->firstChild);
            }
        }
        return $nodes;
    }

    /**
     * Replaces each node by the parsed content, i.e. the node itself is removed
     * and the content takes its place.
     *
     * @param iterable             $nodes    Nodes to replace.
     * @param string|callable|null $callback Content string, or a callable that receives
     *                                       the node and returns the content.
     * @return iterable The nodes that were passed in.
     */
    public function setText($nodes, $callback = null)
    {
        foreach ($nodes as $node) {
            if (is_string($callback)) {
                $content = $callback;
            } else {
                $content = $callback ? $callback($node) : '';
            }

            $parent = $node->parentNode;
            if (!$parent) {
                // A detached node has no position that could receive the content.
                continue;
            }

            // Cast so a non-string callback result is treated as content, not as a callable.
            $holders = $this->setInnerHTML([$this->doc->createElement('div')], (string) $content);
            $reference = $node;
            for ($i = $holders[0]->childNodes->length - 1; $i >= 0; $i--) {
                $child = $holders[0]->childNodes->item($i);
                $parent->insertBefore($child, $reference);
                $reference = $child;
            }
            $parent->removeChild($node);
        }
        return $nodes;
    }

    /**
     * Evaluates an XPath expression on the document.
     *
     * @param string       $expression     XPath expression.
     * @param DOMNode|null $contextNode    Optional context node.
     * @param bool         $registerNodeNS Passed through to DOMXPath::query().
     * @return DOMNodeList|false
     */
    public function xpath($expression, $contextNode = null, $registerNodeNS = true)
    {
        return $this->xpath->query($expression, $contextNode, $registerNodeNS);
    }

    /**
     * Evaluates a CSS selector by translating it to XPath.
     *
     * @param string       $expression     CSS selector.
     * @param DOMNode|null $contextNode    Optional context node.
     * @param bool         $registerNodeNS Passed through to DOMXPath::query().
     * @return DOMNodeList|false
     */
    public function query($expression, $contextNode = null, $registerNodeNS = true)
    {
        return $this->xpath($this->transform($expression), $contextNode, $registerNodeNS);
    }

    /**
     * Modifies the nodes matched by $params['xpath'] or $params['selector'].
     *
     * For every matched node the first applicable option is applied:
     *  - 'node':      render the template and replace the node ('replace'), insert it
     *                 before the first match of the selector in 'before', or append it;
     *  - 'inner':     wrap the node's inner HTML with the template ("<%>" marks the content);
     *  - 'wrapper':   wrap the node itself with the template ("<%>" marks the node);
     *                 a value starting with "file:" is read from a site file;
     *  - 'attribute' === 'class': add the classes in 'value' (or replace all with
     *                 'replace'), then drop the classes listed in 'remove';
     *  - 'remove':    remove the attribute named in 'attribute';
     *  - otherwise:   set 'attribute' to 'value'; a value of "@name" copies the
     *                 node's attribute "name".
     * Without 'xpath'/'selector' a 'wrapper' is applied to the whole document.
     *
     * @param array $params  Options as described above; all keys are optional.
     * @param bool  $literal Wrap the inserted HTML in {literal} tags before rendering.
     * @return void
     */
    public function modify($params, $literal = false)
    {
        // Read every option once with a default so missing keys raise no warnings.
        $xpath = $params['xpath'] ?? null;
        $selector = $params['selector'] ?? null;
        $nodeTemplate = $params['node'] ?? null;
        $replace = $params['replace'] ?? null;
        $before = $params['before'] ?? null;
        $inner = $params['inner'] ?? null;
        $wrapper = $params['wrapper'] ?? null;
        $attribute = $params['attribute'] ?? null;
        $remove = $params['remove'] ?? null;
        $value = (string) ($params['value'] ?? '');

        if ($wrapper && strpos($wrapper, 'file:') === 0) {
            // Read the wrapper HTML from a file.
            $wrapper = Ego_System::file_get_contents(($this->site ?? $GLOBALS['site'])->getSiteFile(substr($wrapper, 5)));
        }

        $nodes = null;
        if ($xpath) {
            $nodes = $this->xpath($xpath);
        } elseif ($selector) {
            $nodes = $this->query($selector);
        }

        if ($nodes) {
            foreach ($nodes as $node) {
                if ($nodeTemplate) {
                    $content = ($this->smarty ?? $GLOBALS['smarty'])->fetch("string:{$nodeTemplate}");
                    $doc = $this->load("<html><body>" . trim(self::validHTML($content)) . "</body></html>", new DOMDocument());
                    $fragment = $node->ownerDocument->importNode($doc->documentElement, true);
                    $body = $fragment->firstChild;
                    $newNode = $body ? $body->firstChild : null;
                    if (!$newNode) {
                        // The template rendered to nothing; there is no node to insert.
                        continue;
                    }

                    if ($replace) {
                        // Replace the element.
                        $node->parentNode->replaceChild($newNode, $node);
                    } elseif ($before) {
                        // Insert the new element in front of the first match.
                        $targets = $this->query($before);
                        $target = $targets ? $targets->item(0) : null;
                        if ($target && $target->parentNode) {
                            $target->parentNode->insertBefore($newNode, $target);
                        }
                    } else {
                        // Insert the new element as the last child.
                        $node->appendChild($newNode);
                    }
                } elseif ($inner) {
                    // Wrap the content, then swap the node for a re-parsed copy of itself.
                    $this->setInnerHTML([$node], $this->renderWrapper($this->getInnerHTML($node), $inner, $literal));
                    $this->replaceNodeWithHTML($node, $node->ownerDocument->saveHTML($node));
                } elseif ($wrapper) {
                    // Wrap the element.
                    $this->replaceNodeWithHTML(
                        $node,
                        $this->renderWrapper($node->ownerDocument->saveHTML($node), $wrapper, $literal)
                    );
                } elseif ($attribute === 'class') {
                    $this->modifyClasses($node, $value, $replace, $remove);
                } elseif ($attribute === null || $attribute === '') {
                    // No attribute named: nothing to remove or set.
                    continue;
                } elseif ($remove) {
                    $node->removeAttribute($attribute);
                } else {
                    // A leading "@" copies the value of another attribute of the node.
                    $attributeValue = $value;
                    if (strpos($attributeValue, '@') === 0) {
                        $attributeValue = $node->getAttribute(substr($attributeValue, 1));
                    }
                    $node->setAttribute($attribute, $attributeValue);
                }
            }
        } elseif ($wrapper) {
            // Wrap the whole document.
            $domQuery = new Ego_DomQuery('<root>' . $this->renderWrapper($this->doc->saveHTML(), $wrapper, $literal) . '</root>');
            while ($this->doc->hasChildNodes()) {
                $this->doc->removeChild($this->doc->firstChild);
            }
            foreach ($domQuery->doc->documentElement->childNodes as $child) {
                $this->doc->appendChild($this->doc->importNode($child, true));
            }
        }
    }

    /**
     * Renders a wrapper template with "<%>" replaced by the given HTML.
     *
     * @param string $html    HTML that takes the place of "<%>".
     * @param string $wrapper Wrapper template.
     * @param bool   $literal Protect the HTML with {literal} tags.
     * @return string Rendered output of the template engine.
     */
    private function renderWrapper($html, $wrapper, $literal)
    {
        return ($this->smarty ?? $GLOBALS['smarty'])->fetch('string:' . str_replace(
            '<%>',
            $literal ? "{literal}$html{/literal}" : $html,
            $wrapper
        ));
    }

    /**
     * Replaces a node by the nodes parsed from an HTML string.
     *
     * @param DOMNode $node Node to replace; ignored when it has no parent.
     * @param string  $html Replacement markup.
     * @return void
     */
    private function replaceNodeWithHTML($node, $html)
    {
        $parent = $node->parentNode;
        if (!$parent) {
            return;
        }
        // The <root> element keeps several top-level nodes together while parsing.
        $domQuery = new Ego_DomQuery('<root>' . $html . '</root>');
        foreach ($domQuery->doc->documentElement->childNodes as $child) {
            $parent->insertBefore($node->ownerDocument->importNode($child, true), $node);
        }
        $parent->removeChild($node);
    }

    /**
     * Adds, replaces and removes class names on an element.
     *
     * @param DOMElement        $node    Element to change.
     * @param string            $value   Space separated classes to add.
     * @param mixed             $replace Truthy to discard the existing classes first.
     * @param array|string|null $remove  Classes to drop (array or space separated string).
     * @return void
     */
    private function modifyClasses($node, $value, $replace, $remove)
    {
        $current = $replace ? '' : (string) $node->getAttribute('class');
        $classes = $current !== '' ? explode(' ', $current) : [];
        $classes = array_unique(array_merge($classes, explode(' ', $value)));

        // Splitting an empty or multi-space string yields empty tokens, which
        // would leave stray blanks in the attribute value.
        $classes = array_filter($classes, static function ($class) {
            return $class !== '';
        });

        // Remove classes.
        if ($remove) {
            if (is_array($remove)) {
                $removeList = array_map('strval', $remove);
            } elseif (is_string($remove)) {
                $removeList = preg_split('/\s+/', $remove, -1, PREG_SPLIT_NO_EMPTY);
            } else {
                $removeList = [];
            }
            $classes = array_filter($classes, static function ($class) use ($removeList) {
                return !in_array($class, $removeList, true);
            });
        }

        $node->setAttribute('class', implode(' ', $classes));
    }

    /**
     * Translates a CSS selector (optionally a comma separated list) to XPath.
     *
     * @param string $path CSS selector.
     * @return string XPath expression; alternatives are joined with "|".
     */
    private function transform($path)
    {
        $path = (string) $path;
        if (strpos($path, ',') !== false) {
            $expressions = [];
            foreach (explode(',', $path) as $part) {
                $part = trim($part);
                if ($part === '') {
                    // An empty list member would translate to the invalid XPath "//".
                    continue;
                }
                $expressions[] = $this->transform($part);
            }
            return implode('|', $expressions);
        }

        $paths = ['//'];
        $path = preg_replace('|\s+>\s+|', '>', $path);
        $segments = preg_split('/\s+/', $path);
        foreach ($segments as $key => $segment) {
            $pathSegment = $this->tokenize($segment);
            if (0 == $key) {
                if (0 === strpos($pathSegment, '[contains(')) {
                    $paths[0] .= '*' . ltrim($pathSegment, '*');
                } else {
                    $paths[0] .= $pathSegment;
                }
                continue;
            }
            if (0 === strpos($pathSegment, '[contains(')) {
                // foreach iterates over a copy, so the alternatives appended here
                // are not visited again within this loop.
                foreach ($paths as $pathKey => $xpath) {
                    $paths[$pathKey] .= '//*' . ltrim($pathSegment, '*');
                    $paths[] = $xpath . $pathSegment;
                }
            } else {
                foreach ($paths as $pathKey => $xpath) {
                    $paths[$pathKey] .= '//' . $pathSegment;
                }
            }
        }

        if (1 == count($paths)) {
            return $paths[0];
        }
        return implode('|', $paths);
    }

    /**
     * Translates a single selector segment (no descendant whitespace) to XPath.
     *
     * @param string $expression Selector segment.
     * @return string XPath step(s).
     */
    private function tokenize($expression)
    {
        // Child selectors
        $expression = str_replace('>', '/', $expression);

        // Attribute only
        $expression = preg_replace('|\[([^=]*)\]|i', "[@\$1]", $expression);

        // IDs
        $expression = preg_replace('|#([a-z][a-z0-9_-]*)|i', '[@id=\'$1\']', $expression);
        $expression = preg_replace('|(?<![a-z0-9_-])(\[@id=)|i', '*$1', $expression);

        // arbitrary attribute strict equality
        $expression = preg_replace_callback(
            '|\[@?([a-z0-9_-]+)=[\'"]([^\'"]+)[\'"]\]|i',
            static function ($matches) {
                return '[@' . strtolower($matches[1]) . "='" . $matches[2] . "']";
            },
            $expression
        );

        // arbitrary attribute contains full word
        $expression = preg_replace_callback(
            '|\[([a-z0-9_-]+)~=[\'"]([^\'"]+)[\'"]\]|i',
            static function ($matches) {
                return "[contains(concat(' ', normalize-space(@" . strtolower($matches[1]) . "), ' '), ' "
                    . $matches[2] . " ')]";
            },
            $expression
        );

        // arbitrary attribute contains specified content
        $expression = preg_replace_callback(
            '|\[([a-z0-9_-]+)\*=[\'"]([^\'"]+)[\'"]\]|i',
            static function ($matches) {
                return "[contains(@" . strtolower($matches[1]) . ", '"
                    . $matches[2] . "')]";
            },
            $expression
        );

        // Classes
        if (false === strpos($expression, "[@")) {
            $expression = preg_replace(
                '|\.([a-z][a-z0-9_-]*)|i',
                "[contains(concat(' ', normalize-space(@class), ' '), ' \$1 ')]",
                $expression
            );
        }

        // :not(...)
        $expression = preg_replace('|:not\(\[(.*?)]\)|i', "[not(\$1)]", $expression);

        // Collapse a doubled wildcard produced by the steps above.
        $expression = str_replace('**', '*', $expression);

        return $expression;
    }

    /**
     * Inserts a node relative to the first element matched by
     * $params['xpath'] or $params['selector'].
     *
     * @param DOMNode|string $node   Node to insert, or HTML whose first node is inserted.
     * @param array          $params 'xpath' or 'selector' to find the reference element and
     *                               'position': 'before', 'after', 'first' or 'last'.
     * @return void
     */
    public function insertNode($node, $params)
    {
        if (is_string($node)) {
            // Convert the HTML string into a DOMNode of this document.
            $domQuery = new Ego_DomQuery('<root>' . $node . '</root>');
            $root = $domQuery->doc->documentElement;
            $first = $root ? $root->firstChild : null;
            $node = $first ? $this->doc->importNode($first, true) : null;
        }
        if (!$node) {
            return;
        }

        $xpath = $params['xpath'] ?? null;
        $selector = $params['selector'] ?? null;
        $nodes = null;
        if ($xpath) {
            $nodes = $this->xpath($xpath);
        } elseif ($selector) {
            $nodes = $this->query($selector);
        }
        if (!$nodes) {
            return;
        }

        $element = $nodes->item(0);
        if (!$element) {
            return;
        }

        switch ($params['position'] ?? null) {
            case 'before':
                $element->parentNode->insertBefore($node, $element);
                break;
            case 'after':
                // A null nextSibling makes insertBefore() append at the end.
                $element->parentNode->insertBefore($node, $element->nextSibling);
                break;
            case 'first':
                $element->insertBefore($node, $element->firstChild);
                break;
            case 'last':
                $element->appendChild($node);
                break;
        }
    }
}
531 ?>
modify($params, $literal=false)
getInnerHTML($node)
setSmarty($smarty)
insertNode($node, $params)
__construct($content, &$doc=null)
query($expression, $contextNode=null, $registerNodeNS=true)
setInnerHTML($nodes, $callback=null)
getHTML($node=null)
xpath($expression, $contextNode=null, $registerNodeNS=true)
setText($nodes, $callback=null)
setSite($site)
static validHTML($html)
static file_get_contents($filename, $utf8=true, $context=null)