28 private $smarty =
null;
33 private $fetch =
true;
48 $this->doc = $this->load($content,
$doc ?
$doc :
new DOMDocument());
49 $this->
xpath =
new DOMXPath($this->doc);
60 $this->smarty = $smarty;
81 private function load($content,
$doc =
null)
87 '<?xml encoding="UTF-8">' . ((
string) $content),
88 LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD | LIBXML_PARSEHUGE
90 foreach (
$doc->childNodes as $item) {
91 if ($item->nodeType == XML_PI_NODE) {
92 $doc->removeChild($item);
95 $doc->encoding =
'UTF-8';
108 return str_replace(array_map(
function($tag) {
110 }, explode(
',',
'area,base,br,col,command,embed,hr,img,input,keygen,link,meta,param,source,track,wbr')),
'', $html);
122 $codes = $this->doc->getElementsByTagName(
'code');
123 foreach ($codes as $code) {
129 $html = str_replace([
'<',
'>'], [
'&lt;',
'&gt;'], self::validHTML($this->doc->saveHTML($node)));
130 return html_entity_decode($html, ENT_NOQUOTES | ENT_HTML5,
'UTF-8');
141 foreach ($node->childNodes as $child) {
142 $html .= $node->ownerDocument->saveHTML($child);
156 foreach ($nodes as $node) {
157 for ($i = $node->childNodes->length - 1; $i >= 0; $i--) {
158 $node->removeChild($node->childNodes->item($i));
160 if (is_string($callback)) {
161 $content = $callback;
163 $content = $callback ? $callback($node) :
'';
166 $doc = $this->load(
"<html><body>$content</body></html>",
new DOMDocument());
167 $fragment = $node->ownerDocument->importNode(
$doc->documentElement,
true);
168 $parent = $fragment->childNodes->item(0);
169 for ($i = $parent->childNodes->length - 1; $i >= 0; $i--) {
170 $node->insertBefore($parent->childNodes->item($i), $node->firstChild);
183 public function setText($nodes, $callback =
null)
185 foreach ($nodes as $node) {
186 if (is_string($callback)) {
187 $content = $callback;
189 $content = $callback ? $callback($node) :
'';
192 $parents = $this->
setInnerHTML(array($this->doc->createElement(
'div')), $content);
194 for ($i = $parents[0]->childNodes->length - 1; $i >= 0; $i--) {
195 $child = $parents[0]->childNodes->item($i);
196 $node->parentNode->insertBefore($child, $reference);
199 $node->parentNode->removeChild($node);
212 public function xpath($expression, $contextNode =
null, $registerNodeNS =
true)
214 return $this->
xpath->query($expression, $contextNode, $registerNodeNS);
225 public function query($expression, $contextNode =
null, $registerNodeNS =
true)
227 return $this->
xpath($this->transform($expression), $contextNode, $registerNodeNS);
244 public function modify($params, $literal =
false)
253 $parse_wrapper =
function($html, $wrapper) use ($literal) {
254 $output = ($this->smarty ?? $GLOBALS[
'smarty'])->fetch(
'string:' . str_replace(
256 $literal ?
"{literal}$html{/literal}" : $html,
262 if ($params[
'wrapper'] && strpos($params[
'wrapper'],
'file:') === 0) {
268 if ($params[
'xpath']) {
269 $nodes = $this->
xpath($params[
'xpath']);
270 } elseif ($params[
'selector']) {
271 $nodes = $this->
query($params[
'selector']);
274 foreach ($nodes as $node) {
275 if ($params[
'node']) {
276 $content = ($this->smarty ?? $GLOBALS[
'smarty'])->fetch(
"string:{$params['node']}");
277 $doc = $this->load(
"<html><body>" . trim(self::validHTML($content)) .
"</body></html>",
new DOMDocument());
278 $fragment = $node->ownerDocument->importNode(
$doc->documentElement,
true);
279 $new_node = $fragment->childNodes->item(0)->childNodes->item(0);
281 if ($params[
'replace']) {
283 $node->parentNode->replaceChild($new_node, $node);
286 if ($params[
'before']) {
288 $parents = $this->
query($params[
'before']);
289 foreach ($parents as $parent) {
290 $parent->parentNode->insertBefore($new_node, $parent);
295 $node->appendChild($new_node);
298 } elseif ($params[
'inner']) {
300 $wrapper =
'<root>' . $inner->ownerDocument->saveHTML($inner) .
'</root>';
302 foreach ($domQuery->doc->firstChild->childNodes as $child) {
303 $node->parentNode->insertBefore($node->ownerDocument->importNode($child,
true), $node);
305 $node->parentNode->removeChild($node);
307 } elseif ($params[
'wrapper']) {
309 $wrapper =
'<root>' . $parse_wrapper($node->ownerDocument->saveHTML($node), $params[
'wrapper']) .
'</root>';
311 foreach ($domQuery->doc->firstChild->childNodes as $child) {
312 $node->parentNode->insertBefore($node->ownerDocument->importNode($child,
true), $node);
314 $node->parentNode->removeChild($node);
315 } elseif ($params[
'attribute'] ==
'class') {
317 if (!$params[
'replace'] && ($className = $node->getAttribute(
'class'))) {
318 $classes = explode(
' ', $className);
323 $classes = array_unique(array_merge($classes, explode(
' ', $params[
'value'] ??
'')));
326 if ($params[
'remove']) {
327 $classes = array_filter($classes,
function($class) use ($params) {
328 return !in_array($class, $params[
'remove']);
332 $node->setAttribute(
'class', implode(
' ', $classes));
333 } elseif ($params[
'remove']) {
334 $node->removeAttribute($params[
'attribute']);
336 $value = $params[
'value'] ??
'';
339 if ($value[0] ===
'@') {
340 $value = $node->getAttribute(substr($value, 1));
343 $node->setAttribute($params[
'attribute'], $value);
346 } elseif ($params[
'wrapper']) {
348 $wrapper =
'<root>' . $parse_wrapper($this->doc->saveHTML(), $params[
'wrapper']) .
'</root>';
350 while ($this->doc->hasChildNodes()) {
351 $this->doc->removeChild($this->doc->firstChild);
353 foreach ($domQuery->doc->firstChild->childNodes as $child) {
354 $this->doc->appendChild($this->doc->importNode($child,
true));
367 private function transform($path)
369 $path = (string) $path;
370 if (strstr($path,
',')) {
371 $paths = explode(
',', $path);
372 $expressions = array();
373 foreach ($paths as $path) {
374 $xpath = $this->transform(trim($path));
377 } elseif (is_array(
$xpath)) {
378 $expressions = array_merge($expressions,
$xpath);
381 return implode(
'|', $expressions);
384 $paths = array(
'//');
385 $path = preg_replace(
'|\s+>\s+|',
'>', $path);
386 $segments = preg_split(
'/\s+/', $path);
387 foreach ($segments as $key => $segment) {
388 $pathSegment = $this->tokenize($segment);
390 if (0 === strpos($pathSegment,
'[contains(')) {
391 $paths[0] .=
'*' . ltrim($pathSegment,
'*');
393 $paths[0] .= $pathSegment;
397 if (0 === strpos($pathSegment,
'[contains(')) {
398 foreach ($paths as $pathKey =>
$xpath) {
399 $paths[$pathKey] .=
'//*' . ltrim($pathSegment,
'*');
400 $paths[] =
$xpath . $pathSegment;
403 foreach ($paths as $pathKey =>
$xpath) {
404 $paths[$pathKey] .=
'//' . $pathSegment;
409 if (1 == count($paths)) {
412 return implode(
'|', $paths);
423 private function tokenize($expression)
426 $expression = str_replace(
'>',
'/', $expression);
429 $expression = preg_replace(
'|\[([^=]*)\]|i',
"[@\$1]", $expression);
432 $expression = preg_replace(
'|#([a-z][a-z0-9_-]*)|i',
'[@id=\'$1\']', $expression);
433 $expression = preg_replace(
'|(?<![a-z0-9_-])(\[@id=)|i',
'*$1', $expression);
436 $expression = preg_replace_callback(
437 '|\[@?([a-z0-9_-]+)=[\'"]([^\'"]+)[\'"]\]|i',
438 function ($matches) {
439 return '[@' . strtolower($matches[1]) .
"='" . $matches[2] .
"']";
445 $expression = preg_replace_callback(
446 '|\[([a-z0-9_-]+)~=[\'"]([^\'"]+)[\'"]\]|i',
447 function ($matches) {
448 return "[contains(concat(' ', normalize-space(@" . strtolower($matches[1]) .
"), ' '), ' "
449 . $matches[2] .
" ')]";
455 $expression = preg_replace_callback(
456 '|\[([a-z0-9_-]+)\*=[\'"]([^\'"]+)[\'"]\]|i',
457 function ($matches) {
458 return "[contains(@" . strtolower($matches[1]) .
", '"
459 . $matches[2] .
"')]";
465 if (
false === strpos($expression,
"[@")) {
466 $expression = preg_replace(
467 '|\.([a-z][a-z0-9_-]*)|i',
468 "[contains(concat(' ', normalize-space(@class), ' '), ' \$1 ')]",
474 $expression = preg_replace(
'|:not\(\[(.*?)]\)|i',
"[not(\$1)]", $expression);
477 $expression = str_replace(
'**',
'*', $expression);
495 if (is_string($node)) {
497 $wrapper =
'<root>' . $node .
'</root>';
499 $node = $this->doc->importNode($domQuery->doc->firstChild->firstChild,
true);
504 if ($params[
'xpath']) {
505 $nodes = $this->
xpath($params[
'xpath']);
506 } elseif ($params[
'selector']) {
507 $nodes = $this->
query($params[
'selector']);
510 $element = $nodes[0];
513 switch ($params[
'position']) {
515 $element->parentNode->insertBefore($node, $element);
518 $element->parentNode->insertBefore($node, $element->nextSibling);
521 $element->insertBefore($node, $element->firstChild);
524 $element->appendChild($node);
modify($params, $literal=false)
insertNode($node, $params)
__construct($content, &$doc=null)
query($expression, $contextNode=null, $registerNodeNS=true)
setInnerHTML($nodes, $callback=null)
xpath($expression, $contextNode=null, $registerNodeNS=true)
setText($nodes, $callback=null)
static file_get_contents($filename, $utf8=true, $context=null)