EGOCMS  24.0
EGOTEC Content-Management-System
Ego_Search.php
gehe zur Dokumentation dieser Datei
1 <?php
8 abstract class Ego_Search
9 {
/**
 * Engine configuration. Within this class only the 'leading_wildcard' entry is read
 * (while building the search string in prepareSearch()).
 *
 * @var array
 */
10  protected $config = [];
11 
/**
 * Abstract hook that every concrete search engine has to implement.
 * Presumably removes an entry from the search index - confirm against the implementations.
 *
 * @param mixed $index NOTE(review): type and meaning are not visible in this class.
 */
27  abstract function delete( $index );
28 
/**
 * Abstract hook that every concrete search engine has to implement.
 * Presumably empties or rebuilds the search index - confirm against the implementations.
 */
45  abstract function reset();
46 
/**
 * Abstract hook that every concrete search engine has to implement.
 * Presumably writes the given page into the search index - confirm against the implementations.
 *
 * @param mixed $index NOTE(review): type and meaning are not visible in this class.
 * @param mixed $page  Presumably the page object to index - TODO confirm.
 */
63  abstract function update($index, $page);
64 
/**
 * Abstract hook that every concrete search engine has to implement.
 * Presumably executes the actual search - confirm against the implementations.
 *
 * @param mixed $search   Presumably the search term - TODO confirm.
 * @param mixed $relation NOTE(review): meaning is not visible in this class.
 * @param mixed $query    NOTE(review): meaning is not visible in this class.
 */
84  abstract function search($search, $relation, $query);
85 
/**
 * Additional query for the extra-field search. Written by setExtraQuery() and AND-combined
 * with the search string in prepareSearch() when it is not empty.
 *
 * @var string
 */
91  protected $extraQuery = '';
92 
/**
 * Runs a full-text search across several sites and returns the merged, sorted result.
 *
 * Each site is switched to the request language ($_REQUEST['lang']) or, if that is not set,
 * to the language of $GLOBALS['site']. Sites for which setLanguage() throws are skipped.
 *
 * @param string $search Search term; validated by checkSearch() before anything else happens.
 * @param array  $sites  Site objects or site names (names are wrapped in `new Site()`);
 *                       when empty, Ego_System::getAllSites() is used.
 * @param array  $query  Query handed to getPages(); its optional 'order' entry controls sorting.
 * @param array  $param  Additional getPages() parameters; 'fulltext' and 'filter' are overwritten.
 * @param array  $sort   Fallback sort definition forwarded to sortPages().
 * @param string $filter Filter forwarded to getPages() as 'filter'.
 * @return mixed Whatever sortPages() returns for the collected pages.
 */
public function globalSearch($search, $sites = array(), $query = array(), $param = array(), $sort = array(), $filter = '') {
    $this->checkSearch($search);

    if (empty($sites)) {
        $sites = Ego_System::getAllSites();
    }

    // The page parameters do not depend on the site, so build them once.
    $page_param = array_merge($param, array('fulltext' => $search, 'filter' => $filter));

    $pages = array();
    foreach ($sites as $site) {
        if (is_string($site)) {
            $site = new Site($site);
        }
        try {
            // Guarded reads: neither the request parameter nor the global site has to exist.
            if (!empty($_REQUEST['lang'])) {
                $lang = $_REQUEST['lang'];
            } else {
                $lang = !empty($GLOBALS['site']) ? $GLOBALS['site']->language : null;
            }
            if ($lang) {
                $site->setLanguage($lang);
            }
        } catch (Exception $e) {
            // The site does not exist in this language: ignore it.
            continue;
        }
        foreach ($site->getPages($query, $page_param) as $page) {
            $pages[] = $page;
        }
    }

    // 'order' is optional; sortPages() treats an empty order as "not given".
    return $this->sortPages($pages, $query['order'] ?? '', $sort);
}
130 
/**
 * Sorts a list of pages through Ego_System::sortPages().
 *
 * Precedence: a non-empty $order string ("<field>" optionally followed by "asc"/"desc",
 * case-insensitive, default "asc") wins over $sort; without either, pages are sorted by
 * the field "score" in descending order.
 *
 * @param array  $pages Pages to sort.
 * @param string $order Order expression, e.g. "name desc".
 * @param array  $sort  Positional arguments [type, by, direction] for Ego_System::sortPages().
 * @return mixed Result of Ego_System::sortPages().
 */
protected function sortPages($pages, $order = '', $sort = array()) {
    if (!empty($order) && preg_match('/^([^ ]+) ?(asc|desc)?/i', $order, $match)) {
        // preg_match() omits trailing groups that did not take part in the match,
        // so the direction must be read defensively.
        $direction = !empty($match[2]) ? strtolower($match[2]) : 'asc';
        return Ego_System::sortPages($pages, 'field', $match[1], $direction);
    }

    if (!empty($sort)) {
        // Missing positions are forwarded as null, exactly as before, but without warnings.
        return Ego_System::sortPages($pages, $sort[0] ?? null, $sort[1] ?? null, $sort[2] ?? null);
    }

    return Ego_System::sortPages($pages, 'field', 'score', 'desc');
}
158 
/**
 * Builds the text that is indexed for one page and one field key.
 *
 * - 'keywords': the page keywords with commas replaced by spaces; if the site's admin
 *   setting 'keyword_register_own_site' is set, the names of all parents of type
 *   '_keywords/entry' and their synonym texts are appended.
 * - 'extra': the values (not the keys) of the filtered extra data, joined by spaces.
 * - 'content': field 'content' (empty when extra['quarantine'] is set), followed by text
 *   taken from extra['_contents'] and, if the mediapool is active, name, title,
 *   description and extracted content of the mediapool files.
 * - anything else: the raw value of $page->field[$k].
 *
 * The result is always passed through filterContent() with the same key.
 *
 * @param object $page Page to read from.
 * @param string $k    Field key, see the list above.
 * @return string Cleaned text as returned by filterContent().
 */
166  protected function _getContent($page, $k) {
167  switch ($k) {
168  // Keywords
169  case 'keywords':
170  $content = str_replace(',', ' ', $page->getKeywords($page->getSite()->language, true));
171  if ($page->getSite()->admin['keyword_register_own_site']) {
172  $keyword_parents = $page->getParents(array(
173  'fields' => 'name,extra',
174  'where' => "type='_keywords/entry'"
175  ),array(
176  'auth_or' => "1=1"
177  ));
178  foreach ($keyword_parents as $keyword) {
179  $content .= ' '.$keyword->field['name'];
180 
181  // Synonyms
182  require_once 'base/Ego_Combo.php';
183  $combo = new Ego_Combo($keyword->extra['synonym']);
184  foreach ($combo->getText() as $text) {
185  $content .= ' '.$text;
186  }
187  }
188  }
189  break;
190 
191  // Page extra
192  case 'extra':
193  $extra = Ego_System::arrayValuesRecursive($this->filterExtra($page)); // Do not put keys into the search index.
194  $content = implode(' ', $extra);
195  break;
196 
197  // Page content
198  case 'content':
199  $content = !$page->extra['quarantine'] ? $page->field['content'] : '';
200 
201  // Append all contents of extra._contents to the content of field.content
202  $content .= ' ' . (is_array($page->extra['_contents'])
203  ? implode(
204  ' ',
205  array_filter(
206  array_map(
207  function ($value) {
208  return trim(strip_tags(implode(' ', $value)));
209  },
// NOTE(review): this listing skips one source line here; the call that receives the next
// two arguments (the array and the value callback) is not visible - confirm against the
// original file before touching this expression.
211  $page->extra['_contents'],
212  function ($value) {
213  if (
214  preg_match('/^(index\.php\?|https?:\/\/|[^@ ]+@[^ ]+|\d{4}-\d{2}-\d{2}|\d{2}:\d{2}:?)/si', $value) // No URLs, e-mail addresses or date/time values
215  || preg_match('/^\{.*?\}$/si', $value) // No JSON
216  ) {
217  $value = '';
218  } else {
219  // Convert line breaks into spaces
220  $value = preg_replace('/(\r\n|\r|\n)/s', ' ', $value);
221  }
222  return $value;
223  }
224  )
225  )
226  )
227  )
228  : '');
229 
230  // Index contents from the mediapool
231  if ($page->getSite()->admin['mediapool']['active']) {
232  $files = $page->getMediapool()->list();
233  foreach ($files as $file) {
234  if (($file['nav_hide']&1) === 0) { // Only if the file may be found by the search
235  $file_content = '';
236  if (!$file['quarantine'] && !$file['isImage'] && !$file['isVideo'] && !$file['isArchive']) {
237  if (method_exists($this, 'indexFile')) {
238  // The search engine in use brings its own method for indexing files
239  $file_content = $this->indexFile($page, $file['file']);
240  } else if ($GLOBALS['egotec_conf']['openoffice']['active']) {
241  // Use the office converter for indexing
242  require_once 'openoffice/converter.php';
243  $file_content = convert_content($file['file'], $file['suffix'], $file['mime']);
244  }
245  }
246  $content .= ' ' . implode(' ', array_filter([
247  $file['name'],
248  $file['title'],
249  strip_tags($file['description']),
250  $file_content
251  ]));
252  }
253  }
254  }
255 
256  // Clean up the content
// NOTE(review): `{2,}?` is lazy, so each match consumes exactly two whitespace characters;
// an odd-length run keeps one extra character. filterContent() collapses whitespace again below.
257  $content = trim(
258  preg_replace( // Remove repeated whitespace
259  '/\s{2,}?/ms',
260  ' ',
261  $content
262  )
263  );
264  break;
265 
266  // Plain page fields
267  default:
268  $content = $page->field[$k];
269  }
270 
271  return $this->filterContent($content, $k);
272  }
273 
/**
 * Flattens the filtered extra data of a page and cleans every value for the index.
 *
 * @param object $page  Page whose extra data is read through filterExtra().
 * @param bool   $clean When true, stand-alone digits 0 and 1 are removed from each value.
 * @return mixed Result of Ego_System::arrayFlatRecursive() with the cleaning callback applied.
 */
protected function _getExtra($page, $clean = false) {
    $sanitize = function ($value) use ($clean) {
        if (!$clean) {
            return $this->filterContent($value);
        }

        // Strip values that should not be found: a 0 or 1 that is not part of a longer number.
        return $this->filterContent(preg_replace('/(?<!\d)(0|1)(?!\d)/', '', $value));
    };

    return Ego_System::arrayFlatRecursive($this->filterExtra($page), $sanitize);
}
291 
/**
 * Reduces a value to plain, single-spaced text suitable for the search index.
 *
 * Steps: for the keys 'short', 'content' and 'extra' the values of HTML `title`/`alt`
 * attributes are appended so they survive tag removal; then `index.php?...` links, HTML
 * tags and HTML entities are removed, double quotes become spaces and all whitespace runs
 * are collapsed to a single space.
 *
 * Non-string input is tolerated: scalars are cast to string, everything else (null,
 * arrays, objects that ended up in the extra field) yields an empty string.
 *
 * @param mixed  $content Value to clean.
 * @param string $k       Field key the value belongs to.
 * @return string Cleaned text, trimmed.
 */
public function filterContent($content, $k = 'extra') {
    if (!is_string($content)) {
        $content = is_scalar($content) ? (string) $content : '';
    }

    if (
        in_array($k, array('short', 'content', 'extra'), true)
        && preg_match_all('/(title|alt)=(["\'])(.*?)\\2/ims', $content, $matches)
    ) {
        // Keep the text of selected HTML attributes.
        foreach ($matches[3] as $match) {
            $content .= " $match";
        }
    }

    $content = preg_replace('/index\.php\?[^ \'"]+/', '', $content);
    $content = preg_replace('/<[^>]*>/', ' ', $content);
    $content = preg_replace('/&[^ ;]+;/', ' ', $content);
    // Quotes are replaced before whitespace is collapsed so that no double spaces remain.
    // (Line breaks and tabs are covered by \s+ below.)
    $content = str_replace('"', ' ', (string) $content);
    $content = preg_replace('/\s+/', ' ', $content);

    return trim((string) $content);
}
318 
/**
 * Returns the extra data of a page reduced to what may go into the search index.
 *
 * A non-array extra field yields an empty array. Otherwise a mediapool quarantine marker
 * is remembered as extra['quarantine'], internal keys are dropped, the optional site hook
 * 'admin/search_index.php' (function search_index()) may rewrite the data, and the
 * workflow reminder date is converted to a timestamp.
 *
 * @param object $page Page providing `extra` and getSite().
 * @return array Filtered extra data.
 */
public function filterExtra($page) {
    // The local names $extra, $page, $site and $file are kept on purpose: the hook file is
    // included inside this method and therefore sees this scope.
    $extra = $page->extra;

    // Make sure we really work on an array.
    if (!is_array($extra)) {
        return array();
    }

    // Remember a mediapool quarantine before the mediapool data is removed.
    if (isset($extra['mediapool']) && stristr(json_encode($extra['mediapool']), '"quarantine"')) {
        $extra['quarantine'] = 1;
    }

    $internal_keys = array(
        'history',
        'language_link',
        'language_standard',
        'origImgWidth',
        'origImgHeight',
        'origFileSize',
        'clones',
        'clone_original',
        '_blocks',
        '_layout',
        '_forms',
        '_template',
        '_style',
        '_asis',
        'mediapool',
    );
    foreach ($internal_keys as $internal_key) {
        unset($extra[$internal_key]);
    }

    $site = $page->getSite();
    $file = $site->getSiteFile('admin/search_index.php');
    if ($file) {
        require_once($file);
        $extra = search_index($page, $extra);
    }

    // Convert the reminder date into a timestamp.
    if (isset($extra['workflow_reminder']['r_date'])) {
        $extra['workflow_reminder']['r_date'] = strtotime($extra['workflow_reminder']['r_date']);
    }

    return $extra;
}
365 
/**
 * Translates an extra-field query into SQL LIKE conditions and stores it in $extraQuery.
 *
 * The query is split at AND/OR. Every part of the form `extra.<name> = <value>`,
 * `extra.<name> != <value>` or `!extra.<name>` is replaced by a LIKE / NOT LIKE condition
 * on the column `extra` that matches the serialized representation of the entry.
 * Values of the form `:name` are taken from $bind when present. Unless the configured
 * search engine is 'lucene' or 'elastic', the leading `extra.` is removed from the name.
 *
 * NOTE(review): values are inserted into the condition without escaping - confirm that
 * callers never pass untrusted input.
 * NOTE(review): the exclude pattern has no quotes around the name, unlike the other two
 * patterns - looks inconsistent, left unchanged; confirm the intended format.
 *
 * @param string $query Extra query, e.g. "extra.color = 'red' and !extra.hidden".
 * @param array  $bind  Values for `:name` placeholders.
 * @return void
 */
public function setExtraQuery($query, $bind = array()) {
    $engine = $GLOBALS['egotec_conf']['search_engine'] ?? null;
    $keep_prefix = in_array($engine, ['lucene', 'elastic'], true);

    // Generate the SQL query
    $sql_query = $query;
    foreach (preg_split('/\s+(and|or)\s+/si', $query) as $sub_query) {
        if (!preg_match('/(!?extra\.[^ !=<>]+)(\s*(!=|=)\s*(.*?))?$/si', trim($sub_query, '() '), $matches)) {
            continue;
        }

        $param = $matches[1];
        // Operator and value are optional (e.g. "!extra.hidden"); the groups are then absent.
        $operator = mb_strtolower($matches[3] ?? '');
        $value = trim($matches[4] ?? '', '\'"');
        if (is_numeric($value)) {
            $value = (int) $value;
        }

        // A leading "!" means: the value must not be set in the extra field
        $exclude = false;
        if ($param[0] == '!') {
            $exclude = true;
            $param = substr($param, 1);
        }

        if (!$keep_prefix) {
            // Remove the leading "extra."
            $param = substr($param, 6);
        }

        // The SQL drivers have no binds for the extra search, so resolve them here
        if (is_array($bind) && strpos($value, ':') === 0 && isset($bind[substr($value, 1)])) {
            $value = $bind[substr($value, 1)];
        }

        if ($exclude) {
            $replace = "extra NOT LIKE '%s:" . strlen($param) . ":" . $param . ";%'";
        } else {
            // strlen() is intentional: the serialized format counts bytes.
            if (is_string($value)) {
                $value = "'%s:" . strlen($param) . ":\"" . $param . "\";s:" . strlen($value) . ":\"" . $value . "\"%'";
            } else {
                $value = "'%s:" . strlen($param) . ":\"" . $param . "\";_:" . $value . "%'";
            }

            // Only "!=" negates; "=" and a missing operator both compare with LIKE.
            $replace = "extra " . ($operator === '!=' ? "NOT LIKE $value" : "LIKE $value");
        }

        $sql_query = str_replace($matches[0], $replace, $sql_query);
    }

    $this->extraQuery = $sql_query;
}
450 
/**
 * Turns a user search term into the query string handed to the search engine.
 *
 * The term is lower-cased, repeated `+ - ~ &` are collapsed, other Lucene query characters
 * are escaped and a trailing dot is removed. Two variants are then built character by
 * character: a wildcard variant (a `*` after every word, optionally also in front of it
 * when the 'leading_wildcard' config entry is set) and a fuzzy variant (a `~` plus the
 * configured fuzzy number after every word or phrase). The fuzzy variant is appended only
 * if $fuzzy is true. Unless $original is true, $filter and the stored extra query are
 * AND-combined with the result and exclamation marks are escaped.
 *
 * @param string $search   Raw search term.
 * @param string $filter   Additional Lucene filter expression.
 * @param bool   $original When true, neither filter nor extra query are added.
 * @param bool   $fuzzy    When true, the fuzzy variant is appended to the wildcard variant.
 * @return string Query string; "*" for an empty search term without filter or extra query.
 */
protected function prepareSearch($search, $filter = '', $original = false, $fuzzy = false) {
    $search = mb_strtolower(trim($search));
    foreach (array('+', '-', '~', '&') as $char) {
        // These characters must not occur several times in a row
        $search = preg_replace('/[' . $char . ']{2,}/', $char, $search);
    }
    // Escape other Lucene query characters
    $search = preg_replace('%([\|!(){}[\]^*?:/]+)%', '\\\\$1', $search);

    // Hotfix: a dot at the end does not work
    $search = rtrim($search, '.');

    $search_asterix = ''; // Search extended with *
    $search_fuzzy = ''; // Search extended with ~
    $in_string = '';
    $in_word = '';
    $in_fuzzy = '';
    $character = ''; // Stays empty when the search term is empty

    // Read the settings defensively; neither has to be configured
    $fuzzy_num = !empty($GLOBALS['egotec_conf']['search']['fuzzy_num'])
        ? $GLOBALS['egotec_conf']['search']['fuzzy_num']
        : '';
    $leading_wildcard = !empty($this->config['leading_wildcard']);

    // Byte-wise iteration is sufficient: only ASCII control characters are inspected
    $length = strlen($search);
    for ($i = 0; $i < $length; $i++) {
        $character = $search[$i];
        switch ($character) {
            case '"':
                if ($in_string == '"') {
                    $in_string = '';
                } else {
                    $in_string = '"';
                }
                $search_asterix .= $character;
                $search_fuzzy .= $character;
                if (!$in_string) {
                    // A phrase has just been closed
                    $search_fuzzy .= '~' . $fuzzy_num;
                }
                $in_word = '';
                break;
            case ' ':
                if (
                    !$in_string
                    && !$in_fuzzy
                    && !in_array(substr($search_asterix, -1), array('"', '*'))
                ) {
                    $search_asterix .= '*';
                    $search_fuzzy .= '~' . $fuzzy_num;
                }
                $search_asterix .= $character;
                $search_fuzzy .= $character;
                $in_word = $in_fuzzy = '';
                break;
            case '-':
                // Allow searching with hyphens
                if ($search_asterix != '' && substr($search_asterix, -1) != ' ') {
                    $search_asterix .= ' '; // @TODO Workaround, not the solution.
                } else {
                    $search_asterix .= $character;
                }
                break;
            case '~':
                // Search with tilde (fuzzy search): drop the last wildcard, if there is one.
                // Without the check a missing wildcard would delete the first character.
                $wildcard_pos = strrpos($search_asterix, '*');
                if ($wildcard_pos !== false) {
                    $search_asterix = substr_replace($search_asterix, '', $wildcard_pos, 1);
                }
                $search_asterix .= $character;
                $search_fuzzy .= $character;
                $in_fuzzy = $character;
                break;
            default:
                if ($leading_wildcard && !$in_word) {
                    // Asterisk in front of a word
                    if (
                        !$in_string
                        && !in_array($character, array('+', '-', '*'))
                        && substr($search_asterix, -1) != '-'
                    ) {
                        $search_asterix .= '*';
                    }
                    $in_word = $character;
                }
                $search_asterix .= $character;
                $search_fuzzy .= $character;
        }
    }
    if ($character != '"' && !$in_fuzzy) {
        if ($character != '*') {
            // Append an asterisk at the end
            $search_asterix .= '*';
        }
        if ($search_asterix != '*' && $character != '~') {
            // Append a tilde at the end
            $search_fuzzy .= '~' . $fuzzy_num;
        }
    }

    $search_string = $search_asterix;
    if ($fuzzy) {
        $search_string .= " $search_fuzzy";
    }

    if (!$original) {
        // Add the Lucene filter
        if (is_string($filter) && !empty($filter)) {
            if ($search_string == '*') {
                $search_string = $filter;
            } else {
                $search_string = "($search_string) AND ($filter)";
            }
        }

        // Additional query for the extra search
        if (!empty($this->extraQuery)) {
            if ($search_string == '*') {
                $search_string = $this->extraQuery;
            } else {
                $search_string = "($search_string) AND ({$this->extraQuery})";
            }
        }

        // Escape exclamation marks
        $search_string = str_replace('!', '\!', $search_string);
    }

    return $search_string;
}
581 
/**
 * Hook for clearing engine-specific caches. This base implementation does nothing.
 *
 * @return void
 */
function clearCache() {
    // Intentionally empty.
}
588 
/**
 * Rejects search terms that are too long.
 *
 * The limit is the 'maxlength' entry of the search configuration, 200 characters when it
 * is not configured. For a longer term Ego_System::header(400) is called and the script
 * is terminated.
 *
 * @param string $search Search term to check.
 * @return void
 */
protected function checkSearch($search) {
    // Guarded read: the configuration entry is optional.
    $maxlength = !empty($GLOBALS['egotec_conf']['search']['maxlength'])
        ? $GLOBALS['egotec_conf']['search']['maxlength']
        : 200;

    // A missing term counts as empty.
    if (mb_strlen($search ?? '') > $maxlength) {
        Ego_System::header(400);
        exit;
    }
}
604 
/**
 * Returns search suggestions for a query. This base implementation has none and always
 * returns an empty list.
 *
 * @param string $query Search term.
 * @param array  $sites Sites to consider (unused here).
 * @param int    $max   Maximum number of suggestions (unused here).
 * @return array Always empty in this class.
 */
public function getSuggestions($query, $sites = [], $max = 5) {
    return array();
}
617 
/**
 * Returns spelling corrections for a query. This base implementation has none and always
 * returns an empty list.
 *
 * @param string $query Search term.
 * @param array  $sites Sites to consider (unused here).
 * @param int    $max   Maximum number of corrections (unused here).
 * @return array Always empty in this class.
 */
public function getCorrections($query, $sites = [], $max = 3) {
    return array();
}
630 }
sortPages($pages, $order='', $sort=array())
Definition: Ego_Search.php:150
_getExtra($page, $clean=false)
Definition: Ego_Search.php:281
setExtraQuery($query, $bind=array())
Definition: Ego_Search.php:391
checkSearch($search)
Definition: Ego_Search.php:595
filterContent($content, $k='extra')
Definition: Ego_Search.php:299
update($index, $page)
filterExtra($page)
Definition: Ego_Search.php:325
globalSearch($search, $sites=array(), $query=array(), $param=array(), $sort=array(), $filter='')
Definition: Ego_Search.php:104
search($search, $relation, $query)
_getContent($page, $k)
Definition: Ego_Search.php:166
static sortPages($pages, $sorttype='field', $sortby='id', $sortdirection="asc")
static arrayFlatRecursive($array, $callback=null)
static getAllSites($username='', $perm='', $table=false, $type='')
static arrayValuesRecursive($array)
Definition: Site.php:30