EGOCMS  24.0
EGOTEC Content-Management-System
Ego_Search_Elastic.php
gehe zur Dokumentation dieser Datei
1 <?php
7 require_once('base/Ego_Search.php');
8 require_once('composer/vendor/autoload.php');
9 
17 {
// Elasticsearch client; assigned in the constructor through Elasticsearch\ClientBuilder.
23  protected $client = null;
24 
// Result of the "office" licence check in the constructor; gates the attachment pipeline and file indexing.
30  private $officeImport = false;
31 
// Defaults to 10000 and can be overridden by $GLOBALS['egotec_conf']['elastic']['maxclause'].
// Used for the "top_terms_N" rewrite setting and for the index field limit.
37  protected $maxClauseCount = 10000;
38 
// Set in the constructor from the 'throw_exception' configuration value or constructor parameter.
44  private $throwException = false;
45 
/**
 * Index optimisation hook. Intentionally empty for this search backend.
 */
public function optimize() {
    // nothing to do
}
49 
/**
 * Builds the Elasticsearch client for one page table and makes sure the index
 * and (when the office licence is present) the attachment pipeline exist.
 *
 * @param string $table        Page table to work on; falls back to $GLOBALS['site']->pageTable when empty.
 * @param array  $param        Search parameters, stored in $this->config['param'].
 * @param bool   $checkHealthy When true, an exception from indexCreate() is re-thrown instead of being handed to error().
 *
 * @throws Exception When the elastic licence is missing or no host is configured.
 */
public function __construct($table = '', $param = [], bool $checkHealthy = false) {
    // Both sources are optional; empty() avoids "undefined array key" warnings when they are not set.
    $this->throwException = !empty($GLOBALS['egotec_conf']['elastic']['throw_exception'])
        || !empty($param['throw_exception']);

    if (!Ego_System::checkLicence($GLOBALS['egotec_conf']['lib_dir'] . 'elastic')) {
        throw new Exception("missing licence");
    }

    $this->officeImport = Ego_System::checkLicence($GLOBALS['egotec_conf']['lib_dir'] . 'office');

    if (!empty($GLOBALS['egotec_conf']['elastic']['maxclause'])) {
        $this->maxClauseCount = (int) $GLOBALS['egotec_conf']['elastic']['maxclause'];
    }

    $this->config = [
        'index' => '', // index name (demo_de / wiki_de / multimedia_de)
        'leading_wildcard' => true
    ];

    if (empty($table)) {
        $table = $GLOBALS['site']->pageTable;
    }

    // The default is written back to the global configuration because getSearchParam() reads it from there.
    if (empty($GLOBALS['egotec_conf']['elastic']['max_results'])) {
        $GLOBALS['egotec_conf']['elastic']['max_results'] = 10000;
    }

    // Host examples:
    // https://username:password@foo.com:9200/
    // http://foo.com:9200/
    $hosts = [];
    for ($i = 0; $i < 4; $i++) {
        $key = 'host' . $i;
        if (!empty($GLOBALS['egotec_conf']['elastic'][$key])) {
            $hosts[] = trim($GLOBALS['egotec_conf']['elastic'][$key], '/');
        }
    }

    if (empty($hosts)) {
        throw new Exception("missing hosts for elastic");
    }

    $this->client = Elasticsearch\ClientBuilder::create()
        ->setHandler(Elasticsearch\ClientBuilder::multiHandler())
        ->setHosts($hosts)
        ->build();

    $this->config['index'] = strtolower($table);
    $this->config['table'] = $table;
    $this->config['param'] = $param;

    try {
        $this->indexCreate();
    } catch (Exception $e) {
        if ($checkHealthy) {
            throw $e;
        } else {
            $this->error($e);
        }
    }

    $this->createPipeline();
}
121 
/**
 * Removes the document with the given ID from this instance's index.
 *
 * @param mixed $id Document ID.
 * @return bool Always true; exceptions from the client are not caught here.
 */
public function delete($id) {
    $this->client->delete([
        'index' => $this->config['index'],
        'id' => $id
    ]);

    return true;
}
139 
/**
 * Drops this instance's index and creates it again.
 *
 * Failures while re-creating the index are handed to error().
 *
 * @return bool Always true.
 */
public function reset() {
    ++$GLOBALS['monitor']['search_reset'];

    $this->indexDelete(false);

    try {
        $this->indexCreate();
    } catch (Exception $failure) {
        $this->error($failure);
    }

    return true;
}
159 
160 
/**
 * Deletes all indices (indexDelete() is called with $all = true).
 *
 * @return bool Always true.
 */
public function resetAll() {
    ++$GLOBALS['monitor']['search_reset_all'];

    $this->indexDelete(true);

    return true;
}
173 
/**
 * Creates the index of this instance if it does not exist yet.
 *
 * The stop word list is chosen from the two-character suffix of the index
 * name (e.g. "demo_de" => "de"); unknown suffixes fall back to English.
 *
 * @return bool Always true.
 * @throws Exception When creating the index fails for a reason other than
 *                   "resource_already_exists_exception". The original
 *                   exception is attached as the previous exception.
 */
public function indexCreate() {
    $index = $this->config['index'];

    if ($this->client->indices()->exists(['index' => $index])) {
        return true;
    }

    $language = preg_match('/_([^_]{2})$/', $index, $matches) ? $matches[1] : '';
    $stopwords = [
        'ar' => '_arabic_',
        'br' => '_brazilian_',
        'bg' => '_bulgarian_',
        'cn' => '_cjk_',
        'zh' => '_cjk_',
        'ja' => '_cjk_',
        'ko' => '_cjk_',
        'cs' => '_czech_',
        'da' => '_danish_',
        'nl' => '_dutch_',
        'en' => '_english_',
        'us' => '_english_',
        'uk' => '_english_',
        'fi' => '_finnish_',
        'fr' => '_french_',
        'de' => '_german_',
        'ed' => '_german_',
        'el' => '_greek_',
        'hi' => '_hindi_',
        'hu' => '_hungarian_',
        'id' => '_indonesian_',
        'ga' => '_irish_',
        'it' => '_italian_',
        'lv' => '_latvian_',
        'lt' => '_lithuanian_',
        'no' => '_norwegian_',
        'pt' => '_portuguese_',
        'ro' => '_romanian_',
        'ru' => '_russian_',
        'es' => '_spanish_',
        'sv' => '_swedish_',
        'th' => '_thai_',
        'tr' => '_turkish_'
    ];

    // Mapping for the fields that can be sorted on: each gets a "keyword"
    // sub-field; "structure" is nested and carries parent/position instead.
    $sortable_fields = function() {
        $arr = [];
        foreach ([
            'id' => 'long',
            'name' => 'text',
            'type' => 'text',
            'a_date' => 'date',
            'c_date' => 'date',
            'release_from' => 'date',
            'release_until' => 'date',
            'order_field' => 'long',
            'structure' => 'nested',
            'main_category' => 'long'
        ] as $name => $type) {
            $params = ['type' => $type];
            if ($name == 'structure') {
                $params['properties'] = [
                    'parent' => [
                        'type' => 'integer',
                        'fields' => [
                            'keyword' => [
                                'type' => 'keyword'
                            ]
                        ]
                    ],
                    'position' => [
                        'type' => 'integer',
                        'fields' => [
                            'keyword' => [
                                'type' => 'keyword'
                            ]
                        ]
                    ]
                ];
            } else {
                $params['fields'] = [
                    'keyword' => [
                        'type' => 'keyword'
                    ]
                ];
            }
            if ($type == 'date') {
                // getBody() delivers dates as strtotime() results
                $params['format'] = 'epoch_second';
            }
            $arr[$name] = $params;
        }
        return $arr;
    };

    $params = [
        'index' => $index,
        'include_type_name' => true,
        'body' => [
            'settings' => [
                'number_of_shards' => $GLOBALS['egotec_conf']['elastic']['number_of_shards'],
                'number_of_replicas' => $GLOBALS['egotec_conf']['elastic']['number_of_replicas'] ?: 0,
                'index.mapping.ignore_malformed' => true,
                'index.mapping.total_fields.limit' => $this->maxClauseCount,
                'analysis' => [
                    'analyzer' => [
                        'default' => [
                            'type' => 'custom',
                            'tokenizer' => 'standard',
                            'filter' => ['lowercase', 'stop']
                        ],
                        'trigram' => [
                            'type' => 'custom',
                            'tokenizer' => 'standard',
                            'filter' => ['lowercase', 'shingle']
                        ]
                    ],
                    'filter' => [
                        'shingle' => [
                            'type' => 'shingle',
                            'min_shingle_size' => 2,
                            'max_shingle_size' => 3
                        ],
                        'stop' => [
                            'type' => 'stop',
                            'ignore_case' => true,
                            'stopwords' => $stopwords[$language] ?? $stopwords['en']
                        ]
                    ]
                ]
            ],
            'mappings' => [
                '_doc' => [
                    'properties' => array_merge($sortable_fields(), [
                        'suggest' => [
                            'type' => 'completion',
                            'contexts' => [[
                                'name' => 'ignore_search',
                                'type' => 'category',
                                'path' => 'ignore_search'
                            ]]
                        ],
                        'title' => [
                            'type' => 'text',
                            'fields' => [
                                'keyword' => [
                                    'type' => 'keyword'
                                ],
                                'trigram' => [
                                    'type' => 'text',
                                    'analyzer' => 'trigram'
                                ]
                            ]
                        ]
                    ])
                ]
            ]
        ]
    ];

    try {
        $this->client->indices()->create($params);
    } catch (Exception $e) {
        // The index may have been created between exists() and create(); that case is not an error.
        if (strpos($e->getMessage(), 'resource_already_exists_exception') === false) {
            // Keep the original exception as "previous" so its type and trace are not lost.
            throw new Exception($e->getMessage(), $e->getCode(), $e);
        }
    }

    return true;
}
352 
/**
 * Reports whether the index of this instance exists.
 *
 * @return bool Result of the client's indices()->exists() call.
 */
public function indexExists(): bool {
    $lookup = ['index' => $this->config['index']];

    return $this->client->indices()->exists($lookup);
}
363 
/**
 * Deletes either this instance's index or all indices ("_all").
 *
 * @param mixed $all Truthy to target "_all", otherwise only $this->config['index'].
 * @return bool Always true.
 */
public function indexDelete($all) {
    if ($all) {
        $target = "_all";
    } else {
        $target = $this->config['index'];
    }

    $request = [
        'index' => $target,
        'client' => ['ignore' => [400, 404]]
    ];

    $indices = $this->client->indices();
    if ($indices->exists($request)) {
        $this->client->indices()->delete($request);
    }

    return true;
}
383 
/**
 * Indexes several pages with a single bulk request.
 *
 * On failure an Ego_Action::PAGE_UPDATE_INDEX action is added for every page
 * and the exception is passed to Ego_Action::throw().
 *
 * @param iterable $pages     Pages to index.
 * @param bool     $recursive When true, updateIndex() is also called on every child of each page.
 * @return void
 */
public function updateBulk($pages, $recursive = true) {
    try {
        // Initialised up front: with no pages the original code handed an undefined variable to bulk().
        $params = ['body' => []];

        foreach ($pages as $page) {
            $page = $this->indexFiles($page);

            $params['body'][] = [
                'index' => [
                    '_index' => $this->config['index'],
                    '_id' => $page->field['id']
                ]
            ];

            $params['body'][] = $this->getBody($page);

            // Refresh the structures of all child pages (in case the variable sorting has changed)
            if ($recursive) {
                foreach ($page->getChildren([], ['auth_or' => '1=1']) as $child) {
                    $child->updateIndex($this, true, false);
                }
            }
        }

        // Nothing collected means nothing to send.
        if (!empty($params['body'])) {
            $this->client->bulk($params);
        }
    } catch (Exception $e) {
        foreach ($pages as $page) {
            Ego_Action::add(Ego_Action::PAGE_UPDATE_INDEX, [
                'identity' => $page->getIdentity(),
                'recursive' => $recursive
            ], $e);
        }
        Ego_Action::throw($e);
    }
}
417 
/**
 * Indexes a single page and, optionally, refreshes its children.
 *
 * A failing index request is recorded through Ego_Action::add() and does not
 * abort the method.
 *
 * @param mixed $index     Document ID to index under.
 * @param Page  $page      Page to index.
 * @param array $count     Not used by this implementation.
 * @param bool  $recursive When true, updateIndex() is called on every child page.
 * @return bool Always true.
 */
public function update($index, $page, $count = [], $recursive = true) {
    try {
        $page = $this->indexFiles($page);

        ++$GLOBALS['monitor']['search_update_count'];

        $request = [
            'index' => $this->config['index'],
            'id' => $index,
            'body' => $this->getBody($page),
            'client' => ['ignore' => 404]
        ];
        $this->client->index($request);
    } catch (Exception $failure) {
        $details = [
            'identity' => $page->getIdentity(),
            'recursive' => $recursive
        ];
        Ego_Action::add(Ego_Action::PAGE_UPDATE_INDEX, $details, $failure, true);
    }

    if (!$recursive) {
        return true;
    }

    // Refresh the structures of all child pages (in case the variable sorting has changed)
    foreach ($page->getChildren([], ['auth_or' => '1=1']) as $child) {
        $child->updateIndex($this, true, false);
    }

    return true;
}
457 
/**
 * Builds the document body that is sent to Elasticsearch for one page.
 *
 * Works on a clone of the page because the "_contents" entry is removed from
 * the extra data before the extra values are collected.
 *
 * @param Page $page Page to convert.
 * @return array Document body (content fields, dates as timestamps, flags,
 *               parent/position structure and completion suggestions).
 */
private function getBody($page) {
    $page = clone $page;

    $keywords = $this->_getContent($page, 'keywords');
    $name = $this->_getContent($page, 'name');
    $title = $this->_getContent($page, 'title');
    $short = $this->_getContent($page, 'short');
    $content = $this->_getContent($page, 'content');

    // Do not add blocks from the extra field twice; they are already part of "content".
    $extra = $this->_getExtra($page);
    unset($page->extra['_contents']);
    $extra_values = $this->_getExtra($page, true);

    // Suggestion candidates: every space-separated word of the text fields, lower-cased and cleaned.
    $text = mb_strtolower(html_entity_decode(implode(' ', [$keywords, $name, $title, $short, $content])));
    $words = [];
    foreach (explode(' ', $text) as $word) {
        $cleaned = preg_replace('/[^a-zA-Z0-9äöüÄÖÜß-]/', '', $word);
        $words[] = Ego_System::filterNonUtf8(trim($cleaned, '-'));
    }
    $usable = array_filter($words, function ($word) {
        // Reject numbers, words of at most two bytes, and 32-character hashes
        return !(
            is_numeric($word)
            || strlen($word) <= 2
            || preg_match('/^[a-z0-9]{32}$/i', $word)
        );
    });
    $suggestions = array_values(array_unique($usable));

    // Empty and zero dates become null, everything else a Unix timestamp
    $convert_date = function($date) {
        return (!$date || $date == '0000-00-00 00:00:00') ? null : strtotime($date);
    };

    // Collect the structure so that the variable sorting can be queried
    $structure = [];
    $db = new_db_connection([
        'table' => $page->getSite()->pageTable . '_children',
        'where' => 'child = :id',
        'bind' => [
            'id' => $page->field['id']
        ]
    ]);
    while ($db->nextRecord()) {
        $structure[] = [
            'parent' => $db->Record['page_id'],
            'position' => $db->Record['idx']
        ];
    }

    $join_values = function ($value) {
        return implode(' ', $value);
    };

    $body = [];
    $body['id'] = $page->field['id'];
    $body['keywords'] = $keywords;
    $body['url'] = $this->_getContent($page, 'url');
    $body['name'] = $name;
    $body['title'] = $title;
    $body['short'] = $short;
    $body['content'] = $content;
    $body['a_date'] = $convert_date($page->field['a_date']);
    $body['c_date'] = $convert_date($page->field['c_date']);
    $body['release_from'] = $convert_date($page->field['release_from']);
    $body['release_until'] = $convert_date($page->field['release_until']);
    $body['extra'] = $extra;
    $body['extra_values'] = trim(implode(' ', array_values(array_map($join_values, $extra_values))));
    $body['type'] = $page->field['type'];
    $body['ignore_search'] = (($page->field['nav_hide'] & 4) == 4) ? '1' : '0';
    $body['inactive'] = (int)$page->field['inactive'];
    $body['deleted'] = (int)$page->field['deleted'];
    $body['order_field'] = (int)$page->field['order_field'];
    $body['structure'] = $structure;
    $body['main_category'] = (int)$page->extra['main_category'];
    $body['suggest'] = [
        'input' => $page->isActive() ? $suggestions : []
    ];

    return $body;
}
576 
/**
 * Builds the parameter array for the client's search() call.
 *
 * The search term is lower-cased, cleaned up, optionally stripped of stop
 * words and then split into phrases ("..."), required (+), excluded (-),
 * fuzzy (term~N) and plain terms. For each of the searched fields a matching
 * clause is added to the bool query.
 *
 * @param array|null $tables  Indices to search; falsy values fall back to this instance's index.
 *                            Only when exactly null are per-field boosts read from Site::getSearchCount().
 * @param string     $search  Search term as entered.
 * @param mixed      $filter  query_string expression; becomes a "must" clause when $search is empty, a filter otherwise.
 * @param mixed      $fuzzy   Truthy to use a fuzzy query for every plain term.
 * @param array      $id_list When not empty, restricts hits to these document IDs.
 * @param string     $limit   "size" or "from,size"; empty uses the configured 'max_results'.
 * @param string     $order   Comma separated "field direction" list; the key "score" sorts by _score.
 * @param string     $where   SQL where clause; only inspected to find the parent ID for "<index>_children.idx" sorting.
 * @return array Parameters with 'index', '_source', 'track_scores' and 'body' (from, size, sort, query).
 */
591  private function getSearchParam($tables, $search, $filter, $fuzzy, $id_list = [], $limit = '', $order = '', $where = '') {
592  $fields = ['keywords', 'url', 'name', 'title', 'short', 'content', 'extra_values'];
593  $indexes = $tables ? $tables : [$this->config['index']];
594 
595  // "rewrite" setting to use
596  $rewrite = 'top_terms_' . $this->maxClauseCount;
597 
598  // Always use lowercase: Elastic starts a case-sensitive search when the search term contains an uppercase letter
599  $search = trim(mb_strtolower($search));
600 
601  // Automatically clean up the search term
602  $search = trim(preg_replace([
603  '/[?*+-]( |$)/',
604  '/(^| )\{(.*?)\}/'
605  ], [
606  ' ',
607  '\\1\\2'
608  ], $search));
609 
610  // Remove stop words
// NOTE(review): this reads $GLOBALS['elastic'], while every other setting in this file
// comes from $GLOBALS['egotec_conf']['elastic'] - confirm which key is intended.
611  if (empty($GLOBALS['elastic']['stop_words'])) {
612  $search_phrases = [];
613  $search_no_phrases = $search; // Search term without phrases
614  if (preg_match_all('/".*?"/', $search, $matches)) {
615  $search_phrases = $matches[0];
616  $search_no_phrases = trim(preg_replace(['/".*?"/', '/\s+/'], ['', ' '], $search)); // Search term without phrases
617  }
618 
619  // Determine the stop words
620  $language = $GLOBALS['site'] ? $GLOBALS['site']->language : 'de';
621  if (Ego_System::file_exists($file = $GLOBALS['egotec_conf']['lib_dir'] . 'base/search/' . $language . '.txt')) {
622  $stop_words = array_filter(explode("\n", Ego_System::file_get_contents($file)));
623 
624  $search_no_phrases = implode(' ', array_filter(explode(' ', $search_no_phrases), function($value) use ($stop_words) {
625  return !in_array($value, $stop_words);
626  }));
627  $search = $search_no_phrases . (!empty($search_phrases) ? ' ' . implode(' ', $search_phrases) : '');
628  }
629  }
630 
631  /* Automatically extend hyphenated search terms by their individual words (otherwise these are not found)
632  * Exception: the hyphens occur inside a phrase (within quotation marks) */
633  $search_no_phrases = trim(preg_replace(['/".*?"/', '/\s+/'], ['', ' '], $search)); // Search term without phrases
634  if (preg_match_all('/( |^)([^+-][^ "]+-[^ "]+)/is', $search_no_phrases, $matches)) {
635  foreach ($matches[2] as $word) {
636  $search .= ' ' . str_replace('-', ' ', $word);
637  }
638  }
639 
640  // Add search relevance for single-site search
641  $count = [];
642  if ($tables === null) {
643  try {
// Split the table name at its last underscore into site name and language
644  [$name, $lang] = preg_split('/_(?=[^_]*$)/', $this->config['table']);
645  $site = new Site($name, $lang);
646  $count = $site->getSearchCount();
647  } catch (Site_Exception $e) {
648  // ignore
649  }
650  }
651 
// Clause collections of the bool query; $must_should groups the per-field clauses of each "+term"
652  $must = [];
653  $must_not = [];
654  $should = [];
655  $must_should = [];
656 
657  if (preg_match_all('/(".*?"|[^ ]+)/is', $search, $matches)) {
658  // Run an additional phrase search
659  $match_phrase = sizeof($matches[0]) > 1 && !preg_match('/(^| )["*+-]/', $search);
660 
661  foreach ($fields as $field) {
// Boost comes from the site's search count; "extra_values" uses the "extra" entry
662  $boost = 1;
663  if ($field == 'extra_values' && isset($count['extra'])) {
664  $boost = (int)$count['extra'];
665  } elseif (isset($count[$field])) {
666  $boost = (int)$count[$field];
667  }
668 
669  // Remove search terms that are too short (default minimum length: 3 characters)
670  $filtered_query = array_filter($matches[0], function($s) {
671  return mb_strlen(trim($s, '"+-*')) >= (isset($GLOBALS['egotec_conf']['elastic']['minlength'])
672  ? (int) $GLOBALS['egotec_conf']['elastic']['minlength']
673  : 3);
674  });
675 
676  foreach ($filtered_query as $query) {
677  // Hotfix: if the search term contains a "-", it must trigger an exact search
678  if (!in_array($query[0], ['"', '-']) && strpos($query, '-') !== false) {
679  $query = "\"$query\"";
680  }
681 
682  if ($query[0] == '"') {
683  // Phrase
684  $query = trim($query, '"');
685  $should[] = [
686  'match_phrase' => [
687  $field => [
688  'query' => $query,
689  'boost' => $boost
690  ]
691  ]
692  ];
693  } else {
694  if ($fuzzy) {
695  // Fuzzy
696  $should[] = [
697  'fuzzy' => [
698  $field => [
699  'value' => $query,
700  'rewrite' => $rewrite,
701  'boost' => $boost
702  ]
703  ]
704  ];
705  } elseif (preg_match('/^(.*?)~([0-9.]+|)$/', $query, $match)) {
706  // Fuzzy for this search term
707  $query = $match[1];
708  $fuzziness = floatval($match[2] !== '' ? $match[2] : 1);
709 
710  $should[] = [
711  'fuzzy' => [
712  $field => [
713  'value' => $query,
714  'rewrite' => $rewrite,
715  'boost' => $boost,
716  'fuzziness' => round(2 * $fuzziness)
717  ]
718  ]
719  ];
720  } else {
721  // Wildcard
// Terms without an explicit "*" are wrapped as "*term*"
722  $wildcard = function ($query) {
723  if (strpos($query, '*') === false) {
724  $query = "*$query*";
725  }
726  return $query;
727  };
728 
729  switch ($query[0]) {
730  case '+':
731  $query = ltrim($query, '+');
732  $must_should[md5($query)][] = [
733  'wildcard' => [
734  $field => [
735  'value' => $wildcard($query),
736  'rewrite' => $rewrite,
737  'boost' => $boost
738  ]
739  ]
740  ];
741  break;
742  case '-':
743  $query = ltrim($query, '-');
744  $must_not[] = [
745  'wildcard' => [
746  $field => [
747  'value' => $wildcard($query),
748  'rewrite' => $rewrite,
749  'boost' => $boost
750  ]
751  ]
752  ];
753  break;
754  default:
755  $should[] = [
756  'wildcard' => [
757  $field => [
758  'value' => $wildcard($query),
759  'rewrite' => $rewrite,
760  'boost' => $boost
761  ]
762  ]
763  ];
764  }
765  }
766  }
767  }
768 
769  // A search term with several words also produces a preferred phrase search
770  if ($match_phrase) {
771  $should[] = [
772  'match_phrase' => [
773  $field => [
774  'query' => trim($search),
775  'boost' => $boost + 1
776  ]
777  ]
778  ];
779  }
780  }
781  }
782 
// With an empty search term and a filter or extra query, no "should" clause has to match
783  $minimum_should_match = $search == '' && (!empty($filter) || !empty($this->extraQuery)) ? 0 : 1;
784 
785  if (sizeof($must_should)) {
786  $minimum_should_match = 0;
787 
// Each "+term" must match in at least one of the fields
788  foreach ($must_should as $items) {
789  $must[] = [
790  'bool' => [
791  'should' => $items,
792  'minimum_should_match' => 1
793  ]
794  ];
795  }
796  }
797 
798  if ($this->config['param']['only_active']) {
799  $must[] = [
800  'match' => [
801  'inactive' => 0
802  ]
803  ];
804  }
805  if (!$this->config['param']['deleted'] && !$this->config['param']['deleted_or']) {
806  $must[] = [
807  'match' => [
808  'deleted' => 0
809  ]
810  ];
811  }
812  if ($this->config['param']['search']) {
813  $must[] = [
814  'match' => [
815  'ignore_search' => '0'
816  ]
817  ];
818  }
819 
820  // Only return hits from an ID list
821  if (!empty($id_list)) {
822  $must[] = [
823  'constant_score' => [
824  'filter' => [
825  'terms' => [
826  '_id' => $id_list
827  ]
828  ]
829  ]
830  ];
831  }
832 
833  // Add extra search
834  if (!empty($this->extraQuery)) {
835  $must[] = [
836  'query_string' => [
837  'query' => $this->extraQuery
838  ]
839  ];
840  }
841 
842  // Determine filter
843  if (!empty($filter)) {
844  if ($search == '') {
845  /* Without a search term the search returns no results.
846  * The filter then becomes the search criterion. */
847  $must[] = [
848  'query_string' => [
849  'query' => $filter
850  ]
851  ];
852  $filter = [];
853  } else {
854  $filter = [
855  'query_string' => [
856  'query' => $filter
857  ]
858  ];
859  }
860  } else {
861  $filter = [];
862  }
863 
864  // Determine limit (by default all results are returned)
865  if ($limit) {
866  $limit_parts = explode(',', $limit);
867 
868  if (count($limit_parts) > 1) {
869  $from = trim($limit_parts[0]);
870  $size = trim($limit_parts[1]);
871  } else {
872  $from = 0;
873  $size = trim($limit_parts[0]);
874  }
875  } else {
876  $from = 0;
877  $size = (int) $GLOBALS['egotec_conf']['elastic']['max_results'];
878  }
879 
880  // Determine sorting
881  $sort = [];
882  $score_sorted = false;
883 
884  if ($order) {
885  foreach (explode(',', $order) as $order_part) {
// NOTE(review): $parts[1] is not set when an order entry has no direction - confirm callers always pass one.
886  $parts = explode(' ', trim($order_part), 2);
887  $key = trim($parts[0]);
888 
889  // When sorting by result relevance, sorting by "_score.keywords" is not allowed
890  if ($key === 'score') {
891  $sort[] = ['_score' => ['order' => $parts[1]]];
892  $score_sorted = true;
893  continue;
894  }
895 
896  $parent_id = null;
897  if (strpos($key, '.') !== false) {
// NOTE(review): the else branch below skips the order entry as soon as the first index does not
// match, so later entries of $indexes are never compared - confirm this is intended.
898  foreach ($indexes as $index) {
899  if (strpos($key, "{$index}.") === 0) {
900  $key = substr($key, strlen("{$index}."));
901  break;
902  } elseif (
903  $where
904  && $key == "{$index}_children.idx"
// NOTE(review): $index goes into the pattern without preg_quote() - fine only while index names contain no regex metacharacters.
905  && preg_match('/' . $index . '_children\.page_id\s*=\s*(\d+)/', $where, $match)
906  ) {
907  $key = 'structure.position';
908  $parent_id = $match[1];
909  break;
910  } else {
911  continue 2;
912  }
913  }
914  }
// Sorting uses the "keyword" sub-field of the mapping
915  if (!preg_match('/\.keyword$/i', $key)) {
916  $key .= '.keyword';
917  }
918 
919  // Additional parameters for specific fields
920  $params = [];
921  if ($parent_id && strpos($key, 'structure.') === 0) {
922  $params['nested'] = [
923  'path' => 'structure',
924  'filter' => [
925  'term' => ['structure.parent' => $parent_id]
926  ]
927  ];
928  }
929 
930  $sort[] = [
931  $key => array_merge([
932  'order' => strtolower(trim($parts[1]))
933  ], $params)
934  ];
935  }
936  }
937 
938  // If not already sorted by "_score"
939  if (!$score_sorted) {
940  $sort[] = ['_score' => [
941  'order' => 'desc'
942  ]];
943  }
944 
945  // By default always sort by ID as well (relevant when "_score" is identical)
946  $sort[] = ['id' => [
947  'order' => 'asc'
948  ]];
949 
950  return [
951  'index' => $indexes,
952  // Do NOT return any contents
953  '_source' => false,
954  'track_scores' => true,
955  'body' => [
956  'from' => $from,
957  'size' => $size,
958  'sort' => $sort,
959  'query' => [
960  'bool' => [
961  'must' => $must,
962  'must_not' => $must_not,
963  'should' => $should,
964  'filter' => $filter,
965  'minimum_should_match' => $minimum_should_match
966  ]
967  ]
968  ]
969  ];
970  }
971 
/**
 * Registers the "attachment" ingest pipeline used for file indexing.
 *
 * Does nothing unless the office licence check succeeded in the constructor.
 */
private function createPipeline() {
    if (!$this->officeImport) {
        return;
    }

    $processor = [
        'attachment' => [
            'field' => 'base64',
            'indexed_chars' => -1
        ]
    ];

    $this->client->ingest()->putPipeline([
        'id' => 'attachment',
        'body' => [
            'description' => 'Extract attachment information',
            'processors' => [$processor]
        ]
    ]);
}
995 
/**
 * Extracts the text content of a file through the "attachment" pipeline.
 *
 * The file is indexed as a temporary document, the extracted content is read
 * back and delete() is then called for the same ID.
 *
 * @param Page   $page Page the file belongs to.
 * @param string $path Path of the file to read.
 * @return string|null Extracted content, or null when file indexing is not
 *                     available, the page is quarantined, the file is missing
 *                     or the index request fails.
 */
protected function indexFile(Page $page, $path) {
    $indexable = $this->officeImport
        && !$page->extra['quarantine']
        && Ego_System::file_exists($path);
    if (!$indexable) {
        return null;
    }

    @ini_set("memory_limit", "-1");

    $pageTable = strtolower($page->getSite()->pageTable);
    $identity = $page->getIdentity();

    $request = [
        'index' => $pageTable,
        'id' => $identity,
        'pipeline' => 'attachment',
        'body' => [
            'base64' => base64_encode(Ego_System::file_get_contents($path, false))
        ]
    ];

    try {
        $result = $this->client->index($request);
    } catch (Exception $failure) {
        egotec_error_log('Elasticsearch: error while indexing file of ' . $page->getIdentity() . ' (' . $failure->getMessage() . ')');
        return null;
    }

    if (!$result) {
        return null;
    }

    $content = $this->getIndexFile($pageTable, $identity);
    // NOTE(review): delete() targets $this->config['index'], while the temporary document was written to $pageTable - confirm both are always the same index.
    $this->delete($identity);

    return Ego_System::stringEncode($content);
}
1039 
/**
 * Reads the extracted attachment content of a document.
 *
 * @param string $pageTable Page table; lower-cased to form the index name.
 * @param mixed  $identity  Document ID.
 * @return mixed Value of _source.attachment.content in the client's response.
 */
private function getIndexFile($pageTable, $identity) {
    $document = $this->client->get([
        'index' => strtolower($pageTable),
        'id' => $identity
    ]);

    return $document['_source']['attachment']['content'];
}
1057 
/**
 * Fills the content field of a "multimedia/file" page with the text extracted
 * from its media file, then stores the page.
 *
 * Pages are returned unchanged when file indexing is unavailable, the page is
 * quarantined, it is not a multimedia file, it is already marked as indexed,
 * or its MIME type is one of the excluded kinds.
 *
 * @param Page $page Page to process.
 * @return Page The (possibly updated) page.
 */
private function indexFiles(Page $page) {
    if (!$this->officeImport || $page->extra['quarantine']) {
        return $page;
    }

    if ($page->field['type'] != 'multimedia/file' || $page->extra['_indexed']) {
        return $page;
    }

    if (preg_match('/(image|video|audio|zip|exe|rar|octet-stream|postscript)/is', $page->extra['mime_type'])) {
        return $page;
    }

    // Marked before the attempt, so the flag is stored even when extraction fails
    $page->extra['_indexed'] = true;
    try {
        $media_path = $GLOBALS['egotec_conf']['var_dir'] . 'media/' . $page->getSite()->name . '/' . $page->getMediaFilename();
        $content = $this->indexFile($page, $media_path);
        $page->field['content'] = Ego_System::filterNonUtf8($content, '', true);
    } catch (Exception $failure) {
        // The file cannot be indexed; encrypted documents and Tika errors are not logged
        $message = $failure->getMessage();
        $expected = strpos($message, 'EncryptedDocumentException') !== false
            || strpos($failure->getMessage(), 'TikaException') !== false;
        if (!$expected) {
            egotec_error_log('Elasticsearch: error while indexing file of ' . $page->getIdentity() . ' (' . $failure->getMessage() . ')');
        }
    }

    $page->update([], true, true);

    return $page;
}
1095 
/**
 * Searches this instance's index and turns the hits into a page query.
 *
 * @param string $search   Search term.
 * @param string $relation SQL column the hit IDs are matched against.
 * @param array  $query    Page query; 'id_list', 'limit', 'order' and 'where' are passed on to the search.
 * @param mixed  $filter   query_string filter expression.
 * @param mixed  $fuzzy    Truthy to search fuzzily.
 * @return array The query extended by buildQuery(), or a query with where "1=0" when nothing was found.
 */
public function search($search, $relation, $query, $filter = '', $fuzzy = false) {
    $this->checkSearch($search);

    $GLOBALS['monitor']['search_count']++;
    $GLOBALS['monitor']['search_length'] += mb_strlen($search);
    $GLOBALS['monitor']['search_words'] += substr_count($search, ' ') + 1;

    $params = $this->getSearchParam(
        null,
        $search,
        $filter,
        $fuzzy,
        $query['id_list'] ?? [],
        $query['limit'] ?? '',
        $query['order'] ?? '',
        $query['where'] ?? ''
    );

    $start = microtime(true);
    // Stays null when error() handles the exception without re-throwing
    $results = null;
    try {
        $results = $this->client->search($params);
    } catch (Exception $e) {
        $this->error($e);
    }

    $hits = $results['hits']['hits'] ?? [];
    $stop = microtime(true);

    $duration = (int)(($stop - $start) * 1000);
    $GLOBALS['monitor']['search_duration'] += $duration;

    if (empty($hits)) {
        unset($query['order']);
        unset($query['limit']);
        unset($query['bind']);
        $query['where'] = '1=0';
        return $query;
    }

    $ordered = !empty($query['order']);

    // With an explicit order the rank is the reversed hit position, otherwise the Elastic score
    $ids = [];
    foreach ($hits as $key => $value) {
        $ids[$value["_id"]] = $ordered ? count($hits) - $key : $value["_score"];
    }

    if ($ordered) {
        $min_value = 0;
        $multiply = 1;
    } else {
        // Scale the scores into the range 0..4; fall back to the hits when max_score is missing
        $max_value = $results['hits']['max_score'] ?? max(array_values($ids));
        $min_value = min(array_values($ids));
        if ($max_value == $min_value) {
            // Guard against a division by zero when every score is 0
            $multiply = $max_value ? 4 / $max_value : 0;
            $min_value = 0;
        } else {
            $multiply = 4 / ($max_value - $min_value);
        }
    }

    return $this->buildQuery($ids, $relation, $query, $min_value, $multiply);
}
1159 
/**
 * Searches several sites at once and returns the matching pages.
 *
 * @param string $search Search term.
 * @param array  $sites  Sites to search; all sites from Ego_System::getAllSites() when empty.
 * @param array  $query  Page query applied per site; 'order' also controls the ranking.
 * @param array  $param  Page parameters; 'fuzzy' switches on fuzzy search, 'search' defaults to true outside the admin area.
 * @param array  $sort   Passed to sortPages().
 * @param mixed  $filter query_string filter expression.
 * @return array Sorted pages, or an empty array when nothing was found.
 */
public function globalSearch($search, $sites = [], $query = [], $param = [], $sort = [], $filter = '') {
    $this->checkSearch($search);

    $start1 = microtime(true);
    $GLOBALS['monitor']['search_global_count']++;
    $GLOBALS['monitor']['search_global_length'] += mb_strlen($search);
    $GLOBALS['monitor']['search_global_words'] += substr_count($search, ' ') + 1;

    // By default the frontend only shows pages that may be found by the search
    if (!isset($param['search']) && empty($GLOBALS['admin_area'])) {
        $param['search'] = true;
    }

    // Determine the affected tables
    if (!empty($_REQUEST['lang'])) {
        $lang = $_REQUEST['lang'];
    } else {
        $lang = !empty($GLOBALS['site']) ? $GLOBALS['site']->language : null;
    }
    $relations = [];
    if (empty($sites)) {
        // No specific sites given, so search all of them
        $sites = Ego_System::getAllSites();
    }
    $tables = $this->getTables($sites, $lang, $relations);

    // Run the search
    $start2 = microtime(true);
    $params = $this->getSearchParam($tables, $search, $filter, !empty($param['fuzzy']));
    // Stays null when error() handles the exception without re-throwing
    $results = null;
    try {
        $results = $this->client->search($params);
    } catch (Exception $e) {
        $this->error($e);
    }
    $stop2 = microtime(true);
    $duration = (int)(($stop2 - $start2) * 1000);
    $GLOBALS['monitor']['search_global_d2'] += $duration;

    $hits = $results['hits']['hits'] ?? [];

    if (empty($hits)) {
        return [];
    }

    $ordered = !empty($query['order']);
    $min_value = PHP_INT_MAX;
    $max_score = null;

    // Group the hits by index; with an explicit order the rank is the reversed hit position
    $sorted_hits = [];
    foreach ($hits as $key => $hit) {
        if (!isset($sorted_hits[$hit['_index']])) {
            $sorted_hits[$hit['_index']] = [];
        }
        $sorted_hits[$hit['_index']][$hit['_id']] = $ordered ? count($hits) - $key : $hit['_score'];
        if ($hit['_score'] < $min_value) {
            $min_value = $hit['_score'];
        }
        if ($max_score === null || $hit['_score'] > $max_score) {
            $max_score = $hit['_score'];
        }
    }

    if ($ordered) {
        $min_value = 0;
        $multiply = 1;
    } else {
        // Scale the scores into the range 0..4; fall back to the hits when max_score is missing
        $max_value = $results['hits']['max_score'] ?? $max_score;
        if ($max_value == $min_value) {
            // Guard against a division by zero when every score is 0
            $multiply = $max_value ? 4 / $max_value : 0;
            $min_value = 0;
        } else {
            $multiply = 4 / ($max_value - $min_value);
        }
    }

    // Collect the hits and return them sorted
    $pages = [];
    foreach ($sorted_hits as $table => $ids) {
        $site = $relations[$table];
        foreach ($site->getPages($this->buildQuery($ids, $site->pageTable . '.id', $query, $min_value, $multiply), $param) as $page) {
            $pages[] = $page;
        }
    }
    $pages = $this->sortPages($pages, $query['order'] ?? null, $sort);

    $stop1 = microtime(true);
    $duration = (int)(($stop1 - $start1) * 1000);
    $GLOBALS['monitor']['search_global_d1'] += $duration;

    return $pages;
}
1254 
/**
 * Returns completion suggestions for a search prefix.
 *
 * Suggestions whose index name has the form "<site>_<lang>" are only returned
 * when the matching page exists and the current user may view it.
 *
 * @param string $query Prefix typed by the user.
 * @param array  $sites Sites to search; this instance's index when empty.
 * @param int    $max   Maximum number of suggestions requested from Elastic.
 * @return array List of suggestion texts.
 */
public function getSuggestions($query, $sites = [], $max = 5) {
    $tables = $this->config['index'];
    if (!empty($sites)) {
        $tables = $this->getTables($sites);
    }

    // Stays empty when error() handles the exception without re-throwing
    $results = [];
    try {
        $results = $this->client->search([
            'index' => $tables,
            // Do NOT return any contents
            '_source' => false,
            'sort' => ['_score'], // The order defaults to desc when sorting on the _score, and defaults to asc when sorting on anything else.
            'body' => [
                'suggest' => [
                    'suggest' => [
                        'prefix' => $query,
                        'completion' => [
                            'field' => 'suggest',
                            'skip_duplicates' => true,
                            'size' => $max,
                            'contexts' => [
                                'ignore_search' => ['0']
                            ]
                        ]
                    ]
                ]
            ]
        ]);
    } catch (Exception $e) {
        $this->error($e);
    }

    $suggestions = [];
    $options = $results['suggest']['suggest'][0]['options'] ?? null;
    if (is_array($options)) {
        foreach ($options as $result) {
            // Do not suggest what the user has already typed
            if (mb_strtolower($query) != mb_strtolower($result['text'])) {
                if (preg_match('/^(.*?)_([^_]+)$/', $result['_index'], $match)) {
                    // Only return suggestions from pages the user is allowed to see
                    try {
                        $site = new Site($match[1], $match[2]);
                        $page = $site->getPage($result['_id']);

                        if ($page && $page->hasRights(['view'])) {
                            $suggestions[] = $result['text'];
                        }
                    } catch (Site_Exception $e) {
                        // ignore
                    }
                } else {
                    $suggestions[] = $result['text'];
                }
            }
        }
    }

    return $suggestions;
}
1320 
/**
 * Returns spelling corrections for a search term, based on the phrase
 * suggester over the "title.trigram" field.
 *
 * @param string $query Search term to correct.
 * @param array  $sites Sites to search; this instance's index when empty.
 * @param int    $max   Maximum number of corrections requested from Elastic.
 * @return array Option entries of the "simple_phrase" suggester, or an empty array.
 */
public function getCorrections($query, $sites = [], $max = 3) {
    $tables = $this->config['index'];
    if (!empty($sites)) {
        $tables = $this->getTables($sites);
    }

    // Stays empty when error() handles the exception without re-throwing
    $results = [];
    try {
        $results = $this->client->search([
            'index' => $tables,
            'body' => [
                'suggest' => [
                    'text' => $query,
                    'simple_phrase' => [
                        'phrase' => [
                            'field' => 'title.trigram',
                            'size' => $max,
                            'gram_size' => 3,
                            'direct_generator' => [[
                                'field' => 'title.trigram',
                                'suggest_mode' => 'always'
                            ]],
                            'highlight' => [
                                'pre_tag' => '<em>',
                                'post_tag' => '</em>'
                            ]
                        ]
                    ]
                ]
            ]
        ]);
    } catch (Exception $e) {
        $this->error($e);
    }

    $corrections = [];
    $options = $results['suggest']['simple_phrase'][0]['options'] ?? null;
    if (is_array($options) && !empty($options)) {
        $corrections = $options;
    }

    return $corrections;
}
1370 
/**
 * Extends a page query so that it only returns the given hits, ranked by a
 * computed "score" column.
 *
 * @param array  $result    Map of ID => raw rank value.
 * @param string $relation  SQL column the IDs are compared with.
 * @param array  $query     Page query to extend.
 * @param mixed  $min_value Value subtracted from every rank.
 * @param mixed  $multiply  Factor applied after the subtraction.
 * @return array The query with 'where', 'fields2' and 'order' set; with an
 *               empty $result the query gets where "1=0" and loses order, limit and bind.
 */
private function buildQuery($result, $relation, $query, $min_value, $multiply) {
    // No result, so construct a query that returns nothing
    if (empty($result)) {
        unset($query['order'], $query['limit'], $query['bind']);
        $query['where'] = '1=0';
        return $query;
    }

    $query['where'] = empty($query['where']) ? '' : $query['where'] . ' AND ';

    // Split for Oracle: at most 1000 entries per list
    $in_clauses = array_map(function ($chunk) use ($relation) {
        return "$relation IN (" . implode(', ', $chunk) . ")";
    }, array_chunk(array_keys($result), 999));
    $query['where'] .= '(' . implode(' OR ', $in_clauses) . ')';

    $cases = '';
    foreach ($result as $id => $value) {
        $score = str_replace(',', '.', ($value - $min_value) * $multiply);
        $cases .= "WHEN $relation = $id THEN " . $score . " ";
    }
    $query['fields2'][] = 'CASE ' . $cases . 'ELSE 0 END AS score';
    $query['order'] = 'score DESC';

    return $query;
}
1413 
/**
 * Translates an SQL-like condition into Elastic query string syntax and
 * stores it as the extra query of this instance.
 *
 * Conditions are split at "and"/"or". Each part of the form
 * "<field> <operator> <value>" (operators: like, =, !=, >, >=, <, <=) is
 * rewritten; a part of the form "!<field>" requires that the field is not set.
 *
 * @param string $query Condition, may contain ":name" placeholders.
 * @param array $bind Placeholder values as map of name => value.
 * @return void
 */
public function setExtraQuery($query, $bind = []) {
    // Apply binds
    if (is_array($bind) && !empty($bind)) {
        // Characters reserved by Elastic: + - && || ! ( ) { } [ ] ^ " ~ * ? : \ /
        $reserved_characters = preg_quote('+-&|!(){}[]^"~*?:\\/', '/');
        $replacements = [];
        foreach ($bind as $key => $value) {
            // Prefix every reserved character with a backslash.
            $replacements[":$key"] = preg_replace('/[' . $reserved_characters . ']/', '\\\\$0', (string) $value);
        }
        // strtr() prefers the longest placeholder and never rescans inserted
        // values, so ":id" cannot corrupt ":id2" and values cannot inject
        // further placeholders.
        $query = strtr($query, $replacements);
    }

    // Generate Elastic query
    $Elastic_query = $query;
    $sub_queries = preg_split('/\s+(and|or)\s+/si', $query);
    $is_single = count($sub_queries) === 1;

    foreach ($sub_queries as $sub_query) {
        if (!preg_match('/(!?(extra\.)?[^ !=<>]+)(\s*(like|>=|<=|!=|=|>|<)\s*(.*?))?$/si', trim($sub_query, '() '), $matches)) {
            continue;
        }

        $param = $matches[1];
        // Operator and value are absent when the part is only a field name.
        $operator = mb_strtolower($matches[4] ?? '');
        $value = trim($matches[5] ?? '', '\'"');

        if ($param[0] === '!') {
            // Field must not be set in the document
            $param = substr($param, 1);
            $replace = "-$param:*" . ($is_single ? ' AND *' : '');
        } else {
            // Write Elastic syntax
            if (!is_numeric($value)) {
                if ($operator == 'like') {
                    $value = $this->prepareSearch(str_replace('%', '', $value), '', true);
                } else {
                    $value = '"' . $value . '"';
                }
            }

            // Build comparison
            switch ($operator) {
                case '!=':
                    $replace = "($param:* NOT $param:$value)";
                    break;
                case '>':
                    $replace = "$param:" . '{' . $value . ' TO *}';
                    break;
                case '>=':
                    $replace = "$param:" . '[' . $value . ' TO *]';
                    break;
                case '<':
                    $replace = "$param:" . '{* TO ' . $value . '}';
                    break;
                case '<=':
                    $replace = "$param:" . '[* TO ' . $value . ']';
                    break;
                case '=':
                case 'like':
                default:
                    $replace = "$param:" . $value;
            }
        }

        $Elastic_query = str_replace($matches[0], $replace, $Elastic_query);
    }

    $this->extraQuery = $Elastic_query;
}
1504 
/**
 * Returns the configuration of this search instance.
 *
 * @return array The current content of the config property.
 */
public function getConfig() {
    $config = $this->config;

    return $config;
}
1513 
/**
 * Asks every configured Elastic host to clear its caches.
 *
 * This is a best-effort operation: hosts that cannot be reached or that
 * reject the request are silently skipped.
 *
 * @return void
 */
public function clearCache() {
    // NOTE(review): the assignment of $this->hosts is not visible here;
    // presumably it holds the host URLs collected in the constructor - confirm.
    $hosts = $this->hosts ?? [];
    if (!is_iterable($hosts)) {
        return;
    }

    // The clear cache API is a POST endpoint.
    $context = stream_context_create([
        'http' => ['method' => 'POST']
    ]);

    foreach ($hosts as $host) {
        // Host URLs may be stored without a trailing slash, so normalise
        // the separator before appending the endpoint.
        $url = rtrim($host, '/') . '/_cache/clear';

        // Failures are intentionally suppressed (best effort).
        @file_get_contents($url, false, $context);
    }
}
1524 
/**
 * Determines the index names (lower-cased page tables) of the given sites.
 *
 * @param array $sites Site objects or site names.
 * @param string $lang Language to switch the sites to; if empty, the request
 *                     parameter "lang" or the language of the global site is used.
 * @param array $relations Is filled with a map of index name => Site object.
 * @return string Comma separated list of index names.
 */
private function getTables($sites, $lang = '', &$relations = []) {
    if (!$lang) {
        // Read both fallbacks defensively: neither the request parameter nor
        // the global site has to exist (e.g. in CLI context), and only a
        // string from the request is a usable language.
        if (!empty($_REQUEST['lang']) && is_string($_REQUEST['lang'])) {
            $lang = $_REQUEST['lang'];
        } elseif (!empty($GLOBALS['site'])) {
            $lang = $GLOBALS['site']->language;
        } else {
            $lang = null;
        }
    }

    $tables = [];
    foreach ($sites as $site) {
        try {
            if (is_string($site)) {
                $site = new Site($site);
            }
            if ($lang) {
                $site->setLanguage($lang);
            }
        } catch (Exception $e) {
            // Site does not exist in this language, ignore it
            continue;
        }

        $table = strtolower($site->pageTable);
        $tables[] = $table;
        $relations[$table] = $site;
    }

    return ltrim(implode(',', $tables), ',');
}
1556 
/**
 * Central handling of exceptions raised while talking to Elastic.
 *
 * If exceptions are enabled for this instance the exception is rethrown.
 * Otherwise a summary is written to the error log, the status of the
 * Elastic response (default 400) is sent as header and the request ends.
 *
 * @param Exception $e The exception to handle.
 * @return void This method never returns normally: it throws or exits.
 * @throws Exception The given exception, if exceptions are enabled.
 */
private function error($e) {
    if ($this->throwException) {
        // Rethrow the original object so that trace and previous exception
        // are preserved and no constructor signature has to be assumed.
        throw $e;
    }

    // The message is expected to be the JSON response of Elastic; anything
    // else (plain text, scalar JSON) is treated as "no structured response".
    $response = json_decode($e->getMessage(), true);
    if (!is_array($response)) {
        $response = [];
    }
    $error = $response['error'] ?? null;

    $messages = [];
    if (is_array($error)) {
        foreach ((array) ($error['root_cause'] ?? []) as $cause) {
            $messages[] = ($cause['type'] ?? '') . ' for index ' . ($cause['index'] ?? '');
        }
        foreach ((array) ($error['failed_shards'] ?? []) as $shard) {
            if (!empty($shard['reason']['caused_by'])) {
                $caused_by = $shard['reason']['caused_by'];
                $messages[] = 'caused by ' . ($caused_by['type'] ?? '') . ' (reason: ' . ($caused_by['reason'] ?? '') . ')';
            }
        }
    }
    if (empty($messages)) {
        // No structured details available, log the raw message instead.
        $messages[] = $e->getMessage();
    }

    egotec_error_log('Elastic Exception thrown (' . get_class($e) . ', Code ' . $e->getCode() . '): ' . implode('; ', $messages));
    Ego_System::header($response['status'] ?? 400);
    exit;
}
1593 }
setExtraQuery($query, $bind=[])
getCorrections($query, $sites=[], $max=3)
indexFile(Page $page, $path)
getSuggestions($query, $sites=[], $max=5)
__construct($table='', $param=[], bool $checkHealthy=false)
updateBulk($pages, $recursive=true)
update($index, $page, $count=[], $recursive=true)
globalSearch($search, $sites=[], $query=[], $param=[], $sort=[], $filter='')
search($search, $relation, $query, $filter='', $fuzzy=false)
static filterNonUtf8($s, $substitute="", $strict=false)
Definition: Ego_System.php:481
static checkLicence($ini_path)
static getAllSites($username='', $perm='', $table=false, $type='')
static file_exists($file)
static file_get_contents($filename, $utf8=true, $context=null)
static stringEncode($string, $from='UTF-8', $to='UTF-8')
Definition: Ego_System.php:615
Definition: Page.php:28
getSite()
Definition: Page.php:5374
getMediaFilename($force_lang=false, $suffix="")
Definition: Page.php:5962
getIdentity()
Definition: Page.php:11918
hasRights($rights, $user_id=false, $cache=true)
Definition: Page.php:876
update($param=array(), $matrix_flag=true, $asis=false, $silent=false)
Definition: Page.php:4124
Definition: Site.php:30