diff --git a/engines/librex/fallback.php b/engines/librex/fallback.php index c5d73369..4aab449a 100644 --- a/engines/librex/fallback.php +++ b/engines/librex/fallback.php @@ -61,8 +61,8 @@ function get_librex_results($opts) { $results = $librex_request->get_results(); if (!empty($results)) { - $results["fallback_source"] = parse_url($instance)["host"]; - error_log($results["fallback_source"]); + $results["results_source"] = parse_url($instance)["host"]; + error_log($results["results_source"]); return $results; } diff --git a/engines/text/brave.php b/engines/text/brave.php new file mode 100644 index 00000000..bc46ce8e --- /dev/null +++ b/engines/text/brave.php @@ -0,0 +1,69 @@ +query)); + + $results_language = $this->opts->language; + $number_of_results = $this->opts->number_of_results; + + // TODO find the right parameters for the url + $url = "https://search.brave.com/search?q=$query_encoded&nfpr=1&spellcheck=0&start=$this->page"; + + if (3 > strlen($results_language) && 0 < strlen($results_language)) { + $url .= "&lr=lang_$results_language"; + $url .= "&hl=$results_language"; + } + + if (3 > strlen($number_of_results) && 0 < strlen($number_of_results)) + $url .= "&num=$number_of_results"; + + if (isset($_COOKIE["safe_search"])) + $url .= "&safe=medium"; + + return $url; + } + + public function parse_results($response) { + $results = array(); + $xpath = get_xpath($response); + + if (!$xpath) + return $results; + + foreach($xpath->query("//div[@id='results']//div[contains(@class, 'snippet')]") as $result) { + $url = $xpath->evaluate(".//a[contains(@class, 'h')]//@href", $result)[0]; + + if ($url == null) + continue; + + $url = $url->textContent; + + if (!empty($results) && array_key_exists("url", $results) && end($results)["url"] == $url->textContent) + continue; + + $title = $xpath->evaluate(".//a[contains(@class, 'h')]//div[contains(@class, 'url')]", $result)[0]; + + if ($title == null) + continue; + $title = $title->textContent; + + $description = $xpath->evaluate(".//div[contains(@class, 'snippet-content')]//div[contains(@class, 'snippet-description')]", $result)[0]->textContent; + + array_push($results, + array ( + "title" => htmlspecialchars($title), + "url" => htmlspecialchars($url), + // base_url is to be removed in the future, see #47 + "base_url" => htmlspecialchars(get_base_url($url)), + "description" => $description == null ? + TEXTS["result_no_description"] : + htmlspecialchars($description) + ) + ); + + } + return $results; + } + + } +?> diff --git a/engines/text/ecosia.php b/engines/text/ecosia.php new file mode 100644 index 00000000..f3af1889 --- /dev/null +++ b/engines/text/ecosia.php @@ -0,0 +1,63 @@ +query)); + + $results_language = $this->opts->language; + $number_of_results = $this->opts->number_of_results; + + // TODO figure out how to not autocorrect + $url = "https://www.ecosia.org/search?method=index&q=$query_encoded&p=$this->page"; + + if (!is_null($results_language)) + $url .= "&lang=$results_language"; + + return $url; + } + + public function parse_results($response) { + $results = array(); + $xpath = get_xpath($response); + + if (!$xpath) + return $results; + + + foreach($xpath->query("//div[contains(@class, 'mainline__result-wrapper')]") as $result) { + $url = $xpath->evaluate(".//article//div[contains(@class, 'result__body')]//div[contains(@class, 'result__header')]//div[contains(@class, 'result__info')]//a[contains(@class, 'result__link')]//@href", $result)[0]; + + if ($url == null) + continue; + + $url = $url->textContent; + + if (!empty($results) && array_key_exists("url", $results) && end($results)["url"] == $url->textContent) + continue; + + $title = $xpath->evaluate(".//article//div[contains(@class, 'result__body')]//div[contains(@class, 'result__header')]//div[contains(@class, 'result__title')]//a//h2", $result)[0]; + + if ($title == null) + continue; + + $title = $title->textContent; + + $description = $xpath->evaluate(".//article//div[contains(@class, 'result__body')]//div[contains(@class, 'result__columns')]//div[contains(@class, 'result__columns-start')]//div//div//div/p", $result)[0]->textContent; + + array_push($results, + array ( + "title" => htmlspecialchars($title), + "url" => htmlspecialchars($url), + // base_url is to be removed in the future, see #47 + "base_url" => htmlspecialchars(get_base_url($url)), + "description" => $description == null ? + TEXTS["result_no_description"] : + htmlspecialchars($description) + ) + ); + + } + return $results; + } + + } +?> diff --git a/engines/text/google.php b/engines/text/google.php index b6645423..5ce877dc 100644 --- a/engines/text/google.php +++ b/engines/text/google.php @@ -46,7 +46,7 @@ public function parse_results($response) { if ($url == null) continue; - if (!empty($results) && array_key_exists("url", $results) && end($results)["url"] == $url->textContent) + if (!empty($results) && array_key_exists("url", $results) && end($results)["url"] == $url->textContent) continue; $url = $url->textContent; diff --git a/engines/text/mojeek.php b/engines/text/mojeek.php new file mode 100644 index 00000000..886a748d --- /dev/null +++ b/engines/text/mojeek.php @@ -0,0 +1,64 @@ +query)); + + $results_language = $this->opts->language; + $number_of_results = $this->opts->number_of_results; + + // TODO figure out how to not autocorrect + $url = "https://www.mojeek.com/search?q=$query_encoded&p=$this->page"; + + // TODO language setting + if (!is_null($results_language)) + $url .= "&lang=$results_language"; + + return $url; + } + + public function parse_results($response) { + $results = array(); + $xpath = get_xpath($response); + + if (!$xpath) + return $results; + + + foreach($xpath->query("//ul[contains(@class, 'results-standard')]//li") as $result) { + $url = $xpath->evaluate(".//h2//a[contains(@class, 'title')]//@href", $result)[0]; + + if ($url == null) + continue; + + $url = $url->textContent; + + if (!empty($results) && array_key_exists("url", $results) && end($results)["url"] == $url->textContent) + continue; + + $title = $xpath->evaluate(".//h2//a[contains(@class, 'title')]", $result)[0]; + + if ($title == null) + continue; + + $title = $title->textContent; + + $description = $xpath->evaluate(".//p[contains(@class, 's')]", $result)[0]->textContent; + + array_push($results, + array ( + "title" => htmlspecialchars($title), + "url" => htmlspecialchars($url), + // base_url is to be removed in the future, see #47 + "base_url" => htmlspecialchars(get_base_url($url)), + "description" => $description == null ? + TEXTS["result_no_description"] : + htmlspecialchars($description) + ) + ); + + } + return $results; + } + + } +?> diff --git a/engines/text/text.php b/engines/text/text.php index 15960e37..1d6bd9e1 100644 --- a/engines/text/text.php +++ b/engines/text/text.php @@ -1,42 +1,101 @@ engines = get_engines(); + shuffle($this->engines); + $this->query = $opts->query; + $this->cache_key = "text:" . $this->query; + $this->page = $opts->page; $this->opts = $opts; - $this->engine = $opts->preferred_engines["text"] ?? "google"; + $this->engine = $opts->engine; $query_parts = explode(" ", $this->query); $last_word_query = end($query_parts); if (substr($this->query, 0, 1) == "!" || substr($last_word_query, 0, 1) == "!") check_ddg_bang($this->query, $opts); - if ($this->engine == "google") { - + if (has_cached_results($this->cache_key)) + return; + + if ($this->engine == "auto") + $this->engine = $this->select_engine(); + + // no engine was selected + if (is_null($this->engine)) + return; + + // this only happens if a specific engine was selected, not if auto is used + if (has_cooldown($this->engine, $this->opts->cooldowns)) + return; + + $this->engine_request = $this->get_engine_request($this->engine, $opts, $mh); + + if (is_null($this->engine_request)) + return; + + require "engines/special/special.php"; + $this->special_request = get_special_search_request($opts, $mh); + } + private function select_engine() { + if (sizeof($this->engines) == 0) + return null; + + $engine = array_pop($this->engines); + + // if this engine is on cooldown, try again + if (!has_cooldown($engine, $this->opts->cooldowns)) + return $engine; + + return $this->select_engine(); + } + + private function get_engine_request($engine, $opts, $mh) { + if ($engine == "google") { require "engines/text/google.php"; - $this->engine_request = new GoogleRequest($opts, $mh); + return new GoogleRequest($opts, $mh); } - if ($this->engine == "duckduckgo") { + if ($engine == "duckduckgo") { require "engines/text/duckduckgo.php"; - $this->engine_request = new DuckDuckGoRequest($opts, $mh); + return new DuckDuckGoRequest($opts, $mh); } - if (has_cooldown($this->engine, $this->opts->cooldowns) && !has_cached_results($this->engine_request->url)) { - // TODO dont add it in the first place - curl_multi_remove_handle($mh, $this->engine_request->ch); - $this->engine_request = null; - return; + if ($engine == "brave") { + require "engines/text/brave.php"; + return new BraveSearchRequest($opts, $mh); } + if ($engine == "yandex") { + require "engines/text/yandex.php"; + return new YandexSearchRequest($opts, $mh); + } - require "engines/special/special.php"; - $this->special_request = get_special_search_request($opts, $mh); + if ($engine == "ecosia") { + require "engines/text/ecosia.php"; + return new EcosiaSearchRequest($opts, $mh); + } + + if ($engine == "mojeek") { + require "engines/text/mojeek.php"; + return new MojeekSearchRequest($opts, $mh); + } + + // if an invalid engine is selected, don't give any results + return null; } public function parse_results($response) { + if (has_cached_results($this->cache_key)) + return fetch_cached_results($this->cache_key); + if (!isset($this->engine_request)) return array(); @@ -53,6 +112,11 @@ public function parse_results($response) { } } + if (!empty($results)) { + $results["results_source"] = parse_url($this->engine_request->url)["host"]; + store_cached_results($this->cache_key, $results); + } + return $results; } @@ -98,6 +162,8 @@ public static function print_results($results, $opts) { echo "