diff --git a/engines/librex/fallback.php b/engines/librex/fallback.php index c5d73369..4aab449a 100644 --- a/engines/librex/fallback.php +++ b/engines/librex/fallback.php @@ -61,8 +61,8 @@ function get_librex_results($opts) { $results = $librex_request->get_results(); if (!empty($results)) { - $results["fallback_source"] = parse_url($instance)["host"]; - error_log($results["fallback_source"]); + $results["results_source"] = parse_url($instance)["host"]; + error_log($results["results_source"]); return $results; } diff --git a/engines/text/brave.php b/engines/text/brave.php new file mode 100644 index 00000000..bc46ce8e --- /dev/null +++ b/engines/text/brave.php @@ -0,0 +1,69 @@ +query)); + + $results_language = $this->opts->language; + $number_of_results = $this->opts->number_of_results; + + // TODO find the right parameters for the url + $url = "https://search.brave.com/search?q=$query_encoded&nfpr=1&spellcheck=0&start=$this->page"; + + if (3 > strlen($results_language) && 0 < strlen($results_language)) { + $url .= "&lr=lang_$results_language"; + $url .= "&hl=$results_language"; + } + + if (3 > strlen($number_of_results) && 0 < strlen($number_of_results)) + $url .= "&num=$number_of_results"; + + if (isset($_COOKIE["safe_search"])) + $url .= "&safe=medium"; + + return $url; + } + + public function parse_results($response) { + $results = array(); + $xpath = get_xpath($response); + + if (!$xpath) + return $results; + + foreach($xpath->query("//div[@id='results']//div[contains(@class, 'snippet')]") as $result) { + $url = $xpath->evaluate(".//a[contains(@class, 'h')]//@href", $result)[0]; + + if ($url == null) + continue; + + $url = $url->textContent; + + if (!empty($results) && array_key_exists("url", $results) && end($results)["url"] == $url->textContent) + continue; + + $title = $xpath->evaluate(".//a[contains(@class, 'h')]//div[contains(@class, 'url')]", $result)[0]; + + if ($title == null) + continue; + $title = $title->textContent; + + $description = $xpath->evaluate(".//div[contains(@class, 'snippet-content')]//div[contains(@class, 'snippet-description')]", $result)[0]->textContent; + + array_push($results, + array ( + "title" => htmlspecialchars($title), + "url" => htmlspecialchars($url), + // base_url is to be removed in the future, see #47 + "base_url" => htmlspecialchars(get_base_url($url)), + "description" => $description == null ? + TEXTS["result_no_description"] : + htmlspecialchars($description) + ) + ); + + } + return $results; + } + + } +?> diff --git a/engines/text/ecosia.php b/engines/text/ecosia.php new file mode 100644 index 00000000..f3af1889 --- /dev/null +++ b/engines/text/ecosia.php @@ -0,0 +1,63 @@ +query)); + + $results_language = $this->opts->language; + $number_of_results = $this->opts->number_of_results; + + // TODO figure out how to not autocorrect + $url = "https://www.ecosia.org/search?method=index&q=$query_encoded&p=$this->page"; + + if (!is_null($results_language)) + $url .= "&lang=$results_language"; + + return $url; + } + + public function parse_results($response) { + $results = array(); + $xpath = get_xpath($response); + + if (!$xpath) + return $results; + + + foreach($xpath->query("//div[contains(@class, 'mainline__result-wrapper')]") as $result) { + $url = $xpath->evaluate(".//article//div[contains(@class, 'result__body')]//div[contains(@class, 'result__header')]//div[contains(@class, 'result__info')]//a[contains(@class, 'result__link')]//@href", $result)[0]; + + if ($url == null) + continue; + + $url = $url->textContent; + + if (!empty($results) && array_key_exists("url", $results) && end($results)["url"] == $url->textContent) + continue; + + $title = $xpath->evaluate(".//article//div[contains(@class, 'result__body')]//div[contains(@class, 'result__header')]//div[contains(@class, 'result__title')]//a//h2", $result)[0]; + + if ($title == null) + continue; + + $title = $title->textContent; + + $description = $xpath->evaluate(".//article//div[contains(@class, 'result__body')]//div[contains(@class, 'result__columns')]//div[contains(@class, 'result__columns-start')]//div//div//div/p", $result)[0]->textContent; + + array_push($results, + array ( + "title" => htmlspecialchars($title), + "url" => htmlspecialchars($url), + // base_url is to be removed in the future, see #47 + "base_url" => htmlspecialchars(get_base_url($url)), + "description" => $description == null ? + TEXTS["result_no_description"] : + htmlspecialchars($description) + ) + ); + + } + return $results; + } + + } +?> diff --git a/engines/text/google.php b/engines/text/google.php index b6645423..5ce877dc 100644 --- a/engines/text/google.php +++ b/engines/text/google.php @@ -46,7 +46,7 @@ public function parse_results($response) { if ($url == null) continue; - if (!empty($results) && array_key_exists("url", $results) && end($results)["url"] == $url->textContent) + if (!empty($results) && array_key_exists("url", $results) && end($results)["url"] == $url->textContent) continue; $url = $url->textContent; diff --git a/engines/text/mojeek.php b/engines/text/mojeek.php new file mode 100644 index 00000000..886a748d --- /dev/null +++ b/engines/text/mojeek.php @@ -0,0 +1,64 @@ +query)); + + $results_language = $this->opts->language; + $number_of_results = $this->opts->number_of_results; + + // TODO figure out how to not autocorrect + $url = "https://www.mojeek.com/search?q=$query_encoded&p=$this->page"; + + // TODO language setting + if (!is_null($results_language)) + $url .= "&lang=$results_language"; + + return $url; + } + + public function parse_results($response) { + $results = array(); + $xpath = get_xpath($response); + + if (!$xpath) + return $results; + + + foreach($xpath->query("//ul[contains(@class, 'results-standard')]//li") as $result) { + $url = $xpath->evaluate(".//h2//a[contains(@class, 'title')]//@href", $result)[0]; + + if ($url == null) + continue; + + $url = $url->textContent; + + if (!empty($results) && array_key_exists("url", $results) && end($results)["url"] == $url->textContent) + continue; + + $title = $xpath->evaluate(".//h2//a[contains(@class, 'title')]", $result)[0]; + + if ($title == null) + continue; + + $title = $title->textContent; + + $description = $xpath->evaluate(".//p[contains(@class, 's')]", $result)[0]->textContent; + + array_push($results, + array ( + "title" => htmlspecialchars($title), + "url" => htmlspecialchars($url), + // base_url is to be removed in the future, see #47 + "base_url" => htmlspecialchars(get_base_url($url)), + "description" => $description == null ? + TEXTS["result_no_description"] : + htmlspecialchars($description) + ) + ); + + } + return $results; + } + + } +?> diff --git a/engines/text/text.php b/engines/text/text.php index 15960e37..1d6bd9e1 100644 --- a/engines/text/text.php +++ b/engines/text/text.php @@ -1,42 +1,101 @@ engines = get_engines(); + shuffle($this->engines); + $this->query = $opts->query; + $this->cache_key = "text:" . $this->query; + $this->page = $opts->page; $this->opts = $opts; - $this->engine = $opts->preferred_engines["text"] ?? "google"; + $this->engine = $opts->engine; $query_parts = explode(" ", $this->query); $last_word_query = end($query_parts); if (substr($this->query, 0, 1) == "!" || substr($last_word_query, 0, 1) == "!") check_ddg_bang($this->query, $opts); - if ($this->engine == "google") { - + if (has_cached_results($this->cache_key)) + return; + + if ($this->engine == "auto") + $this->engine = $this->select_engine(); + + // no engine was selected + if (is_null($this->engine)) + return; + + // this only happens if a specific engine was selected, not if auto is used + if (has_cooldown($this->engine, $this->opts->cooldowns)) + return; + + $this->engine_request = $this->get_engine_request($this->engine, $opts, $mh); + + if (is_null($this->engine_request)) + return; + + require "engines/special/special.php"; + $this->special_request = get_special_search_request($opts, $mh); + } + private function select_engine() { + if (sizeof($this->engines) == 0) + return null; + + $engine = array_pop($this->engines); + + // if this engine is on cooldown, try again + if (!has_cooldown($engine, $this->opts->cooldowns)) + return $engine; + + return $this->select_engine(); + } + + private function get_engine_request($engine, $opts, $mh) { + if ($engine == "google") { require "engines/text/google.php"; - $this->engine_request = new GoogleRequest($opts, $mh); + return new GoogleRequest($opts, $mh); } - if ($this->engine == "duckduckgo") { + if ($engine == "duckduckgo") { require "engines/text/duckduckgo.php"; - $this->engine_request = new DuckDuckGoRequest($opts, $mh); + return new DuckDuckGoRequest($opts, $mh); } - if (has_cooldown($this->engine, $this->opts->cooldowns) && !has_cached_results($this->engine_request->url)) { - // TODO dont add it in the first place - curl_multi_remove_handle($mh, $this->engine_request->ch); - $this->engine_request = null; - return; + if ($engine == "brave") { + require "engines/text/brave.php"; + return new BraveSearchRequest($opts, $mh); } + if ($engine == "yandex") { + require "engines/text/yandex.php"; + return new YandexSearchRequest($opts, $mh); + } - require "engines/special/special.php"; - $this->special_request = get_special_search_request($opts, $mh); + if ($engine == "ecosia") { + require "engines/text/ecosia.php"; + return new EcosiaSearchRequest($opts, $mh); + } + + if ($engine == "mojeek") { + require "engines/text/mojeek.php"; + return new MojeekSearchRequest($opts, $mh); + } + + // if an invalid engine is selected, don't give any results + return null; } public function parse_results($response) { + if (has_cached_results($this->cache_key)) + return fetch_cached_results($this->cache_key); + if (!isset($this->engine_request)) return array(); @@ -53,6 +112,11 @@ public function parse_results($response) { } } + if (!empty($results)) { + $results["results_source"] = parse_url($this->engine_request->url)["host"]; + store_cached_results($this->cache_key, $results); + } + return $results; } @@ -98,6 +162,8 @@ public static function print_results($results, $opts) { echo "
"; foreach($results as $result) { + if (!is_array($result)) + continue; if (!array_key_exists("title", $result)) continue; @@ -130,7 +196,7 @@ function check_ddg_bang($query, $opts) { $search_word = substr(explode(" ", $query)[0], 1); else $search_word = substr(end(explode(" ", $query)), 1); - + $bang_url = null; foreach($bangs as $bang) { diff --git a/engines/text/yandex.php b/engines/text/yandex.php new file mode 100644 index 00000000..f003926a --- /dev/null +++ b/engines/text/yandex.php @@ -0,0 +1,68 @@ +query)); + + $results_language = $this->opts->language; + $number_of_results = $this->opts->number_of_results; + + $url = "https://yandex.com/search?text=$query_encoded&nfpr=1&p=$this->page&noreask=1"; + + if (!is_null($results_language)) + $url .= "&lang=$results_language"; + + return $url; + } + + public function parse_results($response) { + $results = array(); + $xpath = get_xpath($response); + + if (!$xpath) + return $results; + + $r = $xpath->query("//ul[@id='search-result']"); + if (empty($r)) + return array("error" => array( + "message" => TEXTS["failure_empty"] + )); + + foreach($xpath->query("//li[contains(@class, 'serp-item')]") as $result) { + $url = $xpath->evaluate(".//div//div//a[contains(@class, 'link')]//@href", $result)[0]; + + if ($url == null) + continue; + + $url = $url->textContent; + + if (!empty($results) && array_key_exists("url", $results) && end($results)["url"] == $url->textContent) + continue; + + $title = $xpath->evaluate(".//div//div//a[contains(@class, 'link')]//h2[contains(@class, 'OrganicTitle-LinkText')]//span", $result)[0]; + + if ($title == null) + continue; + + $title = $title->textContent; + + $description = $xpath->evaluate(".//div[contains(@class, 'Organic-ContentWrapper')]//div[contains(@class, 'text-container')]//span", $result)[0]->textContent; + + array_push($results, + array ( + "title" => htmlspecialchars($title), + "url" => htmlspecialchars($url), + // base_url is to be removed in the future, see #47 + "base_url" => htmlspecialchars(get_base_url($url)), + "description" => $description == null ? + TEXTS["result_no_description"] : + htmlspecialchars($description) + ) + ); + + } + + return $results; + } + + } +?> diff --git a/locale/en.php b/locale/en.php index 17d936a9..801c7061 100644 --- a/locale/en.php +++ b/locale/en.php @@ -32,6 +32,7 @@ "settings_search_settings" => "Search settings", "settings_language" => "Language", + "settings_preferred_engine" => "Preferred Engine", "settings_number_of_results" => "Number of results per page", diff --git a/misc/search_engine.php b/misc/search_engine.php index 66526404..cba668c5 100644 --- a/misc/search_engine.php +++ b/misc/search_engine.php @@ -92,6 +92,8 @@ function load_opts() { $opts->curl_settings[CURLOPT_FOLLOWLOCATION] ??= true; + $opts->engine = $_REQUEST["engine"] ?? $_COOKIE["engine"] ?? $opts->preferred_engines["text"] ?? "auto"; + return $opts; } @@ -105,6 +107,7 @@ function opts_to_params($opts) { $params .= "&safe=" . ($opts->safe_search ? 1 : 0); $params .= "&nf=" . ($opts->disable_frontends ? 1 : 0); $params .= "&ns=" . ($opts->disable_special ? 1 : 0); + $params .= "&engine=" . ($opts->engine ?? "auto"); return $params; } diff --git a/misc/tools.php b/misc/tools.php index ee805e66..e69c6b53 100644 --- a/misc/tools.php +++ b/misc/tools.php @@ -99,9 +99,9 @@ function remove_special($string) { function print_elapsed_time($start_time, $results, $opts) { $source = ""; - if (($opts->show_result_source ?? true) && array_key_exists("fallback_source", $results)) { - $source = " from " . $results["fallback_source"]; - unset($results["fallback_source"]); + if (array_key_exists("results_source", $results)) { + $source = " from " . $results["results_source"]; + unset($results["results_source"]); } $end_time = number_format(microtime(true) - $start_time, 2, '.', ''); diff --git a/settings.php b/settings.php index 44be8384..02ddbab1 100644 --- a/settings.php +++ b/settings.php @@ -95,6 +95,32 @@

+
+
+ + +
+
+ + " > +
+