Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement more sources for text results #96

Merged
merged 13 commits into from
Dec 24, 2023
4 changes: 2 additions & 2 deletions engines/librex/fallback.php
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,8 @@ function get_librex_results($opts) {
$results = $librex_request->get_results();

if (!empty($results)) {
$results["fallback_source"] = parse_url($instance)["host"];
error_log($results["fallback_source"]);
$results["results_source"] = parse_url($instance)["host"];
error_log($results["results_source"]);
return $results;
}

Expand Down
69 changes: 69 additions & 0 deletions engines/text/brave.php
Ahwxorg marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
<?php
/**
 * Text search request that scrapes the HTML result page of Brave Search.
 *
 * Relies on members provided by EngineRequest ($this->query, $this->page,
 * $this->opts) and on the project helpers get_xpath(), get_base_url() and the
 * TEXTS constant, all of which are defined outside this file.
 */
class BraveSearchRequest extends EngineRequest {
    /**
     * Builds the URL of the Brave result page for the current query.
     *
     * @return string the request URL including the optional language,
     *                result-count and safe-search parameters
     */
    public function get_request_url() {
        // Keep double quotes literal so exact-phrase queries survive encoding.
        $query_encoded = str_replace("%22", "\"", urlencode($this->query));

        // Normalise to strings: strlen() must not receive null (deprecated
        // since PHP 8.1) and the options may hold integers.
        $results_language = (string) ($this->opts->language ?? "");
        $number_of_results = (string) ($this->opts->number_of_results ?? "");

        // TODO find the right parameters for the url
        $url = "https://search.brave.com/search?q=$query_encoded&nfpr=1&spellcheck=0&start=$this->page";

        // Only one- or two-character language codes are forwarded.
        $language_length = strlen($results_language);
        if ($language_length > 0 && $language_length < 3) {
            $language_encoded = urlencode($results_language);
            $url .= "&lr=lang_$language_encoded";
            $url .= "&hl=$language_encoded";
        }

        // Only one- or two-character result counts are forwarded.
        $count_length = strlen($number_of_results);
        if ($count_length > 0 && $count_length < 3)
            $url .= "&num=" . urlencode($number_of_results);

        if (isset($_COOKIE["safe_search"]))
            $url .= "&safe=medium";

        return $url;
    }

    /**
     * Extracts the search results from a Brave result page.
     *
     * @param mixed $response raw response handed to get_xpath()
     * @return array list of results, each with the keys "title", "url",
     *               "base_url" and "description" (all HTML-escaped)
     */
    public function parse_results($response) {
        $results = array();
        $xpath = get_xpath($response);

        if (!$xpath)
            return $results;

        // Raw URL of the most recently added result, used to drop
        // consecutive duplicates.
        $previous_url = null;

        foreach ($xpath->query("//div[@id='results']//div[contains(@class, 'snippet')]") as $result) {
            $url_node = $xpath->evaluate(".//a[contains(@class, 'h')]//@href", $result)[0];

            if ($url_node == null)
                continue;

            $url = $url_node->textContent;

            // Skip a result that repeats the URL of the one just added.
            if ($previous_url !== null && $previous_url === $url)
                continue;

            $title_node = $xpath->evaluate(".//a[contains(@class, 'h')]//div[contains(@class, 'url')]", $result)[0];

            if ($title_node == null)
                continue;

            $title = $title_node->textContent;

            // A result may have no description node; never dereference null.
            $description_node = $xpath->evaluate(".//div[contains(@class, 'snippet-content')]//div[contains(@class, 'snippet-description')]", $result)[0];
            $description = $description_node === null ? null : $description_node->textContent;

            array_push($results,
                array (
                    "title" => htmlspecialchars($title),
                    "url" => htmlspecialchars($url),
                    // base_url is to be removed in the future, see #47
                    "base_url" => htmlspecialchars(get_base_url($url)),
                    "description" => ($description === null || $description === "") ?
                        TEXTS["result_no_description"] :
                        htmlspecialchars($description)
                )
            );

            $previous_url = $url;
        }

        return $results;
    }

}
?>
63 changes: 63 additions & 0 deletions engines/text/ecosia.php
Ahwxorg marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
<?php
/**
 * Text search request that scrapes the HTML result page of Ecosia.
 *
 * Relies on members provided by EngineRequest ($this->query, $this->page,
 * $this->opts) and on the project helpers get_xpath(), get_base_url() and the
 * TEXTS constant, all of which are defined outside this file.
 */
class EcosiaSearchRequest extends EngineRequest {
    /**
     * Builds the URL of the Ecosia result page for the current query.
     *
     * @return string the request URL, with "&lang=" appended when a language
     *                is configured
     */
    public function get_request_url() {
        // Keep double quotes literal so exact-phrase queries survive encoding.
        $query_encoded = str_replace("%22", "\"", urlencode($this->query));

        $results_language = $this->opts->language;

        // TODO figure out how to not autocorrect
        $url = "https://www.ecosia.org/search?method=index&q=$query_encoded&p=$this->page";

        // Encode the language so it cannot inject further query parameters.
        if (!is_null($results_language))
            $url .= "&lang=" . urlencode((string) $results_language);

        return $url;
    }

    /**
     * Extracts the search results from an Ecosia result page.
     *
     * @param mixed $response raw response handed to get_xpath()
     * @return array list of results, each with the keys "title", "url",
     *               "base_url" and "description" (all HTML-escaped)
     */
    public function parse_results($response) {
        $results = array();
        $xpath = get_xpath($response);

        if (!$xpath)
            return $results;

        // Raw URL of the most recently added result, used to drop
        // consecutive duplicates.
        $previous_url = null;

        foreach ($xpath->query("//div[contains(@class, 'mainline__result-wrapper')]") as $result) {
            $url_node = $xpath->evaluate(".//article//div[contains(@class, 'result__body')]//div[contains(@class, 'result__header')]//div[contains(@class, 'result__info')]//a[contains(@class, 'result__link')]//@href", $result)[0];

            if ($url_node == null)
                continue;

            $url = $url_node->textContent;

            // Skip a result that repeats the URL of the one just added.
            if ($previous_url !== null && $previous_url === $url)
                continue;

            $title_node = $xpath->evaluate(".//article//div[contains(@class, 'result__body')]//div[contains(@class, 'result__header')]//div[contains(@class, 'result__title')]//a//h2", $result)[0];

            if ($title_node == null)
                continue;

            $title = $title_node->textContent;

            // A result may have no description node; never dereference null.
            $description_node = $xpath->evaluate(".//article//div[contains(@class, 'result__body')]//div[contains(@class, 'result__columns')]//div[contains(@class, 'result__columns-start')]//div//div//div/p", $result)[0];
            $description = $description_node === null ? null : $description_node->textContent;

            array_push($results,
                array (
                    "title" => htmlspecialchars($title),
                    "url" => htmlspecialchars($url),
                    // base_url is to be removed in the future, see #47
                    "base_url" => htmlspecialchars(get_base_url($url)),
                    "description" => ($description === null || $description === "") ?
                        TEXTS["result_no_description"] :
                        htmlspecialchars($description)
                )
            );

            $previous_url = $url;
        }

        return $results;
    }

}
?>
2 changes: 1 addition & 1 deletion engines/text/google.php
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ public function parse_results($response) {
if ($url == null)
continue;

if (!empty($results) && array_key_exists("url", $results) && end($results)["url"] == $url->textContent)
if (!empty($results) && array_key_exists("url", $results) && end($results)["url"] == $url->textContent)
continue;

$url = $url->textContent;
Expand Down
64 changes: 64 additions & 0 deletions engines/text/mojeek.php
Ahwxorg marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
<?php
/**
 * Text search request that scrapes the HTML result page of Mojeek.
 *
 * Relies on members provided by EngineRequest ($this->query, $this->page,
 * $this->opts) and on the project helpers get_xpath(), get_base_url() and the
 * TEXTS constant, all of which are defined outside this file.
 */
class MojeekSearchRequest extends EngineRequest {
    /**
     * Builds the URL of the Mojeek result page for the current query.
     *
     * @return string the request URL, with "&lang=" appended when a language
     *                is configured
     */
    public function get_request_url() {
        // Keep double quotes literal so exact-phrase queries survive encoding.
        $query_encoded = str_replace("%22", "\"", urlencode($this->query));

        $results_language = $this->opts->language;

        // TODO figure out how to not autocorrect
        $url = "https://www.mojeek.com/search?q=$query_encoded&p=$this->page";

        // TODO language setting
        // Encode the language so it cannot inject further query parameters.
        if (!is_null($results_language))
            $url .= "&lang=" . urlencode((string) $results_language);

        return $url;
    }

    /**
     * Extracts the search results from a Mojeek result page.
     *
     * @param mixed $response raw response handed to get_xpath()
     * @return array list of results, each with the keys "title", "url",
     *               "base_url" and "description" (all HTML-escaped)
     */
    public function parse_results($response) {
        $results = array();
        $xpath = get_xpath($response);

        if (!$xpath)
            return $results;

        // Raw URL of the most recently added result, used to drop
        // consecutive duplicates.
        $previous_url = null;

        foreach ($xpath->query("//ul[contains(@class, 'results-standard')]//li") as $result) {
            $url_node = $xpath->evaluate(".//h2//a[contains(@class, 'title')]//@href", $result)[0];

            if ($url_node == null)
                continue;

            $url = $url_node->textContent;

            // Skip a result that repeats the URL of the one just added.
            if ($previous_url !== null && $previous_url === $url)
                continue;

            $title_node = $xpath->evaluate(".//h2//a[contains(@class, 'title')]", $result)[0];

            if ($title_node == null)
                continue;

            $title = $title_node->textContent;

            // A result may have no description node; never dereference null.
            $description_node = $xpath->evaluate(".//p[contains(@class, 's')]", $result)[0];
            $description = $description_node === null ? null : $description_node->textContent;

            array_push($results,
                array (
                    "title" => htmlspecialchars($title),
                    "url" => htmlspecialchars($url),
                    // base_url is to be removed in the future, see #47
                    "base_url" => htmlspecialchars(get_base_url($url)),
                    "description" => ($description === null || $description === "") ?
                        TEXTS["result_no_description"] :
                        htmlspecialchars($description)
                )
            );

            $previous_url = $url;
        }

        return $results;
    }

}
?>
94 changes: 80 additions & 14 deletions engines/text/text.php
Ahwxorg marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -1,42 +1,101 @@
<?php
/**
 * Lists the identifiers of every text search engine that can be selected.
 *
 * @return array engine names, in a fixed order
 */
function get_engines() {
    $engine_names = [
        "google",
        "duckduckgo",
        "brave",
        "yandex",
        "ecosia",
        "mojeek",
    ];

    return $engine_names;
}

class TextSearch extends EngineRequest {
protected $engine, $engine_request, $special_request;
public function __construct($opts, $mh) {
$this->engines = get_engines();
shuffle($this->engines);

$this->query = $opts->query;
$this->cache_key = "text:" . $this->query;

$this->page = $opts->page;
$this->opts = $opts;

$this->engine = $opts->preferred_engines["text"] ?? "google";
$this->engine = $opts->engine;

$query_parts = explode(" ", $this->query);
$last_word_query = end($query_parts);
if (substr($this->query, 0, 1) == "!" || substr($last_word_query, 0, 1) == "!")
check_ddg_bang($this->query, $opts);

if ($this->engine == "google") {

if (has_cached_results($this->cache_key))
return;

if ($this->engine == "auto")
$this->engine = $this->select_engine();

// no engine was selected
if (is_null($this->engine))
return;

// this only happens if a specific engine was selected, not if auto is used
if (has_cooldown($this->engine, $this->opts->cooldowns))
return;

$this->engine_request = $this->get_engine_request($this->engine, $opts, $mh);

if (is_null($this->engine_request))
return;

require "engines/special/special.php";
$this->special_request = get_special_search_request($opts, $mh);
}
/**
 * Picks the next engine that is not on cooldown.
 *
 * Engines are taken from the end of $this->engines and removed as they are
 * inspected, so every call shrinks the candidate list.
 *
 * @return string|null the chosen engine name, or null once the list is
 *                     exhausted without finding a usable engine
 */
private function select_engine() {
    while (sizeof($this->engines) > 0) {
        $candidate = array_pop($this->engines);

        // an engine on cooldown is discarded and the next one is tried
        if (!has_cooldown($candidate, $this->opts->cooldowns))
            return $candidate;
    }

    return null;
}

private function get_engine_request($engine, $opts, $mh) {
if ($engine == "google") {
require "engines/text/google.php";
$this->engine_request = new GoogleRequest($opts, $mh);
return new GoogleRequest($opts, $mh);
}

if ($this->engine == "duckduckgo") {
if ($engine == "duckduckgo") {
require "engines/text/duckduckgo.php";
$this->engine_request = new DuckDuckGoRequest($opts, $mh);
return new DuckDuckGoRequest($opts, $mh);
}

if (has_cooldown($this->engine, $this->opts->cooldowns) && !has_cached_results($this->engine_request->url)) {
// TODO dont add it in the first place
curl_multi_remove_handle($mh, $this->engine_request->ch);
$this->engine_request = null;
return;
if ($engine == "brave") {
require "engines/text/brave.php";
return new BraveSearchRequest($opts, $mh);
}

if ($engine == "yandex") {
require "engines/text/yandex.php";
return new YandexSearchRequest($opts, $mh);
}

require "engines/special/special.php";
$this->special_request = get_special_search_request($opts, $mh);
if ($engine == "ecosia") {
require "engines/text/ecosia.php";
return new EcosiaSearchRequest($opts, $mh);
}

if ($engine == "mojeek") {
require "engines/text/mojeek.php";
return new MojeekSearchRequest($opts, $mh);
}

// if an invalid engine is selected, don't give any results
return null;
}

public function parse_results($response) {
if (has_cached_results($this->cache_key))
return fetch_cached_results($this->cache_key);

if (!isset($this->engine_request))
return array();

Expand All @@ -53,6 +112,11 @@ public function parse_results($response) {
}
}

if (!empty($results)) {
$results["results_source"] = parse_url($this->engine_request->url)["host"];
store_cached_results($this->cache_key, $results);
}

return $results;
}

Expand Down Expand Up @@ -98,6 +162,8 @@ public static function print_results($results, $opts) {
echo "<div class=\"text-result-container\">";

foreach($results as $result) {
if (!is_array($result))
continue;
if (!array_key_exists("title", $result))
continue;

Expand Down Expand Up @@ -130,7 +196,7 @@ function check_ddg_bang($query, $opts) {
$search_word = substr(explode(" ", $query)[0], 1);
else
$search_word = substr(end(explode(" ", $query)), 1);

$bang_url = null;

foreach($bangs as $bang) {
Expand Down
Loading