Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

moving wikihotness script + tables #5

Merged
merged 3 commits into from
Nov 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,5 @@
/node_modules/
composer.lock
package-lock.json
.idea/
.DS_Store
Binary file added cron/.DS_Store
Binary file not shown.
184 changes: 184 additions & 0 deletions cron/wiki_hotness.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
<?php
// wiki_hotness cron: bootstrap. Run relative to this script's directory so
// the relative config requires below resolve regardless of the caller's cwd.
chdir( __DIR__ );
require_once '../../../../../config/db_config.php';
require_once '../../lp-config/wikis.php';

// db_config.php exports $server/$login/$pass; copy them into locally named
// variables and drop the originals from scope.
$db_host = $server;
$db_name = 'liquipedia';
$db_user = $login;
$db_password = $pass;
unset( $login, $pass, $server );

// Connect with exceptions, assoc fetches and real (non-emulated) prepared
// statements. On failure print the driver error and abort the run.
try {
	$db = new PDO(
		"mysql:host={$db_host};dbname={$db_name}",
		$db_user,
		$db_password,
		[
			PDO::ATTR_ERRMODE => PDO::ERRMODE_EXCEPTION,
			PDO::ATTR_DEFAULT_FETCH_MODE => PDO::FETCH_ASSOC,
			PDO::ATTR_EMULATE_PREPARES => false,
		]
	);
} catch ( PDOException $e ) {
	echo "Connection Error: " . $e->getMessage();
	die( 'Could not connect to database' );
}

$db->exec( 'SET NAMES utf8' );

// Accumulated rows for the wiki_hot table: one entry per hot page.
$wiki_hits = [];

// One shared curl handle, reused for every page fetch in the run.
$ch = curl_init();
curl_setopt( $ch, CURLOPT_HTTPHEADER, [ 'Host: liquipedia.net', 'User-Agent: wiki-hotness/0.1' ] );
curl_setopt( $ch, CURLOPT_RETURNTRANSFER, true );
curl_setopt( $ch, CURLOPT_FAILONERROR, true );
curl_setopt( $ch, CURLOPT_ENCODING, "" );

foreach ( $liquipedia_wikis as $wiki => $info ) {
	// hot_threshold is a MySQL INTERVAL expression (presumably e.g. "1 DAY")
	// from the trusted lp-config/wikis.php; INTERVAL units cannot be bound as
	// parameters, so it is interpolated directly. NOTE(review): assumes the
	// config value always includes a unit — confirm against lp-config.
	$time = $info[ 'hot_threshold' ];
	$dbPrefix = $info[ 'db_prefix' ];

	print "Processing $wiki (threshold: $time, database: $dbPrefix)...\n";

	// Prune hits that fell out of this wiki's hotness window.
	$db->prepare(
		'DELETE FROM `wiki_hits` ' .
		'WHERE `datum` < UNIX_TIMESTAMP(NOW() - INTERVAL ' . $time . ' ) ' .
		'AND `wiki` = :wiki' )->execute( [ 'wiki' => $wiki ] );

	// Top pages by hit count in the window. Fetch 15 so up to 10 survive the
	// Main_Page / Special: filtering below.
	$sql = 'SELECT url, COUNT(*) AS count FROM wiki_hits WHERE wiki = :wiki GROUP BY url ORDER BY count DESC LIMIT 15';
	$sqlStmt = $db->prepare( $sql );
	$sqlStmt->execute( [ 'wiki' => $wiki ] );
	$res = $sqlStmt->fetchAll();

	$num_found = 0;
	foreach ( $res as $row ) {
		// Reset per-page state so a previous iteration's titles never leak in
		// (the original relied on unset() at loop end, leaving both variables
		// undefined — a PHP notice — when curl failed on the first page).
		$display_title = null;
		$ticker_title = null;

		$page = $row[ 'url' ];

		if ( $page == "Main_Page" ) {
			continue;
		}

		if ( !strncmp( $page, "Special:", 8 ) ) {
			continue;
		}

		// Try to match MediaWiki's URL encoding.
		$encoded_page = rawurlencode( $page );
		$encoded_page = str_replace(
			[ '%2F', '%3A', '%28', '%29', '%21' ],
			[ '/', ':', '(', ')', '!' ],
			$encoded_page
		);

		// Query varnish directly to avoid frontend HTTPS overhead.
		$full_url = "http://127.0.0.1:6081/$wiki/$encoded_page";

		curl_setopt( $ch, CURLOPT_URL, $full_url );

		$ret = curl_exec( $ch );
		if ( !curl_errno( $ch ) ) {
			// The <title> element is near the top of the document; don't
			// parse more than necessary.
			$ret = mb_substr( $ret, 0, 1024 );
			if ( preg_match( "/<title>(.+?)<\/title>/", $ret, $m ) ) {
				$display_title = $m[1];

				// Strip " - Liquipedia Wild Rift Wiki" etc.
				// BUGFIX: mb_strpos() returns false (not -1) when the needle
				// is absent; the old "!= -1" check was true for false, so
				// mb_substr(..., 0, false) blanked the title on every page
				// whose <title> lacked the suffix.
				$pos = mb_strpos( $display_title, ' - Liquipedia ' );
				if ( $pos !== false ) {
					$display_title = mb_substr( $display_title, 0, $pos );
				}

				$display_title = html_entity_decode( $display_title, ENT_QUOTES, 'UTF-8' );
			}
		}

		// Fetch the stable (flagged) wikitext of the page so a ticker name
		// can be extracted; pages without a stable revision are skipped.
		$oldTextSql = 'SELECT old_text '
			. 'FROM ' . $dbPrefix . 'text t, '
			. $dbPrefix . 'page p, '
			. $dbPrefix . 'flaggedpages f, '
			. $dbPrefix . 'content c, '
			. $dbPrefix . 'slots s '
			. 'WHERE t.old_id = SUBSTR(c.content_address, 4) '
			. 'AND c.content_id = s.slot_content_id '
			. 'AND s.slot_revision_id = f.fp_stable '
			. 'AND f.fp_page_id = p.page_id '
			. 'AND p.page_namespace IN (0, 134) '
			. 'AND p.page_title = :pageTitle';

		$oldTextStmt = $db->prepare( $oldTextSql );
		$oldTextStmt->execute( [ 'pageTitle' => $page ] );
		$oldText = $oldTextStmt->fetch();
		if ( !$oldText ) {
			continue;
		}

		if ( preg_match( '/\|tickername=(.+?)[\|}\r\n]/', $oldText[ 'old_text' ], $m ) ) {
			$ticker_title = trim( $m[ 1 ] );
		}

		// Use the display title without its trailing disambiguation brackets
		// if it is too long. Cast guards mb_strlen(null) (deprecated in 8.1).
		if ( mb_strlen( (string)$display_title ) > 45 ) {
			$new_title = preg_replace( "/\s+\(.+\)$/u", "", $display_title );
			if ( !empty( $new_title ) ) {
				$display_title = $new_title;
			}
		}

		// Title preference: short display title, then ticker name, then any
		// display title, then the raw page name with underscores unescaped.
		if ( !empty( $display_title ) && mb_strlen( $display_title ) <= 35 ) {
			$title = $display_title;
		} elseif ( !empty( $ticker_title ) ) {
			$title = $ticker_title;
		} elseif ( !empty( $display_title ) ) {
			$title = $display_title;
		} else {
			$title = str_replace( '_', ' ', $page );
		}

		$wiki_hits[] = [
			'title' => $title,
			'url' => $row[ 'url' ],
			'wiki' => $wiki,
			'hits' => (int)$row[ 'count' ],
		];

		print "Found {$row[ 'url' ]} ($title) - {$row[ 'count' ]}\n";

		// BUGFIX: $num_found was never incremented, so the intended 10-entry
		// cap never fired and up to 15 rows per wiki could be stored.
		$num_found++;
		if ( $num_found >= 10 ) {
			break;
		}
	}
}

// Rebuild wiki_hot in one locked pass so API readers never observe a
// half-written trending list.
$db->exec( 'LOCK TABLES wiki_hot WRITE' );
$db->exec( 'DELETE FROM wiki_hot' );

// Prepare the insert once instead of re-preparing it for every row.
$insertStmt = $db->prepare(
	'INSERT INTO wiki_hot (wiki, page, title, hits) VALUES ( :wiki, :url, :title, :count )'
);

foreach ( $wiki_hits as $row ) {
	// BUGFIX: sql/wiki_hot.sql declares page/title as varbinary(128); the old
	// 255-character truncation could overflow the columns (an error under
	// strict SQL mode). NOTE(review): mb_substr counts characters, not bytes,
	// so a multi-byte title may still exceed 128 bytes — confirm the column
	// widths against the deployed schema.
	$insertStmt->execute( [
		'wiki' => $row[ 'wiki' ],
		'url' => mb_substr( $row[ 'url' ], 0, 128 ),
		'title' => mb_substr( $row[ 'title' ], 0, 128 ),
		'count' => $row[ 'hits' ],
	] );
}

$db->exec( 'UNLOCK TABLES' );

// Purge the varnish root so the refreshed list is served immediately.
curl_setopt( $ch, CURLOPT_CUSTOMREQUEST, 'PURGE' );
curl_setopt( $ch, CURLOPT_URL, "http://127.0.0.1:6081/" );
curl_exec( $ch );
$code = curl_getinfo( $ch, CURLINFO_HTTP_CODE );
print "Purge: $code\n";

// Release the shared curl handle now that all HTTP work is done.
curl_close( $ch );
8 changes: 8 additions & 0 deletions sql/wiki_hits.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
-- wiki_hits: raw per-request hit log aggregated by cron/wiki_hotness.php.
-- `datum` is a unix timestamp; the cron job prunes rows older than each
-- wiki's hot_threshold before grouping by `url` to find trending pages.
CREATE TABLE IF NOT EXISTS `wiki_hits` (
`url` varbinary(255) NOT NULL,
`wiki` varbinary(24) NOT NULL,
`datum` int(10) UNSIGNED NOT NULL
) /*$wgDBTableOptions*/;

-- Composite key serving the cron job's per-wiki prune (wiki + datum range)
-- and per-wiki aggregation queries.
ALTER TABLE `wiki_hits`
ADD KEY `wiki` (`wiki`,`datum`);
9 changes: 9 additions & 0 deletions sql/wiki_hot.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
-- wiki_hot: the aggregated trending-pages list. Fully rebuilt on every run
-- of cron/wiki_hotness.php (DELETE + re-insert under LOCK TABLES) and read
-- back filtered by `wiki`, ordered by `hits` DESC.
CREATE TABLE IF NOT EXISTS `wiki_hot` (
`wiki` varbinary(16) NOT NULL,
`page` varbinary(128) NOT NULL,
`title` varbinary(128) NOT NULL,
`hits` int(11) NOT NULL
) /*$wgDBTableOptions*/;

-- Reads always filter by wiki.
ALTER TABLE `wiki_hot`
ADD KEY `wiki` (`wiki`);
12 changes: 8 additions & 4 deletions src/Api/TrendingPages.php
Original file line number Diff line number Diff line change
Expand Up @@ -8,20 +8,24 @@
class TrendingPages extends ApiBase {

public function execute() {
global $TL_DB; // phpcs:ignore
// Tell squids to cache
$this->getMain()->setCacheMode( 'public' );
// Set the squid & private cache time in seconds
$this->getMain()->setCacheMaxAge( 300 );
$trendingArticles = [];

$wiki = substr( $this->getConfig()->get( 'ScriptPath' ), 1 );
$config = MediaWikiServices::getInstance()->getMainConfig();
$loadBalancer = MediaWikiServices::getInstance()->getDBLoadBalancer();
$dbr = $loadBalancer->getConnection( DB_REPLICA, '', $TL_DB );
$dbr = $loadBalancer->getConnection( DB_REPLICA, [], $config->get( 'DBname' ) );
$res = $dbr->select(
'wiki_hot', '*', [
'wiki_hot',
'*',
[
'wiki' => $wiki
], __METHOD__, [
],
__METHOD__,
[
'ORDER BY' => 'hits DESC',
'LIMIT' => 10
]
Expand Down
7 changes: 2 additions & 5 deletions src/Helper.php
Original file line number Diff line number Diff line change
Expand Up @@ -48,11 +48,8 @@ public static function getWikiList() {
public static function getWikiHotList() {
$services = MediaWikiServices::getInstance();
$loadBalancer = $services->getDBLoadBalancer();
$dbr = $loadBalancer->getConnection(
DB_REPLICA,
[],
'liquid-'
);
$config = $services->getMainConfig();
$dbr = $loadBalancer->getConnection( DB_REPLICA, [], $config->get( 'DBname' ) );
$res = $dbr->select(
'wiki_hot',
[
Expand Down
9 changes: 6 additions & 3 deletions src/Hooks/SchemaHookHandler.php
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,13 @@ class SchemaHookHandler implements
public function onLoadExtensionSchemaUpdates( $updater ) {
$db = $updater->getDB();
$config = MediaWikiServices::getInstance()->getMainConfig();
if ( !$db->tableExists( $config->get( 'DBname' ) . '.wiki_list',
$tables = [ 'wiki_list', 'wiki_hot', 'wiki_hits' ];
foreach ( $tables as $table ) {
if ( !$db->tableExists( $config->get( 'DBname' ) . '.' . $table,
__METHOD__ ) ) {
$updater->addExtensionTable( 'wiki_list',
__DIR__ . '/../sql/wiki_list.sql' );
$updater->addExtensionTable( $table,
__DIR__ . '/../../sql/' . $table . '.sql' );
}
}
}

Expand Down
15 changes: 9 additions & 6 deletions src/Rest/TrendingPages.php
Original file line number Diff line number Diff line change
Expand Up @@ -35,17 +35,20 @@ public function __construct(
* @return Response
*/
public function run(): Response {
global $TL_DB; // phpcs:ignores

$wiki = substr( $this->config->get( 'ScriptPath' ), 1 );

$loadBalancer = $this->loadBalancerFactory->getMainLB( $TL_DB );
$dbr = $loadBalancer->getConnectionRef( DB_REPLICA, [], $TL_DB );
$config = MediaWikiServices::getInstance()->getMainConfig();
$loadBalancer = MediaWikiServices::getInstance()->getDBLoadBalancer();
$dbr = $loadBalancer->getConnection( DB_REPLICA, [], $config->get( 'DBname' ) );

$res = $dbr->select(
'wiki_hot', '*', [
'wiki_hot',
'*',
[
'wiki' => $wiki
], __METHOD__, [
],
__METHOD__,
[
'ORDER BY' => 'hits DESC',
'LIMIT' => 10
]
Expand Down