Skip to content

Commit

Permalink
moving wikihotness script + tables (#5)
Browse files Browse the repository at this point in the history
* moving wikihotness script + tables

* remove space

* Fix wikis.php path

---------

Co-authored-by: Shashank Atreya <[email protected]>
Co-authored-by: Alex Winkler <[email protected]>
  • Loading branch information
3 people authored Nov 7, 2023
1 parent 27845e8 commit fb26e87
Show file tree
Hide file tree
Showing 9 changed files with 228 additions and 18 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,5 @@
/node_modules/
composer.lock
package-lock.json
.idea/
.DS_Store
Binary file added cron/.DS_Store
Binary file not shown.
184 changes: 184 additions & 0 deletions cron/wiki_hotness.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
<?php
/**
 * Cron job: rebuild the `wiki_hot` table (trending pages per wiki) from the
 * raw hit log in `wiki_hits`.
 *
 * For every entry of $liquipedia_wikis the script
 *  1. deletes hits older than that wiki's `hot_threshold`,
 *  2. takes the 15 most-hit URLs and skips the main page and special pages,
 *  3. works out a readable title for each page (rendered <title> fetched from
 *     the local cache, or the `|tickername=` parameter found in the wikitext
 *     of the stable revision),
 *  4. keeps at most 10 pages per wiki.
 * Afterwards `wiki_hot` is replaced wholesale and a PURGE request is sent for
 * the root URL of the local cache.
 *
 * NOTE(review): $server, $login and $pass are expected to come from
 * db_config.php and $liquipedia_wikis from wikis.php - neither file is part of
 * this script, confirm their contents when changing the code below.
 */
chdir( __DIR__ );
require_once '../../../../../config/db_config.php';
require_once '../../../lp-config/wikis.php';

$db_host = $server;
$db_name = 'liquipedia';
$db_user = $login;
$db_password = $pass;
unset( $login, $pass, $server );

// Maximum number of trending pages kept per wiki.
$maxPagesPerWiki = 10;
// `wiki_hot`.`page` and `wiki_hot`.`title` are declared as varbinary(128) in
// sql/wiki_hot.sql, so values have to be capped at 128 *bytes* before insert.
$maxColumnBytes = 128;

try {
	$db = new PDO(
		'mysql:host=' . $db_host . ';dbname=' . $db_name,
		$db_user,
		$db_password,
		[
			PDO::ATTR_ERRMODE => PDO::ERRMODE_EXCEPTION,
			PDO::ATTR_DEFAULT_FETCH_MODE => PDO::FETCH_ASSOC,
			PDO::ATTR_EMULATE_PREPARES => false,
		]
	);
} catch ( PDOException $e ) {
	print 'Connection Error: ' . $e->getMessage() . "\n";
	print "Could not connect to database\n";
	// Non-zero exit status so the scheduler can tell that the run failed.
	exit( 1 );
}

$db->exec( 'SET NAMES utf8' );

$wiki_hits = [];

$ch = curl_init();
curl_setopt( $ch, CURLOPT_HTTPHEADER, [ 'Host: liquipedia.net', 'User-Agent: wiki-hotness/0.1' ] );
curl_setopt( $ch, CURLOPT_RETURNTRANSFER, true );
curl_setopt( $ch, CURLOPT_FAILONERROR, true );
curl_setopt( $ch, CURLOPT_ENCODING, "" );

// The "top pages" query is identical for every wiki, so prepare it only once.
$topPagesStmt = $db->prepare(
	'SELECT url, COUNT(*) AS count FROM wiki_hits WHERE wiki = :wiki GROUP BY url ORDER BY count DESC LIMIT 15'
);

foreach ( $liquipedia_wikis as $wiki => $info ) {
	$time = $info[ 'hot_threshold' ];
	$dbPrefix = $info[ 'db_prefix' ];

	print "Processing $wiki (threshold: $time, database: $dbPrefix)...\n";

	// Drop hits that fell out of this wiki's time window.
	// $time is concatenated into the statement because an INTERVAL expression
	// cannot be bound as a parameter; it must only ever come from trusted config.
	$db->prepare(
		'DELETE FROM `wiki_hits` ' .
		'WHERE `datum` < UNIX_TIMESTAMP(NOW() - INTERVAL ' . $time . ' ) ' .
		'AND `wiki` = :wiki' )->execute( [ 'wiki' => $wiki ] );

	$topPagesStmt->execute( [ 'wiki' => $wiki ] );
	$res = $topPagesStmt->fetchAll();

	// Prepared lazily (on the first page that needs it) and then reused for
	// every further page of this wiki; the table names depend on $dbPrefix.
	$oldTextStmt = null;

	$num_found = 0;
	foreach ( $res as $row ) {
		$page = $row[ 'url' ];

		if ( $page === 'Main_Page' ) {
			continue;
		}

		if ( strncmp( $page, 'Special:', 8 ) === 0 ) {
			continue;
		}

		// Start every page with a clean slate. Resetting here (instead of
		// unsetting at the end of the loop body) guarantees that a `continue`
		// or `break` further down cannot leak a title into the next page.
		$display_title = '';
		$ticker_title = '';

		// Try to match MW encoding
		$encoded_page = rawurlencode( $page );
		$encoded_page = str_replace(
			[ '%2F', '%3A', '%28', '%29', '%21' ],
			[ '/', ':', '(', ')', '!' ],
			$encoded_page
		);

		// Query varnish directly to avoid frontend HTTPS overhead
		$full_url = "http://127.0.0.1:6081/$wiki/$encoded_page";

		curl_setopt( $ch, CURLOPT_URL, $full_url );

		$ret = curl_exec( $ch );
		if ( is_string( $ret ) && !curl_errno( $ch ) ) {
			// Don't parse more than necessary
			$head = mb_substr( $ret, 0, 1024 );
			if ( preg_match( "/<title>(.+?)<\/title>/", $head, $m ) ) {
				$display_title = $m[ 1 ];

				// Strip " - Liquipedia Wild Rift Wiki" etc
				// mb_strpos() returns false (not -1) when the needle is absent.
				$pos = mb_strpos( $display_title, ' - Liquipedia ' );
				if ( $pos !== false ) {
					$display_title = mb_substr( $display_title, 0, $pos );
				}

				$display_title = html_entity_decode( $display_title, ENT_QUOTES, 'UTF-8' );
			}
		}

		if ( $oldTextStmt === null ) {
			// Wikitext of the stable (flagged) revision of a page in namespace
			// 0 or 134. SUBSTR(content_address, 4) drops the first three
			// characters of the address to get the numeric text id.
			$oldTextStmt = $db->prepare(
				'SELECT old_text '
				. 'FROM ' . $dbPrefix . 'text t, '
				. $dbPrefix . 'page p, '
				. $dbPrefix . 'flaggedpages f, '
				. $dbPrefix . 'content c, '
				. $dbPrefix . 'slots s '
				. 'WHERE t.old_id = SUBSTR(c.content_address, 4) '
				. 'AND c.content_id = s.slot_content_id '
				. 'AND s.slot_revision_id = f.fp_stable '
				. 'AND f.fp_page_id = p.page_id '
				. 'AND p.page_namespace IN (0, 134) '
				. 'AND p.page_title = :pageTitle'
			);
		}

		$oldTextStmt->execute( [ 'pageTitle' => $page ] );
		$oldText = $oldTextStmt->fetch();
		// Only the first row is used; release the rest so the statement can be
		// executed again for the next page.
		$oldTextStmt->closeCursor();
		if ( !$oldText ) {
			// Page has no stable revision in the accepted namespaces.
			continue;
		}

		if ( preg_match( '/\|tickername=(.+?)[\|}\r\n]/', $oldText[ 'old_text' ], $m ) ) {
			$ticker_title = trim( $m[ 1 ] );
		}

		// Try to use display title without ending disambiguation brackets if it's too long
		if ( mb_strlen( $display_title ) > 45 ) {
			$new_title = preg_replace( "/\s+\(.+\)$/u", "", $display_title );
			if ( !empty( $new_title ) ) {
				$display_title = $new_title;
			}
		}

		// Preference order: short display title, ticker name, long display
		// title, and finally the raw page name with underscores replaced.
		if ( !empty( $display_title ) && mb_strlen( $display_title ) <= 35 ) {
			$title = $display_title;
		} elseif ( !empty( $ticker_title ) ) {
			$title = $ticker_title;
		} elseif ( !empty( $display_title ) ) {
			$title = $display_title;
		} else {
			$title = str_replace( '_', ' ', $page );
		}

		$wiki_hits[] = [
			'title' => $title,
			'url' => $row[ 'url' ],
			'wiki' => $wiki,
			'hits' => (int)$row[ 'count' ],
		];
		$num_found++;

		print "Found {$row[ 'url' ]} ($title) - {$row[ 'count' ]}\n";
		if ( $num_found >= $maxPagesPerWiki ) {
			break;
		}
	}
}

// Replace the whole table while holding a write lock so readers never see a
// half-filled list.
$db->exec( 'LOCK TABLES wiki_hot WRITE' );
try {
	$db->exec( 'DELETE FROM wiki_hot' );

	$insertStmt = $db->prepare(
		'INSERT INTO wiki_hot (wiki, page, title, hits) VALUES ( :wiki, :url, :title, :count )'
	);

	foreach ( $wiki_hits as $row ) {
		// mb_strcut() limits by bytes without splitting a multi-byte character.
		$insertStmt->execute( [
			'wiki' => $row[ 'wiki' ],
			'url' => mb_strcut( $row[ 'url' ], 0, $maxColumnBytes, 'UTF-8' ),
			'title' => mb_strcut( $row[ 'title' ], 0, $maxColumnBytes, 'UTF-8' ),
			'count' => $row[ 'hits' ],
		] );
	}
} finally {
	$db->exec( 'UNLOCK TABLES' );
}

// Ask the local cache to drop its copy of the root URL.
curl_setopt( $ch, CURLOPT_CUSTOMREQUEST, 'PURGE' );
curl_setopt( $ch, CURLOPT_URL, "http://127.0.0.1:6081/" );
curl_exec( $ch );
$code = curl_getinfo( $ch, CURLINFO_HTTP_CODE );
print "Purge: $code\n";
8 changes: 8 additions & 0 deletions sql/wiki_hits.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
-- Raw page-hit log that cron/wiki_hotness.php aggregates into `wiki_hot`.
-- The cron script counts rows per `url` for a given `wiki` (COUNT(*) ... GROUP BY url)
-- and deletes rows whose `datum` is older than the wiki's configured threshold.
--   url   : page name of the hit; the cron script requests /<wiki>/<url> to read its title
--   wiki  : wiki identifier the hit belongs to
--   datum : time of the hit; compared against UNIX_TIMESTAMP(...) by the cron script
-- NOTE(review): the code that inserts into this table is not part of this change -
-- presumably one row is written per page view; confirm against the writer.
CREATE TABLE IF NOT EXISTS `wiki_hits` (
`url` varbinary(255) NOT NULL,
`wiki` varbinary(24) NOT NULL,
`datum` int(10) UNSIGNED NOT NULL
) /*$wgDBTableOptions*/;

-- Index on the two columns used by the cron script's pruning DELETE (wiki = ?, datum < ?).
ALTER TABLE `wiki_hits`
ADD KEY `wiki` (`wiki`,`datum`);
9 changes: 9 additions & 0 deletions sql/wiki_hot.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
-- Trending pages per wiki. cron/wiki_hotness.php empties this table and refills it
-- on every run; the TrendingPages API/REST handlers read it filtered by `wiki`,
-- ordered by `hits` DESC, limited to 10 rows.
--   wiki  : wiki identifier
--   page  : page name, copied from `wiki_hits`.`url`
--   title : human-readable title chosen by the cron script
--   hits  : number of `wiki_hits` rows counted for the page
-- NOTE(review): `wiki` is varbinary(16) here but varbinary(24) in `wiki_hits`, and
-- `page` is varbinary(128) while `wiki_hits`.`url` is varbinary(255) - confirm that
-- longer values cannot occur, or widen these columns to match.
CREATE TABLE IF NOT EXISTS `wiki_hot` (
`wiki` varbinary(16) NOT NULL,
`page` varbinary(128) NOT NULL,
`title` varbinary(128) NOT NULL,
`hits` int(11) NOT NULL
) /*$wgDBTableOptions*/;

-- Index for the per-wiki lookups done by the readers.
ALTER TABLE `wiki_hot`
ADD KEY `wiki` (`wiki`);
12 changes: 8 additions & 4 deletions src/Api/TrendingPages.php
Original file line number Diff line number Diff line change
Expand Up @@ -8,20 +8,24 @@
class TrendingPages extends ApiBase {

public function execute() {
global $TL_DB; // phpcs:ignore
// Tell squids to cache
$this->getMain()->setCacheMode( 'public' );
// Set the squid & private cache time in seconds
$this->getMain()->setCacheMaxAge( 300 );
$trendingArticles = [];

$wiki = substr( $this->getConfig()->get( 'ScriptPath' ), 1 );
$config = MediaWikiServices::getInstance()->getMainConfig();
$loadBalancer = MediaWikiServices::getInstance()->getDBLoadBalancer();
$dbr = $loadBalancer->getConnection( DB_REPLICA, '', $TL_DB );
$dbr = $loadBalancer->getConnection( DB_REPLICA, [], $config->get( 'DBname' ) );
$res = $dbr->select(
'wiki_hot', '*', [
'wiki_hot',
'*',
[
'wiki' => $wiki
], __METHOD__, [
],
__METHOD__,
[
'ORDER BY' => 'hits DESC',
'LIMIT' => 10
]
Expand Down
7 changes: 2 additions & 5 deletions src/Helper.php
Original file line number Diff line number Diff line change
Expand Up @@ -48,11 +48,8 @@ public static function getWikiList() {
public static function getWikiHotList() {
$services = MediaWikiServices::getInstance();
$loadBalancer = $services->getDBLoadBalancer();
$dbr = $loadBalancer->getConnection(
DB_REPLICA,
[],
'liquid-'
);
$config = $services->getMainConfig();
$dbr = $loadBalancer->getConnection( DB_REPLICA, [], $config->get( 'DBname' ) );
$res = $dbr->select(
'wiki_hot',
[
Expand Down
9 changes: 6 additions & 3 deletions src/Hooks/SchemaHookHandler.php
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,13 @@ class SchemaHookHandler implements
public function onLoadExtensionSchemaUpdates( $updater ) {
$db = $updater->getDB();
$config = MediaWikiServices::getInstance()->getMainConfig();
if ( !$db->tableExists( $config->get( 'DBname' ) . '.wiki_list',
$tables = [ 'wiki_list', 'wiki_hot', 'wiki_hits' ];
foreach ( $tables as $table ) {
if ( !$db->tableExists( $config->get( 'DBname' ) . '.' . $table,
__METHOD__ ) ) {
$updater->addExtensionTable( 'wiki_list',
__DIR__ . '/../sql/wiki_list.sql' );
$updater->addExtensionTable( $table,
__DIR__ . '/../../sql/' . $table . '.sql' );
}
}
}

Expand Down
15 changes: 9 additions & 6 deletions src/Rest/TrendingPages.php
Original file line number Diff line number Diff line change
Expand Up @@ -35,17 +35,20 @@ public function __construct(
* @return Response
*/
public function run(): Response {
global $TL_DB; // phpcs:ignores

$wiki = substr( $this->config->get( 'ScriptPath' ), 1 );

$loadBalancer = $this->loadBalancerFactory->getMainLB( $TL_DB );
$dbr = $loadBalancer->getConnectionRef( DB_REPLICA, [], $TL_DB );
$config = MediaWikiServices::getInstance()->getMainConfig();
$loadBalancer = MediaWikiServices::getInstance()->getDBLoadBalancer();
$dbr = $loadBalancer->getConnection( DB_REPLICA, [], $config->get( 'DBname' ) );

$res = $dbr->select(
'wiki_hot', '*', [
'wiki_hot',
'*',
[
'wiki' => $wiki
], __METHOD__, [
],
__METHOD__,
[
'ORDER BY' => 'hits DESC',
'LIMIT' => 10
]
Expand Down

0 comments on commit fb26e87

Please sign in to comment.