-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
moving wikihotness script + tables (#5)
* moving wikihotness script + tables * remove space * Fix wikis.php path --------- Co-authored-by: Shashank Atreya <[email protected]> Co-authored-by: Alex Winkler <[email protected]>
- Loading branch information
1 parent
27845e8
commit fb26e87
Showing
9 changed files
with
228 additions
and
18 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,3 +3,5 @@ | |
/node_modules/ | ||
composer.lock | ||
package-lock.json | ||
.idea/ | ||
.DS_Store |
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,184 @@ | ||
<?php
/**
 * Wiki hotness updater (command-line script).
 *
 * For every wiki listed in $liquipedia_wikis this script:
 *  1. deletes rows from `wiki_hits` that are older than the wiki's
 *     `hot_threshold`,
 *  2. takes the most frequently hit URLs that remain,
 *  3. works out a human-readable title for each page (the rendered <title>,
 *     a `|tickername=` value from the stable revision text, or the page name).
 *
 * Afterwards the contents of `wiki_hot` are replaced with the collected rows
 * and a PURGE request for the root URL is sent to the local Varnish instance.
 */
chdir( __DIR__ );
require_once '../../../../../config/db_config.php';
require_once '../../../lp-config/wikis.php';

// $server, $login, $pass and $liquipedia_wikis are not defined in this file;
// presumably the two config files required above provide them.
$db_host = $server;
$db_name = 'liquipedia';
$db_user = $login;
$db_password = $pass;
unset( $login, $pass, $server );

$db = null;
try {
	$db = new PDO( 'mysql:host=' . $db_host . ';dbname=' . $db_name, $db_user, $db_password );
	$db->setAttribute( PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION );
	$db->setAttribute( PDO::ATTR_DEFAULT_FETCH_MODE, PDO::FETCH_ASSOC );
	$db->setAttribute( PDO::ATTR_EMULATE_PREPARES, false );
} catch ( PDOException $e ) {
	echo "Connection Error: " . $e->getMessage();
}
if ( $db === null ) {
	die( 'Could not connect to database' );
}

$db->exec( 'SET NAMES utf8' );

// Maximum number of pages stored per wiki. The SELECT below fetches a few
// more rows than this because some of them (main page, special pages, pages
// without a stable revision) are skipped.
$maxPagesPerWiki = 10;

// Byte width of wiki_hot.page and wiki_hot.title (both varbinary(128)).
$hotColumnBytes = 128;

$wiki_hits = [];

$ch = curl_init();
curl_setopt( $ch, CURLOPT_HTTPHEADER, [ 'Host: liquipedia.net', 'User-Agent: wiki-hotness/0.1' ] );
curl_setopt( $ch, CURLOPT_RETURNTRANSFER, true );
curl_setopt( $ch, CURLOPT_FAILONERROR, true );
curl_setopt( $ch, CURLOPT_ENCODING, "" );

foreach ( $liquipedia_wikis as $wiki => $info ) {
	$time = $info[ 'hot_threshold' ];
	$dbPrefix = $info[ 'db_prefix' ];

	print "Processing $wiki (threshold: $time, database: $dbPrefix)...\n";

	// Drop hits that fell out of this wiki's time window. The interval
	// expression (value + unit) cannot be bound as a parameter, so it is
	// concatenated; it comes from the wiki configuration, not from user input.
	$db->prepare(
		'DELETE FROM `wiki_hits` ' .
		'WHERE `datum` < UNIX_TIMESTAMP(NOW() - INTERVAL ' . $time . ' ) ' .
		'AND `wiki` = :wiki' )->execute( [ 'wiki' => $wiki ] );

	$sql = 'SELECT url, COUNT(*) AS count FROM wiki_hits WHERE wiki = :wiki GROUP BY url ORDER BY count DESC LIMIT 15';
	$sqlStmt = $db->prepare( $sql );
	$sqlStmt->execute( [ 'wiki' => $wiki ] );
	$res = $sqlStmt->fetchAll();

	// Prepared lazily (only once a page actually needs it) and then reused
	// for every page of this wiki, since the table prefix is per wiki.
	$oldTextStmt = null;

	$num_found = 0;
	foreach ( $res as $row ) {
		$page = $row[ 'url' ];

		if ( $page === 'Main_Page' || strncmp( $page, 'Special:', 8 ) === 0 ) {
			continue;
		}

		// Reset per page so that a failed lookup can never reuse the titles
		// of a previously processed page.
		$display_title = '';
		$ticker_title = '';

		// Try to match MW encoding
		$encoded_page = str_replace(
			[ '%2F', '%3A', '%28', '%29', '%21' ],
			[ '/', ':', '(', ')', '!' ],
			rawurlencode( $page )
		);

		// Query varnish directly to avoid frontend HTTPS overhead
		curl_setopt( $ch, CURLOPT_URL, "http://127.0.0.1:6081/$wiki/$encoded_page" );

		$ret = curl_exec( $ch );
		if ( is_string( $ret ) && !curl_errno( $ch ) ) {
			// Don't parse more than necessary
			$ret = mb_substr( $ret, 0, 1024 );
			if ( preg_match( "/<title>(.+?)<\/title>/", $ret, $m ) ) {
				$display_title = $m[ 1 ];

				// Strip " - Liquipedia Wild Rift Wiki" etc.
				// mb_strpos() returns false (not -1) when the needle is absent.
				$pos = mb_strpos( $display_title, ' - Liquipedia ' );
				if ( $pos !== false ) {
					$display_title = mb_substr( $display_title, 0, $pos );
				}

				$display_title = html_entity_decode( $display_title, ENT_QUOTES, 'UTF-8' );
			}
		}

		// Fetch the wikitext of the page's stable (flagged) revision.
		// SUBSTR( content_address, 4 ) presumably strips a three-character
		// prefix such as "tt:" in front of the text id - TODO confirm.
		if ( $oldTextStmt === null ) {
			$oldTextStmt = $db->prepare(
				'SELECT old_text '
				. 'FROM ' . $dbPrefix . 'text t, '
				. $dbPrefix . 'page p, '
				. $dbPrefix . 'flaggedpages f, '
				. $dbPrefix . 'content c, '
				. $dbPrefix . 'slots s '
				. 'WHERE t.old_id = SUBSTR(c.content_address, 4) '
				. 'AND c.content_id = s.slot_content_id '
				. 'AND s.slot_revision_id = f.fp_stable '
				. 'AND f.fp_page_id = p.page_id '
				. 'AND p.page_namespace IN (0, 134) '
				. 'AND p.page_title = :pageTitle'
			);
		}
		$oldTextStmt->execute( [ 'pageTitle' => $page ] );
		$oldText = $oldTextStmt->fetch();
		$oldTextStmt->closeCursor();
		if ( !$oldText ) {
			// No stable revision in the queried namespaces: skip the page.
			continue;
		}

		if ( preg_match( '/\|tickername=(.+?)[\|}\r\n]/', $oldText[ 'old_text' ], $m ) ) {
			$ticker_title = trim( $m[ 1 ] );
		}

		// Try to use display title without ending disambiguation brackets if it's too long
		if ( mb_strlen( $display_title ) > 45 ) {
			$new_title = preg_replace( "/\s+\(.+\)$/u", "", $display_title );
			if ( !empty( $new_title ) ) {
				$display_title = $new_title;
			}
		}

		// Preference order: short display title, ticker name, long display
		// title, and finally the page name with underscores turned into spaces.
		if ( $display_title !== '' && mb_strlen( $display_title ) <= 35 ) {
			$title = $display_title;
		} elseif ( $ticker_title !== '' ) {
			$title = $ticker_title;
		} elseif ( $display_title !== '' ) {
			$title = $display_title;
		} else {
			$title = str_replace( '_', ' ', $page );
		}

		$wiki_hits[] = [
			'title' => $title,
			'url' => $row[ 'url' ],
			'wiki' => $wiki,
			'hits' => (int)$row[ 'count' ],
		];

		print "Found {$row[ 'url' ]} ($title) - {$row[ 'count' ]}\n";

		// Count the stored page; without this the cap below never triggers.
		$num_found++;
		if ( $num_found >= $maxPagesPerWiki ) {
			break;
		}
	}
}

// Replace the previous result set while holding a write lock on the table.
$db->exec( 'LOCK TABLES wiki_hot WRITE' );
$db->exec( 'DELETE FROM wiki_hot' );

$insertStmt = $db->prepare(
	'INSERT INTO wiki_hot (wiki, page, title, hits) VALUES ( :wiki, :url, :title, :count )'
);
foreach ( $wiki_hits as $row ) {
	// mb_strcut() limits by bytes without splitting a multi-byte character,
	// which matches the varbinary(128) columns the values are stored in.
	$insertStmt->execute( [
		'wiki' => $row[ 'wiki' ],
		'url' => mb_strcut( $row[ 'url' ], 0, $hotColumnBytes, 'UTF-8' ),
		'title' => mb_strcut( $row[ 'title' ], 0, $hotColumnBytes, 'UTF-8' ),
		'count' => $row[ 'hits' ],
	] );
}

$db->exec( 'UNLOCK TABLES' );

// Send a PURGE request for the root URL to the local varnish instance.
curl_setopt( $ch, CURLOPT_CUSTOMREQUEST, 'PURGE' );
curl_setopt( $ch, CURLOPT_URL, "http://127.0.0.1:6081/" );
curl_exec( $ch );
$code = curl_getinfo( $ch, CURLINFO_HTTP_CODE );
print "Purge: $code\n";
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
-- Raw page-hit log read and pruned by the wiki hotness script.
--   url:   page name that was hit
--   wiki:  identifier of the wiki the hit belongs to
--   datum: time of the hit as a Unix timestamp (the hotness script compares
--          it against UNIX_TIMESTAMP())
-- The (wiki, datum) key is declared inside CREATE TABLE so that re-running
-- this file is a no-op instead of failing on a duplicate key name.
CREATE TABLE IF NOT EXISTS `wiki_hits` (
	`url` varbinary(255) NOT NULL,
	`wiki` varbinary(24) NOT NULL,
	`datum` int(10) UNSIGNED NOT NULL,
	KEY `wiki` (`wiki`,`datum`)
) /*$wgDBTableOptions*/;
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
-- Result table of the wiki hotness script: it is emptied and refilled with
-- the most-hit pages of every wiki on each run.
--   wiki:  identifier of the wiki
--   page:  page name (the `url` value taken from wiki_hits)
--   title: human-readable title chosen by the script
--   hits:  number of hits counted for the page
-- The key on `wiki` is declared inside CREATE TABLE so that re-running this
-- file is a no-op instead of failing on a duplicate key name.
CREATE TABLE IF NOT EXISTS `wiki_hot` (
	`wiki` varbinary(16) NOT NULL,
	`page` varbinary(128) NOT NULL,
	`title` varbinary(128) NOT NULL,
	`hits` int(11) NOT NULL,
	KEY `wiki` (`wiki`)
) /*$wgDBTableOptions*/;
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters