Merge pull request #97 from netsage-project/new-disciplines
New disciplines
lisaens authored Oct 23, 2019
2 parents ace3c7b + 3117c0c commit aa0157d
Showing 15 changed files with 1,024 additions and 1,060 deletions.
86 changes: 60 additions & 26 deletions bin/resourcedb-export.pl
@@ -12,35 +12,44 @@
use Data::Dumper;
use Encode;

# This script will pull data out of the Science Registry database and write it to a .json file (used by resourcedb-make-mmdb.pl)
# AND to a .yaml file (for use with logstash translate filter)
# This script will pull data out of the Science Registry database and write it to a
# .json file (used by resourcedb-make-mmdb.pl, which creates the fake geoip mmdb file used for logstash SciReg tagging)
# AND to a .yaml file (used with logstash translate filter at one point; maybe still useful someday)
# AND to a .csv file (to parse or send to a human; delimiter is | )
# RUNS FROM CRON
# ~10/23/19 - Filter out resources with discipline = "Unknown" or "non-science" as we don't want to tag flows with those disciplines.

# Defaults
my $help;
# Use same config file as resourcedb (Science Registry)
my $config_file = "/etc/grnoc/netsage/resourcedb/config.xml";
# Name output file with the current timestamp
my $output_file = "/etc/grnoc/netsage/resourcedb/datadump_".time().".json";
my $output_file = "/etc/grnoc/netsage/resourcedb/scireg_".time().".json";

#-----------------------------
sub usage() {
print " USAGE: perl resourcedb-export.pl [-c <config file>] [-o <output file>] [-h]
Without parameters, the defaults are
config_file = /etc/grnoc/netsage/resourcedb/config.xml
output_file = /etc/grnoc/netsage/resourcedb/datadump_<timestamp>.yaml (.json file will have same name. Must run as sudo) \n";
print " USAGE: perl resourcedb-export.pl [-c/--config <config file>] [-o/==output <output-dir/filename.json> [-h/--help]
Without parameters, the defaults are
config_file = /etc/grnoc/netsage/resourcedb/config.xml
    output = /etc/grnoc/netsage/resourcedb/scireg_<timestamp>.json
             (.yaml and .csv files with the same name are written too. Must run as sudo) \n";
exit;
}
#-----------------------------
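# Illustrative invocation (a sketch, not part of this commit; the output path is just an example):
#   sudo perl resourcedb-export.pl -c /etc/grnoc/netsage/resourcedb/config.xml -o /tmp/scireg_export.json
# The matching /tmp/scireg_export.yaml and /tmp/scireg_export.csv would be written alongside the .json.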

# defaults can be overridden on command line (-c and -o)
GetOptions( 'config|c=s' => \$config_file,
            'output|o=s' => \$output_file,
            'help|h|?' => \$help
);

# did they ask for help?
usage() if $help;

if ( $output_file !~ /\.json/) {
print "your output filename has to end in .json\n";
die;
}

# Read config file to get db connection info
if (! -f $config_file) {
@@ -64,23 +73,34 @@ ()
my $host = $config->get( '/config/database-host' );
my $port = $config->get( '/config/database-port' );

# csv file - columns wanted, in order. Values are db columns/hash keys
my @csv_columns = ("ip_block_id", "addresses_str", "org_name", "resource", "description", "country_code", "discipline", "role");
my $csv_header = "resource_id|IPs|org|resource|description|country_code|discipline|role \n";
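# Illustrative row (hypothetical values, not from the database), matching the header above
# and the '|' delimiter:
#   42|10.10.0.0/16,10.20.30.0/24|Example University|Example HPC Cluster|Campus compute cluster|US|Physics|Compute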

# Try to open output files
#json file
$output_file =~ s/yaml$/json/; # just in case
# json file
my $fh;
if (! open($fh, '>', $output_file) ) {
print "Could not open json output file $output_file\n";
die;
}
# yaml file
my $output_file_yaml = $output_file;
$output_file_yaml =~ s/json$/yaml/;
my $fh_yaml;
if (! open($fh_yaml, '>', $output_file_yaml) ) {
print "Could not open yaml output file $output_file_yaml\n";
die;
}

# csv file
my $output_file_csv = $output_file;
$output_file_csv =~ s/json$/csv/;
my $fh_csv;
if (! open($fh_csv, '>', $output_file_csv) ) {
print "Could not open csv output file $output_file_csv\n";
die;
}
print $fh_csv $csv_header;

# Connect to db
my $dbq = GRNOC::DatabaseQuery->new(
@@ -95,10 +115,11 @@ ()
if(!$conn_res){
die ("Error connecting to mysql.");
}
# tells dbq to expect/use unicode which is what's in the db
$dbq->{'dbh'}->do("SET NAMES utf8mb4;");

# Get info about resources
# FILTER OUT resources with discipline = "Unknown" or "non-science" AS WE DON'T WANT TO TAG FLOWS WITH THOSE DISCIPLINES.
my $resources = $dbq->select(
table => 'ip_block JOIN organization ON ip_block.organization_id = organization.organization_id '.
'JOIN discipline ON ip_block.discipline_id = discipline.discipline_id '.
@@ -123,7 +144,8 @@ ()
'organization.latitude as org_latitude',
'organization.longitude as org_longitude',
'organization.country_code as org_country_code'
]
],
where => { 'discipline.name' => [ -and => {'!=','Unknown'}, {'!=','non-science'} ] }
);
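# For reference, a sketch of roughly the SQL the select() above generates for the new filter
# (the real statement is built by GRNOC::DatabaseQuery):
#   ... WHERE discipline.name != 'Unknown' AND discipline.name != 'non-science'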

if (!$resources) {
@@ -154,13 +176,21 @@ ()
foreach my $proj (@$projects) {
push(@{$res->{'projects'}}, $proj);
}
### print Dumper($res); ###

# For json file
push(@all_resources, $res);

# For csv file
my $line = "";
foreach my $col (@csv_columns) {
$line = $line . $res->{$col} . '|';
}
$line =~ s/\n/ /g;
$line = $line."\n";

# For yaml file
# strip /xx's from addresses and expand any ip blocks
my @ip_array = split(",", $res->{'addresses_str'});
my @final_ips;
foreach my $ip (@ip_array) {
@@ -175,7 +205,7 @@ ()
push(@final_ips, $ipblock->addr());
} elsif ($slash < 28 or ($slash >32 and $slash < 124)) {
# if there are too many ip's in the block, write a regular expression that matches ip's in the block
# NetAddr::IP -> re() - Returns a Perl regular expression that will match an IP address within the given subnet.
# Defaults to ipV4 notation. Will return an ipV6 regex if the address in not in ipV4 space.
push(@final_ips, $ipblock->re());
} else {
@@ -188,20 +218,25 @@ ()
}
}
# remove dups and join array elements with |
@final_ips = uniq(@final_ips);
my $ip_regex = join( "|", @final_ips );
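# Illustrative sketch of the two cases above (not code from this commit; hostenum() is one
# possible way to enumerate a small block):
#   my $small = NetAddr::IP->new('10.1.2.0/30');
#   my @each  = map { $_->addr() } $small->hostenum();   # a few individual addresses
#   my $big   = NetAddr::IP->new('10.0.0.0/16');
#   my $re    = $big->re();    # one regex matching any address inside the /16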

# convert resource info into a json string, then convert that (which is utf8 from the db) to perl characters
# with internal utf-8 tags, using decode.
my $res_json = decode( 'utf-8', encode_json($res) );
# html-encode simple single quotes since we'll use them to start and end the string that holds the json and escaping them
# doesn't work ("s are already escaped)
$res_json =~ s/'/&apos;/g;

# write ip:'data' line to file (perl knows how to write utf8 chars)
print $fh_yaml "'".$ip_regex."' : '".$res_json."'\n" ;
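# An example of what one line looks like (hypothetical values):
#   '198.51.100.7|198.51.100.8' : '{"org_name":"Example University","discipline":"Physics", ... }'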

# csv file line
print $fh_csv $line;
}

close($fh_yaml);
close($fh_csv);

# Add an array of IPs in addition to the addresses string (for the old pipeline scireg tagger)
foreach my $resrc (@all_resources) {
@@ -214,5 +249,4 @@ ()
print $fh $json ;
close($fh);

print "Wrote $output_file and $output_file_yaml \n";

print "Wrote $output_file and $output_file_yaml and $output_file_csv \n";
4 changes: 4 additions & 0 deletions bin/resourcedb-init-db
@@ -1,5 +1,9 @@
#!/usr/bin/perl

# This is a script to run when installing Science Registry for the first time.
# It will create database users for the scripts to use and write /etc/grnoc/resourcedb/config.xml
# RUN MANUALLY with sudo

use strict;
use warnings;

1 change: 1 addition & 0 deletions bin/resourcedb-make-mmdb.pl
@@ -6,6 +6,7 @@
# Each db entry is for an individual cidr address. They are sorted so the longest prefixes (most specific addresses) come last, since
# the logstash GEOIP FILTER gets the LAST MATCH for an IP address.
# If successful, this script writes a timestamp to /var/lib/grnoc/scienceregistry-mmdb-file/status.txt.
# RUNS VIA CRON

# see https://blog.maxmind.com/2015/09/29/building-your-own-mmdb-database-for-fun-and-profit/
# also https://stackoverflow.com/questions/47655730/maxmind-writer-to-create-custom-database-to-use-with-geoip-in-elk-stack
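# A minimal sketch (an assumption, not this script's actual code) of the ordering described above:
# sort NetAddr::IP entries by prefix length so the most specific blocks come last and win the
# logstash geoip "last match":
#   my @sorted = sort { $a->masklen() <=> $b->masklen() } @cidr_blocks;   # @cidr_blocks is hypothetical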
8 changes: 7 additions & 1 deletion bin/resourcedb-update-db
@@ -1,5 +1,9 @@
#!/usr/bin/perl

# When first installing the Science Registry (resourcedb), and after every upgrade,
# run this script to set up the database tables and/or make any changes needed.
# RUN MANUALLY

use strict;
use warnings;

@@ -11,6 +15,7 @@ use DBI;
use constant DEFAULT_CONFIG_FILE => '/etc/grnoc/netsage/resourcedb/config.xml';

sub main {

my $config = GRNOC::Config->new(
config_file => DEFAULT_CONFIG_FILE,
force_array => 0
@@ -30,11 +35,12 @@ sub main {

my ($version, $err) = $db_util->install_database();
if (defined $err) {
warn "$err";
print "error: $err \n";
return;
}

return 1;
}

main();
print " DONE \n";
2 changes: 1 addition & 1 deletion lib/GRNOC/NetSage/ResourceDB.pm
@@ -13,7 +13,7 @@ package GRNOC::NetSage::ResourceDB;
use strict;
use warnings;

our $VERSION = '0.11.0';
our $VERSION = '0.12.0';

sub new {
