
Modernize Python 2 code to get ready for Python 3 AGAIN
cclauss committed Dec 25, 2017
1 parent 6aa0b1b commit 18d4533
Showing 11 changed files with 65 additions and 54 deletions.
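
The changes follow one idiom throughout: add the needed from __future__ imports at the top of each file, convert Python 2 print statements to print() calls, and make the implicit relative imports in atm/__init__.py explicit, so the touched lines parse the same way under Python 2.7 and Python 3. A minimal sketch of the idiom (a standalone illustration, not tied to any atm module):

from __future__ import absolute_import, print_function

# Python 2 only:        print 'downloading data...'
# Python 2.7 and 3.x:
print('downloading data...')

# Python 2 implicit relative import (inside a package):  import config
# Explicit relative import accepted by both versions:
# from . import config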
3 changes: 2 additions & 1 deletion atm/__init__.py
@@ -1,4 +1,5 @@
"""An AutoML framework.
"""
from __future__ import absolute_import

import config, constants, database, enter_data, method, metrics, model, utilities, worker
from . import config, constants, database, enter_data, method, metrics, model, utilities, worker
5 changes: 3 additions & 2 deletions atm/database.py
@@ -1,3 +1,4 @@
from __future__ import print_function
from sqlalchemy import (create_engine, inspect, exists, Column, Unicode, String,
ForeignKey, Integer, Boolean, DateTime, Enum, MetaData,
Numeric, Table, Text)
@@ -65,8 +66,8 @@ def call(db, *args, **kwargs):
result = default()
argstr = ', '.join([str(a) for a in args])
kwargstr = ', '.join(['%s=%s' % kv for kv in kwargs.items()])
print "Error in %s(%s, %s):" % (func.__name__, argstr, kwargstr)
print traceback.format_exc()
print("Error in %s(%s, %s):" % (func.__name__, argstr, kwargstr))
print(traceback.format_exc())
finally:
session.close()

37 changes: 19 additions & 18 deletions atm/enter_data.py
@@ -1,3 +1,4 @@
from __future__ import print_function
import argparse
import os
import warnings
@@ -98,10 +99,10 @@ def enter_dataset(db, run_config, aws_config=None):
Returns: the generated dataset object
"""
print 'downloading data...'
print('downloading data...')
train_path, test_path = download_data(run_config.train_path,
run_config.test_path, aws_config)
print 'creating dataset...'
print('creating dataset...')
dataset = create_dataset(db, run_config.label_column, train_path, test_path,
run_config.data_description)
run_config.dataset_id = dataset.id
@@ -138,16 +139,16 @@ def enter_datarun(sql_config, run_config, aws_config=None,
# enumerate all combinations of categorical variables for this method
method = Method(METHODS_MAP[m])
method_parts[m] = method.get_hyperpartitions()
print 'method', m, 'has', len(method_parts[m]), 'hyperpartitions'
print('method', m, 'has', len(method_parts[m]), 'hyperpartitions')

print
print()
# create hyperpartitions and datarun(s)
run_ids = []
if not run_per_partition:
print 'saving datarun...'
print('saving datarun...')
datarun = create_datarun(db, dataset, run_config)

print 'saving hyperpartions...'
print('saving hyperpartions...')
for method, parts in method_parts.items():
for part in parts:
# if necessary, create a new datarun for each hyperpartition.
@@ -164,20 +165,20 @@
categoricals=part.categoricals,
status=PartitionStatus.INCOMPLETE)

print 'done!'
print
print '========== Summary =========='
print 'Dataset ID:', dataset.id
print 'Training data:', dataset.train_path
print 'Test data:', (dataset.test_path or '(None)')
print('done!')
print()
print('========== Summary ==========')
print('Dataset ID:', dataset.id)
print('Training data:', dataset.train_path)
print('Test data:', (dataset.test_path or '(None)'))
if run_per_partition:
print 'Datarun IDs:', ', '.join(map(str, run_ids))
print('Datarun IDs:', ', '.join(map(str, run_ids)))
else:
print 'Datarun ID:', datarun.id
print 'Hyperpartition selection strategy:', datarun.selector
print 'Parameter tuning strategy:', datarun.tuner
print 'Budget: %d (%s)' % (datarun.budget, datarun.budget_type)
print
print('Datarun ID:', datarun.id)
print('Hyperpartition selection strategy:', datarun.selector)
print('Parameter tuning strategy:', datarun.tuner)
print('Budget: %d (%s)' % (datarun.budget, datarun.budget_type))
print()

return run_ids or datarun.id

5 changes: 3 additions & 2 deletions atm/model.py
@@ -3,6 +3,7 @@
:synopsis: Model around classification method.
"""
from __future__ import print_function
import numpy as np
import pandas as pd
import time
@@ -110,8 +111,8 @@ def make_pipeline(self):
self.dimensions = int(pca_dims)
else:
self.dimensions = int(pca_dims * float(self.num_features))
print "*** Using PCA to reduce %d features to %d dimensions" %\
(self.num_features, self.dimensions)
print("*** Using PCA to reduce %d features to %d dimensions" %\
(self.num_features, self.dimensions))
pca = decomposition.PCA(n_components=self.dimensions, whiten=whiten)
steps.append(('pca', pca))

11 changes: 6 additions & 5 deletions atm/utilities.py
@@ -1,3 +1,4 @@
from __future__ import print_function
import pickle
import urllib2
import hashlib
@@ -61,7 +62,7 @@ def get_public_ip():
if match:
public_ip = match.group()
except Exception as e: # any exception, doesn't matter what
print 'could not get public IP:', e
print('could not get public IP:', e)
public_ip = 'localhost'

return public_ip
@@ -210,7 +211,7 @@ def download_file_s3(aws_path, aws_config, local_folder=DATA_PATH):
path = keyname

if os.path.isfile(path):
print 'file %s already exists!' % path
print('file %s already exists!' % path)
return path

conn = S3Connection(aws_config.access_key, aws_config.secret_key)
@@ -221,7 +222,7 @@ def download_file_s3(aws_path, aws_config, local_folder=DATA_PATH):
else:
aws_keyname = keyname

print 'downloading data from S3...'
print('downloading data from S3...')
s3key = Key(bucket)
s3key.key = aws_keyname
s3key.get_contents_to_filename(path)
@@ -239,10 +240,10 @@ def download_file_http(url, local_folder=DATA_PATH):
path = filename

if os.path.isfile(path):
print 'file %s already exists!' % path
print('file %s already exists!' % path)
return path

print 'downloading data from %s...' % url
print('downloading data from %s...' % url)
f = urllib2.urlopen(url)
data = f.read()
with open(path, "wb") as outfile:
3 changes: 2 additions & 1 deletion atm/worker.py
@@ -1,4 +1,5 @@
#!/usr/bin/python2.7
from __future__ import print_function
from atm.config import *
from atm.constants import *
from atm.utilities import *
@@ -54,7 +55,7 @@ def _log(msg, stdout=True):
with open(LOG_FILE, 'a') as lf:
lf.write(msg + '\n')
if stdout:
print msg
print(msg)


# Exception thrown when something goes wrong for the worker, but the worker
1 change: 1 addition & 0 deletions fabfile.py
@@ -1,3 +1,4 @@
from __future__ import print_function
from fabric.api import *
from fabric.colors import green as _green, yellow as _yellow
import boto.ec2
3 changes: 2 additions & 1 deletion test/btb_test.py
@@ -1,3 +1,4 @@
from __future__ import print_function
import argparse
import os
import random
@@ -61,7 +62,7 @@

datasets = os.listdir(BASELINE_PATH)
datasets = datasets[:5]
print 'using datasets', ', '.join(datasets)
print('using datasets', ', '.join(datasets))

# generate datasets and dataruns
for ds in datasets:
5 changes: 3 additions & 2 deletions test/end_to_end_test.py
@@ -1,4 +1,5 @@
#!/usr/bin/python2.7
from __future__ import print_function
import argparse
import os
import yaml
@@ -65,7 +66,7 @@

db = Database(**vars(sql_config))

print 'creating dataruns...'
print('creating dataruns...')
datarun_ids = []
for ds in DATASETS:
run_config.train_path = join(DATA_DIR, ds)
@@ -75,7 +76,7 @@

work_parallel(db=db, datarun_ids=datarun_ids, n_procs=args.processes)

print 'workers finished.'
print('workers finished.')

for rid in datarun_ids:
print_summary(db, rid)
7 changes: 4 additions & 3 deletions test/method_test.py
@@ -1,4 +1,5 @@
#!/usr/bin/python2.7
from __future__ import print_function
import argparse
import os
import yaml
@@ -39,7 +40,7 @@
run_path=RUN_CONFIG)
db = Database(**vars(sql_config))

print 'creating dataruns...'
print('creating dataruns...')
datarun_ids = []
for ds in DATASETS:
run_config.train_path = join(DATA_DIR, ds)
@@ -48,11 +49,11 @@
datarun_ids.extend(enter_datarun(sql_config, run_config, aws_config,
run_per_partition=True))

print 'computing on dataruns', datarun_ids
print('computing on dataruns', datarun_ids)
work_parallel(db=db, datarun_ids=datarun_ids, aws_config=aws_config,
n_procs=args.processes)

print 'workers finished.'
print('workers finished.')

for rid in datarun_ids:
print_hp_summary(db, rid)
39 changes: 20 additions & 19 deletions test/utilities.py
@@ -1,3 +1,4 @@
from __future__ import print_function
import argparse
import numpy as np

@@ -20,7 +21,7 @@ def get_best_so_far(db, datarun_id):
# generate a list of the "best so far" score after each classifier was
# computed (in chronological order)
classifiers = db.get_classifiers(datarun_id=datarun_id)
print 'run %s: %d classifiers' % (datarun_id, len(classifiers))
print('run %s: %d classifiers' % (datarun_id, len(classifiers)))
y = []
for l in classifiers:
best_so_far = max(y + [l.cv_judgment_metric])
@@ -57,19 +58,19 @@ def graph_series(length, title, **series):
def print_summary(db, rid):
run = db.get_datarun(rid)
ds = db.get_dataset(run.dataset_id)
print
print 'Dataset %s' % ds
print 'Datarun %s' % run
print()
print('Dataset %s' % ds)
print('Datarun %s' % run)

classifiers = db.get_classifiers(datarun_id=rid)
print 'Classifiers: %d total' % len(classifiers)
print('Classifiers: %d total' % len(classifiers))

best = db.get_best_classifier(datarun_id=run.id)
if best is not None:
score = best.cv_judgment_metric
err = 2 * best.cv_judgment_metric_stdev
print 'Best result overall: classifier %d, %s = %.3f +- %.3f' %\
(best.id, run.metric, score, err)
print('Best result overall: classifier %d, %s = %.3f +- %.3f' %\
(best.id, run.metric, score, err))


def print_method_summary(db, rid):
@@ -84,22 +85,22 @@ def print_method_summary(db, rid):
alg_map[hp.method][hp.id].append(l)

for alg, hp_map in alg_map.items():
print
print 'method %s:' % alg
print()
print('method %s:' % alg)

classifiers = sum(hp_map.values(), [])
errored = len([l for l in classifiers if l.status ==
ClassifierStatus.ERRORED])
complete = len([l for l in classifiers if l.status ==
ClassifierStatus.COMPLETE])
print '\t%d errored, %d complete' % (errored, complete)
print('\t%d errored, %d complete' % (errored, complete))

best = db.get_best_classifier(datarun_id=rid, method=alg)
if best is not None:
score = best.cv_judgment_metric
err = 2 * best.cv_judgment_metric_stdev
print '\tBest: classifier %s, %s = %.3f +- %.3f' % (best, run.metric,
score, err)
print('\tBest: classifier %s, %s = %.3f +- %.3f' % (best, run.metric,
score, err))

def print_hp_summary(db, rid):
run = db.get_datarun(rid)
@@ -111,25 +112,25 @@ def print_hp_summary(db, rid):
part_map[hp].append(c)

for hp, classifiers in part_map.items():
print
print 'hyperpartition', hp
print db.get_hyperpartition(hp)
print()
print('hyperpartition', hp)
print(db.get_hyperpartition(hp))

errored = len([c for c in classifiers if c.status ==
ClassifierStatus.ERRORED])
complete = len([c for c in classifiers if c.status ==
ClassifierStatus.COMPLETE])
print '\t%d errored, %d complete' % (errored, complete)
print('\t%d errored, %d complete' % (errored, complete))

best = db.get_best_classifier(datarun_id=rid, hyperpartition_id=hp)
if best is not None:
score = best.cv_judgment_metric
err = 2 * best.cv_judgment_metric_stdev
print '\tBest: classifier %s, %s = %.3f +- %.3f' % (best, run.metric,
score, err)
print('\tBest: classifier %s, %s = %.3f +- %.3f' % (best, run.metric,
score, err))

def work_parallel(db, datarun_ids=None, aws_config=None, n_procs=4):
print 'starting workers...'
print('starting workers...')
kwargs = dict(db=db, datarun_ids=datarun_ids, save_files=False,
choose_randomly=True, cloud_mode=False,
aws_config=aws_config, wait=False)

