
Modernize Python 2 code to get ready for Python 3 AGAIN
cclauss committed Dec 25, 2017
1 parent 6aa0b1b commit 18d4533
Showing 11 changed files with 65 additions and 54 deletions.
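
The changes follow one idiom throughout: add the needed from __future__ imports at the top of each file, convert Python 2 print statements to print() calls, and make the implicit relative imports in atm/__init__.py explicit, so the touched lines parse the same way under Python 2.7 and Python 3. A minimal sketch of the idiom (a standalone illustration, not tied to any atm module):

from __future__ import absolute_import, print_function

# Python 2 only:        print 'downloading data...'
# Python 2.7 and 3.x:
print('downloading data...')

# Python 2 implicit relative import (inside a package):  import config
# Explicit relative import accepted by both versions:
# from . import config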
3 changes: 2 additions & 1 deletion atm/__init__.py
@@ -1,4 +1,5 @@
"""An AutoML framework.
"""
from __future__ import absolute_import

import config, constants, database, enter_data, method, metrics, model, utilities, worker
from . import config, constants, database, enter_data, method, metrics, model, utilities, worker
5 changes: 3 additions & 2 deletions atm/database.py
@@ -1,3 +1,4 @@
from __future__ import print_function
from sqlalchemy import (create_engine, inspect, exists, Column, Unicode, String,
ForeignKey, Integer, Boolean, DateTime, Enum, MetaData,
Numeric, Table, Text)
@@ -65,8 +66,8 @@ def call(db, *args, **kwargs):
result = default()
argstr = ', '.join([str(a) for a in args])
kwargstr = ', '.join(['%s=%s' % kv for kv in kwargs.items()])
print "Error in %s(%s, %s):" % (func.__name__, argstr, kwargstr)
print traceback.format_exc()
print("Error in %s(%s, %s):" % (func.__name__, argstr, kwargstr))
print(traceback.format_exc())
finally:
session.close()

37 changes: 19 additions & 18 deletions atm/enter_data.py
@@ -1,3 +1,4 @@
from __future__ import print_function
import argparse
import os
import warnings
@@ -98,10 +99,10 @@ def enter_dataset(db, run_config, aws_config=None):
Returns: the generated dataset object
"""
print 'downloading data...'
print('downloading data...')
train_path, test_path = download_data(run_config.train_path,
run_config.test_path, aws_config)
print 'creating dataset...'
print('creating dataset...')
dataset = create_dataset(db, run_config.label_column, train_path, test_path,
run_config.data_description)
run_config.dataset_id = dataset.id
@@ -138,16 +139,16 @@ def enter_datarun(sql_config, run_config, aws_config=None,
# enumerate all combinations of categorical variables for this method
method = Method(METHODS_MAP[m])
method_parts[m] = method.get_hyperpartitions()
print 'method', m, 'has', len(method_parts[m]), 'hyperpartitions'
print('method', m, 'has', len(method_parts[m]), 'hyperpartitions')

print
print()
# create hyperpartitions and datarun(s)
run_ids = []
if not run_per_partition:
print 'saving datarun...'
print('saving datarun...')
datarun = create_datarun(db, dataset, run_config)

print 'saving hyperpartions...'
print('saving hyperpartions...')
for method, parts in method_parts.items():
for part in parts:
# if necessary, create a new datarun for each hyperpartition.
@@ -164,20 +165,20 @@
categoricals=part.categoricals,
status=PartitionStatus.INCOMPLETE)

print 'done!'
print
print '========== Summary =========='
print 'Dataset ID:', dataset.id
print 'Training data:', dataset.train_path
print 'Test data:', (dataset.test_path or '(None)')
print('done!')
print()
print('========== Summary ==========')
print('Dataset ID:', dataset.id)
print('Training data:', dataset.train_path)
print('Test data:', (dataset.test_path or '(None)'))
if run_per_partition:
print 'Datarun IDs:', ', '.join(map(str, run_ids))
print('Datarun IDs:', ', '.join(map(str, run_ids)))
else:
print 'Datarun ID:', datarun.id
print 'Hyperpartition selection strategy:', datarun.selector
print 'Parameter tuning strategy:', datarun.tuner
print 'Budget: %d (%s)' % (datarun.budget, datarun.budget_type)
print
print('Datarun ID:', datarun.id)
print('Hyperpartition selection strategy:', datarun.selector)
print('Parameter tuning strategy:', datarun.tuner)
print('Budget: %d (%s)' % (datarun.budget, datarun.budget_type))
print()

return run_ids or datarun.id

5 changes: 3 additions & 2 deletions atm/model.py
@@ -3,6 +3,7 @@
:synopsis: Model around classification method.
"""
from __future__ import print_function
import numpy as np
import pandas as pd
import time
@@ -110,8 +111,8 @@ def make_pipeline(self):
self.dimensions = int(pca_dims)
else:
self.dimensions = int(pca_dims * float(self.num_features))
print "*** Using PCA to reduce %d features to %d dimensions" %\
(self.num_features, self.dimensions)
print("*** Using PCA to reduce %d features to %d dimensions" %\
(self.num_features, self.dimensions))
pca = decomposition.PCA(n_components=self.dimensions, whiten=whiten)
steps.append(('pca', pca))

11 changes: 6 additions & 5 deletions atm/utilities.py
@@ -1,3 +1,4 @@
from __future__ import print_function
import pickle
import urllib2
import hashlib
@@ -61,7 +62,7 @@ def get_public_ip():
if match:
public_ip = match.group()
except Exception as e: # any exception, doesn't matter what
print 'could not get public IP:', e
print('could not get public IP:', e)
public_ip = 'localhost'

return public_ip
@@ -210,7 +211,7 @@ def download_file_s3(aws_path, aws_config, local_folder=DATA_PATH):
path = keyname

if os.path.isfile(path):
print 'file %s already exists!' % path
print('file %s already exists!' % path)
return path

conn = S3Connection(aws_config.access_key, aws_config.secret_key)
@@ -221,7 +222,7 @@ def download_file_s3(aws_path, aws_config, local_folder=DATA_PATH):
else:
aws_keyname = keyname

print 'downloading data from S3...'
print('downloading data from S3...')
s3key = Key(bucket)
s3key.key = aws_keyname
s3key.get_contents_to_filename(path)
@@ -239,10 +240,10 @@ def download_file_http(url, local_folder=DATA_PATH):
path = filename

if os.path.isfile(path):
print 'file %s already exists!' % path
print('file %s already exists!' % path)
return path

print 'downloading data from %s...' % url
print('downloading data from %s...' % url)
f = urllib2.urlopen(url)
data = f.read()
with open(path, "wb") as outfile:
3 changes: 2 additions & 1 deletion atm/worker.py
@@ -1,4 +1,5 @@
#!/usr/bin/python2.7
from __future__ import print_function
from atm.config import *
from atm.constants import *
from atm.utilities import *
@@ -54,7 +55,7 @@ def _log(msg, stdout=True):
with open(LOG_FILE, 'a') as lf:
lf.write(msg + '\n')
if stdout:
print msg
print(msg)


# Exception thrown when something goes wrong for the worker, but the worker
1 change: 1 addition & 0 deletions fabfile.py
@@ -1,3 +1,4 @@
from __future__ import print_function
from fabric.api import *
from fabric.colors import green as _green, yellow as _yellow
import boto.ec2
3 changes: 2 additions & 1 deletion test/btb_test.py
@@ -1,3 +1,4 @@
from __future__ import print_function
import argparse
import os
import random
@@ -61,7 +62,7 @@

datasets = os.listdir(BASELINE_PATH)
datasets = datasets[:5]
print 'using datasets', ', '.join(datasets)
print('using datasets', ', '.join(datasets))

# generate datasets and dataruns
for ds in datasets:
5 changes: 3 additions & 2 deletions test/end_to_end_test.py
@@ -1,4 +1,5 @@
#!/usr/bin/python2.7
from __future__ import print_function
import argparse
import os
import yaml
@@ -65,7 +66,7 @@

db = Database(**vars(sql_config))

print 'creating dataruns...'
print('creating dataruns...')
datarun_ids = []
for ds in DATASETS:
run_config.train_path = join(DATA_DIR, ds)
@@ -75,7 +76,7 @@

work_parallel(db=db, datarun_ids=datarun_ids, n_procs=args.processes)

print 'workers finished.'
print('workers finished.')

for rid in datarun_ids:
print_summary(db, rid)
7 changes: 4 additions & 3 deletions test/method_test.py
@@ -1,4 +1,5 @@
#!/usr/bin/python2.7
from __future__ import print_function
import argparse
import os
import yaml
@@ -39,7 +40,7 @@
run_path=RUN_CONFIG)
db = Database(**vars(sql_config))

print 'creating dataruns...'
print('creating dataruns...')
datarun_ids = []
for ds in DATASETS:
run_config.train_path = join(DATA_DIR, ds)
@@ -48,11 +49,11 @@
datarun_ids.extend(enter_datarun(sql_config, run_config, aws_config,
run_per_partition=True))

print 'computing on dataruns', datarun_ids
print('computing on dataruns', datarun_ids)
work_parallel(db=db, datarun_ids=datarun_ids, aws_config=aws_config,
n_procs=args.processes)

print 'workers finished.'
print('workers finished.')

for rid in datarun_ids:
print_hp_summary(db, rid)
39 changes: 20 additions & 19 deletions test/utilities.py
@@ -1,3 +1,4 @@
from __future__ import print_function
import argparse
import numpy as np

@@ -20,7 +21,7 @@ def get_best_so_far(db, datarun_id):
# generate a list of the "best so far" score after each classifier was
# computed (in chronological order)
classifiers = db.get_classifiers(datarun_id=datarun_id)
print 'run %s: %d classifiers' % (datarun_id, len(classifiers))
print('run %s: %d classifiers' % (datarun_id, len(classifiers)))
y = []
for l in classifiers:
best_so_far = max(y + [l.cv_judgment_metric])
@@ -57,19 +58,19 @@ def graph_series(length, title, **series):
def print_summary(db, rid):
run = db.get_datarun(rid)
ds = db.get_dataset(run.dataset_id)
print
print 'Dataset %s' % ds
print 'Datarun %s' % run
print()
print('Dataset %s' % ds)
print('Datarun %s' % run)

classifiers = db.get_classifiers(datarun_id=rid)
print 'Classifiers: %d total' % len(classifiers)
print('Classifiers: %d total' % len(classifiers))

best = db.get_best_classifier(datarun_id=run.id)
if best is not None:
score = best.cv_judgment_metric
err = 2 * best.cv_judgment_metric_stdev
print 'Best result overall: classifier %d, %s = %.3f +- %.3f' %\
(best.id, run.metric, score, err)
print('Best result overall: classifier %d, %s = %.3f +- %.3f' %\
(best.id, run.metric, score, err))


def print_method_summary(db, rid):
@@ -84,22 +85,22 @@ def print_method_summary(db, rid):
alg_map[hp.method][hp.id].append(l)

for alg, hp_map in alg_map.items():
print
print 'method %s:' % alg
print()
print('method %s:' % alg)

classifiers = sum(hp_map.values(), [])
errored = len([l for l in classifiers if l.status ==
ClassifierStatus.ERRORED])
complete = len([l for l in classifiers if l.status ==
ClassifierStatus.COMPLETE])
print '\t%d errored, %d complete' % (errored, complete)
print('\t%d errored, %d complete' % (errored, complete))

best = db.get_best_classifier(datarun_id=rid, method=alg)
if best is not None:
score = best.cv_judgment_metric
err = 2 * best.cv_judgment_metric_stdev
print '\tBest: classifier %s, %s = %.3f +- %.3f' % (best, run.metric,
score, err)
print('\tBest: classifier %s, %s = %.3f +- %.3f' % (best, run.metric,
score, err))

def print_hp_summary(db, rid):
run = db.get_datarun(rid)
@@ -111,25 +112,25 @@ def print_hp_summary(db, rid):
part_map[hp].append(c)

for hp, classifiers in part_map.items():
print
print 'hyperpartition', hp
print db.get_hyperpartition(hp)
print()
print('hyperpartition', hp)
print(db.get_hyperpartition(hp))

errored = len([c for c in classifiers if c.status ==
ClassifierStatus.ERRORED])
complete = len([c for c in classifiers if c.status ==
ClassifierStatus.COMPLETE])
print '\t%d errored, %d complete' % (errored, complete)
print('\t%d errored, %d complete' % (errored, complete))

best = db.get_best_classifier(datarun_id=rid, hyperpartition_id=hp)
if best is not None:
score = best.cv_judgment_metric
err = 2 * best.cv_judgment_metric_stdev
print '\tBest: classifier %s, %s = %.3f +- %.3f' % (best, run.metric,
score, err)
print('\tBest: classifier %s, %s = %.3f +- %.3f' % (best, run.metric,
score, err))

def work_parallel(db, datarun_ids=None, aws_config=None, n_procs=4):
print 'starting workers...'
print('starting workers...')
kwargs = dict(db=db, datarun_ids=datarun_ids, save_files=False,
choose_randomly=True, cloud_mode=False,
aws_config=aws_config, wait=False)

