diff --git a/lib/datasets/apc.py b/lib/datasets/apc.py index a98c7a7d4..81b6ac27a 100644 --- a/lib/datasets/apc.py +++ b/lib/datasets/apc.py @@ -320,10 +320,10 @@ def _do_python_eval(self, output_dir = 'output'): performance['precs'] += [prec] performance['prauc'] += [prauc] - - print('AP for {} = {:.4f}'.format(cls, ap)) - print('PR curve AUC for {:s} = {}'.format(cls, prauc)) - print('ROC curve AUC for {:s} = {}'.format(cls, rocauc)) + if ap != 0: #TODO For DEBUG, remove this + print('AP for {} = {:.4f}'.format(cls, ap)) + print('PR curve AUC for {:s} = {}'.format(cls, prauc)) + print('ROC curve AUC for {:s} = {}'.format(cls, rocauc)) with open(os.path.join(output_dir, cls + '_pr.pkl'), 'w') as f: cPickle.dump({'ap': ap, 'rec': rec, 'prec': prec, 'prauc': prauc, \ 'tpr': tpr, 'fpr': fpr, 'rocauc': rocauc}, f) diff --git a/lib/datasets/apc_evalv2.py b/lib/datasets/apc_evalv2.py index f81272163..b877a77d4 100644 --- a/lib/datasets/apc_evalv2.py +++ b/lib/datasets/apc_evalv2.py @@ -11,6 +11,9 @@ import copy from sklearn.metrics import auc +# 20 thresholds varying from 0 to 0.95 with interval 0.05 +thresh_range = np.arange(0.,1.,0.05) + def parse_rec(filename): """ Parse a DR APC xml file """ tree = ET.parse(filename) @@ -46,7 +49,7 @@ def apc_auc(x, y, curve='pr'): def maxOverlaps(BBGT, bb): - # compute overlaps + """compute max overlaps between detected bb and BBGTs""" # intersection ixmin = np.maximum(BBGT[:, 0], bb[0]) iymin = np.maximum(BBGT[:, 1], bb[1]) @@ -73,16 +76,14 @@ def apc_evalv2(detpath, imagesetfile, classname, cachedir, - ovthresh=0.5, - use_07_metric=False): - """rec, prec, ap = acp_eval(detpath, + ovthresh=0.5): + """rec, prec, ap = acp_evalv2(detpath, annopath, imagesetfile, classname, - [ovthresh], - [use_07_metric]) + [ovthresh]) - Top level function that does the PASCAL VOC evaluation. + Top level function that does a PASCAL VOC like evaluation per bbox. 
detpath: Path to detections detpath.format(classname) should produce the detection results file. @@ -92,8 +93,6 @@ def apc_evalv2(detpath, classname: Category name (duh) cachedir: Directory for caching the annotations [ovthresh]: Overlap threshold (default = 0.5) - [use_07_metric]: Whether to use VOC07's 11 point AP computation - (default False) """ # assumes detections are in detpath.format(classname) # assumes annotations are in annopath.format(imagename) @@ -116,20 +115,25 @@ def apc_evalv2(detpath, ### Calculate rec, prec, and auc for pr curve fn by comparing BBGT amd BB detected per bbox # extract gt objects for this class - class_recs = {} - npos = 0 - negative_gt = [] + class_recs = {} # ground truth for target class + class_recs_neg = {} # ground truth for the other classes (negatives) + npos = 0 # number of positive objects to be detected + nneg = 0 # number of negative objects to be rejected for imagename in imagenames: + # read ground truth for current class R = [obj for obj in recs[imagename] if obj['name'] == classname] - if R == []: - negative_gt.append(imagename) bbox = np.array([x['bbox'] for x in R]) npos = npos + len(bbox) det = [False] * len(R) class_recs[imagename] = {'bbox': bbox, 'det': det} + # read ground truth for the other classes + R_neg = [obj for obj in recs[imagename] if obj['name'] != classname] + bbox_neg = np.array([x['bbox'] for x in R_neg]) + nneg = nneg + len(bbox_neg) + class_recs_neg[imagename] = {'bbox': bbox_neg} - # read dets + # read detections detfile = detpath.format(classname) with open(detfile, 'r') as f: lines = f.readlines() @@ -140,33 +144,25 @@ def apc_evalv2(detpath, confidence = np.array([float(x[1]) for x in splitlines]) BB = np.array([[float(z) for z in x[2:]] for x in splitlines]) - # sort by confidence -# sorted_ind = np.argsort(-confidence) -# sorted_scores = np.sort(-confidence) -# BB = BB[sorted_ind, :] -# image_ids = [image_ids[x] for x in sorted_ind] - - # filter by confidence - threshs = 
np.arange(0.,1.,0.05) - tps = np.zeros(len(threshs)) - fps = np.zeros(len(threshs)) - tns = np.zeros(len(threshs)) - for t, thresh in enumerate(threshs): + # compute for each threshold from 0 to 0.95 with interval 0.05 + tps = np.zeros(len(thresh_range)) # correct detections + fps = np.zeros(len(thresh_range)) # target objects are wrongly rejected + tns = np.zeros(len(thresh_range)) # untarget objects are correctly rejected + for t, thresh in enumerate(thresh_range): + # filter by confidence keep_ind = np.where(confidence > thresh)[0] - if len(keep_ind) > 0: + if len(keep_ind) > 0: # if there is detections left _BB = BB[keep_ind, :] _image_ids = [image_ids[x] for x in keep_ind] _class_recs = copy.deepcopy(class_recs) + _class_recs_neg = copy.deepcopy(class_recs_neg) - # find true negative predictions and count tn - negative_preds = set(imagenames) - set(_image_ids) - true_negatives = set(negative_gt).intersection(negative_preds) - tn = len(true_negatives) - # go down dets and mark TPs and FPs + # go down dets and mark TPs, FPs and TNs per bbox nd = len(_image_ids) - tp = 0. - fp = 0. - for d in range(nd): + tn = nneg + tp = 0 + fp = 0 + for d in xrange(0, nd): R = _class_recs[_image_ids[d]] bb = _BB[d, :].astype(float) ovmax = -np.inf @@ -177,22 +173,194 @@ def apc_evalv2(detpath, if ovmax > ovthresh: if not R['det'][jmax]: - tp += 1. + tp += 1 R['det'][jmax] = 1 else: - print (classname, thresh) - fp += 1. + fp += 1 else: - fp += 1. - # go down _class_recs and mark TNs + fp += 1 + # if untargeted objects are wrongly accepted + R_neg = _class_recs_neg[_image_ids[d]] + ovmax_neg = -np.inf + BBGT_neg = R_neg['bbox'].astype(float) + if BBGT_neg.size > 0: + ovmax_neg, _ = maxOverlaps(BBGT_neg, bb) + #TODO what if target object and untarget object are overlaped? 
+ if ovmax_neg > ovthresh and ovmax < ovthresh: + tn -= 1 + + else: # no detection left after filtering + tp = 0 + fp = -1 + tn = nneg + # add tp, fp and tn for t-th thresh + tps[t] = tp + fps[t] = fp + tns[t] = tn + + # compute fn and recall + fns = npos - tps # number of objects not detected + if npos > 0: + rec = tps / float(npos) + else: + rec = np.ones(len(tps)) + # compute tpr and fpr per bbox + tpr = rec + fpr = [fp / np.maximum(float(fp + tn), np.finfo(np.float64).eps) if fp != -1 else 0. for (fp,tn) in zip(fps,tns)] + + # avoid divide by zero in case the first detection matches a difficult + # ground truth, prec = 1 and recall = 0 if no detection kept + prec = [tp / np.maximum(float(tp + fp), np.finfo(np.float64).eps) if fp != -1 else 1. for (tp,fp) in zip(tps,fps)] + prauc = apc_auc(rec, prec, 'pr') + rocauc = apc_auc(fpr, tpr, 'roc') + + return rec, prec, prauc, tpr, fpr, rocauc + + + + + + + + +# Deprecated performance calculation per image +def apc_evalv2_bak(detpath, + annopath, + imagesetfile, + classname, + cachedir, + ovthresh=0.5): + """rec, prec, ap = acp_evalv2(detpath, + annopath, + imagesetfile, + classname, + [ovthresh]) + + Top level function that does a PASCAL VOC like evaluation per bbox. + + detpath: Path to detections + detpath.format(classname) should produce the detection results file. + annopath: Path to annotations + annopath.format(imagename) should be the xml annotations file. + imagesetfile: Text file containing the list of images, one image per line. 
+ classname: Category name (duh) + cachedir: Directory for caching the annotations + [ovthresh]: Overlap threshold (default = 0.5) + """ + # assumes detections are in detpath.format(classname) + # assumes annotations are in annopath.format(imagename) + # assumes imagesetfile is a text file with each line an image name + # cachedir caches the annotations in a pickle file + + # read list of images + with open(imagesetfile, 'r') as f: + lines = f.readlines() + imagenames = [x.strip() for x in lines] + + # load annots + recs = {} + for i, imagename in enumerate(imagenames): + recs[imagename] = parse_rec(annopath.format(imagename)) + if i % 100 == 0: + print 'Reading annotation for {:d}/{:d}'.format( + i + 1, len(imagenames)) + + + ### Calculate rec, prec, and auc for pr curve fn by comparing BBGT amd BB detected per bbox + # extract gt objects for this class + class_recs = {} + npos = 0 + negative_gt = [] # negative ground truth per image + for imagename in imagenames: + R = [obj for obj in recs[imagename] if obj['name'] == classname] + if R == []: + negative_gt.append(imagename) + bbox = np.array([x['bbox'] for x in R]) + npos = npos + len(bbox) + det = [False] * len(R) + class_recs[imagename] = {'bbox': bbox, + 'det': det} + + # read dets + detfile = detpath.format(classname) + with open(detfile, 'r') as f: + lines = f.readlines() - else: # no detection remained + splitlines = [x.strip().split(' ') for x in lines] + # Filenames are split on ' '. 
In the image filenames there is already a space, therefore use the 2nd space as split + image_ids = [x[0] for x in splitlines] + confidence = np.array([float(x[1]) for x in splitlines]) + BB = np.array([[float(z) for z in x[2:]] for x in splitlines]) + + # filter by confidence + thresh_range = np.arange(0.,1.,0.05) + tps = np.zeros(len(thresh_range)) + fps = np.zeros(len(thresh_range)) + tns = np.zeros(len(thresh_range)) + tps_im = np.zeros(len(thresh_range)) + fps_im = np.zeros(len(thresh_range)) + fns_im = np.zeros(len(thresh_range)) + for t, thresh in enumerate(thresh_range): + keep_ind = np.where(confidence > thresh)[0] + if len(keep_ind) > 0: + _BB = BB[keep_ind, :] + _image_ids = [image_ids[x] for x in keep_ind] + _class_recs = copy.deepcopy(class_recs) + + # find true negative predictions and count tn + negative_preds = set(imagenames) - set(_image_ids) # negative predictions per image + true_negatives = set(negative_gt).intersection(negative_preds) + tn = len(true_negatives) + + # go down dets and mark TPs and FPs per bbox and tp_im, fp_im per image + #nd = len(_image_ids) + tp = 0. + fp = 0. + tp_im = 0. + fp_im = 0. + fn_im = 0. + positive_imgs = set(_image_ids) + for image_id in positive_imgs: + ds = np.where(np.array(_image_ids)==image_id)[0] + for d in ds: + R = _class_recs[_image_ids[d]] + bb = _BB[d, :].astype(float) + ovmax = -np.inf + BBGT = R['bbox'].astype(float) + + if BBGT.size > 0: + ovmax, jmax = maxOverlaps(BBGT, bb) + + if ovmax > ovthresh: + if not R['det'][jmax]: + tp += 1. + R['det'][jmax] = 1 + else: + fp += 1. + else: + fp += 1. + # tp_im is number of correctly detected images + det = _class_recs[image_id]['det'] + if len(ds) > sum(det): + fp_im += 1. + elif len(ds) == sum(det) == len(det): + tp_im += 1. + elif sum(det) < len(det): + fn_im += 1. + + else: # no detection left after filtering tp = 0. fp = -1. - # compute tp and fp for t-th thresh + tp_im = 0. + fp_im = -1. 
+ # add tp, fp and tn for t-th thresh tps[t] = tp fps[t] = fp tns[t] = tn + # add tp_im, fp_im, tn_im and fn_im for t-th thresh + tps_im[t] = tp_im + fps_im[t] = fp_im + fns_im[t] = fn_im # compute fn and precision recall fns = npos - tps @@ -200,7 +368,7 @@ def apc_evalv2(detpath, rec = tps / float(npos) else: rec = np.ones(len(tps)) - # compute tpr and fpr + # compute tpr and fpr per bbox tpr = rec fpr = [fp / np.maximum(float(fp + tn), np.finfo(np.float64).eps) if fp != -1 else 0. for (fp,tn) in zip(fps,tns)] @@ -210,14 +378,21 @@ def apc_evalv2(detpath, prauc = apc_auc(rec, prec, 'pr') rocauc = apc_auc(fpr, tpr, 'roc') + # comput tpr and fpr per image + tpr_im = [ tp_im / np.maximum(float(tp_im + fp_im), np.finfo(np.float64).eps) if fp_im != -1 else 0. for (tp_im,fp_im) in zip(tps_im,fps_im)] + fpr_im = [fp_im / np.maximum(float(fp_im + tn), np.finfo(np.float64).eps) if fp_im != -1 else 1. for (fp_im,tn) in zip(fps_im,tns)] + rocauc_im = apc_auc(fpr_im, tpr_im, 'roc') + + + return rec, prec, prauc, tpr_im, fpr_im, rocauc_im + - return rec, prec, prauc, tpr, fpr, rocauc # Deprecated tn measures # Problem: easily more 1000 true negatives -def computeTN(detpath, +def computeTN_bak(detpath, annopath, imagesetfile, classname, diff --git a/lib/fast_rcnn/test.py b/lib/fast_rcnn/test.py index 5f0f86c68..dbdde9528 100644 --- a/lib/fast_rcnn/test.py +++ b/lib/fast_rcnn/test.py @@ -284,8 +284,8 @@ def test_net(net, imdb, max_per_image=100, thresh=0.05, vis=False): # start from j = 1 to preclude the background class - # background class included here to calculate true negative - for j in range(0, imdb.num_classes): + # if background class is included here, it is for calculating true negative + for j in range(1, imdb.num_classes): ## prefix cls_ stands for class cls_scores = scores[:, j] cls_boxes = boxes[:, j*4:(j+1)*4] # each class has 4 columns, select the correct columns @@ -312,7 +312,7 @@ def test_net(net, imdb, max_per_image=100, thresh=0.05, vis=False): # Apply 
threshold on the proposals image_scores = np.hstack([all_boxes[i][j][:, -1] for j in xrange(1, imdb.num_classes)]) if len(image_scores) > 0: - for j in xrange(0, imdb.num_classes): #TODO here we include class '__background__' + for j in xrange(1, imdb.num_classes): #TODO here we include class '__background__' keep = np.where(all_boxes[i][j][:, -1] >= thresh)[0] #TODO score threshhold for each class all_boxes[i][j] = all_boxes[i][j][keep, :] diff --git a/tools/evaluate_AUC.py b/tools/evaluate_AUC.py index ba5a6844c..530b27bbb 100755 --- a/tools/evaluate_AUC.py +++ b/tools/evaluate_AUC.py @@ -11,7 +11,7 @@ import _init_paths from fast_rcnn.test import test_net -from fast_rcnn.config import cfg, cfg_from_file, cfg_from_list +from fast_rcnn.config import cfg, cfg_from_file, cfg_from_list, get_output_dir from datasets.factory import get_imdb from test_iters import make_figure import caffe @@ -22,6 +22,10 @@ import matplotlib.pyplot as plt import math from sklearn.metrics import auc +from classes import inverse_classes, inverse_classes_short +import cPickle +import getpass +USER = getpass.getuser() def parse_args(): """ @@ -66,9 +70,6 @@ def parse_args(): args = parser.parse_args() return args -def AUC_Calculator(classes, tprss, fprss): - pass - def ROC_Vis(classes, tprss, fprss): tprss = np.array(tprss) fprss = np.array(fprss) @@ -76,21 +77,30 @@ def ROC_Vis(classes, tprss, fprss): num_figs = len(classes) - 1 num_horizontal = 5 num_vertical = int(math.ceil(num_figs/float(num_horizontal))) - fig, axs = plt.subplots(num_vertical, num_horizontal, figsize=(15, 6), facecolor='w', edgecolor='k') + fig, axs = plt.subplots(num_vertical, num_horizontal, figsize=(15, 24), facecolor='w', edgecolor='k') fig.subplots_adjust(hspace = .5, wspace=.5) + optimal_idc, optimal_threshs = determine_threshold(classes, fprss, tprss, mode='roc') axs = axs.ravel() for i in xrange(0,num_figs): fpr = np.concatenate(([1.], fprss[i,:], [0.])) tpr = np.concatenate(([1.], tprss[i,:], [0.])) - 
axs[i].plot(fpr, tpr, 'bo-', linewidth=5) - axs[i].set_title(classes[i+1]) + axs[i].plot(fpr, tpr, 'b-', linewidth=3) + axs[i].set_title(inverse_classes_short[i+1]) axs[i].set_xlabel('FP rate') axs[i].set_xlim(0.,1.) axs[i].set_ylabel('TP rate') axs[i].set_ylim(0.,1.) - plt.show() - + axs[i].spines['top'].set_color('none') + axs[i].spines['right'].set_color('none') + axs[i].xaxis.set_ticks_position('bottom') + axs[i].yaxis.set_ticks_position('left') + # highlight best rec-prec pair + cls = classes[i+1] + axs[i].scatter(fprss[i, optimal_idc[cls]],tprss[i, optimal_idc[cls]], s=100, c='r', label='optimal thresh: {}'.format(optimal_threshs[cls])) + axs[i].legend(loc='lower right', numpoints=1, fontsize=8) + + plt.savefig('/home/{}/workspace/ROC.png'.format(USER)) else: print 'Error: Number of classses {} are not consistent with number of classes in tp array and fp arrays {}'.format(len(classes)-1,tprss.shape[0]) @@ -101,26 +111,89 @@ def PR_Vis(classes, recss, precss): num_figs = len(classes) - 1 num_horizontal = 5 num_vertical = int(math.ceil(num_figs/float(num_horizontal))) - fig, axs = plt.subplots(num_vertical, num_horizontal, figsize=(15, 6), facecolor='w', edgecolor='k') - fig.subplots_adjust(hspace = .5, wspace=.5) + fig, axs = plt.subplots(num_vertical, num_horizontal, figsize=(15, 24), facecolor='w', edgecolor='k') + fig.subplots_adjust(hspace = .5, wspace = .5) + optimal_idc, optimal_threshs = determine_threshold(classes, recss, precss, mode='pr') - axs = axs.ravel() + axs = axs.ravel() for i in xrange(0,num_figs): rec = np.concatenate(([1.], recss[i,:], [0.])) prec = np.concatenate(([0.], precss[i,:], [1.])) - axs[i].plot(rec, prec, 'b-', linewidth=5) - axs[i].set_title(classes[i+1]) + axs[i].plot(rec, prec, 'b-', linewidth=3) + axs[i].set_title(inverse_classes_short[i+1]) axs[i].set_xlabel('Recall') axs[i].set_xlim(0.,1.) axs[i].set_ylabel('Precision') axs[i].set_ylim(0.,1.) 
def determine_threshold(classes, xss, yss, mode='pr', thresholds=None):
    """Pick one operating threshold per class from sampled curve points.

    classes: class names; classes[0] is skipped (callers pass
        '__background__' there), so row i of xss/yss belongs to classes[i+1].
    xss, yss: per-class curve samples, one row per class and one column per
        threshold. Callers in this file pass (recall, precision) for
        mode='pr' and (fpr, tpr) for any other mode.
    mode: 'pr' maximises the F1 score 2*x*y/(x+y); anything else maximises
        the Youden index y - x.
    thresholds: threshold value of each column; defaults to
        np.arange(0., 1., 0.05), the grid this function originally hard-coded.

    Returns (idc, optimal_threshs): two dicts keyed by class name holding the
    selected column index (a plain int) and the threshold at that column.
    """
    xss = np.asarray(xss, dtype=np.float64)
    yss = np.asarray(yss, dtype=np.float64)
    if thresholds is None:
        thresholds = np.arange(0., 1., 0.05)

    if mode == 'pr':
        # criterion: maximum F1 score (eps guards the 0/0 case)
        scores = 2 * (yss * xss) / \
            np.maximum(yss + xss, np.finfo(np.float64).eps)
    else:
        # criterion: maximum Youden index
        scores = yss - xss

    idc = {}
    optimal_threshs = {}
    for i, cls in enumerate(classes[1:]):
        row = scores[i, :]
        if not np.any(row):
            # Every score is exactly zero: nothing to optimise, fall back to
            # the first threshold. np.any is used instead of a sum test
            # because Youden scores can be negative and cancel out.
            # The index must be an int; a float cannot index an array.
            idx = 0
        else:
            # pick the median candidate if several thresholds tie for the max
            best = np.flatnonzero(row == np.max(row))
            idx = int(best[len(best) // 2])
            print('{} Threshold for class {} is {}'.format(
                mode, cls, thresholds[idx]))
        idc[cls] = idx
        optimal_threshs[cls] = thresholds[idx]
    return idc, optimal_threshs


def readCache(pkl_dir = 'output'):
    """Rebuild test_net's (classes, performance) result from cached pickles.

    Reads '<pkl_dir>/<class name>_pr.pkl' for every class in the module-level
    inverse_classes mapping and collects the stored 'ap', 'rec', 'prec',
    'prauc', 'tpr', 'fpr' and 'rocauc' entries into per-metric lists.

    Returns (classes, performance): classes is ['__background__'] followed by
    the class names; performance maps 'aps', 'recs', 'precs', 'prauc',
    'tprs', 'fprs' and 'rocauc' to lists with one entry per class, in the
    same order as classes[1:].
    """
    performance = {}
    for key in ('aps', 'recs', 'precs', 'prauc', 'tprs', 'fprs', 'rocauc'):
        performance[key] = []
    classes = ['__background__']
    # Iterate in sorted key order so the class order is deterministic; the
    # plotting code addresses classes by position.
    # NOTE(review): presumably inverse_classes maps integer class index to
    # class name - confirm against the classes module.
    for _, cls in sorted(inverse_classes.items()):
        classes.append(cls)
        # NOTE: unpickling executes arbitrary code; only point pkl_dir at
        # cache files written by this project's own evaluation run.
        with open(os.path.join(pkl_dir, cls + '_pr.pkl'), 'rb') as f:
            obj = cPickle.load(f)
        # append (the previous plain assignment kept only the last class' AP)
        performance['aps'] += [obj['ap']]
        performance['tprs'] += [obj['tpr']]
        performance['fprs'] += [obj['fpr']]
        performance['rocauc'] += [obj['rocauc']]
        performance['recs'] += [obj['rec']]
        performance['precs'] += [obj['prec']]
        performance['prauc'] += [obj['prauc']]
    return (classes, performance)
+ + output_dir = get_output_dir(imdb, net) + if DEBUG: + (classes, performance) = readCache(output_dir) + else: + (classes, performance) = test_net(net, imdb, max_per_image=args.max_per_image, thresh=0.) recss = performance['recs'] precss = performance['precs'] tprss = performance['tprs'] fprss = performance['fprs'] + ROC_Vis(classes, tprss, fprss) PR_Vis(classes, recss, precss)