Skip to content

Commit

Permalink
Performance Evaluation: Improve true negatives calculation for ROC curve
Browse files Browse the repository at this point in the history
  • Loading branch information
JihongJu committed May 4, 2016
1 parent b0f7aaf commit 60e6ef9
Show file tree
Hide file tree
Showing 4 changed files with 326 additions and 72 deletions.
8 changes: 4 additions & 4 deletions lib/datasets/apc.py
Original file line number Diff line number Diff line change
Expand Up @@ -320,10 +320,10 @@ def _do_python_eval(self, output_dir = 'output'):
performance['precs'] += [prec]
performance['prauc'] += [prauc]


print('AP for {} = {:.4f}'.format(cls, ap))
print('PR curve AUC for {:s} = {}'.format(cls, prauc))
print('ROC curve AUC for {:s} = {}'.format(cls, rocauc))
if ap != 0: #TODO For DEBUG, remove this
print('AP for {} = {:.4f}'.format(cls, ap))
print('PR curve AUC for {:s} = {}'.format(cls, prauc))
print('ROC curve AUC for {:s} = {}'.format(cls, rocauc))
with open(os.path.join(output_dir, cls + '_pr.pkl'), 'w') as f:
cPickle.dump({'ap': ap, 'rec': rec, 'prec': prec, 'prauc': prauc, \
'tpr': tpr, 'fpr': fpr, 'rocauc': rocauc}, f)
Expand Down
267 changes: 221 additions & 46 deletions lib/datasets/apc_evalv2.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@
import copy
from sklearn.metrics import auc

# 20 thresholds varying from 0 to 0.95 with interval 0.05
thresh_range = np.arange(0.,1.,0.05)

def parse_rec(filename):
""" Parse a DR APC xml file """
tree = ET.parse(filename)
Expand Down Expand Up @@ -46,7 +49,7 @@ def apc_auc(x, y, curve='pr'):


def maxOverlaps(BBGT, bb):
# compute overlaps
"""compute max overlaps between detected bb and BBGTs"""
# intersection
ixmin = np.maximum(BBGT[:, 0], bb[0])
iymin = np.maximum(BBGT[:, 1], bb[1])
Expand All @@ -73,16 +76,14 @@ def apc_evalv2(detpath,
imagesetfile,
classname,
cachedir,
ovthresh=0.5,
use_07_metric=False):
"""rec, prec, ap = acp_eval(detpath,
ovthresh=0.5):
"""rec, prec, prauc, tpr, fpr, rocauc = apc_evalv2(detpath,
annopath,
imagesetfile,
classname,
[ovthresh],
[use_07_metric])
[ovthresh])
Top level function that does the PASCAL VOC evaluation.
Top level function that does a PASCAL VOC like evaluation per bbox.
detpath: Path to detections
detpath.format(classname) should produce the detection results file.
Expand All @@ -92,8 +93,6 @@ def apc_evalv2(detpath,
classname: Category name (duh)
cachedir: Directory for caching the annotations
[ovthresh]: Overlap threshold (default = 0.5)
[use_07_metric]: Whether to use VOC07's 11 point AP computation
(default False)
"""
# assumes detections are in detpath.format(classname)
# assumes annotations are in annopath.format(imagename)
Expand All @@ -116,20 +115,25 @@ def apc_evalv2(detpath,

### Calculate rec, prec, and auc for pr curve fn by comparing BBGT and BB detected per bbox
# extract gt objects for this class
class_recs = {}
npos = 0
negative_gt = []
class_recs = {} # ground truth for target class
class_recs_neg = {} # ground truth for the other classes (negatives)
npos = 0 # number of positive objects to be detected
nneg = 0 # number of negative objects to be rejected
for imagename in imagenames:
# read ground truth for current class
R = [obj for obj in recs[imagename] if obj['name'] == classname]
if R == []:
negative_gt.append(imagename)
bbox = np.array([x['bbox'] for x in R])
npos = npos + len(bbox)
det = [False] * len(R)
class_recs[imagename] = {'bbox': bbox,
'det': det}
# read ground truth for the other classes
R_neg = [obj for obj in recs[imagename] if obj['name'] != classname]
bbox_neg = np.array([x['bbox'] for x in R_neg])
nneg = nneg + len(bbox_neg)
class_recs_neg[imagename] = {'bbox': bbox_neg}

# read dets
# read detections
detfile = detpath.format(classname)
with open(detfile, 'r') as f:
lines = f.readlines()
Expand All @@ -140,33 +144,25 @@ def apc_evalv2(detpath,
confidence = np.array([float(x[1]) for x in splitlines])
BB = np.array([[float(z) for z in x[2:]] for x in splitlines])

# sort by confidence
# sorted_ind = np.argsort(-confidence)
# sorted_scores = np.sort(-confidence)
# BB = BB[sorted_ind, :]
# image_ids = [image_ids[x] for x in sorted_ind]

# filter by confidence
threshs = np.arange(0.,1.,0.05)
tps = np.zeros(len(threshs))
fps = np.zeros(len(threshs))
tns = np.zeros(len(threshs))
for t, thresh in enumerate(threshs):
# compute for each threshold from 0 to 0.95 with interval 0.05
tps = np.zeros(len(thresh_range)) # correct detections
fps = np.zeros(len(thresh_range)) # wrong detections (false positives): duplicate or insufficient-overlap matches
tns = np.zeros(len(thresh_range)) # non-target objects that are correctly rejected
for t, thresh in enumerate(thresh_range):
# filter by confidence
keep_ind = np.where(confidence > thresh)[0]
if len(keep_ind) > 0:
if len(keep_ind) > 0: # if there are detections left
_BB = BB[keep_ind, :]
_image_ids = [image_ids[x] for x in keep_ind]
_class_recs = copy.deepcopy(class_recs)
_class_recs_neg = copy.deepcopy(class_recs_neg)

# find true negative predictions and count tn
negative_preds = set(imagenames) - set(_image_ids)
true_negatives = set(negative_gt).intersection(negative_preds)
tn = len(true_negatives)
# go down dets and mark TPs and FPs
# go down dets and mark TPs, FPs and TNs per bbox
nd = len(_image_ids)
tp = 0.
fp = 0.
for d in range(nd):
tn = nneg
tp = 0
fp = 0
for d in xrange(0, nd):
R = _class_recs[_image_ids[d]]
bb = _BB[d, :].astype(float)
ovmax = -np.inf
Expand All @@ -177,30 +173,202 @@ def apc_evalv2(detpath,

if ovmax > ovthresh:
if not R['det'][jmax]:
tp += 1.
tp += 1
R['det'][jmax] = 1
else:
print (classname, thresh)
fp += 1.
fp += 1
else:
fp += 1.
# go down _class_recs and mark TNs
fp += 1
# if untargeted objects are wrongly accepted
R_neg = _class_recs_neg[_image_ids[d]]
ovmax_neg = -np.inf
BBGT_neg = R_neg['bbox'].astype(float)
if BBGT_neg.size > 0:
ovmax_neg, _ = maxOverlaps(BBGT_neg, bb)
#TODO what if a target object and a non-target object overlap?
if ovmax_neg > ovthresh and ovmax < ovthresh:
tn -= 1

else: # no detection left after filtering
tp = 0
fp = -1
tn = nneg
# add tp, fp and tn for t-th thresh
tps[t] = tp
fps[t] = fp
tns[t] = tn

# compute fn and recall
fns = npos - tps # number of objects not detected
if npos > 0:
rec = tps / float(npos)
else:
rec = np.ones(len(tps))
# compute tpr and fpr per bbox
tpr = rec
fpr = [fp / np.maximum(float(fp + tn), np.finfo(np.float64).eps) if fp != -1 else 0. for (fp,tn) in zip(fps,tns)]

# avoid divide by zero in case the first detection matches a difficult
# ground truth, prec = 1 and recall = 0 if no detection kept
prec = [tp / np.maximum(float(tp + fp), np.finfo(np.float64).eps) if fp != -1 else 1. for (tp,fp) in zip(tps,fps)]
prauc = apc_auc(rec, prec, 'pr')
rocauc = apc_auc(fpr, tpr, 'roc')

return rec, prec, prauc, tpr, fpr, rocauc








# Deprecated performance calculation per image
def apc_evalv2_bak(detpath,
annopath,
imagesetfile,
classname,
cachedir,
ovthresh=0.5):
"""rec, prec, prauc, tpr_im, fpr_im, rocauc_im = apc_evalv2_bak(detpath,
annopath,
imagesetfile,
classname,
[ovthresh])
Top level function that does a PASCAL VOC like evaluation per bbox.
detpath: Path to detections
detpath.format(classname) should produce the detection results file.
annopath: Path to annotations
annopath.format(imagename) should be the xml annotations file.
imagesetfile: Text file containing the list of images, one image per line.
classname: Category name (duh)
cachedir: Directory for caching the annotations
[ovthresh]: Overlap threshold (default = 0.5)
"""
# assumes detections are in detpath.format(classname)
# assumes annotations are in annopath.format(imagename)
# assumes imagesetfile is a text file with each line an image name
# cachedir caches the annotations in a pickle file

# read list of images
with open(imagesetfile, 'r') as f:
lines = f.readlines()
imagenames = [x.strip() for x in lines]

# load annots
recs = {}
for i, imagename in enumerate(imagenames):
recs[imagename] = parse_rec(annopath.format(imagename))
if i % 100 == 0:
print 'Reading annotation for {:d}/{:d}'.format(
i + 1, len(imagenames))


### Calculate rec, prec, and auc for pr curve fn by comparing BBGT and BB detected per bbox
# extract gt objects for this class
class_recs = {}
npos = 0
negative_gt = [] # negative ground truth per image
for imagename in imagenames:
R = [obj for obj in recs[imagename] if obj['name'] == classname]
if R == []:
negative_gt.append(imagename)
bbox = np.array([x['bbox'] for x in R])
npos = npos + len(bbox)
det = [False] * len(R)
class_recs[imagename] = {'bbox': bbox,
'det': det}

# read dets
detfile = detpath.format(classname)
with open(detfile, 'r') as f:
lines = f.readlines()

else: # no detection remained
splitlines = [x.strip().split(' ') for x in lines]
# Filenames are split on ' '. In the image filenames there is already a space, therefore use the 2nd space as split
image_ids = [x[0] for x in splitlines]
confidence = np.array([float(x[1]) for x in splitlines])
BB = np.array([[float(z) for z in x[2:]] for x in splitlines])

# filter by confidence
thresh_range = np.arange(0.,1.,0.05)
tps = np.zeros(len(thresh_range))
fps = np.zeros(len(thresh_range))
tns = np.zeros(len(thresh_range))
tps_im = np.zeros(len(thresh_range))
fps_im = np.zeros(len(thresh_range))
fns_im = np.zeros(len(thresh_range))
for t, thresh in enumerate(thresh_range):
keep_ind = np.where(confidence > thresh)[0]
if len(keep_ind) > 0:
_BB = BB[keep_ind, :]
_image_ids = [image_ids[x] for x in keep_ind]
_class_recs = copy.deepcopy(class_recs)

# find true negative predictions and count tn
negative_preds = set(imagenames) - set(_image_ids) # negative predictions per image
true_negatives = set(negative_gt).intersection(negative_preds)
tn = len(true_negatives)

# go down dets and mark TPs and FPs per bbox and tp_im, fp_im per image
#nd = len(_image_ids)
tp = 0.
fp = 0.
tp_im = 0.
fp_im = 0.
fn_im = 0.
positive_imgs = set(_image_ids)
for image_id in positive_imgs:
ds = np.where(np.array(_image_ids)==image_id)[0]
for d in ds:
R = _class_recs[_image_ids[d]]
bb = _BB[d, :].astype(float)
ovmax = -np.inf
BBGT = R['bbox'].astype(float)

if BBGT.size > 0:
ovmax, jmax = maxOverlaps(BBGT, bb)

if ovmax > ovthresh:
if not R['det'][jmax]:
tp += 1.
R['det'][jmax] = 1
else:
fp += 1.
else:
fp += 1.
# tp_im is number of correctly detected images
det = _class_recs[image_id]['det']
if len(ds) > sum(det):
fp_im += 1.
elif len(ds) == sum(det) == len(det):
tp_im += 1.
elif sum(det) < len(det):
fn_im += 1.

else: # no detection left after filtering
tp = 0.
fp = -1.
# compute tp and fp for t-th thresh
tp_im = 0.
fp_im = -1.
# add tp, fp and tn for t-th thresh
tps[t] = tp
fps[t] = fp
tns[t] = tn
# add tp_im, fp_im and fn_im for t-th thresh
tps_im[t] = tp_im
fps_im[t] = fp_im
fns_im[t] = fn_im

# compute fn and precision recall
fns = npos - tps
if npos > 0:
rec = tps / float(npos)
else:
rec = np.ones(len(tps))
# compute tpr and fpr
# compute tpr and fpr per bbox
tpr = rec
fpr = [fp / np.maximum(float(fp + tn), np.finfo(np.float64).eps) if fp != -1 else 0. for (fp,tn) in zip(fps,tns)]

Expand All @@ -210,14 +378,21 @@ def apc_evalv2(detpath,
prauc = apc_auc(rec, prec, 'pr')
rocauc = apc_auc(fpr, tpr, 'roc')

# compute tpr and fpr per image
tpr_im = [ tp_im / np.maximum(float(tp_im + fp_im), np.finfo(np.float64).eps) if fp_im != -1 else 0. for (tp_im,fp_im) in zip(tps_im,fps_im)]
fpr_im = [fp_im / np.maximum(float(fp_im + tn), np.finfo(np.float64).eps) if fp_im != -1 else 1. for (fp_im,tn) in zip(fps_im,tns)]
rocauc_im = apc_auc(fpr_im, tpr_im, 'roc')


return rec, prec, prauc, tpr_im, fpr_im, rocauc_im


return rec, prec, prauc, tpr, fpr, rocauc



# Deprecated tn measures
# Problem: easily more than 1000 true negatives
def computeTN(detpath,
def computeTN_bak(detpath,
annopath,
imagesetfile,
classname,
Expand Down
6 changes: 3 additions & 3 deletions lib/fast_rcnn/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,8 +284,8 @@ def test_net(net, imdb, max_per_image=100, thresh=0.05, vis=False):


# start from j = 1 to preclude the background class
# background class included here to calculate true negative
for j in range(0, imdb.num_classes):
# if background class is included here, it is for calculating true negative
for j in range(1, imdb.num_classes):
## prefix cls_ stands for class
cls_scores = scores[:, j]
cls_boxes = boxes[:, j*4:(j+1)*4] # each class has 4 columns, select the correct columns
Expand All @@ -312,7 +312,7 @@ def test_net(net, imdb, max_per_image=100, thresh=0.05, vis=False):
# Apply threshold on the proposals
image_scores = np.hstack([all_boxes[i][j][:, -1] for j in xrange(1, imdb.num_classes)])
if len(image_scores) > 0:
for j in xrange(0, imdb.num_classes): #TODO here we include class '__background__'
for j in xrange(1, imdb.num_classes): #TODO class '__background__' (j = 0) is now excluded here
keep = np.where(all_boxes[i][j][:, -1] >= thresh)[0] #TODO score threshold for each class
all_boxes[i][j] = all_boxes[i][j][keep, :]

Expand Down
Loading

0 comments on commit 60e6ef9

Please sign in to comment.