Performance Evaluation: Improve true negatives calculation for ROC curve

Delft-APC · May 4, 2016 · 60e6ef9 · 60e6ef9
1 parent b0f7aaf
commit 60e6ef9
Show file tree

Hide file tree

Showing 4 changed files with 326 additions and 72 deletions.
diff --git a/lib/datasets/apc.py b/lib/datasets/apc.py
@@ -320,10 +320,10 @@ def _do_python_eval(self, output_dir = 'output'):
 			performance['precs'] += [prec]
 			performance['prauc'] += [prauc]
 
-
-			print('AP for {} = {:.4f}'.format(cls, ap))
-			print('PR curve AUC for {:s} = {}'.format(cls, prauc))
-			print('ROC curve AUC for {:s} = {}'.format(cls, rocauc))
+			if ap != 0: #TODO For DEBUG, remove this
+				print('AP for {} = {:.4f}'.format(cls, ap))
+				print('PR curve AUC for {:s} = {}'.format(cls, prauc))
+				print('ROC curve AUC for {:s} = {}'.format(cls, rocauc))
 			with open(os.path.join(output_dir, cls + '_pr.pkl'), 'w') as f:
 				cPickle.dump({'ap': ap, 'rec': rec, 'prec': prec, 'prauc': prauc, \
 					'tpr': tpr, 'fpr': fpr, 'rocauc': rocauc}, f)

diff --git a/lib/datasets/apc_evalv2.py b/lib/datasets/apc_evalv2.py
@@ -11,6 +11,9 @@
 import copy
 from sklearn.metrics import auc
 
+# 20 thresholds varying from 0 to 0.95 with interval 0.05
+thresh_range = np.arange(0.,1.,0.05)
+
 def parse_rec(filename):
     """ Parse a DR APC xml file """
     tree = ET.parse(filename)
@@ -46,7 +49,7 @@ def apc_auc(x, y, curve='pr'):
 
 
 def maxOverlaps(BBGT, bb):
-	# compute overlaps
+	"""compute max overlaps between detected bb and BBGTs"""
 	# intersection
 	ixmin = np.maximum(BBGT[:, 0], bb[0])
 	iymin = np.maximum(BBGT[:, 1], bb[1])
@@ -73,16 +76,14 @@ def apc_evalv2(detpath,
 		imagesetfile,
 		classname,
 		cachedir,
-		ovthresh=0.5,
-		use_07_metric=False):
-	"""rec, prec, ap = acp_eval(detpath,
+		ovthresh=0.5):
+	"""rec, prec, prauc, tpr, fpr, rocauc = apc_evalv2(detpath,
 								annopath,
 								imagesetfile,
 								classname,
-								[ovthresh],
-								[use_07_metric])
+								[ovthresh])
 
-	Top level function that does the PASCAL VOC evaluation.
+	Top level function that does a PASCAL VOC like evaluation per bbox.
 
 	detpath: Path to detections
 		detpath.format(classname) should produce the detection results file.
@@ -92,8 +93,6 @@ def apc_evalv2(detpath,
 	classname: Category name (duh)
 	cachedir: Directory for caching the annotations
 	[ovthresh]: Overlap threshold (default = 0.5)
-	[use_07_metric]: Whether to use VOC07's 11 point AP computation
-		(default False)
 	"""
 	# assumes detections are in detpath.format(classname)
 	# assumes annotations are in annopath.format(imagename)
@@ -116,20 +115,25 @@ def apc_evalv2(detpath,
 
 	### Calculate rec, prec, and auc for pr curve fn by comparing BBGT amd BB detected per bbox
 	# extract gt objects for this class
-	class_recs = {}
-	npos = 0
-	negative_gt = []
+	class_recs = {}			# ground truth for target class
+	class_recs_neg = {}		# ground truth for the other classes (negatives)
+	npos = 0				# number of positive objects to be detected
+	nneg = 0				# number of negative objects to be rejected
 	for imagename in imagenames:
+		# read ground truth for current class
 		R = [obj for obj in recs[imagename] if obj['name'] == classname]
-		if R  == []:
-			negative_gt.append(imagename)
 		bbox = np.array([x['bbox'] for x in R])
 		npos = npos + len(bbox)
 		det = [False] * len(R)
 		class_recs[imagename] = {'bbox': bbox,
 					'det': det}
+		# read ground truth for the other classes
+		R_neg = [obj for obj in recs[imagename] if obj['name'] != classname]
+		bbox_neg = np.array([x['bbox'] for x in R_neg])
+		nneg = nneg + len(bbox_neg)
+		class_recs_neg[imagename] = {'bbox': bbox_neg}
 
-	# read dets
+	# read detections
 	detfile = detpath.format(classname)
 	with open(detfile, 'r') as f:
 		lines = f.readlines()
@@ -140,33 +144,25 @@ def apc_evalv2(detpath,
 	confidence = np.array([float(x[1]) for x in splitlines])
 	BB = np.array([[float(z) for z in x[2:]] for x in splitlines])
 
-	# sort by confidence
-#	sorted_ind = np.argsort(-confidence)
-#	sorted_scores = np.sort(-confidence)
-#	BB = BB[sorted_ind, :]
-#	image_ids = [image_ids[x] for x in sorted_ind]
-
-	# filter by confidence
-	threshs = np.arange(0.,1.,0.05)
-	tps = np.zeros(len(threshs))
-	fps = np.zeros(len(threshs))
-	tns = np.zeros(len(threshs))
-	for t, thresh in enumerate(threshs):
+	# compute for each threshold from 0 to 0.95 with interval 0.05
+	tps = np.zeros(len(thresh_range))		# correct detections
+	fps = np.zeros(len(thresh_range))		# false positives: detections that match no unclaimed target object
+	tns = np.zeros(len(thresh_range))		# true negatives: non-target objects that are correctly rejected
+	for t, thresh in enumerate(thresh_range):
+		# filter by confidence
 		keep_ind = np.where(confidence > thresh)[0]
-		if len(keep_ind) > 0:
+		if len(keep_ind) > 0:	# if there are detections left
 			_BB = BB[keep_ind, :]
 			_image_ids = [image_ids[x] for x in keep_ind]
 			_class_recs = copy.deepcopy(class_recs)
+			_class_recs_neg = copy.deepcopy(class_recs_neg)
 
-			# find true negative predictions and count tn
-			negative_preds = set(imagenames) - set(_image_ids)
-			true_negatives = set(negative_gt).intersection(negative_preds)
-			tn = len(true_negatives)
-			# go down dets and mark TPs and FPs
+			# go down dets and mark TPs, FPs and TNs per bbox
 			nd = len(_image_ids)
-			tp = 0.
-			fp = 0.
-			for d in range(nd):
+			tn = nneg
+			tp = 0
+			fp = 0
+			for d in xrange(0, nd):
 				R = _class_recs[_image_ids[d]]
 				bb = _BB[d, :].astype(float)
 				ovmax = -np.inf
@@ -177,30 +173,202 @@ def apc_evalv2(detpath,
 
 				if ovmax > ovthresh:
 					if not R['det'][jmax]:
-						tp += 1.
+						tp += 1
 						R['det'][jmax] = 1
 					else:
-						print (classname, thresh)
-						fp += 1.
+						fp += 1
 				else:
-					fp += 1.
-			# go down _class_recs and mark TNs
+					fp += 1
+				# if untargeted objects are wrongly accepted
+				R_neg = _class_recs_neg[_image_ids[d]]
+				ovmax_neg = -np.inf
+				BBGT_neg = R_neg['bbox'].astype(float)
+				if BBGT_neg.size > 0:
+					ovmax_neg, _ = maxOverlaps(BBGT_neg, bb)
+				#TODO what if a target object and a non-target object overlap?
+				if ovmax_neg > ovthresh and ovmax < ovthresh:
+					tn -= 1
+
+		else: # no detection left after filtering
+			tp = 0
+			fp = -1
+			tn = nneg
+		# add tp, fp and tn for t-th thresh
+		tps[t] = tp
+		fps[t] = fp
+		tns[t] = tn
+
+	# compute fn and recall
+	fns = npos - tps		# number of objects not detected
+	if npos > 0:
+		rec = tps / float(npos)
+	else:
+		rec = np.ones(len(tps))
+	# compute tpr and fpr per bbox
+	tpr = rec
+	fpr = [fp / np.maximum(float(fp + tn), np.finfo(np.float64).eps) if fp != -1 else 0. for (fp,tn) in zip(fps,tns)]
+
+	# avoid divide by zero in case the first detection matches a difficult
+	# ground truth, prec = 1 and recall = 0 if no detection kept
+	prec = [tp / np.maximum(float(tp + fp), np.finfo(np.float64).eps) if fp != -1 else 1. for (tp,fp) in zip(tps,fps)]
+	prauc = apc_auc(rec, prec, 'pr')
+	rocauc = apc_auc(fpr, tpr, 'roc')
+
+	return rec, prec, prauc, tpr, fpr, rocauc
+
+
+
+
+
+
+
+
+# Deprecated performance calculation per image
+def apc_evalv2_bak(detpath,
+		annopath,
+		imagesetfile,
+		classname,
+		cachedir,
+		ovthresh=0.5):
+	"""rec, prec, prauc, tpr_im, fpr_im, rocauc_im = apc_evalv2_bak(detpath,
+								annopath,
+								imagesetfile,
+								classname,
+								[ovthresh])
+
+	Deprecated top level function that does a PASCAL VOC like evaluation (PR curve per bbox, ROC curve per image).
+
+	detpath: Path to detections
+		detpath.format(classname) should produce the detection results file.
+	annopath: Path to annotations
+		annopath.format(imagename) should be the xml annotations file.
+	imagesetfile: Text file containing the list of images, one image per line.
+	classname: Category name (duh)
+	cachedir: Directory for caching the annotations
+	[ovthresh]: Overlap threshold (default = 0.5)
+	"""
+	# assumes detections are in detpath.format(classname)
+	# assumes annotations are in annopath.format(imagename)
+	# assumes imagesetfile is a text file with each line an image name
+	# cachedir caches the annotations in a pickle file
+
+	# read list of images
+	with open(imagesetfile, 'r') as f:
+		lines = f.readlines()
+	imagenames = [x.strip() for x in lines]
+
+	# load annots
+	recs = {}
+	for i, imagename in enumerate(imagenames):
+		recs[imagename] = parse_rec(annopath.format(imagename))
+		if i % 100 == 0:
+			print 'Reading annotation for {:d}/{:d}'.format(
+					i + 1, len(imagenames))
+
+
+	### Calculate rec, prec, and auc for pr curve fn by comparing BBGT and BB detected per bbox
+	# extract gt objects for this class
+	class_recs = {}
+	npos = 0
+	negative_gt = [] # negative ground truth per image
+	for imagename in imagenames:
+		R = [obj for obj in recs[imagename] if obj['name'] == classname]
+		if R  == []:
+			negative_gt.append(imagename)
+		bbox = np.array([x['bbox'] for x in R])
+		npos = npos + len(bbox)
+		det = [False] * len(R)
+		class_recs[imagename] = {'bbox': bbox,
+					'det': det}
+
+	# read dets
+	detfile = detpath.format(classname)
+	with open(detfile, 'r') as f:
+		lines = f.readlines()
 
-		else: # no detection remained
+	splitlines = [x.strip().split(' ') for x in lines]
+	# Filenames are split on ' '. In the image filenames there is already a space, therefore use the 2nd space as split
+	image_ids = [x[0] for x in splitlines]
+	confidence = np.array([float(x[1]) for x in splitlines])
+	BB = np.array([[float(z) for z in x[2:]] for x in splitlines])
+
+	# filter by confidence
+	thresh_range = np.arange(0.,1.,0.05)
+	tps = np.zeros(len(thresh_range))
+	fps = np.zeros(len(thresh_range))
+	tns = np.zeros(len(thresh_range))
+	tps_im = np.zeros(len(thresh_range))
+	fps_im = np.zeros(len(thresh_range))
+	fns_im = np.zeros(len(thresh_range))
+	for t, thresh in enumerate(thresh_range):
+		keep_ind = np.where(confidence > thresh)[0]
+		if len(keep_ind) > 0:
+			_BB = BB[keep_ind, :]
+			_image_ids = [image_ids[x] for x in keep_ind]
+			_class_recs = copy.deepcopy(class_recs)
+
+			# find true negative predictions and count tn
+			negative_preds = set(imagenames) - set(_image_ids) # negative predictions per image
+			true_negatives = set(negative_gt).intersection(negative_preds)
+			tn = len(true_negatives)
+
+			# go down dets and mark TPs and FPs per bbox and tp_im, fp_im per image
+			#nd = len(_image_ids)
+			tp = 0.
+			fp = 0.
+			tp_im = 0.
+			fp_im = 0.
+			fn_im = 0.
+			positive_imgs = set(_image_ids)
+			for image_id in positive_imgs:
+				ds = np.where(np.array(_image_ids)==image_id)[0]
+				for d in ds:
+					R = _class_recs[_image_ids[d]]
+					bb = _BB[d, :].astype(float)
+					ovmax = -np.inf
+					BBGT = R['bbox'].astype(float)
+
+					if BBGT.size > 0:
+						ovmax, jmax = maxOverlaps(BBGT, bb)
+
+					if ovmax > ovthresh:
+						if not R['det'][jmax]:
+							tp += 1.
+							R['det'][jmax] = 1
+						else:
+							fp += 1.
+					else:
+						fp += 1.
+				# tp_im is number of correctly detected images
+				det = _class_recs[image_id]['det']
+				if len(ds) > sum(det):
+					fp_im += 1.
+				elif len(ds) == sum(det) == len(det):
+					tp_im += 1.
+				elif sum(det) < len(det):
+					fn_im += 1.
+
+		else: # no detection left after filtering
 			tp = 0.
 			fp = -1.
-		# compute tp and fp for t-th thresh
+			tp_im = 0.
+			fp_im = -1.
+		# add tp, fp and tn for t-th thresh
 		tps[t] = tp
 		fps[t] = fp
 		tns[t] = tn
+		# add tp_im, fp_im and fn_im for t-th thresh
+		tps_im[t] = tp_im
+		fps_im[t] = fp_im
+		fns_im[t] = fn_im
 
 	# compute fn and precision recall
 	fns = npos - tps
 	if npos > 0:
 		rec = tps / float(npos)
 	else:
 		rec = np.ones(len(tps))
-	# compute tpr and fpr
+	# compute tpr and fpr per bbox
 	tpr = rec
 	fpr = [fp / np.maximum(float(fp + tn), np.finfo(np.float64).eps) if fp != -1 else 0. for (fp,tn) in zip(fps,tns)]
 
@@ -210,14 +378,21 @@ def apc_evalv2(detpath,
 	prauc = apc_auc(rec, prec, 'pr')
 	rocauc = apc_auc(fpr, tpr, 'roc')
 
+	# compute tpr and fpr per image
+	tpr_im = [ tp_im / np.maximum(float(tp_im + fp_im), np.finfo(np.float64).eps) if fp_im != -1 else 0. for (tp_im,fp_im) in zip(tps_im,fps_im)]
+	fpr_im = [fp_im / np.maximum(float(fp_im + tn), np.finfo(np.float64).eps) if fp_im != -1 else 1. for (fp_im,tn) in zip(fps_im,tns)]
+	rocauc_im = apc_auc(fpr_im, tpr_im, 'roc')
+
+
+	return rec, prec, prauc, tpr_im, fpr_im, rocauc_im
+
 
-	return rec, prec, prauc, tpr, fpr, rocauc
 
 
 
 # Deprecated tn measures
 # Problem: easily more 1000 true negatives
-def computeTN(detpath,
+def computeTN_bak(detpath,
 		annopath,
 		imagesetfile,
 		classname,

diff --git a/lib/fast_rcnn/test.py b/lib/fast_rcnn/test.py
@@ -284,8 +284,8 @@ def test_net(net, imdb, max_per_image=100, thresh=0.05, vis=False):
 
 
 		# start from j = 1 to preclude the background class
-		# background class included here to calculate true negative
-		for j in range(0, imdb.num_classes):
+		# if background class is included here, it is for calculating true negative
+		for j in range(1, imdb.num_classes):
 			## prefix cls_ stands for class
 			cls_scores = scores[:, j]
 			cls_boxes = boxes[:, j*4:(j+1)*4] # each class has 4 columns, select the correct columns
@@ -312,7 +312,7 @@ def test_net(net, imdb, max_per_image=100, thresh=0.05, vis=False):
 		# Apply threshold on the proposals
 		image_scores = np.hstack([all_boxes[i][j][:, -1] for j in xrange(1, imdb.num_classes)])
 		if len(image_scores) > 0:
-			for j in xrange(0, imdb.num_classes):						#TODO here we include class '__background__'
+			for j in xrange(1, imdb.num_classes):						#TODO class '__background__' (j = 0) is no longer included here
 				keep = np.where(all_boxes[i][j][:, -1] >= thresh)[0]	#TODO score threshhold for each class
 				all_boxes[i][j] = all_boxes[i][j][keep, :]