Skip to content

Commit

Permalink
Ability to specify extra features available to trained attacks.
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 711869941
  • Loading branch information
tensorflower-gardener committed Jan 15, 2025
1 parent 94ee5f6 commit cee477c
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,14 @@ class AttackInputData:
entropy_train: Optional[np.ndarray] = None
entropy_test: Optional[np.ndarray] = None

# Extra features for the training and test sets. These may include metadata,
# additional losses, or model outputs that are available to the adversary.
#
# These features will be used (in addition to logits/probabilities, losses)
# for trained attacks.
extra_features_train: Optional[np.ndarray] = None
extra_features_test: Optional[np.ndarray] = None

# If loss is not explicitly specified, this function will be used to derive
# loss from logits and labels. It can be a pre-defined `LossFunction` or its
# string representation, or a callable.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,10 +69,16 @@ def create_attacker_data(attack_input_data: data_structures.AttackInputData,
Returns:
AttackerData.
"""
attack_input_train = _column_stack(attack_input_data.logits_or_probs_train,
attack_input_data.get_loss_train())
attack_input_test = _column_stack(attack_input_data.logits_or_probs_test,
attack_input_data.get_loss_test())
attack_input_train = _column_stack(
attack_input_data.logits_or_probs_train,
attack_input_data.get_loss_train(),
attack_input_data.extra_features_train,
)
attack_input_test = _column_stack(
attack_input_data.logits_or_probs_test,
attack_input_data.get_loss_test(),
attack_input_data.extra_features_test,
)

ntrain, ntest = attack_input_train.shape[0], attack_input_test.shape[0]
features_all = np.concatenate((attack_input_train, attack_input_test))
Expand Down Expand Up @@ -118,22 +124,32 @@ def _sample_multidimensional_array(array, size):
return array[indices]


def _column_stack(logits, loss, extra_features=None):
  """Stacks logits, losses, and extra features column-wise.

  Any argument that is None is skipped. One-dimensional `loss` and
  `extra_features` arrays are treated as a single column, so the result is
  two-dimensional whenever the provided `logits` are.

  Args:
    logits: logits (or probabilities) array, or None.
    loss: loss array, or None. A 1-D array is expanded to one column.
    extra_features: extra features array, or None. A 1-D array is expanded to
      one column. Defaults to None so that two-argument callers keep working.

  Returns:
    The column-wise stack of every argument that is not None. If exactly one
    argument is provided, that array is returned (after the 1-D expansion
    described above) without copying.

  Raises:
    ValueError: if logits, loss, and extra_features are all None.
  """
  columns = []
  if logits is not None:
    columns.append(logits)
  if loss is not None:
    if len(loss.shape) == 1:
      loss = np.expand_dims(loss, axis=-1)
    columns.append(loss)
  if extra_features is not None:
    # np.column_stack already turns 1-D inputs into columns; expanding here
    # keeps the single-input path consistent with the stacked path.
    if len(extra_features.shape) == 1:
      extra_features = np.expand_dims(extra_features, axis=-1)
    columns.append(extra_features)

  if not columns:
    raise ValueError('logits, loss, and extra_features cannot all be None.')
  if len(columns) == 1:
    return columns[0]
  return np.column_stack(columns)


class TrainedAttacker(object):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,16 @@ def test_multilabel_create_attacker_data_loss_and_logits(self):
attack_input.is_multilabel_data(),
msg='Expected multilabel check to pass.')

def test_create_attacker_data_loss_and_extra_features(self):
  """Losses and extra features are combined into the attacker features.

  Two train and two test examples, each with one loss value and two extra
  feature columns, are expected to yield `features_all` of shape (4, 3).
  """
  train_loss, test_loss = np.array([1, 3]), np.array([2, 4])
  train_extra = np.array([[2, 3], [4, 5]])
  test_extra = np.array([[3, 4], [5, 6]])
  attack_input = AttackInputData(
      loss_train=train_loss,
      loss_test=test_loss,
      extra_features_train=train_extra,
      extra_features_test=test_extra,
  )

  features = models.create_attacker_data(attack_input, 2).features_all
  self.assertSequenceEqual(features.shape, [4, 3])

def test_multilabel_create_attacker_data_logits_labels_sample_weights(self):
attack_input = AttackInputData(
logits_train=np.array([[1, 2], [5, 6], [8, 9]]),
Expand Down

0 comments on commit cee477c

Please sign in to comment.