Module acd.scores.score_funcs
Source code
import copy
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from .cd import cd, cd_text
from ..util.conv2dnp import conv2dnp
def gradient_times_input_scores(im: torch.Tensor, ind: int, model, device='cuda'):
    '''Compute gradient * input attribution scores
    Params
    ------
    im: torch.Tensor
        Image to get scores with respect to (a leaf tensor with requires_grad=True)
    ind: int
        Which class to take gradient with respect to

    Returns
    -------
    np.ndarray
        (H x W) gradient * input scores for the first channel
    '''
    ind = torch.LongTensor([int(ind)]).to(device)
if im.grad is not None:
im.grad.data.zero_()
    pred = model(im)  # model is assumed to output log-probabilities (nn.NLLLoss expects log-probs)
    crit = nn.NLLLoss()
    loss = crit(pred, ind)
    loss.backward()
    res = im.grad * im  # gradient * input
    return res.data.cpu().numpy()[0, 0]
def ig_scores_2d(model, im_torch, num_classes=10, im_size=28, sweep_dim=1, ind=None, device='cuda'):
'''Compute integrated gradients scores (2D input)
'''
for p in model.parameters():
if p.grad is not None:
p.grad.data.zero_()
    output = np.zeros((im_size * im_size // (sweep_dim * sweep_dim), num_classes))
    # which classes to produce explanations for
    if ind is None:
        ind = range(num_classes)
    for class_to_explain in ind:
        # (to explain only the predicted class instead:
        #  class_to_explain = model(im_torch).max(1)[1].item())
        M = 100  # number of interpolation steps
        criterion = torch.nn.L1Loss(reduction='sum')
        mult_grid = np.arange(M) / (M - 1)  # interpolation coefficients in [0, 1]
baseline = torch.zeros(im_torch.shape).to(device)
        input_vecs = torch.empty((M, baseline.shape[1], baseline.shape[2], baseline.shape[3]),
                                 dtype=torch.float32,
                                 device=device, requires_grad=False)
        # linearly interpolate from the baseline to the input
        for i, prop in enumerate(mult_grid):
            input_vecs[i] = (baseline + prop * (im_torch.to(device) - baseline))[0]
        input_vecs.requires_grad = True
        out = F.softmax(model(input_vecs), dim=1)[:, class_to_explain]
        loss = criterion(out, torch.zeros(M).to(device))
loss.backward()
imps = input_vecs.grad.mean(0).data.cpu() * (im_torch.data.cpu() - baseline.cpu())
ig_scores = imps.sum(1)
# Sanity check: this should be small-ish
# print((out[-1] - out[0]).data[0] - ig_scores.sum())
        # aggregate pixel-level scores into sweep_dim x sweep_dim blocks
        scores = ig_scores.cpu().numpy().reshape((1, im_size, im_size, 1))
        kernel = np.ones(shape=(sweep_dim, sweep_dim, 1, 1))
        scores_convd = conv2dnp(scores, kernel, stride=(sweep_dim, sweep_dim))
output[:, class_to_explain] = scores_convd.flatten()
return output
def ig_scores_1d(batch, model, inputs, device='cuda'):
'''Compute integrated gradients scores (1D input)
'''
for p in model.parameters():
if p.grad is not None:
p.grad.data.zero_()
    M = 1000
    criterion = torch.nn.L1Loss(reduction='sum')
    mult_grid = np.arange(M) / (M - 1)  # interpolation coefficients in [0, 1]
word_vecs = model.embed(batch.text).data
baseline_text = copy.deepcopy(batch.text)
baseline_text.data[:, :] = inputs.vocab.stoi['.']
baseline = model.embed(baseline_text).data
    input_vecs = torch.Tensor(baseline.size(0), M, baseline.size(2)).to(device)
    # linearly interpolate from the baseline to the input (batch dim is squeezed out)
    for i, prop in enumerate(mult_grid):
        input_vecs[:, i, :] = (baseline + prop * (word_vecs - baseline))[:, 0].to(device)
    input_vecs.requires_grad = True  # so input_vecs.grad is populated by backward()
hidden = (torch.zeros(1, M, model.hidden_dim).to(device),
torch.zeros(1, M, model.hidden_dim).to(device))
lstm_out, hidden = model.lstm(input_vecs, hidden)
    logits = F.softmax(model.hidden_to_label(lstm_out[-1]), dim=1)[:, 0]
    loss = criterion(logits, torch.zeros(M).to(device))
loss.backward()
imps = input_vecs.grad.mean(1).data * (word_vecs[:, 0] - baseline[:, 0])
zero_pred = logits[0]
    scores = imps.sum(1)
    # Sanity check: this difference should be small-ish
    # print((logits[-1] - zero_pred) - scores.sum())
return scores.cpu().numpy()
def get_scores_1d(batch, model, method, label, only_one, score_orig, text_orig, subtract=False, device='cuda'):
'''Return attribution scores for 1D input
Params
------
method: str
What type of method to use for attribution (e.g. cd, occlusion)
Returns
-------
scores: np.ndarray
Higher scores are more important
'''
# calculate scores
if method == 'cd':
if only_one:
num_words = batch.text.data.cpu().numpy().shape[0]
scores = np.expand_dims(cd_text(batch, model, start=0, stop=num_words), axis=0)
else:
starts, stops = tiles_to_cd(batch)
batch.text.data = torch.LongTensor(text_orig).to(device)
scores = np.array([cd_text(batch, model, start=starts[i], stop=stops[i])
for i in range(len(starts))])
else:
scores = model(batch).data.cpu().numpy()
if method == 'occlusion' and not only_one:
scores = score_orig - scores
# get score for other class
if subtract:
return scores[:, label] - scores[:, int(1 - label)]
else:
return scores[:, label]
def get_scores_2d(model, method, ims, im_torch=None, pred_ims=None, model_type=None, device='cuda'):
'''Return attribution scores for 2D input
Params
------
method: str
What type of method to use for attribution (e.g. cd, occlusion)
    ims: np.ndarray (N x C x H x W)
        Tiles to pass as masks to cd
Returns
-------
scores: np.ndarray
Higher scores are more important
'''
scores = []
if method == 'cd':
for i in range(ims.shape[0]): # can use tqdm here, need to use batches
scores.append(cd(im_torch, model, np.expand_dims(ims[i], 0), model_type,
device=device)[0].data.cpu().numpy())
scores = np.squeeze(np.array(scores))
elif method == 'build_up':
for i in range(ims.shape[0]): # can use tqdm here, need to use batches
scores.append(pred_ims(model, ims[i])[0])
scores = np.squeeze(np.array(scores))
elif method == 'occlusion':
for i in range(ims.shape[0]): # can use tqdm here, need to use batches
scores.append(pred_ims(model, ims[i])[0])
scores = -1 * np.squeeze(np.array(scores))
if scores.ndim == 1:
scores = scores.reshape(1, -1)
return scores
def tiles_to_cd(batch):
    '''Converts build-up tiles into index spans for cd
    cd operates on [start, stop] word spans (down to unigrams);
    build-up tiles look like [0, 0, 12, 35, 0, 0], where non-zero entries
    are the kept words. Returns lists of starts and stops (inclusive
    indices of each tile's non-zero run).
    '''
starts, stops = [], []
tiles = batch.text.data.cpu().numpy()
L = tiles.shape[0]
for c in range(tiles.shape[1]):
text = tiles[:, c]
start = 0
stop = L - 1
while text[start] == 0:
start += 1
while text[stop] == 0:
stop -= 1
starts.append(start)
stops.append(stop)
return starts, stops
Functions
def get_scores_1d(batch, model, method, label, only_one, score_orig, text_orig, subtract=False, device='cuda')
Return attribution scores for 1D input

Params
------
method: str
    What type of method to use for attribution (e.g. cd, occlusion)

Returns
-------
scores: np.ndarray
    Higher scores are more important
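For illustration, a minimal sketch of the non-cd path ('build_up'); the stub classifier and the SimpleNamespace batch below are hypothetical stand-ins for an acd model and a torchtext batch, not part of the library:

from types import SimpleNamespace
import torch
import torch.nn as nn
from acd.scores.score_funcs import get_scores_1d

class StubClassifier(nn.Module):
    # hypothetical stand-in: ignores the token ids and returns fixed 2-class scores
    def forward(self, batch):
        return torch.tensor([[0.2, 0.8]])

batch = SimpleNamespace(text=torch.LongTensor([[3], [7], [1]]))  # (seq_len, batch=1)
scores = get_scores_1d(batch, StubClassifier(), method='build_up', label=1,
                       only_one=False, score_orig=None, text_orig=None)
print(scores)  # [0.8] -- the scores column for the labeled class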
def get_scores_2d(model, method, ims, im_torch=None, pred_ims=None, model_type=None, device='cuda')
Return attribution scores for 2D input

Params
------
method: str
    What type of method to use for attribution (e.g. cd, occlusion)
ims: np.ndarray (N x C x H x W)
    Tiles to pass as masks to cd

Returns
-------
scores: np.ndarray
    Higher scores are more important
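As a sketch of the occlusion path, with a hypothetical pred_ims stub standing in for a real forward pass (in acd, pred_ims is supplied by the caller):

import numpy as np
from acd.scores.score_funcs import get_scores_2d

def pred_ims(model, im):  # hypothetical stub: (model, image) -> class scores
    return np.ones((1, 10))

tiles = np.zeros((4, 1, 8, 8))  # 4 occluded variants of an image
scores = get_scores_2d(None, method='occlusion', ims=tiles, pred_ims=pred_ims)
print(scores.shape)  # (4, 10): negated predictions, one row per tile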
def gradient_times_input_scores(im, ind, model, device='cuda')
Compute gradient * input attribution scores

Params
------
im: torch.Tensor
    Image to get scores with respect to (a leaf tensor with requires_grad=True)
ind: int
    Which class to take gradient with respect to

Returns
-------
np.ndarray
    (H x W) gradient * input scores for the first channel
def ig_scores_1d(batch, model, inputs, device='cuda')
Compute integrated gradients scores (1D input)
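A hypothetical end-to-end sketch with a toy LSTM exposing the attributes ig_scores_1d relies on (embed, lstm, hidden_dim, hidden_to_label) and SimpleNamespace stubs for the torchtext batch and vocab:

from types import SimpleNamespace
import torch
import torch.nn as nn
from acd.scores.score_funcs import ig_scores_1d

class ToyLSTM(nn.Module):
    # hypothetical stand-in with the attribute names ig_scores_1d expects
    def __init__(self, vocab_size=50, embed_dim=8, hidden_dim=16, n_classes=2):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.embed = nn.Embedding(vocab_size, embed_dim)
        self.lstm = nn.LSTM(embed_dim, hidden_dim)
        self.hidden_to_label = nn.Linear(hidden_dim, n_classes)

model = ToyLSTM()
batch = SimpleNamespace(text=torch.randint(1, 50, (7, 1)))      # (seq_len, batch=1)
inputs = SimpleNamespace(vocab=SimpleNamespace(stoi={'.': 4}))  # '.' is the baseline token
scores = ig_scores_1d(batch, model, inputs, device='cpu')
print(scores.shape)  # (7,): one attribution per token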
def ig_scores_2d(model, im_torch, num_classes=10, im_size=28, sweep_dim=1, ind=None, device='cuda')
Compute integrated gradients scores (2D input)
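A hypothetical CPU sketch scoring a random 8x8 single-channel image for class 0 only; the linear model here is a toy stand-in, not an acd model:

import torch
import torch.nn as nn
from acd.scores.score_funcs import ig_scores_2d

model = nn.Sequential(nn.Flatten(), nn.Linear(64, 10))
im = torch.randn(1, 1, 8, 8)
scores = ig_scores_2d(model, im, num_classes=10, im_size=8,
                      sweep_dim=1, ind=[0], device='cpu')
print(scores.shape)  # (64, 10); only column 0 is filled in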
def tiles_to_cd(batch)
Converts build-up tiles into index spans for cd. cd operates on [start, stop] word spans (down to unigrams); build-up tiles look like [0, 0, 12, 35, 0, 0], where non-zero entries are the kept words. Returns lists of starts and stops (inclusive indices of each tile's non-zero run).
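A small self-contained check (the SimpleNamespace batch is a stand-in for a torchtext batch): the non-zero run of [0, 0, 12, 35, 0, 0] is recovered as start 2, stop 3:

from types import SimpleNamespace
import torch
from acd.scores.score_funcs import tiles_to_cd

tiles = torch.LongTensor([[0], [0], [12], [35], [0], [0]])  # one tile per column
batch = SimpleNamespace(text=SimpleNamespace(data=tiles))
starts, stops = tiles_to_cd(batch)
print(starts, stops)  # [2] [3] -- inclusive span of the non-zero run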