# Content - 55c4c66a0bcb0685ac3616a8af02099e60ecf814 - dd8be13/interval_attack.py
#
# visit type:
# https://github.com/tcwangshiqi-columbia/Interval-Attack
#
# 15 April 2020, 01:16:42 UTC
# Tip revision: 7a51ffe4cdf057db37ad521d2d790f8846d3e77b authored by Shiqi Wang on 17 June 2019, 07:52:42 UTC
# update Readme
# Tip revision: 7a51ffe
# interval_attack.py
import tensorflow as tf
import numpy as np
import json
import sys
import math
from tensorflow.examples.tutorials.mnist import input_data
from scipy.spatial.distance import pdist, squareform
from model import Model
import argparse
import time

from symbolic_interval import *


def cw_attack(sess, model, x, y, config, x_nat=None, iters=None):
	"""Iterative signed-gradient attack that follows ``model.cw_grad``.

	Every iteration moves ``x_adv`` by ``config['a']`` along the sign of
	the gradient fetched from ``model.cw_grad``, then clips the result to
	the l_infinity ball of radius ``config['epsilon']`` around ``x_nat``
	and finally to the [0, 1] range.

	Args:
		sess: object whose ``run(fetches, feed_dict=...)`` evaluates
			``model.cw_grad``.
		model: object exposing ``cw_grad``, ``x_input`` and ``y_input``.
			NOTE(review): ``cw_grad`` is presumably the input gradient of
			a Carlini-Wagner style loss; it is defined outside this file.
		x: starting points of the attack; copied, never modified.
		y: labels fed to ``model.y_input``; copied, never modified.
		config: mapping with the step size ``'a'`` and radius ``'epsilon'``.
		x_nat: centre of the epsilon ball; defaults to ``x``.
		iters: number of gradient steps. ``None`` (the default) keeps the
			historical behaviour of reading the script-level ``args.iters``.

	Returns:
		``(x_adv, y_adv)``: the perturbed inputs and a copy of ``y``.
	"""
	if x_nat is None:
		x_nat = x
	if iters is None:
		# Only defined when the file runs as a script; pass ``iters``
		# explicitly to use this function from elsewhere.
		iters = args.iters

	step_size = config['a']
	epsilon = config['epsilon']
	x_adv = np.copy(x)
	y_adv = np.copy(y)

	for _ in range(iters):
		g = sess.run(model.cw_grad, feed_dict={
						model.x_input: x_adv,
						model.y_input: y_adv
					})

		# In-place add keeps the dtype of x_adv, as the original loop did.
		x_adv += step_size * np.sign(g)

		# Project onto the epsilon ball first, then onto valid pixel values.
		x_adv = np.clip(x_adv, x_nat - epsilon, x_nat + epsilon)
		x_adv = np.clip(x_adv, 0, 1)

	return x_adv, y_adv


def pgd_attack(sess, model, x, y, config, x_nat=None, iters=None):
	"""Iterative signed-gradient attack that follows ``model.pgd_grad``.

	Every iteration moves ``x_adv`` by ``config['a']`` along the sign of
	the gradient fetched from ``model.pgd_grad``, then clips the result to
	the l_infinity ball of radius ``config['epsilon']`` around ``x_nat``
	and finally to the [0, 1] range.

	Args:
		sess: object whose ``run(fetches, feed_dict=...)`` evaluates
			``model.pgd_grad``.
		model: object exposing ``pgd_grad``, ``x_input`` and ``y_input``.
		x: starting points of the attack; copied, never modified.
		y: labels fed to ``model.y_input``; copied, never modified.
		config: mapping with the step size ``'a'`` and radius ``'epsilon'``.
		x_nat: centre of the epsilon ball; defaults to ``x``.
		iters: number of gradient steps. ``None`` (the default) keeps the
			historical behaviour of reading the script-level ``args.iters``.

	Returns:
		``(x_adv, y_adv)``: the perturbed inputs and a copy of ``y``.
	"""
	if x_nat is None:
		x_nat = x
	if iters is None:
		# Only defined when the file runs as a script; pass ``iters``
		# explicitly to use this function from elsewhere.
		iters = args.iters

	step_size = config['a']
	epsilon = config['epsilon']
	x_adv = np.copy(x)
	y_adv = np.copy(y)

	for _ in range(iters):
		g = sess.run(model.pgd_grad, feed_dict={
						model.x_input: x_adv,
						model.y_input: y_adv
					})

		# In-place add keeps the dtype of x_adv, as the original loop did.
		x_adv += step_size * np.sign(g)

		# Project onto the epsilon ball first, then onto valid pixel values.
		x_adv = np.clip(x_adv, x_nat - epsilon, x_nat + epsilon)
		x_adv = np.clip(x_adv, 0, 1)

	return x_adv, y_adv

def svgd_kernel(theta):
	"""Return the RBF kernel matrix of ``theta`` and its summed gradient.

	``theta`` is a 2-D array with one particle per row. The bandwidth ``h``
	comes from the median of the squared pairwise distances:
	``h = sqrt(0.5 * median / log(n))`` with ``n`` the number of rows.

	Returns:
		``(Kxy, dxkxy)`` where ``Kxy[i, j] = exp(-|t_i - t_j|^2 / (2 h^2))``
		and ``dxkxy[i] = sum_j Kxy[i, j] * (t_i - t_j) / h^2``.
	"""
	n_particles = theta.shape[0]
	sq_dists = squareform(pdist(theta))**2

	# bandwidth from the median of the squared distances
	bandwidth = np.sqrt(0.5 * np.median(sq_dists) / np.log(n_particles))

	# compute the rbf kernel
	Kxy = np.exp( -sq_dists / bandwidth**2 / 2)

	# -K.theta + theta * rowsum(K), one broadcast instead of a column loop
	row_sums = np.sum(Kxy, axis=1)
	dxkxy = -np.matmul(Kxy, theta) + theta * row_sums[:, np.newaxis]
	dxkxy = dxkxy / (bandwidth**2)
	return (Kxy, dxkxy)




def wgf_kernel(theta):
	"""Return the modified RBF kernel used by ``dgf`` and its summed term.

	``theta`` is a 2-D array with one particle per row. The bandwidth is
	``h = sqrt(0.5 * median / log(n + 1))`` where ``median`` is taken over
	the squared pairwise distances and ``n`` is the number of rows.

	Returns:
		``(Kxy, dxkxy)`` where, with ``s = |t_i - t_j|^2 / (2 h^2)``,
		``Kxy[i, j] = (s - 1) * exp(-s)`` and
		``dxkxy[i] = sum_j Kxy[i, j] * (t_i - t_j)`` (not divided by h^2).
	"""
	n_particles = theta.shape[0]
	sq_dists = squareform(pdist(theta))**2

	# bandwidth from the median of the squared distances
	bandwidth = np.sqrt(0.5 * np.median(sq_dists) / np.log(n_particles + 1))

	rbf = np.exp( -sq_dists / bandwidth**2 / 2)
	Kxy = np.multiply((sq_dists / bandwidth**2 / 2 - 1), rbf)

	# -K.theta + theta * rowsum(K), one broadcast instead of a column loop
	row_sums = np.sum(Kxy, axis=1)
	dxkxy = -np.matmul(Kxy, theta) + theta * row_sums[:, np.newaxis]

	return (Kxy, dxkxy)


def blob(sess, model, x, y, config, x_nat):
	"""Given a set of examples (x_nat, y), returns a set of adversarial
	   examples within epsilon of x_nat in l_infinity norm.

	The batch is reshuffled 20 times; after each shuffle 10 signed steps of
	size config['a'] are taken along a combination of model.blob_grad and
	the svgd_kernel terms of the current batch. The result is returned in
	the caller's original row order."""

	x_adv = np.copy(x)
	n_rows = x_adv.shape[0]
	step_size = config['a']
	epsilon = config['epsilon']
	grad_op = model.blob_grad

	# `order` is shuffled in place every epoch; `row_origin` composes the
	# shuffles so the original ordering can be restored at the end.
	order = np.arange(n_rows)
	row_origin = np.arange(n_rows)

	for _ in range(20):
		np.random.shuffle(order)
		x_nat = x_nat[order]
		x_adv = x_adv[order]
		y = y[order]
		row_origin = row_origin[order]

		for _ in range(10):
			grad = sess.run(grad_op, feed_dict={model.x_input: x_adv,
											   model.y_input: y})

			kxy, dxkxy = svgd_kernel(x_adv)
			kernel_term = -(np.matmul(kxy, -grad) + dxkxy)/n_rows
			x_adv += step_size * np.sign(1.1*kernel_term + grad)

			x_adv = np.clip(x_adv, x_nat - epsilon, x_nat + epsilon)
			x_adv = np.clip(x_adv, 0, 1) # ensure valid pixel range

	# undo the accumulated shuffles
	return x_adv[np.argsort(row_origin)]


def dgf(sess, model, x, y, config, x_nat):
	"""Given a set of examples (x_nat, y), returns a set of adversarial
	   examples within epsilon of x_nat in l_infinity norm.

	The batch is reshuffled 20 times; after each shuffle 5 signed steps of
	size config['a'] are taken along model.blob_grad plus the wgf_kernel
	term scaled by -0.001. The result is returned in the caller's original
	row order."""

	x_adv = np.copy(x)
	n_rows = x.shape[0]
	step_size = config['a']
	epsilon = config['epsilon']
	grad_op = model.blob_grad
	kernel_weight = -0.001

	# `order` is shuffled in place every epoch; `row_origin` composes the
	# shuffles so the original ordering can be restored at the end.
	order = np.arange(n_rows)
	row_origin = np.arange(n_rows)

	for _ in range(20):
		np.random.shuffle(order)
		x_nat = x_nat[order]
		x_adv = x_adv[order]
		y = y[order]
		row_origin = row_origin[order]

		for _ in range(5):
			grad = sess.run(grad_op, feed_dict={model.x_input: x_adv,
											   model.y_input: y})

			_, dxkxy = wgf_kernel(x_adv)
			x_adv += step_size * np.sign(kernel_weight*dxkxy + grad)

			x_adv = np.clip(x_adv, x_nat - epsilon, x_nat + epsilon)
			x_adv = np.clip(x_adv, 0, 1) # ensure valid pixel range

	# undo the accumulated shuffles
	return x_adv[np.argsort(row_origin)]

	
	
def eval(x, y, sess, model, blist = None):
	"""Return the accuracy of ``model`` on ``(x, y)`` as a percentage.

	Predictions are fetched from ``model.y_pred``. When ``blist`` is given,
	an example counts as correct only if its ``blist`` entry is false AND
	the prediction equals the label.
	"""
	feed = {model.x_input: x, model.y_input: y}
	predicted = sess.run(model.y_pred, feed_dict=feed)

	if blist is None:
		n_correct = np.sum(np.equal(predicted, y).astype(np.float32))
	else:
		# already-broken examples stay broken regardless of this prediction
		broken = np.logical_or(blist, np.not_equal(predicted, y))
		n_correct = np.sum((1-broken).astype(np.float32))

	return n_correct / float(y.shape[0]) * 100.0




def _interval_gradient_step(sess, model, x, x_adv, y, y_adv, config, bool_list):
	"""Run the gradient attack selected by ``args.im`` once.

	For the DGF and BLOB modes, rows already marked in ``bool_list`` keep
	their current ``x_adv`` instead of the freshly attacked value. Any other
	value of ``args.im`` (e.g. INTERVAL_BASE) leaves ``x_adv`` untouched.
	"""
	if args.im == INTERVAL_CW:
		return cw_attack(sess, model, x_adv, y_adv, config, x)
	if args.im == INTERVAL_PGD:
		return pgd_attack(sess, model, x_adv, y_adv, config, x)

	if args.im == INTERVAL_DGF:
		x_adv_new = dgf(sess, model, x_adv, y, config, x_nat=x)
	elif args.im == INTERVAL_BLOB:
		x_adv_new = blob(sess, model, x_adv, y, config, x_nat=x)
	else:
		return x_adv, y_adv

	x_adv = x_adv_new*(1.0-bool_list).reshape(-1,1)+x_adv*bool_list.reshape(-1,1)
	return x_adv, y_adv


def _interval_perturbation(sess, sample, label, target, tf_graph_param_whole, naive_graph, max_ed):
	"""Return the interval-guided step for one sample, or None when stuck.

	The scale ``ed`` starts at 1 and is tripled (capped at ``max_ed``) for
	as long as the arg-max of the upper output ``u`` returned by
	``propagate_whole`` still equals ``target``. Once the arg-max ``d``
	differs, the step is ``+ed`` where ``eq[0, :, d]`` is positive and
	``-ed`` elsewhere. None is returned if the cap is reached first.
	"""
	ed = 1
	while True:
		ed = min(ed, max_ed)
		# NOTE(review): propagate_whole comes from symbolic_interval; it
		# presumably returns the symbolic equations plus upper/lower output
		# bounds for a box scaled by ``ed`` -- confirm against that module.
		eq, u, _ = propagate_whole(sess, w_dict, sample, label, tf_graph_param_whole, naive_graph, ed)
		d = np.argmax(u)

		if d != target:
			# label changed within the range: follow the sign of its equation
			pos = (eq[0, :, d]>0).astype(np.float32)*2*ed
			return pos - ed
		if ed == max_ed:
			# still the same label at the largest allowed range
			return None
		ed *= 3


def interval_attack_exchange1(sess, model,\
					tf_graph_param_whole, naive_graph, x, x_adv, y, config, bool_list):
	"""Alternate a gradient attack with interval-guided steps.

	For ``args.interval_iters`` rounds: run the gradient attack chosen by
	``args.im``, mark every example whose prediction differs from ``y`` in
	``bool_list``, then move each still-unbroken example by the step from
	``_interval_perturbation`` and clip back into the epsilon ball around
	``x`` and into [0, 1]. One final gradient attack follows the rounds.

	Relies on the script-level globals ``args`` (``im``, ``interval_iters``,
	``estep``) and ``w_dict`` set up in the ``__main__`` block.

	Args:
		sess, model: session and model used for predictions and attacks.
		tf_graph_param_whole, naive_graph: passed through unchanged to
			``propagate_whole``.
		x: natural examples, the centre of the epsilon ball.
		x_adv: starting adversarial examples; copied, not modified.
		y: true labels.
		config: mapping with ``'epsilon'`` (plus whatever the gradient
			attacks read).
		bool_list: boolean array, True for examples already broken.

	Returns:
		``(x_adv, y_adv, bool_list)`` with the updated adversarial
		examples, a copy of the labels and the updated broken mask.
	"""
	# The interval step below writes into x_adv in place; work on a copy so
	# the caller's array is never touched.
	x_adv = np.copy(x_adv)
	y_adv = np.copy(y)
	max_ed = config['epsilon']/2/args.estep

	for i in range(args.interval_iters):

		x_adv, y_adv = _interval_gradient_step(sess, model, x, x_adv, y, y_adv, config, bool_list)

		p = sess.run(model.y_pred, feed_dict={
						model.x_input: x_adv,
						model.y_input: y_adv
					})
		bool_list = np.logical_or(bool_list, np.not_equal(p, y))
		print("iter: %s  adv_acc: %s" % (i, (1.0-np.sum(bool_list.astype(np.float32))/x.shape[0])*100.0))

		for j in range(x.shape[0]):
			if bool_list[j]:
				continue

			pos = _interval_perturbation(sess, x_adv[j:j+1], y[j:j+1], y_adv[j],
							tf_graph_param_whole, naive_graph, max_ed)
			if pos is not None:
				x_adv[j] += pos

		x_adv = np.clip(x_adv, x - config['epsilon'], x + config['epsilon'])
		x_adv = np.clip(x_adv, 0, 1)

	x_adv, y_adv = _interval_gradient_step(sess, model, x, x_adv, y, y_adv, config, bool_list)

	p = sess.run(model.y_pred, feed_dict={
					model.x_input: x_adv,
					model.y_input: y_adv
				})
	bool_list = np.logical_or(bool_list, np.not_equal(p, y))

	return x_adv, y_adv, bool_list



def check(x, x_nat, epsilon):
	"""Exit with status 1 unless ``x`` is a valid perturbation of ``x_nat``.

	An entry is invalid when it lies ``epsilon`` (plus a 1e-5 tolerance) or
	more away from the matching ``x_nat`` entry, or when it falls outside
	[0, 1]. The first invalid entry in row-major order is reported on
	stdout, with the epsilon violation taking precedence, and the process
	exits through ``SystemExit(1)``.

	Args:
		x: 2-D array of adversarial examples, one per row.
		x_nat: natural examples; only its first ``x.shape[0]`` rows are
			compared, so it may hold more rows than ``x``.
		epsilon: allowed l_infinity radius.

	Raises:
		IndexError: if ``x_nat`` has fewer rows than ``x``.
		SystemExit: with code 1 on the first invalid entry.
	"""
	tol = 0.00001
	n_rows = x.shape[0]
	if x_nat.shape[0] < n_rows:
		raise IndexError("x_nat has fewer rows than x")

	# Compare in float64 so the thresholds are not rounded to float32.
	adv = np.asarray(x, dtype=np.float64)
	nat = np.asarray(x_nat[:n_rows], dtype=np.float64)

	outside_ball = np.logical_or(adv >= nat + epsilon + tol,
								 adv <= nat - epsilon - tol)
	outside_range = np.logical_or(adv > 1, adv < 0)
	bad = np.logical_or(outside_ball, outside_range)
	if not bad.any():
		return

	# argmax of a boolean array gives the first True in row-major order.
	first = np.unravel_index(np.argmax(bad), bad.shape)
	if outside_ball[first]:
		print("wrong adv generated!")
		print(x[first]-x_nat[first])
	else:
		print("wrong adv generated, out of 0 and 1!")
		print(x[first])
	sys.exit(1)



# Values accepted by --method (compared against args.method in the __main__
# block). Only PGD, CW, VERIFIED, BLOB, DGF and INTERVAL_EX1 are matched
# explicitly there; every other value falls through to that block's final
# `else` branch.
PGD = 1
CW = 2
INTERVALGD = 3
VERIFIED = 4
WHITE = 5
INTERVAL_FREQUENT = 6
INTERVAL_SPSA = 7
INTERVAL_WIDTH = 8
INTERVAL_EX = 9
INTERVAL_EX1 = 10
BLOB = 11
DGF = 12

# NOTE(review): not referenced anywhere in the visible code; the iteration
# counts actually used come from --iters and --interval_iters.
INTERVAL_ITER = 40

# Values accepted by --im: the gradient attack that
# interval_attack_exchange1 combines with its interval-based steps.
# INTERVAL_BASE is never compared against explicitly in the visible code.
INTERVAL_BASE = 0
INTERVAL_PGD = 1
INTERVAL_CW = 2
INTERVAL_DGF = 3
INTERVAL_BLOB = 4

# Seed NumPy's global RNG, which supplies the random starts
# (np.random.uniform) and the batch shuffles (np.random.shuffle).
np.random.seed(0)


def _baseline_attack_step(method, sess, model, x, x_adv, y, config, bool_list):
	"""Run one pass of the baseline attack selected by ``method``.

	PGD, CW and DGF return the attacked batch as is. BLOB keeps the current
	``x_adv`` for rows already marked in ``bool_list`` and takes the freshly
	attacked value for the rest.
	"""
	if method == PGD:
		x_adv, _ = pgd_attack(sess, model, x_adv, y, config, x_nat=x)
	elif method == CW:
		x_adv, _ = cw_attack(sess, model, x_adv, y, config, x_nat=x)
	elif method == BLOB:
		x_adv_new = blob(sess, model, x_adv, y, config, x_nat=x)
		x_adv = x_adv_new*(1.0-bool_list).reshape(-1,1)+x_adv*bool_list.reshape(-1,1)
	elif method == DGF:
		x_adv = dgf(sess, model, x_adv, y, config, x_nat=x)
	else:
		raise ValueError("not a baseline attack method: %s" % (method,))
	return x_adv


def _random_restart_attack(method, sess, model, x, y, config, restarts):
	"""Attack one batch with ``restarts`` random starts of a baseline method.

	Before every restart, rows not yet broken are re-initialised to ``x``
	plus uniform noise in [-epsilon, epsilon]; rows already broken keep
	their adversarial value. After the attack pass, a row counts as broken
	once its prediction differs from ``y``. Progress and the final
	accuracies are printed; the last ``x_adv`` is returned.
	"""
	x_adv = np.copy(x)
	bool_list = np.zeros(x.shape[0]).astype(bool)

	for rs in range(restarts):
		print("random starts: %s" % (rs,))
		noise = np.random.uniform(-config['epsilon'], config['epsilon'], x.shape)
		x_adv = (x + noise)*(1.0-bool_list).reshape(-1,1)+x_adv*bool_list.reshape(-1,1)
		x_adv = np.clip(x_adv, 0, 1)
		x_adv = _baseline_attack_step(method, sess, model, x, x_adv, y, config, bool_list)

		p = sess.run(model.y_pred, feed_dict={
				model.x_input: x_adv,
				model.y_input: y
			})
		bool_list = np.logical_or(bool_list, np.not_equal(p, y))
		print("adv_acc: %s" % ((1.0-np.sum(bool_list.astype(np.float32))/x.shape[0])*100.0,))

	acc = eval(x, y, sess, model)
	adv_acc = eval(x_adv, y, sess, model)
	print("adv_acc: %s  acc: %s" % (adv_acc, acc))
	return x_adv


# The script body stays at module level on purpose: cw_attack, pgd_attack and
# interval_attack_exchange1 read `args` and `w_dict` as module globals.
if __name__ == '__main__':

	# Used for the checkpoint directory handling below; no other import in
	# this file provides it explicitly.
	import os

	parser = argparse.ArgumentParser()
	parser.add_argument('--method', type=int, default=INTERVAL_EX1)
	parser.add_argument('--baseline', action='store_true', default=False)
	parser.add_argument('--im', type=int, default=INTERVAL_BLOB)
	parser.add_argument('--random', type=int, default=50)
	# epsilon used for estimating the interval gradient
	parser.add_argument('--estep', type=float, default=0.01)
	# batch_size that is used for calculating symbolic propagation
	# in case out of memory
	parser.add_argument('--batch_size', type=int, default=500)
	# how many batches needed to test
	parser.add_argument('--batches', type=int, default=1)
	parser.add_argument('--iters', type=int, default=40)
	parser.add_argument('--interval_iters', type=int, default=3)
	parser.add_argument('--estep_decay', type=float, default=0.94)
	# NOTE(review): store_true with default=True means this flag can never
	# be switched off from the command line.
	parser.add_argument('--check', action='store_true', default=True)
	args = parser.parse_args()

	# The restart loop lowers args.interval_iters after two restarts;
	# remember the configured value so every batch starts from it again.
	base_interval_iters = args.interval_iters

	with open('config.json') as config_file:
		config = json.load(config_file)
	num_eval_examples = config['num_eval_examples']
	eval_batch_size = config['eval_batch_size']
	eval_on_cpu = config['eval_on_cpu']

	model_dir = config['model_dir']

	# Set up the data, hyperparameters, and the model
	mnist = input_data.read_data_sets('MNIST_data', one_hot=False)
	x = mnist.test.images[:args.batch_size*args.batches]
	y = mnist.test.labels[:args.batch_size*args.batches]

	if eval_on_cpu:
		with tf.device("/cpu:0"):
			model = Model()
	else:
		model = Model()

	global_step = tf.contrib.framework.get_or_create_global_step()

	if not os.path.exists(model_dir):
		os.makedirs(model_dir)
	eval_dir = os.path.join(model_dir, 'eval')
	if not os.path.exists(eval_dir):
		os.makedirs(eval_dir)

	weights = tf.trainable_variables()
	saver = tf.train.Saver()

	model_file = tf.train.latest_checkpoint(model_dir)

	if(args.method==INTERVAL_EX1):
		# Pull the trained weights out of the checkpoint once; the symbolic
		# interval graphs below are built from them.
		with tf.Session() as sess:
			saver.restore(sess, model_file)

			w = sess.run(weights)
			w_dict = {"w_conv1":w[0], "b_conv1":w[1],\
					 "w_conv2":w[2], "b_conv2":w[3],\
					 "w_fc1":w[4], "b_fc1":w[5],\
					 "w_fc2":w[6], "b_fc2":w[7]}
		# tf_x1, tf_y1, error_range, estep_decay, upper, lower, param_conv1, param_conv2, param_conv2_input, param_fc, param_fc_input
		naive_graph = build_tf_graph(w_dict)
		tf_graph_param_whole = build_tf_graph_whole(w_dict, args.estep)

		d_xent = tf.nn.sparse_softmax_cross_entropy_with_logits(
			labels=model.y_input, logits=tf_graph_param_whole[7])

		interval_loss = d_xent

	start = time.time()
	saver = tf.train.Saver()
	with tf.Session() as sess:
		saver.restore(sess, model_file)

		x_adv_list = []

		for batch in range(args.batches):

			start_batch = batch*args.batch_size
			end_batch = start_batch + args.batch_size
			print("batch %s start: %s end: %s" % (batch, start_batch, end_batch))

			x = mnist.test.images[start_batch:end_batch]
			y = mnist.test.labels[start_batch:end_batch]

			if args.method in (PGD, CW, BLOB, DGF):
				x_adv = _random_restart_attack(args.method, sess, model,
								x, y, config, args.random)

			elif(args.method==VERIFIED):
				# NOTE(review): nothing is computed here, so x_adv is not
				# set for this method and the checks below will fail.
				pass

			elif(args.method==INTERVAL_EX1):

				x_adv = np.copy(x)
				bool_list = np.zeros(x.shape[0]).astype(bool)

				for rs in range(args.random):
					# Full interval rounds for the first two restarts only,
					# a single round afterwards. Assigned on every restart so
					# the reduction does not leak into the next batch.
					args.interval_iters = base_interval_iters if rs < 2 else 1
					print("random starts: %s" % (rs,))
					if(rs!=0):
						x_adv = (x + np.random.uniform(-config['epsilon'], config['epsilon'], x.shape))*(1.0-bool_list).reshape(-1,1)+x_adv*bool_list.reshape(-1,1)
						x_adv = np.clip(x_adv, 0, 1)
					x_adv, y_adv, bool_list = interval_attack_exchange1(sess, model,\
										tf_graph_param_whole, naive_graph,\
										x, x_adv, y, config, bool_list)

					acc = eval(x, y, sess, model)
					adv_acc = eval(x_adv, y, sess, model)

					print("adv_acc: %s  acc: %s" % (adv_acc, acc))

			else:
				# NOTE(review): legacy path. interval_attack, interval_grad
				# and estep_decay are not defined in this file; unless the
				# star import from symbolic_interval provides them, this
				# branch raises NameError. Logic left as it was.
				x_adv = []
				y_adv = []
				for i in range(args.batches):

					x1 = x[i*args.batch_size:(i+1)*args.batch_size]
					y1 = y[i*args.batch_size:(i+1)*args.batch_size]

					x1, y1 = interval_attack(sess, model, interval_grad,\
									estep_decay, x1, y1, config)
					x_adv.append(x1)
					y_adv.append(y1)

				x_adv = np.array(x_adv).reshape(-1, 784)
				y_adv = np.array(y_adv).reshape(-1)

				if(args.im==INTERVAL_CW):
					x_adv, y_adv = cw_attack(sess, model, x_adv, y_adv, config, x)
				if(args.im==INTERVAL_PGD):
					x_adv, y_adv = pgd_attack(sess, model, x_adv, y_adv, config, x)

				acc = eval(x, y, sess, model)
				adv_acc = eval(x_adv, y_adv, sess, model)

				if(args.check):
					check(x_adv, x, config['epsilon'])

				print("adv_acc: %s  acc: %s" % (adv_acc, acc))

			if(args.check):
				x_nat = mnist.test.images[start_batch:end_batch]
				check(x_adv, x_nat, config['epsilon'])
				print ("check passed!")

			y_nat = mnist.test.labels[start_batch:end_batch]
			adv_acc = eval(x_adv, y_nat, sess, model)
			print("final adv_acc: %s" % (adv_acc,))
			np.save("x_adv_attack1_"+str(args.method)+"_b"+str(batch), x_adv)
			x_adv_list.append(x_adv)

			print("time: %s" % (time.time()-start,))

		x_adv = np.array(x_adv_list).reshape(-1,784)
		# Only the first batch_size*batches test examples were attacked, so
		# compare against exactly that prefix of the test set. Using the
		# full label array made the final eval fail on mismatched shapes.
		n_attacked = x_adv.shape[0]
		if(args.check):
			x_nat = mnist.test.images[:n_attacked]
			check(x_adv, x_nat, config['epsilon'])
			print ("Final check passed!")

		y_nat = mnist.test.labels[:n_attacked]
		adv_acc = eval(x_adv, y_nat, sess, model)
		print("Final adv_acc: %s" % (adv_acc,))
		np.save("x_adv_attack1_"+str(args.method)+"_final", x_adv)

		print("time: %s" % (time.time()-start,))