mirror of
				https://github.com/linuxkit/linuxkit.git
				synced 2025-10-31 04:53:35 +00:00 
			
		
		
		
	It's slightly embarrassing that this old snapshot was kept around here rotting for so long, but thankfully something is finally being done about it. Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
		
			
				
	
	
		
			21881 lines
		
	
	
		
			719 KiB
		
	
	
	
		
			Diff
		
	
	
	
	
	
			
		
		
	
	
			21881 lines
		
	
	
		
			719 KiB
		
	
	
	
		
			Diff
		
	
	
	
	
	
| --- /dev/null	2017-07-05 16:27:37.615351856 +0200
 | |
| +++ b/net/wireguard/config.c	2017-07-06 18:17:33.000000000 +0200
 | |
| @@ -0,0 +1,353 @@
 | |
| +/* Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. */
 | |
| +
 | |
| +#include "config.h"
 | |
| +#include "device.h"
 | |
| +#include "socket.h"
 | |
| +#include "packets.h"
 | |
| +#include "timers.h"
 | |
| +#include "hashtables.h"
 | |
| +#include "peer.h"
 | |
| +#include "uapi.h"
 | |
| +
 | |
| +static int set_device_port(struct wireguard_device *wg, u16 port) /* Record a new listen port on wg and, if the interface is up, re-create its socket; returns 0 or the result of socket_init(). */
 | |
| +{
 | |
| +	struct wireguard_peer *peer, *temp;
 | |
| +	socket_uninit(wg); /* The existing socket is uninitialized before the port value changes. */
 | |
| +	wg->incoming_port = port;
 | |
| +	if (!(netdev_pub(wg)->flags & IFF_UP)) /* Interface is down: only remember the port, do not open a socket now. */
 | |
| +		return 0;
 | |
| +	peer_for_each (wg, peer, temp, false)
 | |
| +		socket_clear_peer_endpoint_src(peer); /* Presumably drops each peer's cached source address tied to the old socket -- confirm in socket.c. */
 | |
| +	return socket_init(wg);
 | |
| +}
 | |
| +
 | |
| +static int set_ipmask(struct wireguard_peer *peer, void __user *user_ipmask) /* Copy one struct wgipmask from userspace and insert it into the device routing table for peer. */
 | |
| +{
 | |
| +	int ret = -EINVAL; /* Stays -EINVAL when the family is neither AF_INET nor AF_INET6, or the cidr exceeds that family's maximum. */
 | |
| +	struct wgipmask in_ipmask;
 | |
| +
 | |
| +	if (copy_from_user(&in_ipmask, user_ipmask, sizeof(in_ipmask)))
 | |
| +		return -EFAULT;
 | |
| +
 | |
| +	if (in_ipmask.family == AF_INET && in_ipmask.cidr <= 32)
 | |
| +		ret = routing_table_insert_v4(&peer->device->peer_routing_table, &in_ipmask.ip4, in_ipmask.cidr, peer);
 | |
| +	else if (in_ipmask.family == AF_INET6 && in_ipmask.cidr <= 128)
 | |
| +		ret = routing_table_insert_v6(&peer->device->peer_routing_table, &in_ipmask.ip6, in_ipmask.cidr, peer);
 | |
| +
 | |
| +	return ret; /* Either -EINVAL or whatever the routing table insert returned. */
 | |
| +}
 | |
| +
 | |
| +static const u8 zeros[WG_KEY_LEN] = { 0 }; /* All-zero key; the memcmp() checks in this file compare against it to detect an unset key field. */
 | |
| +
 | |
| +static int set_peer(struct wireguard_device *wg, void __user *user_peer, size_t *len) /* Apply one userspace struct wgpeer and its trailing wgipmask array to wg; on success *len receives the number of bytes this record occupies. */
 | |
| +{
 | |
| +	int ret = 0;
 | |
| +	size_t i;
 | |
| +	struct wgpeer in_peer;
 | |
| +	void __user *user_ipmask;
 | |
| +	struct wireguard_peer *peer = NULL;
 | |
| +
 | |
| +	if (copy_from_user(&in_peer, user_peer, sizeof(in_peer)))
 | |
| +		return -EFAULT;
 | |
| +
 | |
| +	if (!memcmp(zeros, in_peer.public_key, NOISE_PUBLIC_KEY_LEN))
 | |
| +		return -EINVAL; /* Can't add a peer with no public key. */
 | |
| +
 | |
| +	peer = pubkey_hashtable_lookup(&wg->peer_hashtable, in_peer.public_key); /* Presumably returns a referenced peer; the matching peer_put() calls are below. */
 | |
| +	if (!peer) { /* Peer doesn't exist yet. Add a new one. */
 | |
| +		if (in_peer.flags & WGPEER_REMOVE_ME)
 | |
| +			return -ENODEV; /* Tried to remove a non-existing peer. */
 | |
| +		if (in_peer.flags & WGPEER_REMOVE_PRESHARED_KEY)
 | |
| +			return -EINVAL; /* Tried to remove a psk for a non-existing peer. */
 | |
| +
 | |
| +		down_read(&wg->static_identity.lock);
 | |
| +		if (wg->static_identity.has_identity && !memcmp(in_peer.public_key, wg->static_identity.static_public, NOISE_PUBLIC_KEY_LEN)) {
 | |
| +			/* We silently ignore peers that have the same public key as the device. The reason we do it silently
 | |
| +			 * is that we'd like for people to be able to reuse the same set of API calls across peers. */
 | |
| +			up_read(&wg->static_identity.lock);
 | |
| +			goto out; /* ret is still 0 here, so *len is filled in and the caller skips over this record. */
 | |
| +		}
 | |
| +		up_read(&wg->static_identity.lock);
 | |
| +
 | |
| +		peer = peer_rcu_get(peer_create(wg, in_peer.public_key, in_peer.preshared_key));
 | |
| +		if (!peer) /* Creation (or taking the reference) failed; reported as out-of-memory. */
 | |
| +			return -ENOMEM;
 | |
| +		if (netdev_pub(wg)->flags & IFF_UP) /* Timers are only set up here when the interface is already up. */
 | |
| +			timers_init_peer(peer);
 | |
| +	}
 | |
| +
 | |
| +	if (in_peer.flags & WGPEER_REMOVE_ME) {
 | |
| +		peer_put(peer); /* Drop the reference held by this function before removing the peer. */
 | |
| +		peer_remove(peer);
 | |
| +		goto out;
 | |
| +	}
 | |
| +
 | |
| +	if (in_peer.flags & WGPEER_REMOVE_PRESHARED_KEY) {
 | |
| +		down_write(&peer->handshake.lock);
 | |
| +		memset(&peer->handshake.preshared_key, 0, NOISE_SYMMETRIC_KEY_LEN);
 | |
| +		up_write(&peer->handshake.lock);
 | |
| +	} else if (memcmp(zeros, in_peer.preshared_key, WG_KEY_LEN)) { /* A non-zero key in the request replaces the stored one; an all-zero field leaves it untouched. */
 | |
| +		down_write(&peer->handshake.lock);
 | |
| +		memcpy(&peer->handshake.preshared_key, in_peer.preshared_key, NOISE_SYMMETRIC_KEY_LEN);
 | |
| +		up_write(&peer->handshake.lock);
 | |
| +	}
 | |
| +
 | |
| +	if (in_peer.endpoint.addr.sa_family == AF_INET || in_peer.endpoint.addr.sa_family == AF_INET6) { /* Any other address family leaves the peer's endpoint unchanged. */
 | |
| +		struct endpoint endpoint = { { { 0 } } };
 | |
| +		memcpy(&endpoint, &in_peer.endpoint, sizeof(in_peer.endpoint)); /* Only the address part comes from userspace; the rest of the struct stays zeroed. */
 | |
| +		socket_set_peer_endpoint(peer, &endpoint);
 | |
| +	}
 | |
| +
 | |
| +	if (in_peer.flags & WGPEER_REPLACE_IPMASKS) /* Replace semantics: drop this peer's existing entries before inserting the new ones. */
 | |
| +		routing_table_remove_by_peer(&wg->peer_routing_table, peer);
 | |
| +	for (i = 0, user_ipmask = user_peer + sizeof(struct wgpeer); i < in_peer.num_ipmasks; ++i, user_ipmask += sizeof(struct wgipmask)) { /* The wgipmask entries follow the wgpeer struct directly in user memory. */
 | |
| +		ret = set_ipmask(peer, user_ipmask);
 | |
| +		if (ret)
 | |
| +			break; /* Stop at the first failing ipmask; ret keeps the error, but the settings below are still applied. */
 | |
| +	}
 | |
| +
 | |
| +	if (in_peer.persistent_keepalive_interval != (u16)-1) { /* (u16)-1 means: leave the keepalive interval as it is. */
 | |
| +		const bool send_keepalive = !peer->persistent_keepalive_interval && in_peer.persistent_keepalive_interval && netdev_pub(wg)->flags & IFF_UP; /* True only when keepalives go from off to on while the interface is up. */
 | |
| +		peer->persistent_keepalive_interval = (unsigned long)in_peer.persistent_keepalive_interval * HZ; /* Multiplied by HZ, so stored in jiffies (the request value is presumably in seconds). */
 | |
| +		if (send_keepalive)
 | |
| +			packet_send_keepalive(peer);
 | |
| +	}
 | |
| +
 | |
| +	if (netdev_pub(wg)->flags & IFF_UP)
 | |
| +		packet_send_queue(peer);
 | |
| +
 | |
| +	peer_put(peer);
 | |
| +
 | |
| +out:
 | |
| +	if (!ret) /* *len is written only on success; callers must not use it after an error. */
 | |
| +		*len = sizeof(struct wgpeer) + (in_peer.num_ipmasks * sizeof(struct wgipmask));
 | |
| +
 | |
| +	return ret;
 | |
| +}
 | |
| +
 | |
| +int config_set_device(struct wireguard_device *wg, void __user *user_device) /* Apply a userspace struct wgdevice and its trailing peer records to wg while holding device_update_lock; returns 0 or a negative errno. */
 | |
| +{
 | |
| +	int ret;
 | |
| +	size_t i, offset;
 | |
| +	struct wireguard_peer *peer, *temp;
 | |
| +	struct wgdevice in_device;
 | |
| +	void __user *user_peer;
 | |
| +	bool modified_static_identity = false;
 | |
| +
 | |
| +	BUILD_BUG_ON(WG_KEY_LEN != NOISE_PUBLIC_KEY_LEN); /* This file uses the key-length constants interchangeably, so their equality is enforced at build time. */
 | |
| +	BUILD_BUG_ON(WG_KEY_LEN != NOISE_SYMMETRIC_KEY_LEN);
 | |
| +
 | |
| +	mutex_lock(&wg->device_update_lock);
 | |
| +
 | |
| +	ret = -EFAULT;
 | |
| +	if (copy_from_user(&in_device, user_device, sizeof(in_device)))
 | |
| +		goto out;
 | |
| +
 | |
| +	ret = -EPROTO;
 | |
| +	if (in_device.version_magic != WG_API_VERSION_MAGIC) /* Reject requests built against a different API version. */
 | |
| +		goto out;
 | |
| +
 | |
| +	if (in_device.fwmark || (!in_device.fwmark && (in_device.flags & WGDEVICE_REMOVE_FWMARK))) { /* Set a non-zero fwmark, or clear it when it is zero and WGDEVICE_REMOVE_FWMARK is given. */
 | |
| +		wg->fwmark = in_device.fwmark;
 | |
| +		peer_for_each (wg, peer, temp, false)
 | |
| +			socket_clear_peer_endpoint_src(peer);
 | |
| +	}
 | |
| +
 | |
| +	if (in_device.port) { /* A zero port in the request leaves the listen port unchanged. */
 | |
| +		ret = set_device_port(wg, in_device.port);
 | |
| +		if (ret)
 | |
| +			goto out;
 | |
| +	}
 | |
| +
 | |
| +	if (in_device.flags & WGDEVICE_REPLACE_PEERS) /* Replace semantics: existing peers are removed before the request's peers are applied. */
 | |
| +		peer_remove_all(wg);
 | |
| +
 | |
| +	if (in_device.flags & WGDEVICE_REMOVE_PRIVATE_KEY) {
 | |
| +		noise_set_static_identity_private_key(&wg->static_identity, NULL);
 | |
| +		modified_static_identity = true;
 | |
| +	} else if (memcmp(zeros, in_device.private_key, WG_KEY_LEN)) { /* An all-zero private key field means: keep the current key. */
 | |
| +		u8 public_key[NOISE_PUBLIC_KEY_LEN] = { 0 };
 | |
| +		struct wireguard_peer *peer; /* Shadows the function-level peer within this branch. */
 | |
| +		/* We remove before setting, to prevent race, which means doing two 25519-genpub ops. */
 | |
| +		bool unused __attribute((unused)) = curve25519_generate_public(public_key, in_device.private_key); /* The result is deliberately ignored. */
 | |
| +		peer = pubkey_hashtable_lookup(&wg->peer_hashtable, public_key);
 | |
| +		if (peer) { /* A peer whose public key equals our new public key is removed. */
 | |
| +			peer_put(peer);
 | |
| +			peer_remove(peer);
 | |
| +		}
 | |
| +
 | |
| +		noise_set_static_identity_private_key(&wg->static_identity, in_device.private_key);
 | |
| +		modified_static_identity = true;
 | |
| +	}
 | |
| +
 | |
| +	if (modified_static_identity) {
 | |
| +		peer_for_each (wg, peer, temp, false) {
 | |
| +			if (!noise_precompute_static_static(peer)) /* Peers for which the precomputation with the new identity fails are removed. */
 | |
| +				peer_remove(peer);
 | |
| +		}
 | |
| +		cookie_checker_precompute_device_keys(&wg->cookie_checker);
 | |
| +	}
 | |
| +
 | |
| +	for (i = 0, offset = 0, user_peer = user_device + sizeof(struct wgdevice); i < in_device.num_peers; ++i, user_peer += offset) { /* Peer records are variable-length: set_peer() reports each record's size through offset. */
 | |
| +		ret = set_peer(wg, user_peer, &offset);
 | |
| +		if (ret)
 | |
| +			goto out; /* Changes applied before the failing record are not rolled back here. */
 | |
| +	}
 | |
| +	ret = 0;
 | |
| +out:
 | |
| +	mutex_unlock(&wg->device_update_lock);
 | |
| +	memzero_explicit(&in_device.private_key, NOISE_PUBLIC_KEY_LEN); /* Wipe the stack copy of the private key; NOISE_PUBLIC_KEY_LEN equals WG_KEY_LEN per the BUILD_BUG_ON above. */
 | |
| +	return ret;
 | |
| +}
 | |
| +
 | |
| +struct data_remaining { /* Cursor over the userspace output buffer used while writing out peers and ipmasks. */
 | |
| +	void __user *data; /* Next write position in user memory. */
 | |
| +	size_t out_len; /* Bytes still available at data. */
 | |
| +	size_t count; /* Number of records reserved so far through use_data(). */
 | |
| +};
 | |
| +
 | |
| +static inline int use_data(struct data_remaining *data, size_t size) /* Reserve size bytes from the cursor and count one record; returns -EMSGSIZE, leaving the cursor untouched, when fewer bytes remain. */
 | |
| +{
 | |
| +	if (data->out_len < size)
 | |
| +		return -EMSGSIZE;
 | |
| +	data->out_len -= size;
 | |
| +	data->data += size;
 | |
| +	++data->count;
 | |
| +	return 0;
 | |
| +}
 | |
| +
 | |
| +static int populate_ipmask(void *ctx, union nf_inet_addr ip, u8 cidr, int family) /* Routing-table walk callback (see populate_peer): writes one struct wgipmask to the user buffer tracked by ctx. */
 | |
| +{
 | |
| +	int ret;
 | |
| +	struct data_remaining *data = ctx;
 | |
| +	void __user *uipmask = data->data; /* Remember this record's slot before use_data() advances the cursor. */
 | |
| +	struct wgipmask out_ipmask;
 | |
| +
 | |
| +	memset(&out_ipmask, 0, sizeof(struct wgipmask)); /* Zero the whole struct first so unset bytes are not copied to userspace uninitialized. */
 | |
| +
 | |
| +	ret = use_data(data, sizeof(struct wgipmask));
 | |
| +	if (ret)
 | |
| +		return ret;
 | |
| +
 | |
| +	out_ipmask.cidr = cidr;
 | |
| +	out_ipmask.family = family;
 | |
| +	if (family == AF_INET)
 | |
| +		out_ipmask.ip4 = ip.in;
 | |
| +	else if (family == AF_INET6)
 | |
| +		out_ipmask.ip6 = ip.in6;
 | |
| +
 | |
| +	if (copy_to_user(uipmask, &out_ipmask, sizeof(out_ipmask)))
 | |
| +		ret = -EFAULT;
 | |
| +
 | |
| +	return ret;
 | |
| +}
 | |
| +
 | |
| +static int populate_peer(struct wireguard_peer *peer, struct data_remaining *data) /* Write one struct wgpeer followed by that peer's wgipmask entries to the user buffer tracked by data; returns 0 or a negative errno. */
 | |
| +{
 | |
| +	int ret = 0;
 | |
| +	void __user *upeer = data->data; /* Slot for the peer header, which is filled in last. */
 | |
| +	struct wgpeer out_peer;
 | |
| +	struct data_remaining ipmasks_data = { NULL };
 | |
| +
 | |
| +	memset(&out_peer, 0, sizeof(struct wgpeer)); /* Zero the whole struct first so unset bytes are not copied to userspace uninitialized. */
 | |
| +
 | |
| +	ret = use_data(data, sizeof(struct wgpeer));
 | |
| +	if (ret)
 | |
| +		return ret;
 | |
| +
 | |
| +	down_read(&peer->handshake.lock);
 | |
| +	memcpy(out_peer.public_key, peer->handshake.remote_static, NOISE_PUBLIC_KEY_LEN);
 | |
| +	memcpy(out_peer.preshared_key, peer->handshake.preshared_key, NOISE_SYMMETRIC_KEY_LEN);
 | |
| +	up_read(&peer->handshake.lock);
 | |
| +
 | |
| +	read_lock_bh(&peer->endpoint_lock);
 | |
| +	if (peer->endpoint.addr.sa_family == AF_INET)
 | |
| +		out_peer.endpoint.addr4 = peer->endpoint.addr4;
 | |
| +	else if (peer->endpoint.addr.sa_family == AF_INET6)
 | |
| +		out_peer.endpoint.addr6 = peer->endpoint.addr6;
 | |
| +	read_unlock_bh(&peer->endpoint_lock); /* With any other family the endpoint field stays zeroed. */
 | |
| +	out_peer.last_handshake_time = peer->walltime_last_handshake;
 | |
| +	out_peer.tx_bytes = peer->tx_bytes;
 | |
| +	out_peer.rx_bytes = peer->rx_bytes;
 | |
| +	out_peer.persistent_keepalive_interval = (u16)(peer->persistent_keepalive_interval / HZ); /* Inverse of the multiplication by HZ done in set_peer(). */
 | |
| +
 | |
| +	ipmasks_data.out_len = data->out_len; /* Walk with a separate cursor so that its count covers only this peer's ipmasks. */
 | |
| +	ipmasks_data.data = data->data;
 | |
| +	ret = routing_table_walk_ips_by_peer_sleepable(&peer->device->peer_routing_table, &ipmasks_data, peer, populate_ipmask);
 | |
| +	if (ret)
 | |
| +		return ret;
 | |
| +	data->out_len = ipmasks_data.out_len; /* Fold the space consumed by the ipmasks back into the caller's cursor. */
 | |
| +	data->data = ipmasks_data.data;
 | |
| +	out_peer.num_ipmasks = ipmasks_data.count;
 | |
| +
 | |
| +	if (copy_to_user(upeer, &out_peer, sizeof(out_peer))) /* The header goes out last, once num_ipmasks is known. */
 | |
| +		ret = -EFAULT;
 | |
| +	return ret;
 | |
| +}
 | |
| +
 | |
| +int config_get_device(struct wireguard_device *wg, void __user *user_device) /* Write the device configuration and all peers to user_device, or, when user_device is NULL, return the byte size needed for the peer data. */
 | |
| +{
 | |
| +	int ret;
 | |
| +	struct wireguard_peer *peer, *temp;
 | |
| +	struct net_device *dev = netdev_pub(wg);
 | |
| +	struct data_remaining peer_data = { NULL };
 | |
| +	struct wgdevice out_device;
 | |
| +	struct wgdevice in_device;
 | |
| +
 | |
| +	BUILD_BUG_ON(WG_KEY_LEN != NOISE_PUBLIC_KEY_LEN);
 | |
| +	BUILD_BUG_ON(WG_KEY_LEN != NOISE_SYMMETRIC_KEY_LEN);
 | |
| +
 | |
| +	memset(&out_device, 0, sizeof(struct wgdevice)); /* Zero the whole struct first so unset bytes are not copied to userspace uninitialized. */
 | |
| +
 | |
| +	mutex_lock(&wg->device_update_lock);
 | |
| +
 | |
| +	if (!user_device) { /* Size query: the sum below counts peers and ipmasks only, not struct wgdevice itself. */
 | |
| +		ret = peer_total_count(wg) * sizeof(struct wgpeer)
 | |
| +		    + routing_table_count_nodes(&wg->peer_routing_table) * sizeof(struct wgipmask); /* NOTE(review): the size_t sum is narrowed to int ret -- confirm it cannot exceed INT_MAX. */
 | |
| +		goto out;
 | |
| +	}
 | |
| +
 | |
| +	ret = -EFAULT;
 | |
| +	if (copy_from_user(&in_device, user_device, sizeof(in_device)))
 | |
| +		goto out;
 | |
| +
 | |
| +	ret = -EPROTO;
 | |
| +	if (in_device.version_magic != WG_API_VERSION_MAGIC)
 | |
| +		goto out;
 | |
| +
 | |
| +	out_device.version_magic = WG_API_VERSION_MAGIC;
 | |
| +	out_device.port = wg->incoming_port;
 | |
| +	out_device.fwmark = wg->fwmark;
 | |
| +	memcpy(out_device.interface, dev->name, IFNAMSIZ);
 | |
| +
 | |
| +	down_read(&wg->static_identity.lock);
 | |
| +	if (wg->static_identity.has_identity) { /* Without an identity both key fields stay all-zero. */
 | |
| +		memcpy(out_device.private_key, wg->static_identity.static_private, WG_KEY_LEN);
 | |
| +		memcpy(out_device.public_key, wg->static_identity.static_public, WG_KEY_LEN);
 | |
| +	}
 | |
| +	up_read(&wg->static_identity.lock);
 | |
| +
 | |
| +	peer_data.out_len = in_device.peers_size; /* The caller states how many bytes follow its struct wgdevice. */
 | |
| +	peer_data.data = user_device + sizeof(struct wgdevice);
 | |
| +
 | |
| +	ret = 0;
 | |
| +	peer_for_each (wg, peer, temp, false) {
 | |
| +		ret = populate_peer(peer, &peer_data);
 | |
| +		if (ret)
 | |
| +			break;
 | |
| +	}
 | |
| +	if (ret)
 | |
| +		goto out;
 | |
| +	out_device.num_peers = peer_data.count;
 | |
| +
 | |
| +	ret = -EFAULT;
 | |
| +	if (copy_to_user(user_device, &out_device, sizeof(out_device))) /* The device header is written last, once num_peers is known. */
 | |
| +		goto out;
 | |
| +
 | |
| +	ret = 0;
 | |
| +
 | |
| +out:
 | |
| +	mutex_unlock(&wg->device_update_lock);
 | |
| +	memzero_explicit(&out_device.private_key, NOISE_PUBLIC_KEY_LEN); /* Wipe the stack copy of the private key; NOISE_PUBLIC_KEY_LEN equals WG_KEY_LEN per the BUILD_BUG_ON above. */
 | |
| +	return ret;
 | |
| +}
 | |
| --- /dev/null	2017-07-05 16:27:37.615351856 +0200
 | |
| +++ b/net/wireguard/cookie.c	2017-07-06 18:17:33.000000000 +0200
 | |
| @@ -0,0 +1,192 @@
 | |
| +/* Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. */
 | |
| +
 | |
| +#include "cookie.h"
 | |
| +#include "peer.h"
 | |
| +#include "device.h"
 | |
| +#include "messages.h"
 | |
| +#include "ratelimiter.h"
 | |
| +#include "crypto/blake2s.h"
 | |
| +#include "crypto/chacha20poly1305.h"
 | |
| +
 | |
| +#include <linux/jiffies.h>
 | |
| +#include <net/ipv6.h>
 | |
| +#include <crypto/algapi.h>
 | |
| +
 | |
| +void cookie_checker_init(struct cookie_checker *checker, struct wireguard_device *wg) /* Initialize checker: its lock, a fresh random secret stamped with the current time, and the back-pointer to wg. */
 | |
| +{
 | |
| +	init_rwsem(&checker->secret_lock);
 | |
| +	checker->secret_birthdate = get_jiffies_64(); /* make_cookie() uses this timestamp to decide when the secret is replaced. */
 | |
| +	get_random_bytes(checker->secret, NOISE_HASH_LEN);
 | |
| +	checker->device = wg;
 | |
| +}
 | |
| +
 | |
| +enum { COOKIE_KEY_LABEL_LEN = 8 }; /* Length of the labels below; exactly 8 characters, so the arrays hold no terminating NUL. */
 | |
| +static const u8 mac1_key_label[COOKIE_KEY_LABEL_LEN] = "mac1----"; /* Label hashed with a public key to derive the mac1 key. */
 | |
| +static const u8 cookie_key_label[COOKIE_KEY_LABEL_LEN] = "cookie--"; /* Label hashed with a public key to derive the cookie encryption/decryption key. */
 | |
| +
 | |
| +static void precompute_key(u8 key[NOISE_SYMMETRIC_KEY_LEN], const u8 pubkey[NOISE_PUBLIC_KEY_LEN], const u8 label[COOKIE_KEY_LABEL_LEN]) /* Derive key as the BLAKE2s hash of label followed by pubkey. */
 | |
| +{
 | |
| +	struct blake2s_state blake;
 | |
| +	blake2s_init(&blake, NOISE_SYMMETRIC_KEY_LEN); /* The output length is fixed at init time. */
 | |
| +	blake2s_update(&blake, label, COOKIE_KEY_LABEL_LEN);
 | |
| +	blake2s_update(&blake, pubkey, NOISE_PUBLIC_KEY_LEN);
 | |
| +	blake2s_final(&blake, key, NOISE_SYMMETRIC_KEY_LEN);
 | |
| +}
 | |
| +
 | |
| +void cookie_checker_precompute_device_keys(struct cookie_checker *checker) /* Recompute the checker's cookie-encryption and mac1 keys from the device's static public key, or zero them when there is no identity. */
 | |
| +{
 | |
| +	down_read(&checker->device->static_identity.lock); /* Hold the identity lock for reading while the public key is used. */
 | |
| +	if (likely(checker->device->static_identity.has_identity)) {
 | |
| +		precompute_key(checker->cookie_encryption_key, checker->device->static_identity.static_public, cookie_key_label);
 | |
| +		precompute_key(checker->message_mac1_key, checker->device->static_identity.static_public, mac1_key_label);
 | |
| +	}
 | |
| +	else {
 | |
| +		memset(checker->cookie_encryption_key, 0, NOISE_SYMMETRIC_KEY_LEN);
 | |
| +		memset(checker->message_mac1_key, 0, NOISE_SYMMETRIC_KEY_LEN);
 | |
| +	}
 | |
| +	up_read(&checker->device->static_identity.lock);
 | |
| +}
 | |
| +
 | |
| +void cookie_checker_precompute_peer_keys(struct wireguard_peer *peer) /* Derive the per-peer cookie-decryption and mac1 keys from the peer's static public key. */
 | |
| +{
 | |
| +	precompute_key(peer->latest_cookie.cookie_decryption_key, peer->handshake.remote_static, cookie_key_label);
 | |
| +	precompute_key(peer->latest_cookie.message_mac1_key, peer->handshake.remote_static, mac1_key_label);
 | |
| +}
 | |
| +
 | |
| +void cookie_init(struct cookie *cookie) /* Reset cookie to all-zero state and initialize its lock. */
 | |
| +{
 | |
| +	memset(cookie, 0, sizeof(struct cookie)); /* Zeroing also clears the is_valid and have_sent_mac1 flags used elsewhere in this file. */
 | |
| +	init_rwsem(&cookie->lock);
 | |
| +}
 | |
| +
 | |
| +static void compute_mac1(u8 mac1[COOKIE_LEN], const void *message, size_t len, const u8 key[NOISE_SYMMETRIC_KEY_LEN]) /* mac1 = keyed BLAKE2s over the message bytes that precede the mac1 field; len is the full message length including the trailing struct message_macs. */
 | |
| +{
 | |
| +	len = len - sizeof(struct message_macs) + offsetof(struct message_macs, mac1); /* Shrink len so the hashed range ends where mac1 begins. */
 | |
| +	blake2s(mac1, message, key, COOKIE_LEN, len, NOISE_SYMMETRIC_KEY_LEN);
 | |
| +}
 | |
| +
 | |
| +static void compute_mac2(u8 mac2[COOKIE_LEN], const void *message, size_t len, const u8 cookie[COOKIE_LEN]) /* mac2 = BLAKE2s keyed with cookie over the message bytes that precede the mac2 field; len is the full message length. */
 | |
| +{
 | |
| +	len = len - sizeof(struct message_macs) + offsetof(struct message_macs, mac2); /* Shrink len so the hashed range ends where mac2 begins. */
 | |
| +	blake2s(mac2, message, cookie, COOKIE_LEN, len, COOKIE_LEN);
 | |
| +}
 | |
| +
 | |
| +static void make_cookie(u8 cookie[COOKIE_LEN], struct sk_buff *skb, struct cookie_checker *checker) /* Compute the cookie for skb's sender: a BLAKE2s hash, keyed with the checker's secret, over the source IP address and UDP source port. */
 | |
| +{
 | |
| +	struct blake2s_state state;
 | |
| +
 | |
| +	if (!time_is_after_jiffies64(checker->secret_birthdate + COOKIE_SECRET_MAX_AGE)) { /* The secret has reached COOKIE_SECRET_MAX_AGE: replace it. NOTE(review): this age check runs before secret_lock is taken. */
 | |
| +		down_write(&checker->secret_lock);
 | |
| +		checker->secret_birthdate = get_jiffies_64();
 | |
| +		get_random_bytes(checker->secret, NOISE_HASH_LEN);
 | |
| +		up_write(&checker->secret_lock);
 | |
| +	}
 | |
| +
 | |
| +	down_read(&checker->secret_lock); /* The secret is only read from here on. */
 | |
| +
 | |
| +	blake2s_init_key(&state, COOKIE_LEN, checker->secret, NOISE_HASH_LEN);
 | |
| +	if (skb->protocol == htons(ETH_P_IP))
 | |
| +		blake2s_update(&state, (u8 *)&ip_hdr(skb)->saddr, sizeof(struct in_addr));
 | |
| +	else if (skb->protocol == htons(ETH_P_IPV6))
 | |
| +		blake2s_update(&state, (u8 *)&ipv6_hdr(skb)->saddr, sizeof(struct in6_addr));
 | |
| +	blake2s_update(&state, (u8 *)&udp_hdr(skb)->source, sizeof(__be16)); /* The source port is hashed for either protocol. */
 | |
| +	blake2s_final(&state, cookie, COOKIE_LEN);
 | |
| +
 | |
| +	up_read(&checker->secret_lock);
 | |
| +}
 | |
| +
 | |
| +enum cookie_mac_state cookie_validate_packet(struct cookie_checker *checker, struct sk_buff *skb, bool check_cookie) /* Classify skb by its trailing MACs: mac1 is always verified; mac2 and the rate limiter are consulted only when check_cookie is true. */
 | |
| +{
 | |
| +	u8 computed_mac[COOKIE_LEN];
 | |
| +	u8 cookie[COOKIE_LEN];
 | |
| +	enum cookie_mac_state ret;
 | |
| +	struct message_macs *macs = (struct message_macs *)(skb->data + skb->len - sizeof(struct message_macs)); /* The MACs are the last bytes of the packet. NOTE(review): assumes skb->len >= sizeof(struct message_macs) -- presumably checked by the caller. */
 | |
| +
 | |
| +	ret = INVALID_MAC;
 | |
| +	compute_mac1(computed_mac, skb->data, skb->len, checker->message_mac1_key);
 | |
| +	if (crypto_memneq(computed_mac, macs->mac1, COOKIE_LEN)) /* crypto_memneq() rather than memcmp(), presumably for a constant-time comparison. */
 | |
| +		goto out;
 | |
| +
 | |
| +	ret = VALID_MAC_BUT_NO_COOKIE;
 | |
| +
 | |
| +	if (!check_cookie) /* The caller did not ask for cookie enforcement; a valid mac1 is as far as we check. */
 | |
| +		goto out;
 | |
| +
 | |
| +	make_cookie(cookie, skb, checker);
 | |
| +
 | |
| +	compute_mac2(computed_mac, skb->data, skb->len, cookie);
 | |
| +	if (crypto_memneq(computed_mac, macs->mac2, COOKIE_LEN))
 | |
| +		goto out;
 | |
| +
 | |
| +	ret = VALID_MAC_WITH_COOKIE_BUT_RATELIMITED;
 | |
| +	if (!ratelimiter_allow(skb, dev_net(netdev_pub(checker->device)))) /* The rate limiter is consulted only after both MACs have verified. */
 | |
| +		goto out;
 | |
| +
 | |
| +	ret = VALID_MAC_WITH_COOKIE;
 | |
| +
 | |
| +out:
 | |
| +	return ret;
 | |
| +}
 | |
| +
 | |
| +void cookie_add_mac_to_packet(void *message, size_t len, struct wireguard_peer *peer) /* Fill in the trailing mac1 and mac2 fields of an outgoing message of len bytes addressed to peer. */
 | |
| +{
 | |
| +	struct message_macs *macs = (struct message_macs *)((u8 *)message + len - sizeof(struct message_macs)); /* The MACs occupy the last bytes of the message. */
 | |
| +
 | |
| +	down_write(&peer->latest_cookie.lock);
 | |
| +	compute_mac1(macs->mac1, message, len, peer->latest_cookie.message_mac1_key);
 | |
| +	memcpy(peer->latest_cookie.last_mac1_sent, macs->mac1, COOKIE_LEN); /* Kept because cookie_message_consume() uses it when decrypting a cookie reply. */
 | |
| +	peer->latest_cookie.have_sent_mac1 = true;
 | |
| +	up_write(&peer->latest_cookie.lock);
 | |
| +
 | |
| +	down_read(&peer->latest_cookie.lock);
 | |
| +	if (peer->latest_cookie.is_valid && time_is_after_jiffies64(peer->latest_cookie.birthdate + COOKIE_SECRET_MAX_AGE - COOKIE_SECRET_LATENCY)) /* Use the stored cookie only while it is valid and younger than MAX_AGE minus LATENCY. */
 | |
| +		compute_mac2(macs->mac2, message, len, peer->latest_cookie.cookie);
 | |
| +	else
 | |
| +		memset(macs->mac2, 0, COOKIE_LEN); /* No usable cookie: mac2 is sent as all zeros. */
 | |
| +	up_read(&peer->latest_cookie.lock);
 | |
| +}
 | |
| +
 | |
| +void cookie_message_create(struct message_handshake_cookie *dst, struct sk_buff *skb, __le32 index, struct cookie_checker *checker) /* Build in dst a cookie reply for the sender of skb, addressed to receiver index. */
 | |
| +{
 | |
| +	struct message_macs *macs = (struct message_macs *)((u8 *)skb->data + skb->len - sizeof(struct message_macs)); /* MACs of the received packet; its mac1 is bound into the reply below. */
 | |
| +	u8 cookie[COOKIE_LEN];
 | |
| +
 | |
| +	dst->header.type = cpu_to_le32(MESSAGE_HANDSHAKE_COOKIE);
 | |
| +	dst->receiver_index = index;
 | |
| +	get_random_bytes_wait(dst->nonce, COOKIE_NONCE_LEN);
 | |
| +
 | |
| +	make_cookie(cookie, skb, checker);
 | |
| +	xchacha20poly1305_encrypt(dst->encrypted_cookie, cookie, COOKIE_LEN, macs->mac1, COOKIE_LEN, dst->nonce, checker->cookie_encryption_key); /* The received mac1 is passed as additional authenticated data. */
 | |
| +}
 | |
| +
 | |
| +void cookie_message_consume(struct message_handshake_cookie *src, struct wireguard_device *wg) /* Handle a received cookie reply: decrypt it and, on success, store the cookie on the peer it refers to. */
 | |
| +{
 | |
| +	u8 cookie[COOKIE_LEN];
 | |
| +	struct index_hashtable_entry *entry;
 | |
| +	bool ret;
 | |
| +
 | |
| +	entry = index_hashtable_lookup(&wg->index_hashtable, INDEX_HASHTABLE_HANDSHAKE | INDEX_HASHTABLE_KEYPAIR, src->receiver_index);
 | |
| +	if (unlikely(!entry)) /* Unknown receiver index: ignore the message. */
 | |
| +		return;
 | |
| +
 | |
| +	down_read(&entry->peer->latest_cookie.lock);
 | |
| +	if (unlikely(!entry->peer->latest_cookie.have_sent_mac1)) { /* No mac1 has been sent since the flag was last cleared, so this reply is ignored. */
 | |
| +		up_read(&entry->peer->latest_cookie.lock);
 | |
| +		goto out;
 | |
| +	}
 | |
| +	ret = xchacha20poly1305_decrypt(cookie, src->encrypted_cookie, sizeof(src->encrypted_cookie), entry->peer->latest_cookie.last_mac1_sent, COOKIE_LEN, src->nonce, entry->peer->latest_cookie.cookie_decryption_key); /* The last mac1 we sent is the additional authenticated data. */
 | |
| +	up_read(&entry->peer->latest_cookie.lock);
 | |
| +
 | |
| +	if (ret) {
 | |
| +		down_write(&entry->peer->latest_cookie.lock);
 | |
| +		memcpy(entry->peer->latest_cookie.cookie, cookie, COOKIE_LEN);
 | |
| +		entry->peer->latest_cookie.birthdate = get_jiffies_64();
 | |
| +		entry->peer->latest_cookie.is_valid = true;
 | |
| +		entry->peer->latest_cookie.have_sent_mac1 = false; /* A further reply is accepted only after another mac1 has been sent. */
 | |
| +		up_write(&entry->peer->latest_cookie.lock);
 | |
| +	} else
 | |
| +		net_dbg_ratelimited("%s: Could not decrypt invalid cookie response\n", netdev_pub(wg)->name);
 | |
| +
 | |
| +out:
 | |
| +	peer_put(entry->peer); /* Drops the peer reference presumably taken by index_hashtable_lookup(). */
 | |
| +}
 | |
| --- /dev/null	2017-07-05 16:27:37.615351856 +0200
 | |
| +++ b/net/wireguard/data.c	2017-07-06 18:17:33.000000000 +0200
 | |
| @@ -0,0 +1,433 @@
 | |
| +/* Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. */
 | |
| +
 | |
| +#include "noise.h"
 | |
| +#include "device.h"
 | |
| +#include "peer.h"
 | |
| +#include "messages.h"
 | |
| +#include "packets.h"
 | |
| +#include "hashtables.h"
 | |
| +
 | |
| +#include <linux/rcupdate.h>
 | |
| +#include <linux/slab.h>
 | |
| +#include <linux/bitmap.h>
 | |
| +#include <linux/scatterlist.h>
 | |
| +#include <net/ip_tunnels.h>
 | |
| +#include <net/xfrm.h>
 | |
| +#include <crypto/algapi.h>
 | |
| +
 | |
| +struct encryption_ctx { /* Work item for the parallel encryption path (see begin/finish_parallel_encryption). */
 | |
| +	struct padata_priv padata; /* Embedded padata handle; container_of() recovers the ctx from it. */
 | |
| +	struct sk_buff_head queue; /* Packets to encrypt. */
 | |
| +	struct wireguard_peer *peer; /* Destination peer; a reference is dropped in finish_parallel_encryption(). */
 | |
| +	struct noise_keypair *keypair; /* Keypair used for encryption; released by queue_encrypt_reset(). */
 | |
| +};
 | |
| +
 | |
| +struct decryption_ctx { /* Work item for the decryption path; its users are outside the code visible here. */
 | |
| +	struct padata_priv padata; /* Embedded padata handle. */
 | |
| +	struct endpoint endpoint; /* Presumably the endpoint the packet arrived from -- confirm at the use site. */
 | |
| +	struct sk_buff *skb; /* Packet to decrypt. */
 | |
| +	struct noise_keypair *keypair; /* Keypair used for decryption. */
 | |
| +};
 | |
| +
 | |
| +#ifdef CONFIG_WIREGUARD_PARALLEL
 | |
| +static struct kmem_cache *encryption_ctx_cache __read_mostly;
 | |
| +static struct kmem_cache *decryption_ctx_cache __read_mostly;
 | |
| +
 | |
| +int __init packet_init_data_caches(void) /* Create the slab caches for encryption and decryption contexts; returns 0 or -ENOMEM. */
 | |
| +{
 | |
| +	encryption_ctx_cache = kmem_cache_create("wireguard_encryption_ctx", sizeof(struct encryption_ctx), 0, 0, NULL);
 | |
| +	if (!encryption_ctx_cache)
 | |
| +		return -ENOMEM;
 | |
| +	decryption_ctx_cache = kmem_cache_create("wireguard_decryption_ctx", sizeof(struct decryption_ctx), 0, 0, NULL);
 | |
| +	if (!decryption_ctx_cache) {
 | |
| +		kmem_cache_destroy(encryption_ctx_cache); /* Undo the first allocation so a failure leaves nothing behind. */
 | |
| +		return -ENOMEM;
 | |
| +	}
 | |
| +	return 0;
 | |
| +}
 | |
| +
 | |
| +void packet_deinit_data_caches(void) /* Destroy both slab caches created by packet_init_data_caches(). */
 | |
| +{
 | |
| +	kmem_cache_destroy(encryption_ctx_cache);
 | |
| +	kmem_cache_destroy(decryption_ctx_cache);
 | |
| +}
 | |
| +#endif
 | |
| +
 | |
| +/* This is RFC6479, a replay detection bitmap algorithm that avoids bitshifts */
 | |
| +static inline bool counter_validate(union noise_counter *counter, u64 their_counter) /* Returns true if their_counter is acceptable (inside the window and not seen before) and records it; false for replays, too-old or exhausted counters. */
 | |
| +{
 | |
| +	bool ret = false;
 | |
| +	unsigned long index, index_current, top, i;
 | |
| +	spin_lock_bh(&counter->receive.lock);
 | |
| +
 | |
| +	if (unlikely(counter->receive.counter >= REJECT_AFTER_MESSAGES + 1 || their_counter >= REJECT_AFTER_MESSAGES)) /* Either side has hit the message limit: reject. */
 | |
| +		goto out;
 | |
| +
 | |
| +	++their_counter; /* From here on the logic works with their_counter + 1, which is also the value stored in receive.counter. */
 | |
| +
 | |
| +	if (unlikely((COUNTER_WINDOW_SIZE + their_counter) < counter->receive.counter)) /* More than COUNTER_WINDOW_SIZE behind the highest counter seen: too old. */
 | |
| +		goto out;
 | |
| +
 | |
| +	index = their_counter >> ilog2(BITS_PER_LONG); /* Index of the bitmap word that holds this counter's bit. */
 | |
| +
 | |
| +	if (likely(their_counter > counter->receive.counter)) { /* The window moves forward: clear the words being entered, then record the new highest counter. */
 | |
| +		index_current = counter->receive.counter >> ilog2(BITS_PER_LONG);
 | |
| +		top = min_t(unsigned long, index - index_current, COUNTER_BITS_TOTAL / BITS_PER_LONG); /* Never clear more words than the bitmap has. */
 | |
| +		for (i = 1; i <= top; ++i)
 | |
| +			counter->receive.backtrack[(i + index_current) & ((COUNTER_BITS_TOTAL / BITS_PER_LONG) - 1)] = 0; /* The mask wraps the word index around the ring of bitmap words. */
 | |
| +		counter->receive.counter = their_counter;
 | |
| +	}
 | |
| +
 | |
| +	index &= (COUNTER_BITS_TOTAL / BITS_PER_LONG) - 1;
 | |
| +	ret = !test_and_set_bit(their_counter & (BITS_PER_LONG - 1), &counter->receive.backtrack[index]); /* Accept only if the bit was not already set; a set bit means this counter was seen before. */
 | |
| +
 | |
| +out:
 | |
| +	spin_unlock_bh(&counter->receive.lock);
 | |
| +	return ret;
 | |
| +}
 | |
| +#include "selftest/counter.h"
 | |
| +
 | |
| +static inline unsigned int skb_padding(struct sk_buff *skb) /* Number of padding bytes to append so the payload length becomes a multiple of MESSAGE_PADDING_MULTIPLE, capped at the device MTU. */
 | |
| +{
 | |
| +	/* We do this modulo business with the MTU, just in case the networking layer
 | |
| +	 * gives us a packet that's bigger than the MTU. Now that we support GSO, this
 | |
| +	 * shouldn't be a real problem, and this can likely be removed. But, caution! */
 | |
| +	unsigned int last_unit = skb->len % skb->dev->mtu; /* NOTE(review): this divides by skb->dev->mtu -- confirm a zero MTU cannot reach this point. */
 | |
| +	unsigned int padded_size = (last_unit + MESSAGE_PADDING_MULTIPLE - 1) & ~(MESSAGE_PADDING_MULTIPLE - 1); /* Round up; the mask form assumes MESSAGE_PADDING_MULTIPLE is a power of two. */
 | |
| +	if (padded_size > skb->dev->mtu)
 | |
| +		padded_size = skb->dev->mtu;
 | |
| +	return padded_size - last_unit;
 | |
| +}
 | |
| +
 | |
| +static inline void skb_reset(struct sk_buff *skb) /* Scrub skb and zero its header-related metadata, then re-derive the header offsets from the current data pointer. */
 | |
| +{
 | |
| +	skb_scrub_packet(skb, false);
 | |
| +	memset(&skb->headers_start, 0, offsetof(struct sk_buff, headers_end) - offsetof(struct sk_buff, headers_start)); /* Zero every field between the headers_start and headers_end markers. */
 | |
| +	skb->queue_mapping = 0;
 | |
| +	skb->nohdr = 0;
 | |
| +	skb->peeked = 0;
 | |
| +	skb->mac_len = 0;
 | |
| +	skb->dev = NULL;
 | |
| +#ifdef CONFIG_NET_SCHED
 | |
| +	skb->tc_index = 0;
 | |
| +	skb_reset_tc(skb);
 | |
| +#endif
 | |
| +	skb->hdr_len = skb_headroom(skb);
 | |
| +	skb_reset_mac_header(skb);
 | |
| +	skb_reset_network_header(skb);
 | |
| +	skb_probe_transport_header(skb, 0);
 | |
| +	skb_reset_inner_headers(skb);
 | |
| +}
 | |
| +
 | |
| +static inline bool skb_encrypt(struct sk_buff *skb, struct noise_keypair *keypair, bool have_simd) /* Pad skb, prepend the data-message header and encrypt the payload in place with keypair's sending key; returns false on any failure. */
 | |
| +{
 | |
| +	struct scatterlist sg[MAX_SKB_FRAGS * 2 + 1];
 | |
| +	struct message_data *header;
 | |
| +	unsigned int padding_len, plaintext_len, trailer_len;
 | |
| +	int num_frags;
 | |
| +	struct sk_buff *trailer;
 | |
| +
 | |
| +	/* Store the ds bit in the cb */
 | |
| +	PACKET_CB(skb)->ds = ip_tunnel_ecn_encap(0 /* No outer TOS: no leak. TODO: should we use flowi->tos as outer? */, ip_hdr(skb), skb);
 | |
| +
 | |
| +	/* Calculate lengths */
 | |
| +	padding_len = skb_padding(skb);
 | |
| +	trailer_len = padding_len + noise_encrypted_len(0); /* noise_encrypted_len(0) is the size added by encrypting an empty payload (the auth tag, per the comments below). */
 | |
| +	plaintext_len = skb->len + padding_len;
 | |
| +
 | |
| +	/* Expand data section to have room for padding and auth tag */
 | |
| +	num_frags = skb_cow_data(skb, trailer_len, &trailer);
 | |
| +	if (unlikely(num_frags < 0 || num_frags > ARRAY_SIZE(sg))) /* Fail on error or when the skb needs more scatterlist entries than sg provides. */
 | |
| +		return false;
 | |
| +
 | |
| +	/* Set the padding to zeros, and make sure it and the auth tag are part of the skb */
 | |
| +	memset(skb_tail_pointer(trailer), 0, padding_len);
 | |
| +
 | |
| +	/* Expand head section to have room for our header and the network stack's headers. */
 | |
| +	if (unlikely(skb_cow_head(skb, DATA_PACKET_HEAD_ROOM) < 0))
 | |
| +		return false;
 | |
| +
 | |
| +	/* We have to remember to add the checksum to the innerpacket, in case the receiver forwards it. */
 | |
| +	if (likely(!skb_checksum_setup(skb, true)))
 | |
| +		skb_checksum_help(skb);
 | |
| +
 | |
| +	/* Only after checksumming can we safely add on the padding at the end and the header. */
 | |
| +	header = (struct message_data *)skb_push(skb, sizeof(struct message_data));
 | |
| +	header->header.type = cpu_to_le32(MESSAGE_DATA);
 | |
| +	header->key_idx = keypair->remote_index;
 | |
| +	header->counter = cpu_to_le64(PACKET_CB(skb)->nonce); /* The nonce was stored in the cb before this call; it also appears in the header in little-endian form. */
 | |
| +	pskb_put(skb, trailer, trailer_len);
 | |
| +
 | |
| +	/* Now we can encrypt the scattergather segments */
 | |
| +	sg_init_table(sg, num_frags);
 | |
| +	if (skb_to_sgvec(skb, sg, sizeof(struct message_data), noise_encrypted_len(plaintext_len)) <= 0) /* Map everything after the header: plaintext, padding and room for the tag. */
 | |
| +		return false;
 | |
| +	return chacha20poly1305_encrypt_sg(sg, sg, plaintext_len, NULL, 0, PACKET_CB(skb)->nonce, keypair->sending.key, have_simd); /* In place (source and destination are the same list), with no additional data. */
 | |
| +}
 | |
| +
 | |
| +static inline bool skb_decrypt(struct sk_buff *skb, struct noise_symmetric_key *key) /* Strip the data-message header from skb and decrypt the payload in place with key; returns false if the key is unusable or decryption fails. */
 | |
| +{
 | |
| +	struct scatterlist sg[MAX_SKB_FRAGS * 2 + 1];
 | |
| +	struct sk_buff *trailer;
 | |
| +	int num_frags;
 | |
| +
 | |
| +	if (unlikely(!key))
 | |
| +		return false;
 | |
| +
 | |
| +	if (unlikely(!key->is_valid || time_is_before_eq_jiffies64(key->birthdate + REJECT_AFTER_TIME) || key->counter.receive.counter >= REJECT_AFTER_MESSAGES)) { /* The key is invalid, has reached REJECT_AFTER_TIME, or has received REJECT_AFTER_MESSAGES messages. */
 | |
| +		key->is_valid = false; /* Invalidate it so later packets fail on the first check. */
 | |
| +		return false;
 | |
| +	}
 | |
| +
 | |
| +	PACKET_CB(skb)->nonce = le64_to_cpu(((struct message_data *)skb->data)->counter); /* Read the nonce from the header before the header is pulled off. */
 | |
| +	skb_pull(skb, sizeof(struct message_data));
 | |
| +	num_frags = skb_cow_data(skb, 0, &trailer);
 | |
| +	if (unlikely(num_frags < 0 || num_frags > ARRAY_SIZE(sg)))
 | |
| +		return false;
 | |
| +
 | |
| +	sg_init_table(sg, num_frags);
 | |
| +	if (skb_to_sgvec(skb, sg, 0, skb->len) <= 0)
 | |
| +		return false;
 | |
| +
 | |
| +	if (!chacha20poly1305_decrypt_sg(sg, sg, skb->len, NULL, 0, PACKET_CB(skb)->nonce, key->key)) /* In place (source and destination are the same list), with no additional data. */
 | |
| +		return false;
 | |
| +
 | |
| +	return !pskb_trim(skb, skb->len - noise_encrypted_len(0)); /* Cut off the trailing noise_encrypted_len(0) bytes; pskb_trim() returns 0 on success. */
 | |
| +}
 | |
| +
 | |
| +static inline bool get_encryption_nonce(u64 *nonce, struct noise_symmetric_key *key) /* Allocate the next sending nonce from key into *nonce; returns false, invalidating the key, once it is expired or exhausted. */
 | |
| +{
 | |
| +	if (unlikely(!key))
 | |
| +		return false;
 | |
| +
 | |
| +	if (unlikely(!key->is_valid || time_is_before_eq_jiffies64(key->birthdate + REJECT_AFTER_TIME))) {
 | |
| +		key->is_valid = false;
 | |
| +		return false;
 | |
| +	}
 | |
| +
 | |
| +	*nonce = atomic64_inc_return(&key->counter.counter) - 1; /* Atomic post-increment: each caller obtains a distinct value. */
 | |
| +	if (*nonce >= REJECT_AFTER_MESSAGES) { /* Message limit reached: *nonce has been written but must not be used. */
 | |
| +		key->is_valid = false;
 | |
| +		return false;
 | |
| +	}
 | |
| +
 | |
| +	return true;
 | |
| +}
 | |
| +
 | |
| +static inline void queue_encrypt_reset(struct sk_buff_head *queue, struct noise_keypair *keypair) /* Encrypt and reset every packet in queue, dropping those that fail, then release the keypair. */
 | |
| +{
 | |
| +	struct sk_buff *skb, *tmp;
 | |
| +	bool have_simd = chacha20poly1305_init_simd(); /* Obtained once for the whole queue and handed back by the deinit call below. */
 | |
| +	skb_queue_walk_safe (queue, skb, tmp) { /* The _safe walker is needed because packets may be unlinked during the walk. */
 | |
| +		if (unlikely(!skb_encrypt(skb, keypair, have_simd))) {
 | |
| +			__skb_unlink(skb, queue);
 | |
| +			kfree_skb(skb);
 | |
| +			continue;
 | |
| +		}
 | |
| +		skb_reset(skb);
 | |
| +	}
 | |
| +	chacha20poly1305_deinit_simd(have_simd);
 | |
| +	noise_keypair_put(keypair); /* Releases the keypair reference passed in by the caller. */
 | |
| +}
 | |
| +
 | |
| +#ifdef CONFIG_WIREGUARD_PARALLEL
 | |
| +/* padata parallel callback: encrypts the queue stored in the enclosing encryption_ctx and
 | |
| + * then hands the job to padata's serial stage. */
 | |
| +static void begin_parallel_encryption(struct padata_priv *padata)
 | |
| +{
 | |
| +	struct encryption_ctx *enc = container_of(padata, struct encryption_ctx, padata);
 | |
| +
 | |
| +#if IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && defined(CONFIG_ARM)
 | |
| +	/* NOTE(review): bottom halves are re-enabled around the encryption on 32-bit ARM with
 | |
| +	 * kernel-mode NEON, presumably so the SIMD code path can be used -- confirm. */
 | |
| +	local_bh_enable();
 | |
| +#endif
 | |
| +	queue_encrypt_reset(&enc->queue, enc->keypair);
 | |
| +#if IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && defined(CONFIG_ARM)
 | |
| +	local_bh_disable();
 | |
| +#endif
 | |
| +	padata_do_serial(padata);
 | |
| +}
 | |
| +
 | |
| +/* padata serial callback: passes the encrypted queue on to packet_create_data_done(), then
 | |
| + * undoes the in-flight count and peer reference taken when the job was queued and frees
 | |
| + * the context. */
 | |
| +static void finish_parallel_encryption(struct padata_priv *padata)
 | |
| +{
 | |
| +	struct encryption_ctx *enc = container_of(padata, struct encryption_ctx, padata);
 | |
| +	struct wireguard_peer *peer = enc->peer;
 | |
| +
 | |
| +	packet_create_data_done(&enc->queue, peer);
 | |
| +	atomic_dec(&peer->parallel_encryption_inflight);
 | |
| +	peer_put(peer);
 | |
| +	kmem_cache_free(encryption_ctx_cache, enc);
 | |
| +}
 | |
| +
 | |
| +/* Maps key onto one of the currently online CPUs: key modulo the number of online CPUs
 | |
| + * selects how far to step through cpu_online_mask from its first CPU. */
 | |
| +static inline unsigned int choose_cpu(__le32 key)
 | |
| +{
 | |
| +	unsigned int steps, cb_cpu;
 | |
| +
 | |
| +	/* This ensures that packets encrypted to the same key are sent in-order. */
 | |
| +	steps = ((__force unsigned int)key) % cpumask_weight(cpu_online_mask);
 | |
| +	cb_cpu = cpumask_first(cpu_online_mask);
 | |
| +	while (steps--)
 | |
| +		cb_cpu = cpumask_next(cb_cpu, cpu_online_mask);
 | |
| +
 | |
| +	return cb_cpu;
 | |
| +}
 | |
| +#endif
 | |
| +/* Assigns a nonce to every packet on queue and encrypts the queue with peer's current keypair, through padata or inline. Returns 0 once encryption was done or handed off, -ENOKEY if there is no keypair or the first nonce cannot be obtained, -EPIPE if a later nonce cannot be obtained, and -EBUSY if the parallel job cannot be set up. */
 | |
| +int packet_create_data(struct sk_buff_head *queue, struct wireguard_peer *peer)
 | |
| +{
 | |
| +	int ret = -ENOKEY;
 | |
| +	struct noise_keypair *keypair;
 | |
| +	struct sk_buff *skb;
 | |
| +	/* The current keypair is fetched under the RCU-bh read lock; it is released again with noise_keypair_put() on the err path or by queue_encrypt_reset(). */
 | |
| +	rcu_read_lock_bh();
 | |
| +	keypair = noise_keypair_get(rcu_dereference_bh(peer->keypairs.current_keypair));
 | |
| +	if (unlikely(!keypair))
 | |
| +		goto err_rcu;
 | |
| +	rcu_read_unlock_bh();
 | |
| +	/* Hand out nonces up front, in queue order, before any encryption starts. */
 | |
| +	skb_queue_walk (queue, skb) {
 | |
| +		if (unlikely(!get_encryption_nonce(&PACKET_CB(skb)->nonce, &keypair->sending)))
 | |
| +			goto err;
 | |
| +
 | |
| +		/* After the first time through the loop, if we've succeeded with a legitimate nonce,
 | |
| +		 * then we don't want a -ENOKEY error if subsequent nonces fail. Rather, if this
 | |
| +		 * condition arises, we simply want to error out hard, and drop the entire queue. This
 | |
| +		 * is partially lazy programming and TODO: this could be made to only requeue the
 | |
| +		 * ones that had no nonce. But I'm not sure it's worth the added complexity, given
 | |
| +		 * how rarely that condition should arise. */
 | |
| +		ret = -EPIPE;
 | |
| +	}
 | |
| +	/* Go parallel when more than one CPU is online and either the queue has several packets, its first packet is longer than 256 bytes, or parallel work for this peer is already in flight. A failed context allocation falls back to the serial path. */
 | |
| +#ifdef CONFIG_WIREGUARD_PARALLEL
 | |
| +	if ((skb_queue_len(queue) > 1 || queue->next->len > 256 || atomic_read(&peer->parallel_encryption_inflight) > 0) && cpumask_weight(cpu_online_mask) > 1) {
 | |
| +		struct encryption_ctx *ctx = kmem_cache_alloc(encryption_ctx_cache, GFP_ATOMIC);
 | |
| +		if (!ctx)
 | |
| +			goto serial_encrypt;
 | |
| +		skb_queue_head_init(&ctx->queue);
 | |
| +		skb_queue_splice_init(queue, &ctx->queue);
 | |
| +		memset(&ctx->padata, 0, sizeof(ctx->padata));
 | |
| +		ctx->padata.parallel = begin_parallel_encryption;
 | |
| +		ctx->padata.serial = finish_parallel_encryption;
 | |
| +		ctx->keypair = keypair;
 | |
| +		ctx->peer = peer_rcu_get(peer);
 | |
| +		ret = -EBUSY;
 | |
| +		if (unlikely(!ctx->peer))
 | |
| +			goto err_parallel;
 | |
| +		atomic_inc(&peer->parallel_encryption_inflight);
 | |
| +		if (unlikely(padata_do_parallel(peer->device->encrypt_pd, &ctx->padata, choose_cpu(keypair->remote_index)))) {
 | |
| +			atomic_dec(&peer->parallel_encryption_inflight);
 | |
| +			peer_put(ctx->peer);
 | |
| +err_parallel:
 | |
| +			skb_queue_splice(&ctx->queue, queue);
 | |
| +			kmem_cache_free(encryption_ctx_cache, ctx);
 | |
| +			goto err;
 | |
| +		}
 | |
| +	} else
 | |
| +serial_encrypt:
 | |
| +#endif
 | |
| +	{
 | |
| +		queue_encrypt_reset(queue, keypair);
 | |
| +		packet_create_data_done(queue, peer);
 | |
| +	}
 | |
| +	return 0;
 | |
| +	/* err releases the keypair obtained above; err_rcu is taken before one was obtained and only leaves the RCU read section. */
 | |
| +err:
 | |
| +	noise_keypair_put(keypair);
 | |
| +	return ret;
 | |
| +err_rcu:
 | |
| +	rcu_read_unlock_bh();
 | |
| +	return ret;
 | |
| +}
 | |
| +
 | |
| +/* First receive stage: records the sender's endpoint from the skb and decrypts the packet.
 | |
| + * If either step fails, the peer and keypair are put, the skb is freed and ctx->skb is set
 | |
| + * to NULL so that finish_decrypt_packet() does nothing. */
 | |
| +static void begin_decrypt_packet(struct decryption_ctx *ctx)
 | |
| +{
 | |
| +	if (likely(socket_endpoint_from_skb(&ctx->endpoint, ctx->skb) >= 0 && skb_decrypt(ctx->skb, &ctx->keypair->receiving)))
 | |
| +		return;
 | |
| +
 | |
| +	peer_put(ctx->keypair->entry.peer);
 | |
| +	noise_keypair_put(ctx->keypair);
 | |
| +	dev_kfree_skb(ctx->skb);
 | |
| +	ctx->skb = NULL;
 | |
| +}
 | |
| +
 | |
| +/* Second receive stage: validates the packet's nonce against the keypair's receive counter
 | |
| + * and, if it is acceptable, passes the packet to packet_consume_data_done(). A packet with
 | |
| + * an invalid nonce is logged and dropped. Does nothing if the first stage cleared ctx->skb. */
 | |
| +static void finish_decrypt_packet(struct decryption_ctx *ctx)
 | |
| +{
 | |
| +	struct noise_keypair *keypair;
 | |
| +	struct wireguard_peer *peer;
 | |
| +	struct sk_buff *skb;
 | |
| +	bool used_new_key;
 | |
| +
 | |
| +	if (!ctx->skb)
 | |
| +		return;
 | |
| +
 | |
| +	skb = ctx->skb;
 | |
| +	keypair = ctx->keypair;
 | |
| +	peer = keypair->entry.peer;
 | |
| +
 | |
| +	if (likely(counter_validate(&keypair->receiving.counter, PACKET_CB(skb)->nonce))) {
 | |
| +		used_new_key = noise_received_with_keypair(&peer->keypairs, keypair);
 | |
| +		skb_reset(skb);
 | |
| +		packet_consume_data_done(skb, peer, &ctx->endpoint, used_new_key);
 | |
| +		noise_keypair_put(keypair);
 | |
| +		return;
 | |
| +	}
 | |
| +
 | |
| +	net_dbg_ratelimited("%s: Packet has invalid nonce %Lu (max %Lu)\n", netdev_pub(peer->device)->name, PACKET_CB(skb)->nonce, keypair->receiving.counter.receive.counter);
 | |
| +	peer_put(peer);
 | |
| +	noise_keypair_put(keypair);
 | |
| +	dev_kfree_skb(skb);
 | |
| +}
 | |
| +
 | |
| +#ifdef CONFIG_WIREGUARD_PARALLEL
 | |
| +/* padata parallel callback: runs the first decryption stage for the enclosing
 | |
| + * decryption_ctx and then hands the job to padata's serial stage. */
 | |
| +static void begin_parallel_decryption(struct padata_priv *padata)
 | |
| +{
 | |
| +	struct decryption_ctx *dec = container_of(padata, struct decryption_ctx, padata);
 | |
| +
 | |
| +#if IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && defined(CONFIG_ARM)
 | |
| +	/* NOTE(review): bottom halves are re-enabled around the decryption on 32-bit ARM with
 | |
| +	 * kernel-mode NEON, presumably so the SIMD code path can be used -- confirm. */
 | |
| +	local_bh_enable();
 | |
| +#endif
 | |
| +	begin_decrypt_packet(dec);
 | |
| +#if IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && defined(CONFIG_ARM)
 | |
| +	local_bh_disable();
 | |
| +#endif
 | |
| +	padata_do_serial(padata);
 | |
| +}
 | |
| +
 | |
| +/* padata serial callback: runs the second decryption stage and frees the context. */
 | |
| +static void finish_parallel_decryption(struct padata_priv *padata)
 | |
| +{
 | |
| +	struct decryption_ctx *dec = container_of(padata, struct decryption_ctx, padata);
 | |
| +
 | |
| +	finish_decrypt_packet(dec);
 | |
| +	kmem_cache_free(decryption_ctx_cache, dec);
 | |
| +}
 | |
| +#endif
 | |
| +/* Handles a received data message: looks up the keypair for the message's key_idx and decrypts and consumes the packet, through padata when more than one CPU is online and inline otherwise. The skb is freed if no keypair is found or the parallel job cannot be queued. */
 | |
| +void packet_consume_data(struct sk_buff *skb, struct wireguard_device *wg)
 | |
| +{
 | |
| +	struct noise_keypair *keypair;
 | |
| +	__le32 idx = ((struct message_data *)skb->data)->key_idx;
 | |
| +	/* A successful index_hashtable_lookup() has also taken a reference on the entry's peer. */
 | |
| +	rcu_read_lock_bh();
 | |
| +	keypair = noise_keypair_get((struct noise_keypair *)index_hashtable_lookup(&wg->index_hashtable, INDEX_HASHTABLE_KEYPAIR, idx));
 | |
| +	rcu_read_unlock_bh();
 | |
| +	if (unlikely(!keypair))
 | |
| +		goto err;
 | |
| +	/* NOTE(review): if noise_keypair_get() can return NULL for a non-NULL entry, the peer reference from the lookup is not dropped on the err path -- confirm. */
 | |
| +#ifdef CONFIG_WIREGUARD_PARALLEL
 | |
| +	if (cpumask_weight(cpu_online_mask) > 1) {
 | |
| +		struct decryption_ctx *ctx = kmem_cache_alloc(decryption_ctx_cache, GFP_ATOMIC);
 | |
| +		if (unlikely(!ctx))
 | |
| +			goto err_peer;
 | |
| +		ctx->skb = skb;
 | |
| +		ctx->keypair = keypair;
 | |
| +		memset(&ctx->padata, 0, sizeof(ctx->padata));
 | |
| +		ctx->padata.parallel = begin_parallel_decryption;
 | |
| +		ctx->padata.serial = finish_parallel_decryption;
 | |
| +		if (unlikely(padata_do_parallel(wg->decrypt_pd, &ctx->padata, choose_cpu(idx)))) {
 | |
| +			kmem_cache_free(decryption_ctx_cache, ctx);
 | |
| +			goto err_peer;
 | |
| +		}
 | |
| +	} else
 | |
| +#endif
 | |
| +	{
 | |
| +		struct decryption_ctx ctx = {
 | |
| +			.skb = skb,
 | |
| +			.keypair = keypair
 | |
| +		};
 | |
| +		begin_decrypt_packet(&ctx);
 | |
| +		finish_decrypt_packet(&ctx);
 | |
| +	}
 | |
| +	return;
 | |
| +	/* err_peer puts the peer and the keypair before falling through to err, which only frees the skb. */
 | |
| +#ifdef CONFIG_WIREGUARD_PARALLEL
 | |
| +err_peer:
 | |
| +	peer_put(keypair->entry.peer);
 | |
| +	noise_keypair_put(keypair);
 | |
| +#endif
 | |
| +err:
 | |
| +	dev_kfree_skb(skb);
 | |
| +}
 | |
| --- /dev/null	2017-07-05 16:27:37.615351856 +0200
 | |
| +++ b/net/wireguard/device.c	2017-07-06 18:17:33.000000000 +0200
 | |
| @@ -0,0 +1,392 @@
 | |
| +/* Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. */
 | |
| +
 | |
| +#include "packets.h"
 | |
| +#include "socket.h"
 | |
| +#include "timers.h"
 | |
| +#include "device.h"
 | |
| +#include "config.h"
 | |
| +#include "ratelimiter.h"
 | |
| +#include "peer.h"
 | |
| +#include "uapi.h"
 | |
| +#include "messages.h"
 | |
| +
 | |
| +#include <linux/module.h>
 | |
| +#include <linux/rtnetlink.h>
 | |
| +#include <linux/inet.h>
 | |
| +#include <linux/netdevice.h>
 | |
| +#include <linux/inetdevice.h>
 | |
| +#include <linux/if_arp.h>
 | |
| +#include <linux/icmp.h>
 | |
| +#include <linux/suspend.h>
 | |
| +#include <net/icmp.h>
 | |
| +#include <net/rtnetlink.h>
 | |
| +#include <net/ip_tunnels.h>
 | |
| +#include <net/addrconf.h>
 | |
| +
 | |
| +static LIST_HEAD(device_list);
 | |
| +/* ndo_open handler: disables IPv4 redirect sending and IPv6 address generation for the device, calls socket_init(), and then for every peer starts its timers, flushes its queue and sends a keepalive if a persistent keepalive interval is set. Returns 0 or the socket_init() error. */
 | |
| +static int open(struct net_device *dev)
 | |
| +{
 | |
| +	int ret;
 | |
| +	struct wireguard_peer *peer, *temp;
 | |
| +	struct wireguard_device *wg = netdev_priv(dev);
 | |
| +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0)
 | |
| +	struct inet6_dev *dev_v6 = __in6_dev_get(dev);
 | |
| +#endif
 | |
| +	struct in_device *dev_v4 = __in_dev_get_rtnl(dev);
 | |
| +	/* SEND_REDIRECTS is cleared both on this device and in the "all" setting of the device's network namespace. */
 | |
| +	if (dev_v4) {
 | |
| +		/* TODO: when we merge to mainline, put this check near the ip_rt_send_redirect
 | |
| +		 * call of ip_forward in net/ipv4/ip_forward.c, similar to the current secpath
 | |
| +		 * check, rather than turning it off like this. This is just a stop gap solution
 | |
| +		 * while we're an out of tree module. */
 | |
| +		IN_DEV_CONF_SET(dev_v4, SEND_REDIRECTS, false);
 | |
| +		IPV4_DEVCONF_ALL(dev_net(dev), SEND_REDIRECTS) = false;
 | |
| +	}
 | |
| +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0)
 | |
| +	if (dev_v6)
 | |
| +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
 | |
| +		dev_v6->addr_gen_mode = IN6_ADDR_GEN_MODE_NONE;
 | |
| +#else
 | |
| +		dev_v6->cnf.addr_gen_mode = IN6_ADDR_GEN_MODE_NONE;
 | |
| +#endif
 | |
| +#endif
 | |
| +	/* Peers are only touched once the socket has been set up successfully. */
 | |
| +	ret = socket_init(wg);
 | |
| +	if (ret < 0)
 | |
| +		return ret;
 | |
| +	peer_for_each (wg, peer, temp, true) {
 | |
| +		timers_init_peer(peer);
 | |
| +		packet_send_queue(peer);
 | |
| +		if (peer->persistent_keepalive_interval)
 | |
| +			packet_send_keepalive(peer);
 | |
| +	}
 | |
| +	return 0;
 | |
| +}
 | |
| +
 | |
| +#ifdef CONFIG_PM_SLEEP
 | |
| +/* PM notifier callback: before suspend or hibernation, clears the handshake and keypairs of
 | |
| + * every peer of every device on device_list and stops the timer_kill_ephemerals timer of
 | |
| + * peers whose timers are enabled. All other PM events are ignored. Always returns 0. */
 | |
| +static int suspending_clear_noise_peers(struct notifier_block *nb, unsigned long action, void *data)
 | |
| +{
 | |
| +	struct wireguard_peer *peer, *next_peer;
 | |
| +	struct wireguard_device *wg;
 | |
| +
 | |
| +	switch (action) {
 | |
| +	case PM_HIBERNATION_PREPARE:
 | |
| +	case PM_SUSPEND_PREPARE:
 | |
| +		break;
 | |
| +	default:
 | |
| +		return 0;
 | |
| +	}
 | |
| +
 | |
| +	rtnl_lock();
 | |
| +	list_for_each_entry (wg, &device_list, device_list) {
 | |
| +		peer_for_each (wg, peer, next_peer, true) {
 | |
| +			noise_handshake_clear(&peer->handshake);
 | |
| +			noise_keypairs_clear(&peer->keypairs);
 | |
| +			if (peer->timers_enabled)
 | |
| +				del_timer(&peer->timer_kill_ephemerals);
 | |
| +		}
 | |
| +	}
 | |
| +	rtnl_unlock();
 | |
| +	rcu_barrier_bh();
 | |
| +
 | |
| +	return 0;
 | |
| +}
 | |
| +static struct notifier_block clear_peers_on_suspend = { .notifier_call = suspending_clear_noise_peers };
 | |
| +#endif
 | |
| +/* ndo_stop handler: for every peer stops its timers and clears its handshake and keypairs, then purges the queued incoming handshakes and calls socket_uninit(). Always returns 0. */
 | |
| +static int stop(struct net_device *dev)
 | |
| +{
 | |
| +	struct wireguard_device *wg = netdev_priv(dev);
 | |
| +	struct wireguard_peer *peer, *temp;
 | |
| +	peer_for_each (wg, peer, temp, true) {
 | |
| +		timers_uninit_peer(peer);
 | |
| +		noise_handshake_clear(&peer->handshake);
 | |
| +		noise_keypairs_clear(&peer->keypairs);
 | |
| +		if (peer->timers_enabled)
 | |
| +			del_timer(&peer->timer_kill_ephemerals);
 | |
| +	}
 | |
| +	skb_queue_purge(&wg->incoming_handshakes);
 | |
| +	socket_uninit(wg);
 | |
| +	return 0;
 | |
| +}
 | |
| +/* ndo_start_xmit handler: looks the packet's destination up in the device's peer routing table, appends the packet (or its GSO segments) to that peer's tx queue and calls packet_send_queue(). On failure it bumps tx_errors, sends an ICMP or ICMPv6 unreachable, frees the skb and returns a negative errno. */
 | |
| +static netdev_tx_t xmit(struct sk_buff *skb, struct net_device *dev)
 | |
| +{
 | |
| +	struct wireguard_device *wg = netdev_priv(dev);
 | |
| +	struct wireguard_peer *peer;
 | |
| +	struct sk_buff *next;
 | |
| +	int ret;
 | |
| +	/* Give up on packets for which dev_recursion_level() exceeds 4. */
 | |
| +	if (unlikely(dev_recursion_level() > 4)) {
 | |
| +		ret = -ELOOP;
 | |
| +		net_dbg_ratelimited("%s: Routing loop detected\n", dev->name);
 | |
| +		goto err;
 | |
| +	}
 | |
| +	/* The protocol found by inspecting the IP header must match skb->protocol. */
 | |
| +	if (unlikely(skb_examine_untrusted_ip_hdr(skb) != skb->protocol)) {
 | |
| +		ret = -EPROTONOSUPPORT;
 | |
| +		net_dbg_ratelimited("%s: Invalid IP packet\n", dev->name);
 | |
| +		goto err;
 | |
| +	}
 | |
| +	/* NOTE(review): the lookup presumably returns a referenced peer; every later exit path calls peer_put() -- confirm in routingtable.c. */
 | |
| +	peer = routing_table_lookup_dst(&wg->peer_routing_table, skb);
 | |
| +	if (unlikely(!peer)) {
 | |
| +		ret = -ENOKEY;
 | |
| +		net_dbg_skb_ratelimited("%s: No peer is configured for %pISc\n", dev->name, skb);
 | |
| +		goto err;
 | |
| +	}
 | |
| +	/* A peer whose endpoint is neither AF_INET nor AF_INET6 cannot be sent to. */
 | |
| +	read_lock_bh(&peer->endpoint_lock);
 | |
| +	ret = peer->endpoint.addr.sa_family != AF_INET && peer->endpoint.addr.sa_family != AF_INET6;
 | |
| +	read_unlock_bh(&peer->endpoint_lock);
 | |
| +	if (unlikely(ret)) {
 | |
| +		ret = -EDESTADDRREQ;
 | |
| +		net_dbg_ratelimited("%s: No valid endpoint has been configured or discovered for peer %Lu\n", dev->name, peer->internal_id);
 | |
| +		goto err_peer;
 | |
| +	}
 | |
| +
 | |
| +	/* If the queue is getting too big, we start removing the oldest packets until it's small again.
 | |
| +	 * We do this before adding the new packet, so we don't remove GSO segments that are in excess. */
 | |
| +	while (skb_queue_len(&peer->tx_packet_queue) > MAX_QUEUED_OUTGOING_PACKETS)
 | |
| +		dev_kfree_skb(skb_dequeue(&peer->tx_packet_queue));
 | |
| +	/* A GSO packet is segmented in software; the original skb is freed and the segment list takes its place. */
 | |
| +	if (!skb_is_gso(skb))
 | |
| +		skb->next = NULL;
 | |
| +	else {
 | |
| +		struct sk_buff *segs = skb_gso_segment(skb, 0);
 | |
| +		if (unlikely(IS_ERR(segs))) {
 | |
| +			ret = PTR_ERR(segs);
 | |
| +			goto err_peer;
 | |
| +		}
 | |
| +		dev_kfree_skb(skb);
 | |
| +		skb = segs;
 | |
| +	}
 | |
| +	do {
 | |
| +		next = skb->next;
 | |
| +		skb->next = skb->prev = NULL;
 | |
| +		/* If skb_share_check() returns NULL, this packet is skipped and the loop moves on to the next one. */
 | |
| +		skb = skb_share_check(skb, GFP_ATOMIC);
 | |
| +		if (unlikely(!skb))
 | |
| +			continue;
 | |
| +
 | |
| +		/* We only need to keep the original dst around for icmp,
 | |
| +		 * so at this point we're in a position to drop it. */
 | |
| +		skb_dst_drop(skb);
 | |
| +
 | |
| +		skb_queue_tail(&peer->tx_packet_queue, skb);
 | |
| +	} while ((skb = next) != NULL);
 | |
| +
 | |
| +	packet_send_queue(peer);
 | |
| +	peer_put(peer);
 | |
| +	return NETDEV_TX_OK;
 | |
| +	/* Error exits: err_peer additionally releases the peer before the common error handling. */
 | |
| +err_peer:
 | |
| +	peer_put(peer);
 | |
| +err:
 | |
| +	++dev->stats.tx_errors;
 | |
| +	if (skb->protocol == htons(ETH_P_IP))
 | |
| +		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
 | |
| +	else if (skb->protocol == htons(ETH_P_IPV6))
 | |
| +		icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
 | |
| +	kfree_skb(skb);
 | |
| +	return ret;
 | |
| +}
 | |
| +
 | |
| +/* ndo_do_ioctl handler: forwards WG_GET_DEVICE and WG_SET_DEVICE to the config code, passing
 | |
| + * the ifreq's data pointer. Returns -EPERM if the caller lacks CAP_NET_ADMIN in the user
 | |
| + * namespace of the device's network namespace, and -EINVAL for any other command. */
 | |
| +static int ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 | |
| +{
 | |
| +	struct wireguard_device *wg = netdev_priv(dev);
 | |
| +
 | |
| +	if (!ns_capable(dev_net(dev)->user_ns, CAP_NET_ADMIN))
 | |
| +		return -EPERM;
 | |
| +
 | |
| +	if (cmd == WG_GET_DEVICE)
 | |
| +		return config_get_device(wg, ifr->ifr_ifru.ifru_data);
 | |
| +	if (cmd == WG_SET_DEVICE)
 | |
| +		return config_set_device(wg, ifr->ifr_ifru.ifru_data);
 | |
| +	return -EINVAL;
 | |
| +}
 | |
| +/* net_device_ops table that setup() installs on each device; statistics are served by ip_tunnel_get_stats64(). */
 | |
| +static const struct net_device_ops netdev_ops = {
 | |
| +	.ndo_open		= open,
 | |
| +	.ndo_stop		= stop,
 | |
| +	.ndo_start_xmit		= xmit,
 | |
| +	.ndo_get_stats64	= ip_tunnel_get_stats64,
 | |
| +	.ndo_do_ioctl		= ioctl
 | |
| +};
 | |
| +/* priv_destructor assigned at the end of newlink(): takes the device off device_list, removes all peers, destroys the workqueues and padata instances, frees the routing table, wipes the static identity, purges queued handshakes, shuts the socket down, frees the per-cpu data, drops the creating netns reference and frees the netdev. */
 | |
| +static void destruct(struct net_device *dev)
 | |
| +{
 | |
| +	struct wireguard_device *wg = netdev_priv(dev);
 | |
| +	/* The list removal is done under the RTNL lock; the rest of the teardown runs under device_update_lock. */
 | |
| +	rtnl_lock();
 | |
| +	list_del(&wg->device_list);
 | |
| +	rtnl_unlock();
 | |
| +	mutex_lock(&wg->device_update_lock);
 | |
| +	peer_remove_all(wg);
 | |
| +	wg->incoming_port = 0;
 | |
| +	destroy_workqueue(wg->incoming_handshake_wq);
 | |
| +	destroy_workqueue(wg->peer_wq);
 | |
| +#ifdef CONFIG_WIREGUARD_PARALLEL
 | |
| +	padata_free(wg->encrypt_pd);
 | |
| +	padata_free(wg->decrypt_pd);
 | |
| +	destroy_workqueue(wg->crypt_wq);
 | |
| +#endif
 | |
| +	routing_table_free(&wg->peer_routing_table);
 | |
| +	ratelimiter_uninit();
 | |
| +	memzero_explicit(&wg->static_identity, sizeof(struct noise_static_identity));
 | |
| +	skb_queue_purge(&wg->incoming_handshakes);
 | |
| +	socket_uninit(wg);
 | |
| +	mutex_unlock(&wg->device_update_lock);
 | |
| +	free_percpu(dev->tstats);
 | |
| +	free_percpu(wg->incoming_handshakes_worker);
 | |
| +	put_net(wg->creating_net);
 | |
| +	/* dev->name is still needed for the message, so the netdev is freed last. */
 | |
| +	pr_debug("%s: Interface deleted\n", dev->name);
 | |
| +	free_netdev(dev);
 | |
| +}
 | |
| +/* rtnl_link_ops setup callback: fills in the net_device (ops, header and address lengths, headroom and tailroom, type, flags, features, MTU) and zeroes the private wireguard_device. */
 | |
| +static void setup(struct net_device *dev)
 | |
| +{
 | |
| +	struct wireguard_device *wg = netdev_priv(dev);
 | |
| +	enum { WG_NETDEV_FEATURES = NETIF_F_HW_CSUM | NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_GSO | NETIF_F_GSO_SOFTWARE | NETIF_F_HIGHDMA };
 | |
| +	/* No link-layer header or address: the device is ARPHRD_NONE, point-to-point and does no ARP. */
 | |
| +	dev->netdev_ops = &netdev_ops;
 | |
| +	dev->hard_header_len = 0;
 | |
| +	dev->addr_len = 0;
 | |
| +	dev->needed_headroom = DATA_PACKET_HEAD_ROOM;
 | |
| +	dev->needed_tailroom = noise_encrypted_len(MESSAGE_PADDING_MULTIPLE);
 | |
| +	dev->type = ARPHRD_NONE;
 | |
| +	dev->flags = IFF_POINTOPOINT | IFF_NOARP;
 | |
| +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 3, 0)
 | |
| +	dev->flags |= IFF_NO_QUEUE;
 | |
| +#else
 | |
| +	dev->tx_queue_len = 0;
 | |
| +#endif
 | |
| +	dev->features |= NETIF_F_LLTX;
 | |
| +	dev->features |= WG_NETDEV_FEATURES;
 | |
| +	dev->hw_features |= WG_NETDEV_FEATURES;
 | |
| +	dev->hw_enc_features |= WG_NETDEV_FEATURES;
 | |
| +	dev->mtu = ETH_DATA_LEN - MESSAGE_MINIMUM_LENGTH - sizeof(struct udphdr) - max(sizeof(struct ipv6hdr), sizeof(struct iphdr));
 | |
| +	/* The MTU above is ETH_DATA_LEN minus the minimum message size, the UDP header and the larger of the two IP headers. */
 | |
| +	/* We need to keep the dst around in case of icmp replies. */
 | |
| +	netif_keep_dst(dev);
 | |
| +	/* The private area starts out all-zero; newlink() fills it in. */
 | |
| +	memset(wg, 0, sizeof(struct wireguard_device));
 | |
| +}
 | |
| +/* rtnl_link_ops newlink callback: initializes the wireguard_device (locks, hashtables, routing table, cookie checker, per-cpu handshake workers, workqueues, padata instances, ratelimiter), registers the netdev and adds it to device_list. Returns 0, or a negative errno after undoing whatever had been set up. */
 | |
| +static int newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[])
 | |
| +{
 | |
| +	int ret = -ENOMEM, cpu;
 | |
| +	struct wireguard_device *wg = netdev_priv(dev);
 | |
| +	/* The reference on the creating network namespace is dropped again in destruct() or at error_1. */
 | |
| +	wg->creating_net = get_net(src_net);
 | |
| +	init_rwsem(&wg->static_identity.lock);
 | |
| +	mutex_init(&wg->socket_update_lock);
 | |
| +	mutex_init(&wg->device_update_lock);
 | |
| +	skb_queue_head_init(&wg->incoming_handshakes);
 | |
| +	pubkey_hashtable_init(&wg->peer_hashtable);
 | |
| +	index_hashtable_init(&wg->index_hashtable);
 | |
| +	routing_table_init(&wg->peer_routing_table);
 | |
| +	cookie_checker_init(&wg->cookie_checker, wg);
 | |
| +	INIT_LIST_HEAD(&wg->peer_list);
 | |
| +	/* Allocation failures from here on return the -ENOMEM that ret was initialized with. */
 | |
| +	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
 | |
| +	if (!dev->tstats)
 | |
| +		goto error_1;
 | |
| +	/* One handshake worker per possible CPU, each pointing back at this device. */
 | |
| +	wg->incoming_handshakes_worker = alloc_percpu(struct handshake_worker);
 | |
| +	if (!wg->incoming_handshakes_worker)
 | |
| +		goto error_2;
 | |
| +	for_each_possible_cpu (cpu) {
 | |
| +		per_cpu_ptr(wg->incoming_handshakes_worker, cpu)->wg = wg;
 | |
| +		INIT_WORK(&per_cpu_ptr(wg->incoming_handshakes_worker, cpu)->work, packet_process_queued_handshake_packets);
 | |
| +	}
 | |
| +	atomic_set(&wg->incoming_handshake_seqnr, 0);
 | |
| +
 | |
| +	wg->incoming_handshake_wq = alloc_workqueue("wg-kex-%s", WQ_CPU_INTENSIVE | WQ_FREEZABLE, 0, dev->name);
 | |
| +	if (!wg->incoming_handshake_wq)
 | |
| +		goto error_3;
 | |
| +	/* NOTE(review): peer_wq is created with the same "wg-kex-%s" name format as incoming_handshake_wq -- confirm that this is intended. */
 | |
| +	wg->peer_wq = alloc_workqueue("wg-kex-%s", WQ_UNBOUND | WQ_FREEZABLE, 0, dev->name);
 | |
| +	if (!wg->peer_wq)
 | |
| +		goto error_4;
 | |
| +	/* Both padata instances share the single crypt workqueue. */
 | |
| +#ifdef CONFIG_WIREGUARD_PARALLEL
 | |
| +	wg->crypt_wq = alloc_workqueue("wg-crypt-%s", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM, 2, dev->name);
 | |
| +	if (!wg->crypt_wq)
 | |
| +		goto error_5;
 | |
| +
 | |
| +	wg->encrypt_pd = padata_alloc_possible(wg->crypt_wq);
 | |
| +	if (!wg->encrypt_pd)
 | |
| +		goto error_6;
 | |
| +	padata_start(wg->encrypt_pd);
 | |
| +
 | |
| +	wg->decrypt_pd = padata_alloc_possible(wg->crypt_wq);
 | |
| +	if (!wg->decrypt_pd)
 | |
| +		goto error_7;
 | |
| +	padata_start(wg->decrypt_pd);
 | |
| +#endif
 | |
| +
 | |
| +	ret = ratelimiter_init();
 | |
| +	if (ret < 0)
 | |
| +		goto error_8;
 | |
| +
 | |
| +	ret = register_netdevice(dev);
 | |
| +	if (ret < 0)
 | |
| +		goto error_9;
 | |
| +
 | |
| +	list_add(&wg->device_list, &device_list);
 | |
| +
 | |
| +	/* We wait until the end to assign priv_destructor, so that register_netdevice doesn't
 | |
| +	 * call it for us if it fails. */
 | |
| +	dev->priv_destructor = destruct;
 | |
| +
 | |
| +	pr_debug("%s: Interface created\n", dev->name);
 | |
| +	return ret;
 | |
| +	/* Unwind in reverse order of setup; each label undoes the steps that had completed before the jump to it. */
 | |
| +error_9:
 | |
| +	ratelimiter_uninit();
 | |
| +error_8:
 | |
| +#ifdef CONFIG_WIREGUARD_PARALLEL
 | |
| +	padata_free(wg->decrypt_pd);
 | |
| +error_7:
 | |
| +	padata_free(wg->encrypt_pd);
 | |
| +error_6:
 | |
| +	destroy_workqueue(wg->crypt_wq);
 | |
| +error_5:
 | |
| +#endif
 | |
| +	destroy_workqueue(wg->peer_wq);
 | |
| +error_4:
 | |
| +	destroy_workqueue(wg->incoming_handshake_wq);
 | |
| +error_3:
 | |
| +	free_percpu(wg->incoming_handshakes_worker);
 | |
| +error_2:
 | |
| +	free_percpu(dev->tstats);
 | |
| +error_1:
 | |
| +	put_net(src_net);
 | |
| +	return ret;
 | |
| +}
 | |
| +/* rtnl link type for this module: the kind is the module name, and each device carries a struct wireguard_device as private data. */
 | |
| +static struct rtnl_link_ops link_ops __read_mostly = {
 | |
| +	.kind			= KBUILD_MODNAME,
 | |
| +	.priv_size		= sizeof(struct wireguard_device),
 | |
| +	.setup			= setup,
 | |
| +	.newlink		= newlink,
 | |
| +};
 | |
| +
 | |
| +/* Registers the suspend/hibernate notifier (only with CONFIG_PM_SLEEP) and the rtnl link ops.
 | |
| + * Returns 0 on success or a negative errno; on failure nothing is left registered. */
 | |
| +int __init device_init(void)
 | |
| +{
 | |
| +	int ret;
 | |
| +
 | |
| +#ifdef CONFIG_PM_SLEEP
 | |
| +	ret = register_pm_notifier(&clear_peers_on_suspend);
 | |
| +	if (ret)
 | |
| +		return ret;
 | |
| +#endif
 | |
| +
 | |
| +	ret = rtnl_link_register(&link_ops);
 | |
| +#ifdef CONFIG_PM_SLEEP
 | |
| +	/* A failure here makes module init fail, so the notifier registered above must be
 | |
| +	 * removed again; otherwise the PM core would keep a pointer into freed module memory. */
 | |
| +	if (ret)
 | |
| +		unregister_pm_notifier(&clear_peers_on_suspend);
 | |
| +#endif
 | |
| +	return ret;
 | |
| +}
 | |
| +/* Module-exit counterpart of device_init(): unregisters the link ops and (with CONFIG_PM_SLEEP) the PM notifier, then calls rcu_barrier_bh() so pending RCU-bh callbacks have run before the module goes away. */
 | |
| +void __exit device_uninit(void)
 | |
| +{
 | |
| +	rtnl_link_unregister(&link_ops);
 | |
| +#ifdef CONFIG_PM_SLEEP
 | |
| +	unregister_pm_notifier(&clear_peers_on_suspend);
 | |
| +#endif
 | |
| +	rcu_barrier_bh();
 | |
| +}
 | |
| --- /dev/null	2017-07-05 16:27:37.615351856 +0200
 | |
| +++ b/net/wireguard/hashtables.c	2017-07-06 18:17:33.000000000 +0200
 | |
| @@ -0,0 +1,137 @@
 | |
| +/* Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. */
 | |
| +
 | |
| +#include "hashtables.h"
 | |
| +#include "peer.h"
 | |
| +#include "noise.h"
 | |
| +
 | |
| +/* Returns the hash bucket of table that pubkey belongs to. */
 | |
| +static inline struct hlist_head *pubkey_bucket(struct pubkey_hashtable *table, const u8 pubkey[NOISE_PUBLIC_KEY_LEN])
 | |
| +{
 | |
| +	/* siphash gives us a secure 64bit number based on a random key. Since the bits are
 | |
| +	 * uniformly distributed, we can then mask off to get the bits we need. */
 | |
| +	const u64 hash = siphash(pubkey, NOISE_PUBLIC_KEY_LEN, &table->key);
 | |
| +
 | |
| +	return &table->hashtable[hash & (HASH_SIZE(table->hashtable) - 1)];
 | |
| +}
 | |
| +/* Prepares an empty public-key hashtable: draws a random siphash key, initializes the buckets and the mutex that serializes add/remove. */
 | |
| +void pubkey_hashtable_init(struct pubkey_hashtable *table)
 | |
| +{
 | |
| +	get_random_bytes(&table->key, sizeof(table->key));
 | |
| +	hash_init(table->hashtable);
 | |
| +	mutex_init(&table->lock);
 | |
| +}
 | |
| +
 | |
| +/* Inserts peer into table, keyed by the peer's remote static public key. */
 | |
| +void pubkey_hashtable_add(struct pubkey_hashtable *table, struct wireguard_peer *peer)
 | |
| +{
 | |
| +	struct hlist_head *bucket = pubkey_bucket(table, peer->handshake.remote_static);
 | |
| +
 | |
| +	mutex_lock(&table->lock);
 | |
| +	hlist_add_head_rcu(&peer->pubkey_hash, bucket);
 | |
| +	mutex_unlock(&table->lock);
 | |
| +}
 | |
| +/* Unhashes peer from table under the table mutex; hlist_del_init_rcu() leaves the node re-initialized. */
 | |
| +void pubkey_hashtable_remove(struct pubkey_hashtable *table, struct wireguard_peer *peer)
 | |
| +{
 | |
| +	mutex_lock(&table->lock);
 | |
| +	hlist_del_init_rcu(&peer->pubkey_hash);
 | |
| +	mutex_unlock(&table->lock);
 | |
| +}
 | |
| +
 | |
| +/* Looks up the peer whose remote static public key equals pubkey. Returns that peer with a
 | |
| + * strong reference taken through peer_get(), or NULL if there is no match or peer_get()
 | |
| + * returns NULL. */
 | |
| +struct wireguard_peer *pubkey_hashtable_lookup(struct pubkey_hashtable *table, const u8 pubkey[NOISE_PUBLIC_KEY_LEN])
 | |
| +{
 | |
| +	struct wireguard_peer *candidate, *found = NULL;
 | |
| +
 | |
| +	rcu_read_lock_bh();
 | |
| +	hlist_for_each_entry_rcu_bh (candidate, pubkey_bucket(table, pubkey), pubkey_hash) {
 | |
| +		if (memcmp(pubkey, candidate->handshake.remote_static, NOISE_PUBLIC_KEY_LEN))
 | |
| +			continue;
 | |
| +		found = candidate;
 | |
| +		break;
 | |
| +	}
 | |
| +	/* The reference is taken while still inside the RCU read section. */
 | |
| +	found = peer_get(found);
 | |
| +	rcu_read_unlock_bh();
 | |
| +	return found;
 | |
| +}
 | |
| +
 | |
| +/* Returns the hash bucket of table that index belongs to. */
 | |
| +static inline struct hlist_head *index_bucket(struct index_hashtable *table, const __le32 index)
 | |
| +{
 | |
| +	/* Since the indices are random and thus all bits are uniformly distributed,
 | |
| +	 * we can find its bucket simply by masking. */
 | |
| +	const u32 mask = HASH_SIZE(table->hashtable) - 1;
 | |
| +
 | |
| +	return &table->hashtable[(__force u32)index & mask];
 | |
| +}
 | |
| +/* Prepares an empty index hashtable: initializes the buckets and the spinlock that serializes writers. */
 | |
| +void index_hashtable_init(struct index_hashtable *table)
 | |
| +{
 | |
| +	hash_init(table->hashtable);
 | |
| +	spin_lock_init(&table->lock);
 | |
| +}
 | |
| +/* Gives entry a fresh random index that no other entry in table currently uses, inserts it under that index and returns the index. */
 | |
| +__le32 index_hashtable_insert(struct index_hashtable *table, struct index_hashtable_entry *entry)
 | |
| +{
 | |
| +	struct index_hashtable_entry *existing_entry;
 | |
| +	/* If entry is still hashed under an earlier index, it is unhashed first. */
 | |
| +	spin_lock_bh(&table->lock);
 | |
| +	hlist_del_init_rcu(&entry->index_hash);
 | |
| +	spin_unlock_bh(&table->lock);
 | |
| +
 | |
| +	rcu_read_lock_bh();
 | |
| +
 | |
| +search_unused_slot:
 | |
| +	/* First we try to find an unused slot, randomly, while unlocked. */
 | |
| +	entry->index = (__force __le32)get_random_u32();
 | |
| +	hlist_for_each_entry_rcu_bh (existing_entry, index_bucket(table, entry->index), index_hash) {
 | |
| +		if (existing_entry->index == entry->index)
 | |
| +			goto search_unused_slot; /* If it's already in use, we continue searching. */
 | |
| +	}
 | |
| +
 | |
| +	/* Once we've found an unused slot, we lock it, and then double-check
 | |
| +	 * that nobody else stole it from us. */
 | |
| +	spin_lock_bh(&table->lock);
 | |
| +	hlist_for_each_entry_rcu_bh (existing_entry, index_bucket(table, entry->index), index_hash) {
 | |
| +		if (existing_entry->index == entry->index) {
 | |
| +			spin_unlock_bh(&table->lock);
 | |
| +			goto search_unused_slot; /* If it was stolen, we start over. */
 | |
| +		}
 | |
| +	}
 | |
| +	/* Otherwise, we know we have it exclusively (since we're locked), so we insert. */
 | |
| +	hlist_add_head_rcu(&entry->index_hash, index_bucket(table, entry->index));
 | |
| +	spin_unlock_bh(&table->lock);
 | |
| +
 | |
| +	rcu_read_unlock_bh();
 | |
| +
 | |
| +	return entry->index;
 | |
| +}
 | |
| +/* Moves old's index and hash slot over to new and leaves old unhashed. Returns false, changing nothing, if old is not currently hashed. NOTE(review): the hlist_unhashed() check runs before table->lock is taken -- confirm that callers serialize against removal of old. */
 | |
| +bool index_hashtable_replace(struct index_hashtable *table, struct index_hashtable_entry *old, struct index_hashtable_entry *new)
 | |
| +{
 | |
| +	if (unlikely(hlist_unhashed(&old->index_hash)))
 | |
| +		return false;
 | |
| +	spin_lock_bh(&table->lock);
 | |
| +	new->index = old->index;
 | |
| +	hlist_replace_rcu(&old->index_hash, &new->index_hash);
 | |
| +	INIT_HLIST_NODE(&old->index_hash);
 | |
| +	spin_unlock_bh(&table->lock);
 | |
| +	return true;
 | |
| +}
 | |
| +/* Unhashes entry from table under the table spinlock; hlist_del_init_rcu() leaves the node re-initialized. */
 | |
| +void index_hashtable_remove(struct index_hashtable *table, struct index_hashtable_entry *entry)
 | |
| +{
 | |
| +	spin_lock_bh(&table->lock);
 | |
| +	hlist_del_init_rcu(&entry->index_hash);
 | |
| +	spin_unlock_bh(&table->lock);
 | |
| +}
 | |
| +
 | |
| +/* Finds the entry stored under index whose type is included in type_mask. On success a
 | |
| + * strong reference to an entry->peer has been taken for the caller. Returns NULL if there
 | |
| + * is no such entry or if its peer can no longer be referenced. */
 | |
| +struct index_hashtable_entry *index_hashtable_lookup(struct index_hashtable *table, const enum index_hashtable_type type_mask, const __le32 index)
 | |
| +{
 | |
| +	struct index_hashtable_entry *iter_entry, *entry = NULL;
 | |
| +	rcu_read_lock_bh();
 | |
| +	hlist_for_each_entry_rcu_bh (iter_entry, index_bucket(table, index), index_hash) {
 | |
| +		if (iter_entry->index == index && (iter_entry->type & type_mask)) {
 | |
| +			entry = iter_entry;
 | |
| +			break;
 | |
| +		}
 | |
| +	}
 | |
| +	/* Only take the reference; peer_get()'s result is deliberately not stored back into
 | |
| +	 * the entry. The entry is shared with concurrent readers and we hold no lock, so
 | |
| +	 * writing NULL into entry->peer on failure would break it for every other user. */
 | |
| +	if (likely(entry) && unlikely(!peer_get(entry->peer)))
 | |
| +		entry = NULL;
 | |
| +	rcu_read_unlock_bh();
 | |
| +	return entry;
 | |
| +}
 | |
| --- /dev/null	2017-07-05 16:27:37.615351856 +0200
 | |
| +++ b/net/wireguard/main.c	2017-07-06 18:17:33.000000000 +0200
 | |
| @@ -0,0 +1,68 @@
 | |
| +/* Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. */
 | |
| +
 | |
| +#include "version.h"
 | |
| +#include "device.h"
 | |
| +#include "noise.h"
 | |
| +#include "packets.h"
 | |
| +#include "ratelimiter.h"
 | |
| +#include "crypto/chacha20poly1305.h"
 | |
| +#include "crypto/blake2s.h"
 | |
| +#include "crypto/curve25519.h"
 | |
| +
 | |
| +#include <linux/version.h>
 | |
| +#include <linux/init.h>
 | |
| +#include <linux/module.h>
 | |
| +#include <net/rtnetlink.h>
 | |
| +/* Module entry point: calls the crypto *_fpu_init() hooks, runs the self-tests on DEBUG builds, calls noise_init(), then sets up the packet caches (CONFIG_WIREGUARD_PARALLEL only) and the device layer. Returns 0, -ENOTRECOVERABLE if a self-test fails, or the error of the failing init step. */
 | |
| +static int __init mod_init(void)
 | |
| +{
 | |
| +	int ret;
 | |
| +
 | |
| +	chacha20poly1305_fpu_init();
 | |
| +	blake2s_fpu_init();
 | |
| +	curve25519_fpu_init();
 | |
| +#ifdef DEBUG
 | |
| +	if (!routing_table_selftest() || !packet_counter_selftest() || !curve25519_selftest() || !chacha20poly1305_selftest() || !blake2s_selftest() || !ratelimiter_selftest())
 | |
| +		return -ENOTRECOVERABLE;
 | |
| +#endif
 | |
| +	noise_init();
 | |
| +
 | |
| +#ifdef CONFIG_WIREGUARD_PARALLEL
 | |
| +	ret = packet_init_data_caches();
 | |
| +	if (ret < 0)
 | |
| +		goto err_packet;
 | |
| +#endif
 | |
| +
 | |
| +	ret = device_init();
 | |
| +	if (ret < 0)
 | |
| +		goto err_device;
 | |
| +
 | |
| +	pr_info("WireGuard " WIREGUARD_VERSION " loaded. See www.wireguard.io for information.\n");
 | |
| +	pr_info("Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.\n");
 | |
| +
 | |
| +	return 0;
 | |
| +	/* A device_init() failure releases the packet caches again on parallel builds. */
 | |
| +err_device:
 | |
| +#ifdef CONFIG_WIREGUARD_PARALLEL
 | |
| +	packet_deinit_data_caches();
 | |
| +err_packet:
 | |
| +#endif
 | |
| +	return ret;
 | |
| +}
 | |
| +/* Module exit point: tears down the device layer first and then, on CONFIG_WIREGUARD_PARALLEL builds, the packet caches. */
 | |
| +static void __exit mod_exit(void)
 | |
| +{
 | |
| +	device_uninit();
 | |
| +#ifdef CONFIG_WIREGUARD_PARALLEL
 | |
| +	packet_deinit_data_caches();
 | |
| +#endif
 | |
| +	pr_debug("WireGuard unloaded\n");
 | |
| +}
 | |
| +
 | |
| +module_init(mod_init);
 | |
| +module_exit(mod_exit);
 | |
| +MODULE_LICENSE("GPL v2");
 | |
| +MODULE_DESCRIPTION("Fast, secure, and modern VPN tunnel");
 | |
| +MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");
 | |
| +MODULE_VERSION(WIREGUARD_VERSION);
 | |
| +MODULE_ALIAS_RTNL_LINK(KBUILD_MODNAME);
 | |
| --- /dev/null	2017-07-05 16:27:37.615351856 +0200
 | |
| +++ b/net/wireguard/noise.c	2017-07-06 18:17:33.000000000 +0200
 | |
| @@ -0,0 +1,612 @@
 | |
| +/* Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. */
 | |
| +
 | |
| +#include "noise.h"
 | |
| +#include "device.h"
 | |
| +#include "peer.h"
 | |
| +#include "messages.h"
 | |
| +#include "packets.h"
 | |
| +#include "hashtables.h"
 | |
| +
 | |
| +#include <linux/rcupdate.h>
 | |
| +#include <linux/slab.h>
 | |
| +#include <linux/bitmap.h>
 | |
| +#include <linux/scatterlist.h>
 | |
| +#include <linux/highmem.h>
 | |
| +#include <crypto/algapi.h>
 | |
| +
 | |
| +/* This implements Noise_IKpsk2:
 | |
| + *
 | |
| + * <- s
 | |
| + * ******
 | |
| + * -> e, es, s, ss, {t}
 | |
| + * <- e, ee, se, psk, {}
 | |
| + */
 | |
| +
 | |
| +static const u8 handshake_name[37] = "Noise_IKpsk2_25519_ChaChaPoly_BLAKE2s";
 | |
| +static const u8 identifier_name[34] = "WireGuard v1 zx2c4 Jason@zx2c4.com";
 | |
| +static u8 handshake_init_hash[NOISE_HASH_LEN] __read_mostly;
 | |
| +static u8 handshake_init_chaining_key[NOISE_HASH_LEN] __read_mostly;
 | |
| +static atomic64_t keypair_counter = ATOMIC64_INIT(0);
 | |
| +
 | |
| +/* Precomputes the two constants every handshake starts from: handshake_init_chaining_key is
 | |
| + * the BLAKE2s hash of handshake_name, and handshake_init_hash is the BLAKE2s hash of that
 | |
| + * chaining key followed by identifier_name. */
 | |
| +void __init noise_init(void)
 | |
| +{
 | |
| +	struct blake2s_state state;
 | |
| +
 | |
| +	blake2s(handshake_init_chaining_key, handshake_name, NULL, NOISE_HASH_LEN, sizeof(handshake_name), 0);
 | |
| +
 | |
| +	blake2s_init(&state, NOISE_HASH_LEN);
 | |
| +	blake2s_update(&state, handshake_init_chaining_key, NOISE_HASH_LEN);
 | |
| +	blake2s_update(&state, identifier_name, sizeof(identifier_name));
 | |
| +	blake2s_final(&state, handshake_init_hash, NOISE_HASH_LEN);
 | |
| +}
 | |
| +
 | |
| +/* Refreshes peer->handshake.precomputed_static_static.
 | |
| + *
 | |
| + * Without a local identity the cached value is zeroed and true is returned.
 | |
| + * Otherwise the value is curve25519(local static private, remote static)
 | |
| + * and the result of curve25519() is returned. */
 | |
| +bool noise_precompute_static_static(struct wireguard_peer *peer)
 | |
| +{
 | |
| +	struct noise_handshake *handshake = &peer->handshake;
 | |
| +
 | |
| +	if (!handshake->static_identity->has_identity) {
 | |
| +		memset(handshake->precomputed_static_static, 0, NOISE_PUBLIC_KEY_LEN);
 | |
| +		return true;
 | |
| +	}
 | |
| +	return curve25519(handshake->precomputed_static_static, handshake->static_identity->static_private, handshake->remote_static);
 | |
| +}
 | |
| +
 | |
| +/* Resets @handshake to a pristine HANDSHAKE_ZEROED state bound to @peer and
 | |
| + * @static_identity, storing copies of the peer's public key and preshared key.
 | |
| + *
 | |
| + * The final precomputation goes through @peer->handshake, so @handshake is
 | |
| + * expected to be that very member. Returns what
 | |
| + * noise_precompute_static_static() returns. */
 | |
| +bool noise_handshake_init(struct noise_handshake *handshake, struct noise_static_identity *static_identity, const u8 peer_public_key[NOISE_PUBLIC_KEY_LEN], const u8 peer_preshared_key[NOISE_SYMMETRIC_KEY_LEN], struct wireguard_peer *peer)
 | |
| +{
 | |
| +	memset(handshake, 0, sizeof(*handshake));
 | |
| +	init_rwsem(&handshake->lock);
 | |
| +
 | |
| +	handshake->state = HANDSHAKE_ZEROED;
 | |
| +	handshake->static_identity = static_identity;
 | |
| +	handshake->entry.peer = peer;
 | |
| +	handshake->entry.type = INDEX_HASHTABLE_HANDSHAKE;
 | |
| +
 | |
| +	memcpy(handshake->preshared_key, peer_preshared_key, NOISE_SYMMETRIC_KEY_LEN);
 | |
| +	memcpy(handshake->remote_static, peer_public_key, NOISE_PUBLIC_KEY_LEN);
 | |
| +
 | |
| +	return noise_precompute_static_static(peer);
 | |
| +}
 | |
| +
 | |
| +/* Wipes the per-attempt handshake material (ephemeral keys, hash, chaining
 | |
| + * key, remote index) and marks the handshake HANDSHAKE_ZEROED. The static
 | |
| + * keys, preshared key and timestamps are left untouched. Callers in this
 | |
| + * file hold handshake->lock for writing. */
 | |
| +static void handshake_zero(struct noise_handshake *handshake)
 | |
| +{
 | |
| +	handshake->state = HANDSHAKE_ZEROED;
 | |
| +	handshake->remote_index = 0;
 | |
| +	memset(handshake->chaining_key, 0, NOISE_HASH_LEN);
 | |
| +	memset(handshake->hash, 0, NOISE_HASH_LEN);
 | |
| +	memset(handshake->remote_ephemeral, 0, NOISE_PUBLIC_KEY_LEN);
 | |
| +	memset(handshake->ephemeral_private, 0, NOISE_PUBLIC_KEY_LEN);
 | |
| +}
 | |
| +
 | |
| +/* Unpublishes and wipes an in-progress handshake: the entry is removed from
 | |
| + * the device's index hashtable, the per-attempt state is zeroed under the
 | |
| + * write lock, and the entry is removed once more.
 | |
| + * NOTE(review): the second removal presumably covers an entry inserted by a
 | |
| + * concurrent noise_handshake_create_*() before the write lock was taken
 | |
| + * here -- confirm before simplifying. */
 | |
| +void noise_handshake_clear(struct noise_handshake *handshake)
 | |
| +{
 | |
| +	index_hashtable_remove(&handshake->entry.peer->device->index_hashtable, &handshake->entry);
 | |
| +	down_write(&handshake->lock);
 | |
| +	handshake_zero(handshake);
 | |
| +	up_write(&handshake->lock);
 | |
| +	index_hashtable_remove(&handshake->entry.peer->device->index_hashtable, &handshake->entry);
 | |
| +}
 | |
| +
 | |
| +/* Allocates a zeroed keypair owned by @peer with a fresh internal_id and an
 | |
| + * initialized reference count. Returns NULL if the allocation fails. Uses
 | |
| + * GFP_KERNEL, so it may sleep. */
 | |
| +static struct noise_keypair *keypair_create(struct wireguard_peer *peer)
 | |
| +{
 | |
| +	struct noise_keypair *keypair;
 | |
| +
 | |
| +	keypair = kzalloc(sizeof(*keypair), GFP_KERNEL);
 | |
| +	if (likely(keypair)) {
 | |
| +		kref_init(&keypair->refcount);
 | |
| +		keypair->entry.peer = peer;
 | |
| +		keypair->entry.type = INDEX_HASHTABLE_KEYPAIR;
 | |
| +		keypair->internal_id = atomic64_inc_return(&keypair_counter);
 | |
| +	}
 | |
| +	return keypair;
 | |
| +}
 | |
| +
 | |
| +/* RCU callback queued by keypair_free_kref(): logs the destruction and
 | |
| + * releases the keypair with kzfree(). */
 | |
| +static void keypair_free_rcu(struct rcu_head *rcu)
 | |
| +{
 | |
| +	struct noise_keypair *keypair = container_of(rcu, struct noise_keypair, rcu);
 | |
| +	struct wireguard_peer *peer = keypair->entry.peer;
 | |
| +
 | |
| +	net_dbg_ratelimited("%s: Keypair %Lu destroyed for peer %Lu\n", netdev_pub(peer->device)->name, keypair->internal_id, peer->internal_id);
 | |
| +	kzfree(keypair);
 | |
| +}
 | |
| +
 | |
| +/* kref release function: runs when the last reference to a keypair is put.
 | |
| + * The keypair is removed from the device's index hashtable first, then the
 | |
| + * actual free is deferred to keypair_free_rcu() via call_rcu_bh(). */
 | |
| +static void keypair_free_kref(struct kref *kref)
 | |
| +{
 | |
| +	struct noise_keypair *keypair = container_of(kref, struct noise_keypair, refcount);
 | |
| +	struct wireguard_device *wg = keypair->entry.peer->device;
 | |
| +
 | |
| +	index_hashtable_remove(&wg->index_hashtable, &keypair->entry);
 | |
| +	call_rcu_bh(&keypair->rcu, keypair_free_rcu);
 | |
| +}
 | |
| +
 | |
| +/* Drops one reference on @keypair; the last put triggers keypair_free_kref().
 | |
| + * A NULL @keypair is accepted and ignored. */
 | |
| +void noise_keypair_put(struct noise_keypair *keypair)
 | |
| +{
 | |
| +	if (likely(keypair))
 | |
| +		kref_put(&keypair->refcount, keypair_free_kref);
 | |
| +}
 | |
| +
 | |
| +/* Tries to take a reference on @keypair. Returns @keypair on success, or NULL
 | |
| + * if @keypair is NULL or its reference count has already reached zero.
 | |
| + * Lockdep warns if the RCU BH read lock is not held by the caller. */
 | |
| +struct noise_keypair *noise_keypair_get(struct noise_keypair *keypair)
 | |
| +{
 | |
| +	RCU_LOCKDEP_WARN(!rcu_read_lock_bh_held(), "Calling noise_keypair_get without holding the RCU BH read lock");
 | |
| +	if (unlikely(!keypair))
 | |
| +		return NULL;
 | |
| +	if (unlikely(!kref_get_unless_zero(&keypair->refcount)))
 | |
| +		return NULL;
 | |
| +	return keypair;
 | |
| +}
 | |
| +
 | |
| +/* Empties all three keypair slots of @keypairs (previous, next, current, in
 | |
| + * that order). Under keypair_update_lock each slot is set to NULL and the
 | |
| + * reference the slot held is dropped with noise_keypair_put(). Takes a
 | |
| + * mutex, so it must be called from a context that may sleep. */
 | |
| +void noise_keypairs_clear(struct noise_keypairs *keypairs)
 | |
| +{
 | |
| +	struct noise_keypair *old;
 | |
| +	mutex_lock(&keypairs->keypair_update_lock);
 | |
| +	old = rcu_dereference_protected(keypairs->previous_keypair, lockdep_is_held(&keypairs->keypair_update_lock));
 | |
| +	rcu_assign_pointer(keypairs->previous_keypair, NULL);
 | |
| +	noise_keypair_put(old);
 | |
| +	old = rcu_dereference_protected(keypairs->next_keypair, lockdep_is_held(&keypairs->keypair_update_lock));
 | |
| +	rcu_assign_pointer(keypairs->next_keypair, NULL);
 | |
| +	noise_keypair_put(old);
 | |
| +	old = rcu_dereference_protected(keypairs->current_keypair, lockdep_is_held(&keypairs->keypair_update_lock));
 | |
| +	rcu_assign_pointer(keypairs->current_keypair, NULL);
 | |
| +	noise_keypair_put(old);
 | |
| +	mutex_unlock(&keypairs->keypair_update_lock);
 | |
| +}
 | |
| +
 | |
| +/* Installs @new_keypair into @keypairs under keypair_update_lock.
 | |
| + *
 | |
| + * An initiator keypair becomes the current keypair right away; a responder
 | |
| + * keypair is parked in the next slot. The references held by the slots that
 | |
| + * get displaced are dropped with noise_keypair_put(). The slot takes over
 | |
| + * the caller's reference on @new_keypair (no extra reference is taken). */
 | |
| +static void add_new_keypair(struct noise_keypairs *keypairs, struct noise_keypair *new_keypair)
 | |
| +{
 | |
| +	struct noise_keypair *previous_keypair, *next_keypair, *current_keypair;
 | |
| +
 | |
| +	mutex_lock(&keypairs->keypair_update_lock);
 | |
| +	previous_keypair = rcu_dereference_protected(keypairs->previous_keypair, lockdep_is_held(&keypairs->keypair_update_lock));
 | |
| +	next_keypair = rcu_dereference_protected(keypairs->next_keypair, lockdep_is_held(&keypairs->keypair_update_lock));
 | |
| +	current_keypair =  rcu_dereference_protected(keypairs->current_keypair, lockdep_is_held(&keypairs->keypair_update_lock));
 | |
| +	if (new_keypair->i_am_the_initiator) {
 | |
| +		/* If we're the initiator, it means we've sent a handshake, and received
 | |
| +		 * a confirmation response, which means this new keypair can now be used. */
 | |
| +		if (next_keypair) {
 | |
| +			/* If there already was a next keypair pending, we demote it to be
 | |
| +			 * the previous keypair, and free the existing current.
 | |
| +			 * TODO: note that this means KCI can result in this transition. It
 | |
| +			 * would perhaps be more sound to always just get rid of the unused
 | |
| +			 * next keypair instead of putting it in the previous slot, but this
 | |
| +			 * might be a bit less robust. Something to think about and decide on. */
 | |
| +			rcu_assign_pointer(keypairs->next_keypair, NULL);
 | |
| +			rcu_assign_pointer(keypairs->previous_keypair, next_keypair);
 | |
| +			noise_keypair_put(current_keypair);
 | |
| +		} else	/* If there wasn't an existing next keypair, we replace the
 | |
| +			 * previous with the current one. */
 | |
| +			rcu_assign_pointer(keypairs->previous_keypair, current_keypair);
 | |
| +		/* At this point we can get rid of the old previous keypair, and set up
 | |
| +		 * the new keypair. */
 | |
| +		noise_keypair_put(previous_keypair);
 | |
| +		rcu_assign_pointer(keypairs->current_keypair, new_keypair);
 | |
| +	} else {
 | |
| +		/* If we're the responder, it means we can't use the new keypair until
 | |
| +		 * we receive confirmation via the first data packet, so we get rid of
 | |
| +		 * the existing previous one, the possibly existing next one, and slide
 | |
| +		 * in the new next one. */
 | |
| +		rcu_assign_pointer(keypairs->next_keypair, new_keypair);
 | |
| +		noise_keypair_put(next_keypair);
 | |
| +		rcu_assign_pointer(keypairs->previous_keypair, NULL);
 | |
| +		noise_keypair_put(previous_keypair);
 | |
| +	}
 | |
| +	mutex_unlock(&keypairs->keypair_update_lock);
 | |
| +}
 | |
| +
 | |
| +/* Called for a packet that was received using @received_keypair. If that
 | |
| + * keypair is the pending next keypair, the slots are rotated (current ->
 | |
| + * previous, next -> current, next cleared) and the old previous keypair is
 | |
| + * put.
 | |
| + *
 | |
| + * Returns true when the rotation happened, false otherwise. Does not take
 | |
| + * keypair_update_lock (see the TODO below), only the RCU BH read lock. */
 | |
| +bool noise_received_with_keypair(struct noise_keypairs *keypairs, struct noise_keypair *received_keypair)
 | |
| +{
 | |
| +	bool ret = false;
 | |
| +	struct noise_keypair *old_keypair;
 | |
| +
 | |
| +	/* TODO: probably this needs the actual mutex, but we're in atomic context,
 | |
| +	 * so we can't take it here. Instead we just rely on RCU for the lookups. */
 | |
| +	rcu_read_lock_bh();
 | |
| +	if (unlikely(received_keypair == rcu_dereference_bh(keypairs->next_keypair))) {
 | |
| +		ret = true;
 | |
| +		/* When we've finally received the confirmation, we slide the next
 | |
| +		 * into the current, the current into the previous, and get rid of
 | |
| +		 * the old previous. */
 | |
| +		old_keypair = rcu_dereference_bh(keypairs->previous_keypair);
 | |
| +		rcu_assign_pointer(keypairs->previous_keypair, rcu_dereference_bh(keypairs->current_keypair));
 | |
| +		noise_keypair_put(old_keypair);
 | |
| +		rcu_assign_pointer(keypairs->current_keypair, received_keypair);
 | |
| +		rcu_assign_pointer(keypairs->next_keypair, NULL);
 | |
| +	}
 | |
| +	rcu_read_unlock_bh();
 | |
| +
 | |
| +	return ret;
 | |
| +}
 | |
| +
 | |
| +/* Replaces the device's static identity while holding its lock for writing.
 | |
| + *
 | |
| + * With a non-NULL @private_key the key is stored and the public key is
 | |
| + * derived from it; has_identity records whether that derivation succeeded.
 | |
| + * With a NULL @private_key both keys are zeroed and has_identity is cleared. */
 | |
| +void noise_set_static_identity_private_key(struct noise_static_identity *static_identity, const u8 private_key[NOISE_PUBLIC_KEY_LEN])
 | |
| +{
 | |
| +	down_write(&static_identity->lock);
 | |
| +	if (!private_key) {
 | |
| +		static_identity->has_identity = false;
 | |
| +		memset(static_identity->static_public, 0, NOISE_PUBLIC_KEY_LEN);
 | |
| +		memset(static_identity->static_private, 0, NOISE_PUBLIC_KEY_LEN);
 | |
| +	} else {
 | |
| +		memcpy(static_identity->static_private, private_key, NOISE_PUBLIC_KEY_LEN);
 | |
| +		static_identity->has_identity = curve25519_generate_public(static_identity->static_public, private_key);
 | |
| +	}
 | |
| +	up_write(&static_identity->lock);
 | |
| +}
 | |
| +
 | |
| +/* This is Hugo Krawczyk's HKDF:
 | |
| + *  - https://eprint.iacr.org/2010/264.pdf
 | |
| + *  - https://tools.ietf.org/html/rfc5869
 | |
| + *
 | |
| + * Extracts a secret from @data (@data_len bytes) keyed with @chaining_key,
 | |
| + * then expands it into up to three outputs of @first_len, @second_len and
 | |
| + * @third_len bytes (each at most BLAKE2S_OUTBYTES). An output is written
 | |
| + * only when both its destination and its length are non-zero, and expansion
 | |
| + * stops at the first output that is skipped. The BUG_ON rejects oversized
 | |
| + * lengths and any later output given without the earlier ones.
 | |
| + *
 | |
| + * @chaining_key is read only by the extract step, before any destination is
 | |
| + * written, which is why callers in this file pass the chaining key as
 | |
| + * @first_dst as well.
 | |
| + */
 | |
| +static void kdf(u8 *first_dst, u8 *second_dst, u8 *third_dst, const u8 *data, size_t first_len, size_t second_len, size_t third_len, size_t data_len, const u8 chaining_key[NOISE_HASH_LEN])
 | |
| +{
 | |
| +	u8 secret[BLAKE2S_OUTBYTES];
 | |
| +	/* One extra byte holds the counter appended to the previous output. */
 | |
| +	u8 output[BLAKE2S_OUTBYTES + 1];
 | |
| +	BUG_ON(first_len > BLAKE2S_OUTBYTES || second_len > BLAKE2S_OUTBYTES || third_len > BLAKE2S_OUTBYTES || ((second_len || second_dst || third_len || third_dst) && (!first_len || !first_dst)) || ((third_len || third_dst) && (!second_len || !second_dst)));
 | |
| +
 | |
| +	/* Extract entropy from data into secret */
 | |
| +	blake2s_hmac(secret, data, chaining_key, BLAKE2S_OUTBYTES, data_len, NOISE_HASH_LEN);
 | |
| +
 | |
| +	if (!first_dst || !first_len)
 | |
| +		goto out;
 | |
| +
 | |
| +	/* Expand first key: key = secret, data = 0x1 */
 | |
| +	output[0] = 1;
 | |
| +	blake2s_hmac(output, output, secret, BLAKE2S_OUTBYTES, 1, BLAKE2S_OUTBYTES);
 | |
| +	memcpy(first_dst, output, first_len);
 | |
| +
 | |
| +	if (!second_dst || !second_len)
 | |
| +		goto out;
 | |
| +
 | |
| +	/* Expand second key: key = secret, data = first-key || 0x2 */
 | |
| +	output[BLAKE2S_OUTBYTES] = 2;
 | |
| +	blake2s_hmac(output, output, secret, BLAKE2S_OUTBYTES, BLAKE2S_OUTBYTES + 1, BLAKE2S_OUTBYTES);
 | |
| +	memcpy(second_dst, output, second_len);
 | |
| +
 | |
| +	if (!third_dst || !third_len)
 | |
| +		goto out;
 | |
| +
 | |
| +	/* Expand third key: key = secret, data = second-key || 0x3 */
 | |
| +	output[BLAKE2S_OUTBYTES] = 3;
 | |
| +	blake2s_hmac(output, output, secret, BLAKE2S_OUTBYTES, BLAKE2S_OUTBYTES + 1, BLAKE2S_OUTBYTES);
 | |
| +	memcpy(third_dst, output, third_len);
 | |
| +
 | |
| +out:
 | |
| +	/* Clear sensitive data from stack */
 | |
| +	memzero_explicit(secret, BLAKE2S_OUTBYTES);
 | |
| +	memzero_explicit(output, BLAKE2S_OUTBYTES + 1);
 | |
| +}
 | |
| +
 | |
| +/* Resets the bookkeeping of a freshly derived symmetric key: counter set to
 | |
| + * zero, receive backtrack window cleared, birthdate set to the current
 | |
| + * jiffies, and the key marked valid. The key bytes themselves are not
 | |
| + * touched here. */
 | |
| +static void symmetric_key_init(struct noise_symmetric_key *key)
 | |
| +{
 | |
| +	spin_lock_init(&key->counter.receive.lock);
 | |
| +	atomic64_set(&key->counter.counter, 0);
 | |
| +	memset(key->counter.receive.backtrack, 0, sizeof(key->counter.receive.backtrack));
 | |
| +	key->birthdate = get_jiffies_64();
 | |
| +	key->is_valid = true;
 | |
| +}
 | |
| +
 | |
| +/* Derives two NOISE_SYMMETRIC_KEY_LEN-byte transport keys from @chaining_key
 | |
| + * by running kdf() with no input data, then initializes the bookkeeping of
 | |
| + * both keys. The first kdf output goes to @first_dst, the second to
 | |
| + * @second_dst. */
 | |
| +static void derive_keys(struct noise_symmetric_key *first_dst, struct noise_symmetric_key *second_dst, const u8 chaining_key[NOISE_HASH_LEN])
 | |
| +{
 | |
| +	kdf(first_dst->key, second_dst->key, NULL, NULL, NOISE_SYMMETRIC_KEY_LEN, NOISE_SYMMETRIC_KEY_LEN, 0, 0, chaining_key);
 | |
| +	symmetric_key_init(first_dst);
 | |
| +	symmetric_key_init(second_dst);
 | |
| +}
 | |
| +
 | |
| +/* Mixes the Curve25519 shared secret of @private and @public into
 | |
| + * @chaining_key and derives @key from it.
 | |
| + *
 | |
| + * @key may be NULL, in which case only @chaining_key is updated (kdf() skips
 | |
| + * an output whose destination is NULL). Returns false, leaving both outputs
 | |
| + * untouched, when curve25519() fails; true otherwise. */
 | |
| +static bool __must_check mix_dh(u8 chaining_key[NOISE_HASH_LEN], u8 key[NOISE_SYMMETRIC_KEY_LEN], const u8 private[NOISE_PUBLIC_KEY_LEN], const u8 public[NOISE_PUBLIC_KEY_LEN])
 | |
| +{
 | |
| +	u8 dh_calculation[NOISE_PUBLIC_KEY_LEN];
 | |
| +	if (unlikely(!curve25519(dh_calculation, private, public)))
 | |
| +		return false;
 | |
| +	kdf(chaining_key, key, NULL, dh_calculation, NOISE_HASH_LEN, NOISE_SYMMETRIC_KEY_LEN, 0, NOISE_PUBLIC_KEY_LEN, chaining_key);
 | |
| +	memzero_explicit(dh_calculation, NOISE_PUBLIC_KEY_LEN);
 | |
| +	return true;
 | |
| +}
 | |
| +
 | |
| +/* Updates the running transcript hash in place:
 | |
| + * hash = BLAKE2s(hash || src[0..src_len)). */
 | |
| +static void mix_hash(u8 hash[NOISE_HASH_LEN], const u8 *src, size_t src_len)
 | |
| +{
 | |
| +	struct blake2s_state blake;
 | |
| +	blake2s_init(&blake, NOISE_HASH_LEN);
 | |
| +	blake2s_update(&blake, hash, NOISE_HASH_LEN);
 | |
| +	blake2s_update(&blake, src, src_len);
 | |
| +	blake2s_final(&blake, hash, NOISE_HASH_LEN);
 | |
| +}
 | |
| +
 | |
| +/* Mixes the preshared key into the handshake state. kdf() over @psk yields,
 | |
| + * in order, the new @chaining_key, a temporary value that is mixed into
 | |
| + * @hash, and the new @key. The temporary is wiped before returning. */
 | |
| +static void mix_psk(u8 chaining_key[NOISE_HASH_LEN], u8 hash[NOISE_HASH_LEN], u8 key[NOISE_SYMMETRIC_KEY_LEN], const u8 psk[NOISE_SYMMETRIC_KEY_LEN])
 | |
| +{
 | |
| +	u8 temp_hash[NOISE_HASH_LEN];
 | |
| +	kdf(chaining_key, temp_hash, key, psk, NOISE_HASH_LEN, NOISE_HASH_LEN, NOISE_SYMMETRIC_KEY_LEN, NOISE_SYMMETRIC_KEY_LEN, chaining_key);
 | |
| +	mix_hash(hash, temp_hash, NOISE_HASH_LEN);
 | |
| +	memzero_explicit(temp_hash, NOISE_HASH_LEN);
 | |
| +}
 | |
| +
 | |
| +/* Starts a fresh handshake transcript: @chaining_key and @hash are loaded
 | |
| + * with the constants precomputed by noise_init(), then the responder's
 | |
| + * static public key (@remote_static) is mixed into @hash. */
 | |
| +static void handshake_init(u8 chaining_key[NOISE_HASH_LEN], u8 hash[NOISE_HASH_LEN], const u8 remote_static[NOISE_PUBLIC_KEY_LEN])
 | |
| +{
 | |
| +	memcpy(chaining_key, handshake_init_chaining_key, NOISE_HASH_LEN);
 | |
| +
 | |
| +	memcpy(hash, handshake_init_hash, NOISE_HASH_LEN);
 | |
| +	mix_hash(hash, remote_static, NOISE_PUBLIC_KEY_LEN);
 | |
| +}
 | |
| +
 | |
| +/* Encrypts @src_len bytes of @src_plaintext into @dst_ciphertext with
 | |
| + * ChaCha20-Poly1305 under @key, using the current @hash as associated data
 | |
| + * and a zero nonce, then mixes the resulting ciphertext
 | |
| + * (noise_encrypted_len(src_len) bytes) into @hash. */
 | |
| +static void message_encrypt(u8 *dst_ciphertext, const u8 *src_plaintext, size_t src_len, u8 key[NOISE_SYMMETRIC_KEY_LEN], u8 hash[NOISE_HASH_LEN])
 | |
| +{
 | |
| +	chacha20poly1305_encrypt(dst_ciphertext, src_plaintext, src_len, hash, NOISE_HASH_LEN, 0 /* Always zero for Noise_IK */, key);
 | |
| +	mix_hash(hash, dst_ciphertext, noise_encrypted_len(src_len));
 | |
| +}
 | |
| +
 | |
| +/* Authenticates and decrypts @src_len bytes of @src_ciphertext into
 | |
| + * @dst_plaintext with ChaCha20-Poly1305 under @key, using the current @hash
 | |
| + * as associated data and a zero nonce.
 | |
| + *
 | |
| + * On success the ciphertext is mixed into @hash and true is returned. On
 | |
| + * failure @hash is left unchanged and false is returned. */
 | |
| +static bool message_decrypt(u8 *dst_plaintext, const u8 *src_ciphertext, size_t src_len, u8 key[NOISE_SYMMETRIC_KEY_LEN], u8 hash[NOISE_HASH_LEN])
 | |
| +{
 | |
| +	bool authentic = chacha20poly1305_decrypt(dst_plaintext, src_ciphertext, src_len, hash, NOISE_HASH_LEN, 0 /* Always zero for Noise_IK */, key);
 | |
| +
 | |
| +	if (authentic)
 | |
| +		mix_hash(hash, src_ciphertext, src_len);
 | |
| +	return authentic;
 | |
| +}
 | |
| +
 | |
| +/* Processes an ephemeral public key: copies it to @ephemeral_dst (unless
 | |
| + * source and destination are the same buffer), mixes it into @hash, and
 | |
| + * mixes it into @chaining_key through kdf(). */
 | |
| +static void message_ephemeral(u8 ephemeral_dst[NOISE_PUBLIC_KEY_LEN], const u8 ephemeral_src[NOISE_PUBLIC_KEY_LEN], u8 chaining_key[NOISE_HASH_LEN], u8 hash[NOISE_HASH_LEN])
 | |
| +{
 | |
| +	if (ephemeral_dst != ephemeral_src)
 | |
| +		memcpy(ephemeral_dst, ephemeral_src, NOISE_PUBLIC_KEY_LEN);
 | |
| +	mix_hash(hash, ephemeral_src, NOISE_PUBLIC_KEY_LEN);
 | |
| +	kdf(chaining_key, NULL, NULL, ephemeral_src, NOISE_HASH_LEN, 0, 0, NOISE_PUBLIC_KEY_LEN, chaining_key);
 | |
| +}
 | |
| +
 | |
| +/* Writes the current wall-clock time into @output as a big-endian 64-bit
 | |
| + * seconds label followed by a big-endian 32-bit nanoseconds field.
 | |
| + *
 | |
| + * The seconds constant equals 2^62 + 10; see the TAI64 page linked below
 | |
| + * for the label format. The nanoseconds field is built from microseconds,
 | |
| + * with 500 added to land in the middle of the microsecond.
 | |
| + * NOTE(review): @output is written through __be64/__be32 casts, so it is
 | |
| + * presumably expected to be suitably aligned -- confirm against callers. */
 | |
| +static void tai64n_now(u8 output[NOISE_TIMESTAMP_LEN])
 | |
| +{
 | |
| +	struct timeval now;
 | |
| +	do_gettimeofday(&now);
 | |
| +	/* https://cr.yp.to/libtai/tai64.html */
 | |
| +	*(__be64 *)output = cpu_to_be64(4611686018427387914ULL + now.tv_sec);
 | |
| +	*(__be32 *)(output + sizeof(__be64)) = cpu_to_be32(1000 * now.tv_usec + 500);
 | |
| +}
 | |
| +
 | |
| +/* Builds the first handshake message ("-> e, es, s, ss, {t}") into @dst and
 | |
| + * moves @handshake to HANDSHAKE_CREATED_INITIATION.
 | |
| + *
 | |
| + * Holds the static identity lock for reading and the handshake lock for
 | |
| + * writing throughout. Returns false if the device has no identity or a
 | |
| + * Curve25519 operation fails; @dst may then be partially written. On
 | |
| + * success the handshake is inserted into the device's index hashtable and
 | |
| + * the resulting index is stored in dst->sender_index. */
 | |
| +bool noise_handshake_create_initiation(struct message_handshake_initiation *dst, struct noise_handshake *handshake)
 | |
| +{
 | |
| +	u8 timestamp[NOISE_TIMESTAMP_LEN];
 | |
| +	u8 key[NOISE_SYMMETRIC_KEY_LEN];
 | |
| +	bool ret = false;
 | |
| +
 | |
| +	down_read(&handshake->static_identity->lock);
 | |
| +	down_write(&handshake->lock);
 | |
| +
 | |
| +	if (unlikely(!handshake->static_identity->has_identity))
 | |
| +		goto out;
 | |
| +
 | |
| +	dst->header.type = cpu_to_le32(MESSAGE_HANDSHAKE_INITIATION);
 | |
| +
 | |
| +	handshake_init(handshake->chaining_key, handshake->hash, handshake->remote_static);
 | |
| +
 | |
| +	/* e */
 | |
| +	curve25519_generate_secret(handshake->ephemeral_private);
 | |
| +	if (!curve25519_generate_public(dst->unencrypted_ephemeral, handshake->ephemeral_private))
 | |
| +		goto out;
 | |
| +	message_ephemeral(dst->unencrypted_ephemeral, dst->unencrypted_ephemeral, handshake->chaining_key, handshake->hash);
 | |
| +
 | |
| +	/* es */
 | |
| +	if (!mix_dh(handshake->chaining_key, key, handshake->ephemeral_private, handshake->remote_static))
 | |
| +		goto out;
 | |
| +
 | |
| +	/* s */
 | |
| +	message_encrypt(dst->encrypted_static, handshake->static_identity->static_public, NOISE_PUBLIC_KEY_LEN, key, handshake->hash);
 | |
| +
 | |
| +	/* ss */
 | |
| +	kdf(handshake->chaining_key, key, NULL, handshake->precomputed_static_static, NOISE_HASH_LEN, NOISE_SYMMETRIC_KEY_LEN, 0, NOISE_PUBLIC_KEY_LEN, handshake->chaining_key);
 | |
| +
 | |
| +	/* {t} */
 | |
| +	tai64n_now(timestamp);
 | |
| +	message_encrypt(dst->encrypted_timestamp, timestamp, NOISE_TIMESTAMP_LEN, key, handshake->hash);
 | |
| +
 | |
| +	dst->sender_index = index_hashtable_insert(&handshake->entry.peer->device->index_hashtable, &handshake->entry);
 | |
| +
 | |
| +	handshake->state = HANDSHAKE_CREATED_INITIATION;
 | |
| +	ret = true;
 | |
| +
 | |
| +out:
 | |
| +	up_write(&handshake->lock);
 | |
| +	up_read(&handshake->static_identity->lock);
 | |
| +	/* key may never have been written on the early-exit paths; wiping it is harmless. */
 | |
| +	memzero_explicit(key, NOISE_SYMMETRIC_KEY_LEN);
 | |
| +	return ret;
 | |
| +}
 | |
| +
 | |
| +/* Processes a received handshake initiation ("-> e, es, s, ss, {t}").
 | |
| + *
 | |
| + * The transcript is computed on stack copies; the peer's handshake state is
 | |
| + * only updated once the sender's static key has been looked up, the
 | |
| + * timestamp has been authenticated, and the replay and flood checks have
 | |
| + * passed.
 | |
| + *
 | |
| + * Returns the peer the initiation came from, with the reference obtained
 | |
| + * from pubkey_hashtable_lookup() handed to the caller, or NULL on any
 | |
| + * failure. Every failure after the lookup drops that reference, so a
 | |
| + * non-NULL return always means the initiation was fully validated. */
 | |
| +struct wireguard_peer *noise_handshake_consume_initiation(struct message_handshake_initiation *src, struct wireguard_device *wg)
 | |
| +{
 | |
| +	bool replay_attack, flood_attack;
 | |
| +	u8 s[NOISE_PUBLIC_KEY_LEN];
 | |
| +	u8 e[NOISE_PUBLIC_KEY_LEN];
 | |
| +	u8 t[NOISE_TIMESTAMP_LEN];
 | |
| +	struct noise_handshake *handshake;
 | |
| +	struct wireguard_peer *wg_peer = NULL;
 | |
| +	u8 key[NOISE_SYMMETRIC_KEY_LEN];
 | |
| +	u8 hash[NOISE_HASH_LEN];
 | |
| +	u8 chaining_key[NOISE_HASH_LEN];
 | |
| +
 | |
| +	down_read(&wg->static_identity.lock);
 | |
| +	if (unlikely(!wg->static_identity.has_identity))
 | |
| +		goto out;
 | |
| +
 | |
| +	handshake_init(chaining_key, hash, wg->static_identity.static_public);
 | |
| +
 | |
| +	/* e */
 | |
| +	message_ephemeral(e, src->unencrypted_ephemeral, chaining_key, hash);
 | |
| +
 | |
| +	/* es */
 | |
| +	if (!mix_dh(chaining_key, key, wg->static_identity.static_private, e))
 | |
| +		goto out;
 | |
| +
 | |
| +	/* s */
 | |
| +	if (!message_decrypt(s, src->encrypted_static, sizeof(src->encrypted_static), key, hash))
 | |
| +		goto out;
 | |
| +
 | |
| +	/* Lookup which peer we're actually talking to */
 | |
| +	wg_peer = pubkey_hashtable_lookup(&wg->peer_hashtable, s);
 | |
| +	if (!wg_peer)
 | |
| +		goto out;
 | |
| +	handshake = &wg_peer->handshake;
 | |
| +
 | |
| +	/* ss */
 | |
| +	kdf(chaining_key, key, NULL, handshake->precomputed_static_static, NOISE_HASH_LEN, NOISE_SYMMETRIC_KEY_LEN, 0, NOISE_PUBLIC_KEY_LEN, chaining_key);
 | |
| +
 | |
| +	/* {t} */
 | |
| +	/* The timestamp is the first field keyed with the static-static secret,
 | |
| +	 * so a failure here means the sender is not the peer it claims to be:
 | |
| +	 * the peer must not be returned, and the lookup reference is dropped. */
 | |
| +	if (!message_decrypt(t, src->encrypted_timestamp, sizeof(src->encrypted_timestamp), key, hash))
 | |
| +		goto fail;
 | |
| +
 | |
| +	down_read(&handshake->lock);
 | |
| +	replay_attack = memcmp(t, handshake->latest_timestamp, NOISE_TIMESTAMP_LEN) <= 0;
 | |
| +	flood_attack = !time_is_before_jiffies64(handshake->last_initiation_consumption + INITIATIONS_PER_SECOND);
 | |
| +	up_read(&handshake->lock);
 | |
| +	if (replay_attack || flood_attack)
 | |
| +		goto fail;
 | |
| +
 | |
| +	/* Success! Copy everything to peer */
 | |
| +	down_write(&handshake->lock);
 | |
| +	memcpy(handshake->remote_ephemeral, e, NOISE_PUBLIC_KEY_LEN);
 | |
| +	memcpy(handshake->latest_timestamp, t, NOISE_TIMESTAMP_LEN);
 | |
| +	memcpy(handshake->hash, hash, NOISE_HASH_LEN);
 | |
| +	memcpy(handshake->chaining_key, chaining_key, NOISE_HASH_LEN);
 | |
| +	handshake->remote_index = src->sender_index;
 | |
| +	handshake->last_initiation_consumption = get_jiffies_64();
 | |
| +	handshake->state = HANDSHAKE_CONSUMED_INITIATION;
 | |
| +	up_write(&handshake->lock);
 | |
| +	goto out;
 | |
| +
 | |
| +fail:
 | |
| +	/* Drop the reference taken by the lookup and report failure. */
 | |
| +	peer_put(wg_peer);
 | |
| +	wg_peer = NULL;
 | |
| +out:
 | |
| +	memzero_explicit(key, NOISE_SYMMETRIC_KEY_LEN);
 | |
| +	memzero_explicit(hash, NOISE_HASH_LEN);
 | |
| +	memzero_explicit(chaining_key, NOISE_HASH_LEN);
 | |
| +	up_read(&wg->static_identity.lock);
 | |
| +	return wg_peer;
 | |
| +}
 | |
| +
 | |
| +/* Builds the second handshake message ("<- e, ee, se, psk, {}") into @dst
 | |
| + * and moves @handshake to HANDSHAKE_CREATED_RESPONSE.
 | |
| + *
 | |
| + * Only valid while the handshake is in HANDSHAKE_CONSUMED_INITIATION;
 | |
| + * returns false otherwise, or when a Curve25519 operation fails (@dst may
 | |
| + * then be partially written). Holds the static identity lock for reading
 | |
| + * and the handshake lock for writing throughout. */
 | |
| +bool noise_handshake_create_response(struct message_handshake_response *dst, struct noise_handshake *handshake)
 | |
| +{
 | |
| +	bool ret = false;
 | |
| +	u8 key[NOISE_SYMMETRIC_KEY_LEN];
 | |
| +	down_read(&handshake->static_identity->lock);
 | |
| +	down_write(&handshake->lock);
 | |
| +
 | |
| +	if (handshake->state != HANDSHAKE_CONSUMED_INITIATION)
 | |
| +		goto out;
 | |
| +
 | |
| +	dst->header.type = cpu_to_le32(MESSAGE_HANDSHAKE_RESPONSE);
 | |
| +	dst->receiver_index = handshake->remote_index;
 | |
| +
 | |
| +	/* e */
 | |
| +	curve25519_generate_secret(handshake->ephemeral_private);
 | |
| +	if (!curve25519_generate_public(dst->unencrypted_ephemeral, handshake->ephemeral_private))
 | |
| +		goto out;
 | |
| +	message_ephemeral(dst->unencrypted_ephemeral, dst->unencrypted_ephemeral, handshake->chaining_key, handshake->hash);
 | |
| +
 | |
| +	/* ee */
 | |
| +	/* NULL key: only the chaining key is advanced by this step. */
 | |
| +	if (!mix_dh(handshake->chaining_key, NULL, handshake->ephemeral_private, handshake->remote_ephemeral))
 | |
| +		goto out;
 | |
| +
 | |
| +	/* se */
 | |
| +	if (!mix_dh(handshake->chaining_key, NULL, handshake->ephemeral_private, handshake->remote_static))
 | |
| +		goto out;
 | |
| +
 | |
| +	/* psk */
 | |
| +	mix_psk(handshake->chaining_key, handshake->hash, key, handshake->preshared_key);
 | |
| +
 | |
| +	/* {} */
 | |
| +	message_encrypt(dst->encrypted_nothing, NULL, 0, key, handshake->hash);
 | |
| +
 | |
| +	dst->sender_index = index_hashtable_insert(&handshake->entry.peer->device->index_hashtable, &handshake->entry);
 | |
| +
 | |
| +	handshake->state = HANDSHAKE_CREATED_RESPONSE;
 | |
| +	ret = true;
 | |
| +
 | |
| +out:
 | |
| +	up_write(&handshake->lock);
 | |
| +	up_read(&handshake->static_identity->lock);
 | |
| +	memzero_explicit(key, NOISE_SYMMETRIC_KEY_LEN);
 | |
| +	return ret;
 | |
| +}
 | |
| +
 | |
| +/* Processes a received handshake response ("<- e, ee, se, psk, {}").
 | |
| + *
 | |
| + * The handshake is found through src->receiver_index. Its state is
 | |
| + * snapshotted under the read lock, the transcript is advanced on stack
 | |
| + * copies, and the result is committed under the write lock only if the
 | |
| + * state has not changed in the meantime.
 | |
| + *
 | |
| + * Returns the owning peer on success, keeping the reference obtained by the
 | |
| + * index hashtable lookup for the caller, or NULL on failure. Every failure
 | |
| + * after a successful lookup goes through "fail" so that the reference is
 | |
| + * dropped. */
 | |
| +struct wireguard_peer *noise_handshake_consume_response(struct message_handshake_response *src, struct wireguard_device *wg)
 | |
| +{
 | |
| +	struct noise_handshake *handshake;
 | |
| +	struct wireguard_peer *ret_peer = NULL;
 | |
| +	u8 key[NOISE_SYMMETRIC_KEY_LEN];
 | |
| +	u8 hash[NOISE_HASH_LEN];
 | |
| +	u8 chaining_key[NOISE_HASH_LEN];
 | |
| +	u8 e[NOISE_PUBLIC_KEY_LEN];
 | |
| +	u8 ephemeral_private[NOISE_PUBLIC_KEY_LEN];
 | |
| +	enum noise_handshake_state state = HANDSHAKE_ZEROED;
 | |
| +
 | |
| +	down_read(&wg->static_identity.lock);
 | |
| +
 | |
| +	if (unlikely(!wg->static_identity.has_identity))
 | |
| +		goto out;
 | |
| +
 | |
| +	handshake = (struct noise_handshake *)index_hashtable_lookup(&wg->index_hashtable, INDEX_HASHTABLE_HANDSHAKE, src->receiver_index);
 | |
| +	if (unlikely(!handshake))
 | |
| +		goto out;
 | |
| +
 | |
| +	down_read(&handshake->lock);
 | |
| +	state = handshake->state;
 | |
| +	memcpy(hash, handshake->hash, NOISE_HASH_LEN);
 | |
| +	memcpy(chaining_key, handshake->chaining_key, NOISE_HASH_LEN);
 | |
| +	memcpy(ephemeral_private, handshake->ephemeral_private, NOISE_PUBLIC_KEY_LEN);
 | |
| +	up_read(&handshake->lock);
 | |
| +
 | |
| +	if (state != HANDSHAKE_CREATED_INITIATION)
 | |
| +		goto fail;
 | |
| +
 | |
| +	/* e */
 | |
| +	message_ephemeral(e, src->unencrypted_ephemeral, chaining_key, hash);
 | |
| +
 | |
| +	/* ee */
 | |
| +	if (!mix_dh(chaining_key, NULL, ephemeral_private, e))
 | |
| +		goto fail;
 | |
| +
 | |
| +	/* se */
 | |
| +	if (!mix_dh(chaining_key, NULL, wg->static_identity.static_private, e))
 | |
| +		goto fail;
 | |
| +
 | |
| +	/* psk */
 | |
| +	mix_psk(chaining_key, hash, key, handshake->preshared_key);
 | |
| +
 | |
| +	/* {} */
 | |
| +	if (!message_decrypt(NULL, src->encrypted_nothing, sizeof(src->encrypted_nothing), key, hash))
 | |
| +		goto fail;
 | |
| +
 | |
| +	/* Success! Copy everything to peer */
 | |
| +	down_write(&handshake->lock);
 | |
| +	/* It's important to check that the state is still the same, while we have an exclusive lock */
 | |
| +	if (handshake->state != state) {
 | |
| +		up_write(&handshake->lock);
 | |
| +		goto fail;
 | |
| +	}
 | |
| +	memcpy(handshake->remote_ephemeral, e, NOISE_PUBLIC_KEY_LEN);
 | |
| +	memcpy(handshake->hash, hash, NOISE_HASH_LEN);
 | |
| +	memcpy(handshake->chaining_key, chaining_key, NOISE_HASH_LEN);
 | |
| +	handshake->remote_index = src->sender_index;
 | |
| +	handshake->state = HANDSHAKE_CONSUMED_RESPONSE;
 | |
| +	up_write(&handshake->lock);
 | |
| +	ret_peer = handshake->entry.peer;
 | |
| +	goto out;
 | |
| +
 | |
| +fail:
 | |
| +	/* Drop the peer reference that came with the handshake lookup. */
 | |
| +	peer_put(handshake->entry.peer);
 | |
| +out:
 | |
| +	/* On the early-exit paths these buffers may be unwritten; wiping them is harmless. */
 | |
| +	memzero_explicit(key, NOISE_SYMMETRIC_KEY_LEN);
 | |
| +	memzero_explicit(hash, NOISE_HASH_LEN);
 | |
| +	memzero_explicit(chaining_key, NOISE_HASH_LEN);
 | |
| +	memzero_explicit(ephemeral_private, NOISE_PUBLIC_KEY_LEN);
 | |
| +	up_read(&wg->static_identity.lock);
 | |
| +	return ret_peer;
 | |
| +}
 | |
| +
 | |
| +/* Turns a completed handshake into a transport keypair.
 | |
| + *
 | |
| + * Valid only in HANDSHAKE_CREATED_RESPONSE or HANDSHAKE_CONSUMED_RESPONSE.
 | |
| + * Derives the sending/receiving keys from the chaining key (the two kdf
 | |
| + * outputs are assigned in opposite order for initiator and responder),
 | |
| + * wipes the handshake, installs the keypair into @keypairs and replaces the
 | |
| + * handshake's index hashtable entry with the keypair's.
 | |
| + *
 | |
| + * Returns true on success; false if the state is wrong or the keypair
 | |
| + * allocation fails. Allocates with GFP_KERNEL and takes a mutex, so it may
 | |
| + * sleep. */
 | |
| +bool noise_handshake_begin_session(struct noise_handshake *handshake, struct noise_keypairs *keypairs, bool i_am_the_initiator)
 | |
| +{
 | |
| +	struct noise_keypair *new_keypair;
 | |
| +
 | |
| +	down_write(&handshake->lock);
 | |
| +	if (handshake->state != HANDSHAKE_CREATED_RESPONSE && handshake->state != HANDSHAKE_CONSUMED_RESPONSE)
 | |
| +		goto fail;
 | |
| +
 | |
| +	new_keypair = keypair_create(handshake->entry.peer);
 | |
| +	if (!new_keypair)
 | |
| +		goto fail;
 | |
| +	new_keypair->i_am_the_initiator = i_am_the_initiator;
 | |
| +	/* Must be read before handshake_zero() below resets remote_index. */
 | |
| +	new_keypair->remote_index = handshake->remote_index;
 | |
| +
 | |
| +	if (i_am_the_initiator)
 | |
| +		derive_keys(&new_keypair->sending, &new_keypair->receiving, handshake->chaining_key);
 | |
| +	else
 | |
| +		derive_keys(&new_keypair->receiving, &new_keypair->sending, handshake->chaining_key);
 | |
| +
 | |
| +	handshake_zero(handshake);
 | |
| +	add_new_keypair(keypairs, new_keypair);
 | |
| +	net_dbg_ratelimited("%s: Keypair %Lu created for peer %Lu\n", netdev_pub(new_keypair->entry.peer->device)->name, new_keypair->internal_id, new_keypair->entry.peer->internal_id);
 | |
| +	WARN_ON(!index_hashtable_replace(&handshake->entry.peer->device->index_hashtable, &handshake->entry, &new_keypair->entry));
 | |
| +	up_write(&handshake->lock);
 | |
| +
 | |
| +	return true;
 | |
| +
 | |
| +fail:
 | |
| +	up_write(&handshake->lock);
 | |
| +	return false;
 | |
| +}
 | |
| --- /dev/null	2017-07-05 16:27:37.615351856 +0200
 | |
| +++ b/net/wireguard/peer.c	2017-07-06 18:17:33.000000000 +0200
 | |
| @@ -0,0 +1,130 @@
 | |
| +/* Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. */
 | |
| +
 | |
| +#include "peer.h"
 | |
| +#include "device.h"
 | |
| +#include "packets.h"
 | |
| +#include "timers.h"
 | |
| +#include "hashtables.h"
 | |
| +#include "noise.h"
 | |
| +
 | |
| +#include <linux/kref.h>
 | |
| +#include <linux/lockdep.h>
 | |
| +#include <linux/rcupdate.h>
 | |
| +#include <linux/list.h>
 | |
| +
 | |
| +/* Source of the internal_id given to every peer in peer_create(). */
 | |
| +static atomic64_t peer_counter = ATOMIC64_INIT(0);
 | |
| +
 | |
| +/* Allocates a peer for @wg identified by @public_key, initializes its
 | |
| + * handshake, cookie, queue and locking state, and links it into the
 | |
| + * device's public-key hashtable and peer list.
 | |
| + *
 | |
| + * The caller must hold wg->device_update_lock. Returns the new peer, whose
 | |
| + * reference count was initialized by kref_init(), or NULL when the device
 | |
| + * already has MAX_PEERS_PER_DEVICE peers, an allocation fails, or the
 | |
| + * handshake cannot be initialized. Nothing is left allocated or linked on
 | |
| + * failure. */
 | |
| +struct wireguard_peer *peer_create(struct wireguard_device *wg, const u8 public_key[NOISE_PUBLIC_KEY_LEN], const u8 preshared_key[NOISE_SYMMETRIC_KEY_LEN])
 | |
| +{
 | |
| +	struct wireguard_peer *peer;
 | |
| +	lockdep_assert_held(&wg->device_update_lock);
 | |
| +
 | |
| +	if (peer_total_count(wg) >= MAX_PEERS_PER_DEVICE)
 | |
| +		return NULL;
 | |
| +
 | |
| +	peer = kzalloc(sizeof(*peer), GFP_KERNEL);
 | |
| +	if (!peer)
 | |
| +		return NULL;
 | |
| +
 | |
| +	if (dst_cache_init(&peer->endpoint_cache, GFP_KERNEL))
 | |
| +		goto err_free_peer;
 | |
| +
 | |
| +	peer->internal_id = atomic64_inc_return(&peer_counter);
 | |
| +	peer->device = wg;
 | |
| +	cookie_init(&peer->latest_cookie);
 | |
| +	if (!noise_handshake_init(&peer->handshake, &wg->static_identity, public_key, preshared_key, peer))
 | |
| +		goto err_destroy_cache;
 | |
| +	cookie_checker_precompute_peer_keys(peer);
 | |
| +	mutex_init(&peer->keypairs.keypair_update_lock);
 | |
| +	INIT_WORK(&peer->transmit_handshake_work, packet_send_queued_handshakes);
 | |
| +	rwlock_init(&peer->endpoint_lock);
 | |
| +	skb_queue_head_init(&peer->tx_packet_queue);
 | |
| +	kref_init(&peer->refcount);
 | |
| +	pubkey_hashtable_add(&wg->peer_hashtable, peer);
 | |
| +	list_add_tail(&peer->peer_list, &wg->peer_list);
 | |
| +#ifdef CONFIG_WIREGUARD_PARALLEL
 | |
| +	atomic_set(&peer->parallel_encryption_inflight, 0);
 | |
| +#endif
 | |
| +	pr_debug("%s: Peer %Lu created\n", netdev_pub(wg)->name, peer->internal_id);
 | |
| +	return peer;
 | |
| +
 | |
| +err_destroy_cache:
 | |
| +	/* The dst cache was set up successfully above and must be torn down. */
 | |
| +	dst_cache_destroy(&peer->endpoint_cache);
 | |
| +err_free_peer:
 | |
| +	/* The handshake may already hold a copy of the preshared key, so wipe
 | |
| +	 * the memory while freeing it. */
 | |
| +	kzfree(peer);
 | |
| +	return NULL;
 | |
| +}
 | |
| +
 | |
| +/* Tries to take a reference on @peer. Returns @peer on success, or NULL if
 | |
| + * @peer is NULL or its reference count has already reached zero. Lockdep
 | |
| + * warns if the RCU BH read lock is not held by the caller. */
 | |
| +struct wireguard_peer *peer_get(struct wireguard_peer *peer)
 | |
| +{
 | |
| +	RCU_LOCKDEP_WARN(!rcu_read_lock_bh_held(), "Calling peer_get without holding the RCU read lock");
 | |
| +	if (unlikely(!peer))
 | |
| +		return NULL;
 | |
| +	if (unlikely(!kref_get_unless_zero(&peer->refcount)))
 | |
| +		return NULL;
 | |
| +	return peer;
 | |
| +}
 | |
| +
 | |
| +/* Convenience wrapper around peer_get() that takes and releases the RCU BH
 | |
| + * read lock itself. Returns what peer_get() returns. */
 | |
| +struct wireguard_peer *peer_rcu_get(struct wireguard_peer *peer)
 | |
| +{
 | |
| +	struct wireguard_peer *referenced;
 | |
| +
 | |
| +	rcu_read_lock_bh();
 | |
| +	referenced = peer_get(peer);
 | |
| +	rcu_read_unlock_bh();
 | |
| +	return referenced;
 | |
| +}
 | |
| +
 | |
| +/* We have a separate "remove" function to get rid of the final reference because
 | |
| + * peer_list, clearing handshakes, and flushing all require mutexes which requires
 | |
| + * sleeping, which must only be done from certain contexts.
 | |
| + *
 | |
| + * Tears the peer down in this order: handshake and keypairs are cleared, the
 | |
| + * peer is unlinked from the device's peer list, its timers are shut down,
 | |
| + * it is removed from the routing table and the public-key hashtable, the
 | |
| + * device workqueue (if any) is flushed, queued packets are purged, and
 | |
| + * finally one reference is dropped. A NULL @peer is ignored. The caller
 | |
| + * must hold the device's device_update_lock. */
 | |
| +void peer_remove(struct wireguard_peer *peer)
 | |
| +{
 | |
| +	if (unlikely(!peer))
 | |
| +		return;
 | |
| +	lockdep_assert_held(&peer->device->device_update_lock);
 | |
| +	noise_handshake_clear(&peer->handshake);
 | |
| +	noise_keypairs_clear(&peer->keypairs);
 | |
| +	list_del(&peer->peer_list);
 | |
| +	timers_uninit_peer(peer);
 | |
| +	routing_table_remove_by_peer(&peer->device->peer_routing_table, peer);
 | |
| +	pubkey_hashtable_remove(&peer->device->peer_hashtable, peer);
 | |
| +	if (peer->device->peer_wq)
 | |
| +		flush_workqueue(peer->device->peer_wq);
 | |
| +	skb_queue_purge(&peer->tx_packet_queue);
 | |
| +	peer_put(peer);
 | |
| +}
 | |
| +
 | |
| +/* RCU callback queued by kref_release(): logs the destruction, purges any
 | |
| + * packets still sitting in the transmit queue, destroys the endpoint dst
 | |
| + * cache and releases the peer with kzfree(). */
 | |
| +static void rcu_release(struct rcu_head *rcu)
 | |
| +{
 | |
| +	struct wireguard_peer *peer = container_of(rcu, struct wireguard_peer, rcu);
 | |
| +	pr_debug("%s: Peer %Lu (%pISpfsc) destroyed\n", netdev_pub(peer->device)->name, peer->internal_id, &peer->endpoint.addr);
 | |
| +	skb_queue_purge(&peer->tx_packet_queue);
 | |
| +	dst_cache_destroy(&peer->endpoint_cache);
 | |
| +	kzfree(peer);
 | |
| +}
 | |
| +
 | |
| +/* kref release function: runs when the last reference to a peer is put and
 | |
| + * defers the actual free to rcu_release() via call_rcu_bh(). */
 | |
| +static void kref_release(struct kref *refcount)
 | |
| +{
 | |
| +	struct wireguard_peer *peer = container_of(refcount, struct wireguard_peer, refcount);
 | |
| +	call_rcu_bh(&peer->rcu, rcu_release);
 | |
| +}
 | |
| +
 | |
| +/* Drops one reference on @peer; the last put triggers kref_release(). A NULL
 | |
| + * @peer is accepted and ignored. */
 | |
| +void peer_put(struct wireguard_peer *peer)
 | |
| +{
 | |
| +	if (likely(peer))
 | |
| +		kref_put(&peer->refcount, kref_release);
 | |
| +}
 | |
| +
 | |
| +/* Removes every peer on @wg's peer list with peer_remove(). The caller must
 | |
| + * hold wg->device_update_lock. */
 | |
| +void peer_remove_all(struct wireguard_device *wg)
 | |
| +{
 | |
| +	struct wireguard_peer *peer, *temp;
 | |
| +	lockdep_assert_held(&wg->device_update_lock);
 | |
| +	/* The _safe variant is required: peer_remove() unlinks the current entry. */
 | |
| +	list_for_each_entry_safe (peer, temp, &wg->peer_list, peer_list)
 | |
| +		peer_remove(peer);
 | |
| +}
 | |
| +
 | |
| +/* Returns the number of peers currently linked on @wg's peer list, found by
 | |
| + * walking the list. The caller must hold wg->device_update_lock. */
 | |
| +unsigned int peer_total_count(struct wireguard_device *wg)
 | |
| +{
 | |
| +	struct wireguard_peer *peer;
 | |
| +	unsigned int count = 0;
 | |
| +
 | |
| +	lockdep_assert_held(&wg->device_update_lock);
 | |
| +	list_for_each_entry (peer, &wg->peer_list, peer_list)
 | |
| +		count++;
 | |
| +	return count;
 | |
| +}
 | |
| --- /dev/null	2017-07-05 16:27:37.615351856 +0200
 | |
| +++ b/net/wireguard/ratelimiter.c	2017-07-06 18:17:33.000000000 +0200
 | |
| @@ -0,0 +1,194 @@
 | |
| +/* Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. */
 | |
| +
 | |
| +#include "ratelimiter.h"
 | |
| +#include <linux/siphash.h>
 | |
| +#include <linux/mm.h>
 | |
| +#include <linux/slab.h>
 | |
| +#include <net/ip.h>
 | |
| +
 | |
| +static struct kmem_cache *entry_cache;
 | |
| +static hsiphash_key_t key;
 | |
| +static spinlock_t table_lock = __SPIN_LOCK_UNLOCKED("ratelimiter_table_lock");
 | |
| +static atomic64_t refcnt = ATOMIC64_INIT(0);
 | |
| +static atomic_t total_entries = ATOMIC_INIT(0);
 | |
| +static unsigned int max_entries, table_size;
 | |
| +static void gc_entries(struct work_struct *);
 | |
| +static DECLARE_DEFERRABLE_WORK(gc_work, gc_entries);
 | |
| +static struct hlist_head *table_v4;
 | |
| +#if IS_ENABLED(CONFIG_IPV6)
 | |
| +static struct hlist_head *table_v6;
 | |
| +#endif
 | |
| +
 | |
| +struct entry {
 | |
| +	u64 last_time_ns, tokens; /* ktime_get_ns() of the last update, and the current token balance (in nanoseconds) */
 | |
| +	__be64 ip; /* IPv4 source address, or the first 64 bits of the IPv6 source address */
 | |
| +	void *net; /* the struct net pointer passed to ratelimiter_allow(); only compared for equality */
 | |
| +	spinlock_t lock; /* taken around the token update in ratelimiter_allow() */
 | |
| +	struct hlist_node hash; /* linkage in a table_v4/table_v6 bucket */
 | |
| +	struct rcu_head rcu; /* used by entry_uninit() for the deferred free */
 | |
| +};
 | |
| +
 | |
| +enum {
 | |
| +	PACKETS_PER_SECOND = 20, /* sustained rate allowed per entry */
 | |
| +	PACKETS_BURSTABLE = 5, /* how many packets' worth of tokens an entry may accumulate */
 | |
| +	PACKET_COST = NSEC_PER_SEC / PACKETS_PER_SECOND, /* tokens are nanoseconds; one packet costs this many */
 | |
| +	TOKEN_MAX = PACKET_COST * PACKETS_BURSTABLE /* cap applied to an entry's token balance */
 | |
| +};
 | |
| +
 | |
| +static void entry_free(struct rcu_head *rcu) /* RCU callback queued by entry_uninit(). */
 | |
| +{
 | |
| +	kmem_cache_free(entry_cache, container_of(rcu, struct entry, rcu));
 | |
| +	atomic_dec(&total_entries); /* Release the slot counted against max_entries in ratelimiter_allow(). */
 | |
| +}
 | |
| +
 | |
| +static void entry_uninit(struct entry *entry) /* Unlink @entry from its bucket and queue it for freeing; gc_entries() calls this with table_lock held. */
 | |
| +{
 | |
| +	hlist_del_rcu(&entry->hash);
 | |
| +	call_rcu_bh(&entry->rcu, entry_free); /* The memory is returned to entry_cache later, in entry_free(). */
 | |
| +}
 | |
| +
 | |
| +/* Removes entries not updated for more than one second. Calling this function with a NULL work uninits all entries. */
 | |
| +static void gc_entries(struct work_struct *work)
 | |
| +{
 | |
| +	unsigned int i;
 | |
| +	struct entry *entry;
 | |
| +	struct hlist_node *temp;
 | |
| +	const u64 now = ktime_get_ns(); /* Sampled once; every bucket is judged against the same timestamp. */
 | |
| +
 | |
| +	for (i = 0; i < table_size; ++i) {
 | |
| +		spin_lock(&table_lock); /* Held per bucket index, covering both the v4 and v6 bucket. */
 | |
| +		hlist_for_each_entry_safe (entry, temp, &table_v4[i], hash) {
 | |
| +			if (unlikely(!work) || now - entry->last_time_ns > NSEC_PER_SEC)
 | |
| +				entry_uninit(entry);
 | |
| +		}
 | |
| +#if IS_ENABLED(CONFIG_IPV6)
 | |
| +		hlist_for_each_entry_safe (entry, temp, &table_v6[i], hash) {
 | |
| +			if (unlikely(!work) || now - entry->last_time_ns > NSEC_PER_SEC)
 | |
| +				entry_uninit(entry);
 | |
| +		}
 | |
| +#endif
 | |
| +		spin_unlock(&table_lock);
 | |
| +		if (likely(work)) /* Only yield when running as the periodic work item, not on the NULL-work teardown call. */
 | |
| +			cond_resched();
 | |
| +	}
 | |
| +	if (likely(work)) /* Re-arm: the periodic run repeats every HZ jiffies. */
 | |
| +		queue_delayed_work(system_power_efficient_wq, &gc_work, HZ);
 | |
| +}
 | |
| +
 | |
| +bool ratelimiter_allow(struct sk_buff *skb, struct net *net) /* Token-bucket check keyed on (@net, source IP of @skb). Returns true if the packet may proceed; false if it is over the limit, not IPv4/IPv6, or no entry could be created. */
 | |
| +{
 | |
| +	struct entry *entry;
 | |
| +	struct hlist_head *bucket;
 | |
| +	struct { __be64 ip; u32 net; } data = { .net = (unsigned long)net & 0xffffffff }; /* Hash input: only the first 12 bytes (ip + net) are fed to hsiphash() below. */
 | |
| +
 | |
| +	if (skb->protocol == htons(ETH_P_IP)) {
 | |
| +		data.ip = (__force __be64)ip_hdr(skb)->saddr;
 | |
| +		bucket = &table_v4[hsiphash(&data, sizeof(u32) * 3, &key) & (table_size - 1)];
 | |
| +	}
 | |
| +#if IS_ENABLED(CONFIG_IPV6)
 | |
| +	else if (skb->protocol == htons(ETH_P_IPV6)) {
 | |
| +		memcpy(&data.ip, &ipv6_hdr(skb)->saddr, sizeof(__be64)); /* Only 64 bits */
 | |
| +		bucket = &table_v6[hsiphash(&data, sizeof(u32) * 3, &key) & (table_size - 1)];
 | |
| +	}
 | |
| +#endif
 | |
| +	else
 | |
| +		return false;
 | |
| +	rcu_read_lock_bh(); /* Entries are freed through call_rcu_bh(), so the reader must use the matching RCU-bh flavour. */
 | |
| +	hlist_for_each_entry_rcu_bh (entry, bucket, hash) {
 | |
| +		if (entry->net == net && entry->ip == data.ip) {
 | |
| +			u64 now, tokens;
 | |
| +			bool ret;
 | |
| +			/* Inspired by nft_limit.c, but this is actually a slightly different
 | |
| +			 * algorithm. Namely, we incorporate the burst as part of the maximum
 | |
| +			 * tokens, rather than as part of the rate. */
 | |
| +			spin_lock(&entry->lock);
 | |
| +			now = ktime_get_ns();
 | |
| +			tokens = min_t(u64, TOKEN_MAX, entry->tokens + now - entry->last_time_ns); /* Refill by elapsed nanoseconds, capped at TOKEN_MAX. */
 | |
| +			entry->last_time_ns = now;
 | |
| +			ret = tokens >= PACKET_COST;
 | |
| +			entry->tokens = ret ? tokens - PACKET_COST : tokens; /* Only an allowed packet is charged. */
 | |
| +			spin_unlock(&entry->lock);
 | |
| +			rcu_read_unlock_bh();
 | |
| +			return ret;
 | |
| +		}
 | |
| +	}
 | |
| +	rcu_read_unlock_bh(); /* Must be dropped before the sleeping GFP_KERNEL allocation below. */
 | |
| +
 | |
| +	if (atomic_inc_return(&total_entries) > max_entries) /* Reserve a slot first; err_oom gives it back. */
 | |
| +		goto err_oom;
 | |
| +
 | |
| +	entry = kmem_cache_alloc(entry_cache, GFP_KERNEL);
 | |
| +	if (!entry)
 | |
| +		goto err_oom;
 | |
| +
 | |
| +	entry->net = net;
 | |
| +	entry->ip = data.ip;
 | |
| +	INIT_HLIST_NODE(&entry->hash);
 | |
| +	spin_lock_init(&entry->lock);
 | |
| +	entry->last_time_ns = ktime_get_ns();
 | |
| +	entry->tokens = TOKEN_MAX - PACKET_COST; /* A new entry starts full, minus the cost of this first packet. */
 | |
| +	spin_lock(&table_lock);
 | |
| +	hlist_add_head_rcu(&entry->hash, bucket);
 | |
| +	spin_unlock(&table_lock);
 | |
| +	return true;
 | |
| +
 | |
| +err_oom:
 | |
| +	atomic_dec(&total_entries);
 | |
| +	return false;
 | |
| +}
 | |
| +
 | |
| +int ratelimiter_init(void) /* Take a reference on the ratelimiter; the first caller allocates the entry cache and hash tables. Returns 0 or -ENOMEM. */
 | |
| +{
 | |
| +	if (atomic64_inc_return(&refcnt) != 1) /* NOTE(review): later callers return 0 right away, without waiting for the first caller's setup below to finish - confirm callers are serialized. */
 | |
| +		return 0;
 | |
| +
 | |
| +	entry_cache = kmem_cache_create("wireguard_ratelimiter", sizeof(struct entry), 0, 0, NULL);
 | |
| +	if (!entry_cache)
 | |
| +		goto err;
 | |
| +
 | |
| +	/* xt_hashlimit.c uses a slightly different algorithm for ratelimiting,
 | |
| +	 * but what it shares in common is that it uses a massive hashtable. So,
 | |
| +	 * we borrow their wisdom about good table sizes on different systems
 | |
| +	 * dependent on RAM. This calculation here comes from there. */
 | |
| +	table_size = (totalram_pages > (1 << 30) / PAGE_SIZE) ? 8192 : max_t(unsigned long, 16, roundup_pow_of_two((totalram_pages << PAGE_SHIFT) / (1 << 14) / sizeof(struct hlist_head)));
 | |
| +	max_entries = table_size * 8; /* Limit enforced against total_entries in ratelimiter_allow(). */
 | |
| +
 | |
| +	table_v4 = kvzalloc(table_size * sizeof(struct hlist_head), GFP_KERNEL);
 | |
| +	if (!table_v4)
 | |
| +		goto err_kmemcache;
 | |
| +
 | |
| +#if IS_ENABLED(CONFIG_IPV6)
 | |
| +	table_v6 = kvzalloc(table_size * sizeof(struct hlist_head), GFP_KERNEL);
 | |
| +	if (!table_v6) {
 | |
| +		kvfree(table_v4); /* Freed here because the shared error label only destroys the cache. */
 | |
| +		goto err_kmemcache;
 | |
| +	}
 | |
| +#endif
 | |
| +
 | |
| +	queue_delayed_work(system_power_efficient_wq, &gc_work, HZ); /* First gc_entries() run; it re-queues itself afterwards. */
 | |
| +	get_random_bytes(&key, sizeof(key)); /* Random hsiphash key used for bucket selection. */
 | |
| +	return 0;
 | |
| +
 | |
| +err_kmemcache:
 | |
| +	kmem_cache_destroy(entry_cache);
 | |
| +err:
 | |
| +	atomic64_dec(&refcnt); /* Undo the reference taken at the top. */
 | |
| +	return -ENOMEM;
 | |
| +}
 | |
| +
 | |
| +void ratelimiter_uninit(void) /* Drop one reference on the ratelimiter; the last user tears down the tables and the entry cache. */
 | |
| +{
 | |
| +	if (atomic64_dec_return(&refcnt))
 | |
| +		return;
 | |
| +
 | |
| +	cancel_delayed_work_sync(&gc_work);
 | |
| +	gc_entries(NULL); /* NULL work: unlink every entry and queue it for freeing, without re-arming the work. */
 | |
| +	rcu_barrier_bh(); /* Entries are freed by call_rcu_bh() callbacks; wait for those callbacks to finish before destroying entry_cache. */
 | |
| +	kvfree(table_v4);
 | |
| +#if IS_ENABLED(CONFIG_IPV6)
 | |
| +	kvfree(table_v6);
 | |
| +#endif
 | |
| +	kmem_cache_destroy(entry_cache);
 | |
| +}
 | |
| +
 | |
| +#include "selftest/ratelimiter.h"
 | |
| --- /dev/null	2017-07-05 16:27:37.615351856 +0200
 | |
| +++ b/net/wireguard/receive.c	2017-07-06 18:17:33.000000000 +0200
 | |
| @@ -0,0 +1,311 @@
 | |
| +/* Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. */
 | |
| +
 | |
| +#include "packets.h"
 | |
| +#include "device.h"
 | |
| +#include "peer.h"
 | |
| +#include "timers.h"
 | |
| +#include "messages.h"
 | |
| +#include "cookie.h"
 | |
| +
 | |
| +#include <linux/ip.h>
 | |
| +#include <linux/ipv6.h>
 | |
| +#include <linux/udp.h>
 | |
| +#include <net/ip_tunnels.h>
 | |
| +
 | |
| +static inline void rx_stats(struct wireguard_peer *peer, size_t len) /* Account one received packet of @len bytes to the device's per-CPU stats, and the bytes to @peer. */
 | |
| +{
 | |
| +	struct pcpu_sw_netstats *tstats = get_cpu_ptr(netdev_pub(peer->device)->tstats);
 | |
| +	u64_stats_update_begin(&tstats->syncp);
 | |
| +	tstats->rx_bytes += len;
 | |
| +	++tstats->rx_packets;
 | |
| +	u64_stats_update_end(&tstats->syncp);
 | |
| +	put_cpu_ptr(tstats);
 | |
| +	peer->rx_bytes += len; /* Per-peer byte counter, updated outside the u64_stats section. */
 | |
| +}
 | |
| +
 | |
| +static inline void update_latest_addr(struct wireguard_peer *peer, struct sk_buff *skb) /* Set @peer's endpoint from @skb when an endpoint can be extracted from it. */
 | |
| +{
 | |
| +	struct endpoint endpoint;
 | |
| +	if (!socket_endpoint_from_skb(&endpoint, skb)) /* A zero return is treated as success; otherwise the endpoint is left untouched. */
 | |
| +		socket_set_peer_endpoint(peer, &endpoint);
 | |
| +}
 | |
| +
 | |
| +static inline int skb_prepare_header(struct sk_buff *skb, struct wireguard_device *wg) /* Validate the outer IP/UDP headers, trim @skb to the UDP payload and leave skb->data at its start. Returns the message type, or -EINVAL. @wg is not used here. */
 | |
| +{
 | |
| +	struct udphdr *udp;
 | |
| +	size_t data_offset, data_len;
 | |
| +	enum message_type message_type;
 | |
| +	if (unlikely(skb_examine_untrusted_ip_hdr(skb) != skb->protocol || skb_transport_header(skb) < skb->head || (skb_transport_header(skb) + sizeof(struct udphdr)) > skb_tail_pointer(skb)))
 | |
| +		return -EINVAL; /* Bogus IP header */
 | |
| +	udp = udp_hdr(skb);
 | |
| +	data_offset = (u8 *)udp - skb->data;
 | |
| +	if (unlikely(data_offset > U16_MAX || data_offset + sizeof(struct udphdr) > skb->len))
 | |
| +		return -EINVAL;  /* Packet has offset at impossible location or isn't big enough to have UDP fields */
 | |
| +	data_len = ntohs(udp->len);
 | |
| +	if (unlikely(data_len < sizeof(struct udphdr) || data_len > skb->len - data_offset))
 | |
| +		return -EINVAL;  /* UDP packet is reporting too small of a size or lying about its size */
 | |
| +	data_len -= sizeof(struct udphdr); /* From here on data_len is the UDP payload length only. */
 | |
| +	data_offset = (u8 *)udp + sizeof(struct udphdr) - skb->data; /* Now the offset of the payload rather than of the UDP header. */
 | |
| +	if (unlikely(!pskb_may_pull(skb, data_offset + sizeof(struct message_header)) || pskb_trim(skb, data_len + data_offset) < 0))
 | |
| +		return -EINVAL;
 | |
| +	skb_pull(skb, data_offset);
 | |
| +	if (unlikely(skb->len != data_len))
 | |
| +		return -EINVAL; /* Final len does not agree with calculated len */
 | |
| +	message_type = message_determine_type(skb);
 | |
| +	__skb_push(skb, data_offset); /* Temporarily restore the headers so the pull check below uses the same base as the first one. */
 | |
| +	if (unlikely(!pskb_may_pull(skb, data_offset + message_header_sizes[message_type])))
 | |
| +		return -EINVAL;
 | |
| +	__skb_pull(skb, data_offset);
 | |
| +	return message_type;
 | |
| +}
 | |
| +
 | |
| +static void receive_handshake_packet(struct wireguard_device *wg, struct sk_buff *skb)
 | |
| +{
 | |
| +	static unsigned long last_under_load = 0; /* Yes this is global, so that our load calculation applies to the whole system. */
 | |
| +	struct wireguard_peer *peer = NULL;
 | |
| +	enum message_type message_type;
 | |
| +	bool under_load;
 | |
| +	enum cookie_mac_state mac_state;
 | |
| +	bool packet_needs_cookie;
 | |
| +
 | |
| +	message_type = message_determine_type(skb);
 | |
| +
 | |
| +	if (message_type == MESSAGE_HANDSHAKE_COOKIE) {
 | |
| +		net_dbg_skb_ratelimited("%s: Receiving cookie response from %pISpfsc\n", netdev_pub(wg)->name, skb);
 | |
| +		cookie_message_consume((struct message_handshake_cookie *)skb->data, wg);
 | |
| +		return;
 | |
| +	}
 | |
| +
 | |
| +	under_load = skb_queue_len(&wg->incoming_handshakes) >= MAX_QUEUED_INCOMING_HANDSHAKES / 8;
 | |
| +	if (under_load)
 | |
| +		last_under_load = jiffies;
 | |
| +	else
 | |
| +		under_load = time_is_after_jiffies(last_under_load + HZ);
 | |
| +	mac_state = cookie_validate_packet(&wg->cookie_checker, skb, under_load);
 | |
| +	if ((under_load && mac_state == VALID_MAC_WITH_COOKIE) || (!under_load && mac_state == VALID_MAC_BUT_NO_COOKIE))
 | |
| +		packet_needs_cookie = false;
 | |
| +	else if (under_load && mac_state == VALID_MAC_BUT_NO_COOKIE)
 | |
| +		packet_needs_cookie = true;
 | |
| +	else {
 | |
| +		net_dbg_skb_ratelimited("%s: Invalid MAC of handshake, dropping packet from %pISpfsc\n", netdev_pub(wg)->name, skb);
 | |
| +		return;
 | |
| +	}
 | |
| +
 | |
| +	switch (message_type) {
 | |
| +	case MESSAGE_HANDSHAKE_INITIATION: {
 | |
| +		struct message_handshake_initiation *message = (struct message_handshake_initiation *)skb->data;
 | |
| +		if (packet_needs_cookie) {
 | |
| +			packet_send_handshake_cookie(wg, skb, message->sender_index);
 | |
| +			return;
 | |
| +		}
 | |
| +		peer = noise_handshake_consume_initiation(message, wg);
 | |
| +		if (unlikely(!peer)) {
 | |
| +			net_dbg_skb_ratelimited("%s: Invalid handshake initiation from %pISpfsc\n", netdev_pub(wg)->name, skb);
 | |
| +			return;
 | |
| +		}
 | |
| +		update_latest_addr(peer, skb);
 | |
| +		net_dbg_ratelimited("%s: Receiving handshake initiation from peer %Lu (%pISpfsc)\n", netdev_pub(wg)->name, peer->internal_id, &peer->endpoint.addr);
 | |
| +		packet_send_handshake_response(peer);
 | |
| +		break;
 | |
| +	}
 | |
| +	case MESSAGE_HANDSHAKE_RESPONSE: {
 | |
| +		struct message_handshake_response *message = (struct message_handshake_response *)skb->data;
 | |
| +		if (packet_needs_cookie) {
 | |
| +			packet_send_handshake_cookie(wg, skb, message->sender_index);
 | |
| +			return;
 | |
| +		}
 | |
| +		peer = noise_handshake_consume_response(message, wg);
 | |
| +		if (unlikely(!peer)) {
 | |
| +			net_dbg_skb_ratelimited("%s: Invalid handshake response from %pISpfsc\n", netdev_pub(wg)->name, skb);
 | |
| +			return;
 | |
| +		}
 | |
| +		update_latest_addr(peer, skb);
 | |
| +		net_dbg_ratelimited("%s: Receiving handshake response from peer %Lu (%pISpfsc)\n", netdev_pub(wg)->name, peer->internal_id, &peer->endpoint.addr);
 | |
| +		if (noise_handshake_begin_session(&peer->handshake, &peer->keypairs, true)) {
 | |
| +			timers_ephemeral_key_created(peer);
 | |
| +			timers_handshake_complete(peer);
 | |
| +			peer->sent_lastminute_handshake = false;
 | |
| +			/* Calling this function will either send any existing packets in the queue
 | |
| +			 * and not send a keepalive, which is the best case, or, if there's nothing
 | |
| +			 * in the queue, it will send a keepalive, in order to give immediate
 | |
| +			 * confirmation of the session. */
 | |
| +			packet_send_keepalive(peer);
 | |
| +		}
 | |
| +		break;
 | |
| +	}
 | |
| +	default:
 | |
| +		WARN(1, "Somehow a wrong type of packet wound up in the handshake queue!\n");
 | |
| +		return;
 | |
| +	}
 | |
| +
 | |
| +	BUG_ON(!peer);
 | |
| +
 | |
| +	rx_stats(peer, skb->len);
 | |
| +	timers_any_authenticated_packet_received(peer);
 | |
| +	timers_any_authenticated_packet_traversal(peer);
 | |
| +	peer_put(peer);
 | |
| +}
 | |
| +
 | |
| +void packet_process_queued_handshake_packets(struct work_struct *work) /* Work handler: drain the device's incoming_handshakes queue. */
 | |
| +{
 | |
| +	struct wireguard_device *wg = container_of(work, struct handshake_worker, work)->wg;
 | |
| +	struct sk_buff *skb;
 | |
| +
 | |
| +	while ((skb = skb_dequeue(&wg->incoming_handshakes)) != NULL) {
 | |
| +		receive_handshake_packet(wg, skb);
 | |
| +		dev_kfree_skb(skb); /* The skb is freed here whatever receive_handshake_packet() did with it. */
 | |
| +		cond_resched(); /* Yield between packets. */
 | |
| +	}
 | |
| +}
 | |
| +
 | |
| +static void keep_key_fresh(struct wireguard_peer *peer) /* Queue a new handshake initiation when the current keypair, which we initiated, is close to REJECT_AFTER_TIME. */
 | |
| +{
 | |
| +	struct noise_keypair *keypair;
 | |
| +	bool send = false;
 | |
| +	if (peer->sent_lastminute_handshake) /* Already queued one; the flag is cleared again in the receive paths of this file. */
 | |
| +		return;
 | |
| +
 | |
| +	rcu_read_lock_bh();
 | |
| +	keypair = rcu_dereference_bh(peer->keypairs.current_keypair);
 | |
| +	if (likely(keypair && keypair->sending.is_valid) && keypair->i_am_the_initiator &&
 | |
| +	    unlikely(time_is_before_eq_jiffies64(keypair->sending.birthdate + REJECT_AFTER_TIME - KEEPALIVE_TIMEOUT - REKEY_TIMEOUT)))
 | |
| +		send = true;
 | |
| +	rcu_read_unlock_bh(); /* Only the decision is taken under RCU; the initiation is queued after unlocking. */
 | |
| +
 | |
| +	if (send) {
 | |
| +		peer->sent_lastminute_handshake = true;
 | |
| +		packet_queue_handshake_initiation(peer, false);
 | |
| +	}
 | |
| +}
 | |
| +
 | |
| +void packet_consume_data_done(struct sk_buff *skb, struct wireguard_peer *peer, struct endpoint *endpoint, bool used_new_key)
 | |
| +{
 | |
| +	struct net_device *dev;
 | |
| +	struct wireguard_peer *routed_peer;
 | |
| +	struct wireguard_device *wg;
 | |
| +	unsigned int len;
 | |
| +
 | |
| +	socket_set_peer_endpoint(peer, endpoint);
 | |
| +
 | |
| +	wg = peer->device;
 | |
| +	dev = netdev_pub(wg);
 | |
| +
 | |
| +	if (unlikely(used_new_key)) {
 | |
| +		peer->sent_lastminute_handshake = false;
 | |
| +		packet_send_queue(peer);
 | |
| +		timers_handshake_complete(peer);
 | |
| +	}
 | |
| +
 | |
| +	keep_key_fresh(peer);
 | |
| +
 | |
| +	/* A packet with length 0 is a keepalive packet */
 | |
| +	if (unlikely(!skb->len)) {
 | |
| +		net_dbg_ratelimited("%s: Receiving keepalive packet from peer %Lu (%pISpfsc)\n", netdev_pub(peer->device)->name, peer->internal_id, &peer->endpoint.addr);
 | |
| +		goto packet_processed;
 | |
| +	}
 | |
| +
 | |
| +	if (unlikely(skb_network_header(skb) < skb->head))
 | |
| +		goto dishonest_packet_size;
 | |
| +	if (unlikely(!(pskb_network_may_pull(skb, sizeof(struct iphdr)) && (ip_hdr(skb)->version == 4 || (ip_hdr(skb)->version == 6 && pskb_network_may_pull(skb, sizeof(struct ipv6hdr)))))))
 | |
| +		goto dishonest_packet_type;
 | |
| +
 | |
| +	skb->dev = dev;
 | |
| +	skb->ip_summed = CHECKSUM_UNNECESSARY;
 | |
| +	skb->protocol = skb_examine_untrusted_ip_hdr(skb);
 | |
| +	if (skb->protocol == htons(ETH_P_IP)) {
 | |
| +		len = ntohs(ip_hdr(skb)->tot_len);
 | |
| +		if (unlikely(len < sizeof(struct iphdr)))
 | |
| +			goto dishonest_packet_size;
 | |
| +		if (INET_ECN_is_ce(PACKET_CB(skb)->ds))
 | |
| +			IP_ECN_set_ce(ip_hdr(skb));
 | |
| +
 | |
| +	} else if (skb->protocol == htons(ETH_P_IPV6)) {
 | |
| +		len = ntohs(ipv6_hdr(skb)->payload_len) + sizeof(struct ipv6hdr);
 | |
| +		if (INET_ECN_is_ce(PACKET_CB(skb)->ds))
 | |
| +			IP6_ECN_set_ce(skb, ipv6_hdr(skb));
 | |
| +	} else
 | |
| +		goto dishonest_packet_type;
 | |
| +
 | |
| +	if (unlikely(len > skb->len)) {
 | |
| +		goto dishonest_packet_size;
 | |
| +	}
 | |
| +	if (len < skb->len && unlikely(pskb_trim(skb, len)))
 | |
| +		goto packet_processed;
 | |
| +
 | |
| +	timers_data_received(peer);
 | |
| +
 | |
| +	routed_peer = routing_table_lookup_src(&wg->peer_routing_table, skb);
 | |
| +	peer_put(routed_peer); /* We don't need the extra reference. */
 | |
| +
 | |
| +	if (unlikely(routed_peer != peer))
 | |
| +		goto dishonest_packet_peer;
 | |
| +
 | |
| +	len = skb->len;
 | |
| +	if (likely(netif_rx(skb) == NET_RX_SUCCESS))
 | |
| +		rx_stats(peer, len);
 | |
| +	else {
 | |
| +		++dev->stats.rx_dropped;
 | |
| +		net_dbg_ratelimited("%s: Failed to give packet to userspace from peer %Lu (%pISpfsc)\n", netdev_pub(peer->device)->name, peer->internal_id, &peer->endpoint.addr);
 | |
| +	}
 | |
| +	goto continue_processing;
 | |
| +
 | |
| +dishonest_packet_peer:
 | |
| +	net_dbg_skb_ratelimited("%s: Packet has unallowed src IP (%pISc) from peer %Lu (%pISpfsc)\n", netdev_pub(peer->device)->name, skb, peer->internal_id, &peer->endpoint.addr);
 | |
| +	++dev->stats.rx_errors;
 | |
| +	++dev->stats.rx_frame_errors;
 | |
| +	goto packet_processed;
 | |
| +dishonest_packet_type:
 | |
| +	net_dbg_ratelimited("%s: Packet is neither ipv4 nor ipv6 from peer %Lu (%pISpfsc)\n", netdev_pub(peer->device)->name, peer->internal_id, &peer->endpoint.addr);
 | |
| +	++dev->stats.rx_errors;
 | |
| +	++dev->stats.rx_frame_errors;
 | |
| +	goto packet_processed;
 | |
| +dishonest_packet_size:
 | |
| +	net_dbg_ratelimited("%s: Packet has incorrect size from peer %Lu (%pISpfsc)\n", netdev_pub(peer->device)->name, peer->internal_id, &peer->endpoint.addr);
 | |
| +	++dev->stats.rx_errors;
 | |
| +	++dev->stats.rx_length_errors;
 | |
| +	goto packet_processed;
 | |
| +packet_processed:
 | |
| +	dev_kfree_skb(skb);
 | |
| +continue_processing:
 | |
| +	timers_any_authenticated_packet_received(peer);
 | |
| +	timers_any_authenticated_packet_traversal(peer);
 | |
| +	peer_put(peer);
 | |
| +}
 | |
| +
 | |
| +void packet_receive(struct wireguard_device *wg, struct sk_buff *skb) /* Classify an incoming skb: handshake messages are queued for a worker, data messages go to packet_consume_data(), anything else is freed. */
 | |
| +{
 | |
| +	int message_type = skb_prepare_header(skb, wg);
 | |
| +	if (unlikely(message_type < 0)) /* Header validation failed (-EINVAL). */
 | |
| +		goto err;
 | |
| +	switch (message_type) {
 | |
| +	case MESSAGE_HANDSHAKE_INITIATION:
 | |
| +	case MESSAGE_HANDSHAKE_RESPONSE:
 | |
| +	case MESSAGE_HANDSHAKE_COOKIE: {
 | |
| +		int cpu_index, cpu, target_cpu;
 | |
| +		if (skb_queue_len(&wg->incoming_handshakes) > MAX_QUEUED_INCOMING_HANDSHAKES) {
 | |
| +			net_dbg_skb_ratelimited("%s: Too many handshakes queued, dropping packet from %pISpfsc\n", netdev_pub(wg)->name, skb);
 | |
| +			goto err;
 | |
| +		}
 | |
| +		skb_queue_tail(&wg->incoming_handshakes, skb); /* From here the skb belongs to the queue; the worker frees it. */
 | |
| +		/* Select the CPU in a round-robin */
 | |
| +		cpu_index = ((unsigned int)atomic_inc_return(&wg->incoming_handshake_seqnr)) % cpumask_weight(cpu_online_mask);
 | |
| +		target_cpu = cpumask_first(cpu_online_mask);
 | |
| +		for (cpu = 0; cpu < cpu_index; ++cpu) /* Step to the cpu_index-th online CPU. */
 | |
| +			target_cpu = cpumask_next(target_cpu, cpu_online_mask);
 | |
| +		/* Queues up a call to packet_process_queued_handshake_packets(skb): */
 | |
| +		queue_work_on(target_cpu, wg->incoming_handshake_wq, &per_cpu_ptr(wg->incoming_handshakes_worker, target_cpu)->work);
 | |
| +		break;
 | |
| +	}
 | |
| +	case MESSAGE_DATA:
 | |
| +		PACKET_CB(skb)->ds = ip_tunnel_get_dsfield(ip_hdr(skb), skb); /* Saved so packet_consume_data_done() can apply the ECN bits to the inner header. */
 | |
| +		packet_consume_data(skb, wg);
 | |
| +		break;
 | |
| +	default:
 | |
| +		net_dbg_skb_ratelimited("%s: Invalid packet from %pISpfsc\n", netdev_pub(wg)->name, skb);
 | |
| +		goto err;
 | |
| +	}
 | |
| +	return;
 | |
| +
 | |
| +err:
 | |
| +	dev_kfree_skb(skb);
 | |
| +}
 | |
| --- /dev/null	2017-07-05 16:27:37.615351856 +0200
 | |
| +++ b/net/wireguard/routingtable.c	2017-07-06 18:17:33.000000000 +0200
 | |
| @@ -0,0 +1,345 @@
 | |
| +/* Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. */
 | |
| +
 | |
| +#include "routingtable.h"
 | |
| +#include "peer.h"
 | |
| +
 | |
| +struct routing_table_node {
 | |
| +	struct routing_table_node __rcu *bit[2]; /* children, indexed by the key bit that choose_node() extracts */
 | |
| +	struct rcu_head rcu; /* used for the deferred kfree in node_free_rcu() */
 | |
| +	struct wireguard_peer *peer; /* NULL on nodes that carry no peer */
 | |
| +	u8 cidr, bit_at_a, bit_at_b; /* prefix length; byte index and shift of the child-selecting key bit, set in copy_and_assign_cidr() */
 | |
| +	u8 bits[] __aligned(__alignof__(u64)); /* prefix bytes; add() allocates (bits + 7) / 8 of them */
 | |
| +};
 | |
| +
 | |
| +static inline void copy_and_assign_cidr(struct routing_table_node *node, const u8 *src, u8 cidr) /* Store the first @cidr bits of @src in @node and record which key bit selects its child. */
 | |
| +{
 | |
| +	memcpy(node->bits, src, (cidr + 7) / 8);
 | |
| +	node->bits[cidr ? (cidr - 1) / 8 : 0] &= 0xff << ((8 - (cidr % 8)) % 8); /* Clear the bits past the prefix in its last byte. For cidr == 0 the mask is 0xff, so touch bits[0] instead of indexing bits[-1]. */
 | |
| +	node->cidr = cidr;
 | |
| +	node->bit_at_a = cidr / 8; /* byte of the key that holds the first bit after the prefix */
 | |
| +	node->bit_at_b = 7 - (cidr % 8); /* right shift that brings that bit down to position 0 */
 | |
| +}
 | |
| +#define choose_node(parent, key) parent->bit[(key[parent->bit_at_a] >> parent->bit_at_b) & 1]
 | |
| +
 | |
| +static void node_free_rcu(struct rcu_head *rcu)
 | |
| +{
 | |
| +	kfree(container_of(rcu, struct routing_table_node, rcu));
 | |
| +}
 | |
| +#define push(p, lock) ({ \
 | |
| +	if (rcu_access_pointer(p)) { \
 | |
| +		BUG_ON(len >= 128); \
 | |
| +		stack[len++] = lock ? rcu_dereference_protected(p, lockdep_is_held((struct mutex *)lock)) : rcu_dereference_bh(p); \
 | |
| +	} \
 | |
| +	true; \
 | |
| +})
 | |
| +#define walk_prep \
 | |
| +	struct routing_table_node *stack[128], *node; \
 | |
| +	unsigned int len;
 | |
| +#define walk(top, lock) for (len = 0, push(top, lock); len > 0 && (node = stack[--len]) && push(node->bit[0], lock) && push(node->bit[1], lock);)
 | |
| +
 | |
| +static void free_root_node(struct routing_table_node __rcu *top, struct mutex *lock)
 | |
| +{
 | |
| +	walk_prep;
 | |
| +	walk (top, lock)
 | |
| +		call_rcu_bh(&node->rcu, node_free_rcu);
 | |
| +}
 | |
| +
 | |
| +static size_t count_nodes(struct routing_table_node __rcu *top)
 | |
| +{
 | |
| +	size_t ret = 0;
 | |
| +	walk_prep;
 | |
| +	walk (top, NULL) {
 | |
| +		if (node->peer)
 | |
| +			++ret;
 | |
| +	}
 | |
| +	return ret;
 | |
| +}
 | |
| +
 | |
| +static int walk_ips_by_peer(struct routing_table_node __rcu *top, int family, void *ctx, struct wireguard_peer *peer, int (*func)(void *ctx, union nf_inet_addr ip, u8 cidr, int family), struct mutex *maybe_lock)
 | |
| +{
 | |
| +	int ret;
 | |
| +	union nf_inet_addr ip = { .all = { 0 } };
 | |
| +	walk_prep;
 | |
| +
 | |
| +	if (unlikely(!peer))
 | |
| +		return 0;
 | |
| +
 | |
| +	walk (top, maybe_lock) {
 | |
| +		if (node->peer != peer)
 | |
| +			continue;
 | |
| +		memcpy(ip.all, node->bits, family == AF_INET6 ? 16 : 4);
 | |
| +		ret = func(ctx, ip, node->cidr, family);
 | |
| +		if (ret)
 | |
| +			return ret;
 | |
| +	}
 | |
| +	return 0;
 | |
| +}
 | |
| +#undef push
 | |
| +
 | |
| +#define ref(p) rcu_access_pointer(p)
 | |
| +#define deref(p) rcu_dereference_protected(*p, lockdep_is_held(lock))
 | |
| +#define push(p) ({ BUG_ON(len >= 128); stack[len++] = p; })
 | |
| +static void walk_remove_by_peer(struct routing_table_node __rcu **top, struct wireguard_peer *peer, struct mutex *lock)
 | |
| +{
 | |
| +	struct routing_table_node __rcu **stack[128], **nptr, *node, *prev;
 | |
| +	unsigned int len;
 | |
| +
 | |
| +	if (unlikely(!peer || !ref(*top)))
 | |
| +		return;
 | |
| +
 | |
| +	for (prev = NULL, len = 0, push(top); len > 0; prev = node) {
 | |
| +		nptr = stack[len - 1];
 | |
| +		node = deref(nptr);
 | |
| +		if (!node) {
 | |
| +			--len;
 | |
| +			continue;
 | |
| +		}
 | |
| +		if (!prev || ref(prev->bit[0]) == node || ref(prev->bit[1]) == node) {
 | |
| +			if (ref(node->bit[0]))
 | |
| +				push(&node->bit[0]);
 | |
| +			else if (ref(node->bit[1]))
 | |
| +				push(&node->bit[1]);
 | |
| +		} else if (ref(node->bit[0]) == prev) {
 | |
| +			if (ref(node->bit[1]))
 | |
| +				push(&node->bit[1]);
 | |
| +		} else {
 | |
| +			if (node->peer == peer) {
 | |
| +				node->peer = NULL;
 | |
| +				if (!node->bit[0] || !node->bit[1]) {
 | |
| +					rcu_assign_pointer(*nptr, deref(&node->bit[!ref(node->bit[0])]));
 | |
| +					call_rcu_bh(&node->rcu, node_free_rcu);
 | |
| +					node = deref(nptr);
 | |
| +				}
 | |
| +			}
 | |
| +			--len;
 | |
| +		}
 | |
| +	}
 | |
| +}
 | |
| +#undef ref
 | |
| +#undef deref
 | |
| +#undef push
 | |
| +
 | |
| +static inline unsigned int fls128(u64 a, u64 b) /* fls64() extended to 128 bits, with @a as the high half and @b as the low half. */
 | |
| +{
 | |
| +	return a ? fls64(a) + 64 : fls64(b);
 | |
| +}
 | |
| +
 | |
| +static inline u8 common_bits(const struct routing_table_node *node, const u8 *key, u8 bits) /* Number of leading bits in which @key and node->bits agree. @bits must be 32 or 128; any other value hits BUG(). */
 | |
| +{
 | |
| +	if (bits == 32) /* NOTE(review): @key is read through a __be32 pointer here (and __be64 below) - presumably callers pass suitably aligned addresses; confirm. */
 | |
| +		return 32 - fls(be32_to_cpu(*(const __be32 *)node->bits ^ *(const __be32 *)key));
 | |
| +	else if (bits == 128)
 | |
| +		return 128 - fls128(be64_to_cpu(*(const __be64 *)&node->bits[0] ^ *(const __be64 *)&key[0]), be64_to_cpu(*(const __be64 *)&node->bits[8] ^ *(const __be64 *)&key[8]));
 | |
| +	BUG();
 | |
| +	return 0;
 | |
| +}
 | |
| +
 | |
| +static inline struct routing_table_node *find_node(struct routing_table_node *trie, u8 bits, const u8 *key) /* Walk down from @trie following @key; return the last peer-bearing node whose prefix matched, or NULL. */
 | |
| +{
 | |
| +	struct routing_table_node *node = trie, *found = NULL;
 | |
| +
 | |
| +	while (node && common_bits(node, key, bits) >= node->cidr) { /* Continue only while @key matches the node's whole prefix. */
 | |
| +		if (node->peer) /* Nodes without a peer are passed through but never returned. */
 | |
| +			found = node;
 | |
| +		if (node->cidr == bits) /* Full-length prefix: bit_at_a would index one byte past the key, so stop before choose_node(). */
 | |
| +			break;
 | |
| +		node = rcu_dereference_bh(choose_node(node, key));
 | |
| +	}
 | |
| +	return found;
 | |
| +}
 | |
| +
 | |
| +/* Returns a strong reference to a peer, or NULL when no node matches @ip. */
 | |
| +static inline struct wireguard_peer *lookup(struct routing_table_node __rcu *root, u8 bits, const void *ip)
 | |
| +{
 | |
| +	struct wireguard_peer *peer = NULL;
 | |
| +	struct routing_table_node *node;
 | |
| +
 | |
| +	rcu_read_lock_bh();
 | |
| +	node = find_node(rcu_dereference_bh(root), bits, ip);
 | |
| +	if (node)
 | |
| +		peer = peer_get(node->peer); /* Reference is taken inside the read-side section, while the node is still protected. */
 | |
| +	rcu_read_unlock_bh();
 | |
| +	return peer;
 | |
| +}
 | |
| +
 | |
| +static inline bool node_placement(struct routing_table_node __rcu *trie, const u8 *key, u8 cidr, u8 bits, struct routing_table_node **rnode, struct mutex *lock)
 | |
| +{
 | |
| +	bool exact = false;
 | |
| +	struct routing_table_node *parent = NULL, *node = rcu_dereference_protected(trie, lockdep_is_held(lock));
 | |
| +
 | |
| +	while (node && node->cidr <= cidr && common_bits(node, key, bits) >= node->cidr) {
 | |
| +		parent = node;
 | |
| +		if (parent->cidr == cidr) {
 | |
| +			exact = true;
 | |
| +			break;
 | |
| +		}
 | |
| +		node = rcu_dereference_protected(choose_node(parent, key), lockdep_is_held(lock));
 | |
| +	}
 | |
| +	*rnode = parent;
 | |
| +	return exact;
 | |
| +}
 | |
| +
 | |
| +static int add(struct routing_table_node __rcu **trie, u8 bits, const u8 *key, u8 cidr, struct wireguard_peer *peer, struct mutex *lock)
 | |
| +{
 | |
| +	struct routing_table_node *node, *parent, *down, *newnode;
 | |
| +
 | |
| +	if (!rcu_access_pointer(*trie)) {
 | |
| +		node = kzalloc(sizeof(*node) + (bits + 7) / 8, GFP_KERNEL);
 | |
| +		if (!node)
 | |
| +			return -ENOMEM;
 | |
| +		node->peer = peer;
 | |
| +		copy_and_assign_cidr(node, key, cidr);
 | |
| +		rcu_assign_pointer(*trie, node);
 | |
| +		return 0;
 | |
| +	}
 | |
| +	if (node_placement(*trie, key, cidr, bits, &node, lock)) {
 | |
| +		node->peer = peer;
 | |
| +		return 0;
 | |
| +	}
 | |
| +
 | |
| +	newnode = kzalloc(sizeof(*node) + (bits + 7) / 8, GFP_KERNEL);
 | |
| +	if (!newnode)
 | |
| +		return -ENOMEM;
 | |
| +	newnode->peer = peer;
 | |
| +	copy_and_assign_cidr(newnode, key, cidr);
 | |
| +
 | |
| +	if (!node)
 | |
| +		down = rcu_dereference_protected(*trie, lockdep_is_held(lock));
 | |
| +	else {
 | |
| +		down = rcu_dereference_protected(choose_node(node, key), lockdep_is_held(lock));
 | |
| +		if (!down) {
 | |
| +			rcu_assign_pointer(choose_node(node, key), newnode);
 | |
| +			return 0;
 | |
| +		}
 | |
| +	}
 | |
| +	cidr = min(cidr, common_bits(down, key, bits));
 | |
| +	parent = node;
 | |
| +
 | |
| +	if (newnode->cidr == cidr) {
 | |
| +		rcu_assign_pointer(choose_node(newnode, down->bits), down);
 | |
| +		if (!parent)
 | |
| +			rcu_assign_pointer(*trie, newnode);
 | |
| +		else
 | |
| +			rcu_assign_pointer(choose_node(parent, newnode->bits), newnode);
 | |
| +	} else {
 | |
| +		node = kzalloc(sizeof(*node) + (bits + 7) / 8, GFP_KERNEL);
 | |
| +		if (!node) {
 | |
| +			kfree(newnode);
 | |
| +			return -ENOMEM;
 | |
| +		}
 | |
| +		copy_and_assign_cidr(node, newnode->bits, cidr);
 | |
| +
 | |
| +		rcu_assign_pointer(choose_node(node, down->bits), down);
 | |
| +		rcu_assign_pointer(choose_node(node, newnode->bits), newnode);
 | |
| +		if (!parent)
 | |
| +			rcu_assign_pointer(*trie, node);
 | |
| +		else
 | |
| +			rcu_assign_pointer(choose_node(parent, node->bits), node);
 | |
| +	}
 | |
| +	return 0;
 | |
| +}
 | |
| +
 | |
| +void routing_table_init(struct routing_table *table)
 | |
| +{
 | |
| +	memset(table, 0, sizeof(struct routing_table));
 | |
| +	mutex_init(&table->table_update_lock);
 | |
| +}
 | |
| +
 | |
| +void routing_table_free(struct routing_table *table) /* Queue every node of both tries for RCU freeing and reset both roots to NULL. */
 | |
| +{
 | |
| +	mutex_lock(&table->table_update_lock);
 | |
| +	free_root_node(table->root4, &table->table_update_lock); /* Nodes are released via call_rcu_bh(), not freed immediately. */
 | |
| +	rcu_assign_pointer(table->root4, NULL);
 | |
| +	free_root_node(table->root6, &table->table_update_lock);
 | |
| +	rcu_assign_pointer(table->root6, NULL);
 | |
| +	mutex_unlock(&table->table_update_lock);
 | |
| +}
 | |
| +
 | |
| +int routing_table_insert_v4(struct routing_table *table, const struct in_addr *ip, u8 cidr, struct wireguard_peer *peer) /* Map the IPv4 prefix @ip/@cidr to @peer. Returns 0, -EINVAL or -ENOMEM. */
 | |
| +{
 | |
| +	int ret;
 | |
| +	if (unlikely(cidr > 32 || !peer)) /* cidr == 0 is accepted. */
 | |
| +		return -EINVAL;
 | |
| +	mutex_lock(&table->table_update_lock); /* Trie updates are serialized by this mutex. */
 | |
| +	ret = add(&table->root4, 32, (const u8 *)ip, cidr, peer, &table->table_update_lock);
 | |
| +	mutex_unlock(&table->table_update_lock);
 | |
| +	return ret;
 | |
| +}
 | |
| +
 | |
| +int routing_table_insert_v6(struct routing_table *table, const struct in6_addr *ip, u8 cidr, struct wireguard_peer *peer) /* Map the IPv6 prefix @ip/@cidr to @peer. Returns 0, -EINVAL or -ENOMEM. */
 | |
| +{
 | |
| +	int ret;
 | |
| +	if (unlikely(cidr > 128 || !peer)) /* cidr == 0 is accepted. */
 | |
| +		return -EINVAL;
 | |
| +	mutex_lock(&table->table_update_lock); /* Trie updates are serialized by this mutex. */
 | |
| +	ret = add(&table->root6, 128, (const u8 *)ip, cidr, peer, &table->table_update_lock);
 | |
| +	mutex_unlock(&table->table_update_lock);
 | |
| +	return ret;
 | |
| +}
 | |
| +
 | |
| +/* Runs walk_remove_by_peer() for peer on the IPv4 trie and then on the
 | |
| + * IPv6 trie, holding table_update_lock across both walks. */
 | |
| +void routing_table_remove_by_peer(struct routing_table *table, struct wireguard_peer *peer)
 | |
| +{
 | |
| +	struct mutex *lock = &table->table_update_lock;
 | |
| +
 | |
| +	mutex_lock(lock);
 | |
| +	walk_remove_by_peer(&table->root4, peer, lock);
 | |
| +	walk_remove_by_peer(&table->root6, peer, lock);
 | |
| +	mutex_unlock(lock);
 | |
| +}
 | |
| +
 | |
| +/* Returns count_nodes() of the IPv4 trie plus count_nodes() of the IPv6
 | |
| + * trie, both taken inside a single rcu_read_lock_bh() section. */
 | |
| +size_t routing_table_count_nodes(struct routing_table *table)
 | |
| +{
 | |
| +	size_t total;
 | |
| +
 | |
| +	rcu_read_lock_bh();
 | |
| +	total = count_nodes(table->root4);
 | |
| +	total += count_nodes(table->root6);
 | |
| +	rcu_read_unlock_bh();
 | |
| +	return total;
 | |
| +}
 | |
| +
 | |
| +/* Walks the IPv4 trie and then the IPv6 trie with walk_ips_by_peer(),
 | |
| + * passing ctx, peer and func through. Each walk runs in its own
 | |
| + * rcu_read_lock_bh() section. A non-zero result from the IPv4 walk is
 | |
| + * returned immediately and the IPv6 walk is skipped. */
 | |
| +int routing_table_walk_ips_by_peer(struct routing_table *table, void *ctx, struct wireguard_peer *peer, int (*func)(void *ctx, union nf_inet_addr ip, u8 cidr, int family))
 | |
| +{
 | |
| +	int err;
 | |
| +
 | |
| +	rcu_read_lock_bh();
 | |
| +	err = walk_ips_by_peer(table->root4, AF_INET, ctx, peer, func, NULL);
 | |
| +	rcu_read_unlock_bh();
 | |
| +
 | |
| +	if (!err) {
 | |
| +		rcu_read_lock_bh();
 | |
| +		err = walk_ips_by_peer(table->root6, AF_INET6, ctx, peer, func, NULL);
 | |
| +		rcu_read_unlock_bh();
 | |
| +	}
 | |
| +	return err;
 | |
| +}
 | |
| +
 | |
| +/* Same walk as routing_table_walk_ips_by_peer(), but each trie is walked
 | |
| + * while holding table_update_lock (a mutex) instead of an RCU read-side
 | |
| + * section, and the mutex is passed on to walk_ips_by_peer(). The mutex is
 | |
| + * dropped between the IPv4 and the IPv6 walk. A non-zero result from the
 | |
| + * IPv4 walk is returned immediately. */
 | |
| +int routing_table_walk_ips_by_peer_sleepable(struct routing_table *table, void *ctx, struct wireguard_peer *peer, int (*func)(void *ctx, union nf_inet_addr ip, u8 cidr, int family))
 | |
| +{
 | |
| +	struct mutex *lock = &table->table_update_lock;
 | |
| +	int err;
 | |
| +
 | |
| +	mutex_lock(lock);
 | |
| +	err = walk_ips_by_peer(table->root4, AF_INET, ctx, peer, func, lock);
 | |
| +	mutex_unlock(lock);
 | |
| +
 | |
| +	if (!err) {
 | |
| +		mutex_lock(lock);
 | |
| +		err = walk_ips_by_peer(table->root6, AF_INET6, ctx, peer, func, lock);
 | |
| +		mutex_unlock(lock);
 | |
| +	}
 | |
| +	return err;
 | |
| +}
 | |
| +
 | |
| +/* Looks up the destination address of skb in the IPv4 or IPv6 trie,
 | |
| + * selected by skb->protocol. Returns a strong reference to a peer as
 | |
| + * produced by lookup(), or NULL when the protocol is neither IPv4 nor
 | |
| + * IPv6. */
 | |
| +struct wireguard_peer *routing_table_lookup_dst(struct routing_table *table, struct sk_buff *skb)
 | |
| +{
 | |
| +	switch (skb->protocol) {
 | |
| +	case htons(ETH_P_IP):
 | |
| +		return lookup(table->root4, 32, &ip_hdr(skb)->daddr);
 | |
| +	case htons(ETH_P_IPV6):
 | |
| +		return lookup(table->root6, 128, &ipv6_hdr(skb)->daddr);
 | |
| +	default:
 | |
| +		return NULL;
 | |
| +	}
 | |
| +}
 | |
| +
 | |
| +/* Looks up the source address of skb in the IPv4 or IPv6 trie, selected
 | |
| + * by skb->protocol. Returns a strong reference to a peer as produced by
 | |
| + * lookup(), or NULL when the protocol is neither IPv4 nor IPv6. */
 | |
| +struct wireguard_peer *routing_table_lookup_src(struct routing_table *table, struct sk_buff *skb)
 | |
| +{
 | |
| +	switch (skb->protocol) {
 | |
| +	case htons(ETH_P_IP):
 | |
| +		return lookup(table->root4, 32, &ip_hdr(skb)->saddr);
 | |
| +	case htons(ETH_P_IPV6):
 | |
| +		return lookup(table->root6, 128, &ipv6_hdr(skb)->saddr);
 | |
| +	default:
 | |
| +		return NULL;
 | |
| +	}
 | |
| +}
 | |
| +
 | |
| +#include "selftest/routingtable.h"
 | |
| --- /dev/null	2017-07-05 16:27:37.615351856 +0200
 | |
| +++ b/net/wireguard/send.c	2017-07-06 18:17:33.000000000 +0200
 | |
| @@ -0,0 +1,197 @@
 | |
| +/* Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. */
 | |
| +
 | |
| +#include "packets.h"
 | |
| +#include "timers.h"
 | |
| +#include "device.h"
 | |
| +#include "peer.h"
 | |
| +#include "socket.h"
 | |
| +#include "messages.h"
 | |
| +#include "cookie.h"
 | |
| +
 | |
| +#include <linux/uio.h>
 | |
| +#include <linux/inetdevice.h>
 | |
| +#include <linux/socket.h>
 | |
| +#include <linux/jiffies.h>
 | |
| +#include <net/udp.h>
 | |
| +#include <net/sock.h>
 | |
| +
 | |
| +/* Creates a handshake initiation message for peer and sends it.
 | |
| + *
 | |
| + * Rate limited: nothing is sent unless more than REKEY_TIMEOUT jiffies have
 | |
| + * passed since peer->last_sent_handshake. The check and the timestamp
 | |
| + * update happen together under the write side of peer->handshake.lock. */
 | |
| +static void packet_send_handshake_initiation(struct wireguard_peer *peer)
 | |
| +{
 | |
| +	struct message_handshake_initiation packet;
 | |
| +
 | |
| +	down_write(&peer->handshake.lock);
 | |
| +	if (!time_is_before_jiffies64(peer->last_sent_handshake + REKEY_TIMEOUT)) {
 | |
| +		up_write(&peer->handshake.lock);
 | |
| +		return; /* This function is rate limited. */
 | |
| +	}
 | |
| +	peer->last_sent_handshake = get_jiffies_64();
 | |
| +	up_write(&peer->handshake.lock);
 | |
| +
 | |
| +	net_dbg_ratelimited("%s: Sending handshake initiation to peer %Lu (%pISpfsc)\n", netdev_pub(peer->device)->name, peer->internal_id, &peer->endpoint.addr);
 | |
| +
 | |
| +	/* Only if the message could be created is it MAC'd and transmitted;
 | |
| +	 * the handshake-initiated timer event is raised after the send. */
 | |
| +	if (noise_handshake_create_initiation(&packet, &peer->handshake)) {
 | |
| +		cookie_add_mac_to_packet(&packet, sizeof(packet), peer);
 | |
| +		timers_any_authenticated_packet_traversal(peer);
 | |
| +		socket_send_buffer_to_peer(peer, &packet, sizeof(struct message_handshake_initiation), HANDSHAKE_DSCP);
 | |
| +		timers_handshake_initiated(peer);
 | |
| +	}
 | |
| +}
 | |
| +
 | |
| +/* Work item handler for peer->transmit_handshake_work: sends a handshake
 | |
| + * initiation to the owning peer and then drops the peer reference that
 | |
| + * packet_queue_handshake_initiation() took when queueing the work. */
 | |
| +void packet_send_queued_handshakes(struct work_struct *work)
 | |
| +{
 | |
| +	struct wireguard_peer *peer = container_of(work, struct wireguard_peer, transmit_handshake_work);
 | |
| +	packet_send_handshake_initiation(peer);
 | |
| +	peer_put(peer);
 | |
| +}
 | |
| +
 | |
| +/* Schedules a handshake initiation for peer on the device's peer_wq.
 | |
| + *
 | |
| + * is_retry: when false, peer->timer_handshake_attempts is reset to 0 first.
 | |
| + *
 | |
| + * A peer reference is taken with peer_rcu_get() and handed to the work
 | |
| + * item; if no reference can be obtained, nothing is queued. */
 | |
| +void packet_queue_handshake_initiation(struct wireguard_peer *peer, bool is_retry)
 | |
| +{
 | |
| +	if (!is_retry)
 | |
| +		peer->timer_handshake_attempts = 0;
 | |
| +
 | |
| +	/* First checking the timestamp here is just an optimization; it will
 | |
| +	 * be caught while properly locked inside the actual work queue. */
 | |
| +	if (!time_is_before_jiffies64(peer->last_sent_handshake + REKEY_TIMEOUT))
 | |
| +		return;
 | |
| +
 | |
| +	peer = peer_rcu_get(peer);
 | |
| +	if (unlikely(!peer))
 | |
| +		return;
 | |
| +
 | |
| +	/* Queues up calling packet_send_queued_handshakes(peer), where we do a peer_put(peer) after: */
 | |
| +	if (!queue_work(peer->device->peer_wq, &peer->transmit_handshake_work))
 | |
| +		peer_put(peer); /* If the work was already queued, we want to drop the extra reference */
 | |
| +}
 | |
| +
 | |
| +/* Creates a handshake response for peer, begins the new session and sends
 | |
| + * the response. peer->last_sent_handshake is updated unconditionally.
 | |
| + * Nothing is sent if either the response cannot be created or the session
 | |
| + * cannot be begun. */
 | |
| +void packet_send_handshake_response(struct wireguard_peer *peer)
 | |
| +{
 | |
| +	struct message_handshake_response resp;
 | |
| +
 | |
| +	net_dbg_ratelimited("%s: Sending handshake response to peer %Lu (%pISpfsc)\n", netdev_pub(peer->device)->name, peer->internal_id, &peer->endpoint.addr);
 | |
| +	peer->last_sent_handshake = get_jiffies_64();
 | |
| +
 | |
| +	if (!noise_handshake_create_response(&resp, &peer->handshake))
 | |
| +		return;
 | |
| +	cookie_add_mac_to_packet(&resp, sizeof(resp), peer);
 | |
| +
 | |
| +	if (!noise_handshake_begin_session(&peer->handshake, &peer->keypairs, false))
 | |
| +		return;
 | |
| +	timers_ephemeral_key_created(peer);
 | |
| +	timers_any_authenticated_packet_traversal(peer);
 | |
| +	socket_send_buffer_to_peer(peer, &resp, sizeof(resp), HANDSHAKE_DSCP);
 | |
| +}
 | |
| +
 | |
| +/* Builds a cookie message with cookie_message_create() from initiating_skb,
 | |
| + * sender_index and the device's cookie_checker, and sends it as a reply to
 | |
| + * the sender of initiating_skb. */
 | |
| +void packet_send_handshake_cookie(struct wireguard_device *wg, struct sk_buff *initiating_skb, __le32 sender_index)
 | |
| +{
 | |
| +	struct message_handshake_cookie packet;
 | |
| +
 | |
| +	net_dbg_skb_ratelimited("%s: Sending cookie response for denied handshake message for %pISpfsc\n", netdev_pub(wg)->name, initiating_skb);
 | |
| +	cookie_message_create(&packet, initiating_skb, sender_index, &wg->cookie_checker);
 | |
| +	socket_send_buffer_as_reply_to_skb(wg, initiating_skb, &packet, sizeof(packet));
 | |
| +}
 | |
| +
 | |
| +/* Queues a new handshake initiation when the current keypair is getting
 | |
| + * old. That is the case when the keypair exists with a valid sending key
 | |
| + * and either
 | |
| + *  - its sending counter exceeds REKEY_AFTER_MESSAGES, or
 | |
| + *  - we are the initiator of that keypair and REKEY_AFTER_TIME jiffies have
 | |
| + *    passed since the sending key's birthdate.
 | |
| + * The keypair is only inspected inside the RCU read-side section; the
 | |
| + * handshake is queued after leaving it. */
 | |
| +static inline void keep_key_fresh(struct wireguard_peer *peer)
 | |
| +{
 | |
| +	struct noise_keypair *keypair;
 | |
| +	bool send = false;
 | |
| +
 | |
| +	rcu_read_lock_bh();
 | |
| +	keypair = rcu_dereference_bh(peer->keypairs.current_keypair);
 | |
| +	if (likely(keypair && keypair->sending.is_valid) &&
 | |
| +	   (unlikely(atomic64_read(&keypair->sending.counter.counter) > REKEY_AFTER_MESSAGES) ||
 | |
| +	   (keypair->i_am_the_initiator && unlikely(time_is_before_eq_jiffies64(keypair->sending.birthdate + REKEY_AFTER_TIME)))))
 | |
| +		send = true;
 | |
| +	rcu_read_unlock_bh();
 | |
| +
 | |
| +	if (send)
 | |
| +		packet_queue_handshake_initiation(peer, false);
 | |
| +}
 | |
| +
 | |
| +/* Sends a keepalive to peer, unless data is already waiting to go out.
 | |
| + *
 | |
| + * If peer->tx_packet_queue is empty, an skb without payload is allocated
 | |
| + * and queued; in either case the queue is then submitted with
 | |
| + * packet_send_queue(). If the allocation fails the function returns
 | |
| + * without submitting anything. */
 | |
| +void packet_send_keepalive(struct wireguard_peer *peer)
 | |
| +{
 | |
| +	struct sk_buff *skb;
 | |
| +	if (!skb_queue_len(&peer->tx_packet_queue)) {
 | |
| +		skb = alloc_skb(DATA_PACKET_HEAD_ROOM + MESSAGE_MINIMUM_LENGTH, GFP_ATOMIC);
 | |
| +		if (unlikely(!skb))
 | |
| +			return;
 | |
| +		skb_reserve(skb, DATA_PACKET_HEAD_ROOM);
 | |
| +		skb->dev = netdev_pub(peer->device);
 | |
| +		skb_queue_tail(&peer->tx_packet_queue, skb);
 | |
| +		net_dbg_ratelimited("%s: Sending keepalive packet to peer %Lu (%pISpfsc)\n", netdev_pub(peer->device)->name, peer->internal_id, &peer->endpoint.addr);
 | |
| +	}
 | |
| +	packet_send_queue(peer);
 | |
| +}
 | |
| +
 | |
| +/* Transmits every skb in queue to peer and updates the peer's timers.
 | |
| + * NOTE(review): presumably the completion step of packet_create_data();
 | |
| + * the caller is outside this hunk.
 | |
| + *
 | |
| + * An skb whose length equals message_data_len(0) is treated as a
 | |
| + * keepalive. timers_data_sent() is only raised if at least one
 | |
| + * non-keepalive skb was sent successfully. Afterwards the key freshness is
 | |
| + * checked and, if peer->need_resend_queue is set, the peer's queue is
 | |
| + * submitted again. Does nothing for an empty queue. */
 | |
| +void packet_create_data_done(struct sk_buff_head *queue, struct wireguard_peer *peer)
 | |
| +{
 | |
| +	struct sk_buff *skb, *tmp;
 | |
| +	bool is_keepalive, data_sent = false;
 | |
| +
 | |
| +	if (unlikely(!skb_queue_len(queue)))
 | |
| +		return;
 | |
| +
 | |
| +	timers_any_authenticated_packet_traversal(peer);
 | |
| +	skb_queue_walk_safe (queue, skb, tmp) {
 | |
| +		/* The length is read before the send call is made with skb. */
 | |
| +		is_keepalive = skb->len == message_data_len(0);
 | |
| +		if (likely(!socket_send_skb_to_peer(peer, skb, PACKET_CB(skb)->ds) && !is_keepalive))
 | |
| +			data_sent = true;
 | |
| +	}
 | |
| +	if (likely(data_sent))
 | |
| +		timers_data_sent(peer);
 | |
| +
 | |
| +	keep_key_fresh(peer);
 | |
| +
 | |
| +	if (unlikely(peer->need_resend_queue))
 | |
| +		packet_send_queue(peer);
 | |
| +}
 | |
| +
 | |
| +/* Submits everything in peer->tx_packet_queue for encryption and sending.
 | |
| + *
 | |
| + * The peer's queue is moved into a local queue under the queue's spinlock
 | |
| + * and passed to packet_create_data(). Depending on its result the skbs are
 | |
| + * left with packet_create_data() (0), put back onto the peer's queue
 | |
| + * (-EBUSY, -ENOKEY) or freed (any other error). */
 | |
| +void packet_send_queue(struct wireguard_peer *peer)
 | |
| +{
 | |
| +	struct sk_buff_head queue;
 | |
| +
 | |
| +	peer->need_resend_queue = false;
 | |
| +
 | |
| +	/* Steal the current queue into our local one. */
 | |
| +	skb_queue_head_init(&queue);
 | |
| +	spin_lock_bh(&peer->tx_packet_queue.lock);
 | |
| +	skb_queue_splice_init(&peer->tx_packet_queue, &queue);
 | |
| +	spin_unlock_bh(&peer->tx_packet_queue.lock);
 | |
| +
 | |
| +	if (unlikely(!skb_queue_len(&queue)))
 | |
| +		return;
 | |
| +
 | |
| +	/* We submit it for encryption and sending. */
 | |
| +	switch (packet_create_data(&queue, peer)) {
 | |
| +	case 0:
 | |
| +		break;
 | |
| +	case -EBUSY:
 | |
| +		/* EBUSY happens when the parallel workers are all filled up, in which
 | |
| +		 * case we should requeue everything. */
 | |
| +
 | |
| +		/* First, we mark that we should try to do this later, when existing
 | |
| +		 * jobs are done. */
 | |
| +		peer->need_resend_queue = true;
 | |
| +
 | |
| +		/* We splice the skbs still held in our local queue back onto the
 | |
| +		 * head of the peer's queue, so they stay ahead of anything that
 | |
| +		 * was queued in the meantime. */
 | |
| +		spin_lock_bh(&peer->tx_packet_queue.lock);
 | |
| +		skb_queue_splice(&queue, &peer->tx_packet_queue);
 | |
| +		spin_unlock_bh(&peer->tx_packet_queue.lock);
 | |
| +		break;
 | |
| +	case -ENOKEY:
 | |
| +		/* ENOKEY means that we don't have a valid session for the peer, which
 | |
| +		 * means we should initiate a session, but after requeuing like above. */
 | |
| +
 | |
| +		spin_lock_bh(&peer->tx_packet_queue.lock);
 | |
| +		skb_queue_splice(&queue, &peer->tx_packet_queue);
 | |
| +		spin_unlock_bh(&peer->tx_packet_queue.lock);
 | |
| +
 | |
| +		packet_queue_handshake_initiation(peer, false);
 | |
| +		break;
 | |
| +	default:
 | |
| +		/* If we failed for any other reason, we want to just free the packets and
 | |
| +		 * forget about them. We do this unlocked, since we're the only ones with
 | |
| +		 * a reference to the local queue. */
 | |
| +		__skb_queue_purge(&queue);
 | |
| +	}
 | |
| +}
 | |
| --- /dev/null	2017-07-05 16:27:37.615351856 +0200
 | |
| +++ b/net/wireguard/socket.c	2017-07-06 18:17:33.000000000 +0200
 | |
| @@ -0,0 +1,387 @@
 | |
| +/* Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. */
 | |
| +
 | |
| +#include "device.h"
 | |
| +#include "peer.h"
 | |
| +#include "socket.h"
 | |
| +#include "packets.h"
 | |
| +#include "messages.h"
 | |
| +
 | |
| +#include <linux/ctype.h>
 | |
| +#include <linux/net.h>
 | |
| +#include <linux/if_vlan.h>
 | |
| +#include <linux/if_ether.h>
 | |
| +#include <linux/inetdevice.h>
 | |
| +#include <net/udp_tunnel.h>
 | |
| +#include <net/ipv6.h>
 | |
| +
 | |
| +/* Transmits skb to endpoint over the device's IPv4 UDP socket.
 | |
| + *
 | |
| + * wg:       device whose sock4, incoming_port and fwmark are used
 | |
| + * skb:      packet to send; it is consumed on every path (handed to
 | |
| + *           udp_tunnel_xmit_skb() on success, freed on error)
 | |
| + * endpoint: destination address/port and preferred source address; the
 | |
| + *           source address is reset to 0 here if it turns out unusable
 | |
| + * ds:       DS field value passed to udp_tunnel_xmit_skb()
 | |
| + * cache:    optional route cache; may be NULL
 | |
| + *
 | |
| + * Returns 0 on success, -ENONET if the device has no IPv4 socket, -ELOOP if
 | |
| + * the route would lead back into this device, or the routing error. */
 | |
| +static inline int send4(struct wireguard_device *wg, struct sk_buff *skb, struct endpoint *endpoint, u8 ds, struct dst_cache *cache)
 | |
| +{
 | |
| +	struct flowi4 fl = {
 | |
| +		.saddr = endpoint->src4.s_addr,
 | |
| +		.daddr = endpoint->addr4.sin_addr.s_addr,
 | |
| +		.fl4_dport = endpoint->addr4.sin_port,
 | |
| +		.fl4_sport = htons(wg->incoming_port),
 | |
| +		.flowi4_mark = wg->fwmark,
 | |
| +		.flowi4_proto = IPPROTO_UDP
 | |
| +	};
 | |
| +	struct rtable *rt = NULL;
 | |
| +	struct sock *sock;
 | |
| +	int ret = 0;
 | |
| +
 | |
| +	/* Detach the skb from whatever list it was on before sending it. */
 | |
| +	skb->next = skb->prev = NULL;
 | |
| +	skb->dev = netdev_pub(wg);
 | |
| +
 | |
| +	rcu_read_lock_bh();
 | |
| +	sock = rcu_dereference_bh(wg->sock4);
 | |
| +
 | |
| +	if (unlikely(!sock)) {
 | |
| +		ret = -ENONET;
 | |
| +		goto err;
 | |
| +	}
 | |
| +
 | |
| +	if (cache)
 | |
| +		rt = dst_cache_get_ip4(cache, &fl.saddr);
 | |
| +
 | |
| +	if (!rt) {
 | |
| +		security_sk_classify_flow(sock, flowi4_to_flowi(&fl));
 | |
| +		rt = ip_route_output_flow(sock_net(sock), &fl, sock);
 | |
| +		/* A remembered source address that is rejected by the route
 | |
| +		 * lookup (-EINVAL) or not confirmed by inet_confirm_addr() is
 | |
| +		 * forgotten, and the lookup is retried without it. */
 | |
| +		if (unlikely(endpoint->src4.s_addr && ((IS_ERR(rt) && PTR_ERR(rt) == -EINVAL) || (!IS_ERR(rt) && !inet_confirm_addr(sock_net(sock), rcu_dereference_bh(rt->dst.dev->ip_ptr), 0, fl.saddr, RT_SCOPE_HOST))))) {
 | |
| +			endpoint->src4.s_addr = fl.saddr = 0;
 | |
| +			if (cache)
 | |
| +				dst_cache_reset(cache);
 | |
| +			if (!IS_ERR(rt))
 | |
| +				ip_rt_put(rt);
 | |
| +			rt = ip_route_output_flow(sock_net(sock), &fl, sock);
 | |
| +		}
 | |
| +		if (unlikely(IS_ERR(rt))) {
 | |
| +			ret = PTR_ERR(rt);
 | |
| +			net_dbg_ratelimited("%s: No route to %pISpfsc, error %d\n", netdev_pub(wg)->name, &endpoint->addr, ret);
 | |
| +			goto err;
 | |
| +		} else if (unlikely(rt->dst.dev == skb->dev)) {
 | |
| +			ip_rt_put(rt);
 | |
| +			ret = -ELOOP;
 | |
| +			net_dbg_ratelimited("%s: Avoiding routing loop to %pISpfsc\n", netdev_pub(wg)->name, &endpoint->addr);
 | |
| +			goto err;
 | |
| +		}
 | |
| +		if (cache)
 | |
| +			dst_cache_set_ip4(cache, &rt->dst, fl.saddr);
 | |
| +	}
 | |
| +	udp_tunnel_xmit_skb(rt, sock, skb,
 | |
| +			    fl.saddr, fl.daddr,
 | |
| +			    ds, ip4_dst_hoplimit(&rt->dst), 0,
 | |
| +			    fl.fl4_sport, fl.fl4_dport,
 | |
| +			    false, false);
 | |
| +	goto out;
 | |
| +
 | |
| +err:
 | |
| +	kfree_skb(skb);
 | |
| +out:
 | |
| +	rcu_read_unlock_bh();
 | |
| +	return ret;
 | |
| +}
 | |
| +
 | |
| +/* Transmits skb to endpoint over the device's IPv6 UDP socket.
 | |
| + *
 | |
| + * wg:       device whose sock6, incoming_port and fwmark are used
 | |
| + * skb:      packet to send; it is consumed on every path (handed to
 | |
| + *           udp_tunnel6_xmit_skb() on success, freed on error), including
 | |
| + *           kernels built without IPv6
 | |
| + * endpoint: destination address/port/scope and preferred source address;
 | |
| + *           the source address is reset to "any" here if it is no longer
 | |
| + *           a local address
 | |
| + * ds:       DS field value passed to udp_tunnel6_xmit_skb()
 | |
| + * cache:    optional route cache; may be NULL
 | |
| + *
 | |
| + * Returns 0 on success, -ENONET if the device has no IPv6 socket, -ELOOP if
 | |
| + * the route would lead back into this device, the routing error, or
 | |
| + * -EAFNOSUPPORT when IPv6 support is not compiled in. */
 | |
| +static inline int send6(struct wireguard_device *wg, struct sk_buff *skb, struct endpoint *endpoint, u8 ds, struct dst_cache *cache)
 | |
| +{
 | |
| +#if IS_ENABLED(CONFIG_IPV6)
 | |
| +	struct flowi6 fl = {
 | |
| +		.saddr = endpoint->src6,
 | |
| +		.daddr = endpoint->addr6.sin6_addr,
 | |
| +		.fl6_dport = endpoint->addr6.sin6_port,
 | |
| +		.fl6_sport = htons(wg->incoming_port),
 | |
| +		.flowi6_mark = wg->fwmark,
 | |
| +		.flowi6_oif = endpoint->addr6.sin6_scope_id,
 | |
| +		.flowi6_proto = IPPROTO_UDP
 | |
| +		/* TODO: addr->sin6_flowinfo */
 | |
| +	};
 | |
| +	struct dst_entry *dst = NULL;
 | |
| +	struct sock *sock;
 | |
| +	int ret = 0;
 | |
| +
 | |
| +	/* Detach the skb from whatever list it was on before sending it. */
 | |
| +	skb->next = skb->prev = NULL;
 | |
| +	skb->dev = netdev_pub(wg);
 | |
| +
 | |
| +	rcu_read_lock_bh();
 | |
| +	sock = rcu_dereference_bh(wg->sock6);
 | |
| +
 | |
| +	if (unlikely(!sock)) {
 | |
| +		ret = -ENONET;
 | |
| +		goto err;
 | |
| +	}
 | |
| +
 | |
| +	if (cache)
 | |
| +		dst = dst_cache_get_ip6(cache, &fl.saddr);
 | |
| +
 | |
| +	if (!dst) {
 | |
| +		security_sk_classify_flow(sock, flowi6_to_flowi(&fl));
 | |
| +		/* A remembered source address that ipv6_chk_addr() no longer
 | |
| +		 * accepts is forgotten before the route lookup. */
 | |
| +		if (unlikely(!ipv6_addr_any(&fl.saddr) && !ipv6_chk_addr(sock_net(sock), &fl.saddr, NULL, 0))) {
 | |
| +			endpoint->src6 = fl.saddr = in6addr_any;
 | |
| +			if (cache)
 | |
| +				dst_cache_reset(cache);
 | |
| +		}
 | |
| +		ret = ipv6_stub->ipv6_dst_lookup(sock_net(sock), sock, &dst, &fl);
 | |
| +		if (unlikely(ret)) {
 | |
| +			net_dbg_ratelimited("%s: No route to %pISpfsc, error %d\n", netdev_pub(wg)->name, &endpoint->addr, ret);
 | |
| +			goto err;
 | |
| +		} else if (unlikely(dst->dev == skb->dev)) {
 | |
| +			dst_release(dst);
 | |
| +			ret = -ELOOP;
 | |
| +			net_dbg_ratelimited("%s: Avoiding routing loop to %pISpfsc\n", netdev_pub(wg)->name, &endpoint->addr);
 | |
| +			goto err;
 | |
| +		}
 | |
| +		if (cache)
 | |
| +			dst_cache_set_ip6(cache, dst, &fl.saddr);
 | |
| +	}
 | |
| +
 | |
| +	udp_tunnel6_xmit_skb(dst, sock, skb, skb->dev,
 | |
| +			     &fl.saddr, &fl.daddr,
 | |
| +			     ds, ip6_dst_hoplimit(dst), 0,
 | |
| +			     fl.fl6_sport, fl.fl6_dport,
 | |
| +			     false);
 | |
| +	goto out;
 | |
| +
 | |
| +err:
 | |
| +	kfree_skb(skb);
 | |
| +out:
 | |
| +	rcu_read_unlock_bh();
 | |
| +	return ret;
 | |
| +#else
 | |
| +	/* Every other path consumes the skb, so it must not leak here. */
 | |
| +	kfree_skb(skb);
 | |
| +	return -EAFNOSUPPORT;
 | |
| +#endif
 | |
| +}
 | |
| +
 | |
| +/* Sends skb to peer's current endpoint, choosing send4() or send6() by the
 | |
| + * endpoint's address family, under the read side of peer->endpoint_lock.
 | |
| + *
 | |
| + * The skb is consumed in every case: send4()/send6() take it over, and it
 | |
| + * is freed here when the endpoint has no supported address family (for
 | |
| + * example because no endpoint is set yet).
 | |
| + *
 | |
| + * On success the skb's length, sampled before sending, is added to
 | |
| + * peer->tx_bytes. Returns 0 on success, -EAFNOSUPPORT for an unsupported
 | |
| + * family, or the error from send4()/send6(). */
 | |
| +int socket_send_skb_to_peer(struct wireguard_peer *peer, struct sk_buff *skb, u8 ds)
 | |
| +{
 | |
| +	size_t skb_len = skb->len;
 | |
| +	int ret = -EAFNOSUPPORT;
 | |
| +
 | |
| +	read_lock_bh(&peer->endpoint_lock);
 | |
| +	if (peer->endpoint.addr.sa_family == AF_INET)
 | |
| +		ret = send4(peer->device, skb, &peer->endpoint, ds, &peer->endpoint_cache);
 | |
| +	else if (peer->endpoint.addr.sa_family == AF_INET6)
 | |
| +		ret = send6(peer->device, skb, &peer->endpoint, ds, &peer->endpoint_cache);
 | |
| +	else
 | |
| +		kfree_skb(skb); /* Nobody else will free it on this path. */
 | |
| +	if (likely(!ret))
 | |
| +		peer->tx_bytes += skb_len;
 | |
| +	read_unlock_bh(&peer->endpoint_lock);
 | |
| +
 | |
| +	return ret;
 | |
| +}
 | |
| +
 | |
| +/* Copies len bytes from buffer into a freshly allocated skb (with
 | |
| + * SKB_HEADER_LEN bytes of headroom reserved) and sends it to peer via
 | |
| + * socket_send_skb_to_peer(). Returns -ENOMEM if the skb cannot be
 | |
| + * allocated, otherwise the result of socket_send_skb_to_peer(). */
 | |
| +int socket_send_buffer_to_peer(struct wireguard_peer *peer, void *buffer, size_t len, u8 ds)
 | |
| +{
 | |
| +	struct sk_buff *skb = alloc_skb(len + SKB_HEADER_LEN, GFP_ATOMIC);
 | |
| +	if (unlikely(!skb))
 | |
| +		return -ENOMEM;
 | |
| +	skb_reserve(skb, SKB_HEADER_LEN);
 | |
| +	memcpy(skb_put(skb, len), buffer, len);
 | |
| +	return socket_send_skb_to_peer(peer, skb, ds);
 | |
| +}
 | |
| +
 | |
| +/* Sends len bytes from out_buffer back to the sender of in_skb.
 | |
| + *
 | |
| + * The reply endpoint is derived from in_skb's headers with
 | |
| + * socket_endpoint_from_skb(); no route cache is used and the DS field is 0.
 | |
| + *
 | |
| + * Returns -EINVAL if in_skb is NULL or of an unsupported protocol, -ENOMEM
 | |
| + * if no skb can be allocated, -EAFNOSUPPORT for an unsupported endpoint
 | |
| + * family, otherwise the result of send4()/send6(). */
 | |
| +int socket_send_buffer_as_reply_to_skb(struct wireguard_device *wg, struct sk_buff *in_skb, void *out_buffer, size_t len)
 | |
| +{
 | |
| +	int ret = 0;
 | |
| +	struct sk_buff *skb;
 | |
| +	struct endpoint endpoint;
 | |
| +
 | |
| +	if (unlikely(!in_skb))
 | |
| +		return -EINVAL;
 | |
| +	ret = socket_endpoint_from_skb(&endpoint, in_skb);
 | |
| +	if (unlikely(ret < 0))
 | |
| +		return ret;
 | |
| +
 | |
| +	skb = alloc_skb(len + SKB_HEADER_LEN, GFP_ATOMIC);
 | |
| +	if (unlikely(!skb))
 | |
| +		return -ENOMEM;
 | |
| +	skb_reserve(skb, SKB_HEADER_LEN);
 | |
| +	memcpy(skb_put(skb, len), out_buffer, len);
 | |
| +
 | |
| +	if (endpoint.addr.sa_family == AF_INET)
 | |
| +		ret = send4(wg, skb, &endpoint, 0, NULL);
 | |
| +	else if (endpoint.addr.sa_family == AF_INET6)
 | |
| +		ret = send6(wg, skb, &endpoint, 0, NULL);
 | |
| +	else {
 | |
| +		/* Not expected after a successful socket_endpoint_from_skb(),
 | |
| +		 * but the skb allocated above must not leak if it happens. */
 | |
| +		kfree_skb(skb);
 | |
| +		ret = -EAFNOSUPPORT;
 | |
| +	}
 | |
| +
 | |
| +	return ret;
 | |
| +}
 | |
| +
 | |
| +/* Fills *endpoint with the reply address for a received skb.
 | |
| + *
 | |
| + * The endpoint is zeroed first. The skb's source address and UDP source
 | |
| + * port become the endpoint's destination, and the skb's destination address
 | |
| + * becomes the endpoint's source address. For IPv6 the scope id is derived
 | |
| + * from the source address and skb->skb_iif.
 | |
| + *
 | |
| + * Returns 0 on success, or -EINVAL if skb->protocol is neither IPv4 nor
 | |
| + * IPv6 (the endpoint is left zeroed in that case). */
 | |
| +int socket_endpoint_from_skb(struct endpoint *endpoint, struct sk_buff *skb)
 | |
| +{
 | |
| +	memset(endpoint, 0, sizeof(struct endpoint));
 | |
| +	if (skb->protocol == htons(ETH_P_IP)) {
 | |
| +		endpoint->addr4.sin_family = AF_INET;
 | |
| +		endpoint->addr4.sin_port = udp_hdr(skb)->source;
 | |
| +		endpoint->addr4.sin_addr.s_addr = ip_hdr(skb)->saddr;
 | |
| +		endpoint->src4.s_addr = ip_hdr(skb)->daddr;
 | |
| +	} else if (skb->protocol == htons(ETH_P_IPV6)) {
 | |
| +		endpoint->addr6.sin6_family = AF_INET6;
 | |
| +		endpoint->addr6.sin6_port = udp_hdr(skb)->source;
 | |
| +		endpoint->addr6.sin6_addr = ipv6_hdr(skb)->saddr;
 | |
| +		endpoint->addr6.sin6_scope_id = ipv6_iface_scope_id(&ipv6_hdr(skb)->saddr, skb->skb_iif);
 | |
| +		/* TODO: endpoint->addr6.sin6_flowinfo */
 | |
| +		endpoint->src6 = ipv6_hdr(skb)->daddr;
 | |
| +	} else
 | |
| +		return -EINVAL;
 | |
| +	return 0;
 | |
| +}
 | |
| +
 | |
| +/* Makes endpoint the peer's current endpoint, if it differs.
 | |
| + *
 | |
| + * The comparison is done under the read side of peer->endpoint_lock; an
 | |
| + * unchanged endpoint returns through the "out" label without writing. If
 | |
| + * something differs, the read lock is dropped, the write lock is taken, the
 | |
| + * address and source address are copied and the peer's route cache is
 | |
| + * reset. Endpoints of any family other than AF_INET/AF_INET6 are ignored. */
 | |
| +void socket_set_peer_endpoint(struct wireguard_peer *peer, struct endpoint *endpoint)
 | |
| +{
 | |
| +	if (endpoint->addr.sa_family == AF_INET) {
 | |
| +		read_lock_bh(&peer->endpoint_lock);
 | |
| +		if (likely(peer->endpoint.addr4.sin_family == AF_INET &&
 | |
| +			   peer->endpoint.addr4.sin_port == endpoint->addr4.sin_port &&
 | |
| +			   peer->endpoint.addr4.sin_addr.s_addr == endpoint->addr4.sin_addr.s_addr &&
 | |
| +			   peer->endpoint.src4.s_addr == endpoint->src4.s_addr))
 | |
| +			goto out;
 | |
| +		read_unlock_bh(&peer->endpoint_lock);
 | |
| +		write_lock_bh(&peer->endpoint_lock);
 | |
| +		peer->endpoint.addr4 = endpoint->addr4;
 | |
| +		peer->endpoint.src4 = endpoint->src4;
 | |
| +	} else if (endpoint->addr.sa_family == AF_INET6) {
 | |
| +		read_lock_bh(&peer->endpoint_lock);
 | |
| +		if (likely(peer->endpoint.addr6.sin6_family == AF_INET6 &&
 | |
| +			   peer->endpoint.addr6.sin6_port == endpoint->addr6.sin6_port &&
 | |
| +			   /* TODO: peer->endpoint.addr6.sin6_flowinfo == endpoint->addr6.sin6_flowinfo && */
 | |
| +			   ipv6_addr_equal(&peer->endpoint.addr6.sin6_addr, &endpoint->addr6.sin6_addr) &&
 | |
| +			   peer->endpoint.addr6.sin6_scope_id == endpoint->addr6.sin6_scope_id &&
 | |
| +			   ipv6_addr_equal(&peer->endpoint.src6, &endpoint->src6)))
 | |
| +			goto out;
 | |
| +		read_unlock_bh(&peer->endpoint_lock);
 | |
| +		write_lock_bh(&peer->endpoint_lock);
 | |
| +		peer->endpoint.addr6 = endpoint->addr6;
 | |
| +		peer->endpoint.src6 = endpoint->src6;
 | |
| +	} else
 | |
| +		return;
 | |
| +	/* Reached only with the write lock held, after an actual change. */
 | |
| +	dst_cache_reset(&peer->endpoint_cache);
 | |
| +	write_unlock_bh(&peer->endpoint_lock);
 | |
| +	return;
 | |
| +out:
 | |
| +	read_unlock_bh(&peer->endpoint_lock);
 | |
| +}
 | |
| +
 | |
| +/* Forgets the source address remembered for peer's endpoint and resets the
 | |
| + * peer's route cache, under the write side of peer->endpoint_lock.
 | |
| + * NOTE(review): only src6 is zeroed by name; this presumably also covers
 | |
| + * src4 because the two share storage -- confirm against struct endpoint. */
 | |
| +void socket_clear_peer_endpoint_src(struct wireguard_peer *peer)
 | |
| +{
 | |
| +	write_lock_bh(&peer->endpoint_lock);
 | |
| +	memset(&peer->endpoint.src6, 0, sizeof(peer->endpoint.src6));
 | |
| +	dst_cache_reset(&peer->endpoint_cache);
 | |
| +	write_unlock_bh(&peer->endpoint_lock);
 | |
| +}
 | |
| +
 | |
| +/* encap_rcv callback installed by socket_init(): hands skb to
 | |
| + * packet_receive() for the device stored in sk->sk_user_data. If sk or the
 | |
| + * device pointer is NULL the skb is freed instead. Always returns 0. */
 | |
| +static int receive(struct sock *sk, struct sk_buff *skb)
 | |
| +{
 | |
| +	struct wireguard_device *wg = NULL;
 | |
| +
 | |
| +	if (likely(sk))
 | |
| +		wg = sk->sk_user_data;
 | |
| +
 | |
| +	if (unlikely(!wg)) {
 | |
| +		kfree_skb(skb);
 | |
| +		return 0;
 | |
| +	}
 | |
| +
 | |
| +	packet_receive(wg, skb);
 | |
| +	return 0;
 | |
| +}
 | |
| +
 | |
| +/* Undoes set_sock_opts()'s sk_set_memalloc() and releases the UDP tunnel
 | |
| + * socket that owns sock. A NULL sock is ignored. */
 | |
| +static inline void sock_free(struct sock *sock)
 | |
| +{
 | |
| +	if (likely(sock)) {
 | |
| +		sk_clear_memalloc(sock);
 | |
| +		udp_tunnel_sock_release(sock->sk_socket);
 | |
| +	}
 | |
| +}
 | |
| +
 | |
| +/* Configures a freshly created tunnel socket: atomic allocations, send
 | |
| + * buffer limit raised to INT_MAX, and sk_set_memalloc() applied. */
 | |
| +static inline void set_sock_opts(struct socket *sock)
 | |
| +{
 | |
| +	struct sock *sk = sock->sk;
 | |
| +
 | |
| +	sk->sk_allocation = GFP_ATOMIC;
 | |
| +	sk->sk_sndbuf = INT_MAX;
 | |
| +	sk_set_memalloc(sk);
 | |
| +}
 | |
| +
 | |
| +/* Creates the device's UDP sockets and publishes them in wg->sock4 and
 | |
| + * wg->sock6, all under wg->socket_update_lock.
 | |
| + *
 | |
| + * The IPv4 socket is bound to wg->incoming_port; the port it actually got
 | |
| + * is written back to wg->incoming_port and reused for the IPv6 socket. The
 | |
| + * IPv6 socket is only created if IPv6 is compiled in and
 | |
| + * ipv6_mod_enabled(). If port 0 was requested and the chosen port is
 | |
| + * already taken for IPv6, the whole procedure is retried up to 100 times.
 | |
| + *
 | |
| + * The sockets are published only after all of them were created, so a
 | |
| + * concurrent sender never sees a socket that is about to be released on the
 | |
| + * failure path. A discarded IPv4 socket goes through sock_free() so that
 | |
| + * the sk_set_memalloc() done by set_sock_opts() is balanced.
 | |
| + *
 | |
| + * Returns 0 on success, -EADDRINUSE if the device already has a socket, or
 | |
| + * the error from udp_sock_create(). */
 | |
| +int socket_init(struct wireguard_device *wg)
 | |
| +{
 | |
| +	int ret = 0;
 | |
| +	struct udp_tunnel_sock_cfg cfg = {
 | |
| +		.sk_user_data = wg,
 | |
| +		.encap_type = 1,
 | |
| +		.encap_rcv = receive
 | |
| +	};
 | |
| +	struct socket *new4 = NULL;
 | |
| +	struct udp_port_cfg port4 = {
 | |
| +		.family = AF_INET,
 | |
| +		.local_ip.s_addr = htonl(INADDR_ANY),
 | |
| +		.local_udp_port = htons(wg->incoming_port),
 | |
| +		.use_udp_checksums = true
 | |
| +	};
 | |
| +#if IS_ENABLED(CONFIG_IPV6)
 | |
| +	int retries = 0;
 | |
| +	struct socket *new6 = NULL;
 | |
| +	struct udp_port_cfg port6 = {
 | |
| +		.family = AF_INET6,
 | |
| +		.local_ip6 = IN6ADDR_ANY_INIT,
 | |
| +		.local_udp_port = htons(wg->incoming_port),
 | |
| +		.use_udp6_tx_checksums = true,
 | |
| +		.use_udp6_rx_checksums = true,
 | |
| +		.ipv6_v6only = true
 | |
| +	};
 | |
| +#endif
 | |
| +	mutex_lock(&wg->socket_update_lock);
 | |
| +#if IS_ENABLED(CONFIG_IPV6)
 | |
| +retry:
 | |
| +#endif
 | |
| +	if (rcu_dereference_protected(wg->sock4, lockdep_is_held(&wg->socket_update_lock)) ||
 | |
| +	    rcu_dereference_protected(wg->sock6, lockdep_is_held(&wg->socket_update_lock))) {
 | |
| +		ret = -EADDRINUSE;
 | |
| +		goto out;
 | |
| +	}
 | |
| +
 | |
| +	ret = udp_sock_create(wg->creating_net, &port4, &new4);
 | |
| +	if (ret < 0) {
 | |
| +		pr_err("%s: Could not create IPv4 socket\n", netdev_pub(wg)->name);
 | |
| +		goto out;
 | |
| +	}
 | |
| +	/* Remember the port actually bound, which matters for port 0. */
 | |
| +	wg->incoming_port = ntohs(inet_sk(new4->sk)->inet_sport);
 | |
| +	set_sock_opts(new4);
 | |
| +	setup_udp_tunnel_sock(wg->creating_net, new4, &cfg);
 | |
| +
 | |
| +#if IS_ENABLED(CONFIG_IPV6)
 | |
| +	if (ipv6_mod_enabled()) {
 | |
| +		port6.local_udp_port = htons(wg->incoming_port);
 | |
| +		ret = udp_sock_create(wg->creating_net, &port6, &new6);
 | |
| +		if (ret < 0) {
 | |
| +			/* new4 was never published, so it can go right away. */
 | |
| +			sock_free(new4->sk);
 | |
| +			if (ret == -EADDRINUSE && !port4.local_udp_port && retries++ < 100)
 | |
| +				goto retry;
 | |
| +			if (!port4.local_udp_port)
 | |
| +				wg->incoming_port = 0;
 | |
| +			pr_err("%s: Could not create IPv6 socket\n", netdev_pub(wg)->name);
 | |
| +			goto out;
 | |
| +		}
 | |
| +		set_sock_opts(new6);
 | |
| +		setup_udp_tunnel_sock(wg->creating_net, new6, &cfg);
 | |
| +	}
 | |
| +#endif
 | |
| +
 | |
| +	/* Everything succeeded: make the sockets visible to senders. */
 | |
| +	rcu_assign_pointer(wg->sock4, new4->sk);
 | |
| +#if IS_ENABLED(CONFIG_IPV6)
 | |
| +	if (new6)
 | |
| +		rcu_assign_pointer(wg->sock6, new6->sk);
 | |
| +#endif
 | |
| +
 | |
| +out:
 | |
| +	mutex_unlock(&wg->socket_update_lock);
 | |
| +	return ret;
 | |
| +}
 | |
| +
 | |
| +/* Tears down the device's UDP sockets.
 | |
| + *
 | |
| + * Under wg->socket_update_lock both socket pointers are read and replaced
 | |
| + * by NULL. After synchronize_rcu_bh() and synchronize_net() have returned,
 | |
| + * the old sockets are released with sock_free(), which ignores NULL. */
 | |
| +void socket_uninit(struct wireguard_device *wg)
 | |
| +{
 | |
| +	struct sock *old4, *old6;
 | |
| +	mutex_lock(&wg->socket_update_lock);
 | |
| +	old4 = rcu_dereference_protected(wg->sock4, lockdep_is_held(&wg->socket_update_lock));
 | |
| +	old6 = rcu_dereference_protected(wg->sock6, lockdep_is_held(&wg->socket_update_lock));
 | |
| +	rcu_assign_pointer(wg->sock4, NULL);
 | |
| +	rcu_assign_pointer(wg->sock6, NULL);
 | |
| +	mutex_unlock(&wg->socket_update_lock);
 | |
| +	/* Wait for readers before freeing what they may still be using. */
 | |
| +	synchronize_rcu_bh();
 | |
| +	synchronize_net();
 | |
| +	sock_free(old4);
 | |
| +	sock_free(old6);
 | |
| +}
 | |
| --- /dev/null	2017-07-05 16:27:37.615351856 +0200
 | |
| +++ b/net/wireguard/timers.c	2017-07-06 18:17:33.000000000 +0200
 | |
| @@ -0,0 +1,179 @@
 | |
| +/* Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. */
 | |
| +
 | |
| +#include "timers.h"
 | |
| +#include "device.h"
 | |
| +#include "peer.h"
 | |
| +#include "packets.h"
 | |
| +
 | |
| +/*
 | |
| + * Timer for retransmitting the handshake if we don't hear back after `REKEY_TIMEOUT + jitter` ms
 | |
| + * Timer for sending an empty packet if we have received a packet but have not sent one afterwards for `KEEPALIVE_TIMEOUT` ms
 | |
| + * Timer for initiating a new handshake if we have sent a packet but have not received one (even empty) afterwards for `(KEEPALIVE_TIMEOUT + REKEY_TIMEOUT)` ms
 | |
| + * Timer for zeroing out all ephemeral keys after `(REJECT_AFTER_TIME * 3)` ms if no new keys have been received
 | |
| + * Timer for, if enabled, sending an empty authenticated packet every user-specified seconds
 | |
| + */
 | |
| +
 | |
| +/* Rounds time down to a multiple of roundup_pow_of_two(HZ / 4) jiffies,
 | |
| + * i.e. of the smallest power of two that is at least a quarter second's
 | |
| + * worth of jiffies. */
 | |
| +static inline unsigned long slack_time(unsigned long time)
 | |
| +{
 | |
| +	return time & ~(roundup_pow_of_two(HZ / 4) - 1);
 | |
| +}
 | |
| +
 | |
| +/* Prologue for the timer callbacks below: declares a local `peer` that
 | |
| + * holds a reference obtained with peer_rcu_get() from the callback's
 | |
| + * unsigned long argument, and returns from the enclosing void function if
 | |
| + * no reference could be obtained. The callbacks in this file release that
 | |
| + * reference with peer_put() or hand it to a work item. */
 | |
| +#define peer_get_from_ptr(ptr) \
 | |
| +	struct wireguard_peer *peer = peer_rcu_get((struct wireguard_peer *)ptr); \
 | |
| +	if (unlikely(!peer)) \
 | |
| +		return;
 | |
| +
 | |
| +/* Timer callback for timer_retransmit_handshake.
 | |
| + *
 | |
| + * While peer->timer_handshake_attempts has not exceeded
 | |
| + * MAX_TIMER_HANDSHAKES, the counter is incremented and another handshake
 | |
| + * initiation is queued as a retry. Once it has, the attempt is given up:
 | |
| + * the keepalive timer is stopped, queued packets are dropped and, unless
 | |
| + * already pending, the kill-ephemerals timer is armed. */
 | |
| +static void expired_retransmit_handshake(unsigned long ptr)
 | |
| +{
 | |
| +	peer_get_from_ptr(ptr);
 | |
| +	if (peer->timer_handshake_attempts > MAX_TIMER_HANDSHAKES) {
 | |
| +		pr_debug("%s: Handshake for peer %Lu (%pISpfsc) did not complete after %d attempts, giving up\n", netdev_pub(peer->device)->name, peer->internal_id, &peer->endpoint.addr, MAX_TIMER_HANDSHAKES + 2);
 | |
| +
 | |
| +		del_timer(&peer->timer_send_keepalive);
 | |
| +		/* We remove all existing packets and don't try again,
 | |
| +		 * if we try unsuccessfully for too long to make a handshake. */
 | |
| +		skb_queue_purge(&peer->tx_packet_queue);
 | |
| +		/* We set a timer for destroying any residue that might be left
 | |
| +		 * of a partial exchange. */
 | |
| +		if (likely(peer->timers_enabled) && !timer_pending(&peer->timer_kill_ephemerals))
 | |
| +			mod_timer(&peer->timer_kill_ephemerals, jiffies + (REJECT_AFTER_TIME * 3));
 | |
| +	} else {
 | |
| +		++peer->timer_handshake_attempts;
 | |
| +		pr_debug("%s: Handshake for peer %Lu (%pISpfsc) did not complete after %d seconds, retrying (try %d)\n", netdev_pub(peer->device)->name, peer->internal_id, &peer->endpoint.addr, REKEY_TIMEOUT / HZ, peer->timer_handshake_attempts + 1);
 | |
| +
 | |
| +		/* We clear the endpoint address src address, in case this is the cause of trouble. */
 | |
| +		socket_clear_peer_endpoint_src(peer);
 | |
| +
 | |
| +		packet_queue_handshake_initiation(peer, true);
 | |
| +	}
 | |
| +	peer_put(peer);
 | |
| +}
 | |
| +
 | |
| +/* Timer callback for timer_send_keepalive: sends a keepalive to the peer.
 | |
| + * If timers_data_received() flagged timer_need_another_keepalive while the
 | |
| + * timer was pending, the flag is cleared and, when timers are enabled, the
 | |
| + * timer is re-armed for another KEEPALIVE_TIMEOUT. */
 | |
| +static void expired_send_keepalive(unsigned long ptr)
 | |
| +{
 | |
| +	peer_get_from_ptr(ptr);
 | |
| +	packet_send_keepalive(peer);
 | |
| +	if (peer->timer_need_another_keepalive) {
 | |
| +		peer->timer_need_another_keepalive = false;
 | |
| +		if (peer->timers_enabled)
 | |
| +			mod_timer(&peer->timer_send_keepalive, jiffies + KEEPALIVE_TIMEOUT);
 | |
| +	}
 | |
| +	peer_put(peer);
 | |
| +}
 | |
| +
 | |
| +/* Timer callback for timer_new_handshake: the remembered endpoint source
 | |
| + * address is cleared and a fresh (non-retry) handshake initiation is
 | |
| + * queued for the peer. */
 | |
| +static void expired_new_handshake(unsigned long ptr)
 | |
| +{
 | |
| +	peer_get_from_ptr(ptr);
 | |
| +	pr_debug("%s: Retrying handshake with peer %Lu (%pISpfsc) because we stopped hearing back after %d seconds\n", netdev_pub(peer->device)->name, peer->internal_id, &peer->endpoint.addr, (KEEPALIVE_TIMEOUT + REKEY_TIMEOUT) / HZ);
 | |
| +	/* We clear the endpoint address src address, in case this is the cause of trouble. */
 | |
| +	socket_clear_peer_endpoint_src(peer);
 | |
| +	packet_queue_handshake_initiation(peer, false);
 | |
| +	peer_put(peer);
 | |
| +}
 | |
| +
 | |
| +/* Timer callback for timer_kill_ephemerals: defers the actual key clearing
 | |
| + * to queued_expired_kill_ephemerals() by queueing peer->clear_peer_work on
 | |
| + * the device's peer_wq. The peer reference taken here travels with the
 | |
| + * work item. */
 | |
| +static void expired_kill_ephemerals(unsigned long ptr)
 | |
| +{
 | |
| +	peer_get_from_ptr(ptr);
 | |
| +	if (!queue_work(peer->device->peer_wq, &peer->clear_peer_work)) /* Takes our reference. */
 | |
| +		peer_put(peer); /* If the work was already on the queue, we want to drop the extra reference */
 | |
| +}
 | |
| +/* Work item handler for peer->clear_peer_work: clears the peer's handshake
 | |
| + * state and keypairs, then drops the reference that
 | |
| + * expired_kill_ephemerals() handed over. */
 | |
| +static void queued_expired_kill_ephemerals(struct work_struct *work)
 | |
| +{
 | |
| +	struct wireguard_peer *peer = container_of(work, struct wireguard_peer, clear_peer_work);
 | |
| +	pr_debug("%s: Zeroing out all keys for peer %Lu (%pISpfsc), since we haven't received a new one in %d seconds\n", netdev_pub(peer->device)->name, peer->internal_id, &peer->endpoint.addr, (REJECT_AFTER_TIME * 3) / HZ);
 | |
| +	noise_handshake_clear(&peer->handshake);
 | |
| +	noise_keypairs_clear(&peer->keypairs);
 | |
| +	peer_put(peer);
 | |
| +}
 | |
| +
 | |
| +/* Timer callback for timer_persistent_keepalive: sends a keepalive if the
 | |
| + * peer still has a non-zero persistent_keepalive_interval. */
 | |
| +static void expired_send_persistent_keepalive(unsigned long ptr)
 | |
| +{
 | |
| +	peer_get_from_ptr(ptr);
 | |
| +	if (likely(peer->persistent_keepalive_interval))
 | |
| +		packet_send_keepalive(peer);
 | |
| +	peer_put(peer);
 | |
| +}
 | |
| +
 | |
| +/* Should be called after an authenticated data packet is sent.
 | |
| + *
 | |
| + * Cancels the pending send-keepalive timer and, unless it is already
 | |
| + * pending, arms timer_new_handshake to fire KEEPALIVE_TIMEOUT +
 | |
| + * REKEY_TIMEOUT jiffies from now. Does nothing while timers are disabled. */
 | |
| +void timers_data_sent(struct wireguard_peer *peer)
 | |
| +{
 | |
| +	if (likely(peer->timers_enabled))
 | |
| +		del_timer(&peer->timer_send_keepalive);
 | |
| +
 | |
| +	if (likely(peer->timers_enabled) && !timer_pending(&peer->timer_new_handshake))
 | |
| +		mod_timer(&peer->timer_new_handshake, jiffies + KEEPALIVE_TIMEOUT + REKEY_TIMEOUT);
 | |
| +}
 | |
| +
 | |
| +/* Should be called after an authenticated data packet is received.
 | |
| + *
 | |
| + * If timers are enabled and no send-keepalive timer is pending, one is
 | |
| + * armed for KEEPALIVE_TIMEOUT jiffies from now. Otherwise (timer already
 | |
| + * pending, or timers disabled) timer_need_another_keepalive is set, which
 | |
| + * expired_send_keepalive() uses to re-arm the timer after it fires. */
 | |
| +void timers_data_received(struct wireguard_peer *peer)
 | |
| +{
 | |
| +	if (likely(peer->timers_enabled) && !timer_pending(&peer->timer_send_keepalive))
 | |
| +		mod_timer(&peer->timer_send_keepalive, jiffies + KEEPALIVE_TIMEOUT);
 | |
| +	else
 | |
| +		peer->timer_need_another_keepalive = true;
 | |
| +}
 | |
| +
 | |
| +/* Should be called after any type of authenticated packet is received -- keepalive or data.
 | |
| + * Cancels timer_new_handshake; does nothing while timers are disabled. */
 | |
| +void timers_any_authenticated_packet_received(struct wireguard_peer *peer)
 | |
| +{
 | |
| +	if (likely(peer->timers_enabled))
 | |
| +		del_timer(&peer->timer_new_handshake);
 | |
| +}
 | |
| +
 | |
| +/* Should be called after a handshake initiation message is sent.
 | |
| + *
 | |
| + * Cancels the send-keepalive timer and (re)arms the handshake
 | |
| + * retransmission timer for REKEY_TIMEOUT plus a random jitter below
 | |
| + * REKEY_TIMEOUT_JITTER_MAX from now, rounded down by slack_time(). Does
 | |
| + * nothing while timers are disabled. */
 | |
| +void timers_handshake_initiated(struct wireguard_peer *peer)
 | |
| +{
 | |
| +	if (likely(peer->timers_enabled)) {
 | |
| +		del_timer(&peer->timer_send_keepalive);
 | |
| +		mod_timer(&peer->timer_retransmit_handshake, slack_time(jiffies + REKEY_TIMEOUT + prandom_u32_max(REKEY_TIMEOUT_JITTER_MAX)));
 | |
| +	}
 | |
| +}
 | |
| +
 | |
| +/* Should be called after a handshake response message is received and processed or when getting key confirmation via the first data message. */
 | |
| +void timers_handshake_complete(struct wireguard_peer *peer)
 | |
| +{
 | |
| +	if (likely(peer->timers_enabled))
 | |
| +		del_timer(&peer->timer_retransmit_handshake);
 | |
| +	peer->timer_handshake_attempts = 0;
 | |
| +	do_gettimeofday(&peer->walltime_last_handshake);
 | |
| +}
 | |
| +
 | |
| +/* Should be called after an ephemeral key is created, which is before sending a handshake response or after receiving a handshake response. */
 | |
| +void timers_ephemeral_key_created(struct wireguard_peer *peer)
 | |
| +{
 | |
| +	if (likely(peer->timers_enabled))
 | |
| +		mod_timer(&peer->timer_kill_ephemerals, jiffies + (REJECT_AFTER_TIME * 3));
 | |
| +}
 | |
| +
 | |
| +/* Should be called before a packet with authentication -- data, keepalive, either handshake -- is sent, or after one is received. */
 | |
| +void timers_any_authenticated_packet_traversal(struct wireguard_peer *peer)
 | |
| +{
 | |
| +	if (peer->persistent_keepalive_interval && likely(peer->timers_enabled))
 | |
| +		mod_timer(&peer->timer_persistent_keepalive, slack_time(jiffies + peer->persistent_keepalive_interval));
 | |
| +}
 | |
| +
 | |
| +void timers_init_peer(struct wireguard_peer *peer)
 | |
| +{
 | |
| +	peer->timers_enabled = true;
 | |
| +	setup_timer(&peer->timer_retransmit_handshake, expired_retransmit_handshake, (unsigned long)peer);
 | |
| +	setup_timer(&peer->timer_send_keepalive, expired_send_keepalive, (unsigned long)peer);
 | |
| +	setup_timer(&peer->timer_new_handshake, expired_new_handshake, (unsigned long)peer);
 | |
| +	setup_timer(&peer->timer_kill_ephemerals, expired_kill_ephemerals, (unsigned long)peer);
 | |
| +	setup_timer(&peer->timer_persistent_keepalive, expired_send_persistent_keepalive, (unsigned long)peer);
 | |
| +	INIT_WORK(&peer->clear_peer_work, queued_expired_kill_ephemerals);
 | |
| +}
 | |
| +
 | |
| +void timers_uninit_peer(struct wireguard_peer *peer)
 | |
| +{
 | |
| +	if (!peer->timers_enabled)
 | |
| +		return;
 | |
| +	peer->timers_enabled = false;
 | |
| +	wmb();
 | |
| +	del_timer_sync(&peer->timer_retransmit_handshake);
 | |
| +	del_timer_sync(&peer->timer_send_keepalive);
 | |
| +	del_timer_sync(&peer->timer_new_handshake);
 | |
| +	del_timer_sync(&peer->timer_kill_ephemerals);
 | |
| +	del_timer_sync(&peer->timer_persistent_keepalive);
 | |
| +	flush_work(&peer->clear_peer_work);
 | |
| +}
 | |
| --- /dev/null	2017-07-05 16:27:37.615351856 +0200
 | |
| +++ b/net/wireguard/config.h	2017-07-06 18:17:33.000000000 +0200
 | |
| @@ -0,0 +1,11 @@
 | |
| +/* Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. */
 | |
| +
 | |
| +#ifndef WGCONFIG_H
 | |
| +#define WGCONFIG_H
 | |
| +
 | |
| +struct wireguard_device;
 | |
| +
 | |
| +int config_get_device(struct wireguard_device *wg, void __user *udevice);
 | |
| +int config_set_device(struct wireguard_device *wg, void __user *udevice);
 | |
| +
 | |
| +#endif
 | |
| --- /dev/null	2017-07-05 16:27:37.615351856 +0200
 | |
| +++ b/net/wireguard/cookie.h	2017-07-06 18:17:33.000000000 +0200
 | |
| @@ -0,0 +1,51 @@
 | |
| +/* Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. */
 | |
| +
 | |
| +#ifndef WGCOOKIE_H
 | |
| +#define WGCOOKIE_H
 | |
| +
 | |
| +#include "messages.h"
 | |
| +#include <linux/rwsem.h>
 | |
| +
 | |
| +struct wireguard_peer;
 | |
| +struct wireguard_device;
 | |
| +struct sk_buff;
 | |
| +
 | |
| +struct cookie_checker {
 | |
| +	u8 secret[NOISE_HASH_LEN];
 | |
| +	u8 cookie_encryption_key[NOISE_SYMMETRIC_KEY_LEN];
 | |
| +	u8 message_mac1_key[NOISE_SYMMETRIC_KEY_LEN];
 | |
| +	u64 secret_birthdate;
 | |
| +	struct rw_semaphore secret_lock;
 | |
| +	struct wireguard_device *device;
 | |
| +};
 | |
| +
 | |
| +struct cookie {
 | |
| +	u64 birthdate;
 | |
| +	bool is_valid;
 | |
| +	u8 cookie[COOKIE_LEN];
 | |
| +	bool have_sent_mac1;
 | |
| +	u8 last_mac1_sent[COOKIE_LEN];
 | |
| +	u8 cookie_decryption_key[NOISE_SYMMETRIC_KEY_LEN];
 | |
| +	u8 message_mac1_key[NOISE_SYMMETRIC_KEY_LEN];
 | |
| +	struct rw_semaphore lock;
 | |
| +};
 | |
| +
 | |
| +enum cookie_mac_state {
 | |
| +	INVALID_MAC,
 | |
| +	VALID_MAC_BUT_NO_COOKIE,
 | |
| +	VALID_MAC_WITH_COOKIE_BUT_RATELIMITED,
 | |
| +	VALID_MAC_WITH_COOKIE
 | |
| +};
 | |
| +
 | |
| +void cookie_checker_init(struct cookie_checker *checker, struct wireguard_device *wg);
 | |
| +void cookie_checker_precompute_device_keys(struct cookie_checker *checker);
 | |
| +void cookie_checker_precompute_peer_keys(struct wireguard_peer *peer);
 | |
| +void cookie_init(struct cookie *cookie);
 | |
| +
 | |
| +enum cookie_mac_state cookie_validate_packet(struct cookie_checker *checker, struct sk_buff *skb, bool check_cookie);
 | |
| +void cookie_add_mac_to_packet(void *message, size_t len, struct wireguard_peer *peer);
 | |
| +
 | |
| +void cookie_message_create(struct message_handshake_cookie *src, struct sk_buff *skb, __le32 index, struct cookie_checker *checker);
 | |
| +void cookie_message_consume(struct message_handshake_cookie *src, struct wireguard_device *wg);
 | |
| +
 | |
| +#endif
 | |
| --- /dev/null	2017-07-05 16:27:37.615351856 +0200
 | |
| +++ b/net/wireguard/device.h	2017-07-06 18:17:33.000000000 +0200
 | |
| @@ -0,0 +1,51 @@
 | |
| +/* Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. */
 | |
| +
 | |
| +#ifndef WGDEVICE_H
 | |
| +#define WGDEVICE_H
 | |
| +
 | |
| +#include "noise.h"
 | |
| +#include "routingtable.h"
 | |
| +#include "hashtables.h"
 | |
| +#include "cookie.h"
 | |
| +
 | |
| +#include <linux/types.h>
 | |
| +#include <linux/netdevice.h>
 | |
| +#include <linux/workqueue.h>
 | |
| +#include <linux/mutex.h>
 | |
| +#include <linux/net.h>
 | |
| +#include <linux/padata.h>
 | |
| +
 | |
| +struct wireguard_device;
 | |
| +struct handshake_worker {
 | |
| +	struct wireguard_device *wg;
 | |
| +	struct work_struct work;
 | |
| +};
 | |
| +
 | |
| +struct wireguard_device {
 | |
| +	struct list_head device_list;
 | |
| +	struct sock __rcu *sock4, *sock6;
 | |
| +	u16 incoming_port;
 | |
| +	u32 fwmark;
 | |
| +	struct net *creating_net;
 | |
| +	struct noise_static_identity static_identity;
 | |
| +	struct workqueue_struct *incoming_handshake_wq, *peer_wq;
 | |
| +	struct sk_buff_head incoming_handshakes;
 | |
| +	atomic_t incoming_handshake_seqnr;
 | |
| +	struct handshake_worker __percpu *incoming_handshakes_worker;
 | |
| +	struct cookie_checker cookie_checker;
 | |
| +	struct pubkey_hashtable peer_hashtable;
 | |
| +	struct index_hashtable index_hashtable;
 | |
| +	struct routing_table peer_routing_table;
 | |
| +	struct list_head peer_list;
 | |
| +	struct mutex device_update_lock;
 | |
| +	struct mutex socket_update_lock;
 | |
| +#ifdef CONFIG_WIREGUARD_PARALLEL
 | |
| +	struct workqueue_struct *crypt_wq;
 | |
| +	struct padata_instance *encrypt_pd, *decrypt_pd;
 | |
| +#endif
 | |
| +};
 | |
| +
 | |
| +int device_init(void);
 | |
| +void device_uninit(void);
 | |
| +
 | |
| +#endif
 | |
| --- /dev/null	2017-07-05 16:27:37.615351856 +0200
 | |
| +++ b/net/wireguard/hashtables.h	2017-07-06 18:17:33.000000000 +0200
 | |
| @@ -0,0 +1,47 @@
 | |
| +/* Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. */
 | |
| +
 | |
| +#ifndef HASHTABLES_H
 | |
| +#define HASHTABLES_H
 | |
| +
 | |
| +#include "messages.h"
 | |
| +
 | |
| +#include <linux/hashtable.h>
 | |
| +#include <linux/mutex.h>
 | |
| +#include <linux/siphash.h>
 | |
| +
 | |
| +struct wireguard_peer;
 | |
| +
 | |
| +struct pubkey_hashtable {
 | |
| +	DECLARE_HASHTABLE(hashtable, 8);
 | |
| +	siphash_key_t key;
 | |
| +	struct mutex lock;
 | |
| +};
 | |
| +
 | |
| +void pubkey_hashtable_init(struct pubkey_hashtable *table);
 | |
| +void pubkey_hashtable_add(struct pubkey_hashtable *table, struct wireguard_peer *peer);
 | |
| +void pubkey_hashtable_remove(struct pubkey_hashtable *table, struct wireguard_peer *peer);
 | |
| +struct wireguard_peer *pubkey_hashtable_lookup(struct pubkey_hashtable *table, const u8 pubkey[NOISE_PUBLIC_KEY_LEN]);
 | |
| +
 | |
| +struct index_hashtable {
 | |
| +	DECLARE_HASHTABLE(hashtable, 10);
 | |
| +	spinlock_t lock;
 | |
| +};
 | |
| +
 | |
| +enum index_hashtable_type {
 | |
| +	INDEX_HASHTABLE_HANDSHAKE = (1 << 0),
 | |
| +	INDEX_HASHTABLE_KEYPAIR = (1 << 1)
 | |
| +};
 | |
| +
 | |
| +struct index_hashtable_entry {
 | |
| +	struct wireguard_peer *peer;
 | |
| +	struct hlist_node index_hash;
 | |
| +	enum index_hashtable_type type;
 | |
| +	__le32 index;
 | |
| +};
 | |
| +void index_hashtable_init(struct index_hashtable *table);
 | |
| +__le32 index_hashtable_insert(struct index_hashtable *table, struct index_hashtable_entry *entry);
 | |
| +bool index_hashtable_replace(struct index_hashtable *table, struct index_hashtable_entry *old, struct index_hashtable_entry *new);
 | |
| +void index_hashtable_remove(struct index_hashtable *table, struct index_hashtable_entry *entry);
 | |
| +struct index_hashtable_entry *index_hashtable_lookup(struct index_hashtable *table, const enum index_hashtable_type type_mask, const __le32 index);
 | |
| +
 | |
| +#endif
 | |
| --- /dev/null	2017-07-05 16:27:37.615351856 +0200
 | |
| +++ b/net/wireguard/messages.h	2017-07-06 18:17:33.000000000 +0200
 | |
| @@ -0,0 +1,150 @@
 | |
| +/*
 | |
| + * Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 | |
| + *
 | |
| + * See doc/protocol.md for more info
 | |
| + */
 | |
| +
 | |
| +#ifndef MESSAGES_H
 | |
| +#define MESSAGES_H
 | |
| +
 | |
| +#include "crypto/curve25519.h"
 | |
| +#include "crypto/chacha20poly1305.h"
 | |
| +#include "crypto/blake2s.h"
 | |
| +
 | |
| +#include <linux/kernel.h>
 | |
| +#include <linux/param.h>
 | |
| +#include <linux/skbuff.h>
 | |
| +
 | |
| +enum noise_lengths {
 | |
| +	NOISE_PUBLIC_KEY_LEN = CURVE25519_POINT_SIZE,
 | |
| +	NOISE_SYMMETRIC_KEY_LEN = CHACHA20POLY1305_KEYLEN,
 | |
| +	NOISE_TIMESTAMP_LEN = sizeof(u64) + sizeof(u32),
 | |
| +	NOISE_AUTHTAG_LEN = CHACHA20POLY1305_AUTHTAGLEN,
 | |
| +	NOISE_HASH_LEN = BLAKE2S_OUTBYTES
 | |
| +};
 | |
| +
 | |
| +#define noise_encrypted_len(plain_len) ((plain_len) + NOISE_AUTHTAG_LEN) /* plaintext length plus the authentication tag; argument parenthesized so any expression expands safely */
 | |
| +
 | |
| +enum cookie_values {
 | |
| +	COOKIE_SECRET_MAX_AGE = 2 * 60 * HZ,
 | |
| +	COOKIE_SECRET_LATENCY = 5 * HZ,
 | |
| +	COOKIE_NONCE_LEN = XCHACHA20POLY1305_NONCELEN,
 | |
| +	COOKIE_LEN = 16
 | |
| +};
 | |
| +
 | |
| +enum counter_values {
 | |
| +	COUNTER_BITS_TOTAL = 2048,
 | |
| +	COUNTER_REDUNDANT_BITS = BITS_PER_LONG,
 | |
| +	COUNTER_WINDOW_SIZE = COUNTER_BITS_TOTAL - COUNTER_REDUNDANT_BITS
 | |
| +};
 | |
| +
 | |
| +enum limits {
 | |
| +	REKEY_AFTER_MESSAGES = U64_MAX - 0xffff,
 | |
| +	REJECT_AFTER_MESSAGES = U64_MAX - COUNTER_WINDOW_SIZE - 1,
 | |
| +	REKEY_TIMEOUT = 5 * HZ,
 | |
| +	REKEY_TIMEOUT_JITTER_MAX = HZ / 3,
 | |
| +	REKEY_AFTER_TIME = 120 * HZ,
 | |
| +	REJECT_AFTER_TIME = 180 * HZ,
 | |
| +	INITIATIONS_PER_SECOND = HZ / 50,
 | |
| +	MAX_PEERS_PER_DEVICE = U16_MAX,
 | |
| +	KEEPALIVE_TIMEOUT = 10 * HZ,
 | |
| +	MAX_TIMER_HANDSHAKES = (90 * HZ) / REKEY_TIMEOUT,
 | |
| +	MAX_QUEUED_INCOMING_HANDSHAKES = 4096,
 | |
| +	MAX_QUEUED_OUTGOING_PACKETS = 1024
 | |
| +};
 | |
| +
 | |
| +enum message_type {
 | |
| +	MESSAGE_INVALID = 0,
 | |
| +	MESSAGE_HANDSHAKE_INITIATION = 1,
 | |
| +	MESSAGE_HANDSHAKE_RESPONSE = 2,
 | |
| +	MESSAGE_HANDSHAKE_COOKIE = 3,
 | |
| +	MESSAGE_DATA = 4,
 | |
| +	MESSAGE_TOTAL = 5
 | |
| +};
 | |
| +
 | |
| +struct message_header {
 | |
| +	/* The actual layout of this that we want is:
 | |
| +	 * u8 type
 | |
| +	 * u8 reserved_zero[3]
 | |
| +	 *
 | |
| +	 * But it turns out that by encoding this as little endian,
 | |
| +	 * we achieve the same thing, and it makes checking faster.
 | |
| +	 */
 | |
| +	__le32 type;
 | |
| +};
 | |
| +
 | |
| +struct message_macs {
 | |
| +	u8 mac1[COOKIE_LEN];
 | |
| +	u8 mac2[COOKIE_LEN];
 | |
| +};
 | |
| +
 | |
| +struct message_handshake_initiation {
 | |
| +	struct message_header header;
 | |
| +	__le32 sender_index;
 | |
| +	u8 unencrypted_ephemeral[NOISE_PUBLIC_KEY_LEN];
 | |
| +	u8 encrypted_static[noise_encrypted_len(NOISE_PUBLIC_KEY_LEN)];
 | |
| +	u8 encrypted_timestamp[noise_encrypted_len(NOISE_TIMESTAMP_LEN)];
 | |
| +	struct message_macs macs;
 | |
| +};
 | |
| +
 | |
| +struct message_handshake_response {
 | |
| +	struct message_header header;
 | |
| +	__le32 sender_index;
 | |
| +	__le32 receiver_index;
 | |
| +	u8 unencrypted_ephemeral[NOISE_PUBLIC_KEY_LEN];
 | |
| +	u8 encrypted_nothing[noise_encrypted_len(0)];
 | |
| +	struct message_macs macs;
 | |
| +};
 | |
| +
 | |
| +struct message_handshake_cookie {
 | |
| +	struct message_header header;
 | |
| +	__le32 receiver_index;
 | |
| +	u8 nonce[COOKIE_NONCE_LEN];
 | |
| +	u8 encrypted_cookie[noise_encrypted_len(COOKIE_LEN)];
 | |
| +};
 | |
| +
 | |
| +struct message_data {
 | |
| +	struct message_header header;
 | |
| +	__le32 key_idx;
 | |
| +	__le64 counter;
 | |
| +	u8 encrypted_data[];
 | |
| +};
 | |
| +
 | |
| +#define message_data_len(plain_len) (noise_encrypted_len(plain_len) + sizeof(struct message_data))
 | |
| +
 | |
| +enum message_alignments {
 | |
| +	MESSAGE_PADDING_MULTIPLE = 16,
 | |
| +	MESSAGE_MINIMUM_LENGTH = message_data_len(0)
 | |
| +};
 | |
| +
 | |
| +#define SKB_HEADER_LEN (max(sizeof(struct iphdr), sizeof(struct ipv6hdr)) + sizeof(struct udphdr) + NET_SKB_PAD)
 | |
| +#define DATA_PACKET_HEAD_ROOM ALIGN(sizeof(struct message_data) + SKB_HEADER_LEN, 4)
 | |
| +
 | |
| +enum {
 | |
| +	HANDSHAKE_DSCP = 0b10001000 /* AF41, plus 00 ECN */
 | |
| +};
 | |
| +
 | |
| +static const unsigned int message_header_sizes[MESSAGE_TOTAL] = {
 | |
| +	[MESSAGE_HANDSHAKE_INITIATION] = sizeof(struct message_handshake_initiation),
 | |
| +	[MESSAGE_HANDSHAKE_RESPONSE] = sizeof(struct message_handshake_response),
 | |
| +	[MESSAGE_HANDSHAKE_COOKIE] = sizeof(struct message_handshake_cookie),
 | |
| +	[MESSAGE_DATA] = sizeof(struct message_data)
 | |
| +};
 | |
| +
 | |
| +static inline enum message_type message_determine_type(struct sk_buff *skb)
 | |
| +{
 | |
| +	struct message_header *header = (struct message_header *)skb->data;
 | |
| +	if (unlikely(skb->len < sizeof(struct message_header)))
 | |
| +		return MESSAGE_INVALID;
 | |
| +	if (header->type == cpu_to_le32(MESSAGE_DATA) && skb->len >= MESSAGE_MINIMUM_LENGTH)
 | |
| +		return MESSAGE_DATA;
 | |
| +	if (header->type == cpu_to_le32(MESSAGE_HANDSHAKE_INITIATION) && skb->len == sizeof(struct message_handshake_initiation))
 | |
| +		return MESSAGE_HANDSHAKE_INITIATION;
 | |
| +	if (header->type == cpu_to_le32(MESSAGE_HANDSHAKE_RESPONSE) && skb->len == sizeof(struct message_handshake_response))
 | |
| +		return MESSAGE_HANDSHAKE_RESPONSE;
 | |
| +	if (header->type == cpu_to_le32(MESSAGE_HANDSHAKE_COOKIE) && skb->len == sizeof(struct message_handshake_cookie))
 | |
| +		return MESSAGE_HANDSHAKE_COOKIE;
 | |
| +	return MESSAGE_INVALID;
 | |
| +}
 | |
| +
 | |
| +#endif
 | |
| --- /dev/null	2017-07-05 16:27:37.615351856 +0200
 | |
| +++ b/net/wireguard/noise.h	2017-07-06 18:17:33.000000000 +0200
 | |
| @@ -0,0 +1,122 @@
 | |
| +/*
 | |
| + * Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 | |
| + *
 | |
| + * See doc/protocol.md and https://github.com/trevp/noise/blob/master/noise.md for more info
 | |
| + */
 | |
| +
 | |
| +#ifndef NOISE_H
 | |
| +#define NOISE_H
 | |
| +
 | |
| +#include "messages.h"
 | |
| +#include "hashtables.h"
 | |
| +
 | |
| +#include <linux/types.h>
 | |
| +#include <linux/spinlock.h>
 | |
| +#include <linux/atomic.h>
 | |
| +#include <linux/rwsem.h>
 | |
| +#include <linux/mutex.h>
 | |
| +#include <linux/jiffies.h>
 | |
| +#include <linux/kref.h>
 | |
| +
 | |
| +union noise_counter {
 | |
| +	struct {
 | |
| +		u64 counter;
 | |
| +		unsigned long backtrack[COUNTER_BITS_TOTAL / BITS_PER_LONG];
 | |
| +		spinlock_t lock;
 | |
| +	} receive;
 | |
| +	atomic64_t counter;
 | |
| +};
 | |
| +
 | |
| +struct noise_symmetric_key {
 | |
| +	u8 key[NOISE_SYMMETRIC_KEY_LEN];
 | |
| +	union noise_counter counter;
 | |
| +	u64 birthdate;
 | |
| +	bool is_valid;
 | |
| +};
 | |
| +
 | |
| +struct noise_keypair {
 | |
| +	struct index_hashtable_entry entry;
 | |
| +	struct noise_symmetric_key sending;
 | |
| +	struct noise_symmetric_key receiving;
 | |
| +	__le32 remote_index;
 | |
| +	bool i_am_the_initiator;
 | |
| +	struct kref refcount;
 | |
| +	struct rcu_head rcu;
 | |
| +	u64 internal_id;
 | |
| +};
 | |
| +
 | |
| +struct noise_keypairs {
 | |
| +	struct noise_keypair __rcu *current_keypair;
 | |
| +	struct noise_keypair __rcu *previous_keypair;
 | |
| +	struct noise_keypair __rcu *next_keypair;
 | |
| +	struct mutex keypair_update_lock;
 | |
| +};
 | |
| +
 | |
| +struct noise_static_identity {
 | |
| +	bool has_identity;
 | |
| +	u8 static_public[NOISE_PUBLIC_KEY_LEN];
 | |
| +	u8 static_private[NOISE_PUBLIC_KEY_LEN];
 | |
| +	struct rw_semaphore lock;
 | |
| +};
 | |
| +
 | |
| +enum noise_handshake_state {
 | |
| +	HANDSHAKE_ZEROED,
 | |
| +	HANDSHAKE_CREATED_INITIATION,
 | |
| +	HANDSHAKE_CONSUMED_INITIATION,
 | |
| +	HANDSHAKE_CREATED_RESPONSE,
 | |
| +	HANDSHAKE_CONSUMED_RESPONSE
 | |
| +};
 | |
| +
 | |
| +struct noise_handshake {
 | |
| +	struct index_hashtable_entry entry;
 | |
| +
 | |
| +	enum noise_handshake_state state;
 | |
| +	u64 last_initiation_consumption;
 | |
| +
 | |
| +	struct noise_static_identity *static_identity;
 | |
| +
 | |
| +	u8 ephemeral_private[NOISE_PUBLIC_KEY_LEN];
 | |
| +	u8 remote_static[NOISE_PUBLIC_KEY_LEN];
 | |
| +	u8 remote_ephemeral[NOISE_PUBLIC_KEY_LEN];
 | |
| +	u8 precomputed_static_static[NOISE_PUBLIC_KEY_LEN];
 | |
| +
 | |
| +	u8 preshared_key[NOISE_SYMMETRIC_KEY_LEN];
 | |
| +
 | |
| +	u8 hash[NOISE_HASH_LEN];
 | |
| +	u8 chaining_key[NOISE_HASH_LEN];
 | |
| +
 | |
| +	u8 latest_timestamp[NOISE_TIMESTAMP_LEN];
 | |
| +	__le32 remote_index;
 | |
| +
 | |
| +	/* Protects all members except the immutable (after noise_handshake_init): remote_static, precomputed_static_static, static_identity */
 | |
| +	struct rw_semaphore lock;
 | |
| +};
 | |
| +
 | |
| +struct wireguard_peer;
 | |
| +struct wireguard_device;
 | |
| +struct message_header;
 | |
| +struct message_handshake_initiation;
 | |
| +struct message_handshake_response;
 | |
| +struct message_data;
 | |
| +struct message_handshake_cookie;
 | |
| +
 | |
| +void noise_init(void);
 | |
| +bool noise_handshake_init(struct noise_handshake *handshake, struct noise_static_identity *static_identity, const u8 peer_public_key[NOISE_PUBLIC_KEY_LEN], const u8 peer_preshared_key[NOISE_SYMMETRIC_KEY_LEN], struct wireguard_peer *peer);
 | |
| +void noise_handshake_clear(struct noise_handshake *handshake);
 | |
| +void noise_keypair_put(struct noise_keypair *keypair);
 | |
| +struct noise_keypair *noise_keypair_get(struct noise_keypair *keypair);
 | |
| +void noise_keypairs_clear(struct noise_keypairs *keypairs);
 | |
| +bool noise_received_with_keypair(struct noise_keypairs *keypairs, struct noise_keypair *received_keypair);
 | |
| +
 | |
| +void noise_set_static_identity_private_key(struct noise_static_identity *static_identity, const u8 private_key[NOISE_PUBLIC_KEY_LEN]);
 | |
| +bool noise_precompute_static_static(struct wireguard_peer *peer);
 | |
| +
 | |
| +bool noise_handshake_create_initiation(struct message_handshake_initiation *dst, struct noise_handshake *handshake);
 | |
| +struct wireguard_peer *noise_handshake_consume_initiation(struct message_handshake_initiation *src, struct wireguard_device *wg);
 | |
| +
 | |
| +bool noise_handshake_create_response(struct message_handshake_response *dst, struct noise_handshake *peer);
 | |
| +struct wireguard_peer *noise_handshake_consume_response(struct message_handshake_response *src, struct wireguard_device *wg);
 | |
| +
 | |
| +bool noise_handshake_begin_session(struct noise_handshake *handshake, struct noise_keypairs *keypairs, bool i_am_the_initiator);
 | |
| +
 | |
| +#endif
 | |
| --- /dev/null	2017-07-05 16:27:37.615351856 +0200
 | |
| +++ b/net/wireguard/packets.h	2017-07-06 18:17:33.000000000 +0200
 | |
| @@ -0,0 +1,63 @@
 | |
| +/* Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. */
 | |
| +
 | |
| +#ifndef PACKETS_H
 | |
| +#define PACKETS_H
 | |
| +
 | |
| +#include "noise.h"
 | |
| +#include "messages.h"
 | |
| +#include "socket.h"
 | |
| +
 | |
| +#include <linux/types.h>
 | |
| +#include <linux/padata.h>
 | |
| +#include <linux/skbuff.h>
 | |
| +#include <linux/ip.h>
 | |
| +#include <linux/ipv6.h>
 | |
| +
 | |
| +struct wireguard_device;
 | |
| +struct wireguard_peer;
 | |
| +struct sk_buff;
 | |
| +
 | |
| +struct packet_cb {
 | |
| +	u64 nonce;
 | |
| +	u8 ds;
 | |
| +};
 | |
| +#define PACKET_CB(skb) ((struct packet_cb *)(skb)->cb) /* views skb->cb as a struct packet_cb; argument parenthesized so any pointer expression works */
 | |
| +
 | |
| +/* receive.c */
 | |
| +void packet_receive(struct wireguard_device *wg, struct sk_buff *skb);
 | |
| +void packet_process_queued_handshake_packets(struct work_struct *work);
 | |
| +void packet_consume_data_done(struct sk_buff *skb, struct wireguard_peer *peer, struct endpoint *endpoint, bool used_new_key);
 | |
| +
 | |
| +/* send.c */
 | |
| +void packet_send_queue(struct wireguard_peer *peer);
 | |
| +void packet_send_keepalive(struct wireguard_peer *peer);
 | |
| +void packet_queue_handshake_initiation(struct wireguard_peer *peer, bool is_retry);
 | |
| +void packet_send_queued_handshakes(struct work_struct *work);
 | |
| +void packet_send_handshake_response(struct wireguard_peer *peer);
 | |
| +void packet_send_handshake_cookie(struct wireguard_device *wg, struct sk_buff *initiating_skb, __le32 sender_index);
 | |
| +void packet_create_data_done(struct sk_buff_head *queue, struct wireguard_peer *peer);
 | |
| +
 | |
| +/* data.c */
 | |
| +int packet_create_data(struct sk_buff_head *queue, struct wireguard_peer *peer);
 | |
| +void packet_consume_data(struct sk_buff *skb, struct wireguard_device *wg);
 | |
| +
 | |
| +/* Returns either the correct skb->protocol value, or 0 if invalid. */
 | |
| +static inline __be16 skb_examine_untrusted_ip_hdr(struct sk_buff *skb)
 | |
| +{
 | |
| +	if (skb_network_header(skb) >= skb->head && (skb_network_header(skb) + sizeof(struct iphdr)) <= skb_tail_pointer(skb) && ip_hdr(skb)->version == 4)
 | |
| +		return htons(ETH_P_IP);
 | |
| +	if (skb_network_header(skb) >= skb->head && (skb_network_header(skb) + sizeof(struct ipv6hdr)) <= skb_tail_pointer(skb) && ipv6_hdr(skb)->version == 6)
 | |
| +		return htons(ETH_P_IPV6);
 | |
| +	return 0;
 | |
| +}
 | |
| +
 | |
| +#ifdef CONFIG_WIREGUARD_PARALLEL
 | |
| +int packet_init_data_caches(void);
 | |
| +void packet_deinit_data_caches(void);
 | |
| +#endif
 | |
| +
 | |
| +#ifdef DEBUG
 | |
| +bool packet_counter_selftest(void);
 | |
| +#endif
 | |
| +
 | |
| +#endif
 | |
| --- /dev/null	2017-07-05 16:27:37.615351856 +0200
 | |
| +++ b/net/wireguard/peer.h	2017-07-06 18:17:33.000000000 +0200
 | |
| @@ -0,0 +1,100 @@
 | |
| +/* Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. */
 | |
| +
 | |
| +#ifndef PEER_H
 | |
| +#define PEER_H
 | |
| +
 | |
| +#include "noise.h"
 | |
| +#include "cookie.h"
 | |
| +
 | |
| +#include <linux/types.h>
 | |
| +#include <linux/netfilter.h>
 | |
| +#include <linux/spinlock.h>
 | |
| +#include <linux/kref.h>
 | |
| +#include <net/dst_cache.h>
 | |
| +
 | |
| +struct wireguard_device;
 | |
| +
 | |
| +struct endpoint {
 | |
| +	union {
 | |
| +		struct sockaddr addr;
 | |
| +		struct sockaddr_in addr4;
 | |
| +		struct sockaddr_in6 addr6;
 | |
| +	};
 | |
| +	union {
 | |
| +		struct in_addr src4;
 | |
| +		struct in6_addr src6;
 | |
| +	};
 | |
| +};
 | |
| +
 | |
| +struct wireguard_peer {
 | |
| +	struct wireguard_device *device;
 | |
| +	struct endpoint endpoint;
 | |
| +	struct dst_cache endpoint_cache;
 | |
| +	rwlock_t endpoint_lock;
 | |
| +	struct noise_handshake handshake;
 | |
| +	struct noise_keypairs keypairs;
 | |
| +	u64 last_sent_handshake;
 | |
| +	struct work_struct transmit_handshake_work, clear_peer_work;
 | |
| +	struct cookie latest_cookie;
 | |
| +	struct hlist_node pubkey_hash;
 | |
| +	u64 rx_bytes, tx_bytes;
 | |
| +	struct timer_list timer_retransmit_handshake, timer_send_keepalive, timer_new_handshake, timer_kill_ephemerals, timer_persistent_keepalive;
 | |
| +	unsigned int timer_handshake_attempts;
 | |
| +	unsigned long persistent_keepalive_interval;
 | |
| +	bool timers_enabled;
 | |
| +	bool timer_need_another_keepalive;
 | |
| +	bool need_resend_queue;
 | |
| +	bool sent_lastminute_handshake;
 | |
| +	struct timeval walltime_last_handshake;
 | |
| +	struct sk_buff_head tx_packet_queue;
 | |
| +	struct kref refcount;
 | |
| +	struct rcu_head rcu;
 | |
| +	struct list_head peer_list;
 | |
| +	u64 internal_id;
 | |
| +#ifdef CONFIG_WIREGUARD_PARALLEL
 | |
| +	atomic_t parallel_encryption_inflight;
 | |
| +#endif
 | |
| +};
 | |
| +
 | |
| +struct wireguard_peer *peer_create(struct wireguard_device *wg, const u8 public_key[NOISE_PUBLIC_KEY_LEN], const u8 preshared_key[NOISE_SYMMETRIC_KEY_LEN]);
 | |
| +
 | |
| +struct wireguard_peer *peer_get(struct wireguard_peer *peer);
 | |
| +struct wireguard_peer *peer_rcu_get(struct wireguard_peer *peer);
 | |
| +
 | |
| +void peer_put(struct wireguard_peer *peer);
 | |
| +void peer_remove(struct wireguard_peer *peer);
 | |
| +void peer_remove_all(struct wireguard_device *wg);
 | |
| +
 | |
| +struct wireguard_peer *peer_lookup_by_index(struct wireguard_device *wg, u32 index);
 | |
| +
 | |
| +unsigned int peer_total_count(struct wireguard_device *wg);
 | |
| +
 | |
| +/* This is a macro iterator of essentially this:
 | |
| + *
 | |
| + * if (__should_lock)
 | |
| + *	mutex_lock(&(__wg)->device_update_lock);
 | |
| + * else
 | |
| + *	lockdep_assert_held(&(__wg)->device_update_lock)
 | |
| + * list_for_each_entry_safe (__peer, __temp, &(__wg)->peer_list, peer_list) {
 | |
| + *	__peer = peer_rcu_get(__peer);
 | |
| + *	if (!__peer)
 | |
| + *		continue;
 | |
| + *	ITERATOR_BODY
 | |
| + *	peer_put(__peer);
 | |
| + * }
 | |
| + * if (__should_lock)
 | |
| + *	mutex_unlock(&(__wg)->device_update_lock);
 | |
| + *
 | |
| + * While it's really ugly to look at, the code gcc produces from it is actually perfect.
 | |
| + */
 | |
| +#define pfe_label(n) __PASTE(__PASTE(pfe_label_, n ## _), __LINE__)
 | |
| +#define peer_for_each(__wg, __peer, __temp, __should_lock) \
 | |
| +	if (1) { if (__should_lock) mutex_lock(&(__wg)->device_update_lock); else lockdep_assert_held(&(__wg)->device_update_lock); goto pfe_label(1); } else pfe_label(1): \
 | |
| +	if (1) goto pfe_label(2); else while (1) if (1) { if (__should_lock) mutex_unlock(&(__wg)->device_update_lock); break; } else pfe_label(2): \
 | |
| +	list_for_each_entry_safe (__peer, __temp, &(__wg)->peer_list, peer_list) \
 | |
| +	if (0) pfe_label(3): break; else \
 | |
| +	if (0); else for (__peer = peer_rcu_get(__peer); __peer;) if (1) { goto pfe_label(4); pfe_label(5): break; } else while (1) if (1) goto pfe_label(5); else pfe_label(4): \
 | |
| +	if (1) { goto pfe_label(6); pfe_label(7):; } else while (1) if (1) goto pfe_label(3); else while (1) if (1) goto pfe_label(7); else pfe_label(6): \
 | |
| +	if (1) { goto pfe_label(8); pfe_label(9): peer_put(__peer); break; pfe_label(10): peer_put(__peer); } else while (1) if (1) goto pfe_label(9); else while (1) if (1) goto pfe_label(10); else pfe_label(8):
 | |
| +
 | |
| +#endif
 | |
| --- /dev/null	2017-07-05 16:27:37.615351856 +0200
 | |
| +++ b/net/wireguard/ratelimiter.h	2017-07-06 18:17:33.000000000 +0200
 | |
| @@ -0,0 +1,16 @@
 | |
| +/* Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. */
 | |
| +
 | |
| +#ifndef RATELIMITER_H
 | |
| +#define RATELIMITER_H
 | |
| +
 | |
| +#include <linux/skbuff.h>
 | |
| +
 | |
| +int ratelimiter_init(void);
 | |
| +void ratelimiter_uninit(void);
 | |
| +bool ratelimiter_allow(struct sk_buff *skb, struct net *net);
 | |
| +
 | |
| +#ifdef DEBUG
 | |
| +bool ratelimiter_selftest(void);
 | |
| +#endif
 | |
| +
 | |
| +#endif
 | |
| --- /dev/null	2017-07-05 16:27:37.615351856 +0200
 | |
| +++ b/net/wireguard/routingtable.h	2017-07-06 18:17:33.000000000 +0200
 | |
| @@ -0,0 +1,36 @@
 | |
| +/* Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. */
 | |
| +
 | |
| +#ifndef ROUTINGTABLE_H
 | |
| +#define ROUTINGTABLE_H
 | |
| +
 | |
| +#include <linux/mutex.h>
 | |
| +#include <linux/ip.h>
 | |
| +#include <linux/ipv6.h>
 | |
| +
 | |
| +struct wireguard_peer;
 | |
| +struct routing_table_node;
 | |
| +
 | |
| +struct routing_table {
 | |
| +	struct routing_table_node __rcu *root4;
 | |
| +	struct routing_table_node __rcu *root6;
 | |
| +	struct mutex table_update_lock;
 | |
| +};
 | |
| +
 | |
| +void routing_table_init(struct routing_table *table);
 | |
| +void routing_table_free(struct routing_table *table);
 | |
| +int routing_table_insert_v4(struct routing_table *table, const struct in_addr *ip, u8 cidr, struct wireguard_peer *peer);
 | |
| +int routing_table_insert_v6(struct routing_table *table, const struct in6_addr *ip, u8 cidr, struct wireguard_peer *peer);
 | |
| +void routing_table_remove_by_peer(struct routing_table *table, struct wireguard_peer *peer);
 | |
| +size_t routing_table_count_nodes(struct routing_table *table);
 | |
| +int routing_table_walk_ips_by_peer(struct routing_table *table, void *ctx, struct wireguard_peer *peer, int (*func)(void *ctx, union nf_inet_addr ip, u8 cidr, int family));
 | |
| +int routing_table_walk_ips_by_peer_sleepable(struct routing_table *table, void *ctx, struct wireguard_peer *peer, int (*func)(void *ctx, union nf_inet_addr ip, u8 cidr, int family));
 | |
| +
 | |
| +/* These return a strong reference to a peer: */
 | |
| +struct wireguard_peer *routing_table_lookup_dst(struct routing_table *table, struct sk_buff *skb);
 | |
| +struct wireguard_peer *routing_table_lookup_src(struct routing_table *table, struct sk_buff *skb);
 | |
| +
 | |
| +#ifdef DEBUG
 | |
| +bool routing_table_selftest(void);
 | |
| +#endif
 | |
| +
 | |
| +#endif
 | |
| --- /dev/null	2017-07-05 16:27:37.615351856 +0200
 | |
| +++ b/net/wireguard/socket.h	2017-07-06 18:17:33.000000000 +0200
 | |
| @@ -0,0 +1,24 @@
 | |
| +/* Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. */
 | |
| +
 | |
| +#ifndef WGSOCKET_H
 | |
| +#define WGSOCKET_H
 | |
| +
 | |
| +#include <linux/netdevice.h>
 | |
| +#include <linux/udp.h>
 | |
| +#include <linux/if_vlan.h>
 | |
| +#include <linux/if_ether.h>
 | |
| +
 | |
| +struct wireguard_device;
 | |
| +struct endpoint;
 | |
| +
 | |
| +int socket_init(struct wireguard_device *wg);
 | |
| +void socket_uninit(struct wireguard_device *wg);
 | |
| +int socket_send_buffer_to_peer(struct wireguard_peer *peer, void *data, size_t len, u8 ds);
 | |
| +int socket_send_skb_to_peer(struct wireguard_peer *peer, struct sk_buff *skb, u8 ds);
 | |
| +int socket_send_buffer_as_reply_to_skb(struct wireguard_device *wg, struct sk_buff *in_skb, void *out_buffer, size_t len);
 | |
| +
 | |
| +int socket_endpoint_from_skb(struct endpoint *endpoint, struct sk_buff *skb);
 | |
| +void socket_set_peer_endpoint(struct wireguard_peer *peer, struct endpoint *endpoint);
 | |
| +void socket_clear_peer_endpoint_src(struct wireguard_peer *peer);
 | |
| +
 | |
| +#endif
 | |
| --- /dev/null	2017-07-05 16:27:37.615351856 +0200
 | |
| +++ b/net/wireguard/timers.h	2017-07-06 18:17:33.000000000 +0200
 | |
| @@ -0,0 +1,19 @@
 | |
| +/* Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. */
 | |
| +
 | |
| +#ifndef WGTIMERS_H
 | |
| +#define WGTIMERS_H
 | |
| +
 | |
| +struct wireguard_peer;
 | |
| +
 | |
| +void timers_init_peer(struct wireguard_peer *peer);
 | |
| +void timers_uninit_peer(struct wireguard_peer *peer);
 | |
| +
 | |
| +void timers_data_sent(struct wireguard_peer *peer);
 | |
| +void timers_data_received(struct wireguard_peer *peer);
 | |
| +void timers_any_authenticated_packet_received(struct wireguard_peer *peer);
 | |
| +void timers_handshake_initiated(struct wireguard_peer *peer);
 | |
| +void timers_handshake_complete(struct wireguard_peer *peer);
 | |
| +void timers_ephemeral_key_created(struct wireguard_peer *peer);
 | |
| +void timers_any_authenticated_packet_traversal(struct wireguard_peer *peer);
 | |
| +
 | |
| +#endif
 | |
| --- /dev/null	2017-07-05 16:27:37.615351856 +0200
 | |
| +++ b/net/wireguard/uapi.h	2017-07-06 18:17:33.000000000 +0200
 | |
| @@ -0,0 +1,166 @@
 | |
| +/* Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 | |
| + *
 | |
| + * Userspace API for WireGuard
 | |
| + * ---------------------------
 | |
| + *
 | |
| + * ioctl(WG_GET_DEVICE, { .ifr_name: "wg0", .ifr_data: NULL }):
 | |
| + *
 | |
| + *     Returns the number of bytes required to hold the peers of a device (`ret_peers_size`).
 | |
| + *
 | |
| + * ioctl(WG_GET_DEVICE, { .ifr_name: "wg0", .ifr_data: user_pointer }):
 | |
| + *
 | |
| + *     Retrieves device info, peer info, and ipmask info.
 | |
| + *
 | |
| + *     `user_pointer` must point to a region of memory of size `sizeof(struct wgdevice) + ret_peers_size`
 | |
| + *     and containing the structure `struct wgdevice { .peers_size: ret_peers_size }`.
 | |
| + *
 | |
| + *     Writes to `user_pointer` a succession of structs:
 | |
| + *
 | |
| + *         struct wgdevice { .num_peers = 3 }
 | |
| + *             struct wgpeer { .num_ipmasks = 4 }
 | |
| + *                 struct wgipmask
 | |
| + *                 struct wgipmask
 | |
| + *                 struct wgipmask
 | |
| + *                 struct wgipmask
 | |
| + *             struct wgpeer { .num_ipmasks = 2 }
 | |
| + *                 struct wgipmask
 | |
| + *                 struct wgipmask
 | |
| + *             struct wgpeer { .num_ipmasks = 0 }
 | |
| + *
 | |
| + *     Returns 0 on success. Returns -EMSGSIZE if there is too much data for the size of passed-in
 | |
| + *     memory, in which case, this should be recalculated using the call above. Returns -errno if
 | |
| + *     another error occurred.
 | |
| + *
 | |
| + * ioctl(WG_SET_DEVICE, { .ifr_name: "wg0", .ifr_data: user_pointer }):
 | |
| + *
 | |
| + *     Sets device info, peer info, and ipmask info.
 | |
| + *
 | |
| + *     `user_pointer` must point to a region of memory containing a succession of structs:
 | |
| + *
 | |
| + *         struct wgdevice { .num_peers = 3 }
 | |
| + *             struct wgpeer { .num_ipmasks = 4 }
 | |
| + *                 struct wgipmask
 | |
| + *                 struct wgipmask
 | |
| + *                 struct wgipmask
 | |
| + *                 struct wgipmask
 | |
| + *             struct wgpeer { .num_ipmasks = 2 }
 | |
| + *                 struct wgipmask
 | |
| + *                 struct wgipmask
 | |
| + *             struct wgpeer { .num_ipmasks = 0 }
 | |
| + *
 | |
| + *     If `wgdevice->flags & WGDEVICE_REPLACE_PEERS` is true, removes all peers of device before adding new ones.
 | |
| + *     If `wgpeer->flags & WGPEER_REMOVE_ME` is true, the peer identified by `wgpeer->public_key` is removed.
 | |
| + *     If `wgpeer->flags & WGPEER_REPLACE_IPMASKS` is true, removes all ipmasks before adding new ones.
 | |
| + *     If `wgdevice->private_key` is filled with zeros, no action is taken on the private key.
 | |
| + *     If `wgpeer->preshared_key` is filled with zeros, no action is taken on the preshared key.
 | |
| + *     If `wgdevice->flags & WGDEVICE_REMOVE_PRIVATE_KEY` is true, the private key is removed.
 | |
| + *     If `wgpeer->flags & WGPEER_REMOVE_PRESHARED_KEY` is true, the preshared key is removed.
 | |
| + *
 | |
| + *     Returns 0 on success, or -errno if an error occurred.
 | |
| + */
 | |
| +
 | |
| +
 | |
| +#ifndef WGUAPI_H
 | |
| +#define WGUAPI_H
 | |
| +
 | |
| +#ifdef __linux__
 | |
| +#include <linux/types.h>
 | |
| +#else
 | |
| +#include <stdint.h>
 | |
| +typedef uint8_t __u8;
 | |
| +typedef uint16_t __u16;
 | |
| +typedef uint32_t __u32;
 | |
| +typedef uint64_t __u64;
 | |
| +typedef int32_t __s32;
 | |
| +#endif
 | |
| +#ifdef __KERNEL__
 | |
| +#include <linux/time.h>
 | |
| +#include <linux/socket.h>
 | |
| +#else
 | |
| +#include <net/if.h>
 | |
| +#include <netinet/in.h>
 | |
| +#include <sys/time.h>
 | |
| +#include <sys/socket.h>
 | |
| +#endif
 | |
| +
 | |
| +#define WG_GET_DEVICE (SIOCDEVPRIVATE + 0)
 | |
| +#define WG_SET_DEVICE (SIOCDEVPRIVATE + 1)
 | |
| +
 | |
| +#define WG_KEY_LEN 32
 | |
| +
 | |
| +struct wgipmask {
 | |
| +	__s32 family; /* AF_INET or AF_INET6; selects which union member below is meaningful */
 | |
| +	union {
 | |
| +		struct in_addr ip4; /* used when family == AF_INET */
 | |
| +		struct in6_addr ip6; /* used when family == AF_INET6 */
 | |
| +	};
 | |
| +	__u8 cidr; /* prefix length: at most 32 for AF_INET, at most 128 for AF_INET6 */
 | |
| +};
 | |
| +
 | |
| +enum {
 | |
| +	WGPEER_REMOVE_ME = (1 << 0), /* remove the peer identified by wgpeer->public_key */
 | |
| +	WGPEER_REPLACE_IPMASKS = (1 << 1), /* remove all existing ipmasks before adding the new ones */
 | |
| +	WGPEER_REMOVE_PRESHARED_KEY = (1 << 2) /* NOTE(review): presumably clears the peer's preshared key -- confirm in config.c */
 | |
| +};
 | |
| +
 | |
| +struct wgpeer {
 | |
| +	__u8 public_key[WG_KEY_LEN]; /* Get/Set -- identifies the peer */
 | |
| +	__u8 preshared_key[WG_KEY_LEN]; /* Get/Set */
 | |
| +	__u32 flags; /* Set -- bitwise OR of WGPEER_* values */
 | |
| +
 | |
| +	union {
 | |
| +		struct sockaddr addr;
 | |
| +		struct sockaddr_in addr4;
 | |
| +		struct sockaddr_in6 addr6;
 | |
| +	} endpoint; /* Get/Set */
 | |
| +
 | |
| +	struct timeval last_handshake_time; /* Get */
 | |
| +	__u64 rx_bytes, tx_bytes; /* Get */
 | |
| +	__u16 persistent_keepalive_interval; /* Get/Set -- 0 = off, 0xffff = unset */
 | |
| +
 | |
| +	__u16 num_ipmasks; /* Get/Set -- count of struct wgipmask entries that immediately follow this struct */
 | |
| +};
 | |
| +
 | |
| +enum {
 | |
| +	WGDEVICE_REPLACE_PEERS = (1 << 0), /* remove all peers of the device before adding new ones */
 | |
| +	WGDEVICE_REMOVE_PRIVATE_KEY = (1 << 1), /* remove the device's private key */
 | |
| +	WGDEVICE_REMOVE_FWMARK = (1 << 2) /* NOTE(review): presumably clears wgdevice->fwmark -- confirm in config.c */
 | |
| +};
 | |
| +
 | |
| +enum {
 | |
| +	WG_API_VERSION_MAGIC = 0xbeef0002 /* value that wgdevice.version_magic must hold */
 | |
| +};
 | |
| +
 | |
| +struct wgdevice {
 | |
| +	__u32 version_magic; /* Must be value of WG_API_VERSION_MAGIC */
 | |
| +	char interface[IFNAMSIZ]; /* Get */
 | |
| +	__u32 flags; /* Set -- bitwise OR of WGDEVICE_* values */
 | |
| +
 | |
| +	__u8 public_key[WG_KEY_LEN]; /* Get */
 | |
| +	__u8 private_key[WG_KEY_LEN]; /* Get/Set -- all zeros on Set means no action is taken on the private key */
 | |
| +	__u32 fwmark; /* Get/Set */
 | |
| +	__u16 port; /* Get/Set */
 | |
| +
 | |
| +	union {
 | |
| +		__u16 num_peers; /* Get/Set -- count of struct wgpeer records that follow this struct */
 | |
| +		__u32 peers_size; /* Get -- supplied by the caller as the byte size of the region after this struct */
 | |
| +	};
 | |
| +};
 | |
| +
 | |
| +/* These are simply for convenience in iterating over the packed peer/ipmask records. They allow you to write something like:
 | |
| + *
 | |
| + *    for_each_wgpeer(device, peer, i) {
 | |
| + *        for_each_wgipmask(peer, ipmask, j) {
 | |
| + *            do_something_with_ipmask(ipmask);
 | |
| + *        }
 | |
| + *    }
 | |
| + */
 | |
| +#define for_each_wgpeer(__dev, __peer, __i) for ((__i) = 0, (__peer) = (struct wgpeer *)((uint8_t *)(__dev) + sizeof(struct wgdevice)); \
 | |
| +						 (__i) < (__dev)->num_peers; \
 | |
| +						 ++(__i), (__peer) = (struct wgpeer *)((uint8_t *)(__peer) + sizeof(struct wgpeer) + (sizeof(struct wgipmask) * (__peer)->num_ipmasks)))
 | |
| +
 | |
| +#define for_each_wgipmask(__peer, __ipmask, __i) for ((__i) = 0, (__ipmask) = (struct wgipmask *)((uint8_t *)(__peer) + sizeof(struct wgpeer)); \
 | |
| +						 (__i) < (__peer)->num_ipmasks; \
 | |
| +						 ++(__i), (__ipmask) = (struct wgipmask *)((uint8_t *)(__ipmask) + sizeof(struct wgipmask)))
 | |
| +
 | |
| +#endif
 | |
| --- /dev/null	2017-07-05 16:27:37.615351856 +0200
 | |
| +++ b/net/wireguard/version.h	2017-07-06 18:17:33.000000000 +0200
 | |
| @@ -0,0 +1 @@
 | |
| +#define WIREGUARD_VERSION "0.0.20170706"
 | |
| --- /dev/null	2017-07-05 16:27:37.615351856 +0200
 | |
| +++ b/net/wireguard/selftest/blake2s.h	2017-07-06 18:17:33.000000000 +0200
 | |
| @@ -0,0 +1,556 @@
 | |
| +/* Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. */
 | |
| +
 | |
| +#ifdef DEBUG
 | |
| +static const u8 blake2s_testvecs[][BLAKE2S_OUTBYTES] __initconst = {
 | |
| +	{ 0x69, 0x21, 0x7A, 0x30, 0x79, 0x90, 0x80, 0x94, 0xE1, 0x11, 0x21, 0xD0, 0x42, 0x35, 0x4A, 0x7C, 0x1F, 0x55, 0xB6, 0x48, 0x2C, 0xA1, 0xA5, 0x1E, 0x1B, 0x25, 0x0D, 0xFD, 0x1E, 0xD0, 0xEE, 0xF9 },
 | |
| +	{ 0xE3, 0x4D, 0x74, 0xDB, 0xAF, 0x4F, 0xF4, 0xC6, 0xAB, 0xD8, 0x71, 0xCC, 0x22, 0x04, 0x51, 0xD2, 0xEA, 0x26, 0x48, 0x84, 0x6C, 0x77, 0x57, 0xFB, 0xAA, 0xC8, 0x2F, 0xE5, 0x1A, 0xD6, 0x4B, 0xEA },
 | |
| +	{ 0xDD, 0xAD, 0x9A, 0xB1, 0x5D, 0xAC, 0x45, 0x49, 0xBA, 0x42, 0xF4, 0x9D, 0x26, 0x24, 0x96, 0xBE, 0xF6, 0xC0, 0xBA, 0xE1, 0xDD, 0x34, 0x2A, 0x88, 0x08, 0xF8, 0xEA, 0x26, 0x7C, 0x6E, 0x21, 0x0C },
 | |
| +	{ 0xE8, 0xF9, 0x1C, 0x6E, 0xF2, 0x32, 0xA0, 0x41, 0x45, 0x2A, 0xB0, 0xE1, 0x49, 0x07, 0x0C, 0xDD, 0x7D, 0xD1, 0x76, 0x9E, 0x75, 0xB3, 0xA5, 0x92, 0x1B, 0xE3, 0x78, 0x76, 0xC4, 0x5C, 0x99, 0x00 },
 | |
| +	{ 0x0C, 0xC7, 0x0E, 0x00, 0x34, 0x8B, 0x86, 0xBA, 0x29, 0x44, 0xD0, 0xC3, 0x20, 0x38, 0xB2, 0x5C, 0x55, 0x58, 0x4F, 0x90, 0xDF, 0x23, 0x04, 0xF5, 0x5F, 0xA3, 0x32, 0xAF, 0x5F, 0xB0, 0x1E, 0x20 },
 | |
| +	{ 0xEC, 0x19, 0x64, 0x19, 0x10, 0x87, 0xA4, 0xFE, 0x9D, 0xF1, 0xC7, 0x95, 0x34, 0x2A, 0x02, 0xFF, 0xC1, 0x91, 0xA5, 0xB2, 0x51, 0x76, 0x48, 0x56, 0xAE, 0x5B, 0x8B, 0x57, 0x69, 0xF0, 0xC6, 0xCD },
 | |
| +	{ 0xE1, 0xFA, 0x51, 0x61, 0x8D, 0x7D, 0xF4, 0xEB, 0x70, 0xCF, 0x0D, 0x5A, 0x9E, 0x90, 0x6F, 0x80, 0x6E, 0x9D, 0x19, 0xF7, 0xF4, 0xF0, 0x1E, 0x3B, 0x62, 0x12, 0x88, 0xE4, 0x12, 0x04, 0x05, 0xD6 },
 | |
| +	{ 0x59, 0x80, 0x01, 0xFA, 0xFB, 0xE8, 0xF9, 0x4E, 0xC6, 0x6D, 0xC8, 0x27, 0xD0, 0x12, 0xCF, 0xCB, 0xBA, 0x22, 0x28, 0x56, 0x9F, 0x44, 0x8E, 0x89, 0xEA, 0x22, 0x08, 0xC8, 0xBF, 0x76, 0x92, 0x93 },
 | |
| +	{ 0xC7, 0xE8, 0x87, 0xB5, 0x46, 0x62, 0x36, 0x35, 0xE9, 0x3E, 0x04, 0x95, 0x59, 0x8F, 0x17, 0x26, 0x82, 0x19, 0x96, 0xC2, 0x37, 0x77, 0x05, 0xB9, 0x3A, 0x1F, 0x63, 0x6F, 0x87, 0x2B, 0xFA, 0x2D },
 | |
| +	{ 0xC3, 0x15, 0xA4, 0x37, 0xDD, 0x28, 0x06, 0x2A, 0x77, 0x0D, 0x48, 0x19, 0x67, 0x13, 0x6B, 0x1B, 0x5E, 0xB8, 0x8B, 0x21, 0xEE, 0x53, 0xD0, 0x32, 0x9C, 0x58, 0x97, 0x12, 0x6E, 0x9D, 0xB0, 0x2C },
 | |
| +	{ 0xBB, 0x47, 0x3D, 0xED, 0xDC, 0x05, 0x5F, 0xEA, 0x62, 0x28, 0xF2, 0x07, 0xDA, 0x57, 0x53, 0x47, 0xBB, 0x00, 0x40, 0x4C, 0xD3, 0x49, 0xD3, 0x8C, 0x18, 0x02, 0x63, 0x07, 0xA2, 0x24, 0xCB, 0xFF },
 | |
| +	{ 0x68, 0x7E, 0x18, 0x73, 0xA8, 0x27, 0x75, 0x91, 0xBB, 0x33, 0xD9, 0xAD, 0xF9, 0xA1, 0x39, 0x12, 0xEF, 0xEF, 0xE5, 0x57, 0xCA, 0xFC, 0x39, 0xA7, 0x95, 0x26, 0x23, 0xE4, 0x72, 0x55, 0xF1, 0x6D },
 | |
| +	{ 0x1A, 0xC7, 0xBA, 0x75, 0x4D, 0x6E, 0x2F, 0x94, 0xE0, 0xE8, 0x6C, 0x46, 0xBF, 0xB2, 0x62, 0xAB, 0xBB, 0x74, 0xF4, 0x50, 0xEF, 0x45, 0x6D, 0x6B, 0x4D, 0x97, 0xAA, 0x80, 0xCE, 0x6D, 0xA7, 0x67 },
 | |
| +	{ 0x01, 0x2C, 0x97, 0x80, 0x96, 0x14, 0x81, 0x6B, 0x5D, 0x94, 0x94, 0x47, 0x7D, 0x4B, 0x68, 0x7D, 0x15, 0xB9, 0x6E, 0xB6, 0x9C, 0x0E, 0x80, 0x74, 0xA8, 0x51, 0x6F, 0x31, 0x22, 0x4B, 0x5C, 0x98 },
 | |
| +	{ 0x91, 0xFF, 0xD2, 0x6C, 0xFA, 0x4D, 0xA5, 0x13, 0x4C, 0x7E, 0xA2, 0x62, 0xF7, 0x88, 0x9C, 0x32, 0x9F, 0x61, 0xF6, 0xA6, 0x57, 0x22, 0x5C, 0xC2, 0x12, 0xF4, 0x00, 0x56, 0xD9, 0x86, 0xB3, 0xF4 },
 | |
| +	{ 0xD9, 0x7C, 0x82, 0x8D, 0x81, 0x82, 0xA7, 0x21, 0x80, 0xA0, 0x6A, 0x78, 0x26, 0x83, 0x30, 0x67, 0x3F, 0x7C, 0x4E, 0x06, 0x35, 0x94, 0x7C, 0x04, 0xC0, 0x23, 0x23, 0xFD, 0x45, 0xC0, 0xA5, 0x2D },
 | |
| +	{ 0xEF, 0xC0, 0x4C, 0xDC, 0x39, 0x1C, 0x7E, 0x91, 0x19, 0xBD, 0x38, 0x66, 0x8A, 0x53, 0x4E, 0x65, 0xFE, 0x31, 0x03, 0x6D, 0x6A, 0x62, 0x11, 0x2E, 0x44, 0xEB, 0xEB, 0x11, 0xF9, 0xC5, 0x70, 0x80 },
 | |
| +	{ 0x99, 0x2C, 0xF5, 0xC0, 0x53, 0x44, 0x2A, 0x5F, 0xBC, 0x4F, 0xAF, 0x58, 0x3E, 0x04, 0xE5, 0x0B, 0xB7, 0x0D, 0x2F, 0x39, 0xFB, 0xB6, 0xA5, 0x03, 0xF8, 0x9E, 0x56, 0xA6, 0x3E, 0x18, 0x57, 0x8A },
 | |
| +	{ 0x38, 0x64, 0x0E, 0x9F, 0x21, 0x98, 0x3E, 0x67, 0xB5, 0x39, 0xCA, 0xCC, 0xAE, 0x5E, 0xCF, 0x61, 0x5A, 0xE2, 0x76, 0x4F, 0x75, 0xA0, 0x9C, 0x9C, 0x59, 0xB7, 0x64, 0x83, 0xC1, 0xFB, 0xC7, 0x35 },
 | |
| +	{ 0x21, 0x3D, 0xD3, 0x4C, 0x7E, 0xFE, 0x4F, 0xB2, 0x7A, 0x6B, 0x35, 0xF6, 0xB4, 0x00, 0x0D, 0x1F, 0xE0, 0x32, 0x81, 0xAF, 0x3C, 0x72, 0x3E, 0x5C, 0x9F, 0x94, 0x74, 0x7A, 0x5F, 0x31, 0xCD, 0x3B },
 | |
| +	{ 0xEC, 0x24, 0x6E, 0xEE, 0xB9, 0xCE, 0xD3, 0xF7, 0xAD, 0x33, 0xED, 0x28, 0x66, 0x0D, 0xD9, 0xBB, 0x07, 0x32, 0x51, 0x3D, 0xB4, 0xE2, 0xFA, 0x27, 0x8B, 0x60, 0xCD, 0xE3, 0x68, 0x2A, 0x4C, 0xCD },
 | |
| +	{ 0xAC, 0x9B, 0x61, 0xD4, 0x46, 0x64, 0x8C, 0x30, 0x05, 0xD7, 0x89, 0x2B, 0xF3, 0xA8, 0x71, 0x9F, 0x4C, 0x81, 0x81, 0xCF, 0xDC, 0xBC, 0x2B, 0x79, 0xFE, 0xF1, 0x0A, 0x27, 0x9B, 0x91, 0x10, 0x95 },
 | |
| +	{ 0x7B, 0xF8, 0xB2, 0x29, 0x59, 0xE3, 0x4E, 0x3A, 0x43, 0xF7, 0x07, 0x92, 0x23, 0xE8, 0x3A, 0x97, 0x54, 0x61, 0x7D, 0x39, 0x1E, 0x21, 0x3D, 0xFD, 0x80, 0x8E, 0x41, 0xB9, 0xBE, 0xAD, 0x4C, 0xE7 },
 | |
| +	{ 0x68, 0xD4, 0xB5, 0xD4, 0xFA, 0x0E, 0x30, 0x2B, 0x64, 0xCC, 0xC5, 0xAF, 0x79, 0x29, 0x13, 0xAC, 0x4C, 0x88, 0xEC, 0x95, 0xC0, 0x7D, 0xDF, 0x40, 0x69, 0x42, 0x56, 0xEB, 0x88, 0xCE, 0x9F, 0x3D },
 | |
| +	{ 0xB2, 0xC2, 0x42, 0x0F, 0x05, 0xF9, 0xAB, 0xE3, 0x63, 0x15, 0x91, 0x93, 0x36, 0xB3, 0x7E, 0x4E, 0x0F, 0xA3, 0x3F, 0xF7, 0xE7, 0x6A, 0x49, 0x27, 0x67, 0x00, 0x6F, 0xDB, 0x5D, 0x93, 0x54, 0x62 },
 | |
| +	{ 0x13, 0x4F, 0x61, 0xBB, 0xD0, 0xBB, 0xB6, 0x9A, 0xED, 0x53, 0x43, 0x90, 0x45, 0x51, 0xA3, 0xE6, 0xC1, 0xAA, 0x7D, 0xCD, 0xD7, 0x7E, 0x90, 0x3E, 0x70, 0x23, 0xEB, 0x7C, 0x60, 0x32, 0x0A, 0xA7 },
 | |
| +	{ 0x46, 0x93, 0xF9, 0xBF, 0xF7, 0xD4, 0xF3, 0x98, 0x6A, 0x7D, 0x17, 0x6E, 0x6E, 0x06, 0xF7, 0x2A, 0xD1, 0x49, 0x0D, 0x80, 0x5C, 0x99, 0xE2, 0x53, 0x47, 0xB8, 0xDE, 0x77, 0xB4, 0xDB, 0x6D, 0x9B },
 | |
| +	{ 0x85, 0x3E, 0x26, 0xF7, 0x41, 0x95, 0x3B, 0x0F, 0xD5, 0xBD, 0xB4, 0x24, 0xE8, 0xAB, 0x9E, 0x8B, 0x37, 0x50, 0xEA, 0xA8, 0xEF, 0x61, 0xE4, 0x79, 0x02, 0xC9, 0x1E, 0x55, 0x4E, 0x9C, 0x73, 0xB9 },
 | |
| +	{ 0xF7, 0xDE, 0x53, 0x63, 0x61, 0xAB, 0xAA, 0x0E, 0x15, 0x81, 0x56, 0xCF, 0x0E, 0xA4, 0xF6, 0x3A, 0x99, 0xB5, 0xE4, 0x05, 0x4F, 0x8F, 0xA4, 0xC9, 0xD4, 0x5F, 0x62, 0x85, 0xCA, 0xD5, 0x56, 0x94 },
 | |
| +	{ 0x4C, 0x23, 0x06, 0x08, 0x86, 0x0A, 0x99, 0xAE, 0x8D, 0x7B, 0xD5, 0xC2, 0xCC, 0x17, 0xFA, 0x52, 0x09, 0x6B, 0x9A, 0x61, 0xBE, 0xDB, 0x17, 0xCB, 0x76, 0x17, 0x86, 0x4A, 0xD2, 0x9C, 0xA7, 0xA6 },
 | |
| +	{ 0xAE, 0xB9, 0x20, 0xEA, 0x87, 0x95, 0x2D, 0xAD, 0xB1, 0xFB, 0x75, 0x92, 0x91, 0xE3, 0x38, 0x81, 0x39, 0xA8, 0x72, 0x86, 0x50, 0x01, 0x88, 0x6E, 0xD8, 0x47, 0x52, 0xE9, 0x3C, 0x25, 0x0C, 0x2A },
 | |
| +	{ 0xAB, 0xA4, 0xAD, 0x9B, 0x48, 0x0B, 0x9D, 0xF3, 0xD0, 0x8C, 0xA5, 0xE8, 0x7B, 0x0C, 0x24, 0x40, 0xD4, 0xE4, 0xEA, 0x21, 0x22, 0x4C, 0x2E, 0xB4, 0x2C, 0xBA, 0xE4, 0x69, 0xD0, 0x89, 0xB9, 0x31 },
 | |
| +	{ 0x05, 0x82, 0x56, 0x07, 0xD7, 0xFD, 0xF2, 0xD8, 0x2E, 0xF4, 0xC3, 0xC8, 0xC2, 0xAE, 0xA9, 0x61, 0xAD, 0x98, 0xD6, 0x0E, 0xDF, 0xF7, 0xD0, 0x18, 0x98, 0x3E, 0x21, 0x20, 0x4C, 0x0D, 0x93, 0xD1 },
 | |
| +	{ 0xA7, 0x42, 0xF8, 0xB6, 0xAF, 0x82, 0xD8, 0xA6, 0xCA, 0x23, 0x57, 0xC5, 0xF1, 0xCF, 0x91, 0xDE, 0xFB, 0xD0, 0x66, 0x26, 0x7D, 0x75, 0xC0, 0x48, 0xB3, 0x52, 0x36, 0x65, 0x85, 0x02, 0x59, 0x62 },
 | |
| +	{ 0x2B, 0xCA, 0xC8, 0x95, 0x99, 0x00, 0x0B, 0x42, 0xC9, 0x5A, 0xE2, 0x38, 0x35, 0xA7, 0x13, 0x70, 0x4E, 0xD7, 0x97, 0x89, 0xC8, 0x4F, 0xEF, 0x14, 0x9A, 0x87, 0x4F, 0xF7, 0x33, 0xF0, 0x17, 0xA2 },
 | |
| +	{ 0xAC, 0x1E, 0xD0, 0x7D, 0x04, 0x8F, 0x10, 0x5A, 0x9E, 0x5B, 0x7A, 0xB8, 0x5B, 0x09, 0xA4, 0x92, 0xD5, 0xBA, 0xFF, 0x14, 0xB8, 0xBF, 0xB0, 0xE9, 0xFD, 0x78, 0x94, 0x86, 0xEE, 0xA2, 0xB9, 0x74 },
 | |
| +	{ 0xE4, 0x8D, 0x0E, 0xCF, 0xAF, 0x49, 0x7D, 0x5B, 0x27, 0xC2, 0x5D, 0x99, 0xE1, 0x56, 0xCB, 0x05, 0x79, 0xD4, 0x40, 0xD6, 0xE3, 0x1F, 0xB6, 0x24, 0x73, 0x69, 0x6D, 0xBF, 0x95, 0xE0, 0x10, 0xE4 },
 | |
| +	{ 0x12, 0xA9, 0x1F, 0xAD, 0xF8, 0xB2, 0x16, 0x44, 0xFD, 0x0F, 0x93, 0x4F, 0x3C, 0x4A, 0x8F, 0x62, 0xBA, 0x86, 0x2F, 0xFD, 0x20, 0xE8, 0xE9, 0x61, 0x15, 0x4C, 0x15, 0xC1, 0x38, 0x84, 0xED, 0x3D },
 | |
| +	{ 0x7C, 0xBE, 0xE9, 0x6E, 0x13, 0x98, 0x97, 0xDC, 0x98, 0xFB, 0xEF, 0x3B, 0xE8, 0x1A, 0xD4, 0xD9, 0x64, 0xD2, 0x35, 0xCB, 0x12, 0x14, 0x1F, 0xB6, 0x67, 0x27, 0xE6, 0xE5, 0xDF, 0x73, 0xA8, 0x78 },
 | |
| +	{ 0xEB, 0xF6, 0x6A, 0xBB, 0x59, 0x7A, 0xE5, 0x72, 0xA7, 0x29, 0x7C, 0xB0, 0x87, 0x1E, 0x35, 0x5A, 0xCC, 0xAF, 0xAD, 0x83, 0x77, 0xB8, 0xE7, 0x8B, 0xF1, 0x64, 0xCE, 0x2A, 0x18, 0xDE, 0x4B, 0xAF },
 | |
| +	{ 0x71, 0xB9, 0x33, 0xB0, 0x7E, 0x4F, 0xF7, 0x81, 0x8C, 0xE0, 0x59, 0xD0, 0x08, 0x82, 0x9E, 0x45, 0x3C, 0x6F, 0xF0, 0x2E, 0xC0, 0xA7, 0xDB, 0x39, 0x3F, 0xC2, 0xD8, 0x70, 0xF3, 0x7A, 0x72, 0x86 },
 | |
| +	{ 0x7C, 0xF7, 0xC5, 0x13, 0x31, 0x22, 0x0B, 0x8D, 0x3E, 0xBA, 0xED, 0x9C, 0x29, 0x39, 0x8A, 0x16, 0xD9, 0x81, 0x56, 0xE2, 0x61, 0x3C, 0xB0, 0x88, 0xF2, 0xB0, 0xE0, 0x8A, 0x1B, 0xE4, 0xCF, 0x4F },
 | |
| +	{ 0x3E, 0x41, 0xA1, 0x08, 0xE0, 0xF6, 0x4A, 0xD2, 0x76, 0xB9, 0x79, 0xE1, 0xCE, 0x06, 0x82, 0x79, 0xE1, 0x6F, 0x7B, 0xC7, 0xE4, 0xAA, 0x1D, 0x21, 0x1E, 0x17, 0xB8, 0x11, 0x61, 0xDF, 0x16, 0x02 },
 | |
| +	{ 0x88, 0x65, 0x02, 0xA8, 0x2A, 0xB4, 0x7B, 0xA8, 0xD8, 0x67, 0x10, 0xAA, 0x9D, 0xE3, 0xD4, 0x6E, 0xA6, 0x5C, 0x47, 0xAF, 0x6E, 0xE8, 0xDE, 0x45, 0x0C, 0xCE, 0xB8, 0xB1, 0x1B, 0x04, 0x5F, 0x50 },
 | |
| +	{ 0xC0, 0x21, 0xBC, 0x5F, 0x09, 0x54, 0xFE, 0xE9, 0x4F, 0x46, 0xEA, 0x09, 0x48, 0x7E, 0x10, 0xA8, 0x48, 0x40, 0xD0, 0x2F, 0x64, 0x81, 0x0B, 0xC0, 0x8D, 0x9E, 0x55, 0x1F, 0x7D, 0x41, 0x68, 0x14 },
 | |
| +	{ 0x20, 0x30, 0x51, 0x6E, 0x8A, 0x5F, 0xE1, 0x9A, 0xE7, 0x9C, 0x33, 0x6F, 0xCE, 0x26, 0x38, 0x2A, 0x74, 0x9D, 0x3F, 0xD0, 0xEC, 0x91, 0xE5, 0x37, 0xD4, 0xBD, 0x23, 0x58, 0xC1, 0x2D, 0xFB, 0x22 },
 | |
| +	{ 0x55, 0x66, 0x98, 0xDA, 0xC8, 0x31, 0x7F, 0xD3, 0x6D, 0xFB, 0xDF, 0x25, 0xA7, 0x9C, 0xB1, 0x12, 0xD5, 0x42, 0x58, 0x60, 0x60, 0x5C, 0xBA, 0xF5, 0x07, 0xF2, 0x3B, 0xF7, 0xE9, 0xF4, 0x2A, 0xFE },
 | |
| +	{ 0x2F, 0x86, 0x7B, 0xA6, 0x77, 0x73, 0xFD, 0xC3, 0xE9, 0x2F, 0xCE, 0xD9, 0x9A, 0x64, 0x09, 0xAD, 0x39, 0xD0, 0xB8, 0x80, 0xFD, 0xE8, 0xF1, 0x09, 0xA8, 0x17, 0x30, 0xC4, 0x45, 0x1D, 0x01, 0x78 },
 | |
| +	{ 0x17, 0x2E, 0xC2, 0x18, 0xF1, 0x19, 0xDF, 0xAE, 0x98, 0x89, 0x6D, 0xFF, 0x29, 0xDD, 0x98, 0x76, 0xC9, 0x4A, 0xF8, 0x74, 0x17, 0xF9, 0xAE, 0x4C, 0x70, 0x14, 0xBB, 0x4E, 0x4B, 0x96, 0xAF, 0xC7 },
 | |
| +	{ 0x3F, 0x85, 0x81, 0x4A, 0x18, 0x19, 0x5F, 0x87, 0x9A, 0xA9, 0x62, 0xF9, 0x5D, 0x26, 0xBD, 0x82, 0xA2, 0x78, 0xF2, 0xB8, 0x23, 0x20, 0x21, 0x8F, 0x6B, 0x3B, 0xD6, 0xF7, 0xF6, 0x67, 0xA6, 0xD9 },
 | |
| +	{ 0x1B, 0x61, 0x8F, 0xBA, 0xA5, 0x66, 0xB3, 0xD4, 0x98, 0xC1, 0x2E, 0x98, 0x2C, 0x9E, 0xC5, 0x2E, 0x4D, 0xA8, 0x5A, 0x8C, 0x54, 0xF3, 0x8F, 0x34, 0xC0, 0x90, 0x39, 0x4F, 0x23, 0xC1, 0x84, 0xC1 },
 | |
| +	{ 0x0C, 0x75, 0x8F, 0xB5, 0x69, 0x2F, 0xFD, 0x41, 0xA3, 0x57, 0x5D, 0x0A, 0xF0, 0x0C, 0xC7, 0xFB, 0xF2, 0xCB, 0xE5, 0x90, 0x5A, 0x58, 0x32, 0x3A, 0x88, 0xAE, 0x42, 0x44, 0xF6, 0xE4, 0xC9, 0x93 },
 | |
| +	{ 0xA9, 0x31, 0x36, 0x0C, 0xAD, 0x62, 0x8C, 0x7F, 0x12, 0xA6, 0xC1, 0xC4, 0xB7, 0x53, 0xB0, 0xF4, 0x06, 0x2A, 0xEF, 0x3C, 0xE6, 0x5A, 0x1A, 0xE3, 0xF1, 0x93, 0x69, 0xDA, 0xDF, 0x3A, 0xE2, 0x3D },
 | |
| +	{ 0xCB, 0xAC, 0x7D, 0x77, 0x3B, 0x1E, 0x3B, 0x3C, 0x66, 0x91, 0xD7, 0xAB, 0xB7, 0xE9, 0xDF, 0x04, 0x5C, 0x8B, 0xA1, 0x92, 0x68, 0xDE, 0xD1, 0x53, 0x20, 0x7F, 0x5E, 0x80, 0x43, 0x52, 0xEC, 0x5D },
 | |
| +	{ 0x23, 0xA1, 0x96, 0xD3, 0x80, 0x2E, 0xD3, 0xC1, 0xB3, 0x84, 0x01, 0x9A, 0x82, 0x32, 0x58, 0x40, 0xD3, 0x2F, 0x71, 0x95, 0x0C, 0x45, 0x80, 0xB0, 0x34, 0x45, 0xE0, 0x89, 0x8E, 0x14, 0x05, 0x3C },
 | |
| +	{ 0xF4, 0x49, 0x54, 0x70, 0xF2, 0x26, 0xC8, 0xC2, 0x14, 0xBE, 0x08, 0xFD, 0xFA, 0xD4, 0xBC, 0x4A, 0x2A, 0x9D, 0xBE, 0xA9, 0x13, 0x6A, 0x21, 0x0D, 0xF0, 0xD4, 0xB6, 0x49, 0x29, 0xE6, 0xFC, 0x14 },
 | |
| +	{ 0xE2, 0x90, 0xDD, 0x27, 0x0B, 0x46, 0x7F, 0x34, 0xAB, 0x1C, 0x00, 0x2D, 0x34, 0x0F, 0xA0, 0x16, 0x25, 0x7F, 0xF1, 0x9E, 0x58, 0x33, 0xFD, 0xBB, 0xF2, 0xCB, 0x40, 0x1C, 0x3B, 0x28, 0x17, 0xDE },
 | |
| +	{ 0x9F, 0xC7, 0xB5, 0xDE, 0xD3, 0xC1, 0x50, 0x42, 0xB2, 0xA6, 0x58, 0x2D, 0xC3, 0x9B, 0xE0, 0x16, 0xD2, 0x4A, 0x68, 0x2D, 0x5E, 0x61, 0xAD, 0x1E, 0xFF, 0x9C, 0x63, 0x30, 0x98, 0x48, 0xF7, 0x06 },
 | |
| +	{ 0x8C, 0xCA, 0x67, 0xA3, 0x6D, 0x17, 0xD5, 0xE6, 0x34, 0x1C, 0xB5, 0x92, 0xFD, 0x7B, 0xEF, 0x99, 0x26, 0xC9, 0xE3, 0xAA, 0x10, 0x27, 0xEA, 0x11, 0xA7, 0xD8, 0xBD, 0x26, 0x0B, 0x57, 0x6E, 0x04 },
 | |
| +	{ 0x40, 0x93, 0x92, 0xF5, 0x60, 0xF8, 0x68, 0x31, 0xDA, 0x43, 0x73, 0xEE, 0x5E, 0x00, 0x74, 0x26, 0x05, 0x95, 0xD7, 0xBC, 0x24, 0x18, 0x3B, 0x60, 0xED, 0x70, 0x0D, 0x45, 0x83, 0xD3, 0xF6, 0xF0 },
 | |
| +	{ 0x28, 0x02, 0x16, 0x5D, 0xE0, 0x90, 0x91, 0x55, 0x46, 0xF3, 0x39, 0x8C, 0xD8, 0x49, 0x16, 0x4A, 0x19, 0xF9, 0x2A, 0xDB, 0xC3, 0x61, 0xAD, 0xC9, 0x9B, 0x0F, 0x20, 0xC8, 0xEA, 0x07, 0x10, 0x54 },
 | |
| +	{ 0xAD, 0x83, 0x91, 0x68, 0xD9, 0xF8, 0xA4, 0xBE, 0x95, 0xBA, 0x9E, 0xF9, 0xA6, 0x92, 0xF0, 0x72, 0x56, 0xAE, 0x43, 0xFE, 0x6F, 0x98, 0x64, 0xE2, 0x90, 0x69, 0x1B, 0x02, 0x56, 0xCE, 0x50, 0xA9 },
 | |
| +	{ 0x75, 0xFD, 0xAA, 0x50, 0x38, 0xC2, 0x84, 0xB8, 0x6D, 0x6E, 0x8A, 0xFF, 0xE8, 0xB2, 0x80, 0x7E, 0x46, 0x7B, 0x86, 0x60, 0x0E, 0x79, 0xAF, 0x36, 0x89, 0xFB, 0xC0, 0x63, 0x28, 0xCB, 0xF8, 0x94 },
 | |
| +	{ 0xE5, 0x7C, 0xB7, 0x94, 0x87, 0xDD, 0x57, 0x90, 0x24, 0x32, 0xB2, 0x50, 0x73, 0x38, 0x13, 0xBD, 0x96, 0xA8, 0x4E, 0xFC, 0xE5, 0x9F, 0x65, 0x0F, 0xAC, 0x26, 0xE6, 0x69, 0x6A, 0xEF, 0xAF, 0xC3 },
 | |
| +	{ 0x56, 0xF3, 0x4E, 0x8B, 0x96, 0x55, 0x7E, 0x90, 0xC1, 0xF2, 0x4B, 0x52, 0xD0, 0xC8, 0x9D, 0x51, 0x08, 0x6A, 0xCF, 0x1B, 0x00, 0xF6, 0x34, 0xCF, 0x1D, 0xDE, 0x92, 0x33, 0xB8, 0xEA, 0xAA, 0x3E },
 | |
| +	{ 0x1B, 0x53, 0xEE, 0x94, 0xAA, 0xF3, 0x4E, 0x4B, 0x15, 0x9D, 0x48, 0xDE, 0x35, 0x2C, 0x7F, 0x06, 0x61, 0xD0, 0xA4, 0x0E, 0xDF, 0xF9, 0x5A, 0x0B, 0x16, 0x39, 0xB4, 0x09, 0x0E, 0x97, 0x44, 0x72 },
 | |
| +	{ 0x05, 0x70, 0x5E, 0x2A, 0x81, 0x75, 0x7C, 0x14, 0xBD, 0x38, 0x3E, 0xA9, 0x8D, 0xDA, 0x54, 0x4E, 0xB1, 0x0E, 0x6B, 0xC0, 0x7B, 0xAE, 0x43, 0x5E, 0x25, 0x18, 0xDB, 0xE1, 0x33, 0x52, 0x53, 0x75 },
 | |
| +	{ 0xD8, 0xB2, 0x86, 0x6E, 0x8A, 0x30, 0x9D, 0xB5, 0x3E, 0x52, 0x9E, 0xC3, 0x29, 0x11, 0xD8, 0x2F, 0x5C, 0xA1, 0x6C, 0xFF, 0x76, 0x21, 0x68, 0x91, 0xA9, 0x67, 0x6A, 0xA3, 0x1A, 0xAA, 0x6C, 0x42 },
 | |
| +	{ 0xF5, 0x04, 0x1C, 0x24, 0x12, 0x70, 0xEB, 0x04, 0xC7, 0x1E, 0xC2, 0xC9, 0x5D, 0x4C, 0x38, 0xD8, 0x03, 0xB1, 0x23, 0x7B, 0x0F, 0x29, 0xFD, 0x4D, 0xB3, 0xEB, 0x39, 0x76, 0x69, 0xE8, 0x86, 0x99 },
 | |
| +	{ 0x9A, 0x4C, 0xE0, 0x77, 0xC3, 0x49, 0x32, 0x2F, 0x59, 0x5E, 0x0E, 0xE7, 0x9E, 0xD0, 0xDA, 0x5F, 0xAB, 0x66, 0x75, 0x2C, 0xBF, 0xEF, 0x8F, 0x87, 0xD0, 0xE9, 0xD0, 0x72, 0x3C, 0x75, 0x30, 0xDD },
 | |
| +	{ 0x65, 0x7B, 0x09, 0xF3, 0xD0, 0xF5, 0x2B, 0x5B, 0x8F, 0x2F, 0x97, 0x16, 0x3A, 0x0E, 0xDF, 0x0C, 0x04, 0xF0, 0x75, 0x40, 0x8A, 0x07, 0xBB, 0xEB, 0x3A, 0x41, 0x01, 0xA8, 0x91, 0x99, 0x0D, 0x62 },
 | |
| +	{ 0x1E, 0x3F, 0x7B, 0xD5, 0xA5, 0x8F, 0xA5, 0x33, 0x34, 0x4A, 0xA8, 0xED, 0x3A, 0xC1, 0x22, 0xBB, 0x9E, 0x70, 0xD4, 0xEF, 0x50, 0xD0, 0x04, 0x53, 0x08, 0x21, 0x94, 0x8F, 0x5F, 0xE6, 0x31, 0x5A },
 | |
| +	{ 0x80, 0xDC, 0xCF, 0x3F, 0xD8, 0x3D, 0xFD, 0x0D, 0x35, 0xAA, 0x28, 0x58, 0x59, 0x22, 0xAB, 0x89, 0xD5, 0x31, 0x39, 0x97, 0x67, 0x3E, 0xAF, 0x90, 0x5C, 0xEA, 0x9C, 0x0B, 0x22, 0x5C, 0x7B, 0x5F },
 | |
| +	{ 0x8A, 0x0D, 0x0F, 0xBF, 0x63, 0x77, 0xD8, 0x3B, 0xB0, 0x8B, 0x51, 0x4B, 0x4B, 0x1C, 0x43, 0xAC, 0xC9, 0x5D, 0x75, 0x17, 0x14, 0xF8, 0x92, 0x56, 0x45, 0xCB, 0x6B, 0xC8, 0x56, 0xCA, 0x15, 0x0A },
 | |
| +	{ 0x9F, 0xA5, 0xB4, 0x87, 0x73, 0x8A, 0xD2, 0x84, 0x4C, 0xC6, 0x34, 0x8A, 0x90, 0x19, 0x18, 0xF6, 0x59, 0xA3, 0xB8, 0x9E, 0x9C, 0x0D, 0xFE, 0xEA, 0xD3, 0x0D, 0xD9, 0x4B, 0xCF, 0x42, 0xEF, 0x8E },
 | |
| +	{ 0x80, 0x83, 0x2C, 0x4A, 0x16, 0x77, 0xF5, 0xEA, 0x25, 0x60, 0xF6, 0x68, 0xE9, 0x35, 0x4D, 0xD3, 0x69, 0x97, 0xF0, 0x37, 0x28, 0xCF, 0xA5, 0x5E, 0x1B, 0x38, 0x33, 0x7C, 0x0C, 0x9E, 0xF8, 0x18 },
 | |
| +	{ 0xAB, 0x37, 0xDD, 0xB6, 0x83, 0x13, 0x7E, 0x74, 0x08, 0x0D, 0x02, 0x6B, 0x59, 0x0B, 0x96, 0xAE, 0x9B, 0xB4, 0x47, 0x72, 0x2F, 0x30, 0x5A, 0x5A, 0xC5, 0x70, 0xEC, 0x1D, 0xF9, 0xB1, 0x74, 0x3C },
 | |
| +	{ 0x3E, 0xE7, 0x35, 0xA6, 0x94, 0xC2, 0x55, 0x9B, 0x69, 0x3A, 0xA6, 0x86, 0x29, 0x36, 0x1E, 0x15, 0xD1, 0x22, 0x65, 0xAD, 0x6A, 0x3D, 0xED, 0xF4, 0x88, 0xB0, 0xB0, 0x0F, 0xAC, 0x97, 0x54, 0xBA },
 | |
| +	{ 0xD6, 0xFC, 0xD2, 0x32, 0x19, 0xB6, 0x47, 0xE4, 0xCB, 0xD5, 0xEB, 0x2D, 0x0A, 0xD0, 0x1E, 0xC8, 0x83, 0x8A, 0x4B, 0x29, 0x01, 0xFC, 0x32, 0x5C, 0xC3, 0x70, 0x19, 0x81, 0xCA, 0x6C, 0x88, 0x8B },
 | |
| +	{ 0x05, 0x20, 0xEC, 0x2F, 0x5B, 0xF7, 0xA7, 0x55, 0xDA, 0xCB, 0x50, 0xC6, 0xBF, 0x23, 0x3E, 0x35, 0x15, 0x43, 0x47, 0x63, 0xDB, 0x01, 0x39, 0xCC, 0xD9, 0xFA, 0xEF, 0xBB, 0x82, 0x07, 0x61, 0x2D },
 | |
| +	{ 0xAF, 0xF3, 0xB7, 0x5F, 0x3F, 0x58, 0x12, 0x64, 0xD7, 0x66, 0x16, 0x62, 0xB9, 0x2F, 0x5A, 0xD3, 0x7C, 0x1D, 0x32, 0xBD, 0x45, 0xFF, 0x81, 0xA4, 0xED, 0x8A, 0xDC, 0x9E, 0xF3, 0x0D, 0xD9, 0x89 },
 | |
| +	{ 0xD0, 0xDD, 0x65, 0x0B, 0xEF, 0xD3, 0xBA, 0x63, 0xDC, 0x25, 0x10, 0x2C, 0x62, 0x7C, 0x92, 0x1B, 0x9C, 0xBE, 0xB0, 0xB1, 0x30, 0x68, 0x69, 0x35, 0xB5, 0xC9, 0x27, 0xCB, 0x7C, 0xCD, 0x5E, 0x3B },
 | |
| +	{ 0xE1, 0x14, 0x98, 0x16, 0xB1, 0x0A, 0x85, 0x14, 0xFB, 0x3E, 0x2C, 0xAB, 0x2C, 0x08, 0xBE, 0xE9, 0xF7, 0x3C, 0xE7, 0x62, 0x21, 0x70, 0x12, 0x46, 0xA5, 0x89, 0xBB, 0xB6, 0x73, 0x02, 0xD8, 0xA9 },
 | |
| +	{ 0x7D, 0xA3, 0xF4, 0x41, 0xDE, 0x90, 0x54, 0x31, 0x7E, 0x72, 0xB5, 0xDB, 0xF9, 0x79, 0xDA, 0x01, 0xE6, 0xBC, 0xEE, 0xBB, 0x84, 0x78, 0xEA, 0xE6, 0xA2, 0x28, 0x49, 0xD9, 0x02, 0x92, 0x63, 0x5C },
 | |
| +	{ 0x12, 0x30, 0xB1, 0xFC, 0x8A, 0x7D, 0x92, 0x15, 0xED, 0xC2, 0xD4, 0xA2, 0xDE, 0xCB, 0xDD, 0x0A, 0x6E, 0x21, 0x6C, 0x92, 0x42, 0x78, 0xC9, 0x1F, 0xC5, 0xD1, 0x0E, 0x7D, 0x60, 0x19, 0x2D, 0x94 },
 | |
| +	{ 0x57, 0x50, 0xD7, 0x16, 0xB4, 0x80, 0x8F, 0x75, 0x1F, 0xEB, 0xC3, 0x88, 0x06, 0xBA, 0x17, 0x0B, 0xF6, 0xD5, 0x19, 0x9A, 0x78, 0x16, 0xBE, 0x51, 0x4E, 0x3F, 0x93, 0x2F, 0xBE, 0x0C, 0xB8, 0x71 },
 | |
| +	{ 0x6F, 0xC5, 0x9B, 0x2F, 0x10, 0xFE, 0xBA, 0x95, 0x4A, 0xA6, 0x82, 0x0B, 0x3C, 0xA9, 0x87, 0xEE, 0x81, 0xD5, 0xCC, 0x1D, 0xA3, 0xC6, 0x3C, 0xE8, 0x27, 0x30, 0x1C, 0x56, 0x9D, 0xFB, 0x39, 0xCE },
 | |
| +	{ 0xC7, 0xC3, 0xFE, 0x1E, 0xEB, 0xDC, 0x7B, 0x5A, 0x93, 0x93, 0x26, 0xE8, 0xDD, 0xB8, 0x3E, 0x8B, 0xF2, 0xB7, 0x80, 0xB6, 0x56, 0x78, 0xCB, 0x62, 0xF2, 0x08, 0xB0, 0x40, 0xAB, 0xDD, 0x35, 0xE2 },
 | |
| +	{ 0x0C, 0x75, 0xC1, 0xA1, 0x5C, 0xF3, 0x4A, 0x31, 0x4E, 0xE4, 0x78, 0xF4, 0xA5, 0xCE, 0x0B, 0x8A, 0x6B, 0x36, 0x52, 0x8E, 0xF7, 0xA8, 0x20, 0x69, 0x6C, 0x3E, 0x42, 0x46, 0xC5, 0xA1, 0x58, 0x64 },
 | |
| +	{ 0x21, 0x6D, 0xC1, 0x2A, 0x10, 0x85, 0x69, 0xA3, 0xC7, 0xCD, 0xDE, 0x4A, 0xED, 0x43, 0xA6, 0xC3, 0x30, 0x13, 0x9D, 0xDA, 0x3C, 0xCC, 0x4A, 0x10, 0x89, 0x05, 0xDB, 0x38, 0x61, 0x89, 0x90, 0x50 },
 | |
| +	{ 0xA5, 0x7B, 0xE6, 0xAE, 0x67, 0x56, 0xF2, 0x8B, 0x02, 0xF5, 0x9D, 0xAD, 0xF7, 0xE0, 0xD7, 0xD8, 0x80, 0x7F, 0x10, 0xFA, 0x15, 0xCE, 0xD1, 0xAD, 0x35, 0x85, 0x52, 0x1A, 0x1D, 0x99, 0x5A, 0x89 },
 | |
| +	{ 0x81, 0x6A, 0xEF, 0x87, 0x59, 0x53, 0x71, 0x6C, 0xD7, 0xA5, 0x81, 0xF7, 0x32, 0xF5, 0x3D, 0xD4, 0x35, 0xDA, 0xB6, 0x6D, 0x09, 0xC3, 0x61, 0xD2, 0xD6, 0x59, 0x2D, 0xE1, 0x77, 0x55, 0xD8, 0xA8 },
 | |
| +	{ 0x9A, 0x76, 0x89, 0x32, 0x26, 0x69, 0x3B, 0x6E, 0xA9, 0x7E, 0x6A, 0x73, 0x8F, 0x9D, 0x10, 0xFB, 0x3D, 0x0B, 0x43, 0xAE, 0x0E, 0x8B, 0x7D, 0x81, 0x23, 0xEA, 0x76, 0xCE, 0x97, 0x98, 0x9C, 0x7E },
 | |
| +	{ 0x8D, 0xAE, 0xDB, 0x9A, 0x27, 0x15, 0x29, 0xDB, 0xB7, 0xDC, 0x3B, 0x60, 0x7F, 0xE5, 0xEB, 0x2D, 0x32, 0x11, 0x77, 0x07, 0x58, 0xDD, 0x3B, 0x0A, 0x35, 0x93, 0xD2, 0xD7, 0x95, 0x4E, 0x2D, 0x5B },
 | |
| +	{ 0x16, 0xDB, 0xC0, 0xAA, 0x5D, 0xD2, 0xC7, 0x74, 0xF5, 0x05, 0x10, 0x0F, 0x73, 0x37, 0x86, 0xD8, 0xA1, 0x75, 0xFC, 0xBB, 0xB5, 0x9C, 0x43, 0xE1, 0xFB, 0xFF, 0x3E, 0x1E, 0xAF, 0x31, 0xCB, 0x4A },
 | |
| +	{ 0x86, 0x06, 0xCB, 0x89, 0x9C, 0x6A, 0xEA, 0xF5, 0x1B, 0x9D, 0xB0, 0xFE, 0x49, 0x24, 0xA9, 0xFD, 0x5D, 0xAB, 0xC1, 0x9F, 0x88, 0x26, 0xF2, 0xBC, 0x1C, 0x1D, 0x7D, 0xA1, 0x4D, 0x2C, 0x2C, 0x99 },
 | |
| +	{ 0x84, 0x79, 0x73, 0x1A, 0xED, 0xA5, 0x7B, 0xD3, 0x7E, 0xAD, 0xB5, 0x1A, 0x50, 0x7E, 0x30, 0x7F, 0x3B, 0xD9, 0x5E, 0x69, 0xDB, 0xCA, 0x94, 0xF3, 0xBC, 0x21, 0x72, 0x60, 0x66, 0xAD, 0x6D, 0xFD },
 | |
| +	{ 0x58, 0x47, 0x3A, 0x9E, 0xA8, 0x2E, 0xFA, 0x3F, 0x3B, 0x3D, 0x8F, 0xC8, 0x3E, 0xD8, 0x86, 0x31, 0x27, 0xB3, 0x3A, 0xE8, 0xDE, 0xAE, 0x63, 0x07, 0x20, 0x1E, 0xDB, 0x6D, 0xDE, 0x61, 0xDE, 0x29 },
 | |
| +	{ 0x9A, 0x92, 0x55, 0xD5, 0x3A, 0xF1, 0x16, 0xDE, 0x8B, 0xA2, 0x7C, 0xE3, 0x5B, 0x4C, 0x7E, 0x15, 0x64, 0x06, 0x57, 0xA0, 0xFC, 0xB8, 0x88, 0xC7, 0x0D, 0x95, 0x43, 0x1D, 0xAC, 0xD8, 0xF8, 0x30 },
 | |
| +	{ 0x9E, 0xB0, 0x5F, 0xFB, 0xA3, 0x9F, 0xD8, 0x59, 0x6A, 0x45, 0x49, 0x3E, 0x18, 0xD2, 0x51, 0x0B, 0xF3, 0xEF, 0x06, 0x5C, 0x51, 0xD6, 0xE1, 0x3A, 0xBE, 0x66, 0xAA, 0x57, 0xE0, 0x5C, 0xFD, 0xB7 },
 | |
| +	{ 0x81, 0xDC, 0xC3, 0xA5, 0x05, 0xEA, 0xCE, 0x3F, 0x87, 0x9D, 0x8F, 0x70, 0x27, 0x76, 0x77, 0x0F, 0x9D, 0xF5, 0x0E, 0x52, 0x1D, 0x14, 0x28, 0xA8, 0x5D, 0xAF, 0x04, 0xF9, 0xAD, 0x21, 0x50, 0xE0 },
 | |
| +	{ 0xE3, 0xE3, 0xC4, 0xAA, 0x3A, 0xCB, 0xBC, 0x85, 0x33, 0x2A, 0xF9, 0xD5, 0x64, 0xBC, 0x24, 0x16, 0x5E, 0x16, 0x87, 0xF6, 0xB1, 0xAD, 0xCB, 0xFA, 0xE7, 0x7A, 0x8F, 0x03, 0xC7, 0x2A, 0xC2, 0x8C },
 | |
| +	{ 0x67, 0x46, 0xC8, 0x0B, 0x4E, 0xB5, 0x6A, 0xEA, 0x45, 0xE6, 0x4E, 0x72, 0x89, 0xBB, 0xA3, 0xED, 0xBF, 0x45, 0xEC, 0xF8, 0x20, 0x64, 0x81, 0xFF, 0x63, 0x02, 0x12, 0x29, 0x84, 0xCD, 0x52, 0x6A },
 | |
| +	{ 0x2B, 0x62, 0x8E, 0x52, 0x76, 0x4D, 0x7D, 0x62, 0xC0, 0x86, 0x8B, 0x21, 0x23, 0x57, 0xCD, 0xD1, 0x2D, 0x91, 0x49, 0x82, 0x2F, 0x4E, 0x98, 0x45, 0xD9, 0x18, 0xA0, 0x8D, 0x1A, 0xE9, 0x90, 0xC0 },
 | |
| +	{ 0xE4, 0xBF, 0xE8, 0x0D, 0x58, 0xC9, 0x19, 0x94, 0x61, 0x39, 0x09, 0xDC, 0x4B, 0x1A, 0x12, 0x49, 0x68, 0x96, 0xC0, 0x04, 0xAF, 0x7B, 0x57, 0x01, 0x48, 0x3D, 0xE4, 0x5D, 0x28, 0x23, 0xD7, 0x8E },
 | |
| +	{ 0xEB, 0xB4, 0xBA, 0x15, 0x0C, 0xEF, 0x27, 0x34, 0x34, 0x5B, 0x5D, 0x64, 0x1B, 0xBE, 0xD0, 0x3A, 0x21, 0xEA, 0xFA, 0xE9, 0x33, 0xC9, 0x9E, 0x00, 0x92, 0x12, 0xEF, 0x04, 0x57, 0x4A, 0x85, 0x30 },
 | |
| +	{ 0x39, 0x66, 0xEC, 0x73, 0xB1, 0x54, 0xAC, 0xC6, 0x97, 0xAC, 0x5C, 0xF5, 0xB2, 0x4B, 0x40, 0xBD, 0xB0, 0xDB, 0x9E, 0x39, 0x88, 0x36, 0xD7, 0x6D, 0x4B, 0x88, 0x0E, 0x3B, 0x2A, 0xF1, 0xAA, 0x27 },
 | |
| +	{ 0xEF, 0x7E, 0x48, 0x31, 0xB3, 0xA8, 0x46, 0x36, 0x51, 0x8D, 0x6E, 0x4B, 0xFC, 0xE6, 0x4A, 0x43, 0xDB, 0x2A, 0x5D, 0xDA, 0x9C, 0xCA, 0x2B, 0x44, 0xF3, 0x90, 0x33, 0xBD, 0xC4, 0x0D, 0x62, 0x43 },
 | |
| +	{ 0x7A, 0xBF, 0x6A, 0xCF, 0x5C, 0x8E, 0x54, 0x9D, 0xDB, 0xB1, 0x5A, 0xE8, 0xD8, 0xB3, 0x88, 0xC1, 0xC1, 0x97, 0xE6, 0x98, 0x73, 0x7C, 0x97, 0x85, 0x50, 0x1E, 0xD1, 0xF9, 0x49, 0x30, 0xB7, 0xD9 },
 | |
| +	{ 0x88, 0x01, 0x8D, 0xED, 0x66, 0x81, 0x3F, 0x0C, 0xA9, 0x5D, 0xEF, 0x47, 0x4C, 0x63, 0x06, 0x92, 0x01, 0x99, 0x67, 0xB9, 0xE3, 0x68, 0x88, 0xDA, 0xDD, 0x94, 0x12, 0x47, 0x19, 0xB6, 0x82, 0xF6 },
 | |
| +	{ 0x39, 0x30, 0x87, 0x6B, 0x9F, 0xC7, 0x52, 0x90, 0x36, 0xB0, 0x08, 0xB1, 0xB8, 0xBB, 0x99, 0x75, 0x22, 0xA4, 0x41, 0x63, 0x5A, 0x0C, 0x25, 0xEC, 0x02, 0xFB, 0x6D, 0x90, 0x26, 0xE5, 0x5A, 0x97 },
 | |
| +	{ 0x0A, 0x40, 0x49, 0xD5, 0x7E, 0x83, 0x3B, 0x56, 0x95, 0xFA, 0xC9, 0x3D, 0xD1, 0xFB, 0xEF, 0x31, 0x66, 0xB4, 0x4B, 0x12, 0xAD, 0x11, 0x24, 0x86, 0x62, 0x38, 0x3A, 0xE0, 0x51, 0xE1, 0x58, 0x27 },
 | |
| +	{ 0x81, 0xDC, 0xC0, 0x67, 0x8B, 0xB6, 0xA7, 0x65, 0xE4, 0x8C, 0x32, 0x09, 0x65, 0x4F, 0xE9, 0x00, 0x89, 0xCE, 0x44, 0xFF, 0x56, 0x18, 0x47, 0x7E, 0x39, 0xAB, 0x28, 0x64, 0x76, 0xDF, 0x05, 0x2B },
 | |
| +	{ 0xE6, 0x9B, 0x3A, 0x36, 0xA4, 0x46, 0x19, 0x12, 0xDC, 0x08, 0x34, 0x6B, 0x11, 0xDD, 0xCB, 0x9D, 0xB7, 0x96, 0xF8, 0x85, 0xFD, 0x01, 0x93, 0x6E, 0x66, 0x2F, 0xE2, 0x92, 0x97, 0xB0, 0x99, 0xA4 },
 | |
| +	{ 0x5A, 0xC6, 0x50, 0x3B, 0x0D, 0x8D, 0xA6, 0x91, 0x76, 0x46, 0xE6, 0xDC, 0xC8, 0x7E, 0xDC, 0x58, 0xE9, 0x42, 0x45, 0x32, 0x4C, 0xC2, 0x04, 0xF4, 0xDD, 0x4A, 0xF0, 0x15, 0x63, 0xAC, 0xD4, 0x27 },
 | |
| +	{ 0xDF, 0x6D, 0xDA, 0x21, 0x35, 0x9A, 0x30, 0xBC, 0x27, 0x17, 0x80, 0x97, 0x1C, 0x1A, 0xBD, 0x56, 0xA6, 0xEF, 0x16, 0x7E, 0x48, 0x08, 0x87, 0x88, 0x8E, 0x73, 0xA8, 0x6D, 0x3B, 0xF6, 0x05, 0xE9 },
 | |
| +	{ 0xE8, 0xE6, 0xE4, 0x70, 0x71, 0xE7, 0xB7, 0xDF, 0x25, 0x80, 0xF2, 0x25, 0xCF, 0xBB, 0xED, 0xF8, 0x4C, 0xE6, 0x77, 0x46, 0x62, 0x66, 0x28, 0xD3, 0x30, 0x97, 0xE4, 0xB7, 0xDC, 0x57, 0x11, 0x07 },
 | |
| +	{ 0x53, 0xE4, 0x0E, 0xAD, 0x62, 0x05, 0x1E, 0x19, 0xCB, 0x9B, 0xA8, 0x13, 0x3E, 0x3E, 0x5C, 0x1C, 0xE0, 0x0D, 0xDC, 0xAD, 0x8A, 0xCF, 0x34, 0x2A, 0x22, 0x43, 0x60, 0xB0, 0xAC, 0xC1, 0x47, 0x77 },
 | |
| +	{ 0x9C, 0xCD, 0x53, 0xFE, 0x80, 0xBE, 0x78, 0x6A, 0xA9, 0x84, 0x63, 0x84, 0x62, 0xFB, 0x28, 0xAF, 0xDF, 0x12, 0x2B, 0x34, 0xD7, 0x8F, 0x46, 0x87, 0xEC, 0x63, 0x2B, 0xB1, 0x9D, 0xE2, 0x37, 0x1A },
 | |
| +	{ 0xCB, 0xD4, 0x80, 0x52, 0xC4, 0x8D, 0x78, 0x84, 0x66, 0xA3, 0xE8, 0x11, 0x8C, 0x56, 0xC9, 0x7F, 0xE1, 0x46, 0xE5, 0x54, 0x6F, 0xAA, 0xF9, 0x3E, 0x2B, 0xC3, 0xC4, 0x7E, 0x45, 0x93, 0x97, 0x53 },
 | |
| +	{ 0x25, 0x68, 0x83, 0xB1, 0x4E, 0x2A, 0xF4, 0x4D, 0xAD, 0xB2, 0x8E, 0x1B, 0x34, 0xB2, 0xAC, 0x0F, 0x0F, 0x4C, 0x91, 0xC3, 0x4E, 0xC9, 0x16, 0x9E, 0x29, 0x03, 0x61, 0x58, 0xAC, 0xAA, 0x95, 0xB9 },
 | |
| +	{ 0x44, 0x71, 0xB9, 0x1A, 0xB4, 0x2D, 0xB7, 0xC4, 0xDD, 0x84, 0x90, 0xAB, 0x95, 0xA2, 0xEE, 0x8D, 0x04, 0xE3, 0xEF, 0x5C, 0x3D, 0x6F, 0xC7, 0x1A, 0xC7, 0x4B, 0x2B, 0x26, 0x91, 0x4D, 0x16, 0x41 },
 | |
| +	{ 0xA5, 0xEB, 0x08, 0x03, 0x8F, 0x8F, 0x11, 0x55, 0xED, 0x86, 0xE6, 0x31, 0x90, 0x6F, 0xC1, 0x30, 0x95, 0xF6, 0xBB, 0xA4, 0x1D, 0xE5, 0xD4, 0xE7, 0x95, 0x75, 0x8E, 0xC8, 0xC8, 0xDF, 0x8A, 0xF1 },
 | |
| +	{ 0xDC, 0x1D, 0xB6, 0x4E, 0xD8, 0xB4, 0x8A, 0x91, 0x0E, 0x06, 0x0A, 0x6B, 0x86, 0x63, 0x74, 0xC5, 0x78, 0x78, 0x4E, 0x9A, 0xC4, 0x9A, 0xB2, 0x77, 0x40, 0x92, 0xAC, 0x71, 0x50, 0x19, 0x34, 0xAC },
 | |
| +	{ 0x28, 0x54, 0x13, 0xB2, 0xF2, 0xEE, 0x87, 0x3D, 0x34, 0x31, 0x9E, 0xE0, 0xBB, 0xFB, 0xB9, 0x0F, 0x32, 0xDA, 0x43, 0x4C, 0xC8, 0x7E, 0x3D, 0xB5, 0xED, 0x12, 0x1B, 0xB3, 0x98, 0xED, 0x96, 0x4B },
 | |
| +	{ 0x02, 0x16, 0xE0, 0xF8, 0x1F, 0x75, 0x0F, 0x26, 0xF1, 0x99, 0x8B, 0xC3, 0x93, 0x4E, 0x3E, 0x12, 0x4C, 0x99, 0x45, 0xE6, 0x85, 0xA6, 0x0B, 0x25, 0xE8, 0xFB, 0xD9, 0x62, 0x5A, 0xB6, 0xB5, 0x99 },
 | |
| +	{ 0x38, 0xC4, 0x10, 0xF5, 0xB9, 0xD4, 0x07, 0x20, 0x50, 0x75, 0x5B, 0x31, 0xDC, 0xA8, 0x9F, 0xD5, 0x39, 0x5C, 0x67, 0x85, 0xEE, 0xB3, 0xD7, 0x90, 0xF3, 0x20, 0xFF, 0x94, 0x1C, 0x5A, 0x93, 0xBF },
 | |
| +	{ 0xF1, 0x84, 0x17, 0xB3, 0x9D, 0x61, 0x7A, 0xB1, 0xC1, 0x8F, 0xDF, 0x91, 0xEB, 0xD0, 0xFC, 0x6D, 0x55, 0x16, 0xBB, 0x34, 0xCF, 0x39, 0x36, 0x40, 0x37, 0xBC, 0xE8, 0x1F, 0xA0, 0x4C, 0xEC, 0xB1 },
 | |
| +	{ 0x1F, 0xA8, 0x77, 0xDE, 0x67, 0x25, 0x9D, 0x19, 0x86, 0x3A, 0x2A, 0x34, 0xBC, 0xC6, 0x96, 0x2A, 0x2B, 0x25, 0xFC, 0xBF, 0x5C, 0xBE, 0xCD, 0x7E, 0xDE, 0x8F, 0x1F, 0xA3, 0x66, 0x88, 0xA7, 0x96 },
 | |
| +	{ 0x5B, 0xD1, 0x69, 0xE6, 0x7C, 0x82, 0xC2, 0xC2, 0xE9, 0x8E, 0xF7, 0x00, 0x8B, 0xDF, 0x26, 0x1F, 0x2D, 0xDF, 0x30, 0xB1, 0xC0, 0x0F, 0x9E, 0x7F, 0x27, 0x5B, 0xB3, 0xE8, 0xA2, 0x8D, 0xC9, 0xA2 },
 | |
| +	{ 0xC8, 0x0A, 0xBE, 0xEB, 0xB6, 0x69, 0xAD, 0x5D, 0xEE, 0xB5, 0xF5, 0xEC, 0x8E, 0xA6, 0xB7, 0xA0, 0x5D, 0xDF, 0x7D, 0x31, 0xEC, 0x4C, 0x0A, 0x2E, 0xE2, 0x0B, 0x0B, 0x98, 0xCA, 0xEC, 0x67, 0x46 },
 | |
| +	{ 0xE7, 0x6D, 0x3F, 0xBD, 0xA5, 0xBA, 0x37, 0x4E, 0x6B, 0xF8, 0xE5, 0x0F, 0xAD, 0xC3, 0xBB, 0xB9, 0xBA, 0x5C, 0x20, 0x6E, 0xBD, 0xEC, 0x89, 0xA3, 0xA5, 0x4C, 0xF3, 0xDD, 0x84, 0xA0, 0x70, 0x16 },
 | |
| +	{ 0x7B, 0xBA, 0x9D, 0xC5, 0xB5, 0xDB, 0x20, 0x71, 0xD1, 0x77, 0x52, 0xB1, 0x04, 0x4C, 0x1E, 0xCE, 0xD9, 0x6A, 0xAF, 0x2D, 0xD4, 0x6E, 0x9B, 0x43, 0x37, 0x50, 0xE8, 0xEA, 0x0D, 0xCC, 0x18, 0x70 },
 | |
| +	{ 0xF2, 0x9B, 0x1B, 0x1A, 0xB9, 0xBA, 0xB1, 0x63, 0x01, 0x8E, 0xE3, 0xDA, 0x15, 0x23, 0x2C, 0xCA, 0x78, 0xEC, 0x52, 0xDB, 0xC3, 0x4E, 0xDA, 0x5B, 0x82, 0x2E, 0xC1, 0xD8, 0x0F, 0xC2, 0x1B, 0xD0 },
 | |
| +	{ 0x9E, 0xE3, 0xE3, 0xE7, 0xE9, 0x00, 0xF1, 0xE1, 0x1D, 0x30, 0x8C, 0x4B, 0x2B, 0x30, 0x76, 0xD2, 0x72, 0xCF, 0x70, 0x12, 0x4F, 0x9F, 0x51, 0xE1, 0xDA, 0x60, 0xF3, 0x78, 0x46, 0xCD, 0xD2, 0xF4 },
 | |
| +	{ 0x70, 0xEA, 0x3B, 0x01, 0x76, 0x92, 0x7D, 0x90, 0x96, 0xA1, 0x85, 0x08, 0xCD, 0x12, 0x3A, 0x29, 0x03, 0x25, 0x92, 0x0A, 0x9D, 0x00, 0xA8, 0x9B, 0x5D, 0xE0, 0x42, 0x73, 0xFB, 0xC7, 0x6B, 0x85 },
 | |
| +	{ 0x67, 0xDE, 0x25, 0xC0, 0x2A, 0x4A, 0xAB, 0xA2, 0x3B, 0xDC, 0x97, 0x3C, 0x8B, 0xB0, 0xB5, 0x79, 0x6D, 0x47, 0xCC, 0x06, 0x59, 0xD4, 0x3D, 0xFF, 0x1F, 0x97, 0xDE, 0x17, 0x49, 0x63, 0xB6, 0x8E },
 | |
| +	{ 0xB2, 0x16, 0x8E, 0x4E, 0x0F, 0x18, 0xB0, 0xE6, 0x41, 0x00, 0xB5, 0x17, 0xED, 0x95, 0x25, 0x7D, 0x73, 0xF0, 0x62, 0x0D, 0xF8, 0x85, 0xC1, 0x3D, 0x2E, 0xCF, 0x79, 0x36, 0x7B, 0x38, 0x4C, 0xEE },
 | |
| +	{ 0x2E, 0x7D, 0xEC, 0x24, 0x28, 0x85, 0x3B, 0x2C, 0x71, 0x76, 0x07, 0x45, 0x54, 0x1F, 0x7A, 0xFE, 0x98, 0x25, 0xB5, 0xDD, 0x77, 0xDF, 0x06, 0x51, 0x1D, 0x84, 0x41, 0xA9, 0x4B, 0xAC, 0xC9, 0x27 },
 | |
| +	{ 0xCA, 0x9F, 0xFA, 0xC4, 0xC4, 0x3F, 0x0B, 0x48, 0x46, 0x1D, 0xC5, 0xC2, 0x63, 0xBE, 0xA3, 0xF6, 0xF0, 0x06, 0x11, 0xCE, 0xAC, 0xAB, 0xF6, 0xF8, 0x95, 0xBA, 0x2B, 0x01, 0x01, 0xDB, 0xB6, 0x8D },
 | |
| +	{ 0x74, 0x10, 0xD4, 0x2D, 0x8F, 0xD1, 0xD5, 0xE9, 0xD2, 0xF5, 0x81, 0x5C, 0xB9, 0x34, 0x17, 0x99, 0x88, 0x28, 0xEF, 0x3C, 0x42, 0x30, 0xBF, 0xBD, 0x41, 0x2D, 0xF0, 0xA4, 0xA7, 0xA2, 0x50, 0x7A },
 | |
| +	{ 0x50, 0x10, 0xF6, 0x84, 0x51, 0x6D, 0xCC, 0xD0, 0xB6, 0xEE, 0x08, 0x52, 0xC2, 0x51, 0x2B, 0x4D, 0xC0, 0x06, 0x6C, 0xF0, 0xD5, 0x6F, 0x35, 0x30, 0x29, 0x78, 0xDB, 0x8A, 0xE3, 0x2C, 0x6A, 0x81 },
 | |
| +	{ 0xAC, 0xAA, 0xB5, 0x85, 0xF7, 0xB7, 0x9B, 0x71, 0x99, 0x35, 0xCE, 0xB8, 0x95, 0x23, 0xDD, 0xC5, 0x48, 0x27, 0xF7, 0x5C, 0x56, 0x88, 0x38, 0x56, 0x15, 0x4A, 0x56, 0xCD, 0xCD, 0x5E, 0xE9, 0x88 },
 | |
| +	{ 0x66, 0x6D, 0xE5, 0xD1, 0x44, 0x0F, 0xEE, 0x73, 0x31, 0xAA, 0xF0, 0x12, 0x3A, 0x62, 0xEF, 0x2D, 0x8B, 0xA5, 0x74, 0x53, 0xA0, 0x76, 0x96, 0x35, 0xAC, 0x6C, 0xD0, 0x1E, 0x63, 0x3F, 0x77, 0x12 },
 | |
| +	{ 0xA6, 0xF9, 0x86, 0x58, 0xF6, 0xEA, 0xBA, 0xF9, 0x02, 0xD8, 0xB3, 0x87, 0x1A, 0x4B, 0x10, 0x1D, 0x16, 0x19, 0x6E, 0x8A, 0x4B, 0x24, 0x1E, 0x15, 0x58, 0xFE, 0x29, 0x96, 0x6E, 0x10, 0x3E, 0x8D },
 | |
| +	{ 0x89, 0x15, 0x46, 0xA8, 0xB2, 0x9F, 0x30, 0x47, 0xDD, 0xCF, 0xE5, 0xB0, 0x0E, 0x45, 0xFD, 0x55, 0x75, 0x63, 0x73, 0x10, 0x5E, 0xA8, 0x63, 0x7D, 0xFC, 0xFF, 0x54, 0x7B, 0x6E, 0xA9, 0x53, 0x5F },
 | |
| +	{ 0x18, 0xDF, 0xBC, 0x1A, 0xC5, 0xD2, 0x5B, 0x07, 0x61, 0x13, 0x7D, 0xBD, 0x22, 0xC1, 0x7C, 0x82, 0x9D, 0x0F, 0x0E, 0xF1, 0xD8, 0x23, 0x44, 0xE9, 0xC8, 0x9C, 0x28, 0x66, 0x94, 0xDA, 0x24, 0xE8 },
 | |
| +	{ 0xB5, 0x4B, 0x9B, 0x67, 0xF8, 0xFE, 0xD5, 0x4B, 0xBF, 0x5A, 0x26, 0x66, 0xDB, 0xDF, 0x4B, 0x23, 0xCF, 0xF1, 0xD1, 0xB6, 0xF4, 0xAF, 0xC9, 0x85, 0xB2, 0xE6, 0xD3, 0x30, 0x5A, 0x9F, 0xF8, 0x0F },
 | |
| +	{ 0x7D, 0xB4, 0x42, 0xE1, 0x32, 0xBA, 0x59, 0xBC, 0x12, 0x89, 0xAA, 0x98, 0xB0, 0xD3, 0xE8, 0x06, 0x00, 0x4F, 0x8E, 0xC1, 0x28, 0x11, 0xAF, 0x1E, 0x2E, 0x33, 0xC6, 0x9B, 0xFD, 0xE7, 0x29, 0xE1 },
 | |
| +	{ 0x25, 0x0F, 0x37, 0xCD, 0xC1, 0x5E, 0x81, 0x7D, 0x2F, 0x16, 0x0D, 0x99, 0x56, 0xC7, 0x1F, 0xE3, 0xEB, 0x5D, 0xB7, 0x45, 0x56, 0xE4, 0xAD, 0xF9, 0xA4, 0xFF, 0xAF, 0xBA, 0x74, 0x01, 0x03, 0x96 },
 | |
| +	{ 0x4A, 0xB8, 0xA3, 0xDD, 0x1D, 0xDF, 0x8A, 0xD4, 0x3D, 0xAB, 0x13, 0xA2, 0x7F, 0x66, 0xA6, 0x54, 0x4F, 0x29, 0x05, 0x97, 0xFA, 0x96, 0x04, 0x0E, 0x0E, 0x1D, 0xB9, 0x26, 0x3A, 0xA4, 0x79, 0xF8 },
 | |
| +	{ 0xEE, 0x61, 0x72, 0x7A, 0x07, 0x66, 0xDF, 0x93, 0x9C, 0xCD, 0xC8, 0x60, 0x33, 0x40, 0x44, 0xC7, 0x9A, 0x3C, 0x9B, 0x15, 0x62, 0x00, 0xBC, 0x3A, 0xA3, 0x29, 0x73, 0x48, 0x3D, 0x83, 0x41, 0xAE },
 | |
| +	{ 0x3F, 0x68, 0xC7, 0xEC, 0x63, 0xAC, 0x11, 0xEB, 0xB9, 0x8F, 0x94, 0xB3, 0x39, 0xB0, 0x5C, 0x10, 0x49, 0x84, 0xFD, 0xA5, 0x01, 0x03, 0x06, 0x01, 0x44, 0xE5, 0xA2, 0xBF, 0xCC, 0xC9, 0xDA, 0x95 },
 | |
| +	{ 0x05, 0x6F, 0x29, 0x81, 0x6B, 0x8A, 0xF8, 0xF5, 0x66, 0x82, 0xBC, 0x4D, 0x7C, 0xF0, 0x94, 0x11, 0x1D, 0xA7, 0x73, 0x3E, 0x72, 0x6C, 0xD1, 0x3D, 0x6B, 0x3E, 0x8E, 0xA0, 0x3E, 0x92, 0xA0, 0xD5 },
 | |
| +	{ 0xF5, 0xEC, 0x43, 0xA2, 0x8A, 0xCB, 0xEF, 0xF1, 0xF3, 0x31, 0x8A, 0x5B, 0xCA, 0xC7, 0xC6, 0x6D, 0xDB, 0x52, 0x30, 0xB7, 0x9D, 0xB2, 0xD1, 0x05, 0xBC, 0xBE, 0x15, 0xF3, 0xC1, 0x14, 0x8D, 0x69 },
 | |
| +	{ 0x2A, 0x69, 0x60, 0xAD, 0x1D, 0x8D, 0xD5, 0x47, 0x55, 0x5C, 0xFB, 0xD5, 0xE4, 0x60, 0x0F, 0x1E, 0xAA, 0x1C, 0x8E, 0xDA, 0x34, 0xDE, 0x03, 0x74, 0xEC, 0x4A, 0x26, 0xEA, 0xAA, 0xA3, 0x3B, 0x4E },
 | |
| +	{ 0xDC, 0xC1, 0xEA, 0x7B, 0xAA, 0xB9, 0x33, 0x84, 0xF7, 0x6B, 0x79, 0x68, 0x66, 0x19, 0x97, 0x54, 0x74, 0x2F, 0x7B, 0x96, 0xD6, 0xB4, 0xC1, 0x20, 0x16, 0x5C, 0x04, 0xA6, 0xC4, 0xF5, 0xCE, 0x10 },
 | |
| +	{ 0x13, 0xD5, 0xDF, 0x17, 0x92, 0x21, 0x37, 0x9C, 0x6A, 0x78, 0xC0, 0x7C, 0x79, 0x3F, 0xF5, 0x34, 0x87, 0xCA, 0xE6, 0xBF, 0x9F, 0xE8, 0x82, 0x54, 0x1A, 0xB0, 0xE7, 0x35, 0xE3, 0xEA, 0xDA, 0x3B },
 | |
| +	{ 0x8C, 0x59, 0xE4, 0x40, 0x76, 0x41, 0xA0, 0x1E, 0x8F, 0xF9, 0x1F, 0x99, 0x80, 0xDC, 0x23, 0x6F, 0x4E, 0xCD, 0x6F, 0xCF, 0x52, 0x58, 0x9A, 0x09, 0x9A, 0x96, 0x16, 0x33, 0x96, 0x77, 0x14, 0xE1 },
 | |
| +	{ 0x83, 0x3B, 0x1A, 0xC6, 0xA2, 0x51, 0xFD, 0x08, 0xFD, 0x6D, 0x90, 0x8F, 0xEA, 0x2A, 0x4E, 0xE1, 0xE0, 0x40, 0xBC, 0xA9, 0x3F, 0xC1, 0xA3, 0x8E, 0xC3, 0x82, 0x0E, 0x0C, 0x10, 0xBD, 0x82, 0xEA },
 | |
| +	{ 0xA2, 0x44, 0xF9, 0x27, 0xF3, 0xB4, 0x0B, 0x8F, 0x6C, 0x39, 0x15, 0x70, 0xC7, 0x65, 0x41, 0x8F, 0x2F, 0x6E, 0x70, 0x8E, 0xAC, 0x90, 0x06, 0xC5, 0x1A, 0x7F, 0xEF, 0xF4, 0xAF, 0x3B, 0x2B, 0x9E },
 | |
| +	{ 0x3D, 0x99, 0xED, 0x95, 0x50, 0xCF, 0x11, 0x96, 0xE6, 0xC4, 0xD2, 0x0C, 0x25, 0x96, 0x20, 0xF8, 0x58, 0xC3, 0xD7, 0x03, 0x37, 0x4C, 0x12, 0x8C, 0xE7, 0xB5, 0x90, 0x31, 0x0C, 0x83, 0x04, 0x6D },
 | |
| +	{ 0x2B, 0x35, 0xC4, 0x7D, 0x7B, 0x87, 0x76, 0x1F, 0x0A, 0xE4, 0x3A, 0xC5, 0x6A, 0xC2, 0x7B, 0x9F, 0x25, 0x83, 0x03, 0x67, 0xB5, 0x95, 0xBE, 0x8C, 0x24, 0x0E, 0x94, 0x60, 0x0C, 0x6E, 0x33, 0x12 },
 | |
| +	{ 0x5D, 0x11, 0xED, 0x37, 0xD2, 0x4D, 0xC7, 0x67, 0x30, 0x5C, 0xB7, 0xE1, 0x46, 0x7D, 0x87, 0xC0, 0x65, 0xAC, 0x4B, 0xC8, 0xA4, 0x26, 0xDE, 0x38, 0x99, 0x1F, 0xF5, 0x9A, 0xA8, 0x73, 0x5D, 0x02 },
 | |
| +	{ 0xB8, 0x36, 0x47, 0x8E, 0x1C, 0xA0, 0x64, 0x0D, 0xCE, 0x6F, 0xD9, 0x10, 0xA5, 0x09, 0x62, 0x72, 0xC8, 0x33, 0x09, 0x90, 0xCD, 0x97, 0x86, 0x4A, 0xC2, 0xBF, 0x14, 0xEF, 0x6B, 0x23, 0x91, 0x4A },
 | |
| +	{ 0x91, 0x00, 0xF9, 0x46, 0xD6, 0xCC, 0xDE, 0x3A, 0x59, 0x7F, 0x90, 0xD3, 0x9F, 0xC1, 0x21, 0x5B, 0xAD, 0xDC, 0x74, 0x13, 0x64, 0x3D, 0x85, 0xC2, 0x1C, 0x3E, 0xEE, 0x5D, 0x2D, 0xD3, 0x28, 0x94 },
 | |
| +	{ 0xDA, 0x70, 0xEE, 0xDD, 0x23, 0xE6, 0x63, 0xAA, 0x1A, 0x74, 0xB9, 0x76, 0x69, 0x35, 0xB4, 0x79, 0x22, 0x2A, 0x72, 0xAF, 0xBA, 0x5C, 0x79, 0x51, 0x58, 0xDA, 0xD4, 0x1A, 0x3B, 0xD7, 0x7E, 0x40 },
 | |
| +	{ 0xF0, 0x67, 0xED, 0x6A, 0x0D, 0xBD, 0x43, 0xAA, 0x0A, 0x92, 0x54, 0xE6, 0x9F, 0xD6, 0x6B, 0xDD, 0x8A, 0xCB, 0x87, 0xDE, 0x93, 0x6C, 0x25, 0x8C, 0xFB, 0x02, 0x28, 0x5F, 0x2C, 0x11, 0xFA, 0x79 },
 | |
| +	{ 0x71, 0x5C, 0x99, 0xC7, 0xD5, 0x75, 0x80, 0xCF, 0x97, 0x53, 0xB4, 0xC1, 0xD7, 0x95, 0xE4, 0x5A, 0x83, 0xFB, 0xB2, 0x28, 0xC0, 0xD3, 0x6F, 0xBE, 0x20, 0xFA, 0xF3, 0x9B, 0xDD, 0x6D, 0x4E, 0x85 },
 | |
| +	{ 0xE4, 0x57, 0xD6, 0xAD, 0x1E, 0x67, 0xCB, 0x9B, 0xBD, 0x17, 0xCB, 0xD6, 0x98, 0xFA, 0x6D, 0x7D, 0xAE, 0x0C, 0x9B, 0x7A, 0xD6, 0xCB, 0xD6, 0x53, 0x96, 0x34, 0xE3, 0x2A, 0x71, 0x9C, 0x84, 0x92 },
 | |
| +	{ 0xEC, 0xE3, 0xEA, 0x81, 0x03, 0xE0, 0x24, 0x83, 0xC6, 0x4A, 0x70, 0xA4, 0xBD, 0xCE, 0xE8, 0xCE, 0xB6, 0x27, 0x8F, 0x25, 0x33, 0xF3, 0xF4, 0x8D, 0xBE, 0xED, 0xFB, 0xA9, 0x45, 0x31, 0xD4, 0xAE },
 | |
| +	{ 0x38, 0x8A, 0xA5, 0xD3, 0x66, 0x7A, 0x97, 0xC6, 0x8D, 0x3D, 0x56, 0xF8, 0xF3, 0xEE, 0x8D, 0x3D, 0x36, 0x09, 0x1F, 0x17, 0xFE, 0x5D, 0x1B, 0x0D, 0x5D, 0x84, 0xC9, 0x3B, 0x2F, 0xFE, 0x40, 0xBD },
 | |
| +	{ 0x8B, 0x6B, 0x31, 0xB9, 0xAD, 0x7C, 0x3D, 0x5C, 0xD8, 0x4B, 0xF9, 0x89, 0x47, 0xB9, 0xCD, 0xB5, 0x9D, 0xF8, 0xA2, 0x5F, 0xF7, 0x38, 0x10, 0x10, 0x13, 0xBE, 0x4F, 0xD6, 0x5E, 0x1D, 0xD1, 0xA3 },
 | |
| +	{ 0x06, 0x62, 0x91, 0xF6, 0xBB, 0xD2, 0x5F, 0x3C, 0x85, 0x3D, 0xB7, 0xD8, 0xB9, 0x5C, 0x9A, 0x1C, 0xFB, 0x9B, 0xF1, 0xC1, 0xC9, 0x9F, 0xB9, 0x5A, 0x9B, 0x78, 0x69, 0xD9, 0x0F, 0x1C, 0x29, 0x03 },
 | |
| +	{ 0xA7, 0x07, 0xEF, 0xBC, 0xCD, 0xCE, 0xED, 0x42, 0x96, 0x7A, 0x66, 0xF5, 0x53, 0x9B, 0x93, 0xED, 0x75, 0x60, 0xD4, 0x67, 0x30, 0x40, 0x16, 0xC4, 0x78, 0x0D, 0x77, 0x55, 0xA5, 0x65, 0xD4, 0xC4 },
 | |
| +	{ 0x38, 0xC5, 0x3D, 0xFB, 0x70, 0xBE, 0x7E, 0x79, 0x2B, 0x07, 0xA6, 0xA3, 0x5B, 0x8A, 0x6A, 0x0A, 0xBA, 0x02, 0xC5, 0xC5, 0xF3, 0x8B, 0xAF, 0x5C, 0x82, 0x3F, 0xDF, 0xD9, 0xE4, 0x2D, 0x65, 0x7E },
 | |
| +	{ 0xF2, 0x91, 0x13, 0x86, 0x50, 0x1D, 0x9A, 0xB9, 0xD7, 0x20, 0xCF, 0x8A, 0xD1, 0x05, 0x03, 0xD5, 0x63, 0x4B, 0xF4, 0xB7, 0xD1, 0x2B, 0x56, 0xDF, 0xB7, 0x4F, 0xEC, 0xC6, 0xE4, 0x09, 0x3F, 0x68 },
 | |
| +	{ 0xC6, 0xF2, 0xBD, 0xD5, 0x2B, 0x81, 0xE6, 0xE4, 0xF6, 0x59, 0x5A, 0xBD, 0x4D, 0x7F, 0xB3, 0x1F, 0x65, 0x11, 0x69, 0xD0, 0x0F, 0xF3, 0x26, 0x92, 0x6B, 0x34, 0x94, 0x7B, 0x28, 0xA8, 0x39, 0x59 },
 | |
| +	{ 0x29, 0x3D, 0x94, 0xB1, 0x8C, 0x98, 0xBB, 0x32, 0x23, 0x36, 0x6B, 0x8C, 0xE7, 0x4C, 0x28, 0xFB, 0xDF, 0x28, 0xE1, 0xF8, 0x4A, 0x33, 0x50, 0xB0, 0xEB, 0x2D, 0x18, 0x04, 0xA5, 0x77, 0x57, 0x9B },
 | |
| +	{ 0x2C, 0x2F, 0xA5, 0xC0, 0xB5, 0x15, 0x33, 0x16, 0x5B, 0xC3, 0x75, 0xC2, 0x2E, 0x27, 0x81, 0x76, 0x82, 0x70, 0xA3, 0x83, 0x98, 0x5D, 0x13, 0xBD, 0x6B, 0x67, 0xB6, 0xFD, 0x67, 0xF8, 0x89, 0xEB },
 | |
| +	{ 0xCA, 0xA0, 0x9B, 0x82, 0xB7, 0x25, 0x62, 0xE4, 0x3F, 0x4B, 0x22, 0x75, 0xC0, 0x91, 0x91, 0x8E, 0x62, 0x4D, 0x91, 0x16, 0x61, 0xCC, 0x81, 0x1B, 0xB5, 0xFA, 0xEC, 0x51, 0xF6, 0x08, 0x8E, 0xF7 },
 | |
| +	{ 0x24, 0x76, 0x1E, 0x45, 0xE6, 0x74, 0x39, 0x53, 0x79, 0xFB, 0x17, 0x72, 0x9C, 0x78, 0xCB, 0x93, 0x9E, 0x6F, 0x74, 0xC5, 0xDF, 0xFB, 0x9C, 0x96, 0x1F, 0x49, 0x59, 0x82, 0xC3, 0xED, 0x1F, 0xE3 },
 | |
| +	{ 0x55, 0xB7, 0x0A, 0x82, 0x13, 0x1E, 0xC9, 0x48, 0x88, 0xD7, 0xAB, 0x54, 0xA7, 0xC5, 0x15, 0x25, 0x5C, 0x39, 0x38, 0xBB, 0x10, 0xBC, 0x78, 0x4D, 0xC9, 0xB6, 0x7F, 0x07, 0x6E, 0x34, 0x1A, 0x73 },
 | |
| +	{ 0x6A, 0xB9, 0x05, 0x7B, 0x97, 0x7E, 0xBC, 0x3C, 0xA4, 0xD4, 0xCE, 0x74, 0x50, 0x6C, 0x25, 0xCC, 0xCD, 0xC5, 0x66, 0x49, 0x7C, 0x45, 0x0B, 0x54, 0x15, 0xA3, 0x94, 0x86, 0xF8, 0x65, 0x7A, 0x03 },
 | |
| +	{ 0x24, 0x06, 0x6D, 0xEE, 0xE0, 0xEC, 0xEE, 0x15, 0xA4, 0x5F, 0x0A, 0x32, 0x6D, 0x0F, 0x8D, 0xBC, 0x79, 0x76, 0x1E, 0xBB, 0x93, 0xCF, 0x8C, 0x03, 0x77, 0xAF, 0x44, 0x09, 0x78, 0xFC, 0xF9, 0x94 },
 | |
| +	{ 0x20, 0x00, 0x0D, 0x3F, 0x66, 0xBA, 0x76, 0x86, 0x0D, 0x5A, 0x95, 0x06, 0x88, 0xB9, 0xAA, 0x0D, 0x76, 0xCF, 0xEA, 0x59, 0xB0, 0x05, 0xD8, 0x59, 0x91, 0x4B, 0x1A, 0x46, 0x65, 0x3A, 0x93, 0x9B },
 | |
| +	{ 0xB9, 0x2D, 0xAA, 0x79, 0x60, 0x3E, 0x3B, 0xDB, 0xC3, 0xBF, 0xE0, 0xF4, 0x19, 0xE4, 0x09, 0xB2, 0xEA, 0x10, 0xDC, 0x43, 0x5B, 0xEE, 0xFE, 0x29, 0x59, 0xDA, 0x16, 0x89, 0x5D, 0x5D, 0xCA, 0x1C },
 | |
| +	{ 0xE9, 0x47, 0x94, 0x87, 0x05, 0xB2, 0x06, 0xD5, 0x72, 0xB0, 0xE8, 0xF6, 0x2F, 0x66, 0xA6, 0x55, 0x1C, 0xBD, 0x6B, 0xC3, 0x05, 0xD2, 0x6C, 0xE7, 0x53, 0x9A, 0x12, 0xF9, 0xAA, 0xDF, 0x75, 0x71 },
 | |
| +	{ 0x3D, 0x67, 0xC1, 0xB3, 0xF9, 0xB2, 0x39, 0x10, 0xE3, 0xD3, 0x5E, 0x6B, 0x0F, 0x2C, 0xCF, 0x44, 0xA0, 0xB5, 0x40, 0xA4, 0x5C, 0x18, 0xBA, 0x3C, 0x36, 0x26, 0x4D, 0xD4, 0x8E, 0x96, 0xAF, 0x6A },
 | |
| +	{ 0xC7, 0x55, 0x8B, 0xAB, 0xDA, 0x04, 0xBC, 0xCB, 0x76, 0x4D, 0x0B, 0xBF, 0x33, 0x58, 0x42, 0x51, 0x41, 0x90, 0x2D, 0x22, 0x39, 0x1D, 0x9F, 0x8C, 0x59, 0x15, 0x9F, 0xEC, 0x9E, 0x49, 0xB1, 0x51 },
 | |
| +	{ 0x0B, 0x73, 0x2B, 0xB0, 0x35, 0x67, 0x5A, 0x50, 0xFF, 0x58, 0xF2, 0xC2, 0x42, 0xE4, 0x71, 0x0A, 0xEC, 0xE6, 0x46, 0x70, 0x07, 0x9C, 0x13, 0x04, 0x4C, 0x79, 0xC9, 0xB7, 0x49, 0x1F, 0x70, 0x00 },
 | |
| +	{ 0xD1, 0x20, 0xB5, 0xEF, 0x6D, 0x57, 0xEB, 0xF0, 0x6E, 0xAF, 0x96, 0xBC, 0x93, 0x3C, 0x96, 0x7B, 0x16, 0xCB, 0xE6, 0xE2, 0xBF, 0x00, 0x74, 0x1C, 0x30, 0xAA, 0x1C, 0x54, 0xBA, 0x64, 0x80, 0x1F },
 | |
| +	{ 0x58, 0xD2, 0x12, 0xAD, 0x6F, 0x58, 0xAE, 0xF0, 0xF8, 0x01, 0x16, 0xB4, 0x41, 0xE5, 0x7F, 0x61, 0x95, 0xBF, 0xEF, 0x26, 0xB6, 0x14, 0x63, 0xED, 0xEC, 0x11, 0x83, 0xCD, 0xB0, 0x4F, 0xE7, 0x6D },
 | |
| +	{ 0xB8, 0x83, 0x6F, 0x51, 0xD1, 0xE2, 0x9B, 0xDF, 0xDB, 0xA3, 0x25, 0x56, 0x53, 0x60, 0x26, 0x8B, 0x8F, 0xAD, 0x62, 0x74, 0x73, 0xED, 0xEC, 0xEF, 0x7E, 0xAE, 0xFE, 0xE8, 0x37, 0xC7, 0x40, 0x03 },
 | |
| +	{ 0xC5, 0x47, 0xA3, 0xC1, 0x24, 0xAE, 0x56, 0x85, 0xFF, 0xA7, 0xB8, 0xED, 0xAF, 0x96, 0xEC, 0x86, 0xF8, 0xB2, 0xD0, 0xD5, 0x0C, 0xEE, 0x8B, 0xE3, 0xB1, 0xF0, 0xC7, 0x67, 0x63, 0x06, 0x9D, 0x9C },
 | |
| +	{ 0x5D, 0x16, 0x8B, 0x76, 0x9A, 0x2F, 0x67, 0x85, 0x3D, 0x62, 0x95, 0xF7, 0x56, 0x8B, 0xE4, 0x0B, 0xB7, 0xA1, 0x6B, 0x8D, 0x65, 0xBA, 0x87, 0x63, 0x5D, 0x19, 0x78, 0xD2, 0xAB, 0x11, 0xBA, 0x2A },
 | |
| +	{ 0xA2, 0xF6, 0x75, 0xDC, 0x73, 0x02, 0x63, 0x8C, 0xB6, 0x02, 0x01, 0x06, 0x4C, 0xA5, 0x50, 0x77, 0x71, 0x4D, 0x71, 0xFE, 0x09, 0x6A, 0x31, 0x5F, 0x2F, 0xE7, 0x40, 0x12, 0x77, 0xCA, 0xA5, 0xAF },
 | |
| +	{ 0xC8, 0xAA, 0xB5, 0xCD, 0x01, 0x60, 0xAE, 0x78, 0xCD, 0x2E, 0x8A, 0xC5, 0xFB, 0x0E, 0x09, 0x3C, 0xDB, 0x5C, 0x4B, 0x60, 0x52, 0xA0, 0xA9, 0x7B, 0xB0, 0x42, 0x16, 0x82, 0x6F, 0xA7, 0xA4, 0x37 },
 | |
| +	{ 0xFF, 0x68, 0xCA, 0x40, 0x35, 0xBF, 0xEB, 0x43, 0xFB, 0xF1, 0x45, 0xFD, 0xDD, 0x5E, 0x43, 0xF1, 0xCE, 0xA5, 0x4F, 0x11, 0xF7, 0xBE, 0xE1, 0x30, 0x58, 0xF0, 0x27, 0x32, 0x9A, 0x4A, 0x5F, 0xA4 },
 | |
| +	{ 0x1D, 0x4E, 0x54, 0x87, 0xAE, 0x3C, 0x74, 0x0F, 0x2B, 0xA6, 0xE5, 0x41, 0xAC, 0x91, 0xBC, 0x2B, 0xFC, 0xD2, 0x99, 0x9C, 0x51, 0x8D, 0x80, 0x7B, 0x42, 0x67, 0x48, 0x80, 0x3A, 0x35, 0x0F, 0xD4 },
 | |
| +	{ 0x6D, 0x24, 0x4E, 0x1A, 0x06, 0xCE, 0x4E, 0xF5, 0x78, 0xDD, 0x0F, 0x63, 0xAF, 0xF0, 0x93, 0x67, 0x06, 0x73, 0x51, 0x19, 0xCA, 0x9C, 0x8D, 0x22, 0xD8, 0x6C, 0x80, 0x14, 0x14, 0xAB, 0x97, 0x41 },
 | |
| +	{ 0xDE, 0xCF, 0x73, 0x29, 0xDB, 0xCC, 0x82, 0x7B, 0x8F, 0xC5, 0x24, 0xC9, 0x43, 0x1E, 0x89, 0x98, 0x02, 0x9E, 0xCE, 0x12, 0xCE, 0x93, 0xB7, 0xB2, 0xF3, 0xE7, 0x69, 0xA9, 0x41, 0xFB, 0x8C, 0xEA },
 | |
| +	{ 0x2F, 0xAF, 0xCC, 0x0F, 0x2E, 0x63, 0xCB, 0xD0, 0x77, 0x55, 0xBE, 0x7B, 0x75, 0xEC, 0xEA, 0x0A, 0xDF, 0xF9, 0xAA, 0x5E, 0xDE, 0x2A, 0x52, 0xFD, 0xAB, 0x4D, 0xFD, 0x03, 0x74, 0xCD, 0x48, 0x3F },
 | |
| +	{ 0xAA, 0x85, 0x01, 0x0D, 0xD4, 0x6A, 0x54, 0x6B, 0x53, 0x5E, 0xF4, 0xCF, 0x5F, 0x07, 0xD6, 0x51, 0x61, 0xE8, 0x98, 0x28, 0xF3, 0xA7, 0x7D, 0xB7, 0xB9, 0xB5, 0x6F, 0x0D, 0xF5, 0x9A, 0xAE, 0x45 },
 | |
| +	{ 0x07, 0xE8, 0xE1, 0xEE, 0x73, 0x2C, 0xB0, 0xD3, 0x56, 0xC9, 0xC0, 0xD1, 0x06, 0x9C, 0x89, 0xD1, 0x7A, 0xDF, 0x6A, 0x9A, 0x33, 0x4F, 0x74, 0x5E, 0xC7, 0x86, 0x73, 0x32, 0x54, 0x8C, 0xA8, 0xE9 },
 | |
| +	{ 0x0E, 0x01, 0xE8, 0x1C, 0xAD, 0xA8, 0x16, 0x2B, 0xFD, 0x5F, 0x8A, 0x8C, 0x81, 0x8A, 0x6C, 0x69, 0xFE, 0xDF, 0x02, 0xCE, 0xB5, 0x20, 0x85, 0x23, 0xCB, 0xE5, 0x31, 0x3B, 0x89, 0xCA, 0x10, 0x53 },
 | |
| +	{ 0x6B, 0xB6, 0xC6, 0x47, 0x26, 0x55, 0x08, 0x43, 0x99, 0x85, 0x2E, 0x00, 0x24, 0x9F, 0x8C, 0xB2, 0x47, 0x89, 0x6D, 0x39, 0x2B, 0x02, 0xD7, 0x3B, 0x7F, 0x0D, 0xD8, 0x18, 0xE1, 0xE2, 0x9B, 0x07 },
 | |
| +	{ 0x42, 0xD4, 0x63, 0x6E, 0x20, 0x60, 0xF0, 0x8F, 0x41, 0xC8, 0x82, 0xE7, 0x6B, 0x39, 0x6B, 0x11, 0x2E, 0xF6, 0x27, 0xCC, 0x24, 0xC4, 0x3D, 0xD5, 0xF8, 0x3A, 0x1D, 0x1A, 0x7E, 0xAD, 0x71, 0x1A },
 | |
| +	{ 0x48, 0x58, 0xC9, 0xA1, 0x88, 0xB0, 0x23, 0x4F, 0xB9, 0xA8, 0xD4, 0x7D, 0x0B, 0x41, 0x33, 0x65, 0x0A, 0x03, 0x0B, 0xD0, 0x61, 0x1B, 0x87, 0xC3, 0x89, 0x2E, 0x94, 0x95, 0x1F, 0x8D, 0xF8, 0x52 },
 | |
| +	{ 0x3F, 0xAB, 0x3E, 0x36, 0x98, 0x8D, 0x44, 0x5A, 0x51, 0xC8, 0x78, 0x3E, 0x53, 0x1B, 0xE3, 0xA0, 0x2B, 0xE4, 0x0C, 0xD0, 0x47, 0x96, 0xCF, 0xB6, 0x1D, 0x40, 0x34, 0x74, 0x42, 0xD3, 0xF7, 0x94 },
 | |
| +	{ 0xEB, 0xAB, 0xC4, 0x96, 0x36, 0xBD, 0x43, 0x3D, 0x2E, 0xC8, 0xF0, 0xE5, 0x18, 0x73, 0x2E, 0xF8, 0xFA, 0x21, 0xD4, 0xD0, 0x71, 0xCC, 0x3B, 0xC4, 0x6C, 0xD7, 0x9F, 0xA3, 0x8A, 0x28, 0xB8, 0x10 },
 | |
| +	{ 0xA1, 0xD0, 0x34, 0x35, 0x23, 0xB8, 0x93, 0xFC, 0xA8, 0x4F, 0x47, 0xFE, 0xB4, 0xA6, 0x4D, 0x35, 0x0A, 0x17, 0xD8, 0xEE, 0xF5, 0x49, 0x7E, 0xCE, 0x69, 0x7D, 0x02, 0xD7, 0x91, 0x78, 0xB5, 0x91 },
 | |
| +	{ 0x26, 0x2E, 0xBF, 0xD9, 0x13, 0x0B, 0x7D, 0x28, 0x76, 0x0D, 0x08, 0xEF, 0x8B, 0xFD, 0x3B, 0x86, 0xCD, 0xD3, 0xB2, 0x11, 0x3D, 0x2C, 0xAE, 0xF7, 0xEA, 0x95, 0x1A, 0x30, 0x3D, 0xFA, 0x38, 0x46 },
 | |
| +	{ 0xF7, 0x61, 0x58, 0xED, 0xD5, 0x0A, 0x15, 0x4F, 0xA7, 0x82, 0x03, 0xED, 0x23, 0x62, 0x93, 0x2F, 0xCB, 0x82, 0x53, 0xAA, 0xE3, 0x78, 0x90, 0x3E, 0xDE, 0xD1, 0xE0, 0x3F, 0x70, 0x21, 0xA2, 0x57 },
 | |
| +	{ 0x26, 0x17, 0x8E, 0x95, 0x0A, 0xC7, 0x22, 0xF6, 0x7A, 0xE5, 0x6E, 0x57, 0x1B, 0x28, 0x4C, 0x02, 0x07, 0x68, 0x4A, 0x63, 0x34, 0xA1, 0x77, 0x48, 0xA9, 0x4D, 0x26, 0x0B, 0xC5, 0xF5, 0x52, 0x74 },
 | |
| +	{ 0xC3, 0x78, 0xD1, 0xE4, 0x93, 0xB4, 0x0E, 0xF1, 0x1F, 0xE6, 0xA1, 0x5D, 0x9C, 0x27, 0x37, 0xA3, 0x78, 0x09, 0x63, 0x4C, 0x5A, 0xBA, 0xD5, 0xB3, 0x3D, 0x7E, 0x39, 0x3B, 0x4A, 0xE0, 0x5D, 0x03 },
 | |
| +	{ 0x98, 0x4B, 0xD8, 0x37, 0x91, 0x01, 0xBE, 0x8F, 0xD8, 0x06, 0x12, 0xD8, 0xEA, 0x29, 0x59, 0xA7, 0x86, 0x5E, 0xC9, 0x71, 0x85, 0x23, 0x55, 0x01, 0x07, 0xAE, 0x39, 0x38, 0xDF, 0x32, 0x01, 0x1B },
 | |
| +	{ 0xC6, 0xF2, 0x5A, 0x81, 0x2A, 0x14, 0x48, 0x58, 0xAC, 0x5C, 0xED, 0x37, 0xA9, 0x3A, 0x9F, 0x47, 0x59, 0xBA, 0x0B, 0x1C, 0x0F, 0xDC, 0x43, 0x1D, 0xCE, 0x35, 0xF9, 0xEC, 0x1F, 0x1F, 0x4A, 0x99 },
 | |
| +	{ 0x92, 0x4C, 0x75, 0xC9, 0x44, 0x24, 0xFF, 0x75, 0xE7, 0x4B, 0x8B, 0x4E, 0x94, 0x35, 0x89, 0x58, 0xB0, 0x27, 0xB1, 0x71, 0xDF, 0x5E, 0x57, 0x89, 0x9A, 0xD0, 0xD4, 0xDA, 0xC3, 0x73, 0x53, 0xB6 },
 | |
| +	{ 0x0A, 0xF3, 0x58, 0x92, 0xA6, 0x3F, 0x45, 0x93, 0x1F, 0x68, 0x46, 0xED, 0x19, 0x03, 0x61, 0xCD, 0x07, 0x30, 0x89, 0xE0, 0x77, 0x16, 0x57, 0x14, 0xB5, 0x0B, 0x81, 0xA2, 0xE3, 0xDD, 0x9B, 0xA1 },
 | |
| +	{ 0xCC, 0x80, 0xCE, 0xFB, 0x26, 0xC3, 0xB2, 0xB0, 0xDA, 0xEF, 0x23, 0x3E, 0x60, 0x6D, 0x5F, 0xFC, 0x80, 0xFA, 0x17, 0x42, 0x7D, 0x18, 0xE3, 0x04, 0x89, 0x67, 0x3E, 0x06, 0xEF, 0x4B, 0x87, 0xF7 },
 | |
| +	{ 0xC2, 0xF8, 0xC8, 0x11, 0x74, 0x47, 0xF3, 0x97, 0x8B, 0x08, 0x18, 0xDC, 0xF6, 0xF7, 0x01, 0x16, 0xAC, 0x56, 0xFD, 0x18, 0x4D, 0xD1, 0x27, 0x84, 0x94, 0xE1, 0x03, 0xFC, 0x6D, 0x74, 0xA8, 0x87 },
 | |
| +	{ 0xBD, 0xEC, 0xF6, 0xBF, 0xC1, 0xBA, 0x0D, 0xF6, 0xE8, 0x62, 0xC8, 0x31, 0x99, 0x22, 0x07, 0x79, 0x6A, 0xCC, 0x79, 0x79, 0x68, 0x35, 0x88, 0x28, 0xC0, 0x6E, 0x7A, 0x51, 0xE0, 0x90, 0x09, 0x8F },
 | |
| +	{ 0x24, 0xD1, 0xA2, 0x6E, 0x3D, 0xAB, 0x02, 0xFE, 0x45, 0x72, 0xD2, 0xAA, 0x7D, 0xBD, 0x3E, 0xC3, 0x0F, 0x06, 0x93, 0xDB, 0x26, 0xF2, 0x73, 0xD0, 0xAB, 0x2C, 0xB0, 0xC1, 0x3B, 0x5E, 0x64, 0x51 },
 | |
| +	{ 0xEC, 0x56, 0xF5, 0x8B, 0x09, 0x29, 0x9A, 0x30, 0x0B, 0x14, 0x05, 0x65, 0xD7, 0xD3, 0xE6, 0x87, 0x82, 0xB6, 0xE2, 0xFB, 0xEB, 0x4B, 0x7E, 0xA9, 0x7A, 0xC0, 0x57, 0x98, 0x90, 0x61, 0xDD, 0x3F },
 | |
| +	{ 0x11, 0xA4, 0x37, 0xC1, 0xAB, 0xA3, 0xC1, 0x19, 0xDD, 0xFA, 0xB3, 0x1B, 0x3E, 0x8C, 0x84, 0x1D, 0xEE, 0xEB, 0x91, 0x3E, 0xF5, 0x7F, 0x7E, 0x48, 0xF2, 0xC9, 0xCF, 0x5A, 0x28, 0xFA, 0x42, 0xBC },
 | |
| +	{ 0x53, 0xC7, 0xE6, 0x11, 0x4B, 0x85, 0x0A, 0x2C, 0xB4, 0x96, 0xC9, 0xB3, 0xC6, 0x9A, 0x62, 0x3E, 0xAE, 0xA2, 0xCB, 0x1D, 0x33, 0xDD, 0x81, 0x7E, 0x47, 0x65, 0xED, 0xAA, 0x68, 0x23, 0xC2, 0x28 },
 | |
| +	{ 0x15, 0x4C, 0x3E, 0x96, 0xFE, 0xE5, 0xDB, 0x14, 0xF8, 0x77, 0x3E, 0x18, 0xAF, 0x14, 0x85, 0x79, 0x13, 0x50, 0x9D, 0xA9, 0x99, 0xB4, 0x6C, 0xDD, 0x3D, 0x4C, 0x16, 0x97, 0x60, 0xC8, 0x3A, 0xD2 },
 | |
| +	{ 0x40, 0xB9, 0x91, 0x6F, 0x09, 0x3E, 0x02, 0x7A, 0x87, 0x86, 0x64, 0x18, 0x18, 0x92, 0x06, 0x20, 0x47, 0x2F, 0xBC, 0xF6, 0x8F, 0x70, 0x1D, 0x1B, 0x68, 0x06, 0x32, 0xE6, 0x99, 0x6B, 0xDE, 0xD3 },
 | |
| +	{ 0x24, 0xC4, 0xCB, 0xBA, 0x07, 0x11, 0x98, 0x31, 0xA7, 0x26, 0xB0, 0x53, 0x05, 0xD9, 0x6D, 0xA0, 0x2F, 0xF8, 0xB1, 0x48, 0xF0, 0xDA, 0x44, 0x0F, 0xE2, 0x33, 0xBC, 0xAA, 0x32, 0xC7, 0x2F, 0x6F },
 | |
| +	{ 0x5D, 0x20, 0x15, 0x10, 0x25, 0x00, 0x20, 0xB7, 0x83, 0x68, 0x96, 0x88, 0xAB, 0xBF, 0x8E, 0xCF, 0x25, 0x94, 0xA9, 0x6A, 0x08, 0xF2, 0xBF, 0xEC, 0x6C, 0xE0, 0x57, 0x44, 0x65, 0xDD, 0xED, 0x71 },
 | |
| +	{ 0x04, 0x3B, 0x97, 0xE3, 0x36, 0xEE, 0x6F, 0xDB, 0xBE, 0x2B, 0x50, 0xF2, 0x2A, 0xF8, 0x32, 0x75, 0xA4, 0x08, 0x48, 0x05, 0xD2, 0xD5, 0x64, 0x59, 0x62, 0x45, 0x4B, 0x6C, 0x9B, 0x80, 0x53, 0xA0 },
 | |
| +	{ 0x56, 0x48, 0x35, 0xCB, 0xAE, 0xA7, 0x74, 0x94, 0x85, 0x68, 0xBE, 0x36, 0xCF, 0x52, 0xFC, 0xDD, 0x83, 0x93, 0x4E, 0xB0, 0xA2, 0x75, 0x12, 0xDB, 0xE3, 0xE2, 0xDB, 0x47, 0xB9, 0xE6, 0x63, 0x5A },
 | |
| +	{ 0xF2, 0x1C, 0x33, 0xF4, 0x7B, 0xDE, 0x40, 0xA2, 0xA1, 0x01, 0xC9, 0xCD, 0xE8, 0x02, 0x7A, 0xAF, 0x61, 0xA3, 0x13, 0x7D, 0xE2, 0x42, 0x2B, 0x30, 0x03, 0x5A, 0x04, 0xC2, 0x70, 0x89, 0x41, 0x83 },
 | |
| +	{ 0x9D, 0xB0, 0xEF, 0x74, 0xE6, 0x6C, 0xBB, 0x84, 0x2E, 0xB0, 0xE0, 0x73, 0x43, 0xA0, 0x3C, 0x5C, 0x56, 0x7E, 0x37, 0x2B, 0x3F, 0x23, 0xB9, 0x43, 0xC7, 0x88, 0xA4, 0xF2, 0x50, 0xF6, 0x78, 0x91 },
 | |
| +	{ 0xAB, 0x8D, 0x08, 0x65, 0x5F, 0xF1, 0xD3, 0xFE, 0x87, 0x58, 0xD5, 0x62, 0x23, 0x5F, 0xD2, 0x3E, 0x7C, 0xF9, 0xDC, 0xAA, 0xD6, 0x58, 0x87, 0x2A, 0x49, 0xE5, 0xD3, 0x18, 0x3B, 0x6C, 0xCE, 0xBD },
 | |
| +	{ 0x6F, 0x27, 0xF7, 0x7E, 0x7B, 0xCF, 0x46, 0xA1, 0xE9, 0x63, 0xAD, 0xE0, 0x30, 0x97, 0x33, 0x54, 0x30, 0x31, 0xDC, 0xCD, 0xD4, 0x7C, 0xAA, 0xC1, 0x74, 0xD7, 0xD2, 0x7C, 0xE8, 0x07, 0x7E, 0x8B },
 | |
| +	{ 0xE3, 0xCD, 0x54, 0xDA, 0x7E, 0x44, 0x4C, 0xAA, 0x62, 0x07, 0x56, 0x95, 0x25, 0xA6, 0x70, 0xEB, 0xAE, 0x12, 0x78, 0xDE, 0x4E, 0x3F, 0xE2, 0x68, 0x4B, 0x3E, 0x33, 0xF5, 0xEF, 0x90, 0xCC, 0x1B },
 | |
| +	{ 0xB2, 0xC3, 0xE3, 0x3A, 0x51, 0xD2, 0x2C, 0x4C, 0x08, 0xFC, 0x09, 0x89, 0xC8, 0x73, 0xC9, 0xCC, 0x41, 0x50, 0x57, 0x9B, 0x1E, 0x61, 0x63, 0xFA, 0x69, 0x4A, 0xD5, 0x1D, 0x53, 0xD7, 0x12, 0xDC },
 | |
| +	{ 0xBE, 0x7F, 0xDA, 0x98, 0x3E, 0x13, 0x18, 0x9B, 0x4C, 0x77, 0xE0, 0xA8, 0x09, 0x20, 0xB6, 0xE0, 0xE0, 0xEA, 0x80, 0xC3, 0xB8, 0x4D, 0xBE, 0x7E, 0x71, 0x17, 0xD2, 0x53, 0xF4, 0x81, 0x12, 0xF4 },
 | |
| +	{ 0xB6, 0x00, 0x8C, 0x28, 0xFA, 0xE0, 0x8A, 0xA4, 0x27, 0xE5, 0xBD, 0x3A, 0xAD, 0x36, 0xF1, 0x00, 0x21, 0xF1, 0x6C, 0x77, 0xCF, 0xEA, 0xBE, 0xD0, 0x7F, 0x97, 0xCC, 0x7D, 0xC1, 0xF1, 0x28, 0x4A },
 | |
| +	{ 0x6E, 0x4E, 0x67, 0x60, 0xC5, 0x38, 0xF2, 0xE9, 0x7B, 0x3A, 0xDB, 0xFB, 0xBC, 0xDE, 0x57, 0xF8, 0x96, 0x6B, 0x7E, 0xA8, 0xFC, 0xB5, 0xBF, 0x7E, 0xFE, 0xC9, 0x13, 0xFD, 0x2A, 0x2B, 0x0C, 0x55 },
 | |
| +	{ 0x4A, 0xE5, 0x1F, 0xD1, 0x83, 0x4A, 0xA5, 0xBD, 0x9A, 0x6F, 0x7E, 0xC3, 0x9F, 0xC6, 0x63, 0x33, 0x8D, 0xC5, 0xD2, 0xE2, 0x07, 0x61, 0x56, 0x6D, 0x90, 0xCC, 0x68, 0xB1, 0xCB, 0x87, 0x5E, 0xD8 },
 | |
| +	{ 0xB6, 0x73, 0xAA, 0xD7, 0x5A, 0xB1, 0xFD, 0xB5, 0x40, 0x1A, 0xBF, 0xA1, 0xBF, 0x89, 0xF3, 0xAD, 0xD2, 0xEB, 0xC4, 0x68, 0xDF, 0x36, 0x24, 0xA4, 0x78, 0xF4, 0xFE, 0x85, 0x9D, 0x8D, 0x55, 0xE2 },
 | |
| +	{ 0x13, 0xC9, 0x47, 0x1A, 0x98, 0x55, 0x91, 0x35, 0x39, 0x83, 0x66, 0x60, 0x39, 0x8D, 0xA0, 0xF3, 0xF9, 0x9A, 0xDA, 0x08, 0x47, 0x9C, 0x69, 0xD1, 0xB7, 0xFC, 0xAA, 0x34, 0x61, 0xDD, 0x7E, 0x59 },
 | |
| +	{ 0x2C, 0x11, 0xF4, 0xA7, 0xF9, 0x9A, 0x1D, 0x23, 0xA5, 0x8B, 0xB6, 0x36, 0x35, 0x0F, 0xE8, 0x49, 0xF2, 0x9C, 0xBA, 0xC1, 0xB2, 0xA1, 0x11, 0x2D, 0x9F, 0x1E, 0xD5, 0xBC, 0x5B, 0x31, 0x3C, 0xCD },
 | |
| +	{ 0xC7, 0xD3, 0xC0, 0x70, 0x6B, 0x11, 0xAE, 0x74, 0x1C, 0x05, 0xA1, 0xEF, 0x15, 0x0D, 0xD6, 0x5B, 0x54, 0x94, 0xD6, 0xD5, 0x4C, 0x9A, 0x86, 0xE2, 0x61, 0x78, 0x54, 0xE6, 0xAE, 0xEE, 0xBB, 0xD9 },
 | |
| +	{ 0x19, 0x4E, 0x10, 0xC9, 0x38, 0x93, 0xAF, 0xA0, 0x64, 0xC3, 0xAC, 0x04, 0xC0, 0xDD, 0x80, 0x8D, 0x79, 0x1C, 0x3D, 0x4B, 0x75, 0x56, 0xE8, 0x9D, 0x8D, 0x9C, 0xB2, 0x25, 0xC4, 0xB3, 0x33, 0x39 },
 | |
| +	{ 0x6F, 0xC4, 0x98, 0x8B, 0x8F, 0x78, 0x54, 0x6B, 0x16, 0x88, 0x99, 0x18, 0x45, 0x90, 0x8F, 0x13, 0x4B, 0x6A, 0x48, 0x2E, 0x69, 0x94, 0xB3, 0xD4, 0x83, 0x17, 0xBF, 0x08, 0xDB, 0x29, 0x21, 0x85 },
 | |
| +	{ 0x56, 0x65, 0xBE, 0xB8, 0xB0, 0x95, 0x55, 0x25, 0x81, 0x3B, 0x59, 0x81, 0xCD, 0x14, 0x2E, 0xD4, 0xD0, 0x3F, 0xBA, 0x38, 0xA6, 0xF3, 0xE5, 0xAD, 0x26, 0x8E, 0x0C, 0xC2, 0x70, 0xD1, 0xCD, 0x11 },
 | |
| +	{ 0xB8, 0x83, 0xD6, 0x8F, 0x5F, 0xE5, 0x19, 0x36, 0x43, 0x1B, 0xA4, 0x25, 0x67, 0x38, 0x05, 0x3B, 0x1D, 0x04, 0x26, 0xD4, 0xCB, 0x64, 0xB1, 0x6E, 0x83, 0xBA, 0xDC, 0x5E, 0x9F, 0xBE, 0x3B, 0x81 },
 | |
| +	{ 0x53, 0xE7, 0xB2, 0x7E, 0xA5, 0x9C, 0x2F, 0x6D, 0xBB, 0x50, 0x76, 0x9E, 0x43, 0x55, 0x4D, 0xF3, 0x5A, 0xF8, 0x9F, 0x48, 0x22, 0xD0, 0x46, 0x6B, 0x00, 0x7D, 0xD6, 0xF6, 0xDE, 0xAF, 0xFF, 0x02 },
 | |
| +	{ 0x1F, 0x1A, 0x02, 0x29, 0xD4, 0x64, 0x0F, 0x01, 0x90, 0x15, 0x88, 0xD9, 0xDE, 0xC2, 0x2D, 0x13, 0xFC, 0x3E, 0xB3, 0x4A, 0x61, 0xB3, 0x29, 0x38, 0xEF, 0xBF, 0x53, 0x34, 0xB2, 0x80, 0x0A, 0xFA },
 | |
| +	{ 0xC2, 0xB4, 0x05, 0xAF, 0xA0, 0xFA, 0x66, 0x68, 0x85, 0x2A, 0xEE, 0x4D, 0x88, 0x04, 0x08, 0x53, 0xFA, 0xB8, 0x00, 0xE7, 0x2B, 0x57, 0x58, 0x14, 0x18, 0xE5, 0x50, 0x6F, 0x21, 0x4C, 0x7D, 0x1F },
 | |
| +	{ 0xC0, 0x8A, 0xA1, 0xC2, 0x86, 0xD7, 0x09, 0xFD, 0xC7, 0x47, 0x37, 0x44, 0x97, 0x71, 0x88, 0xC8, 0x95, 0xBA, 0x01, 0x10, 0x14, 0x24, 0x7E, 0x4E, 0xFA, 0x8D, 0x07, 0xE7, 0x8F, 0xEC, 0x69, 0x5C },
 | |
| +	{ 0xF0, 0x3F, 0x57, 0x89, 0xD3, 0x33, 0x6B, 0x80, 0xD0, 0x02, 0xD5, 0x9F, 0xDF, 0x91, 0x8B, 0xDB, 0x77, 0x5B, 0x00, 0x95, 0x6E, 0xD5, 0x52, 0x8E, 0x86, 0xAA, 0x99, 0x4A, 0xCB, 0x38, 0xFE, 0x2D }
 | |
| +};
 | |
| +
 | |
| +static const u8 blake2s_keyed_testvecs[][BLAKE2S_OUTBYTES] __initconst = {
 | |
| +	{ 0x48, 0xA8, 0x99, 0x7D, 0xA4, 0x07, 0x87, 0x6B, 0x3D, 0x79, 0xC0, 0xD9, 0x23, 0x25, 0xAD, 0x3B, 0x89, 0xCB, 0xB7, 0x54, 0xD8, 0x6A, 0xB7, 0x1A, 0xEE, 0x04, 0x7A, 0xD3, 0x45, 0xFD, 0x2C, 0x49 },
 | |
| +	{ 0x40, 0xD1, 0x5F, 0xEE, 0x7C, 0x32, 0x88, 0x30, 0x16, 0x6A, 0xC3, 0xF9, 0x18, 0x65, 0x0F, 0x80, 0x7E, 0x7E, 0x01, 0xE1, 0x77, 0x25, 0x8C, 0xDC, 0x0A, 0x39, 0xB1, 0x1F, 0x59, 0x80, 0x66, 0xF1 },
 | |
| +	{ 0x6B, 0xB7, 0x13, 0x00, 0x64, 0x4C, 0xD3, 0x99, 0x1B, 0x26, 0xCC, 0xD4, 0xD2, 0x74, 0xAC, 0xD1, 0xAD, 0xEA, 0xB8, 0xB1, 0xD7, 0x91, 0x45, 0x46, 0xC1, 0x19, 0x8B, 0xBE, 0x9F, 0xC9, 0xD8, 0x03 },
 | |
| +	{ 0x1D, 0x22, 0x0D, 0xBE, 0x2E, 0xE1, 0x34, 0x66, 0x1F, 0xDF, 0x6D, 0x9E, 0x74, 0xB4, 0x17, 0x04, 0x71, 0x05, 0x56, 0xF2, 0xF6, 0xE5, 0xA0, 0x91, 0xB2, 0x27, 0x69, 0x74, 0x45, 0xDB, 0xEA, 0x6B },
 | |
| +	{ 0xF6, 0xC3, 0xFB, 0xAD, 0xB4, 0xCC, 0x68, 0x7A, 0x00, 0x64, 0xA5, 0xBE, 0x6E, 0x79, 0x1B, 0xEC, 0x63, 0xB8, 0x68, 0xAD, 0x62, 0xFB, 0xA6, 0x1B, 0x37, 0x57, 0xEF, 0x9C, 0xA5, 0x2E, 0x05, 0xB2 },
 | |
| +	{ 0x49, 0xC1, 0xF2, 0x11, 0x88, 0xDF, 0xD7, 0x69, 0xAE, 0xA0, 0xE9, 0x11, 0xDD, 0x6B, 0x41, 0xF1, 0x4D, 0xAB, 0x10, 0x9D, 0x2B, 0x85, 0x97, 0x7A, 0xA3, 0x08, 0x8B, 0x5C, 0x70, 0x7E, 0x85, 0x98 },
 | |
| +	{ 0xFD, 0xD8, 0x99, 0x3D, 0xCD, 0x43, 0xF6, 0x96, 0xD4, 0x4F, 0x3C, 0xEA, 0x0F, 0xF3, 0x53, 0x45, 0x23, 0x4E, 0xC8, 0xEE, 0x08, 0x3E, 0xB3, 0xCA, 0xDA, 0x01, 0x7C, 0x7F, 0x78, 0xC1, 0x71, 0x43 },
 | |
| +	{ 0xE6, 0xC8, 0x12, 0x56, 0x37, 0x43, 0x8D, 0x09, 0x05, 0xB7, 0x49, 0xF4, 0x65, 0x60, 0xAC, 0x89, 0xFD, 0x47, 0x1C, 0xF8, 0x69, 0x2E, 0x28, 0xFA, 0xB9, 0x82, 0xF7, 0x3F, 0x01, 0x9B, 0x83, 0xA9 },
 | |
| +	{ 0x19, 0xFC, 0x8C, 0xA6, 0x97, 0x9D, 0x60, 0xE6, 0xED, 0xD3, 0xB4, 0x54, 0x1E, 0x2F, 0x96, 0x7C, 0xED, 0x74, 0x0D, 0xF6, 0xEC, 0x1E, 0xAE, 0xBB, 0xFE, 0x81, 0x38, 0x32, 0xE9, 0x6B, 0x29, 0x74 },
 | |
| +	{ 0xA6, 0xAD, 0x77, 0x7C, 0xE8, 0x81, 0xB5, 0x2B, 0xB5, 0xA4, 0x42, 0x1A, 0xB6, 0xCD, 0xD2, 0xDF, 0xBA, 0x13, 0xE9, 0x63, 0x65, 0x2D, 0x4D, 0x6D, 0x12, 0x2A, 0xEE, 0x46, 0x54, 0x8C, 0x14, 0xA7 },
 | |
| +	{ 0xF5, 0xC4, 0xB2, 0xBA, 0x1A, 0x00, 0x78, 0x1B, 0x13, 0xAB, 0xA0, 0x42, 0x52, 0x42, 0xC6, 0x9C, 0xB1, 0x55, 0x2F, 0x3F, 0x71, 0xA9, 0xA3, 0xBB, 0x22, 0xB4, 0xA6, 0xB4, 0x27, 0x7B, 0x46, 0xDD },
 | |
| +	{ 0xE3, 0x3C, 0x4C, 0x9B, 0xD0, 0xCC, 0x7E, 0x45, 0xC8, 0x0E, 0x65, 0xC7, 0x7F, 0xA5, 0x99, 0x7F, 0xEC, 0x70, 0x02, 0x73, 0x85, 0x41, 0x50, 0x9E, 0x68, 0xA9, 0x42, 0x38, 0x91, 0xE8, 0x22, 0xA3 },
 | |
| +	{ 0xFB, 0xA1, 0x61, 0x69, 0xB2, 0xC3, 0xEE, 0x10, 0x5B, 0xE6, 0xE1, 0xE6, 0x50, 0xE5, 0xCB, 0xF4, 0x07, 0x46, 0xB6, 0x75, 0x3D, 0x03, 0x6A, 0xB5, 0x51, 0x79, 0x01, 0x4A, 0xD7, 0xEF, 0x66, 0x51 },
 | |
| +	{ 0xF5, 0xC4, 0xBE, 0xC6, 0xD6, 0x2F, 0xC6, 0x08, 0xBF, 0x41, 0xCC, 0x11, 0x5F, 0x16, 0xD6, 0x1C, 0x7E, 0xFD, 0x3F, 0xF6, 0xC6, 0x56, 0x92, 0xBB, 0xE0, 0xAF, 0xFF, 0xB1, 0xFE, 0xDE, 0x74, 0x75 },
 | |
| +	{ 0xA4, 0x86, 0x2E, 0x76, 0xDB, 0x84, 0x7F, 0x05, 0xBA, 0x17, 0xED, 0xE5, 0xDA, 0x4E, 0x7F, 0x91, 0xB5, 0x92, 0x5C, 0xF1, 0xAD, 0x4B, 0xA1, 0x27, 0x32, 0xC3, 0x99, 0x57, 0x42, 0xA5, 0xCD, 0x6E },
 | |
| +	{ 0x65, 0xF4, 0xB8, 0x60, 0xCD, 0x15, 0xB3, 0x8E, 0xF8, 0x14, 0xA1, 0xA8, 0x04, 0x31, 0x4A, 0x55, 0xBE, 0x95, 0x3C, 0xAA, 0x65, 0xFD, 0x75, 0x8A, 0xD9, 0x89, 0xFF, 0x34, 0xA4, 0x1C, 0x1E, 0xEA },
 | |
| +	{ 0x19, 0xBA, 0x23, 0x4F, 0x0A, 0x4F, 0x38, 0x63, 0x7D, 0x18, 0x39, 0xF9, 0xD9, 0xF7, 0x6A, 0xD9, 0x1C, 0x85, 0x22, 0x30, 0x71, 0x43, 0xC9, 0x7D, 0x5F, 0x93, 0xF6, 0x92, 0x74, 0xCE, 0xC9, 0xA7 },
 | |
| +	{ 0x1A, 0x67, 0x18, 0x6C, 0xA4, 0xA5, 0xCB, 0x8E, 0x65, 0xFC, 0xA0, 0xE2, 0xEC, 0xBC, 0x5D, 0xDC, 0x14, 0xAE, 0x38, 0x1B, 0xB8, 0xBF, 0xFE, 0xB9, 0xE0, 0xA1, 0x03, 0x44, 0x9E, 0x3E, 0xF0, 0x3C },
 | |
| +	{ 0xAF, 0xBE, 0xA3, 0x17, 0xB5, 0xA2, 0xE8, 0x9C, 0x0B, 0xD9, 0x0C, 0xCF, 0x5D, 0x7F, 0xD0, 0xED, 0x57, 0xFE, 0x58, 0x5E, 0x4B, 0xE3, 0x27, 0x1B, 0x0A, 0x6B, 0xF0, 0xF5, 0x78, 0x6B, 0x0F, 0x26 },
 | |
| +	{ 0xF1, 0xB0, 0x15, 0x58, 0xCE, 0x54, 0x12, 0x62, 0xF5, 0xEC, 0x34, 0x29, 0x9D, 0x6F, 0xB4, 0x09, 0x00, 0x09, 0xE3, 0x43, 0x4B, 0xE2, 0xF4, 0x91, 0x05, 0xCF, 0x46, 0xAF, 0x4D, 0x2D, 0x41, 0x24 },
 | |
| +	{ 0x13, 0xA0, 0xA0, 0xC8, 0x63, 0x35, 0x63, 0x5E, 0xAA, 0x74, 0xCA, 0x2D, 0x5D, 0x48, 0x8C, 0x79, 0x7B, 0xBB, 0x4F, 0x47, 0xDC, 0x07, 0x10, 0x50, 0x15, 0xED, 0x6A, 0x1F, 0x33, 0x09, 0xEF, 0xCE },
 | |
| +	{ 0x15, 0x80, 0xAF, 0xEE, 0xBE, 0xBB, 0x34, 0x6F, 0x94, 0xD5, 0x9F, 0xE6, 0x2D, 0xA0, 0xB7, 0x92, 0x37, 0xEA, 0xD7, 0xB1, 0x49, 0x1F, 0x56, 0x67, 0xA9, 0x0E, 0x45, 0xED, 0xF6, 0xCA, 0x8B, 0x03 },
 | |
| +	{ 0x20, 0xBE, 0x1A, 0x87, 0x5B, 0x38, 0xC5, 0x73, 0xDD, 0x7F, 0xAA, 0xA0, 0xDE, 0x48, 0x9D, 0x65, 0x5C, 0x11, 0xEF, 0xB6, 0xA5, 0x52, 0x69, 0x8E, 0x07, 0xA2, 0xD3, 0x31, 0xB5, 0xF6, 0x55, 0xC3 },
 | |
| +	{ 0xBE, 0x1F, 0xE3, 0xC4, 0xC0, 0x40, 0x18, 0xC5, 0x4C, 0x4A, 0x0F, 0x6B, 0x9A, 0x2E, 0xD3, 0xC5, 0x3A, 0xBE, 0x3A, 0x9F, 0x76, 0xB4, 0xD2, 0x6D, 0xE5, 0x6F, 0xC9, 0xAE, 0x95, 0x05, 0x9A, 0x99 },
 | |
| +	{ 0xE3, 0xE3, 0xAC, 0xE5, 0x37, 0xEB, 0x3E, 0xDD, 0x84, 0x63, 0xD9, 0xAD, 0x35, 0x82, 0xE1, 0x3C, 0xF8, 0x65, 0x33, 0xFF, 0xDE, 0x43, 0xD6, 0x68, 0xDD, 0x2E, 0x93, 0xBB, 0xDB, 0xD7, 0x19, 0x5A },
 | |
| +	{ 0x11, 0x0C, 0x50, 0xC0, 0xBF, 0x2C, 0x6E, 0x7A, 0xEB, 0x7E, 0x43, 0x5D, 0x92, 0xD1, 0x32, 0xAB, 0x66, 0x55, 0x16, 0x8E, 0x78, 0xA2, 0xDE, 0xCD, 0xEC, 0x33, 0x30, 0x77, 0x76, 0x84, 0xD9, 0xC1 },
 | |
| +	{ 0xE9, 0xBA, 0x8F, 0x50, 0x5C, 0x9C, 0x80, 0xC0, 0x86, 0x66, 0xA7, 0x01, 0xF3, 0x36, 0x7E, 0x6C, 0xC6, 0x65, 0xF3, 0x4B, 0x22, 0xE7, 0x3C, 0x3C, 0x04, 0x17, 0xEB, 0x1C, 0x22, 0x06, 0x08, 0x2F },
 | |
| +	{ 0x26, 0xCD, 0x66, 0xFC, 0xA0, 0x23, 0x79, 0xC7, 0x6D, 0xF1, 0x23, 0x17, 0x05, 0x2B, 0xCA, 0xFD, 0x6C, 0xD8, 0xC3, 0xA7, 0xB8, 0x90, 0xD8, 0x05, 0xF3, 0x6C, 0x49, 0x98, 0x97, 0x82, 0x43, 0x3A },
 | |
| +	{ 0x21, 0x3F, 0x35, 0x96, 0xD6, 0xE3, 0xA5, 0xD0, 0xE9, 0x93, 0x2C, 0xD2, 0x15, 0x91, 0x46, 0x01, 0x5E, 0x2A, 0xBC, 0x94, 0x9F, 0x47, 0x29, 0xEE, 0x26, 0x32, 0xFE, 0x1E, 0xDB, 0x78, 0xD3, 0x37 },
 | |
| +	{ 0x10, 0x15, 0xD7, 0x01, 0x08, 0xE0, 0x3B, 0xE1, 0xC7, 0x02, 0xFE, 0x97, 0x25, 0x36, 0x07, 0xD1, 0x4A, 0xEE, 0x59, 0x1F, 0x24, 0x13, 0xEA, 0x67, 0x87, 0x42, 0x7B, 0x64, 0x59, 0xFF, 0x21, 0x9A },
 | |
| +	{ 0x3C, 0xA9, 0x89, 0xDE, 0x10, 0xCF, 0xE6, 0x09, 0x90, 0x94, 0x72, 0xC8, 0xD3, 0x56, 0x10, 0x80, 0x5B, 0x2F, 0x97, 0x77, 0x34, 0xCF, 0x65, 0x2C, 0xC6, 0x4B, 0x3B, 0xFC, 0x88, 0x2D, 0x5D, 0x89 },
 | |
| +	{ 0xB6, 0x15, 0x6F, 0x72, 0xD3, 0x80, 0xEE, 0x9E, 0xA6, 0xAC, 0xD1, 0x90, 0x46, 0x4F, 0x23, 0x07, 0xA5, 0xC1, 0x79, 0xEF, 0x01, 0xFD, 0x71, 0xF9, 0x9F, 0x2D, 0x0F, 0x7A, 0x57, 0x36, 0x0A, 0xEA },
 | |
| +	{ 0xC0, 0x3B, 0xC6, 0x42, 0xB2, 0x09, 0x59, 0xCB, 0xE1, 0x33, 0xA0, 0x30, 0x3E, 0x0C, 0x1A, 0xBF, 0xF3, 0xE3, 0x1E, 0xC8, 0xE1, 0xA3, 0x28, 0xEC, 0x85, 0x65, 0xC3, 0x6D, 0xEC, 0xFF, 0x52, 0x65 },
 | |
| +	{ 0x2C, 0x3E, 0x08, 0x17, 0x6F, 0x76, 0x0C, 0x62, 0x64, 0xC3, 0xA2, 0xCD, 0x66, 0xFE, 0xC6, 0xC3, 0xD7, 0x8D, 0xE4, 0x3F, 0xC1, 0x92, 0x45, 0x7B, 0x2A, 0x4A, 0x66, 0x0A, 0x1E, 0x0E, 0xB2, 0x2B },
 | |
| +	{ 0xF7, 0x38, 0xC0, 0x2F, 0x3C, 0x1B, 0x19, 0x0C, 0x51, 0x2B, 0x1A, 0x32, 0xDE, 0xAB, 0xF3, 0x53, 0x72, 0x8E, 0x0E, 0x9A, 0xB0, 0x34, 0x49, 0x0E, 0x3C, 0x34, 0x09, 0x94, 0x6A, 0x97, 0xAE, 0xEC },
 | |
| +	{ 0x8B, 0x18, 0x80, 0xDF, 0x30, 0x1C, 0xC9, 0x63, 0x41, 0x88, 0x11, 0x08, 0x89, 0x64, 0x83, 0x92, 0x87, 0xFF, 0x7F, 0xE3, 0x1C, 0x49, 0xEA, 0x6E, 0xBD, 0x9E, 0x48, 0xBD, 0xEE, 0xE4, 0x97, 0xC5 },
 | |
| +	{ 0x1E, 0x75, 0xCB, 0x21, 0xC6, 0x09, 0x89, 0x02, 0x03, 0x75, 0xF1, 0xA7, 0xA2, 0x42, 0x83, 0x9F, 0x0B, 0x0B, 0x68, 0x97, 0x3A, 0x4C, 0x2A, 0x05, 0xCF, 0x75, 0x55, 0xED, 0x5A, 0xAE, 0xC4, 0xC1 },
 | |
| +	{ 0x62, 0xBF, 0x8A, 0x9C, 0x32, 0xA5, 0xBC, 0xCF, 0x29, 0x0B, 0x6C, 0x47, 0x4D, 0x75, 0xB2, 0xA2, 0xA4, 0x09, 0x3F, 0x1A, 0x9E, 0x27, 0x13, 0x94, 0x33, 0xA8, 0xF2, 0xB3, 0xBC, 0xE7, 0xB8, 0xD7 },
 | |
| +	{ 0x16, 0x6C, 0x83, 0x50, 0xD3, 0x17, 0x3B, 0x5E, 0x70, 0x2B, 0x78, 0x3D, 0xFD, 0x33, 0xC6, 0x6E, 0xE0, 0x43, 0x27, 0x42, 0xE9, 0xB9, 0x2B, 0x99, 0x7F, 0xD2, 0x3C, 0x60, 0xDC, 0x67, 0x56, 0xCA },
 | |
| +	{ 0x04, 0x4A, 0x14, 0xD8, 0x22, 0xA9, 0x0C, 0xAC, 0xF2, 0xF5, 0xA1, 0x01, 0x42, 0x8A, 0xDC, 0x8F, 0x41, 0x09, 0x38, 0x6C, 0xCB, 0x15, 0x8B, 0xF9, 0x05, 0xC8, 0x61, 0x8B, 0x8E, 0xE2, 0x4E, 0xC3 },
 | |
| +	{ 0x38, 0x7D, 0x39, 0x7E, 0xA4, 0x3A, 0x99, 0x4B, 0xE8, 0x4D, 0x2D, 0x54, 0x4A, 0xFB, 0xE4, 0x81, 0xA2, 0x00, 0x0F, 0x55, 0x25, 0x26, 0x96, 0xBB, 0xA2, 0xC5, 0x0C, 0x8E, 0xBD, 0x10, 0x13, 0x47 },
 | |
| +	{ 0x56, 0xF8, 0xCC, 0xF1, 0xF8, 0x64, 0x09, 0xB4, 0x6C, 0xE3, 0x61, 0x66, 0xAE, 0x91, 0x65, 0x13, 0x84, 0x41, 0x57, 0x75, 0x89, 0xDB, 0x08, 0xCB, 0xC5, 0xF6, 0x6C, 0xA2, 0x97, 0x43, 0xB9, 0xFD },
 | |
| +	{ 0x97, 0x06, 0xC0, 0x92, 0xB0, 0x4D, 0x91, 0xF5, 0x3D, 0xFF, 0x91, 0xFA, 0x37, 0xB7, 0x49, 0x3D, 0x28, 0xB5, 0x76, 0xB5, 0xD7, 0x10, 0x46, 0x9D, 0xF7, 0x94, 0x01, 0x66, 0x22, 0x36, 0xFC, 0x03 },
 | |
| +	{ 0x87, 0x79, 0x68, 0x68, 0x6C, 0x06, 0x8C, 0xE2, 0xF7, 0xE2, 0xAD, 0xCF, 0xF6, 0x8B, 0xF8, 0x74, 0x8E, 0xDF, 0x3C, 0xF8, 0x62, 0xCF, 0xB4, 0xD3, 0x94, 0x7A, 0x31, 0x06, 0x95, 0x80, 0x54, 0xE3 },
 | |
| +	{ 0x88, 0x17, 0xE5, 0x71, 0x98, 0x79, 0xAC, 0xF7, 0x02, 0x47, 0x87, 0xEC, 0xCD, 0xB2, 0x71, 0x03, 0x55, 0x66, 0xCF, 0xA3, 0x33, 0xE0, 0x49, 0x40, 0x7C, 0x01, 0x78, 0xCC, 0xC5, 0x7A, 0x5B, 0x9F },
 | |
| +	{ 0x89, 0x38, 0x24, 0x9E, 0x4B, 0x50, 0xCA, 0xDA, 0xCC, 0xDF, 0x5B, 0x18, 0x62, 0x13, 0x26, 0xCB, 0xB1, 0x52, 0x53, 0xE3, 0x3A, 0x20, 0xF5, 0x63, 0x6E, 0x99, 0x5D, 0x72, 0x47, 0x8D, 0xE4, 0x72 },
 | |
| +	{ 0xF1, 0x64, 0xAB, 0xBA, 0x49, 0x63, 0xA4, 0x4D, 0x10, 0x72, 0x57, 0xE3, 0x23, 0x2D, 0x90, 0xAC, 0xA5, 0xE6, 0x6A, 0x14, 0x08, 0x24, 0x8C, 0x51, 0x74, 0x1E, 0x99, 0x1D, 0xB5, 0x22, 0x77, 0x56 },
 | |
| +	{ 0xD0, 0x55, 0x63, 0xE2, 0xB1, 0xCB, 0xA0, 0xC4, 0xA2, 0xA1, 0xE8, 0xBD, 0xE3, 0xA1, 0xA0, 0xD9, 0xF5, 0xB4, 0x0C, 0x85, 0xA0, 0x70, 0xD6, 0xF5, 0xFB, 0x21, 0x06, 0x6E, 0xAD, 0x5D, 0x06, 0x01 },
 | |
| +	{ 0x03, 0xFB, 0xB1, 0x63, 0x84, 0xF0, 0xA3, 0x86, 0x6F, 0x4C, 0x31, 0x17, 0x87, 0x76, 0x66, 0xEF, 0xBF, 0x12, 0x45, 0x97, 0x56, 0x4B, 0x29, 0x3D, 0x4A, 0xAB, 0x0D, 0x26, 0x9F, 0xAB, 0xDD, 0xFA },
 | |
| +	{ 0x5F, 0xA8, 0x48, 0x6A, 0xC0, 0xE5, 0x29, 0x64, 0xD1, 0x88, 0x1B, 0xBE, 0x33, 0x8E, 0xB5, 0x4B, 0xE2, 0xF7, 0x19, 0x54, 0x92, 0x24, 0x89, 0x20, 0x57, 0xB4, 0xDA, 0x04, 0xBA, 0x8B, 0x34, 0x75 },
 | |
| +	{ 0xCD, 0xFA, 0xBC, 0xEE, 0x46, 0x91, 0x11, 0x11, 0x23, 0x6A, 0x31, 0x70, 0x8B, 0x25, 0x39, 0xD7, 0x1F, 0xC2, 0x11, 0xD9, 0xB0, 0x9C, 0x0D, 0x85, 0x30, 0xA1, 0x1E, 0x1D, 0xBF, 0x6E, 0xED, 0x01 },
 | |
| +	{ 0x4F, 0x82, 0xDE, 0x03, 0xB9, 0x50, 0x47, 0x93, 0xB8, 0x2A, 0x07, 0xA0, 0xBD, 0xCD, 0xFF, 0x31, 0x4D, 0x75, 0x9E, 0x7B, 0x62, 0xD2, 0x6B, 0x78, 0x49, 0x46, 0xB0, 0xD3, 0x6F, 0x91, 0x6F, 0x52 },
 | |
| +	{ 0x25, 0x9E, 0xC7, 0xF1, 0x73, 0xBC, 0xC7, 0x6A, 0x09, 0x94, 0xC9, 0x67, 0xB4, 0xF5, 0xF0, 0x24, 0xC5, 0x60, 0x57, 0xFB, 0x79, 0xC9, 0x65, 0xC4, 0xFA, 0xE4, 0x18, 0x75, 0xF0, 0x6A, 0x0E, 0x4C },
 | |
| +	{ 0x19, 0x3C, 0xC8, 0xE7, 0xC3, 0xE0, 0x8B, 0xB3, 0x0F, 0x54, 0x37, 0xAA, 0x27, 0xAD, 0xE1, 0xF1, 0x42, 0x36, 0x9B, 0x24, 0x6A, 0x67, 0x5B, 0x23, 0x83, 0xE6, 0xDA, 0x9B, 0x49, 0xA9, 0x80, 0x9E },
 | |
| +	{ 0x5C, 0x10, 0x89, 0x6F, 0x0E, 0x28, 0x56, 0xB2, 0xA2, 0xEE, 0xE0, 0xFE, 0x4A, 0x2C, 0x16, 0x33, 0x56, 0x5D, 0x18, 0xF0, 0xE9, 0x3E, 0x1F, 0xAB, 0x26, 0xC3, 0x73, 0xE8, 0xF8, 0x29, 0x65, 0x4D },
 | |
| +	{ 0xF1, 0x60, 0x12, 0xD9, 0x3F, 0x28, 0x85, 0x1A, 0x1E, 0xB9, 0x89, 0xF5, 0xD0, 0xB4, 0x3F, 0x3F, 0x39, 0xCA, 0x73, 0xC9, 0xA6, 0x2D, 0x51, 0x81, 0xBF, 0xF2, 0x37, 0x53, 0x6B, 0xD3, 0x48, 0xC3 },
 | |
| +	{ 0x29, 0x66, 0xB3, 0xCF, 0xAE, 0x1E, 0x44, 0xEA, 0x99, 0x6D, 0xC5, 0xD6, 0x86, 0xCF, 0x25, 0xFA, 0x05, 0x3F, 0xB6, 0xF6, 0x72, 0x01, 0xB9, 0xE4, 0x6E, 0xAD, 0xE8, 0x5D, 0x0A, 0xD6, 0xB8, 0x06 },
 | |
| +	{ 0xDD, 0xB8, 0x78, 0x24, 0x85, 0xE9, 0x00, 0xBC, 0x60, 0xBC, 0xF4, 0xC3, 0x3A, 0x6F, 0xD5, 0x85, 0x68, 0x0C, 0xC6, 0x83, 0xD5, 0x16, 0xEF, 0xA0, 0x3E, 0xB9, 0x98, 0x5F, 0xAD, 0x87, 0x15, 0xFB },
 | |
| +	{ 0x4C, 0x4D, 0x6E, 0x71, 0xAE, 0xA0, 0x57, 0x86, 0x41, 0x31, 0x48, 0xFC, 0x7A, 0x78, 0x6B, 0x0E, 0xCA, 0xF5, 0x82, 0xCF, 0xF1, 0x20, 0x9F, 0x5A, 0x80, 0x9F, 0xBA, 0x85, 0x04, 0xCE, 0x66, 0x2C },
 | |
| +	{ 0xFB, 0x4C, 0x5E, 0x86, 0xD7, 0xB2, 0x22, 0x9B, 0x99, 0xB8, 0xBA, 0x6D, 0x94, 0xC2, 0x47, 0xEF, 0x96, 0x4A, 0xA3, 0xA2, 0xBA, 0xE8, 0xED, 0xC7, 0x75, 0x69, 0xF2, 0x8D, 0xBB, 0xFF, 0x2D, 0x4E },
 | |
| +	{ 0xE9, 0x4F, 0x52, 0x6D, 0xE9, 0x01, 0x96, 0x33, 0xEC, 0xD5, 0x4A, 0xC6, 0x12, 0x0F, 0x23, 0x95, 0x8D, 0x77, 0x18, 0xF1, 0xE7, 0x71, 0x7B, 0xF3, 0x29, 0x21, 0x1A, 0x4F, 0xAE, 0xED, 0x4E, 0x6D },
 | |
| +	{ 0xCB, 0xD6, 0x66, 0x0A, 0x10, 0xDB, 0x3F, 0x23, 0xF7, 0xA0, 0x3D, 0x4B, 0x9D, 0x40, 0x44, 0xC7, 0x93, 0x2B, 0x28, 0x01, 0xAC, 0x89, 0xD6, 0x0B, 0xC9, 0xEB, 0x92, 0xD6, 0x5A, 0x46, 0xC2, 0xA0 },
 | |
| +	{ 0x88, 0x18, 0xBB, 0xD3, 0xDB, 0x4D, 0xC1, 0x23, 0xB2, 0x5C, 0xBB, 0xA5, 0xF5, 0x4C, 0x2B, 0xC4, 0xB3, 0xFC, 0xF9, 0xBF, 0x7D, 0x7A, 0x77, 0x09, 0xF4, 0xAE, 0x58, 0x8B, 0x26, 0x7C, 0x4E, 0xCE },
 | |
| +	{ 0xC6, 0x53, 0x82, 0x51, 0x3F, 0x07, 0x46, 0x0D, 0xA3, 0x98, 0x33, 0xCB, 0x66, 0x6C, 0x5E, 0xD8, 0x2E, 0x61, 0xB9, 0xE9, 0x98, 0xF4, 0xB0, 0xC4, 0x28, 0x7C, 0xEE, 0x56, 0xC3, 0xCC, 0x9B, 0xCD },
 | |
| +	{ 0x89, 0x75, 0xB0, 0x57, 0x7F, 0xD3, 0x55, 0x66, 0xD7, 0x50, 0xB3, 0x62, 0xB0, 0x89, 0x7A, 0x26, 0xC3, 0x99, 0x13, 0x6D, 0xF0, 0x7B, 0xAB, 0xAB, 0xBD, 0xE6, 0x20, 0x3F, 0xF2, 0x95, 0x4E, 0xD4 },
 | |
| +	{ 0x21, 0xFE, 0x0C, 0xEB, 0x00, 0x52, 0xBE, 0x7F, 0xB0, 0xF0, 0x04, 0x18, 0x7C, 0xAC, 0xD7, 0xDE, 0x67, 0xFA, 0x6E, 0xB0, 0x93, 0x8D, 0x92, 0x76, 0x77, 0xF2, 0x39, 0x8C, 0x13, 0x23, 0x17, 0xA8 },
 | |
| +	{ 0x2E, 0xF7, 0x3F, 0x3C, 0x26, 0xF1, 0x2D, 0x93, 0x88, 0x9F, 0x3C, 0x78, 0xB6, 0xA6, 0x6C, 0x1D, 0x52, 0xB6, 0x49, 0xDC, 0x9E, 0x85, 0x6E, 0x2C, 0x17, 0x2E, 0xA7, 0xC5, 0x8A, 0xC2, 0xB5, 0xE3 },
 | |
| +	{ 0x38, 0x8A, 0x3C, 0xD5, 0x6D, 0x73, 0x86, 0x7A, 0xBB, 0x5F, 0x84, 0x01, 0x49, 0x2B, 0x6E, 0x26, 0x81, 0xEB, 0x69, 0x85, 0x1E, 0x76, 0x7F, 0xD8, 0x42, 0x10, 0xA5, 0x60, 0x76, 0xFB, 0x3D, 0xD3 },
 | |
| +	{ 0xAF, 0x53, 0x3E, 0x02, 0x2F, 0xC9, 0x43, 0x9E, 0x4E, 0x3C, 0xB8, 0x38, 0xEC, 0xD1, 0x86, 0x92, 0x23, 0x2A, 0xDF, 0x6F, 0xE9, 0x83, 0x95, 0x26, 0xD3, 0xC3, 0xDD, 0x1B, 0x71, 0x91, 0x0B, 0x1A },
 | |
| +	{ 0x75, 0x1C, 0x09, 0xD4, 0x1A, 0x93, 0x43, 0x88, 0x2A, 0x81, 0xCD, 0x13, 0xEE, 0x40, 0x81, 0x8D, 0x12, 0xEB, 0x44, 0xC6, 0xC7, 0xF4, 0x0D, 0xF1, 0x6E, 0x4A, 0xEA, 0x8F, 0xAB, 0x91, 0x97, 0x2A },
 | |
| +	{ 0x5B, 0x73, 0xDD, 0xB6, 0x8D, 0x9D, 0x2B, 0x0A, 0xA2, 0x65, 0xA0, 0x79, 0x88, 0xD6, 0xB8, 0x8A, 0xE9, 0xAA, 0xC5, 0x82, 0xAF, 0x83, 0x03, 0x2F, 0x8A, 0x9B, 0x21, 0xA2, 0xE1, 0xB7, 0xBF, 0x18 },
 | |
| +	{ 0x3D, 0xA2, 0x91, 0x26, 0xC7, 0xC5, 0xD7, 0xF4, 0x3E, 0x64, 0x24, 0x2A, 0x79, 0xFE, 0xAA, 0x4E, 0xF3, 0x45, 0x9C, 0xDE, 0xCC, 0xC8, 0x98, 0xED, 0x59, 0xA9, 0x7F, 0x6E, 0xC9, 0x3B, 0x9D, 0xAB },
 | |
| +	{ 0x56, 0x6D, 0xC9, 0x20, 0x29, 0x3D, 0xA5, 0xCB, 0x4F, 0xE0, 0xAA, 0x8A, 0xBD, 0xA8, 0xBB, 0xF5, 0x6F, 0x55, 0x23, 0x13, 0xBF, 0xF1, 0x90, 0x46, 0x64, 0x1E, 0x36, 0x15, 0xC1, 0xE3, 0xED, 0x3F },
 | |
| +	{ 0x41, 0x15, 0xBE, 0xA0, 0x2F, 0x73, 0xF9, 0x7F, 0x62, 0x9E, 0x5C, 0x55, 0x90, 0x72, 0x0C, 0x01, 0xE7, 0xE4, 0x49, 0xAE, 0x2A, 0x66, 0x97, 0xD4, 0xD2, 0x78, 0x33, 0x21, 0x30, 0x36, 0x92, 0xF9 },
 | |
| +	{ 0x4C, 0xE0, 0x8F, 0x47, 0x62, 0x46, 0x8A, 0x76, 0x70, 0x01, 0x21, 0x64, 0x87, 0x8D, 0x68, 0x34, 0x0C, 0x52, 0xA3, 0x5E, 0x66, 0xC1, 0x88, 0x4D, 0x5C, 0x86, 0x48, 0x89, 0xAB, 0xC9, 0x66, 0x77 },
 | |
| +	{ 0x81, 0xEA, 0x0B, 0x78, 0x04, 0x12, 0x4E, 0x0C, 0x22, 0xEA, 0x5F, 0xC7, 0x11, 0x04, 0xA2, 0xAF, 0xCB, 0x52, 0xA1, 0xFA, 0x81, 0x6F, 0x3E, 0xCB, 0x7D, 0xCB, 0x5D, 0x9D, 0xEA, 0x17, 0x86, 0xD0 },
 | |
| +	{ 0xFE, 0x36, 0x27, 0x33, 0xB0, 0x5F, 0x6B, 0xED, 0xAF, 0x93, 0x79, 0xD7, 0xF7, 0x93, 0x6E, 0xDE, 0x20, 0x9B, 0x1F, 0x83, 0x23, 0xC3, 0x92, 0x25, 0x49, 0xD9, 0xE7, 0x36, 0x81, 0xB5, 0xDB, 0x7B },
 | |
| +	{ 0xEF, 0xF3, 0x7D, 0x30, 0xDF, 0xD2, 0x03, 0x59, 0xBE, 0x4E, 0x73, 0xFD, 0xF4, 0x0D, 0x27, 0x73, 0x4B, 0x3D, 0xF9, 0x0A, 0x97, 0xA5, 0x5E, 0xD7, 0x45, 0x29, 0x72, 0x94, 0xCA, 0x85, 0xD0, 0x9F },
 | |
| +	{ 0x17, 0x2F, 0xFC, 0x67, 0x15, 0x3D, 0x12, 0xE0, 0xCA, 0x76, 0xA8, 0xB6, 0xCD, 0x5D, 0x47, 0x31, 0x88, 0x5B, 0x39, 0xCE, 0x0C, 0xAC, 0x93, 0xA8, 0x97, 0x2A, 0x18, 0x00, 0x6C, 0x8B, 0x8B, 0xAF },
 | |
| +	{ 0xC4, 0x79, 0x57, 0xF1, 0xCC, 0x88, 0xE8, 0x3E, 0xF9, 0x44, 0x58, 0x39, 0x70, 0x9A, 0x48, 0x0A, 0x03, 0x6B, 0xED, 0x5F, 0x88, 0xAC, 0x0F, 0xCC, 0x8E, 0x1E, 0x70, 0x3F, 0xFA, 0xAC, 0x13, 0x2C },
 | |
| +	{ 0x30, 0xF3, 0x54, 0x83, 0x70, 0xCF, 0xDC, 0xED, 0xA5, 0xC3, 0x7B, 0x56, 0x9B, 0x61, 0x75, 0xE7, 0x99, 0xEE, 0xF1, 0xA6, 0x2A, 0xAA, 0x94, 0x32, 0x45, 0xAE, 0x76, 0x69, 0xC2, 0x27, 0xA7, 0xB5 },
 | |
| +	{ 0xC9, 0x5D, 0xCB, 0x3C, 0xF1, 0xF2, 0x7D, 0x0E, 0xEF, 0x2F, 0x25, 0xD2, 0x41, 0x38, 0x70, 0x90, 0x4A, 0x87, 0x7C, 0x4A, 0x56, 0xC2, 0xDE, 0x1E, 0x83, 0xE2, 0xBC, 0x2A, 0xE2, 0xE4, 0x68, 0x21 },
 | |
| +	{ 0xD5, 0xD0, 0xB5, 0xD7, 0x05, 0x43, 0x4C, 0xD4, 0x6B, 0x18, 0x57, 0x49, 0xF6, 0x6B, 0xFB, 0x58, 0x36, 0xDC, 0xDF, 0x6E, 0xE5, 0x49, 0xA2, 0xB7, 0xA4, 0xAE, 0xE7, 0xF5, 0x80, 0x07, 0xCA, 0xAF },
 | |
| +	{ 0xBB, 0xC1, 0x24, 0xA7, 0x12, 0xF1, 0x5D, 0x07, 0xC3, 0x00, 0xE0, 0x5B, 0x66, 0x83, 0x89, 0xA4, 0x39, 0xC9, 0x17, 0x77, 0xF7, 0x21, 0xF8, 0x32, 0x0C, 0x1C, 0x90, 0x78, 0x06, 0x6D, 0x2C, 0x7E },
 | |
| +	{ 0xA4, 0x51, 0xB4, 0x8C, 0x35, 0xA6, 0xC7, 0x85, 0x4C, 0xFA, 0xAE, 0x60, 0x26, 0x2E, 0x76, 0x99, 0x08, 0x16, 0x38, 0x2A, 0xC0, 0x66, 0x7E, 0x5A, 0x5C, 0x9E, 0x1B, 0x46, 0xC4, 0x34, 0x2D, 0xDF },
 | |
| +	{ 0xB0, 0xD1, 0x50, 0xFB, 0x55, 0xE7, 0x78, 0xD0, 0x11, 0x47, 0xF0, 0xB5, 0xD8, 0x9D, 0x99, 0xEC, 0xB2, 0x0F, 0xF0, 0x7E, 0x5E, 0x67, 0x60, 0xD6, 0xB6, 0x45, 0xEB, 0x5B, 0x65, 0x4C, 0x62, 0x2B },
 | |
| +	{ 0x34, 0xF7, 0x37, 0xC0, 0xAB, 0x21, 0x99, 0x51, 0xEE, 0xE8, 0x9A, 0x9F, 0x8D, 0xAC, 0x29, 0x9C, 0x9D, 0x4C, 0x38, 0xF3, 0x3F, 0xA4, 0x94, 0xC5, 0xC6, 0xEE, 0xFC, 0x92, 0xB6, 0xDB, 0x08, 0xBC },
 | |
| +	{ 0x1A, 0x62, 0xCC, 0x3A, 0x00, 0x80, 0x0D, 0xCB, 0xD9, 0x98, 0x91, 0x08, 0x0C, 0x1E, 0x09, 0x84, 0x58, 0x19, 0x3A, 0x8C, 0xC9, 0xF9, 0x70, 0xEA, 0x99, 0xFB, 0xEF, 0xF0, 0x03, 0x18, 0xC2, 0x89 },
 | |
| +	{ 0xCF, 0xCE, 0x55, 0xEB, 0xAF, 0xC8, 0x40, 0xD7, 0xAE, 0x48, 0x28, 0x1C, 0x7F, 0xD5, 0x7E, 0xC8, 0xB4, 0x82, 0xD4, 0xB7, 0x04, 0x43, 0x74, 0x95, 0x49, 0x5A, 0xC4, 0x14, 0xCF, 0x4A, 0x37, 0x4B },
 | |
| +	{ 0x67, 0x46, 0xFA, 0xCF, 0x71, 0x14, 0x6D, 0x99, 0x9D, 0xAB, 0xD0, 0x5D, 0x09, 0x3A, 0xE5, 0x86, 0x64, 0x8D, 0x1E, 0xE2, 0x8E, 0x72, 0x61, 0x7B, 0x99, 0xD0, 0xF0, 0x08, 0x6E, 0x1E, 0x45, 0xBF },
 | |
| +	{ 0x57, 0x1C, 0xED, 0x28, 0x3B, 0x3F, 0x23, 0xB4, 0xE7, 0x50, 0xBF, 0x12, 0xA2, 0xCA, 0xF1, 0x78, 0x18, 0x47, 0xBD, 0x89, 0x0E, 0x43, 0x60, 0x3C, 0xDC, 0x59, 0x76, 0x10, 0x2B, 0x7B, 0xB1, 0x1B },
 | |
| +	{ 0xCF, 0xCB, 0x76, 0x5B, 0x04, 0x8E, 0x35, 0x02, 0x2C, 0x5D, 0x08, 0x9D, 0x26, 0xE8, 0x5A, 0x36, 0xB0, 0x05, 0xA2, 0xB8, 0x04, 0x93, 0xD0, 0x3A, 0x14, 0x4E, 0x09, 0xF4, 0x09, 0xB6, 0xAF, 0xD1 },
 | |
| +	{ 0x40, 0x50, 0xC7, 0xA2, 0x77, 0x05, 0xBB, 0x27, 0xF4, 0x20, 0x89, 0xB2, 0x99, 0xF3, 0xCB, 0xE5, 0x05, 0x4E, 0xAD, 0x68, 0x72, 0x7E, 0x8E, 0xF9, 0x31, 0x8C, 0xE6, 0xF2, 0x5C, 0xD6, 0xF3, 0x1D },
 | |
| +	{ 0x18, 0x40, 0x70, 0xBD, 0x5D, 0x26, 0x5F, 0xBD, 0xC1, 0x42, 0xCD, 0x1C, 0x5C, 0xD0, 0xD7, 0xE4, 0x14, 0xE7, 0x03, 0x69, 0xA2, 0x66, 0xD6, 0x27, 0xC8, 0xFB, 0xA8, 0x4F, 0xA5, 0xE8, 0x4C, 0x34 },
 | |
| +	{ 0x9E, 0xDD, 0xA9, 0xA4, 0x44, 0x39, 0x02, 0xA9, 0x58, 0x8C, 0x0D, 0x0C, 0xCC, 0x62, 0xB9, 0x30, 0x21, 0x84, 0x79, 0xA6, 0x84, 0x1E, 0x6F, 0xE7, 0xD4, 0x30, 0x03, 0xF0, 0x4B, 0x1F, 0xD6, 0x43 },
 | |
| +	{ 0xE4, 0x12, 0xFE, 0xEF, 0x79, 0x08, 0x32, 0x4A, 0x6D, 0xA1, 0x84, 0x16, 0x29, 0xF3, 0x5D, 0x3D, 0x35, 0x86, 0x42, 0x01, 0x93, 0x10, 0xEC, 0x57, 0xC6, 0x14, 0x83, 0x6B, 0x63, 0xD3, 0x07, 0x63 },
 | |
| +	{ 0x1A, 0x2B, 0x8E, 0xDF, 0xF3, 0xF9, 0xAC, 0xC1, 0x55, 0x4F, 0xCB, 0xAE, 0x3C, 0xF1, 0xD6, 0x29, 0x8C, 0x64, 0x62, 0xE2, 0x2E, 0x5E, 0xB0, 0x25, 0x96, 0x84, 0xF8, 0x35, 0x01, 0x2B, 0xD1, 0x3F },
 | |
| +	{ 0x28, 0x8C, 0x4A, 0xD9, 0xB9, 0x40, 0x97, 0x62, 0xEA, 0x07, 0xC2, 0x4A, 0x41, 0xF0, 0x4F, 0x69, 0xA7, 0xD7, 0x4B, 0xEE, 0x2D, 0x95, 0x43, 0x53, 0x74, 0xBD, 0xE9, 0x46, 0xD7, 0x24, 0x1C, 0x7B },
 | |
| +	{ 0x80, 0x56, 0x91, 0xBB, 0x28, 0x67, 0x48, 0xCF, 0xB5, 0x91, 0xD3, 0xAE, 0xBE, 0x7E, 0x6F, 0x4E, 0x4D, 0xC6, 0xE2, 0x80, 0x8C, 0x65, 0x14, 0x3C, 0xC0, 0x04, 0xE4, 0xEB, 0x6F, 0xD0, 0x9D, 0x43 },
 | |
| +	{ 0xD4, 0xAC, 0x8D, 0x3A, 0x0A, 0xFC, 0x6C, 0xFA, 0x7B, 0x46, 0x0A, 0xE3, 0x00, 0x1B, 0xAE, 0xB3, 0x6D, 0xAD, 0xB3, 0x7D, 0xA0, 0x7D, 0x2E, 0x8A, 0xC9, 0x18, 0x22, 0xDF, 0x34, 0x8A, 0xED, 0x3D },
 | |
| +	{ 0xC3, 0x76, 0x61, 0x70, 0x14, 0xD2, 0x01, 0x58, 0xBC, 0xED, 0x3D, 0x3B, 0xA5, 0x52, 0xB6, 0xEC, 0xCF, 0x84, 0xE6, 0x2A, 0xA3, 0xEB, 0x65, 0x0E, 0x90, 0x02, 0x9C, 0x84, 0xD1, 0x3E, 0xEA, 0x69 },
 | |
| +	{ 0xC4, 0x1F, 0x09, 0xF4, 0x3C, 0xEC, 0xAE, 0x72, 0x93, 0xD6, 0x00, 0x7C, 0xA0, 0xA3, 0x57, 0x08, 0x7D, 0x5A, 0xE5, 0x9B, 0xE5, 0x00, 0xC1, 0xCD, 0x5B, 0x28, 0x9E, 0xE8, 0x10, 0xC7, 0xB0, 0x82 },
 | |
| +	{ 0x03, 0xD1, 0xCE, 0xD1, 0xFB, 0xA5, 0xC3, 0x91, 0x55, 0xC4, 0x4B, 0x77, 0x65, 0xCB, 0x76, 0x0C, 0x78, 0x70, 0x8D, 0xCF, 0xC8, 0x0B, 0x0B, 0xD8, 0xAD, 0xE3, 0xA5, 0x6D, 0xA8, 0x83, 0x0B, 0x29 },
 | |
| +	{ 0x09, 0xBD, 0xE6, 0xF1, 0x52, 0x21, 0x8D, 0xC9, 0x2C, 0x41, 0xD7, 0xF4, 0x53, 0x87, 0xE6, 0x3E, 0x58, 0x69, 0xD8, 0x07, 0xEC, 0x70, 0xB8, 0x21, 0x40, 0x5D, 0xBD, 0x88, 0x4B, 0x7F, 0xCF, 0x4B },
 | |
| +	{ 0x71, 0xC9, 0x03, 0x6E, 0x18, 0x17, 0x9B, 0x90, 0xB3, 0x7D, 0x39, 0xE9, 0xF0, 0x5E, 0xB8, 0x9C, 0xC5, 0xFC, 0x34, 0x1F, 0xD7, 0xC4, 0x77, 0xD0, 0xD7, 0x49, 0x32, 0x85, 0xFA, 0xCA, 0x08, 0xA4 },
 | |
| +	{ 0x59, 0x16, 0x83, 0x3E, 0xBB, 0x05, 0xCD, 0x91, 0x9C, 0xA7, 0xFE, 0x83, 0xB6, 0x92, 0xD3, 0x20, 0x5B, 0xEF, 0x72, 0x39, 0x2B, 0x2C, 0xF6, 0xBB, 0x0A, 0x6D, 0x43, 0xF9, 0x94, 0xF9, 0x5F, 0x11 },
 | |
| +	{ 0xF6, 0x3A, 0xAB, 0x3E, 0xC6, 0x41, 0xB3, 0xB0, 0x24, 0x96, 0x4C, 0x2B, 0x43, 0x7C, 0x04, 0xF6, 0x04, 0x3C, 0x4C, 0x7E, 0x02, 0x79, 0x23, 0x99, 0x95, 0x40, 0x19, 0x58, 0xF8, 0x6B, 0xBE, 0x54 },
 | |
| +	{ 0xF1, 0x72, 0xB1, 0x80, 0xBF, 0xB0, 0x97, 0x40, 0x49, 0x31, 0x20, 0xB6, 0x32, 0x6C, 0xBD, 0xC5, 0x61, 0xE4, 0x77, 0xDE, 0xF9, 0xBB, 0xCF, 0xD2, 0x8C, 0xC8, 0xC1, 0xC5, 0xE3, 0x37, 0x9A, 0x31 },
 | |
| +	{ 0xCB, 0x9B, 0x89, 0xCC, 0x18, 0x38, 0x1D, 0xD9, 0x14, 0x1A, 0xDE, 0x58, 0x86, 0x54, 0xD4, 0xE6, 0xA2, 0x31, 0xD5, 0xBF, 0x49, 0xD4, 0xD5, 0x9A, 0xC2, 0x7D, 0x86, 0x9C, 0xBE, 0x10, 0x0C, 0xF3 },
 | |
| +	{ 0x7B, 0xD8, 0x81, 0x50, 0x46, 0xFD, 0xD8, 0x10, 0xA9, 0x23, 0xE1, 0x98, 0x4A, 0xAE, 0xBD, 0xCD, 0xF8, 0x4D, 0x87, 0xC8, 0x99, 0x2D, 0x68, 0xB5, 0xEE, 0xB4, 0x60, 0xF9, 0x3E, 0xB3, 0xC8, 0xD7 },
 | |
| +	{ 0x60, 0x7B, 0xE6, 0x68, 0x62, 0xFD, 0x08, 0xEE, 0x5B, 0x19, 0xFA, 0xCA, 0xC0, 0x9D, 0xFD, 0xBC, 0xD4, 0x0C, 0x31, 0x21, 0x01, 0xD6, 0x6E, 0x6E, 0xBD, 0x2B, 0x84, 0x1F, 0x1B, 0x9A, 0x93, 0x25 },
 | |
| +	{ 0x9F, 0xE0, 0x3B, 0xBE, 0x69, 0xAB, 0x18, 0x34, 0xF5, 0x21, 0x9B, 0x0D, 0xA8, 0x8A, 0x08, 0xB3, 0x0A, 0x66, 0xC5, 0x91, 0x3F, 0x01, 0x51, 0x96, 0x3C, 0x36, 0x05, 0x60, 0xDB, 0x03, 0x87, 0xB3 },
 | |
| +	{ 0x90, 0xA8, 0x35, 0x85, 0x71, 0x7B, 0x75, 0xF0, 0xE9, 0xB7, 0x25, 0xE0, 0x55, 0xEE, 0xEE, 0xB9, 0xE7, 0xA0, 0x28, 0xEA, 0x7E, 0x6C, 0xBC, 0x07, 0xB2, 0x09, 0x17, 0xEC, 0x03, 0x63, 0xE3, 0x8C },
 | |
| +	{ 0x33, 0x6E, 0xA0, 0x53, 0x0F, 0x4A, 0x74, 0x69, 0x12, 0x6E, 0x02, 0x18, 0x58, 0x7E, 0xBB, 0xDE, 0x33, 0x58, 0xA0, 0xB3, 0x1C, 0x29, 0xD2, 0x00, 0xF7, 0xDC, 0x7E, 0xB1, 0x5C, 0x6A, 0xAD, 0xD8 },
 | |
| +	{ 0xA7, 0x9E, 0x76, 0xDC, 0x0A, 0xBC, 0xA4, 0x39, 0x6F, 0x07, 0x47, 0xCD, 0x7B, 0x74, 0x8D, 0xF9, 0x13, 0x00, 0x76, 0x26, 0xB1, 0xD6, 0x59, 0xDA, 0x0C, 0x1F, 0x78, 0xB9, 0x30, 0x3D, 0x01, 0xA3 },
 | |
| +	{ 0x44, 0xE7, 0x8A, 0x77, 0x37, 0x56, 0xE0, 0x95, 0x15, 0x19, 0x50, 0x4D, 0x70, 0x38, 0xD2, 0x8D, 0x02, 0x13, 0xA3, 0x7E, 0x0C, 0xE3, 0x75, 0x37, 0x17, 0x57, 0xBC, 0x99, 0x63, 0x11, 0xE3, 0xB8 },
 | |
| +	{ 0x77, 0xAC, 0x01, 0x2A, 0x3F, 0x75, 0x4D, 0xCF, 0xEA, 0xB5, 0xEB, 0x99, 0x6B, 0xE9, 0xCD, 0x2D, 0x1F, 0x96, 0x11, 0x1B, 0x6E, 0x49, 0xF3, 0x99, 0x4D, 0xF1, 0x81, 0xF2, 0x85, 0x69, 0xD8, 0x25 },
 | |
| +	{ 0xCE, 0x5A, 0x10, 0xDB, 0x6F, 0xCC, 0xDA, 0xF1, 0x40, 0xAA, 0xA4, 0xDE, 0xD6, 0x25, 0x0A, 0x9C, 0x06, 0xE9, 0x22, 0x2B, 0xC9, 0xF9, 0xF3, 0x65, 0x8A, 0x4A, 0xFF, 0x93, 0x5F, 0x2B, 0x9F, 0x3A },
 | |
| +	{ 0xEC, 0xC2, 0x03, 0xA7, 0xFE, 0x2B, 0xE4, 0xAB, 0xD5, 0x5B, 0xB5, 0x3E, 0x6E, 0x67, 0x35, 0x72, 0xE0, 0x07, 0x8D, 0xA8, 0xCD, 0x37, 0x5E, 0xF4, 0x30, 0xCC, 0x97, 0xF9, 0xF8, 0x00, 0x83, 0xAF },
 | |
| +	{ 0x14, 0xA5, 0x18, 0x6D, 0xE9, 0xD7, 0xA1, 0x8B, 0x04, 0x12, 0xB8, 0x56, 0x3E, 0x51, 0xCC, 0x54, 0x33, 0x84, 0x0B, 0x4A, 0x12, 0x9A, 0x8F, 0xF9, 0x63, 0xB3, 0x3A, 0x3C, 0x4A, 0xFE, 0x8E, 0xBB },
 | |
| +	{ 0x13, 0xF8, 0xEF, 0x95, 0xCB, 0x86, 0xE6, 0xA6, 0x38, 0x93, 0x1C, 0x8E, 0x10, 0x76, 0x73, 0xEB, 0x76, 0xBA, 0x10, 0xD7, 0xC2, 0xCD, 0x70, 0xB9, 0xD9, 0x92, 0x0B, 0xBE, 0xED, 0x92, 0x94, 0x09 },
 | |
| +	{ 0x0B, 0x33, 0x8F, 0x4E, 0xE1, 0x2F, 0x2D, 0xFC, 0xB7, 0x87, 0x13, 0x37, 0x79, 0x41, 0xE0, 0xB0, 0x63, 0x21, 0x52, 0x58, 0x1D, 0x13, 0x32, 0x51, 0x6E, 0x4A, 0x2C, 0xAB, 0x19, 0x42, 0xCC, 0xA4 },
 | |
| +	{ 0xEA, 0xAB, 0x0E, 0xC3, 0x7B, 0x3B, 0x8A, 0xB7, 0x96, 0xE9, 0xF5, 0x72, 0x38, 0xDE, 0x14, 0xA2, 0x64, 0xA0, 0x76, 0xF3, 0x88, 0x7D, 0x86, 0xE2, 0x9B, 0xB5, 0x90, 0x6D, 0xB5, 0xA0, 0x0E, 0x02 },
 | |
| +	{ 0x23, 0xCB, 0x68, 0xB8, 0xC0, 0xE6, 0xDC, 0x26, 0xDC, 0x27, 0x76, 0x6D, 0xDC, 0x0A, 0x13, 0xA9, 0x94, 0x38, 0xFD, 0x55, 0x61, 0x7A, 0xA4, 0x09, 0x5D, 0x8F, 0x96, 0x97, 0x20, 0xC8, 0x72, 0xDF },
 | |
| +	{ 0x09, 0x1D, 0x8E, 0xE3, 0x0D, 0x6F, 0x29, 0x68, 0xD4, 0x6B, 0x68, 0x7D, 0xD6, 0x52, 0x92, 0x66, 0x57, 0x42, 0xDE, 0x0B, 0xB8, 0x3D, 0xCC, 0x00, 0x04, 0xC7, 0x2C, 0xE1, 0x00, 0x07, 0xA5, 0x49 },
 | |
| +	{ 0x7F, 0x50, 0x7A, 0xBC, 0x6D, 0x19, 0xBA, 0x00, 0xC0, 0x65, 0xA8, 0x76, 0xEC, 0x56, 0x57, 0x86, 0x88, 0x82, 0xD1, 0x8A, 0x22, 0x1B, 0xC4, 0x6C, 0x7A, 0x69, 0x12, 0x54, 0x1F, 0x5B, 0xC7, 0xBA },
 | |
| +	{ 0xA0, 0x60, 0x7C, 0x24, 0xE1, 0x4E, 0x8C, 0x22, 0x3D, 0xB0, 0xD7, 0x0B, 0x4D, 0x30, 0xEE, 0x88, 0x01, 0x4D, 0x60, 0x3F, 0x43, 0x7E, 0x9E, 0x02, 0xAA, 0x7D, 0xAF, 0xA3, 0xCD, 0xFB, 0xAD, 0x94 },
 | |
| +	{ 0xDD, 0xBF, 0xEA, 0x75, 0xCC, 0x46, 0x78, 0x82, 0xEB, 0x34, 0x83, 0xCE, 0x5E, 0x2E, 0x75, 0x6A, 0x4F, 0x47, 0x01, 0xB7, 0x6B, 0x44, 0x55, 0x19, 0xE8, 0x9F, 0x22, 0xD6, 0x0F, 0xA8, 0x6E, 0x06 },
 | |
| +	{ 0x0C, 0x31, 0x1F, 0x38, 0xC3, 0x5A, 0x4F, 0xB9, 0x0D, 0x65, 0x1C, 0x28, 0x9D, 0x48, 0x68, 0x56, 0xCD, 0x14, 0x13, 0xDF, 0x9B, 0x06, 0x77, 0xF5, 0x3E, 0xCE, 0x2C, 0xD9, 0xE4, 0x77, 0xC6, 0x0A },
 | |
| +	{ 0x46, 0xA7, 0x3A, 0x8D, 0xD3, 0xE7, 0x0F, 0x59, 0xD3, 0x94, 0x2C, 0x01, 0xDF, 0x59, 0x9D, 0xEF, 0x78, 0x3C, 0x9D, 0xA8, 0x2F, 0xD8, 0x32, 0x22, 0xCD, 0x66, 0x2B, 0x53, 0xDC, 0xE7, 0xDB, 0xDF },
 | |
| +	{ 0xAD, 0x03, 0x8F, 0xF9, 0xB1, 0x4D, 0xE8, 0x4A, 0x80, 0x1E, 0x4E, 0x62, 0x1C, 0xE5, 0xDF, 0x02, 0x9D, 0xD9, 0x35, 0x20, 0xD0, 0xC2, 0xFA, 0x38, 0xBF, 0xF1, 0x76, 0xA8, 0xB1, 0xD1, 0x69, 0x8C },
 | |
| +	{ 0xAB, 0x70, 0xC5, 0xDF, 0xBD, 0x1E, 0xA8, 0x17, 0xFE, 0xD0, 0xCD, 0x06, 0x72, 0x93, 0xAB, 0xF3, 0x19, 0xE5, 0xD7, 0x90, 0x1C, 0x21, 0x41, 0xD5, 0xD9, 0x9B, 0x23, 0xF0, 0x3A, 0x38, 0xE7, 0x48 },
 | |
| +	{ 0x1F, 0xFF, 0xDA, 0x67, 0x93, 0x2B, 0x73, 0xC8, 0xEC, 0xAF, 0x00, 0x9A, 0x34, 0x91, 0xA0, 0x26, 0x95, 0x3B, 0xAB, 0xFE, 0x1F, 0x66, 0x3B, 0x06, 0x97, 0xC3, 0xC4, 0xAE, 0x8B, 0x2E, 0x7D, 0xCB },
 | |
| +	{ 0xB0, 0xD2, 0xCC, 0x19, 0x47, 0x2D, 0xD5, 0x7F, 0x2B, 0x17, 0xEF, 0xC0, 0x3C, 0x8D, 0x58, 0xC2, 0x28, 0x3D, 0xBB, 0x19, 0xDA, 0x57, 0x2F, 0x77, 0x55, 0x85, 0x5A, 0xA9, 0x79, 0x43, 0x17, 0xA0 },
 | |
| +	{ 0xA0, 0xD1, 0x9A, 0x6E, 0xE3, 0x39, 0x79, 0xC3, 0x25, 0x51, 0x0E, 0x27, 0x66, 0x22, 0xDF, 0x41, 0xF7, 0x15, 0x83, 0xD0, 0x75, 0x01, 0xB8, 0x70, 0x71, 0x12, 0x9A, 0x0A, 0xD9, 0x47, 0x32, 0xA5 },
 | |
| +	{ 0x72, 0x46, 0x42, 0xA7, 0x03, 0x2D, 0x10, 0x62, 0xB8, 0x9E, 0x52, 0xBE, 0xA3, 0x4B, 0x75, 0xDF, 0x7D, 0x8F, 0xE7, 0x72, 0xD9, 0xFE, 0x3C, 0x93, 0xDD, 0xF3, 0xC4, 0x54, 0x5A, 0xB5, 0xA9, 0x9B },
 | |
| +	{ 0xAD, 0xE5, 0xEA, 0xA7, 0xE6, 0x1F, 0x67, 0x2D, 0x58, 0x7E, 0xA0, 0x3D, 0xAE, 0x7D, 0x7B, 0x55, 0x22, 0x9C, 0x01, 0xD0, 0x6B, 0xC0, 0xA5, 0x70, 0x14, 0x36, 0xCB, 0xD1, 0x83, 0x66, 0xA6, 0x26 },
 | |
| +	{ 0x01, 0x3B, 0x31, 0xEB, 0xD2, 0x28, 0xFC, 0xDD, 0xA5, 0x1F, 0xAB, 0xB0, 0x3B, 0xB0, 0x2D, 0x60, 0xAC, 0x20, 0xCA, 0x21, 0x5A, 0xAF, 0xA8, 0x3B, 0xDD, 0x85, 0x5E, 0x37, 0x55, 0xA3, 0x5F, 0x0B },
 | |
| +	{ 0x33, 0x2E, 0xD4, 0x0B, 0xB1, 0x0D, 0xDE, 0x3C, 0x95, 0x4A, 0x75, 0xD7, 0xB8, 0x99, 0x9D, 0x4B, 0x26, 0xA1, 0xC0, 0x63, 0xC1, 0xDC, 0x6E, 0x32, 0xC1, 0xD9, 0x1B, 0xAB, 0x7B, 0xBB, 0x7D, 0x16 },
 | |
| +	{ 0xC7, 0xA1, 0x97, 0xB3, 0xA0, 0x5B, 0x56, 0x6B, 0xCC, 0x9F, 0xAC, 0xD2, 0x0E, 0x44, 0x1D, 0x6F, 0x6C, 0x28, 0x60, 0xAC, 0x96, 0x51, 0xCD, 0x51, 0xD6, 0xB9, 0xD2, 0xCD, 0xEE, 0xEA, 0x03, 0x90 },
 | |
| +	{ 0xBD, 0x9C, 0xF6, 0x4E, 0xA8, 0x95, 0x3C, 0x03, 0x71, 0x08, 0xE6, 0xF6, 0x54, 0x91, 0x4F, 0x39, 0x58, 0xB6, 0x8E, 0x29, 0xC1, 0x67, 0x00, 0xDC, 0x18, 0x4D, 0x94, 0xA2, 0x17, 0x08, 0xFF, 0x60 },
 | |
| +	{ 0x88, 0x35, 0xB0, 0xAC, 0x02, 0x11, 0x51, 0xDF, 0x71, 0x64, 0x74, 0xCE, 0x27, 0xCE, 0x4D, 0x3C, 0x15, 0xF0, 0xB2, 0xDA, 0xB4, 0x80, 0x03, 0xCF, 0x3F, 0x3E, 0xFD, 0x09, 0x45, 0x10, 0x6B, 0x9A },
 | |
| +	{ 0x3B, 0xFE, 0xFA, 0x33, 0x01, 0xAA, 0x55, 0xC0, 0x80, 0x19, 0x0C, 0xFF, 0xDA, 0x8E, 0xAE, 0x51, 0xD9, 0xAF, 0x48, 0x8B, 0x4C, 0x1F, 0x24, 0xC3, 0xD9, 0xA7, 0x52, 0x42, 0xFD, 0x8E, 0xA0, 0x1D },
 | |
| +	{ 0x08, 0x28, 0x4D, 0x14, 0x99, 0x3C, 0xD4, 0x7D, 0x53, 0xEB, 0xAE, 0xCF, 0x0D, 0xF0, 0x47, 0x8C, 0xC1, 0x82, 0xC8, 0x9C, 0x00, 0xE1, 0x85, 0x9C, 0x84, 0x85, 0x16, 0x86, 0xDD, 0xF2, 0xC1, 0xB7 },
 | |
| +	{ 0x1E, 0xD7, 0xEF, 0x9F, 0x04, 0xC2, 0xAC, 0x8D, 0xB6, 0xA8, 0x64, 0xDB, 0x13, 0x10, 0x87, 0xF2, 0x70, 0x65, 0x09, 0x8E, 0x69, 0xC3, 0xFE, 0x78, 0x71, 0x8D, 0x9B, 0x94, 0x7F, 0x4A, 0x39, 0xD0 },
 | |
| +	{ 0xC1, 0x61, 0xF2, 0xDC, 0xD5, 0x7E, 0x9C, 0x14, 0x39, 0xB3, 0x1A, 0x9D, 0xD4, 0x3D, 0x8F, 0x3D, 0x7D, 0xD8, 0xF0, 0xEB, 0x7C, 0xFA, 0xC6, 0xFB, 0x25, 0xA0, 0xF2, 0x8E, 0x30, 0x6F, 0x06, 0x61 },
 | |
| +	{ 0xC0, 0x19, 0x69, 0xAD, 0x34, 0xC5, 0x2C, 0xAF, 0x3D, 0xC4, 0xD8, 0x0D, 0x19, 0x73, 0x5C, 0x29, 0x73, 0x1A, 0xC6, 0xE7, 0xA9, 0x20, 0x85, 0xAB, 0x92, 0x50, 0xC4, 0x8D, 0xEA, 0x48, 0xA3, 0xFC },
 | |
| +	{ 0x17, 0x20, 0xB3, 0x65, 0x56, 0x19, 0xD2, 0xA5, 0x2B, 0x35, 0x21, 0xAE, 0x0E, 0x49, 0xE3, 0x45, 0xCB, 0x33, 0x89, 0xEB, 0xD6, 0x20, 0x8A, 0xCA, 0xF9, 0xF1, 0x3F, 0xDA, 0xCC, 0xA8, 0xBE, 0x49 },
 | |
| +	{ 0x75, 0x62, 0x88, 0x36, 0x1C, 0x83, 0xE2, 0x4C, 0x61, 0x7C, 0xF9, 0x5C, 0x90, 0x5B, 0x22, 0xD0, 0x17, 0xCD, 0xC8, 0x6F, 0x0B, 0xF1, 0xD6, 0x58, 0xF4, 0x75, 0x6C, 0x73, 0x79, 0x87, 0x3B, 0x7F },
 | |
| +	{ 0xE7, 0xD0, 0xED, 0xA3, 0x45, 0x26, 0x93, 0xB7, 0x52, 0xAB, 0xCD, 0xA1, 0xB5, 0x5E, 0x27, 0x6F, 0x82, 0x69, 0x8F, 0x5F, 0x16, 0x05, 0x40, 0x3E, 0xFF, 0x83, 0x0B, 0xEA, 0x00, 0x71, 0xA3, 0x94 },
 | |
| +	{ 0x2C, 0x82, 0xEC, 0xAA, 0x6B, 0x84, 0x80, 0x3E, 0x04, 0x4A, 0xF6, 0x31, 0x18, 0xAF, 0xE5, 0x44, 0x68, 0x7C, 0xB6, 0xE6, 0xC7, 0xDF, 0x49, 0xED, 0x76, 0x2D, 0xFD, 0x7C, 0x86, 0x93, 0xA1, 0xBC },
 | |
| +	{ 0x61, 0x36, 0xCB, 0xF4, 0xB4, 0x41, 0x05, 0x6F, 0xA1, 0xE2, 0x72, 0x24, 0x98, 0x12, 0x5D, 0x6D, 0xED, 0x45, 0xE1, 0x7B, 0x52, 0x14, 0x39, 0x59, 0xC7, 0xF4, 0xD4, 0xE3, 0x95, 0x21, 0x8A, 0xC2 },
 | |
| +	{ 0x72, 0x1D, 0x32, 0x45, 0xAA, 0xFE, 0xF2, 0x7F, 0x6A, 0x62, 0x4F, 0x47, 0x95, 0x4B, 0x6C, 0x25, 0x50, 0x79, 0x52, 0x6F, 0xFA, 0x25, 0xE9, 0xFF, 0x77, 0xE5, 0xDC, 0xFF, 0x47, 0x3B, 0x15, 0x97 },
 | |
| +	{ 0x9D, 0xD2, 0xFB, 0xD8, 0xCE, 0xF1, 0x6C, 0x35, 0x3C, 0x0A, 0xC2, 0x11, 0x91, 0xD5, 0x09, 0xEB, 0x28, 0xDD, 0x9E, 0x3E, 0x0D, 0x8C, 0xEA, 0x5D, 0x26, 0xCA, 0x83, 0x93, 0x93, 0x85, 0x1C, 0x3A },
 | |
| +	{ 0xB2, 0x39, 0x4C, 0xEA, 0xCD, 0xEB, 0xF2, 0x1B, 0xF9, 0xDF, 0x2C, 0xED, 0x98, 0xE5, 0x8F, 0x1C, 0x3A, 0x4B, 0xBB, 0xFF, 0x66, 0x0D, 0xD9, 0x00, 0xF6, 0x22, 0x02, 0xD6, 0x78, 0x5C, 0xC4, 0x6E },
 | |
| +	{ 0x57, 0x08, 0x9F, 0x22, 0x27, 0x49, 0xAD, 0x78, 0x71, 0x76, 0x5F, 0x06, 0x2B, 0x11, 0x4F, 0x43, 0xBA, 0x20, 0xEC, 0x56, 0x42, 0x2A, 0x8B, 0x1E, 0x3F, 0x87, 0x19, 0x2C, 0x0E, 0xA7, 0x18, 0xC6 },
 | |
| +	{ 0xE4, 0x9A, 0x94, 0x59, 0x96, 0x1C, 0xD3, 0x3C, 0xDF, 0x4A, 0xAE, 0x1B, 0x10, 0x78, 0xA5, 0xDE, 0xA7, 0xC0, 0x40, 0xE0, 0xFE, 0xA3, 0x40, 0xC9, 0x3A, 0x72, 0x48, 0x72, 0xFC, 0x4A, 0xF8, 0x06 },
 | |
| +	{ 0xED, 0xE6, 0x7F, 0x72, 0x0E, 0xFF, 0xD2, 0xCA, 0x9C, 0x88, 0x99, 0x41, 0x52, 0xD0, 0x20, 0x1D, 0xEE, 0x6B, 0x0A, 0x2D, 0x2C, 0x07, 0x7A, 0xCA, 0x6D, 0xAE, 0x29, 0xF7, 0x3F, 0x8B, 0x63, 0x09 },
 | |
| +	{ 0xE0, 0xF4, 0x34, 0xBF, 0x22, 0xE3, 0x08, 0x80, 0x39, 0xC2, 0x1F, 0x71, 0x9F, 0xFC, 0x67, 0xF0, 0xF2, 0xCB, 0x5E, 0x98, 0xA7, 0xA0, 0x19, 0x4C, 0x76, 0xE9, 0x6B, 0xF4, 0xE8, 0xE1, 0x7E, 0x61 },
 | |
| +	{ 0x27, 0x7C, 0x04, 0xE2, 0x85, 0x34, 0x84, 0xA4, 0xEB, 0xA9, 0x10, 0xAD, 0x33, 0x6D, 0x01, 0xB4, 0x77, 0xB6, 0x7C, 0xC2, 0x00, 0xC5, 0x9F, 0x3C, 0x8D, 0x77, 0xEE, 0xF8, 0x49, 0x4F, 0x29, 0xCD },
 | |
| +	{ 0x15, 0x6D, 0x57, 0x47, 0xD0, 0xC9, 0x9C, 0x7F, 0x27, 0x09, 0x7D, 0x7B, 0x7E, 0x00, 0x2B, 0x2E, 0x18, 0x5C, 0xB7, 0x2D, 0x8D, 0xD7, 0xEB, 0x42, 0x4A, 0x03, 0x21, 0x52, 0x81, 0x61, 0x21, 0x9F },
 | |
| +	{ 0x20, 0xDD, 0xD1, 0xED, 0x9B, 0x1C, 0xA8, 0x03, 0x94, 0x6D, 0x64, 0xA8, 0x3A, 0xE4, 0x65, 0x9D, 0xA6, 0x7F, 0xBA, 0x7A, 0x1A, 0x3E, 0xDD, 0xB1, 0xE1, 0x03, 0xC0, 0xF5, 0xE0, 0x3E, 0x3A, 0x2C },
 | |
| +	{ 0xF0, 0xAF, 0x60, 0x4D, 0x3D, 0xAB, 0xBF, 0x9A, 0x0F, 0x2A, 0x7D, 0x3D, 0xDA, 0x6B, 0xD3, 0x8B, 0xBA, 0x72, 0xC6, 0xD0, 0x9B, 0xE4, 0x94, 0xFC, 0xEF, 0x71, 0x3F, 0xF1, 0x01, 0x89, 0xB6, 0xE6 },
 | |
| +	{ 0x98, 0x02, 0xBB, 0x87, 0xDE, 0xF4, 0xCC, 0x10, 0xC4, 0xA5, 0xFD, 0x49, 0xAA, 0x58, 0xDF, 0xE2, 0xF3, 0xFD, 0xDB, 0x46, 0xB4, 0x70, 0x88, 0x14, 0xEA, 0xD8, 0x1D, 0x23, 0xBA, 0x95, 0x13, 0x9B },
 | |
| +	{ 0x4F, 0x8C, 0xE1, 0xE5, 0x1D, 0x2F, 0xE7, 0xF2, 0x40, 0x43, 0xA9, 0x04, 0xD8, 0x98, 0xEB, 0xFC, 0x91, 0x97, 0x54, 0x18, 0x75, 0x34, 0x13, 0xAA, 0x09, 0x9B, 0x79, 0x5E, 0xCB, 0x35, 0xCE, 0xDB },
 | |
| +	{ 0xBD, 0xDC, 0x65, 0x14, 0xD7, 0xEE, 0x6A, 0xCE, 0x0A, 0x4A, 0xC1, 0xD0, 0xE0, 0x68, 0x11, 0x22, 0x88, 0xCB, 0xCF, 0x56, 0x04, 0x54, 0x64, 0x27, 0x05, 0x63, 0x01, 0x77, 0xCB, 0xA6, 0x08, 0xBD },
 | |
| +	{ 0xD6, 0x35, 0x99, 0x4F, 0x62, 0x91, 0x51, 0x7B, 0x02, 0x81, 0xFF, 0xDD, 0x49, 0x6A, 0xFA, 0x86, 0x27, 0x12, 0xE5, 0xB3, 0xC4, 0xE5, 0x2E, 0x4C, 0xD5, 0xFD, 0xAE, 0x8C, 0x0E, 0x72, 0xFB, 0x08 },
 | |
| +	{ 0x87, 0x8D, 0x9C, 0xA6, 0x00, 0xCF, 0x87, 0xE7, 0x69, 0xCC, 0x30, 0x5C, 0x1B, 0x35, 0x25, 0x51, 0x86, 0x61, 0x5A, 0x73, 0xA0, 0xDA, 0x61, 0x3B, 0x5F, 0x1C, 0x98, 0xDB, 0xF8, 0x12, 0x83, 0xEA },
 | |
| +	{ 0xA6, 0x4E, 0xBE, 0x5D, 0xC1, 0x85, 0xDE, 0x9F, 0xDD, 0xE7, 0x60, 0x7B, 0x69, 0x98, 0x70, 0x2E, 0xB2, 0x34, 0x56, 0x18, 0x49, 0x57, 0x30, 0x7D, 0x2F, 0xA7, 0x2E, 0x87, 0xA4, 0x77, 0x02, 0xD6 },
 | |
| +	{ 0xCE, 0x50, 0xEA, 0xB7, 0xB5, 0xEB, 0x52, 0xBD, 0xC9, 0xAD, 0x8E, 0x5A, 0x48, 0x0A, 0xB7, 0x80, 0xCA, 0x93, 0x20, 0xE4, 0x43, 0x60, 0xB1, 0xFE, 0x37, 0xE0, 0x3F, 0x2F, 0x7A, 0xD7, 0xDE, 0x01 },
 | |
| +	{ 0xEE, 0xDD, 0xB7, 0xC0, 0xDB, 0x6E, 0x30, 0xAB, 0xE6, 0x6D, 0x79, 0xE3, 0x27, 0x51, 0x1E, 0x61, 0xFC, 0xEB, 0xBC, 0x29, 0xF1, 0x59, 0xB4, 0x0A, 0x86, 0xB0, 0x46, 0xEC, 0xF0, 0x51, 0x38, 0x23 },
 | |
| +	{ 0x78, 0x7F, 0xC9, 0x34, 0x40, 0xC1, 0xEC, 0x96, 0xB5, 0xAD, 0x01, 0xC1, 0x6C, 0xF7, 0x79, 0x16, 0xA1, 0x40, 0x5F, 0x94, 0x26, 0x35, 0x6E, 0xC9, 0x21, 0xD8, 0xDF, 0xF3, 0xEA, 0x63, 0xB7, 0xE0 },
 | |
| +	{ 0x7F, 0x0D, 0x5E, 0xAB, 0x47, 0xEE, 0xFD, 0xA6, 0x96, 0xC0, 0xBF, 0x0F, 0xBF, 0x86, 0xAB, 0x21, 0x6F, 0xCE, 0x46, 0x1E, 0x93, 0x03, 0xAB, 0xA6, 0xAC, 0x37, 0x41, 0x20, 0xE8, 0x90, 0xE8, 0xDF },
 | |
| +	{ 0xB6, 0x80, 0x04, 0xB4, 0x2F, 0x14, 0xAD, 0x02, 0x9F, 0x4C, 0x2E, 0x03, 0xB1, 0xD5, 0xEB, 0x76, 0xD5, 0x71, 0x60, 0xE2, 0x64, 0x76, 0xD2, 0x11, 0x31, 0xBE, 0xF2, 0x0A, 0xDA, 0x7D, 0x27, 0xF4 },
 | |
| +	{ 0xB0, 0xC4, 0xEB, 0x18, 0xAE, 0x25, 0x0B, 0x51, 0xA4, 0x13, 0x82, 0xEA, 0xD9, 0x2D, 0x0D, 0xC7, 0x45, 0x5F, 0x93, 0x79, 0xFC, 0x98, 0x84, 0x42, 0x8E, 0x47, 0x70, 0x60, 0x8D, 0xB0, 0xFA, 0xEC },
 | |
| +	{ 0xF9, 0x2B, 0x7A, 0x87, 0x0C, 0x05, 0x9F, 0x4D, 0x46, 0x46, 0x4C, 0x82, 0x4E, 0xC9, 0x63, 0x55, 0x14, 0x0B, 0xDC, 0xE6, 0x81, 0x32, 0x2C, 0xC3, 0xA9, 0x92, 0xFF, 0x10, 0x3E, 0x3F, 0xEA, 0x52 },
 | |
| +	{ 0x53, 0x64, 0x31, 0x26, 0x14, 0x81, 0x33, 0x98, 0xCC, 0x52, 0x5D, 0x4C, 0x4E, 0x14, 0x6E, 0xDE, 0xB3, 0x71, 0x26, 0x5F, 0xBA, 0x19, 0x13, 0x3A, 0x2C, 0x3D, 0x21, 0x59, 0x29, 0x8A, 0x17, 0x42 },
 | |
| +	{ 0xF6, 0x62, 0x0E, 0x68, 0xD3, 0x7F, 0xB2, 0xAF, 0x50, 0x00, 0xFC, 0x28, 0xE2, 0x3B, 0x83, 0x22, 0x97, 0xEC, 0xD8, 0xBC, 0xE9, 0x9E, 0x8B, 0xE4, 0xD0, 0x4E, 0x85, 0x30, 0x9E, 0x3D, 0x33, 0x74 },
 | |
| +	{ 0x53, 0x16, 0xA2, 0x79, 0x69, 0xD7, 0xFE, 0x04, 0xFF, 0x27, 0xB2, 0x83, 0x96, 0x1B, 0xFF, 0xC3, 0xBF, 0x5D, 0xFB, 0x32, 0xFB, 0x6A, 0x89, 0xD1, 0x01, 0xC6, 0xC3, 0xB1, 0x93, 0x7C, 0x28, 0x71 },
 | |
| +	{ 0x81, 0xD1, 0x66, 0x4F, 0xDF, 0x3C, 0xB3, 0x3C, 0x24, 0xEE, 0xBA, 0xC0, 0xBD, 0x64, 0x24, 0x4B, 0x77, 0xC4, 0xAB, 0xEA, 0x90, 0xBB, 0xE8, 0xB5, 0xEE, 0x0B, 0x2A, 0xAF, 0xCF, 0x2D, 0x6A, 0x53 },
 | |
| +	{ 0x34, 0x57, 0x82, 0xF2, 0x95, 0xB0, 0x88, 0x03, 0x52, 0xE9, 0x24, 0xA0, 0x46, 0x7B, 0x5F, 0xBC, 0x3E, 0x8F, 0x3B, 0xFB, 0xC3, 0xC7, 0xE4, 0x8B, 0x67, 0x09, 0x1F, 0xB5, 0xE8, 0x0A, 0x94, 0x42 },
 | |
| +	{ 0x79, 0x41, 0x11, 0xEA, 0x6C, 0xD6, 0x5E, 0x31, 0x1F, 0x74, 0xEE, 0x41, 0xD4, 0x76, 0xCB, 0x63, 0x2C, 0xE1, 0xE4, 0xB0, 0x51, 0xDC, 0x1D, 0x9E, 0x9D, 0x06, 0x1A, 0x19, 0xE1, 0xD0, 0xBB, 0x49 },
 | |
| +	{ 0x2A, 0x85, 0xDA, 0xF6, 0x13, 0x88, 0x16, 0xB9, 0x9B, 0xF8, 0xD0, 0x8B, 0xA2, 0x11, 0x4B, 0x7A, 0xB0, 0x79, 0x75, 0xA7, 0x84, 0x20, 0xC1, 0xA3, 0xB0, 0x6A, 0x77, 0x7C, 0x22, 0xDD, 0x8B, 0xCB },
 | |
| +	{ 0x89, 0xB0, 0xD5, 0xF2, 0x89, 0xEC, 0x16, 0x40, 0x1A, 0x06, 0x9A, 0x96, 0x0D, 0x0B, 0x09, 0x3E, 0x62, 0x5D, 0xA3, 0xCF, 0x41, 0xEE, 0x29, 0xB5, 0x9B, 0x93, 0x0C, 0x58, 0x20, 0x14, 0x54, 0x55 },
 | |
| +	{ 0xD0, 0xFD, 0xCB, 0x54, 0x39, 0x43, 0xFC, 0x27, 0xD2, 0x08, 0x64, 0xF5, 0x21, 0x81, 0x47, 0x1B, 0x94, 0x2C, 0xC7, 0x7C, 0xA6, 0x75, 0xBC, 0xB3, 0x0D, 0xF3, 0x1D, 0x35, 0x8E, 0xF7, 0xB1, 0xEB },
 | |
| +	{ 0xB1, 0x7E, 0xA8, 0xD7, 0x70, 0x63, 0xC7, 0x09, 0xD4, 0xDC, 0x6B, 0x87, 0x94, 0x13, 0xC3, 0x43, 0xE3, 0x79, 0x0E, 0x9E, 0x62, 0xCA, 0x85, 0xB7, 0x90, 0x0B, 0x08, 0x6F, 0x6B, 0x75, 0xC6, 0x72 },
 | |
| +	{ 0xE7, 0x1A, 0x3E, 0x2C, 0x27, 0x4D, 0xB8, 0x42, 0xD9, 0x21, 0x14, 0xF2, 0x17, 0xE2, 0xC0, 0xEA, 0xC8, 0xB4, 0x50, 0x93, 0xFD, 0xFD, 0x9D, 0xF4, 0xCA, 0x71, 0x62, 0x39, 0x48, 0x62, 0xD5, 0x01 },
 | |
| +	{ 0xC0, 0x47, 0x67, 0x59, 0xAB, 0x7A, 0xA3, 0x33, 0x23, 0x4F, 0x6B, 0x44, 0xF5, 0xFD, 0x85, 0x83, 0x90, 0xEC, 0x23, 0x69, 0x4C, 0x62, 0x2C, 0xB9, 0x86, 0xE7, 0x69, 0xC7, 0x8E, 0xDD, 0x73, 0x3E },
 | |
| +	{ 0x9A, 0xB8, 0xEA, 0xBB, 0x14, 0x16, 0x43, 0x4D, 0x85, 0x39, 0x13, 0x41, 0xD5, 0x69, 0x93, 0xC5, 0x54, 0x58, 0x16, 0x7D, 0x44, 0x18, 0xB1, 0x9A, 0x0F, 0x2A, 0xD8, 0xB7, 0x9A, 0x83, 0xA7, 0x5B },
 | |
| +	{ 0x79, 0x92, 0xD0, 0xBB, 0xB1, 0x5E, 0x23, 0x82, 0x6F, 0x44, 0x3E, 0x00, 0x50, 0x5D, 0x68, 0xD3, 0xED, 0x73, 0x72, 0x99, 0x5A, 0x5C, 0x3E, 0x49, 0x86, 0x54, 0x10, 0x2F, 0xBC, 0xD0, 0x96, 0x4E },
 | |
| +	{ 0xC0, 0x21, 0xB3, 0x00, 0x85, 0x15, 0x14, 0x35, 0xDF, 0x33, 0xB0, 0x07, 0xCC, 0xEC, 0xC6, 0x9D, 0xF1, 0x26, 0x9F, 0x39, 0xBA, 0x25, 0x09, 0x2B, 0xED, 0x59, 0xD9, 0x32, 0xAC, 0x0F, 0xDC, 0x28 },
 | |
| +	{ 0x91, 0xA2, 0x5E, 0xC0, 0xEC, 0x0D, 0x9A, 0x56, 0x7F, 0x89, 0xC4, 0xBF, 0xE1, 0xA6, 0x5A, 0x0E, 0x43, 0x2D, 0x07, 0x06, 0x4B, 0x41, 0x90, 0xE2, 0x7D, 0xFB, 0x81, 0x90, 0x1F, 0xD3, 0x13, 0x9B },
 | |
| +	{ 0x59, 0x50, 0xD3, 0x9A, 0x23, 0xE1, 0x54, 0x5F, 0x30, 0x12, 0x70, 0xAA, 0x1A, 0x12, 0xF2, 0xE6, 0xC4, 0x53, 0x77, 0x6E, 0x4D, 0x63, 0x55, 0xDE, 0x42, 0x5C, 0xC1, 0x53, 0xF9, 0x81, 0x88, 0x67 },
 | |
| +	{ 0xD7, 0x9F, 0x14, 0x72, 0x0C, 0x61, 0x0A, 0xF1, 0x79, 0xA3, 0x76, 0x5D, 0x4B, 0x7C, 0x09, 0x68, 0xF9, 0x77, 0x96, 0x2D, 0xBF, 0x65, 0x5B, 0x52, 0x12, 0x72, 0xB6, 0xF1, 0xE1, 0x94, 0x48, 0x8E },
 | |
| +	{ 0xE9, 0x53, 0x1B, 0xFC, 0x8B, 0x02, 0x99, 0x5A, 0xEA, 0xA7, 0x5B, 0xA2, 0x70, 0x31, 0xFA, 0xDB, 0xCB, 0xF4, 0xA0, 0xDA, 0xB8, 0x96, 0x1D, 0x92, 0x96, 0xCD, 0x7E, 0x84, 0xD2, 0x5D, 0x60, 0x06 },
 | |
| +	{ 0x34, 0xE9, 0xC2, 0x6A, 0x01, 0xD7, 0xF1, 0x61, 0x81, 0xB4, 0x54, 0xA9, 0xD1, 0x62, 0x3C, 0x23, 0x3C, 0xB9, 0x9D, 0x31, 0xC6, 0x94, 0x65, 0x6E, 0x94, 0x13, 0xAC, 0xA3, 0xE9, 0x18, 0x69, 0x2F },
 | |
| +	{ 0xD9, 0xD7, 0x42, 0x2F, 0x43, 0x7B, 0xD4, 0x39, 0xDD, 0xD4, 0xD8, 0x83, 0xDA, 0xE2, 0xA0, 0x83, 0x50, 0x17, 0x34, 0x14, 0xBE, 0x78, 0x15, 0x51, 0x33, 0xFF, 0xF1, 0x96, 0x4C, 0x3D, 0x79, 0x72 },
 | |
| +	{ 0x4A, 0xEE, 0x0C, 0x7A, 0xAF, 0x07, 0x54, 0x14, 0xFF, 0x17, 0x93, 0xEA, 0xD7, 0xEA, 0xCA, 0x60, 0x17, 0x75, 0xC6, 0x15, 0xDB, 0xD6, 0x0B, 0x64, 0x0B, 0x0A, 0x9F, 0x0C, 0xE5, 0x05, 0xD4, 0x35 },
 | |
| +	{ 0x6B, 0xFD, 0xD1, 0x54, 0x59, 0xC8, 0x3B, 0x99, 0xF0, 0x96, 0xBF, 0xB4, 0x9E, 0xE8, 0x7B, 0x06, 0x3D, 0x69, 0xC1, 0x97, 0x4C, 0x69, 0x28, 0xAC, 0xFC, 0xFB, 0x40, 0x99, 0xF8, 0xC4, 0xEF, 0x67 },
 | |
| +	{ 0x9F, 0xD1, 0xC4, 0x08, 0xFD, 0x75, 0xC3, 0x36, 0x19, 0x3A, 0x2A, 0x14, 0xD9, 0x4F, 0x6A, 0xF5, 0xAD, 0xF0, 0x50, 0xB8, 0x03, 0x87, 0xB4, 0xB0, 0x10, 0xFB, 0x29, 0xF4, 0xCC, 0x72, 0x70, 0x7C },
 | |
| +	{ 0x13, 0xC8, 0x84, 0x80, 0xA5, 0xD0, 0x0D, 0x6C, 0x8C, 0x7A, 0xD2, 0x11, 0x0D, 0x76, 0xA8, 0x2D, 0x9B, 0x70, 0xF4, 0xFA, 0x66, 0x96, 0xD4, 0xE5, 0xDD, 0x42, 0xA0, 0x66, 0xDC, 0xAF, 0x99, 0x20 },
 | |
| +	{ 0x82, 0x0E, 0x72, 0x5E, 0xE2, 0x5F, 0xE8, 0xFD, 0x3A, 0x8D, 0x5A, 0xBE, 0x4C, 0x46, 0xC3, 0xBA, 0x88, 0x9D, 0xE6, 0xFA, 0x91, 0x91, 0xAA, 0x22, 0xBA, 0x67, 0xD5, 0x70, 0x54, 0x21, 0x54, 0x2B },
 | |
| +	{ 0x32, 0xD9, 0x3A, 0x0E, 0xB0, 0x2F, 0x42, 0xFB, 0xBC, 0xAF, 0x2B, 0xAD, 0x00, 0x85, 0xB2, 0x82, 0xE4, 0x60, 0x46, 0xA4, 0xDF, 0x7A, 0xD1, 0x06, 0x57, 0xC9, 0xD6, 0x47, 0x63, 0x75, 0xB9, 0x3E },
 | |
| +	{ 0xAD, 0xC5, 0x18, 0x79, 0x05, 0xB1, 0x66, 0x9C, 0xD8, 0xEC, 0x9C, 0x72, 0x1E, 0x19, 0x53, 0x78, 0x6B, 0x9D, 0x89, 0xA9, 0xBA, 0xE3, 0x07, 0x80, 0xF1, 0xE1, 0xEA, 0xB2, 0x4A, 0x00, 0x52, 0x3C },
 | |
| +	{ 0xE9, 0x07, 0x56, 0xFF, 0x7F, 0x9A, 0xD8, 0x10, 0xB2, 0x39, 0xA1, 0x0C, 0xED, 0x2C, 0xF9, 0xB2, 0x28, 0x43, 0x54, 0xC1, 0xF8, 0xC7, 0xE0, 0xAC, 0xCC, 0x24, 0x61, 0xDC, 0x79, 0x6D, 0x6E, 0x89 },
 | |
| +	{ 0x12, 0x51, 0xF7, 0x6E, 0x56, 0x97, 0x84, 0x81, 0x87, 0x53, 0x59, 0x80, 0x1D, 0xB5, 0x89, 0xA0, 0xB2, 0x2F, 0x86, 0xD8, 0xD6, 0x34, 0xDC, 0x04, 0x50, 0x6F, 0x32, 0x2E, 0xD7, 0x8F, 0x17, 0xE8 },
 | |
| +	{ 0x3A, 0xFA, 0x89, 0x9F, 0xD9, 0x80, 0xE7, 0x3E, 0xCB, 0x7F, 0x4D, 0x8B, 0x8F, 0x29, 0x1D, 0xC9, 0xAF, 0x79, 0x6B, 0xC6, 0x5D, 0x27, 0xF9, 0x74, 0xC6, 0xF1, 0x93, 0xC9, 0x19, 0x1A, 0x09, 0xFD },
 | |
| +	{ 0xAA, 0x30, 0x5B, 0xE2, 0x6E, 0x5D, 0xED, 0xDC, 0x3C, 0x10, 0x10, 0xCB, 0xC2, 0x13, 0xF9, 0x5F, 0x05, 0x1C, 0x78, 0x5C, 0x5B, 0x43, 0x1E, 0x6A, 0x7C, 0xD0, 0x48, 0xF1, 0x61, 0x78, 0x75, 0x28 },
 | |
| +	{ 0x8E, 0xA1, 0x88, 0x4F, 0xF3, 0x2E, 0x9D, 0x10, 0xF0, 0x39, 0xB4, 0x07, 0xD0, 0xD4, 0x4E, 0x7E, 0x67, 0x0A, 0xBD, 0x88, 0x4A, 0xEE, 0xE0, 0xFB, 0x75, 0x7A, 0xE9, 0x4E, 0xAA, 0x97, 0x37, 0x3D },
 | |
| +	{ 0xD4, 0x82, 0xB2, 0x15, 0x5D, 0x4D, 0xEC, 0x6B, 0x47, 0x36, 0xA1, 0xF1, 0x61, 0x7B, 0x53, 0xAA, 0xA3, 0x73, 0x10, 0x27, 0x7D, 0x3F, 0xEF, 0x0C, 0x37, 0xAD, 0x41, 0x76, 0x8F, 0xC2, 0x35, 0xB4 },
 | |
| +	{ 0x4D, 0x41, 0x39, 0x71, 0x38, 0x7E, 0x7A, 0x88, 0x98, 0xA8, 0xDC, 0x2A, 0x27, 0x50, 0x07, 0x78, 0x53, 0x9E, 0xA2, 0x14, 0xA2, 0xDF, 0xE9, 0xB3, 0xD7, 0xE8, 0xEB, 0xDC, 0xE5, 0xCF, 0x3D, 0xB3 },
 | |
| +	{ 0x69, 0x6E, 0x5D, 0x46, 0xE6, 0xC5, 0x7E, 0x87, 0x96, 0xE4, 0x73, 0x5D, 0x08, 0x91, 0x6E, 0x0B, 0x79, 0x29, 0xB3, 0xCF, 0x29, 0x8C, 0x29, 0x6D, 0x22, 0xE9, 0xD3, 0x01, 0x96, 0x53, 0x37, 0x1C },
 | |
| +	{ 0x1F, 0x56, 0x47, 0xC1, 0xD3, 0xB0, 0x88, 0x22, 0x88, 0x85, 0x86, 0x5C, 0x89, 0x40, 0x90, 0x8B, 0xF4, 0x0D, 0x1A, 0x82, 0x72, 0x82, 0x19, 0x73, 0xB1, 0x60, 0x00, 0x8E, 0x7A, 0x3C, 0xE2, 0xEB },
 | |
| +	{ 0xB6, 0xE7, 0x6C, 0x33, 0x0F, 0x02, 0x1A, 0x5B, 0xDA, 0x65, 0x87, 0x50, 0x10, 0xB0, 0xED, 0xF0, 0x91, 0x26, 0xC0, 0xF5, 0x10, 0xEA, 0x84, 0x90, 0x48, 0x19, 0x20, 0x03, 0xAE, 0xF4, 0xC6, 0x1C },
 | |
| +	{ 0x3C, 0xD9, 0x52, 0xA0, 0xBE, 0xAD, 0xA4, 0x1A, 0xBB, 0x42, 0x4C, 0xE4, 0x7F, 0x94, 0xB4, 0x2B, 0xE6, 0x4E, 0x1F, 0xFB, 0x0F, 0xD0, 0x78, 0x22, 0x76, 0x80, 0x79, 0x46, 0xD0, 0xD0, 0xBC, 0x55 },
 | |
| +	{ 0x98, 0xD9, 0x26, 0x77, 0x43, 0x9B, 0x41, 0xB7, 0xBB, 0x51, 0x33, 0x12, 0xAF, 0xB9, 0x2B, 0xCC, 0x8E, 0xE9, 0x68, 0xB2, 0xE3, 0xB2, 0x38, 0xCE, 0xCB, 0x9B, 0x0F, 0x34, 0xC9, 0xBB, 0x63, 0xD0 },
 | |
| +	{ 0xEC, 0xBC, 0xA2, 0xCF, 0x08, 0xAE, 0x57, 0xD5, 0x17, 0xAD, 0x16, 0x15, 0x8A, 0x32, 0xBF, 0xA7, 0xDC, 0x03, 0x82, 0xEA, 0xED, 0xA1, 0x28, 0xE9, 0x18, 0x86, 0x73, 0x4C, 0x24, 0xA0, 0xB2, 0x9D },
 | |
| +	{ 0x94, 0x2C, 0xC7, 0xC0, 0xB5, 0x2E, 0x2B, 0x16, 0xA4, 0xB8, 0x9F, 0xA4, 0xFC, 0x7E, 0x0B, 0xF6, 0x09, 0xE2, 0x9A, 0x08, 0xC1, 0xA8, 0x54, 0x34, 0x52, 0xB7, 0x7C, 0x7B, 0xFD, 0x11, 0xBB, 0x28 },
 | |
| +	{ 0x8A, 0x06, 0x5D, 0x8B, 0x61, 0xA0, 0xDF, 0xFB, 0x17, 0x0D, 0x56, 0x27, 0x73, 0x5A, 0x76, 0xB0, 0xE9, 0x50, 0x60, 0x37, 0x80, 0x8C, 0xBA, 0x16, 0xC3, 0x45, 0x00, 0x7C, 0x9F, 0x79, 0xCF, 0x8F },
 | |
| +	{ 0x1B, 0x9F, 0xA1, 0x97, 0x14, 0x65, 0x9C, 0x78, 0xFF, 0x41, 0x38, 0x71, 0x84, 0x92, 0x15, 0x36, 0x10, 0x29, 0xAC, 0x80, 0x2B, 0x1C, 0xBC, 0xD5, 0x4E, 0x40, 0x8B, 0xD8, 0x72, 0x87, 0xF8, 0x1F },
 | |
| +	{ 0x8D, 0xAB, 0x07, 0x1B, 0xCD, 0x6C, 0x72, 0x92, 0xA9, 0xEF, 0x72, 0x7B, 0x4A, 0xE0, 0xD8, 0x67, 0x13, 0x30, 0x1D, 0xA8, 0x61, 0x8D, 0x9A, 0x48, 0xAD, 0xCE, 0x55, 0xF3, 0x03, 0xA8, 0x69, 0xA1 },
 | |
| +	{ 0x82, 0x53, 0xE3, 0xE7, 0xC7, 0xB6, 0x84, 0xB9, 0xCB, 0x2B, 0xEB, 0x01, 0x4C, 0xE3, 0x30, 0xFF, 0x3D, 0x99, 0xD1, 0x7A, 0xBB, 0xDB, 0xAB, 0xE4, 0xF4, 0xD6, 0x74, 0xDE, 0xD5, 0x3F, 0xFC, 0x6B },
 | |
| +	{ 0xF1, 0x95, 0xF3, 0x21, 0xE9, 0xE3, 0xD6, 0xBD, 0x7D, 0x07, 0x45, 0x04, 0xDD, 0x2A, 0xB0, 0xE6, 0x24, 0x1F, 0x92, 0xE7, 0x84, 0xB1, 0xAA, 0x27, 0x1F, 0xF6, 0x48, 0xB1, 0xCA, 0xB6, 0xD7, 0xF6 },
 | |
| +	{ 0x27, 0xE4, 0xCC, 0x72, 0x09, 0x0F, 0x24, 0x12, 0x66, 0x47, 0x6A, 0x7C, 0x09, 0x49, 0x5F, 0x2D, 0xB1, 0x53, 0xD5, 0xBC, 0xBD, 0x76, 0x19, 0x03, 0xEF, 0x79, 0x27, 0x5E, 0xC5, 0x6B, 0x2E, 0xD8 },
 | |
| +	{ 0x89, 0x9C, 0x24, 0x05, 0x78, 0x8E, 0x25, 0xB9, 0x9A, 0x18, 0x46, 0x35, 0x5E, 0x64, 0x6D, 0x77, 0xCF, 0x40, 0x00, 0x83, 0x41, 0x5F, 0x7D, 0xC5, 0xAF, 0xE6, 0x9D, 0x6E, 0x17, 0xC0, 0x00, 0x23 },
 | |
| +	{ 0xA5, 0x9B, 0x78, 0xC4, 0x90, 0x57, 0x44, 0x07, 0x6B, 0xFE, 0xE8, 0x94, 0xDE, 0x70, 0x7D, 0x4F, 0x12, 0x0B, 0x5C, 0x68, 0x93, 0xEA, 0x04, 0x00, 0x29, 0x7D, 0x0B, 0xB8, 0x34, 0x72, 0x76, 0x32 },
 | |
| +	{ 0x59, 0xDC, 0x78, 0xB1, 0x05, 0x64, 0x97, 0x07, 0xA2, 0xBB, 0x44, 0x19, 0xC4, 0x8F, 0x00, 0x54, 0x00, 0xD3, 0x97, 0x3D, 0xE3, 0x73, 0x66, 0x10, 0x23, 0x04, 0x35, 0xB1, 0x04, 0x24, 0xB2, 0x4F },
 | |
| +	{ 0xC0, 0x14, 0x9D, 0x1D, 0x7E, 0x7A, 0x63, 0x53, 0xA6, 0xD9, 0x06, 0xEF, 0xE7, 0x28, 0xF2, 0xF3, 0x29, 0xFE, 0x14, 0xA4, 0x14, 0x9A, 0x3E, 0xA7, 0x76, 0x09, 0xBC, 0x42, 0xB9, 0x75, 0xDD, 0xFA },
 | |
| +	{ 0xA3, 0x2F, 0x24, 0x14, 0x74, 0xA6, 0xC1, 0x69, 0x32, 0xE9, 0x24, 0x3B, 0xE0, 0xCF, 0x09, 0xBC, 0xDC, 0x7E, 0x0C, 0xA0, 0xE7, 0xA6, 0xA1, 0xB9, 0xB1, 0xA0, 0xF0, 0x1E, 0x41, 0x50, 0x23, 0x77 },
 | |
| +	{ 0xB2, 0x39, 0xB2, 0xE4, 0xF8, 0x18, 0x41, 0x36, 0x1C, 0x13, 0x39, 0xF6, 0x8E, 0x2C, 0x35, 0x9F, 0x92, 0x9A, 0xF9, 0xAD, 0x9F, 0x34, 0xE0, 0x1A, 0xAB, 0x46, 0x31, 0xAD, 0x6D, 0x55, 0x00, 0xB0 },
 | |
| +	{ 0x85, 0xFB, 0x41, 0x9C, 0x70, 0x02, 0xA3, 0xE0, 0xB4, 0xB6, 0xEA, 0x09, 0x3B, 0x4C, 0x1A, 0xC6, 0x93, 0x66, 0x45, 0xB6, 0x5D, 0xAC, 0x5A, 0xC1, 0x5A, 0x85, 0x28, 0xB7, 0xB9, 0x4C, 0x17, 0x54 },
 | |
| +	{ 0x96, 0x19, 0x72, 0x06, 0x25, 0xF1, 0x90, 0xB9, 0x3A, 0x3F, 0xAD, 0x18, 0x6A, 0xB3, 0x14, 0x18, 0x96, 0x33, 0xC0, 0xD3, 0xA0, 0x1E, 0x6F, 0x9B, 0xC8, 0xC4, 0xA8, 0xF8, 0x2F, 0x38, 0x3D, 0xBF },
 | |
| +	{ 0x7D, 0x62, 0x0D, 0x90, 0xFE, 0x69, 0xFA, 0x46, 0x9A, 0x65, 0x38, 0x38, 0x89, 0x70, 0xA1, 0xAA, 0x09, 0xBB, 0x48, 0xA2, 0xD5, 0x9B, 0x34, 0x7B, 0x97, 0xE8, 0xCE, 0x71, 0xF4, 0x8C, 0x7F, 0x46 },
 | |
| +	{ 0x29, 0x43, 0x83, 0x56, 0x85, 0x96, 0xFB, 0x37, 0xC7, 0x5B, 0xBA, 0xCD, 0x97, 0x9C, 0x5F, 0xF6, 0xF2, 0x0A, 0x55, 0x6B, 0xF8, 0x87, 0x9C, 0xC7, 0x29, 0x24, 0x85, 0x5D, 0xF9, 0xB8, 0x24, 0x0E },
 | |
| +	{ 0x16, 0xB1, 0x8A, 0xB3, 0x14, 0x35, 0x9C, 0x2B, 0x83, 0x3C, 0x1C, 0x69, 0x86, 0xD4, 0x8C, 0x55, 0xA9, 0xFC, 0x97, 0xCD, 0xE9, 0xA3, 0xC1, 0xF1, 0x0A, 0x31, 0x77, 0x14, 0x0F, 0x73, 0xF7, 0x38 },
 | |
| +	{ 0x8C, 0xBB, 0xDD, 0x14, 0xBC, 0x33, 0xF0, 0x4C, 0xF4, 0x58, 0x13, 0xE4, 0xA1, 0x53, 0xA2, 0x73, 0xD3, 0x6A, 0xDA, 0xD5, 0xCE, 0x71, 0xF4, 0x99, 0xEE, 0xB8, 0x7F, 0xB8, 0xAC, 0x63, 0xB7, 0x29 },
 | |
| +	{ 0x69, 0xC9, 0xA4, 0x98, 0xDB, 0x17, 0x4E, 0xCA, 0xEF, 0xCC, 0x5A, 0x3A, 0xC9, 0xFD, 0xED, 0xF0, 0xF8, 0x13, 0xA5, 0xBE, 0xC7, 0x27, 0xF1, 0xE7, 0x75, 0xBA, 0xBD, 0xEC, 0x77, 0x18, 0x81, 0x6E },
 | |
| +	{ 0xB4, 0x62, 0xC3, 0xBE, 0x40, 0x44, 0x8F, 0x1D, 0x4F, 0x80, 0x62, 0x62, 0x54, 0xE5, 0x35, 0xB0, 0x8B, 0xC9, 0xCD, 0xCF, 0xF5, 0x99, 0xA7, 0x68, 0x57, 0x8D, 0x4B, 0x28, 0x81, 0xA8, 0xE3, 0xF0 },
 | |
| +	{ 0x55, 0x3E, 0x9D, 0x9C, 0x5F, 0x36, 0x0A, 0xC0, 0xB7, 0x4A, 0x7D, 0x44, 0xE5, 0xA3, 0x91, 0xDA, 0xD4, 0xCE, 0xD0, 0x3E, 0x0C, 0x24, 0x18, 0x3B, 0x7E, 0x8E, 0xCA, 0xBD, 0xF1, 0x71, 0x5A, 0x64 },
 | |
| +	{ 0x7A, 0x7C, 0x55, 0xA5, 0x6F, 0xA9, 0xAE, 0x51, 0xE6, 0x55, 0xE0, 0x19, 0x75, 0xD8, 0xA6, 0xFF, 0x4A, 0xE9, 0xE4, 0xB4, 0x86, 0xFC, 0xBE, 0x4E, 0xAC, 0x04, 0x45, 0x88, 0xF2, 0x45, 0xEB, 0xEA },
 | |
| +	{ 0x2A, 0xFD, 0xF3, 0xC8, 0x2A, 0xBC, 0x48, 0x67, 0xF5, 0xDE, 0x11, 0x12, 0x86, 0xC2, 0xB3, 0xBE, 0x7D, 0x6E, 0x48, 0x65, 0x7B, 0xA9, 0x23, 0xCF, 0xBF, 0x10, 0x1A, 0x6D, 0xFC, 0xF9, 0xDB, 0x9A },
 | |
| +	{ 0x41, 0x03, 0x7D, 0x2E, 0xDC, 0xDC, 0xE0, 0xC4, 0x9B, 0x7F, 0xB4, 0xA6, 0xAA, 0x09, 0x99, 0xCA, 0x66, 0x97, 0x6C, 0x74, 0x83, 0xAF, 0xE6, 0x31, 0xD4, 0xED, 0xA2, 0x83, 0x14, 0x4F, 0x6D, 0xFC },
 | |
| +	{ 0xC4, 0x46, 0x6F, 0x84, 0x97, 0xCA, 0x2E, 0xEB, 0x45, 0x83, 0xA0, 0xB0, 0x8E, 0x9D, 0x9A, 0xC7, 0x43, 0x95, 0x70, 0x9F, 0xDA, 0x10, 0x9D, 0x24, 0xF2, 0xE4, 0x46, 0x21, 0x96, 0x77, 0x9C, 0x5D },
 | |
| +	{ 0x75, 0xF6, 0x09, 0x33, 0x8A, 0xA6, 0x7D, 0x96, 0x9A, 0x2A, 0xE2, 0xA2, 0x36, 0x2B, 0x2D, 0xA9, 0xD7, 0x7C, 0x69, 0x5D, 0xFD, 0x1D, 0xF7, 0x22, 0x4A, 0x69, 0x01, 0xDB, 0x93, 0x2C, 0x33, 0x64 },
 | |
| +	{ 0x68, 0x60, 0x6C, 0xEB, 0x98, 0x9D, 0x54, 0x88, 0xFC, 0x7C, 0xF6, 0x49, 0xF3, 0xD7, 0xC2, 0x72, 0xEF, 0x05, 0x5D, 0xA1, 0xA9, 0x3F, 0xAE, 0xCD, 0x55, 0xFE, 0x06, 0xF6, 0x96, 0x70, 0x98, 0xCA },
 | |
| +	{ 0x44, 0x34, 0x6B, 0xDE, 0xB7, 0xE0, 0x52, 0xF6, 0x25, 0x50, 0x48, 0xF0, 0xD9, 0xB4, 0x2C, 0x42, 0x5B, 0xAB, 0x9C, 0x3D, 0xD2, 0x41, 0x68, 0x21, 0x2C, 0x3E, 0xCF, 0x1E, 0xBF, 0x34, 0xE6, 0xAE },
 | |
| +	{ 0x8E, 0x9C, 0xF6, 0xE1, 0xF3, 0x66, 0x47, 0x1F, 0x2A, 0xC7, 0xD2, 0xEE, 0x9B, 0x5E, 0x62, 0x66, 0xFD, 0xA7, 0x1F, 0x8F, 0x2E, 0x41, 0x09, 0xF2, 0x23, 0x7E, 0xD5, 0xF8, 0x81, 0x3F, 0xC7, 0x18 },
 | |
| +	{ 0x84, 0xBB, 0xEB, 0x84, 0x06, 0xD2, 0x50, 0x95, 0x1F, 0x8C, 0x1B, 0x3E, 0x86, 0xA7, 0xC0, 0x10, 0x08, 0x29, 0x21, 0x83, 0x3D, 0xFD, 0x95, 0x55, 0xA2, 0xF9, 0x09, 0xB1, 0x08, 0x6E, 0xB4, 0xB8 },
 | |
| +	{ 0xEE, 0x66, 0x6F, 0x3E, 0xEF, 0x0F, 0x7E, 0x2A, 0x9C, 0x22, 0x29, 0x58, 0xC9, 0x7E, 0xAF, 0x35, 0xF5, 0x1C, 0xED, 0x39, 0x3D, 0x71, 0x44, 0x85, 0xAB, 0x09, 0xA0, 0x69, 0x34, 0x0F, 0xDF, 0x88 },
 | |
| +	{ 0xC1, 0x53, 0xD3, 0x4A, 0x65, 0xC4, 0x7B, 0x4A, 0x62, 0xC5, 0xCA, 0xCF, 0x24, 0x01, 0x09, 0x75, 0xD0, 0x35, 0x6B, 0x2F, 0x32, 0xC8, 0xF5, 0xDA, 0x53, 0x0D, 0x33, 0x88, 0x16, 0xAD, 0x5D, 0xE6 },
 | |
| +	{ 0x9F, 0xC5, 0x45, 0x01, 0x09, 0xE1, 0xB7, 0x79, 0xF6, 0xC7, 0xAE, 0x79, 0xD5, 0x6C, 0x27, 0x63, 0x5C, 0x8D, 0xD4, 0x26, 0xC5, 0xA9, 0xD5, 0x4E, 0x25, 0x78, 0xDB, 0x98, 0x9B, 0x8C, 0x3B, 0x4E },
 | |
| +	{ 0xD1, 0x2B, 0xF3, 0x73, 0x2E, 0xF4, 0xAF, 0x5C, 0x22, 0xFA, 0x90, 0x35, 0x6A, 0xF8, 0xFC, 0x50, 0xFC, 0xB4, 0x0F, 0x8F, 0x2E, 0xA5, 0xC8, 0x59, 0x47, 0x37, 0xA3, 0xB3, 0xD5, 0xAB, 0xDB, 0xD7 },
 | |
| +	{ 0x11, 0x03, 0x0B, 0x92, 0x89, 0xBB, 0xA5, 0xAF, 0x65, 0x26, 0x06, 0x72, 0xAB, 0x6F, 0xEE, 0x88, 0xB8, 0x74, 0x20, 0xAC, 0xEF, 0x4A, 0x17, 0x89, 0xA2, 0x07, 0x3B, 0x7E, 0xC2, 0xF2, 0xA0, 0x9E },
 | |
| +	{ 0x69, 0xCB, 0x19, 0x2B, 0x84, 0x44, 0x00, 0x5C, 0x8C, 0x0C, 0xEB, 0x12, 0xC8, 0x46, 0x86, 0x07, 0x68, 0x18, 0x8C, 0xDA, 0x0A, 0xEC, 0x27, 0xA9, 0xC8, 0xA5, 0x5C, 0xDE, 0xE2, 0x12, 0x36, 0x32 },
 | |
| +	{ 0xDB, 0x44, 0x4C, 0x15, 0x59, 0x7B, 0x5F, 0x1A, 0x03, 0xD1, 0xF9, 0xED, 0xD1, 0x6E, 0x4A, 0x9F, 0x43, 0xA6, 0x67, 0xCC, 0x27, 0x51, 0x75, 0xDF, 0xA2, 0xB7, 0x04, 0xE3, 0xBB, 0x1A, 0x9B, 0x83 },
 | |
| +	{ 0x3F, 0xB7, 0x35, 0x06, 0x1A, 0xBC, 0x51, 0x9D, 0xFE, 0x97, 0x9E, 0x54, 0xC1, 0xEE, 0x5B, 0xFA, 0xD0, 0xA9, 0xD8, 0x58, 0xB3, 0x31, 0x5B, 0xAD, 0x34, 0xBD, 0xE9, 0x99, 0xEF, 0xD7, 0x24, 0xDD }
 | |
| +};
 | |
| +
 | |
| +/*
 | |
| + * blake2s_selftest - run blake2s() against the built-in test vector tables.
 | |
| + *
 | |
| + * The key is the byte sequence 0, 1, 2, ... and the message buffer is filled
 | |
| + * with buf[i] = (u8)i. For every index i of a table, blake2s() is called with
 | |
| + * i as its fifth argument (presumably the input length, so vector i covers
 | |
| + * the first i bytes of buf) and the digest is compared with entry i of that
 | |
| + * table: first with the key (blake2s_keyed_testvecs), then with a NULL key
 | |
| + * and a key length of 0 (blake2s_testvecs).
 | |
| + *
 | |
| + * Every mismatch is logged with a 1-based test number; a single "pass" line
 | |
| + * is logged when nothing failed.
 | |
| + *
 | |
| + * Returns true when every vector matched, false otherwise.
 | |
| + */
 | |
| +bool __init blake2s_selftest(void)
 | |
| +{
 | |
| +	u8 key[BLAKE2S_KEYBYTES];
 | |
| +	u8 buf[ARRAY_SIZE(blake2s_testvecs)];
 | |
| +	u8 hash[BLAKE2S_OUTBYTES];
 | |
| +	size_t i;
 | |
| +	bool success = true;
 | |
| +
 | |
| +	for (i = 0; i < BLAKE2S_KEYBYTES; ++i)
 | |
| +		key[i] = (u8)i;
 | |
| +
 | |
| +	for (i = 0; i < ARRAY_SIZE(blake2s_testvecs); ++i)
 | |
| +		buf[i] = (u8)i;
 | |
| +
 | |
| +	/* Keyed vectors. */
 | |
| +	for (i = 0; i < ARRAY_SIZE(blake2s_keyed_testvecs); ++i) {
 | |
| +		blake2s(hash, buf, key, BLAKE2S_OUTBYTES, i, BLAKE2S_KEYBYTES);
 | |
| +		if (memcmp(hash, blake2s_keyed_testvecs[i], BLAKE2S_OUTBYTES)) {
 | |
| +			pr_info("blake2s keyed self-test %zu: FAIL\n", i + 1);
 | |
| +			success = false;
 | |
| +		}
 | |
| +	}
 | |
| +
 | |
| +	/* Unkeyed vectors. */
 | |
| +	for (i = 0; i < ARRAY_SIZE(blake2s_testvecs); ++i) {
 | |
| +		blake2s(hash, buf, NULL, BLAKE2S_OUTBYTES, i, 0);
 | |
| +		if (memcmp(hash, blake2s_testvecs[i], BLAKE2S_OUTBYTES)) {
 | |
| +			/* 1-based test number, same numbering as the keyed loop. */
 | |
| +			pr_info("blake2s unkeyed self-test %zu: FAIL\n", i + 1);
 | |
| +			success = false;
 | |
| +		}
 | |
| +	}
 | |
| +
 | |
| +	if (success)
 | |
| +		pr_info("blake2s self-tests: pass\n");
 | |
| +	return success;
 | |
| +}
 | |
| +#endif
 | |
| --- /dev/null	2017-07-05 16:27:37.615351856 +0200
 | |
| +++ b/net/wireguard/selftest/chacha20poly1305.h	2017-07-06 18:17:33.000000000 +0200
 | |
| @@ -0,0 +1,89 @@
 | |
| +/* Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. */
 | |
| +
 | |
| +#ifdef DEBUG
 | |
| +/*
 | |
| + * One AEAD test case, shared by the ChaCha20-Poly1305 and XChaCha20-Poly1305
 | |
| + * vector tables below (originally labelled "ChaCha20-Poly1305 AEAD test
 | |
| + * vectors from RFC7539 2.8.2").
 | |
| + * NOTE(review): the ChaCha20-Poly1305 vector looks like the RFC 7539
 | |
| + * Appendix A.5 example rather than section 2.8.2 -- confirm.
 | |
| + */
 | |
| +struct chacha20poly1305_testvec {
 | |
| +	u8 *key;	/* passed as the last argument of the encrypt/decrypt calls */
 | |
| +	u8 *nonce;	/* read as a little-endian 64-bit value by the ChaCha20-Poly1305 tests, passed as a pointer by the XChaCha20-Poly1305 tests */
 | |
| +	u8 *assoc;	/* associated data, alen bytes */
 | |
| +	u8 *input;	/* data handed to the encrypt/decrypt call, ilen bytes */
 | |
| +	u8 *result;	/* expected output; ilen + POLY1305_MAC_SIZE bytes are compared after encrypting, ilen - POLY1305_MAC_SIZE after decrypting */
 | |
| +	size_t alen;	/* length of assoc in bytes */
 | |
| +	size_t ilen;	/* length of input in bytes */
 | |
| +};
 | |
| +static const struct chacha20poly1305_testvec chacha20poly1305_enc_vectors[] __initconst = { {
 | |
| +	.key	= "\x1c\x92\x40\xa5\xeb\x55\xd3\x8a\xf3\x33\x88\x86\x04\xf6\xb5\xf0\x47\x39\x17\xc1\x40\x2b\x80\x09\x9d\xca\x5c\xbc\x20\x70\x75\xc0",
 | |
| +	.nonce	= "\x01\x02\x03\x04\x05\x06\x07\x08",
 | |
| +	.assoc	= "\xf3\x33\x88\x86\x00\x00\x00\x00\x00\x00\x4e\x91",
 | |
| +	.alen	= 12,
 | |
| +	.input	= "\x49\x6e\x74\x65\x72\x6e\x65\x74\x2d\x44\x72\x61\x66\x74\x73\x20\x61\x72\x65\x20\x64\x72\x61\x66\x74\x20\x64\x6f\x63\x75\x6d\x65\x6e\x74\x73\x20\x76\x61\x6c\x69\x64\x20\x66\x6f\x72\x20\x61\x20\x6d\x61\x78\x69\x6d\x75\x6d\x20\x6f\x66\x20\x73\x69\x78\x20\x6d\x6f\x6e\x74\x68\x73\x20\x61\x6e\x64\x20\x6d\x61\x79\x20\x62\x65\x20\x75\x70\x64\x61\x74\x65\x64\x2c\x20\x72\x65\x70\x6c\x61\x63\x65\x64\x2c\x20\x6f\x72\x20\x6f\x62\x73\x6f\x6c\x65\x74\x65\x64\x20\x62\x79\x20\x6f\x74\x68\x65\x72\x20\x64\x6f\x63\x75\x6d\x65\x6e\x74\x73\x20\x61\x74\x20\x61\x6e\x79\x20\x74\x69\x6d\x65\x2e\x20\x49\x74\x20\x69\x73\x20\x69\x6e\x61\x70\x70\x72\x6f\x70\x72\x69\x61\x74\x65\x20\x74\x6f\x20\x75\x73\x65\x20\x49\x6e\x74\x65\x72\x6e\x65\x74\x2d\x44\x72\x61\x66\x74\x73\x20\x61\x73\x20\x72\x65\x66\x65\x72\x65\x6e\x63\x65\x20\x6d\x61\x74\x65\x72\x69\x61\x6c\x20\x6f\x72\x20\x74\x6f\x20\x63\x69\x74\x65\x20\x74\x68\x65\x6d\x20\x6f\x74\x68\x65\x72\x20\x74\x68\x61\x6e\x20\x61\x73\x20\x2f\xe2\x80\x9c\x77\x6f\x72\x6b\x20\x69\x6e\x20\x70\x72\x6f\x67\x72\x65\x73\x73\x2e\x2f\xe2\x80\x9d",
 | |
| +	.ilen	= 265,
 | |
| +	.result	= "\x64\xa0\x86\x15\x75\x86\x1a\xf4\x60\xf0\x62\xc7\x9b\xe6\x43\xbd\x5e\x80\x5c\xfd\x34\x5c\xf3\x89\xf1\x08\x67\x0a\xc7\x6c\x8c\xb2\x4c\x6c\xfc\x18\x75\x5d\x43\xee\xa0\x9e\xe9\x4e\x38\x2d\x26\xb0\xbd\xb7\xb7\x3c\x32\x1b\x01\x00\xd4\xf0\x3b\x7f\x35\x58\x94\xcf\x33\x2f\x83\x0e\x71\x0b\x97\xce\x98\xc8\xa8\x4a\xbd\x0b\x94\x81\x14\xad\x17\x6e\x00\x8d\x33\xbd\x60\xf9\x82\xb1\xff\x37\xc8\x55\x97\x97\xa0\x6e\xf4\xf0\xef\x61\xc1\x86\x32\x4e\x2b\x35\x06\x38\x36\x06\x90\x7b\x6a\x7c\x02\xb0\xf9\xf6\x15\x7b\x53\xc8\x67\xe4\xb9\x16\x6c\x76\x7b\x80\x4d\x46\xa5\x9b\x52\x16\xcd\xe7\xa4\xe9\x90\x40\xc5\xa4\x04\x33\x22\x5e\xe2\x82\xa1\xb0\xa0\x6c\x52\x3e\xaf\x45\x34\xd7\xf8\x3f\xa1\x15\x5b\x00\x47\x71\x8c\xbc\x54\x6a\x0d\x07\x2b\x04\xb3\x56\x4e\xea\x1b\x42\x22\x73\xf5\x48\x27\x1a\x0b\xb2\x31\x60\x53\xfa\x76\x99\x19\x55\xeb\xd6\x31\x59\x43\x4e\xce\xbb\x4e\x46\x6d\xae\x5a\x10\x73\xa6\x72\x76\x27\x09\x7a\x10\x49\xe6\x17\xd9\x1d\x36\x10\x94\xfa\x68\xf0\xff\x77\x98\x71\x30\x30\x5b\xea\xba\x2e\xda\x04\xdf\x99\x7b\x71\x4d\x6c\x6f\x2c\x29\xa6\xad\x5c\xb4\x02\x2b\x02\x70\x9b\xee\xad\x9d\x67\x89\x0c\xbb\x22\x39\x23\x36\xfe\xa1\x85\x1f\x38"
 | |
| +} };
 | |
| +static const struct chacha20poly1305_testvec chacha20poly1305_dec_vectors[] __initconst = {	{
 | |
| +	.key	= "\x1c\x92\x40\xa5\xeb\x55\xd3\x8a\xf3\x33\x88\x86\x04\xf6\xb5\xf0\x47\x39\x17\xc1\x40\x2b\x80\x09\x9d\xca\x5c\xbc\x20\x70\x75\xc0",
 | |
| +	.nonce	= "\x01\x02\x03\x04\x05\x06\x07\x08",
 | |
| +	.assoc	= "\xf3\x33\x88\x86\x00\x00\x00\x00\x00\x00\x4e\x91",
 | |
| +	.alen	= 12,
 | |
| +	.input	= "\x64\xa0\x86\x15\x75\x86\x1a\xf4\x60\xf0\x62\xc7\x9b\xe6\x43\xbd\x5e\x80\x5c\xfd\x34\x5c\xf3\x89\xf1\x08\x67\x0a\xc7\x6c\x8c\xb2\x4c\x6c\xfc\x18\x75\x5d\x43\xee\xa0\x9e\xe9\x4e\x38\x2d\x26\xb0\xbd\xb7\xb7\x3c\x32\x1b\x01\x00\xd4\xf0\x3b\x7f\x35\x58\x94\xcf\x33\x2f\x83\x0e\x71\x0b\x97\xce\x98\xc8\xa8\x4a\xbd\x0b\x94\x81\x14\xad\x17\x6e\x00\x8d\x33\xbd\x60\xf9\x82\xb1\xff\x37\xc8\x55\x97\x97\xa0\x6e\xf4\xf0\xef\x61\xc1\x86\x32\x4e\x2b\x35\x06\x38\x36\x06\x90\x7b\x6a\x7c\x02\xb0\xf9\xf6\x15\x7b\x53\xc8\x67\xe4\xb9\x16\x6c\x76\x7b\x80\x4d\x46\xa5\x9b\x52\x16\xcd\xe7\xa4\xe9\x90\x40\xc5\xa4\x04\x33\x22\x5e\xe2\x82\xa1\xb0\xa0\x6c\x52\x3e\xaf\x45\x34\xd7\xf8\x3f\xa1\x15\x5b\x00\x47\x71\x8c\xbc\x54\x6a\x0d\x07\x2b\x04\xb3\x56\x4e\xea\x1b\x42\x22\x73\xf5\x48\x27\x1a\x0b\xb2\x31\x60\x53\xfa\x76\x99\x19\x55\xeb\xd6\x31\x59\x43\x4e\xce\xbb\x4e\x46\x6d\xae\x5a\x10\x73\xa6\x72\x76\x27\x09\x7a\x10\x49\xe6\x17\xd9\x1d\x36\x10\x94\xfa\x68\xf0\xff\x77\x98\x71\x30\x30\x5b\xea\xba\x2e\xda\x04\xdf\x99\x7b\x71\x4d\x6c\x6f\x2c\x29\xa6\xad\x5c\xb4\x02\x2b\x02\x70\x9b\xee\xad\x9d\x67\x89\x0c\xbb\x22\x39\x23\x36\xfe\xa1\x85\x1f\x38",
 | |
| +	.ilen	= 281,
 | |
| +	.result	= "\x49\x6e\x74\x65\x72\x6e\x65\x74\x2d\x44\x72\x61\x66\x74\x73\x20\x61\x72\x65\x20\x64\x72\x61\x66\x74\x20\x64\x6f\x63\x75\x6d\x65\x6e\x74\x73\x20\x76\x61\x6c\x69\x64\x20\x66\x6f\x72\x20\x61\x20\x6d\x61\x78\x69\x6d\x75\x6d\x20\x6f\x66\x20\x73\x69\x78\x20\x6d\x6f\x6e\x74\x68\x73\x20\x61\x6e\x64\x20\x6d\x61\x79\x20\x62\x65\x20\x75\x70\x64\x61\x74\x65\x64\x2c\x20\x72\x65\x70\x6c\x61\x63\x65\x64\x2c\x20\x6f\x72\x20\x6f\x62\x73\x6f\x6c\x65\x74\x65\x64\x20\x62\x79\x20\x6f\x74\x68\x65\x72\x20\x64\x6f\x63\x75\x6d\x65\x6e\x74\x73\x20\x61\x74\x20\x61\x6e\x79\x20\x74\x69\x6d\x65\x2e\x20\x49\x74\x20\x69\x73\x20\x69\x6e\x61\x70\x70\x72\x6f\x70\x72\x69\x61\x74\x65\x20\x74\x6f\x20\x75\x73\x65\x20\x49\x6e\x74\x65\x72\x6e\x65\x74\x2d\x44\x72\x61\x66\x74\x73\x20\x61\x73\x20\x72\x65\x66\x65\x72\x65\x6e\x63\x65\x20\x6d\x61\x74\x65\x72\x69\x61\x6c\x20\x6f\x72\x20\x74\x6f\x20\x63\x69\x74\x65\x20\x74\x68\x65\x6d\x20\x6f\x74\x68\x65\x72\x20\x74\x68\x61\x6e\x20\x61\x73\x20\x2f\xe2\x80\x9c\x77\x6f\x72\x6b\x20\x69\x6e\x20\x70\x72\x6f\x67\x72\x65\x73\x73\x2e\x2f\xe2\x80\x9d"
 | |
| +} };
 | |
| +
 | |
| +static const struct chacha20poly1305_testvec xchacha20poly1305_enc_vectors[] __initconst = { {
 | |
| +	.key	= "\x1c\x92\x40\xa5\xeb\x55\xd3\x8a\xf3\x33\x88\x86\x04\xf6\xb5\xf0\x47\x39\x17\xc1\x40\x2b\x80\x09\x9d\xca\x5c\xbc\x20\x70\x75\xc0",
 | |
| +	.nonce	= "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17",
 | |
| +	.assoc	= "\xf3\x33\x88\x86\x00\x00\x00\x00\x00\x00\x4e\x91",
 | |
| +	.alen	= 12,
 | |
| +	.input	= "\x49\x6e\x74\x65\x72\x6e\x65\x74\x2d\x44\x72\x61\x66\x74\x73\x20\x61\x72\x65\x20\x64\x72\x61\x66\x74\x20\x64\x6f\x63\x75\x6d\x65\x6e\x74\x73\x20\x76\x61\x6c\x69\x64\x20\x66\x6f\x72\x20\x61\x20\x6d\x61\x78\x69\x6d\x75\x6d\x20\x6f\x66\x20\x73\x69\x78\x20\x6d\x6f\x6e\x74\x68\x73\x20\x61\x6e\x64\x20\x6d\x61\x79\x20\x62\x65\x20\x75\x70\x64\x61\x74\x65\x64\x2c\x20\x72\x65\x70\x6c\x61\x63\x65\x64\x2c\x20\x6f\x72\x20\x6f\x62\x73\x6f\x6c\x65\x74\x65\x64\x20\x62\x79\x20\x6f\x74\x68\x65\x72\x20\x64\x6f\x63\x75\x6d\x65\x6e\x74\x73\x20\x61\x74\x20\x61\x6e\x79\x20\x74\x69\x6d\x65\x2e\x20\x49\x74\x20\x69\x73\x20\x69\x6e\x61\x70\x70\x72\x6f\x70\x72\x69\x61\x74\x65\x20\x74\x6f\x20\x75\x73\x65\x20\x49\x6e\x74\x65\x72\x6e\x65\x74\x2d\x44\x72\x61\x66\x74\x73\x20\x61\x73\x20\x72\x65\x66\x65\x72\x65\x6e\x63\x65\x20\x6d\x61\x74\x65\x72\x69\x61\x6c\x20\x6f\x72\x20\x74\x6f\x20\x63\x69\x74\x65\x20\x74\x68\x65\x6d\x20\x6f\x74\x68\x65\x72\x20\x74\x68\x61\x6e\x20\x61\x73\x20\x2f\xe2\x80\x9c\x77\x6f\x72\x6b\x20\x69\x6e\x20\x70\x72\x6f\x67\x72\x65\x73\x73\x2e\x2f\xe2\x80\x9d",
 | |
| +	.ilen	= 265,
 | |
| +	.result	= "\x1a\x6e\x3a\xd9\xfd\x41\x3f\x77\x54\x72\x0a\x70\x9a\xa0\x29\x92\x2e\xed\x93\xcf\x0f\x71\x88\x18\x7a\x9d\x2d\x24\xe0\xf5\xea\x3d\x55\x64\xd7\xad\x2a\x1a\x1f\x7e\x86\x6d\xb0\xce\x80\x41\x72\x86\x26\xee\x84\xd7\xef\x82\x9e\xe2\x60\x9d\x5a\xfc\xf0\xe4\x19\x85\xea\x09\xc6\xfb\xb3\xa9\x50\x09\xec\x5e\x11\x90\xa1\xc5\x4e\x49\xef\x50\xd8\x8f\xe0\x78\xd7\xfd\xb9\x3b\xc9\xf2\x91\xc8\x25\xc8\xa7\x63\x60\xce\x10\xcd\xc6\x7f\xf8\x16\xf8\xe1\x0a\xd9\xde\x79\x50\x33\xf2\x16\x0f\x17\xba\xb8\x5d\xd8\xdf\x4e\x51\xa8\x39\xd0\x85\xca\x46\x6a\x10\xa7\xa3\x88\xef\x79\xb9\xf8\x24\xf3\xe0\x71\x7b\x76\x28\x46\x3a\x3a\x1b\x91\xb6\xd4\x3e\x23\xe5\x44\x15\xbf\x60\x43\x9d\xa4\xbb\xd5\x5f\x89\xeb\xef\x8e\xfd\xdd\xb4\x0d\x46\xf0\x69\x23\x63\xae\x94\xf5\x5e\xa5\xad\x13\x1c\x41\x76\xe6\x90\xd6\x6d\xa2\x8f\x97\x4c\xa8\x0b\xcf\x8d\x43\x2b\x9c\x9b\xc5\x58\xa5\xb6\x95\x9a\xbf\x81\xc6\x54\xc9\x66\x0c\xe5\x4f\x6a\x53\xa1\xe5\x0c\xba\x31\xde\x34\x64\x73\x8a\x3b\xbd\x92\x01\xdb\x71\x69\xf3\x58\x99\xbc\xd1\xcb\x4a\x05\xe2\x58\x9c\x25\x17\xcd\xdc\x83\xb7\xff\xfb\x09\x61\xad\xbf\x13\x5b\x5e\xed\x46\x82\x6f\x22\xd8\x93\xa6\x85\x5b\x40\x39\x5c\xc5\x9c"
 | |
| +} };
 | |
| +static const struct chacha20poly1305_testvec xchacha20poly1305_dec_vectors[] __initconst = {	{
 | |
| +	.key	= "\x1c\x92\x40\xa5\xeb\x55\xd3\x8a\xf3\x33\x88\x86\x04\xf6\xb5\xf0\x47\x39\x17\xc1\x40\x2b\x80\x09\x9d\xca\x5c\xbc\x20\x70\x75\xc0",
 | |
| +	.nonce	= "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17",
 | |
| +	.assoc	= "\xf3\x33\x88\x86\x00\x00\x00\x00\x00\x00\x4e\x91",
 | |
| +	.alen	= 12,
 | |
| +	.input	= "\x1a\x6e\x3a\xd9\xfd\x41\x3f\x77\x54\x72\x0a\x70\x9a\xa0\x29\x92\x2e\xed\x93\xcf\x0f\x71\x88\x18\x7a\x9d\x2d\x24\xe0\xf5\xea\x3d\x55\x64\xd7\xad\x2a\x1a\x1f\x7e\x86\x6d\xb0\xce\x80\x41\x72\x86\x26\xee\x84\xd7\xef\x82\x9e\xe2\x60\x9d\x5a\xfc\xf0\xe4\x19\x85\xea\x09\xc6\xfb\xb3\xa9\x50\x09\xec\x5e\x11\x90\xa1\xc5\x4e\x49\xef\x50\xd8\x8f\xe0\x78\xd7\xfd\xb9\x3b\xc9\xf2\x91\xc8\x25\xc8\xa7\x63\x60\xce\x10\xcd\xc6\x7f\xf8\x16\xf8\xe1\x0a\xd9\xde\x79\x50\x33\xf2\x16\x0f\x17\xba\xb8\x5d\xd8\xdf\x4e\x51\xa8\x39\xd0\x85\xca\x46\x6a\x10\xa7\xa3\x88\xef\x79\xb9\xf8\x24\xf3\xe0\x71\x7b\x76\x28\x46\x3a\x3a\x1b\x91\xb6\xd4\x3e\x23\xe5\x44\x15\xbf\x60\x43\x9d\xa4\xbb\xd5\x5f\x89\xeb\xef\x8e\xfd\xdd\xb4\x0d\x46\xf0\x69\x23\x63\xae\x94\xf5\x5e\xa5\xad\x13\x1c\x41\x76\xe6\x90\xd6\x6d\xa2\x8f\x97\x4c\xa8\x0b\xcf\x8d\x43\x2b\x9c\x9b\xc5\x58\xa5\xb6\x95\x9a\xbf\x81\xc6\x54\xc9\x66\x0c\xe5\x4f\x6a\x53\xa1\xe5\x0c\xba\x31\xde\x34\x64\x73\x8a\x3b\xbd\x92\x01\xdb\x71\x69\xf3\x58\x99\xbc\xd1\xcb\x4a\x05\xe2\x58\x9c\x25\x17\xcd\xdc\x83\xb7\xff\xfb\x09\x61\xad\xbf\x13\x5b\x5e\xed\x46\x82\x6f\x22\xd8\x93\xa6\x85\x5b\x40\x39\x5c\xc5\x9c",
 | |
| +	.ilen	= 281,
 | |
| +	.result	= "\x49\x6e\x74\x65\x72\x6e\x65\x74\x2d\x44\x72\x61\x66\x74\x73\x20\x61\x72\x65\x20\x64\x72\x61\x66\x74\x20\x64\x6f\x63\x75\x6d\x65\x6e\x74\x73\x20\x76\x61\x6c\x69\x64\x20\x66\x6f\x72\x20\x61\x20\x6d\x61\x78\x69\x6d\x75\x6d\x20\x6f\x66\x20\x73\x69\x78\x20\x6d\x6f\x6e\x74\x68\x73\x20\x61\x6e\x64\x20\x6d\x61\x79\x20\x62\x65\x20\x75\x70\x64\x61\x74\x65\x64\x2c\x20\x72\x65\x70\x6c\x61\x63\x65\x64\x2c\x20\x6f\x72\x20\x6f\x62\x73\x6f\x6c\x65\x74\x65\x64\x20\x62\x79\x20\x6f\x74\x68\x65\x72\x20\x64\x6f\x63\x75\x6d\x65\x6e\x74\x73\x20\x61\x74\x20\x61\x6e\x79\x20\x74\x69\x6d\x65\x2e\x20\x49\x74\x20\x69\x73\x20\x69\x6e\x61\x70\x70\x72\x6f\x70\x72\x69\x61\x74\x65\x20\x74\x6f\x20\x75\x73\x65\x20\x49\x6e\x74\x65\x72\x6e\x65\x74\x2d\x44\x72\x61\x66\x74\x73\x20\x61\x73\x20\x72\x65\x66\x65\x72\x65\x6e\x63\x65\x20\x6d\x61\x74\x65\x72\x69\x61\x6c\x20\x6f\x72\x20\x74\x6f\x20\x63\x69\x74\x65\x20\x74\x68\x65\x6d\x20\x6f\x74\x68\x65\x72\x20\x74\x68\x61\x6e\x20\x61\x73\x20\x2f\xe2\x80\x9c\x77\x6f\x72\x6b\x20\x69\x6e\x20\x70\x72\x6f\x67\x72\x65\x73\x73\x2e\x2f\xe2\x80\x9d"
 | |
| +} };
 | |
| +
 | |
| +/*
 | |
| + * chacha20poly1305_selftest - run the ChaCha20-Poly1305 and XChaCha20-Poly1305
 | |
| + * encrypt/decrypt routines against the vector tables defined above.
 | |
| + *
 | |
| + * For each table the output buffer is zeroed, the operation is run on the
 | |
| + * vector's input, and the output is compared with the vector's expected
 | |
| + * result: ilen + POLY1305_MAC_SIZE bytes after encryption, and
 | |
| + * ilen - POLY1305_MAC_SIZE bytes after decryption. A decryption that returns
 | |
| + * false also counts as a failure.
 | |
| + *
 | |
| + * Every failure is logged with a 1-based test number; a single "pass" line is
 | |
| + * logged when nothing failed.
 | |
| + *
 | |
| + * Returns true when every vector matched, false otherwise.
 | |
| + */
 | |
| +bool __init chacha20poly1305_selftest(void)
 | |
| +{
 | |
| +	const struct chacha20poly1305_testvec *vec;
 | |
| +	u8 computed_result[512];
 | |
| +	__le64 nonce;
 | |
| +	size_t i;
 | |
| +	bool success = true, ret;
 | |
| +
 | |
| +	for (i = 0; i < ARRAY_SIZE(chacha20poly1305_enc_vectors); ++i) {
 | |
| +		vec = &chacha20poly1305_enc_vectors[i];
 | |
| +		memset(computed_result, 0, sizeof(computed_result));
 | |
| +		/* The nonce bytes carry no alignment guarantee, so copy them out instead of dereferencing a cast pointer. */
 | |
| +		memcpy(&nonce, vec->nonce, sizeof(nonce));
 | |
| +		chacha20poly1305_encrypt(computed_result, vec->input, vec->ilen, vec->assoc, vec->alen, le64_to_cpu(nonce), vec->key);
 | |
| +		if (memcmp(computed_result, vec->result, vec->ilen + POLY1305_MAC_SIZE)) {
 | |
| +			pr_info("chacha20poly1305 encryption self-test %zu: FAIL\n", i + 1);
 | |
| +			success = false;
 | |
| +		}
 | |
| +	}
 | |
| +	for (i = 0; i < ARRAY_SIZE(chacha20poly1305_dec_vectors); ++i) {
 | |
| +		vec = &chacha20poly1305_dec_vectors[i];
 | |
| +		memset(computed_result, 0, sizeof(computed_result));
 | |
| +		/* Same unaligned-safe nonce load as in the encryption loop. */
 | |
| +		memcpy(&nonce, vec->nonce, sizeof(nonce));
 | |
| +		ret = chacha20poly1305_decrypt(computed_result, vec->input, vec->ilen, vec->assoc, vec->alen, le64_to_cpu(nonce), vec->key);
 | |
| +		if (!ret || memcmp(computed_result, vec->result, vec->ilen - POLY1305_MAC_SIZE)) {
 | |
| +			pr_info("chacha20poly1305 decryption self-test %zu: FAIL\n", i + 1);
 | |
| +			success = false;
 | |
| +		}
 | |
| +	}
 | |
| +	/* The XChaCha20-Poly1305 routines take the nonce as a byte pointer, so no integer load is needed. */
 | |
| +	for (i = 0; i < ARRAY_SIZE(xchacha20poly1305_enc_vectors); ++i) {
 | |
| +		vec = &xchacha20poly1305_enc_vectors[i];
 | |
| +		memset(computed_result, 0, sizeof(computed_result));
 | |
| +		xchacha20poly1305_encrypt(computed_result, vec->input, vec->ilen, vec->assoc, vec->alen, vec->nonce, vec->key);
 | |
| +		if (memcmp(computed_result, vec->result, vec->ilen + POLY1305_MAC_SIZE)) {
 | |
| +			pr_info("xchacha20poly1305 encryption self-test %zu: FAIL\n", i + 1);
 | |
| +			success = false;
 | |
| +		}
 | |
| +	}
 | |
| +	for (i = 0; i < ARRAY_SIZE(xchacha20poly1305_dec_vectors); ++i) {
 | |
| +		vec = &xchacha20poly1305_dec_vectors[i];
 | |
| +		memset(computed_result, 0, sizeof(computed_result));
 | |
| +		ret = xchacha20poly1305_decrypt(computed_result, vec->input, vec->ilen, vec->assoc, vec->alen, vec->nonce, vec->key);
 | |
| +		if (!ret || memcmp(computed_result, vec->result, vec->ilen - POLY1305_MAC_SIZE)) {
 | |
| +			pr_info("xchacha20poly1305 decryption self-test %zu: FAIL\n", i + 1);
 | |
| +			success = false;
 | |
| +		}
 | |
| +	}
 | |
| +	if (success)
 | |
| +		pr_info("chacha20poly1305 self-tests: pass\n");
 | |
| +	return success;
 | |
| +}
 | |
| +#endif
 | |
| --- /dev/null	2017-07-05 16:27:37.615351856 +0200
 | |
| +++ b/net/wireguard/selftest/counter.h	2017-07-06 18:17:33.000000000 +0200
 | |
| @@ -0,0 +1,89 @@
 | |
| +/* Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. */
 | |
| +
 | |
| +#ifdef DEBUG
 | |
| +/* Self-test for counter_validate(): feeds fixed nonce sequences to a freshly
 | |
| + * reset counter and compares every accept/reject decision with the expected
 | |
| + * value. Each mismatch is logged with its running test number.
 | |
| + * Returns true only if every check matched.
 | |
| + */
 | |
| +bool __init packet_counter_selftest(void)
 | |
| +{
 | |
| +	bool success = true;
 | |
| +	unsigned int test_num = 0, i;
 | |
| +	union noise_counter counter;
 | |
| +
 | |
| +	/* T_INIT:  zero the counter and re-initialize its receive lock (new scenario).
 | |
| +	 * T_LIM:   one more than COUNTER_WINDOW_SIZE.
 | |
| +	 * T(n, v): bump test_num, then expect counter_validate(&counter, n) == v.
 | |
| +	 * test_num is never reset, so the numbering continues across scenarios.
 | |
| +	 */
 | |
| +#define T_INIT do { memset(&counter, 0, sizeof(union noise_counter)); spin_lock_init(&counter.receive.lock); } while (0)
 | |
| +#define T_LIM (COUNTER_WINDOW_SIZE + 1)
 | |
| +#define T(n, v) do { ++test_num; if (counter_validate(&counter, n) != v) { pr_info("nonce counter self-test %u: FAIL\n", test_num); success = false; } } while (0)
 | |
| +	/* Scenario 1: hand-picked sequence; each step depends on the state left by the previous ones. */
 | |
| +	T_INIT;
 | |
| +	/*  1 */ T(0, true);
 | |
| +	/*  2 */ T(1, true);
 | |
| +	/*  3 */ T(1, false);
 | |
| +	/*  4 */ T(9, true);
 | |
| +	/*  5 */ T(8, true);
 | |
| +	/*  6 */ T(7, true);
 | |
| +	/*  7 */ T(7, false);
 | |
| +	/*  8 */ T(T_LIM, true);
 | |
| +	/*  9 */ T(T_LIM - 1, true);
 | |
| +	/* 10 */ T(T_LIM - 1, false);
 | |
| +	/* 11 */ T(T_LIM - 2, true);
 | |
| +	/* 12 */ T(2, true);
 | |
| +	/* 13 */ T(2, false);
 | |
| +	/* 14 */ T(T_LIM + 16, true);
 | |
| +	/* 15 */ T(3, false);
 | |
| +	/* 16 */ T(T_LIM + 16, false);
 | |
| +	/* 17 */ T(T_LIM * 4, true);
 | |
| +	/* 18 */ T(T_LIM * 4 - (T_LIM - 1), true);
 | |
| +	/* 19 */ T(10, false);
 | |
| +	/* 20 */ T(T_LIM * 4 - T_LIM, false);
 | |
| +	/* 21 */ T(T_LIM * 4 - (T_LIM + 1), false);
 | |
| +	/* 22 */ T(T_LIM * 4 - (T_LIM - 2), true);
 | |
| +	/* 23 */ T(T_LIM * 4 + 1 - T_LIM, false);
 | |
| +	/* 24 */ T(0, false);
 | |
| +	/* 25 */ T(REJECT_AFTER_MESSAGES, false);
 | |
| +	/* 26 */ T(REJECT_AFTER_MESSAGES - 1, true);
 | |
| +	/* 27 */ T(REJECT_AFTER_MESSAGES, false);
 | |
| +	/* 28 */ T(REJECT_AFTER_MESSAGES - 1, false);
 | |
| +	/* 29 */ T(REJECT_AFTER_MESSAGES - 2, true);
 | |
| +	/* 30 */ T(REJECT_AFTER_MESSAGES + 1, false);
 | |
| +	/* 31 */ T(REJECT_AFTER_MESSAGES + 2, false);
 | |
| +	/* 32 */ T(REJECT_AFTER_MESSAGES - 2, false);
 | |
| +	/* 33 */ T(REJECT_AFTER_MESSAGES - 3, true);
 | |
| +	/* 34 */ T(0, false);
 | |
| +
 | |
| +	/* Scenario 2: ascending 1..COUNTER_WINDOW_SIZE, then 0 is accepted once and rejected on repeat. */
 | |
| +	T_INIT;
 | |
| +	for (i = 1; i <= COUNTER_WINDOW_SIZE; ++i)
 | |
| +		T(i, true);
 | |
| +	T(0, true);
 | |
| +	T(0, false);
 | |
| +
 | |
| +	/* Scenario 3: ascending 2..COUNTER_WINDOW_SIZE + 1, then 1 is accepted but 0 is rejected. */
 | |
| +	T_INIT;
 | |
| +	for (i = 2; i <= COUNTER_WINDOW_SIZE + 1; ++i)
 | |
| +		T(i, true);
 | |
| +	T(1, true);
 | |
| +	T(0, false);
 | |
| +
 | |
| +	/* Scenario 4: descending COUNTER_WINDOW_SIZE..0, every value accepted. */
 | |
| +	T_INIT;
 | |
| +	for (i = COUNTER_WINDOW_SIZE + 1; i-- > 0 ;)
 | |
| +		T(i, true);
 | |
| +
 | |
| +	/* Scenario 5: descending COUNTER_WINDOW_SIZE + 1..1 accepted, then 0 rejected. */
 | |
| +	T_INIT;
 | |
| +	for (i = COUNTER_WINDOW_SIZE + 2; i-- > 1 ;)
 | |
| +		T(i, true);
 | |
| +	T(0, false);
 | |
| +
 | |
| +	/* Scenario 6: descending COUNTER_WINDOW_SIZE..1, then COUNTER_WINDOW_SIZE + 1 accepted and 0 rejected. */
 | |
| +	T_INIT;
 | |
| +	for (i = COUNTER_WINDOW_SIZE + 1; i-- > 1 ;)
 | |
| +		T(i, true);
 | |
| +	T(COUNTER_WINDOW_SIZE + 1, true);
 | |
| +	T(0, false);
 | |
| +
 | |
| +	/* Scenario 7: same descending run, but 0 is tried (and accepted) before COUNTER_WINDOW_SIZE + 1. */
 | |
| +	T_INIT;
 | |
| +	for (i = COUNTER_WINDOW_SIZE + 1; i-- > 1 ;)
 | |
| +		T(i, true);
 | |
| +	T(0, true);
 | |
| +	T(COUNTER_WINDOW_SIZE + 1, true);
 | |
| +#undef T
 | |
| +#undef T_LIM
 | |
| +#undef T_INIT
 | |
| +
 | |
| +	if (success)
 | |
| +		pr_info("nonce counter self-tests: pass\n");
 | |
| +	return success;
 | |
| +}
 | |
| +#endif
 | |
| --- /dev/null	2017-07-05 16:27:37.615351856 +0200
 | |
| +++ b/net/wireguard/selftest/curve25519.h	2017-07-06 18:17:33.000000000 +0200
 | |
| @@ -0,0 +1,74 @@
 | |
| +/* Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. */
 | |
| +
 | |
| +#ifdef DEBUG
 | |
| +/* One curve25519() self-test case: the two inputs, the expected output and
 | |
| + * the expected return value of the call.
 | |
| + */
 | |
| +struct curve25519_test_vector {
 | |
| +	u8 private[CURVE25519_POINT_SIZE]; /* passed as the second argument of curve25519() */
 | |
| +	u8 public[CURVE25519_POINT_SIZE]; /* passed as the third argument of curve25519() */
 | |
| +	u8 result[CURVE25519_POINT_SIZE]; /* expected contents of the output buffer */
 | |
| +	bool valid; /* expected return value of curve25519() */
 | |
| +};
 | |
| +/* Test cases consumed by curve25519_selftest(). Entries with .valid = false
 | |
| + * expect curve25519() to return false and the (pre-zeroed) output buffer to
 | |
| + * compare equal to an all-zero result.
 | |
| + */
 | |
| +static const struct curve25519_test_vector curve25519_test_vectors[] __initconst = {
 | |
| +	{
 | |
| +		.private = { 0x77, 0x07, 0x6d, 0x0a, 0x73, 0x18, 0xa5, 0x7d, 0x3c, 0x16, 0xc1, 0x72, 0x51, 0xb2, 0x66, 0x45, 0xdf, 0x4c, 0x2f, 0x87, 0xeb, 0xc0, 0x99, 0x2a, 0xb1, 0x77, 0xfb, 0xa5, 0x1d, 0xb9, 0x2c, 0x2a },
 | |
| +		.public = { 0xde, 0x9e, 0xdb, 0x7d, 0x7b, 0x7d, 0xc1, 0xb4, 0xd3, 0x5b, 0x61, 0xc2, 0xec, 0xe4, 0x35, 0x37, 0x3f, 0x83, 0x43, 0xc8, 0x5b, 0x78, 0x67, 0x4d, 0xad, 0xfc, 0x7e, 0x14, 0x6f, 0x88, 0x2b, 0x4f },
 | |
| +		.result = { 0x4a, 0x5d, 0x9d, 0x5b, 0xa4, 0xce, 0x2d, 0xe1, 0x72, 0x8e, 0x3b, 0xf4, 0x80, 0x35, 0x0f, 0x25, 0xe0, 0x7e, 0x21, 0xc9, 0x47, 0xd1, 0x9e, 0x33, 0x76, 0xf0, 0x9b, 0x3c, 0x1e, 0x16, 0x17, 0x42 },
 | |
| +		.valid = true
 | |
| +	},
 | |
| +	{
 | |
| +		.private = { 0x5d, 0xab, 0x08, 0x7e, 0x62, 0x4a, 0x8a, 0x4b, 0x79, 0xe1, 0x7f, 0x8b, 0x83, 0x80, 0x0e, 0xe6, 0x6f, 0x3b, 0xb1, 0x29, 0x26, 0x18, 0xb6, 0xfd, 0x1c, 0x2f, 0x8b, 0x27, 0xff, 0x88, 0xe0, 0xeb },
 | |
| +		.public = { 0x85, 0x20, 0xf0, 0x09, 0x89, 0x30, 0xa7, 0x54, 0x74, 0x8b, 0x7d, 0xdc, 0xb4, 0x3e, 0xf7, 0x5a, 0x0d, 0xbf, 0x3a, 0x0d, 0x26, 0x38, 0x1a, 0xf4, 0xeb, 0xa4, 0xa9, 0x8e, 0xaa, 0x9b, 0x4e, 0x6a },
 | |
| +		.result = { 0x4a, 0x5d, 0x9d, 0x5b, 0xa4, 0xce, 0x2d, 0xe1, 0x72, 0x8e, 0x3b, 0xf4, 0x80, 0x35, 0x0f, 0x25, 0xe0, 0x7e, 0x21, 0xc9, 0x47, 0xd1, 0x9e, 0x33, 0x76, 0xf0, 0x9b, 0x3c, 0x1e, 0x16, 0x17, 0x42 },
 | |
| +		.valid = true
 | |
| +	},
 | |
| +	{
 | |
| +		.private = { 1 },
 | |
| +		.public = { 0x25, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
 | |
| +		.result = { 0x3c, 0x77, 0x77, 0xca, 0xf9, 0x97, 0xb2, 0x64, 0x41, 0x60, 0x77, 0x66, 0x5b, 0x4e, 0x22, 0x9d, 0xb, 0x95, 0x48, 0xdc, 0xc, 0xd8, 0x19, 0x98, 0xdd, 0xcd, 0xc5, 0xc8, 0x53, 0x3c, 0x79, 0x7f },
 | |
| +		.valid = true
 | |
| +	},
 | |
| +	{
 | |
| +		.private = { 1 },
 | |
| +		.public = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
 | |
| +		.result = { 0xb3, 0x2d, 0x13, 0x62, 0xc2, 0x48, 0xd6, 0x2f, 0xe6, 0x26, 0x19, 0xcf, 0xf0, 0x4d, 0xd4, 0x3d, 0xb7, 0x3f, 0xfc, 0x1b, 0x63, 0x8, 0xed, 0xe3, 0xb, 0x78, 0xd8, 0x73, 0x80, 0xf1, 0xe8, 0x34 },
 | |
| +		.valid = true
 | |
| +	},
 | |
| +	{
 | |
| +		.private = { 0xa5, 0x46, 0xe3, 0x6b, 0xf0, 0x52, 0x7c, 0x9d, 0x3b, 0x16, 0x15, 0x4b, 0x82, 0x46, 0x5e, 0xdd, 0x62, 0x14, 0x4c, 0x0a, 0xc1, 0xfc, 0x5a, 0x18, 0x50, 0x6a, 0x22, 0x44, 0xba, 0x44, 0x9a, 0xc4 },
 | |
| +		.public = { 0xe6, 0xdb, 0x68, 0x67, 0x58, 0x30, 0x30, 0xdb, 0x35, 0x94, 0xc1, 0xa4, 0x24, 0xb1, 0x5f, 0x7c, 0x72, 0x66, 0x24, 0xec, 0x26, 0xb3, 0x35, 0x3b, 0x10, 0xa9, 0x03, 0xa6, 0xd0, 0xab, 0x1c, 0x4c },
 | |
| +		.result = { 0xc3, 0xda, 0x55, 0x37, 0x9d, 0xe9, 0xc6, 0x90, 0x8e, 0x94, 0xea, 0x4d, 0xf2, 0x8d, 0x08, 0x4f, 0x32, 0xec, 0xcf, 0x03, 0x49, 0x1c, 0x71, 0xf7, 0x54, 0xb4, 0x07, 0x55, 0x77, 0xa2, 0x85, 0x52 },
 | |
| +		.valid = true
 | |
| +	},
 | |
| +	{
 | |
| +		/* All-zero public value: the call is expected to fail. */
 | |
| +		.private = { 1, 2, 3, 4 },
 | |
| +		.public = { 0 },
 | |
| +		.result = { 0 },
 | |
| +		.valid = false
 | |
| +	},
 | |
| +	{
 | |
| +		/* Only 31 initializers are given for .public, so its last byte is implicitly 0x00. */
 | |
| +		.private = { 2, 4, 6, 8 },
 | |
| +		.public = { 0xe0, 0xeb, 0x7a, 0x7c, 0x3b, 0x41, 0xb8, 0xae, 0x16, 0x56, 0xe3, 0xfa, 0xf1, 0x9f, 0xc4, 0x6a, 0xda, 0x09, 0x8d, 0xeb, 0x9c, 0x32, 0xb1, 0xfd, 0x86, 0x62, 0x05, 0x16, 0x5f, 0x49, 0xb8 },
 | |
| +		.result = { 0 },
 | |
| +		.valid = false
 | |
| +	}
 | |
| +};
 | |
| +/* Runs curve25519() over every entry of curve25519_test_vectors and checks
 | |
| + * both the return value and the output bytes. Stops at the first mismatch,
 | |
| + * logging its 1-based index. Returns true when all vectors matched.
 | |
| + */
 | |
| +bool __init curve25519_selftest(void)
 | |
| +{
 | |
| +	const struct curve25519_test_vector *vec;
 | |
| +	u8 shared[CURVE25519_POINT_SIZE];
 | |
| +	bool passed = true, ok;
 | |
| +	size_t idx;
 | |
| +
 | |
| +	for (idx = 0; idx < ARRAY_SIZE(curve25519_test_vectors); ++idx) {
 | |
| +		vec = &curve25519_test_vectors[idx];
 | |
| +		/* Start from zeroes so a failed call compares equal to an all-zero expected result. */
 | |
| +		memset(shared, 0, CURVE25519_POINT_SIZE);
 | |
| +		ok = curve25519(shared, vec->private, vec->public);
 | |
| +		if (ok == vec->valid && !memcmp(shared, vec->result, CURVE25519_POINT_SIZE))
 | |
| +			continue;
 | |
| +		pr_info("curve25519 self-test %zu: FAIL\n", idx + 1);
 | |
| +		passed = false;
 | |
| +		break;
 | |
| +	}
 | |
| +
 | |
| +	if (passed)
 | |
| +		pr_info("curve25519 self-tests: pass\n");
 | |
| +	return passed;
 | |
| +}
 | |
| +#endif
 | |
| --- /dev/null	2017-07-05 16:27:37.615351856 +0200
 | |
| +++ b/net/wireguard/selftest/ratelimiter.h	2017-07-06 18:17:33.000000000 +0200
 | |
| @@ -0,0 +1,113 @@
 | |
| +/* Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. */
 | |
| +
 | |
| +#ifdef DEBUG
 | |
| +
 | |
| +/* Script for ratelimiter_selftest(): entry i gives the number of milliseconds
 | |
| + * to sleep before step i and the value ratelimiter_allow() is expected to
 | |
| + * return for the fixed test source at that step. The first PACKETS_BURSTABLE
 | |
| + * steps are expected to be allowed without sleeping.
 | |
| + */
 | |
| +static const struct { bool result; unsigned int msec_to_sleep_before; } expected_results[] __initconst = {
 | |
| +	[0 ... PACKETS_BURSTABLE - 1] = { true, 0 },
 | |
| +	[PACKETS_BURSTABLE] = { false, 0 },
 | |
| +	[PACKETS_BURSTABLE + 1] = { true, MSEC_PER_SEC / PACKETS_PER_SECOND },
 | |
| +	[PACKETS_BURSTABLE + 2] = { false, 0 },
 | |
| +	[PACKETS_BURSTABLE + 3] = { true, (MSEC_PER_SEC / PACKETS_PER_SECOND) * 2 },
 | |
| +	[PACKETS_BURSTABLE + 4] = { true, 0 },
 | |
| +	[PACKETS_BURSTABLE + 5] = { false, 0 }
 | |
| +};
 | |
| +
 | |
| +/* Self-test for the ratelimiter: replays the expected_results script against
 | |
| + * one IPv4 (and, with CONFIG_IPV6, one IPv6) source, checks that the entry
 | |
| + * count drops to zero after garbage collection, and finally checks the
 | |
| + * max_entries limit. Returns true on success. On failure the log line carries
 | |
| + * the loop index at which the test stopped (-1 if it failed before any loop).
 | |
| + */
 | |
| +bool __init ratelimiter_selftest(void)
 | |
| +{
 | |
| +	struct sk_buff *skb4;
 | |
| +	struct iphdr *hdr4;
 | |
| +#if IS_ENABLED(CONFIG_IPV6)
 | |
| +	struct sk_buff *skb6;
 | |
| +	struct ipv6hdr *hdr6;
 | |
| +#endif
 | |
| +	int i = -1, ret = false;
 | |
| +
 | |
| +	/* The per-packet sleep below is computed with integer division, so it must be exact. */
 | |
| +	BUILD_BUG_ON(MSEC_PER_SEC % PACKETS_PER_SECOND != 0);
 | |
| +
 | |
| +	/* Initialize three times, undoing the earlier successful inits on failure.
 | |
| +	 * The three matching ratelimiter_uninit() calls are under err_nofree.
 | |
| +	 * NOTE(review): presumably this exercises nested init/uninit - confirm against ratelimiter.c.
 | |
| +	 */
 | |
| +	if (ratelimiter_init())
 | |
| +		goto out;
 | |
| +	if (ratelimiter_init()) {
 | |
| +		ratelimiter_uninit();
 | |
| +		goto out;
 | |
| +	}
 | |
| +	if (ratelimiter_init()) {
 | |
| +		ratelimiter_uninit();
 | |
| +		ratelimiter_uninit();
 | |
| +		goto out;
 | |
| +	}
 | |
| +
 | |
| +	/* Minimal IPv4 packet: only the protocol and the source address are filled in. */
 | |
| +	skb4 = alloc_skb(sizeof(struct iphdr), GFP_KERNEL);
 | |
| +	if (!skb4)
 | |
| +		goto err_nofree;
 | |
| +	skb4->protocol = htons(ETH_P_IP);
 | |
| +	hdr4 = (struct iphdr *)skb_put(skb4, sizeof(struct iphdr));
 | |
| +	hdr4->saddr = htonl(8182);
 | |
| +	skb_reset_network_header(skb4);
 | |
| +
 | |
| +#if IS_ENABLED(CONFIG_IPV6)
 | |
| +	/* Minimal IPv6 packet: only the protocol and the first two source address words are set here. */
 | |
| +	skb6 = alloc_skb(sizeof(struct ipv6hdr), GFP_KERNEL);
 | |
| +	if (!skb6) {
 | |
| +		kfree_skb(skb4);
 | |
| +		goto err_nofree;
 | |
| +	}
 | |
| +	skb6->protocol = htons(ETH_P_IPV6);
 | |
| +	hdr6 = (struct ipv6hdr *)skb_put(skb6, sizeof(struct ipv6hdr));
 | |
| +	hdr6->saddr.in6_u.u6_addr32[0] = htonl(1212);
 | |
| +	hdr6->saddr.in6_u.u6_addr32[1] = htonl(289188);
 | |
| +	skb_reset_network_header(skb6);
 | |
| +#endif
 | |
| +
 | |
| +	for (i = 0; i < ARRAY_SIZE(expected_results); ++i) {
 | |
| +		if (expected_results[i].msec_to_sleep_before)
 | |
| +			msleep(expected_results[i].msec_to_sleep_before);
 | |
| +
 | |
| +		/* The fixed source must follow the script... */
 | |
| +		if (ratelimiter_allow(skb4, &init_net) != expected_results[i].result)
 | |
| +			goto err;
 | |
| +		/* ...while a source not seen before (saddr + i + 1) must be allowed; then restore saddr. */
 | |
| +		hdr4->saddr = htonl(ntohl(hdr4->saddr) + i + 1);
 | |
| +		if (!ratelimiter_allow(skb4, &init_net))
 | |
| +			goto err;
 | |
| +		hdr4->saddr = htonl(ntohl(hdr4->saddr) - i - 1);
 | |
| +
 | |
| +#if IS_ENABLED(CONFIG_IPV6)
 | |
| +		/* Words 2 and 3 change every step, yet the script result is still expected.
 | |
| +		 * NOTE(review): implies the limiter does not key on those words - confirm.
 | |
| +		 */
 | |
| +		hdr6->saddr.in6_u.u6_addr32[2] = hdr6->saddr.in6_u.u6_addr32[3] = htonl(i);
 | |
| +		if (ratelimiter_allow(skb6, &init_net) != expected_results[i].result)
 | |
| +			goto err;
 | |
| +		/* Changing word 0 must look like a new source; then restore it. */
 | |
| +		hdr6->saddr.in6_u.u6_addr32[0] = htonl(ntohl(hdr6->saddr.in6_u.u6_addr32[0]) + i + 1);
 | |
| +		if (!ratelimiter_allow(skb6, &init_net))
 | |
| +			goto err;
 | |
| +		hdr6->saddr.in6_u.u6_addr32[0] = htonl(ntohl(hdr6->saddr.in6_u.u6_addr32[0]) - i - 1);
 | |
| +#endif
 | |
| +	}
 | |
| +
 | |
| +	/* Run garbage collection and wait for pending RCU callbacks; no entries may remain afterwards. */
 | |
| +	gc_entries(NULL);
 | |
| +	rcu_barrier_bh();
 | |
| +
 | |
| +	if (atomic_read(&total_entries))
 | |
| +		goto err;
 | |
| +
 | |
| +	/* max_entries distinct sources must be allowed; the one after that must be refused. */
 | |
| +	for (i = 0; i <= max_entries; ++i) {
 | |
| +		hdr4->saddr = htonl(i);
 | |
| +		if (ratelimiter_allow(skb4, &init_net) != (i != max_entries))
 | |
| +			goto err;
 | |
| +	}
 | |
| +
 | |
| +	ret = true;
 | |
| +
 | |
| +err:
 | |
| +	kfree_skb(skb4);
 | |
| +#if IS_ENABLED(CONFIG_IPV6)
 | |
| +	kfree_skb(skb6);
 | |
| +#endif
 | |
| +err_nofree:
 | |
| +	ratelimiter_uninit();
 | |
| +	ratelimiter_uninit();
 | |
| +	ratelimiter_uninit();
 | |
| +out:
 | |
| +	if (ret)
 | |
| +		pr_info("ratelimiter self-tests: pass\n");
 | |
| +	else
 | |
| +		pr_info("ratelimiter self-test %d: fail\n", i);
 | |
| +
 | |
| +	return ret;
 | |
| +}
 | |
| +#endif
 | |
| --- /dev/null	2017-07-05 16:27:37.615351856 +0200
 | |
| +++ b/net/wireguard/selftest/routingtable.h	2017-07-06 18:17:33.000000000 +0200
 | |
| @@ -0,0 +1,504 @@
 | |
| +/* Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. */
 | |
| +
 | |
| +#ifdef DEBUG
 | |
| +
 | |
| +#ifdef DEBUG_PRINT_TRIE_GRAPHVIZ
 | |
| +#include <linux/siphash.h>
 | |
| +/* Prints one trie node as a Graphviz declaration, then, for each existing
 | |
| + * child, an edge to it followed by the child's own subtree (recursively).
 | |
| + * bits selects the address formatting: 32 uses %pI4, 128 uses %pI6 and any
 | |
| + * other value falls back to %p. Nodes that carry a peer are drawn bold in a
 | |
| + * colour derived from hashing the peer pointer; the rest are dotted.
 | |
| + */
 | |
| +static __init void print_node(struct routing_table_node *node, u8 bits)
 | |
| +{
 | |
| +	char *fmt_declaration, *fmt_connection;
 | |
| +	char *style = "dotted";
 | |
| +	unsigned int child;
 | |
| +	u32 color = 0;
 | |
| +
 | |
| +	switch (bits) {
 | |
| +	case 32:
 | |
| +		fmt_connection = KERN_DEBUG "\t\"%pI4/%d\" -> \"%pI4/%d\";\n";
 | |
| +		fmt_declaration = KERN_DEBUG "\t\"%pI4/%d\"[style=%s, color=\"#%06x\"];\n";
 | |
| +		break;
 | |
| +	case 128:
 | |
| +		fmt_connection = KERN_DEBUG "\t\"%pI6/%d\" -> \"%pI6/%d\";\n";
 | |
| +		fmt_declaration = KERN_DEBUG "\t\"%pI6/%d\"[style=%s, color=\"#%06x\"];\n";
 | |
| +		break;
 | |
| +	default:
 | |
| +		fmt_connection = KERN_DEBUG "\t\"%p/%d\" -> \"%p/%d\";\n";
 | |
| +		fmt_declaration = KERN_DEBUG "\t\"%p/%d\"[style=%s, color=\"#%06x\"];\n";
 | |
| +		break;
 | |
| +	}
 | |
| +
 | |
| +	if (node->peer) {
 | |
| +		hsiphash_key_t key = { 0 };
 | |
| +		u32 red, green, blue;
 | |
| +
 | |
| +		/* The peer pointer itself is the hash key; each channel is kept below 200. */
 | |
| +		memcpy(&key, &node->peer, sizeof(node->peer));
 | |
| +		red = hsiphash_1u32(0xdeadbeef, &key) % 200;
 | |
| +		green = hsiphash_1u32(0xbabecafe, &key) % 200;
 | |
| +		blue = hsiphash_1u32(0xabad1dea, &key) % 200;
 | |
| +		color = red << 16 | green << 8 | blue;
 | |
| +		style = "bold";
 | |
| +	}
 | |
| +
 | |
| +	printk(fmt_declaration, node->bits, node->cidr, style, color);
 | |
| +	for (child = 0; child < 2; ++child) {
 | |
| +		if (!node->bit[child])
 | |
| +			continue;
 | |
| +		printk(fmt_connection, node->bits, node->cidr, node->bit[child]->bits, node->bit[child]->cidr);
 | |
| +		print_node(node->bit[child], bits);
 | |
| +	}
 | |
| +}
 | |
| +/* Dumps the trie rooted at top to the kernel log as a Graphviz "digraph",
 | |
| + * wrapping the recursive print_node() output in the opening and closing lines.
 | |
| + * bits is forwarded unchanged to print_node().
 | |
| + */
 | |
| +static __init void print_tree(struct routing_table_node *top, u8 bits)
 | |
| +{
 | |
| +	printk(KERN_DEBUG "digraph trie {\n");
 | |
| +	print_node(top, bits);
 | |
| +	printk(KERN_DEBUG "}\n");
 | |
| +}
 | |
| +#endif
 | |
| +
 | |
| +#ifdef DEBUG_RANDOM_TRIE
 | |
| +/* Sizing of randomized_test(): number of dummy peers, number of random base
 | |
| + * routes per address family, number of mutated routes derived from each base
 | |
| + * route, and number of random lookups per address family.
 | |
| + */
 | |
| +#define NUM_PEERS 2000
 | |
| +#define NUM_RAND_ROUTES 400
 | |
| +#define NUM_MUTATED_ROUTES 100
 | |
| +#define NUM_QUERIES (NUM_RAND_ROUTES * NUM_MUTATED_ROUTES * 30)
 | |
| +#include <linux/random.h>
 | |
| +/* Reference routing table used only by the randomized self-test: a single
 | |
| + * linked list of horrible_routing_table_node entries.
 | |
| + */
 | |
| +struct horrible_routing_table {
 | |
| +	struct hlist_head head; /* list of nodes, linked through their .table member */
 | |
| +};
 | |
| +/* One route of the reference table. */
 | |
| +struct horrible_routing_table_node {
 | |
| +	struct hlist_node table; /* linkage in horrible_routing_table.head */
 | |
| +	union nf_inet_addr ip; /* network address, already ANDed with mask by horrible_mask_self() */
 | |
| +	union nf_inet_addr mask; /* netmask built by horrible_cidr_to_mask() */
 | |
| +	uint8_t ip_version; /* 4 or 6 */
 | |
| +	void *value; /* opaque value returned by the lookup functions */
 | |
| +};
 | |
| +/* Initializes table to an empty list. */
 | |
| +static __init void horrible_routing_table_init(struct horrible_routing_table *table)
 | |
| +{
 | |
| +	INIT_HLIST_HEAD(&table->head);
 | |
| +}
 | |
| +/* Unlinks every node from the table's list and kfree()s it. */
 | |
| +static __init void horrible_routing_table_free(struct horrible_routing_table *table)
 | |
| +{
 | |
| +	struct horrible_routing_table_node *entry;
 | |
| +	struct hlist_node *next;
 | |
| +
 | |
| +	/* The _safe iterator is needed because the current entry is freed inside the loop. */
 | |
| +	hlist_for_each_entry_safe (entry, next, &table->head, table) {
 | |
| +		hlist_del(&entry->table);
 | |
| +		kfree(entry);
 | |
| +	}
 | |
| +}
 | |
| +/* Builds a 128-bit netmask with the first cidr bits set: whole 0xff bytes
 | |
| + * first, then the 32-bit word containing the prefix boundary is rewritten
 | |
| + * in network byte order when cidr is not a multiple of 32.
 | |
| + */
 | |
| +static __init inline union nf_inet_addr horrible_cidr_to_mask(uint8_t cidr)
 | |
| +{
 | |
| +	const uint8_t partial_bits = cidr % 32;
 | |
| +	union nf_inet_addr mask;
 | |
| +
 | |
| +	memset(&mask, 0x00, 128 / 8);
 | |
| +	memset(&mask, 0xff, cidr / 8);
 | |
| +	if (partial_bits)
 | |
| +		mask.all[cidr / 32] = htonl((0xFFFFFFFFUL << (32 - partial_bits)) & 0xFFFFFFFFUL);
 | |
| +	return mask;
 | |
| +}
 | |
| +/* Returns the number of set bits across all four 32-bit words of subnet. */
 | |
| +static __init inline uint8_t horrible_mask_to_cidr(union nf_inet_addr subnet)
 | |
| +{
 | |
| +	unsigned int word, ones = 0;
 | |
| +
 | |
| +	for (word = 0; word < 4; ++word)
 | |
| +		ones += hweight32(subnet.all[word]);
 | |
| +	return ones;
 | |
| +}
 | |
| +/* Clears the host bits of node->ip by ANDing it with node->mask: one word
 | |
| + * for IPv4, four words for IPv6. Any other ip_version is left untouched.
 | |
| + */
 | |
| +static __init inline void horrible_mask_self(struct horrible_routing_table_node *node)
 | |
| +{
 | |
| +	unsigned int word;
 | |
| +
 | |
| +	switch (node->ip_version) {
 | |
| +	case 4:
 | |
| +		node->ip.ip &= node->mask.ip;
 | |
| +		break;
 | |
| +	case 6:
 | |
| +		for (word = 0; word < 4; ++word)
 | |
| +			node->ip.ip6[word] &= node->mask.ip6[word];
 | |
| +		break;
 | |
| +	default:
 | |
| +		break;
 | |
| +	}
 | |
| +}
 | |
| +/* Returns true when ip, masked with the node's netmask, equals the node's network address. */
 | |
| +static __init inline bool horrible_match_v4(const struct horrible_routing_table_node *node, struct in_addr *ip)
 | |
| +{
 | |
| +	const __be32 masked = ip->s_addr & node->mask.ip;
 | |
| +
 | |
| +	return masked == node->ip.ip;
 | |
| +}
 | |
| +/* Returns true when every 32-bit word of ip, masked with the node's netmask,
 | |
| + * equals the corresponding word of the node's network address.
 | |
| + */
 | |
| +static __init inline bool horrible_match_v6(const struct horrible_routing_table_node *node, struct in6_addr *ip)
 | |
| +{
 | |
| +	unsigned int word;
 | |
| +
 | |
| +	for (word = 0; word < 4; ++word) {
 | |
| +		if ((ip->in6_u.u6_addr32[word] & node->mask.ip6[word]) != node->ip.ip6[word])
 | |
| +			return false;
 | |
| +	}
 | |
| +	return true;
 | |
| +}
 | |
| +/* Inserts node into the list, keeping it ordered by descending prefix length
 | |
| + * so that the first match found by a linear scan is the most specific one.
 | |
| + * If an entry with the same mask, address and IP version already exists, only
 | |
| + * its value is replaced and node is freed. Takes ownership of node.
 | |
| + */
 | |
| +static __init void horrible_insert_ordered(struct horrible_routing_table *table, struct horrible_routing_table_node *node)
 | |
| +{
 | |
| +	struct horrible_routing_table_node *cur, *last = NULL;
 | |
| +	const uint8_t my_cidr = horrible_mask_to_cidr(node->mask);
 | |
| +
 | |
| +	hlist_for_each_entry (cur, &table->head, table) {
 | |
| +		if (cur->ip_version == node->ip_version &&
 | |
| +		    !memcmp(&cur->mask, &node->mask, sizeof(union nf_inet_addr)) &&
 | |
| +		    !memcmp(&cur->ip, &node->ip, sizeof(union nf_inet_addr))) {
 | |
| +			/* Same route already present: overwrite its value. */
 | |
| +			cur->value = node->value;
 | |
| +			kfree(node);
 | |
| +			return;
 | |
| +		}
 | |
| +		if (horrible_mask_to_cidr(cur->mask) <= my_cidr) {
 | |
| +			/* First entry that is not more specific: the new node goes right before it. */
 | |
| +			hlist_add_before(&node->table, &cur->table);
 | |
| +			return;
 | |
| +		}
 | |
| +		last = cur;
 | |
| +	}
 | |
| +
 | |
| +	/* Every existing entry is more specific (or the list is empty): append. */
 | |
| +	if (last)
 | |
| +		hlist_add_behind(&node->table, &last->table);
 | |
| +	else
 | |
| +		hlist_add_head(&node->table, &table->head);
 | |
| +}
 | |
| +/* Adds the IPv4 route ip/cidr -> value to the reference table.
 | |
| + * Returns 0 on success or -ENOMEM if the node cannot be allocated.
 | |
| + */
 | |
| +static __init int horrible_routing_table_insert_v4(struct horrible_routing_table *table, struct in_addr *ip, uint8_t cidr, void *value)
 | |
| +{
 | |
| +	struct horrible_routing_table_node *entry;
 | |
| +
 | |
| +	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
 | |
| +	if (!entry)
 | |
| +		return -ENOMEM;
 | |
| +
 | |
| +	entry->ip_version = 4;
 | |
| +	entry->value = value;
 | |
| +	entry->mask = horrible_cidr_to_mask(cidr);
 | |
| +	entry->ip.in = *ip;
 | |
| +	/* Drop the host bits before the node is compared against existing entries. */
 | |
| +	horrible_mask_self(entry);
 | |
| +	horrible_insert_ordered(table, entry);
 | |
| +	return 0;
 | |
| +}
 | |
| +/* Adds the IPv6 route ip/cidr -> value to the reference table.
 | |
| + * Returns 0 on success or -ENOMEM if the node cannot be allocated.
 | |
| + */
 | |
| +static __init int horrible_routing_table_insert_v6(struct horrible_routing_table *table, struct in6_addr *ip, uint8_t cidr, void *value)
 | |
| +{
 | |
| +	struct horrible_routing_table_node *entry;
 | |
| +
 | |
| +	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
 | |
| +	if (!entry)
 | |
| +		return -ENOMEM;
 | |
| +
 | |
| +	entry->ip_version = 6;
 | |
| +	entry->value = value;
 | |
| +	entry->mask = horrible_cidr_to_mask(cidr);
 | |
| +	entry->ip.in6 = *ip;
 | |
| +	/* Drop the host bits before the node is compared against existing entries. */
 | |
| +	horrible_mask_self(entry);
 | |
| +	horrible_insert_ordered(table, entry);
 | |
| +	return 0;
 | |
| +}
 | |
| +/* Linear scan of the reference table: returns the value of the first IPv4
 | |
| + * entry that matches ip, or NULL when none does.
 | |
| + */
 | |
| +static __init void *horrible_routing_table_lookup_v4(struct horrible_routing_table *table, struct in_addr *ip)
 | |
| +{
 | |
| +	struct horrible_routing_table_node *entry;
 | |
| +
 | |
| +	hlist_for_each_entry (entry, &table->head, table) {
 | |
| +		if (entry->ip_version == 4 && horrible_match_v4(entry, ip))
 | |
| +			return entry->value;
 | |
| +	}
 | |
| +	return NULL;
 | |
| +}
 | |
| +/* Linear scan of the reference table: returns the value of the first IPv6
 | |
| + * entry that matches ip, or NULL when none does.
 | |
| + */
 | |
| +static __init void *horrible_routing_table_lookup_v6(struct horrible_routing_table *table, struct in6_addr *ip)
 | |
| +{
 | |
| +	struct horrible_routing_table_node *entry;
 | |
| +
 | |
| +	hlist_for_each_entry (entry, &table->head, table) {
 | |
| +		if (entry->ip_version == 6 && horrible_match_v6(entry, ip))
 | |
| +			return entry->value;
 | |
| +	}
 | |
| +	return NULL;
 | |
| +}
 | |
| +
 | |
| +/* Differential test of the routing-table trie against the linear
 | |
| + * horrible_routing_table reference: the same random (and mutated) IPv4 and
 | |
| + * IPv6 routes are inserted into both, then NUM_QUERIES random addresses per
 | |
| + * family are looked up in both and the results must be identical.
 | |
| + * Returns true on success; false on any mismatch or allocation failure.
 | |
| + */
 | |
| +static __init bool randomized_test(void)
 | |
| +{
 | |
| +	bool ret = false;
 | |
| +	unsigned int i, j, k, mutate_amount, cidr;
 | |
| +	struct wireguard_peer **peers, *peer;
 | |
| +	struct routing_table t;
 | |
| +	struct horrible_routing_table h;
 | |
| +	u8 ip[16], mutate_mask[16], mutated[16];
 | |
| +
 | |
| +	routing_table_init(&t);
 | |
| +	horrible_routing_table_init(&h);
 | |
| +
 | |
| +	/* Dummy peers: only their addresses (and refcounts) are used as route values. */
 | |
| +	peers = kcalloc(NUM_PEERS, sizeof(struct wireguard_peer *), GFP_KERNEL);
 | |
| +	if (!peers) {
 | |
| +		pr_info("routing table random self-test: out of memory\n");
 | |
| +		goto free;
 | |
| +	}
 | |
| +	for (i = 0; i < NUM_PEERS; ++i) {
 | |
| +		peers[i] = kzalloc(sizeof(struct wireguard_peer), GFP_KERNEL);
 | |
| +		if (!peers[i]) {
 | |
| +			pr_info("routing table random self-test: out of memory\n");
 | |
| +			goto free;
 | |
| +		}
 | |
| +		kref_init(&peers[i]->refcount);
 | |
| +	}
 | |
| +
 | |
| +	/* IPv4: random base routes, each followed by routes that share a random-length prefix with it. */
 | |
| +	for (i = 0; i < NUM_RAND_ROUTES; ++i) {
 | |
| +		prandom_bytes(ip, 4);
 | |
| +		cidr = prandom_u32_max(32) + 1;
 | |
| +		peer = peers[prandom_u32_max(NUM_PEERS)];
 | |
| +		if (routing_table_insert_v4(&t, (struct in_addr *)ip, cidr, peer) < 0) {
 | |
| +			pr_info("routing table random self-test: out of memory\n");
 | |
| +			goto free;
 | |
| +		}
 | |
| +		if (horrible_routing_table_insert_v4(&h, (struct in_addr *)ip, cidr, peer) < 0) {
 | |
| +			pr_info("routing table random self-test: out of memory\n");
 | |
| +			goto free;
 | |
| +		}
 | |
| +		for (j = 0; j < NUM_MUTATED_ROUTES; ++j) {
 | |
| +			memcpy(mutated, ip, 4);
 | |
| +			prandom_bytes(mutate_mask, 4);
 | |
| +			mutate_amount = prandom_u32_max(32);
 | |
| +			for (k = 0; k < mutate_amount / 8; ++k)
 | |
| +				mutate_mask[k] = 0xff;
 | |
| +			mutate_mask[k] = 0xff << ((8 - (mutate_amount % 8)) % 8);
 | |
| +			/* NOTE: this loop starts at the byte written just above, so the
 | |
| +			 * preserved prefix is effectively rounded down to whole bytes.
 | |
| +			 */
 | |
| +			for (; k < 4; ++k)
 | |
| +				mutate_mask[k] = 0;
 | |
| +			for (k = 0; k < 4; ++k)
 | |
| +				mutated[k] = (mutated[k] & mutate_mask[k]) | (~mutate_mask[k] & prandom_u32_max(256));
 | |
| +			cidr = prandom_u32_max(32) + 1;
 | |
| +			peer = peers[prandom_u32_max(NUM_PEERS)];
 | |
| +			if (routing_table_insert_v4(&t, (struct in_addr *)mutated, cidr, peer) < 0) {
 | |
| +				pr_info("routing table random self-test: out of memory\n");
 | |
| +				goto free;
 | |
| +			}
 | |
| +			if (horrible_routing_table_insert_v4(&h, (struct in_addr *)mutated, cidr, peer)) {
 | |
| +				pr_info("routing table random self-test: out of memory\n");
 | |
| +				goto free;
 | |
| +			}
 | |
| +		}
 | |
| +	}
 | |
| +
 | |
| +	/* IPv6: same scheme over 16-byte addresses. */
 | |
| +	for (i = 0; i < NUM_RAND_ROUTES; ++i) {
 | |
| +		prandom_bytes(ip, 16);
 | |
| +		cidr = prandom_u32_max(128) + 1;
 | |
| +		peer = peers[prandom_u32_max(NUM_PEERS)];
 | |
| +		if (routing_table_insert_v6(&t, (struct in6_addr *)ip, cidr, peer) < 0) {
 | |
| +			pr_info("routing table random self-test: out of memory\n");
 | |
| +			goto free;
 | |
| +		}
 | |
| +		if (horrible_routing_table_insert_v6(&h, (struct in6_addr *)ip, cidr, peer) < 0) {
 | |
| +			pr_info("routing table random self-test: out of memory\n");
 | |
| +			goto free;
 | |
| +		}
 | |
| +		for (j = 0; j < NUM_MUTATED_ROUTES; ++j) {
 | |
| +			memcpy(mutated, ip, 16);
 | |
| +			prandom_bytes(mutate_mask, 16);
 | |
| +			mutate_amount = prandom_u32_max(128);
 | |
| +			for (k = 0; k < mutate_amount / 8; ++k)
 | |
| +				mutate_mask[k] = 0xff;
 | |
| +			mutate_mask[k] = 0xff << ((8 - (mutate_amount % 8)) % 8);
 | |
| +			/* The mask and the mutation must cover all 16 address bytes.
 | |
| +			 * Stopping at 4 (as in the IPv4 case) left bytes 4..15 identical
 | |
| +			 * to the base route and produced exact duplicates of it whenever
 | |
| +			 * mutate_amount was 32 or more.
 | |
| +			 */
 | |
| +			for (; k < 16; ++k)
 | |
| +				mutate_mask[k] = 0;
 | |
| +			for (k = 0; k < 16; ++k)
 | |
| +				mutated[k] = (mutated[k] & mutate_mask[k]) | (~mutate_mask[k] & prandom_u32_max(256));
 | |
| +			cidr = prandom_u32_max(128) + 1;
 | |
| +			peer = peers[prandom_u32_max(NUM_PEERS)];
 | |
| +			if (routing_table_insert_v6(&t, (struct in6_addr *)mutated, cidr, peer) < 0) {
 | |
| +				pr_info("routing table random self-test: out of memory\n");
 | |
| +				goto free;
 | |
| +			}
 | |
| +			if (horrible_routing_table_insert_v6(&h, (struct in6_addr *)mutated, cidr, peer)) {
 | |
| +				pr_info("routing table random self-test: out of memory\n");
 | |
| +				goto free;
 | |
| +			}
 | |
| +		}
 | |
| +	}
 | |
| +
 | |
| +#ifdef DEBUG_PRINT_TRIE_GRAPHVIZ
 | |
| +	print_tree(t.root4, 32);
 | |
| +	print_tree(t.root6, 128);
 | |
| +#endif
 | |
| +
 | |
| +	/* Both implementations must return the same value for every random query. */
 | |
| +	for (i = 0; i < NUM_QUERIES; ++i) {
 | |
| +		prandom_bytes(ip, 4);
 | |
| +		if (lookup(t.root4, 32, ip) != horrible_routing_table_lookup_v4(&h, (struct in_addr *)ip)) {
 | |
| +			pr_info("routing table random self-test: FAIL\n");
 | |
| +			goto free;
 | |
| +		}
 | |
| +	}
 | |
| +
 | |
| +	for (i = 0; i < NUM_QUERIES; ++i) {
 | |
| +		prandom_bytes(ip, 16);
 | |
| +		if (lookup(t.root6, 128, ip) != horrible_routing_table_lookup_v6(&h, (struct in6_addr *)ip)) {
 | |
| +			pr_info("routing table random self-test: FAIL\n");
 | |
| +			goto free;
 | |
| +		}
 | |
| +	}
 | |
| +	ret = true;
 | |
| +
 | |
| +free:
 | |
| +	routing_table_free(&t);
 | |
| +	horrible_routing_table_free(&h);
 | |
| +	if (peers) {
 | |
| +		/* kcalloc() zeroed the array, so unallocated slots are NULL and kfree() ignores them. */
 | |
| +		for (i = 0; i < NUM_PEERS; ++i)
 | |
| +			kfree(peers[i]);
 | |
| +	}
 | |
| +	kfree(peers);
 | |
| +	return ret;
 | |
| +}
 | |
| +#endif
 | |
| +
 | |
| +/* Builds the IPv4 address a.b.c.d (bytes stored in that order) and returns a
 | |
| + * pointer to it. The storage is a function-local static, so each call
 | |
| + * overwrites the address returned by the previous one.
 | |
| + */
 | |
| +static __init inline struct in_addr *ip4(u8 a, u8 b, u8 c, u8 d)
 | |
| +{
 | |
| +	static struct in_addr ip;
 | |
| +	const u8 octets[4] = { a, b, c, d };
 | |
| +
 | |
| +	memcpy(&ip, octets, sizeof(octets));
 | |
| +	return &ip;
 | |
| +}
 | |
| +/* Builds an IPv6 address from four host-order 32-bit words, storing each in
 | |
| + * big-endian order, and returns a pointer to it. The storage is a
 | |
| + * function-local static, so each call overwrites the previous result.
 | |
| + */
 | |
| +static __init inline struct in6_addr *ip6(u32 a, u32 b, u32 c, u32 d)
 | |
| +{
 | |
| +	static struct in6_addr ip;
 | |
| +	const u32 words[4] = { a, b, c, d };
 | |
| +	__be32 *dst = (__be32 *)&ip;
 | |
| +	unsigned int n;
 | |
| +
 | |
| +	for (n = 0; n < 4; ++n)
 | |
| +		dst[n] = cpu_to_be32(words[n]);
 | |
| +	return &ip;
 | |
| +}
 | |
| +
 | |
| +/* init_peer(name): allocate a zeroed struct wireguard_peer into name and
 | |
| + * initialize its refcount. On allocation failure it logs and jumps to the
 | |
| + * "free" label of the enclosing function.
 | |
| + */
 | |
| +#define init_peer(name) do { \
 | |
| +	name = kzalloc(sizeof(struct wireguard_peer), GFP_KERNEL); \
 | |
| +	if (!name) { \
 | |
| +		pr_info("routing table self-test: out of memory\n"); \
 | |
| +		goto free; \
 | |
| +	} \
 | |
| +	kref_init(&name->refcount); \
 | |
| +} while (0)
 | |
| +
 | |
| +/* insert(version, mem, ipa..ipd, cidr): insert the route built by
 | |
| + * ip<version>(ipa, ipb, ipc, ipd)/cidr -> mem into the local table "t".
 | |
| + * Expands to the insert call itself, so its return value is available.
 | |
| + */
 | |
| +#define insert(version, mem, ipa, ipb, ipc, ipd, cidr) \
 | |
| +	routing_table_insert_v##version(&t, ip##version(ipa, ipb, ipc, ipd), cidr, mem)
 | |
| +
 | |
| +/* maybe_fail: shared tail of test()/test_negative(). Increments the test
 | |
| + * counter "i" and, if "_s" is false, logs the failure and clears "success".
 | |
| + * Relies on "i", "_s" and "success" existing at the expansion site.
 | |
| + */
 | |
| +#define maybe_fail \
 | |
| +	++i; \
 | |
| +	if (!_s) { \
 | |
| +		pr_info("routing table self-test %zu: FAIL\n", i); \
 | |
| +		success = false; \
 | |
| +	}
 | |
| +
 | |
| +/* test(version, mem, ipa..ipd): looking the address up in "t" must yield mem. */
 | |
| +#define test(version, mem, ipa, ipb, ipc, ipd) do { \
 | |
| +	bool _s = lookup(t.root##version, version == 4 ? 32 : 128, ip##version(ipa, ipb, ipc, ipd)) == mem; \
 | |
| +	maybe_fail \
 | |
| +} while (0)
 | |
| +
 | |
| +/* test_negative(version, mem, ipa..ipd): the lookup must NOT yield mem. */
 | |
| +#define test_negative(version, mem, ipa, ipb, ipc, ipd) do { \
 | |
| +	bool _s = lookup(t.root##version, version == 4 ? 32 : 128, ip##version(ipa, ipb, ipc, ipd)) != mem; \
 | |
| +	maybe_fail \
 | |
| +} while (0)
 | |
| +
 | |
| +/* Self-test for the routing table: inserts a fixed set of IPv4/IPv6 routes
 | |
| + * for eight dummy peers, checks a list of lookups against the expected peer,
 | |
| + * checks that routing_table_remove_by_peer() removes a peer's routes, inserts
 | |
| + * a batch of /128 routes, and (with DEBUG_RANDOM_TRIE) runs randomized_test().
 | |
| + * Returns true if every check passed; false on any failure, including a
 | |
| + * failed peer allocation.
 | |
| + */
 | |
| +bool __init routing_table_selftest(void)
 | |
| +{
 | |
| +	struct routing_table t;
 | |
| +	struct wireguard_peer *a = NULL, *b = NULL, *c = NULL, *d = NULL, *e = NULL, *f = NULL, *g = NULL, *h = NULL;
 | |
| +	size_t i = 0;
 | |
| +	bool success = false;
 | |
| +	struct in6_addr ip;
 | |
| +	__be64 part;
 | |
| +
 | |
| +	routing_table_init(&t);
 | |
| +	/* Any allocation failure below jumps to "free" with success still false. */
 | |
| +	init_peer(a);
 | |
| +	init_peer(b);
 | |
| +	init_peer(c);
 | |
| +	init_peer(d);
 | |
| +	init_peer(e);
 | |
| +	init_peer(f);
 | |
| +	init_peer(g);
 | |
| +	init_peer(h);
 | |
| +
 | |
| +	/* Fixture routes; the return values of these inserts are not checked. */
 | |
| +	insert(4, a, 192, 168, 4, 0, 24);
 | |
| +	insert(4, b, 192, 168, 4, 4, 32);
 | |
| +	insert(4, c, 192, 168, 0, 0, 16);
 | |
| +	insert(4, d, 192, 95, 5, 64, 27);
 | |
| +	insert(4, c, 192, 95, 5, 65, 27); /* replaces previous entry, and maskself is required */
 | |
| +	insert(6, d, 0x26075300, 0x60006b00, 0, 0xc05f0543, 128);
 | |
| +	insert(6, c, 0x26075300, 0x60006b00, 0, 0, 64);
 | |
| +	insert(4, e, 0, 0, 0, 0, 0);
 | |
| +	insert(6, e, 0, 0, 0, 0, 0);
 | |
| +	insert(6, f, 0, 0, 0, 0, 0); /* replaces previous entry */
 | |
| +	insert(6, g, 0x24046800, 0, 0, 0, 32);
 | |
| +	insert(6, h, 0x24046800, 0x40040800, 0xdeadbeef, 0xdeadbeef, 64); /* maskself is required */
 | |
| +	insert(6, a, 0x24046800, 0x40040800, 0xdeadbeef, 0xdeadbeef, 128);
 | |
| +	insert(6, c, 0x24446800, 0x40e40800, 0xdeaebeef, 0xdefbeef, 128);
 | |
| +	insert(6, b, 0x24446800, 0xf0e40800, 0xeeaebeef, 0, 98);
 | |
| +	insert(4, g, 64, 15, 112, 0, 20);
 | |
| +	insert(4, h, 64, 15, 123, 211, 25); /* maskself is required */
 | |
| +	insert(4, a, 10, 0, 0, 0, 25);
 | |
| +	insert(4, b, 10, 0, 0, 128, 25);
 | |
| +	insert(4, a, 10, 1, 0, 0, 30);
 | |
| +	insert(4, b, 10, 1, 0, 4, 30);
 | |
| +	insert(4, c, 10, 1, 0, 8, 29);
 | |
| +	insert(4, d, 10, 1, 0, 16, 29);
 | |
| +
 | |
| +#ifdef DEBUG_PRINT_TRIE_GRAPHVIZ
 | |
| +	print_tree(t.root4, 32);
 | |
| +	print_tree(t.root6, 128);
 | |
| +#endif
 | |
| +
 | |
| +	/* From here on a failing test()/test_negative() clears success again. */
 | |
| +	success = true;
 | |
| +
 | |
| +	/* Lookups: each address must resolve to the peer named in the second argument. */
 | |
| +	test(4, a, 192, 168, 4, 20);
 | |
| +	test(4, a, 192, 168, 4, 0);
 | |
| +	test(4, b, 192, 168, 4, 4);
 | |
| +	test(4, c, 192, 168, 200, 182);
 | |
| +	test(4, c, 192, 95, 5, 68);
 | |
| +	test(4, e, 192, 95, 5, 96);
 | |
| +	test(6, d, 0x26075300, 0x60006b00, 0, 0xc05f0543);
 | |
| +	test(6, c, 0x26075300, 0x60006b00, 0, 0xc02e01ee);
 | |
| +	test(6, f, 0x26075300, 0x60006b01, 0, 0);
 | |
| +	test(6, g, 0x24046800, 0x40040806, 0, 0x1006);
 | |
| +	test(6, g, 0x24046800, 0x40040806, 0x1234, 0x5678);
 | |
| +	test(6, f, 0x240467ff, 0x40040806, 0x1234, 0x5678);
 | |
| +	test(6, f, 0x24046801, 0x40040806, 0x1234, 0x5678);
 | |
| +	test(6, h, 0x24046800, 0x40040800, 0x1234, 0x5678);
 | |
| +	test(6, h, 0x24046800, 0x40040800, 0, 0);
 | |
| +	test(6, h, 0x24046800, 0x40040800, 0x10101010, 0x10101010);
 | |
| +	test(6, a, 0x24046800, 0x40040800, 0xdeadbeef, 0xdeadbeef);
 | |
| +	test(4, g, 64, 15, 116, 26);
 | |
| +	test(4, g, 64, 15, 127, 3);
 | |
| +	test(4, g, 64, 15, 123, 1);
 | |
| +	test(4, h, 64, 15, 123, 128);
 | |
| +	test(4, h, 64, 15, 123, 129);
 | |
| +	test(4, a, 10, 0, 0, 52);
 | |
| +	test(4, b, 10, 0, 0, 220);
 | |
| +	test(4, a, 10, 1, 0, 2);
 | |
| +	test(4, b, 10, 1, 0, 6);
 | |
| +	test(4, c, 10, 1, 0, 10);
 | |
| +	test(4, d, 10, 1, 0, 20);
 | |
| +
 | |
| +	/* After removing every route of peer a, none of these addresses may resolve to a. */
 | |
| +	insert(4, a, 1, 0, 0, 0, 32);
 | |
| +	insert(4, a, 64, 0, 0, 0, 32);
 | |
| +	insert(4, a, 128, 0, 0, 0, 32);
 | |
| +	insert(4, a, 192, 0, 0, 0, 32);
 | |
| +	insert(4, a, 255, 0, 0, 0, 32);
 | |
| +	routing_table_remove_by_peer(&t, a);
 | |
| +	test_negative(4, a, 1, 0, 0, 0);
 | |
| +	test_negative(4, a, 64, 0, 0, 0);
 | |
| +	test_negative(4, a, 128, 0, 0, 0);
 | |
| +	test_negative(4, a, 192, 0, 0, 0);
 | |
| +	test_negative(4, a, 255, 0, 0, 0);
 | |
| +
 | |
| +	/* Fresh table: removal must also work when one of the peer's routes contains the other. */
 | |
| +	routing_table_free(&t);
 | |
| +	routing_table_init(&t);
 | |
| +	insert(4, a, 192, 168, 0, 0, 16);
 | |
| +	insert(4, a, 192, 168, 0, 0, 24);
 | |
| +	routing_table_remove_by_peer(&t, a);
 | |
| +	test_negative(4, a, 192, 168, 0, 1);
 | |
| +
 | |
| +	/* These will hit the BUG_ON(len >= 128) in free_node if something goes wrong. */
 | |
| +	/* Each iteration inserts an all-ones /128 address with exactly one bit cleared;
 | |
| +	 * i is reused as the loop counter because no test() follows.
 | |
| +	 */
 | |
| +	for (i = 0; i < 128; ++i) {
 | |
| +		part = cpu_to_be64(~(1LLU << (i % 64)));
 | |
| +		memset(&ip, 0xff, 16);
 | |
| +		memcpy((u8 *)&ip + (i < 64) * 8, &part, 8);
 | |
| +		routing_table_insert_v6(&t, &ip, 128, a);
 | |
| +	}
 | |
| +
 | |
| +#ifdef DEBUG_RANDOM_TRIE
 | |
| +	if (success)
 | |
| +		success = randomized_test();
 | |
| +#endif
 | |
| +
 | |
| +	if (success)
 | |
| +		pr_info("routing table self-tests: pass\n");
 | |
| +
 | |
| +free:
 | |
| +	/* Peers that were never allocated are still NULL, which kfree() accepts. */
 | |
| +	routing_table_free(&t);
 | |
| +	kfree(a);
 | |
| +	kfree(b);
 | |
| +	kfree(c);
 | |
| +	kfree(d);
 | |
| +	kfree(e);
 | |
| +	kfree(f);
 | |
| +	kfree(g);
 | |
| +	kfree(h);
 | |
| +
 | |
| +	return success;
 | |
| +}
 | |
| +#undef test_negative
 | |
| +#undef test
 | |
| +#undef remove
 | |
| +#undef insert
 | |
| +#undef init_peer
 | |
| +
 | |
| +#endif
 | |
| --- /dev/null	2017-07-05 16:27:37.615351856 +0200
 | |
| +++ b/net/wireguard/crypto/blake2s.c	2017-07-06 18:17:33.000000000 +0200
 | |
| @@ -0,0 +1,302 @@
 | |
| +/* Original author: Samuel Neves <sneves@dei.uc.pt>
 | |
| + *
 | |
| + * Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 | |
| + */
 | |
| +
 | |
| +#include "blake2s.h"
 | |
| +
 | |
| +#include <linux/types.h>
 | |
| +#include <linux/string.h>
 | |
| +#include <linux/kernel.h>
 | |
| +
 | |
| +typedef struct {
 | |
| +	u8 digest_length;
 | |
| +	u8 key_length;
 | |
| +	u8 fanout;
 | |
| +	u8 depth;
 | |
| +	u32 leaf_length;
 | |
| +	u8 node_offset[6];
 | |
| +	u8 node_depth;
 | |
| +	u8 inner_length;
 | |
| +	u8 salt[8];
 | |
| +	u8 personal[8];
 | |
| +} __packed blake2s_param;
 | |
| +
 | |
| +static const u32 blake2s_iv[8] = {
 | |
| +	0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL,
 | |
| +	0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL
 | |
| +};
 | |
| +
 | |
| +static const u8 blake2s_sigma[10][16] = {
 | |
| +	{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
 | |
| +	{14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3},
 | |
| +	{11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4},
 | |
| +	{7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8},
 | |
| +	{9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13},
 | |
| +	{2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9},
 | |
| +	{12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11},
 | |
| +	{13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10},
 | |
| +	{6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5},
 | |
| +	{10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0},
 | |
| +};
 | |
| +
 | |
| +static inline u32 le32_to_cpuvp(const void *p)
 | |
| +{
 | |
| +	return le32_to_cpup(p);
 | |
| +}
 | |
| +
 | |
| +static inline void blake2s_set_lastblock(struct blake2s_state *state)
 | |
| +{
 | |
| +	if (state->last_node)
 | |
| +		state->f[1] = -1;
 | |
| +	state->f[0] = -1;
 | |
| +}
 | |
| +
 | |
| +static inline void blake2s_increment_counter(struct blake2s_state *state, const u32 inc)
 | |
| +{
 | |
| +	state->t[0] += inc;
 | |
| +	state->t[1] += (state->t[0] < inc);
 | |
| +}
 | |
| +
 | |
| +static inline void blake2s_init_param(struct blake2s_state *state, const blake2s_param *param)
 | |
| +{
 | |
| +	const u32 *p;
 | |
| +	int i;
 | |
| +	memset(state, 0, sizeof(struct blake2s_state));
 | |
| +	for (i = 0; i < 8; ++i)
 | |
| +		state->h[i] = blake2s_iv[i];
 | |
| +	p = (const u32 *)param;
 | |
| +	/* IV XOR ParamBlock */
 | |
| +	for (i = 0; i < 8; ++i)
 | |
| +		state->h[i] ^= le32_to_cpuvp(&p[i]);
 | |
| +}
 | |
| +
 | |
| +void blake2s_init(struct blake2s_state *state, const u8 outlen)
 | |
| +{
 | |
| +	blake2s_param param = {
 | |
| +		.digest_length = outlen,
 | |
| +		.fanout = 1,
 | |
| +		.depth = 1
 | |
| +	};
 | |
| +
 | |
| +#ifdef DEBUG
 | |
| +	BUG_ON(!outlen || outlen > BLAKE2S_OUTBYTES);
 | |
| +#endif
 | |
| +	blake2s_init_param(state, &param);
 | |
| +}
 | |
| +
 | |
| +void blake2s_init_key(struct blake2s_state *state, const u8 outlen, const void *key, const u8 keylen)
 | |
| +{
 | |
| +	blake2s_param param = {
 | |
| +		.digest_length = outlen,
 | |
| +		.key_length = keylen,
 | |
| +		.fanout = 1,
 | |
| +		.depth = 1
 | |
| +	};
 | |
| +	u8 block[BLAKE2S_BLOCKBYTES] = { 0 };
 | |
| +
 | |
| +#ifdef DEBUG
 | |
| +	BUG_ON(!outlen || outlen > BLAKE2S_OUTBYTES || !key || !keylen || keylen > BLAKE2S_KEYBYTES);
 | |
| +#endif
 | |
| +	blake2s_init_param(state, &param);
 | |
| +	memcpy(block, key, keylen);
 | |
| +	blake2s_update(state, block, BLAKE2S_BLOCKBYTES);
 | |
| +	memzero_explicit(block, BLAKE2S_BLOCKBYTES);
 | |
| +}
 | |
| +
 | |
| +#ifdef CONFIG_X86_64
 | |
| +#include <asm/cpufeature.h>
 | |
| +#include <asm/processor.h>
 | |
| +#include <asm/fpu/api.h>
 | |
| +#include <asm/simd.h>
 | |
| +static bool blake2s_use_avx __read_mostly = false;
 | |
| +void __init blake2s_fpu_init(void)
 | |
| +{
 | |
| +	blake2s_use_avx = boot_cpu_has(X86_FEATURE_AVX);
 | |
| +}
 | |
| +asmlinkage void blake2s_compress_avx(struct blake2s_state *state, const u8 block[BLAKE2S_BLOCKBYTES]);
 | |
| +#else
 | |
| +void __init blake2s_fpu_init(void) { }
 | |
| +#endif
 | |
| +
 | |
| +static inline void blake2s_compress(struct blake2s_state *state, const u8 block[BLAKE2S_BLOCKBYTES])
 | |
| +{
 | |
| +	u32 m[16];
 | |
| +	u32 v[16];
 | |
| +	int i;
 | |
| +
 | |
| +#ifdef CONFIG_X86_64
 | |
| +	if (blake2s_use_avx && irq_fpu_usable()) {
 | |
| +		kernel_fpu_begin();
 | |
| +		blake2s_compress_avx(state, block);
 | |
| +		kernel_fpu_end();
 | |
| +		return;
 | |
| +	}
 | |
| +#endif
 | |
| +
 | |
| +	for (i = 0; i < 16; ++i)
 | |
| +		m[i] = le32_to_cpuvp(block + i * sizeof(m[i]));
 | |
| +
 | |
| +	for (i = 0; i < 8; ++i)
 | |
| +		v[i] = state->h[i];
 | |
| +
 | |
| +	v[8] = blake2s_iv[0];
 | |
| +	v[9] = blake2s_iv[1];
 | |
| +	v[10] = blake2s_iv[2];
 | |
| +	v[11] = blake2s_iv[3];
 | |
| +	v[12] = state->t[0] ^ blake2s_iv[4];
 | |
| +	v[13] = state->t[1] ^ blake2s_iv[5];
 | |
| +	v[14] = state->f[0] ^ blake2s_iv[6];
 | |
| +	v[15] = state->f[1] ^ blake2s_iv[7];
 | |
| +#define G(r,i,a,b,c,d) \
 | |
| +	do { \
 | |
| +		a += b + m[blake2s_sigma[r][2 * i + 0]]; \
 | |
| +		d = ror32(d ^ a, 16); \
 | |
| +		c += d; \
 | |
| +		b = ror32(b ^ c, 12); \
 | |
| +		a += b + m[blake2s_sigma[r][2 * i + 1]]; \
 | |
| +		d = ror32(d ^ a, 8); \
 | |
| +		c += d; \
 | |
| +		b = ror32(b ^ c, 7); \
 | |
| +	} while(0)
 | |
| +#define ROUND(r)  \
 | |
| +	do { \
 | |
| +	G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \
 | |
| +	G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \
 | |
| +	G(r,2,v[ 2],v[ 6],v[10],v[14]); \
 | |
| +	G(r,3,v[ 3],v[ 7],v[11],v[15]); \
 | |
| +	G(r,4,v[ 0],v[ 5],v[10],v[15]); \
 | |
| +	G(r,5,v[ 1],v[ 6],v[11],v[12]); \
 | |
| +	G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \
 | |
| +	G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \
 | |
| +} while(0)
 | |
| +	ROUND(0);
 | |
| +	ROUND(1);
 | |
| +	ROUND(2);
 | |
| +	ROUND(3);
 | |
| +	ROUND(4);
 | |
| +	ROUND(5);
 | |
| +	ROUND(6);
 | |
| +	ROUND(7);
 | |
| +	ROUND(8);
 | |
| +	ROUND(9);
 | |
| +
 | |
| +	for (i = 0; i < 8; ++i)
 | |
| +		state->h[i] = state->h[i] ^ v[i] ^ v[i + 8];
 | |
| +#undef G
 | |
| +#undef ROUND
 | |
| +}
 | |
| +
 | |
| +void blake2s_update(struct blake2s_state *state, const u8 *in, u64 inlen)
 | |
| +{
 | |
| +	size_t left, fill;
 | |
| +	while (inlen > 0) {
 | |
| +		left = state->buflen;
 | |
| +		fill = 2 * BLAKE2S_BLOCKBYTES - left;
 | |
| +
 | |
| +		if (inlen > fill) {
 | |
| +			memcpy(state->buf + left, in, fill); // Fill buffer
 | |
| +			state->buflen += fill;
 | |
| +			blake2s_increment_counter(state, BLAKE2S_BLOCKBYTES);
 | |
| +			blake2s_compress(state, state->buf); // Compress
 | |
| +			memcpy(state->buf, state->buf + BLAKE2S_BLOCKBYTES, BLAKE2S_BLOCKBYTES);// Shift buffer left
 | |
| +			state->buflen -= BLAKE2S_BLOCKBYTES;
 | |
| +			in += fill;
 | |
| +			inlen -= fill;
 | |
| +		} else { // inlen <= fill
 | |
| +			memcpy(state->buf + left, in, inlen);
 | |
| +			state->buflen += inlen; // Be lazy, do not compress
 | |
| +			in += inlen;
 | |
| +			inlen -= inlen;
 | |
| +		}
 | |
| +	}
 | |
| +}
 | |
| +
 | |
| +void blake2s_final(struct blake2s_state *state, u8 *out, u8 outlen)
 | |
| +{
 | |
| +	u8 buffer[BLAKE2S_OUTBYTES] = { 0 };
 | |
| +	int i;
 | |
| +
 | |
| +#ifdef DEBUG
 | |
| +	BUG_ON(!out || !outlen || outlen > BLAKE2S_OUTBYTES);
 | |
| +#endif
 | |
| +
 | |
| +	if (state->buflen > BLAKE2S_BLOCKBYTES) {
 | |
| +		blake2s_increment_counter(state, BLAKE2S_BLOCKBYTES);
 | |
| +		blake2s_compress(state, state->buf);
 | |
| +		state->buflen -= BLAKE2S_BLOCKBYTES;
 | |
| +		memcpy(state->buf, state->buf + BLAKE2S_BLOCKBYTES, state->buflen);
 | |
| +	}
 | |
| +
 | |
| +	blake2s_increment_counter(state, (u32) state->buflen);
 | |
| +	blake2s_set_lastblock(state);
 | |
| +	memset(state->buf + state->buflen, 0, 2 * BLAKE2S_BLOCKBYTES - state->buflen); /* Padding */
 | |
| +	blake2s_compress(state, state->buf);
 | |
| +
 | |
| +	for (i = 0; i < 8; ++i) /* output full hash to temp buffer */
 | |
| +		*(__le32 *)(buffer + sizeof(state->h[i]) * i) = cpu_to_le32(state->h[i]);
 | |
| +
 | |
| +	memcpy(out, buffer, outlen);
 | |
| +
 | |
| +	/* Burn state from stack */
 | |
| +	memzero_explicit(buffer, BLAKE2S_OUTBYTES);
 | |
| +	memzero_explicit(state, sizeof(struct blake2s_state));
 | |
| +}
 | |
| +
 | |
| +void blake2s(u8 *out, const u8 *in, const u8 *key, const u8 outlen, u64 inlen, const u8 keylen)
 | |
| +{
 | |
| +	struct blake2s_state state;
 | |
| +
 | |
| +#ifdef DEBUG
 | |
| +	BUG_ON((!in && inlen > 0) || !out || !outlen || outlen > BLAKE2S_OUTBYTES || keylen > BLAKE2S_KEYBYTES);
 | |
| +#endif
 | |
| +
 | |
| +	if (keylen > 0 && key)
 | |
| +		blake2s_init_key(&state, outlen, key, keylen);
 | |
| +	else
 | |
| +		blake2s_init(&state, outlen);
 | |
| +
 | |
| +	blake2s_update(&state, in, inlen);
 | |
| +	blake2s_final(&state, out, outlen);
 | |
| +}
 | |
| +
 | |
| +void blake2s_hmac(u8 *out, const u8 *in, const u8 *key, const u8 outlen, const u64 inlen, const u64 keylen)
 | |
| +{
 | |
| +	struct blake2s_state state;
 | |
| +	u8 o_key[BLAKE2S_BLOCKBYTES] = { 0 };
 | |
| +	u8 i_key[BLAKE2S_BLOCKBYTES] = { 0 };
 | |
| +	u8 i_hash[BLAKE2S_OUTBYTES];
 | |
| +	int i;
 | |
| +
 | |
| +	if (keylen > BLAKE2S_BLOCKBYTES) {
 | |
| +		blake2s_init(&state, BLAKE2S_OUTBYTES);
 | |
| +		blake2s_update(&state, key, keylen);
 | |
| +		blake2s_final(&state, o_key, BLAKE2S_OUTBYTES);
 | |
| +		memcpy(i_key, o_key, BLAKE2S_OUTBYTES);
 | |
| +	} else {
 | |
| +		memcpy(o_key, key, keylen);
 | |
| +		memcpy(i_key, key, keylen);
 | |
| +	}
 | |
| +
 | |
| +	for (i = 0; i < BLAKE2S_BLOCKBYTES; ++i) {
 | |
| +		o_key[i] ^= 0x5c;
 | |
| +		i_key[i] ^= 0x36;
 | |
| +	}
 | |
| +
 | |
| +	blake2s_init(&state, BLAKE2S_OUTBYTES);
 | |
| +	blake2s_update(&state, i_key, BLAKE2S_BLOCKBYTES);
 | |
| +	blake2s_update(&state, in, inlen);
 | |
| +	blake2s_final(&state, i_hash, BLAKE2S_OUTBYTES);
 | |
| +
 | |
| +	blake2s_init(&state, BLAKE2S_OUTBYTES);
 | |
| +	blake2s_update(&state, o_key, BLAKE2S_BLOCKBYTES);
 | |
| +	blake2s_update(&state, i_hash, BLAKE2S_OUTBYTES);
 | |
| +	blake2s_final(&state, i_hash, BLAKE2S_OUTBYTES);
 | |
| +
 | |
| +	memcpy(out, i_hash, outlen);
 | |
| +	memzero_explicit(o_key, BLAKE2S_BLOCKBYTES);
 | |
| +	memzero_explicit(i_key, BLAKE2S_BLOCKBYTES);
 | |
| +	memzero_explicit(i_hash, BLAKE2S_OUTBYTES);
 | |
| +}
 | |
| +
 | |
| +#include "../selftest/blake2s.h"
 | |
| --- /dev/null	2017-07-05 16:27:37.615351856 +0200
 | |
| +++ b/net/wireguard/crypto/chacha20poly1305.c	2017-07-06 18:17:33.000000000 +0200
 | |
| @@ -0,0 +1,886 @@
 | |
| +/*
 | |
| + * Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 | |
| + * Copyright 2015 Martin Willi.
 | |
| + */
 | |
| +
 | |
| +#include "chacha20poly1305.h"
 | |
| +
 | |
| +#include <linux/kernel.h>
 | |
| +#include <linux/string.h>
 | |
| +#include <linux/version.h>
 | |
| +#include <crypto/algapi.h>
 | |
| +#include <crypto/scatterwalk.h>
 | |
| +#include <asm/unaligned.h>
 | |
| +
 | |
| +#if defined(CONFIG_X86_64)
 | |
| +#include <asm/cpufeature.h>
 | |
| +#include <asm/processor.h>
 | |
| +#ifdef CONFIG_AS_SSSE3
 | |
| +asmlinkage void hchacha20_asm_ssse3(u8 *derived_key, const u8 *nonce, const u8 *key);
 | |
| +asmlinkage void chacha20_asm_block_xor_ssse3(u32 *state, u8 *dst, const u8 *src);
 | |
| +asmlinkage void chacha20_asm_4block_xor_ssse3(u32 *state, u8 *dst, const u8 *src);
 | |
| +#endif
 | |
| +#ifdef CONFIG_AS_AVX2
 | |
| +asmlinkage void chacha20_asm_8block_xor_avx2(u32 *state, u8 *dst, const u8 *src);
 | |
| +#endif
 | |
| +asmlinkage void poly1305_asm_block_sse2(u32 *h, const u8 *src, const u32 *r, unsigned int blocks);
 | |
| +asmlinkage void poly1305_asm_2block_sse2(u32 *h, const u8 *src, const u32 *r, unsigned int blocks, const u32 *u);
 | |
| +#ifdef CONFIG_AS_AVX2
 | |
| +asmlinkage void poly1305_asm_4block_avx2(u32 *h, const u8 *src, const u32 *r, unsigned int blocks, const u32 *u);
 | |
| +#endif
 | |
| +static bool chacha20poly1305_use_avx2 __read_mostly = false;
 | |
| +static bool chacha20poly1305_use_ssse3 __read_mostly = false;
 | |
| +static bool chacha20poly1305_use_sse2 __read_mostly = false;
 | |
| +void chacha20poly1305_fpu_init(void)
 | |
| +{
 | |
| +	chacha20poly1305_use_sse2 = boot_cpu_has(X86_FEATURE_XMM2);
 | |
| +	chacha20poly1305_use_ssse3 = boot_cpu_has(X86_FEATURE_SSSE3);
 | |
| +	chacha20poly1305_use_avx2 = boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_AVX2);
 | |
| +}
 | |
| +#elif IS_ENABLED(CONFIG_KERNEL_MODE_NEON)
 | |
| +#include <asm/hwcap.h>
 | |
| +#include <asm/neon.h>
 | |
| +asmlinkage void chacha20_asm_block_xor_neon(u32 *state, u8 *dst, const u8 *src);
 | |
| +asmlinkage void chacha20_asm_4block_xor_neon(u32 *state, u8 *dst, const u8 *src);
 | |
| +static bool chacha20poly1305_use_neon __read_mostly = false;
 | |
| +void __init chacha20poly1305_fpu_init(void)
 | |
| +{
 | |
| +#if defined(CONFIG_ARM64)
 | |
| +	chacha20poly1305_use_neon = elf_hwcap & HWCAP_ASIMD;
 | |
| +#elif defined(CONFIG_ARM)
 | |
| +	chacha20poly1305_use_neon = elf_hwcap & HWCAP_NEON;
 | |
| +#endif
 | |
| +}
 | |
| +#else
 | |
| +void __init chacha20poly1305_fpu_init(void) { }
 | |
| +#endif
 | |
| +
 | |
| +#define CHACHA20_IV_SIZE	16
 | |
| +#define CHACHA20_KEY_SIZE	32
 | |
| +#define CHACHA20_BLOCK_SIZE	64
 | |
| +#define POLY1305_BLOCK_SIZE	16
 | |
| +#define POLY1305_KEY_SIZE	32
 | |
| +#define POLY1305_MAC_SIZE	16
 | |
| +
 | |
| +static inline u32 le32_to_cpuvp(const void *p)
 | |
| +{
 | |
| +	return le32_to_cpup(p);
 | |
| +}
 | |
| +
 | |
| +static inline u64 le64_to_cpuvp(const void *p)
 | |
| +{
 | |
| +	return le64_to_cpup(p);
 | |
| +}
 | |
| +
 | |
| +static inline u32 rotl32(u32 v, u8 n)
 | |
| +{
 | |
| +	return (v << n) | (v >> (sizeof(v) * 8 - n));
 | |
| +}
 | |
| +
 | |
| +static inline u64 mlt(u64 a, u64 b)
 | |
| +{
 | |
| +	return a * b;
 | |
| +}
 | |
| +
 | |
| +static inline u32 sr(u64 v, u_char n)
 | |
| +{
 | |
| +	return v >> n;
 | |
| +}
 | |
| +
 | |
| +static inline u32 and(u32 v, u32 mask)
 | |
| +{
 | |
| +	return v & mask;
 | |
| +}
 | |
| +
 | |
| +struct chacha20_ctx {
 | |
| +	u32 state[CHACHA20_BLOCK_SIZE / sizeof(u32)];
 | |
| +} __aligned(32);
 | |
| +
 | |
| +static void chacha20_generic_block(struct chacha20_ctx *ctx, void *stream)
 | |
| +{
 | |
| +	u32 x[CHACHA20_BLOCK_SIZE / sizeof(u32)];
 | |
| +	__le32 *out = stream;
 | |
| +	int i;
 | |
| +
 | |
| +	for (i = 0; i < ARRAY_SIZE(x); i++)
 | |
| +		x[i] = ctx->state[i];
 | |
| +
 | |
| +	for (i = 0; i < 20; i += 2) {
 | |
| +		x[0]  += x[4];    x[12] = rotl32(x[12] ^ x[0],  16);
 | |
| +		x[1]  += x[5];    x[13] = rotl32(x[13] ^ x[1],  16);
 | |
| +		x[2]  += x[6];    x[14] = rotl32(x[14] ^ x[2],  16);
 | |
| +		x[3]  += x[7];    x[15] = rotl32(x[15] ^ x[3],  16);
 | |
| +
 | |
| +		x[8]  += x[12];   x[4]  = rotl32(x[4]  ^ x[8],  12);
 | |
| +		x[9]  += x[13];   x[5]  = rotl32(x[5]  ^ x[9],  12);
 | |
| +		x[10] += x[14];   x[6]  = rotl32(x[6]  ^ x[10], 12);
 | |
| +		x[11] += x[15];   x[7]  = rotl32(x[7]  ^ x[11], 12);
 | |
| +
 | |
| +		x[0]  += x[4];    x[12] = rotl32(x[12] ^ x[0],   8);
 | |
| +		x[1]  += x[5];    x[13] = rotl32(x[13] ^ x[1],   8);
 | |
| +		x[2]  += x[6];    x[14] = rotl32(x[14] ^ x[2],   8);
 | |
| +		x[3]  += x[7];    x[15] = rotl32(x[15] ^ x[3],   8);
 | |
| +
 | |
| +		x[8]  += x[12];   x[4]  = rotl32(x[4]  ^ x[8],   7);
 | |
| +		x[9]  += x[13];   x[5]  = rotl32(x[5]  ^ x[9],   7);
 | |
| +		x[10] += x[14];   x[6]  = rotl32(x[6]  ^ x[10],  7);
 | |
| +		x[11] += x[15];   x[7]  = rotl32(x[7]  ^ x[11],  7);
 | |
| +
 | |
| +		x[0]  += x[5];    x[15] = rotl32(x[15] ^ x[0],  16);
 | |
| +		x[1]  += x[6];    x[12] = rotl32(x[12] ^ x[1],  16);
 | |
| +		x[2]  += x[7];    x[13] = rotl32(x[13] ^ x[2],  16);
 | |
| +		x[3]  += x[4];    x[14] = rotl32(x[14] ^ x[3],  16);
 | |
| +
 | |
| +		x[10] += x[15];   x[5]  = rotl32(x[5]  ^ x[10], 12);
 | |
| +		x[11] += x[12];   x[6]  = rotl32(x[6]  ^ x[11], 12);
 | |
| +		x[8]  += x[13];   x[7]  = rotl32(x[7]  ^ x[8],  12);
 | |
| +		x[9]  += x[14];   x[4]  = rotl32(x[4]  ^ x[9],  12);
 | |
| +
 | |
| +		x[0]  += x[5];    x[15] = rotl32(x[15] ^ x[0],   8);
 | |
| +		x[1]  += x[6];    x[12] = rotl32(x[12] ^ x[1],   8);
 | |
| +		x[2]  += x[7];    x[13] = rotl32(x[13] ^ x[2],   8);
 | |
| +		x[3]  += x[4];    x[14] = rotl32(x[14] ^ x[3],   8);
 | |
| +
 | |
| +		x[10] += x[15];   x[5]  = rotl32(x[5]  ^ x[10],  7);
 | |
| +		x[11] += x[12];   x[6]  = rotl32(x[6]  ^ x[11],  7);
 | |
| +		x[8]  += x[13];   x[7]  = rotl32(x[7]  ^ x[8],   7);
 | |
| +		x[9]  += x[14];   x[4]  = rotl32(x[4]  ^ x[9],   7);
 | |
| +	}
 | |
| +
 | |
| +	for (i = 0; i < ARRAY_SIZE(x); i++)
 | |
| +		out[i] = cpu_to_le32(x[i] + ctx->state[i]);
 | |
| +
 | |
| +	ctx->state[12]++;
 | |
| +}
 | |
| +
 | |
| +static const char constant[16] = "expand 32-byte k";
 | |
| +
 | |
| +static void hchacha20_generic(u8 derived_key[CHACHA20POLY1305_KEYLEN], const u8 nonce[16], const u8 key[CHACHA20POLY1305_KEYLEN])
 | |
| +{
 | |
| +	u32 x[CHACHA20_BLOCK_SIZE / sizeof(u32)];
 | |
| +	__le32 *out = (__force __le32 *)derived_key;
 | |
| +	int i;
 | |
| +
 | |
| +	x[0]  = le32_to_cpuvp(constant +  0);
 | |
| +	x[1]  = le32_to_cpuvp(constant +  4);
 | |
| +	x[2]  = le32_to_cpuvp(constant +  8);
 | |
| +	x[3]  = le32_to_cpuvp(constant + 12);
 | |
| +	x[4]  = le32_to_cpuvp(key + 0);
 | |
| +	x[5]  = le32_to_cpuvp(key + 4);
 | |
| +	x[6]  = le32_to_cpuvp(key + 8);
 | |
| +	x[7]  = le32_to_cpuvp(key + 12);
 | |
| +	x[8]  = le32_to_cpuvp(key + 16);
 | |
| +	x[9]  = le32_to_cpuvp(key + 20);
 | |
| +	x[10] = le32_to_cpuvp(key + 24);
 | |
| +	x[11] = le32_to_cpuvp(key + 28);
 | |
| +	x[12]  = le32_to_cpuvp(nonce +  0);
 | |
| +	x[13]  = le32_to_cpuvp(nonce +  4);
 | |
| +	x[14]  = le32_to_cpuvp(nonce +  8);
 | |
| +	x[15]  = le32_to_cpuvp(nonce + 12);
 | |
| +
 | |
| +	for (i = 0; i < 20; i += 2) {
 | |
| +		x[0]  += x[4];    x[12] = rotl32(x[12] ^ x[0],  16);
 | |
| +		x[1]  += x[5];    x[13] = rotl32(x[13] ^ x[1],  16);
 | |
| +		x[2]  += x[6];    x[14] = rotl32(x[14] ^ x[2],  16);
 | |
| +		x[3]  += x[7];    x[15] = rotl32(x[15] ^ x[3],  16);
 | |
| +
 | |
| +		x[8]  += x[12];   x[4]  = rotl32(x[4]  ^ x[8],  12);
 | |
| +		x[9]  += x[13];   x[5]  = rotl32(x[5]  ^ x[9],  12);
 | |
| +		x[10] += x[14];   x[6]  = rotl32(x[6]  ^ x[10], 12);
 | |
| +		x[11] += x[15];   x[7]  = rotl32(x[7]  ^ x[11], 12);
 | |
| +
 | |
| +		x[0]  += x[4];    x[12] = rotl32(x[12] ^ x[0],   8);
 | |
| +		x[1]  += x[5];    x[13] = rotl32(x[13] ^ x[1],   8);
 | |
| +		x[2]  += x[6];    x[14] = rotl32(x[14] ^ x[2],   8);
 | |
| +		x[3]  += x[7];    x[15] = rotl32(x[15] ^ x[3],   8);
 | |
| +
 | |
| +		x[8]  += x[12];   x[4]  = rotl32(x[4]  ^ x[8],   7);
 | |
| +		x[9]  += x[13];   x[5]  = rotl32(x[5]  ^ x[9],   7);
 | |
| +		x[10] += x[14];   x[6]  = rotl32(x[6]  ^ x[10],  7);
 | |
| +		x[11] += x[15];   x[7]  = rotl32(x[7]  ^ x[11],  7);
 | |
| +
 | |
| +		x[0]  += x[5];    x[15] = rotl32(x[15] ^ x[0],  16);
 | |
| +		x[1]  += x[6];    x[12] = rotl32(x[12] ^ x[1],  16);
 | |
| +		x[2]  += x[7];    x[13] = rotl32(x[13] ^ x[2],  16);
 | |
| +		x[3]  += x[4];    x[14] = rotl32(x[14] ^ x[3],  16);
 | |
| +
 | |
| +		x[10] += x[15];   x[5]  = rotl32(x[5]  ^ x[10], 12);
 | |
| +		x[11] += x[12];   x[6]  = rotl32(x[6]  ^ x[11], 12);
 | |
| +		x[8]  += x[13];   x[7]  = rotl32(x[7]  ^ x[8],  12);
 | |
| +		x[9]  += x[14];   x[4]  = rotl32(x[4]  ^ x[9],  12);
 | |
| +
 | |
| +		x[0]  += x[5];    x[15] = rotl32(x[15] ^ x[0],   8);
 | |
| +		x[1]  += x[6];    x[12] = rotl32(x[12] ^ x[1],   8);
 | |
| +		x[2]  += x[7];    x[13] = rotl32(x[13] ^ x[2],   8);
 | |
| +		x[3]  += x[4];    x[14] = rotl32(x[14] ^ x[3],   8);
 | |
| +
 | |
| +		x[10] += x[15];   x[5]  = rotl32(x[5]  ^ x[10],  7);
 | |
| +		x[11] += x[12];   x[6]  = rotl32(x[6]  ^ x[11],  7);
 | |
| +		x[8]  += x[13];   x[7]  = rotl32(x[7]  ^ x[8],   7);
 | |
| +		x[9]  += x[14];   x[4]  = rotl32(x[4]  ^ x[9],   7);
 | |
| +	}
 | |
| +
 | |
| +	out[0] = cpu_to_le32(x[0]);
 | |
| +	out[1] = cpu_to_le32(x[1]);
 | |
| +	out[2] = cpu_to_le32(x[2]);
 | |
| +	out[3] = cpu_to_le32(x[3]);
 | |
| +	out[4] = cpu_to_le32(x[12]);
 | |
| +	out[5] = cpu_to_le32(x[13]);
 | |
| +	out[6] = cpu_to_le32(x[14]);
 | |
| +	out[7] = cpu_to_le32(x[15]);
 | |
| +}
 | |
| +
 | |
| +static inline void hchacha20(u8 derived_key[CHACHA20POLY1305_KEYLEN], const u8 nonce[16], const u8 key[CHACHA20POLY1305_KEYLEN], bool have_simd)
 | |
| +{
 | |
| +	if (!have_simd)
 | |
| +		goto no_simd;
 | |
| +
 | |
| +#if defined(CONFIG_X86_64) && defined(CONFIG_AS_SSSE3)
 | |
| +	if (chacha20poly1305_use_ssse3) {
 | |
| +		hchacha20_asm_ssse3(derived_key, nonce, key);
 | |
| +		return;
 | |
| +	}
 | |
| +#endif
 | |
| +
 | |
| +no_simd:
 | |
| +	hchacha20_generic(derived_key, nonce, key);
 | |
| +}
 | |
| +
 | |
| +static void chacha20_keysetup(struct chacha20_ctx *ctx, const u8 key[CHACHA20_KEY_SIZE], const u8 nonce[sizeof(u64)])
 | |
| +{
 | |
| +	ctx->state[0]  = le32_to_cpuvp(constant +  0);
 | |
| +	ctx->state[1]  = le32_to_cpuvp(constant +  4);
 | |
| +	ctx->state[2]  = le32_to_cpuvp(constant +  8);
 | |
| +	ctx->state[3]  = le32_to_cpuvp(constant + 12);
 | |
| +	ctx->state[4]  = le32_to_cpuvp(key + 0);
 | |
| +	ctx->state[5]  = le32_to_cpuvp(key + 4);
 | |
| +	ctx->state[6]  = le32_to_cpuvp(key + 8);
 | |
| +	ctx->state[7]  = le32_to_cpuvp(key + 12);
 | |
| +	ctx->state[8]  = le32_to_cpuvp(key + 16);
 | |
| +	ctx->state[9]  = le32_to_cpuvp(key + 20);
 | |
| +	ctx->state[10] = le32_to_cpuvp(key + 24);
 | |
| +	ctx->state[11] = le32_to_cpuvp(key + 28);
 | |
| +	ctx->state[12] = 0;
 | |
| +	ctx->state[13] = 0;
 | |
| +	ctx->state[14] = le32_to_cpuvp(nonce + 0);
 | |
| +	ctx->state[15] = le32_to_cpuvp(nonce + 4);
 | |
| +}
 | |
| +
 | |
| +static void chacha20_crypt(struct chacha20_ctx *ctx, u8 *dst, const u8 *src, unsigned int bytes, bool have_simd)
 | |
| +{
 | |
| +	u8 buf[CHACHA20_BLOCK_SIZE];
 | |
| +
 | |
| +	if (!have_simd
 | |
| +#if defined(CONFIG_X86_64)
 | |
| +		|| !chacha20poly1305_use_ssse3
 | |
| +
 | |
| +#elif IS_ENABLED(CONFIG_KERNEL_MODE_NEON)
 | |
| +		|| !chacha20poly1305_use_neon
 | |
| +#endif
 | |
| +	)
 | |
| +		goto no_simd;
 | |
| +
 | |
| +#if defined(CONFIG_X86_64)
 | |
| +#ifdef CONFIG_AS_AVX2
 | |
| +	if (chacha20poly1305_use_avx2) {
 | |
| +		while (bytes >= CHACHA20_BLOCK_SIZE * 8) {
 | |
| +			chacha20_asm_8block_xor_avx2(ctx->state, dst, src);
 | |
| +			bytes -= CHACHA20_BLOCK_SIZE * 8;
 | |
| +			src += CHACHA20_BLOCK_SIZE * 8;
 | |
| +			dst += CHACHA20_BLOCK_SIZE * 8;
 | |
| +			ctx->state[12] += 8;
 | |
| +		}
 | |
| +	}
 | |
| +#endif
 | |
| +#ifdef CONFIG_AS_SSSE3
 | |
| +	while (bytes >= CHACHA20_BLOCK_SIZE * 4) {
 | |
| +		chacha20_asm_4block_xor_ssse3(ctx->state, dst, src);
 | |
| +		bytes -= CHACHA20_BLOCK_SIZE * 4;
 | |
| +		src += CHACHA20_BLOCK_SIZE * 4;
 | |
| +		dst += CHACHA20_BLOCK_SIZE * 4;
 | |
| +		ctx->state[12] += 4;
 | |
| +	}
 | |
| +	while (bytes >= CHACHA20_BLOCK_SIZE) {
 | |
| +		chacha20_asm_block_xor_ssse3(ctx->state, dst, src);
 | |
| +		bytes -= CHACHA20_BLOCK_SIZE;
 | |
| +		src += CHACHA20_BLOCK_SIZE;
 | |
| +		dst += CHACHA20_BLOCK_SIZE;
 | |
| +		ctx->state[12]++;
 | |
| +	}
 | |
| +	if (bytes) {
 | |
| +		memcpy(buf, src, bytes);
 | |
| +		chacha20_asm_block_xor_ssse3(ctx->state, buf, buf);
 | |
| +		memcpy(dst, buf, bytes);
 | |
| +	}
 | |
| +	return;
 | |
| +#endif
 | |
| +#elif IS_ENABLED(CONFIG_KERNEL_MODE_NEON)
 | |
| +	while (bytes >= CHACHA20_BLOCK_SIZE * 4) {
 | |
| +		chacha20_asm_4block_xor_neon(ctx->state, dst, src);
 | |
| +		bytes -= CHACHA20_BLOCK_SIZE * 4;
 | |
| +		src += CHACHA20_BLOCK_SIZE * 4;
 | |
| +		dst += CHACHA20_BLOCK_SIZE * 4;
 | |
| +		ctx->state[12] += 4;
 | |
| +	}
 | |
| +	while (bytes >= CHACHA20_BLOCK_SIZE) {
 | |
| +		chacha20_asm_block_xor_neon(ctx->state, dst, src);
 | |
| +		bytes -= CHACHA20_BLOCK_SIZE;
 | |
| +		src += CHACHA20_BLOCK_SIZE;
 | |
| +		dst += CHACHA20_BLOCK_SIZE;
 | |
| +		ctx->state[12]++;
 | |
| +	}
 | |
| +	if (bytes) {
 | |
| +		memcpy(buf, src, bytes);
 | |
| +		chacha20_asm_block_xor_neon(ctx->state, buf, buf);
 | |
| +		memcpy(dst, buf, bytes);
 | |
| +	}
 | |
| +	return;
 | |
| +#endif
 | |
| +
 | |
| +no_simd:
 | |
| +	if (dst != src)
 | |
| +		memcpy(dst, src, bytes);
 | |
| +
 | |
| +	while (bytes >= CHACHA20_BLOCK_SIZE) {
 | |
| +		chacha20_generic_block(ctx, buf);
 | |
| +		crypto_xor(dst, buf, CHACHA20_BLOCK_SIZE);
 | |
| +		bytes -= CHACHA20_BLOCK_SIZE;
 | |
| +		dst += CHACHA20_BLOCK_SIZE;
 | |
| +	}
 | |
| +	if (bytes) {
 | |
| +		chacha20_generic_block(ctx, buf);
 | |
| +		crypto_xor(dst, buf, bytes);
 | |
| +	}
 | |
| +}
 | |
| +
 | |
| +struct poly1305_ctx {
 | |
| +	/* key */
 | |
| +	u32 r[5];
 | |
| +	/* finalize key */
 | |
| +	u32 s[4];
 | |
| +	/* accumulator */
 | |
| +	u32 h[5];
 | |
| +	/* partial buffer */
 | |
| +	u8 buf[POLY1305_BLOCK_SIZE];
 | |
| +	/* bytes used in partial buffer */
 | |
| +	unsigned int buflen;
 | |
| +	/* derived key u set? */
 | |
| +	bool uset;
 | |
| +	/* derived keys r^3, r^4 set? */
 | |
| +	bool wset;
 | |
| +	/* derived Poly1305 key r^2 */
 | |
| +	u32 u[5];
 | |
| +	/* derived Poly1305 key r^3 */
 | |
| +	u32 r3[5];
 | |
| +	/* derived Poly1305 key r^4 */
 | |
| +	u32 r4[5];
 | |
| +};
 | |
| +
 | |
| +static void poly1305_init(struct poly1305_ctx *ctx, const u8 key[POLY1305_KEY_SIZE])
 | |
| +{
 | |
| +	memset(ctx, 0, sizeof(struct poly1305_ctx));
 | |
| +	/* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
 | |
| +	ctx->r[0] = (le32_to_cpuvp(key +  0) >> 0) & 0x3ffffff;
 | |
| +	ctx->r[1] = (get_unaligned_le32(key +  3) >> 2) & 0x3ffff03;
 | |
| +	ctx->r[2] = (get_unaligned_le32(key +  6) >> 4) & 0x3ffc0ff;
 | |
| +	ctx->r[3] = (get_unaligned_le32(key +  9) >> 6) & 0x3f03fff;
 | |
| +	ctx->r[4] = (le32_to_cpuvp(key + 12) >> 8) & 0x00fffff;
 | |
| +	ctx->s[0] = le32_to_cpuvp(key +  16);
 | |
| +	ctx->s[1] = le32_to_cpuvp(key +  20);
 | |
| +	ctx->s[2] = le32_to_cpuvp(key +  24);
 | |
| +	ctx->s[3] = le32_to_cpuvp(key +  28);
 | |
| +}
 | |
| +
 | |
| +static unsigned int poly1305_generic_blocks(struct poly1305_ctx *ctx, const u8 *src, unsigned int srclen, u32 hibit)
 | |
| +{
 | |
| +	u32 r0, r1, r2, r3, r4;
 | |
| +	u32 s1, s2, s3, s4;
 | |
| +	u32 h0, h1, h2, h3, h4;
 | |
| +	u64 d0, d1, d2, d3, d4;
 | |
| +
 | |
| +	r0 = ctx->r[0];
 | |
| +	r1 = ctx->r[1];
 | |
| +	r2 = ctx->r[2];
 | |
| +	r3 = ctx->r[3];
 | |
| +	r4 = ctx->r[4];
 | |
| +
 | |
| +	s1 = r1 * 5;
 | |
| +	s2 = r2 * 5;
 | |
| +	s3 = r3 * 5;
 | |
| +	s4 = r4 * 5;
 | |
| +
 | |
| +	h0 = ctx->h[0];
 | |
| +	h1 = ctx->h[1];
 | |
| +	h2 = ctx->h[2];
 | |
| +	h3 = ctx->h[3];
 | |
| +	h4 = ctx->h[4];
 | |
| +
 | |
| +	while (likely(srclen >= POLY1305_BLOCK_SIZE)) {
 | |
| +		/* h += m[i] */
 | |
| +		h0 += (le32_to_cpuvp(src +  0) >> 0) & 0x3ffffff;
 | |
| +		h1 += (get_unaligned_le32(src +  3) >> 2) & 0x3ffffff;
 | |
| +		h2 += (get_unaligned_le32(src +  6) >> 4) & 0x3ffffff;
 | |
| +		h3 += (get_unaligned_le32(src +  9) >> 6) & 0x3ffffff;
 | |
| +		h4 += (le32_to_cpuvp(src + 12) >> 8) | hibit;
 | |
| +
 | |
| +		/* h *= r */
 | |
| +		d0 = mlt(h0, r0) + mlt(h1, s4) + mlt(h2, s3) + mlt(h3, s2) + mlt(h4, s1);
 | |
| +		d1 = mlt(h0, r1) + mlt(h1, r0) + mlt(h2, s4) + mlt(h3, s3) + mlt(h4, s2);
 | |
| +		d2 = mlt(h0, r2) + mlt(h1, r1) + mlt(h2, r0) + mlt(h3, s4) + mlt(h4, s3);
 | |
| +		d3 = mlt(h0, r3) + mlt(h1, r2) + mlt(h2, r1) + mlt(h3, r0) + mlt(h4, s4);
 | |
| +		d4 = mlt(h0, r4) + mlt(h1, r3) + mlt(h2, r2) + mlt(h3, r1) + mlt(h4, r0);
 | |
| +
 | |
| +		/* (partial) h %= p */
 | |
| +		d1 += sr(d0, 26);     h0 = and(d0, 0x3ffffff);
 | |
| +		d2 += sr(d1, 26);     h1 = and(d1, 0x3ffffff);
 | |
| +		d3 += sr(d2, 26);     h2 = and(d2, 0x3ffffff);
 | |
| +		d4 += sr(d3, 26);     h3 = and(d3, 0x3ffffff);
 | |
| +		h0 += sr(d4, 26) * 5; h4 = and(d4, 0x3ffffff);
 | |
| +		h1 += h0 >> 26;       h0 = h0 & 0x3ffffff;
 | |
| +
 | |
| +		src += POLY1305_BLOCK_SIZE;
 | |
| +		srclen -= POLY1305_BLOCK_SIZE;
 | |
| +	}
 | |
| +
 | |
| +	ctx->h[0] = h0;
 | |
| +	ctx->h[1] = h1;
 | |
| +	ctx->h[2] = h2;
 | |
| +	ctx->h[3] = h3;
 | |
| +	ctx->h[4] = h4;
 | |
| +
 | |
| +	return srclen;
 | |
| +}
 | |
| +
 | |
| +#ifdef CONFIG_X86_64
 | |
| +static void poly1305_simd_mult(u32 *a, const u32 *b)
 | |
| +{
 | |
| +	u8 m[POLY1305_BLOCK_SIZE];
 | |
| +
 | |
| +	memset(m, 0, sizeof(m));
 | |
| +	/* The poly1305 block function adds a hi-bit to the accumulator which
 | |
| +	 * we don't need for key multiplication; compensate for it. */
 | |
| +	a[4] -= 1 << 24;
 | |
| +	poly1305_asm_block_sse2(a, m, b, 1);
 | |
| +}
 | |
| +
 | |
| +static unsigned int poly1305_simd_blocks(struct poly1305_ctx *ctx, const u8 *src, unsigned int srclen)
 | |
| +{
 | |
| +	unsigned int blocks;
 | |
| +
 | |
| +#ifdef CONFIG_AS_AVX2
 | |
| +	if (chacha20poly1305_use_avx2 && srclen >= POLY1305_BLOCK_SIZE * 4) {
 | |
| +		if (unlikely(!ctx->wset)) {
 | |
| +			if (!ctx->uset) {
 | |
| +				memcpy(ctx->u, ctx->r, sizeof(ctx->u));
 | |
| +				poly1305_simd_mult(ctx->u, ctx->r);
 | |
| +				ctx->uset = true;
 | |
| +			}
 | |
| +			memcpy(ctx->r3, ctx->u, sizeof(ctx->u));
 | |
| +			poly1305_simd_mult(ctx->r3, ctx->r);
 | |
| +			memcpy(ctx->r4, ctx->r3, sizeof(ctx->u));
 | |
| +			poly1305_simd_mult(ctx->r4, ctx->r);
 | |
| +			ctx->wset = true;
 | |
| +		}
 | |
| +		blocks = srclen / (POLY1305_BLOCK_SIZE * 4);
 | |
| +		poly1305_asm_4block_avx2(ctx->h, src, ctx->r, blocks, ctx->u);
 | |
| +		src += POLY1305_BLOCK_SIZE * 4 * blocks;
 | |
| +		srclen -= POLY1305_BLOCK_SIZE * 4 * blocks;
 | |
| +	}
 | |
| +#endif
 | |
| +	if (likely(srclen >= POLY1305_BLOCK_SIZE * 2)) {
 | |
| +		if (unlikely(!ctx->uset)) {
 | |
| +			memcpy(ctx->u, ctx->r, sizeof(ctx->u));
 | |
| +			poly1305_simd_mult(ctx->u, ctx->r);
 | |
| +			ctx->uset = true;
 | |
| +		}
 | |
| +		blocks = srclen / (POLY1305_BLOCK_SIZE * 2);
 | |
| +		poly1305_asm_2block_sse2(ctx->h, src, ctx->r, blocks, ctx->u);
 | |
| +		src += POLY1305_BLOCK_SIZE * 2 * blocks;
 | |
| +		srclen -= POLY1305_BLOCK_SIZE * 2 * blocks;
 | |
| +	}
 | |
| +	if (srclen >= POLY1305_BLOCK_SIZE) {
 | |
| +		poly1305_asm_block_sse2(ctx->h, src, ctx->r, 1);
 | |
| +		srclen -= POLY1305_BLOCK_SIZE;
 | |
| +	}
 | |
| +	return srclen;
 | |
| +}
 | |
| +#endif
 | |
| +/* Feed srclen bytes from src into the Poly1305 state in ctx.
 | |
| + * A partial block left in ctx->buf by an earlier call is completed first;
 | |
| + * whole POLY1305_BLOCK_SIZE blocks are then absorbed straight from src, and
 | |
| + * any tail shorter than a block is parked in ctx->buf for the next call.
 | |
| + * The SIMD block routine is used only under CONFIG_X86_64 when both have_simd
 | |
| + * and chacha20poly1305_use_sse2 are true; otherwise the generic one runs.
 | |
| + */
 | |
| +static void poly1305_update(struct poly1305_ctx *ctx, const u8 *src, unsigned int srclen, bool have_simd)
 | |
| +{
 | |
| +	unsigned int bytes;
 | |
| +
 | |
| +	if (unlikely(ctx->buflen)) {
 | |
| +		bytes = min(srclen, POLY1305_BLOCK_SIZE - ctx->buflen);
 | |
| +		memcpy(ctx->buf + ctx->buflen, src, bytes);
 | |
| +		src += bytes;
 | |
| +		srclen -= bytes;
 | |
| +		ctx->buflen += bytes;
 | |
| +		/* Without CONFIG_X86_64 the if/else below vanishes and the generic call is unconditional. */
 | |
| +		if (ctx->buflen == POLY1305_BLOCK_SIZE) {
 | |
| +#ifdef CONFIG_X86_64
 | |
| +			if (have_simd && chacha20poly1305_use_sse2)
 | |
| +				poly1305_simd_blocks(ctx, ctx->buf, POLY1305_BLOCK_SIZE);
 | |
| +			else
 | |
| +#endif
 | |
| +				poly1305_generic_blocks(ctx, ctx->buf, POLY1305_BLOCK_SIZE, 1 << 24);
 | |
| +			ctx->buflen = 0;
 | |
| +		}
 | |
| +	}
 | |
| +	/* Bulk path: the helper's return value is used as the count of bytes it left unprocessed. */
 | |
| +	if (likely(srclen >= POLY1305_BLOCK_SIZE)) {
 | |
| +#ifdef CONFIG_X86_64
 | |
| +		if (have_simd && chacha20poly1305_use_sse2)
 | |
| +			bytes = poly1305_simd_blocks(ctx, src, srclen);
 | |
| +		else
 | |
| +#endif
 | |
| +			bytes = poly1305_generic_blocks(ctx, src, srclen, 1 << 24);
 | |
| +		src += srclen - bytes;
 | |
| +		srclen = bytes;
 | |
| +	}
 | |
| +	/* Park the sub-block tail; a later update or poly1305_finish() picks it up. */
 | |
| +	if (unlikely(srclen)) {
 | |
| +		ctx->buflen = srclen;
 | |
| +		memcpy(ctx->buf, src, srclen);
 | |
| +	}
 | |
| +}
 | |
| +
 | |
| +/* Finalize the Poly1305 computation in ctx and write the 16-byte tag to dst.
 | |
| + *
 | |
| + * A buffered partial block is terminated with a 0x01 byte, zero-padded to
 | |
| + * POLY1305_BLOCK_SIZE and absorbed with a zero final argument (full blocks
 | |
| + * are absorbed with 1 << 24 instead). The accumulator is then fully carried,
 | |
| + * reduced modulo 2^130 - 5 with a mask rather than a branch, repacked from
 | |
| + * five 26-bit limbs into four 32-bit words and added to ctx->s.
 | |
| + *
 | |
| + * dst may have any alignment: each tag word is stored with memcpy instead of
 | |
| + * through a cast __le32 pointer. ctx is not wiped here; the callers do that.
 | |
| + */
 | |
| +static void poly1305_finish(struct poly1305_ctx *ctx, u8 *dst)
 | |
| +{
 | |
| +	u32 h0, h1, h2, h3, h4;
 | |
| +	u32 g0, g1, g2, g3, g4;
 | |
| +	u32 mask;
 | |
| +	u64 f = 0;
 | |
| +	__le32 word;
 | |
| +
 | |
| +	if (unlikely(ctx->buflen)) {
 | |
| +		ctx->buf[ctx->buflen++] = 1;
 | |
| +		memset(ctx->buf + ctx->buflen, 0, POLY1305_BLOCK_SIZE - ctx->buflen);
 | |
| +		poly1305_generic_blocks(ctx, ctx->buf, POLY1305_BLOCK_SIZE, 0);
 | |
| +	}
 | |
| +
 | |
| +	/* fully carry h */
 | |
| +	h0 = ctx->h[0];
 | |
| +	h1 = ctx->h[1];
 | |
| +	h2 = ctx->h[2];
 | |
| +	h3 = ctx->h[3];
 | |
| +	h4 = ctx->h[4];
 | |
| +
 | |
| +	h2 += (h1 >> 26);     h1 = h1 & 0x3ffffff;
 | |
| +	h3 += (h2 >> 26);     h2 = h2 & 0x3ffffff;
 | |
| +	h4 += (h3 >> 26);     h3 = h3 & 0x3ffffff;
 | |
| +	h0 += (h4 >> 26) * 5; h4 = h4 & 0x3ffffff;
 | |
| +	h1 += (h0 >> 26);     h0 = h0 & 0x3ffffff;
 | |
| +
 | |
| +	/* compute h + -p */
 | |
| +	g0 = h0 + 5;
 | |
| +	g1 = h1 + (g0 >> 26);             g0 &= 0x3ffffff;
 | |
| +	g2 = h2 + (g1 >> 26);             g1 &= 0x3ffffff;
 | |
| +	g3 = h3 + (g2 >> 26);             g2 &= 0x3ffffff;
 | |
| +	g4 = h4 + (g3 >> 26) - (1 << 26); g3 &= 0x3ffffff;
 | |
| +
 | |
| +	/* select h if h < p, or h + -p if h >= p */
 | |
| +	/* mask is all-ones when g4 did not wrap negative (h >= p), else zero */
 | |
| +	mask = (g4 >> ((sizeof(u32) * 8) - 1)) - 1;
 | |
| +	g0 &= mask;
 | |
| +	g1 &= mask;
 | |
| +	g2 &= mask;
 | |
| +	g3 &= mask;
 | |
| +	g4 &= mask;
 | |
| +	mask = ~mask;
 | |
| +	h0 = (h0 & mask) | g0;
 | |
| +	h1 = (h1 & mask) | g1;
 | |
| +	h2 = (h2 & mask) | g2;
 | |
| +	h3 = (h3 & mask) | g3;
 | |
| +	h4 = (h4 & mask) | g4;
 | |
| +
 | |
| +	/* h = h % (2^128) */
 | |
| +	h0 = (h0 >>  0) | (h1 << 26);
 | |
| +	h1 = (h1 >>  6) | (h2 << 20);
 | |
| +	h2 = (h2 >> 12) | (h3 << 14);
 | |
| +	h3 = (h3 >> 18) | (h4 <<  8);
 | |
| +
 | |
| +	/* mac = (h + s) % (2^128); f carries the overflow into the next word */
 | |
| +	f = (f >> 32) + h0 + ctx->s[0]; word = cpu_to_le32(f); memcpy(dst +  0, &word, sizeof(word));
 | |
| +	f = (f >> 32) + h1 + ctx->s[1]; word = cpu_to_le32(f); memcpy(dst +  4, &word, sizeof(word));
 | |
| +	f = (f >> 32) + h2 + ctx->s[2]; word = cpu_to_le32(f); memcpy(dst +  8, &word, sizeof(word));
 | |
| +	f = (f >> 32) + h3 + ctx->s[3]; word = cpu_to_le32(f); memcpy(dst + 12, &word, sizeof(word));
 | |
| +}
 | |
| +/* Source of zero bytes for padding Poly1305 input up to a multiple of 16. */
 | |
| +static const u8 pad0[16] = { 0 };
 | |
| +/* Statically initialised alg/cipher/desc trio. chacha20_desc is the descriptor
 | |
| + * the *_sg functions below pass to blkcipher_walk_virt_block() and
 | |
| + * blkcipher_walk_done(); only the block size and alignment mask are filled in.
 | |
| + */
 | |
| +static struct crypto_alg chacha20_alg = {
 | |
| +	.cra_blocksize = 1,
 | |
| +	.cra_alignmask = sizeof(u32) - 1
 | |
| +};
 | |
| +static struct crypto_blkcipher chacha20_cipher = {
 | |
| +	.base = {
 | |
| +		.__crt_alg = &chacha20_alg
 | |
| +	}
 | |
| +};
 | |
| +static struct blkcipher_desc chacha20_desc = {
 | |
| +	.tfm = &chacha20_cipher
 | |
| +};
 | |
| +/* Encrypt src_len bytes of src into dst and append the Poly1305 tag.
 | |
| + * dst receives src_len bytes of ciphertext followed by the tag at
 | |
| + * dst + src_len, so it must have room for both. ad/ad_len is authenticated
 | |
| + * but not encrypted. nonce is fed to the key setup as a little-endian 64-bit
 | |
| + * value. have_simd is handed through to the ChaCha20 and Poly1305 helpers;
 | |
| + * the public wrappers obtain it from chacha20poly1305_init_simd().
 | |
| + */
 | |
| +static inline void __chacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len,
 | |
| +					      const u8 *ad, const size_t ad_len,
 | |
| +					      const u64 nonce, const u8 key[CHACHA20POLY1305_KEYLEN],
 | |
| +					      bool have_simd)
 | |
| +{
 | |
| +	struct poly1305_ctx poly1305_state;
 | |
| +	struct chacha20_ctx chacha20_state;
 | |
| +	u8 block0[CHACHA20_BLOCK_SIZE] = { 0 };
 | |
| +	__le64 len;
 | |
| +	__le64 le_nonce = cpu_to_le64(nonce);
 | |
| +
 | |
| +	chacha20_keysetup(&chacha20_state, key, (u8 *)&le_nonce);
 | |
| +	/* Encrypting a zero block yields raw keystream; poly1305_init() is keyed from it. */
 | |
| +	chacha20_crypt(&chacha20_state, block0, block0, sizeof(block0), have_simd);
 | |
| +	poly1305_init(&poly1305_state, block0);
 | |
| +	memzero_explicit(block0, sizeof(block0));
 | |
| +	/* MAC input starts with the associated data, zero-padded to 16 bytes. */
 | |
| +	poly1305_update(&poly1305_state, ad, ad_len, have_simd);
 | |
| +	poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf, have_simd);
 | |
| +	/* Encrypt first, then authenticate the ciphertext (not the plaintext). */
 | |
| +	chacha20_crypt(&chacha20_state, dst, src, src_len, have_simd);
 | |
| +
 | |
| +	poly1305_update(&poly1305_state, dst, src_len, have_simd);
 | |
| +	poly1305_update(&poly1305_state, pad0, (0x10 - src_len) & 0xf, have_simd);
 | |
| +	/* Trailer: ad_len, then src_len, each as a little-endian 64-bit value. */
 | |
| +	len = cpu_to_le64(ad_len);
 | |
| +	poly1305_update(&poly1305_state, (u8 *)&len, sizeof(len), have_simd);
 | |
| +
 | |
| +	len = cpu_to_le64(src_len);
 | |
| +	poly1305_update(&poly1305_state, (u8 *)&len, sizeof(len), have_simd);
 | |
| +	/* The tag is written directly after the ciphertext. */
 | |
| +	poly1305_finish(&poly1305_state, dst + src_len);
 | |
| +	/* Scrub key-derived state from the stack before returning. */
 | |
| +	memzero_explicit(&poly1305_state, sizeof(poly1305_state));
 | |
| +	memzero_explicit(&chacha20_state, sizeof(chacha20_state));
 | |
| +}
 | |
| +
 | |
| +/* One-shot AEAD encryption of a linear buffer.
 | |
| + * Brackets __chacha20poly1305_encrypt() with chacha20poly1305_init_simd() and
 | |
| + * chacha20poly1305_deinit_simd(), passing along whatever init_simd reported.
 | |
| + * dst needs room for src_len bytes plus the tag.
 | |
| + */
 | |
| +void chacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len,
 | |
| +			      const u8 *ad, const size_t ad_len,
 | |
| +			      const u64 nonce, const u8 key[CHACHA20POLY1305_KEYLEN])
 | |
| +{
 | |
| +	const bool simd_ok = chacha20poly1305_init_simd();
 | |
| +
 | |
| +	__chacha20poly1305_encrypt(dst, src, src_len, ad, ad_len, nonce, key, simd_ok);
 | |
| +	chacha20poly1305_deinit_simd(simd_ok);
 | |
| +}
 | |
| +/* Scatterlist variant of the AEAD encryption.
 | |
| + * Walks src/dst with the blkcipher walk helpers, encrypting and
 | |
| + * authenticating each mapped chunk, then copies the tag into dst at offset
 | |
| + * src_len. Unlike the linear entry points, the caller supplies have_simd.
 | |
| + * Returns true on success and false if a walk step reported an error, in
 | |
| + * which case no tag is written.
 | |
| + */
 | |
| +bool chacha20poly1305_encrypt_sg(struct scatterlist *dst, struct scatterlist *src, const size_t src_len,
 | |
| +				 const u8 *ad, const size_t ad_len,
 | |
| +				 const u64 nonce, const u8 key[CHACHA20POLY1305_KEYLEN],
 | |
| +				 bool have_simd)
 | |
| +{
 | |
| +	struct poly1305_ctx poly1305_state;
 | |
| +	struct chacha20_ctx chacha20_state;
 | |
| +	int ret = 0;
 | |
| +	struct blkcipher_walk walk;
 | |
| +	u8 block0[CHACHA20_BLOCK_SIZE] = { 0 };
 | |
| +	u8 mac[POLY1305_MAC_SIZE];
 | |
| +	__le64 len;
 | |
| +	__le64 le_nonce = cpu_to_le64(nonce);
 | |
| +
 | |
| +	chacha20_keysetup(&chacha20_state, key, (u8 *)&le_nonce);
 | |
| +	/* Keystream from a zero block keys the Poly1305 state. */
 | |
| +	chacha20_crypt(&chacha20_state, block0, block0, sizeof(block0), have_simd);
 | |
| +	poly1305_init(&poly1305_state, block0);
 | |
| +	memzero_explicit(block0, sizeof(block0));
 | |
| +
 | |
| +	poly1305_update(&poly1305_state, ad, ad_len, have_simd);
 | |
| +	poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf, have_simd);
 | |
| +	/* Whole CHACHA20_BLOCK_SIZE multiples are handled per walk step; a shorter remainder is handled last. */
 | |
| +	if (likely(src_len)) {
 | |
| +		blkcipher_walk_init(&walk, dst, src, src_len);
 | |
| +		ret = blkcipher_walk_virt_block(&chacha20_desc, &walk, CHACHA20_BLOCK_SIZE);
 | |
| +		while (walk.nbytes >= CHACHA20_BLOCK_SIZE) {
 | |
| +			size_t chunk_len = rounddown(walk.nbytes, CHACHA20_BLOCK_SIZE);
 | |
| +			chacha20_crypt(&chacha20_state, walk.dst.virt.addr, walk.src.virt.addr, chunk_len, have_simd);
 | |
| +			poly1305_update(&poly1305_state, walk.dst.virt.addr, chunk_len, have_simd);
 | |
| +			ret = blkcipher_walk_done(&chacha20_desc, &walk, walk.nbytes % CHACHA20_BLOCK_SIZE);
 | |
| +		}
 | |
| +		if (walk.nbytes) {
 | |
| +			chacha20_crypt(&chacha20_state, walk.dst.virt.addr, walk.src.virt.addr, walk.nbytes, have_simd);
 | |
| +			poly1305_update(&poly1305_state, walk.dst.virt.addr, walk.nbytes, have_simd);
 | |
| +			ret = blkcipher_walk_done(&chacha20_desc, &walk, 0);
 | |
| +		}
 | |
| +	}
 | |
| +	if (unlikely(ret))
 | |
| +		goto err;
 | |
| +
 | |
| +	poly1305_update(&poly1305_state, pad0, (0x10 - src_len) & 0xf, have_simd);
 | |
| +	/* Trailer: ad_len, then src_len, each as a little-endian 64-bit value. */
 | |
| +	len = cpu_to_le64(ad_len);
 | |
| +	poly1305_update(&poly1305_state, (u8 *)&len, sizeof(len), have_simd);
 | |
| +
 | |
| +	len = cpu_to_le64(src_len);
 | |
| +	poly1305_update(&poly1305_state, (u8 *)&len, sizeof(len), have_simd);
 | |
| +	/* The tag is built in a local buffer and then copied into dst at offset src_len. */
 | |
| +	poly1305_finish(&poly1305_state, mac);
 | |
| +	scatterwalk_map_and_copy(mac, dst, src_len, sizeof(mac), 1);
 | |
| +err:
 | |
| +	memzero_explicit(&poly1305_state, sizeof(poly1305_state));
 | |
| +	memzero_explicit(&chacha20_state, sizeof(chacha20_state));
 | |
| +	memzero_explicit(mac, sizeof(mac));
 | |
| +	return !ret;
 | |
| +}
 | |
| +/* Verify and decrypt a linear buffer.
 | |
| + * src holds src_len - POLY1305_MAC_SIZE bytes of ciphertext followed by the
 | |
| + * tag. The tag is recomputed over ad and the ciphertext and compared with
 | |
| + * crypto_memneq(); dst is written only when the tags match. Returns true on
 | |
| + * success, false when src_len is shorter than a tag or the tags differ.
 | |
| + */
 | |
| +static inline bool __chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len,
 | |
| +					      const u8 *ad, const size_t ad_len,
 | |
| +					      const u64 nonce, const u8 key[CHACHA20POLY1305_KEYLEN],
 | |
| +					      bool have_simd)
 | |
| +{
 | |
| +	struct poly1305_ctx poly1305_state;
 | |
| +	struct chacha20_ctx chacha20_state;
 | |
| +	int ret;
 | |
| +	u8 block0[CHACHA20_BLOCK_SIZE] = { 0 };
 | |
| +	u8 mac[POLY1305_MAC_SIZE];
 | |
| +	size_t dst_len;
 | |
| +	__le64 len;
 | |
| +	__le64 le_nonce = cpu_to_le64(nonce);
 | |
| +	/* Too short to contain a tag: reject before any key material is set up. */
 | |
| +	if (unlikely(src_len < POLY1305_MAC_SIZE))
 | |
| +		return false;
 | |
| +
 | |
| +	chacha20_keysetup(&chacha20_state, key, (u8 *)&le_nonce);
 | |
| +	/* Keystream from a zero block keys the Poly1305 state. */
 | |
| +	chacha20_crypt(&chacha20_state, block0, block0, sizeof(block0), have_simd);
 | |
| +	poly1305_init(&poly1305_state, block0);
 | |
| +	memzero_explicit(block0, sizeof(block0));
 | |
| +
 | |
| +	poly1305_update(&poly1305_state, ad, ad_len, have_simd);
 | |
| +	poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf, have_simd);
 | |
| +	/* Authenticate the ciphertext as received, before any decryption. */
 | |
| +	dst_len = src_len - POLY1305_MAC_SIZE;
 | |
| +	poly1305_update(&poly1305_state, src, dst_len, have_simd);
 | |
| +	poly1305_update(&poly1305_state, pad0, (0x10 - dst_len) & 0xf, have_simd);
 | |
| +
 | |
| +	len = cpu_to_le64(ad_len);
 | |
| +	poly1305_update(&poly1305_state, (u8 *)&len, sizeof(len), have_simd);
 | |
| +
 | |
| +	len = cpu_to_le64(dst_len);
 | |
| +	poly1305_update(&poly1305_state, (u8 *)&len, sizeof(len), have_simd);
 | |
| +
 | |
| +	poly1305_finish(&poly1305_state, mac);
 | |
| +	memzero_explicit(&poly1305_state, sizeof(poly1305_state));
 | |
| +	/* ret is non-zero when the computed and received tags differ. */
 | |
| +	ret = crypto_memneq(mac, src + dst_len, POLY1305_MAC_SIZE);
 | |
| +	memzero_explicit(mac, POLY1305_MAC_SIZE);
 | |
| +	if (likely(!ret))
 | |
| +		chacha20_crypt(&chacha20_state, dst, src, dst_len, have_simd);
 | |
| +
 | |
| +	memzero_explicit(&chacha20_state, sizeof(chacha20_state));
 | |
| +
 | |
| +	return !ret;
 | |
| +}
 | |
| +
 | |
| +/* One-shot AEAD decryption of a linear buffer.
 | |
| + * Brackets __chacha20poly1305_decrypt() with chacha20poly1305_init_simd() and
 | |
| + * chacha20poly1305_deinit_simd() and returns its verdict: true when the tag
 | |
| + * verified and dst was filled, false otherwise.
 | |
| + */
 | |
| +bool chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len,
 | |
| +			      const u8 *ad, const size_t ad_len,
 | |
| +			      const u64 nonce, const u8 key[CHACHA20POLY1305_KEYLEN])
 | |
| +{
 | |
| +	const bool simd_ok = chacha20poly1305_init_simd();
 | |
| +	const bool authentic = __chacha20poly1305_decrypt(dst, src, src_len, ad, ad_len, nonce, key, simd_ok);
 | |
| +
 | |
| +	chacha20poly1305_deinit_simd(simd_ok);
 | |
| +	return authentic;
 | |
| +}
 | |
| +
 | |
| +/* Scatterlist variant of the AEAD decryption.
 | |
| + * src holds dst_len = src_len - POLY1305_MAC_SIZE bytes of ciphertext
 | |
| + * followed by the tag. Each mapped chunk is authenticated and decrypted in
 | |
| + * the same pass, so dst is written BEFORE the tag is compared: when this
 | |
| + * returns false the contents of dst must not be used.
 | |
| + * Returns true only if every walk step succeeded and the tags match; false
 | |
| + * if src_len is shorter than a tag, a walk step failed, or the tags differ.
 | |
| + * SIMD state is acquired and released internally.
 | |
| + */
 | |
| +bool chacha20poly1305_decrypt_sg(struct scatterlist *dst, struct scatterlist *src, const size_t src_len,
 | |
| +				 const u8 *ad, const size_t ad_len,
 | |
| +				 const u64 nonce, const u8 key[CHACHA20POLY1305_KEYLEN])
 | |
| +{
 | |
| +	struct poly1305_ctx poly1305_state;
 | |
| +	struct chacha20_ctx chacha20_state;
 | |
| +	struct blkcipher_walk walk;
 | |
| +	int ret = 0;
 | |
| +	u8 block0[CHACHA20_BLOCK_SIZE] = { 0 };
 | |
| +	u8 read_mac[POLY1305_MAC_SIZE], computed_mac[POLY1305_MAC_SIZE];
 | |
| +	size_t dst_len;
 | |
| +	__le64 len;
 | |
| +	__le64 le_nonce = cpu_to_le64(nonce);
 | |
| +	bool have_simd;
 | |
| +
 | |
| +	if (unlikely(src_len < POLY1305_MAC_SIZE))
 | |
| +		return false;
 | |
| +
 | |
| +	have_simd = chacha20poly1305_init_simd();
 | |
| +
 | |
| +	chacha20_keysetup(&chacha20_state, key, (u8 *)&le_nonce);
 | |
| +
 | |
| +	/* Keystream from a zero block keys the Poly1305 state. */
 | |
| +	chacha20_crypt(&chacha20_state, block0, block0, sizeof(block0), have_simd);
 | |
| +	poly1305_init(&poly1305_state, block0);
 | |
| +	memzero_explicit(block0, sizeof(block0));
 | |
| +
 | |
| +	poly1305_update(&poly1305_state, ad, ad_len, have_simd);
 | |
| +	poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf, have_simd);
 | |
| +
 | |
| +	dst_len = src_len - POLY1305_MAC_SIZE;
 | |
| +	if (likely(dst_len)) {
 | |
| +		blkcipher_walk_init(&walk, dst, src, dst_len);
 | |
| +		ret = blkcipher_walk_virt_block(&chacha20_desc, &walk, CHACHA20_BLOCK_SIZE);
 | |
| +		while (walk.nbytes >= CHACHA20_BLOCK_SIZE) {
 | |
| +			size_t chunk_len = rounddown(walk.nbytes, CHACHA20_BLOCK_SIZE);
 | |
| +			/* MAC the ciphertext first: src and dst of a chunk may be the same memory. */
 | |
| +			poly1305_update(&poly1305_state, walk.src.virt.addr, chunk_len, have_simd);
 | |
| +			chacha20_crypt(&chacha20_state, walk.dst.virt.addr, walk.src.virt.addr, chunk_len, have_simd);
 | |
| +			ret = blkcipher_walk_done(&chacha20_desc, &walk, walk.nbytes % CHACHA20_BLOCK_SIZE);
 | |
| +		}
 | |
| +		if (walk.nbytes) {
 | |
| +			poly1305_update(&poly1305_state, walk.src.virt.addr, walk.nbytes, have_simd);
 | |
| +			chacha20_crypt(&chacha20_state, walk.dst.virt.addr, walk.src.virt.addr, walk.nbytes, have_simd);
 | |
| +			ret = blkcipher_walk_done(&chacha20_desc, &walk, 0);
 | |
| +		}
 | |
| +	}
 | |
| +	if (unlikely(ret))
 | |
| +		goto err;
 | |
| +
 | |
| +	poly1305_update(&poly1305_state, pad0, (0x10 - dst_len) & 0xf, have_simd);
 | |
| +
 | |
| +	len = cpu_to_le64(ad_len);
 | |
| +	poly1305_update(&poly1305_state, (u8 *)&len, sizeof(len), have_simd);
 | |
| +
 | |
| +	len = cpu_to_le64(dst_len);
 | |
| +	poly1305_update(&poly1305_state, (u8 *)&len, sizeof(len), have_simd);
 | |
| +
 | |
| +	poly1305_finish(&poly1305_state, computed_mac);
 | |
| +
 | |
| +	scatterwalk_map_and_copy(read_mac, src, dst_len, POLY1305_MAC_SIZE, 0);
 | |
| +	ret = crypto_memneq(read_mac, computed_mac, POLY1305_MAC_SIZE);
 | |
| +err:
 | |
| +	/* Wipe on every exit: a failed walk must not leave the keyed Poly1305 state on the stack. */
 | |
| +	memzero_explicit(&poly1305_state, sizeof(poly1305_state));
 | |
| +	memzero_explicit(read_mac, POLY1305_MAC_SIZE);
 | |
| +	memzero_explicit(computed_mac, POLY1305_MAC_SIZE);
 | |
| +	memzero_explicit(&chacha20_state, sizeof(chacha20_state));
 | |
| +	chacha20poly1305_deinit_simd(have_simd);
 | |
| +	return !ret;
 | |
| +}
 | |
| +
 | |
| +
 | |
| +/* Extended-nonce AEAD encryption.
 | |
| + * hchacha20() turns key and nonce into a one-off derived key, and the eight
 | |
| + * bytes at nonce + 16 (read with le64_to_cpuvp()) serve as the 64-bit nonce
 | |
| + * for the regular construction. The derived key is wiped before returning.
 | |
| + * dst needs room for src_len bytes plus the tag.
 | |
| + */
 | |
| +void xchacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len,
 | |
| +			       const u8 *ad, const size_t ad_len,
 | |
| +			       const u8 nonce[XCHACHA20POLY1305_NONCELEN],
 | |
| +			       const u8 key[CHACHA20POLY1305_KEYLEN])
 | |
| +{
 | |
| +	u8 derived_key[CHACHA20POLY1305_KEYLEN] __aligned(16);
 | |
| +	bool simd_ok;
 | |
| +
 | |
| +	simd_ok = chacha20poly1305_init_simd();
 | |
| +	hchacha20(derived_key, nonce, key, simd_ok);
 | |
| +	__chacha20poly1305_encrypt(dst, src, src_len, ad, ad_len,
 | |
| +				   le64_to_cpuvp(nonce + 16), derived_key, simd_ok);
 | |
| +	memzero_explicit(derived_key, sizeof(derived_key));
 | |
| +	chacha20poly1305_deinit_simd(simd_ok);
 | |
| +}
 | |
| +
 | |
| +/* Extended-nonce AEAD decryption; the counterpart of
 | |
| + * xchacha20poly1305_encrypt(). Derives the one-off key with hchacha20(), uses
 | |
| + * the eight bytes at nonce + 16 as the 64-bit nonce, and wipes the derived
 | |
| + * key afterwards. Returns what __chacha20poly1305_decrypt() returns: true
 | |
| + * when the tag verified and dst was filled, false otherwise.
 | |
| + */
 | |
| +bool xchacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len,
 | |
| +			       const u8 *ad, const size_t ad_len,
 | |
| +			       const u8 nonce[XCHACHA20POLY1305_NONCELEN],
 | |
| +			       const u8 key[CHACHA20POLY1305_KEYLEN])
 | |
| +{
 | |
| +	u8 derived_key[CHACHA20POLY1305_KEYLEN] __aligned(16);
 | |
| +	bool simd_ok, authentic;
 | |
| +
 | |
| +	simd_ok = chacha20poly1305_init_simd();
 | |
| +	hchacha20(derived_key, nonce, key, simd_ok);
 | |
| +	authentic = __chacha20poly1305_decrypt(dst, src, src_len, ad, ad_len,
 | |
| +					       le64_to_cpuvp(nonce + 16), derived_key, simd_ok);
 | |
| +	memzero_explicit(derived_key, sizeof(derived_key));
 | |
| +	chacha20poly1305_deinit_simd(simd_ok);
 | |
| +	return authentic;
 | |
| +}
 | |
| +
 | |
| +#include "../selftest/chacha20poly1305.h"
 | |
| --- /dev/null	2017-07-05 16:27:37.615351856 +0200
 | |
| +++ b/net/wireguard/crypto/curve25519.c	2017-07-06 18:17:33.000000000 +0200
 | |
| @@ -0,0 +1,1631 @@
 | |
| +/* Original author: Adam Langley <agl@imperialviolet.org>
 | |
| + *
 | |
| + * Copyright 2008 Google Inc. All Rights Reserved.
 | |
| + * Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 | |
| + */
 | |
| +
 | |
| +#include "curve25519.h"
 | |
| +
 | |
| +#include <linux/string.h>
 | |
| +#include <linux/random.h>
 | |
| +#include <crypto/algapi.h>
 | |
| +/* ARCH_HAS_SEPARATE_IRQ_STACK is defined by default and withdrawn again for
 | |
| + * ARM, and for MIPS on kernels older than 4.11.
 | |
| + * NOTE(review): its users are not in this part of the file; presumably it
 | |
| + * gates stack-hungry code paths - confirm where it is tested.
 | |
| + */
 | |
| +#define ARCH_HAS_SEPARATE_IRQ_STACK
 | |
| +
 | |
| +#if (defined(CONFIG_MIPS) && LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)) || defined(CONFIG_ARM)
 | |
| +#undef ARCH_HAS_SEPARATE_IRQ_STACK
 | |
| +#endif
 | |
| +
 | |
| +/* Clamp a scalar in place: clear the three low bits of byte 0, clear the top
 | |
| + * bit of byte 31 and set the bit below it.
 | |
| + */
 | |
| +static __always_inline void normalize_secret(u8 secret[CURVE25519_POINT_SIZE])
 | |
| +{
 | |
| +	secret[0] &= 0xf8;
 | |
| +	secret[31] = (secret[31] & 0x7f) | 0x40;
 | |
| +}
 | |
| +static const u8 null_point[CURVE25519_POINT_SIZE] = { 0 }; /* all-zero point; NOTE(review): presumably compared against results to reject a zero output - confirm at its users */
 | |
| +
 | |
| +#if defined(CONFIG_X86_64)
 | |
| +#include <asm/cpufeature.h>
 | |
| +#include <asm/processor.h>
 | |
| +#include <asm/fpu/api.h>
 | |
| +#include <asm/simd.h>
 | |
| +/* Set once by curve25519_fpu_init(): true when the boot CPU reports AVX. */
 | |
| +static bool curve25519_use_avx __read_mostly = false;
 | |
| +/* Record whether the AVX code paths may be used. Marked __init like the ARM
 | |
| + * and fallback variants of this function further down, so all three
 | |
| + * definitions share the same section and calling rules.
 | |
| + */
 | |
| +void __init curve25519_fpu_init(void)
 | |
| +{
 | |
| +	curve25519_use_avx = boot_cpu_has(X86_FEATURE_AVX);
 | |
| +}
 | |
| +/* fe: ten u64 limbs, filled by fe_frombytes() below.
 | |
| + * fe51: five u64 limbs; the sandy2x wrappers below build each one from an fe
 | |
| + * as (odd limb << 26) + even limb.
 | |
| + * The curve25519_sandy2x_* routines are only declared here; they are defined
 | |
| + * elsewhere (in assembly, going by asmlinkage - confirm).
 | |
| + */
 | |
| +typedef u64 fe[10];
 | |
| +typedef u64 fe51[5];
 | |
| +asmlinkage void curve25519_sandy2x_ladder(fe *, const u8 *);
 | |
| +asmlinkage void curve25519_sandy2x_ladder_base(fe *, const u8 *);
 | |
| +asmlinkage void curve25519_sandy2x_fe51_pack(u8 *, const fe51 *);
 | |
| +asmlinkage void curve25519_sandy2x_fe51_mul(fe51 *, const fe51 *, const fe51 *);
 | |
| +asmlinkage void curve25519_sandy2x_fe51_nsquare(fe51 *, const fe51 *, int);
 | |
| +
 | |
| +/* Read three bytes at in as a little-endian 24-bit value. */
 | |
| +static inline u32 le24_to_cpupv(const u8 *in)
 | |
| +{
 | |
| +	const u32 lo = in[0];
 | |
| +	const u32 mid = in[1];
 | |
| +	const u32 hi = in[2];
 | |
| +
 | |
| +	return lo | (mid << 8) | (hi << 16);
 | |
| +}
 | |
| +/* Unpack the 32-byte little-endian string s into the ten-limb form h.
 | |
| + * Each limb is read from its own consecutive 3- or 4-byte window of s and
 | |
| + * pre-shifted; two carry passes then mask the odd limbs to 25 bits and the
 | |
| + * even limbs to 26 bits, pushing the excess into the next limb. The
 | |
| + * & 8388607 (2^23 - 1) on the last window drops the top bit of s[31].
 | |
| + */
 | |
| +static inline void fe_frombytes(fe h, const u8 *s)
 | |
| +{
 | |
| +	u64 h0 = le32_to_cpup((__le32 *)s);
 | |
| +	u64 h1 = le24_to_cpupv(s + 4) << 6;
 | |
| +	u64 h2 = le24_to_cpupv(s + 7) << 5;
 | |
| +	u64 h3 = le24_to_cpupv(s + 10) << 3;
 | |
| +	u64 h4 = le24_to_cpupv(s + 13) << 2;
 | |
| +	u64 h5 = le32_to_cpup((__le32 *)(s + 16));
 | |
| +	u64 h6 = le24_to_cpupv(s + 20) << 7;
 | |
| +	u64 h7 = le24_to_cpupv(s + 23) << 5;
 | |
| +	u64 h8 = le24_to_cpupv(s + 26) << 4;
 | |
| +	u64 h9 = (le24_to_cpupv(s + 29) & 8388607) << 2;
 | |
| +	u64 carry0, carry1, carry2, carry3, carry4, carry5, carry6, carry7, carry8, carry9;
 | |
| +	/* Odd limbs keep 25 bits; the carry out of h9 wraps into h0 multiplied by 19. */
 | |
| +	carry9 = h9 >> 25; h0 += carry9 * 19; h9 &= 0x1FFFFFF;
 | |
| +	carry1 = h1 >> 25; h2 += carry1; h1 &= 0x1FFFFFF;
 | |
| +	carry3 = h3 >> 25; h4 += carry3; h3 &= 0x1FFFFFF;
 | |
| +	carry5 = h5 >> 25; h6 += carry5; h5 &= 0x1FFFFFF;
 | |
| +	carry7 = h7 >> 25; h8 += carry7; h7 &= 0x1FFFFFF;
 | |
| +	/* Even limbs keep 26 bits; their carries are added onto the odd limbs. */
 | |
| +	carry0 = h0 >> 26; h1 += carry0; h0 &= 0x3FFFFFF;
 | |
| +	carry2 = h2 >> 26; h3 += carry2; h2 &= 0x3FFFFFF;
 | |
| +	carry4 = h4 >> 26; h5 += carry4; h4 &= 0x3FFFFFF;
 | |
| +	carry6 = h6 >> 26; h7 += carry6; h6 &= 0x3FFFFFF;
 | |
| +	carry8 = h8 >> 26; h9 += carry8; h8 &= 0x3FFFFFF;
 | |
| +
 | |
| +	h[0] = h0;
 | |
| +	h[1] = h1;
 | |
| +	h[2] = h2;
 | |
| +	h[3] = h3;
 | |
| +	h[4] = h4;
 | |
| +	h[5] = h5;
 | |
| +	h[6] = h6;
 | |
| +	h[7] = h7;
 | |
| +	h[8] = h8;
 | |
| +	h[9] = h9;
 | |
| +}
 | |
| +
 | |
| +/* Compute *r = *x raised to 2^255 - 21 using a fixed chain of squarings and
 | |
| + * multiplications. 2^255 - 21 is p - 2 for p = 2^255 - 19, so the result is
 | |
| + * the multiplicative inverse of x modulo p. The comment in front of each
 | |
| + * step gives the exponent reached so far. r may be the same object as x:
 | |
| + * x is last read at the "9" step and r is written only by the final call.
 | |
| + */
 | |
| +static inline void fe51_invert(fe51 *r, const fe51 *x)
 | |
| +{
 | |
| +	fe51 z2, z9, z11, z2_5_0, z2_10_0, z2_20_0, z2_50_0, z2_100_0, t;
 | |
| +
 | |
| +	/* 2 */ curve25519_sandy2x_fe51_nsquare(&z2, x, 1);
 | |
| +	/* 4 */ curve25519_sandy2x_fe51_nsquare(&t, (const fe51 *)&z2, 1);
 | |
| +	/* 8 */ curve25519_sandy2x_fe51_nsquare(&t, (const fe51 *)&t, 1);
 | |
| +	/* 9 */ curve25519_sandy2x_fe51_mul(&z9, (const fe51 *)&t, x);
 | |
| +	/* 11 */ curve25519_sandy2x_fe51_mul(&z11, (const fe51 *)&z9, (const fe51 *)&z2);
 | |
| +	/* 22 */ curve25519_sandy2x_fe51_nsquare(&t, (const fe51 *)&z11, 1);
 | |
| +	/* 2^5 - 2^0 = 31 */ curve25519_sandy2x_fe51_mul(&z2_5_0, (const fe51 *)&t, (const fe51 *)&z9);
 | |
| +
 | |
| +	/* 2^10 - 2^5 */ curve25519_sandy2x_fe51_nsquare(&t, (const fe51 *)&z2_5_0, 5);
 | |
| +	/* 2^10 - 2^0 */ curve25519_sandy2x_fe51_mul(&z2_10_0, (const fe51 *)&t, (const fe51 *)&z2_5_0);
 | |
| +
 | |
| +	/* 2^20 - 2^10 */ curve25519_sandy2x_fe51_nsquare(&t, (const fe51 *)&z2_10_0, 10);
 | |
| +	/* 2^20 - 2^0 */ curve25519_sandy2x_fe51_mul(&z2_20_0, (const fe51 *)&t, (const fe51 *)&z2_10_0);
 | |
| +
 | |
| +	/* 2^40 - 2^20 */ curve25519_sandy2x_fe51_nsquare(&t, (const fe51 *)&z2_20_0, 20);
 | |
| +	/* 2^40 - 2^0 */ curve25519_sandy2x_fe51_mul(&t, (const fe51 *)&t, (const fe51 *)&z2_20_0);
 | |
| +
 | |
| +	/* 2^50 - 2^10 */ curve25519_sandy2x_fe51_nsquare(&t, (const fe51 *)&t, 10);
 | |
| +	/* 2^50 - 2^0 */ curve25519_sandy2x_fe51_mul(&z2_50_0, (const fe51 *)&t, (const fe51 *)&z2_10_0);
 | |
| +
 | |
| +	/* 2^100 - 2^50 */ curve25519_sandy2x_fe51_nsquare(&t, (const fe51 *)&z2_50_0, 50);
 | |
| +	/* 2^100 - 2^0 */ curve25519_sandy2x_fe51_mul(&z2_100_0, (const fe51 *)&t, (const fe51 *)&z2_50_0);
 | |
| +
 | |
| +	/* 2^200 - 2^100 */ curve25519_sandy2x_fe51_nsquare(&t, (const fe51 *)&z2_100_0, 100);
 | |
| +	/* 2^200 - 2^0 */ curve25519_sandy2x_fe51_mul(&t, (const fe51 *)&t, (const fe51 *)&z2_100_0);
 | |
| +
 | |
| +	/* 2^250 - 2^50 */ curve25519_sandy2x_fe51_nsquare(&t, (const fe51 *)&t, 50);
 | |
| +	/* 2^250 - 2^0 */ curve25519_sandy2x_fe51_mul(&t, (const fe51 *)&t, (const fe51 *)&z2_50_0);
 | |
| +
 | |
| +	/* 2^255 - 2^5 */ curve25519_sandy2x_fe51_nsquare(&t, (const fe51 *)&t, 5);
 | |
| +	/* 2^255 - 21 */ curve25519_sandy2x_fe51_mul(r, (const fe51 *)&t, (const fe51 *)&z11);
 | |
| +}
 | |
| +
 | |
| +/* Scalar multiplication through the sandy2x ladder routine.
 | |
| + * A private, clamped copy of secret drives curve25519_sandy2x_ladder() with
 | |
| + * basepoint unpacked into slot 0 of the work array; the ladder's results are
 | |
| + * read back from slots 1 (x) and 2 (z). Both are folded into five-limb form,
 | |
| + * x is multiplied by the inverse of z, and the packed result is written to
 | |
| + * mypublic. All temporaries are wiped before returning.
 | |
| + */
 | |
| +static void curve25519_sandy2x(u8 mypublic[CURVE25519_POINT_SIZE], const u8 secret[CURVE25519_POINT_SIZE], const u8 basepoint[CURVE25519_POINT_SIZE])
 | |
| +{
 | |
| +	u8 e[32];
 | |
| +	fe var[3];
 | |
| +	fe51 x_51, z_51;
 | |
| +	unsigned int i;
 | |
| +
 | |
| +	memcpy(e, secret, sizeof(e));
 | |
| +	normalize_secret(e);
 | |
| +	fe_frombytes(var[0], basepoint);
 | |
| +	curve25519_sandy2x_ladder(var, e);
 | |
| +
 | |
| +	/* Merge each (even, odd) limb pair into one limb of the five-limb form. */
 | |
| +	for (i = 0; i < 5; ++i) {
 | |
| +		z_51[i] = (var[2][2 * i + 1] << 26) + var[2][2 * i];
 | |
| +		x_51[i] = (var[1][2 * i + 1] << 26) + var[1][2 * i];
 | |
| +	}
 | |
| +
 | |
| +	fe51_invert(&z_51, (const fe51 *)&z_51);
 | |
| +	curve25519_sandy2x_fe51_mul(&x_51, (const fe51 *)&x_51, (const fe51 *)&z_51);
 | |
| +	curve25519_sandy2x_fe51_pack(mypublic, (const fe51 *)&x_51);
 | |
| +
 | |
| +	memzero_explicit(e, sizeof(e));
 | |
| +	memzero_explicit(var, sizeof(var));
 | |
| +	memzero_explicit(x_51, sizeof(x_51));
 | |
| +	memzero_explicit(z_51, sizeof(z_51));
 | |
| +}
 | |
| +
 | |
| +/* Fixed-base counterpart of curve25519_sandy2x().
 | |
| + * A private, clamped copy of secret drives curve25519_sandy2x_ladder_base(),
 | |
| + * whose results are read back from slots 0 (x) and 1 (z) of the work array.
 | |
| + * Both are folded into five-limb form, x is multiplied by the inverse of z,
 | |
| + * and the packed result is written to pub. All temporaries are wiped.
 | |
| + */
 | |
| +static void curve25519_sandy2x_base(u8 pub[CURVE25519_POINT_SIZE], const u8 secret[CURVE25519_POINT_SIZE])
 | |
| +{
 | |
| +	u8 e[32];
 | |
| +	fe var[3];
 | |
| +	fe51 x_51, z_51;
 | |
| +	unsigned int i;
 | |
| +
 | |
| +	memcpy(e, secret, sizeof(e));
 | |
| +	normalize_secret(e);
 | |
| +	curve25519_sandy2x_ladder_base(var, e);
 | |
| +
 | |
| +	/* Merge each (even, odd) limb pair into one limb of the five-limb form. */
 | |
| +	for (i = 0; i < 5; ++i) {
 | |
| +		z_51[i] = (var[1][2 * i + 1] << 26) + var[1][2 * i];
 | |
| +		x_51[i] = (var[0][2 * i + 1] << 26) + var[0][2 * i];
 | |
| +	}
 | |
| +
 | |
| +	fe51_invert(&z_51, (const fe51 *)&z_51);
 | |
| +	curve25519_sandy2x_fe51_mul(&x_51, (const fe51 *)&x_51, (const fe51 *)&z_51);
 | |
| +	curve25519_sandy2x_fe51_pack(pub, (const fe51 *)&x_51);
 | |
| +
 | |
| +	memzero_explicit(e, sizeof(e));
 | |
| +	memzero_explicit(var, sizeof(var));
 | |
| +	memzero_explicit(x_51, sizeof(x_51));
 | |
| +	memzero_explicit(z_51, sizeof(z_51));
 | |
| +}
 | |
| +#elif IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && defined(CONFIG_ARM)
 | |
| +#include <asm/hwcap.h>
 | |
| +#include <asm/neon.h>
 | |
| +#include <asm/simd.h>
 | |
| +asmlinkage void curve25519_asm_neon(u8 mypublic[CURVE25519_POINT_SIZE], const u8 secret[CURVE25519_POINT_SIZE], const u8 basepoint[CURVE25519_POINT_SIZE]);
 | |
| +static bool curve25519_use_neon __read_mostly = false; /* set by curve25519_fpu_init() below */
 | |
| +void __init curve25519_fpu_init(void)
 | |
| +{
 | |
| +	curve25519_use_neon = elf_hwcap & HWCAP_NEON; /* true when the hwcap bits report NEON */
 | |
| +}
 | |
| +#else /* neither x86_64 nor ARM with kernel-mode NEON: nothing to probe */
 | |
| +void __init curve25519_fpu_init(void) { }
 | |
| +#endif /* CONFIG_X86_64 / ARM kernel-mode NEON */
 | |
| +/* Implementation used when the compiler provides a 128-bit integer type.
 | |
| + * A field element (felem) is five 64-bit limbs; the routines below mask each
 | |
| + * limb with 0x7ffffffffffff, i.e. 51 bits per limb, and use u128 for the
 | |
| + * intermediate products.
 | |
| + */
 | |
| +#ifdef __SIZEOF_INT128__
 | |
| +typedef u64 limb;
 | |
| +typedef limb felem[5];
 | |
| +typedef __uint128_t u128;
 | |
| +
 | |
| +/* Limb-wise addition without carry propagation: output[i] += in[i] for all
 | |
| + * five limbs.
 | |
| + */
 | |
| +static __always_inline void fsum(limb *output, const limb *in)
 | |
| +{
 | |
| +	unsigned int i;
 | |
| +
 | |
| +	for (i = 0; i < 5; ++i)
 | |
| +		output[i] += in[i];
 | |
| +}
 | |
| +
 | |
| +/* Reverse subtraction in place: out = in - out (mind the operand order).
 | |
| + *
 | |
| + * A limb-wise encoding of 8 * (2^255 - 19) is added before subtracting so
 | |
| + * that no limb goes negative; this leaves the value unchanged modulo p.
 | |
| + *
 | |
| + * Assumes that out[i] < 2**52
 | |
| + * On return, out[i] < 2**55
 | |
| + */
 | |
| +static __always_inline void fdifference_backwards(felem out, const felem in)
 | |
| +{
 | |
| +	/* 152 is 19 << 3 */
 | |
| +	static const limb two54m152 = (((limb)1) << 54) - 152;
 | |
| +	static const limb two54m8 = (((limb)1) << 54) - 8;
 | |
| +	unsigned int i;
 | |
| +
 | |
| +	/* Only the lowest limb carries the -19 of the modulus. */
 | |
| +	out[0] = in[0] + two54m152 - out[0];
 | |
| +	for (i = 1; i < 5; ++i)
 | |
| +		out[i] = in[i] + two54m8 - out[i];
 | |
| +}
 | |
| +
 | |
| +/* Multiply a field element by a small scalar: output = in * scalar.
 | |
| + * Each limb product is formed in 128 bits, its low 51 bits are kept and the
 | |
| + * rest is carried into the next limb; the carry out of the top limb is
 | |
| + * folded back into limb 0 multiplied by 19.
 | |
| + */
 | |
| +static __always_inline void fscalar_product(felem output, const felem in, const limb scalar)
 | |
| +{
 | |
| +	u128 acc;
 | |
| +	unsigned int i;
 | |
| +
 | |
| +	acc = ((u128) in[0]) * scalar;
 | |
| +	output[0] = ((limb)acc) & 0x7ffffffffffffUL;
 | |
| +
 | |
| +	for (i = 1; i < 5; ++i) {
 | |
| +		acc = ((u128) in[i]) * scalar + ((limb) (acc >> 51));
 | |
| +		output[i] = ((limb)acc) & 0x7ffffffffffffUL;
 | |
| +	}
 | |
| +
 | |
| +	output[0] += (acc >> 51) * 19;
 | |
| +}
 | |
| +
 | |
| +/* Multiply two numbers: output = in2 * in
 | |
| + *
 | |
| + * output must be distinct to both inputs. The inputs are reduced coefficient
 | |
| + * form, the output is not.
 | |
| + *
 | |
| + * Assumes that in[i] < 2**55 and likewise for in2.
 | |
| + * On return, output[i] < 2**52
 | |
| + */
 | |
| +static __always_inline void fmul(felem output, const felem in2, const felem in)
 | |
| +{
 | |
| +	u128 t[5];
 | |
| +	limb r0,r1,r2,r3,r4,s0,s1,s2,s3,s4,c;
 | |
| +
 | |
| +	r0 = in[0];
 | |
| +	r1 = in[1];
 | |
| +	r2 = in[2];
 | |
| +	r3 = in[3];
 | |
| +	r4 = in[4];
 | |
| +
 | |
| +	s0 = in2[0];
 | |
| +	s1 = in2[1];
 | |
| +	s2 = in2[2];
 | |
| +	s3 = in2[3];
 | |
| +	s4 = in2[4];
 | |
| +	/* Partial products whose limb indices sum to 0..4 land in t[0..4] directly. */
 | |
| +	t[0]  =  ((u128) r0) * s0;
 | |
| +	t[1]  =  ((u128) r0) * s1 + ((u128) r1) * s0;
 | |
| +	t[2]  =  ((u128) r0) * s2 + ((u128) r2) * s0 + ((u128) r1) * s1;
 | |
| +	t[3]  =  ((u128) r0) * s3 + ((u128) r3) * s0 + ((u128) r1) * s2 + ((u128) r2) * s1;
 | |
| +	t[4]  =  ((u128) r0) * s4 + ((u128) r4) * s0 + ((u128) r3) * s1 + ((u128) r1) * s3 + ((u128) r2) * s2;
 | |
| +	/* Products reaching past limb 4 wrap around scaled by 19, since 2^255 = 19 mod 2^255 - 19. */
 | |
| +	r4 *= 19;
 | |
| +	r1 *= 19;
 | |
| +	r2 *= 19;
 | |
| +	r3 *= 19;
 | |
| +
 | |
| +	t[0] += ((u128) r4) * s1 + ((u128) r1) * s4 + ((u128) r2) * s3 + ((u128) r3) * s2;
 | |
| +	t[1] += ((u128) r4) * s2 + ((u128) r2) * s4 + ((u128) r3) * s3;
 | |
| +	t[2] += ((u128) r4) * s3 + ((u128) r3) * s4;
 | |
| +	t[3] += ((u128) r4) * s4;
 | |
| +	/* Carry chain: keep 51 bits per limb; the carry out of t[4] re-enters r0 times 19. */
 | |
| +			r0 = (limb)t[0] & 0x7ffffffffffffUL; c = (limb)(t[0] >> 51);
 | |
| +	t[1] += c;      r1 = (limb)t[1] & 0x7ffffffffffffUL; c = (limb)(t[1] >> 51);
 | |
| +	t[2] += c;      r2 = (limb)t[2] & 0x7ffffffffffffUL; c = (limb)(t[2] >> 51);
 | |
| +	t[3] += c;      r3 = (limb)t[3] & 0x7ffffffffffffUL; c = (limb)(t[3] >> 51);
 | |
| +	t[4] += c;      r4 = (limb)t[4] & 0x7ffffffffffffUL; c = (limb)(t[4] >> 51);
 | |
| +	r0 +=   c * 19; c = r0 >> 51; r0 = r0 & 0x7ffffffffffffUL;
 | |
| +	r1 +=   c;      c = r1 >> 51; r1 = r1 & 0x7ffffffffffffUL;
 | |
| +	r2 +=   c;
 | |
| +
 | |
| +	output[0] = r0;
 | |
| +	output[1] = r1;
 | |
| +	output[2] = r2;
 | |
| +	output[3] = r3;
 | |
| +	output[4] = r4;
 | |
| +}
 | |
| +/* Square a field element repeatedly: output = in^(2^count).
 | |
| + * count must be at least 1: the loop body runs before count is tested, and
 | |
| + * count is unsigned, so passing 0 would wrap around instead of skipping it.
 | |
| + * The running value is kept in r0..r4, so output may be the same as in.
 | |
| + */
 | |
| +static __always_inline void fsquare_times(felem output, const felem in, limb count)
 | |
| +{
 | |
| +	u128 t[5];
 | |
| +	limb r0,r1,r2,r3,r4,c;
 | |
| +	limb d0,d1,d2,d4,d419;
 | |
| +
 | |
| +	r0 = in[0];
 | |
| +	r1 = in[1];
 | |
| +	r2 = in[2];
 | |
| +	r3 = in[3];
 | |
| +	r4 = in[4];
 | |
| +
 | |
| +	do {
 | |
| +		d0 = r0 * 2;
 | |
| +		d1 = r1 * 2;
 | |
| +		d2 = r2 * 2 * 19;
 | |
| +		d419 = r4 * 19;
 | |
| +		d4 = d419 * 2;
 | |
| +		/* Cross terms appear once with a doubled factor; wrapped terms carry the factor 19. */
 | |
| +		t[0] = ((u128) r0) * r0 + ((u128) d4) * r1 + (((u128) d2) * (r3     ));
 | |
| +		t[1] = ((u128) d0) * r1 + ((u128) d4) * r2 + (((u128) r3) * (r3 * 19));
 | |
| +		t[2] = ((u128) d0) * r2 + ((u128) r1) * r1 + (((u128) d4) * (r3     ));
 | |
| +		t[3] = ((u128) d0) * r3 + ((u128) d1) * r2 + (((u128) r4) * (d419   ));
 | |
| +		t[4] = ((u128) d0) * r4 + ((u128) d1) * r3 + (((u128) r2) * (r2     ));
 | |
| +		/* Carry chain: keep 51 bits per limb; the carry out of t[4] re-enters r0 times 19. */
 | |
| +				r0 = (limb)t[0] & 0x7ffffffffffffUL; c = (limb)(t[0] >> 51);
 | |
| +		t[1] += c;      r1 = (limb)t[1] & 0x7ffffffffffffUL; c = (limb)(t[1] >> 51);
 | |
| +		t[2] += c;      r2 = (limb)t[2] & 0x7ffffffffffffUL; c = (limb)(t[2] >> 51);
 | |
| +		t[3] += c;      r3 = (limb)t[3] & 0x7ffffffffffffUL; c = (limb)(t[3] >> 51);
 | |
| +		t[4] += c;      r4 = (limb)t[4] & 0x7ffffffffffffUL; c = (limb)(t[4] >> 51);
 | |
| +		r0 +=   c * 19; c = r0 >> 51; r0 = r0 & 0x7ffffffffffffUL;
 | |
| +		r1 +=   c;      c = r1 >> 51; r1 = r1 & 0x7ffffffffffffUL;
 | |
| +		r2 +=   c;
 | |
| +	} while(--count);
 | |
| +
 | |
| +	output[0] = r0;
 | |
| +	output[1] = r1;
 | |
| +	output[2] = r2;
 | |
| +	output[3] = r3;
 | |
| +	output[4] = r4;
 | |
| +}
 | |
| +
 | |
| +/* Load a little-endian 64-bit number from in, which may be unaligned.
 | |
| + * fexpand() calls this at byte offsets 6, 12 and 19, so the bytes are copied
 | |
| + * out with memcpy instead of dereferencing a cast __le64 pointer.
 | |
| + */
 | |
| +static inline limb load_limb(const u8 *in)
 | |
| +{
 | |
| +	__le64 raw;
 | |
| +
 | |
| +	memcpy(&raw, in, sizeof(raw));
 | |
| +	return le64_to_cpu(raw);
 | |
| +}
 | |
| +
 | |
| +/* Store in at out as a little-endian 64-bit number. out is a byte pointer
 | |
| + * with no alignment guarantee, so the value is copied in with memcpy instead
 | |
| + * of being written through a cast __le64 pointer.
 | |
| + */
 | |
| +static inline void store_limb(u8 *out, limb in)
 | |
| +{
 | |
| +	const __le64 raw = cpu_to_le64(in);
 | |
| +
 | |
| +	memcpy(out, &raw, sizeof(raw));
 | |
| +}
 | |
| +
 | |
| +/* Split a little-endian, 32-byte number into five 51-bit limbs.
 | |
| + * Limb i starts at bit 51 * i of the input; each is fetched with a 64-bit
 | |
| + * load from the byte containing that bit and shifted into place. The last
 | |
| + * limb is loaded from byte 24 rather than 25 so that the 8-byte read stays
 | |
| + * within the 32 input bytes.
 | |
| + */
 | |
| +static inline void fexpand(limb *output, const u8 *in)
 | |
| +{
 | |
| +	const limb mask51 = 0x7ffffffffffffUL;
 | |
| +
 | |
| +	output[0] = load_limb(in) & mask51;
 | |
| +	output[1] = (load_limb(in + 6) >> 3) & mask51;
 | |
| +	output[2] = (load_limb(in + 12) >> 6) & mask51;
 | |
| +	output[3] = (load_limb(in + 19) >> 1) & mask51;
 | |
| +	output[4] = (load_limb(in + 24) >> 12) & mask51;
 | |
| +}
 | |
| +
 | |
| +/* Take a fully reduced polynomial form number and contract it into a
 | |
| + * little-endian, 32-byte array. The limbs are first carried, then the value
 | |
| + * is brought into canonical range by adding 19, carrying, and adding
 | |
| + * 2^255 - 19 limb-wise before dropping bit 255, so that no branch depends on
 | |
| + * the value.
 | |
| + */
 | |
| +static void fcontract(u8 *output, const felem input)
 | |
| +{
 | |
| +	u128 t[5];
 | |
| +
 | |
| +	t[0] = input[0];
 | |
| +	t[1] = input[1];
 | |
| +	t[2] = input[2];
 | |
| +	t[3] = input[3];
 | |
| +	t[4] = input[4];
 | |
| +	/* Two full carry passes; the carry out of limb 4 re-enters limb 0 times 19. */
 | |
| +	t[1] += t[0] >> 51; t[0] &= 0x7ffffffffffffUL;
 | |
| +	t[2] += t[1] >> 51; t[1] &= 0x7ffffffffffffUL;
 | |
| +	t[3] += t[2] >> 51; t[2] &= 0x7ffffffffffffUL;
 | |
| +	t[4] += t[3] >> 51; t[3] &= 0x7ffffffffffffUL;
 | |
| +	t[0] += 19 * (t[4] >> 51); t[4] &= 0x7ffffffffffffUL;
 | |
| +
 | |
| +	t[1] += t[0] >> 51; t[0] &= 0x7ffffffffffffUL;
 | |
| +	t[2] += t[1] >> 51; t[1] &= 0x7ffffffffffffUL;
 | |
| +	t[3] += t[2] >> 51; t[2] &= 0x7ffffffffffffUL;
 | |
| +	t[4] += t[3] >> 51; t[3] &= 0x7ffffffffffffUL;
 | |
| +	t[0] += 19 * (t[4] >> 51); t[4] &= 0x7ffffffffffffUL;
 | |
| +
 | |
| +	/* now t is between 0 and 2^255-1, properly carried. */
 | |
| +	/* case 1: between 0 and 2^255-20. case 2: between 2^255-19 and 2^255-1. */
 | |
| +
 | |
| +	t[0] += 19;
 | |
| +
 | |
| +	t[1] += t[0] >> 51; t[0] &= 0x7ffffffffffffUL;
 | |
| +	t[2] += t[1] >> 51; t[1] &= 0x7ffffffffffffUL;
 | |
| +	t[3] += t[2] >> 51; t[2] &= 0x7ffffffffffffUL;
 | |
| +	t[4] += t[3] >> 51; t[3] &= 0x7ffffffffffffUL;
 | |
| +	t[0] += 19 * (t[4] >> 51); t[4] &= 0x7ffffffffffffUL;
 | |
| +
 | |
| +	/* now between 19 and 2^255-1 in both cases, and offset by 19. */
 | |
| +
 | |
| +	t[0] += 0x8000000000000UL - 19;
 | |
| +	t[1] += 0x8000000000000UL - 1;
 | |
| +	t[2] += 0x8000000000000UL - 1;
 | |
| +	t[3] += 0x8000000000000UL - 1;
 | |
| +	t[4] += 0x8000000000000UL - 1;
 | |
| +
 | |
| +	/* now between 2^255 and 2^256-20, and offset by 2^255. */
 | |
| +
 | |
| +	t[1] += t[0] >> 51; t[0] &= 0x7ffffffffffffUL;
 | |
| +	t[2] += t[1] >> 51; t[1] &= 0x7ffffffffffffUL;
 | |
| +	t[3] += t[2] >> 51; t[2] &= 0x7ffffffffffffUL;
 | |
| +	t[4] += t[3] >> 51; t[3] &= 0x7ffffffffffffUL;
 | |
| +	t[4] &= 0x7ffffffffffffUL;
 | |
| +	/* Repack the five 51-bit limbs into four little-endian 64-bit words. */
 | |
| +	store_limb(output,    t[0] | (t[1] << 51));
 | |
| +	store_limb(output+8,  (t[1] >> 13) | (t[2] << 38));
 | |
| +	store_limb(output+16, (t[2] >> 26) | (t[3] << 25));
 | |
| +	store_limb(output+24, (t[3] >> 39) | (t[4] << 12));
 | |
| +}
 | |
| +
 | |
| +/* Input: Q, Q', Q-Q'
 | |
| + * Output: 2Q, Q+Q'
 | |
| + *
 | |
| + *   x2 z2: long form
 | |
| + *   x3 z3: long form
 | |
| + *   x z: short form, destroyed
 | |
| + *   xprime zprime: short form, destroyed
 | |
| + *   qmqp: short form, preserved
 | |
| + */
 | |
| +static void fmonty(limb *x2, limb *z2, /* output 2Q */
 | |
| +			 limb *x3, limb *z3, /* output Q + Q' */
 | |
| +			 limb *x, limb *z,   /* input Q */
 | |
| +			 limb *xprime, limb *zprime, /* input Q' */
 | |
| +			 const limb *qmqp /* input Q - Q' */)
 | |
| +{
 | |
| +	limb origx[5], origxprime[5], zzz[5], xx[5], zz[5], xxprime[5], zzprime[5], zzzprime[5];
 | |
| +
 | |
| +	memcpy(origx, x, 5 * sizeof(limb));
 | |
| +	fsum(x, z);
 | |
| +	fdifference_backwards(z, origx);  // does x - z
 | |
| +
 | |
| +	memcpy(origxprime, xprime, sizeof(limb) * 5);
 | |
| +	fsum(xprime, zprime);
 | |
| +	fdifference_backwards(zprime, origxprime);
 | |
| +	fmul(xxprime, xprime, z);
 | |
| +	fmul(zzprime, x, zprime);
 | |
| +	memcpy(origxprime, xxprime, sizeof(limb) * 5);
 | |
| +	fsum(xxprime, zzprime);
 | |
| +	fdifference_backwards(zzprime, origxprime);
 | |
| +	fsquare_times(x3, xxprime, 1);
 | |
| +	fsquare_times(zzzprime, zzprime, 1);
 | |
| +	fmul(z3, zzzprime, qmqp);
 | |
| +
 | |
| +	fsquare_times(xx, x, 1);
 | |
| +	fsquare_times(zz, z, 1);
 | |
| +	fmul(x2, xx, zz);
 | |
| +	fdifference_backwards(zz, xx);  // does zz = xx - zz
 | |
| +	fscalar_product(zzz, zz, 121665);
 | |
| +	fsum(zzz, xx);
 | |
| +	fmul(z2, zz, zzz);
 | |
| +}
 | |
| +
 | |
| +/* Maybe swap the contents of two limb arrays (@a and @b), each 5 elements
 | |
| + * long. Perform the swap iff @iswap is 1; @iswap must be 0 or 1.
 | |
| + *
 | |
| + * This function performs the swap without leaking any side-channel
 | |
| + * information.
 | |
| + */
 | |
| +static void swap_conditional(limb a[5], limb b[5], limb iswap)
 | |
| +{
 | |
| +	unsigned i;
 | |
| +	const limb swap = -iswap;
 | |
| +
 | |
| +	for (i = 0; i < 5; ++i) {
 | |
| +		const limb x = swap & (a[i] ^ b[i]);
 | |
| +		a[i] ^= x;
 | |
| +		b[i] ^= x;
 | |
| +	}
 | |
| +}
 | |
| +
 | |
| +/* Calculates nQ where Q is the x-coordinate of a point on the curve
 | |
| + *
 | |
| + *   resultx/resultz: the x coordinate of the resulting curve point (short form)
 | |
| + *   n: a little endian, 32-byte number
 | |
| + *   q: a point of the curve (short form)
 | |
| + */
 | |
| +static void cmult(limb *resultx, limb *resultz, const u8 *n, const limb *q)
 | |
| +{
 | |
| +	limb a[5] = {0}, b[5] = {1}, c[5] = {1}, d[5] = {0};
 | |
| +	limb *nqpqx = a, *nqpqz = b, *nqx = c, *nqz = d, *t;
 | |
| +	limb e[5] = {0}, f[5] = {1}, g[5] = {0}, h[5] = {1};
 | |
| +	limb *nqpqx2 = e, *nqpqz2 = f, *nqx2 = g, *nqz2 = h;
 | |
| +
 | |
| +	unsigned i, j;
 | |
| +
 | |
| +	memcpy(nqpqx, q, sizeof(limb) * 5);
 | |
| +
 | |
| +	for (i = 0; i < 32; ++i) {
 | |
| +		u8 byte = n[31 - i];
 | |
| +		for (j = 0; j < 8; ++j) {
 | |
| +			const limb bit = byte >> 7;
 | |
| +
 | |
| +			swap_conditional(nqx, nqpqx, bit);
 | |
| +			swap_conditional(nqz, nqpqz, bit);
 | |
| +			fmonty(nqx2, nqz2,
 | |
| +						 nqpqx2, nqpqz2,
 | |
| +						 nqx, nqz,
 | |
| +						 nqpqx, nqpqz,
 | |
| +						 q);
 | |
| +			swap_conditional(nqx2, nqpqx2, bit);
 | |
| +			swap_conditional(nqz2, nqpqz2, bit);
 | |
| +
 | |
| +			t = nqx;
 | |
| +			nqx = nqx2;
 | |
| +			nqx2 = t;
 | |
| +			t = nqz;
 | |
| +			nqz = nqz2;
 | |
| +			nqz2 = t;
 | |
| +			t = nqpqx;
 | |
| +			nqpqx = nqpqx2;
 | |
| +			nqpqx2 = t;
 | |
| +			t = nqpqz;
 | |
| +			nqpqz = nqpqz2;
 | |
| +			nqpqz2 = t;
 | |
| +
 | |
| +			byte <<= 1;
 | |
| +		}
 | |
| +	}
 | |
| +
 | |
| +	memcpy(resultx, nqx, sizeof(limb) * 5);
 | |
| +	memcpy(resultz, nqz, sizeof(limb) * 5);
 | |
| +}
 | |
| +
 | |
| +static void crecip(felem out, const felem z)
 | |
| +{
 | |
| +	felem a,t0,b,c;
 | |
| +
 | |
| +	/* 2 */ fsquare_times(a, z, 1); // a = 2
 | |
| +	/* 8 */ fsquare_times(t0, a, 2);
 | |
| +	/* 9 */ fmul(b, t0, z); // b = 9
 | |
| +	/* 11 */ fmul(a, b, a); // a = 11
 | |
| +	/* 22 */ fsquare_times(t0, a, 1);
 | |
| +	/* 2^5 - 2^0 = 31 */ fmul(b, t0, b);
 | |
| +	/* 2^10 - 2^5 */ fsquare_times(t0, b, 5);
 | |
| +	/* 2^10 - 2^0 */ fmul(b, t0, b);
 | |
| +	/* 2^20 - 2^10 */ fsquare_times(t0, b, 10);
 | |
| +	/* 2^20 - 2^0 */ fmul(c, t0, b);
 | |
| +	/* 2^40 - 2^20 */ fsquare_times(t0, c, 20);
 | |
| +	/* 2^40 - 2^0 */ fmul(t0, t0, c);
 | |
| +	/* 2^50 - 2^10 */ fsquare_times(t0, t0, 10);
 | |
| +	/* 2^50 - 2^0 */ fmul(b, t0, b);
 | |
| +	/* 2^100 - 2^50 */ fsquare_times(t0, b, 50);
 | |
| +	/* 2^100 - 2^0 */ fmul(c, t0, b);
 | |
| +	/* 2^200 - 2^100 */ fsquare_times(t0, c, 100);
 | |
| +	/* 2^200 - 2^0 */ fmul(t0, t0, c);
 | |
| +	/* 2^250 - 2^50 */ fsquare_times(t0, t0, 50);
 | |
| +	/* 2^250 - 2^0 */ fmul(t0, t0, b);
 | |
| +	/* 2^255 - 2^5 */ fsquare_times(t0, t0, 5);
 | |
| +	/* 2^255 - 21 */ fmul(out, t0, a);
 | |
| +}
 | |
| +
 | |
| +bool curve25519(u8 mypublic[CURVE25519_POINT_SIZE], const u8 secret[CURVE25519_POINT_SIZE], const u8 basepoint[CURVE25519_POINT_SIZE])
 | |
| +{
 | |
| +#ifdef CONFIG_X86_64
 | |
| +	if (curve25519_use_avx && irq_fpu_usable()) {
 | |
| +		kernel_fpu_begin();
 | |
| +		curve25519_sandy2x(mypublic, secret, basepoint);
 | |
| +		kernel_fpu_end();
 | |
| +	} else
 | |
| +#endif
 | |
| +	{
 | |
| +		limb bp[5], x[5], z[5], zmone[5];
 | |
| +		u8 e[32];
 | |
| +
 | |
| +		memcpy(e, secret, 32);
 | |
| +		normalize_secret(e);
 | |
| +
 | |
| +		fexpand(bp, basepoint);
 | |
| +		cmult(x, z, e, bp);
 | |
| +		crecip(zmone, z);
 | |
| +		fmul(z, x, zmone);
 | |
| +		fcontract(mypublic, z);
 | |
| +
 | |
| +		memzero_explicit(e, sizeof(e));
 | |
| +		memzero_explicit(bp, sizeof(bp));
 | |
| +		memzero_explicit(x, sizeof(x));
 | |
| +		memzero_explicit(z, sizeof(z));
 | |
| +		memzero_explicit(zmone, sizeof(zmone));
 | |
| +	}
 | |
| +	return crypto_memneq(mypublic, null_point, CURVE25519_POINT_SIZE);
 | |
| +}
 | |
| +
 | |
| +bool curve25519_generate_public(u8 pub[CURVE25519_POINT_SIZE], const u8 secret[CURVE25519_POINT_SIZE])
 | |
| +{
 | |
| +	static const u8 basepoint[CURVE25519_POINT_SIZE] __aligned(32) = { 9 };
 | |
| +#ifdef CONFIG_X86_64
 | |
| +	if (curve25519_use_avx && irq_fpu_usable()) {
 | |
| +		kernel_fpu_begin();
 | |
| +		curve25519_sandy2x_base(pub, secret);
 | |
| +		kernel_fpu_end();
 | |
| +		return crypto_memneq(pub, null_point, CURVE25519_POINT_SIZE);
 | |
| +	}
 | |
| +#endif
 | |
| +	return curve25519(pub, secret, basepoint);
 | |
| +}
 | |
| +#else
 | |
| +typedef s64 limb;
 | |
| +
 | |
| +/* Field element representation:
 | |
| + *
 | |
| + * Field elements are written as an array of signed, 64-bit limbs, least
 | |
| + * significant first. The value of the field element is:
 | |
| + *   x[0] + 2^26·x[1] + 2^51·x[2] + 2^77·x[3] + 2^102·x[4] + ...
 | |
| + *
 | |
| + * i.e. the limbs are 26, 25, 26, 25, ... bits wide. */
 | |
| +
 | |
| +/* Sum two numbers: output += in */
 | |
| +static void fsum(limb *output, const limb *in)
 | |
| +{
 | |
| +	unsigned i;
 | |
| +	for (i = 0; i < 10; i += 2) {
 | |
| +		output[0 + i] = output[0 + i] + in[0 + i];
 | |
| +		output[1 + i] = output[1 + i] + in[1 + i];
 | |
| +	}
 | |
| +}
 | |
| +
 | |
| +/* Find the difference of two numbers: output = in - output
 | |
| + * (note the order of the arguments!). */
 | |
| +static void fdifference(limb *output, const limb *in)
 | |
| +{
 | |
| +	unsigned i;
 | |
| +	for (i = 0; i < 10; ++i)
 | |
| +		output[i] = in[i] - output[i];
 | |
| +}
 | |
| +
 | |
| +/* Multiply a number by a scalar: output = in * scalar */
 | |
| +static void fscalar_product(limb *output, const limb *in, const limb scalar)
 | |
| +{
 | |
| +	unsigned i;
 | |
| +	for (i = 0; i < 10; ++i)
 | |
| +		output[i] = in[i] * scalar;
 | |
| +}
 | |
| +
 | |
| +/* Multiply two numbers: output = in2 * in
 | |
| + *
 | |
| + * output must be distinct from both inputs. The inputs are reduced coefficient
 | |
| + * form, the output is not.
 | |
| + *
 | |
| + * output[x] <= 14 * the largest product of the input limbs. */
 | |
| +static void fproduct(limb *output, const limb *in2, const limb *in)
 | |
| +{
 | |
| +	output[0] =       ((limb) ((s32) in2[0])) * ((s32) in[0]);
 | |
| +	output[1] =       ((limb) ((s32) in2[0])) * ((s32) in[1]) +
 | |
| +					    ((limb) ((s32) in2[1])) * ((s32) in[0]);
 | |
| +	output[2] =  2 *  ((limb) ((s32) in2[1])) * ((s32) in[1]) +
 | |
| +					    ((limb) ((s32) in2[0])) * ((s32) in[2]) +
 | |
| +					    ((limb) ((s32) in2[2])) * ((s32) in[0]);
 | |
| +	output[3] =       ((limb) ((s32) in2[1])) * ((s32) in[2]) +
 | |
| +					    ((limb) ((s32) in2[2])) * ((s32) in[1]) +
 | |
| +					    ((limb) ((s32) in2[0])) * ((s32) in[3]) +
 | |
| +					    ((limb) ((s32) in2[3])) * ((s32) in[0]);
 | |
| +	output[4] =       ((limb) ((s32) in2[2])) * ((s32) in[2]) +
 | |
| +				       2 * (((limb) ((s32) in2[1])) * ((s32) in[3]) +
 | |
| +					    ((limb) ((s32) in2[3])) * ((s32) in[1])) +
 | |
| +					    ((limb) ((s32) in2[0])) * ((s32) in[4]) +
 | |
| +					    ((limb) ((s32) in2[4])) * ((s32) in[0]);
 | |
| +	output[5] =       ((limb) ((s32) in2[2])) * ((s32) in[3]) +
 | |
| +					    ((limb) ((s32) in2[3])) * ((s32) in[2]) +
 | |
| +					    ((limb) ((s32) in2[1])) * ((s32) in[4]) +
 | |
| +					    ((limb) ((s32) in2[4])) * ((s32) in[1]) +
 | |
| +					    ((limb) ((s32) in2[0])) * ((s32) in[5]) +
 | |
| +					    ((limb) ((s32) in2[5])) * ((s32) in[0]);
 | |
| +	output[6] =  2 * (((limb) ((s32) in2[3])) * ((s32) in[3]) +
 | |
| +					    ((limb) ((s32) in2[1])) * ((s32) in[5]) +
 | |
| +					    ((limb) ((s32) in2[5])) * ((s32) in[1])) +
 | |
| +					    ((limb) ((s32) in2[2])) * ((s32) in[4]) +
 | |
| +					    ((limb) ((s32) in2[4])) * ((s32) in[2]) +
 | |
| +					    ((limb) ((s32) in2[0])) * ((s32) in[6]) +
 | |
| +					    ((limb) ((s32) in2[6])) * ((s32) in[0]);
 | |
| +	output[7] =       ((limb) ((s32) in2[3])) * ((s32) in[4]) +
 | |
| +					    ((limb) ((s32) in2[4])) * ((s32) in[3]) +
 | |
| +					    ((limb) ((s32) in2[2])) * ((s32) in[5]) +
 | |
| +					    ((limb) ((s32) in2[5])) * ((s32) in[2]) +
 | |
| +					    ((limb) ((s32) in2[1])) * ((s32) in[6]) +
 | |
| +					    ((limb) ((s32) in2[6])) * ((s32) in[1]) +
 | |
| +					    ((limb) ((s32) in2[0])) * ((s32) in[7]) +
 | |
| +					    ((limb) ((s32) in2[7])) * ((s32) in[0]);
 | |
| +	output[8] =       ((limb) ((s32) in2[4])) * ((s32) in[4]) +
 | |
| +				       2 * (((limb) ((s32) in2[3])) * ((s32) in[5]) +
 | |
| +					    ((limb) ((s32) in2[5])) * ((s32) in[3]) +
 | |
| +					    ((limb) ((s32) in2[1])) * ((s32) in[7]) +
 | |
| +					    ((limb) ((s32) in2[7])) * ((s32) in[1])) +
 | |
| +					    ((limb) ((s32) in2[2])) * ((s32) in[6]) +
 | |
| +					    ((limb) ((s32) in2[6])) * ((s32) in[2]) +
 | |
| +					    ((limb) ((s32) in2[0])) * ((s32) in[8]) +
 | |
| +					    ((limb) ((s32) in2[8])) * ((s32) in[0]);
 | |
| +	output[9] =       ((limb) ((s32) in2[4])) * ((s32) in[5]) +
 | |
| +					    ((limb) ((s32) in2[5])) * ((s32) in[4]) +
 | |
| +					    ((limb) ((s32) in2[3])) * ((s32) in[6]) +
 | |
| +					    ((limb) ((s32) in2[6])) * ((s32) in[3]) +
 | |
| +					    ((limb) ((s32) in2[2])) * ((s32) in[7]) +
 | |
| +					    ((limb) ((s32) in2[7])) * ((s32) in[2]) +
 | |
| +					    ((limb) ((s32) in2[1])) * ((s32) in[8]) +
 | |
| +					    ((limb) ((s32) in2[8])) * ((s32) in[1]) +
 | |
| +					    ((limb) ((s32) in2[0])) * ((s32) in[9]) +
 | |
| +					    ((limb) ((s32) in2[9])) * ((s32) in[0]);
 | |
| +	output[10] = 2 * (((limb) ((s32) in2[5])) * ((s32) in[5]) +
 | |
| +					    ((limb) ((s32) in2[3])) * ((s32) in[7]) +
 | |
| +					    ((limb) ((s32) in2[7])) * ((s32) in[3]) +
 | |
| +					    ((limb) ((s32) in2[1])) * ((s32) in[9]) +
 | |
| +					    ((limb) ((s32) in2[9])) * ((s32) in[1])) +
 | |
| +					    ((limb) ((s32) in2[4])) * ((s32) in[6]) +
 | |
| +					    ((limb) ((s32) in2[6])) * ((s32) in[4]) +
 | |
| +					    ((limb) ((s32) in2[2])) * ((s32) in[8]) +
 | |
| +					    ((limb) ((s32) in2[8])) * ((s32) in[2]);
 | |
| +	output[11] =      ((limb) ((s32) in2[5])) * ((s32) in[6]) +
 | |
| +					    ((limb) ((s32) in2[6])) * ((s32) in[5]) +
 | |
| +					    ((limb) ((s32) in2[4])) * ((s32) in[7]) +
 | |
| +					    ((limb) ((s32) in2[7])) * ((s32) in[4]) +
 | |
| +					    ((limb) ((s32) in2[3])) * ((s32) in[8]) +
 | |
| +					    ((limb) ((s32) in2[8])) * ((s32) in[3]) +
 | |
| +					    ((limb) ((s32) in2[2])) * ((s32) in[9]) +
 | |
| +					    ((limb) ((s32) in2[9])) * ((s32) in[2]);
 | |
| +	output[12] =      ((limb) ((s32) in2[6])) * ((s32) in[6]) +
 | |
| +				       2 * (((limb) ((s32) in2[5])) * ((s32) in[7]) +
 | |
| +					    ((limb) ((s32) in2[7])) * ((s32) in[5]) +
 | |
| +					    ((limb) ((s32) in2[3])) * ((s32) in[9]) +
 | |
| +					    ((limb) ((s32) in2[9])) * ((s32) in[3])) +
 | |
| +					    ((limb) ((s32) in2[4])) * ((s32) in[8]) +
 | |
| +					    ((limb) ((s32) in2[8])) * ((s32) in[4]);
 | |
| +	output[13] =      ((limb) ((s32) in2[6])) * ((s32) in[7]) +
 | |
| +					    ((limb) ((s32) in2[7])) * ((s32) in[6]) +
 | |
| +					    ((limb) ((s32) in2[5])) * ((s32) in[8]) +
 | |
| +					    ((limb) ((s32) in2[8])) * ((s32) in[5]) +
 | |
| +					    ((limb) ((s32) in2[4])) * ((s32) in[9]) +
 | |
| +					    ((limb) ((s32) in2[9])) * ((s32) in[4]);
 | |
| +	output[14] = 2 * (((limb) ((s32) in2[7])) * ((s32) in[7]) +
 | |
| +					    ((limb) ((s32) in2[5])) * ((s32) in[9]) +
 | |
| +					    ((limb) ((s32) in2[9])) * ((s32) in[5])) +
 | |
| +					    ((limb) ((s32) in2[6])) * ((s32) in[8]) +
 | |
| +					    ((limb) ((s32) in2[8])) * ((s32) in[6]);
 | |
| +	output[15] =      ((limb) ((s32) in2[7])) * ((s32) in[8]) +
 | |
| +					    ((limb) ((s32) in2[8])) * ((s32) in[7]) +
 | |
| +					    ((limb) ((s32) in2[6])) * ((s32) in[9]) +
 | |
| +					    ((limb) ((s32) in2[9])) * ((s32) in[6]);
 | |
| +	output[16] =      ((limb) ((s32) in2[8])) * ((s32) in[8]) +
 | |
| +				       2 * (((limb) ((s32) in2[7])) * ((s32) in[9]) +
 | |
| +					    ((limb) ((s32) in2[9])) * ((s32) in[7]));
 | |
| +	output[17] =      ((limb) ((s32) in2[8])) * ((s32) in[9]) +
 | |
| +					    ((limb) ((s32) in2[9])) * ((s32) in[8]);
 | |
| +	output[18] = 2 *  ((limb) ((s32) in2[9])) * ((s32) in[9]);
 | |
| +}
 | |
| +
 | |
| +/* Reduce a long form to a short form by taking the input mod 2^255 - 19.
 | |
| + *
 | |
| + * On entry: |output[i]| < 14*2^54
 | |
| + * On exit: |output[0..8]| < 280*2^54 */
 | |
| +static void freduce_degree(limb *output)
 | |
| +{
 | |
| +	/* Each of these shifts and adds ends up multiplying the value by 19.
 | |
| +	 *
 | |
| +	 * For output[0..8], the absolute entry value is < 14*2^54 and we add, at
 | |
| +	 * most, 19*14*2^54 thus, on exit, |output[0..8]| < 280*2^54. */
 | |
| +	output[8] += output[18] << 4;
 | |
| +	output[8] += output[18] << 1;
 | |
| +	output[8] += output[18];
 | |
| +	output[7] += output[17] << 4;
 | |
| +	output[7] += output[17] << 1;
 | |
| +	output[7] += output[17];
 | |
| +	output[6] += output[16] << 4;
 | |
| +	output[6] += output[16] << 1;
 | |
| +	output[6] += output[16];
 | |
| +	output[5] += output[15] << 4;
 | |
| +	output[5] += output[15] << 1;
 | |
| +	output[5] += output[15];
 | |
| +	output[4] += output[14] << 4;
 | |
| +	output[4] += output[14] << 1;
 | |
| +	output[4] += output[14];
 | |
| +	output[3] += output[13] << 4;
 | |
| +	output[3] += output[13] << 1;
 | |
| +	output[3] += output[13];
 | |
| +	output[2] += output[12] << 4;
 | |
| +	output[2] += output[12] << 1;
 | |
| +	output[2] += output[12];
 | |
| +	output[1] += output[11] << 4;
 | |
| +	output[1] += output[11] << 1;
 | |
| +	output[1] += output[11];
 | |
| +	output[0] += output[10] << 4;
 | |
| +	output[0] += output[10] << 1;
 | |
| +	output[0] += output[10];
 | |
| +}
 | |
| +
 | |
| +#if (-1 & 3) != 3
 | |
| +#error "This code only works on a two's complement system"
 | |
| +#endif
 | |
| +
 | |
| +/* return v / 2^26, using only shifts and adds.
 | |
| + *
 | |
| + * On entry: v can take any value. */
 | |
| +static inline limb div_by_2_26(const limb v)
 | |
| +{
 | |
| +	/* High word of v; no shift needed. */
 | |
| +	const u32 highword = (u32) (((u64) v) >> 32);
 | |
| +	/* Set to all 1s if v was negative; else set to 0s. */
 | |
| +	const s32 sign = ((s32) highword) >> 31;
 | |
| +	/* Set to 0x3ffffff if v was negative; else set to 0. */
 | |
| +	const s32 roundoff = ((u32) sign) >> 6;
 | |
| +	/* Should return v / (1<<26) */
 | |
| +	return (v + roundoff) >> 26;
 | |
| +}
 | |
| +
 | |
| +/* return v / (2^25), using only shifts and adds.
 | |
| + *
 | |
| + * On entry: v can take any value. */
 | |
| +static inline limb div_by_2_25(const limb v)
 | |
| +{
 | |
| +	/* High word of v; no shift needed*/
 | |
| +	const u32 highword = (u32) (((u64) v) >> 32);
 | |
| +	/* Set to all 1s if v was negative; else set to 0s. */
 | |
| +	const s32 sign = ((s32) highword) >> 31;
 | |
| +	/* Set to 0x1ffffff if v was negative; else set to 0. */
 | |
| +	const s32 roundoff = ((u32) sign) >> 7;
 | |
| +	/* Should return v / (1<<25) */
 | |
| +	return (v + roundoff) >> 25;
 | |
| +}
 | |
| +
 | |
| +/* Reduce all coefficients of the short form input so that |x| < 2^26.
 | |
| + *
 | |
| + * On entry: |output[i]| < 280*2^54 */
 | |
| +static void freduce_coefficients(limb *output)
 | |
| +{
 | |
| +	unsigned i;
 | |
| +
 | |
| +	output[10] = 0;
 | |
| +
 | |
| +	for (i = 0; i < 10; i += 2) {
 | |
| +		limb over = div_by_2_26(output[i]);
 | |
| +		/* The entry condition (that |output[i]| < 280*2^54) means that over is, at
 | |
| +		 * most, 280*2^28 in the first iteration of this loop. This is added to the
 | |
| +		 * next limb and we can approximate the resulting bound of that limb by
 | |
| +		 * 281*2^54. */
 | |
| +		output[i] -= over << 26;
 | |
| +		output[i+1] += over;
 | |
| +
 | |
| +		/* For the first iteration, |output[i+1]| < 281*2^54, thus |over| <
 | |
| +		 * 281*2^29. When this is added to the next limb, the resulting bound can
 | |
| +		 * be approximated as 281*2^54.
 | |
| +		 *
 | |
| +		 * For subsequent iterations of the loop, 281*2^54 remains a conservative
 | |
| +		 * bound and no overflow occurs. */
 | |
| +		over = div_by_2_25(output[i+1]);
 | |
| +		output[i+1] -= over << 25;
 | |
| +		output[i+2] += over;
 | |
| +	}
 | |
| +	/* Now |output[10]| < 281*2^29 and all other coefficients are reduced. */
 | |
| +	output[0] += output[10] << 4;
 | |
| +	output[0] += output[10] << 1;
 | |
| +	output[0] += output[10];
 | |
| +
 | |
| +	output[10] = 0;
 | |
| +
 | |
| +	/* Now output[1..9] are reduced, and |output[0]| < 2^26 + 19*281*2^29
 | |
| +	 * So |over| will be no more than 2^16. */
 | |
| +	{
 | |
| +		limb over = div_by_2_26(output[0]);
 | |
| +		output[0] -= over << 26;
 | |
| +		output[1] += over;
 | |
| +	}
 | |
| +
 | |
| +	/* Now output[0,2..9] are reduced, and |output[1]| < 2^25 + 2^16 < 2^26. The
 | |
| +	 * bound on |output[1]| is sufficient to meet our needs. */
 | |
| +}
 | |
| +
 | |
| +/* A helpful wrapper around fproduct: output = in * in2.
 | |
| + *
 | |
| + * On entry: |in[i]| < 2^27 and |in2[i]| < 2^27.
 | |
| + *
 | |
| + * output must be distinct from both inputs. The output is reduced degree
 | |
| + * (indeed, one need only provide storage for 10 limbs) and |output[i]| < 2^26. */
 | |
| +static void fmul(limb *output, const limb *in, const limb *in2)
 | |
| +{
 | |
| +	limb t[19];
 | |
| +	fproduct(t, in, in2);
 | |
| +	/* |t[i]| < 14*2^54 */
 | |
| +	freduce_degree(t);
 | |
| +	freduce_coefficients(t);
 | |
| +	/* |t[i]| < 2^26 */
 | |
| +	memcpy(output, t, sizeof(limb) * 10);
 | |
| +}
 | |
| +
 | |
| +/* Square a number: output = in**2
 | |
| + *
 | |
| + * output must be distinct from the input. The inputs are reduced coefficient
 | |
| + * form, the output is not.
 | |
| + *
 | |
| + * output[x] <= 14 * the largest product of the input limbs. */
 | |
| +static void fsquare_inner(limb *output, const limb *in)
 | |
| +{
 | |
| +	output[0] =       ((limb) ((s32) in[0])) * ((s32) in[0]);
 | |
| +	output[1] =  2 *  ((limb) ((s32) in[0])) * ((s32) in[1]);
 | |
| +	output[2] =  2 * (((limb) ((s32) in[1])) * ((s32) in[1]) +
 | |
| +					    ((limb) ((s32) in[0])) * ((s32) in[2]));
 | |
| +	output[3] =  2 * (((limb) ((s32) in[1])) * ((s32) in[2]) +
 | |
| +					    ((limb) ((s32) in[0])) * ((s32) in[3]));
 | |
| +	output[4] =       ((limb) ((s32) in[2])) * ((s32) in[2]) +
 | |
| +				       4 *  ((limb) ((s32) in[1])) * ((s32) in[3]) +
 | |
| +				       2 *  ((limb) ((s32) in[0])) * ((s32) in[4]);
 | |
| +	output[5] =  2 * (((limb) ((s32) in[2])) * ((s32) in[3]) +
 | |
| +					    ((limb) ((s32) in[1])) * ((s32) in[4]) +
 | |
| +					    ((limb) ((s32) in[0])) * ((s32) in[5]));
 | |
| +	output[6] =  2 * (((limb) ((s32) in[3])) * ((s32) in[3]) +
 | |
| +					    ((limb) ((s32) in[2])) * ((s32) in[4]) +
 | |
| +					    ((limb) ((s32) in[0])) * ((s32) in[6]) +
 | |
| +				       2 *  ((limb) ((s32) in[1])) * ((s32) in[5]));
 | |
| +	output[7] =  2 * (((limb) ((s32) in[3])) * ((s32) in[4]) +
 | |
| +					    ((limb) ((s32) in[2])) * ((s32) in[5]) +
 | |
| +					    ((limb) ((s32) in[1])) * ((s32) in[6]) +
 | |
| +					    ((limb) ((s32) in[0])) * ((s32) in[7]));
 | |
| +	output[8] =       ((limb) ((s32) in[4])) * ((s32) in[4]) +
 | |
| +				       2 * (((limb) ((s32) in[2])) * ((s32) in[6]) +
 | |
| +					    ((limb) ((s32) in[0])) * ((s32) in[8]) +
 | |
| +				       2 * (((limb) ((s32) in[1])) * ((s32) in[7]) +
 | |
| +					    ((limb) ((s32) in[3])) * ((s32) in[5])));
 | |
| +	output[9] =  2 * (((limb) ((s32) in[4])) * ((s32) in[5]) +
 | |
| +					    ((limb) ((s32) in[3])) * ((s32) in[6]) +
 | |
| +					    ((limb) ((s32) in[2])) * ((s32) in[7]) +
 | |
| +					    ((limb) ((s32) in[1])) * ((s32) in[8]) +
 | |
| +					    ((limb) ((s32) in[0])) * ((s32) in[9]));
 | |
| +	output[10] = 2 * (((limb) ((s32) in[5])) * ((s32) in[5]) +
 | |
| +					    ((limb) ((s32) in[4])) * ((s32) in[6]) +
 | |
| +					    ((limb) ((s32) in[2])) * ((s32) in[8]) +
 | |
| +				       2 * (((limb) ((s32) in[3])) * ((s32) in[7]) +
 | |
| +					    ((limb) ((s32) in[1])) * ((s32) in[9])));
 | |
| +	output[11] = 2 * (((limb) ((s32) in[5])) * ((s32) in[6]) +
 | |
| +					    ((limb) ((s32) in[4])) * ((s32) in[7]) +
 | |
| +					    ((limb) ((s32) in[3])) * ((s32) in[8]) +
 | |
| +					    ((limb) ((s32) in[2])) * ((s32) in[9]));
 | |
| +	output[12] =      ((limb) ((s32) in[6])) * ((s32) in[6]) +
 | |
| +				       2 * (((limb) ((s32) in[4])) * ((s32) in[8]) +
 | |
| +				       2 * (((limb) ((s32) in[5])) * ((s32) in[7]) +
 | |
| +					    ((limb) ((s32) in[3])) * ((s32) in[9])));
 | |
| +	output[13] = 2 * (((limb) ((s32) in[6])) * ((s32) in[7]) +
 | |
| +					    ((limb) ((s32) in[5])) * ((s32) in[8]) +
 | |
| +					    ((limb) ((s32) in[4])) * ((s32) in[9]));
 | |
| +	output[14] = 2 * (((limb) ((s32) in[7])) * ((s32) in[7]) +
 | |
| +					    ((limb) ((s32) in[6])) * ((s32) in[8]) +
 | |
| +				       2 *  ((limb) ((s32) in[5])) * ((s32) in[9]));
 | |
| +	output[15] = 2 * (((limb) ((s32) in[7])) * ((s32) in[8]) +
 | |
| +					    ((limb) ((s32) in[6])) * ((s32) in[9]));
 | |
| +	output[16] =      ((limb) ((s32) in[8])) * ((s32) in[8]) +
 | |
| +				       4 *  ((limb) ((s32) in[7])) * ((s32) in[9]);
 | |
| +	output[17] = 2 *  ((limb) ((s32) in[8])) * ((s32) in[9]);
 | |
| +	output[18] = 2 *  ((limb) ((s32) in[9])) * ((s32) in[9]);
 | |
| +}
 | |
| +
 | |
| +/* fsquare sets output = in^2.
 | |
| + *
 | |
| + * On entry: The |in| argument is in reduced coefficients form and |in[i]| <
 | |
| + * 2^27.
 | |
| + *
 | |
| + * On exit: The |output| argument is in reduced coefficients form (indeed, one
 | |
| + * need only provide storage for 10 limbs) and |out[i]| < 2^26. */
 | |
| +static void fsquare(limb *output, const limb *in)
 | |
| +{
 | |
| +	limb t[19];
 | |
| +	fsquare_inner(t, in);
 | |
| +	/* |t[i]| < 14*2^54 because the largest product of two limbs will be <
 | |
| +	 * 2^(27+27) and fsquare_inner adds together, at most, 14 of those
 | |
| +	 * products. */
 | |
| +	freduce_degree(t);
 | |
| +	freduce_coefficients(t);
 | |
| +	/* |t[i]| < 2^26 */
 | |
| +	memcpy(output, t, sizeof(limb) * 10);
 | |
| +}
 | |
| +
 | |
| +/* Take a little-endian, 32-byte number and expand it into polynomial form */
 | |
| +static inline void fexpand(limb *output, const u8 *input)
 | |
| +{
 | |
| +#define F(n,start,shift,mask) \
 | |
| +	output[n] = ((((limb) input[start + 0]) | \
 | |
| +		      ((limb) input[start + 1]) << 8 | \
 | |
| +		      ((limb) input[start + 2]) << 16 | \
 | |
| +		      ((limb) input[start + 3]) << 24) >> shift) & mask;
 | |
| +	F(0, 0, 0, 0x3ffffff);
 | |
| +	F(1, 3, 2, 0x1ffffff);
 | |
| +	F(2, 6, 3, 0x3ffffff);
 | |
| +	F(3, 9, 5, 0x1ffffff);
 | |
| +	F(4, 12, 6, 0x3ffffff);
 | |
| +	F(5, 16, 0, 0x1ffffff);
 | |
| +	F(6, 19, 1, 0x3ffffff);
 | |
| +	F(7, 22, 3, 0x1ffffff);
 | |
| +	F(8, 25, 4, 0x3ffffff);
 | |
| +	F(9, 28, 6, 0x1ffffff);
 | |
| +#undef F
 | |
| +}
 | |
| +
 | |
| +#if (-32 >> 1) != -16
 | |
| +#error "This code only works when >> does sign-extension on negative numbers"
 | |
| +#endif
 | |
| +
 | |
| +/* s32_eq returns 0xffffffff iff a == b and zero otherwise. */
 | |
| +static s32 s32_eq(s32 a, s32 b)
 | |
| +{
 | |
| +	a = ~(a ^ b);
 | |
| +	a &= a << 16;
 | |
| +	a &= a << 8;
 | |
| +	a &= a << 4;
 | |
| +	a &= a << 2;
 | |
| +	a &= a << 1;
 | |
| +	return a >> 31;
 | |
| +}
 | |
| +
 | |
| +/* s32_gte returns 0xffffffff if a >= b and zero otherwise, where a and b are
 | |
| + * both non-negative. */
 | |
| +static s32 s32_gte(s32 a, s32 b)
 | |
| +{
 | |
| +	a -= b;
 | |
| +	/* a >= 0 iff a >= b. */
 | |
| +	return ~(a >> 31);
 | |
| +}
 | |
| +
 | |
| +/* Take a fully reduced polynomial form number and contract it into a
 | |
| + * little-endian, 32-byte array.
 | |
| + *
 | |
| + * On entry: |input_limbs[i]| < 2^26 */
 | |
| +static void fcontract(u8 *output, limb *input_limbs)
 | |
| +{
 | |
| +	int i;
 | |
| +	int j;
 | |
| +	s32 input[10];
 | |
| +	s32 mask;
 | |
| +
 | |
| +	/* |input_limbs[i]| < 2^26, so it's valid to convert to an s32. */
 | |
| +	for (i = 0; i < 10; i++) {
 | |
| +		input[i] = input_limbs[i];
 | |
| +	}
 | |
| +
 | |
| +	for (j = 0; j < 2; ++j) {
 | |
| +		for (i = 0; i < 9; ++i) {
 | |
| +			if ((i & 1) == 1) {
 | |
| +				/* This calculation is a time-invariant way to make input[i]
 | |
| +				 * non-negative by borrowing from the next-larger limb. */
 | |
| +				const s32 mask = input[i] >> 31;
 | |
| +				const s32 carry = -((input[i] & mask) >> 25);
 | |
| +				input[i] = input[i] + (carry << 25);
 | |
| +				input[i+1] = input[i+1] - carry;
 | |
| +			} else {
 | |
| +				const s32 mask = input[i] >> 31;
 | |
| +				const s32 carry = -((input[i] & mask) >> 26);
 | |
| +				input[i] = input[i] + (carry << 26);
 | |
| +				input[i+1] = input[i+1] - carry;
 | |
| +			}
 | |
| +		}
 | |
| +
 | |
| +		/* There's no greater limb for input[9] to borrow from, but we can multiply
 | |
| +		 * by 19 and borrow from input[0], which is valid mod 2^255-19. */
 | |
| +		{
 | |
| +			const s32 mask = input[9] >> 31;
 | |
| +			const s32 carry = -((input[9] & mask) >> 25);
 | |
| +			input[9] = input[9] + (carry << 25);
 | |
| +			input[0] = input[0] - (carry * 19);
 | |
| +		}
 | |
| +
 | |
| +		/* After the first iteration, input[1..9] are non-negative and fit within
 | |
| +		 * 25 or 26 bits, depending on position. However, input[0] may be
 | |
| +		 * negative. */
 | |
| +	}
 | |
| +
 | |
| +	/* The first borrow-propagation pass above ended with every limb
 | |
| +	   except (possibly) input[0] non-negative.
 | |
| +	   If input[0] was negative after the first pass, then it was because of a
 | |
| +	   carry from input[9]. On entry, input[9] < 2^26 so the carry was, at most,
 | |
| +	   one, since (2**26-1) >> 25 = 1. Thus input[0] >= -19.
 | |
| +	   In the second pass, each limb is decreased by at most one. Thus the second
 | |
| +	   borrow-propagation pass could only have wrapped around to decrease
 | |
| +	   input[0] again if the first pass left input[0] negative *and* input[1]
 | |
| +	   through input[9] were all zero.  In that case, input[1] is now 2^25 - 1,
 | |
| +	   and this last borrow-propagation step will leave input[1] non-negative. */
 | |
| +	{
 | |
| +		const s32 mask = input[0] >> 31;
 | |
| +		const s32 carry = -((input[0] & mask) >> 26);
 | |
| +		input[0] = input[0] + (carry << 26);
 | |
| +		input[1] = input[1] - carry;
 | |
| +	}
 | |
| +
 | |
| +	/* All input[i] are now non-negative. However, there might be values between
 | |
| +	 * 2^25 and 2^26 in a limb which is, nominally, 25 bits wide. */
 | |
| +	for (j = 0; j < 2; j++) {
 | |
| +		for (i = 0; i < 9; i++) {
 | |
| +			if ((i & 1) == 1) {
 | |
| +				const s32 carry = input[i] >> 25;
 | |
| +				input[i] &= 0x1ffffff;
 | |
| +				input[i+1] += carry;
 | |
| +			} else {
 | |
| +				const s32 carry = input[i] >> 26;
 | |
| +				input[i] &= 0x3ffffff;
 | |
| +				input[i+1] += carry;
 | |
| +			}
 | |
| +		}
 | |
| +
 | |
| +		{
 | |
| +			const s32 carry = input[9] >> 25;
 | |
| +			input[9] &= 0x1ffffff;
 | |
| +			input[0] += 19*carry;
 | |
| +		}
 | |
| +	}
 | |
| +
 | |
| +	/* If the first carry-chain pass, just above, ended up with a carry from
 | |
| +	 * input[9], and that caused input[0] to be out-of-bounds, then input[0] was
 | |
| +	 * < 2^26 + 2*19, because the carry was, at most, two.
 | |
| +	 *
 | |
| +	 * If the second pass carried from input[9] again then input[0] is < 2*19 and
 | |
| +	 * the input[9] -> input[0] carry didn't push input[0] out of bounds. */
 | |
| +
 | |
| +	/* It still remains the case that input might be between 2^255-19 and 2^255.
 | |
| +	 * In this case, input[1..9] must take their maximum value and input[0] must
 | |
| +	 * be >= (2^255-19) & 0x3ffffff, which is 0x3ffffed. */
 | |
| +	mask = s32_gte(input[0], 0x3ffffed);
 | |
| +	for (i = 1; i < 10; i++) {
 | |
| +		if ((i & 1) == 1) {
 | |
| +			mask &= s32_eq(input[i], 0x1ffffff);
 | |
| +		} else {
 | |
| +			mask &= s32_eq(input[i], 0x3ffffff);
 | |
| +		}
 | |
| +	}
 | |
| +
 | |
| +	/* mask is 0xffffffff if input >= 2^255-19 and zero otherwise. Thus
 | |
| +	 * this conditionally subtracts 2^255-19. */
 | |
| +	input[0] -= mask & 0x3ffffed;
 | |
| +
 | |
| +	for (i = 1; i < 10; i++) {
 | |
| +		if ((i & 1) == 1) {
 | |
| +			input[i] -= mask & 0x1ffffff;
 | |
| +		} else {
 | |
| +			input[i] -= mask & 0x3ffffff;
 | |
| +		}
 | |
| +	}
 | |
| +
 | |
| +	input[1] <<= 2;
 | |
| +	input[2] <<= 3;
 | |
| +	input[3] <<= 5;
 | |
| +	input[4] <<= 6;
 | |
| +	input[6] <<= 1;
 | |
| +	input[7] <<= 3;
 | |
| +	input[8] <<= 4;
 | |
| +	input[9] <<= 6;
 | |
| +#define F(i, s) \
 | |
| +	output[s+0] |=  input[i] & 0xff; \
 | |
| +	output[s+1]  = (input[i] >> 8) & 0xff; \
 | |
| +	output[s+2]  = (input[i] >> 16) & 0xff; \
 | |
| +	output[s+3]  = (input[i] >> 24) & 0xff;
 | |
| +	output[0] = 0;
 | |
| +	output[16] = 0;
 | |
| +	F(0,0);
 | |
| +	F(1,3);
 | |
| +	F(2,6);
 | |
| +	F(3,9);
 | |
| +	F(4,12);
 | |
| +	F(5,16);
 | |
| +	F(6,19);
 | |
| +	F(7,22);
 | |
| +	F(8,25);
 | |
| +	F(9,28);
 | |
| +#undef F
 | |
| +}
 | |
| +
 | |
| +/* Conditionally swap two reduced-form limb arrays if 'iswap' is 1, but leave
 | |
| + * them unchanged if 'iswap' is 0.  Runs in data-invariant time to avoid
 | |
| + * side-channel attacks.
 | |
| + *
 | |
| + * NOTE that this function requires that 'iswap' be 1 or 0; other values give
 | |
| + * wrong results.  Also, the two limb arrays must be in reduced-coefficient,
 | |
| + * reduced-degree form: the values in a[10..18] or b[10..18] aren't swapped,
 | |
| + * and all values in a[0..9],b[0..9] must have magnitude less than
 | |
| + * INT32_MAX. */
 | |
| +static void swap_conditional(limb a[19], limb b[19], limb iswap)
 | |
| +{
 | |
| +	unsigned i;
 | |
| +	const s32 swap = (s32) -iswap;
 | |
| +
 | |
| +	for (i = 0; i < 10; ++i) {
 | |
| +		const s32 x = swap & ( ((s32)a[i]) ^ ((s32)b[i]) );
 | |
| +		a[i] = ((s32)a[i]) ^ x;
 | |
| +		b[i] = ((s32)b[i]) ^ x;
 | |
| +	}
 | |
| +}
 | |
| +
 | |
| +static void crecip(limb *out, const limb *z)
 | |
| +{
 | |
| +	limb z2[10];
 | |
| +	limb z9[10];
 | |
| +	limb z11[10];
 | |
| +	limb z2_5_0[10];
 | |
| +	limb z2_10_0[10];
 | |
| +	limb z2_20_0[10];
 | |
| +	limb z2_50_0[10];
 | |
| +	limb z2_100_0[10];
 | |
| +	limb t0[10];
 | |
| +	limb t1[10];
 | |
| +	int i;
 | |
| +
 | |
| +	/* 2 */ fsquare(z2,z);
 | |
| +	/* 4 */ fsquare(t1,z2);
 | |
| +	/* 8 */ fsquare(t0,t1);
 | |
| +	/* 9 */ fmul(z9,t0,z);
 | |
| +	/* 11 */ fmul(z11,z9,z2);
 | |
| +	/* 22 */ fsquare(t0,z11);
 | |
| +	/* 2^5 - 2^0 = 31 */ fmul(z2_5_0,t0,z9);
 | |
| +
 | |
| +	/* 2^6 - 2^1 */ fsquare(t0,z2_5_0);
 | |
| +	/* 2^7 - 2^2 */ fsquare(t1,t0);
 | |
| +	/* 2^8 - 2^3 */ fsquare(t0,t1);
 | |
| +	/* 2^9 - 2^4 */ fsquare(t1,t0);
 | |
| +	/* 2^10 - 2^5 */ fsquare(t0,t1);
 | |
| +	/* 2^10 - 2^0 */ fmul(z2_10_0,t0,z2_5_0);
 | |
| +
 | |
| +	/* 2^11 - 2^1 */ fsquare(t0,z2_10_0);
 | |
| +	/* 2^12 - 2^2 */ fsquare(t1,t0);
 | |
| +	/* 2^20 - 2^10 */ for (i = 2; i < 10; i += 2) { fsquare(t0,t1); fsquare(t1,t0); }
 | |
| +	/* 2^20 - 2^0 */ fmul(z2_20_0,t1,z2_10_0);
 | |
| +
 | |
| +	/* 2^21 - 2^1 */ fsquare(t0,z2_20_0);
 | |
| +	/* 2^22 - 2^2 */ fsquare(t1,t0);
 | |
| +	/* 2^40 - 2^20 */ for (i = 2; i < 20; i += 2) { fsquare(t0,t1); fsquare(t1,t0); }
 | |
| +	/* 2^40 - 2^0 */ fmul(t0,t1,z2_20_0);
 | |
| +
 | |
| +	/* 2^41 - 2^1 */ fsquare(t1,t0);
 | |
| +	/* 2^42 - 2^2 */ fsquare(t0,t1);
 | |
| +	/* 2^50 - 2^10 */ for (i = 2; i < 10; i += 2) { fsquare(t1,t0); fsquare(t0,t1); }
 | |
| +	/* 2^50 - 2^0 */ fmul(z2_50_0,t0,z2_10_0);
 | |
| +
 | |
| +	/* 2^51 - 2^1 */ fsquare(t0,z2_50_0);
 | |
| +	/* 2^52 - 2^2 */ fsquare(t1,t0);
 | |
| +	/* 2^100 - 2^50 */ for (i = 2; i < 50; i += 2) { fsquare(t0,t1); fsquare(t1,t0); }
 | |
| +	/* 2^100 - 2^0 */ fmul(z2_100_0,t1,z2_50_0);
 | |
| +
 | |
| +	/* 2^101 - 2^1 */ fsquare(t1,z2_100_0);
 | |
| +	/* 2^102 - 2^2 */ fsquare(t0,t1);
 | |
| +	/* 2^200 - 2^100 */ for (i = 2; i < 100; i += 2) { fsquare(t1,t0); fsquare(t0,t1); }
 | |
| +	/* 2^200 - 2^0 */ fmul(t1,t0,z2_100_0);
 | |
| +
 | |
| +	/* 2^201 - 2^1 */ fsquare(t0,t1);
 | |
| +	/* 2^202 - 2^2 */ fsquare(t1,t0);
 | |
| +	/* 2^250 - 2^50 */ for (i = 2; i < 50; i += 2) { fsquare(t0,t1); fsquare(t1,t0); }
 | |
| +	/* 2^250 - 2^0 */ fmul(t0,t1,z2_50_0);
 | |
| +
 | |
| +	/* 2^251 - 2^1 */ fsquare(t1,t0);
 | |
| +	/* 2^252 - 2^2 */ fsquare(t0,t1);
 | |
| +	/* 2^253 - 2^3 */ fsquare(t1,t0);
 | |
| +	/* 2^254 - 2^4 */ fsquare(t0,t1);
 | |
| +	/* 2^255 - 2^5 */ fsquare(t1,t0);
 | |
| +	/* 2^255 - 21 */ fmul(out,t1,z11);
 | |
| +}
 | |
| +
 | |
| +
 | |
| +#ifdef ARCH_HAS_SEPARATE_IRQ_STACK
 | |
| +/* Input: Q, Q', Q-Q'
 | |
| + * Output: 2Q, Q+Q'
 | |
| + *
 | |
| + *   x2 z2: long form
 | |
| + *   x3 z3: long form
 | |
| + *   x z: short form, destroyed
 | |
| + *   xprime zprime: short form, destroyed
 | |
| + *   qmqp: short form, preserved
 | |
| + *
 | |
| + * On entry and exit, the absolute value of the limbs of all inputs and outputs
 | |
| + * are < 2^26. */
 | |
| +static void fmonty(limb *x2, limb *z2,  /* output 2Q */
 | |
| +		   limb *x3, limb *z3,  /* output Q + Q' */
 | |
| +		   limb *x, limb *z,    /* input Q */
 | |
| +		   limb *xprime, limb *zprime,  /* input Q' */
 | |
| +		   const limb *qmqp /* input Q - Q' */)
 | |
| +{
 | |
| +	limb origx[10], origxprime[10], zzz[19], xx[19], zz[19], xxprime[19],
 | |
| +				zzprime[19], zzzprime[19], xxxprime[19];
 | |
| +
 | |
| +	memcpy(origx, x, 10 * sizeof(limb));
 | |
| +	fsum(x, z);
 | |
| +	/* |x[i]| < 2^27 */
 | |
| +	fdifference(z, origx);  /* does x - z */
 | |
| +	/* |z[i]| < 2^27 */
 | |
| +
 | |
| +	memcpy(origxprime, xprime, sizeof(limb) * 10);
 | |
| +	fsum(xprime, zprime);
 | |
| +	/* |xprime[i]| < 2^27 */
 | |
| +	fdifference(zprime, origxprime);
 | |
| +	/* |zprime[i]| < 2^27 */
 | |
| +	fproduct(xxprime, xprime, z);
 | |
| +	/* |xxprime[i]| < 14*2^54: the largest product of two limbs will be <
 | |
| +	 * 2^(27+27) and fproduct adds together, at most, 14 of those products.
 | |
| +	 * (Approximating that to 2^58 doesn't work out.) */
 | |
| +	fproduct(zzprime, x, zprime);
 | |
| +	/* |zzprime[i]| < 14*2^54 */
 | |
| +	freduce_degree(xxprime);
 | |
| +	freduce_coefficients(xxprime);
 | |
| +	/* |xxprime[i]| < 2^26 */
 | |
| +	freduce_degree(zzprime);
 | |
| +	freduce_coefficients(zzprime);
 | |
| +	/* |zzprime[i]| < 2^26 */
 | |
| +	memcpy(origxprime, xxprime, sizeof(limb) * 10);
 | |
| +	fsum(xxprime, zzprime);
 | |
| +	/* |xxprime[i]| < 2^27 */
 | |
| +	fdifference(zzprime, origxprime);
 | |
| +	/* |zzprime[i]| < 2^27 */
 | |
| +	fsquare(xxxprime, xxprime);
 | |
| +	/* |xxxprime[i]| < 2^26 */
 | |
| +	fsquare(zzzprime, zzprime);
 | |
| +	/* |zzzprime[i]| < 2^26 */
 | |
| +	fproduct(zzprime, zzzprime, qmqp);
 | |
| +	/* |zzprime[i]| < 14*2^52 */
 | |
| +	freduce_degree(zzprime);
 | |
| +	freduce_coefficients(zzprime);
 | |
| +	/* |zzprime[i]| < 2^26 */
 | |
| +	memcpy(x3, xxxprime, sizeof(limb) * 10);
 | |
| +	memcpy(z3, zzprime, sizeof(limb) * 10);
 | |
| +
 | |
| +	fsquare(xx, x);
 | |
| +	/* |xx[i]| < 2^26 */
 | |
| +	fsquare(zz, z);
 | |
| +	/* |zz[i]| < 2^26 */
 | |
| +	fproduct(x2, xx, zz);
 | |
| +	/* |x2[i]| < 14*2^52 */
 | |
| +	freduce_degree(x2);
 | |
| +	freduce_coefficients(x2);
 | |
| +	/* |x2[i]| < 2^26 */
 | |
| +	fdifference(zz, xx);  // does zz = xx - zz
 | |
| +	/* |zz[i]| < 2^27 */
 | |
| +	memset(zzz + 10, 0, sizeof(limb) * 9);
 | |
| +	fscalar_product(zzz, zz, 121665);
 | |
| +	/* |zzz[i]| < 2^(27+17) */
 | |
| +	/* No need to call freduce_degree here:
 | |
| +		 fscalar_product doesn't increase the degree of its input. */
 | |
| +	freduce_coefficients(zzz);
 | |
| +	/* |zzz[i]| < 2^26 */
 | |
| +	fsum(zzz, xx);
 | |
| +	/* |zzz[i]| < 2^27 */
 | |
| +	fproduct(z2, zz, zzz);
 | |
| +	/* |z2[i]| < 14*2^(26+27) */
 | |
| +	freduce_degree(z2);
 | |
| +	freduce_coefficients(z2);
 | |
| +	/* |z2[i]| < 2^26 */
 | |
| +}
 | |
| +
 | |
| +/* Calculates nQ where Q is the x-coordinate of a point on the curve
 | |
| + *
 | |
| + *   resultx/resultz: the x coordinate of the resulting curve point (short form)
 | |
| + *   n: a little endian, 32-byte number
 | |
| + *   q: a point of the curve (short form) */
 | |
| +static void cmult(limb *resultx, limb *resultz, const u8 *n, const limb *q)
 | |
| +{
 | |
| +	limb a[19] = {0}, b[19] = {1}, c[19] = {1}, d[19] = {0};
 | |
| +	limb *nqpqx = a, *nqpqz = b, *nqx = c, *nqz = d, *t;
 | |
| +	limb e[19] = {0}, f[19] = {1}, g[19] = {0}, h[19] = {1};
 | |
| +	limb *nqpqx2 = e, *nqpqz2 = f, *nqx2 = g, *nqz2 = h;
 | |
| +
 | |
| +	unsigned i, j;
 | |
| +
 | |
| +	memcpy(nqpqx, q, sizeof(limb) * 10);
 | |
| +
 | |
| +	for (i = 0; i < 32; ++i) {
 | |
| +		u8 byte = n[31 - i];
 | |
| +		for (j = 0; j < 8; ++j) {
 | |
| +			const limb bit = byte >> 7;
 | |
| +
 | |
| +			swap_conditional(nqx, nqpqx, bit);
 | |
| +			swap_conditional(nqz, nqpqz, bit);
 | |
| +			fmonty(nqx2, nqz2,
 | |
| +			       nqpqx2, nqpqz2,
 | |
| +			       nqx, nqz,
 | |
| +			       nqpqx, nqpqz,
 | |
| +			       q);
 | |
| +			swap_conditional(nqx2, nqpqx2, bit);
 | |
| +			swap_conditional(nqz2, nqpqz2, bit);
 | |
| +
 | |
| +			t = nqx;
 | |
| +			nqx = nqx2;
 | |
| +			nqx2 = t;
 | |
| +			t = nqz;
 | |
| +			nqz = nqz2;
 | |
| +			nqz2 = t;
 | |
| +			t = nqpqx;
 | |
| +			nqpqx = nqpqx2;
 | |
| +			nqpqx2 = t;
 | |
| +			t = nqpqz;
 | |
| +			nqpqz = nqpqz2;
 | |
| +			nqpqz2 = t;
 | |
| +
 | |
| +			byte <<= 1;
 | |
| +		}
 | |
| +	}
 | |
| +
 | |
| +	memcpy(resultx, nqx, sizeof(limb) * 10);
 | |
| +	memcpy(resultz, nqz, sizeof(limb) * 10);
 | |
| +}
 | |
| +
 | |
| +bool curve25519(u8 mypublic[CURVE25519_POINT_SIZE], const u8 secret[CURVE25519_POINT_SIZE], const u8 basepoint[CURVE25519_POINT_SIZE])
 | |
| +{
 | |
| +#if IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && defined(CONFIG_ARM)
 | |
| +	if (curve25519_use_neon && may_use_simd()) {
 | |
| +		kernel_neon_begin();
 | |
| +		curve25519_asm_neon(mypublic, secret, basepoint);
 | |
| +		kernel_neon_end();
 | |
| +	} else
 | |
| +#endif
 | |
| +	{
 | |
| +		limb bp[10], x[10], z[11], zmone[10];
 | |
| +		u8 e[32];
 | |
| +
 | |
| +		memcpy(e, secret, 32);
 | |
| +		normalize_secret(e);
 | |
| +
 | |
| +		fexpand(bp, basepoint);
 | |
| +		cmult(x, z, e, bp);
 | |
| +		crecip(zmone, z);
 | |
| +		fmul(z, x, zmone);
 | |
| +		fcontract(mypublic, z);
 | |
| +
 | |
| +		memzero_explicit(e, sizeof(e));
 | |
| +		memzero_explicit(bp, sizeof(bp));
 | |
| +		memzero_explicit(x, sizeof(x));
 | |
| +		memzero_explicit(z, sizeof(z));
 | |
| +		memzero_explicit(zmone, sizeof(zmone));
 | |
| +	}
 | |
| +	return crypto_memneq(mypublic, null_point, CURVE25519_POINT_SIZE);
 | |
| +}
 | |
| +#else
 | |
| +struct other_stack {
 | |
| +	limb origx[10], origxprime[10], zzz[19], xx[19], zz[19], xxprime[19], zzprime[19], zzzprime[19], xxxprime[19];
 | |
| +	limb a[19], b[19], c[19], d[19], e[19], f[19], g[19], h[19];
 | |
| +	limb bp[10], x[10], z[11], zmone[10];
 | |
| +	u8 ee[32];
 | |
| +};
 | |
| +
 | |
| +/* Input: Q, Q', Q-Q'
 | |
| + * Output: 2Q, Q+Q'
 | |
| + *
 | |
| + *   x2 z2: long form
 | |
| + *   x3 z3: long form
 | |
| + *   x z: short form, destroyed
 | |
| + *   xprime zprime: short form, destroyed
 | |
| + *   qmqp: short form, preserved
 | |
| + *
 | |
| + * On entry and exit, the absolute value of the limbs of all inputs and outputs
 | |
| + * are < 2^26. */
 | |
| +static void fmonty(struct other_stack *s,
 | |
| +		   limb *x2, limb *z2,  /* output 2Q */
 | |
| +		   limb *x3, limb *z3,  /* output Q + Q' */
 | |
| +		   limb *x, limb *z,    /* input Q */
 | |
| +		   limb *xprime, limb *zprime,  /* input Q' */
 | |
| +		   const limb *qmqp /* input Q - Q' */)
 | |
| +{
 | |
| +	memcpy(s->origx, x, 10 * sizeof(limb));
 | |
| +	fsum(x, z);
 | |
| +	/* |x[i]| < 2^27 */
 | |
| +	fdifference(z, s->origx);  /* does x - z */
 | |
| +	/* |z[i]| < 2^27 */
 | |
| +
 | |
| +	memcpy(s->origxprime, xprime, sizeof(limb) * 10);
 | |
| +	fsum(xprime, zprime);
 | |
| +	/* |xprime[i]| < 2^27 */
 | |
| +	fdifference(zprime, s->origxprime);
 | |
| +	/* |zprime[i]| < 2^27 */
 | |
| +	fproduct(s->xxprime, xprime, z);
 | |
| +	/* |s->xxprime[i]| < 14*2^54: the largest product of two limbs will be <
 | |
| +	 * 2^(27+27) and fproduct adds together, at most, 14 of those products.
 | |
| +	 * (Approximating that to 2^58 doesn't work out.) */
 | |
| +	fproduct(s->zzprime, x, zprime);
 | |
| +	/* |s->zzprime[i]| < 14*2^54 */
 | |
| +	freduce_degree(s->xxprime);
 | |
| +	freduce_coefficients(s->xxprime);
 | |
| +	/* |s->xxprime[i]| < 2^26 */
 | |
| +	freduce_degree(s->zzprime);
 | |
| +	freduce_coefficients(s->zzprime);
 | |
| +	/* |s->zzprime[i]| < 2^26 */
 | |
| +	memcpy(s->origxprime, s->xxprime, sizeof(limb) * 10);
 | |
| +	fsum(s->xxprime, s->zzprime);
 | |
| +	/* |s->xxprime[i]| < 2^27 */
 | |
| +	fdifference(s->zzprime, s->origxprime);
 | |
| +	/* |s->zzprime[i]| < 2^27 */
 | |
| +	fsquare(s->xxxprime, s->xxprime);
 | |
| +	/* |s->xxxprime[i]| < 2^26 */
 | |
| +	fsquare(s->zzzprime, s->zzprime);
 | |
| +	/* |s->zzzprime[i]| < 2^26 */
 | |
| +	fproduct(s->zzprime, s->zzzprime, qmqp);
 | |
| +	/* |s->zzprime[i]| < 14*2^52 */
 | |
| +	freduce_degree(s->zzprime);
 | |
| +	freduce_coefficients(s->zzprime);
 | |
| +	/* |s->zzprime[i]| < 2^26 */
 | |
| +	memcpy(x3, s->xxxprime, sizeof(limb) * 10);
 | |
| +	memcpy(z3, s->zzprime, sizeof(limb) * 10);
 | |
| +
 | |
| +	fsquare(s->xx, x);
 | |
| +	/* |s->xx[i]| < 2^26 */
 | |
| +	fsquare(s->zz, z);
 | |
| +	/* |s->zz[i]| < 2^26 */
 | |
| +	fproduct(x2, s->xx, s->zz);
 | |
| +	/* |x2[i]| < 14*2^52 */
 | |
| +	freduce_degree(x2);
 | |
| +	freduce_coefficients(x2);
 | |
| +	/* |x2[i]| < 2^26 */
 | |
| +	fdifference(s->zz, s->xx);  // does s->zz = s->xx - s->zz
 | |
| +	/* |s->zz[i]| < 2^27 */
 | |
| +	memset(s->zzz + 10, 0, sizeof(limb) * 9);
 | |
| +	fscalar_product(s->zzz, s->zz, 121665);
 | |
| +	/* |s->zzz[i]| < 2^(27+17) */
 | |
| +	/* No need to call freduce_degree here:
 | |
| +		 fscalar_product doesn't increase the degree of its input. */
 | |
| +	freduce_coefficients(s->zzz);
 | |
| +	/* |s->zzz[i]| < 2^26 */
 | |
| +	fsum(s->zzz, s->xx);
 | |
| +	/* |s->zzz[i]| < 2^27 */
 | |
| +	fproduct(z2, s->zz, s->zzz);
 | |
| +	/* |z2[i]| < 14*2^(26+27) */
 | |
| +	freduce_degree(z2);
 | |
| +	freduce_coefficients(z2);
 | |
| +	/* |z2[i]| < 2^26 */
 | |
| +}
 | |
| +
 | |
| +/* Calculates nQ where Q is the x-coordinate of a point on the curve
 | |
| + *
 | |
| + *   resultx/resultz: the x coordinate of the resulting curve point (short form)
 | |
| + *   n: a little endian, 32-byte number
 | |
| + *   q: a point of the curve (short form) */
 | |
| +static void cmult(struct other_stack *s, limb *resultx, limb *resultz, const u8 *n, const limb *q)
 | |
| +{
 | |
| +	unsigned i, j;
 | |
| +	limb *nqpqx = s->a, *nqpqz = s->b, *nqx = s->c, *nqz = s->d, *t;
 | |
| +	limb *nqpqx2 = s->e, *nqpqz2 = s->f, *nqx2 = s->g, *nqz2 = s->h;
 | |
| +
 | |
| +	*nqpqz = *nqx = *nqpqz2 = *nqz2 = 1;
 | |
| +	memcpy(nqpqx, q, sizeof(limb) * 10);
 | |
| +
 | |
| +	for (i = 0; i < 32; ++i) {
 | |
| +		u8 byte = n[31 - i];
 | |
| +		for (j = 0; j < 8; ++j) {
 | |
| +			const limb bit = byte >> 7;
 | |
| +
 | |
| +			swap_conditional(nqx, nqpqx, bit);
 | |
| +			swap_conditional(nqz, nqpqz, bit);
 | |
| +			fmonty(s,
 | |
| +			       nqx2, nqz2,
 | |
| +			       nqpqx2, nqpqz2,
 | |
| +			       nqx, nqz,
 | |
| +			       nqpqx, nqpqz,
 | |
| +			       q);
 | |
| +			swap_conditional(nqx2, nqpqx2, bit);
 | |
| +			swap_conditional(nqz2, nqpqz2, bit);
 | |
| +
 | |
| +			t = nqx;
 | |
| +			nqx = nqx2;
 | |
| +			nqx2 = t;
 | |
| +			t = nqz;
 | |
| +			nqz = nqz2;
 | |
| +			nqz2 = t;
 | |
| +			t = nqpqx;
 | |
| +			nqpqx = nqpqx2;
 | |
| +			nqpqx2 = t;
 | |
| +			t = nqpqz;
 | |
| +			nqpqz = nqpqz2;
 | |
| +			nqpqz2 = t;
 | |
| +
 | |
| +			byte <<= 1;
 | |
| +		}
 | |
| +	}
 | |
| +
 | |
| +	memcpy(resultx, nqx, sizeof(limb) * 10);
 | |
| +	memcpy(resultz, nqz, sizeof(limb) * 10);
 | |
| +}
 | |
| +
 | |
| +bool curve25519(u8 mypublic[CURVE25519_POINT_SIZE], const u8 secret[CURVE25519_POINT_SIZE], const u8 basepoint[CURVE25519_POINT_SIZE])
 | |
| +{
 | |
| +#if IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && defined(CONFIG_ARM)
 | |
| +	if (curve25519_use_neon && may_use_simd()) {
 | |
| +		kernel_neon_begin();
 | |
| +		curve25519_asm_neon(mypublic, secret, basepoint);
 | |
| +		kernel_neon_end();
 | |
| +	} else
 | |
| +#endif
 | |
| +	{
 | |
| +		struct other_stack *s = kzalloc(sizeof(struct other_stack), GFP_KERNEL);
 | |
| +		if (unlikely(!s))
 | |
| +			return false;
 | |
| +
 | |
| +		memcpy(s->ee, secret, 32);
 | |
| +		normalize_secret(s->ee);
 | |
| +
 | |
| +		fexpand(s->bp, basepoint);
 | |
| +		cmult(s, s->x, s->z, s->ee, s->bp);
 | |
| +		crecip(s->zmone, s->z);
 | |
| +		fmul(s->z, s->x, s->zmone);
 | |
| +		fcontract(mypublic, s->z);
 | |
| +
 | |
| +		kzfree(s);
 | |
| +	}
 | |
| +	return crypto_memneq(mypublic, null_point, CURVE25519_POINT_SIZE);
 | |
| +}
 | |
| +#endif
 | |
| +bool curve25519_generate_public(u8 pub[CURVE25519_POINT_SIZE], const u8 secret[CURVE25519_POINT_SIZE])
 | |
| +{
 | |
| +	static const u8 basepoint[CURVE25519_POINT_SIZE] __aligned(32) = { 9 };
 | |
| +	return curve25519(pub, secret, basepoint);
 | |
| +}
 | |
| +#endif
 | |
| +
 | |
| +void curve25519_generate_secret(u8 secret[CURVE25519_POINT_SIZE])
 | |
| +{
 | |
| +	get_random_bytes_wait(secret, CURVE25519_POINT_SIZE);
 | |
| +	normalize_secret(secret);
 | |
| +}
 | |
| +
 | |
| +#include "../selftest/curve25519.h"
 | |
| --- /dev/null	2017-07-05 16:27:37.615351856 +0200
 | |
| +++ b/net/wireguard/crypto/blake2s.h	2017-07-06 18:17:33.000000000 +0200
 | |
| @@ -0,0 +1,38 @@
 | |
| +/* Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. */
 | |
| +
 | |
| +#ifndef BLAKE2S_H
 | |
| +#define BLAKE2S_H
 | |
| +
 | |
| +#include <linux/types.h>
 | |
| +
 | |
| +enum blake2s_lengths {
 | |
| +	BLAKE2S_BLOCKBYTES = 64,
 | |
| +	BLAKE2S_OUTBYTES = 32,
 | |
| +	BLAKE2S_KEYBYTES = 32
 | |
| +};
 | |
| +
 | |
| +struct blake2s_state {
 | |
| +	u32 h[8];
 | |
| +	u32 t[2];
 | |
| +	u32 f[2];
 | |
| +	u8 buf[2 * BLAKE2S_BLOCKBYTES];
 | |
| +	size_t buflen;
 | |
| +	u8 last_node;
 | |
| +};
 | |
| +
 | |
| +void blake2s(u8 *out, const u8 *in, const u8 *key, const u8 outlen, const u64 inlen, const u8 keylen);
 | |
| +
 | |
| +void blake2s_init(struct blake2s_state *state, const u8 outlen);
 | |
| +void blake2s_init_key(struct blake2s_state *state, const u8 outlen, const void *key, const u8 keylen);
 | |
| +void blake2s_update(struct blake2s_state *state, const u8 *in, u64 inlen);
 | |
| +void blake2s_final(struct blake2s_state *state, u8 *out, u8 outlen);
 | |
| +
 | |
| +void blake2s_hmac(u8 *out, const u8 *in, const u8 *key, const u8 outlen, const u64 inlen, const u64 keylen);
 | |
| +
 | |
| +void blake2s_fpu_init(void);
 | |
| +
 | |
| +#ifdef DEBUG
 | |
| +bool blake2s_selftest(void);
 | |
| +#endif
 | |
| +
 | |
| +#endif
 | |
| --- /dev/null	2017-07-05 16:27:37.615351856 +0200
 | |
| +++ b/net/wireguard/crypto/chacha20poly1305.h	2017-07-06 18:17:33.000000000 +0200
 | |
| @@ -0,0 +1,88 @@
 | |
| +/* Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. */
 | |
| +
 | |
| +#ifndef CHACHA20POLY1305_H
 | |
| +#define CHACHA20POLY1305_H
 | |
| +
 | |
| +#include <linux/types.h>
 | |
| +
 | |
| +struct scatterlist;
 | |
| +
 | |
| +enum chacha20poly1305_lengths {
 | |
| +	XCHACHA20POLY1305_NONCELEN = 24,
 | |
| +	CHACHA20POLY1305_KEYLEN = 32,
 | |
| +	CHACHA20POLY1305_AUTHTAGLEN = 16
 | |
| +};
 | |
| +
 | |
| +void chacha20poly1305_fpu_init(void);
 | |
| +
 | |
| +void chacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len,
 | |
| +			      const u8 *ad, const size_t ad_len,
 | |
| +			      const u64 nonce, const u8 key[CHACHA20POLY1305_KEYLEN]);
 | |
| +
 | |
| +bool __must_check chacha20poly1305_encrypt_sg(struct scatterlist *dst, struct scatterlist *src, const size_t src_len,
 | |
| +				 const u8 *ad, const size_t ad_len,
 | |
| +				 const u64 nonce, const u8 key[CHACHA20POLY1305_KEYLEN],
 | |
| +				 bool have_simd);
 | |
| +
 | |
| +bool __must_check chacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len,
 | |
| +			      const u8 *ad, const size_t ad_len,
 | |
| +			      const u64 nonce, const u8 key[CHACHA20POLY1305_KEYLEN]);
 | |
| +
 | |
| +bool __must_check chacha20poly1305_decrypt_sg(struct scatterlist *dst, struct scatterlist *src, const size_t src_len,
 | |
| +				 const u8 *ad, const size_t ad_len,
 | |
| +				 const u64 nonce, const u8 key[CHACHA20POLY1305_KEYLEN]);
 | |
| +
 | |
| +void xchacha20poly1305_encrypt(u8 *dst, const u8 *src, const size_t src_len,
 | |
| +			       const u8 *ad, const size_t ad_len,
 | |
| +			       const u8 nonce[XCHACHA20POLY1305_NONCELEN],
 | |
| +			       const u8 key[CHACHA20POLY1305_KEYLEN]);
 | |
| +
 | |
| +bool __must_check xchacha20poly1305_decrypt(u8 *dst, const u8 *src, const size_t src_len,
 | |
| +			       const u8 *ad, const size_t ad_len,
 | |
| +			       const u8 nonce[XCHACHA20POLY1305_NONCELEN],
 | |
| +			       const u8 key[CHACHA20POLY1305_KEYLEN]);
 | |
| +
 | |
| +#if defined(CONFIG_X86_64)
 | |
| +#include <linux/version.h>
 | |
| +#include <asm/fpu/api.h>
 | |
| +#include <asm/simd.h>
 | |
| +#elif IS_ENABLED(CONFIG_KERNEL_MODE_NEON)
 | |
| +#include <asm/neon.h>
 | |
| +#include <asm/simd.h>
 | |
| +#endif
 | |
| +
 | |
| +static inline bool chacha20poly1305_init_simd(void)
 | |
| +{
 | |
| +	bool have_simd = false;
 | |
| +#if defined(CONFIG_X86_64)
 | |
| +	have_simd = irq_fpu_usable();
 | |
| +	if (have_simd)
 | |
| +		kernel_fpu_begin();
 | |
| +#elif IS_ENABLED(CONFIG_KERNEL_MODE_NEON)
 | |
| +#if defined(CONFIG_ARM64)
 | |
| +	have_simd = true; /* ARM64 supports NEON in any context. */
 | |
| +#elif defined(CONFIG_ARM)
 | |
| +	have_simd = may_use_simd(); /* ARM doesn't support NEON in interrupt context. */
 | |
| +#endif
 | |
| +	if (have_simd)
 | |
| +		kernel_neon_begin();
 | |
| +#endif
 | |
| +	return have_simd;
 | |
| +}
 | |
| +
 | |
| +static inline void chacha20poly1305_deinit_simd(bool was_on)
 | |
| +{
 | |
| +#if defined(CONFIG_X86_64)
 | |
| +	if (was_on)
 | |
| +		kernel_fpu_end();
 | |
| +#elif IS_ENABLED(CONFIG_KERNEL_MODE_NEON)
 | |
| +	if (was_on)
 | |
| +		kernel_neon_end();
 | |
| +#endif
 | |
| +}
 | |
| +
 | |
| +#ifdef DEBUG
 | |
| +bool chacha20poly1305_selftest(void);
 | |
| +#endif
 | |
| +
 | |
| +#endif
 | |
| --- /dev/null	2017-07-05 16:27:37.615351856 +0200
 | |
| +++ b/net/wireguard/crypto/curve25519.h	2017-07-06 18:17:33.000000000 +0200
 | |
| @@ -0,0 +1,22 @@
 | |
| +/* Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. */
 | |
| +
 | |
| +#ifndef CURVE25519_H
 | |
| +#define CURVE25519_H
 | |
| +
 | |
| +#include <linux/types.h>
 | |
| +
 | |
| +enum curve25519_lengths {
 | |
| +	CURVE25519_POINT_SIZE = 32
 | |
| +};
 | |
| +
 | |
| +bool __must_check curve25519(u8 mypublic[CURVE25519_POINT_SIZE], const u8 secret[CURVE25519_POINT_SIZE], const u8 basepoint[CURVE25519_POINT_SIZE]);
 | |
| +void curve25519_generate_secret(u8 secret[CURVE25519_POINT_SIZE]);
 | |
| +bool __must_check curve25519_generate_public(u8 pub[CURVE25519_POINT_SIZE], const u8 secret[CURVE25519_POINT_SIZE]);
 | |
| +
 | |
| +void curve25519_fpu_init(void);
 | |
| +
 | |
| +#ifdef DEBUG
 | |
| +bool curve25519_selftest(void);
 | |
| +#endif
 | |
| +
 | |
| +#endif
 | |
| --- /dev/null	2017-07-05 16:27:37.615351856 +0200
 | |
| +++ b/net/wireguard/crypto/blake2s-avx-x86_64.S	2017-07-06 18:17:33.000000000 +0200
 | |
| @@ -0,0 +1,576 @@
 | |
| +/*
 | |
| + * Copyright (C) 2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 | |
| + * Based on algorithms from Samuel Neves <sneves@dei.uc.pt>
 | |
| + */
 | |
| +
 | |
| +#include <linux/linkage.h>
 | |
| +
 | |
| +.section .rodata.cst32.BLAKECONST, "aM", @progbits, 32
 | |
| +.align 32
 | |
| +IV:	.octa 0xA54FF53A3C6EF372BB67AE856A09E667
 | |
| +	.octa 0x5BE0CD191F83D9AB9B05688C510E527F
 | |
| +.section .rodata.cst16.ROT16, "aM", @progbits, 16
 | |
| +.align 16
 | |
| +ROT16:	.octa 0x0D0C0F0E09080B0A0504070601000302
 | |
| +.section .rodata.cst16.ROR328, "aM", @progbits, 16
 | |
| +.align 16
 | |
| +ROR328:	.octa 0x0C0F0E0D080B0A090407060500030201
 | |
| +
 | |
| +.text
 | |
| +ENTRY(blake2s_compress_avx)
 | |
| +	vmovdqu		IV+16(%rip), %xmm1
 | |
| +	vmovdqu		(%rsi), %xmm4
 | |
| +	vpxor		32(%rdi), %xmm1, %xmm1
 | |
| +	vmovdqu		16(%rsi), %xmm3
 | |
| +	vshufps		$136, %xmm3, %xmm4, %xmm6
 | |
| +	vmovdqa		ROT16(%rip), %xmm7
 | |
| +	vpaddd		(%rdi), %xmm6, %xmm6
 | |
| +	vpaddd		16(%rdi), %xmm6, %xmm6
 | |
| +	vpxor		%xmm6, %xmm1, %xmm1
 | |
| +	vmovdqu		IV(%rip), %xmm8
 | |
| +	vpshufb		%xmm7, %xmm1, %xmm1
 | |
| +	vmovdqu		48(%rsi), %xmm5
 | |
| +	vpaddd		%xmm1, %xmm8, %xmm8
 | |
| +	vpxor		16(%rdi), %xmm8, %xmm9
 | |
| +	vmovdqu		32(%rsi), %xmm2
 | |
| +	vpblendw	$12, %xmm3, %xmm5, %xmm13
 | |
| +	vshufps		$221, %xmm5, %xmm2, %xmm12
 | |
| +	vpunpckhqdq	%xmm2, %xmm4, %xmm14
 | |
| +	vpslld		$20, %xmm9, %xmm0
 | |
| +	vpsrld		$12, %xmm9, %xmm9
 | |
| +	vpxor		%xmm0, %xmm9, %xmm0
 | |
| +	vshufps		$221, %xmm3, %xmm4, %xmm9
 | |
| +	vpaddd		%xmm9, %xmm6, %xmm9
 | |
| +	vpaddd		%xmm0, %xmm9, %xmm9
 | |
| +	vpxor		%xmm9, %xmm1, %xmm1
 | |
| +	vmovdqa		ROR328(%rip), %xmm6
 | |
| +	vpshufb		%xmm6, %xmm1, %xmm1
 | |
| +	vpaddd		%xmm1, %xmm8, %xmm8
 | |
| +	vpxor		%xmm8, %xmm0, %xmm0
 | |
| +	vpshufd		$147, %xmm1, %xmm1
 | |
| +	vpshufd		$78, %xmm8, %xmm8
 | |
| +	vpslld		$25, %xmm0, %xmm10
 | |
| +	vpsrld		$7, %xmm0, %xmm0
 | |
| +	vpxor		%xmm10, %xmm0, %xmm0
 | |
| +	vshufps		$136, %xmm5, %xmm2, %xmm10
 | |
| +	vpshufd		$57, %xmm0, %xmm0
 | |
| +	vpaddd		%xmm10, %xmm9, %xmm9
 | |
| +	vpaddd		%xmm0, %xmm9, %xmm9
 | |
| +	vpxor		%xmm9, %xmm1, %xmm1
 | |
| +	vpaddd		%xmm12, %xmm9, %xmm9
 | |
| +	vpblendw	$12, %xmm2, %xmm3, %xmm12
 | |
| +	vpshufb		%xmm7, %xmm1, %xmm1
 | |
| +	vpaddd		%xmm1, %xmm8, %xmm8
 | |
| +	vpxor		%xmm8, %xmm0, %xmm10
 | |
| +	vpslld		$20, %xmm10, %xmm0
 | |
| +	vpsrld		$12, %xmm10, %xmm10
 | |
| +	vpxor		%xmm0, %xmm10, %xmm0
 | |
| +	vpaddd		%xmm0, %xmm9, %xmm9
 | |
| +	vpxor		%xmm9, %xmm1, %xmm1
 | |
| +	vpshufb		%xmm6, %xmm1, %xmm1
 | |
| +	vpaddd		%xmm1, %xmm8, %xmm8
 | |
| +	vpxor		%xmm8, %xmm0, %xmm0
 | |
| +	vpshufd		$57, %xmm1, %xmm1
 | |
| +	vpshufd		$78, %xmm8, %xmm8
 | |
| +	vpslld		$25, %xmm0, %xmm10
 | |
| +	vpsrld		$7, %xmm0, %xmm0
 | |
| +	vpxor		%xmm10, %xmm0, %xmm0
 | |
| +	vpslldq		$4, %xmm5, %xmm10
 | |
| +	vpblendw	$240, %xmm10, %xmm12, %xmm12
 | |
| +	vpshufd		$147, %xmm0, %xmm0
 | |
| +	vpshufd		$147, %xmm12, %xmm12
 | |
| +	vpaddd		%xmm9, %xmm12, %xmm12
 | |
| +	vpaddd		%xmm0, %xmm12, %xmm12
 | |
| +	vpxor		%xmm12, %xmm1, %xmm1
 | |
| +	vpshufb		%xmm7, %xmm1, %xmm1
 | |
| +	vpaddd		%xmm1, %xmm8, %xmm8
 | |
| +	vpxor		%xmm8, %xmm0, %xmm11
 | |
| +	vpslld		$20, %xmm11, %xmm9
 | |
| +	vpsrld		$12, %xmm11, %xmm11
 | |
| +	vpxor		%xmm9, %xmm11, %xmm0
 | |
| +	vpshufd		$8, %xmm2, %xmm9
 | |
| +	vpblendw	$192, %xmm5, %xmm3, %xmm11
 | |
| +	vpblendw	$240, %xmm11, %xmm9, %xmm9
 | |
| +	vpshufd		$177, %xmm9, %xmm9
 | |
| +	vpaddd		%xmm12, %xmm9, %xmm9
 | |
| +	vpaddd		%xmm0, %xmm9, %xmm11
 | |
| +	vpxor		%xmm11, %xmm1, %xmm1
 | |
| +	vpshufb		%xmm6, %xmm1, %xmm1
 | |
| +	vpaddd		%xmm1, %xmm8, %xmm8
 | |
| +	vpxor		%xmm8, %xmm0, %xmm9
 | |
| +	vpshufd		$147, %xmm1, %xmm1
 | |
| +	vpshufd		$78, %xmm8, %xmm8
 | |
| +	vpslld		$25, %xmm9, %xmm0
 | |
| +	vpsrld		$7, %xmm9, %xmm9
 | |
| +	vpxor		%xmm0, %xmm9, %xmm0
 | |
| +	vpslldq		$4, %xmm3, %xmm9
 | |
| +	vpblendw	$48, %xmm9, %xmm2, %xmm9
 | |
| +	vpblendw	$240, %xmm9, %xmm4, %xmm9
 | |
| +	vpshufd		$57, %xmm0, %xmm0
 | |
| +	vpshufd		$177, %xmm9, %xmm9
 | |
| +	vpaddd		%xmm11, %xmm9, %xmm9
 | |
| +	vpaddd		%xmm0, %xmm9, %xmm9
 | |
| +	vpxor		%xmm9, %xmm1, %xmm1
 | |
| +	vpshufb		%xmm7, %xmm1, %xmm1
 | |
| +	vpaddd		%xmm1, %xmm8, %xmm11
 | |
| +	vpxor		%xmm11, %xmm0, %xmm0
 | |
| +	vpslld		$20, %xmm0, %xmm8
 | |
| +	vpsrld		$12, %xmm0, %xmm0
 | |
| +	vpxor		%xmm8, %xmm0, %xmm0
 | |
| +	vpunpckhdq	%xmm3, %xmm4, %xmm8
 | |
| +	vpblendw	$12, %xmm10, %xmm8, %xmm12
 | |
| +	vpshufd		$177, %xmm12, %xmm12
 | |
| +	vpaddd		%xmm9, %xmm12, %xmm9
 | |
| +	vpaddd		%xmm0, %xmm9, %xmm9
 | |
| +	vpxor		%xmm9, %xmm1, %xmm1
 | |
| +	vpshufb		%xmm6, %xmm1, %xmm1
 | |
| +	vpaddd		%xmm1, %xmm11, %xmm11
 | |
| +	vpxor		%xmm11, %xmm0, %xmm0
 | |
| +	vpshufd		$57, %xmm1, %xmm1
 | |
| +	vpshufd		$78, %xmm11, %xmm11
 | |
| +	vpslld		$25, %xmm0, %xmm12
 | |
| +	vpsrld		$7, %xmm0, %xmm0
 | |
| +	vpxor		%xmm12, %xmm0, %xmm0
 | |
| +	vpunpckhdq	%xmm5, %xmm2, %xmm12
 | |
| +	vpshufd		$147, %xmm0, %xmm0
 | |
| +	vpblendw	$15, %xmm13, %xmm12, %xmm12
 | |
| +	vpslldq		$8, %xmm5, %xmm13
 | |
| +	vpshufd		$210, %xmm12, %xmm12
 | |
| +	vpaddd		%xmm9, %xmm12, %xmm9
 | |
| +	vpaddd		%xmm0, %xmm9, %xmm9
 | |
| +	vpxor		%xmm9, %xmm1, %xmm1
 | |
| +	vpshufb		%xmm7, %xmm1, %xmm1
 | |
| +	vpaddd		%xmm1, %xmm11, %xmm11
 | |
| +	vpxor		%xmm11, %xmm0, %xmm0
 | |
| +	vpslld		$20, %xmm0, %xmm12
 | |
| +	vpsrld		$12, %xmm0, %xmm0
 | |
| +	vpxor		%xmm12, %xmm0, %xmm0
 | |
| +	vpunpckldq	%xmm4, %xmm2, %xmm12
 | |
| +	vpblendw	$240, %xmm4, %xmm12, %xmm12
 | |
| +	vpblendw	$192, %xmm13, %xmm12, %xmm12
 | |
| +	vpsrldq		$12, %xmm3, %xmm13
 | |
| +	vpaddd		%xmm12, %xmm9, %xmm9
 | |
| +	vpaddd		%xmm0, %xmm9, %xmm9
 | |
| +	vpxor		%xmm9, %xmm1, %xmm1
 | |
| +	vpshufb		%xmm6, %xmm1, %xmm1
 | |
| +	vpaddd		%xmm1, %xmm11, %xmm11
 | |
| +	vpxor		%xmm11, %xmm0, %xmm0
 | |
| +	vpshufd		$147, %xmm1, %xmm1
 | |
| +	vpshufd		$78, %xmm11, %xmm11
 | |
| +	vpslld		$25, %xmm0, %xmm12
 | |
| +	vpsrld		$7, %xmm0, %xmm0
 | |
| +	vpxor		%xmm12, %xmm0, %xmm0
 | |
| +	vpblendw	$60, %xmm2, %xmm4, %xmm12
 | |
| +	vpblendw	$3, %xmm13, %xmm12, %xmm12
 | |
| +	vpshufd		$57, %xmm0, %xmm0
 | |
| +	vpshufd		$78, %xmm12, %xmm12
 | |
| +	vpaddd		%xmm9, %xmm12, %xmm9
 | |
| +	vpaddd		%xmm0, %xmm9, %xmm9
 | |
| +	vpxor		%xmm9, %xmm1, %xmm1
 | |
| +	vpshufb		%xmm7, %xmm1, %xmm1
 | |
| +	vpaddd		%xmm1, %xmm11, %xmm11
 | |
| +	vpxor		%xmm11, %xmm0, %xmm12
 | |
| +	vpslld		$20, %xmm12, %xmm13
 | |
| +	vpsrld		$12, %xmm12, %xmm0
 | |
| +	vpblendw	$51, %xmm3, %xmm4, %xmm12
 | |
| +	vpxor		%xmm13, %xmm0, %xmm0
 | |
| +	vpblendw	$192, %xmm10, %xmm12, %xmm10
 | |
| +	vpslldq		$8, %xmm2, %xmm12
 | |
| +	vpshufd		$27, %xmm10, %xmm10
 | |
| +	vpaddd		%xmm9, %xmm10, %xmm9
 | |
| +	vpaddd		%xmm0, %xmm9, %xmm9
 | |
| +	vpxor		%xmm9, %xmm1, %xmm1
 | |
| +	vpshufb		%xmm6, %xmm1, %xmm1
 | |
| +	vpaddd		%xmm1, %xmm11, %xmm11
 | |
| +	vpxor		%xmm11, %xmm0, %xmm0
 | |
| +	vpshufd		$57, %xmm1, %xmm1
 | |
| +	vpshufd		$78, %xmm11, %xmm11
 | |
| +	vpslld		$25, %xmm0, %xmm10
 | |
| +	vpsrld		$7, %xmm0, %xmm0
 | |
| +	vpxor		%xmm10, %xmm0, %xmm0
 | |
| +	vpunpckhdq	%xmm2, %xmm8, %xmm10
 | |
| +	vpshufd		$147, %xmm0, %xmm0
 | |
| +	vpblendw	$12, %xmm5, %xmm10, %xmm10
 | |
| +	vpshufd		$210, %xmm10, %xmm10
 | |
| +	vpaddd		%xmm9, %xmm10, %xmm9
 | |
| +	vpaddd		%xmm0, %xmm9, %xmm9
 | |
| +	vpxor		%xmm9, %xmm1, %xmm1
 | |
| +	vpshufb		%xmm7, %xmm1, %xmm1
 | |
| +	vpaddd		%xmm1, %xmm11, %xmm11
 | |
| +	vpxor		%xmm11, %xmm0, %xmm10
 | |
| +	vpslld		$20, %xmm10, %xmm0
 | |
| +	vpsrld		$12, %xmm10, %xmm10
 | |
| +	vpxor		%xmm0, %xmm10, %xmm0
 | |
| +	vpblendw	$12, %xmm4, %xmm5, %xmm10
 | |
| +	vpblendw	$192, %xmm12, %xmm10, %xmm10
 | |
| +	vpunpckldq	%xmm2, %xmm4, %xmm12
 | |
| +	vpshufd		$135, %xmm10, %xmm10
 | |
| +	vpaddd		%xmm9, %xmm10, %xmm9
 | |
| +	vpaddd		%xmm0, %xmm9, %xmm9
 | |
| +	vpxor		%xmm9, %xmm1, %xmm1
 | |
| +	vpshufb		%xmm6, %xmm1, %xmm1
 | |
| +	vpaddd		%xmm1, %xmm11, %xmm13
 | |
| +	vpxor		%xmm13, %xmm0, %xmm0
 | |
| +	vpshufd		$147, %xmm1, %xmm1
 | |
| +	vpshufd		$78, %xmm13, %xmm13
 | |
| +	vpslld		$25, %xmm0, %xmm10
 | |
| +	vpsrld		$7, %xmm0, %xmm0
 | |
| +	vpxor		%xmm10, %xmm0, %xmm0
 | |
| +	vpblendw	$15, %xmm3, %xmm4, %xmm10
 | |
| +	vpblendw	$192, %xmm5, %xmm10, %xmm10
 | |
| +	vpshufd		$57, %xmm0, %xmm0
 | |
| +	vpshufd		$198, %xmm10, %xmm10
 | |
| +	vpaddd		%xmm9, %xmm10, %xmm10
 | |
| +	vpaddd		%xmm0, %xmm10, %xmm10
 | |
| +	vpxor		%xmm10, %xmm1, %xmm1
 | |
| +	vpshufb		%xmm7, %xmm1, %xmm1
 | |
| +	vpaddd		%xmm1, %xmm13, %xmm13
 | |
| +	vpxor		%xmm13, %xmm0, %xmm9
 | |
| +	vpslld		$20, %xmm9, %xmm0
 | |
| +	vpsrld		$12, %xmm9, %xmm9
 | |
| +	vpxor		%xmm0, %xmm9, %xmm0
 | |
| +	vpunpckhdq	%xmm2, %xmm3, %xmm9
 | |
| +	vpunpcklqdq	%xmm12, %xmm9, %xmm15
 | |
| +	vpunpcklqdq	%xmm12, %xmm8, %xmm12
 | |
| +	vpblendw	$15, %xmm5, %xmm8, %xmm8
 | |
| +	vpaddd		%xmm15, %xmm10, %xmm15
 | |
| +	vpaddd		%xmm0, %xmm15, %xmm15
 | |
| +	vpxor		%xmm15, %xmm1, %xmm1
 | |
| +	vpshufd		$141, %xmm8, %xmm8
 | |
| +	vpshufb		%xmm6, %xmm1, %xmm1
 | |
| +	vpaddd		%xmm1, %xmm13, %xmm13
 | |
| +	vpxor		%xmm13, %xmm0, %xmm0
 | |
| +	vpshufd		$57, %xmm1, %xmm1
 | |
| +	vpshufd		$78, %xmm13, %xmm13
 | |
| +	vpslld		$25, %xmm0, %xmm10
 | |
| +	vpsrld		$7, %xmm0, %xmm0
 | |
| +	vpxor		%xmm10, %xmm0, %xmm0
 | |
| +	vpunpcklqdq	%xmm2, %xmm3, %xmm10
 | |
| +	vpshufd		$147, %xmm0, %xmm0
 | |
| +	vpblendw	$51, %xmm14, %xmm10, %xmm14
 | |
| +	vpshufd		$135, %xmm14, %xmm14
 | |
| +	vpaddd		%xmm15, %xmm14, %xmm14
 | |
| +	vpaddd		%xmm0, %xmm14, %xmm14
 | |
| +	vpxor		%xmm14, %xmm1, %xmm1
 | |
| +	vpunpcklqdq	%xmm3, %xmm4, %xmm15
 | |
| +	vpshufb		%xmm7, %xmm1, %xmm1
 | |
| +	vpaddd		%xmm1, %xmm13, %xmm13
 | |
| +	vpxor		%xmm13, %xmm0, %xmm0
 | |
| +	vpslld		$20, %xmm0, %xmm11
 | |
| +	vpsrld		$12, %xmm0, %xmm0
 | |
| +	vpxor		%xmm11, %xmm0, %xmm0
 | |
| +	vpunpckhqdq	%xmm5, %xmm3, %xmm11
 | |
| +	vpblendw	$51, %xmm15, %xmm11, %xmm11
 | |
| +	vpunpckhqdq	%xmm3, %xmm5, %xmm15
 | |
| +	vpaddd		%xmm11, %xmm14, %xmm11
 | |
| +	vpaddd		%xmm0, %xmm11, %xmm11
 | |
| +	vpxor		%xmm11, %xmm1, %xmm1
 | |
| +	vpshufb		%xmm6, %xmm1, %xmm1
 | |
| +	vpaddd		%xmm1, %xmm13, %xmm13
 | |
| +	vpxor		%xmm13, %xmm0, %xmm0
 | |
| +	vpshufd		$147, %xmm1, %xmm1
 | |
| +	vpshufd		$78, %xmm13, %xmm13
 | |
| +	vpslld		$25, %xmm0, %xmm14
 | |
| +	vpsrld		$7, %xmm0, %xmm0
 | |
| +	vpxor		%xmm14, %xmm0, %xmm14
 | |
| +	vpunpckhqdq	%xmm4, %xmm2, %xmm0
 | |
| +	vpshufd		$57, %xmm14, %xmm14
 | |
| +	vpblendw	$51, %xmm15, %xmm0, %xmm15
 | |
| +	vpaddd		%xmm15, %xmm11, %xmm15
 | |
| +	vpaddd		%xmm14, %xmm15, %xmm15
 | |
| +	vpxor		%xmm15, %xmm1, %xmm1
 | |
| +	vpshufb		%xmm7, %xmm1, %xmm1
 | |
| +	vpaddd		%xmm1, %xmm13, %xmm13
 | |
| +	vpxor		%xmm13, %xmm14, %xmm14
 | |
| +	vpslld		$20, %xmm14, %xmm11
 | |
| +	vpsrld		$12, %xmm14, %xmm14
 | |
| +	vpxor		%xmm11, %xmm14, %xmm14
 | |
| +	vpblendw	$3, %xmm2, %xmm4, %xmm11
 | |
| +	vpslldq		$8, %xmm11, %xmm0
 | |
| +	vpblendw	$15, %xmm5, %xmm0, %xmm0
 | |
| +	vpshufd		$99, %xmm0, %xmm0
 | |
| +	vpaddd		%xmm15, %xmm0, %xmm15
 | |
| +	vpaddd		%xmm14, %xmm15, %xmm15
 | |
| +	vpxor		%xmm15, %xmm1, %xmm0
 | |
| +	vpaddd		%xmm12, %xmm15, %xmm15
 | |
| +	vpshufb		%xmm6, %xmm0, %xmm0
 | |
| +	vpaddd		%xmm0, %xmm13, %xmm13
 | |
| +	vpxor		%xmm13, %xmm14, %xmm14
 | |
| +	vpshufd		$57, %xmm0, %xmm0
 | |
| +	vpshufd		$78, %xmm13, %xmm13
 | |
| +	vpslld		$25, %xmm14, %xmm1
 | |
| +	vpsrld		$7, %xmm14, %xmm14
 | |
| +	vpxor		%xmm1, %xmm14, %xmm14
 | |
| +	vpblendw	$3, %xmm5, %xmm4, %xmm1
 | |
| +	vpshufd		$147, %xmm14, %xmm14
 | |
| +	vpaddd		%xmm14, %xmm15, %xmm15
 | |
| +	vpxor		%xmm15, %xmm0, %xmm0
 | |
| +	vpshufb		%xmm7, %xmm0, %xmm0
 | |
| +	vpaddd		%xmm0, %xmm13, %xmm13
 | |
| +	vpxor		%xmm13, %xmm14, %xmm14
 | |
| +	vpslld		$20, %xmm14, %xmm12
 | |
| +	vpsrld		$12, %xmm14, %xmm14
 | |
| +	vpxor		%xmm12, %xmm14, %xmm14
 | |
| +	vpsrldq		$4, %xmm2, %xmm12
 | |
| +	vpblendw	$60, %xmm12, %xmm1, %xmm1
 | |
| +	vpaddd		%xmm1, %xmm15, %xmm15
 | |
| +	vpaddd		%xmm14, %xmm15, %xmm15
 | |
| +	vpxor		%xmm15, %xmm0, %xmm0
 | |
| +	vpblendw	$12, %xmm4, %xmm3, %xmm1
 | |
| +	vpshufb		%xmm6, %xmm0, %xmm0
 | |
| +	vpaddd		%xmm0, %xmm13, %xmm13
 | |
| +	vpxor		%xmm13, %xmm14, %xmm14
 | |
| +	vpshufd		$147, %xmm0, %xmm0
 | |
| +	vpshufd		$78, %xmm13, %xmm13
 | |
| +	vpslld		$25, %xmm14, %xmm12
 | |
| +	vpsrld		$7, %xmm14, %xmm14
 | |
| +	vpxor		%xmm12, %xmm14, %xmm14
 | |
| +	vpsrldq		$4, %xmm5, %xmm12
 | |
| +	vpblendw	$48, %xmm12, %xmm1, %xmm1
 | |
| +	vpshufd		$33, %xmm5, %xmm12
 | |
| +	vpshufd		$57, %xmm14, %xmm14
 | |
| +	vpshufd		$108, %xmm1, %xmm1
 | |
| +	vpblendw	$51, %xmm12, %xmm10, %xmm12
 | |
| +	vpaddd		%xmm15, %xmm1, %xmm15
 | |
| +	vpaddd		%xmm14, %xmm15, %xmm15
 | |
| +	vpxor		%xmm15, %xmm0, %xmm0
 | |
| +	vpaddd		%xmm12, %xmm15, %xmm15
 | |
| +	vpshufb		%xmm7, %xmm0, %xmm0
 | |
| +	vpaddd		%xmm0, %xmm13, %xmm1
 | |
| +	vpxor		%xmm1, %xmm14, %xmm14
 | |
| +	vpslld		$20, %xmm14, %xmm13
 | |
| +	vpsrld		$12, %xmm14, %xmm14
 | |
| +	vpxor		%xmm13, %xmm14, %xmm14
 | |
| +	vpslldq		$12, %xmm3, %xmm13
 | |
| +	vpaddd		%xmm14, %xmm15, %xmm15
 | |
| +	vpxor		%xmm15, %xmm0, %xmm0
 | |
| +	vpshufb		%xmm6, %xmm0, %xmm0
 | |
| +	vpaddd		%xmm0, %xmm1, %xmm1
 | |
| +	vpxor		%xmm1, %xmm14, %xmm14
 | |
| +	vpshufd		$57, %xmm0, %xmm0
 | |
| +	vpshufd		$78, %xmm1, %xmm1
 | |
| +	vpslld		$25, %xmm14, %xmm12
 | |
| +	vpsrld		$7, %xmm14, %xmm14
 | |
| +	vpxor		%xmm12, %xmm14, %xmm14
 | |
| +	vpblendw	$51, %xmm5, %xmm4, %xmm12
 | |
| +	vpshufd		$147, %xmm14, %xmm14
 | |
| +	vpblendw	$192, %xmm13, %xmm12, %xmm12
 | |
| +	vpaddd		%xmm12, %xmm15, %xmm15
 | |
| +	vpaddd		%xmm14, %xmm15, %xmm15
 | |
| +	vpxor		%xmm15, %xmm0, %xmm0
 | |
| +	vpsrldq		$4, %xmm3, %xmm12
 | |
| +	vpshufb		%xmm7, %xmm0, %xmm0
 | |
| +	vpaddd		%xmm0, %xmm1, %xmm1
 | |
| +	vpxor		%xmm1, %xmm14, %xmm14
 | |
| +	vpslld		$20, %xmm14, %xmm13
 | |
| +	vpsrld		$12, %xmm14, %xmm14
 | |
| +	vpxor		%xmm13, %xmm14, %xmm14
 | |
| +	vpblendw	$48, %xmm2, %xmm5, %xmm13
 | |
| +	vpblendw	$3, %xmm12, %xmm13, %xmm13
 | |
| +	vpshufd		$156, %xmm13, %xmm13
 | |
| +	vpaddd		%xmm15, %xmm13, %xmm15
 | |
| +	vpaddd		%xmm14, %xmm15, %xmm15
 | |
| +	vpxor		%xmm15, %xmm0, %xmm0
 | |
| +	vpshufb		%xmm6, %xmm0, %xmm0
 | |
| +	vpaddd		%xmm0, %xmm1, %xmm1
 | |
| +	vpxor		%xmm1, %xmm14, %xmm14
 | |
| +	vpshufd		$147, %xmm0, %xmm0
 | |
| +	vpshufd		$78, %xmm1, %xmm1
 | |
| +	vpslld		$25, %xmm14, %xmm13
 | |
| +	vpsrld		$7, %xmm14, %xmm14
 | |
| +	vpxor		%xmm13, %xmm14, %xmm14
 | |
| +	vpunpcklqdq	%xmm2, %xmm4, %xmm13
 | |
| +	vpshufd		$57, %xmm14, %xmm14
 | |
| +	vpblendw	$12, %xmm12, %xmm13, %xmm12
 | |
| +	vpshufd		$180, %xmm12, %xmm12
 | |
| +	vpaddd		%xmm15, %xmm12, %xmm15
 | |
| +	vpaddd		%xmm14, %xmm15, %xmm15
 | |
| +	vpxor		%xmm15, %xmm0, %xmm0
 | |
| +	vpshufb		%xmm7, %xmm0, %xmm0
 | |
| +	vpaddd		%xmm0, %xmm1, %xmm1
 | |
| +	vpxor		%xmm1, %xmm14, %xmm14
 | |
| +	vpslld		$20, %xmm14, %xmm12
 | |
| +	vpsrld		$12, %xmm14, %xmm14
 | |
| +	vpxor		%xmm12, %xmm14, %xmm14
 | |
| +	vpunpckhqdq	%xmm9, %xmm4, %xmm12
 | |
| +	vpshufd		$198, %xmm12, %xmm12
 | |
| +	vpaddd		%xmm15, %xmm12, %xmm15
 | |
| +	vpaddd		%xmm14, %xmm15, %xmm15
 | |
| +	vpxor		%xmm15, %xmm0, %xmm0
 | |
| +	vpaddd		%xmm15, %xmm8, %xmm15
 | |
| +	vpshufb		%xmm6, %xmm0, %xmm0
 | |
| +	vpaddd		%xmm0, %xmm1, %xmm1
 | |
| +	vpxor		%xmm1, %xmm14, %xmm14
 | |
| +	vpshufd		$57, %xmm0, %xmm0
 | |
| +	vpshufd		$78, %xmm1, %xmm1
 | |
| +	vpslld		$25, %xmm14, %xmm12
 | |
| +	vpsrld		$7, %xmm14, %xmm14
 | |
| +	vpxor		%xmm12, %xmm14, %xmm14
 | |
| +	vpsrldq		$4, %xmm4, %xmm12
 | |
| +	vpshufd		$147, %xmm14, %xmm14
 | |
| +	vpaddd		%xmm14, %xmm15, %xmm15
 | |
| +	vpxor		%xmm15, %xmm0, %xmm0
 | |
| +	vpshufb		%xmm7, %xmm0, %xmm0
 | |
| +	vpaddd		%xmm0, %xmm1, %xmm1
 | |
| +	vpxor		%xmm1, %xmm14, %xmm14
 | |
| +	vpslld		$20, %xmm14, %xmm8
 | |
| +	vpsrld		$12, %xmm14, %xmm14
 | |
| +	vpxor		%xmm14, %xmm8, %xmm14
 | |
| +	vpblendw	$48, %xmm5, %xmm2, %xmm8
 | |
| +	vpblendw	$3, %xmm12, %xmm8, %xmm8
 | |
| +	vpunpckhqdq	%xmm5, %xmm4, %xmm12
 | |
| +	vpshufd		$75, %xmm8, %xmm8
 | |
| +	vpblendw	$60, %xmm10, %xmm12, %xmm10
 | |
| +	vpaddd		%xmm15, %xmm8, %xmm15
 | |
| +	vpaddd		%xmm14, %xmm15, %xmm15
 | |
| +	vpxor		%xmm0, %xmm15, %xmm0
 | |
| +	vpshufd		$45, %xmm10, %xmm10
 | |
| +	vpshufb		%xmm6, %xmm0, %xmm0
 | |
| +	vpaddd		%xmm15, %xmm10, %xmm15
 | |
| +	vpaddd		%xmm0, %xmm1, %xmm1
 | |
| +	vpxor		%xmm1, %xmm14, %xmm14
 | |
| +	vpshufd		$147, %xmm0, %xmm0
 | |
| +	vpshufd		$78, %xmm1, %xmm1
 | |
| +	vpslld		$25, %xmm14, %xmm8
 | |
| +	vpsrld		$7, %xmm14, %xmm14
 | |
| +	vpxor		%xmm14, %xmm8, %xmm8
 | |
| +	vpshufd		$57, %xmm8, %xmm8
 | |
| +	vpaddd		%xmm8, %xmm15, %xmm15
 | |
| +	vpxor		%xmm0, %xmm15, %xmm0
 | |
| +	vpshufb		%xmm7, %xmm0, %xmm0
 | |
| +	vpaddd		%xmm0, %xmm1, %xmm1
 | |
| +	vpxor		%xmm8, %xmm1, %xmm8
 | |
| +	vpslld		$20, %xmm8, %xmm10
 | |
| +	vpsrld		$12, %xmm8, %xmm8
 | |
| +	vpxor		%xmm8, %xmm10, %xmm10
 | |
| +	vpunpckldq	%xmm3, %xmm4, %xmm8
 | |
| +	vpunpcklqdq	%xmm9, %xmm8, %xmm9
 | |
| +	vpaddd		%xmm9, %xmm15, %xmm9
 | |
| +	vpaddd		%xmm10, %xmm9, %xmm9
 | |
| +	vpxor		%xmm0, %xmm9, %xmm8
 | |
| +	vpshufb		%xmm6, %xmm8, %xmm8
 | |
| +	vpaddd		%xmm8, %xmm1, %xmm1
 | |
| +	vpxor		%xmm1, %xmm10, %xmm10
 | |
| +	vpshufd		$57, %xmm8, %xmm8
 | |
| +	vpshufd		$78, %xmm1, %xmm1
 | |
| +	vpslld		$25, %xmm10, %xmm12
 | |
| +	vpsrld		$7, %xmm10, %xmm10
 | |
| +	vpxor		%xmm10, %xmm12, %xmm10
 | |
| +	vpblendw	$48, %xmm4, %xmm3, %xmm12
 | |
| +	vpshufd		$147, %xmm10, %xmm0
 | |
| +	vpunpckhdq	%xmm5, %xmm3, %xmm10
 | |
| +	vpshufd		$78, %xmm12, %xmm12
 | |
| +	vpunpcklqdq	%xmm4, %xmm10, %xmm10
 | |
| +	vpblendw	$192, %xmm2, %xmm10, %xmm10
 | |
| +	vpshufhw	$78, %xmm10, %xmm10
 | |
| +	vpaddd		%xmm10, %xmm9, %xmm10
 | |
| +	vpaddd		%xmm0, %xmm10, %xmm10
 | |
| +	vpxor		%xmm8, %xmm10, %xmm8
 | |
| +	vpshufb		%xmm7, %xmm8, %xmm8
 | |
| +	vpaddd		%xmm8, %xmm1, %xmm1
 | |
| +	vpxor		%xmm0, %xmm1, %xmm9
 | |
| +	vpslld		$20, %xmm9, %xmm0
 | |
| +	vpsrld		$12, %xmm9, %xmm9
 | |
| +	vpxor		%xmm9, %xmm0, %xmm0
 | |
| +	vpunpckhdq	%xmm5, %xmm4, %xmm9
 | |
| +	vpblendw	$240, %xmm9, %xmm2, %xmm13
 | |
| +	vpshufd		$39, %xmm13, %xmm13
 | |
| +	vpaddd		%xmm10, %xmm13, %xmm10
 | |
| +	vpaddd		%xmm0, %xmm10, %xmm10
 | |
| +	vpxor		%xmm8, %xmm10, %xmm8
 | |
| +	vpblendw	$12, %xmm4, %xmm2, %xmm13
 | |
| +	vpshufb		%xmm6, %xmm8, %xmm8
 | |
| +	vpslldq		$4, %xmm13, %xmm13
 | |
| +	vpblendw	$15, %xmm5, %xmm13, %xmm13
 | |
| +	vpaddd		%xmm8, %xmm1, %xmm1
 | |
| +	vpxor		%xmm1, %xmm0, %xmm0
 | |
| +	vpaddd		%xmm13, %xmm10, %xmm13
 | |
| +	vpshufd		$147, %xmm8, %xmm8
 | |
| +	vpshufd		$78, %xmm1, %xmm1
 | |
| +	vpslld		$25, %xmm0, %xmm14
 | |
| +	vpsrld		$7, %xmm0, %xmm0
 | |
| +	vpxor		%xmm0, %xmm14, %xmm14
 | |
| +	vpshufd		$57, %xmm14, %xmm14
 | |
| +	vpaddd		%xmm14, %xmm13, %xmm13
 | |
| +	vpxor		%xmm8, %xmm13, %xmm8
 | |
| +	vpaddd		%xmm13, %xmm12, %xmm12
 | |
| +	vpshufb		%xmm7, %xmm8, %xmm8
 | |
| +	vpaddd		%xmm8, %xmm1, %xmm1
 | |
| +	vpxor		%xmm14, %xmm1, %xmm14
 | |
| +	vpslld		$20, %xmm14, %xmm10
 | |
| +	vpsrld		$12, %xmm14, %xmm14
 | |
| +	vpxor		%xmm14, %xmm10, %xmm10
 | |
| +	vpaddd		%xmm10, %xmm12, %xmm12
 | |
| +	vpxor		%xmm8, %xmm12, %xmm8
 | |
| +	vpshufb		%xmm6, %xmm8, %xmm8
 | |
| +	vpaddd		%xmm8, %xmm1, %xmm1
 | |
| +	vpxor		%xmm1, %xmm10, %xmm0
 | |
| +	vpshufd		$57, %xmm8, %xmm8
 | |
| +	vpshufd		$78, %xmm1, %xmm1
 | |
| +	vpslld		$25, %xmm0, %xmm10
 | |
| +	vpsrld		$7, %xmm0, %xmm0
 | |
| +	vpxor		%xmm0, %xmm10, %xmm10
 | |
| +	vpblendw	$48, %xmm2, %xmm3, %xmm0
 | |
| +	vpblendw	$15, %xmm11, %xmm0, %xmm0
 | |
| +	vpshufd		$147, %xmm10, %xmm10
 | |
| +	vpshufd		$114, %xmm0, %xmm0
 | |
| +	vpaddd		%xmm12, %xmm0, %xmm0
 | |
| +	vpaddd		%xmm10, %xmm0, %xmm0
 | |
| +	vpxor		%xmm8, %xmm0, %xmm8
 | |
| +	vpshufb		%xmm7, %xmm8, %xmm8
 | |
| +	vpaddd		%xmm8, %xmm1, %xmm1
 | |
| +	vpxor		%xmm10, %xmm1, %xmm10
 | |
| +	vpslld		$20, %xmm10, %xmm11
 | |
| +	vpsrld		$12, %xmm10, %xmm10
 | |
| +	vpxor		%xmm10, %xmm11, %xmm10
 | |
| +	vpslldq		$4, %xmm4, %xmm11
 | |
| +	vpblendw	$192, %xmm11, %xmm3, %xmm3
 | |
| +	vpunpckldq	%xmm5, %xmm4, %xmm4
 | |
| +	vpshufd		$99, %xmm3, %xmm3
 | |
| +	vpaddd		%xmm0, %xmm3, %xmm3
 | |
| +	vpaddd		%xmm10, %xmm3, %xmm3
 | |
| +	vpxor		%xmm8, %xmm3, %xmm11
 | |
| +	vpunpckldq	%xmm5, %xmm2, %xmm0
 | |
| +	vpblendw	$192, %xmm2, %xmm5, %xmm2
 | |
| +	vpshufb		%xmm6, %xmm11, %xmm11
 | |
| +	vpunpckhqdq	%xmm0, %xmm9, %xmm0
 | |
| +	vpblendw	$15, %xmm4, %xmm2, %xmm4
 | |
| +	vpaddd		%xmm11, %xmm1, %xmm1
 | |
| +	vpxor		%xmm1, %xmm10, %xmm10
 | |
| +	vpshufd		$147, %xmm11, %xmm11
 | |
| +	vpshufd		$201, %xmm0, %xmm0
 | |
| +	vpslld		$25, %xmm10, %xmm8
 | |
| +	vpsrld		$7, %xmm10, %xmm10
 | |
| +	vpxor		%xmm10, %xmm8, %xmm10
 | |
| +	vpshufd		$78, %xmm1, %xmm1
 | |
| +	vpaddd		%xmm3, %xmm0, %xmm0
 | |
| +	vpshufd		$27, %xmm4, %xmm4
 | |
| +	vpshufd		$57, %xmm10, %xmm10
 | |
| +	vpaddd		%xmm10, %xmm0, %xmm0
 | |
| +	vpxor		%xmm11, %xmm0, %xmm11
 | |
| +	vpaddd		%xmm0, %xmm4, %xmm0
 | |
| +	vpshufb		%xmm7, %xmm11, %xmm7
 | |
| +	vpaddd		%xmm7, %xmm1, %xmm1
 | |
| +	vpxor		%xmm10, %xmm1, %xmm10
 | |
| +	vpslld		$20, %xmm10, %xmm8
 | |
| +	vpsrld		$12, %xmm10, %xmm10
 | |
| +	vpxor		%xmm10, %xmm8, %xmm8
 | |
| +	vpaddd		%xmm8, %xmm0, %xmm0
 | |
| +	vpxor		%xmm7, %xmm0, %xmm7
 | |
| +	vpshufb		%xmm6, %xmm7, %xmm6
 | |
| +	vpaddd		%xmm6, %xmm1, %xmm1
 | |
| +	vpxor		%xmm1, %xmm8, %xmm8
 | |
| +	vpshufd		$78, %xmm1, %xmm1
 | |
| +	vpshufd		$57, %xmm6, %xmm6
 | |
| +	vpslld		$25, %xmm8, %xmm2
 | |
| +	vpsrld		$7, %xmm8, %xmm8
 | |
| +	vpxor		%xmm8, %xmm2, %xmm8
 | |
| +	vpxor		(%rdi), %xmm1, %xmm1
 | |
| +	vpshufd		$147, %xmm8, %xmm8
 | |
| +	vpxor		%xmm0, %xmm1, %xmm0
 | |
| +	vmovups		%xmm0, (%rdi)
 | |
| +	vpxor		16(%rdi), %xmm8, %xmm0
 | |
| +	vpxor		%xmm6, %xmm0, %xmm6
 | |
| +	vmovups		%xmm6, 16(%rdi)
 | |
| +	ret
 | |
| +ENDPROC(blake2s_compress_avx)
 | |
| --- /dev/null	2017-07-05 16:27:37.615351856 +0200
 | |
| +++ b/net/wireguard/crypto/chacha20-avx2-x86_64.S	2017-07-06 18:17:33.000000000 +0200
 | |
| @@ -0,0 +1,446 @@
 | |
| +/*
 | |
| + * ChaCha20 256-bit cipher algorithm, RFC7539, x64 AVX2 functions
 | |
| + *
 | |
| + * Copyright (C) 2015 Martin Willi
 | |
| + *
 | |
| + * This program is free software; you can redistribute it and/or modify
 | |
| + * it under the terms of the GNU General Public License as published by
 | |
| + * the Free Software Foundation; either version 2 of the License, or
 | |
| + * (at your option) any later version.
 | |
| + */
 | |
| +
 | |
| +#include <linux/linkage.h>
 | |
| +
 | |
| +.section .rodata.cst32.ROT8, "aM", @progbits, 32
 | |
| +.align 32
 | |
| +ROT8:	.octa 0x0e0d0c0f0a09080b0605040702010003	# vpshufb mask used for rotl32 by 8
 | |
| +	.octa 0x0e0d0c0f0a09080b0605040702010003	# same mask for the upper 128 bits
 | |
| +.section .rodata.cst32.ROT16, "aM", @progbits, 32
 | |
| +.align 32
 | |
| +ROT16:	.octa 0x0d0c0f0e09080b0a0504070601000302	# vpshufb mask used for rotl32 by 16
 | |
| +	.octa 0x0d0c0f0e09080b0a0504070601000302	# same mask for the upper 128 bits
 | |
| +.section .rodata.cst32.CTRINC, "aM", @progbits, 32
 | |
| +.align 32
 | |
| +CTRINC:	.octa 0x00000003000000020000000100000000	# 32-bit counter offsets 0..3
 | |
| +	.octa 0x00000007000000060000000500000004	# 32-bit counter offsets 4..7
 | |
| +
 | |
| +.text
 | |
| +
 | |
| +ENTRY(chacha20_asm_8block_xor_avx2)
 | |
| +	# %rdi: Input state matrix, s
 | |
| +	# %rsi: 8 data blocks output, o
 | |
| +	# %rdx: 8 data blocks input, i
 | |
| +
 | |
| +	# This function encrypts eight consecutive ChaCha20 blocks by loading
 | |
| +	# the state matrix in AVX registers eight times. As we need some
 | |
| +	# scratch registers, we save the first four registers on the stack. The
 | |
| +	# algorithm performs each operation on the corresponding word of each
 | |
| +	# state matrix, hence requires no word shuffling. For final XORing step
 | |
| +	# we transpose the matrix by interleaving 32-, 64- and then 128-bit
 | |
| +	# words, which allows us to do XOR in AVX registers. 8/16-bit word
 | |
| +	# rotation is done with the slightly better performing byte shuffling,
 | |
| +	# 7/12-bit word rotation uses traditional shift+OR.
 | |
| +
 | |
| +	vzeroupper
 | |
| +	# 4 * 32 byte stack, 32-byte aligned
 | |
| +	mov		%rsp, %r8	# keep the caller %rsp in %r8; restored before ret
 | |
| +	and		$~31, %rsp
 | |
| +	sub		$0x80, %rsp
 | |
| +
 | |
| +	# x0..15[0-7] = s[0..15]
 | |
| +	vpbroadcastd	0x00(%rdi),%ymm0
 | |
| +	vpbroadcastd	0x04(%rdi),%ymm1
 | |
| +	vpbroadcastd	0x08(%rdi),%ymm2
 | |
| +	vpbroadcastd	0x0c(%rdi),%ymm3
 | |
| +	vpbroadcastd	0x10(%rdi),%ymm4
 | |
| +	vpbroadcastd	0x14(%rdi),%ymm5
 | |
| +	vpbroadcastd	0x18(%rdi),%ymm6
 | |
| +	vpbroadcastd	0x1c(%rdi),%ymm7
 | |
| +	vpbroadcastd	0x20(%rdi),%ymm8
 | |
| +	vpbroadcastd	0x24(%rdi),%ymm9
 | |
| +	vpbroadcastd	0x28(%rdi),%ymm10
 | |
| +	vpbroadcastd	0x2c(%rdi),%ymm11
 | |
| +	vpbroadcastd	0x30(%rdi),%ymm12
 | |
| +	vpbroadcastd	0x34(%rdi),%ymm13
 | |
| +	vpbroadcastd	0x38(%rdi),%ymm14
 | |
| +	vpbroadcastd	0x3c(%rdi),%ymm15
 | |
| +	# x0..3 on stack
 | |
| +	vmovdqa		%ymm0,0x00(%rsp)
 | |
| +	vmovdqa		%ymm1,0x20(%rsp)
 | |
| +	vmovdqa		%ymm2,0x40(%rsp)
 | |
| +	vmovdqa		%ymm3,0x60(%rsp)
 | |
| +
 | |
| +	vmovdqa		CTRINC(%rip),%ymm1	# per-block counter offsets 0..7
 | |
| +	vmovdqa		ROT8(%rip),%ymm2	# byte-shuffle mask for rotl32 by 8
 | |
| +	vmovdqa		ROT16(%rip),%ymm3	# byte-shuffle mask for rotl32 by 16
 | |
| +
 | |
| +	# x12 += counter values 0-7
 | |
| +	vpaddd		%ymm1,%ymm12,%ymm12
 | |
| +
 | |
| +	mov		$10,%ecx	# loop counter: 10 double rounds
 | |
| +
 | |
| +.Ldoubleround8:
 | |
| +	# x0 += x4, x12 = rotl32(x12 ^ x0, 16)
 | |
| +	vpaddd		0x00(%rsp),%ymm4,%ymm0
 | |
| +	vmovdqa		%ymm0,0x00(%rsp)
 | |
| +	vpxor		%ymm0,%ymm12,%ymm12
 | |
| +	vpshufb		%ymm3,%ymm12,%ymm12
 | |
| +	# x1 += x5, x13 = rotl32(x13 ^ x1, 16)
 | |
| +	vpaddd		0x20(%rsp),%ymm5,%ymm0
 | |
| +	vmovdqa		%ymm0,0x20(%rsp)
 | |
| +	vpxor		%ymm0,%ymm13,%ymm13
 | |
| +	vpshufb		%ymm3,%ymm13,%ymm13
 | |
| +	# x2 += x6, x14 = rotl32(x14 ^ x2, 16)
 | |
| +	vpaddd		0x40(%rsp),%ymm6,%ymm0
 | |
| +	vmovdqa		%ymm0,0x40(%rsp)
 | |
| +	vpxor		%ymm0,%ymm14,%ymm14
 | |
| +	vpshufb		%ymm3,%ymm14,%ymm14
 | |
| +	# x3 += x7, x15 = rotl32(x15 ^ x3, 16)
 | |
| +	vpaddd		0x60(%rsp),%ymm7,%ymm0
 | |
| +	vmovdqa		%ymm0,0x60(%rsp)
 | |
| +	vpxor		%ymm0,%ymm15,%ymm15
 | |
| +	vpshufb		%ymm3,%ymm15,%ymm15
 | |
| +
 | |
| +	# x8 += x12, x4 = rotl32(x4 ^ x8, 12)
 | |
| +	vpaddd		%ymm12,%ymm8,%ymm8
 | |
| +	vpxor		%ymm8,%ymm4,%ymm4
 | |
| +	vpslld		$12,%ymm4,%ymm0
 | |
| +	vpsrld		$20,%ymm4,%ymm4
 | |
| +	vpor		%ymm0,%ymm4,%ymm4
 | |
| +	# x9 += x13, x5 = rotl32(x5 ^ x9, 12)
 | |
| +	vpaddd		%ymm13,%ymm9,%ymm9
 | |
| +	vpxor		%ymm9,%ymm5,%ymm5
 | |
| +	vpslld		$12,%ymm5,%ymm0
 | |
| +	vpsrld		$20,%ymm5,%ymm5
 | |
| +	vpor		%ymm0,%ymm5,%ymm5
 | |
| +	# x10 += x14, x6 = rotl32(x6 ^ x10, 12)
 | |
| +	vpaddd		%ymm14,%ymm10,%ymm10
 | |
| +	vpxor		%ymm10,%ymm6,%ymm6
 | |
| +	vpslld		$12,%ymm6,%ymm0
 | |
| +	vpsrld		$20,%ymm6,%ymm6
 | |
| +	vpor		%ymm0,%ymm6,%ymm6
 | |
| +	# x11 += x15, x7 = rotl32(x7 ^ x11, 12)
 | |
| +	vpaddd		%ymm15,%ymm11,%ymm11
 | |
| +	vpxor		%ymm11,%ymm7,%ymm7
 | |
| +	vpslld		$12,%ymm7,%ymm0
 | |
| +	vpsrld		$20,%ymm7,%ymm7
 | |
| +	vpor		%ymm0,%ymm7,%ymm7
 | |
| +
 | |
| +	# x0 += x4, x12 = rotl32(x12 ^ x0, 8)
 | |
| +	vpaddd		0x00(%rsp),%ymm4,%ymm0
 | |
| +	vmovdqa		%ymm0,0x00(%rsp)
 | |
| +	vpxor		%ymm0,%ymm12,%ymm12
 | |
| +	vpshufb		%ymm2,%ymm12,%ymm12
 | |
| +	# x1 += x5, x13 = rotl32(x13 ^ x1, 8)
 | |
| +	vpaddd		0x20(%rsp),%ymm5,%ymm0
 | |
| +	vmovdqa		%ymm0,0x20(%rsp)
 | |
| +	vpxor		%ymm0,%ymm13,%ymm13
 | |
| +	vpshufb		%ymm2,%ymm13,%ymm13
 | |
| +	# x2 += x6, x14 = rotl32(x14 ^ x2, 8)
 | |
| +	vpaddd		0x40(%rsp),%ymm6,%ymm0
 | |
| +	vmovdqa		%ymm0,0x40(%rsp)
 | |
| +	vpxor		%ymm0,%ymm14,%ymm14
 | |
| +	vpshufb		%ymm2,%ymm14,%ymm14
 | |
| +	# x3 += x7, x15 = rotl32(x15 ^ x3, 8)
 | |
| +	vpaddd		0x60(%rsp),%ymm7,%ymm0
 | |
| +	vmovdqa		%ymm0,0x60(%rsp)
 | |
| +	vpxor		%ymm0,%ymm15,%ymm15
 | |
| +	vpshufb		%ymm2,%ymm15,%ymm15
 | |
| +
 | |
| +	# x8 += x12, x4 = rotl32(x4 ^ x8, 7)
 | |
| +	vpaddd		%ymm12,%ymm8,%ymm8
 | |
| +	vpxor		%ymm8,%ymm4,%ymm4
 | |
| +	vpslld		$7,%ymm4,%ymm0
 | |
| +	vpsrld		$25,%ymm4,%ymm4
 | |
| +	vpor		%ymm0,%ymm4,%ymm4
 | |
| +	# x9 += x13, x5 = rotl32(x5 ^ x9, 7)
 | |
| +	vpaddd		%ymm13,%ymm9,%ymm9
 | |
| +	vpxor		%ymm9,%ymm5,%ymm5
 | |
| +	vpslld		$7,%ymm5,%ymm0
 | |
| +	vpsrld		$25,%ymm5,%ymm5
 | |
| +	vpor		%ymm0,%ymm5,%ymm5
 | |
| +	# x10 += x14, x6 = rotl32(x6 ^ x10, 7)
 | |
| +	vpaddd		%ymm14,%ymm10,%ymm10
 | |
| +	vpxor		%ymm10,%ymm6,%ymm6
 | |
| +	vpslld		$7,%ymm6,%ymm0
 | |
| +	vpsrld		$25,%ymm6,%ymm6
 | |
| +	vpor		%ymm0,%ymm6,%ymm6
 | |
| +	# x11 += x15, x7 = rotl32(x7 ^ x11, 7)
 | |
| +	vpaddd		%ymm15,%ymm11,%ymm11
 | |
| +	vpxor		%ymm11,%ymm7,%ymm7
 | |
| +	vpslld		$7,%ymm7,%ymm0
 | |
| +	vpsrld		$25,%ymm7,%ymm7
 | |
| +	vpor		%ymm0,%ymm7,%ymm7
 | |
| +
 | |
| +	# x0 += x5, x15 = rotl32(x15 ^ x0, 16)
 | |
| +	vpaddd		0x00(%rsp),%ymm5,%ymm0
 | |
| +	vmovdqa		%ymm0,0x00(%rsp)
 | |
| +	vpxor		%ymm0,%ymm15,%ymm15
 | |
| +	vpshufb		%ymm3,%ymm15,%ymm15
 | |
| +	# x1 += x6, x12 = rotl32(x12 ^ x1, 16)
 | |
| +	vpaddd		0x20(%rsp),%ymm6,%ymm0
 | |
| +	vmovdqa		%ymm0,0x20(%rsp)
 | |
| +	vpxor		%ymm0,%ymm12,%ymm12
 | |
| +	vpshufb		%ymm3,%ymm12,%ymm12
 | |
| +	# x2 += x7, x13 = rotl32(x13 ^ x2, 16)
 | |
| +	vpaddd		0x40(%rsp),%ymm7,%ymm0
 | |
| +	vmovdqa		%ymm0,0x40(%rsp)
 | |
| +	vpxor		%ymm0,%ymm13,%ymm13
 | |
| +	vpshufb		%ymm3,%ymm13,%ymm13
 | |
| +	# x3 += x4, x14 = rotl32(x14 ^ x3, 16)
 | |
| +	vpaddd		0x60(%rsp),%ymm4,%ymm0
 | |
| +	vmovdqa		%ymm0,0x60(%rsp)
 | |
| +	vpxor		%ymm0,%ymm14,%ymm14
 | |
| +	vpshufb		%ymm3,%ymm14,%ymm14
 | |
| +
 | |
| +	# x10 += x15, x5 = rotl32(x5 ^ x10, 12)
 | |
| +	vpaddd		%ymm15,%ymm10,%ymm10
 | |
| +	vpxor		%ymm10,%ymm5,%ymm5
 | |
| +	vpslld		$12,%ymm5,%ymm0
 | |
| +	vpsrld		$20,%ymm5,%ymm5
 | |
| +	vpor		%ymm0,%ymm5,%ymm5
 | |
| +	# x11 += x12, x6 = rotl32(x6 ^ x11, 12)
 | |
| +	vpaddd		%ymm12,%ymm11,%ymm11
 | |
| +	vpxor		%ymm11,%ymm6,%ymm6
 | |
| +	vpslld		$12,%ymm6,%ymm0
 | |
| +	vpsrld		$20,%ymm6,%ymm6
 | |
| +	vpor		%ymm0,%ymm6,%ymm6
 | |
| +	# x8 += x13, x7 = rotl32(x7 ^ x8, 12)
 | |
| +	vpaddd		%ymm13,%ymm8,%ymm8
 | |
| +	vpxor		%ymm8,%ymm7,%ymm7
 | |
| +	vpslld		$12,%ymm7,%ymm0
 | |
| +	vpsrld		$20,%ymm7,%ymm7
 | |
| +	vpor		%ymm0,%ymm7,%ymm7
 | |
| +	# x9 += x14, x4 = rotl32(x4 ^ x9, 12)
 | |
| +	vpaddd		%ymm14,%ymm9,%ymm9
 | |
| +	vpxor		%ymm9,%ymm4,%ymm4
 | |
| +	vpslld		$12,%ymm4,%ymm0
 | |
| +	vpsrld		$20,%ymm4,%ymm4
 | |
| +	vpor		%ymm0,%ymm4,%ymm4
 | |
| +
 | |
| +	# x0 += x5, x15 = rotl32(x15 ^ x0, 8)
 | |
| +	vpaddd		0x00(%rsp),%ymm5,%ymm0
 | |
| +	vmovdqa		%ymm0,0x00(%rsp)
 | |
| +	vpxor		%ymm0,%ymm15,%ymm15
 | |
| +	vpshufb		%ymm2,%ymm15,%ymm15
 | |
| +	# x1 += x6, x12 = rotl32(x12 ^ x1, 8)
 | |
| +	vpaddd		0x20(%rsp),%ymm6,%ymm0
 | |
| +	vmovdqa		%ymm0,0x20(%rsp)
 | |
| +	vpxor		%ymm0,%ymm12,%ymm12
 | |
| +	vpshufb		%ymm2,%ymm12,%ymm12
 | |
| +	# x2 += x7, x13 = rotl32(x13 ^ x2, 8)
 | |
| +	vpaddd		0x40(%rsp),%ymm7,%ymm0
 | |
| +	vmovdqa		%ymm0,0x40(%rsp)
 | |
| +	vpxor		%ymm0,%ymm13,%ymm13
 | |
| +	vpshufb		%ymm2,%ymm13,%ymm13
 | |
| +	# x3 += x4, x14 = rotl32(x14 ^ x3, 8)
 | |
| +	vpaddd		0x60(%rsp),%ymm4,%ymm0
 | |
| +	vmovdqa		%ymm0,0x60(%rsp)
 | |
| +	vpxor		%ymm0,%ymm14,%ymm14
 | |
| +	vpshufb		%ymm2,%ymm14,%ymm14
 | |
| +
 | |
| +	# x10 += x15, x5 = rotl32(x5 ^ x10, 7)
 | |
| +	vpaddd		%ymm15,%ymm10,%ymm10
 | |
| +	vpxor		%ymm10,%ymm5,%ymm5
 | |
| +	vpslld		$7,%ymm5,%ymm0
 | |
| +	vpsrld		$25,%ymm5,%ymm5
 | |
| +	vpor		%ymm0,%ymm5,%ymm5
 | |
| +	# x11 += x12, x6 = rotl32(x6 ^ x11, 7)
 | |
| +	vpaddd		%ymm12,%ymm11,%ymm11
 | |
| +	vpxor		%ymm11,%ymm6,%ymm6
 | |
| +	vpslld		$7,%ymm6,%ymm0
 | |
| +	vpsrld		$25,%ymm6,%ymm6
 | |
| +	vpor		%ymm0,%ymm6,%ymm6
 | |
| +	# x8 += x13, x7 = rotl32(x7 ^ x8, 7)
 | |
| +	vpaddd		%ymm13,%ymm8,%ymm8
 | |
| +	vpxor		%ymm8,%ymm7,%ymm7
 | |
| +	vpslld		$7,%ymm7,%ymm0
 | |
| +	vpsrld		$25,%ymm7,%ymm7
 | |
| +	vpor		%ymm0,%ymm7,%ymm7
 | |
| +	# x9 += x14, x4 = rotl32(x4 ^ x9, 7)
 | |
| +	vpaddd		%ymm14,%ymm9,%ymm9
 | |
| +	vpxor		%ymm9,%ymm4,%ymm4
 | |
| +	vpslld		$7,%ymm4,%ymm0
 | |
| +	vpsrld		$25,%ymm4,%ymm4
 | |
| +	vpor		%ymm0,%ymm4,%ymm4
 | |
| +
 | |
| +	dec		%ecx
 | |
| +	jnz		.Ldoubleround8
 | |
| +
 | |
| +	# x0..15[0-7] += s[0..15]
 | |
| +	vpbroadcastd	0x00(%rdi),%ymm0
 | |
| +	vpaddd		0x00(%rsp),%ymm0,%ymm0
 | |
| +	vmovdqa		%ymm0,0x00(%rsp)
 | |
| +	vpbroadcastd	0x04(%rdi),%ymm0
 | |
| +	vpaddd		0x20(%rsp),%ymm0,%ymm0
 | |
| +	vmovdqa		%ymm0,0x20(%rsp)
 | |
| +	vpbroadcastd	0x08(%rdi),%ymm0
 | |
| +	vpaddd		0x40(%rsp),%ymm0,%ymm0
 | |
| +	vmovdqa		%ymm0,0x40(%rsp)
 | |
| +	vpbroadcastd	0x0c(%rdi),%ymm0
 | |
| +	vpaddd		0x60(%rsp),%ymm0,%ymm0
 | |
| +	vmovdqa		%ymm0,0x60(%rsp)
 | |
| +	vpbroadcastd	0x10(%rdi),%ymm0
 | |
| +	vpaddd		%ymm0,%ymm4,%ymm4
 | |
| +	vpbroadcastd	0x14(%rdi),%ymm0
 | |
| +	vpaddd		%ymm0,%ymm5,%ymm5
 | |
| +	vpbroadcastd	0x18(%rdi),%ymm0
 | |
| +	vpaddd		%ymm0,%ymm6,%ymm6
 | |
| +	vpbroadcastd	0x1c(%rdi),%ymm0
 | |
| +	vpaddd		%ymm0,%ymm7,%ymm7
 | |
| +	vpbroadcastd	0x20(%rdi),%ymm0
 | |
| +	vpaddd		%ymm0,%ymm8,%ymm8
 | |
| +	vpbroadcastd	0x24(%rdi),%ymm0
 | |
| +	vpaddd		%ymm0,%ymm9,%ymm9
 | |
| +	vpbroadcastd	0x28(%rdi),%ymm0
 | |
| +	vpaddd		%ymm0,%ymm10,%ymm10
 | |
| +	vpbroadcastd	0x2c(%rdi),%ymm0
 | |
| +	vpaddd		%ymm0,%ymm11,%ymm11
 | |
| +	vpbroadcastd	0x30(%rdi),%ymm0
 | |
| +	vpaddd		%ymm0,%ymm12,%ymm12
 | |
| +	vpbroadcastd	0x34(%rdi),%ymm0
 | |
| +	vpaddd		%ymm0,%ymm13,%ymm13
 | |
| +	vpbroadcastd	0x38(%rdi),%ymm0
 | |
| +	vpaddd		%ymm0,%ymm14,%ymm14
 | |
| +	vpbroadcastd	0x3c(%rdi),%ymm0
 | |
| +	vpaddd		%ymm0,%ymm15,%ymm15
 | |
| +
 | |
| +	# x12 += counter values 0-7
 | |
| +	vpaddd		%ymm1,%ymm12,%ymm12
 | |
| +
 | |
| +	# interleave 32-bit words in state n, n+1
 | |
| +	vmovdqa		0x00(%rsp),%ymm0
 | |
| +	vmovdqa		0x20(%rsp),%ymm1
 | |
| +	vpunpckldq	%ymm1,%ymm0,%ymm2
 | |
| +	vpunpckhdq	%ymm1,%ymm0,%ymm1
 | |
| +	vmovdqa		%ymm2,0x00(%rsp)
 | |
| +	vmovdqa		%ymm1,0x20(%rsp)
 | |
| +	vmovdqa		0x40(%rsp),%ymm0
 | |
| +	vmovdqa		0x60(%rsp),%ymm1
 | |
| +	vpunpckldq	%ymm1,%ymm0,%ymm2
 | |
| +	vpunpckhdq	%ymm1,%ymm0,%ymm1
 | |
| +	vmovdqa		%ymm2,0x40(%rsp)
 | |
| +	vmovdqa		%ymm1,0x60(%rsp)
 | |
| +	vmovdqa		%ymm4,%ymm0
 | |
| +	vpunpckldq	%ymm5,%ymm0,%ymm4
 | |
| +	vpunpckhdq	%ymm5,%ymm0,%ymm5
 | |
| +	vmovdqa		%ymm6,%ymm0
 | |
| +	vpunpckldq	%ymm7,%ymm0,%ymm6
 | |
| +	vpunpckhdq	%ymm7,%ymm0,%ymm7
 | |
| +	vmovdqa		%ymm8,%ymm0
 | |
| +	vpunpckldq	%ymm9,%ymm0,%ymm8
 | |
| +	vpunpckhdq	%ymm9,%ymm0,%ymm9
 | |
| +	vmovdqa		%ymm10,%ymm0
 | |
| +	vpunpckldq	%ymm11,%ymm0,%ymm10
 | |
| +	vpunpckhdq	%ymm11,%ymm0,%ymm11
 | |
| +	vmovdqa		%ymm12,%ymm0
 | |
| +	vpunpckldq	%ymm13,%ymm0,%ymm12
 | |
| +	vpunpckhdq	%ymm13,%ymm0,%ymm13
 | |
| +	vmovdqa		%ymm14,%ymm0
 | |
| +	vpunpckldq	%ymm15,%ymm0,%ymm14
 | |
| +	vpunpckhdq	%ymm15,%ymm0,%ymm15
 | |
| +
 | |
| +	# interleave 64-bit words in state n, n+2
 | |
| +	vmovdqa		0x00(%rsp),%ymm0
 | |
| +	vmovdqa		0x40(%rsp),%ymm2
 | |
| +	vpunpcklqdq	%ymm2,%ymm0,%ymm1
 | |
| +	vpunpckhqdq	%ymm2,%ymm0,%ymm2
 | |
| +	vmovdqa		%ymm1,0x00(%rsp)
 | |
| +	vmovdqa		%ymm2,0x40(%rsp)
 | |
| +	vmovdqa		0x20(%rsp),%ymm0
 | |
| +	vmovdqa		0x60(%rsp),%ymm2
 | |
| +	vpunpcklqdq	%ymm2,%ymm0,%ymm1
 | |
| +	vpunpckhqdq	%ymm2,%ymm0,%ymm2
 | |
| +	vmovdqa		%ymm1,0x20(%rsp)
 | |
| +	vmovdqa		%ymm2,0x60(%rsp)
 | |
| +	vmovdqa		%ymm4,%ymm0
 | |
| +	vpunpcklqdq	%ymm6,%ymm0,%ymm4
 | |
| +	vpunpckhqdq	%ymm6,%ymm0,%ymm6
 | |
| +	vmovdqa		%ymm5,%ymm0
 | |
| +	vpunpcklqdq	%ymm7,%ymm0,%ymm5
 | |
| +	vpunpckhqdq	%ymm7,%ymm0,%ymm7
 | |
| +	vmovdqa		%ymm8,%ymm0
 | |
| +	vpunpcklqdq	%ymm10,%ymm0,%ymm8
 | |
| +	vpunpckhqdq	%ymm10,%ymm0,%ymm10
 | |
| +	vmovdqa		%ymm9,%ymm0
 | |
| +	vpunpcklqdq	%ymm11,%ymm0,%ymm9
 | |
| +	vpunpckhqdq	%ymm11,%ymm0,%ymm11
 | |
| +	vmovdqa		%ymm12,%ymm0
 | |
| +	vpunpcklqdq	%ymm14,%ymm0,%ymm12
 | |
| +	vpunpckhqdq	%ymm14,%ymm0,%ymm14
 | |
| +	vmovdqa		%ymm13,%ymm0
 | |
| +	vpunpcklqdq	%ymm15,%ymm0,%ymm13
 | |
| +	vpunpckhqdq	%ymm15,%ymm0,%ymm15
 | |
| +
 | |
| +	# interleave 128-bit words in state n, n+4
 | |
| +	vmovdqa		0x00(%rsp),%ymm0
 | |
| +	vperm2i128	$0x20,%ymm4,%ymm0,%ymm1
 | |
| +	vperm2i128	$0x31,%ymm4,%ymm0,%ymm4
 | |
| +	vmovdqa		%ymm1,0x00(%rsp)
 | |
| +	vmovdqa		0x20(%rsp),%ymm0
 | |
| +	vperm2i128	$0x20,%ymm5,%ymm0,%ymm1
 | |
| +	vperm2i128	$0x31,%ymm5,%ymm0,%ymm5
 | |
| +	vmovdqa		%ymm1,0x20(%rsp)
 | |
| +	vmovdqa		0x40(%rsp),%ymm0
 | |
| +	vperm2i128	$0x20,%ymm6,%ymm0,%ymm1
 | |
| +	vperm2i128	$0x31,%ymm6,%ymm0,%ymm6
 | |
| +	vmovdqa		%ymm1,0x40(%rsp)
 | |
| +	vmovdqa		0x60(%rsp),%ymm0
 | |
| +	vperm2i128	$0x20,%ymm7,%ymm0,%ymm1
 | |
| +	vperm2i128	$0x31,%ymm7,%ymm0,%ymm7
 | |
| +	vmovdqa		%ymm1,0x60(%rsp)
 | |
| +	vperm2i128	$0x20,%ymm12,%ymm8,%ymm0
 | |
| +	vperm2i128	$0x31,%ymm12,%ymm8,%ymm12
 | |
| +	vmovdqa		%ymm0,%ymm8
 | |
| +	vperm2i128	$0x20,%ymm13,%ymm9,%ymm0
 | |
| +	vperm2i128	$0x31,%ymm13,%ymm9,%ymm13
 | |
| +	vmovdqa		%ymm0,%ymm9
 | |
| +	vperm2i128	$0x20,%ymm14,%ymm10,%ymm0
 | |
| +	vperm2i128	$0x31,%ymm14,%ymm10,%ymm14
 | |
| +	vmovdqa		%ymm0,%ymm10
 | |
| +	vperm2i128	$0x20,%ymm15,%ymm11,%ymm0
 | |
| +	vperm2i128	$0x31,%ymm15,%ymm11,%ymm15
 | |
| +	vmovdqa		%ymm0,%ymm11
 | |
| +
 | |
| +	# xor with corresponding input, write to output
 | |
| +	vmovdqa		0x00(%rsp),%ymm0
 | |
| +	vpxor		0x0000(%rdx),%ymm0,%ymm0
 | |
| +	vmovdqu		%ymm0,0x0000(%rsi)
 | |
| +	vmovdqa		0x20(%rsp),%ymm0
 | |
| +	vpxor		0x0080(%rdx),%ymm0,%ymm0
 | |
| +	vmovdqu		%ymm0,0x0080(%rsi)
 | |
| +	vmovdqa		0x40(%rsp),%ymm0
 | |
| +	vpxor		0x0040(%rdx),%ymm0,%ymm0
 | |
| +	vmovdqu		%ymm0,0x0040(%rsi)
 | |
| +	vmovdqa		0x60(%rsp),%ymm0
 | |
| +	vpxor		0x00c0(%rdx),%ymm0,%ymm0
 | |
| +	vmovdqu		%ymm0,0x00c0(%rsi)
 | |
| +	vpxor		0x0100(%rdx),%ymm4,%ymm4
 | |
| +	vmovdqu		%ymm4,0x0100(%rsi)
 | |
| +	vpxor		0x0180(%rdx),%ymm5,%ymm5
 | |
| +	vmovdqu		%ymm5,0x00180(%rsi)
 | |
| +	vpxor		0x0140(%rdx),%ymm6,%ymm6
 | |
| +	vmovdqu		%ymm6,0x0140(%rsi)
 | |
| +	vpxor		0x01c0(%rdx),%ymm7,%ymm7
 | |
| +	vmovdqu		%ymm7,0x01c0(%rsi)
 | |
| +	vpxor		0x0020(%rdx),%ymm8,%ymm8
 | |
| +	vmovdqu		%ymm8,0x0020(%rsi)
 | |
| +	vpxor		0x00a0(%rdx),%ymm9,%ymm9
 | |
| +	vmovdqu		%ymm9,0x00a0(%rsi)
 | |
| +	vpxor		0x0060(%rdx),%ymm10,%ymm10
 | |
| +	vmovdqu		%ymm10,0x0060(%rsi)
 | |
| +	vpxor		0x00e0(%rdx),%ymm11,%ymm11
 | |
| +	vmovdqu		%ymm11,0x00e0(%rsi)
 | |
| +	vpxor		0x0120(%rdx),%ymm12,%ymm12
 | |
| +	vmovdqu		%ymm12,0x0120(%rsi)
 | |
| +	vpxor		0x01a0(%rdx),%ymm13,%ymm13
 | |
| +	vmovdqu		%ymm13,0x01a0(%rsi)
 | |
| +	vpxor		0x0160(%rdx),%ymm14,%ymm14
 | |
| +	vmovdqu		%ymm14,0x0160(%rsi)
 | |
| +	vpxor		0x01e0(%rdx),%ymm15,%ymm15
 | |
| +	vmovdqu		%ymm15,0x01e0(%rsi)
 | |
| +
 | |
| +	vzeroupper
 | |
| +	mov		%r8,%rsp	# restore the caller %rsp saved in %r8 at entry
 | |
| +	ret
 | |
| +ENDPROC(chacha20_asm_8block_xor_avx2)
 | |
| --- /dev/null	2017-07-05 16:27:37.615351856 +0200
 | |
| +++ b/net/wireguard/crypto/chacha20-neon-arm64.S	2017-07-06 18:17:33.000000000 +0200
 | |
| @@ -0,0 +1,450 @@
 | |
| +/*
 | |
| + * ChaCha20 256-bit cipher algorithm, RFC7539, arm64 NEON functions
 | |
| + *
 | |
| + * Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org>
 | |
| + *
 | |
| + * This program is free software; you can redistribute it and/or modify
 | |
| + * it under the terms of the GNU General Public License version 2 as
 | |
| + * published by the Free Software Foundation.
 | |
| + *
 | |
| + * Based on:
 | |
| + * ChaCha20 256-bit cipher algorithm, RFC7539, x64 SSSE3 functions
 | |
| + *
 | |
| + * Copyright (C) 2015 Martin Willi
 | |
| + *
 | |
| + * This program is free software; you can redistribute it and/or modify
 | |
| + * it under the terms of the GNU General Public License as published by
 | |
| + * the Free Software Foundation; either version 2 of the License, or
 | |
| + * (at your option) any later version.
 | |
| + */
 | |
| +
 | |
| +#include <linux/linkage.h>
 | |
| +
 | |
| +	.text
 | |
| +	.align		6
 | |
| +
 | |
| +ENTRY(chacha20_asm_block_xor_neon)
 | |
| +	// x0: Input state matrix, s
 | |
| +	// x1: 1 data block output, o
 | |
| +	// x2: 1 data block input, i
 | |
| +
 | |
| +	//
 | |
| +	// This function encrypts one ChaCha20 block by loading the state matrix
 | |
| +	// in four NEON registers. It performs matrix operation on four words in
 | |
| +	// parallel, but requires shuffling to rearrange the words after each
 | |
| +	// round.
 | |
| +	//
 | |
| +
 | |
| +	// x0..3 = s0..3 (x3 first receives the address of the ROT8 table)
 | |
| +	adr		x3, ROT8
 | |
| +	ld1		{v0.4s-v3.4s}, [x0]		// working copy of the state
 | |
| +	ld1		{v8.4s-v11.4s}, [x0]		// second copy of s, added back after the rounds
 | |
| +	ld1		{v12.4s}, [x3]			// v12 = ROT8 table, used by tbl below
 | |
| +
 | |
| +	mov		x3, #10				// x3 reused as loop counter: 10 double rounds
 | |
| +
 | |
| +.Ldoubleround:
 | |
| +	// x0 += x1, x3 = rotl32(x3 ^ x0, 16)
 | |
| +	add		v0.4s, v0.4s, v1.4s
 | |
| +	eor		v3.16b, v3.16b, v0.16b
 | |
| +	rev32		v3.8h, v3.8h			// swap 16-bit halves of each word = rotate by 16
 | |
| +
 | |
| +	// x2 += x3, x1 = rotl32(x1 ^ x2, 12)
 | |
| +	add		v2.4s, v2.4s, v3.4s
 | |
| +	eor		v4.16b, v1.16b, v2.16b
 | |
| +	shl		v1.4s, v4.4s, #12
 | |
| +	sri		v1.4s, v4.4s, #20
 | |
| +
 | |
| +	// x0 += x1, x3 = rotl32(x3 ^ x0, 8)
 | |
| +	add		v0.4s, v0.4s, v1.4s
 | |
| +	eor		v3.16b, v3.16b, v0.16b
 | |
| +	tbl		v3.16b, {v3.16b}, v12.16b	// byte permutation through the ROT8 table
 | |
| +
 | |
| +	// x2 += x3, x1 = rotl32(x1 ^ x2, 7)
 | |
| +	add		v2.4s, v2.4s, v3.4s
 | |
| +	eor		v4.16b, v1.16b, v2.16b
 | |
| +	shl		v1.4s, v4.4s, #7
 | |
| +	sri		v1.4s, v4.4s, #25
 | |
| +
 | |
| +	// x1 = shuffle32(x1, MASK(0, 3, 2, 1))
 | |
| +	ext		v1.16b, v1.16b, v1.16b, #4
 | |
| +	// x2 = shuffle32(x2, MASK(1, 0, 3, 2))
 | |
| +	ext		v2.16b, v2.16b, v2.16b, #8
 | |
| +	// x3 = shuffle32(x3, MASK(2, 1, 0, 3))
 | |
| +	ext		v3.16b, v3.16b, v3.16b, #12
 | |
| +
 | |
| +	// x0 += x1, x3 = rotl32(x3 ^ x0, 16)
 | |
| +	add		v0.4s, v0.4s, v1.4s
 | |
| +	eor		v3.16b, v3.16b, v0.16b
 | |
| +	rev32		v3.8h, v3.8h
 | |
| +
 | |
| +	// x2 += x3, x1 = rotl32(x1 ^ x2, 12)
 | |
| +	add		v2.4s, v2.4s, v3.4s
 | |
| +	eor		v4.16b, v1.16b, v2.16b
 | |
| +	shl		v1.4s, v4.4s, #12
 | |
| +	sri		v1.4s, v4.4s, #20
 | |
| +
 | |
| +	// x0 += x1, x3 = rotl32(x3 ^ x0, 8)
 | |
| +	add		v0.4s, v0.4s, v1.4s
 | |
| +	eor		v3.16b, v3.16b, v0.16b
 | |
| +	tbl		v3.16b, {v3.16b}, v12.16b
 | |
| +
 | |
| +	// x2 += x3, x1 = rotl32(x1 ^ x2, 7)
 | |
| +	add		v2.4s, v2.4s, v3.4s
 | |
| +	eor		v4.16b, v1.16b, v2.16b
 | |
| +	shl		v1.4s, v4.4s, #7
 | |
| +	sri		v1.4s, v4.4s, #25
 | |
| +
 | |
| +	// x1 = shuffle32(x1, MASK(2, 1, 0, 3))
 | |
| +	ext		v1.16b, v1.16b, v1.16b, #12
 | |
| +	// x2 = shuffle32(x2, MASK(1, 0, 3, 2))
 | |
| +	ext		v2.16b, v2.16b, v2.16b, #8
 | |
| +	// x3 = shuffle32(x3, MASK(0, 3, 2, 1))
 | |
| +	ext		v3.16b, v3.16b, v3.16b, #4
 | |
| +
 | |
| +	subs		x3, x3, #1
 | |
| +	b.ne		.Ldoubleround
 | |
| +
 | |
| +	ld1		{v4.16b-v7.16b}, [x2]		// v4..v7 = input block i
 | |
| +
 | |
| +	// o0 = i0 ^ (x0 + s0)
 | |
| +	add		v0.4s, v0.4s, v8.4s
 | |
| +	eor		v0.16b, v0.16b, v4.16b
 | |
| +
 | |
| +	// o1 = i1 ^ (x1 + s1)
 | |
| +	add		v1.4s, v1.4s, v9.4s
 | |
| +	eor		v1.16b, v1.16b, v5.16b
 | |
| +
 | |
| +	// o2 = i2 ^ (x2 + s2)
 | |
| +	add		v2.4s, v2.4s, v10.4s
 | |
| +	eor		v2.16b, v2.16b, v6.16b
 | |
| +
 | |
| +	// o3 = i3 ^ (x3 + s3)
 | |
| +	add		v3.4s, v3.4s, v11.4s
 | |
| +	eor		v3.16b, v3.16b, v7.16b
 | |
| +
 | |
| +	st1		{v0.16b-v3.16b}, [x1]		// write output block o
 | |
| +
 | |
| +	ret
 | |
| +ENDPROC(chacha20_asm_block_xor_neon)
 | |
| +
 | |
| +	.align		6
 | |
| +ENTRY(chacha20_asm_4block_xor_neon)
 | |
| +	// x0: Input state matrix, s
 | |
| +	// x1: 4 data blocks output, o
 | |
| +	// x2: 4 data blocks input, i
 | |
| +
 | |
| +	//
 | |
| +	// This function encrypts four consecutive ChaCha20 blocks by loading
 | |
| +	// the state matrix in NEON registers four times. The algorithm performs
 | |
| +	// each operation on the corresponding word of each state matrix, hence
 | |
| +	// requires no word shuffling. For final XORing step we transpose the
 | |
| +	// matrix by interleaving 32- and then 64-bit words, which allows us to
 | |
| +	// do XOR in NEON registers.
 | |
| +	//
 | |
| +	adr		x3, CTRINC		// CTRINC, with ROT8 laid out right after it
 | |
| +	ld1		{v30.4s-v31.4s}, [x3]		// v30 = CTRINC, v31 = ROT8
 | |
| +
 | |
| +	// x0..15[0-3] = s0..3[0..3]
 | |
| +	mov		x4, x0
 | |
| +	ld4r		{ v0.4s- v3.4s}, [x4], #16
 | |
| +	ld4r		{ v4.4s- v7.4s}, [x4], #16
 | |
| +	ld4r		{ v8.4s-v11.4s}, [x4], #16
 | |
| +	ld4r		{v12.4s-v15.4s}, [x4]
 | |
| +
 | |
| +	// x12 += counter values 0-3
 | |
| +	add		v12.4s, v12.4s, v30.4s
 | |
| +
 | |
| +	mov		x3, #10		// loop count: 10 double rounds
 | |
| +
 | |
| +.Ldoubleround4:
 | |
| +	// x0 += x4, x12 = rotl32(x12 ^ x0, 16)
 | |
| +	// x1 += x5, x13 = rotl32(x13 ^ x1, 16)
 | |
| +	// x2 += x6, x14 = rotl32(x14 ^ x2, 16)
 | |
| +	// x3 += x7, x15 = rotl32(x15 ^ x3, 16)
 | |
| +	add		v0.4s, v0.4s, v4.4s
 | |
| +	add		v1.4s, v1.4s, v5.4s
 | |
| +	add		v2.4s, v2.4s, v6.4s
 | |
| +	add		v3.4s, v3.4s, v7.4s
 | |
| +
 | |
| +	eor		v12.16b, v12.16b, v0.16b
 | |
| +	eor		v13.16b, v13.16b, v1.16b
 | |
| +	eor		v14.16b, v14.16b, v2.16b
 | |
| +	eor		v15.16b, v15.16b, v3.16b
 | |
| +
 | |
| +	rev32		v12.8h, v12.8h		// rotl32 by 16: swap the 16-bit halves of each word
 | |
| +	rev32		v13.8h, v13.8h
 | |
| +	rev32		v14.8h, v14.8h
 | |
| +	rev32		v15.8h, v15.8h
 | |
| +
 | |
| +	// x8 += x12, x4 = rotl32(x4 ^ x8, 12)
 | |
| +	// x9 += x13, x5 = rotl32(x5 ^ x9, 12)
 | |
| +	// x10 += x14, x6 = rotl32(x6 ^ x10, 12)
 | |
| +	// x11 += x15, x7 = rotl32(x7 ^ x11, 12)
 | |
| +	add		v8.4s, v8.4s, v12.4s
 | |
| +	add		v9.4s, v9.4s, v13.4s
 | |
| +	add		v10.4s, v10.4s, v14.4s
 | |
| +	add		v11.4s, v11.4s, v15.4s
 | |
| +
 | |
| +	eor		v16.16b, v4.16b, v8.16b
 | |
| +	eor		v17.16b, v5.16b, v9.16b
 | |
| +	eor		v18.16b, v6.16b, v10.16b
 | |
| +	eor		v19.16b, v7.16b, v11.16b
 | |
| +
 | |
| +	shl		v4.4s, v16.4s, #12		// rotl32 by 12: shl writes t << 12, sri below inserts t >> 20
 | |
| +	shl		v5.4s, v17.4s, #12
 | |
| +	shl		v6.4s, v18.4s, #12
 | |
| +	shl		v7.4s, v19.4s, #12
 | |
| +
 | |
| +	sri		v4.4s, v16.4s, #20
 | |
| +	sri		v5.4s, v17.4s, #20
 | |
| +	sri		v6.4s, v18.4s, #20
 | |
| +	sri		v7.4s, v19.4s, #20
 | |
| +
 | |
| +	// x0 += x4, x12 = rotl32(x12 ^ x0, 8)
 | |
| +	// x1 += x5, x13 = rotl32(x13 ^ x1, 8)
 | |
| +	// x2 += x6, x14 = rotl32(x14 ^ x2, 8)
 | |
| +	// x3 += x7, x15 = rotl32(x15 ^ x3, 8)
 | |
| +	add		v0.4s, v0.4s, v4.4s
 | |
| +	add		v1.4s, v1.4s, v5.4s
 | |
| +	add		v2.4s, v2.4s, v6.4s
 | |
| +	add		v3.4s, v3.4s, v7.4s
 | |
| +
 | |
| +	eor		v12.16b, v12.16b, v0.16b
 | |
| +	eor		v13.16b, v13.16b, v1.16b
 | |
| +	eor		v14.16b, v14.16b, v2.16b
 | |
| +	eor		v15.16b, v15.16b, v3.16b
 | |
| +
 | |
| +	tbl		v12.16b, {v12.16b}, v31.16b		// rotl32 by 8: byte permutation through ROT8 (v31)
 | |
| +	tbl		v13.16b, {v13.16b}, v31.16b
 | |
| +	tbl		v14.16b, {v14.16b}, v31.16b
 | |
| +	tbl		v15.16b, {v15.16b}, v31.16b
 | |
| +
 | |
| +	// x8 += x12, x4 = rotl32(x4 ^ x8, 7)
 | |
| +	// x9 += x13, x5 = rotl32(x5 ^ x9, 7)
 | |
| +	// x10 += x14, x6 = rotl32(x6 ^ x10, 7)
 | |
| +	// x11 += x15, x7 = rotl32(x7 ^ x11, 7)
 | |
| +	add		v8.4s, v8.4s, v12.4s
 | |
| +	add		v9.4s, v9.4s, v13.4s
 | |
| +	add		v10.4s, v10.4s, v14.4s
 | |
| +	add		v11.4s, v11.4s, v15.4s
 | |
| +
 | |
| +	eor		v16.16b, v4.16b, v8.16b
 | |
| +	eor		v17.16b, v5.16b, v9.16b
 | |
| +	eor		v18.16b, v6.16b, v10.16b
 | |
| +	eor		v19.16b, v7.16b, v11.16b
 | |
| +
 | |
| +	shl		v4.4s, v16.4s, #7
 | |
| +	shl		v5.4s, v17.4s, #7
 | |
| +	shl		v6.4s, v18.4s, #7
 | |
| +	shl		v7.4s, v19.4s, #7
 | |
| +
 | |
| +	sri		v4.4s, v16.4s, #25
 | |
| +	sri		v5.4s, v17.4s, #25
 | |
| +	sri		v6.4s, v18.4s, #25
 | |
| +	sri		v7.4s, v19.4s, #25
 | |
| +
 | |
| +	// x0 += x5, x15 = rotl32(x15 ^ x0, 16)
 | |
| +	// x1 += x6, x12 = rotl32(x12 ^ x1, 16)
 | |
| +	// x2 += x7, x13 = rotl32(x13 ^ x2, 16)
 | |
| +	// x3 += x4, x14 = rotl32(x14 ^ x3, 16)
 | |
| +	add		v0.4s, v0.4s, v5.4s
 | |
| +	add		v1.4s, v1.4s, v6.4s
 | |
| +	add		v2.4s, v2.4s, v7.4s
 | |
| +	add		v3.4s, v3.4s, v4.4s
 | |
| +
 | |
| +	eor		v15.16b, v15.16b, v0.16b
 | |
| +	eor		v12.16b, v12.16b, v1.16b
 | |
| +	eor		v13.16b, v13.16b, v2.16b
 | |
| +	eor		v14.16b, v14.16b, v3.16b
 | |
| +
 | |
| +	rev32		v15.8h, v15.8h
 | |
| +	rev32		v12.8h, v12.8h
 | |
| +	rev32		v13.8h, v13.8h
 | |
| +	rev32		v14.8h, v14.8h
 | |
| +
 | |
| +	// x10 += x15, x5 = rotl32(x5 ^ x10, 12)
 | |
| +	// x11 += x12, x6 = rotl32(x6 ^ x11, 12)
 | |
| +	// x8 += x13, x7 = rotl32(x7 ^ x8, 12)
 | |
| +	// x9 += x14, x4 = rotl32(x4 ^ x9, 12)
 | |
| +	add		v10.4s, v10.4s, v15.4s
 | |
| +	add		v11.4s, v11.4s, v12.4s
 | |
| +	add		v8.4s, v8.4s, v13.4s
 | |
| +	add		v9.4s, v9.4s, v14.4s
 | |
| +
 | |
| +	eor		v16.16b, v5.16b, v10.16b
 | |
| +	eor		v17.16b, v6.16b, v11.16b
 | |
| +	eor		v18.16b, v7.16b, v8.16b
 | |
| +	eor		v19.16b, v4.16b, v9.16b
 | |
| +
 | |
| +	shl		v5.4s, v16.4s, #12
 | |
| +	shl		v6.4s, v17.4s, #12
 | |
| +	shl		v7.4s, v18.4s, #12
 | |
| +	shl		v4.4s, v19.4s, #12
 | |
| +
 | |
| +	sri		v5.4s, v16.4s, #20
 | |
| +	sri		v6.4s, v17.4s, #20
 | |
| +	sri		v7.4s, v18.4s, #20
 | |
| +	sri		v4.4s, v19.4s, #20
 | |
| +
 | |
| +	// x0 += x5, x15 = rotl32(x15 ^ x0, 8)
 | |
| +	// x1 += x6, x12 = rotl32(x12 ^ x1, 8)
 | |
| +	// x2 += x7, x13 = rotl32(x13 ^ x2, 8)
 | |
| +	// x3 += x4, x14 = rotl32(x14 ^ x3, 8)
 | |
| +	add		v0.4s, v0.4s, v5.4s
 | |
| +	add		v1.4s, v1.4s, v6.4s
 | |
| +	add		v2.4s, v2.4s, v7.4s
 | |
| +	add		v3.4s, v3.4s, v4.4s
 | |
| +
 | |
| +	eor		v15.16b, v15.16b, v0.16b
 | |
| +	eor		v12.16b, v12.16b, v1.16b
 | |
| +	eor		v13.16b, v13.16b, v2.16b
 | |
| +	eor		v14.16b, v14.16b, v3.16b
 | |
| +
 | |
| +	tbl		v15.16b, {v15.16b}, v31.16b
 | |
| +	tbl		v12.16b, {v12.16b}, v31.16b
 | |
| +	tbl		v13.16b, {v13.16b}, v31.16b
 | |
| +	tbl		v14.16b, {v14.16b}, v31.16b
 | |
| +
 | |
| +	// x10 += x15, x5 = rotl32(x5 ^ x10, 7)
 | |
| +	// x11 += x12, x6 = rotl32(x6 ^ x11, 7)
 | |
| +	// x8 += x13, x7 = rotl32(x7 ^ x8, 7)
 | |
| +	// x9 += x14, x4 = rotl32(x4 ^ x9, 7)
 | |
| +	add		v10.4s, v10.4s, v15.4s
 | |
| +	add		v11.4s, v11.4s, v12.4s
 | |
| +	add		v8.4s, v8.4s, v13.4s
 | |
| +	add		v9.4s, v9.4s, v14.4s
 | |
| +
 | |
| +	eor		v16.16b, v5.16b, v10.16b
 | |
| +	eor		v17.16b, v6.16b, v11.16b
 | |
| +	eor		v18.16b, v7.16b, v8.16b
 | |
| +	eor		v19.16b, v4.16b, v9.16b
 | |
| +
 | |
| +	shl		v5.4s, v16.4s, #7
 | |
| +	shl		v6.4s, v17.4s, #7
 | |
| +	shl		v7.4s, v18.4s, #7
 | |
| +	shl		v4.4s, v19.4s, #7
 | |
| +
 | |
| +	sri		v5.4s, v16.4s, #25
 | |
| +	sri		v6.4s, v17.4s, #25
 | |
| +	sri		v7.4s, v18.4s, #25
 | |
| +	sri		v4.4s, v19.4s, #25
 | |
| +
 | |
| +	subs		x3, x3, #1
 | |
| +	b.ne		.Ldoubleround4
 | |
| +
 | |
| +	ld4r		{v16.4s-v19.4s}, [x0], #16		// re-read the input state s; x0 is advanced past it
 | |
| +	ld4r		{v20.4s-v23.4s}, [x0], #16
 | |
| +
 | |
| +	// x12 += counter values 0-3 (done here: the ld4r into v28-v31 below overwrites v30)
 | |
| +	add		v12.4s, v12.4s, v30.4s
 | |
| +
 | |
| +	// x0[0-3] += s0[0]
 | |
| +	// x1[0-3] += s0[1]
 | |
| +	// x2[0-3] += s0[2]
 | |
| +	// x3[0-3] += s0[3]
 | |
| +	add		v0.4s, v0.4s, v16.4s
 | |
| +	add		v1.4s, v1.4s, v17.4s
 | |
| +	add		v2.4s, v2.4s, v18.4s
 | |
| +	add		v3.4s, v3.4s, v19.4s
 | |
| +
 | |
| +	ld4r		{v24.4s-v27.4s}, [x0], #16
 | |
| +	ld4r		{v28.4s-v31.4s}, [x0]
 | |
| +
 | |
| +	// x4[0-3] += s1[0]
 | |
| +	// x5[0-3] += s1[1]
 | |
| +	// x6[0-3] += s1[2]
 | |
| +	// x7[0-3] += s1[3]
 | |
| +	add		v4.4s, v4.4s, v20.4s
 | |
| +	add		v5.4s, v5.4s, v21.4s
 | |
| +	add		v6.4s, v6.4s, v22.4s
 | |
| +	add		v7.4s, v7.4s, v23.4s
 | |
| +
 | |
| +	// x8[0-3] += s2[0]
 | |
| +	// x9[0-3] += s2[1]
 | |
| +	// x10[0-3] += s2[2]
 | |
| +	// x11[0-3] += s2[3]
 | |
| +	add		v8.4s, v8.4s, v24.4s
 | |
| +	add		v9.4s, v9.4s, v25.4s
 | |
| +	add		v10.4s, v10.4s, v26.4s
 | |
| +	add		v11.4s, v11.4s, v27.4s
 | |
| +
 | |
| +	// x12[0-3] += s3[0]
 | |
| +	// x13[0-3] += s3[1]
 | |
| +	// x14[0-3] += s3[2]
 | |
| +	// x15[0-3] += s3[3]
 | |
| +	add		v12.4s, v12.4s, v28.4s
 | |
| +	add		v13.4s, v13.4s, v29.4s
 | |
| +	add		v14.4s, v14.4s, v30.4s
 | |
| +	add		v15.4s, v15.4s, v31.4s
 | |
| +
 | |
| +	// interleave 32-bit words in state n, n+1
 | |
| +	zip1		v16.4s, v0.4s, v1.4s
 | |
| +	zip2		v17.4s, v0.4s, v1.4s
 | |
| +	zip1		v18.4s, v2.4s, v3.4s
 | |
| +	zip2		v19.4s, v2.4s, v3.4s
 | |
| +	zip1		v20.4s, v4.4s, v5.4s
 | |
| +	zip2		v21.4s, v4.4s, v5.4s
 | |
| +	zip1		v22.4s, v6.4s, v7.4s
 | |
| +	zip2		v23.4s, v6.4s, v7.4s
 | |
| +	zip1		v24.4s, v8.4s, v9.4s
 | |
| +	zip2		v25.4s, v8.4s, v9.4s
 | |
| +	zip1		v26.4s, v10.4s, v11.4s
 | |
| +	zip2		v27.4s, v10.4s, v11.4s
 | |
| +	zip1		v28.4s, v12.4s, v13.4s
 | |
| +	zip2		v29.4s, v12.4s, v13.4s
 | |
| +	zip1		v30.4s, v14.4s, v15.4s
 | |
| +	zip2		v31.4s, v14.4s, v15.4s
 | |
| +
 | |
| +	// interleave 64-bit words in state n, n+2
 | |
| +	zip1		v0.2d, v16.2d, v18.2d
 | |
| +	zip2		v4.2d, v16.2d, v18.2d
 | |
| +	zip1		v8.2d, v17.2d, v19.2d
 | |
| +	zip2		v12.2d, v17.2d, v19.2d
 | |
| +	ld1		{v16.16b-v19.16b}, [x2], #64
 | |
| +
 | |
| +	zip1		v1.2d, v20.2d, v22.2d
 | |
| +	zip2		v5.2d, v20.2d, v22.2d
 | |
| +	zip1		v9.2d, v21.2d, v23.2d
 | |
| +	zip2		v13.2d, v21.2d, v23.2d
 | |
| +	ld1		{v20.16b-v23.16b}, [x2], #64
 | |
| +
 | |
| +	zip1		v2.2d, v24.2d, v26.2d
 | |
| +	zip2		v6.2d, v24.2d, v26.2d
 | |
| +	zip1		v10.2d, v25.2d, v27.2d
 | |
| +	zip2		v14.2d, v25.2d, v27.2d
 | |
| +	ld1		{v24.16b-v27.16b}, [x2], #64
 | |
| +
 | |
| +	zip1		v3.2d, v28.2d, v30.2d
 | |
| +	zip2		v7.2d, v28.2d, v30.2d
 | |
| +	zip1		v11.2d, v29.2d, v31.2d
 | |
| +	zip2		v15.2d, v29.2d, v31.2d
 | |
| +	ld1		{v28.16b-v31.16b}, [x2]
 | |
| +
 | |
| +	// xor with corresponding input, write to output
 | |
| +	eor		v16.16b, v16.16b, v0.16b
 | |
| +	eor		v17.16b, v17.16b, v1.16b
 | |
| +	eor		v18.16b, v18.16b, v2.16b
 | |
| +	eor		v19.16b, v19.16b, v3.16b
 | |
| +	eor		v20.16b, v20.16b, v4.16b
 | |
| +	eor		v21.16b, v21.16b, v5.16b
 | |
| +	st1		{v16.16b-v19.16b}, [x1], #64
 | |
| +	eor		v22.16b, v22.16b, v6.16b
 | |
| +	eor		v23.16b, v23.16b, v7.16b
 | |
| +	eor		v24.16b, v24.16b, v8.16b
 | |
| +	eor		v25.16b, v25.16b, v9.16b
 | |
| +	st1		{v20.16b-v23.16b}, [x1], #64
 | |
| +	eor		v26.16b, v26.16b, v10.16b
 | |
| +	eor		v27.16b, v27.16b, v11.16b
 | |
| +	eor		v28.16b, v28.16b, v12.16b
 | |
| +	st1		{v24.16b-v27.16b}, [x1], #64
 | |
| +	eor		v29.16b, v29.16b, v13.16b
 | |
| +	eor		v30.16b, v30.16b, v14.16b
 | |
| +	eor		v31.16b, v31.16b, v15.16b
 | |
| +	st1		{v28.16b-v31.16b}, [x1]
 | |
| +
 | |
| +	ret
 | |
| +ENDPROC(chacha20_asm_4block_xor_neon)
 | |
| +
 | |
| +CTRINC:	.word		0, 1, 2, 3		// per-lane block counter offsets; must stay directly before ROT8 (one ld1 loads both)
 | |
| +ROT8:	.word		0x02010003, 0x06050407, 0x0a09080b, 0x0e0d0c0f		// tbl byte indices for rotl32 by 8
 | |
| --- /dev/null	2017-07-05 16:27:37.615351856 +0200
 | |
| +++ b/net/wireguard/crypto/chacha20-neon-arm.S	2017-07-06 18:17:33.000000000 +0200
 | |
| @@ -0,0 +1,523 @@
 | |
| +/*
 | |
| + * ChaCha20 256-bit cipher algorithm, RFC7539, ARM NEON functions
 | |
| + *
 | |
| + * Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org>
 | |
| + *
 | |
| + * This program is free software; you can redistribute it and/or modify
 | |
| + * it under the terms of the GNU General Public License version 2 as
 | |
| + * published by the Free Software Foundation.
 | |
| + *
 | |
| + * Based on:
 | |
| + * ChaCha20 256-bit cipher algorithm, RFC7539, x64 SSSE3 functions
 | |
| + *
 | |
| + * Copyright (C) 2015 Martin Willi
 | |
| + *
 | |
| + * This program is free software; you can redistribute it and/or modify
 | |
| + * it under the terms of the GNU General Public License as published by
 | |
| + * the Free Software Foundation; either version 2 of the License, or
 | |
| + * (at your option) any later version.
 | |
| + */
 | |
| +
 | |
| +#include <linux/linkage.h>
 | |
| +
 | |
| +	.text
 | |
| +	.fpu		neon
 | |
| +	.align		5
 | |
| +
 | |
| +ENTRY(chacha20_asm_block_xor_neon)
 | |
| +	// r0: Input state matrix, s
 | |
| +	// r1: 1 data block output, o
 | |
| +	// r2: 1 data block input, i
 | |
| +
 | |
| +	//
 | |
| +	// This function encrypts one ChaCha20 block by loading the state matrix
 | |
| +	// in four NEON registers. It performs matrix operation on four words in
 | |
| +	// parallel, but requires shuffling to rearrange the words after each
 | |
| +	// round.
 | |
| +	//
 | |
| +
 | |
| +	// x0..3 = s0..3
 | |
| +	add		ip, r0, #0x20		// ip = s + 32: second half of the state
 | |
| +	vld1.32		{q0-q1}, [r0]
 | |
| +	vld1.32		{q2-q3}, [ip]
 | |
| +
 | |
| +	vmov		q8, q0		// q8-q11: copy of the input state for the final addition
 | |
| +	vmov		q9, q1
 | |
| +	vmov		q10, q2
 | |
| +	vmov		q11, q3
 | |
| +
 | |
| +	mov		r3, #10		// loop count: 10 double rounds
 | |
| +
 | |
| +.Ldoubleround:
 | |
| +	// x0 += x1, x3 = rotl32(x3 ^ x0, 16)
 | |
| +	vadd.i32	q0, q0, q1
 | |
| +	veor		q4, q3, q0
 | |
| +	vshl.u32	q3, q4, #16		// rotate idiom: vshl writes t << n, vsri below inserts t >> (32 - n)
 | |
| +	vsri.u32	q3, q4, #16
 | |
| +
 | |
| +	// x2 += x3, x1 = rotl32(x1 ^ x2, 12)
 | |
| +	vadd.i32	q2, q2, q3
 | |
| +	veor		q4, q1, q2
 | |
| +	vshl.u32	q1, q4, #12
 | |
| +	vsri.u32	q1, q4, #20
 | |
| +
 | |
| +	// x0 += x1, x3 = rotl32(x3 ^ x0, 8)
 | |
| +	vadd.i32	q0, q0, q1
 | |
| +	veor		q4, q3, q0
 | |
| +	vshl.u32	q3, q4, #8
 | |
| +	vsri.u32	q3, q4, #24
 | |
| +
 | |
| +	// x2 += x3, x1 = rotl32(x1 ^ x2, 7)
 | |
| +	vadd.i32	q2, q2, q3
 | |
| +	veor		q4, q1, q2
 | |
| +	vshl.u32	q1, q4, #7
 | |
| +	vsri.u32	q1, q4, #25
 | |
| +
 | |
| +	// x1 = shuffle32(x1, MASK(0, 3, 2, 1))
 | |
| +	vext.8		q1, q1, q1, #4
 | |
| +	// x2 = shuffle32(x2, MASK(1, 0, 3, 2))
 | |
| +	vext.8		q2, q2, q2, #8
 | |
| +	// x3 = shuffle32(x3, MASK(2, 1, 0, 3))
 | |
| +	vext.8		q3, q3, q3, #12
 | |
| +
 | |
| +	// x0 += x1, x3 = rotl32(x3 ^ x0, 16)
 | |
| +	vadd.i32	q0, q0, q1
 | |
| +	veor		q4, q3, q0
 | |
| +	vshl.u32	q3, q4, #16
 | |
| +	vsri.u32	q3, q4, #16
 | |
| +
 | |
| +	// x2 += x3, x1 = rotl32(x1 ^ x2, 12)
 | |
| +	vadd.i32	q2, q2, q3
 | |
| +	veor		q4, q1, q2
 | |
| +	vshl.u32	q1, q4, #12
 | |
| +	vsri.u32	q1, q4, #20
 | |
| +
 | |
| +	// x0 += x1, x3 = rotl32(x3 ^ x0, 8)
 | |
| +	vadd.i32	q0, q0, q1
 | |
| +	veor		q4, q3, q0
 | |
| +	vshl.u32	q3, q4, #8
 | |
| +	vsri.u32	q3, q4, #24
 | |
| +
 | |
| +	// x2 += x3, x1 = rotl32(x1 ^ x2, 7)
 | |
| +	vadd.i32	q2, q2, q3
 | |
| +	veor		q4, q1, q2
 | |
| +	vshl.u32	q1, q4, #7
 | |
| +	vsri.u32	q1, q4, #25
 | |
| +
 | |
| +	// x1 = shuffle32(x1, MASK(2, 1, 0, 3))
 | |
| +	vext.8		q1, q1, q1, #12
 | |
| +	// x2 = shuffle32(x2, MASK(1, 0, 3, 2))
 | |
| +	vext.8		q2, q2, q2, #8
 | |
| +	// x3 = shuffle32(x3, MASK(0, 3, 2, 1))
 | |
| +	vext.8		q3, q3, q3, #4
 | |
| +
 | |
| +	subs		r3, r3, #1
 | |
| +	bne		.Ldoubleround
 | |
| +
 | |
| +	add		ip, r2, #0x20		// ip = i + 32: second half of the input block
 | |
| +	vld1.8		{q4-q5}, [r2]
 | |
| +	vld1.8		{q6-q7}, [ip]
 | |
| +
 | |
| +	// o0 = i0 ^ (x0 + s0)
 | |
| +	vadd.i32	q0, q0, q8
 | |
| +	veor		q0, q0, q4
 | |
| +
 | |
| +	// o1 = i1 ^ (x1 + s1)
 | |
| +	vadd.i32	q1, q1, q9
 | |
| +	veor		q1, q1, q5
 | |
| +
 | |
| +	// o2 = i2 ^ (x2 + s2)
 | |
| +	vadd.i32	q2, q2, q10
 | |
| +	veor		q2, q2, q6
 | |
| +
 | |
| +	// o3 = i3 ^ (x3 + s3)
 | |
| +	vadd.i32	q3, q3, q11
 | |
| +	veor		q3, q3, q7
 | |
| +
 | |
| +	add		ip, r1, #0x20		// ip = o + 32: second half of the output block
 | |
| +	vst1.8		{q0-q1}, [r1]
 | |
| +	vst1.8		{q2-q3}, [ip]
 | |
| +
 | |
| +	bx		lr
 | |
| +ENDPROC(chacha20_asm_block_xor_neon)
 | |
| +
 | |
| +	.align		5
 | |
| +ENTRY(chacha20_asm_4block_xor_neon)
 | |
| +	push		{r4-r6, lr}
 | |
| +	mov		ip, sp			// preserve the stack pointer
 | |
| +	sub		r3, sp, #0x20		// allocate a 32 byte buffer
 | |
| +	bic		r3, r3, #0x1f		// aligned to 32 bytes
 | |
| +	mov		sp, r3
 | |
| +
 | |
| +	// r0: Input state matrix, s
 | |
| +	// r1: 4 data blocks output, o
 | |
| +	// r2: 4 data blocks input, i
 | |
| +
 | |
| +	//
 | |
| +	// This function encrypts four consecutive ChaCha20 blocks by loading
 | |
| +	// the state matrix in NEON registers four times. The algorithm performs
 | |
| +	// each operation on the corresponding word of each state matrix, hence
 | |
| +	// requires no word shuffling. For final XORing step we transpose the
 | |
| +	// matrix by interleaving 32- and then 64-bit words, which allows us to
 | |
| +	// do XOR in NEON registers.
 | |
| +	//
 | |
| +
 | |
| +	// x0..15[0-3] = s0..3[0..3]
 | |
| +	add		r3, r0, #0x20
 | |
| +	vld1.32		{q0-q1}, [r0]
 | |
| +	vld1.32		{q2-q3}, [r3]
 | |
| +
 | |
| +	adr		r3, CTRINC
 | |
| +	vdup.32		q15, d7[1]
 | |
| +	vdup.32		q14, d7[0]
 | |
| +	vld1.32		{q11}, [r3, :128]
 | |
| +	vdup.32		q13, d6[1]
 | |
| +	vdup.32		q12, d6[0]
 | |
| +	vadd.i32	q12, q12, q11		// x12 += counter values 0-3
 | |
| +	vdup.32		q11, d5[1]
 | |
| +	vdup.32		q10, d5[0]
 | |
| +	vdup.32		q9, d4[1]
 | |
| +	vdup.32		q8, d4[0]
 | |
| +	vdup.32		q7, d3[1]
 | |
| +	vdup.32		q6, d3[0]
 | |
| +	vdup.32		q5, d2[1]
 | |
| +	vdup.32		q4, d2[0]
 | |
| +	vdup.32		q3, d1[1]
 | |
| +	vdup.32		q2, d1[0]
 | |
| +	vdup.32		q1, d0[1]
 | |
| +	vdup.32		q0, d0[0]
 | |
| +
 | |
| +	mov		r3, #10		// loop count: 10 double rounds
 | |
| +
 | |
| +.Ldoubleround4:
 | |
| +	// x0 += x4, x12 = rotl32(x12 ^ x0, 16)
 | |
| +	// x1 += x5, x13 = rotl32(x13 ^ x1, 16)
 | |
| +	// x2 += x6, x14 = rotl32(x14 ^ x2, 16)
 | |
| +	// x3 += x7, x15 = rotl32(x15 ^ x3, 16)
 | |
| +	vadd.i32	q0, q0, q4
 | |
| +	vadd.i32	q1, q1, q5
 | |
| +	vadd.i32	q2, q2, q6
 | |
| +	vadd.i32	q3, q3, q7
 | |
| +
 | |
| +	veor		q12, q12, q0
 | |
| +	veor		q13, q13, q1
 | |
| +	veor		q14, q14, q2
 | |
| +	veor		q15, q15, q3
 | |
| +
 | |
| +	vrev32.16	q12, q12
 | |
| +	vrev32.16	q13, q13
 | |
| +	vrev32.16	q14, q14
 | |
| +	vrev32.16	q15, q15
 | |
| +
 | |
| +	// x8 += x12, x4 = rotl32(x4 ^ x8, 12)
 | |
| +	// x9 += x13, x5 = rotl32(x5 ^ x9, 12)
 | |
| +	// x10 += x14, x6 = rotl32(x6 ^ x10, 12)
 | |
| +	// x11 += x15, x7 = rotl32(x7 ^ x11, 12)
 | |
| +	vadd.i32	q8, q8, q12
 | |
| +	vadd.i32	q9, q9, q13
 | |
| +	vadd.i32	q10, q10, q14
 | |
| +	vadd.i32	q11, q11, q15
 | |
| +
 | |
| +	vst1.32		{q8-q9}, [sp, :256]		// spill x8/x9: q8-q9 are reused as scratch below
 | |
| +
 | |
| +	veor		q8, q4, q8
 | |
| +	veor		q9, q5, q9
 | |
| +	vshl.u32	q4, q8, #12
 | |
| +	vshl.u32	q5, q9, #12
 | |
| +	vsri.u32	q4, q8, #20
 | |
| +	vsri.u32	q5, q9, #20
 | |
| +
 | |
| +	veor		q8, q6, q10
 | |
| +	veor		q9, q7, q11
 | |
| +	vshl.u32	q6, q8, #12
 | |
| +	vshl.u32	q7, q9, #12
 | |
| +	vsri.u32	q6, q8, #20
 | |
| +	vsri.u32	q7, q9, #20
 | |
| +
 | |
| +	// x0 += x4, x12 = rotl32(x12 ^ x0, 8)
 | |
| +	// x1 += x5, x13 = rotl32(x13 ^ x1, 8)
 | |
| +	// x2 += x6, x14 = rotl32(x14 ^ x2, 8)
 | |
| +	// x3 += x7, x15 = rotl32(x15 ^ x3, 8)
 | |
| +	vadd.i32	q0, q0, q4
 | |
| +	vadd.i32	q1, q1, q5
 | |
| +	vadd.i32	q2, q2, q6
 | |
| +	vadd.i32	q3, q3, q7
 | |
| +
 | |
| +	veor		q8, q12, q0
 | |
| +	veor		q9, q13, q1
 | |
| +	vshl.u32	q12, q8, #8
 | |
| +	vshl.u32	q13, q9, #8
 | |
| +	vsri.u32	q12, q8, #24
 | |
| +	vsri.u32	q13, q9, #24
 | |
| +
 | |
| +	veor		q8, q14, q2
 | |
| +	veor		q9, q15, q3
 | |
| +	vshl.u32	q14, q8, #8
 | |
| +	vshl.u32	q15, q9, #8
 | |
| +	vsri.u32	q14, q8, #24
 | |
| +	vsri.u32	q15, q9, #24
 | |
| +
 | |
| +	vld1.32		{q8-q9}, [sp, :256]		// reload the spilled x8/x9
 | |
| +
 | |
| +	// x8 += x12, x4 = rotl32(x4 ^ x8, 7)
 | |
| +	// x9 += x13, x5 = rotl32(x5 ^ x9, 7)
 | |
| +	// x10 += x14, x6 = rotl32(x6 ^ x10, 7)
 | |
| +	// x11 += x15, x7 = rotl32(x7 ^ x11, 7)
 | |
| +	vadd.i32	q8, q8, q12
 | |
| +	vadd.i32	q9, q9, q13
 | |
| +	vadd.i32	q10, q10, q14
 | |
| +	vadd.i32	q11, q11, q15
 | |
| +
 | |
| +	vst1.32		{q8-q9}, [sp, :256]
 | |
| +
 | |
| +	veor		q8, q4, q8
 | |
| +	veor		q9, q5, q9
 | |
| +	vshl.u32	q4, q8, #7
 | |
| +	vshl.u32	q5, q9, #7
 | |
| +	vsri.u32	q4, q8, #25
 | |
| +	vsri.u32	q5, q9, #25
 | |
| +
 | |
| +	veor		q8, q6, q10
 | |
| +	veor		q9, q7, q11
 | |
| +	vshl.u32	q6, q8, #7
 | |
| +	vshl.u32	q7, q9, #7
 | |
| +	vsri.u32	q6, q8, #25
 | |
| +	vsri.u32	q7, q9, #25
 | |
| +
 | |
| +	vld1.32		{q8-q9}, [sp, :256]
 | |
| +
 | |
| +	// x0 += x5, x15 = rotl32(x15 ^ x0, 16)
 | |
| +	// x1 += x6, x12 = rotl32(x12 ^ x1, 16)
 | |
| +	// x2 += x7, x13 = rotl32(x13 ^ x2, 16)
 | |
| +	// x3 += x4, x14 = rotl32(x14 ^ x3, 16)
 | |
| +	vadd.i32	q0, q0, q5
 | |
| +	vadd.i32	q1, q1, q6
 | |
| +	vadd.i32	q2, q2, q7
 | |
| +	vadd.i32	q3, q3, q4
 | |
| +
 | |
| +	veor		q15, q15, q0
 | |
| +	veor		q12, q12, q1
 | |
| +	veor		q13, q13, q2
 | |
| +	veor		q14, q14, q3
 | |
| +
 | |
| +	vrev32.16	q15, q15
 | |
| +	vrev32.16	q12, q12
 | |
| +	vrev32.16	q13, q13
 | |
| +	vrev32.16	q14, q14
 | |
| +
 | |
| +	// x10 += x15, x5 = rotl32(x5 ^ x10, 12)
 | |
| +	// x11 += x12, x6 = rotl32(x6 ^ x11, 12)
 | |
| +	// x8 += x13, x7 = rotl32(x7 ^ x8, 12)
 | |
| +	// x9 += x14, x4 = rotl32(x4 ^ x9, 12)
 | |
| +	vadd.i32	q10, q10, q15
 | |
| +	vadd.i32	q11, q11, q12
 | |
| +	vadd.i32	q8, q8, q13
 | |
| +	vadd.i32	q9, q9, q14
 | |
| +
 | |
| +	vst1.32		{q8-q9}, [sp, :256]
 | |
| +
 | |
| +	veor		q8, q7, q8
 | |
| +	veor		q9, q4, q9
 | |
| +	vshl.u32	q7, q8, #12
 | |
| +	vshl.u32	q4, q9, #12
 | |
| +	vsri.u32	q7, q8, #20
 | |
| +	vsri.u32	q4, q9, #20
 | |
| +
 | |
| +	veor		q8, q5, q10
 | |
| +	veor		q9, q6, q11
 | |
| +	vshl.u32	q5, q8, #12
 | |
| +	vshl.u32	q6, q9, #12
 | |
| +	vsri.u32	q5, q8, #20
 | |
| +	vsri.u32	q6, q9, #20
 | |
| +
 | |
| +	// x0 += x5, x15 = rotl32(x15 ^ x0, 8)
 | |
| +	// x1 += x6, x12 = rotl32(x12 ^ x1, 8)
 | |
| +	// x2 += x7, x13 = rotl32(x13 ^ x2, 8)
 | |
| +	// x3 += x4, x14 = rotl32(x14 ^ x3, 8)
 | |
| +	vadd.i32	q0, q0, q5
 | |
| +	vadd.i32	q1, q1, q6
 | |
| +	vadd.i32	q2, q2, q7
 | |
| +	vadd.i32	q3, q3, q4
 | |
| +
 | |
| +	veor		q8, q15, q0
 | |
| +	veor		q9, q12, q1
 | |
| +	vshl.u32	q15, q8, #8
 | |
| +	vshl.u32	q12, q9, #8
 | |
| +	vsri.u32	q15, q8, #24
 | |
| +	vsri.u32	q12, q9, #24
 | |
| +
 | |
| +	veor		q8, q13, q2
 | |
| +	veor		q9, q14, q3
 | |
| +	vshl.u32	q13, q8, #8
 | |
| +	vshl.u32	q14, q9, #8
 | |
| +	vsri.u32	q13, q8, #24
 | |
| +	vsri.u32	q14, q9, #24
 | |
| +
 | |
| +	vld1.32		{q8-q9}, [sp, :256]
 | |
| +
 | |
| +	// x10 += x15, x5 = rotl32(x5 ^ x10, 7)
 | |
| +	// x11 += x12, x6 = rotl32(x6 ^ x11, 7)
 | |
| +	// x8 += x13, x7 = rotl32(x7 ^ x8, 7)
 | |
| +	// x9 += x14, x4 = rotl32(x4 ^ x9, 7)
 | |
| +	vadd.i32	q10, q10, q15
 | |
| +	vadd.i32	q11, q11, q12
 | |
| +	vadd.i32	q8, q8, q13
 | |
| +	vadd.i32	q9, q9, q14
 | |
| +
 | |
| +	vst1.32		{q8-q9}, [sp, :256]
 | |
| +
 | |
| +	veor		q8, q7, q8
 | |
| +	veor		q9, q4, q9
 | |
| +	vshl.u32	q7, q8, #7
 | |
| +	vshl.u32	q4, q9, #7
 | |
| +	vsri.u32	q7, q8, #25
 | |
| +	vsri.u32	q4, q9, #25
 | |
| +
 | |
| +	veor		q8, q5, q10
 | |
| +	veor		q9, q6, q11
 | |
| +	vshl.u32	q5, q8, #7
 | |
| +	vshl.u32	q6, q9, #7
 | |
| +	vsri.u32	q5, q8, #25
 | |
| +	vsri.u32	q6, q9, #25
 | |
| +
 | |
| +	subs		r3, r3, #1
 | |
| +	beq		0f
 | |
| +
 | |
| +	vld1.32		{q8-q9}, [sp, :256]		// reload x8/x9 for the next iteration
 | |
| +	b		.Ldoubleround4
 | |
| +
 | |
| +	// x0[0-3] += s0[0]
 | |
| +	// x1[0-3] += s0[1]
 | |
| +	// x2[0-3] += s0[2]
 | |
| +	// x3[0-3] += s0[3]
 | |
| +0:	ldmia		r0!, {r3-r6}
 | |
| +	vdup.32		q8, r3
 | |
| +	vdup.32		q9, r4
 | |
| +	vadd.i32	q0, q0, q8
 | |
| +	vadd.i32	q1, q1, q9
 | |
| +	vdup.32		q8, r5
 | |
| +	vdup.32		q9, r6
 | |
| +	vadd.i32	q2, q2, q8
 | |
| +	vadd.i32	q3, q3, q9
 | |
| +
 | |
| +	// x4[0-3] += s1[0]
 | |
| +	// x5[0-3] += s1[1]
 | |
| +	// x6[0-3] += s1[2]
 | |
| +	// x7[0-3] += s1[3]
 | |
| +	ldmia		r0!, {r3-r6}
 | |
| +	vdup.32		q8, r3
 | |
| +	vdup.32		q9, r4
 | |
| +	vadd.i32	q4, q4, q8
 | |
| +	vadd.i32	q5, q5, q9
 | |
| +	vdup.32		q8, r5
 | |
| +	vdup.32		q9, r6
 | |
| +	vadd.i32	q6, q6, q8
 | |
| +	vadd.i32	q7, q7, q9
 | |
| +
 | |
| +	// interleave 32-bit words in state n, n+1
 | |
| +	vzip.32		q0, q1
 | |
| +	vzip.32		q2, q3
 | |
| +	vzip.32		q4, q5
 | |
| +	vzip.32		q6, q7
 | |
| +
 | |
| +	// interleave 64-bit words in state n, n+2
 | |
| +	vswp		d1, d4
 | |
| +	vswp		d3, d6
 | |
| +	vswp		d9, d12
 | |
| +	vswp		d11, d14
 | |
| +
 | |
| +	// xor with corresponding input, write to output
 | |
| +	vld1.8		{q8-q9}, [r2]!
 | |
| +	veor		q8, q8, q0
 | |
| +	veor		q9, q9, q4
 | |
| +	vst1.8		{q8-q9}, [r1]!
 | |
| +
 | |
| +	vld1.32		{q8-q9}, [sp, :256]		// fetch x8/x9 stored by the last double round
 | |
| +
 | |
| +	// x8[0-3] += s2[0]
 | |
| +	// x9[0-3] += s2[1]
 | |
| +	// x10[0-3] += s2[2]
 | |
| +	// x11[0-3] += s2[3]
 | |
| +	ldmia		r0!, {r3-r6}
 | |
| +	vdup.32		q0, r3
 | |
| +	vdup.32		q4, r4
 | |
| +	vadd.i32	q8, q8, q0
 | |
| +	vadd.i32	q9, q9, q4
 | |
| +	vdup.32		q0, r5
 | |
| +	vdup.32		q4, r6
 | |
| +	vadd.i32	q10, q10, q0
 | |
| +	vadd.i32	q11, q11, q4
 | |
| +
 | |
| +	// x12[0-3] += s3[0]
 | |
| +	// x13[0-3] += s3[1]
 | |
| +	// x14[0-3] += s3[2]
 | |
| +	// x15[0-3] += s3[3]
 | |
| +	ldmia		r0!, {r3-r6}
 | |
| +	vdup.32		q0, r3
 | |
| +	vdup.32		q4, r4
 | |
| +	adr		r3, CTRINC
 | |
| +	vadd.i32	q12, q12, q0
 | |
| +	vld1.32		{q0}, [r3, :128]
 | |
| +	vadd.i32	q13, q13, q4
 | |
| +	vadd.i32	q12, q12, q0		// x12 += counter values 0-3
 | |
| +
 | |
| +	vdup.32		q0, r5
 | |
| +	vdup.32		q4, r6
 | |
| +	vadd.i32	q14, q14, q0
 | |
| +	vadd.i32	q15, q15, q4
 | |
| +
 | |
| +	// interleave 32-bit words in state n, n+1
 | |
| +	vzip.32		q8, q9
 | |
| +	vzip.32		q10, q11
 | |
| +	vzip.32		q12, q13
 | |
| +	vzip.32		q14, q15
 | |
| +
 | |
| +	// interleave 64-bit words in state n, n+2
 | |
| +	vswp		d17, d20
 | |
| +	vswp		d19, d22
 | |
| +	vswp		d25, d28
 | |
| +	vswp		d27, d30
 | |
| +
 | |
| +	vmov		q4, q1		// save q1 in q4: q0-q1 become the load/xor/store scratch pair below
 | |
| +
 | |
| +	vld1.8		{q0-q1}, [r2]!
 | |
| +	veor		q0, q0, q8
 | |
| +	veor		q1, q1, q12
 | |
| +	vst1.8		{q0-q1}, [r1]!
 | |
| +
 | |
| +	vld1.8		{q0-q1}, [r2]!
 | |
| +	veor		q0, q0, q2
 | |
| +	veor		q1, q1, q6
 | |
| +	vst1.8		{q0-q1}, [r1]!
 | |
| +
 | |
| +	vld1.8		{q0-q1}, [r2]!
 | |
| +	veor		q0, q0, q10
 | |
| +	veor		q1, q1, q14
 | |
| +	vst1.8		{q0-q1}, [r1]!
 | |
| +
 | |
| +	vld1.8		{q0-q1}, [r2]!
 | |
| +	veor		q0, q0, q4
 | |
| +	veor		q1, q1, q5
 | |
| +	vst1.8		{q0-q1}, [r1]!
 | |
| +
 | |
| +	vld1.8		{q0-q1}, [r2]!
 | |
| +	veor		q0, q0, q9
 | |
| +	veor		q1, q1, q13
 | |
| +	vst1.8		{q0-q1}, [r1]!
 | |
| +
 | |
| +	vld1.8		{q0-q1}, [r2]!
 | |
| +	veor		q0, q0, q3
 | |
| +	veor		q1, q1, q7
 | |
| +	vst1.8		{q0-q1}, [r1]!
 | |
| +
 | |
| +	vld1.8		{q0-q1}, [r2]
 | |
| +	veor		q0, q0, q11
 | |
| +	veor		q1, q1, q15
 | |
| +	vst1.8		{q0-q1}, [r1]
 | |
| +
 | |
| +	mov		sp, ip		// drop the aligned scratch buffer
 | |
| +	pop		{r4-r6, pc}
 | |
| +ENDPROC(chacha20_asm_4block_xor_neon)
 | |
| +
 | |
| +	.align		4		// 2^4 = 16-byte alignment, required by the [r3, :128] loads of CTRINC
 | |
| +CTRINC:	.word		0, 1, 2, 3		// added to x12 so the four blocks get consecutive counters
 | |
| --- /dev/null	2017-07-05 16:27:37.615351856 +0200
 | |
| +++ b/net/wireguard/crypto/chacha20-ssse3-x86_64.S	2017-07-06 18:17:33.000000000 +0200
 | |
| @@ -0,0 +1,734 @@
 | |
| +/*
 | |
| + * ChaCha20 256-bit cipher algorithm, RFC7539, x64 SSSE3 functions
 | |
| + *
 | |
| + * Copyright (C) 2015 Martin Willi
 | |
| + * Copyright (C) 2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 | |
| + *
 | |
| + * This program is free software; you can redistribute it and/or modify
 | |
| + * it under the terms of the GNU General Public License as published by
 | |
| + * the Free Software Foundation; either version 2 of the License, or
 | |
| + * (at your option) any later version.
 | |
| + */
 | |
| +
 | |
| +#include <linux/linkage.h>
 | |
| +
 | |
| +.section .rodata.cst16.ROT8, "aM", @progbits, 16
 | |
| +.align 16
 | |
| +ROT8:.octa 0x0e0d0c0f0a09080b0605040702010003	# pshufb mask: rotate each 32-bit word left by 8
 | |
| +.section .rodata.cst16.ROT16, "aM", @progbits, 16
 | |
| +.align 16
 | |
| +ROT16:	.octa 0x0d0c0f0e09080b0a0504070601000302	# pshufb mask: rotate each 32-bit word left by 16
 | |
| +.section .rodata.cst16.CTRINC, "aM", @progbits, 16
 | |
| +.align 16
 | |
| +CTRINC:	.octa 0x00000003000000020000000100000000	# 32-bit lanes 0, 1, 2, 3; presumably the per-block counter offsets (user not in view)
 | |
| +.section .rodata.cst16.CHACONST, "aM", @progbits, 16
 | |
| +.align 16
 | |
| +CONST:	.ascii "expand 32-byte k"	# NOTE(review): section is named CHACONST but the label is CONST; no user in view
 | |
| +
 | |
| +.text
 | |
| +
 | |
| +ENTRY(chacha20_asm_block_xor_ssse3)
 | |
| +	# %rdi: Input state matrix, s
 | |
| +	# %rsi: 1 data block output, o
 | |
| +	# %rdx: 1 data block input, i
 | |
| +
 | |
| +	# This function encrypts one ChaCha20 block by loading the state matrix
 | |
| +	# in four SSE registers. It performs matrix operation on four words in
 | |
| +	# parallel, but requires shuffling to rearrange the words after each
 | |
| +	# round. 8/16-bit word rotation is done with the slightly better
 | |
| +	# performing SSSE3 byte shuffling, 7/12-bit word rotation uses
 | |
| +	# traditional shift+OR.
 | |
| +
 | |
| +	# x0..3 = s0..3
 | |
| +	movdqa		0x00(%rdi),%xmm0	# aligned loads: s must be 16-byte aligned
 | |
| +	movdqa		0x10(%rdi),%xmm1
 | |
| +	movdqa		0x20(%rdi),%xmm2
 | |
| +	movdqa		0x30(%rdi),%xmm3
 | |
| +	movdqa		%xmm0,%xmm8	# xmm8-11: copy of the input state for the final addition
 | |
| +	movdqa		%xmm1,%xmm9
 | |
| +	movdqa		%xmm2,%xmm10
 | |
| +	movdqa		%xmm3,%xmm11
 | |
| +
 | |
| +	movdqa		ROT8(%rip),%xmm4	# xmm4 = byte-shuffle mask for rotl32 by 8
 | |
| +	movdqa		ROT16(%rip),%xmm5	# xmm5 = byte-shuffle mask for rotl32 by 16
 | |
| +
 | |
| +	mov	$10,%ecx	# loop count: 10 double rounds
 | |
| +
 | |
| +.Ldoubleround:
 | |
| +
 | |
| +	# x0 += x1, x3 = rotl32(x3 ^ x0, 16)
 | |
| +	paddd		%xmm1,%xmm0
 | |
| +	pxor		%xmm0,%xmm3
 | |
| +	pshufb		%xmm5,%xmm3
 | |
| +
 | |
| +	# x2 += x3, x1 = rotl32(x1 ^ x2, 12)
 | |
| +	paddd		%xmm3,%xmm2
 | |
| +	pxor		%xmm2,%xmm1
 | |
| +	movdqa		%xmm1,%xmm6
 | |
| +	pslld		$12,%xmm6
 | |
| +	psrld		$20,%xmm1
 | |
| +	por		%xmm6,%xmm1
 | |
| +
 | |
| +	# x0 += x1, x3 = rotl32(x3 ^ x0, 8)
 | |
| +	paddd		%xmm1,%xmm0
 | |
| +	pxor		%xmm0,%xmm3
 | |
| +	pshufb		%xmm4,%xmm3
 | |
| +
 | |
| +	# x2 += x3, x1 = rotl32(x1 ^ x2, 7)
 | |
| +	paddd		%xmm3,%xmm2
 | |
| +	pxor		%xmm2,%xmm1
 | |
| +	movdqa		%xmm1,%xmm7
 | |
| +	pslld		$7,%xmm7
 | |
| +	psrld		$25,%xmm1
 | |
| +	por		%xmm7,%xmm1
 | |
| +
 | |
| +	# x1 = shuffle32(x1, MASK(0, 3, 2, 1))
 | |
| +	pshufd		$0x39,%xmm1,%xmm1
 | |
| +	# x2 = shuffle32(x2, MASK(1, 0, 3, 2))
 | |
| +	pshufd		$0x4e,%xmm2,%xmm2
 | |
| +	# x3 = shuffle32(x3, MASK(2, 1, 0, 3))
 | |
| +	pshufd		$0x93,%xmm3,%xmm3
 | |
| +
 | |
| +	# x0 += x1, x3 = rotl32(x3 ^ x0, 16)
 | |
| +	paddd		%xmm1,%xmm0
 | |
| +	pxor		%xmm0,%xmm3
 | |
| +	pshufb		%xmm5,%xmm3
 | |
| +
 | |
| +	# x2 += x3, x1 = rotl32(x1 ^ x2, 12)
 | |
| +	paddd		%xmm3,%xmm2
 | |
| +	pxor		%xmm2,%xmm1
 | |
| +	movdqa		%xmm1,%xmm6
 | |
| +	pslld		$12,%xmm6
 | |
| +	psrld		$20,%xmm1
 | |
| +	por		%xmm6,%xmm1
 | |
| +
 | |
| +	# x0 += x1, x3 = rotl32(x3 ^ x0, 8)
 | |
| +	paddd		%xmm1,%xmm0
 | |
| +	pxor		%xmm0,%xmm3
 | |
| +	pshufb		%xmm4,%xmm3
 | |
| +
 | |
| +	# x2 += x3, x1 = rotl32(x1 ^ x2, 7)
 | |
| +	paddd		%xmm3,%xmm2
 | |
| +	pxor		%xmm2,%xmm1
 | |
| +	movdqa		%xmm1,%xmm7
 | |
| +	pslld		$7,%xmm7
 | |
| +	psrld		$25,%xmm1
 | |
| +	por		%xmm7,%xmm1
 | |
| +
 | |
| +	# x1 = shuffle32(x1, MASK(2, 1, 0, 3))
 | |
| +	pshufd		$0x93,%xmm1,%xmm1
 | |
| +	# x2 = shuffle32(x2, MASK(1, 0, 3, 2))
 | |
| +	pshufd		$0x4e,%xmm2,%xmm2
 | |
| +	# x3 = shuffle32(x3, MASK(0, 3, 2, 1))
 | |
| +	pshufd		$0x39,%xmm3,%xmm3
 | |
| +
 | |
| +	dec		%ecx
 | |
| +	jnz		.Ldoubleround
 | |
| +
 | |
| +	# o0 = i0 ^ (x0 + s0)
 | |
| +	movdqu		0x00(%rdx),%xmm4	# unaligned access: i and o need no particular alignment
 | |
| +	paddd		%xmm8,%xmm0
 | |
| +	pxor		%xmm4,%xmm0
 | |
| +	movdqu		%xmm0,0x00(%rsi)
 | |
| +	# o1 = i1 ^ (x1 + s1)
 | |
| +	movdqu		0x10(%rdx),%xmm5
 | |
| +	paddd		%xmm9,%xmm1
 | |
| +	pxor		%xmm5,%xmm1
 | |
| +	movdqu		%xmm1,0x10(%rsi)
 | |
| +	# o2 = i2 ^ (x2 + s2)
 | |
| +	movdqu		0x20(%rdx),%xmm6
 | |
| +	paddd		%xmm10,%xmm2
 | |
| +	pxor		%xmm6,%xmm2
 | |
| +	movdqu		%xmm2,0x20(%rsi)
 | |
| +	# o3 = i3 ^ (x3 + s3)
 | |
| +	movdqu		0x30(%rdx),%xmm7
 | |
| +	paddd		%xmm11,%xmm3
 | |
| +	pxor		%xmm7,%xmm3
 | |
| +	movdqu		%xmm3,0x30(%rsi)
 | |
| +
 | |
| +	ret
 | |
| +ENDPROC(chacha20_asm_block_xor_ssse3)
 | |
| +
 | |
| +ENTRY(chacha20_asm_4block_xor_ssse3)
 | |
| +	# %rdi: Input state matrix, s
 | |
| +	# %rsi: 4 data blocks output, o
 | |
| +	# %rdx: 4 data blocks input, i
 | |
| +
 | |
| +	# This function encrypts four consecutive ChaCha20 blocks by loading the
 | |
| +	# state matrix in SSE registers four times. As we need some scratch
 | |
| +	# registers, we save the first four registers on the stack. The
 | |
| +	# algorithm performs each operation on the corresponding word of each
 | |
| +	# state matrix, hence requires no word shuffling. For the final XORing step
 | |
| +	# we transpose the matrix by interleaving 32- and then 64-bit words,
 | |
| +	# which allows us to do XOR in SSE registers. 8/16-bit word rotation is
 | |
| +	# done with the slightly better performing SSSE3 byte shuffling,
 | |
| +	# 7/12-bit word rotation uses traditional shift+OR.
 | |
| +
 | |
| +	mov		%rsp,%r11
 | |
| +	sub		$0x80,%rsp
 | |
| +	and		$~63,%rsp
 | |
| +
 | |
| +	# x0..15[0-3] = s0..3[0..3]
 | |
| +	movq		0x00(%rdi),%xmm1
 | |
| +	pshufd		$0x00,%xmm1,%xmm0
 | |
| +	pshufd		$0x55,%xmm1,%xmm1
 | |
| +	movq		0x08(%rdi),%xmm3
 | |
| +	pshufd		$0x00,%xmm3,%xmm2
 | |
| +	pshufd		$0x55,%xmm3,%xmm3
 | |
| +	movq		0x10(%rdi),%xmm5
 | |
| +	pshufd		$0x00,%xmm5,%xmm4
 | |
| +	pshufd		$0x55,%xmm5,%xmm5
 | |
| +	movq		0x18(%rdi),%xmm7
 | |
| +	pshufd		$0x00,%xmm7,%xmm6
 | |
| +	pshufd		$0x55,%xmm7,%xmm7
 | |
| +	movq		0x20(%rdi),%xmm9
 | |
| +	pshufd		$0x00,%xmm9,%xmm8
 | |
| +	pshufd		$0x55,%xmm9,%xmm9
 | |
| +	movq		0x28(%rdi),%xmm11
 | |
| +	pshufd		$0x00,%xmm11,%xmm10
 | |
| +	pshufd		$0x55,%xmm11,%xmm11
 | |
| +	movq		0x30(%rdi),%xmm13
 | |
| +	pshufd		$0x00,%xmm13,%xmm12
 | |
| +	pshufd		$0x55,%xmm13,%xmm13
 | |
| +	movq		0x38(%rdi),%xmm15
 | |
| +	pshufd		$0x00,%xmm15,%xmm14
 | |
| +	pshufd		$0x55,%xmm15,%xmm15
 | |
| +	# x0..3 on stack
 | |
| +	movdqa		%xmm0,0x00(%rsp)
 | |
| +	movdqa		%xmm1,0x10(%rsp)
 | |
| +	movdqa		%xmm2,0x20(%rsp)
 | |
| +	movdqa		%xmm3,0x30(%rsp)
 | |
| +
 | |
| +	movdqa		CTRINC(%rip),%xmm1
 | |
| +	movdqa		ROT8(%rip),%xmm2
 | |
| +	movdqa		ROT16(%rip),%xmm3
 | |
| +
 | |
| +	# x12 += counter values 0-3
 | |
| +	paddd		%xmm1,%xmm12
 | |
| +
 | |
| +	mov		$10,%ecx
 | |
| +
 | |
| +.Ldoubleround4:
 | |
| +	# x0 += x4, x12 = rotl32(x12 ^ x0, 16)
 | |
| +	movdqa		0x00(%rsp),%xmm0
 | |
| +	paddd		%xmm4,%xmm0
 | |
| +	movdqa		%xmm0,0x00(%rsp)
 | |
| +	pxor		%xmm0,%xmm12
 | |
| +	pshufb		%xmm3,%xmm12
 | |
| +	# x1 += x5, x13 = rotl32(x13 ^ x1, 16)
 | |
| +	movdqa		0x10(%rsp),%xmm0
 | |
| +	paddd		%xmm5,%xmm0
 | |
| +	movdqa		%xmm0,0x10(%rsp)
 | |
| +	pxor		%xmm0,%xmm13
 | |
| +	pshufb		%xmm3,%xmm13
 | |
| +	# x2 += x6, x14 = rotl32(x14 ^ x2, 16)
 | |
| +	movdqa		0x20(%rsp),%xmm0
 | |
| +	paddd		%xmm6,%xmm0
 | |
| +	movdqa		%xmm0,0x20(%rsp)
 | |
| +	pxor		%xmm0,%xmm14
 | |
| +	pshufb		%xmm3,%xmm14
 | |
| +	# x3 += x7, x15 = rotl32(x15 ^ x3, 16)
 | |
| +	movdqa		0x30(%rsp),%xmm0
 | |
| +	paddd		%xmm7,%xmm0
 | |
| +	movdqa		%xmm0,0x30(%rsp)
 | |
| +	pxor		%xmm0,%xmm15
 | |
| +	pshufb		%xmm3,%xmm15
 | |
| +
 | |
| +	# x8 += x12, x4 = rotl32(x4 ^ x8, 12)
 | |
| +	paddd		%xmm12,%xmm8
 | |
| +	pxor		%xmm8,%xmm4
 | |
| +	movdqa		%xmm4,%xmm0
 | |
| +	pslld		$12,%xmm0
 | |
| +	psrld		$20,%xmm4
 | |
| +	por		%xmm0,%xmm4
 | |
| +	# x9 += x13, x5 = rotl32(x5 ^ x9, 12)
 | |
| +	paddd		%xmm13,%xmm9
 | |
| +	pxor		%xmm9,%xmm5
 | |
| +	movdqa		%xmm5,%xmm0
 | |
| +	pslld		$12,%xmm0
 | |
| +	psrld		$20,%xmm5
 | |
| +	por		%xmm0,%xmm5
 | |
| +	# x10 += x14, x6 = rotl32(x6 ^ x10, 12)
 | |
| +	paddd		%xmm14,%xmm10
 | |
| +	pxor		%xmm10,%xmm6
 | |
| +	movdqa		%xmm6,%xmm0
 | |
| +	pslld		$12,%xmm0
 | |
| +	psrld		$20,%xmm6
 | |
| +	por		%xmm0,%xmm6
 | |
| +	# x11 += x15, x7 = rotl32(x7 ^ x11, 12)
 | |
| +	paddd		%xmm15,%xmm11
 | |
| +	pxor		%xmm11,%xmm7
 | |
| +	movdqa		%xmm7,%xmm0
 | |
| +	pslld		$12,%xmm0
 | |
| +	psrld		$20,%xmm7
 | |
| +	por		%xmm0,%xmm7
 | |
| +
 | |
| +	# x0 += x4, x12 = rotl32(x12 ^ x0, 8)
 | |
| +	movdqa		0x00(%rsp),%xmm0
 | |
| +	paddd		%xmm4,%xmm0
 | |
| +	movdqa		%xmm0,0x00(%rsp)
 | |
| +	pxor		%xmm0,%xmm12
 | |
| +	pshufb		%xmm2,%xmm12
 | |
| +	# x1 += x5, x13 = rotl32(x13 ^ x1, 8)
 | |
| +	movdqa		0x10(%rsp),%xmm0
 | |
| +	paddd		%xmm5,%xmm0
 | |
| +	movdqa		%xmm0,0x10(%rsp)
 | |
| +	pxor		%xmm0,%xmm13
 | |
| +	pshufb		%xmm2,%xmm13
 | |
| +	# x2 += x6, x14 = rotl32(x14 ^ x2, 8)
 | |
| +	movdqa		0x20(%rsp),%xmm0
 | |
| +	paddd		%xmm6,%xmm0
 | |
| +	movdqa		%xmm0,0x20(%rsp)
 | |
| +	pxor		%xmm0,%xmm14
 | |
| +	pshufb		%xmm2,%xmm14
 | |
| +	# x3 += x7, x15 = rotl32(x15 ^ x3, 8)
 | |
| +	movdqa		0x30(%rsp),%xmm0
 | |
| +	paddd		%xmm7,%xmm0
 | |
| +	movdqa		%xmm0,0x30(%rsp)
 | |
| +	pxor		%xmm0,%xmm15
 | |
| +	pshufb		%xmm2,%xmm15
 | |
| +
 | |
| +	# x8 += x12, x4 = rotl32(x4 ^ x8, 7)
 | |
| +	paddd		%xmm12,%xmm8
 | |
| +	pxor		%xmm8,%xmm4
 | |
| +	movdqa		%xmm4,%xmm0
 | |
| +	pslld		$7,%xmm0
 | |
| +	psrld		$25,%xmm4
 | |
| +	por		%xmm0,%xmm4
 | |
| +	# x9 += x13, x5 = rotl32(x5 ^ x9, 7)
 | |
| +	paddd		%xmm13,%xmm9
 | |
| +	pxor		%xmm9,%xmm5
 | |
| +	movdqa		%xmm5,%xmm0
 | |
| +	pslld		$7,%xmm0
 | |
| +	psrld		$25,%xmm5
 | |
| +	por		%xmm0,%xmm5
 | |
| +	# x10 += x14, x6 = rotl32(x6 ^ x10, 7)
 | |
| +	paddd		%xmm14,%xmm10
 | |
| +	pxor		%xmm10,%xmm6
 | |
| +	movdqa		%xmm6,%xmm0
 | |
| +	pslld		$7,%xmm0
 | |
| +	psrld		$25,%xmm6
 | |
| +	por		%xmm0,%xmm6
 | |
| +	# x11 += x15, x7 = rotl32(x7 ^ x11, 7)
 | |
| +	paddd		%xmm15,%xmm11
 | |
| +	pxor		%xmm11,%xmm7
 | |
| +	movdqa		%xmm7,%xmm0
 | |
| +	pslld		$7,%xmm0
 | |
| +	psrld		$25,%xmm7
 | |
| +	por		%xmm0,%xmm7
 | |
| +
 | |
| +	# x0 += x5, x15 = rotl32(x15 ^ x0, 16)
 | |
| +	movdqa		0x00(%rsp),%xmm0
 | |
| +	paddd		%xmm5,%xmm0
 | |
| +	movdqa		%xmm0,0x00(%rsp)
 | |
| +	pxor		%xmm0,%xmm15
 | |
| +	pshufb		%xmm3,%xmm15
 | |
| +	# x1 += x6, x12 = rotl32(x12 ^ x1, 16)
 | |
| +	movdqa		0x10(%rsp),%xmm0
 | |
| +	paddd		%xmm6,%xmm0
 | |
| +	movdqa		%xmm0,0x10(%rsp)
 | |
| +	pxor		%xmm0,%xmm12
 | |
| +	pshufb		%xmm3,%xmm12
 | |
| +	# x2 += x7, x13 = rotl32(x13 ^ x2, 16)
 | |
| +	movdqa		0x20(%rsp),%xmm0
 | |
| +	paddd		%xmm7,%xmm0
 | |
| +	movdqa		%xmm0,0x20(%rsp)
 | |
| +	pxor		%xmm0,%xmm13
 | |
| +	pshufb		%xmm3,%xmm13
 | |
| +	# x3 += x4, x14 = rotl32(x14 ^ x3, 16)
 | |
| +	movdqa		0x30(%rsp),%xmm0
 | |
| +	paddd		%xmm4,%xmm0
 | |
| +	movdqa		%xmm0,0x30(%rsp)
 | |
| +	pxor		%xmm0,%xmm14
 | |
| +	pshufb		%xmm3,%xmm14
 | |
| +
 | |
| +	# x10 += x15, x5 = rotl32(x5 ^ x10, 12)
 | |
| +	paddd		%xmm15,%xmm10
 | |
| +	pxor		%xmm10,%xmm5
 | |
| +	movdqa		%xmm5,%xmm0
 | |
| +	pslld		$12,%xmm0
 | |
| +	psrld		$20,%xmm5
 | |
| +	por		%xmm0,%xmm5
 | |
| +	# x11 += x12, x6 = rotl32(x6 ^ x11, 12)
 | |
| +	paddd		%xmm12,%xmm11
 | |
| +	pxor		%xmm11,%xmm6
 | |
| +	movdqa		%xmm6,%xmm0
 | |
| +	pslld		$12,%xmm0
 | |
| +	psrld		$20,%xmm6
 | |
| +	por		%xmm0,%xmm6
 | |
| +	# x8 += x13, x7 = rotl32(x7 ^ x8, 12)
 | |
| +	paddd		%xmm13,%xmm8
 | |
| +	pxor		%xmm8,%xmm7
 | |
| +	movdqa		%xmm7,%xmm0
 | |
| +	pslld		$12,%xmm0
 | |
| +	psrld		$20,%xmm7
 | |
| +	por		%xmm0,%xmm7
 | |
| +	# x9 += x14, x4 = rotl32(x4 ^ x9, 12)
 | |
| +	paddd		%xmm14,%xmm9
 | |
| +	pxor		%xmm9,%xmm4
 | |
| +	movdqa		%xmm4,%xmm0
 | |
| +	pslld		$12,%xmm0
 | |
| +	psrld		$20,%xmm4
 | |
| +	por		%xmm0,%xmm4
 | |
| +
 | |
| +	# x0 += x5, x15 = rotl32(x15 ^ x0, 8)
 | |
| +	movdqa		0x00(%rsp),%xmm0
 | |
| +	paddd		%xmm5,%xmm0
 | |
| +	movdqa		%xmm0,0x00(%rsp)
 | |
| +	pxor		%xmm0,%xmm15
 | |
| +	pshufb		%xmm2,%xmm15
 | |
| +	# x1 += x6, x12 = rotl32(x12 ^ x1, 8)
 | |
| +	movdqa		0x10(%rsp),%xmm0
 | |
| +	paddd		%xmm6,%xmm0
 | |
| +	movdqa		%xmm0,0x10(%rsp)
 | |
| +	pxor		%xmm0,%xmm12
 | |
| +	pshufb		%xmm2,%xmm12
 | |
| +	# x2 += x7, x13 = rotl32(x13 ^ x2, 8)
 | |
| +	movdqa		0x20(%rsp),%xmm0
 | |
| +	paddd		%xmm7,%xmm0
 | |
| +	movdqa		%xmm0,0x20(%rsp)
 | |
| +	pxor		%xmm0,%xmm13
 | |
| +	pshufb		%xmm2,%xmm13
 | |
| +	# x3 += x4, x14 = rotl32(x14 ^ x3, 8)
 | |
| +	movdqa		0x30(%rsp),%xmm0
 | |
| +	paddd		%xmm4,%xmm0
 | |
| +	movdqa		%xmm0,0x30(%rsp)
 | |
| +	pxor		%xmm0,%xmm14
 | |
| +	pshufb		%xmm2,%xmm14
 | |
| +
 | |
| +	# x10 += x15, x5 = rotl32(x5 ^ x10, 7)
 | |
| +	paddd		%xmm15,%xmm10
 | |
| +	pxor		%xmm10,%xmm5
 | |
| +	movdqa		%xmm5,%xmm0
 | |
| +	pslld		$7,%xmm0
 | |
| +	psrld		$25,%xmm5
 | |
| +	por		%xmm0,%xmm5
 | |
| +	# x11 += x12, x6 = rotl32(x6 ^ x11, 7)
 | |
| +	paddd		%xmm12,%xmm11
 | |
| +	pxor		%xmm11,%xmm6
 | |
| +	movdqa		%xmm6,%xmm0
 | |
| +	pslld		$7,%xmm0
 | |
| +	psrld		$25,%xmm6
 | |
| +	por		%xmm0,%xmm6
 | |
| +	# x8 += x13, x7 = rotl32(x7 ^ x8, 7)
 | |
| +	paddd		%xmm13,%xmm8
 | |
| +	pxor		%xmm8,%xmm7
 | |
| +	movdqa		%xmm7,%xmm0
 | |
| +	pslld		$7,%xmm0
 | |
| +	psrld		$25,%xmm7
 | |
| +	por		%xmm0,%xmm7
 | |
| +	# x9 += x14, x4 = rotl32(x4 ^ x9, 7)
 | |
| +	paddd		%xmm14,%xmm9
 | |
| +	pxor		%xmm9,%xmm4
 | |
| +	movdqa		%xmm4,%xmm0
 | |
| +	pslld		$7,%xmm0
 | |
| +	psrld		$25,%xmm4
 | |
| +	por		%xmm0,%xmm4
 | |
| +
 | |
| +	dec		%ecx
 | |
| +	jnz		.Ldoubleround4
 | |
| +
 | |
| +	# x0[0-3] += s0[0]
 | |
| +	# x1[0-3] += s0[1]
 | |
| +	movq		0x00(%rdi),%xmm3
 | |
| +	pshufd		$0x00,%xmm3,%xmm2
 | |
| +	pshufd		$0x55,%xmm3,%xmm3
 | |
| +	paddd		0x00(%rsp),%xmm2
 | |
| +	movdqa		%xmm2,0x00(%rsp)
 | |
| +	paddd		0x10(%rsp),%xmm3
 | |
| +	movdqa		%xmm3,0x10(%rsp)
 | |
| +	# x2[0-3] += s0[2]
 | |
| +	# x3[0-3] += s0[3]
 | |
| +	movq		0x08(%rdi),%xmm3
 | |
| +	pshufd		$0x00,%xmm3,%xmm2
 | |
| +	pshufd		$0x55,%xmm3,%xmm3
 | |
| +	paddd		0x20(%rsp),%xmm2
 | |
| +	movdqa		%xmm2,0x20(%rsp)
 | |
| +	paddd		0x30(%rsp),%xmm3
 | |
| +	movdqa		%xmm3,0x30(%rsp)
 | |
| +
 | |
| +	# x4[0-3] += s1[0]
 | |
| +	# x5[0-3] += s1[1]
 | |
| +	movq		0x10(%rdi),%xmm3
 | |
| +	pshufd		$0x00,%xmm3,%xmm2
 | |
| +	pshufd		$0x55,%xmm3,%xmm3
 | |
| +	paddd		%xmm2,%xmm4
 | |
| +	paddd		%xmm3,%xmm5
 | |
| +	# x6[0-3] += s1[2]
 | |
| +	# x7[0-3] += s1[3]
 | |
| +	movq		0x18(%rdi),%xmm3
 | |
| +	pshufd		$0x00,%xmm3,%xmm2
 | |
| +	pshufd		$0x55,%xmm3,%xmm3
 | |
| +	paddd		%xmm2,%xmm6
 | |
| +	paddd		%xmm3,%xmm7
 | |
| +
 | |
| +	# x8[0-3] += s2[0]
 | |
| +	# x9[0-3] += s2[1]
 | |
| +	movq		0x20(%rdi),%xmm3
 | |
| +	pshufd		$0x00,%xmm3,%xmm2
 | |
| +	pshufd		$0x55,%xmm3,%xmm3
 | |
| +	paddd		%xmm2,%xmm8
 | |
| +	paddd		%xmm3,%xmm9
 | |
| +	# x10[0-3] += s2[2]
 | |
| +	# x11[0-3] += s2[3]
 | |
| +	movq		0x28(%rdi),%xmm3
 | |
| +	pshufd		$0x00,%xmm3,%xmm2
 | |
| +	pshufd		$0x55,%xmm3,%xmm3
 | |
| +	paddd		%xmm2,%xmm10
 | |
| +	paddd		%xmm3,%xmm11
 | |
| +
 | |
| +	# x12[0-3] += s3[0]
 | |
| +	# x13[0-3] += s3[1]
 | |
| +	movq		0x30(%rdi),%xmm3
 | |
| +	pshufd		$0x00,%xmm3,%xmm2
 | |
| +	pshufd		$0x55,%xmm3,%xmm3
 | |
| +	paddd		%xmm2,%xmm12
 | |
| +	paddd		%xmm3,%xmm13
 | |
| +	# x14[0-3] += s3[2]
 | |
| +	# x15[0-3] += s3[3]
 | |
| +	movq		0x38(%rdi),%xmm3
 | |
| +	pshufd		$0x00,%xmm3,%xmm2
 | |
| +	pshufd		$0x55,%xmm3,%xmm3
 | |
| +	paddd		%xmm2,%xmm14
 | |
| +	paddd		%xmm3,%xmm15
 | |
| +
 | |
| +	# x12 += counter values 0-3
 | |
| +	paddd		%xmm1,%xmm12
 | |
| +
 | |
| +	# interleave 32-bit words in state n, n+1
 | |
| +	movdqa		0x00(%rsp),%xmm0
 | |
| +	movdqa		0x10(%rsp),%xmm1
 | |
| +	movdqa		%xmm0,%xmm2
 | |
| +	punpckldq	%xmm1,%xmm2
 | |
| +	punpckhdq	%xmm1,%xmm0
 | |
| +	movdqa		%xmm2,0x00(%rsp)
 | |
| +	movdqa		%xmm0,0x10(%rsp)
 | |
| +	movdqa		0x20(%rsp),%xmm0
 | |
| +	movdqa		0x30(%rsp),%xmm1
 | |
| +	movdqa		%xmm0,%xmm2
 | |
| +	punpckldq	%xmm1,%xmm2
 | |
| +	punpckhdq	%xmm1,%xmm0
 | |
| +	movdqa		%xmm2,0x20(%rsp)
 | |
| +	movdqa		%xmm0,0x30(%rsp)
 | |
| +	movdqa		%xmm4,%xmm0
 | |
| +	punpckldq	%xmm5,%xmm4
 | |
| +	punpckhdq	%xmm5,%xmm0
 | |
| +	movdqa		%xmm0,%xmm5
 | |
| +	movdqa		%xmm6,%xmm0
 | |
| +	punpckldq	%xmm7,%xmm6
 | |
| +	punpckhdq	%xmm7,%xmm0
 | |
| +	movdqa		%xmm0,%xmm7
 | |
| +	movdqa		%xmm8,%xmm0
 | |
| +	punpckldq	%xmm9,%xmm8
 | |
| +	punpckhdq	%xmm9,%xmm0
 | |
| +	movdqa		%xmm0,%xmm9
 | |
| +	movdqa		%xmm10,%xmm0
 | |
| +	punpckldq	%xmm11,%xmm10
 | |
| +	punpckhdq	%xmm11,%xmm0
 | |
| +	movdqa		%xmm0,%xmm11
 | |
| +	movdqa		%xmm12,%xmm0
 | |
| +	punpckldq	%xmm13,%xmm12
 | |
| +	punpckhdq	%xmm13,%xmm0
 | |
| +	movdqa		%xmm0,%xmm13
 | |
| +	movdqa		%xmm14,%xmm0
 | |
| +	punpckldq	%xmm15,%xmm14
 | |
| +	punpckhdq	%xmm15,%xmm0
 | |
| +	movdqa		%xmm0,%xmm15
 | |
| +
 | |
| +	# interleave 64-bit words in state n, n+2
 | |
| +	movdqa		0x00(%rsp),%xmm0
 | |
| +	movdqa		0x20(%rsp),%xmm1
 | |
| +	movdqa		%xmm0,%xmm2
 | |
| +	punpcklqdq	%xmm1,%xmm2
 | |
| +	punpckhqdq	%xmm1,%xmm0
 | |
| +	movdqa		%xmm2,0x00(%rsp)
 | |
| +	movdqa		%xmm0,0x20(%rsp)
 | |
| +	movdqa		0x10(%rsp),%xmm0
 | |
| +	movdqa		0x30(%rsp),%xmm1
 | |
| +	movdqa		%xmm0,%xmm2
 | |
| +	punpcklqdq	%xmm1,%xmm2
 | |
| +	punpckhqdq	%xmm1,%xmm0
 | |
| +	movdqa		%xmm2,0x10(%rsp)
 | |
| +	movdqa		%xmm0,0x30(%rsp)
 | |
| +	movdqa		%xmm4,%xmm0
 | |
| +	punpcklqdq	%xmm6,%xmm4
 | |
| +	punpckhqdq	%xmm6,%xmm0
 | |
| +	movdqa		%xmm0,%xmm6
 | |
| +	movdqa		%xmm5,%xmm0
 | |
| +	punpcklqdq	%xmm7,%xmm5
 | |
| +	punpckhqdq	%xmm7,%xmm0
 | |
| +	movdqa		%xmm0,%xmm7
 | |
| +	movdqa		%xmm8,%xmm0
 | |
| +	punpcklqdq	%xmm10,%xmm8
 | |
| +	punpckhqdq	%xmm10,%xmm0
 | |
| +	movdqa		%xmm0,%xmm10
 | |
| +	movdqa		%xmm9,%xmm0
 | |
| +	punpcklqdq	%xmm11,%xmm9
 | |
| +	punpckhqdq	%xmm11,%xmm0
 | |
| +	movdqa		%xmm0,%xmm11
 | |
| +	movdqa		%xmm12,%xmm0
 | |
| +	punpcklqdq	%xmm14,%xmm12
 | |
| +	punpckhqdq	%xmm14,%xmm0
 | |
| +	movdqa		%xmm0,%xmm14
 | |
| +	movdqa		%xmm13,%xmm0
 | |
| +	punpcklqdq	%xmm15,%xmm13
 | |
| +	punpckhqdq	%xmm15,%xmm0
 | |
| +	movdqa		%xmm0,%xmm15
 | |
| +
 | |
| +	# xor with corresponding input, write to output
 | |
| +	movdqa		0x00(%rsp),%xmm0
 | |
| +	movdqu		0x00(%rdx),%xmm1
 | |
| +	pxor		%xmm1,%xmm0
 | |
| +	movdqu		%xmm0,0x00(%rsi)
 | |
| +	movdqa		0x10(%rsp),%xmm0
 | |
| +	movdqu		0x80(%rdx),%xmm1
 | |
| +	pxor		%xmm1,%xmm0
 | |
| +	movdqu		%xmm0,0x80(%rsi)
 | |
| +	movdqa		0x20(%rsp),%xmm0
 | |
| +	movdqu		0x40(%rdx),%xmm1
 | |
| +	pxor		%xmm1,%xmm0
 | |
| +	movdqu		%xmm0,0x40(%rsi)
 | |
| +	movdqa		0x30(%rsp),%xmm0
 | |
| +	movdqu		0xc0(%rdx),%xmm1
 | |
| +	pxor		%xmm1,%xmm0
 | |
| +	movdqu		%xmm0,0xc0(%rsi)
 | |
| +	movdqu		0x10(%rdx),%xmm1
 | |
| +	pxor		%xmm1,%xmm4
 | |
| +	movdqu		%xmm4,0x10(%rsi)
 | |
| +	movdqu		0x90(%rdx),%xmm1
 | |
| +	pxor		%xmm1,%xmm5
 | |
| +	movdqu		%xmm5,0x90(%rsi)
 | |
| +	movdqu		0x50(%rdx),%xmm1
 | |
| +	pxor		%xmm1,%xmm6
 | |
| +	movdqu		%xmm6,0x50(%rsi)
 | |
| +	movdqu		0xd0(%rdx),%xmm1
 | |
| +	pxor		%xmm1,%xmm7
 | |
| +	movdqu		%xmm7,0xd0(%rsi)
 | |
| +	movdqu		0x20(%rdx),%xmm1
 | |
| +	pxor		%xmm1,%xmm8
 | |
| +	movdqu		%xmm8,0x20(%rsi)
 | |
| +	movdqu		0xa0(%rdx),%xmm1
 | |
| +	pxor		%xmm1,%xmm9
 | |
| +	movdqu		%xmm9,0xa0(%rsi)
 | |
| +	movdqu		0x60(%rdx),%xmm1
 | |
| +	pxor		%xmm1,%xmm10
 | |
| +	movdqu		%xmm10,0x60(%rsi)
 | |
| +	movdqu		0xe0(%rdx),%xmm1
 | |
| +	pxor		%xmm1,%xmm11
 | |
| +	movdqu		%xmm11,0xe0(%rsi)
 | |
| +	movdqu		0x30(%rdx),%xmm1
 | |
| +	pxor		%xmm1,%xmm12
 | |
| +	movdqu		%xmm12,0x30(%rsi)
 | |
| +	movdqu		0xb0(%rdx),%xmm1
 | |
| +	pxor		%xmm1,%xmm13
 | |
| +	movdqu		%xmm13,0xb0(%rsi)
 | |
| +	movdqu		0x70(%rdx),%xmm1
 | |
| +	pxor		%xmm1,%xmm14
 | |
| +	movdqu		%xmm14,0x70(%rsi)
 | |
| +	movdqu		0xf0(%rdx),%xmm1
 | |
| +	pxor		%xmm1,%xmm15
 | |
| +	movdqu		%xmm15,0xf0(%rsi)
 | |
| +
 | |
| +	mov		%r11,%rsp
 | |
| +	ret
 | |
| +ENDPROC(chacha20_asm_4block_xor_ssse3)
 | |
| +
 | |
| +ENTRY(hchacha20_asm_ssse3)
 | |
| +	# %rdi: 32 byte output key, o
 | |
| +	# %rsi: 16 byte nonce, n
 | |
| +	# %rdx: 32 byte input key, i
 | |
| +	# Runs 10 double rounds over the rows {CONST, i, n}; stores x0 and x3 to o.
 | |
| +	# x0 = constant
 | |
| +	movdqa		CONST(%rip),%xmm0
 | |
| +	# x1, x2 = i
 | |
| +	movdqu		0x00(%rdx),%xmm1
 | |
| +	movdqu		0x10(%rdx),%xmm2
 | |
| +	# x3 = n
 | |
| +	movdqu		0x00(%rsi),%xmm3
 | |
| +
 | |
| +	movdqa		%xmm0,%xmm8	# the xmm8-xmm11 copies are not read again in this function
 | |
| +	movdqa		%xmm1,%xmm9
 | |
| +	movdqa		%xmm2,%xmm10
 | |
| +	movdqa		%xmm3,%xmm11
 | |
| +	movdqa		ROT8(%rip),%xmm4	# pshufb mask for the rotl32 by 8 steps
 | |
| +	movdqa		ROT16(%rip),%xmm5	# pshufb mask for the rotl32 by 16 steps
 | |
| +
 | |
| +	mov	$10,%ecx	# loop counter: 10 passes of .Lhdoubleround
 | |
| +
 | |
| +.Lhdoubleround:
 | |
| +
 | |
| +	# x0 += x1, x3 = rotl32(x3 ^ x0, 16)
 | |
| +	paddd		%xmm1,%xmm0
 | |
| +	pxor		%xmm0,%xmm3
 | |
| +	pshufb		%xmm5,%xmm3
 | |
| +
 | |
| +	# x2 += x3, x1 = rotl32(x1 ^ x2, 12)
 | |
| +	paddd		%xmm3,%xmm2
 | |
| +	pxor		%xmm2,%xmm1
 | |
| +	movdqa		%xmm1,%xmm6
 | |
| +	pslld		$12,%xmm6
 | |
| +	psrld		$20,%xmm1
 | |
| +	por		%xmm6,%xmm1
 | |
| +
 | |
| +	# x0 += x1, x3 = rotl32(x3 ^ x0, 8)
 | |
| +	paddd		%xmm1,%xmm0
 | |
| +	pxor		%xmm0,%xmm3
 | |
| +	pshufb		%xmm4,%xmm3
 | |
| +
 | |
| +	# x2 += x3, x1 = rotl32(x1 ^ x2, 7)
 | |
| +	paddd		%xmm3,%xmm2
 | |
| +	pxor		%xmm2,%xmm1
 | |
| +	movdqa		%xmm1,%xmm7
 | |
| +	pslld		$7,%xmm7
 | |
| +	psrld		$25,%xmm1
 | |
| +	por		%xmm7,%xmm1
 | |
| +
 | |
| +	# x1 = shuffle32(x1, MASK(0, 3, 2, 1))
 | |
| +	pshufd		$0x39,%xmm1,%xmm1
 | |
| +	# x2 = shuffle32(x2, MASK(1, 0, 3, 2))
 | |
| +	pshufd		$0x4e,%xmm2,%xmm2
 | |
| +	# x3 = shuffle32(x3, MASK(2, 1, 0, 3))
 | |
| +	pshufd		$0x93,%xmm3,%xmm3
 | |
| +
 | |
| +	# x0 += x1, x3 = rotl32(x3 ^ x0, 16)
 | |
| +	paddd		%xmm1,%xmm0
 | |
| +	pxor		%xmm0,%xmm3
 | |
| +	pshufb		%xmm5,%xmm3
 | |
| +
 | |
| +	# x2 += x3, x1 = rotl32(x1 ^ x2, 12)
 | |
| +	paddd		%xmm3,%xmm2
 | |
| +	pxor		%xmm2,%xmm1
 | |
| +	movdqa		%xmm1,%xmm6
 | |
| +	pslld		$12,%xmm6
 | |
| +	psrld		$20,%xmm1
 | |
| +	por		%xmm6,%xmm1
 | |
| +
 | |
| +	# x0 += x1, x3 = rotl32(x3 ^ x0, 8)
 | |
| +	paddd		%xmm1,%xmm0
 | |
| +	pxor		%xmm0,%xmm3
 | |
| +	pshufb		%xmm4,%xmm3
 | |
| +
 | |
| +	# x2 += x3, x1 = rotl32(x1 ^ x2, 7)
 | |
| +	paddd		%xmm3,%xmm2
 | |
| +	pxor		%xmm2,%xmm1
 | |
| +	movdqa		%xmm1,%xmm7
 | |
| +	pslld		$7,%xmm7
 | |
| +	psrld		$25,%xmm1
 | |
| +	por		%xmm7,%xmm1
 | |
| +
 | |
| +	# x1 = shuffle32(x1, MASK(2, 1, 0, 3)); undoes the 0x39 shuffle applied to x1 above
 | |
| +	pshufd		$0x93,%xmm1,%xmm1
 | |
| +	# x2 = shuffle32(x2, MASK(1, 0, 3, 2))
 | |
| +	pshufd		$0x4e,%xmm2,%xmm2
 | |
| +	# x3 = shuffle32(x3, MASK(0, 3, 2, 1)); undoes the 0x93 shuffle applied to x3 above
 | |
| +	pshufd		$0x39,%xmm3,%xmm3
 | |
| +
 | |
| +	dec		%ecx
 | |
| +	jnz		.Lhdoubleround
 | |
| +
 | |
| +	# o0 = x0 (output bytes 0x00-0x0f)
 | |
| +	movdqu		%xmm0,0x00(%rdi)
 | |
| +	# o1 = x3 (output bytes 0x10-0x1f); x1 and x2 are not stored
 | |
| +	movdqu		%xmm3,0x10(%rdi)
 | |
| +	ret
 | |
| +ENDPROC(hchacha20_asm_ssse3)
 | |
| --- /dev/null	2017-07-05 16:27:37.615351856 +0200
 | |
| +++ b/net/wireguard/crypto/curve25519-avx-x86_64.S	2017-07-06 18:17:33.000000000 +0200
 | |
| @@ -0,0 +1,3259 @@
 | |
| +/*
 | |
| + * Copyright (C) 2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 | |
| + * Based on algorithms from Tung Chou <blueprint@crypto.tw>
 | |
| + */
 | |
| +
 | |
| +#include <linux/linkage.h>
 | |
| +
 | |
| +.data
 | |
| +.align 16	/* every entry below except REDMASK51 is a pair of quads (16 bytes) */
 | |
| +curve25519_sandy2x_v0_0: .quad 0, 0
 | |
| +curve25519_sandy2x_v1_0: .quad 1, 0
 | |
| +curve25519_sandy2x_v2_1: .quad 2, 1
 | |
| +curve25519_sandy2x_v9_0: .quad 9, 0
 | |
| +curve25519_sandy2x_v9_9: .quad 9, 9
 | |
| +curve25519_sandy2x_v19_19: .quad 19, 19
 | |
| +curve25519_sandy2x_v38_1: .quad 38, 1
 | |
| +curve25519_sandy2x_v38_38: .quad 38, 38
 | |
| +curve25519_sandy2x_v121666_121666: .quad 121666, 121666
 | |
| +curve25519_sandy2x_m25: .quad 33554431, 33554431	/* 2^25 - 1 in both quads */
 | |
| +curve25519_sandy2x_m26: .quad 67108863, 67108863	/* 2^26 - 1 in both quads */
 | |
| +curve25519_sandy2x_subc0: .quad 0x07FFFFDA, 0x03FFFFFE	/* 2^27 - 38, 2^26 - 2 */
 | |
| +curve25519_sandy2x_subc2: .quad 0x07FFFFFE, 0x03FFFFFE	/* 2^27 - 2, 2^26 - 2 */
 | |
| +curve25519_sandy2x_REDMASK51: .quad 0x0007FFFFFFFFFFFF	/* 2^51 - 1, single quad; used as the limb mask by the fe51 routines */
 | |
| +
 | |
| +.text
 | |
| +.align 32
 | |
| +ENTRY(curve25519_sandy2x_fe51_mul)	/* rdi = result (five 64-bit limbs, written), rsi = first operand, rdx = second operand (five limbs each, read) */
 | |
| +	mov %rsp,%r11	/* r11 = frame size = (rsp & 31) + 96, which leaves the new rsp 32-byte aligned */
 | |
| +	and $31,%r11
 | |
| +	add $96,%r11
 | |
| +	sub %r11,%rsp
 | |
| +	movq %r11,0(%rsp)	/* frame size is kept for the epilogue; r11 is reused as an accumulator below */
 | |
| +	movq %r12,8(%rsp)
 | |
| +	movq %r13,16(%rsp)
 | |
| +	movq %r14,24(%rsp)
 | |
| +	movq %r15,32(%rsp)
 | |
| +	movq %rbx,40(%rsp)
 | |
| +	movq %rbp,48(%rsp)
 | |
| +	movq %rdi,56(%rsp)	/* stored but never reloaded in this function */
 | |
| +	mov %rdx,%rcx	/* rcx = second operand pointer, because mulq overwrites rdx */
 | |
| +	movq 24(%rsi),%rdx
 | |
| +	imulq $19,%rdx,%rax
 | |
| +	movq %rax,64(%rsp)	/* 64(%rsp) = 19 * limb 3 of the first operand */
 | |
| +	mulq 16(%rcx)
 | |
| +	mov %rax,%r8	/* r9:r8 = 128-bit accumulator for result limb 0 */
 | |
| +	mov %rdx,%r9
 | |
| +	movq 32(%rsi),%rdx
 | |
| +	imulq $19,%rdx,%rax
 | |
| +	movq %rax,72(%rsp)	/* 72(%rsp) = 19 * limb 4 of the first operand */
 | |
| +	mulq 8(%rcx)
 | |
| +	add %rax,%r8
 | |
| +	adc %rdx,%r9
 | |
| +	movq 0(%rsi),%rax
 | |
| +	mulq 0(%rcx)
 | |
| +	add %rax,%r8
 | |
| +	adc %rdx,%r9
 | |
| +	movq 0(%rsi),%rax
 | |
| +	mulq 8(%rcx)
 | |
| +	mov %rax,%r10	/* r11:r10 = accumulator for result limb 1 */
 | |
| +	mov %rdx,%r11
 | |
| +	movq 0(%rsi),%rax
 | |
| +	mulq 16(%rcx)
 | |
| +	mov %rax,%r12	/* r13:r12 = accumulator for result limb 2 */
 | |
| +	mov %rdx,%r13
 | |
| +	movq 0(%rsi),%rax
 | |
| +	mulq 24(%rcx)
 | |
| +	mov %rax,%r14	/* r15:r14 = accumulator for result limb 3 */
 | |
| +	mov %rdx,%r15
 | |
| +	movq 0(%rsi),%rax
 | |
| +	mulq 32(%rcx)
 | |
| +	mov %rax,%rbx	/* rbp:rbx = accumulator for result limb 4 */
 | |
| +	mov %rdx,%rbp
 | |
| +	movq 8(%rsi),%rax
 | |
| +	mulq 0(%rcx)
 | |
| +	add %rax,%r10
 | |
| +	adc %rdx,%r11
 | |
| +	movq 8(%rsi),%rax
 | |
| +	mulq 8(%rcx)
 | |
| +	add %rax,%r12
 | |
| +	adc %rdx,%r13
 | |
| +	movq 8(%rsi),%rax
 | |
| +	mulq 16(%rcx)
 | |
| +	add %rax,%r14
 | |
| +	adc %rdx,%r15
 | |
| +	movq 8(%rsi),%rax
 | |
| +	mulq 24(%rcx)
 | |
| +	add %rax,%rbx
 | |
| +	adc %rdx,%rbp
 | |
| +	movq 8(%rsi),%rdx
 | |
| +	imulq $19,%rdx,%rax	/* products whose limb indices sum past 4 are scaled by 19 and added to the lower limb */
 | |
| +	mulq 32(%rcx)
 | |
| +	add %rax,%r8
 | |
| +	adc %rdx,%r9
 | |
| +	movq 16(%rsi),%rax
 | |
| +	mulq 0(%rcx)
 | |
| +	add %rax,%r12
 | |
| +	adc %rdx,%r13
 | |
| +	movq 16(%rsi),%rax
 | |
| +	mulq 8(%rcx)
 | |
| +	add %rax,%r14
 | |
| +	adc %rdx,%r15
 | |
| +	movq 16(%rsi),%rax
 | |
| +	mulq 16(%rcx)
 | |
| +	add %rax,%rbx
 | |
| +	adc %rdx,%rbp
 | |
| +	movq 16(%rsi),%rdx
 | |
| +	imulq $19,%rdx,%rax
 | |
| +	mulq 24(%rcx)
 | |
| +	add %rax,%r8
 | |
| +	adc %rdx,%r9
 | |
| +	movq 16(%rsi),%rdx
 | |
| +	imulq $19,%rdx,%rax
 | |
| +	mulq 32(%rcx)
 | |
| +	add %rax,%r10
 | |
| +	adc %rdx,%r11
 | |
| +	movq 24(%rsi),%rax
 | |
| +	mulq 0(%rcx)
 | |
| +	add %rax,%r14
 | |
| +	adc %rdx,%r15
 | |
| +	movq 24(%rsi),%rax
 | |
| +	mulq 8(%rcx)
 | |
| +	add %rax,%rbx
 | |
| +	adc %rdx,%rbp
 | |
| +	movq 64(%rsp),%rax	/* reload 19 * limb 3 saved above */
 | |
| +	mulq 24(%rcx)
 | |
| +	add %rax,%r10
 | |
| +	adc %rdx,%r11
 | |
| +	movq 64(%rsp),%rax
 | |
| +	mulq 32(%rcx)
 | |
| +	add %rax,%r12
 | |
| +	adc %rdx,%r13
 | |
| +	movq 32(%rsi),%rax
 | |
| +	mulq 0(%rcx)
 | |
| +	add %rax,%rbx
 | |
| +	adc %rdx,%rbp
 | |
| +	movq 72(%rsp),%rax	/* reload 19 * limb 4 saved above */
 | |
| +	mulq 16(%rcx)
 | |
| +	add %rax,%r10
 | |
| +	adc %rdx,%r11
 | |
| +	movq 72(%rsp),%rax
 | |
| +	mulq 24(%rcx)
 | |
| +	add %rax,%r12
 | |
| +	adc %rdx,%r13
 | |
| +	movq 72(%rsp),%rax
 | |
| +	mulq 32(%rcx)
 | |
| +	add %rax,%r14
 | |
| +	adc %rdx,%r15
 | |
| +	movq curve25519_sandy2x_REDMASK51(%rip),%rsi	/* rsi = 2^51 - 1; the first operand pointer is not used after this point */
 | |
| +	shld $13,%r8,%r9	/* r9 = (r9 << 13) | (r8 >> 51): the part of the accumulator above bit 50 */
 | |
| +	and %rsi,%r8
 | |
| +	shld $13,%r10,%r11
 | |
| +	and %rsi,%r10
 | |
| +	add %r9,%r10	/* carry from limb 0 goes into limb 1, and so on up the chain */
 | |
| +	shld $13,%r12,%r13
 | |
| +	and %rsi,%r12
 | |
| +	add %r11,%r12
 | |
| +	shld $13,%r14,%r15
 | |
| +	and %rsi,%r14
 | |
| +	add %r13,%r14
 | |
| +	shld $13,%rbx,%rbp
 | |
| +	and %rsi,%rbx
 | |
| +	add %r15,%rbx
 | |
| +	imulq $19,%rbp,%rdx	/* carry out of limb 4 is multiplied by 19 and added to limb 0 */
 | |
| +	add %rdx,%r8
 | |
| +	mov %r8,%rdx	/* second pass: ripple the bits above bit 50 through limbs 0..4 */
 | |
| +	shr $51,%rdx
 | |
| +	add %r10,%rdx
 | |
| +	mov %rdx,%rcx
 | |
| +	shr $51,%rdx
 | |
| +	and %rsi,%r8
 | |
| +	add %r12,%rdx
 | |
| +	mov %rdx,%r9
 | |
| +	shr $51,%rdx
 | |
| +	and %rsi,%rcx
 | |
| +	add %r14,%rdx
 | |
| +	mov %rdx,%rax
 | |
| +	shr $51,%rdx
 | |
| +	and %rsi,%r9
 | |
| +	add %rbx,%rdx
 | |
| +	mov %rdx,%r10
 | |
| +	shr $51,%rdx
 | |
| +	and %rsi,%rax
 | |
| +	imulq $19,%rdx,%rdx
 | |
| +	add %rdx,%r8	/* limb 0 receives 19 * final carry after it was masked, so it is not re-masked here */
 | |
| +	and %rsi,%r10
 | |
| +	movq %r8,0(%rdi)
 | |
| +	movq %rcx,8(%rdi)
 | |
| +	movq %r9,16(%rdi)
 | |
| +	movq %rax,24(%rdi)
 | |
| +	movq %r10,32(%rdi)
 | |
| +	movq 0(%rsp),%r11	/* r11 = frame size saved in the prologue */
 | |
| +	movq 8(%rsp),%r12
 | |
| +	movq 16(%rsp),%r13
 | |
| +	movq 24(%rsp),%r14
 | |
| +	movq 32(%rsp),%r15
 | |
| +	movq 40(%rsp),%rbx
 | |
| +	movq 48(%rsp),%rbp
 | |
| +	add %r11,%rsp
 | |
| +	mov %rdi,%rax	/* rax = result pointer on return */
 | |
| +	mov %rsi,%rdx	/* rdx = the 2^51 - 1 mask value left in rsi */
 | |
| +	ret
 | |
| +ENDPROC(curve25519_sandy2x_fe51_mul)
 | |
| +
 | |
| +.align 32
 | |
| +ENTRY(curve25519_sandy2x_fe51_nsquare)	/* rdi = result (five 64-bit limbs, written), rsi = input (five limbs, read), rdx = number of squarings */
 | |
| +	mov %rsp,%r11	/* r11 = frame size = (rsp & 31) + 64, which leaves the new rsp 32-byte aligned */
 | |
| +	and $31,%r11
 | |
| +	add $64,%r11
 | |
| +	sub %r11,%rsp
 | |
| +	movq %r11,0(%rsp)	/* frame size is kept for the epilogue; r11 is reused inside the loop */
 | |
| +	movq %r12,8(%rsp)
 | |
| +	movq %r13,16(%rsp)
 | |
| +	movq %r14,24(%rsp)
 | |
| +	movq %r15,32(%rsp)
 | |
| +	movq %rbx,40(%rsp)
 | |
| +	movq %rbp,48(%rsp)
 | |
| +	movq 0(%rsi),%rcx	/* limbs 0 and 1 are carried in rcx and r8 across iterations */
 | |
| +	movq 8(%rsi),%r8
 | |
| +	movq 16(%rsi),%r9
 | |
| +	movq 24(%rsi),%rax
 | |
| +	movq 32(%rsi),%rsi
 | |
| +	movq %r9,16(%rdi)	/* limbs 2..4 are kept in the output buffer and re-read from there by the loop */
 | |
| +	movq %rax,24(%rdi)
 | |
| +	movq %rsi,32(%rdi)
 | |
| +	mov %rdx,%rsi	/* rsi = remaining iteration count */
 | |
| +
 | |
| +	.align 16
 | |
| +	.Lloop:
 | |
| +	sub $1,%rsi	/* count is decremented here and tested at the bottom, so the body always runs at least once */
 | |
| +	mov %rcx,%rax
 | |
| +	mul %rcx
 | |
| +	add %rcx,%rcx	/* rcx = 2 * limb 0 for the cross products that follow */
 | |
| +	mov %rax,%r9	/* r10:r9 = accumulator for limb 0 */
 | |
| +	mov %rdx,%r10
 | |
| +	mov %rcx,%rax
 | |
| +	mul %r8
 | |
| +	mov %rax,%r11	/* r12:r11 = accumulator for limb 1 */
 | |
| +	mov %rdx,%r12
 | |
| +	mov %rcx,%rax
 | |
| +	mulq 16(%rdi)
 | |
| +	mov %rax,%r13	/* r14:r13 = accumulator for limb 2 */
 | |
| +	mov %rdx,%r14
 | |
| +	mov %rcx,%rax
 | |
| +	mulq 24(%rdi)
 | |
| +	mov %rax,%r15	/* rbx:r15 = accumulator for limb 3 */
 | |
| +	mov %rdx,%rbx
 | |
| +	mov %rcx,%rax
 | |
| +	mulq 32(%rdi)
 | |
| +	mov %rax,%rcx	/* rbp:rcx = accumulator for limb 4 */
 | |
| +	mov %rdx,%rbp
 | |
| +	mov %r8,%rax
 | |
| +	mul %r8
 | |
| +	add %r8,%r8	/* r8 = 2 * limb 1 */
 | |
| +	add %rax,%r13
 | |
| +	adc %rdx,%r14
 | |
| +	mov %r8,%rax
 | |
| +	mulq 16(%rdi)
 | |
| +	add %rax,%r15
 | |
| +	adc %rdx,%rbx
 | |
| +	mov %r8,%rax
 | |
| +	imulq $19, %r8,%r8	/* r8 = 38 * limb 1; rax still holds 2 * limb 1 for the next mulq */
 | |
| +	mulq 24(%rdi)
 | |
| +	add %rax,%rcx
 | |
| +	adc %rdx,%rbp
 | |
| +	mov %r8,%rax
 | |
| +	mulq 32(%rdi)
 | |
| +	add %rax,%r9
 | |
| +	adc %rdx,%r10
 | |
| +	movq 16(%rdi),%rax
 | |
| +	mulq 16(%rdi)
 | |
| +	add %rax,%rcx
 | |
| +	adc %rdx,%rbp
 | |
| +	shld $13,%rcx,%rbp	/* rbp = (rbp << 13) | (rcx >> 51): the part of the limb 4 sum above bit 50 */
 | |
| +	movq 16(%rdi),%rax
 | |
| +	imulq $38, %rax,%rax
 | |
| +	mulq 24(%rdi)
 | |
| +	add %rax,%r9
 | |
| +	adc %rdx,%r10
 | |
| +	shld $13,%r9,%r10
 | |
| +	movq 16(%rdi),%rax
 | |
| +	imulq $38, %rax,%rax
 | |
| +	mulq 32(%rdi)
 | |
| +	add %rax,%r11
 | |
| +	adc %rdx,%r12
 | |
| +	movq 24(%rdi),%rax
 | |
| +	imulq $19, %rax,%rax
 | |
| +	mulq 24(%rdi)
 | |
| +	add %rax,%r11
 | |
| +	adc %rdx,%r12
 | |
| +	shld $13,%r11,%r12
 | |
| +	movq 24(%rdi),%rax
 | |
| +	imulq $38, %rax,%rax
 | |
| +	mulq 32(%rdi)
 | |
| +	add %rax,%r13
 | |
| +	adc %rdx,%r14
 | |
| +	shld $13,%r13,%r14
 | |
| +	movq 32(%rdi),%rax
 | |
| +	imulq $19, %rax,%rax
 | |
| +	mulq 32(%rdi)
 | |
| +	add %rax,%r15
 | |
| +	adc %rdx,%rbx
 | |
| +	shld $13,%r15,%rbx
 | |
| +	movq curve25519_sandy2x_REDMASK51(%rip),%rdx	/* rdx = 2^51 - 1 */
 | |
| +	and %rdx,%rcx
 | |
| +	add %rbx,%rcx	/* each limb takes the carry of the limb below it */
 | |
| +	and %rdx,%r9
 | |
| +	and %rdx,%r11
 | |
| +	add %r10,%r11
 | |
| +	and %rdx,%r13
 | |
| +	add %r12,%r13
 | |
| +	and %rdx,%r15
 | |
| +	add %r14,%r15
 | |
| +	imulq $19, %rbp,%rbp	/* carry out of limb 4 is multiplied by 19 and added to limb 0 */
 | |
| +	lea (%r9,%rbp),%r9
 | |
| +	mov %r9,%rax	/* second pass: ripple the bits above bit 50 through limbs 0..4 */
 | |
| +	shr $51,%r9
 | |
| +	add %r11,%r9
 | |
| +	and %rdx,%rax
 | |
| +	mov %r9,%r8	/* r8 = new limb 1 */
 | |
| +	shr $51,%r9
 | |
| +	add %r13,%r9
 | |
| +	and %rdx,%r8
 | |
| +	mov %r9,%r10
 | |
| +	shr $51,%r9
 | |
| +	add %r15,%r9
 | |
| +	and %rdx,%r10
 | |
| +	movq %r10,16(%rdi)	/* new limb 2 */
 | |
| +	mov %r9,%r10
 | |
| +	shr $51,%r9
 | |
| +	add %rcx,%r9
 | |
| +	and %rdx,%r10
 | |
| +	movq %r10,24(%rdi)	/* new limb 3 */
 | |
| +	mov %r9,%r10
 | |
| +	shr $51,%r9
 | |
| +	imulq $19, %r9,%r9
 | |
| +	lea (%rax,%r9),%rcx	/* rcx = new limb 0 = masked limb 0 + 19 * final carry */
 | |
| +	and %rdx,%r10
 | |
| +	movq %r10,32(%rdi)	/* new limb 4 */
 | |
| +	cmp $0,%rsi
 | |
| +	jne .Lloop
 | |
| +
 | |
| +	movq %rcx,0(%rdi)	/* limbs 0 and 1 are written back only after the last iteration */
 | |
| +	movq %r8,8(%rdi)
 | |
| +	movq 0(%rsp),%r11	/* r11 = frame size saved in the prologue */
 | |
| +	movq 8(%rsp),%r12
 | |
| +	movq 16(%rsp),%r13
 | |
| +	movq 24(%rsp),%r14
 | |
| +	movq 32(%rsp),%r15
 | |
| +	movq 40(%rsp),%rbx
 | |
| +	movq 48(%rsp),%rbp
 | |
| +	add %r11,%rsp
 | |
| +	ret
 | |
| +ENDPROC(curve25519_sandy2x_fe51_nsquare)
 | |
| +
 | |
| +.align 32
 | |
| +ENTRY(curve25519_sandy2x_fe51_pack)	/* rdi = 32-byte output buffer (written byte by byte), rsi = input of five 64-bit limbs (read) */
 | |
| +	mov %rsp,%r11	/* r11 = frame size = (rsp & 31) + 32, which leaves the new rsp 32-byte aligned */
 | |
| +	and $31,%r11
 | |
| +	add $32,%r11
 | |
| +	sub %r11,%rsp
 | |
| +	movq %r11,0(%rsp)	/* frame size is kept for the epilogue; r11 is reused as the loop counter */
 | |
| +	movq %r12,8(%rsp)
 | |
| +	movq 0(%rsi),%rdx	/* limbs 0..4 live in rdx, rcx, r8, r9, rsi from here on */
 | |
| +	movq 8(%rsi),%rcx
 | |
| +	movq 16(%rsi),%r8
 | |
| +	movq 24(%rsi),%r9
 | |
| +	movq 32(%rsi),%rsi
 | |
| +	movq curve25519_sandy2x_REDMASK51(%rip),%rax	/* rax = 2^51 - 1 */
 | |
| +	lea -18(%rax),%r10	/* r10 = 2^51 - 19 */
 | |
| +	mov $3,%r11	/* three carry passes */
 | |
| +
 | |
| +	.align 16
 | |
| +	.Lreduceloop:
 | |
| +	mov %rdx,%r12	/* move the bits above bit 50 of each limb into the next limb */
 | |
| +	shr $51,%r12
 | |
| +	and %rax,%rdx
 | |
| +	add %r12,%rcx
 | |
| +	mov %rcx,%r12
 | |
| +	shr $51,%r12
 | |
| +	and %rax,%rcx
 | |
| +	add %r12,%r8
 | |
| +	mov %r8,%r12
 | |
| +	shr $51,%r12
 | |
| +	and %rax,%r8
 | |
| +	add %r12,%r9
 | |
| +	mov %r9,%r12
 | |
| +	shr $51,%r12
 | |
| +	and %rax,%r9
 | |
| +	add %r12,%rsi
 | |
| +	mov %rsi,%r12
 | |
| +	shr $51,%r12
 | |
| +	and %rax,%rsi
 | |
| +	imulq $19, %r12,%r12	/* carry out of limb 4 is multiplied by 19 and added to limb 0 */
 | |
| +	add %r12,%rdx
 | |
| +	sub $1,%r11
 | |
| +	ja .Lreduceloop	/* taken while the counter is nonzero after the decrement; r11 is 0 on exit */
 | |
| +
 | |
| +	mov $1,%r12	/* r12 stays 1 only if none of the cmov instructions below replaces it with r11 (0) */
 | |
| +	cmp %r10,%rdx
 | |
| +	cmovl %r11,%r12	/* cleared when limb 0 < 2^51 - 19 (signed compare) */
 | |
| +	cmp %rax,%rcx
 | |
| +	cmovne %r11,%r12	/* cleared when limb 1 != 2^51 - 1; same test for limbs 2..4 below */
 | |
| +	cmp %rax,%r8
 | |
| +	cmovne %r11,%r12
 | |
| +	cmp %rax,%r9
 | |
| +	cmovne %r11,%r12
 | |
| +	cmp %rax,%rsi
 | |
| +	cmovne %r11,%r12
 | |
| +	neg %r12	/* r12 = all-ones mask if every test passed, otherwise 0 */
 | |
| +	and %r12,%rax
 | |
| +	and %r12,%r10
 | |
| +	sub %r10,%rdx	/* branch-free: subtracts 2^51 - 19 from limb 0 and 2^51 - 1 from limbs 1..4, or subtracts 0 */
 | |
| +	sub %rax,%rcx
 | |
| +	sub %rax,%r8
 | |
| +	sub %rax,%r9
 | |
| +	sub %rax,%rsi
 | |
| +	mov %rdx,%rax	/* bytes 0..5 = bits 0..47 of limb 0 */
 | |
| +	and $0xFF,%eax
 | |
| +	movb %al,0(%rdi)
 | |
| +	mov %rdx,%rax
 | |
| +	shr $8,%rax
 | |
| +	and $0xFF,%eax
 | |
| +	movb %al,1(%rdi)
 | |
| +	mov %rdx,%rax
 | |
| +	shr $16,%rax
 | |
| +	and $0xFF,%eax
 | |
| +	movb %al,2(%rdi)
 | |
| +	mov %rdx,%rax
 | |
| +	shr $24,%rax
 | |
| +	and $0xFF,%eax
 | |
| +	movb %al,3(%rdi)
 | |
| +	mov %rdx,%rax
 | |
| +	shr $32,%rax
 | |
| +	and $0xFF,%eax
 | |
| +	movb %al,4(%rdi)
 | |
| +	mov %rdx,%rax
 | |
| +	shr $40,%rax
 | |
| +	and $0xFF,%eax
 | |
| +	movb %al,5(%rdi)
 | |
| +	mov %rdx,%rdx	/* register moved onto itself: no effect */
 | |
| +	shr $48,%rdx
 | |
| +	mov %rcx,%rax
 | |
| +	shl $3,%rax
 | |
| +	and $0xF8,%eax
 | |
| +	xor %rdx,%rax
 | |
| +	movb %al,6(%rdi)	/* byte 6 = low byte of (limb 0 >> 48) ^ ((limb 1 << 3) & 0xF8) */
 | |
| +	mov %rcx,%rdx
 | |
| +	shr $5,%rdx
 | |
| +	and $0xFF,%edx
 | |
| +	movb %dl,7(%rdi)
 | |
| +	mov %rcx,%rdx
 | |
| +	shr $13,%rdx
 | |
| +	and $0xFF,%edx
 | |
| +	movb %dl,8(%rdi)
 | |
| +	mov %rcx,%rdx
 | |
| +	shr $21,%rdx
 | |
| +	and $0xFF,%edx
 | |
| +	movb %dl,9(%rdi)
 | |
| +	mov %rcx,%rdx
 | |
| +	shr $29,%rdx
 | |
| +	and $0xFF,%edx
 | |
| +	movb %dl,10(%rdi)
 | |
| +	mov %rcx,%rdx
 | |
| +	shr $37,%rdx
 | |
| +	and $0xFF,%edx
 | |
| +	movb %dl,11(%rdi)
 | |
| +	mov %rcx,%rdx
 | |
| +	shr $45,%rdx
 | |
| +	mov %r8,%rcx
 | |
| +	shl $6,%rcx
 | |
| +	and $0xC0,%ecx
 | |
| +	xor %rdx,%rcx
 | |
| +	movb %cl,12(%rdi)	/* byte 12 = low byte of (limb 1 >> 45) ^ ((limb 2 << 6) & 0xC0) */
 | |
| +	mov %r8,%rdx
 | |
| +	shr $2,%rdx
 | |
| +	and $0xFF,%edx
 | |
| +	movb %dl,13(%rdi)
 | |
| +	mov %r8,%rdx
 | |
| +	shr $10,%rdx
 | |
| +	and $0xFF,%edx
 | |
| +	movb %dl,14(%rdi)
 | |
| +	mov %r8,%rdx
 | |
| +	shr $18,%rdx
 | |
| +	and $0xFF,%edx
 | |
| +	movb %dl,15(%rdi)
 | |
| +	mov %r8,%rdx
 | |
| +	shr $26,%rdx
 | |
| +	and $0xFF,%edx
 | |
| +	movb %dl,16(%rdi)
 | |
| +	mov %r8,%rdx
 | |
| +	shr $34,%rdx
 | |
| +	and $0xFF,%edx
 | |
| +	movb %dl,17(%rdi)
 | |
| +	mov %r8,%rdx
 | |
| +	shr $42,%rdx
 | |
| +	movb %dl,18(%rdi)	/* no explicit mask here: movb stores only the low byte */
 | |
| +	mov %r8,%rdx
 | |
| +	shr $50,%rdx
 | |
| +	mov %r9,%rcx
 | |
| +	shl $1,%rcx
 | |
| +	and $0xFE,%ecx
 | |
| +	xor %rdx,%rcx
 | |
| +	movb %cl,19(%rdi)	/* byte 19 = low byte of (limb 2 >> 50) ^ ((limb 3 << 1) & 0xFE) */
 | |
| +	mov %r9,%rdx
 | |
| +	shr $7,%rdx
 | |
| +	and $0xFF,%edx
 | |
| +	movb %dl,20(%rdi)
 | |
| +	mov %r9,%rdx
 | |
| +	shr $15,%rdx
 | |
| +	and $0xFF,%edx
 | |
| +	movb %dl,21(%rdi)
 | |
| +	mov %r9,%rdx
 | |
| +	shr $23,%rdx
 | |
| +	and $0xFF,%edx
 | |
| +	movb %dl,22(%rdi)
 | |
| +	mov %r9,%rdx
 | |
| +	shr $31,%rdx
 | |
| +	and $0xFF,%edx
 | |
| +	movb %dl,23(%rdi)
 | |
| +	mov %r9,%rdx
 | |
| +	shr $39,%rdx
 | |
| +	and $0xFF,%edx
 | |
| +	movb %dl,24(%rdi)
 | |
| +	mov %r9,%rdx
 | |
| +	shr $47,%rdx
 | |
| +	mov %rsi,%rcx
 | |
| +	shl $4,%rcx
 | |
| +	and $0xF0,%ecx
 | |
| +	xor %rdx,%rcx
 | |
| +	movb %cl,25(%rdi)	/* byte 25 = low byte of (limb 3 >> 47) ^ ((limb 4 << 4) & 0xF0) */
 | |
| +	mov %rsi,%rdx
 | |
| +	shr $4,%rdx
 | |
| +	and $0xFF,%edx
 | |
| +	movb %dl,26(%rdi)
 | |
| +	mov %rsi,%rdx
 | |
| +	shr $12,%rdx
 | |
| +	and $0xFF,%edx
 | |
| +	movb %dl,27(%rdi)
 | |
| +	mov %rsi,%rdx
 | |
| +	shr $20,%rdx
 | |
| +	and $0xFF,%edx
 | |
| +	movb %dl,28(%rdi)
 | |
| +	mov %rsi,%rdx
 | |
| +	shr $28,%rdx
 | |
| +	and $0xFF,%edx
 | |
| +	movb %dl,29(%rdi)
 | |
| +	mov %rsi,%rdx
 | |
| +	shr $36,%rdx
 | |
| +	and $0xFF,%edx
 | |
| +	movb %dl,30(%rdi)
 | |
| +	mov %rsi,%rsi	/* register moved onto itself: no effect */
 | |
| +	shr $44,%rsi
 | |
| +	movb %sil,31(%rdi)
 | |
| +	movq 0(%rsp),%r11	/* r11 = frame size saved in the prologue */
 | |
| +	movq 8(%rsp),%r12
 | |
| +	add %r11,%rsp
 | |
| +	ret
 | |
| +ENDPROC(curve25519_sandy2x_fe51_pack)
 | |
| +
 | |
| +.align 32
 | |
| +ENTRY(curve25519_sandy2x_ladder)
 | |
| +	mov %rsp,%r11
 | |
| +	and $31,%r11
 | |
| +	add $1856,%r11
 | |
| +	sub %r11,%rsp
 | |
| +	movq %r11,1824(%rsp)
 | |
| +	movq %r12,1832(%rsp)
 | |
| +	movq %r13,1840(%rsp)
 | |
| +	movq %r14,1848(%rsp)
 | |
| +	vmovdqa curve25519_sandy2x_v0_0(%rip),%xmm0
 | |
| +	vmovdqa curve25519_sandy2x_v1_0(%rip),%xmm1
 | |
| +	vmovdqu 0(%rdi),%xmm2
 | |
| +	vmovdqa %xmm2,0(%rsp)
 | |
| +	vmovdqu 16(%rdi),%xmm2
 | |
| +	vmovdqa %xmm2,16(%rsp)
 | |
| +	vmovdqu 32(%rdi),%xmm2
 | |
| +	vmovdqa %xmm2,32(%rsp)
 | |
| +	vmovdqu 48(%rdi),%xmm2
 | |
| +	vmovdqa %xmm2,48(%rsp)
 | |
| +	vmovdqu 64(%rdi),%xmm2
 | |
| +	vmovdqa %xmm2,64(%rsp)
 | |
| +	vmovdqa %xmm1,80(%rsp)
 | |
| +	vmovdqa %xmm0,96(%rsp)
 | |
| +	vmovdqa %xmm0,112(%rsp)
 | |
| +	vmovdqa %xmm0,128(%rsp)
 | |
| +	vmovdqa %xmm0,144(%rsp)
 | |
| +	vmovdqa %xmm1,%xmm0
 | |
| +	vpxor %xmm1,%xmm1,%xmm1
 | |
| +	vpxor %xmm2,%xmm2,%xmm2
 | |
| +	vpxor %xmm3,%xmm3,%xmm3
 | |
| +	vpxor %xmm4,%xmm4,%xmm4
 | |
| +	vpxor %xmm5,%xmm5,%xmm5
 | |
| +	vpxor %xmm6,%xmm6,%xmm6
 | |
| +	vpxor %xmm7,%xmm7,%xmm7
 | |
| +	vpxor %xmm8,%xmm8,%xmm8
 | |
| +	vpxor %xmm9,%xmm9,%xmm9
 | |
| +	vmovdqu 0(%rdi),%xmm10
 | |
| +	vmovdqa %xmm10,160(%rsp)
 | |
| +	vmovdqu 16(%rdi),%xmm10
 | |
| +	vmovdqa %xmm10,176(%rsp)
 | |
| +	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm10,%xmm10
 | |
| +	vmovdqa %xmm10,192(%rsp)
 | |
| +	vmovdqu 32(%rdi),%xmm10
 | |
| +	vmovdqa %xmm10,208(%rsp)
 | |
| +	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm10,%xmm10
 | |
| +	vmovdqa %xmm10,224(%rsp)
 | |
| +	vmovdqu 48(%rdi),%xmm10
 | |
| +	vmovdqa %xmm10,240(%rsp)
 | |
| +	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm10,%xmm10
 | |
| +	vmovdqa %xmm10,256(%rsp)
 | |
| +	vmovdqu 64(%rdi),%xmm10
 | |
| +	vmovdqa %xmm10,272(%rsp)
 | |
| +	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm10,%xmm10
 | |
| +	vmovdqa %xmm10,288(%rsp)
 | |
| +	vmovdqu 8(%rdi),%xmm10
 | |
| +	vpmuludq curve25519_sandy2x_v2_1(%rip),%xmm10,%xmm10
 | |
| +	vmovdqa %xmm10,304(%rsp)
 | |
| +	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm10,%xmm10
 | |
| +	vmovdqa %xmm10,320(%rsp)
 | |
| +	vmovdqu 24(%rdi),%xmm10
 | |
| +	vpmuludq curve25519_sandy2x_v2_1(%rip),%xmm10,%xmm10
 | |
| +	vmovdqa %xmm10,336(%rsp)
 | |
| +	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm10,%xmm10
 | |
| +	vmovdqa %xmm10,352(%rsp)
 | |
| +	vmovdqu 40(%rdi),%xmm10
 | |
| +	vpmuludq curve25519_sandy2x_v2_1(%rip),%xmm10,%xmm10
 | |
| +	vmovdqa %xmm10,368(%rsp)
 | |
| +	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm10,%xmm10
 | |
| +	vmovdqa %xmm10,384(%rsp)
 | |
| +	vmovdqu 56(%rdi),%xmm10
 | |
| +	vpmuludq curve25519_sandy2x_v2_1(%rip),%xmm10,%xmm10
 | |
| +	vmovdqa %xmm10,400(%rsp)
 | |
| +	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm10,%xmm10
 | |
| +	vmovdqa %xmm10,416(%rsp)
 | |
| +	vmovdqu 0(%rdi),%xmm10
 | |
| +	vmovdqu 64(%rdi),%xmm11
 | |
| +	vblendps $12, %xmm11, %xmm10, %xmm10
 | |
| +	vpshufd $2,%xmm10,%xmm10
 | |
| +	vpmuludq curve25519_sandy2x_v38_1(%rip),%xmm10,%xmm10
 | |
| +	vmovdqa %xmm10,432(%rsp)
 | |
| +	movq 0(%rsi),%rdx
 | |
| +	movq 8(%rsi),%rcx
 | |
| +	movq 16(%rsi),%r8
 | |
| +	movq 24(%rsi),%r9
 | |
| +	shrd $1,%rcx,%rdx
 | |
| +	shrd $1,%r8,%rcx
 | |
| +	shrd $1,%r9,%r8
 | |
| +	shr $1,%r9
 | |
| +	xorq 0(%rsi),%rdx
 | |
| +	xorq 8(%rsi),%rcx
 | |
| +	xorq 16(%rsi),%r8
 | |
| +	xorq 24(%rsi),%r9
 | |
| +	leaq 800(%rsp),%rsi
 | |
| +	mov $64,%rax
 | |
| +
 | |
| +	.align 16
 | |
| +	.Lladder_small_loop:
 | |
| +	mov %rdx,%r10
 | |
| +	mov %rcx,%r11
 | |
| +	mov %r8,%r12
 | |
| +	mov %r9,%r13
 | |
| +	shr $1,%rdx
 | |
| +	shr $1,%rcx
 | |
| +	shr $1,%r8
 | |
| +	shr $1,%r9
 | |
| +	and $1,%r10d
 | |
| +	and $1,%r11d
 | |
| +	and $1,%r12d
 | |
| +	and $1,%r13d
 | |
| +	neg %r10
 | |
| +	neg %r11
 | |
| +	neg %r12
 | |
| +	neg %r13
 | |
| +	movl %r10d,0(%rsi)
 | |
| +	movl %r11d,256(%rsi)
 | |
| +	movl %r12d,512(%rsi)
 | |
| +	movl %r13d,768(%rsi)
 | |
| +	add $4,%rsi
 | |
| +	sub $1,%rax
 | |
| +	jne .Lladder_small_loop
 | |
| +	mov $255,%rdx
 | |
| +	add $760,%rsi
 | |
| +
 | |
| +	.align 16
 | |
| +	.Lladder_loop:
 | |
| +	sub $1,%rdx
 | |
| +	vbroadcastss 0(%rsi),%xmm10
 | |
| +	sub $4,%rsi
 | |
| +	vmovdqa 0(%rsp),%xmm11
 | |
| +	vmovdqa 80(%rsp),%xmm12
 | |
| +	vpxor %xmm11,%xmm0,%xmm13
 | |
| +	vpand %xmm10,%xmm13,%xmm13
 | |
| +	vpxor %xmm13,%xmm0,%xmm0
 | |
| +	vpxor %xmm13,%xmm11,%xmm11
 | |
| +	vpxor %xmm12,%xmm1,%xmm13
 | |
| +	vpand %xmm10,%xmm13,%xmm13
 | |
| +	vpxor %xmm13,%xmm1,%xmm1
 | |
| +	vpxor %xmm13,%xmm12,%xmm12
 | |
| +	vmovdqa 16(%rsp),%xmm13
 | |
| +	vmovdqa 96(%rsp),%xmm14
 | |
| +	vpxor %xmm13,%xmm2,%xmm15
 | |
| +	vpand %xmm10,%xmm15,%xmm15
 | |
| +	vpxor %xmm15,%xmm2,%xmm2
 | |
| +	vpxor %xmm15,%xmm13,%xmm13
 | |
| +	vpxor %xmm14,%xmm3,%xmm15
 | |
| +	vpand %xmm10,%xmm15,%xmm15
 | |
| +	vpxor %xmm15,%xmm3,%xmm3
 | |
| +	vpxor %xmm15,%xmm14,%xmm14
 | |
| +	vmovdqa %xmm13,0(%rsp)
 | |
| +	vmovdqa %xmm14,16(%rsp)
 | |
| +	vmovdqa 32(%rsp),%xmm13
 | |
| +	vmovdqa 112(%rsp),%xmm14
 | |
| +	vpxor %xmm13,%xmm4,%xmm15
 | |
| +	vpand %xmm10,%xmm15,%xmm15
 | |
| +	vpxor %xmm15,%xmm4,%xmm4
 | |
| +	vpxor %xmm15,%xmm13,%xmm13
 | |
| +	vpxor %xmm14,%xmm5,%xmm15
 | |
| +	vpand %xmm10,%xmm15,%xmm15
 | |
| +	vpxor %xmm15,%xmm5,%xmm5
 | |
| +	vpxor %xmm15,%xmm14,%xmm14
 | |
| +	vmovdqa %xmm13,32(%rsp)
 | |
| +	vmovdqa %xmm14,80(%rsp)
 | |
| +	vmovdqa 48(%rsp),%xmm13
 | |
| +	vmovdqa 128(%rsp),%xmm14
 | |
| +	vpxor %xmm13,%xmm6,%xmm15
 | |
| +	vpand %xmm10,%xmm15,%xmm15
 | |
| +	vpxor %xmm15,%xmm6,%xmm6
 | |
| +	vpxor %xmm15,%xmm13,%xmm13
 | |
| +	vpxor %xmm14,%xmm7,%xmm15
 | |
| +	vpand %xmm10,%xmm15,%xmm15
 | |
| +	vpxor %xmm15,%xmm7,%xmm7
 | |
| +	vpxor %xmm15,%xmm14,%xmm14
 | |
| +	vmovdqa %xmm13,48(%rsp)
 | |
| +	vmovdqa %xmm14,96(%rsp)
 | |
| +	vmovdqa 64(%rsp),%xmm13
 | |
| +	vmovdqa 144(%rsp),%xmm14
 | |
| +	vpxor %xmm13,%xmm8,%xmm15
 | |
| +	vpand %xmm10,%xmm15,%xmm15
 | |
| +	vpxor %xmm15,%xmm8,%xmm8
 | |
| +	vpxor %xmm15,%xmm13,%xmm13
 | |
| +	vpxor %xmm14,%xmm9,%xmm15
 | |
| +	vpand %xmm10,%xmm15,%xmm15
 | |
| +	vpxor %xmm15,%xmm9,%xmm9
 | |
| +	vpxor %xmm15,%xmm14,%xmm14
 | |
| +	vmovdqa %xmm13,64(%rsp)
 | |
| +	vmovdqa %xmm14,112(%rsp)
 | |
| +	vpaddq curve25519_sandy2x_subc0(%rip),%xmm11,%xmm10
 | |
| +	vpsubq %xmm12,%xmm10,%xmm10
 | |
| +	vpaddq %xmm12,%xmm11,%xmm11
 | |
| +	vpunpckhqdq %xmm10,%xmm11,%xmm12
 | |
| +	vpunpcklqdq %xmm10,%xmm11,%xmm10
 | |
| +	vpaddq %xmm1,%xmm0,%xmm11
 | |
| +	vpaddq curve25519_sandy2x_subc0(%rip),%xmm0,%xmm0
 | |
| +	vpsubq %xmm1,%xmm0,%xmm0
 | |
| +	vpunpckhqdq %xmm11,%xmm0,%xmm1
 | |
| +	vpunpcklqdq %xmm11,%xmm0,%xmm0
 | |
| +	vpmuludq %xmm0,%xmm10,%xmm11
 | |
| +	vpmuludq %xmm1,%xmm10,%xmm13
 | |
| +	vmovdqa %xmm1,128(%rsp)
 | |
| +	vpaddq %xmm1,%xmm1,%xmm1
 | |
| +	vpmuludq %xmm0,%xmm12,%xmm14
 | |
| +	vmovdqa %xmm0,144(%rsp)
 | |
| +	vpaddq %xmm14,%xmm13,%xmm13
 | |
| +	vpmuludq %xmm1,%xmm12,%xmm0
 | |
| +	vmovdqa %xmm1,448(%rsp)
 | |
| +	vpaddq %xmm3,%xmm2,%xmm1
 | |
| +	vpaddq curve25519_sandy2x_subc2(%rip),%xmm2,%xmm2
 | |
| +	vpsubq %xmm3,%xmm2,%xmm2
 | |
| +	vpunpckhqdq %xmm1,%xmm2,%xmm3
 | |
| +	vpunpcklqdq %xmm1,%xmm2,%xmm1
 | |
| +	vpmuludq %xmm1,%xmm10,%xmm2
 | |
| +	vpaddq %xmm2,%xmm0,%xmm0
 | |
| +	vpmuludq %xmm3,%xmm10,%xmm2
 | |
| +	vmovdqa %xmm3,464(%rsp)
 | |
| +	vpaddq %xmm3,%xmm3,%xmm3
 | |
| +	vpmuludq %xmm1,%xmm12,%xmm14
 | |
| +	vmovdqa %xmm1,480(%rsp)
 | |
| +	vpaddq %xmm14,%xmm2,%xmm2
 | |
| +	vpmuludq %xmm3,%xmm12,%xmm1
 | |
| +	vmovdqa %xmm3,496(%rsp)
 | |
| +	vpaddq %xmm5,%xmm4,%xmm3
 | |
| +	vpaddq curve25519_sandy2x_subc2(%rip),%xmm4,%xmm4
 | |
| +	vpsubq %xmm5,%xmm4,%xmm4
 | |
| +	vpunpckhqdq %xmm3,%xmm4,%xmm5
 | |
| +	vpunpcklqdq %xmm3,%xmm4,%xmm3
 | |
| +	vpmuludq %xmm3,%xmm10,%xmm4
 | |
| +	vpaddq %xmm4,%xmm1,%xmm1
 | |
| +	vpmuludq %xmm5,%xmm10,%xmm4
 | |
| +	vmovdqa %xmm5,512(%rsp)
 | |
| +	vpaddq %xmm5,%xmm5,%xmm5
 | |
| +	vpmuludq %xmm3,%xmm12,%xmm14
 | |
| +	vmovdqa %xmm3,528(%rsp)
 | |
| +	vpaddq %xmm14,%xmm4,%xmm4
 | |
| +	vpaddq %xmm7,%xmm6,%xmm3
 | |
| +	vpaddq curve25519_sandy2x_subc2(%rip),%xmm6,%xmm6
 | |
| +	vpsubq %xmm7,%xmm6,%xmm6
 | |
| +	vpunpckhqdq %xmm3,%xmm6,%xmm7
 | |
| +	vpunpcklqdq %xmm3,%xmm6,%xmm3
 | |
| +	vpmuludq %xmm3,%xmm10,%xmm6
 | |
| +	vpmuludq %xmm5,%xmm12,%xmm14
 | |
| +	vmovdqa %xmm5,544(%rsp)
 | |
| +	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm5,%xmm5
 | |
| +	vmovdqa %xmm5,560(%rsp)
 | |
| +	vpaddq %xmm14,%xmm6,%xmm6
 | |
| +	vpmuludq %xmm7,%xmm10,%xmm5
 | |
| +	vmovdqa %xmm7,576(%rsp)
 | |
| +	vpaddq %xmm7,%xmm7,%xmm7
 | |
| +	vpmuludq %xmm3,%xmm12,%xmm14
 | |
| +	vmovdqa %xmm3,592(%rsp)
 | |
| +	vpaddq %xmm14,%xmm5,%xmm5
 | |
| +	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm3,%xmm3
 | |
| +	vmovdqa %xmm3,608(%rsp)
 | |
| +	vpaddq %xmm9,%xmm8,%xmm3
 | |
| +	vpaddq curve25519_sandy2x_subc2(%rip),%xmm8,%xmm8
 | |
| +	vpsubq %xmm9,%xmm8,%xmm8
 | |
| +	vpunpckhqdq %xmm3,%xmm8,%xmm9
 | |
| +	vpunpcklqdq %xmm3,%xmm8,%xmm3
 | |
| +	vmovdqa %xmm3,624(%rsp)
 | |
| +	vpmuludq %xmm7,%xmm12,%xmm8
 | |
| +	vmovdqa %xmm7,640(%rsp)
 | |
| +	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm7,%xmm7
 | |
| +	vmovdqa %xmm7,656(%rsp)
 | |
| +	vpmuludq %xmm3,%xmm10,%xmm7
 | |
| +	vpaddq %xmm7,%xmm8,%xmm8
 | |
| +	vpmuludq %xmm9,%xmm10,%xmm7
 | |
| +	vmovdqa %xmm9,672(%rsp)
 | |
| +	vpaddq %xmm9,%xmm9,%xmm9
 | |
| +	vpmuludq %xmm3,%xmm12,%xmm10
 | |
| +	vpaddq %xmm10,%xmm7,%xmm7
 | |
| +	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm3,%xmm3
 | |
| +	vmovdqa %xmm3,688(%rsp)
 | |
| +	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm12,%xmm12
 | |
| +	vpmuludq %xmm9,%xmm12,%xmm3
 | |
| +	vmovdqa %xmm9,704(%rsp)
 | |
| +	vpaddq %xmm3,%xmm11,%xmm11
 | |
| +	vmovdqa 0(%rsp),%xmm3
 | |
| +	vmovdqa 16(%rsp),%xmm9
 | |
| +	vpaddq curve25519_sandy2x_subc2(%rip),%xmm3,%xmm10
 | |
| +	vpsubq %xmm9,%xmm10,%xmm10
 | |
| +	vpaddq %xmm9,%xmm3,%xmm3
 | |
| +	vpunpckhqdq %xmm10,%xmm3,%xmm9
 | |
| +	vpunpcklqdq %xmm10,%xmm3,%xmm3
 | |
| +	vpmuludq 144(%rsp),%xmm3,%xmm10
 | |
| +	vpaddq %xmm10,%xmm0,%xmm0
 | |
| +	vpmuludq 128(%rsp),%xmm3,%xmm10
 | |
| +	vpaddq %xmm10,%xmm2,%xmm2
 | |
| +	vpmuludq 480(%rsp),%xmm3,%xmm10
 | |
| +	vpaddq %xmm10,%xmm1,%xmm1
 | |
| +	vpmuludq 464(%rsp),%xmm3,%xmm10
 | |
| +	vpaddq %xmm10,%xmm4,%xmm4
 | |
| +	vpmuludq 528(%rsp),%xmm3,%xmm10
 | |
| +	vpaddq %xmm10,%xmm6,%xmm6
 | |
| +	vpmuludq 512(%rsp),%xmm3,%xmm10
 | |
| +	vpaddq %xmm10,%xmm5,%xmm5
 | |
| +	vpmuludq 592(%rsp),%xmm3,%xmm10
 | |
| +	vpaddq %xmm10,%xmm8,%xmm8
 | |
| +	vpmuludq 576(%rsp),%xmm3,%xmm10
 | |
| +	vpaddq %xmm10,%xmm7,%xmm7
 | |
| +	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm3,%xmm3
 | |
| +	vpmuludq 624(%rsp),%xmm3,%xmm10
 | |
| +	vpaddq %xmm10,%xmm11,%xmm11
 | |
| +	vpmuludq 672(%rsp),%xmm3,%xmm3
 | |
| +	vpaddq %xmm3,%xmm13,%xmm13
 | |
| +	vpmuludq 144(%rsp),%xmm9,%xmm3
 | |
| +	vpaddq %xmm3,%xmm2,%xmm2
 | |
| +	vpmuludq 448(%rsp),%xmm9,%xmm3
 | |
| +	vpaddq %xmm3,%xmm1,%xmm1
 | |
| +	vpmuludq 480(%rsp),%xmm9,%xmm3
 | |
| +	vpaddq %xmm3,%xmm4,%xmm4
 | |
| +	vpmuludq 496(%rsp),%xmm9,%xmm3
 | |
| +	vpaddq %xmm3,%xmm6,%xmm6
 | |
| +	vpmuludq 528(%rsp),%xmm9,%xmm3
 | |
| +	vpaddq %xmm3,%xmm5,%xmm5
 | |
| +	vpmuludq 544(%rsp),%xmm9,%xmm3
 | |
| +	vpaddq %xmm3,%xmm8,%xmm8
 | |
| +	vpmuludq 592(%rsp),%xmm9,%xmm3
 | |
| +	vpaddq %xmm3,%xmm7,%xmm7
 | |
| +	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm9,%xmm9
 | |
| +	vpmuludq 640(%rsp),%xmm9,%xmm3
 | |
| +	vpaddq %xmm3,%xmm11,%xmm11
 | |
| +	vpmuludq 624(%rsp),%xmm9,%xmm3
 | |
| +	vpaddq %xmm3,%xmm13,%xmm13
 | |
| +	vpmuludq 704(%rsp),%xmm9,%xmm9
 | |
| +	vpaddq %xmm9,%xmm0,%xmm0
 | |
| +	vmovdqa 32(%rsp),%xmm3
 | |
| +	vmovdqa 80(%rsp),%xmm9
 | |
| +	vpaddq curve25519_sandy2x_subc2(%rip),%xmm3,%xmm10
 | |
| +	vpsubq %xmm9,%xmm10,%xmm10
 | |
| +	vpaddq %xmm9,%xmm3,%xmm3
 | |
| +	vpunpckhqdq %xmm10,%xmm3,%xmm9
 | |
| +	vpunpcklqdq %xmm10,%xmm3,%xmm3
 | |
| +	vpmuludq 144(%rsp),%xmm3,%xmm10
 | |
| +	vpaddq %xmm10,%xmm1,%xmm1
 | |
| +	vpmuludq 128(%rsp),%xmm3,%xmm10
 | |
| +	vpaddq %xmm10,%xmm4,%xmm4
 | |
| +	vpmuludq 480(%rsp),%xmm3,%xmm10
 | |
| +	vpaddq %xmm10,%xmm6,%xmm6
 | |
| +	vpmuludq 464(%rsp),%xmm3,%xmm10
 | |
| +	vpaddq %xmm10,%xmm5,%xmm5
 | |
| +	vpmuludq 528(%rsp),%xmm3,%xmm10
 | |
| +	vpaddq %xmm10,%xmm8,%xmm8
 | |
| +	vpmuludq 512(%rsp),%xmm3,%xmm10
 | |
| +	vpaddq %xmm10,%xmm7,%xmm7
 | |
| +	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm3,%xmm3
 | |
| +	vpmuludq 592(%rsp),%xmm3,%xmm10
 | |
| +	vpaddq %xmm10,%xmm11,%xmm11
 | |
| +	vpmuludq 576(%rsp),%xmm3,%xmm10
 | |
| +	vpaddq %xmm10,%xmm13,%xmm13
 | |
| +	vpmuludq 624(%rsp),%xmm3,%xmm10
 | |
| +	vpaddq %xmm10,%xmm0,%xmm0
 | |
| +	vpmuludq 672(%rsp),%xmm3,%xmm3
 | |
| +	vpaddq %xmm3,%xmm2,%xmm2
 | |
| +	vpmuludq 144(%rsp),%xmm9,%xmm3
 | |
| +	vpaddq %xmm3,%xmm4,%xmm4
 | |
| +	vpmuludq 448(%rsp),%xmm9,%xmm3
 | |
| +	vpaddq %xmm3,%xmm6,%xmm6
 | |
| +	vpmuludq 480(%rsp),%xmm9,%xmm3
 | |
| +	vpaddq %xmm3,%xmm5,%xmm5
 | |
| +	vpmuludq 496(%rsp),%xmm9,%xmm3
 | |
| +	vpaddq %xmm3,%xmm8,%xmm8
 | |
| +	vpmuludq 528(%rsp),%xmm9,%xmm3
 | |
| +	vpaddq %xmm3,%xmm7,%xmm7
 | |
| +	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm9,%xmm9
 | |
| +	vpmuludq 544(%rsp),%xmm9,%xmm3
 | |
| +	vpaddq %xmm3,%xmm11,%xmm11
 | |
| +	vpmuludq 592(%rsp),%xmm9,%xmm3
 | |
| +	vpaddq %xmm3,%xmm13,%xmm13
 | |
| +	vpmuludq 640(%rsp),%xmm9,%xmm3
 | |
| +	vpaddq %xmm3,%xmm0,%xmm0
 | |
| +	vpmuludq 624(%rsp),%xmm9,%xmm3
 | |
| +	vpaddq %xmm3,%xmm2,%xmm2
 | |
| +	vpmuludq 704(%rsp),%xmm9,%xmm9
 | |
| +	vpaddq %xmm9,%xmm1,%xmm1
 | |
| +	vmovdqa 48(%rsp),%xmm3
 | |
| +	vmovdqa 96(%rsp),%xmm9
 | |
| +	vpaddq curve25519_sandy2x_subc2(%rip),%xmm3,%xmm10
 | |
| +	vpsubq %xmm9,%xmm10,%xmm10
 | |
| +	vpaddq %xmm9,%xmm3,%xmm3
 | |
| +	vpunpckhqdq %xmm10,%xmm3,%xmm9
 | |
| +	vpunpcklqdq %xmm10,%xmm3,%xmm3
 | |
| +	vpmuludq 144(%rsp),%xmm3,%xmm10
 | |
| +	vpaddq %xmm10,%xmm6,%xmm6
 | |
| +	vpmuludq 128(%rsp),%xmm3,%xmm10
 | |
| +	vpaddq %xmm10,%xmm5,%xmm5
 | |
| +	vpmuludq 480(%rsp),%xmm3,%xmm10
 | |
| +	vpaddq %xmm10,%xmm8,%xmm8
 | |
| +	vpmuludq 464(%rsp),%xmm3,%xmm10
 | |
| +	vpaddq %xmm10,%xmm7,%xmm7
 | |
| +	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm3,%xmm3
 | |
| +	vpmuludq 528(%rsp),%xmm3,%xmm10
 | |
| +	vpaddq %xmm10,%xmm11,%xmm11
 | |
| +	vpmuludq 512(%rsp),%xmm3,%xmm10
 | |
| +	vpaddq %xmm10,%xmm13,%xmm13
 | |
| +	vpmuludq 592(%rsp),%xmm3,%xmm10
 | |
| +	vpaddq %xmm10,%xmm0,%xmm0
 | |
| +	vpmuludq 576(%rsp),%xmm3,%xmm10
 | |
| +	vpaddq %xmm10,%xmm2,%xmm2
 | |
| +	vpmuludq 624(%rsp),%xmm3,%xmm10
 | |
| +	vpaddq %xmm10,%xmm1,%xmm1
 | |
| +	vpmuludq 672(%rsp),%xmm3,%xmm3
 | |
| +	vpaddq %xmm3,%xmm4,%xmm4
 | |
| +	vpmuludq 144(%rsp),%xmm9,%xmm3
 | |
| +	vpaddq %xmm3,%xmm5,%xmm5
 | |
| +	vpmuludq 448(%rsp),%xmm9,%xmm3
 | |
| +	vpaddq %xmm3,%xmm8,%xmm8
 | |
| +	vpmuludq 480(%rsp),%xmm9,%xmm3
 | |
| +	vpaddq %xmm3,%xmm7,%xmm7
 | |
| +	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm9,%xmm9
 | |
| +	vpmuludq 496(%rsp),%xmm9,%xmm3
 | |
| +	vpaddq %xmm3,%xmm11,%xmm11
 | |
| +	vpmuludq 528(%rsp),%xmm9,%xmm3
 | |
| +	vpaddq %xmm3,%xmm13,%xmm13
 | |
| +	vpmuludq 544(%rsp),%xmm9,%xmm3
 | |
| +	vpaddq %xmm3,%xmm0,%xmm0
 | |
| +	vpmuludq 592(%rsp),%xmm9,%xmm3
 | |
| +	vpaddq %xmm3,%xmm2,%xmm2
 | |
| +	vpmuludq 640(%rsp),%xmm9,%xmm3
 | |
| +	vpaddq %xmm3,%xmm1,%xmm1
 | |
| +	vpmuludq 624(%rsp),%xmm9,%xmm3
 | |
| +	vpaddq %xmm3,%xmm4,%xmm4
 | |
| +	vpmuludq 704(%rsp),%xmm9,%xmm9
 | |
| +	vpaddq %xmm9,%xmm6,%xmm6
 | |
| +	vmovdqa 64(%rsp),%xmm3
 | |
| +	vmovdqa 112(%rsp),%xmm9
 | |
| +	vpaddq curve25519_sandy2x_subc2(%rip),%xmm3,%xmm10
 | |
| +	vpsubq %xmm9,%xmm10,%xmm10
 | |
| +	vpaddq %xmm9,%xmm3,%xmm3
 | |
| +	vpunpckhqdq %xmm10,%xmm3,%xmm9
 | |
| +	vpunpcklqdq %xmm10,%xmm3,%xmm3
 | |
| +	vpmuludq 144(%rsp),%xmm3,%xmm10
 | |
| +	vpaddq %xmm10,%xmm8,%xmm8
 | |
| +	vpmuludq 128(%rsp),%xmm3,%xmm10
 | |
| +	vpaddq %xmm10,%xmm7,%xmm7
 | |
| +	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm3,%xmm3
 | |
| +	vpmuludq 480(%rsp),%xmm3,%xmm10
 | |
| +	vpaddq %xmm10,%xmm11,%xmm11
 | |
| +	vpmuludq 464(%rsp),%xmm3,%xmm10
 | |
| +	vpaddq %xmm10,%xmm13,%xmm13
 | |
| +	vpmuludq 528(%rsp),%xmm3,%xmm10
 | |
| +	vpaddq %xmm10,%xmm0,%xmm0
 | |
| +	vpmuludq 512(%rsp),%xmm3,%xmm10
 | |
| +	vpaddq %xmm10,%xmm2,%xmm2
 | |
| +	vpmuludq 592(%rsp),%xmm3,%xmm10
 | |
| +	vpaddq %xmm10,%xmm1,%xmm1
 | |
| +	vpmuludq 576(%rsp),%xmm3,%xmm10
 | |
| +	vpaddq %xmm10,%xmm4,%xmm4
 | |
| +	vpmuludq 624(%rsp),%xmm3,%xmm10
 | |
| +	vpaddq %xmm10,%xmm6,%xmm6
 | |
| +	vpmuludq 672(%rsp),%xmm3,%xmm3
 | |
| +	vpaddq %xmm3,%xmm5,%xmm5
 | |
| +	vpmuludq 144(%rsp),%xmm9,%xmm3
 | |
| +	vpaddq %xmm3,%xmm7,%xmm7
 | |
| +	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm9,%xmm9
 | |
| +	vpmuludq 448(%rsp),%xmm9,%xmm3
 | |
| +	vpaddq %xmm3,%xmm11,%xmm11
 | |
| +	vpmuludq 480(%rsp),%xmm9,%xmm3
 | |
| +	vpaddq %xmm3,%xmm13,%xmm13
 | |
| +	vpmuludq 496(%rsp),%xmm9,%xmm3
 | |
| +	vpaddq %xmm3,%xmm0,%xmm0
 | |
| +	vpmuludq 528(%rsp),%xmm9,%xmm3
 | |
| +	vpaddq %xmm3,%xmm2,%xmm2
 | |
| +	vpmuludq 544(%rsp),%xmm9,%xmm3
 | |
| +	vpaddq %xmm3,%xmm1,%xmm1
 | |
| +	vpmuludq 592(%rsp),%xmm9,%xmm3
 | |
| +	vpaddq %xmm3,%xmm4,%xmm4
 | |
| +	vpmuludq 640(%rsp),%xmm9,%xmm3
 | |
| +	vpaddq %xmm3,%xmm6,%xmm6
 | |
| +	vpmuludq 624(%rsp),%xmm9,%xmm3
 | |
| +	vpaddq %xmm3,%xmm5,%xmm5
 | |
| +	vpmuludq 704(%rsp),%xmm9,%xmm9
 | |
| +	vpaddq %xmm9,%xmm8,%xmm8
 | |
| +	vpsrlq $25,%xmm4,%xmm3
 | |
| +	vpaddq %xmm3,%xmm6,%xmm6
 | |
| +	vpand curve25519_sandy2x_m25(%rip),%xmm4,%xmm4
 | |
| +	vpsrlq $26,%xmm11,%xmm3
 | |
| +	vpaddq %xmm3,%xmm13,%xmm13
 | |
| +	vpand curve25519_sandy2x_m26(%rip),%xmm11,%xmm11
 | |
| +	vpsrlq $26,%xmm6,%xmm3
 | |
| +	vpaddq %xmm3,%xmm5,%xmm5
 | |
| +	vpand curve25519_sandy2x_m26(%rip),%xmm6,%xmm6
 | |
| +	vpsrlq $25,%xmm13,%xmm3
 | |
| +	vpaddq %xmm3,%xmm0,%xmm0
 | |
| +	vpand curve25519_sandy2x_m25(%rip),%xmm13,%xmm13
 | |
| +	vpsrlq $25,%xmm5,%xmm3
 | |
| +	vpaddq %xmm3,%xmm8,%xmm8
 | |
| +	vpand curve25519_sandy2x_m25(%rip),%xmm5,%xmm5
 | |
| +	vpsrlq $26,%xmm0,%xmm3
 | |
| +	vpaddq %xmm3,%xmm2,%xmm2
 | |
| +	vpand curve25519_sandy2x_m26(%rip),%xmm0,%xmm0
 | |
| +	vpsrlq $26,%xmm8,%xmm3
 | |
| +	vpaddq %xmm3,%xmm7,%xmm7
 | |
| +	vpand curve25519_sandy2x_m26(%rip),%xmm8,%xmm8
 | |
| +	vpsrlq $25,%xmm2,%xmm3
 | |
| +	vpaddq %xmm3,%xmm1,%xmm1
 | |
| +	vpand curve25519_sandy2x_m25(%rip),%xmm2,%xmm2
 | |
| +	vpsrlq $25,%xmm7,%xmm3
 | |
| +	vpsllq $4,%xmm3,%xmm9
 | |
| +	vpaddq %xmm3,%xmm11,%xmm11
 | |
| +	vpsllq $1,%xmm3,%xmm3
 | |
| +	vpaddq %xmm3,%xmm9,%xmm9
 | |
| +	vpaddq %xmm9,%xmm11,%xmm11
 | |
| +	vpand curve25519_sandy2x_m25(%rip),%xmm7,%xmm7
 | |
| +	vpsrlq $26,%xmm1,%xmm3
 | |
| +	vpaddq %xmm3,%xmm4,%xmm4
 | |
| +	vpand curve25519_sandy2x_m26(%rip),%xmm1,%xmm1
 | |
| +	vpsrlq $26,%xmm11,%xmm3
 | |
| +	vpaddq %xmm3,%xmm13,%xmm13
 | |
| +	vpand curve25519_sandy2x_m26(%rip),%xmm11,%xmm11
 | |
| +	vpsrlq $25,%xmm4,%xmm3
 | |
| +	vpaddq %xmm3,%xmm6,%xmm6
 | |
| +	vpand curve25519_sandy2x_m25(%rip),%xmm4,%xmm4
 | |
| +	vpunpcklqdq %xmm13,%xmm11,%xmm3
 | |
| +	vpunpckhqdq %xmm13,%xmm11,%xmm9
 | |
| +	vpaddq curve25519_sandy2x_subc0(%rip),%xmm9,%xmm10
 | |
| +	vpsubq %xmm3,%xmm10,%xmm10
 | |
| +	vpaddq %xmm9,%xmm3,%xmm3
 | |
| +	vpunpckhqdq %xmm3,%xmm10,%xmm9
 | |
| +	vpunpcklqdq %xmm3,%xmm10,%xmm10
 | |
| +	vpmuludq %xmm10,%xmm10,%xmm3
 | |
| +	vpaddq %xmm10,%xmm10,%xmm10
 | |
| +	vpmuludq %xmm9,%xmm10,%xmm11
 | |
| +	vpunpcklqdq %xmm2,%xmm0,%xmm12
 | |
| +	vpunpckhqdq %xmm2,%xmm0,%xmm0
 | |
| +	vpaddq curve25519_sandy2x_subc2(%rip),%xmm0,%xmm2
 | |
| +	vpsubq %xmm12,%xmm2,%xmm2
 | |
| +	vpaddq %xmm0,%xmm12,%xmm12
 | |
| +	vpunpckhqdq %xmm12,%xmm2,%xmm0
 | |
| +	vpunpcklqdq %xmm12,%xmm2,%xmm2
 | |
| +	vpmuludq %xmm2,%xmm10,%xmm12
 | |
| +	vpaddq %xmm9,%xmm9,%xmm13
 | |
| +	vpmuludq %xmm13,%xmm9,%xmm9
 | |
| +	vpaddq %xmm9,%xmm12,%xmm12
 | |
| +	vpmuludq %xmm0,%xmm10,%xmm9
 | |
| +	vpmuludq %xmm2,%xmm13,%xmm14
 | |
| +	vpaddq %xmm14,%xmm9,%xmm9
 | |
| +	vpunpcklqdq %xmm4,%xmm1,%xmm14
 | |
| +	vpunpckhqdq %xmm4,%xmm1,%xmm1
 | |
| +	vpaddq curve25519_sandy2x_subc2(%rip),%xmm1,%xmm4
 | |
| +	vpsubq %xmm14,%xmm4,%xmm4
 | |
| +	vpaddq %xmm1,%xmm14,%xmm14
 | |
| +	vpunpckhqdq %xmm14,%xmm4,%xmm1
 | |
| +	vpunpcklqdq %xmm14,%xmm4,%xmm4
 | |
| +	vmovdqa %xmm1,0(%rsp)
 | |
| +	vpaddq %xmm1,%xmm1,%xmm1
 | |
| +	vmovdqa %xmm1,16(%rsp)
 | |
| +	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm1,%xmm1
 | |
| +	vmovdqa %xmm1,32(%rsp)
 | |
| +	vpmuludq %xmm4,%xmm10,%xmm1
 | |
| +	vpmuludq %xmm2,%xmm2,%xmm14
 | |
| +	vpaddq %xmm14,%xmm1,%xmm1
 | |
| +	vpmuludq 0(%rsp),%xmm10,%xmm14
 | |
| +	vpmuludq %xmm4,%xmm13,%xmm15
 | |
| +	vpaddq %xmm15,%xmm14,%xmm14
 | |
| +	vpunpcklqdq %xmm5,%xmm6,%xmm15
 | |
| +	vpunpckhqdq %xmm5,%xmm6,%xmm5
 | |
| +	vpaddq curve25519_sandy2x_subc2(%rip),%xmm5,%xmm6
 | |
| +	vpsubq %xmm15,%xmm6,%xmm6
 | |
| +	vpaddq %xmm5,%xmm15,%xmm15
 | |
| +	vpunpckhqdq %xmm15,%xmm6,%xmm5
 | |
| +	vpunpcklqdq %xmm15,%xmm6,%xmm6
 | |
| +	vmovdqa %xmm6,48(%rsp)
 | |
| +	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm6,%xmm6
 | |
| +	vmovdqa %xmm6,64(%rsp)
 | |
| +	vmovdqa %xmm5,80(%rsp)
 | |
| +	vpmuludq curve25519_sandy2x_v38_38(%rip),%xmm5,%xmm5
 | |
| +	vmovdqa %xmm5,96(%rsp)
 | |
| +	vpmuludq 48(%rsp),%xmm10,%xmm5
 | |
| +	vpaddq %xmm0,%xmm0,%xmm6
 | |
| +	vpmuludq %xmm6,%xmm0,%xmm0
 | |
| +	vpaddq %xmm0,%xmm5,%xmm5
 | |
| +	vpmuludq 80(%rsp),%xmm10,%xmm0
 | |
| +	vpmuludq %xmm4,%xmm6,%xmm15
 | |
| +	vpaddq %xmm15,%xmm0,%xmm0
 | |
| +	vpmuludq %xmm6,%xmm13,%xmm15
 | |
| +	vpaddq %xmm15,%xmm1,%xmm1
 | |
| +	vpmuludq %xmm6,%xmm2,%xmm15
 | |
| +	vpaddq %xmm15,%xmm14,%xmm14
 | |
| +	vpunpcklqdq %xmm7,%xmm8,%xmm15
 | |
| +	vpunpckhqdq %xmm7,%xmm8,%xmm7
 | |
| +	vpaddq curve25519_sandy2x_subc2(%rip),%xmm7,%xmm8
 | |
| +	vpsubq %xmm15,%xmm8,%xmm8
 | |
| +	vpaddq %xmm7,%xmm15,%xmm15
 | |
| +	vpunpckhqdq %xmm15,%xmm8,%xmm7
 | |
| +	vpunpcklqdq %xmm15,%xmm8,%xmm8
 | |
| +	vmovdqa %xmm8,112(%rsp)
 | |
| +	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm8,%xmm8
 | |
| +	vmovdqa %xmm8,448(%rsp)
 | |
| +	vpmuludq 112(%rsp),%xmm10,%xmm8
 | |
| +	vpmuludq %xmm7,%xmm10,%xmm10
 | |
| +	vpmuludq curve25519_sandy2x_v38_38(%rip),%xmm7,%xmm15
 | |
| +	vpmuludq %xmm15,%xmm7,%xmm7
 | |
| +	vpaddq %xmm7,%xmm8,%xmm8
 | |
| +	vpmuludq %xmm15,%xmm13,%xmm7
 | |
| +	vpaddq %xmm7,%xmm3,%xmm3
 | |
| +	vpmuludq %xmm15,%xmm2,%xmm7
 | |
| +	vpaddq %xmm7,%xmm11,%xmm11
 | |
| +	vpmuludq 80(%rsp),%xmm13,%xmm7
 | |
| +	vpaddq %xmm7,%xmm7,%xmm7
 | |
| +	vpaddq %xmm7,%xmm8,%xmm8
 | |
| +	vpmuludq 16(%rsp),%xmm13,%xmm7
 | |
| +	vpaddq %xmm7,%xmm5,%xmm5
 | |
| +	vpmuludq 48(%rsp),%xmm13,%xmm7
 | |
| +	vpaddq %xmm7,%xmm0,%xmm0
 | |
| +	vpmuludq 112(%rsp),%xmm13,%xmm7
 | |
| +	vpaddq %xmm7,%xmm10,%xmm10
 | |
| +	vpmuludq %xmm15,%xmm6,%xmm7
 | |
| +	vpaddq %xmm7,%xmm12,%xmm12
 | |
| +	vpmuludq %xmm15,%xmm4,%xmm7
 | |
| +	vpaddq %xmm7,%xmm9,%xmm9
 | |
| +	vpaddq %xmm2,%xmm2,%xmm2
 | |
| +	vpmuludq %xmm4,%xmm2,%xmm7
 | |
| +	vpaddq %xmm7,%xmm5,%xmm5
 | |
| +	vpmuludq 448(%rsp),%xmm2,%xmm7
 | |
| +	vpaddq %xmm7,%xmm3,%xmm3
 | |
| +	vpmuludq 448(%rsp),%xmm6,%xmm7
 | |
| +	vpaddq %xmm7,%xmm11,%xmm11
 | |
| +	vpmuludq 0(%rsp),%xmm2,%xmm7
 | |
| +	vpaddq %xmm7,%xmm0,%xmm0
 | |
| +	vpmuludq 48(%rsp),%xmm2,%xmm7
 | |
| +	vpaddq %xmm7,%xmm8,%xmm8
 | |
| +	vpmuludq 80(%rsp),%xmm2,%xmm2
 | |
| +	vpaddq %xmm2,%xmm10,%xmm10
 | |
| +	vpmuludq 96(%rsp),%xmm4,%xmm2
 | |
| +	vpaddq %xmm2,%xmm11,%xmm11
 | |
| +	vpmuludq %xmm4,%xmm4,%xmm2
 | |
| +	vpaddq %xmm2,%xmm8,%xmm8
 | |
| +	vpaddq %xmm4,%xmm4,%xmm2
 | |
| +	vpmuludq 448(%rsp),%xmm2,%xmm4
 | |
| +	vpaddq %xmm4,%xmm12,%xmm12
 | |
| +	vpmuludq 16(%rsp),%xmm15,%xmm4
 | |
| +	vpaddq %xmm4,%xmm1,%xmm1
 | |
| +	vpmuludq 48(%rsp),%xmm15,%xmm4
 | |
| +	vpaddq %xmm4,%xmm14,%xmm14
 | |
| +	vpmuludq 96(%rsp),%xmm6,%xmm4
 | |
| +	vpaddq %xmm4,%xmm3,%xmm3
 | |
| +	vmovdqa 16(%rsp),%xmm4
 | |
| +	vpmuludq 448(%rsp),%xmm4,%xmm4
 | |
| +	vpaddq %xmm4,%xmm9,%xmm9
 | |
| +	vpmuludq 16(%rsp),%xmm6,%xmm4
 | |
| +	vpaddq %xmm4,%xmm8,%xmm8
 | |
| +	vpmuludq 48(%rsp),%xmm6,%xmm4
 | |
| +	vpaddq %xmm4,%xmm10,%xmm10
 | |
| +	vpmuludq 80(%rsp),%xmm15,%xmm4
 | |
| +	vpaddq %xmm4,%xmm4,%xmm4
 | |
| +	vpaddq %xmm4,%xmm5,%xmm5
 | |
| +	vpmuludq 112(%rsp),%xmm15,%xmm4
 | |
| +	vpaddq %xmm4,%xmm0,%xmm0
 | |
| +	vmovdqa 48(%rsp),%xmm4
 | |
| +	vpaddq %xmm4,%xmm4,%xmm4
 | |
| +	vpmuludq 448(%rsp),%xmm4,%xmm4
 | |
| +	vpaddq %xmm4,%xmm1,%xmm1
 | |
| +	vmovdqa 80(%rsp),%xmm4
 | |
| +	vpaddq %xmm4,%xmm4,%xmm4
 | |
| +	vpmuludq 448(%rsp),%xmm4,%xmm4
 | |
| +	vpaddq %xmm4,%xmm14,%xmm14
 | |
| +	vpmuludq 64(%rsp),%xmm2,%xmm4
 | |
| +	vpaddq %xmm4,%xmm3,%xmm3
 | |
| +	vmovdqa 16(%rsp),%xmm4
 | |
| +	vpmuludq 64(%rsp),%xmm4,%xmm4
 | |
| +	vpaddq %xmm4,%xmm11,%xmm11
 | |
| +	vmovdqa 16(%rsp),%xmm4
 | |
| +	vpmuludq 96(%rsp),%xmm4,%xmm4
 | |
| +	vpaddq %xmm4,%xmm12,%xmm12
 | |
| +	vmovdqa 48(%rsp),%xmm4
 | |
| +	vpmuludq 96(%rsp),%xmm4,%xmm4
 | |
| +	vpaddq %xmm4,%xmm9,%xmm9
 | |
| +	vpmuludq 0(%rsp),%xmm2,%xmm2
 | |
| +	vpaddq %xmm2,%xmm10,%xmm10
 | |
| +	vmovdqa 32(%rsp),%xmm2
 | |
| +	vpmuludq 0(%rsp),%xmm2,%xmm2
 | |
| +	vpaddq %xmm2,%xmm3,%xmm3
 | |
| +	vmovdqa 64(%rsp),%xmm2
 | |
| +	vpmuludq 48(%rsp),%xmm2,%xmm2
 | |
| +	vpaddq %xmm2,%xmm12,%xmm12
 | |
| +	vmovdqa 96(%rsp),%xmm2
 | |
| +	vpmuludq 80(%rsp),%xmm2,%xmm2
 | |
| +	vpaddq %xmm2,%xmm1,%xmm1
 | |
| +	vmovdqa 448(%rsp),%xmm2
 | |
| +	vpmuludq 112(%rsp),%xmm2,%xmm2
 | |
| +	vpaddq %xmm2,%xmm5,%xmm5
 | |
| +	vpsrlq $26,%xmm3,%xmm2
 | |
| +	vpaddq %xmm2,%xmm11,%xmm11
 | |
| +	vpand curve25519_sandy2x_m26(%rip),%xmm3,%xmm3
 | |
| +	vpsrlq $25,%xmm14,%xmm2
 | |
| +	vpaddq %xmm2,%xmm5,%xmm5
 | |
| +	vpand curve25519_sandy2x_m25(%rip),%xmm14,%xmm14
 | |
| +	vpsrlq $25,%xmm11,%xmm2
 | |
| +	vpaddq %xmm2,%xmm12,%xmm12
 | |
| +	vpand curve25519_sandy2x_m25(%rip),%xmm11,%xmm11
 | |
| +	vpsrlq $26,%xmm5,%xmm2
 | |
| +	vpaddq %xmm2,%xmm0,%xmm0
 | |
| +	vpand curve25519_sandy2x_m26(%rip),%xmm5,%xmm5
 | |
| +	vpsrlq $26,%xmm12,%xmm2
 | |
| +	vpaddq %xmm2,%xmm9,%xmm9
 | |
| +	vpand curve25519_sandy2x_m26(%rip),%xmm12,%xmm12
 | |
| +	vpsrlq $25,%xmm0,%xmm2
 | |
| +	vpaddq %xmm2,%xmm8,%xmm8
 | |
| +	vpand curve25519_sandy2x_m25(%rip),%xmm0,%xmm0
 | |
| +	vpsrlq $25,%xmm9,%xmm2
 | |
| +	vpaddq %xmm2,%xmm1,%xmm1
 | |
| +	vpand curve25519_sandy2x_m25(%rip),%xmm9,%xmm9
 | |
| +	vpsrlq $26,%xmm8,%xmm2
 | |
| +	vpaddq %xmm2,%xmm10,%xmm10
 | |
| +	vpand curve25519_sandy2x_m26(%rip),%xmm8,%xmm8
 | |
| +	vpsrlq $26,%xmm1,%xmm2
 | |
| +	vpaddq %xmm2,%xmm14,%xmm14
 | |
| +	vpand curve25519_sandy2x_m26(%rip),%xmm1,%xmm1
 | |
| +	vpsrlq $25,%xmm10,%xmm2
 | |
| +	vpsllq $4,%xmm2,%xmm4
 | |
| +	vpaddq %xmm2,%xmm3,%xmm3
 | |
| +	vpsllq $1,%xmm2,%xmm2
 | |
| +	vpaddq %xmm2,%xmm4,%xmm4
 | |
| +	vpaddq %xmm4,%xmm3,%xmm3
 | |
| +	vpand curve25519_sandy2x_m25(%rip),%xmm10,%xmm10
 | |
| +	vpsrlq $25,%xmm14,%xmm2
 | |
| +	vpaddq %xmm2,%xmm5,%xmm5
 | |
| +	vpand curve25519_sandy2x_m25(%rip),%xmm14,%xmm14
 | |
| +	vpsrlq $26,%xmm3,%xmm2
 | |
| +	vpaddq %xmm2,%xmm11,%xmm11
 | |
| +	vpand curve25519_sandy2x_m26(%rip),%xmm3,%xmm3
 | |
| +	vpunpckhqdq %xmm11,%xmm3,%xmm2
 | |
| +	vmovdqa %xmm2,0(%rsp)
 | |
| +	vpshufd $0,%xmm3,%xmm2
 | |
| +	vpshufd $0,%xmm11,%xmm3
 | |
| +	vpmuludq 160(%rsp),%xmm2,%xmm4
 | |
| +	vpmuludq 432(%rsp),%xmm3,%xmm6
 | |
| +	vpaddq %xmm6,%xmm4,%xmm4
 | |
| +	vpmuludq 176(%rsp),%xmm2,%xmm6
 | |
| +	vpmuludq 304(%rsp),%xmm3,%xmm7
 | |
| +	vpaddq %xmm7,%xmm6,%xmm6
 | |
| +	vpmuludq 208(%rsp),%xmm2,%xmm7
 | |
| +	vpmuludq 336(%rsp),%xmm3,%xmm11
 | |
| +	vpaddq %xmm11,%xmm7,%xmm7
 | |
| +	vpmuludq 240(%rsp),%xmm2,%xmm11
 | |
| +	vpmuludq 368(%rsp),%xmm3,%xmm13
 | |
| +	vpaddq %xmm13,%xmm11,%xmm11
 | |
| +	vpmuludq 272(%rsp),%xmm2,%xmm2
 | |
| +	vpmuludq 400(%rsp),%xmm3,%xmm3
 | |
| +	vpaddq %xmm3,%xmm2,%xmm2
 | |
| +	vpunpckhqdq %xmm9,%xmm12,%xmm3
 | |
| +	vmovdqa %xmm3,16(%rsp)
 | |
| +	vpshufd $0,%xmm12,%xmm3
 | |
| +	vpshufd $0,%xmm9,%xmm9
 | |
| +	vpmuludq 288(%rsp),%xmm3,%xmm12
 | |
| +	vpaddq %xmm12,%xmm4,%xmm4
 | |
| +	vpmuludq 416(%rsp),%xmm9,%xmm12
 | |
| +	vpaddq %xmm12,%xmm4,%xmm4
 | |
| +	vpmuludq 160(%rsp),%xmm3,%xmm12
 | |
| +	vpaddq %xmm12,%xmm6,%xmm6
 | |
| +	vpmuludq 432(%rsp),%xmm9,%xmm12
 | |
| +	vpaddq %xmm12,%xmm6,%xmm6
 | |
| +	vpmuludq 176(%rsp),%xmm3,%xmm12
 | |
| +	vpaddq %xmm12,%xmm7,%xmm7
 | |
| +	vpmuludq 304(%rsp),%xmm9,%xmm12
 | |
| +	vpaddq %xmm12,%xmm7,%xmm7
 | |
| +	vpmuludq 208(%rsp),%xmm3,%xmm12
 | |
| +	vpaddq %xmm12,%xmm11,%xmm11
 | |
| +	vpmuludq 336(%rsp),%xmm9,%xmm12
 | |
| +	vpaddq %xmm12,%xmm11,%xmm11
 | |
| +	vpmuludq 240(%rsp),%xmm3,%xmm3
 | |
| +	vpaddq %xmm3,%xmm2,%xmm2
 | |
| +	vpmuludq 368(%rsp),%xmm9,%xmm3
 | |
| +	vpaddq %xmm3,%xmm2,%xmm2
 | |
| +	vpunpckhqdq %xmm14,%xmm1,%xmm3
 | |
| +	vmovdqa %xmm3,32(%rsp)
 | |
| +	vpshufd $0,%xmm1,%xmm1
 | |
| +	vpshufd $0,%xmm14,%xmm3
 | |
| +	vpmuludq 256(%rsp),%xmm1,%xmm9
 | |
| +	vpaddq %xmm9,%xmm4,%xmm4
 | |
| +	vpmuludq 384(%rsp),%xmm3,%xmm9
 | |
| +	vpaddq %xmm9,%xmm4,%xmm4
 | |
| +	vpmuludq 288(%rsp),%xmm1,%xmm9
 | |
| +	vpaddq %xmm9,%xmm6,%xmm6
 | |
| +	vpmuludq 416(%rsp),%xmm3,%xmm9
 | |
| +	vpaddq %xmm9,%xmm6,%xmm6
 | |
| +	vpmuludq 160(%rsp),%xmm1,%xmm9
 | |
| +	vpaddq %xmm9,%xmm7,%xmm7
 | |
| +	vpmuludq 432(%rsp),%xmm3,%xmm9
 | |
| +	vpaddq %xmm9,%xmm7,%xmm7
 | |
| +	vpmuludq 176(%rsp),%xmm1,%xmm9
 | |
| +	vpaddq %xmm9,%xmm11,%xmm11
 | |
| +	vpmuludq 304(%rsp),%xmm3,%xmm9
 | |
| +	vpaddq %xmm9,%xmm11,%xmm11
 | |
| +	vpmuludq 208(%rsp),%xmm1,%xmm1
 | |
| +	vpaddq %xmm1,%xmm2,%xmm2
 | |
| +	vpmuludq 336(%rsp),%xmm3,%xmm1
 | |
| +	vpaddq %xmm1,%xmm2,%xmm2
 | |
| +	vpunpckhqdq %xmm0,%xmm5,%xmm1
 | |
| +	vmovdqa %xmm1,48(%rsp)
 | |
| +	vpshufd $0,%xmm5,%xmm1
 | |
| +	vpshufd $0,%xmm0,%xmm0
 | |
| +	vpmuludq 224(%rsp),%xmm1,%xmm3
 | |
| +	vpaddq %xmm3,%xmm4,%xmm4
 | |
| +	vpmuludq 352(%rsp),%xmm0,%xmm3
 | |
| +	vpaddq %xmm3,%xmm4,%xmm4
 | |
| +	vpmuludq 256(%rsp),%xmm1,%xmm3
 | |
| +	vpaddq %xmm3,%xmm6,%xmm6
 | |
| +	vpmuludq 384(%rsp),%xmm0,%xmm3
 | |
| +	vpaddq %xmm3,%xmm6,%xmm6
 | |
| +	vpmuludq 288(%rsp),%xmm1,%xmm3
 | |
| +	vpaddq %xmm3,%xmm7,%xmm7
 | |
| +	vpmuludq 416(%rsp),%xmm0,%xmm3
 | |
| +	vpaddq %xmm3,%xmm7,%xmm7
 | |
| +	vpmuludq 160(%rsp),%xmm1,%xmm3
 | |
| +	vpaddq %xmm3,%xmm11,%xmm11
 | |
| +	vpmuludq 432(%rsp),%xmm0,%xmm3
 | |
| +	vpaddq %xmm3,%xmm11,%xmm11
 | |
| +	vpmuludq 176(%rsp),%xmm1,%xmm1
 | |
| +	vpaddq %xmm1,%xmm2,%xmm2
 | |
| +	vpmuludq 304(%rsp),%xmm0,%xmm0
 | |
| +	vpaddq %xmm0,%xmm2,%xmm2
 | |
| +	vpunpckhqdq %xmm10,%xmm8,%xmm0
 | |
| +	vmovdqa %xmm0,64(%rsp)
 | |
| +	vpshufd $0,%xmm8,%xmm0
 | |
| +	vpshufd $0,%xmm10,%xmm1
 | |
| +	vpmuludq 192(%rsp),%xmm0,%xmm3
 | |
| +	vpaddq %xmm3,%xmm4,%xmm4
 | |
| +	vpmuludq 320(%rsp),%xmm1,%xmm3
 | |
| +	vpaddq %xmm3,%xmm4,%xmm4
 | |
| +	vpmuludq 224(%rsp),%xmm0,%xmm3
 | |
| +	vpaddq %xmm3,%xmm6,%xmm6
 | |
| +	vpmuludq 352(%rsp),%xmm1,%xmm3
 | |
| +	vpaddq %xmm3,%xmm6,%xmm6
 | |
| +	vpmuludq 256(%rsp),%xmm0,%xmm3
 | |
| +	vpaddq %xmm3,%xmm7,%xmm7
 | |
| +	vpmuludq 384(%rsp),%xmm1,%xmm3
 | |
| +	vpaddq %xmm3,%xmm7,%xmm7
 | |
| +	vpmuludq 288(%rsp),%xmm0,%xmm3
 | |
| +	vpaddq %xmm3,%xmm11,%xmm11
 | |
| +	vpmuludq 416(%rsp),%xmm1,%xmm3
 | |
| +	vpaddq %xmm3,%xmm11,%xmm11
 | |
| +	vpmuludq 160(%rsp),%xmm0,%xmm0
 | |
| +	vpaddq %xmm0,%xmm2,%xmm2
 | |
| +	vpmuludq 432(%rsp),%xmm1,%xmm0
 | |
| +	vpaddq %xmm0,%xmm2,%xmm2
 | |
| +	vmovdqa %xmm4,80(%rsp)
 | |
| +	vmovdqa %xmm6,96(%rsp)
 | |
| +	vmovdqa %xmm7,112(%rsp)
 | |
| +	vmovdqa %xmm11,448(%rsp)
 | |
| +	vmovdqa %xmm2,496(%rsp)
 | |
| +	vmovdqa 144(%rsp),%xmm0
 | |
| +	vpmuludq %xmm0,%xmm0,%xmm1
 | |
| +	vpaddq %xmm0,%xmm0,%xmm0
 | |
| +	vmovdqa 128(%rsp),%xmm2
 | |
| +	vpmuludq %xmm2,%xmm0,%xmm3
 | |
| +	vmovdqa 480(%rsp),%xmm4
 | |
| +	vpmuludq %xmm4,%xmm0,%xmm5
 | |
| +	vmovdqa 464(%rsp),%xmm6
 | |
| +	vpmuludq %xmm6,%xmm0,%xmm7
 | |
| +	vmovdqa 528(%rsp),%xmm8
 | |
| +	vpmuludq %xmm8,%xmm0,%xmm9
 | |
| +	vpmuludq 512(%rsp),%xmm0,%xmm10
 | |
| +	vpmuludq 592(%rsp),%xmm0,%xmm11
 | |
| +	vpmuludq 576(%rsp),%xmm0,%xmm12
 | |
| +	vpmuludq 624(%rsp),%xmm0,%xmm13
 | |
| +	vmovdqa 672(%rsp),%xmm14
 | |
| +	vpmuludq %xmm14,%xmm0,%xmm0
 | |
| +	vpmuludq curve25519_sandy2x_v38_38(%rip),%xmm14,%xmm15
 | |
| +	vpmuludq %xmm15,%xmm14,%xmm14
 | |
| +	vpaddq %xmm14,%xmm13,%xmm13
 | |
| +	vpaddq %xmm6,%xmm6,%xmm14
 | |
| +	vpmuludq %xmm14,%xmm6,%xmm6
 | |
| +	vpaddq %xmm6,%xmm11,%xmm11
 | |
| +	vpaddq %xmm2,%xmm2,%xmm6
 | |
| +	vpmuludq %xmm6,%xmm2,%xmm2
 | |
| +	vpaddq %xmm2,%xmm5,%xmm5
 | |
| +	vpmuludq %xmm15,%xmm6,%xmm2
 | |
| +	vpaddq %xmm2,%xmm1,%xmm1
 | |
| +	vpmuludq %xmm15,%xmm4,%xmm2
 | |
| +	vpaddq %xmm2,%xmm3,%xmm3
 | |
| +	vpmuludq 544(%rsp),%xmm6,%xmm2
 | |
| +	vpaddq %xmm2,%xmm11,%xmm11
 | |
| +	vpmuludq 592(%rsp),%xmm6,%xmm2
 | |
| +	vpaddq %xmm2,%xmm12,%xmm12
 | |
| +	vpmuludq 640(%rsp),%xmm6,%xmm2
 | |
| +	vpaddq %xmm2,%xmm13,%xmm13
 | |
| +	vpmuludq 624(%rsp),%xmm6,%xmm2
 | |
| +	vpaddq %xmm2,%xmm0,%xmm0
 | |
| +	vpmuludq %xmm4,%xmm6,%xmm2
 | |
| +	vpaddq %xmm2,%xmm7,%xmm7
 | |
| +	vpmuludq %xmm14,%xmm6,%xmm2
 | |
| +	vpaddq %xmm2,%xmm9,%xmm9
 | |
| +	vpmuludq %xmm8,%xmm6,%xmm2
 | |
| +	vpaddq %xmm2,%xmm10,%xmm10
 | |
| +	vpmuludq %xmm15,%xmm14,%xmm2
 | |
| +	vpaddq %xmm2,%xmm5,%xmm5
 | |
| +	vpmuludq %xmm15,%xmm8,%xmm2
 | |
| +	vpaddq %xmm2,%xmm7,%xmm7
 | |
| +	vpmuludq %xmm4,%xmm4,%xmm2
 | |
| +	vpaddq %xmm2,%xmm9,%xmm9
 | |
| +	vpmuludq %xmm14,%xmm4,%xmm2
 | |
| +	vpaddq %xmm2,%xmm10,%xmm10
 | |
| +	vpaddq %xmm4,%xmm4,%xmm2
 | |
| +	vpmuludq %xmm8,%xmm2,%xmm4
 | |
| +	vpaddq %xmm4,%xmm11,%xmm11
 | |
| +	vpmuludq 688(%rsp),%xmm2,%xmm4
 | |
| +	vpaddq %xmm4,%xmm1,%xmm1
 | |
| +	vpmuludq 688(%rsp),%xmm14,%xmm4
 | |
| +	vpaddq %xmm4,%xmm3,%xmm3
 | |
| +	vpmuludq 512(%rsp),%xmm2,%xmm4
 | |
| +	vpaddq %xmm4,%xmm12,%xmm12
 | |
| +	vpmuludq 592(%rsp),%xmm2,%xmm4
 | |
| +	vpaddq %xmm4,%xmm13,%xmm13
 | |
| +	vpmuludq 576(%rsp),%xmm2,%xmm2
 | |
| +	vpaddq %xmm2,%xmm0,%xmm0
 | |
| +	vpmuludq 656(%rsp),%xmm8,%xmm2
 | |
| +	vpaddq %xmm2,%xmm3,%xmm3
 | |
| +	vpmuludq %xmm8,%xmm14,%xmm2
 | |
| +	vpaddq %xmm2,%xmm12,%xmm12
 | |
| +	vpmuludq %xmm8,%xmm8,%xmm2
 | |
| +	vpaddq %xmm2,%xmm13,%xmm13
 | |
| +	vpaddq %xmm8,%xmm8,%xmm2
 | |
| +	vpmuludq 688(%rsp),%xmm2,%xmm4
 | |
| +	vpaddq %xmm4,%xmm5,%xmm5
 | |
| +	vpmuludq 544(%rsp),%xmm15,%xmm4
 | |
| +	vpaddq %xmm4,%xmm9,%xmm9
 | |
| +	vpmuludq 592(%rsp),%xmm15,%xmm4
 | |
| +	vpaddq %xmm4,%xmm10,%xmm10
 | |
| +	vpmuludq 656(%rsp),%xmm14,%xmm4
 | |
| +	vpaddq %xmm4,%xmm1,%xmm1
 | |
| +	vmovdqa 544(%rsp),%xmm4
 | |
| +	vpmuludq 688(%rsp),%xmm4,%xmm4
 | |
| +	vpaddq %xmm4,%xmm7,%xmm7
 | |
| +	vpmuludq 544(%rsp),%xmm14,%xmm4
 | |
| +	vpaddq %xmm4,%xmm13,%xmm13
 | |
| +	vpmuludq 592(%rsp),%xmm14,%xmm4
 | |
| +	vpaddq %xmm4,%xmm0,%xmm0
 | |
| +	vpmuludq 640(%rsp),%xmm15,%xmm4
 | |
| +	vpaddq %xmm4,%xmm11,%xmm11
 | |
| +	vpmuludq 624(%rsp),%xmm15,%xmm4
 | |
| +	vpaddq %xmm4,%xmm12,%xmm12
 | |
| +	vmovdqa 592(%rsp),%xmm4
 | |
| +	vpaddq %xmm4,%xmm4,%xmm4
 | |
| +	vpmuludq 688(%rsp),%xmm4,%xmm4
 | |
| +	vpaddq %xmm4,%xmm9,%xmm9
 | |
| +	vpmuludq 608(%rsp),%xmm2,%xmm4
 | |
| +	vpaddq %xmm4,%xmm1,%xmm1
 | |
| +	vmovdqa 544(%rsp),%xmm4
 | |
| +	vpmuludq 608(%rsp),%xmm4,%xmm4
 | |
| +	vpaddq %xmm4,%xmm3,%xmm3
 | |
| +	vmovdqa 544(%rsp),%xmm4
 | |
| +	vpmuludq 656(%rsp),%xmm4,%xmm4
 | |
| +	vpaddq %xmm4,%xmm5,%xmm5
 | |
| +	vmovdqa 592(%rsp),%xmm4
 | |
| +	vpmuludq 656(%rsp),%xmm4,%xmm4
 | |
| +	vpaddq %xmm4,%xmm7,%xmm7
 | |
| +	vmovdqa 640(%rsp),%xmm4
 | |
| +	vpmuludq 688(%rsp),%xmm4,%xmm4
 | |
| +	vpaddq %xmm4,%xmm10,%xmm10
 | |
| +	vpmuludq 512(%rsp),%xmm2,%xmm2
 | |
| +	vpaddq %xmm2,%xmm0,%xmm0
 | |
| +	vmovdqa 560(%rsp),%xmm2
 | |
| +	vpmuludq 512(%rsp),%xmm2,%xmm2
 | |
| +	vpaddq %xmm2,%xmm1,%xmm1
 | |
| +	vmovdqa 608(%rsp),%xmm2
 | |
| +	vpmuludq 592(%rsp),%xmm2,%xmm2
 | |
| +	vpaddq %xmm2,%xmm5,%xmm5
 | |
| +	vmovdqa 656(%rsp),%xmm2
 | |
| +	vpmuludq 576(%rsp),%xmm2,%xmm2
 | |
| +	vpaddq %xmm2,%xmm9,%xmm9
 | |
| +	vmovdqa 688(%rsp),%xmm2
 | |
| +	vpmuludq 624(%rsp),%xmm2,%xmm2
 | |
| +	vpaddq %xmm2,%xmm11,%xmm11
 | |
| +	vpsrlq $26,%xmm1,%xmm2
 | |
| +	vpaddq %xmm2,%xmm3,%xmm3
 | |
| +	vpand curve25519_sandy2x_m26(%rip),%xmm1,%xmm1
 | |
| +	vpsrlq $25,%xmm10,%xmm2
 | |
| +	vpaddq %xmm2,%xmm11,%xmm11
 | |
| +	vpand curve25519_sandy2x_m25(%rip),%xmm10,%xmm10
 | |
| +	vpsrlq $25,%xmm3,%xmm2
 | |
| +	vpaddq %xmm2,%xmm5,%xmm5
 | |
| +	vpand curve25519_sandy2x_m25(%rip),%xmm3,%xmm3
 | |
| +	vpsrlq $26,%xmm11,%xmm2
 | |
| +	vpaddq %xmm2,%xmm12,%xmm12
 | |
| +	vpand curve25519_sandy2x_m26(%rip),%xmm11,%xmm11
 | |
| +	vpsrlq $26,%xmm5,%xmm2
 | |
| +	vpaddq %xmm2,%xmm7,%xmm7
 | |
| +	vpand curve25519_sandy2x_m26(%rip),%xmm5,%xmm5
 | |
| +	vpsrlq $25,%xmm12,%xmm2
 | |
| +	vpaddq %xmm2,%xmm13,%xmm13
 | |
| +	vpand curve25519_sandy2x_m25(%rip),%xmm12,%xmm12
 | |
| +	vpsrlq $25,%xmm7,%xmm2
 | |
| +	vpaddq %xmm2,%xmm9,%xmm9
 | |
| +	vpand curve25519_sandy2x_m25(%rip),%xmm7,%xmm7
 | |
| +	vpsrlq $26,%xmm13,%xmm2
 | |
| +	vpaddq %xmm2,%xmm0,%xmm0
 | |
| +	vpand curve25519_sandy2x_m26(%rip),%xmm13,%xmm13
 | |
| +	vpsrlq $26,%xmm9,%xmm2
 | |
| +	vpaddq %xmm2,%xmm10,%xmm10
 | |
| +	vpand curve25519_sandy2x_m26(%rip),%xmm9,%xmm9
 | |
| +	vpsrlq $25,%xmm0,%xmm2
 | |
| +	vpsllq $4,%xmm2,%xmm4
 | |
| +	vpaddq %xmm2,%xmm1,%xmm1
 | |
| +	vpsllq $1,%xmm2,%xmm2
 | |
| +	vpaddq %xmm2,%xmm4,%xmm4
 | |
| +	vpaddq %xmm4,%xmm1,%xmm1
 | |
| +	vpand curve25519_sandy2x_m25(%rip),%xmm0,%xmm0
 | |
| +	vpsrlq $25,%xmm10,%xmm2
 | |
| +	vpaddq %xmm2,%xmm11,%xmm11
 | |
| +	vpand curve25519_sandy2x_m25(%rip),%xmm10,%xmm10
 | |
| +	vpsrlq $26,%xmm1,%xmm2
 | |
| +	vpaddq %xmm2,%xmm3,%xmm3
 | |
| +	vpand curve25519_sandy2x_m26(%rip),%xmm1,%xmm1
 | |
| +	vpunpckhqdq %xmm3,%xmm1,%xmm2
 | |
| +	vpunpcklqdq %xmm3,%xmm1,%xmm1
 | |
| +	vmovdqa %xmm1,464(%rsp)
 | |
| +	vpaddq curve25519_sandy2x_subc0(%rip),%xmm2,%xmm3
 | |
| +	vpsubq %xmm1,%xmm3,%xmm3
 | |
| +	vpunpckhqdq %xmm3,%xmm2,%xmm1
 | |
| +	vpunpcklqdq %xmm3,%xmm2,%xmm2
 | |
| +	vmovdqa %xmm2,480(%rsp)
 | |
| +	vmovdqa %xmm1,512(%rsp)
 | |
| +	vpsllq $1,%xmm1,%xmm1
 | |
| +	vmovdqa %xmm1,528(%rsp)
 | |
| +	vpmuludq curve25519_sandy2x_v121666_121666(%rip),%xmm3,%xmm3
 | |
| +	vmovdqa 80(%rsp),%xmm1
 | |
| +	vpunpcklqdq %xmm1,%xmm3,%xmm2
 | |
| +	vpunpckhqdq %xmm1,%xmm3,%xmm1
 | |
| +	vpunpckhqdq %xmm7,%xmm5,%xmm3
 | |
| +	vpunpcklqdq %xmm7,%xmm5,%xmm4
 | |
| +	vmovdqa %xmm4,544(%rsp)
 | |
| +	vpaddq curve25519_sandy2x_subc2(%rip),%xmm3,%xmm5
 | |
| +	vpsubq %xmm4,%xmm5,%xmm5
 | |
| +	vpunpckhqdq %xmm5,%xmm3,%xmm4
 | |
| +	vpunpcklqdq %xmm5,%xmm3,%xmm3
 | |
| +	vmovdqa %xmm3,560(%rsp)
 | |
| +	vmovdqa %xmm4,576(%rsp)
 | |
| +	vpsllq $1,%xmm4,%xmm4
 | |
| +	vmovdqa %xmm4,592(%rsp)
 | |
| +	vpmuludq curve25519_sandy2x_v121666_121666(%rip),%xmm5,%xmm5
 | |
| +	vmovdqa 96(%rsp),%xmm3
 | |
| +	vpunpcklqdq %xmm3,%xmm5,%xmm4
 | |
| +	vpunpckhqdq %xmm3,%xmm5,%xmm3
 | |
| +	vpunpckhqdq %xmm10,%xmm9,%xmm5
 | |
| +	vpunpcklqdq %xmm10,%xmm9,%xmm6
 | |
| +	vmovdqa %xmm6,608(%rsp)
 | |
| +	vpaddq curve25519_sandy2x_subc2(%rip),%xmm5,%xmm7
 | |
| +	vpsubq %xmm6,%xmm7,%xmm7
 | |
| +	vpunpckhqdq %xmm7,%xmm5,%xmm6
 | |
| +	vpunpcklqdq %xmm7,%xmm5,%xmm5
 | |
| +	vmovdqa %xmm5,624(%rsp)
 | |
| +	vmovdqa %xmm6,640(%rsp)
 | |
| +	vpsllq $1,%xmm6,%xmm6
 | |
| +	vmovdqa %xmm6,656(%rsp)
 | |
| +	vpmuludq curve25519_sandy2x_v121666_121666(%rip),%xmm7,%xmm7
 | |
| +	vmovdqa 112(%rsp),%xmm5
 | |
| +	vpunpcklqdq %xmm5,%xmm7,%xmm6
 | |
| +	vpunpckhqdq %xmm5,%xmm7,%xmm5
 | |
| +	vpunpckhqdq %xmm12,%xmm11,%xmm7
 | |
| +	vpunpcklqdq %xmm12,%xmm11,%xmm8
 | |
| +	vmovdqa %xmm8,672(%rsp)
 | |
| +	vpaddq curve25519_sandy2x_subc2(%rip),%xmm7,%xmm9
 | |
| +	vpsubq %xmm8,%xmm9,%xmm9
 | |
| +	vpunpckhqdq %xmm9,%xmm7,%xmm8
 | |
| +	vpunpcklqdq %xmm9,%xmm7,%xmm7
 | |
| +	vmovdqa %xmm7,688(%rsp)
 | |
| +	vmovdqa %xmm8,704(%rsp)
 | |
| +	vpsllq $1,%xmm8,%xmm8
 | |
| +	vmovdqa %xmm8,720(%rsp)
 | |
| +	vpmuludq curve25519_sandy2x_v121666_121666(%rip),%xmm9,%xmm9
 | |
| +	vmovdqa 448(%rsp),%xmm7
 | |
| +	vpunpcklqdq %xmm7,%xmm9,%xmm8
 | |
| +	vpunpckhqdq %xmm7,%xmm9,%xmm7
 | |
| +	vpunpckhqdq %xmm0,%xmm13,%xmm9
 | |
| +	vpunpcklqdq %xmm0,%xmm13,%xmm0
 | |
| +	vmovdqa %xmm0,448(%rsp)
 | |
| +	vpaddq curve25519_sandy2x_subc2(%rip),%xmm9,%xmm10
 | |
| +	vpsubq %xmm0,%xmm10,%xmm10
 | |
| +	vpunpckhqdq %xmm10,%xmm9,%xmm0
 | |
| +	vpunpcklqdq %xmm10,%xmm9,%xmm9
 | |
| +	vmovdqa %xmm9,736(%rsp)
 | |
| +	vmovdqa %xmm0,752(%rsp)
 | |
| +	vpsllq $1,%xmm0,%xmm0
 | |
| +	vmovdqa %xmm0,768(%rsp)
 | |
| +	vpmuludq curve25519_sandy2x_v121666_121666(%rip),%xmm10,%xmm10
 | |
| +	vmovdqa 496(%rsp),%xmm0
 | |
| +	vpunpcklqdq %xmm0,%xmm10,%xmm9
 | |
| +	vpunpckhqdq %xmm0,%xmm10,%xmm0
 | |
| +	vpsrlq $26,%xmm2,%xmm10
 | |
| +	vpaddq %xmm10,%xmm1,%xmm1
 | |
| +	vpand curve25519_sandy2x_m26(%rip),%xmm2,%xmm2
 | |
| +	vpsrlq $25,%xmm5,%xmm10
 | |
| +	vpaddq %xmm10,%xmm8,%xmm8
 | |
| +	vpand curve25519_sandy2x_m25(%rip),%xmm5,%xmm5
 | |
| +	vpsrlq $25,%xmm1,%xmm10
 | |
| +	vpaddq %xmm10,%xmm4,%xmm4
 | |
| +	vpand curve25519_sandy2x_m25(%rip),%xmm1,%xmm1
 | |
| +	vpsrlq $26,%xmm8,%xmm10
 | |
| +	vpaddq %xmm10,%xmm7,%xmm7
 | |
| +	vpand curve25519_sandy2x_m26(%rip),%xmm8,%xmm8
 | |
| +	vpsrlq $26,%xmm4,%xmm10
 | |
| +	vpaddq %xmm10,%xmm3,%xmm3
 | |
| +	vpand curve25519_sandy2x_m26(%rip),%xmm4,%xmm4
 | |
| +	vpsrlq $25,%xmm7,%xmm10
 | |
| +	vpaddq %xmm10,%xmm9,%xmm9
 | |
| +	vpand curve25519_sandy2x_m25(%rip),%xmm7,%xmm7
 | |
| +	vpsrlq $25,%xmm3,%xmm10
 | |
| +	vpaddq %xmm10,%xmm6,%xmm6
 | |
| +	vpand curve25519_sandy2x_m25(%rip),%xmm3,%xmm3
 | |
| +	vpsrlq $26,%xmm9,%xmm10
 | |
| +	vpaddq %xmm10,%xmm0,%xmm0
 | |
| +	vpand curve25519_sandy2x_m26(%rip),%xmm9,%xmm9
 | |
| +	vpsrlq $26,%xmm6,%xmm10
 | |
| +	vpaddq %xmm10,%xmm5,%xmm5
 | |
| +	vpand curve25519_sandy2x_m26(%rip),%xmm6,%xmm6
 | |
| +	vpsrlq $25,%xmm0,%xmm10
 | |
| +	vpsllq $4,%xmm10,%xmm11
 | |
| +	vpaddq %xmm10,%xmm2,%xmm2
 | |
| +	vpsllq $1,%xmm10,%xmm10
 | |
| +	vpaddq %xmm10,%xmm11,%xmm11
 | |
| +	vpaddq %xmm11,%xmm2,%xmm2
 | |
| +	vpand curve25519_sandy2x_m25(%rip),%xmm0,%xmm0
 | |
| +	vpsrlq $25,%xmm5,%xmm10
 | |
| +	vpaddq %xmm10,%xmm8,%xmm8
 | |
| +	vpand curve25519_sandy2x_m25(%rip),%xmm5,%xmm5
 | |
| +	vpsrlq $26,%xmm2,%xmm10
 | |
| +	vpaddq %xmm10,%xmm1,%xmm1
 | |
| +	vpand curve25519_sandy2x_m26(%rip),%xmm2,%xmm2
 | |
| +	vpunpckhqdq %xmm1,%xmm2,%xmm10
 | |
| +	vmovdqa %xmm10,80(%rsp)
 | |
| +	vpunpcklqdq %xmm1,%xmm2,%xmm1
 | |
| +	vpunpckhqdq %xmm3,%xmm4,%xmm2
 | |
| +	vmovdqa %xmm2,96(%rsp)
 | |
| +	vpunpcklqdq %xmm3,%xmm4,%xmm2
 | |
| +	vpunpckhqdq %xmm5,%xmm6,%xmm3
 | |
| +	vmovdqa %xmm3,112(%rsp)
 | |
| +	vpunpcklqdq %xmm5,%xmm6,%xmm3
 | |
| +	vpunpckhqdq %xmm7,%xmm8,%xmm4
 | |
| +	vmovdqa %xmm4,128(%rsp)
 | |
| +	vpunpcklqdq %xmm7,%xmm8,%xmm4
 | |
| +	vpunpckhqdq %xmm0,%xmm9,%xmm5
 | |
| +	vmovdqa %xmm5,144(%rsp)
 | |
| +	vpunpcklqdq %xmm0,%xmm9,%xmm0
 | |
| +	vmovdqa 464(%rsp),%xmm5
 | |
| +	vpaddq %xmm5,%xmm1,%xmm1
 | |
| +	vpunpcklqdq %xmm1,%xmm5,%xmm6
 | |
| +	vpunpckhqdq %xmm1,%xmm5,%xmm1
 | |
| +	vpmuludq 512(%rsp),%xmm6,%xmm5
 | |
| +	vpmuludq 480(%rsp),%xmm1,%xmm7
 | |
| +	vpaddq %xmm7,%xmm5,%xmm5
 | |
| +	vpmuludq 560(%rsp),%xmm6,%xmm7
 | |
| +	vpmuludq 528(%rsp),%xmm1,%xmm8
 | |
| +	vpaddq %xmm8,%xmm7,%xmm7
 | |
| +	vpmuludq 576(%rsp),%xmm6,%xmm8
 | |
| +	vpmuludq 560(%rsp),%xmm1,%xmm9
 | |
| +	vpaddq %xmm9,%xmm8,%xmm8
 | |
| +	vpmuludq 624(%rsp),%xmm6,%xmm9
 | |
| +	vpmuludq 592(%rsp),%xmm1,%xmm10
 | |
| +	vpaddq %xmm10,%xmm9,%xmm9
 | |
| +	vpmuludq 640(%rsp),%xmm6,%xmm10
 | |
| +	vpmuludq 624(%rsp),%xmm1,%xmm11
 | |
| +	vpaddq %xmm11,%xmm10,%xmm10
 | |
| +	vpmuludq 688(%rsp),%xmm6,%xmm11
 | |
| +	vpmuludq 656(%rsp),%xmm1,%xmm12
 | |
| +	vpaddq %xmm12,%xmm11,%xmm11
 | |
| +	vpmuludq 704(%rsp),%xmm6,%xmm12
 | |
| +	vpmuludq 688(%rsp),%xmm1,%xmm13
 | |
| +	vpaddq %xmm13,%xmm12,%xmm12
 | |
| +	vpmuludq 736(%rsp),%xmm6,%xmm13
 | |
| +	vpmuludq 720(%rsp),%xmm1,%xmm14
 | |
| +	vpaddq %xmm14,%xmm13,%xmm13
 | |
| +	vpmuludq 752(%rsp),%xmm6,%xmm14
 | |
| +	vpmuludq 736(%rsp),%xmm1,%xmm15
 | |
| +	vpaddq %xmm15,%xmm14,%xmm14
 | |
| +	vpmuludq 480(%rsp),%xmm6,%xmm6
 | |
| +	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm1,%xmm1
 | |
| +	vpmuludq 768(%rsp),%xmm1,%xmm1
 | |
| +	vpaddq %xmm1,%xmm6,%xmm6
 | |
| +	vmovdqa 544(%rsp),%xmm1
 | |
| +	vpaddq %xmm1,%xmm2,%xmm2
 | |
| +	vpunpcklqdq %xmm2,%xmm1,%xmm15
 | |
| +	vpunpckhqdq %xmm2,%xmm1,%xmm1
 | |
| +	vpmuludq 480(%rsp),%xmm15,%xmm2
 | |
| +	vpaddq %xmm2,%xmm7,%xmm7
 | |
| +	vpmuludq 512(%rsp),%xmm15,%xmm2
 | |
| +	vpaddq %xmm2,%xmm8,%xmm8
 | |
| +	vpmuludq 560(%rsp),%xmm15,%xmm2
 | |
| +	vpaddq %xmm2,%xmm9,%xmm9
 | |
| +	vpmuludq 576(%rsp),%xmm15,%xmm2
 | |
| +	vpaddq %xmm2,%xmm10,%xmm10
 | |
| +	vpmuludq 624(%rsp),%xmm15,%xmm2
 | |
| +	vpaddq %xmm2,%xmm11,%xmm11
 | |
| +	vpmuludq 640(%rsp),%xmm15,%xmm2
 | |
| +	vpaddq %xmm2,%xmm12,%xmm12
 | |
| +	vpmuludq 688(%rsp),%xmm15,%xmm2
 | |
| +	vpaddq %xmm2,%xmm13,%xmm13
 | |
| +	vpmuludq 704(%rsp),%xmm15,%xmm2
 | |
| +	vpaddq %xmm2,%xmm14,%xmm14
 | |
| +	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm15,%xmm15
 | |
| +	vpmuludq 736(%rsp),%xmm15,%xmm2
 | |
| +	vpaddq %xmm2,%xmm6,%xmm6
 | |
| +	vpmuludq 752(%rsp),%xmm15,%xmm15
 | |
| +	vpaddq %xmm15,%xmm5,%xmm5
 | |
| +	vpmuludq 480(%rsp),%xmm1,%xmm2
 | |
| +	vpaddq %xmm2,%xmm8,%xmm8
 | |
| +	vpmuludq 528(%rsp),%xmm1,%xmm2
 | |
| +	vpaddq %xmm2,%xmm9,%xmm9
 | |
| +	vpmuludq 560(%rsp),%xmm1,%xmm2
 | |
| +	vpaddq %xmm2,%xmm10,%xmm10
 | |
| +	vpmuludq 592(%rsp),%xmm1,%xmm2
 | |
| +	vpaddq %xmm2,%xmm11,%xmm11
 | |
| +	vpmuludq 624(%rsp),%xmm1,%xmm2
 | |
| +	vpaddq %xmm2,%xmm12,%xmm12
 | |
| +	vpmuludq 656(%rsp),%xmm1,%xmm2
 | |
| +	vpaddq %xmm2,%xmm13,%xmm13
 | |
| +	vpmuludq 688(%rsp),%xmm1,%xmm2
 | |
| +	vpaddq %xmm2,%xmm14,%xmm14
 | |
| +	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm1,%xmm1
 | |
| +	vpmuludq 720(%rsp),%xmm1,%xmm2
 | |
| +	vpaddq %xmm2,%xmm6,%xmm6
 | |
| +	vpmuludq 736(%rsp),%xmm1,%xmm2
 | |
| +	vpaddq %xmm2,%xmm5,%xmm5
 | |
| +	vpmuludq 768(%rsp),%xmm1,%xmm1
 | |
| +	vpaddq %xmm1,%xmm7,%xmm7
 | |
| +	vmovdqa 608(%rsp),%xmm1
 | |
| +	vpaddq %xmm1,%xmm3,%xmm3
 | |
| +	vpunpcklqdq %xmm3,%xmm1,%xmm2
 | |
| +	vpunpckhqdq %xmm3,%xmm1,%xmm1
 | |
| +	vpmuludq 480(%rsp),%xmm2,%xmm3
 | |
| +	vpaddq %xmm3,%xmm9,%xmm9
 | |
| +	vpmuludq 512(%rsp),%xmm2,%xmm3
 | |
| +	vpaddq %xmm3,%xmm10,%xmm10
 | |
| +	vpmuludq 560(%rsp),%xmm2,%xmm3
 | |
| +	vpaddq %xmm3,%xmm11,%xmm11
 | |
| +	vpmuludq 576(%rsp),%xmm2,%xmm3
 | |
| +	vpaddq %xmm3,%xmm12,%xmm12
 | |
| +	vpmuludq 624(%rsp),%xmm2,%xmm3
 | |
| +	vpaddq %xmm3,%xmm13,%xmm13
 | |
| +	vpmuludq 640(%rsp),%xmm2,%xmm3
 | |
| +	vpaddq %xmm3,%xmm14,%xmm14
 | |
| +	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm2,%xmm2
 | |
| +	vpmuludq 688(%rsp),%xmm2,%xmm3
 | |
| +	vpaddq %xmm3,%xmm6,%xmm6
 | |
| +	vpmuludq 704(%rsp),%xmm2,%xmm3
 | |
| +	vpaddq %xmm3,%xmm5,%xmm5
 | |
| +	vpmuludq 736(%rsp),%xmm2,%xmm3
 | |
| +	vpaddq %xmm3,%xmm7,%xmm7
 | |
| +	vpmuludq 752(%rsp),%xmm2,%xmm2
 | |
| +	vpaddq %xmm2,%xmm8,%xmm8
 | |
| +	vpmuludq 480(%rsp),%xmm1,%xmm2
 | |
| +	vpaddq %xmm2,%xmm10,%xmm10
 | |
| +	vpmuludq 528(%rsp),%xmm1,%xmm2
 | |
| +	vpaddq %xmm2,%xmm11,%xmm11
 | |
| +	vpmuludq 560(%rsp),%xmm1,%xmm2
 | |
| +	vpaddq %xmm2,%xmm12,%xmm12
 | |
| +	vpmuludq 592(%rsp),%xmm1,%xmm2
 | |
| +	vpaddq %xmm2,%xmm13,%xmm13
 | |
| +	vpmuludq 624(%rsp),%xmm1,%xmm2
 | |
| +	vpaddq %xmm2,%xmm14,%xmm14
 | |
| +	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm1,%xmm1
 | |
| +	vpmuludq 656(%rsp),%xmm1,%xmm2
 | |
| +	vpaddq %xmm2,%xmm6,%xmm6
 | |
| +	vpmuludq 688(%rsp),%xmm1,%xmm2
 | |
| +	vpaddq %xmm2,%xmm5,%xmm5
 | |
| +	vpmuludq 720(%rsp),%xmm1,%xmm2
 | |
| +	vpaddq %xmm2,%xmm7,%xmm7
 | |
| +	vpmuludq 736(%rsp),%xmm1,%xmm2
 | |
| +	vpaddq %xmm2,%xmm8,%xmm8
 | |
| +	vpmuludq 768(%rsp),%xmm1,%xmm1
 | |
| +	vpaddq %xmm1,%xmm9,%xmm9
 | |
| +	vmovdqa 672(%rsp),%xmm1
 | |
| +	vpaddq %xmm1,%xmm4,%xmm4
 | |
| +	vpunpcklqdq %xmm4,%xmm1,%xmm2
 | |
| +	vpunpckhqdq %xmm4,%xmm1,%xmm1
 | |
| +	vpmuludq 480(%rsp),%xmm2,%xmm3
 | |
| +	vpaddq %xmm3,%xmm11,%xmm11
 | |
| +	vpmuludq 512(%rsp),%xmm2,%xmm3
 | |
| +	vpaddq %xmm3,%xmm12,%xmm12
 | |
| +	vpmuludq 560(%rsp),%xmm2,%xmm3
 | |
| +	vpaddq %xmm3,%xmm13,%xmm13
 | |
| +	vpmuludq 576(%rsp),%xmm2,%xmm3
 | |
| +	vpaddq %xmm3,%xmm14,%xmm14
 | |
| +	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm2,%xmm2
 | |
| +	vpmuludq 624(%rsp),%xmm2,%xmm3
 | |
| +	vpaddq %xmm3,%xmm6,%xmm6
 | |
| +	vpmuludq 640(%rsp),%xmm2,%xmm3
 | |
| +	vpaddq %xmm3,%xmm5,%xmm5
 | |
| +	vpmuludq 688(%rsp),%xmm2,%xmm3
 | |
| +	vpaddq %xmm3,%xmm7,%xmm7
 | |
| +	vpmuludq 704(%rsp),%xmm2,%xmm3
 | |
| +	vpaddq %xmm3,%xmm8,%xmm8
 | |
| +	vpmuludq 736(%rsp),%xmm2,%xmm3
 | |
| +	vpaddq %xmm3,%xmm9,%xmm9
 | |
| +	vpmuludq 752(%rsp),%xmm2,%xmm2
 | |
| +	vpaddq %xmm2,%xmm10,%xmm10
 | |
| +	vpmuludq 480(%rsp),%xmm1,%xmm2
 | |
| +	vpaddq %xmm2,%xmm12,%xmm12
 | |
| +	vpmuludq 528(%rsp),%xmm1,%xmm2
 | |
| +	vpaddq %xmm2,%xmm13,%xmm13
 | |
| +	vpmuludq 560(%rsp),%xmm1,%xmm2
 | |
| +	vpaddq %xmm2,%xmm14,%xmm14
 | |
| +	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm1,%xmm1
 | |
| +	vpmuludq 592(%rsp),%xmm1,%xmm2
 | |
| +	vpaddq %xmm2,%xmm6,%xmm6
 | |
| +	vpmuludq 624(%rsp),%xmm1,%xmm2
 | |
| +	vpaddq %xmm2,%xmm5,%xmm5
 | |
| +	vpmuludq 656(%rsp),%xmm1,%xmm2
 | |
| +	vpaddq %xmm2,%xmm7,%xmm7
 | |
| +	vpmuludq 688(%rsp),%xmm1,%xmm2
 | |
| +	vpaddq %xmm2,%xmm8,%xmm8
 | |
| +	vpmuludq 720(%rsp),%xmm1,%xmm2
 | |
| +	vpaddq %xmm2,%xmm9,%xmm9
 | |
| +	vpmuludq 736(%rsp),%xmm1,%xmm2
 | |
| +	vpaddq %xmm2,%xmm10,%xmm10
 | |
| +	vpmuludq 768(%rsp),%xmm1,%xmm1
 | |
| +	vpaddq %xmm1,%xmm11,%xmm11
 | |
| +	vmovdqa 448(%rsp),%xmm1
 | |
| +	vpaddq %xmm1,%xmm0,%xmm0
 | |
| +	vpunpcklqdq %xmm0,%xmm1,%xmm2
 | |
| +	vpunpckhqdq %xmm0,%xmm1,%xmm0
 | |
| +	vpmuludq 480(%rsp),%xmm2,%xmm1
 | |
| +	vpaddq %xmm1,%xmm13,%xmm13
 | |
| +	vpmuludq 512(%rsp),%xmm2,%xmm1
 | |
| +	vpaddq %xmm1,%xmm14,%xmm14
 | |
| +	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm2,%xmm2
 | |
| +	vpmuludq 560(%rsp),%xmm2,%xmm1
 | |
| +	vpaddq %xmm1,%xmm6,%xmm6
 | |
| +	vpmuludq 576(%rsp),%xmm2,%xmm1
 | |
| +	vpaddq %xmm1,%xmm5,%xmm5
 | |
| +	vpmuludq 624(%rsp),%xmm2,%xmm1
 | |
| +	vpaddq %xmm1,%xmm7,%xmm7
 | |
| +	vpmuludq 640(%rsp),%xmm2,%xmm1
 | |
| +	vpaddq %xmm1,%xmm8,%xmm8
 | |
| +	vpmuludq 688(%rsp),%xmm2,%xmm1
 | |
| +	vpaddq %xmm1,%xmm9,%xmm9
 | |
| +	vpmuludq 704(%rsp),%xmm2,%xmm1
 | |
| +	vpaddq %xmm1,%xmm10,%xmm10
 | |
| +	vpmuludq 736(%rsp),%xmm2,%xmm1
 | |
| +	vpaddq %xmm1,%xmm11,%xmm11
 | |
| +	vpmuludq 752(%rsp),%xmm2,%xmm2
 | |
| +	vpaddq %xmm2,%xmm12,%xmm12
 | |
| +	vpmuludq 480(%rsp),%xmm0,%xmm1
 | |
| +	vpaddq %xmm1,%xmm14,%xmm14
 | |
| +	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm0,%xmm0
 | |
| +	vpmuludq 528(%rsp),%xmm0,%xmm1
 | |
| +	vpaddq %xmm1,%xmm6,%xmm6
 | |
| +	vpmuludq 560(%rsp),%xmm0,%xmm1
 | |
| +	vpaddq %xmm1,%xmm5,%xmm5
 | |
| +	vpmuludq 592(%rsp),%xmm0,%xmm1
 | |
| +	vpaddq %xmm1,%xmm7,%xmm7
 | |
| +	vpmuludq 624(%rsp),%xmm0,%xmm1
 | |
| +	vpaddq %xmm1,%xmm8,%xmm8
 | |
| +	vpmuludq 656(%rsp),%xmm0,%xmm1
 | |
| +	vpaddq %xmm1,%xmm9,%xmm9
 | |
| +	vpmuludq 688(%rsp),%xmm0,%xmm1
 | |
| +	vpaddq %xmm1,%xmm10,%xmm10
 | |
| +	vpmuludq 720(%rsp),%xmm0,%xmm1
 | |
| +	vpaddq %xmm1,%xmm11,%xmm11
 | |
| +	vpmuludq 736(%rsp),%xmm0,%xmm1
 | |
| +	vpaddq %xmm1,%xmm12,%xmm12
 | |
| +	vpmuludq 768(%rsp),%xmm0,%xmm0
 | |
| +	vpaddq %xmm0,%xmm13,%xmm13
 | |
| +	vpsrlq $26,%xmm6,%xmm0
 | |
| +	vpaddq %xmm0,%xmm5,%xmm5
 | |
| +	vpand curve25519_sandy2x_m26(%rip),%xmm6,%xmm6
 | |
| +	vpsrlq $25,%xmm10,%xmm0
 | |
| +	vpaddq %xmm0,%xmm11,%xmm11
 | |
| +	vpand curve25519_sandy2x_m25(%rip),%xmm10,%xmm10
 | |
| +	vpsrlq $25,%xmm5,%xmm0
 | |
| +	vpaddq %xmm0,%xmm7,%xmm7
 | |
| +	vpand curve25519_sandy2x_m25(%rip),%xmm5,%xmm5
 | |
| +	vpsrlq $26,%xmm11,%xmm0
 | |
| +	vpaddq %xmm0,%xmm12,%xmm12
 | |
| +	vpand curve25519_sandy2x_m26(%rip),%xmm11,%xmm11
 | |
| +	vpsrlq $26,%xmm7,%xmm0
 | |
| +	vpaddq %xmm0,%xmm8,%xmm8
 | |
| +	vpand curve25519_sandy2x_m26(%rip),%xmm7,%xmm7
 | |
| +	vpsrlq $25,%xmm12,%xmm0
 | |
| +	vpaddq %xmm0,%xmm13,%xmm13
 | |
| +	vpand curve25519_sandy2x_m25(%rip),%xmm12,%xmm12
 | |
| +	vpsrlq $25,%xmm8,%xmm0
 | |
| +	vpaddq %xmm0,%xmm9,%xmm9
 | |
| +	vpand curve25519_sandy2x_m25(%rip),%xmm8,%xmm8
 | |
| +	vpsrlq $26,%xmm13,%xmm0
 | |
| +	vpaddq %xmm0,%xmm14,%xmm14
 | |
| +	vpand curve25519_sandy2x_m26(%rip),%xmm13,%xmm13
 | |
| +	vpsrlq $26,%xmm9,%xmm0
 | |
| +	vpaddq %xmm0,%xmm10,%xmm10
 | |
| +	vpand curve25519_sandy2x_m26(%rip),%xmm9,%xmm9
 | |
| +	vpsrlq $25,%xmm14,%xmm0
 | |
| +	vpsllq $4,%xmm0,%xmm1
 | |
| +	vpaddq %xmm0,%xmm6,%xmm6
 | |
| +	vpsllq $1,%xmm0,%xmm0
 | |
| +	vpaddq %xmm0,%xmm1,%xmm1
 | |
| +	vpaddq %xmm1,%xmm6,%xmm6
 | |
| +	vpand curve25519_sandy2x_m25(%rip),%xmm14,%xmm14
 | |
| +	vpsrlq $25,%xmm10,%xmm0
 | |
| +	vpaddq %xmm0,%xmm11,%xmm11
 | |
| +	vpand curve25519_sandy2x_m25(%rip),%xmm10,%xmm10
 | |
| +	vpsrlq $26,%xmm6,%xmm0
 | |
| +	vpaddq %xmm0,%xmm5,%xmm5
 | |
| +	vpand curve25519_sandy2x_m26(%rip),%xmm6,%xmm6
 | |
| +	vpunpckhqdq %xmm5,%xmm6,%xmm1
 | |
| +	vpunpcklqdq %xmm5,%xmm6,%xmm0
 | |
| +	vpunpckhqdq %xmm8,%xmm7,%xmm3
 | |
| +	vpunpcklqdq %xmm8,%xmm7,%xmm2
 | |
| +	vpunpckhqdq %xmm10,%xmm9,%xmm5
 | |
| +	vpunpcklqdq %xmm10,%xmm9,%xmm4
 | |
| +	vpunpckhqdq %xmm12,%xmm11,%xmm7
 | |
| +	vpunpcklqdq %xmm12,%xmm11,%xmm6
 | |
| +	vpunpckhqdq %xmm14,%xmm13,%xmm9
 | |
| +	vpunpcklqdq %xmm14,%xmm13,%xmm8
 | |
| +	cmp $0,%rdx
 | |
| +	jne .Lladder_loop
 | |
| +	vmovdqu %xmm1,160(%rdi)
 | |
| +	vmovdqu %xmm0,80(%rdi)
 | |
| +	vmovdqu %xmm3,176(%rdi)
 | |
| +	vmovdqu %xmm2,96(%rdi)
 | |
| +	vmovdqu %xmm5,192(%rdi)
 | |
| +	vmovdqu %xmm4,112(%rdi)
 | |
| +	vmovdqu %xmm7,208(%rdi)
 | |
| +	vmovdqu %xmm6,128(%rdi)
 | |
| +	vmovdqu %xmm9,224(%rdi)
 | |
| +	vmovdqu %xmm8,144(%rdi)
 | |
| +	movq 1824(%rsp),%r11
 | |
| +	movq 1832(%rsp),%r12
 | |
| +	movq 1840(%rsp),%r13
 | |
| +	movq 1848(%rsp),%r14
 | |
| +	add %r11,%rsp
 | |
| +	ret
 | |
| +ENDPROC(curve25519_sandy2x_ladder)
 | |
| +
 | |
| +.align 32
 | |
| +ENTRY(curve25519_sandy2x_ladder_base)
 | |
| +	mov %rsp,%r11
 | |
| +	and $31,%r11
 | |
| +	add $1568,%r11
 | |
| +	sub %r11,%rsp
 | |
| +	movq %r11,1536(%rsp)
 | |
| +	movq %r12,1544(%rsp)
 | |
| +	movq %r13,1552(%rsp)
 | |
| +	vmovdqa curve25519_sandy2x_v0_0(%rip),%xmm0
 | |
| +	vmovdqa curve25519_sandy2x_v1_0(%rip),%xmm1
 | |
| +	vmovdqa curve25519_sandy2x_v9_0(%rip),%xmm2
 | |
| +	vmovdqa %xmm2,0(%rsp)
 | |
| +	vmovdqa %xmm0,16(%rsp)
 | |
| +	vmovdqa %xmm0,32(%rsp)
 | |
| +	vmovdqa %xmm0,48(%rsp)
 | |
| +	vmovdqa %xmm0,64(%rsp)
 | |
| +	vmovdqa %xmm1,80(%rsp)
 | |
| +	vmovdqa %xmm0,96(%rsp)
 | |
| +	vmovdqa %xmm0,112(%rsp)
 | |
| +	vmovdqa %xmm0,128(%rsp)
 | |
| +	vmovdqa %xmm0,144(%rsp)
 | |
| +	vmovdqa %xmm1,%xmm0
 | |
| +	vpxor %xmm1,%xmm1,%xmm1
 | |
| +	vpxor %xmm2,%xmm2,%xmm2
 | |
| +	vpxor %xmm3,%xmm3,%xmm3
 | |
| +	vpxor %xmm4,%xmm4,%xmm4
 | |
| +	vpxor %xmm5,%xmm5,%xmm5
 | |
| +	vpxor %xmm6,%xmm6,%xmm6
 | |
| +	vpxor %xmm7,%xmm7,%xmm7
 | |
| +	vpxor %xmm8,%xmm8,%xmm8
 | |
| +	vpxor %xmm9,%xmm9,%xmm9
 | |
| +	movq 0(%rsi),%rdx
 | |
| +	movq 8(%rsi),%rcx
 | |
| +	movq 16(%rsi),%r8
 | |
| +	movq 24(%rsi),%r9
 | |
| +	shrd $1,%rcx,%rdx
 | |
| +	shrd $1,%r8,%rcx
 | |
| +	shrd $1,%r9,%r8
 | |
| +	shr $1,%r9
 | |
| +	xorq 0(%rsi),%rdx
 | |
| +	xorq 8(%rsi),%rcx
 | |
| +	xorq 16(%rsi),%r8
 | |
| +	xorq 24(%rsi),%r9
 | |
| +	leaq 512(%rsp),%rsi
 | |
| +	mov $64,%rax
 | |
| +
 | |
| +	.align 16
 | |
| +	.Lladder_base_small_loop:
 | |
| +	mov %rdx,%r10
 | |
| +	mov %rcx,%r11
 | |
| +	mov %r8,%r12
 | |
| +	mov %r9,%r13
 | |
| +	shr $1,%rdx
 | |
| +	shr $1,%rcx
 | |
| +	shr $1,%r8
 | |
| +	shr $1,%r9
 | |
| +	and $1,%r10d
 | |
| +	and $1,%r11d
 | |
| +	and $1,%r12d
 | |
| +	and $1,%r13d
 | |
| +	neg %r10
 | |
| +	neg %r11
 | |
| +	neg %r12
 | |
| +	neg %r13
 | |
| +	movl %r10d,0(%rsi)
 | |
| +	movl %r11d,256(%rsi)
 | |
| +	movl %r12d,512(%rsi)
 | |
| +	movl %r13d,768(%rsi)
 | |
| +	add $4,%rsi
 | |
| +	sub $1,%rax
 | |
| +	jne .Lladder_base_small_loop
 | |
| +	mov $255,%rdx
 | |
| +	add $760,%rsi
 | |
| +
 | |
| +	.align 16
 | |
| +	.Lladder_base_loop:
 | |
| +	sub $1,%rdx
 | |
| +	vbroadcastss 0(%rsi),%xmm10
 | |
| +	sub $4,%rsi
 | |
| +	vmovdqa 0(%rsp),%xmm11
 | |
| +	vmovdqa 80(%rsp),%xmm12
 | |
| +	vpxor %xmm11,%xmm0,%xmm13
 | |
| +	vpand %xmm10,%xmm13,%xmm13
 | |
| +	vpxor %xmm13,%xmm0,%xmm0
 | |
| +	vpxor %xmm13,%xmm11,%xmm11
 | |
| +	vpxor %xmm12,%xmm1,%xmm13
 | |
| +	vpand %xmm10,%xmm13,%xmm13
 | |
| +	vpxor %xmm13,%xmm1,%xmm1
 | |
| +	vpxor %xmm13,%xmm12,%xmm12
 | |
| +	vmovdqa 16(%rsp),%xmm13
 | |
| +	vmovdqa 96(%rsp),%xmm14
 | |
| +	vpxor %xmm13,%xmm2,%xmm15
 | |
| +	vpand %xmm10,%xmm15,%xmm15
 | |
| +	vpxor %xmm15,%xmm2,%xmm2
 | |
| +	vpxor %xmm15,%xmm13,%xmm13
 | |
| +	vpxor %xmm14,%xmm3,%xmm15
 | |
| +	vpand %xmm10,%xmm15,%xmm15
 | |
| +	vpxor %xmm15,%xmm3,%xmm3
 | |
| +	vpxor %xmm15,%xmm14,%xmm14
 | |
| +	vmovdqa %xmm13,0(%rsp)
 | |
| +	vmovdqa %xmm14,16(%rsp)
 | |
| +	vmovdqa 32(%rsp),%xmm13
 | |
| +	vmovdqa 112(%rsp),%xmm14
 | |
| +	vpxor %xmm13,%xmm4,%xmm15
 | |
| +	vpand %xmm10,%xmm15,%xmm15
 | |
| +	vpxor %xmm15,%xmm4,%xmm4
 | |
| +	vpxor %xmm15,%xmm13,%xmm13
 | |
| +	vpxor %xmm14,%xmm5,%xmm15
 | |
| +	vpand %xmm10,%xmm15,%xmm15
 | |
| +	vpxor %xmm15,%xmm5,%xmm5
 | |
| +	vpxor %xmm15,%xmm14,%xmm14
 | |
| +	vmovdqa %xmm13,32(%rsp)
 | |
| +	vmovdqa %xmm14,80(%rsp)
 | |
| +	vmovdqa 48(%rsp),%xmm13
 | |
| +	vmovdqa 128(%rsp),%xmm14
 | |
| +	vpxor %xmm13,%xmm6,%xmm15
 | |
| +	vpand %xmm10,%xmm15,%xmm15
 | |
| +	vpxor %xmm15,%xmm6,%xmm6
 | |
| +	vpxor %xmm15,%xmm13,%xmm13
 | |
| +	vpxor %xmm14,%xmm7,%xmm15
 | |
| +	vpand %xmm10,%xmm15,%xmm15
 | |
| +	vpxor %xmm15,%xmm7,%xmm7
 | |
| +	vpxor %xmm15,%xmm14,%xmm14
 | |
| +	vmovdqa %xmm13,48(%rsp)
 | |
| +	vmovdqa %xmm14,96(%rsp)
 | |
| +	vmovdqa 64(%rsp),%xmm13
 | |
| +	vmovdqa 144(%rsp),%xmm14
 | |
| +	vpxor %xmm13,%xmm8,%xmm15
 | |
| +	vpand %xmm10,%xmm15,%xmm15
 | |
| +	vpxor %xmm15,%xmm8,%xmm8
 | |
| +	vpxor %xmm15,%xmm13,%xmm13
 | |
| +	vpxor %xmm14,%xmm9,%xmm15
 | |
| +	vpand %xmm10,%xmm15,%xmm15
 | |
| +	vpxor %xmm15,%xmm9,%xmm9
 | |
| +	vpxor %xmm15,%xmm14,%xmm14
 | |
| +	vmovdqa %xmm13,64(%rsp)
 | |
| +	vmovdqa %xmm14,112(%rsp)
 | |
| +	vpaddq curve25519_sandy2x_subc0(%rip),%xmm11,%xmm10
 | |
| +	vpsubq %xmm12,%xmm10,%xmm10
 | |
| +	vpaddq %xmm12,%xmm11,%xmm11
 | |
| +	vpunpckhqdq %xmm10,%xmm11,%xmm12
 | |
| +	vpunpcklqdq %xmm10,%xmm11,%xmm10
 | |
| +	vpaddq %xmm1,%xmm0,%xmm11
 | |
| +	vpaddq curve25519_sandy2x_subc0(%rip),%xmm0,%xmm0
 | |
| +	vpsubq %xmm1,%xmm0,%xmm0
 | |
| +	vpunpckhqdq %xmm11,%xmm0,%xmm1
 | |
| +	vpunpcklqdq %xmm11,%xmm0,%xmm0
 | |
| +	vpmuludq %xmm0,%xmm10,%xmm11
 | |
| +	vpmuludq %xmm1,%xmm10,%xmm13
 | |
| +	vmovdqa %xmm1,128(%rsp)
 | |
| +	vpaddq %xmm1,%xmm1,%xmm1
 | |
| +	vpmuludq %xmm0,%xmm12,%xmm14
 | |
| +	vmovdqa %xmm0,144(%rsp)
 | |
| +	vpaddq %xmm14,%xmm13,%xmm13
 | |
| +	vpmuludq %xmm1,%xmm12,%xmm0
 | |
| +	vmovdqa %xmm1,160(%rsp)
 | |
| +	vpaddq %xmm3,%xmm2,%xmm1
 | |
| +	vpaddq curve25519_sandy2x_subc2(%rip),%xmm2,%xmm2
 | |
| +	vpsubq %xmm3,%xmm2,%xmm2
 | |
| +	vpunpckhqdq %xmm1,%xmm2,%xmm3
 | |
| +	vpunpcklqdq %xmm1,%xmm2,%xmm1
 | |
| +	vpmuludq %xmm1,%xmm10,%xmm2
 | |
| +	vpaddq %xmm2,%xmm0,%xmm0
 | |
| +	vpmuludq %xmm3,%xmm10,%xmm2
 | |
| +	vmovdqa %xmm3,176(%rsp)
 | |
| +	vpaddq %xmm3,%xmm3,%xmm3
 | |
| +	vpmuludq %xmm1,%xmm12,%xmm14
 | |
| +	vmovdqa %xmm1,192(%rsp)
 | |
| +	vpaddq %xmm14,%xmm2,%xmm2
 | |
| +	vpmuludq %xmm3,%xmm12,%xmm1
 | |
| +	vmovdqa %xmm3,208(%rsp)
 | |
| +	vpaddq %xmm5,%xmm4,%xmm3
 | |
| +	vpaddq curve25519_sandy2x_subc2(%rip),%xmm4,%xmm4
 | |
| +	vpsubq %xmm5,%xmm4,%xmm4
 | |
| +	vpunpckhqdq %xmm3,%xmm4,%xmm5
 | |
| +	vpunpcklqdq %xmm3,%xmm4,%xmm3
 | |
| +	vpmuludq %xmm3,%xmm10,%xmm4
 | |
| +	vpaddq %xmm4,%xmm1,%xmm1
 | |
| +	vpmuludq %xmm5,%xmm10,%xmm4
 | |
| +	vmovdqa %xmm5,224(%rsp)
 | |
| +	vpaddq %xmm5,%xmm5,%xmm5
 | |
| +	vpmuludq %xmm3,%xmm12,%xmm14
 | |
| +	vmovdqa %xmm3,240(%rsp)
 | |
| +	vpaddq %xmm14,%xmm4,%xmm4
 | |
| +	vpaddq %xmm7,%xmm6,%xmm3
 | |
| +	vpaddq curve25519_sandy2x_subc2(%rip),%xmm6,%xmm6
 | |
| +	vpsubq %xmm7,%xmm6,%xmm6
 | |
| +	vpunpckhqdq %xmm3,%xmm6,%xmm7
 | |
| +	vpunpcklqdq %xmm3,%xmm6,%xmm3
 | |
| +	vpmuludq %xmm3,%xmm10,%xmm6
 | |
| +	vpmuludq %xmm5,%xmm12,%xmm14
 | |
| +	vmovdqa %xmm5,256(%rsp)
 | |
| +	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm5,%xmm5
 | |
| +	vmovdqa %xmm5,272(%rsp)
 | |
| +	vpaddq %xmm14,%xmm6,%xmm6
 | |
| +	vpmuludq %xmm7,%xmm10,%xmm5
 | |
| +	vmovdqa %xmm7,288(%rsp)
 | |
| +	vpaddq %xmm7,%xmm7,%xmm7
 | |
| +	vpmuludq %xmm3,%xmm12,%xmm14
 | |
| +	vmovdqa %xmm3,304(%rsp)
 | |
| +	vpaddq %xmm14,%xmm5,%xmm5
 | |
| +	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm3,%xmm3
 | |
| +	vmovdqa %xmm3,320(%rsp)
 | |
| +	vpaddq %xmm9,%xmm8,%xmm3
 | |
| +	vpaddq curve25519_sandy2x_subc2(%rip),%xmm8,%xmm8
 | |
| +	vpsubq %xmm9,%xmm8,%xmm8
 | |
| +	vpunpckhqdq %xmm3,%xmm8,%xmm9
 | |
| +	vpunpcklqdq %xmm3,%xmm8,%xmm3
 | |
| +	vmovdqa %xmm3,336(%rsp)
 | |
| +	vpmuludq %xmm7,%xmm12,%xmm8
 | |
| +	vmovdqa %xmm7,352(%rsp)
 | |
| +	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm7,%xmm7
 | |
| +	vmovdqa %xmm7,368(%rsp)
 | |
| +	vpmuludq %xmm3,%xmm10,%xmm7
 | |
| +	vpaddq %xmm7,%xmm8,%xmm8
 | |
| +	vpmuludq %xmm9,%xmm10,%xmm7
 | |
| +	vmovdqa %xmm9,384(%rsp)
 | |
| +	vpaddq %xmm9,%xmm9,%xmm9
 | |
| +	vpmuludq %xmm3,%xmm12,%xmm10
 | |
| +	vpaddq %xmm10,%xmm7,%xmm7
 | |
| +	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm3,%xmm3
 | |
| +	vmovdqa %xmm3,400(%rsp)
 | |
| +	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm12,%xmm12
 | |
| +	vpmuludq %xmm9,%xmm12,%xmm3
 | |
| +	vmovdqa %xmm9,416(%rsp)
 | |
| +	vpaddq %xmm3,%xmm11,%xmm11
 | |
| +	vmovdqa 0(%rsp),%xmm3
 | |
| +	vmovdqa 16(%rsp),%xmm9
 | |
| +	vpaddq curve25519_sandy2x_subc2(%rip),%xmm3,%xmm10
 | |
| +	vpsubq %xmm9,%xmm10,%xmm10
 | |
| +	vpaddq %xmm9,%xmm3,%xmm3
 | |
| +	vpunpckhqdq %xmm10,%xmm3,%xmm9
 | |
| +	vpunpcklqdq %xmm10,%xmm3,%xmm3
 | |
| +	vpmuludq 144(%rsp),%xmm3,%xmm10
 | |
| +	vpaddq %xmm10,%xmm0,%xmm0
 | |
| +	vpmuludq 128(%rsp),%xmm3,%xmm10
 | |
| +	vpaddq %xmm10,%xmm2,%xmm2
 | |
| +	vpmuludq 192(%rsp),%xmm3,%xmm10
 | |
| +	vpaddq %xmm10,%xmm1,%xmm1
 | |
| +	vpmuludq 176(%rsp),%xmm3,%xmm10
 | |
| +	vpaddq %xmm10,%xmm4,%xmm4
 | |
| +	vpmuludq 240(%rsp),%xmm3,%xmm10
 | |
| +	vpaddq %xmm10,%xmm6,%xmm6
 | |
| +	vpmuludq 224(%rsp),%xmm3,%xmm10
 | |
| +	vpaddq %xmm10,%xmm5,%xmm5
 | |
| +	vpmuludq 304(%rsp),%xmm3,%xmm10
 | |
| +	vpaddq %xmm10,%xmm8,%xmm8
 | |
| +	vpmuludq 288(%rsp),%xmm3,%xmm10
 | |
| +	vpaddq %xmm10,%xmm7,%xmm7
 | |
| +	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm3,%xmm3
 | |
| +	vpmuludq 336(%rsp),%xmm3,%xmm10
 | |
| +	vpaddq %xmm10,%xmm11,%xmm11
 | |
| +	vpmuludq 384(%rsp),%xmm3,%xmm3
 | |
| +	vpaddq %xmm3,%xmm13,%xmm13
 | |
| +	vpmuludq 144(%rsp),%xmm9,%xmm3
 | |
| +	vpaddq %xmm3,%xmm2,%xmm2
 | |
| +	vpmuludq 160(%rsp),%xmm9,%xmm3
 | |
| +	vpaddq %xmm3,%xmm1,%xmm1
 | |
| +	vpmuludq 192(%rsp),%xmm9,%xmm3
 | |
| +	vpaddq %xmm3,%xmm4,%xmm4
 | |
| +	vpmuludq 208(%rsp),%xmm9,%xmm3
 | |
| +	vpaddq %xmm3,%xmm6,%xmm6
 | |
| +	vpmuludq 240(%rsp),%xmm9,%xmm3
 | |
| +	vpaddq %xmm3,%xmm5,%xmm5
 | |
| +	vpmuludq 256(%rsp),%xmm9,%xmm3
 | |
| +	vpaddq %xmm3,%xmm8,%xmm8
 | |
| +	vpmuludq 304(%rsp),%xmm9,%xmm3
 | |
| +	vpaddq %xmm3,%xmm7,%xmm7
 | |
| +	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm9,%xmm9
 | |
| +	vpmuludq 352(%rsp),%xmm9,%xmm3
 | |
| +	vpaddq %xmm3,%xmm11,%xmm11
 | |
| +	vpmuludq 336(%rsp),%xmm9,%xmm3
 | |
| +	vpaddq %xmm3,%xmm13,%xmm13
 | |
| +	vpmuludq 416(%rsp),%xmm9,%xmm9
 | |
| +	vpaddq %xmm9,%xmm0,%xmm0
 | |
| +	vmovdqa 32(%rsp),%xmm3
 | |
| +	vmovdqa 80(%rsp),%xmm9
 | |
| +	vpaddq curve25519_sandy2x_subc2(%rip),%xmm3,%xmm10
 | |
| +	vpsubq %xmm9,%xmm10,%xmm10
 | |
| +	vpaddq %xmm9,%xmm3,%xmm3
 | |
| +	vpunpckhqdq %xmm10,%xmm3,%xmm9
 | |
| +	vpunpcklqdq %xmm10,%xmm3,%xmm3
 | |
| +	vpmuludq 144(%rsp),%xmm3,%xmm10
 | |
| +	vpaddq %xmm10,%xmm1,%xmm1
 | |
| +	vpmuludq 128(%rsp),%xmm3,%xmm10
 | |
| +	vpaddq %xmm10,%xmm4,%xmm4
 | |
| +	vpmuludq 192(%rsp),%xmm3,%xmm10
 | |
| +	vpaddq %xmm10,%xmm6,%xmm6
 | |
| +	vpmuludq 176(%rsp),%xmm3,%xmm10
 | |
| +	vpaddq %xmm10,%xmm5,%xmm5
 | |
| +	vpmuludq 240(%rsp),%xmm3,%xmm10
 | |
| +	vpaddq %xmm10,%xmm8,%xmm8
 | |
| +	vpmuludq 224(%rsp),%xmm3,%xmm10
 | |
| +	vpaddq %xmm10,%xmm7,%xmm7
 | |
| +	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm3,%xmm3
 | |
| +	vpmuludq 304(%rsp),%xmm3,%xmm10
 | |
| +	vpaddq %xmm10,%xmm11,%xmm11
 | |
| +	vpmuludq 288(%rsp),%xmm3,%xmm10
 | |
| +	vpaddq %xmm10,%xmm13,%xmm13
 | |
| +	vpmuludq 336(%rsp),%xmm3,%xmm10
 | |
| +	vpaddq %xmm10,%xmm0,%xmm0
 | |
| +	vpmuludq 384(%rsp),%xmm3,%xmm3
 | |
| +	vpaddq %xmm3,%xmm2,%xmm2
 | |
| +	vpmuludq 144(%rsp),%xmm9,%xmm3
 | |
| +	vpaddq %xmm3,%xmm4,%xmm4
 | |
| +	vpmuludq 160(%rsp),%xmm9,%xmm3
 | |
| +	vpaddq %xmm3,%xmm6,%xmm6
 | |
| +	vpmuludq 192(%rsp),%xmm9,%xmm3
 | |
| +	vpaddq %xmm3,%xmm5,%xmm5
 | |
| +	vpmuludq 208(%rsp),%xmm9,%xmm3
 | |
| +	vpaddq %xmm3,%xmm8,%xmm8
 | |
| +	vpmuludq 240(%rsp),%xmm9,%xmm3
 | |
| +	vpaddq %xmm3,%xmm7,%xmm7
 | |
| +	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm9,%xmm9
 | |
| +	vpmuludq 256(%rsp),%xmm9,%xmm3
 | |
| +	vpaddq %xmm3,%xmm11,%xmm11
 | |
| +	vpmuludq 304(%rsp),%xmm9,%xmm3
 | |
| +	vpaddq %xmm3,%xmm13,%xmm13
 | |
| +	vpmuludq 352(%rsp),%xmm9,%xmm3
 | |
| +	vpaddq %xmm3,%xmm0,%xmm0
 | |
| +	vpmuludq 336(%rsp),%xmm9,%xmm3
 | |
| +	vpaddq %xmm3,%xmm2,%xmm2
 | |
| +	vpmuludq 416(%rsp),%xmm9,%xmm9
 | |
| +	vpaddq %xmm9,%xmm1,%xmm1
 | |
| +	vmovdqa 48(%rsp),%xmm3
 | |
| +	vmovdqa 96(%rsp),%xmm9
 | |
| +	vpaddq curve25519_sandy2x_subc2(%rip),%xmm3,%xmm10
 | |
| +	vpsubq %xmm9,%xmm10,%xmm10
 | |
| +	vpaddq %xmm9,%xmm3,%xmm3
 | |
| +	vpunpckhqdq %xmm10,%xmm3,%xmm9
 | |
| +	vpunpcklqdq %xmm10,%xmm3,%xmm3
 | |
| +	vpmuludq 144(%rsp),%xmm3,%xmm10
 | |
| +	vpaddq %xmm10,%xmm6,%xmm6
 | |
| +	vpmuludq 128(%rsp),%xmm3,%xmm10
 | |
| +	vpaddq %xmm10,%xmm5,%xmm5
 | |
| +	vpmuludq 192(%rsp),%xmm3,%xmm10
 | |
| +	vpaddq %xmm10,%xmm8,%xmm8
 | |
| +	vpmuludq 176(%rsp),%xmm3,%xmm10
 | |
| +	vpaddq %xmm10,%xmm7,%xmm7
 | |
| +	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm3,%xmm3
 | |
| +	vpmuludq 240(%rsp),%xmm3,%xmm10
 | |
| +	vpaddq %xmm10,%xmm11,%xmm11
 | |
| +	vpmuludq 224(%rsp),%xmm3,%xmm10
 | |
| +	vpaddq %xmm10,%xmm13,%xmm13
 | |
| +	vpmuludq 304(%rsp),%xmm3,%xmm10
 | |
| +	vpaddq %xmm10,%xmm0,%xmm0
 | |
| +	vpmuludq 288(%rsp),%xmm3,%xmm10
 | |
| +	vpaddq %xmm10,%xmm2,%xmm2
 | |
| +	vpmuludq 336(%rsp),%xmm3,%xmm10
 | |
| +	vpaddq %xmm10,%xmm1,%xmm1
 | |
| +	vpmuludq 384(%rsp),%xmm3,%xmm3
 | |
| +	vpaddq %xmm3,%xmm4,%xmm4
 | |
| +	vpmuludq 144(%rsp),%xmm9,%xmm3
 | |
| +	vpaddq %xmm3,%xmm5,%xmm5
 | |
| +	vpmuludq 160(%rsp),%xmm9,%xmm3
 | |
| +	vpaddq %xmm3,%xmm8,%xmm8
 | |
| +	vpmuludq 192(%rsp),%xmm9,%xmm3
 | |
| +	vpaddq %xmm3,%xmm7,%xmm7
 | |
| +	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm9,%xmm9
 | |
| +	vpmuludq 208(%rsp),%xmm9,%xmm3
 | |
| +	vpaddq %xmm3,%xmm11,%xmm11
 | |
| +	vpmuludq 240(%rsp),%xmm9,%xmm3
 | |
| +	vpaddq %xmm3,%xmm13,%xmm13
 | |
| +	vpmuludq 256(%rsp),%xmm9,%xmm3
 | |
| +	vpaddq %xmm3,%xmm0,%xmm0
 | |
| +	vpmuludq 304(%rsp),%xmm9,%xmm3
 | |
| +	vpaddq %xmm3,%xmm2,%xmm2
 | |
| +	vpmuludq 352(%rsp),%xmm9,%xmm3
 | |
| +	vpaddq %xmm3,%xmm1,%xmm1
 | |
| +	vpmuludq 336(%rsp),%xmm9,%xmm3
 | |
| +	vpaddq %xmm3,%xmm4,%xmm4
 | |
| +	vpmuludq 416(%rsp),%xmm9,%xmm9
 | |
| +	vpaddq %xmm9,%xmm6,%xmm6
 | |
| +	vmovdqa 64(%rsp),%xmm3
 | |
| +	vmovdqa 112(%rsp),%xmm9
 | |
| +	vpaddq curve25519_sandy2x_subc2(%rip),%xmm3,%xmm10
 | |
| +	vpsubq %xmm9,%xmm10,%xmm10
 | |
| +	vpaddq %xmm9,%xmm3,%xmm3
 | |
| +	vpunpckhqdq %xmm10,%xmm3,%xmm9
 | |
| +	vpunpcklqdq %xmm10,%xmm3,%xmm3
 | |
| +	vpmuludq 144(%rsp),%xmm3,%xmm10
 | |
| +	vpaddq %xmm10,%xmm8,%xmm8
 | |
| +	vpmuludq 128(%rsp),%xmm3,%xmm10
 | |
| +	vpaddq %xmm10,%xmm7,%xmm7
 | |
| +	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm3,%xmm3
 | |
| +	vpmuludq 192(%rsp),%xmm3,%xmm10
 | |
| +	vpaddq %xmm10,%xmm11,%xmm11
 | |
| +	vpmuludq 176(%rsp),%xmm3,%xmm10
 | |
| +	vpaddq %xmm10,%xmm13,%xmm13
 | |
| +	vpmuludq 240(%rsp),%xmm3,%xmm10
 | |
| +	vpaddq %xmm10,%xmm0,%xmm0
 | |
| +	vpmuludq 224(%rsp),%xmm3,%xmm10
 | |
| +	vpaddq %xmm10,%xmm2,%xmm2
 | |
| +	vpmuludq 304(%rsp),%xmm3,%xmm10
 | |
| +	vpaddq %xmm10,%xmm1,%xmm1
 | |
| +	vpmuludq 288(%rsp),%xmm3,%xmm10
 | |
| +	vpaddq %xmm10,%xmm4,%xmm4
 | |
| +	vpmuludq 336(%rsp),%xmm3,%xmm10
 | |
| +	vpaddq %xmm10,%xmm6,%xmm6
 | |
| +	vpmuludq 384(%rsp),%xmm3,%xmm3
 | |
| +	vpaddq %xmm3,%xmm5,%xmm5
 | |
| +	vpmuludq 144(%rsp),%xmm9,%xmm3
 | |
| +	vpaddq %xmm3,%xmm7,%xmm7
 | |
| +	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm9,%xmm9
 | |
| +	vpmuludq 160(%rsp),%xmm9,%xmm3
 | |
| +	vpaddq %xmm3,%xmm11,%xmm11
 | |
| +	vpmuludq 192(%rsp),%xmm9,%xmm3
 | |
| +	vpaddq %xmm3,%xmm13,%xmm13
 | |
| +	vpmuludq 208(%rsp),%xmm9,%xmm3
 | |
| +	vpaddq %xmm3,%xmm0,%xmm0
 | |
| +	vpmuludq 240(%rsp),%xmm9,%xmm3
 | |
| +	vpaddq %xmm3,%xmm2,%xmm2
 | |
| +	vpmuludq 256(%rsp),%xmm9,%xmm3
 | |
| +	vpaddq %xmm3,%xmm1,%xmm1
 | |
| +	vpmuludq 304(%rsp),%xmm9,%xmm3
 | |
| +	vpaddq %xmm3,%xmm4,%xmm4
 | |
| +	vpmuludq 352(%rsp),%xmm9,%xmm3
 | |
| +	vpaddq %xmm3,%xmm6,%xmm6
 | |
| +	vpmuludq 336(%rsp),%xmm9,%xmm3
 | |
| +	vpaddq %xmm3,%xmm5,%xmm5
 | |
| +	vpmuludq 416(%rsp),%xmm9,%xmm9
 | |
| +	vpaddq %xmm9,%xmm8,%xmm8
 | |
| +	vpsrlq $25,%xmm4,%xmm3
 | |
| +	vpaddq %xmm3,%xmm6,%xmm6
 | |
| +	vpand curve25519_sandy2x_m25(%rip),%xmm4,%xmm4
 | |
| +	vpsrlq $26,%xmm11,%xmm3
 | |
| +	vpaddq %xmm3,%xmm13,%xmm13
 | |
| +	vpand curve25519_sandy2x_m26(%rip),%xmm11,%xmm11
 | |
| +	vpsrlq $26,%xmm6,%xmm3
 | |
| +	vpaddq %xmm3,%xmm5,%xmm5
 | |
| +	vpand curve25519_sandy2x_m26(%rip),%xmm6,%xmm6
 | |
| +	vpsrlq $25,%xmm13,%xmm3
 | |
| +	vpaddq %xmm3,%xmm0,%xmm0
 | |
| +	vpand curve25519_sandy2x_m25(%rip),%xmm13,%xmm13
 | |
| +	vpsrlq $25,%xmm5,%xmm3
 | |
| +	vpaddq %xmm3,%xmm8,%xmm8
 | |
| +	vpand curve25519_sandy2x_m25(%rip),%xmm5,%xmm5
 | |
| +	vpsrlq $26,%xmm0,%xmm3
 | |
| +	vpaddq %xmm3,%xmm2,%xmm2
 | |
| +	vpand curve25519_sandy2x_m26(%rip),%xmm0,%xmm0
 | |
| +	vpsrlq $26,%xmm8,%xmm3
 | |
| +	vpaddq %xmm3,%xmm7,%xmm7
 | |
| +	vpand curve25519_sandy2x_m26(%rip),%xmm8,%xmm8
 | |
| +	vpsrlq $25,%xmm2,%xmm3
 | |
| +	vpaddq %xmm3,%xmm1,%xmm1
 | |
| +	vpand curve25519_sandy2x_m25(%rip),%xmm2,%xmm2
 | |
| +	vpsrlq $25,%xmm7,%xmm3
 | |
| +	vpsllq $4,%xmm3,%xmm9
 | |
| +	vpaddq %xmm3,%xmm11,%xmm11
 | |
| +	vpsllq $1,%xmm3,%xmm3
 | |
| +	vpaddq %xmm3,%xmm9,%xmm9
 | |
| +	vpaddq %xmm9,%xmm11,%xmm11
 | |
| +	vpand curve25519_sandy2x_m25(%rip),%xmm7,%xmm7
 | |
| +	vpsrlq $26,%xmm1,%xmm3
 | |
| +	vpaddq %xmm3,%xmm4,%xmm4
 | |
| +	vpand curve25519_sandy2x_m26(%rip),%xmm1,%xmm1
 | |
| +	vpsrlq $26,%xmm11,%xmm3
 | |
| +	vpaddq %xmm3,%xmm13,%xmm13
 | |
| +	vpand curve25519_sandy2x_m26(%rip),%xmm11,%xmm11
 | |
| +	vpsrlq $25,%xmm4,%xmm3
 | |
| +	vpaddq %xmm3,%xmm6,%xmm6
 | |
| +	vpand curve25519_sandy2x_m25(%rip),%xmm4,%xmm4
 | |
| +	vpunpcklqdq %xmm13,%xmm11,%xmm3
 | |
| +	vpunpckhqdq %xmm13,%xmm11,%xmm9
 | |
| +	vpaddq curve25519_sandy2x_subc0(%rip),%xmm9,%xmm10
 | |
| +	vpsubq %xmm3,%xmm10,%xmm10
 | |
| +	vpaddq %xmm9,%xmm3,%xmm3
 | |
| +	vpunpckhqdq %xmm3,%xmm10,%xmm9
 | |
| +	vpunpcklqdq %xmm3,%xmm10,%xmm10
 | |
| +	vpmuludq %xmm10,%xmm10,%xmm3
 | |
| +	vpaddq %xmm10,%xmm10,%xmm10
 | |
| +	vpmuludq %xmm9,%xmm10,%xmm11
 | |
| +	vpunpcklqdq %xmm2,%xmm0,%xmm12
 | |
| +	vpunpckhqdq %xmm2,%xmm0,%xmm0
 | |
| +	vpaddq curve25519_sandy2x_subc2(%rip),%xmm0,%xmm2
 | |
| +	vpsubq %xmm12,%xmm2,%xmm2
 | |
| +	vpaddq %xmm0,%xmm12,%xmm12
 | |
| +	vpunpckhqdq %xmm12,%xmm2,%xmm0
 | |
| +	vpunpcklqdq %xmm12,%xmm2,%xmm2
 | |
| +	vpmuludq %xmm2,%xmm10,%xmm12
 | |
| +	vpaddq %xmm9,%xmm9,%xmm13
 | |
| +	vpmuludq %xmm13,%xmm9,%xmm9
 | |
| +	vpaddq %xmm9,%xmm12,%xmm12
 | |
| +	vpmuludq %xmm0,%xmm10,%xmm9
 | |
| +	vpmuludq %xmm2,%xmm13,%xmm14
 | |
| +	vpaddq %xmm14,%xmm9,%xmm9
 | |
| +	vpunpcklqdq %xmm4,%xmm1,%xmm14
 | |
| +	vpunpckhqdq %xmm4,%xmm1,%xmm1
 | |
| +	vpaddq curve25519_sandy2x_subc2(%rip),%xmm1,%xmm4
 | |
| +	vpsubq %xmm14,%xmm4,%xmm4
 | |
| +	vpaddq %xmm1,%xmm14,%xmm14
 | |
| +	vpunpckhqdq %xmm14,%xmm4,%xmm1
 | |
| +	vpunpcklqdq %xmm14,%xmm4,%xmm4
 | |
| +	vmovdqa %xmm1,0(%rsp)
 | |
| +	vpaddq %xmm1,%xmm1,%xmm1
 | |
| +	vmovdqa %xmm1,16(%rsp)
 | |
| +	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm1,%xmm1
 | |
| +	vmovdqa %xmm1,32(%rsp)
 | |
| +	vpmuludq %xmm4,%xmm10,%xmm1
 | |
| +	vpmuludq %xmm2,%xmm2,%xmm14
 | |
| +	vpaddq %xmm14,%xmm1,%xmm1
 | |
| +	vpmuludq 0(%rsp),%xmm10,%xmm14
 | |
| +	vpmuludq %xmm4,%xmm13,%xmm15
 | |
| +	vpaddq %xmm15,%xmm14,%xmm14
 | |
| +	vpunpcklqdq %xmm5,%xmm6,%xmm15
 | |
| +	vpunpckhqdq %xmm5,%xmm6,%xmm5
 | |
| +	vpaddq curve25519_sandy2x_subc2(%rip),%xmm5,%xmm6
 | |
| +	vpsubq %xmm15,%xmm6,%xmm6
 | |
| +	vpaddq %xmm5,%xmm15,%xmm15
 | |
| +	vpunpckhqdq %xmm15,%xmm6,%xmm5
 | |
| +	vpunpcklqdq %xmm15,%xmm6,%xmm6
 | |
| +	vmovdqa %xmm6,48(%rsp)
 | |
| +	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm6,%xmm6
 | |
| +	vmovdqa %xmm6,64(%rsp)
 | |
| +	vmovdqa %xmm5,80(%rsp)
 | |
| +	vpmuludq curve25519_sandy2x_v38_38(%rip),%xmm5,%xmm5
 | |
| +	vmovdqa %xmm5,96(%rsp)
 | |
| +	vpmuludq 48(%rsp),%xmm10,%xmm5
 | |
| +	vpaddq %xmm0,%xmm0,%xmm6
 | |
| +	vpmuludq %xmm6,%xmm0,%xmm0
 | |
| +	vpaddq %xmm0,%xmm5,%xmm5
 | |
| +	vpmuludq 80(%rsp),%xmm10,%xmm0
 | |
| +	vpmuludq %xmm4,%xmm6,%xmm15
 | |
| +	vpaddq %xmm15,%xmm0,%xmm0
 | |
| +	vpmuludq %xmm6,%xmm13,%xmm15
 | |
| +	vpaddq %xmm15,%xmm1,%xmm1
 | |
| +	vpmuludq %xmm6,%xmm2,%xmm15
 | |
| +	vpaddq %xmm15,%xmm14,%xmm14
 | |
| +	vpunpcklqdq %xmm7,%xmm8,%xmm15
 | |
| +	vpunpckhqdq %xmm7,%xmm8,%xmm7
 | |
| +	vpaddq curve25519_sandy2x_subc2(%rip),%xmm7,%xmm8
 | |
| +	vpsubq %xmm15,%xmm8,%xmm8
 | |
| +	vpaddq %xmm7,%xmm15,%xmm15
 | |
| +	vpunpckhqdq %xmm15,%xmm8,%xmm7
 | |
| +	vpunpcklqdq %xmm15,%xmm8,%xmm8
 | |
| +	vmovdqa %xmm8,112(%rsp)
 | |
| +	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm8,%xmm8
 | |
| +	vmovdqa %xmm8,160(%rsp)
 | |
| +	vpmuludq 112(%rsp),%xmm10,%xmm8
 | |
| +	vpmuludq %xmm7,%xmm10,%xmm10
 | |
| +	vpmuludq curve25519_sandy2x_v38_38(%rip),%xmm7,%xmm15
 | |
| +	vpmuludq %xmm15,%xmm7,%xmm7
 | |
| +	vpaddq %xmm7,%xmm8,%xmm8
 | |
| +	vpmuludq %xmm15,%xmm13,%xmm7
 | |
| +	vpaddq %xmm7,%xmm3,%xmm3
 | |
| +	vpmuludq %xmm15,%xmm2,%xmm7
 | |
| +	vpaddq %xmm7,%xmm11,%xmm11
 | |
| +	vpmuludq 80(%rsp),%xmm13,%xmm7
 | |
| +	vpaddq %xmm7,%xmm7,%xmm7
 | |
| +	vpaddq %xmm7,%xmm8,%xmm8
 | |
| +	vpmuludq 16(%rsp),%xmm13,%xmm7
 | |
| +	vpaddq %xmm7,%xmm5,%xmm5
 | |
| +	vpmuludq 48(%rsp),%xmm13,%xmm7
 | |
| +	vpaddq %xmm7,%xmm0,%xmm0
 | |
| +	vpmuludq 112(%rsp),%xmm13,%xmm7
 | |
| +	vpaddq %xmm7,%xmm10,%xmm10
 | |
| +	vpmuludq %xmm15,%xmm6,%xmm7
 | |
| +	vpaddq %xmm7,%xmm12,%xmm12
 | |
| +	vpmuludq %xmm15,%xmm4,%xmm7
 | |
| +	vpaddq %xmm7,%xmm9,%xmm9
 | |
| +	vpaddq %xmm2,%xmm2,%xmm2
 | |
| +	vpmuludq %xmm4,%xmm2,%xmm7
 | |
| +	vpaddq %xmm7,%xmm5,%xmm5
 | |
| +	vpmuludq 160(%rsp),%xmm2,%xmm7
 | |
| +	vpaddq %xmm7,%xmm3,%xmm3
 | |
| +	vpmuludq 160(%rsp),%xmm6,%xmm7
 | |
| +	vpaddq %xmm7,%xmm11,%xmm11
 | |
| +	vpmuludq 0(%rsp),%xmm2,%xmm7
 | |
| +	vpaddq %xmm7,%xmm0,%xmm0
 | |
| +	vpmuludq 48(%rsp),%xmm2,%xmm7
 | |
| +	vpaddq %xmm7,%xmm8,%xmm8
 | |
| +	vpmuludq 80(%rsp),%xmm2,%xmm2
 | |
| +	vpaddq %xmm2,%xmm10,%xmm10
 | |
| +	vpmuludq 96(%rsp),%xmm4,%xmm2
 | |
| +	vpaddq %xmm2,%xmm11,%xmm11
 | |
| +	vpmuludq %xmm4,%xmm4,%xmm2
 | |
| +	vpaddq %xmm2,%xmm8,%xmm8
 | |
| +	vpaddq %xmm4,%xmm4,%xmm2
 | |
| +	vpmuludq 160(%rsp),%xmm2,%xmm4
 | |
| +	vpaddq %xmm4,%xmm12,%xmm12
 | |
| +	vpmuludq 16(%rsp),%xmm15,%xmm4
 | |
| +	vpaddq %xmm4,%xmm1,%xmm1
 | |
| +	vpmuludq 48(%rsp),%xmm15,%xmm4
 | |
| +	vpaddq %xmm4,%xmm14,%xmm14
 | |
| +	vpmuludq 96(%rsp),%xmm6,%xmm4
 | |
| +	vpaddq %xmm4,%xmm3,%xmm3
 | |
| +	vmovdqa 16(%rsp),%xmm4
 | |
| +	vpmuludq 160(%rsp),%xmm4,%xmm4
 | |
| +	vpaddq %xmm4,%xmm9,%xmm9
 | |
| +	vpmuludq 16(%rsp),%xmm6,%xmm4
 | |
| +	vpaddq %xmm4,%xmm8,%xmm8
 | |
| +	vpmuludq 48(%rsp),%xmm6,%xmm4
 | |
| +	vpaddq %xmm4,%xmm10,%xmm10
 | |
| +	vpmuludq 80(%rsp),%xmm15,%xmm4
 | |
| +	vpaddq %xmm4,%xmm4,%xmm4
 | |
| +	vpaddq %xmm4,%xmm5,%xmm5
 | |
| +	vpmuludq 112(%rsp),%xmm15,%xmm4
 | |
| +	vpaddq %xmm4,%xmm0,%xmm0
 | |
| +	vmovdqa 48(%rsp),%xmm4
 | |
| +	vpaddq %xmm4,%xmm4,%xmm4
 | |
| +	vpmuludq 160(%rsp),%xmm4,%xmm4
 | |
| +	vpaddq %xmm4,%xmm1,%xmm1
 | |
| +	vmovdqa 80(%rsp),%xmm4
 | |
| +	vpaddq %xmm4,%xmm4,%xmm4
 | |
| +	vpmuludq 160(%rsp),%xmm4,%xmm4
 | |
| +	vpaddq %xmm4,%xmm14,%xmm14
 | |
| +	vpmuludq 64(%rsp),%xmm2,%xmm4
 | |
| +	vpaddq %xmm4,%xmm3,%xmm3
 | |
| +	vmovdqa 16(%rsp),%xmm4
 | |
| +	vpmuludq 64(%rsp),%xmm4,%xmm4
 | |
| +	vpaddq %xmm4,%xmm11,%xmm11
 | |
| +	vmovdqa 16(%rsp),%xmm4
 | |
| +	vpmuludq 96(%rsp),%xmm4,%xmm4
 | |
| +	vpaddq %xmm4,%xmm12,%xmm12
 | |
| +	vmovdqa 48(%rsp),%xmm4
 | |
| +	vpmuludq 96(%rsp),%xmm4,%xmm4
 | |
| +	vpaddq %xmm4,%xmm9,%xmm9
 | |
| +	vpmuludq 0(%rsp),%xmm2,%xmm2
 | |
| +	vpaddq %xmm2,%xmm10,%xmm10
 | |
| +	vmovdqa 32(%rsp),%xmm2
 | |
| +	vpmuludq 0(%rsp),%xmm2,%xmm2
 | |
| +	vpaddq %xmm2,%xmm3,%xmm3
 | |
| +	vmovdqa 64(%rsp),%xmm2
 | |
| +	vpmuludq 48(%rsp),%xmm2,%xmm2
 | |
| +	vpaddq %xmm2,%xmm12,%xmm12
 | |
| +	vmovdqa 96(%rsp),%xmm2
 | |
| +	vpmuludq 80(%rsp),%xmm2,%xmm2
 | |
| +	vpaddq %xmm2,%xmm1,%xmm1
 | |
| +	vmovdqa 160(%rsp),%xmm2
 | |
| +	vpmuludq 112(%rsp),%xmm2,%xmm2
 | |
| +	vpaddq %xmm2,%xmm5,%xmm5
 | |
| +	vpsrlq $26,%xmm3,%xmm2
 | |
| +	vpaddq %xmm2,%xmm11,%xmm11
 | |
| +	vpand curve25519_sandy2x_m26(%rip),%xmm3,%xmm3
 | |
| +	vpsrlq $25,%xmm14,%xmm2
 | |
| +	vpaddq %xmm2,%xmm5,%xmm5
 | |
| +	vpand curve25519_sandy2x_m25(%rip),%xmm14,%xmm14
 | |
| +	vpsrlq $25,%xmm11,%xmm2
 | |
| +	vpaddq %xmm2,%xmm12,%xmm12
 | |
| +	vpand curve25519_sandy2x_m25(%rip),%xmm11,%xmm11
 | |
| +	vpsrlq $26,%xmm5,%xmm2
 | |
| +	vpaddq %xmm2,%xmm0,%xmm0
 | |
| +	vpand curve25519_sandy2x_m26(%rip),%xmm5,%xmm5
 | |
| +	vpsrlq $26,%xmm12,%xmm2
 | |
| +	vpaddq %xmm2,%xmm9,%xmm9
 | |
| +	vpand curve25519_sandy2x_m26(%rip),%xmm12,%xmm12
 | |
| +	vpsrlq $25,%xmm0,%xmm2
 | |
| +	vpaddq %xmm2,%xmm8,%xmm8
 | |
| +	vpand curve25519_sandy2x_m25(%rip),%xmm0,%xmm0
 | |
| +	vpsrlq $25,%xmm9,%xmm2
 | |
| +	vpaddq %xmm2,%xmm1,%xmm1
 | |
| +	vpand curve25519_sandy2x_m25(%rip),%xmm9,%xmm9
 | |
| +	vpsrlq $26,%xmm8,%xmm2
 | |
| +	vpaddq %xmm2,%xmm10,%xmm10
 | |
| +	vpand curve25519_sandy2x_m26(%rip),%xmm8,%xmm8
 | |
| +	vpsrlq $26,%xmm1,%xmm2
 | |
| +	vpaddq %xmm2,%xmm14,%xmm14
 | |
| +	vpand curve25519_sandy2x_m26(%rip),%xmm1,%xmm1
 | |
| +	vpsrlq $25,%xmm10,%xmm2
 | |
| +	vpsllq $4,%xmm2,%xmm4
 | |
| +	vpaddq %xmm2,%xmm3,%xmm3
 | |
| +	vpsllq $1,%xmm2,%xmm2
 | |
| +	vpaddq %xmm2,%xmm4,%xmm4
 | |
| +	vpaddq %xmm4,%xmm3,%xmm3
 | |
| +	vpand curve25519_sandy2x_m25(%rip),%xmm10,%xmm10
 | |
| +	vpsrlq $25,%xmm14,%xmm2
 | |
| +	vpaddq %xmm2,%xmm5,%xmm5
 | |
| +	vpand curve25519_sandy2x_m25(%rip),%xmm14,%xmm14
 | |
| +	vpsrlq $26,%xmm3,%xmm2
 | |
| +	vpaddq %xmm2,%xmm11,%xmm11
 | |
| +	vpand curve25519_sandy2x_m26(%rip),%xmm3,%xmm3
 | |
| +	vpunpckhqdq %xmm11,%xmm3,%xmm2
 | |
| +	vmovdqa %xmm2,0(%rsp)
 | |
| +	vpunpcklqdq %xmm11,%xmm3,%xmm2
 | |
| +	vpmuludq curve25519_sandy2x_v9_9(%rip),%xmm2,%xmm2
 | |
| +	vmovdqa %xmm2,80(%rsp)
 | |
| +	vpunpckhqdq %xmm9,%xmm12,%xmm2
 | |
| +	vmovdqa %xmm2,16(%rsp)
 | |
| +	vpunpcklqdq %xmm9,%xmm12,%xmm2
 | |
| +	vpmuludq curve25519_sandy2x_v9_9(%rip),%xmm2,%xmm2
 | |
| +	vmovdqa %xmm2,96(%rsp)
 | |
| +	vpunpckhqdq %xmm14,%xmm1,%xmm2
 | |
| +	vmovdqa %xmm2,32(%rsp)
 | |
| +	vpunpcklqdq %xmm14,%xmm1,%xmm1
 | |
| +	vpmuludq curve25519_sandy2x_v9_9(%rip),%xmm1,%xmm1
 | |
| +	vmovdqa %xmm1,112(%rsp)
 | |
| +	vpunpckhqdq %xmm0,%xmm5,%xmm1
 | |
| +	vmovdqa %xmm1,48(%rsp)
 | |
| +	vpunpcklqdq %xmm0,%xmm5,%xmm0
 | |
| +	vpmuludq curve25519_sandy2x_v9_9(%rip),%xmm0,%xmm0
 | |
| +	vmovdqa %xmm0,160(%rsp)
 | |
| +	vpunpckhqdq %xmm10,%xmm8,%xmm0
 | |
| +	vmovdqa %xmm0,64(%rsp)
 | |
| +	vpunpcklqdq %xmm10,%xmm8,%xmm0
 | |
| +	vpmuludq curve25519_sandy2x_v9_9(%rip),%xmm0,%xmm0
 | |
| +	vmovdqa %xmm0,208(%rsp)
 | |
| +	vmovdqa 144(%rsp),%xmm0
 | |
| +	vpmuludq %xmm0,%xmm0,%xmm1
 | |
| +	vpaddq %xmm0,%xmm0,%xmm0
 | |
| +	vmovdqa 128(%rsp),%xmm2
 | |
| +	vpmuludq %xmm2,%xmm0,%xmm3
 | |
| +	vmovdqa 192(%rsp),%xmm4
 | |
| +	vpmuludq %xmm4,%xmm0,%xmm5
 | |
| +	vmovdqa 176(%rsp),%xmm6
 | |
| +	vpmuludq %xmm6,%xmm0,%xmm7
 | |
| +	vmovdqa 240(%rsp),%xmm8
 | |
| +	vpmuludq %xmm8,%xmm0,%xmm9
 | |
| +	vpmuludq 224(%rsp),%xmm0,%xmm10
 | |
| +	vpmuludq 304(%rsp),%xmm0,%xmm11
 | |
| +	vpmuludq 288(%rsp),%xmm0,%xmm12
 | |
| +	vpmuludq 336(%rsp),%xmm0,%xmm13
 | |
| +	vmovdqa 384(%rsp),%xmm14
 | |
| +	vpmuludq %xmm14,%xmm0,%xmm0
 | |
| +	vpmuludq curve25519_sandy2x_v38_38(%rip),%xmm14,%xmm15
 | |
| +	vpmuludq %xmm15,%xmm14,%xmm14
 | |
| +	vpaddq %xmm14,%xmm13,%xmm13
 | |
| +	vpaddq %xmm6,%xmm6,%xmm14
 | |
| +	vpmuludq %xmm14,%xmm6,%xmm6
 | |
| +	vpaddq %xmm6,%xmm11,%xmm11
 | |
| +	vpaddq %xmm2,%xmm2,%xmm6
 | |
| +	vpmuludq %xmm6,%xmm2,%xmm2
 | |
| +	vpaddq %xmm2,%xmm5,%xmm5
 | |
| +	vpmuludq %xmm15,%xmm6,%xmm2
 | |
| +	vpaddq %xmm2,%xmm1,%xmm1
 | |
| +	vpmuludq %xmm15,%xmm4,%xmm2
 | |
| +	vpaddq %xmm2,%xmm3,%xmm3
 | |
| +	vpmuludq 256(%rsp),%xmm6,%xmm2
 | |
| +	vpaddq %xmm2,%xmm11,%xmm11
 | |
| +	vpmuludq 304(%rsp),%xmm6,%xmm2
 | |
| +	vpaddq %xmm2,%xmm12,%xmm12
 | |
| +	vpmuludq 352(%rsp),%xmm6,%xmm2
 | |
| +	vpaddq %xmm2,%xmm13,%xmm13
 | |
| +	vpmuludq 336(%rsp),%xmm6,%xmm2
 | |
| +	vpaddq %xmm2,%xmm0,%xmm0
 | |
| +	vpmuludq %xmm4,%xmm6,%xmm2
 | |
| +	vpaddq %xmm2,%xmm7,%xmm7
 | |
| +	vpmuludq %xmm14,%xmm6,%xmm2
 | |
| +	vpaddq %xmm2,%xmm9,%xmm9
 | |
| +	vpmuludq %xmm8,%xmm6,%xmm2
 | |
| +	vpaddq %xmm2,%xmm10,%xmm10
 | |
| +	vpmuludq %xmm15,%xmm14,%xmm2
 | |
| +	vpaddq %xmm2,%xmm5,%xmm5
 | |
| +	vpmuludq %xmm15,%xmm8,%xmm2
 | |
| +	vpaddq %xmm2,%xmm7,%xmm7
 | |
| +	vpmuludq %xmm4,%xmm4,%xmm2
 | |
| +	vpaddq %xmm2,%xmm9,%xmm9
 | |
| +	vpmuludq %xmm14,%xmm4,%xmm2
 | |
| +	vpaddq %xmm2,%xmm10,%xmm10
 | |
| +	vpaddq %xmm4,%xmm4,%xmm2
 | |
| +	vpmuludq %xmm8,%xmm2,%xmm4
 | |
| +	vpaddq %xmm4,%xmm11,%xmm11
 | |
| +	vpmuludq 400(%rsp),%xmm2,%xmm4
 | |
| +	vpaddq %xmm4,%xmm1,%xmm1
 | |
| +	vpmuludq 400(%rsp),%xmm14,%xmm4
 | |
| +	vpaddq %xmm4,%xmm3,%xmm3
 | |
| +	vpmuludq 224(%rsp),%xmm2,%xmm4
 | |
| +	vpaddq %xmm4,%xmm12,%xmm12
 | |
| +	vpmuludq 304(%rsp),%xmm2,%xmm4
 | |
| +	vpaddq %xmm4,%xmm13,%xmm13
 | |
| +	vpmuludq 288(%rsp),%xmm2,%xmm2
 | |
| +	vpaddq %xmm2,%xmm0,%xmm0
 | |
| +	vpmuludq 368(%rsp),%xmm8,%xmm2
 | |
| +	vpaddq %xmm2,%xmm3,%xmm3
 | |
| +	vpmuludq %xmm8,%xmm14,%xmm2
 | |
| +	vpaddq %xmm2,%xmm12,%xmm12
 | |
| +	vpmuludq %xmm8,%xmm8,%xmm2
 | |
| +	vpaddq %xmm2,%xmm13,%xmm13
 | |
| +	vpaddq %xmm8,%xmm8,%xmm2
 | |
| +	vpmuludq 400(%rsp),%xmm2,%xmm4
 | |
| +	vpaddq %xmm4,%xmm5,%xmm5
 | |
| +	vpmuludq 256(%rsp),%xmm15,%xmm4
 | |
| +	vpaddq %xmm4,%xmm9,%xmm9
 | |
| +	vpmuludq 304(%rsp),%xmm15,%xmm4
 | |
| +	vpaddq %xmm4,%xmm10,%xmm10
 | |
| +	vpmuludq 368(%rsp),%xmm14,%xmm4
 | |
| +	vpaddq %xmm4,%xmm1,%xmm1
 | |
| +	vmovdqa 256(%rsp),%xmm4
 | |
| +	vpmuludq 400(%rsp),%xmm4,%xmm4
 | |
| +	vpaddq %xmm4,%xmm7,%xmm7
 | |
| +	vpmuludq 256(%rsp),%xmm14,%xmm4
 | |
| +	vpaddq %xmm4,%xmm13,%xmm13
 | |
| +	vpmuludq 304(%rsp),%xmm14,%xmm4
 | |
| +	vpaddq %xmm4,%xmm0,%xmm0
 | |
| +	vpmuludq 352(%rsp),%xmm15,%xmm4
 | |
| +	vpaddq %xmm4,%xmm11,%xmm11
 | |
| +	vpmuludq 336(%rsp),%xmm15,%xmm4
 | |
| +	vpaddq %xmm4,%xmm12,%xmm12
 | |
| +	vmovdqa 304(%rsp),%xmm4
 | |
| +	vpaddq %xmm4,%xmm4,%xmm4
 | |
| +	vpmuludq 400(%rsp),%xmm4,%xmm4
 | |
| +	vpaddq %xmm4,%xmm9,%xmm9
 | |
| +	vpmuludq 320(%rsp),%xmm2,%xmm4
 | |
| +	vpaddq %xmm4,%xmm1,%xmm1
 | |
| +	vmovdqa 256(%rsp),%xmm4
 | |
| +	vpmuludq 320(%rsp),%xmm4,%xmm4
 | |
| +	vpaddq %xmm4,%xmm3,%xmm3
 | |
| +	vmovdqa 256(%rsp),%xmm4
 | |
| +	vpmuludq 368(%rsp),%xmm4,%xmm4
 | |
| +	vpaddq %xmm4,%xmm5,%xmm5
 | |
| +	vmovdqa 304(%rsp),%xmm4
 | |
| +	vpmuludq 368(%rsp),%xmm4,%xmm4
 | |
| +	vpaddq %xmm4,%xmm7,%xmm7
 | |
| +	vmovdqa 352(%rsp),%xmm4
 | |
| +	vpmuludq 400(%rsp),%xmm4,%xmm4
 | |
| +	vpaddq %xmm4,%xmm10,%xmm10
 | |
| +	vpmuludq 224(%rsp),%xmm2,%xmm2
 | |
| +	vpaddq %xmm2,%xmm0,%xmm0
 | |
| +	vmovdqa 272(%rsp),%xmm2
 | |
| +	vpmuludq 224(%rsp),%xmm2,%xmm2
 | |
| +	vpaddq %xmm2,%xmm1,%xmm1
 | |
| +	vmovdqa 320(%rsp),%xmm2
 | |
| +	vpmuludq 304(%rsp),%xmm2,%xmm2
 | |
| +	vpaddq %xmm2,%xmm5,%xmm5
 | |
| +	vmovdqa 368(%rsp),%xmm2
 | |
| +	vpmuludq 288(%rsp),%xmm2,%xmm2
 | |
| +	vpaddq %xmm2,%xmm9,%xmm9
 | |
| +	vmovdqa 400(%rsp),%xmm2
 | |
| +	vpmuludq 336(%rsp),%xmm2,%xmm2
 | |
| +	vpaddq %xmm2,%xmm11,%xmm11
 | |
| +	vpsrlq $26,%xmm1,%xmm2
 | |
| +	vpaddq %xmm2,%xmm3,%xmm3
 | |
| +	vpand curve25519_sandy2x_m26(%rip),%xmm1,%xmm1
 | |
| +	vpsrlq $25,%xmm10,%xmm2
 | |
| +	vpaddq %xmm2,%xmm11,%xmm11
 | |
| +	vpand curve25519_sandy2x_m25(%rip),%xmm10,%xmm10
 | |
| +	vpsrlq $25,%xmm3,%xmm2
 | |
| +	vpaddq %xmm2,%xmm5,%xmm5
 | |
| +	vpand curve25519_sandy2x_m25(%rip),%xmm3,%xmm3
 | |
| +	vpsrlq $26,%xmm11,%xmm2
 | |
| +	vpaddq %xmm2,%xmm12,%xmm12
 | |
| +	vpand curve25519_sandy2x_m26(%rip),%xmm11,%xmm11
 | |
| +	vpsrlq $26,%xmm5,%xmm2
 | |
| +	vpaddq %xmm2,%xmm7,%xmm7
 | |
| +	vpand curve25519_sandy2x_m26(%rip),%xmm5,%xmm5
 | |
| +	vpsrlq $25,%xmm12,%xmm2
 | |
| +	vpaddq %xmm2,%xmm13,%xmm13
 | |
| +	vpand curve25519_sandy2x_m25(%rip),%xmm12,%xmm12
 | |
| +	vpsrlq $25,%xmm7,%xmm2
 | |
| +	vpaddq %xmm2,%xmm9,%xmm9
 | |
| +	vpand curve25519_sandy2x_m25(%rip),%xmm7,%xmm7
 | |
| +	vpsrlq $26,%xmm13,%xmm2
 | |
| +	vpaddq %xmm2,%xmm0,%xmm0
 | |
| +	vpand curve25519_sandy2x_m26(%rip),%xmm13,%xmm13
 | |
| +	vpsrlq $26,%xmm9,%xmm2
 | |
| +	vpaddq %xmm2,%xmm10,%xmm10
 | |
| +	vpand curve25519_sandy2x_m26(%rip),%xmm9,%xmm9
 | |
| +	vpsrlq $25,%xmm0,%xmm2
 | |
| +	vpsllq $4,%xmm2,%xmm4
 | |
| +	vpaddq %xmm2,%xmm1,%xmm1
 | |
| +	vpsllq $1,%xmm2,%xmm2
 | |
| +	vpaddq %xmm2,%xmm4,%xmm4
 | |
| +	vpaddq %xmm4,%xmm1,%xmm1
 | |
| +	vpand curve25519_sandy2x_m25(%rip),%xmm0,%xmm0
 | |
| +	vpsrlq $25,%xmm10,%xmm2
 | |
| +	vpaddq %xmm2,%xmm11,%xmm11
 | |
| +	vpand curve25519_sandy2x_m25(%rip),%xmm10,%xmm10
 | |
| +	vpsrlq $26,%xmm1,%xmm2
 | |
| +	vpaddq %xmm2,%xmm3,%xmm3
 | |
| +	vpand curve25519_sandy2x_m26(%rip),%xmm1,%xmm1
 | |
| +	vpunpckhqdq %xmm3,%xmm1,%xmm2
 | |
| +	vpunpcklqdq %xmm3,%xmm1,%xmm1
 | |
| +	vmovdqa %xmm1,176(%rsp)
 | |
| +	vpaddq curve25519_sandy2x_subc0(%rip),%xmm2,%xmm3
 | |
| +	vpsubq %xmm1,%xmm3,%xmm3
 | |
| +	vpunpckhqdq %xmm3,%xmm2,%xmm1
 | |
| +	vpunpcklqdq %xmm3,%xmm2,%xmm2
 | |
| +	vmovdqa %xmm2,192(%rsp)
 | |
| +	vmovdqa %xmm1,224(%rsp)
 | |
| +	vpsllq $1,%xmm1,%xmm1
 | |
| +	vmovdqa %xmm1,240(%rsp)
 | |
| +	vpmuludq curve25519_sandy2x_v121666_121666(%rip),%xmm3,%xmm3
 | |
| +	vmovdqa 80(%rsp),%xmm1
 | |
| +	vpunpcklqdq %xmm1,%xmm3,%xmm2
 | |
| +	vpunpckhqdq %xmm1,%xmm3,%xmm1
 | |
| +	vpunpckhqdq %xmm7,%xmm5,%xmm3
 | |
| +	vpunpcklqdq %xmm7,%xmm5,%xmm4
 | |
| +	vmovdqa %xmm4,256(%rsp)
 | |
| +	vpaddq curve25519_sandy2x_subc2(%rip),%xmm3,%xmm5
 | |
| +	vpsubq %xmm4,%xmm5,%xmm5
 | |
| +	vpunpckhqdq %xmm5,%xmm3,%xmm4
 | |
| +	vpunpcklqdq %xmm5,%xmm3,%xmm3
 | |
| +	vmovdqa %xmm3,272(%rsp)
 | |
| +	vmovdqa %xmm4,288(%rsp)
 | |
| +	vpsllq $1,%xmm4,%xmm4
 | |
| +	vmovdqa %xmm4,304(%rsp)
 | |
| +	vpmuludq curve25519_sandy2x_v121666_121666(%rip),%xmm5,%xmm5
 | |
| +	vmovdqa 96(%rsp),%xmm3
 | |
| +	vpunpcklqdq %xmm3,%xmm5,%xmm4
 | |
| +	vpunpckhqdq %xmm3,%xmm5,%xmm3
 | |
| +	vpunpckhqdq %xmm10,%xmm9,%xmm5
 | |
| +	vpunpcklqdq %xmm10,%xmm9,%xmm6
 | |
| +	vmovdqa %xmm6,320(%rsp)
 | |
| +	vpaddq curve25519_sandy2x_subc2(%rip),%xmm5,%xmm7
 | |
| +	vpsubq %xmm6,%xmm7,%xmm7
 | |
| +	vpunpckhqdq %xmm7,%xmm5,%xmm6
 | |
| +	vpunpcklqdq %xmm7,%xmm5,%xmm5
 | |
| +	vmovdqa %xmm5,336(%rsp)
 | |
| +	vmovdqa %xmm6,352(%rsp)
 | |
| +	vpsllq $1,%xmm6,%xmm6
 | |
| +	vmovdqa %xmm6,368(%rsp)
 | |
| +	vpmuludq curve25519_sandy2x_v121666_121666(%rip),%xmm7,%xmm7
 | |
| +	vmovdqa 112(%rsp),%xmm5
 | |
| +	vpunpcklqdq %xmm5,%xmm7,%xmm6
 | |
| +	vpunpckhqdq %xmm5,%xmm7,%xmm5
 | |
| +	vpunpckhqdq %xmm12,%xmm11,%xmm7
 | |
| +	vpunpcklqdq %xmm12,%xmm11,%xmm8
 | |
| +	vmovdqa %xmm8,384(%rsp)
 | |
| +	vpaddq curve25519_sandy2x_subc2(%rip),%xmm7,%xmm9
 | |
| +	vpsubq %xmm8,%xmm9,%xmm9
 | |
| +	vpunpckhqdq %xmm9,%xmm7,%xmm8
 | |
| +	vpunpcklqdq %xmm9,%xmm7,%xmm7
 | |
| +	vmovdqa %xmm7,400(%rsp)
 | |
| +	vmovdqa %xmm8,416(%rsp)
 | |
| +	vpsllq $1,%xmm8,%xmm8
 | |
| +	vmovdqa %xmm8,432(%rsp)
 | |
| +	vpmuludq curve25519_sandy2x_v121666_121666(%rip),%xmm9,%xmm9
 | |
| +	vmovdqa 160(%rsp),%xmm7
 | |
| +	vpunpcklqdq %xmm7,%xmm9,%xmm8
 | |
| +	vpunpckhqdq %xmm7,%xmm9,%xmm7
 | |
| +	vpunpckhqdq %xmm0,%xmm13,%xmm9
 | |
| +	vpunpcklqdq %xmm0,%xmm13,%xmm0
 | |
| +	vmovdqa %xmm0,160(%rsp)
 | |
| +	vpaddq curve25519_sandy2x_subc2(%rip),%xmm9,%xmm10
 | |
| +	vpsubq %xmm0,%xmm10,%xmm10
 | |
| +	vpunpckhqdq %xmm10,%xmm9,%xmm0
 | |
| +	vpunpcklqdq %xmm10,%xmm9,%xmm9
 | |
| +	vmovdqa %xmm9,448(%rsp)
 | |
| +	vmovdqa %xmm0,464(%rsp)
 | |
| +	vpsllq $1,%xmm0,%xmm0
 | |
| +	vmovdqa %xmm0,480(%rsp)
 | |
| +	vpmuludq curve25519_sandy2x_v121666_121666(%rip),%xmm10,%xmm10
 | |
| +	vmovdqa 208(%rsp),%xmm0
 | |
| +	vpunpcklqdq %xmm0,%xmm10,%xmm9
 | |
| +	vpunpckhqdq %xmm0,%xmm10,%xmm0
 | |
| +	vpsrlq $26,%xmm2,%xmm10
 | |
| +	vpaddq %xmm10,%xmm1,%xmm1
 | |
| +	vpand curve25519_sandy2x_m26(%rip),%xmm2,%xmm2
 | |
| +	vpsrlq $25,%xmm5,%xmm10
 | |
| +	vpaddq %xmm10,%xmm8,%xmm8
 | |
| +	vpand curve25519_sandy2x_m25(%rip),%xmm5,%xmm5
 | |
| +	vpsrlq $25,%xmm1,%xmm10
 | |
| +	vpaddq %xmm10,%xmm4,%xmm4
 | |
| +	vpand curve25519_sandy2x_m25(%rip),%xmm1,%xmm1
 | |
| +	vpsrlq $26,%xmm8,%xmm10
 | |
| +	vpaddq %xmm10,%xmm7,%xmm7
 | |
| +	vpand curve25519_sandy2x_m26(%rip),%xmm8,%xmm8
 | |
| +	vpsrlq $26,%xmm4,%xmm10
 | |
| +	vpaddq %xmm10,%xmm3,%xmm3
 | |
| +	vpand curve25519_sandy2x_m26(%rip),%xmm4,%xmm4
 | |
| +	vpsrlq $25,%xmm7,%xmm10
 | |
| +	vpaddq %xmm10,%xmm9,%xmm9
 | |
| +	vpand curve25519_sandy2x_m25(%rip),%xmm7,%xmm7
 | |
| +	vpsrlq $25,%xmm3,%xmm10
 | |
| +	vpaddq %xmm10,%xmm6,%xmm6
 | |
| +	vpand curve25519_sandy2x_m25(%rip),%xmm3,%xmm3
 | |
| +	vpsrlq $26,%xmm9,%xmm10
 | |
| +	vpaddq %xmm10,%xmm0,%xmm0
 | |
| +	vpand curve25519_sandy2x_m26(%rip),%xmm9,%xmm9
 | |
| +	vpsrlq $26,%xmm6,%xmm10
 | |
| +	vpaddq %xmm10,%xmm5,%xmm5
 | |
| +	vpand curve25519_sandy2x_m26(%rip),%xmm6,%xmm6
 | |
| +	vpsrlq $25,%xmm0,%xmm10
 | |
| +	vpsllq $4,%xmm10,%xmm11
 | |
| +	vpaddq %xmm10,%xmm2,%xmm2
 | |
| +	vpsllq $1,%xmm10,%xmm10
 | |
| +	vpaddq %xmm10,%xmm11,%xmm11
 | |
| +	vpaddq %xmm11,%xmm2,%xmm2
 | |
| +	vpand curve25519_sandy2x_m25(%rip),%xmm0,%xmm0
 | |
| +	vpsrlq $25,%xmm5,%xmm10
 | |
| +	vpaddq %xmm10,%xmm8,%xmm8
 | |
| +	vpand curve25519_sandy2x_m25(%rip),%xmm5,%xmm5
 | |
| +	vpsrlq $26,%xmm2,%xmm10
 | |
| +	vpaddq %xmm10,%xmm1,%xmm1
 | |
| +	vpand curve25519_sandy2x_m26(%rip),%xmm2,%xmm2
 | |
| +	vpunpckhqdq %xmm1,%xmm2,%xmm10
 | |
| +	vmovdqa %xmm10,80(%rsp)
 | |
| +	vpunpcklqdq %xmm1,%xmm2,%xmm1
 | |
| +	vpunpckhqdq %xmm3,%xmm4,%xmm2
 | |
| +	vmovdqa %xmm2,96(%rsp)
 | |
| +	vpunpcklqdq %xmm3,%xmm4,%xmm2
 | |
| +	vpunpckhqdq %xmm5,%xmm6,%xmm3
 | |
| +	vmovdqa %xmm3,112(%rsp)
 | |
| +	vpunpcklqdq %xmm5,%xmm6,%xmm3
 | |
| +	vpunpckhqdq %xmm7,%xmm8,%xmm4
 | |
| +	vmovdqa %xmm4,128(%rsp)
 | |
| +	vpunpcklqdq %xmm7,%xmm8,%xmm4
 | |
| +	vpunpckhqdq %xmm0,%xmm9,%xmm5
 | |
| +	vmovdqa %xmm5,144(%rsp)
 | |
| +	vpunpcklqdq %xmm0,%xmm9,%xmm0
 | |
| +	vmovdqa 176(%rsp),%xmm5
 | |
| +	vpaddq %xmm5,%xmm1,%xmm1
 | |
| +	vpunpcklqdq %xmm1,%xmm5,%xmm6
 | |
| +	vpunpckhqdq %xmm1,%xmm5,%xmm1
 | |
| +	vpmuludq 224(%rsp),%xmm6,%xmm5
 | |
| +	vpmuludq 192(%rsp),%xmm1,%xmm7
 | |
| +	vpaddq %xmm7,%xmm5,%xmm5
 | |
| +	vpmuludq 272(%rsp),%xmm6,%xmm7
 | |
| +	vpmuludq 240(%rsp),%xmm1,%xmm8
 | |
| +	vpaddq %xmm8,%xmm7,%xmm7
 | |
| +	vpmuludq 288(%rsp),%xmm6,%xmm8
 | |
| +	vpmuludq 272(%rsp),%xmm1,%xmm9
 | |
| +	vpaddq %xmm9,%xmm8,%xmm8
 | |
| +	vpmuludq 336(%rsp),%xmm6,%xmm9
 | |
| +	vpmuludq 304(%rsp),%xmm1,%xmm10
 | |
| +	vpaddq %xmm10,%xmm9,%xmm9
 | |
| +	vpmuludq 352(%rsp),%xmm6,%xmm10
 | |
| +	vpmuludq 336(%rsp),%xmm1,%xmm11
 | |
| +	vpaddq %xmm11,%xmm10,%xmm10
 | |
| +	vpmuludq 400(%rsp),%xmm6,%xmm11
 | |
| +	vpmuludq 368(%rsp),%xmm1,%xmm12
 | |
| +	vpaddq %xmm12,%xmm11,%xmm11
 | |
| +	vpmuludq 416(%rsp),%xmm6,%xmm12
 | |
| +	vpmuludq 400(%rsp),%xmm1,%xmm13
 | |
| +	vpaddq %xmm13,%xmm12,%xmm12
 | |
| +	vpmuludq 448(%rsp),%xmm6,%xmm13
 | |
| +	vpmuludq 432(%rsp),%xmm1,%xmm14
 | |
| +	vpaddq %xmm14,%xmm13,%xmm13
 | |
| +	vpmuludq 464(%rsp),%xmm6,%xmm14
 | |
| +	vpmuludq 448(%rsp),%xmm1,%xmm15
 | |
| +	vpaddq %xmm15,%xmm14,%xmm14
 | |
| +	vpmuludq 192(%rsp),%xmm6,%xmm6
 | |
| +	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm1,%xmm1
 | |
| +	vpmuludq 480(%rsp),%xmm1,%xmm1
 | |
| +	vpaddq %xmm1,%xmm6,%xmm6
 | |
| +	vmovdqa 256(%rsp),%xmm1
 | |
| +	vpaddq %xmm1,%xmm2,%xmm2
 | |
| +	vpunpcklqdq %xmm2,%xmm1,%xmm15
 | |
| +	vpunpckhqdq %xmm2,%xmm1,%xmm1
 | |
| +	vpmuludq 192(%rsp),%xmm15,%xmm2
 | |
| +	vpaddq %xmm2,%xmm7,%xmm7
 | |
| +	vpmuludq 224(%rsp),%xmm15,%xmm2
 | |
| +	vpaddq %xmm2,%xmm8,%xmm8
 | |
| +	vpmuludq 272(%rsp),%xmm15,%xmm2
 | |
| +	vpaddq %xmm2,%xmm9,%xmm9
 | |
| +	vpmuludq 288(%rsp),%xmm15,%xmm2
 | |
| +	vpaddq %xmm2,%xmm10,%xmm10
 | |
| +	vpmuludq 336(%rsp),%xmm15,%xmm2
 | |
| +	vpaddq %xmm2,%xmm11,%xmm11
 | |
| +	vpmuludq 352(%rsp),%xmm15,%xmm2
 | |
| +	vpaddq %xmm2,%xmm12,%xmm12
 | |
| +	vpmuludq 400(%rsp),%xmm15,%xmm2
 | |
| +	vpaddq %xmm2,%xmm13,%xmm13
 | |
| +	vpmuludq 416(%rsp),%xmm15,%xmm2
 | |
| +	vpaddq %xmm2,%xmm14,%xmm14
 | |
| +	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm15,%xmm15
 | |
| +	vpmuludq 448(%rsp),%xmm15,%xmm2
 | |
| +	vpaddq %xmm2,%xmm6,%xmm6
 | |
| +	vpmuludq 464(%rsp),%xmm15,%xmm15
 | |
| +	vpaddq %xmm15,%xmm5,%xmm5
 | |
| +	vpmuludq 192(%rsp),%xmm1,%xmm2
 | |
| +	vpaddq %xmm2,%xmm8,%xmm8
 | |
| +	vpmuludq 240(%rsp),%xmm1,%xmm2
 | |
| +	vpaddq %xmm2,%xmm9,%xmm9
 | |
| +	vpmuludq 272(%rsp),%xmm1,%xmm2
 | |
| +	vpaddq %xmm2,%xmm10,%xmm10
 | |
| +	vpmuludq 304(%rsp),%xmm1,%xmm2
 | |
| +	vpaddq %xmm2,%xmm11,%xmm11
 | |
| +	vpmuludq 336(%rsp),%xmm1,%xmm2
 | |
| +	vpaddq %xmm2,%xmm12,%xmm12
 | |
| +	vpmuludq 368(%rsp),%xmm1,%xmm2
 | |
| +	vpaddq %xmm2,%xmm13,%xmm13
 | |
| +	vpmuludq 400(%rsp),%xmm1,%xmm2
 | |
| +	vpaddq %xmm2,%xmm14,%xmm14
 | |
| +	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm1,%xmm1
 | |
| +	vpmuludq 432(%rsp),%xmm1,%xmm2
 | |
| +	vpaddq %xmm2,%xmm6,%xmm6
 | |
| +	vpmuludq 448(%rsp),%xmm1,%xmm2
 | |
| +	vpaddq %xmm2,%xmm5,%xmm5
 | |
| +	vpmuludq 480(%rsp),%xmm1,%xmm1
 | |
| +	vpaddq %xmm1,%xmm7,%xmm7
 | |
| +	vmovdqa 320(%rsp),%xmm1
 | |
| +	vpaddq %xmm1,%xmm3,%xmm3
 | |
| +	vpunpcklqdq %xmm3,%xmm1,%xmm2
 | |
| +	vpunpckhqdq %xmm3,%xmm1,%xmm1
 | |
| +	vpmuludq 192(%rsp),%xmm2,%xmm3
 | |
| +	vpaddq %xmm3,%xmm9,%xmm9
 | |
| +	vpmuludq 224(%rsp),%xmm2,%xmm3
 | |
| +	vpaddq %xmm3,%xmm10,%xmm10
 | |
| +	vpmuludq 272(%rsp),%xmm2,%xmm3
 | |
| +	vpaddq %xmm3,%xmm11,%xmm11
 | |
| +	vpmuludq 288(%rsp),%xmm2,%xmm3
 | |
| +	vpaddq %xmm3,%xmm12,%xmm12
 | |
| +	vpmuludq 336(%rsp),%xmm2,%xmm3
 | |
| +	vpaddq %xmm3,%xmm13,%xmm13
 | |
| +	vpmuludq 352(%rsp),%xmm2,%xmm3
 | |
| +	vpaddq %xmm3,%xmm14,%xmm14
 | |
| +	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm2,%xmm2
 | |
| +	vpmuludq 400(%rsp),%xmm2,%xmm3
 | |
| +	vpaddq %xmm3,%xmm6,%xmm6
 | |
| +	vpmuludq 416(%rsp),%xmm2,%xmm3
 | |
| +	vpaddq %xmm3,%xmm5,%xmm5
 | |
| +	vpmuludq 448(%rsp),%xmm2,%xmm3
 | |
| +	vpaddq %xmm3,%xmm7,%xmm7
 | |
| +	vpmuludq 464(%rsp),%xmm2,%xmm2
 | |
| +	vpaddq %xmm2,%xmm8,%xmm8
 | |
| +	vpmuludq 192(%rsp),%xmm1,%xmm2
 | |
| +	vpaddq %xmm2,%xmm10,%xmm10
 | |
| +	vpmuludq 240(%rsp),%xmm1,%xmm2
 | |
| +	vpaddq %xmm2,%xmm11,%xmm11
 | |
| +	vpmuludq 272(%rsp),%xmm1,%xmm2
 | |
| +	vpaddq %xmm2,%xmm12,%xmm12
 | |
| +	vpmuludq 304(%rsp),%xmm1,%xmm2
 | |
| +	vpaddq %xmm2,%xmm13,%xmm13
 | |
| +	vpmuludq 336(%rsp),%xmm1,%xmm2
 | |
| +	vpaddq %xmm2,%xmm14,%xmm14
 | |
| +	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm1,%xmm1
 | |
| +	vpmuludq 368(%rsp),%xmm1,%xmm2
 | |
| +	vpaddq %xmm2,%xmm6,%xmm6
 | |
| +	vpmuludq 400(%rsp),%xmm1,%xmm2
 | |
| +	vpaddq %xmm2,%xmm5,%xmm5
 | |
| +	vpmuludq 432(%rsp),%xmm1,%xmm2
 | |
| +	vpaddq %xmm2,%xmm7,%xmm7
 | |
| +	vpmuludq 448(%rsp),%xmm1,%xmm2
 | |
| +	vpaddq %xmm2,%xmm8,%xmm8
 | |
| +	vpmuludq 480(%rsp),%xmm1,%xmm1
 | |
| +	vpaddq %xmm1,%xmm9,%xmm9
 | |
| +	vmovdqa 384(%rsp),%xmm1
 | |
| +	vpaddq %xmm1,%xmm4,%xmm4
 | |
| +	vpunpcklqdq %xmm4,%xmm1,%xmm2
 | |
| +	vpunpckhqdq %xmm4,%xmm1,%xmm1
 | |
| +	vpmuludq 192(%rsp),%xmm2,%xmm3
 | |
| +	vpaddq %xmm3,%xmm11,%xmm11
 | |
| +	vpmuludq 224(%rsp),%xmm2,%xmm3
 | |
| +	vpaddq %xmm3,%xmm12,%xmm12
 | |
| +	vpmuludq 272(%rsp),%xmm2,%xmm3
 | |
| +	vpaddq %xmm3,%xmm13,%xmm13
 | |
| +	vpmuludq 288(%rsp),%xmm2,%xmm3
 | |
| +	vpaddq %xmm3,%xmm14,%xmm14
 | |
| +	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm2,%xmm2
 | |
| +	vpmuludq 336(%rsp),%xmm2,%xmm3
 | |
| +	vpaddq %xmm3,%xmm6,%xmm6
 | |
| +	vpmuludq 352(%rsp),%xmm2,%xmm3
 | |
| +	vpaddq %xmm3,%xmm5,%xmm5
 | |
| +	vpmuludq 400(%rsp),%xmm2,%xmm3
 | |
| +	vpaddq %xmm3,%xmm7,%xmm7
 | |
| +	vpmuludq 416(%rsp),%xmm2,%xmm3
 | |
| +	vpaddq %xmm3,%xmm8,%xmm8
 | |
| +	vpmuludq 448(%rsp),%xmm2,%xmm3
 | |
| +	vpaddq %xmm3,%xmm9,%xmm9
 | |
| +	vpmuludq 464(%rsp),%xmm2,%xmm2
 | |
| +	vpaddq %xmm2,%xmm10,%xmm10
 | |
| +	vpmuludq 192(%rsp),%xmm1,%xmm2
 | |
| +	vpaddq %xmm2,%xmm12,%xmm12
 | |
| +	vpmuludq 240(%rsp),%xmm1,%xmm2
 | |
| +	vpaddq %xmm2,%xmm13,%xmm13
 | |
| +	vpmuludq 272(%rsp),%xmm1,%xmm2
 | |
| +	vpaddq %xmm2,%xmm14,%xmm14
 | |
| +	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm1,%xmm1
 | |
| +	vpmuludq 304(%rsp),%xmm1,%xmm2
 | |
| +	vpaddq %xmm2,%xmm6,%xmm6
 | |
| +	vpmuludq 336(%rsp),%xmm1,%xmm2
 | |
| +	vpaddq %xmm2,%xmm5,%xmm5
 | |
| +	vpmuludq 368(%rsp),%xmm1,%xmm2
 | |
| +	vpaddq %xmm2,%xmm7,%xmm7
 | |
| +	vpmuludq 400(%rsp),%xmm1,%xmm2
 | |
| +	vpaddq %xmm2,%xmm8,%xmm8
 | |
| +	vpmuludq 432(%rsp),%xmm1,%xmm2
 | |
| +	vpaddq %xmm2,%xmm9,%xmm9
 | |
| +	vpmuludq 448(%rsp),%xmm1,%xmm2
 | |
| +	vpaddq %xmm2,%xmm10,%xmm10
 | |
| +	vpmuludq 480(%rsp),%xmm1,%xmm1
 | |
| +	vpaddq %xmm1,%xmm11,%xmm11
 | |
| +	vmovdqa 160(%rsp),%xmm1
 | |
| +	vpaddq %xmm1,%xmm0,%xmm0
 | |
| +	vpunpcklqdq %xmm0,%xmm1,%xmm2
 | |
| +	vpunpckhqdq %xmm0,%xmm1,%xmm0
 | |
| +	vpmuludq 192(%rsp),%xmm2,%xmm1
 | |
| +	vpaddq %xmm1,%xmm13,%xmm13
 | |
| +	vpmuludq 224(%rsp),%xmm2,%xmm1
 | |
| +	vpaddq %xmm1,%xmm14,%xmm14
 | |
| +	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm2,%xmm2
 | |
| +	vpmuludq 272(%rsp),%xmm2,%xmm1
 | |
| +	vpaddq %xmm1,%xmm6,%xmm6
 | |
| +	vpmuludq 288(%rsp),%xmm2,%xmm1
 | |
| +	vpaddq %xmm1,%xmm5,%xmm5
 | |
| +	vpmuludq 336(%rsp),%xmm2,%xmm1
 | |
| +	vpaddq %xmm1,%xmm7,%xmm7
 | |
| +	vpmuludq 352(%rsp),%xmm2,%xmm1
 | |
| +	vpaddq %xmm1,%xmm8,%xmm8
 | |
| +	vpmuludq 400(%rsp),%xmm2,%xmm1
 | |
| +	vpaddq %xmm1,%xmm9,%xmm9
 | |
| +	vpmuludq 416(%rsp),%xmm2,%xmm1
 | |
| +	vpaddq %xmm1,%xmm10,%xmm10
 | |
| +	vpmuludq 448(%rsp),%xmm2,%xmm1
 | |
| +	vpaddq %xmm1,%xmm11,%xmm11
 | |
| +	vpmuludq 464(%rsp),%xmm2,%xmm2
 | |
| +	vpaddq %xmm2,%xmm12,%xmm12
 | |
| +	vpmuludq 192(%rsp),%xmm0,%xmm1
 | |
| +	vpaddq %xmm1,%xmm14,%xmm14
 | |
| +	vpmuludq curve25519_sandy2x_v19_19(%rip),%xmm0,%xmm0
 | |
| +	vpmuludq 240(%rsp),%xmm0,%xmm1
 | |
| +	vpaddq %xmm1,%xmm6,%xmm6
 | |
| +	vpmuludq 272(%rsp),%xmm0,%xmm1
 | |
| +	vpaddq %xmm1,%xmm5,%xmm5
 | |
| +	vpmuludq 304(%rsp),%xmm0,%xmm1
 | |
| +	vpaddq %xmm1,%xmm7,%xmm7
 | |
| +	vpmuludq 336(%rsp),%xmm0,%xmm1
 | |
| +	vpaddq %xmm1,%xmm8,%xmm8
 | |
| +	vpmuludq 368(%rsp),%xmm0,%xmm1
 | |
| +	vpaddq %xmm1,%xmm9,%xmm9
 | |
| +	vpmuludq 400(%rsp),%xmm0,%xmm1
 | |
| +	vpaddq %xmm1,%xmm10,%xmm10
 | |
| +	vpmuludq 432(%rsp),%xmm0,%xmm1
 | |
| +	vpaddq %xmm1,%xmm11,%xmm11
 | |
| +	vpmuludq 448(%rsp),%xmm0,%xmm1
 | |
| +	vpaddq %xmm1,%xmm12,%xmm12
 | |
| +	vpmuludq 480(%rsp),%xmm0,%xmm0
 | |
| +	vpaddq %xmm0,%xmm13,%xmm13
 | |
| +	vpsrlq $26,%xmm6,%xmm0
 | |
| +	vpaddq %xmm0,%xmm5,%xmm5
 | |
| +	vpand curve25519_sandy2x_m26(%rip),%xmm6,%xmm6
 | |
| +	vpsrlq $25,%xmm10,%xmm0
 | |
| +	vpaddq %xmm0,%xmm11,%xmm11
 | |
| +	vpand curve25519_sandy2x_m25(%rip),%xmm10,%xmm10
 | |
| +	vpsrlq $25,%xmm5,%xmm0
 | |
| +	vpaddq %xmm0,%xmm7,%xmm7
 | |
| +	vpand curve25519_sandy2x_m25(%rip),%xmm5,%xmm5
 | |
| +	vpsrlq $26,%xmm11,%xmm0
 | |
| +	vpaddq %xmm0,%xmm12,%xmm12
 | |
| +	vpand curve25519_sandy2x_m26(%rip),%xmm11,%xmm11
 | |
| +	vpsrlq $26,%xmm7,%xmm0
 | |
| +	vpaddq %xmm0,%xmm8,%xmm8
 | |
| +	vpand curve25519_sandy2x_m26(%rip),%xmm7,%xmm7
 | |
| +	vpsrlq $25,%xmm12,%xmm0
 | |
| +	vpaddq %xmm0,%xmm13,%xmm13
 | |
| +	vpand curve25519_sandy2x_m25(%rip),%xmm12,%xmm12
 | |
| +	vpsrlq $25,%xmm8,%xmm0
 | |
| +	vpaddq %xmm0,%xmm9,%xmm9
 | |
| +	vpand curve25519_sandy2x_m25(%rip),%xmm8,%xmm8
 | |
| +	vpsrlq $26,%xmm13,%xmm0
 | |
| +	vpaddq %xmm0,%xmm14,%xmm14
 | |
| +	vpand curve25519_sandy2x_m26(%rip),%xmm13,%xmm13
 | |
| +	vpsrlq $26,%xmm9,%xmm0
 | |
| +	vpaddq %xmm0,%xmm10,%xmm10
 | |
| +	vpand curve25519_sandy2x_m26(%rip),%xmm9,%xmm9
 | |
| +	vpsrlq $25,%xmm14,%xmm0
 | |
| +	vpsllq $4,%xmm0,%xmm1
 | |
| +	vpaddq %xmm0,%xmm6,%xmm6
 | |
| +	vpsllq $1,%xmm0,%xmm0
 | |
| +	vpaddq %xmm0,%xmm1,%xmm1
 | |
| +	vpaddq %xmm1,%xmm6,%xmm6
 | |
| +	vpand curve25519_sandy2x_m25(%rip),%xmm14,%xmm14
 | |
| +	vpsrlq $25,%xmm10,%xmm0
 | |
| +	vpaddq %xmm0,%xmm11,%xmm11
 | |
| +	vpand curve25519_sandy2x_m25(%rip),%xmm10,%xmm10
 | |
| +	vpsrlq $26,%xmm6,%xmm0
 | |
| +	vpaddq %xmm0,%xmm5,%xmm5
 | |
| +	vpand curve25519_sandy2x_m26(%rip),%xmm6,%xmm6
 | |
| +	vpunpckhqdq %xmm5,%xmm6,%xmm1
 | |
| +	vpunpcklqdq %xmm5,%xmm6,%xmm0
 | |
| +	vpunpckhqdq %xmm8,%xmm7,%xmm3
 | |
| +	vpunpcklqdq %xmm8,%xmm7,%xmm2
 | |
| +	vpunpckhqdq %xmm10,%xmm9,%xmm5
 | |
| +	vpunpcklqdq %xmm10,%xmm9,%xmm4
 | |
| +	vpunpckhqdq %xmm12,%xmm11,%xmm7
 | |
| +	vpunpcklqdq %xmm12,%xmm11,%xmm6
 | |
| +	vpunpckhqdq %xmm14,%xmm13,%xmm9
 | |
| +	vpunpcklqdq %xmm14,%xmm13,%xmm8
 | |
| +	cmp $0,%rdx
 | |
| +	jne .Lladder_base_loop
 | |
| +	vmovdqu %xmm1,80(%rdi)
 | |
| +	vmovdqu %xmm0,0(%rdi)
 | |
| +	vmovdqu %xmm3,96(%rdi)
 | |
| +	vmovdqu %xmm2,16(%rdi)
 | |
| +	vmovdqu %xmm5,112(%rdi)
 | |
| +	vmovdqu %xmm4,32(%rdi)
 | |
| +	vmovdqu %xmm7,128(%rdi)
 | |
| +	vmovdqu %xmm6,48(%rdi)
 | |
| +	vmovdqu %xmm9,144(%rdi)
 | |
| +	vmovdqu %xmm8,64(%rdi)
 | |
| +	movq 1536(%rsp),%r11
 | |
| +	movq 1544(%rsp),%r12
 | |
| +	movq 1552(%rsp),%r13
 | |
| +	add %r11,%rsp
 | |
| +	ret
 | |
| +ENDPROC(curve25519_sandy2x_ladder_base)
 | |
| --- /dev/null	2017-07-05 16:27:37.615351856 +0200
 | |
| +++ b/net/wireguard/crypto/curve25519-neon-arm.S	2017-07-06 18:17:33.000000000 +0200
 | |
| @@ -0,0 +1,2104 @@
 | |
| +/*
 | |
| + * Copyright (C) 2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 | |
| + * Based on algorithms from Daniel J. Bernstein and Peter Schwabe.
 | |
| + */
 | |
| +
 | |
| +#include <linux/linkage.h>
 | |
| +
 | |
| +	.text
 | |
| +	.fpu		neon
 | |
| +	.align		4
 | |
| +
 | |
| +ENTRY(curve25519_asm_neon)
 | |
| +	vpush {q4,q5,q6,q7}
 | |
| +	mov r12,sp
 | |
| +	sub sp,sp,#736
 | |
| +	and sp,sp,#0xffffffe0
 | |
| +	strd r4,[sp,#0]
 | |
| +	strd r6,[sp,#8]
 | |
| +	strd r8,[sp,#16]
 | |
| +	strd r10,[sp,#24]
 | |
| +	str r12,[sp,#480]
 | |
| +	str r14,[sp,#484]
 | |
| +	mov r0,r0
 | |
| +	mov r1,r1
 | |
| +	mov r2,r2
 | |
| +	add r3,sp,#32
 | |
| +	ldr r4,=0
 | |
| +	ldr r5,=254
 | |
| +	vmov.i32 q0,#1
 | |
| +	vshr.u64 q1,q0,#7
 | |
| +	vshr.u64 q0,q0,#8
 | |
| +	vmov.i32 d4,#19
 | |
| +	vmov.i32 d5,#38
 | |
| +	add r6,sp,#512
 | |
| +	vst1.8 {d2-d3},[r6,: 128]
 | |
| +	add r6,sp,#528
 | |
| +	vst1.8 {d0-d1},[r6,: 128]
 | |
| +	add r6,sp,#544
 | |
| +	vst1.8 {d4-d5},[r6,: 128]
 | |
| +	add r6,r3,#0
 | |
| +	vmov.i32 q2,#0
 | |
| +	vst1.8 {d4-d5},[r6,: 128]!
 | |
| +	vst1.8 {d4-d5},[r6,: 128]!
 | |
| +	vst1.8 d4,[r6,: 64]
 | |
| +	add r6,r3,#0
 | |
| +	ldr r7,=960
 | |
| +	sub r7,r7,#2
 | |
| +	neg r7,r7
 | |
| +	sub r7,r7,r7,LSL #7
 | |
| +	str r7,[r6]
 | |
| +	add r6,sp,#704
 | |
| +	vld1.8 {d4-d5},[r1]!
 | |
| +	vld1.8 {d6-d7},[r1]
 | |
| +	vst1.8 {d4-d5},[r6,: 128]!
 | |
| +	vst1.8 {d6-d7},[r6,: 128]
 | |
| +	sub r1,r6,#16
 | |
| +	ldrb r6,[r1]
 | |
| +	and r6,r6,#248
 | |
| +	strb r6,[r1]
 | |
| +	ldrb r6,[r1,#31]
 | |
| +	and r6,r6,#127
 | |
| +	orr r6,r6,#64
 | |
| +	strb r6,[r1,#31]
 | |
| +	vmov.i64 q2,#0xffffffff
 | |
| +	vshr.u64 q3,q2,#7
 | |
| +	vshr.u64 q2,q2,#6
 | |
| +	vld1.8 {d8},[r2]
 | |
| +	vld1.8 {d10},[r2]
 | |
| +	add r2,r2,#6
 | |
| +	vld1.8 {d12},[r2]
 | |
| +	vld1.8 {d14},[r2]
 | |
| +	add r2,r2,#6
 | |
| +	vld1.8 {d16},[r2]
 | |
| +	add r2,r2,#4
 | |
| +	vld1.8 {d18},[r2]
 | |
| +	vld1.8 {d20},[r2]
 | |
| +	add r2,r2,#6
 | |
| +	vld1.8 {d22},[r2]
 | |
| +	add r2,r2,#2
 | |
| +	vld1.8 {d24},[r2]
 | |
| +	vld1.8 {d26},[r2]
 | |
| +	vshr.u64 q5,q5,#26
 | |
| +	vshr.u64 q6,q6,#3
 | |
| +	vshr.u64 q7,q7,#29
 | |
| +	vshr.u64 q8,q8,#6
 | |
| +	vshr.u64 q10,q10,#25
 | |
| +	vshr.u64 q11,q11,#3
 | |
| +	vshr.u64 q12,q12,#12
 | |
| +	vshr.u64 q13,q13,#38
 | |
| +	vand q4,q4,q2
 | |
| +	vand q6,q6,q2
 | |
| +	vand q8,q8,q2
 | |
| +	vand q10,q10,q2
 | |
| +	vand q2,q12,q2
 | |
| +	vand q5,q5,q3
 | |
| +	vand q7,q7,q3
 | |
| +	vand q9,q9,q3
 | |
| +	vand q11,q11,q3
 | |
| +	vand q3,q13,q3
 | |
| +	add r2,r3,#48
 | |
| +	vadd.i64 q12,q4,q1
 | |
| +	vadd.i64 q13,q10,q1
 | |
| +	vshr.s64 q12,q12,#26
 | |
| +	vshr.s64 q13,q13,#26
 | |
| +	vadd.i64 q5,q5,q12
 | |
| +	vshl.i64 q12,q12,#26
 | |
| +	vadd.i64 q14,q5,q0
 | |
| +	vadd.i64 q11,q11,q13
 | |
| +	vshl.i64 q13,q13,#26
 | |
| +	vadd.i64 q15,q11,q0
 | |
| +	vsub.i64 q4,q4,q12
 | |
| +	vshr.s64 q12,q14,#25
 | |
| +	vsub.i64 q10,q10,q13
 | |
| +	vshr.s64 q13,q15,#25
 | |
| +	vadd.i64 q6,q6,q12
 | |
| +	vshl.i64 q12,q12,#25
 | |
| +	vadd.i64 q14,q6,q1
 | |
| +	vadd.i64 q2,q2,q13
 | |
| +	vsub.i64 q5,q5,q12
 | |
| +	vshr.s64 q12,q14,#26
 | |
| +	vshl.i64 q13,q13,#25
 | |
| +	vadd.i64 q14,q2,q1
 | |
| +	vadd.i64 q7,q7,q12
 | |
| +	vshl.i64 q12,q12,#26
 | |
| +	vadd.i64 q15,q7,q0
 | |
| +	vsub.i64 q11,q11,q13
 | |
| +	vshr.s64 q13,q14,#26
 | |
| +	vsub.i64 q6,q6,q12
 | |
| +	vshr.s64 q12,q15,#25
 | |
| +	vadd.i64 q3,q3,q13
 | |
| +	vshl.i64 q13,q13,#26
 | |
| +	vadd.i64 q14,q3,q0
 | |
| +	vadd.i64 q8,q8,q12
 | |
| +	vshl.i64 q12,q12,#25
 | |
| +	vadd.i64 q15,q8,q1
 | |
| +	add r2,r2,#8
 | |
| +	vsub.i64 q2,q2,q13
 | |
| +	vshr.s64 q13,q14,#25
 | |
| +	vsub.i64 q7,q7,q12
 | |
| +	vshr.s64 q12,q15,#26
 | |
| +	vadd.i64 q14,q13,q13
 | |
| +	vadd.i64 q9,q9,q12
 | |
| +	vtrn.32 d12,d14
 | |
| +	vshl.i64 q12,q12,#26
 | |
| +	vtrn.32 d13,d15
 | |
| +	vadd.i64 q0,q9,q0
 | |
| +	vadd.i64 q4,q4,q14
 | |
| +	vst1.8 d12,[r2,: 64]!
 | |
| +	vshl.i64 q6,q13,#4
 | |
| +	vsub.i64 q7,q8,q12
 | |
| +	vshr.s64 q0,q0,#25
 | |
| +	vadd.i64 q4,q4,q6
 | |
| +	vadd.i64 q6,q10,q0
 | |
| +	vshl.i64 q0,q0,#25
 | |
| +	vadd.i64 q8,q6,q1
 | |
| +	vadd.i64 q4,q4,q13
 | |
| +	vshl.i64 q10,q13,#25
 | |
| +	vadd.i64 q1,q4,q1
 | |
| +	vsub.i64 q0,q9,q0
 | |
| +	vshr.s64 q8,q8,#26
 | |
| +	vsub.i64 q3,q3,q10
 | |
| +	vtrn.32 d14,d0
 | |
| +	vshr.s64 q1,q1,#26
 | |
| +	vtrn.32 d15,d1
 | |
| +	vadd.i64 q0,q11,q8
 | |
| +	vst1.8 d14,[r2,: 64]
 | |
| +	vshl.i64 q7,q8,#26
 | |
| +	vadd.i64 q5,q5,q1
 | |
| +	vtrn.32 d4,d6
 | |
| +	vshl.i64 q1,q1,#26
 | |
| +	vtrn.32 d5,d7
 | |
| +	vsub.i64 q3,q6,q7
 | |
| +	add r2,r2,#16
 | |
| +	vsub.i64 q1,q4,q1
 | |
| +	vst1.8 d4,[r2,: 64]
 | |
| +	vtrn.32 d6,d0
 | |
| +	vtrn.32 d7,d1
 | |
| +	sub r2,r2,#8
 | |
| +	vtrn.32 d2,d10
 | |
| +	vtrn.32 d3,d11
 | |
| +	vst1.8 d6,[r2,: 64]
 | |
| +	sub r2,r2,#24
 | |
| +	vst1.8 d2,[r2,: 64]
 | |
| +	add r2,r3,#96
 | |
| +	vmov.i32 q0,#0
 | |
| +	vmov.i64 d2,#0xff
 | |
| +	vmov.i64 d3,#0
 | |
| +	vshr.u32 q1,q1,#7
 | |
| +	vst1.8 {d2-d3},[r2,: 128]!
 | |
| +	vst1.8 {d0-d1},[r2,: 128]!
 | |
| +	vst1.8 d0,[r2,: 64]
 | |
| +	add r2,r3,#144
 | |
| +	vmov.i32 q0,#0
 | |
| +	vst1.8 {d0-d1},[r2,: 128]!
 | |
| +	vst1.8 {d0-d1},[r2,: 128]!
 | |
| +	vst1.8 d0,[r2,: 64]
 | |
| +	add r2,r3,#240
 | |
| +	vmov.i32 q0,#0
 | |
| +	vmov.i64 d2,#0xff
 | |
| +	vmov.i64 d3,#0
 | |
| +	vshr.u32 q1,q1,#7
 | |
| +	vst1.8 {d2-d3},[r2,: 128]!
 | |
| +	vst1.8 {d0-d1},[r2,: 128]!
 | |
| +	vst1.8 d0,[r2,: 64]
 | |
| +	add r2,r3,#48
 | |
| +	add r6,r3,#192
 | |
| +	vld1.8 {d0-d1},[r2,: 128]!
 | |
| +	vld1.8 {d2-d3},[r2,: 128]!
 | |
| +	vld1.8 {d4},[r2,: 64]
 | |
| +	vst1.8 {d0-d1},[r6,: 128]!
 | |
| +	vst1.8 {d2-d3},[r6,: 128]!
 | |
| +	vst1.8 d4,[r6,: 64]
 | |
| +	.Lmainloop:
 | |
| +	mov r2,r5,LSR #3
 | |
| +	and r6,r5,#7
 | |
| +	ldrb r2,[r1,r2]
 | |
| +	mov r2,r2,LSR r6
 | |
| +	and r2,r2,#1
 | |
| +	str r5,[sp,#488]
 | |
| +	eor r4,r4,r2
 | |
| +	str r2,[sp,#492]
 | |
| +	neg r2,r4
 | |
| +	add r4,r3,#96
 | |
| +	add r5,r3,#192
 | |
| +	add r6,r3,#144
 | |
| +	vld1.8 {d8-d9},[r4,: 128]!
 | |
| +	add r7,r3,#240
 | |
| +	vld1.8 {d10-d11},[r5,: 128]!
 | |
| +	veor q6,q4,q5
 | |
| +	vld1.8 {d14-d15},[r6,: 128]!
 | |
| +	vdup.i32 q8,r2
 | |
| +	vld1.8 {d18-d19},[r7,: 128]!
 | |
| +	veor q10,q7,q9
 | |
| +	vld1.8 {d22-d23},[r4,: 128]!
 | |
| +	vand q6,q6,q8
 | |
| +	vld1.8 {d24-d25},[r5,: 128]!
 | |
| +	vand q10,q10,q8
 | |
| +	vld1.8 {d26-d27},[r6,: 128]!
 | |
| +	veor q4,q4,q6
 | |
| +	vld1.8 {d28-d29},[r7,: 128]!
 | |
| +	veor q5,q5,q6
 | |
| +	vld1.8 {d0},[r4,: 64]
 | |
| +	veor q6,q7,q10
 | |
| +	vld1.8 {d2},[r5,: 64]
 | |
| +	veor q7,q9,q10
 | |
| +	vld1.8 {d4},[r6,: 64]
 | |
| +	veor q9,q11,q12
 | |
| +	vld1.8 {d6},[r7,: 64]
 | |
| +	veor q10,q0,q1
 | |
| +	sub r2,r4,#32
 | |
| +	vand q9,q9,q8
 | |
| +	sub r4,r5,#32
 | |
| +	vand q10,q10,q8
 | |
| +	sub r5,r6,#32
 | |
| +	veor q11,q11,q9
 | |
| +	sub r6,r7,#32
 | |
| +	veor q0,q0,q10
 | |
| +	veor q9,q12,q9
 | |
| +	veor q1,q1,q10
 | |
| +	veor q10,q13,q14
 | |
| +	veor q12,q2,q3
 | |
| +	vand q10,q10,q8
 | |
| +	vand q8,q12,q8
 | |
| +	veor q12,q13,q10
 | |
| +	veor q2,q2,q8
 | |
| +	veor q10,q14,q10
 | |
| +	veor q3,q3,q8
 | |
| +	vadd.i32 q8,q4,q6
 | |
| +	vsub.i32 q4,q4,q6
 | |
| +	vst1.8 {d16-d17},[r2,: 128]!
 | |
| +	vadd.i32 q6,q11,q12
 | |
| +	vst1.8 {d8-d9},[r5,: 128]!
 | |
| +	vsub.i32 q4,q11,q12
 | |
| +	vst1.8 {d12-d13},[r2,: 128]!
 | |
| +	vadd.i32 q6,q0,q2
 | |
| +	vst1.8 {d8-d9},[r5,: 128]!
 | |
| +	vsub.i32 q0,q0,q2
 | |
| +	vst1.8 d12,[r2,: 64]
 | |
| +	vadd.i32 q2,q5,q7
 | |
| +	vst1.8 d0,[r5,: 64]
 | |
| +	vsub.i32 q0,q5,q7
 | |
| +	vst1.8 {d4-d5},[r4,: 128]!
 | |
| +	vadd.i32 q2,q9,q10
 | |
| +	vst1.8 {d0-d1},[r6,: 128]!
 | |
| +	vsub.i32 q0,q9,q10
 | |
| +	vst1.8 {d4-d5},[r4,: 128]!
 | |
| +	vadd.i32 q2,q1,q3
 | |
| +	vst1.8 {d0-d1},[r6,: 128]!
 | |
| +	vsub.i32 q0,q1,q3
 | |
| +	vst1.8 d4,[r4,: 64]
 | |
| +	vst1.8 d0,[r6,: 64]
 | |
| +	add r2,sp,#544
 | |
| +	add r4,r3,#96
 | |
| +	add r5,r3,#144
 | |
| +	vld1.8 {d0-d1},[r2,: 128]
 | |
| +	vld1.8 {d2-d3},[r4,: 128]!
 | |
| +	vld1.8 {d4-d5},[r5,: 128]!
 | |
| +	vzip.i32 q1,q2
 | |
| +	vld1.8 {d6-d7},[r4,: 128]!
 | |
| +	vld1.8 {d8-d9},[r5,: 128]!
 | |
| +	vshl.i32 q5,q1,#1
 | |
| +	vzip.i32 q3,q4
 | |
| +	vshl.i32 q6,q2,#1
 | |
| +	vld1.8 {d14},[r4,: 64]
 | |
| +	vshl.i32 q8,q3,#1
 | |
| +	vld1.8 {d15},[r5,: 64]
 | |
| +	vshl.i32 q9,q4,#1
 | |
| +	vmul.i32 d21,d7,d1
 | |
| +	vtrn.32 d14,d15
 | |
| +	vmul.i32 q11,q4,q0
 | |
| +	vmul.i32 q0,q7,q0
 | |
| +	vmull.s32 q12,d2,d2
 | |
| +	vmlal.s32 q12,d11,d1
 | |
| +	vmlal.s32 q12,d12,d0
 | |
| +	vmlal.s32 q12,d13,d23
 | |
| +	vmlal.s32 q12,d16,d22
 | |
| +	vmlal.s32 q12,d7,d21
 | |
| +	vmull.s32 q10,d2,d11
 | |
| +	vmlal.s32 q10,d4,d1
 | |
| +	vmlal.s32 q10,d13,d0
 | |
| +	vmlal.s32 q10,d6,d23
 | |
| +	vmlal.s32 q10,d17,d22
 | |
| +	vmull.s32 q13,d10,d4
 | |
| +	vmlal.s32 q13,d11,d3
 | |
| +	vmlal.s32 q13,d13,d1
 | |
| +	vmlal.s32 q13,d16,d0
 | |
| +	vmlal.s32 q13,d17,d23
 | |
| +	vmlal.s32 q13,d8,d22
 | |
| +	vmull.s32 q1,d10,d5
 | |
| +	vmlal.s32 q1,d11,d4
 | |
| +	vmlal.s32 q1,d6,d1
 | |
| +	vmlal.s32 q1,d17,d0
 | |
| +	vmlal.s32 q1,d8,d23
 | |
| +	vmull.s32 q14,d10,d6
 | |
| +	vmlal.s32 q14,d11,d13
 | |
| +	vmlal.s32 q14,d4,d4
 | |
| +	vmlal.s32 q14,d17,d1
 | |
| +	vmlal.s32 q14,d18,d0
 | |
| +	vmlal.s32 q14,d9,d23
 | |
| +	vmull.s32 q11,d10,d7
 | |
| +	vmlal.s32 q11,d11,d6
 | |
| +	vmlal.s32 q11,d12,d5
 | |
| +	vmlal.s32 q11,d8,d1
 | |
| +	vmlal.s32 q11,d19,d0
 | |
| +	vmull.s32 q15,d10,d8
 | |
| +	vmlal.s32 q15,d11,d17
 | |
| +	vmlal.s32 q15,d12,d6
 | |
| +	vmlal.s32 q15,d13,d5
 | |
| +	vmlal.s32 q15,d19,d1
 | |
| +	vmlal.s32 q15,d14,d0
 | |
| +	vmull.s32 q2,d10,d9
 | |
| +	vmlal.s32 q2,d11,d8
 | |
| +	vmlal.s32 q2,d12,d7
 | |
| +	vmlal.s32 q2,d13,d6
 | |
| +	vmlal.s32 q2,d14,d1
 | |
| +	vmull.s32 q0,d15,d1
 | |
| +	vmlal.s32 q0,d10,d14
 | |
| +	vmlal.s32 q0,d11,d19
 | |
| +	vmlal.s32 q0,d12,d8
 | |
| +	vmlal.s32 q0,d13,d17
 | |
| +	vmlal.s32 q0,d6,d6
 | |
| +	add r2,sp,#512
 | |
| +	vld1.8 {d18-d19},[r2,: 128]
 | |
| +	vmull.s32 q3,d16,d7
 | |
| +	vmlal.s32 q3,d10,d15
 | |
| +	vmlal.s32 q3,d11,d14
 | |
| +	vmlal.s32 q3,d12,d9
 | |
| +	vmlal.s32 q3,d13,d8
 | |
| +	add r2,sp,#528
 | |
| +	vld1.8 {d8-d9},[r2,: 128]
 | |
| +	vadd.i64 q5,q12,q9
 | |
| +	vadd.i64 q6,q15,q9
 | |
| +	vshr.s64 q5,q5,#26
 | |
| +	vshr.s64 q6,q6,#26
 | |
| +	vadd.i64 q7,q10,q5
 | |
| +	vshl.i64 q5,q5,#26
 | |
| +	vadd.i64 q8,q7,q4
 | |
| +	vadd.i64 q2,q2,q6
 | |
| +	vshl.i64 q6,q6,#26
 | |
| +	vadd.i64 q10,q2,q4
 | |
| +	vsub.i64 q5,q12,q5
 | |
| +	vshr.s64 q8,q8,#25
 | |
| +	vsub.i64 q6,q15,q6
 | |
| +	vshr.s64 q10,q10,#25
 | |
| +	vadd.i64 q12,q13,q8
 | |
| +	vshl.i64 q8,q8,#25
 | |
| +	vadd.i64 q13,q12,q9
 | |
| +	vadd.i64 q0,q0,q10
 | |
| +	vsub.i64 q7,q7,q8
 | |
| +	vshr.s64 q8,q13,#26
 | |
| +	vshl.i64 q10,q10,#25
 | |
| +	vadd.i64 q13,q0,q9
 | |
| +	vadd.i64 q1,q1,q8
 | |
| +	vshl.i64 q8,q8,#26
 | |
| +	vadd.i64 q15,q1,q4
 | |
| +	vsub.i64 q2,q2,q10
 | |
| +	vshr.s64 q10,q13,#26
 | |
| +	vsub.i64 q8,q12,q8
 | |
| +	vshr.s64 q12,q15,#25
 | |
| +	vadd.i64 q3,q3,q10
 | |
| +	vshl.i64 q10,q10,#26
 | |
| +	vadd.i64 q13,q3,q4
 | |
| +	vadd.i64 q14,q14,q12
 | |
| +	add r2,r3,#288
 | |
| +	vshl.i64 q12,q12,#25
 | |
| +	add r4,r3,#336
 | |
| +	vadd.i64 q15,q14,q9
 | |
| +	add r2,r2,#8
 | |
| +	vsub.i64 q0,q0,q10
 | |
| +	add r4,r4,#8
 | |
| +	vshr.s64 q10,q13,#25
 | |
| +	vsub.i64 q1,q1,q12
 | |
| +	vshr.s64 q12,q15,#26
 | |
| +	vadd.i64 q13,q10,q10
 | |
| +	vadd.i64 q11,q11,q12
 | |
| +	vtrn.32 d16,d2
 | |
| +	vshl.i64 q12,q12,#26
 | |
| +	vtrn.32 d17,d3
 | |
| +	vadd.i64 q1,q11,q4
 | |
| +	vadd.i64 q4,q5,q13
 | |
| +	vst1.8 d16,[r2,: 64]!
 | |
| +	vshl.i64 q5,q10,#4
 | |
| +	vst1.8 d17,[r4,: 64]!
 | |
| +	vsub.i64 q8,q14,q12
 | |
| +	vshr.s64 q1,q1,#25
 | |
| +	vadd.i64 q4,q4,q5
 | |
| +	vadd.i64 q5,q6,q1
 | |
| +	vshl.i64 q1,q1,#25
 | |
| +	vadd.i64 q6,q5,q9
 | |
| +	vadd.i64 q4,q4,q10
 | |
| +	vshl.i64 q10,q10,#25
 | |
| +	vadd.i64 q9,q4,q9
 | |
| +	vsub.i64 q1,q11,q1
 | |
| +	vshr.s64 q6,q6,#26
 | |
| +	vsub.i64 q3,q3,q10
 | |
| +	vtrn.32 d16,d2
 | |
| +	vshr.s64 q9,q9,#26
 | |
| +	vtrn.32 d17,d3
 | |
| +	vadd.i64 q1,q2,q6
 | |
| +	vst1.8 d16,[r2,: 64]
 | |
| +	vshl.i64 q2,q6,#26
 | |
| +	vst1.8 d17,[r4,: 64]
 | |
| +	vadd.i64 q6,q7,q9
 | |
| +	vtrn.32 d0,d6
 | |
| +	vshl.i64 q7,q9,#26
 | |
| +	vtrn.32 d1,d7
 | |
| +	vsub.i64 q2,q5,q2
 | |
| +	add r2,r2,#16
 | |
| +	vsub.i64 q3,q4,q7
 | |
| +	vst1.8 d0,[r2,: 64]
 | |
| +	add r4,r4,#16
 | |
| +	vst1.8 d1,[r4,: 64]
 | |
| +	vtrn.32 d4,d2
 | |
| +	vtrn.32 d5,d3
 | |
| +	sub r2,r2,#8
 | |
| +	sub r4,r4,#8
 | |
| +	vtrn.32 d6,d12
 | |
| +	vtrn.32 d7,d13
 | |
| +	vst1.8 d4,[r2,: 64]
 | |
| +	vst1.8 d5,[r4,: 64]
 | |
| +	sub r2,r2,#24
 | |
| +	sub r4,r4,#24
 | |
| +	vst1.8 d6,[r2,: 64]
 | |
| +	vst1.8 d7,[r4,: 64]
 | |
| +	add r2,r3,#240
 | |
| +	add r4,r3,#96
 | |
| +	vld1.8 {d0-d1},[r4,: 128]!
 | |
| +	vld1.8 {d2-d3},[r4,: 128]!
 | |
| +	vld1.8 {d4},[r4,: 64]
 | |
| +	add r4,r3,#144
 | |
| +	vld1.8 {d6-d7},[r4,: 128]!
 | |
| +	vtrn.32 q0,q3
 | |
| +	vld1.8 {d8-d9},[r4,: 128]!
 | |
| +	vshl.i32 q5,q0,#4
 | |
| +	vtrn.32 q1,q4
 | |
| +	vshl.i32 q6,q3,#4
 | |
| +	vadd.i32 q5,q5,q0
 | |
| +	vadd.i32 q6,q6,q3
 | |
| +	vshl.i32 q7,q1,#4
 | |
| +	vld1.8 {d5},[r4,: 64]
 | |
| +	vshl.i32 q8,q4,#4
 | |
| +	vtrn.32 d4,d5
 | |
| +	vadd.i32 q7,q7,q1
 | |
| +	vadd.i32 q8,q8,q4
 | |
| +	vld1.8 {d18-d19},[r2,: 128]!
 | |
| +	vshl.i32 q10,q2,#4
 | |
| +	vld1.8 {d22-d23},[r2,: 128]!
 | |
| +	vadd.i32 q10,q10,q2
 | |
| +	vld1.8 {d24},[r2,: 64]
 | |
| +	vadd.i32 q5,q5,q0
 | |
| +	add r2,r3,#192
 | |
| +	vld1.8 {d26-d27},[r2,: 128]!
 | |
| +	vadd.i32 q6,q6,q3
 | |
| +	vld1.8 {d28-d29},[r2,: 128]!
 | |
| +	vadd.i32 q8,q8,q4
 | |
| +	vld1.8 {d25},[r2,: 64]
 | |
| +	vadd.i32 q10,q10,q2
 | |
| +	vtrn.32 q9,q13
 | |
| +	vadd.i32 q7,q7,q1
 | |
| +	vadd.i32 q5,q5,q0
 | |
| +	vtrn.32 q11,q14
 | |
| +	vadd.i32 q6,q6,q3
 | |
| +	add r2,sp,#560
 | |
| +	vadd.i32 q10,q10,q2
 | |
| +	vtrn.32 d24,d25
 | |
| +	vst1.8 {d12-d13},[r2,: 128]
 | |
| +	vshl.i32 q6,q13,#1
 | |
| +	add r2,sp,#576
 | |
| +	vst1.8 {d20-d21},[r2,: 128]
 | |
| +	vshl.i32 q10,q14,#1
 | |
| +	add r2,sp,#592
 | |
| +	vst1.8 {d12-d13},[r2,: 128]
 | |
| +	vshl.i32 q15,q12,#1
 | |
| +	vadd.i32 q8,q8,q4
 | |
| +	vext.32 d10,d31,d30,#0
 | |
| +	vadd.i32 q7,q7,q1
 | |
| +	add r2,sp,#608
 | |
| +	vst1.8 {d16-d17},[r2,: 128]
 | |
| +	vmull.s32 q8,d18,d5
 | |
| +	vmlal.s32 q8,d26,d4
 | |
| +	vmlal.s32 q8,d19,d9
 | |
| +	vmlal.s32 q8,d27,d3
 | |
| +	vmlal.s32 q8,d22,d8
 | |
| +	vmlal.s32 q8,d28,d2
 | |
| +	vmlal.s32 q8,d23,d7
 | |
| +	vmlal.s32 q8,d29,d1
 | |
| +	vmlal.s32 q8,d24,d6
 | |
| +	vmlal.s32 q8,d25,d0
 | |
| +	add r2,sp,#624
 | |
| +	vst1.8 {d14-d15},[r2,: 128]
 | |
| +	vmull.s32 q2,d18,d4
 | |
| +	vmlal.s32 q2,d12,d9
 | |
| +	vmlal.s32 q2,d13,d8
 | |
| +	vmlal.s32 q2,d19,d3
 | |
| +	vmlal.s32 q2,d22,d2
 | |
| +	vmlal.s32 q2,d23,d1
 | |
| +	vmlal.s32 q2,d24,d0
 | |
| +	add r2,sp,#640
 | |
| +	vst1.8 {d20-d21},[r2,: 128]
 | |
| +	vmull.s32 q7,d18,d9
 | |
| +	vmlal.s32 q7,d26,d3
 | |
| +	vmlal.s32 q7,d19,d8
 | |
| +	vmlal.s32 q7,d27,d2
 | |
| +	vmlal.s32 q7,d22,d7
 | |
| +	vmlal.s32 q7,d28,d1
 | |
| +	vmlal.s32 q7,d23,d6
 | |
| +	vmlal.s32 q7,d29,d0
 | |
| +	add r2,sp,#656
 | |
| +	vst1.8 {d10-d11},[r2,: 128]
 | |
| +	vmull.s32 q5,d18,d3
 | |
| +	vmlal.s32 q5,d19,d2
 | |
| +	vmlal.s32 q5,d22,d1
 | |
| +	vmlal.s32 q5,d23,d0
 | |
| +	vmlal.s32 q5,d12,d8
 | |
| +	add r2,sp,#672
 | |
| +	vst1.8 {d16-d17},[r2,: 128]
 | |
| +	vmull.s32 q4,d18,d8
 | |
| +	vmlal.s32 q4,d26,d2
 | |
| +	vmlal.s32 q4,d19,d7
 | |
| +	vmlal.s32 q4,d27,d1
 | |
| +	vmlal.s32 q4,d22,d6
 | |
| +	vmlal.s32 q4,d28,d0
 | |
| +	vmull.s32 q8,d18,d7
 | |
| +	vmlal.s32 q8,d26,d1
 | |
| +	vmlal.s32 q8,d19,d6
 | |
| +	vmlal.s32 q8,d27,d0
 | |
| +	add r2,sp,#576
 | |
| +	vld1.8 {d20-d21},[r2,: 128]
 | |
| +	vmlal.s32 q7,d24,d21
 | |
| +	vmlal.s32 q7,d25,d20
 | |
| +	vmlal.s32 q4,d23,d21
 | |
| +	vmlal.s32 q4,d29,d20
 | |
| +	vmlal.s32 q8,d22,d21
 | |
| +	vmlal.s32 q8,d28,d20
 | |
| +	vmlal.s32 q5,d24,d20
 | |
| +	add r2,sp,#576
 | |
| +	vst1.8 {d14-d15},[r2,: 128]
 | |
| +	vmull.s32 q7,d18,d6
 | |
| +	vmlal.s32 q7,d26,d0
 | |
| +	add r2,sp,#656
 | |
| +	vld1.8 {d30-d31},[r2,: 128]
 | |
| +	vmlal.s32 q2,d30,d21
 | |
| +	vmlal.s32 q7,d19,d21
 | |
| +	vmlal.s32 q7,d27,d20
 | |
| +	add r2,sp,#624
 | |
| +	vld1.8 {d26-d27},[r2,: 128]
 | |
| +	vmlal.s32 q4,d25,d27
 | |
| +	vmlal.s32 q8,d29,d27
 | |
| +	vmlal.s32 q8,d25,d26
 | |
| +	vmlal.s32 q7,d28,d27
 | |
| +	vmlal.s32 q7,d29,d26
 | |
| +	add r2,sp,#608
 | |
| +	vld1.8 {d28-d29},[r2,: 128]
 | |
| +	vmlal.s32 q4,d24,d29
 | |
| +	vmlal.s32 q8,d23,d29
 | |
| +	vmlal.s32 q8,d24,d28
 | |
| +	vmlal.s32 q7,d22,d29
 | |
| +	vmlal.s32 q7,d23,d28
 | |
| +	add r2,sp,#608
 | |
| +	vst1.8 {d8-d9},[r2,: 128]
 | |
| +	add r2,sp,#560
 | |
| +	vld1.8 {d8-d9},[r2,: 128]
 | |
| +	vmlal.s32 q7,d24,d9
 | |
| +	vmlal.s32 q7,d25,d31
 | |
| +	vmull.s32 q1,d18,d2
 | |
| +	vmlal.s32 q1,d19,d1
 | |
| +	vmlal.s32 q1,d22,d0
 | |
| +	vmlal.s32 q1,d24,d27
 | |
| +	vmlal.s32 q1,d23,d20
 | |
| +	vmlal.s32 q1,d12,d7
 | |
| +	vmlal.s32 q1,d13,d6
 | |
| +	vmull.s32 q6,d18,d1
 | |
| +	vmlal.s32 q6,d19,d0
 | |
| +	vmlal.s32 q6,d23,d27
 | |
| +	vmlal.s32 q6,d22,d20
 | |
| +	vmlal.s32 q6,d24,d26
 | |
| +	vmull.s32 q0,d18,d0
 | |
| +	vmlal.s32 q0,d22,d27
 | |
| +	vmlal.s32 q0,d23,d26
 | |
| +	vmlal.s32 q0,d24,d31
 | |
| +	vmlal.s32 q0,d19,d20
 | |
| +	add r2,sp,#640
 | |
| +	vld1.8 {d18-d19},[r2,: 128]
 | |
| +	vmlal.s32 q2,d18,d7
 | |
| +	vmlal.s32 q2,d19,d6
 | |
| +	vmlal.s32 q5,d18,d6
 | |
| +	vmlal.s32 q5,d19,d21
 | |
| +	vmlal.s32 q1,d18,d21
 | |
| +	vmlal.s32 q1,d19,d29
 | |
| +	vmlal.s32 q0,d18,d28
 | |
| +	vmlal.s32 q0,d19,d9
 | |
| +	vmlal.s32 q6,d18,d29
 | |
| +	vmlal.s32 q6,d19,d28
 | |
| +	add r2,sp,#592
 | |
| +	vld1.8 {d18-d19},[r2,: 128]
 | |
| +	add r2,sp,#512
 | |
| +	vld1.8 {d22-d23},[r2,: 128]
 | |
| +	vmlal.s32 q5,d19,d7
 | |
| +	vmlal.s32 q0,d18,d21
 | |
| +	vmlal.s32 q0,d19,d29
 | |
| +	vmlal.s32 q6,d18,d6
 | |
| +	add r2,sp,#528
 | |
| +	vld1.8 {d6-d7},[r2,: 128]
 | |
| +	vmlal.s32 q6,d19,d21
 | |
| +	add r2,sp,#576
 | |
| +	vld1.8 {d18-d19},[r2,: 128]
 | |
| +	vmlal.s32 q0,d30,d8
 | |
| +	add r2,sp,#672
 | |
| +	vld1.8 {d20-d21},[r2,: 128]
 | |
| +	vmlal.s32 q5,d30,d29
 | |
| +	add r2,sp,#608
 | |
| +	vld1.8 {d24-d25},[r2,: 128]
 | |
| +	vmlal.s32 q1,d30,d28
 | |
| +	vadd.i64 q13,q0,q11
 | |
| +	vadd.i64 q14,q5,q11
 | |
| +	vmlal.s32 q6,d30,d9
 | |
| +	vshr.s64 q4,q13,#26
 | |
| +	vshr.s64 q13,q14,#26
 | |
| +	vadd.i64 q7,q7,q4
 | |
| +	vshl.i64 q4,q4,#26
 | |
| +	vadd.i64 q14,q7,q3
 | |
| +	vadd.i64 q9,q9,q13
 | |
| +	vshl.i64 q13,q13,#26
 | |
| +	vadd.i64 q15,q9,q3
 | |
| +	vsub.i64 q0,q0,q4
 | |
| +	vshr.s64 q4,q14,#25
 | |
| +	vsub.i64 q5,q5,q13
 | |
| +	vshr.s64 q13,q15,#25
 | |
| +	vadd.i64 q6,q6,q4
 | |
| +	vshl.i64 q4,q4,#25
 | |
| +	vadd.i64 q14,q6,q11
 | |
| +	vadd.i64 q2,q2,q13
 | |
| +	vsub.i64 q4,q7,q4
 | |
| +	vshr.s64 q7,q14,#26
 | |
| +	vshl.i64 q13,q13,#25
 | |
| +	vadd.i64 q14,q2,q11
 | |
| +	vadd.i64 q8,q8,q7
 | |
| +	vshl.i64 q7,q7,#26
 | |
| +	vadd.i64 q15,q8,q3
 | |
| +	vsub.i64 q9,q9,q13
 | |
| +	vshr.s64 q13,q14,#26
 | |
| +	vsub.i64 q6,q6,q7
 | |
| +	vshr.s64 q7,q15,#25
 | |
| +	vadd.i64 q10,q10,q13
 | |
| +	vshl.i64 q13,q13,#26
 | |
| +	vadd.i64 q14,q10,q3
 | |
| +	vadd.i64 q1,q1,q7
 | |
| +	add r2,r3,#144
 | |
| +	vshl.i64 q7,q7,#25
 | |
| +	add r4,r3,#96
 | |
| +	vadd.i64 q15,q1,q11
 | |
| +	add r2,r2,#8
 | |
| +	vsub.i64 q2,q2,q13
 | |
| +	add r4,r4,#8
 | |
| +	vshr.s64 q13,q14,#25
 | |
| +	vsub.i64 q7,q8,q7
 | |
| +	vshr.s64 q8,q15,#26
 | |
| +	vadd.i64 q14,q13,q13
 | |
| +	vadd.i64 q12,q12,q8
 | |
| +	vtrn.32 d12,d14
 | |
| +	vshl.i64 q8,q8,#26
 | |
| +	vtrn.32 d13,d15
 | |
| +	vadd.i64 q3,q12,q3
 | |
| +	vadd.i64 q0,q0,q14
 | |
| +	vst1.8 d12,[r2,: 64]!
 | |
| +	vshl.i64 q7,q13,#4
 | |
| +	vst1.8 d13,[r4,: 64]!
 | |
| +	vsub.i64 q1,q1,q8
 | |
| +	vshr.s64 q3,q3,#25
 | |
| +	vadd.i64 q0,q0,q7
 | |
| +	vadd.i64 q5,q5,q3
 | |
| +	vshl.i64 q3,q3,#25
 | |
| +	vadd.i64 q6,q5,q11
 | |
| +	vadd.i64 q0,q0,q13
 | |
| +	vshl.i64 q7,q13,#25
 | |
| +	vadd.i64 q8,q0,q11
 | |
| +	vsub.i64 q3,q12,q3
 | |
| +	vshr.s64 q6,q6,#26
 | |
| +	vsub.i64 q7,q10,q7
 | |
| +	vtrn.32 d2,d6
 | |
| +	vshr.s64 q8,q8,#26
 | |
| +	vtrn.32 d3,d7
 | |
| +	vadd.i64 q3,q9,q6
 | |
| +	vst1.8 d2,[r2,: 64]
 | |
| +	vshl.i64 q6,q6,#26
 | |
| +	vst1.8 d3,[r4,: 64]
 | |
| +	vadd.i64 q1,q4,q8
 | |
| +	vtrn.32 d4,d14
 | |
| +	vshl.i64 q4,q8,#26
 | |
| +	vtrn.32 d5,d15
 | |
| +	vsub.i64 q5,q5,q6
 | |
| +	add r2,r2,#16
 | |
| +	vsub.i64 q0,q0,q4
 | |
| +	vst1.8 d4,[r2,: 64]
 | |
| +	add r4,r4,#16
 | |
| +	vst1.8 d5,[r4,: 64]
 | |
| +	vtrn.32 d10,d6
 | |
| +	vtrn.32 d11,d7
 | |
| +	sub r2,r2,#8
 | |
| +	sub r4,r4,#8
 | |
| +	vtrn.32 d0,d2
 | |
| +	vtrn.32 d1,d3
 | |
| +	vst1.8 d10,[r2,: 64]
 | |
| +	vst1.8 d11,[r4,: 64]
 | |
| +	sub r2,r2,#24
 | |
| +	sub r4,r4,#24
 | |
| +	vst1.8 d0,[r2,: 64]
 | |
| +	vst1.8 d1,[r4,: 64]
 | |
| +	add r2,r3,#288
 | |
| +	add r4,r3,#336
 | |
| +	vld1.8 {d0-d1},[r2,: 128]!
 | |
| +	vld1.8 {d2-d3},[r4,: 128]!
 | |
| +	vsub.i32 q0,q0,q1
 | |
| +	vld1.8 {d2-d3},[r2,: 128]!
 | |
| +	vld1.8 {d4-d5},[r4,: 128]!
 | |
| +	vsub.i32 q1,q1,q2
 | |
| +	add r5,r3,#240
 | |
| +	vld1.8 {d4},[r2,: 64]
 | |
| +	vld1.8 {d6},[r4,: 64]
 | |
| +	vsub.i32 q2,q2,q3
 | |
| +	vst1.8 {d0-d1},[r5,: 128]!
 | |
| +	vst1.8 {d2-d3},[r5,: 128]!
 | |
| +	vst1.8 d4,[r5,: 64]
 | |
| +	add r2,r3,#144
 | |
| +	add r4,r3,#96
 | |
| +	add r5,r3,#144
 | |
| +	add r6,r3,#192
 | |
| +	vld1.8 {d0-d1},[r2,: 128]!
 | |
| +	vld1.8 {d2-d3},[r4,: 128]!
 | |
| +	vsub.i32 q2,q0,q1
 | |
| +	vadd.i32 q0,q0,q1
 | |
| +	vld1.8 {d2-d3},[r2,: 128]!
 | |
| +	vld1.8 {d6-d7},[r4,: 128]!
 | |
| +	vsub.i32 q4,q1,q3
 | |
| +	vadd.i32 q1,q1,q3
 | |
| +	vld1.8 {d6},[r2,: 64]
 | |
| +	vld1.8 {d10},[r4,: 64]
 | |
| +	vsub.i32 q6,q3,q5
 | |
| +	vadd.i32 q3,q3,q5
 | |
| +	vst1.8 {d4-d5},[r5,: 128]!
 | |
| +	vst1.8 {d0-d1},[r6,: 128]!
 | |
| +	vst1.8 {d8-d9},[r5,: 128]!
 | |
| +	vst1.8 {d2-d3},[r6,: 128]!
 | |
| +	vst1.8 d12,[r5,: 64]
 | |
| +	vst1.8 d6,[r6,: 64]
 | |
| +	add r2,r3,#0
 | |
| +	add r4,r3,#240
 | |
| +	vld1.8 {d0-d1},[r4,: 128]!
 | |
| +	vld1.8 {d2-d3},[r4,: 128]!
 | |
| +	vld1.8 {d4},[r4,: 64]
 | |
| +	add r4,r3,#336
 | |
| +	vld1.8 {d6-d7},[r4,: 128]!
 | |
| +	vtrn.32 q0,q3
 | |
| +	vld1.8 {d8-d9},[r4,: 128]!
 | |
| +	vshl.i32 q5,q0,#4
 | |
| +	vtrn.32 q1,q4
 | |
| +	vshl.i32 q6,q3,#4
 | |
| +	vadd.i32 q5,q5,q0
 | |
| +	vadd.i32 q6,q6,q3
 | |
| +	vshl.i32 q7,q1,#4
 | |
| +	vld1.8 {d5},[r4,: 64]
 | |
| +	vshl.i32 q8,q4,#4
 | |
| +	vtrn.32 d4,d5
 | |
| +	vadd.i32 q7,q7,q1
 | |
| +	vadd.i32 q8,q8,q4
 | |
| +	vld1.8 {d18-d19},[r2,: 128]!
 | |
| +	vshl.i32 q10,q2,#4
 | |
| +	vld1.8 {d22-d23},[r2,: 128]!
 | |
| +	vadd.i32 q10,q10,q2
 | |
| +	vld1.8 {d24},[r2,: 64]
 | |
| +	vadd.i32 q5,q5,q0
 | |
| +	add r2,r3,#288
 | |
| +	vld1.8 {d26-d27},[r2,: 128]!
 | |
| +	vadd.i32 q6,q6,q3
 | |
| +	vld1.8 {d28-d29},[r2,: 128]!
 | |
| +	vadd.i32 q8,q8,q4
 | |
| +	vld1.8 {d25},[r2,: 64]
 | |
| +	vadd.i32 q10,q10,q2
 | |
| +	vtrn.32 q9,q13
 | |
| +	vadd.i32 q7,q7,q1
 | |
| +	vadd.i32 q5,q5,q0
 | |
| +	vtrn.32 q11,q14
 | |
| +	vadd.i32 q6,q6,q3
 | |
| +	add r2,sp,#560
 | |
| +	vadd.i32 q10,q10,q2
 | |
| +	vtrn.32 d24,d25
 | |
| +	vst1.8 {d12-d13},[r2,: 128]
 | |
| +	vshl.i32 q6,q13,#1
 | |
| +	add r2,sp,#576
 | |
| +	vst1.8 {d20-d21},[r2,: 128]
 | |
| +	vshl.i32 q10,q14,#1
 | |
| +	add r2,sp,#592
 | |
| +	vst1.8 {d12-d13},[r2,: 128]
 | |
| +	vshl.i32 q15,q12,#1
 | |
| +	vadd.i32 q8,q8,q4
 | |
| +	vext.32 d10,d31,d30,#0
 | |
| +	vadd.i32 q7,q7,q1
 | |
| +	add r2,sp,#608
 | |
| +	vst1.8 {d16-d17},[r2,: 128]
 | |
| +	vmull.s32 q8,d18,d5
 | |
| +	vmlal.s32 q8,d26,d4
 | |
| +	vmlal.s32 q8,d19,d9
 | |
| +	vmlal.s32 q8,d27,d3
 | |
| +	vmlal.s32 q8,d22,d8
 | |
| +	vmlal.s32 q8,d28,d2
 | |
| +	vmlal.s32 q8,d23,d7
 | |
| +	vmlal.s32 q8,d29,d1
 | |
| +	vmlal.s32 q8,d24,d6
 | |
| +	vmlal.s32 q8,d25,d0
 | |
| +	add r2,sp,#624
 | |
| +	vst1.8 {d14-d15},[r2,: 128]
 | |
| +	vmull.s32 q2,d18,d4
 | |
| +	vmlal.s32 q2,d12,d9
 | |
| +	vmlal.s32 q2,d13,d8
 | |
| +	vmlal.s32 q2,d19,d3
 | |
| +	vmlal.s32 q2,d22,d2
 | |
| +	vmlal.s32 q2,d23,d1
 | |
| +	vmlal.s32 q2,d24,d0
 | |
| +	add r2,sp,#640
 | |
| +	vst1.8 {d20-d21},[r2,: 128]
 | |
| +	vmull.s32 q7,d18,d9
 | |
| +	vmlal.s32 q7,d26,d3
 | |
| +	vmlal.s32 q7,d19,d8
 | |
| +	vmlal.s32 q7,d27,d2
 | |
| +	vmlal.s32 q7,d22,d7
 | |
| +	vmlal.s32 q7,d28,d1
 | |
| +	vmlal.s32 q7,d23,d6
 | |
| +	vmlal.s32 q7,d29,d0
 | |
| +	add r2,sp,#656
 | |
| +	vst1.8 {d10-d11},[r2,: 128]
 | |
| +	vmull.s32 q5,d18,d3
 | |
| +	vmlal.s32 q5,d19,d2
 | |
| +	vmlal.s32 q5,d22,d1
 | |
| +	vmlal.s32 q5,d23,d0
 | |
| +	vmlal.s32 q5,d12,d8
 | |
| +	add r2,sp,#672
 | |
| +	vst1.8 {d16-d17},[r2,: 128]
 | |
| +	vmull.s32 q4,d18,d8
 | |
| +	vmlal.s32 q4,d26,d2
 | |
| +	vmlal.s32 q4,d19,d7
 | |
| +	vmlal.s32 q4,d27,d1
 | |
| +	vmlal.s32 q4,d22,d6
 | |
| +	vmlal.s32 q4,d28,d0
 | |
| +	vmull.s32 q8,d18,d7
 | |
| +	vmlal.s32 q8,d26,d1
 | |
| +	vmlal.s32 q8,d19,d6
 | |
| +	vmlal.s32 q8,d27,d0
 | |
| +	add r2,sp,#576
 | |
| +	vld1.8 {d20-d21},[r2,: 128]
 | |
| +	vmlal.s32 q7,d24,d21
 | |
| +	vmlal.s32 q7,d25,d20
 | |
| +	vmlal.s32 q4,d23,d21
 | |
| +	vmlal.s32 q4,d29,d20
 | |
| +	vmlal.s32 q8,d22,d21
 | |
| +	vmlal.s32 q8,d28,d20
 | |
| +	vmlal.s32 q5,d24,d20
 | |
| +	add r2,sp,#576
 | |
| +	vst1.8 {d14-d15},[r2,: 128]
 | |
| +	vmull.s32 q7,d18,d6
 | |
| +	vmlal.s32 q7,d26,d0
 | |
| +	add r2,sp,#656
 | |
| +	vld1.8 {d30-d31},[r2,: 128]
 | |
| +	vmlal.s32 q2,d30,d21
 | |
| +	vmlal.s32 q7,d19,d21
 | |
| +	vmlal.s32 q7,d27,d20
 | |
| +	add r2,sp,#624
 | |
| +	vld1.8 {d26-d27},[r2,: 128]
 | |
| +	vmlal.s32 q4,d25,d27
 | |
| +	vmlal.s32 q8,d29,d27
 | |
| +	vmlal.s32 q8,d25,d26
 | |
| +	vmlal.s32 q7,d28,d27
 | |
| +	vmlal.s32 q7,d29,d26
 | |
| +	add r2,sp,#608
 | |
| +	vld1.8 {d28-d29},[r2,: 128]
 | |
| +	vmlal.s32 q4,d24,d29
 | |
| +	vmlal.s32 q8,d23,d29
 | |
| +	vmlal.s32 q8,d24,d28
 | |
| +	vmlal.s32 q7,d22,d29
 | |
| +	vmlal.s32 q7,d23,d28
 | |
| +	add r2,sp,#608
 | |
| +	vst1.8 {d8-d9},[r2,: 128]
 | |
| +	add r2,sp,#560
 | |
| +	vld1.8 {d8-d9},[r2,: 128]
 | |
| +	vmlal.s32 q7,d24,d9
 | |
| +	vmlal.s32 q7,d25,d31
 | |
| +	vmull.s32 q1,d18,d2
 | |
| +	vmlal.s32 q1,d19,d1
 | |
| +	vmlal.s32 q1,d22,d0
 | |
| +	vmlal.s32 q1,d24,d27
 | |
| +	vmlal.s32 q1,d23,d20
 | |
| +	vmlal.s32 q1,d12,d7
 | |
| +	vmlal.s32 q1,d13,d6
 | |
| +	vmull.s32 q6,d18,d1
 | |
| +	vmlal.s32 q6,d19,d0
 | |
| +	vmlal.s32 q6,d23,d27
 | |
| +	vmlal.s32 q6,d22,d20
 | |
| +	vmlal.s32 q6,d24,d26
 | |
| +	vmull.s32 q0,d18,d0
 | |
| +	vmlal.s32 q0,d22,d27
 | |
| +	vmlal.s32 q0,d23,d26
 | |
| +	vmlal.s32 q0,d24,d31
 | |
| +	vmlal.s32 q0,d19,d20
 | |
| +	add r2,sp,#640
 | |
| +	vld1.8 {d18-d19},[r2,: 128]
 | |
| +	vmlal.s32 q2,d18,d7
 | |
| +	vmlal.s32 q2,d19,d6
 | |
| +	vmlal.s32 q5,d18,d6
 | |
| +	vmlal.s32 q5,d19,d21
 | |
| +	vmlal.s32 q1,d18,d21
 | |
| +	vmlal.s32 q1,d19,d29
 | |
| +	vmlal.s32 q0,d18,d28
 | |
| +	vmlal.s32 q0,d19,d9
 | |
| +	vmlal.s32 q6,d18,d29
 | |
| +	vmlal.s32 q6,d19,d28
 | |
| +	add r2,sp,#592
 | |
| +	vld1.8 {d18-d19},[r2,: 128]
 | |
| +	add r2,sp,#512
 | |
| +	vld1.8 {d22-d23},[r2,: 128]
 | |
| +	vmlal.s32 q5,d19,d7
 | |
| +	vmlal.s32 q0,d18,d21
 | |
| +	vmlal.s32 q0,d19,d29
 | |
| +	vmlal.s32 q6,d18,d6
 | |
| +	add r2,sp,#528
 | |
| +	vld1.8 {d6-d7},[r2,: 128]
 | |
| +	vmlal.s32 q6,d19,d21
 | |
| +	add r2,sp,#576
 | |
| +	vld1.8 {d18-d19},[r2,: 128]
 | |
| +	vmlal.s32 q0,d30,d8
 | |
| +	add r2,sp,#672
 | |
| +	vld1.8 {d20-d21},[r2,: 128]
 | |
| +	vmlal.s32 q5,d30,d29
 | |
| +	add r2,sp,#608
 | |
| +	vld1.8 {d24-d25},[r2,: 128]
 | |
| +	vmlal.s32 q1,d30,d28
 | |
| +	vadd.i64 q13,q0,q11
 | |
| +	vadd.i64 q14,q5,q11
 | |
| +	vmlal.s32 q6,d30,d9
 | |
| +	vshr.s64 q4,q13,#26
 | |
| +	vshr.s64 q13,q14,#26
 | |
| +	vadd.i64 q7,q7,q4
 | |
| +	vshl.i64 q4,q4,#26
 | |
| +	vadd.i64 q14,q7,q3
 | |
| +	vadd.i64 q9,q9,q13
 | |
| +	vshl.i64 q13,q13,#26
 | |
| +	vadd.i64 q15,q9,q3
 | |
| +	vsub.i64 q0,q0,q4
 | |
| +	vshr.s64 q4,q14,#25
 | |
| +	vsub.i64 q5,q5,q13
 | |
| +	vshr.s64 q13,q15,#25
 | |
| +	vadd.i64 q6,q6,q4
 | |
| +	vshl.i64 q4,q4,#25
 | |
| +	vadd.i64 q14,q6,q11
 | |
| +	vadd.i64 q2,q2,q13
 | |
| +	vsub.i64 q4,q7,q4
 | |
| +	vshr.s64 q7,q14,#26
 | |
| +	vshl.i64 q13,q13,#25
 | |
| +	vadd.i64 q14,q2,q11
 | |
| +	vadd.i64 q8,q8,q7
 | |
| +	vshl.i64 q7,q7,#26
 | |
| +	vadd.i64 q15,q8,q3
 | |
| +	vsub.i64 q9,q9,q13
 | |
| +	vshr.s64 q13,q14,#26
 | |
| +	vsub.i64 q6,q6,q7
 | |
| +	vshr.s64 q7,q15,#25
 | |
| +	vadd.i64 q10,q10,q13
 | |
| +	vshl.i64 q13,q13,#26
 | |
| +	vadd.i64 q14,q10,q3
 | |
| +	vadd.i64 q1,q1,q7
 | |
| +	add r2,r3,#288
 | |
| +	vshl.i64 q7,q7,#25
 | |
| +	add r4,r3,#96
 | |
| +	vadd.i64 q15,q1,q11
 | |
| +	add r2,r2,#8
 | |
| +	vsub.i64 q2,q2,q13
 | |
| +	add r4,r4,#8
 | |
| +	vshr.s64 q13,q14,#25
 | |
| +	vsub.i64 q7,q8,q7
 | |
| +	vshr.s64 q8,q15,#26
 | |
| +	vadd.i64 q14,q13,q13
 | |
| +	vadd.i64 q12,q12,q8
 | |
| +	vtrn.32 d12,d14
 | |
| +	vshl.i64 q8,q8,#26
 | |
| +	vtrn.32 d13,d15
 | |
| +	vadd.i64 q3,q12,q3
 | |
| +	vadd.i64 q0,q0,q14
 | |
| +	vst1.8 d12,[r2,: 64]!
 | |
| +	vshl.i64 q7,q13,#4
 | |
| +	vst1.8 d13,[r4,: 64]!
 | |
| +	vsub.i64 q1,q1,q8
 | |
| +	vshr.s64 q3,q3,#25
 | |
| +	vadd.i64 q0,q0,q7
 | |
| +	vadd.i64 q5,q5,q3
 | |
| +	vshl.i64 q3,q3,#25
 | |
| +	vadd.i64 q6,q5,q11
 | |
| +	vadd.i64 q0,q0,q13
 | |
| +	vshl.i64 q7,q13,#25
 | |
| +	vadd.i64 q8,q0,q11
 | |
| +	vsub.i64 q3,q12,q3
 | |
| +	vshr.s64 q6,q6,#26
 | |
| +	vsub.i64 q7,q10,q7
 | |
| +	vtrn.32 d2,d6
 | |
| +	vshr.s64 q8,q8,#26
 | |
| +	vtrn.32 d3,d7
 | |
| +	vadd.i64 q3,q9,q6
 | |
| +	vst1.8 d2,[r2,: 64]
 | |
| +	vshl.i64 q6,q6,#26
 | |
| +	vst1.8 d3,[r4,: 64]
 | |
| +	vadd.i64 q1,q4,q8
 | |
| +	vtrn.32 d4,d14
 | |
| +	vshl.i64 q4,q8,#26
 | |
| +	vtrn.32 d5,d15
 | |
| +	vsub.i64 q5,q5,q6
 | |
| +	add r2,r2,#16
 | |
| +	vsub.i64 q0,q0,q4
 | |
| +	vst1.8 d4,[r2,: 64]
 | |
| +	add r4,r4,#16
 | |
| +	vst1.8 d5,[r4,: 64]
 | |
| +	vtrn.32 d10,d6
 | |
| +	vtrn.32 d11,d7
 | |
| +	sub r2,r2,#8
 | |
| +	sub r4,r4,#8
 | |
| +	vtrn.32 d0,d2
 | |
| +	vtrn.32 d1,d3
 | |
| +	vst1.8 d10,[r2,: 64]
 | |
| +	vst1.8 d11,[r4,: 64]
 | |
| +	sub r2,r2,#24
 | |
| +	sub r4,r4,#24
 | |
| +	vst1.8 d0,[r2,: 64]
 | |
| +	vst1.8 d1,[r4,: 64]
 | |
| +	add r2,sp,#544
 | |
| +	add r4,r3,#144
 | |
| +	add r5,r3,#192
 | |
| +	vld1.8 {d0-d1},[r2,: 128]
 | |
| +	vld1.8 {d2-d3},[r4,: 128]!
 | |
| +	vld1.8 {d4-d5},[r5,: 128]!
 | |
| +	vzip.i32 q1,q2
 | |
| +	vld1.8 {d6-d7},[r4,: 128]!
 | |
| +	vld1.8 {d8-d9},[r5,: 128]!
 | |
| +	vshl.i32 q5,q1,#1
 | |
| +	vzip.i32 q3,q4
 | |
| +	vshl.i32 q6,q2,#1
 | |
| +	vld1.8 {d14},[r4,: 64]
 | |
| +	vshl.i32 q8,q3,#1
 | |
| +	vld1.8 {d15},[r5,: 64]
 | |
| +	vshl.i32 q9,q4,#1
 | |
| +	vmul.i32 d21,d7,d1
 | |
| +	vtrn.32 d14,d15
 | |
| +	vmul.i32 q11,q4,q0
 | |
| +	vmul.i32 q0,q7,q0
 | |
| +	vmull.s32 q12,d2,d2
 | |
| +	vmlal.s32 q12,d11,d1
 | |
| +	vmlal.s32 q12,d12,d0
 | |
| +	vmlal.s32 q12,d13,d23
 | |
| +	vmlal.s32 q12,d16,d22
 | |
| +	vmlal.s32 q12,d7,d21
 | |
| +	vmull.s32 q10,d2,d11
 | |
| +	vmlal.s32 q10,d4,d1
 | |
| +	vmlal.s32 q10,d13,d0
 | |
| +	vmlal.s32 q10,d6,d23
 | |
| +	vmlal.s32 q10,d17,d22
 | |
| +	vmull.s32 q13,d10,d4
 | |
| +	vmlal.s32 q13,d11,d3
 | |
| +	vmlal.s32 q13,d13,d1
 | |
| +	vmlal.s32 q13,d16,d0
 | |
| +	vmlal.s32 q13,d17,d23
 | |
| +	vmlal.s32 q13,d8,d22
 | |
| +	vmull.s32 q1,d10,d5
 | |
| +	vmlal.s32 q1,d11,d4
 | |
| +	vmlal.s32 q1,d6,d1
 | |
| +	vmlal.s32 q1,d17,d0
 | |
| +	vmlal.s32 q1,d8,d23
 | |
| +	vmull.s32 q14,d10,d6
 | |
| +	vmlal.s32 q14,d11,d13
 | |
| +	vmlal.s32 q14,d4,d4
 | |
| +	vmlal.s32 q14,d17,d1
 | |
| +	vmlal.s32 q14,d18,d0
 | |
| +	vmlal.s32 q14,d9,d23
 | |
| +	vmull.s32 q11,d10,d7
 | |
| +	vmlal.s32 q11,d11,d6
 | |
| +	vmlal.s32 q11,d12,d5
 | |
| +	vmlal.s32 q11,d8,d1
 | |
| +	vmlal.s32 q11,d19,d0
 | |
| +	vmull.s32 q15,d10,d8
 | |
| +	vmlal.s32 q15,d11,d17
 | |
| +	vmlal.s32 q15,d12,d6
 | |
| +	vmlal.s32 q15,d13,d5
 | |
| +	vmlal.s32 q15,d19,d1
 | |
| +	vmlal.s32 q15,d14,d0
 | |
| +	vmull.s32 q2,d10,d9
 | |
| +	vmlal.s32 q2,d11,d8
 | |
| +	vmlal.s32 q2,d12,d7
 | |
| +	vmlal.s32 q2,d13,d6
 | |
| +	vmlal.s32 q2,d14,d1
 | |
| +	vmull.s32 q0,d15,d1
 | |
| +	vmlal.s32 q0,d10,d14
 | |
| +	vmlal.s32 q0,d11,d19
 | |
| +	vmlal.s32 q0,d12,d8
 | |
| +	vmlal.s32 q0,d13,d17
 | |
| +	vmlal.s32 q0,d6,d6
 | |
| +	add r2,sp,#512
 | |
| +	vld1.8 {d18-d19},[r2,: 128]
 | |
| +	vmull.s32 q3,d16,d7
 | |
| +	vmlal.s32 q3,d10,d15
 | |
| +	vmlal.s32 q3,d11,d14
 | |
| +	vmlal.s32 q3,d12,d9
 | |
| +	vmlal.s32 q3,d13,d8
 | |
| +	add r2,sp,#528
 | |
| +	vld1.8 {d8-d9},[r2,: 128]
 | |
| +	vadd.i64 q5,q12,q9
 | |
| +	vadd.i64 q6,q15,q9
 | |
| +	vshr.s64 q5,q5,#26
 | |
| +	vshr.s64 q6,q6,#26
 | |
| +	vadd.i64 q7,q10,q5
 | |
| +	vshl.i64 q5,q5,#26
 | |
| +	vadd.i64 q8,q7,q4
 | |
| +	vadd.i64 q2,q2,q6
 | |
| +	vshl.i64 q6,q6,#26
 | |
| +	vadd.i64 q10,q2,q4
 | |
| +	vsub.i64 q5,q12,q5
 | |
| +	vshr.s64 q8,q8,#25
 | |
| +	vsub.i64 q6,q15,q6
 | |
| +	vshr.s64 q10,q10,#25
 | |
| +	vadd.i64 q12,q13,q8
 | |
| +	vshl.i64 q8,q8,#25
 | |
| +	vadd.i64 q13,q12,q9
 | |
| +	vadd.i64 q0,q0,q10
 | |
| +	vsub.i64 q7,q7,q8
 | |
| +	vshr.s64 q8,q13,#26
 | |
| +	vshl.i64 q10,q10,#25
 | |
| +	vadd.i64 q13,q0,q9
 | |
| +	vadd.i64 q1,q1,q8
 | |
| +	vshl.i64 q8,q8,#26
 | |
| +	vadd.i64 q15,q1,q4
 | |
| +	vsub.i64 q2,q2,q10
 | |
| +	vshr.s64 q10,q13,#26
 | |
| +	vsub.i64 q8,q12,q8
 | |
| +	vshr.s64 q12,q15,#25
 | |
| +	vadd.i64 q3,q3,q10
 | |
| +	vshl.i64 q10,q10,#26
 | |
| +	vadd.i64 q13,q3,q4
 | |
| +	vadd.i64 q14,q14,q12
 | |
| +	add r2,r3,#144
 | |
| +	vshl.i64 q12,q12,#25
 | |
| +	add r4,r3,#192
 | |
| +	vadd.i64 q15,q14,q9
 | |
| +	add r2,r2,#8
 | |
| +	vsub.i64 q0,q0,q10
 | |
| +	add r4,r4,#8
 | |
| +	vshr.s64 q10,q13,#25
 | |
| +	vsub.i64 q1,q1,q12
 | |
| +	vshr.s64 q12,q15,#26
 | |
| +	vadd.i64 q13,q10,q10
 | |
| +	vadd.i64 q11,q11,q12
 | |
| +	vtrn.32 d16,d2
 | |
| +	vshl.i64 q12,q12,#26
 | |
| +	vtrn.32 d17,d3
 | |
| +	vadd.i64 q1,q11,q4
 | |
| +	vadd.i64 q4,q5,q13
 | |
| +	vst1.8 d16,[r2,: 64]!
 | |
| +	vshl.i64 q5,q10,#4
 | |
| +	vst1.8 d17,[r4,: 64]!
 | |
| +	vsub.i64 q8,q14,q12
 | |
| +	vshr.s64 q1,q1,#25
 | |
| +	vadd.i64 q4,q4,q5
 | |
| +	vadd.i64 q5,q6,q1
 | |
| +	vshl.i64 q1,q1,#25
 | |
| +	vadd.i64 q6,q5,q9
 | |
| +	vadd.i64 q4,q4,q10
 | |
| +	vshl.i64 q10,q10,#25
 | |
| +	vadd.i64 q9,q4,q9
 | |
| +	vsub.i64 q1,q11,q1
 | |
| +	vshr.s64 q6,q6,#26
 | |
| +	vsub.i64 q3,q3,q10
 | |
| +	vtrn.32 d16,d2
 | |
| +	vshr.s64 q9,q9,#26
 | |
| +	vtrn.32 d17,d3
 | |
| +	vadd.i64 q1,q2,q6
 | |
| +	vst1.8 d16,[r2,: 64]
 | |
| +	vshl.i64 q2,q6,#26
 | |
| +	vst1.8 d17,[r4,: 64]
 | |
| +	vadd.i64 q6,q7,q9
 | |
| +	vtrn.32 d0,d6
 | |
| +	vshl.i64 q7,q9,#26
 | |
| +	vtrn.32 d1,d7
 | |
| +	vsub.i64 q2,q5,q2
 | |
| +	add r2,r2,#16
 | |
| +	vsub.i64 q3,q4,q7
 | |
| +	vst1.8 d0,[r2,: 64]
 | |
| +	add r4,r4,#16
 | |
| +	vst1.8 d1,[r4,: 64]
 | |
| +	vtrn.32 d4,d2
 | |
| +	vtrn.32 d5,d3
 | |
| +	sub r2,r2,#8
 | |
| +	sub r4,r4,#8
 | |
| +	vtrn.32 d6,d12
 | |
| +	vtrn.32 d7,d13
 | |
| +	vst1.8 d4,[r2,: 64]
 | |
| +	vst1.8 d5,[r4,: 64]
 | |
| +	sub r2,r2,#24
 | |
| +	sub r4,r4,#24
 | |
| +	vst1.8 d6,[r2,: 64]
 | |
| +	vst1.8 d7,[r4,: 64]
 | |
| +	add r2,r3,#336
 | |
| +	add r4,r3,#288
 | |
| +	vld1.8 {d0-d1},[r2,: 128]!
 | |
| +	vld1.8 {d2-d3},[r4,: 128]!
 | |
| +	vadd.i32 q0,q0,q1
 | |
| +	vld1.8 {d2-d3},[r2,: 128]!
 | |
| +	vld1.8 {d4-d5},[r4,: 128]!
 | |
| +	vadd.i32 q1,q1,q2
 | |
| +	add r5,r3,#288
 | |
| +	vld1.8 {d4},[r2,: 64]
 | |
| +	vld1.8 {d6},[r4,: 64]
 | |
| +	vadd.i32 q2,q2,q3
 | |
| +	vst1.8 {d0-d1},[r5,: 128]!
 | |
| +	vst1.8 {d2-d3},[r5,: 128]!
 | |
| +	vst1.8 d4,[r5,: 64]
 | |
| +	add r2,r3,#48
 | |
| +	add r4,r3,#144
 | |
| +	vld1.8 {d0-d1},[r4,: 128]!
 | |
| +	vld1.8 {d2-d3},[r4,: 128]!
 | |
| +	vld1.8 {d4},[r4,: 64]
 | |
| +	add r4,r3,#288
 | |
| +	vld1.8 {d6-d7},[r4,: 128]!
 | |
| +	vtrn.32 q0,q3
 | |
| +	vld1.8 {d8-d9},[r4,: 128]!
 | |
| +	vshl.i32 q5,q0,#4
 | |
| +	vtrn.32 q1,q4
 | |
| +	vshl.i32 q6,q3,#4
 | |
| +	vadd.i32 q5,q5,q0
 | |
| +	vadd.i32 q6,q6,q3
 | |
| +	vshl.i32 q7,q1,#4
 | |
| +	vld1.8 {d5},[r4,: 64]
 | |
| +	vshl.i32 q8,q4,#4
 | |
| +	vtrn.32 d4,d5
 | |
| +	vadd.i32 q7,q7,q1
 | |
| +	vadd.i32 q8,q8,q4
 | |
| +	vld1.8 {d18-d19},[r2,: 128]!
 | |
| +	vshl.i32 q10,q2,#4
 | |
| +	vld1.8 {d22-d23},[r2,: 128]!
 | |
| +	vadd.i32 q10,q10,q2
 | |
| +	vld1.8 {d24},[r2,: 64]
 | |
| +	vadd.i32 q5,q5,q0
 | |
| +	add r2,r3,#240
 | |
| +	vld1.8 {d26-d27},[r2,: 128]!
 | |
| +	vadd.i32 q6,q6,q3
 | |
| +	vld1.8 {d28-d29},[r2,: 128]!
 | |
| +	vadd.i32 q8,q8,q4
 | |
| +	vld1.8 {d25},[r2,: 64]
 | |
| +	vadd.i32 q10,q10,q2
 | |
| +	vtrn.32 q9,q13
 | |
| +	vadd.i32 q7,q7,q1
 | |
| +	vadd.i32 q5,q5,q0
 | |
| +	vtrn.32 q11,q14
 | |
| +	vadd.i32 q6,q6,q3
 | |
| +	add r2,sp,#560
 | |
| +	vadd.i32 q10,q10,q2
 | |
| +	vtrn.32 d24,d25
 | |
| +	vst1.8 {d12-d13},[r2,: 128]
 | |
| +	vshl.i32 q6,q13,#1
 | |
| +	add r2,sp,#576
 | |
| +	vst1.8 {d20-d21},[r2,: 128]
 | |
| +	vshl.i32 q10,q14,#1
 | |
| +	add r2,sp,#592
 | |
| +	vst1.8 {d12-d13},[r2,: 128]
 | |
| +	vshl.i32 q15,q12,#1
 | |
| +	vadd.i32 q8,q8,q4
 | |
| +	vext.32 d10,d31,d30,#0
 | |
| +	vadd.i32 q7,q7,q1
 | |
| +	add r2,sp,#608
 | |
| +	vst1.8 {d16-d17},[r2,: 128]
 | |
| +	vmull.s32 q8,d18,d5
 | |
| +	vmlal.s32 q8,d26,d4
 | |
| +	vmlal.s32 q8,d19,d9
 | |
| +	vmlal.s32 q8,d27,d3
 | |
| +	vmlal.s32 q8,d22,d8
 | |
| +	vmlal.s32 q8,d28,d2
 | |
| +	vmlal.s32 q8,d23,d7
 | |
| +	vmlal.s32 q8,d29,d1
 | |
| +	vmlal.s32 q8,d24,d6
 | |
| +	vmlal.s32 q8,d25,d0
 | |
| +	add r2,sp,#624
 | |
| +	vst1.8 {d14-d15},[r2,: 128]
 | |
| +	vmull.s32 q2,d18,d4
 | |
| +	vmlal.s32 q2,d12,d9
 | |
| +	vmlal.s32 q2,d13,d8
 | |
| +	vmlal.s32 q2,d19,d3
 | |
| +	vmlal.s32 q2,d22,d2
 | |
| +	vmlal.s32 q2,d23,d1
 | |
| +	vmlal.s32 q2,d24,d0
 | |
| +	add r2,sp,#640
 | |
| +	vst1.8 {d20-d21},[r2,: 128]
 | |
| +	vmull.s32 q7,d18,d9
 | |
| +	vmlal.s32 q7,d26,d3
 | |
| +	vmlal.s32 q7,d19,d8
 | |
| +	vmlal.s32 q7,d27,d2
 | |
| +	vmlal.s32 q7,d22,d7
 | |
| +	vmlal.s32 q7,d28,d1
 | |
| +	vmlal.s32 q7,d23,d6
 | |
| +	vmlal.s32 q7,d29,d0
 | |
| +	add r2,sp,#656
 | |
| +	vst1.8 {d10-d11},[r2,: 128]
 | |
| +	vmull.s32 q5,d18,d3
 | |
| +	vmlal.s32 q5,d19,d2
 | |
| +	vmlal.s32 q5,d22,d1
 | |
| +	vmlal.s32 q5,d23,d0
 | |
| +	vmlal.s32 q5,d12,d8
 | |
| +	add r2,sp,#672
 | |
| +	vst1.8 {d16-d17},[r2,: 128]
 | |
| +	vmull.s32 q4,d18,d8
 | |
| +	vmlal.s32 q4,d26,d2
 | |
| +	vmlal.s32 q4,d19,d7
 | |
| +	vmlal.s32 q4,d27,d1
 | |
| +	vmlal.s32 q4,d22,d6
 | |
| +	vmlal.s32 q4,d28,d0
 | |
| +	vmull.s32 q8,d18,d7
 | |
| +	vmlal.s32 q8,d26,d1
 | |
| +	vmlal.s32 q8,d19,d6
 | |
| +	vmlal.s32 q8,d27,d0
 | |
| +	add r2,sp,#576
 | |
| +	vld1.8 {d20-d21},[r2,: 128]
 | |
| +	vmlal.s32 q7,d24,d21
 | |
| +	vmlal.s32 q7,d25,d20
 | |
| +	vmlal.s32 q4,d23,d21
 | |
| +	vmlal.s32 q4,d29,d20
 | |
| +	vmlal.s32 q8,d22,d21
 | |
| +	vmlal.s32 q8,d28,d20
 | |
| +	vmlal.s32 q5,d24,d20
 | |
| +	add r2,sp,#576
 | |
| +	vst1.8 {d14-d15},[r2,: 128]
 | |
| +	vmull.s32 q7,d18,d6
 | |
| +	vmlal.s32 q7,d26,d0
 | |
| +	add r2,sp,#656
 | |
| +	vld1.8 {d30-d31},[r2,: 128]
 | |
| +	vmlal.s32 q2,d30,d21
 | |
| +	vmlal.s32 q7,d19,d21
 | |
| +	vmlal.s32 q7,d27,d20
 | |
| +	add r2,sp,#624
 | |
| +	vld1.8 {d26-d27},[r2,: 128]
 | |
| +	vmlal.s32 q4,d25,d27
 | |
| +	vmlal.s32 q8,d29,d27
 | |
| +	vmlal.s32 q8,d25,d26
 | |
| +	vmlal.s32 q7,d28,d27
 | |
| +	vmlal.s32 q7,d29,d26
 | |
| +	add r2,sp,#608
 | |
| +	vld1.8 {d28-d29},[r2,: 128]
 | |
| +	vmlal.s32 q4,d24,d29
 | |
| +	vmlal.s32 q8,d23,d29
 | |
| +	vmlal.s32 q8,d24,d28
 | |
| +	vmlal.s32 q7,d22,d29
 | |
| +	vmlal.s32 q7,d23,d28
 | |
| +	add r2,sp,#608
 | |
| +	vst1.8 {d8-d9},[r2,: 128]
 | |
| +	add r2,sp,#560
 | |
| +	vld1.8 {d8-d9},[r2,: 128]
 | |
| +	vmlal.s32 q7,d24,d9
 | |
| +	vmlal.s32 q7,d25,d31
 | |
| +	vmull.s32 q1,d18,d2
 | |
| +	vmlal.s32 q1,d19,d1
 | |
| +	vmlal.s32 q1,d22,d0
 | |
| +	vmlal.s32 q1,d24,d27
 | |
| +	vmlal.s32 q1,d23,d20
 | |
| +	vmlal.s32 q1,d12,d7
 | |
| +	vmlal.s32 q1,d13,d6
 | |
| +	vmull.s32 q6,d18,d1
 | |
| +	vmlal.s32 q6,d19,d0
 | |
| +	vmlal.s32 q6,d23,d27
 | |
| +	vmlal.s32 q6,d22,d20
 | |
| +	vmlal.s32 q6,d24,d26
 | |
| +	vmull.s32 q0,d18,d0
 | |
| +	vmlal.s32 q0,d22,d27
 | |
| +	vmlal.s32 q0,d23,d26
 | |
| +	vmlal.s32 q0,d24,d31
 | |
| +	vmlal.s32 q0,d19,d20
 | |
| +	add r2,sp,#640
 | |
| +	vld1.8 {d18-d19},[r2,: 128]
 | |
| +	vmlal.s32 q2,d18,d7
 | |
| +	vmlal.s32 q2,d19,d6
 | |
| +	vmlal.s32 q5,d18,d6
 | |
| +	vmlal.s32 q5,d19,d21
 | |
| +	vmlal.s32 q1,d18,d21
 | |
| +	vmlal.s32 q1,d19,d29
 | |
| +	vmlal.s32 q0,d18,d28
 | |
| +	vmlal.s32 q0,d19,d9
 | |
| +	vmlal.s32 q6,d18,d29
 | |
| +	vmlal.s32 q6,d19,d28
 | |
| +	add r2,sp,#592
 | |
| +	vld1.8 {d18-d19},[r2,: 128]
 | |
| +	add r2,sp,#512
 | |
| +	vld1.8 {d22-d23},[r2,: 128]
 | |
| +	vmlal.s32 q5,d19,d7
 | |
| +	vmlal.s32 q0,d18,d21
 | |
| +	vmlal.s32 q0,d19,d29
 | |
| +	vmlal.s32 q6,d18,d6
 | |
| +	add r2,sp,#528
 | |
| +	vld1.8 {d6-d7},[r2,: 128]
 | |
| +	vmlal.s32 q6,d19,d21
 | |
| +	add r2,sp,#576
 | |
| +	vld1.8 {d18-d19},[r2,: 128]
 | |
| +	vmlal.s32 q0,d30,d8
 | |
| +	add r2,sp,#672
 | |
| +	vld1.8 {d20-d21},[r2,: 128]
 | |
| +	vmlal.s32 q5,d30,d29
 | |
| +	add r2,sp,#608
 | |
| +	vld1.8 {d24-d25},[r2,: 128]
 | |
| +	vmlal.s32 q1,d30,d28
 | |
| +	vadd.i64 q13,q0,q11
 | |
| +	vadd.i64 q14,q5,q11
 | |
| +	vmlal.s32 q6,d30,d9
 | |
| +	vshr.s64 q4,q13,#26
 | |
| +	vshr.s64 q13,q14,#26
 | |
| +	vadd.i64 q7,q7,q4
 | |
| +	vshl.i64 q4,q4,#26
 | |
| +	vadd.i64 q14,q7,q3
 | |
| +	vadd.i64 q9,q9,q13
 | |
| +	vshl.i64 q13,q13,#26
 | |
| +	vadd.i64 q15,q9,q3
 | |
| +	vsub.i64 q0,q0,q4
 | |
| +	vshr.s64 q4,q14,#25
 | |
| +	vsub.i64 q5,q5,q13
 | |
| +	vshr.s64 q13,q15,#25
 | |
| +	vadd.i64 q6,q6,q4
 | |
| +	vshl.i64 q4,q4,#25
 | |
| +	vadd.i64 q14,q6,q11
 | |
| +	vadd.i64 q2,q2,q13
 | |
| +	vsub.i64 q4,q7,q4
 | |
| +	vshr.s64 q7,q14,#26
 | |
| +	vshl.i64 q13,q13,#25
 | |
| +	vadd.i64 q14,q2,q11
 | |
| +	vadd.i64 q8,q8,q7
 | |
| +	vshl.i64 q7,q7,#26
 | |
| +	vadd.i64 q15,q8,q3
 | |
| +	vsub.i64 q9,q9,q13
 | |
| +	vshr.s64 q13,q14,#26
 | |
| +	vsub.i64 q6,q6,q7
 | |
| +	vshr.s64 q7,q15,#25
 | |
| +	vadd.i64 q10,q10,q13
 | |
| +	vshl.i64 q13,q13,#26
 | |
| +	vadd.i64 q14,q10,q3
 | |
| +	vadd.i64 q1,q1,q7
 | |
| +	add r2,r3,#240
 | |
| +	vshl.i64 q7,q7,#25
 | |
| +	add r4,r3,#144
 | |
| +	vadd.i64 q15,q1,q11
 | |
| +	add r2,r2,#8
 | |
| +	vsub.i64 q2,q2,q13
 | |
| +	add r4,r4,#8
 | |
| +	vshr.s64 q13,q14,#25
 | |
| +	vsub.i64 q7,q8,q7
 | |
| +	vshr.s64 q8,q15,#26
 | |
| +	vadd.i64 q14,q13,q13
 | |
| +	vadd.i64 q12,q12,q8
 | |
| +	vtrn.32 d12,d14
 | |
| +	vshl.i64 q8,q8,#26
 | |
| +	vtrn.32 d13,d15
 | |
| +	vadd.i64 q3,q12,q3
 | |
| +	vadd.i64 q0,q0,q14
 | |
| +	vst1.8 d12,[r2,: 64]!
 | |
| +	vshl.i64 q7,q13,#4
 | |
| +	vst1.8 d13,[r4,: 64]!
 | |
| +	vsub.i64 q1,q1,q8
 | |
| +	vshr.s64 q3,q3,#25
 | |
| +	vadd.i64 q0,q0,q7
 | |
| +	vadd.i64 q5,q5,q3
 | |
| +	vshl.i64 q3,q3,#25
 | |
| +	vadd.i64 q6,q5,q11
 | |
| +	vadd.i64 q0,q0,q13
 | |
| +	vshl.i64 q7,q13,#25
 | |
| +	vadd.i64 q8,q0,q11
 | |
| +	vsub.i64 q3,q12,q3
 | |
| +	vshr.s64 q6,q6,#26
 | |
| +	vsub.i64 q7,q10,q7
 | |
| +	vtrn.32 d2,d6
 | |
| +	vshr.s64 q8,q8,#26
 | |
| +	vtrn.32 d3,d7
 | |
| +	vadd.i64 q3,q9,q6
 | |
| +	vst1.8 d2,[r2,: 64]
 | |
| +	vshl.i64 q6,q6,#26
 | |
| +	vst1.8 d3,[r4,: 64]
 | |
| +	vadd.i64 q1,q4,q8
 | |
| +	vtrn.32 d4,d14
 | |
| +	vshl.i64 q4,q8,#26
 | |
| +	vtrn.32 d5,d15
 | |
| +	vsub.i64 q5,q5,q6
 | |
| +	add r2,r2,#16
 | |
| +	vsub.i64 q0,q0,q4
 | |
| +	vst1.8 d4,[r2,: 64]
 | |
| +	add r4,r4,#16
 | |
| +	vst1.8 d5,[r4,: 64]
 | |
| +	vtrn.32 d10,d6
 | |
| +	vtrn.32 d11,d7
 | |
| +	sub r2,r2,#8
 | |
| +	sub r4,r4,#8
 | |
| +	vtrn.32 d0,d2
 | |
| +	vtrn.32 d1,d3
 | |
| +	vst1.8 d10,[r2,: 64]
 | |
| +	vst1.8 d11,[r4,: 64]
 | |
| +	sub r2,r2,#24
 | |
| +	sub r4,r4,#24
 | |
| +	vst1.8 d0,[r2,: 64]
 | |
| +	vst1.8 d1,[r4,: 64]
 | |
| +	ldr r2,[sp,#488]
 | |
| +	ldr r4,[sp,#492]
 | |
| +	subs r5,r2,#1
 | |
| +	bge .Lmainloop
 | |
| +	add r1,r3,#144
 | |
| +	add r2,r3,#336
 | |
| +	vld1.8 {d0-d1},[r1,: 128]!
 | |
| +	vld1.8 {d2-d3},[r1,: 128]!
 | |
| +	vld1.8 {d4},[r1,: 64]
 | |
| +	vst1.8 {d0-d1},[r2,: 128]!
 | |
| +	vst1.8 {d2-d3},[r2,: 128]!
 | |
| +	vst1.8 d4,[r2,: 64]
 | |
| +	ldr r1,=0
 | |
| +	.Linvertloop:
 | |
| +	add r2,r3,#144
 | |
| +	ldr r4,=0
 | |
| +	ldr r5,=2
 | |
| +	cmp r1,#1
 | |
| +	ldreq r5,=1
 | |
| +	addeq r2,r3,#336
 | |
| +	addeq r4,r3,#48
 | |
| +	cmp r1,#2
 | |
| +	ldreq r5,=1
 | |
| +	addeq r2,r3,#48
 | |
| +	cmp r1,#3
 | |
| +	ldreq r5,=5
 | |
| +	addeq r4,r3,#336
 | |
| +	cmp r1,#4
 | |
| +	ldreq r5,=10
 | |
| +	cmp r1,#5
 | |
| +	ldreq r5,=20
 | |
| +	cmp r1,#6
 | |
| +	ldreq r5,=10
 | |
| +	addeq r2,r3,#336
 | |
| +	addeq r4,r3,#336
 | |
| +	cmp r1,#7
 | |
| +	ldreq r5,=50
 | |
| +	cmp r1,#8
 | |
| +	ldreq r5,=100
 | |
| +	cmp r1,#9
 | |
| +	ldreq r5,=50
 | |
| +	addeq r2,r3,#336
 | |
| +	cmp r1,#10
 | |
| +	ldreq r5,=5
 | |
| +	addeq r2,r3,#48
 | |
| +	cmp r1,#11
 | |
| +	ldreq r5,=0
 | |
| +	addeq r2,r3,#96
 | |
| +	add r6,r3,#144
 | |
| +	add r7,r3,#288
 | |
| +	vld1.8 {d0-d1},[r6,: 128]!
 | |
| +	vld1.8 {d2-d3},[r6,: 128]!
 | |
| +	vld1.8 {d4},[r6,: 64]
 | |
| +	vst1.8 {d0-d1},[r7,: 128]!
 | |
| +	vst1.8 {d2-d3},[r7,: 128]!
 | |
| +	vst1.8 d4,[r7,: 64]
 | |
| +	cmp r5,#0
 | |
| +	beq .Lskipsquaringloop
 | |
| +	.Lsquaringloop:
 | |
| +	add r6,r3,#288
 | |
| +	add r7,r3,#288
 | |
| +	add r8,r3,#288
 | |
| +	vmov.i32 q0,#19
 | |
| +	vmov.i32 q1,#0
 | |
| +	vmov.i32 q2,#1
 | |
| +	vzip.i32 q1,q2
 | |
| +	vld1.8 {d4-d5},[r7,: 128]!
 | |
| +	vld1.8 {d6-d7},[r7,: 128]!
 | |
| +	vld1.8 {d9},[r7,: 64]
 | |
| +	vld1.8 {d10-d11},[r6,: 128]!
 | |
| +	add r7,sp,#416
 | |
| +	vld1.8 {d12-d13},[r6,: 128]!
 | |
| +	vmul.i32 q7,q2,q0
 | |
| +	vld1.8 {d8},[r6,: 64]
 | |
| +	vext.32 d17,d11,d10,#1
 | |
| +	vmul.i32 q9,q3,q0
 | |
| +	vext.32 d16,d10,d8,#1
 | |
| +	vshl.u32 q10,q5,q1
 | |
| +	vext.32 d22,d14,d4,#1
 | |
| +	vext.32 d24,d18,d6,#1
 | |
| +	vshl.u32 q13,q6,q1
 | |
| +	vshl.u32 d28,d8,d2
 | |
| +	vrev64.i32 d22,d22
 | |
| +	vmul.i32 d1,d9,d1
 | |
| +	vrev64.i32 d24,d24
 | |
| +	vext.32 d29,d8,d13,#1
 | |
| +	vext.32 d0,d1,d9,#1
 | |
| +	vrev64.i32 d0,d0
 | |
| +	vext.32 d2,d9,d1,#1
 | |
| +	vext.32 d23,d15,d5,#1
 | |
| +	vmull.s32 q4,d20,d4
 | |
| +	vrev64.i32 d23,d23
 | |
| +	vmlal.s32 q4,d21,d1
 | |
| +	vrev64.i32 d2,d2
 | |
| +	vmlal.s32 q4,d26,d19
 | |
| +	vext.32 d3,d5,d15,#1
 | |
| +	vmlal.s32 q4,d27,d18
 | |
| +	vrev64.i32 d3,d3
 | |
| +	vmlal.s32 q4,d28,d15
 | |
| +	vext.32 d14,d12,d11,#1
 | |
| +	vmull.s32 q5,d16,d23
 | |
| +	vext.32 d15,d13,d12,#1
 | |
| +	vmlal.s32 q5,d17,d4
 | |
| +	vst1.8 d8,[r7,: 64]!
 | |
| +	vmlal.s32 q5,d14,d1
 | |
| +	vext.32 d12,d9,d8,#0
 | |
| +	vmlal.s32 q5,d15,d19
 | |
| +	vmov.i64 d13,#0
 | |
| +	vmlal.s32 q5,d29,d18
 | |
| +	vext.32 d25,d19,d7,#1
 | |
| +	vmlal.s32 q6,d20,d5
 | |
| +	vrev64.i32 d25,d25
 | |
| +	vmlal.s32 q6,d21,d4
 | |
| +	vst1.8 d11,[r7,: 64]!
 | |
| +	vmlal.s32 q6,d26,d1
 | |
| +	vext.32 d9,d10,d10,#0
 | |
| +	vmlal.s32 q6,d27,d19
 | |
| +	vmov.i64 d8,#0
 | |
| +	vmlal.s32 q6,d28,d18
 | |
| +	vmlal.s32 q4,d16,d24
 | |
| +	vmlal.s32 q4,d17,d5
 | |
| +	vmlal.s32 q4,d14,d4
 | |
| +	vst1.8 d12,[r7,: 64]!
 | |
| +	vmlal.s32 q4,d15,d1
 | |
| +	vext.32 d10,d13,d12,#0
 | |
| +	vmlal.s32 q4,d29,d19
 | |
| +	vmov.i64 d11,#0
 | |
| +	vmlal.s32 q5,d20,d6
 | |
| +	vmlal.s32 q5,d21,d5
 | |
| +	vmlal.s32 q5,d26,d4
 | |
| +	vext.32 d13,d8,d8,#0
 | |
| +	vmlal.s32 q5,d27,d1
 | |
| +	vmov.i64 d12,#0
 | |
| +	vmlal.s32 q5,d28,d19
 | |
| +	vst1.8 d9,[r7,: 64]!
 | |
| +	vmlal.s32 q6,d16,d25
 | |
| +	vmlal.s32 q6,d17,d6
 | |
| +	vst1.8 d10,[r7,: 64]
 | |
| +	vmlal.s32 q6,d14,d5
 | |
| +	vext.32 d8,d11,d10,#0
 | |
| +	vmlal.s32 q6,d15,d4
 | |
| +	vmov.i64 d9,#0
 | |
| +	vmlal.s32 q6,d29,d1
 | |
| +	vmlal.s32 q4,d20,d7
 | |
| +	vmlal.s32 q4,d21,d6
 | |
| +	vmlal.s32 q4,d26,d5
 | |
| +	vext.32 d11,d12,d12,#0
 | |
| +	vmlal.s32 q4,d27,d4
 | |
| +	vmov.i64 d10,#0
 | |
| +	vmlal.s32 q4,d28,d1
 | |
| +	vmlal.s32 q5,d16,d0
 | |
| +	sub r6,r7,#32
 | |
| +	vmlal.s32 q5,d17,d7
 | |
| +	vmlal.s32 q5,d14,d6
 | |
| +	vext.32 d30,d9,d8,#0
 | |
| +	vmlal.s32 q5,d15,d5
 | |
| +	vld1.8 {d31},[r6,: 64]!
 | |
| +	vmlal.s32 q5,d29,d4
 | |
| +	vmlal.s32 q15,d20,d0
 | |
| +	vext.32 d0,d6,d18,#1
 | |
| +	vmlal.s32 q15,d21,d25
 | |
| +	vrev64.i32 d0,d0
 | |
| +	vmlal.s32 q15,d26,d24
 | |
| +	vext.32 d1,d7,d19,#1
 | |
| +	vext.32 d7,d10,d10,#0
 | |
| +	vmlal.s32 q15,d27,d23
 | |
| +	vrev64.i32 d1,d1
 | |
| +	vld1.8 {d6},[r6,: 64]
 | |
| +	vmlal.s32 q15,d28,d22
 | |
| +	vmlal.s32 q3,d16,d4
 | |
| +	add r6,r6,#24
 | |
| +	vmlal.s32 q3,d17,d2
 | |
| +	vext.32 d4,d31,d30,#0
 | |
| +	vmov d17,d11
 | |
| +	vmlal.s32 q3,d14,d1
 | |
| +	vext.32 d11,d13,d13,#0
 | |
| +	vext.32 d13,d30,d30,#0
 | |
| +	vmlal.s32 q3,d15,d0
 | |
| +	vext.32 d1,d8,d8,#0
 | |
| +	vmlal.s32 q3,d29,d3
 | |
| +	vld1.8 {d5},[r6,: 64]
 | |
| +	sub r6,r6,#16
 | |
| +	vext.32 d10,d6,d6,#0
 | |
| +	vmov.i32 q1,#0xffffffff
 | |
| +	vshl.i64 q4,q1,#25
 | |
| +	add r7,sp,#512
 | |
| +	vld1.8 {d14-d15},[r7,: 128]
 | |
| +	vadd.i64 q9,q2,q7
 | |
| +	vshl.i64 q1,q1,#26
 | |
| +	vshr.s64 q10,q9,#26
 | |
| +	vld1.8 {d0},[r6,: 64]!
 | |
| +	vadd.i64 q5,q5,q10
 | |
| +	vand q9,q9,q1
 | |
| +	vld1.8 {d16},[r6,: 64]!
 | |
| +	add r6,sp,#528
 | |
| +	vld1.8 {d20-d21},[r6,: 128]
 | |
| +	vadd.i64 q11,q5,q10
 | |
| +	vsub.i64 q2,q2,q9
 | |
| +	vshr.s64 q9,q11,#25
 | |
| +	vext.32 d12,d5,d4,#0
 | |
| +	vand q11,q11,q4
 | |
| +	vadd.i64 q0,q0,q9
 | |
| +	vmov d19,d7
 | |
| +	vadd.i64 q3,q0,q7
 | |
| +	vsub.i64 q5,q5,q11
 | |
| +	vshr.s64 q11,q3,#26
 | |
| +	vext.32 d18,d11,d10,#0
 | |
| +	vand q3,q3,q1
 | |
| +	vadd.i64 q8,q8,q11
 | |
| +	vadd.i64 q11,q8,q10
 | |
| +	vsub.i64 q0,q0,q3
 | |
| +	vshr.s64 q3,q11,#25
 | |
| +	vand q11,q11,q4
 | |
| +	vadd.i64 q3,q6,q3
 | |
| +	vadd.i64 q6,q3,q7
 | |
| +	vsub.i64 q8,q8,q11
 | |
| +	vshr.s64 q11,q6,#26
 | |
| +	vand q6,q6,q1
 | |
| +	vadd.i64 q9,q9,q11
 | |
| +	vadd.i64 d25,d19,d21
 | |
| +	vsub.i64 q3,q3,q6
 | |
| +	vshr.s64 d23,d25,#25
 | |
| +	vand q4,q12,q4
 | |
| +	vadd.i64 d21,d23,d23
 | |
| +	vshl.i64 d25,d23,#4
 | |
| +	vadd.i64 d21,d21,d23
 | |
| +	vadd.i64 d25,d25,d21
 | |
| +	vadd.i64 d4,d4,d25
 | |
| +	vzip.i32 q0,q8
 | |
| +	vadd.i64 d12,d4,d14
 | |
| +	add r6,r8,#8
 | |
| +	vst1.8 d0,[r6,: 64]
 | |
| +	vsub.i64 d19,d19,d9
 | |
| +	add r6,r6,#16
 | |
| +	vst1.8 d16,[r6,: 64]
 | |
| +	vshr.s64 d22,d12,#26
 | |
| +	vand q0,q6,q1
 | |
| +	vadd.i64 d10,d10,d22
 | |
| +	vzip.i32 q3,q9
 | |
| +	vsub.i64 d4,d4,d0
 | |
| +	sub r6,r6,#8
 | |
| +	vst1.8 d6,[r6,: 64]
 | |
| +	add r6,r6,#16
 | |
| +	vst1.8 d18,[r6,: 64]
 | |
| +	vzip.i32 q2,q5
 | |
| +	sub r6,r6,#32
 | |
| +	vst1.8 d4,[r6,: 64]
 | |
| +	subs r5,r5,#1
 | |
| +	bhi .Lsquaringloop
 | |
| +	.Lskipsquaringloop:
 | |
| +	mov r2,r2
 | |
| +	add r5,r3,#288
 | |
| +	add r6,r3,#144
 | |
| +	vmov.i32 q0,#19
 | |
| +	vmov.i32 q1,#0
 | |
| +	vmov.i32 q2,#1
 | |
| +	vzip.i32 q1,q2
 | |
| +	vld1.8 {d4-d5},[r5,: 128]!
 | |
| +	vld1.8 {d6-d7},[r5,: 128]!
 | |
| +	vld1.8 {d9},[r5,: 64]
 | |
| +	vld1.8 {d10-d11},[r2,: 128]!
 | |
| +	add r5,sp,#416
 | |
| +	vld1.8 {d12-d13},[r2,: 128]!
 | |
| +	vmul.i32 q7,q2,q0
 | |
| +	vld1.8 {d8},[r2,: 64]
 | |
| +	vext.32 d17,d11,d10,#1
 | |
| +	vmul.i32 q9,q3,q0
 | |
| +	vext.32 d16,d10,d8,#1
 | |
| +	vshl.u32 q10,q5,q1
 | |
| +	vext.32 d22,d14,d4,#1
 | |
| +	vext.32 d24,d18,d6,#1
 | |
| +	vshl.u32 q13,q6,q1
 | |
| +	vshl.u32 d28,d8,d2
 | |
| +	vrev64.i32 d22,d22
 | |
| +	vmul.i32 d1,d9,d1
 | |
| +	vrev64.i32 d24,d24
 | |
| +	vext.32 d29,d8,d13,#1
 | |
| +	vext.32 d0,d1,d9,#1
 | |
| +	vrev64.i32 d0,d0
 | |
| +	vext.32 d2,d9,d1,#1
 | |
| +	vext.32 d23,d15,d5,#1
 | |
| +	vmull.s32 q4,d20,d4
 | |
| +	vrev64.i32 d23,d23
 | |
| +	vmlal.s32 q4,d21,d1
 | |
| +	vrev64.i32 d2,d2
 | |
| +	vmlal.s32 q4,d26,d19
 | |
| +	vext.32 d3,d5,d15,#1
 | |
| +	vmlal.s32 q4,d27,d18
 | |
| +	vrev64.i32 d3,d3
 | |
| +	vmlal.s32 q4,d28,d15
 | |
| +	vext.32 d14,d12,d11,#1
 | |
| +	vmull.s32 q5,d16,d23
 | |
| +	vext.32 d15,d13,d12,#1
 | |
| +	vmlal.s32 q5,d17,d4
 | |
| +	vst1.8 d8,[r5,: 64]!
 | |
| +	vmlal.s32 q5,d14,d1
 | |
| +	vext.32 d12,d9,d8,#0
 | |
| +	vmlal.s32 q5,d15,d19
 | |
| +	vmov.i64 d13,#0
 | |
| +	vmlal.s32 q5,d29,d18
 | |
| +	vext.32 d25,d19,d7,#1
 | |
| +	vmlal.s32 q6,d20,d5
 | |
| +	vrev64.i32 d25,d25
 | |
| +	vmlal.s32 q6,d21,d4
 | |
| +	vst1.8 d11,[r5,: 64]!
 | |
| +	vmlal.s32 q6,d26,d1
 | |
| +	vext.32 d9,d10,d10,#0
 | |
| +	vmlal.s32 q6,d27,d19
 | |
| +	vmov.i64 d8,#0
 | |
| +	vmlal.s32 q6,d28,d18
 | |
| +	vmlal.s32 q4,d16,d24
 | |
| +	vmlal.s32 q4,d17,d5
 | |
| +	vmlal.s32 q4,d14,d4
 | |
| +	vst1.8 d12,[r5,: 64]!
 | |
| +	vmlal.s32 q4,d15,d1
 | |
| +	vext.32 d10,d13,d12,#0
 | |
| +	vmlal.s32 q4,d29,d19
 | |
| +	vmov.i64 d11,#0
 | |
| +	vmlal.s32 q5,d20,d6
 | |
| +	vmlal.s32 q5,d21,d5
 | |
| +	vmlal.s32 q5,d26,d4
 | |
| +	vext.32 d13,d8,d8,#0
 | |
| +	vmlal.s32 q5,d27,d1
 | |
| +	vmov.i64 d12,#0
 | |
| +	vmlal.s32 q5,d28,d19
 | |
| +	vst1.8 d9,[r5,: 64]!
 | |
| +	vmlal.s32 q6,d16,d25
 | |
| +	vmlal.s32 q6,d17,d6
 | |
| +	vst1.8 d10,[r5,: 64]
 | |
| +	vmlal.s32 q6,d14,d5
 | |
| +	vext.32 d8,d11,d10,#0
 | |
| +	vmlal.s32 q6,d15,d4
 | |
| +	vmov.i64 d9,#0
 | |
| +	vmlal.s32 q6,d29,d1
 | |
| +	vmlal.s32 q4,d20,d7
 | |
| +	vmlal.s32 q4,d21,d6
 | |
| +	vmlal.s32 q4,d26,d5
 | |
| +	vext.32 d11,d12,d12,#0
 | |
| +	vmlal.s32 q4,d27,d4
 | |
| +	vmov.i64 d10,#0
 | |
| +	vmlal.s32 q4,d28,d1
 | |
| +	vmlal.s32 q5,d16,d0
 | |
| +	sub r2,r5,#32
 | |
| +	vmlal.s32 q5,d17,d7
 | |
| +	vmlal.s32 q5,d14,d6
 | |
| +	vext.32 d30,d9,d8,#0
 | |
| +	vmlal.s32 q5,d15,d5
 | |
| +	vld1.8 {d31},[r2,: 64]!
 | |
| +	vmlal.s32 q5,d29,d4
 | |
| +	vmlal.s32 q15,d20,d0
 | |
| +	vext.32 d0,d6,d18,#1
 | |
| +	vmlal.s32 q15,d21,d25
 | |
| +	vrev64.i32 d0,d0
 | |
| +	vmlal.s32 q15,d26,d24
 | |
| +	vext.32 d1,d7,d19,#1
 | |
| +	vext.32 d7,d10,d10,#0
 | |
| +	vmlal.s32 q15,d27,d23
 | |
| +	vrev64.i32 d1,d1
 | |
| +	vld1.8 {d6},[r2,: 64]
 | |
| +	vmlal.s32 q15,d28,d22
 | |
| +	vmlal.s32 q3,d16,d4
 | |
| +	add r2,r2,#24
 | |
| +	vmlal.s32 q3,d17,d2
 | |
| +	vext.32 d4,d31,d30,#0
 | |
| +	vmov d17,d11
 | |
| +	vmlal.s32 q3,d14,d1
 | |
| +	vext.32 d11,d13,d13,#0
 | |
| +	vext.32 d13,d30,d30,#0
 | |
| +	vmlal.s32 q3,d15,d0
 | |
| +	vext.32 d1,d8,d8,#0
 | |
| +	vmlal.s32 q3,d29,d3
 | |
| +	vld1.8 {d5},[r2,: 64]
 | |
| +	sub r2,r2,#16
 | |
| +	vext.32 d10,d6,d6,#0
 | |
| +	vmov.i32 q1,#0xffffffff
 | |
| +	vshl.i64 q4,q1,#25
 | |
| +	add r5,sp,#512
 | |
| +	vld1.8 {d14-d15},[r5,: 128]
 | |
| +	vadd.i64 q9,q2,q7
 | |
| +	vshl.i64 q1,q1,#26
 | |
| +	vshr.s64 q10,q9,#26
 | |
| +	vld1.8 {d0},[r2,: 64]!
 | |
| +	vadd.i64 q5,q5,q10
 | |
| +	vand q9,q9,q1
 | |
| +	vld1.8 {d16},[r2,: 64]!
 | |
| +	add r2,sp,#528
 | |
| +	vld1.8 {d20-d21},[r2,: 128]
 | |
| +	vadd.i64 q11,q5,q10
 | |
| +	vsub.i64 q2,q2,q9
 | |
| +	vshr.s64 q9,q11,#25
 | |
| +	vext.32 d12,d5,d4,#0
 | |
| +	vand q11,q11,q4
 | |
| +	vadd.i64 q0,q0,q9
 | |
| +	vmov d19,d7
 | |
| +	vadd.i64 q3,q0,q7
 | |
| +	vsub.i64 q5,q5,q11
 | |
| +	vshr.s64 q11,q3,#26
 | |
| +	vext.32 d18,d11,d10,#0
 | |
| +	vand q3,q3,q1
 | |
| +	vadd.i64 q8,q8,q11
 | |
| +	vadd.i64 q11,q8,q10
 | |
| +	vsub.i64 q0,q0,q3
 | |
| +	vshr.s64 q3,q11,#25
 | |
| +	vand q11,q11,q4
 | |
| +	vadd.i64 q3,q6,q3
 | |
| +	vadd.i64 q6,q3,q7
 | |
| +	vsub.i64 q8,q8,q11
 | |
| +	vshr.s64 q11,q6,#26
 | |
| +	vand q6,q6,q1
 | |
| +	vadd.i64 q9,q9,q11
 | |
| +	vadd.i64 d25,d19,d21
 | |
| +	vsub.i64 q3,q3,q6
 | |
| +	vshr.s64 d23,d25,#25
 | |
| +	vand q4,q12,q4
 | |
| +	vadd.i64 d21,d23,d23
 | |
| +	vshl.i64 d25,d23,#4
 | |
| +	vadd.i64 d21,d21,d23
 | |
| +	vadd.i64 d25,d25,d21
 | |
| +	vadd.i64 d4,d4,d25
 | |
| +	vzip.i32 q0,q8
 | |
| +	vadd.i64 d12,d4,d14
 | |
| +	add r2,r6,#8
 | |
| +	vst1.8 d0,[r2,: 64]
 | |
| +	vsub.i64 d19,d19,d9
 | |
| +	add r2,r2,#16
 | |
| +	vst1.8 d16,[r2,: 64]
 | |
| +	vshr.s64 d22,d12,#26
 | |
| +	vand q0,q6,q1
 | |
| +	vadd.i64 d10,d10,d22
 | |
| +	vzip.i32 q3,q9
 | |
| +	vsub.i64 d4,d4,d0
 | |
| +	sub r2,r2,#8
 | |
| +	vst1.8 d6,[r2,: 64]
 | |
| +	add r2,r2,#16
 | |
| +	vst1.8 d18,[r2,: 64]
 | |
| +	vzip.i32 q2,q5
 | |
| +	sub r2,r2,#32
 | |
| +	vst1.8 d4,[r2,: 64]
 | |
| +	cmp r4,#0
 | |
| +	beq .Lskippostcopy
 | |
| +	add r2,r3,#144
 | |
| +	mov r4,r4
 | |
| +	vld1.8 {d0-d1},[r2,: 128]!
 | |
| +	vld1.8 {d2-d3},[r2,: 128]!
 | |
| +	vld1.8 {d4},[r2,: 64]
 | |
| +	vst1.8 {d0-d1},[r4,: 128]!
 | |
| +	vst1.8 {d2-d3},[r4,: 128]!
 | |
| +	vst1.8 d4,[r4,: 64]
 | |
| +	.Lskippostcopy:
 | |
| +	cmp r1,#1
 | |
| +	bne .Lskipfinalcopy
 | |
| +	add r2,r3,#288
 | |
| +	add r4,r3,#144
 | |
| +	vld1.8 {d0-d1},[r2,: 128]!
 | |
| +	vld1.8 {d2-d3},[r2,: 128]!
 | |
| +	vld1.8 {d4},[r2,: 64]
 | |
| +	vst1.8 {d0-d1},[r4,: 128]!
 | |
| +	vst1.8 {d2-d3},[r4,: 128]!
 | |
| +	vst1.8 d4,[r4,: 64]
 | |
| +	.Lskipfinalcopy:
 | |
| +	add r1,r1,#1
 | |
| +	cmp r1,#12
 | |
| +	blo .Linvertloop
 | |
| +	add r1,r3,#144
 | |
| +	ldr r2,[r1],#4
 | |
| +	ldr r3,[r1],#4
 | |
| +	ldr r4,[r1],#4
 | |
| +	ldr r5,[r1],#4
 | |
| +	ldr r6,[r1],#4
 | |
| +	ldr r7,[r1],#4
 | |
| +	ldr r8,[r1],#4
 | |
| +	ldr r9,[r1],#4
 | |
| +	ldr r10,[r1],#4
 | |
| +	ldr r1,[r1]
 | |
| +	add r11,r1,r1,LSL #4
 | |
| +	add r11,r11,r1,LSL #1
 | |
| +	add r11,r11,#16777216
 | |
| +	mov r11,r11,ASR #25
 | |
| +	add r11,r11,r2
 | |
| +	mov r11,r11,ASR #26
 | |
| +	add r11,r11,r3
 | |
| +	mov r11,r11,ASR #25
 | |
| +	add r11,r11,r4
 | |
| +	mov r11,r11,ASR #26
 | |
| +	add r11,r11,r5
 | |
| +	mov r11,r11,ASR #25
 | |
| +	add r11,r11,r6
 | |
| +	mov r11,r11,ASR #26
 | |
| +	add r11,r11,r7
 | |
| +	mov r11,r11,ASR #25
 | |
| +	add r11,r11,r8
 | |
| +	mov r11,r11,ASR #26
 | |
| +	add r11,r11,r9
 | |
| +	mov r11,r11,ASR #25
 | |
| +	add r11,r11,r10
 | |
| +	mov r11,r11,ASR #26
 | |
| +	add r11,r11,r1
 | |
| +	mov r11,r11,ASR #25
 | |
| +	add r2,r2,r11
 | |
| +	add r2,r2,r11,LSL #1
 | |
| +	add r2,r2,r11,LSL #4
 | |
| +	mov r11,r2,ASR #26
 | |
| +	add r3,r3,r11
 | |
| +	sub r2,r2,r11,LSL #26
 | |
| +	mov r11,r3,ASR #25
 | |
| +	add r4,r4,r11
 | |
| +	sub r3,r3,r11,LSL #25
 | |
| +	mov r11,r4,ASR #26
 | |
| +	add r5,r5,r11
 | |
| +	sub r4,r4,r11,LSL #26
 | |
| +	mov r11,r5,ASR #25
 | |
| +	add r6,r6,r11
 | |
| +	sub r5,r5,r11,LSL #25
 | |
| +	mov r11,r6,ASR #26
 | |
| +	add r7,r7,r11
 | |
| +	sub r6,r6,r11,LSL #26
 | |
| +	mov r11,r7,ASR #25
 | |
| +	add r8,r8,r11
 | |
| +	sub r7,r7,r11,LSL #25
 | |
| +	mov r11,r8,ASR #26
 | |
| +	add r9,r9,r11
 | |
| +	sub r8,r8,r11,LSL #26
 | |
| +	mov r11,r9,ASR #25
 | |
| +	add r10,r10,r11
 | |
| +	sub r9,r9,r11,LSL #25
 | |
| +	mov r11,r10,ASR #26
 | |
| +	add r1,r1,r11
 | |
| +	sub r10,r10,r11,LSL #26
 | |
| +	mov r11,r1,ASR #25
 | |
| +	sub r1,r1,r11,LSL #25
 | |
| +	add r2,r2,r3,LSL #26
 | |
| +	mov r3,r3,LSR #6
 | |
| +	add r3,r3,r4,LSL #19
 | |
| +	mov r4,r4,LSR #13
 | |
| +	add r4,r4,r5,LSL #13
 | |
| +	mov r5,r5,LSR #19
 | |
| +	add r5,r5,r6,LSL #6
 | |
| +	add r6,r7,r8,LSL #25
 | |
| +	mov r7,r8,LSR #7
 | |
| +	add r7,r7,r9,LSL #19
 | |
| +	mov r8,r9,LSR #13
 | |
| +	add r8,r8,r10,LSL #12
 | |
| +	mov r9,r10,LSR #20
 | |
| +	add r1,r9,r1,LSL #6
 | |
| +	str r2,[r0],#4
 | |
| +	str r3,[r0],#4
 | |
| +	str r4,[r0],#4
 | |
| +	str r5,[r0],#4
 | |
| +	str r6,[r0],#4
 | |
| +	str r7,[r0],#4
 | |
| +	str r8,[r0],#4
 | |
| +	str r1,[r0]
 | |
| +	ldrd r4,[sp,#0]
 | |
| +	ldrd r6,[sp,#8]
 | |
| +	ldrd r8,[sp,#16]
 | |
| +	ldrd r10,[sp,#24]
 | |
| +	ldr r12,[sp,#480]
 | |
| +	ldr r14,[sp,#484]
 | |
| +	ldr r0,=0
 | |
| +	mov sp,r12
 | |
| +	vpop {q4,q5,q6,q7}
 | |
| +	bx lr
 | |
| +ENDPROC(curve25519_asm_neon)
 | |
| --- /dev/null	2017-07-05 16:27:37.615351856 +0200
 | |
| +++ b/net/wireguard/crypto/poly1305-avx2-x86_64.S	2017-07-06 18:17:33.000000000 +0200
 | |
| @@ -0,0 +1,387 @@
 | |
| +/*
 | |
| + * Poly1305 authenticator algorithm, RFC7539, x64 AVX2 functions
 | |
| + *
 | |
| + * Copyright (C) 2015 Martin Willi
 | |
| + *
 | |
| + * This program is free software; you can redistribute it and/or modify
 | |
| + * it under the terms of the GNU General Public License as published by
 | |
| + * the Free Software Foundation; either version 2 of the License, or
 | |
| + * (at your option) any later version.
 | |
| + */
 | |
| +
 | |
| +#include <linux/linkage.h>
 | |
| +
 | |
| +.section .rodata.cst32.ANMASK, "aM", @progbits, 32
 | |
| +.align 32
 | |
| +ANMASK:	.octa 0x0000000003ffffff0000000003ffffff
 | |
| +	.octa 0x0000000003ffffff0000000003ffffff
 | |
| +.section .rodata.cst32.ORMASK, "aM", @progbits, 32
 | |
| +.align 32
 | |
| +ORMASK:	.octa 0x00000000010000000000000001000000
 | |
| +	.octa 0x00000000010000000000000001000000
 | |
| +
 | |
| +.text
 | |
| +
 | |
| +#define h0 0x00(%rdi)
 | |
| +#define h1 0x04(%rdi)
 | |
| +#define h2 0x08(%rdi)
 | |
| +#define h3 0x0c(%rdi)
 | |
| +#define h4 0x10(%rdi)
 | |
| +#define r0 0x00(%rdx)
 | |
| +#define r1 0x04(%rdx)
 | |
| +#define r2 0x08(%rdx)
 | |
| +#define r3 0x0c(%rdx)
 | |
| +#define r4 0x10(%rdx)
 | |
| +#define u0 0x00(%r8)
 | |
| +#define u1 0x04(%r8)
 | |
| +#define u2 0x08(%r8)
 | |
| +#define u3 0x0c(%r8)
 | |
| +#define u4 0x10(%r8)
 | |
| +#define w0 0x14(%r8)
 | |
| +#define w1 0x18(%r8)
 | |
| +#define w2 0x1c(%r8)
 | |
| +#define w3 0x20(%r8)
 | |
| +#define w4 0x24(%r8)
 | |
| +#define y0 0x28(%r8)
 | |
| +#define y1 0x2c(%r8)
 | |
| +#define y2 0x30(%r8)
 | |
| +#define y3 0x34(%r8)
 | |
| +#define y4 0x38(%r8)
 | |
| +#define m %rsi
 | |
| +#define hc0 %ymm0
 | |
| +#define hc1 %ymm1
 | |
| +#define hc2 %ymm2
 | |
| +#define hc3 %ymm3
 | |
| +#define hc4 %ymm4
 | |
| +#define hc0x %xmm0
 | |
| +#define hc1x %xmm1
 | |
| +#define hc2x %xmm2
 | |
| +#define hc3x %xmm3
 | |
| +#define hc4x %xmm4
 | |
| +#define t1 %ymm5
 | |
| +#define t2 %ymm6
 | |
| +#define t1x %xmm5
 | |
| +#define t2x %xmm6
 | |
| +#define ruwy0 %ymm7
 | |
| +#define ruwy1 %ymm8
 | |
| +#define ruwy2 %ymm9
 | |
| +#define ruwy3 %ymm10
 | |
| +#define ruwy4 %ymm11
 | |
| +#define ruwy0x %xmm7
 | |
| +#define ruwy1x %xmm8
 | |
| +#define ruwy2x %xmm9
 | |
| +#define ruwy3x %xmm10
 | |
| +#define ruwy4x %xmm11
 | |
| +#define svxz1 %ymm12
 | |
| +#define svxz2 %ymm13
 | |
| +#define svxz3 %ymm14
 | |
| +#define svxz4 %ymm15
 | |
| +#define d0 %r9
 | |
| +#define d1 %r10
 | |
| +#define d2 %r11
 | |
| +#define d3 %r12
 | |
| +#define d4 %r13
 | |
| +
 | |
| +ENTRY(poly1305_asm_4block_avx2)
 | |
| +	# %rdi: Accumulator h[5]
 | |
| +	# %rsi: 64 byte input block m
 | |
| +	# %rdx: Poly1305 key r[5]
 | |
| +	# %rcx: Quadblock count
 | |
| +	# %r8:  Poly1305 derived key r^2 u[5], r^3 w[5], r^4 y[5]
 | |
| +
 | |
| +	# This four-block variant uses loop unrolled block processing. It
 | |
| +	# requires 4 Poly1305 keys: r, r^2, r^3 and r^4:
 | |
| +	# h = (h + m) * r  =>  h = (h + m1) * r^4 + m2 * r^3 + m3 * r^2 + m4 * r
 | |
| +
 | |
| +	vzeroupper
 | |
| +	push		%rbx
 | |
| +	push		%r12
 | |
| +	push		%r13
 | |
| +
 | |
| +	# combine r0,u0,w0,y0
 | |
| +	vmovd		y0,ruwy0x
 | |
| +	vmovd		w0,t1x
 | |
| +	vpunpcklqdq	t1,ruwy0,ruwy0
 | |
| +	vmovd		u0,t1x
 | |
| +	vmovd		r0,t2x
 | |
| +	vpunpcklqdq	t2,t1,t1
 | |
| +	vperm2i128	$0x20,t1,ruwy0,ruwy0
 | |
| +
 | |
| +	# combine r1,u1,w1,y1 and s1=r1*5,v1=u1*5,x1=w1*5,z1=y1*5
 | |
| +	vmovd		y1,ruwy1x
 | |
| +	vmovd		w1,t1x
 | |
| +	vpunpcklqdq	t1,ruwy1,ruwy1
 | |
| +	vmovd		u1,t1x
 | |
| +	vmovd		r1,t2x
 | |
| +	vpunpcklqdq	t2,t1,t1
 | |
| +	vperm2i128	$0x20,t1,ruwy1,ruwy1
 | |
| +	vpslld		$2,ruwy1,svxz1
 | |
| +	vpaddd		ruwy1,svxz1,svxz1
 | |
| +
 | |
| +	# combine r2,u2,w2,y2 and s2=r2*5,v2=u2*5,x2=w2*5,z2=y2*5
 | |
| +	vmovd		y2,ruwy2x
 | |
| +	vmovd		w2,t1x
 | |
| +	vpunpcklqdq	t1,ruwy2,ruwy2
 | |
| +	vmovd		u2,t1x
 | |
| +	vmovd		r2,t2x
 | |
| +	vpunpcklqdq	t2,t1,t1
 | |
| +	vperm2i128	$0x20,t1,ruwy2,ruwy2
 | |
| +	vpslld		$2,ruwy2,svxz2
 | |
| +	vpaddd		ruwy2,svxz2,svxz2
 | |
| +
 | |
| +	# combine r3,u3,w3,y3 and s3=r3*5,v3=u3*5,x3=w3*5,z3=y3*5
 | |
| +	vmovd		y3,ruwy3x
 | |
| +	vmovd		w3,t1x
 | |
| +	vpunpcklqdq	t1,ruwy3,ruwy3
 | |
| +	vmovd		u3,t1x
 | |
| +	vmovd		r3,t2x
 | |
| +	vpunpcklqdq	t2,t1,t1
 | |
| +	vperm2i128	$0x20,t1,ruwy3,ruwy3
 | |
| +	vpslld		$2,ruwy3,svxz3
 | |
| +	vpaddd		ruwy3,svxz3,svxz3
 | |
| +
 | |
| +	# combine r4,u4,w4,y4 and s4=r4*5,v4=u4*5,x4=w4*5,z4=y4*5
 | |
| +	vmovd		y4,ruwy4x
 | |
| +	vmovd		w4,t1x
 | |
| +	vpunpcklqdq	t1,ruwy4,ruwy4
 | |
| +	vmovd		u4,t1x
 | |
| +	vmovd		r4,t2x
 | |
| +	vpunpcklqdq	t2,t1,t1
 | |
| +	vperm2i128	$0x20,t1,ruwy4,ruwy4
 | |
| +	vpslld		$2,ruwy4,svxz4
 | |
| +	vpaddd		ruwy4,svxz4,svxz4
 | |
| +
 | |
| +.Ldoblock4:
 | |
| +	# hc0 = [m[48-51] & 0x3ffffff, m[32-35] & 0x3ffffff,
 | |
| +	#	 m[16-19] & 0x3ffffff, m[ 0- 3] & 0x3ffffff + h0]
 | |
| +	vmovd		0x00(m),hc0x
 | |
| +	vmovd		0x10(m),t1x
 | |
| +	vpunpcklqdq	t1,hc0,hc0
 | |
| +	vmovd		0x20(m),t1x
 | |
| +	vmovd		0x30(m),t2x
 | |
| +	vpunpcklqdq	t2,t1,t1
 | |
| +	vperm2i128	$0x20,t1,hc0,hc0
 | |
| +	vpand		ANMASK(%rip),hc0,hc0
 | |
| +	vmovd		h0,t1x
 | |
| +	vpaddd		t1,hc0,hc0
 | |
| +	# hc1 = [(m[51-54] >> 2) & 0x3ffffff, (m[35-38] >> 2) & 0x3ffffff,
 | |
| +	#	 (m[19-22] >> 2) & 0x3ffffff, (m[ 3- 6] >> 2) & 0x3ffffff + h1]
 | |
| +	vmovd		0x03(m),hc1x
 | |
| +	vmovd		0x13(m),t1x
 | |
| +	vpunpcklqdq	t1,hc1,hc1
 | |
| +	vmovd		0x23(m),t1x
 | |
| +	vmovd		0x33(m),t2x
 | |
| +	vpunpcklqdq	t2,t1,t1
 | |
| +	vperm2i128	$0x20,t1,hc1,hc1
 | |
| +	vpsrld		$2,hc1,hc1
 | |
| +	vpand		ANMASK(%rip),hc1,hc1
 | |
| +	vmovd		h1,t1x
 | |
| +	vpaddd		t1,hc1,hc1
 | |
| +	# hc2 = [(m[54-57] >> 4) & 0x3ffffff, (m[38-41] >> 4) & 0x3ffffff,
 | |
| +	#	 (m[22-25] >> 4) & 0x3ffffff, (m[ 6- 9] >> 4) & 0x3ffffff + h2]
 | |
| +	vmovd		0x06(m),hc2x
 | |
| +	vmovd		0x16(m),t1x
 | |
| +	vpunpcklqdq	t1,hc2,hc2
 | |
| +	vmovd		0x26(m),t1x
 | |
| +	vmovd		0x36(m),t2x
 | |
| +	vpunpcklqdq	t2,t1,t1
 | |
| +	vperm2i128	$0x20,t1,hc2,hc2
 | |
| +	vpsrld		$4,hc2,hc2
 | |
| +	vpand		ANMASK(%rip),hc2,hc2
 | |
| +	vmovd		h2,t1x
 | |
| +	vpaddd		t1,hc2,hc2
 | |
| +	# hc3 = [(m[57-60] >> 6) & 0x3ffffff, (m[41-44] >> 6) & 0x3ffffff,
 | |
| +	#	 (m[25-28] >> 6) & 0x3ffffff, (m[ 9-12] >> 6) & 0x3ffffff + h3]
 | |
| +	vmovd		0x09(m),hc3x
 | |
| +	vmovd		0x19(m),t1x
 | |
| +	vpunpcklqdq	t1,hc3,hc3
 | |
| +	vmovd		0x29(m),t1x
 | |
| +	vmovd		0x39(m),t2x
 | |
| +	vpunpcklqdq	t2,t1,t1
 | |
| +	vperm2i128	$0x20,t1,hc3,hc3
 | |
| +	vpsrld		$6,hc3,hc3
 | |
| +	vpand		ANMASK(%rip),hc3,hc3
 | |
| +	vmovd		h3,t1x
 | |
| +	vpaddd		t1,hc3,hc3
 | |
| +	# hc4 = [(m[60-63] >> 8) | (1<<24), (m[44-47] >> 8) | (1<<24),
 | |
| +	#	 (m[28-31] >> 8) | (1<<24), (m[12-15] >> 8) | (1<<24) + h4]
 | |
| +	vmovd		0x0c(m),hc4x
 | |
| +	vmovd		0x1c(m),t1x
 | |
| +	vpunpcklqdq	t1,hc4,hc4
 | |
| +	vmovd		0x2c(m),t1x
 | |
| +	vmovd		0x3c(m),t2x
 | |
| +	vpunpcklqdq	t2,t1,t1
 | |
| +	vperm2i128	$0x20,t1,hc4,hc4
 | |
| +	vpsrld		$8,hc4,hc4
 | |
| +	vpor		ORMASK(%rip),hc4,hc4
 | |
| +	vmovd		h4,t1x
 | |
| +	vpaddd		t1,hc4,hc4
 | |
| +
 | |
| +	# t1 = [ hc0[3] * r0, hc0[2] * u0, hc0[1] * w0, hc0[0] * y0 ]
 | |
| +	vpmuludq	hc0,ruwy0,t1
 | |
| +	# t1 += [ hc1[3] * s4, hc1[2] * v4, hc1[1] * x4, hc1[0] * z4 ]
 | |
| +	vpmuludq	hc1,svxz4,t2
 | |
| +	vpaddq		t2,t1,t1
 | |
| +	# t1 += [ hc2[3] * s3, hc2[2] * v3, hc2[1] * x3, hc2[0] * z3 ]
 | |
| +	vpmuludq	hc2,svxz3,t2
 | |
| +	vpaddq		t2,t1,t1
 | |
| +	# t1 += [ hc3[3] * s2, hc3[2] * v2, hc3[1] * x2, hc3[0] * z2 ]
 | |
| +	vpmuludq	hc3,svxz2,t2
 | |
| +	vpaddq		t2,t1,t1
 | |
| +	# t1 += [ hc4[3] * s1, hc4[2] * v1, hc4[1] * x1, hc4[0] * z1 ]
 | |
| +	vpmuludq	hc4,svxz1,t2
 | |
| +	vpaddq		t2,t1,t1
 | |
| +	# d0 = t1[0] + t1[1] + t1[2] + t1[3]
 | |
| +	vpermq		$0xee,t1,t2
 | |
| +	vpaddq		t2,t1,t1
 | |
| +	vpsrldq		$8,t1,t2
 | |
| +	vpaddq		t2,t1,t1
 | |
| +	vmovq		t1x,d0
 | |
| +
 | |
| +	# t1 = [ hc0[3] * r1, hc0[2] * u1,hc0[1] * w1, hc0[0] * y1 ]
 | |
| +	vpmuludq	hc0,ruwy1,t1
 | |
| +	# t1 += [ hc1[3] * r0, hc1[2] * u0, hc1[1] * w0, hc1[0] * y0 ]
 | |
| +	vpmuludq	hc1,ruwy0,t2
 | |
| +	vpaddq		t2,t1,t1
 | |
| +	# t1 += [ hc2[3] * s4, hc2[2] * v4, hc2[1] * x4, hc2[0] * z4 ]
 | |
| +	vpmuludq	hc2,svxz4,t2
 | |
| +	vpaddq		t2,t1,t1
 | |
| +	# t1 += [ hc3[3] * s3, hc3[2] * v3, hc3[1] * x3, hc3[0] * z3 ]
 | |
| +	vpmuludq	hc3,svxz3,t2
 | |
| +	vpaddq		t2,t1,t1
 | |
| +	# t1 += [ hc4[3] * s2, hc4[2] * v2, hc4[1] * x2, hc4[0] * z2 ]
 | |
| +	vpmuludq	hc4,svxz2,t2
 | |
| +	vpaddq		t2,t1,t1
 | |
| +	# d1 = t1[0] + t1[1] + t1[2] + t1[3]
 | |
| +	vpermq		$0xee,t1,t2
 | |
| +	vpaddq		t2,t1,t1
 | |
| +	vpsrldq		$8,t1,t2
 | |
| +	vpaddq		t2,t1,t1
 | |
| +	vmovq		t1x,d1
 | |
| +
 | |
| +	# t1 = [ hc0[3] * r2, hc0[2] * u2, hc0[1] * w2, hc0[0] * y2 ]
 | |
| +	vpmuludq	hc0,ruwy2,t1
 | |
| +	# t1 += [ hc1[3] * r1, hc1[2] * u1, hc1[1] * w1, hc1[0] * y1 ]
 | |
| +	vpmuludq	hc1,ruwy1,t2
 | |
| +	vpaddq		t2,t1,t1
 | |
| +	# t1 += [ hc2[3] * r0, hc2[2] * u0, hc2[1] * w0, hc2[0] * y0 ]
 | |
| +	vpmuludq	hc2,ruwy0,t2
 | |
| +	vpaddq		t2,t1,t1
 | |
| +	# t1 += [ hc3[3] * s4, hc3[2] * v4, hc3[1] * x4, hc3[0] * z4 ]
 | |
| +	vpmuludq	hc3,svxz4,t2
 | |
| +	vpaddq		t2,t1,t1
 | |
| +	# t1 += [ hc4[3] * s3, hc4[2] * v3, hc4[1] * x3, hc4[0] * z3 ]
 | |
| +	vpmuludq	hc4,svxz3,t2
 | |
| +	vpaddq		t2,t1,t1
 | |
| +	# d2 = t1[0] + t1[1] + t1[2] + t1[3]
 | |
| +	vpermq		$0xee,t1,t2
 | |
| +	vpaddq		t2,t1,t1
 | |
| +	vpsrldq		$8,t1,t2
 | |
| +	vpaddq		t2,t1,t1
 | |
| +	vmovq		t1x,d2
 | |
| +
 | |
| +	# t1 = [ hc0[3] * r3, hc0[2] * u3, hc0[1] * w3, hc0[0] * y3 ]
 | |
| +	vpmuludq	hc0,ruwy3,t1
 | |
| +	# t1 += [ hc1[3] * r2, hc1[2] * u2, hc1[1] * w2, hc1[0] * y2 ]
 | |
| +	vpmuludq	hc1,ruwy2,t2
 | |
| +	vpaddq		t2,t1,t1
 | |
| +	# t1 += [ hc2[3] * r1, hc2[2] * u1, hc2[1] * w1, hc2[0] * y1 ]
 | |
| +	vpmuludq	hc2,ruwy1,t2
 | |
| +	vpaddq		t2,t1,t1
 | |
| +	# t1 += [ hc3[3] * r0, hc3[2] * u0, hc3[1] * w0, hc3[0] * y0 ]
 | |
| +	vpmuludq	hc3,ruwy0,t2
 | |
| +	vpaddq		t2,t1,t1
 | |
| +	# t1 += [ hc4[3] * s4, hc4[2] * v4, hc4[1] * x4, hc4[0] * z4 ]
 | |
| +	vpmuludq	hc4,svxz4,t2
 | |
| +	vpaddq		t2,t1,t1
 | |
| +	# d3 = t1[0] + t1[1] + t1[2] + t1[3]
 | |
| +	vpermq		$0xee,t1,t2
 | |
| +	vpaddq		t2,t1,t1
 | |
| +	vpsrldq		$8,t1,t2
 | |
| +	vpaddq		t2,t1,t1
 | |
| +	vmovq		t1x,d3
 | |
| +
 | |
| +	# t1 = [ hc0[3] * r4, hc0[2] * u4, hc0[1] * w4, hc0[0] * y4 ]
 | |
| +	vpmuludq	hc0,ruwy4,t1
 | |
| +	# t1 += [ hc1[3] * r3, hc1[2] * u3, hc1[1] * w3, hc1[0] * y3 ]
 | |
| +	vpmuludq	hc1,ruwy3,t2
 | |
| +	vpaddq		t2,t1,t1
 | |
| +	# t1 += [ hc2[3] * r2, hc2[2] * u2, hc2[1] * w2, hc2[0] * y2 ]
 | |
| +	vpmuludq	hc2,ruwy2,t2
 | |
| +	vpaddq		t2,t1,t1
 | |
| +	# t1 += [ hc3[3] * r1, hc3[2] * u1, hc3[1] * w1, hc3[0] * y1 ]
 | |
| +	vpmuludq	hc3,ruwy1,t2
 | |
| +	vpaddq		t2,t1,t1
 | |
| +	# t1 += [ hc4[3] * r0, hc4[2] * u0, hc4[1] * w0, hc4[0] * y0 ]
 | |
| +	vpmuludq	hc4,ruwy0,t2
 | |
| +	vpaddq		t2,t1,t1
 | |
| +	# d4 = t1[0] + t1[1] + t1[2] + t1[3]
 | |
| +	vpermq		$0xee,t1,t2
 | |
| +	vpaddq		t2,t1,t1
 | |
| +	vpsrldq		$8,t1,t2
 | |
| +	vpaddq		t2,t1,t1
 | |
| +	vmovq		t1x,d4
 | |
| +
 | |
| +	# d1 += d0 >> 26
 | |
| +	mov		d0,%rax
 | |
| +	shr		$26,%rax
 | |
| +	add		%rax,d1
 | |
| +	# h0 = d0 & 0x3ffffff
 | |
| +	mov		d0,%rbx
 | |
| +	and		$0x3ffffff,%ebx
 | |
| +
 | |
| +	# d2 += d1 >> 26
 | |
| +	mov		d1,%rax
 | |
| +	shr		$26,%rax
 | |
| +	add		%rax,d2
 | |
| +	# h1 = d1 & 0x3ffffff
 | |
| +	mov		d1,%rax
 | |
| +	and		$0x3ffffff,%eax
 | |
| +	mov		%eax,h1
 | |
| +
 | |
| +	# d3 += d2 >> 26
 | |
| +	mov		d2,%rax
 | |
| +	shr		$26,%rax
 | |
| +	add		%rax,d3
 | |
| +	# h2 = d2 & 0x3ffffff
 | |
| +	mov		d2,%rax
 | |
| +	and		$0x3ffffff,%eax
 | |
| +	mov		%eax,h2
 | |
| +
 | |
| +	# d4 += d3 >> 26
 | |
| +	mov		d3,%rax
 | |
| +	shr		$26,%rax
 | |
| +	add		%rax,d4
 | |
| +	# h3 = d3 & 0x3ffffff
 | |
| +	mov		d3,%rax
 | |
| +	and		$0x3ffffff,%eax
 | |
| +	mov		%eax,h3
 | |
| +
 | |
| +	# h0 += (d4 >> 26) * 5
 | |
| +	# Done in 64-bit arithmetic: d4 is the sum of four blocks' worth of
 | |
| +	# limb products, so (d4 >> 26) * 5 may not fit in 32 bits. A 32-bit
 | |
| +	# lea/add here would drop the high bits and corrupt the accumulator.
 | |
| +	mov		d4,%rax
 | |
| +	shr		$26,%rax
 | |
| +	lea		(%rax,%rax,4),%rax
 | |
| +	add		%rax,%rbx
 | |
| +	# h4 = d4 & 0x3ffffff
 | |
| +	mov		d4,%rax
 | |
| +	and		$0x3ffffff,%eax
 | |
| +	mov		%eax,h4
 | |
| +
 | |
| +	# h1 += h0 >> 26
 | |
| +	# %rbx may now be wider than 32 bits, so shift the full register; the
 | |
| +	# resulting carry is small and fits in %eax.
 | |
| +	mov		%rbx,%rax
 | |
| +	shr		$26,%rax
 | |
| +	add		%eax,h1
 | |
| +	# h0 = h0 & 0x3ffffff
 | |
| +	andl		$0x3ffffff,%ebx
 | |
| +	mov		%ebx,h0
 | |
| +
 | |
| +	add		$0x40,m
 | |
| +	dec		%rcx
 | |
| +	jnz		.Ldoblock4
 | |
| +
 | |
| +	vzeroupper
 | |
| +	pop		%r13
 | |
| +	pop		%r12
 | |
| +	pop		%rbx
 | |
| +	ret
 | |
| +ENDPROC(poly1305_asm_4block_avx2)
 | |
| --- /dev/null	2017-07-05 16:27:37.615351856 +0200
 | |
| +++ b/net/wireguard/crypto/poly1305-sse2-x86_64.S	2017-07-06 18:17:33.000000000 +0200
 | |
| @@ -0,0 +1,583 @@
 | |
| +/*
 | |
| + * Poly1305 authenticator algorithm, RFC7539, x64 SSE2 functions
 | |
| + *
 | |
| + * Copyright (C) 2015 Martin Willi
 | |
| + *
 | |
| + * This program is free software; you can redistribute it and/or modify
 | |
| + * it under the terms of the GNU General Public License as published by
 | |
| + * the Free Software Foundation; either version 2 of the License, or
 | |
| + * (at your option) any later version.
 | |
| + */
 | |
| +
 | |
| +#include <linux/linkage.h>
 | |
| +
 | |
| +.section .rodata.cst16.ANMASK, "aM", @progbits, 16
 | |
| +.align 16
 | |
| +ANMASK:	.octa 0x0000000003ffffff0000000003ffffff
 | |
| +.section .rodata.cst16.ORMASK, "aM", @progbits, 16
 | |
| +.align 16
 | |
| +ORMASK:	.octa 0x00000000010000000000000001000000
 | |
| +
 | |
| +.text
 | |
| +
 | |
| +#define h0 0x00(%rdi)
 | |
| +#define h1 0x04(%rdi)
 | |
| +#define h2 0x08(%rdi)
 | |
| +#define h3 0x0c(%rdi)
 | |
| +#define h4 0x10(%rdi)
 | |
| +#define r0 0x00(%rdx)
 | |
| +#define r1 0x04(%rdx)
 | |
| +#define r2 0x08(%rdx)
 | |
| +#define r3 0x0c(%rdx)
 | |
| +#define r4 0x10(%rdx)
 | |
| +#define s1 0x00(%rsp)
 | |
| +#define s2 0x04(%rsp)
 | |
| +#define s3 0x08(%rsp)
 | |
| +#define s4 0x0c(%rsp)
 | |
| +#define m %rsi
 | |
| +#define h01 %xmm0
 | |
| +#define h23 %xmm1
 | |
| +#define h44 %xmm2
 | |
| +#define t1 %xmm3
 | |
| +#define t2 %xmm4
 | |
| +#define t3 %xmm5
 | |
| +#define t4 %xmm6
 | |
| +#define mask %xmm7
 | |
| +#define d0 %r8
 | |
| +#define d1 %r9
 | |
| +#define d2 %r10
 | |
| +#define d3 %r11
 | |
| +#define d4 %r12
 | |
| +
 | |
| +ENTRY(poly1305_asm_block_sse2)
 | |
| +	# %rdi: Accumulator h[5]
 | |
| +	# %rsi: 16 byte input block m
 | |
| +	# %rdx: Poly1305 key r[5]
 | |
| +	# %rcx: Block count
 | |
| +
 | |
| +	# This single block variant tries to improve performance by doing two
 | |
| +	# multiplications in parallel using SSE instructions. There is quite
 | |
| +	# some quadword packing involved, hence the speedup is marginal.
 | |
| +
 | |
| +	push		%rbx
 | |
| +	push		%r12
 | |
| +	sub		$0x10,%rsp
 | |
| +
 | |
| +	# s1..s4 = r1..r4 * 5
 | |
| +	mov		r1,%eax
 | |
| +	lea		(%eax,%eax,4),%eax
 | |
| +	mov		%eax,s1
 | |
| +	mov		r2,%eax
 | |
| +	lea		(%eax,%eax,4),%eax
 | |
| +	mov		%eax,s2
 | |
| +	mov		r3,%eax
 | |
| +	lea		(%eax,%eax,4),%eax
 | |
| +	mov		%eax,s3
 | |
| +	mov		r4,%eax
 | |
| +	lea		(%eax,%eax,4),%eax
 | |
| +	mov		%eax,s4
 | |
| +
 | |
| +	movdqa		ANMASK(%rip),mask
 | |
| +
 | |
| +.Ldoblock:
 | |
| +	# h01 = [0, h1, 0, h0]
 | |
| +	# h23 = [0, h3, 0, h2]
 | |
| +	# h44 = [0, h4, 0, h4]
 | |
| +	movd		h0,h01
 | |
| +	movd		h1,t1
 | |
| +	movd		h2,h23
 | |
| +	movd		h3,t2
 | |
| +	movd		h4,h44
 | |
| +	punpcklqdq	t1,h01
 | |
| +	punpcklqdq	t2,h23
 | |
| +	punpcklqdq	h44,h44
 | |
| +
 | |
| +	# h01 += [ (m[3-6] >> 2) & 0x3ffffff, m[0-3] & 0x3ffffff ]
 | |
| +	movd		0x00(m),t1
 | |
| +	movd		0x03(m),t2
 | |
| +	psrld		$2,t2
 | |
| +	punpcklqdq	t2,t1
 | |
| +	pand		mask,t1
 | |
| +	paddd		t1,h01
 | |
| +	# h23 += [ (m[9-12] >> 6) & 0x3ffffff, (m[6-9] >> 4) & 0x3ffffff ]
 | |
| +	movd		0x06(m),t1
 | |
| +	movd		0x09(m),t2
 | |
| +	psrld		$4,t1
 | |
| +	psrld		$6,t2
 | |
| +	punpcklqdq	t2,t1
 | |
| +	pand		mask,t1
 | |
| +	paddd		t1,h23
 | |
| +	# h44 += [ (m[12-15] >> 8) | (1 << 24), (m[12-15] >> 8) | (1 << 24) ]
 | |
| +	mov		0x0c(m),%eax
 | |
| +	shr		$8,%eax
 | |
| +	or		$0x01000000,%eax
 | |
| +	movd		%eax,t1
 | |
| +	pshufd		$0xc4,t1,t1
 | |
| +	paddd		t1,h44
 | |
| +
 | |
| +	# t1[0] = h0 * r0 + h2 * s3
 | |
| +	# t1[1] = h1 * s4 + h3 * s2
 | |
| +	movd		r0,t1
 | |
| +	movd		s4,t2
 | |
| +	punpcklqdq	t2,t1
 | |
| +	pmuludq		h01,t1
 | |
| +	movd		s3,t2
 | |
| +	movd		s2,t3
 | |
| +	punpcklqdq	t3,t2
 | |
| +	pmuludq		h23,t2
 | |
| +	paddq		t2,t1
 | |
| +	# t2[0] = h0 * r1 + h2 * s4
 | |
| +	# t2[1] = h1 * r0 + h3 * s3
 | |
| +	movd		r1,t2
 | |
| +	movd		r0,t3
 | |
| +	punpcklqdq	t3,t2
 | |
| +	pmuludq		h01,t2
 | |
| +	movd		s4,t3
 | |
| +	movd		s3,t4
 | |
| +	punpcklqdq	t4,t3
 | |
| +	pmuludq		h23,t3
 | |
| +	paddq		t3,t2
 | |
| +	# t3[0] = h4 * s1
 | |
| +	# t3[1] = h4 * s2
 | |
| +	movd		s1,t3
 | |
| +	movd		s2,t4
 | |
| +	punpcklqdq	t4,t3
 | |
| +	pmuludq		h44,t3
 | |
| +	# d0 = t1[0] + t1[1] + t3[0]
 | |
| +	# d1 = t2[0] + t2[1] + t3[1]
 | |
| +	movdqa		t1,t4
 | |
| +	punpcklqdq	t2,t4
 | |
| +	punpckhqdq	t2,t1
 | |
| +	paddq		t4,t1
 | |
| +	paddq		t3,t1
 | |
| +	movq		t1,d0
 | |
| +	psrldq		$8,t1
 | |
| +	movq		t1,d1
 | |
| +
 | |
| +	# t1[0] = h0 * r2 + h2 * r0
 | |
| +	# t1[1] = h1 * r1 + h3 * s4
 | |
| +	movd		r2,t1
 | |
| +	movd		r1,t2
 | |
| +	punpcklqdq 	t2,t1
 | |
| +	pmuludq		h01,t1
 | |
| +	movd		r0,t2
 | |
| +	movd		s4,t3
 | |
| +	punpcklqdq	t3,t2
 | |
| +	pmuludq		h23,t2
 | |
| +	paddq		t2,t1
 | |
| +	# t2[0] = h0 * r3 + h2 * r1
 | |
| +	# t2[1] = h1 * r2 + h3 * r0
 | |
| +	movd		r3,t2
 | |
| +	movd		r2,t3
 | |
| +	punpcklqdq	t3,t2
 | |
| +	pmuludq		h01,t2
 | |
| +	movd		r1,t3
 | |
| +	movd		r0,t4
 | |
| +	punpcklqdq	t4,t3
 | |
| +	pmuludq		h23,t3
 | |
| +	paddq		t3,t2
 | |
| +	# t3[0] = h4 * s3
 | |
| +	# t3[1] = h4 * s4
 | |
| +	movd		s3,t3
 | |
| +	movd		s4,t4
 | |
| +	punpcklqdq	t4,t3
 | |
| +	pmuludq		h44,t3
 | |
| +	# d2 = t1[0] + t1[1] + t3[0]
 | |
| +	# d3 = t2[0] + t2[1] + t3[1]
 | |
| +	movdqa		t1,t4
 | |
| +	punpcklqdq	t2,t4
 | |
| +	punpckhqdq	t2,t1
 | |
| +	paddq		t4,t1
 | |
| +	paddq		t3,t1
 | |
| +	movq		t1,d2
 | |
| +	psrldq		$8,t1
 | |
| +	movq		t1,d3
 | |
| +
 | |
| +	# t1[0] = h0 * r4 + h2 * r2
 | |
| +	# t1[1] = h1 * r3 + h3 * r1
 | |
| +	movd		r4,t1
 | |
| +	movd		r3,t2
 | |
| +	punpcklqdq	t2,t1
 | |
| +	pmuludq		h01,t1
 | |
| +	movd		r2,t2
 | |
| +	movd		r1,t3
 | |
| +	punpcklqdq	t3,t2
 | |
| +	pmuludq		h23,t2
 | |
| +	paddq		t2,t1
 | |
| +	# t3[0] = h4 * r0
 | |
| +	movd		r0,t3
 | |
| +	pmuludq		h44,t3
 | |
| +	# d4 = t1[0] + t1[1] + t3[0]
 | |
| +	movdqa		t1,t4
 | |
| +	psrldq		$8,t4
 | |
| +	paddq		t4,t1
 | |
| +	paddq		t3,t1
 | |
| +	movq		t1,d4
 | |
| +
 | |
| +	# d1 += d0 >> 26
 | |
| +	mov		d0,%rax
 | |
| +	shr		$26,%rax
 | |
| +	add		%rax,d1
 | |
| +	# h0 = d0 & 0x3ffffff
 | |
| +	mov		d0,%rbx
 | |
| +	and		$0x3ffffff,%ebx
 | |
| +
 | |
| +	# d2 += d1 >> 26
 | |
| +	mov		d1,%rax
 | |
| +	shr		$26,%rax
 | |
| +	add		%rax,d2
 | |
| +	# h1 = d1 & 0x3ffffff
 | |
| +	mov		d1,%rax
 | |
| +	and		$0x3ffffff,%eax
 | |
| +	mov		%eax,h1
 | |
| +
 | |
| +	# d3 += d2 >> 26
 | |
| +	mov		d2,%rax
 | |
| +	shr		$26,%rax
 | |
| +	add		%rax,d3
 | |
| +	# h2 = d2 & 0x3ffffff
 | |
| +	mov		d2,%rax
 | |
| +	and		$0x3ffffff,%eax
 | |
| +	mov		%eax,h2
 | |
| +
 | |
| +	# d4 += d3 >> 26
 | |
| +	mov		d3,%rax
 | |
| +	shr		$26,%rax
 | |
| +	add		%rax,d4
 | |
| +	# h3 = d3 & 0x3ffffff
 | |
| +	mov		d3,%rax
 | |
| +	and		$0x3ffffff,%eax
 | |
| +	mov		%eax,h3
 | |
| +
 | |
| +	# h0 += (d4 >> 26) * 5
 | |
| +	# Carry is propagated in 64-bit registers so that no bits of
 | |
| +	# (d4 >> 26) * 5 can be lost to 32-bit truncation, and to avoid the
 | |
| +	# address-size override a 32-bit lea needs in 64-bit mode.
 | |
| +	mov		d4,%rax
 | |
| +	shr		$26,%rax
 | |
| +	lea		(%rax,%rax,4),%rax
 | |
| +	add		%rax,%rbx
 | |
| +	# h4 = d4 & 0x3ffffff
 | |
| +	mov		d4,%rax
 | |
| +	and		$0x3ffffff,%eax
 | |
| +	mov		%eax,h4
 | |
| +
 | |
| +	# h1 += h0 >> 26
 | |
| +	# Shift the full 64-bit %rbx; the resulting carry fits in %eax.
 | |
| +	mov		%rbx,%rax
 | |
| +	shr		$26,%rax
 | |
| +	add		%eax,h1
 | |
| +	# h0 = h0 & 0x3ffffff
 | |
| +	andl		$0x3ffffff,%ebx
 | |
| +	mov		%ebx,h0
 | |
| +
 | |
| +	add		$0x10,m
 | |
| +	dec		%rcx
 | |
| +	jnz		.Ldoblock
 | |
| +
 | |
| +	add		$0x10,%rsp
 | |
| +	pop		%r12
 | |
| +	pop		%rbx
 | |
| +	ret
 | |
| +ENDPROC(poly1305_asm_block_sse2)
 | |
| +
 | |
| +
 | |
| +#define u0 0x00(%r8)
 | |
| +#define u1 0x04(%r8)
 | |
| +#define u2 0x08(%r8)
 | |
| +#define u3 0x0c(%r8)
 | |
| +#define u4 0x10(%r8)
 | |
| +#define hc0 %xmm0
 | |
| +#define hc1 %xmm1
 | |
| +#define hc2 %xmm2
 | |
| +#define hc3 %xmm5
 | |
| +#define hc4 %xmm6
 | |
| +#define ru0 %xmm7
 | |
| +#define ru1 %xmm8
 | |
| +#define ru2 %xmm9
 | |
| +#define ru3 %xmm10
 | |
| +#define ru4 %xmm11
 | |
| +#define sv1 %xmm12
 | |
| +#define sv2 %xmm13
 | |
| +#define sv3 %xmm14
 | |
| +#define sv4 %xmm15
 | |
| +#undef d0
 | |
| +#define d0 %r13
 | |
| +
 | |
| +ENTRY(poly1305_asm_2block_sse2)
 | |
| +	# %rdi: Accumulator h[5]
 | |
| +	# %rsi: 16 byte input block m
 | |
| +	# %rdx: Poly1305 key r[5]
 | |
| +	# %rcx: Doubleblock count
 | |
| +	# %r8:  Poly1305 derived key r^2 u[5]
 | |
| +
 | |
| +	# This two-block variant further improves performance by using loop
 | |
| +	# unrolled block processing. This is more straightforward and does
 | |
| +	# less byte shuffling, but requires a second Poly1305 key r^2:
 | |
| +	# h = (h + m) * r    =>    h = (h + m1) * r^2 + m2 * r
 | |
| +
 | |
| +	push		%rbx
 | |
| +	push		%r12
 | |
| +	push		%r13
 | |
| +
 | |
| +	# combine r0,u0
 | |
| +	movd		u0,ru0
 | |
| +	movd		r0,t1
 | |
| +	punpcklqdq	t1,ru0
 | |
| +
 | |
| +	# combine r1,u1 and s1=r1*5,v1=u1*5
 | |
| +	movd		u1,ru1
 | |
| +	movd		r1,t1
 | |
| +	punpcklqdq	t1,ru1
 | |
| +	movdqa		ru1,sv1
 | |
| +	pslld		$2,sv1
 | |
| +	paddd		ru1,sv1
 | |
| +
 | |
| +	# combine r2,u2 and s2=r2*5,v2=u2*5
 | |
| +	movd		u2,ru2
 | |
| +	movd		r2,t1
 | |
| +	punpcklqdq	t1,ru2
 | |
| +	movdqa		ru2,sv2
 | |
| +	pslld		$2,sv2
 | |
| +	paddd		ru2,sv2
 | |
| +
 | |
| +	# combine r3,u3 and s3=r3*5,v3=u3*5
 | |
| +	movd		u3,ru3
 | |
| +	movd		r3,t1
 | |
| +	punpcklqdq	t1,ru3
 | |
| +	movdqa		ru3,sv3
 | |
| +	pslld		$2,sv3
 | |
| +	paddd		ru3,sv3
 | |
| +
 | |
| +	# combine r4,u4 and s4=r4*5,v4=u4*5
 | |
| +	movd		u4,ru4
 | |
| +	movd		r4,t1
 | |
| +	punpcklqdq	t1,ru4
 | |
| +	movdqa		ru4,sv4
 | |
| +	pslld		$2,sv4
 | |
| +	paddd		ru4,sv4
 | |
| +
 | |
| +.Ldoblock2:
 | |
| +	# hc0 = [ m[16-19] & 0x3ffffff, h0 + m[0-3] & 0x3ffffff ]
 | |
| +	movd		0x00(m),hc0
 | |
| +	movd		0x10(m),t1
 | |
| +	punpcklqdq	t1,hc0
 | |
| +	pand		ANMASK(%rip),hc0
 | |
| +	movd		h0,t1
 | |
| +	paddd		t1,hc0
 | |
| +	# hc1 = [ (m[19-22] >> 2) & 0x3ffffff, h1 + (m[3-6] >> 2) & 0x3ffffff ]
 | |
| +	movd		0x03(m),hc1
 | |
| +	movd		0x13(m),t1
 | |
| +	punpcklqdq	t1,hc1
 | |
| +	psrld		$2,hc1
 | |
| +	pand		ANMASK(%rip),hc1
 | |
| +	movd		h1,t1
 | |
| +	paddd		t1,hc1
 | |
| +	# hc2 = [ (m[22-25] >> 4) & 0x3ffffff, h2 + (m[6-9] >> 4) & 0x3ffffff ]
 | |
| +	movd		0x06(m),hc2
 | |
| +	movd		0x16(m),t1
 | |
| +	punpcklqdq	t1,hc2
 | |
| +	psrld		$4,hc2
 | |
| +	pand		ANMASK(%rip),hc2
 | |
| +	movd		h2,t1
 | |
| +	paddd		t1,hc2
 | |
| +	# hc3 = [ (m[25-28] >> 6) & 0x3ffffff, h3 + (m[9-12] >> 6) & 0x3ffffff ]
 | |
| +	movd		0x09(m),hc3
 | |
| +	movd		0x19(m),t1
 | |
| +	punpcklqdq	t1,hc3
 | |
| +	psrld		$6,hc3
 | |
| +	pand		ANMASK(%rip),hc3
 | |
| +	movd		h3,t1
 | |
| +	paddd		t1,hc3
 | |
| +	# hc4 = [ (m[28-31] >> 8) | (1<<24), h4 + (m[12-15] >> 8) | (1<<24) ]
 | |
| +	movd		0x0c(m),hc4
 | |
| +	movd		0x1c(m),t1
 | |
| +	punpcklqdq	t1,hc4
 | |
| +	psrld		$8,hc4
 | |
| +	por		ORMASK(%rip),hc4
 | |
| +	movd		h4,t1
 | |
| +	paddd		t1,hc4
 | |
| +
 | |
| +	# t1 = [ hc0[1] * r0, hc0[0] * u0 ]
 | |
| +	movdqa		ru0,t1
 | |
| +	pmuludq		hc0,t1
 | |
| +	# t1 += [ hc1[1] * s4, hc1[0] * v4 ]
 | |
| +	movdqa		sv4,t2
 | |
| +	pmuludq		hc1,t2
 | |
| +	paddq		t2,t1
 | |
| +	# t1 += [ hc2[1] * s3, hc2[0] * v3 ]
 | |
| +	movdqa		sv3,t2
 | |
| +	pmuludq		hc2,t2
 | |
| +	paddq		t2,t1
 | |
| +	# t1 += [ hc3[1] * s2, hc3[0] * v2 ]
 | |
| +	movdqa		sv2,t2
 | |
| +	pmuludq		hc3,t2
 | |
| +	paddq		t2,t1
 | |
| +	# t1 += [ hc4[1] * s1, hc4[0] * v1 ]
 | |
| +	movdqa		sv1,t2
 | |
| +	pmuludq		hc4,t2
 | |
| +	paddq		t2,t1
 | |
| +	# d0 = t1[0] + t1[1]
 | |
| +	movdqa		t1,t2
 | |
| +	psrldq		$8,t2
 | |
| +	paddq		t2,t1
 | |
| +	movq		t1,d0
 | |
| +
 | |
| +	# t1 = [ hc0[1] * r1, hc0[0] * u1 ]
 | |
| +	movdqa		ru1,t1
 | |
| +	pmuludq		hc0,t1
 | |
| +	# t1 += [ hc1[1] * r0, hc1[0] * u0 ]
 | |
| +	movdqa		ru0,t2
 | |
| +	pmuludq		hc1,t2
 | |
| +	paddq		t2,t1
 | |
| +	# t1 += [ hc2[1] * s4, hc2[0] * v4 ]
 | |
| +	movdqa		sv4,t2
 | |
| +	pmuludq		hc2,t2
 | |
| +	paddq		t2,t1
 | |
| +	# t1 += [ hc3[1] * s3, hc3[0] * v3 ]
 | |
| +	movdqa		sv3,t2
 | |
| +	pmuludq		hc3,t2
 | |
| +	paddq		t2,t1
 | |
| +	# t1 += [ hc4[1] * s2, hc4[0] * v2 ]
 | |
| +	movdqa		sv2,t2
 | |
| +	pmuludq		hc4,t2
 | |
| +	paddq		t2,t1
 | |
| +	# d1 = t1[0] + t1[1]
 | |
| +	movdqa		t1,t2
 | |
| +	psrldq		$8,t2
 | |
| +	paddq		t2,t1
 | |
| +	movq		t1,d1
 | |
| +
 | |
| +	# t1 = [ hc0[1] * r2, hc0[0] * u2 ]
 | |
| +	movdqa		ru2,t1
 | |
| +	pmuludq		hc0,t1
 | |
| +	# t1 += [ hc1[1] * r1, hc1[0] * u1 ]
 | |
| +	movdqa		ru1,t2
 | |
| +	pmuludq		hc1,t2
 | |
| +	paddq		t2,t1
 | |
| +	# t1 += [ hc2[1] * r0, hc2[0] * u0 ]
 | |
| +	movdqa		ru0,t2
 | |
| +	pmuludq		hc2,t2
 | |
| +	paddq		t2,t1
 | |
| +	# t1 += [ hc3[1] * s4, hc3[0] * v4 ]
 | |
| +	movdqa		sv4,t2
 | |
| +	pmuludq		hc3,t2
 | |
| +	paddq		t2,t1
 | |
| +	# t1 += [ hc4[1] * s3, hc4[0] * v3 ]
 | |
| +	movdqa		sv3,t2
 | |
| +	pmuludq		hc4,t2
 | |
| +	paddq		t2,t1
 | |
| +	# d2 = t1[0] + t1[1]
 | |
| +	movdqa		t1,t2
 | |
| +	psrldq		$8,t2
 | |
| +	paddq		t2,t1
 | |
| +	movq		t1,d2
 | |
| +
 | |
| +	# t1 = [ hc0[1] * r3, hc0[0] * u3 ]
 | |
| +	movdqa		ru3,t1
 | |
| +	pmuludq		hc0,t1
 | |
| +	# t1 += [ hc1[1] * r2, hc1[0] * u2 ]
 | |
| +	movdqa		ru2,t2
 | |
| +	pmuludq		hc1,t2
 | |
| +	paddq		t2,t1
 | |
| +	# t1 += [ hc2[1] * r1, hc2[0] * u1 ]
 | |
| +	movdqa		ru1,t2
 | |
| +	pmuludq		hc2,t2
 | |
| +	paddq		t2,t1
 | |
| +	# t1 += [ hc3[1] * r0, hc3[0] * u0 ]
 | |
| +	movdqa		ru0,t2
 | |
| +	pmuludq		hc3,t2
 | |
| +	paddq		t2,t1
 | |
| +	# t1 += [ hc4[1] * s4, hc4[0] * v4 ]
 | |
| +	movdqa		sv4,t2
 | |
| +	pmuludq		hc4,t2
 | |
| +	paddq		t2,t1
 | |
| +	# d3 = t1[0] + t1[1]
 | |
| +	movdqa		t1,t2
 | |
| +	psrldq		$8,t2
 | |
| +	paddq		t2,t1
 | |
| +	movq		t1,d3
 | |
| +
 | |
| +	# t1 = [ hc0[1] * r4, hc0[0] * u4 ]
 | |
| +	movdqa		ru4,t1
 | |
| +	pmuludq		hc0,t1
 | |
| +	# t1 += [ hc1[1] * r3, hc1[0] * u3 ]
 | |
| +	movdqa		ru3,t2
 | |
| +	pmuludq		hc1,t2
 | |
| +	paddq		t2,t1
 | |
| +	# t1 += [ hc2[1] * r2, hc2[0] * u2 ]
 | |
| +	movdqa		ru2,t2
 | |
| +	pmuludq		hc2,t2
 | |
| +	paddq		t2,t1
 | |
| +	# t1 += [ hc3[1] * r1, hc3[0] * u1 ]
 | |
| +	movdqa		ru1,t2
 | |
| +	pmuludq		hc3,t2
 | |
| +	paddq		t2,t1
 | |
| +	# t1 += [ hc4[1] * r0, hc4[0] * u0 ]
 | |
| +	movdqa		ru0,t2
 | |
| +	pmuludq		hc4,t2
 | |
| +	paddq		t2,t1
 | |
| +	# d4 = t1[0] + t1[1]
 | |
| +	movdqa		t1,t2
 | |
| +	psrldq		$8,t2
 | |
| +	paddq		t2,t1
 | |
| +	movq		t1,d4
 | |
| +
 | |
| +	# d1 += d0 >> 26
 | |
| +	mov		d0,%rax
 | |
| +	shr		$26,%rax
 | |
| +	add		%rax,d1
 | |
| +	# h0 = d0 & 0x3ffffff
 | |
| +	mov		d0,%rbx
 | |
| +	and		$0x3ffffff,%ebx
 | |
| +
 | |
| +	# d2 += d1 >> 26
 | |
| +	mov		d1,%rax
 | |
| +	shr		$26,%rax
 | |
| +	add		%rax,d2
 | |
| +	# h1 = d1 & 0x3ffffff
 | |
| +	mov		d1,%rax
 | |
| +	and		$0x3ffffff,%eax
 | |
| +	mov		%eax,h1
 | |
| +
 | |
| +	# d3 += d2 >> 26
 | |
| +	mov		d2,%rax
 | |
| +	shr		$26,%rax
 | |
| +	add		%rax,d3
 | |
| +	# h2 = d2 & 0x3ffffff
 | |
| +	mov		d2,%rax
 | |
| +	and		$0x3ffffff,%eax
 | |
| +	mov		%eax,h2
 | |
| +
 | |
| +	# d4 += d3 >> 26
 | |
| +	mov		d3,%rax
 | |
| +	shr		$26,%rax
 | |
| +	add		%rax,d4
 | |
| +	# h3 = d3 & 0x3ffffff
 | |
| +	mov		d3,%rax
 | |
| +	and		$0x3ffffff,%eax
 | |
| +	mov		%eax,h3
 | |
| +
 | |
| +	# h0 += (d4 >> 26) * 5
 | |
| +	# Done in 64-bit arithmetic: d4 sums two blocks' worth of limb
 | |
| +	# products, so (d4 >> 26) * 5 may exceed 32 bits; a 32-bit lea/add
 | |
| +	# would silently truncate the carry.
 | |
| +	mov		d4,%rax
 | |
| +	shr		$26,%rax
 | |
| +	lea		(%rax,%rax,4),%rax
 | |
| +	add		%rax,%rbx
 | |
| +	# h4 = d4 & 0x3ffffff
 | |
| +	mov		d4,%rax
 | |
| +	and		$0x3ffffff,%eax
 | |
| +	mov		%eax,h4
 | |
| +
 | |
| +	# h1 += h0 >> 26
 | |
| +	# %rbx may now be wider than 32 bits, so shift the full register; the
 | |
| +	# resulting carry fits in %eax.
 | |
| +	mov		%rbx,%rax
 | |
| +	shr		$26,%rax
 | |
| +	add		%eax,h1
 | |
| +	# h0 = h0 & 0x3ffffff
 | |
| +	andl		$0x3ffffff,%ebx
 | |
| +	mov		%ebx,h0
 | |
| +
 | |
| +	add		$0x20,m
 | |
| +	dec		%rcx
 | |
| +	jnz		.Ldoblock2
 | |
| +
 | |
| +	pop		%r13
 | |
| +	pop		%r12
 | |
| +	pop		%rbx
 | |
| +	ret
 | |
| +ENDPROC(poly1305_asm_2block_sse2)
 | |
| --- /dev/null	2017-07-05 16:27:37.615351856 +0200
 | |
| +++ b/net/wireguard/Makefile	2017-07-06 18:17:33.000000000 +0200
 | |
| @@ -0,0 +1,36 @@
 | |
| +ccflags-y := -O3 -fvisibility=hidden
 | |
| +ccflags-$(CONFIG_WIREGUARD_DEBUG) += -DDEBUG -g
 | |
| +ccflags-y += -Wframe-larger-than=8192
 | |
| +ccflags-y += -D'pr_fmt(fmt)=KBUILD_MODNAME ": " fmt'
 | |
| +wireguard-y := main.o noise.o device.o peer.o timers.o data.o send.o receive.o socket.o config.o hashtables.o routingtable.o ratelimiter.o cookie.o
 | |
| +wireguard-y += crypto/curve25519.o crypto/chacha20poly1305.o crypto/blake2s.o
 | |
| +
 | |
| +ifeq ($(CONFIG_X86_64),y)
 | |
| +	wireguard-y += crypto/chacha20-ssse3-x86_64.o crypto/poly1305-sse2-x86_64.o
 | |
| +	avx_supported := $(call as-instr,vpxor %xmm0$(comma)%xmm0$(comma)%xmm0,yes,no)
 | |
| +	ifeq ($(avx_supported),yes)
 | |
| +		wireguard-y += crypto/blake2s-avx-x86_64.o crypto/curve25519-avx-x86_64.o
 | |
| +	endif
 | |
| +	avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1$(comma)4)$(comma)%ymm2,yes,no)
 | |
| +	ifeq ($(avx2_supported),yes)
 | |
| +		wireguard-y += crypto/chacha20-avx2-x86_64.o crypto/poly1305-avx2-x86_64.o
 | |
| +	endif
 | |
| +endif
 | |
| +
 | |
| +ifeq ($(CONFIG_ARM64),y)
 | |
| +	wireguard-$(CONFIG_KERNEL_MODE_NEON) += crypto/chacha20-neon-arm64.o
 | |
| +endif
 | |
| +ifeq ($(CONFIG_ARM),y)
 | |
| +	wireguard-$(CONFIG_KERNEL_MODE_NEON) += crypto/chacha20-neon-arm.o crypto/curve25519-neon-arm.o
 | |
| +endif
 | |
| +
 | |
| +ifneq ($(KBUILD_EXTMOD),)
 | |
| +CONFIG_WIREGUARD := m
 | |
| +ifneq ($(CONFIG_SMP),)
 | |
| +ccflags-y += -DCONFIG_WIREGUARD_PARALLEL=y
 | |
| +endif
 | |
| +endif
 | |
| +
 | |
| +include $(src)/compat/Makefile.include
 | |
| +
 | |
| +obj-$(CONFIG_WIREGUARD) := wireguard.o
 | |
| --- /dev/null	2017-07-05 16:27:37.615351856 +0200
 | |
| +++ b/net/wireguard/Kconfig	2017-07-06 18:17:33.000000000 +0200
 | |
| @@ -0,0 +1,40 @@
 | |
| +config WIREGUARD
 | |
| +	tristate "IP: WireGuard secure network tunnel"
 | |
| +	depends on NET && INET
 | |
| +	select NET_UDP_TUNNEL
 | |
| +	select CRYPTO_BLKCIPHER
 | |
| +	select NEON
 | |
| +	select KERNEL_MODE_NEON
 | |
| +	default m
 | |
| +	---help---
 | |
| +	  WireGuard is a secure, fast, and easy to use replacement for IPsec
 | |
| +	  that uses modern cryptography and clever networking tricks. It's
 | |
| +	  designed to be fairly general purpose and abstract enough to fit most
 | |
| +	  use cases, while at the same time remaining extremely simple to
 | |
| +	  configure. See www.wireguard.io for more info.
 | |
| +
 | |
| +	  It's safe to say Y or M here, as the driver is very lightweight and
 | |
| +	  is only in use when an administrator chooses to add an interface.
 | |
| +
 | |
| +config WIREGUARD_PARALLEL
 | |
| +	bool "Enable parallel engine"
 | |
| +	depends on SMP && WIREGUARD
 | |
| +	select PADATA
 | |
| +	default y
 | |
| +	---help---
 | |
| +	  This will allow WireGuard to utilize all CPU cores when encrypting
 | |
| +	  and decrypting packets.
 | |
| +	  
 | |
| +	  It's safe to say Y here, and you probably should, as the performance
 | |
| +	  improvements are substantial.
 | |
| +
 | |
| +config WIREGUARD_DEBUG
 | |
| +	bool "Debugging checks and verbose messages"
 | |
| +	depends on WIREGUARD
 | |
| +	---help---
 | |
| +	  This will write log messages for handshake and other events
 | |
| +	  that occur for a WireGuard interface. It will also perform some
 | |
| +	  extra validation checks and unit tests at various points. This is
 | |
| +	  only useful for debugging.
 | |
| +	  
 | |
| +	  Say N here unless you know what you're doing.
 | |
| --- /dev/null	2017-07-05 16:27:37.615351856 +0200
 | |
| +++ b/net/wireguard/compat/fpu/include/asm/fpu/api.h	2017-07-06 18:17:33.000000000 +0200
 | |
| @@ -0,0 +1 @@
 | |
| +#include <asm/i387.h>
 | |
| --- /dev/null	2017-07-05 16:27:37.615351856 +0200
 | |
| +++ b/net/wireguard/compat/siphash/include/linux/siphash.h	2017-07-06 18:17:33.000000000 +0200
 | |
| @@ -0,0 +1,140 @@
 | |
| +/* Copyright (C) 2016 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 | |
| + *
 | |
| + * This file is provided under a dual BSD/GPLv2 license.
 | |
| + *
 | |
| + * SipHash: a fast short-input PRF
 | |
| + * https://131002.net/siphash/
 | |
| + *
 | |
| + * This implementation is specifically for SipHash2-4 for a secure PRF
 | |
| + * and HalfSipHash1-3/SipHash1-3 for an insecure PRF only suitable for
 | |
| + * hashtables.
 | |
| + */
 | |
| +
 | |
| +#ifndef _LINUX_SIPHASH_H
 | |
| +#define _LINUX_SIPHASH_H
 | |
| +
 | |
| +#include <linux/types.h>
 | |
| +#include <linux/kernel.h>
 | |
| +
 | |
| +#define SIPHASH_ALIGNMENT __alignof__(u64)
 | |
| +typedef struct {
 | |
| +	u64 key[2];
 | |
| +} siphash_key_t;
 | |
| +
 | |
| +u64 __siphash_aligned(const void *data, size_t len, const siphash_key_t *key);
 | |
| +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
 | |
| +u64 __siphash_unaligned(const void *data, size_t len, const siphash_key_t *key);
 | |
| +#endif
 | |
| +
 | |
| +u64 siphash_1u64(const u64 a, const siphash_key_t *key);
 | |
| +u64 siphash_2u64(const u64 a, const u64 b, const siphash_key_t *key);
 | |
| +u64 siphash_3u64(const u64 a, const u64 b, const u64 c,
 | |
| +		 const siphash_key_t *key);
 | |
| +u64 siphash_4u64(const u64 a, const u64 b, const u64 c, const u64 d,
 | |
| +		 const siphash_key_t *key);
 | |
| +u64 siphash_1u32(const u32 a, const siphash_key_t *key);
 | |
| +u64 siphash_3u32(const u32 a, const u32 b, const u32 c,
 | |
| +		 const siphash_key_t *key);
 | |
| +
 | |
| +static inline u64 siphash_2u32(const u32 a, const u32 b,
 | |
| +			       const siphash_key_t *key)
 | |
| +{
 | |
| +	return siphash_1u64((u64)b << 32 | a, key);
 | |
| +}
 | |
| +static inline u64 siphash_4u32(const u32 a, const u32 b, const u32 c,
 | |
| +			       const u32 d, const siphash_key_t *key)
 | |
| +{
 | |
| +	return siphash_2u64((u64)b << 32 | a, (u64)d << 32 | c, key);
 | |
| +}
 | |
| +
 | |
| +
 | |
| +static inline u64 ___siphash_aligned(const __le64 *data, size_t len,
 | |
| +				     const siphash_key_t *key)
 | |
| +{
 | |
| +	if (__builtin_constant_p(len) && len == 4)
 | |
| +		return siphash_1u32(le32_to_cpup((const __le32 *)data), key);
 | |
| +	if (__builtin_constant_p(len) && len == 8)
 | |
| +		return siphash_1u64(le64_to_cpu(data[0]), key);
 | |
| +	if (__builtin_constant_p(len) && len == 16)
 | |
| +		return siphash_2u64(le64_to_cpu(data[0]), le64_to_cpu(data[1]),
 | |
| +				    key);
 | |
| +	if (__builtin_constant_p(len) && len == 24)
 | |
| +		return siphash_3u64(le64_to_cpu(data[0]), le64_to_cpu(data[1]),
 | |
| +				    le64_to_cpu(data[2]), key);
 | |
| +	if (__builtin_constant_p(len) && len == 32)
 | |
| +		return siphash_4u64(le64_to_cpu(data[0]), le64_to_cpu(data[1]),
 | |
| +				    le64_to_cpu(data[2]), le64_to_cpu(data[3]),
 | |
| +				    key);
 | |
| +	return __siphash_aligned(data, len, key);
 | |
| +}
 | |
| +
 | |
| +/**
 | |
| + * siphash - compute 64-bit siphash PRF value
 | |
| + * @data: buffer to hash
 | |
| + * @len: size of @data
 | |
| + * @key: the siphash key
 | |
| + */
 | |
| +static inline u64 siphash(const void *data, size_t len,
 | |
| +			  const siphash_key_t *key)
 | |
| +{
 | |
| +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
 | |
| +	if (!IS_ALIGNED((unsigned long)data, SIPHASH_ALIGNMENT))
 | |
| +		return __siphash_unaligned(data, len, key);
 | |
| +#endif
 | |
| +	return ___siphash_aligned(data, len, key);
 | |
| +}
 | |
| +
 | |
| +#define HSIPHASH_ALIGNMENT __alignof__(unsigned long)
 | |
| +typedef struct {
 | |
| +	unsigned long key[2];
 | |
| +} hsiphash_key_t;
 | |
| +
 | |
| +u32 __hsiphash_aligned(const void *data, size_t len,
 | |
| +		       const hsiphash_key_t *key);
 | |
| +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
 | |
| +u32 __hsiphash_unaligned(const void *data, size_t len,
 | |
| +			 const hsiphash_key_t *key);
 | |
| +#endif
 | |
| +
 | |
| +u32 hsiphash_1u32(const u32 a, const hsiphash_key_t *key);
 | |
| +u32 hsiphash_2u32(const u32 a, const u32 b, const hsiphash_key_t *key);
 | |
| +u32 hsiphash_3u32(const u32 a, const u32 b, const u32 c,
 | |
| +		  const hsiphash_key_t *key);
 | |
| +u32 hsiphash_4u32(const u32 a, const u32 b, const u32 c, const u32 d,
 | |
| +		  const hsiphash_key_t *key);
 | |
| +
 | |
| +static inline u32 ___hsiphash_aligned(const __le32 *data, size_t len,
 | |
| +				      const hsiphash_key_t *key)
 | |
| +{
 | |
| +	if (__builtin_constant_p(len) && len == 4)
 | |
| +		return hsiphash_1u32(le32_to_cpu(data[0]), key);
 | |
| +	if (__builtin_constant_p(len) && len == 8)
 | |
| +		return hsiphash_2u32(le32_to_cpu(data[0]), le32_to_cpu(data[1]),
 | |
| +				     key);
 | |
| +	if (__builtin_constant_p(len) && len == 12)
 | |
| +		return hsiphash_3u32(le32_to_cpu(data[0]), le32_to_cpu(data[1]),
 | |
| +				     le32_to_cpu(data[2]), key);
 | |
| +	if (__builtin_constant_p(len) && len == 16)
 | |
| +		return hsiphash_4u32(le32_to_cpu(data[0]), le32_to_cpu(data[1]),
 | |
| +				     le32_to_cpu(data[2]), le32_to_cpu(data[3]),
 | |
| +				     key);
 | |
| +	return __hsiphash_aligned(data, len, key);
 | |
| +}
 | |
| +
 | |
| +/**
 | |
| + * hsiphash - compute 32-bit hsiphash PRF value
 | |
| + * @data: buffer to hash
 | |
| + * @len: size of @data
 | |
| + * @key: the hsiphash key
 | |
| + */
 | |
| +static inline u32 hsiphash(const void *data, size_t len,
 | |
| +			   const hsiphash_key_t *key)
 | |
| +{
 | |
| +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
 | |
| +	if (!IS_ALIGNED((unsigned long)data, HSIPHASH_ALIGNMENT))
 | |
| +		return __hsiphash_unaligned(data, len, key);
 | |
| +#endif
 | |
| +	return ___hsiphash_aligned(data, len, key);
 | |
| +}
 | |
| +
 | |
| +#endif /* _LINUX_SIPHASH_H */
 | |
| --- /dev/null	2017-07-05 16:27:37.615351856 +0200
 | |
| +++ b/net/wireguard/compat/siphash/siphash.c	2017-07-06 18:17:33.000000000 +0200
 | |
| @@ -0,0 +1,539 @@
 | |
| +/* Copyright (C) 2016 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
 | |
| + *
 | |
| + * This file is provided under a dual BSD/GPLv2 license.
 | |
| + *
 | |
| + * SipHash: a fast short-input PRF
 | |
| + * https://131002.net/siphash/
 | |
| + *
 | |
| + * This implementation is specifically for SipHash2-4 for a secure PRF
 | |
| + * and HalfSipHash1-3/SipHash1-3 for an insecure PRF only suitable for
 | |
| + * hashtables.
 | |
| + */
 | |
| +
 | |
| +#include <linux/siphash.h>
 | |
| +#include <asm/unaligned.h>
 | |
| +
 | |
| +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0)
 | |
| +#ifdef __LITTLE_ENDIAN
 | |
| +#define bytemask_from_count(cnt)	(~(~0ul << (cnt)*8))
 | |
| +#else
 | |
| +#define bytemask_from_count(cnt)	(~(~0ul >> (cnt)*8))
 | |
| +#endif
 | |
| +#endif
 | |
| +
 | |
| +#if defined(CONFIG_DCACHE_WORD_ACCESS) && BITS_PER_LONG == 64
 | |
| +#include <linux/dcache.h>
 | |
| +#include <asm/word-at-a-time.h>
 | |
| +#endif
 | |
| +
 | |
| +#define SIPROUND \
 | |
| +	do { \
 | |
| +	v0 += v1; v1 = rol64(v1, 13); v1 ^= v0; v0 = rol64(v0, 32); \
 | |
| +	v2 += v3; v3 = rol64(v3, 16); v3 ^= v2; \
 | |
| +	v0 += v3; v3 = rol64(v3, 21); v3 ^= v0; \
 | |
| +	v2 += v1; v1 = rol64(v1, 17); v1 ^= v2; v2 = rol64(v2, 32); \
 | |
| +	} while (0)
 | |
| +
 | |
| +#define PREAMBLE(len) \
 | |
| +	u64 v0 = 0x736f6d6570736575ULL; \
 | |
| +	u64 v1 = 0x646f72616e646f6dULL; \
 | |
| +	u64 v2 = 0x6c7967656e657261ULL; \
 | |
| +	u64 v3 = 0x7465646279746573ULL; \
 | |
| +	u64 b = ((u64)(len)) << 56; \
 | |
| +	v3 ^= key->key[1]; \
 | |
| +	v2 ^= key->key[0]; \
 | |
| +	v1 ^= key->key[1]; \
 | |
| +	v0 ^= key->key[0];
 | |
| +
 | |
| +#define POSTAMBLE \
 | |
| +	v3 ^= b; \
 | |
| +	SIPROUND; \
 | |
| +	SIPROUND; \
 | |
| +	v0 ^= b; \
 | |
| +	v2 ^= 0xff; \
 | |
| +	SIPROUND; \
 | |
| +	SIPROUND; \
 | |
| +	SIPROUND; \
 | |
| +	SIPROUND; \
 | |
| +	return (v0 ^ v1) ^ (v2 ^ v3);
 | |
| +
 | |
| +u64 __siphash_aligned(const void *data, size_t len, const siphash_key_t *key)
 | |
| +{
 | |
| +	const u8 *end = data + len - (len % sizeof(u64));
 | |
| +	const u8 left = len & (sizeof(u64) - 1);
 | |
| +	u64 m;
 | |
| +	PREAMBLE(len)
 | |
| +	for (; data != end; data += sizeof(u64)) {
 | |
| +		m = le64_to_cpup(data);
 | |
| +		v3 ^= m;
 | |
| +		SIPROUND;
 | |
| +		SIPROUND;
 | |
| +		v0 ^= m;
 | |
| +	}
 | |
| +#if defined(CONFIG_DCACHE_WORD_ACCESS) && BITS_PER_LONG == 64
 | |
| +	if (left)
 | |
| +		b |= le64_to_cpu((__force __le64)(load_unaligned_zeropad(data) &
 | |
| +						  bytemask_from_count(left)));
 | |
| +#else
 | |
| +	switch (left) {
 | |
| +	case 7: b |= ((u64)end[6]) << 48;
 | |
| +	case 6: b |= ((u64)end[5]) << 40;
 | |
| +	case 5: b |= ((u64)end[4]) << 32;
 | |
| +	case 4: b |= le32_to_cpup(data); break;
 | |
| +	case 3: b |= ((u64)end[2]) << 16;
 | |
| +	case 2: b |= le16_to_cpup(data); break;
 | |
| +	case 1: b |= end[0];
 | |
| +	}
 | |
| +#endif
 | |
| +	POSTAMBLE
 | |
| +}
 | |
| +
 | |
| +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
 | |
| +u64 __siphash_unaligned(const void *data, size_t len, const siphash_key_t *key)
 | |
| +{
 | |
| +	const u8 *end = data + len - (len % sizeof(u64));
 | |
| +	const u8 left = len & (sizeof(u64) - 1);
 | |
| +	u64 m;
 | |
| +	PREAMBLE(len)
 | |
| +	for (; data != end; data += sizeof(u64)) {
 | |
| +		m = get_unaligned_le64(data);
 | |
| +		v3 ^= m;
 | |
| +		SIPROUND;
 | |
| +		SIPROUND;
 | |
| +		v0 ^= m;
 | |
| +	}
 | |
| +#if defined(CONFIG_DCACHE_WORD_ACCESS) && BITS_PER_LONG == 64
 | |
| +	if (left)
 | |
| +		b |= le64_to_cpu((__force __le64)(load_unaligned_zeropad(data) &
 | |
| +						  bytemask_from_count(left)));
 | |
| +#else
 | |
| +	switch (left) {
 | |
| +	case 7: b |= ((u64)end[6]) << 48;
 | |
| +	case 6: b |= ((u64)end[5]) << 40;
 | |
| +	case 5: b |= ((u64)end[4]) << 32;
 | |
| +	case 4: b |= get_unaligned_le32(end); break;
 | |
| +	case 3: b |= ((u64)end[2]) << 16;
 | |
| +	case 2: b |= get_unaligned_le16(end); break;
 | |
| +	case 1: b |= end[0];
 | |
| +	}
 | |
| +#endif
 | |
| +	POSTAMBLE
 | |
| +}
 | |
| +#endif
 | |
| +
 | |
| +/**
 | |
| + * siphash_1u64 - compute 64-bit siphash PRF value of a u64
 | |
| + * @first: first u64
 | |
| + * @key: the siphash key
 | |
| + */
 | |
| +u64 siphash_1u64(const u64 first, const siphash_key_t *key)
 | |
| +{
 | |
| +	PREAMBLE(8)
 | |
| +	v3 ^= first;
 | |
| +	SIPROUND;
 | |
| +	SIPROUND;
 | |
| +	v0 ^= first;
 | |
| +	POSTAMBLE
 | |
| +}
 | |
| +
 | |
| +/**
 | |
| + * siphash_2u64 - compute 64-bit siphash PRF value of 2 u64
 | |
| + * @first: first u64
 | |
| + * @second: second u64
 | |
| + * @key: the siphash key
 | |
| + */
 | |
| +u64 siphash_2u64(const u64 first, const u64 second, const siphash_key_t *key)
 | |
| +{
 | |
| +	PREAMBLE(16)
 | |
| +	v3 ^= first;
 | |
| +	SIPROUND;
 | |
| +	SIPROUND;
 | |
| +	v0 ^= first;
 | |
| +	v3 ^= second;
 | |
| +	SIPROUND;
 | |
| +	SIPROUND;
 | |
| +	v0 ^= second;
 | |
| +	POSTAMBLE
 | |
| +}
 | |
| +
 | |
| +/**
 | |
| + * siphash_3u64 - compute 64-bit siphash PRF value of 3 u64
 | |
| + * @first: first u64
 | |
| + * @second: second u64
 | |
| + * @third: third u64
 | |
| + * @key: the siphash key
 | |
| + */
 | |
| +u64 siphash_3u64(const u64 first, const u64 second, const u64 third,
 | |
| +		 const siphash_key_t *key)
 | |
| +{
 | |
| +	PREAMBLE(24)
 | |
| +	v3 ^= first;
 | |
| +	SIPROUND;
 | |
| +	SIPROUND;
 | |
| +	v0 ^= first;
 | |
| +	v3 ^= second;
 | |
| +	SIPROUND;
 | |
| +	SIPROUND;
 | |
| +	v0 ^= second;
 | |
| +	v3 ^= third;
 | |
| +	SIPROUND;
 | |
| +	SIPROUND;
 | |
| +	v0 ^= third;
 | |
| +	POSTAMBLE
 | |
| +}
 | |
| +
 | |
| +/**
 | |
| + * siphash_4u64 - compute 64-bit siphash PRF value of 4 u64
 | |
| + * @first: first u64
 | |
| + * @second: second u64
 | |
| + * @third: third u64
 | |
| + * @forth: fourth u64
 | |
| + * @key: the siphash key
 | |
| + */
 | |
| +u64 siphash_4u64(const u64 first, const u64 second, const u64 third,
 | |
| +		 const u64 forth, const siphash_key_t *key)
 | |
| +{
 | |
| +	PREAMBLE(32)
 | |
| +	v3 ^= first;
 | |
| +	SIPROUND;
 | |
| +	SIPROUND;
 | |
| +	v0 ^= first;
 | |
| +	v3 ^= second;
 | |
| +	SIPROUND;
 | |
| +	SIPROUND;
 | |
| +	v0 ^= second;
 | |
| +	v3 ^= third;
 | |
| +	SIPROUND;
 | |
| +	SIPROUND;
 | |
| +	v0 ^= third;
 | |
| +	v3 ^= forth;
 | |
| +	SIPROUND;
 | |
| +	SIPROUND;
 | |
| +	v0 ^= forth;
 | |
| +	POSTAMBLE
 | |
| +}
 | |
| +
 | |
| +u64 siphash_1u32(const u32 first, const siphash_key_t *key)
 | |
| +{
 | |
| +	PREAMBLE(4)
 | |
| +	b |= first;
 | |
| +	POSTAMBLE
 | |
| +}
 | |
| +
 | |
| +u64 siphash_3u32(const u32 first, const u32 second, const u32 third,
 | |
| +		 const siphash_key_t *key)
 | |
| +{
 | |
| +	u64 combined = (u64)second << 32 | first;
 | |
| +	PREAMBLE(12)
 | |
| +	v3 ^= combined;
 | |
| +	SIPROUND;
 | |
| +	SIPROUND;
 | |
| +	v0 ^= combined;
 | |
| +	b |= third;
 | |
| +	POSTAMBLE
 | |
| +}
 | |
| +
 | |
| +#if BITS_PER_LONG == 64
 | |
| +/* Note that on 64-bit, we make HalfSipHash1-3 actually be SipHash1-3, for
 | |
| + * performance reasons. On 32-bit, below, we actually implement HalfSipHash1-3.
 | |
| + */
 | |
| +
 | |
| +#define HSIPROUND SIPROUND
 | |
| +#define HPREAMBLE(len) PREAMBLE(len)
 | |
| +#define HPOSTAMBLE \
 | |
| +	v3 ^= b; \
 | |
| +	HSIPROUND; \
 | |
| +	v0 ^= b; \
 | |
| +	v2 ^= 0xff; \
 | |
| +	HSIPROUND; \
 | |
| +	HSIPROUND; \
 | |
| +	HSIPROUND; \
 | |
| +	return (v0 ^ v1) ^ (v2 ^ v3);
 | |
| +
 | |
| +u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key)
 | |
| +{
 | |
| +	const u8 *end = data + len - (len % sizeof(u64));
 | |
| +	const u8 left = len & (sizeof(u64) - 1);
 | |
| +	u64 m;
 | |
| +	HPREAMBLE(len)
 | |
| +	for (; data != end; data += sizeof(u64)) {
 | |
| +		m = le64_to_cpup(data);
 | |
| +		v3 ^= m;
 | |
| +		HSIPROUND;
 | |
| +		v0 ^= m;
 | |
| +	}
 | |
| +#if defined(CONFIG_DCACHE_WORD_ACCESS) && BITS_PER_LONG == 64
 | |
| +	if (left)
 | |
| +		b |= le64_to_cpu((__force __le64)(load_unaligned_zeropad(data) &
 | |
| +						  bytemask_from_count(left)));
 | |
| +#else
 | |
| +	switch (left) {
 | |
| +	case 7: b |= ((u64)end[6]) << 48;
 | |
| +	case 6: b |= ((u64)end[5]) << 40;
 | |
| +	case 5: b |= ((u64)end[4]) << 32;
 | |
| +	case 4: b |= le32_to_cpup(data); break;
 | |
| +	case 3: b |= ((u64)end[2]) << 16;
 | |
| +	case 2: b |= le16_to_cpup(data); break;
 | |
| +	case 1: b |= end[0];
 | |
| +	}
 | |
| +#endif
 | |
| +	HPOSTAMBLE
 | |
| +}
 | |
| +
 | |
| +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
 | |
| +u32 __hsiphash_unaligned(const void *data, size_t len,
 | |
| +			 const hsiphash_key_t *key)
 | |
| +{
 | |
| +	const u8 *end = data + len - (len % sizeof(u64));
 | |
| +	const u8 left = len & (sizeof(u64) - 1);
 | |
| +	u64 m;
 | |
| +	HPREAMBLE(len)
 | |
| +	for (; data != end; data += sizeof(u64)) {
 | |
| +		m = get_unaligned_le64(data);
 | |
| +		v3 ^= m;
 | |
| +		HSIPROUND;
 | |
| +		v0 ^= m;
 | |
| +	}
 | |
| +#if defined(CONFIG_DCACHE_WORD_ACCESS) && BITS_PER_LONG == 64
 | |
| +	if (left)
 | |
| +		b |= le64_to_cpu((__force __le64)(load_unaligned_zeropad(data) &
 | |
| +						  bytemask_from_count(left)));
 | |
| +#else
 | |
| +	switch (left) {
 | |
| +	case 7: b |= ((u64)end[6]) << 48;
 | |
| +	case 6: b |= ((u64)end[5]) << 40;
 | |
| +	case 5: b |= ((u64)end[4]) << 32;
 | |
| +	case 4: b |= get_unaligned_le32(end); break;
 | |
| +	case 3: b |= ((u64)end[2]) << 16;
 | |
| +	case 2: b |= get_unaligned_le16(end); break;
 | |
| +	case 1: b |= end[0];
 | |
| +	}
 | |
| +#endif
 | |
| +	HPOSTAMBLE
 | |
| +}
 | |
| +#endif
 | |
| +
 | |
| +/**
 | |
| + * hsiphash_1u32 - compute 32-bit hsiphash PRF value of a u32
 | |
| + * @first: first u32
 | |
| + * @key: the hsiphash key
 | |
| + */
 | |
| +u32 hsiphash_1u32(const u32 first, const hsiphash_key_t *key)
 | |
| +{
 | |
| +	HPREAMBLE(4)
 | |
| +	b |= first;
 | |
| +	HPOSTAMBLE
 | |
| +}
 | |
| +
 | |
| +/**
 | |
| + * hsiphash_2u32 - compute 32-bit hsiphash PRF value of 2 u32
 | |
| + * @first: first u32
 | |
| + * @second: second u32
 | |
| + * @key: the hsiphash key
 | |
| + */
 | |
| +u32 hsiphash_2u32(const u32 first, const u32 second, const hsiphash_key_t *key)
 | |
| +{
 | |
| +	u64 combined = (u64)second << 32 | first;
 | |
| +	HPREAMBLE(8)
 | |
| +	v3 ^= combined;
 | |
| +	HSIPROUND;
 | |
| +	v0 ^= combined;
 | |
| +	HPOSTAMBLE
 | |
| +}
 | |
| +
 | |
| +/**
 | |
| + * hsiphash_3u32 - compute 32-bit hsiphash PRF value of 3 u32
 | |
| + * @first: first u32
 | |
| + * @second: second u32
 | |
| + * @third: third u32
 | |
| + * @key: the hsiphash key
 | |
| + */
 | |
| +u32 hsiphash_3u32(const u32 first, const u32 second, const u32 third,
 | |
| +		  const hsiphash_key_t *key)
 | |
| +{
 | |
| +	u64 combined = (u64)second << 32 | first;
 | |
| +	HPREAMBLE(12)
 | |
| +	v3 ^= combined;
 | |
| +	HSIPROUND;
 | |
| +	v0 ^= combined;
 | |
| +	b |= third;
 | |
| +	HPOSTAMBLE
 | |
| +}
 | |
| +
 | |
| +/**
 | |
| + * hsiphash_4u32 - compute 32-bit hsiphash PRF value of 4 u32
 | |
| + * @first: first u32
 | |
| + * @second: second u32
 | |
| + * @third: third u32
 | |
| + * @forth: fourth u32
 | |
| + * @key: the hsiphash key
 | |
| + */
 | |
| +u32 hsiphash_4u32(const u32 first, const u32 second, const u32 third,
 | |
| +		  const u32 forth, const hsiphash_key_t *key)
 | |
| +{
 | |
| +	u64 combined = (u64)second << 32 | first;
 | |
| +	HPREAMBLE(16)
 | |
| +	v3 ^= combined;
 | |
| +	HSIPROUND;
 | |
| +	v0 ^= combined;
 | |
| +	combined = (u64)forth << 32 | third;
 | |
| +	v3 ^= combined;
 | |
| +	HSIPROUND;
 | |
| +	v0 ^= combined;
 | |
| +	HPOSTAMBLE
 | |
| +}
 | |
| +#else
 | |
| +#define HSIPROUND \
 | |
| +	do { \
 | |
| +	v0 += v1; v1 = rol32(v1, 5); v1 ^= v0; v0 = rol32(v0, 16); \
 | |
| +	v2 += v3; v3 = rol32(v3, 8); v3 ^= v2; \
 | |
| +	v0 += v3; v3 = rol32(v3, 7); v3 ^= v0; \
 | |
| +	v2 += v1; v1 = rol32(v1, 13); v1 ^= v2; v2 = rol32(v2, 16); \
 | |
| +	} while (0)
 | |
| +
 | |
| +#define HPREAMBLE(len) \
 | |
| +	u32 v0 = 0; \
 | |
| +	u32 v1 = 0; \
 | |
| +	u32 v2 = 0x6c796765U; \
 | |
| +	u32 v3 = 0x74656462U; \
 | |
| +	u32 b = ((u32)(len)) << 24; \
 | |
| +	v3 ^= key->key[1]; \
 | |
| +	v2 ^= key->key[0]; \
 | |
| +	v1 ^= key->key[1]; \
 | |
| +	v0 ^= key->key[0];
 | |
| +
 | |
| +#define HPOSTAMBLE \
 | |
| +	v3 ^= b; \
 | |
| +	HSIPROUND; \
 | |
| +	v0 ^= b; \
 | |
| +	v2 ^= 0xff; \
 | |
| +	HSIPROUND; \
 | |
| +	HSIPROUND; \
 | |
| +	HSIPROUND; \
 | |
| +	return v1 ^ v3;
 | |
| +
 | |
| +u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key)
 | |
| +{
 | |
| +	const u8 *end = data + len - (len % sizeof(u32));
 | |
| +	const u8 left = len & (sizeof(u32) - 1);
 | |
| +	u32 m;
 | |
| +	HPREAMBLE(len)
 | |
| +	for (; data != end; data += sizeof(u32)) {
 | |
| +		m = le32_to_cpup(data);
 | |
| +		v3 ^= m;
 | |
| +		HSIPROUND;
 | |
| +		v0 ^= m;
 | |
| +	}
 | |
| +	switch (left) {
 | |
| +	case 3: b |= ((u32)end[2]) << 16;
 | |
| +	case 2: b |= le16_to_cpup(data); break;
 | |
| +	case 1: b |= end[0];
 | |
| +	}
 | |
| +	HPOSTAMBLE
 | |
| +}
 | |
| +
 | |
| +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
 | |
| +u32 __hsiphash_unaligned(const void *data, size_t len,
 | |
| +			 const hsiphash_key_t *key)
 | |
| +{
 | |
| +	const u8 *end = data + len - (len % sizeof(u32));
 | |
| +	const u8 left = len & (sizeof(u32) - 1);
 | |
| +	u32 m;
 | |
| +	HPREAMBLE(len)
 | |
| +	for (; data != end; data += sizeof(u32)) {
 | |
| +		m = get_unaligned_le32(data);
 | |
| +		v3 ^= m;
 | |
| +		HSIPROUND;
 | |
| +		v0 ^= m;
 | |
| +	}
 | |
| +	switch (left) {
 | |
| +	case 3: b |= ((u32)end[2]) << 16;
 | |
| +	case 2: b |= get_unaligned_le16(end); break;
 | |
| +	case 1: b |= end[0];
 | |
| +	}
 | |
| +	HPOSTAMBLE
 | |
| +}
 | |
| +#endif
 | |
| +
 | |
| +/**
 | |
| + * hsiphash_1u32 - compute 32-bit hsiphash PRF value of a u32
 | |
| + * @first: first u32
 | |
| + * @key: the hsiphash key
 | |
| + */
 | |
| +u32 hsiphash_1u32(const u32 first, const hsiphash_key_t *key)
 | |
| +{
 | |
| +	HPREAMBLE(4)
 | |
| +	v3 ^= first;
 | |
| +	HSIPROUND;
 | |
| +	v0 ^= first;
 | |
| +	HPOSTAMBLE
 | |
| +}
 | |
| +
 | |
| +/**
 | |
| + * hsiphash_2u32 - compute 32-bit hsiphash PRF value of 2 u32
 | |
| + * @first: first u32
 | |
| + * @second: second u32
 | |
| + * @key: the hsiphash key
 | |
| + */
 | |
| +u32 hsiphash_2u32(const u32 first, const u32 second, const hsiphash_key_t *key)
 | |
| +{
 | |
| +	HPREAMBLE(8)
 | |
| +	v3 ^= first;
 | |
| +	HSIPROUND;
 | |
| +	v0 ^= first;
 | |
| +	v3 ^= second;
 | |
| +	HSIPROUND;
 | |
| +	v0 ^= second;
 | |
| +	HPOSTAMBLE
 | |
| +}
 | |
| +
 | |
| +/**
 | |
| + * hsiphash_3u32 - compute 32-bit hsiphash PRF value of 3 u32
 | |
| + * @first: first u32
 | |
| + * @second: second u32
 | |
| + * @third: third u32
 | |
| + * @key: the hsiphash key
 | |
| + */
 | |
| +u32 hsiphash_3u32(const u32 first, const u32 second, const u32 third,
 | |
| +		  const hsiphash_key_t *key)
 | |
| +{
 | |
| +	HPREAMBLE(12)
 | |
| +	v3 ^= first;
 | |
| +	HSIPROUND;
 | |
| +	v0 ^= first;
 | |
| +	v3 ^= second;
 | |
| +	HSIPROUND;
 | |
| +	v0 ^= second;
 | |
| +	v3 ^= third;
 | |
| +	HSIPROUND;
 | |
| +	v0 ^= third;
 | |
| +	HPOSTAMBLE
 | |
| +}
 | |
| +
 | |
| +/**
 | |
| + * hsiphash_4u32 - compute 32-bit hsiphash PRF value of 4 u32
 | |
| + * @first: first u32
 | |
| + * @second: second u32
 | |
| + * @third: third u32
 | |
| + * @forth: fourth u32
 | |
| + * @key: the hsiphash key
 | |
| + */
 | |
| +u32 hsiphash_4u32(const u32 first, const u32 second, const u32 third,
 | |
| +		  const u32 forth, const hsiphash_key_t *key)
 | |
| +{
 | |
| +	HPREAMBLE(16)
 | |
| +	v3 ^= first;
 | |
| +	HSIPROUND;
 | |
| +	v0 ^= first;
 | |
| +	v3 ^= second;
 | |
| +	HSIPROUND;
 | |
| +	v0 ^= second;
 | |
| +	v3 ^= third;
 | |
| +	HSIPROUND;
 | |
| +	v0 ^= third;
 | |
| +	v3 ^= forth;
 | |
| +	HSIPROUND;
 | |
| +	v0 ^= forth;
 | |
| +	HPOSTAMBLE
 | |
| +}
 | |
| +#endif
 | |
| --- /dev/null	2017-07-05 16:27:37.615351856 +0200
 | |
| +++ b/net/wireguard/compat/Makefile.include	2017-07-06 18:17:33.000000000 +0200
 | |
| @@ -0,0 +1,41 @@
 | |
| +ifeq ($(wildcard $(src)/compat/compat.h),)
 | |
| +ccflags-y += -include $(srctree)/$(src)/compat/compat.h
 | |
| +else
 | |
| +ccflags-y += -include $(src)/compat/compat.h
 | |
| +endif
 | |
| +
 | |
| +ifeq ($(wildcard $(srctree)/include/linux/siphash.h),)
 | |
| +ccflags-y += -I$(src)/compat/siphash/include
 | |
| +wireguard-y += compat/siphash/siphash.o
 | |
| +endif
 | |
| +
 | |
| +ifeq ($(wildcard $(srctree)/include/net/dst_cache.h),)
 | |
| +ccflags-y += -I$(src)/compat/dst_cache/include
 | |
| +wireguard-y += compat/dst_cache/dst_cache.o
 | |
| +endif
 | |
| +
 | |
| +ifeq ($(wildcard $(srctree)/arch/x86/include/asm/fpu/api.h),)
 | |
| +ccflags-y += -I$(src)/compat/fpu/include
 | |
| +endif
 | |
| +
 | |
| +ifeq ($(wildcard $(srctree)/arch/x86/include/asm/simd.h),)
 | |
| +ccflags-y += -I$(src)/compat/simd/include
 | |
| +endif
 | |
| +
 | |
| +ifeq ($(wildcard $(srctree)/include/net/udp_tunnel.h),)
 | |
| +ccflags-y += -I$(src)/compat/udp_tunnel/include
 | |
| +wireguard-y += compat/udp_tunnel/udp_tunnel.o
 | |
| +endif
 | |
| +
 | |
| +ifeq ($(shell grep -F "int crypto_memneq" "$(srctree)/include/crypto/algapi.h"),)
 | |
| +ccflags-y += -include $(src)/compat/memneq/include.h
 | |
| +wireguard-y += compat/memneq/memneq.o
 | |
| +endif
 | |
| +
 | |
| +ifneq ($(KBUILD_EXTMOD),)
 | |
| +ifneq ($(CONFIG_SMP),)
 | |
| +ifeq (,$(filter $(CONFIG_PADATA),y m))
 | |
| +wireguard-y += compat/padata/padata.o
 | |
| +endif
 | |
| +endif
 | |
| +endif
 | |
| --- /dev/null	2017-07-05 16:27:37.615351856 +0200
 | |
| +++ b/net/wireguard/compat/udp_tunnel/udp_tunnel.c	2017-07-06 18:17:33.000000000 +0200
 | |
| @@ -0,0 +1,380 @@
 | |
| +#include <linux/module.h>
 | |
| +#include <linux/errno.h>
 | |
| +#include <linux/socket.h>
 | |
| +#include <linux/udp.h>
 | |
| +#include <linux/types.h>
 | |
| +#include <linux/kernel.h>
 | |
| +#include <net/net_namespace.h>
 | |
| +#include <net/inet_common.h>
 | |
| +#include <net/udp.h>
 | |
| +#include <net/udp_tunnel.h>
 | |
| +
 | |
| +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
 | |
| +#define __sk_user_data(sk) ((*((void __rcu **)&(sk)->sk_user_data)))
 | |
| +#define rcu_dereference_sk_user_data(sk) rcu_dereference(__sk_user_data((sk)))
 | |
| +#define rcu_assign_sk_user_data(sk, ptr) rcu_assign_pointer(__sk_user_data((sk)), ptr)
 | |
| +#endif
 | |
| +
 | |
| +/* This callback is a single file-wide global that setup_udp_tunnel_sock() overwrites on every call, so, uh, it only works with one real call site... This is the kind of horrific hack you'd expect to see in compat code. */
 | |
| +static udp_tunnel_encap_rcv_t encap_rcv = NULL;
 | |
| +static void our_sk_data_ready(struct sock *sk
 | |
| +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 15, 0)
 | |
| +			      ,int unused_vulnerable_length_param
 | |
| +#endif
 | |
| +			      )
 | |
| +{
 | |
| +	struct sk_buff *skb;
 | |
| +	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
 | |
| +		skb_orphan(skb);
 | |
| +		sk_mem_reclaim(sk);
 | |
| +		encap_rcv(sk, skb);
 | |
| +	}
 | |
| +}
 | |
| +
 | |
| +int udp_sock_create4(struct net *net, struct udp_port_cfg *cfg,
 | |
| +		     struct socket **sockp)
 | |
| +{
 | |
| +	int err;
 | |
| +	struct socket *sock = NULL;
 | |
| +	struct sockaddr_in udp_addr;
 | |
| +
 | |
| +	err = __sock_create(net, AF_INET, SOCK_DGRAM, 0, &sock, 1);
 | |
| +	if (err < 0)
 | |
| +		goto error;
 | |
| +
 | |
| +	udp_addr.sin_family = AF_INET;
 | |
| +	udp_addr.sin_addr = cfg->local_ip;
 | |
| +	udp_addr.sin_port = cfg->local_udp_port;
 | |
| +	err = kernel_bind(sock, (struct sockaddr *)&udp_addr,
 | |
| +			  sizeof(udp_addr));
 | |
| +	if (err < 0)
 | |
| +		goto error;
 | |
| +
 | |
| +	if (cfg->peer_udp_port) {
 | |
| +		udp_addr.sin_family = AF_INET;
 | |
| +		udp_addr.sin_addr = cfg->peer_ip;
 | |
| +		udp_addr.sin_port = cfg->peer_udp_port;
 | |
| +		err = kernel_connect(sock, (struct sockaddr *)&udp_addr,
 | |
| +				     sizeof(udp_addr), 0);
 | |
| +		if (err < 0)
 | |
| +			goto error;
 | |
| +	}
 | |
| +
 | |
| +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 16, 0)
 | |
| +	sock->sk->sk_no_check = !cfg->use_udp_checksums;
 | |
| +#else
 | |
| +	sock->sk->sk_no_check_tx = !cfg->use_udp_checksums;
 | |
| +#endif
 | |
| +
 | |
| +	*sockp = sock;
 | |
| +	return 0;
 | |
| +
 | |
| +error:
 | |
| +	if (sock) {
 | |
| +		kernel_sock_shutdown(sock, SHUT_RDWR);
 | |
| +		sock_release(sock);
 | |
| +	}
 | |
| +	*sockp = NULL;
 | |
| +	return err;
 | |
| +}
 | |
| +
 | |
| +void setup_udp_tunnel_sock(struct net *net, struct socket *sock,
 | |
| +			   struct udp_tunnel_sock_cfg *cfg)
 | |
| +{
 | |
| +	inet_sk(sock->sk)->mc_loop = 0;
 | |
| +	encap_rcv = cfg->encap_rcv;
 | |
| +	rcu_assign_sk_user_data(sock->sk, cfg->sk_user_data);
 | |
| +	sock->sk->sk_data_ready = our_sk_data_ready;
 | |
| +}
 | |
| +
 | |
| +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 16, 0)
 | |
| +static inline __sum16 udp_v4_check(int len, __be32 saddr,
 | |
| +				   __be32 daddr, __wsum base)
 | |
| +{
 | |
| +	return csum_tcpudp_magic(saddr, daddr, len, IPPROTO_UDP, base);
 | |
| +}
 | |
| +
 | |
| +static void udp_set_csum(bool nocheck, struct sk_buff *skb,
 | |
| +		  __be32 saddr, __be32 daddr, int len)
 | |
| +{
 | |
| +	struct udphdr *uh = udp_hdr(skb);
 | |
| +
 | |
| +	if (nocheck)
 | |
| +		uh->check = 0;
 | |
| +	else if (skb_is_gso(skb))
 | |
| +		uh->check = ~udp_v4_check(len, saddr, daddr, 0);
 | |
| +	else if (skb_dst(skb) && skb_dst(skb)->dev &&
 | |
| +		 (skb_dst(skb)->dev->features & NETIF_F_V4_CSUM)) {
 | |
| +
 | |
| +		BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);
 | |
| +
 | |
| +		skb->ip_summed = CHECKSUM_PARTIAL;
 | |
| +		skb->csum_start = skb_transport_header(skb) - skb->head;
 | |
| +		skb->csum_offset = offsetof(struct udphdr, check);
 | |
| +		uh->check = ~udp_v4_check(len, saddr, daddr, 0);
 | |
| +	} else {
 | |
| +		__wsum csum;
 | |
| +
 | |
| +		BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);
 | |
| +
 | |
| +		uh->check = 0;
 | |
| +		csum = skb_checksum(skb, 0, len, 0);
 | |
| +		uh->check = udp_v4_check(len, saddr, daddr, csum);
 | |
| +		if (uh->check == 0)
 | |
| +			uh->check = CSUM_MANGLED_0;
 | |
| +
 | |
| +		skb->ip_summed = CHECKSUM_UNNECESSARY;
 | |
| +	}
 | |
| +}
 | |
| +
 | |
| +#endif
 | |
| +
 | |
| +static void fake_destructor(struct sk_buff *skb)
 | |
| +{
 | |
| +}
 | |
| +
 | |
| +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0)
 | |
| +static void our_iptunnel_xmit(struct rtable *rt, struct sk_buff *skb,
 | |
| +		  __be32 src, __be32 dst, __u8 proto,
 | |
| +		  __u8 tos, __u8 ttl, __be16 df, bool xnet)
 | |
| +{	/* Pre-3.12 path: build the outer IPv4 header here, then call the two-argument iptunnel_xmit(skb, dev). */
 | |
| +	struct iphdr *iph;
 | |
| +
 | |
| +	skb_scrub_packet(skb, xnet);
 | |
| +	/* Reset per-packet state before attaching the new route. */
 | |
| +	skb->rxhash = 0;
 | |
| +	skb_dst_set(skb, &rt->dst);
 | |
| +	memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
 | |
| +
 | |
| +	/* Push down and install the IP header. */
 | |
| +	skb_push(skb, sizeof(struct iphdr));
 | |
| +	skb_reset_network_header(skb);
 | |
| +
 | |
| +	iph = ip_hdr(skb);
 | |
| +	/* Fixed-size header without options: ihl is sizeof(struct iphdr) in 32-bit words. */
 | |
| +	iph->version	=	4;
 | |
| +	iph->ihl	=	sizeof(struct iphdr) >> 2;
 | |
| +	iph->frag_off	=	df;
 | |
| +	iph->protocol	=	proto;
 | |
| +	iph->tos	=	tos;
 | |
| +	iph->daddr	=	dst;
 | |
| +	iph->saddr	=	src;
 | |
| +	iph->ttl	=	ttl;
 | |
| +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 53) /* __ip_select_ident() takes different arguments on either side of 3.10.53 */
 | |
| +	__ip_select_ident(iph, &rt->dst, (skb_shinfo(skb)->gso_segs ?: 1) - 1);
 | |
| +#else
 | |
| +	__ip_select_ident(iph, skb_shinfo(skb)->gso_segs ?: 1);
 | |
| +#endif
 | |
| +
 | |
| +	iptunnel_xmit(skb, skb->dev);
 | |
| +}
 | |
| +#define iptunnel_xmit our_iptunnel_xmit
 | |
| +#endif
 | |
| +
 | |
| +void udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb,
 | |
| +			 __be32 src, __be32 dst, __u8 tos, __u8 ttl,
 | |
| +			 __be16 df, __be16 src_port, __be16 dst_port,
 | |
| +			 bool xnet, bool nocheck)
 | |
| +{	/* Prepend a UDP header to skb, set its checksum, and pass the packet to iptunnel_xmit() for IPv4 output. */
 | |
| +	struct udphdr *uh;
 | |
| +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)
 | |
| +	struct net_device *dev = skb->dev; /* captured before transmit; used for the stats update at the end */
 | |
| +	int ret;
 | |
| +#endif
 | |
| +
 | |
| +	__skb_push(skb, sizeof(*uh));
 | |
| +	skb_reset_transport_header(skb);
 | |
| +	uh = udp_hdr(skb);
 | |
| +
 | |
| +	uh->dest = dst_port;
 | |
| +	uh->source = src_port;
 | |
| +	uh->len = htons(skb->len); /* taken after the push above, so it covers the UDP header as well */
 | |
| +
 | |
| +	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
 | |
| +
 | |
| +	udp_set_csum(nocheck, skb, src, dst, skb->len);
 | |
| +	/* Only fill in the socket and destructor when the skb does not already carry them. */
 | |
| +	if (!skb->sk)
 | |
| +		skb->sk = sk;
 | |
| +	if (!skb->destructor)
 | |
| +		skb->destructor = fake_destructor;
 | |
| +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0) /* the return value is only captured on >= 3.12 */
 | |
| +	ret =
 | |
| +#endif
 | |
| +	     iptunnel_xmit(
 | |
| +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 15, 0) /* >= 3.15 takes the socket as an extra first argument */
 | |
| +			   sk,
 | |
| +#endif
 | |
| +			   rt, skb, src, dst, IPPROTO_UDP, tos, ttl, df, xnet);
 | |
| +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)
 | |
| +	iptunnel_xmit_stats(ret, &dev->stats, dev->tstats);
 | |
| +#endif
 | |
| +}
 | |
| +
 | |
| +void udp_tunnel_sock_release(struct socket *sock)
 | |
| +{	/* Tear down a tunnel socket: clear sk_user_data first, then shut the socket down and release it. */
 | |
| +	rcu_assign_sk_user_data(sock->sk, NULL);
 | |
| +	kernel_sock_shutdown(sock, SHUT_RDWR);
 | |
| +	sock_release(sock);
 | |
| +}
 | |
| +
 | |
| +#if IS_ENABLED(CONFIG_IPV6)
 | |
| +#include <linux/module.h>
 | |
| +#include <linux/errno.h>
 | |
| +#include <linux/socket.h>
 | |
| +#include <linux/udp.h>
 | |
| +#include <linux/types.h>
 | |
| +#include <linux/kernel.h>
 | |
| +#include <linux/in6.h>
 | |
| +#include <net/udp.h>
 | |
| +#include <net/udp_tunnel.h>
 | |
| +#include <net/net_namespace.h>
 | |
| +#include <net/netns/generic.h>
 | |
| +#include <net/ip6_tunnel.h>
 | |
| +#include <net/ip6_checksum.h>
 | |
| +
 | |
| +int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
 | |
| +		     struct socket **sockp)
 | |
| +{	/* Create, bind and optionally connect an IPv6 UDP socket per cfg. Returns 0 with *sockp set, or a negative errno with *sockp = NULL. */
 | |
| +	struct sockaddr_in6 udp6_addr = { 0 }; /* zeroed so sin6_flowinfo/sin6_scope_id are never stack garbage */
 | |
| +	int err;
 | |
| +	struct socket *sock = NULL;
 | |
| +
 | |
| +	err = __sock_create(net, AF_INET6, SOCK_DGRAM, 0, &sock, 1);
 | |
| +	if (err < 0)
 | |
| +		goto error;
 | |
| +
 | |
| +	if (cfg->ipv6_v6only) {
 | |
| +		int val = 1;
 | |
| +
 | |
| +		err = kernel_setsockopt(sock, IPPROTO_IPV6, IPV6_V6ONLY,
 | |
| +					(char *) &val, sizeof(val));
 | |
| +		if (err < 0)
 | |
| +			goto error;
 | |
| +	}
 | |
| +	/* Bind to the configured local address and port. */
 | |
| +	udp6_addr.sin6_family = AF_INET6;
 | |
| +	memcpy(&udp6_addr.sin6_addr, &cfg->local_ip6,
 | |
| +	       sizeof(udp6_addr.sin6_addr));
 | |
| +	udp6_addr.sin6_port = cfg->local_udp_port;
 | |
| +	err = kernel_bind(sock, (struct sockaddr *)&udp6_addr,
 | |
| +			  sizeof(udp6_addr));
 | |
| +	if (err < 0)
 | |
| +		goto error;
 | |
| +	/* Connect only when a peer port was configured; udp6_addr is reused for the peer address. */
 | |
| +	if (cfg->peer_udp_port) {
 | |
| +		udp6_addr.sin6_family = AF_INET6;
 | |
| +		memcpy(&udp6_addr.sin6_addr, &cfg->peer_ip6,
 | |
| +		       sizeof(udp6_addr.sin6_addr));
 | |
| +		udp6_addr.sin6_port = cfg->peer_udp_port;
 | |
| +		err = kernel_connect(sock,
 | |
| +				     (struct sockaddr *)&udp6_addr,
 | |
| +				     sizeof(udp6_addr), 0);
 | |
| +	}
 | |
| +	if (err < 0)
 | |
| +		goto error;
 | |
| +
 | |
| +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 16, 0) /* before 3.16 there is a single sk_no_check flag */
 | |
| +	sock->sk->sk_no_check = !cfg->use_udp_checksums;
 | |
| +#else
 | |
| +	udp_set_no_check6_tx(sock->sk, !cfg->use_udp6_tx_checksums);
 | |
| +	udp_set_no_check6_rx(sock->sk, !cfg->use_udp6_rx_checksums);
 | |
| +#endif
 | |
| +
 | |
| +	*sockp = sock;
 | |
| +	return 0;
 | |
| +
 | |
| +error:
 | |
| +	if (sock) { /* sock is still NULL if __sock_create() itself failed */
 | |
| +		kernel_sock_shutdown(sock, SHUT_RDWR);
 | |
| +		sock_release(sock);
 | |
| +	}
 | |
| +	*sockp = NULL;
 | |
| +	return err;
 | |
| +}
 | |
| +
 | |
| +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 16, 0)
 | |
| +static inline __sum16 udp_v6_check(int len,
 | |
| +		const struct in6_addr *saddr,
 | |
| +		const struct in6_addr *daddr,
 | |
| +		__wsum base)
 | |
| +{	/* Pre-3.16 helper: thin wrapper around csum_ipv6_magic() with the protocol fixed to IPPROTO_UDP. */
 | |
| +	return csum_ipv6_magic(saddr, daddr, len, IPPROTO_UDP, base);
 | |
| +}
 | |
| +static void udp6_set_csum(bool nocheck, struct sk_buff *skb,
 | |
| +		   const struct in6_addr *saddr,
 | |
| +		   const struct in6_addr *daddr, int len)
 | |
| +{	/* Pre-3.16 helper: fill in uh->check for an outgoing IPv6 UDP packet, choosing one of four strategies. */
 | |
| +	struct udphdr *uh = udp_hdr(skb);
 | |
| +
 | |
| +	if (nocheck) /* 1: checksums disabled, store zero */
 | |
| +		uh->check = 0;
 | |
| +	else if (skb_is_gso(skb)) /* 2: GSO skb, store only the complemented pseudo-header sum */
 | |
| +		uh->check = ~udp_v6_check(len, saddr, daddr, 0);
 | |
| +	else if (skb_dst(skb) && skb_dst(skb)->dev &&
 | |
| +		 (skb_dst(skb)->dev->features & NETIF_F_IPV6_CSUM)) {
 | |
| +		/* 3: output device advertises NETIF_F_IPV6_CSUM, so mark the skb CHECKSUM_PARTIAL. */
 | |
| +		BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);
 | |
| +
 | |
| +		skb->ip_summed = CHECKSUM_PARTIAL;
 | |
| +		skb->csum_start = skb_transport_header(skb) - skb->head;
 | |
| +		skb->csum_offset = offsetof(struct udphdr, check);
 | |
| +		uh->check = ~udp_v6_check(len, saddr, daddr, 0);
 | |
| +	} else { /* 4: compute the whole checksum here */
 | |
| +		__wsum csum;
 | |
| +
 | |
| +		BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL);
 | |
| +
 | |
| +		uh->check = 0; /* the field must be zero while it is being summed */
 | |
| +		csum = skb_checksum(skb, 0, len, 0);
 | |
| +		uh->check = udp_v6_check(len, saddr, daddr, csum);
 | |
| +		if (uh->check == 0) /* a computed zero is replaced by CSUM_MANGLED_0 */
 | |
| +			uh->check = CSUM_MANGLED_0;
 | |
| +
 | |
| +		skb->ip_summed = CHECKSUM_UNNECESSARY;
 | |
| +	}
 | |
| +}
 | |
| +#endif
 | |
| +
 | |
| +int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk,
 | |
| +			 struct sk_buff *skb,
 | |
| +			 struct net_device *dev, struct in6_addr *saddr,
 | |
| +			 struct in6_addr *daddr,
 | |
| +			 __u8 prio, __u8 ttl, __be32 label,
 | |
| +			 __be16 src_port, __be16 dst_port, bool nocheck)
 | |
| +{	/* Prepend UDP and IPv6 headers to skb and send it with ip6tunnel_xmit(). Always returns 0. */
 | |
| +	struct udphdr *uh;
 | |
| +	struct ipv6hdr *ip6h;
 | |
| +
 | |
| +	__skb_push(skb, sizeof(*uh));
 | |
| +	skb_reset_transport_header(skb);
 | |
| +	uh = udp_hdr(skb);
 | |
| +
 | |
| +	uh->dest = dst_port;
 | |
| +	uh->source = src_port;
 | |
| +
 | |
| +	uh->len = htons(skb->len);
 | |
| +
 | |
| +	skb_dst_set(skb, dst);
 | |
| +	/* Checksum uses the same saddr/daddr that are written into the IPv6 header below. */
 | |
| +	udp6_set_csum(nocheck, skb, saddr, daddr, skb->len);
 | |
| +
 | |
| +	__skb_push(skb, sizeof(*ip6h));
 | |
| +	skb_reset_network_header(skb);
 | |
| +	ip6h		  = ipv6_hdr(skb);
 | |
| +	ip6_flow_hdr(ip6h, prio, label);
 | |
| +	ip6h->payload_len = htons(skb->len);
 | |
| +	ip6h->nexthdr     = IPPROTO_UDP;
 | |
| +	ip6h->hop_limit   = ttl;
 | |
| +	ip6h->daddr	  = *daddr;
 | |
| +	ip6h->saddr	  = *saddr;
 | |
| +	/* Only fill in the socket and destructor when the skb does not already carry them. */
 | |
| +	if (!skb->sk)
 | |
| +		skb->sk = sk;
 | |
| +	if (!skb->destructor)
 | |
| +		skb->destructor = fake_destructor;
 | |
| +
 | |
| +	ip6tunnel_xmit(skb, dev);
 | |
| +	return 0;
 | |
| +}
 | |
| +#endif
 | |
| --- /dev/null	2017-07-05 16:27:37.615351856 +0200
 | |
| +++ b/net/wireguard/compat/udp_tunnel/udp_tunnel_partial_compat.h	2017-07-06 18:17:33.000000000 +0200
 | |
| @@ -0,0 +1,215 @@
 | |
| +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 18, 0)
 | |
| +#define udp_sock_create4 udp_sock_create
 | |
| +#define udp_sock_create6 udp_sock_create
 | |
| +#include <linux/socket.h>
 | |
| +#include <linux/if.h>
 | |
| +#include <linux/in.h>
 | |
| +#include <net/ip_tunnels.h>
 | |
| +#include <net/udp.h>
 | |
| +#include <net/inet_common.h>
 | |
| +#if IS_ENABLED(CONFIG_IPV6)
 | |
| +#include <linux/in6.h>
 | |
| +#include <net/ipv6.h>
 | |
| +#include <net/addrconf.h>
 | |
| +#include <net/ip6_checksum.h>
 | |
| +#include <net/ip6_tunnel.h>
 | |
| +#endif
 | |
| +static inline void fake_destructor(struct sk_buff *skb)
 | |
| +{	/* Intentionally empty skb destructor. */
 | |
| +}
 | |
| +typedef int (*udp_tunnel_encap_rcv_t)(struct sock *sk, struct sk_buff *skb);
 | |
| +struct udp_tunnel_sock_cfg { /* argument bundle for setup_udp_tunnel_sock() */
 | |
| +        void *sk_user_data; /* stored on the socket via rcu_assign_sk_user_data() */
 | |
| +        __u8  encap_type; /* not read by the setup_udp_tunnel_sock() in this header */
 | |
| +        udp_tunnel_encap_rcv_t encap_rcv; /* receive callback, invoked with (sk, skb) */
 | |
| +};
 | |
| +/* This is global so, uh, only one real call site... This is the kind of horrific hack you'd expect to see in compat code. */
 | |
| +static udp_tunnel_encap_rcv_t encap_rcv = NULL;
 | |
| +static void our_sk_data_ready(struct sock *sk)
 | |
| +{	/* sk_data_ready hook: drain the socket's receive queue and pass each skb to the global encap_rcv callback. */
 | |
| +	struct sk_buff *skb;
 | |
| +	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
 | |
| +		skb_orphan(skb); /* orphan the skb before handing it to the callback */
 | |
| +		sk_mem_reclaim(sk);
 | |
| +		encap_rcv(sk, skb); /* return value is ignored */
 | |
| +	}
 | |
| +}
 | |
| +static inline void setup_udp_tunnel_sock(struct net *net, struct socket *sock,
 | |
| +                           struct udp_tunnel_sock_cfg *cfg)
 | |
| +{	/* Compat setup: route received packets to cfg->encap_rcv by overriding sk_data_ready. net is unused here. */
 | |
| +	struct sock *sk = sock->sk;
 | |
| +	inet_sk(sk)->mc_loop = 0;
 | |
| +	encap_rcv = cfg->encap_rcv; /* file-scope variable: a later call replaces the callback for every socket set up this way */
 | |
| +	rcu_assign_sk_user_data(sk, cfg->sk_user_data);
 | |
| +	sk->sk_data_ready = our_sk_data_ready;
 | |
| +}
 | |
| +static inline void udp_tunnel_sock_release(struct socket *sock)
 | |
| +{	/* Tear down a tunnel socket: clear sk_user_data, shut the socket down, then release it via sk_release_kernel(). */
 | |
| +	rcu_assign_sk_user_data(sock->sk, NULL);
 | |
| +	kernel_sock_shutdown(sock, SHUT_RDWR);
 | |
| +	sk_release_kernel(sock->sk);
 | |
| +}
 | |
| +static inline int udp_tunnel_xmit_skb(struct socket *sock, struct rtable *rt,
 | |
| +                        struct sk_buff *skb, __be32 src, __be32 dst,
 | |
| +                        __u8 tos, __u8 ttl, __be16 df, __be16 src_port,
 | |
| +                        __be16 dst_port, bool xnet)
 | |
| +{	/* Socket-based variant: prepend a UDP header and return the result of iptunnel_xmit(). */
 | |
| +	struct udphdr *uh;
 | |
| +	__skb_push(skb, sizeof(*uh));
 | |
| +	skb_reset_transport_header(skb);
 | |
| +	uh = udp_hdr(skb);
 | |
| +	uh->dest = dst_port;
 | |
| +	uh->source = src_port;
 | |
| +	uh->len = htons(skb->len);
 | |
| +	udp_set_csum(sock->sk->sk_no_check_tx, skb, src, dst, skb->len); /* checksum policy comes from the socket flag, not a parameter */
 | |
| +	return iptunnel_xmit(sock->sk, rt, skb, src, dst, IPPROTO_UDP,
 | |
| +			     tos, ttl, df, xnet);
 | |
| +}
 | |
| +#if IS_ENABLED(CONFIG_IPV6)
 | |
| +static inline int udp_tunnel6_xmit_skb(struct socket *sock, struct dst_entry *dst,
 | |
| +                         struct sk_buff *skb, struct net_device *dev,
 | |
| +                         struct in6_addr *saddr, struct in6_addr *daddr,
 | |
| +                         __u8 prio, __u8 ttl, __be16 src_port,
 | |
| +                         __be16 dst_port)
 | |
| +{	/* Socket-based variant: prepend UDP and IPv6 headers to skb and send it with ip6tunnel_xmit(). Always returns 0. */
 | |
| +	struct udphdr *uh;
 | |
| +	struct ipv6hdr *ip6h;
 | |
| +	struct sock *sk = sock->sk;
 | |
| +	__skb_push(skb, sizeof(*uh));
 | |
| +	skb_reset_transport_header(skb);
 | |
| +	uh = udp_hdr(skb);
 | |
| +	uh->dest = dst_port;
 | |
| +	uh->source = src_port;
 | |
| +	uh->len = htons(skb->len);
 | |
| +	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
 | |
| +	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED
 | |
| +			    | IPSKB_REROUTED);
 | |
| +	skb_dst_set(skb, dst);
 | |
| +	udp6_set_csum(udp_get_no_check6_tx(sk), skb, saddr, daddr,
 | |
| +	              skb->len); /* sum over the addresses written into ip6h below, not the socket's own */
 | |
| +	__skb_push(skb, sizeof(*ip6h));
 | |
| +	skb_reset_network_header(skb);
 | |
| +	ip6h		  = ipv6_hdr(skb);
 | |
| +	ip6_flow_hdr(ip6h, prio, htonl(0)); /* this variant has no label parameter, so the flow label is always 0 */
 | |
| +	ip6h->payload_len = htons(skb->len);
 | |
| +	ip6h->nexthdr     = IPPROTO_UDP;
 | |
| +	ip6h->hop_limit   = ttl;
 | |
| +	ip6h->daddr	  = *daddr;
 | |
| +	ip6h->saddr	  = *saddr;
 | |
| +	ip6tunnel_xmit(skb, dev);
 | |
| +	return 0;
 | |
| +}
 | |
| +#endif
 | |
| +#endif
 | |
| +
 | |
| +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 0, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0)
 | |
| +#include <linux/in.h>
 | |
| +#include <linux/in6.h>
 | |
| +#include <linux/udp.h>
 | |
| +#include <linux/skbuff.h>
 | |
| +#include <linux/if.h>
 | |
| +#include <net/udp_tunnel.h>
 | |
| +#define udp_tunnel_xmit_skb(a, b, c, d, e, f, g, h, i, j, k, l) do { struct net_device *dev__ = (c)->dev; int ret__; ret__ = udp_tunnel_xmit_skb((b)->sk_socket, a, c, d, e, f, g, h, i, j, k); iptunnel_xmit_stats(ret__, &dev__->stats, dev__->tstats); } while (0)
 | |
| +#if IS_ENABLED(CONFIG_IPV6)
 | |
| +#define udp_tunnel6_xmit_skb(a, b, c, d, e, f, g, h, i, j, k, l) udp_tunnel6_xmit_skb((b)->sk_socket, a, c, d, e, f, g, h, j, k) /* drops i and l; no trailing ';' so the caller's own terminates the statement */
 | |
| +#endif
 | |
| +#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 1, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0)
 | |
| +#include <linux/if.h>
 | |
| +#include <net/udp_tunnel.h>
 | |
| +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 18, 0)
 | |
| +static inline void fake_destructor(struct sk_buff *skb)
 | |
| +{	/* Intentionally empty: the xmit macros below install it when an skb has no destructor. */
 | |
| +}
 | |
| +#endif
 | |
| +#define udp_tunnel_xmit_skb(a, b, c, d, e, f, g, h, i, j, k, l) do { struct net_device *dev__ = (c)->dev; int ret__; if (!(c)->destructor) (c)->destructor = fake_destructor; if (!(c)->sk) (c)->sk = (b); ret__ = udp_tunnel_xmit_skb(a, c, d, e, f, g, h, i, j, k, l); iptunnel_xmit_stats(ret__, &dev__->stats, dev__->tstats); } while (0)
 | |
| +#if IS_ENABLED(CONFIG_IPV6)
 | |
| +#define udp_tunnel6_xmit_skb(a, b, c, d, e, f, g, h, i, j, k, l) do { if (!(c)->destructor) (c)->destructor = fake_destructor; if (!(c)->sk) (c)->sk = (b); udp_tunnel6_xmit_skb(a, c, d, e, f, g, h, j, k, l); } while(0)
 | |
| +#endif
 | |
| +#else
 | |
| +
 | |
| +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 5, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0)
 | |
| +#include <linux/if.h>
 | |
| +#include <net/udp_tunnel.h>
 | |
| +#define udp_tunnel_xmit_skb(a, b, c, d, e, f, g, h, i, j, k, l) do { struct net_device *dev__ = (c)->dev; int ret__ = udp_tunnel_xmit_skb(a, b, c, d, e, f, g, h, i, j, k, l);  iptunnel_xmit_stats(ret__, &dev__->stats, dev__->tstats); } while (0)
 | |
| +#endif
 | |
| +
 | |
| +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 6, 0) && IS_ENABLED(CONFIG_IPV6) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0)
 | |
| +#include <linux/if.h>
 | |
| +#include <net/udp_tunnel.h>
 | |
| +#define udp_tunnel6_xmit_skb(a, b, c, d, e, f, g, h, i, j, k, l) udp_tunnel6_xmit_skb(a, b, c, d, e, f, g, h, j, k, l)
 | |
| +#endif
 | |
| +
 | |
| +#endif
 | |
| +
 | |
| +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0)
 | |
| +#include <linux/skbuff.h>
 | |
| +#include <linux/if.h>
 | |
| +#include <net/udp_tunnel.h>
 | |
| +struct udp_port_cfg_new { /* udp_port_cfg layout including ipv6_v6only; aliased to udp_port_cfg by a #define further down */
 | |
| +	u8 family;
 | |
| +	union { /* local address; which member applies is selected by family */
 | |
| +		struct in_addr local_ip;
 | |
| +#if IS_ENABLED(CONFIG_IPV6)
 | |
| +		struct in6_addr local_ip6;
 | |
| +#endif
 | |
| +	};
 | |
| +	union { /* peer address; which member applies is selected by family */
 | |
| +		struct in_addr peer_ip;
 | |
| +#if IS_ENABLED(CONFIG_IPV6)
 | |
| +		struct in6_addr peer_ip6;
 | |
| +#endif
 | |
| +	};
 | |
| +	__be16 local_udp_port;
 | |
| +	__be16 peer_udp_port;
 | |
| +	unsigned int use_udp_checksums:1, use_udp6_tx_checksums:1, use_udp6_rx_checksums:1, ipv6_v6only:1;
 | |
| +};
 | |
| +static inline int __maybe_unused udp_sock_create_new(struct net *net, struct udp_port_cfg_new *cfg, struct socket **sockp)
 | |
| +{	/* Copy cfg into the kernel's own udp_port_cfg and create the socket, emulating ipv6_v6only which that struct lacks. */
 | |
| +	struct udp_port_cfg old_cfg = {
 | |
| +		.family = cfg->family,
 | |
| +		.local_ip = cfg->local_ip,
 | |
| +#if IS_ENABLED(CONFIG_IPV6)
 | |
| +		.local_ip6 = cfg->local_ip6,
 | |
| +#endif
 | |
| +		.peer_ip = cfg->peer_ip,
 | |
| +#if IS_ENABLED(CONFIG_IPV6)
 | |
| +		.peer_ip6 = cfg->peer_ip6,
 | |
| +#endif
 | |
| +		.local_udp_port = cfg->local_udp_port,
 | |
| +		.peer_udp_port = cfg->peer_udp_port,
 | |
| +		.use_udp_checksums = cfg->use_udp_checksums,
 | |
| +		.use_udp6_tx_checksums = cfg->use_udp6_tx_checksums,
 | |
| +		.use_udp6_rx_checksums = cfg->use_udp6_rx_checksums
 | |
| +	};
 | |
| +	if (cfg->family == AF_INET)
 | |
| +		return udp_sock_create4(net, &old_cfg, sockp);
 | |
| +
 | |
| +#if IS_ENABLED(CONFIG_IPV6)
 | |
| +	if (cfg->family == AF_INET6) {
 | |
| +		int ret;
 | |
| +		int old_bindv6only; /* only assigned and read when cfg->ipv6_v6only is set */
 | |
| +		struct net *nobns; /* likewise only valid when cfg->ipv6_v6only is set */
 | |
| +
 | |
| +		if (cfg->ipv6_v6only) {
 | |
| +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 2, 0) /* before 4.2 the sysctl is toggled on init_net rather than on net */
 | |
| +			nobns = &init_net;
 | |
| +#else
 | |
| +			nobns = net;
 | |
| +#endif
 | |
| +			/* Since udp_port_cfg only learned of ipv6_v6only in 4.3, we do this horrible
 | |
| +			 * hack here and set the sysctl variable temporarily to something that will
 | |
| +			 * set the right option for us in sock_create. It's super racy! */
 | |
| +			old_bindv6only = nobns->ipv6.sysctl.bindv6only;
 | |
| +			nobns->ipv6.sysctl.bindv6only = 1;
 | |
| +		}
 | |
| +		ret = udp_sock_create6(net, &old_cfg, sockp);
 | |
| +		if (cfg->ipv6_v6only) /* restore the saved sysctl value whether or not creation succeeded */
 | |
| +			nobns->ipv6.sysctl.bindv6only = old_bindv6only;
 | |
| +		return ret;
 | |
| +	}
 | |
| +#endif
 | |
| +	return -EPFNOSUPPORT;
 | |
| +}
 | |
| +#define udp_port_cfg udp_port_cfg_new
 | |
| +#define udp_sock_create(a, b, c) udp_sock_create_new(a, b, c)
 | |
| +#endif
 | |
| --- /dev/null	2017-07-05 16:27:37.615351856 +0200
 | |
| +++ b/net/wireguard/compat/udp_tunnel/include/net/udp_tunnel.h	2017-07-06 18:17:33.000000000 +0200
 | |
| @@ -0,0 +1,94 @@
 | |
| +#ifndef __NET_UDP_TUNNEL_H
 | |
| +#define __NET_UDP_TUNNEL_H
 | |
| +
 | |
| +#include <net/ip_tunnels.h>
 | |
| +#include <net/udp.h>
 | |
| +
 | |
| +#if IS_ENABLED(CONFIG_IPV6)
 | |
| +#include <net/ipv6.h>
 | |
| +#include <net/addrconf.h>
 | |
| +#endif
 | |
| +
 | |
| +struct udp_port_cfg { /* socket parameters consumed by udp_sock_create4()/udp_sock_create6() */
 | |
| +	u8			family; /* AF_INET or AF_INET6; udp_sock_create() dispatches on it */
 | |
| +
 | |
| +	/* Used only for kernel-created sockets */
 | |
| +	union {
 | |
| +		struct in_addr		local_ip;
 | |
| +#if IS_ENABLED(CONFIG_IPV6)
 | |
| +		struct in6_addr		local_ip6;
 | |
| +#endif
 | |
| +	};
 | |
| +
 | |
| +	union {
 | |
| +		struct in_addr		peer_ip;
 | |
| +#if IS_ENABLED(CONFIG_IPV6)
 | |
| +		struct in6_addr		peer_ip6;
 | |
| +#endif
 | |
| +	};
 | |
| +
 | |
| +	__be16			local_udp_port;
 | |
| +	__be16			peer_udp_port; /* udp_sock_create6() connects the socket only when this is non-zero */
 | |
| +	unsigned int		use_udp_checksums:1,
 | |
| +				use_udp6_tx_checksums:1,
 | |
| +				use_udp6_rx_checksums:1,
 | |
| +				ipv6_v6only:1;
 | |
| +};
 | |
| +
 | |
| +int udp_sock_create4(struct net *net, struct udp_port_cfg *cfg,
 | |
| +		     struct socket **sockp);
 | |
| +
 | |
| +#if IS_ENABLED(CONFIG_IPV6)
 | |
| +int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
 | |
| +		     struct socket **sockp);
 | |
| +#else
 | |
| +static inline int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
 | |
| +				   struct socket **sockp)
 | |
| +{	/* Stub used when CONFIG_IPV6 is disabled. NOTE(review): returns 0 without writing *sockp - confirm callers never treat that as a created socket. */
 | |
| +	return 0;
 | |
| +}
 | |
| +#endif
 | |
| +
 | |
| +static inline int udp_sock_create(struct net *net,
 | |
| +				  struct udp_port_cfg *cfg,
 | |
| +				  struct socket **sockp)
 | |
| +{	/* Dispatch on cfg->family to the IPv4 or IPv6 creator; any other family yields -EPFNOSUPPORT. */
 | |
| +	if (cfg->family == AF_INET)
 | |
| +		return udp_sock_create4(net, cfg, sockp);
 | |
| +
 | |
| +	if (cfg->family == AF_INET6)
 | |
| +		return udp_sock_create6(net, cfg, sockp);
 | |
| +
 | |
| +	return -EPFNOSUPPORT;
 | |
| +}
 | |
| +
 | |
| +typedef int (*udp_tunnel_encap_rcv_t)(struct sock *sk, struct sk_buff *skb);
 | |
| +
 | |
| +struct udp_tunnel_sock_cfg { /* argument bundle for setup_udp_tunnel_sock() */
 | |
| +	void *sk_user_data; /* opaque pointer supplied by the caller */
 | |
| +	__u8  encap_type;
 | |
| +	udp_tunnel_encap_rcv_t encap_rcv; /* receive callback taking (sk, skb) */
 | |
| +};
 | |
| +
 | |
| +/* Setup the given (UDP) sock to receive UDP encapsulated packets */
 | |
| +void setup_udp_tunnel_sock(struct net *net, struct socket *sock,
 | |
| +			   struct udp_tunnel_sock_cfg *sock_cfg);
 | |
| +
 | |
| +/* Transmit the skb using UDP encapsulation. */
 | |
| +void udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb,
 | |
| +			 __be32 src, __be32 dst, __u8 tos, __u8 ttl,
 | |
| +			 __be16 df, __be16 src_port, __be16 dst_port,
 | |
| +			 bool xnet, bool nocheck);
 | |
| +
 | |
| +#if IS_ENABLED(CONFIG_IPV6)
 | |
| +int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk,
 | |
| +			 struct sk_buff *skb,
 | |
| +			 struct net_device *dev, struct in6_addr *saddr,
 | |
| +			 struct in6_addr *daddr,
 | |
| +			 __u8 prio, __u8 ttl, __be32 label,
 | |
| +			 __be16 src_port, __be16 dst_port, bool nocheck);
 | |
| +#endif
 | |
| +
 | |
| +void udp_tunnel_sock_release(struct socket *sock);
 | |
| +
 | |
| +#endif
 | |
| --- /dev/null	2017-07-05 16:27:37.615351856 +0200
 | |
| +++ b/net/wireguard/compat/memneq/include.h	2017-07-06 18:17:33.000000000 +0200
 | |
| @@ -0,0 +1,5 @@
 | |
| +extern noinline unsigned long __crypto_memneq(const void *a, const void *b, size_t size);
 | |
| +static inline int crypto_memneq(const void *a, const void *b, size_t size)
 | |
| +{	/* Returns 1 if the two size-byte regions differ and 0 if they are equal, collapsing __crypto_memneq()'s result to 0/1. */
 | |
| +	return __crypto_memneq(a, b, size) != 0UL ? 1 : 0;
 | |
| +}
 | |
| --- /dev/null	2017-07-05 16:27:37.615351856 +0200
 | |
| +++ b/net/wireguard/compat/memneq/memneq.c	2017-07-06 18:17:33.000000000 +0200
 | |
| @@ -0,0 +1,170 @@
 | |
| +/*
 | |
| + * Constant-time equality testing of memory regions.
 | |
| + *
 | |
| + * Authors:
 | |
| + *
 | |
| + *   James Yonan <james@openvpn.net>
 | |
| + *   Daniel Borkmann <dborkman@redhat.com>
 | |
| + *
 | |
| + * This file is provided under a dual BSD/GPLv2 license.  When using or
 | |
| + * redistributing this file, you may do so under either license.
 | |
| + *
 | |
| + * GPL LICENSE SUMMARY
 | |
| + *
 | |
| + * Copyright(c) 2013 OpenVPN Technologies, Inc. All rights reserved.
 | |
| + *
 | |
| + * This program is free software; you can redistribute it and/or modify
 | |
| + * it under the terms of version 2 of the GNU General Public License as
 | |
| + * published by the Free Software Foundation.
 | |
| + *
 | |
| + * This program is distributed in the hope that it will be useful, but
 | |
| + * WITHOUT ANY WARRANTY; without even the implied warranty of
 | |
| + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 | |
| + * General Public License for more details.
 | |
| + *
 | |
| + * You should have received a copy of the GNU General Public License
 | |
| + * along with this program; if not, write to the Free Software
 | |
| + * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 | |
| + * The full GNU General Public License is included in this distribution
 | |
| + * in the file called LICENSE.GPL.
 | |
| + *
 | |
| + * BSD LICENSE
 | |
| + *
 | |
| + * Copyright(c) 2013 OpenVPN Technologies, Inc. All rights reserved.
 | |
| + *
 | |
| + * Redistribution and use in source and binary forms, with or without
 | |
| + * modification, are permitted provided that the following conditions
 | |
| + * are met:
 | |
| + *
 | |
| + *   * Redistributions of source code must retain the above copyright
 | |
| + *     notice, this list of conditions and the following disclaimer.
 | |
| + *   * Redistributions in binary form must reproduce the above copyright
 | |
| + *     notice, this list of conditions and the following disclaimer in
 | |
| + *     the documentation and/or other materials provided with the
 | |
| + *     distribution.
 | |
| + *   * Neither the name of OpenVPN Technologies nor the names of its
 | |
| + *     contributors may be used to endorse or promote products derived
 | |
| + *     from this software without specific prior written permission.
 | |
| + *
 | |
| + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 | |
| + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 | |
| + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 | |
| + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 | |
| + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 | |
| + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 | |
| + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 | |
| + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 | |
| + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 | |
| + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 | |
| + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | |
| + */
 | |
| +
 | |
| +#include <crypto/algapi.h>
 | |
| +
 | |
| +/* Make the optimizer believe the variable can be manipulated arbitrarily. */
 | |
| +#define COMPILER_OPTIMIZER_HIDE_VAR(var) __asm__ ("" : "=r" (var) : "0" (var))
 | |
| +
 | |
| +#ifndef __HAVE_ARCH_CRYPTO_MEMNEQ
 | |
| +
 | |
| +/* Generic path for arbitrary size */
 | |
| +static inline unsigned long
 | |
| +__crypto_memneq_generic(const void *a, const void *b, size_t size)
 | |
| +{	/* OR together the XOR of every position; the result is 0 only if all size bytes match. No early exit. */
 | |
| +	unsigned long neq = 0;
 | |
| +
 | |
| +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) /* word-at-a-time pass, compiled in only with this config option */
 | |
| +	while (size >= sizeof(unsigned long)) {
 | |
| +		neq |= *(unsigned long *)a ^ *(unsigned long *)b;
 | |
| +		COMPILER_OPTIMIZER_HIDE_VAR(neq);
 | |
| +		a += sizeof(unsigned long);
 | |
| +		b += sizeof(unsigned long);
 | |
| +		size -= sizeof(unsigned long);
 | |
| +	}
 | |
| +#endif /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */
 | |
| +	while (size > 0) { /* byte-at-a-time pass over whatever remains */
 | |
| +		neq |= *(unsigned char *)a ^ *(unsigned char *)b;
 | |
| +		COMPILER_OPTIMIZER_HIDE_VAR(neq);
 | |
| +		a += 1;
 | |
| +		b += 1;
 | |
| +		size -= 1;
 | |
| +	}
 | |
| +	return neq;
 | |
| +}
 | |
| +
 | |
| +/* Loop-free fast-path for frequently used 16-byte size */
 | |
| +static inline unsigned long __crypto_memneq_16(const void *a, const void *b)
 | |
| +{	/* Unrolled 16-byte compare: returns 0 only if all 16 bytes match. Every position is always read. */
 | |
| +	unsigned long neq = 0;
 | |
| +
 | |
| +#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
 | |
| +	if (sizeof(unsigned long) == 8) { /* two 8-byte words */
 | |
| +		neq |= *(unsigned long *)(a)   ^ *(unsigned long *)(b);
 | |
| +		COMPILER_OPTIMIZER_HIDE_VAR(neq);
 | |
| +		neq |= *(unsigned long *)(a+8) ^ *(unsigned long *)(b+8);
 | |
| +		COMPILER_OPTIMIZER_HIDE_VAR(neq);
 | |
| +	} else if (sizeof(unsigned int) == 4) { /* four 4-byte words */
 | |
| +		neq |= *(unsigned int *)(a)    ^ *(unsigned int *)(b);
 | |
| +		COMPILER_OPTIMIZER_HIDE_VAR(neq);
 | |
| +		neq |= *(unsigned int *)(a+4)  ^ *(unsigned int *)(b+4);
 | |
| +		COMPILER_OPTIMIZER_HIDE_VAR(neq);
 | |
| +		neq |= *(unsigned int *)(a+8)  ^ *(unsigned int *)(b+8);
 | |
| +		COMPILER_OPTIMIZER_HIDE_VAR(neq);
 | |
| +		neq |= *(unsigned int *)(a+12) ^ *(unsigned int *)(b+12);
 | |
| +		COMPILER_OPTIMIZER_HIDE_VAR(neq);
 | |
| +	} else
 | |
| +#endif /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */
 | |
| +	{ /* sixteen single bytes; the only path when the config option is off */
 | |
| +		neq |= *(unsigned char *)(a)    ^ *(unsigned char *)(b);
 | |
| +		COMPILER_OPTIMIZER_HIDE_VAR(neq);
 | |
| +		neq |= *(unsigned char *)(a+1)  ^ *(unsigned char *)(b+1);
 | |
| +		COMPILER_OPTIMIZER_HIDE_VAR(neq);
 | |
| +		neq |= *(unsigned char *)(a+2)  ^ *(unsigned char *)(b+2);
 | |
| +		COMPILER_OPTIMIZER_HIDE_VAR(neq);
 | |
| +		neq |= *(unsigned char *)(a+3)  ^ *(unsigned char *)(b+3);
 | |
| +		COMPILER_OPTIMIZER_HIDE_VAR(neq);
 | |
| +		neq |= *(unsigned char *)(a+4)  ^ *(unsigned char *)(b+4);
 | |
| +		COMPILER_OPTIMIZER_HIDE_VAR(neq);
 | |
| +		neq |= *(unsigned char *)(a+5)  ^ *(unsigned char *)(b+5);
 | |
| +		COMPILER_OPTIMIZER_HIDE_VAR(neq);
 | |
| +		neq |= *(unsigned char *)(a+6)  ^ *(unsigned char *)(b+6);
 | |
| +		COMPILER_OPTIMIZER_HIDE_VAR(neq);
 | |
| +		neq |= *(unsigned char *)(a+7)  ^ *(unsigned char *)(b+7);
 | |
| +		COMPILER_OPTIMIZER_HIDE_VAR(neq);
 | |
| +		neq |= *(unsigned char *)(a+8)  ^ *(unsigned char *)(b+8);
 | |
| +		COMPILER_OPTIMIZER_HIDE_VAR(neq);
 | |
| +		neq |= *(unsigned char *)(a+9)  ^ *(unsigned char *)(b+9);
 | |
| +		COMPILER_OPTIMIZER_HIDE_VAR(neq);
 | |
| +		neq |= *(unsigned char *)(a+10) ^ *(unsigned char *)(b+10);
 | |
| +		COMPILER_OPTIMIZER_HIDE_VAR(neq);
 | |
| +		neq |= *(unsigned char *)(a+11) ^ *(unsigned char *)(b+11);
 | |
| +		COMPILER_OPTIMIZER_HIDE_VAR(neq);
 | |
| +		neq |= *(unsigned char *)(a+12) ^ *(unsigned char *)(b+12);
 | |
| +		COMPILER_OPTIMIZER_HIDE_VAR(neq);
 | |
| +		neq |= *(unsigned char *)(a+13) ^ *(unsigned char *)(b+13);
 | |
| +		COMPILER_OPTIMIZER_HIDE_VAR(neq);
 | |
| +		neq |= *(unsigned char *)(a+14) ^ *(unsigned char *)(b+14);
 | |
| +		COMPILER_OPTIMIZER_HIDE_VAR(neq);
 | |
| +		neq |= *(unsigned char *)(a+15) ^ *(unsigned char *)(b+15);
 | |
| +		COMPILER_OPTIMIZER_HIDE_VAR(neq);
 | |
| +	}
 | |
| +
 | |
| +	return neq;
 | |
| +}
 | |
| +
 | |
| +/* Compare two areas of memory without leaking timing information,
 | |
| + * and with special optimizations for common sizes.  Users should
 | |
| + * not call this function directly, but should instead use
 | |
| + * crypto_memneq defined in crypto/algapi.h.
 | |
| + */
 | |
| +noinline unsigned long __crypto_memneq(const void *a, const void *b,
 | |
| +				       size_t size)
 | |
| +{	/* Returns 0 when the regions are equal and non-zero otherwise. */
 | |
| +	switch (size) {
 | |
| +	case 16: /* unrolled fast path */
 | |
| +		return __crypto_memneq_16(a, b);
 | |
| +	default: /* any other length, including 0 */
 | |
| +		return __crypto_memneq_generic(a, b, size);
 | |
| +	}
 | |
| +}
 | |
| +
 | |
| +#endif /* __HAVE_ARCH_CRYPTO_MEMNEQ */
 | |
| --- /dev/null	2017-07-05 16:27:37.615351856 +0200
 | |
| +++ b/net/wireguard/compat/compat.h	2017-07-06 18:17:33.000000000 +0200
 | |
| @@ -0,0 +1,399 @@
 | |
| +/* Copyright (C) 2015-2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. */
 | |
| +
 | |
| +#ifndef COMPAT_H
 | |
| +#define COMPAT_H
 | |
| +
 | |
| +#include <linux/kconfig.h>
 | |
| +#include <linux/version.h>
 | |
| +#include <linux/types.h>
 | |
| +#include <generated/utsrelease.h>
 | |
| +
 | |
| +#ifdef RHEL_MAJOR
 | |
| +#if RHEL_MAJOR == 7
 | |
| +#define ISRHEL7
 | |
| +#endif
 | |
| +#endif
 | |
| +#ifdef UTS_UBUNTU_RELEASE_ABI
 | |
| +#if LINUX_VERSION_CODE == KERNEL_VERSION(3, 13, 11)
 | |
| +#define ISUBUNTU1404
 | |
| +#endif
 | |
| +#endif
 | |
| +#ifdef CONFIG_SUSE_KERNEL
 | |
| +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 5, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)
 | |
| +#define ISOPENSUSE42
 | |
| +#endif
 | |
| +#endif
 | |
| +
 | |
| +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0)
 | |
| +#error "WireGuard requires Linux >= 3.10"
 | |
| +#endif
 | |
| +
 | |
| +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 0, 0) && defined(CONFIG_X86_64)
 | |
| +#define CONFIG_AS_SSSE3
 | |
| +#endif
 | |
| +
 | |
| +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 18, 0) && !defined(ISRHEL7)
 | |
| +#define headers_start data
 | |
| +#define headers_end data
 | |
| +#endif
 | |
| +
 | |
| +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0)
 | |
| +#include "udp_tunnel/udp_tunnel_partial_compat.h"
 | |
| +#endif
 | |
| +
 | |
| +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0) && !defined(DEBUG) && defined(net_dbg_ratelimited)
 | |
| +#undef net_dbg_ratelimited
 | |
| +#define net_dbg_ratelimited(fmt, ...) do { if (0) no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); } while (0)
 | |
| +#endif
 | |
| +
 | |
| +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0)
 | |
| +#define RCU_LOCKDEP_WARN(cond, message) rcu_lockdep_assert(!(cond), message)
 | |
| +#endif
 | |
| +
 | |
| +#if ((LINUX_VERSION_CODE > KERNEL_VERSION(3, 19, 0) && LINUX_VERSION_CODE < KERNEL_VERSION(3, 19, 6)) || \
 | |
| +    (LINUX_VERSION_CODE < KERNEL_VERSION(3, 18, 12) && LINUX_VERSION_CODE > KERNEL_VERSION(3, 17, 0)) || \
 | |
| +    (LINUX_VERSION_CODE < KERNEL_VERSION(3, 16, 8) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 15, 0)) || \
 | |
| +    LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 40)) && !defined(ISRHEL7) && !defined(ISUBUNTU1404)
 | |
| +#define dev_recursion_level() 0
 | |
| +#endif
 | |
| +
 | |
| +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0) && !defined(ISRHEL7)
 | |
| +#define ipv6_dst_lookup(a, b, c, d) ipv6_dst_lookup(b, c, d)
 | |
| +#endif
 | |
| +
 | |
| +#if ((LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 5) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 2, 0)) || \
 | |
| +    (LINUX_VERSION_CODE < KERNEL_VERSION(4, 1, 17) && LINUX_VERSION_CODE > KERNEL_VERSION(3, 19, 0)) || \
 | |
| +    (LINUX_VERSION_CODE < KERNEL_VERSION(3, 18, 27) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0)) || \
 | |
| +    (LINUX_VERSION_CODE < KERNEL_VERSION(3, 16, 8) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 15, 0)) || \
 | |
| +    (LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 40) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)) || \
 | |
| +    (LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 54))) && !defined(ISUBUNTU1404)
 | |
| +#include <linux/if.h>
 | |
| +#include <net/ip_tunnels.h>
 | |
| +#define IP6_ECN_set_ce(a, b) IP6_ECN_set_ce(b)
 | |
| +#endif
 | |
| +
 | |
| +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 9, 0)
 | |
| +#define time_is_before_jiffies64(a) time_after64(get_jiffies_64(), a)
 | |
| +#define time_is_after_jiffies64(a) time_before64(get_jiffies_64(), a)
 | |
| +#define time_is_before_eq_jiffies64(a) time_after_eq64(get_jiffies_64(), a)
 | |
| +#define time_is_after_eq_jiffies64(a) time_before_eq64(get_jiffies_64(), a)
 | |
| +#endif
 | |
| +
 | |
| +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) && IS_ENABLED(CONFIG_IPV6) && !defined(ISRHEL7)
 | |
| +#include <net/ipv6.h>
 | |
| +struct ipv6_stub_type {
 | |
| +	void *udpv6_encap_enable;
 | |
| +	int (*ipv6_dst_lookup)(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6);
 | |
| +};
 | |
| +static const struct ipv6_stub_type ipv6_stub_impl = {
 | |
| +	.udpv6_encap_enable = (void *)1,
 | |
| +	.ipv6_dst_lookup = ip6_dst_lookup
 | |
| +};
 | |
| +static const struct ipv6_stub_type *ipv6_stub = &ipv6_stub_impl;
 | |
| +#endif
 | |
| +
 | |
| +
 | |
| +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0) && IS_ENABLED(CONFIG_IPV6) && !defined(ISOPENSUSE42)
 | |
| +#include <net/addrconf.h>
 | |
| +static inline bool ipv6_mod_enabled(void)
 | |
| +{
 | |
| +	return ipv6_stub->udpv6_encap_enable != NULL;
 | |
| +}
 | |
| +#endif
 | |
| +
 | |
| +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
 | |
| +#include <linux/skbuff.h>
 | |
| +static inline void skb_reset_tc(struct sk_buff *skb)
 | |
| +{
 | |
| +#ifdef CONFIG_NET_CLS_ACT
 | |
| +	skb->tc_verd = 0;
 | |
| +#endif
 | |
| +}
 | |
| +#endif
 | |
| +
 | |
| +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
 | |
| +#include <linux/siphash.h>
 | |
| +static inline u32 get_random_u32(void)
 | |
| +{
 | |
| +	static siphash_key_t key;
 | |
| +	static u32 counter = 0;
 | |
| +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 4, 0)
 | |
| +	static bool has_seeded = false;
 | |
| +	if (unlikely(!has_seeded)) {
 | |
| +		get_random_bytes(&key, sizeof(key));
 | |
| +		has_seeded = true;
 | |
| +	}
 | |
| +#else
 | |
| +	get_random_once(&key, sizeof(key));
 | |
| +#endif
 | |
| +	return siphash_2u32(counter++, get_random_int(), &key);
 | |
| +}
 | |
| +#endif
 | |
| +
 | |
| +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 18, 0) && !defined(ISRHEL7)
 | |
| +static inline void netif_keep_dst(struct net_device *dev)
 | |
| +{
 | |
| +	dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
 | |
| +}
 | |
| +#endif
 | |
| +
 | |
| +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0) && !defined(ISRHEL7)
 | |
| +#define pcpu_sw_netstats pcpu_tstats
 | |
| +#define netdev_alloc_pcpu_stats alloc_percpu
 | |
| +#elif LINUX_VERSION_CODE < KERNEL_VERSION(3, 15, 0) && !defined(ISRHEL7)
 | |
| +#define netdev_alloc_pcpu_stats(type)					\
 | |
| +({									\
 | |
| +	typeof(type) __percpu *pcpu_stats = alloc_percpu(type);		\
 | |
| +	if (pcpu_stats)	{						\
 | |
| +		int __cpu;						\
 | |
| +		for_each_possible_cpu (__cpu) {				\
 | |
| +			typeof(type) *stat;				\
 | |
| +			stat = per_cpu_ptr(pcpu_stats, __cpu);		\
 | |
| +			u64_stats_init(&stat->syncp);			\
 | |
| +		}							\
 | |
| +	}								\
 | |
| +	pcpu_stats;							\
 | |
| +})
 | |
| +#endif
 | |
| +
 | |
| +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0)
 | |
| +#include "checksum/checksum_partial_compat.h"
 | |
| +static inline void *our_pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len)
 | |
| +{
 | |
| +	if (tail != skb) {
 | |
| +		skb->data_len += len;
 | |
| +		skb->len += len;
 | |
| +	}
 | |
| +	return skb_put(tail, len);
 | |
| +}
 | |
| +#define pskb_put our_pskb_put
 | |
| +#endif
 | |
| +
 | |
| +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 11, 0) && !defined(ISRHEL7)
 | |
| +#include <net/xfrm.h>
 | |
| +static inline void skb_scrub_packet(struct sk_buff *skb, bool xnet)
 | |
| +{
 | |
| +#ifdef CONFIG_CAVIUM_OCTEON_IPFWD_OFFLOAD
 | |
| +	memset(&skb->cvm_info, 0, sizeof(skb->cvm_info));
 | |
| +	skb->cvm_reserved = 0;
 | |
| +#endif
 | |
| +	skb->tstamp.tv64 = 0;
 | |
| +	skb->pkt_type = PACKET_HOST;
 | |
| +	skb->skb_iif = 0;
 | |
| +	skb_dst_drop(skb);
 | |
| +	secpath_reset(skb);
 | |
| +	nf_reset(skb);
 | |
| +	nf_reset_trace(skb);
 | |
| +	if (!xnet)
 | |
| +		return;
 | |
| +	skb_orphan(skb);
 | |
| +	skb->mark = 0;
 | |
| +}
 | |
| +#endif
 | |
| +
 | |
| +#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) || defined(ISUBUNTU1404)) && !defined(ISRHEL7)
 | |
| +#include <linux/random.h>
 | |
| +static inline u32 prandom_u32_max(u32 ep_ro)
 | |
| +{
 | |
| +	return (u32)(((u64) prandom_u32() * ep_ro) >> 32);
 | |
| +}
 | |
| +#endif
 | |
| +
 | |
| +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 75) && !defined(ISRHEL7)
 | |
| +#define U8_MAX ((u8)~0U)
 | |
| +#define S8_MAX ((s8)(U8_MAX >> 1))
 | |
| +#define S8_MIN ((s8)(-S8_MAX - 1))
 | |
| +#define U16_MAX ((u16)~0U)
 | |
| +#define S16_MAX ((s16)(U16_MAX >> 1))
 | |
| +#define S16_MIN ((s16)(-S16_MAX - 1))
 | |
| +#define U32_MAX ((u32)~0U)
 | |
| +#define S32_MAX ((s32)(U32_MAX >> 1))
 | |
| +#define S32_MIN ((s32)(-S32_MAX - 1))
 | |
| +#define U64_MAX ((u64)~0ULL)
 | |
| +#define S64_MAX ((s64)(U64_MAX >> 1))
 | |
| +#define S64_MIN ((s64)(-S64_MAX - 1))
 | |
| +#endif
 | |
| +
 | |
| +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 60) && !defined(ISRHEL7)
 | |
| +/* Making this static may very well invalidate its usefulness,
 | |
| + * but so it goes with compat code. */
 | |
| +static inline void memzero_explicit(void *s, size_t count)
 | |
| +{
 | |
| +	memset(s, 0, count);
 | |
| +	barrier();
 | |
| +}
 | |
| +#endif
 | |
| +
 | |
| +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 0) && !defined(ISRHEL7)
 | |
| +static const struct in6_addr our_in6addr_any = IN6ADDR_ANY_INIT;
 | |
| +#define in6addr_any our_in6addr_any
 | |
| +#endif
 | |
| +
 | |
| +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 13, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 2, 0)
 | |
| +#include <linux/completion.h>
 | |
| +#include <linux/random.h>
 | |
| +#include <linux/errno.h>
 | |
| +struct rng_initializer {
 | |
| +	struct completion done;
 | |
| +	struct random_ready_callback cb;
 | |
| +};
 | |
| +static inline void rng_initialized_callback(struct random_ready_callback *cb)
 | |
| +{
 | |
| +	complete(&container_of(cb, struct rng_initializer, cb)->done);
 | |
| +}
 | |
| +static inline int wait_for_random_bytes(void)
 | |
| +{
 | |
| +	static bool rng_is_initialized = false;
 | |
| +	int ret;
 | |
| +	if (unlikely(!rng_is_initialized)) {
 | |
| +		struct rng_initializer rng = {
 | |
| +			.done = COMPLETION_INITIALIZER(rng.done),
 | |
| +			.cb = { .owner = THIS_MODULE, .func = rng_initialized_callback }
 | |
| +		};
 | |
| +		ret = add_random_ready_callback(&rng.cb);
 | |
| +		if (!ret) {
 | |
| +			ret = wait_for_completion_interruptible(&rng.done);
 | |
| +			if (ret) {
 | |
| +				del_random_ready_callback(&rng.cb);
 | |
| +				return ret;
 | |
| +			}
 | |
| +		} else if (ret != -EALREADY)
 | |
| +			return ret;
 | |
| +		rng_is_initialized = true;
 | |
| +	}
 | |
| +	return 0;
 | |
| +}
 | |
| +#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 2, 0)
 | |
| +/* This is a disaster. Without this API, we really have no way of
 | |
| + * knowing if it's initialized. We just return that it has and hope
 | |
| + * for the best... */
 | |
| +static inline int wait_for_random_bytes(void)
 | |
| +{
 | |
| +	return 0;
 | |
| +}
 | |
| +#endif
 | |
| +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 13, 0)
 | |
| +static inline int get_random_bytes_wait(void *buf, int nbytes)
 | |
| +{
 | |
| +	int ret = wait_for_random_bytes();
 | |
| +	if (unlikely(ret))
 | |
| +		return ret;
 | |
| +	get_random_bytes(buf, nbytes);
 | |
| +	return 0;
 | |
| +}
 | |
| +#endif
 | |
| +
 | |
| +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 11, 0) && !defined(ISRHEL7)
 | |
| +#define system_power_efficient_wq system_unbound_wq
 | |
| +#endif
 | |
| +
 | |
| +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0) && !defined(ISRHEL7)
 | |
| +#include <linux/ktime.h>
 | |
| +static inline u64 ktime_get_ns(void)
 | |
| +{
 | |
| +	return ktime_to_ns(ktime_get());
 | |
| +}
 | |
| +#endif
 | |
| +
 | |
| +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0)
 | |
| +#include <linux/inetdevice.h>
 | |
| +#define inet_confirm_addr(a,b,c,d,e) inet_confirm_addr(b,c,d,e)
 | |
| +#endif
 | |
| +
 | |
| +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 12, 0)
 | |
| +#include <linux/vmalloc.h>
 | |
| +#include <linux/mm.h>
 | |
| +#include <linux/slab.h>
 | |
| +static inline void *kvmalloc(size_t size, gfp_t flags)
 | |
| +{
 | |
| +	gfp_t kmalloc_flags = flags;
 | |
| +	void *ret;
 | |
| +	if (size > PAGE_SIZE) {
 | |
| +		kmalloc_flags |= __GFP_NOWARN;
 | |
| +		if (!(kmalloc_flags & __GFP_REPEAT) || (size <= PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER))
 | |
| +			kmalloc_flags |= __GFP_NORETRY;
 | |
| +	}
 | |
| +	ret = kmalloc(size, kmalloc_flags);
 | |
| +	if (ret || size <= PAGE_SIZE)
 | |
| +		return ret;
 | |
| +	return __vmalloc(size, flags, PAGE_KERNEL);
 | |
| +}
 | |
| +static inline void *kvzalloc(size_t size, gfp_t flags)
 | |
| +{
 | |
| +	return kvmalloc(size, flags | __GFP_ZERO);
 | |
| +}
 | |
| +#endif
 | |
| +
 | |
| +#if ((LINUX_VERSION_CODE < KERNEL_VERSION(3, 15, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)) || LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 41)) && !defined(ISUBUNTU1404)
 | |
| +#include <linux/vmalloc.h>
 | |
| +static inline void kvfree(const void *addr)
 | |
| +{
 | |
| +	if (is_vmalloc_addr(addr))
 | |
| +		vfree(addr);
 | |
| +	else
 | |
| +		kfree(addr);
 | |
| +}
 | |
| +#endif
 | |
| +
 | |
| +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 9)
 | |
| +#include <linux/netdevice.h>
 | |
| +#define priv_destructor destructor
 | |
| +#endif
 | |
| +
 | |
| +/* https://lkml.org/lkml/2017/6/23/790 */
 | |
| +#if IS_ENABLED(CONFIG_NF_CONNTRACK)
 | |
| +#include <linux/ip.h>
 | |
| +#include <linux/icmpv6.h>
 | |
| +#include <net/ipv6.h>
 | |
| +#include <net/icmp.h>
 | |
| +#include <net/netfilter/nf_conntrack.h>
 | |
| +#include <net/netfilter/nf_nat_core.h>
 | |
| +static inline void new_icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 | |
| +{
 | |
| +	enum ip_conntrack_info ctinfo;
 | |
| +	struct nf_conn *ct = nf_ct_get(skb_in, &ctinfo);
 | |
| +	if (skb_network_header(skb_in) < skb_in->head || (skb_network_header(skb_in) + sizeof(struct iphdr)) > skb_tail_pointer(skb_in))
 | |
| +		return;
 | |
| +	if (ct)
 | |
| +		ip_hdr(skb_in)->saddr = ct->tuplehash[0].tuple.src.u3.ip;
 | |
| +	icmp_send(skb_in, type, code, info);
 | |
| +}
 | |
| +static inline void new_icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
 | |
| +{
 | |
| +	enum ip_conntrack_info ctinfo;
 | |
| +	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
 | |
| +	if (skb_network_header(skb) < skb->head || (skb_network_header(skb) + sizeof(struct ipv6hdr)) > skb_tail_pointer(skb))
 | |
| +		return;
 | |
| +	if (ct)
 | |
| +		ipv6_hdr(skb)->saddr = ct->tuplehash[0].tuple.src.u3.in6;
 | |
| +	icmpv6_send(skb, type, code, info);
 | |
| +}
 | |
| +#define icmp_send(a,b,c,d) new_icmp_send(a,b,c,d)
 | |
| +#define icmpv6_send(a,b,c,d) new_icmpv6_send(a,b,c,d)
 | |
| +#endif
 | |
| +
 | |
| +/* https://lkml.org/lkml/2015/6/12/415 */
 | |
| +#include <linux/netdevice.h>
 | |
| +static inline struct net_device *netdev_pub(void *dev)
 | |
| +{
 | |
| +	return (struct net_device *)((char *)dev - ALIGN(sizeof(struct net_device), NETDEV_ALIGN));
 | |
| +}
 | |
| +
 | |
| +/* PaX compatibility */
 | |
| +#ifdef CONSTIFY_PLUGIN
 | |
| +#include <linux/cache.h>
 | |
| +#undef __read_mostly
 | |
| +#define __read_mostly
 | |
| +#endif
 | |
| +
 | |
| +#if defined(CONFIG_DYNAMIC_DEBUG) || defined(DEBUG)
 | |
| +#define net_dbg_skb_ratelimited(fmt, dev, skb, ...) do { \
 | |
| +	struct endpoint __endpoint; \
 | |
| +	socket_endpoint_from_skb(&__endpoint, skb); \
 | |
| +	net_dbg_ratelimited(fmt, dev, &__endpoint.addr, ##__VA_ARGS__); \
 | |
| +} while(0)
 | |
| +#else
 | |
| +#define net_dbg_skb_ratelimited(fmt, skb, ...)
 | |
| +#endif
 | |
| +
 | |
| +#endif
 | |
| --- /dev/null	2017-07-05 16:27:37.615351856 +0200
 | |
| +++ b/net/wireguard/compat/dst_cache/dst_cache.c	2017-07-06 18:17:33.000000000 +0200
 | |
| @@ -0,0 +1,175 @@
 | |
| +/*
 | |
| + * net/core/dst_cache.c - dst entry cache
 | |
| + *
 | |
| + * Copyright (c) 2016 Paolo Abeni <pabeni@redhat.com>
 | |
| + *
 | |
| + * This program is free software; you can redistribute it and/or modify
 | |
| + * it under the terms of the GNU General Public License as published by
 | |
| + * the Free Software Foundation; either version 2 of the License, or
 | |
| + * (at your option) any later version.
 | |
| + */
 | |
| +
 | |
| +#include <linux/kernel.h>
 | |
| +#include <linux/percpu.h>
 | |
| +#include <net/dst_cache.h>
 | |
| +#include <net/route.h>
 | |
| +#if IS_ENABLED(CONFIG_IPV6)
 | |
| +#include <net/ip6_fib.h>
 | |
| +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 2, 0)
 | |
| +static inline u32 rt6_get_cookie(const struct rt6_info *rt)
 | |
| +{
 | |
| +	if ((unlikely(rt->dst.flags & DST_NOCACHE) && rt->dst.from))
 | |
| +		rt = (struct rt6_info *)(rt->dst.from);
 | |
| +
 | |
| +	return rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
 | |
| +}
 | |
| +#endif
 | |
| +#endif
 | |
| +#include <uapi/linux/in.h>
 | |
| +
 | |
| +struct dst_cache_pcpu {
 | |
| +	unsigned long refresh_ts;
 | |
| +	struct dst_entry *dst;
 | |
| +	u32 cookie;
 | |
| +	union {
 | |
| +		struct in_addr in_saddr;
 | |
| +		struct in6_addr in6_saddr;
 | |
| +	};
 | |
| +};
 | |
| +
 | |
| +static void dst_cache_per_cpu_dst_set(struct dst_cache_pcpu *dst_cache,
 | |
| +				      struct dst_entry *dst, u32 cookie)
 | |
| +{
 | |
| +	dst_release(dst_cache->dst);
 | |
| +	if (dst)
 | |
| +		dst_hold(dst);
 | |
| +
 | |
| +	dst_cache->cookie = cookie;
 | |
| +	dst_cache->dst = dst;
 | |
| +}
 | |
| +
 | |
| +static struct dst_entry *dst_cache_per_cpu_get(struct dst_cache *dst_cache,
 | |
| +					       struct dst_cache_pcpu *idst)
 | |
| +{
 | |
| +	struct dst_entry *dst;
 | |
| +
 | |
| +	dst = idst->dst;
 | |
| +	if (!dst)
 | |
| +		goto fail;
 | |
| +
 | |
| +	/* the cache already holds a dst reference; it can't go away */
 | |
| +	dst_hold(dst);
 | |
| +
 | |
| +	if (unlikely(!time_after(idst->refresh_ts, dst_cache->reset_ts) ||
 | |
| +		     (dst->obsolete && !dst->ops->check(dst, idst->cookie)))) {
 | |
| +		dst_cache_per_cpu_dst_set(idst, NULL, 0);
 | |
| +		dst_release(dst);
 | |
| +		goto fail;
 | |
| +	}
 | |
| +	return dst;
 | |
| +
 | |
| +fail:
 | |
| +	idst->refresh_ts = jiffies;
 | |
| +	return NULL;
 | |
| +}
 | |
| +
 | |
| +struct dst_entry *dst_cache_get(struct dst_cache *dst_cache)
 | |
| +{
 | |
| +	if (!dst_cache->cache)
 | |
| +		return NULL;
 | |
| +
 | |
| +	return dst_cache_per_cpu_get(dst_cache, this_cpu_ptr(dst_cache->cache));
 | |
| +}
 | |
| +
 | |
| +struct rtable *dst_cache_get_ip4(struct dst_cache *dst_cache, __be32 *saddr)
 | |
| +{
 | |
| +	struct dst_cache_pcpu *idst;
 | |
| +	struct dst_entry *dst;
 | |
| +
 | |
| +	if (!dst_cache->cache)
 | |
| +		return NULL;
 | |
| +
 | |
| +	idst = this_cpu_ptr(dst_cache->cache);
 | |
| +	dst = dst_cache_per_cpu_get(dst_cache, idst);
 | |
| +	if (!dst)
 | |
| +		return NULL;
 | |
| +
 | |
| +	*saddr = idst->in_saddr.s_addr;
 | |
| +	return container_of(dst, struct rtable, dst);
 | |
| +}
 | |
| +
 | |
| +void dst_cache_set_ip4(struct dst_cache *dst_cache, struct dst_entry *dst,
 | |
| +		       __be32 saddr)
 | |
| +{
 | |
| +	struct dst_cache_pcpu *idst;
 | |
| +
 | |
| +	if (!dst_cache->cache)
 | |
| +		return;
 | |
| +
 | |
| +	idst = this_cpu_ptr(dst_cache->cache);
 | |
| +	dst_cache_per_cpu_dst_set(idst, dst, 0);
 | |
| +	idst->in_saddr.s_addr = saddr;
 | |
| +}
 | |
| +
 | |
| +#if IS_ENABLED(CONFIG_IPV6)
 | |
| +void dst_cache_set_ip6(struct dst_cache *dst_cache, struct dst_entry *dst,
 | |
| +		       const struct in6_addr *addr)
 | |
| +{
 | |
| +	struct dst_cache_pcpu *idst;
 | |
| +
 | |
| +	if (!dst_cache->cache)
 | |
| +		return;
 | |
| +
 | |
| +	idst = this_cpu_ptr(dst_cache->cache);
 | |
| +	dst_cache_per_cpu_dst_set(this_cpu_ptr(dst_cache->cache), dst,
 | |
| +				  rt6_get_cookie((struct rt6_info *)dst));
 | |
| +	idst->in6_saddr = *addr;
 | |
| +}
 | |
| +
 | |
| +struct dst_entry *dst_cache_get_ip6(struct dst_cache *dst_cache,
 | |
| +				    struct in6_addr *saddr)
 | |
| +{
 | |
| +	struct dst_cache_pcpu *idst;
 | |
| +	struct dst_entry *dst;
 | |
| +
 | |
| +	if (!dst_cache->cache)
 | |
| +		return NULL;
 | |
| +
 | |
| +	idst = this_cpu_ptr(dst_cache->cache);
 | |
| +	dst = dst_cache_per_cpu_get(dst_cache, idst);
 | |
| +	if (!dst)
 | |
| +		return NULL;
 | |
| +
 | |
| +	*saddr = idst->in6_saddr;
 | |
| +	return dst;
 | |
| +}
 | |
| +#endif
 | |
| +
 | |
| +int dst_cache_init(struct dst_cache *dst_cache, gfp_t gfp)
 | |
| +{
 | |
| +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 18, 0)
 | |
| +	BUG_ON(gfp & GFP_ATOMIC);
 | |
| +	dst_cache->cache = alloc_percpu(struct dst_cache_pcpu);
 | |
| +#else
 | |
| +	dst_cache->cache = alloc_percpu_gfp(struct dst_cache_pcpu,
 | |
| +					    gfp | __GFP_ZERO);
 | |
| +#endif
 | |
| +	if (!dst_cache->cache)
 | |
| +		return -ENOMEM;
 | |
| +
 | |
| +	dst_cache_reset(dst_cache);
 | |
| +	return 0;
 | |
| +}
 | |
| +
 | |
| +void dst_cache_destroy(struct dst_cache *dst_cache)
 | |
| +{
 | |
| +	int i;
 | |
| +
 | |
| +	if (!dst_cache->cache)
 | |
| +		return;
 | |
| +
 | |
| +	for_each_possible_cpu(i)
 | |
| +		dst_release(per_cpu_ptr(dst_cache->cache, i)->dst);
 | |
| +
 | |
| +	free_percpu(dst_cache->cache);
 | |
| +}
 | |
| --- /dev/null	2017-07-05 16:27:37.615351856 +0200
 | |
| +++ b/net/wireguard/compat/dst_cache/include/net/dst_cache.h	2017-07-06 18:17:33.000000000 +0200
 | |
| @@ -0,0 +1,97 @@
 | |
| +#ifndef _NET_DST_CACHE_H
 | |
| +#define _NET_DST_CACHE_H
 | |
| +
 | |
| +#include <linux/jiffies.h>
 | |
| +#include <net/dst.h>
 | |
| +#if IS_ENABLED(CONFIG_IPV6)
 | |
| +#include <net/ip6_fib.h>
 | |
| +#endif
 | |
| +
 | |
| +struct dst_cache {
 | |
| +	struct dst_cache_pcpu __percpu *cache;
 | |
| +	unsigned long reset_ts;
 | |
| +};
 | |
| +
 | |
| +/**
 | |
| + *	dst_cache_get - perform cache lookup
 | |
| + *	@dst_cache: the cache
 | |
| + *
 | |
| + *	The caller should use dst_cache_get_ip4() if it needs to retrieve the
 | |
| + *	source address to be used when xmitting to the cached dst.
 | |
| + *	local BH must be disabled.
 | |
| + */
 | |
| +struct dst_entry *dst_cache_get(struct dst_cache *dst_cache);
 | |
| +
 | |
| +/**
 | |
| + *	dst_cache_get_ip4 - perform cache lookup and fetch ipv4 source address
 | |
| + *	@dst_cache: the cache
 | |
| + *	@saddr: return value for the retrieved source address
 | |
| + *
 | |
| + *	local BH must be disabled.
 | |
| + */
 | |
| +struct rtable *dst_cache_get_ip4(struct dst_cache *dst_cache, __be32 *saddr);
 | |
| +
 | |
| +/**
 | |
| + *	dst_cache_set_ip4 - store the ipv4 dst into the cache
 | |
| + *	@dst_cache: the cache
 | |
| + *	@dst: the entry to be cached
 | |
| + *	@saddr: the source address to be stored inside the cache
 | |
| + *
 | |
| + *	local BH must be disabled.
 | |
| + */
 | |
| +void dst_cache_set_ip4(struct dst_cache *dst_cache, struct dst_entry *dst,
 | |
| +		       __be32 saddr);
 | |
| +
 | |
| +#if IS_ENABLED(CONFIG_IPV6)
 | |
| +
 | |
| +/**
 | |
| + *	dst_cache_set_ip6 - store the ipv6 dst into the cache
 | |
| + *	@dst_cache: the cache
 | |
| + *	@dst: the entry to be cached
 | |
| + *	@addr: the source address to be stored inside the cache
 | |
| + *
 | |
| + *	local BH must be disabled.
 | |
| + */
 | |
| +void dst_cache_set_ip6(struct dst_cache *dst_cache, struct dst_entry *dst,
 | |
| +		       const struct in6_addr *addr);
 | |
| +
 | |
| +/**
 | |
| + *	dst_cache_get_ip6 - perform cache lookup and fetch ipv6 source address
 | |
| + *	@dst_cache: the cache
 | |
| + *	@saddr: return value for the retrieved source address
 | |
| + *
 | |
| + *	local BH must be disabled.
 | |
| + */
 | |
| +struct dst_entry *dst_cache_get_ip6(struct dst_cache *dst_cache,
 | |
| +				    struct in6_addr *saddr);
 | |
| +#endif
 | |
| +
 | |
| +/**
 | |
| + *	dst_cache_reset - invalidate the cache contents
 | |
| + *	@dst_cache: the cache
 | |
| + *
 | |
| + *	This does not free the cached dst to avoid races and contentions.
 | |
| + *	the dst will be freed on later cache lookup.
 | |
| + */
 | |
| +static inline void dst_cache_reset(struct dst_cache *dst_cache)
 | |
| +{
 | |
| +	dst_cache->reset_ts = jiffies;
 | |
| +}
 | |
| +
 | |
| +/**
 | |
| + *	dst_cache_init - initialize the cache, allocating the required storage
 | |
| + *	@dst_cache: the cache
 | |
| + *	@gfp: allocation flags
 | |
| + */
 | |
| +int dst_cache_init(struct dst_cache *dst_cache, gfp_t gfp);
 | |
| +
 | |
| +/**
 | |
| + *	dst_cache_destroy - empty the cache and free the allocated storage
 | |
| + *	@dst_cache: the cache
 | |
| + *
 | |
| + *	No synchronization is enforced: it must be called only when the cache
 | |
| + *	is unused.
 | |
| + */
 | |
| +void dst_cache_destroy(struct dst_cache *dst_cache);
 | |
| +
 | |
| +#endif
 | |
| --- /dev/null	2017-07-05 16:27:37.615351856 +0200
 | |
| +++ b/net/wireguard/compat/simd/include/asm/simd.h	2017-07-06 18:17:33.000000000 +0200
 | |
| @@ -0,0 +1 @@
 | |
| +#include <asm/i387.h>
 | |
| --- /dev/null	2017-07-05 16:27:37.615351856 +0200
 | |
| +++ b/net/wireguard/compat/padata/padata.c	2017-07-06 18:17:33.000000000 +0200
 | |
| @@ -0,0 +1,895 @@
 | |
| +/*
 | |
| + * padata.c - generic interface to process data streams in parallel
 | |
| + *
 | |
| + * See Documentation/padata.txt for the API documentation.
 | |
| + *
 | |
| + * Copyright (C) 2008, 2009 secunet Security Networks AG
 | |
| + * Copyright (C) 2008, 2009 Steffen Klassert <steffen.klassert@secunet.com>
 | |
| + *
 | |
| + * This program is free software; you can redistribute it and/or modify it
 | |
| + * under the terms and conditions of the GNU General Public License,
 | |
| + * version 2, as published by the Free Software Foundation.
 | |
| + *
 | |
| + * This program is distributed in the hope it will be useful, but WITHOUT
 | |
| + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 | |
| + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 | |
| + * more details.
 | |
| + *
 | |
| + * You should have received a copy of the GNU General Public License along with
 | |
| + * this program; if not, write to the Free Software Foundation, Inc.,
 | |
| + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 | |
| + */
 | |
| +
 | |
| +#include <linux/export.h>
 | |
| +#include <linux/cpumask.h>
 | |
| +#include <linux/err.h>
 | |
| +#include <linux/cpu.h>
 | |
| +#include <linux/padata.h>
 | |
| +#include <linux/mutex.h>
 | |
| +#include <linux/sched.h>
 | |
| +#include <linux/slab.h>
 | |
| +#include <linux/sysfs.h>
 | |
| +#include <linux/rcupdate.h>
 | |
| +#include <linux/module.h>
 | |
| +#include <linux/version.h>
 | |
| +
 | |
| +#define MAX_OBJ_NUM 1000
 | |
| +
 | |
| +static int padata_index_to_cpu(struct parallel_data *pd, int cpu_index)
 | |
| +{
 | |
| +	int cpu, target_cpu;
 | |
| +
 | |
| +	target_cpu = cpumask_first(pd->cpumask.pcpu);
 | |
| +	for (cpu = 0; cpu < cpu_index; cpu++)
 | |
| +		target_cpu = cpumask_next(target_cpu, pd->cpumask.pcpu);
 | |
| +
 | |
| +	return target_cpu;
 | |
| +}
 | |
| +
 | |
| +static int padata_cpu_hash(struct parallel_data *pd)
 | |
| +{
 | |
| +	int cpu_index;
 | |
| +	/*
 | |
| +	 * Hash the sequence numbers to the cpus by taking
 | |
| +	 * seq_nr mod. number of cpus in use.
 | |
| +	 */
 | |
| +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 13, 0)
 | |
| +	spin_lock(&pd->seq_lock);
 | |
| +	cpu_index =  pd->seq_nr % cpumask_weight(pd->cpumask.pcpu);
 | |
| +	pd->seq_nr++;
 | |
| +	spin_unlock(&pd->seq_lock);
 | |
| +#else
 | |
| +	unsigned int seq_nr = atomic_inc_return(&pd->seq_nr);
 | |
| +	cpu_index = seq_nr % cpumask_weight(pd->cpumask.pcpu);
 | |
| +#endif
 | |
| +
 | |
| +	return padata_index_to_cpu(pd, cpu_index);
 | |
| +}
 | |
| +
 | |
| +static void padata_parallel_worker(struct work_struct *parallel_work)
 | |
| +{
 | |
| +	struct padata_parallel_queue *pqueue;
 | |
| +	LIST_HEAD(local_list);
 | |
| +
 | |
| +	local_bh_disable();
 | |
| +	pqueue = container_of(parallel_work,
 | |
| +			      struct padata_parallel_queue, work);
 | |
| +
 | |
| +	spin_lock(&pqueue->parallel.lock);
 | |
| +	list_replace_init(&pqueue->parallel.list, &local_list);
 | |
| +	spin_unlock(&pqueue->parallel.lock);
 | |
| +
 | |
| +	while (!list_empty(&local_list)) {
 | |
| +		struct padata_priv *padata;
 | |
| +
 | |
| +		padata = list_entry(local_list.next,
 | |
| +				    struct padata_priv, list);
 | |
| +
 | |
| +		list_del_init(&padata->list);
 | |
| +
 | |
| +		padata->parallel(padata);
 | |
| +	}
 | |
| +
 | |
| +	local_bh_enable();
 | |
| +}
 | |
| +
 | |
| +/**
 | |
| + * padata_do_parallel - padata parallelization function
 | |
| + *
 | |
| + * @pinst: padata instance
 | |
| + * @padata: object to be parallelized
 | |
| + * @cb_cpu: cpu the serialization callback function will run on,
 | |
| + *          must be in the serial cpumask of padata (i.e. cpumask.cbcpu).
 | |
| + *
 | |
| + * The parallelization callback function will run with BHs off.
 | |
| + * Note: Every object which is parallelized by padata_do_parallel
 | |
| + * must be seen by padata_do_serial.
 | |
| + */
 | |
| +int padata_do_parallel(struct padata_instance *pinst,
 | |
| +		       struct padata_priv *padata, int cb_cpu)
 | |
| +{
 | |
| +	int target_cpu, err;
 | |
| +	struct padata_parallel_queue *queue;
 | |
| +	struct parallel_data *pd;
 | |
| +
 | |
| +	rcu_read_lock_bh();
 | |
| +
 | |
| +	pd = rcu_dereference_bh(pinst->pd);
 | |
| +
 | |
| +	err = -EINVAL;
 | |
| +	if (!(pinst->flags & PADATA_INIT) || pinst->flags & PADATA_INVALID)
 | |
| +		goto out;
 | |
| +
 | |
| +	if (!cpumask_test_cpu(cb_cpu, pd->cpumask.cbcpu))
 | |
| +		goto out;
 | |
| +
 | |
| +	err =  -EBUSY;
 | |
| +	if ((pinst->flags & PADATA_RESET))
 | |
| +		goto out;
 | |
| +
 | |
| +	if (atomic_read(&pd->refcnt) >= MAX_OBJ_NUM)
 | |
| +		goto out;
 | |
| +
 | |
| +	err = 0;
 | |
| +	atomic_inc(&pd->refcnt);
 | |
| +	padata->pd = pd;
 | |
| +	padata->cb_cpu = cb_cpu;
 | |
| +
 | |
| +	target_cpu = padata_cpu_hash(pd);
 | |
| +	queue = per_cpu_ptr(pd->pqueue, target_cpu);
 | |
| +
 | |
| +	spin_lock(&queue->parallel.lock);
 | |
| +	list_add_tail(&padata->list, &queue->parallel.list);
 | |
| +	spin_unlock(&queue->parallel.lock);
 | |
| +
 | |
| +	queue_work_on(target_cpu, pinst->wq, &queue->work);
 | |
| +
 | |
| +out:
 | |
| +	rcu_read_unlock_bh();
 | |
| +
 | |
| +	return err;
 | |
| +}
 | |
| +
 | |
| +/*
 | |
| + * padata_get_next - Get the next object that needs serialization.
 | |
| + *
 | |
| + * Return values are:
 | |
| + *
 | |
| + * A pointer to the control struct of the next object that needs
 | |
| + * serialization, if present in one of the percpu reorder queues.
 | |
| + *
 | |
| + * -EINPROGRESS, if the next object that needs serialization will
 | |
| + *  be parallel processed by another cpu and is not yet present in
 | |
| + *  the cpu's reorder queue.
 | |
| + *
 | |
| + * -ENODATA, if this cpu has to do the parallel processing for
 | |
| + *  the next object.
 | |
| + */
 | |
| +static struct padata_priv *padata_get_next(struct parallel_data *pd)
 | |
| +{
 | |
| +	int cpu, num_cpus;
 | |
| +	unsigned int next_nr, next_index;
 | |
| +	struct padata_parallel_queue *next_queue;
 | |
| +	struct padata_priv *padata;
 | |
| +	struct padata_list *reorder;
 | |
| +
 | |
| +	num_cpus = cpumask_weight(pd->cpumask.pcpu);
 | |
| +
 | |
| +	/*
 | |
| +	 * Calculate the percpu reorder queue and the sequence
 | |
| +	 * number of the next object.
 | |
| +	 */
 | |
| +	next_nr = pd->processed;
 | |
| +	next_index = next_nr % num_cpus;
 | |
| +	cpu = padata_index_to_cpu(pd, next_index);
 | |
| +	next_queue = per_cpu_ptr(pd->pqueue, cpu);
 | |
| +
 | |
| +	reorder = &next_queue->reorder;
 | |
| +
 | |
| +	spin_lock(&reorder->lock);
 | |
| +	if (!list_empty(&reorder->list)) {
 | |
| +		padata = list_entry(reorder->list.next,
 | |
| +				    struct padata_priv, list);
 | |
| +
 | |
| +		list_del_init(&padata->list);
 | |
| +		atomic_dec(&pd->reorder_objects);
 | |
| +
 | |
| +		pd->processed++;
 | |
| +
 | |
| +		spin_unlock(&reorder->lock);
 | |
| +		goto out;
 | |
| +	}
 | |
| +	spin_unlock(&reorder->lock);
 | |
| +
 | |
| +	if (__this_cpu_read(pd->pqueue->cpu_index) == next_queue->cpu_index) {
 | |
| +		padata = ERR_PTR(-ENODATA);
 | |
| +		goto out;
 | |
| +	}
 | |
| +
 | |
| +	padata = ERR_PTR(-EINPROGRESS);
 | |
| +out:
 | |
| +	return padata;
 | |
| +}
 | |
| +
 | |
| +static void padata_reorder(struct parallel_data *pd)
 | |
| +{
 | |
| +	int cb_cpu;
 | |
| +	struct padata_priv *padata;
 | |
| +	struct padata_serial_queue *squeue;
 | |
| +	struct padata_instance *pinst = pd->pinst;
 | |
| +
 | |
| +	/*
 | |
| +	 * We need to ensure that only one cpu can work on dequeueing of
 | |
| +	 * the reorder queue at a time. Calculating in which percpu reorder
 | |
| +	 * queue the next object will arrive takes some time. A spinlock
 | |
| +	 * would be highly contended. Also it is not clear in which order
 | |
| +	 * the objects arrive to the reorder queues. So a cpu could wait to
 | |
| +	 * get the lock just to notice that there is nothing to do at the
 | |
| +	 * moment. Therefore we use a trylock and let the holder of the lock
 | |
| +	 * care for all the objects enqueued during the holdtime of the lock.
 | |
| +	 */
 | |
| +	if (!spin_trylock_bh(&pd->lock))
 | |
| +		return;
 | |
| +
 | |
| +	while (1) {
 | |
| +		padata = padata_get_next(pd);
 | |
| +
 | |
| +		/*
 | |
| +		 * If the next object that needs serialization is parallel
 | |
| +		 * processed by another cpu and is still on its way to the
 | |
| +		 * cpu's reorder queue, nothing to do for now.
 | |
| +		 */
 | |
| +		if (PTR_ERR(padata) == -EINPROGRESS)
 | |
| +			break;
 | |
| +
 | |
| +		/*
 | |
| +		 * This cpu has to do the parallel processing of the next
 | |
| +		 * object. It's waiting in the cpu's parallelization queue,
 | |
| +		 * so exit immediately.
 | |
| +		 */
 | |
| +		if (PTR_ERR(padata) == -ENODATA) {
 | |
| +			del_timer(&pd->timer);
 | |
| +			spin_unlock_bh(&pd->lock);
 | |
| +			return;
 | |
| +		}
 | |
| +
 | |
| +		cb_cpu = padata->cb_cpu;
 | |
| +		squeue = per_cpu_ptr(pd->squeue, cb_cpu);
 | |
| +
 | |
| +		spin_lock(&squeue->serial.lock);
 | |
| +		list_add_tail(&padata->list, &squeue->serial.list);
 | |
| +		spin_unlock(&squeue->serial.lock);
 | |
| +
 | |
| +		queue_work_on(cb_cpu, pinst->wq, &squeue->work);
 | |
| +	}
 | |
| +
 | |
| +	spin_unlock_bh(&pd->lock);
 | |
| +
 | |
| +	/*
 | |
| +	 * The next object that needs serialization might have arrived to
 | |
| +	 * the reorder queues in the meantime, we will be called again
 | |
| +	 * from the timer function if no one else cares for it.
 | |
| +	 */
 | |
| +	if (atomic_read(&pd->reorder_objects)
 | |
| +			&& !(pinst->flags & PADATA_RESET))
 | |
| +		mod_timer(&pd->timer, jiffies + HZ);
 | |
| +	else
 | |
| +		del_timer(&pd->timer);
 | |
| +
 | |
| +	return;
 | |
| +}
 | |
| +
 | |
| +static void padata_reorder_timer(unsigned long arg)
 | |
| +{
 | |
| +	struct parallel_data *pd = (struct parallel_data *)arg;
 | |
| +
 | |
| +	padata_reorder(pd);
 | |
| +}
 | |
| +
 | |
| +static void padata_serial_worker(struct work_struct *serial_work)
 | |
| +{
 | |
| +	struct padata_serial_queue *squeue;
 | |
| +	struct parallel_data *pd;
 | |
| +	LIST_HEAD(local_list);
 | |
| +
 | |
| +	local_bh_disable();
 | |
| +	squeue = container_of(serial_work, struct padata_serial_queue, work);
 | |
| +	pd = squeue->pd;
 | |
| +
 | |
| +	spin_lock(&squeue->serial.lock);
 | |
| +	list_replace_init(&squeue->serial.list, &local_list);
 | |
| +	spin_unlock(&squeue->serial.lock);
 | |
| +
 | |
| +	while (!list_empty(&local_list)) {
 | |
| +		struct padata_priv *padata;
 | |
| +
 | |
| +		padata = list_entry(local_list.next,
 | |
| +				    struct padata_priv, list);
 | |
| +
 | |
| +		list_del_init(&padata->list);
 | |
| +
 | |
| +		padata->serial(padata);
 | |
| +		atomic_dec(&pd->refcnt);
 | |
| +	}
 | |
| +	local_bh_enable();
 | |
| +}
 | |
| +
 | |
| +/**
 | |
| + * padata_do_serial - padata serialization function
 | |
| + *
 | |
| + * @padata: object to be serialized.
 | |
| + *
 | |
| + * padata_do_serial must be called for every parallelized object.
 | |
| + * The serialization callback function will run with BHs off.
 | |
| + */
 | |
| +void padata_do_serial(struct padata_priv *padata)
 | |
| +{
 | |
| +	int cpu;
 | |
| +	struct padata_parallel_queue *pqueue;
 | |
| +	struct parallel_data *pd;
 | |
| +
 | |
| +	pd = padata->pd;
 | |
| +
 | |
| +	cpu = get_cpu();
 | |
| +	pqueue = per_cpu_ptr(pd->pqueue, cpu);
 | |
| +
 | |
| +	spin_lock(&pqueue->reorder.lock);
 | |
| +	atomic_inc(&pd->reorder_objects);
 | |
| +	list_add_tail(&padata->list, &pqueue->reorder.list);
 | |
| +	spin_unlock(&pqueue->reorder.lock);
 | |
| +
 | |
| +	put_cpu();
 | |
| +
 | |
| +	padata_reorder(pd);
 | |
| +}
 | |
| +
 | |
| +static int padata_setup_cpumasks(struct parallel_data *pd,
 | |
| +				 const struct cpumask *pcpumask,
 | |
| +				 const struct cpumask *cbcpumask)
 | |
| +{
 | |
| +	if (!alloc_cpumask_var(&pd->cpumask.pcpu, GFP_KERNEL))
 | |
| +		return -ENOMEM;
 | |
| +
 | |
| +	cpumask_and(pd->cpumask.pcpu, pcpumask, cpu_online_mask);
 | |
| +	if (!alloc_cpumask_var(&pd->cpumask.cbcpu, GFP_KERNEL)) {
 | |
| +		free_cpumask_var(pd->cpumask.pcpu);
 | |
| +		return -ENOMEM;
 | |
| +	}
 | |
| +
 | |
| +	cpumask_and(pd->cpumask.cbcpu, cbcpumask, cpu_online_mask);
 | |
| +	return 0;
 | |
| +}
 | |
| +
 | |
| +static void __padata_list_init(struct padata_list *pd_list)
 | |
| +{
 | |
| +	INIT_LIST_HEAD(&pd_list->list);
 | |
| +	spin_lock_init(&pd_list->lock);
 | |
| +}
 | |
| +
 | |
| +/* Initialize all percpu queues used by serial workers */
 | |
| +static void padata_init_squeues(struct parallel_data *pd)
 | |
| +{
 | |
| +	int cpu;
 | |
| +	struct padata_serial_queue *squeue;
 | |
| +
 | |
| +	for_each_cpu(cpu, pd->cpumask.cbcpu) {
 | |
| +		squeue = per_cpu_ptr(pd->squeue, cpu);
 | |
| +		squeue->pd = pd;
 | |
| +		__padata_list_init(&squeue->serial);
 | |
| +		INIT_WORK(&squeue->work, padata_serial_worker);
 | |
| +	}
 | |
| +}
 | |
| +
 | |
| +/* Initialize all percpu queues used by parallel workers */
 | |
| +static void padata_init_pqueues(struct parallel_data *pd)
 | |
| +{
 | |
| +	int cpu_index, cpu;
 | |
| +	struct padata_parallel_queue *pqueue;
 | |
| +
 | |
| +	cpu_index = 0;
 | |
| +	for_each_cpu(cpu, pd->cpumask.pcpu) {
 | |
| +		pqueue = per_cpu_ptr(pd->pqueue, cpu);
 | |
| +		pqueue->pd = pd;
 | |
| +		pqueue->cpu_index = cpu_index;
 | |
| +		cpu_index++;
 | |
| +
 | |
| +		__padata_list_init(&pqueue->reorder);
 | |
| +		__padata_list_init(&pqueue->parallel);
 | |
| +		INIT_WORK(&pqueue->work, padata_parallel_worker);
 | |
| +		atomic_set(&pqueue->num_obj, 0);
 | |
| +	}
 | |
| +}
 | |
| +
 | |
| +/* Allocate and initialize the internal cpumask dependent resources. */
 | |
| +static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst,
 | |
| +					     const struct cpumask *pcpumask,
 | |
| +					     const struct cpumask *cbcpumask)
 | |
| +{
 | |
| +	struct parallel_data *pd;
 | |
| +
 | |
| +	pd = kzalloc(sizeof(struct parallel_data), GFP_KERNEL);
 | |
| +	if (!pd)
 | |
| +		goto err;
 | |
| +
 | |
| +	pd->pqueue = alloc_percpu(struct padata_parallel_queue);
 | |
| +	if (!pd->pqueue)
 | |
| +		goto err_free_pd;
 | |
| +
 | |
| +	pd->squeue = alloc_percpu(struct padata_serial_queue);
 | |
| +	if (!pd->squeue)
 | |
| +		goto err_free_pqueue;
 | |
| +	if (padata_setup_cpumasks(pd, pcpumask, cbcpumask) < 0)
 | |
| +		goto err_free_squeue;
 | |
| +
 | |
| +	padata_init_pqueues(pd);
 | |
| +	padata_init_squeues(pd);
 | |
| +	setup_timer(&pd->timer, padata_reorder_timer, (unsigned long)pd);
 | |
| +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 13, 0)
 | |
| +	pd->seq_nr = 0;
 | |
| +#else
 | |
| +	atomic_set(&pd->seq_nr, -1);
 | |
| +#endif
 | |
| +	atomic_set(&pd->reorder_objects, 0);
 | |
| +	atomic_set(&pd->refcnt, 0);
 | |
| +	pd->pinst = pinst;
 | |
| +	spin_lock_init(&pd->lock);
 | |
| +
 | |
| +	return pd;
 | |
| +
 | |
| +err_free_squeue:
 | |
| +	free_percpu(pd->squeue);
 | |
| +err_free_pqueue:
 | |
| +	free_percpu(pd->pqueue);
 | |
| +err_free_pd:
 | |
| +	kfree(pd);
 | |
| +err:
 | |
| +	return NULL;
 | |
| +}
 | |
| +
 | |
| +static void padata_free_pd(struct parallel_data *pd)
 | |
| +{
 | |
| +	free_cpumask_var(pd->cpumask.pcpu);
 | |
| +	free_cpumask_var(pd->cpumask.cbcpu);
 | |
| +	free_percpu(pd->pqueue);
 | |
| +	free_percpu(pd->squeue);
 | |
| +	kfree(pd);
 | |
| +}
 | |
| +
 | |
| +/* Flush all objects out of the padata queues. */
 | |
| +static void padata_flush_queues(struct parallel_data *pd)
 | |
| +{
 | |
| +	int cpu;
 | |
| +	struct padata_parallel_queue *pqueue;
 | |
| +	struct padata_serial_queue *squeue;
 | |
| +
 | |
| +	for_each_cpu(cpu, pd->cpumask.pcpu) {
 | |
| +		pqueue = per_cpu_ptr(pd->pqueue, cpu);
 | |
| +		flush_work(&pqueue->work);
 | |
| +	}
 | |
| +
 | |
| +	del_timer_sync(&pd->timer);
 | |
| +
 | |
| +	if (atomic_read(&pd->reorder_objects))
 | |
| +		padata_reorder(pd);
 | |
| +
 | |
| +	for_each_cpu(cpu, pd->cpumask.cbcpu) {
 | |
| +		squeue = per_cpu_ptr(pd->squeue, cpu);
 | |
| +		flush_work(&squeue->work);
 | |
| +	}
 | |
| +
 | |
| +	BUG_ON(atomic_read(&pd->refcnt) != 0);
 | |
| +}
 | |
| +
 | |
| +static void __padata_start(struct padata_instance *pinst)
 | |
| +{
 | |
| +	pinst->flags |= PADATA_INIT;
 | |
| +}
 | |
| +
 | |
| +static void __padata_stop(struct padata_instance *pinst)
 | |
| +{
 | |
| +	if (!(pinst->flags & PADATA_INIT))
 | |
| +		return;
 | |
| +
 | |
| +	pinst->flags &= ~PADATA_INIT;
 | |
| +
 | |
| +	synchronize_rcu();
 | |
| +
 | |
| +	get_online_cpus();
 | |
| +	padata_flush_queues(pinst->pd);
 | |
| +	put_online_cpus();
 | |
| +}
 | |
| +
 | |
| +/* Replace the internal control structure with a new one. */
 | |
| +static void padata_replace(struct padata_instance *pinst,
 | |
| +			   struct parallel_data *pd_new)
 | |
| +{
 | |
| +	struct parallel_data *pd_old = pinst->pd;
 | |
| +	int notification_mask = 0;
 | |
| +
 | |
| +	pinst->flags |= PADATA_RESET;
 | |
| +
 | |
| +	rcu_assign_pointer(pinst->pd, pd_new);
 | |
| +
 | |
| +	synchronize_rcu();
 | |
| +
 | |
| +	if (!cpumask_equal(pd_old->cpumask.pcpu, pd_new->cpumask.pcpu))
 | |
| +		notification_mask |= PADATA_CPU_PARALLEL;
 | |
| +	if (!cpumask_equal(pd_old->cpumask.cbcpu, pd_new->cpumask.cbcpu))
 | |
| +		notification_mask |= PADATA_CPU_SERIAL;
 | |
| +
 | |
| +	padata_flush_queues(pd_old);
 | |
| +	padata_free_pd(pd_old);
 | |
| +
 | |
| +	if (notification_mask)
 | |
| +		blocking_notifier_call_chain(&pinst->cpumask_change_notifier,
 | |
| +					     notification_mask,
 | |
| +					     &pd_new->cpumask);
 | |
| +
 | |
| +	pinst->flags &= ~PADATA_RESET;
 | |
| +}
 | |
| +
 | |
| +/**
 | |
| + * padata_register_cpumask_notifier - Registers a notifier that will be called
 | |
| + *                             if either pcpu or cbcpu or both cpumasks change.
 | |
| + *
 | |
| + * @pinst: A pointer to padata instance
 | |
| + * @nblock: A pointer to notifier block.
 | |
| + */
 | |
| +int padata_register_cpumask_notifier(struct padata_instance *pinst,
 | |
| +				     struct notifier_block *nblock)
 | |
| +{
 | |
| +	return blocking_notifier_chain_register(&pinst->cpumask_change_notifier,
 | |
| +						nblock);
 | |
| +}
 | |
| +
 | |
| +/**
 | |
| + * padata_unregister_cpumask_notifier - Unregisters cpumask notifier
 | |
| + *        registered earlier  using padata_register_cpumask_notifier
 | |
| + *
 | |
| + * @pinst: A pointer to data instance.
 | |
| + * @nblock: A pointer to notifier block.
 | |
| + */
 | |
| +int padata_unregister_cpumask_notifier(struct padata_instance *pinst,
 | |
| +				       struct notifier_block *nblock)
 | |
| +{
 | |
| +	return blocking_notifier_chain_unregister(
 | |
| +		&pinst->cpumask_change_notifier,
 | |
| +		nblock);
 | |
| +}
 | |
| +
 | |
| +
 | |
| +/* If cpumask contains no active cpu, we mark the instance as invalid. */
 | |
| +static bool padata_validate_cpumask(struct padata_instance *pinst,
 | |
| +				    const struct cpumask *cpumask)
 | |
| +{
 | |
| +	if (!cpumask_intersects(cpumask, cpu_online_mask)) {
 | |
| +		pinst->flags |= PADATA_INVALID;
 | |
| +		return false;
 | |
| +	}
 | |
| +
 | |
| +	pinst->flags &= ~PADATA_INVALID;
 | |
| +	return true;
 | |
| +}
 | |
| +
 | |
| +static int __padata_set_cpumasks(struct padata_instance *pinst,
 | |
| +				 cpumask_var_t pcpumask,
 | |
| +				 cpumask_var_t cbcpumask)
 | |
| +{
 | |
| +	int valid;
 | |
| +	struct parallel_data *pd;
 | |
| +
 | |
| +	valid = padata_validate_cpumask(pinst, pcpumask);
 | |
| +	if (!valid) {
 | |
| +		__padata_stop(pinst);
 | |
| +		goto out_replace;
 | |
| +	}
 | |
| +
 | |
| +	valid = padata_validate_cpumask(pinst, cbcpumask);
 | |
| +	if (!valid)
 | |
| +		__padata_stop(pinst);
 | |
| +
 | |
| +out_replace:
 | |
| +	pd = padata_alloc_pd(pinst, pcpumask, cbcpumask);
 | |
| +	if (!pd)
 | |
| +		return -ENOMEM;
 | |
| +
 | |
| +	cpumask_copy(pinst->cpumask.pcpu, pcpumask);
 | |
| +	cpumask_copy(pinst->cpumask.cbcpu, cbcpumask);
 | |
| +
 | |
| +	padata_replace(pinst, pd);
 | |
| +
 | |
| +	if (valid)
 | |
| +		__padata_start(pinst);
 | |
| +
 | |
| +	return 0;
 | |
| +}
 | |
| +
 | |
| +/**
 | |
| + * padata_set_cpumask: Sets specified by @cpumask_type cpumask to the value
 | |
| + *                     equivalent to @cpumask.
 | |
| + *
 | |
| + * @pinst: padata instance
 | |
| + * @cpumask_type: PADATA_CPU_SERIAL or PADATA_CPU_PARALLEL corresponding
 | |
| + *                to serial and parallel cpumasks respectively.
 | |
| + * @cpumask: the cpumask to use
 | |
| + */
 | |
| +int padata_set_cpumask(struct padata_instance *pinst, int cpumask_type,
 | |
| +		       cpumask_var_t cpumask)
 | |
| +{
 | |
| +	struct cpumask *serial_mask, *parallel_mask;
 | |
| +	int err = -EINVAL;
 | |
| +
 | |
| +	mutex_lock(&pinst->lock);
 | |
| +	get_online_cpus();
 | |
| +
 | |
| +	switch (cpumask_type) {
 | |
| +	case PADATA_CPU_PARALLEL:
 | |
| +		serial_mask = pinst->cpumask.cbcpu;
 | |
| +		parallel_mask = cpumask;
 | |
| +		break;
 | |
| +	case PADATA_CPU_SERIAL:
 | |
| +		parallel_mask = pinst->cpumask.pcpu;
 | |
| +		serial_mask = cpumask;
 | |
| +		break;
 | |
| +	default:
 | |
| +		 goto out;
 | |
| +	}
 | |
| +
 | |
| +	err =  __padata_set_cpumasks(pinst, parallel_mask, serial_mask);
 | |
| +
 | |
| +out:
 | |
| +	put_online_cpus();
 | |
| +	mutex_unlock(&pinst->lock);
 | |
| +
 | |
| +	return err;
 | |
| +}
 | |
| +
 | |
| +/**
 | |
| + * padata_start - start the parallel processing
 | |
| + *
 | |
| + * @pinst: padata instance to start
 | |
| + */
 | |
| +int padata_start(struct padata_instance *pinst)
 | |
| +{
 | |
| +	int err = 0;
 | |
| +
 | |
| +	mutex_lock(&pinst->lock);
 | |
| +
 | |
| +	if (pinst->flags & PADATA_INVALID)
 | |
| +		err = -EINVAL;
 | |
| +
 | |
| +	 __padata_start(pinst);
 | |
| +
 | |
| +	mutex_unlock(&pinst->lock);
 | |
| +
 | |
| +	return err;
 | |
| +}
 | |
| +
 | |
| +/**
 | |
| + * padata_stop - stop the parallel processing
 | |
| + *
 | |
| + * @pinst: padata instance to stop
 | |
| + */
 | |
| +void padata_stop(struct padata_instance *pinst)
 | |
| +{
 | |
| +	mutex_lock(&pinst->lock);
 | |
| +	__padata_stop(pinst);
 | |
| +	mutex_unlock(&pinst->lock);
 | |
| +}
 | |
| +
 | |
| +static void __padata_free(struct padata_instance *pinst)
 | |
| +{
 | |
| +	padata_stop(pinst);
 | |
| +	padata_free_pd(pinst->pd);
 | |
| +	free_cpumask_var(pinst->cpumask.pcpu);
 | |
| +	free_cpumask_var(pinst->cpumask.cbcpu);
 | |
| +	kfree(pinst);
 | |
| +}
 | |
| +
 | |
| +#define kobj2pinst(_kobj)					\
 | |
| +	container_of(_kobj, struct padata_instance, kobj)
 | |
| +#define attr2pentry(_attr)					\
 | |
| +	container_of(_attr, struct padata_sysfs_entry, attr)
 | |
| +
 | |
| +static void padata_sysfs_release(struct kobject *kobj)
 | |
| +{
 | |
| +	struct padata_instance *pinst = kobj2pinst(kobj);
 | |
| +	__padata_free(pinst);
 | |
| +}
 | |
| +
 | |
| +struct padata_sysfs_entry {
 | |
| +	struct attribute attr;
 | |
| +	ssize_t (*show)(struct padata_instance *, struct attribute *, char *);
 | |
| +	ssize_t (*store)(struct padata_instance *, struct attribute *,
 | |
| +			 const char *, size_t);
 | |
| +};
 | |
| +
 | |
| +static ssize_t show_cpumask(struct padata_instance *pinst,
 | |
| +			    struct attribute *attr,  char *buf)
 | |
| +{
 | |
| +	struct cpumask *cpumask;
 | |
| +	ssize_t len;
 | |
| +
 | |
| +	mutex_lock(&pinst->lock);
 | |
| +	if (!strcmp(attr->name, "serial_cpumask"))
 | |
| +		cpumask = pinst->cpumask.cbcpu;
 | |
| +	else
 | |
| +		cpumask = pinst->cpumask.pcpu;
 | |
| +
 | |
| +	len = snprintf(buf, PAGE_SIZE, "%*pb\n",
 | |
| +		       nr_cpu_ids, cpumask_bits(cpumask));
 | |
| +	mutex_unlock(&pinst->lock);
 | |
| +	return len < PAGE_SIZE ? len : -EINVAL;
 | |
| +}
 | |
| +
 | |
| +static ssize_t store_cpumask(struct padata_instance *pinst,
 | |
| +			     struct attribute *attr,
 | |
| +			     const char *buf, size_t count)
 | |
| +{
 | |
| +	cpumask_var_t new_cpumask;
 | |
| +	ssize_t ret;
 | |
| +	int mask_type;
 | |
| +
 | |
| +	if (!alloc_cpumask_var(&new_cpumask, GFP_KERNEL))
 | |
| +		return -ENOMEM;
 | |
| +
 | |
| +	ret = bitmap_parse(buf, count, cpumask_bits(new_cpumask),
 | |
| +			   nr_cpumask_bits);
 | |
| +	if (ret < 0)
 | |
| +		goto out;
 | |
| +
 | |
| +	mask_type = !strcmp(attr->name, "serial_cpumask") ?
 | |
| +		PADATA_CPU_SERIAL : PADATA_CPU_PARALLEL;
 | |
| +	ret = padata_set_cpumask(pinst, mask_type, new_cpumask);
 | |
| +	if (!ret)
 | |
| +		ret = count;
 | |
| +
 | |
| +out:
 | |
| +	free_cpumask_var(new_cpumask);
 | |
| +	return ret;
 | |
| +}
 | |
| +
 | |
| +#define PADATA_ATTR_RW(_name, _show_name, _store_name)		\
 | |
| +	static struct padata_sysfs_entry _name##_attr =		\
 | |
| +		__ATTR(_name, 0644, _show_name, _store_name)
 | |
| +#define PADATA_ATTR_RO(_name, _show_name)		\
 | |
| +	static struct padata_sysfs_entry _name##_attr = \
 | |
| +		__ATTR(_name, 0400, _show_name, NULL)
 | |
| +
 | |
| +PADATA_ATTR_RW(serial_cpumask, show_cpumask, store_cpumask);
 | |
| +PADATA_ATTR_RW(parallel_cpumask, show_cpumask, store_cpumask);
 | |
| +
 | |
| +/*
 | |
| + * Padata sysfs provides the following objects:
 | |
| + * serial_cpumask   [RW] - cpumask for serial workers
 | |
| + * parallel_cpumask [RW] - cpumask for parallel workers
 | |
| + */
 | |
| +static struct attribute *padata_default_attrs[] = {
 | |
| +	&serial_cpumask_attr.attr,
 | |
| +	&parallel_cpumask_attr.attr,
 | |
| +	NULL,
 | |
| +};
 | |
| +
 | |
| +static ssize_t padata_sysfs_show(struct kobject *kobj,
 | |
| +				 struct attribute *attr, char *buf)
 | |
| +{
 | |
| +	struct padata_instance *pinst;
 | |
| +	struct padata_sysfs_entry *pentry;
 | |
| +	ssize_t ret = -EIO;
 | |
| +
 | |
| +	pinst = kobj2pinst(kobj);
 | |
| +	pentry = attr2pentry(attr);
 | |
| +	if (pentry->show)
 | |
| +		ret = pentry->show(pinst, attr, buf);
 | |
| +
 | |
| +	return ret;
 | |
| +}
 | |
| +
 | |
| +static ssize_t padata_sysfs_store(struct kobject *kobj, struct attribute *attr,
 | |
| +				  const char *buf, size_t count)
 | |
| +{
 | |
| +	struct padata_instance *pinst;
 | |
| +	struct padata_sysfs_entry *pentry;
 | |
| +	ssize_t ret = -EIO;	/* reported when the attribute has no store() */
 | |
| +
 | |
| +	pinst = kobj2pinst(kobj);
 | |
| +	pentry = attr2pentry(attr);
 | |
| +	if (pentry->store)	/* test the callback that is actually invoked */
 | |
| +		ret = pentry->store(pinst, attr, buf, count);
 | |
| +
 | |
| +	return ret;
 | |
| +}
 | |
| +
 | |
| +static const struct sysfs_ops padata_sysfs_ops = {
 | |
| +	.show = padata_sysfs_show,
 | |
| +	.store = padata_sysfs_store,
 | |
| +};
 | |
| +
 | |
| +static struct kobj_type padata_attr_type = {
 | |
| +	.sysfs_ops = &padata_sysfs_ops,
 | |
| +	.default_attrs = padata_default_attrs,
 | |
| +	.release = padata_sysfs_release,
 | |
| +};
 | |
| +
 | |
| +/**
 | |
| + * padata_alloc_possible - Allocate and initialize padata instance.
 | |
| + *                         Use the cpu_possible_mask for serial and
 | |
| + *                         parallel workers.
 | |
| + *
 | |
| + * @wq: workqueue to use for the allocated padata instance
 | |
| + */
 | |
| +struct padata_instance *padata_alloc_possible(struct workqueue_struct *wq)
 | |
| +{
 | |
| +	return padata_alloc(wq, cpu_possible_mask, cpu_possible_mask);
 | |
| +}
 | |
| +
 | |
| +/**
 | |
| + * padata_alloc - allocate and initialize a padata instance and specify
 | |
| + *                cpumasks for serial and parallel workers.
 | |
| + *
 | |
| + * @wq: workqueue to use for the allocated padata instance
 | |
| + * @pcpumask: cpumask that will be used for padata parallelization
 | |
| + * @cbcpumask: cpumask that will be used for padata serialization
 | |
| + */
 | |
| +struct padata_instance *padata_alloc(struct workqueue_struct *wq,
 | |
| +				     const struct cpumask *pcpumask,
 | |
| +				     const struct cpumask *cbcpumask)
 | |
| +{
 | |
| +	struct padata_instance *pinst;
 | |
| +	struct parallel_data *pd = NULL;
 | |
| +
 | |
| +	pinst = kzalloc(sizeof(struct padata_instance), GFP_KERNEL);
 | |
| +	if (!pinst)
 | |
| +		goto err;
 | |
| +
 | |
| +	get_online_cpus();
 | |
| +	if (!alloc_cpumask_var(&pinst->cpumask.pcpu, GFP_KERNEL))
 | |
| +		goto err_free_inst;
 | |
| +	if (!alloc_cpumask_var(&pinst->cpumask.cbcpu, GFP_KERNEL)) {
 | |
| +		free_cpumask_var(pinst->cpumask.pcpu);
 | |
| +		goto err_free_inst;
 | |
| +	}
 | |
| +	if (!padata_validate_cpumask(pinst, pcpumask) ||
 | |
| +	    !padata_validate_cpumask(pinst, cbcpumask))
 | |
| +		goto err_free_masks;
 | |
| +
 | |
| +	pd = padata_alloc_pd(pinst, pcpumask, cbcpumask);
 | |
| +	if (!pd)
 | |
| +		goto err_free_masks;
 | |
| +
 | |
| +	rcu_assign_pointer(pinst->pd, pd);
 | |
| +
 | |
| +	pinst->wq = wq;
 | |
| +
 | |
| +	cpumask_copy(pinst->cpumask.pcpu, pcpumask);
 | |
| +	cpumask_copy(pinst->cpumask.cbcpu, cbcpumask);
 | |
| +
 | |
| +	pinst->flags = 0;
 | |
| +
 | |
| +	put_online_cpus();
 | |
| +
 | |
| +	BLOCKING_INIT_NOTIFIER_HEAD(&pinst->cpumask_change_notifier);
 | |
| +	kobject_init(&pinst->kobj, &padata_attr_type);
 | |
| +	mutex_init(&pinst->lock);
 | |
| +
 | |
| +	return pinst;
 | |
| +
 | |
| +err_free_masks:
 | |
| +	free_cpumask_var(pinst->cpumask.pcpu);
 | |
| +	free_cpumask_var(pinst->cpumask.cbcpu);
 | |
| +err_free_inst:
 | |
| +	kfree(pinst);
 | |
| +	put_online_cpus();
 | |
| +err:
 | |
| +	return NULL;
 | |
| +}
 | |
| +
 | |
| +/**
 | |
| + * padata_free - free a padata instance
 | |
| + *
 | |
| + * @pinst: padata instance to free
 | |
| + */
 | |
| +void padata_free(struct padata_instance *pinst)
 | |
| +{
 | |
| +	kobject_put(&pinst->kobj);
 | |
| +}
 | |
| --- /dev/null	2017-07-05 16:27:37.615351856 +0200
 | |
| +++ b/net/wireguard/compat/checksum/checksum_partial_compat.h	2017-07-06 18:17:33.000000000 +0200
 | |
| @@ -0,0 +1,201 @@
 | |
| +#include <net/route.h>
 | |
| +#include <net/esp.h>
 | |
| +#include <net/ip.h>
 | |
| +#include <net/ipv6.h>
 | |
| +#define IP6_MF          0x0001
 | |
| +#define IP6_OFFSET      0xFFF8
 | |
| +static inline int skb_maybe_pull_tail(struct sk_buff *skb, unsigned int len, unsigned int max)
 | |
| +{
 | |
| +	if (skb_headlen(skb) >= len)
 | |
| +		return 0;
 | |
| +	if (max > skb->len)
 | |
| +		max = skb->len;
 | |
| +	if (__pskb_pull_tail(skb, max - skb_headlen(skb)) == NULL)
 | |
| +		return -ENOMEM;
 | |
| +	if (skb_headlen(skb) < len)
 | |
| +		return -EPROTO;
 | |
| +	return 0;
 | |
| +}
 | |
| +#define MAX_IP_HDR_LEN 128
 | |
| +static inline int skb_checksum_setup_ip(struct sk_buff *skb, bool recalculate)
 | |
| +{
 | |
| +	unsigned int off;
 | |
| +	bool fragment;
 | |
| +	int err;
 | |
| +	fragment = false;
 | |
| +	err = skb_maybe_pull_tail(skb, sizeof(struct iphdr), MAX_IP_HDR_LEN);
 | |
| +	if (err < 0)
 | |
| +		goto out;
 | |
| +	if (ip_hdr(skb)->frag_off & htons(IP_OFFSET | IP_MF))
 | |
| +		fragment = true;
 | |
| +	off = ip_hdrlen(skb);
 | |
| +	err = -EPROTO;
 | |
| +	if (fragment)
 | |
| +		goto out;
 | |
| +	switch (ip_hdr(skb)->protocol) {
 | |
| +	case IPPROTO_TCP:
 | |
| +		err = skb_maybe_pull_tail(skb,
 | |
| +					  off + sizeof(struct tcphdr),
 | |
| +					  MAX_IP_HDR_LEN);
 | |
| +		if (err < 0)
 | |
| +			goto out;
 | |
| +
 | |
| +		if (!skb_partial_csum_set(skb, off,
 | |
| +					  offsetof(struct tcphdr, check))) {
 | |
| +			err = -EPROTO;
 | |
| +			goto out;
 | |
| +		}
 | |
| +
 | |
| +		if (recalculate)
 | |
| +			tcp_hdr(skb)->check =
 | |
| +				~csum_tcpudp_magic(ip_hdr(skb)->saddr,
 | |
| +						   ip_hdr(skb)->daddr,
 | |
| +						   skb->len - off,
 | |
| +						   IPPROTO_TCP, 0);
 | |
| +		break;
 | |
| +	case IPPROTO_UDP:
 | |
| +		err = skb_maybe_pull_tail(skb,
 | |
| +					  off + sizeof(struct udphdr),
 | |
| +					  MAX_IP_HDR_LEN);
 | |
| +		if (err < 0)
 | |
| +			goto out;
 | |
| +
 | |
| +		if (!skb_partial_csum_set(skb, off,
 | |
| +					  offsetof(struct udphdr, check))) {
 | |
| +			err = -EPROTO;
 | |
| +			goto out;
 | |
| +		}
 | |
| +
 | |
| +		if (recalculate)
 | |
| +			udp_hdr(skb)->check =
 | |
| +				~csum_tcpudp_magic(ip_hdr(skb)->saddr,
 | |
| +						   ip_hdr(skb)->daddr,
 | |
| +						   skb->len - off,
 | |
| +						   IPPROTO_UDP, 0);
 | |
| +		break;
 | |
| +	default:
 | |
| +		goto out;
 | |
| +	}
 | |
| +	err = 0;
 | |
| +out:
 | |
| +	return err;
 | |
| +}
 | |
| +#define MAX_IPV6_HDR_LEN 256
 | |
| +#define OPT_HDR(type, skb, off) \
 | |
| +	(type *)(skb_network_header(skb) + (off))
 | |
| +static inline int skb_checksum_setup_ipv6(struct sk_buff *skb, bool recalculate)
 | |
| +{
 | |
| +	int err;
 | |
| +	u8 nexthdr;
 | |
| +	unsigned int off;
 | |
| +	unsigned int len;
 | |
| +	bool fragment;
 | |
| +	bool done;
 | |
| +	fragment = false;
 | |
| +	done = false;
 | |
| +	off = sizeof(struct ipv6hdr);
 | |
| +	err = skb_maybe_pull_tail(skb, off, MAX_IPV6_HDR_LEN);
 | |
| +	if (err < 0)
 | |
| +		goto out;
 | |
| +	nexthdr = ipv6_hdr(skb)->nexthdr;
 | |
| +	len = sizeof(struct ipv6hdr) + ntohs(ipv6_hdr(skb)->payload_len);
 | |
| +	while (off <= len && !done) {
 | |
| +		switch (nexthdr) {
 | |
| +		case IPPROTO_DSTOPTS:
 | |
| +		case IPPROTO_HOPOPTS:
 | |
| +		case IPPROTO_ROUTING: {
 | |
| +			struct ipv6_opt_hdr *hp;
 | |
| +
 | |
| +			err = skb_maybe_pull_tail(skb, off + sizeof(struct ipv6_opt_hdr), MAX_IPV6_HDR_LEN);
 | |
| +			if (err < 0)
 | |
| +				goto out;
 | |
| +			hp = OPT_HDR(struct ipv6_opt_hdr, skb, off);
 | |
| +			nexthdr = hp->nexthdr;
 | |
| +			off += ipv6_optlen(hp);
 | |
| +			break;
 | |
| +		}
 | |
| +		case IPPROTO_FRAGMENT: {
 | |
| +			struct frag_hdr *hp;
 | |
| +			err = skb_maybe_pull_tail(skb, off + sizeof(struct frag_hdr), MAX_IPV6_HDR_LEN);
 | |
| +			if (err < 0)
 | |
| +				goto out;
 | |
| +			hp = OPT_HDR(struct frag_hdr, skb, off);
 | |
| +			if (hp->frag_off & htons(IP6_OFFSET | IP6_MF))
 | |
| +				fragment = true;
 | |
| +			nexthdr = hp->nexthdr;
 | |
| +			off += sizeof(struct frag_hdr);
 | |
| +			break;
 | |
| +		}
 | |
| +		default:
 | |
| +			done = true;
 | |
| +			break;
 | |
| +		}
 | |
| +	}
 | |
| +	err = -EPROTO;
 | |
| +	if (!done || fragment)
 | |
| +		goto out;
 | |
| +	switch (nexthdr) {
 | |
| +		case IPPROTO_TCP:
 | |
| +			err = skb_maybe_pull_tail(skb,
 | |
| +						  off + sizeof(struct tcphdr),
 | |
| +						  MAX_IPV6_HDR_LEN);
 | |
| +			if (err < 0)
 | |
| +				goto out;
 | |
| +
 | |
| +			if (!skb_partial_csum_set(skb, off,
 | |
| +						  offsetof(struct tcphdr, check))) {
 | |
| +				err = -EPROTO;
 | |
| +				goto out;
 | |
| +			}
 | |
| +
 | |
| +			if (recalculate)
 | |
| +				tcp_hdr(skb)->check =
 | |
| +					~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
 | |
| +							 &ipv6_hdr(skb)->daddr,
 | |
| +							 skb->len - off,
 | |
| +							 IPPROTO_TCP, 0);
 | |
| +			break;
 | |
| +		case IPPROTO_UDP:
 | |
| +			err = skb_maybe_pull_tail(skb,
 | |
| +						  off + sizeof(struct udphdr),
 | |
| +						  MAX_IPV6_HDR_LEN);
 | |
| +			if (err < 0)
 | |
| +				goto out;
 | |
| +
 | |
| +			if (!skb_partial_csum_set(skb, off,
 | |
| +						  offsetof(struct udphdr, check))) {
 | |
| +				err = -EPROTO;
 | |
| +				goto out;
 | |
| +			}
 | |
| +
 | |
| +			if (recalculate)
 | |
| +				udp_hdr(skb)->check =
 | |
| +					~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
 | |
| +							 &ipv6_hdr(skb)->daddr,
 | |
| +							 skb->len - off,
 | |
| +							 IPPROTO_UDP, 0);
 | |
| +			break;
 | |
| +		default:
 | |
| +			goto out;
 | |
| +	}
 | |
| +	err = 0;
 | |
| +out:
 | |
| +	return err;
 | |
| +}
 | |
| +static inline int skb_checksum_setup(struct sk_buff *skb, bool recalculate)
 | |
| +{
 | |
| +	int err;
 | |
| +	switch (skb->protocol) {
 | |
| +	case htons(ETH_P_IP):
 | |
| +		err = skb_checksum_setup_ip(skb, recalculate);
 | |
| +		break;
 | |
| +
 | |
| +	case htons(ETH_P_IPV6):
 | |
| +		err = skb_checksum_setup_ipv6(skb, recalculate);
 | |
| +		break;
 | |
| +	default:
 | |
| +		err = -EPROTO;
 | |
| +		break;
 | |
| +	}
 | |
| +	return err;
 | |
| +}
 | |
| --- a/net/Kconfig
 | |
| +++ b/net/Kconfig
 | |
| @@ -85,2 +85,3 @@ config INET
 | |
|  if INET
 | |
| +source "net/wireguard/Kconfig"
 | |
|  source "net/ipv4/Kconfig"
 | |
| --- a/net/Makefile
 | |
| +++ b/net/Makefile
 | |
| @@ -16,2 +16,3 @@
 | |
|  obj-$(CONFIG_NETFILTER)		+= netfilter/
 | |
| +obj-$(CONFIG_WIREGUARD)		+= wireguard/
 | |
|  obj-$(CONFIG_INET)		+= ipv4/
 |