Difference between revisions of "MoVfuscator Writeup"
m |
m |
||
(6 intermediate revisions by the same user not shown) | |||
Line 218: | Line 218: | ||
ia32: |
ia32: |
||
mkdir -p obj-ia32 |
mkdir -p obj-ia32 |
||
− | $(MAKE) TARGET=ia32 obj-ia32/ |
+ | $(MAKE) TARGET=ia32 obj-ia32/tracer.so |
intel64: |
intel64: |
||
mkdir -p obj-intel64 |
mkdir -p obj-intel64 |
||
− | $(MAKE) TARGET=intel64 obj-intel64/ |
+ | $(MAKE) TARGET=intel64 obj-intel64/tracer.so |
clean-all: |
clean-all: |
||
Line 231: | Line 231: | ||
And use it: (I used the <code>-ifeellucky</code> switch because current latest PIN doesn't know about my linux kernel 4.0 yet) |
And use it: (I used the <code>-ifeellucky</code> switch because current latest PIN doesn't know about my linux kernel 4.0 yet) |
||
make |
make |
||
− | ../../../pin -ifeellucky -t ./obj-ia32/ |
+ | ../../../pin -ifeellucky -t ./obj-ia32/tracer.so -- ./challenge 6bc1bee22e409f96e93d7e117393172a |
==Attack== |
==Attack== |
||
Line 244: | Line 244: | ||
for i in range(50): |
for i in range(50): |
||
plaintext=random.randint(0, (1<<(8*16))-1) |
plaintext=random.randint(0, (1<<(8*16))-1) |
||
− | cmd = '../../../pin -ifeellucky -t ./obj-ia32/ |
+ | cmd = '../../../pin -ifeellucky -t ./obj-ia32/tracer.so -- ./challenge %0*x; exit 0' % (32, plaintext) |
output=subprocess.check_output(cmd, shell=True) |
output=subprocess.check_output(cmd, shell=True) |
||
ciphertext=int(output.split('\n')[3], 16) |
ciphertext=int(output.split('\n')[3], 16) |
||
Line 285: | Line 285: | ||
filenames = sorted(glob.glob('trace_1byte_write*small')) |
filenames = sorted(glob.glob('trace_1byte_write*small')) |
||
+ | |||
#ntraces = len(filenames) |
#ntraces = len(filenames) |
||
# Limit our number of traces for a faster result: |
# Limit our number of traces for a faster result: |
||
ntraces=30 |
ntraces=30 |
||
+ | |||
nsamples = os.path.getsize(filenames[0])*8 |
nsamples = os.path.getsize(filenames[0])*8 |
||
#sample_start = 0 |
#sample_start = 0 |
||
Line 294: | Line 296: | ||
sample_start = 3000 |
sample_start = 3000 |
||
sample_end = 3500 |
sample_end = 3500 |
||
+ | |||
+ | #targetbits=range(8) |
||
+ | # Focus on the first bit only for a faster result: |
||
+ | targetbits=[0] |
||
traces = np.zeros([ntraces, nsamples],np.uint8) |
traces = np.zeros([ntraces, nsamples],np.uint8) |
||
Line 335: | Line 341: | ||
diffs = [0]*256 |
diffs = [0]*256 |
||
for key in range(0, 256): |
for key in range(0, 256): |
||
− | mean1 = np.zeros([sample_end-sample_start, |
+ | mean1 = np.zeros([sample_end-sample_start,len(targetbits)]) |
− | mean0 = np.zeros([sample_end-sample_start, |
+ | mean0 = np.zeros([sample_end-sample_start,len(targetbits)]) |
− | num1 = [0]* |
+ | num1 = [0]*len(targetbits) |
− | num0 = [0]* |
+ | num0 = [0]*len(targetbits) |
for tnum in range(ntraces): |
for tnum in range(ntraces): |
||
Hyp = SBOX[plaintexts[tnum, bnum] ^ key] |
Hyp = SBOX[plaintexts[tnum, bnum] ^ key] |
||
− | for targetbit in |
+ | for targetbit in targetbits: |
if (Hyp & (1 << targetbit)) != 0: |
if (Hyp & (1 << targetbit)) != 0: |
||
mean1[:,targetbit] = np.add(mean1[:,targetbit], traces[tnum,sample_start:sample_end]) |
mean1[:,targetbit] = np.add(mean1[:,targetbit], traces[tnum,sample_start:sample_end]) |
||
Line 354: | Line 360: | ||
print "%02x" % diffs.index(max(diffs)), |
print "%02x" % diffs.index(max(diffs)), |
||
</source> |
</source> |
||
+ | Running it: |
||
+ | time ./dpa.py |
||
+ | 2b 7e 15 16 28 ae d2 a6 ab f7 15 88 09 cf 4f 3c |
||
+ | real 0m1.196s |
||
+ | user 0m1.184s |
||
+ | sys 0m0.008s |
||
One may use autocorrelation on a single trace to find the underlying AES structure and find the first round: |
One may use autocorrelation on a single trace to find the underlying AES structure and find the first round: |
||
Line 382: | Line 394: | ||
Comments? |
Comments? |
||
<br>You can [https://twitter.com/doegox/status/630751161419755520 reply to my tweet] or [[User:PhilippeTeuwen|contact me]]. |
<br>You can [https://twitter.com/doegox/status/630751161419755520 reply to my tweet] or [[User:PhilippeTeuwen|contact me]]. |
||
+ | |||
+ | ==crackme2== |
||
+ | Back to [https://github.com/xoreaxeaxeax/movfuscator/tree/master/poc/crackme crackme2]: we can use the same tracer as above to dump all memory writes. |
||
+ | <br>Then trying all 1-char keys and diffing the traces give some info. Differences vary quite a lot but if we count for which char we've quite a unique number of diffs when compared with other traces, we get good candidates. When attacking byte N, those diff stats don't change no matter what are the N-1 chars. |
||
+ | <br>E.g. here I compare with trace of '...a': |
||
+ | HEAD: => ['@', 'A', 'a', 'z', '{'] |
||
+ | HEAD: a => ['@', 'A', 'N', 'a', 'n'] |
||
+ | HEAD: aa => ['@', 'A', 'O', 'a', 'o'] |
||
+ | HEAD: aaa => ['A', 'a', 'i'] |
||
+ | HEAD: aaaa => ['@', 'A', 'N', 'a', 'n'] |
||
+ | HEAD: aaaaa => ['@', 'A', 'S', 'a', 's'] |
||
+ | etc |
||
+ | And the ugly script I used: |
||
+ | <source lang=python> |
||
+ | #!/usr/bin/env python |
||
+ | |||
+ | import os |
||
+ | import subprocess |
||
+ | |||
+ | def crack(c): |
||
+ | cmd = ['echo "'+ c + '" | ../../../pin -ifeellucky -t ./obj-ia32/tracer.so -- ./crackme2; exit 0'] |
||
+ | output=subprocess.check_output(cmd, shell=True) |
||
+ | os.rename('trace-1byte-writes.bin', 'trace-1byte-writes.bin.'+c) |
||
+ | |||
+ | HEAD="aaaaa" |
||
+ | TEST1="a" |
||
+ | |||
+ | if not os.path.isfile('trace-1byte-writes.bin.'+HEAD+TEST1): |
||
+ | crack(HEAD+TEST1) |
||
+ | t1=open('trace-1byte-writes.bin.'+HEAD+TEST1, 'rb').read() |
||
+ | N1={} |
||
+ | for c in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!@$?+=-<>() #%&*:;[]^_{}|~": |
||
+ | c2=HEAD+c |
||
+ | if not os.path.isfile('trace-1byte-writes.bin.'+c2): |
||
+ | crack(c2) |
||
+ | v=open('trace-1byte-writes.bin.'+c2, 'rb').read() |
||
+ | n1=0 |
||
+ | for i in range(len(v)): |
||
+ | if t1[i]!=v[i]: |
||
+ | n1+=1 |
||
+ | if n1 in N1: |
||
+ | N1[n1]+=","+c |
||
+ | else: |
||
+ | N1[n1]=c |
||
+ | print "HEAD: %15s => %s" % (HEAD, repr(sorted([N1[x] for x in N1 if len(N1[x])==1]))) |
||
+ | </source> |
||
+ | The flag is <code>{noinscount4u}</code> and is case insensitive. |
||
+ | <br>When googling for the flag I saw @al7818376 found it already: https://twitter.com/al7818376/status/618109122722947072 |
Latest revision as of 10:16, 18 August 2015
Intro
Three days ago Chris Domas announced the release of M/o/Vfuscator2, a beautiful single instruction C compiler leveraging the paper "mov is Turing-complete" (pdf), by Stephen Dolan.
That's it: once compiled, your program is made only of MOV instructions.
See the REcon 2015 slides (pdf) for more insight.
The code is available here: https://github.com/xoreaxeaxeax/movfuscator and by default check.sh will apply it on https://github.com/kokke/tiny-AES128-C , a small portable AES128 implementation.
So, is it safe to protect your AES crypto with M/o/Vfuscator2?
Coincidentally we published a new attack against white-boxes a few days ago: Differential Computation Analysis: Hiding your White-Box Designs is Not Enough.
M/o/Vfuscator2 doesn't transform your AES into a traditional white-box based on look-up tables but we should admit it's quite intimidating for a reverser.
Visualization
For example, here is a trace of the initial AES, up to the first three rounds:
Legend: memory range on the X-axis, time counting from top to bottom on the Y-axis, instructions in black, mem reads in green, mem writes in red.
And here once it's compiled with M/o/Vfuscator2:
Legend: memory range on the X-axis, time counting from top to bottom on the Y-axis, instructions in black, mem reads in green, mem writes in red.
Ouch! That's why I limited the trace to the first 3 rounds, it's so huge that I've disk space and RAM issues to display more...
The black square is simply the main MOV loop repeating all long, so if there is information, it should be in the memory accesses.
Challenge
The initial tiny-AES128-C example encrypts a fixed plaintext with a fixed key but let's make a more realistic challenge where one can provide arbitrary plaintexts.
You can download this code [{{#file: challenge.c}} as challenge.c]
#include <stdio.h>
#include <string.h>
#include <stdint.h>
#define CBC 0
#define ECB 1
#include "aes.h"
static const uint8_t *CHRHEX = (uint8_t *)
"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF" \
"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF" \
"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF" \
"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\xFF\xFF\xFF\xFF\xFF\xFF" \
"\xFF\x0A\x0B\x0C\x0D\x0E\x0F\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF" \
"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF" \
"\xFF\x0A\x0B\x0C\x0D\x0E\x0F\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF" \
"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF" \
"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF" \
"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF" \
"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF" \
"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF" \
"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF" \
"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF" \
"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF" \
"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF";
static void phex(uint8_t* str)
{
unsigned char i;
for(i = 0; i < 16; ++i)
printf("%.2x", str[i]);
printf("\n");
}
int main(int argc, char *argv[])
{
uint8_t key[16] = { 0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6, 0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c };
uint8_t plaintext[16];
uint8_t ciphertext[16];
int n=0;
const char *p;
const uint8_t *D;
uint8_t c=0, e, *d;
p = argv[1];
d = plaintext;
D = (d + sizeof (plaintext));
for (; d != D && *p; p++) {
e = CHRHEX [(int) *p];
if (e != 0xFF) {
c = ((c << 4) | e);
n++;
if (n == 2) {
*(d++) = c;
n = 0;
}
}
}
AES128_ECB_encrypt(plaintext, key, ciphertext);
printf("plaintext:\n");
phex(plaintext);
printf("ciphertext:\n");
phex(ciphertext);
return 0;
}
To download, compile and test it, you can use [{{#file: build_challenge.sh}} this script]:
#!/bin/sh
git clone https://github.com/xoreaxeaxeax/movfuscator
cd movfuscator
./build.sh
[ ! -d "examples/aes" ] && git clone https://github.com/kokke/tiny-AES128-C examples/aes
wget -O examples/aes/challenge.c "http://wiki.yobi.be/index.php?title=MoVfuscator_Writeup&action=raw&anchor=challenge.c"
movcc examples/aes/aes.c examples/aes/challenge.c -o examples/aes/challenge -s
./examples/aes/challenge 6bc1bee22e409f96e93d7e117393172a
Yes, I kept the same AES key, but that doesn't really matter. You can change it if you wish.
./build_challenge.sh ... emit/mov>cnsti4(0) emit/mov>cnsti4(0) emit/mov>reti4(cnsti4(0)) emit/mov>labelv(17) M/o/Vfuscation complete. plaintext: 6bc1bee22e409f96e93d7e117393172a ciphertext: 3ad77bb40d7a3660a89ecaf32466ef97
Traces
Let's use a very simple [{{#filelink: tracer.cpp}} tracing code tracer.cpp] with Intel PIN to record all 1-byte memory writes: {{#fileanchor: tracer.cpp}}
#include "pin.H"
#include <fstream>
std::ofstream TraceFile;
PIN_LOCK lock;
ADDRINT main_begin;
ADDRINT main_end;
static ADDRINT WriteAddr;
static INT32 WriteSize;
static VOID RecordWriteAddrSize(ADDRINT addr, INT32 size)
{
WriteAddr = addr;
WriteSize = size;
}
static VOID RecordMemWrite(ADDRINT ip)
{
UINT8 memdump[256];
PIN_GetLock(&lock, ip);
PIN_SafeCopy(memdump, (void *)WriteAddr, WriteSize);
if (WriteSize==1)
TraceFile << static_cast<CHAR>(*memdump);
PIN_ReleaseLock(&lock);
}
VOID Instruction_cb(INS ins, VOID *v)
{
ADDRINT ip = INS_Address(ins);
if ((ip < main_begin) || (ip > main_end))
return;
if (INS_IsMemoryWrite(ins))
{
INS_InsertPredicatedCall(
ins, IPOINT_BEFORE, (AFUNPTR)RecordWriteAddrSize,
IARG_MEMORYWRITE_EA,
IARG_MEMORYWRITE_SIZE,
IARG_END);
if (INS_HasFallThrough(ins))
{
INS_InsertCall(
ins, IPOINT_AFTER, (AFUNPTR)RecordMemWrite,
IARG_INST_PTR,
IARG_END);
}
}
}
void ImageLoad_cb(IMG Img, void *v)
{
PIN_GetLock(&lock, 0);
if(IMG_IsMainExecutable(Img))
{
main_begin = IMG_LowAddress(Img);
main_end = IMG_HighAddress(Img);
}
PIN_ReleaseLock(&lock);
}
VOID Fini(INT32 code, VOID *v)
{
TraceFile.close();
}
int main(int argc, char *argv[])
{
PIN_InitSymbols();
PIN_Init(argc,argv);
TraceFile.open("trace-1byte-writes.bin");
if(TraceFile == NULL)
return -1;
IMG_AddInstrumentFunction(ImageLoad_cb, 0);
INS_AddInstrumentFunction(Instruction_cb, 0);
PIN_AddFiniFunction(Fini, 0);
PIN_StartProgram();
return 0;
}
Compile it with your Intel PIN release, e.g. put this file as [{{#filelink: tracer.cpp}} tracer.cpp] in pin-2.13-65163-gcc.4.4.7-linux/source/tools/mystuff/ together with a [{{#filelink: Makefile}} Makefile]: {{#fileanchor: Makefile}}
CONFIG_ROOT := ../Config
include $(CONFIG_ROOT)/makefile.config
include $(TOOLS_ROOT)/Config/makefile.default.rules
all: ia32 intel64
ia32:
mkdir -p obj-ia32
$(MAKE) TARGET=ia32 obj-ia32/tracer.so
intel64:
mkdir -p obj-intel64
$(MAKE) TARGET=intel64 obj-intel64/tracer.so
clean-all:
$(MAKE) TARGET=ia32 clean
$(MAKE) TARGET=intel64 clean
And use it: (I used the -ifeellucky
switch because current latest PIN doesn't know about my linux kernel 4.0 yet)
make ../../../pin -ifeellucky -t ./obj-ia32/tracer.so -- ./challenge 6bc1bee22e409f96e93d7e117393172a
Attack
Ok. Now, we've everything to apply the attack of our paper:
Acquire a few traces with different random plaintexts, keep the traces and corresponding plaintexts (we also keep the ciphertexts, handy to validate a recovered AES key...).
#!/usr/bin/env python
import os
import random
import subprocess
for i in range(50):
plaintext=random.randint(0, (1<<(8*16))-1)
cmd = '../../../pin -ifeellucky -t ./obj-ia32/tracer.so -- ./challenge %0*x; exit 0' % (32, plaintext)
output=subprocess.check_output(cmd, shell=True)
ciphertext=int(output.split('\n')[3], 16)
os.rename('trace-1byte-writes.bin', 'trace_1byte_write_%02i_%0*X_%0*X.bin' % (i, 32, plaintext, 32, ciphertext))
print '%02i %0*X -> %0*X' % (i, 32, plaintext, 32, ciphertext)
The traces are still pretty large with 3.988.860 1-byte memory writes captured per trace but most are common to all traces and therfore contain zero useful information, let's filter them out.
#!/usr/bin/env python
import glob
N=10 # traces to decide which bytes to keep
filelist = sorted(glob.glob('trace_1byte_write_*'))
ref_samples=[ord(x) for x in open(filelist[0], 'rb').read()]
mask=[False]*len(ref_samples)
for filename in filelist[1:N]:
samples=[ord(x) for x in open(filename, 'rb').read()]
mask=[m or (x-y != 0) for m,x,y in zip(mask,ref_samples, samples)]
for filename in filelist:
with open(filename, 'rb') as trace, open(filename+".small", 'wb') as small:
samples=[x for x,m in zip(trace.read(),mask) if m]
small.write(''.join(samples))
Now each trace contains only 6625 1-byte writes, much better!
A classical DPA attack on those traces will reveal the key with about 20 to 30 traces.
Small reminder about the settings to mount a DPA attack with as few traces as possible, from our paper:
- Serialize the traces to consider each bit individually
- Attack first round SBox output, considering each target bit individually, not their Hamming weight.
- DPA works slightly better than CPA
If nevertheless you're trying CPA on Hamming weight of first round SBox, it will require about 160 traces, still quite doable.
Here is a simple DPA on the serialized traces written in Python. For larger datasets, better to use faster alternatives...
#!/usr/bin/env python
import glob
import numpy as np
import os
filenames = sorted(glob.glob('trace_1byte_write*small'))
#ntraces = len(filenames)
# Limit our number of traces for a faster result:
ntraces=30
nsamples = os.path.getsize(filenames[0])*8
#sample_start = 0
#sample_end = nsamples-1
# Focus on the first round for a faster result:
sample_start = 3000
sample_end = 3500
#targetbits=range(8)
# Focus on the first bit only for a faster result:
targetbits=[0]
traces = np.zeros([ntraces, nsamples],np.uint8)
plaintexts = np.zeros([ntraces, 16],np.uint8)
for filename in filenames[:ntraces]:
i,plain,cipher = filename[18:-10].split('_')
open(filename, 'rb').read()
traces[i,:] = np.unpackbits(np.fromfile(filename,dtype=np.uint8))
plaintexts[i,:] = [ord(x) for x in plain.decode('hex')]
SBOX = [0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67,
0x2b, 0xfe, 0xd7, 0xab, 0x76, 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59,
0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0, 0xb7,
0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1,
0x71, 0xd8, 0x31, 0x15, 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05,
0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75, 0x09, 0x83,
0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29,
0xe3, 0x2f, 0x84, 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b,
0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf, 0xd0, 0xef, 0xaa,
0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c,
0x9f, 0xa8, 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc,
0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2, 0xcd, 0x0c, 0x13, 0xec,
0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19,
0x73, 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee,
0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb, 0xe0, 0x32, 0x3a, 0x0a, 0x49,
0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4,
0xea, 0x65, 0x7a, 0xae, 0x08, 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6,
0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a, 0x70,
0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9,
0x86, 0xc1, 0x1d, 0x9e, 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e,
0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf, 0x8c, 0xa1,
0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0,
0x54, 0xbb, 0x16]
# code derived from https://media.blackhat.com/eu-13/briefings/OFlynn/bh-eu-13-for-cheapstakes-oflynn-slides.pdf
# by Colin O'Flynn, slide #16
np.seterr(all='ignore')
for bnum in range(0, 16):
diffs = [0]*256
for key in range(0, 256):
mean1 = np.zeros([sample_end-sample_start,len(targetbits)])
mean0 = np.zeros([sample_end-sample_start,len(targetbits)])
num1 = [0]*len(targetbits)
num0 = [0]*len(targetbits)
for tnum in range(ntraces):
Hyp = SBOX[plaintexts[tnum, bnum] ^ key]
for targetbit in targetbits:
if (Hyp & (1 << targetbit)) != 0:
mean1[:,targetbit] = np.add(mean1[:,targetbit], traces[tnum,sample_start:sample_end])
num1[targetbit] += 1
else:
mean0[:,targetbit] = np.add(mean0[:,targetbit], traces[tnum,sample_start:sample_end])
num0[targetbit] += 1
mean1=np.divide(mean1, num1)
mean0=np.divide(mean0, num0)
absdiff = np.fabs(np.subtract(mean1, mean0))
diffs[key] = np.nanmax(absdiff)
print "%02x" % diffs.index(max(diffs)),
Running it:
time ./dpa.py 2b 7e 15 16 28 ae d2 a6 ab f7 15 88 09 cf 4f 3c real 0m1.196s user 0m1.184s sys 0m0.008s
One may use autocorrelation on a single trace to find the underlying AES structure and find the first round:
In red I've circled the spot where useful information leaks. I don't know why there is this unusual black cross.
You see here clearly the nine AES rounds with their MixColumn operation (last round doesn't have MixColumn and is therefore much smaller).
In a typical white-box setup it would leak somewhere further in the first big square because the first round key is merged in the tables of the next operations but here it's mixed immediately, as per the AES definition.
Open samples
In typical white-boxes, the key scheduling is precomputed when the executable is generated and the round keys are merged somehow into the processing, typically in some look-up tables.
But here it was not the case so, if someone knows exactly where to look, he may directly extract the key.
This knowledge requires so called open samples: we need to compile a few same challenges with other known keys.
In such case, we can correlate directly the keys with the binaries to see if the key bytes (or bits) are directly accessible somewhere in the binaries.
Once the spots have been identified, simply extract the key material from the initial challenge binary.
This is left as an exercise for the reader ;)
Conclusions
M/o/Vfuscator2 is wonderful at hiding a processing and someone has still to prove if the transform can be somehow automatically reversed into a more readable disassembly.
And so e.g. crackme2, a simple 15 line crackme in C, is probably pretty hard to crack.
But cryptography is not ordinary processing, and even if the compiled tiny-AES128-c is much larger and slower than the simple crackme2, we saw that it's relatively easy to crack automatically, with a generic attack agnostic to the MOV transform.
Lessons:
Yes, M/o/Vfuscator2 sounds very appealing to obfuscate your processing.
No, don't count on it to harden any cryptographic processing!
Comments?
You can reply to my tweet or contact me.
crackme2
Back to crackme2: we can use the same tracer as above to dump all memory writes.
Then trying all 1-char keys and diffing the traces give some info. Differences vary quite a lot but if we count for which char we've quite a unique number of diffs when compared with other traces, we get good candidates. When attacking byte N, those diff stats don't change no matter what are the N-1 chars.
E.g. here I compare with trace of '...a':
HEAD: => ['@', 'A', 'a', 'z', '{'] HEAD: a => ['@', 'A', 'N', 'a', 'n'] HEAD: aa => ['@', 'A', 'O', 'a', 'o'] HEAD: aaa => ['A', 'a', 'i'] HEAD: aaaa => ['@', 'A', 'N', 'a', 'n'] HEAD: aaaaa => ['@', 'A', 'S', 'a', 's'] etc
And the ugly script I used:
#!/usr/bin/env python
import os
import subprocess
def crack(c):
cmd = ['echo "'+ c + '" | ../../../pin -ifeellucky -t ./obj-ia32/tracer.so -- ./crackme2; exit 0']
output=subprocess.check_output(cmd, shell=True)
os.rename('trace-1byte-writes.bin', 'trace-1byte-writes.bin.'+c)
HEAD="aaaaa"
TEST1="a"
if not os.path.isfile('trace-1byte-writes.bin.'+HEAD+TEST1):
crack(HEAD+TEST1)
t1=open('trace-1byte-writes.bin.'+HEAD+TEST1, 'rb').read()
N1={}
for c in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!@$?+=-<>() #%&*:;[]^_{}|~":
c2=HEAD+c
if not os.path.isfile('trace-1byte-writes.bin.'+c2):
crack(c2)
v=open('trace-1byte-writes.bin.'+c2, 'rb').read()
n1=0
for i in range(len(v)):
if t1[i]!=v[i]:
n1+=1
if n1 in N1:
N1[n1]+=","+c
else:
N1[n1]=c
print "HEAD: %15s => %s" % (HEAD, repr(sorted([N1[x] for x in N1 if len(N1[x])==1])))
The flag is {noinscount4u}
and is case insensitive.
When googling for the flag I saw @al7818376 found it already: https://twitter.com/al7818376/status/618109122722947072