-rwxr-xr-x 31143 saferewrite-20210903/analyze
#!/usr/bin/env python3
# Compiler command lines; every implementation is built once per entry.
compilerlist = (
  'clang -O1 -fwrapv -march=native',
  'gcc -O3 -march=native -mtune=native',
)

# Number of concrete input vectors generated per primitive.
numrandomtests = 16

# Names passed to angr as exclude_sim_procedures_list.
avoidsimprocedures = (
  'memcmp', # we want to test the real libc memcmp
)

# Bit width of each element type that may appear in an api file.
typebits = {'int%d' % bits: bits for bits in (8,16,32,64)}
import sys
import os
import shutil
import subprocess
import angr
import claripy
import multiprocessing
import random
import traceback
import functools
# Worker count: CPUs this process may run on, falling back to the machine
# CPU count where sched_getaffinity is unavailable; the CORES environment
# variable overrides either, and the result is never below 1.
try:
  os_cores = len(os.sched_getaffinity(0))
except AttributeError:
  os_cores = multiprocessing.cpu_count()
os_cores = max(1,int(os.getenv('CORES',default=os_cores)))
import resource
def cputime():
  """Return the summed ru_utime of RUSAGE_SELF and RUSAGE_CHILDREN, in seconds."""
  return sum(resource.getrusage(who).ru_utime for who in (resource.RUSAGE_SELF,resource.RUSAGE_CHILDREN))
def notetime(builddir,what,time):
  """Report a timing on stdout and append it to <builddir>/analysis/seconds.

  what names the phase; time is the number of seconds, written with six
  decimals.
  """
  print('%s seconds %s %.6f' % (builddir,what,time),flush=True)
  with open('%s/analysis/seconds' % builddir,'a') as log:
    log.write('%s %.6f\n' % (what,time))
def note(builddir,conclusion,contents=None):
  """Record a conclusion about a build.

  Prints '<builddir> <conclusion>' to stdout and (re)creates the file
  <builddir>/analysis/<conclusion>.  The file is left empty when contents is
  None; otherwise it receives str(contents).
  """
  print('%s %s' % (builddir,conclusion))
  sys.stdout.flush()
  with open('%s/analysis/%s' % (builddir,conclusion),'w') as f:
    # identity test: falsy contents such as 0 or '' are still written
    if contents is not None:
      f.write(str(contents))
# Expression trees are walked recursively later on, so lift the limit.
sys.setrecursionlimit(1000000)

# The start directory ends up inside a whitespace-split linker command line,
# so only a conservative character set is accepted.
startdir = os.getcwd()
assert not any(x not in 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_./' for x in startdir)

# Always start from an empty build tree.
shutil.rmtree('build',ignore_errors=True)
os.makedirs('build')

# Collect (o,p) pairs: p is a subdirectory of o that contains an api file.
# Directories with the sticky bit (0o1000) set are skipped.
primitives = []
for o in ('src',):
  o = o.strip()
  if o == '' or not os.path.isdir(o): continue
  if os.stat(o).st_mode & 0o1000 == 0o1000:
    print('%s sticky, skipping' % o)
    sys.stdout.flush()
    continue
  for p in sorted(os.listdir(o)):
    primitivedir = '%s/%s' % (o,p)
    if not os.path.isdir(primitivedir): continue
    if os.stat(primitivedir).st_mode & 0o1000 == 0o1000:
      print('%s/%s sticky, skipping' % (o,p))
      sys.stdout.flush()
      continue
    if not os.path.exists('%s/api' % primitivedir):
      print('%s/%s/api nonexistent, skipping' % (o,p))
      sys.stdout.flush()
      continue
    primitives.append((o,p))
# Parse each primitive's api file into
#   op_api[o,p] = inputs,outputs,funargs,funargtypes,funname,funret,funrettype
# Recognised lines:
#   call NAME
#   return TYPE SYMBOL
#   in|out|inout TYPE SYMBOL [ENTRIES]   (ENTRIES present => pointer argument)
# inputs/outputs are lists of (csymbol,bitsperentry,entries).
op_api = {}
for o,p in primitives:
  inputs,outputs = [],[]
  funargs,funargtypes = [],[]
  funname = None
  funret = None
  funrettype = 'void'
  with open('%s/%s/api' % (o,p)) as f:
    for line in f:
      fields = line.split()
      if not fields: continue
      keyword = fields[0]
      if keyword == 'call':
        funname = fields[1]
      elif keyword == 'return':
        bitsperentry = typebits[fields[1]]
        csymbol = fields[2]
        assert all(c in 'abcdefghijklmnopqrstuvwxyz' for c in csymbol)
        entries = 1
        outputs.append((csymbol,bitsperentry,entries))
        # the return value is stored through alloc_<csymbol>[0]
        funret = 'alloc_%s'%csymbol
        funrettype = 'uint%d_t'%bitsperentry
      elif keyword in ('in','out','inout'):
        bitsperentry = typebits[fields[1]]
        csymbol = fields[2]
        assert all(c in 'abcdefghijklmnopqrstuvwxyz' for c in csymbol)
        pointer = len(fields) != 3
        entries = int(fields[3]) if pointer else 1
        if keyword != 'out':
          inputs.append((csymbol,bitsperentry,entries))
        if keyword != 'in':
          outputs.append((csymbol,bitsperentry,entries))
        if pointer:
          funargs.append('alloc_%s'%csymbol)
          funargtypes.append('uint%d_t *' % bitsperentry)
        else:
          # scalars are passed by value
          funargs.append('*alloc_%s'%csymbol)
          funargtypes.append('uint%d_t' % bitsperentry)
  # XXX: support constant inputs
  op_api[o,p] = inputs,outputs,funargs,funargtypes,funname,funret,funrettype
def input_example_str(inputs,x):
  """Format a flat input vector as 'in_<csymbol>_<e> = <value>' lines.

  inputs is a list of (csymbol,bitsperentry,entries); x supplies one integer
  per entry, in order, and must contain exactly that many values.
  """
  lines = []
  pos = 0
  for csymbol,_bits,entries in inputs:
    for e in range(entries):
      lines.append('%s = %d\n' % ('in_%s_%d'%(csymbol,e),x[pos]))
      pos += 1
  assert pos == len(x)
  return ''.join(lines)
def output_example_str(outputs,y):
  """Format a flat output vector as 'out_<csymbol>_<e> = <value>' lines.

  outputs is a list of (csymbol,bitsperentry,entries); y supplies one integer
  per entry, in order, and must contain exactly that many values.
  """
  lines = []
  pos = 0
  for csymbol,_bits,entries in outputs:
    for e in range(entries):
      lines.append('%s = %d\n' % ('out_%s_%d'%(csymbol,e),y[pos]))
      pos += 1
  assert pos == len(y)
  return ''.join(lines)
# File names the harness creates inside every build directory; an
# implementation must not ship files with these names.
reservedfilenames = (
  'library.so.1',
  'analysis',
  'analysis-execute',
  'analysis-execute.c',
  'analysis-valgrind',
  'analysis-valgrind.c',
  'analysis-angr',
  'analysis-angr.c',
)

# opimplementations[o,p] lists the usable implementation directory names of
# each primitive.  Non-directories and sticky directories are skipped, and so
# is any implementation containing a reserved or oddly named file.
opimplementations = {}
for o,p in primitives:
  opimplementations[o,p] = []
  for i in sorted(os.listdir('%s/%s' % (o,p))):
    implementationdir = '%s/%s/%s' % (o,p,i)
    if not os.path.isdir(implementationdir): continue
    if os.stat(implementationdir).st_mode & 0o1000 == 0o1000:
      print('%s/%s/%s sticky, skipping' % (o,p,i))
      continue
    # ok is set once per implementation: a single bad file disqualifies the
    # whole directory (every bad file is still reported).
    ok = True
    for f in sorted(os.listdir(implementationdir)):
      if f in reservedfilenames:
        print('%s/%s/%s/%s reserved filename' % (o,p,i,f))
        ok = False
      if any(fi not in 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_.' for fi in f):
        print('%s/%s/%s/%s prohibited character' % (o,p,i,f))
        ok = False
    if not ok: continue
    opimplementations[o,p] += [i]
    # parent of the per-compiler build directories created by compile()
    os.makedirs('build/%s/%s' % (p,i),exist_ok=True)
def _buildstep(builddir,command,step):
  """Run one compiler or linker command (an argv list) inside builddir.

  step is 'compile' or 'link'; it selects the note names
  warning-<step>output (any output was produced) and warning-<step>failed
  (the command could not be started or exited nonzero).
  Returns True on success, False on failure.
  """
  try:
    proc = subprocess.Popen(command,cwd=builddir,stdout=subprocess.PIPE,stderr=subprocess.STDOUT,universal_newlines=True)
    out,err = proc.communicate()
  except OSError:
    note(builddir,'warning-%sfailed' % step,traceback.format_exc())
    return False
  assert not err # stderr is merged into stdout above
  if out != '':
    note(builddir,'warning-%soutput' % step,out)
  if proc.returncode:
    note(builddir,'warning-%sfailed' % step,'exit code %s' % proc.returncode)
    return False
  return True

def compile(o,p,i,compiler):
  """Build implementation i of primitive (o,p) with one compiler.

  Copies the implementation into build/<p>/<i>/<compilerword>, writes the
  crypto_intN.h / crypto_uintN.h headers and the three test harnesses
  (analysis-execute.c, analysis-valgrind.c, analysis-angr.c), compiles every
  .c/.s/.S file and links one executable per harness.

  Returns (o,p,i,compiler,ok) where ok is False if any step failed.
  """
  compilerword = compiler.replace(' ','_').replace('=','_')
  implementationdir = '%s/%s/%s' % (o,p,i)
  builddir = 'build/%s/%s/%s' % (p,i,compilerword)
  inputs,outputs,funargs,funargtypes,funname,funret,funrettype = op_api[o,p]
  files = sorted(os.listdir(implementationdir))
  cfiles = [x for x in files if x.endswith('.c')]
  sfiles = [x for x in files if x.endswith(('.s','.S'))]
  files = cfiles + sfiles
  shutil.copytree(implementationdir,builddir)
  os.makedirs('%s/analysis' % builddir)
  for bits in 8,16,32,64:
    for inttype in 'int','uint':
      with open('%s/crypto_%s%d.h' % (builddir,inttype,bits),'w') as f:
        f.write('#include <inttypes.h>\n')
        # terminate the last line so the header is a well-formed source file
        f.write('#define crypto_%s%d %s%d_t\n' % (inttype,bits,inttype,bits))
  for analysis in 'execute','valgrind','angr':
    with open('%s/analysis-%s.c' % (builddir,analysis),'w') as f:
      f.write('#include <stdio.h>\n')
      f.write('#include <stdlib.h>\n')
      f.write('#include <string.h>\n')
      f.write('#include <inttypes.h>\n')
      f.write('\n')
      # function declaration
      f.write('extern ')
      if funrettype is not None:
        f.write('%s ' % funrettype)
      f.write('%s(%s);\n' % (funname,','.join(funargtypes)))
      f.write('\n')
      for csymbol,bitsperentry,entries in inputs+outputs:
        f.write('uint%d_t static_%s[%d];\n' % (bitsperentry,csymbol,entries))
      f.write('\n')
      f.write('int main(int argc,char **argv)\n')
      f.write('{\n')
      # one heap buffer per distinct argument; sizes are in bytes
      for csymbol,bitsperentry,entries in inputs:
        f.write(' uint%d_t *alloc_%s = malloc(%d);\n' % (bitsperentry,csymbol,entries*bitsperentry//8))
      for csymbol,bitsperentry,entries in outputs:
        if (csymbol,bitsperentry,entries) not in inputs:
          f.write(' uint%d_t *alloc_%s = malloc(%d);\n' % (bitsperentry,csymbol,entries*bitsperentry//8))
      f.write('\n')
      # XXX: resource limits
      if analysis == 'execute':
        # the execute harness reads its inputs as decimal numbers on stdin
        for csymbol,bitsperentry,entries in inputs:
          f.write(' for (long long i = 0;i < %d;++i) {\n' % entries)
          f.write(' unsigned long long x;\n')
          f.write(' if (scanf("%llu",&x) != 1) abort();\n')
          f.write(' static_%s[i] = x;\n' % csymbol)
          f.write(' }\n')
        f.write('\n')
      if analysis in ('execute','angr'):
        # the valgrind harness skips this copy, leaving the malloc'ed inputs uninitialized
        for csymbol,bitsperentry,entries in inputs:
          f.write(' for (long long i = 0;i < %d;++i)\n' % entries)
          f.write(' alloc_%s[i] = static_%s[i];\n' % (csymbol,csymbol))
        f.write('\n')
      f.write(' ')
      if funret is not None:
        f.write('%s[0] = ' % funret)
      f.write('%s(%s);\n' % (funname,','.join(funargs)))
      f.write('\n')
      if analysis in ('execute','angr'):
        for csymbol,bitsperentry,entries in outputs:
          f.write(' for (long long i = 0;i < %d;++i)\n' % entries)
          f.write(' static_%s[i] = alloc_%s[i];\n' % (csymbol,csymbol))
        f.write('\n')
      if analysis == 'execute':
        # ... and prints its outputs as decimal numbers, one per line
        for csymbol,bitsperentry,entries in outputs:
          f.write(' for (long long i = 0;i < %d;++i) {\n' % entries)
          f.write(' unsigned long long x = static_%s[i];\n' % csymbol)
          f.write(' printf("%llu\\n",x);\n')
          f.write(' }\n')
        f.write(' fflush(stdout);\n')
        f.write('\n')
      f.write(' return 0;\n')
      f.write('}\n')
  # ----- compile
  compiletime = -cputime()
  objfiles = []
  for f in files+['analysis-execute.c','analysis-valgrind.c','analysis-angr.c']:
    command = '%s -Wall -fPIC -DCRYPTO_NAMESPACE(x)=x -c %s' % (compiler,f)
    if not _buildstep(builddir,command.split(),'compile'):
      return o,p,i,compiler,False
    if f in files:
      # harness objects are linked individually below, not via objfiles
      objfiles += ['.'.join(f.split('.')[:-1]+['o'])]
  compiletime += cputime()
  notetime(builddir,'compile',compiletime)
  # ----- link into executable
  linktime = -cputime()
  for analysis in 'execute','valgrind','angr':
    static = True
    if static:
      command = 'gcc -no-pie -o analysis-%s analysis-%s.o' % (analysis,analysis)
      command = command.split()
      command += objfiles
    else:
      # build a shared library first, then link the harness against it
      command = 'gcc -shared -Wl,-soname,library.so.1 -o library.so.1'
      command = command.split()
      command += objfiles
      if not _buildstep(builddir,command,'link'):
        return o,p,i,compiler,False
      shutil.copy('%s/library.so.1' % builddir,'%s/library.so' % builddir)
      command = 'gcc -no-pie -o analysis-%s analysis-%s.o -Wl,-rpath=%s/%s -L. -lrary' % (analysis,analysis,startdir,builddir)
      command = command.split()
    if not _buildstep(builddir,command,'link'):
      return o,p,i,compiler,False
  linktime += cputime()
  notetime(builddir,'link',linktime)
  return o,p,i,compiler,True
def wanttocompile():
  """Yield every (o,p,i,compiler) combination that has to be built."""
  for o,p in primitives:
    yield from ((o,p,i,compiler) for i in opimplementations[o,p] for compiler in compilerlist)

# op_compiled[o,p] lists the (i,compiler) pairs that built successfully.
op_compiled = {(o,p): [] for o,p in primitives}
with multiprocessing.Pool(os_cores) as pool:
  for o,p,i,compiler,ok in pool.starmap(compile,wanttocompile()):
    if ok:
      op_compiled[o,p].append((i,compiler))
print('----- execute')

# op_x[o,p] holds numrandomtests flat input vectors per primitive: vector 0
# is all zeros, vector 1 is all ones (2**bits-1), the rest are random.
op_x = {}
for o,p in primitives:
  # one bit width per input entry, in argument order
  widths = [bitsperentry for csymbol,bitsperentry,entries in op_api[o,p][0] for e in range(entries)]
  vectors = []
  for execution in range(numrandomtests):
    if execution == 0:
      x = [0 for bits in widths]
    elif execution == 1:
      x = [2**bits-1 for bits in widths]
    else:
      x = [random.randrange(2**bits) for bits in widths]
    vectors.append(x)
  op_x[o,p] = vectors
def _parse_execute_output(outputs,ystr):
  """Parse the stdout of analysis-execute.

  Returns the list of output integers, or None unless ystr consists of
  exactly one decimal integer per output entry, each within
  [0, 2**bitsperentry).
  """
  try:
    y = [int(s) for s in ystr.splitlines()]
  except ValueError:
    return None
  widths = [bitsperentry for csymbol,bitsperentry,entries in outputs for e in range(entries)]
  if len(y) != len(widths): return None
  if any(yi < 0 or yi >= 2**bits for yi,bits in zip(y,widths)): return None
  return y

def execute(o,p,i,compiler):
  """Run ./analysis-execute on every test vector in op_x[o,p].

  Each vector is fed on stdin, one decimal number per line.  Returns
  (o,p,i,compiler,results) where results is the list of output vectors, or
  False after recording one of the notes warning-executeerror (could not
  start), warning-executefailed (nonzero exit) or warning-executebadformat
  (output is not the expected list of in-range integers).
  """
  compilerword = compiler.replace(' ','_').replace('=','_')
  builddir = 'build/%s/%s/%s' % (p,i,compilerword)
  inputs,outputs = op_api[o,p][:2]
  executetime = -cputime()
  results = []
  command = ['./analysis-execute']
  for x in op_x[o,p]:
    xstr = ''.join('%d\n'%r for r in x)
    try:
      proc = subprocess.Popen(command,cwd=builddir,stdin=subprocess.PIPE,stdout=subprocess.PIPE,stderr=subprocess.STDOUT,universal_newlines=True)
      ystr,err = proc.communicate(input=xstr)
    except OSError:
      note(builddir,'warning-executeerror',xstr)
      return o,p,i,compiler,False
    if proc.returncode != 0:
      note(builddir,'warning-executefailed',xstr+'exit code %s' % proc.returncode)
      return o,p,i,compiler,False
    y = _parse_execute_output(outputs,ystr)
    if y is None:
      # no parsed vector exists here, so record the raw program output
      note(builddir,'warning-executebadformat',input_example_str(inputs,x)+ystr)
      return o,p,i,compiler,False
    results += [y]
  executetime += cputime()
  notetime(builddir,'execute',executetime)
  return o,p,i,compiler,results
def wanttoexecute():
  """Yield (o,p,i,compiler) for every build that compiled and linked."""
  for o,p in primitives:
    yield from ((o,p,i,compiler) for i,compiler in op_compiled[o,p])

# opic_y[o,p,i,compiler] holds the executed outputs, one vector per test.
opic_y = {}
with multiprocessing.Pool(os_cores) as pool:
  for o,p,i,compiler,results in pool.starmap(execute,wanttoexecute()):
    if results is False: continue
    opic_y[o,p,i,compiler] = results
print('----- valgrind (can take some time)')
def valgrind(o,p,i,compiler):
  """Run ./analysis-valgrind under valgrind and note any problem.

  Notes unsafe-valgrindfailure when valgrind exits with the configured error
  code 99 or its output mentions 'client request', and warning-valgrinderror
  when valgrind cannot be started or exits with another nonzero code.
  """
  builddir = 'build/%s/%s/%s' % (p,i,compiler.replace(' ','_').replace('=','_'))
  valgrindtime = -cputime()
  command = ['valgrind','-q','--error-exitcode=99','./analysis-valgrind']
  try:
    proc = subprocess.Popen(command,cwd=builddir,stdout=subprocess.PIPE,stderr=subprocess.STDOUT,universal_newlines=True)
    out,err = proc.communicate()
  except OSError:
    valgrindstatus = 'warning-valgrinderror'
  else:
    assert not err
    if proc.returncode == 99:
      valgrindstatus = 'unsafe-valgrindfailure'
    elif proc.returncode != 0:
      valgrindstatus = 'warning-valgrinderror'
    elif 'client request' in out:
      valgrindstatus = 'unsafe-valgrindfailure'
    else:
      valgrindstatus = None
  if valgrindstatus is not None:
    note(builddir,valgrindstatus)
  valgrindtime += cputime()
  notetime(builddir,'valgrind',valgrindtime)
def wanttovalgrind():
  """Yield (o,p,i,compiler) for every build that compiled and linked."""
  for o,p in primitives:
    yield from ((o,p,i,compiler) for i,compiler in op_compiled[o,p])

with multiprocessing.Pool(os_cores) as pool:
  pool.starmap(valgrind,wanttovalgrind())
print('----- unroll (can take tons of time)')
# XXX: could do this in parallel with valgrind
# XXX: unrolled can be huge; pass through disk instead of RAM
def values(terms,replacements):
  """Evaluate expression trees on concrete inputs without a solver.

  input: replacements mapping cache_key to constant terms (args[0] is the integer)
  output: dictionary V mapping cache_key to pairs (b,i) where i is a b-bit value
  output includes all terms (and every subterm visited on the way)
  _or_ output is None if terms use variables outside replacements
  or an operation that is not implemented here
  """
  V = {}
  def evaluate(t):
    # returns True once V[t.cache_key] is filled in, False if t cannot be evaluated
    if t.cache_key in V:
      return True
    if t.op == 'BoolV':
      V[t.cache_key] = 1,t.args[0]
      return True
    if t.op == 'BVV':
      V[t.cache_key] = t.size(),t.args[0]
      return True
    if t.op == 'BVS':
      if t.cache_key not in replacements: return False
      V[t.cache_key] = t.size(),replacements[t.cache_key].args[0]
      return True
    if t.op == 'Extract':
      # args are (top bit, bottom bit, term)
      assert len(t.args) == 3
      top = t.args[0]
      bot = t.args[1]
      if not evaluate(t.args[2]): return False
      x0 = V[t.args[2].cache_key]
      assert x0[0] > top
      assert top >= bot
      assert bot >= 0
      V[t.cache_key] = top+1-bot,((x0[1] & ((2<<top)-1)) >> bot)
      return True
    if t.op in ('SignExt','ZeroExt'):
      # args are (number of extra bits, term)
      assert len(t.args) == 2
      if not evaluate(t.args[1]): return False
      x0bits,x0 = V[t.args[1].cache_key]
      extend = t.args[0]
      assert extend >= 0
      if t.op == 'SignExt':
        if x0 >= (1<<(x0bits-1)):
          # negative: reinterpret as signed, then wrap into the wider range
          x0 -= 1<<x0bits
          x0 += 1<<(x0bits+extend)
      V[t.cache_key] = x0bits+extend,x0
      return True
    # every remaining operation takes only terms as arguments
    for a in t.args:
      if not evaluate(a): return False
    x = [V[a.cache_key] for a in t.args]
    if t.op == 'Concat':
      # first argument ends up in the most significant bits
      y = 0
      ybits = 0
      for xbitsi,xi in x:
        y <<= xbitsi
        y += xi
        ybits += xbitsi
      V[t.cache_key] = ybits,y
      return True
    if t.op in ('__eq__','__ne__'):
      assert len(x) == 2
      assert x[0][0] == x[1][0]
      equal = (x[0][1]==x[1][1])
      V[t.cache_key] = 1,(equal if t.op == '__eq__' else not equal)
      return True
    if t.op in ('__add__','__mul__','__sub__','__lshift__','LShR','__rshift__','__and__','__or__','__xor__'):
      bits = x[0][0]
      assert all(xi[0] == bits for xi in x)
      if t.op == '__add__': reduction = (lambda s,t:(s+t)%(2**bits))
      elif t.op == '__mul__': reduction = (lambda s,t:(s*t)%(2**bits))
      elif t.op == '__sub__': reduction = (lambda s,t:(s-t)%(2**bits))
      elif t.op == '__lshift__': reduction = (lambda s,t:(s<<t)%(2**bits))
      elif t.op == 'LShR': reduction = (lambda s,t:(s>>t)%(2**bits))
      elif t.op == '__rshift__':
        # arithmetic shift: work on the signed interpretation
        def reduction(s,t):
          flip = 2**(bits-1)
          ssigned = (s ^ flip) - flip
          tsigned = (t ^ flip) - flip
          assert 0 <= tsigned
          assert tsigned < bits
          usigned = ssigned >> tsigned
          return (usigned + flip) ^ flip
      elif t.op == '__and__': reduction = (lambda s,t:s&t)
      elif t.op == '__or__': reduction = (lambda s,t:s|t)
      else: reduction = (lambda s,t:s^t)
      V[t.cache_key] = bits,functools.reduce(reduction,(xi[1] for xi in x))
      return True
    if t.op == '__invert__':
      assert len(x) == 1
      bits = x[0][0]
      V[t.cache_key] = bits,(1<<bits)-1-x[0][1]
      return True
    if t.op == 'Not':
      assert len(x) == 1
      assert all(xi[0] == 1 for xi in x)
      V[t.cache_key] = 1,1-x[0][1]
      return True
    if t.op in ('And','Or'):
      assert all(xi[0] == 1 for xi in x)
      if t.op == 'And': reduction = (lambda s,t:s*t)
      else: reduction = (lambda s,t:s+t-s*t)
      V[t.cache_key] = 1,functools.reduce(reduction,(xi[1] for xi in x))
      return True
    if t.op == 'If':
      # args are (condition, value if true, value if false)
      assert len(x) == 3
      assert x[0][0] == 1
      if x[0][1]:
        V[t.cache_key] = x[1]
      else:
        V[t.cache_key] = x[2]
      return True
    if t.op in ('__le__','ULE','__lt__','ULT','__ge__','UGE','__gt__','UGT','SLE','SLT','SGE','SGT'):
      assert len(x) == 2
      bits = x[0][0]
      assert bits == x[1][0]
      # flipping the top bit maps signed order onto unsigned order
      flip = 2**(bits-1)
      x0,x1 = x[0][1],x[1][1]
      if t.op in ('__le__','ULE'): V[t.cache_key] = (1,x0<=x1)
      elif t.op in ('__lt__','ULT'): V[t.cache_key] = (1,x0<x1)
      elif t.op in ('__ge__','UGE'): V[t.cache_key] = (1,x0>=x1)
      elif t.op in ('__gt__','UGT'): V[t.cache_key] = (1,x0>x1)
      elif t.op == 'SLE': V[t.cache_key] = (1,(x0^flip)<=(x1^flip))
      elif t.op == 'SLT': V[t.cache_key] = (1,(x0^flip)<(x1^flip))
      elif t.op == 'SGE': V[t.cache_key] = (1,(x0^flip)>=(x1^flip))
      else: V[t.cache_key] = (1,(x0^flip)>(x1^flip))
      return True
    # XXX: add support for more
    print('values: unsupported operation %s, falling back to Z3' % t.op)
    return False
  for t in terms:
    if not evaluate(t): return None
  return V
def unroll_print(outputs,unrolled,f):
  """Write the expression trees in unrolled to f, one assignment per line.

  Every distinct term gets a number n in order of first completion and is
  written as 'v<n> = ...' with its arguments referenced by number.  After
  all terms, one 'out_<csymbol>_<i> = v<n>' line is written per output
  entry; unrolled must hold one term per entry of outputs, in order.
  """
  numbering = {}
  def walk(t):
    # returns the number of t, printing t (and its arguments) if needed
    if t in numbering: return numbering[t]
    op,args = t.op,t.args
    if op == 'BoolV':
      rhs = 'bool(%d)' % args[0]
    elif op == 'BVV':
      rhs = 'constant(%d,%d)' % (t.size(),args[0])
    elif op == 'BVS':
      rhs = '%s' % args[0]
    elif op == 'Extract':
      assert len(args) == 3
      rhs = 'Extract(v%d,%d,%d)' % (walk(args[2]),args[0],args[1])
    elif op in ('SignExt','ZeroExt'):
      assert len(args) == 2
      rhs = '%s(v%d,%d)' % (op,walk(args[1]),args[0])
    else:
      rhs = '%s(%s)' % (op,','.join(['v%d' % walk(a) for a in args]))
    # numbered only after the arguments, which have claimed theirs by now
    number = len(numbering)+1
    f.write('v%d = %s\n' % (number,rhs))
    numbering[t] = number
    return number
  for x in unrolled:
    walk(x)
  position = 0
  for csymbol,bitsperentry,entries in outputs:
    for i in range(entries):
      f.write('%s = v%s\n' % ('out_%s_%d'%(csymbol,i),walk(unrolled[position])))
      position += 1
def unroll_inputvars(inputs):
  """Return [(varname,variable)] with one claripy BVS per input entry.

  Variables are named in_<csymbol>_<i> and are bitsperentry bits wide.
  """
  return [
    (varname,claripy.BVS(varname,bitsperentry,explicit_name=True))
    for csymbol,bitsperentry,entries in inputs
    for varname in ('in_%s_%d'%(csymbol,i) for i in range(entries))
  ]
# XXX: probably better to merge into unroll()
def unroll_worker(binary,inputs,outputs):
  """Symbolically execute binary with angr and collect its outputs.

  inputs and outputs are lists of (csymbol,bitsperentry,entries).

  Returns (numexits,ispartition,results):
    * (-1,False,simgr.errored) if any state errored;
    * otherwise numexits is the number of deadended states, results holds one
      expression per output entry (merged over all exits with If), and
      ispartition says whether the solver found the exits' path constraints
      to be exhaustive and pairwise exclusive.
  """
  results = []
  proj = angr.Project(binary,exclude_sim_procedures_list=avoidsimprocedures)
  state = proj.factory.full_init_state()
  state.options |= {angr.options.LAZY_SOLVES}
  state.options |= {angr.options.SYMBOL_FILL_UNCONSTRAINED_MEMORY}
  state.options |= {angr.options.SYMBOL_FILL_UNCONSTRAINED_REGISTERS}
  # NOTE(review): presumably the SIMPLIFY_* options are removed so that the
  # collected expressions stay exactly as executed -- confirm against angr docs
  state.options -= {angr.options.SIMPLIFY_EXPRS}
  state.options -= {angr.options.SIMPLIFY_REGISTER_WRITES}
  state.options -= {angr.options.SIMPLIFY_MEMORY_WRITES}
  state.options -= {angr.options.SIMPLIFY_REGISTER_READS}
  state.options -= {angr.options.SIMPLIFY_MEMORY_READS}
  # store a fresh symbolic variable in_<csymbol>_<i> in every entry of the
  # harness's static_<csymbol> arrays
  for csymbol,bitsperentry,entries in inputs:
    xaddr = proj.loader.find_symbol('static_%s'%csymbol).rebased_addr
    for i in range(entries):
      varname = 'in_%s_%d'%(csymbol,i)
      variable = claripy.BVS(varname,bitsperentry,explicit_name=True)
      # entry i lives at byte offset i*bitsperentry/8
      if bitsperentry == 8:
        state.mem[xaddr+i].char = variable
      elif bitsperentry == 16:
        state.mem[xaddr+2*i].short = variable
      elif bitsperentry == 32:
        state.mem[xaddr+4*i].int = variable
      elif bitsperentry == 64:
        state.mem[xaddr+8*i].long = variable
  simgr = proj.factory.simgr(state)
  simgr.run()
  if len(simgr.errored) > 0:
    # -1 exits signals the failure; the errored states go back for the note
    return -1,False,simgr.errored
  exits = simgr.deadended
  assert len(exits) > 0
  # cannot be safe if there are multiple exits
  # for equivalence tests we'll merge exits below
  mergedconstraints = []
  for epos,e in enumerate(exits):
    # conjunction of all path constraints of this exit
    mergedconstraint = e.solver.true
    for c in e.solver.constraints:
      mergedconstraint = e.solver.And(mergedconstraint,c)
    mergedconstraints += [mergedconstraint]
    resultpos = 0
    for csymbol,bitsperentry,entries in outputs:
      xaddr = proj.loader.find_symbol('static_%s'%csymbol).rebased_addr
      for i in range(entries):
        if bitsperentry == 8:
          xi = e.mem[xaddr+i].char.resolved
        elif bitsperentry == 16:
          xi = e.mem[xaddr+2*i].short.resolved
        elif bitsperentry == 32:
          xi = e.mem[xaddr+4*i].int.resolved
        elif bitsperentry == 64:
          xi = e.mem[xaddr+8*i].long.resolved
        if epos == 0:
          # the first exit seeds the result list
          assert len(results) == resultpos
          results += [xi]
        else:
          # later exits select their own value when their constraints hold
          results[resultpos] = e.solver.If(mergedconstraint,xi,results[resultpos])
        resultpos += 1
    assert resultpos == len(results)
  assert len(mergedconstraints) == len(exits)
  ispartition = True
  # are mergedconstraints a partition of all universes?
  # i.e.: in each universe, exactly one of the constraints is satisfied?
  # first: is there a universe in which no constraint holds?
  s = claripy.Solver()
  for c in mergedconstraints:
    s.add(claripy.Not(c))
  if s.satisfiable():
    ispartition = False
  # second: is there a universe in which two constraints hold at once?
  for i in range(len(exits)):
    for j in range(i):
      s = claripy.Solver()
      s.add(mergedconstraints[i])
      s.add(mergedconstraints[j])
      if s.satisfiable():
        ispartition = False
  return len(exits),ispartition,results
def unroll(o,p,i,compiler):
  """Unroll one build with angr and validate the result against execution.

  Writes the unrolled expressions to <builddir>/analysis/unrolled, then checks
  for every executed test vector that the expressions evaluate to the outputs
  the real CPU produced.

  Returns (o,p,i,compiler,unrolled) where unrolled is the list of output
  expressions, or False after noting warning-unrollerror,
  warning-unrollnotpartition or warning-unrollmismatch.  The notes
  unsafe-unrollsplit-<n>, warning-unrollmem and warning-unusedinputs are
  recorded without aborting.
  """
  compilerword = compiler.replace(' ','_').replace('=','_')
  builddir = 'build/%s/%s/%s' % (p,i,compilerword)
  inputs,outputs = op_api[o,p][:2]
  unrolltime = -cputime()
  numexits,ispartition,unrolled = unroll_worker('%s/analysis-angr'%builddir,inputs,outputs)
  if numexits < 1:
    note(builddir,'warning-unrollerror',unrolled)
    return o,p,i,compiler,False
  if not ispartition:
    note(builddir,'warning-unrollnotpartition')
    return o,p,i,compiler,False
  if numexits > 1:
    note(builddir,'unsafe-unrollsplit-%d'%numexits)
  with open('%s/analysis/unrolled' % builddir,'w') as f:
    unroll_print(outputs,unrolled,f)
  okvars = set(vname for vname,v in unroll_inputvars(inputs))
  usedvars = set(v for x in unrolled for v in x.variables)
  if not usedvars.issubset(okvars):
    note(builddir,'warning-unrollmem')
  if not okvars.issubset(usedvars):
    note(builddir,'warning-unusedinputs')
  for x,y in zip(op_x[o,p],opic_y[o,p,i,compiler]):
    # cpu gave us outputs y given inputs x
    # does this match unrolled?
    replacements = {}
    xpos = 0
    for csymbol,bitsperentry,entries in inputs:
      for e in range(entries):
        varname = 'in_%s_%d'%(csymbol,e)
        variable = claripy.BVS(varname,bitsperentry,explicit_name=True)
        replacements[variable.cache_key] = claripy.BVV(x[xpos],bitsperentry)
        xpos += 1
    assert xpos == len(x)
    # fast path: V maps cache_key to (bits,value); compare the value part
    V = values(unrolled,replacements)
    if V is not None and all(yi == V[unrolledi.cache_key][1] for (yi,unrolledi) in zip(y,unrolled)):
      continue
    # the fast evaluator either gave up or disagrees with the cpu;
    # let Z3 make the final decision on the concretized expressions
    concrete = [unrolledi.replace_dict(replacements) for unrolledi in unrolled]
    s = claripy.Solver()
    mismatch = claripy.false
    for yi,concretei in zip(y,concrete):
      mismatch = claripy.Or(mismatch,concretei != yi)
    s.add(mismatch)
    if not s.satisfiable():
      continue
    # report the inputs that were actually executed, not solver-chosen ones
    notestr = input_example_str(inputs,x)
    pos = 0
    for csymbol,bitsperentry,entries in outputs:
      for e in range(entries):
        varname = 'out_%s_%d'%(csymbol,e)
        notestr += 'executed_%s = %s\n' % (varname,y[pos])
        notestr += 'unrolled_%s = %s\n' % (varname,s.eval(concrete[pos],1)[0])
        pos += 1
    note(builddir,'warning-unrollmismatch',notestr)
    return o,p,i,compiler,False
  unrolltime += cputime()
  notetime(builddir,'unroll',unrolltime)
  return o,p,i,compiler,unrolled
def wanttounroll():
  """Yield (o,p,i,compiler) for every build that has execution results."""
  for o,p in primitives:
    yield from ((o,p,i,compiler) for i,compiler in op_compiled[o,p] if (o,p,i,compiler) in opic_y)

# opic_unrolled[o,p,i,compiler] holds the unrolled output expressions.
opic_unrolled = {}
with multiprocessing.Pool(os_cores) as pool:
  for o,p,i,compiler,unrolled in pool.starmap(unroll,wanttounroll()):
    if unrolled is False: continue
    opic_unrolled[o,p,i,compiler] = unrolled
print('----- compareunrolled (can take tons of time)')
def compareunrolled(o,p,i,compiler,source,sourcecompiler):
  """Compare build (i,compiler) with the reference build (source,sourcecompiler).

  First compares the executed test outputs, noting
  unsafe-randomtest-<pos>-differentfrom-<source>-<compiler> for each
  differing vector.  Then asks the solver whether any input makes the two
  unrolled expression lists differ and notes unsafe-differentfrom-... (with a
  counterexample) or equals-....  A solver crash is noted as warning-z3failed.
  """
  builddir = 'build/%s/%s/%s' % (p,i,compiler.replace(' ','_').replace('=','_'))
  sourcecompilerword = sourcecompiler.replace(' ','_').replace('=','_')
  inputs,outputs = op_api[o,p][:2]
  for pos,(x,y,z) in enumerate(zip(op_x[o,p],opic_y[o,p,i,compiler],opic_y[o,p,source,sourcecompiler])):
    if y == z: continue
    note(builddir,'unsafe-randomtest-%d-differentfrom-%s-%s' % (pos,source,sourcecompilerword),input_example_str(inputs,x))
    # could return at this point to save time
    # but to help validate symbolic testing we also want to see symbolic testing fail
  equivtime = -cputime()
  u1 = opic_unrolled[o,p,source,sourcecompiler]
  u2 = opic_unrolled[o,p,i,compiler]
  assert len(u1) == len(u2)
  # XXX: allow other equivalence-testing techniques
  s = claripy.Solver()
  # satisfiable iff some output entry can differ
  different = functools.reduce(claripy.Or,(u1j != u2j for u1j,u2j in zip(u1,u2)),claripy.false)
  s.add(different)
  try:
    mismatch = s.satisfiable()
  except claripy.errors.ClaripyZ3Error:
    # avoid crashing on the sort of bug fixed in https://github.com/angr/angr/pull/2887
    note(builddir,'warning-z3failed',traceback.format_exc())
    return
  if not mismatch:
    note(builddir,'equals-%s-%s' % (source,sourcecompilerword))
  else:
    # angr documentation says:
    # "If you don't add any constraints between two queries, the results will be consistent with each other."
    example = ''.join('%s = %s\n' % (vname,s.eval(v,1)[0]) for vname,v in unroll_inputvars(inputs))
    unrolledpos = 0
    for csymbol,bitsperentry,entries in outputs:
      for e in range(entries):
        varname = 'out_%s_%d'%(csymbol,e)
        example += 'source_%s = %s\n' % (varname,s.eval(u1[unrolledpos],1)[0])
        example += 'target_%s = %s\n' % (varname,s.eval(u2[unrolledpos],1)[0])
        unrolledpos += 1
    note(builddir,'unsafe-differentfrom-%s-%s' % (source,sourcecompilerword),example)
  equivtime += cputime()
  notetime(builddir,'equiv',equivtime)
def wanttocompareunrolled():
  """Yield (o,p,i,compiler,source,sourcecompiler) for every comparison to run.

  The reference implementation is 'ref'.  'ref' itself is compared against
  its build with the first compiler; every other implementation is compared
  against 'ref' built with the same compiler.  Pairs lacking unrolled
  expressions, and the comparison of a build with itself, are skipped.
  """
  source = 'ref' # XXX: allow each implementation to choose source
  for o,p in primitives:
    for i,compiler in op_compiled[o,p]:
      # XXX: maybe also allow choice
      sourcecompiler = compilerlist[0] if i == 'ref' else compiler
      if (o,p,i,compiler) not in opic_unrolled: continue
      if (o,p,source,sourcecompiler) not in opic_unrolled: continue
      # XXX: could also do self-tests
      if (source,sourcecompiler) == (i,compiler): continue
      yield o,p,i,compiler,source,sourcecompiler

with multiprocessing.Pool(os_cores) as pool:
  pool.starmap(compareunrolled,wanttocompareunrolled())