Package vivisect :: Package impemu :: Module emulator
[hide private]
[frames | no frames]

Source Code for Module vivisect.impemu.emulator

  1  import struct 
  2  import traceback 
  3  import itertools 
  4   
  5  import envi 
  6  import envi.bits as e_bits 
  7  import envi.memory as e_mem 
  8  import envi.registers as e_reg 
  9   
 10  import visgraph.pathcore as vg_path 
 11   
 12  from vivisect.const import * 
 13   
 14  # Pre-initialize a stack memory bytes 
 15  init_stack_map = '' 
 16  for i in xrange(8192/4): 
 17      init_stack_map += struct.pack("<I", 0xfefe0000+(i*4)) 
 18   
def imphook(impname):
    """
    Decorator factory which tags a callable as the hook for the import
    named impname.

    The decorator it returns stores impname on the callable as the
    __imphook__ attribute and hands the very same callable back.
    """
    def tag(func):
        setattr(func, '__imphook__', impname)
        return func

    return tag
27 -class WorkspaceEmulator:
28 29 taintregs = [] 30
31 - def __init__(self, vw, logwrite=False, logread=False):
32 33 self.vw = vw 34 self.funcva = None # Set if using runFunction 35 36 self.hooks = {} 37 self.taints = {} 38 self.taintva = itertools.count(0x41560000, 8192) 39 40 self.uninit_use = {} 41 self.logwrite = logwrite 42 self.logread = logread 43 self.path = self.newCodePathNode() 44 self.curpath = self.path 45 self.opcache = {} 46 self.emumon = None 47 self.psize = self.getPointerSize() 48 49 self.stack_map_mask = e_bits.sign_extend(0xfff00000, 4, vw.psize) 50 self.stack_map_base = e_bits.sign_extend(0xbfb00000, 4, vw.psize) 51 self.stack_pointer = self.stack_map_base + 4096 52 53 # Possibly need an "options" API? 54 self._safe_mem = True # Should we be forgiving about memory accesses? 55 self._func_only = True # is this emulator meant to stay in one function scope? 56 57 # Map in a memory map for the stack 58 self.addMemoryMap(self.stack_map_base, 6, "[stack]", init_stack_map) 59 60 # Map in all the memory associated with the workspace 61 for va, size, perms, fname in vw.getMemoryMaps(): 62 offset, bytes = vw.getByteDef(va) 63 self.addMemoryMap(va, perms, fname, bytes) 64 65 for regidx in self.taintregs: 66 rname = self.getRegisterName(regidx) 67 regval = self.setVivTaint( 'uninitreg', regidx ) 68 self.setRegister(regidx, regval) 69 70 self.setStackCounter(self.stack_pointer) 71 72 # Create some pre-made taints for positive stack indexes 73 # NOTE: This is *ugly* for speed.... 74 taints = [ self.setVivTaint('funcstack', i * self.psize) for i in xrange(20) ] 75 taintbytes = ''.join([ e_bits.buildbytes(taint,self.psize) for taint in taints ]) 76 self.writeMemory(self.stack_pointer, taintbytes ) 77 78 for name in dir(self): 79 val = getattr(self, name, None) 80 if val == None: 81 continue 82 83 impname = getattr(val, '__imphook__',None) 84 if impname == None: 85 continue 86 87 self.hooks[impname] = val
88
89 - def getPathProp(self, key):
90 ''' 91 Retrieve a named value from the current code path context. 92 ''' 93 return vg_path.getNodeProp(self.curpath, key)
94
95 - def setPathProp(self, key, value):
96 """ 97 Set a named value which is only relevant for the current code path. 98 """ 99 return vg_path.setNodeProp(self.curpath, key, value)
100
101 - def setEmulationMonitor(self, emumon):
102 """ 103 Snap in an emulation monitor. (see EmulationMonitor doc from vivisect.impemu) 104 """ 105 self.emumon = emumon
106
107 - def parseOpcode(self, pc):
108 # We can make an opcode *faster* with the workspace because of 109 # getByteDef etc... use it. 110 op = self.opcache.get(pc) 111 if op == None: 112 op = envi.Emulator.parseOpcode(self, pc) 113 self.opcache[pc] = op 114 return op
115
116 - def checkCall(self, starteip, endeip, op):
117 """ 118 Check if this was a call, and if so, do the required 119 import emulation and such... 120 """ 121 iscall = bool(op.iflags & envi.IF_CALL) 122 if iscall: 123 api = self.getCallApi(endeip) 124 rtype,rname,convname,callname,funcargs = api 125 126 callconv = self.getCallingConvention(convname) 127 argv = callconv.getCallArgs(self, len(funcargs)) 128 129 ret = None 130 if self.emumon != None: 131 try: 132 ret = self.emumon.apicall(self, op, endeip, api, argv) 133 except Exception, e: 134 self.emumon.logAnomaly(endeip, "%s.apicall failed: %s" % (self.emumon.__class__.__name__, e)) 135 136 hook = self.hooks.get(callname) 137 if ret == None and hook: 138 hook( self, callconv, api, argv ) 139 140 else: 141 142 if ret == None: 143 ret = self.setVivTaint('apicall', (op,endeip,api,argv)) 144 145 callconv.execCallReturn( self, ret, len(funcargs) ) 146 147 # Either way, if it's a call PC goes to next instruction 148 if self._func_only: 149 self.setProgramCounter(starteip+len(op)) 150 151 return iscall
152
153 - def newCodePathNode(self, parent=None, bva=None):
154 ''' 155 NOTE: Right now, this is only called from the actual branch state which 156 needs it. it must stay that way for now (register context is being copied 157 for symbolic emulator...) 158 ''' 159 props = { 160 'bva':bva, # the entry virtual address for this branch 161 'valist':[], # the virtual addresses in this node in order 162 'calllog':[], # FIXME is this even used? 163 'readlog':[], # a log of all memory reads from this block 164 'writelog':[],# a log of all memory writes from this block 165 } 166 ret = vg_path.newPathNode(parent=parent, **props) 167 return ret
168
169 - def getBranchNode(self, node, bva):
170 ''' 171 If a node exists already for the specified branch, return it. Otherwise, 172 create a new one and return that... 173 ''' 174 for knode in vg_path.getNodeKids(node): 175 if vg_path.getNodeProp(knode, 'bva') == bva: 176 return knode 177 return self.newCodePathNode(node, bva)
178
179 - def checkBranches(self, starteip, endeip, op):
180 """ 181 This routine gets the current branch list for this opcode, adds branch 182 entries to the current path, and updates current path as needed 183 (returns a list of (va, CodePath) tuples. 184 """ 185 186 ret = [] 187 # Add all the known branches to the list 188 blist = op.getBranches(emu=self) 189 190 # FIXME this should actually check for conditional... 191 # If there is more than one branch target, we need a new code block 192 if len(blist) > 1: 193 for bva,bflags in blist: 194 if bva == None: 195 print "Unresolved branch even WITH an emulator?" 196 continue 197 198 bpath = self.getBranchNode(self.curpath, bva) 199 ret.append((bva, bpath)) 200 201 return ret
202
203 - def stepi(self):
204 # NOTE: when we step, we *always* want to be stepping over calls 205 # (and possibly import emulate them) 206 starteip = self.getProgramCounter() 207 208 # parse out an opcode 209 op = self.parseOpcode(starteip) 210 if self.emumon: 211 self.emumon.prehook(self, op, starteip) 212 213 # Execute the opcode 214 self.executeOpcode(op) 215 vg_path.getNodeProp(self.curpath, 'valist').append(starteip) 216 217 endeip = self.getProgramCounter() 218 219 if self.emumon: 220 self.emumon.posthook(self, op, endeip) 221 222 if not self.checkCall(starteip, endeip, op): 223 self.checkBranches(starteip, endeip, op)
224
    def runFunction(self, funcva, stopva=None, maxhit=None, maxloop=None):
        """
        This is a utility function specific to WorkspaceEmulation (and impemu) that
        will emulate, but only inside the given function.  You may specify a stopva
        to return once that location is hit.

        funcva  - address emulation starts from (also recorded as self.funcva)
        stopva  - when the program counter equals this value, return immediately
        maxhit  - when not None, a branch is abandoned once any single address
                  has been reached more than maxhit times (counted across all
                  branches of this run)
        maxloop - when not None, a queued branch is skipped if
                  vg_path.getPathLoopCount for its address exceeds maxloop

        Work proceeds from a stack of (va, emulator snapshot, path node)
        entries: each multi-way branch pushes one entry per target and the
        current branch ends.  Always returns None.
        """

        self.funcva = funcva

        # Let the current (should be base also) path know where we are starting
        vg_path.setNodeProp(self.curpath, 'bva', funcva)

        hits = {}
        todo = [(funcva,self.getEmuSnap(),self.path),]
        vw = self.vw # Save a dereference many many times

        while len(todo):

            # Take the most recently queued branch and restore the state
            # which was captured when it was queued.
            va,esnap,self.curpath = todo.pop()

            self.setEmuSnap(esnap)

            self.setProgramCounter(va)

            # Check if we are beyond our loop max...
            if maxloop != None:
                lcount = vg_path.getPathLoopCount(self.curpath, 'bva', va)
                if lcount > maxloop:
                    continue

            # Run this branch one instruction at a time until it ends
            while True:

                starteip = self.getProgramCounter()

                # Execution wandered outside of workspace memory
                if not vw.isValidPointer(starteip):
                    break

                if starteip == stopva:
                    return

                # Check straight hit count...
                if maxhit != None:
                    h = hits.get(starteip, 0)
                    h += 1
                    if h > maxhit:
                        break
                    hits[starteip] = h

                # If we ran out of path (branches that went
                # somewhere that we couldn't follow?
                if self.curpath == None:
                    break

                try:

                    # FIXME unify with stepi code...
                    op = self.parseOpcode(starteip)
                    if self.emumon:
                        self.emumon.prehook(self, op, starteip)

                    # Execute the opcode
                    self.executeOpcode(op)
                    vg_path.getNodeProp(self.curpath, 'valist').append(starteip)

                    endeip = self.getProgramCounter()

                    if self.emumon:
                        self.emumon.posthook(self, op, endeip)

                    iscall = self.checkCall(starteip, endeip, op)

                    # If it wasn't a call, check for branches, if so, add them to
                    # the todo list and go around again...
                    if not iscall:
                        blist = self.checkBranches(starteip, endeip, op)
                        if len(blist):
                            # pc in the snap will be wrong, but over-ridden at restore
                            esnap = self.getEmuSnap()
                            for bva,bpath in blist:
                                todo.append((bva, esnap, bpath))
                            break

                    # If we encounter a procedure exit, it doesn't
                    # matter what EIP is, we're done here.
                    if op.iflags & envi.IF_RET:
                        vg_path.setNodeProp(self.curpath, 'cleanret', True)
                        break

                except Exception, e:
                    #traceback.print_exc()
                    # Report the failure to the monitor (when one is set);
                    # without a monitor the exception is dropped silently.
                    if self.emumon != None:
                        self.emumon.logAnomaly(starteip, str(e))

                    break # If we exc during execution, this branch is dead.
319
320 - def getCallApi(self, va):
321 ''' 322 Retrieve an API definition from either the vivisect workspace 323 ( if the call target is a function within the workspace ) or 324 the impapi definition subsystem ( if the call target is a known 325 import definition ) 326 ''' 327 vw = self.vw 328 ret = None 329 330 if vw.isFunction(va): 331 ret = vw.getFunctionApi(va) 332 if ret != None: 333 return ret 334 335 else: 336 337 taint = self.getVivTaint(va) 338 if taint: 339 tva,ttype,tinfo = taint 340 341 if ttype == 'import': 342 lva,lsize,ltype,linfo = tinfo 343 ret = vw.getImpApi( linfo ) 344 345 elif ttype == 'dynfunc': 346 libname,funcname = tinfo 347 ret = vw.getImpApi('%s.%s' % (libname,funcname)) 348 349 if ret: 350 return ret 351 352 defcall = vw.getMeta("DefaultCall") 353 return ('int', None, defcall, 'UnknownApi', () )
354
355 - def nextVivTaint(self):
356 # One page into the new taint range 357 return self.taintva.next() + 4096
358
359 - def setVivTaint(self, typename, taint):
360 ''' 361 Set a taint in the emulator. Returns the new value for 362 the created taint. 363 ''' 364 va = self.nextVivTaint() 365 self.taints[ va & 0xffffe000 ] = (va,typename,taint) 366 return va
367
368 - def getVivTaint(self, va):
369 ''' 370 Retrieve a previously registered taint ( this will automagically 371 mask values down and allow you to retrieve "near taint" values.) 372 ''' 373 return self.taints.get( va & 0xffffe000 )
374
375 - def reprVivTaint(self, taint):
376 ''' 377 For the base "known" taint types, return a humon readable string 378 to represent the value of the taint. 379 ''' 380 va,ttype,tinfo = taint 381 if ttype == 'uninitreg': 382 return self.getRegisterName(tinfo) 383 384 if ttype == 'import': 385 lva,lsize,ltype,linfo = tinfo 386 return linfo 387 388 if ttype == 'dynlib': 389 libname = tinfo 390 return libname 391 392 if ttype == 'dynfunc': 393 libname,funcname = tinfo 394 return '%s.%s' % (libname,funcname) 395 396 if ttype == 'funcstack': 397 stackoff = tinfo 398 if self.funcva: 399 flocal = self.vw.getFunctionLocal(self.funcva, stackoff) 400 if flocal != None: 401 typename,argname = flocal 402 return argname 403 404 o = '+' 405 if stackoff < 0: 406 o = '-' 407 408 return 'sp%s%d' % (o, abs(stackoff)) 409 410 if ttype == 'apicall': 411 op,pc,api,argv = tinfo 412 rettype,retname,callconv,callname,callargs = api 413 callstr = self.reprVivValue( pc ) 414 argsstr = ','.join([ self.reprVivValue( x ) for x in argv]) 415 return '%s(%s)' % (callstr,argsstr) 416 417 return 'taint: 0x%.8x %s %r' % (va, ttype, tinfo)
418
419 - def reprVivValue(self, val):
420 ''' 421 Return a humon readable string which is the best description for 422 the given value ( given knowledge of the workspace, emu, 423 and taint subsystems ). 424 ''' 425 if self.vw.isFunction(val): 426 thunk = self.vw.getFunctionMeta(val,'Thunk') 427 if thunk: 428 return thunk 429 430 vivname = self.vw.getName(val) 431 if vivname: 432 return vivname 433 434 taint = self.getVivTaint(val) 435 if taint: 436 return self.reprVivTaint(taint) 437 438 stackoff = self.getStackOffset(val) 439 if stackoff != None: 440 funclocal = self.vw.getFunctionLocal(self.funcva, stackoff) 441 if funclocal != None: 442 typename,varname = funclocal 443 return varname 444 445 if val < 4096: 446 return str(val) 447 448 return '0x%.8x' % val
449
450 - def _useVirtAddr(self, va):
451 taint = self.getVivTaint(va) 452 if taint == None: 453 return 454 455 tva,ttype,tinfo = taint 456 457 if ttype == 'uninitreg': 458 self.logUninitRegUse(tinfo)
459
460 - def writeMemory(self, va, bytes):
461 """ 462 Try to write the bytes to the memory object, otherwise, dont' 463 complain... 464 """ 465 if self.logwrite: 466 wlog = vg_path.getNodeProp(self.curpath, 'writelog') 467 wlog.append((self.getProgramCounter(),va,bytes)) 468 469 self._useVirtAddr( va ) 470 471 # It's totally ok to write to invalid memory during the 472 # emulation pass (as long as safe_mem is true...) 473 probeok = self.probeMemory(va, len(bytes), e_mem.MM_WRITE) 474 if self._safe_mem and not probeok: 475 return 476 477 return e_mem.MemoryObject.writeMemory(self, va, bytes)
478
479 - def logUninitRegUse(self, regid):
480 self.uninit_use[regid] = True
481
482 - def getUninitRegUse(self):
483 return self.uninit_use.keys()
484
485 - def readMemory(self, va, size):
486 487 if self.logread: 488 rlog = vg_path.getNodeProp(self.curpath, 'readlog') 489 rlog.append((self.getProgramCounter(),va,size)) 490 491 # If they read an import entry, start a taint... 492 loc = self.vw.getLocation(va) 493 if loc != None: 494 lva, lsize, ltype, ltinfo = loc 495 if ltype == LOC_IMPORT and lsize == size: # They just read an import. 496 ret = self.setVivTaint('import', loc) 497 return e_bits.buildbytes(ret, lsize) 498 499 self._useVirtAddr(va) 500 501 # Read from the emulator's pages if we havent resolved it yet 502 probeok = self.probeMemory(va, size, e_mem.MM_READ) 503 if self._safe_mem and not probeok: 504 return 'A' * size 505 506 return e_mem.MemoryObject.readMemory(self, va, size)
507 508 # Some APIs for telling if pointers are in runtime memory regions 509
510 - def isUninitStack(self, val):
511 """ 512 If val is a numerical value in the same memory page 513 as the un-initialized stack values return True 514 """ 515 #NOTE: If uninit_stack_byte changes, so must this! 516 if (val & 0xfffff000) == 0xfefef000: 517 return True 518 return False
519
520 - def isStackPointer(self, va):
521 return (va & self.stack_map_mask) == self.stack_map_base
522
523 - def getStackOffset(self, va):
524 if (va & self.stack_map_mask) == self.stack_map_base: 525 return va - self.stack_pointer
526