Package envi :: Package archs :: Package amd64 :: Module disasm
[hide private]
[frames] | no frames]

Source Code for Module envi.archs.amd64.disasm

  1  import struct 
  2   
  3  import envi 
  4  import envi.bits as e_bits 
  5  import envi.archs.i386 as e_i386 
  6  import envi.archs.i386.disasm as ed_i386 
  7  import opcode64 as opcode86 
  8  all_tables = opcode86.tables86 
  9   
 10  from envi.archs.i386.disasm import iflag_lookup, operand_range, priv_lookup, \ 
 11          i386Opcode, i386ImmOper, i386RegOper, i386ImmMemOper, i386RegMemOper, \ 
 12          i386SibOper 
 13  from envi.archs.amd64.regs import * 
 14   
 15  # Pre generate these for fast lookup. Because our REX prefixes have the same relative 
 16  # bit relationship to eachother, we can cheat a little... 
 17  amd64_prefixes = list(e_i386.i386_prefixes) 
 18  amd64_prefixes[0x40] = (0x10 << 16) 
 19  amd64_prefixes[0x41] = (0x11 << 16) 
 20  amd64_prefixes[0x42] = (0x12 << 16) 
 21  amd64_prefixes[0x43] = (0x13 << 16) 
 22  amd64_prefixes[0x44] = (0x14 << 16) 
 23  amd64_prefixes[0x45] = (0x15 << 16) 
 24  amd64_prefixes[0x46] = (0x16 << 16) 
 25  amd64_prefixes[0x47] = (0x17 << 16) 
 26  amd64_prefixes[0x48] = (0x18 << 16) 
 27  amd64_prefixes[0x49] = (0x19 << 16) 
 28  amd64_prefixes[0x4a] = (0x1a << 16) 
 29  amd64_prefixes[0x4b] = (0x1b << 16) 
 30  amd64_prefixes[0x4c] = (0x1c << 16) 
 31  amd64_prefixes[0x4d] = (0x1d << 16) 
 32  amd64_prefixes[0x4e] = (0x1e << 16) 
 33  amd64_prefixes[0x4f] = (0x1f << 16) 
 34   
 35  # NOTE: some notes from the intel manual... 
 36  # REX.W overrides 66, but alternate registers (via REX.B etc..) can have 66 to be 16 bit.. 
 37  # REX.R only modifies reg for GPR/SSE(SIMD)/ctrl/debug addressing modes. 
 38  # REX.X only modifies the SIB index value 
 39  # REX.B modifies modrm r/m field, or SIB base (if SIB present), or opcode reg. 
 40  # We inherit all the regular intel prefixes... 
 41  PREFIX_REX   = 0x100000 # Shows that the rex prefix is present 
 42  PREFIX_REX_B = 0x010000 # Bit 0 in REX prefix (0x41) means ModR/M r/m field, SIB base, or opcode reg 
 43  PREFIX_REX_X = 0x020000 # Bit 1 in REX prefix (0x42) means SIB index extension 
 44  PREFIX_REX_R = 0x040000 # Bit 2 in REX prefix (0x44) means ModR/M reg extention 
 45  PREFIX_REX_W = 0x080000 # Bit 3 in REX prefix (0x48) means 64 bit operand 
 46   
 47  REX_BUMP = 8 
 48  MODE_16 = 0 
 49  MODE_32 = 1 
 50  MODE_64 = 2 
 51   
class Amd64RipRelOper(envi.DerefOper):
    """
    Memory operand addressed relative to the instruction pointer.

    imm is the displacement and tsize the width (in bytes) of the memory
    access.  The effective address is computed from the *end* of the
    owning instruction: op.va + op.size + imm.
    """

    def __init__(self, imm, tsize):
        self.imm = imm
        self.tsize = tsize
        # The disassembler clears this for lea, which wants the address
        # itself rather than the memory it points to.
        self._is_deref = True

    def getOperValue(self, op, emu=None):
        """
        Return the operand value: the address itself for the lea case,
        otherwise the memory read through emu (None when no emulator is
        supplied).
        """
        if not self._is_deref:  # Special lea behavior
            return self.getOperAddr(op)
        if emu is None:
            return None
        return emu.readMemValue(self.getOperAddr(op, emu), self.tsize)

    def setOperValue(self, op, emu, val):
        """Write val (tsize bytes) to the operand address through emu."""
        emu.writeMemValue(self.getOperAddr(op, emu), val, self.tsize)

    def getOperAddr(self, op, emu=None):
        """Return the effective address; emu is accepted but not needed."""
        return op.va + op.size + self.imm

    def isDeref(self):
        # The disassembler may reach in and set this (if lea...)
        return self._is_deref

    def isDiscrete(self):
        return True

    def render(self, mcanv, op, idx):
        """
        Draw the operand on mcanv as "<size> [rip +/- <target>]", where
        the target is the resolved symbol when one exists at the
        destination address and the absolute displacement otherwise.
        """
        destva = self.getOperAddr(op)
        sym = mcanv.syms.getSymByAddr(destva)

        mcanv.addNameText(e_i386.sizenames[self.tsize])
        mcanv.addText(" [")
        mcanv.addNameText("rip", typename="registers")

        # A zero displacement renders as a bare "[rip]".
        if self.imm != 0:
            mcanv.addText(" + " if self.imm > 0 else " - ")
            if sym is not None:
                mcanv.addVaText("$%s" % repr(sym), destva)
            else:
                mcanv.addNameText(str(abs(self.imm)))

        mcanv.addText("]")

    def repr(self, op):
        return "[rip + %d]" % self.imm
101
class Amd64Disasm(e_i386.i386Disasm):
    """
    AMD64 disassembler built on top of the i386 one.

    It swaps in the amd64 prefix table and register context, and overrides
    the operand-size calculation and the addressing-method decoders so the
    REX prefix bits (W/R/X/B) are taken into account.
    """

    def __init__(self):
        e_i386.i386Disasm.__init__(self)
        self._dis_oparch = envi.ARCH_AMD64
        self._dis_prefixes = amd64_prefixes
        self._dis_regctx = Amd64RegisterContext()
        self.ptrsize = 8

        # Over-ride these which are in use by the i386 version of the ASM
        self.ROFFSETMMX = e_i386.getRegOffset(amd64regs, "mm0")
        self.ROFFSETSIMD = e_i386.getRegOffset(amd64regs, "xmm0")
        self.ROFFSETDEBUG = e_i386.getRegOffset(amd64regs, "debug0")
        self.ROFFSETCTRL = e_i386.getRegOffset(amd64regs, "ctrl0")
        self.ROFFSETTEST = e_i386.getRegOffset(amd64regs, "test0")
        self.ROFFSETSEG = e_i386.getRegOffset(amd64regs, "es")
        self.ROFFSETFPU = e_i386.getRegOffset(amd64regs, "st0")

    # NOTE: Technically, the REX must be the *last* prefix specified

    def _dis_calc_tsize(self, opertype, prefixes, operflags):
        """
        Use the oper type and prefixes to decide on the tsize for
        the operand.

        Raises Exception when opertype has no entry in opcode86.OPERSIZE.
        """
        sizelist = opcode86.OPERSIZE.get(opertype, None)
        if sizelist is None:
            # A bare string cannot be raised; use a real exception object.
            raise Exception("OPERSIZE FAIL: %r" % (opertype,))

        mode = MODE_32
        if operflags & opcode86.OP_64AUTO:
            mode = MODE_64

        # NOTE: REX takes precedence over 66
        # (see section 2.2.1.2 in Intel 2a)
        if prefixes & PREFIX_REX_W:
            mode = MODE_64
        elif prefixes & e_i386.PREFIX_OP_SIZE:
            mode = MODE_16

        return sizelist[mode]

    def disasm(self, bytez, offset, va):
        """
        Decode the instruction found at bytez[offset:] and return an
        i386Opcode for virtual address va.

        Raises envi.InvalidInstruction when the opcode tables yield an
        entry of type 0, or when an operand decoder fails with
        struct.error (not enough bytes).
        """
        # Stuff for opcode parsing
        tabdesc = all_tables[0]  # A tuple (optable, shiftbits, mask byte, sub, max)
        startoff = offset        # Use startoff as a size knob if needed

        # Stuff we'll be putting in the opcode object
        optype = None  # This gets set if we successfully decode below
        mnem = None
        operands = []

        prefixes = 0

        # Consume prefix bytes, accumulating their flag bits.
        while True:
            obyte = ord(bytez[offset])

            # This line changes in 64 bit mode
            p = self._dis_prefixes[obyte]
            if p is None:
                break
            # 66 0f is left for the opcode tables rather than eaten here.
            if obyte == 0x66 and ord(bytez[offset + 1]) == 0x0f:
                break
            prefixes |= p
            offset += 1

        # Walk the opcode tables until we land on a final entry.
        while True:
            obyte = ord(bytez[offset])

            if obyte > tabdesc[4]:
                # Jump to the overflow table
                tabdesc = all_tables[tabdesc[5]]

            tabidx = ((obyte - tabdesc[3]) >> tabdesc[1]) & tabdesc[2]
            opdesc = tabdesc[0][tabidx]

            # Hunt down multi-byte opcodes
            nexttable = opdesc[0]
            if nexttable != 0:  # If we have a sub-table specified, use it.
                tabdesc = all_tables[nexttable]

                # In the case of 66 0f, the next table is *already* assuming we ate
                # the 66 *and* the 0f... oblige them.
                if obyte == 0x66 and ord(bytez[offset + 1]) == 0x0f:
                    offset += 1

                # Account for the table jump we made
                offset += 1
                continue

            # We are now on the final table...
            mnem = opdesc[6]
            optype = opdesc[1]
            if tabdesc[2] == 0xff:
                offset += 1  # For our final opcode byte
            break

        if optype == 0:
            raise envi.InvalidInstruction(bytez=bytez[startoff:startoff + 16], va=va)

        operoffset = 0
        # Begin parsing operands based off address method
        for i in operand_range:

            oper = None  # Set this if we end up with an operand
            osize = 0

            # Pull out the operand description from the table
            operflags = opdesc[i]
            opertype = operflags & opcode86.OPTYPE_MASK
            addrmeth = operflags & opcode86.ADDRMETH_MASK

            # If there are no more operands, break out of the loop!
            if operflags == 0:
                break

            # handles tsize calculations including new REX prefixes
            tsize = self._dis_calc_tsize(opertype, prefixes, operflags)

            # If addrmeth is zero, we have operands embedded in the opcode
            if addrmeth == 0:
                osize = 0
                oper = self.ameth_0(operflags, opdesc[5 + i], tsize, prefixes)

            else:
                ameth = self._dis_amethods[addrmeth >> 16]
                if ameth is None:
                    raise Exception("Implement Addressing Method 0x%.8x" % addrmeth)

                # NOTE: Depending on your addrmethod you may get beginning of operands, or offset
                try:
                    if addrmeth == opcode86.ADDRMETH_I or addrmeth == opcode86.ADDRMETH_J:
                        osize, oper = ameth(bytez, offset + operoffset, tsize, prefixes, operflags)

                        # If we are a sign extended immediate and not the same as the other operand,
                        # do the sign extension during disassembly so nothing else has to worry about it..
                        if operands and tsize != operands[-1].tsize:
                            # Check if we are an explicitly signed operand *or* REX.W
                            if operflags & opcode86.OP_SIGNED or prefixes & PREFIX_REX_W:
                                otsize = operands[-1].tsize
                                oper.imm = e_bits.sign_extend(oper.imm, oper.tsize, otsize)
                                oper.tsize = otsize

                    else:
                        osize, oper = ameth(bytez, offset, tsize, prefixes, operflags)

                except struct.error:
                    # Catch struct unpack errors due to insufficient data length
                    raise envi.InvalidInstruction(bytez=bytez[startoff:startoff + 16], va=va)

            if oper is not None:
                # This is a filthy hack for now...
                oper._dis_regctx = self._dis_regctx
                operands.append(oper)

            operoffset += osize

        # Pull in the envi generic instruction flags
        iflags = iflag_lookup.get(optype, 0) | self._dis_oparch

        if priv_lookup.get(mnem, False):
            iflags |= envi.IF_PRIV

        # Lea will have a reg-mem/sib operand with _is_deref True, but should be false
        if optype == opcode86.INS_LEA:
            operands[1]._is_deref = False

        return i386Opcode(va, optype, mnem, prefixes, (offset - startoff) + operoffset, operands, iflags)

    def parse_modrm(self, byte, prefixes=0):
        """
        Split a ModR/M byte (already an integer) into (mod, reg, rm).

        REX.R adds 8 to reg.  REX.B adds 8 to rm unless the byte announces
        a SIB (mod != 3 and rm == 4), in which case REX.B belongs to the
        SIB base instead.
        """
        mod = (byte >> 6) & 0x3
        reg = (byte >> 3) & 0x7
        rm = byte & 0x7

        if prefixes & PREFIX_REX_R:
            reg |= 0x8

        if prefixes & PREFIX_REX_B:
            if not (mod != 3 and rm == 4):  # if not SIB
                rm |= 0x8

        return (mod, reg, rm)

    def parse_sib(self, bytez, offset, mod, prefixes=0):
        """
        Parse a SIB byte with the i386 parser, then apply REX.B to the
        base and REX.X to the index.  Returns (size, scale, index, base, imm).
        """
        size, scale, index, base, imm = e_i386.i386Disasm.parse_sib(self, bytez, offset, mod)

        if prefixes & PREFIX_REX_B:
            base |= 0x8

        if prefixes & PREFIX_REX_X:
            index |= 0x8

        return (size, scale, index, base, imm)

    def byteRegOffset(self, val, prefixes=0):
        """
        Tag an 8-bit register index with the low/high byte meta flag.
        """
        # NOTE: Override this because there is no AH etc in 64 bit mode
        if prefixes & PREFIX_REX:
            # the parse_modrm function deals with register index adds
            val |= e_i386.RMETA_LOW8

        else:
            # not using REX, revert to old split-registers (low/high)
            if val < 4:
                val |= e_i386.RMETA_LOW8
            else:
                val |= e_i386.RMETA_HIGH8
                val -= 4

        return val

    def extended_parse_modrm(self, bytes, offset, opersize, regbase=0, prefixes=0):
        """
        Return a tuple of (size, Operand)
        """
        # mod == 00 with r/m == 101 selects RIP-relative addressing.  Test
        # the raw ModR/M bits: the REX.B extension that parse_modrm folds
        # into rm does not apply to this encoding and would otherwise hide
        # it (rm would read as 13 instead of 5).
        modrm = ord(bytes[offset])
        if (modrm & 0xc7) == 0x05:
            size = 1
            imm = e_bits.parsebytes(bytes, offset + size, 4, sign=True)
            size += 4
            return (size, Amd64RipRelOper(imm, opersize))

        return e_i386.i386Disasm.extended_parse_modrm(self, bytes, offset, opersize, regbase, prefixes)

    # NOTE: Override a bunch of the address modes to account for REX
    def ameth_0(self, operflags, operval, tsize, prefixes):
        """Operand embedded in the opcode; adjust a register for REX.W / REX.B."""
        o = e_i386.i386Disasm.ameth_0(self, operflags, operval, tsize, prefixes)
        # If it has a builtin register, we need to check for bump prefix
        if isinstance(o, e_i386.i386RegOper):
            if prefixes & PREFIX_REX_W:
                o.reg &= 0xffff
            if prefixes & PREFIX_REX_B:
                o.reg += REX_BUMP
        return o

    def ameth_g(self, bytes, offset, tsize, prefixes, operflags):
        osize, oper = e_i386.i386Disasm.ameth_g(self, bytes, offset, tsize, prefixes, operflags)
        if oper.tsize == 4 and oper.reg != REG_RIP:
            oper.reg += RMETA_LOW32
        if prefixes & PREFIX_REX_R:
            oper.reg += REX_BUMP
        return osize, oper

    def ameth_h(self, bytez, offset, tsize, prefixes, operflags):
        """Not implemented; always raises."""
        raise NotImplementedError("IMPLEMENT ADDRMETH_H please....")

    def ameth_c(self, bytes, offset, tsize, prefixes, operflags):
        osize, oper = e_i386.i386Disasm.ameth_c(self, bytes, offset, tsize, prefixes, operflags)
        if prefixes & PREFIX_REX_R:
            oper.reg += REX_BUMP
        return osize, oper

    def ameth_d(self, bytes, offset, tsize, prefixes, operflags):
        osize, oper = e_i386.i386Disasm.ameth_d(self, bytes, offset, tsize, prefixes, operflags)
        if prefixes & PREFIX_REX_R:
            oper.reg += REX_BUMP
        return osize, oper

    def ameth_v(self, bytes, offset, tsize, prefixes, operflags):
        osize, oper = e_i386.i386Disasm.ameth_v(self, bytes, offset, tsize, prefixes, operflags)
        if prefixes & PREFIX_REX_R:
            oper.reg += REX_BUMP
        return osize, oper

    # NOTE: The ones below are the only ones to which REX.X or REX.B can apply (besides ameth_0)
    # FIXME: we need to adhere to the ADDR_SIZE/OPER_SIZE rules better... the REX_BUMP is an afterthought...
    #        however, all the rules are based on ADDR_SIZE/OPER_SIZE and can be coalesced elegantly into amd64/i386
    #        and even make 16-bit mode play nicely.
    def _dis_rex_exmodrm(self, oper, prefixes, operflags):
        """
        Apply REX.X / REX.B to an operand produced by the extended ModR/M
        decoders, modifying oper in place.
        """
        # REMEMBER: all extended mod RM reg fields come from the r/m part.  If it
        #           were actually just the reg part, it'd be in one of the above
        #           addressing modes...
        if getattr(oper, "index", None) is not None:
            # Adjust the size if needed
            if prefixes & PREFIX_REX_X:
                oper.index += REX_BUMP

        # oper.reg will be r/m or SIB base
        if getattr(oper, "reg", None) is not None:
            # Adjust the size if needed
            if prefixes & PREFIX_REX_B:
                oper.reg += REX_BUMP

        if isinstance(oper, e_i386.i386RegOper):
            if oper.tsize == 4:
                oper.reg += RMETA_LOW32

    def ameth_e(self, bytes, offset, tsize, prefixes, operflags):
        osize, oper = e_i386.i386Disasm.ameth_e(self, bytes, offset, tsize, prefixes, operflags)
        self._dis_rex_exmodrm(oper, prefixes, operflags)
        return osize, oper

    def ameth_w(self, bytes, offset, tsize, prefixes, operflags):
        osize, oper = e_i386.i386Disasm.ameth_w(self, bytes, offset, tsize, prefixes, operflags)
        self._dis_rex_exmodrm(oper, prefixes, operflags)
        return osize, oper

if __name__ == '__main__':
    # Run as a script: hand an AMD64 disassembler to envi.archs.dismain.
    import envi.archs
    disassembler = Amd64Disasm()
    envi.archs.dismain(disassembler)