Merge branch 'master' of ssh://bill@git.dyninst.org/pub/dyninst
[dyninst.git] / instructionAPI / src / InstructionDecoder-x86.C
1 /*
2 * Copyright (c) 1996-2009 Barton P. Miller
3 *
4 * We provide the Paradyn Parallel Performance Tools (below
5 * described as "Paradyn") on an AS IS basis, and do not warrant its
6 * validity or performance.  We reserve the right to update, modify,
7 * or discontinue this software at any time.  We shall have no
8 * obligation to supply such updates or modifications or any other
9 * form of support to you.
10 *
11 * By your use of Paradyn, you understand and agree that we (or any
12 * other person or entity with proprietary rights in Paradyn) are
13 * under no obligation to provide either maintenance services,
14 * update services, notices of latent defects, or correction of
15 * defects for Paradyn.
16 *
17 * This library is free software; you can redistribute it and/or
18 * modify it under the terms of the GNU Lesser General Public
19 * License as published by the Free Software Foundation; either
20 * version 2.1 of the License, or (at your option) any later version.
21 *
22 * This library is distributed in the hope that it will be useful,
23 * but WITHOUT ANY WARRANTY; without even the implied warranty of
24 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
25 * Lesser General Public License for more details.
26 *
27 * You should have received a copy of the GNU Lesser General Public
28 * License along with this library; if not, write to the Free Software
29 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
30 */
31
32 #include "InstructionDecoder-x86.h"
33 #include "../h/Expression.h"
34 #include "../src/arch-x86.h"
35 #include "../h/Register.h"
36 #include "../h/Dereference.h"
37 #include "../h/Immediate.h"
38 #include "../h/BinaryFunction.h"
39 #include "../../common/h/singleton_object_pool.h"
40
41 using namespace std;
42 namespace Dyninst
43 {
44     namespace InstructionAPI
45     {
46     
47         bool readsOperand(unsigned int opsema, unsigned int i)
48         {
49             switch(opsema) {
50                 case s1R2R:
51                     return (i == 0 || i == 1);
52                 case s1R:
53                 case s1RW:
54                     return i == 0;
55                 case s1W:
56                     return false;
57                 case s1W2RW:
58                 case s1W2R:   // second operand read, first operand written (e.g. mov)
59                     return i == 1;
60                 case s1RW2R:  // two operands read, first written (e.g. add)
61                 case s1RW2RW: // e.g. xchg
62                 case s1R2RW:
63                     return i == 0 || i == 1;
64                 case s1W2R3R: // e.g. imul
65                 case s1W2RW3R: // some mul
66                 case s1W2R3RW: // (stack) push & pop
67                     return i == 1 || i == 2;
68                 case s1W2W3R: // e.g. les
69                     return i == 2;
70                 case s1RW2R3R: // shld/shrd
71                 case s1RW2RW3R: // [i]div, cmpxch8b
72                 case s1R2R3R:
73                     return i == 0 || i == 1 || i == 2;
74                     break;
75                 case sNONE:
76                 default:
77                     return false;
78             }
79       
80         }
81       
82         bool writesOperand(unsigned int opsema, unsigned int i)
83         {
84             switch(opsema) {
85                 case s1R2R:
86                 case s1R:
87                     return false;
88                 case s1RW:
89                 case s1W:
90                     case s1W2R:   // second operand read, first operand written (e.g. mov)
91                         case s1RW2R:  // two operands read, first written (e.g. add)
92                             case s1W2R3R: // e.g. imul
93                                 case s1RW2R3R: // shld/shrd
94                                     return i == 0;
95                 case s1R2RW:
96                     return i == 1;
97                 case s1W2RW:
98                     case s1RW2RW: // e.g. xchg
99                         case s1W2RW3R: // some mul
100                             case s1W2W3R: // e.g. les
101                                 case s1RW2RW3R: // [i]div, cmpxch8b
102                                     return i == 0 || i == 1;
103                                     case s1W2R3RW: // (stack) push & pop
104                                         return i == 0 || i == 2;
105                 case sNONE:
106                 default:
107                     return false;
108             }
109         }
110
111
112     
113     INSTRUCTION_EXPORT InstructionDecoder_x86::InstructionDecoder_x86(const unsigned char* buffer, size_t size,
114                                                                      Architecture arch) :
115             InstructionDecoder(buffer, size, arch),
116     locs(NULL),
117     decodedInstruction(NULL),
118     is32BitMode(true),
119     sizePrefixPresent(false)
120     {
121     }
122     INSTRUCTION_EXPORT InstructionDecoder_x86::InstructionDecoder_x86() :
123             InstructionDecoder(),
124             locs(NULL),
125     decodedInstruction(NULL),
126     is32BitMode(true),
127     sizePrefixPresent(false)
128     {
129     }
#if 0
// Disabled copy constructor, kept for reference.  It copies the base class
// and the two mode flags from the source decoder but deliberately starts
// with null scratch objects rather than sharing the source's.
INSTRUCTION_EXPORT InstructionDecoder_x86::InstructionDecoder_x86(const InstructionDecoder_x86& o)
    : InstructionDecoder(o)
    , locs(NULL)
    , decodedInstruction(NULL)
    , is32BitMode(o.is32BitMode)
    , sizePrefixPresent(o.sizePrefixPresent)
{
}
#endif
140     INSTRUCTION_EXPORT InstructionDecoder_x86::~InstructionDecoder_x86()
141     {
142         if(decodedInstruction) decodedInstruction->~ia32_instruction();
143         free(decodedInstruction);
144
145         if(locs) locs->~ia32_locations();
146         free(locs);
147     }
// Value of the ModRM r/m field (binary 100) for which the effective address
// is built from a SIB byte instead of directly from the r/m register.
static const unsigned char modrm_use_sib = 0x04;
149     
150     INSTRUCTION_EXPORT void InstructionDecoder_x86::setMode(bool is64)
151     {
152         ia32_set_mode_64(is64);
153     }
154     
155     Expression::Ptr InstructionDecoder_x86::makeSIBExpression(unsigned int opType)
156     {
157         unsigned scale;
158         Register index;
159         Register base;
160         Result_Type aw = ia32_is_mode_64() ? u32 : u64;
161
162         decode_SIB(locs->sib_byte, scale, index, base);
163
164         Expression::Ptr scaleAST(make_shared(singleton_object_pool<Immediate>::construct(Result(u8, dword_t(scale)))));
165         Expression::Ptr indexAST(make_shared(singleton_object_pool<RegisterAST>::construct(makeRegisterID(index, opType,
166                                     locs->rex_x))));
167         Expression::Ptr baseAST;
168         if(base == 0x05)
169         {
170             switch(locs->modrm_mod)
171             {
172                 case 0x00:
173                     baseAST = decodeImmediate(op_d, locs->sib_position + 1);
174                     break;
175                     case 0x01: {
176                         MachRegister reg;
177                         if (locs->rex_b)
178                             reg = x86_64::r13;
179                         else
180                           reg = MachRegister::getFramePointer(m_Arch);
181                         
182                         baseAST = makeAddExpression(make_shared(singleton_object_pool<RegisterAST>::construct(reg)),
183                                                     decodeImmediate(op_b, locs->sib_position + 1), 
184                                                     aw);
185                         break;
186                     }
187                     case 0x02: {
188                         MachRegister reg;
189                         if (locs->rex_b)
190                             reg = x86_64::r13;
191                         else
192                             reg = MachRegister::getFramePointer(m_Arch);
193
194                         baseAST = makeAddExpression(make_shared(singleton_object_pool<RegisterAST>::construct(reg)), 
195                                                     decodeImmediate(op_d, locs->sib_position + 1),
196                                                     aw);
197                         break;
198                     }
199                 case 0x03:
200                 default:
201                     assert(0);
202                     break;
203             };
204         }
205         else
206         {
207             baseAST = make_shared(singleton_object_pool<RegisterAST>::construct(makeRegisterID(base, 
208                                                                                                opType,
209                                                                                                locs->rex_b)));
210         }
211         if(index == 0x04 && (!(ia32_is_mode_64()) || !(locs->rex_x)))
212         {
213             return baseAST;
214         }
215         return makeAddExpression(baseAST, makeMultiplyExpression(indexAST, scaleAST, aw), aw);
216     }
217
218     Expression::Ptr InstructionDecoder_x86::makeModRMExpression(unsigned int opType)
219     {
220         unsigned int regType = op_d;
221         if(ia32_is_mode_64())
222         {
223             regType = op_q;
224         }
225         Result_Type aw = ia32_is_mode_64() ? u32 : u64;
226         Expression::Ptr e =
227           makeRegisterExpression(makeRegisterID(locs->modrm_rm, regType, (locs->rex_b == 1)));
228         switch(locs->modrm_mod)
229         {
230             case 0:
231                 if(locs->modrm_rm == modrm_use_sib) {
232                     e = makeSIBExpression(opType);
233                 }
234                 if(locs->modrm_rm == 0x5)
235                 {
236                     assert(locs->opcode_position > -1);
237                     unsigned char opcode = rawInstruction[locs->opcode_position];
238         // treat FP decodes as legacy mode since it appears that's what we've got in our
239         // old code...
240                     if(ia32_is_mode_64() && (opcode < 0xD8 || opcode > 0xDF))
241                     {
242                         e = makeAddExpression(makeRegisterExpression(x86_64::rip),
243                                             getModRMDisplacement(), aw);
244                     }
245                     else
246                     {
247                         e = getModRMDisplacement();
248                     }
249         
250                 }
251                 if(opType == op_lea)
252                 {
253                     return e;
254                 }
255                 return makeDereferenceExpression(e, makeSizeType(opType));
256             case 1:
257             case 2:
258             {
259                 if(locs->modrm_rm == modrm_use_sib) {
260                     e = makeSIBExpression(opType);
261                 }
262                 Expression::Ptr disp_e = makeAddExpression(e, getModRMDisplacement(), aw);
263                 if(opType == op_lea)
264                 {
265                     return disp_e;
266                 }
267                 return makeDereferenceExpression(disp_e, makeSizeType(opType));
268             }
269             case 3:
270               return makeRegisterExpression(makeRegisterID(locs->modrm_rm, opType, (locs->rex_b == 1)));
271             default:
272                 return Expression::Ptr();
273         
274         };
275         
276     }
277
278     Expression::Ptr InstructionDecoder_x86::decodeImmediate(unsigned int opType, unsigned int position, bool isSigned)
279     {
280         const unsigned char* bufferEnd = bufferBegin + (bufferSize ? bufferSize : 16);
281         assert(position != (unsigned int)(-1));
282         switch(opType)
283         {
284             case op_b:
285                 assert(rawInstruction + position < bufferEnd);
286                 return Immediate::makeImmediate(Result(isSigned ? s8 : u8 ,*(const byte_t*)(rawInstruction + position)));
287                 break;
288             case op_d:
289                 assert(rawInstruction + position + 3 < bufferEnd);
290                 return Immediate::makeImmediate(Result(isSigned ? s32 : u32,*(const dword_t*)(rawInstruction + position)));
291             case op_w:
292                 assert(rawInstruction + position + 1 < bufferEnd);
293                 return Immediate::makeImmediate(Result(isSigned ? s16 : u16,*(const word_t*)(rawInstruction + position)));
294                 break;
295             case op_q:
296                 assert(rawInstruction + position + 7 < bufferEnd);
297                 return Immediate::makeImmediate(Result(isSigned ? s64 : u64,*(const int64_t*)(rawInstruction + position)));
298                 break;
299             case op_v:
300             case op_z:
301         // 32 bit mode & no prefix, or 16 bit mode & prefix => 32 bit
302         // 16 bit mode, no prefix or 32 bit mode, prefix => 16 bit
303                 if(!sizePrefixPresent)
304                 {
305                     assert(rawInstruction + position + 3 < bufferEnd);
306                     return Immediate::makeImmediate(Result(isSigned ? s32 : u32,*(const dword_t*)(rawInstruction + position)));
307                 }
308                 else
309                 {
310                     assert(rawInstruction + position + 1 < bufferEnd);
311                     return Immediate::makeImmediate(Result(isSigned ? s16 : u16,*(const word_t*)(rawInstruction + position)));
312                 }
313         
314                 break;
315             case op_p:
316         // 32 bit mode & no prefix, or 16 bit mode & prefix => 48 bit
317         // 16 bit mode, no prefix or 32 bit mode, prefix => 32 bit
318                 if(!sizePrefixPresent)
319                 {
320                     assert(rawInstruction + position + 5< bufferEnd);
321                     return Immediate::makeImmediate(Result(isSigned ? s48 : u48,*(const int64_t*)(rawInstruction + position)));
322                 }
323                 else
324                 {
325                     assert(rawInstruction + position + 3 < bufferEnd);
326                     return Immediate::makeImmediate(Result(isSigned ? s32 : u32,*(const dword_t*)(rawInstruction + position)));
327                 }
328         
329                 break;
330             case op_a:
331             case op_dq:
332             case op_pd:
333             case op_ps:
334             case op_s:
335             case op_si:
336             case op_lea:
337             case op_allgprs:
338             case op_512:
339             case op_c:
340                 assert(!"Can't happen: opType unexpected for valid ways to decode an immediate");
341                 return Expression::Ptr();
342             default:
343                 assert(!"Can't happen: opType out of range");
344                 return Expression::Ptr();
345         }
346     }
347     
348     Expression::Ptr InstructionDecoder_x86::getModRMDisplacement()
349     {
350         int disp_pos;
351
352         if(locs->sib_position != -1)
353         {
354             disp_pos = locs->sib_position + 1;
355         }
356         else
357         {
358             disp_pos = locs->modrm_position + 1;
359         }
360         const unsigned char* bufferEnd = bufferBegin + bufferSize;
361         switch(locs->modrm_mod)
362         {
363             case 1:
364                 assert(rawInstruction + disp_pos + 1 <= bufferEnd);
365                 return make_shared(singleton_object_pool<Immediate>::construct(Result(s8, (*(const byte_t*)(rawInstruction +
366                         disp_pos)))));
367                 break;
368             case 2:
369                 if(sizePrefixPresent)
370                 {
371                     assert(rawInstruction + disp_pos + 2 <= bufferEnd);
372                     return make_shared(singleton_object_pool<Immediate>::construct(Result(s16, *((const word_t*)(rawInstruction +
373                             disp_pos)))));
374                 }
375                 else
376                 {
377                     assert(rawInstruction + disp_pos + 4 <= bufferEnd);
378                     return make_shared(singleton_object_pool<Immediate>::construct(Result(s32, *((const dword_t*)(rawInstruction +
379                             disp_pos)))));
380                 }
381                 break;
382             case 0:
383                 // In 16-bit mode, the word displacement is modrm r/m 6
384                 if(sizePrefixPresent)
385                 {
386                     if(locs->modrm_rm == 6)
387                     {
388                         assert(rawInstruction + disp_pos + 4 <= bufferEnd);
389                         return make_shared(singleton_object_pool<Immediate>::construct(Result(s32,
390                                            *((const dword_t*)(rawInstruction + disp_pos)))));
391                     }
392                     else
393                     {
394                         assert(rawInstruction + disp_pos + 1 <= bufferEnd);
395                         return make_shared(singleton_object_pool<Immediate>::construct(Result(s8, 0)));
396                     }
397                     break;
398                 }
399                 // ...and in 32-bit mode, the dword displacement is modrm r/m 5
400                 else
401                 {
402                     if(locs->modrm_rm == 5)
403                     {
404                         assert(rawInstruction + disp_pos + 4 <= bufferEnd);
405                         return make_shared(singleton_object_pool<Immediate>::construct(Result(s32,
406                                            *((const dword_t*)(rawInstruction + disp_pos)))));
407                     }
408                     else
409                     {
410                         assert(rawInstruction + disp_pos + 1 <= bufferEnd);
411                         return make_shared(singleton_object_pool<Immediate>::construct(Result(s8, 0)));
412                     }
413                     break;
414                 }
415             default:
416                 assert(rawInstruction + disp_pos + 1 <= bufferEnd);
417                 return make_shared(singleton_object_pool<Immediate>::construct(Result(s8, 0)));
418                 break;
419         }
420     }
421
// Row selectors for IntelRegTable: each enumerator names one bank of eight
// registers.  The numeric values are the row indices and must stay in step
// with the order of the rows in that table.
enum intelRegBanks
{
    b_8bitNoREX   = 0,
    b_16bit       = 1,
    b_32bit       = 2,
    b_segment     = 3,
    b_64bit       = 4,
    b_xmm         = 5,
    b_mm          = 6,
    b_cr          = 7,
    b_dr          = 8,
    b_tr          = 9,
    b_amd64ext    = 10,
    b_8bitWithREX = 11
};
437     using namespace x86;
438     
439     static MachRegister IntelRegTable[][8] = {
440         {
441             al, cl, dl, bl, ah, ch, dh, bh
442         },
443         {
444             ax, cx, dx, bx, sp, bp, si, di
445         },
446         {
447             eax, ecx, edx, ebx, esp, ebp, esi, edi
448         },
449         {
450             es, cs, ss, ds, fs, gs, InvalidReg, InvalidReg
451         },
452         {
453             x86_64::rax, x86_64::rcx, x86_64::rdx, x86_64::rbx, x86_64::rsp, x86_64::rbp, x86_64::rsi, x86_64::rdi
454         },
455         {
456             xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
457         },
458         {
459             mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7
460         },
461         {
462             cr0, cr1, cr2, cr3, cr4, cr5, cr6, cr7
463         },
464         {
465             dr0, dr1, dr2, dr3, dr4, dr5, dr6, dr7
466         },
467         {
468             tr0, tr1, tr2, tr3, tr4, tr5, tr6, tr7
469         },
470         {
471             x86_64::r8, x86_64::r9, x86_64::r10, x86_64::r11, x86_64::r12, x86_64::r13, x86_64::r14, x86_64::r15
472         },
473         {
474             x86_64::al, x86_64::cl, x86_64::dl, x86_64::bl, x86_64::spl, x86_64::bpl, x86_64::sil, x86_64::dil
475         }
476
477     };
478
479     MachRegister InstructionDecoder_x86::makeRegisterID(unsigned int intelReg, unsigned int opType,
480                                         bool isExtendedReg)
481     {
482         MachRegister retVal;
483         if(isExtendedReg)
484         {
485             retVal = IntelRegTable[b_amd64ext][intelReg];
486         }
487         else if(locs->rex_w)
488         {
489             // AMD64 with 64-bit operands
490             retVal = IntelRegTable[b_64bit][intelReg];
491         }
492         else
493         {
494             switch(opType)
495             {
496                 case op_b:
497                     if (locs->rex_position == -1) {
498                         retVal = IntelRegTable[b_8bitNoREX][intelReg];
499                     } else {
500                         retVal = IntelRegTable[b_8bitWithREX][intelReg];
501                     }
502                     break;
503                 case op_q:
504                     retVal = IntelRegTable[b_64bit][intelReg];
505                     break;
506                 case op_d:
507                 case op_si:
508                 case op_w:
509                 default:
510                     retVal = IntelRegTable[b_32bit][intelReg];
511                     break;
512             }
513         }
514         if (m_Arch == Arch_x86) {
515           if ((retVal.val() & 0x00ffffff) == 0x0001000c)
516             assert(0);
517         }
518
519         return MachRegister((retVal.val() & ~retVal.getArchitecture()) | m_Arch);
520     }
521     
522     Result_Type InstructionDecoder_x86::makeSizeType(unsigned int opType)
523     {
524         switch(opType)
525         {
526             case op_b:
527             case op_c:
528                 return u8;
529             case op_d:
530             case op_ss:
531             case op_allgprs:
532             case op_si:
533                 return u32;
534             case op_w:
535             case op_a:
536                 return u16;
537             case op_q:
538             case op_sd:
539                 return u64;
540             case op_v:
541             case op_lea:
542             case op_z:
543                 if(is32BitMode ^ sizePrefixPresent)
544                 {
545                     return u32;
546                 }
547                 else
548                 {
549                     return u16;
550                 }
551                 break;
552             case op_p:
553                 // book says operand size; arch-x86 says word + word * operand size
554                 if(is32BitMode ^ sizePrefixPresent)
555                 {
556                     return u48;
557                 }
558                 else
559                 {
560                     return u32;
561                 }
562             case op_dq:
563                 return u64;
564             case op_512:
565                 return m512;
566             case op_pi:
567             case op_ps:
568             case op_pd:
569                 return dbl128;
570             case op_s:
571                 return u48;
572             case op_f:
573                 return sp_float;
574             case op_dbl:
575                 return dp_float;
576             case op_14:
577                 return m14;
578             default:
579                 assert(!"Can't happen!");
580                 return u8;
581         }
582     }
583
584
585     bool InstructionDecoder_x86::decodeOneOperand(const ia32_operand& operand,
586             int & imm_index, /* immediate operand index */
587             const Instruction* insn_to_complete, bool isRead, bool isWritten)
588             {
589                 unsigned int regType = op_d;
590                 if(ia32_is_mode_64())
591                 {
592                     regType = op_q;
593                 }
594                 bool isCFT = false;
595                 bool isCall = false;
596                 InsnCategory cat = insn_to_complete->getCategory();
597                 if(cat == c_BranchInsn || cat == c_CallInsn)
598                 {
599                     isCFT = true;
600                     if(cat == c_CallInsn)
601                     {
602                         isCall = true;
603                     }
604                 }
605                         
606                 switch(operand.admet)
607                 {
608                     case 0:
609                     // No operand
610                     {
611                         fprintf(stderr, "ERROR: Instruction with mismatched operands. Raw bytes: ");
612                         for(unsigned int i = 0; i < decodedInstruction->getSize(); i++) {
613                             fprintf(stderr, "%x ", rawInstruction[i]);
614                         }
615                         fprintf(stderr, "\n");
616                         assert(!"Mismatched number of operands--check tables");
617                         return false;
618                     }
619                     case am_A:
620                     {
621                         // am_A only shows up as a far call/jump.  Position 1 should be universally safe.
622                         Expression::Ptr addr(decodeImmediate(operand.optype, 1));
623                         Expression::Ptr op(makeDereferenceExpression(addr, makeSizeType(operand.optype)));
624                         insn_to_complete->addSuccessor(op, isCall, false, false, false);
625                     }
626                     break;
627                     case am_C:
628                     {
629                         Expression::Ptr op(makeRegisterExpression(IntelRegTable[b_cr][locs->modrm_reg]));
630                         insn_to_complete->appendOperand(op, isRead, isWritten);
631                     }
632                     break;
633                     case am_D:
634                     {
635                         Expression::Ptr op(makeRegisterExpression(IntelRegTable[b_dr][locs->modrm_reg]));
636                         insn_to_complete->appendOperand(op, isRead, isWritten);
637                     }
638                     break;
639                     case am_E:
640                     // am_M is like am_E, except that mod of 0x03 should never occur (am_M specified memory,
641                     // mod of 0x03 specifies direct register access).
642                     case am_M:
643                     // am_R is the inverse of am_M; it should only have a mod of 3
644                     case am_R:
645                         if(isCFT)
646                         {
647                             insn_to_complete->addSuccessor(makeModRMExpression(operand.optype), isCall, true, false, false);
648                         }
649                         else
650                         {
651                             insn_to_complete->appendOperand(makeModRMExpression(operand.optype), isRead, isWritten);
652                         }
653                     break;
654                     case am_F:
655                     {
656                         Expression::Ptr op(makeRegisterExpression(flags));
657                         insn_to_complete->appendOperand(op, isRead, isWritten);
658                     }
659                     break;
660                     case am_G:
661                     {
662                         Expression::Ptr op(makeRegisterExpression(makeRegisterID(locs->modrm_reg,
663                                 operand.optype, locs->rex_r)));
664                         insn_to_complete->appendOperand(op, isRead, isWritten);
665                     }
666                     break;
667                     case am_I:
668                         insn_to_complete->appendOperand(decodeImmediate(operand.optype, locs->imm_position[imm_index++]), isRead, isWritten);
669                         break;
670                     case am_J:
671                     {
672                         Expression::Ptr Offset(decodeImmediate(operand.optype, locs->imm_position[imm_index++], true));
673                         Expression::Ptr EIP(makeRegisterExpression(MachRegister::getPC(m_Arch)));
674                         Expression::Ptr InsnSize(make_shared(singleton_object_pool<Immediate>::construct(Result(u8,
675                             decodedInstruction->getSize()))));
676                         Expression::Ptr postEIP(makeAddExpression(EIP, InsnSize, u32));
677
678                         Expression::Ptr op(makeAddExpression(Offset, postEIP, u32));
679                         insn_to_complete->addSuccessor(op, isCall, false, false, false);
680                     }
681                     break;
682                     case am_O:
683                     {
684                     // Address/offset width, which is *not* what's encoded by the optype...
685                     // The deref's width is what's actually encoded here.
686                         int pseudoOpType;
687                         switch(locs->address_size)
688                         {
689                             case 1:
690                                 pseudoOpType = op_b;
691                                 break;
692                             case 2:
693                                 pseudoOpType = op_w;
694                                 break;
695                             case 4:
696                                 pseudoOpType = op_d;
697                                 break;
698                             case 0:
699                                 // closest I can get to "will be address size by default"
700                                 pseudoOpType = op_v;
701                                 break;
702                             default:
703                                 assert(!"Bad address size, should be 0, 1, 2, or 4!");
704                                 pseudoOpType = op_b;
705                                 break;
706                         }
707
708
709                         int offset_position = locs->opcode_position;
710                         if(locs->modrm_position > offset_position && locs->modrm_operand <
711                            (int)(insn_to_complete->m_Operands.size()))
712                         {
713                             offset_position = locs->modrm_position;
714                         }
715                         if(locs->sib_position > offset_position)
716                         {
717                             offset_position = locs->sib_position;
718                         }
719                         offset_position++;
720                         insn_to_complete->appendOperand(makeDereferenceExpression(
721                                 decodeImmediate(pseudoOpType, offset_position), makeSizeType(operand.optype)), isRead, isWritten);
722                     }
723                     break;
724                     case am_P:
725                         insn_to_complete->appendOperand(makeRegisterExpression(IntelRegTable[b_mm][locs->modrm_reg]),
726                                 isRead, isWritten);
727                         break;
728                     case am_Q:
729         
730                         switch(locs->modrm_mod)
731                         {
732                             // direct dereference
733                             case 0x00:
734                             case 0x01:
735                             case 0x02:
736                                 insn_to_complete->appendOperand(makeModRMExpression(operand.optype), isRead, isWritten);
737                                 break;
738                             case 0x03:
739                                 // use of actual register
740                                 insn_to_complete->appendOperand(makeRegisterExpression(IntelRegTable[b_mm][locs->modrm_rm]),
741                                                                isRead, isWritten);
742                                 break;
743                             default:
744                                 assert(!"2-bit value modrm_mod out of range");
745                                 break;
746                         };
747                         break;
748                     case am_S:
749                     // Segment register in modrm reg field.
750                         insn_to_complete->appendOperand(makeRegisterExpression(IntelRegTable[b_segment][locs->modrm_reg]),
751                                 isRead, isWritten);
752                         break;
753                     case am_T:
754                         // test register in modrm reg; should only be tr6/tr7, but we'll decode any of them
755                         // NOTE: this only appears in deprecated opcodes
756                         insn_to_complete->appendOperand(makeRegisterExpression(IntelRegTable[b_tr][locs->modrm_reg]),
757                                                        isRead, isWritten);
758                         break;
759                     case am_V:
760                         insn_to_complete->appendOperand(makeRegisterExpression(IntelRegTable[b_xmm][locs->modrm_reg]),
761                                                        isRead, isWritten);
762                         break;
763                     case am_W:
764                         switch(locs->modrm_mod)
765                         {
766                             // direct dereference
767                             case 0x00:
768                             case 0x01:
769                             case 0x02:
770                                 insn_to_complete->appendOperand(makeModRMExpression(makeSizeType(operand.optype)),
771                                                                isRead, isWritten);
772                                 break;
773                             case 0x03:
774                             // use of actual register
775                             {
776                                 insn_to_complete->appendOperand(makeRegisterExpression(IntelRegTable[b_xmm][locs->modrm_rm]),
777                                                                isRead, isWritten);
778                                 break;
779                             }
780                             default:
781                                 assert(!"2-bit value modrm_mod out of range");
782                                 break;
783                         };
784                         break;
785                     case am_X:
786                     {
787                         Expression::Ptr ds(makeRegisterExpression(m_Arch == Arch_x86 ? x86::ds : x86_64::ds));
788                         Expression::Ptr si(makeRegisterExpression(m_Arch == Arch_x86 ? x86::si : x86_64::si));
789                         Expression::Ptr segmentOffset(make_shared(singleton_object_pool<Immediate>::construct(
790                                 Result(u32, 0x10))));
791                         Expression::Ptr ds_segment = makeMultiplyExpression(ds, segmentOffset, u32);
792                         Expression::Ptr ds_si = makeAddExpression(ds_segment, si, u32);
793                         insn_to_complete->appendOperand(makeDereferenceExpression(ds_si, makeSizeType(operand.optype)),
794                                                        isRead, isWritten);
795                     }
796                     break;
797                     case am_Y:
798                     {
799                         Expression::Ptr es(makeRegisterExpression(m_Arch == Arch_x86 ? x86::es : x86_64::es));
800                         Expression::Ptr di(makeRegisterExpression(m_Arch == Arch_x86 ? x86::di : x86_64::di));
801                         Expression::Ptr es_segment = makeMultiplyExpression(es,
802                             make_shared(singleton_object_pool<Immediate>::construct(Result(u32, 0x10))), u32);
803                         Expression::Ptr es_di = makeAddExpression(es_segment, di, u32);
804                         insn_to_complete->appendOperand(makeDereferenceExpression(es_di, makeSizeType(operand.optype)),
805                                                        isRead, isWritten);
806                     }
807                     break;
808                     case am_tworeghack:
809                     {
810                         if(operand.optype == r_EDXEAX)
811                         {
812                             Expression::Ptr edx(makeRegisterExpression(m_Arch == Arch_x86 ? x86::edx : x86_64::edx));
813                             Expression::Ptr eax(makeRegisterExpression(m_Arch == Arch_x86 ? x86::eax : x86_64::eax));
814                             Expression::Ptr highAddr = makeMultiplyExpression(edx,
815                                     Immediate::makeImmediate(Result(u64, 2^32)), u64);
816                             Expression::Ptr addr = makeAddExpression(highAddr, eax, u64);
817                             Expression::Ptr op = makeDereferenceExpression(addr, u64);
818                             insn_to_complete->appendOperand(op, isRead, isWritten);
819                         }
820                         else if (operand.optype == r_ECXEBX)
821                         {
822                             Expression::Ptr ecx(makeRegisterExpression(m_Arch == Arch_x86 ? x86::ecx : x86_64::ecx));
823                             Expression::Ptr ebx(makeRegisterExpression(m_Arch == Arch_x86 ? x86::ebx : x86_64::ebx));
824                             Expression::Ptr highAddr = makeMultiplyExpression(ecx,
825                                     Immediate::makeImmediate(Result(u64, 2^32)), u64);
826                             Expression::Ptr addr = makeAddExpression(highAddr, ebx, u64);
827                             Expression::Ptr op = makeDereferenceExpression(addr, u64);
828                             insn_to_complete->appendOperand(op, isRead, isWritten);
829                         }
830                     }
831                     break;
832                     
833                     case am_reg:
834                     {
835                         MachRegister r(operand.optype);
836                         r = MachRegister((r.val() & ~r.getArchitecture()) | m_Arch);
837                         if(locs->rex_b)
838                         {
839                             r = MachRegister((r.val()) | x86_64::r8.val());
840                         }
841                         if(sizePrefixPresent)
842                         {
843                             r = MachRegister((r.val() & ~x86::FULL) | x86::W_REG);
844                         }
845                         Expression::Ptr op(makeRegisterExpression(r));
846                         insn_to_complete->appendOperand(op, isRead, isWritten);
847                     }
848                     break;
849                 case am_stackH:
850                 case am_stackP:
851                 // handled elsewhere
852                     break;
853                 case am_allgprs:
854                 {
855                     if(m_Arch == Arch_x86)
856                     {
857                         insn_to_complete->appendOperand(makeRegisterExpression(x86::eax), isRead, isWritten);
858                         insn_to_complete->appendOperand(makeRegisterExpression(x86::ecx), isRead, isWritten);
859                         insn_to_complete->appendOperand(makeRegisterExpression(x86::edx), isRead, isWritten);
860                         insn_to_complete->appendOperand(makeRegisterExpression(x86::ebx), isRead, isWritten);
861                         insn_to_complete->appendOperand(makeRegisterExpression(x86::esp), isRead, isWritten);
862                         insn_to_complete->appendOperand(makeRegisterExpression(x86::ebp), isRead, isWritten);
863                         insn_to_complete->appendOperand(makeRegisterExpression(x86::esi), isRead, isWritten);
864                         insn_to_complete->appendOperand(makeRegisterExpression(x86::edi), isRead, isWritten);
865                     }
866                     else
867                     {
868                         insn_to_complete->appendOperand(makeRegisterExpression(x86_64::eax), isRead, isWritten);
869                         insn_to_complete->appendOperand(makeRegisterExpression(x86_64::ecx), isRead, isWritten);
870                         insn_to_complete->appendOperand(makeRegisterExpression(x86_64::edx), isRead, isWritten);
871                         insn_to_complete->appendOperand(makeRegisterExpression(x86_64::ebx), isRead, isWritten);
872                         insn_to_complete->appendOperand(makeRegisterExpression(x86_64::esp), isRead, isWritten);
873                         insn_to_complete->appendOperand(makeRegisterExpression(x86_64::ebp), isRead, isWritten);
874                         insn_to_complete->appendOperand(makeRegisterExpression(x86_64::esi), isRead, isWritten);
875                         insn_to_complete->appendOperand(makeRegisterExpression(x86_64::edi), isRead, isWritten);
876                     }
877                 }
878                     break;
879                 default:
880                     printf("decodeOneOperand() called with unknown addressing method %d\n", operand.admet);
881                         break;
882                 };
883                 return true;
884             }
885
886     extern ia32_entry invalid;
887     
888     void InstructionDecoder_x86::doIA32Decode()
889     {
890         if(decodedInstruction == NULL)
891         {
892             decodedInstruction = reinterpret_cast<ia32_instruction*>(malloc(sizeof(ia32_instruction)));
893             assert(decodedInstruction);
894         }
895         if(locs == NULL)
896         {
897             locs = reinterpret_cast<ia32_locations*>(malloc(sizeof(ia32_locations)));
898             assert(locs);
899         }
900     
901         locs = new(locs) ia32_locations();
902         assert(locs);
903         decodedInstruction = new (decodedInstruction) ia32_instruction(NULL, NULL, locs);
904         assert(locs);
905         ia32_decode(IA32_DECODE_PREFIXES, rawInstruction, *decodedInstruction);
906         sizePrefixPresent = (decodedInstruction->getPrefix()->getOperSzPrefix() == 0x66);
907         addrSizePrefixPresent = (decodedInstruction->getPrefix()->getAddrSzPrefix() == 0x67);
908     }
909     
910     unsigned int InstructionDecoder_x86::decodeOpcode()
911     {
912         static ia32_entry invalid = { e_No_Entry, 0, 0, true, { {0,0}, {0,0}, {0,0} }, 0, 0 };
913         doIA32Decode();
914         if(decodedInstruction->getEntry()) {
915             m_Operation = make_shared(singleton_object_pool<Operation>::construct(decodedInstruction->getEntry(),
916                                     decodedInstruction->getPrefix(), locs, m_Arch));
917         }
918         else
919         {
920                 // Gap parsing can trigger this case; in particular, when it encounters prefixes in an invalid order.
921                 // Notably, if a REX prefix (0x40-0x48) appears followed by another prefix (0x66, 0x67, etc)
922                 // we'll reject the instruction as invalid and send it back with no entry.  Since this is a common
923                 // byte sequence to see in, for example, ASCII strings, we want to simply accept this and move on, not
924                 // yell at the user.
925             m_Operation = make_shared(singleton_object_pool<Operation>::construct(&invalid,
926                                     decodedInstruction->getPrefix(), locs, m_Arch));
927         }
928         return decodedInstruction->getSize();
929     }
930     
931     bool InstructionDecoder_x86::decodeOperands(const Instruction* insn_to_complete)
932     {
933         int imm_index = 0; // handle multiple immediate operands
934         if(!decodedInstruction) return false;
935         assert(locs);
936         unsigned int opsema = decodedInstruction->getEntry()->opsema & 0xFF;
937     
938         for(unsigned i = 0; i < 3; i++)
939         {
940             if(decodedInstruction->getEntry()->operands[i].admet == 0 && decodedInstruction->getEntry()->operands[i].optype == 0)
941                 return true;
942             if(!decodeOneOperand(decodedInstruction->getEntry()->operands[i], 
943                     imm_index, 
944                     insn_to_complete, 
945                     readsOperand(opsema, i),
946                     writesOperand(opsema, i)))
947             {
948                 return false;
949             }
950         }
951     
952         return true;
953     }
954
955     Instruction::Ptr InstructionDecoder_x86::decode()
956     {
957         Instruction::Ptr ret(InstructionDecoder::decode());
958         if(ret)
959         {
960             assert(m_Arch != Arch_none);
961             ret->arch_decoded_from = m_Arch;
962         }
963         return ret;
964     }
965     
966     INSTRUCTION_EXPORT Instruction::Ptr InstructionDecoder_x86::decode(const unsigned char* buffer)
967     {
968         Instruction::Ptr ret(InstructionDecoder::decode(buffer));
969         if(ret)
970         {
971             assert(m_Arch != Arch_none);
972             ret->arch_decoded_from = m_Arch;
973         }
974         return ret;
975     }
976     void InstructionDecoder_x86::doDelayedDecode(const Instruction* insn_to_complete)
977     {
978         setBuffer(reinterpret_cast<const unsigned char*>(insn_to_complete->ptr()), insn_to_complete->size());
979         insn_to_complete->m_Operands.reserve(4);
980         doIA32Decode();        
981         decodeOperands(insn_to_complete);
982         resetBuffer();
983     }
984     
985 };
986 };
987