Fixed several bugs and polished AMD-64 instrumentation optimizations
[dyninst.git] / instructionAPI / src / InstructionDecoder-x86.C
1 /*
2 * Copyright (c) 1996-2009 Barton P. Miller
3 *
4 * We provide the Paradyn Parallel Performance Tools (below
5 * described as "Paradyn") on an AS IS basis, and do not warrant its
6 * validity or performance.  We reserve the right to update, modify,
7 * or discontinue this software at any time.  We shall have no
8 * obligation to supply such updates or modifications or any other
9 * form of support to you.
10 *
11 * By your use of Paradyn, you understand and agree that we (or any
12 * other person or entity with proprietary rights in Paradyn) are
13 * under no obligation to provide either maintenance services,
14 * update services, notices of latent defects, or correction of
15 * defects for Paradyn.
16 *
17 * This library is free software; you can redistribute it and/or
18 * modify it under the terms of the GNU Lesser General Public
19 * License as published by the Free Software Foundation; either
20 * version 2.1 of the License, or (at your option) any later version.
21 *
22 * This library is distributed in the hope that it will be useful,
23 * but WITHOUT ANY WARRANTY; without even the implied warranty of
24 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
25 * Lesser General Public License for more details.
26 *
27 * You should have received a copy of the GNU Lesser General Public
28 * License along with this library; if not, write to the Free Software
29 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
30 */
31
32 #define INSIDE_INSTRUCTION_API
33
34 #include "common/h/Types.h"
35 #include "InstructionDecoder-x86.h"
36 #include "Expression.h"
37 #include "common/h/arch-x86.h"
38 #include "Register.h"
39 #include "Dereference.h"
40 #include "Immediate.h" 
41 #include "BinaryFunction.h"
42 #include "common/h/singleton_object_pool.h"
43
44 using namespace std;
45 using namespace NS_x86;
46 namespace Dyninst
47 {
48     namespace InstructionAPI
49     {
50     
51         bool readsOperand(unsigned int opsema, unsigned int i)
52         {
53             switch(opsema) {
54                 case s1R2R:
55                     return (i == 0 || i == 1);
56                 case s1R:
57                 case s1RW:
58                     return i == 0;
59                 case s1W:
60                     return false;
61                 case s1W2RW:
62                 case s1W2R:   // second operand read, first operand written (e.g. mov)
63                     return i == 1;
64                 case s1RW2R:  // two operands read, first written (e.g. add)
65                 case s1RW2RW: // e.g. xchg
66                 case s1R2RW:
67                     return i == 0 || i == 1;
68                 case s1W2R3R: // e.g. imul
69                 case s1W2RW3R: // some mul
70                 case s1W2R3RW: // (stack) push & pop
71                     return i == 1 || i == 2;
72                 case s1W2W3R: // e.g. les
73                     return i == 2;
74                 case s1RW2R3R: // shld/shrd
75                 case s1RW2RW3R: // [i]div, cmpxch8b
76                 case s1R2R3R:
77                     return i == 0 || i == 1 || i == 2;
78                     break;
79                 case sNONE:
80                 default:
81                     return false;
82             }
83       
84         }
85       
86         bool writesOperand(unsigned int opsema, unsigned int i)
87         {
88             switch(opsema) {
89                 case s1R2R:
90                 case s1R:
91                     return false;
92                 case s1RW:
93                 case s1W:
94                     case s1W2R:   // second operand read, first operand written (e.g. mov)
95                         case s1RW2R:  // two operands read, first written (e.g. add)
96                             case s1W2R3R: // e.g. imul
97                                 case s1RW2R3R: // shld/shrd
98                                     return i == 0;
99                 case s1R2RW:
100                     return i == 1;
101                 case s1W2RW:
102                     case s1RW2RW: // e.g. xchg
103                         case s1W2RW3R: // some mul
104                             case s1W2W3R: // e.g. les
105                                 case s1RW2RW3R: // [i]div, cmpxch8b
106                                     return i == 0 || i == 1;
107                                     case s1W2R3RW: // (stack) push & pop
108                                         return i == 0 || i == 2;
109                 case sNONE:
110                 default:
111                     return false;
112             }
113         }
114
115
116     
117     INSTRUCTION_EXPORT InstructionDecoder_x86::InstructionDecoder_x86(Architecture a) :
118       InstructionDecoderImpl(a),
119     locs(NULL),
120     decodedInstruction(NULL),
121     is32BitMode(true),
122     sizePrefixPresent(false)
123     {
124     }
125     INSTRUCTION_EXPORT InstructionDecoder_x86::~InstructionDecoder_x86()
126     {
127         if(decodedInstruction) decodedInstruction->~ia32_instruction();
128         free(decodedInstruction);
129         if(locs) locs->~ia32_locations();
130         free(locs);
131
132     }
// Value of the ModRM r/m field that makeModRMExpression() treats as
// "a SIB byte follows" (for mod 0, 1 and 2).
static const unsigned char modrm_use_sib = 0x4;
134     
135     INSTRUCTION_EXPORT void InstructionDecoder_x86::setMode(bool is64)
136     {
137         ia32_set_mode_64(is64);
138     }
139     
140       Expression::Ptr InstructionDecoder_x86::makeSIBExpression(const InstructionDecoder::buffer& b)
141     {
142         unsigned scale;
143         Register index;
144         Register base;
145         Result_Type registerType = ia32_is_mode_64() ? u32 : u64;
146
147         decode_SIB(locs->sib_byte, scale, index, base);
148
149         Expression::Ptr scaleAST(make_shared(singleton_object_pool<Immediate>::construct(Result(u8, dword_t(scale)))));
150         Expression::Ptr indexAST(make_shared(singleton_object_pool<RegisterAST>::construct(makeRegisterID(index, registerType,
151                                     locs->rex_x))));
152         Expression::Ptr baseAST;
153         if(base == 0x05)
154         {
155             switch(locs->modrm_mod)
156             {
157                 case 0x00:
158                     baseAST = decodeImmediate(op_d, b.start + locs->sib_position + 1);
159                     break;
160                     case 0x01: {
161                         MachRegister reg;
162                         if (locs->rex_b)
163                             reg = x86_64::r13;
164                         else
165                           reg = MachRegister::getFramePointer(m_Arch);
166                         
167                         baseAST = makeAddExpression(make_shared(singleton_object_pool<RegisterAST>::construct(reg)),
168                                                     decodeImmediate(op_b, b.start + locs->sib_position + 1),
169                                                     registerType);
170                         break;
171                     }
172                     case 0x02: {
173                         MachRegister reg;
174                         if (locs->rex_b)
175                             reg = x86_64::r13;
176                         else
177                             reg = MachRegister::getFramePointer(m_Arch);
178
179                         baseAST = makeAddExpression(make_shared(singleton_object_pool<RegisterAST>::construct(reg)), 
180                                                     decodeImmediate(op_d, b.start + locs->sib_position + 1),
181                                                     registerType);
182                         break;
183                     }
184                 case 0x03:
185                 default:
186                     assert(0);
187                     break;
188             };
189         }
190         else
191         {
192             baseAST = make_shared(singleton_object_pool<RegisterAST>::construct(makeRegisterID(base, 
193                                                                                                registerType,
194                                                                                                locs->rex_b)));
195         }
196         if(index == 0x04 && (!(ia32_is_mode_64()) || !(locs->rex_x)))
197         {
198             return baseAST;
199         }
200         return makeAddExpression(baseAST, makeMultiplyExpression(indexAST, scaleAST, registerType), registerType);
201     }
202
203       Expression::Ptr InstructionDecoder_x86::makeModRMExpression(const InstructionDecoder::buffer& b,
204                                                                   unsigned int opType)
205     {
206        unsigned int regType = op_d;
207         Result_Type aw = ia32_is_mode_64() ? u32 : u64;
208         if(ia32_is_mode_64())
209         {
210             regType = op_q;
211         }
212         Expression::Ptr e =
213             makeRegisterExpression(makeRegisterID(locs->modrm_rm, regType, (locs->rex_b == 1)));
214         switch(locs->modrm_mod)
215         {
216             case 0:
217                 if(locs->modrm_rm == modrm_use_sib) {
218                     e = makeSIBExpression(b);
219                 }
220                 if(locs->modrm_rm == 0x5 && !addrSizePrefixPresent)
221                 {
222                     assert(locs->opcode_position > -1);
223                     if(ia32_is_mode_64())
224                     {
225                         e = makeAddExpression(makeRegisterExpression(x86_64::rip),
226                                             getModRMDisplacement(b), aw);
227                     }
228                     else
229                     {
230                         e = getModRMDisplacement(b);
231                     }
232         
233                 }
234                 if(locs->modrm_rm == 0x6 && addrSizePrefixPresent)
235                 {
236                     e = getModRMDisplacement(b);
237                 }
238                 if(opType == op_lea)
239                 {
240                     return e;
241                 }
242                 return makeDereferenceExpression(e, makeSizeType(opType));
243                 assert(0);
244                 break;
245             case 1:
246             case 2:
247             {
248                 if(locs->modrm_rm == modrm_use_sib) {
249                     e = makeSIBExpression(b);
250                 }
251                 Expression::Ptr disp_e = makeAddExpression(e, getModRMDisplacement(b), aw);
252                 if(opType == op_lea)
253                 {
254                     return disp_e;
255                 }
256                 return makeDereferenceExpression(disp_e, makeSizeType(opType));
257             }
258             assert(0);
259             break;
260             case 3:
261                 return makeRegisterExpression(makeRegisterID(locs->modrm_rm, opType, (locs->rex_b == 1)));
262             default:
263                 return Expression::Ptr();
264         
265         };
266         // can't get here, but make the compiler happy...
267         assert(0);
268         return Expression::Ptr();
269     }
270
271     Expression::Ptr InstructionDecoder_x86::decodeImmediate(unsigned int opType, const unsigned char* immStart, 
272                                                             bool isSigned)
273     {
274         switch(opType)
275         {
276             case op_b:
277                 return Immediate::makeImmediate(Result(isSigned ? s8 : u8 ,*(const byte_t*)(immStart)));
278                 break;
279             case op_d:
280                 return Immediate::makeImmediate(Result(isSigned ? s32 : u32,*(const dword_t*)(immStart)));
281             case op_w:
282                 return Immediate::makeImmediate(Result(isSigned ? s16 : u16,*(const word_t*)(immStart)));
283                 break;
284             case op_q:
285                 return Immediate::makeImmediate(Result(isSigned ? s64 : u64,*(const int64_t*)(immStart)));
286                 break;
287             case op_v:
288             case op_z:
289         // 32 bit mode & no prefix, or 16 bit mode & prefix => 32 bit
290         // 16 bit mode, no prefix or 32 bit mode, prefix => 16 bit
291                 if(!sizePrefixPresent)
292                 {
293                     return Immediate::makeImmediate(Result(isSigned ? s32 : u32,*(const dword_t*)(immStart)));
294                 }
295                 else
296                 {
297                     return Immediate::makeImmediate(Result(isSigned ? s16 : u16,*(const word_t*)(immStart)));
298                 }
299         
300                 break;
301             case op_p:
302         // 32 bit mode & no prefix, or 16 bit mode & prefix => 48 bit
303         // 16 bit mode, no prefix or 32 bit mode, prefix => 32 bit
304                 if(!sizePrefixPresent)
305                 {
306                     return Immediate::makeImmediate(Result(isSigned ? s48 : u48,*(const int64_t*)(immStart)));
307                 }
308                 else
309                 {
310                     return Immediate::makeImmediate(Result(isSigned ? s32 : u32,*(const dword_t*)(immStart)));
311                 }
312         
313                 break;
314             case op_a:
315             case op_dq:
316             case op_pd:
317             case op_ps:
318             case op_s:
319             case op_si:
320             case op_lea:
321             case op_allgprs:
322             case op_512:
323             case op_c:
324                 assert(!"Can't happen: opType unexpected for valid ways to decode an immediate");
325                 return Expression::Ptr();
326             default:
327                 assert(!"Can't happen: opType out of range");
328                 return Expression::Ptr();
329         }
330     }
331     
332     Expression::Ptr InstructionDecoder_x86::getModRMDisplacement(const InstructionDecoder::buffer& b)
333     {
334         int disp_pos;
335
336         if(locs->sib_position != -1)
337         {
338             disp_pos = locs->sib_position + 1;
339         }
340         else
341         {
342             disp_pos = locs->modrm_position + 1;
343         }
344         switch(locs->modrm_mod)
345         {
346             case 1:
347                 return make_shared(singleton_object_pool<Immediate>::construct(Result(s8, (*(const byte_t*)(b.start +
348                         disp_pos)))));
349                 break;
350             case 2:
351                 if(sizePrefixPresent)
352                 {
353                     return make_shared(singleton_object_pool<Immediate>::construct(Result(s16, *((const word_t*)(b.start +
354                             disp_pos)))));
355                 }
356                 else
357                 {
358                     return make_shared(singleton_object_pool<Immediate>::construct(Result(s32, *((const dword_t*)(b.start +
359                             disp_pos)))));
360                 }
361                 break;
362             case 0:
363                 // In 16-bit mode, the word displacement is modrm r/m 6
364                 if(sizePrefixPresent)
365                 {
366                     if(locs->modrm_rm == 6)
367                     {
368                         return make_shared(singleton_object_pool<Immediate>::construct(Result(s16,
369                                            *((const dword_t*)(b.start + disp_pos)))));
370                     }
371                     // TODO FIXME; this was decoding wrong, but I'm not sure
372                     // why...
373                     else if (locs->modrm_rm == 5) {
374                         assert(b.start + disp_pos + 4 <= b.end);
375                         return make_shared(singleton_object_pool<Immediate>::construct(Result(s32,
376                                            *((const dword_t*)(b.start + disp_pos)))));
377                     } else {
378                         assert(b.start + disp_pos + 1 <= b.end);
379                         return make_shared(singleton_object_pool<Immediate>::construct(Result(s8, 0)));
380                     }
381                     break;
382                 }
383                 // ...and in 32-bit mode, the dword displacement is modrm r/m 5
384                 else
385                 {
386                     if(locs->modrm_rm == 5)
387                     {
388                         assert(b.start + disp_pos + 4 <= b.end);
389                         return make_shared(singleton_object_pool<Immediate>::construct(Result(s32,
390                                            *((const dword_t*)(b.start + disp_pos)))));
391                     }
392                     else
393                     {
394                         assert(b.start + disp_pos + 1 <= b.end);
395                         return make_shared(singleton_object_pool<Immediate>::construct(Result(s8, 0)));
396                     }
397                     break;
398                 }
399             default:
400                 assert(b.start + disp_pos + 1 <= b.end);
401                 return make_shared(singleton_object_pool<Immediate>::construct(Result(s8, 0)));
402                 break;
403         }
404     }
405
/*
 * Row selectors for the IntelRegTable32 / IntelRegTable64 lookup tables.
 * Each enumerator's value is the row it indexes, so the order here must
 * stay in step with the row order of both tables.
 */
enum intelRegBanks
{
    b_8bitNoREX   = 0,   // al, cl, dl, bl, ah, ch, dh, bh
    b_16bit       = 1,   // ax .. di
    b_32bit       = 2,   // eax .. edi
    b_segment     = 3,   // es, cs, ss, ds, fs, gs
    b_64bit       = 4,   // rax .. rdi
    b_xmm         = 5,   // xmm0 .. xmm7
    b_xmmhigh     = 6,   // xmm8 .. xmm15
    b_mm          = 7,   // mm0 .. mm7
    b_cr          = 8,   // cr0 .. cr7
    b_dr          = 9,   // dr0 .. dr7
    b_tr          = 10,  // tr0 .. tr7
    b_amd64ext    = 11,  // r8 .. r15
    b_8bitWithREX = 12,  // al, cl, dl, bl, spl, bpl, sil, dil
    b_fpstack     = 13   // st0 .. st7
};
423     static MachRegister IntelRegTable32[][8] = {
424         {
425             x86::al, x86::cl, x86::dl, x86::bl, x86::ah, x86::ch, x86::dh, x86::bh
426         },
427         {
428             x86::ax, x86::cx, x86::dx, x86::bx, x86::sp, x86::bp, x86::si, x86::di
429         },
430         {
431             x86::eax, x86::ecx, x86::edx, x86::ebx, x86::esp, x86::ebp, x86::esi, x86::edi
432         },
433         {
434            x86::es, x86::cs, x86::ss, x86::ds, x86::fs, x86::gs, InvalidReg, InvalidReg
435         },
436         {
437             x86_64::rax, x86_64::rcx, x86_64::rdx, x86_64::rbx, x86_64::rsp, x86_64::rbp, x86_64::rsi, x86_64::rdi
438         },
439         {
440             x86::xmm0, x86::xmm1, x86::xmm2, x86::xmm3, x86::xmm4, x86::xmm5, x86::xmm6, x86::xmm7
441         },
442         {
443             x86_64::xmm8, x86_64::xmm9, x86_64::xmm10, x86_64::xmm11, x86_64::xmm12, x86_64::xmm13, x86_64::xmm14, x86_64::xmm15
444         },
445         {
446             x86::mm0, x86::mm1, x86::mm2, x86::mm3, x86::mm4, x86::mm5, x86::mm6, x86::mm7
447         },
448         {
449             x86::cr0, x86::cr1, x86::cr2, x86::cr3, x86::cr4, x86::cr5, x86::cr6, x86::cr7
450         },
451         {
452             x86::dr0, x86::dr1, x86::dr2, x86::dr3, x86::dr4, x86::dr5, x86::dr6, x86::dr7
453         },
454         {
455             x86::tr0, x86::tr1, x86::tr2, x86::tr3, x86::tr4, x86::tr5, x86::tr6, x86::tr7
456         },
457         {
458             x86_64::r8, x86_64::r9, x86_64::r10, x86_64::r11, x86_64::r12, x86_64::r13, x86_64::r14, x86_64::r15
459         },
460         {
461             x86_64::al, x86_64::cl, x86_64::dl, x86_64::bl, x86_64::spl, x86_64::bpl, x86_64::sil, x86_64::dil
462         },
463         {
464             x86::st0, x86::st1, x86::st2, x86::st3, x86::st4, x86::st5, x86::st6, x86::st7
465         }
466
467     };
468     static MachRegister IntelRegTable64[][8] = {
469         {
470             x86_64::al, x86_64::cl, x86_64::dl, x86_64::bl, x86_64::ah, x86_64::ch, x86_64::dh, x86_64::bh
471         },
472         {
473             x86_64::ax, x86_64::cx, x86_64::dx, x86_64::bx, x86_64::sp, x86_64::bp, x86_64::si, x86_64::di
474         },
475         {
476             x86_64::eax, x86_64::ecx, x86_64::edx, x86_64::ebx, x86_64::esp, x86_64::ebp, x86_64::esi, x86_64::edi
477         },
478         {
479             x86_64::es, x86_64::cs, x86_64::ss, x86_64::ds, x86_64::fs, x86_64::gs, InvalidReg, InvalidReg
480         },
481         {
482             x86_64::rax, x86_64::rcx, x86_64::rdx, x86_64::rbx, x86_64::rsp, x86_64::rbp, x86_64::rsi, x86_64::rdi
483         },
484         {
485             x86_64::xmm0, x86_64::xmm1, x86_64::xmm2, x86_64::xmm3, x86_64::xmm4, x86_64::xmm5, x86_64::xmm6, x86_64::xmm7
486         },
487         {
488             x86_64::xmm8, x86_64::xmm9, x86_64::xmm10, x86_64::xmm11, x86_64::xmm12, x86_64::xmm13, x86_64::xmm14, x86_64::xmm15
489         },
490         {
491             x86_64::mm0, x86_64::mm1, x86_64::mm2, x86_64::mm3, x86_64::mm4, x86_64::mm5, x86_64::mm6, x86_64::mm7
492         },
493         {
494             x86_64::cr0, x86_64::cr1, x86_64::cr2, x86_64::cr3, x86_64::cr4, x86_64::cr5, x86_64::cr6, x86_64::cr7
495         },
496         {
497             x86_64::dr0, x86_64::dr1, x86_64::dr2, x86_64::dr3, x86_64::dr4, x86_64::dr5, x86_64::dr6, x86_64::dr7
498         },
499         {
500             x86_64::tr0, x86_64::tr1, x86_64::tr2, x86_64::tr3, x86_64::tr4, x86_64::tr5, x86_64::tr6, x86_64::tr7
501         },
502         {
503             x86_64::r8, x86_64::r9, x86_64::r10, x86_64::r11, x86_64::r12, x86_64::r13, x86_64::r14, x86_64::r15
504         },
505         {
506             x86_64::al, x86_64::cl, x86_64::dl, x86_64::bl, x86_64::spl, x86_64::bpl, x86_64::sil, x86_64::dil
507         },
508         {
509             x86_64::st0, x86_64::st1, x86_64::st2, x86_64::st3, x86_64::st4, x86_64::st5, x86_64::st6, x86_64::st7
510         }
511
512     };
513
514   /* Uses the appropriate lookup table based on the 
515      decoder architecture */
516   class IntelRegTable_access {
517     public:
518         inline MachRegister operator()(Architecture arch,
519                                        intelRegBanks bank,
520                                        int index)
521         {
522             assert(index >= 0 && index < 8);
523     
524             if(arch == Arch_x86_64)
525                 return IntelRegTable64[bank][index];
526             else if(arch == Arch_x86)
527                 return IntelRegTable32[bank][index];
528             else
529                 assert(0);
530             return IntelRegTable32[bank][index];
531         }
532
533   };
534   static IntelRegTable_access IntelRegTable;
535
536     MachRegister InstructionDecoder_x86::makeRegisterID(unsigned int intelReg, unsigned int opType,
537                                         bool isExtendedReg)
538     {
539         MachRegister retVal;
540
541         if(isExtendedReg)
542         {
543             retVal = IntelRegTable(m_Arch,b_amd64ext,intelReg);
544         }
545         /* Promotion to 64-bit only applies to the operand types
546            that are varible (c,v,z). Ignoring c and z because they
547            do the right thing on 32- and 64-bit code.
548         else if(locs->rex_w)
549         {
550             // AMD64 with 64-bit operands
551             retVal = IntelRegTable[b_64bit][intelReg];
552         }
553         */
554         else
555         {
556             switch(opType)
557             {
558                 case op_v:
559                     if(locs->rex_w)
560                         retVal = IntelRegTable(m_Arch,b_64bit,intelReg);
561                     else
562                         retVal = IntelRegTable(m_Arch,b_32bit,intelReg);
563                     break;
564                 case op_b:
565                     if (locs->rex_position == -1) {
566                         retVal = IntelRegTable(m_Arch,b_8bitNoREX,intelReg);
567                     } else {
568                         retVal = IntelRegTable(m_Arch,b_8bitWithREX,intelReg);
569                     }
570                     break;
571                 case op_q:
572                     retVal = IntelRegTable(m_Arch,b_64bit,intelReg);
573                     break;
574                 case op_w:
575                     retVal = IntelRegTable(m_Arch,b_16bit,intelReg);
576                     break;
577                 case op_f:
578                 case op_dbl:
579                     retVal = IntelRegTable(m_Arch,b_fpstack,intelReg);
580                     break;
581                 case op_d:
582                 case op_si:
583                     retVal = IntelRegTable(m_Arch,b_32bit,intelReg);
584                     break;
585                 default:
586                     retVal = IntelRegTable(m_Arch,b_32bit,intelReg);
587                     break;
588             }
589         }
590
591         if (!ia32_is_mode_64()) {
592           if ((retVal.val() & 0x00ffffff) == 0x0001000c)
593             assert(0);
594         }
595
596         return MachRegister((retVal.val() & ~retVal.getArchitecture()) | m_Arch);
597     }
598     
599     Result_Type InstructionDecoder_x86::makeSizeType(unsigned int opType)
600     {
601         switch(opType)
602         {
603             case op_b:
604             case op_c:
605                 return u8;
606             case op_d:
607             case op_ss:
608             case op_allgprs:
609             case op_si:
610                 return u32;
611             case op_w:
612             case op_a:
613                 return u16;
614             case op_q:
615             case op_sd:
616                 return u64;
617             case op_v:
618             case op_lea:
619             case op_z:
620                 if(is32BitMode ^ sizePrefixPresent)
621                 {
622                     return u32;
623                 }
624                 else
625                 {
626                     return u16;
627                 }
628                 break;
629             case op_p:
630                 // book says operand size; arch-x86 says word + word * operand size
631                 if(is32BitMode ^ sizePrefixPresent)
632                 {
633                     return u48;
634                 }
635                 else
636                 {
637                     return u32;
638                 }
639             case op_dq:
640                 return u64;
641             case op_512:
642                 return m512;
643             case op_pi:
644             case op_ps:
645             case op_pd:
646                 return dbl128;
647             case op_s:
648                 return u48;
649             case op_f:
650                 return sp_float;
651             case op_dbl:
652                 return dp_float;
653             case op_14:
654                 return m14;
655             default:
656                 assert(!"Can't happen!");
657                 return u8;
658         }
659     }
660
661
662     bool InstructionDecoder_x86::decodeOneOperand(const InstructionDecoder::buffer& b,
663                                                   const ia32_operand& operand,
664                                                   int & imm_index, /* immediate operand index */
665                                                   const Instruction* insn_to_complete, 
666                                                   bool isRead, bool isWritten)
667     {
668        bool isCFT = false;
669       bool isCall = false;
670       bool isConditional = false;
671       InsnCategory cat = insn_to_complete->getCategory();
672       if(cat == c_BranchInsn || cat == c_CallInsn)
673         {
674           isCFT = true;
675           if(cat == c_CallInsn)
676             {
677               isCall = true;
678             }
679         }
680       if (cat == c_BranchInsn && insn_to_complete->getOperation().getID() != e_jmp) {
681         isConditional = true;
682       }
683
684       unsigned int optype = operand.optype;
685       if (sizePrefixPresent && 
686           ((optype == op_v) ||
687            (optype == op_z))) {
688         optype = op_w;
689       }
690                 switch(operand.admet)
691                 {
692                     case 0:
693                     // No operand
694                     {
695 /*                        fprintf(stderr, "ERROR: Instruction with mismatched operands. Raw bytes: ");
696                         for(unsigned int i = 0; i < decodedInstruction->getSize(); i++) {
697                             fprintf(stderr, "%x ", b.start[i]);
698                         }
699                         fprintf(stderr, "\n");*/
700                         assert(!"Mismatched number of operands--check tables");
701                         return false;
702                     }
703                     case am_A:
704                     {
705                         // am_A only shows up as a far call/jump.  Position 1 should be universally safe.
706                         Expression::Ptr addr(decodeImmediate(optype, b.start + 1));
707                         insn_to_complete->addSuccessor(addr, isCall, false, false, false);
708                     }
709                     break;
710                     case am_C:
711                     {
712                         Expression::Ptr op(makeRegisterExpression(IntelRegTable(m_Arch,b_cr,locs->modrm_reg)));
713                         insn_to_complete->appendOperand(op, isRead, isWritten);
714                     }
715                     break;
716                     case am_D:
717                     {
718                         Expression::Ptr op(makeRegisterExpression(IntelRegTable(m_Arch,b_dr,locs->modrm_reg)));
719                         insn_to_complete->appendOperand(op, isRead, isWritten);
720                     }
721                     break;
722                     case am_E:
723                     // am_M is like am_E, except that mod of 0x03 should never occur (am_M specified memory,
724                     // mod of 0x03 specifies direct register access).
725                     case am_M:
726                     // am_R is the inverse of am_M; it should only have a mod of 3
727                     case am_R:
728                         if(isCFT)
729                         {
730                           insn_to_complete->addSuccessor(makeModRMExpression(b, optype), isCall, true, false, false);
731                         }
732                         else
733                         {
734                           insn_to_complete->appendOperand(makeModRMExpression(b, optype), isRead, isWritten);
735                         }
736                     break;
737                     case am_F:
738                     {
739                         Expression::Ptr op(makeRegisterExpression(x86::flags));
740                         insn_to_complete->appendOperand(op, isRead, isWritten);
741                     }
742                     break;
743                     case am_G:
744                     {
745                         Expression::Ptr op(makeRegisterExpression(makeRegisterID(locs->modrm_reg,
746                                 optype, locs->rex_r)));
747                         insn_to_complete->appendOperand(op, isRead, isWritten);
748                     }
749                     break;
750                     case am_I:
751                         insn_to_complete->appendOperand(decodeImmediate(optype, b.start + 
752                                                                         locs->imm_position[imm_index++]), 
753                                                         isRead, isWritten);
754                         break;
755                     case am_J:
756                     {
757                         Expression::Ptr Offset(decodeImmediate(optype, 
758                                                                b.start + locs->imm_position[imm_index++], 
759                                                                true));
760                         Expression::Ptr EIP(makeRegisterExpression(MachRegister::getPC(m_Arch)));
761                         Expression::Ptr InsnSize(make_shared(singleton_object_pool<Immediate>::construct(Result(u8,
762                             decodedInstruction->getSize()))));
763                         Expression::Ptr postEIP(makeAddExpression(EIP, InsnSize, u32));
764
765                         Expression::Ptr op(makeAddExpression(Offset, postEIP, u32));
766                         insn_to_complete->addSuccessor(op, isCall, false, isConditional, false);
767                         if (isConditional) 
768                           insn_to_complete->addSuccessor(postEIP, false, false, true, true);
769                     }
770                     break;
771                     case am_O:
772                     {
773                     // Address/offset width, which is *not* what's encoded by the optype...
774                     // The deref's width is what's actually encoded here.
775                         int pseudoOpType;
776                         switch(locs->address_size)
777                         {
778                             case 1:
779                                 pseudoOpType = op_b;
780                                 break;
781                             case 2:
782                                 pseudoOpType = op_w;
783                                 break;
784                             case 4:
785                                 pseudoOpType = op_d;
786                                 break;
787                             case 0:
788                                 // closest I can get to "will be address size by default"
789                                 pseudoOpType = op_v;
790                                 break;
791                             default:
792                                 assert(!"Bad address size, should be 0, 1, 2, or 4!");
793                                 pseudoOpType = op_b;
794                                 break;
795                         }
796
797
798                         int offset_position = locs->opcode_position;
799                         if(locs->modrm_position > offset_position && locs->modrm_operand <
800                            (int)(insn_to_complete->m_Operands.size()))
801                         {
802                             offset_position = locs->modrm_position;
803                         }
804                         if(locs->sib_position > offset_position)
805                         {
806                             offset_position = locs->sib_position;
807                         }
808                         offset_position++;
809                         insn_to_complete->appendOperand(makeDereferenceExpression(
810                                 decodeImmediate(pseudoOpType, b.start + offset_position), makeSizeType(optype)), 
811                                                         isRead, isWritten);
812                     }
813                     break;
814                     case am_P:
815                         insn_to_complete->appendOperand(makeRegisterExpression(IntelRegTable(m_Arch,b_mm,locs->modrm_reg)),
816                                 isRead, isWritten);
817                         break;
818                     case am_Q:
819         
820                         switch(locs->modrm_mod)
821                         {
822                             // direct dereference
823                             case 0x00:
824                             case 0x01:
825                             case 0x02:
826                               insn_to_complete->appendOperand(makeModRMExpression(b, optype), isRead, isWritten);
827                                 break;
828                             case 0x03:
829                                 // use of actual register
830                                 insn_to_complete->appendOperand(makeRegisterExpression(IntelRegTable(m_Arch,b_mm,locs->modrm_rm)),
831                                                                isRead, isWritten);
832                                 break;
833                             default:
834                                 assert(!"2-bit value modrm_mod out of range");
835                                 break;
836                         };
837                         break;
838                     case am_S:
839                     // Segment register in modrm reg field.
840                         insn_to_complete->appendOperand(makeRegisterExpression(IntelRegTable(m_Arch,b_segment,locs->modrm_reg)),
841                                 isRead, isWritten);
842                         break;
843                     case am_T:
844                         // test register in modrm reg; should only be tr6/tr7, but we'll decode any of them
845                         // NOTE: this only appears in deprecated opcodes
846                         insn_to_complete->appendOperand(makeRegisterExpression(IntelRegTable(m_Arch,b_tr,locs->modrm_reg)),
847                                                        isRead, isWritten);
848                         break;
849                     case am_V:
850                        
851                         insn_to_complete->appendOperand(makeRegisterExpression(IntelRegTable(m_Arch,
852                                 (locs->rex_r == 1 )? b_xmmhigh : b_xmm,locs->modrm_reg)),
853                                     isRead, isWritten);
854                         break;
855                     case am_W:
856                         switch(locs->modrm_mod)
857                         {
858                             // direct dereference
859                             case 0x00:
860                             case 0x01:
861                             case 0x02:
862                               insn_to_complete->appendOperand(makeModRMExpression(b, makeSizeType(optype)),
863                                                                isRead, isWritten);
864                                 break;
865                             case 0x03:
866                             // use of actual register
867                             {
868                                 insn_to_complete->appendOperand(makeRegisterExpression(IntelRegTable(m_Arch,
869                                         (locs->rex_b == 1) ? b_xmmhigh : b_xmm, locs->modrm_rm)),
870                                         isRead, isWritten);
871                                 break;
872                             }
873                             default:
874                                 assert(!"2-bit value modrm_mod out of range");
875                                 break;
876                         };
877                         break;
878                     case am_X:
879                     {
880                         MachRegister si_reg;
881                         if(m_Arch == Arch_x86)
882                         {
883                                 if(addrSizePrefixPresent)
884                                 {
885                                         si_reg = x86::si;
886                                 } else
887                                 {
888                                         si_reg = x86::esi;
889                                 }
890                         }
891                         else
892                         {
893                                 if(addrSizePrefixPresent)
894                                 {
895                                         si_reg = x86_64::esi;
896                                 } else
897                                 {
898                                         si_reg = x86_64::rsi;
899                                 }
900                         }
901                         Expression::Ptr ds(makeRegisterExpression(m_Arch == Arch_x86 ? x86::ds : x86_64::ds));
902                         Expression::Ptr si(makeRegisterExpression(si_reg));
903                         Expression::Ptr segmentOffset(make_shared(singleton_object_pool<Immediate>::construct(
904                                 Result(u32, 0x10))));
905                         Expression::Ptr ds_segment = makeMultiplyExpression(ds, segmentOffset, u32);
906                         Expression::Ptr ds_si = makeAddExpression(ds_segment, si, u32);
907                         insn_to_complete->appendOperand(makeDereferenceExpression(ds_si, makeSizeType(optype)),
908                                                        isRead, isWritten);
909                     }
910                     break;
911                     case am_Y:
912                     {
913                         MachRegister di_reg;
914                         if(m_Arch == Arch_x86)
915                         {
916                                 if(addrSizePrefixPresent)
917                                 {
918                                         di_reg = x86::di;
919                                 } else
920                                 {
921                                         di_reg = x86::edi;
922                                 }
923                         }
924                         else
925                         {
926                                 if(addrSizePrefixPresent)
927                                 {
928                                         di_reg = x86_64::edi;
929                                 } else
930                                 {
931                                         di_reg = x86_64::rdi;
932                                 }
933                         }
934                         Expression::Ptr es(makeRegisterExpression(m_Arch == Arch_x86 ? x86::es : x86_64::es));
935                         Expression::Ptr di(makeRegisterExpression(di_reg));
936                         Expression::Ptr es_segment = makeMultiplyExpression(es,
937                             make_shared(singleton_object_pool<Immediate>::construct(Result(u32, 0x10))), u32);
938                         Expression::Ptr es_di = makeAddExpression(es_segment, di, u32);
939                         insn_to_complete->appendOperand(makeDereferenceExpression(es_di, makeSizeType(optype)),
940                                                        isRead, isWritten);
941                     }
942                     break;
943                     case am_tworeghack:
944                     {
945                         if(optype == op_edxeax)
946                         {
947                             Expression::Ptr edx(makeRegisterExpression(m_Arch == Arch_x86 ? x86::edx : x86_64::edx));
948                             Expression::Ptr eax(makeRegisterExpression(m_Arch == Arch_x86 ? x86::eax : x86_64::eax));
949                             Expression::Ptr highAddr = makeMultiplyExpression(edx,
950                                     Immediate::makeImmediate(Result(u64, 2^32)), u64);
951                             Expression::Ptr addr = makeAddExpression(highAddr, eax, u64);
952                             Expression::Ptr op = makeDereferenceExpression(addr, u64);
953                             insn_to_complete->appendOperand(op, isRead, isWritten);
954                         }
955                         else if (optype == op_ecxebx)
956                         {
957                             Expression::Ptr ecx(makeRegisterExpression(m_Arch == Arch_x86 ? x86::ecx : x86_64::ecx));
958                             Expression::Ptr ebx(makeRegisterExpression(m_Arch == Arch_x86 ? x86::ebx : x86_64::ebx));
959                             Expression::Ptr highAddr = makeMultiplyExpression(ecx,
960                                     Immediate::makeImmediate(Result(u64, 2^32)), u64);
961                             Expression::Ptr addr = makeAddExpression(highAddr, ebx, u64);
962                             Expression::Ptr op = makeDereferenceExpression(addr, u64);
963                             insn_to_complete->appendOperand(op, isRead, isWritten);
964                         }
965                     }
966                     break;
967                     
968                     case am_reg:
969                     {
970                         MachRegister r(optype);
971                         r = MachRegister(r.val() & ~r.getArchitecture() | m_Arch);
972                         if(locs->rex_b && insn_to_complete->m_Operands.empty())
973                         {
974                             // FP stack registers are not affected by the rex_b bit in AM_REG.
975                             if(r.regClass() != x86::MMX)
976                             {
977                                 r = MachRegister((r.val()) | x86_64::r8.val());
978                             }
979                         }
980                         if(sizePrefixPresent)
981                         {
982                             r = MachRegister((r.val() & ~x86::FULL) | x86::W_REG);
983                         }
984                         Expression::Ptr op(makeRegisterExpression(r));
985                         insn_to_complete->appendOperand(op, isRead, isWritten);
986                     }
987                     break;
988                 case am_stackH:
989                 case am_stackP:
990                 // handled elsewhere
991                     break;
992                 case am_allgprs:
993                 {
994                     if(m_Arch == Arch_x86)
995                     {
996                         insn_to_complete->appendOperand(makeRegisterExpression(x86::eax), isRead, isWritten);
997                         insn_to_complete->appendOperand(makeRegisterExpression(x86::ecx), isRead, isWritten);
998                         insn_to_complete->appendOperand(makeRegisterExpression(x86::edx), isRead, isWritten);
999                         insn_to_complete->appendOperand(makeRegisterExpression(x86::ebx), isRead, isWritten);
1000                         insn_to_complete->appendOperand(makeRegisterExpression(x86::esp), isRead, isWritten);
1001                         insn_to_complete->appendOperand(makeRegisterExpression(x86::ebp), isRead, isWritten);
1002                         insn_to_complete->appendOperand(makeRegisterExpression(x86::esi), isRead, isWritten);
1003                         insn_to_complete->appendOperand(makeRegisterExpression(x86::edi), isRead, isWritten);
1004                     }
1005                     else
1006                     {
1007                         insn_to_complete->appendOperand(makeRegisterExpression(x86_64::eax), isRead, isWritten);
1008                         insn_to_complete->appendOperand(makeRegisterExpression(x86_64::ecx), isRead, isWritten);
1009                         insn_to_complete->appendOperand(makeRegisterExpression(x86_64::edx), isRead, isWritten);
1010                         insn_to_complete->appendOperand(makeRegisterExpression(x86_64::ebx), isRead, isWritten);
1011                         insn_to_complete->appendOperand(makeRegisterExpression(x86_64::esp), isRead, isWritten);
1012                         insn_to_complete->appendOperand(makeRegisterExpression(x86_64::ebp), isRead, isWritten);
1013                         insn_to_complete->appendOperand(makeRegisterExpression(x86_64::esi), isRead, isWritten);
1014                         insn_to_complete->appendOperand(makeRegisterExpression(x86_64::edi), isRead, isWritten);
1015                     }
1016                 }
1017                     break;
1018                 case am_ImplImm: {
1019                   insn_to_complete->appendOperand(Immediate::makeImmediate(Result(makeSizeType(optype), 1)), isRead, isWritten);
1020                   break;
1021                 }
1022
1023                 default:
1024                     printf("decodeOneOperand() called with unknown addressing method %d\n", operand.admet);
1025                         break;
1026                 };
1027                 return true;
1028             }
1029
1030     extern ia32_entry invalid;
1031     
1032     void InstructionDecoder_x86::doIA32Decode(InstructionDecoder::buffer& b)
1033     {
1034         if(decodedInstruction == NULL)
1035         {
1036             decodedInstruction = reinterpret_cast<ia32_instruction*>(malloc(sizeof(ia32_instruction)));
1037             assert(decodedInstruction);
1038         }
1039         if(locs == NULL)
1040         {
1041             locs = reinterpret_cast<ia32_locations*>(malloc(sizeof(ia32_locations)));
1042             assert(locs);
1043         }
1044         locs = new(locs) ia32_locations; //reinit();
1045         assert(locs->sib_position == -1);
1046         decodedInstruction = new (decodedInstruction) ia32_instruction(NULL, NULL, locs);
1047         ia32_decode(IA32_DECODE_PREFIXES, b.start, *decodedInstruction);
1048         sizePrefixPresent = (decodedInstruction->getPrefix()->getOperSzPrefix() == 0x66);
1049         if (decodedInstruction->getPrefix()->rexW()) {
1050            // as per 2.2.1.2 - rex.w overrides 66h
1051            sizePrefixPresent = false;
1052         }
1053         addrSizePrefixPresent = (decodedInstruction->getPrefix()->getAddrSzPrefix() == 0x67);
1054     }
1055     
1056     void InstructionDecoder_x86::decodeOpcode(InstructionDecoder::buffer& b)
1057     {
1058         static ia32_entry invalid = { e_No_Entry, 0, 0, true, { {0,0}, {0,0}, {0,0} }, 0, 0 };
1059         doIA32Decode(b);
1060         if(decodedInstruction->getEntry()) {
1061             m_Operation = make_shared(singleton_object_pool<Operation>::construct(decodedInstruction->getEntry(),
1062                                     decodedInstruction->getPrefix(), locs, m_Arch));
1063         }
1064         else
1065         {
1066                 // Gap parsing can trigger this case; in particular, when it encounters prefixes in an invalid order.
1067                 // Notably, if a REX prefix (0x40-0x48) appears followed by another prefix (0x66, 0x67, etc)
1068                 // we'll reject the instruction as invalid and send it back with no entry.  Since this is a common
1069                 // byte sequence to see in, for example, ASCII strings, we want to simply accept this and move on, not
1070                 // yell at the user.
1071             m_Operation = make_shared(singleton_object_pool<Operation>::construct(&invalid,
1072                                     decodedInstruction->getPrefix(), locs, m_Arch));
1073         }
1074         b.start += decodedInstruction->getSize();
1075     }
1076     
1077       bool InstructionDecoder_x86::decodeOperands(const Instruction* insn_to_complete)
1078     {
1079        int imm_index = 0; // handle multiple immediate operands
1080         if(!decodedInstruction) return false;
1081         unsigned int opsema = decodedInstruction->getEntry()->opsema & 0xFF;
1082         InstructionDecoder::buffer b(insn_to_complete->ptr(), insn_to_complete->size());
1083         
1084         for(unsigned i = 0; i < 3; i++)
1085         {
1086             if(decodedInstruction->getEntry()->operands[i].admet == 0 && 
1087                decodedInstruction->getEntry()->operands[i].optype == 0)
1088                 return true;
1089             if(!decodeOneOperand(b,
1090                                  decodedInstruction->getEntry()->operands[i], 
1091                                  imm_index, 
1092                                  insn_to_complete, 
1093                                  readsOperand(opsema, i),
1094                                  writesOperand(opsema, i)))
1095             {
1096                 return false;
1097             }
1098         }
1099     
1100         return true;
1101     }
1102
1103     
1104       INSTRUCTION_EXPORT Instruction::Ptr InstructionDecoder_x86::decode(InstructionDecoder::buffer& b)
1105     {
1106         return InstructionDecoderImpl::decode(b);
1107     }
1108     void InstructionDecoder_x86::doDelayedDecode(const Instruction* insn_to_complete)
1109     {
1110       InstructionDecoder::buffer b(insn_to_complete->ptr(), insn_to_complete->size());
1111       //insn_to_complete->m_Operands.reserve(4);
1112       doIA32Decode(b);        
1113       decodeOperands(insn_to_complete);
1114     }
1115     
1116 };
1117 };
1118