Merge branch 'master' into devel
[dyninst.git] / instructionAPI / src / InstructionDecoder-x86.C
1 /*
2 * Copyright (c) 1996-2009 Barton P. Miller
3 *
4 * We provide the Paradyn Parallel Performance Tools (below
5 * described as "Paradyn") on an AS IS basis, and do not warrant its
6 * validity or performance.  We reserve the right to update, modify,
7 * or discontinue this software at any time.  We shall have no
8 * obligation to supply such updates or modifications or any other
9 * form of support to you.
10 *
11 * By your use of Paradyn, you understand and agree that we (or any
12 * other person or entity with proprietary rights in Paradyn) are
13 * under no obligation to provide either maintenance services,
14 * update services, notices of latent defects, or correction of
15 * defects for Paradyn.
16 *
17 * This library is free software; you can redistribute it and/or
18 * modify it under the terms of the GNU Lesser General Public
19 * License as published by the Free Software Foundation; either
20 * version 2.1 of the License, or (at your option) any later version.
21 *
22 * This library is distributed in the hope that it will be useful,
23 * but WITHOUT ANY WARRANTY; without even the implied warranty of
24 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
25 * Lesser General Public License for more details.
26 *
27 * You should have received a copy of the GNU Lesser General Public
28 * License along with this library; if not, write to the Free Software
29 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
30 */
31
32 #define INSIDE_INSTRUCTION_API
33
34 #include "InstructionDecoder-x86.h"
35 #include "Expression.h"
36 #include "arch-x86.h"
37 #include "Register.h"
38 #include "Dereference.h"
39 #include "Immediate.h"
40 #include "BinaryFunction.h"
41 #include "common/h/singleton_object_pool.h"
42
43 using namespace std;
44 namespace Dyninst
45 {
46     namespace InstructionAPI
47     {
48     
49         bool readsOperand(unsigned int opsema, unsigned int i)
50         {
51             switch(opsema) {
52                 case s1R2R:
53                     return (i == 0 || i == 1);
54                 case s1R:
55                 case s1RW:
56                     return i == 0;
57                 case s1W:
58                     return false;
59                 case s1W2RW:
60                 case s1W2R:   // second operand read, first operand written (e.g. mov)
61                     return i == 1;
62                 case s1RW2R:  // two operands read, first written (e.g. add)
63                 case s1RW2RW: // e.g. xchg
64                 case s1R2RW:
65                     return i == 0 || i == 1;
66                 case s1W2R3R: // e.g. imul
67                 case s1W2RW3R: // some mul
68                 case s1W2R3RW: // (stack) push & pop
69                     return i == 1 || i == 2;
70                 case s1W2W3R: // e.g. les
71                     return i == 2;
72                 case s1RW2R3R: // shld/shrd
73                 case s1RW2RW3R: // [i]div, cmpxch8b
74                 case s1R2R3R:
75                     return i == 0 || i == 1 || i == 2;
76                     break;
77                 case sNONE:
78                 default:
79                     return false;
80             }
81       
82         }
83       
84         bool writesOperand(unsigned int opsema, unsigned int i)
85         {
86             switch(opsema) {
87                 case s1R2R:
88                 case s1R:
89                     return false;
90                 case s1RW:
91                 case s1W:
92                     case s1W2R:   // second operand read, first operand written (e.g. mov)
93                         case s1RW2R:  // two operands read, first written (e.g. add)
94                             case s1W2R3R: // e.g. imul
95                                 case s1RW2R3R: // shld/shrd
96                                     return i == 0;
97                 case s1R2RW:
98                     return i == 1;
99                 case s1W2RW:
100                     case s1RW2RW: // e.g. xchg
101                         case s1W2RW3R: // some mul
102                             case s1W2W3R: // e.g. les
103                                 case s1RW2RW3R: // [i]div, cmpxch8b
104                                     return i == 0 || i == 1;
105                                     case s1W2R3RW: // (stack) push & pop
106                                         return i == 0 || i == 2;
107                 case sNONE:
108                 default:
109                     return false;
110             }
111         }
112
113
114     
115     INSTRUCTION_EXPORT InstructionDecoder_x86::InstructionDecoder_x86(Architecture a) :
116       InstructionDecoderImpl(a),
117     locs(NULL),
118     decodedInstruction(NULL),
119     is32BitMode(true),
120     sizePrefixPresent(false)
121     {
122     }
123     INSTRUCTION_EXPORT InstructionDecoder_x86::~InstructionDecoder_x86()
124     {
125         if(decodedInstruction) decodedInstruction->~ia32_instruction();
126         free(decodedInstruction);
127         if(locs) locs->~ia32_locations();
128         free(locs);
129
130     }
// Value of the ModRM r/m field for which makeModRMExpression() builds the
// address from the SIB byte instead of from a plain register.
static const unsigned char modrm_use_sib = 0x04;
132     
133     INSTRUCTION_EXPORT void InstructionDecoder_x86::setMode(bool is64)
134     {
135         ia32_set_mode_64(is64);
136     }
137     
138       Expression::Ptr InstructionDecoder_x86::makeSIBExpression(const InstructionDecoder::buffer& b)
139     {
140         unsigned scale;
141         Register index;
142         Register base;
143         Result_Type registerType = ia32_is_mode_64() ? u32 : u64;
144
145         decode_SIB(locs->sib_byte, scale, index, base);
146
147         Expression::Ptr scaleAST(make_shared(singleton_object_pool<Immediate>::construct(Result(u8, dword_t(scale)))));
148         Expression::Ptr indexAST(make_shared(singleton_object_pool<RegisterAST>::construct(makeRegisterID(index, registerType,
149                                     locs->rex_x))));
150         Expression::Ptr baseAST;
151         if(base == 0x05)
152         {
153             switch(locs->modrm_mod)
154             {
155                 case 0x00:
156                     baseAST = decodeImmediate(op_d, b.start + locs->sib_position + 1);
157                     break;
158                     case 0x01: {
159                         MachRegister reg;
160                         if (locs->rex_b)
161                             reg = x86_64::r13;
162                         else
163                           reg = MachRegister::getFramePointer(m_Arch);
164                         
165                         baseAST = makeAddExpression(make_shared(singleton_object_pool<RegisterAST>::construct(reg)),
166                                                     decodeImmediate(op_b, b.start + locs->sib_position + 1),
167                                                     registerType);
168                         break;
169                     }
170                     case 0x02: {
171                         MachRegister reg;
172                         if (locs->rex_b)
173                             reg = x86_64::r13;
174                         else
175                             reg = MachRegister::getFramePointer(m_Arch);
176
177                         baseAST = makeAddExpression(make_shared(singleton_object_pool<RegisterAST>::construct(reg)), 
178                                                     decodeImmediate(op_d, b.start + locs->sib_position + 1),
179                                                     registerType);
180                         break;
181                     }
182                 case 0x03:
183                 default:
184                     assert(0);
185                     break;
186             };
187         }
188         else
189         {
190             baseAST = make_shared(singleton_object_pool<RegisterAST>::construct(makeRegisterID(base, 
191                                                                                                registerType,
192                                                                                                locs->rex_b)));
193         }
194         if(index == 0x04 && (!(ia32_is_mode_64()) || !(locs->rex_x)))
195         {
196             return baseAST;
197         }
198         return makeAddExpression(baseAST, makeMultiplyExpression(indexAST, scaleAST, registerType), registerType);
199     }
200
201       Expression::Ptr InstructionDecoder_x86::makeModRMExpression(const InstructionDecoder::buffer& b,
202                                                                   unsigned int opType)
203     {
204         unsigned int regType = op_d;
205         if(ia32_is_mode_64())
206         {
207             regType = op_q;
208         }
209         Result_Type aw = ia32_is_mode_64() ? u32 : u64;
210         Expression::Ptr e =
211           makeRegisterExpression(makeRegisterID(locs->modrm_rm, regType, (locs->rex_b == 1)));
212         switch(locs->modrm_mod)
213         {
214             case 0:
215                 if(locs->modrm_rm == modrm_use_sib) {
216                     e = makeSIBExpression(b);
217                 }
218                 if(locs->modrm_rm == 0x5 && !addrSizePrefixPresent)
219                 {
220                     assert(locs->opcode_position > -1);
221                     if(ia32_is_mode_64())
222                     {
223                         e = makeAddExpression(makeRegisterExpression(x86_64::rip),
224                                             getModRMDisplacement(b), aw);
225                     }
226                     else
227                     {
228                         e = getModRMDisplacement(b);
229                     }
230         
231                 }
232                 if(locs->modrm_rm == 0x6 && addrSizePrefixPresent)
233                 {
234                     e = getModRMDisplacement(b);
235                 }
236                 if(opType == op_lea)
237                 {
238                     return e;
239                 }
240                 return makeDereferenceExpression(e, makeSizeType(opType));
241                 assert(0);
242                 break;
243             case 1:
244             case 2:
245             {
246                 if(locs->modrm_rm == modrm_use_sib) {
247                     e = makeSIBExpression(b);
248                 }
249                 Expression::Ptr disp_e = makeAddExpression(e, getModRMDisplacement(b), aw);
250                 if(opType == op_lea)
251                 {
252                     return disp_e;
253                 }
254                 return makeDereferenceExpression(disp_e, makeSizeType(opType));
255             }
256             assert(0);
257             break;
258             case 3:
259                 return makeRegisterExpression(makeRegisterID(locs->modrm_rm, opType, (locs->rex_b == 1)));
260             default:
261                 return Expression::Ptr();
262         
263         };
264         // can't get here, but make the compiler happy...
265         assert(0);
266         return Expression::Ptr();
267     }
268
269     Expression::Ptr InstructionDecoder_x86::decodeImmediate(unsigned int opType, const unsigned char* immStart, 
270                                                             bool isSigned)
271     {
272         switch(opType)
273         {
274             case op_b:
275                 return Immediate::makeImmediate(Result(isSigned ? s8 : u8 ,*(const byte_t*)(immStart)));
276                 break;
277             case op_d:
278                 return Immediate::makeImmediate(Result(isSigned ? s32 : u32,*(const dword_t*)(immStart)));
279             case op_w:
280                 return Immediate::makeImmediate(Result(isSigned ? s16 : u16,*(const word_t*)(immStart)));
281                 break;
282             case op_q:
283                 return Immediate::makeImmediate(Result(isSigned ? s64 : u64,*(const int64_t*)(immStart)));
284                 break;
285             case op_v:
286             case op_z:
287         // 32 bit mode & no prefix, or 16 bit mode & prefix => 32 bit
288         // 16 bit mode, no prefix or 32 bit mode, prefix => 16 bit
289                 if(!sizePrefixPresent)
290                 {
291                     return Immediate::makeImmediate(Result(isSigned ? s32 : u32,*(const dword_t*)(immStart)));
292                 }
293                 else
294                 {
295                     return Immediate::makeImmediate(Result(isSigned ? s16 : u16,*(const word_t*)(immStart)));
296                 }
297         
298                 break;
299             case op_p:
300         // 32 bit mode & no prefix, or 16 bit mode & prefix => 48 bit
301         // 16 bit mode, no prefix or 32 bit mode, prefix => 32 bit
302                 if(!sizePrefixPresent)
303                 {
304                     return Immediate::makeImmediate(Result(isSigned ? s48 : u48,*(const int64_t*)(immStart)));
305                 }
306                 else
307                 {
308                     return Immediate::makeImmediate(Result(isSigned ? s32 : u32,*(const dword_t*)(immStart)));
309                 }
310         
311                 break;
312             case op_a:
313             case op_dq:
314             case op_pd:
315             case op_ps:
316             case op_s:
317             case op_si:
318             case op_lea:
319             case op_allgprs:
320             case op_512:
321             case op_c:
322                 assert(!"Can't happen: opType unexpected for valid ways to decode an immediate");
323                 return Expression::Ptr();
324             default:
325                 assert(!"Can't happen: opType out of range");
326                 return Expression::Ptr();
327         }
328     }
329     
330     Expression::Ptr InstructionDecoder_x86::getModRMDisplacement(const InstructionDecoder::buffer& b)
331     {
332         int disp_pos;
333
334         if(locs->sib_position != -1)
335         {
336             disp_pos = locs->sib_position + 1;
337         }
338         else
339         {
340             disp_pos = locs->modrm_position + 1;
341         }
342         switch(locs->modrm_mod)
343         {
344             case 1:
345                 return make_shared(singleton_object_pool<Immediate>::construct(Result(s8, (*(const byte_t*)(b.start +
346                         disp_pos)))));
347                 break;
348             case 2:
349                 if(sizePrefixPresent)
350                 {
351                     return make_shared(singleton_object_pool<Immediate>::construct(Result(s16, *((const word_t*)(b.start +
352                             disp_pos)))));
353                 }
354                 else
355                 {
356                     return make_shared(singleton_object_pool<Immediate>::construct(Result(s32, *((const dword_t*)(b.start +
357                             disp_pos)))));
358                 }
359                 break;
360             case 0:
361                 // In 16-bit mode, the word displacement is modrm r/m 6
362                 if(sizePrefixPresent)
363                 {
364                     if(locs->modrm_rm == 6)
365                     {
366                         return make_shared(singleton_object_pool<Immediate>::construct(Result(s16,
367                                            *((const dword_t*)(b.start + disp_pos)))));
368                     }
369                     else
370                     {
371                         assert(b.start + disp_pos + 1 <= b.end);
372                         return make_shared(singleton_object_pool<Immediate>::construct(Result(s8, 0)));
373                     }
374                     break;
375                 }
376                 // ...and in 32-bit mode, the dword displacement is modrm r/m 5
377                 else
378                 {
379                     if(locs->modrm_rm == 5)
380                     {
381                         assert(b.start + disp_pos + 4 <= b.end);
382                         return make_shared(singleton_object_pool<Immediate>::construct(Result(s32,
383                                            *((const dword_t*)(b.start + disp_pos)))));
384                     }
385                     else
386                     {
387                         assert(b.start + disp_pos + 1 <= b.end);
388                         return make_shared(singleton_object_pool<Immediate>::construct(Result(s8, 0)));
389                     }
390                     break;
391                 }
392             default:
393                 assert(b.start + disp_pos + 1 <= b.end);
394                 return make_shared(singleton_object_pool<Immediate>::construct(Result(s8, 0)));
395                 break;
396         }
397     }
398
// Row indices into IntelRegTable; each enumerator names one bank of eight
// registers.  The values are spelled out because they must match the row
// order of the table.
enum intelRegBanks
{
    b_8bitNoREX   = 0,   // al, cl, dl, bl, ah, ch, dh, bh
    b_16bit       = 1,   // ax .. di
    b_32bit       = 2,   // eax .. edi
    b_segment     = 3,   // es, cs, ss, ds, fs, gs
    b_64bit       = 4,   // rax .. rdi
    b_xmm         = 5,   // xmm0 .. xmm7
    b_mm          = 6,   // mm0 .. mm7
    b_cr          = 7,   // cr0 .. cr7
    b_dr          = 8,   // dr0 .. dr7
    b_tr          = 9,   // tr0 .. tr7
    b_amd64ext    = 10,  // r8 .. r15
    b_8bitWithREX = 11,  // al, cl, dl, bl, spl, bpl, sil, dil
    b_fpstack     = 12   // st0 .. st7
};
415     using namespace x86;
416     
417     static MachRegister IntelRegTable[][8] = {
418         {
419             al, cl, dl, bl, ah, ch, dh, bh
420         },
421         {
422             ax, cx, dx, bx, sp, bp, si, di
423         },
424         {
425             eax, ecx, edx, ebx, esp, ebp, esi, edi
426         },
427         {
428             es, cs, ss, ds, fs, gs, InvalidReg, InvalidReg
429         },
430         {
431             x86_64::rax, x86_64::rcx, x86_64::rdx, x86_64::rbx, x86_64::rsp, x86_64::rbp, x86_64::rsi, x86_64::rdi
432         },
433         {
434             xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
435         },
436         {
437             mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7
438         },
439         {
440             cr0, cr1, cr2, cr3, cr4, cr5, cr6, cr7
441         },
442         {
443             dr0, dr1, dr2, dr3, dr4, dr5, dr6, dr7
444         },
445         {
446             tr0, tr1, tr2, tr3, tr4, tr5, tr6, tr7
447         },
448         {
449             x86_64::r8, x86_64::r9, x86_64::r10, x86_64::r11, x86_64::r12, x86_64::r13, x86_64::r14, x86_64::r15
450         },
451         {
452             x86_64::al, x86_64::cl, x86_64::dl, x86_64::bl, x86_64::spl, x86_64::bpl, x86_64::sil, x86_64::dil
453         },
454         {
455             st0, st1, st2, st3, st4, st5, st6, st7
456         }
457
458     };
459
460     MachRegister InstructionDecoder_x86::makeRegisterID(unsigned int intelReg, unsigned int opType,
461                                         bool isExtendedReg)
462     {
463         MachRegister retVal;
464         if(isExtendedReg)
465         {
466             retVal = IntelRegTable[b_amd64ext][intelReg];
467         }
468         else if(locs->rex_w)
469         {
470             // AMD64 with 64-bit operands
471             retVal = IntelRegTable[b_64bit][intelReg];
472         }
473         else
474         {
475             switch(opType)
476             {
477                 case op_b:
478                     if (locs->rex_position == -1) {
479                         retVal = IntelRegTable[b_8bitNoREX][intelReg];
480                     } else {
481                         retVal = IntelRegTable[b_8bitWithREX][intelReg];
482                     }
483                     break;
484                 case op_q:
485                     retVal = IntelRegTable[b_64bit][intelReg];
486                     break;
487                 case op_w:
488                   retVal = IntelRegTable[b_16bit][intelReg];
489                   break;
490                 case op_f:
491                 case op_dbl:
492                     retVal = IntelRegTable[b_fpstack][intelReg];
493                     break;
494                 case op_d:
495                 case op_si:
496                 case op_v:
497                     retVal = IntelRegTable[b_32bit][intelReg];
498                     break;
499                 default:
500                     retVal = IntelRegTable[b_32bit][intelReg];
501                     break;
502                   
503             }
504         }
505         if (!ia32_is_mode_64()) {
506           if ((retVal.val() & 0x00ffffff) == 0x0001000c)
507             assert(0);
508         }
509
510         return MachRegister((retVal.val() & ~retVal.getArchitecture()) | m_Arch);
511     }
512     
513     Result_Type InstructionDecoder_x86::makeSizeType(unsigned int opType)
514     {
515         switch(opType)
516         {
517             case op_b:
518             case op_c:
519                 return u8;
520             case op_d:
521             case op_ss:
522             case op_allgprs:
523             case op_si:
524                 return u32;
525             case op_w:
526             case op_a:
527                 return u16;
528             case op_q:
529             case op_sd:
530                 return u64;
531             case op_v:
532             case op_lea:
533             case op_z:
534                 if(is32BitMode ^ sizePrefixPresent)
535                 {
536                     return u32;
537                 }
538                 else
539                 {
540                     return u16;
541                 }
542                 break;
543             case op_p:
544                 // book says operand size; arch-x86 says word + word * operand size
545                 if(is32BitMode ^ sizePrefixPresent)
546                 {
547                     return u48;
548                 }
549                 else
550                 {
551                     return u32;
552                 }
553             case op_dq:
554                 return u64;
555             case op_512:
556                 return m512;
557             case op_pi:
558             case op_ps:
559             case op_pd:
560                 return dbl128;
561             case op_s:
562                 return u48;
563             case op_f:
564                 return sp_float;
565             case op_dbl:
566                 return dp_float;
567             case op_14:
568                 return m14;
569             default:
570                 assert(!"Can't happen!");
571                 return u8;
572         }
573     }
574
575
576     bool InstructionDecoder_x86::decodeOneOperand(const InstructionDecoder::buffer& b,
577                                                   const ia32_operand& operand,
578                                                   int & imm_index, /* immediate operand index */
579                                                   const Instruction* insn_to_complete, 
580                                                   bool isRead, bool isWritten)
581     {
582       unsigned int regType = op_d;
583       if(ia32_is_mode_64())
584         {
585           regType = op_q;
586         }
587       bool isCFT = false;
588       bool isCall = false;
589       bool isConditional = false;
590       InsnCategory cat = insn_to_complete->getCategory();
591       if(cat == c_BranchInsn || cat == c_CallInsn)
592         {
593           isCFT = true;
594           if(cat == c_CallInsn)
595             {
596               isCall = true;
597             }
598         }
599       if (cat == c_BranchInsn && insn_to_complete->getOperation().getID() != e_jmp) {
600         isConditional = true;
601       }
602
603       unsigned int optype = operand.optype;
604       if (sizePrefixPresent && 
605           ((optype == op_v) ||
606            (optype == op_z))) {
607         optype = op_w;
608       }
609                 switch(operand.admet)
610                 {
611                     case 0:
612                     // No operand
613                     {
614 /*                        fprintf(stderr, "ERROR: Instruction with mismatched operands. Raw bytes: ");
615                         for(unsigned int i = 0; i < decodedInstruction->getSize(); i++) {
616                             fprintf(stderr, "%x ", b.start[i]);
617                         }
618                         fprintf(stderr, "\n");*/
619                         assert(!"Mismatched number of operands--check tables");
620                         return false;
621                     }
622                     case am_A:
623                     {
624                         // am_A only shows up as a far call/jump.  Position 1 should be universally safe.
625                         Expression::Ptr addr(decodeImmediate(optype, b.start + 1));
626                         Expression::Ptr op(makeDereferenceExpression(addr, makeSizeType(optype)));
627                         insn_to_complete->addSuccessor(op, isCall, false, false, false);
628                     }
629                     break;
630                     case am_C:
631                     {
632                         Expression::Ptr op(makeRegisterExpression(IntelRegTable[b_cr][locs->modrm_reg]));
633                         insn_to_complete->appendOperand(op, isRead, isWritten);
634                     }
635                     break;
636                     case am_D:
637                     {
638                         Expression::Ptr op(makeRegisterExpression(IntelRegTable[b_dr][locs->modrm_reg]));
639                         insn_to_complete->appendOperand(op, isRead, isWritten);
640                     }
641                     break;
642                     case am_E:
643                     // am_M is like am_E, except that mod of 0x03 should never occur (am_M specified memory,
644                     // mod of 0x03 specifies direct register access).
645                     case am_M:
646                     // am_R is the inverse of am_M; it should only have a mod of 3
647                     case am_R:
648                         if(isCFT)
649                         {
650                           insn_to_complete->addSuccessor(makeModRMExpression(b, optype), isCall, true, false, false);
651                         }
652                         else
653                         {
654                           insn_to_complete->appendOperand(makeModRMExpression(b, optype), isRead, isWritten);
655                         }
656                     break;
657                     case am_F:
658                     {
659                         Expression::Ptr op(makeRegisterExpression(flags));
660                         insn_to_complete->appendOperand(op, isRead, isWritten);
661                     }
662                     break;
663                     case am_G:
664                     {
665                         Expression::Ptr op(makeRegisterExpression(makeRegisterID(locs->modrm_reg,
666                                 optype, locs->rex_r)));
667                         insn_to_complete->appendOperand(op, isRead, isWritten);
668                     }
669                     break;
670                     case am_I:
671                         insn_to_complete->appendOperand(decodeImmediate(optype, b.start + 
672                                                                         locs->imm_position[imm_index++]), 
673                                                         isRead, isWritten);
674                         break;
675                     case am_J:
676                     {
677                         Expression::Ptr Offset(decodeImmediate(optype, 
678                                                                b.start + locs->imm_position[imm_index++], 
679                                                                true));
680                         Expression::Ptr EIP(makeRegisterExpression(MachRegister::getPC(m_Arch)));
681                         Expression::Ptr InsnSize(make_shared(singleton_object_pool<Immediate>::construct(Result(u8,
682                             decodedInstruction->getSize()))));
683                         Expression::Ptr postEIP(makeAddExpression(EIP, InsnSize, u32));
684
685                         Expression::Ptr op(makeAddExpression(Offset, postEIP, u32));
686                         insn_to_complete->addSuccessor(op, isCall, false, isConditional, false);
687                         if (isConditional) 
688                           insn_to_complete->addSuccessor(postEIP, false, false, true, true);
689                     }
690                     break;
691                     case am_O:
692                     {
693                     // Address/offset width, which is *not* what's encoded by the optype...
694                     // The deref's width is what's actually encoded here.
695                         int pseudoOpType;
696                         switch(locs->address_size)
697                         {
698                             case 1:
699                                 pseudoOpType = op_b;
700                                 break;
701                             case 2:
702                                 pseudoOpType = op_w;
703                                 break;
704                             case 4:
705                                 pseudoOpType = op_d;
706                                 break;
707                             case 0:
708                                 // closest I can get to "will be address size by default"
709                                 pseudoOpType = op_v;
710                                 break;
711                             default:
712                                 assert(!"Bad address size, should be 0, 1, 2, or 4!");
713                                 pseudoOpType = op_b;
714                                 break;
715                         }
716
717
718                         int offset_position = locs->opcode_position;
719                         if(locs->modrm_position > offset_position && locs->modrm_operand <
720                            (int)(insn_to_complete->m_Operands.size()))
721                         {
722                             offset_position = locs->modrm_position;
723                         }
724                         if(locs->sib_position > offset_position)
725                         {
726                             offset_position = locs->sib_position;
727                         }
728                         offset_position++;
729                         insn_to_complete->appendOperand(makeDereferenceExpression(
730                                 decodeImmediate(pseudoOpType, b.start + offset_position), makeSizeType(optype)), 
731                                                         isRead, isWritten);
732                     }
733                     break;
734                     case am_P:
735                         insn_to_complete->appendOperand(makeRegisterExpression(IntelRegTable[b_mm][locs->modrm_reg]),
736                                 isRead, isWritten);
737                         break;
738                     case am_Q:
739         
740                         switch(locs->modrm_mod)
741                         {
742                             // direct dereference
743                             case 0x00:
744                             case 0x01:
745                             case 0x02:
746                               insn_to_complete->appendOperand(makeModRMExpression(b, optype), isRead, isWritten);
747                                 break;
748                             case 0x03:
749                                 // use of actual register
750                                 insn_to_complete->appendOperand(makeRegisterExpression(IntelRegTable[b_mm][locs->modrm_rm]),
751                                                                isRead, isWritten);
752                                 break;
753                             default:
754                                 assert(!"2-bit value modrm_mod out of range");
755                                 break;
756                         };
757                         break;
758                     case am_S:
759                     // Segment register in modrm reg field.
760                         insn_to_complete->appendOperand(makeRegisterExpression(IntelRegTable[b_segment][locs->modrm_reg]),
761                                 isRead, isWritten);
762                         break;
763                     case am_T:
764                         // test register in modrm reg; should only be tr6/tr7, but we'll decode any of them
765                         // NOTE: this only appears in deprecated opcodes
766                         insn_to_complete->appendOperand(makeRegisterExpression(IntelRegTable[b_tr][locs->modrm_reg]),
767                                                        isRead, isWritten);
768                         break;
769                     case am_V:
770                         insn_to_complete->appendOperand(makeRegisterExpression(IntelRegTable[b_xmm][locs->modrm_reg]),
771                                                        isRead, isWritten);
772                         break;
773                     case am_W:
774                         switch(locs->modrm_mod)
775                         {
776                             // direct dereference
777                             case 0x00:
778                             case 0x01:
779                             case 0x02:
780                               insn_to_complete->appendOperand(makeModRMExpression(b, makeSizeType(optype)),
781                                                                isRead, isWritten);
782                                 break;
783                             case 0x03:
784                             // use of actual register
785                             {
786                                 insn_to_complete->appendOperand(makeRegisterExpression(IntelRegTable[b_xmm][locs->modrm_rm]),
787                                                                isRead, isWritten);
788                                 break;
789                             }
790                             default:
791                                 assert(!"2-bit value modrm_mod out of range");
792                                 break;
793                         };
794                         break;
795                     case am_X:
796                     {
797                         MachRegister si_reg;
798                         if(m_Arch == Arch_x86)
799                         {
800                                 if(addrSizePrefixPresent)
801                                 {
802                                         si_reg = x86::si;
803                                 } else
804                                 {
805                                         si_reg = x86::esi;
806                                 }
807                         }
808                         else
809                         {
810                                 if(addrSizePrefixPresent)
811                                 {
812                                         si_reg = x86_64::esi;
813                                 } else
814                                 {
815                                         si_reg = x86_64::rsi;
816                                 }
817                         }
818                         Expression::Ptr ds(makeRegisterExpression(m_Arch == Arch_x86 ? x86::ds : x86_64::ds));
819                         Expression::Ptr si(makeRegisterExpression(si_reg));
820                         Expression::Ptr segmentOffset(make_shared(singleton_object_pool<Immediate>::construct(
821                                 Result(u32, 0x10))));
822                         Expression::Ptr ds_segment = makeMultiplyExpression(ds, segmentOffset, u32);
823                         Expression::Ptr ds_si = makeAddExpression(ds_segment, si, u32);
824                         insn_to_complete->appendOperand(makeDereferenceExpression(ds_si, makeSizeType(optype)),
825                                                        isRead, isWritten);
826                     }
827                     break;
828                     case am_Y:
829                     {
830                         MachRegister di_reg;
831                         if(m_Arch == Arch_x86)
832                         {
833                                 if(addrSizePrefixPresent)
834                                 {
835                                         di_reg = x86::di;
836                                 } else
837                                 {
838                                         di_reg = x86::edi;
839                                 }
840                         }
841                         else
842                         {
843                                 if(addrSizePrefixPresent)
844                                 {
845                                         di_reg = x86_64::edi;
846                                 } else
847                                 {
848                                         di_reg = x86_64::rdi;
849                                 }
850                         }
851                         Expression::Ptr es(makeRegisterExpression(m_Arch == Arch_x86 ? x86::es : x86_64::es));
852                         Expression::Ptr di(makeRegisterExpression(di_reg));
853                         Expression::Ptr es_segment = makeMultiplyExpression(es,
854                             make_shared(singleton_object_pool<Immediate>::construct(Result(u32, 0x10))), u32);
855                         Expression::Ptr es_di = makeAddExpression(es_segment, di, u32);
856                         insn_to_complete->appendOperand(makeDereferenceExpression(es_di, makeSizeType(optype)),
857                                                        isRead, isWritten);
858                     }
859                     break;
860                     case am_tworeghack:
861                     {
862                         if(optype == op_edxeax)
863                         {
864                             Expression::Ptr edx(makeRegisterExpression(m_Arch == Arch_x86 ? x86::edx : x86_64::edx));
865                             Expression::Ptr eax(makeRegisterExpression(m_Arch == Arch_x86 ? x86::eax : x86_64::eax));
866                             Expression::Ptr highAddr = makeMultiplyExpression(edx,
867                                     Immediate::makeImmediate(Result(u64, 2^32)), u64);
868                             Expression::Ptr addr = makeAddExpression(highAddr, eax, u64);
869                             Expression::Ptr op = makeDereferenceExpression(addr, u64);
870                             insn_to_complete->appendOperand(op, isRead, isWritten);
871                         }
872                         else if (optype == op_ecxebx)
873                         {
874                             Expression::Ptr ecx(makeRegisterExpression(m_Arch == Arch_x86 ? x86::ecx : x86_64::ecx));
875                             Expression::Ptr ebx(makeRegisterExpression(m_Arch == Arch_x86 ? x86::ebx : x86_64::ebx));
876                             Expression::Ptr highAddr = makeMultiplyExpression(ecx,
877                                     Immediate::makeImmediate(Result(u64, 2^32)), u64);
878                             Expression::Ptr addr = makeAddExpression(highAddr, ebx, u64);
879                             Expression::Ptr op = makeDereferenceExpression(addr, u64);
880                             insn_to_complete->appendOperand(op, isRead, isWritten);
881                         }
882                     }
883                     break;
884                     
885                     case am_reg:
886                     {
887                         MachRegister r(optype);
888                         r = MachRegister(r.val() & ~r.getArchitecture() | m_Arch);
889                         if(locs->rex_b)
890                         {
891                             r = MachRegister((r.val()) | x86_64::r8.val());
892                         }
893                         if(sizePrefixPresent)
894                         {
895                             r = MachRegister((r.val() & ~x86::FULL) | x86::W_REG);
896                         }
897                         Expression::Ptr op(makeRegisterExpression(r));
898                         insn_to_complete->appendOperand(op, isRead, isWritten);
899                     }
900                     break;
901                 case am_stackH:
902                 case am_stackP:
903                 // handled elsewhere
904                     break;
905                 case am_allgprs:
906                 {
907                     if(m_Arch == Arch_x86)
908                     {
909                         insn_to_complete->appendOperand(makeRegisterExpression(x86::eax), isRead, isWritten);
910                         insn_to_complete->appendOperand(makeRegisterExpression(x86::ecx), isRead, isWritten);
911                         insn_to_complete->appendOperand(makeRegisterExpression(x86::edx), isRead, isWritten);
912                         insn_to_complete->appendOperand(makeRegisterExpression(x86::ebx), isRead, isWritten);
913                         insn_to_complete->appendOperand(makeRegisterExpression(x86::esp), isRead, isWritten);
914                         insn_to_complete->appendOperand(makeRegisterExpression(x86::ebp), isRead, isWritten);
915                         insn_to_complete->appendOperand(makeRegisterExpression(x86::esi), isRead, isWritten);
916                         insn_to_complete->appendOperand(makeRegisterExpression(x86::edi), isRead, isWritten);
917                     }
918                     else
919                     {
920                         insn_to_complete->appendOperand(makeRegisterExpression(x86_64::eax), isRead, isWritten);
921                         insn_to_complete->appendOperand(makeRegisterExpression(x86_64::ecx), isRead, isWritten);
922                         insn_to_complete->appendOperand(makeRegisterExpression(x86_64::edx), isRead, isWritten);
923                         insn_to_complete->appendOperand(makeRegisterExpression(x86_64::ebx), isRead, isWritten);
924                         insn_to_complete->appendOperand(makeRegisterExpression(x86_64::esp), isRead, isWritten);
925                         insn_to_complete->appendOperand(makeRegisterExpression(x86_64::ebp), isRead, isWritten);
926                         insn_to_complete->appendOperand(makeRegisterExpression(x86_64::esi), isRead, isWritten);
927                         insn_to_complete->appendOperand(makeRegisterExpression(x86_64::edi), isRead, isWritten);
928                     }
929                 }
930                     break;
931                 case am_ImplImm: {
932                   insn_to_complete->appendOperand(Immediate::makeImmediate(Result(makeSizeType(optype), 1)), isRead, isWritten);
933                   break;
934                 }
935
936                 default:
937                     printf("decodeOneOperand() called with unknown addressing method %d\n", operand.admet);
938                         break;
939                 };
940                 return true;
941             }
942
943     extern ia32_entry invalid;
944     
945     void InstructionDecoder_x86::doIA32Decode(InstructionDecoder::buffer& b)
946     {
947         if(decodedInstruction == NULL)
948         {
949             decodedInstruction = reinterpret_cast<ia32_instruction*>(malloc(sizeof(ia32_instruction)));
950             assert(decodedInstruction);
951         }
952         if(locs == NULL)
953         {
954             locs = reinterpret_cast<ia32_locations*>(malloc(sizeof(ia32_locations)));
955             assert(locs);
956         }
957         locs = new(locs) ia32_locations; //reinit();
958         assert(locs->sib_position == -1);
959         decodedInstruction = new (decodedInstruction) ia32_instruction(NULL, NULL, locs);
960         ia32_decode(IA32_DECODE_PREFIXES, b.start, *decodedInstruction);
961         sizePrefixPresent = (decodedInstruction->getPrefix()->getOperSzPrefix() == 0x66);
962         addrSizePrefixPresent = (decodedInstruction->getPrefix()->getAddrSzPrefix() == 0x67);
963     }
964     
965     void InstructionDecoder_x86::decodeOpcode(InstructionDecoder::buffer& b)
966     {
967         static ia32_entry invalid = { e_No_Entry, 0, 0, true, { {0,0}, {0,0}, {0,0} }, 0, 0 };
968         doIA32Decode(b);
969         if(decodedInstruction->getEntry()) {
970             m_Operation = make_shared(singleton_object_pool<Operation>::construct(decodedInstruction->getEntry(),
971                                     decodedInstruction->getPrefix(), locs, m_Arch));
972         }
973         else
974         {
975                 // Gap parsing can trigger this case; in particular, when it encounters prefixes in an invalid order.
976                 // Notably, if a REX prefix (0x40-0x48) appears followed by another prefix (0x66, 0x67, etc)
977                 // we'll reject the instruction as invalid and send it back with no entry.  Since this is a common
978                 // byte sequence to see in, for example, ASCII strings, we want to simply accept this and move on, not
979                 // yell at the user.
980             m_Operation = make_shared(singleton_object_pool<Operation>::construct(&invalid,
981                                     decodedInstruction->getPrefix(), locs, m_Arch));
982         }
983         b.start += decodedInstruction->getSize();
984     }
985     
986       bool InstructionDecoder_x86::decodeOperands(const Instruction* insn_to_complete)
987     {
988         int imm_index = 0; // handle multiple immediate operands
989         if(!decodedInstruction) return false;
990         unsigned int opsema = decodedInstruction->getEntry()->opsema & 0xFF;
991         InstructionDecoder::buffer b(insn_to_complete->ptr(), insn_to_complete->size());
992         
993         for(unsigned i = 0; i < 3; i++)
994         {
995             if(decodedInstruction->getEntry()->operands[i].admet == 0 && 
996                decodedInstruction->getEntry()->operands[i].optype == 0)
997                 return true;
998             if(!decodeOneOperand(b,
999                                  decodedInstruction->getEntry()->operands[i], 
1000                                  imm_index, 
1001                                  insn_to_complete, 
1002                                  readsOperand(opsema, i),
1003                                  writesOperand(opsema, i)))
1004             {
1005                 return false;
1006             }
1007         }
1008     
1009         return true;
1010     }
1011
1012     
1013       INSTRUCTION_EXPORT Instruction::Ptr InstructionDecoder_x86::decode(InstructionDecoder::buffer& b)
1014     {
1015         return InstructionDecoderImpl::decode(b);
1016     }
1017     void InstructionDecoder_x86::doDelayedDecode(const Instruction* insn_to_complete)
1018     {
1019       InstructionDecoder::buffer b(insn_to_complete->ptr(), insn_to_complete->size());
1020       //insn_to_complete->m_Operands.reserve(4);
1021       doIA32Decode(b);        
1022       decodeOperands(insn_to_complete);
1023     }
1024     
1025 };
1026 };
1027