* Bugfix: InstrucIter no longer used for int_function iteration.
[dyninst.git] / dyninstAPI / src / arch-ia64.C
1 /* -*- Mode: C; indent-tabs-mode: true; tab-width: 4 -*- */
2
3 /*
4  * Copyright (c) 1996-2004 Barton P. Miller
5  * 
6  * We provide the Paradyn Parallel Performance Tools (below
7  * described as "Paradyn") on an AS IS basis, and do not warrant its
8  * validity or performance.  We reserve the right to update, modify,
9  * or discontinue this software at any time.  We shall have no
10  * obligation to supply such updates or modifications or any other
11  * form of support to you.
12  * 
13  * This license is for research uses.  For such uses, there is no
14  * charge. We define "research use" to mean you may freely use it
15  * inside your organization for whatever purposes you see fit. But you
16  * may not re-distribute Paradyn or parts of Paradyn, in any form
17  * source or binary (including derivatives), electronic or otherwise,
18  * to any other organization or entity without our permission.
19  * 
20  * (for other uses, please contact us at paradyn@cs.wisc.edu)
21  * 
22  * All warranties, including without limitation, any warranty of
23  * merchantability or fitness for a particular purpose, are hereby
24  * excluded.
25  * 
26  * By your use of Paradyn, you understand and agree that we (or any
27  * other person or entity with proprietary rights in Paradyn) are
28  * under no obligation to provide either maintenance services,
29  * update services, notices of latent defects, or correction of
30  * defects for Paradyn.
31  * 
32  * Even if advised of the possibility of such damages, under no
33  * circumstances shall we (or any other person or entity with
34  * proprietary rights in the software licensed hereunder) be liable
35  * to you or any third party for direct, indirect, or consequential
36  * damages of any character regardless of type of action, including,
37  * without limitation, loss of profits, loss of use, loss of good
38  * will, or computer failure or malfunction.  You agree to indemnify
39  * us (and any other person or entity with proprietary rights in the
40  * software licensed hereunder) for any and all liability it may
41  * incur to third parties resulting from your use of Paradyn.
42  */
43
44 // $Id: arch-ia64.C,v 1.58 2007/12/11 20:22:06 bill Exp $
45 // ia64 instruction decoder
46
47 #include <assert.h>
48 #include <stdio.h>
49 #include <stdlib.h>
50 #include "dyninstAPI/src/arch-ia64.h"
51 #include "util.h"
52 #include "registerSpace.h"
53 #include "debug.h"
54
55 #define ALIGN_RIGHT_SHIFT               23      /* left-aligned instructions occupy bits 23 - 63 of a uint64_t */
56 #define TEMPLATE_MASK                   0x000000000000001F      /* bits 00 - 04 of the low bundle word */
57 #define INSTRUCTION0_MASK               0x00003FFFFFFFFFE0      /* bits 05 - 45 of the low bundle word */
58 #define INSTRUCTION1_LOW_MASK   0xFFFFC00000000000      /* bits 46 - 63 of the low bundle word */
59 #define INSTRUCTION1_HIGH_MASK  0x00000000007FFFFF      /* bits 00 - 22 of the high bundle word */
60 #define INSTRUCTION2_MASK               0xFFFFFFFFFF800000      /* bits 23 - 63 of the high bundle word */
61 #define SYSCALL_IMM                             0x100000      /* break immediate that getType() reports as SYSCALL */
62
63 instruction::unitType INSTRUCTION_TYPE_ARRAY[(0x20 + 1) * 3] = { /* three unit types per template; indexed as (templateID * 3) + slotNumber by getUnitType() */
64         instruction::M, instruction::I, instruction::I, /* template 0x00 */
65         instruction::M, instruction::I, instruction::I, /* template 0x01 */
66         instruction::M, instruction::I, instruction::I, /* template 0x02 */
67         instruction::M, instruction::I, instruction::I, /* template 0x03 */
68         instruction::M, instruction::L, instruction::X, /* template 0x04 (long instruction in slots 1 and 2) */
69         instruction::M, instruction::L, instruction::X, /* template 0x05 (long instruction in slots 1 and 2) */
70         instruction::RESERVED, instruction::RESERVED, instruction::RESERVED, /* template 0x06 */
71         instruction::RESERVED, instruction::RESERVED, instruction::RESERVED, /* template 0x07 */
72         instruction::M, instruction::M, instruction::I, /* template 0x08 */
73         instruction::M, instruction::M, instruction::I, /* template 0x09 */
74         instruction::M, instruction::M, instruction::I, /* template 0x0A */
75         instruction::M, instruction::M, instruction::I, /* template 0x0B */
76         instruction::M, instruction::F, instruction::I, /* template 0x0C */
77         instruction::M, instruction::F, instruction::I, /* template 0x0D */
78         instruction::M, instruction::M, instruction::F, /* template 0x0E */
79         instruction::M, instruction::M, instruction::F, /* template 0x0F */
80
81         instruction::M, instruction::I, instruction::B, /* template 0x10 */
82         instruction::M, instruction::I, instruction::B, /* template 0x11 */
83         instruction::M, instruction::B, instruction::B, /* template 0x12 */
84         instruction::M, instruction::B, instruction::B, /* template 0x13 */
85         instruction::RESERVED, instruction::RESERVED, instruction::RESERVED, /* template 0x14 */
86         instruction::RESERVED, instruction::RESERVED, instruction::RESERVED, /* template 0x15 */
87         instruction::B, instruction::B, instruction::B, /* template 0x16 */
88         instruction::B, instruction::B, instruction::B, /* template 0x17 */
89         instruction::M, instruction::M, instruction::B, /* template 0x18 */
90         instruction::M, instruction::M, instruction::B, /* template 0x19 */
91         instruction::RESERVED, instruction::RESERVED, instruction::RESERVED, /* template 0x1A */
92         instruction::RESERVED, instruction::RESERVED, instruction::RESERVED, /* template 0x1B */
93         instruction::M, instruction::F, instruction::B, /* template 0x1C */
94         instruction::M, instruction::F, instruction::B, /* template 0x1D */
95         instruction::RESERVED, instruction::RESERVED, instruction::RESERVED, /* template 0x1E */
96         instruction::RESERVED, instruction::RESERVED, instruction::RESERVED, /* template 0x1F */
97
98         instruction::RESERVED, instruction::RESERVED, instruction::RESERVED, /* index 0x20: one row past what TEMPLATE_MASK (0x1F) can encode; presumably used for instructions built without a real template -- confirm against arch-ia64.h */
99         };
100
101 /* NOTE: for the IA64_bundle constructor to work, the individual
102         instruction 'halves' should left-aligned as if they were independent instructions. */
103 instruction_x::instruction_x( uint64_t lowHalf, uint64_t highHalf, uint8_t templ ) {
104         insn_ = lowHalf;
105         insn_x_ = highHalf;
106         templateID = templ;
107 } /* end IA64_Instruction_x() */
108
109 instruction::instruction( uint64_t insn, uint8_t templ, uint8_t slotN ) {
110         insn_ = insn;
111         templateID = templ;
112         slotNumber = slotN;
113 } /* end IA64_Instruction() */
114
115 const void * instruction::ptr() const { 
116         return & insn_;
117         } /* end ptr() */
118         
119 const void * instruction_x::ptr() const { 
120         return & insn_x_;
121         } /* end ptr() */
122
123 uint8_t instruction::getPredicate() const {
124         return GET_PREDICATE( (const insn_tmpl *)(&insn_) );
125 } /* end short instruction predication fetch */
126
127 uint8_t instruction_x::getPredicate() const {
128         return GET_PREDICATE( (const insn_tmpl *)(&insn_x_) );
129         } /* end long instruciton predication fetch */
130
131 instruction::unitType instruction::getUnitType() const {
132         return INSTRUCTION_TYPE_ARRAY[(templateID * 3) + slotNumber];
133         } /* end getUnitType() */
134
135 instruction::insnType instruction::getType() const {
136         /* We'll try to be a little smarter, now, and just look up the unit type. */
137         insn_tmpl tmpl = { insn_ };
138         uint8_t opCode = GET_OPCODE( &tmpl );
139
140         switch( getUnitType() ) {
141                 case M: {
142                         /* Note that we do NOT recognize advance load instructions (see also isLoadOrStore()),
143                            though this can be added without too much trouble. */
144                         uint8_t x  = tmpl.M_LD_ST.x;
145                         uint8_t m  = tmpl.M_LD_ST.m;
146                         uint8_t x6 = tmpl.M_LD_ST.x6;
147                         uint8_t x4 = tmpl.M_SYS.x2;
148                         uint8_t x3 = tmpl.M_SYS.x3;
149                         uint8_t x2 = tmpl.M_SYS.x4;
150
151                         switch( opCode ) {
152                                 case 0x0:
153                                         if( x3 >= 0x4 && x3 <= 0x7 ) return ALAT_CHECK;
154                                         if( x3 == 0x0 && x4 == 0x0 && x2 == 0x0 )
155                                                 if( GET_M37_IMM( &tmpl ) == SYSCALL_IMM )
156                                                         return SYSCALL;
157                                                 else
158                                                         return BREAK;
159
160                                         return OTHER;
161                                         break;
162
163                                 case 0x1:
164                                         if( x3 == 0x1 || x3 == 0x3 ) return SPEC_CHECK;
165                                         if( x3 == 0x06 ) return ALLOC;
166
167                                         return OTHER;
168                                         break;
169
170                                 case 0x4:
171                                         if( x == 0x0 ) {
172                                                 if( ( x6 <= 0x17 ) || x6 == 0x1B ||
173                                                         ( x6 >= 0x20 && x6 <= 0x2B ) ) {
174                                                         // /* DEBUG */ fprintf( stderr, "%s[%d]: INTEGER_LOAD\n", __FILE__, __LINE__ );
175                                                         return INTEGER_LOAD;
176                                                         }
177
178                                                 if( ( x6 >= 0x30 && x6 <= 0x37 ) || x6 == 0x3B )
179                                                         return INTEGER_STORE;
180                                         }
181
182                                         if( m == 0x0 && x == 0x1 ) {
183                                                 if( x6 == 0x28 || x6 == 0x2C ) {
184                                                         // /* DEBUG */ fprintf( stderr, "%s[%d]: INTEGER_16_LOAD\n", __FILE__, __LINE__ );
185                                                         return INTEGER_16_LOAD;
186                                                         }
187                                                 if( x6 == 0x30 || x6 == 0x34 ) return INTEGER_16_STORE;
188                                         }
189
190                                         return OTHER;
191                                         break;
192
193                                 case 0x5:
194                                         if( ( x6 <= 0x17 ) || x6 == 0x1B ||
195                                                 ( x6 >= 0x20 && x6 <= 0x2B ) ) {
196                                                 // / * DEBUG */ fprintf( stderr, "%s[%d]: INTEGER_LOAD\n", __FILE__, __LINE__ );
197                                                 return INTEGER_LOAD;
198                                                 }
199
200                                         if( ( x6 >= 0x30 && x6 <= 0x37 ) || x6 == 0x3B )
201                                                 return INTEGER_STORE;
202
203                                         return OTHER;
204                                         break;
205
206                                 case 0x6:
207                                         if( x == 0x0 ) {
208                                                 if( ( x6 <= 0x0F ) || x6 == 0x1B ||
209                                                         ( x6 >= 0x20 && x6 <= 0x27 ) )
210                                                         return FP_LOAD;
211
212                                                 if( m == 0x0 && ( ( x6 >= 0x30 && x6 <= 0x33 ) || x6 == 0x3B ) )
213                                                         return FP_STORE;
214
215                                                 if( x6 == 0x2C || x6 == 0x2D || x6 == 0x2E || x6 == 0x2F )
216                                                         return PREFETCH;
217                                         }
218
219                                         if( x == 0x1 ) {
220                                                 if( ( x6 >= 0x01 && x6 <= 0x0F ) || ( x6 >= 0x21 && x6 <= 0x27 ) )
221                                                                  switch ( x6 & 0x3 ) {
222                                                                         case 0x1:
223                                                                                 // /* DEBUG */ fprintf( stderr, "%s[%d]: INTEGER_PAIR_LOAD\n", __FILE__, __LINE__ );
224                                                                                 return INTEGER_PAIR_LOAD;
225                                                                         case 0x2:
226                                                                         case 0x3: return FP_PAIR_LOAD;
227                                                                  }
228                                         }
229                                         return OTHER;
230                                         break;
231
232                                 case 0x7:
233                                         if( ( x6 <= 0x0F ) || x6 == 0x1B ||
234                                                 ( x6 >= 0x20 && x6 <= 0x27 ) )
235                                                 return FP_LOAD;
236
237                                         if( ( x6 >= 0x30 && x6 <= 0x33 ) || x6 == 0x3B )
238                                                 return FP_STORE;
239
240                                         if( x6 >= 0x2C && x6 <= 0x2F )
241                                                 return PREFETCH;
242
243                                         return OTHER;
244                                         break;
245
246                                 default:
247                                         return OTHER;
248                                 } /* end memory-unit opcode switch */
249                         } break;
250
251                 case I: {
252                         uint8_t x6 = tmpl.I_MISC.x6;
253                         uint8_t x3 = tmpl.I_MISC.x3;
254
255                         if( opCode == 0x0 && x3 == 0x0 && x6 == 0x00 )
256                                 if( GET_I19_IMM( &tmpl ) == SYSCALL_IMM )
257                                         return SYSCALL;
258                                 else
259                                         return BREAK;
260                         if( opCode == 0x0 && x6 == 0x30 ) return MOVE_FROM_IP;
261                         if( opCode == 0x0 && x3 == 0x1 ) return SPEC_CHECK;
262
263                         return OTHER;
264                         } break;
265
266                 case B: {
267                         switch( opCode ) {
268                                 case 0x0: {
269                                         /* Is it a return or an indirect branch or something else? */
270                                         uint8_t x6 = tmpl.B.x6;
271                                         uint8_t btype = tmpl.B.btype;
272
273                                         if( x6 == 0x00 )
274                                                 if( GET_B9_IMM( &tmpl ) == SYSCALL_IMM )
275                                                         return SYSCALL;
276                                                 else
277                                                         return BREAK;
278                                         if( x6 == 0x21 && btype == 0x4 ) return RETURN;
279                                         if( x6 == 0x20 && btype == 0x0 ) return INDIRECT_BRANCH;
280                                         if( x6 == 0x20 && btype == 0x1 ) return BRANCH_IA;
281
282                                         return OTHER;
283                                         } break;
284
285                                 case 0x1: return INDIRECT_CALL;
286                                 case 0x2: {
287                                         uint8_t x6 = tmpl.B.x6;
288
289                                         if ( x6 == 0x10 || x6 == 0x11 ) return BRANCH_PREDICT;
290
291                                         return OTHER;
292                                         } break;
293
294                                 case 0x4: return DIRECT_BRANCH;
295                                 case 0x5: return DIRECT_CALL;
296                                 case 0x7: return BRANCH_PREDICT;
297
298                                 default:  return OTHER;
299                                 } /* end branch-unit opcode switch */
300                         } break;
301
302                 case F:
303                         if( opCode == 0x0 && tmpl.F15.x == 0x0 && tmpl.F15.x6 == 0x00 )
304                                 if( GET_F15_IMM( &tmpl ) == SYSCALL_IMM )
305                                         return SYSCALL;
306                                 else
307                                         return BREAK;
308
309                         return OTHER;
310
311                 case X:
312                 case L:
313                 case RESERVED:
314                 default: break;
315                 } /* end i-unit type switch */
316
317         return INVALID;
318         } /* end getType() */
319
320 instruction_x::unitType instruction_x::getUnitType() const { 
321         return instruction_x::X;
322         } /* end getUnitType() */
323
324 instruction::insnType instruction_x::getType() const {
325         /* We know we're a long instruction, so just check the major opcode to see which one. */
326         insn_tmpl tmpl = { insn_x_ };
327         insn_tmpl imm  = { insn_ };
328
329         switch( GET_OPCODE( &tmpl )) {
330                 case 0x0:
331                         if( tmpl.X1.x3 == 0x0 && tmpl.X1.x6 == 0x00 )
332                                 if( GET_X1_IMM( &tmpl, &imm ) == SYSCALL_IMM )
333                                         return SYSCALL;
334                                 else
335                                         return BREAK;
336
337                         return OTHER;
338
339                 case 0xD: return DIRECT_CALL;
340                 case 0xC: return DIRECT_BRANCH;
341                 default: return OTHER;
342                 } /* end opcode switch */
343         } /* end getType() */
344
345 IA64_bundle::IA64_bundle( ia64_bundle_t rawBundle ) {
346         * this = IA64_bundle( rawBundle.low, rawBundle.high );
347         } /* end IA64_bundle() */
348
349 IA64_bundle::IA64_bundle( uint8_t templateID, const instruction & instruction0, const instruction instruction1, const instruction instruction2 ) {
350         * this = IA64_bundle( templateID, instruction0.getMachineCode(), instruction1.getMachineCode(), instruction2.getMachineCode() );
351         } /* end IA64_bundle() */
352
353 /* This handles the MLX template/long instructions. */
354 IA64_bundle::IA64_bundle( uint8_t templateID, const instruction & instruction0, const instruction_x & instructionLX ) {
355         if( templateID != MLXstop && templateID != MLX ) { bpfatal( "Attempting to generate a bundle with a long instruction without using the MLX template, aborting.\n" ); abort(); }
356
357         * this = IA64_bundle( templateID, instruction0, instructionLX.getMachineCode().low, instructionLX.getMachineCode().high );
358         } /* end IA64_bundle() */
359
360 IA64_bundle::IA64_bundle( uint8_t templateID, uint64_t instruction0, uint64_t instruction1, uint64_t instruction2 ) {
361         this->templateID = templateID;
362         this->instruction0 = instruction( instruction0, templateID, 0 );
363         this->instruction1 = instruction( instruction1, templateID, 1 );
364         this->instruction2 = instruction( instruction2, templateID, 2 ); 
365
366         myBundle.low  = (( templateID & TEMPLATE_MASK ) |
367                                          ( (instruction0 >> (ALIGN_RIGHT_SHIFT - 5)) & INSTRUCTION0_MASK ) |
368                                          ( (instruction1 << 23) & INSTRUCTION1_LOW_MASK ));
369         myBundle.high = (( (instruction1 >> (ALIGN_RIGHT_SHIFT + 18)) & INSTRUCTION1_HIGH_MASK ) |
370                                          ( (instruction2 & INSTRUCTION2_MASK )));
371         } /* end IA64_bundle() */
372
373 IA64_bundle::IA64_bundle( uint64_t lowHalfBundle, uint64_t highHalfBundle ) {
374         /* The template is right-aligned; the instructions are left-aligned. */
375         templateID = lowHalfBundle & TEMPLATE_MASK;
376         instruction0 = instruction( (lowHalfBundle & INSTRUCTION0_MASK) << 18, templateID, 0 );
377         instruction1 = instruction( ((lowHalfBundle & INSTRUCTION1_LOW_MASK) >> 23) +
378                                                                          ((highHalfBundle & INSTRUCTION1_HIGH_MASK) << 41), templateID, 1 );
379         instruction2 = instruction( highHalfBundle & INSTRUCTION2_MASK, templateID, 2 );
380
381         myBundle.low = lowHalfBundle;
382         myBundle.high = highHalfBundle;
383
384 } /* end IA64_Bundle() */
385
386 instruction_x IA64_bundle::getLongInstruction() {
387         longInstruction = instruction_x( instruction1.getMachineCode(), instruction2.getMachineCode(), templateID );
388         return longInstruction;
389         } /* end getLongInstruction() */
390
391 instruction * IA64_bundle::getInstruction( unsigned int slot ) {
392         if( (slot == 1 || slot == 2) && hasLongInstruction() ) {
393                 return new instruction_x( instruction1.getMachineCode(), instruction2.getMachineCode(), templateID );
394                 }
395         switch( slot ) {
396                 case 0: return new instruction( instruction0 );
397                 case 1: return new instruction( instruction1 );
398                 case 2: return new instruction( instruction2 );
399                 default: bpfatal("Request of invalid instruction (%d), aborting.\n", slot ); abort();
400                 }
401         } /* end getInstruction() */
402
403 // Aids bundle modification.  Used by set_breakpoint_for_syscall_completion().
404 bool IA64_bundle::setInstruction(instruction &newInst)
405 {
406     if ( (templateID == 0x04 || templateID == 0x05) && newInst.slotNumber != 0)
407                 return false;
408
409     switch (newInst.slotNumber) {
410                 case 0:
411                         instruction0 = instruction(newInst.insn_, templateID, newInst.slotNumber);
412                         myBundle.low &= ~INSTRUCTION0_MASK | (newInst.insn_ << (ALIGN_RIGHT_SHIFT - 5));
413                         break;
414                 case 1:
415                         instruction1 = instruction(newInst.insn_, templateID, newInst.slotNumber);
416                         myBundle.low &= ~INSTRUCTION1_LOW_MASK | (newInst.insn_ << 23);
417                         myBundle.high &= ~INSTRUCTION1_HIGH_MASK | (newInst.insn_ >> (ALIGN_RIGHT_SHIFT + 18));
418                         break;
419                 case 2:
420                         instruction2 = instruction(newInst.insn_, templateID, newInst.slotNumber);
421                         myBundle.high &= ~INSTRUCTION2_MASK | newInst.insn_;
422                         break;
423                 default:
424                         return false;
425                         break;
426     }
427     return true;
428 } /* end setInstruction() */
429
430 // Aids bundle modification.  Added for completion.
431 bool IA64_bundle::setInstruction(instruction_x &newInst)
432 {
433     if (templateID != 0x04 && templateID != 0x05)
434         return false;
435
436     instruction1 = instruction(newInst.insn_, templateID, 1);
437     instruction2 = instruction(newInst.insn_x_, templateID, 2);
438
439     myBundle.low &= ~INSTRUCTION1_LOW_MASK | (newInst.insn_ << 23);
440     myBundle.high = ( ((newInst.insn_ >> (ALIGN_RIGHT_SHIFT + 18)) & INSTRUCTION1_HIGH_MASK ) |
441                                           ( newInst.insn_x_ & INSTRUCTION2_MASK ) );
442     return true;
443 } /* end setInstruction(x) */
444
445 /* private refactoring function */
446 bool extractAllocatedRegisters( uint64_t allocInsn, uint64_t * allocatedLocal, uint64_t * allocatedOutput, uint64_t * allocatedRotate ) {
447         /* Verify that the given instruction is actually, so far as we can tell
448            (we don't have the template and the offset), an alloc. */
449
450         insn_tmpl alloc = { allocInsn };
451         if (alloc.M34.opcode != 0x1 || alloc.M34.x3 != 0x6) {
452                 *allocatedLocal = *allocatedOutput = *allocatedRotate = 0;
453                 return false;
454         } /* end if not an alloc instruction */
455
456         /* Extract the local, output, and rotate sizes. */
457         *allocatedLocal = GET_M34_LOCAL(&alloc);
458         *allocatedOutput = GET_M34_OUTPUT(&alloc);
459         *allocatedRotate = GET_M34_ROTATE(&alloc);
460
461         /* Completed successfully. */
462         return true;
463         } /* end extractAllocatedRegisters() */
464
465 instruction generateAllocInstructionFor( registerSpace * rs, int locals, int outputs, int rotates ) {
466         insn_tmpl alloc = { 0x0 };
467         uint64_t sizeOfLocals = rs->GPRs()[0]->number - 32 + locals;
468         assert( 0 <= outputs && outputs <= 8 );
469
470         if( sizeOfLocals + outputs > 96 ) {
471                 // Never allocate a frame larger than 96 registers.
472                 sizeOfLocals = 96 - outputs;
473                 }
474
475         alloc.M34.opcode        = 0x1;
476         alloc.M34.x3            = 0x6;
477         alloc.M34.r1            = rs->originalLocals + rs->originalOutputs + 32;
478         SET_M34_FIELDS( & alloc, sizeOfLocals, outputs, rotates );
479
480         return instruction( alloc.raw );
481         } /* end generateAllocInstructionFor() */
482
483 instruction generateOriginalAllocFor( registerSpace * rs ) {
484         insn_tmpl alloc = { 0x0 };
485
486         alloc.M34.opcode        = 0x1;
487         alloc.M34.x3            = 0x6;
488         alloc.M34.r1            = 1;
489
490         /* Allocating a spurious output register to avoid preserving the target
491            register breaks things for kernels which check the number of 
492            output registers during a syscall entry, so instead,
493            save ar.pfs to a known register (r1) and save & restore it
494            around the alloc instruction. */
495         SET_M34_FIELDS( & alloc, rs->originalLocals, rs->originalOutputs, rs->originalRotates );
496
497         return instruction( alloc.raw );
498         } /* end generateOriginalAllocFor() */
499
500 /* imm22 is assumed to be right-aligned, e.g., an actual value. :) */
501 instruction generateShortConstantInRegister( unsigned int registerN, int imm22 ) {
502         insn_tmpl addl = { 0x0 };
503
504         addl.A5.opcode  = 0x9;
505         addl.A5.r1              = registerN;
506         SET_A5_IMM(&addl, imm22);
507
508         return instruction( addl.raw );
509         } /* end generateConstantInRegister( imm22 ) */
510
511 instruction_x generateLongConstantInRegister( unsigned int registerN, long long int immediate ) {
512         insn_tmpl movl = { 0x0 }, imm = { 0x0 };
513
514         movl.X2.opcode  = 0x6;
515         movl.X2.r1              = registerN;
516         SET_X2_IMM(&movl, &imm, immediate);
517
518         return instruction_x( imm.raw, movl.raw );
519         } /* end generateConstantInRegister( imm64 ) */
520
521 instruction_x generateLongCallTo( long long int displacement64, unsigned int branchRegister, Register predicate ) {
522         insn_tmpl call = { 0x0 }, imm = { 0x0 };
523
524         call.X4.opcode  = 0xD;
525         call.X4.b1              = branchRegister;
526         call.X4.qp              = predicate;
527         SET_X4_TARGET(&call, &imm, displacement64);
528
529         return instruction_x( imm.raw, call.raw );
530         } /* end generateLongCallTo( displacement64 ) */
531
532 instruction_x generateLongBranchTo( long long int displacement64, Register predicate ) {
533         insn_tmpl brl = { 0x0 }, imm = { 0x0 };
534         brl.X3.opcode   = 0xC;
535         brl.X3.qp               = predicate;
536         SET_X3_TARGET(&brl, &imm, displacement64);
537
538         return instruction_x( imm.raw, brl.raw );
539         } /* end generateLongBranchTo( displacement64 ) */
540
541 instruction generateReturnTo( unsigned int branchRegister ) {
542         insn_tmpl ret = { 0x0 };
543
544         /* Ret Opcode   = 0x0 */
545         ret.B4.x6               = 0x21;
546         ret.B4.btype    = 0x4;
547         ret.B4.p                = 0x1;
548         ret.B4.b2               = branchRegister;
549
550         return instruction( ret.raw );
551         } /* end generateReturnTo */
552
553 /* Required by func-reloc.C to calculate relative displacements. */
554 int get_disp( instruction * /* insn */ ) {
555         assert( 0 );
556         return 0;
557         } /* end get_disp() */
558
559 /* Required by func-reloc.C to correct relative displacements after relocation. */
560 int set_disp( bool /* setDisp */, instruction * /* insn */, int /* newOffset */, bool /* outOfFunc */ ) {
561         assert( 0 );
562         return 0;
563         } /* end set_disp() */
564
565 /* Convience methods for func-reloc.C */
566 int sizeOfMachineInsn( instruction * /* insn */ ) {
567         assert( 0 );
568         return 0;
569         } /* end sizeOfMachineInsn() */
570
571 int addressOfMachineInsn( instruction * /* insn */ ) {
572         assert( 0 );
573         return 0;
574         } /* end addressOfMachineInsn */
575
576 /* Convience method for inst-ia64.C */
577 IA64_bundle generateBundleFromLongInstruction( instruction_x longInstruction ) {
578         instruction memoryNOP( NOP_M );
579         return IA64_bundle( MLXstop, memoryNOP, longInstruction );
580         } /* end generateBundleFromLongInstruction() */
581
582 /* Required by inst-ia64.C */
583 Address instruction::getTargetAddress() const {
584         insnType myType = getType();
585         insn_tmpl tmpl = { insn_ };
586
587         if( myType == DIRECT_CALL || myType == DIRECT_BRANCH ) { /* Kind of pointless to guess at the target of indirect jumps. */
588                 switch( GET_OPCODE(&tmpl) ) {
589                         case 0x00: /* Indirect call and branch, respectively. */
590                         case 0x01: assert( 0 );
591                         case 0x04: return GET_B1_TARGET(&tmpl);
592                         case 0x05: return GET_B3_TARGET(&tmpl);
593                         default:
594                                 bpfatal( "getTargetAddress(): unrecognized major opcode, aborting.\n" );
595                                 abort();
596                                 break;
597                         } /* end opcode switch */
598                 } else {
599                 // /* DEBUG */ bperr( "getTargetAddress() returning 0 for indirect branch or call.\n" );
600                 }
601         return 0;
602         } /* end getTargetAddress() */
603
604 Address instruction_x::getTargetAddress() const {
605         insnType myType = getType();
606         insn_tmpl tmpl = { insn_x_ }, imm = { insn_ };
607
608         if (myType == DIRECT_CALL || myType == DIRECT_BRANCH ) {
609                 switch (GET_OPCODE(&tmpl)) {
610                         case 0xC: return GET_X3_TARGET(&tmpl, &imm);
611                         case 0xD: return GET_X4_TARGET(&tmpl, &imm);
612                 }
613         }
614         return 0;
615         } /* end getTargetAddress() */
616
617 #include "process.h"
618 #include "function.h"
619 #include <list>
620
621 /* private refactoring function, for dBTRSF() */
622 int_basicBlock * findBasicBlockInCFG( Address addr, 
623                                                                           const std::vector<int_basicBlock *> &blocks ) {
624         for (unsigned i = 0; i < blocks.size(); i++) {
625                 if ((blocks[i]->origInstance()->firstInsnAddr() <= addr) &&
626                         (addr < blocks[i]->origInstance()->endAddr()))
627                         return blocks[i];
628         }
629         return NULL;
630 } /* end findBasicBlockInCFG() */
631
632 void initBaseTrampStorageMap( registerSpace *regSpace, int sizeOfFrame, bool *usedFPregs )
633 {
634         // Clear the data structures.
635         regSpace->sizeOfStack = 0;
636         memset( regSpace->storageMap, 0, sizeof( regSpace->storageMap ) );
637
638         // Unstacked register save locations
639         int stackIndex = 32 + sizeOfFrame;
640         if( stackIndex > 128 - ( NUM_PRESERVED + NUM_LOCALS + NUM_OUTPUT ) )
641                 stackIndex = 128 - ( NUM_PRESERVED + NUM_LOCALS + NUM_OUTPUT );
642
643         regSpace->storageMap[ BP_AR_PFS   ] = stackIndex++;
644
645         regSpace->storageMap[ BP_GR0 +  1 ] = stackIndex++;
646         regSpace->storageMap[ BP_GR0 +  2 ] = stackIndex++;
647         regSpace->storageMap[ BP_GR0 +  3 ] = stackIndex++;
648
649         regSpace->storageMap[ BP_GR0 +  8 ] = stackIndex++;
650         regSpace->storageMap[ BP_GR0 +  9 ] = stackIndex++;
651         regSpace->storageMap[ BP_GR0 + 10 ] = stackIndex++;
652         regSpace->storageMap[ BP_GR0 + 11 ] = stackIndex++;
653         regSpace->storageMap[ BP_GR0 + 12 ] = stackIndex++;
654
655         regSpace->storageMap[ BP_GR0 + 14 ] = stackIndex++;
656         regSpace->storageMap[ BP_GR0 + 15 ] = stackIndex++;
657         regSpace->storageMap[ BP_GR0 + 16 ] = stackIndex++;
658         regSpace->storageMap[ BP_GR0 + 17 ] = stackIndex++;
659         regSpace->storageMap[ BP_GR0 + 18 ] = stackIndex++;
660         regSpace->storageMap[ BP_GR0 + 19 ] = stackIndex++;
661         regSpace->storageMap[ BP_GR0 + 20 ] = stackIndex++;
662         regSpace->storageMap[ BP_GR0 + 21 ] = stackIndex++;
663         regSpace->storageMap[ BP_GR0 + 22 ] = stackIndex++;
664         regSpace->storageMap[ BP_GR0 + 23 ] = stackIndex++;
665         regSpace->storageMap[ BP_GR0 + 24 ] = stackIndex++;
666         regSpace->storageMap[ BP_GR0 + 25 ] = stackIndex++;
667         regSpace->storageMap[ BP_GR0 + 26 ] = stackIndex++;
668         regSpace->storageMap[ BP_GR0 + 27 ] = stackIndex++;
669         regSpace->storageMap[ BP_GR0 + 28 ] = stackIndex++;
670         regSpace->storageMap[ BP_GR0 + 29 ] = stackIndex++;
671         regSpace->storageMap[ BP_GR0 + 30 ] = stackIndex++;
672         regSpace->storageMap[ BP_GR0 + 31 ] = stackIndex++;
673
674         regSpace->storageMap[ BP_AR_CCV   ] = stackIndex++;
675         regSpace->storageMap[ BP_AR_CSD   ] = stackIndex++;
676         regSpace->storageMap[ BP_AR_SSD   ] = stackIndex++;
677         regSpace->storageMap[ BP_BR0 +  0 ] = stackIndex++;
678         regSpace->storageMap[ BP_BR0 +  6 ] = stackIndex++;
679         regSpace->storageMap[ BP_BR0 +  7 ] = stackIndex++;
680         regSpace->storageMap[ BP_PR       ] = stackIndex++;
681
682         // Stacked register save locations, if needed.
683         // Stacked registers are always saved on the memory stack.
684         stackIndex = 0;
685         for( int i = 128 - (NUM_PRESERVED + NUM_LOCALS + NUM_OUTPUT); i < (32 + sizeOfFrame); ++i )
686                 regSpace->storageMap[ BP_GR0 + i ] = --stackIndex;
687
688         int stackCount = 0;
689         if( stackIndex < 0 ) {
690                 stackCount = -stackIndex * 8;
691                 stackCount += stackCount % 16; // Align stack to 0x10 boundry.
692         }
693
694         if( usedFPregs ) {
695                 for( int i = 0; i < 128; ++i )
696                         if( usedFPregs[ i ] ) stackCount += 16;
697
698         } else {
699                 stackCount += 16 * 106;
700         }
701
702         /* The runtime conventions require a 16-byte scratch area
703            above the SP for function calls.  Since we're assuming
704            function calls will be made from instrumentation, and
705            will thus always have a subtraction here, go ahead and
706            subtract an additional 16 bytes so we can spill two
707            floating-point registers to do multiplication. */
708         regSpace->sizeOfStack = stackCount + 32;
709 }
710
711 #include "instPoint.h"
712 #include "process.h"
713
714 extern bool *doFloatingPointStaticAnalysis( const instPoint * );
715
716 /* Private refactoring function. */
717 void extractAllocatedRegistersFromBasicBlock( const instPoint * location, int_function * pdf, int_basicBlock * allocBlock, uint64_t * locals, uint64_t * outputs, uint64_t * rotates ) {
718         /* We could probably extract pdf from allocBlock, but whatever. */
719         Address encodedAddress = allocBlock->origInstance()->firstInsnAddr();
720         unsigned short slotNumber = encodedAddress % 16;
721         Address alignedOffset = encodedAddress - pdf->getAddress() - slotNumber;
722                         
723         Address fnEntryOffset = pdf->getAddress();
724         Address fnEntryAddress = (Address)location->proc()->getPtrToInstruction(fnEntryOffset);
725         assert( fnEntryAddress % 16 == 0 );
726         const ia64_bundle_t * rawBundlePointer = (const ia64_bundle_t *) fnEntryAddress;
727         IA64_bundle allocBundle = rawBundlePointer[ alignedOffset / 16 ];
728
729         extractAllocatedRegisters( allocBundle.getInstruction( slotNumber )->getMachineCode(),
730                 locals, outputs, rotates );
731         } /* end extractAllocatedRegistersFromBasicBlock */
732
/* Chooses the register frame that a base tramp installed at instPoint
   _location_ will use.  A reaching-definitions analysis over the function's
   basic blocks determines which alloc instructions can reach _location_, and
   the tramp's registers are placed above the frame those allocs set up.

   location - the instPoint to be instrumented.
   first    - out: lowest register passed to registerSpace::overwriteRegisterSpace64().
   last     - out: highest register passed to registerSpace::overwriteRegisterSpace64().

   Returns the register space obtained from registerSpace::savedRegSpace(), or
   NULL (leaving first and last untouched) when the allocs reaching _location_
   disagree about the frame. */
registerSpace *defineBaseTrampRegisterSpaceFor( const instPoint * location, 
												Register &first, Register &last) {
	/* If no alloc's definition reaches the instPoint _location_, create a base tramp
	   register space compatible with any possible leaf function.

	   If exactly one alloc's definition reaches the instPoint _location_, create a
	   base tramp by extending the frame created by that alloc.

	   If more than one alloc's definition reaches the instPoint _location_, and those
	   allocs do not all allocate the same frame, return NULL, because we can't
	   statically determine the register frame that will be active when the
	   instrumentation at _location_ executes.  (If they all allocate the same frame,
	   they are treated like a single alloc.) */

	int_function * pdf = location->func();
	assert( pdf != NULL );
	// /* DEBUG */ fprintf( stderr, "%s[%d]: image func %p (in int_function %p)\n", __FILE__, __LINE__, pdf->ifunc(), pdf );
	
	/* Determine used FP regs, if needed; the result is stored on the image function
	   so the analysis is only run while getUsedFPregs() still returns NULL. */
	if( pdf->getUsedFPregs() == NULL ) {
		pdf->ifunc()->usedFPregs = doFloatingPointStaticAnalysis( location );
		}
	
	const std::vector<int_basicBlock *> &blocks = pdf->blocks();

	/* Initialize the dataflow sets and construct the initial worklist.  Every block
	   starts with empty in/out sets and no gen/kill, and is queued once. */
	// /* DEBUG */ fprintf( stderr, "%s[%d]: listing basic blocks for function beginning at 0x%lx... \n", __FILE__, __LINE__, pdf->getAddress() );
	std::list< int_basicBlock * > workList;
	for (unsigned bIter = 0; bIter < blocks.size(); bIter++) {
		int_basicBlock * basicBlock = blocks[bIter];
		/* The in-set is allocated here and freed during clean-up, but is not
		   otherwise read in this function. */
		basicBlock->setDataFlowIn( new BPatch_Set< int_basicBlock * > );
		basicBlock->setDataFlowOut( new BPatch_Set< int_basicBlock * > );
		basicBlock->setDataFlowGen( NULL );
		basicBlock->setDataFlowKill( NULL );

		// /* DEBUG */ fprintf( stderr, "%s[%d]: block #%d: from 0x%lx - 0x%lx\n", __FILE__, __LINE__, bIter, basicBlock->origInstance()->firstInsnAddr(), basicBlock->origInstance()->endAddr() );

		workList.push_back( basicBlock );
		} /* end initialization iteration over all basic blocks */
	// /* DEBUG */ fprintf( stderr, "%s[%d]: ... listing complete.\n", __FILE__, __LINE__ );

	/* Initialize the alloc blocks: a block containing an alloc is marked as both
	   generating and killing, using the block itself as the marker. */
	for( unsigned int i = 0; i < pdf->getAllocs().size(); i++ ) {
		Address absoluteAddress = pdf->getAllocs()[i];
		// /* DEBUG */ fprintf( stderr, "%s[%d]: absolute address of alloc: 0x%lx (in function starting at 0x%lx)\n", __FILE__, __LINE__, absoluteAddress, pdf->getAddress() );
		int_basicBlock * currentAlloc = findBasicBlockInCFG( absoluteAddress, blocks );
		
		/* The old parser uses the frequently-incorrect symbol table size information,
		   so we can get allocs in unreachable basic blocks.  Since they're unreachable, 
		   the CFG doesn't create them and we can't find them.  */
		if( currentAlloc == NULL ) { continue; }
		/* Switch back to me when the new parser arrives.  (While the 'continue'
		   above is in place, this assert can never fire.) */
		assert( currentAlloc != NULL );
		
		/* Generically, these should be functors from sets to sets. */
		currentAlloc->setDataFlowGen( currentAlloc );
		currentAlloc->setDataFlowKill( currentAlloc );
	} /* end initialization iteration over all allocs. */

	/* Start running the worklist. */
	while( ! workList.empty() ) {
		int_basicBlock * workBlock = workList.front();
		workList.pop_front();
		// /* DEBUG */ fprintf( stderr, "Working on basicBlock %p\n", workBlock );

		/* Construct workBlock's new output set from workBlock's immediate predecessors.  If
		   it's different from workBlock's old output set, add all of workBlock's successors
		   to the workList. */
		BPatch_Set< int_basicBlock * > newOutputSet;
		pdvector< int_basicBlock * > predecessors;
		workBlock->getSources( predecessors );
		for( unsigned int i = 0; i < predecessors.size(); i++ ) {
			int_basicBlock * predecessor = predecessors[i];
			/* Union in everything leaving this predecessor. */
			newOutputSet |= * predecessor->getDataFlowOut();
		} /* end iteration over predecessors */
		
		// /* DEBUG */ fprintf( stderr, "From %d predecessors, %d allocs in input set.\n", predecessors.size(), newOutputSet.size() );
		
		if( workBlock->getDataFlowKill() != NULL ) {
			/* Special case for allocs: any non-NULL kill set kills everything.  Otherwise, you'd
			   have to use an associative set for kill and gen. */
			newOutputSet = BPatch_Set<int_basicBlock *>();
		}

		if( workBlock->getDataFlowGen() != NULL ) {	
			newOutputSet.insert( workBlock->getDataFlowGen() ); 
		}

		// /* DEBUG */ fprintf( stderr, "After gen/kill sets, %d in (new) output set.\n", newOutputSet.size() );

		if( newOutputSet != *workBlock->getDataFlowOut() ) {
			// /* DEBUG */ fprintf( stderr, "New output set different, adding successors:" );
			* workBlock->getDataFlowOut() = newOutputSet;

			pdvector< int_basicBlock * > successors;
			workBlock->getTargets( successors );
			
			for( unsigned int i = 0; i < successors.size(); i++ ) {
				// /* DEBUG */ fprintf( stderr, " %p", successors[i] );
				workList.push_back( successors[i] );
			} /* end iteration over successors */
			// /* DEBUG */ fprintf( stderr, "\n" );
		} /* end if the output set changed. */
	} /* end iteration over worklist. */

	// /* DEBUG */ fprintf( stderr, "%s[%d]: absolute address of location: 0x%lx\n", __FILE__, __LINE__, location->addr() );
	/* The allocs reaching _location_ are taken from the output set of the block
	   containing it.  If no block contains _location_, numAllocs stays 0 and the
	   no-alloc case below is used. */
	int numAllocs = 0;
	bool success = true;
	BPatch_Set< int_basicBlock * > * reachingAllocs = NULL;
	int_basicBlock * locationBlock = findBasicBlockInCFG( location->addr(), blocks );
	if( locationBlock ) {
		reachingAllocs = locationBlock->getDataFlowOut();
		numAllocs = reachingAllocs->size();
	}
	// /* DEBUG */ fprintf( stderr, "%s[%d]: %d reaching allocs located.\n", __FILE__, __LINE__, numAllocs );

	/* Stays NULL only if the reaching allocs turn out to be dissimilar. */
	registerSpace *regSpace = NULL;

	/* NOTE: the default case is deliberately placed before case 1, so that it can
	   fall through into it. */
	switch( numAllocs ) {
		case 0: {
			// /* DEBUG */ fprintf( stderr, "%s[%d]: no reaching allocs located.\n", __FILE__, __LINE__ );
			
			/* The largest possible unallocated frame (by the ABI, for leaf
			   functions) is 8 input registers. */
			
			first = 32+8+NUM_PRESERVED;
			last = first + NUM_LOCALS + NUM_OUTPUT - 1;

			/* Construct the registerSpace reflecting the desired frame. */
			registerSpace::overwriteRegisterSpace64(first, last);
			regSpace = registerSpace::savedRegSpace(location->proc());

			initBaseTrampStorageMap( regSpace, 8, pdf->getUsedFPregs() );

			/* If we did not have a frame originally, create one such that wrapper functions
			   will work correctly. */
			regSpace->originalLocals = 0;
			regSpace->originalOutputs = 8;
			regSpace->originalRotates = 0;

			/* Our static analysis succeeded. */
			} break;

		default: {
			// /* DEBUG */ fprintf( stderr, "%s[%d]: more than one (%d) allocs reached.\n", __FILE__, __LINE__, numAllocs );
			
			/* If all the reaching allocs are the same, we fall through to
			   the single-reaching-alloc case rather than duplicate code. */
			success = true;
			
			int_basicBlock * firstAlloc = * reachingAllocs->begin();
			uint64_t firstLocals, firstOutputs, firstRotates;
			extractAllocatedRegistersFromBasicBlock( location, pdf, firstAlloc,
				& firstLocals, & firstOutputs, & firstRotates );
			
			/* Compare every reaching alloc (the first one included) against the first. */
			BPatch_Set< int_basicBlock * >::iterator iter = reachingAllocs->begin();
			for( int i = 0; i < numAllocs; ++i, iter++ ) {
				// /* DEBUG */ fprintf( stderr, "%s[%d]: alloc at 0x%lx\n", __FILE__, __LINE__, (* iter)->origInstance()->firstInsnAddr() );
				
				uint64_t locals, outputs, rotates;
				extractAllocatedRegistersFromBasicBlock( location, pdf, * iter, & locals, & outputs, & rotates );
				if( locals != firstLocals || outputs != firstOutputs || rotates != firstRotates ) {
					success = false;
					break;
					}
				} /* end iteration over reaching allocs. */
			
			if( ! success ) {
				// /* DEBUG */ fprintf( stderr, "%s[%d]: allocs reaching 0x%lx are dissimilar.\n", __FILE__, __LINE__, location->addr() );
				/* Leaves the switch with regSpace still NULL. */
				break;
				} else {
				// /* DEBUG */ fprintf( stderr, "%s[%d]: all allocs reaching 0x%lx are the same.\n", __FILE__, __LINE__, location->addr() );
				}
			}
			/* Intentional fall-through: all reaching allocs are the same. */
			
		case 1: {			
			/* Where is our alloc instruction?  We need to have a look at it... */
			int_basicBlock * allocBlock = * reachingAllocs->begin();
			// /* DEBUG */ fprintf( stderr, "%s[%d]: reaching alloc at 0x%lx\n", __FILE__, __LINE__, allocBlock->origInstance()->firstInsnAddr() );
			
			uint64_t allocatedLocals, allocatedOutputs, allocatedRotates;
			extractAllocatedRegistersFromBasicBlock( location, pdf, allocBlock,
				& allocatedLocals, & allocatedOutputs, & allocatedRotates );
			uint64_t sizeOfFrame = allocatedLocals + allocatedOutputs;

			/* ... and construct a deadRegisterList and regSpace above the
			   registers the application's using. */

			// Ensure that deadRegisterList fits within the 128 general register pool.
			int baseReg = 32 + sizeOfFrame + NUM_PRESERVED;
			if( baseReg > 128 - (NUM_LOCALS + NUM_OUTPUT) )
				baseReg = 128 - (NUM_LOCALS + NUM_OUTPUT);

			first = baseReg;
			last = baseReg + NUM_LOCALS + NUM_OUTPUT - 1;
			registerSpace::overwriteRegisterSpace64(first, last);
			regSpace = registerSpace::savedRegSpace(location->proc());

			initBaseTrampStorageMap( regSpace, sizeOfFrame, pdf->getUsedFPregs() );

			/* Note that we assume that having extra registers can't be harmful;
			   that is, that 'restoring' the alloc instruction's frame before
			   it executes does not change the semantics of the program.  AFAIK,
			   this will be true for all correct programs. */
			regSpace->originalLocals = allocatedLocals;
			regSpace->originalOutputs = allocatedOutputs;
			regSpace->originalRotates = allocatedRotates;

			/* Our static analysis succeeded. */
			} break;
			
		} /* end #-of-dominating-allocs switch */

	/* Regardless, clean up: free the sets allocated above and reset every block's
	   dataflow pointers to NULL. */
	for( unsigned bIter = 0; bIter < blocks.size(); bIter++ ) {
		int_basicBlock *block = blocks[bIter];
		delete (block->getDataFlowOut());
		delete (block->getDataFlowIn());
		block->setDataFlowIn(NULL);
		block->setDataFlowOut(NULL);
		block->setDataFlowGen(NULL);
		block->setDataFlowKill(NULL);
		} /* end iteration over all blocks. */	

	return regSpace;
} /* end defineBaseTrampRegisterSpace() */
957
958 /* For inst-ia64.h */
959 instruction generateRegisterToRegisterMove( Register source, Register destination ) {
960         return generateShortImmediateAdd( destination, 0, source ); }
961
962 instruction generateIPToRegisterMove( Register destination ) {
963         insn_tmpl mov = { 0x0 };
964
965         /* Mov Opcode   = 0x0 */
966         mov.I25.x6              = 0x30;
967         mov.I25.r1              = destination;
968
969         return instruction( mov.raw );
970         } /* end generateIPToRegisterMove() */
971
972 instruction generateBranchToRegisterMove( Register source, Register destination ) {
973         insn_tmpl mov = { 0x0 };
974
975         mov.I22.x6 = 0x31;
976         mov.I22.b2 = source;
977         mov.I22.r1 = destination;
978
979         return instruction( mov.raw );
980         } /* end generateBranchToRegisterMove() */
981
982 instruction generateRegisterToBranchMove( Register source, Register destination, int immediate ) {
983         insn_tmpl mov = { 0x0 };
984
985         mov.I21.x3              = 0x7;
986         mov.I21.r2              = source;
987         mov.I21.b1              = destination;
988         mov.I21.timm9c  = immediate;
989
990         return instruction( mov.raw );  
991         } /* end generateRegisterToBranchMove() */
992
993 instruction generateShortImmediateAdd( Register destination, int immediate, Register source ) {
994         insn_tmpl add = { 0x0 };
995
996         add.A4.opcode   = 0x8;
997         add.A4.x2a              = 0x2;
998         add.A4.r3               = source;
999         add.A4.r1               = destination;
1000         SET_A4_IMM(&add, immediate);
1001
1002         return instruction( add.raw );
1003         } /* end generateShortImmediateAdd() */
1004
1005 instruction generateArithmetic( opCode op, Register destination, Register lhs, Register rhs ) {
1006         insn_tmpl alu = { 0x0 };
1007
1008         alu.A1.opcode   = 0x8;
1009         alu.A1.r1               = destination;
1010         alu.A1.r2               = lhs;
1011         alu.A1.r3               = rhs;
1012         switch( op ) {
1013                 case plusOp:    alu.A1.x4 = 0; alu.A1.x2b = 0; break;
1014                 case minusOp:   alu.A1.x4 = 1; alu.A1.x2b = 1; break;
1015                 case andOp:             alu.A1.x4 = 3; alu.A1.x2b = 0; break;
1016                 case orOp:              alu.A1.x4 = 3; alu.A1.x2b = 2; break;
1017                 default:
1018                         bpfatal( "generateArithmetic() did not recognize opcode %d, aborting.\n", op );
1019                         abort();
1020                         break;
1021                 } /* end op switch */
1022
1023         return instruction( alu.raw );
1024         } /* end generateArithmetic() */
1025
1026 instruction generateIndirectCallTo( Register indirect, Register rp ) {
1027         insn_tmpl call = { 0x0 };
1028
1029         call.B5.opcode  = 0x1;
1030         call.B5.wh              = 0x1;
1031         call.B5.b2              = indirect;
1032         call.B5.b1              = rp;
1033
1034         return instruction( call.raw );
1035         } /* end generateIndirectCallTo() */
1036
1037 instruction generatePredicatesToRegisterMove( Register destination ) {
1038         insn_tmpl mov = { 0x0 };
1039
1040         /* Mov Opcode   = 0x0 */
1041         mov.I25.x6              = 0x33;
1042         mov.I25.r1              = destination;
1043
1044         return instruction( mov.raw );
1045         } /* end generatePredicatesToRegisterMove() */
1046
1047 instruction generateRegisterToPredicatesMove( Register source, int64_t mask64 ) {
1048         insn_tmpl mov = { 0x0 };
1049
1050         /* Mov Opcode   = 0x0 */
1051         mov.I23.x3              = 0x3;
1052         mov.I23.r2              = source;
1053         SET_I23_MASK(&mov, mask64);
1054
1055         return instruction( mov.raw );
1056         } /* end generateRegisterToPredicatesMove() */
1057
1058 instruction generateSpillTo( Register address, Register source, int64_t imm9 ) {
1059         insn_tmpl spill = { 0x0 };
1060
1061         spill.M5.opcode = 0x5;
1062         spill.M5.x6             = 0x3B;
1063         spill.M5.r2             = source;
1064         spill.M5.r3             = address;
1065         SET_M5_IMM(&spill, imm9);
1066
1067         return instruction( spill.raw );
1068         } /* end generateSpillTo() */
1069
1070 instruction generateFillFrom( Register address, Register destination, int64_t imm9 ) {
1071         insn_tmpl fill = { 0x0 };
1072
1073         if( imm9 == 0x0 ) {
1074                 // Use no update form.
1075                 fill.M1.opcode  = 0x4;
1076                 fill.M1.x6              = 0x1B;
1077                 fill.M1.r3              = address;
1078                 fill.M1.r1              = destination;
1079
1080         } else {
1081                 // Use base update form.
1082                 fill.M3.opcode  = 0x5;
1083                 fill.M3.x6              = 0x1B;
1084                 fill.M3.r3              = address;
1085                 fill.M3.r1              = destination;
1086                 SET_M3_IMM(&fill, imm9);
1087         }
1088
1089         return instruction( fill.raw );
1090         } /* end generateFillFrom() */
1091
1092 instruction generateRegisterStore( Register address, Register source, int size, Register predicate ) {
1093         return generateRegisterStoreImmediate( address, source, 0, size, predicate );
1094         } /* generateRegisterStore() */
1095
1096 instruction generateRegisterStoreImmediate( Register address, Register source, int imm9, int size, Register predicate ) {
1097         insn_tmpl store = { 0x0 };
1098
1099         store.M5.opcode = 0x5;
1100         store.M5.r2             = source;
1101         store.M5.r3             = address;
1102         store.M5.qp             = predicate;
1103         switch (size) {
1104                 case 1: store.M5.x6 = 0x30; break;
1105                 case 2: store.M5.x6 = 0x31; break;
1106                 case 4: store.M5.x6 = 0x32; break;
1107                 case 8: store.M5.x6 = 0x33; break;
1108                 default:
1109                         bpfatal( "Illegal size %d, aborting.\n", size );
1110                         assert( 0 );
1111                         break;
1112                 } /* end sizeSpec determiner */
1113         SET_M5_IMM(&store, imm9);
1114
1115         return instruction( store.raw );
1116         } /* end generateRegisterStore() */
1117
1118 /* This is the no-update form, which lets the code generator do dumb
1119    stuff like load from and into the same register. */
1120 instruction generateRegisterLoad( Register destination, Register address, int size ) {
1121         insn_tmpl load = { 0x0 };
1122
1123         load.M1.opcode  = 0x4;
1124         load.M1.r3              = address;
1125         load.M1.r1              = destination;
1126         switch( size ) {
1127                 case 1: load.M1.x6 = 0x00; break;
1128                 case 2: load.M1.x6 = 0x01; break;
1129                 case 4: load.M1.x6 = 0x02; break;
1130                 case 8: load.M1.x6 = 0x03; break;
1131                 default:
1132                         bpfatal( "Illegal size %d, aborting.\n", size );
1133                         assert( 0 );
1134                         break;
1135                 } /* end sizeSpec determiner */
1136
1137         return instruction( load.raw ); 
1138         } /* end generateRegisterLoad() */
1139
1140 instruction generateRegisterLoadImmediate( Register destination, Register address, int imm9, int size ) { 
1141         insn_tmpl load = { 0x0 };
1142
1143         load.M3.opcode  = 0x5;
1144         load.M3.r3              = address;
1145         load.M3.r1              = destination;
1146         switch( size ) {
1147                 case 1: load.M3.x6 = 0x00; break;
1148                 case 2: load.M3.x6 = 0x01; break;
1149                 case 4: load.M3.x6 = 0x02; break;
1150                 case 8: load.M3.x6 = 0x03; break;
1151                 default:
1152                         bpfatal( "Illegal size %d, aborting.\n", size );
1153                         assert( 0 );
1154                         break;
1155                 } /* end sizeSpec determiner */
1156         SET_M3_IMM(&load, imm9);
1157
1158         return instruction( load.raw );
1159         } /* end generateRegisterLoad() */
1160
1161 instruction generateRegisterToApplicationMove( Register source, Register destination ) {
1162         /* The lower 48 application registers are only accessible via the M unit.  For simplicity,
1163            divide responsibility at the sixty-fourth application register, with an I unit handling
1164            the upper 64. */
1165         insn_tmpl mov = { 0x0 };
1166
1167         if (destination <= 63) {
1168                 mov.M29.opcode  = 0x1;
1169                 mov.M29.x6              = 0x2A;
1170                 mov.M29.r2              = source;
1171                 mov.M29.ar3             = destination;
1172
1173         } else {
1174                 /* Mov Opcode   = 0x0 */
1175                 mov.I26.x6              = 0x2A;
1176                 mov.I26.r2              = source;
1177                 mov.I26.ar3             = destination;
1178         }
1179
1180         return instruction( mov.raw );
1181         } /* end generateRegisterToApplicationMove() */
1182
1183 instruction generateApplicationToRegisterMove( Register source, Register destination ) {
1184         /* The lower 48 application registers are only accessible via the M unit.  For simplicity,
1185            divide responsibility at the sixty-fourth application register, with an I unit handling
1186            the upper 64. */
1187         insn_tmpl mov = { 0x0 };
1188
1189         if (source <= 63) {
1190                 mov.M31.opcode  = 0x1;
1191                 mov.M31.x6              = 0x22;
1192                 mov.M31.ar3             = source;
1193                 mov.M31.r1              = destination;
1194
1195         } else {
1196                 /* Mov Opcode   = 0x0 */
1197                 mov.I28.x6              = 0x32;
1198                 mov.I28.ar3             = source;
1199                 mov.I28.r1              = destination;
1200         }
1201
1202         return instruction( mov.raw );
1203         } /* end generateRegisterToApplicationMove() */
1204
1205 instruction predicateInstruction( Register predicate, instruction insn ) {
1206         insn_tmpl tmpl = { insn.getMachineCode() };
1207
1208         SET_PREDICATE(&tmpl, predicate);
1209         return instruction( tmpl.raw, insn.getTemplateID(), insn.getSlotNumber() );
1210         } /* end predicateInstruction() */
1211
1212 instruction_x predicateLongInstruction( Register predicate, instruction_x insn ) {
1213         insn_tmpl tmpl = { insn.getMachineCode().high };
1214
1215         SET_PREDICATE(&tmpl, predicate);
1216         return instruction_x( insn.getMachineCode().low, tmpl.raw, insn.getTemplateID() );
1217         } /* end predicateLongInstruction() */
1218
1219 #define SWAP(a, b)      ((a) ^= (b), (b) ^= (a), (a) ^= (b))
1220 instruction generateComparison( opCode op, Register destination, Register lhs, Register rhs ) {
1221         insn_tmpl cmp = { 0x0 };
1222         Register anti_destination = destination + 1;
1223
1224         /* We'll assume that all of our comparisons are signed. */
1225         switch( op ) {
1226                 /* This gets cute.  The IA-64 hardware only implements the eq and lt ops,
1227                    so we get to do some argument and target rewriting to make things work.
1228                    The idea is to fall through the operations until we get to one that
1229                    can be implemented in hardware. */
1230         
1231                 case greaterOp: SWAP( destination, anti_destination ); /* Extra SWAP to undo geOp's */
1232                 case leOp:              SWAP( lhs, rhs );
1233                 case geOp:              SWAP( destination, anti_destination );
1234                 case lessOp:    cmp.A6.opcode   = 0xC;
1235                         break;
1236
1237                 case neOp:              SWAP( destination, anti_destination );
1238                 case eqOp:              cmp.A6.opcode   = 0xE;
1239                         break;
1240
1241                 default:
1242                         bpfatal( "Unrecognized op %d in generateComparison(), aborting.\n", op );
1243                         abort();
1244                 } /* end op switch */
1245
1246         cmp.A6.r2       = lhs;
1247         cmp.A6.r3       = rhs;
1248         cmp.A6.p1       = destination;
1249         cmp.A6.p2       = anti_destination;
1250
1251         return instruction( cmp.raw );
1252         } /* end generateComparison() */
1253
1254 instruction generateFPSpillTo( Register address, Register source, int64_t imm9 ) {
1255         insn_tmpl spill_f = { 0x0 };
1256
1257         spill_f.M10.opcode      = 0x7;
1258         spill_f.M10.x6          = 0x3B;
1259         spill_f.M10.f2          = source;
1260         spill_f.M10.r3          = address;
1261         SET_M10_IMM(&spill_f, imm9);
1262
1263         return instruction( spill_f.raw );
1264         } /* end generateFPSpillTo() */
1265
1266 instruction generateFPFillFrom( Register address, Register destination, int64_t imm9 ) {
1267         insn_tmpl fill_f = { 0x0 };
1268
1269         fill_f.M8.opcode        = 0x7;
1270         fill_f.M8.x6            = 0x1B;
1271         fill_f.M8.r3            = address;
1272         fill_f.M8.f1            = destination;
1273         SET_M8_IMM(&fill_f, imm9);
1274
1275         return instruction( fill_f.raw );
1276         } /* end generateFPFillFrom() */
1277
1278 instruction generateRegisterToFloatMove( Register source, Register destination ) {
1279         insn_tmpl mov_f = { 0x0 };
1280
1281         mov_f.M18.opcode        = 0x6;
1282         mov_f.M18.x6            = 0x1C;
1283         mov_f.M18.x                     = 0x1;
1284         mov_f.M18.r2            = source;
1285         mov_f.M18.f1            = destination;
1286
1287         return instruction( mov_f.raw );
1288         } /* end generateRegisterToFloatMove() */
1289
1290 instruction generateFloatToRegisterMove( Register source, Register destination ) {
1291         insn_tmpl mov_f = { 0x0 };
1292
1293         mov_f.M19.opcode        = 0x4;
1294         mov_f.M19.x6            = 0x1C;
1295         mov_f.M19.x                     = 0x1;
1296         mov_f.M19.f2            = source;
1297         mov_f.M19.r1            = destination;
1298
1299         return instruction( mov_f.raw );
1300         } /* end generateFloatToRegisterMove() */
1301
1302 instruction generateFixedPointMultiply( Register destination, Register lhs, Register rhs ) {
1303         insn_tmpl xma_l = { 0x0 };
1304
1305         /* FIXME: We're assuming unsigned, and that the lower 64 bits are more interesting,
1306                   but this may well not be the case. */
1307         xma_l.F2.opcode = 0xE;
1308         xma_l.F2.x              = 0x1;
1309         xma_l.F2.f3             = lhs;
1310         xma_l.F2.f4             = rhs;
1311         xma_l.F2.f1             = destination;
1312
1313         return instruction( xma_l.raw );
1314         } /* end generateFixedPointMultiply() */
1315
1316 void alterLongMoveAtTo( Address target, Address imm64 ) {
1317         ia64_bundle_t *rawBundle = (ia64_bundle_t *)target;
1318         insn_tmpl movl = { rawBundle->high }, imm = { rawBundle->low };
1319
1320         SET_X2_IMM( &movl, &imm, imm64 );
1321
1322         rawBundle->high = movl.raw;
1323         rawBundle->low  = imm.raw;
1324         } /* end alterLongMoveAtTo() */
1325
1326 instruction generateShortImmediateBranch( int64_t target25 ) {
1327         insn_tmpl br_cond = { 0x0 };
1328
1329         br_cond.B1.opcode = 0x4;
1330         SET_B1_TARGET(&br_cond, target25);
1331
1332         return instruction( br_cond.raw );
1333         } /* end generateShortImmediateBranch() */
1334
1335
1336
1337 /* Require by insertTrapAtEntryPointOfMain() */
1338 IA64_bundle generateTrapBundle() {
1339         /* Note: we're using 0x80000 as our break.m immediate,
1340            which is defined to be a debugger breakpoint.  If this
1341            gets flaky, anything up to 0x0FFFFF will generate a SIGTRAP. */
1342
1343         /* Actually, what we're going to do is generate
1344            a SIGILL, (0x40000) because SIGTRAP does silly things. */
1345
1346         return IA64_bundle( MIIstop, TRAP_M, NOP_I, NOP_I );
1347         } /* end generateTrapBundle() */
1348
1349
1350 void IA64_bundle::generate(codeGen &gen) {
1351         GET_PTR(insn, gen);
1352         *insn = myBundle;
1353         insn++;
1354         SET_PTR(insn, gen);
1355 }
1356
1357 void instruction::generateIllegal(codeGen &gen) {
1358         generateTrapBundle().generate(gen);
1359 }
1360
1361 void instruction::generateTrap(codeGen &gen) {
1362         // The trap is actually an illegal, apparently
1363         generateIllegal(gen);
1364 }
1365
1366 void instruction::generateNOOP(codeGen &gen, unsigned size) {
1367         assert((size % 16) == 0);
1368         IA64_bundle nopBundle( MIIstop, NOP_M, NOP_I, NOP_I );
1369         while (size) {
1370                 nopBundle.generate(gen);
1371                 size -= 16;
1372         }
1373 }
1374
1375 void instruction::generateBranch(codeGen &gen, Address from, Address to) {
1376         instruction_x lbtOriginal = generateLongBranchTo( to - from );
1377         IA64_bundle jumpBackBundle( MLXstop, instruction(NOP_M), lbtOriginal );
1378         jumpBackBundle.generate(gen);
1379 }
1380
1381 Address instruction::getTarget(Address origAddr) const {
1382         return (origAddr - (origAddr % 16)) + getTargetAddress();
1383 }
1384
1385 instruction *instruction::copy() const {
1386         return new instruction(insn_, templateID, slotNumber);
1387 }
1388
1389 instruction_x *instruction_x::copy() const {
1390         return new instruction_x(insn_, insn_x_, templateID);
1391 }
1392
1393 instruction generateShiftLeftAndAdd( Register destination, Register shifted, uint64_t count, Register added ) {
1394         insn_tmpl shladd = { 0x0 };
1395
1396         shladd.A2.opcode = 8;
1397         shladd.A2.x2a = 0;
1398         shladd.A2.ve = 0;
1399         shladd.A2.x4 = 4;
1400         
1401         shladd.A2.r1 = destination;
1402         shladd.A2.r2 = shifted;
1403         shladd.A2.r3 = added;
1404         
1405         assert( 1 <= count && count <= 4 );
1406         shladd.A2.ct2d = count - 1;
1407
1408         return instruction( shladd.raw );
1409         } /* end generateShiftLeftAndAdd() */
1410
1411 bool instruction::generateMem(codeGen &,
1412                               Address, 
1413                               Address,
1414                               Register,
1415                   Register) {return false; }
1416
1417 bool instruction::getUsedRegs(pdvector<int> &) {
1418         return false;
1419 }