Update copyright to LGPL on all files
[dyninst.git] / dyninstAPI / src / arch-ia64.C
1 /*
2  * Copyright (c) 1996-2009 Barton P. Miller
3  * 
4  * We provide the Paradyn Parallel Performance Tools (below
5  * described as "Paradyn") on an AS IS basis, and do not warrant its
6  * validity or performance.  We reserve the right to update, modify,
7  * or discontinue this software at any time.  We shall have no
8  * obligation to supply such updates or modifications or any other
9  * form of support to you.
10  * 
11  * By your use of Paradyn, you understand and agree that we (or any
12  * other person or entity with proprietary rights in Paradyn) are
13  * under no obligation to provide either maintenance services,
14  * update services, notices of latent defects, or correction of
15  * defects for Paradyn.
16  * 
17  * This library is free software; you can redistribute it and/or
18  * modify it under the terms of the GNU Lesser General Public
19  * License as published by the Free Software Foundation; either
20  * version 2.1 of the License, or (at your option) any later version.
21  * 
22  * This library is distributed in the hope that it will be useful,
23  * but WITHOUT ANY WARRANTY; without even the implied warranty of
24  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
25  * Lesser General Public License for more details.
26  * 
27  * You should have received a copy of the GNU Lesser General Public
28  * License along with this library; if not, write to the Free Software
29  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
30  */
31
32 // $Id: arch-ia64.C,v 1.59 2008/04/11 23:30:07 legendre Exp $
33 // ia64 instruction decoder
34
35 #include <assert.h>
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include "dyninstAPI/src/arch-ia64.h"
39 #include "util.h"
40 #include "registerSpace.h"
41 #include "debug.h"
42
/* Layout of a 128-bit bundle as two 64-bit words (low, high):
     low  word: template (5 bits), slot 0 (41 bits), low 18 bits of slot 1
     high word: high 23 bits of slot 1, slot 2 (41 bits)
   Stand-alone instructions are kept left-aligned in a 64-bit word, i.e.
   their 41 bits sit above the low ALIGN_RIGHT_SHIFT bits.
   The masks carry an explicit ULL suffix so that expressions such as
   ~MASK are always evaluated as unsigned 64-bit values. */
#define ALIGN_RIGHT_SHIFT       23
#define TEMPLATE_MASK           0x000000000000001FULL  /* low word,  bits 00 - 04 */
#define INSTRUCTION0_MASK       0x00003FFFFFFFFFE0ULL  /* low word,  bits 05 - 45 */
#define INSTRUCTION1_LOW_MASK   0xFFFFC00000000000ULL  /* low word,  bits 46 - 63 */
#define INSTRUCTION1_HIGH_MASK  0x00000000007FFFFFULL  /* high word, bits 00 - 22 */
#define INSTRUCTION2_MASK       0xFFFFFFFFFF800000ULL  /* high word, bits 23 - 63 */
/* Immediate value that getType() treats as a system call rather than a plain break. */
#define SYSCALL_IMM             0x100000
50
51 instruction::unitType INSTRUCTION_TYPE_ARRAY[(0x20 + 1) * 3] = { 
52         instruction::M, instruction::I, instruction::I,
53         instruction::M, instruction::I, instruction::I,
54         instruction::M, instruction::I, instruction::I,
55         instruction::M, instruction::I, instruction::I,
56         instruction::M, instruction::L, instruction::X,
57         instruction::M, instruction::L, instruction::X,
58         instruction::RESERVED, instruction::RESERVED, instruction::RESERVED,
59         instruction::RESERVED, instruction::RESERVED, instruction::RESERVED,
60         instruction::M, instruction::M, instruction::I,
61         instruction::M, instruction::M, instruction::I,
62         instruction::M, instruction::M, instruction::I,
63         instruction::M, instruction::M, instruction::I,
64         instruction::M, instruction::F, instruction::I,
65         instruction::M, instruction::F, instruction::I,
66         instruction::M, instruction::M, instruction::F,
67         instruction::M, instruction::M, instruction::F,
68
69         instruction::M, instruction::I, instruction::B,
70         instruction::M, instruction::I, instruction::B,
71         instruction::M, instruction::B, instruction::B,
72         instruction::M, instruction::B, instruction::B,
73         instruction::RESERVED, instruction::RESERVED, instruction::RESERVED,
74         instruction::RESERVED, instruction::RESERVED, instruction::RESERVED,
75         instruction::B, instruction::B, instruction::B,
76         instruction::B, instruction::B, instruction::B,
77         instruction::M, instruction::M, instruction::B,
78         instruction::M, instruction::M, instruction::B,
79         instruction::RESERVED, instruction::RESERVED, instruction::RESERVED,
80         instruction::RESERVED, instruction::RESERVED, instruction::RESERVED,
81         instruction::M, instruction::F, instruction::B,
82         instruction::M, instruction::F, instruction::B,
83         instruction::RESERVED, instruction::RESERVED, instruction::RESERVED,
84         instruction::RESERVED, instruction::RESERVED, instruction::RESERVED,
85
86         instruction::RESERVED, instruction::RESERVED, instruction::RESERVED,
87 };
88
89 /* NOTE: for the IA64_bundle constructor to work, the individual
90         instruction 'halves' should left-aligned as if they were independent instructions. */
91 instruction_x::instruction_x( uint64_t lowHalf, uint64_t highHalf, uint8_t templ ) {
92         insn_ = lowHalf;
93         insn_x_ = highHalf;
94         templateID = templ;
95 } /* end IA64_Instruction_x() */
96
97 instruction::instruction( uint64_t insn, uint8_t templ, uint8_t slotN ) {
98         insn_ = insn;
99         templateID = templ;
100         slotNumber = slotN;
101 } /* end IA64_Instruction() */
102
103 const void * instruction::ptr() const { 
104         return & insn_;
105 } /* end ptr() */
106         
107 const void * instruction_x::ptr() const { 
108         return & insn_x_;
109 } /* end ptr() */
110
111 uint8_t instruction::getPredicate() const {
112         return GET_PREDICATE( (const insn_tmpl *)(&insn_) );
113 } /* end short instruction predication fetch */
114
115 uint8_t instruction_x::getPredicate() const {
116         return GET_PREDICATE( (const insn_tmpl *)(&insn_x_) );
117 } /* end long instruciton predication fetch */
118
119 instruction::unitType instruction::getUnitType() const {
120         return INSTRUCTION_TYPE_ARRAY[(templateID * 3) + slotNumber];
121 } /* end getUnitType() */
122
123 instruction::insnType instruction::getType() const {
124         /* We'll try to be a little smarter, now, and just look up the unit type. */
125         insn_tmpl tmpl = { insn_ };
126         uint8_t opCode = GET_OPCODE( &tmpl );
127
128         switch( getUnitType() ) {
129                 case M: {
130                         /* Note that we do NOT recognize advance load instructions (see also isLoadOrStore()),
131                            though this can be added without too much trouble. */
132                         uint8_t x  = tmpl.M_LD_ST.x;
133                         uint8_t m  = tmpl.M_LD_ST.m;
134                         uint8_t x6 = tmpl.M_LD_ST.x6;
135                         uint8_t x4 = tmpl.M_SYS.x2;
136                         uint8_t x3 = tmpl.M_SYS.x3;
137                         uint8_t x2 = tmpl.M_SYS.x4;
138
139                         switch( opCode ) {
140                                 case 0x0:
141                                         if( x3 >= 0x4 && x3 <= 0x7 ) return ALAT_CHECK;
142                                         if( x3 == 0x0 && x4 == 0x0 && x2 == 0x0 )
143                                                 if( GET_M37_IMM( &tmpl ) == SYSCALL_IMM )
144                                                         return SYSCALL;
145                                                 else
146                                                         return BREAK;
147
148                                         return OTHER;
149                                         break;
150
151                                 case 0x1:
152                                         if( x3 == 0x1 || x3 == 0x3 ) return SPEC_CHECK;
153                                         if( x3 == 0x06 ) return ALLOC;
154
155                                         return OTHER;
156                                         break;
157
158                                 case 0x4:
159                                         if( x == 0x0 ) {
160                                                 if( ( x6 <= 0x17 ) || x6 == 0x1B ||
161                                                         ( x6 >= 0x20 && x6 <= 0x2B ) ) {
162                                                         // /* DEBUG */ fprintf( stderr, "%s[%d]: INTEGER_LOAD\n", __FILE__, __LINE__ );
163                                                         return INTEGER_LOAD;
164                                                         }
165
166                                                 if( ( x6 >= 0x30 && x6 <= 0x37 ) || x6 == 0x3B )
167                                                         return INTEGER_STORE;
168                                         }
169
170                                         if( m == 0x0 && x == 0x1 ) {
171                                                 if( x6 == 0x28 || x6 == 0x2C ) {
172                                                         // /* DEBUG */ fprintf( stderr, "%s[%d]: INTEGER_16_LOAD\n", __FILE__, __LINE__ );
173                                                         return INTEGER_16_LOAD;
174                                                         }
175                                                 if( x6 == 0x30 || x6 == 0x34 ) return INTEGER_16_STORE;
176                                         }
177
178                                         return OTHER;
179                                         break;
180
181                                 case 0x5:
182                                         if( ( x6 <= 0x17 ) || x6 == 0x1B ||
183                                                 ( x6 >= 0x20 && x6 <= 0x2B ) ) {
184                                                 // / * DEBUG */ fprintf( stderr, "%s[%d]: INTEGER_LOAD\n", __FILE__, __LINE__ );
185                                                 return INTEGER_LOAD;
186                                                 }
187
188                                         if( ( x6 >= 0x30 && x6 <= 0x37 ) || x6 == 0x3B )
189                                                 return INTEGER_STORE;
190
191                                         return OTHER;
192                                         break;
193
194                                 case 0x6:
195                                         if( x == 0x0 ) {
196                                                 if( ( x6 <= 0x0F ) || x6 == 0x1B ||
197                                                         ( x6 >= 0x20 && x6 <= 0x27 ) )
198                                                         return FP_LOAD;
199
200                                                 if( m == 0x0 && ( ( x6 >= 0x30 && x6 <= 0x33 ) || x6 == 0x3B ) )
201                                                         return FP_STORE;
202
203                                                 if( x6 == 0x2C || x6 == 0x2D || x6 == 0x2E || x6 == 0x2F )
204                                                         return PREFETCH;
205                                         }
206
207                                         if( x == 0x1 ) {
208                                                 if( ( x6 >= 0x01 && x6 <= 0x0F ) || ( x6 >= 0x21 && x6 <= 0x27 ) )
209                                                                  switch ( x6 & 0x3 ) {
210                                                                         case 0x1:
211                                                                                 // /* DEBUG */ fprintf( stderr, "%s[%d]: INTEGER_PAIR_LOAD\n", __FILE__, __LINE__ );
212                                                                                 return INTEGER_PAIR_LOAD;
213                                                                         case 0x2:
214                                                                         case 0x3: return FP_PAIR_LOAD;
215                                                                  }
216                                         }
217                                         return OTHER;
218                                         break;
219
220                                 case 0x7:
221                                         if( ( x6 <= 0x0F ) || x6 == 0x1B ||
222                                                 ( x6 >= 0x20 && x6 <= 0x27 ) )
223                                                 return FP_LOAD;
224
225                                         if( ( x6 >= 0x30 && x6 <= 0x33 ) || x6 == 0x3B )
226                                                 return FP_STORE;
227
228                                         if( x6 >= 0x2C && x6 <= 0x2F )
229                                                 return PREFETCH;
230
231                                         return OTHER;
232                                         break;
233
234                                 default:
235                                         return OTHER;
236                                 } /* end memory-unit opcode switch */
237                         } break;
238
239                 case I: {
240                         uint8_t x6 = tmpl.I_MISC.x6;
241                         uint8_t x3 = tmpl.I_MISC.x3;
242
243                         if( opCode == 0x0 && x3 == 0x0 && x6 == 0x00 )
244                                 if( GET_I19_IMM( &tmpl ) == SYSCALL_IMM )
245                                         return SYSCALL;
246                                 else
247                                         return BREAK;
248                         if( opCode == 0x0 && x6 == 0x30 ) return MOVE_FROM_IP;
249                         if( opCode == 0x0 && x3 == 0x1 ) return SPEC_CHECK;
250
251                         return OTHER;
252                         } break;
253
254                 case B: {
255                         switch( opCode ) {
256                                 case 0x0: {
257                                         /* Is it a return or an indirect branch or something else? */
258                                         uint8_t x6 = tmpl.B.x6;
259                                         uint8_t btype = tmpl.B.btype;
260
261                                         if( x6 == 0x00 )
262                                                 if( GET_B9_IMM( &tmpl ) == SYSCALL_IMM )
263                                                         return SYSCALL;
264                                                 else
265                                                         return BREAK;
266                                         if( x6 == 0x21 && btype == 0x4 ) return RETURN;
267                                         if( x6 == 0x20 && btype == 0x0 ) return INDIRECT_BRANCH;
268                                         if( x6 == 0x20 && btype == 0x1 ) return BRANCH_IA;
269
270                                         return OTHER;
271                                         } break;
272
273                                 case 0x1: return INDIRECT_CALL;
274                                 case 0x2: {
275                                         uint8_t x6 = tmpl.B.x6;
276
277                                         if ( x6 == 0x10 || x6 == 0x11 ) return BRANCH_PREDICT;
278
279                                         return OTHER;
280                                         } break;
281
282         case 0x4: 
283           {
284                 uint8_t btype = tmpl.B.btype;
285                 if(btype == 5 || btype == 6 || btype == 7) // cloop, cexit, ctop don't rely on predicates; cond does, so exclude it
286                 {
287                   return COND_BRANCH;
288                 }
289                                 
290                 return DIRECT_BRANCH;
291           }
292           break;
293                           
294                                 case 0x5: return DIRECT_CALL;
295                                 case 0x7: return BRANCH_PREDICT;
296
297                                 default:  return OTHER;
298                                 } /* end branch-unit opcode switch */
299                         } break;
300
301                 case F:
302                         if( opCode == 0x0 && tmpl.F15.x == 0x0 && tmpl.F15.x6 == 0x00 )
303                                 if( GET_F15_IMM( &tmpl ) == SYSCALL_IMM )
304                                         return SYSCALL;
305                                 else
306                                         return BREAK;
307
308                         return OTHER;
309
310                 case X:
311                 case L:
312                 case RESERVED:
313                 default: break;
314                 } /* end i-unit type switch */
315
316         return INVALID;
317 } /* end getType() */
318
319 instruction_x::unitType instruction_x::getUnitType() const { 
320         return instruction_x::X;
321 } /* end getUnitType() */
322
323 instruction::insnType instruction_x::getType() const {
324         /* We know we're a long instruction, so just check the major opcode to see which one. */
325         insn_tmpl tmpl = { insn_x_ };
326         insn_tmpl imm  = { insn_ };
327
328         switch( GET_OPCODE( &tmpl )) {
329                 case 0x0:
330                         if( tmpl.X1.x3 == 0x0 && tmpl.X1.x6 == 0x00 )
331                                 if( GET_X1_IMM( &tmpl, &imm ) == SYSCALL_IMM )
332                                         return SYSCALL;
333                                 else
334                                         return BREAK;
335
336                         return OTHER;
337
338                 case 0xD: return DIRECT_CALL;
339                 case 0xC: return DIRECT_BRANCH;
340                 default: return OTHER;
341                 } /* end opcode switch */
342 } /* end getType() */
343
344 IA64_bundle::IA64_bundle( ia64_bundle_t rawBundle ) {
345         * this = IA64_bundle( rawBundle.low, rawBundle.high );
346 } /* end IA64_bundle() */
347
348 IA64_bundle::IA64_bundle( uint8_t templateID, const instruction & instruction0, const instruction instruction1, const instruction instruction2 ) {
349         * this = IA64_bundle( templateID, instruction0.getMachineCode(), instruction1.getMachineCode(), instruction2.getMachineCode() );
350 } /* end IA64_bundle() */
351
352 /* This handles the MLX template/long instructions. */
353 IA64_bundle::IA64_bundle( uint8_t templateID, const instruction & instruction0, const instruction_x & instructionLX ) {
354         if( templateID != MLXstop && templateID != MLX ) { bpfatal( "Attempting to generate a bundle with a long instruction without using the MLX template, aborting.\n" ); abort(); }
355
356         * this = IA64_bundle( templateID, instruction0, instructionLX.getMachineCode().low, instructionLX.getMachineCode().high );
357 } /* end IA64_bundle() */
358
359 IA64_bundle::IA64_bundle( uint8_t templateID, uint64_t instruction0, uint64_t instruction1, uint64_t instruction2 ) {
360         this->templateID = templateID;
361         this->instruction0 = instruction( instruction0, templateID, 0 );
362         this->instruction1 = instruction( instruction1, templateID, 1 );
363         this->instruction2 = instruction( instruction2, templateID, 2 ); 
364
365         myBundle.low  = (( templateID & TEMPLATE_MASK ) |
366                                          ( (instruction0 >> (ALIGN_RIGHT_SHIFT - 5)) & INSTRUCTION0_MASK ) |
367                                          ( (instruction1 << 23) & INSTRUCTION1_LOW_MASK ));
368         myBundle.high = (( (instruction1 >> (ALIGN_RIGHT_SHIFT + 18)) & INSTRUCTION1_HIGH_MASK ) |
369                                          ( (instruction2 & INSTRUCTION2_MASK )));
370 } /* end IA64_bundle() */
371
372 IA64_bundle::IA64_bundle( uint64_t lowHalfBundle, uint64_t highHalfBundle ) {
373         /* The template is right-aligned; the instructions are left-aligned. */
374         templateID = lowHalfBundle & TEMPLATE_MASK;
375         instruction0 = instruction( (lowHalfBundle & INSTRUCTION0_MASK) << 18, templateID, 0 );
376         instruction1 = instruction( ((lowHalfBundle & INSTRUCTION1_LOW_MASK) >> 23) +
377                                                                          ((highHalfBundle & INSTRUCTION1_HIGH_MASK) << 41), templateID, 1 );
378         instruction2 = instruction( highHalfBundle & INSTRUCTION2_MASK, templateID, 2 );
379
380         myBundle.low = lowHalfBundle;
381         myBundle.high = highHalfBundle;
382
383 } /* end IA64_Bundle() */
384
385 instruction_x IA64_bundle::getLongInstruction() {
386         longInstruction = instruction_x( instruction1.getMachineCode(), instruction2.getMachineCode(), templateID );
387         return longInstruction;
388 } /* end getLongInstruction() */
389
390 instruction * IA64_bundle::getInstruction( unsigned int slot ) {
391         if( (slot == 1 || slot == 2) && hasLongInstruction() ) {
392                 return new instruction_x( instruction1.getMachineCode(), instruction2.getMachineCode(), templateID );
393                 }
394         switch( slot ) {
395                 case 0: return new instruction( instruction0 );
396                 case 1: return new instruction( instruction1 );
397                 case 2: return new instruction( instruction2 );
398                 default: bpfatal("Request of invalid instruction (%d), aborting.\n", slot ); abort();
399                 }
400 } /* end getInstruction() */
401
402 // Aids bundle modification.  Used by set_breakpoint_for_syscall_completion().
403 bool IA64_bundle::setInstruction(instruction &newInst)
404 {
405     if ( (templateID == 0x04 || templateID == 0x05) && newInst.slotNumber != 0)
406                 return false;
407
408     switch (newInst.slotNumber) {
409                 case 0:
410                         instruction0 = instruction(newInst.insn_, templateID, newInst.slotNumber);
411                         myBundle.low &= ~INSTRUCTION0_MASK | (newInst.insn_ << (ALIGN_RIGHT_SHIFT - 5));
412                         break;
413                 case 1:
414                         instruction1 = instruction(newInst.insn_, templateID, newInst.slotNumber);
415                         myBundle.low &= ~INSTRUCTION1_LOW_MASK | (newInst.insn_ << 23);
416                         myBundle.high &= ~INSTRUCTION1_HIGH_MASK | (newInst.insn_ >> (ALIGN_RIGHT_SHIFT + 18));
417                         break;
418                 case 2:
419                         instruction2 = instruction(newInst.insn_, templateID, newInst.slotNumber);
420                         myBundle.high &= ~INSTRUCTION2_MASK | newInst.insn_;
421                         break;
422                 default:
423                         return false;
424                         break;
425     }
426     return true;
427 } /* end setInstruction() */
428
429 // Aids bundle modification.  Added for completion.
430 bool IA64_bundle::setInstruction(instruction_x &newInst)
431 {
432     if (templateID != 0x04 && templateID != 0x05)
433         return false;
434
435     instruction1 = instruction(newInst.insn_, templateID, 1);
436     instruction2 = instruction(newInst.insn_x_, templateID, 2);
437
438     myBundle.low &= ~INSTRUCTION1_LOW_MASK | (newInst.insn_ << 23);
439     myBundle.high = ( ((newInst.insn_ >> (ALIGN_RIGHT_SHIFT + 18)) & INSTRUCTION1_HIGH_MASK ) |
440                                           ( newInst.insn_x_ & INSTRUCTION2_MASK ) );
441     return true;
442 } /* end setInstruction(x) */
443
444 /* private refactoring function */
445 bool extractAllocatedRegisters( uint64_t allocInsn, uint64_t * allocatedLocal, uint64_t * allocatedOutput, uint64_t * allocatedRotate ) {
446         /* Verify that the given instruction is actually, so far as we can tell
447            (we don't have the template and the offset), an alloc. */
448
449         insn_tmpl alloc = { allocInsn };
450         if (alloc.M34.opcode != 0x1 || alloc.M34.x3 != 0x6) {
451                 *allocatedLocal = *allocatedOutput = *allocatedRotate = 0;
452                 return false;
453         } /* end if not an alloc instruction */
454
455         /* Extract the local, output, and rotate sizes. */
456         *allocatedLocal = GET_M34_LOCAL(&alloc);
457         *allocatedOutput = GET_M34_OUTPUT(&alloc);
458         *allocatedRotate = GET_M34_ROTATE(&alloc);
459
460         /* Completed successfully. */
461         return true;
462 } /* end extractAllocatedRegisters() */
463
464 instruction generateAllocInstructionFor( registerSpace * rs, int locals, int outputs, int rotates ) {
465         insn_tmpl alloc = { 0x0 };
466         uint64_t sizeOfLocals = rs->GPRs()[0]->number - 32 + locals;
467         assert( 0 <= outputs && outputs <= 8 );
468
469         if( sizeOfLocals + outputs > 96 ) {
470                 // Never allocate a frame larger than 96 registers.
471                 sizeOfLocals = 96 - outputs;
472                 }
473
474         alloc.M34.opcode        = 0x1;
475         alloc.M34.x3            = 0x6;
476         alloc.M34.r1            = rs->originalLocals + rs->originalOutputs + 32;
477         SET_M34_FIELDS( & alloc, sizeOfLocals, outputs, rotates );
478
479         return instruction( alloc.raw );
480 } /* end generateAllocInstructionFor() */
481
482 instruction generateOriginalAllocFor( registerSpace * rs ) {
483         insn_tmpl alloc = { 0x0 };
484
485         alloc.M34.opcode        = 0x1;
486         alloc.M34.x3            = 0x6;
487         alloc.M34.r1            = 1;
488
489         /* Allocating a spurious output register to avoid preserving the target
490            register breaks things for kernels which check the number of 
491            output registers during a syscall entry, so instead,
492            save ar.pfs to a known register (r1) and save & restore it
493            around the alloc instruction. */
494         SET_M34_FIELDS( & alloc, rs->originalLocals, rs->originalOutputs, rs->originalRotates );
495
496         return instruction( alloc.raw );
497 } /* end generateOriginalAllocFor() */
498
499 /* imm22 is assumed to be right-aligned, e.g., an actual value. :) */
500 instruction generateShortConstantInRegister( unsigned int registerN, int imm22 ) {
501         insn_tmpl addl = { 0x0 };
502
503         addl.A5.opcode  = 0x9;
504         addl.A5.r1              = registerN;
505         SET_A5_IMM(&addl, imm22);
506
507         return instruction( addl.raw );
508 } /* end generateConstantInRegister( imm22 ) */
509
510 instruction_x generateLongConstantInRegister( unsigned int registerN, long long int immediate ) {
511         insn_tmpl movl = { 0x0 }, imm = { 0x0 };
512
513         movl.X2.opcode  = 0x6;
514         movl.X2.r1              = registerN;
515         SET_X2_IMM(&movl, &imm, immediate);
516
517         return instruction_x( imm.raw, movl.raw );
518 } /* end generateConstantInRegister( imm64 ) */
519
520 instruction_x generateLongCallTo( long long int displacement64, unsigned int branchRegister, Register predicate ) {
521         insn_tmpl call = { 0x0 }, imm = { 0x0 };
522
523         call.X4.opcode  = 0xD;
524         call.X4.b1              = branchRegister;
525         call.X4.qp              = predicate;
526         SET_X4_TARGET(&call, &imm, displacement64);
527
528         return instruction_x( imm.raw, call.raw );
529 } /* end generateLongCallTo( displacement64 ) */
530
531 instruction_x generateLongBranchTo( long long int displacement64, Register predicate ) {
532         insn_tmpl brl = { 0x0 }, imm = { 0x0 };
533         brl.X3.opcode   = 0xC;
534         brl.X3.qp               = predicate;
535         SET_X3_TARGET(&brl, &imm, displacement64);
536
537         return instruction_x( imm.raw, brl.raw );
538 } /* end generateLongBranchTo( displacement64 ) */
539
540 instruction generateReturnTo( unsigned int branchRegister ) {
541         insn_tmpl ret = { 0x0 };
542
543         /* Ret Opcode   = 0x0 */
544         ret.B4.x6               = 0x21;
545         ret.B4.btype    = 0x4;
546         ret.B4.p                = 0x1;
547         ret.B4.b2               = branchRegister;
548
549         return instruction( ret.raw );
550 } /* end generateReturnTo */
551
552 /* Required by func-reloc.C to calculate relative displacements. */
553 int get_disp( instruction * /* insn */ ) {
554         assert( 0 );
555         return 0;
556 } /* end get_disp() */
557
558 /* Required by func-reloc.C to correct relative displacements after relocation. */
559 int set_disp( bool /* setDisp */, instruction * /* insn */, int /* newOffset */, bool /* outOfFunc */ ) {
560         assert( 0 );
561         return 0;
562 } /* end set_disp() */
563
564 /* Convience methods for func-reloc.C */
565 int sizeOfMachineInsn( instruction * /* insn */ ) {
566         assert( 0 );
567         return 0;
568 } /* end sizeOfMachineInsn() */
569
570 int addressOfMachineInsn( instruction * /* insn */ ) {
571         assert( 0 );
572         return 0;
573 } /* end addressOfMachineInsn */
574
575 /* Convience method for inst-ia64.C */
576 IA64_bundle generateBundleFromLongInstruction( instruction_x longInstruction ) {
577         instruction memoryNOP( NOP_M );
578         return IA64_bundle( MLXstop, memoryNOP, longInstruction );
579 } /* end generateBundleFromLongInstruction() */
580
581 /* Required by inst-ia64.C */
582 Address instruction::getTargetAddress() const {
583         insnType myType = getType();
584         insn_tmpl tmpl = { insn_ };
585
586         if( myType == DIRECT_CALL || myType == DIRECT_BRANCH ) { /* Kind of pointless to guess at the target of indirect jumps. */
587                 switch( GET_OPCODE(&tmpl) ) {
588                         case 0x00: /* Indirect call and branch, respectively. */
589                         case 0x01: assert( 0 );
590                         case 0x04: return GET_B1_TARGET(&tmpl);
591                         case 0x05: return GET_B3_TARGET(&tmpl);
592                         default:
593                                 bpfatal( "getTargetAddress(): unrecognized major opcode, aborting.\n" );
594                                 abort();
595                                 break;
596                         } /* end opcode switch */
597                 } else {
598                 // /* DEBUG */ bperr( "getTargetAddress() returning 0 for indirect branch or call.\n" );
599                 }
600         return 0;
601 } /* end getTargetAddress() */
602
603 Address instruction_x::getTargetAddress() const {
604         insnType myType = getType();
605         insn_tmpl tmpl = { insn_x_ }, imm = { insn_ };
606
607         if (myType == DIRECT_CALL || myType == DIRECT_BRANCH ) {
608                 switch (GET_OPCODE(&tmpl)) {
609                         case 0xC: return GET_X3_TARGET(&tmpl, &imm);
610                         case 0xD: return GET_X4_TARGET(&tmpl, &imm);
611                 }
612         }
613         return 0;
614 } /* end getTargetAddress() */
615
616 #include "process.h"
617 #include "function.h"
618 #include <list>
619
620 /* private refactoring function, for dBTRSF() */
621 int_basicBlock * findBasicBlockInCFG( Address addr, 
622                                                                           const std::vector<int_basicBlock *> &blocks ) {
623         for (unsigned i = 0; i < blocks.size(); i++) {
624                 if ((blocks[i]->origInstance()->firstInsnAddr() <= addr) &&
625                         (addr < blocks[i]->origInstance()->endAddr()))
626                         return blocks[i];
627         }
628         return NULL;
629 } /* end findBasicBlockInCFG() */
630
631 void initBaseTrampStorageMap( registerSpace *regSpace, int sizeOfFrame, bool *usedFPregs )
632 {
633         // Clear the data structures.
634         regSpace->sizeOfStack = 0;
635         memset( regSpace->storageMap, 0, sizeof( regSpace->storageMap ) );
636
637         // Unstacked register save locations
638         int stackIndex = 32 + sizeOfFrame;
639         if( stackIndex > 128 - ( NUM_PRESERVED + NUM_LOCALS + NUM_OUTPUT ) )
640                 stackIndex = 128 - ( NUM_PRESERVED + NUM_LOCALS + NUM_OUTPUT );
641
642         regSpace->storageMap[ BP_AR_PFS   ] = stackIndex++;
643
644         regSpace->storageMap[ BP_GR0 +  1 ] = stackIndex++;
645         regSpace->storageMap[ BP_GR0 +  2 ] = stackIndex++;
646         regSpace->storageMap[ BP_GR0 +  3 ] = stackIndex++;
647
648         regSpace->storageMap[ BP_GR0 +  8 ] = stackIndex++;
649         regSpace->storageMap[ BP_GR0 +  9 ] = stackIndex++;
650         regSpace->storageMap[ BP_GR0 + 10 ] = stackIndex++;
651         regSpace->storageMap[ BP_GR0 + 11 ] = stackIndex++;
652         regSpace->storageMap[ BP_GR0 + 12 ] = stackIndex++;
653
654         regSpace->storageMap[ BP_GR0 + 14 ] = stackIndex++;
655         regSpace->storageMap[ BP_GR0 + 15 ] = stackIndex++;
656         regSpace->storageMap[ BP_GR0 + 16 ] = stackIndex++;
657         regSpace->storageMap[ BP_GR0 + 17 ] = stackIndex++;
658         regSpace->storageMap[ BP_GR0 + 18 ] = stackIndex++;
659         regSpace->storageMap[ BP_GR0 + 19 ] = stackIndex++;
660         regSpace->storageMap[ BP_GR0 + 20 ] = stackIndex++;
661         regSpace->storageMap[ BP_GR0 + 21 ] = stackIndex++;
662         regSpace->storageMap[ BP_GR0 + 22 ] = stackIndex++;
663         regSpace->storageMap[ BP_GR0 + 23 ] = stackIndex++;
664         regSpace->storageMap[ BP_GR0 + 24 ] = stackIndex++;
665         regSpace->storageMap[ BP_GR0 + 25 ] = stackIndex++;
666         regSpace->storageMap[ BP_GR0 + 26 ] = stackIndex++;
667         regSpace->storageMap[ BP_GR0 + 27 ] = stackIndex++;
668         regSpace->storageMap[ BP_GR0 + 28 ] = stackIndex++;
669         regSpace->storageMap[ BP_GR0 + 29 ] = stackIndex++;
670         regSpace->storageMap[ BP_GR0 + 30 ] = stackIndex++;
671         regSpace->storageMap[ BP_GR0 + 31 ] = stackIndex++;
672
673         regSpace->storageMap[ BP_AR_CCV   ] = stackIndex++;
674         regSpace->storageMap[ BP_AR_CSD   ] = stackIndex++;
675         regSpace->storageMap[ BP_AR_SSD   ] = stackIndex++;
676         regSpace->storageMap[ BP_BR0 +  0 ] = stackIndex++;
677         regSpace->storageMap[ BP_BR0 +  6 ] = stackIndex++;
678         regSpace->storageMap[ BP_BR0 +  7 ] = stackIndex++;
679         regSpace->storageMap[ BP_PR       ] = stackIndex++;
680
681         // Stacked register save locations, if needed.
682         // Stacked registers are always saved on the memory stack.
683         stackIndex = 0;
684         for( int i = 128 - (NUM_PRESERVED + NUM_LOCALS + NUM_OUTPUT); i < (32 + sizeOfFrame); ++i )
685                 regSpace->storageMap[ BP_GR0 + i ] = --stackIndex;
686
687         int stackCount = 0;
688         if( stackIndex < 0 ) {
689                 stackCount = -stackIndex * 8;
690                 stackCount += stackCount % 16; // Align stack to 0x10 boundry.
691         }
692
693         if( usedFPregs ) {
694                 for( int i = 0; i < 128; ++i )
695                         if( usedFPregs[ i ] ) stackCount += 16;
696
697         } else {
698                 stackCount += 16 * 106;
699         }
700
701         /* The runtime conventions require a 16-byte scratch area
702            above the SP for function calls.  Since we're assuming
703            function calls will be made from instrumentation, and
704            will thus always have a subtraction here, go ahead and
705            subtract an additional 16 bytes so we can spill two
706            floating-point registers to do multiplication. */
707         regSpace->sizeOfStack = stackCount + 32;
708 }
709
710 #include "instPoint.h"
711 #include "process.h"
712
713 extern bool *doFloatingPointStaticAnalysis( const instPoint * );
714
715 /* Private refactoring function. */
716 void extractAllocatedRegistersFromBasicBlock( const instPoint * location, int_function * pdf, int_basicBlock * allocBlock, uint64_t * locals, uint64_t * outputs, uint64_t * rotates ) {
717         /* We could probably extract pdf from allocBlock, but whatever. */
718         Address encodedAddress = allocBlock->origInstance()->firstInsnAddr();
719         unsigned short slotNumber = encodedAddress % 16;
720         Address alignedOffset = encodedAddress - pdf->getAddress() - slotNumber;
721                         
722         Address fnEntryOffset = pdf->getAddress();
723         Address fnEntryAddress = (Address)location->proc()->getPtrToInstruction(fnEntryOffset);
724         assert( fnEntryAddress % 16 == 0 );
725         const ia64_bundle_t * rawBundlePointer = (const ia64_bundle_t *) fnEntryAddress;
726         IA64_bundle allocBundle = rawBundlePointer[ alignedOffset / 16 ];
727
728         extractAllocatedRegisters( allocBundle.getInstruction( slotNumber )->getMachineCode(),
729                 locals, outputs, rotates );
730 } /* end extractAllocatedRegistersFromBasicBlock */
731
732 registerSpace *defineBaseTrampRegisterSpaceFor( const instPoint * location, 
733                                                                                                 Register &first, Register &last) {
734         /* If no alloc's definition reaches the instPoint _location_, create a base tramp
735            register space compatible with any possible leaf function.
736
737            If exactly one alloc's definition reaches the instPoint _location_, create a
738            base tramp by extending the frame created by that alloc.
739
740            If more than alloc's definition reaches the instPoint _location_, return false,
741            because we can't statically determine the register frame that will be active
742            when the instrumentation at _location_ executes. */
743
744         int_function * pdf = location->func();
745         assert( pdf != NULL );
746         // /* DEBUG */ fprintf( stderr, "%s[%d]: image func %p (in int_function %p)\n", __FILE__, __LINE__, pdf->ifunc(), pdf );
747         
748         /* Determine used FP regs, if needed */
749         if( pdf->getUsedFPregs() == NULL ) {
750                 pdf->ifunc()->usedFPregs = doFloatingPointStaticAnalysis( location );
751                 }
752         
753         const std::vector<int_basicBlock *> &blocks = pdf->blocks();
754
755         /* Initialize the dataflow sets and construct the initial worklist. */
756         // /* DEBUG */ fprintf( stderr, "%s[%d]: listing basic blocks for function beginning at 0x%lx... \n", __FILE__, __LINE__, pdf->getAddress() );
757         std::list< int_basicBlock * > workList;
758         for (unsigned bIter = 0; bIter < blocks.size(); bIter++) {
759                 int_basicBlock * basicBlock = blocks[bIter];
760                 basicBlock->setDataFlowIn( new BPatch_Set< int_basicBlock * > );
761                 basicBlock->setDataFlowOut( new BPatch_Set< int_basicBlock * > );
762                 basicBlock->setDataFlowGen( NULL );
763                 basicBlock->setDataFlowKill( NULL );
764
765                 // /* DEBUG */ fprintf( stderr, "%s[%d]: block #%d: from 0x%lx - 0x%lx\n", __FILE__, __LINE__, bIter, basicBlock->origInstance()->firstInsnAddr(), basicBlock->origInstance()->endAddr() );
766
767                 workList.push_back( basicBlock );
768                 } /* end initialization iteration over all basic blocks */
769         // /* DEBUG */ fprintf( stderr, "%s[%d]: ... listing complete.\n", __FILE__, __LINE__ );
770
771         /* Initialize the alloc blocks. */
772         for( unsigned int i = 0; i < pdf->getAllocs().size(); i++ ) {
773                 Address absoluteAddress = pdf->getAllocs()[i];
774                 // /* DEBUG */ fprintf( stderr, "%s[%d]: absolute address of alloc: 0x%lx (in function starting at 0x%lx)\n", __FILE__, __LINE__, absoluteAddress, pdf->getAddress() );
775                 int_basicBlock * currentAlloc = findBasicBlockInCFG( absoluteAddress, blocks );
776                 
777                 /* The old parser uses the frequently-incorrect symbol table size information,
778                    so we can get allocs in unreachable basic blocks.  Since they're unreachable, 
779                    the CFG doesn't create them and we can't find them.  */
780                 if( currentAlloc == NULL ) { continue; }
781                 /* Switch back to me when the new parser arrives. */
782                 assert( currentAlloc != NULL );
783                 
784                 /* Generically, these should be functors from sets to sets. */
785                 currentAlloc->setDataFlowGen( currentAlloc );
786                 currentAlloc->setDataFlowKill( currentAlloc );
787         } /* end initialization iteration over all allocs. */
788
789         /* Start running the worklist. */
790         while( ! workList.empty() ) {
791                 int_basicBlock * workBlock = workList.front();
792                 workList.pop_front();
793                 // /* DEBUG */ fprintf( stderr, "Working on basicBlock %p\n", workBlock );
794
795                 /* Construct workBlock's new output set from workBlock's immediate predecessors.  If
796                    it's different from workBlock's old output set, add all of workBlock's successors
797                    to the workList. */
798                 BPatch_Set< int_basicBlock * > newOutputSet;
799                 pdvector< int_basicBlock * > predecessors;
800                 workBlock->getSources( predecessors );
801                 for( unsigned int i = 0; i < predecessors.size(); i++ ) {
802                         int_basicBlock * predecessor = predecessors[i];
803                         newOutputSet |= * predecessor->getDataFlowOut();
804                 } /* end iteration over predecessors */
805                 
806                 // /* DEBUG */ fprintf( stderr, "From %d predecessors, %d allocs in input set.\n", predecessors.size(), newOutputSet.size() );
807                 
808                 if( workBlock->getDataFlowKill() != NULL ) {
809                         /* Special case for allocs: any non-NULL kill set kills everything.  Otherwise, you'd
810                            have to use an associative set for kill and gen. */
811                         newOutputSet = BPatch_Set<int_basicBlock *>();
812                 }
813
814                 if( workBlock->getDataFlowGen() != NULL ) {     
815                         newOutputSet.insert( workBlock->getDataFlowGen() ); 
816                 }
817
818                 // /* DEBUG */ fprintf( stderr, "After gen/kill sets, %d in (new) output set.\n", newOutputSet.size() );
819
820                 if( newOutputSet != *workBlock->getDataFlowOut() ) {
821                         // /* DEBUG */ fprintf( stderr, "New output set different, adding successors:" );
822                         * workBlock->getDataFlowOut() = newOutputSet;
823
824                         pdvector< int_basicBlock * > successors;
825                         workBlock->getTargets( successors );
826                         
827                         for( unsigned int i = 0; i < successors.size(); i++ ) {
828                                 // /* DEBUG */ fprintf( stderr, " %p", successors[i] );
829                                 workList.push_back( successors[i] );
830                         } /* end iteration over successors */
831                         // /* DEBUG */ fprintf( stderr, "\n" );
832                 } /* end if the output set changed. */
833         } /* end iteration over worklist. */
834
835         // /* DEBUG */ fprintf( stderr, "%s[%d]: absolute address of location: 0x%lx\n", __FILE__, __LINE__, location->addr() );
836         int numAllocs = 0;
837         bool success = true;
838         BPatch_Set< int_basicBlock * > * reachingAllocs = NULL;
839         int_basicBlock * locationBlock = findBasicBlockInCFG( location->addr(), blocks );
840         if( locationBlock ) {
841                 reachingAllocs = locationBlock->getDataFlowOut();
842                 numAllocs = reachingAllocs->size();
843         }
844         // /* DEBUG */ fprintf( stderr, "%s[%d]: %d reaching allocs located.\n", __FILE__, __LINE__, numAllocs );
845
846         registerSpace *regSpace = NULL;
847
848         switch( numAllocs ) {
849                 case 0: {
850                         // /* DEBUG */ fprintf( stderr, "%s[%d]: no reaching allocs located.\n", __FILE__, __LINE__ );
851                         
852                         /* The largest possible unallocated frame (by the ABI, for leaf
853                            functions) is 8 input registers. */
854                         
855                         first = 32+8+NUM_PRESERVED;
856                         last = first + NUM_LOCALS + NUM_OUTPUT - 1;
857
858                         /* Construct the registerSpace reflecting the desired frame. */
859                         registerSpace::overwriteRegisterSpace64(first, last);
860                         regSpace = registerSpace::savedRegSpace(location->proc());
861
862                         initBaseTrampStorageMap( regSpace, 8, pdf->getUsedFPregs() );
863
864                         /* If we did not have a frame originally, create one such that wrapper functions
865                            will work correctly. */
866                         regSpace->originalLocals = 0;
867                         regSpace->originalOutputs = 8;
868                         regSpace->originalRotates = 0;
869
870                         /* Our static analysis succeeded. */
871                         } break;
872
873                 default: {
874                         // /* DEBUG */ fprintf( stderr, "%s[%d]: more than one (%d) allocs reached.\n", __FILE__, __LINE__, numAllocs );
875                         
876                         /* If all the reaching allocs are the same, we fall through to
877                            the single-reaching-alloc case rather than duplicate code. */
878                         success = true;
879                         
880                         int_basicBlock * firstAlloc = * reachingAllocs->begin();
881                         uint64_t firstLocals, firstOutputs, firstRotates;
882                         extractAllocatedRegistersFromBasicBlock( location, pdf, firstAlloc,
883                                 & firstLocals, & firstOutputs, & firstRotates );
884                         
885                         BPatch_Set< int_basicBlock * >::iterator iter = reachingAllocs->begin();
886                         for( int i = 0; i < numAllocs; ++i, iter++ ) {
887                                 // /* DEBUG */ fprintf( stderr, "%s[%d]: alloc at 0x%lx\n", __FILE__, __LINE__, (* iter)->origInstance()->firstInsnAddr() );
888                                 
889                                 uint64_t locals, outputs, rotates;
890                                 extractAllocatedRegistersFromBasicBlock( location, pdf, * iter, & locals, & outputs, & rotates );
891                                 if( locals != firstLocals || outputs != firstOutputs || rotates != firstRotates ) {
892                                         success = false;
893                                         break;
894                                         }
895                                 } /* end iteration over reaching allocs. */
896                         
897                         if( ! success ) {
898                                 // /* DEBUG */ fprintf( stderr, "%s[%d]: allocs reaching 0x%lx are dissimilar.\n", __FILE__, __LINE__, location->addr() );
899                                 break;
900                                 } else {
901                                 // /* DEBUG */ fprintf( stderr, "%s[%d]: all allocs reaching 0x%lx are the same.\n", __FILE__, __LINE__, location->addr() );
902                                 }
903                         }
904                         
905                 case 1: {                       
906                         /* Where is our alloc instruction?  We need to have a look at it... */
907                         int_basicBlock * allocBlock = * reachingAllocs->begin();
908                         // /* DEBUG */ fprintf( stderr, "%s[%d]: reaching alloc at 0x%lx\n", __FILE__, __LINE__, allocBlock->origInstance()->firstInsnAddr() );
909                         
910                         uint64_t allocatedLocals, allocatedOutputs, allocatedRotates;
911                         extractAllocatedRegistersFromBasicBlock( location, pdf, allocBlock,
912                                 & allocatedLocals, & allocatedOutputs, & allocatedRotates );
913                         uint64_t sizeOfFrame = allocatedLocals + allocatedOutputs;
914
915                         /* ... and construct a deadRegisterList and regSpace above the
916                            registers the application's using. */
917
918                         // Insure that deadRegisterList fits within the 128 general register pool.
919                         int baseReg = 32 + sizeOfFrame + NUM_PRESERVED;
920                         if( baseReg > 128 - (NUM_LOCALS + NUM_OUTPUT) )
921                                 baseReg = 128 - (NUM_LOCALS + NUM_OUTPUT);
922
923                         first = baseReg;
924                         last = baseReg + NUM_LOCALS + NUM_OUTPUT - 1;
925                         registerSpace::overwriteRegisterSpace64(first, last);
926                         regSpace = registerSpace::savedRegSpace(location->proc());
927
928                         initBaseTrampStorageMap( regSpace, sizeOfFrame, pdf->getUsedFPregs() );
929
930                         /* Note that we assume that having extra registers can't be harmful;
931                            that is, that 'restoring' the alloc instruction's frame before
932                            it executes does not change the semantics of the program.  AFAIK,
933                            this will be true for all correct programs. */
934                         regSpace->originalLocals = allocatedLocals;
935                         regSpace->originalOutputs = allocatedOutputs;
936                         regSpace->originalRotates = allocatedRotates;
937
938                         /* Our static analysis succeeded. */
939                         } break;
940                         
941                 } /* end #-of-dominating-allocs switch */
942
943         /* Regardless, clean up. */
944         for( unsigned bIter = 0; bIter < blocks.size(); bIter++ ) {
945                 int_basicBlock *block = blocks[bIter];
946                 delete (block->getDataFlowOut());
947                 delete (block->getDataFlowIn());
948                 block->setDataFlowIn(NULL);
949                 block->setDataFlowOut(NULL);
950                 block->setDataFlowGen(NULL);
951                 block->setDataFlowKill(NULL);
952                 } /* end iteration over all blocks. */  
953
954         return regSpace;
955 } /* end defineBaseTrampRegisterSpace() */
956
957 /* For inst-ia64.h */
958 instruction generateRegisterToRegisterMove( Register source, Register destination ) {
959         return generateShortImmediateAdd( destination, 0, source ); }
960
961 instruction generateIPToRegisterMove( Register destination ) {
962         insn_tmpl mov = { 0x0 };
963
964         /* Mov Opcode   = 0x0 */
965         mov.I25.x6              = 0x30;
966         mov.I25.r1              = destination;
967
968         return instruction( mov.raw );
969 } /* end generateIPToRegisterMove() */
970
971 instruction generateBranchToRegisterMove( Register source, Register destination ) {
972         insn_tmpl mov = { 0x0 };
973
974         mov.I22.x6 = 0x31;
975         mov.I22.b2 = source;
976         mov.I22.r1 = destination;
977
978         return instruction( mov.raw );
979 } /* end generateBranchToRegisterMove() */
980
981 instruction generateRegisterToBranchMove( Register source, Register destination, int immediate ) {
982         insn_tmpl mov = { 0x0 };
983
984         mov.I21.x3              = 0x7;
985         mov.I21.r2              = source;
986         mov.I21.b1              = destination;
987         mov.I21.timm9c  = immediate;
988
989         return instruction( mov.raw );  
990 } /* end generateRegisterToBranchMove() */
991
992 instruction generateShortImmediateAdd( Register destination, int immediate, Register source ) {
993         insn_tmpl add = { 0x0 };
994
995         add.A4.opcode   = 0x8;
996         add.A4.x2a              = 0x2;
997         add.A4.r3               = source;
998         add.A4.r1               = destination;
999         SET_A4_IMM(&add, immediate);
1000
1001         return instruction( add.raw );
1002 } /* end generateShortImmediateAdd() */
1003
1004 instruction generateArithmetic( opCode op, Register destination, Register lhs, Register rhs ) {
1005         insn_tmpl alu = { 0x0 };
1006
1007         alu.A1.opcode   = 0x8;
1008         alu.A1.r1               = destination;
1009         alu.A1.r2               = lhs;
1010         alu.A1.r3               = rhs;
1011         switch( op ) {
1012                 case plusOp:    alu.A1.x4 = 0; alu.A1.x2b = 0; break;
1013                 case minusOp:   alu.A1.x4 = 1; alu.A1.x2b = 1; break;
1014                 case andOp:             alu.A1.x4 = 3; alu.A1.x2b = 0; break;
1015                 case orOp:              alu.A1.x4 = 3; alu.A1.x2b = 2; break;
1016                 default:
1017                         bpfatal( "generateArithmetic() did not recognize opcode %d, aborting.\n", op );
1018                         abort();
1019                         break;
1020                 } /* end op switch */
1021
1022         return instruction( alu.raw );
1023 } /* end generateArithmetic() */
1024
1025 instruction generateIndirectCallTo( Register indirect, Register rp ) {
1026         insn_tmpl call = { 0x0 };
1027
1028         call.B5.opcode  = 0x1;
1029         call.B5.wh              = 0x1;
1030         call.B5.b2              = indirect;
1031         call.B5.b1              = rp;
1032
1033         return instruction( call.raw );
1034 } /* end generateIndirectCallTo() */
1035
1036 instruction generatePredicatesToRegisterMove( Register destination ) {
1037         insn_tmpl mov = { 0x0 };
1038
1039         /* Mov Opcode   = 0x0 */
1040         mov.I25.x6              = 0x33;
1041         mov.I25.r1              = destination;
1042
1043         return instruction( mov.raw );
1044 } /* end generatePredicatesToRegisterMove() */
1045
1046 instruction generateRegisterToPredicatesMove( Register source, int64_t mask64 ) {
1047         insn_tmpl mov = { 0x0 };
1048
1049         /* Mov Opcode   = 0x0 */
1050         mov.I23.x3              = 0x3;
1051         mov.I23.r2              = source;
1052         SET_I23_MASK(&mov, mask64);
1053
1054         return instruction( mov.raw );
1055 } /* end generateRegisterToPredicatesMove() */
1056
1057 instruction generateSpillTo( Register address, Register source, int64_t imm9 ) {
1058         insn_tmpl spill = { 0x0 };
1059
1060         spill.M5.opcode = 0x5;
1061         spill.M5.x6             = 0x3B;
1062         spill.M5.r2             = source;
1063         spill.M5.r3             = address;
1064         SET_M5_IMM(&spill, imm9);
1065
1066         return instruction( spill.raw );
1067 } /* end generateSpillTo() */
1068
1069 instruction generateFillFrom( Register address, Register destination, int64_t imm9 ) {
1070         insn_tmpl fill = { 0x0 };
1071
1072         if( imm9 == 0x0 ) {
1073                 // Use no update form.
1074                 fill.M1.opcode  = 0x4;
1075                 fill.M1.x6              = 0x1B;
1076                 fill.M1.r3              = address;
1077                 fill.M1.r1              = destination;
1078
1079         } else {
1080                 // Use base update form.
1081                 fill.M3.opcode  = 0x5;
1082                 fill.M3.x6              = 0x1B;
1083                 fill.M3.r3              = address;
1084                 fill.M3.r1              = destination;
1085                 SET_M3_IMM(&fill, imm9);
1086         }
1087
1088         return instruction( fill.raw );
1089 } /* end generateFillFrom() */
1090
1091 instruction generateRegisterStore( Register address, Register source, int size, Register predicate ) {
1092         return generateRegisterStoreImmediate( address, source, 0, size, predicate );
1093 } /* generateRegisterStore() */
1094
1095 instruction generateRegisterStoreImmediate( Register address, Register source, int imm9, int size, Register predicate ) {
1096         insn_tmpl store = { 0x0 };
1097
1098         store.M5.opcode = 0x5;
1099         store.M5.r2             = source;
1100         store.M5.r3             = address;
1101         store.M5.qp             = predicate;
1102         switch (size) {
1103                 case 1: store.M5.x6 = 0x30; break;
1104                 case 2: store.M5.x6 = 0x31; break;
1105                 case 4: store.M5.x6 = 0x32; break;
1106                 case 8: store.M5.x6 = 0x33; break;
1107                 default:
1108                         bpfatal( "Illegal size %d, aborting.\n", size );
1109                         assert( 0 );
1110                         break;
1111                 } /* end sizeSpec determiner */
1112         SET_M5_IMM(&store, imm9);
1113
1114         return instruction( store.raw );
1115 } /* end generateRegisterStore() */
1116
1117 /* This is the no-update form, which lets the code generator do dumb
1118    stuff like load from and into the same register. */
1119 instruction generateRegisterLoad( Register destination, Register address, int size ) {
1120         insn_tmpl load = { 0x0 };
1121
1122         load.M1.opcode  = 0x4;
1123         load.M1.r3              = address;
1124         load.M1.r1              = destination;
1125         switch( size ) {
1126                 case 1: load.M1.x6 = 0x00; break;
1127                 case 2: load.M1.x6 = 0x01; break;
1128                 case 4: load.M1.x6 = 0x02; break;
1129                 case 8: load.M1.x6 = 0x03; break;
1130                 default:
1131                         bpfatal( "Illegal size %d, aborting.\n", size );
1132                         assert( 0 );
1133                         break;
1134                 } /* end sizeSpec determiner */
1135
1136         return instruction( load.raw ); 
1137 } /* end generateRegisterLoad() */
1138
1139 instruction generateRegisterLoadImmediate( Register destination, Register address, int imm9, int size ) { 
1140         insn_tmpl load = { 0x0 };
1141
1142         load.M3.opcode  = 0x5;
1143         load.M3.r3              = address;
1144         load.M3.r1              = destination;
1145         switch( size ) {
1146                 case 1: load.M3.x6 = 0x00; break;
1147                 case 2: load.M3.x6 = 0x01; break;
1148                 case 4: load.M3.x6 = 0x02; break;
1149                 case 8: load.M3.x6 = 0x03; break;
1150                 default:
1151                         bpfatal( "Illegal size %d, aborting.\n", size );
1152                         assert( 0 );
1153                         break;
1154                 } /* end sizeSpec determiner */
1155         SET_M3_IMM(&load, imm9);
1156
1157         return instruction( load.raw );
1158 } /* end generateRegisterLoad() */
1159
1160 instruction generateRegisterToApplicationMove( Register source, Register destination ) {
1161         /* The lower 48 application registers are only accessible via the M unit.  For simplicity,
1162            divide responsibility at the sixty-fourth application register, with an I unit handling
1163            the upper 64. */
1164         insn_tmpl mov = { 0x0 };
1165
1166         if (destination <= 63) {
1167                 mov.M29.opcode  = 0x1;
1168                 mov.M29.x6              = 0x2A;
1169                 mov.M29.r2              = source;
1170                 mov.M29.ar3             = destination;
1171
1172         } else {
1173                 /* Mov Opcode   = 0x0 */
1174                 mov.I26.x6              = 0x2A;
1175                 mov.I26.r2              = source;
1176                 mov.I26.ar3             = destination;
1177         }
1178
1179         return instruction( mov.raw );
1180 } /* end generateRegisterToApplicationMove() */
1181
1182 instruction generateApplicationToRegisterMove( Register source, Register destination ) {
1183         /* The lower 48 application registers are only accessible via the M unit.  For simplicity,
1184            divide responsibility at the sixty-fourth application register, with an I unit handling
1185            the upper 64. */
1186         insn_tmpl mov = { 0x0 };
1187
1188         if (source <= 63) {
1189                 mov.M31.opcode  = 0x1;
1190                 mov.M31.x6              = 0x22;
1191                 mov.M31.ar3             = source;
1192                 mov.M31.r1              = destination;
1193
1194         } else {
1195                 /* Mov Opcode   = 0x0 */
1196                 mov.I28.x6              = 0x32;
1197                 mov.I28.ar3             = source;
1198                 mov.I28.r1              = destination;
1199         }
1200
1201         return instruction( mov.raw );
1202 } /* end generateRegisterToApplicationMove() */
1203
1204 instruction predicateInstruction( Register predicate, instruction insn ) {
1205         insn_tmpl tmpl = { insn.getMachineCode() };
1206
1207         SET_PREDICATE(&tmpl, predicate);
1208         return instruction( tmpl.raw, insn.getTemplateID(), insn.getSlotNumber() );
1209 } /* end predicateInstruction() */
1210
1211 instruction_x predicateLongInstruction( Register predicate, instruction_x insn ) {
1212         insn_tmpl tmpl = { insn.getMachineCode().high };
1213
1214         SET_PREDICATE(&tmpl, predicate);
1215         return instruction_x( insn.getMachineCode().low, tmpl.raw, insn.getTemplateID() );
1216 } /* end predicateLongInstruction() */
1217
1218 #define SWAP(a, b)      ((a) ^= (b), (b) ^= (a), (a) ^= (b))
1219 instruction generateComparison( opCode op, Register destination, Register lhs, Register rhs ) {
1220         insn_tmpl cmp = { 0x0 };
1221         Register anti_destination = destination + 1;
1222
1223         /* We'll assume that all of our comparisons are signed. */
1224         switch( op ) {
1225                 /* This gets cute.  The IA-64 hardware only implements the eq and lt ops,
1226                    so we get to do some argument and target rewriting to make things work.
1227                    The idea is to fall through the operations until we get to one that
1228                    can be implemented in hardware. */
1229         
1230                 case greaterOp: SWAP( destination, anti_destination ); /* Extra SWAP to undo geOp's */
1231                 case leOp:              SWAP( lhs, rhs );
1232                 case geOp:              SWAP( destination, anti_destination );
1233                 case lessOp:    cmp.A6.opcode   = 0xC;
1234                         break;
1235
1236                 case neOp:              SWAP( destination, anti_destination );
1237                 case eqOp:              cmp.A6.opcode   = 0xE;
1238                         break;
1239
1240                 default:
1241                         bpfatal( "Unrecognized op %d in generateComparison(), aborting.\n", op );
1242                         abort();
1243                 } /* end op switch */
1244
1245         cmp.A6.r2       = lhs;
1246         cmp.A6.r3       = rhs;
1247         cmp.A6.p1       = destination;
1248         cmp.A6.p2       = anti_destination;
1249
1250         return instruction( cmp.raw );
1251 } /* end generateComparison() */
1252
1253 instruction generateFPSpillTo( Register address, Register source, int64_t imm9 ) {
1254         insn_tmpl spill_f = { 0x0 };
1255
1256         spill_f.M10.opcode      = 0x7;
1257         spill_f.M10.x6          = 0x3B;
1258         spill_f.M10.f2          = source;
1259         spill_f.M10.r3          = address;
1260         SET_M10_IMM(&spill_f, imm9);
1261
1262         return instruction( spill_f.raw );
1263 } /* end generateFPSpillTo() */
1264
1265 instruction generateFPFillFrom( Register address, Register destination, int64_t imm9 ) {
1266         insn_tmpl fill_f = { 0x0 };
1267
1268         fill_f.M8.opcode        = 0x7;
1269         fill_f.M8.x6            = 0x1B;
1270         fill_f.M8.r3            = address;
1271         fill_f.M8.f1            = destination;
1272         SET_M8_IMM(&fill_f, imm9);
1273
1274         return instruction( fill_f.raw );
1275 } /* end generateFPFillFrom() */
1276
1277 instruction generateRegisterToFloatMove( Register source, Register destination ) {
1278         insn_tmpl mov_f = { 0x0 };
1279
1280         mov_f.M18.opcode        = 0x6;
1281         mov_f.M18.x6            = 0x1C;
1282         mov_f.M18.x                     = 0x1;
1283         mov_f.M18.r2            = source;
1284         mov_f.M18.f1            = destination;
1285
1286         return instruction( mov_f.raw );
1287 } /* end generateRegisterToFloatMove() */
1288
1289 instruction generateFloatToRegisterMove( Register source, Register destination ) {
1290         insn_tmpl mov_f = { 0x0 };
1291
1292         mov_f.M19.opcode        = 0x4;
1293         mov_f.M19.x6            = 0x1C;
1294         mov_f.M19.x                     = 0x1;
1295         mov_f.M19.f2            = source;
1296         mov_f.M19.r1            = destination;
1297
1298         return instruction( mov_f.raw );
1299 } /* end generateFloatToRegisterMove() */
1300
1301 instruction generateFixedPointMultiply( Register destination, Register lhs, Register rhs ) {
1302         insn_tmpl xma_l = { 0x0 };
1303
1304         /* FIXME: We're assuming unsigned, and that the lower 64 bits are more interesting,
1305                   but this may well not be the case. */
1306         xma_l.F2.opcode = 0xE;
1307         xma_l.F2.x              = 0x1;
1308         xma_l.F2.f3             = lhs;
1309         xma_l.F2.f4             = rhs;
1310         xma_l.F2.f1             = destination;
1311
1312         return instruction( xma_l.raw );
1313 } /* end generateFixedPointMultiply() */
1314
1315 void alterLongMoveAtTo( Address target, Address imm64 ) {
1316         ia64_bundle_t *rawBundle = (ia64_bundle_t *)target;
1317         insn_tmpl movl = { rawBundle->high }, imm = { rawBundle->low };
1318
1319         SET_X2_IMM( &movl, &imm, imm64 );
1320
1321         rawBundle->high = movl.raw;
1322         rawBundle->low  = imm.raw;
1323 } /* end alterLongMoveAtTo() */
1324
1325 instruction generateShortImmediateBranch( int64_t target25 ) {
1326         insn_tmpl br_cond = { 0x0 };
1327
1328         br_cond.B1.opcode = 0x4;
1329         SET_B1_TARGET(&br_cond, target25);
1330
1331         return instruction( br_cond.raw );
1332 } /* end generateShortImmediateBranch() */
1333
1334
1335
1336 /* Require by insertTrapAtEntryPointOfMain() */
1337 IA64_bundle generateTrapBundle() {
1338         /* Note: we're using 0x80000 as our break.m immediate,
1339            which is defined to be a debugger breakpoint.  If this
1340            gets flaky, anything up to 0x0FFFFF will generate a SIGTRAP. */
1341
1342         /* Actually, what we're going to do is generate
1343            a SIGILL, (0x40000) because SIGTRAP does silly things. */
1344
1345         return IA64_bundle( MIIstop, TRAP_M, NOP_I, NOP_I );
1346 } /* end generateTrapBundle() */
1347
1348
1349 void IA64_bundle::generate(codeGen &gen) {
1350         GET_PTR(insn, gen);
1351         *insn = myBundle;
1352         insn++;
1353         SET_PTR(insn, gen);
1354 }
1355
1356 void instruction::generateIllegal(codeGen &gen) {
1357         generateTrapBundle().generate(gen);
1358 }
1359
1360 void instruction::generateTrap(codeGen &gen) {
1361         // The trap is actually an illegal, apparently
1362         generateIllegal(gen);
1363 }
1364
1365 void instruction::generateNOOP(codeGen &gen, unsigned size) {
1366         assert((size % 16) == 0);
1367         IA64_bundle nopBundle( MIIstop, NOP_M, NOP_I, NOP_I );
1368         while (size) {
1369                 nopBundle.generate(gen);
1370                 size -= 16;
1371         }
1372 }
1373
1374 void instruction::generateBranch(codeGen &gen, Address from, Address to) {
1375         instruction_x lbtOriginal = generateLongBranchTo( to - from );
1376         IA64_bundle jumpBackBundle( MLXstop, instruction(NOP_M), lbtOriginal );
1377         jumpBackBundle.generate(gen);
1378 }
1379
1380 Address instruction::getTarget(Address origAddr) const {
1381         return (origAddr - (origAddr % 16)) + getTargetAddress();
1382 }
1383
1384 instruction *instruction::copy() const {
1385         return new instruction(insn_, templateID, slotNumber);
1386 }
1387
1388 instruction_x *instruction_x::copy() const {
1389         return new instruction_x(insn_, insn_x_, templateID);
1390 }
1391
1392 instruction generateShiftLeftAndAdd( Register destination, Register shifted, uint64_t count, Register added ) {
1393         insn_tmpl shladd = { 0x0 };
1394
1395         shladd.A2.opcode = 8;
1396         shladd.A2.x2a = 0;
1397         shladd.A2.ve = 0;
1398         shladd.A2.x4 = 4;
1399         
1400         shladd.A2.r1 = destination;
1401         shladd.A2.r2 = shifted;
1402         shladd.A2.r3 = added;
1403         
1404         assert( 1 <= count && count <= 4 );
1405         shladd.A2.ct2d = count - 1;
1406
1407         return instruction( shladd.raw );
1408 } /* end generateShiftLeftAndAdd() */
1409
1410 bool instruction::generateMem(codeGen &,
1411                               Address, 
1412                               Address,
1413                               Register,
1414                   Register) {return false; }
1415
1416 bool instruction::getUsedRegs(pdvector<int> &) {
1417         return false;
1418 }