6 Email: songxi.buaa@gmail.com
14 #######################################
15 # a flag for vector and floating points
16 #######################################
17 #VEC_SIMD_SWITCH = False
18 VEC_SIMD_SWITCH = True
20 ####################################
21 # dir to store aarch64 ISA xml files
22 ####################################
23 ISA_dir = '/p/paradyn/arm/arm-download-1350222/AR100-DA-70000-r0p0-00rel10/AR100-DA-70000-r0p0-00rel10/ISA_xml/ISA_xml_v2_00rel11/'
24 files_dir = os.listdir(ISA_dir)
26 flagFieldsSet = set(['S', 'imm', 'option', 'opt', 'N', 'cond', 'sz', 'size', 'type'])
27 #forwardFieldsSet = set([ ])
33 encodingsArray = list()
36 def __init__(self, ISA_dir, vec_FP=True):
39 self.fp_insn_set =Set()
41 self.base_insn_file = open(ISA_dir+'index.xml')
42 if self.vec_FP == True:
43 self.vec_FP_insn_file = open(ISA_dir+'fpsimdindex.xml')
47 print ' aarch64_op_'+op+','
49 ##############################
52 ##############################
54 for lines in self.base_insn_file:
55 if lines.startswith(" <iform"):
56 self.op_set.add(lines.split('"')[1].split('.xml')[0].split('_')[0])
57 self.insn_set.add(lines.split('"')[1].split('.xml')[0])
59 if self.vec_FP == True:
60 for lines in self.vec_FP_insn_file:
61 if lines.startswith(" <iform"):
62 self.op_set.add(lines.split('"')[1].split('.xml')[0].split('_')[0])
63 self.insn_set.add(lines.split('"')[1].split('.xml')[0])
64 self.fp_insn_set.add(lines.split('"')[1].split('.xml')[0])
66 def printOpcodes(self):
69 self.printP('INVALID')
70 self.printP('extended')
72 for ele in sorted(self.insn_set):
76 print 'number of instructions: ', len(self.insn_set)
78 def get_insn_set(self):
def get_fp_insn_set(self):
    """Return the set of instruction names stored in ``self.fp_insn_set``.

    The same set object held by the instance is returned, not a copy.
    """
    fpNames = self.fp_insn_set
    return fpNames
84 #################################
85 # to find all control field names
87 #################################
89 control_field_set = Set()
90 non_ctl_field_set = Set(['sf'] )
92 def getDecodeFieldNames():
94 decodeFile = open(ISA_dir + 'encodingindex.xml')
96 startReadDecodeField = False
98 for line in decodeFile:
99 if startReadDecodeField == True:
100 if line.find('\"bitfields\"') != -1:
101 control_field_set.add(line.split('>')[1].split('<')[0])
103 if line.find('Decode fields') !=-1:
104 startReadDecodeField = True
106 if line.find('</thread>')!=-1 and startReadDecodeField == True:
107 startReadDecodeField = False
109 if line.find('funcgrouphearder') !=-1 and line.find('simd-dp')!=-1:
112 control_field_set.difference_update(non_ctl_field_set)
113 print sorted(control_field_set)
117 def shifting(bitlist):
120 out = (out<<1) | (bit=='1' )
123 ##########################
124 # parse xml files and get
125 # opcodes and operands
126 ##########################
128 def getOperand_Insn(line):
130 analyze one <box> line </box> to get operand
132 for field in line.split(' '):
133 if field.find('name') != -1 and field.find('usename') == -1:
134 opname = field.split('\"')[1]
135 if opname.find('imm') !=-1:
138 # else continue do nothing
140 def isRnUpdated(line):
141 if line.find('post-idx') != -1 or line.find('pre-idx') !=-1:
145 def ifNeedToSetFlags(line):
146 if line.find('<iclass ') != -1 :
147 for field in line.split(' '):
148 if field.find('id=') !=-1:
149 if field.split('\"')[1] == 's':
152 if line.find('<regdiagram') != -1:
153 if line.find('float/compare') != -1:
162 self.masksArray = list()
163 self.encodingsArray = list()
164 self.insnArray = list()
165 self.operandsArray = list()
166 self.operandsSet = set()
167 self.insn_unallocated = (2**28+2**27)
170 return self.masksArray, self.encodingsArray, self.insnArray, self.operandsArray
# Classify the text of one encoding cell (encodeBit) and record the result in
# maskBit / encodingArray at index 31-maskStartBit. Depending on the cell, the
# reserved operand (reserve_operand_pos) is also appended to operands_pos_Insn
# and its name (element 0) added to self.operandsSet.
# NOTE(review): this listing has lost its indentation and some lines of this
# method, so the exact nesting of the statements below could not be confirmed.
172 def analyzeEncodeBit(self, encodeBit, maskBit, encodingArray, operands_pos_Insn, reserve_operand_pos, maskStartBit):
# a literal '0' or '1': the bit takes part in the mask and keeps its value
173 if encodeBit == '1' or encodeBit == '0':
174 maskBit[31-maskStartBit] = '1'
175 encodingArray[31-maskStartBit] = encodeBit
# '(1)' is handled like a literal '1'
177 elif encodeBit == '(1)':
178 maskBit[31-maskStartBit] = '1'
179 encodingArray[31-maskStartBit] = '1'
# '(0)' is handled like a literal '0'
181 elif encodeBit == '(0)':
182 maskBit[31-maskStartBit] = '1'
183 encodingArray[31-maskStartBit] = '0'
185 # if it is 'x', we set it as not a control field
186 # and append the reserved operand to the list
187 elif encodeBit == 'x':
188 maskBit[31-maskStartBit] = '0'
189 encodingArray[31-maskStartBit] = '0'
# append the reserved operand only when it differs from the last one appended,
# so consecutive 'x' cells of the same field add a single entry
191 if len(operands_pos_Insn) == 0:
192 operands_pos_Insn.append(reserve_operand_pos)
193 elif operands_pos_Insn[-1:][0] != reserve_operand_pos:
194 operands_pos_Insn.append(reserve_operand_pos)
195 if reserve_operand_pos[0] not in self.operandsSet:
196 self.operandsSet.add(reserve_operand_pos[0])
198 # if it is blank, same as 'x', do late operand appending
# NOTE(review): str.startswith() returns a bool, and a bool is never equal to
# -1, so `encodeBit.startswith('!=') != -1` is always true. Every value that
# reaches this test therefore takes this branch; if the [WARN] print below sits
# in a trailing else (not visible in this listing), it can never run. Probably
# meant `encodeBit.startswith('!=')` -- confirm before changing.
199 elif encodeBit == '' or encodeBit.startswith('!=') != -1:
200 if encodeBit == '!= 0000':
# NOTE(review): `i` is presumably bound by a loop header that is not visible here
202 maskBit[31-maskStartBit+i] = '1'
203 encodingArray[31-maskStartBit+i] = '1'
205 operands_pos_Insn.append(reserve_operand_pos)
206 if reserve_operand_pos[0] not in self.operandsSet:
207 self.operandsSet.add(reserve_operand_pos[0])
210 #if not encodeBit.startswith('!='):
211 print '[WARN] something not has been analyzed:'+ encodeBit
214 # to get the encoding table
215 def printOpTable(self ):
217 self.masksArray.append(self.insn_unallocated)
218 self.encodingsArray.append(int(0))
219 self.insnArray.append('INVALID')
220 self.operandsArray.append('')
224 print 0, '%22s'%'INVALID', '%34s'%bin(self.insn_unallocated), '(', hex(self.insn_unallocated), ')'
226 for file in sorted(files_dir):
228 if file.endswith('.xml'):
229 instruction = file.split('.xml')[0]
231 if instruction in insn_set:
233 curFile = open(ISA_dir+file)
239 maskBit = list('0'*32)
240 encodingArray = list('0'*32)
241 operands_pos_Insn = list()
243 reserve_operand_pos = list()
246 needToSetFlags = False
248 # to analyze lines , do iterative passes#
251 if line.find('<iclass ') != -1 :
252 needToSetFlags = ifNeedToSetFlags(line)
255 if line.find('<regdiagram')!=-1:
256 isRnUp = isRnUpdated(line)
257 if needToSetFlags == False:
258 needToSetFlags = ifNeedToSetFlags(line)
263 if line.find('</regdiagram')!=-1:
264 if needToSetFlags == True:
265 operands_pos_Insn.insert(0, ('setFlags',) )
266 self.printInstnEntry(maskBit, encodingArray, indexOfInsn, instruction, operands_pos_Insn, hasZeroImmh)
269 maskBit = list('0'*32)
270 encodingArray = list('0'*32)
271 operands_pos_Insn = list()
277 if startDiagram == True and line.find('<box') != -1:
278 #name, start bit, length
279 for x in line.split(' '):
280 if x.find('hibit') != -1:
281 maskStartBit = int(x.split('\"')[1])
284 # reserve the operand and position information
285 # it only will be appended if the encoding fields are not defined unique
286 reserve_operand_pos = getOperandValues(line, instruction, isRnUp)
290 if line.find('</box') != -1:
297 if line.find('<c') != -1:
298 encodeBit = line.split('>')[1].split('<')[0]
299 if encodeBit == '!= 0000':
302 self.analyzeEncodeBit(encodeBit, maskBit, encodingArray, operands_pos_Insn, reserve_operand_pos, maskStartBit)
303 maskStartBit = maskStartBit - 1
308 ####################################
309 # generate instructions
310 ####################################
311 def printInstnEntry(self, maskBit, encodingArray, index, instruction, operands, hasZeroImmh):
312 if hasZeroImmh == True and encodingArray[21] == '1' and 'Q' not in operands[0][0]:
313 for i in range(19, 23):
315 encodingArray[31-i] = '0'
317 # print instruction and encoding mask per file
318 maskBitInt = int(''.join(maskBit), 2)
319 encodingBitInt = int( ''.join(encodingArray),2)
321 self.masksArray.append(maskBitInt)
322 self.encodingsArray.append(encodingBitInt)
323 self.insnArray.append(instruction)
324 self.operandsArray.append(operands)
326 print index, "%22s"%instruction, '\t', ''.join(maskBit),'(', hex(maskBitInt),')', '\t', ''.join(encodingArray), '(', hex(encodingBitInt), ')', operands
329 def isLDST(self, insn):
330 if insn.startswith('ld') or insn.startswith('st'):
334 def getRegWidth(self, insn):
336 insnMnemonic = insn.split('_')[0]
337 # ld/st register, do nothing
338 if insnMnemonic.endswith('b') and not insnMnemonic.endswith('sb'):
340 elif insnMnemonic.endswith('h') and not insnMnemonic.endswith('sh'):
342 elif insnMnemonic.endswith('sb'):
344 elif insnMnemonic.endswith('sh'):
346 elif insnMnemonic.endswith('sw'):
348 elif insnMnemonic.endswith('r'):
350 elif insnMnemonic.endswith('p'):
354 #print '[WARN] not recognized instruction:', insn
357 def isSIMD(self, insn):
358 if insn.find('simd') != -1:
363 ####################################
364 # generate the c++ code
365 # which builds the instruction table
366 ####################################
367 def buildInsnTable(self):
368 assert len(self.masksArray) == len(self.encodingsArray) ==len(self.insnArray)
369 print len(self.insnArray)
370 print '*** instruction table ***'
372 for i in range(0, len(self.insnArray)):
373 instruction = self.insnArray[i]
375 if len(self.operandsArray[i]) == 0:
376 operands = 'operandSpec()'
380 # recognize FP and SIMD
381 if instruction in fp_insn_set:
382 if self.isSIMD(instruction) == False:
383 self.operandsArray[i].insert(0, ('setFPMode',) )
385 self.operandsArray[i].insert(0, ('setSIMDMode',) )
387 if self.isLDST(instruction) == True:
388 if self.getRegWidth(instruction) == 32 or self.getRegWidth(instruction) == 64:
389 self.operandsArray[i].insert(0, ('setRegWidth',) )
391 if self.getRegWidth(instruction) != 128:
392 print '[WARN] unknown width'
394 for index, operand in enumerate(self.operandsArray[i]):
395 # this is solution to the compiler bug
396 # if OPRimm<x, y> appears in the first place of the list
397 if len(operand) != 1 and index == 0:
398 operands += '( (operandFactory) fn('
402 if len(operand) != 1:
403 operands += 'OPR'+operand[0]+'<'+ str(operand[1][0])+' COMMA ' + str(operand[1][1])+'>'
405 curOperandName = operand[0]
406 if curOperandName.startswith('set'):
407 operands += curOperandName
409 operands += 'OPR'+ curOperandName
413 #print '\tmain_insn_table.push_back(aarch64_insn_entry(aarch64_op_'+ self.insnArray[i]+', \t\"'+ self.insnArray[i].split('_')[0]+'\",\t'+ operands +' ));'
414 print '\tmain_insn_table.push_back(aarch64_insn_entry(aarch64_op_'+ self.insnArray[i]+', \t\"'+ self.insnArray[i].split('_')[0]+'\",\t'+ operands +', ' \
415 + str(self.encodingsArray[i]) + ', ' + str(self.masksArray[i]) + ') );'
419 # clapse(1001, 1101) => (101 & 111) => 101 => 5
421 def clapseMask(encoding, curMask):
424 if curMask & 0x80000000 != 0:
425 ret = (ret<<1)|((encoding & 0x80000000)>>31)
426 curMask = (curMask << 1)&0xffffffff
427 encoding = (encoding << 1)&0xffffffff
430 ####################################
431 # generate the c++ code
432 # which builds the decoding table
433 ####################################
434 def printDecodertable(entryToPlace, curMask=0, entryList=list(), index=-1 ):
435 entries = 'map_list_of'
436 if len(entryList) == 0:
437 entries = 'branchMap()'
439 for ent in entryList:
440 entries += '('+str(ent[0])+','+str(ent[1])+')'
442 print '\tmain_decoder_table['+str(entryToPlace)+']=aarch64_mask_entry('+str(hex(curMask))+', '+entries+','+str(index)+');'
444 def printDecodertable_list(entryToPlace, curMask=0, entryList=list(), index=list() ):
445 entries = 'map_list_of'
446 if len(entryList) == 0:
447 entries = 'branchMap()'
449 for ent in entryList:
450 entries += '('+str(ent[0])+','+str(ent[1])+')'
454 index_list += '('+ str(i) +')'
456 print '\tmain_decoder_table['+str(entryToPlace)+']=aarch64_mask_entry('+str(hex(curMask))+', '+entries+', list_of'+ index_list+');'
def alias_comparator( x, y ):
    """cmp-style comparison for ``list.sort(cmp=...)``.

    Returns ``num1sInMask(x) - num1sInMask(y)``, so the argument with the
    smaller ``num1sInMask`` value sorts first.
    """
    onesOfX = num1sInMask(x)
    onesOfY = num1sInMask(y)
    return onesOfX - onesOfY
def alias_comparatorRev( x, y ):
    """cmp-style comparison for ``list.sort(cmp=...)``, reversed ordering.

    Returns ``num1sInMask(y) - num1sInMask(x)``, so the argument with the
    larger ``num1sInMask`` value sorts first.
    """
    onesOfY = num1sInMask(y)
    onesOfX = num1sInMask(x)
    return onesOfY - onesOfX
475 global encodingsArray
479 ##########################
480 # generate decoding tree #
481 ##########################
483 self.numOfLeafNodes=0
484 self.processedIndex = Set()
486 ####################################
487 # this is the init value that the instruction
488 # entry should start from. 0 is reserved for
490 ####################################
491 self.entryAvailable = 1
493 self.aliasWeakSolution = True
495 # weak solution to aliases
def alias_weakSolution(self, inInsnIndex, entryToPlace):
    """Emit a single leaf entry for a group of aliased instructions.

    inInsnIndex is sorted in place with alias_comparator, every index in it
    is recorded in self.processedIndex, and one decoder-table entry is
    printed at entryToPlace that refers to the first index after sorting.
    """
    inInsnIndex.sort( cmp=alias_comparator )

    # every alias counts as processed, although only the first one is emitted
    self.processedIndex.update(inInsnIndex)

    chosenIndex = inInsnIndex[0]
    printDecodertable(entryToPlace, 0, list(), chosenIndex)
505 # strict solution to aliases
506 def alias_strictSolution(self, inInsnIndex, entryToPlace):
507 inInsnIndex.sort( cmp=alias_comparatorRev )
510 for i in inInsnIndex:
511 self.processedIndex.add(i)
512 #if self.debug == True:
513 print insnArray[i], '\t', bin( masksArray[i] ), '\t', bin(encodingsArray[i])
515 printDecodertable_list(entryToPlace, 0, list(), inInsnIndex);
519 ###########################################
521 # arg0 is the range of indexes in the instruction array
522 # that you want to analyze
523 # arg1 is the mask of the bits that have already been processed
524 # arg2 is the index of the decoding-table entry
525 # in which the current call places its decision node
526 ###########################################
527 def buildDecodeTable(self, inInsnIndex , processedMask, entryToPlace):
530 if entryToPlace > 2**32:
531 print '*** [WARN] should not reach here ***'
534 # terminated condition 1
535 if len(inInsnIndex) < 1:
536 print '*** [WARN] should not reach here ***'
539 # size of inInsnIndex is 1. This means we should generate a leaf node
540 if len(inInsnIndex) ==1:
541 if self.aliasWeakSolution == True:
542 printDecodertable(entryToPlace, 0, list() , inInsnIndex[0]);
544 printDecodertable_list(entryToPlace, 0, list() , inInsnIndex[0:1]);
546 if self.debug == True:
547 print insnArray[inInsnIndex[0]]
548 if inInsnIndex[0] in self.processedIndex:
549 print '[WARN] index processed, repeated index is ', inInsnIndex[0]
551 self.processedIndex.add(inInsnIndex[0])
552 self.numOfLeafNodes +=1
555 validMaskBits = int(''.join('1'*32), 2)
557 # find the minimum common mask bit field
558 for i in inInsnIndex:
559 validMaskBits = validMaskBits & masksArray[i]
560 #print "Valid Mask : ", '\t', bin(validMaskBits)
561 #print "Processed Mask : ", '\t', bin(processedMask)
562 # eliminate the processed mask bit field
563 validMaskBits = validMaskBits&(~processedMask)
564 if self.debug == True:
565 print hex(validMaskBits), bin(validMaskBits)
568 # terminated condition 2
569 # if the mask we get is 0, this means we have a bunch of instructions
570 # sharing the same opcode. They are aliases actually.
571 # Group them together.
573 if validMaskBits == 0:
574 # weak solution to aliases
575 self.alias_weakSolution( inInsnIndex, entryToPlace)
576 # strict solution to aliases
577 #self.alias_strictSolution(inInsnIndex, entryToPlace)
579 self.numOfLeafNodes += len(inInsnIndex)
584 for i in inInsnIndex:
585 #print '%3d'%i+'%22s'%insnArray[i]+'%34s'%bin(masksArray[i])+'%34s'%bin(encodingsArray[i])
586 addMask |= masksArray[i] &(~processedMask)
587 #addMask ^= encodingsArray[i] &(~processedMask)
589 # handle alias, merge them into one node
591 numOfLeafNodes += len(inInsnIndex)
592 for i in inInsnIndex:
593 processedIndex.add(i)
594 printDecodertable(entryToPlace, 0, list(), inInsnIndex[0]);
597 # handle more mask bits
599 validMaskBits = addMask
603 # till here we should have got the mask bits
608 #print hex(validMaskBits)
610 # get currrent valid mask
611 # check bit one by one from MSB
612 for bit in range(0, 32):
613 if (MSBmask & validMaskBits) == 0 :
614 validMaskBits = (validMaskBits << 1)
616 curMask |= 1<<(31-bit)
617 validMaskBits = (validMaskBits << 1)
620 #print 'cur mask', hex(curMask)
624 print "[WARN] mask is 0"
625 if self.debug == True:
626 print '%25s'%'processed mask'+'%35s'%bin(processedMask)
627 for i in inInsnIndex:
628 print '%3d'%i+'%22s'%insnArray[i]+'%35s'%bin(masksArray[i])+'%35s'%bin(encodingsArray[i])
629 self.numOfLeafNodes+=len(inInsnIndex)
633 # update processed mask
634 processedMask = processedMask | curMask
637 for i in range(0, 2**numCurMaskBit):
640 # generate the branches
641 # glue instructions to that branch
642 for index in inInsnIndex:
643 branchNo = clapseMask(encodingsArray[index] & curMask, curMask)
644 #print 'branchNo' ,branchNo
645 indexList[branchNo].append(index)
647 # get number of branches
648 # a trick to eliminate null branches
656 validIndexList.append(i)
657 posToBrNo.append(brNoinIndexList)
660 # typedef mask_table vector<mask_entry>;
661 # assign entry number to that branch
663 for i in range(0, numBranch):
664 entryList.append( (posToBrNo[i], self.entryAvailable + i) )
667 self.entryAvailable += numBranch
669 if self.aliasWeakSolution == True:
670 printDecodertable(entryToPlace, curMask, entryList, -1);
672 printDecodertable_list(entryToPlace, curMask, entryList, [-1]);
676 print '%34s'%bin(curMask)
677 for i in zeroIndexes:
678 print '%34s'%bin(encodingsArray[i])
681 print '%34s'%bin(encodingsArray[i])
683 if self.debug == True:
685 for i in range(0, numBranch):
686 self.buildDecodeTable( validIndexList[i], processedMask, entryList[i][1])
688 ###############################################################
689 # the following section is for parsing and generating operands.
690 ###############################################################
692 def getOperandValues(line, instruction, isRnUp):
693 multiOperandSet = flagFieldsSet
695 if line.find(' name') != -1:
697 tokens = line.split(' ')
699 if token.find('name')!=-1 and token.find('usename')== -1:
700 name = token.split('\"')[1]
701 if name.find('imm') != -1:
703 if token.find('hibit')!=-1:
704 bit = int(token.split('\"')[1])
705 if token.find('width') != -1:
706 width = int(token.split('\"')[1])
708 #print '*** [WARN] Blank operand field ***'
711 if name.find('Rt') != -1 or name.find('Rt2') != -1 or name.find('Rn') !=-1:
712 if instruction.startswith('ld'):
715 if instruction.startswith('st'):
718 if instruction.startswith('ld') or instruction.startswith('st'):
719 if name.startswith('Rn'):
723 endbit = bit - (width-1)
724 if name in multiOperandSet:
725 return (name, [bit, endbit])
730 def printOperandFuncs(operandsSet):
731 print 'operand function declares'
732 for operand in operandsSet:
733 print 'void '+ 'OPR'+operand + '(){ }'
738 ######################################
739 # get opcodes of GP instructions
740 # and FP/VEC instructions respectively
741 ######################################
742 opcodes = Opcode(ISA_dir, True)
743 opcodes.printOpcodes()
746 insn_set = opcodes.get_insn_set()
747 fp_insn_set = opcodes.get_fp_insn_set()
749 #################################################################
750 # get instructions, opcodes, lists of operands
751 # each set is stored in an array
752 # indexes in the arrays are used to find the instruction you want
753 #################################################################
755 opTable.printOpTable()
757 ###############################################
759 # generate C++ code to build instruction tables
760 ###############################################
761 opTable.buildInsnTable()
764 global encodingsArray
767 masksArray, encodingsArray, insnArray, operandsArray = opTable.getTable()
769 #########################################
770 # generate C++ code of operands functions
771 #########################################
772 print '*** operands ***'
773 print sorted(opTable.operandsSet)
774 printOperandFuncs(opTable.operandsSet)
776 ###########################################
778 # Generate C++ code to build decoding table.
779 # Basically, the generated stuff is a decision tree, which is represented in a map.
781 # Each entry stands for a decision node in the decision tree with a few or zero
782 # branches. One branch contains a {Key, Value} pair. Key is used to index. Value is
783 # the next entry number in the map, like a pointer.
785 # Binaries are passed to the root node first and compared with the decision mask in
786 # that node. The instruction binary is ANDed with the mask, and the result
787 # is compared with the Key of each branch. Once a Key matches, the instruction
788 # is passed to the next decision node pointed to by that branch.
789 # This repeats recursively until a leaf node is reached.
791 # Each leaf node contains the index of the instruction array in the last field,
792 # which holds the instruction entry.
793 ###########################################
794 print '*** decoder table ***'
795 validInsnIndex = list( range(0, len(opTable.insnArray) ) )
796 decodeTable = DecodeTable()
797 decodeTable.buildDecodeTable(validInsnIndex, 0 , 0)
799 ###############################
800 # some statistics for debugging
801 ###############################
802 print 'num of vaild leaves', decodeTable.numOfLeafNodes
803 allIndex = Set(range(0, len(opTable.insnArray)) )
804 print 'processed indexex: ', decodeTable.processedIndex, len(decodeTable.processedIndex)
805 print 'missing indexes:', sorted(allIndex - decodeTable.processedIndex), len(allIndex-decodeTable.processedIndex)
806 print 'number of total nodes in the tree:', decodeTable.numNodes
808 if __name__ == '__main__':