1 /* $Id: RTmutatedBinary.c,v 1.3 2002/05/14 20:20:51 chadd Exp $ */
3 /* this file contains the code to restore the necessary
4 data for a mutated binary
8 #include "dyninstAPI_RT/h/dyninstAPI_RT.h"
12 #if defined(sparc_sun_solaris2_4)
14 #elif defined(i386_unknown_linux2_0)
15 #include <libelf/libelf.h>
21 #include <link.h> /* ccw 23 jan 2002 */
22 #if defined(sparc_sun_solaris2_4)
28 #if defined(sparc_sun_solaris2_4)
29 extern void* _DYNAMIC;
30 #elif defined(i386_unknown_linux2_0)
31 extern ElfW(Dyn) _DYNAMIC[];
44 unsigned int checkAddr;
48 unsigned int dl_debug_state_addr;
50 #if defined(sparc_sun_solaris2_4)
51 struct r_debug _r_debug; /* ccw 2 apr 2002 */
52 extern unsigned int _dyninst_call_to_dlopen;
53 extern unsigned int __dyninst_jump_template__;
54 extern unsigned int __dyninst_jump_template__done__;
57 char *sharedLibraryInfo = NULL;
58 unsigned int originalInsnBkpt;
59 unsigned int addressBkpt;
61 /* this is not misnamed. In the future, this function will contain
62 code to patch the instrumentation of a shared library that has
63 been loaded into a different place during a mutated binary run.
65 Now, it just exit()s, as you can see
68 void fixInstrumentation(char* soName, unsigned int currAddr, unsigned int oldAddr){
69 printf(" %s loaded at wrong address: 0x%x (expected at 0x%x) \n", soName, currAddr, oldAddr);
70 printf(" This is an unrecoverable error, the instrumentation will not");
71 printf("\n run correctly if shared libraries are loaded at a different address\n");
72 printf("\n Exiting.....\n");
77 /* this function checks the shared library (soName) to see if it
78 is currently loaded (loadAddr) at the same place it was before (address).
79 If the shared library is not found in the list (sharedLibraryInfo) that
80 means the shared library was *NOT* instrumented and can be loaded
83 unsigned int checkSOLoadAddr(char *soName, unsigned int loadAddr){
84 unsigned int result=0, found = 0;
86 char *ptr = sharedLibraryInfo;
87 while(ptr && *ptr && !found ){
88 if(strstr(soName, ptr)){
90 ptr += (strlen(ptr) +1);
91 memcpy(&address, ptr, sizeof(unsigned int));
92 /* previous line is done b/c of alignment issues on sparc*/
93 if(loadAddr == address) {
100 ptr += (strlen(ptr) +1);
101 ptr += sizeof(unsigned int);
109 #if defined(sparc_sun_solaris2_4)
110 unsigned int register_o7;
112 unsigned int loadAddr;
113 /* this function is not a signal handler. It originally was, but now it is
114 not; it is called below in dyninst_jump_template
116 void pseudoSigHandler(int sig){
118 if(_r_debug.r_state == 0){
123 loadAddr = checkSOLoadAddr(map->l_name, map->l_addr);
125 fixInstrumentation(map->l_name, map->l_addr, loadAddr);
133 unsigned int loadAddr;
134 void dyninst_jump_template(){
137 The Solaris loader/ELF file works as follows:
139 A call to dlopen jumps to the Procedure Linking Table
140 slot holding the dlopen information. This slot consists of
144 ba (to another PLT slot)
147 The second PLT slot contains three instructions:
149 call (address of dlopen)
152 dlopen returns directly to where it was called from, not to
153 either of the PLT slots. The address from which it was called
154 is located in %o7 when the call to dlopen in the second PLT
155 slot is made. dlopen returns to %o7 +4.
160 The goal is to intercept this call to dlopen by overwriting
161 the first PLT slot to jump to __dyninst_jump_template__ then we
162 can jump to code that will check the addresses of the loaded
165 first we must preserve %o7 so we know where to go back to.
166 This is done with the first two instructions in __dyninst_jump_template__
167 These are written as nops BUT are overwritten in the SharedLibraries
168 branch in checkElfFile. %o7 is saved in register_o7 declared above.
169 This address is not available until run time so we generate these
170 instructions on the fly.
172 Next, we CALL the second PLT slot as normal. We use the delay
173 slot to run the sethi instruction from the first PLT slot. These
174 instructions are generated at runtime.
176 dlopen will eventually be called and will return to the nop after
177 the sethi. Now we need to call our function to check the
178 shared library address. This is pseudoSigHandler. We must
179 preserve the data returned by dlopen so we do a save to
180 push the register onto the stack before we call our function.
181 We call our function, and then do a restore to retrieve the
184 At __dyninst_jump_template__done__ we want to restore the
185 value in register_o7 to %o7 so when we do a retl we will
186 jump back to where the mutated binary originally called
188 The sethi and ld instructions are generated on the fly just
189 as the first sethi and st pair that saved the value of %o7.
190 The retl used %o7 to jump back to the original place
191 the mutatee called dlopen. We are done. Whew.
193 Note that I know the address of the first PLT slot because
194 the mutator found it and saved it in the mutated binary.
195 The address of the second PLT slot is determined by looking
196 at the instructions in the first PLT slot.
201 asm("__dyninst_jump_template__:");
203 asm("nop"); /*sethi hi(register_o7), %g1 GENERATED BELOW*/
204 asm("nop"); /*st %o7 GENERATED BELOW*/
205 asm("nop"); /*call plt GENERATED BELOW*/
206 asm("nop"); /*sethi r1, b4 GENERATED BELOW*/
209 asm("save %sp, -104, %sp");
216 asm("__dyninst_jump_template__done__:");
217 asm("nop"); /*sethi hi(register_o7), %g1 GENERATED BELOW*/
218 asm("nop"); /* ld [register_o7], %o7 GENERATED BELOW*/
221 asm("nop"); /* this will be filled in below */
228 #if defined(i386_unknown_linux2_0)
229 unsigned int loadAddr;
230 void dyninst_dl_debug_state(){
232 if(_r_debug.r_state == 1){
237 loadAddr = checkSOLoadAddr(map->l_name, map->l_addr);
239 fixInstrumentation(map->l_name, map->l_addr, loadAddr);
245 /* the following call is used to call
246 * _dl_debug_state to ensure correctness (if
247 * someone relies on it being called it is
248 * executed after this function)
249 * The value stored in dl_debug_state_addr is
250 * the address of the function _dl_debug_state
251 * and is set in checkElfFile
256 asm("call *dl_debug_state_addr");
261 void hack_ld_linux_plt(unsigned int pltEntryAddr){
263 * save the world needs to check each shared library
264 * that is loaded to ensure that it is loaded at the
265 * same base address it was loaded at when the mutator/mutatee
267 * So, we know dlopen calls _dl_debug_state per the r_debug
268 * interface to let the process know a shared library has changed
270 * with this function we change the Procedure Linkage Table (.plt)
271 * for ld-linux.so so that the entry that used to point to
272 * _dl_debug_state points to dyninst_dl_debug_state.
274 * dyninst_dl_debug_state then calls _dl_debug_state before
277 * dont try this at home
279 unsigned int mprotectAddr = pltEntryAddr - (pltEntryAddr % getpagesize());
280 unsigned int newTarget = (unsigned int) &dyninst_dl_debug_state ;
282 mprotect( (void*) mprotectAddr, pltEntryAddr - mprotectAddr + 4,
283 PROT_READ|PROT_WRITE|PROT_EXEC);
285 memcpy( (void*) &dl_debug_state_addr, (void*) pltEntryAddr, 4);
287 memcpy( (void*) pltEntryAddr, &newTarget, 4);
292 int checkSO(char* soName){
301 if((fd = (int) open(soName, O_RDONLY)) == -1){
302 RTprintf("cannot open : %s\n",soName);
306 if((elf = elf_begin(fd, ELF_C_READ, NULL)) ==NULL){
307 RTprintf("%s %s \n",soName, elf_errmsg(elf_errno()));
308 RTprintf("cannot elf_begin\n");
314 ehdr = elf32_getehdr(elf);
315 scn = elf_getscn(elf, ehdr->e_shstrndx);
316 strData = elf_getdata(scn,NULL);
317 for( scn = NULL; !result && (scn = elf_nextscn(elf, scn)); ){
318 shdr = elf32_getshdr(scn);
319 if(!strcmp((char *)strData->d_buf + shdr->sh_name, ".dyninst_mutated")) {
336 Elf_Data *elfData,*strData;
339 int retVal = 0, result;
340 unsigned int mmapAddr;
345 unsigned int updateAddress, updateSize, updateOffset;
346 unsigned int *dataPtr;
347 unsigned int numberUpdates,i ;
351 int sawFirstHeapTrampSection = 0;
352 elf_version(EV_CURRENT);
354 #if defined(sparc_sun_solaris2_4)
355 sprintf(execStr,"/proc/%d/object/a.out",getpid());
356 #elif defined(i386_unknown_linux2_0)
357 sprintf(execStr,"/proc/%d/exe",getpid());
360 if((fd = (int) open(execStr, O_RDONLY)) == -1){
361 printf("cannot open : %s\n",execStr);
365 if((elf = elf_begin(fd, ELF_C_READ, NULL)) ==NULL){
366 printf("%s %s \n",execStr, elf_errmsg(elf_errno()));
367 printf("cannot elf_begin\n");
373 ehdr = elf32_getehdr(elf);
374 scn = elf_getscn(elf, ehdr->e_shstrndx);
375 strData = elf_getdata(scn,NULL);
376 pageSize = getpagesize();
377 for(cnt = 0, scn = NULL; !soError && (scn = elf_nextscn(elf, scn));cnt++){
378 shdr = elf32_getshdr(scn);
379 if(!strncmp((char *)strData->d_buf + shdr->sh_name, "dyninstAPI_data", 15)) {
380 elfData = elf_getdata(scn, NULL);
381 tmpPtr = elfData->d_buf;
383 while( dataAddress != 0 ) {
384 dataSize = *(int*) tmpPtr;
386 dataAddress = *(Address*) tmpPtr;
387 tmpPtr += sizeof(Address);
389 memcpy((char*) dataAddress, tmpPtr, dataSize);
394 }else if(!strncmp((char *)strData->d_buf + shdr->sh_name, "dyninstAPI_",11)){
396 #if defined(sparc_sun_solaris2_4)
397 /* solaris does not make _r_debug available by
398 default, we have to find it in the
401 __Elf32_Dyn *_dyn = (__Elf32_Dyn*)& _DYNAMIC;
403 while(_dyn && _dyn->d_tag != 0 && _dyn->d_tag != 21){
406 if(_dyn && _dyn->d_tag != 0){
407 _r_debug = *(struct r_debug*) _dyn->d_un.d_ptr;
409 map = _r_debug.r_map;
412 retVal = 1; /* this is a restored run */
413 tmpStr = strchr((char *)strData->d_buf + shdr->sh_name,'_');
415 if( *tmpStr>=0x30 && *tmpStr <= 0x39 ) {
416 /* this is a heap tramp section */
417 if( sawFirstHeapTrampSection ){
418 result = mmap((void*) shdr->sh_addr, shdr->sh_size,
419 PROT_READ|PROT_WRITE|PROT_EXEC,
420 MAP_FIXED|MAP_PRIVATE,fd,shdr->sh_offset);
422 elfData = elf_getdata(scn, NULL);
423 memcpy((void*)shdr->sh_addr, elfData->d_buf, shdr->sh_size);
424 sawFirstHeapTrampSection = 1;
428 if(!strcmp((char *)strData->d_buf + shdr->sh_name, "dyninstAPI_mutatedSO")){
429 /* make sure the mutated SOs are loaded, not the original ones */
432 __Elf32_Dyn *_dyn = (__Elf32_Dyn*)& _DYNAMIC;
433 #if defined(sparc_sun_solaris2_4)
435 #elif defined(i386_unknown_linux2_0)
436 struct link_map *lmap=0;
438 char *loadedname, *dyninstname;
440 elfData = elf_getdata(scn, NULL);
442 sharedLibraryInfo = (char*) malloc(elfData->d_size);
443 memcpy(sharedLibraryInfo, elfData->d_buf, elfData->d_size);
444 lmap = _r_debug.r_map;
446 for(soNames = (char*) elfData->d_buf ; totallen<elfData->d_size;
447 soNames = &((char*) elfData->d_buf)[strlen(soNames)+1+sizeof(unsigned int)] ){
448 totallen += strlen(soNames) + 1 + sizeof(unsigned int);
449 lmap = _r_debug.r_map;
451 loadedname = strrchr(lmap->l_name,'/');
452 dyninstname = strrchr(soNames,'/');
454 loadedname = lmap->l_name;
456 if(dyninstname == 0){
457 dyninstname = soNames;
459 if(!strcmp(loadedname, dyninstname)) {
460 if(!checkSO(lmap->l_name)){
461 printf("ERROR: %s was mutated during saveworld and",lmap->l_name);
462 printf(" the currently loaded %s has not been mutated\n", lmap->l_name);
463 printf(" check your LD path to be sure the mutated %s is visible\n", soNames);
474 if(!strcmp((char *)strData->d_buf + shdr->sh_name, "dyninstAPI_SharedLibraries")){
475 unsigned int diffAddr;
476 unsigned int ld_linuxBaseAddr, baseAddr, size;
477 #if defined(sparc_sun_solaris2_4)
478 unsigned int *overWriteInsn;
479 unsigned int *pltEntry, *PLTEntry, *dyninst_jump_templatePtr, pltInsn;
480 unsigned int BA_MASK = 0x003fffff;
481 unsigned int offset, callInsn;
482 struct sigaction mysigact, oldsigact;
485 int foundLib = 0, result;
487 elfData = elf_getdata(scn, NULL);
489 ptr = elfData->d_buf;
491 map = _r_debug.r_map;
495 diffAddr = checkSOLoadAddr(map->l_name, map->l_addr);
497 fixInstrumentation(map->l_name, map->l_addr, diffAddr);
499 #if defined(i386_unknown_linux2_0)
500 if(strstr(map->l_name, "ld-linux.so")){
501 ld_linuxBaseAddr =map->l_addr;
505 /* check every loaded SO but leave map such that map->l_next == NULL.
506 The next time a SO is loaded it will be placed at
507 map->l_next, so keep a tail pointer such that we
508 dont need to loop through the entire list again
516 if( shdr->sh_addr != 0){
517 /* if the addr is zero, then there is
518 no PLT entry for dlopen. if there is
519 no entry for dlopen the mutatee must not
520 call it. -- what about calling it from
521 a shared lib that is statically loaded?
524 /* WHY IS THERE A POUND DEFINE HERE?
526 well, we need to intercept the dlopen calls from the mutated binary
527 because our trampolines expect the shared libraries to be in
528 a particular location and if they are not where they are expected
529 our trampolines can jump off into nothingness, or even worse, some
530 random bit of executable code.
532 So we must intercept the dlopen call and then check to be sure
533 the shared libraries are loaded in the same place as before. If
534 they are not we exit with a message to the user saying this is
537 Note, only shared libraries that have been instrumented are checked
541 #if defined(sparc_sun_solaris2_4)
543 For a description of what is going on here read
544 the comment in dyninst_jump_template above.
546 This code generates all the instructions referred
547 to in that comment as "GENERATED BELOW".
550 pltEntry = (unsigned int*) shdr->sh_addr;
551 pltInsn = *pltEntry; /* save insn for later */
553 offset = (*pltEntry) & BA_MASK;
554 if(offset & 0x00200000){
555 /* negative so sign extend */
556 offset = 0xffc00000 | offset;
560 PLTEntry += (offset*4)/sizeof(PLTEntry); /* move PLTEntry offset*4 bytes!*/
561 dyninst_jump_templatePtr = (unsigned int*) & __dyninst_jump_template__;
563 baseAddr = ((unsigned int) dyninst_jump_templatePtr) -
564 ( ((unsigned int) dyninst_jump_templatePtr)% getpagesize());
565 size = (unsigned int) dyninst_jump_templatePtr - baseAddr + 80;
566 result = mprotect(baseAddr , size, PROT_READ|PROT_WRITE|PROT_EXEC);
568 /* build sethi hi(register_o7), %g1 */
569 *dyninst_jump_templatePtr = 0x03000000;
570 *dyninst_jump_templatePtr |= ( (((unsigned int ) & register_o7)& 0xffffe000) >> 10);
572 dyninst_jump_templatePtr ++;
574 /* build st %o7, &register_o7 */
575 *dyninst_jump_templatePtr = 0xde206000;
576 *dyninst_jump_templatePtr |= ( ((unsigned int ) & register_o7) & 0x00001fff );
578 dyninst_jump_templatePtr ++;
580 /* build call PLTEntry */
581 *dyninst_jump_templatePtr = 0x40000000;
582 *dyninst_jump_templatePtr |= ( ((unsigned int) (PLTEntry)- ((unsigned int) dyninst_jump_templatePtr)) >>2);
584 dyninst_jump_templatePtr ++;
587 *dyninst_jump_templatePtr = pltInsn;
588 dyninst_jump_templatePtr ++;
591 /* advance past call to pseudoSigHandler */
592 dyninst_jump_templatePtr = (unsigned int) &__dyninst_jump_template__done__ ;
594 /* build sethi hi(register_o7), %g1 */
595 *dyninst_jump_templatePtr = 0x03000000;
596 *dyninst_jump_templatePtr |= ( (((unsigned int ) & register_o7)& 0xffffe000) >> 10);
598 dyninst_jump_templatePtr ++;
600 /* build ld [register_o7], %o7 */
601 *dyninst_jump_templatePtr = 0xde006000;
602 *dyninst_jump_templatePtr |= ( ((unsigned int ) & register_o7) & 0x00001fff );
605 /* THIS ENABLES THE JUMP */
606 /* edit plt to jump to __dyninst_jump_template__ */
607 baseAddr = ((unsigned int) pltEntry) -
608 ( ((unsigned int) pltEntry)% getpagesize());
609 size = (unsigned int) pltEntry - baseAddr + 8;
610 result = mprotect(baseAddr , size, PROT_READ|PROT_WRITE|PROT_EXEC);
612 /* build sethi hi(&__dyninst_jump_template__), %g1 */
615 *pltEntry = 0x03000000;
616 *pltEntry |= ( (((unsigned int ) &__dyninst_jump_template__) )>> 10);
619 /* build jmpl %g1, %r0 */
620 *pltEntry = 0x81c06000;
621 *pltEntry |= ( ((unsigned int ) &__dyninst_jump_template__ ) & 0x00003ff );
623 #elif defined(i386_unknown_linux2_0)
624 /* install jump to catch call to _dl_debug_state */
625 /* see comment in hack_ld_linux_plt for explanations */
626 hack_ld_linux_plt(ld_linuxBaseAddr + shdr->sh_addr);
628 }/* shdr->sh_addr != 0 */
630 if(!strncmp((char *)strData->d_buf + shdr->sh_name, "dyninstAPIhighmem_",18)){
631 /*the layout of dyninstAPIhighmem_ is:
640 we must ONLY overwrite the updates, the other
641 areas of the page may be important (and different than
642 the saved data in the file). We first copy out the
643 page, then apply the updates to it, and then
648 retVal = 1; /* just to be sure */
649 elfData = elf_getdata(scn, NULL);
650 numberUpdates = (unsigned int) ( ((unsigned int*) elfData->d_buf)[
651 (elfData->d_size - sizeof(unsigned int))/ sizeof(unsigned int) ]);
652 oldPageDataSize = shdr->sh_size-((2*numberUpdates+1)*
653 sizeof(unsigned int)) ;
654 oldPageData = (char*) malloc(oldPageDataSize);
655 /*copy old page data */
657 /* probe memory to see if we own it */
658 checkAddr = dladdr((void*)shdr->sh_addr, &dlip);
660 updateSize = shdr->sh_size-((2*numberUpdates+1)* sizeof(unsigned int));
663 /* we dont own it,mmap it!*/
665 mmapAddr = shdr->sh_offset;
666 mmapAddr =(unsigned int) mmap((void*) shdr->sh_addr,oldPageDataSize,
667 PROT_READ|PROT_WRITE|PROT_EXEC,MAP_FIXED|MAP_PRIVATE,fd,mmapAddr);
669 /*we own it, finish the memcpy */
670 mmapAddr = memcpy(oldPageData, (void*) shdr->sh_addr, updateSize);
674 dataPtr =(unsigned int*) &(((char*) elfData->d_buf)[oldPageDataSize]);
676 for(i = 0; i< numberUpdates; i++){
677 updateAddress = *dataPtr;
678 updateSize = *(++dataPtr);
680 updateOffset = updateAddress - shdr->sh_addr;
682 memcpy(&( oldPageData[updateOffset]),
683 &(((char*)elfData->d_buf)[updateOffset]) , updateSize);
687 mmapAddr = shdr->sh_offset ;
688 mmapAddr =(unsigned int) mmap((void*) shdr->sh_addr,oldPageDataSize,
689 PROT_READ|PROT_WRITE|PROT_EXEC, MAP_FIXED| MAP_PRIVATE,fd,mmapAddr);
692 memcpy((void*) shdr->sh_addr, oldPageData,oldPageDataSize );
696 if(!strcmp((char *)strData->d_buf + shdr->sh_name, "dyninstAPI_loadLib")){
697 /* ccw 14 may 2002 */
698 /* this section loads shared libraries into the mutated binary
699 that were loaded by BPatch_thread::loadLibrary */
701 elfData = elf_getdata(scn, NULL);
702 tmpPtr = elfData->d_buf;
704 dlopen(tmpPtr, RTLD_NOW);
705 tmpPtr += (strlen(tmpPtr) +1);
725 /* with solaris, the mutatee has a jump from
726 * main() to a trampoline that calls DYNINSTinit() the
727 * trampoline resides in the area that was previously
728 * the heap, this trampoline is loaded as part of the
730 * UPDATE: now the heap tramps are not loaded by the loader
731 * but rather this file so _init is necessary
732 * UPDATE: gcc handles the _init fine, but
733 * cc chokes on it. There seems to be no compiler
734 * independent way to have my code called correctly
735 * at load time so i have defined _NATIVESO_ in
736 * the sparc Makefile for cc only. The #pragma
737 * forces the my_init function to be called
738 * correctly upon load of the library.
739 * Building with gcc is the same as before.
740 * THIS NEEDS TO BE FIXED
742 * with linux the trampolines are ALL in the big
743 * array at the top of this file and so are not loaded
744 * by the loader as part of the data segment. this
745 * needs to be called to map in everything before
746 * main() jumps to the big array
749 #if defined(_NATIVESO_)
751 #pragma init (my_init)
758 /* this buffer is allocated to clear
759 the first page on the heap. This is necessary
760 because loading the heap tramps uses mmap, which
761 is going to eat the heap if the heap begins on
762 the same page the heap tramps end on (almost certain)
764 buffer = (char*) malloc(getpagesize());
765 isElfFile =checkElfFile();