Fixed incorrect calculation of function sizes
[dyninst.git] / pdutil / src / Object-nt.C
1 /*
2  * Copyright (c) 1996-1999 Barton P. Miller
3  * 
4  * We provide the Paradyn Parallel Performance Tools (below
5  * described as "Paradyn") on an AS IS basis, and do not warrant its
6  * validity or performance.  We reserve the right to update, modify,
7  * or discontinue this software at any time.  We shall have no
8  * obligation to supply such updates or modifications or any other
9  * form of support to you.
10  * 
11  * This license is for research uses.  For such uses, there is no
12  * charge. We define "research use" to mean you may freely use it
13  * inside your organization for whatever purposes you see fit. But you
14  * may not re-distribute Paradyn or parts of Paradyn, in any form
15  * source or binary (including derivatives), electronic or otherwise,
16  * to any other organization or entity without our permission.
17  * 
18  * (for other uses, please contact us at paradyn@cs.wisc.edu)
19  * 
20  * All warranties, including without limitation, any warranty of
21  * merchantability or fitness for a particular purpose, are hereby
22  * excluded.
23  * 
24  * By your use of Paradyn, you understand and agree that we (or any
25  * other person or entity with proprietary rights in Paradyn) are
26  * under no obligation to provide either maintenance services,
27  * update services, notices of latent defects, or correction of
28  * defects for Paradyn.
29  * 
30  * Even if advised of the possibility of such damages, under no
31  * circumstances shall we (or any other person or entity with
32  * proprietary rights in the software licensed hereunder) be liable
33  * to you or any third party for direct, indirect, or consequential
34  * damages of any character regardless of type of action, including,
35  * without limitation, loss of profits, loss of use, loss of good
36  * will, or computer failure or malfunction.  You agree to indemnify
37  * us (and any other person or entity with proprietary rights in the
38  * software licensed hereunder) for any and all liability it may
39  * incur to third parties resulting from your use of Paradyn.
40  */
41
42 // $Id: Object-nt.C,v 1.2 1999/07/08 19:26:22 pcroth Exp $
43 #include <iostream.h>
44 #include <iomanip.h>
45 #include <limits.h>
46
47 #include "util/h/String.h"
48 #include "util/h/Vector.h"
49 #include "util/h/CodeView.h"
50 #include "util/h/Object.h"
51 #include "util/h/Object-nt.h"
52
53
54
55 //---------------------------------------------------------------------------
56 // prototypes of functions used in this file
57 //---------------------------------------------------------------------------
// Comparison callback passed to vector<Symbol>::sort() by
// Object::CVProcessSymbols when it sorts the collected symbols by offset.
// Only the declaration appears here; the definition lies outside this
// part of the file.
int sym_offset_compare( const void *x, const void *y );
59
60
61
62
63 //---------------------------------------------------------------------------
64 // Object method implementation
65 //---------------------------------------------------------------------------
66
67
68 Object::~Object( void )
69 {
70         if( pDebugInfo != NULL )
71         {
72                 UnmapDebugInformation( pDebugInfo );
73                 pDebugInfo = NULL;
74         }
75 }
76
77
78
79 void
80 Object::ParseDebugInfo( void )
81 {
82         IMAGE_DEBUG_INFORMATION* pDebugInfo = NULL;
83         
84         // access the module's debug information
85         pDebugInfo = MapDebugInformation(NULL, (LPTSTR)file_.string_of(), NULL, 0);
86         if( pDebugInfo != NULL )
87         {
88                 // ensure that the base address is valid
89                 if( baseAddr == NULL )
90                 {
91                         // use the image base address from the disk image
92                         // (this should only happen for EXEs; we should have
93                         // the in-core base address of DLLs.)
94                         // 
95                         // TODO: we should be able to use the in-core address
96                         // for EXEs as well
97                         baseAddr = pDebugInfo->ImageBase;
98                 }
99                 assert( baseAddr != NULL );
100
101                 // determine the location of the relevant sections
102                 ParseSectionMap( pDebugInfo );
103
104                 //
105                 // parse the symbols, if available
106                 // (note that we prefer CodeView over COFF)
107                 //
108                 if( pDebugInfo->CodeViewSymbols != NULL )
109                 {
110                         // we have CodeView debug information
111                         ParseCodeViewSymbols( pDebugInfo );
112                 }
113                 else if( pDebugInfo->CoffSymbols != NULL )
114                 {
115                         // we have COFF debug information
116                         ParseCOFFSymbols( pDebugInfo );
117                 }
118                 else
119                 {
120             // TODO - what to do when there's no debug information?
121                 }
122         }
123         else
124         {
125                 // indicate the failure to access the debug information
126         log_perror(err_func_, "MapDebugInformation");
127         }
128 }
129
130
131 void
132 Object::ParseSectionMap( IMAGE_DEBUG_INFORMATION* pDebugInfo )
133 {
134         DWORD i;
135
136         // currently we care only about the .text and .data segments
137         for( i = 0; i < pDebugInfo->NumberOfSections; i++ )
138         {
139                 IMAGE_SECTION_HEADER& section = pDebugInfo->Sections[i];
140
141                 if( strncmp( (const char*)section.Name, ".text", 5 ) == 0 )
142                 {
143                         // note that section numbers are one-based
144                         textSectionId = i + 1;
145
146                         code_ptr_       = (Word*)(((char*)pDebugInfo->MappedBase) +
147                             section.PointerToRawData);
148                         code_off_       = baseAddr + section.VirtualAddress;
149                         code_len_       = section.Misc.VirtualSize / sizeof(Word);
150                 }
151                 else if( strncmp( (const char*)section.Name, ".data", 5 ) == 0 )
152                 {
153                         // note that section numbers are one-based
154                         dataSectionId = i + 1;
155
156                         data_ptr_       = (Word*)(((char*)pDebugInfo->MappedBase) +
157                             section.PointerToRawData);
158                         data_off_       = baseAddr + section.VirtualAddress;
159                         data_len_       = section.Misc.VirtualSize / sizeof(Word);
160                 }
161         }
162 }
163
164
165 bool
166 Object::ParseCodeViewSymbols( IMAGE_DEBUG_INFORMATION* pDebugInfo )
167 {
168         CodeView cv( (const char*)pDebugInfo->CodeViewSymbols, textSectionId );
169     bool ret = true;
170
171         if( cv.Parse() )
172         {
173                 bool isDll = (pDebugInfo->Characteristics & IMAGE_FILE_DLL);
174                 dictionary_hash<string, unsigned int> libDict( string::hash, 19 );
175                 vector<Symbol> allSymbols;
176                 vector<ModInfo> cvMods;         // CodeView's notion of modules
177         vector<PDModInfo> pdMods;       // Paradyn's notion of modules
178                 unsigned int midx;
179                 unsigned int i;
180
181                 //
182                 // build a module map of the .text section
183                 // by creating a list of CodeView modules that contribute to 
184                 // the .text section, sorted by offset
185                 // 
186                 // note that the CodeView modules vector uses one-based indexing
187                 //
188                 const vector<CodeView::Module>& modules = cv.GetModules();
189                 for( midx = 1; midx < modules.size(); midx++ )
190                 {
191                         const CodeView::Module& mod = modules[midx];
192
193                         //
194                         // determine the Paradyn module that this
195                         // module will be associated with...
196                         //
197
198                         // ...first determine the library that contains
199             // this module, if any...
200                         string libName;
201                         if( mod.GetLibraryIndex() != 0 )
202                         {
203                                 libName = cv.GetLibraries()[mod.GetLibraryIndex()];
204                         }
205
206                         // ...next figure out the Paradyn module with which to associate
207             // this CodeView module...
208             unsigned int pdModIdx = UINT_MAX;
209                         if( !isDll && (mod.GetLibraryIndex() != 0) )
210                         {
211                                 // associate symbol with static library
212
213                                 // handle the case where this is the first time we've
214                                 // seen this library
215                                 if( !libDict.defines( libName ) )
216                                 {
217                                         // add a Paradyn module for the library
218                     // offset and size will be patched later
219                     pdMods += PDModInfo( libName, 0, 0 );
220                     pdModIdx = pdMods.size() - 1;
221
222                                         // keep track of where we added the library,
223                                         // so we can patch the location of the library's code later
224                                         libDict[libName] = pdModIdx;
225                                 }
226                 else
227                 {
228                     // look up the index we saved earlier
229                     pdModIdx = libDict[libName];
230                 }
231                         }
232                         else if( !isDll )
233                         {
234                 // add a Paradyn module for the module's source file
235
236                                 DWORD offset;   // offset of code in text section
237                                 DWORD cb;               // size of code in text section
238
239
240                                 // find a source code name to associate with this module
241                                 if( mod.GetTextBounds( offset, cb ) )
242                                 {
243                     pdMods += PDModInfo( mod.GetSourceName(), offset, cb );
244                     pdModIdx = pdMods.size() - 1;
245                                 }
246                                 else
247                                 {
248                                         // the module doesn't contribute to the .text section
249                     // TODO - so do we care about this module?  should
250                     // we be keeping track of contributions
251                     // to the data section?
252                                 }
253                         }
254                         else
255                         {
256                 // module is part of a DLL, so we 
257                 // associate any symbols directly with the DLL
258                 pdMods += PDModInfo( pDebugInfo->ImageFileName, 0,
259                                         code_len_ * sizeof(Word) );
260                 pdModIdx = pdMods.size() - 1;
261                         }
262
263                         // add the module info to our vector for later sorting
264                         // (but only if it contributes code to the .text section)
265             DWORD offText;
266             DWORD cbText;
267                         if( mod.GetTextBounds( offText, cbText ) && (cbText > 0) )
268                         {
269                 assert( pdModIdx != UINT_MAX );
270                                 cvMods += ModInfo( &mod, pdModIdx );
271                         }
272                 }
273
274                 // sort list of modules by offset to give us our CodeView module map
275         cvMods.sort( ModInfo::CompareByOffset );
276
277 #ifdef _DEBUG
278         // dump the CodeView module map
279         cout << "CodeView module .text map:\n";
280         for( midx = 0; midx < cvMods.size(); midx++ )
281         {
282             DWORD offText = 0;
283             DWORD cbText = 0;
284             string name = cvMods[midx].pCVMod->GetName();
285             cvMods[midx].pCVMod->GetTextBounds( offText, cbText );
286
287             cout << hex
288                 << "0x" << setw( 8 ) << setfill( '0' ) << offText
289                 << "-0x" << setw( 8 ) << setfill( '0' ) << offText + cbText - 1
290                 << " (" << setw( 8 ) << setfill( '0' ) << cbText << ")\t"
291                 << name
292                 << dec
293                 << endl;
294         }
295 #endif // _DEBUG
296
297         // compute bounds for Paradyn modules
298         for( midx = 0; midx < cvMods.size(); midx++ )
299         {
300             PDModInfo& pdMod = pdMods[cvMods[midx].pdModIdx];
301             DWORD offTextCV = 0;
302             DWORD cbTextCV = 0;
303
304             // determine the bounds of the CodeView text
305             cvMods[midx].pCVMod->GetTextBounds( offTextCV, cbTextCV );
306
307             if( cbTextCV > 0 )
308             {
309                 // expand the PD module to cover the CV module's bounds
310                 if( pdMod.cbText == 0 )
311                 {
312                     // this is the first CodeView module we've seen
313                     // for this Paradyn module
314                     pdMod.offText = offTextCV;
315                     pdMod.cbText = cbTextCV;
316                 }
317                 else
318                 {
319                     // we have to handle the (potential) expansion of
320                     // the existing bounds
321                     if( offTextCV < pdMod.offText )
322                     {
323                         DWORD oldOffset = pdMod.offText;
324
325                         // reset the base and extend the bound
326                         pdMod.offText = offTextCV;
327                         pdMod.cbText += (oldOffset - offTextCV);
328                     }
329
330                     if((offTextCV + cbTextCV) > (pdMod.offText + pdMod.cbText))
331                     {
332                         // extend the bound
333                         pdMod.cbText += 
334                             ((offTextCV + cbTextCV) -
335                                 (pdMod.offText + pdMod.cbText));
336                     }
337                 }
338             }
339         }
340
341 #ifdef _DEBUG
342         // dump the Paradyn module map
343         cout << "Paradyn module .text map:\n";
344         for( midx = 0; midx < pdMods.size(); midx++ )
345         {
346             DWORD offText = pdMods[midx].offText;
347             DWORD cbText = pdMods[midx].cbText;
348             string name = pdMods[midx].name;
349
350             cout << hex
351                 << "0x" << setw( 8 ) << setfill( '0' ) << offText
352                 << "-0x" << setw( 8 ) << setfill( '0' ) << offText + cbText - 1
353                 << " (" << setw( 8 ) << setfill( '0' ) << cbText << ")\t"
354                 << name
355                 << dec
356                 << endl;
357         }
358 #endif // _DEBUG
359
360         // add entries for our Paradyn modules
361         for( midx = 0; midx < pdMods.size(); midx++ )
362         {
363                     allSymbols += Symbol( pdMods[midx].name,
364                             "",
365                             Symbol::PDST_MODULE,
366                             Symbol::SL_GLOBAL,
367                             code_off_ + pdMods[midx].offText,
368                             false,
369                             pdMods[midx].cbText );
370         }
371
372
373                 //
374                 // now that we have a module map of the .text segment,
375                 // consider the symbols defined by each module
376                 //
377         CVProcessSymbols( cv, cvMods, pdMods, allSymbols );
378
379
380         // our symbols are sorted by offset
381         // so we can patch up any outstanding sizes
382         CVPatchSymbolSizes( allSymbols );
383
384                 // our symbols are finally ready to enter into
385         // the main symbol dictionary
386                 for( i = 0; i < allSymbols.size(); i++ )
387                 {
388                         if(allSymbols[i].name() != "")
389                         {
390                                 symbols_[allSymbols[i].name()] = allSymbols[i];
391                         }
392                 }
393         }
394         else
395         {
396         // indicate failure to parse symbols
397         ret = false;
398         }
399
400     return ret;
401 }
402
403
404
405 void
406 Object::CVPatchSymbolSizes( vector<Symbol>& allSymbols )
407 {
408     Address lastFuncAddr = NULL;
409     unsigned int i;
410
411
412     for( i = 0; i < allSymbols.size(); i++ )
413     {
414         Symbol& sym = allSymbols[i];
415
416         if( (sym.name() != "") && (sym.size() == 0) &&
417             ((sym.type() == Symbol::PDST_FUNCTION) ||
418              (sym.type() == Symbol::PDST_OBJECT)))
419         {
420             // check for function aliases
421             // note that this check depends on the allSymbols
422             // array being sorted so that aliases are considered
423             // after the "real" function symbol
424             bool isAlias = false;
425             if( (sym.type() == Symbol::PDST_FUNCTION) &&
426                 (sym.addr() == lastFuncAddr) &&
427                 (sym.size() == 0) )
428             {
429                 // this function is an alias
430                 // we currently leave their size as zero to indicate 
431                 // that they are uninstrumentable.  Ideally, this will
432                 // change once a mechanism becomes available to identify
433                 // these as function aliases.
434                 isAlias = true;
435             }
436
437
438             if( !isAlias )
439             {
440                 //
441                 // patch the symbol's size
442                 //
443                 // We consider the symbol's size to be the distance
444                 // to the next symbol.  (Sometimes this causes us to
445                 // overestimate, because compilers sometimes leave some
446                 // "padding" between the end of a function and the beginning
447                 // of the next.)
448                 //
449                 // Note that we have to use the next symbol whose
450                 // address is different from the current one, to handle
451                 // cases where aliases are included in the symbol table
452                 //
453                 unsigned int cb;
454
455                 //
456                 // find next function or object symbol in our section with
457                 // an address different from ours
458                 //
459                 // the while test looks complicated -
460                 // all we're trying to do is skip to the next
461                 // function or object symbol within the array whose
462                 // address is not the same as allSymbols[i].
463                 unsigned int j = i + 1;
464                 while( (j < allSymbols.size()) &&
465                        ( ((allSymbols[j].type() != Symbol::PDST_FUNCTION) &&
466                           (allSymbols[j].type() != Symbol::PDST_OBJECT)
467                          ) ||
468                          (allSymbols[j].addr() == sym.addr())
469                        )
470                      )
471                 {
472                    j++;
473                 }
474
475                 if( j < allSymbols.size() &&
476                     (allSymbols[j].type() == sym.type()) )
477                 {
478                     // we found a symbol from the same section
479                     // with a different address -
480                     // size is just the delta between symbols
481                     cb = allSymbols[j].addr() - sym.addr();
482                 }
483                 else
484                 {
485                     // we couldn't find another symbol in our section
486                     // with a different address -
487                     // size is the remainder of the current section
488                     if( sym.type() == Symbol::PDST_FUNCTION )
489                     {
490                         // size is remainder of the .text section
491                         cb = (code_off_ + code_len_*sizeof(Word)) - sym.addr();
492                     }
493                     else
494                     {
495                         // size is remainder of the .data section
496                         cb = (data_off_ + data_len_*sizeof(Word)) - sym.addr();
497                     }
498                 }
499                 assert( cb != 0 );
500                 sym.change_size( cb );
501             }
502
503             // update the last known function symbol
504             if( sym.type() == Symbol::PDST_FUNCTION )
505             {
506                 lastFuncAddr = sym.addr();
507             }
508         }
509     }
510 }
511
512
513 void
514 Object::CVProcessSymbols( CodeView& cv, 
515                           vector<Object::ModInfo>& cvMods,
516                           vector<Object::PDModInfo>& pdMods,
517                           vector<Symbol>& allSymbols )
518 {
519     unsigned int midx;
520     unsigned int i;
521
522
523         for( midx = 0; midx < cvMods.size(); midx++ )
524         {
525                 const CodeView::Module& mod = *(cvMods[midx].pCVMod);
526         PDModInfo& pdMod = pdMods[cvMods[midx].pdModIdx];
527                     
528                 // add symbols for each global function defined in the module
529         {
530                         const vector<CodeView::SymRecordProc*>& gprocs =
531                 mod.GetSymbols().GetGlobalFunctions();
532                         for( i = 0; i < gprocs.size(); i++ )
533                         {
534                                 const CodeView::SymRecordProc* proc = gprocs[i];
535
536                                 // build a symbol from the proc information
537                                 LPString lpsName( proc->name );
538                                 string strName = (string)lpsName;
539
540                                 Address addr = code_off_ + proc->offset;
541
542                                 allSymbols += ( Symbol( strName,
543                                         pdMod.name,
544                                         Symbol::PDST_FUNCTION,
545                                         Symbol::SL_GLOBAL,
546                                         addr,
547                                         false,
548                                         proc->procLength ));
549                         }
550         }
551
552                 // add symbols for each local function defined in the module
553         {
554                         const vector<CodeView::SymRecordProc*>& lprocs =
555                    mod.GetSymbols().GetLocalFunctions();
556                         for( i = 0; i < lprocs.size(); i++ )
557                         {
558                                 const CodeView::SymRecordProc* proc = lprocs[i];
559                                 LPString lpsName( proc->name );
560                                 string strName = (string)lpsName;
561
562                                 Address addr = code_off_ + proc->offset;
563
564                                 allSymbols += ( Symbol( strName,
565                                         pdMod.name,
566                                         Symbol::PDST_FUNCTION,
567                                         Symbol::SL_LOCAL,
568                                         addr,
569                                         false,
570                                         proc->procLength ));
571                         }
572         }
573
574                 // handle thunks
575         {
576                         const vector<CodeView::SymRecordThunk*>& thunks =
577                 mod.GetSymbols().GetThunks();
578                         for( i = 0; i < thunks.size(); i++ )
579                         {
580                                 const CodeView::SymRecordThunk* thunk = thunks[i];
581                                 LPString lpsName( thunk->name );
582                                 string strName = (string)lpsName;
583
584                                 Address addr = code_off_ + thunk->offset;
585
586                                 allSymbols += ( Symbol( strName,
587                                         pdMod.name,
588                                         Symbol::PDST_FUNCTION,
589                                         Symbol::SL_GLOBAL,
590                                         addr,
591                                         false,
592                                         thunk->thunkLength ) );
593                         }
594         }
595
596                 // add symbols for each global variable defined in the module
597         {
598                         const vector<CodeView::SymRecordData*>& gvars =
599                 mod.GetSymbols().GetGlobalVariables();
600                         for( i = 0; i < gvars.size(); i++ )
601                         {
602                                 const CodeView::SymRecordData* pVar = gvars[i];
603                                 LPString lpsName( pVar->name );
604                                 string strName = (string)lpsName;
605
606                                 Address addr = data_off_ + pVar->offset;
607
608                                 allSymbols += ( Symbol( strName,
609                                         pdMod.name,
610                                         Symbol::PDST_OBJECT,
611                                         Symbol::SL_GLOBAL,
612                                         addr,
613                                         false,
614                                         0 ));               // will be patched later (?)
615                         }
616         }
617
618         {
619                         const vector<CodeView::SymRecordData*>& lvars =
620                 mod.GetSymbols().GetGlobalVariables();
621                         for( i = 0; i < lvars.size(); i++ )
622                         {
623                                 const CodeView::SymRecordData* pVar = lvars[i];
624                                 LPString lpsName( pVar->name );
625                                 string strName = (string)lpsName;
626
627                                 Address addr = data_off_ + pVar->offset;
628
629                                 allSymbols += ( Symbol( strName,
630                                         pdMod.name,
631                                         Symbol::PDST_OBJECT,
632                                         Symbol::SL_LOCAL,
633                                         addr,
634                                         false,
635                                         0 ));               // will be patched later (?)
636                         }
637         }
638         }
639
640     // once we've handled the symbols that the CodeView object
641     // could discover and associate with a module, we've
642     // got to do something with symbols that were not explicitly
643     // associated with a module in the CodeView information
644
645     // Unfortunately, VC++/DF produce S_PUB32 symbols
646     // for functions in some cases.  (For example,
647     // when building a Digital Fortran program, the
648     // software produces an executable with symbols
649     // from the Fortran runtime libraries as S_PUB32
650     // records.)  We do our best to try to determine
651     // whether the symbol is a function, and if so,
652     // how large it is, which module it belongs to, etc.
653         const vector<CodeView::SymRecordData*>& pubs =
654                                             cv.GetSymbols().GetPublics();
655         for( i = 0; i < pubs.size(); i++ )
656         {
657                 const CodeView::SymRecordData* sym = pubs[i];
658
659                 LPString lpsName( sym->name );
660                 string strName = (string)lpsName;
661
662         // we now have to try to determine the type of the
663         // symbol.  Since we're only given a type and a location,
664         // (and the type might not even be valid) we assume
665         // that any public symbol in the code section is
666         // a function and we try to determine which module it
667         // belongs to based on the module map we constructed earlier
668         if( sym->segment == textSectionId )
669         {
670                         Address addr = code_off_ + sym->offset;
671
672             // save the symbol
673                         allSymbols += Symbol( strName,
674                                 FindModuleByOffset( sym->offset, pdMods ),
675                                 Symbol::PDST_FUNCTION,
676                                 Symbol::SL_GLOBAL,
677                                 addr,
678                                 false,
679                                 0 );              // will be patched later
680         }
681         else if( sym->segment == dataSectionId )
682         {
683                         Address addr = data_off_ + sym->offset;
684
685                         allSymbols += Symbol( strName,
686                                 FindModuleByOffset( sym->offset, pdMods ),
687                                 Symbol::PDST_OBJECT,
688                                 Symbol::SL_GLOBAL,
689                                 addr,
690                                 false,
691                                 0 );              // will be patched later
692         }
693         else
694         {
695             // TODO - the symbol is not in the text or data
696             // sections - do we care about it?
697         }
698         }
699         
700     // sort symbols by offset
701     allSymbols.sort( sym_offset_compare );
702 }
703
704
705 void
706 Object::ParseCOFFSymbols( IMAGE_DEBUG_INFORMATION* pDebugInfo )
707 {
708         IMAGE_COFF_SYMBOLS_HEADER* pHdr = pDebugInfo->CoffSymbols;
709     vector<Symbol>      allSymbols;
710         bool gcc_compiled = false;
711         bool isDll = (pDebugInfo->Characteristics & IMAGE_FILE_DLL);
712         DWORD u, v;
713
714         
715         // find the location of the symbol records and string table
716         IMAGE_SYMBOL* syms = (IMAGE_SYMBOL*)(((char*)pHdr) +
717                             pHdr->LvaToFirstSymbol);
718         char* stringTable = ((char*)syms) +
719                             pHdr->NumberOfSymbols * sizeof( IMAGE_SYMBOL );
720
721
722         // for DLLs, we ignore filename information and associate 
723         // symbols with a module representing the DLL
724         if( isDll )
725         {
726                 allSymbols += Symbol( pDebugInfo->ImageFileName,
727                                                                 "",
728                                                                 Symbol::PDST_MODULE,
729                                                                 Symbol::SL_GLOBAL,
730                                                                 code_off_,
731                                                                 false );
732         }
733
734
735         // parse the COFF records
736         for( v = 0; v < pDebugInfo->CoffSymbols->NumberOfSymbols; v++ )
737         {
738                 string name = FindName( stringTable, syms[v] );
739                 Address sym_addr = NULL;
740
741
742                 //
743                 // handle the various types of COFF records...
744                 //
745
746                 if( name.prefixed_by("_$$$") || name.prefixed_by("$$$") )
747                 {
748                         // the record represents a branch target (?)
749                         // skip it
750                         v += syms[v].NumberOfAuxSymbols;
751                 }
752                 else if( syms[v].StorageClass == IMAGE_SYM_CLASS_FILE )
753                 {
754                         // the record is a file record
755                         //
756                         // note that for DLLs, we associate symbols directly with
757                         // the DLL and ignore any filename information we find
758                         if( !isDll )
759                         {
760                                 // extract the name of the source file
761                                 name = (char*)(&syms[v+1]);
762
763                                 // skip auxiliary records containing the filename
764                                 v += (strlen(name.string_of()) / sizeof(IMAGE_SYMBOL)) + 1;
765
766                                 // find a .text record following the file name
767                                 // if there is one, it contains the starting address for
768                                 // this file's text
769                                 // (note - there may not be one!  If not, we detect this by
770                                 // finding the next .file record or running off the end of
771                                 // the symbol information)
772                                 DWORD tidx = v + 1;
773                                 while( (tidx < pDebugInfo->CoffSymbols->NumberOfSymbols) &&
774                                                 ((syms[tidx].N.Name.Short == 0) ||
775                                                  ((strncmp( (const char*)(&syms[tidx].N.ShortName),
776                                     ".text", 5 ) != 0) &&
777                                                   (syms[tidx].StorageClass == IMAGE_SYM_CLASS_FILE))))
778                                 {
779                                         // advance to next record
780                                         tidx++;
781                                 }
782                                 if( (tidx < pDebugInfo->CoffSymbols->NumberOfSymbols) &&
783                                         (syms[tidx].N.Name.Short != 0) &&
784                                         (strncmp( (const char*)(&syms[tidx].N.ShortName),
785                                 ".text", 5 ) == 0) )
786                                 {
787                                         // this is text record for the recently-seen .file record -
788                                         // extract the starting address for symbols from this file
789                                         sym_addr = baseAddr + syms[tidx].Value;
790                                 }
791                                 else
792                                 {
793                                         // there is not a .text record for the recently-seen .file
794                                         // TODO: is there any way we can
795                     // determine the needed information in this case?
796                                         sym_addr = 0;
797                                 }
798                         
799                                 // make note of the symbol
800                                 allSymbols += Symbol(name,
801                                                                                 "",
802                                                                                 Symbol::PDST_MODULE,
803                                                                                 Symbol::SL_GLOBAL,
804                                                                                 sym_addr,
805                                                                                 false);
806                         }
807                 }
808                 else if( syms[v].StorageClass == IMAGE_SYM_CLASS_LABEL )
809                 {
810                         // the record is a label
811
812                         // check whether the label indicates that the
813                         // module was compiled by gcc
814                         if( (name == "gcc2_compiled.") || (name == "___gnu_compiled_c") )
815                         {
816                                 gcc_compiled = true;
817                         }
818                 }
819                 else if(( (syms[v].StorageClass != IMAGE_SYM_CLASS_TYPE_DEFINITION)
820                                         && ISFCN(syms[v].Type))
821                                 || (gcc_compiled &&
822                                         (name == "__exit" || name == "_exit" || name == "exit")))
823                 {
824                         // the record represents a "type" (including functions)
825                         
826                         // the test for gcc and the exit variants is a kludge
827                         // to work around our difficulties in parsing the CygWin32 DLL
828                         sym_addr = (gcc_compiled ?
829                         syms[v].Value :
830                         baseAddr + syms[v].Value);
831
832                         if( syms[v].StorageClass == IMAGE_SYM_CLASS_EXTERNAL )
833                         {
834                                 allSymbols += Symbol(name,
835                                     "DEFAULT_MODULE",
836                                     Symbol::PDST_FUNCTION,
837                                                     Symbol::SL_GLOBAL,
838                                     sym_addr,
839                                     false);
840                         }
841                         else
842                         {
843                                 allSymbols += Symbol(name,
844                                     "DEFAULT_MODULE",
845                                     Symbol::PDST_FUNCTION,
846                                                 Symbol::SL_LOCAL,
847                                     sym_addr,
848                                     false);
849                         }
850
851                         // skip any auxiliary records with the function
852                         v += syms[v].NumberOfAuxSymbols;
853                 }
854                 else if( syms[v].SectionNumber > 0 )
855                 {
856                         // the record represents a variable (?)
857
858                         // determine the address to associate with the symbol
859                         sym_addr = (gcc_compiled ?
860                         syms[v].Value :
861                         baseAddr + syms[v].Value );
862
863                         if( name != ".text" )
864                         {
865                                 if (syms[v].StorageClass == IMAGE_SYM_CLASS_EXTERNAL)
866                                 {
867                                         allSymbols += Symbol(name,
868                                         "DEFAULT_MODULE",
869                                         Symbol::PDST_OBJECT,
870                                                                                 Symbol::SL_GLOBAL,
871                                                                                 sym_addr,
872                                         false);
873                                 }
874                                 else
875                                 {
876                                         allSymbols += Symbol(name,
877                                         "DEFAULT_MODULE",
878                                         Symbol::PDST_OBJECT,
879                                                                                 Symbol::SL_LOCAL,
880                                                                                 sym_addr,
881                                         false);
882                                 }
883                         }
884                         else
885                         {
886                                 // we processed the .text record when we saw
887                                 // its corresponding .file record - skip it
888                         }
889
890                         // skip any auxiliary records
891                         v += syms[v].NumberOfAuxSymbols;
892                 }
893                 else
894                 {
895                         // the record is of a type that we don't care about
896                         // skip it and all of its auxiliary records
897                         v += syms[v].NumberOfAuxSymbols;
898                 }
899
900
901         }
902
903         //
904         // now that we've seen all the symbols,
905         // we need to post-process them into something usable
906         //
907
908         // add an extra symbol to mark the end of the text segment
909         allSymbols += Symbol("",
910                                         "DEFAULT_MODULE",
911                                         Symbol::PDST_OBJECT,
912                                         Symbol::SL_GLOBAL, 
913                                         code_off_ + code_len_ * sizeof(Word),
914                                         false);
915
916     // Sort the symbols on address to find the function boundaries
917     allSymbols.sort(sym_offset_compare);
918
919         // find the function boundaries
920         for( u = 0; u < allSymbols.size(); u++ )
921         {
922                 unsigned int size = 0;
923                 if( allSymbols[u].type() == Symbol::PDST_FUNCTION )
924                 {
925                         // find the function boundary
926                         v = u+1;
927                         while(v < allSymbols.size())
928                         {
929                                 // The .ef below is a special symbol that gcc puts in to
930                                 // mark the end of a function.
931                                 if(allSymbols[v].addr() != allSymbols[u].addr() &&
932                                         (allSymbols[v].type() == Symbol::PDST_FUNCTION ||
933                                         allSymbols[v].name() == ".ef"))
934                                 {
935                                         break;
936                                 }
937                                 v++;
938                         }
939                         if(v < allSymbols.size())
940                         {
941                                 size = (unsigned)allSymbols[v].addr() 
942                                                 - (unsigned)allSymbols[u].addr();
943                         }
944                         else
945                         {
946                                 size = (unsigned)(code_off_ + code_len_*sizeof(Word))
947                                                  - (unsigned)allSymbols[u].addr();
948                         }
949                 }
950
951                 // save the information about this symbol
952                 if(allSymbols[u].name() != "")
953                 {
954                         symbols_[allSymbols[u].name()] =
955                                 Symbol(allSymbols[u].name(), 
956                                         isDll ? allSymbols[u].module() : "DEFAULT_MODULE", 
957                                         allSymbols[u].type(), allSymbols[u].linkage(),
958                                         allSymbols[u].addr(), allSymbols[u].kludge(),
959                                         size);
960                 }
961         }
962 }
963
964
965
966
967 string
968 Object::FindName( const char* stringTable, const IMAGE_SYMBOL& sym )
969 {
970         string name;
971
972         if (sym.N.Name.Short != 0) {
973                 char sname[9];
974                 strncpy(sname, (char *)(&sym.N.ShortName), 8);
975                 sname[8] = 0;
976                 name = sname;
977         } else {
978                 name = stringTable + sym.N.Name.Long;
979         }
980
981         return name;
982 }
983
984
985 // compare function for vector sort of
986 // CodeView modules
987 int
988 Object::ModInfo::CompareByOffset( const void* x, const void* y )
989 {
990     const ModInfo* px = (const ModInfo*)x;
991         const ModInfo* py = (const ModInfo*)y;
992         assert( (px != NULL) && (px->pCVMod != NULL) );
993         assert( (py != NULL) && (py->pCVMod != NULL) );
994
995         // access the offset for each module
996         DWORD offTextx = 0;
997         DWORD cbTextx = 0;
998         DWORD offTexty = 0;
999         DWORD cbTexty = 0;
1000         px->pCVMod->GetTextBounds( offTextx, cbTextx );
1001         py->pCVMod->GetTextBounds( offTexty, cbTexty );
1002
1003         int ret = 0;
1004         if( offTextx > offTexty )
1005         {
1006                 ret = 1;
1007         }
1008         else if( offTextx < offTexty )
1009         {
1010                 ret = -1;
1011         }
1012         else
1013         {
1014                 // the offsets are equal - try our next comparison criteria
1015                 if( (cbTextx != 0) && (cbTexty == 0) )
1016                 {
1017                         ret = 1;
1018                 }
1019                 else if( (cbTextx == 0) && (cbTexty != 0) )
1020                 {
1021                         ret = -1;
1022                 }
1023         }
1024         
1025         return ret;
1026 }
1027
1028
1029 int
1030 sym_offset_compare( const void *x, const void *y )
1031 {
1032     const Symbol *s1 = (const Symbol *)x;
1033     const Symbol *s2 = (const Symbol *)y;
1034     int ret = 0;
1035
1036
1037     // first try comparing by address
1038     if( s1->addr() < s2->addr() ) 
1039     {
1040         ret = -1;
1041     }
1042     else if( s1->addr() > s2->addr() )
1043     {
1044         ret = 1;
1045     }
1046     else
1047     {
1048         // the two symbols have the same address
1049         // use our next criteria (the existence of a size)
1050         // for a given address, we want symbols with a size
1051         // to occur before those without so that we can
1052         // use the size if we wish
1053         if( (s1->size() != 0) && (s2->size() == 0) )
1054         {
1055             ret = -1;
1056         }
1057         else if( (s1->size() == 0) && (s2->size() != 0) )
1058         {
1059             ret = 1;
1060         }
1061     }
1062     return ret;
1063 }
1064
1065
1066
1067 // FindModuleByOffset
1068 // Determines the Paradyn module name
1069 // based on the given offset into the .text section
1070 string
1071 Object::FindModuleByOffset( unsigned int offset,
1072                              const vector<Object::PDModInfo>& pdMods )
1073 {
1074     string retval = "";
1075     unsigned int i;
1076
1077     // we do simple linear search on Paradyn modules
1078     for( i = 0; i < pdMods.size(); i++ )
1079     {
1080         const PDModInfo& pdMod = pdMods[i];
1081
1082         if( (offset >= pdMod.offText) &&
1083             (offset < pdMod.offText + pdMod.cbText) )
1084         {
1085             retval = pdMod.name;
1086             break;
1087         }
1088     }
1089
1090     return retval;
1091 }
1092