Added support for using CodeView NB11 symbols in addition to COFF
[dyninst.git] / pdutil / src / Object-nt.C
1 /*
2  * Copyright (c) 1996-1999 Barton P. Miller
3  * 
4  * We provide the Paradyn Parallel Performance Tools (below
5  * described as "Paradyn") on an AS IS basis, and do not warrant its
6  * validity or performance.  We reserve the right to update, modify,
7  * or discontinue this software at any time.  We shall have no
8  * obligation to supply such updates or modifications or any other
9  * form of support to you.
10  * 
11  * This license is for research uses.  For such uses, there is no
12  * charge. We define "research use" to mean you may freely use it
13  * inside your organization for whatever purposes you see fit. But you
14  * may not re-distribute Paradyn or parts of Paradyn, in any form
15  * source or binary (including derivatives), electronic or otherwise,
16  * to any other organization or entity without our permission.
17  * 
18  * (for other uses, please contact us at paradyn@cs.wisc.edu)
19  * 
20  * All warranties, including without limitation, any warranty of
21  * merchantability or fitness for a particular purpose, are hereby
22  * excluded.
23  * 
24  * By your use of Paradyn, you understand and agree that we (or any
25  * other person or entity with proprietary rights in Paradyn) are
26  * under no obligation to provide either maintenance services,
27  * update services, notices of latent defects, or correction of
28  * defects for Paradyn.
29  * 
30  * Even if advised of the possibility of such damages, under no
31  * circumstances shall we (or any other person or entity with
32  * proprietary rights in the software licensed hereunder) be liable
33  * to you or any third party for direct, indirect, or consequential
34  * damages of any character regardless of type of action, including,
35  * without limitation, loss of profits, loss of use, loss of good
36  * will, or computer failure or malfunction.  You agree to indemnify
37  * us (and any other person or entity with proprietary rights in the
38  * software licensed hereunder) for any and all liability it may
39  * incur to third parties resulting from your use of Paradyn.
40  */
41
42 // $Id: Object-nt.C,v 1.1 1999/06/17 18:35:27 pcroth Exp $
43 #include <iostream.h>
44 #include <iomanip.h>
45 #include <limits.h>
46
47 #include "util/h/Object.h"
48 #include "util/h/Object-nt.h"
49
50 #include "util/h/String.h"
51 #include "util/h/Vector.h"
52 #include "util/h/CodeView.h"
53
54
55 //---------------------------------------------------------------------------
56 // structures used only in this file
57 //---------------------------------------------------------------------------
58 struct PDModInfo
59 {
60     string          name;       // name of Paradyn module
61     unsigned int    offText;    // offset of module's code in text segment
62     unsigned int    cbText;     // size of module's code in text segment
63
64     PDModInfo( void )
65       : name( "" ),
66         offText( 0 ),
67         cbText( 0 )
68     {}
69
70     PDModInfo( string modName, unsigned int offset, unsigned int cb )
71       : name( modName ),
72         offText( offset ),
73         cbText( cb )
74     {}
75
76     PDModInfo& operator=( const PDModInfo& mi )
77     {
78         if( &mi != this )
79         {
80             name = mi.name;
81             offText = mi.offText;
82             cbText = mi.cbText;
83         }
84         return *this;
85     }
86 };
87
88 struct ModInfo
89 {
90         const CodeView::Module* pCVMod;     // CodeView information
91     unsigned int pdModIdx;              // index of Paradyn module
92                                         // this module is associated with
93                                         // in the pdMods array
94
95         ModInfo( const CodeView::Module* pCVModule = NULL,
96              unsigned int pdModIndex = 0 )
97           : pCVMod( pCVModule ),
98                 pdModIdx( pdModIndex )
99     {}
100
101     ModInfo& operator=( const ModInfo& mi )
102     {
103         if( &mi != this )
104         {
105             pCVMod = mi.pCVMod;
106             pdModIdx = mi.pdModIdx;
107         }
108         return *this;
109     }
110 };
111
112
113 //---------------------------------------------------------------------------
114 // prototypes of functions used in this file
115 //---------------------------------------------------------------------------
116 int     mod_offset_compare( const void* x, const void* y );
117 static  string  FindModuleByOffset( unsigned int offset,
118                                     const vector<PDModInfo>& pdMods );
119
120
121
122
123 //---------------------------------------------------------------------------
124 // Object method implementation
125 //---------------------------------------------------------------------------
126
127
128 Object::~Object( void )
129 {
130         if( pDebugInfo != NULL )
131         {
132                 UnmapDebugInformation( pDebugInfo );
133                 pDebugInfo = NULL;
134         }
135 }
136
137
138
139 void
140 Object::ParseDebugInfo( void )
141 {
142         IMAGE_DEBUG_INFORMATION* pDebugInfo = NULL;
143         
144         // access the module's debug information
145         pDebugInfo = MapDebugInformation(NULL, (LPTSTR)file_.string_of(), NULL, 0);
146         if( pDebugInfo != NULL )
147         {
148                 // ensure that the base address is valid
149                 if( baseAddr == NULL )
150                 {
151                         // use the image base address from the disk image
152                         // (this should only happen for EXEs; we should have
153                         // the in-core base address of DLLs.)
154                         // 
155                         // TODO: we should be able to use the in-core address
156                         // for EXEs as well
157                         baseAddr = pDebugInfo->ImageBase;
158                 }
159                 assert( baseAddr != NULL );
160
161                 // determine the location of the relevant sections
162                 ParseSectionMap( pDebugInfo );
163
164                 //
165                 // parse the symbols, if available
166                 // (note that we prefer CodeView over COFF)
167                 //
168                 if( pDebugInfo->CodeViewSymbols != NULL )
169                 {
170                         // we have CodeView debug information
171                         ParseCodeViewSymbols( pDebugInfo );
172                 }
173                 else if( pDebugInfo->CoffSymbols != NULL )
174                 {
175                         // we have COFF debug information
176                         ParseCOFFSymbols( pDebugInfo );
177                 }
178                 else
179                 {
180             // TODO - what to do when there's no debug information?
181                 }
182         }
183         else
184         {
185                 // indicate the failure to access the debug information
186         log_perror(err_func_, "MapDebugInformation");
187         }
188 }
189
190
191 void
192 Object::ParseSectionMap( IMAGE_DEBUG_INFORMATION* pDebugInfo )
193 {
194         DWORD i;
195
196         // currently we care only about the .text and .data segments
197         for( i = 0; i < pDebugInfo->NumberOfSections; i++ )
198         {
199                 IMAGE_SECTION_HEADER& section = pDebugInfo->Sections[i];
200
201                 if( strncmp( (const char*)section.Name, ".text", 5 ) == 0 )
202                 {
203                         // note that section numbers are one-based
204                         textSectionId = i + 1;
205
206                         code_ptr_       = (Word*)(((char*)pDebugInfo->MappedBase) +
207                             section.PointerToRawData);
208                         code_off_       = baseAddr + section.VirtualAddress;
209                         code_len_       = section.Misc.VirtualSize / sizeof(Word);
210                 }
211                 else if( strncmp( (const char*)section.Name, ".data", 5 ) == 0 )
212                 {
213                         // note that section numbers are one-based
214                         dataSectionId = i + 1;
215
216                         data_ptr_       = (Word*)(((char*)pDebugInfo->MappedBase) +
217                             section.PointerToRawData);
218                         data_off_       = baseAddr + section.VirtualAddress;
219                         data_len_       = section.Misc.VirtualSize / sizeof(Word);
220                 }
221         }
222 }
223
224
225 void
226 Object::ParseCodeViewSymbols( IMAGE_DEBUG_INFORMATION* pDebugInfo )
227 {
228         CodeView cv( (const char*)pDebugInfo->CodeViewSymbols, textSectionId );
229
230         if( cv.Parse() )
231         {
232                 bool isDll = (pDebugInfo->Characteristics & IMAGE_FILE_DLL);
233                 dictionary_hash<string, unsigned int> libDict( string::hash, 19 );
234                 vector<Symbol> allSymbols;
235                 vector<ModInfo> cvMods;         // CodeView's notion of modules
236         vector<PDModInfo> pdMods;       // Paradyn's notion of modules
237                 unsigned int midx;
238                 unsigned int i;
239
240                 //
241                 // build a module map of the .text section
242                 // by creating a list of CodeView modules that contribute to 
243                 // the .text section, sorted by offset
244                 // 
245                 // note that the CodeView modules vector uses one-based indexing
246                 //
247                 const vector<CodeView::Module>& modules = cv.GetModules();
248                 for( midx = 1; midx < modules.size(); midx++ )
249                 {
250                         const CodeView::Module& mod = modules[midx];
251
252                         //
253                         // determine the Paradyn module that this
254                         // module will be associated with...
255                         //
256
257                         // ...first determine the library that contains
258             // this module, if any...
259                         string libName;
260                         if( mod.GetLibraryIndex() != 0 )
261                         {
262                                 libName = cv.GetLibraries()[mod.GetLibraryIndex()];
263                         }
264
265                         // ...next figure out the Paradyn module with which to associate
266             // this CodeView module...
267             unsigned int pdModIdx = UINT_MAX;
268                         if( !isDll && (mod.GetLibraryIndex() != 0) )
269                         {
270                                 // associate symbol with static library
271
272                                 // handle the case where this is the first time we've
273                                 // seen this library
274                                 if( !libDict.defines( libName ) )
275                                 {
276                                         // add a Paradyn module for the library
277                     // offset and size will be patched later
278                     pdMods += PDModInfo( libName, 0, 0 );
279                     pdModIdx = pdMods.size() - 1;
280
281                                         // keep track of where we added the library,
282                                         // so we can patch the location of the library's code later
283                                         libDict[libName] = pdModIdx;
284                                 }
285                 else
286                 {
287                     // look up the index we saved earlier
288                     pdModIdx = libDict[libName];
289                 }
290                         }
291                         else if( !isDll )
292                         {
293                 // add a Paradyn module for the module's source file
294
295                                 DWORD offset;   // offset of code in text section
296                                 DWORD cb;               // size of code in text section
297
298
299                                 // find a source code name to associate with this module
300                                 if( mod.GetTextBounds( offset, cb ) )
301                                 {
302                     pdMods += PDModInfo( mod.GetSourceName(), offset, cb );
303                     pdModIdx = pdMods.size() - 1;
304                                 }
305                                 else
306                                 {
307                                         // the module doesn't contribute to the .text section
308                     // TODO - so do we care about this module?  should
309                     // we be keeping track of contributions
310                     // to the data section?
311                                 }
312                         }
313                         else
314                         {
315                 // module is part of a DLL, so we 
316                 // associate any symbols directly with the DLL
317                 pdMods += PDModInfo( pDebugInfo->ImageFileName, 0,
318                                         code_len_ * sizeof(Word) );
319                 pdModIdx = pdMods.size() - 1;
320                         }
321
322                         // add the module info to our vector for later sorting
323                         // (but only if it contributes code to the .text section)
324             DWORD offText;
325             DWORD cbText;
326                         if( mod.GetTextBounds( offText, cbText ) && (cbText > 0) )
327                         {
328                 assert( pdModIdx != UINT_MAX );
329                                 cvMods += ModInfo( &mod, pdModIdx );
330                         }
331                 }
332
333                 // sort list of modules by offset to give us our CodeView module map
334                 cvMods.sort( mod_offset_compare );
335
336 #ifdef _DEBUG
337         // dump the CodeView module map
338         cout << "CodeView module .text map:\n";
339         for( midx = 0; midx < cvMods.size(); midx++ )
340         {
341             DWORD offText = 0;
342             DWORD cbText = 0;
343             string name = cvMods[midx].pCVMod->GetName();
344             cvMods[midx].pCVMod->GetTextBounds( offText, cbText );
345
346             cout << hex
347                 << "0x" << setw( 8 ) << setfill( '0' ) << offText
348                 << "-0x" << setw( 8 ) << setfill( '0' ) << offText + cbText - 1
349                 << " (" << setw( 8 ) << setfill( '0' ) << cbText << ")\t"
350                 << name
351                 << dec
352                 << endl;
353         }
354 #endif // _DEBUG
355
356         // compute bounds for Paradyn modules
357         for( midx = 0; midx < cvMods.size(); midx++ )
358         {
359             PDModInfo& pdMod = pdMods[cvMods[midx].pdModIdx];
360             DWORD offTextCV = 0;
361             DWORD cbTextCV = 0;
362
363             // determine the bounds of the CodeView text
364             cvMods[midx].pCVMod->GetTextBounds( offTextCV, cbTextCV );
365
366             if( cbTextCV > 0 )
367             {
368                 // expand the PD module to cover the CV module's bounds
369                 if( pdMod.cbText == 0 )
370                 {
371                     // this is the first CodeView module we've seen
372                     // for this Paradyn module
373                     pdMod.offText = offTextCV;
374                     pdMod.cbText = cbTextCV;
375                 }
376                 else
377                 {
378                     // we have to handle the (potential) expansion of
379                     // the existing bounds
380                     if( offTextCV < pdMod.offText )
381                     {
382                         DWORD oldOffset = pdMod.offText;
383
384                         // reset the base and extend the bound
385                         pdMod.offText = offTextCV;
386                         pdMod.cbText += (oldOffset - offTextCV);
387                     }
388
389                     if((offTextCV + cbTextCV) > (pdMod.offText + pdMod.cbText))
390                     {
391                         // extend the bound
392                         pdMod.cbText += 
393                             ((offTextCV + cbTextCV) -
394                                 (pdMod.offText + pdMod.cbText));
395                     }
396                 }
397             }
398         }
399
400 #ifdef _DEBUG
401         // dump the Paradyn module map
402         cout << "Paradyn module .text map:\n";
403         for( midx = 0; midx < pdMods.size(); midx++ )
404         {
405             DWORD offText = pdMods[midx].offText;
406             DWORD cbText = pdMods[midx].cbText;
407             string name = pdMods[midx].name;
408
409             cout << hex
410                 << "0x" << setw( 8 ) << setfill( '0' ) << offText
411                 << "-0x" << setw( 8 ) << setfill( '0' ) << offText + cbText - 1
412                 << " (" << setw( 8 ) << setfill( '0' ) << cbText << ")\t"
413                 << name
414                 << dec
415                 << endl;
416         }
417 #endif // _DEBUG
418
419         // add entries for our Paradyn modules
420         for( midx = 0; midx < pdMods.size(); midx++ )
421         {
422                     allSymbols += Symbol( pdMods[midx].name,
423                             "",
424                             Symbol::PDST_MODULE,
425                             Symbol::SL_GLOBAL,
426                             code_off_ + pdMods[midx].offText,
427                             false,
428                             pdMods[midx].cbText );
429         }
430
431
432                 //
433                 // now that we have a module map of the .text segment,
434                 // consider the symbols defined by each module
435                 //
436                 for( midx = 0; midx < cvMods.size(); midx++ )
437                 {
438                         const CodeView::Module& mod = *(cvMods[midx].pCVMod);
439             PDModInfo& pdMod = pdMods[cvMods[midx].pdModIdx];
440                         
441                         // add symbols for each global function defined in the module
442             {
443                             const vector<CodeView::SymRecordProc*>& gprocs =
444                     mod.GetSymbols().GetGlobalFunctions();
445                             for( i = 0; i < gprocs.size(); i++ )
446                             {
447                                     const CodeView::SymRecordProc* proc = gprocs[i];
448
449                                     // build a symbol from the proc information
450                                     LPString lpsName( proc->name );
451                                     string strName = (string)lpsName;
452
453                                     Address addr = code_off_ + proc->offset;
454
455                                     allSymbols += ( Symbol( strName,
456                                             pdMod.name,
457                                             Symbol::PDST_FUNCTION,
458                                             Symbol::SL_GLOBAL,
459                                             addr,
460                                             false,
461                                             proc->procLength ));
462                             }
463             }
464
465                         // add symbols for each local function defined in the module
466             {
467                             const vector<CodeView::SymRecordProc*>& lprocs =
468                        mod.GetSymbols().GetLocalFunctions();
469                             for( i = 0; i < lprocs.size(); i++ )
470                             {
471                                     const CodeView::SymRecordProc* proc = lprocs[i];
472                                     LPString lpsName( proc->name );
473                                     string strName = (string)lpsName;
474
475                                     Address addr = code_off_ + proc->offset;
476
477                                     allSymbols += ( Symbol( strName,
478                                             pdMod.name,
479                                             Symbol::PDST_FUNCTION,
480                                             Symbol::SL_LOCAL,
481                                             addr,
482                                             false,
483                                             proc->procLength ));
484                             }
485             }
486
487                         // handle thunks
488             {
489                             const vector<CodeView::SymRecordThunk*>& thunks =
490                     mod.GetSymbols().GetThunks();
491                             for( i = 0; i < thunks.size(); i++ )
492                             {
493                                     const CodeView::SymRecordThunk* thunk = thunks[i];
494                                     LPString lpsName( thunk->name );
495                                     string strName = (string)lpsName;
496
497                                     Address addr = code_off_ + thunk->offset;
498
499                                     allSymbols += ( Symbol( strName,
500                                             pdMod.name,
501                                             Symbol::PDST_FUNCTION,
502                                             Symbol::SL_GLOBAL,
503                                             addr,
504                                             false,
505                                             thunk->thunkLength ) );
506                             }
507             }
508
509                         // add symbols for each global variable defined in the module
510             {
511                             const vector<CodeView::SymRecordData*>& gvars =
512                     mod.GetSymbols().GetGlobalVariables();
513                             for( i = 0; i < gvars.size(); i++ )
514                             {
515                                     const CodeView::SymRecordData* pVar = gvars[i];
516                                     LPString lpsName( pVar->name );
517                                     string strName = (string)lpsName;
518
519                                     Address addr = data_off_ + pVar->offset;
520
521                                     allSymbols += ( Symbol( strName,
522                                             pdMod.name,
523                                             Symbol::PDST_OBJECT,
524                                             Symbol::SL_GLOBAL,
525                                             addr,
526                                             false,
527                                             0 ));               // will be patched later (?)
528                             }
529             }
530
531             {
532                             const vector<CodeView::SymRecordData*>& lvars =
533                     mod.GetSymbols().GetGlobalVariables();
534                             for( i = 0; i < lvars.size(); i++ )
535                             {
536                                     const CodeView::SymRecordData* pVar = lvars[i];
537                                     LPString lpsName( pVar->name );
538                                     string strName = (string)lpsName;
539
540                                     Address addr = data_off_ + pVar->offset;
541
542                                     allSymbols += ( Symbol( strName,
543                                             pdMod.name,
544                                             Symbol::PDST_OBJECT,
545                                             Symbol::SL_LOCAL,
546                                             addr,
547                                             false,
548                                             0 ));               // will be patched later (?)
549                             }
550             }
551                 }
552
553         // once we've handled the symbols that the CodeView object
554         // could discover and associate with a module, we've
555         // got to do something with symbols that were not explicitly
556         // associated with a module in the CodeView information
557
558         // Unfortunately, VC++/DF produce S_PUB32 symbols
559         // for functions in some cases.  (For example,
560         // when building a Digital Fortran program, the
561         // software produces an executable with symbols
562         // from the Fortran runtime libraries as S_PUB32
563         // records.)  We do our best to try to determine
564         // whether the symbol is a function, and if so,
565         // how large it is, which module it belongs to, etc.
566                 const vector<CodeView::SymRecordData*>& pubs =
567                                                 cv.GetSymbols().GetPublics();
568                 for( i = 0; i < pubs.size(); i++ )
569                 {
570                         const CodeView::SymRecordData* sym = pubs[i];
571
572                         LPString lpsName( sym->name );
573                         string strName = (string)lpsName;
574
575             // we now have to try to determine the type of the
576             // symbol.  Since we're only given a type and a location,
577             // (and the type might not even be valid) we assume
578             // that any public symbol in the code section is
579             // a function and we try to determine which module it
580             // belongs to based on the module map we constructed earlier
581             if( sym->segment == textSectionId )
582             {
583                             Address addr = code_off_ + sym->offset;
584
585                 // save the symbol
586                             allSymbols += Symbol( strName,
587                                     FindModuleByOffset( sym->offset, pdMods ),
588                                     Symbol::PDST_FUNCTION,
589                                     Symbol::SL_GLOBAL,
590                                     addr,
591                                     false,
592                                     0 );              // will be patched later
593             }
594             else if( sym->segment == dataSectionId )
595             {
596                             Address addr = data_off_ + sym->offset;
597
598                             allSymbols += Symbol( strName,
599                                     FindModuleByOffset( sym->offset, pdMods ),
600                                     Symbol::PDST_OBJECT,
601                                     Symbol::SL_GLOBAL,
602                                     addr,
603                                     false,
604                                     0 );              // will be patched later
605             }
606             else
607             {
608                 // TODO - the symbol is not in the text or data
609                 // sections - do we care about it?
610             }
611                 }
612                 
613         // now we sort all of our symbols by offset
614         // so that we can patch up any outstanding sizes
615         allSymbols.sort( symbol_compare );
616         for( i = 0; i < allSymbols.size(); i++ )
617         {
618             Symbol& sym = allSymbols[i];
619
620             if( (sym.name() != "") && (sym.size() == 0) &&
621                 ((sym.type() == Symbol::PDST_FUNCTION) ||
622                  (sym.type() == Symbol::PDST_OBJECT)))
623             {
624                 // patch the symbol's size
625                 // we consider the symbol's size to be
626                 // the distance to the next symbol
627                 // (sometimes this causes us to overestimate)
628                 unsigned int cb;
629
630                 if( (i == (allSymbols.size() - 1)) ||
631                     (allSymbols[i+1].type() != sym.type()) )
632                 {
633                     // size is the remainder of the current section
634                     if( sym.type() == Symbol::PDST_FUNCTION )
635                     {
636                         // size is remainder of the .text section
637                         cb = (code_off_ + code_len_*sizeof(Word)) - sym.addr();
638                     }
639                     else
640                     {
641                         // size is remainder of the .data section
642                         cb = (data_off_ + data_len_*sizeof(Word)) - sym.addr();
643                     }
644                 }
645                 else
646                 {
647                     // size is just the delta between symbols
648                     cb = allSymbols[i+1].addr() - sym.addr();
649                 }
650                 sym.change_size( cb );
651             }
652         }
653
654                 // our symbols are finally ready to enter into
655         // the main symbol dictionary
656                 for( i = 0; i < allSymbols.size(); i++ )
657                 {
658                         if(allSymbols[i].name() != "")
659                         {
660                                 symbols_[allSymbols[i].name()] = allSymbols[i];
661                         }
662                 }
663         }
664         else
665         {
666         // TODO - how to indicate the failure to parse symbols?
667         }
668 }
669
670
671
672
673
674 void
675 Object::ParseCOFFSymbols( IMAGE_DEBUG_INFORMATION* pDebugInfo )
676 {
677         IMAGE_COFF_SYMBOLS_HEADER* pHdr = pDebugInfo->CoffSymbols;
678     vector<Symbol>      allSymbols;
679         bool gcc_compiled = false;
680         bool isDll = (pDebugInfo->Characteristics & IMAGE_FILE_DLL);
681         DWORD u, v;
682
683         
684         // find the location of the symbol records and string table
685         IMAGE_SYMBOL* syms = (IMAGE_SYMBOL*)(((char*)pHdr) +
686                             pHdr->LvaToFirstSymbol);
687         char* stringTable = ((char*)syms) +
688                             pHdr->NumberOfSymbols * sizeof( IMAGE_SYMBOL );
689
690
691         // for DLLs, we ignore filename information and associate 
692         // symbols with a module representing the DLL
693         if( isDll )
694         {
695                 allSymbols += Symbol( pDebugInfo->ImageFileName,
696                                                                 "",
697                                                                 Symbol::PDST_MODULE,
698                                                                 Symbol::SL_GLOBAL,
699                                                                 code_off_,
700                                                                 false );
701         }
702
703
704         // parse the COFF records
705         for( v = 0; v < pDebugInfo->CoffSymbols->NumberOfSymbols; v++ )
706         {
707                 string name = FindName( stringTable, syms[v] );
708                 Address sym_addr = NULL;
709
710
711                 //
712                 // handle the various types of COFF records...
713                 //
714
715                 if( name.prefixed_by("_$$$") || name.prefixed_by("$$$") )
716                 {
717                         // the record represents a branch target (?)
718                         // skip it
719                         v += syms[v].NumberOfAuxSymbols;
720                 }
721                 else if( syms[v].StorageClass == IMAGE_SYM_CLASS_FILE )
722                 {
723                         // the record is a file record
724                         //
725                         // note that for DLLs, we associate symbols directly with
726                         // the DLL and ignore any filename information we find
727                         if( !isDll )
728                         {
729                                 // extract the name of the source file
730                                 name = (char*)(&syms[v+1]);
731
732                                 // skip auxiliary records containing the filename
733                                 v += (strlen(name.string_of()) / sizeof(IMAGE_SYMBOL)) + 1;
734
735                                 // find a .text record following the file name
736                                 // if there is one, it contains the starting address for
737                                 // this file's text
738                                 // (note - there may not be one!  If not, we detect this by
739                                 // finding the next .file record or running off the end of
740                                 // the symbol information)
741                                 DWORD tidx = v + 1;
742                                 while( (tidx < pDebugInfo->CoffSymbols->NumberOfSymbols) &&
743                                                 ((syms[tidx].N.Name.Short == 0) ||
744                                                  ((strncmp( (const char*)(&syms[tidx].N.ShortName),
745                                     ".text", 5 ) != 0) &&
746                                                   (syms[tidx].StorageClass == IMAGE_SYM_CLASS_FILE))))
747                                 {
748                                         // advance to next record
749                                         tidx++;
750                                 }
751                                 if( (tidx < pDebugInfo->CoffSymbols->NumberOfSymbols) &&
752                                         (syms[tidx].N.Name.Short != 0) &&
753                                         (strncmp( (const char*)(&syms[tidx].N.ShortName),
754                                 ".text", 5 ) == 0) )
755                                 {
756                                         // this is text record for the recently-seen .file record -
757                                         // extract the starting address for symbols from this file
758                                         sym_addr = baseAddr + syms[tidx].Value;
759                                 }
760                                 else
761                                 {
762                                         // there is not a .text record for the recently-seen .file
763                                         // TODO: is there any way we can
764                     // determine the needed information in this case?
765                                         sym_addr = 0;
766                                 }
767                         
768                                 // make note of the symbol
769                                 allSymbols += Symbol(name,
770                                                                                 "",
771                                                                                 Symbol::PDST_MODULE,
772                                                                                 Symbol::SL_GLOBAL,
773                                                                                 sym_addr,
774                                                                                 false);
775                         }
776                 }
777                 else if( syms[v].StorageClass == IMAGE_SYM_CLASS_LABEL )
778                 {
779                         // the record is a label
780
781                         // check whether the label indicates that the
782                         // module was compiled by gcc
783                         if( (name == "gcc2_compiled.") || (name == "___gnu_compiled_c") )
784                         {
785                                 gcc_compiled = true;
786                         }
787                 }
788                 else if(( (syms[v].StorageClass != IMAGE_SYM_CLASS_TYPE_DEFINITION)
789                                         && ISFCN(syms[v].Type))
790                                 || (gcc_compiled &&
791                                         (name == "__exit" || name == "_exit" || name == "exit")))
792                 {
793                         // the record represents a "type" (including functions)
794                         
795                         // the test for gcc and the exit variants is a kludge
796                         // to work around our difficulties in parsing the CygWin32 DLL
797                         sym_addr = (gcc_compiled ?
798                         syms[v].Value :
799                         baseAddr + syms[v].Value);
800
801                         if( syms[v].StorageClass == IMAGE_SYM_CLASS_EXTERNAL )
802                         {
803                                 allSymbols += Symbol(name,
804                                     "DEFAULT_MODULE",
805                                     Symbol::PDST_FUNCTION,
806                                                     Symbol::SL_GLOBAL,
807                                     sym_addr,
808                                     false);
809                         }
810                         else
811                         {
812                                 allSymbols += Symbol(name,
813                                     "DEFAULT_MODULE",
814                                     Symbol::PDST_FUNCTION,
815                                                 Symbol::SL_LOCAL,
816                                     sym_addr,
817                                     false);
818                         }
819
820                         // skip any auxiliary records with the function
821                         v += syms[v].NumberOfAuxSymbols;
822                 }
823                 else if( syms[v].SectionNumber > 0 )
824                 {
825                         // the record represents a variable (?)
826
827                         // determine the address to associate with the symbol
828                         sym_addr = (gcc_compiled ?
829                         syms[v].Value :
830                         baseAddr + syms[v].Value );
831
832                         if( name != ".text" )
833                         {
834                                 if (syms[v].StorageClass == IMAGE_SYM_CLASS_EXTERNAL)
835                                 {
836                                         allSymbols += Symbol(name,
837                                         "DEFAULT_MODULE",
838                                         Symbol::PDST_OBJECT,
839                                                                                 Symbol::SL_GLOBAL,
840                                                                                 sym_addr,
841                                         false);
842                                 }
843                                 else
844                                 {
845                                         allSymbols += Symbol(name,
846                                         "DEFAULT_MODULE",
847                                         Symbol::PDST_OBJECT,
848                                                                                 Symbol::SL_LOCAL,
849                                                                                 sym_addr,
850                                         false);
851                                 }
852                         }
853                         else
854                         {
855                                 // we processed the .text record when we saw
856                                 // its corresponding .file record - skip it
857                         }
858
859                         // skip any auxiliary records
860                         v += syms[v].NumberOfAuxSymbols;
861                 }
862                 else
863                 {
864                         // the record is of a type that we don't care about
865                         // skip it and all of its auxiliary records
866                         v += syms[v].NumberOfAuxSymbols;
867                 }
868
869
870         }
871
872         //
873         // now that we've seen all the symbols,
874         // we need to post-process them into something usable
875         //
876
877         // add an extra symbol to mark the end of the text segment
878         allSymbols += Symbol("",
879                                         "DEFAULT_MODULE",
880                                         Symbol::PDST_OBJECT,
881                                         Symbol::SL_GLOBAL, 
882                                         code_off_ + code_len_ * sizeof(Word),
883                                         false);
884
885     // Sort the symbols on address to find the function boundaries
886     allSymbols.sort(symbol_compare);
887
888         // find the function boundaries
889         for( u = 0; u < allSymbols.size(); u++ )
890         {
891                 unsigned int size = 0;
892                 if( allSymbols[u].type() == Symbol::PDST_FUNCTION )
893                 {
894                         // find the function boundary
895                         v = u+1;
896                         while(v < allSymbols.size())
897                         {
898                                 // The .ef below is a special symbol that gcc puts in to
899                                 // mark the end of a function.
900                                 if(allSymbols[v].addr() != allSymbols[u].addr() &&
901                                         (allSymbols[v].type() == Symbol::PDST_FUNCTION ||
902                                         allSymbols[v].name() == ".ef"))
903                                 {
904                                         break;
905                                 }
906                                 v++;
907                         }
908                         if(v < allSymbols.size())
909                         {
910                                 size = (unsigned)allSymbols[v].addr() 
911                                                 - (unsigned)allSymbols[u].addr();
912                         }
913                         else
914                         {
915                                 size = (unsigned)(code_off_ + code_len_*sizeof(Word))
916                                                  - (unsigned)allSymbols[u].addr();
917                         }
918                 }
919
920                 // save the information about this symbol
921                 if(allSymbols[u].name() != "")
922                 {
923                         symbols_[allSymbols[u].name()] =
924                                 Symbol(allSymbols[u].name(), 
925                                         isDll ? allSymbols[u].module() : "DEFAULT_MODULE", 
926                                         allSymbols[u].type(), allSymbols[u].linkage(),
927                                         allSymbols[u].addr(), allSymbols[u].kludge(),
928                                         size);
929                 }
930         }
931 }
932
933
934
935
936 string
937 Object::FindName( const char* stringTable, const IMAGE_SYMBOL& sym )
938 {
939         string name;
940
941         if (sym.N.Name.Short != 0) {
942                 char sname[9];
943                 strncpy(sname, (char *)(&sym.N.ShortName), 8);
944                 sname[8] = 0;
945                 name = sname;
946         } else {
947                 name = stringTable + sym.N.Name.Long;
948         }
949
950         return name;
951 }
952
953
954 // compare function for vector sort of
955 // CodeView modules
956 int
957 mod_offset_compare( const void* x, const void* y )
958 {
959         const ModInfo* px = (const ModInfo*)x;
960         const ModInfo* py = (const ModInfo*)y;
961         assert( (px != NULL) && (px->pCVMod != NULL) );
962         assert( (py != NULL) && (py->pCVMod != NULL) );
963
964         // access the offset for each module
965         DWORD offTextx = 0;
966         DWORD cbTextx = 0;
967         DWORD offTexty = 0;
968         DWORD cbTexty = 0;
969         px->pCVMod->GetTextBounds( offTextx, cbTextx );
970         py->pCVMod->GetTextBounds( offTexty, cbTexty );
971
972         int ret = 0;
973         if( offTextx > offTexty )
974         {
975                 ret = 1;
976         }
977         else if( offTextx < offTexty )
978         {
979                 ret = -1;
980         }
981         else
982         {
983                 // the offsets are equal - try our next comparison criteria
984                 if( (cbTextx != 0) && (cbTexty == 0) )
985                 {
986                         ret = 1;
987                 }
988                 else if( (cbTextx == 0) && (cbTexty != 0) )
989                 {
990                         ret = -1;
991                 }
992         }
993         
994         return ret;
995 }
996
997
998
999
1000 // FindModuleByOffset
1001 // Determines the Paradyn module name
1002 // based on the given offset into the .text section
1003 static
1004 string
1005 FindModuleByOffset( unsigned int offset, const vector<PDModInfo>& pdMods )
1006 {
1007     string retval = "";
1008     unsigned int i;
1009
1010     // we do simple linear search on Paradyn modules
1011     for( i = 0; i < pdMods.size(); i++ )
1012     {
1013         const PDModInfo& pdMod = pdMods[i];
1014
1015         if( (offset >= pdMod.offText) &&
1016             (offset < pdMod.offText + pdMod.cbText) )
1017         {
1018             retval = pdMod.name;
1019             break;
1020         }
1021     }
1022
1023     return retval;
1024 }
1025