Added support for using CodeView NB11 symbols in addition to COFF
[dyninst.git] / pdutil / h / CodeView.h
1 /*
2  * Copyright (c) 1996-1999 Barton P. Miller
3  * 
4  * We provide the Paradyn Parallel Performance Tools (below
5  * described as "Paradyn") on an AS IS basis, and do not warrant its
6  * validity or performance.  We reserve the right to update, modify,
7  * or discontinue this software at any time.  We shall have no
8  * obligation to supply such updates or modifications or any other
9  * form of support to you.
10  * 
11  * This license is for research uses.  For such uses, there is no
12  * charge. We define "research use" to mean you may freely use it
13  * inside your organization for whatever purposes you see fit. But you
14  * may not re-distribute Paradyn or parts of Paradyn, in any form
15  * source or binary (including derivatives), electronic or otherwise,
16  * to any other organization or entity without our permission.
17  * 
18  * (for other uses, please contact us at paradyn@cs.wisc.edu)
19  * 
20  * All warranties, including without limitation, any warranty of
21  * merchantability or fitness for a particular purpose, are hereby
22  * excluded.
23  * 
24  * By your use of Paradyn, you understand and agree that we (or any
25  * other person or entity with proprietary rights in Paradyn) are
26  * under no obligation to provide either maintenance services,
27  * update services, notices of latent defects, or correction of
28  * defects for Paradyn.
29  * 
30  * Even if advised of the possibility of such damages, under no
31  * circumstances shall we (or any other person or entity with
32  * proprietary rights in the software licensed hereunder) be liable
33  * to you or any third party for direct, indirect, or consequential
34  * damages of any character regardless of type of action, including,
35  * without limitation, loss of profits, loss of use, loss of good
36  * will, or computer failure or malfunction.  You agree to indemnify
37  * us (and any other person or entity with proprietary rights in the
38  * software licensed hereunder) for any and all liability it may
39  * incur to third parties resulting from your use of Paradyn.
40  */
41 //
42 // This file contains the declaration of the CodeView class.
43 // The CodeView class also declares several sub-classes and 
44 // sub-structs that describe the format of many CodeView 
45 // constructs.
46 //
47 // As is commonly the case in Microsoft programming, the 
48 // CodeView data layout involves variable-length constructs.
49 // (For example, many CodeView subsections involve
50 // variable-length arrays.)  C++ does not provide syntax
51 // capable of describing such situations, and so many of
52 // the sub-struct declarations below are incomplete.
53 //
54 // Also, this CodeView class is not complete in that it
55 // does not provide a window into all of the CodeView 
56 // information.  It is intended, at this time, to provide
57 // enough support for the functionality needed by Paradyn/DyninstAPI.
58 // The implementation should be flexible enough, however, to add
59 // additional support as needed.
60 //
61 // For these two reasons, this file should not be used as a 
62 // definition of the CodeView symbol format.  See the CodeView
63 // specification from MSDN for such purposes.
64 //
65 #ifndef CODEVIEW_H
66 #define CODEVIEW_H
67
68
69
70
//
// LPString
//
// A non-owning view of a length-prefixed string: the first byte holds
// the character count (0-255) and the characters follow immediately.
// Unlike a C string, there is no null termination.
//
// An LPString only stores a pointer; it never copies or frees the bytes
// it refers to, so the underlying buffer must outlive the LPString.
// A default-constructed LPString (NULL pointer) behaves as an empty
// string instead of dereferencing NULL.
//
class LPString
{
private:
    const char* data;       // address of the length byte, or NULL

public:
    // str is the address of the length byte of a length-prefixed string,
    // or NULL for "no string".  The constructor is deliberately left
    // non-explicit so that code relying on the implicit conversion from
    // const char* keeps compiling.
    LPString( const char* str = NULL ) : data( str ) {}

    // Number of characters in the string; 0 when there is no data.
    // The length byte is read as unsigned so that lengths in the range
    // 128-255 are not misread as negative values where char is signed.
    unsigned char GetLength( void ) const
    {
        if( data == NULL )
        {
            return 0;
        }
        return static_cast<unsigned char>( *data );
    }

    // Address of the first character (NOT null-terminated; use
    // GetLength() for the extent).  Returns NULL when there is no data.
    const char* GetChars( void ) const
    {
        if( data == NULL )
        {
            return NULL;
        }
        return data + 1;
    }

    // Conversion to a string object holding a copy of the characters.
    // Yields an empty string when there is no data.
    operator string( void ) const
    {
        if( data == NULL )
        {
            return string();
        }
        return string( GetChars(), GetLength() );
    }
};
93
94
95
96 //
97 // CodeView
98 //
99 // A CodeView object knows how to parse debug 
100 // information in CodeView format.
101 //
102 class CodeView
103 {
104 public:
105     // types of subsections
106         enum SubsectionType
107         {
108                 sstModule               = 0x120,
109                 sstTypes                = 0x121,
110                 sstPublic               = 0x122,
111                 sstPublicSym    = 0x123,
112                 sstSymbols              = 0x124,
113                 sstAlignSym             = 0x125,
114                 sstSrcLnSeg             = 0x126,
115                 sstSrcModule    = 0x127,
116                 sstLibraries    = 0x128,
117                 sstGlobalSym    = 0x129,
118                 sstGlobalPub    = 0x12a,
119                 sstGlobalTypes  = 0x12b,
120                 sstMPC                  = 0x12c,
121                 sstSegMap               = 0x12d,
122                 sstSegName              = 0x12e,
123                 sstPreComp              = 0x12f,
124                 sstOffsetMap16  = 0x131,
125                 sstOffsetMap32  = 0x132,
126                 sstFileIndex    = 0x133,
127                 sstStaticSym    = 0x134
128         };
129
130     // types of symbol records
131         enum SymbolType
132         {
133                 S_COMPILE               = 0x0001,
134                 S_SSEARCH               = 0x0005,
135                 S_END                   = 0x0006,
136                 S_SKIP                  = 0x0007,
137                 S_CVRESERVE             = 0x0008,
138                 S_OBJNAME               = 0x0009,
139                 S_ENDARG                = 0x000a,
140                 S_COBOLUDT              = 0x000b,
141                 S_MANYREG               = 0x000c,
142                 S_RETURN                = 0x000d,
143                 S_ENTRYTHIS             = 0x000e,
144                 S_REGISTER              = 0x1001,
145                 S_CONSTANT              = 0x1002,
146                 S_UDT                   = 0x1003,
147                 S_COBOLUDT_2    = 0x1004,
148                 S_MANYREG_2             = 0x1005,
149                 S_BPREL32               = 0x1006,
150                 S_LDATA32               = 0x1007,
151                 S_GDATA32               = 0x1008,
152                 S_PUB32                 = 0x1009,
153                 S_LPROC32               = 0x100a,
154                 S_GPROC32               = 0x100b,
155                 S_THUNK32               = 0x0206,
156                 S_BLOCK32               = 0x0207,
157                 S_WITH32                = 0x0208,
158                 S_LABEL32               = 0x0209,
159                 S_CEXMODEL32    = 0x020a,
160                 S_VFTTABLE32    = 0x100c,
161                 S_REGREL32              = 0x100d,
162                 S_LTHREAD32             = 0x100e,
163                 S_GTHREAD32             = 0x100f,
164                 S_LPROCMIPS             = 0x1010,
165                 S_GPROCMIPS             = 0x1011,
166                 S_PROCREF               = 0x0400,
167                 S_DATAREF               = 0x0401,
168                 S_ALIGN                 = 0x0402
169         };
170
171     // format of subsection directory header
172         struct SDHeader
173         {
174                 WORD    cbDirHeader;            // length of subsection directory header
175                 WORD    cbDirEntry;                     // length of each directory entry
176                 DWORD   cDir;                           // number of directory entries
177                 DWORD   IfoNextDir;                     // offset from IfaBase to next directory
178                                     // (currently unused)
179                 DWORD   flags;                          // (currently unused)
180         };
181
182     // format of subsection directory entries
183         struct SDEntry
184         {
185                 WORD    sst;                            // type of the subsection
186                 WORD    iMod;                           // index of associated module
187                 DWORD   offset;                         // offset from IfaBase of subsection
188                 DWORD   cb;                                     // size of subsection in bytes
189         };
190
191     // format of symbol subsection header
192         struct SymHeader
193         {
194                 WORD    symhash;                        // index of symbol hash function
195                 WORD    addrhash;                       // index of address hash function
196                 DWORD   cbSymbol;                       // size (bytes) of symbol table
197                 DWORD   cbSymHash;                      // size (bytes) of symbol hash table
198                 DWORD   cbAddrHash;                     // size (bytes) of address hash table
199         };
200
201
202     // format of common fields for symbol records
203         struct SymRecord
204         {
205                 WORD    length;                 // length of record, excluding length field
206                 WORD    index;                  // type of symbol record
207         };
208
209     // format of PROC (function) symbol records
210         struct SymRecordProc
211         {
212                 SymRecord       base;           // fields to all SymRecords
213                 DWORD           pParent;        // parent lexical scope
214                 DWORD           pEnd;           // end of lexical scope
215                 DWORD           pNext;          // next lexical scope
216                 DWORD           procLength;     // size of the procedure (bytes)
217                 DWORD           debugStart;     // offset in bytes from proc start to point
218                                 //      where stack frame has been set up
219                 DWORD           debugEnd;       // offset in bytes from proc start to point
220                                 //      where procedure is ready to return
221                 DWORD           procType;       // type of procedure type record
222                 DWORD           offset;         // offset portion of proc address
223                 WORD            segment;        // segment (PE section) part of proc address
224                 char            flags;          // flags
225                                                                 //    fpo       :1 true if function has frame
226                                 //                  pointer omitted
227                                                                 //    interrupt :1 true if function is
228                                 //                  interrupt routine
229                                                                 //    return    :1 true if function performs
230                                 //                  far return
231                                                                 //    never     :1 true if function never
232                                 //                  returns
233                                                                 //    unused    :4
234                 char            name[1];        // length-prefixed name of procedure
235         };
236
237     // format of PROCREF symbol records
238     // (A PROCREF references a PROC symbol record
239     // elsewhere in the CodeView information)
240         struct SymRecordProcRef
241         {
242                 SymRecord       base;           // fields common to all SymRecords
243                 DWORD           checksum;       // checksum of referenced symbol name
244                 DWORD           offset;         // offset of procedure symbol record
245                                 // from the module subsection (?)
246                 WORD            module;         // index of the module that contains the symbol
247         };
248
249     // format of DATA symbol records
250         struct SymRecordData
251         {
252                 SymRecord       base;           // fields common to all SymRecords
253                 DWORD           type;           // type of variable
254                 DWORD           offset;         // offset portion of variable address
255                 WORD            segment;        // segment (section) part of variable address
256                 char            name[1];        // length-prefixed name of variable
257         };
258
259     // format of BPREL symbol records
260     // (BPREL records contain information about
261     // variables contained on the stack)
262         struct SymRecordBPRel
263         {
264                 SymRecord       base;           // fields common to all SymRecords
265                 DWORD           offset;         // offset from BP of variable
266                 DWORD           type;           // type of variable
267                 char            name[1];        // length-prefixed name of variable
268         };
269
270     // format of LABEL symbol records
271     // LABEL records are sometimes used to 
272     // represent functions.
273         struct SymRecordLabel
274         {
275                 SymRecord       base;           // fields common to all SymRecords
276                 DWORD           offset;         // offset portion of label address
277                 WORD            segment;        // segment (PE section) part of label address
278                 char            flags;          // flags
279                                                                 //    fpo       :1 true if function has
280                                 //                  frame pointer omitted
281                                                                 //    interrupt :1 true if function is
282                                 //                  interrupt routine
283                                                                 //    return    :1 true if function performs
284                                 //                  far return
285                                                                 //    never     :1 true if function never
286                                 //                  returns
287                                                                 //    unused    :4
288                 char            name[1];        // length-prefixed name of procedure
289         };
290
291     // format of THUNK symbol records
292     // A THUNK is a piece of code outside a function
293         struct SymRecordThunk
294         {
295                 SymRecord       base;           // fields common to all SymRecords
296                 DWORD           pParent;        // parent lexical scope
297                 DWORD           pEnd;           // end of lexical scope
298                 DWORD           pNext;          // next lexical scope
299                 DWORD           offset;         // offset portion of thunk address
300                 WORD            segment;        // segment (PE section) part of thunk address
301                 WORD            thunkLength;    // length of thunk code (in bytes)
302                 char            ordinal;        // ordinal specifying type of thunk
303                                                                 // 0: notype
304                                                                 // 1: adjustor
305                                                                 // 2: vcall
306                                                                 // 3: pcode
307                 char            name[1];        // length-prefixed name of procedure
308                 // variant field.       if ordinal is:
309                 //                      notype => there is no variant field
310                 //                      adjustor => WORD offset from this pointer,
311         //                          then length-prefixed name of function
312         //                          to call
313                 //                      vcall => WORD displacement into virtual table
314                 //                      pcode => segment:offset of pcode entry point
315         };
316
317
318     // format of Module subsection
319         struct ModuleSubsection
320         {
321                 struct SegInfo
322                 {
323                         WORD    seg;                    // segment described
324                         WORD    pad;                    // padding - must be zero
325                         DWORD   offset;                 // offset in segment to where code starts
326                         DWORD   cbSeg;                  // number of bytes of code in the segment
327                 };
328
329                 WORD    ovlNumber;              // overlay number
330                 WORD    iLib;                   // library index if linked from library
331                 WORD    cSeg;                   // number of SegInfo structs in subsection
332                 char    style[2];               // debugging style (should be "CV")
333
334                 // SegInfo      sinfo[];        // array of segment information structs
335                 // char         name[1];        // length-prefixed name of module
336         };
337
338     // format of AlignSym subsection
339         struct AlignSymSubsection
340         {
341                 // sstAlignSym subsections have no header.
342                 // The subsection consists of a stream of variable-length
343         // SymRecord* entries
344         };
345
346     // format of SrcModule subsection
347         struct SrcModuleSubsection
348         {
349         struct FileInfo
350         {
351             WORD    cSegFile;   // number of segments that receive code
352                                 // from this source file
353             WORD    pad;
354             // DWORD    baseSrcLen[cSegFile];   // array of offsets for line/
355                                                 // address mapping table
356             // DWORD    startEnd[cSegFile][2];  // start/end offsets of
357                                                 // code for this source file
358             // char     name[1];                // length-prefixed name of file
359         };
360
361         WORD    cFile;          // number of files that contributed
362                                 // code/data to this module
363         WORD    cSeg;           // number of segments that received
364                                 // code/data from this module
365         // DWORD    baseSrcFile[cFile];     // array of offsets into
366                                             // source file array
367         // DWORD    startEnd[cSeg][2];      // start/end offsets of
368                                             // code or data for each segment
369
370         // WORD     seg[cSeg];              // segment indices for segments
371                                             // that receive code from this
372                                             // module
373         // WORD     pad[0-2];               // padding to maintain 4-byte
374                                             // alignment
375
376         // FileInfo fileInfo[];             // array of variable-length
377                                             // records describing source
378                                             // files
379         };
380
381
382     // object that encapsulates the various types of symbols
383         class Symbols
384         {
385         public:
386         // accessors
387                 const vector<SymRecordProc*>&
388             GetGlobalFunctions( void ) const    { return gprocs; }
389                 const vector<SymRecordProc*>&
390             GetLocalFunctions( void ) const     { return lprocs; }
391                 const vector<SymRecordData*>&
392             GetGlobalVariables( void ) const    { return gvars; }
393                 const vector<SymRecordData*>&
394             GetLocalVariables( void ) const             { return lvars; }
395                 const vector<SymRecordBPRel*>&
396             GetStackVariables( void ) const             { return bprels; }
397                 const vector<SymRecordLabel*>&
398             GetLabels( void ) const                             { return labels; }
399                 const vector<SymRecordThunk*>&
400             GetThunks( void ) const                             { return thunks; }
401         const vector<SymRecordData*>&
402             GetPublics( void ) const            { return pubs; }
403
404         // operations
405                 void    Parse( const char* pSymBase, DWORD cb );
406                 Symbols& operator=( const Symbols& syms );
407
408         private:
409                 vector<SymRecordProc*> gprocs;          // global functions
410                 vector<SymRecordProc*> lprocs;          // local functions
411                 vector<SymRecordData*> gvars;           // global variables
412                 vector<SymRecordData*> lvars;           // local variables
413                 vector<SymRecordBPRel*> bprels;     // stack variables
414                 vector<SymRecordLabel*> labels;     // labels
415                 vector<SymRecordThunk*> thunks;     // thunks (non-function code)
416         vector<SymRecordData*> pubs;        // public (catch-all) symbols
417
418         friend class CodeView;
419         };
420
421
422     // object that represents a CodeView module,
423     // by tying together the relevant subsections
424     // associated with that module
425         class Module
426         {
427         public:
428                 Module( void );
429                 Module( ModuleSubsection* pModule, unsigned int textId );
430
431                 string GetName( void ) const;
432                 WORD GetLibraryIndex( void ) const              { return pmod->iLib; }
433                 const Symbols& GetSymbols( void ) const         { return syms; }
434                 string GetSourceName( void ) const;
435                 bool    GetTextBounds( DWORD& offset, DWORD& cb ) const;
436
437                 Module& operator=( const Module& mod );
438
439         private:
440                 ModuleSubsection* pmod;         // sstModule subsection
441                 AlignSymSubsection* pas;        // sstAlignSym subsection
442                 SrcModuleSubsection* psrc;      // sstSrcModule subsection
443                 Symbols syms;                           // symbols from the sstAlignSym subsection
444         DWORD offsetText;           // offset of code in text section
445         DWORD cbText;               // size of code in text section
446
447                 friend class CodeView;
448         };
449
450
451     // constructors
452         CodeView( const char* pSymbols, unsigned int textSectionId )
453           : pBase( pSymbols ),
454                 textId( textSectionId )
455         {}
456
457     // accessors
458         const vector<Module>&           GetModules( void ) const        { return modules; }
459         const vector<LPString>&         GetLibraries( void ) const      { return libs; }
460     const Symbols&              GetSymbols( void ) const    { return syms; }
461
462     // operations
463         bool Parse( void );
464
465 private:
466         const char* pBase;                      // location of CodeView symbols
467         vector<Module> modules;         // modules represented in the executable
468         vector<LPString> libs;          // libraries used to build this executable
469     Symbols syms;               // symbols not associated with a module
470     unsigned int textId;        // section number for .text section
471
472         void    ParseModuleSubsection( SDEntry* pEntry );
473         void    ParseLibrariesSubsection( SDEntry* pEntry );
474         void    ParseAlignSymSubsection( SDEntry* pEntry );
475         void    ParseSrcModuleSubsection( SDEntry* pEntry );
476         void    ParseSymbolsWithHeaderSubsection( SDEntry* pEntry );
477 };
478
479 #endif // CODEVIEW_H