Last round of fixups for mips and sparc for the tramp recursion guard.
[dyninst.git] / pdutil / h / CodeView.h
1 /*
2  * Copyright (c) 1996-1999 Barton P. Miller
3  * 
4  * We provide the Paradyn Parallel Performance Tools (below
5  * described as "Paradyn") on an AS IS basis, and do not warrant its
6  * validity or performance.  We reserve the right to update, modify,
7  * or discontinue this software at any time.  We shall have no
8  * obligation to supply such updates or modifications or any other
9  * form of support to you.
10  * 
11  * This license is for research uses.  For such uses, there is no
12  * charge. We define "research use" to mean you may freely use it
13  * inside your organization for whatever purposes you see fit. But you
14  * may not re-distribute Paradyn or parts of Paradyn, in any form
15  * source or binary (including derivatives), electronic or otherwise,
16  * to any other organization or entity without our permission.
17  * 
18  * (for other uses, please contact us at paradyn@cs.wisc.edu)
19  * 
20  * All warranties, including without limitation, any warranty of
21  * merchantability or fitness for a particular purpose, are hereby
22  * excluded.
23  * 
24  * By your use of Paradyn, you understand and agree that we (or any
25  * other person or entity with proprietary rights in Paradyn) are
26  * under no obligation to provide either maintenance services,
27  * update services, notices of latent defects, or correction of
28  * defects for Paradyn.
29  * 
30  * Even if advised of the possibility of such damages, under no
31  * circumstances shall we (or any other person or entity with
32  * proprietary rights in the software licensed hereunder) be liable
33  * to you or any third party for direct, indirect, or consequential
34  * damages of any character regardless of type of action, including,
35  * without limitation, loss of profits, loss of use, loss of good
36  * will, or computer failure or malfunction.  You agree to indemnify
37  * us (and any other person or entity with proprietary rights in the
38  * software licensed hereunder) for any and all liability it may
39  * incur to third parties resulting from your use of Paradyn.
40  */
41
42 // $Id: CodeView.h,v 1.2 1999/07/14 17:34:30 paradyn Exp $
43
44 //
45 // This file contains the declaration of the CodeView class.
46 // The CodeView class also declares several sub-classes and 
47 // sub-structs that describe the format of many CodeView 
48 // constructs.
49 //
50 // As is commonly the case in Microsoft programming, the 
51 // CodeView data layout involves variable-length constructs.
52 // (For example, many CodeView subsections involve
53 // variable-length arrays.)  C++ does not provide syntax
54 // capable of describing such situations, and so many of
55 // the sub-struct declarations below are incomplete.
56 //
57 // Also, this CodeView class is not complete in that it
58 // does not provide a window into all of the CodeView 
59 // information.  It is intended, at this time, to provide
60 // enough support for the functionality needed by Paradyn/DyninstAPI.
61 // The implementation should be flexible enough, however, to add
62 // additional support as needed.
63 //
64 // For these two reasons, this file should not be used as a 
65 // definition of the CodeView symbol format.  See the CodeView
66 // specification from MSDN for such purposes.
67 //
68 #ifndef CODEVIEW_H
69 #define CODEVIEW_H
70
71
72
73
//
// LPString
//
// A read-only view of a length-prefixed string: a single length byte
// followed by that many characters.  Unlike a C string, there is no
// null termination, so the characters must always be used together
// with GetLength().
//
// An LPString only stores the pointer it is given; it never copies or
// frees the bytes.  An LPString with no data attached (the default
// constructor, or an explicit NULL) behaves as an empty string.
//
class LPString
{
private:
    const char* data;       // length byte followed by characters; may be NULL

public:
    LPString( const char* str = NULL ) : data( str ) {}

    // Number of characters that follow the length byte (0 if no data).
    unsigned char GetLength( void ) const
    {
        if( data == NULL )
        {
            return 0;
        }
        // Convert explicitly so that lengths above 127 are reported
        // correctly even where plain char is a signed type.
        return static_cast<unsigned char>( *data );
    }

    // Pointer to the first character (NOT null-terminated), or NULL
    // if no data is attached.
    const char* GetChars( void ) const
    {
        return ( data != NULL ) ? ( data + 1 ) : NULL;
    }

    // Copy the characters into a string object of exactly GetLength()
    // characters.
    operator string( void ) const
    {
        if( data == NULL )
        {
            return string();
        }
        return string( GetChars(), GetLength() );
    }
};
96
97
98
99 //
100 // CodeView
101 //
102 // A CodeView object knows how to parse debug 
103 // information in CodeView format.
104 //
105 class CodeView
106 {
107 public:
108     // types of subsections
109         enum SubsectionType
110         {
111                 sstModule               = 0x120,
112                 sstTypes                = 0x121,
113                 sstPublic               = 0x122,
114                 sstPublicSym    = 0x123,
115                 sstSymbols              = 0x124,
116                 sstAlignSym             = 0x125,
117                 sstSrcLnSeg             = 0x126,
118                 sstSrcModule    = 0x127,
119                 sstLibraries    = 0x128,
120                 sstGlobalSym    = 0x129,
121                 sstGlobalPub    = 0x12a,
122                 sstGlobalTypes  = 0x12b,
123                 sstMPC                  = 0x12c,
124                 sstSegMap               = 0x12d,
125                 sstSegName              = 0x12e,
126                 sstPreComp              = 0x12f,
127                 sstOffsetMap16  = 0x131,
128                 sstOffsetMap32  = 0x132,
129                 sstFileIndex    = 0x133,
130                 sstStaticSym    = 0x134
131         };
132
133     // types of symbol records
134         enum SymbolType
135         {
136                 S_COMPILE               = 0x0001,
137                 S_SSEARCH               = 0x0005,
138                 S_END                   = 0x0006,
139                 S_SKIP                  = 0x0007,
140                 S_CVRESERVE             = 0x0008,
141                 S_OBJNAME               = 0x0009,
142                 S_ENDARG                = 0x000a,
143                 S_COBOLUDT              = 0x000b,
144                 S_MANYREG               = 0x000c,
145                 S_RETURN                = 0x000d,
146                 S_ENTRYTHIS             = 0x000e,
147                 S_REGISTER              = 0x1001,
148                 S_CONSTANT              = 0x1002,
149                 S_UDT                   = 0x1003,
150                 S_COBOLUDT_2    = 0x1004,
151                 S_MANYREG_2             = 0x1005,
152                 S_BPREL32               = 0x1006,
153                 S_LDATA32               = 0x1007,
154                 S_GDATA32               = 0x1008,
155                 S_PUB32                 = 0x1009,
156                 S_LPROC32               = 0x100a,
157                 S_GPROC32               = 0x100b,
158                 S_THUNK32               = 0x0206,
159                 S_BLOCK32               = 0x0207,
160                 S_WITH32                = 0x0208,
161                 S_LABEL32               = 0x0209,
162                 S_CEXMODEL32    = 0x020a,
163                 S_VFTTABLE32    = 0x100c,
164                 S_REGREL32              = 0x100d,
165                 S_LTHREAD32             = 0x100e,
166                 S_GTHREAD32             = 0x100f,
167                 S_LPROCMIPS             = 0x1010,
168                 S_GPROCMIPS             = 0x1011,
169                 S_PROCREF               = 0x0400,
170                 S_DATAREF               = 0x0401,
171                 S_ALIGN                 = 0x0402
172         };
173
174     // format of subsection directory header
175         struct SDHeader
176         {
177                 WORD    cbDirHeader;            // length of subsection directory header
178                 WORD    cbDirEntry;                     // length of each directory entry
179                 DWORD   cDir;                           // number of directory entries
180                 DWORD   IfoNextDir;                     // offset from IfaBase to next directory
181                                     // (currently unused)
182                 DWORD   flags;                          // (currently unused)
183         };
184
185     // format of subsection directory entries
186         struct SDEntry
187         {
188                 WORD    sst;                            // type of the subsection
189                 WORD    iMod;                           // index of associated module
190                 DWORD   offset;                         // offset from IfaBase of subsection
191                 DWORD   cb;                                     // size of subsection in bytes
192         };
193
194     // format of symbol subsection header
195         struct SymHeader
196         {
197                 WORD    symhash;                        // index of symbol hash function
198                 WORD    addrhash;                       // index of address hash function
199                 DWORD   cbSymbol;                       // size (bytes) of symbol table
200                 DWORD   cbSymHash;                      // size (bytes) of symbol hash table
201                 DWORD   cbAddrHash;                     // size (bytes) of address hash table
202         };
203
204
205     // format of common fields for symbol records
206         struct SymRecord
207         {
208                 WORD    length;                 // length of record, excluding length field
209                 WORD    index;                  // type of symbol record
210         };
211
212     // format of PROC (function) symbol records
213         struct SymRecordProc
214         {
215                 SymRecord       base;           // fields to all SymRecords
216                 DWORD           pParent;        // parent lexical scope
217                 DWORD           pEnd;           // end of lexical scope
218                 DWORD           pNext;          // next lexical scope
219                 DWORD           procLength;     // size of the procedure (bytes)
220                 DWORD           debugStart;     // offset in bytes from proc start to point
221                                 //      where stack frame has been set up
222                 DWORD           debugEnd;       // offset in bytes from proc start to point
223                                 //      where procedure is ready to return
224                 DWORD           procType;       // type of procedure type record
225                 DWORD           offset;         // offset portion of proc address
226                 WORD            segment;        // segment (PE section) part of proc address
227                 char            flags;          // flags
228                                                                 //    fpo       :1 true if function has frame
229                                 //                  pointer omitted
230                                                                 //    interrupt :1 true if function is
231                                 //                  interrupt routine
232                                                                 //    return    :1 true if function performs
233                                 //                  far return
234                                                                 //    never     :1 true if function never
235                                 //                  returns
236                                                                 //    unused    :4
237                 char            name[1];        // length-prefixed name of procedure
238         };
239
240     // format of PROCREF symbol records
241     // (A PROCREF references a PROC symbol record
242     // elsewhere in the CodeView information)
243         struct SymRecordProcRef
244         {
245                 SymRecord       base;           // fields common to all SymRecords
246                 DWORD           checksum;       // checksum of referenced symbol name
247                 DWORD           offset;         // offset of procedure symbol record
248                                 // from the module subsection (?)
249                 WORD            module;         // index of the module that contains the symbol
250         };
251
252     // format of DATA symbol records
253         struct SymRecordData
254         {
255                 SymRecord       base;           // fields common to all SymRecords
256                 DWORD           type;           // type of variable
257                 DWORD           offset;         // offset portion of variable address
258                 WORD            segment;        // segment (section) part of variable address
259                 char            name[1];        // length-prefixed name of variable
260         };
261
262     // format of BPREL symbol records
263     // (BPREL records contain information about
264     // variables contained on the stack)
265         struct SymRecordBPRel
266         {
267                 SymRecord       base;           // fields common to all SymRecords
268                 DWORD           offset;         // offset from BP of variable
269                 DWORD           type;           // type of variable
270                 char            name[1];        // length-prefixed name of variable
271         };
272
273     // format of LABEL symbol records
274     // LABEL records are sometimes used to 
275     // represent functions.
276         struct SymRecordLabel
277         {
278                 SymRecord       base;           // fields common to all SymRecords
279                 DWORD           offset;         // offset portion of label address
280                 WORD            segment;        // segment (PE section) part of label address
281                 char            flags;          // flags
282                                                                 //    fpo       :1 true if function has
283                                 //                  frame pointer omitted
284                                                                 //    interrupt :1 true if function is
285                                 //                  interrupt routine
286                                                                 //    return    :1 true if function performs
287                                 //                  far return
288                                                                 //    never     :1 true if function never
289                                 //                  returns
290                                                                 //    unused    :4
291                 char            name[1];        // length-prefixed name of procedure
292         };
293
294     // format of THUNK symbol records
295     // A THUNK is a piece of code outside a function
296         struct SymRecordThunk
297         {
298                 SymRecord       base;           // fields common to all SymRecords
299                 DWORD           pParent;        // parent lexical scope
300                 DWORD           pEnd;           // end of lexical scope
301                 DWORD           pNext;          // next lexical scope
302                 DWORD           offset;         // offset portion of thunk address
303                 WORD            segment;        // segment (PE section) part of thunk address
304                 WORD            thunkLength;    // length of thunk code (in bytes)
305                 char            ordinal;        // ordinal specifying type of thunk
306                                                                 // 0: notype
307                                                                 // 1: adjustor
308                                                                 // 2: vcall
309                                                                 // 3: pcode
310                 char            name[1];        // length-prefixed name of procedure
311                 // variant field.       if ordinal is:
312                 //                      notype => there is no variant field
313                 //                      adjustor => WORD offset from this pointer,
314         //                          then length-prefixed name of function
315         //                          to call
316                 //                      vcall => WORD displacement into virtual table
317                 //                      pcode => segment:offset of pcode entry point
318         };
319
320
321     // format of Module subsection
322         struct ModuleSubsection
323         {
324                 struct SegInfo
325                 {
326                         WORD    seg;                    // segment described
327                         WORD    pad;                    // padding - must be zero
328                         DWORD   offset;                 // offset in segment to where code starts
329                         DWORD   cbSeg;                  // number of bytes of code in the segment
330                 };
331
332                 WORD    ovlNumber;              // overlay number
333                 WORD    iLib;                   // library index if linked from library
334                 WORD    cSeg;                   // number of SegInfo structs in subsection
335                 char    style[2];               // debugging style (should be "CV")
336
337                 // SegInfo      sinfo[];        // array of segment information structs
338                 // char         name[1];        // length-prefixed name of module
339         };
340
341     // format of AlignSym subsection
342         struct AlignSymSubsection
343         {
344                 // sstAlignSym subsections have no header.
345                 // The subsection consists of a stream of variable-length
346         // SymRecord* entries
347         };
348
349     // format of SrcModule subsection
350         struct SrcModuleSubsection
351         {
352         struct FileInfo
353         {
354             WORD    cSegFile;   // number of segments that receive code
355                                 // from this source file
356             WORD    pad;
357             // DWORD    baseSrcLen[cSegFile];   // array of offsets for line/
358                                                 // address mapping table
359             // DWORD    startEnd[cSegFile][2];  // start/end offsets of
360                                                 // code for this source file
361             // char     name[1];                // length-prefixed name of file
362         };
363
364         WORD    cFile;          // number of files that contributed
365                                 // code/data to this module
366         WORD    cSeg;           // number of segments that received
367                                 // code/data from this module
368         // DWORD    baseSrcFile[cFile];     // array of offsets into
369                                             // source file array
370         // DWORD    startEnd[cSeg][2];      // start/end offsets of
371                                             // code or data for each segment
372
373         // WORD     seg[cSeg];              // segment indices for segments
374                                             // that receive code from this
375                                             // module
376         // WORD     pad[0-2];               // padding to maintain 4-byte
377                                             // alignment
378
379         // FileInfo fileInfo[];             // array of variable-length
380                                             // records describing source
381                                             // files
382         };
383
384
385     // object that encapsulates the various types of symbols
386         class Symbols
387         {
388         public:
389         // accessors
390                 const vector<SymRecordProc*>&
391             GetGlobalFunctions( void ) const    { return gprocs; }
392                 const vector<SymRecordProc*>&
393             GetLocalFunctions( void ) const     { return lprocs; }
394                 const vector<SymRecordData*>&
395             GetGlobalVariables( void ) const    { return gvars; }
396                 const vector<SymRecordData*>&
397             GetLocalVariables( void ) const             { return lvars; }
398                 const vector<SymRecordBPRel*>&
399             GetStackVariables( void ) const             { return bprels; }
400                 const vector<SymRecordLabel*>&
401             GetLabels( void ) const                             { return labels; }
402                 const vector<SymRecordThunk*>&
403             GetThunks( void ) const                             { return thunks; }
404         const vector<SymRecordData*>&
405             GetPublics( void ) const            { return pubs; }
406
407         // operations
408                 void    Parse( const char* pSymBase, DWORD cb );
409                 Symbols& operator=( const Symbols& syms );
410
411         private:
412                 vector<SymRecordProc*> gprocs;          // global functions
413                 vector<SymRecordProc*> lprocs;          // local functions
414                 vector<SymRecordData*> gvars;           // global variables
415                 vector<SymRecordData*> lvars;           // local variables
416                 vector<SymRecordBPRel*> bprels;     // stack variables
417                 vector<SymRecordLabel*> labels;     // labels
418                 vector<SymRecordThunk*> thunks;     // thunks (non-function code)
419         vector<SymRecordData*> pubs;        // public (catch-all) symbols
420
421         friend class CodeView;
422         };
423
424
425     // object that represents a CodeView module,
426     // by tying together the relevant subsections
427     // associated with that module
428         class Module
429         {
430         public:
431                 Module( void );
432                 Module( ModuleSubsection* pModule, unsigned int textId );
433
434                 string GetName( void ) const;
435                 WORD GetLibraryIndex( void ) const              { return pmod->iLib; }
436                 const Symbols& GetSymbols( void ) const         { return syms; }
437                 string GetSourceName( void ) const;
438                 bool    GetTextBounds( DWORD& offset, DWORD& cb ) const;
439
440                 Module& operator=( const Module& mod );
441
442         private:
443                 ModuleSubsection* pmod;         // sstModule subsection
444                 AlignSymSubsection* pas;        // sstAlignSym subsection
445                 SrcModuleSubsection* psrc;      // sstSrcModule subsection
446                 Symbols syms;                           // symbols from the sstAlignSym subsection
447         DWORD offsetText;           // offset of code in text section
448         DWORD cbText;               // size of code in text section
449
450                 friend class CodeView;
451         };
452
453
454     // constructors
455         CodeView( const char* pSymbols, unsigned int textSectionId )
456           : pBase( pSymbols ),
457                 textId( textSectionId )
458         {}
459
460     // accessors
461         const vector<Module>&           GetModules( void ) const        { return modules; }
462         const vector<LPString>&         GetLibraries( void ) const      { return libs; }
463     const Symbols&              GetSymbols( void ) const    { return syms; }
464
465     // operations
466         bool Parse( void );
467
468 private:
469         const char* pBase;                      // location of CodeView symbols
470         vector<Module> modules;         // modules represented in the executable
471         vector<LPString> libs;          // libraries used to build this executable
472     Symbols syms;               // symbols not associated with a module
473     unsigned int textId;        // section number for .text section
474
475         void    ParseModuleSubsection( SDEntry* pEntry );
476         void    ParseLibrariesSubsection( SDEntry* pEntry );
477         void    ParseAlignSymSubsection( SDEntry* pEntry );
478         void    ParseSrcModuleSubsection( SDEntry* pEntry );
479         void    ParseSymbolsWithHeaderSubsection( SDEntry* pEntry );
480 };
481
482 #endif // CODEVIEW_H