_A Simple Decompiler_ by William May 1 /************************************************************************ 2 deparse.c 3 4 Decompiles an RPN expression into the original source code (or 5 a reasonable facsimile). Based on an algorithm by P.J. Brown in 6 'More on the Re-creation of Source Code from Reverse Polish' 7 Software - Practice and Experience, vol 7 pgs 545-551 8 9 William May 10 303 Ridgefield Circle 11 Clinton, MA 01510 12 13 *************************************************************************/ 14 15 #include 16 #include 17 18 #define BUFSIZE 80 19 #define NIL (char *)0 20 #define TRUE 1 21 #define FALSE 0 22 23 enum { 24 NONARY_OP = 1, 25 UNARY_OP = 2, 26 BINARY_OP = 4 27 } lex_types; 28 29 /*------------------------------------------------------------------------- 30 This table describes the operators that the code re-creating 31 algorithm will handle. The identifier is the internal representation 32 of an operator, the lexical type indicates the number of arguments to 33 the operator, and the prefixes and suffix supply transformations 34 of the operators. 35 -------------------------------------------------------------------------*/ 36 typedef struct decomp_row { 37 char ident; 38 short lex_type; 39 char *prefix_1; 40 char *prefix_2; 41 char *suffix; 42 } decomp_row; 43 44 /*-------------------------------------------------------------------------- 45 An example of the source recreation table setup. 46 Note that in real life the idents need not be printable 47 However, printable characters make the program much easier to test. 48 --------------------------------------------------------------------------*/ 49 static decomp_row table[] ={ 50 {'!', UNARY_OP, "-", NIL, NIL}, 51 {'+', BINARY_OP, NIL, "+", NIL}, 52 {'-', BINARY_OP, NIL, "-", NIL}, 53 {'/', BINARY_OP, NIL, "/", NIL}, 54 {'*', BINARY_OP, NIL, "*", NIL}, 55 {'^', BINARY_OP, NIL, "^", NIL}, 56 {'(', UNARY_OP, "(", NIL, ")"}, 57 {'$', UNARY_OP, "sum(", NIL, ")"}, 58 {'=', BINARY_OP, "let ", "=", NIL}, 59 {',', BINARY_OP, NIL, ",", NIL} 60 }; 61 62 #define TABLENUM (sizeof(table)/sizeof(decomp_row)) 63 64 #ifdef DEBUG 65 #include 66 67 /*-------------------------------------------------------------------- 68 This is a simple test stub for the deparse function. It 69 reads a line from stdin, displays the line and the decompiled 70 result on stdout. Leave DEBUG undefined when compiling deparse as 71 a library. 72 --------------------------------------------------------------------*/ 73 main(argc, argv) 74 int argc; 75 char *argv[]; 76 { 77 char s[BUFSIZE]; /* input string */ 78 char t[BUFSIZE]; /* output string */ 79 int n; 80 void deparse(); 81 82 printf("\n\nDecompiling test expressions:\n\n"); 83 84 /* get a line. Note that buffer s still has linefeed in it */ 85 while (fgets(s, BUFSIZE, stdin)) { 86 if (n = strlen(s)) { 87 s[n-1] = '\0'; 88 printf("%s --> ", s); 89 deparse(s, t); 90 printf("%s\n", t); 91 } 92 } 93 94 exit(0); 95 } 96 #endif 97 98 /*------------------------------------------------------------------------ 99 deparse(): the only externally visible function, carries out deparsing 100 an RPN expression. 101 ------------------------------------------------------------------------*/ 102 103 void deparse(instr, outstr) 104 char *instr; 105 char *outstr; 106 { 107 char *restart; /* restart position after a forward scan */ 108 int level; /* number of operands passed during a forward scan */ 109 char *p, /* pointer to string to emit */ 110 *prefix_1_for_elem(), 111 *prefix_2_for_elem(), 112 *suffix(), 113 *pop(); 114 void init_stack(), 115 push(); 116 117 init_stack(); 118 119 /* make sure outstr is terminated */ 120 *outstr = '\0'; 121 122 /* scan the input string */ 123 while (*instr) { 124 125 /* look for the next operand */ 126 while (elem_is_operator(*instr)) { 127 if ((p = suffix(*instr)) != NIL) 128 strcat(outstr, p); 129 130 advance_to_next_elem(&instr); 131 132 if (!(*instr)) 133 return; /* no more input, so quit */ 134 } 135 136 /* found an operand, so scan forward for prefixes to this operand. */ 137 level = 0; 138 restart = instr; 139 140 while (level >= 0) { 141 /* get the next token */ 142 advance_to_next_elem(&instr); 143 144 /* have we reached the end of the input? */ 145 if (!*instr) 146 break; 147 148 /* 149 is the next token an operator or operand? If an operand then 150 update count of intervening operands (the level). If an operator 151 figure out if it results in a prefix to our operand (based on the level) 152 */ 153 if (elem_is_operand(*instr)) 154 level++; 155 else { 156 if (elem_is_binary_op(*instr)) 157 --level; 158 159 if (level == 0) 160 if ((p = prefix_1_for_elem(*instr)) != NIL) 161 push(p); 162 163 if (level == -1) 164 if ((p = prefix_2_for_elem(*instr)) != NIL) 165 push(p); 166 167 /* ... can be extended for more complex operators */ 168 } 169 } 170 171 /* 172 unwind results of the forward scan by popping them off 173 the stack. 174 */ 175 while (p = pop()) 176 strcat(outstr, p); 177 178 /* 179 forward scan complete, reset our scan point for another round 180 */ 181 instr = restart; 182 183 /* 184 now emit the operand itself. Note that operands will often 185 need conversions and formatting in real life, i.e. integer -> 186 string or floating point -> string conversions, 187 currency/date/time formatting, etc. Here we just append the 188 raw operand to the output string. 189 */ 190 strncat(outstr, instr, 1); 191 192 advance_to_next_elem(&instr); 193 } 194 } 195 196 /************************************************************************ 197 table handling utilities 198 ************************************************************************/ 199 200 /*------------------------------------------------------------------------ 201 elem_is_operator(): figure out if code is an operator 202 if so, then return true 203 ------------------------------------------------------------------------*/ 204 static elem_is_operator(code) 205 char code; 206 { 207 decomp_row *row, *find_op(); 208 209 if (row = find_op(code)) 210 return TRUE; 211 else 212 return FALSE; 213 } 214 215 /*------------------------------------------------------------------------ 216 elem_is_operand(): figure out if code is an operand 217 if so, then return true 218 219 In this example all operands are alphabetic or numeric characters. 220 ------------------------------------------------------------------------*/ 221 static elem_is_operand(code) 222 char code; 223 { 224 if (isalnum(code)) 225 return TRUE; 226 else 227 return FALSE; 228 } 229 230 /*------------------------------------------------------------------------ 231 elem_is_binary_op(): figure out if code is a binary operator 232 if so, then return true 233 ------------------------------------------------------------------------*/ 234 static elem_is_binary_op(code) 235 char code; 236 { 237 decomp_row *r, *find_op(); 238 239 if (r = find_op(code)) 240 return (r->lex_type & BINARY_OP); 241 else 242 return false; 243 } 244 245 /*------------------------------------------------------------------------ 246 prefix_1_for_elem(): get prefix 1 for the operator 247 ------------------------------------------------------------------------*/ 248 static char *prefix_1_for_elem(op) 249 char op; 250 { 251 decomp_row *r, *find_op(); 252 253 if (r = find_op(op)) 254 return (r->prefix_1); 255 else 256 return NIL; 257 } 258 259 /*------------------------------------------------------------------------ 260 prefix_2_for_elem(): get prefix 2 for the operator 261 ------------------------------------------------------------------------*/ 262 static char *prefix_2_for_elem(op) 263 char op; 264 { 265 decomp_row *r, *find_op(); 266 267 if (r = find_op(op)) 268 return (r->prefix_2); 269 else 270 return NIL; 271 } 272 273 /*------------------------------------------------------------------------ 274 suffix(): get the suffix of given code 275 ------------------------------------------------------------------------*/ 276 static char *suffix(code) 277 char code; 278 { 279 decomp_row *row, *find_op(); 280 281 if (row = find_op(code)) 282 return row->suffix; 283 else 284 return NIL; 285 } 286 287 /*------------------------------------------------------------------------ 288 find_op(): finds the operator "op" in the decompiler table 289 if found, it returns a pointer to the row 290 if not, it returns NIL 291 ------------------------------------------------------------------------*/ 292 decomp_row *find_op(op) 293 char op; 294 { 295 int i; 296 decomp_row *row; 297 298 row = table; 299 for (i = 0; i < TABLENUM; i++, row++) 300 if (op == row->ident) 301 return row; 302 303 return NIL; /* no hit */ 304 } 305 306 /*------------------------------------------------------------------------ 307 advance_to_next_elem(): advance to next element 308 bump scan pointer as we move 309 310 the function assumes that all tokens are a singe byte. 311 ------------------------------------------------------------------------*/ 312 static advance_to_next_elem(p) 313 char **p; 314 { 315 (*p)++; 316 } 317 318 /*======================================================================= 319 a simple stack implementation 320 =======================================================================*/ 321 322 /* stack size in bytes */ 323 #define STACKSIZE 40 324 325 /*** a couple of globals for the stack ***/ 326 static char **sp, **top; 327 static char stack[STACKSIZE]; 328 329 /*------------------------------------------------------------------------ 330 init_stack(): prepare the stack for use 331 ------------------------------------------------------------------------*/ 332 static void init_stack() 333 { 334 top = stack + STACKSIZE - 1; 335 sp = top + 1; 336 } 337 338 /*------------------------------------------------------------------------ 339 pop(): pop a pointer sized value from the stack 340 ------------------------------------------------------------------------*/ 341 static char *pop() 342 { 343 if (sp <= top) 344 return(*sp++); 345 else 346 return NIL; 347 } 348 349 /*------------------------------------------------------------------------ 350 push(): push a pointer sized value onto the stack 351 ------------------------------------------------------------------------*/ 352 static void push(p) 353 char *p; 354 { 355 *(--sp) = p; 356 } 357 Example 1: Source Code Recreation Source Recreation Object code Result abc++ a+b+c bcdfe+*/= let b=c/d*f+e gabc+(*cd,,$ let g=sum(a*(b+c),c,d) Example 2: Transformation Table typedef struct decomp-row { char ident short lex_type; char *prefix_1; char *prefix_2; char *suffix; } decomp_row; static decomp_row table[] ={ {`!', UNARY_OP, "-", NIL, NIL}, {`+', BINARY_OP, NIL, "+", NIL}, {`-', BINARY_OP, NIL, "-", NIL}, {`/', BINARY_OP, NIL, "/", NIL}, {`*', BINARY_OP, NIL, "*", NIL}, {`^' BINARY_OP, NIL, "^", NIL], {`(', UNARY-OP, "(", NIL, ")"}, {`$', UNARY_OP, "sum(",NIL, ")"}, {`=', BINARY_OP, "let ","=", ";"}, {`,', BINARY_OP, NIL, ",", NIL} };