Parsing Complex Text Structures
by Ian E. Gorman


Example 1:

; swap.xom
; omnimark: swap first two fields in a line of tab-delimited fields
; Run this program with the command
;    omnimark -sb swap.xom swap.txt
; where swap.txt is an input file with tab-delimited fields

process
   submit #main-input

find line-start [any-text except "%t"]* => Field1
      "%t" [any-text except "%t"]* => Field2
   output Field2 || "%t" || Field1


Example 2:

#!/bin/perl
# swap.pl
# perl: swap first two fields in a line of tab-delimited fields
# Run this program with the command
#    perl swap.pl swap.txt
# where swap.txt is an input file with tab-delimited fields
while (<>) {
    s/^([^\t]*)\t([^\t\n]*)/$2\t$1/;
    print "$_"
}


Example 3:

#!/bin/sed -f
# swap.sed
# sed: swap first two fields in a line of tab-delimited fields
# Run this program with the command
#    sed -f swap.sed swap.txt
# where swap.txt is an input file with tab-delimited fields
s/^\([^ ]*\) \([^ ]*\)/\2 \1/
# Note - each blank in the command above is actually a single hard tab "\t";
# type a real tab character there. Written with escapes it would read:
#    s/^\([^\t]*\)\t\([^\t]*\)/\2\t\1/


Example 4:

program:
        program expression '\n'
        | /* nix */
        ;
expression:
        INTEGER
        | expression '+' expression
        | expression '-' expression
        | expression '*' expression
        | expression '/' expression
        | '(' expression ')'
        ;


Example 5:

macro INTEGER is
   digit+
macro-end

define switch function expression as
   do scan #current-input
      match INTEGER
      match expression '+' expression
      match expression '-' expression
      match expression '*' expression
      match expression '/' expression
      match '(' expression ')'
      else
         return false   ; never matched
   done
   return true   ; matched once


Example 6:

expression   -> term r-expression
r-expression -> epsilon
              | "+" term r-expression
              | "-" term r-expression
term         -> factor r-term
r-term       -> epsilon
              | "*" factor r-term
              | "/" factor r-term
factor       -> "(" expression ")"
              | integer


Example 7:

define switch function r-expression as
   do scan #current-input
      match "+" term r-expression
      match "-" term r-expression
   done
   ;if not matched above, accept the empty string
   return true


Example 8:

define switch function r-expression as
   do scan #current-input
      match "+" term r-expression
         ;pop two integers, add them, push the result
      match "-" term r-expression
         ;pop two integers, subtract them, push the result
   done
   ;if not matched above, accept the empty string
   return true


Example 9:

  expression      -> term r-expression
  r-expression    -> epsilon
                   | add-term r-expression
                   | subtract-term r-expression
* add-term        -> "+" term
* subtract-term   -> "-" term
  term            -> factor r-term
  r-term          -> epsilon
                   | multiply-factor r-term
                   | divide-factor r-term
* multiply-factor -> "*" factor
* divide-factor   -> "/" factor
  factor          -> "(" expression ")"
*                  | integer


Listing One

#!/bin/omnimark -sb
; parnest.xom
; omnimark: pattern functions used in recursive pattern matching
; Run this program with the command
;    omnimark -sb parnest.xom parnest.txt
; where parnest.txt is an input file with nested sets of parentheses

; This program extracts nested matching parentheses from a file
; and prints the outermost set with all of the intervening text
; as a single unit.
; In the following line, the underlined text would be printed out
;   ( skip this ( but ( show ) this ) and ( not this
;               ---------------------
; Try using this program as input to itself:
;    omnimark -sb parnest.xom parnest.xom

; forward definition because functions refer to each other
define switch function paren-block elsewhere

define switch function paren-block-interior as
   repeat scan #current-input
      match [any except "()"]   ;any except start or end of block
      match paren-block         ;any contained block
   again
   return true

define switch function paren-block as
   return #current-input matches ( "(" paren-block-interior ")" )

find paren-block => text   ; list each outer block
   output text || "%n"

find any   ; Grab everything rejected by the first 'find' rule
   ; discard all characters

process
   submit #main-input   ;Send all input through the 'find' rules above


Listing Two

;------------------------------------------------------------------
; Four-function calculator written with OmniMark pattern matching rules.
; Supports + - * / and () with correct precedence
; Requires OmniMark 5.1 or later (free from
; http://www.omnimark.com)
; Run the program with the following command line:
;    omnimark -s dc.xom
; Exit by typing "quit" or EOF (ctrl-Z in Windows, ctrl-D in Unix)
;------------------------------------------------------------------

declare #main-input has unbuffered
declare #main-output has unbuffered

;------------------------------------------------------------------
; integer stack, with stack operators

global counter Stack variable initial-size 0

declare catch StackUnderflow

; push: add num as a new item on Stack
define function push ( value counter num ) as
   set new Stack to num

; pop: remove the top item of Stack and return its value;
; throws StackUnderflow when Stack has no items
define counter function pop () as
   local counter num
   throw StackUnderflow when number of Stack < 1   ;nothing to pop
   set num to Stack
   remove Stack   ;discard top of stack
   return num

;------------------------------------------------------------------
; patterns and pattern matching functions

declare catch DivisionByZero

macro integer is
   ( digit+ )
macro-end

define switch function expression elsewhere   ;forward definition

; factor: match a parenthesized expression, or an integer which is
; pushed on the stack; return false when neither is present
define switch function factor as
   do scan #current-input
      match blank* "(" blank* expression blank* ")"
         ;do nothing
      match blank* integer => int
         push( int )
      else
         return false
   done
   return true

; multiply-factor: match "*" and a factor, then replace the two values
; on top of the stack with their product
define switch function multiply-factor as
   do scan #current-input
      match blank* "*" blank* factor
         push( pop() * pop() )
         return true
   done
   return false

; divide-factor: match "/" and a factor, then replace the two values
; on top of the stack with their quotient; the divisor is on top, so it
; is popped first and checked before dividing
define switch function divide-factor as
   do scan #current-input
      match blank* "/" blank* factor
         local counter num
         set num to pop()
         throw DivisionByZero when num = 0
         push( pop() / num )
         return true
   done
   return false

; r-term: match zero or more multiply-factor / divide-factor items
define switch function r-term as
   do scan #current-input
      match multiply-factor r-term
      match divide-factor r-term
   done
   ; if we match nothing else, we can match the empty string
   return true

define switch function term as
   return #current-input matches ( factor r-term )

; add-term: match "+" and a term, then replace the two values on top of
; the stack with their sum
define switch function add-term as
   do scan #current-input
      match blank* "+" term
         push( pop() + pop() )
         return true
   done
   return false

; subtract-term: match "-" and a term, then replace the two values on
; top of the stack with their difference
define switch function subtract-term as
   do scan #current-input
      match blank* "-" term
         ; the right operand is on top of the stack, so it is the one
         ; negated; this assumes the pop() calls are evaluated left to
         ; right - TODO confirm against the OmniMark reference
         push( 0 - pop() + pop() )
         return true
   done
   return false

; r-expression: match zero or more add-term / subtract-term items
define switch function r-expression as
   do scan #current-input
      match add-term r-expression
      match subtract-term r-expression
   done
   ; if we match nothing else, we can match the empty string
   return true

define switch function expression as
   return #current-input matches ( term r-expression )

;------------------------------------------------------------------
; control loop
; Each input line is handled by the first alternative that matches:
; an expression is evaluated and its value printed, "quit" ends the
; program, a blank line is ignored, anything else is a syntax error.

process
   repeat
      repeat scan #main-input
         match expression =>text blank* "%n"
            output "d" % pop() || "%n"
            ; after the result is popped the stack should be empty
            put #error "Stack error: " || "d" % number of Stack || "%n"
               when number of Stack != 0
            clear Stack
         match blank* ul "quit"
            halt
         match blank* "%n"
         match any-text+ =>text "%n"?
            put #error "Syntax error: %x(text)%n"
            clear Stack
      again
      halt
    catch DivisionByZero
      put #error "Divide by zero%n"
    catch StackUnderflow
      put #error "Stack underflow%n"
    always
      ; skip the rest of the current input line and empty the stack
      do scan #main-input
         match any-text* "%n"
      done
      clear Stack
   again
;------------------------------------------------------------------
1