ref: aa1e32f32330a6893ba1661088a6c354c2564d19
parent: 1dcb9611241b518cc1681a967d76fbcad64c91cd
author: Andrew Chambers <ac@acha.ninja>
date: Thu Apr 7 21:02:57 CDT 2022
Work on rename.
--- a/Makefile
+++ b/Makefile
@@ -25,7 +25,7 @@
minipeg-split: $(SRC)
$(CC) $(CFLAGS) -o $@ compile.c tree.c peg.c
-minipeg.c: $(SRC)
+minipeg.c: $(SRC) amalgamate.sh
sh amalgamate.sh $(SRC) > $@
peg-new.c: peg.leg minipeg
@@ -40,10 +40,11 @@
diff -u peg-new.c peg.c
diff -u peg-split.c peg.c
-check: minipeg .FORCE
+check: minipeg check-self-host .FORCE
$(SHELL) -ec '(cd examples; $(MAKE))'
clean : .FORCE
rm -f minipeg minipeg-split minipeg.c minipeg-new.c peg-new.c *.o
+ $(SHELL) -ec '(cd examples; $(MAKE) clean)'
.FORCE :
--- a/amalgamate.sh
+++ b/amalgamate.sh
@@ -1,8 +1,20 @@
# Amalgamate the source code, sqlite3 style.
+set -eu
echo "/* This file is a generated distributable version of the minipeg project."
echo " * Visit https://github.com/andrewchambers/minipeg for details. */"
-cat *.c | grep '^#include <' | sort -u
-for f in version.h tree.h compile.c tree.c peg.c
+
+(
+ for f in "$@"
+ do
+ case "$f" in
+ *.c)
+ cat "$f"
+ ;;
+ esac
+ done
+) | grep '^#include <' | sort -u
+
+for f in "$@"
do
echo "#line 0 \"$f\""
grep -v '^#include' "$f"
--- a/examples/Makefile
+++ b/examples/Makefile
@@ -8,7 +8,7 @@
all : $(EXAMPLES)
test : .FORCE
- ../minipeg -o test.leg.c test.leg
+ ../minipeg -o test.peg.c test.peg
$(CC) $(CFLAGS) -o test test.c
echo 'ab.ac.ad.ae.afg.afh.afg.afh.afi.afj.' | ./$@ | $(TEE) $@.out
$(DIFF) $@.ref $@.out
@@ -16,7 +16,7 @@
@echo
rule : .FORCE
- ../minipeg -o rule.leg.c rule.leg
+ ../minipeg -o rule.peg.c rule.peg
$(CC) $(CFLAGS) -o rule rule.c
echo 'abcbcdabcbcdabcbcdabcbcd' | ./$@ | $(TEE) $@.out
$(DIFF) $@.ref $@.out
@@ -24,7 +24,7 @@
@echo
accept : .FORCE
- ../minipeg -o accept.leg.c accept.leg
+ ../minipeg -o accept.peg.c accept.peg
$(CC) $(CFLAGS) -o accept accept.c
echo 'abcbcdabcbcdabcbcdabcbcd' | ./$@ | $(TEE) $@.out
$(DIFF) $@.ref $@.out
@@ -32,15 +32,15 @@
@echo
wc : .FORCE
- ../minipeg -o wc.leg.c wc.leg
- $(CC) $(CFLAGS) -o wc wc.leg.c
- cat wc.leg | ./$@ | $(TEE) $@.out
+ ../minipeg -o wc.peg.c wc.peg
+ $(CC) $(CFLAGS) -o wc wc.peg.c
+ cat wc.peg | ./$@ | $(TEE) $@.out
$(DIFF) $@.ref $@.out
rm -f $@.out
@echo
dc : .FORCE
- ../minipeg -o dc.leg.c dc.leg
+ ../minipeg -o dc.peg.c dc.peg
$(CC) $(CFLAGS) -o dc dc.c
echo ' 2 *3 *(3+ 4) ' | ./dc | $(TEE) $@.out
$(DIFF) $@.ref $@.out
@@ -48,7 +48,7 @@
@echo
dcv : .FORCE
- ../minipeg -o dcv.leg.c dcv.leg
+ ../minipeg -o dcv.peg.c dcv.peg
$(CC) $(CFLAGS) -o dcv dcv.c
echo 'a = 6; b = 7; a * b' | ./dcv | $(TEE) $@.out
$(DIFF) $@.ref $@.out
@@ -56,8 +56,8 @@
@echo
calc : .FORCE
- ../minipeg -o calc.leg.c calc.leg
- $(CC) $(CFLAGS) -o calc calc.leg.c
+ ../minipeg -o calc.peg.c calc.peg
+ $(CC) $(CFLAGS) -o calc calc.peg.c
echo 'a = 6; b = 7; a * b' | ./calc | $(TEE) $@.out
$(DIFF) $@.ref $@.out
rm -f $@.out
@@ -64,32 +64,24 @@
@echo
basic : .FORCE
- ../minipeg -o basic.leg.c basic.leg
- $(CC) $(CFLAGS) -o basic basic.leg.c
+ ../minipeg -o basic.peg.c basic.peg
+ $(CC) $(CFLAGS) -o basic basic.peg.c
( echo 'load "test"'; echo "run" ) | ./basic | $(TEE) $@.out
$(DIFF) $@.ref $@.out
rm -f $@.out
@echo
-localpeg : .FORCE
- ../minipeg -o test.leg.c test.leg
- $(CC) $(CFLAGS) -o localpeg localpeg.c
- echo 'ab.ac.ad.ae.afg.afh.afg.afh.afi.afj.' | ./$@ | $(TEE) $@.out
+local : .FORCE
+ ../minipeg -o local.peg.c local.peg
+ $(CC) $(CFLAGS) -o local local.peg.c
+ ./$@ < local.peg | $(TEE) $@.out
$(DIFF) $@.ref $@.out
rm -f $@.out
@echo
-localleg : .FORCE
- ../minipeg -o localleg.leg.c localleg.leg
- $(CC) $(CFLAGS) -o localleg localleg.leg.c
- ./$@ < localleg.leg | $(TEE) $@.out
- $(DIFF) $@.ref $@.out
- rm -f $@.out
- @echo
-
erract : .FORCE
- ../minipeg -o erract.leg.c erract.leg
- $(CC) $(CFLAGS) -o erract erract.leg.c
+ ../minipeg -o erract.peg.c erract.peg
+ $(CC) $(CFLAGS) -o erract erract.peg.c
echo '6*9' | ./$@ | $(TEE) $@.out
$(DIFF) $@.ref $@.out
rm -f $@.out
@@ -96,6 +88,6 @@
@echo
clean : .FORCE
- rm -f *.o *.leg.c $(EXAMPLES)
+ rm -f *.o *.peg.c $(EXAMPLES)
.FORCE :
--- a/examples/accept.c
+++ b/examples/accept.c
@@ -1,7 +1,7 @@
#include <stdio.h>
#include <stdlib.h>
-#include "accept.leg.c"
+#include "accept.peg.c"
int main()
{
--- a/examples/accept.leg
+++ /dev/null
@@ -1,8 +1,0 @@
-start = abcd+
-
-abcd = 'a' { printf("A %d\n", yypos); } bc { printf("ABC %d\n", yypos); } &{YYACCEPT}
- | 'b' { printf("B %d\n", yypos); } cd { printf("BCD %d\n", yypos); } &{YYACCEPT}
-
-bc = 'b' { printf("B %d\n", yypos); } 'c' { printf("C %d\n", yypos); }
-
-cd = 'c' { printf("C %d\n", yypos); } 'd' { printf("D %d\n", yypos); }
\ No newline at end of file
--- /dev/null
+++ b/examples/accept.peg
@@ -1,0 +1,8 @@
+start = abcd+
+
+abcd = 'a' { printf("A %d\n", yypos); } bc { printf("ABC %d\n", yypos); } &{YYACCEPT}
+ | 'b' { printf("B %d\n", yypos); } cd { printf("BCD %d\n", yypos); } &{YYACCEPT}
+
+bc = 'b' { printf("B %d\n", yypos); } 'c' { printf("C %d\n", yypos); }
+
+cd = 'c' { printf("C %d\n", yypos); } 'd' { printf("D %d\n", yypos); }
\ No newline at end of file
--- a/examples/basic.leg
+++ /dev/null
@@ -1,361 +1,0 @@
-# A 'syntax-directed interpreter' (all execution is a side-effect of parsing).
-# Inspired by Dennis Allison's original Tiny BASIC grammar, circa 1975.
-#
-# Copyright (c) 2007 by Ian Piumarta
-# All rights reserved.
-#
-# Permission is hereby granted, free of charge, to any person obtaining a
-# copy of this software and associated documentation files (the 'Software'),
-# to deal in the Software without restriction, including without limitation
-# the rights to use, copy, modify, merge, publish, distribute, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, provided that the above copyright notice(s) and this
-# permission notice appear in all copies of the Software. Acknowledgement
-# of the use of this Software in supporting documentation would be
-# appreciated but is not required.
-#
-# THE SOFTWARE IS PROVIDED 'AS IS'. USE ENTIRELY AT YOUR OWN RISK.
-#
-# Last edited: 2012-04-29 15:14:06 by piumarta on emilia
-
-%{
-# include <stdio.h>
-
- typedef struct line line;
-
- struct line
- {
- int number;
- int length;
- char *text;
- };
-
- line *lines= 0;
- int numLines= 0;
- int pc= -1, epc= -1;
- int batch= 0;
-
- int nextline(char *buf, int max);
-
-# define min(x, y) ((x) < (y) ? (x) : (y))
-
-# define YY_INPUT(buf, result, max_size) \
- { \
- if ((pc >= 0) && (pc < numLines)) \
- { \
- line *linep= lines+pc++; \
- result= min(max_size, linep->length); \
- memcpy(buf, linep->text, result); \
- } \
- else \
- result= nextline(buf, max_size); \
- }
-
- union value {
- int number;
- char *string;
- int (*binop)(int lhs, int rhs);
- };
-
-# define YYSTYPE union value
-
- int variables[26];
-
- void accept(int number, char *line);
-
- void save(char *name);
- void load(char *name);
- void type(char *name);
-
- int lessThan(int lhs, int rhs) { return lhs < rhs; }
- int lessEqual(int lhs, int rhs) { return lhs <= rhs; }
- int notEqual(int lhs, int rhs) { return lhs != rhs; }
- int equalTo(int lhs, int rhs) { return lhs == rhs; }
- int greaterEqual(int lhs, int rhs) { return lhs >= rhs; }
- int greaterThan(int lhs, int rhs) { return lhs > rhs; }
-
- int input(void);
-
- int stack[1024], sp= 0;
-
- char *help;
-
- void error(char *fmt, ...);
- int findLine(int n, int create);
-%}
-
-line = - s:statement CR
-| - n:number < ( !CR . )* CR > { accept(n.number, yytext); }
-| - CR
-| - < ( !CR . )* CR > { epc= pc; error("syntax error"); }
-| - !. { exit(0); }
-
-statement = 'print'- expr-list
-| 'if'- e1:expression r:relop e2:expression { if (!r.binop(e1.number, e2.number)) yythunkpos= 0; }
- 'then'- statement
-| 'goto'- e:expression { epc= pc; if ((pc= findLine(e.number, 0)) < 0) error("no such line"); }
-| 'input'- var-list
-| 'let'- v:var EQUAL e:expression { variables[v.number]= e.number; }
-| 'gosub'- e:expression { epc= pc; if (sp < 1024) stack[sp++]= pc, pc= findLine(e.number, 0); else error("too many gosubs");
- if (pc < 0) error("no such line"); }
-| 'return'- { epc= pc; if ((pc= sp ? stack[--sp] : -1) < 0) error("no gosub"); }
-| 'clear'- { while (numLines) accept(lines->number, "\n"); }
-| 'list'- { int i; for (i= 0; i < numLines; ++i) printf("%5d %s", lines[i].number, lines[i].text); }
-| 'run'- s:string { load(s.string); pc= 0; }
-| 'run'- { pc= 0; }
-| 'end'- { pc= -1; if (batch) exit(0); }
-| 'rem'- ( !CR . )*
-| ('bye'|'quit'|'exit')- { exit(0); }
-| 'save'- s:string { save(s.string); }
-| 'load'- s:string { load(s.string); }
-| 'type'- s:string { type(s.string); }
-| 'dir'- { system("ls *.bas"); }
-| 'help'- { fprintf(stderr, "%s", help); }
-
-expr-list = ( e:string { printf("%s", e.string); }
- | e:expression { printf("%d", e.number); }
- )? ( COMMA ( e:string { printf("%s", e.string); }
- | e:expression { printf("%d", e.number); }
- )
- )* ( COMMA
- | !COMMA { printf("\n"); }
- )
-
-var-list = v:var { variables[v.number]= input(); }
- ( COMMA v:var { variables[v.number]= input(); }
- )*
-
-expression = ( PLUS? l:term
- | MINUS l:term { l.number = -l.number }
- ) ( PLUS r:term { l.number += r.number }
- | MINUS r:term { l.number -= r.number }
- )* { $$.number = l.number }
-
-term = l:factor ( STAR r:factor { l.number *= r.number }
- | SLASH r:factor { l.number /= r.number }
- )* { $$.number = l.number }
-
-factor = v:var { $$.number = variables[v.number] }
-| n:number
-| OPEN expression CLOSE
-
-var = < [a-z] > - { $$.number = yytext[0] - 'a' }
-
-number = < digit+ > - { $$.number = atoi(yytext); }
-
-digit = [0-9]
-
-string = '"' < [^\"]* > '"' - { $$.string = yytext; }
-
-relop = '<=' - { $$.binop= lessEqual; }
-| '<>' - { $$.binop= notEqual; }
-| '<' - { $$.binop= lessThan; }
-| '>=' - { $$.binop= greaterEqual; }
-| '>' - { $$.binop= greaterThan; }
-| '=' - { $$.binop= equalTo; }
-
-EQUAL = '=' - CLOSE = ')' - OPEN = '(' -
-SLASH = '/' - STAR = '*' - MINUS = '-' -
-PLUS = '+' - COMMA = ',' -
-
-- = [ \t]*
-
-CR = '\n' | '\r' | '\r\n'
-
-%%
-
-#include <unistd.h>
-#include <stdarg.h>
-
-char *help=
- "print <num>|<string> [, <num>|<string> ...] [,]\n"
- "if <expr> <|<=|<>|=|>=|> <expr> then <stmt>\n"
- "input <var> [, <var> ...] let <var> = <expr>\n"
- "goto <expr> gosub <expr>\n"
- "end return\n"
- "list clear\n"
- "run [\"filename\"] rem <comment...>\n"
- "dir type \"filename\"\n"
- "save \"filename\" load \"filename\"\n"
- "bye|quit|exit help\n"
- ;
-
-void error(char *fmt, ...)
-{
- va_list ap;
- va_start(ap, fmt);
- if (epc > 0)
- fprintf(stderr, "\nline %d: %s", lines[epc-1].number, lines[epc-1].text);
- else
- fprintf(stderr, "\n");
- vfprintf(stderr, fmt, ap);
- fprintf(stderr, "\n");
- va_end(ap);
- epc= pc= -1;
-}
-
-#ifdef USE_READLINE
-# include <readline/readline.h>
-# include <readline/history.h>
-#endif
-
-int nextline(char *buf, int max)
-{
- pc= -1;
- if (batch) exit(0);
- if (isatty(fileno(stdin)))
- {
-# ifdef USE_READLINE
- char *line= readline(">");
- if (line)
- {
- int len= strlen(line);
- if (len >= max) len= max - 1;
- strncpy(buf, line, len);
- (buf)[len]= '\n';
- add_history(line);
- free(line);
- return len + 1;
- }
- else
- {
- printf("\n");
- return 0;
- }
-# endif
- putchar('>');
- fflush(stdout);
- }
- return fgets(buf, max, stdin) ? strlen(buf) : 0;
-}
-
-int maxLines= 0;
-
-int findLine(int n, int create)
-{
- int lo= 0, hi= numLines - 1;
- while (lo <= hi)
- {
- int mid= (lo + hi) / 2, lno= lines[mid].number;
- if (lno > n)
- hi= mid - 1;
- else if (lno < n)
- lo= mid + 1;
- else
- return mid;
- }
- if (create)
- {
- if (numLines == maxLines)
- {
- maxLines *= 2;
- lines= realloc(lines, sizeof(line) * maxLines);
- }
- if (lo < numLines)
- memmove(lines + lo + 1, lines + lo, sizeof(line) * (numLines - lo));
- ++numLines;
- lines[lo].number= n;
- lines[lo].text= 0;
- return lo;
- }
- return -1;
-}
-
-void accept(int n, char *s)
-{
- if (s[0] < 32) /* delete */
- {
- int lno= findLine(n, 0);
- if (lno >= 0)
- {
- if (lno < numLines - 1)
- memmove(lines + lno, lines + lno + 1, sizeof(line) * (numLines - lno - 1));
- --numLines;
- }
- }
- else /* insert */
- {
- int lno= findLine(n, 1);
- if (lines[lno].text) free(lines[lno].text);
- lines[lno].length= strlen(s);
- lines[lno].text= strdup(s);
- }
-}
-
-char *extend(char *name)
-{
- static char path[1024];
- int len= strlen(name);
- sprintf(path, "%s%s", name, (((len > 4) && !strcasecmp(".bas", name + len - 4)) ? "" : ".bas"));
- return path;
-}
-
-void save(char *name)
-{
- FILE *f= fopen(name= extend(name), "w");
- if (!f)
- perror(name);
- else
- {
- int i;
- for (i= 0; i < numLines; ++i)
- fprintf(f, "%d %s", lines[i].number, lines[i].text);
- fclose(f);
- }
-}
-
-void load(char *name)
-{
- FILE *f= fopen(name= extend(name), "r");
- if (!f)
- perror(name);
- else
- {
- int lineNumber;
- char lineText[1024];
- while ((1 == fscanf(f, " %d ", &lineNumber)) && fgets(lineText, sizeof(lineText), f))
- accept(lineNumber, lineText);
- fclose(f);
- }
-}
-
-void type(char *name)
-{
- FILE *f= fopen(name= extend(name), "r");
- if (!f)
- perror(name);
- else
- {
- int c, d;
- while ((c= getc(f)) >= 0)
- putchar(d= c);
- fclose(f);
- if ('\n' != d && '\r' != d) putchar('\n');
- }
-}
-
-int input(void)
-{
- char line[32];
- fgets(line, sizeof(line), stdin);
- return atoi(line);
-}
-
-int main(int argc, char **argv)
-{
- lines= malloc(sizeof(line) * (maxLines= 32));
- numLines= 0;
-
- if (argc > 1)
- {
- batch= 1;
- while (argc-- > 1)
- load(*++argv);
- pc= 0;
- }
-
- while (!feof(stdin))
- yyparse();
-
- return 0;
-}
--- /dev/null
+++ b/examples/basic.peg
@@ -1,0 +1,361 @@
+# A 'syntax-directed interpreter' (all execution is a side-effect of parsing).
+# Inspired by Dennis Allison's original Tiny BASIC grammar, circa 1975.
+#
+# Copyright (c) 2007 by Ian Piumarta
+# All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the 'Software'),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, provided that the above copyright notice(s) and this
+# permission notice appear in all copies of the Software. Acknowledgement
+# of the use of this Software in supporting documentation would be
+# appreciated but is not required.
+#
+# THE SOFTWARE IS PROVIDED 'AS IS'. USE ENTIRELY AT YOUR OWN RISK.
+#
+# Last edited: 2012-04-29 15:14:06 by piumarta on emilia
+
+%{
+# include <stdio.h>
+
+ typedef struct line line;
+
+ struct line
+ {
+ int number;
+ int length;
+ char *text;
+ };
+
+ line *lines= 0;
+ int numLines= 0;
+ int pc= -1, epc= -1;
+ int batch= 0;
+
+ int nextline(char *buf, int max);
+
+# define min(x, y) ((x) < (y) ? (x) : (y))
+
+# define YY_INPUT(buf, result, max_size) \
+ { \
+ if ((pc >= 0) && (pc < numLines)) \
+ { \
+ line *linep= lines+pc++; \
+ result= min(max_size, linep->length); \
+ memcpy(buf, linep->text, result); \
+ } \
+ else \
+ result= nextline(buf, max_size); \
+ }
+
+ union value {
+ int number;
+ char *string;
+ int (*binop)(int lhs, int rhs);
+ };
+
+# define YYSTYPE union value
+
+ int variables[26];
+
+ void accept(int number, char *line);
+
+ void save(char *name);
+ void load(char *name);
+ void type(char *name);
+
+ int lessThan(int lhs, int rhs) { return lhs < rhs; }
+ int lessEqual(int lhs, int rhs) { return lhs <= rhs; }
+ int notEqual(int lhs, int rhs) { return lhs != rhs; }
+ int equalTo(int lhs, int rhs) { return lhs == rhs; }
+ int greaterEqual(int lhs, int rhs) { return lhs >= rhs; }
+ int greaterThan(int lhs, int rhs) { return lhs > rhs; }
+
+ int input(void);
+
+ int stack[1024], sp= 0;
+
+ char *help;
+
+ void error(char *fmt, ...);
+ int findLine(int n, int create);
+%}
+
+line = - s:statement CR
+| - n:number < ( !CR . )* CR > { accept(n.number, yytext); }
+| - CR
+| - < ( !CR . )* CR > { epc= pc; error("syntax error"); }
+| - !. { exit(0); }
+
+statement = 'print'- expr-list
+| 'if'- e1:expression r:relop e2:expression { if (!r.binop(e1.number, e2.number)) yythunkpos= 0; }
+ 'then'- statement
+| 'goto'- e:expression { epc= pc; if ((pc= findLine(e.number, 0)) < 0) error("no such line"); }
+| 'input'- var-list
+| 'let'- v:var EQUAL e:expression { variables[v.number]= e.number; }
+| 'gosub'- e:expression { epc= pc; if (sp < 1024) stack[sp++]= pc, pc= findLine(e.number, 0); else error("too many gosubs");
+ if (pc < 0) error("no such line"); }
+| 'return'- { epc= pc; if ((pc= sp ? stack[--sp] : -1) < 0) error("no gosub"); }
+| 'clear'- { while (numLines) accept(lines->number, "\n"); }
+| 'list'- { int i; for (i= 0; i < numLines; ++i) printf("%5d %s", lines[i].number, lines[i].text); }
+| 'run'- s:string { load(s.string); pc= 0; }
+| 'run'- { pc= 0; }
+| 'end'- { pc= -1; if (batch) exit(0); }
+| 'rem'- ( !CR . )*
+| ('bye'|'quit'|'exit')- { exit(0); }
+| 'save'- s:string { save(s.string); }
+| 'load'- s:string { load(s.string); }
+| 'type'- s:string { type(s.string); }
+| 'dir'- { system("ls *.bas"); }
+| 'help'- { fprintf(stderr, "%s", help); }
+
+expr-list = ( e:string { printf("%s", e.string); }
+ | e:expression { printf("%d", e.number); }
+ )? ( COMMA ( e:string { printf("%s", e.string); }
+ | e:expression { printf("%d", e.number); }
+ )
+ )* ( COMMA
+ | !COMMA { printf("\n"); }
+ )
+
+var-list = v:var { variables[v.number]= input(); }
+ ( COMMA v:var { variables[v.number]= input(); }
+ )*
+
+expression = ( PLUS? l:term
+ | MINUS l:term { l.number = -l.number }
+ ) ( PLUS r:term { l.number += r.number }
+ | MINUS r:term { l.number -= r.number }
+ )* { $$.number = l.number }
+
+term = l:factor ( STAR r:factor { l.number *= r.number }
+ | SLASH r:factor { l.number /= r.number }
+ )* { $$.number = l.number }
+
+factor = v:var { $$.number = variables[v.number] }
+| n:number
+| OPEN expression CLOSE
+
+var = < [a-z] > - { $$.number = yytext[0] - 'a' }
+
+number = < digit+ > - { $$.number = atoi(yytext); }
+
+digit = [0-9]
+
+string = '"' < [^\"]* > '"' - { $$.string = yytext; }
+
+relop = '<=' - { $$.binop= lessEqual; }
+| '<>' - { $$.binop= notEqual; }
+| '<' - { $$.binop= lessThan; }
+| '>=' - { $$.binop= greaterEqual; }
+| '>' - { $$.binop= greaterThan; }
+| '=' - { $$.binop= equalTo; }
+
+EQUAL = '=' - CLOSE = ')' - OPEN = '(' -
+SLASH = '/' - STAR = '*' - MINUS = '-' -
+PLUS = '+' - COMMA = ',' -
+
+- = [ \t]*
+
+CR = '\n' | '\r' | '\r\n'
+
+%%
+
+#include <unistd.h>
+#include <stdarg.h>
+
+char *help=
+ "print <num>|<string> [, <num>|<string> ...] [,]\n"
+ "if <expr> <|<=|<>|=|>=|> <expr> then <stmt>\n"
+ "input <var> [, <var> ...] let <var> = <expr>\n"
+ "goto <expr> gosub <expr>\n"
+ "end return\n"
+ "list clear\n"
+ "run [\"filename\"] rem <comment...>\n"
+ "dir type \"filename\"\n"
+ "save \"filename\" load \"filename\"\n"
+ "bye|quit|exit help\n"
+ ;
+
+void error(char *fmt, ...)
+{
+ va_list ap;
+ va_start(ap, fmt);
+ if (epc > 0)
+ fprintf(stderr, "\nline %d: %s", lines[epc-1].number, lines[epc-1].text);
+ else
+ fprintf(stderr, "\n");
+ vfprintf(stderr, fmt, ap);
+ fprintf(stderr, "\n");
+ va_end(ap);
+ epc= pc= -1;
+}
+
+#ifdef USE_READLINE
+# include <readline/readline.h>
+# include <readline/history.h>
+#endif
+
+int nextline(char *buf, int max)
+{
+ pc= -1;
+ if (batch) exit(0);
+ if (isatty(fileno(stdin)))
+ {
+# ifdef USE_READLINE
+ char *line= readline(">");
+ if (line)
+ {
+ int len= strlen(line);
+ if (len >= max) len= max - 1;
+ strncpy(buf, line, len);
+ (buf)[len]= '\n';
+ add_history(line);
+ free(line);
+ return len + 1;
+ }
+ else
+ {
+ printf("\n");
+ return 0;
+ }
+# endif
+ putchar('>');
+ fflush(stdout);
+ }
+ return fgets(buf, max, stdin) ? strlen(buf) : 0;
+}
+
+int maxLines= 0;
+
+int findLine(int n, int create)
+{
+ int lo= 0, hi= numLines - 1;
+ while (lo <= hi)
+ {
+ int mid= (lo + hi) / 2, lno= lines[mid].number;
+ if (lno > n)
+ hi= mid - 1;
+ else if (lno < n)
+ lo= mid + 1;
+ else
+ return mid;
+ }
+ if (create)
+ {
+ if (numLines == maxLines)
+ {
+ maxLines *= 2;
+ lines= realloc(lines, sizeof(line) * maxLines);
+ }
+ if (lo < numLines)
+ memmove(lines + lo + 1, lines + lo, sizeof(line) * (numLines - lo));
+ ++numLines;
+ lines[lo].number= n;
+ lines[lo].text= 0;
+ return lo;
+ }
+ return -1;
+}
+
+void accept(int n, char *s)
+{
+ if (s[0] < 32) /* delete */
+ {
+ int lno= findLine(n, 0);
+ if (lno >= 0)
+ {
+ if (lno < numLines - 1)
+ memmove(lines + lno, lines + lno + 1, sizeof(line) * (numLines - lno - 1));
+ --numLines;
+ }
+ }
+ else /* insert */
+ {
+ int lno= findLine(n, 1);
+ if (lines[lno].text) free(lines[lno].text);
+ lines[lno].length= strlen(s);
+ lines[lno].text= strdup(s);
+ }
+}
+
+char *extend(char *name)
+{
+ static char path[1024];
+ int len= strlen(name);
+ sprintf(path, "%s%s", name, (((len > 4) && !strcasecmp(".bas", name + len - 4)) ? "" : ".bas"));
+ return path;
+}
+
+void save(char *name)
+{
+ FILE *f= fopen(name= extend(name), "w");
+ if (!f)
+ perror(name);
+ else
+ {
+ int i;
+ for (i= 0; i < numLines; ++i)
+ fprintf(f, "%d %s", lines[i].number, lines[i].text);
+ fclose(f);
+ }
+}
+
+void load(char *name)
+{
+ FILE *f= fopen(name= extend(name), "r");
+ if (!f)
+ perror(name);
+ else
+ {
+ int lineNumber;
+ char lineText[1024];
+ while ((1 == fscanf(f, " %d ", &lineNumber)) && fgets(lineText, sizeof(lineText), f))
+ accept(lineNumber, lineText);
+ fclose(f);
+ }
+}
+
+void type(char *name)
+{
+ FILE *f= fopen(name= extend(name), "r");
+ if (!f)
+ perror(name);
+ else
+ {
+ int c, d;
+ while ((c= getc(f)) >= 0)
+ putchar(d= c);
+ fclose(f);
+ if ('\n' != d && '\r' != d) putchar('\n');
+ }
+}
+
+int input(void)
+{
+ char line[32];
+ fgets(line, sizeof(line), stdin);
+ return atoi(line);
+}
+
+int main(int argc, char **argv)
+{
+ lines= malloc(sizeof(line) * (maxLines= 32));
+ numLines= 0;
+
+ if (argc > 1)
+ {
+ batch= 1;
+ while (argc-- > 1)
+ load(*++argv);
+ pc= 0;
+ }
+
+ while (!feof(stdin))
+ yyparse();
+
+ return 0;
+}
--- a/examples/calc.leg
+++ /dev/null
@@ -1,46 +1,0 @@
-%{
-#include <stdio.h>
-int vars[26];
-%}
-
-Stmt = - e:Expr EOL { printf("%d\n", e); }
- | ( !EOL . )* EOL { printf("error\n"); }
-
-Expr = i:ID ASSIGN s:Sum { $$= vars[i]= s; }
- | s:Sum { $$= s; }
-
-Sum = l:Product
- ( PLUS r:Product { l += r; }
- | MINUS r:Product { l -= r; }
- )* { $$= l; }
-
-Product = l:Value
- ( TIMES r:Value { l *= r; }
- | DIVIDE r:Value { l /= r; }
- )* { $$= l; }
-
-Value = i:NUMBER { $$= atoi(yytext); }
- | i:ID !ASSIGN { $$= vars[i]; }
- | OPEN i:Expr CLOSE { $$= i; }
-
-NUMBER = < [0-9]+ > - { $$= atoi(yytext); }
-ID = < [a-z] > - { $$= yytext[0] - 'a'; }
-ASSIGN = '=' -
-PLUS = '+' -
-MINUS = '-' -
-TIMES = '*' -
-DIVIDE = '/' -
-OPEN = '(' -
-CLOSE = ')' -
-
-- = [ \t]*
-EOL = '\n' | '\r\n' | '\r' | ';'
-
-%%
-
-int main()
-{
- while (yyparse());
-
- return 0;
-}
--- /dev/null
+++ b/examples/calc.peg
@@ -1,0 +1,46 @@
+%{
+#include <stdio.h>
+int vars[26];
+%}
+
+Stmt = - e:Expr EOL { printf("%d\n", e); }
+ | ( !EOL . )* EOL { printf("error\n"); }
+
+Expr = i:ID ASSIGN s:Sum { $$= vars[i]= s; }
+ | s:Sum { $$= s; }
+
+Sum = l:Product
+ ( PLUS r:Product { l += r; }
+ | MINUS r:Product { l -= r; }
+ )* { $$= l; }
+
+Product = l:Value
+ ( TIMES r:Value { l *= r; }
+ | DIVIDE r:Value { l /= r; }
+ )* { $$= l; }
+
+Value = i:NUMBER { $$= atoi(yytext); }
+ | i:ID !ASSIGN { $$= vars[i]; }
+ | OPEN i:Expr CLOSE { $$= i; }
+
+NUMBER = < [0-9]+ > - { $$= atoi(yytext); }
+ID = < [a-z] > - { $$= yytext[0] - 'a'; }
+ASSIGN = '=' -
+PLUS = '+' -
+MINUS = '-' -
+TIMES = '*' -
+DIVIDE = '/' -
+OPEN = '(' -
+CLOSE = ')' -
+
+- = [ \t]*
+EOL = '\n' | '\r\n' | '\r' | ';'
+
+%%
+
+int main()
+{
+ while (yyparse());
+
+ return 0;
+}
--- a/examples/dc.c
+++ b/examples/dc.c
@@ -7,7 +7,7 @@
int push(int n) { return stack[++stackp]= n; }
int pop(void) { return stack[stackp--]; }
-#include "dc.leg.c"
+#include "dc.peg.c"
int main()
{
--- a/examples/dc.leg
+++ /dev/null
@@ -1,27 +1,0 @@
-# Grammar
-
-Expr = SPACE Sum EOL { printf("%d\n", pop()); }
- | (!EOL .)* EOL { printf("error\n"); }
-
-Sum = Product ( PLUS Product { int r= pop(), l= pop(); push(l + r); }
- | MINUS Product { int r= pop(), l= pop(); push(l - r); }
- )*
-
-Product = Value ( TIMES Value { int r= pop(), l= pop(); push(l * r); }
- | DIVIDE Value { int r= pop(), l= pop(); push(l / r); }
- )*
-
-Value = NUMBER { push(atoi(yytext)); }
- | OPEN Sum CLOSE
-
-# Lexemes
-
-NUMBER = < [0-9]+ > SPACE
-PLUS = '+' SPACE
-MINUS = '-' SPACE
-TIMES = '*' SPACE
-DIVIDE = '/' SPACE
-OPEN = '(' SPACE
-CLOSE = ')' SPACE
-SPACE = [ \t]*
-EOL = '\n' | '\r\n' | '\r'
--- /dev/null
+++ b/examples/dc.peg
@@ -1,0 +1,27 @@
+# Grammar
+
+Expr = SPACE Sum EOL { printf("%d\n", pop()); }
+ | (!EOL .)* EOL { printf("error\n"); }
+
+Sum = Product ( PLUS Product { int r= pop(), l= pop(); push(l + r); }
+ | MINUS Product { int r= pop(), l= pop(); push(l - r); }
+ )*
+
+Product = Value ( TIMES Value { int r= pop(), l= pop(); push(l * r); }
+ | DIVIDE Value { int r= pop(), l= pop(); push(l / r); }
+ )*
+
+Value = NUMBER { push(atoi(yytext)); }
+ | OPEN Sum CLOSE
+
+# Lexemes
+
+NUMBER = < [0-9]+ > SPACE
+PLUS = '+' SPACE
+MINUS = '-' SPACE
+TIMES = '*' SPACE
+DIVIDE = '/' SPACE
+OPEN = '(' SPACE
+CLOSE = ')' SPACE
+SPACE = [ \t]*
+EOL = '\n' | '\r\n' | '\r'
--- a/examples/dcv.c
+++ b/examples/dcv.c
@@ -10,7 +10,7 @@
int pop(void) { return stack[stackp--]; }
int top(void) { return stack[stackp]; }
-#include "dcv.leg.c"
+#include "dcv.peg.c"
int main()
{
--- a/examples/dcv.leg
+++ /dev/null
@@ -1,32 +1,0 @@
-
-Stmt = SPACE Expr EOL { printf("%d\n", pop()); }
- | (!EOL .)* EOL { printf("error\n"); }
-
-Expr = ID { var= yytext[0] } ASSIGN Sum { vars[var - 'a']= top(); }
- | Sum
-
-Sum = Product ( PLUS Product { int r= pop(), l= pop(); push(l + r); }
- | MINUS Product { int r= pop(), l= pop(); push(l - r); }
- )*
-
-Product = Value ( TIMES Value { int r= pop(), l= pop(); push(l * r); }
- | DIVIDE Value { int r= pop(), l= pop(); push(l / r); }
- )*
-
-Value = NUMBER { push(atoi(yytext)); }
- | < ID > !ASSIGN { push(vars[yytext[0] - 'a']); }
- | OPEN Expr CLOSE
-
-
-NUMBER = < [0-9]+ > SPACE
-ID = < [a-z] > SPACE
-ASSIGN = '=' SPACE
-PLUS = '+' SPACE
-MINUS = '-' SPACE
-TIMES = '*' SPACE
-DIVIDE = '/' SPACE
-OPEN = '(' SPACE
-CLOSE = ')' SPACE
-
-SPACE = [ \t]*
-EOL = '\n' | '\r\n' | '\r' | ';'
--- /dev/null
+++ b/examples/dcv.peg
@@ -1,0 +1,32 @@
+
+Stmt = SPACE Expr EOL { printf("%d\n", pop()); }
+ | (!EOL .)* EOL { printf("error\n"); }
+
+Expr = ID { var= yytext[0] } ASSIGN Sum { vars[var - 'a']= top(); }
+ | Sum
+
+Sum = Product ( PLUS Product { int r= pop(), l= pop(); push(l + r); }
+ | MINUS Product { int r= pop(), l= pop(); push(l - r); }
+ )*
+
+Product = Value ( TIMES Value { int r= pop(), l= pop(); push(l * r); }
+ | DIVIDE Value { int r= pop(), l= pop(); push(l / r); }
+ )*
+
+Value = NUMBER { push(atoi(yytext)); }
+ | < ID > !ASSIGN { push(vars[yytext[0] - 'a']); }
+ | OPEN Expr CLOSE
+
+
+NUMBER = < [0-9]+ > SPACE
+ID = < [a-z] > SPACE
+ASSIGN = '=' SPACE
+PLUS = '+' SPACE
+MINUS = '-' SPACE
+TIMES = '*' SPACE
+DIVIDE = '/' SPACE
+OPEN = '(' SPACE
+CLOSE = ')' SPACE
+
+SPACE = [ \t]*
+EOL = '\n' | '\r\n' | '\r' | ';'
--- a/examples/erract.leg
+++ /dev/null
@@ -1,27 +1,0 @@
-%{
-#include <stdio.h>
-%}
-
-Expr = a:NUMBER PLUS ~{ printf("fail at PLUS\n") } b:NUMBER { printf("got addition\n"); }
- | ( a:NUMBER MINUS b:NUMBER { printf("got subtraction\n"); } ) ~{ printf("fail at subtraction\n") }
- | a:NUMBER TIMES b:NUMBER { printf("got multiplication\n"); }
- | a:NUMBER DIVIDE b:NUMBER { printf("got division\n"); }
-
-NUMBER = < [0-9]+ > - { $$= atoi(yytext); }
-PLUS = '+' -
-MINUS = '-' -
-TIMES = '*' -
-DIVIDE = '/' -
-
-- = (SPACE | EOL)*
-SPACE = [ \t]
-EOL = '\n' | '\r\n' | '\r' | ';'
-
-%%
-
-int main()
-{
- while (yyparse());
-
- return 0;
-}
--- /dev/null
+++ b/examples/erract.peg
@@ -1,0 +1,27 @@
+%{
+#include <stdio.h>
+%}
+
+Expr = a:NUMBER PLUS ~{ printf("fail at PLUS\n") } b:NUMBER { printf("got addition\n"); }
+ | ( a:NUMBER MINUS b:NUMBER { printf("got subtraction\n"); } ) ~{ printf("fail at subtraction\n") }
+ | a:NUMBER TIMES b:NUMBER { printf("got multiplication\n"); }
+ | a:NUMBER DIVIDE b:NUMBER { printf("got division\n"); }
+
+NUMBER = < [0-9]+ > - { $$= atoi(yytext); }
+PLUS = '+' -
+MINUS = '-' -
+TIMES = '*' -
+DIVIDE = '/' -
+
+- = (SPACE | EOL)*
+SPACE = [ \t]
+EOL = '\n' | '\r\n' | '\r' | ';'
+
+%%
+
+int main()
+{
+ while (yyparse());
+
+ return 0;
+}
--- a/examples/left.c
+++ b/examples/left.c
@@ -7,7 +7,7 @@
if (EOF != c) printf("<%c>\n", c); \
}
-#include "left.leg.c"
+#include "left.peg.c"
int main()
{
--- a/examples/left.leg
+++ /dev/null
@@ -1,2 +1,0 @@
-
-S = (S 'a' | 'a') !'a'
--- /dev/null
+++ b/examples/left.peg
@@ -1,0 +1,2 @@
+
+S = (S 'a' | 'a') !'a'
--- /dev/null
+++ b/examples/local.peg
@@ -1,0 +1,24 @@
+%{
+#define YY_CTX_LOCAL 1
+#define YY_CTX_MEMBERS \
+ int count;
+%}
+
+Char = ('\n' | '\r\n' | '\r') { yy->count++ }
+ | .
+
+%%
+
+#include <stdio.h>
+#include <string.h>
+
+int main()
+{
+ yycontext yy;
+ memset(&yy, 0, sizeof(yy));
+ while (yyparse(&yy))
+ ;
+ printf("%d newlines\n", yy.count);
+ yyrelease(&yy);
+ return 0;
+}
--- /dev/null
+++ b/examples/local.ref
@@ -1,0 +1,1 @@
+24 newlines
--- a/examples/localleg.leg
+++ /dev/null
@@ -1,24 +1,0 @@
-%{
-#define YY_CTX_LOCAL 1
-#define YY_CTX_MEMBERS \
- int count;
-%}
-
-Char = ('\n' | '\r\n' | '\r') { yy->count++ }
- | .
-
-%%
-
-#include <stdio.h>
-#include <string.h>
-
-int main()
-{
- yycontext yy;
- memset(&yy, 0, sizeof(yy));
- while (yyparse(&yy))
- ;
- printf("%d newlines\n", yy.count);
- yyrelease(&yy);
- return 0;
-}
--- a/examples/localleg.ref
+++ /dev/null
@@ -1,1 +1,0 @@
-24 newlines
--- a/examples/rule.c
+++ b/examples/rule.c
@@ -1,7 +1,7 @@
#include <stdio.h>
#include <stdlib.h>
-#include "rule.leg.c"
+#include "rule.peg.c"
int main()
{
--- a/examples/rule.leg
+++ /dev/null
@@ -1,8 +1,0 @@
-start = abcd+
-
-abcd = 'a' { printf("A %d\n", yypos); } bc { printf("ABC %d\n", yypos); }
- | 'b' { printf("B %d\n", yypos); } cd { printf("BCD %d\n", yypos); };
-
-bc = 'b' { printf("B %d\n", yypos); } 'c' { printf("C %d\n", yypos); };
-
-cd = 'c' { printf("C %d\n", yypos); } 'd' { printf("D %d\n", yypos); };
--- /dev/null
+++ b/examples/rule.peg
@@ -1,0 +1,8 @@
+start = abcd+
+
+abcd = 'a' { printf("A %d\n", yypos); } bc { printf("ABC %d\n", yypos); }
+ | 'b' { printf("B %d\n", yypos); } cd { printf("BCD %d\n", yypos); };
+
+bc = 'b' { printf("B %d\n", yypos); } 'c' { printf("C %d\n", yypos); };
+
+cd = 'c' { printf("C %d\n", yypos); } 'd' { printf("D %d\n", yypos); };
--- a/examples/test.leg
+++ /dev/null
@@ -1,13 +1,0 @@
-start = body '.' { printf(".\n"); };
-
-body = 'a' { printf("a1 "); } 'b' { printf("ab1 "); }
-
- | 'a' { printf("a2 "); } 'c' { printf("ac2 "); }
-
- | 'a' { printf("a3 "); } ( 'd' { printf("ad3 "); } | 'e' { printf("ae3 "); } )
-
- | 'a' { printf("a4 "); } ( 'f' { printf("af4 "); } 'g' { printf("afg4 "); }
- | 'f' { printf("af5 "); } 'h' { printf("afh5 "); } )
-
- | 'a' { printf("a6 "); } ( 'f' &{ printf("af6 ") } 'i' &{ printf("afi6 ") }
- | 'f' &{ printf("af7 ") } 'j' &{ printf("afj7 ") } );
--- /dev/null
+++ b/examples/test.peg
@@ -1,0 +1,13 @@
+start = body '.' { printf(".\n"); };
+
+body = 'a' { printf("a1 "); } 'b' { printf("ab1 "); }
+
+ | 'a' { printf("a2 "); } 'c' { printf("ac2 "); }
+
+ | 'a' { printf("a3 "); } ( 'd' { printf("ad3 "); } | 'e' { printf("ae3 "); } )
+
+ | 'a' { printf("a4 "); } ( 'f' { printf("af4 "); } 'g' { printf("afg4 "); }
+ | 'f' { printf("af5 "); } 'h' { printf("afh5 "); } )
+
+ | 'a' { printf("a6 "); } ( 'f' &{ printf("af6 ") } 'i' &{ printf("afi6 ") }
+ | 'f' &{ printf("af7 ") } 'j' &{ printf("afj7 ") } );
--- a/examples/username.leg
+++ /dev/null
@@ -1,14 +1,0 @@
-%{
-#include <unistd.h>
-%}
-
-start = "username" { printf("%s", getlogin()); }
-| < . > { putchar(yytext[0]); }
-
-%%
-
-int main()
-{
- while (yyparse());
- return 0;
-}
--- /dev/null
+++ b/examples/username.peg
@@ -1,0 +1,14 @@
+%{
+#include <unistd.h>
+%}
+
+start = "username" { printf("%s", getlogin()); }
+| < . > { putchar(yytext[0]); }
+
+%%
+
+int main()
+{
+ while (yyparse());
+ return 0;
+}
--- a/examples/wc.leg
+++ /dev/null
@@ -1,22 +1,0 @@
-%{
-#include <stdio.h>
-int lines= 0, words= 0, chars= 0;
-%}
-
-start = (line | word | char)
-
-line = < (( '\n' '\r'* ) | ( '\r' '\n'* )) > { lines++; chars += yyleng; }
-word = < [a-zA-Z]+ > { words++; chars += yyleng; printf("<%s>\n", yytext); }
-char = . { chars++; }
-
-%%
-
-int main()
-{
- while (yyparse())
- ;
- printf("%d lines\n", lines);
- printf("%d chars\n", chars);
- printf("%d words\n", words);
- return 0;
-}
--- /dev/null
+++ b/examples/wc.peg
@@ -1,0 +1,22 @@
+%{
+#include <stdio.h>
+int lines= 0, words= 0, chars= 0;
+%}
+
+start = (line | word | char)
+
+line = < (( '\n' '\r'* ) | ( '\r' '\n'* )) > { lines++; chars += yyleng; }
+word = < [a-zA-Z]+ > { words++; chars += yyleng; printf("<%s>\n", yytext); }
+char = . { chars++; }
+
+%%
+
+int main()
+{
+ while (yyparse())
+ ;
+ printf("%d lines\n", lines);
+ printf("%d chars\n", chars);
+ printf("%d words\n", words);
+ return 0;
+}
--- /dev/null
+++ b/minipeg.1
@@ -1,0 +1,1147 @@
+.\" Copyright (c) 2007,2016 by Ian Piumarta
+.\" All rights reserved.
+.\"
+.\" Permission is hereby granted, free of charge, to any person obtaining a
+.\" copy of this software and associated documentation files (the 'Software'),
+.\" to deal in the Software without restriction, including without limitation
+.\" the rights to use, copy, modify, merge, publish, distribute, and/or sell
+.\" copies of the Software, and to permit persons to whom the Software is
+.\" furnished to do so, provided that the above copyright notice(s) and this
+.\" permission notice appear in all copies of the Software. Acknowledgement
+.\" of the use of this Software in supporting documentation would be
+.\" appreciated but is not required.
+.\"
+.\" THE SOFTWARE IS PROVIDED 'AS IS'. USE ENTIRELY AT YOUR OWN RISK.
+.\"
+.\" Last edited: 2016-07-22 09:47:29 by piumarta on zora.local
+.\"
+.TH PEG 1 "September 2013" "Version 0.1"
+.SH NAME
+peg, leg \- parser generators
+.SH SYNOPSIS
+.B peg
+.B [\-hvV \-ooutput]
+.I [filename ...]
+.sp 0
+.B leg
+.B [\-hvV \-ooutput]
+.I [filename ...]
+.SH DESCRIPTION
+.I peg
+and
+.I leg
+are tools for generating recursive\-descent parsers: programs that
+perform pattern matching on text. They process a Parsing Expression
+Grammar (PEG) [Ford 2004] to produce a program that recognises legal
+sentences of that grammar.
+.I peg
+processes PEGs written using the original syntax described by Ford;
+.I leg
+processes PEGs written using slightly different syntax and conventions
+that are intended to make it an attractive replacement for parsers
+built with
+.IR lex (1)
+and
+.IR yacc (1).
+Unlike
+.I lex
+and
+.IR yacc ,
+.I peg
+and
+.I leg
+support unlimited backtracking, provide ordered choice as a means for
+disambiguation, and can combine scanning (lexical analysis) and
+parsing (syntactic analysis) into a single activity.
+.PP
+.I peg
+reads the specified
+.IR filename s,
+or standard input if no
+.IR filename s
+are given, for a grammar describing the parser to generate.
+.I peg
+then generates a C source file that defines a function
+.IR yyparse().
+This C source file can be included in, or compiled and then linked
+with, a client program. Each time the client program calls
+.IR yyparse ()
+the parser consumes input text according to the parsing rules,
+starting from the first rule in the grammar.
+.IR yyparse ()
+returns non\-zero if the input could be parsed according to the
+grammar; it returns zero if the input could not be parsed.
+.PP
+The prefix 'yy' or 'YY' is prepended to all externally\-visible symbols
+in the generated parser. This is intended to reduce the risk of
+namespace pollution in client programs. (The choice of 'yy' is
+historical; see
+.IR lex (1)
+and
+.IR yacc (1),
+for example.)
+.SH OPTIONS
+.I peg
+and
+.I leg
+provide the following options:
+.TP
+.B \-h
+prints a summary of available options and then exits.
+.TP
+.B \-ooutput
+writes the generated parser to the file
+.B output
+instead of the standard output.
+.TP
+.B \-P
+suppresses #line directives in the output.
+.TP
+.B \-v
+writes verbose information to standard error while working.
+.TP
+.B \-V
+writes version information to standard error then exits.
+.SH A SIMPLE EXAMPLE
+The following
+.I peg
+input specifies a grammar with a single rule (called 'start') that is
+satisfied when the input contains the string "username".
+.nf
+
+ start <\- "username"
+
+.fi
+(The quotation marks are
+.I not
+part of the matched text; they serve to indicate a literal string to
+be matched.) In other words,
+.IR yyparse ()
+in the generated C source will return non\-zero only if the next eight
+characters read from the input spell the word "username". If the
+input contains anything else,
+.IR yyparse ()
+returns zero and no input will have been consumed. (Subsequent calls
+to
+.IR yyparse ()
+will also return zero, since the parser is effectively blocked looking
+for the string "username".) To ensure progress we can add an
+alternative clause to the 'start' rule that will match any single
+character if "username" is not found.
+.nf
+
+ start <\- "username"
+ / .
+
+.fi
+.IR yyparse ()
+now always returns non\-zero (except at the very end of the input). To
+do something useful we can add actions to the rules. These actions
+are performed after a complete match is found (starting from the first
+rule) and are chosen according to the 'path' taken through the grammar
+to match the input. (Linguists would call this path a 'phrase
+marker'.)
+.nf
+
+ start <\- "username" { printf("%s\\n", getlogin()); }
+ / < . > { putchar(yytext[0]); }
+
+.fi
+The first line instructs the parser to print the user's login name
+whenever it sees "username" in the input. If that match fails, the
+second line tells the parser to echo the next character on the input
+the standard output. Our parser is now performing useful work: it
+will copy the input to the output, replacing all occurrences of
+"username" with the user's account name.
+.PP
+Note the angle brackets ('<' and '>') that were added to the second
+alternative. These have no effect on the meaning of the rule, but
+serve to delimit the text made available to the following action in
+the variable
+.IR yytext .
+.PP
+If the above grammar is placed in the file
+.BR username.peg ,
+running the command
+.nf
+
+ peg \-o username.c username.peg
+
+.fi
+will save the corresponding parser in the file
+.BR username.c .
+To create a complete program this parser could be included by a C
+program as follows.
+.nf
+
+ #include <stdio.h> /* printf(), putchar() */
+ #include <unistd.h> /* getlogin() */
+
+ #include "username.c" /* yyparse() */
+
+ int main()
+ {
+ while (yyparse()) /* repeat until EOF */
+ ;
+ return 0;
+ }
+.fi
+.SH PEG GRAMMARS
+A grammar consists of a set of named rules.
+.nf
+
+ name <\- pattern
+
+.fi
+The
+.B pattern
+contains one or more of the following elements.
+.TP
+.B name
+The element stands for the entire pattern in the rule with the given
+.BR name .
+.TP
+.BR \(dq characters \(dq
+A character or string enclosed in double quotes is matched literally.
+The ANSI C escape sequences are recognised within the
+.IR characters .
+.TP
+.BR ' characters '
+A character or string enclosed in single quotes is matched literally, as above.
+.TP
+.BR [ characters ]
+A set of characters enclosed in square brackets matches any single
+character from the set, with escape characters recognised as above.
+If the set begins with an uparrow (^) then the set is negated (the
+element matches any character
+.I not
+in the set). Any pair of characters separated with a dash (\-)
+represents the range of characters from the first to the second,
+inclusive. A single alphabetic character or underscore is matched by
+the following set.
+.nf
+
+ [a\-zA\-Z_]
+
+.fi
+Similarly, the following matches any single non\-digit character.
+.nf
+
+ [^0\-9]
+
+.fi
+.TP
+.B .
+A dot matches any character. Note that the only time this fails is at
+the end of file, where there is no character to match.
+.TP
+.BR ( \ pattern\ )
+Parentheses are used for grouping (modifying the precedence of the
+operators described below).
+.TP
+.BR { \ action\ }
+Curly braces surround actions. The action is arbitrary C source code
+to be executed at the end of matching. Any braces within the action
+must be properly nested. Any input text that was matched before the
+action and delimited by angle brackets (see below) is made available
+within the action as the contents of the character array
+.IR yytext .
+The length of (number of characters in)
+.I yytext
+is available in the variable
+.IR yyleng .
+(These variable names are historical; see
+.IR lex (1).)
+.TP
+.B <
+An opening angle bracket always matches (consuming no input) and
+causes the parser to begin accumulating matched text. This text will
+be made available to actions in the variable
+.IR yytext .
+.TP
+.B >
+A closing angle bracket always matches (consuming no input) and causes
+the parser to stop accumulating text for
+.IR yytext .
+.PP
+The above
+.IR element s
+can be made optional and/or repeatable with the following suffixes:
+.TP
+.RB element\ ?
+The element is optional. If present on the input, it is consumed and
+the match succeeds. If not present on the input, no text is consumed
+and the match succeeds anyway.
+.TP
+.RB element\ +
+The element is repeatable. If present on the input, one or more
+occurrences of
+.I element
+are consumed and the match succeeds. If no occurrences of
+.I element
+are present on the input, the match fails.
+.TP
+.RB element\ *
+The element is optional and repeatable. If present on the input, one or more
+occurrences of
+.I element
+are consumed and the match succeeds. If no occurrences of
+.I element
+are present on the input, the match succeeds anyway.
+.PP
+The above elements and suffixes can be converted into predicates (that
+match arbitrary input text and subsequently succeed or fail
+.I without
+consuming that input) with the following prefixes:
+.TP
+.BR & \ element
+The predicate succeeds only if
+.I element
+can be matched. Input text scanned while matching
+.I element
+is not consumed from the input and remains available for subsequent
+matching.
+.TP
+.BR ! \ element
+The predicate succeeds only if
+.I element
+cannot be matched. Input text scanned while matching
+.I element
+is not consumed from the input and remains available for subsequent
+matching. A popular idiom is
+.nf
+
+ !.
+
+.fi
+which matches the end of file, after the last character of the input
+has already been consumed.
+.PP
+A special form of the '&' predicate is provided:
+.TP
+.BR & {\ expression\ }
+In this predicate the simple C
+.I expression
+.RB ( not
+statement) is evaluated immediately when the parser reaches the
+predicate. If the
+.I expression
+yields non\-zero (true) the 'match' succeeds and the parser continues
+with the next element in the pattern. If the
+.I expression
+yields zero (false) the 'match' fails and the parser backs up to look
+for an alternative parse of the input.
+.PP
+Several elements (with or without prefixes and suffixes) can be
+combined into a
+.I sequence
+by writing them one after the other. The entire sequence matches only
+if each individual element within it matches, from left to right.
+.PP
+Sequences can be separated into disjoint alternatives by the
+alternation operator '/'.
+.TP
+.RB sequence\-1\ / \ sequence\-2\ / \ ...\ / \ sequence\-N
+Each sequence is tried in turn until one of them matches, at which
+time matching for the overall pattern succeeds. If none of the
+sequences matches then the match of the overall pattern fails.
+.PP
+Finally, the pound sign (#) introduces a comment (discarded) that
+continues until the end of the line.
+.PP
+To summarise the above, the parser tries to match the input text
+against a pattern containing literals, names (representing other
+rules), and various operators (written as prefixes, suffixes,
+juxtaposition for sequencing and and infix alternation operator) that
+modify how the elements within the pattern are matched. Matches are
+made from left to right, 'descending' into named sub\-rules as they are
+encountered. If the matching process fails, the parser 'back tracks'
+('rewinding' the input appropriately in the process) to find the
+nearest alternative 'path' through the grammar. In other words the
+parser performs a depth\-first, left\-to\-right search for the first
+successfully\-matching path through the rules. If found, the actions
+along the successful path are executed (in the order they were
+encountered).
+.PP
+Note that predicates are evaluated
+.I immediately
+during the search for a successful match, since they contribute to the
+success or failure of the search. Actions, however, are evaluated
+only after a successful match has been found.
+.SH PEG GRAMMAR FOR PEG GRAMMARS
+The grammar for
+.I peg
+grammars is shown below. This will both illustrate and formalise
+the above description.
+.nf
+
+ Grammar <\- Spacing Definition+ EndOfFile
+
+ Definition <\- Identifier LEFTARROW Expression
+ Expression <\- Sequence ( SLASH Sequence )*
+ Sequence <\- Prefix*
+ Prefix <\- AND Action
+ / ( AND | NOT )? Suffix
+ Suffix <\- Primary ( QUERY / STAR / PLUS )?
+ Primary <\- Identifier !LEFTARROW
+ / OPEN Expression CLOSE
+ / Literal
+ / Class
+ / DOT
+ / Action
+ / BEGIN
+ / END
+
+ Identifier <\- < IdentStart IdentCont* > Spacing
+ IdentStart <\- [a\-zA\-Z_]
+ IdentCont <\- IdentStart / [0\-9]
+ Literal <\- ['] < ( !['] Char )* > ['] Spacing
+ / ["] < ( !["] Char )* > ["] Spacing
+ Class <\- '[' < ( !']' Range )* > ']' Spacing
+ Range <\- Char '\-' Char / Char
+ Char <\- '\\\\' [abefnrtv'"\\[\\]\\\\]
+ / '\\\\' [0\-3][0\-7][0\-7]
+ / '\\\\' [0\-7][0\-7]?
+ / '\\\\' '\-'
+ / !'\\\\' .
+ LEFTARROW <\- '<\-' Spacing
+ SLASH <\- '/' Spacing
+ AND <\- '&' Spacing
+ NOT <\- '!' Spacing
+ QUERY <\- '?' Spacing
+ STAR <\- '*' Spacing
+ PLUS <\- '+' Spacing
+ OPEN <\- '(' Spacing
+ CLOSE <\- ')' Spacing
+ DOT <\- '.' Spacing
+ Spacing <\- ( Space / Comment )*
+ Comment <\- '#' ( !EndOfLine . )* EndOfLine
+ Space <\- ' ' / '\\t' / EndOfLine
+ EndOfLine <\- '\\r\\n' / '\\n' / '\\r'
+ EndOfFile <\- !.
+ Action <\- '{' < [^}]* > '}' Spacing
+ BEGIN <\- '<' Spacing
+ END <\- '>' Spacing
+
+.fi
+.SH LEG GRAMMARS
+.I leg
+is a variant of
+.I peg
+that adds some features of
+.IR lex (1)
+and
+.IR yacc (1).
+It differs from
+.I peg
+in the following ways.
+.TP
+.BI %{\ text... \ %}
+A declaration section can appear anywhere that a rule definition is
+expected. The
+.I text
+between the delimiters '%{' and '%}' is copied verbatim to the
+generated C parser code
+.I before
+the code that implements the parser itself.
+.TP
+.IB name\ = \ pattern
+The 'assignment' operator replaces the left arrow operator '<\-'.
+.TP
+.B rule\-name
+Hyphens can appear as letters in the names of rules. Each hyphen is
+converted into an underscore in the generated C source code. A
+single hyphen '\-' is a legal rule name.
+.nf
+
+ \- = [ \\t\\n\\r]*
+ number = [0\-9]+ \-
+ name = [a\-zA\-Z_][a\-zA_Z_0\-9]* \-
+ l\-paren = '(' \-
+ r\-paren = ')' \-
+
+.fi
+This example shows how ignored whitespace can be obvious when reading
+the grammar and yet unobtrusive when placed liberally at the end of
+every rule associated with a lexical element.
+.TP
+.IB seq\-1\ | \ seq\-2
+The alternation operator is vertical bar '|' rather than forward
+slash '/'. The
+.I peg
+rule
+.nf
+
+ name <\- sequence\-1
+ / sequence\-2
+ / sequence\-3
+
+.fi
+is therefore written
+.nf
+
+ name = sequence\-1
+ | sequence\-2
+ | sequence\-3
+ ;
+
+.fi
+in
+.I leg
+(with the final semicolon being optional, as described next).
+.TP
+.IB @{\ action\ }
+Actions prefixed with an 'at' symbol will be performed during parsing,
+at the time they are encountered while matching the input text with a
+rule.
+Because of back-tracking in the PEG parsing algorithm, actions
+prefixed with '@' might be performed multiple times for the same input
+text.
+(The usual behviour of actions is that they are saved up until
+matching is complete, and then those that are part of the
+final derivation are performed in left-to-right order.)
+The variable
+.I yytext
+is available within these actions.
+.TP
+.IB exp \ ~ \ {\ action\ }
+A postfix operator
+.BI ~ {\ action\ }
+can be placed after any expression and will behave like a normal
+action (arbitrary C code) except that it is invoked only when
+.I exp
+fails. It binds less tightly than any other operator except alternation and sequencing, and
+is intended to make error handling and recovery code easier to write.
+Note that
+.I yytext
+and
+.I yyleng
+are not available inside these actions, but the pointer variable
+.I yy
+is available to give the code access to any user\-defined members
+of the parser state (see "CUSTOMISING THE PARSER" below).
+Note also that
+.I exp
+is always a single expression; to invoke an error action for any
+failure within a sequence, parentheses must be used to group the
+sequence into a single expression.
+.nf
+
+ rule = e1 e2 e3 ~{ error("e[12] ok; e3 has failed"); }
+ | ...
+
+ rule = (e1 e2 e3) ~{ error("one of e[123] has failed"); }
+ | ...
+.fi
+.TP
+.IB pattern\ ;
+A semicolon punctuator can optionally terminate a
+.IR pattern .
+.TP
+.BI %% \ text...
+A double percent '%%' terminates the rules (and declarations) section of
+the grammar. All
+.I text
+following '%%' is copied verbatim to the generated C parser code
+.I after
+the parser implementation code.
+.TP
+.BI $$\ = \ value
+A sub\-rule can return a semantic
+.I value
+from an action by assigning it to the pseudo\-variable '$$'. All
+semantic values must have the same type (which defaults to 'int').
+This type can be changed by defining YYSTYPE in a declaration section.
+.TP
+.IB identifier : name
+The semantic value returned (by assigning to '$$') from the sub\-rule
+.I name
+is associated with the
+.I identifier
+and can be referred to in subsequent actions.
+.PP
+The desk calculator example below illustrates the use of '$$' and ':'.
+.SH LEG EXAMPLE: A DESK CALCULATOR
+The extensions in
+.I leg
+described above allow useful parsers and evaluators (including
+declarations, grammar rules, and supporting C functions such
+as 'main') to be kept within a single source file. To illustrate this
+we show a simple desk calculator supporting the four common arithmetic
+operators and named variables. The intermediate results of arithmetic
+evaluation will be accumulated on an implicit stack by returning them
+as semantic values from sub\-rules.
+.nf
+
+ %{
+ #include <stdio.h> /* printf() */
+ #include <stdlib.h> /* atoi() */
+ int vars[26];
+ %}
+
+ Stmt = \- e:Expr EOL { printf("%d\\n", e); }
+ | ( !EOL . )* EOL { printf("error\\n"); }
+
+ Expr = i:ID ASSIGN s:Sum { $$ = vars[i] = s; }
+ | s:Sum { $$ = s; }
+
+ Sum = l:Product
+ ( PLUS r:Product { l += r; }
+ | MINUS r:Product { l \-= r; }
+ )* { $$ = l; }
+
+ Product = l:Value
+ ( TIMES r:Value { l *= r; }
+ | DIVIDE r:Value { l /= r; }
+ )* { $$ = l; }
+
+ Value = i:NUMBER { $$ = atoi(yytext); }
+ | i:ID !ASSIGN { $$ = vars[i]; }
+ | OPEN i:Expr CLOSE { $$ = i; }
+
+ NUMBER = < [0\-9]+ > \- { $$ = atoi(yytext); }
+ ID = < [a\-z] > \- { $$ = yytext[0] \- 'a'; }
+ ASSIGN = '=' \-
+ PLUS = '+' \-
+ MINUS = '\-' \-
+ TIMES = '*' \-
+ DIVIDE = '/' \-
+ OPEN = '(' \-
+ CLOSE = ')' \-
+
+ \- = [ \\t]*
+ EOL = '\\n' | '\\r\\n' | '\\r' | ';'
+
+ %%
+
+ int main()
+ {
+ while (yyparse())
+ ;
+ return 0;
+ }
+
+.fi
+.SH LEG GRAMMAR FOR LEG GRAMMARS
+The grammar for
+.I leg
+grammars is shown below. This will both illustrate and formalise the
+above description.
+.nf
+
+ grammar = \-
+ ( declaration | definition )+
+ trailer? end\-of\-file
+
+ declaration = '%{' < ( !'%}' . )* > RPERCENT
+
+ trailer = '%%' < .* >
+
+ definition = identifier EQUAL expression SEMICOLON?
+
+ expression = sequence ( BAR sequence )*
+
+ sequence = error+
+
+ error = prefix ( TILDE action )?
+
+ prefix = AND action
+ | ( AND | NOT )? suffix
+
+ suffix = primary ( QUERY | STAR | PLUS )?
+
+ primary = identifier COLON identifier !EQUAL
+ | identifier !EQUAL
+ | OPEN expression CLOSE
+ | literal
+ | class
+ | DOT
+ | action
+ | BEGIN
+ | END
+
+ identifier = < [\-a\-zA\-Z_][\-a\-zA\-Z_0\-9]* > \-
+
+ literal = ['] < ( !['] char )* > ['] \-
+ | ["] < ( !["] char )* > ["] \-
+
+ class = '[' < ( !']' range )* > ']' \-
+
+ range = char '\-' char | char
+
+ char = '\\\\' [abefnrtv'"\\[\\]\\\\]
+ | '\\\\' [0\-3][0\-7][0\-7]
+ | '\\\\' [0\-7][0\-7]?
+ | !'\\\\' .
+
+ action = '{' < braces* > '}' \-
+
+ braces = '{' braces* '}'
+ | !'}' .
+
+ EQUAL = '=' \-
+ COLON = ':' \-
+ SEMICOLON = ';' \-
+ BAR = '|' \-
+ AND = '&' \-
+ NOT = '!' \-
+ QUERY = '?' \-
+ STAR = '*' \-
+ PLUS = '+' \-
+ OPEN = '(' \-
+ CLOSE = ')' \-
+ DOT = '.' \-
+ BEGIN = '<' \-
+ END = '>' \-
+ TILDE = '~' \-
+ RPERCENT = '%}' \-
+
+ \- = ( space | comment )*
+ space = ' ' | '\\t' | end\-of\-line
+ comment = '#' ( !end\-of\-line . )* end\-of\-line
+ end\-of\-line = '\\r\\n' | '\\n' | '\\r'
+ end\-of\-file = !.
+
+.fi
+.SH CUSTOMISING THE PARSER
+The following symbols can be redefined in declaration sections to
+modify the generated parser code.
+.TP
+.B YYSTYPE
+The semantic value type. The pseudo\-variable '$$' and the
+identifiers 'bound' to rule results with the colon operator ':' should
+all be considered as being declared to have this type. The default
+value is 'int'.
+.TP
+.B YYPARSE
+The name of the main entry point to the parser. The default value
+is 'yyparse'.
+.TP
+.B YYPARSEFROM
+The name of an alternative entry point to the parser. This function
+expects one argument: the function corresponding to the rule from
+which the search for a match should begin. The default
+is 'yyparsefrom'. Note that yyparse() is defined as
+.nf
+
+ int yyparse() { return yyparsefrom(yy_foo); }
+
+.fi
+where 'foo' is the name of the first rule in the grammar.
+.TP
+.BI YY_INPUT( buf , \ result , \ max_size )
+This macro is invoked by the parser to obtain more input text.
+.I buf
+points to an area of memory that can hold at most
+.I max_size
+characters. The macro should copy input text to
+.I buf
+and then assign the integer variable
+.I result
+to indicate the number of characters copied. If no more input is available,
+the macro should assign 0 to
+.IR result .
+By default, the YY_INPUT macro is defined as follows.
+.nf
+
+ #define YY_INPUT(buf, result, max_size) \\
+ { \\
+ int yyc= getchar(); \\
+ result= (EOF == yyc) ? 0 : (*(buf)= yyc, 1); \\
+ }
+
+.fi
+Note that if YY_CTX_LOCAL is defined (see below) then an additional
+first argument, containing the parser context, is passed to YY_INPUT.
+.TP
+.B YY_DEBUG
+If this symbols is defined then additional code will be included in
+the parser that prints vast quantities of arcane information to the
+standard error while the parser is running.
+.TP
+.B YY_BEGIN
+This macro is invoked to mark the start of input text that will be
+made available in actions as 'yytext'. This corresponds to
+occurrences of '<' in the grammar. These are converted into
+predicates that are expected to succeed. The default definition
+.nf
+
+ #define YY_BEGIN (yybegin= yypos, 1)
+
+.fi
+therefore saves the current input position and returns 1 ('true') as
+the result of the predicate.
+.TP
+.B YY_END
+This macros corresponds to '>' in the grammar. Again, it is a
+predicate so the default definition saves the input position
+before 'succeeding'.
+.nf
+
+ #define YY_END (yyend= yypos, 1)
+
+.fi
+.TP
+.BI YY_PARSE( T )
+This macro declares the parser entry points (yyparse and yyparsefrom)
+to be of type
+.IR T .
+The default definition
+.nf
+
+ #define YY_PARSE(T) T
+
+.fi
+leaves yyparse() and yyparsefrom() with global visibility. If they
+should not be externally visible in other source files, this macro can
+be redefined to declare them 'static'.
+.nf
+
+ #define YY_PARSE(T) static T
+
+.fi
+.TP
+.B YY_CTX_LOCAL
+If this symbol is defined during compilation of a generated parser
+then global parser state will be kept in a structure of
+type 'yycontext' which can be declared as a local variable. This
+allows multiple instances of parsers to coexist and to be thread\-safe.
+The parsing function
+.IR yyparse ()
+will be declared to expect a first argument of type 'yycontext *', an
+instance of the structure holding the global state for the parser.
+This instance must be allocated and initialised to zero by the client.
+A trivial but complete example is as follows.
+.nf
+
+ #include <stdio.h>
+
+ #define YY_CTX_LOCAL
+
+ #include "the\-generated\-parser.peg.c"
+
+ int main()
+ {
+ yycontext ctx;
+ memset(&ctx, 0, sizeof(yycontext));
+ while (yyparse(&ctx));
+ return 0;
+ }
+
+.fi
+Note that if this symbol is undefined then the compiled parser will
+statically allocate its global state and will be neither reentrant nor
+thread\-safe.
+Note also that the parser yycontext structure is initialised automatically
+the first time
+.IR yyparse ()
+is called; this structure
+.B must
+therefore be properly initialised to zero before the first call to
+.IR yyparse ().
+.TP
+.B YY_CTX_MEMBERS
+If YY_CTX_LOCAL is defined (see above) then the macro YY_CTX_MEMBERS
+can be defined to expand to any additional member field declarations
+that the client would like included in the declaration of
+the 'yycontext' structure type. These additional members are
+otherwise ignored by the generated parser. The instance
+of 'yycontext' associated with the currently\-active parser is
+available within actions as the pointer variable
+.IR yy .
+.TP
+.B YY_BUFFER_SIZE
+The initial size of the text buffer, in bytes. The default is 1024
+and the buffer size is doubled whenever required to meet demand during
+parsing. An application that typically parses much longer strings
+could increase this to avoid unnecessary buffer reallocation.
+.TP
+.B YY_STACK_SIZE
+The initial size of the variable and action stacks. The default is
+128, which is doubled whenever required to meet demand during parsing.
+Applications that have deep call stacks with many local variables, or
+that perform many actions after a single successful match, could increase
+this to avoid unnecessary buffer reallocation.
+.TP
+.BI YY_MALLOC( YY , \ SIZE )
+The memory allocator for all parser\-related storage. The parameters
+are the current yycontext structure and the number of bytes to
+allocate. The default definition is:
+.RI malloc( SIZE )
+.TP
+.BI YY_REALLOC( YY , \ PTR , \ SIZE )
+The memory reallocator for dynamically\-grown storage (such as text
+buffers and variable stacks). The parameters are the current
+yycontext structure, the previously\-allocated storage, and the number
+of bytes to which that storage should be grown. The default definition is:
+.RI realloc( PTR , \ SIZE )
+.TP
+.BI YY_FREE( YY , \ PTR )
+The memory deallocator. The parameters are the current yycontext
+structure and the storage to deallocate. The default definition is:
+.RI free( PTR )
+.TP
+.B YYRELEASE
+The name of the function that releases all resources held by a
+yycontext structure. The default value is 'yyrelease'.
+.PP
+The following variables can be referred to within actions.
+.TP
+.B char *yybuf
+This variable points to the parser's input buffer used to store input
+text that has not yet been matched.
+.TP
+.B int yypos
+This is the offset (in yybuf) of the next character to be matched and
+consumed.
+.TP
+.B char *yytext
+The most recent matched text delimited by '<' and '>' is stored in this variable.
+.TP
+.B int yyleng
+This variable indicates the number of characters in 'yytext'.
+.TP
+.B yycontext *yy
+This variable points to the instance of 'yycontext' associated with
+the currently\-active parser.
+.PP
+Programs that wish to release all the resources associated with a
+parser can use the following function.
+.TP
+.BI yyrelease(yycontext * yy )
+Returns all parser\-allocated storage associated with
+.I yy
+to the system. The storage will be reallocated on the next call to
+.IR yyparse ().
+.PP
+Note that the storage for the yycontext structure itself is never
+allocated or reclaimed implicitly. The application must allocate
+these structures in automatic storage, or use
+.IR calloc ()
+and
+.IR free ()
+to manage them explicitly. The example in the following section
+demonstrates one approach to resource management.
+.SH LEG EXAMPLE: EXTENDING THE PARSER'S CONTEXT
+The
+.I yy
+variable passed to actions contains the state of the parser plus any
+additional fields defined by YY_CTX_MEMBERS. Theses fields can be
+used to store application\-specific information that is global to a
+particular call of
+.IR yyparse ().
+A trivial but complete
+.I leg
+example follows in which the yycontext
+structure is extended with a
+.I count
+of the number of newline characters
+seen in the input so far (the grammar otherwise consumes and ignores
+the entire input). The caller of
+.IR yyparse ()
+uses
+.I count
+to print the number of lines of input that were read.
+
+.nf
+
+ %{
+ #define YY_CTX_LOCAL 1
+ #define YY_CTX_MEMBERS \\
+ int count;
+ %}
+
+ Char = ('\\n' | '\\r\\n' | '\\r') { yy\->count++ }
+ | .
+
+ %%
+
+ #include <stdio.h>
+ #include <string.h>
+
+ int main()
+ {
+ /* create a local parser context in automatic storage */
+ yycontext yy;
+ /* the context *must* be initialised to zero before first use*/
+ memset(&yy, 0, sizeof(yy));
+
+ while (yyparse(&yy))
+ ;
+ printf("%d newlines\\n", yy.count);
+
+ /* release all resources associated with the context */
+ yyrelease(&yy);
+
+ return 0;
+ }
+
+.fi
+.SH DIAGNOSTICS
+.I peg
+and
+.I leg
+warn about the following conditions while converting a grammar into a parser.
+.TP
+.B syntax error
+The input grammar was malformed in some way. The error message will
+include the text about to be matched (often backed up a huge amount
+from the actual location of the error) and the line number of the most
+recently considered character (which is often the real location of the
+problem).
+.TP
+.B rule 'foo' used but not defined
+The grammar referred to a rule named 'foo' but no definition for it
+was given. Attempting to use the generated parser will likely result
+in errors from the linker due to undefined symbols associated with the
+missing rule.
+.TP
+.B rule 'foo' defined but not used
+The grammar defined a rule named 'foo' and then ignored it. The code
+associated with the rule is included in the generated parser which
+will in all other respects be healthy.
+.TP
+.B possible infinite left recursion in rule 'foo'
+There exists at least one path through the grammar that leads from the
+rule 'foo' back to (a recursive invocation of) the same rule without
+consuming any input.
+.PP
+Left recursion, especially that found in standards documents, is
+often 'direct' and implies trivial repetition.
+.nf
+
+ # (6.7.6)
+ direct\-abstract\-declarator =
+ LPAREN abstract\-declarator RPAREN
+ | direct\-abstract\-declarator? LBRACKET assign\-expr? RBRACKET
+ | direct\-abstract\-declarator? LBRACKET STAR RBRACKET
+ | direct\-abstract\-declarator? LPAREN param\-type\-list? RPAREN
+
+.fi
+The recursion can easily be eliminated by converting the parts of the
+pattern following the recursion into a repeatable suffix.
+.nf
+
+ # (6.7.6)
+ direct\-abstract\-declarator =
+ direct\-abstract\-declarator\-head?
+ direct\-abstract\-declarator\-tail*
+
+ direct\-abstract\-declarator\-head =
+ LPAREN abstract\-declarator RPAREN
+
+ direct\-abstract\-declarator\-tail =
+ LBRACKET assign\-expr? RBRACKET
+ | LBRACKET STAR RBRACKET
+ | LPAREN param\-type\-list? RPAREN
+
+.fi
+.SH CAVEATS
+A parser that accepts empty input will
+.I always
+succeed. Consider the following example, not atypical of a first
+attempt to write a PEG\-based parser:
+.nf
+
+ Program = Expression*
+ Expression = "whatever"
+ %%
+ int main() {
+ while (yyparse())
+ puts("success!");
+ return 0;
+ }
+
+.fi
+This program loops forever, no matter what (if any) input is provided
+on stdin. Many fixes are possible, the easiest being to insist that
+the parser always consumes some non\-empty input. Changing the first
+line to
+.nf
+
+ Program = Expression+
+
+.fi
+accomplishes this. If the parser is expected to consume the entire
+input, then explicitly requiring the end\-of\-file is also highly
+recommended:
+.nf
+
+ Program = Expression+ !.
+
+.fi
+This works because the parser will only fail to match ("!" predicate)
+any character at all ("." expression) when it attempts to read beyond
+the end of the input.
+.SH BUGS
+You have to type 'man peg' to read the manual page for
+.IR leg (1).
+.PP
+The 'yy' and 'YY' prefixes cannot be changed.
+.PP
+Left recursion is detected in the input grammar but is not handled
+correctly in the generated parser.
+.PP
+Diagnostics for errors in the input grammar are obscure and not
+particularly helpful.
+.PP
+The operators
+.BR ! \ \c
+and
+.B ~
+should really be named the other way around.
+.PP
+Several commonly\-used
+.IR lex (1)
+features (yywrap(), yyin, etc.) are completely absent.
+.PP
+The generated parser does not contain '#line' directives to direct C
+compiler errors back to the grammar description when appropriate.
+.SH SEE ALSO
+D. Val Schorre,
+.I META II, a syntax\-oriented compiler writing language,
+19th ACM National Conference, 1964, pp.\ 41.301\-\-41.311. Describes a
+self\-implementing parser generator for analytic grammars with no
+backtracking.
+.PP
+Alexander Birman,
+.I The TMG Recognition Schema,
+Ph.D. dissertation, Princeton, 1970. A mathematical treatment of the
+power and complexity of recursive\-descent parsing with backtracking.
+.PP
+Bryan Ford,
+.I Parsing Expression Grammars: A Recognition\-Based Syntactic Foundation,
+ACM SIGPLAN Symposium on Principles of Programming Languages, 2004.
+Defines PEGs and analyses them in relation to context\-free and regular
+grammars. Introduces the syntax adopted in
+.IR peg .
+.PP
+The standard Unix utilities
+.IR lex (1)
+and
+.IR yacc (1)
+which influenced the syntax and features of
+.IR leg .
+.PP
+The source code for
+.I peg
+and
+.I leg
+whose grammar parsers are written using themselves.
+.PP
+The latest version of this software and documentation:
+.nf
+
+ http://piumarta.com/software/peg
+
+.fi
+.SH AUTHOR
+.IR peg ,
+.I leg
+and this manual page were written by Ian Piumarta (first\-name at
+last\-name dot com) while investigating the viability of regular and
+parsing\-expression grammars for efficiently extracting type and
+signature information from C header files.
+.PP
+Please send bug reports and suggestions for improvements to the author
+at the above address.
--- a/tinypeg.1
+++ /dev/null
@@ -1,1147 +1,0 @@
-.\" Copyright (c) 2007,2016 by Ian Piumarta
-.\" All rights reserved.
-.\"
-.\" Permission is hereby granted, free of charge, to any person obtaining a
-.\" copy of this software and associated documentation files (the 'Software'),
-.\" to deal in the Software without restriction, including without limitation
-.\" the rights to use, copy, modify, merge, publish, distribute, and/or sell
-.\" copies of the Software, and to permit persons to whom the Software is
-.\" furnished to do so, provided that the above copyright notice(s) and this
-.\" permission notice appear in all copies of the Software. Acknowledgement
-.\" of the use of this Software in supporting documentation would be
-.\" appreciated but is not required.
-.\"
-.\" THE SOFTWARE IS PROVIDED 'AS IS'. USE ENTIRELY AT YOUR OWN RISK.
-.\"
-.\" Last edited: 2016-07-22 09:47:29 by piumarta on zora.local
-.\"
-.TH PEG 1 "September 2013" "Version 0.1"
-.SH NAME
-peg, leg \- parser generators
-.SH SYNOPSIS
-.B peg
-.B [\-hvV \-ooutput]
-.I [filename ...]
-.sp 0
-.B leg
-.B [\-hvV \-ooutput]
-.I [filename ...]
-.SH DESCRIPTION
-.I peg
-and
-.I leg
-are tools for generating recursive\-descent parsers: programs that
-perform pattern matching on text. They process a Parsing Expression
-Grammar (PEG) [Ford 2004] to produce a program that recognises legal
-sentences of that grammar.
-.I peg
-processes PEGs written using the original syntax described by Ford;
-.I leg
-processes PEGs written using slightly different syntax and conventions
-that are intended to make it an attractive replacement for parsers
-built with
-.IR lex (1)
-and
-.IR yacc (1).
-Unlike
-.I lex
-and
-.IR yacc ,
-.I peg
-and
-.I leg
-support unlimited backtracking, provide ordered choice as a means for
-disambiguation, and can combine scanning (lexical analysis) and
-parsing (syntactic analysis) into a single activity.
-.PP
-.I peg
-reads the specified
-.IR filename s,
-or standard input if no
-.IR filename s
-are given, for a grammar describing the parser to generate.
-.I peg
-then generates a C source file that defines a function
-.IR yyparse().
-This C source file can be included in, or compiled and then linked
-with, a client program. Each time the client program calls
-.IR yyparse ()
-the parser consumes input text according to the parsing rules,
-starting from the first rule in the grammar.
-.IR yyparse ()
-returns non\-zero if the input could be parsed according to the
-grammar; it returns zero if the input could not be parsed.
-.PP
-The prefix 'yy' or 'YY' is prepended to all externally\-visible symbols
-in the generated parser. This is intended to reduce the risk of
-namespace pollution in client programs. (The choice of 'yy' is
-historical; see
-.IR lex (1)
-and
-.IR yacc (1),
-for example.)
-.SH OPTIONS
-.I peg
-and
-.I leg
-provide the following options:
-.TP
-.B \-h
-prints a summary of available options and then exits.
-.TP
-.B \-ooutput
-writes the generated parser to the file
-.B output
-instead of the standard output.
-.TP
-.B \-P
-suppresses #line directives in the output.
-.TP
-.B \-v
-writes verbose information to standard error while working.
-.TP
-.B \-V
-writes version information to standard error then exits.
-.SH A SIMPLE EXAMPLE
-The following
-.I peg
-input specifies a grammar with a single rule (called 'start') that is
-satisfied when the input contains the string "username".
-.nf
-
- start <\- "username"
-
-.fi
-(The quotation marks are
-.I not
-part of the matched text; they serve to indicate a literal string to
-be matched.) In other words,
-.IR yyparse ()
-in the generated C source will return non\-zero only if the next eight
-characters read from the input spell the word "username". If the
-input contains anything else,
-.IR yyparse ()
-returns zero and no input will have been consumed. (Subsequent calls
-to
-.IR yyparse ()
-will also return zero, since the parser is effectively blocked looking
-for the string "username".) To ensure progress we can add an
-alternative clause to the 'start' rule that will match any single
-character if "username" is not found.
-.nf
-
- start <\- "username"
- / .
-
-.fi
-.IR yyparse ()
-now always returns non\-zero (except at the very end of the input). To
-do something useful we can add actions to the rules. These actions
-are performed after a complete match is found (starting from the first
-rule) and are chosen according to the 'path' taken through the grammar
-to match the input. (Linguists would call this path a 'phrase
-marker'.)
-.nf
-
- start <\- "username" { printf("%s\\n", getlogin()); }
- / < . > { putchar(yytext[0]); }
-
-.fi
-The first line instructs the parser to print the user's login name
-whenever it sees "username" in the input. If that match fails, the
-second line tells the parser to echo the next character on the input
-the standard output. Our parser is now performing useful work: it
-will copy the input to the output, replacing all occurrences of
-"username" with the user's account name.
-.PP
-Note the angle brackets ('<' and '>') that were added to the second
-alternative. These have no effect on the meaning of the rule, but
-serve to delimit the text made available to the following action in
-the variable
-.IR yytext .
-.PP
-If the above grammar is placed in the file
-.BR username.peg ,
-running the command
-.nf
-
- peg \-o username.c username.peg
-
-.fi
-will save the corresponding parser in the file
-.BR username.c .
-To create a complete program this parser could be included by a C
-program as follows.
-.nf
-
- #include <stdio.h> /* printf(), putchar() */
- #include <unistd.h> /* getlogin() */
-
- #include "username.c" /* yyparse() */
-
- int main()
- {
- while (yyparse()) /* repeat until EOF */
- ;
- return 0;
- }
-.fi
-.SH PEG GRAMMARS
-A grammar consists of a set of named rules.
-.nf
-
- name <\- pattern
-
-.fi
-The
-.B pattern
-contains one or more of the following elements.
-.TP
-.B name
-The element stands for the entire pattern in the rule with the given
-.BR name .
-.TP
-.BR \(dq characters \(dq
-A character or string enclosed in double quotes is matched literally.
-The ANSI C escape sequences are recognised within the
-.IR characters .
-.TP
-.BR ' characters '
-A character or string enclosed in single quotes is matched literally, as above.
-.TP
-.BR [ characters ]
-A set of characters enclosed in square brackets matches any single
-character from the set, with escape characters recognised as above.
-If the set begins with an uparrow (^) then the set is negated (the
-element matches any character
-.I not
-in the set). Any pair of characters separated with a dash (\-)
-represents the range of characters from the first to the second,
-inclusive. A single alphabetic character or underscore is matched by
-the following set.
-.nf
-
- [a\-zA\-Z_]
-
-.fi
-Similarly, the following matches any single non\-digit character.
-.nf
-
- [^0\-9]
-
-.fi
-.TP
-.B .
-A dot matches any character. Note that the only time this fails is at
-the end of file, where there is no character to match.
-.TP
-.BR ( \ pattern\ )
-Parentheses are used for grouping (modifying the precedence of the
-operators described below).
-.TP
-.BR { \ action\ }
-Curly braces surround actions. The action is arbitrary C source code
-to be executed at the end of matching. Any braces within the action
-must be properly nested. Any input text that was matched before the
-action and delimited by angle brackets (see below) is made available
-within the action as the contents of the character array
-.IR yytext .
-The length of (number of characters in)
-.I yytext
-is available in the variable
-.IR yyleng .
-(These variable names are historical; see
-.IR lex (1).)
-.TP
-.B <
-An opening angle bracket always matches (consuming no input) and
-causes the parser to begin accumulating matched text. This text will
-be made available to actions in the variable
-.IR yytext .
-.TP
-.B >
-A closing angle bracket always matches (consuming no input) and causes
-the parser to stop accumulating text for
-.IR yytext .
-.PP
-The above
-.IR element s
-can be made optional and/or repeatable with the following suffixes:
-.TP
-.RB element\ ?
-The element is optional. If present on the input, it is consumed and
-the match succeeds. If not present on the input, no text is consumed
-and the match succeeds anyway.
-.TP
-.RB element\ +
-The element is repeatable. If present on the input, one or more
-occurrences of
-.I element
-are consumed and the match succeeds. If no occurrences of
-.I element
-are present on the input, the match fails.
-.TP
-.RB element\ *
-The element is optional and repeatable. If present on the input, one or more
-occurrences of
-.I element
-are consumed and the match succeeds. If no occurrences of
-.I element
-are present on the input, the match succeeds anyway.
-.PP
-The above elements and suffixes can be converted into predicates (that
-match arbitrary input text and subsequently succeed or fail
-.I without
-consuming that input) with the following prefixes:
-.TP
-.BR & \ element
-The predicate succeeds only if
-.I element
-can be matched. Input text scanned while matching
-.I element
-is not consumed from the input and remains available for subsequent
-matching.
-.TP
-.BR ! \ element
-The predicate succeeds only if
-.I element
-cannot be matched. Input text scanned while matching
-.I element
-is not consumed from the input and remains available for subsequent
-matching. A popular idiom is
-.nf
-
- !.
-
-.fi
-which matches the end of file, after the last character of the input
-has already been consumed.
-.PP
-A special form of the '&' predicate is provided:
-.TP
-.BR & {\ expression\ }
-In this predicate the simple C
-.I expression
-.RB ( not
-statement) is evaluated immediately when the parser reaches the
-predicate. If the
-.I expression
-yields non\-zero (true) the 'match' succeeds and the parser continues
-with the next element in the pattern. If the
-.I expression
-yields zero (false) the 'match' fails and the parser backs up to look
-for an alternative parse of the input.
-.PP
-Several elements (with or without prefixes and suffixes) can be
-combined into a
-.I sequence
-by writing them one after the other. The entire sequence matches only
-if each individual element within it matches, from left to right.
-.PP
-Sequences can be separated into disjoint alternatives by the
-alternation operator '/'.
-.TP
-.RB sequence\-1\ / \ sequence\-2\ / \ ...\ / \ sequence\-N
-Each sequence is tried in turn until one of them matches, at which
-time matching for the overall pattern succeeds. If none of the
-sequences matches then the match of the overall pattern fails.
-.PP
-Finally, the pound sign (#) introduces a comment (discarded) that
-continues until the end of the line.
-.PP
-To summarise the above, the parser tries to match the input text
-against a pattern containing literals, names (representing other
-rules), and various operators (written as prefixes, suffixes,
-juxtaposition for sequencing and and infix alternation operator) that
-modify how the elements within the pattern are matched. Matches are
-made from left to right, 'descending' into named sub\-rules as they are
-encountered. If the matching process fails, the parser 'back tracks'
-('rewinding' the input appropriately in the process) to find the
-nearest alternative 'path' through the grammar. In other words the
-parser performs a depth\-first, left\-to\-right search for the first
-successfully\-matching path through the rules. If found, the actions
-along the successful path are executed (in the order they were
-encountered).
-.PP
-Note that predicates are evaluated
-.I immediately
-during the search for a successful match, since they contribute to the
-success or failure of the search. Actions, however, are evaluated
-only after a successful match has been found.
-.SH PEG GRAMMAR FOR PEG GRAMMARS
-The grammar for
-.I peg
-grammars is shown below. This will both illustrate and formalise
-the above description.
-.nf
-
- Grammar <\- Spacing Definition+ EndOfFile
-
- Definition <\- Identifier LEFTARROW Expression
- Expression <\- Sequence ( SLASH Sequence )*
- Sequence <\- Prefix*
- Prefix <\- AND Action
- / ( AND | NOT )? Suffix
- Suffix <\- Primary ( QUERY / STAR / PLUS )?
- Primary <\- Identifier !LEFTARROW
- / OPEN Expression CLOSE
- / Literal
- / Class
- / DOT
- / Action
- / BEGIN
- / END
-
- Identifier <\- < IdentStart IdentCont* > Spacing
- IdentStart <\- [a\-zA\-Z_]
- IdentCont <\- IdentStart / [0\-9]
- Literal <\- ['] < ( !['] Char )* > ['] Spacing
- / ["] < ( !["] Char )* > ["] Spacing
- Class <\- '[' < ( !']' Range )* > ']' Spacing
- Range <\- Char '\-' Char / Char
- Char <\- '\\\\' [abefnrtv'"\\[\\]\\\\]
- / '\\\\' [0\-3][0\-7][0\-7]
- / '\\\\' [0\-7][0\-7]?
- / '\\\\' '\-'
- / !'\\\\' .
- LEFTARROW <\- '<\-' Spacing
- SLASH <\- '/' Spacing
- AND <\- '&' Spacing
- NOT <\- '!' Spacing
- QUERY <\- '?' Spacing
- STAR <\- '*' Spacing
- PLUS <\- '+' Spacing
- OPEN <\- '(' Spacing
- CLOSE <\- ')' Spacing
- DOT <\- '.' Spacing
- Spacing <\- ( Space / Comment )*
- Comment <\- '#' ( !EndOfLine . )* EndOfLine
- Space <\- ' ' / '\\t' / EndOfLine
- EndOfLine <\- '\\r\\n' / '\\n' / '\\r'
- EndOfFile <\- !.
- Action <\- '{' < [^}]* > '}' Spacing
- BEGIN <\- '<' Spacing
- END <\- '>' Spacing
-
-.fi
-.SH LEG GRAMMARS
-.I leg
-is a variant of
-.I peg
-that adds some features of
-.IR lex (1)
-and
-.IR yacc (1).
-It differs from
-.I peg
-in the following ways.
-.TP
-.BI %{\ text... \ %}
-A declaration section can appear anywhere that a rule definition is
-expected. The
-.I text
-between the delimiters '%{' and '%}' is copied verbatim to the
-generated C parser code
-.I before
-the code that implements the parser itself.
-.TP
-.IB name\ = \ pattern
-The 'assignment' operator replaces the left arrow operator '<\-'.
-.TP
-.B rule\-name
-Hyphens can appear as letters in the names of rules. Each hyphen is
-converted into an underscore in the generated C source code. A
-single hyphen '\-' is a legal rule name.
-.nf
-
- \- = [ \\t\\n\\r]*
- number = [0\-9]+ \-
- name = [a\-zA\-Z_][a\-zA_Z_0\-9]* \-
- l\-paren = '(' \-
- r\-paren = ')' \-
-
-.fi
-This example shows how ignored whitespace can be obvious when reading
-the grammar and yet unobtrusive when placed liberally at the end of
-every rule associated with a lexical element.
-.TP
-.IB seq\-1\ | \ seq\-2
-The alternation operator is vertical bar '|' rather than forward
-slash '/'. The
-.I peg
-rule
-.nf
-
- name <\- sequence\-1
- / sequence\-2
- / sequence\-3
-
-.fi
-is therefore written
-.nf
-
- name = sequence\-1
- | sequence\-2
- | sequence\-3
- ;
-
-.fi
-in
-.I leg
-(with the final semicolon being optional, as described next).
-.TP
-.IB @{\ action\ }
-Actions prefixed with an 'at' symbol will be performed during parsing,
-at the time they are encountered while matching the input text with a
-rule.
-Because of back-tracking in the PEG parsing algorithm, actions
-prefixed with '@' might be performed multiple times for the same input
-text.
-(The usual behviour of actions is that they are saved up until
-matching is complete, and then those that are part of the
-final derivation are performed in left-to-right order.)
-The variable
-.I yytext
-is available within these actions.
-.TP
-.IB exp \ ~ \ {\ action\ }
-A postfix operator
-.BI ~ {\ action\ }
-can be placed after any expression and will behave like a normal
-action (arbitrary C code) except that it is invoked only when
-.I exp
-fails. It binds less tightly than any other operator except alternation and sequencing, and
-is intended to make error handling and recovery code easier to write.
-Note that
-.I yytext
-and
-.I yyleng
-are not available inside these actions, but the pointer variable
-.I yy
-is available to give the code access to any user\-defined members
-of the parser state (see "CUSTOMISING THE PARSER" below).
-Note also that
-.I exp
-is always a single expression; to invoke an error action for any
-failure within a sequence, parentheses must be used to group the
-sequence into a single expression.
-.nf
-
- rule = e1 e2 e3 ~{ error("e[12] ok; e3 has failed"); }
- | ...
-
- rule = (e1 e2 e3) ~{ error("one of e[123] has failed"); }
- | ...
-.fi
-.TP
-.IB pattern\ ;
-A semicolon punctuator can optionally terminate a
-.IR pattern .
-.TP
-.BI %% \ text...
-A double percent '%%' terminates the rules (and declarations) section of
-the grammar. All
-.I text
-following '%%' is copied verbatim to the generated C parser code
-.I after
-the parser implementation code.
-.TP
-.BI $$\ = \ value
-A sub\-rule can return a semantic
-.I value
-from an action by assigning it to the pseudo\-variable '$$'. All
-semantic values must have the same type (which defaults to 'int').
-This type can be changed by defining YYSTYPE in a declaration section.
-.TP
-.IB identifier : name
-The semantic value returned (by assigning to '$$') from the sub\-rule
-.I name
-is associated with the
-.I identifier
-and can be referred to in subsequent actions.
-.PP
-The desk calculator example below illustrates the use of '$$' and ':'.
-.SH LEG EXAMPLE: A DESK CALCULATOR
-The extensions in
-.I leg
-described above allow useful parsers and evaluators (including
-declarations, grammar rules, and supporting C functions such
-as 'main') to be kept within a single source file. To illustrate this
-we show a simple desk calculator supporting the four common arithmetic
-operators and named variables. The intermediate results of arithmetic
-evaluation will be accumulated on an implicit stack by returning them
-as semantic values from sub\-rules.
-.nf
-
- %{
- #include <stdio.h> /* printf() */
- #include <stdlib.h> /* atoi() */
- int vars[26];
- %}
-
- Stmt = \- e:Expr EOL { printf("%d\\n", e); }
- | ( !EOL . )* EOL { printf("error\\n"); }
-
- Expr = i:ID ASSIGN s:Sum { $$ = vars[i] = s; }
- | s:Sum { $$ = s; }
-
- Sum = l:Product
- ( PLUS r:Product { l += r; }
- | MINUS r:Product { l \-= r; }
- )* { $$ = l; }
-
- Product = l:Value
- ( TIMES r:Value { l *= r; }
- | DIVIDE r:Value { l /= r; }
- )* { $$ = l; }
-
- Value = i:NUMBER { $$ = atoi(yytext); }
- | i:ID !ASSIGN { $$ = vars[i]; }
- | OPEN i:Expr CLOSE { $$ = i; }
-
- NUMBER = < [0\-9]+ > \- { $$ = atoi(yytext); }
- ID = < [a\-z] > \- { $$ = yytext[0] \- 'a'; }
- ASSIGN = '=' \-
- PLUS = '+' \-
- MINUS = '\-' \-
- TIMES = '*' \-
- DIVIDE = '/' \-
- OPEN = '(' \-
- CLOSE = ')' \-
-
- \- = [ \\t]*
- EOL = '\\n' | '\\r\\n' | '\\r' | ';'
-
- %%
-
- int main()
- {
- while (yyparse())
- ;
- return 0;
- }
-
-.fi
-.SH LEG GRAMMAR FOR LEG GRAMMARS
-The grammar for
-.I leg
-grammars is shown below. This will both illustrate and formalise the
-above description.
-.nf
-
- grammar = \-
- ( declaration | definition )+
- trailer? end\-of\-file
-
- declaration = '%{' < ( !'%}' . )* > RPERCENT
-
- trailer = '%%' < .* >
-
- definition = identifier EQUAL expression SEMICOLON?
-
- expression = sequence ( BAR sequence )*
-
- sequence = error+
-
- error = prefix ( TILDE action )?
-
- prefix = AND action
- | ( AND | NOT )? suffix
-
- suffix = primary ( QUERY | STAR | PLUS )?
-
- primary = identifier COLON identifier !EQUAL
- | identifier !EQUAL
- | OPEN expression CLOSE
- | literal
- | class
- | DOT
- | action
- | BEGIN
- | END
-
- identifier = < [\-a\-zA\-Z_][\-a\-zA\-Z_0\-9]* > \-
-
- literal = ['] < ( !['] char )* > ['] \-
- | ["] < ( !["] char )* > ["] \-
-
- class = '[' < ( !']' range )* > ']' \-
-
- range = char '\-' char | char
-
- char = '\\\\' [abefnrtv'"\\[\\]\\\\]
- | '\\\\' [0\-3][0\-7][0\-7]
- | '\\\\' [0\-7][0\-7]?
- | !'\\\\' .
-
- action = '{' < braces* > '}' \-
-
- braces = '{' braces* '}'
- | !'}' .
-
- EQUAL = '=' \-
- COLON = ':' \-
- SEMICOLON = ';' \-
- BAR = '|' \-
- AND = '&' \-
- NOT = '!' \-
- QUERY = '?' \-
- STAR = '*' \-
- PLUS = '+' \-
- OPEN = '(' \-
- CLOSE = ')' \-
- DOT = '.' \-
- BEGIN = '<' \-
- END = '>' \-
- TILDE = '~' \-
- RPERCENT = '%}' \-
-
- \- = ( space | comment )*
- space = ' ' | '\\t' | end\-of\-line
- comment = '#' ( !end\-of\-line . )* end\-of\-line
- end\-of\-line = '\\r\\n' | '\\n' | '\\r'
- end\-of\-file = !.
-
-.fi
-.SH CUSTOMISING THE PARSER
-The following symbols can be redefined in declaration sections to
-modify the generated parser code.
-.TP
-.B YYSTYPE
-The semantic value type. The pseudo\-variable '$$' and the
-identifiers 'bound' to rule results with the colon operator ':' should
-all be considered as being declared to have this type. The default
-value is 'int'.
-.TP
-.B YYPARSE
-The name of the main entry point to the parser. The default value
-is 'yyparse'.
-.TP
-.B YYPARSEFROM
-The name of an alternative entry point to the parser. This function
-expects one argument: the function corresponding to the rule from
-which the search for a match should begin. The default
-is 'yyparsefrom'. Note that yyparse() is defined as
-.nf
-
- int yyparse() { return yyparsefrom(yy_foo); }
-
-.fi
-where 'foo' is the name of the first rule in the grammar.
-.TP
-.BI YY_INPUT( buf , \ result , \ max_size )
-This macro is invoked by the parser to obtain more input text.
-.I buf
-points to an area of memory that can hold at most
-.I max_size
-characters. The macro should copy input text to
-.I buf
-and then assign the integer variable
-.I result
-to indicate the number of characters copied. If no more input is available,
-the macro should assign 0 to
-.IR result .
-By default, the YY_INPUT macro is defined as follows.
-.nf
-
- #define YY_INPUT(buf, result, max_size) \\
- { \\
- int yyc= getchar(); \\
- result= (EOF == yyc) ? 0 : (*(buf)= yyc, 1); \\
- }
-
-.fi
-Note that if YY_CTX_LOCAL is defined (see below) then an additional
-first argument, containing the parser context, is passed to YY_INPUT.
-.TP
-.B YY_DEBUG
-If this symbols is defined then additional code will be included in
-the parser that prints vast quantities of arcane information to the
-standard error while the parser is running.
-.TP
-.B YY_BEGIN
-This macro is invoked to mark the start of input text that will be
-made available in actions as 'yytext'. This corresponds to
-occurrences of '<' in the grammar. These are converted into
-predicates that are expected to succeed. The default definition
-.nf
-
- #define YY_BEGIN (yybegin= yypos, 1)
-
-.fi
-therefore saves the current input position and returns 1 ('true') as
-the result of the predicate.
-.TP
-.B YY_END
-This macros corresponds to '>' in the grammar. Again, it is a
-predicate so the default definition saves the input position
-before 'succeeding'.
-.nf
-
- #define YY_END (yyend= yypos, 1)
-
-.fi
-.TP
-.BI YY_PARSE( T )
-This macro declares the parser entry points (yyparse and yyparsefrom)
-to be of type
-.IR T .
-The default definition
-.nf
-
- #define YY_PARSE(T) T
-
-.fi
-leaves yyparse() and yyparsefrom() with global visibility. If they
-should not be externally visible in other source files, this macro can
-be redefined to declare them 'static'.
-.nf
-
- #define YY_PARSE(T) static T
-
-.fi
-.TP
-.B YY_CTX_LOCAL
-If this symbol is defined during compilation of a generated parser
-then global parser state will be kept in a structure of
-type 'yycontext' which can be declared as a local variable. This
-allows multiple instances of parsers to coexist and to be thread\-safe.
-The parsing function
-.IR yyparse ()
-will be declared to expect a first argument of type 'yycontext *', an
-instance of the structure holding the global state for the parser.
-This instance must be allocated and initialised to zero by the client.
-A trivial but complete example is as follows.
-.nf
-
- #include <stdio.h>
-
- #define YY_CTX_LOCAL
-
- #include "the\-generated\-parser.peg.c"
-
- int main()
- {
- yycontext ctx;
- memset(&ctx, 0, sizeof(yycontext));
- while (yyparse(&ctx));
- return 0;
- }
-
-.fi
-Note that if this symbol is undefined then the compiled parser will
-statically allocate its global state and will be neither reentrant nor
-thread\-safe.
-Note also that the parser yycontext structure is initialised automatically
-the first time
-.IR yyparse ()
-is called; this structure
-.B must
-therefore be properly initialised to zero before the first call to
-.IR yyparse ().
-.TP
-.B YY_CTX_MEMBERS
-If YY_CTX_LOCAL is defined (see above) then the macro YY_CTX_MEMBERS
-can be defined to expand to any additional member field declarations
-that the client would like included in the declaration of
-the 'yycontext' structure type. These additional members are
-otherwise ignored by the generated parser. The instance
-of 'yycontext' associated with the currently\-active parser is
-available within actions as the pointer variable
-.IR yy .
-.TP
-.B YY_BUFFER_SIZE
-The initial size of the text buffer, in bytes. The default is 1024
-and the buffer size is doubled whenever required to meet demand during
-parsing. An application that typically parses much longer strings
-could increase this to avoid unnecessary buffer reallocation.
-.TP
-.B YY_STACK_SIZE
-The initial size of the variable and action stacks. The default is
-128, which is doubled whenever required to meet demand during parsing.
-Applications that have deep call stacks with many local variables, or
-that perform many actions after a single successful match, could increase
-this to avoid unnecessary buffer reallocation.
-.TP
-.BI YY_MALLOC( YY , \ SIZE )
-The memory allocator for all parser\-related storage. The parameters
-are the current yycontext structure and the number of bytes to
-allocate. The default definition is:
-.RI malloc( SIZE )
-.TP
-.BI YY_REALLOC( YY , \ PTR , \ SIZE )
-The memory reallocator for dynamically\-grown storage (such as text
-buffers and variable stacks). The parameters are the current
-yycontext structure, the previously\-allocated storage, and the number
-of bytes to which that storage should be grown. The default definition is:
-.RI realloc( PTR , \ SIZE )
-.TP
-.BI YY_FREE( YY , \ PTR )
-The memory deallocator. The parameters are the current yycontext
-structure and the storage to deallocate. The default definition is:
-.RI free( PTR )
-.TP
-.B YYRELEASE
-The name of the function that releases all resources held by a
-yycontext structure. The default value is 'yyrelease'.
-.PP
-The following variables can be referred to within actions.
-.TP
-.B char *yybuf
-This variable points to the parser's input buffer used to store input
-text that has not yet been matched.
-.TP
-.B int yypos
-This is the offset (in yybuf) of the next character to be matched and
-consumed.
-.TP
-.B char *yytext
-The most recent matched text delimited by '<' and '>' is stored in this variable.
-.TP
-.B int yyleng
-This variable indicates the number of characters in 'yytext'.
-.TP
-.B yycontext *yy
-This variable points to the instance of 'yycontext' associated with
-the currently\-active parser.
-.PP
-Programs that wish to release all the resources associated with a
-parser can use the following function.
-.TP
-.BI yyrelease(yycontext * yy )
-Returns all parser\-allocated storage associated with
-.I yy
-to the system. The storage will be reallocated on the next call to
-.IR yyparse ().
-.PP
-Note that the storage for the yycontext structure itself is never
-allocated or reclaimed implicitly. The application must allocate
-these structures in automatic storage, or use
-.IR calloc ()
-and
-.IR free ()
-to manage them explicitly. The example in the following section
-demonstrates one approach to resource management.
-.SH LEG EXAMPLE: EXTENDING THE PARSER'S CONTEXT
-The
-.I yy
-variable passed to actions contains the state of the parser plus any
-additional fields defined by YY_CTX_MEMBERS. Theses fields can be
-used to store application\-specific information that is global to a
-particular call of
-.IR yyparse ().
-A trivial but complete
-.I leg
-example follows in which the yycontext
-structure is extended with a
-.I count
-of the number of newline characters
-seen in the input so far (the grammar otherwise consumes and ignores
-the entire input). The caller of
-.IR yyparse ()
-uses
-.I count
-to print the number of lines of input that were read.
-
-.nf
-
- %{
- #define YY_CTX_LOCAL 1
- #define YY_CTX_MEMBERS \\
- int count;
- %}
-
- Char = ('\\n' | '\\r\\n' | '\\r') { yy\->count++ }
- | .
-
- %%
-
- #include <stdio.h>
- #include <string.h>
-
- int main()
- {
- /* create a local parser context in automatic storage */
- yycontext yy;
- /* the context *must* be initialised to zero before first use*/
- memset(&yy, 0, sizeof(yy));
-
- while (yyparse(&yy))
- ;
- printf("%d newlines\\n", yy.count);
-
- /* release all resources associated with the context */
- yyrelease(&yy);
-
- return 0;
- }
-
-.fi
-.SH DIAGNOSTICS
-.I peg
-and
-.I leg
-warn about the following conditions while converting a grammar into a parser.
-.TP
-.B syntax error
-The input grammar was malformed in some way. The error message will
-include the text about to be matched (often backed up a huge amount
-from the actual location of the error) and the line number of the most
-recently considered character (which is often the real location of the
-problem).
-.TP
-.B rule 'foo' used but not defined
-The grammar referred to a rule named 'foo' but no definition for it
-was given. Attempting to use the generated parser will likely result
-in errors from the linker due to undefined symbols associated with the
-missing rule.
-.TP
-.B rule 'foo' defined but not used
-The grammar defined a rule named 'foo' and then ignored it. The code
-associated with the rule is included in the generated parser which
-will in all other respects be healthy.
-.TP
-.B possible infinite left recursion in rule 'foo'
-There exists at least one path through the grammar that leads from the
-rule 'foo' back to (a recursive invocation of) the same rule without
-consuming any input.
-.PP
-Left recursion, especially that found in standards documents, is
-often 'direct' and implies trivial repetition.
-.nf
-
- # (6.7.6)
- direct\-abstract\-declarator =
- LPAREN abstract\-declarator RPAREN
- | direct\-abstract\-declarator? LBRACKET assign\-expr? RBRACKET
- | direct\-abstract\-declarator? LBRACKET STAR RBRACKET
- | direct\-abstract\-declarator? LPAREN param\-type\-list? RPAREN
-
-.fi
-The recursion can easily be eliminated by converting the parts of the
-pattern following the recursion into a repeatable suffix.
-.nf
-
- # (6.7.6)
- direct\-abstract\-declarator =
- direct\-abstract\-declarator\-head?
- direct\-abstract\-declarator\-tail*
-
- direct\-abstract\-declarator\-head =
- LPAREN abstract\-declarator RPAREN
-
- direct\-abstract\-declarator\-tail =
- LBRACKET assign\-expr? RBRACKET
- | LBRACKET STAR RBRACKET
- | LPAREN param\-type\-list? RPAREN
-
-.fi
-.SH CAVEATS
-A parser that accepts empty input will
-.I always
-succeed. Consider the following example, not atypical of a first
-attempt to write a PEG\-based parser:
-.nf
-
- Program = Expression*
- Expression = "whatever"
- %%
- int main() {
- while (yyparse())
- puts("success!");
- return 0;
- }
-
-.fi
-This program loops forever, no matter what (if any) input is provided
-on stdin. Many fixes are possible, the easiest being to insist that
-the parser always consumes some non\-empty input. Changing the first
-line to
-.nf
-
- Program = Expression+
-
-.fi
-accomplishes this. If the parser is expected to consume the entire
-input, then explicitly requiring the end\-of\-file is also highly
-recommended:
-.nf
-
- Program = Expression+ !.
-
-.fi
-This works because the parser will only fail to match ("!" predicate)
-any character at all ("." expression) when it attempts to read beyond
-the end of the input.
-.SH BUGS
-You have to type 'man peg' to read the manual page for
-.IR leg (1).
-.PP
-The 'yy' and 'YY' prefixes cannot be changed.
-.PP
-Left recursion is detected in the input grammar but is not handled
-correctly in the generated parser.
-.PP
-Diagnostics for errors in the input grammar are obscure and not
-particularly helpful.
-.PP
-The operators
-.BR ! \ \c
-and
-.B ~
-should really be named the other way around.
-.PP
-Several commonly\-used
-.IR lex (1)
-features (yywrap(), yyin, etc.) are completely absent.
-.PP
-The generated parser does not contain '#line' directives to direct C
-compiler errors back to the grammar description when appropriate.
-.SH SEE ALSO
-D. Val Schorre,
-.I META II, a syntax\-oriented compiler writing language,
-19th ACM National Conference, 1964, pp.\ 41.301\-\-41.311. Describes a
-self\-implementing parser generator for analytic grammars with no
-backtracking.
-.PP
-Alexander Birman,
-.I The TMG Recognition Schema,
-Ph.D. dissertation, Princeton, 1970. A mathematical treatment of the
-power and complexity of recursive\-descent parsing with backtracking.
-.PP
-Bryan Ford,
-.I Parsing Expression Grammars: A Recognition\-Based Syntactic Foundation,
-ACM SIGPLAN Symposium on Principles of Programming Languages, 2004.
-Defines PEGs and analyses them in relation to context\-free and regular
-grammars. Introduces the syntax adopted in
-.IR peg .
-.PP
-The standard Unix utilities
-.IR lex (1)
-and
-.IR yacc (1)
-which influenced the syntax and features of
-.IR leg .
-.PP
-The source code for
-.I peg
-and
-.I leg
-whose grammar parsers are written using themselves.
-.PP
-The latest version of this software and documentation:
-.nf
-
- http://piumarta.com/software/peg
-
-.fi
-.SH AUTHOR
-.IR peg ,
-.I leg
-and this manual page were written by Ian Piumarta (first\-name at
-last\-name dot com) while investigating the viability of regular and
-parsing\-expression grammars for efficiently extracting type and
-signature information from C header files.
-.PP
-Please send bug reports and suggestions for improvements to the author
-at the above address.