hub: minipeg

--- a/Makefile

+++ b/Makefile

@@ -25,7 +25,7 @@

 minipeg-split: $(SRC)

 	$(CC) $(CFLAGS) -o $@ compile.c tree.c peg.c

-minipeg.c: $(SRC)

+minipeg.c: $(SRC) amalgamate.sh

 	sh amalgamate.sh $(SRC) > $@

 peg-new.c: peg.leg minipeg

@@ -40,10 +40,11 @@

 	diff -u peg-new.c peg.c

 	diff -u peg-split.c peg.c

-check: minipeg .FORCE

+check: minipeg check-self-host .FORCE

 	$(SHELL) -ec '(cd examples;  $(MAKE))'

 clean : .FORCE

 	rm -f minipeg minipeg-split minipeg.c minipeg-new.c peg-new.c *.o

+	$(SHELL) -ec '(cd examples;  $(MAKE) clean)'

 .FORCE :

--- a/amalgamate.sh

+++ b/amalgamate.sh

@@ -1,8 +1,20 @@

 # Amalgamate the source code, sqlite3 style.

+set -eu

 echo "/* This file is a generated distributable version of the minipeg project."

 echo " * Visit https://github.com/andrewchambers/minipeg for details. */"

-cat *.c | grep '^#include <' | sort -u

-for f in version.h tree.h compile.c tree.c peg.c

+(

+  for f in "$@"

+  do

+    case "$f" in

+      *.c)

+      cat "$f"

+      ;;

+    esac

+  done

+) | grep '^#include <' | sort -u

+for f in "$@"

do

   echo "#line 0 \"$f\""

   grep -v '^#include' "$f"

--- a/examples/Makefile

+++ b/examples/Makefile

@@ -8,7 +8,7 @@

 all : $(EXAMPLES)

 test : .FORCE

-	../minipeg -o test.leg.c test.leg

+	../minipeg -o test.peg.c test.peg

 	$(CC) $(CFLAGS) -o test test.c

 	echo 'ab.ac.ad.ae.afg.afh.afg.afh.afi.afj.' | ./$@ | $(TEE) $@.out

 	$(DIFF) $@.ref $@.out

@@ -16,7 +16,7 @@

 	@echo

 rule : .FORCE

-	../minipeg -o rule.leg.c rule.leg

+	../minipeg -o rule.peg.c rule.peg

 	$(CC) $(CFLAGS) -o rule rule.c

 	echo 'abcbcdabcbcdabcbcdabcbcd' | ./$@ | $(TEE) $@.out

 	$(DIFF) $@.ref $@.out

@@ -24,7 +24,7 @@

 	@echo

 accept : .FORCE

-	../minipeg -o accept.leg.c accept.leg

+	../minipeg -o accept.peg.c accept.peg

 	$(CC) $(CFLAGS) -o accept accept.c

 	echo 'abcbcdabcbcdabcbcdabcbcd' | ./$@ | $(TEE) $@.out

 	$(DIFF) $@.ref $@.out

@@ -32,15 +32,15 @@

 	@echo

 wc : .FORCE

-	../minipeg -o wc.leg.c wc.leg

-	$(CC) $(CFLAGS) -o wc wc.leg.c

-	cat wc.leg | ./$@ | $(TEE) $@.out

+	../minipeg -o wc.peg.c wc.peg

+	$(CC) $(CFLAGS) -o wc wc.peg.c

+	cat wc.peg | ./$@ | $(TEE) $@.out

 	$(DIFF) $@.ref $@.out

 	rm -f $@.out

 	@echo

 dc : .FORCE

-	../minipeg -o dc.leg.c dc.leg

+	../minipeg -o dc.peg.c dc.peg

 	$(CC) $(CFLAGS) -o dc dc.c

 	echo '  2  *3 *(3+ 4) ' | ./dc | $(TEE) $@.out

 	$(DIFF) $@.ref $@.out

@@ -48,7 +48,7 @@

 	@echo

 dcv : .FORCE

-	../minipeg -o dcv.leg.c dcv.leg

+	../minipeg -o dcv.peg.c dcv.peg

 	$(CC) $(CFLAGS) -o dcv dcv.c

 	echo 'a = 6;  b = 7;  a * b' | ./dcv | $(TEE) $@.out

 	$(DIFF) $@.ref $@.out

@@ -56,8 +56,8 @@

 	@echo

 calc : .FORCE

-	../minipeg -o calc.leg.c calc.leg

-	$(CC) $(CFLAGS) -o calc calc.leg.c

+	../minipeg -o calc.peg.c calc.peg

+	$(CC) $(CFLAGS) -o calc calc.peg.c

 	echo 'a = 6;  b = 7;  a * b' | ./calc | $(TEE) $@.out

 	$(DIFF) $@.ref $@.out

 	rm -f $@.out

@@ -64,32 +64,24 @@

 	@echo

 basic : .FORCE

-	../minipeg -o basic.leg.c basic.leg

-	$(CC) $(CFLAGS) -o basic basic.leg.c

+	../minipeg -o basic.peg.c basic.peg

+	$(CC) $(CFLAGS) -o basic basic.peg.c

 	( echo 'load "test"'; echo "run" ) | ./basic | $(TEE) $@.out

 	$(DIFF) $@.ref $@.out

 	rm -f $@.out

 	@echo

-localpeg : .FORCE

-	../minipeg -o test.leg.c test.leg

-	$(CC) $(CFLAGS) -o localpeg localpeg.c

-	echo 'ab.ac.ad.ae.afg.afh.afg.afh.afi.afj.' | ./$@ | $(TEE) $@.out

+local : .FORCE

+	../minipeg -o local.peg.c local.peg

+	$(CC) $(CFLAGS) -o local local.peg.c

+	./$@ < local.peg | $(TEE) $@.out

 	$(DIFF) $@.ref $@.out

 	rm -f $@.out

 	@echo

-localleg : .FORCE

-	../minipeg -o localleg.leg.c localleg.leg

-	$(CC) $(CFLAGS) -o localleg localleg.leg.c

-	./$@ < localleg.leg | $(TEE) $@.out

-	$(DIFF) $@.ref $@.out

-	rm -f $@.out

-	@echo

 erract : .FORCE

-	../minipeg -o erract.leg.c erract.leg

-	$(CC) $(CFLAGS) -o erract erract.leg.c

+	../minipeg -o erract.peg.c erract.peg

+	$(CC) $(CFLAGS) -o erract erract.peg.c

 	echo '6*9' | ./$@ | $(TEE) $@.out

 	$(DIFF) $@.ref $@.out

 	rm -f $@.out

@@ -96,6 +88,6 @@

 	@echo

 clean : .FORCE

-	rm -f *.o *.leg.c $(EXAMPLES)

+	rm -f *.o *.peg.c $(EXAMPLES)

 .FORCE :

--- a/examples/accept.c

+++ b/examples/accept.c

@@ -1,7 +1,7 @@

 #include <stdio.h>

 #include <stdlib.h>

-#include "accept.leg.c"

+#include "accept.peg.c"

 int main()

--- a/examples/accept.leg

+++ /dev/null

@@ -1,8 +1,0 @@

-start	= abcd+

-abcd	= 'a' { printf("A %d\n", yypos); } bc { printf("ABC %d\n", yypos); } &{YYACCEPT}

-	 | 'b' { printf("B %d\n", yypos); } cd { printf("BCD %d\n", yypos); } &{YYACCEPT}

-bc	= 'b' { printf("B %d\n", yypos); } 'c' { printf("C %d\n", yypos); }

-cd	= 'c' { printf("C %d\n", yypos); } 'd' { printf("D %d\n", yypos); }

\ No newline at end of file

--- /dev/null

+++ b/examples/accept.peg

@@ -1,0 +1,8 @@

+start	= abcd+

+abcd	= 'a' { printf("A %d\n", yypos); } bc { printf("ABC %d\n", yypos); } &{YYACCEPT}

+	 | 'b' { printf("B %d\n", yypos); } cd { printf("BCD %d\n", yypos); } &{YYACCEPT}

+bc	= 'b' { printf("B %d\n", yypos); } 'c' { printf("C %d\n", yypos); }

+cd	= 'c' { printf("C %d\n", yypos); } 'd' { printf("D %d\n", yypos); }

\ No newline at end of file

--- a/examples/basic.leg

+++ /dev/null

@@ -1,361 +1,0 @@

-# A 'syntax-directed interpreter' (all execution is a side-effect of parsing).

-# Inspired by Dennis Allison's original Tiny BASIC grammar, circa 1975.

-#

-# Copyright (c) 2007 by Ian Piumarta

-# All rights reserved.

-#

-# Permission is hereby granted, free of charge, to any person obtaining a

-# copy of this software and associated documentation files (the 'Software'),

-# to deal in the Software without restriction, including without limitation

-# the rights to use, copy, modify, merge, publish, distribute, and/or sell

-# copies of the Software, and to permit persons to whom the Software is

-# furnished to do so, provided that the above copyright notice(s) and this

-# permission notice appear in all copies of the Software.  Acknowledgement

-# of the use of this Software in supporting documentation would be

-# appreciated but is not required.

-#

-# THE SOFTWARE IS PROVIDED 'AS IS'.  USE ENTIRELY AT YOUR OWN RISK.

-#

-# Last edited: 2012-04-29 15:14:06 by piumarta on emilia

-%{

-# include <stdio.h>

-  typedef struct line line;

-  struct line

-  {

-    int	  number;

-    int	  length;

-    char *text;

-  };

-  line *lines= 0;

-  int   numLines= 0;

-  int   pc= -1, epc= -1;

-  int   batch= 0;

-  int nextline(char *buf, int max);

-# define min(x, y) ((x) < (y) ? (x) : (y))

-# define YY_INPUT(buf, result, max_size)	\

-  {						\

-    if ((pc >= 0) && (pc < numLines))		\

-      {						\

-        line *linep= lines+pc++;		\

-        result= min(max_size, linep->length);	\

-        memcpy(buf, linep->text, result);	\

-      }						\

-    else					\

-      result= nextline(buf, max_size);		\

-  }

-  union value {

-    int		  number;

-    char	 *string;

-    int		(*binop)(int lhs, int rhs);

-  };

-# define YYSTYPE union value

-  int variables[26];

-  void accept(int number, char *line);

-  void save(char *name);

-  void load(char *name);

-  void type(char *name);

-  int lessThan(int lhs, int rhs)	{ return lhs <  rhs; }

-  int lessEqual(int lhs, int rhs)	{ return lhs <= rhs; }

-  int notEqual(int lhs, int rhs)	{ return lhs != rhs; }

-  int equalTo(int lhs, int rhs)		{ return lhs == rhs; }

-  int greaterEqual(int lhs, int rhs)	{ return lhs >= rhs; }

-  int greaterThan(int lhs, int rhs)	{ return lhs >  rhs; }

-  int input(void);

-  int stack[1024], sp= 0;

-  char *help;

-  void error(char *fmt, ...);

-  int findLine(int n, int create);

-%}

-line = - s:statement CR

-|      - n:number < ( !CR . )* CR >			{ accept(n.number, yytext); }

-|      - CR

-|      - < ( !CR . )* CR >				{ epc= pc;  error("syntax error"); }

-|      - !.						{ exit(0); }

-statement = 'print'- expr-list

-|           'if'- e1:expression r:relop e2:expression	{ if (!r.binop(e1.number, e2.number)) yythunkpos= 0; }

-		'then'- statement

-|           'goto'- e:expression			{ epc= pc;  if ((pc= findLine(e.number, 0)) < 0) error("no such line"); }

-|           'input'- var-list

-|           'let'- v:var EQUAL e:expression		{ variables[v.number]= e.number; }

-|           'gosub'- e:expression			{ epc= pc;  if (sp < 1024) stack[sp++]= pc, pc= findLine(e.number, 0); else error("too many gosubs");

-							  if (pc < 0) error("no such line"); }

-|           'return'-					{ epc= pc;  if ((pc= sp ? stack[--sp] : -1) < 0) error("no gosub"); }

-|           'clear'-					{ while (numLines) accept(lines->number, "\n"); }

-|           'list'-					{ int i;  for (i= 0;  i < numLines;  ++i) printf("%5d %s", lines[i].number, lines[i].text); }

-|           'run'- s:string				{ load(s.string);  pc= 0; }

-|           'run'-					{ pc= 0; }

-|           'end'-					{ pc= -1;  if (batch) exit(0); }

-|           'rem'- ( !CR . )*

-|           ('bye'|'quit'|'exit')-			{ exit(0); }

-|           'save'- s:string				{ save(s.string); }

-|           'load'- s:string				{ load(s.string); }

-|           'type'- s:string				{ type(s.string); }

-|           'dir'-					{ system("ls *.bas"); }

-|           'help'-					{ fprintf(stderr, "%s", help); }

-expr-list = ( e:string					{ printf("%s", e.string); }

-            | e:expression				{ printf("%d", e.number); }

-            )? ( COMMA ( e:string			{ printf("%s", e.string); }

-                       | e:expression			{ printf("%d", e.number); }

-                       )

-               )* ( COMMA

-	          | !COMMA				{ printf("\n"); }

-		  )

-var-list = v:var					{ variables[v.number]= input(); }

-           ( COMMA v:var				{ variables[v.number]= input(); }

-           )*

-expression = ( PLUS? l:term

-             | MINUS l:term				{ l.number = -l.number }

-             ) ( PLUS  r:term				{ l.number += r.number }

-               | MINUS r:term				{ l.number -= r.number }

-               )*					{ $$.number = l.number }

-term = l:factor ( STAR  r:factor			{ l.number *= r.number }

-                | SLASH r:factor			{ l.number /= r.number }

-                )*					{ $$.number = l.number }

-factor = v:var						{ $$.number = variables[v.number] }

-|        n:number

-|        OPEN expression CLOSE

-var = < [a-z] > -					{ $$.number = yytext[0] - 'a' }

-number = < digit+ > -					{ $$.number = atoi(yytext); }

-digit = [0-9]

-string = '"' < [^\"]* > '"' -				{ $$.string = yytext; }

-relop = '<=' -						{ $$.binop= lessEqual; }

-|       '<>' -						{ $$.binop= notEqual; }

-|       '<'  -						{ $$.binop= lessThan; }

-|       '>=' -						{ $$.binop= greaterEqual; }

-|       '>'  -						{ $$.binop= greaterThan; }

-|       '='  -						{ $$.binop= equalTo; }

-EQUAL  = '=' -  CLOSE  = ')' -  OPEN   = '(' -

-SLASH  = '/' -  STAR   = '*' -  MINUS  = '-' -

-PLUS   = '+' -  COMMA  = ',' -

-- = [ \t]*

-CR = '\n' | '\r' | '\r\n'

-%%

-#include <unistd.h>

-#include <stdarg.h>

-char *help=

-  "print <num>|<string> [, <num>|<string> ...] [,]\n"

-  "if <expr> <|<=|<>|=|>=|> <expr> then <stmt>\n"

-  "input <var> [, <var> ...]     let <var> = <expr>\n"

-  "goto <expr>                   gosub <expr>\n"

-  "end                           return\n"

-  "list                          clear\n"

-  "run [\"filename\"]              rem <comment...>\n"

-  "dir                           type \"filename\"\n"

-  "save \"filename\"               load \"filename\"\n"

-  "bye|quit|exit                 help\n"

-  ;

-void error(char *fmt, ...)

-{

-  va_list ap;

-  va_start(ap, fmt);

-  if (epc > 0)

-    fprintf(stderr, "\nline %d: %s", lines[epc-1].number, lines[epc-1].text);

-  else

-    fprintf(stderr, "\n");

-  vfprintf(stderr, fmt, ap);

-  fprintf(stderr, "\n");

-  va_end(ap);

-  epc= pc= -1;

-}

-#ifdef USE_READLINE

-# include <readline/readline.h>

-# include <readline/history.h>

-#endif

-int nextline(char *buf, int max)

-{

-  pc= -1;

-  if (batch) exit(0);

-  if (isatty(fileno(stdin)))

-    {

-#    ifdef USE_READLINE

-      char *line= readline(">");

-      if (line)

-	{

-	  int len= strlen(line);

-	  if (len >= max) len= max - 1;

-	  strncpy(buf, line, len);

-	  (buf)[len]= '\n';

-	  add_history(line);

-	  free(line);

-	  return len + 1;

-	}

-      else

-	{

-	  printf("\n");

-	  return 0;

-	}

-#    endif

-      putchar('>');

-      fflush(stdout);

-    }

-  return fgets(buf, max, stdin) ? strlen(buf) : 0;

-}

-int maxLines= 0;

-int findLine(int n, int create)

-{

-  int lo= 0, hi= numLines - 1;

-  while (lo <= hi)

-    {

-      int mid= (lo + hi) / 2, lno= lines[mid].number;

-      if (lno > n)

-	hi= mid - 1;

-      else if (lno < n)

-	lo= mid + 1;

-      else

-	return mid;

-    }

-  if (create)

-    {

-      if (numLines == maxLines)

-	{

-	  maxLines *= 2;

-	  lines= realloc(lines, sizeof(line) * maxLines);

-	}

-      if (lo < numLines)

-	memmove(lines + lo + 1, lines + lo, sizeof(line) * (numLines - lo));

-      ++numLines;

-      lines[lo].number= n;

-      lines[lo].text= 0;

-      return lo;

-    }

-  return -1;

-}

-void accept(int n, char *s)

-{

-  if (s[0] < 32)	/* delete */

-    {

-      int lno= findLine(n, 0);

-      if (lno >= 0)

-	{

-	  if (lno < numLines - 1)

-	    memmove(lines + lno, lines + lno + 1, sizeof(line) * (numLines - lno - 1));

-	  --numLines;

-	}

-    }

-  else			/* insert */

-    {

-      int lno= findLine(n, 1);

-      if (lines[lno].text) free(lines[lno].text);

-      lines[lno].length= strlen(s);

-      lines[lno].text= strdup(s);

-    }

-}

-char *extend(char *name)

-{

-  static char path[1024];

-  int len= strlen(name);

-  sprintf(path, "%s%s", name, (((len > 4) && !strcasecmp(".bas", name + len - 4)) ? "" : ".bas"));

-  return path;

-}

-void save(char *name)

-{

-  FILE *f= fopen(name= extend(name), "w");

-  if (!f)

-    perror(name);

-  else

-    {

-      int i;

-      for (i= 0;  i < numLines;  ++i)

-	fprintf(f, "%d %s", lines[i].number, lines[i].text);

-      fclose(f);

-    }

-}

-void load(char *name)

-{

-  FILE *f= fopen(name= extend(name), "r");

-  if (!f)

-    perror(name);

-  else

-    {

-      int  lineNumber;

-      char lineText[1024];

-      while ((1 == fscanf(f, " %d ", &lineNumber)) && fgets(lineText, sizeof(lineText), f))

-	accept(lineNumber, lineText);

-      fclose(f);

-    }

-}

-void type(char *name)

-{

-  FILE *f= fopen(name= extend(name), "r");

-  if (!f)

-    perror(name);

-  else

-    {

-      int  c, d;

-      while ((c= getc(f)) >= 0)

-	putchar(d= c);

-      fclose(f);

-      if ('\n' != d && '\r' != d) putchar('\n');

-    }

-}

-int input(void)

-{

-  char line[32];

-  fgets(line, sizeof(line), stdin);

-  return atoi(line);

-}

-int main(int argc, char **argv)

-{

-  lines= malloc(sizeof(line) * (maxLines= 32));

-  numLines= 0;

-  if (argc > 1)

-    {

-      batch= 1;

-      while (argc-- > 1)

-	load(*++argv);

-      pc= 0;

-    }

-  while (!feof(stdin))

-    yyparse();

-  return 0;

-}

--- /dev/null

+++ b/examples/basic.peg

@@ -1,0 +1,361 @@

+# A 'syntax-directed interpreter' (all execution is a side-effect of parsing).

+# Inspired by Dennis Allison's original Tiny BASIC grammar, circa 1975.

+#

+# Copyright (c) 2007 by Ian Piumarta

+# All rights reserved.

+#

+# Permission is hereby granted, free of charge, to any person obtaining a

+# copy of this software and associated documentation files (the 'Software'),

+# to deal in the Software without restriction, including without limitation

+# the rights to use, copy, modify, merge, publish, distribute, and/or sell

+# copies of the Software, and to permit persons to whom the Software is

+# furnished to do so, provided that the above copyright notice(s) and this

+# permission notice appear in all copies of the Software.  Acknowledgement

+# of the use of this Software in supporting documentation would be

+# appreciated but is not required.

+#

+# THE SOFTWARE IS PROVIDED 'AS IS'.  USE ENTIRELY AT YOUR OWN RISK.

+#

+# Last edited: 2012-04-29 15:14:06 by piumarta on emilia

+%{

+# include <stdio.h>

+  typedef struct line line;

+  struct line

+  {

+    int	  number;

+    int	  length;

+    char *text;

+  };

+  line *lines= 0;

+  int   numLines= 0;

+  int   pc= -1, epc= -1;

+  int   batch= 0;

+  int nextline(char *buf, int max);

+# define min(x, y) ((x) < (y) ? (x) : (y))

+# define YY_INPUT(buf, result, max_size)	\

+  {						\

+    if ((pc >= 0) && (pc < numLines))		\

+      {						\

+        line *linep= lines+pc++;		\

+        result= min(max_size, linep->length);	\

+        memcpy(buf, linep->text, result);	\

+      }						\

+    else					\

+      result= nextline(buf, max_size);		\

+  }

+  union value {

+    int		  number;

+    char	 *string;

+    int		(*binop)(int lhs, int rhs);

+  };

+# define YYSTYPE union value

+  int variables[26];

+  void accept(int number, char *line);

+  void save(char *name);

+  void load(char *name);

+  void type(char *name);

+  int lessThan(int lhs, int rhs)	{ return lhs <  rhs; }

+  int lessEqual(int lhs, int rhs)	{ return lhs <= rhs; }

+  int notEqual(int lhs, int rhs)	{ return lhs != rhs; }

+  int equalTo(int lhs, int rhs)		{ return lhs == rhs; }

+  int greaterEqual(int lhs, int rhs)	{ return lhs >= rhs; }

+  int greaterThan(int lhs, int rhs)	{ return lhs >  rhs; }

+  int input(void);

+  int stack[1024], sp= 0;

+  char *help;

+  void error(char *fmt, ...);

+  int findLine(int n, int create);

+%}

+line = - s:statement CR

+|      - n:number < ( !CR . )* CR >			{ accept(n.number, yytext); }

+|      - CR

+|      - < ( !CR . )* CR >				{ epc= pc;  error("syntax error"); }

+|      - !.						{ exit(0); }

+statement = 'print'- expr-list

+|           'if'- e1:expression r:relop e2:expression	{ if (!r.binop(e1.number, e2.number)) yythunkpos= 0; }

+		'then'- statement

+|           'goto'- e:expression			{ epc= pc;  if ((pc= findLine(e.number, 0)) < 0) error("no such line"); }

+|           'input'- var-list

+|           'let'- v:var EQUAL e:expression		{ variables[v.number]= e.number; }

+|           'gosub'- e:expression			{ epc= pc;  if (sp < 1024) stack[sp++]= pc, pc= findLine(e.number, 0); else error("too many gosubs");

+							  if (pc < 0) error("no such line"); }

+|           'return'-					{ epc= pc;  if ((pc= sp ? stack[--sp] : -1) < 0) error("no gosub"); }

+|           'clear'-					{ while (numLines) accept(lines->number, "\n"); }

+|           'list'-					{ int i;  for (i= 0;  i < numLines;  ++i) printf("%5d %s", lines[i].number, lines[i].text); }

+|           'run'- s:string				{ load(s.string);  pc= 0; }

+|           'run'-					{ pc= 0; }

+|           'end'-					{ pc= -1;  if (batch) exit(0); }

+|           'rem'- ( !CR . )*

+|           ('bye'|'quit'|'exit')-			{ exit(0); }

+|           'save'- s:string				{ save(s.string); }

+|           'load'- s:string				{ load(s.string); }

+|           'type'- s:string				{ type(s.string); }

+|           'dir'-					{ system("ls *.bas"); }

+|           'help'-					{ fprintf(stderr, "%s", help); }

+expr-list = ( e:string					{ printf("%s", e.string); }

+            | e:expression				{ printf("%d", e.number); }

+            )? ( COMMA ( e:string			{ printf("%s", e.string); }

+                       | e:expression			{ printf("%d", e.number); }

+                       )

+               )* ( COMMA

+	          | !COMMA				{ printf("\n"); }

+		  )

+var-list = v:var					{ variables[v.number]= input(); }

+           ( COMMA v:var				{ variables[v.number]= input(); }

+           )*

+expression = ( PLUS? l:term

+             | MINUS l:term				{ l.number = -l.number }

+             ) ( PLUS  r:term				{ l.number += r.number }

+               | MINUS r:term				{ l.number -= r.number }

+               )*					{ $$.number = l.number }

+term = l:factor ( STAR  r:factor			{ l.number *= r.number }

+                | SLASH r:factor			{ l.number /= r.number }

+                )*					{ $$.number = l.number }

+factor = v:var						{ $$.number = variables[v.number] }

+|        n:number

+|        OPEN expression CLOSE

+var = < [a-z] > -					{ $$.number = yytext[0] - 'a' }

+number = < digit+ > -					{ $$.number = atoi(yytext); }

+digit = [0-9]

+string = '"' < [^\"]* > '"' -				{ $$.string = yytext; }

+relop = '<=' -						{ $$.binop= lessEqual; }

+|       '<>' -						{ $$.binop= notEqual; }

+|       '<'  -						{ $$.binop= lessThan; }

+|       '>=' -						{ $$.binop= greaterEqual; }

+|       '>'  -						{ $$.binop= greaterThan; }

+|       '='  -						{ $$.binop= equalTo; }

+EQUAL  = '=' -  CLOSE  = ')' -  OPEN   = '(' -

+SLASH  = '/' -  STAR   = '*' -  MINUS  = '-' -

+PLUS   = '+' -  COMMA  = ',' -

+- = [ \t]*

+CR = '\n' | '\r' | '\r\n'

+%%

+#include <unistd.h>

+#include <stdarg.h>

+char *help=

+  "print <num>|<string> [, <num>|<string> ...] [,]\n"

+  "if <expr> <|<=|<>|=|>=|> <expr> then <stmt>\n"

+  "input <var> [, <var> ...]     let <var> = <expr>\n"

+  "goto <expr>                   gosub <expr>\n"

+  "end                           return\n"

+  "list                          clear\n"

+  "run [\"filename\"]              rem <comment...>\n"

+  "dir                           type \"filename\"\n"

+  "save \"filename\"               load \"filename\"\n"

+  "bye|quit|exit                 help\n"

+  ;

+void error(char *fmt, ...)

+{

+  va_list ap;

+  va_start(ap, fmt);

+  if (epc > 0)

+    fprintf(stderr, "\nline %d: %s", lines[epc-1].number, lines[epc-1].text);

+  else

+    fprintf(stderr, "\n");

+  vfprintf(stderr, fmt, ap);

+  fprintf(stderr, "\n");

+  va_end(ap);

+  epc= pc= -1;

+}

+#ifdef USE_READLINE

+# include <readline/readline.h>

+# include <readline/history.h>

+#endif

+int nextline(char *buf, int max)

+{

+  pc= -1;

+  if (batch) exit(0);

+  if (isatty(fileno(stdin)))

+    {

+#    ifdef USE_READLINE

+      char *line= readline(">");

+      if (line)

+	{

+	  int len= strlen(line);

+	  if (len >= max) len= max - 1;

+	  strncpy(buf, line, len);

+	  (buf)[len]= '\n';

+	  add_history(line);

+	  free(line);

+	  return len + 1;

+	}

+      else

+	{

+	  printf("\n");

+	  return 0;

+	}

+#    endif

+      putchar('>');

+      fflush(stdout);

+    }

+  return fgets(buf, max, stdin) ? strlen(buf) : 0;

+}

+int maxLines= 0;

+int findLine(int n, int create)

+{

+  int lo= 0, hi= numLines - 1;

+  while (lo <= hi)

+    {

+      int mid= (lo + hi) / 2, lno= lines[mid].number;

+      if (lno > n)

+	hi= mid - 1;

+      else if (lno < n)

+	lo= mid + 1;

+      else

+	return mid;

+    }

+  if (create)

+    {

+      if (numLines == maxLines)

+	{

+	  maxLines *= 2;

+	  lines= realloc(lines, sizeof(line) * maxLines);

+	}

+      if (lo < numLines)

+	memmove(lines + lo + 1, lines + lo, sizeof(line) * (numLines - lo));

+      ++numLines;

+      lines[lo].number= n;

+      lines[lo].text= 0;

+      return lo;

+    }

+  return -1;

+}

+void accept(int n, char *s)

+{

+  if (s[0] < 32)	/* delete */

+    {

+      int lno= findLine(n, 0);

+      if (lno >= 0)

+	{

+	  if (lno < numLines - 1)

+	    memmove(lines + lno, lines + lno + 1, sizeof(line) * (numLines - lno - 1));

+	  --numLines;

+	}

+    }

+  else			/* insert */

+    {

+      int lno= findLine(n, 1);

+      if (lines[lno].text) free(lines[lno].text);

+      lines[lno].length= strlen(s);

+      lines[lno].text= strdup(s);

+    }

+}

+char *extend(char *name)

+{

+  static char path[1024];

+  int len= strlen(name);

+  sprintf(path, "%s%s", name, (((len > 4) && !strcasecmp(".bas", name + len - 4)) ? "" : ".bas"));

+  return path;

+}

+void save(char *name)

+{

+  FILE *f= fopen(name= extend(name), "w");

+  if (!f)

+    perror(name);

+  else

+    {

+      int i;

+      for (i= 0;  i < numLines;  ++i)

+	fprintf(f, "%d %s", lines[i].number, lines[i].text);

+      fclose(f);

+    }

+}

+void load(char *name)

+{

+  FILE *f= fopen(name= extend(name), "r");

+  if (!f)

+    perror(name);

+  else

+    {

+      int  lineNumber;

+      char lineText[1024];

+      while ((1 == fscanf(f, " %d ", &lineNumber)) && fgets(lineText, sizeof(lineText), f))

+	accept(lineNumber, lineText);

+      fclose(f);

+    }

+}

+void type(char *name)

+{

+  FILE *f= fopen(name= extend(name), "r");

+  if (!f)

+    perror(name);

+  else

+    {

+      int  c, d;

+      while ((c= getc(f)) >= 0)

+	putchar(d= c);

+      fclose(f);

+      if ('\n' != d && '\r' != d) putchar('\n');

+    }

+}

+int input(void)

+{

+  char line[32];

+  fgets(line, sizeof(line), stdin);

+  return atoi(line);

+}

+int main(int argc, char **argv)

+{

+  lines= malloc(sizeof(line) * (maxLines= 32));

+  numLines= 0;

+  if (argc > 1)

+    {

+      batch= 1;

+      while (argc-- > 1)

+	load(*++argv);

+      pc= 0;

+    }

+  while (!feof(stdin))

+    yyparse();

+  return 0;

+}

--- a/examples/calc.leg

+++ /dev/null

@@ -1,46 +1,0 @@

-%{

-#include <stdio.h>

-int vars[26];

-%}

-Stmt	= - e:Expr EOL			{ printf("%d\n", e); }

-	| ( !EOL . )* EOL		{ printf("error\n"); }

-Expr	= i:ID ASSIGN s:Sum		{ $$= vars[i]= s; }

-	| s:Sum				{ $$= s; }

-Sum	= l:Product

-		( PLUS  r:Product	{ l += r; }

-		| MINUS r:Product	{ l -= r; }

-		)*			{ $$= l; }

-Product	= l:Value

-		( TIMES  r:Value	{ l *= r; }

-		| DIVIDE r:Value	{ l /= r; }

-		)*			{ $$= l; }

-Value	= i:NUMBER			{ $$= atoi(yytext); }

-	| i:ID !ASSIGN			{ $$= vars[i]; }

-	| OPEN i:Expr CLOSE		{ $$= i; }

-NUMBER	= < [0-9]+ >	-		{ $$= atoi(yytext); }

-ID	= < [a-z]  >	-		{ $$= yytext[0] - 'a'; }

-ASSIGN	= '='		-

-PLUS	= '+'		-

-MINUS	= '-'		-

-TIMES	= '*'		-

-DIVIDE	= '/'		-

-OPEN	= '('		-

-CLOSE	= ')'		-

--	= [ \t]*

-EOL	= '\n' | '\r\n' | '\r' | ';'

-%%

-int main()

-{

-  while (yyparse());

-  return 0;

-}

--- /dev/null

+++ b/examples/calc.peg

@@ -1,0 +1,46 @@

+%{

+#include <stdio.h>

+int vars[26];

+%}

+Stmt	= - e:Expr EOL			{ printf("%d\n", e); }

+	| ( !EOL . )* EOL		{ printf("error\n"); }

+Expr	= i:ID ASSIGN s:Sum		{ $$= vars[i]= s; }

+	| s:Sum				{ $$= s; }

+Sum	= l:Product

+		( PLUS  r:Product	{ l += r; }

+		| MINUS r:Product	{ l -= r; }

+		)*			{ $$= l; }

+Product	= l:Value

+		( TIMES  r:Value	{ l *= r; }

+		| DIVIDE r:Value	{ l /= r; }

+		)*			{ $$= l; }

+Value	= i:NUMBER			{ $$= atoi(yytext); }

+	| i:ID !ASSIGN			{ $$= vars[i]; }

+	| OPEN i:Expr CLOSE		{ $$= i; }

+NUMBER	= < [0-9]+ >	-		{ $$= atoi(yytext); }

+ID	= < [a-z]  >	-		{ $$= yytext[0] - 'a'; }

+ASSIGN	= '='		-

+PLUS	= '+'		-

+MINUS	= '-'		-

+TIMES	= '*'		-

+DIVIDE	= '/'		-

+OPEN	= '('		-

+CLOSE	= ')'		-

+-	= [ \t]*

+EOL	= '\n' | '\r\n' | '\r' | ';'

+%%

+int main()

+{

+  while (yyparse());

+  return 0;

+}

--- a/examples/dc.c

+++ b/examples/dc.c

@@ -7,7 +7,7 @@

 int push(int n)	{ return stack[++stackp]= n; }

 int pop(void)	{ return stack[stackp--]; }

-#include "dc.leg.c"

+#include "dc.peg.c"

 int main()

--- a/examples/dc.leg

+++ /dev/null

@@ -1,27 +1,0 @@

-# Grammar

-Expr	= SPACE Sum EOL		{ printf("%d\n", pop()); }

-	 | (!EOL .)* EOL		{ printf("error\n"); }

-Sum	= Product ( PLUS  Product	{ int r= pop(), l= pop();  push(l + r); }

-		   | MINUS Product	{ int r= pop(), l= pop();  push(l - r); }

-		   )*

-Product	= Value ( TIMES  Value		{ int r= pop(), l= pop();  push(l * r); }

-		 | DIVIDE Value		{ int r= pop(), l= pop();  push(l / r); }

-		 )*

-Value	= NUMBER			{ push(atoi(yytext)); }

-	 | OPEN Sum CLOSE

-# Lexemes

-NUMBER	= < [0-9]+ > SPACE

-PLUS	= '+' SPACE

-MINUS	= '-' SPACE

-TIMES	= '*' SPACE

-DIVIDE	= '/' SPACE

-OPEN	= '(' SPACE

-CLOSE	= ')' SPACE

-SPACE	= [ \t]*

-EOL	= '\n' | '\r\n' | '\r'

--- /dev/null

+++ b/examples/dc.peg

@@ -1,0 +1,27 @@

+# Grammar

+Expr	= SPACE Sum EOL		{ printf("%d\n", pop()); }

+	 | (!EOL .)* EOL		{ printf("error\n"); }

+Sum	= Product ( PLUS  Product	{ int r= pop(), l= pop();  push(l + r); }

+		   | MINUS Product	{ int r= pop(), l= pop();  push(l - r); }

+		   )*

+Product	= Value ( TIMES  Value		{ int r= pop(), l= pop();  push(l * r); }

+		 | DIVIDE Value		{ int r= pop(), l= pop();  push(l / r); }

+		 )*

+Value	= NUMBER			{ push(atoi(yytext)); }

+	 | OPEN Sum CLOSE

+# Lexemes

+NUMBER	= < [0-9]+ > SPACE

+PLUS	= '+' SPACE

+MINUS	= '-' SPACE

+TIMES	= '*' SPACE

+DIVIDE	= '/' SPACE

+OPEN	= '(' SPACE

+CLOSE	= ')' SPACE

+SPACE	= [ \t]*

+EOL	= '\n' | '\r\n' | '\r'

--- a/examples/dcv.c

+++ b/examples/dcv.c

@@ -10,7 +10,7 @@

 int pop(void)	{ return stack[stackp--]; }

 int top(void)	{ return stack[stackp]; }

-#include "dcv.leg.c"

+#include "dcv.peg.c"

 int main()

--- a/examples/dcv.leg

+++ /dev/null

@@ -1,32 +1,0 @@

-Stmt	= SPACE Expr EOL			{ printf("%d\n", pop()); }

-	 | (!EOL .)* EOL			{ printf("error\n"); }

-Expr	= ID { var= yytext[0] } ASSIGN Sum	{ vars[var - 'a']= top(); }

-	 | Sum

-Sum	= Product ( PLUS  Product		{ int r= pop(), l= pop();  push(l + r); }

-		   | MINUS Product		{ int r= pop(), l= pop();  push(l - r); }

-		   )*

-Product	= Value ( TIMES  Value			{ int r= pop(), l= pop();  push(l * r); }

-                 | DIVIDE Value			{ int r= pop(), l= pop();  push(l / r); }

-		 )*

-Value	= NUMBER				{ push(atoi(yytext)); }

-	 | < ID > !ASSIGN			{ push(vars[yytext[0] - 'a']); }

-	 | OPEN Expr CLOSE

-NUMBER	= < [0-9]+ >	SPACE

-ID	= < [a-z]  >	SPACE

-ASSIGN	= '='		SPACE

-PLUS	= '+'		SPACE

-MINUS	= '-'		SPACE

-TIMES	= '*'		SPACE

-DIVIDE	= '/'		SPACE

-OPEN	= '('		SPACE

-CLOSE	= ')'		SPACE

-SPACE	= [ \t]*

-EOL	= '\n' | '\r\n' | '\r' | ';'

--- /dev/null

+++ b/examples/dcv.peg

@@ -1,0 +1,32 @@

+Stmt	= SPACE Expr EOL			{ printf("%d\n", pop()); }

+	 | (!EOL .)* EOL			{ printf("error\n"); }

+Expr	= ID { var= yytext[0] } ASSIGN Sum	{ vars[var - 'a']= top(); }

+	 | Sum

+Sum	= Product ( PLUS  Product		{ int r= pop(), l= pop();  push(l + r); }

+		   | MINUS Product		{ int r= pop(), l= pop();  push(l - r); }

+		   )*

+Product	= Value ( TIMES  Value			{ int r= pop(), l= pop();  push(l * r); }

+                 | DIVIDE Value			{ int r= pop(), l= pop();  push(l / r); }

+		 )*

+Value	= NUMBER				{ push(atoi(yytext)); }

+	 | < ID > !ASSIGN			{ push(vars[yytext[0] - 'a']); }

+	 | OPEN Expr CLOSE

+NUMBER	= < [0-9]+ >	SPACE

+ID	= < [a-z]  >	SPACE

+ASSIGN	= '='		SPACE

+PLUS	= '+'		SPACE

+MINUS	= '-'		SPACE

+TIMES	= '*'		SPACE

+DIVIDE	= '/'		SPACE

+OPEN	= '('		SPACE

+CLOSE	= ')'		SPACE

+SPACE	= [ \t]*

+EOL	= '\n' | '\r\n' | '\r' | ';'

--- a/examples/erract.leg

+++ /dev/null

@@ -1,27 +1,0 @@

-%{

-#include <stdio.h>

-%}

-Expr	=   a:NUMBER PLUS   ~{ printf("fail at PLUS\n") } b:NUMBER { printf("got addition\n"); }

-	| ( a:NUMBER MINUS  				  b:NUMBER { printf("got subtraction\n"); } ) ~{ printf("fail at subtraction\n") }

-	|   a:NUMBER TIMES  				  b:NUMBER { printf("got multiplication\n"); }

-	|   a:NUMBER DIVIDE 				  b:NUMBER { printf("got division\n"); }

-NUMBER	= < [0-9]+ >	-					   { $$= atoi(yytext); }

-PLUS	= '+'		-

-MINUS	= '-'		-

-TIMES	= '*'		-

-DIVIDE	= '/'		-

--	= (SPACE | EOL)*

-SPACE	= [ \t]

-EOL	= '\n' | '\r\n' | '\r' | ';'

-%%

-int main()

-{

-  while (yyparse());

-  return 0;

-}

--- /dev/null

+++ b/examples/erract.peg

@@ -1,0 +1,27 @@

+%{

+#include <stdio.h>

+%}

+Expr	=   a:NUMBER PLUS   ~{ printf("fail at PLUS\n") } b:NUMBER { printf("got addition\n"); }

+	| ( a:NUMBER MINUS  				  b:NUMBER { printf("got subtraction\n"); } ) ~{ printf("fail at subtraction\n") }

+	|   a:NUMBER TIMES  				  b:NUMBER { printf("got multiplication\n"); }

+	|   a:NUMBER DIVIDE 				  b:NUMBER { printf("got division\n"); }

+NUMBER	= < [0-9]+ >	-					   { $$= atoi(yytext); }

+PLUS	= '+'		-

+MINUS	= '-'		-

+TIMES	= '*'		-

+DIVIDE	= '/'		-

+-	= (SPACE | EOL)*

+SPACE	= [ \t]

+EOL	= '\n' | '\r\n' | '\r' | ';'

+%%

+int main()

+{

+  while (yyparse());

+  return 0;

+}

--- a/examples/left.c

+++ b/examples/left.c

@@ -7,7 +7,7 @@

   if (EOF != c) printf("<%c>\n", c);			\

-#include "left.leg.c"

+#include "left.peg.c"

 int main()

--- a/examples/left.leg

+++ /dev/null

@@ -1,2 +1,0 @@

-S	= (S 'a' | 'a') !'a'

--- /dev/null

+++ b/examples/left.peg

@@ -1,0 +1,2 @@

+S	= (S 'a' | 'a') !'a'

--- /dev/null

+++ b/examples/local.peg

@@ -1,0 +1,24 @@

+%{

+#define YY_CTX_LOCAL 1

+#define YY_CTX_MEMBERS \

+  int count;

+%}

+Char	= ('\n' | '\r\n' | '\r')	{ yy->count++ }

+	| .

+%%

+#include <stdio.h>

+#include <string.h>

+int main()

+{

+    yycontext yy;

+    memset(&yy, 0, sizeof(yy));

+    while (yyparse(&yy))

+	;

+    printf("%d newlines\n", yy.count);

+    yyrelease(&yy);

+    return 0;

+}

--- /dev/null

+++ b/examples/local.ref

@@ -1,0 +1,1 @@

+24 newlines

--- a/examples/localleg.leg

+++ /dev/null

@@ -1,24 +1,0 @@

-%{

-#define YY_CTX_LOCAL 1

-#define YY_CTX_MEMBERS \

-  int count;

-%}

-Char	= ('\n' | '\r\n' | '\r')	{ yy->count++ }

-	| .

-%%

-#include <stdio.h>

-#include <string.h>

-int main()

-{

-    yycontext yy;

-    memset(&yy, 0, sizeof(yy));

-    while (yyparse(&yy))

-	;

-    printf("%d newlines\n", yy.count);

-    yyrelease(&yy);

-    return 0;

-}

--- a/examples/localleg.ref

+++ /dev/null

@@ -1,1 +1,0 @@

-24 newlines

--- a/examples/rule.c

+++ b/examples/rule.c

@@ -1,7 +1,7 @@

 #include <stdio.h>

 #include <stdlib.h>

-#include "rule.leg.c"

+#include "rule.peg.c"

 int main()

--- a/examples/rule.leg

+++ /dev/null

@@ -1,8 +1,0 @@

-start	= abcd+

-abcd	= 'a' { printf("A %d\n", yypos); } bc { printf("ABC %d\n", yypos); }

-	 | 'b' { printf("B %d\n", yypos); } cd { printf("BCD %d\n", yypos); };

-bc	= 'b' { printf("B %d\n", yypos); } 'c' { printf("C %d\n", yypos); };

-cd	= 'c' { printf("C %d\n", yypos); } 'd' { printf("D %d\n", yypos); };

--- /dev/null

+++ b/examples/rule.peg

@@ -1,0 +1,8 @@

+start	= abcd+

+abcd	= 'a' { printf("A %d\n", yypos); } bc { printf("ABC %d\n", yypos); }

+	 | 'b' { printf("B %d\n", yypos); } cd { printf("BCD %d\n", yypos); };

+bc	= 'b' { printf("B %d\n", yypos); } 'c' { printf("C %d\n", yypos); };

+cd	= 'c' { printf("C %d\n", yypos); } 'd' { printf("D %d\n", yypos); };

--- a/examples/test.leg

+++ /dev/null

@@ -1,13 +1,0 @@

-start = body '.' { printf(".\n"); };

-body  = 'a' { printf("a1 "); } 'b' { printf("ab1 "); }

-	 | 'a' { printf("a2 "); }   'c'  { printf("ac2 "); }

-	 | 'a' { printf("a3 "); } ( 'd'  { printf("ad3 "); } | 'e'  { printf("ae3 "); } )

-	 | 'a' { printf("a4 "); } ( 'f'  { printf("af4 "); }   'g'  { printf("afg4 "); }

-				  | 'f'  { printf("af5 "); }   'h'  { printf("afh5 "); } )

-	 | 'a' { printf("a6 "); } ( 'f' &{ printf("af6 ")  }   'i' &{ printf("afi6 ") }

-				  | 'f' &{ printf("af7 ")  }   'j' &{ printf("afj7 ") } );

--- /dev/null

+++ b/examples/test.peg

@@ -1,0 +1,13 @@

+start = body '.' { printf(".\n"); };

+body  = 'a' { printf("a1 "); } 'b' { printf("ab1 "); }

+	 | 'a' { printf("a2 "); }   'c'  { printf("ac2 "); }

+	 | 'a' { printf("a3 "); } ( 'd'  { printf("ad3 "); } | 'e'  { printf("ae3 "); } )

+	 | 'a' { printf("a4 "); } ( 'f'  { printf("af4 "); }   'g'  { printf("afg4 "); }

+				  | 'f'  { printf("af5 "); }   'h'  { printf("afh5 "); } )

+	 | 'a' { printf("a6 "); } ( 'f' &{ printf("af6 ")  }   'i' &{ printf("afi6 ") }

+				  | 'f' &{ printf("af7 ")  }   'j' &{ printf("afj7 ") } );

--- a/examples/username.leg

+++ /dev/null

@@ -1,14 +1,0 @@

-%{

-#include <unistd.h>

-%}

-start =	"username"	{ printf("%s", getlogin()); }

-|	< . >		{ putchar(yytext[0]); }

-%%

-int main()

-{

-  while (yyparse());

-  return 0;

-}

--- /dev/null

+++ b/examples/username.peg

@@ -1,0 +1,14 @@

+%{

+#include <unistd.h>

+%}

+start =	"username"	{ printf("%s", getlogin()); }

+|	< . >		{ putchar(yytext[0]); }

+%%

+int main()

+{

+  while (yyparse());

+  return 0;

+}

--- a/examples/wc.leg

+++ /dev/null

@@ -1,22 +1,0 @@

-%{

-#include <stdio.h>

-int lines= 0, words= 0, chars= 0;

-%}

-start	= (line | word | char)

-line	= < (( '\n' '\r'* ) | ( '\r' '\n'* )) >	{ lines++;  chars += yyleng; }

-word	= < [a-zA-Z]+ >				{ words++;  chars += yyleng;  printf("<%s>\n", yytext); }

-char	= .					{ chars++; }

-%%

-int main()

-{

-  while (yyparse())

-    ;

-  printf("%d lines\n", lines);

-  printf("%d chars\n", chars);

-  printf("%d words\n", words);

-  return 0;

-}

--- /dev/null

+++ b/examples/wc.peg

@@ -1,0 +1,22 @@

+%{

+#include <stdio.h>

+int lines= 0, words= 0, chars= 0;

+%}

+start	= (line | word | char)

+line	= < (( '\n' '\r'* ) | ( '\r' '\n'* )) >	{ lines++;  chars += yyleng; }

+word	= < [a-zA-Z]+ >				{ words++;  chars += yyleng;  printf("<%s>\n", yytext); }

+char	= .					{ chars++; }

+%%

+int main()

+{

+  while (yyparse())

+    ;

+  printf("%d lines\n", lines);

+  printf("%d chars\n", chars);

+  printf("%d words\n", words);

+  return 0;

+}

--- /dev/null

+++ b/minipeg.1

@@ -1,0 +1,1147 @@

+.\" Copyright (c) 2007,2016 by Ian Piumarta

+.\" All rights reserved.

+.\"

+.\" Permission is hereby granted, free of charge, to any person obtaining a

+.\" copy of this software and associated documentation files (the 'Software'),

+.\" to deal in the Software without restriction, including without limitation

+.\" the rights to use, copy, modify, merge, publish, distribute, and/or sell

+.\" copies of the Software, and to permit persons to whom the Software is

+.\" furnished to do so, provided that the above copyright notice(s) and this

+.\" permission notice appear in all copies of the Software.  Acknowledgement

+.\" of the use of this Software in supporting documentation would be

+.\" appreciated but is not required.

+.\"

+.\" THE SOFTWARE IS PROVIDED 'AS IS'.  USE ENTIRELY AT YOUR OWN RISK.

+.\"

+.\" Last edited: 2016-07-22 09:47:29 by piumarta on zora.local

+.\"

+.TH PEG 1 "September 2013" "Version 0.1"

+.SH NAME

+peg, leg \- parser generators

+.SH SYNOPSIS

+.B peg

+.B [\-hvV \-ooutput]

+.I [filename ...]

+.sp 0

+.B leg

+.B [\-hvV \-ooutput]

+.I [filename ...]

+.SH DESCRIPTION

+.I peg

+and

+.I leg

+are tools for generating recursive\-descent parsers: programs that

+perform pattern matching on text.  They process a Parsing Expression

+Grammar (PEG) [Ford 2004] to produce a program that recognises legal

+sentences of that grammar.

+.I peg

+processes PEGs written using the original syntax described by Ford;

+.I leg

+processes PEGs written using slightly different syntax and conventions

+that are intended to make it an attractive replacement for parsers

+built with

+.IR lex (1)

+and

+.IR yacc (1).

+Unlike

+.I lex

+and

+.IR yacc ,

+.I peg

+and

+.I leg

+support unlimited backtracking, provide ordered choice as a means for

+disambiguation, and can combine scanning (lexical analysis) and

+parsing (syntactic analysis) into a single activity.

+.PP

+.I peg

+reads the specified

+.IR filename s,

+or standard input if no

+.IR filename s

+are given, for a grammar describing the parser to generate.

+.I peg

+then generates a C source file that defines a function

+.IR yyparse().

+This C source file can be included in, or compiled and then linked

+with, a client program.  Each time the client program calls

+.IR yyparse ()

+the parser consumes input text according to the parsing rules,

+starting from the first rule in the grammar.

+.IR yyparse ()

+returns non\-zero if the input could be parsed according to the

+grammar; it returns zero if the input could not be parsed.

+.PP

+The prefix 'yy' or 'YY' is prepended to all externally\-visible symbols

+in the generated parser.  This is intended to reduce the risk of

+namespace pollution in client programs.  (The choice of 'yy' is

+historical; see

+.IR lex (1)

+and

+.IR yacc (1),

+for example.)

+.SH OPTIONS

+.I peg

+and

+.I leg

+provide the following options:

+.TP

+.B \-h

+prints a summary of available options and then exits.

+.TP

+.B \-ooutput

+writes the generated parser to the file

+.B output

+instead of the standard output.

+.TP

+.B \-P

+suppresses #line directives in the output.

+.TP

+.B \-v

+writes verbose information to standard error while working.

+.TP

+.B \-V

+writes version information to standard error then exits.

+.SH A SIMPLE EXAMPLE

+The following

+.I peg

+input specifies a grammar with a single rule (called 'start') that is

+satisfied when the input contains the string "username".

+.nf

+    start <\- "username"

+.fi

+(The quotation marks are

+.I not

+part of the matched text; they serve to indicate a literal string to

+be matched.)  In other words,

+.IR  yyparse ()

+in the generated C source will return non\-zero only if the next eight

+characters read from the input spell the word "username".  If the

+input contains anything else,

+.IR yyparse ()

+returns zero and no input will have been consumed.  (Subsequent calls

+to

+.IR yyparse ()

+will also return zero, since the parser is effectively blocked looking

+for the string "username".)  To ensure progress we can add an

+alternative clause to the 'start' rule that will match any single

+character if "username" is not found.

+.nf

+    start <\- "username"

+           / .

+.fi

+.IR yyparse ()

+now always returns non\-zero (except at the very end of the input).  To

+do something useful we can add actions to the rules.  These actions

+are performed after a complete match is found (starting from the first

+rule) and are chosen according to the 'path' taken through the grammar

+to match the input.  (Linguists would call this path a 'phrase

+marker'.)

+.nf

+    start <\- "username"    { printf("%s\\n", getlogin()); }

+           / < . >         { putchar(yytext[0]); }

+.fi

+The first line instructs the parser to print the user's login name

+whenever it sees "username" in the input.  If that match fails, the

+second line tells the parser to echo the next character on the input

+the standard output.  Our parser is now performing useful work: it

+will copy the input to the output, replacing all occurrences of

+"username" with the user's account name.

+.PP

+Note the angle brackets ('<' and '>') that were added to the second

+alternative.  These have no effect on the meaning of the rule, but

+serve to delimit the text made available to the following action in

+the variable

+.IR yytext .

+.PP

+If the above grammar is placed in the file

+.BR username.peg ,

+running the command

+.nf

+    peg \-o username.c username.peg

+.fi

+will save the corresponding parser in the file

+.BR username.c .

+To create a complete program this parser could be included by a C

+program as follows.

+.nf

+    #include <stdio.h>      /* printf(), putchar() */

+    #include <unistd.h>     /* getlogin() */

+    #include "username.c"   /* yyparse() */

+    int main()

+    {

+      while (yyparse())     /* repeat until EOF */

+        ;

+      return 0;

+    }

+.fi

+.SH PEG GRAMMARS

+A grammar consists of a set of named rules.

+.nf

+    name <\- pattern

+.fi

+The

+.B pattern

+contains one or more of the following elements.

+.TP

+.B name

+The element stands for the entire pattern in the rule with the given

+.BR name .

+.TP

+.BR \(dq characters \(dq

+A character or string enclosed in double quotes is matched literally.

+The ANSI C escape sequences are recognised within the

+.IR characters .

+.TP

+.BR ' characters '

+A character or string enclosed in single quotes is matched literally, as above.

+.TP

+.BR [ characters ]

+A set of characters enclosed in square brackets matches any single

+character from the set, with escape characters recognised as above.

+If the set begins with an uparrow (^) then the set is negated (the

+element matches any character

+.I not

+in the set).  Any pair of characters separated with a dash (\-)

+represents the range of characters from the first to the second,

+inclusive.  A single alphabetic character or underscore is matched by

+the following set.

+.nf

+    [a\-zA\-Z_]

+.fi

+Similarly, the following matches  any single non\-digit character.

+.nf

+    [^0\-9]

+.fi

+.TP

+.B .

+A dot matches any character.  Note that the only time this fails is at

+the end of file, where there is no character to match.

+.TP

+.BR ( \ pattern\  )

+Parentheses are used for grouping (modifying the precedence of the

+operators described below).

+.TP

+.BR { \ action\  }

+Curly braces surround actions.  The action is arbitrary C source code

+to be executed at the end of matching.  Any braces within the action

+must be properly nested.  Any input text that was matched before the

+action and delimited by angle brackets (see below) is made available

+within the action as the contents of the character array

+.IR yytext .

+The length of (number of characters in)

+.I yytext

+is available in the variable

+.IR yyleng .

+(These variable names are historical; see

+.IR lex (1).)

+.TP

+.B <

+An opening angle bracket always matches (consuming no input) and

+causes the parser to begin accumulating matched text.  This text will

+be made available to actions in the variable

+.IR yytext .

+.TP

+.B >

+A closing angle bracket always matches (consuming no input) and causes

+the parser to stop accumulating text for

+.IR yytext .

+.PP

+The above

+.IR element s

+can be made optional and/or repeatable with the following suffixes:

+.TP

+.RB element\  ?

+The element is optional.  If present on the input, it is consumed and

+the match succeeds.  If not present on the input, no text is consumed

+and the match succeeds anyway.

+.TP

+.RB element\  +

+The element is repeatable.  If present on the input, one or more

+occurrences of

+.I element

+are consumed and the match succeeds.  If no occurrences of

+.I element

+are present on the input, the match fails.

+.TP

+.RB element\  *

+The element is optional and repeatable.  If present on the input, one or more

+occurrences of

+.I element

+are consumed and the match succeeds.  If no occurrences of

+.I element

+are present on the input, the match succeeds anyway.

+.PP

+The above elements and suffixes can be converted into predicates (that

+match arbitrary input text and subsequently succeed or fail

+.I without

+consuming that input) with the following prefixes:

+.TP

+.BR & \ element

+The predicate succeeds only if

+.I element

+can be matched.  Input text scanned while matching

+.I element

+is not consumed from the input and remains available for subsequent

+matching.

+.TP

+.BR ! \ element

+The predicate succeeds only if

+.I element

+cannot be matched.  Input text scanned while matching

+.I element

+is not consumed from the input and remains available for subsequent

+matching.  A popular idiom is

+.nf

+    !.

+.fi

+which matches the end of file, after the last character of the input

+has already been consumed.

+.PP

+A special form of the '&' predicate is provided:

+.TP

+.BR & {\ expression\ }

+In this predicate the simple C

+.I expression

+.RB ( not

+statement) is evaluated immediately when the parser reaches the

+predicate.  If the

+.I expression

+yields non\-zero (true) the 'match' succeeds and the parser continues

+with the next element in the pattern.  If the

+.I expression

+yields zero (false) the 'match' fails and the parser backs up to look

+for an alternative parse of the input.

+.PP

+Several elements (with or without prefixes and suffixes) can be

+combined into a

+.I sequence

+by writing them one after the other.  The entire sequence matches only

+if each individual element within it matches, from left to right.

+.PP

+Sequences can be separated into disjoint alternatives by the

+alternation operator '/'.

+.TP

+.RB sequence\-1\  / \ sequence\-2\  / \ ...\  / \ sequence\-N

+Each sequence is tried in turn until one of them matches, at which

+time matching for the overall pattern succeeds.  If none of the

+sequences matches then the match of the overall pattern fails.

+.PP

+Finally, the pound sign (#) introduces a comment (discarded) that

+continues until the end of the line.

+.PP

+To summarise the above, the parser tries to match the input text

+against a pattern containing literals, names (representing other

+rules), and various operators (written as prefixes, suffixes,

+juxtaposition for sequencing and and infix alternation operator) that

+modify how the elements within the pattern are matched.  Matches are

+made from left to right, 'descending' into named sub\-rules as they are

+encountered.  If the matching process fails, the parser 'back tracks'

+('rewinding' the input appropriately in the process) to find the

+nearest alternative 'path' through the grammar.  In other words the

+parser performs a depth\-first, left\-to\-right search for the first

+successfully\-matching path through the rules.  If found, the actions

+along the successful path are executed (in the order they were

+encountered).

+.PP

+Note that predicates are evaluated

+.I immediately

+during the search for a successful match, since they contribute to the

+success or failure of the search.  Actions, however, are evaluated

+only after a successful match has been found.

+.SH PEG GRAMMAR FOR PEG GRAMMARS

+The grammar for

+.I peg

+grammars is shown below.  This will both illustrate and formalise

+the above description.

+.nf

+    Grammar         <\- Spacing Definition+ EndOfFile

+    Definition      <\- Identifier LEFTARROW Expression

+    Expression      <\- Sequence ( SLASH Sequence )*

+    Sequence        <\- Prefix*

+    Prefix          <\- AND Action

+                     / ( AND | NOT )? Suffix

+    Suffix          <\- Primary ( QUERY / STAR / PLUS )?

+    Primary         <\- Identifier !LEFTARROW

+                     / OPEN Expression CLOSE

+                     / Literal

+                     / Class

+                     / DOT

+                     / Action

+                     / BEGIN

+                     / END

+    Identifier      <\- < IdentStart IdentCont* > Spacing

+    IdentStart      <\- [a\-zA\-Z_]

+    IdentCont       <\- IdentStart / [0\-9]

+    Literal         <\- ['] < ( !['] Char  )* > ['] Spacing

+                     / ["] < ( !["] Char  )* > ["] Spacing

+    Class           <\- '[' < ( !']' Range )* > ']' Spacing

+    Range           <\- Char '\-' Char / Char

+    Char            <\- '\\\\' [abefnrtv'"\\[\\]\\\\]

+                     / '\\\\' [0\-3][0\-7][0\-7]

+                     / '\\\\' [0\-7][0\-7]?

+                     / '\\\\' '\-'

+                     / !'\\\\' .

+    LEFTARROW       <\- '<\-' Spacing

+    SLASH           <\- '/' Spacing

+    AND             <\- '&' Spacing

+    NOT             <\- '!' Spacing

+    QUERY           <\- '?' Spacing

+    STAR            <\- '*' Spacing

+    PLUS            <\- '+' Spacing

+    OPEN            <\- '(' Spacing

+    CLOSE           <\- ')' Spacing

+    DOT             <\- '.' Spacing

+    Spacing         <\- ( Space / Comment )*

+    Comment         <\- '#' ( !EndOfLine . )* EndOfLine

+    Space           <\- ' ' / '\\t' / EndOfLine

+    EndOfLine       <\- '\\r\\n' / '\\n' / '\\r'

+    EndOfFile       <\- !.

+    Action          <\- '{' < [^}]* > '}' Spacing

+    BEGIN           <\- '<' Spacing

+    END             <\- '>' Spacing

+.fi

+.SH LEG GRAMMARS

+.I leg

+is a variant of

+.I peg

+that adds some features of

+.IR lex (1)

+and

+.IR yacc (1).

+It differs from

+.I peg

+in the following ways.

+.TP

+.BI %{\  text... \ %}

+A declaration section can appear anywhere that a rule definition is

+expected.  The

+.I text

+between the delimiters '%{' and '%}' is copied verbatim to the

+generated C parser code

+.I before

+the code that implements the parser itself.

+.TP

+.IB name\  = \ pattern

+The 'assignment' operator replaces the left arrow operator '<\-'.

+.TP

+.B rule\-name

+Hyphens can appear as letters in the names of rules.  Each hyphen is

+converted into an underscore in the generated C source code.  A

+single hyphen '\-' is a legal rule name.

+.nf

+    \-       = [ \\t\\n\\r]*

+    number  = [0\-9]+                 \-

+    name    = [a\-zA\-Z_][a\-zA_Z_0\-9]* \-

+    l\-paren = '('                    \-

+    r\-paren = ')'                    \-

+.fi

+This example shows how ignored whitespace can be obvious when reading

+the grammar and yet unobtrusive when placed liberally at the end of

+every rule associated with a lexical element.

+.TP

+.IB seq\-1\  | \ seq\-2

+The alternation operator is vertical bar '|' rather than forward

+slash '/'.  The

+.I peg

+rule

+.nf

+    name <\- sequence\-1

+          / sequence\-2

+          / sequence\-3

+.fi

+is therefore written

+.nf

+    name = sequence\-1

+         | sequence\-2

+         | sequence\-3

+         ;

+.fi

+in

+.I leg

+(with the final semicolon being optional, as described next).

+.TP

+.IB @{\ action\ }

+Actions prefixed with an 'at' symbol will be performed during parsing,

+at the time they are encountered while matching the input text with a

+rule.

+Because of back-tracking in the PEG parsing algorithm, actions

+prefixed with '@' might be performed multiple times for the same input

+text.

+(The usual behviour of actions is that they are saved up until

+matching is complete, and then those that are part of the

+final derivation are performed in left-to-right order.)

+The variable

+.I yytext

+is available within these actions.

+.TP

+.IB exp \ ~ \ {\ action\ }

+A postfix operator

+.BI ~ {\ action\ }

+can be placed after any expression and will behave like a normal

+action (arbitrary C code) except that it is invoked only when

+.I exp

+fails.  It binds less tightly than any other operator except alternation and sequencing, and

+is intended to make error handling and recovery code easier to write.

+Note that

+.I yytext

+and

+.I yyleng

+are not available inside these actions, but the pointer variable

+.I yy

+is available to give the code access to any user\-defined members

+of the parser state (see "CUSTOMISING THE PARSER" below).

+Note also that

+.I exp

+is always a single expression; to invoke an error action for any

+failure within a sequence, parentheses must be used to group the

+sequence into a single expression.

+.nf

+    rule = e1 e2 e3 ~{ error("e[12] ok; e3 has failed"); }

+         | ...

+    rule = (e1 e2 e3) ~{ error("one of e[123] has failed"); }

+         | ...

+.fi

+.TP

+.IB pattern\  ;

+A semicolon punctuator can optionally terminate a

+.IR pattern .

+.TP

+.BI %% \ text...

+A double percent '%%' terminates the rules (and declarations) section of

+the grammar.  All

+.I text

+following '%%' is copied verbatim to the generated C parser code

+.I after

+the parser implementation code.

+.TP

+.BI $$\ = \ value

+A sub\-rule can return a semantic

+.I value

+from an action by assigning it to the pseudo\-variable '$$'.  All

+semantic values must have the same type (which defaults to 'int').

+This type can be changed by defining YYSTYPE in a declaration section.

+.TP

+.IB identifier : name

+The semantic value returned (by assigning to '$$') from the sub\-rule

+.I name

+is associated with the

+.I identifier

+and can be referred to in subsequent actions.

+.PP

+The desk calculator example below illustrates the use of '$$' and ':'.

+.SH LEG EXAMPLE: A DESK CALCULATOR

+The extensions in

+.I leg

+described above allow useful parsers and evaluators (including

+declarations, grammar rules, and supporting C functions such

+as 'main') to be kept within a single source file.  To illustrate this

+we show a simple desk calculator supporting the four common arithmetic

+operators and named variables.  The intermediate results of arithmetic

+evaluation will be accumulated on an implicit stack by returning them

+as semantic values from sub\-rules.

+.nf

+    %{

+    #include <stdio.h>     /* printf() */

+    #include <stdlib.h>    /* atoi() */

+    int vars[26];

+    %}

+    Stmt    = \- e:Expr EOL                  { printf("%d\\n", e); }

+            | ( !EOL . )* EOL               { printf("error\\n"); }

+    Expr    = i:ID ASSIGN s:Sum             { $$ = vars[i] = s; }

+            | s:Sum                         { $$ = s; }

+    Sum     = l:Product

+                    ( PLUS  r:Product       { l += r; }

+                    | MINUS r:Product       { l \-= r; }

+                    )*                      { $$ = l; }

+    Product = l:Value

+                    ( TIMES  r:Value        { l *= r; }

+                    | DIVIDE r:Value        { l /= r; }

+                    )*                      { $$ = l; }

+    Value   = i:NUMBER                      { $$ = atoi(yytext); }

+            | i:ID !ASSIGN                  { $$ = vars[i]; }

+            | OPEN i:Expr CLOSE             { $$ = i; }

+    NUMBER  = < [0\-9]+ >    \-               { $$ = atoi(yytext); }

+    ID      = < [a\-z]  >    \-               { $$ = yytext[0] \- 'a'; }

+    ASSIGN  = '='           \-

+    PLUS    = '+'           \-

+    MINUS   = '\-'           \-

+    TIMES   = '*'           \-

+    DIVIDE  = '/'           \-

+    OPEN    = '('           \-

+    CLOSE   = ')'           \-

+    \-       = [ \\t]*

+    EOL     = '\\n' | '\\r\\n' | '\\r' | ';'

+    %%

+    int main()

+    {

+      while (yyparse())

+        ;

+      return 0;

+    }

+.fi

+.SH LEG GRAMMAR FOR LEG GRAMMARS

+The grammar for

+.I leg

+grammars is shown below.  This will both illustrate and formalise the

+above description.

+.nf

+    grammar =       \-

+                    ( declaration | definition )+

+                    trailer? end\-of\-file

+    declaration =   '%{' < ( !'%}' . )* > RPERCENT

+    trailer =       '%%' < .* >

+    definition =    identifier EQUAL expression SEMICOLON?

+    expression =    sequence ( BAR sequence )*

+    sequence =      error+

+    error =         prefix ( TILDE action )?

+    prefix =        AND action

+    |               ( AND | NOT )? suffix

+    suffix =        primary ( QUERY | STAR | PLUS )?

+    primary =       identifier COLON identifier !EQUAL

+    |               identifier !EQUAL

+    |               OPEN expression CLOSE

+    |               literal

+    |               class

+    |               DOT

+    |               action

+    |               BEGIN

+    |               END

+    identifier =    < [\-a\-zA\-Z_][\-a\-zA\-Z_0\-9]* > \-

+    literal =       ['] < ( !['] char )* > ['] \-

+    |               ["] < ( !["] char )* > ["] \-

+    class =         '[' < ( !']' range )* > ']' \-

+    range =         char '\-' char | char

+    char =          '\\\\' [abefnrtv'"\\[\\]\\\\]

+    |               '\\\\' [0\-3][0\-7][0\-7]

+    |               '\\\\' [0\-7][0\-7]?

+    |               !'\\\\' .

+    action =        '{' < braces* > '}' \-

+    braces =        '{' braces* '}'

+    |               !'}' .

+    EQUAL =         '=' \-

+    COLON =         ':' \-

+    SEMICOLON =     ';' \-

+    BAR =           '|' \-

+    AND =           '&' \-

+    NOT =           '!' \-

+    QUERY =         '?' \-

+    STAR =          '*' \-

+    PLUS =          '+' \-

+    OPEN =          '(' \-

+    CLOSE =         ')' \-

+    DOT =           '.' \-

+    BEGIN =         '<' \-

+    END =           '>' \-

+    TILDE =         '~' \-

+    RPERCENT =      '%}' \-

+    \- =             ( space | comment )*

+    space =         ' ' | '\\t' | end\-of\-line

+    comment =       '#' ( !end\-of\-line . )* end\-of\-line

+    end\-of\-line =   '\\r\\n' | '\\n' | '\\r'

+    end\-of\-file =   !.

+.fi

+.SH CUSTOMISING THE PARSER

+The following symbols can be redefined in declaration sections to

+modify the generated parser code.

+.TP

+.B YYSTYPE

+The semantic value type.  The pseudo\-variable '$$' and the

+identifiers 'bound' to rule results with the colon operator ':' should

+all be considered as being declared to have this type.  The default

+value is 'int'.

+.TP

+.B YYPARSE

+The name of the main entry point to the parser.  The default value

+is 'yyparse'.

+.TP

+.B YYPARSEFROM

+The name of an alternative entry point to the parser.  This function

+expects one argument: the function corresponding to the rule from

+which the search for a match should begin.  The default

+is 'yyparsefrom'.  Note that yyparse() is defined as

+.nf

+    int yyparse() { return yyparsefrom(yy_foo); }

+.fi

+where 'foo' is the name of the first rule in the grammar.

+.TP

+.BI YY_INPUT( buf , \ result , \ max_size )

+This macro is invoked by the parser to obtain more input text.

+.I buf

+points to an area of memory that can hold at most

+.I max_size

+characters.  The macro should copy input text to

+.I buf

+and then assign the integer variable

+.I result

+to indicate the number of characters copied.  If no more input is available,

+the macro should assign 0 to

+.IR result .

+By default, the YY_INPUT macro is defined as follows.

+.nf

+    #define YY_INPUT(buf, result, max_size)        \\

+    {                                              \\

+      int yyc= getchar();                          \\

+      result= (EOF == yyc) ? 0 : (*(buf)= yyc, 1); \\

+    }

+.fi

+Note that if YY_CTX_LOCAL is defined (see below) then an additional

+first argument, containing the parser context, is passed to YY_INPUT.

+.TP

+.B YY_DEBUG

+If this symbols is defined then additional code will be included in

+the parser that prints vast quantities of arcane information to the

+standard error while the parser is running.

+.TP

+.B YY_BEGIN

+This macro is invoked to mark the start of input text that will be

+made available in actions as 'yytext'.  This corresponds to

+occurrences of '<' in the grammar.  These are converted into

+predicates that are expected to succeed.  The default definition

+.nf

+    #define YY_BEGIN (yybegin= yypos, 1)

+.fi

+therefore saves the current input position and returns 1 ('true') as

+the result of the predicate.

+.TP

+.B YY_END

+This macros corresponds to '>' in the grammar.  Again, it is a

+predicate so the default definition saves the input position

+before 'succeeding'.

+.nf

+    #define YY_END (yyend= yypos, 1)

+.fi

+.TP

+.BI YY_PARSE( T )

+This macro declares the parser entry points (yyparse and yyparsefrom)

+to be of type

+.IR T .

+The default definition

+.nf

+    #define YY_PARSE(T) T

+.fi

+leaves yyparse() and yyparsefrom() with global visibility.  If they

+should not be externally visible in other source files, this macro can

+be redefined to declare them 'static'.

+.nf

+    #define YY_PARSE(T) static T

+.fi

+.TP

+.B YY_CTX_LOCAL

+If this symbol is defined during compilation of a generated parser

+then global parser state will be kept in a structure of

+type 'yycontext' which can be declared as a local variable.  This

+allows multiple instances of parsers to coexist and to be thread\-safe.

+The parsing function

+.IR yyparse ()

+will be declared to expect a first argument of type 'yycontext *', an

+instance of the structure holding the global state for the parser.

+This instance must be allocated and initialised to zero by the client.

+A trivial but complete example is as follows.

+.nf

+    #include <stdio.h>

+    #define YY_CTX_LOCAL

+    #include "the\-generated\-parser.peg.c"

+    int main()

+    {

+      yycontext ctx;

+      memset(&ctx, 0, sizeof(yycontext));

+      while (yyparse(&ctx));

+      return 0;

+    }

+.fi

+Note that if this symbol is undefined then the compiled parser will

+statically allocate its global state and will be neither reentrant nor

+thread\-safe.

+Note also that the parser yycontext structure is initialised automatically

+the first time

+.IR yyparse ()

+is called; this structure

+.B must

+therefore be properly initialised to zero before the first call to

+.IR yyparse ().

+.TP

+.B YY_CTX_MEMBERS

+If YY_CTX_LOCAL is defined (see above) then the macro YY_CTX_MEMBERS

+can be defined to expand to any additional member field declarations

+that the client would like included in the declaration of

+the 'yycontext' structure type.  These additional members are

+otherwise ignored by the generated parser.  The instance

+of 'yycontext' associated with the currently\-active parser is

+available within actions as the pointer variable

+.IR yy .

+.TP

+.B YY_BUFFER_SIZE

+The initial size of the text buffer, in bytes.  The default is 1024

+and the buffer size is doubled whenever required to meet demand during

+parsing.  An application that typically parses much longer strings

+could increase this to avoid unnecessary buffer reallocation.

+.TP

+.B YY_STACK_SIZE

+The initial size of the variable and action stacks.  The default is

+128, which is doubled whenever required to meet demand during parsing.

+Applications that have deep call stacks with many local variables, or

+that perform many actions after a single successful match, could increase

+this to avoid unnecessary buffer reallocation.

+.TP

+.BI YY_MALLOC( YY , \ SIZE )

+The memory allocator for all parser\-related storage.  The parameters

+are the current yycontext structure and the number of bytes to

+allocate.  The default definition is:

+.RI malloc( SIZE )

+.TP

+.BI YY_REALLOC( YY , \ PTR , \ SIZE )

+The memory reallocator for dynamically\-grown storage (such as text

+buffers and variable stacks).  The parameters are the current

+yycontext structure, the previously\-allocated storage, and the number

+of bytes to which that storage should be grown.  The default definition is:

+.RI realloc( PTR , \ SIZE )

+.TP

+.BI YY_FREE( YY , \ PTR )

+The memory deallocator.  The parameters are the current yycontext

+structure and the storage to deallocate.  The default definition is:

+.RI free( PTR )

+.TP

+.B YYRELEASE

+The name of the function that releases all resources held by a

+yycontext structure.  The default value is 'yyrelease'.

+.PP

+The following variables can be referred to within actions.

+.TP

+.B char *yybuf

+This variable points to the parser's input buffer used to store input

+text that has not yet been matched.

+.TP

+.B int yypos

+This is the offset (in yybuf) of the next character to be matched and

+consumed.

+.TP

+.B char *yytext

+The most recent matched text delimited by '<' and '>' is stored in this variable.

+.TP

+.B int yyleng

+This variable indicates the number of characters in 'yytext'.

+.TP

+.B yycontext *yy

+This variable points to the instance of 'yycontext' associated with

+the currently\-active parser.

+.PP

+Programs that wish to release all the resources associated with a

+parser can use the following function.

+.TP

+.BI yyrelease(yycontext * yy )

+Returns all parser\-allocated storage associated with

+.I yy

+to the system.  The storage will be reallocated on the next call to

+.IR yyparse ().

+.PP

+Note that the storage for the yycontext structure itself is never

+allocated or reclaimed implicitly.  The application must allocate

+these structures in automatic storage, or use

+.IR calloc ()

+and

+.IR free ()

+to manage them explicitly.  The example in the following section

+demonstrates one approach to resource management.

+.SH LEG EXAMPLE: EXTENDING THE PARSER'S CONTEXT

+The

+.I yy

+variable passed to actions contains the state of the parser plus any

+additional fields defined by YY_CTX_MEMBERS.  Theses fields can be

+used to store application\-specific information that is global to a

+particular call of

+.IR yyparse ().

+A trivial but complete

+.I leg

+example follows in which the yycontext

+structure is extended with a

+.I count

+of the number of newline characters

+seen in the input so far (the grammar otherwise consumes and ignores

+the entire input).  The caller of

+.IR yyparse ()

+uses

+.I count

+to print the number of lines of input that were read.

+.nf

+    %{

+    #define YY_CTX_LOCAL 1

+    #define YY_CTX_MEMBERS \\

+      int count;

+    %}

+    Char    = ('\\n' | '\\r\\n' | '\\r')        { yy\->count++ }

+            | .

+    %%

+    #include <stdio.h>

+    #include <string.h>

+    int main()

+    {

+        /* create a local parser context in automatic storage */

+        yycontext yy;

+        /* the context *must* be initialised to zero before first use*/

+        memset(&yy, 0, sizeof(yy));

+        while (yyparse(&yy))

+            ;

+        printf("%d newlines\\n", yy.count);

+        /* release all resources associated with the context */

+        yyrelease(&yy);

+        return 0;

+    }

+.fi

+.SH DIAGNOSTICS

+.I peg

+and

+.I leg

+warn about the following conditions while converting a grammar into a parser.

+.TP

+.B syntax error

+The input grammar was malformed in some way.  The error message will

+include the text about to be matched (often backed up a huge amount

+from the actual location of the error) and the line number of the most

+recently considered character (which is often the real location of the

+problem).

+.TP

+.B rule 'foo' used but not defined

+The grammar referred to a rule named 'foo' but no definition for it

+was given.  Attempting to use the generated parser will likely result

+in errors from the linker due to undefined symbols associated with the

+missing rule.

+.TP

+.B rule 'foo' defined but not used

+The grammar defined a rule named 'foo' and then ignored it.  The code

+associated with the rule is included in the generated parser which

+will in all other respects be healthy.

+.TP

+.B possible infinite left recursion in rule 'foo'

+There exists at least one path through the grammar that leads from the

+rule 'foo' back to (a recursive invocation of) the same rule without

+consuming any input.

+.PP

+Left recursion, especially that found in standards documents, is

+often 'direct' and implies trivial repetition.

+.nf

+    # (6.7.6)

+    direct\-abstract\-declarator =

+        LPAREN abstract\-declarator RPAREN

+    |   direct\-abstract\-declarator? LBRACKET assign\-expr? RBRACKET

+    |   direct\-abstract\-declarator? LBRACKET STAR RBRACKET

+    |   direct\-abstract\-declarator? LPAREN param\-type\-list? RPAREN

+.fi

+The recursion can easily be eliminated by converting the parts of the

+pattern following the recursion into a repeatable suffix.

+.nf

+    # (6.7.6)

+    direct\-abstract\-declarator =

+        direct\-abstract\-declarator\-head?

+        direct\-abstract\-declarator\-tail*

+    direct\-abstract\-declarator\-head =

+        LPAREN abstract\-declarator RPAREN

+    direct\-abstract\-declarator\-tail =

+        LBRACKET assign\-expr? RBRACKET

+    |   LBRACKET STAR RBRACKET

+    |   LPAREN param\-type\-list? RPAREN

+.fi

+.SH CAVEATS

+A parser that accepts empty input will

+.I always

+succeed.  Consider the following example, not atypical of a first

+attempt to write a PEG\-based parser:

+.nf

+    Program = Expression*

+    Expression = "whatever"

+    %%

+    int main() {

+      while (yyparse())

+        puts("success!");

+      return 0;

+    }

+.fi

+This program loops forever, no matter what (if any) input is provided

+on stdin.  Many fixes are possible, the easiest being to insist that

+the parser always consumes some non\-empty input.  Changing the first

+line to

+.nf

+    Program = Expression+

+.fi

+accomplishes this.  If the parser is expected to consume the entire

+input, then explicitly requiring the end\-of\-file is also highly

+recommended:

+.nf

+    Program = Expression+ !.

+.fi

+This works because the parser will only fail to match ("!" predicate)

+any character at all ("." expression) when it attempts to read beyond

+the end of the input.

+.SH BUGS

+You have to type 'man peg' to read the manual page for

+.IR leg (1).

+.PP

+The 'yy' and 'YY' prefixes cannot be changed.

+.PP

+Left recursion is detected in the input grammar but is not handled

+correctly in the generated parser.

+.PP

+Diagnostics for errors in the input grammar are obscure and not

+particularly helpful.

+.PP

+The operators

+.BR ! \ \c

+and

+.B ~

+should really be named the other way around.

+.PP

+Several commonly\-used

+.IR lex (1)

+features (yywrap(), yyin, etc.) are completely absent.

+.PP

+The generated parser does not contain '#line' directives to direct C

+compiler errors back to the grammar description when appropriate.

+.SH SEE ALSO

+D. Val Schorre,

+.I META II, a syntax\-oriented compiler writing language,

+19th ACM National Conference, 1964, pp.\ 41.301\-\-41.311.  Describes a

+self\-implementing parser generator for analytic grammars with no

+backtracking.

+.PP

+Alexander Birman,

+.I The TMG Recognition Schema,

+Ph.D. dissertation, Princeton, 1970.  A mathematical treatment of the

+power and complexity of recursive\-descent parsing with backtracking.

+.PP

+Bryan Ford,

+.I Parsing Expression Grammars: A Recognition\-Based Syntactic Foundation,

+ACM SIGPLAN Symposium on Principles of Programming Languages, 2004.

+Defines PEGs and analyses them in relation to context\-free and regular

+grammars.  Introduces the syntax adopted in

+.IR peg .

+.PP

+The standard Unix utilities

+.IR lex (1)

+and

+.IR yacc (1)

+which influenced the syntax and features of

+.IR leg .

+.PP

+The source code for

+.I peg

+and

+.I leg

+whose grammar parsers are written using themselves.

+.PP

+The latest version of this software and documentation:

+.nf

+    http://piumarta.com/software/peg

+.fi

+.SH AUTHOR

+.IR peg ,

+.I leg

+and this manual page were written by Ian Piumarta (first\-name at

+last\-name dot com) while investigating the viability of regular and

+parsing\-expression grammars for efficiently extracting type and

+signature information from C header files.

+.PP

+Please send bug reports and suggestions for improvements to the author

+at the above address.

--- a/tinypeg.1

+++ /dev/null

@@ -1,1147 +1,0 @@

-.\" Copyright (c) 2007,2016 by Ian Piumarta

-.\" All rights reserved.

-.\"

-.\" Permission is hereby granted, free of charge, to any person obtaining a

-.\" copy of this software and associated documentation files (the 'Software'),

-.\" to deal in the Software without restriction, including without limitation

-.\" the rights to use, copy, modify, merge, publish, distribute, and/or sell

-.\" copies of the Software, and to permit persons to whom the Software is

-.\" furnished to do so, provided that the above copyright notice(s) and this

-.\" permission notice appear in all copies of the Software.  Acknowledgement

-.\" of the use of this Software in supporting documentation would be

-.\" appreciated but is not required.

-.\"

-.\" THE SOFTWARE IS PROVIDED 'AS IS'.  USE ENTIRELY AT YOUR OWN RISK.

-.\"

-.\" Last edited: 2016-07-22 09:47:29 by piumarta on zora.local

-.\"

-.TH PEG 1 "September 2013" "Version 0.1"

-.SH NAME

-peg, leg \- parser generators

-.SH SYNOPSIS

-.B peg

-.B [\-hvV \-ooutput]

-.I [filename ...]

-.sp 0

-.B leg

-.B [\-hvV \-ooutput]

-.I [filename ...]

-.SH DESCRIPTION

-.I peg

-and

-.I leg

-are tools for generating recursive\-descent parsers: programs that

-perform pattern matching on text.  They process a Parsing Expression

-Grammar (PEG) [Ford 2004] to produce a program that recognises legal

-sentences of that grammar.

-.I peg

-processes PEGs written using the original syntax described by Ford;

-.I leg

-processes PEGs written using slightly different syntax and conventions

-that are intended to make it an attractive replacement for parsers

-built with

-.IR lex (1)

-and

-.IR yacc (1).

-Unlike

-.I lex

-and

-.IR yacc ,

-.I peg

-and

-.I leg

-support unlimited backtracking, provide ordered choice as a means for

-disambiguation, and can combine scanning (lexical analysis) and

-parsing (syntactic analysis) into a single activity.

-.PP

-.I peg

-reads the specified

-.IR filename s,

-or standard input if no

-.IR filename s

-are given, for a grammar describing the parser to generate.

-.I peg

-then generates a C source file that defines a function

-.IR yyparse().

-This C source file can be included in, or compiled and then linked

-with, a client program.  Each time the client program calls

-.IR yyparse ()

-the parser consumes input text according to the parsing rules,

-starting from the first rule in the grammar.

-.IR yyparse ()

-returns non\-zero if the input could be parsed according to the

-grammar; it returns zero if the input could not be parsed.

-.PP

-The prefix 'yy' or 'YY' is prepended to all externally\-visible symbols

-in the generated parser.  This is intended to reduce the risk of

-namespace pollution in client programs.  (The choice of 'yy' is

-historical; see

-.IR lex (1)

-and

-.IR yacc (1),

-for example.)

-.SH OPTIONS

-.I peg

-and

-.I leg

-provide the following options:

-.TP

-.B \-h

-prints a summary of available options and then exits.

-.TP

-.B \-ooutput

-writes the generated parser to the file

-.B output

-instead of the standard output.

-.TP

-.B \-P

-suppresses #line directives in the output.

-.TP

-.B \-v

-writes verbose information to standard error while working.

-.TP

-.B \-V

-writes version information to standard error then exits.

-.SH A SIMPLE EXAMPLE

-The following

-.I peg

-input specifies a grammar with a single rule (called 'start') that is

-satisfied when the input contains the string "username".

-.nf

-    start <\- "username"

-.fi

-(The quotation marks are

-.I not

-part of the matched text; they serve to indicate a literal string to

-be matched.)  In other words,

-.IR  yyparse ()

-in the generated C source will return non\-zero only if the next eight

-characters read from the input spell the word "username".  If the

-input contains anything else,

-.IR yyparse ()

-returns zero and no input will have been consumed.  (Subsequent calls

-to

-.IR yyparse ()

-will also return zero, since the parser is effectively blocked looking

-for the string "username".)  To ensure progress we can add an

-alternative clause to the 'start' rule that will match any single

-character if "username" is not found.

-.nf

-    start <\- "username"

-           / .

-.fi

-.IR yyparse ()

-now always returns non\-zero (except at the very end of the input).  To

-do something useful we can add actions to the rules.  These actions

-are performed after a complete match is found (starting from the first

-rule) and are chosen according to the 'path' taken through the grammar

-to match the input.  (Linguists would call this path a 'phrase

-marker'.)

-.nf

-    start <\- "username"    { printf("%s\\n", getlogin()); }

-           / < . >         { putchar(yytext[0]); }

-.fi

-The first line instructs the parser to print the user's login name

-whenever it sees "username" in the input.  If that match fails, the

-second line tells the parser to echo the next character on the input

-the standard output.  Our parser is now performing useful work: it

-will copy the input to the output, replacing all occurrences of

-"username" with the user's account name.

-.PP

-Note the angle brackets ('<' and '>') that were added to the second

-alternative.  These have no effect on the meaning of the rule, but

-serve to delimit the text made available to the following action in

-the variable

-.IR yytext .

-.PP

-If the above grammar is placed in the file

-.BR username.peg ,

-running the command

-.nf

-    peg \-o username.c username.peg

-.fi

-will save the corresponding parser in the file

-.BR username.c .

-To create a complete program this parser could be included by a C

-program as follows.

-.nf

-    #include <stdio.h>      /* printf(), putchar() */

-    #include <unistd.h>     /* getlogin() */

-    #include "username.c"   /* yyparse() */

-    int main()

-    {

-      while (yyparse())     /* repeat until EOF */

-        ;

-      return 0;

-    }

-.fi

-.SH PEG GRAMMARS

-A grammar consists of a set of named rules.

-.nf

-    name <\- pattern

-.fi

-The

-.B pattern

-contains one or more of the following elements.

-.TP

-.B name

-The element stands for the entire pattern in the rule with the given

-.BR name .

-.TP

-.BR \(dq characters \(dq

-A character or string enclosed in double quotes is matched literally.

-The ANSI C escape sequences are recognised within the

-.IR characters .

-.TP

-.BR ' characters '

-A character or string enclosed in single quotes is matched literally, as above.

-.TP

-.BR [ characters ]

-A set of characters enclosed in square brackets matches any single

-character from the set, with escape characters recognised as above.

-If the set begins with an uparrow (^) then the set is negated (the

-element matches any character

-.I not

-in the set).  Any pair of characters separated with a dash (\-)

-represents the range of characters from the first to the second,

-inclusive.  A single alphabetic character or underscore is matched by

-the following set.

-.nf

-    [a\-zA\-Z_]

-.fi

-Similarly, the following matches  any single non\-digit character.

-.nf

-    [^0\-9]

-.fi

-.TP

-.B .

-A dot matches any character.  Note that the only time this fails is at

-the end of file, where there is no character to match.

-.TP

-.BR ( \ pattern\  )

-Parentheses are used for grouping (modifying the precedence of the

-operators described below).

-.TP

-.BR { \ action\  }

-Curly braces surround actions.  The action is arbitrary C source code

-to be executed at the end of matching.  Any braces within the action

-must be properly nested.  Any input text that was matched before the

-action and delimited by angle brackets (see below) is made available

-within the action as the contents of the character array

-.IR yytext .

-The length of (number of characters in)

-.I yytext

-is available in the variable

-.IR yyleng .

-(These variable names are historical; see

-.IR lex (1).)

-.TP

-.B <

-An opening angle bracket always matches (consuming no input) and

-causes the parser to begin accumulating matched text.  This text will

-be made available to actions in the variable

-.IR yytext .

-.TP

-.B >

-A closing angle bracket always matches (consuming no input) and causes

-the parser to stop accumulating text for

-.IR yytext .

-.PP

-The above

-.IR element s

-can be made optional and/or repeatable with the following suffixes:

-.TP

-.RB element\  ?

-The element is optional.  If present on the input, it is consumed and

-the match succeeds.  If not present on the input, no text is consumed

-and the match succeeds anyway.

-.TP

-.RB element\  +

-The element is repeatable.  If present on the input, one or more

-occurrences of

-.I element

-are consumed and the match succeeds.  If no occurrences of

-.I element

-are present on the input, the match fails.

-.TP

-.RB element\  *

-The element is optional and repeatable.  If present on the input, one or more

-occurrences of

-.I element

-are consumed and the match succeeds.  If no occurrences of

-.I element

-are present on the input, the match succeeds anyway.

-.PP

-The above elements and suffixes can be converted into predicates (that

-match arbitrary input text and subsequently succeed or fail

-.I without

-consuming that input) with the following prefixes:

-.TP

-.BR & \ element

-The predicate succeeds only if

-.I element

-can be matched.  Input text scanned while matching

-.I element

-is not consumed from the input and remains available for subsequent

-matching.

-.TP

-.BR ! \ element

-The predicate succeeds only if

-.I element

-cannot be matched.  Input text scanned while matching

-.I element

-is not consumed from the input and remains available for subsequent

-matching.  A popular idiom is

-.nf

-    !.

-.fi

-which matches the end of file, after the last character of the input

-has already been consumed.

-.PP

-A special form of the '&' predicate is provided:

-.TP

-.BR & {\ expression\ }

-In this predicate the simple C

-.I expression

-.RB ( not

-statement) is evaluated immediately when the parser reaches the

-predicate.  If the

-.I expression

-yields non\-zero (true) the 'match' succeeds and the parser continues

-with the next element in the pattern.  If the

-.I expression

-yields zero (false) the 'match' fails and the parser backs up to look

-for an alternative parse of the input.

-.PP

-Several elements (with or without prefixes and suffixes) can be

-combined into a

-.I sequence

-by writing them one after the other.  The entire sequence matches only

-if each individual element within it matches, from left to right.

-.PP

-Sequences can be separated into disjoint alternatives by the

-alternation operator '/'.

-.TP

-.RB sequence\-1\  / \ sequence\-2\  / \ ...\  / \ sequence\-N

-Each sequence is tried in turn until one of them matches, at which

-time matching for the overall pattern succeeds.  If none of the

-sequences matches then the match of the overall pattern fails.

-.PP

-Finally, the pound sign (#) introduces a comment (discarded) that

-continues until the end of the line.

-.PP

-To summarise the above, the parser tries to match the input text

-against a pattern containing literals, names (representing other

-rules), and various operators (written as prefixes, suffixes,

-juxtaposition for sequencing and and infix alternation operator) that

-modify how the elements within the pattern are matched.  Matches are

-made from left to right, 'descending' into named sub\-rules as they are

-encountered.  If the matching process fails, the parser 'back tracks'

-('rewinding' the input appropriately in the process) to find the

-nearest alternative 'path' through the grammar.  In other words the

-parser performs a depth\-first, left\-to\-right search for the first

-successfully\-matching path through the rules.  If found, the actions

-along the successful path are executed (in the order they were

-encountered).

-.PP

-Note that predicates are evaluated

-.I immediately

-during the search for a successful match, since they contribute to the

-success or failure of the search.  Actions, however, are evaluated

-only after a successful match has been found.

-.SH PEG GRAMMAR FOR PEG GRAMMARS

-The grammar for

-.I peg

-grammars is shown below.  This will both illustrate and formalise

-the above description.

-.nf

-    Grammar         <\- Spacing Definition+ EndOfFile

-    Definition      <\- Identifier LEFTARROW Expression

-    Expression      <\- Sequence ( SLASH Sequence )*

-    Sequence        <\- Prefix*

-    Prefix          <\- AND Action

-                     / ( AND | NOT )? Suffix

-    Suffix          <\- Primary ( QUERY / STAR / PLUS )?

-    Primary         <\- Identifier !LEFTARROW

-                     / OPEN Expression CLOSE

-                     / Literal

-                     / Class

-                     / DOT

-                     / Action

-                     / BEGIN

-                     / END

-    Identifier      <\- < IdentStart IdentCont* > Spacing

-    IdentStart      <\- [a\-zA\-Z_]

-    IdentCont       <\- IdentStart / [0\-9]

-    Literal         <\- ['] < ( !['] Char  )* > ['] Spacing

-                     / ["] < ( !["] Char  )* > ["] Spacing

-    Class           <\- '[' < ( !']' Range )* > ']' Spacing

-    Range           <\- Char '\-' Char / Char

-    Char            <\- '\\\\' [abefnrtv'"\\[\\]\\\\]

-                     / '\\\\' [0\-3][0\-7][0\-7]

-                     / '\\\\' [0\-7][0\-7]?

-                     / '\\\\' '\-'

-                     / !'\\\\' .

-    LEFTARROW       <\- '<\-' Spacing

-    SLASH           <\- '/' Spacing

-    AND             <\- '&' Spacing

-    NOT             <\- '!' Spacing

-    QUERY           <\- '?' Spacing

-    STAR            <\- '*' Spacing

-    PLUS            <\- '+' Spacing

-    OPEN            <\- '(' Spacing

-    CLOSE           <\- ')' Spacing

-    DOT             <\- '.' Spacing

-    Spacing         <\- ( Space / Comment )*

-    Comment         <\- '#' ( !EndOfLine . )* EndOfLine

-    Space           <\- ' ' / '\\t' / EndOfLine

-    EndOfLine       <\- '\\r\\n' / '\\n' / '\\r'

-    EndOfFile       <\- !.

-    Action          <\- '{' < [^}]* > '}' Spacing

-    BEGIN           <\- '<' Spacing

-    END             <\- '>' Spacing

-.fi

-.SH LEG GRAMMARS

-.I leg

-is a variant of

-.I peg

-that adds some features of

-.IR lex (1)

-and

-.IR yacc (1).

-It differs from

-.I peg

-in the following ways.

-.TP

-.BI %{\  text... \ %}

-A declaration section can appear anywhere that a rule definition is

-expected.  The

-.I text

-between the delimiters '%{' and '%}' is copied verbatim to the

-generated C parser code

-.I before

-the code that implements the parser itself.

-.TP

-.IB name\  = \ pattern

-The 'assignment' operator replaces the left arrow operator '<\-'.

-.TP

-.B rule\-name

-Hyphens can appear as letters in the names of rules.  Each hyphen is

-converted into an underscore in the generated C source code.  A

-single hyphen '\-' is a legal rule name.

-.nf

-    \-       = [ \\t\\n\\r]*

-    number  = [0\-9]+                 \-

-    name    = [a\-zA\-Z_][a\-zA_Z_0\-9]* \-

-    l\-paren = '('                    \-

-    r\-paren = ')'                    \-

-.fi

-This example shows how ignored whitespace can be obvious when reading

-the grammar and yet unobtrusive when placed liberally at the end of

-every rule associated with a lexical element.

-.TP

-.IB seq\-1\  | \ seq\-2

-The alternation operator is vertical bar '|' rather than forward

-slash '/'.  The

-.I peg

-rule

-.nf

-    name <\- sequence\-1

-          / sequence\-2

-          / sequence\-3

-.fi

-is therefore written

-.nf

-    name = sequence\-1

-         | sequence\-2

-         | sequence\-3

-         ;

-.fi

-in

-.I leg

-(with the final semicolon being optional, as described next).

-.TP

-.IB @{\ action\ }

-Actions prefixed with an 'at' symbol will be performed during parsing,

-at the time they are encountered while matching the input text with a

-rule.

-Because of back-tracking in the PEG parsing algorithm, actions

-prefixed with '@' might be performed multiple times for the same input

-text.

-(The usual behviour of actions is that they are saved up until

-matching is complete, and then those that are part of the

-final derivation are performed in left-to-right order.)

-The variable

-.I yytext

-is available within these actions.

-.TP

-.IB exp \ ~ \ {\ action\ }

-A postfix operator

-.BI ~ {\ action\ }

-can be placed after any expression and will behave like a normal

-action (arbitrary C code) except that it is invoked only when

-.I exp

-fails.  It binds less tightly than any other operator except alternation and sequencing, and

-is intended to make error handling and recovery code easier to write.

-Note that

-.I yytext

-and

-.I yyleng

-are not available inside these actions, but the pointer variable

-.I yy

-is available to give the code access to any user\-defined members

-of the parser state (see "CUSTOMISING THE PARSER" below).

-Note also that

-.I exp

-is always a single expression; to invoke an error action for any

-failure within a sequence, parentheses must be used to group the

-sequence into a single expression.

-.nf

-    rule = e1 e2 e3 ~{ error("e[12] ok; e3 has failed"); }

-         | ...

-    rule = (e1 e2 e3) ~{ error("one of e[123] has failed"); }

-         | ...

-.fi

-.TP

-.IB pattern\  ;

-A semicolon punctuator can optionally terminate a

-.IR pattern .

-.TP

-.BI %% \ text...

-A double percent '%%' terminates the rules (and declarations) section of

-the grammar.  All

-.I text

-following '%%' is copied verbatim to the generated C parser code

-.I after

-the parser implementation code.

-.TP

-.BI $$\ = \ value

-A sub\-rule can return a semantic

-.I value

-from an action by assigning it to the pseudo\-variable '$$'.  All

-semantic values must have the same type (which defaults to 'int').

-This type can be changed by defining YYSTYPE in a declaration section.

-.TP

-.IB identifier : name

-The semantic value returned (by assigning to '$$') from the sub\-rule

-.I name

-is associated with the

-.I identifier

-and can be referred to in subsequent actions.

-.PP

-The desk calculator example below illustrates the use of '$$' and ':'.

-.SH LEG EXAMPLE: A DESK CALCULATOR

-The extensions in

-.I leg

-described above allow useful parsers and evaluators (including

-declarations, grammar rules, and supporting C functions such

-as 'main') to be kept within a single source file.  To illustrate this

-we show a simple desk calculator supporting the four common arithmetic

-operators and named variables.  The intermediate results of arithmetic

-evaluation will be accumulated on an implicit stack by returning them

-as semantic values from sub\-rules.

-.nf

-    %{

-    #include <stdio.h>     /* printf() */

-    #include <stdlib.h>    /* atoi() */

-    int vars[26];

-    %}

-    Stmt    = \- e:Expr EOL                  { printf("%d\\n", e); }

-            | ( !EOL . )* EOL               { printf("error\\n"); }

-    Expr    = i:ID ASSIGN s:Sum             { $$ = vars[i] = s; }

-            | s:Sum                         { $$ = s; }

-    Sum     = l:Product

-                    ( PLUS  r:Product       { l += r; }

-                    | MINUS r:Product       { l \-= r; }

-                    )*                      { $$ = l; }

-    Product = l:Value

-                    ( TIMES  r:Value        { l *= r; }

-                    | DIVIDE r:Value        { l /= r; }

-                    )*                      { $$ = l; }

-    Value   = i:NUMBER                      { $$ = atoi(yytext); }

-            | i:ID !ASSIGN                  { $$ = vars[i]; }

-            | OPEN i:Expr CLOSE             { $$ = i; }

-    NUMBER  = < [0\-9]+ >    \-               { $$ = atoi(yytext); }

-    ID      = < [a\-z]  >    \-               { $$ = yytext[0] \- 'a'; }

-    ASSIGN  = '='           \-

-    PLUS    = '+'           \-

-    MINUS   = '\-'           \-

-    TIMES   = '*'           \-

-    DIVIDE  = '/'           \-

-    OPEN    = '('           \-

-    CLOSE   = ')'           \-

-    \-       = [ \\t]*

-    EOL     = '\\n' | '\\r\\n' | '\\r' | ';'

-    %%

-    int main()

-    {

-      while (yyparse())

-        ;

-      return 0;

-    }

-.fi

-.SH LEG GRAMMAR FOR LEG GRAMMARS

-The grammar for

-.I leg

-grammars is shown below.  This will both illustrate and formalise the

-above description.

-.nf

-    grammar =       \-

-                    ( declaration | definition )+

-                    trailer? end\-of\-file

-    declaration =   '%{' < ( !'%}' . )* > RPERCENT

-    trailer =       '%%' < .* >

-    definition =    identifier EQUAL expression SEMICOLON?

-    expression =    sequence ( BAR sequence )*

-    sequence =      error+

-    error =         prefix ( TILDE action )?

-    prefix =        AND action

-    |               ( AND | NOT )? suffix

-    suffix =        primary ( QUERY | STAR | PLUS )?

-    primary =       identifier COLON identifier !EQUAL

-    |               identifier !EQUAL

-    |               OPEN expression CLOSE

-    |               literal

-    |               class

-    |               DOT

-    |               action

-    |               BEGIN

-    |               END

-    identifier =    < [\-a\-zA\-Z_][\-a\-zA\-Z_0\-9]* > \-

-    literal =       ['] < ( !['] char )* > ['] \-

-    |               ["] < ( !["] char )* > ["] \-

-    class =         '[' < ( !']' range )* > ']' \-

-    range =         char '\-' char | char

-    char =          '\\\\' [abefnrtv'"\\[\\]\\\\]

-    |               '\\\\' [0\-3][0\-7][0\-7]

-    |               '\\\\' [0\-7][0\-7]?

-    |               !'\\\\' .

-    action =        '{' < braces* > '}' \-

-    braces =        '{' braces* '}'

-    |               !'}' .

-    EQUAL =         '=' \-

-    COLON =         ':' \-

-    SEMICOLON =     ';' \-

-    BAR =           '|' \-

-    AND =           '&' \-

-    NOT =           '!' \-

-    QUERY =         '?' \-

-    STAR =          '*' \-

-    PLUS =          '+' \-

-    OPEN =          '(' \-

-    CLOSE =         ')' \-

-    DOT =           '.' \-

-    BEGIN =         '<' \-

-    END =           '>' \-

-    TILDE =         '~' \-

-    RPERCENT =      '%}' \-

-    \- =             ( space | comment )*

-    space =         ' ' | '\\t' | end\-of\-line

-    comment =       '#' ( !end\-of\-line . )* end\-of\-line

-    end\-of\-line =   '\\r\\n' | '\\n' | '\\r'

-    end\-of\-file =   !.

-.fi

-.SH CUSTOMISING THE PARSER

-The following symbols can be redefined in declaration sections to

-modify the generated parser code.

-.TP

-.B YYSTYPE

-The semantic value type.  The pseudo\-variable '$$' and the

-identifiers 'bound' to rule results with the colon operator ':' should

-all be considered as being declared to have this type.  The default

-value is 'int'.

-.TP

-.B YYPARSE

-The name of the main entry point to the parser.  The default value

-is 'yyparse'.

-.TP

-.B YYPARSEFROM

-The name of an alternative entry point to the parser.  This function

-expects one argument: the function corresponding to the rule from

-which the search for a match should begin.  The default

-is 'yyparsefrom'.  Note that yyparse() is defined as

-.nf

-    int yyparse() { return yyparsefrom(yy_foo); }

-.fi

-where 'foo' is the name of the first rule in the grammar.

-.TP

-.BI YY_INPUT( buf , \ result , \ max_size )

-This macro is invoked by the parser to obtain more input text.

-.I buf

-points to an area of memory that can hold at most

-.I max_size

-characters.  The macro should copy input text to

-.I buf

-and then assign the integer variable

-.I result

-to indicate the number of characters copied.  If no more input is available,

-the macro should assign 0 to

-.IR result .

-By default, the YY_INPUT macro is defined as follows.

-.nf

-    #define YY_INPUT(buf, result, max_size)        \\

-    {                                              \\

-      int yyc= getchar();                          \\

-      result= (EOF == yyc) ? 0 : (*(buf)= yyc, 1); \\

-    }

-.fi

-Note that if YY_CTX_LOCAL is defined (see below) then an additional

-first argument, containing the parser context, is passed to YY_INPUT.

-.TP

-.B YY_DEBUG

-If this symbols is defined then additional code will be included in

-the parser that prints vast quantities of arcane information to the

-standard error while the parser is running.

-.TP

-.B YY_BEGIN

-This macro is invoked to mark the start of input text that will be

-made available in actions as 'yytext'.  This corresponds to

-occurrences of '<' in the grammar.  These are converted into

-predicates that are expected to succeed.  The default definition

-.nf

-    #define YY_BEGIN (yybegin= yypos, 1)

-.fi

-therefore saves the current input position and returns 1 ('true') as

-the result of the predicate.

-.TP

-.B YY_END

-This macros corresponds to '>' in the grammar.  Again, it is a

-predicate so the default definition saves the input position

-before 'succeeding'.

-.nf

-    #define YY_END (yyend= yypos, 1)

-.fi

-.TP

-.BI YY_PARSE( T )

-This macro declares the parser entry points (yyparse and yyparsefrom)

-to be of type

-.IR T .

-The default definition

-.nf

-    #define YY_PARSE(T) T

-.fi

-leaves yyparse() and yyparsefrom() with global visibility.  If they

-should not be externally visible in other source files, this macro can

-be redefined to declare them 'static'.

-.nf

-    #define YY_PARSE(T) static T

-.fi

-.TP

-.B YY_CTX_LOCAL

-If this symbol is defined during compilation of a generated parser

-then global parser state will be kept in a structure of

-type 'yycontext' which can be declared as a local variable.  This

-allows multiple instances of parsers to coexist and to be thread\-safe.

-The parsing function

-.IR yyparse ()

-will be declared to expect a first argument of type 'yycontext *', an

-instance of the structure holding the global state for the parser.

-This instance must be allocated and initialised to zero by the client.

-A trivial but complete example is as follows.

-.nf

-    #include <stdio.h>

-    #define YY_CTX_LOCAL

-    #include "the\-generated\-parser.peg.c"

-    int main()

-    {

-      yycontext ctx;

-      memset(&ctx, 0, sizeof(yycontext));

-      while (yyparse(&ctx));

-      return 0;

-    }

-.fi

-Note that if this symbol is undefined then the compiled parser will

-statically allocate its global state and will be neither reentrant nor

-thread\-safe.

-Note also that the parser yycontext structure is initialised automatically

-the first time

-.IR yyparse ()

-is called; this structure

-.B must

-therefore be properly initialised to zero before the first call to

-.IR yyparse ().

-.TP

-.B YY_CTX_MEMBERS

-If YY_CTX_LOCAL is defined (see above) then the macro YY_CTX_MEMBERS

-can be defined to expand to any additional member field declarations

-that the client would like included in the declaration of

-the 'yycontext' structure type.  These additional members are

-otherwise ignored by the generated parser.  The instance

-of 'yycontext' associated with the currently\-active parser is

-available within actions as the pointer variable

-.IR yy .

-.TP

-.B YY_BUFFER_SIZE

-The initial size of the text buffer, in bytes.  The default is 1024

-and the buffer size is doubled whenever required to meet demand during

-parsing.  An application that typically parses much longer strings

-could increase this to avoid unnecessary buffer reallocation.

-.TP

-.B YY_STACK_SIZE

-The initial size of the variable and action stacks.  The default is

-128, which is doubled whenever required to meet demand during parsing.

-Applications that have deep call stacks with many local variables, or

-that perform many actions after a single successful match, could increase

-this to avoid unnecessary buffer reallocation.

-.TP

-.BI YY_MALLOC( YY , \ SIZE )

-The memory allocator for all parser\-related storage.  The parameters

-are the current yycontext structure and the number of bytes to

-allocate.  The default definition is:

-.RI malloc( SIZE )

-.TP

-.BI YY_REALLOC( YY , \ PTR , \ SIZE )

-The memory reallocator for dynamically\-grown storage (such as text

-buffers and variable stacks).  The parameters are the current

-yycontext structure, the previously\-allocated storage, and the number

-of bytes to which that storage should be grown.  The default definition is:

-.RI realloc( PTR , \ SIZE )

-.TP

-.BI YY_FREE( YY , \ PTR )

-The memory deallocator.  The parameters are the current yycontext

-structure and the storage to deallocate.  The default definition is:

-.RI free( PTR )

-.TP

-.B YYRELEASE

-The name of the function that releases all resources held by a

-yycontext structure.  The default value is 'yyrelease'.

-.PP

-The following variables can be referred to within actions.

-.TP

-.B char *yybuf

-This variable points to the parser's input buffer used to store input

-text that has not yet been matched.

-.TP

-.B int yypos

-This is the offset (in yybuf) of the next character to be matched and

-consumed.

-.TP

-.B char *yytext

-The most recent matched text delimited by '<' and '>' is stored in this variable.

-.TP

-.B int yyleng

-This variable indicates the number of characters in 'yytext'.

-.TP

-.B yycontext *yy

-This variable points to the instance of 'yycontext' associated with

-the currently\-active parser.

-.PP

-Programs that wish to release all the resources associated with a

-parser can use the following function.

-.TP

-.BI yyrelease(yycontext * yy )

-Returns all parser\-allocated storage associated with

-.I yy

-to the system.  The storage will be reallocated on the next call to

-.IR yyparse ().

-.PP

-Note that the storage for the yycontext structure itself is never

-allocated or reclaimed implicitly.  The application must allocate

-these structures in automatic storage, or use

-.IR calloc ()

-and

-.IR free ()

-to manage them explicitly.  The example in the following section

-demonstrates one approach to resource management.

-.SH LEG EXAMPLE: EXTENDING THE PARSER'S CONTEXT

-The

-.I yy

-variable passed to actions contains the state of the parser plus any

-additional fields defined by YY_CTX_MEMBERS.  Theses fields can be

-used to store application\-specific information that is global to a

-particular call of

-.IR yyparse ().

-A trivial but complete

-.I leg

-example follows in which the yycontext

-structure is extended with a

-.I count

-of the number of newline characters

-seen in the input so far (the grammar otherwise consumes and ignores

-the entire input).  The caller of

-.IR yyparse ()

-uses

-.I count

-to print the number of lines of input that were read.

-.nf

-    %{

-    #define YY_CTX_LOCAL 1

-    #define YY_CTX_MEMBERS \\

-      int count;

-    %}

-    Char    = ('\\n' | '\\r\\n' | '\\r')        { yy\->count++ }

-            | .

-    %%

-    #include <stdio.h>

-    #include <string.h>

-    int main()

-    {

-        /* create a local parser context in automatic storage */

-        yycontext yy;

-        /* the context *must* be initialised to zero before first use*/

-        memset(&yy, 0, sizeof(yy));

-        while (yyparse(&yy))

-            ;

-        printf("%d newlines\\n", yy.count);

-        /* release all resources associated with the context */

-        yyrelease(&yy);

-        return 0;

-    }

-.fi

-.SH DIAGNOSTICS

-.I peg

-and

-.I leg

-warn about the following conditions while converting a grammar into a parser.

-.TP

-.B syntax error

-The input grammar was malformed in some way.  The error message will

-include the text about to be matched (often backed up a huge amount

-from the actual location of the error) and the line number of the most

-recently considered character (which is often the real location of the

-problem).

-.TP

-.B rule 'foo' used but not defined

-The grammar referred to a rule named 'foo' but no definition for it

-was given.  Attempting to use the generated parser will likely result

-in errors from the linker due to undefined symbols associated with the

-missing rule.

-.TP

-.B rule 'foo' defined but not used

-The grammar defined a rule named 'foo' and then ignored it.  The code

-associated with the rule is included in the generated parser which

-will in all other respects be healthy.

-.TP

-.B possible infinite left recursion in rule 'foo'

-There exists at least one path through the grammar that leads from the

-rule 'foo' back to (a recursive invocation of) the same rule without

-consuming any input.

-.PP

-Left recursion, especially that found in standards documents, is

-often 'direct' and implies trivial repetition.

-.nf

-    # (6.7.6)

-    direct\-abstract\-declarator =

-        LPAREN abstract\-declarator RPAREN

-    |   direct\-abstract\-declarator? LBRACKET assign\-expr? RBRACKET

-    |   direct\-abstract\-declarator? LBRACKET STAR RBRACKET

-    |   direct\-abstract\-declarator? LPAREN param\-type\-list? RPAREN

-.fi

-The recursion can easily be eliminated by converting the parts of the

-pattern following the recursion into a repeatable suffix.

-.nf

-    # (6.7.6)

-    direct\-abstract\-declarator =

-        direct\-abstract\-declarator\-head?

-        direct\-abstract\-declarator\-tail*

-    direct\-abstract\-declarator\-head =

-        LPAREN abstract\-declarator RPAREN

-    direct\-abstract\-declarator\-tail =

-        LBRACKET assign\-expr? RBRACKET

-    |   LBRACKET STAR RBRACKET

-    |   LPAREN param\-type\-list? RPAREN

-.fi

-.SH CAVEATS

-A parser that accepts empty input will

-.I always

-succeed.  Consider the following example, not atypical of a first

-attempt to write a PEG\-based parser:

-.nf

-    Program = Expression*

-    Expression = "whatever"

-    %%

-    int main() {

-      while (yyparse())

-        puts("success!");

-      return 0;

-    }

-.fi

-This program loops forever, no matter what (if any) input is provided

-on stdin.  Many fixes are possible, the easiest being to insist that

-the parser always consumes some non\-empty input.  Changing the first

-line to

-.nf

-    Program = Expression+

-.fi

-accomplishes this.  If the parser is expected to consume the entire

-input, then explicitly requiring the end\-of\-file is also highly

-recommended:

-.nf

-    Program = Expression+ !.

-.fi

-This works because the parser will only fail to match ("!" predicate)

-any character at all ("." expression) when it attempts to read beyond

-the end of the input.

-.SH BUGS

-You have to type 'man peg' to read the manual page for

-.IR leg (1).

-.PP

-The 'yy' and 'YY' prefixes cannot be changed.

-.PP

-Left recursion is detected in the input grammar but is not handled

-correctly in the generated parser.

-.PP

-Diagnostics for errors in the input grammar are obscure and not

-particularly helpful.

-.PP

-The operators

-.BR ! \ \c

-and

-.B ~

-should really be named the other way around.

-.PP

-Several commonly\-used

-.IR lex (1)

-features (yywrap(), yyin, etc.) are completely absent.

-.PP

-The generated parser does not contain '#line' directives to direct C

-compiler errors back to the grammar description when appropriate.

-.SH SEE ALSO

-D. Val Schorre,

-.I META II, a syntax\-oriented compiler writing language,

-19th ACM National Conference, 1964, pp.\ 41.301\-\-41.311.  Describes a

-self\-implementing parser generator for analytic grammars with no

-backtracking.

-.PP

-Alexander Birman,

-.I The TMG Recognition Schema,

-Ph.D. dissertation, Princeton, 1970.  A mathematical treatment of the

-power and complexity of recursive\-descent parsing with backtracking.

-.PP

-Bryan Ford,

-.I Parsing Expression Grammars: A Recognition\-Based Syntactic Foundation,

-ACM SIGPLAN Symposium on Principles of Programming Languages, 2004.

-Defines PEGs and analyses them in relation to context\-free and regular

-grammars.  Introduces the syntax adopted in

-.IR peg .

-.PP

-The standard Unix utilities

-.IR lex (1)

-and

-.IR yacc (1)

-which influenced the syntax and features of

-.IR leg .

-.PP

-The source code for

-.I peg

-and

-.I leg

-whose grammar parsers are written using themselves.

-.PP

-The latest version of this software and documentation:

-.nf

-    http://piumarta.com/software/peg

-.fi

-.SH AUTHOR

-.IR peg ,

-.I leg

-and this manual page were written by Ian Piumarta (first\-name at

-last\-name dot com) while investigating the viability of regular and

-parsing\-expression grammars for efficiently extracting type and

-signature information from C header files.

-.PP

-Please send bug reports and suggestions for improvements to the author

-at the above address.

home: hub: minipeg