hub: minipeg

--- a/Makefile

+++ b/Makefile

@@ -13,7 +13,7 @@

 install: $(BINDIR) $(BINDIR)/leg $(MANDIR) $(MANDIR)/peg.1

 	mkdir -p $(MANDIR) $(BINDIR)

 	cp minipeg $(BINDIR)

-	cp minipeg.1 $(MANDIR)

+	cp docs/minipeg.1 $(MANDIR)

 $(MANDIR) :

 	mkdir -p $(MANDIR)

@@ -35,6 +35,9 @@

 peg-amalg.c: peg.peg minipeg-amalg

 	./minipeg-amalg -o $@ peg.peg

+# Regenerate the HTML page by running docs/index.html.sh inside docs/.  The output goes to a temporary file that is renamed only on success, so a failed run never truncates the existing page.

+docs/index.html: .FORCE

+	$(SHELL) -ec '(cd docs && sh ./index.html.sh)' > $@.tmp && mv -f $@.tmp $@ || { rm -f $@.tmp; exit 1; }

 # Check the pregenerated peg.c matches the built peg-new.c.

 # We also check peg-amalg.c to test our amalgamation process.

 check-self-host: peg.c peg-new.c peg-amalg.c .FORCE

@@ -45,7 +48,7 @@

 	$(SHELL) -ec '(cd examples;  $(MAKE))'

 clean : .FORCE

-	rm -f minipeg minipeg-split $(GENSRC) $(OBJ)

+	rm -f minipeg minipeg-amalg $(GENSRC) $(OBJ)

 	$(SHELL) -ec '(cd examples;  $(MAKE) clean)'

 .FORCE :

--- a/compile.c

+++ b/compile.c

@@ -119,7 +119,7 @@

 static void label(int n)	{ fprintf(output, "\n  l%d:;\t", n); }

 static void jump(int n)		{ fprintf(output, "  goto l%d;", n); }

 static void save(int n)		{ fprintf(output, "  int yypos%d= yy->__pos, yythunkpos%d= yy->__thunkpos;", n, n); }

-static void restore(int n)	{ fprintf(output,     "  yy->__pos= yypos%d; yy->__thunkpos= yythunkpos%d;", n, n); }

+static void restore(int n)	{ fprintf(output, "  yy->__pos= yypos%d; yy->__thunkpos= yythunkpos%d;", n, n); }

 static void Node_compile_c_ko(Node *node, int ko)

--- /dev/null

+++ b/docs/index.html

@@ -1,0 +1,787 @@

+<!doctype html>

+<meta name=viewport content="width=device-width, initial-scale=1">

+<link rel="stylesheet" href="./simple.css" type="text/css" />

+<title>Minipeg</title>

+<body>

+<div class="container">

+<h1><img id="logo" src="logo.png" alt>Minipeg</h1>

+<p>

+Minipeg is a parser generator for C you can easily add

+to your project as a single file.

+</p>

+<ul>

+<li>Single file distribution <a href="">minipeg.c</a></li>

+<li>Example <a href="">calculator</a></li>

+<li>Example <a href="https://github.com/andrewchambers/minias/blob/master/asm.peg">x86_64 assembler</a></li>

+<li><a href="">Source repository</a></li>

+</ul>

+<h2>Man Page</h2>

+<pre>

+MINIPEG(1)		    General Commands Manual		    MINIPEG(1)

+NAME

+       minipeg - parser generator

+SYNOPSIS

+       minipeg [-hvVP -ooutput] [filename ...]

+DESCRIPTION

+       minipeg is a tool for generating recursive-descent parsers: programs

+       that perform pattern matching on text.  They process a Parsing

+       Expression Grammar (PEG) [Ford 2004] to produce a program that

+       recognises legal sentences of that grammar.  minipeg processes PEGs

+       written with syntax and conventions that are intended to make it an

+       attractive replacement for parsers built with lex(1) and yacc(1).

+       Unlike lex and yacc, minipeg supports unlimited backtracking, provides

+       ordered choice as a means for disambiguation, and can combine scanning

+       (lexical analysis) and parsing (syntactic analysis) into a single

+       activity.

+       minipeg reads the specified filenames, or standard input if no

+       filenames are given, for a grammar describing the parser to generate.

+       minipeg then generates a C source file that defines a function

+       yyparse().  This C source file can be included in, or compiled and then

+       linked with, a client program.  Each time the client program calls

+       yyparse() the parser consumes input text according to the parsing

+       rules, starting from the first rule in the grammar.  yyparse() returns

+       non-zero if the input could be parsed according to the grammar; it

+       returns zero if the input could not be parsed.

+       The prefix 'yy' or 'YY' is prepended to all externally-visible symbols

+       in the generated parser.	 This is intended to reduce the risk of

+       namespace pollution in client programs.	(The choice of 'yy' is

+       historical; see lex(1) and yacc(1), for example.)

+OPTIONS

+       minipeg provides the following options:

+       -h     prints a summary of available options and then exits.

+       -ooutput

+	      writes the generated parser to the file output instead of the

+	      standard output.

+       -P     suppresses #line directives in the output.

+       -v     writes verbose information to standard error while working.

+       -V     writes version information to standard error then exits.

+EXAMPLE: A CALCULATOR

+       Here we show a simple desk calculator supporting the four common

+       arithmetic operators and named variables.  The intermediate results of

+       arithmetic evaluation will be accumulated on an implicit stack by

+       returning them as semantic values from sub-rules.

+	   %{

+	   #include &lt;stdio.h&gt;	  /* printf() */

+	   #include &lt;stdlib.h&gt;	  /* atoi() */

+	   int vars[26];

+	   %}

+	   Stmt	   = - e:Expr EOL		   { printf("%d\n", e); }

+		   | ( !EOL . )* EOL		   { printf("error\n"); }

+	   Expr	   = i:ID ASSIGN s:Sum		   { $$ = vars[i] = s; }

+		   | s:Sum			   { $$ = s; }

+	   Sum	   = l:Product

+			   ( PLUS  r:Product	   { l += r; }

+			   | MINUS r:Product	   { l -= r; }

+			   )*			   { $$ = l; }

+	   Product = l:Value

+			   ( TIMES  r:Value	   { l *= r; }

+			   | DIVIDE r:Value	   { l /= r; }

+			   )*			   { $$ = l; }

+	   Value   = i:NUMBER			   { $$ = atoi(yytext); }

+		   | i:ID !ASSIGN		   { $$ = vars[i]; }

+		   | OPEN i:Expr CLOSE		   { $$ = i; }

+	   NUMBER  = < [0-9]+ >	   -		   { $$ = atoi(yytext); }

+	   ID	   = < [a-z]  >	   -		   { $$ = yytext[0] - 'a'; }

+	   ASSIGN  = '='	   -

+	   PLUS	   = '+'	   -

+	   MINUS   = '-'	   -

+	   TIMES   = '*'	   -

+	   DIVIDE  = '/'	   -

+	   OPEN	   = '('	   -

+	   CLOSE   = ')'	   -

+	   -	   = [ \t]*

+	   EOL	   = '\n' | '\r\n' | '\r' | ';'

+	   %%

+	   int main()

+	   {

+	     while (yyparse())

+	       ;

+	     return 0;

+	   }

+       If the above grammar is placed in the file calc.peg, running the

+       command

+	   $ minipeg -o calc.c calc.peg

+       will save the corresponding parser in the file calc.c.  The program can

+       then be compiled with a C compiler and run

+	   $ cc -o calc calc.c

+	   $ ./calc

+	   a=5

+	   5

+	   a+5

+	   10

+MINIPEG GRAMMARS

+       A grammar consists of a set of named rules.

+	   name = pattern

+       The pattern contains one or more of the following elements.

+       name   The element stands for the entire pattern in the rule with the

+	      given name.

+       "characters"

+	      A character or string enclosed in double quotes is matched

+	      literally.  The ANSI C escape sequences are recognised within

+	      the characters.

+       'characters'

+	      A character or string enclosed in single quotes is matched

+	      literally, as above.

+       [characters]

+	      A set of characters enclosed in square brackets matches any

+	      single character from the set, with escape characters recognised

+	      as above.	 If the set begins with an uparrow (^) then the set is

+	      negated (the element matches any character not in the set).  Any

+	      pair of characters separated with a dash (-) represents the

+	      range of characters from the first to the second, inclusive.  A

+	      single alphabetic character or underscore is matched by the

+	      following set.

+		  [a-zA-Z_]

+	      Similarly, the following matches	any single non-digit

+	      character.

+		  [^0-9]

+       .      A dot matches any character.  Note that the only time this fails

+	      is at the end of file, where there is no character to match.

+       ( pattern )

+	      Parentheses are used for grouping (modifying the precedence of

+	      the operators described below).

+       { action }

+	      Curly braces surround actions.  The action is arbitrary C source

+	      code to be executed at the end of matching.  Any braces within

+	      the action must be properly nested.  Any input text that was

+	      matched before the action and delimited by angle brackets (see

+	      below) is made available within the action as the contents of

+	      the character array yytext.  The length of (number of characters

+	      in) yytext is available in the variable yyleng.  (These variable

+	      names are historical; see lex(1).)

+       @{ action }

+	      Actions prefixed with an 'at' symbol will be performed during

+	      parsing, at the time they are encountered while matching the

+	      input text with a rule.  Because of back-tracking in the PEG

+	      parsing algorithm, actions prefixed with '@' might be performed

+	      multiple times for the same input text.  (The usual behaviour of

+	      actions is that they are saved up until matching is complete,

+	      and then those that are part of the final derivation are

+	      performed in left-to-right order.)  The variable yytext is

+	      available within these actions.

+       exp ~ { action }

+	      A postfix operator ~{ action } can be placed after any

+	      expression and will behave like a normal action (arbitrary C

+	      code) except that it is invoked only when exp fails.  It binds

+	      less tightly than any other operator except alternation and

+	      sequencing, and is intended to make error handling and recovery

+	      code easier to write.  Note that yytext and yyleng are not

+	      available inside these actions, but the pointer variable yy is

+	      available to give the code access to any user-defined members of

+	      the parser state (see "CUSTOMISING THE PARSER" below).  Note

+	      also that exp is always a single expression; to invoke an error

+	      action for any failure within a sequence, parentheses must be

+	      used to group the sequence into a single expression.

+		  rule = e1 e2 e3 ~{ error("e[12] ok; e3 has failed"); }

+		       | ...

+		  rule = (e1 e2 e3) ~{ error("one of e[123] has failed"); }

+		       | ...

+       <      An opening angle bracket always matches (consuming no input) and

+	      causes the parser to begin accumulating matched text.  This text

+	      will be made available to actions in the variable yytext.

+       >      A closing angle bracket always matches (consuming no input) and

+	      causes the parser to stop accumulating text for yytext.

+       The above elements can be made optional and/or repeatable with the

+       following suffixes:

+       element ?

+	      The element is optional.	If present on the input, it is

+	      consumed and the match succeeds.	If not present on the input,

+	      no text is consumed and the match succeeds anyway.

+       element +

+	      The element is repeatable.  If present on the input, one or more

+	      occurrences of element are consumed and the match succeeds.  If

+	      no occurrences of element are present on the input, the match

+	      fails.

+       element *

+	      The element is optional and repeatable.  If present on the

+	      input, one or more occurrences of element are consumed and the

+	      match succeeds.  If no occurrences of element are present on the

+	      input, the match succeeds anyway.

+       The above elements and suffixes can be converted into predicates (that

+       match arbitrary input text and subsequently succeed or fail without

+       consuming that input) with the following prefixes:

+       & element

+	      The predicate succeeds only if element can be matched.  Input

+	      text scanned while matching element is not consumed from the

+	      input and remains available for subsequent matching.

+       ! element

+	      The predicate succeeds only if element cannot be matched.	 Input

+	      text scanned while matching element is not consumed from the

+	      input and remains available for subsequent matching.  A popular

+	      idiom is

+		  !.

+	      which matches the end of file, after the last character of the

+	      input has already been consumed.

+       A special form of the '&' predicate is provided:

+       &{ expression }

+	      In this predicate the simple C expression (not statement) is

+	      evaluated immediately when the parser reaches the predicate.  If

+	      the expression yields non-zero (true) the 'match' succeeds and

+	      the parser continues with the next element in the pattern.  If

+	      the expression yields zero (false) the 'match' fails and the

+	      parser backs up to look for an alternative parse of the input.

+       Several elements (with or without prefixes and suffixes) can be

+       combined into a sequence by writing them one after the other.  The

+       entire sequence matches only if each individual element within it

+       matches, from left to right.

+       Sequences can be separated into disjoint alternatives by the

+       alternation operator '|'.

+       sequence-1 | sequence-2 | ... | sequence-N

+	      Each sequence is tried in turn until one of them matches, at

+	      which time matching for the overall pattern succeeds.  If none

+	      of the sequences matches then the match of the overall pattern

+	      fails.

+       The following elements can appear in addition to rules.

+       %{ text... %}

+	      A declaration section can appear anywhere that a rule definition

+	      is expected.  The text between the delimiters '%{' and '%}' is

+	      copied verbatim to the generated C parser code before the code

+	      that implements the parser itself.

+       The pound sign (#) introduces a comment (discarded) that continues

+       until the end of the line.

+       %% text...

+	      A double percent '%%' terminates the rules (and declarations)

+	      section of the grammar.  All text following '%%' is copied

+	      verbatim to the generated C parser code after the parser

+	      implementation code.

+       Some notes regarding rules and patterns follow.

+       rule-name Hyphens can appear as letters in the names of rules.  Each

+       hyphen is converted into an underscore in the generated C source code.

+       A single hyphen '-' is a legal rule name.

+       Within actions you can access and manipulate named values.

+       $$ = value

+	      A sub-rule can return a semantic value from an action by

+	      assigning it to the pseudo-variable '$$'.	 All semantic values

+	      must have the same type (which defaults to 'int').  This type

+	      can be changed by defining YYSTYPE in a declaration section.

+       identifier:name

+	      The semantic value returned (by assigning to '$$') from the

+	      sub-rule name is associated with the identifier and can be

+	      referred to in subsequent actions.

+MINIPEG GRAMMAR FOR MINIPEG GRAMMARS

+       The grammar for minipeg grammars is shown below.	 This will both

+       illustrate and formalise the above description.

+	   grammar =	   -

+			   ( declaration | definition )+

+			   trailer? end-of-file

+	   declaration =   '%{' < ( !'%}' . )* > RPERCENT

+	   trailer =	   '%%' < .* >

+	   definition =	   identifier EQUAL expression

+	   expression =	   sequence ( BAR sequence )*

+	   sequence =	   error+

+	   error =	   prefix ( TILDE action )?

+	   prefix =	   AND action

+	   |		   ( AND | NOT )? suffix

+	   suffix =	   primary ( QUERY | STAR | PLUS )?

+	   primary =	   identifier COLON identifier !EQUAL

+	   |		   identifier !EQUAL

+	   |		   OPEN expression CLOSE

+	   |		   literal

+	   |		   class

+	   |		   DOT

+	   |		   action

+	   |		   BEGIN

+	   |		   END

+	   identifier =	   < [-a-zA-Z_][-a-zA-Z_0-9]* > -

+	   literal =	   ['] < ( !['] char )* > ['] -

+	   |		   ["] < ( !["] char )* > ["] -

+	   class =	   '[' < ( !']' range )* > ']' -

+	   range =	   char '-' char | char

+	   char =	   '\\' [abefnrtv'"\[\]\\]

+	   |		   '\\' [0-3][0-7][0-7]

+	   |		   '\\' [0-7][0-7]?

+	   |		   !'\\' .

+	   action =	   '{' < braces* > '}' -

+	   braces =	   '{' braces* '}'

+	   |		   !'}' .

+	   EQUAL =	   '=' -

+	   COLON =	   ':' -

+	   BAR =	   '|' -

+	   AND =	   '&' -

+	   NOT =	   '!' -

+	   QUERY =	   '?' -

+	   STAR =	   '*' -

+	   PLUS =	   '+' -

+	   OPEN =	   '(' -

+	   CLOSE =	   ')' -

+	   DOT =	   '.' -

+	   BEGIN =	   '<' -

+	   END =	   '>' -

+	   TILDE =	   '~' -

+	   RPERCENT =	   '%}' -

+	   - =		   ( space | comment )*

+	   space =	   ' ' | '\t' | end-of-line

+	   comment =	   '#' ( !end-of-line . )* end-of-line

+	   end-of-line =   '\r\n' | '\n' | '\r'

+	   end-of-file =   !.

+CUSTOMISING THE PARSER

+       The following symbols can be redefined in declaration sections to

+       modify the generated parser code.

+       YYSTYPE

+	      The semantic value type.	The pseudo-variable '$$' and the

+	      identifiers 'bound' to rule results with the colon operator ':'

+	      should all be considered as being declared to have this type.

+	      The default value is 'int'.

+       YYPARSE

+	      The name of the main entry point to the parser.  The default

+	      value is 'yyparse'.

+       YYPARSEFROM

+	      The name of an alternative entry point to the parser.  This

+	      function expects one argument: the function corresponding to the

+	      rule from which the search for a match should begin.  The

+	      default is 'yyparsefrom'.	 Note that yyparse() is defined as

+		  int yyparse() { return yyparsefrom(yy_foo); }

+	      where 'foo' is the name of the first rule in the grammar.

+       YY_INPUT(buf, result, max_size)

+	      This macro is invoked by the parser to obtain more input text.

+	      buf points to an area of memory that can hold at most max_size

+	      characters.  The macro should copy input text to buf and then

+	      assign the integer variable result to indicate the number of

+	      characters copied.  If no more input is available, the macro

+	      should assign 0 to result.  By default, the YY_INPUT macro is

+	      defined as follows.

+		  #define YY_INPUT(buf, result, max_size)	 \

+		  {						 \

+		    int yyc= getchar();				 \

+		    result= (EOF == yyc) ? 0 : (*(buf)= yyc, 1); \

+		  }

+	      Note that if YY_CTX_LOCAL is defined (see below) then an

+	      additional first argument, containing the parser context, is

+	      passed to YY_INPUT.

+       YY_DEBUG

+	      If this symbol is defined then additional code will be included

+	      in the parser that prints vast quantities of arcane information

+	      to the standard error while the parser is running.

+       YY_BEGIN

+	      This macro is invoked to mark the start of input text that will

+	      be made available in actions as 'yytext'.	 This corresponds to

+	      occurrences of '<' in the grammar.  These are converted into

+	      predicates that are expected to succeed.	The default definition

+		  #define YY_BEGIN (yybegin= yypos, 1)

+	      therefore saves the current input position and returns 1

+	      ('true') as the result of the predicate.

+       YY_END This macro corresponds to '>' in the grammar.  Again, it is a

+	      predicate so the default definition saves the input position

+	      before 'succeeding'.

+		  #define YY_END (yyend= yypos, 1)

+       YY_PARSE(T)

+	      This macro declares the parser entry points (yyparse and

+	      yyparsefrom) to be of type T.  The default definition

+		  #define YY_PARSE(T) T

+	      leaves yyparse() and yyparsefrom() with global visibility.  If

+	      they should not be externally visible in other source files,

+	      this macro can be redefined to declare them 'static'.

+		  #define YY_PARSE(T) static T

+       YY_CTX_LOCAL

+	      If this symbol is defined during compilation of a generated

+	      parser then global parser state will be kept in a structure of

+	      type 'yycontext' which can be declared as a local variable.

+	      This allows multiple instances of parsers to coexist and to be

+	      thread-safe.  The parsing function yyparse() will be declared to

+	      expect a first argument of type 'yycontext *', an instance of

+	      the structure holding the global state for the parser.  This

+	      instance must be allocated and initialised to zero by the

+	      client.  A trivial but complete example is as follows.

+		  #include &lt;stdio.h&gt;

+		  #define YY_CTX_LOCAL

+		  #include "the-generated-parser.peg.c"

+		  int main()

+		  {

+		    yycontext ctx;

+		    memset(&ctx, 0, sizeof(yycontext));

+		    while (yyparse(&ctx));

+		    return 0;

+		  }

+	      Note that if this symbol is undefined then the compiled parser

+	      will statically allocate its global state and will be neither

+	      reentrant nor thread-safe.  Note also that the parser yycontext

+	      structure is initialised automatically the first time yyparse()

+	      is called; this structure must therefore be properly initialised

+	      to zero before the first call to yyparse().

+       YY_CTX_MEMBERS

+	      If YY_CTX_LOCAL is defined (see above) then the macro

+	      YY_CTX_MEMBERS can be defined to expand to any additional member

+	      field declarations that the client would like included in the

+	      declaration of the 'yycontext' structure type.  These additional

+	      members are otherwise ignored by the generated parser.  The

+	      instance of 'yycontext' associated with the currently-active

+	      parser is available within actions as the pointer variable yy.

+       YY_BUFFER_SIZE

+	      The initial size of the text buffer, in bytes.  The default is

+	      1024 and the buffer size is doubled whenever required to meet

+	      demand during parsing.  An application that typically parses

+	      much longer strings could increase this to avoid unnecessary

+	      buffer reallocation.

+       YY_STACK_SIZE

+	      The initial size of the variable and action stacks.  The default

+	      is 128, which is doubled whenever required to meet demand during

+	      parsing.	Applications that have deep call stacks with many

+	      local variables, or that perform many actions after a single

+	      successful match, could increase this to avoid unnecessary

+	      buffer reallocation.

+       YY_MALLOC(YY, SIZE)

+	      The memory allocator for all parser-related storage.  The

+	      parameters are the current yycontext structure and the number of

+	      bytes to allocate.  The default definition is: malloc(SIZE)

+       YY_REALLOC(YY, PTR, SIZE)

+	      The memory reallocator for dynamically-grown storage (such as

+	      text buffers and variable stacks).  The parameters are the

+	      current yycontext structure, the previously-allocated storage,

+	      and the number of bytes to which that storage should be grown.

+	      The default definition is: realloc(PTR, SIZE)

+       YY_FREE(YY, PTR)

+	      The memory deallocator.  The parameters are the current

+	      yycontext structure and the storage to deallocate.  The default

+	      definition is: free(PTR)

+       YYRELEASE

+	      The name of the function that releases all resources held by a

+	      yycontext structure.  The default value is 'yyrelease'.

+       The following variables can be referred to within actions.

+       char *yybuf

+	      This variable points to the parser's input buffer used to store

+	      input text that has not yet been matched.

+       int yypos

+	      This is the offset (in yybuf) of the next character to be

+	      matched and consumed.

+       char *yytext

+	      The most recent matched text delimited by '<' and '>' is stored

+	      in this variable.

+       int yyleng

+	      This variable indicates the number of characters in 'yytext'.

+       yycontext *yy

+	      This variable points to the instance of 'yycontext' associated

+	      with the currently-active parser.

+       Programs that wish to release all the resources associated with a

+       parser can use the following function.

+       yyrelease(yycontext*yy)

+	      Returns all parser-allocated storage associated with yy to the

+	      system.  The storage will be reallocated on the next call to

+	      yyparse().

+       Note that the storage for the yycontext structure itself is never

+       allocated or reclaimed implicitly.  The application must allocate these

+       structures in automatic storage, or use calloc() and free() to manage

+       them explicitly.	 The example in the following section demonstrates one

+       approach to resource management.

+EXAMPLE: EXTENDING THE PARSER'S CONTEXT

+       The yy variable passed to actions contains the state of the parser plus

+       any additional fields defined by YY_CTX_MEMBERS.	 These fields can be

+       used to store application-specific information that is global to a

+       particular call of yyparse().  A trivial but complete leg example

+       follows in which the yycontext structure is extended with a count of

+       the number of newline characters seen in the input so far (the grammar

+       otherwise consumes and ignores the entire input).  The caller of

+       yyparse() uses count to print the number of lines of input that were

+       read.

+	   %{

+	   #define YY_CTX_LOCAL 1

+	   #define YY_CTX_MEMBERS \

+	     int count;

+	   %}

+	   Char	   = ('\n' | '\r\n' | '\r')	   { yy->count++ }

+		   | .

+	   %%

+	   #include &lt;stdio.h&gt;

+	   #include &lt;string.h&gt;

+	   int main()

+	   {

+	       /* create a local parser context in automatic storage */

+	       yycontext yy;

+	       /* the context *must* be initialised to zero before first use*/

+	       memset(&yy, 0, sizeof(yy));

+	       while (yyparse(&yy))

+		   ;

+	       printf("%d newlines\n", yy.count);

+	       /* release all resources associated with the context */

+	       yyrelease(&yy);

+	       return 0;

+	   }

+DIAGNOSTICS

+       minipeg warns about the following conditions while converting a grammar

+       into a parser.

+       syntax error

+	      The input grammar was malformed in some way.  The error message

+	      will include the text about to be matched (often backed up a

+	      huge amount from the actual location of the error) and the line

+	      number of the most recently considered character (which is often

+	      the real location of the problem).

+       rule 'foo' used but not defined

+	      The grammar referred to a rule named 'foo' but no definition for

+	      it was given.  Attempting to use the generated parser will

+	      likely result in errors from the linker due to undefined symbols

+	      associated with the missing rule.

+       rule 'foo' defined but not used

+	      The grammar defined a rule named 'foo' and then ignored it.  The

+	      code associated with the rule is included in the generated

+	      parser which will in all other respects be healthy.

+       possible infinite left recursion in rule 'foo'

+	      There exists at least one path through the grammar that leads

+	      from the rule 'foo' back to (a recursive invocation of) the same

+	      rule without consuming any input.

+       Left recursion, especially that found in standards documents, is often

+       'direct' and implies trivial repetition.

+	   # (6.7.6)

+	   direct-abstract-declarator =

+	       LPAREN abstract-declarator RPAREN

+	   |   direct-abstract-declarator? LBRACKET assign-expr? RBRACKET

+	   |   direct-abstract-declarator? LBRACKET STAR RBRACKET

+	   |   direct-abstract-declarator? LPAREN param-type-list? RPAREN

+       The recursion can easily be eliminated by converting the parts of the

+       pattern following the recursion into a repeatable suffix.

+	   # (6.7.6)

+	   direct-abstract-declarator =

+	       direct-abstract-declarator-head?

+	       direct-abstract-declarator-tail*

+	   direct-abstract-declarator-head =

+	       LPAREN abstract-declarator RPAREN

+	   direct-abstract-declarator-tail =

+	       LBRACKET assign-expr? RBRACKET

+	   |   LBRACKET STAR RBRACKET

+	   |   LPAREN param-type-list? RPAREN

+CAVEATS

+       A parser that accepts empty input will always succeed.  Consider the

+       following example, not atypical of a first attempt to write a PEG-based

+       parser:

+	   Program = Expression*

+	   Expression = "whatever"

+	   %%

+	   int main() {

+	     while (yyparse())

+	       puts("success!");

+	     return 0;

+	   }

+       This program loops forever, no matter what (if any) input is provided

+       on stdin.  Many fixes are possible, the easiest being to insist that

+       the parser always consumes some non-empty input.	 Changing the first

+       line to

+	   Program = Expression+

+       accomplishes this.  If the parser is expected to consume the entire

+       input, then explicitly requiring the end-of-file is also highly

+       recommended:

+	   Program = Expression+ !.

+       This works because the parser will only fail to match ("!" predicate)

+       any character at all ("." expression) when it attempts to read beyond

+       the end of the input.

+BUGS

+       The 'yy' and 'YY' prefixes cannot be changed.

+       Left recursion is detected in the input grammar but is not handled

+       correctly in the generated parser.

+       Diagnostics for errors in the input grammar are obscure and not

+       particularly helpful.

+       The operators ! and ~ should really be named the other way around.

+       Several commonly-used lex(1) features (yywrap(), yyin, etc.) are

+       completely absent.

+       The generated parser does not contain '#line' directives to direct C

+       compiler errors back to the grammar description when appropriate.

+SEE ALSO

+       D. Val Schorre, META II, a syntax-oriented compiler writing language,

+       19th ACM National Conference, 1964, pp. 41.301--41.311.	Describes a

+       self-implementing parser generator for analytic grammars with no

+       backtracking.

+       Alexander Birman, The TMG Recognition Schema, Ph.D. dissertation,

+       Princeton, 1970.	 A mathematical treatment of the power and complexity

+       of recursive-descent parsing with backtracking.

+       Bryan Ford, Parsing Expression Grammars: A Recognition-Based Syntactic

+       Foundation, ACM SIGPLAN Symposium on Principles of Programming

+       Languages, 2004.	 Defines PEGs and analyses them in relation to

+       context-free and regular grammars.  Introduces the syntax adopted in

+       peg.

+       The standard Unix utilities lex(1) and yacc(1) which influenced the

+       syntax and features of minipeg.

+       The source code for minipeg whose grammar parsers are written using

+       themselves.

+       The latest version of this software and documentation:

+	   https://github.com/andrewchambers/minipeg

+AUTHOR

+       minipeg and this manual were originally written by Ian Piumarta under

+       the project name peg/leg.  minipeg is a fork of peg/leg by Andrew

+       Chambers.

+       Please send bug reports and suggestions for improvements to the author

+       at the project address.

+								    MINIPEG(1)

+</pre>

+</div>

+</body>

--- /dev/null

+++ b/docs/index.html.sh

@@ -1,0 +1,32 @@

+#!/bin/sh

+set -eu  # Emits the HTML documentation page on stdout; abort on any error or unset variable.

+cat <<'EOF'

+<!doctype html>

+<meta name=viewport content="width=device-width, initial-scale=1">

+<link rel="stylesheet" href="./simple.css" type="text/css" />

+<title>Minipeg</title>

+<body>

+<div class="container">

+<h1><img id="logo" src="logo.png" alt>Minipeg</h1>

+<p>

+Minipeg is a parser generator for C you can easily add

+to your project as a single file.

+</p>

+<ul>

+<li>Single file distribution <a href="">minipeg.c</a></li>

+<li>Example <a href="">calculator</a></li>

+<li>Example <a href="https://github.com/andrewchambers/minias/blob/master/asm.peg">x86_64 assembler</a></li>

+<li><a href="">Source repository</a></li>

+</ul>

+<h2>Man Page</h2>

+EOF

+echo "<pre>"

+mandoc -Tascii -Ofragment ./minipeg.1 | col -b | sed -e 's/&/\&amp;/g' -e 's/</\&lt;/g' -e 's/>/\&gt;/g'  # escape HTML metacharacters so text such as "#include <stdio.h>" is not parsed as a tag

+echo "</pre>"

+cat <<'EOF'

+</div>

+</body>

+EOF

binary files /dev/null b/docs/logo.png differ

--- /dev/null

+++ b/docs/minipeg.1

@@ -1,0 +1,915 @@

+.TH MINIPEG 1

+.SH NAME

+minipeg \- parser generator

+.SH SYNOPSIS

+.B minipeg

+.B [\-hvVP \-ooutput]

+.I [filename ...]

+.SH DESCRIPTION

+.I minipeg

+is a tool for generating recursive\-descent parsers: programs that

+perform pattern matching on text.  They process a Parsing Expression

+Grammar (PEG) [Ford 2004] to produce a program that recognises legal

+sentences of that grammar.

+.I minipeg

+processes PEGs written with syntax and conventions

+that are intended to make it an attractive replacement for parsers

+built with

+.IR lex (1)

+and

+.IR yacc (1).

+Unlike

+.I lex

+and

+.IR yacc ,

+.I minipeg

+supports unlimited backtracking, provides ordered choice as a means for

+disambiguation, and can combine scanning (lexical analysis) and

+parsing (syntactic analysis) into a single activity.

+.PP

+.I minipeg

+reads the specified

+.IR filename s,

+or standard input if no

+.IR filename s

+are given, for a grammar describing the parser to generate.

+.I minipeg

+then generates a C source file that defines a function

+.IR yyparse ().

+This C source file can be included in, or compiled and then linked

+with, a client program.  Each time the client program calls

+.IR yyparse ()

+the parser consumes input text according to the parsing rules,

+starting from the first rule in the grammar.

+.IR yyparse ()

+returns non\-zero if the input could be parsed according to the

+grammar; it returns zero if the input could not be parsed.

+.PP

+The prefix 'yy' or 'YY' is prepended to all externally\-visible symbols

+in the generated parser.  This is intended to reduce the risk of

+namespace pollution in client programs.  (The choice of 'yy' is

+historical; see

+.IR lex (1)

+and

+.IR yacc (1),

+for example.)

+.SH OPTIONS

+.I minipeg

+provides the following options:

+.TP

+.B \-h

+prints a summary of available options and then exits.

+.TP

+.B \-ooutput

+writes the generated parser to the file

+.B output

+instead of the standard output.

+.TP

+.B \-P

+suppresses #line directives in the output.

+.TP

+.B \-v

+writes verbose information to standard error while working.

+.TP

+.B \-V

+writes version information to standard error then exits.

+.SH EXAMPLE: A CALCULATOR

+Here we show a simple desk calculator supporting the four common arithmetic

+operators and named variables.  The intermediate results of arithmetic

+evaluation will be accumulated on an implicit stack by returning them

+as semantic values from sub\-rules.

+.nf

+    %{

+    #include <stdio.h>     /* printf() */

+    #include <stdlib.h>    /* atoi() */

+    int vars[26];

+    %}

+    Stmt    = \- e:Expr EOL                  { printf("%d\\n", e); }

+            | ( !EOL . )* EOL               { printf("error\\n"); }

+    Expr    = i:ID ASSIGN s:Sum             { $$ = vars[i] = s; }

+            | s:Sum                         { $$ = s; }

+    Sum     = l:Product

+                    ( PLUS  r:Product       { l += r; }

+                    | MINUS r:Product       { l \-= r; }

+                    )*                      { $$ = l; }

+    Product = l:Value

+                    ( TIMES  r:Value        { l *= r; }

+                    | DIVIDE r:Value        { l /= r; }

+                    )*                      { $$ = l; }

+    Value   = i:NUMBER                      { $$ = atoi(yytext); }

+            | i:ID !ASSIGN                  { $$ = vars[i]; }

+            | OPEN i:Expr CLOSE             { $$ = i; }

+    NUMBER  = < [0\-9]+ >    \-               { $$ = atoi(yytext); }

+    ID      = < [a\-z]  >    \-               { $$ = yytext[0] \- 'a'; }

+    ASSIGN  = '='           \-

+    PLUS    = '+'           \-

+    MINUS   = '\-'           \-

+    TIMES   = '*'           \-

+    DIVIDE  = '/'           \-

+    OPEN    = '('           \-

+    CLOSE   = ')'           \-

+    \-       = [ \\t]*

+    EOL     = '\\n' | '\\r\\n' | '\\r' | ';'

+    %%

+    int main()

+    {

+      while (yyparse())

+        ;

+      return 0;

+    }

+.fi

+.PP

+If the above grammar is placed in the file

+.BR calc.peg ,

+running the command

+.nf

+    $ minipeg \-o calc.c calc.peg

+.fi

+will save the corresponding parser in the file

+.BR calc.c .

+The program can then be compiled with a C compiler and run

+.nf

+    $ cc \-o calc calc.c

+    $ ./calc

+    a=5

+    5

+    a+5

+    10

+.fi

+.SH MINIPEG GRAMMARS

+A grammar consists of a set of named rules.

+.nf

+    name = pattern

+.fi

+The

+.B pattern

+contains one or more of the following elements.

+.TP

+.B name

+The element stands for the entire pattern in the rule with the given

+.BR name .

+.TP

+.BR \(dq characters \(dq

+A character or string enclosed in double quotes is matched literally.

+The ANSI C escape sequences are recognised within the

+.IR characters .

+.TP

+.BR ' characters '

+A character or string enclosed in single quotes is matched literally, as above.

+.TP

+.BR [ characters ]

+A set of characters enclosed in square brackets matches any single

+character from the set, with escape characters recognised as above.

+If the set begins with an uparrow (^) then the set is negated (the

+element matches any character

+.I not

+in the set).  Any pair of characters separated with a dash (\-)

+represents the range of characters from the first to the second,

+inclusive.  A single alphabetic character or underscore is matched by

+the following set.

+.nf

+    [a\-zA\-Z_]

+.fi

+Similarly, the following matches any single non\-digit character.

+.nf

+    [^0\-9]

+.fi

+.TP

+.B .

+A dot matches any character.  Note that the only time this fails is at

+the end of file, where there is no character to match.

+.TP

+.BR ( \ pattern\  )

+Parentheses are used for grouping (modifying the precedence of the

+operators described below).

+.TP

+.BR { \ action\  }

+Curly braces surround actions.  The action is arbitrary C source code

+to be executed at the end of matching.  Any braces within the action

+must be properly nested.  Any input text that was matched before the

+action and delimited by angle brackets (see below) is made available

+within the action as the contents of the character array

+.IR yytext .

+The length of (number of characters in)

+.I yytext

+is available in the variable

+.IR yyleng .

+(These variable names are historical; see

+.IR lex (1).)

+.TP

+.IB @{\ action\ }

+Actions prefixed with an 'at' symbol will be performed during parsing,

+at the time they are encountered while matching the input text with a

+rule.

+Because of back-tracking in the PEG parsing algorithm, actions

+prefixed with '@' might be performed multiple times for the same input

+text.

+(The usual behaviour of actions is that they are saved up until

+matching is complete, and then those that are part of the

+final derivation are performed in left-to-right order.)

+The variable

+.I yytext

+is available within these actions.

+.TP

+.IB exp \ ~ \ {\ action\ }

+A postfix operator

+.BI ~ {\ action\ }

+can be placed after any expression and will behave like a normal

+action (arbitrary C code) except that it is invoked only when

+.I exp

+fails.  It binds less tightly than any other operator except alternation and sequencing, and

+is intended to make error handling and recovery code easier to write.

+Note that

+.I yytext

+and

+.I yyleng

+are not available inside these actions, but the pointer variable

+.I yy

+is available to give the code access to any user\-defined members

+of the parser state (see "CUSTOMISING THE PARSER" below).

+Note also that

+.I exp

+is always a single expression; to invoke an error action for any

+failure within a sequence, parentheses must be used to group the

+sequence into a single expression.

+.nf

+    rule = e1 e2 e3 ~{ error("e[12] ok; e3 has failed"); }

+         | ...

+    rule = (e1 e2 e3) ~{ error("one of e[123] has failed"); }

+         | ...

+.fi

+.TP

+.B <

+An opening angle bracket always matches (consuming no input) and

+causes the parser to begin accumulating matched text.  This text will

+be made available to actions in the variable

+.IR yytext .

+.TP

+.B >

+A closing angle bracket always matches (consuming no input) and causes

+the parser to stop accumulating text for

+.IR yytext .

+.PP

+The above

+.IR element s

+can be made optional and/or repeatable with the following suffixes:

+.TP

+.RB element\  ?

+The element is optional.  If present on the input, it is consumed and

+the match succeeds.  If not present on the input, no text is consumed

+and the match succeeds anyway.

+.TP

+.RB element\  +

+The element is repeatable.  If present on the input, one or more

+occurrences of

+.I element

+are consumed and the match succeeds.  If no occurrences of

+.I element

+are present on the input, the match fails.

+.TP

+.RB element\  *

+The element is optional and repeatable.  If present on the input, one or more

+occurrences of

+.I element

+are consumed and the match succeeds.  If no occurrences of

+.I element

+are present on the input, the match succeeds anyway.

+.PP

+The above elements and suffixes can be converted into predicates (that

+match arbitrary input text and subsequently succeed or fail

+.I without

+consuming that input) with the following prefixes:

+.TP

+.BR & \ element

+The predicate succeeds only if

+.I element

+can be matched.  Input text scanned while matching

+.I element

+is not consumed from the input and remains available for subsequent

+matching.

+.TP

+.BR ! \ element

+The predicate succeeds only if

+.I element

+cannot be matched.  Input text scanned while matching

+.I element

+is not consumed from the input and remains available for subsequent

+matching.  A popular idiom is

+.nf

+    !.

+.fi

+which matches the end of file, after the last character of the input

+has already been consumed.

+.PP

+A special form of the '&' predicate is provided:

+.TP

+.BR & {\ expression\ }

+In this predicate the simple C

+.I expression

+.RB ( not

+statement) is evaluated immediately when the parser reaches the

+predicate.  If the

+.I expression

+yields non\-zero (true) the 'match' succeeds and the parser continues

+with the next element in the pattern.  If the

+.I expression

+yields zero (false) the 'match' fails and the parser backs up to look

+for an alternative parse of the input.

+.PP

+Several elements (with or without prefixes and suffixes) can be

+combined into a

+.I sequence

+by writing them one after the other.  The entire sequence matches only

+if each individual element within it matches, from left to right.

+.PP

+Sequences can be separated into disjoint alternatives by the

+alternation operator '|'.

+.TP

+.RB sequence\-1\  | \ sequence\-2\  | \ ...\  | \ sequence\-N

+Each sequence is tried in turn until one of them matches, at which

+time matching for the overall pattern succeeds.  If none of the

+sequences matches then the match of the overall pattern fails.

+.PP

+The following elements can appear in addition to rules.

+.TP

+.BI %{\  text... \ %}

+A declaration section can appear anywhere that a rule definition is

+expected.  The

+.I text

+between the delimiters '%{' and '%}' is copied verbatim to the

+generated C parser code

+.I before

+the code that implements the parser itself.

+.PP

+The pound sign (#) introduces a comment (discarded) that

+continues until the end of the line.

+.TP

+.BI %% \ text...

+A double percent '%%' terminates the rules (and declarations) section of

+the grammar.  All

+.I text

+following '%%' is copied verbatim to the generated C parser code

+.I after

+the parser implementation code.

+.PP

+Some notes regarding rules and patterns follow.

+.PP

+.B rule\-name

+Hyphens can appear as letters in the names of rules.  Each hyphen is

+converted into an underscore in the generated C source code.  A

+single hyphen '\-' is a legal rule name.

+.PP

+Within actions you can access and manipulate named values.

+.TP

+.BI $$\ = \ value

+A sub\-rule can return a semantic

+.I value

+from an action by assigning it to the pseudo\-variable '$$'.  All

+semantic values must have the same type (which defaults to 'int').

+This type can be changed by defining YYSTYPE in a declaration section.

+.TP

+.IB identifier : name

+The semantic value returned (by assigning to '$$') from the sub\-rule

+.I name

+is associated with the

+.I identifier

+and can be referred to in subsequent actions.

+.SH MINIPEG GRAMMAR FOR MINIPEG GRAMMARS

+The grammar for

+.I minipeg

+grammars is shown below.  This will both illustrate and formalise the

+above description.

+.nf

+    grammar =       \-

+                    ( declaration | definition )+

+                    trailer? end\-of\-file

+    declaration =   '%{' < ( !'%}' . )* > RPERCENT

+    trailer =       '%%' < .* >

+    definition =    identifier EQUAL expression

+    expression =    sequence ( BAR sequence )*

+    sequence =      error+

+    error =         prefix ( TILDE action )?

+    prefix =        AND action

+    |               ( AND | NOT )? suffix

+    suffix =        primary ( QUERY | STAR | PLUS )?

+    primary =       identifier COLON identifier !EQUAL

+    |               identifier !EQUAL

+    |               OPEN expression CLOSE

+    |               literal

+    |               class

+    |               DOT

+    |               action

+    |               BEGIN

+    |               END

+    identifier =    < [\-a\-zA\-Z_][\-a\-zA\-Z_0\-9]* > \-

+    literal =       ['] < ( !['] char )* > ['] \-

+    |               ["] < ( !["] char )* > ["] \-

+    class =         '[' < ( !']' range )* > ']' \-

+    range =         char '\-' char | char

+    char =          '\\\\' [abefnrtv'"\\[\\]\\\\]

+    |               '\\\\' [0\-3][0\-7][0\-7]

+    |               '\\\\' [0\-7][0\-7]?

+    |               !'\\\\' .

+    action =        '{' < braces* > '}' \-

+    braces =        '{' braces* '}'

+    |               !'}' .

+    EQUAL =         '=' \-

+    COLON =         ':' \-

+    BAR =           '|' \-

+    AND =           '&' \-

+    NOT =           '!' \-

+    QUERY =         '?' \-

+    STAR =          '*' \-

+    PLUS =          '+' \-

+    OPEN =          '(' \-

+    CLOSE =         ')' \-

+    DOT =           '.' \-

+    BEGIN =         '<' \-

+    END =           '>' \-

+    TILDE =         '~' \-

+    RPERCENT =      '%}' \-

+    \- =             ( space | comment )*

+    space =         ' ' | '\\t' | end\-of\-line

+    comment =       '#' ( !end\-of\-line . )* end\-of\-line

+    end\-of\-line =   '\\r\\n' | '\\n' | '\\r'

+    end\-of\-file =   !.

+.fi

+.SH CUSTOMISING THE PARSER

+The following symbols can be redefined in declaration sections to

+modify the generated parser code.

+.TP

+.B YYSTYPE

+The semantic value type.  The pseudo\-variable '$$' and the

+identifiers 'bound' to rule results with the colon operator ':' should

+all be considered as being declared to have this type.  The default

+value is 'int'.

+.TP

+.B YYPARSE

+The name of the main entry point to the parser.  The default value

+is 'yyparse'.

+.TP

+.B YYPARSEFROM

+The name of an alternative entry point to the parser.  This function

+expects one argument: the function corresponding to the rule from

+which the search for a match should begin.  The default

+is 'yyparsefrom'.  Note that yyparse() is defined as

+.nf

+    int yyparse() { return yyparsefrom(yy_foo); }

+.fi

+where 'foo' is the name of the first rule in the grammar.

+.TP

+.BI YY_INPUT( buf , \ result , \ max_size )

+This macro is invoked by the parser to obtain more input text.

+.I buf

+points to an area of memory that can hold at most

+.I max_size

+characters.  The macro should copy input text to

+.I buf

+and then assign the integer variable

+.I result

+to indicate the number of characters copied.  If no more input is available,

+the macro should assign 0 to

+.IR result .

+By default, the YY_INPUT macro is defined as follows.

+.nf

+    #define YY_INPUT(buf, result, max_size)        \\

+    {                                              \\

+      int yyc= getchar();                          \\

+      result= (EOF == yyc) ? 0 : (*(buf)= yyc, 1); \\

+    }

+.fi

+Note that if YY_CTX_LOCAL is defined (see below) then an additional

+first argument, containing the parser context, is passed to YY_INPUT.

+.TP

+.B YY_DEBUG

+If this symbol is defined then additional code will be included in

+the parser that prints vast quantities of arcane information to the

+standard error while the parser is running.

+.TP

+.B YY_BEGIN

+This macro is invoked to mark the start of input text that will be

+made available in actions as 'yytext'.  This corresponds to

+occurrences of '<' in the grammar.  These are converted into

+predicates that are expected to succeed.  The default definition

+.nf

+    #define YY_BEGIN (yybegin= yypos, 1)

+.fi

+therefore saves the current input position and returns 1 ('true') as

+the result of the predicate.

+.TP

+.B YY_END

+This macro corresponds to '>' in the grammar.  Again, it is a

+predicate so the default definition saves the input position

+before 'succeeding'.

+.nf

+    #define YY_END (yyend= yypos, 1)

+.fi

+.TP

+.BI YY_PARSE( T )

+This macro declares the parser entry points (yyparse and yyparsefrom)

+to be of type

+.IR T .

+The default definition

+.nf

+    #define YY_PARSE(T) T

+.fi

+leaves yyparse() and yyparsefrom() with global visibility.  If they

+should not be externally visible in other source files, this macro can

+be redefined to declare them 'static'.

+.nf

+    #define YY_PARSE(T) static T

+.fi

+.TP

+.B YY_CTX_LOCAL

+If this symbol is defined during compilation of a generated parser

+then global parser state will be kept in a structure of

+type 'yycontext' which can be declared as a local variable.  This

+allows multiple instances of parsers to coexist and to be thread\-safe.

+The parsing function

+.IR yyparse ()

+will be declared to expect a first argument of type 'yycontext *', an

+instance of the structure holding the global state for the parser.

+This instance must be allocated and initialised to zero by the client.

+A trivial but complete example is as follows.

+.nf

+    #include <stdio.h>

+    #define YY_CTX_LOCAL

+    #include "the\-generated\-parser.peg.c"

+    int main()

+    {

+      yycontext ctx;

+      memset(&ctx, 0, sizeof(yycontext));

+      while (yyparse(&ctx));

+      return 0;

+    }

+.fi

+Note that if this symbol is undefined then the compiled parser will

+statically allocate its global state and will be neither reentrant nor

+thread\-safe.

+Note also that the parser yycontext structure is initialised automatically

+the first time

+.IR yyparse ()

+is called; this structure

+.B must

+therefore be properly initialised to zero before the first call to

+.IR yyparse ().

+.TP

+.B YY_CTX_MEMBERS

+If YY_CTX_LOCAL is defined (see above) then the macro YY_CTX_MEMBERS

+can be defined to expand to any additional member field declarations

+that the client would like included in the declaration of

+the 'yycontext' structure type.  These additional members are

+otherwise ignored by the generated parser.  The instance

+of 'yycontext' associated with the currently\-active parser is

+available within actions as the pointer variable

+.IR yy .

+.TP

+.B YY_BUFFER_SIZE

+The initial size of the text buffer, in bytes.  The default is 1024

+and the buffer size is doubled whenever required to meet demand during

+parsing.  An application that typically parses much longer strings

+could increase this to avoid unnecessary buffer reallocation.

+.TP

+.B YY_STACK_SIZE

+The initial size of the variable and action stacks.  The default is

+128, which is doubled whenever required to meet demand during parsing.

+Applications that have deep call stacks with many local variables, or

+that perform many actions after a single successful match, could increase

+this to avoid unnecessary buffer reallocation.

+.TP

+.BI YY_MALLOC( YY , \ SIZE )

+The memory allocator for all parser\-related storage.  The parameters

+are the current yycontext structure and the number of bytes to

+allocate.  The default definition is:

+.RI malloc( SIZE )

+.TP

+.BI YY_REALLOC( YY , \ PTR , \ SIZE )

+The memory reallocator for dynamically\-grown storage (such as text

+buffers and variable stacks).  The parameters are the current

+yycontext structure, the previously\-allocated storage, and the number

+of bytes to which that storage should be grown.  The default definition is:

+.RI realloc( PTR , \ SIZE )

+.TP

+.BI YY_FREE( YY , \ PTR )

+The memory deallocator.  The parameters are the current yycontext

+structure and the storage to deallocate.  The default definition is:

+.RI free( PTR )

+.TP

+.B YYRELEASE

+The name of the function that releases all resources held by a

+yycontext structure.  The default value is 'yyrelease'.

+.PP

+The following variables can be referred to within actions.

+.TP

+.B char *yybuf

+This variable points to the parser's input buffer used to store input

+text that has not yet been matched.

+.TP

+.B int yypos

+This is the offset (in yybuf) of the next character to be matched and

+consumed.

+.TP

+.B char *yytext

+The most recent matched text delimited by '<' and '>' is stored in this variable.

+.TP

+.B int yyleng

+This variable indicates the number of characters in 'yytext'.

+.TP

+.B yycontext *yy

+This variable points to the instance of 'yycontext' associated with

+the currently\-active parser.

+.PP

+Programs that wish to release all the resources associated with a

+parser can use the following function.

+.TP

+.BI yyrelease(yycontext * yy )

+Returns all parser\-allocated storage associated with

+.I yy

+to the system.  The storage will be reallocated on the next call to

+.IR yyparse ().

+.PP

+Note that the storage for the yycontext structure itself is never

+allocated or reclaimed implicitly.  The application must allocate

+these structures in automatic storage, or use

+.IR calloc ()

+and

+.IR free ()

+to manage them explicitly.  The example in the following section

+demonstrates one approach to resource management.

+.SH EXAMPLE: EXTENDING THE PARSER'S CONTEXT

+The

+.I yy

+variable passed to actions contains the state of the parser plus any

+additional fields defined by YY_CTX_MEMBERS.  These fields can be

+used to store application\-specific information that is global to a

+particular call of

+.IR yyparse ().

+A trivial but complete

+.I minipeg

+example follows in which the yycontext

+structure is extended with a

+.I count

+of the number of newline characters

+seen in the input so far (the grammar otherwise consumes and ignores

+the entire input).  The caller of

+.IR yyparse ()

+uses

+.I count

+to print the number of lines of input that were read.

+.nf

+    %{

+    #define YY_CTX_LOCAL 1

+    #define YY_CTX_MEMBERS \\

+      int count;

+    %}

+    Char    = ('\\n' | '\\r\\n' | '\\r')        { yy\->count++ }

+            | .

+    %%

+    #include <stdio.h>

+    #include <string.h>

+    int main()

+    {

+        /* create a local parser context in automatic storage */

+        yycontext yy;

+        /* the context *must* be initialised to zero before first use*/

+        memset(&yy, 0, sizeof(yy));

+        while (yyparse(&yy))

+            ;

+        printf("%d newlines\\n", yy.count);

+        /* release all resources associated with the context */

+        yyrelease(&yy);

+        return 0;

+    }

+.fi

+.SH DIAGNOSTICS

+.I minipeg

+warns about the following conditions while converting a grammar into a parser.

+.TP

+.B syntax error

+The input grammar was malformed in some way.  The error message will

+include the text about to be matched (often backed up a huge amount

+from the actual location of the error) and the line number of the most

+recently considered character (which is often the real location of the

+problem).

+.TP

+.B rule 'foo' used but not defined

+The grammar referred to a rule named 'foo' but no definition for it

+was given.  Attempting to use the generated parser will likely result

+in errors from the linker due to undefined symbols associated with the

+missing rule.

+.TP

+.B rule 'foo' defined but not used

+The grammar defined a rule named 'foo' and then ignored it.  The code

+associated with the rule is included in the generated parser which

+will in all other respects be healthy.

+.TP

+.B possible infinite left recursion in rule 'foo'

+There exists at least one path through the grammar that leads from the

+rule 'foo' back to (a recursive invocation of) the same rule without

+consuming any input.

+.PP

+Left recursion, especially that found in standards documents, is

+often 'direct' and implies trivial repetition.

+.nf

+    # (6.7.6)

+    direct\-abstract\-declarator =

+        LPAREN abstract\-declarator RPAREN

+    |   direct\-abstract\-declarator? LBRACKET assign\-expr? RBRACKET

+    |   direct\-abstract\-declarator? LBRACKET STAR RBRACKET

+    |   direct\-abstract\-declarator? LPAREN param\-type\-list? RPAREN

+.fi

+The recursion can easily be eliminated by converting the parts of the

+pattern following the recursion into a repeatable suffix.

+.nf

+    # (6.7.6)

+    direct\-abstract\-declarator =

+        direct\-abstract\-declarator\-head?

+        direct\-abstract\-declarator\-tail*

+    direct\-abstract\-declarator\-head =

+        LPAREN abstract\-declarator RPAREN

+    direct\-abstract\-declarator\-tail =

+        LBRACKET assign\-expr? RBRACKET

+    |   LBRACKET STAR RBRACKET

+    |   LPAREN param\-type\-list? RPAREN

+.fi

+.SH CAVEATS

+A parser that accepts empty input will

+.I always

+succeed.  Consider the following example, not atypical of a first

+attempt to write a PEG\-based parser:

+.nf

+    Program = Expression*

+    Expression = "whatever"

+    %%

+    int main() {

+      while (yyparse())

+        puts("success!");

+      return 0;

+    }

+.fi

+This program loops forever, no matter what (if any) input is provided

+on stdin.  Many fixes are possible, the easiest being to insist that

+the parser always consumes some non\-empty input.  Changing the first

+line to

+.nf

+    Program = Expression+

+.fi

+accomplishes this.  If the parser is expected to consume the entire

+input, then explicitly requiring the end\-of\-file is also highly

+recommended:

+.nf

+    Program = Expression+ !.

+.fi

+This works because the parser will only fail to match ("!" predicate)

+any character at all ("." expression) when it attempts to read beyond

+the end of the input.

+.SH BUGS

+.PP

+The 'yy' and 'YY' prefixes cannot be changed.

+.PP

+Left recursion is detected in the input grammar but is not handled

+correctly in the generated parser.

+.PP

+Diagnostics for errors in the input grammar are obscure and not

+particularly helpful.

+.PP

+The operators

+.BR ! \ \c

+and

+.B ~

+should really be named the other way around.

+.PP

+Several commonly\-used

+.IR lex (1)

+features (yywrap(), yyin, etc.) are completely absent.

+.PP

+The generated parser does not contain '#line' directives to direct C

+compiler errors back to the grammar description when appropriate.

+.SH SEE ALSO

+D. Val Schorre,

+.I META II, a syntax\-oriented compiler writing language,

+19th ACM National Conference, 1964, pp.\ 41.301\-\-41.311.  Describes a

+self\-implementing parser generator for analytic grammars with no

+backtracking.

+.PP

+Alexander Birman,

+.I The TMG Recognition Schema,

+Ph.D. dissertation, Princeton, 1970.  A mathematical treatment of the

+power and complexity of recursive\-descent parsing with backtracking.

+.PP

+Bryan Ford,

+.I Parsing Expression Grammars: A Recognition\-Based Syntactic Foundation,

+ACM SIGPLAN Symposium on Principles of Programming Languages, 2004.

+Defines PEGs and analyses them in relation to context\-free and regular

+grammars.  Introduces the syntax adopted in

+.IR minipeg .

+.PP

+The standard Unix utilities

+.IR lex (1)

+and

+.IR yacc (1)

+which influenced the syntax and features of

+.IR minipeg .

+.PP

+The source code for

+.I minipeg

+whose grammar parser is written using itself.

+.PP

+The latest version of this software and documentation:

+.nf

+    https://github.com/andrewchambers/minipeg

+.fi

+.SH AUTHOR

+.IR minipeg

+and this manual were originally written by Ian Piumarta

+under the project name peg/leg.

+.IR minipeg

+is a fork of peg/leg by Andrew Chambers.

+.PP

+Please send bug reports and suggestions for improvements to the author

+at the project address.

--- /dev/null

+++ b/docs/simple.css

@@ -1,0 +1,14 @@

+pre, p, h3		{ margin: 0.9em 0px 0.9em; }

+article, .container	{ width: 800px; margin: auto; padding: 0 20px 0 20px; }

+h1			{ font-family: arial, sans-serif; }

+h2			{ text-align: center; margin-bottom: 1em; }

+pre, code		{ line-height: normal; overflow: auto; font-size: 0.8em; }

+#logo			{ vertical-align: middle; margin-right: 16px; }

+body			{ line-height: 1.3; }

+html			{ font-size: 20px; font-family: "Charter","Georgia",'Times New Roman',serif; }

+.bnf			{ background-color: white; padding-left: 2em; }

+@media only screen and (max-device-width: 480px) {

+article, .container	{ width: 100%; }

+html			{ font-size: 13px; }

+}

--- a/minipeg.1

+++ /dev/null

@@ -1,915 +1,0 @@

-.TH MINIPEG 1

-.SH NAME

-minipeg \- parser generator

-.SH SYNOPSIS

-.B minipeg

-.B [\-hvVP \-ooutput]

-.I [filename ...]

-.SH DESCRIPTION

-.I minipeg

-is a tool for generating recursive\-descent parsers: programs that

-perform pattern matching on text.  They process a Parsing Expression

-Grammar (PEG) [Ford 2004] to produce a program that recognises legal

-sentences of that grammar.

-.I minipeg

-processes PEGs written with syntax and conventions

-that are intended to make it an attractive replacement for parsers

-built with

-.IR lex (1)

-and

-.IR yacc (1).

-Unlike

-.I lex

-and

-.IR yacc ,

-.I minipeg

-support unlimited backtracking, provide ordered choice as a means for

-disambiguation, and can combine scanning (lexical analysis) and

-parsing (syntactic analysis) into a single activity.

-.PP

-.I minipeg

-reads the specified

-.IR filename s,

-or standard input if no

-.IR filename s

-are given, for a grammar describing the parser to generate.

-.I minipeg

-then generates a C source file that defines a function

-.IR yyparse().

-This C source file can be included in, or compiled and then linked

-with, a client program.  Each time the client program calls

-.IR yyparse ()

-the parser consumes input text according to the parsing rules,

-starting from the first rule in the grammar.

-.IR yyparse ()

-returns non\-zero if the input could be parsed according to the

-grammar; it returns zero if the input could not be parsed.

-.PP

-The prefix 'yy' or 'YY' is prepended to all externally\-visible symbols

-in the generated parser.  This is intended to reduce the risk of

-namespace pollution in client programs.  (The choice of 'yy' is

-historical; see

-.IR lex (1)

-and

-.IR yacc (1),

-for example.)

-.SH OPTIONS

-.I minipeg

-provide the following options:

-.TP

-.B \-h

-prints a summary of available options and then exits.

-.TP

-.B \-ooutput

-writes the generated parser to the file

-.B output

-instead of the standard output.

-.TP

-.B \-P

-suppresses #line directives in the output.

-.TP

-.B \-v

-writes verbose information to standard error while working.

-.TP

-.B \-V

-writes version information to standard error then exits.

-.SH EXAMPLE: A CALCULATOR

-Here we show a simple desk calculator supporting the four common arithmetic

-operators and named variables.  The intermediate results of arithmetic

-evaluation will be accumulated on an implicit stack by returning them

-as semantic values from sub\-rules.

-.nf

-    %{

-    #include <stdio.h>     /* printf() */

-    #include <stdlib.h>    /* atoi() */

-    int vars[26];

-    %}

-    Stmt    = \- e:Expr EOL                  { printf("%d\\n", e); }

-            | ( !EOL . )* EOL               { printf("error\\n"); }

-    Expr    = i:ID ASSIGN s:Sum             { $$ = vars[i] = s; }

-            | s:Sum                         { $$ = s; }

-    Sum     = l:Product

-                    ( PLUS  r:Product       { l += r; }

-                    | MINUS r:Product       { l \-= r; }

-                    )*                      { $$ = l; }

-    Product = l:Value

-                    ( TIMES  r:Value        { l *= r; }

-                    | DIVIDE r:Value        { l /= r; }

-                    )*                      { $$ = l; }

-    Value   = i:NUMBER                      { $$ = atoi(yytext); }

-            | i:ID !ASSIGN                  { $$ = vars[i]; }

-            | OPEN i:Expr CLOSE             { $$ = i; }

-    NUMBER  = < [0\-9]+ >    \-               { $$ = atoi(yytext); }

-    ID      = < [a\-z]  >    \-               { $$ = yytext[0] \- 'a'; }

-    ASSIGN  = '='           \-

-    PLUS    = '+'           \-

-    MINUS   = '\-'           \-

-    TIMES   = '*'           \-

-    DIVIDE  = '/'           \-

-    OPEN    = '('           \-

-    CLOSE   = ')'           \-

-    \-       = [ \\t]*

-    EOL     = '\\n' | '\\r\\n' | '\\r' | ';'

-    %%

-    int main()

-    {

-      while (yyparse())

-        ;

-      return 0;

-    }

-.fi

-.PP

-If the above grammar is placed in the file

-.BR calc.peg ,

-running the command

-.nf

-    $ minipeg \-o calc.c calc.peg

-.fi

-will save the corresponding parser in the file

-.BR calc.c .

-The program can then be compiled with a C compiler and run

-.nf

-    $ cc \-o calc calc.c

-    $ ./calc

-    a=5

-    5

-    a+5

-    10

-.fi

-.SH MINIPEG GRAMMARS

-A grammar consists of a set of named rules.

-.nf

-    name = pattern

-.fi

-The

-.B pattern

-contains one or more of the following elements.

-.TP

-.B name

-The element stands for the entire pattern in the rule with the given

-.BR name .

-.TP

-.BR \(dq characters \(dq

-A character or string enclosed in double quotes is matched literally.

-The ANSI C escape sequences are recognised within the

-.IR characters .

-.TP

-.BR ' characters '

-A character or string enclosed in single quotes is matched literally, as above.

-.TP

-.BR [ characters ]

-A set of characters enclosed in square brackets matches any single

-character from the set, with escape characters recognised as above.

-If the set begins with an uparrow (^) then the set is negated (the

-element matches any character

-.I not

-in the set).  Any pair of characters separated with a dash (\-)

-represents the range of characters from the first to the second,

-inclusive.  A single alphabetic character or underscore is matched by

-the following set.

-.nf

-    [a\-zA\-Z_]

-.fi

-Similarly, the following matches  any single non\-digit character.

-.nf

-    [^0\-9]

-.fi

-.TP

-.B .

-A dot matches any character.  Note that the only time this fails is at

-the end of file, where there is no character to match.

-.TP

-.BR ( \ pattern\  )

-Parentheses are used for grouping (modifying the precedence of the

-operators described below).

-.TP

-.BR { \ action\  }

-Curly braces surround actions.  The action is arbitrary C source code

-to be executed at the end of matching.  Any braces within the action

-must be properly nested.  Any input text that was matched before the

-action and delimited by angle brackets (see below) is made available

-within the action as the contents of the character array

-.IR yytext .

-The length of (number of characters in)

-.I yytext

-is available in the variable

-.IR yyleng .

-(These variable names are historical; see

-.IR lex (1).)

-.TP

-.IB @{\ action\ }

-Actions prefixed with an 'at' symbol will be performed during parsing,

-at the time they are encountered while matching the input text with a

-rule.

-Because of back-tracking in the PEG parsing algorithm, actions

-prefixed with '@' might be performed multiple times for the same input

-text.

-(The usual behviour of actions is that they are saved up until

-matching is complete, and then those that are part of the

-final derivation are performed in left-to-right order.)

-The variable

-.I yytext

-is available within these actions.

-.TP

-.IB exp \ ~ \ {\ action\ }

-A postfix operator

-.BI ~ {\ action\ }

-can be placed after any expression and will behave like a normal

-action (arbitrary C code) except that it is invoked only when

-.I exp

-fails.  It binds less tightly than any other operator except alternation and sequencing, and

-is intended to make error handling and recovery code easier to write.

-Note that

-.I yytext

-and

-.I yyleng

-are not available inside these actions, but the pointer variable

-.I yy

-is available to give the code access to any user\-defined members

-of the parser state (see "CUSTOMISING THE PARSER" below).

-Note also that

-.I exp

-is always a single expression; to invoke an error action for any

-failure within a sequence, parentheses must be used to group the

-sequence into a single expression.

-.nf

-    rule = e1 e2 e3 ~{ error("e[12] ok; e3 has failed"); }

-         | ...

-    rule = (e1 e2 e3) ~{ error("one of e[123] has failed"); }

-         | ...

-.fi

-.TP

-.B <

-An opening angle bracket always matches (consuming no input) and

-causes the parser to begin accumulating matched text.  This text will

-be made available to actions in the variable

-.IR yytext .

-.TP

-.B >

-A closing angle bracket always matches (consuming no input) and causes

-the parser to stop accumulating text for

-.IR yytext .

-.PP

-The above

-.IR element s

-can be made optional and/or repeatable with the following suffixes:

-.TP

-.RB element\  ?

-The element is optional.  If present on the input, it is consumed and

-the match succeeds.  If not present on the input, no text is consumed

-and the match succeeds anyway.

-.TP

-.RB element\  +

-The element is repeatable.  If present on the input, one or more

-occurrences of

-.I element

-are consumed and the match succeeds.  If no occurrences of

-.I element

-are present on the input, the match fails.

-.TP

-.RB element\  *

-The element is optional and repeatable.  If present on the input, one or more

-occurrences of

-.I element

-are consumed and the match succeeds.  If no occurrences of

-.I element

-are present on the input, the match succeeds anyway.

-.PP

-The above elements and suffixes can be converted into predicates (that

-match arbitrary input text and subsequently succeed or fail

-.I without

-consuming that input) with the following prefixes:

-.TP

-.BR & \ element

-The predicate succeeds only if

-.I element

-can be matched.  Input text scanned while matching

-.I element

-is not consumed from the input and remains available for subsequent

-matching.

-.TP

-.BR ! \ element

-The predicate succeeds only if

-.I element

-cannot be matched.  Input text scanned while matching

-.I element

-is not consumed from the input and remains available for subsequent

-matching.  A popular idiom is

-.nf

-    !.

-.fi

-which matches the end of file, after the last character of the input

-has already been consumed.

-.PP

-A special form of the '&' predicate is provided:

-.TP

-.BR & {\ expression\ }

-In this predicate the simple C

-.I expression

-.RB ( not

-statement) is evaluated immediately when the parser reaches the

-predicate.  If the

-.I expression

-yields non\-zero (true) the 'match' succeeds and the parser continues

-with the next element in the pattern.  If the

-.I expression

-yields zero (false) the 'match' fails and the parser backs up to look

-for an alternative parse of the input.

-.PP

-Several elements (with or without prefixes and suffixes) can be

-combined into a

-.I sequence

-by writing them one after the other.  The entire sequence matches only

-if each individual element within it matches, from left to right.

-.PP

-Sequences can be separated into disjoint alternatives by the

-alternation operator '|'.

-.TP

-.RB sequence\-1\  | \ sequence\-2\  | \ ...\  | \ sequence\-N

-Each sequence is tried in turn until one of them matches, at which

-time matching for the overall pattern succeeds.  If none of the

-sequences matches then the match of the overall pattern fails.

-.PP

-The following elements can appear in addition to rules.

-.TP

-.BI %{\  text... \ %}

-A declaration section can appear anywhere that a rule definition is

-expected.  The

-.I text

-between the delimiters '%{' and '%}' is copied verbatim to the

-generated C parser code

-.I before

-the code that implements the parser itself.

-.PP

-The pound sign (#) introduces a comment (discarded) that

-continues until the end of the line.

-.TP

-.BI %% \ text...

-A double percent '%%' terminates the rules (and declarations) section of

-the grammar.  All

-.I text

-following '%%' is copied verbatim to the generated C parser code

-.I after

-the parser implementation code.

-.PP

-Some notes regarding rules and and patterns follow.

-.PP

-.B rule\-name

-Hyphens can appear as letters in the names of rules.  Each hyphen is

-converted into an underscore in the generated C source code.  A

-single hyphen '\-' is a legal rule name.

-.PP

-Within actions you can access and manipulate named values.

-.TP

-.BI $$\ = \ value

-A sub\-rule can return a semantic

-.I value

-from an action by assigning it to the pseudo\-variable '$$'.  All

-semantic values must have the same type (which defaults to 'int').

-This type can be changed by defining YYSTYPE in a declaration section.

-.TP

-.IB identifier : name

-The semantic value returned (by assigning to '$$') from the sub\-rule

-.I name

-is associated with the

-.I identifier

-and can be referred to in subsequent actions.

-.SH MINIPEG GRAMMAR FOR MINIPEG GRAMMARS

-The grammar for

-.I minipeg

-grammars is shown below.  This will both illustrate and formalise the

-above description.

-.nf

-    grammar =       \-

-                    ( declaration | definition )+

-                    trailer? end\-of\-file

-    declaration =   '%{' < ( !'%}' . )* > RPERCENT

-    trailer =       '%%' < .* >

-    definition =    identifier EQUAL expression

-    expression =    sequence ( BAR sequence )*

-    sequence =      error+

-    error =         prefix ( TILDE action )?

-    prefix =        AND action

-    |               ( AND | NOT )? suffix

-    suffix =        primary ( QUERY | STAR | PLUS )?

-    primary =       identifier COLON identifier !EQUAL

-    |               identifier !EQUAL

-    |               OPEN expression CLOSE

-    |               literal

-    |               class

-    |               DOT

-    |               action

-    |               BEGIN

-    |               END

-    identifier =    < [\-a\-zA\-Z_][\-a\-zA\-Z_0\-9]* > \-

-    literal =       ['] < ( !['] char )* > ['] \-

-    |               ["] < ( !["] char )* > ["] \-

-    class =         '[' < ( !']' range )* > ']' \-

-    range =         char '\-' char | char

-    char =          '\\\\' [abefnrtv'"\\[\\]\\\\]

-    |               '\\\\' [0\-3][0\-7][0\-7]

-    |               '\\\\' [0\-7][0\-7]?

-    |               !'\\\\' .

-    action =        '{' < braces* > '}' \-

-    braces =        '{' braces* '}'

-    |               !'}' .

-    EQUAL =         '=' \-

-    COLON =         ':' \-

-    BAR =           '|' \-

-    AND =           '&' \-

-    NOT =           '!' \-

-    QUERY =         '?' \-

-    STAR =          '*' \-

-    PLUS =          '+' \-

-    OPEN =          '(' \-

-    CLOSE =         ')' \-

-    DOT =           '.' \-

-    BEGIN =         '<' \-

-    END =           '>' \-

-    TILDE =         '~' \-

-    RPERCENT =      '%}' \-

-    \- =             ( space | comment )*

-    space =         ' ' | '\\t' | end\-of\-line

-    comment =       '#' ( !end\-of\-line . )* end\-of\-line

-    end\-of\-line =   '\\r\\n' | '\\n' | '\\r'

-    end\-of\-file =   !.

-.fi

-.SH CUSTOMISING THE PARSER

-The following symbols can be redefined in declaration sections to

-modify the generated parser code.

-.TP

-.B YYSTYPE

-The semantic value type.  The pseudo\-variable '$$' and the

-identifiers 'bound' to rule results with the colon operator ':' should

-all be considered as being declared to have this type.  The default

-value is 'int'.

-.TP

-.B YYPARSE

-The name of the main entry point to the parser.  The default value

-is 'yyparse'.

-.TP

-.B YYPARSEFROM

-The name of an alternative entry point to the parser.  This function

-expects one argument: the function corresponding to the rule from

-which the search for a match should begin.  The default

-is 'yyparsefrom'.  Note that yyparse() is defined as

-.nf

-    int yyparse() { return yyparsefrom(yy_foo); }

-.fi

-where 'foo' is the name of the first rule in the grammar.

-.TP

-.BI YY_INPUT( buf , \ result , \ max_size )

-This macro is invoked by the parser to obtain more input text.

-.I buf

-points to an area of memory that can hold at most

-.I max_size

-characters.  The macro should copy input text to

-.I buf

-and then assign the integer variable

-.I result

-to indicate the number of characters copied.  If no more input is available,

-the macro should assign 0 to

-.IR result .

-By default, the YY_INPUT macro is defined as follows.

-.nf

-    #define YY_INPUT(buf, result, max_size)        \\

-    {                                              \\

-      int yyc= getchar();                          \\

-      result= (EOF == yyc) ? 0 : (*(buf)= yyc, 1); \\

-    }

-.fi

-Note that if YY_CTX_LOCAL is defined (see below) then an additional

-first argument, containing the parser context, is passed to YY_INPUT.

-.TP

-.B YY_DEBUG

-If this symbols is defined then additional code will be included in

-the parser that prints vast quantities of arcane information to the

-standard error while the parser is running.

-.TP

-.B YY_BEGIN

-This macro is invoked to mark the start of input text that will be

-made available in actions as 'yytext'.  This corresponds to

-occurrences of '<' in the grammar.  These are converted into

-predicates that are expected to succeed.  The default definition

-.nf

-    #define YY_BEGIN (yybegin= yypos, 1)

-.fi

-therefore saves the current input position and returns 1 ('true') as

-the result of the predicate.

-.TP

-.B YY_END

-This macros corresponds to '>' in the grammar.  Again, it is a

-predicate so the default definition saves the input position

-before 'succeeding'.

-.nf

-    #define YY_END (yyend= yypos, 1)

-.fi

-.TP

-.BI YY_PARSE( T )

-This macro declares the parser entry points (yyparse and yyparsefrom)

-to be of type

-.IR T .

-The default definition

-.nf

-    #define YY_PARSE(T) T

-.fi

-leaves yyparse() and yyparsefrom() with global visibility.  If they

-should not be externally visible in other source files, this macro can

-be redefined to declare them 'static'.

-.nf

-    #define YY_PARSE(T) static T

-.fi

-.TP

-.B YY_CTX_LOCAL

-If this symbol is defined during compilation of a generated parser

-then global parser state will be kept in a structure of

-type 'yycontext' which can be declared as a local variable.  This

-allows multiple instances of parsers to coexist and to be thread\-safe.

-The parsing function

-.IR yyparse ()

-will be declared to expect a first argument of type 'yycontext *', an

-instance of the structure holding the global state for the parser.

-This instance must be allocated and initialised to zero by the client.

-A trivial but complete example is as follows.

-.nf

-    #include <stdio.h>

-    #define YY_CTX_LOCAL

-    #include "the\-generated\-parser.peg.c"

-    int main()

-    {

-      yycontext ctx;

-      memset(&ctx, 0, sizeof(yycontext));

-      while (yyparse(&ctx));

-      return 0;

-    }

-.fi

-Note that if this symbol is undefined then the compiled parser will

-statically allocate its global state and will be neither reentrant nor

-thread\-safe.

-Note also that the parser yycontext structure is initialised automatically

-the first time

-.IR yyparse ()

-is called; this structure

-.B must

-therefore be properly initialised to zero before the first call to

-.IR yyparse ().

-.TP

-.B YY_CTX_MEMBERS

-If YY_CTX_LOCAL is defined (see above) then the macro YY_CTX_MEMBERS

-can be defined to expand to any additional member field declarations

-that the client would like included in the declaration of

-the 'yycontext' structure type.  These additional members are

-otherwise ignored by the generated parser.  The instance

-of 'yycontext' associated with the currently\-active parser is

-available within actions as the pointer variable

-.IR yy .

-.TP

-.B YY_BUFFER_SIZE

-The initial size of the text buffer, in bytes.  The default is 1024

-and the buffer size is doubled whenever required to meet demand during

-parsing.  An application that typically parses much longer strings

-could increase this to avoid unnecessary buffer reallocation.

-.TP

-.B YY_STACK_SIZE

-The initial size of the variable and action stacks.  The default is

-128, which is doubled whenever required to meet demand during parsing.

-Applications that have deep call stacks with many local variables, or

-that perform many actions after a single successful match, could increase

-this to avoid unnecessary buffer reallocation.

-.TP

-.BI YY_MALLOC( YY , \ SIZE )

-The memory allocator for all parser\-related storage.  The parameters

-are the current yycontext structure and the number of bytes to

-allocate.  The default definition is:

-.RI malloc( SIZE )

-.TP

-.BI YY_REALLOC( YY , \ PTR , \ SIZE )

-The memory reallocator for dynamically\-grown storage (such as text

-buffers and variable stacks).  The parameters are the current

-yycontext structure, the previously\-allocated storage, and the number

-of bytes to which that storage should be grown.  The default definition is:

-.RI realloc( PTR , \ SIZE )

-.TP

-.BI YY_FREE( YY , \ PTR )

-The memory deallocator.  The parameters are the current yycontext

-structure and the storage to deallocate.  The default definition is:

-.RI free( PTR )

-.TP

-.B YYRELEASE

-The name of the function that releases all resources held by a

-yycontext structure.  The default value is 'yyrelease'.

-.PP

-The following variables can be referred to within actions.

-.TP

-.B char *yybuf

-This variable points to the parser's input buffer used to store input

-text that has not yet been matched.

-.TP

-.B int yypos

-This is the offset (in yybuf) of the next character to be matched and

-consumed.

-.TP

-.B char *yytext

-The most recent matched text delimited by '<' and '>' is stored in this variable.

-.TP

-.B int yyleng

-This variable indicates the number of characters in 'yytext'.

-.TP

-.B yycontext *yy

-This variable points to the instance of 'yycontext' associated with

-the currently\-active parser.

-.PP

-Programs that wish to release all the resources associated with a

-parser can use the following function.

-.TP

-.BI yyrelease(yycontext * yy )

-Returns all parser\-allocated storage associated with

-.I yy

-to the system.  The storage will be reallocated on the next call to

-.IR yyparse ().

-.PP

-Note that the storage for the yycontext structure itself is never

-allocated or reclaimed implicitly.  The application must allocate

-these structures in automatic storage, or use

-.IR calloc ()

-and

-.IR free ()

-to manage them explicitly.  The example in the following section

-demonstrates one approach to resource management.

-.SH EXAMPLE: EXTENDING THE PARSER'S CONTEXT

-The

-.I yy

-variable passed to actions contains the state of the parser plus any

-additional fields defined by YY_CTX_MEMBERS.  Theses fields can be

-used to store application\-specific information that is global to a

-particular call of

-.IR yyparse ().

-A trivial but complete

-.I leg

-example follows in which the yycontext

-structure is extended with a

-.I count

-of the number of newline characters

-seen in the input so far (the grammar otherwise consumes and ignores

-the entire input).  The caller of

-.IR yyparse ()

-uses

-.I count

-to print the number of lines of input that were read.

-.nf

-    %{

-    #define YY_CTX_LOCAL 1

-    #define YY_CTX_MEMBERS \\

-      int count;

-    %}

-    Char    = ('\\n' | '\\r\\n' | '\\r')        { yy\->count++ }

-            | .

-    %%

-    #include <stdio.h>

-    #include <string.h>

-    int main()

-    {

-        /* create a local parser context in automatic storage */

-        yycontext yy;

-        /* the context *must* be initialised to zero before first use*/

-        memset(&yy, 0, sizeof(yy));

-        while (yyparse(&yy))

-            ;

-        printf("%d newlines\\n", yy.count);

-        /* release all resources associated with the context */

-        yyrelease(&yy);

-        return 0;

-    }

-.fi

-.SH DIAGNOSTICS

-.I minipeg

-warns about the following conditions while converting a grammar into a parser.

-.TP

-.B syntax error

-The input grammar was malformed in some way.  The error message will

-include the text about to be matched (often backed up a huge amount

-from the actual location of the error) and the line number of the most

-recently considered character (which is often the real location of the

-problem).

-.TP

-.B rule 'foo' used but not defined

-The grammar referred to a rule named 'foo' but no definition for it

-was given.  Attempting to use the generated parser will likely result

-in errors from the linker due to undefined symbols associated with the

-missing rule.

-.TP

-.B rule 'foo' defined but not used

-The grammar defined a rule named 'foo' and then ignored it.  The code

-associated with the rule is included in the generated parser which

-will in all other respects be healthy.

-.TP

-.B possible infinite left recursion in rule 'foo'

-There exists at least one path through the grammar that leads from the

-rule 'foo' back to (a recursive invocation of) the same rule without

-consuming any input.

-.PP

-Left recursion, especially that found in standards documents, is

-often 'direct' and implies trivial repetition.

-.nf

-    # (6.7.6)

-    direct\-abstract\-declarator =

-        LPAREN abstract\-declarator RPAREN

-    |   direct\-abstract\-declarator? LBRACKET assign\-expr? RBRACKET

-    |   direct\-abstract\-declarator? LBRACKET STAR RBRACKET

-    |   direct\-abstract\-declarator? LPAREN param\-type\-list? RPAREN

-.fi

-The recursion can easily be eliminated by converting the parts of the

-pattern following the recursion into a repeatable suffix.

-.nf

-    # (6.7.6)

-    direct\-abstract\-declarator =

-        direct\-abstract\-declarator\-head?

-        direct\-abstract\-declarator\-tail*

-    direct\-abstract\-declarator\-head =

-        LPAREN abstract\-declarator RPAREN

-    direct\-abstract\-declarator\-tail =

-        LBRACKET assign\-expr? RBRACKET

-    |   LBRACKET STAR RBRACKET

-    |   LPAREN param\-type\-list? RPAREN

-.fi

-.SH CAVEATS

-A parser that accepts empty input will

-.I always

-succeed.  Consider the following example, not atypical of a first

-attempt to write a PEG\-based parser:

-.nf

-    Program = Expression*

-    Expression = "whatever"

-    %%

-    int main() {

-      while (yyparse())

-        puts("success!");

-      return 0;

-    }

-.fi

-This program loops forever, no matter what (if any) input is provided

-on stdin.  Many fixes are possible, the easiest being to insist that

-the parser always consumes some non\-empty input.  Changing the first

-line to

-.nf

-    Program = Expression+

-.fi

-accomplishes this.  If the parser is expected to consume the entire

-input, then explicitly requiring the end\-of\-file is also highly

-recommended:

-.nf

-    Program = Expression+ !.

-.fi

-This works because the parser will only fail to match ("!" predicate)

-any character at all ("." expression) when it attempts to read beyond

-the end of the input.

-.SH BUGS

-.PP

-The 'yy' and 'YY' prefixes cannot be changed.

-.PP

-Left recursion is detected in the input grammar but is not handled

-correctly in the generated parser.

-.PP

-Diagnostics for errors in the input grammar are obscure and not

-particularly helpful.

-.PP

-The operators

-.BR ! \ \c

-and

-.B ~

-should really be named the other way around.

-.PP

-Several commonly\-used

-.IR lex (1)

-features (yywrap(), yyin, etc.) are completely absent.

-.PP

-The generated parser does not contain '#line' directives to direct C

-compiler errors back to the grammar description when appropriate.

-.SH SEE ALSO

-D. Val Schorre,

-.I META II, a syntax\-oriented compiler writing language,

-19th ACM National Conference, 1964, pp.\ 41.301\-\-41.311.  Describes a

-self\-implementing parser generator for analytic grammars with no

-backtracking.

-.PP

-Alexander Birman,

-.I The TMG Recognition Schema,

-Ph.D. dissertation, Princeton, 1970.  A mathematical treatment of the

-power and complexity of recursive\-descent parsing with backtracking.

-.PP

-Bryan Ford,

-.I Parsing Expression Grammars: A Recognition\-Based Syntactic Foundation,

-ACM SIGPLAN Symposium on Principles of Programming Languages, 2004.

-Defines PEGs and analyses them in relation to context\-free and regular

-grammars.  Introduces the syntax adopted in

-.IR peg .

-.PP

-The standard Unix utilities

-.IR lex (1)

-and

-.IR yacc (1)

-which influenced the syntax and features of

-.IR minipeg .

-.PP

-The source code for

-.I minipeg

-whose grammar parsers are written using themselves.

-.PP

-The latest version of this software and documentation:

-.nf

-    https://github.com/andrewchambers/minipeg

-.fi

-.SH AUTHOR

-.IR minipeg

-and this manual were originally written by Ian Piumarta

-under the project name peg/leg.

-.IR minipeg

-is a fork of peg/leg by Andrew Chambers.

-.PP

-Please send bug reports and suggestions for improvements to the author

-at the project address.

home: hub: minipeg