291 lines
		
	
	
		
			6.8 KiB
		
	
	
	
		
			Groff
		
	
	
	
	
	
			
		
		
	
	
			291 lines
		
	
	
		
			6.8 KiB
		
	
	
	
		
			Groff
		
	
	
	
	
	
| .\"	$Id: mdoc.3,v 1.41 2010/05/30 22:56:02 kristaps Exp $
 | |
| .\"
 | |
| .\" Copyright (c) 2009-2010 Kristaps Dzonsons <kristaps@bsd.lv>
 | |
| .\"
 | |
| .\" Permission to use, copy, modify, and distribute this software for any
 | |
| .\" purpose with or without fee is hereby granted, provided that the above
 | |
| .\" copyright notice and this permission notice appear in all copies.
 | |
| .\"
 | |
| .\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 | |
| .\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 | |
| .\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 | |
| .\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 | |
| .\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 | |
| .\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 | |
| .\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 | |
| .\"
 | |
| .Dd $Mdocdate: May 30 2010 $
 | |
| .Dt MDOC 3
 | |
| .Os
 | |
| .Sh NAME
 | |
| .Nm mdoc ,
 | |
| .Nm mdoc_alloc ,
 | |
| .Nm mdoc_endparse ,
 | |
| .Nm mdoc_free ,
 | |
| .Nm mdoc_meta ,
 | |
| .Nm mdoc_node ,
 | |
| .Nm mdoc_parseln ,
 | |
| .Nm mdoc_reset
 | |
| .Nd mdoc macro compiler library
 | |
| .Sh SYNOPSIS
 | |
| .In mandoc.h
 | |
| .In mdoc.h
 | |
| .Vt extern const char * const * mdoc_macronames;
 | |
| .Vt extern const char * const * mdoc_argnames;
 | |
| .Ft "struct mdoc *"
 | |
| .Fn mdoc_alloc "void *data" "int pflags" "mandocmsg msgs"
 | |
| .Ft int
 | |
| .Fn mdoc_endparse "struct mdoc *mdoc"
 | |
| .Ft void
 | |
| .Fn mdoc_free "struct mdoc *mdoc"
 | |
| .Ft "const struct mdoc_meta *"
 | |
| .Fn mdoc_meta "const struct mdoc *mdoc"
 | |
| .Ft "const struct mdoc_node *"
 | |
| .Fn mdoc_node "const struct mdoc *mdoc"
 | |
| .Ft int
 | |
| .Fn mdoc_parseln "struct mdoc *mdoc" "int line" "char *buf"
 | |
| .Ft int
 | |
| .Fn mdoc_reset "struct mdoc *mdoc"
 | |
| .Sh DESCRIPTION
 | |
| The
 | |
| .Nm mdoc
 | |
| library parses lines of
 | |
| .Xr mdoc 7
 | |
| input
 | |
| into an abstract syntax tree (AST).
 | |
| .Pp
 | |
| In general, applications initiate a parsing sequence with
 | |
| .Fn mdoc_alloc ,
 | |
| parse each line in a document with
 | |
| .Fn mdoc_parseln ,
 | |
| close the parsing session with
 | |
| .Fn mdoc_endparse ,
 | |
| operate over the syntax tree returned by
 | |
| .Fn mdoc_node
 | |
| and
 | |
| .Fn mdoc_meta ,
 | |
| then free all allocated memory with
 | |
| .Fn mdoc_free .
 | |
| The
 | |
| .Fn mdoc_reset
 | |
| function may be used in order to reset the parser for another input
 | |
| sequence.
 | |
| See the
 | |
| .Sx EXAMPLES
 | |
| section for a simple example.
 | |
| .Pp
 | |
| This section further defines the
 | |
| .Sx Types ,
 | |
| .Sx Functions
 | |
| and
 | |
| .Sx Variables
 | |
| available to programmers.
 | |
| Following that, the
 | |
| .Sx Abstract Syntax Tree
 | |
| section documents the output tree.
 | |
| .Ss Types
 | |
| Both functions (see
 | |
| .Sx Functions )
 | |
| and variables (see
 | |
| .Sx Variables )
 | |
| may use the following types:
 | |
| .Bl -ohang
 | |
| .It Vt struct mdoc
 | |
| An opaque type defined in
 | |
| .Pa mdoc.c .
 | |
| Its values are only used privately within the library.
 | |
| .It Vt struct mdoc_node
 | |
| A parsed node.
 | |
| Defined in
 | |
| .Pa mdoc.h .
 | |
| See
 | |
| .Sx Abstract Syntax Tree
 | |
| for details.
 | |
| .It Vt mandocmsg
 | |
| A function callback type defined in
 | |
| .Pa mandoc.h .
 | |
| .El
 | |
| .Ss Functions
 | |
| Function descriptions follow:
 | |
| .Bl -ohang
 | |
| .It Fn mdoc_alloc
 | |
| Allocates a parsing structure.
 | |
| The
 | |
| .Fa data
 | |
| pointer is passed to
 | |
| .Fa msgs .
 | |
| The
 | |
| .Fa pflags
 | |
| arguments are defined in
 | |
| .Pa mdoc.h .
 | |
| Returns NULL on failure.
 | |
| If non-NULL, the pointer must be freed with
 | |
| .Fn mdoc_free .
 | |
| .It Fn mdoc_reset
 | |
| Reset the parser for another parse routine.
 | |
| After its use,
 | |
| .Fn mdoc_parseln
 | |
| behaves as if invoked for the first time.
 | |
| If it returns 0, memory could not be allocated.
 | |
| .It Fn mdoc_free
 | |
| Free all resources of a parser.
 | |
| The pointer is no longer valid after invocation.
 | |
| .It Fn mdoc_parseln
 | |
| Parse a NUL-terminated line of input.
 | |
| This line should not contain the trailing newline.
 | |
| Returns 0 on failure, 1 on success.
 | |
| The input buffer
 | |
| .Fa buf
 | |
| is modified by this function.
 | |
| .It Fn mdoc_endparse
 | |
| Signals that the parse is complete.
 | |
| Note that if
 | |
| .Fn mdoc_endparse
 | |
| is called subsequent to
 | |
| .Fn mdoc_node ,
 | |
| the resulting tree is incomplete.
 | |
| Returns 0 on failure, 1 on success.
 | |
| .It Fn mdoc_node
 | |
| Returns the first node of the parse.
 | |
| Note that if
 | |
| .Fn mdoc_parseln
 | |
| or
 | |
| .Fn mdoc_endparse
 | |
| returns 0, the tree will be incomplete.
 | |
| .It Fn mdoc_meta
 | |
| Returns the document's parsed meta-data.
 | |
| If this information has not yet been supplied or
 | |
| .Fn mdoc_parseln
 | |
| or
 | |
| .Fn mdoc_endparse
 | |
| returns 0, the data will be incomplete.
 | |
| .El
 | |
| .Ss Variables
 | |
| The following variables are also defined:
 | |
| .Bl -ohang
 | |
| .It Va mdoc_macronames
 | |
| An array of string-ified token names.
 | |
| .It Va mdoc_argnames
 | |
| An array of string-ified token argument names.
 | |
| .El
 | |
| .Ss Abstract Syntax Tree
 | |
| The
 | |
| .Nm
 | |
| functions produce an abstract syntax tree (AST) describing input in a
 | |
| regular form.
 | |
| It may be reviewed at any time with
 | |
| .Fn mdoc_node ;
 | |
| however, if called before
 | |
| .Fn mdoc_endparse ,
 | |
| or after
 | |
| .Fn mdoc_endparse
 | |
| or
 | |
| .Fn mdoc_parseln
 | |
| fails, it may be incomplete.
 | |
| .Pp
 | |
| This AST is governed by the ontological
 | |
| rules dictated in
 | |
| .Xr mdoc 7
 | |
| and derives its terminology accordingly.
 | |
| .Qq In-line
 | |
| elements described in
 | |
| .Xr mdoc 7
 | |
| are described simply as
 | |
| .Qq elements .
 | |
| .Pp
 | |
| The AST is composed of
 | |
| .Vt struct mdoc_node
 | |
| nodes with block, head, body, element, root and text types as declared
 | |
| by the
 | |
| .Va type
 | |
| field.
 | |
| Each node also provides its parse point (the
 | |
| .Va line ,
 | |
| .Va sec ,
 | |
| and
 | |
| .Va pos
 | |
| fields), its position in the tree (the
 | |
| .Va parent ,
 | |
| .Va child ,
 | |
| .Va next
 | |
| and
 | |
| .Va prev
 | |
| fields) and some type-specific data.
 | |
| .Pp
 | |
| The tree itself is arranged according to the following normal form,
 | |
| where capitalised non-terminals represent nodes.
 | |
| .Pp
 | |
| .Bl -tag -width "ELEMENTXX" -compact
 | |
| .It ROOT
 | |
| \(<- mnode+
 | |
| .It mnode
 | |
| \(<- BLOCK | ELEMENT | TEXT
 | |
| .It BLOCK
 | |
| \(<- HEAD [TEXT] (BODY [TEXT])+ [TAIL [TEXT]]
 | |
| .It ELEMENT
 | |
| \(<- TEXT*
 | |
| .It HEAD
 | |
| \(<- mnode+
 | |
| .It BODY
 | |
| \(<- mnode+
 | |
| .It TAIL
 | |
| \(<- mnode+
 | |
| .It TEXT
 | |
| \(<- [[:printable:],0x1e]*
 | |
| .El
 | |
| .Pp
 | |
| Of note are the TEXT nodes following the HEAD, BODY and TAIL nodes of
 | |
| the BLOCK production: these refer to punctuation marks.
 | |
| Furthermore, although a TEXT node will generally have a non-zero-length
 | |
| string, in the specific case of
 | |
| .Sq \&.Bd \-literal ,
 | |
| an empty line will produce a zero-length string.
 | |
| Multiple body parts are only found in invocations of
 | |
| .Sq \&.Bl \-column ,
 | |
| where a new body introduces a new phrase.
 | |
| .Sh EXAMPLES
 | |
| The following example reads lines from stdin and parses them, operating
 | |
| on the finished parse tree with
 | |
| .Fn parsed .
 | |
| This example does not error-check nor free memory upon failure.
 | |
| .Bd -literal -offset indent
 | |
| struct mdoc *mdoc;
 | |
| const struct mdoc_node *node;
 | |
| char *buf;
 | |
| size_t alloc_len;
 | |
| ssize_t len;
 | |
| int line;
 | |
| 
 | |
| line = 1;
 | |
| mdoc = mdoc_alloc(NULL, 0, NULL);
 | |
| buf = NULL;
 | |
| alloc_len = 0;
 | |
| 
 | |
| while ((len = getline(&buf, &alloc_len, stdin)) >= 0) {
 | |
|     if (len && buf[len - 1] == '\en')
 | |
|         buf[len - 1] = '\e0';
 | |
|     if ( ! mdoc_parseln(mdoc, line, buf))
 | |
|         errx(1, "mdoc_parseln");
 | |
|     line++;
 | |
| }
 | |
| 
 | |
| if ( ! mdoc_endparse(mdoc))
 | |
|     errx(1, "mdoc_endparse");
 | |
| if (NULL == (node = mdoc_node(mdoc)))
 | |
|     errx(1, "mdoc_node");
 | |
| 
 | |
| parsed(mdoc, node);
 | |
| mdoc_free(mdoc);
 | |
| .Ed
 | |
| .Pp
 | |
| Please see
 | |
| .Pa main.c
 | |
| in the source archive for a rigorous reference.
 | |
| .Sh SEE ALSO
 | |
| .Xr mandoc 1 ,
 | |
| .Xr mdoc 7
 | |
| .Sh AUTHORS
 | |
| The
 | |
| .Nm
 | |
| library was written by
 | |
| .An Kristaps Dzonsons Aq kristaps@bsd.lv .
 | 
