diff options
author | Philip Wittamore <philip@wittamore.com> | 2025-09-30 09:00:27 +0200 |
---|---|---|
committer | Philip Wittamore <philip@wittamore.com> | 2025-09-30 09:00:27 +0200 |
commit | 8ad81b33dc20b00626c2ff62946d1d8569d96b11 (patch) | |
tree | 2264f6f130c3223dc9ccbe664df6f5f7ec5089d6 | |
download | par-8ad81b33dc20b00626c2ff62946d1d8569d96b11.tar.gz par-8ad81b33dc20b00626c2ff62946d1d8569d96b11.tar.bz2 par-8ad81b33dc20b00626c2ff62946d1d8569d96b11.zip |
update
-rw-r--r-- | par/Par-1.53.0/buffer.c | 216 | ||||
-rw-r--r-- | par/Par-1.53.0/buffer.h | 78 | ||||
-rw-r--r-- | par/Par-1.53.0/charset.c | 324 | ||||
-rw-r--r-- | par/Par-1.53.0/charset.h | 75 | ||||
-rw-r--r-- | par/Par-1.53.0/errmsg.c | 19 | ||||
-rw-r--r-- | par/Par-1.53.0/errmsg.h | 39 | ||||
-rw-r--r-- | par/Par-1.53.0/par.1 | 1917 | ||||
-rw-r--r-- | par/Par-1.53.0/par.c | 944 | ||||
-rw-r--r-- | par/Par-1.53.0/par.doc | 1394 | ||||
-rw-r--r-- | par/Par-1.53.0/protoMakefile | 115 | ||||
-rw-r--r-- | par/Par-1.53.0/reformat.c | 550 | ||||
-rw-r--r-- | par/Par-1.53.0/reformat.h | 31 | ||||
-rw-r--r-- | par/Par-1.53.0/releasenotes | 279 | ||||
-rwxr-xr-x | par/Par-1.53.0/test-par | 734 |
14 files changed, 6715 insertions, 0 deletions
diff --git a/par/Par-1.53.0/buffer.c b/par/Par-1.53.0/buffer.c new file mode 100644 index 0000000..ac3225e --- /dev/null +++ b/par/Par-1.53.0/buffer.c @@ -0,0 +1,216 @@ +/* +buffer.c +last touched in Par 1.53.0 +last meaningful change in Par 1.50 +Copyright 1993, 1996 Adam M. Costello + +This is ANSI C code (C89). + +additem(), copyitems(), and nextitem() rely on the fact that +sizeof (char) is 1. See section A7.4.8 of The C Programming +Language, Second Edition, by Kernighan and Ritchie. + +*/ + + +#include "buffer.h" /* Makes sure we're consistent with the prototypes. */ + +#include "errmsg.h" + +#include <stddef.h> +#include <stdlib.h> +#include <string.h> + +#undef NULL +#define NULL ((void *) 0) + +#ifdef DONTFREE +#define free(ptr) /* With DONTFREE defined, every free() call below expands to nothing. */ +#endif + + +struct buffer { /* A chain of blocks holding items of one fixed size. */ + struct block *firstblk, /* The first block. */ + *current, /* The last non-empty block, or */ + /* firstblk if all are empty. */ + *nextblk; /* The block containing the item to be */ + /* returned by nextitem(), or NULL. */ + int nextindex; /* Index of item in nextblk->items. */ + size_t itemsize; /* The size of an item. */ +}; + +typedef struct block { + struct block *next; /* The next block, or NULL if none. */ + void *items; /* Storage for the items in this block. */ + int maxhere, /* Number of items that fit in *items. */ + numprevious, /* Total of numhere for all previous blocks. */ + numhere; /* The first numhere slots in *items are filled. 
*/ +} block; + + +buffer *newbuffer(size_t itemsize, errmsg_t errmsg) +{ + buffer *buf; + block *blk; + void *items; + int maxhere; + + maxhere = 124 / itemsize; /* The first block holds 124 bytes' worth of items... */ + if (maxhere < 4) maxhere = 4; /* ...but never fewer than 4 items. */ + + buf = malloc(sizeof (buffer)); + blk = malloc(sizeof (block)); + items = malloc(maxhere * itemsize); + if (!buf || !blk || !items) { + strcpy(errmsg,outofmem); + goto nberror; + } + + buf->itemsize = itemsize; + buf->firstblk = buf->current = buf->nextblk = blk; /* One empty block serves as first, current and next. */ + buf->nextindex = 0; + blk->next = NULL; + blk->numprevious = blk->numhere = 0; + blk->maxhere = maxhere; + blk->items = items; + + *errmsg = '\0'; + return buf; + +nberror: /* Release whichever of the three allocations succeeded. */ + + if (buf) free(buf); + if (blk) free(blk); + if (items) free(items); + return NULL; +} + + +void freebuffer(buffer *buf) +{ + block *blk, *tmp; + + blk = buf->firstblk; + while (blk) { /* Walk the chain, freeing each block's item storage and then the block. */ + tmp = blk; + blk = blk->next; /* Step forward before tmp is freed. */ + if (tmp->items) free(tmp->items); + free(tmp); + } + + free(buf); +} + + +void clearbuffer(buffer *buf) +{ + block *blk; + + for (blk = buf->firstblk; blk; blk = blk->next) /* Blocks stay allocated; only their fill counts are reset. */ + blk->numhere = 0; + + buf->current = buf->firstblk; +} + + +void additem(buffer *buf, const void *item, errmsg_t errmsg) +{ + block *blk, *new; + void *items; + int maxhere; + size_t itemsize = buf->itemsize; + + blk = buf->current; + + if (blk->numhere == blk->maxhere) { /* Current block is full: advance to the next one. */ + new = blk->next; + if (!new) { /* No block follows yet: append one with twice the capacity. */ + maxhere = 2 * blk->maxhere; + new = malloc(sizeof (block)); + items = malloc(maxhere * itemsize); + if (!new || !items) { + strcpy(errmsg,outofmem); + goto aierror; + } + blk->next = new; + new->next = NULL; + new->maxhere = maxhere; + new->numprevious = blk->numprevious + blk->numhere; + new->numhere = 0; + new->items = items; + } + blk = buf->current = new; + } + + memcpy( ((char *) blk->items) + (blk->numhere * itemsize), item, itemsize ); /* Copy *item into the first unfilled slot. */ + + ++blk->numhere; + + *errmsg = '\0'; + return; + +aierror: /* Reached only from the allocation failure above, before *buf has been modified. */ + + if (new) free(new); + if (items) free(items); +} + + +int numitems(buffer *buf) +{ + block *blk = buf->current; + return blk->numprevious + 
blk->numhere; +} + + +void *copyitems(buffer *buf, errmsg_t errmsg) +{ + int n; + void *r; + block *blk, *b; + size_t itemsize = buf->itemsize; + + b = buf->current; + n = b->numprevious + b->numhere; /* Total number of items held in the buffer. */ + if (!n) { *errmsg = '\0'; return NULL; } /* Empty buffer is a success, so errmsg must be cleared as errmsg.h requires. */ + + r = malloc(n * itemsize); + if (!r) { + strcpy(errmsg,outofmem); + return NULL; + } + + b = b->next; /* b now marks where copying stops: the block after current, or NULL. */ + + for (blk = buf->firstblk; blk != b; blk = blk->next) + memcpy( ((char *) r) + (blk->numprevious * itemsize), + blk->items, blk->numhere * itemsize); + + *errmsg = '\0'; + return r; +} + + +void rewindbuffer(buffer *buf) +{ + buf->nextblk = buf->firstblk; + buf->nextindex = 0; +} + + +void *nextitem(buffer *buf) +{ + void *r; + + if (!buf->nextblk || buf->nextindex >= buf->nextblk->numhere) /* No item in the slot currently pointed at. */ + return NULL; + + r = ((char *) buf->nextblk->items) + (buf->nextindex * buf->itemsize); + + if (++buf->nextindex >= buf->nextblk->maxhere) { /* Ran off the end of this block: continue in the next one. */ + buf->nextblk = buf->nextblk->next; /* NOTE(review): this is NULL after the last block and stays NULL until rewindbuffer(), even if additem() appends a block later. */ + buf->nextindex = 0; + } + + return r; +} diff --git a/par/Par-1.53.0/buffer.h b/par/Par-1.53.0/buffer.h new file mode 100644 index 0000000..ac83bb4 --- /dev/null +++ b/par/Par-1.53.0/buffer.h @@ -0,0 +1,78 @@ +/* +buffer.h +last touched in Par 1.53.0 +last meaningful change in Par 1.31 +Copyright 1993 Adam M. Costello + +This is ANSI C code (C89). + +Note: Those functions declared here which do not use errmsg +always succeed, provided that they are passed valid arguments. + +*/ + + +#include "errmsg.h" + +#include <stddef.h> + + +typedef struct buffer buffer; + + +buffer *newbuffer(size_t itemsize, errmsg_t errmsg); + + /* newbuffer(itemsize,errmsg) returns a pointer to a */ + /* new empty buffer which holds items of size itemsize. */ + /* itemsize must not be 0. Returns NULL on failure. */ + + +void freebuffer(buffer *buf); + + /* freebuffer(buf) frees the memory associated with */ + /* *buf. buf may not be used after this call. */ + + +void clearbuffer(buffer *buf); + + /* clearbuffer(buf) removes */ + /* all items from *buf, but */ + /* does not free any memory. 
*/ + + +void additem(buffer *buf, const void *item, errmsg_t errmsg); + + /* additem(buf,item,errmsg) copies *item to the end of */ + /* *buf. item must point to an object of the proper size */ + /* for *buf. If additem() fails, *buf will be unaffected. */ + + +int numitems(buffer *buf); + + /* numitems(buf) returns the number of items in *buf. */ + + +void *copyitems(buffer *buf, errmsg_t errmsg); + + /* copyitems(buf,errmsg) returns an array of objects of */ + /* the proper size for *buf, one for each item in *buf, */ + /* or NULL if there are no items in buf. The elements */ + /* of the array are copied from the items in *buf, in */ + /* order. The array is allocated with malloc(), so it */ + /* may be freed with free(). Returns NULL on failure. */ + + +void *nextitem(buffer *buf); + + /* When buf was created by newbuffer, a pointer associated with buf */ + /* was initialized to point at the first slot in *buf. If there is */ + /* an item in the slot currently pointed at, nextitem(buf) advances */ + /* the pointer to the next slot and returns the old value. If there */ + /* is no item in the slot, nextitem(buf) leaves the pointer where it */ + /* is and returns NULL. */ + + +void rewindbuffer(buffer *buf); + + /* rewindbuffer(buf) resets the pointer used by */ + /* nextitem() to point at the first slot in *buf. */ diff --git a/par/Par-1.53.0/charset.c b/par/Par-1.53.0/charset.c new file mode 100644 index 0000000..1aea59b --- /dev/null +++ b/par/Par-1.53.0/charset.c @@ -0,0 +1,324 @@ +/* +charset.c +last touched in Par 1.53.0 +last meaningful change in Par 1.53.0 +Copyright 1993, 2001, 2020 Adam M. Costello + +This is ANSI C code (C89). + +Because this is ANSI C code, we can't assume that char has only 8 bits. +Therefore, we can't use bit vectors to represent sets without the risk +of consuming large amounts of memory. Therefore, this code is much more +complicated than might be expected. 
+ +The issues regarding char and unsigned char are relevant to the +use of the ctype.h functions, and the interpretation of the _xhh +sequence. See the comments near the beginning of par.c. + +*/ + + +#include "charset.h" /* Makes sure we're consistent with the prototypes. */ + +#include "buffer.h" +#include "errmsg.h" + +#include <ctype.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#undef NULL +#define NULL ((void *) 0) + +#ifdef DONTFREE +#define free(ptr) +#endif + + +typedef unsigned char csflag_t; /* Holds a bitwise OR of the CS_* flags defined below. */ + +struct charset { + char *inlist; /* Characters in inlist are in the set. */ + char *outlist; /* Characters in outlist are not in the set. */ + /* inlist and outlist must have no common characters. */ + /* inlist and outlist may be NULL, which acts like "". */ + csflag_t flags; /* Characters in neither list are in the set if they */ + /* belong to any of the classes indicated by flags. */ +}; + +/* The following may be bitwise-OR'd together */ +/* to set the flags field of a charset: */ + +static const csflag_t + CS_UCASE = 1, /* Includes all upper case letters. */ + CS_LCASE = 2, /* Includes all lower case letters. */ + CS_NCASE = 4, /* Includes all neither case letters. */ + CS_DIGIT = 8, /* Includes all decimal digits. */ + CS_SPACE = 16, /* Includes all space characters. */ + CS_NUL = 32; /* Includes the NUL character. */ + + +static int appearsin(char c, const char *str) + +/* Returns 0 if c is '\0' or str is NULL or c */ +/* does not appear in *str. Otherwise returns 1. */ +{ + return c && str && strchr(str,c); /* c is tested first because strchr() would otherwise match the terminating '\0'. */ +} + + +static int hexdigtoint(char c) + +/* Returns the value represented by the hexadecimal */ +/* digit c, or -1 if c is not a hexadecimal digit. 
*/ +{ + const char *p, * const hexdigits = "0123456789ABCDEFabcdef"; + int n; + + if (!c) return -1; + p = strchr(hexdigits, *(unsigned char *)&c); + if (!p) return -1; + n = p - hexdigits; + if (n >= 16) n -= 6; + return n; + + /* We can't do things like c - 'A' because we can't */ + /* depend on the order of the characters in ANSI C. */ + /* Nor can we do things like hexdigtoint[c] because */ + /* we don't know how large such an array might be. */ +} + + +charset *parsecharset(const char *str, errmsg_t errmsg) +{ + charset *cset = NULL; + buffer *cbuf = NULL; + const char *p, * const singleescapes = "_sbqQx"; + int hex1, hex2; + char ch; + + cset = malloc(sizeof (charset)); + if (!cset) { + strcpy(errmsg,outofmem); + goto pcserror; + } + cset->inlist = cset->outlist = NULL; + cset->flags = 0; + + cbuf = newbuffer(sizeof (char), errmsg); + if (*errmsg) goto pcserror; + + for (p = str; *p; ++p) + if (*p == '_') { + ++p; + if (appearsin(*p, singleescapes)) { + if (*p == '_') ch = '_' ; + else if (*p == 's') ch = ' ' ; + else if (*p == 'b') ch = '\\'; + else if (*p == 'q') ch = '\''; + else if (*p == 'Q') ch = '\"'; + else /* *p == 'x' */ { + hex1 = hexdigtoint(p[1]); + hex2 = hexdigtoint(p[2]); + if (hex1 < 0 || hex2 < 0) goto pcsbadstr; + *(unsigned char *)&ch = 16 * hex1 + hex2; + p += 2; + } + if (!ch) + cset->flags |= CS_NUL; + else { + additem(cbuf, &ch, errmsg); + if (*errmsg) goto pcserror; + } + } + else { + if (*p == 'A') cset->flags |= CS_UCASE; + else if (*p == 'a') cset->flags |= CS_LCASE; + else if (*p == '@') cset->flags |= CS_NCASE; + else if (*p == '0') cset->flags |= CS_DIGIT; + else if (*p == 'S') cset->flags |= CS_SPACE; + else goto pcsbadstr; + } + } + else { + additem(cbuf,p,errmsg); + if (*errmsg) goto pcserror; + } + ch = '\0'; + additem(cbuf, &ch, errmsg); + if (*errmsg) goto pcserror; + cset->inlist = copyitems(cbuf,errmsg); + if (*errmsg) goto pcserror; + +pcscleanup: + + if (cbuf) freebuffer(cbuf); + return cset; + +pcsbadstr: + + 
sprintf(errmsg, "Bad charset syntax: %.*s\n", errmsg_size - 22, str); + +pcserror: + + if (cset) freecharset(cset); + cset = NULL; + goto pcscleanup; +} + + +void freecharset(charset *cset) +{ + if (cset->inlist) free(cset->inlist); /* Both list pointers are read here, so they must always be initialized. */ + if (cset->outlist) free(cset->outlist); + free(cset); +} + + +int csmember(char c, const charset *cset) +{ + unsigned char uc; + + if (appearsin(c, cset->inlist )) return 1; /* The explicit lists take precedence over the class flags. */ + if (appearsin(c, cset->outlist)) return 0; + uc = *(unsigned char *)&c; + + /* The logic for the CS_?CASE flags is a little convoluted, */ + /* but avoids calling islower() or isupper() more than once. */ + + if (cset->flags & CS_NCASE) { + if ( isalpha(uc) && + (cset->flags & CS_LCASE || !islower(uc)) && + (cset->flags & CS_UCASE || !isupper(uc)) ) return 1; + } + else { + if ( (cset->flags & CS_LCASE && islower(uc)) || + (cset->flags & CS_UCASE && isupper(uc)) ) return 1; + } + + return (cset->flags & CS_DIGIT && isdigit(uc)) || + (cset->flags & CS_SPACE && isspace(uc)) || + (cset->flags & CS_NUL && !c ) ; +} + + +static charset *csud( + int u, const charset *cset1, const charset *cset2, errmsg_t errmsg +) +/* Returns the union of cset1 and cset2 if u is 1, or the set */ +/* difference cset1 - cset2 if u is 0. Returns NULL on failure. */ +{ + charset *csu; + buffer *inbuf = NULL, *outbuf = NULL; + char *lists[4], **list, *p, nullchar = '\0'; + + csu = malloc(sizeof (charset)); + if (!csu) { + strcpy(errmsg,outofmem); + goto csuderror; + } + csu->inlist = csu->outlist = NULL; /* Must precede every later goto csuderror: freecharset() tests and frees both pointers. */ + inbuf = newbuffer(sizeof (char), errmsg); + if (*errmsg) goto csuderror; + outbuf = newbuffer(sizeof (char), errmsg); + if (*errmsg) goto csuderror; + csu->flags = u ? cset1->flags | cset2->flags + : cset1->flags & ~cset2->flags; + + lists[0] = cset1->inlist; + lists[1] = cset1->outlist; + lists[2] = cset2->inlist; + lists[3] = cset2->outlist; + + for (list = lists; list < lists + 4; ++list) /* Only explicitly listed characters can disagree with the combined flags. */ + if (*list) { + for (p = *list; *p; ++p) + if (u ? 
csmember(*p, cset1) || csmember(*p, cset2) + : csmember(*p, cset1) && !csmember(*p, cset2)) { + if (!csmember(*p, csu)) { /* csu's lists are still NULL here, so this asks whether the flags alone already include *p. */ + additem(inbuf,p,errmsg); + if (*errmsg) goto csuderror; + } + } + else + if (csmember(*p, csu)) { /* The flags would wrongly include *p, so it is listed as excluded. */ + additem(outbuf,p,errmsg); + if (*errmsg) goto csuderror; + } + } + + additem(inbuf, &nullchar, errmsg); /* Terminate both collections so the copies are C strings. */ + if (*errmsg) goto csuderror; + additem(outbuf, &nullchar, errmsg); + if (*errmsg) goto csuderror; + csu->inlist = copyitems(inbuf,errmsg); + if (*errmsg) goto csuderror; + csu->outlist = copyitems(outbuf,errmsg); + if (*errmsg) goto csuderror; + +csudcleanup: /* Shared exit: the scratch buffers are freed on success and on failure alike. */ + + if (inbuf) freebuffer(inbuf); + if (outbuf) freebuffer(outbuf); + return csu; + +csuderror: + + if (csu) freecharset(csu); + csu = NULL; + goto csudcleanup; +} + + +charset *csunion(const charset *cset1, const charset *cset2, errmsg_t errmsg) +{ + return csud(1,cset1,cset2,errmsg); +} + + +charset *csdiff(const charset *cset1, const charset *cset2, errmsg_t errmsg) +{ + return csud(0,cset1,cset2,errmsg); +} + + +void csadd(charset *cset1, const charset *cset2, errmsg_t errmsg) +{ + charset *csu; + + csu = csunion(cset1,cset2,errmsg); + if (*errmsg) return; /* *cset1 has not been touched yet. */ + csswap(csu,cset1); /* *cset1 now holds the union; csu holds the old contents, freed next. */ + freecharset(csu); +} + + +void csremove(charset *cset1, const charset *cset2, errmsg_t errmsg) +{ + charset *csu; + + csu = csdiff(cset1,cset2,errmsg); + if (*errmsg) return; /* *cset1 has not been touched yet. */ + csswap(csu,cset1); /* *cset1 now holds the difference; csu holds the old contents, freed next. */ + freecharset(csu); +} + + +charset *cscopy(const charset *cset, errmsg_t errmsg) +{ + charset emptycharset = { NULL, NULL, 0 }; + + return csunion(cset, &emptycharset, errmsg); /* The copy is built as the union with an empty set. */ +} + + +void csswap(charset *cset1, charset *cset2) +{ + charset tmp; + + tmp = *cset1; /* Whole-struct assignment: the list pointers change owner, nothing is copied or freed. */ + *cset1 = *cset2; + *cset2 = tmp; +} diff --git a/par/Par-1.53.0/charset.h b/par/Par-1.53.0/charset.h new file mode 100644 index 0000000..62eaf7c --- /dev/null +++ b/par/Par-1.53.0/charset.h @@ -0,0 +1,75 @@ +/* +charset.h +last touched in Par 1.53.0 +last meaningful change in Par 1.31 +Copyright 1993 Adam M. Costello + +This is ANSI C code (C89). 
+ +Note: Those functions declared here which do not use errmsg +always succeed, provided that they are passed valid arguments. + +*/ + + +#ifndef CHARSET_H +#define CHARSET_H + +#include "errmsg.h" + + +typedef struct charset charset; + + +charset *parsecharset(const char *str, errmsg_t errmsg); + + /* parsecharset(str,errmsg) returns the set of characters defined by */ + /* str using charset syntax (see par.doc). Returns NULL on failure. */ + + +void freecharset(charset *cset); + + /* freecharset(cset) frees any memory associated with */ + /* *cset. cset may not be used after this call. */ + + +int csmember(char c, const charset *cset); + + /* csmember(c,cset) returns 1 if c is a member of *cset, 0 otherwise. */ + + +charset *csunion(const charset *cset1, const charset *cset2, errmsg_t errmsg); + + /* csunion(cset1,cset2) returns a pointer to the */ + /* union of *cset1 and *cset2, or NULL on failure. */ + + +charset *csdiff(const charset *cset1, const charset *cset2, errmsg_t errmsg); + + /* csdiff(cset1,cset2) returns a pointer to the set */ + /* difference *cset1 - *cset2 , or NULL on failure. */ + + +void csadd(charset *cset1, const charset *cset2, errmsg_t errmsg); + + /* csadd(cset1,cset2) adds the members of *cset2 */ + /* to *cset1. On failure, *cset1 is not changed. */ + + +void csremove(charset *cset1, const charset *cset2, errmsg_t errmsg); + + /* csremove(cset1,cset2) removes the members of *cset2 */ + /* from *cset1. On failure, *cset1 is not changed. */ + + +charset *cscopy(const charset *cset, errmsg_t errmsg); + + /* cscopy(cset) returns a copy of cset, or NULL on failure. */ + + +void csswap(charset *cset1, charset *cset2); + + /* csswap(cset1,cset2) swaps the contents of *cset1 and *cset2. 
*/ + + +#endif diff --git a/par/Par-1.53.0/errmsg.c b/par/Par-1.53.0/errmsg.c new file mode 100644 index 0000000..146c76b --- /dev/null +++ b/par/Par-1.53.0/errmsg.c @@ -0,0 +1,19 @@ +/* +errmsg.c +last touched in Par 1.53.0 +last meaningful change in Par 1.40 +Copyright 1993 Adam M. Costello + +This is ANSI C code (C89). + +*/ + + +#include "errmsg.h" /* Makes sure we're consistent with the declarations. */ + + +const char * const outofmem = + "Out of memory.\n"; + +const char * const impossibility = + "Impossibility #%d has occurred. Please report it.\n"; diff --git a/par/Par-1.53.0/errmsg.h b/par/Par-1.53.0/errmsg.h new file mode 100644 index 0000000..d8e24a4 --- /dev/null +++ b/par/Par-1.53.0/errmsg.h @@ -0,0 +1,39 @@ +/* +errmsg.h +last touched in Par 1.53.0 +last meaningful change in Par 1.40 +Copyright 1993 Adam M. Costello + +This is ANSI C code (C89). + +*/ + + +#ifndef ERRMSG_H +#define ERRMSG_H + + +#define errmsg_size 163 + +/* This is the maximum number of characters that will */ +/* fit in an errmsg_t, including the terminating '\0'. */ +/* It will never decrease, but may increase in future */ +/* versions of this header file. */ + + +typedef char errmsg_t[errmsg_size]; + +/* Any function which takes the argument errmsg_t errmsg must, before */ +/* returning, either set errmsg[0] to '\0' (indicating success), or */ +/* write an error message string into errmsg, (indicating failure), */ +/* being careful not to overrun the space. */ + + +extern const char * const outofmem; + /* "Out of memory.\n" */ + +extern const char * const impossibility; + /* "Impossibility #%d has occurred. Please report it.\n" */ + + +#endif diff --git a/par/Par-1.53.0/par.1 b/par/Par-1.53.0/par.1 new file mode 100644 index 0000000..3c3e22b --- /dev/null +++ b/par/Par-1.53.0/par.1 @@ -0,0 +1,1917 @@ +.\" par.1 +.\" last touched in Par 1.53.0 +.\" last meaningful change in Par 1.53.0 +.\" Copyright 1993, 1996, 2000, 2020 Adam M. 
Costello +.\" +.\" This is nroff -man (or troff -man) code. +.\" +.TH par 1 "2020-Mar-14" "Par 1.53.0" "USER COMMANDS" +.SH NAME +par \- filter for reformatting paragraphs +.SH SYNOPSIS +.ds O \fR[\fP +.ds C \fR]\fP +.de OP +.BI \*O\ \\$1 \\$2\ \*C +.. +.TP .5i +.B par +.na +.OP help +.OP version +.OP B opset +.OP P opset +.OP Q opset +.OP W opset +.OP Z opset +.OP h \*Ohang\*C +.OP p \*Oprefix\*C +.OP r \*Orepeat\*C +.OP s \*Osuffix\*C +.OP T \*OTab\*C +.OP w \*Owidth\*C +.OP b \*Obody\*C +.OP c \*Ocap\*C +.OP d \*Odiv\*C +.OP E \*OErr\*C +.OP e \*Oexpel\*C +.OP f \*Ofit\*C +.OP g \*Oguess\*C +.OP j \*Ojust\*C +.OP l \*Olast\*C +.OP q \*Oquote\*C +.OP R \*OReport\*C +.OP t \*Otouch\*C +.br +.ad +.SH DESCRIPTION +.ie t .ds Q `` +.el .ds Q "" +.ie t .ds U '' +.el .ds U "" +.de IT +.LP +\h'-\w'\\$1\ 'u'\\$1\ \\$2 \\$3 \\$4 \\$5 \\$6 \\$7 \\$8 \\$9 +.. +.LP +.B par +is a filter which copies its input to its output, changing +all white characters (except newlines) to spaces, and +reformatting each paragraph. Paragraphs are separated +by protected, blank, and bodiless lines (see the +.SM TERMINOLOGY +section for definitions), and optionally +delimited by indentation (see the +.B d +option in the +.SM OPTIONS +section). +.LP +Each output paragraph is generated from the +corresponding input paragraph as follows: +.RS +.LP +.IT 1) An optional prefix and/or suffix +is removed from each input line. +.IT 2) The remainder is divided into +words (separated by spaces). +.IT 3) The words are joined into lines +to make an eye-pleasing paragraph. +.IT 4) The prefixes and suffixes are reattached. +.RE +.LP +If there are suffixes, spaces are inserted before +them so that they all end in the same column. +.SH QUICK START +.LP +.B par +is necessarily complex. 
For those who wish to use +it immediately and understand it later, assign the +.B \s-1PARINIT\s0 +environment variable the following value: +.IP +rTbgqR B=.,?'_A_a_@ Q=_s>| +.LP +The spaces, question mark, apostrophe, greater-than sign, +and vertical bar will probably have to be escaped or +quoted to prevent your shell from interpreting them. +.LP +The documentation, though precise, is unfortunately +not well-written for the end-user. Your +best bet is probably to read quickly the +.SM DESCRIPTION\s0, +.SM TERMINOLOGY\s0, +.SM OPTIONS\s0, +and +.SM ENVIRONMENT +sections, then read carefully the +.SM EXAMPLES +section, referring back to the +.SM OPTIONS +and +.SM TERMINOLOGY +sections as needed. +.LP +For the \*Qpower user\*U, a full understanding of +.B par +will require multiple readings of the +.SM TERMINOLOGY\s0, +.SM OPTIONS\s0, +.SM DETAILS\s0, +and +.SM EXAMPLES +sections. +.SH TERMINOLOGY +.LP +Miscellaneous terms: +.RS +.IP "charset syntax" +A way of representing a set of characters as a string. +The set includes exactly those characters which +appear in the string, except that the underscore (_) +is an escape character. Whenever it appears, it +must begin one of the following escape sequences: +.RS 1.5i +.IT __\ = an underscore +.IT _s\ = a space +.IT _S\ = all space characters +.IT _b\ = a backslash (\e) +.IT _q\ = a single quote (') +.IT _Q\ = a double quote (") +.IT _A\ = all upper case letters +.IT _a\ = all lower case letters +.IT _@\ = all neither-case letters +.IT _0\ = all decimal digits +.IT _x\fIhh\fP\ = the character represented +by the two hexadecimal digits +.I hh +(which may be upper or lower case) +.RE +.IP +The NUL character must not appear in the string, but +it may be included in the set with the _x00 sequence. +.IP +The exact meanings of _S, _A, _a, _@, and _0 are locale-dependent. +(Actually, all locales are supposed to agree on _0, but not on the others.) 
+In the default \*QC\*U locale: +_S includes only space, formfeed, newline, +carriage return, tab, and vertical tab; +_A includes only A through Z; +_a includes only a through z; +_@ includes nothing; +and _0 includes only 0 through 9. +.IP error +A condition which causes +.B par +to abort. See the +.SM DIAGNOSTICS +section. +.IP IP +Input paragraph. +.IP OP +Output paragraph. +.IP parameter +A symbol which may take on unsigned integral values. There +are several parameters whose values affect the behavior of +.BR par . +Parameters can be assigned values +using command line options. +.RE +.LP +Types of characters: +.RS +.IP "alphanumeric character" +An alphabetic character or decimal digit, +_A_a_@_0 in charset syntax (see above). +.IP "body character" +A member of the set of characters defined by the +.B \s-1PARBODY\s0 +environment variable (see the +.SM ENVIRONMENT +section) and/or the +.B B +option (see the +.SM OPTIONS +section). +.IP "protective character" +A member of the set of characters defined by the +.B \s-1PARPROTECT\s0 +environment variable and/or the +.B P +option. +.IP "quote character" +A member of the set of characters defined by the +.B \s-1PARQUOTE\s0 +environment variable and/or the +.B Q +option. +.IP "terminal character" +A member of the set of characters defined by the +.B Z +option. Initially, before any +.B Z +options have been processed, the set contains +period, question mark, exclamation point, and colon. +.IP "white character" +A member of the set of characters defined by the +.B W +option. Initially, before any +.B W +options have been processed, the set contains space, +formfeed, newline, carriage return, tab, and vertical tab. +.RE +.LP +Functions: +.RS +.IP comprelen +Given a non-empty sequence +.I S +of lines, let +.I c +be their longest common prefix. If the parameter +.I body +is 0, place a divider just after +the leading non-body characters in +.I c +(at the beginning if there are none). 
If +.I body +is 1, place the divider just after the +last non-space non-body character in +.I c +(at the beginning if there is none), then +advance the divider over any immediately +following spaces. The comprelen of +.I S +is the number of characters preceding the divider. +.IP comsuflen +Given a non-empty sequence +.I S +of lines, let +.I p +be the comprelen of +.IR S . +Let +.I T +be the set of lines which result from stripping the first +.I p +characters from each line in +.IR S . +Let +.I c +be the longest common suffix of the lines in +.IR T . +If +.I body +is 0, place a divider just before +the trailing non-body characters in +.I c +(at the end if there are none), then +advance the divider over all but the last +of any immediately following spaces. If +.I body +is 1, place the divider just before the first +non-space non-body character, then back up +the divider over one immediately preceding +space if there is one. The comsuflen of +.I S +is the number of characters following the divider. +.IP "fallback prelen (suflen)" +The fallback prelen (suflen) of an IP is: the comprelen +(comsuflen) of the IP, if the IP contains at least two +lines; otherwise, the comprelen (comsuflen) of the +block containing the IP, if the block contains at least +two lines; otherwise, the length of the longer of the +prefixes (suffixes) of the bodiless lines just above and +below the block, if the segment containing the block +has any bodiless lines; otherwise, 0. (See below for +the definitions of block, segment, and bodiless line.) +.IP "augmented fallback prelen" +Let +.I fp +be the fallback prelen of an IP. If the +IP contains more than one line, or if +.I quote +is 0, then the augmented fallback prelen of the IP is simply +.IR fp . +Otherwise, it is +.I fp +plus the number of quote characters +immediately following the first +.I fp +characters of the line. 
+.IP quoteprefix +The quoteprefix of a line is the longest string of quote +characters appearing at the beginning of the line, after +this string has been stripped of any trailing spaces. +.RE +.LP +Types of lines: +.RS +.IP "blank line" +An empty line, or a line whose first character is +not protective and which contains only spaces. +.IP "protected line" +An input line whose first character is protective. +.IP "bodiless line" +A line which is order +.I k +bodiless for some +.IR k . +.IP "order \fIk\fP bodiless line" +There is no such thing as an order 0 bodiless line. Suppose +.I S +is a contiguous subsequence of a segment (see below) +containing at least two lines, containing no order +.IR k \-1 +bodiless lines, bounded above and below by order +.IR k \-1 +bodiless lines and/or the beginning/end of the segment. Let +.I p +and +.I s +be the comprelen and comsuflen of +.IR S . +Any member of +.I S +which, if stripped of its first +.I p +and last +.I s +characters, would be blank (or, if the line was not inserted by the +.I quote +feature and the parameter +.I repeat +is non-zero, would consist of the +same character repeated at least +.I repeat +times), is order +.I k +bodiless. The first +.I p +characters of the bodiless line +comprise its prefix; the last +.I s +characters comprise its suffix. The character which repeats +in the middle is called its repeat character. If the middle +is empty, the space is taken to be its repeat character. +.IP "vacant line" +A bodiless line whose repeat character is the space. +.IP "superfluous line" +Only blank and vacant lines may be superfluous. If +contiguous vacant lines lie at the beginning or end +of a segment, they are all superfluous. But if they +lie between two non-vacant lines within a segment, +then all are superfluous except one\(emthe one which +contains the fewest non-spaces. In case of a tie, +the first of the tied lines is chosen. 
Similarly, if +contiguous blank lines lie outside of any segments +at the beginning or end of the input, they are all +superfluous. But if they lie between two segments and/or +protected lines, then all are superfluous except the first. +.RE +.LP +Groups of lines: +.RS +.IP segment +A contiguous sequence of input lines containing no protected +or blank lines, bounded above and below by protected +lines, blank lines, and/or the beginning/end of the input. +.IP block +A contiguous subsequence of a segment containing no +bodiless lines, bounded above and below by bodiless +lines and/or the beginning/end of the segment. +.RE +.LP +Types of words: +.RS +.IP "capitalized word" +If the parameter +.I cap +is 0, a capitalized word is one which contains +at least one alphanumeric character, whose first +alphanumeric character is not a lower case letter. If +.I cap +is 1, every word is considered a capitalized word. (See the +.B c +option in the +.SM OPTIONS +section.) +.IP "curious word" +A word which contains a terminal character +.I c +such that there are no alphanumeric +characters in the word after +.IR c , +but there is at least one alphanumeric +character in the word before +.IR c . +.RE +.SH OPTIONS +.LP +Any command line argument may begin with one minus +sign (\-) which is ignored. Generally, more +than one option may appear in a single command +line argument, but there are exceptions: The +.BR help , +.BR version , +.BR B , +.BR P , +and +.B Q +options must have whole arguments all to themselves. +.TP 1i +.B help +Causes all remaining arguments to be ignored. No +input is read. A usage message is printed on the +output briefly describing the options used by +.BR par . +.TP +.B version +Causes all remaining arguments to be ignored. No input +is read. \*Qpar \*U followed by its version number +is printed on the output. 
+.TP +.BI B opset +.I op +is a single character, either an equal sign +(=), a plus sign (+), or a minus sign (-), and +.I set +is a string using charset syntax. If +.I op +is an equal sign, the set of body characters +is set to the character set defined by +.IR set . +If +.I op +is a plus/minus sign, the characters in the set defined by +.I set +are added/removed to/from the existing +set of body characters defined by the +.B \s-1PARBODY\s0 +environment variable and any previous B options. It +is okay to add characters that are already in the +set or to remove characters that are not in the set. +.TP +.BI P opset +Just like the +.B B +option, except that it applies to +the set of protective characters. +.TP +.BI Q opset +Just like the +.B B +option, except that it applies +to the set of quote characters. +.TP +.BI W opset +Just like the +.B B +option, except that it applies +to the set of white characters. +.TP +.BI Z opset +Just like the +.B B +option, except that it applies +to the set of terminal characters. +.LP +All remaining options are used to set values of +parameters. Values set by command line options hold for all +paragraphs. Unset parameters are given default values. Any +parameters whose default values depend on the IP (namely +.I prefix +and +.IR suffix ), +if left unset, are recomputed separately for each paragraph. +.LP +The approximate role of each +variable is described here. See the +.SM DETAILS +section for the rest of the story. +.LP +The first six parameters, +.IR hang , +.IR prefix , +.IR repeat , +.IR suffix , +.IR Tab , +and +.IR width , +may be set to any unsigned decimal integer less than 10000. +.TP 1i +.BI h\fR[ hang\fR] +Mainly affects the default values of +.I prefix +and +.IR suffix . +Defaults to 0. If the +.B h +option is given without a number, the +value 1 is inferred. (See also the +.B p +and +.B s +options.) 
+.TP +.BI p\fR[ prefix\fR] +The first +.I prefix +characters of each line of the OP are copied from the first +.I prefix +characters of the corresponding line +of the IP. If there are more than +.IR hang +1 +lines in the IP, the default value is the comprelen +of all the lines in the IP except the first +.I hang +of them. Otherwise, the default value is the +augmented fallback prelen of the IP. If the +.B p +option is given without a number, +.I prefix +is unset, even if it had been set earlier. (See also the +.B h +and +.B q +options.) +.TP +.BI r\fR[ repeat\fR] +If +.I repeat +is non-zero, bodiless lines have the number of +instances of their repeat characters increased +or decreased until the length of the line is +.IR width . +The exact value of +.I repeat +affects the definition of bodiless +line. Defaults to 0. If the +.B r +option is given without a number, the +value 3 is inferred. (See also the +.B w +option.) +.TP +.BI s\fR[ suffix\fR] +The last +.I suffix +characters of each line of the OP are copied from the last +.I suffix +characters of the corresponding line +of the IP. If there are more than +.IR hang +1 +lines in the IP, the default value is the comsuflen +of all the lines of the IP except the first +.I hang +of them. Otherwise, the default value is +the fallback suflen of the IP. If the +.B s +option is given without a number, +.I suffix +is unset, even if it had been set earlier. (See also the +.B h +option.) +.TP +.BI T\fR[ Tab\fR] +Tab characters in the input are expanded +to spaces, assuming tab stops every +.I Tab +columns. Must not be 0. Defaults to 1. If the +.B T +option is given without a number, the value 8 is inferred. +.TP +.BI w\fR[ width\fR] +No line in the OP may contain more than +.I width +characters, not including the trailing +newlines. Defaults to 72. If the +.B w +option is given without a number, the value 79 is inferred. 
+.LP +The remaining thirteen parameters, +.IR body , +.IR cap , +.IR div , +.IR Err , +.IR expel , +.IR fit , +.IR guess , +.IR invis , +.IR just , +.IR last , +.IR quote , +.IR Report , +and +.IR touch , +may be set to either 0 or 1. If the number is +absent in the option, the value 1 is inferred. +.TP 1i +.BI b\fR[ body\fR] +If +.I body +is 1, prefixes may not contain any trailing body characters, +and suffixes may not contain any leading body characters. +(Actually, the situation is complicated by space characters. +See comprelen and comsuflen in the Terminology section.) If +.I body +is 0, prefixes and suffixes may not contain +any body characters at all. Defaults to 0. +.TP +.BI c\fR[ cap\fR] +If +.I cap +is 1, all words are considered capitalized. This +currently affects only the application of the +.B g +option. Defaults to 0. +.TP +.BI d\fR[ div\fR] +If +.I div +is 0, each block becomes an IP. If +.I div +is 1, each block is subdivided into IPs as follows: Let +.I p +be the comprelen of the block. +Let a line's status be 1 if its +.RI ( p +1)st +character is a space, 0 otherwise. Every line in the +block whose status is the same as the status of the +first line will begin a new paragraph. Defaults to 0. +.TP +.BI E\fR[ Err\fR] +If +.I Err +is 1, messages to the user (caused by the +.B help +and +.B version +options, or by errors) are sent to the error stream +instead of the output stream. Defaults to 0. +.TP +.BI e\fR[ expel\fR] +If +.I expel +is 1, superfluous lines are withheld +from the output. Defaults to 0. +.TP +.BI f\fR[ fit\fR] +If +.I fit +is 1 and +.I just +is 0, +.B par +tries to make the lines in the OP as nearly the +same length as possible, even if it means making +the OP narrower. Defaults to 0. (See also the +.B j +option.) +.TP +.BI g\fR[ guess\fR] +If +.I guess +is 1, then when +.B par +is choosing line breaks, whenever it encounters a curious +word followed by a capitalized word, it takes one of two +special actions. 
If the two words are separated by a +single space in the input, they will be merged into one +word with an embedded non-breaking space. If the two words +are separated by more than one space, or by a line break, +.B par +will insure that they are separated by two spaces, +or by a line break, in the output. Defaults to 0. +.TP +.BI i\fR[ invis\fR] +If +.I invis +is 1, then vacant lines inserted because +.I quote +is 1 are invisible; that is, they are not output. If +.I quote +is 0, +.I invis +has no effect. Defaults to 0. (See also the +.B q +option.) +.TP +.BI j\fR[ just\fR] +If +.I just +is 1, +.B par +justifies the OP, inserting spaces between words +so that all lines in the OP have length +.I width +(except the last, if +.I last +is 0). Defaults to 0. (See also the +.BR w , +.BR l , +and +.B f +options.) +.TP +.BI l\fR[ last\fR] +If +.I last +is 1, +.B par +tries to make the last line of the OP about +the same length as the others. Defaults to 0. +.TP +.BI q\fR[ quote\fR] +If +.I quote +is 1, then before each segment +is scanned for bodiless lines, +.B par +supplies vacant lines between different quotation nesting +levels as follows: For each pair of adjacent lines in +the segment, (scanned from the top down) which have +different quoteprefixes, one of two actions is taken. If +.I invis +is 0, and either line consists entirely of quote +characters and spaces (or is empty), that line +is truncated to the longest common prefix of the +two lines (both are truncated if both qualify). +Otherwise, a line consisting of the longest common +prefix of the two lines is inserted between them. +.I quote +also affects the default value of +.IR prefix . +Defaults to 0. (See also the +.B p +and +.B i +options.) +.TP +.BI R\fR[ Report\fR] +If +.I Report +is 1, it is considered an error for +an input word to contain more than +.IR L \ = +.RI ( width \ - +.IR prefix \ - +.IR suffix ) +characters. 
Otherwise, such +words are chopped after each +.IR L th +character into shorter words. Defaults to 0. +.TP +.BI t\fR[ touch\fR] +Has no effect if +.I suffix +is 0 or +.I just +is 1. Otherwise, if +.I touch +is 0, all lines in the OP have length +.IR width . +If +.I touch +is 1, the length of the lines is decreased until the +suffixes touch the body of the OP. Defaults to the logical +.SM OR +of +.I fit +and +.IR last . +(See also the +.BR s , +.BR j , +.BR w , +.BR f , +and +.B l +options.) +.LP +If an argument begins with a number, +that number is assumed to belong to a +.B p +option if it is 8 or less, and to a +.B w +option otherwise. +.LP +If the value of any parameter is set more +than once, the last value is used. When +unset parameters are assigned default values, +.I hang +and +.I quote +are assigned before +.IR prefix , +and +.I fit +and +.I last +are assigned before +.I touch +(because of the dependencies). +.LP +It is an error if +.I width +<= +.I prefix ++ +.IR suffix . +.SH ENVIRONMENT +.TP 1i +.B \s-1PARBODY\s0 +Determines the initial set of body characters +(which are used for determining comprelens +and comsuflens), using charset syntax. If +.B \s-1PARBODY\s0 +is not set, the set of body characters is initially empty. +.TP +.B \s-1PARINIT\s0 +If set, +.B par +will read command line options from +.B \s-1PARINIT\s0 +before it reads them from the command line. +Within the value of +.B \s-1PARINIT\s0, +arguments are separated by the initial set of white characters. +.TP +.B \s-1PARPROTECT\s0 +Determines the set of protective +characters, using charset syntax. If +.B \s-1PARPROTECT\s0 +is not set, the set of protective +characters is initially empty. +.TP +.B \s-1PARQUOTE\s0 +Determines the set of quote +characters, using charset syntax. If +.B \s-1PARQUOTE\s0 +is not set, the set of quote characters initially +contains only the greater-than sign (>) and the space. 
+.LP +If a +.SM NUL +character appears in the value of an environment variable, +it and the rest of the string will not be seen by +.BR par . +.LP +Note that the +.B \s-1PARINIT\s0 +variable, together with the +.BR B , +.BR P , +and +.B Q +options, renders the other environment variables +unnecessary. They are included for backward compatibility. +.SH DETAILS +.LP +Lines are terminated by newline characters, but the +newlines are not considered to be included in the lines. +If the last character of the input is a non-newline, +a newline will be inferred immediately after it (but +if the input is empty, no newline will be inferred; +the number of input lines will be 0). Thus, the +input can always be viewed as a sequence of lines. +.LP +Protected lines are copied unchanged from the input to the +output. All other input lines, as they are read, have any +.SM NUL +characters removed, and every white character +(except newlines) turned into a space. +Actually, each tab character is turned into +.I Tab +- +.RI ( n +% +.IR Tab ) +spaces, where +.I n +is the number of characters preceding the +tab character on the line (evaluated after +earlier tab characters have been expanded). +.LP +Blank lines in the input are transformed +into empty lines in the output. +.LP +If +.I repeat +is 0, all bodiless lines are vacant, and they are all +simply stripped of trailing spaces before being output. If +.I repeat +is not 0, only vacant lines whose suffixes have length +0 are treated that way; other bodiless lines have +the number of instances of their repeat characters +increased or decreased until the length of the line is +.IR width . +.LP +If +.I expel +is 1, superfluous lines are not output. If +.I quote +and +.I invis +are both 1, there may be invisible +lines; they are not output. +.LP +The input is divided into segments, which are +divided into blocks, which are divided into +IPs. 
The exact process depends on the values of +.I quote +and +.I div +(see +.B q +and +.B d +in the +.SM OPTIONS +section). The remainder of this section describes +the process which is applied independently to +each IP to construct the corresponding OP. +.LP +After the values of the parameters are determined (see the +.SM OPTIONS +section), the first +.I prefix +characters and the last +.I suffix +characters of each input line are removed and remembered. +It is an error for any line to contain fewer than +.IR prefix \ +\ suffix +characters. +.LP +The remaining text is treated as a sequence of +characters, not lines. The text is broken into +words, which are separated by spaces. That is, a +word is a maximal sub-sequence of non-spaces. If +.I guess +is 1, some words might be merged (see +.B g +in the +.SM OPTIONS +section). The first word includes any +spaces that precede it on the same line. +.LP +Let +.I L += +.I width +\- +.I prefix +\- +.IR suffix . +.LP +If +.I Report +is 0, some words may get chopped up at this point (see +.B R +in the +.SM OPTIONS +section). +.LP +The words are reassembled, preserving +their order, into lines. If +.I just +is 0, adjacent words within a line are separated +by a single space, (or sometimes two if +.I guess +is 1), and line breaks are chosen so that +the paragraph satisfies the following properties: +.RS 1i +.IT 1) No line contains more than +.I L +characters. +.IT 2) If +.I fit +is 1, the difference between the lengths of the +shortest and longest lines is as small as possible. +.IT 3) The shortest line is as long as +possible, subject to properties 1 and 2. +.IT 4) Let +.I target +be +.I L +if +.I fit +is 0, or the length of the longest line if +.I fit +is 1. The sum of the squares of the differences between +.I target +and the lengths of the lines is as small as +possible, subject to properties 1, 2, and 3. 
+.RE +.RS .5i +.LP +If +.I last +is 0, the last line does not count as a line for +the purposes of properties 2, 3, and 4 above. +.LP +If all the words fit on a single line, the +properties as worded above don't make much +sense. In that case, no line breaks are inserted. +.RE +.LP +If +.I just +is 1, adjacent words within a line are +separated by one space (or sometimes two if +.I guess +is 1) plus zero or more extra spaces. The value of +.I fit +is disregarded, and line breaks are chosen so that +the paragraph satisfies the following properties: +.RS 1i +.IT 1) Every line contains exactly +.I L +characters. +.IT 2) The largest inter-word gap is as small as +possible, subject to property 1. (An inter-word gap +consists only of the extra spaces, not the regular spaces.) +.IT 3) The sum of the squares of the lengths +of the inter-word gaps is as small as +possible, subject to properties 1 and 2. +.RE +.RS .5i +.LP +If +.I last +is 0, the last line does not count as a line +for the purposes of property 1, and it does +not require or contain any extra spaces. +.LP +Extra spaces are distributed as uniformly as +possible among the inter-word gaps in each line. +.LP +In a justified paragraph, every line must +contain at least two words, but that's not +always possible to accomplish. If the paragraph +cannot be justified, it is considered an error. +.RE +.LP +If the number of lines in the +resulting paragraph is less than +.IR hang , +empty lines are added at the end +to bring the number of lines up to +.IR hang . +.LP +If +.I just +is 0 and +.I touch +is 1, +.I L +is changed to be the length of the longest line. +.LP +If +.I suffix +is not 0, each line is padded at the end +with spaces to bring its length up to +.IR L . +.LP +To each line is prepended +.I prefix +characters. Let +.I n +be the number of lines in the IP, let +.I afp +be the augmented fallback prelen of the IP, and let +.I fs +be the fallback suflen of the IP. 
The +characters which are prepended to the +.IR i th +line are chosen as follows: +.RS +.LP +.IT 1) If +.I i +<= +.IR n , +the characters are copied from the ones +that were removed from the beginning of the +.IR i th +input line. +.IT 2) If +.I i +> +.I n +> +.IR hang , +the characters are copied from the ones that were +removed from the beginning of the last input line. +.IT 3) If +.I i +> +.I n +and +.I n +<= +.IR hang , +the first +.RI min( afp , prefix ) +of the characters are copied from the ones +that were removed from the beginning of the +last input line, and the rest are all spaces. +.RE +.LP +Then to each line is appended +.I suffix +characters. The characters which are appended to the +.IR i th +line are chosen as follows: +.RS +.LP +.IT 1) If +.I i +<= +.IR n , +the characters are copied from the ones +that were removed from the end of the +.IR i th +input line. +.IT 2) If +.I i +> +.I n +> +.IR hang , +the characters are copied from the ones that +were removed from the end of the last input line. +.IT 3) If +.I i +> +.I n +and +.I n +<= +.IR hang , +the first +.RI min( fs , suffix ) +of the characters are copied from the ones +that were removed from the end of the +last input line, and the rest are all spaces. +.RE +.LP +Finally, the lines are printed to the output as the OP. +.SH DIAGNOSTICS +.LP +If there are no errors, +.B par +returns +.B \s-1EXIT_SUCCESS\s0 +(see +.BR <stdlib.h> ). +.LP +If there is an error, an error message +will be printed to the output, and +.B par +will return +.BR \s-1EXIT_FAILURE\s0 . +If the error is local to a single paragraph, the preceding +paragraphs will have been output before the error +was detected. Line numbers in error messages are +local to the IP in which the error occurred. All +error messages begin with \*Qpar\ error:\*U on a line +by itself. Error messages concerning command line +or environment variable syntax are accompanied by +the same usage message that the help option produces.
+.LP +Of course, trying to print an error message would be +futile if an error resulted from an output function, so +.B par +doesn't bother doing any error checking on output functions. +.SH EXAMPLES +.de VS +.RS -.5i +.LP +.nf +.ps -1p +.vs -2p +.ft CW +.. +.de VE +.ft P +.vs +.ps +.fi +.RE +.. +.de CM +\&\*Q\fB\\$1\fP\\*U: +.. +.LP +The superiority of +.BR par 's +dynamic programming algorithm over a +greedy algorithm (such as the one used by +.BR fmt ) +can be seen in the following example: +.LP +Original paragraph (note that +each line begins with 8 spaces): +.VS + We the people of the United States, + in order to form a more perfect union, + establish justice, + insure domestic tranquility, + provide for the common defense, + promote the general welfare, + and secure the blessing of liberty + to ourselves and our posterity, + do ordain and establish the Constitution + of the United States of America. +.VE +.LP +After a greedy algorithm with width = 39: +.VS + We the people of the United + States, in order to form a more + perfect union, establish + justice, insure domestic + tranquility, provide for the + common defense, promote the + general welfare, and secure the + blessing of liberty to + ourselves and our posterity, do + ordain and establish the + Constitution of the United + States of America. +.VE +.LP +After +.CM "par 39" +.VS + We the people of the United + States, in order to form a + more perfect union, establish + justice, insure domestic + tranquility, provide for the + common defense, promote the + general welfare, and secure + the blessing of liberty to + ourselves and our posterity, + do ordain and establish the + Constitution of the United + States of America. +.VE +.LP +The line breaks chosen by +.B par +are clearly more eye-pleasing. +.LP +.B par +is most useful in conjunction with the text-filtering +features of an editor, such as the ! commands of +.BR vi . 
+You may wish to add the following lines to your +.B .exrc +file: +.VS + " use Bourne shell for speed: + set shell=/bin/sh + " + " reformat paragraph with no arguments: + map ** {!}par^M} + " + " reformat paragraph with arguments: + map *^V {!}par +.VE +.LP +Note that the leading spaces must be removed, and that what +is shown as ^M and ^V really need to be ctrl-M and ctrl-V. +Also note that the last map command contains two spaces +following the ctrl-V, plus one at the end of the line. +.LP +To reformat a simple paragraph delimited by blank lines in +.BR vi , +you can put the cursor anywhere in it and type +\*Q\fB**\fP\*U (star star). If you need to supply +arguments to par, you can type \*Q\fB*\ \fP\*U +(star space) instead, then type the arguments. +.LP +The rest of this section is a series of +before-and-after pictures showing some typical uses of +.BR par . +In all cases, no environment variables are set. +.LP +Before: +.VS + /* We the people of the United States, */ + /* in order to form a more perfect union, */ + /* establish justice, */ + /* insure domestic tranquility, */ + /* provide for the common defense, */ + /* promote the general welfare, */ + /* and secure the blessing of liberty */ + /* to ourselves and our posterity, */ + /* do ordain and establish the Constitution */ + /* of the United States of America. */ +.VE +.LP +After +.CM "par 59" +.VS + /* We the people of the United States, in */ + /* order to form a more perfect union, establish */ + /* justice, insure domestic tranquility, provide */ + /* for the common defense, promote the general */ + /* welfare, and secure the blessing of liberty */ + /* to ourselves and our posterity, do ordain */ + /* and establish the Constitution of the United */ + /* States of America. 
*/ +.VE +.LP +Or after +.CM "par 59f" +.VS + /* We the people of the United States, */ + /* in order to form a more perfect union, */ + /* establish justice, insure domestic */ + /* tranquility, provide for the common */ + /* defense, promote the general welfare, */ + /* and secure the blessing of liberty to */ + /* ourselves and our posterity, do ordain */ + /* and establish the Constitution of the */ + /* United States of America. */ +.VE +.LP +Or after +.CM "par 59l" +.VS + /* We the people of the United States, in */ + /* order to form a more perfect union, establish */ + /* justice, insure domestic tranquility, */ + /* provide for the common defense, promote */ + /* the general welfare, and secure the */ + /* blessing of liberty to ourselves and our */ + /* posterity, do ordain and establish the */ + /* Constitution of the United States of America. */ +.VE +.LP +Or after +.CM "par 59lf" +.VS + /* We the people of the United States, */ + /* in order to form a more perfect union, */ + /* establish justice, insure domestic */ + /* tranquility, provide for the common */ + /* defense, promote the general welfare, */ + /* and secure the blessing of liberty */ + /* to ourselves and our posterity, do */ + /* ordain and establish the Constitution */ + /* of the United States of America. */ +.VE +.LP +Or after +.CM "par 59lft0" +.VS + /* We the people of the United States, */ + /* in order to form a more perfect union, */ + /* establish justice, insure domestic */ + /* tranquility, provide for the common */ + /* defense, promote the general welfare, */ + /* and secure the blessing of liberty */ + /* to ourselves and our posterity, do */ + /* ordain and establish the Constitution */ + /* of the United States of America. 
*/ +.VE +.LP +Or after +.CM "par 59j" +.VS + /* We the people of the United States, in */ + /* order to form a more perfect union, establish */ + /* justice, insure domestic tranquility, provide */ + /* for the common defense, promote the general */ + /* welfare, and secure the blessing of liberty */ + /* to ourselves and our posterity, do ordain and */ + /* establish the Constitution of the United */ + /* States of America. */ +.VE +.LP +Or after +.CM "par 59jl" +.VS + /* We the people of the United States, */ + /* in order to form a more perfect */ + /* union, establish justice, insure domestic */ + /* tranquility, provide for the common defense, */ + /* promote the general welfare, and secure */ + /* the blessing of liberty to ourselves and */ + /* our posterity, do ordain and establish the */ + /* Constitution of the United States of America. */ +.VE +.LP +Before: +.VS + Preamble We the people of the United States, + to the US in order to form + Constitution a more perfect union, + establish justice, + insure domestic tranquility, + provide for the common defense, + promote the general welfare, + and secure the blessing of liberty + to ourselves and our posterity, + do ordain and establish + the Constitution + of the United States of America. +.VE +.LP +After +.CM "par 52h3" +.VS + Preamble We the people of the United + to the US States, in order to form a + Constitution more perfect union, establish + justice, insure domestic + tranquility, provide for the + common defense, promote the + general welfare, and secure + the blessing of liberty to + ourselves and our posterity, + do ordain and establish the + Constitution of the United + States of America. 
+.VE +.LP +Before: +.VS + 1 We the people of the United States, + 2 in order to form a more perfect union, + 3 establish justice, + 4 insure domestic tranquility, + 5 provide for the common defense, + 6 promote the general welfare, + 7 and secure the blessing of liberty + 8 to ourselves and our posterity, + 9 do ordain and establish the Constitution + 10 of the United States of America. +.VE +.LP +After +.CM "par 59p12l" +.VS + 1 We the people of the United States, in order to + 2 form a more perfect union, establish justice, + 3 insure domestic tranquility, provide for the + 4 common defense, promote the general welfare, + 5 and secure the blessing of liberty to ourselves + 6 and our posterity, do ordain and establish the + 7 Constitution of the United States of America. +.VE +.LP +Before: +.VS + > > We the people + > > of the United States, + > > in order to form a more perfect union, + > > establish justice, + > > ensure domestic tranquility, + > > provide for the common defense, + > + > Promote the general welfare, + > and secure the blessing of liberty + > to ourselves and our posterity, + > do ordain and establish + > the Constitution of the United States of America. +.VE +.LP +After +.CM "par 52" +.VS + > > We the people of the United States, in + > > order to form a more perfect union, + > > establish justice, ensure domestic + > > tranquility, provide for the common + > > defense, + > + > Promote the general welfare, and secure + > the blessing of liberty to ourselves and + > our posterity, do ordain and establish + > the Constitution of the United States of + > America. +.VE +.LP +Before: +.VS + > We the people + > of the United States, + > in order to form a more perfect union, + > establish justice, + > ensure domestic tranquility, + > provide for the common defense, + > Promote the general welfare, + > and secure the blessing of liberty + > to ourselves and our posterity, + > do ordain and establish + > the Constitution of the United States of America. 
+.VE +.LP +After +.CM "par 52d" +.VS + > We the people of the United States, + > in order to form a more perfect union, + > establish justice, ensure domestic + > tranquility, provide for the common + > defense, + > Promote the general welfare, and secure + > the blessing of liberty to ourselves and + > our posterity, do ordain and establish + > the Constitution of the United States of + > America. +.VE +.LP +Before: +.VS + # 1. We the people of the United States. + # 2. In order to form a more perfect union. + # 3. Establish justice, ensure domestic + # tranquility. + # 4. Provide for the common defense + # 5. Promote the general welfare. + # 6. And secure the blessing of liberty + # to ourselves and our posterity. + # 7. Do ordain and establish the Constitution. + # 8. Of the United States of America. +.VE +.LP +After +.CM "par 37p13dh" +.VS + # 1. We the people of the + # United States. + # 2. In order to form a more + # perfect union. + # 3. Establish justice, + # ensure domestic + # tranquility. + # 4. Provide for the common + # defense + # 5. Promote the general + # welfare. + # 6. And secure the blessing + # of liberty to ourselves + # and our posterity. + # 7. Do ordain and establish + # the Constitution. + # 8. Of the United States of + # America. +.VE +.LP +Before: +.VS + /*****************************************/ + /* We the people of the United States, */ + /* in order to form a more perfect union, */ + /* establish justice, insure domestic */ + /* tranquility, */ + /* */ + /* */ + /* [ provide for the common defense, ] */ + /* [ promote the general welfare, ] */ + /* [ and secure the blessing of liberty ] */ + /* [ to ourselves and our posterity, ] */ + /* [ ] */ + /* */ + /* do ordain and establish the Constitution */ + /* of the United States of America. 
*/ + /******************************************/ +.VE +.LP +After +.CM "par 42r" +.VS + /********************************/ + /* We the people of the */ + /* United States, in order to */ + /* form a more perfect union, */ + /* establish justice, insure */ + /* domestic tranquility, */ + /* */ + /* */ + /* [ provide for the common ] */ + /* [ defense, promote the ] */ + /* [ general welfare, and ] */ + /* [ secure the blessing of ] */ + /* [ liberty to ourselves ] */ + /* [ and our posterity, ] */ + /* [ ] */ + /* */ + /* do ordain and establish the */ + /* Constitution of the United */ + /* States of America. */ + /********************************/ +.VE +.LP +Or after +.CM "par 42re" +.VS + /********************************/ + /* We the people of the */ + /* United States, in order to */ + /* form a more perfect union, */ + /* establish justice, insure */ + /* domestic tranquility, */ + /* */ + /* [ provide for the common ] */ + /* [ defense, promote the ] */ + /* [ general welfare, and ] */ + /* [ secure the blessing of ] */ + /* [ liberty to ourselves ] */ + /* [ and our posterity, ] */ + /* */ + /* do ordain and establish the */ + /* Constitution of the United */ + /* States of America. */ + /********************************/ +.VE +.LP +Before: +.VS + Joe Public writes: + > Jane Doe writes: + > > + > > + > > I can't find the source for uncompress. + > Oh no, not again!!! + > + > + > Isn't there a FAQ for this? + > + > + That wasn't very helpful, Joe. Jane, + just make a link from uncompress to compress. +.VE +.LP +After +.CM "par 40q" +.VS + Joe Public writes: + + > Jane Doe writes: + > + > + > > I can't find the source for + > > uncompress. + > + > Oh no, not again!!! + > + > + > Isn't there a FAQ for this? + > + + That wasn't very helpful, Joe. + Jane, just make a link from + uncompress to compress. +.VE +.LP +Or after +.CM "par 40qe" +.VS + Joe Public writes: + + > Jane Doe writes: + > + > > I can't find the source for + > > uncompress. 
+ > + > Oh no, not again!!! + > + > Isn't there a FAQ for this? + + That wasn't very helpful, Joe. + Jane, just make a link from + uncompress to compress. +.VE +.LP +Or after +.CM "par 40qi" +.VS + Joe Public writes: + > Jane Doe writes: + > > + > > + > > I can't find the source for + > > uncompress. + > Oh no, not again!!! + > + > + > Isn't there a FAQ for this? + > + > + That wasn't very helpful, Joe. + Jane, just make a link from + uncompress to compress. +.VE +.LP +Or after +.CM "par 40qie" +.VS + Joe Public writes: + > Jane Doe writes: + > > I can't find the source for + > > uncompress. + > Oh no, not again!!! + > + > Isn't there a FAQ for this? + That wasn't very helpful, Joe. + Jane, just make a link from + uncompress to compress. +.VE +.LP +Before: +.VS + I sure hope there's still room + in Dr. Jones' section of archaeology. + I've heard he's the bestest. [sic] +.VE +.LP +After +.CM "par 50g" +.VS + I sure hope there's still room in + Dr. Jones' section of archaeology. I've + heard he's the bestest. [sic] +.VE +.LP +Or after +.CM "par 50gc" +.VS + I sure hope there's still room in + Dr. Jones' section of archaeology. I've + heard he's the bestest. [sic] +.VE +.LP +Before: +.VS + John writes: + : Mary writes: + : + Anastasia writes: + : + > Hi all! + : + Hi Ana! + : Hi Ana & Mary! + Please unsubscribe me from alt.hello. +.VE +.LP +After +.CM "par Q+:+ q" +.VS + John writes: + + : Mary writes: + : + : + Anastasia writes: + : + + : + > Hi all! + : + + : + Hi Ana! + : + : Hi Ana & Mary! + + Please unsubscribe me from alt.hello. +.VE +.LP +Before: +.VS + amc> The b option was added primarily to deal with + amc> this new style of quotation + amc> which became popular after Par 1.41 was released. + amc> + amc> Par still pays attention to body characters. + amc> Par should not mistake "Par" for part of the prefix. + amc> Par should not mistake "." for a suffix. 
+.VE +.LP +After +.CM "par B=._A_a 50bg" +.VS + amc> The b option was added primarily to + amc> deal with this new style of quotation + amc> which became popular after Par 1.41 + amc> was released. + amc> + amc> Par still pays attention to body + amc> characters. Par should not mistake + amc> "Par" for part of the prefix. Par + amc> should not mistake "." for a suffix. +.VE +.SH SEE ALSO +.LP +.B par.doc +.SH LIMITATIONS +.LP +The +.I guess +feature guesses wrong in cases like the following: +.VS + I calc'd the approx. + Fermi level to 3 sig. digits. +.VE +.LP +With +.I guess += 1, +.B par +will incorrectly assume that \*Qapprox.\*U +ends a sentence. If the input were: +.VS + I calc'd the approx. Fermi + level to 3 sig. digits. +.VE +.LP +then +.B par +would refuse to put a line break between +\*Qapprox.\*U and \*QFermi\*U in the output, +mainly to avoid creating the first situation (in +case the paragraph were to be fed back through +.B par +again). This non-breaking space policy does come in handy +for cases like \*QMr.\ Johnson\*U and \*QJan.\ 1\*U, though. +.LP +The +.I guess +feature only goes one way. +.B par +can preserve wide sentence breaks in a +paragraph, or remove them, but it can't insert +them if they aren't already in the input. +.LP +If you use tabs, you may not like the way +.B par +handles (or doesn't handle) them. It +expands them into spaces. I didn't let +.B par +output tabs because tabs don't make sense. Not everyone's +terminal has the same tab settings, so text files containing +tabs are sometimes mangled. In fact, almost every text file +containing tabs gets mangled when something is inserted at the +beginning of each line (when quoting e-mail or commenting out +a section of a shell script, for example), making them a pain +to edit. In my opinion, the world would be a nicer place if +everyone stopped using tabs, so I'm doing my part by not letting +.B par +output them. (Thanks to Eric Stuebe for showing me the +light about tabs.) 
+.LP +There is currently no way for the length of the +output prefix to differ from the length of the +input prefix. Ditto for the suffix. I may consider +adding this capability in a future release, but +right now I'm not sure how I'd want it to work. +.SH APOLOGIES +.LP +Par began in July 1993 as a small program designed to do one +narrow task: reformat a single paragraph that might have a +border on either side. It was pretty clean back then. Over +the next three months, it very rapidly expanded to handle +multiple paragraphs, offer more options, and take better +guesses, at the cost of becoming extremely complex, and very +unclean. It is nowhere near the optimal design for the larger +task it now tries to address. Its only redeeming features +are that it is extremely useful (I find it indispensable), +extremely portable, and very stable since version 1.41 released +on 1993-Oct-31. +.LP +Back in 1993 I had very little experience at writing +documentation for users, so the documentation for Par +became rather nightmarish. There is no separation between +how-it-works (which is painfully complex) and how-to-use-it +(which is fairly simple, if you can ever figure it out). +.LP +Someday I ought to reexamine the problem, and redesign +a new, clean solution from scratch. I don't know +when I might get enough free time to start on such +a project. Text files may be obsolete by then. +.SH BUGS +.LP +If I knew of any bugs, I wouldn't release the package. Of +course, there may be bugs that I haven't yet discovered. +.LP +If you find any bugs (in the program or +in the documentation), or if you have +any suggestions, please contact me: +.RS +.LP +http://www.nicemice.net/amc/ +.RE +.LP +When reporting a bug, please include the exact input and +command line options used, and the version number of +.BR par , +so that I can reproduce it. 
+.LP +The latest release of Par is available on the Web at: +.RS +.LP +http://www.nicemice.net/par/ +.RE +.LP +I don't expect these URLs to change in the foreseeable +future, but if they do, I'll try to leave forward pointers. diff --git a/par/Par-1.53.0/par.c b/par/Par-1.53.0/par.c new file mode 100644 index 0000000..849c9ae --- /dev/null +++ b/par/Par-1.53.0/par.c @@ -0,0 +1,944 @@ +/* +par.c +last touched in Par 1.53.0 +last meaningful change in Par 1.53.0 +Copyright 1993, 1996, 2001, 2020 Adam M. Costello + +This is ANSI C code (C89). + +*/ + + +#include "buffer.h" +#include "charset.h" +#include "errmsg.h" +#include "reformat.h" + +#include <ctype.h> +#include <locale.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#undef NULL +#define NULL ((void *) 0) + +#ifdef DONTFREE +#define free(ptr) +#endif + + +/*=== + +Regarding char and unsigned char: ANSI C is a nightmare in this +respect. Some functions, like puts(), strchr(), and getenv(), use char +or char*, so they work well with character constants like 'a', which +are char, and with argv, which is char**. But several other functions, +like getchar(), putchar(), and isdigit(), use unsigned char (converted +to/from int). Therefore innocent-looking code can be wrong, for +example: + + int c = getchar(); + if (c == 'a') ... + +This is wrong because 'a' is char (converted to int) and could be +negative, but getchar() returns unsigned char (converted to int), so c +is always nonnegative or EOF. For similar reasons, it is wrong to pass +a char to a function that expects an unsigned char: + + putchar('\n'); + if (isdigit(argv[1][0])) ... + +Inevitably, we need to convert between char and unsigned char. This can +be done by integral conversion (casting or assigning a char to unsigned +char or vice versa), or by aliasing (converting a pointer to char to +a pointer to unsigned char (or vice versa) and then dereferencing +it).
ANSI C requires that integral conversion alters the bits when the +unsigned value is not representable in the signed type and the signed +type does not use two's complement representation. Aliasing, on the +other hand, preserves the bits. Although the C standard is not at all +clear about which sort of conversion is appropriate for making the +standard library functions interoperate, I think preserving the bits +is what is needed. Under that assumption, here are some examples of +correct code: + + int c = getchar(); + char ch; + + if (c != EOF) { + *(unsigned char *)&ch = c; + if (ch == 'a') ... + if (isdigit(c)) ... + } + + char *s = ... + if (isdigit(*(unsigned char *)s)) ... + +===*/ + + +static const char * const usagemsg = +"\n" +"Options for par:\n" +"\n" +"help print option summary " + " ---------- Boolean parameters: ---------\n" +"version print version number " + " b<body> let non-trailing body chars in\n" +"B<op><set> as <op> is =/+/-, " + " prefix, non-leading in suffix\n" +" replace/augment/diminish " + " c<cap> count all words as capitalized\n" +" body chars by <set> " + " d<div> use indentation as a delimiter\n" +"P,Q,W,Z ditto for protective, " + " E<Err> send messages to stderr\n" +" quote,white,terminal chars" + " e<expel> discard superfluous lines\n" +"-------- Integer parameters: --------" + " f<fit> narrow paragraph for best fit\n" +"h<hang> skip IP's 1st <hang> lines" + " g<guess> preserve wide sentence breaks\n" +" in scan for common affixes" + " i<invis> hide lines inserted by <quote>\n" +"p<prefix> prefix length " + " j<just> justify paragraphs\n" +"r<repeat> if not 0, force bodiless " + " l<last> treat last lines like others\n" +" lines to length <width> " + " q<quote> supply vacant lines between\n" +"s<suffix> suffix length " + " different quote nesting levels\n" +"T<Tab> tab stops every <Tab> cols" + " R<Report> print error for too-long words\n" +"w<width> max output line length " + " t<touch> move suffixes left\n" +"\n" +"See par.doc or 
par.1 (the man page) for more information.\n" +"\n" +; + + +/* Structure for recording properties of lines within segments: */ + +typedef unsigned char lflag_t; + +typedef struct lineprop { + short p, s; /* Length of the prefix and suffix of a bodiless */ + /* line, or the fallback prelen and suflen */ + /* of the IP containing a non-bodiless line. */ + lflag_t flags; /* Boolean properties (see below). */ + char rc; /* The repeated character of a bodiless line. */ +} lineprop; + +/* Flags for marking boolean properties: */ + +static const lflag_t L_BODILESS = 1, /* Bodiless line. */ + L_INSERTED = 2, /* Inserted by quote. */ + L_FIRST = 4, /* First line of a paragraph. */ + L_SUPERF = 8; /* Superfluous line. */ + +#define isbodiless(prop) ( (prop)->flags & 1) +#define isinserted(prop) (((prop)->flags & 2) != 0) +#define isfirst(prop) (((prop)->flags & 4) != 0) +#define issuperf(prop) (((prop)->flags & 8) != 0) +#define isvacant(prop) (isbodiless(prop) && (prop)->rc == ' ') + + +static int digtoint(char c) + +/* Returns the value represented by the digit c, or -1 if c is not a digit. */ +{ + const char *p, * const digits = "0123456789"; + + if (!c) return -1; + p = strchr(digits,c); + return p ? p - digits : -1; + + /* We can't simply return c - '0' because this is ANSI C code, */ + /* so it has to work for any character set, not just ones which */ + /* put the digits together in order. Also, an array that could */ + /* be referenced as digtoint[c] might be bad because there's no */ + /* upper limit on CHAR_MAX. */ +} + + +static int strtoudec(const char *s, int *pn) + +/* Converts the longest prefix of string s consisting of decimal */ +/* digits to an integer, which is stored in *pn. Normally returns */ +/* 1. If *s is not a digit, then *pn is not changed, but 1 is */ +/* still returned. If the integer represented is greater than */ +/* 9999, then *pn is not changed and 0 is returned. 
*/ +{ + int n = 0, d; + + d = digtoint(*s); + if (d < 0) return 1; + + do { + if (n >= 1000) return 0; + n = 10 * n + d; + d = digtoint(*++s); + } while (d >= 0); + + *pn = n; + + return 1; +} + + +static void parsearg( + const char *arg, + int *phelp, + int *pversion, + charset *bodychars, + charset *protectchars, + charset *quotechars, + charset *whitechars, + charset *terminalchars, + int *phang, + int *pprefix, + int *prepeat, + int *psuffix, + int *pTab, + int *pwidth, + int *pbody, + int *pcap, + int *pdiv, + int *pErr, + int *pexpel, + int *pfit, + int *pguess, + int *pinvis, + int *pjust, + int *plast, + int *pquote, + int *pReport, + int *ptouch, + errmsg_t errmsg +) +/* Parses the command line argument in *arg, setting the objects pointed to */ +/* by the other pointers as appropriate. *phelp and *pversion are boolean */ +/* flags indicating whether the help and version options were supplied. */ +{ + const char *savearg = arg; + charset *chars, *change; + char oc; + int n; + + *errmsg = '\0'; + + if (*arg == '-') ++arg; + + if (!strcmp(arg, "help")) { + *phelp = 1; + return; + } + + if (!strcmp(arg, "version")) { + *pversion = 1; + return; + } + + chars = *arg == 'B' ? bodychars : + *arg == 'P' ? protectchars : + *arg == 'Q' ? quotechars : + *arg == 'W' ? whitechars : + *arg == 'Z' ? 
terminalchars : + NULL; + if (chars) { + ++arg; + if (*arg != '=' && *arg != '+' && *arg != '-') goto badarg; + change = parsecharset(arg + 1, errmsg); + if (change) { + if (*arg == '=') csswap(chars,change); + else if (*arg == '+') csadd(chars,change,errmsg); + else /* *arg == '-' */ csremove(chars,change,errmsg); + freecharset(change); + } + return; + } + + if (isdigit(*(unsigned char *)arg)) { + if (!strtoudec(arg, &n)) goto badarg; + if (n <= 8) *pprefix = n; + else *pwidth = n; + } + + for (;;) { + while (isdigit(*(unsigned char *)arg)) ++arg; + oc = *arg; + if (!oc) break; + n = -1; + if (!strtoudec(++arg, &n)) goto badarg; + if ( oc == 'h' || oc == 'p' || oc == 'r' + || oc == 's' || oc == 'T' || oc == 'w') { + if (oc == 'h') *phang = n >= 0 ? n : 1; + else if (oc == 'p') *pprefix = n; + else if (oc == 'r') *prepeat = n >= 0 ? n : 3; + else if (oc == 's') *psuffix = n; + else if (oc == 'T') *pTab = n >= 0 ? n : 8; + else /* oc == 'w' */ *pwidth = n >= 0 ? n : 79; + } + else { + if (n < 0) n = 1; + if (n > 1) goto badarg; + if (oc == 'b') *pbody = n; + else if (oc == 'c') *pcap = n; + else if (oc == 'd') *pdiv = n; + else if (oc == 'E') *pErr = n; + else if (oc == 'e') *pexpel = n; + else if (oc == 'f') *pfit = n; + else if (oc == 'g') *pguess = n; + else if (oc == 'i') *pinvis = n; + else if (oc == 'j') *pjust = n; + else if (oc == 'l') *plast = n; + else if (oc == 'q') *pquote = n; + else if (oc == 'R') *pReport = n; + else if (oc == 't') *ptouch = n; + else goto badarg; + } + } + + return; + +badarg: + + sprintf(errmsg, "Bad argument: %.*s\n", errmsg_size - 16, savearg); + *phelp = 1; +} + + +static char **readlines( + lineprop **pprops, const charset *protectchars, + const charset *quotechars, const charset *whitechars, + int Tab, int invis, int quote, errmsg_t errmsg +) +/* Reads lines from stdin until EOF, or until a line beginning with a */ +/* protective character is encountered (in which case the protective */ +/* character is pushed back onto the 
input stream), or until a blank */ +/* line is encountered (in which case the newline is pushed back onto */ +/* the input stream). Returns a NULL-terminated array of pointers to */ +/* individual lines, stripped of their newline characters. Every NUL */ +/* character is stripped, and every white character is changed to a */ +/* space unless it is a newline. If quote is 1, vacant lines will be */ +/* supplied as described for the q option in par.doc. *pprops is set */ +/* to an array of lineprop structures, one for each line, each of whose */ +/* flags field is either 0 or L_INSERTED (the other fields are 0). If */ +/* there are no lines, *pprops is set to NULL. The returned array may */ +/* be freed with freelines(). *pprops may be freed with free() if */ +/* it's not NULL. On failure, returns NULL and sets *pprops to NULL. */ +{ + buffer *cbuf = NULL, *lbuf = NULL, *lpbuf = NULL; + int c, empty, blank, firstline, qsonly, oldqsonly = 0, vlnlen, i; + char ch, *ln = NULL, nullchar = '\0', *nullline = NULL, *qpend, + *oldln = NULL, *oldqpend = NULL, *p, *op, *vln = NULL, **lines = NULL; + lineprop vprop = { 0, 0, 0, '\0' }, iprop = { 0, 0, 0, '\0' }; + + /* oldqsonly, oldln, and oldqpend don't really need to be initialized. */ + /* They are initialized only to appease compilers that try to be helpful */ + /* by issuing warnings about uninitialized automatic variables.
*/ + + iprop.flags = L_INSERTED; + *errmsg = '\0'; + + *pprops = NULL; + + cbuf = newbuffer(sizeof (char), errmsg); + if (*errmsg) goto rlcleanup; + lbuf = newbuffer(sizeof (char *), errmsg); + if (*errmsg) goto rlcleanup; + lpbuf = newbuffer(sizeof (lineprop), errmsg); + if (*errmsg) goto rlcleanup; + + for (empty = blank = firstline = 1; ; ) { + c = getchar(); + if (c == EOF) break; + *(unsigned char *)&ch = c; + if (ch == '\n') { + if (blank) { + ungetc(c,stdin); + break; + } + additem(cbuf, &nullchar, errmsg); + if (*errmsg) goto rlcleanup; + ln = copyitems(cbuf,errmsg); + if (*errmsg) goto rlcleanup; + if (quote) { + for (qpend = ln; *qpend && csmember(*qpend, quotechars); ++qpend); + for (p = qpend; *p == ' ' || csmember(*p, quotechars); ++p); + qsonly = *p == '\0'; + while (qpend > ln && qpend[-1] == ' ') --qpend; + if (!firstline) { + for (p = ln, op = oldln; + p < qpend && op < oldqpend && *p == *op; + ++p, ++op); + if (!(p == qpend && op == oldqpend)) { + if (!invis && (oldqsonly || qsonly)) { + if (oldqsonly) { + *op = '\0'; + oldqpend = op; + } + if (qsonly) { + *p = '\0'; + qpend = p; + } + } + else { + vlnlen = p - ln; + vln = malloc((vlnlen + 1) * sizeof (char)); + if (!vln) { + strcpy(errmsg,outofmem); + goto rlcleanup; + } + strncpy(vln,ln,vlnlen); + vln[vlnlen] = '\0'; + additem(lbuf, &vln, errmsg); + if (*errmsg) goto rlcleanup; + additem(lpbuf, &iprop, errmsg); + if (*errmsg) goto rlcleanup; + vln = NULL; + } + } + } + oldln = ln; + oldqpend = qpend; + oldqsonly = qsonly; + } + additem(lbuf, &ln, errmsg); + if (*errmsg) goto rlcleanup; + ln = NULL; + additem(lpbuf, &vprop, errmsg); + if (*errmsg) goto rlcleanup; + clearbuffer(cbuf); + empty = blank = 1; + firstline = 0; + } + else { + if (empty) { + if (csmember(ch, protectchars)) { + ungetc(c,stdin); + break; + } + empty = 0; + } + if (!ch) continue; + if (ch == '\t') { + ch = ' '; + for (i = Tab - numitems(cbuf) % Tab; i > 0; --i) { + additem(cbuf, &ch, errmsg); + if (*errmsg) goto rlcleanup; 
+ } + continue; + } + if (csmember(ch, whitechars)) ch = ' '; + else blank = 0; + additem(cbuf, &ch, errmsg); + if (*errmsg) goto rlcleanup; + } + } + + if (!blank) { + additem(cbuf, &nullchar, errmsg); + if (*errmsg) goto rlcleanup; + ln = copyitems(cbuf,errmsg); + if (*errmsg) goto rlcleanup; + additem(lbuf, &ln, errmsg); + if (*errmsg) goto rlcleanup; + ln = NULL; + additem(lpbuf, &vprop, errmsg); + if (*errmsg) goto rlcleanup; + } + + additem(lbuf, &nullline, errmsg); + if (*errmsg) goto rlcleanup; + *pprops = copyitems(lpbuf,errmsg); + if (*errmsg) goto rlcleanup; + lines = copyitems(lbuf,errmsg); + +rlcleanup: + + if (cbuf) freebuffer(cbuf); + if (lpbuf) freebuffer(lpbuf); + if (lbuf) { + if (!lines) + for (;;) { + lines = nextitem(lbuf); + if (!lines) break; + free(*lines); + } + freebuffer(lbuf); + } + if (ln) free(ln); + if (vln) free(vln); + + return lines; +} + + +static void compresuflen( + const char * const *lines, const char * const *endline, + const charset *bodychars, int body, int pre, int suf, int *ppre, int *psuf +) +/* lines is an array of strings, up to but not including endline. */ +/* Writes into *ppre and *psuf the comprelen and comsuflen of the */ +/* lines in lines. Assumes that they have already been determined */ +/* to be at least pre and suf. endline must not equal lines. 
*/ +{ + const char *start, *end, *knownstart, * const *line, *p1, *p2, *knownend, + *knownstart2; + + start = *lines; + end = knownstart = start + pre; + if (body) + while (*end) ++end; + else + while (*end && !csmember(*end, bodychars)) ++end; + for (line = lines + 1; line < endline; ++line) { + for (p1 = knownstart, p2 = *line + pre; + p1 < end && *p1 == *p2; + ++p1, ++p2); + end = p1; + } + if (body) + for (p1 = end; p1 > knownstart; ) + if (*--p1 != ' ') { + if (csmember(*p1, bodychars)) + end = p1; + else + break; + } + *ppre = end - start; + + knownstart = *lines + *ppre; + for (end = knownstart; *end; ++end); + knownend = end - suf; + if (body) + start = knownstart; + else + for (start = knownend; + start > knownstart && !csmember(start[-1], bodychars); + --start); + for (line = lines + 1; line < endline; ++line) { + knownstart2 = *line + *ppre; + for (p2 = knownstart2; *p2; ++p2); + for (p1 = knownend, p2 -= suf; + p1 > start && p2 > knownstart2 && p1[-1] == p2[-1]; + --p1, --p2); + start = p1; + } + if (body) { + for (p1 = start; + start < knownend && (*start == ' ' || csmember(*start, bodychars)); + ++start); + if (start > p1 && start[-1] == ' ') --start; + } + else + while (end - start >= 2 && *start == ' ' && start[1] == ' ') ++start; + *psuf = end - start; +} + + +static void delimit( + const char * const *lines, const char * const *endline, + const charset *bodychars, int repeat, int body, int div, + int pre, int suf, lineprop *props +) +/* lines is an array of strings, up to but not including */ +/* endline. Sets fields in each lineprop in the parallel */ +/* array props as appropriate, except for the L_SUPERF flag, */ +/* which is never set. It is assumed that the comprelen */ +/* and comsuflen of the lines in lines have already been */ +/* determined to be at least pre and suf, respectively. 
*/ +{ + const char * const *line, *end, *p, * const *nextline; + char rc; + lineprop *prop, *nextprop; + int anybodiless = 0, status; + + if (endline == lines) return; + + if (endline == lines + 1) { + props->flags |= L_FIRST; + props->p = pre, props->s = suf; + return; + } + + compresuflen(lines, endline, bodychars, body, pre, suf, &pre, &suf); + + line = lines, prop = props; + do { + prop->flags |= L_BODILESS; + prop->p = pre, prop->s = suf; + for (end = *line; *end; ++end); + end -= suf; + p = *line + pre; + rc = p < end ? *p : ' '; + if (rc != ' ' && (isinserted(prop) || !repeat || end - p < repeat)) + prop->flags &= ~L_BODILESS; + else + while (p < end) { + if (*p != rc) { + prop->flags &= ~L_BODILESS; + break; + } + ++p; + } + if (isbodiless(prop)) { + anybodiless = 1; + prop->rc = rc; + } + ++line, ++prop; + } while (line < endline); + + if (anybodiless) { + line = lines, prop = props; + do { + if (isbodiless(prop)) { + ++line, ++prop; + continue; + } + + for (nextline = line + 1, nextprop = prop + 1; + nextline < endline && !isbodiless(nextprop); + ++nextline, ++nextprop); + + delimit(line,nextline,bodychars,repeat,body,div,pre,suf,prop); + + line = nextline, prop = nextprop; + } while (line < endline); + + return; + } + + if (!div) { + props->flags |= L_FIRST; + return; + } + + line = lines, prop = props; + status = ((*lines)[pre] == ' '); + do { + if (((*line)[pre] == ' ') == status) + prop->flags |= L_FIRST; + ++line, ++prop; + } while (line < endline); +} + + +static void marksuperf( + const char * const * lines, const char * const * endline, lineprop *props +) +/* lines points to the first line of a segment, and endline to one */ +/* line beyond the last line in the segment. Sets L_SUPERF bits in */ +/* the flags fields of the props array whenever the corresponding */ +/* line is superfluous. L_BODILESS bits must already be set. 
*/ +{ + const char * const *line, *p; + lineprop *prop, *mprop, dummy; + int inbody, num, mnum; + + for (line = lines, prop = props; line < endline; ++line, ++prop) + if (isvacant(prop)) + prop->flags |= L_SUPERF; + + inbody = mnum = 0; + mprop = &dummy; + for (line = lines, prop = props; line < endline; ++line, ++prop) + if (isvacant(prop)) { + for (num = 0, p = *line; *p; ++p) + if (*p != ' ') ++num; + if (inbody || num < mnum) + mnum = num, mprop = prop; + inbody = 0; + } else { + if (!inbody) mprop->flags &= ~L_SUPERF; + inbody = 1; + } +} + + +static void setaffixes( + const char * const *inlines, const char * const *endline, + const lineprop *props, const charset *bodychars, + const charset *quotechars, int hang, int body, int quote, + int *pafp, int *pfs, int *pprefix, int *psuffix +) +/* inlines is an array of strings, up to but not including endline, */ +/* representing an IP. inlines and endline must not be equal. props */ +/* is the parallel array of lineprop structures. *pafp and *pfs */ +/* are set to the augmented fallback prelen and fallback suflen of the */ +/* IP. If either of *pprefix, *psuffix is less than 0, it is set to a */ +/* default value as specified in "par.doc". */ +{ + int numin, pre, suf; + const char *p; + + numin = endline - inlines; + + if ((*pprefix < 0 || *psuffix < 0) && numin > hang + 1) + compresuflen(inlines + hang, endline, bodychars, body, 0, 0, &pre, &suf); + + p = *inlines + props->p; + if (numin == 1 && quote) + while (*p && csmember (*p, quotechars)) + ++p; + *pafp = p - *inlines; + *pfs = props->s; + + if (*pprefix < 0) + *pprefix = numin > hang + 1 ? pre : *pafp; + + if (*psuffix < 0) + *psuffix = numin > hang + 1 ? suf : *pfs; +} + + +static void freelines(char **lines) +/* Frees the elements of lines, and lines itself. */ +/* lines is a NULL-terminated array of strings.
*/ +{ + char **line; + + for (line = lines; *line; ++line) + free(*line); + + free(lines); +} + + +int main(int argc, const char * const *argv) +{ + int help = 0, version = 0, hang = 0, prefix = -1, repeat = 0, suffix = -1, + Tab = 1, width = 72, body = 0, cap = 0, div = 0, Err = 0, expel = 0, + fit = 0, guess = 0, invis = 0, just = 0, last = 0, quote = 0, Report = 0, + touch = -1; + int prefixbak, suffixbak, c, sawnonblank, oweblank, n, i, afp, fs; + charset *bodychars = NULL, *protectchars = NULL, *quotechars = NULL, + *whitechars = NULL, *terminalchars = NULL; + char *parinit = NULL, *arg, **inlines = NULL, **endline, **firstline, *end, + **nextline, **outlines = NULL, **line, ch; + const char *env, * const init_whitechars = " \f\n\r\t\v"; + errmsg_t errmsg = { '\0' }; + lineprop *props = NULL, *firstprop, *nextprop; + FILE *errout; + +/* Set the current locale from the environment: */ + + setlocale(LC_ALL,""); + +/* Process environment variables: */ + + env = getenv("PARBODY"); + if (!env) env = ""; + bodychars = parsecharset(env,errmsg); + if (*errmsg) { + help = 1; + goto parcleanup; + } + + env = getenv("PARPROTECT"); + if (!env) env = ""; + protectchars = parsecharset(env,errmsg); + if (*errmsg) { + help = 1; + goto parcleanup; + } + + env = getenv("PARQUOTE"); + if (!env) env = "> "; + quotechars = parsecharset(env,errmsg); + if (*errmsg) { + help = 1; + goto parcleanup; + } + + whitechars = parsecharset(init_whitechars, errmsg); + if (*errmsg) goto parcleanup; + + terminalchars = parsecharset(".?!:", errmsg); + if (*errmsg) goto parcleanup; + + env = getenv("PARINIT"); + if (env) { + parinit = malloc((strlen(env) + 1) * sizeof (char)); + if (!parinit) { + strcpy(errmsg,outofmem); + goto parcleanup; + } + strcpy(parinit,env); + arg = strtok(parinit, init_whitechars); + while (arg) { + parsearg(arg, &help, &version, + bodychars, protectchars, quotechars, whitechars, terminalchars, + &hang, &prefix, &repeat, &suffix, &Tab, &width, + &body, &cap, &div, &Err, 
&expel, &fit, &guess, + &invis, &just, &last, &quote, &Report, &touch, errmsg ); + if (*errmsg || help || version) goto parcleanup; + arg = strtok(NULL, init_whitechars); + } + free(parinit); + parinit = NULL; + } + +/* Process command line arguments: */ + + while (*++argv) { + parsearg(*argv, &help, &version, + bodychars, protectchars, quotechars, whitechars, terminalchars, + &hang, &prefix, &repeat, &suffix, &Tab, &width, + &body, &cap, &div, &Err, &expel, &fit, &guess, + &invis, &just, &last, &quote, &Report, &touch, errmsg ); + if (*errmsg || help || version) goto parcleanup; + } + + if (Tab == 0) { + strcpy(errmsg, "<Tab> must not be 0.\n"); + goto parcleanup; + } + + if (touch < 0) touch = fit || last; + prefixbak = prefix; + suffixbak = suffix; + +/* Main loop: */ + + for (sawnonblank = oweblank = 0; ; ) { + for (;;) { + c = getchar(); + if (c == EOF) break; + *(unsigned char *)&ch = c; + if (expel && ch == '\n') { + oweblank = sawnonblank; + continue; + } + if (csmember(ch, protectchars)) { + sawnonblank = 1; + if (oweblank) { + puts(""); + oweblank = 0; + } + while (ch != '\n') { + putchar(c); + c = getchar(); + if (c == EOF) break; + *(unsigned char *)&ch = c; + } + } + if (ch != '\n') break; /* subsumes the case that c == EOF */ + putchar(c); + } + if (c == EOF) break; + ungetc(c,stdin); + + inlines = + readlines(&props, protectchars, quotechars, whitechars, + Tab, invis, quote, errmsg); + if (*errmsg) goto parcleanup; + + for (endline = inlines; *endline; ++endline); + if (endline == inlines) { + free(inlines); + inlines = NULL; + continue; + } + + sawnonblank = 1; + if (oweblank) { + puts(""); + oweblank = 0; + } + + delimit((const char * const *) inlines, + (const char * const *) endline, + bodychars, repeat, body, div, 0, 0, props); + + if (expel) + marksuperf((const char * const *) inlines, + (const char * const *) endline, props); + + firstline = inlines, firstprop = props; + do { + if (isbodiless(firstprop)) { + if ( !(invis && isinserted(firstprop)) + &&
!(expel && issuperf(firstprop))) { + for (end = *firstline; *end; ++end); + if (!repeat || (firstprop->rc == ' ' && !firstprop->s)) { + while (end > *firstline && end[-1] == ' ') --end; + *end = '\0'; + puts(*firstline); + } + else { + n = width - firstprop->p - firstprop->s; + if (n < 0) { + sprintf(errmsg,impossibility,5); + goto parcleanup; + } + printf("%.*s", firstprop->p, *firstline); + for (i = n; i; --i) + putchar(*(unsigned char *)&firstprop->rc); + puts(end - firstprop->s); + } + } + ++firstline, ++firstprop; + continue; + } + + for (nextline = firstline + 1, nextprop = firstprop + 1; + nextline < endline && !isbodiless(nextprop) && !isfirst(nextprop); + ++nextline, ++nextprop); + + prefix = prefixbak, suffix = suffixbak; + setaffixes((const char * const *) firstline, + (const char * const *) nextline, firstprop, bodychars, + quotechars, hang, body, quote, &afp, &fs, &prefix, &suffix); + if (width <= prefix + suffix) { + sprintf(errmsg, + "<width> (%d) <= <prefix> (%d) + <suffix> (%d)\n", + width, prefix, suffix); + goto parcleanup; + } + + outlines = + reformat((const char * const *) firstline, + (const char * const *) nextline, + afp, fs, hang, prefix, suffix, width, cap, + fit, guess, just, last, Report, touch, + (const charset *) terminalchars, errmsg); + if (*errmsg) goto parcleanup; + + for (line = outlines; *line; ++line) + puts(*line); + + freelines(outlines); + outlines = NULL; + + firstline = nextline, firstprop = nextprop; + } while (firstline < endline); + + freelines(inlines); + inlines = NULL; + + free(props); + props = NULL; + } + +parcleanup: + + if (bodychars) freecharset(bodychars); + if (protectchars) freecharset(protectchars); + if (quotechars) freecharset(quotechars); + if (parinit) free(parinit); + if (inlines) freelines(inlines); + if (props) free(props); + if (outlines) freelines(outlines); + + errout = Err ? 
stderr : stdout; + if (*errmsg) fprintf(errout, "par error:\n%.*s", errmsg_size, errmsg); + if (version) fputs("par 1.53.0\n",errout); + if (help) fputs(usagemsg,errout); + + return *errmsg ? EXIT_FAILURE : EXIT_SUCCESS; +} diff --git a/par/Par-1.53.0/par.doc b/par/Par-1.53.0/par.doc new file mode 100644 index 0000000..571b0a1 --- /dev/null +++ b/par/Par-1.53.0/par.doc @@ -0,0 +1,1394 @@ +par.doc +last touched in Par 1.53.0 +last meaningful change in Par 1.53.0 +Copyright 1993, 1996, 2000, 2001, 2020 Adam M. Costello + + + Par 1.53.0 is a package containing: + + + This doc file. + + A man page based on this doc file. + + The ANSI C source for the filter "par". + + Associated scripts and text. + + +Contents + + Contents + File List + Rights and Responsibilities + Compilation + Synopsis + Description + *Quick Start + Terminology + Options + Environment + Details + Diagnostics + Examples + Limitations + Apologies + Bugs + + +File List + + Par 1.53.0 consists of the following files: + + buffer.c 1.53.0 + buffer.h 1.53.0 + charset.c 1.53.0 + charset.h 1.53.0 + errmsg.c 1.53.0 + errmsg.h 1.53.0 + par.1 1.53.0 + par.c 1.53.0 + par.doc 1.53.0 + protoMakefile 1.53.0 + reformat.c 1.53.0 + reformat.h 1.53.0 + releasenotes 1.53.0 + test-par 1.53.0 + + The version number for each file is defined to be the last version + of Par that touched it. Each file is a text file which identifies + itself on the first or second line, and identifies the version of + Par that last touched it on the next line, so you can always tell + which file is which, even if the files have been renamed. + + The file "par.1" is a man page for the filter par (not to be + confused with the package Par, which contains the source code for + par). "par.1" is based on this doc file, and conveys much (not + all) of the same information, but "par.doc" is the definitive + documentation for both par and Par. 
+ + +Rights and Responsibilities + + The files listed in the Files List section above are Copyright + (various years, see the copyright notice in each file) + Adam M. Costello (henceforth "I", "me"). + + I grant everyone ("you") permission to do whatever you like with + these files, provided that if you modify them you take reasonable + steps to avoid confusing or misleading people about who wrote the + modified files (both you and I) or what version they are. All my + versions of Par will have version numbers consisting of only digits + and periods, so you could distinguish your versions by including + some other kind of character. + + I encourage you to send me your suggestions for improvements. See + the Bugs section for my address. + + Though I have tried to make sure that Par is free of bugs, I make no + guarantees about its soundness. Therefore, I am not responsible for + any damage resulting from the use of these files. + + You may alternatively use these files under the MIT License: + + Permission is hereby granted, free of charge, to any person + obtaining a copy of this software and associated documentation + files (the "Software"), to deal in the Software without + restriction, including without limitation the rights to use, + copy, modify, merge, publish, distribute, sublicense, and/or + sell copies of the Software, and to permit persons to whom + the Software is furnished to do so, subject to the following + conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY + KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE + WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE + AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + OTHER DEALINGS IN THE SOFTWARE. + + +Compilation + + To compile par, you need an ANSI C compiler. Follow the + instructions in the comments in protoMakefile. + + If your compiler generates any warnings that you think are + legitimate, please tell me about them (see the Bugs section). + + Note that all variables in par are either constant or automatic + (or both), which means that par can be made reentrant (if your + compiler supports it). Given the right operating system, it should + be possible for several par processes to share the same code space + and the same data space (but not the same stack, of course) in + memory. + + +Synopsis + par [help] [version] [B<op><set>] [P<op><set>] [Q<op><set>] + [W<op><set>] [Z<op><set>] [h[<hang>]] [p[<prefix>]] + [r[<repeat>]] [s[<suffix>]] [T[<Tab>]] [w[<width>]] [b[<body>]] + [c[<cap>]] [d[<div>]] [E[<Err>]] [e[<expel>]] [f[<fit>]] + [g[<guess>]] [i[<invis>]] [j[<just>]] [l[<last>]] [q[<quote>]] + [R[<Report>]] [t[<touch>]] + + Things enclosed in [square brackets] are optional. Things enclosed + in <angle brackets> are parameters. + + +Description + + par is a filter which copies its input to its output, changing all + white characters (except newlines) to spaces, and reformatting + each paragraph. Paragraphs are separated by protected, blank, and + bodiless lines (see the Terminology section for definitions), and + optionally delimited by indentation (see the d option in the Options + section). + + Each output paragraph is generated from the corresponding input + paragraph as follows: + + 1) An optional prefix and/or suffix is removed from each input + line. + 2) The remainder is divided into words (separated by spaces). 
+ 3) The words are joined into lines to make an eye-pleasing + paragraph. + 4) The prefixes and suffixes are reattached. + + If there are suffixes, spaces are inserted before them so that they + all end in the same column. + + +Quick Start + + par is necessarily complex. For those who wish to use it + immediately and understand it later, assign to the PARINIT + environment variable the following value: + + rTbgqR B=.,?'_A_a_@ Q=_s>| + + The spaces, question mark, apostrophe, greater-than sign, and + vertical bar will probably have to be escaped or quoted to prevent + your shell from interpreting them. + + The documentation, though precise, is unfortunately not well-written + for the end-user. Your best bet is probably to read quickly the + Description, Terminology, Options, and Environment sections, then + read carefully the Examples section, referring back to the Options + and Terminology sections as needed. + + For the "power user", a full understanding of par will require + multiple readings of the Terminology, Options, Details, and Examples + sections. + + +Terminology + + Miscellaneous terms: + + charset syntax + A way of representing a set of characters as a string. + The set includes exactly those characters which appear in + the string, except that the underscore (_) is an escape + character. Whenever it appears, it must begin one of the + following escape sequences: + + __ = an underscore + _s = a space + _S = all space characters + _b = a backslash (\) + _q = a single quote (') + _Q = a double quote (") + _A = all upper case letters + _a = all lower case letters + _@ = all neither-case letters + _0 = all decimal digits + _xhh = the character represented by the two hexadecimal + digits hh (which may be upper or lower case) + + The NUL character must not appear in the string but it may + be included in the set with the _x00 sequence. + + The exact meanings of _S, _A, _a, _@, and _0 are + locale-dependent. 
(Actually, all locales are supposed to + agree on _0, but not on the others.) In the default "C" + locale: _S includes only space, formfeed, newline, carriage + return, tab, and vertical tab; _A includes only A through Z; + _a includes only a through z; _@ includes nothing; and _0 + includes only 0 through 9. + + error + A condition which causes par to abort. See the Diagnostics + section. + + IP Input paragraph. + + OP Output paragraph. + + parameter + A symbol which may take on unsigned integral values. There + are several parameters whose values affect the behavior of + par. Parameters can be assigned values using command line + options. + + + Types of characters: + + alphanumeric character + An alphabetic character or decimal digit, _A_a_@_0 in + charset syntax (see above). + + body character + A member of the set of characters defined by the PARBODY + environment variable (see the Environment section) and/or + the B option (see the Options section). + + protective character + A member of the set of characters defined by the PARPROTECT + environment variable and/or the P option. + + quote character + A member of the set of characters defined by the PARQUOTE + environment variable and/or the Q option. + + terminal character + A member of the set of characters defined by the Z option. + Initially, before any Z options have been processed, the + set contains period, question mark, exclamation point, and + colon. + + white character + A member of the set of characters defined by the W option. + Initially, before any W options have been processed, the set + contains space, formfeed, newline, carriage return, tab, and + vertical tab. + + Functions: + + comprelen + Given a non-empty sequence <S> of lines, let <c> be their + longest common prefix. If the parameter <body> is 0, place + a divider just after the leading non-body characters in <c> + (at the beginning if there are none). 
If <body> is 1, place + the divider just after the last non-space non-body character + in <c> (at the beginning if there is none), then advance + the divider over any immediately following spaces. The + comprelen of <S> is the number of characters preceding the + divider. + + comsuflen + Given a non-empty sequence <S> of lines, let <p> be the + comprelen of <S>. Let <T> be the set of lines which results + from stripping the first <p> characters from each line in + <S>. Let <c> be the longest common suffix of the lines + in <T>. If <body> is 0, place a divider just before the + trailing non-body characters in <c> (at the end if there are + none), then advance the divider over all but the last of any + immediately following spaces. If <body> is 1, place the + divider just before the first non-space non-body character, + then back up the divider over one immediately preceding + space if there is one. The comsuflen of <S> is the number + of characters following the divider. + + fallback prelen (suflen) + The fallback prelen (suflen) of an IP is: the comprelen + (comsuflen) of the IP, if the IP contains at least two + lines; otherwise, the comprelen (comsuflen) of the block + containing the IP, if the block contains at least two + lines; otherwise, the length of the longer of the prefixes + (suffixes) of the bodiless lines just above and below the + block, if the segment containing the block has any bodiless + lines; otherwise, 0. (See below for the definitions of + block, segment, and bodiless line.) + + augmented fallback prelen + Let <fp> be the fallback prelen of an IP. If the IP + contains more than one line, or if <quote> is 0, then + the augmented fallback prelen of the IP is simply <fp>. + Otherwise, it is <fp> plus the number of quote characters + immediately following the first <fp> characters of the line. 
+ + quoteprefix + The quoteprefix of a line is the longest string of quote + characters appearing at the beginning of the line, after + this string has been stripped of any trailing spaces. + + Types of lines: + + blank line + An empty line, or a line whose first character is not + protective and which contains only spaces. + + protected line + An input line whose first character is protective. + + bodiless line + A line which is order <k> bodiless for some <k>. + + order <k> bodiless line + There is no such thing as an order 0 bodiless line. Suppose + <S> is a contiguous subsequence of a segment (see below) + containing at least two lines, containing no order <k>-1 + bodiless lines, bounded above and below by order <k>-1 + bodiless lines and/or the beginning/end of the segment. + Let <p> and <s> be the comprelen and comsuflen of <S>. + Any member of <S> which, if stripped of its first <p> and + last <s> characters, would be blank (or, if the line was not + inserted by the <quote> feature and the parameter <repeat> + is non-zero, would consist of the same character repeated + at least <repeat> times), is order <k> bodiless. The first + <p> characters of the bodiless line comprise its prefix; + the last <s> characters comprise its suffix. The character + which repeats in the middle is called its repeat character. + If the middle is empty, the space is taken to be its repeat + character. + + vacant line + A bodiless line whose repeat character is the space. + + superfluous line + Only blank and vacant lines may be superfluous. If + contiguous vacant lines lie at the beginning or end of + a segment, they are all superfluous. But if they lie + between two non-vacant lines within a segment, then all are + superfluous except one--the one which contains the fewest + non-spaces. In case of a tie, the first of the tied lines + is chosen. Similarly, if contiguous blank lines lie outside + of any segments at the beginning or end of the input, they + are all superfluous. 
+ But if they lie between two segments + and/or protected lines, then all are superfluous except the + first. + + Groups of lines: + + segment + A contiguous sequence of input lines containing no protected + or blank lines, bounded above and below by protected lines, + blank lines, and/or the beginning/end of the input. + + block + A contiguous subsequence of a segment containing no bodiless + lines, bounded above and below by bodiless lines and/or the + beginning/end of the segment. + + Types of words: + + capitalized word + If the parameter <cap> is 0, a capitalized word is one which + contains at least one alphanumeric character, whose first + alphanumeric character is not a lower case letter. If <cap> + is 1, every word is considered a capitalized word. (See the + c option in the Options section.) + + curious word + A word which contains a terminal character <c> such that + there are no alphanumeric characters in the word after <c>, + but there is at least one alphanumeric character in the word + before <c>. + + +Options + + Any command line argument may begin with one minus sign (-) which + is ignored. Generally, more than one option may appear in a single + command line argument, but there are exceptions: The help, version, + B, P, Q, W, and Z options must have whole arguments all to themselves. + + help Causes all remaining arguments to be ignored. No input + is read. A usage message is printed on the output + briefly describing the options used by par. + + version Causes all remaining arguments to be ignored. No input + is read. "par " followed by its version number is + printed on the output. + + B<op><set> <op> is a single character, either an equal sign (=), + a plus sign (+), or a minus sign (-), and <set> is a + string using charset syntax. If <op> is an equal sign, + the set of body characters is set to the character set + defined by <set>. 
If <op> is a plus/minus sign, the + characters in the set defined by <set> are added/removed + to/from the existing set of body characters defined by + the PARBODY environment variable and any previous B + options. It is okay to add characters that are already + in the set or to remove characters that are not in the + set. + + P<op><set> Just like the B option, except that it applies to the + set of protective characters. + + Q<op><set> Just like the B option, except that it applies to the + set of quote characters. + + W<op><set> Just like the B option, except that it applies to the + set of white characters. + + Z<op><set> Just like the B option, except that it applies to the + set of terminal characters. + + + All remaining options are used to set values of parameters. Values + set by command line options hold for all paragraphs. Unset + parameters are given default values. Any parameters whose default + values depend on the IP (namely <prefix> and <suffix>), if left + unset, are recomputed separately for each paragraph. + + The approximate role of each parameter is described here. See the + Details section for the rest of the story. + + The first six parameters, <hang>, <prefix>, <repeat>, <suffix>, + <Tab>, and <width>, may be set to any unsigned decimal integer less + than 10000. + + h[<hang>] Mainly affects the default values of <prefix> and + <suffix>. Defaults to 0. If the h option is given + without a number, the value 1 is inferred. (See also + the p and s options.) + + p[<prefix>] The first <prefix> characters of each line of the OP + are copied from the first <prefix> characters of the + corresponding line of the IP. If there are more than + <hang>+1 lines in the IP, the default value is the + comprelen of all the lines in the IP except the first + <hang> of them. Otherwise, the default value is the + augmented fallback prelen of the IP. If the p option is + given without a number, <prefix> is unset, even if it + had been set earlier. 
(See also the h and q options.) + + r[<repeat>] If <repeat> is non-zero, bodiless lines have the number + of instances of their repeat characters increased or + decreased until the length of the line is <width>. + The exact value of <repeat> affects the definition of + bodiless line. Defaults to 0. If the r option is given + without a number, the value 3 is inferred. (See also + the w option.) + + s[<suffix>] The last <suffix> characters of each line of the OP + are copied from the last <suffix> characters of the + corresponding line of the IP. If there are more than + <hang>+1 lines in the IP, the default value is the + comsuflen of all the lines in the IP except the first + <hang> of them. Otherwise, the default value is the + fallback suflen of the IP. If the s option is given + without a number, <suffix> is unset, even if it had been + set earlier. (See also the h option.) + + T[<Tab>] Tab characters in the input are expanded to spaces, + assuming tab stops every <Tab> columns. Must not be + 0. Defaults to 1. If the T option is given without a + number, the value 8 is inferred. + + w[<width>] No line in the OP may contain more than <width> + characters, not including the trailing newlines. + Defaults to 72. If the w option is given without a + number, the value 79 is inferred. + + The remaining thirteen parameters, <body>, <cap>, <div>, <Err>, + <expel>, <fit>, <guess>, <invis>, <just>, <last>, <quote>, <Report>, + and <touch>, may be set to either 0 or 1. If the number is absent + in the option, the value 1 is inferred. + + b[<body>] If <body> is 1, prefixes may not contain any trailing + body characters, and suffixes may not contain any + leading body characters. (Actually, the situation + is complicated by space characters. See comprelen + and comsuflen in the Terminology section.) If <body> + is 0, prefixes and suffixes may not contain any body + characters at all. Defaults to 0. + + c[<cap>] If <cap> is 1, all words are considered capitalized. 
+ This currently affects only the application of the g + option. Defaults to 0. + + d[<div>] If <div> is 0, each block becomes an IP. If <div> is 1, + each block is subdivided into IPs as follows: Let <p> + be the comprelen of the block. Let a line's status be + 1 if its (<p>+1)st character is a space, 0 otherwise. + Every line in the block whose status is the same as the + status of the first line will begin a new paragraph. + Defaults to 0. + + E[<Err>] If <Err> is 1, messages to the user (caused by the help + and version options, or by errors) are sent to the error + stream instead of the output stream. Defaults to 0. + + e[<expel>] If <expel> is 1, superfluous lines are withheld from the + output. Defaults to 0. + + f[<fit>] If <fit> is 1 and <just> is 0, par tries to make the + lines in the OP as nearly the same length as possible, + even if it means making the OP narrower. Defaults to 0. + (See also the j option.) + + g[<guess>] If <guess> is 1, then when par is choosing line breaks, + whenever it encounters a curious word followed by a + capitalized word, it takes one of two special actions. + If the two words are separated by a single space in + the input, they will be merged into one word with an + embedded non-breaking space. If the two words are + separated by more than one space, or by a line break, + par will insure that they are separated by two spaces, + or by a line break, in the output. Defaults to 0. + + i[<invis>] If <invis> is 1, then vacant lines inserted because + <quote> is 1 are invisible; that is, they are not + output. If <quote> is 0, <invis> has no effect. + Defaults to 0. (See also the q option.) + + j[<just>] If <just> is 1, par justifies the OP, inserting spaces + between words so that all lines in the OP have length + <width> (except the last, if <last> is 0). Defaults to + 0. (See also the w, l, and f options.) + + l[<last>] If <last> is 1, par tries to make the last line of the + OP about the same length as the others. Defaults to 0. 
+ + q[<quote>] If <quote> is 1, then before each segment is scanned + for bodiless lines, par supplies vacant lines between + different quotation nesting levels as follows: For each + pair of adjacent lines in the segment (scanned from the + top down) which have different quoteprefixes, one of + two actions is taken. If <invis> is 0, and either line + consists entirely of quote characters and spaces (or is + empty), that line is truncated to the longest common + prefix of the two lines (both are truncated if both + qualify). Otherwise, a line consisting of the longest + common prefix of the two lines is inserted between them. + <quote> also affects the default value of <prefix>. + Defaults to 0. (See also the p and i options.) + + R[<Report>] If <Report> is 1, it is considered an error for an input + word to contain more than <L> = (<width> - <prefix> - + <suffix>) characters. Otherwise, such words are chopped + after each <L>th character into shorter words. Defaults + to 0. + + t[<touch>] Has no effect if <suffix> is 0 or <just> is 1. + Otherwise, if <touch> is 0, all lines in the OP have + length <width>. If <touch> is 1, the length of the + lines is decreased until the suffixes touch the body of + the OP. Defaults to the logical OR of <fit> and <last>. + (See also the s, j, w, f, and l options.) + + If an argument begins with a number, that number is assumed + to belong to a p option if it is 8 or less, and to a w option + otherwise. + + If the value of any parameter is set more than once, the last value + is used. When unset parameters are assigned default values, <hang> + and <quote> are assigned before <prefix>, and <fit> and <last> are + assigned before <touch> (because of the dependencies). + + It is an error if <width> <= <prefix> + <suffix>. + + +Environment + + PARBODY Determines the initial set of body characters (which are + used for determining comprelens and comsuflens), using + charset syntax. 
If PARBODY is not set, the set of body + characters is initially empty. + + PARINIT If set, par will read command line arguments from + PARINIT before it reads them from the command line. + Within the value of PARINIT, arguments are separated by + the initial set of white characters. + + PARPROTECT Determines the set of protective characters, using charset + syntax. If PARPROTECT is not set, the set of protective + characters is initially empty. + + PARQUOTE Determines the set of quote characters, using charset + syntax. If PARQUOTE is not set, the set of quote characters + initially contains only the greater-than sign (>) and the + space. + + If a NUL character appears in the value of an environment variable, it + and the rest of the string will not be seen by par. + + Note that the PARINIT variable, together with the B, P, and Q + options, renders the other environment variables unnecessary. They + are included for backward compatibility. + +Details + + Lines are terminated by newline characters, but the newlines are not + considered to be included in the lines. If the last character of + the input is a non-newline, a newline will be inferred immediately + after it (but if the input is empty, no newline will be inferred; + the number of input lines will be 0). Thus, the input can always be + viewed as a sequence of lines. + + Protected lines are copied unchanged from the input to the output. + All other input lines, as they are read, have any NUL characters + removed, and every white character (except newlines) turned into a + space. Actually, each tab character is turned into <Tab> - (<n> % + <Tab>) spaces, where <n> is the number of characters preceding the + tab character on the line (evaluated after earlier tab characters + have been expanded). + + Blank lines in the input are transformed into empty lines in the + output. + + If <repeat> is 0, all bodiless lines are vacant, and they are all + simply stripped of trailing spaces before being output. 
If <repeat> + is not 0, only vacant lines whose suffixes have length 0 are treated + that way; other bodiless lines have the number of instances of their + repeat characters increased or decreased until the length of the + line is <width>. + + If <expel> is 1, superfluous lines are not output. If <quote> and + <invis> are both 1, there may be invisible lines; they are not + output. + + The input is divided into segments, which are divided into blocks, + which are divided into IPs. The exact process depends on the values + of <quote> and <div> (see q and d in the Options section). The + remainder of this section describes the process which is applied + independently to each IP to construct the corresponding OP. + + After the values of the parameters are determined (see the Options + section), the first <prefix> characters and the last <suffix> + characters of each input line are removed and remembered. It is + an error for any line to contain fewer than <prefix> + <suffix> + characters. + + The remaining text is treated as a sequence of characters, not + lines. The text is broken into words, which are separated by + spaces. That is, a word is a maximal sub-sequence of non-spaces. + If <guess> is 1, some words might be merged (see g in the Options + section). The first word includes any spaces that precede it on the + same line. + + Let <L> = <width> - <prefix> - <suffix>. + + If <Report> is 0, some words may get chopped up at this point (see R + in the Options section). + + The words are reassembled, preserving their order, into lines. If + <just> is 0, adjacent words within a line are separated by a single + space (or sometimes two if <guess> is 1), and line breaks are chosen + so that the paragraph satisfies the following properties: + + 1) No line contains more than <L> characters. + + 2) If <fit> is 1, the difference between the lengths of the + shortest and longest lines is as small as possible. 
+ + 3) The shortest line is as long as possible, subject to + properties 1 and 2. + + 4) Let <target> be <L> if <fit> is 0, or the length of the + longest line if <fit> is 1. The sum of the squares of the + differences between <target> and the lengths of the lines is + as small as possible, subject to properties 1, 2, and 3. + + If <last> is 0, the last line does not count as a line for the + purposes of properties 2, 3, and 4 above. + + If all the words fit on a single line, the properties as worded + above don't make much sense. In that case, no line breaks are + inserted. + + If <just> is 1, adjacent words within a line are separated by one + space (or sometimes two if <guess> is 1) plus zero or more extra + spaces. The value of <fit> is disregarded, and line breaks are + chosen so that the paragraph satisfies the following properties: + + 1) Every line contains exactly <L> characters. + + 2) The largest inter-word gap is as small as possible, subject + to property 1. (An inter-word gap consists only of the + extra spaces, not the regular spaces.) + + 3) The sum of the squares of the lengths of the inter-word gaps + is as small as possible, subject to properties 1 and 2. + + If <last> is 0, the last line does not count as a line for the + purposes of property 1, and it does not require or contain any + extra spaces. + + Extra spaces are distributed as uniformly as possible among the + inter-word gaps in each line. + + In a justified paragraph, every line must contain at least two + words, but that's not always possible to accomplish. If the + paragraph cannot be justified, it is considered an error. + + If the number of lines in the resulting paragraph is less than + <hang>, empty lines are added at the end to bring the number of + lines up to <hang>. + + If <just> is 0 and <touch> is 1, <L> is changed to be the length of + the longest line. + + If <suffix> is not 0, each line is padded at the end with spaces to + bring its length up to <L>. 
+ + To each line is prepended <prefix> characters. Let <n> be the + number of lines in the IP, let <afp> be the augmented fallback + prelen of the IP, and let <fs> be the fallback suflen of the IP. + The characters which are prepended to the <i>th line are chosen as + follows: + + 1) If <i> <= <n>, the characters are copied from the ones that were + removed from the beginning of the <i>th input line. + + 2) If <i> > <n> > <hang>, the characters are copied from the ones + that were removed from the beginning of the last input line. + + 3) If <i> > <n> and <n> <= <hang>, the first min(<afp>,<prefix>) + of the characters are copied from the ones that were removed + from the beginning of the last input line, and the rest are all + spaces. + + Then to each line is appended <suffix> characters. The characters + which are appended to the <i>th line are chosen as follows: + + 1) If <i> <= <n>, the characters are copied from the ones that were + removed from the end of the <i>th input line. + + 2) If <i> > <n> > <hang>, the characters are copied from the ones + that were removed from the end of the last input line. + + 3) If <i> > <n> and <n> <= <hang>, the first min(<fs>,<suffix>) + of the characters are copied from the ones that were removed + from the end of the last input line, and the rest are all + spaces. + + Finally, the lines are printed to the output as the OP. + + +Diagnostics + + If there are no errors, par returns EXIT_SUCCESS (see <stdlib.h>). + + If there is an error, an error message will be printed to the + output, and par will return EXIT_FAILURE. If the error is local + to a single paragraph, the preceding paragraphs will have been + output before the error was detected. Line numbers in error + messages are local to the IP in which the error occurred. All + error messages begin with "par error:" on a line by itself. 
Error + messages concerning command line or environment variable syntax are + accompanied by the same usage message that the help option produces. + + Of course, trying to print an error message would be futile if an + error resulted from an output function, so par doesn't bother doing + any error checking on output functions. + + +Examples + + The superiority of par's dynamic programming algorithm over a greedy + algorithm (such as the one used by fmt) can be seen in the following + example: + + Original paragraph (note that each line begins with 8 spaces): + + We the people of the United States, + in order to form a more perfect union, + establish justice, + insure domestic tranquility, + provide for the common defense, + promote the general welfare, + and secure the blessing of liberty + to ourselves and our posterity, + do ordain and establish the Constitution + of the United States of America. + + After a greedy algorithm with width = 39: + + We the people of the United + States, in order to form a more + perfect union, establish + justice, insure domestic + tranquility, provide for the + common defense, promote the + general welfare, and secure the + blessing of liberty to + ourselves and our posterity, do + ordain and establish the + Constitution of the United + States of America. + + After "par 39": + + We the people of the United + States, in order to form a + more perfect union, establish + justice, insure domestic + tranquility, provide for the + common defense, promote the + general welfare, and secure + the blessing of liberty to + ourselves and our posterity, + do ordain and establish the + Constitution of the United + States of America. + + The line breaks chosen by par are clearly more eye-pleasing. + + par is most useful in conjunction with the text-filtering features + of an editor, such as the ! commands of vi. 
You may wish to add the + following lines to your .exrc file: + + " use Bourne shell for speed: + set shell=/bin/sh + " + " reformat paragraph with no arguments: + map ** {!}par^M} + " + " reformat paragraph with arguments: + map *^V {!}par + + Note that the leading spaces must be removed, and that what is shown + as ^M and ^V really need to be ctrl-M and ctrl-V. Also note that + the last map command contains two spaces following the ctrl-V, plus + one at the end of the line. + + To reformat a simple paragraph delimited by blank lines in vi, you + can put the cursor anywhere in it and type "**" (star star). If + you need to supply arguments to par, you can type "* " (star space) + instead, then type the arguments. + + The rest of this section is a series of before-and-after pictures + showing some typical uses of par. In all cases, no environment + variables are set. + + Before: + + /* We the people of the United States, */ + /* in order to form a more perfect union, */ + /* establish justice, */ + /* insure domestic tranquility, */ + /* provide for the common defense, */ + /* promote the general welfare, */ + /* and secure the blessing of liberty */ + /* to ourselves and our posterity, */ + /* do ordain and establish the Constitution */ + /* of the United States of America. */ + + After "par 59": + + /* We the people of the United States, in */ + /* order to form a more perfect union, establish */ + /* justice, insure domestic tranquility, provide */ + /* for the common defense, promote the general */ + /* welfare, and secure the blessing of liberty */ + /* to ourselves and our posterity, do ordain */ + /* and establish the Constitution of the United */ + /* States of America. 
*/ + + Or after "par 59f": + + /* We the people of the United States, */ + /* in order to form a more perfect union, */ + /* establish justice, insure domestic */ + /* tranquility, provide for the common */ + /* defense, promote the general welfare, */ + /* and secure the blessing of liberty to */ + /* ourselves and our posterity, do ordain */ + /* and establish the Constitution of the */ + /* United States of America. */ + + Or after "par 59l": + + /* We the people of the United States, in */ + /* order to form a more perfect union, establish */ + /* justice, insure domestic tranquility, */ + /* provide for the common defense, promote */ + /* the general welfare, and secure the */ + /* blessing of liberty to ourselves and our */ + /* posterity, do ordain and establish the */ + /* Constitution of the United States of America. */ + + Or after "par 59lf": + + /* We the people of the United States, */ + /* in order to form a more perfect union, */ + /* establish justice, insure domestic */ + /* tranquility, provide for the common */ + /* defense, promote the general welfare, */ + /* and secure the blessing of liberty */ + /* to ourselves and our posterity, do */ + /* ordain and establish the Constitution */ + /* of the United States of America. */ + + Or after "par 59lft0": + + /* We the people of the United States, */ + /* in order to form a more perfect union, */ + /* establish justice, insure domestic */ + /* tranquility, provide for the common */ + /* defense, promote the general welfare, */ + /* and secure the blessing of liberty */ + /* to ourselves and our posterity, do */ + /* ordain and establish the Constitution */ + /* of the United States of America. 
*/ + + Or after "par 59j": + + /* We the people of the United States, in */ + /* order to form a more perfect union, establish */ + /* justice, insure domestic tranquility, provide */ + /* for the common defense, promote the general */ + /* welfare, and secure the blessing of liberty */ + /* to ourselves and our posterity, do ordain and */ + /* establish the Constitution of the United */ + /* States of America. */ + + Or after "par 59jl": + + /* We the people of the United States, */ + /* in order to form a more perfect */ + /* union, establish justice, insure domestic */ + /* tranquility, provide for the common defense, */ + /* promote the general welfare, and secure */ + /* the blessing of liberty to ourselves and */ + /* our posterity, do ordain and establish the */ + /* Constitution of the United States of America. */ + + Before: + + Preamble We the people of the United States, + to the US in order to form + Constitution a more perfect union, + establish justice, + insure domestic tranquility, + provide for the common defense, + promote the general welfare, + and secure the blessing of liberty + to ourselves and our posterity, + do ordain and establish + the Constitution + of the United States of America. + + After "par 52h3": + + Preamble We the people of the United + to the US States, in order to form a + Constitution more perfect union, establish + justice, insure domestic + tranquility, provide for the + common defense, promote the + general welfare, and secure + the blessing of liberty to + ourselves and our posterity, + do ordain and establish the + Constitution of the United + States of America. 
+ + Before: + + 1 We the people of the United States, + 2 in order to form a more perfect union, + 3 establish justice, + 4 insure domestic tranquility, + 5 provide for the common defense, + 6 promote the general welfare, + 7 and secure the blessing of liberty + 8 to ourselves and our posterity, + 9 do ordain and establish the Constitution + 10 of the United States of America. + + After "par 59p12l": + + 1 We the people of the United States, in order to + 2 form a more perfect union, establish justice, + 3 insure domestic tranquility, provide for the + 4 common defense, promote the general welfare, + 5 and secure the blessing of liberty to ourselves + 6 and our posterity, do ordain and establish the + 7 Constitution of the United States of America. + + Before: + + > > We the people + > > of the United States, + > > in order to form a more perfect union, + > > establish justice, + > > ensure domestic tranquility, + > > provide for the common defense, + > + > Promote the general welfare, + > and secure the blessing of liberty + > to ourselves and our posterity, + > do ordain and establish + > the Constitution of the United States of America. + + After "par 52": + + > > We the people of the United States, in + > > order to form a more perfect union, + > > establish justice, ensure domestic + > > tranquility, provide for the common + > > defense, + > + > Promote the general welfare, and secure + > the blessing of liberty to ourselves and + > our posterity, do ordain and establish + > the Constitution of the United States of + > America. + + Before: + + > We the people + > of the United States, + > in order to form a more perfect union, + > establish justice, + > ensure domestic tranquility, + > provide for the common defense, + > Promote the general welfare, + > and secure the blessing of liberty + > to ourselves and our posterity, + > do ordain and establish + > the Constitution of the United States of America. 
+ + After "par 52d": + + > We the people of the United States, + > in order to form a more perfect union, + > establish justice, ensure domestic + > tranquility, provide for the common + > defense, + > Promote the general welfare, and secure + > the blessing of liberty to ourselves and + > our posterity, do ordain and establish + > the Constitution of the United States of + > America. + + Before: + + # 1. We the people of the United States. + # 2. In order to form a more perfect union. + # 3. Establish justice, ensure domestic + # tranquility. + # 4. Provide for the common defense + # 5. Promote the general welfare. + # 6. And secure the blessing of liberty + # to ourselves and our posterity. + # 7. Do ordain and establish the Constitution. + # 8. Of the United States of America. + + After "par 37p13dh": + + # 1. We the people of the + # United States. + # 2. In order to form a more + # perfect union. + # 3. Establish justice, + # ensure domestic + # tranquility. + # 4. Provide for the common + # defense + # 5. Promote the general + # welfare. + # 6. And secure the blessing + # of liberty to ourselves + # and our posterity. + # 7. Do ordain and establish + # the Constitution. + # 8. Of the United States of + # America. + + Before: + + /*****************************************/ + /* We the people of the United States, */ + /* in order to form a more perfect union, */ + /* establish justice, insure domestic */ + /* tranquility, */ + /* */ + /* */ + /* [ provide for the common defense, ] */ + /* [ promote the general welfare, ] */ + /* [ and secure the blessing of liberty ] */ + /* [ to ourselves and our posterity, ] */ + /* [ ] */ + /* */ + /* do ordain and establish the Constitution */ + /* of the United States of America. 
*/ + /******************************************/ + + After "par 42r": + + /********************************/ + /* We the people of the */ + /* United States, in order to */ + /* form a more perfect union, */ + /* establish justice, insure */ + /* domestic tranquility, */ + /* */ + /* */ + /* [ provide for the common ] */ + /* [ defense, promote the ] */ + /* [ general welfare, and ] */ + /* [ secure the blessing of ] */ + /* [ liberty to ourselves ] */ + /* [ and our posterity, ] */ + /* [ ] */ + /* */ + /* do ordain and establish the */ + /* Constitution of the United */ + /* States of America. */ + /********************************/ + + Or after "par 42re": + + /********************************/ + /* We the people of the */ + /* United States, in order to */ + /* form a more perfect union, */ + /* establish justice, insure */ + /* domestic tranquility, */ + /* */ + /* [ provide for the common ] */ + /* [ defense, promote the ] */ + /* [ general welfare, and ] */ + /* [ secure the blessing of ] */ + /* [ liberty to ourselves ] */ + /* [ and our posterity, ] */ + /* */ + /* do ordain and establish the */ + /* Constitution of the United */ + /* States of America. */ + /********************************/ + + Before: + + Joe Public writes: + > Jane Doe writes: + > > + > > + > > I can't find the source for uncompress. + > Oh no, not again!!! + > + > + > Isn't there a FAQ for this? + > + > + That wasn't very helpful, Joe. Jane, + just make a link from uncompress to compress. + + After "par 40q": + + Joe Public writes: + + > Jane Doe writes: + > + > + > > I can't find the source for + > > uncompress. + > + > Oh no, not again!!! + > + > + > Isn't there a FAQ for this? + > + + That wasn't very helpful, Joe. + Jane, just make a link from + uncompress to compress. + + Or after "par 40qe": + + Joe Public writes: + + > Jane Doe writes: + > + > > I can't find the source for + > > uncompress. + > + > Oh no, not again!!! + > + > Isn't there a FAQ for this? 
+ + That wasn't very helpful, Joe. + Jane, just make a link from + uncompress to compress. + + Or after "par 40qi": + + Joe Public writes: + > Jane Doe writes: + > > + > > + > > I can't find the source for + > > uncompress. + > Oh no, not again!!! + > + > + > Isn't there a FAQ for this? + > + > + That wasn't very helpful, Joe. + Jane, just make a link from + uncompress to compress. + + Or after "par 40qie": + + Joe Public writes: + > Jane Doe writes: + > > I can't find the source for + > > uncompress. + > Oh no, not again!!! + > + > Isn't there a FAQ for this? + That wasn't very helpful, Joe. + Jane, just make a link from + uncompress to compress. + + Before: + + I sure hope there's still room + in Dr. Jones' section of archaeology. + I've heard he's the bestest. [sic] + + After "par 50g": + + I sure hope there's still room in + Dr. Jones' section of archaeology. I've + heard he's the bestest. [sic] + + Or after "par 50gc": + + I sure hope there's still room in + Dr. Jones' section of archaeology. I've + heard he's the bestest. [sic] + + Before: + + John writes: + : Mary writes: + : + Anastasia writes: + : + > Hi all! + : + Hi Ana! + : Hi Ana & Mary! + Please unsubscribe me from alt.hello. + + After "par Q+:+ q": + + John writes: + + : Mary writes: + : + : + Anastasia writes: + : + + : + > Hi all! + : + + : + Hi Ana! + : + : Hi Ana & Mary! + + Please unsubscribe me from alt.hello. + + Before: + + amc> The b option was added primarily to deal with + amc> this new style of quotation + amc> which became popular after Par 1.41 was released. + amc> + amc> Par still pays attention to body characters. + amc> Par should not mistake "Par" for part of the prefix. + amc> Par should not mistake "." for a suffix. + + After "par B=._A_a 50bg": + + amc> The b option was added primarily to + amc> deal with this new style of quotation + amc> which became popular after Par 1.41 + amc> was released. + amc> + amc> Par still pays attention to body + amc> characters. 
Par should not mistake + amc> "Par" for part of the prefix. Par + amc> should not mistake "." for a suffix. + + +Limitations + + The <guess> feature guesses wrong in cases like the following: + + I calc'd the approx. + Fermi level to 3 sig. digits. + + With <guess> = 1, par will incorrectly assume that "approx." ends a + sentence. If the input were: + + I calc'd the approx. Fermi + level to 3 sig. digits. + + then par would refuse to put a line break between "approx." and + "Fermi" in the output, mainly to avoid creating the first situation + (in case the paragraph were to be fed back through par again). + This non-breaking space policy does come in handy for cases like + "Mr. Johnson" and "Jan. 1", though. + + The <guess> feature only goes one way. par can preserve wide + sentence breaks in a paragraph, or remove them, but it can't insert + them if they aren't already in the input. + + If you use tabs, you may not like the way par handles (or doesn't + handle) them. It expands them into spaces. I didn't let par output + tabs because tabs don't make sense. Not everyone's terminal has + the same tab settings, so text files containing tabs are sometimes + mangled. In fact, almost every text file containing tabs gets + mangled when something is inserted at the beginning of each line + (when quoting e-mail or commenting out a section of a shell script, + for example), making them a pain to edit. In my opinion, the world + would be a nicer place if everyone stopped using tabs, so I'm doing + my part by not letting par output them. (Thanks to Eric Stuebe for + showing me the light about tabs.) + + There is currently no way for the length of the output prefix to + differ from the length of the input prefix. Ditto for the suffix. + I may consider adding this capability in a future release, but right + now I'm not sure how I'd want it to work. 
+ + +Apologies + + Par began in July 1993 as a small program designed to do one narrow + task: reformat a single paragraph that might have a border on either + side. It was pretty clean back then. Over the next three months, + it very rapidly expanded to handle multiple paragraphs, offer more + options, and take better guesses, at the cost of becoming extremely + complex, and very unclean. It is nowhere near the optimal design + for the larger task it now tries to address. Its only redeeming + features are that it is extremely useful (I find it indispensable), + extremely portable, and very stable since version 1.41 released on + 1993-Oct-31. + + Back in 1993 I had very little experience at writing documentation + for users, so the documentation for Par became rather nightmarish. + There is no separation between how-it-works (which is painfully + complex) and how-to-use-it (which is fairly simple, if you can ever + figure it out). + + Someday I ought to reexamine the problem, and redesign a new, clean + solution from scratch. I don't know when I might get enough free + time to start on such a project. Text files may be obsolete by + then. + + +Bugs + + If I knew of any bugs, I wouldn't release the package. Of course, + there may be bugs that I haven't yet discovered. + + If you find any bugs (in the program or in the documentation), or if + you have any suggestions, please contact me: + + http://www.nicemice.net/amc/ + + When reporting a bug, please include the exact input and command + line options used, and the version number of par, so that I can + reproduce it. + + The latest release of Par is available on the Web at: + + http://www.nicemice.net/par/ + + I don't expect these URLs to change in the foreseeable future, but if + they do, I'll try to leave forward pointers. 
diff --git a/par/Par-1.53.0/protoMakefile b/par/Par-1.53.0/protoMakefile new file mode 100644 index 0000000..dae8893 --- /dev/null +++ b/par/Par-1.53.0/protoMakefile @@ -0,0 +1,115 @@ +# protoMakefile +# last touched in Par 1.53.0 +# last meaningful change in Par 1.53.0 +# Copyright 1993, 1996, 2020 Adam M. Costello + + +##### +##### Instructions +##### + +# If you have no make command (or equivalent), you can easily tell by +# looking at this file what make would do. It would compile each .c +# file into a .o file, then link all the .o files into the executable +# par. You can do this manually. Then you should go look for a version +# of make for your system, since it will come in handy in the future. + +# If you do have make, you can either copy this file to Makefile, edit +# the definitions of CC, LINK1, LINK2, RM, JUNK, O, and E, and then run +# make; or, better yet, create a short script which looks something +# like: +# +# #!/bin/sh +# make -f protoMakefile CC="cc -c" LINK1="cc" LINK2="-o" RM="rm" JUNK="" $* +# +# (Alter this to use commands and values appropriate for your compiler +# and shell). The advantage of the second method is that the script +# will probably work on the next release of Par. + +##### +##### Configuration +##### + +# Define CC so that the command +# +# $(CC) foo.c +# +# compiles the ANSI C source file "foo.c" into the object file "foo.o". +# You may assume that foo.c uses no floating point math. +# +# If your operating system or your compiler's exit() function +# automatically frees all memory allocated by malloc() when a process +# terminates, then you can choose to trade away space efficiency for +# time efficiency by defining DONTFREE. 
+# +# Example (for Solaris 2.x with SPARCompiler C): +# CC = cc -c -O -s -Xc -DDONTFREE + +CPPFLAGS = +CFLAGS = +CC = cc $(CPPFLAGS) $(CFLAGS) -c + +# Define LINK1 and LINK2 so that the command +# +# $(LINK1) foo1.o foo2.o foo3.o $(LINK2) foo +# +# links the object files "foo1.o", "foo2.o", "foo3.o" into the +# executable file "foo". You may assume that none of the .o files use +# floating point math. +# +# Example (for Solaris 2.x with SPARCompiler C): +# LINK1 = cc -s +# LINK2 = -o + +LINK1 = cc +LINK2 = -o + +# Define RM so that the command +# +# $(RM) foo1 foo2 foo3 +# +# removes the files "foo1", "foo2", and "foo3", and preferably doesn't +# complain if they don't exist. + +RM = rm -f + +# Define JUNK to be a list of additional files, other than par and +# $(OBJS), that you want to be removed by "make clean". + +JUNK = + +# Define O to be the usual suffix for object files. + +O = .o + +# Define E to be the usual suffix for executable files. + +E = + +##### +##### Guts (you shouldn't need to touch this part) +##### + +OBJS = buffer$O charset$O errmsg$O par$O reformat$O + +.c$O: + $(CC) $< + +par$E: $(OBJS) + $(LINK1) $(OBJS) $(LINK2) par$E + +buffer$O: buffer.c buffer.h errmsg.h + +charset$O: charset.c charset.h errmsg.h buffer.h + +errmsg$O: errmsg.c errmsg.h + +par$O: par.c charset.h errmsg.h buffer.h reformat.h + +reformat$O: reformat.c reformat.h buffer.h charset.h errmsg.h + +test: par$E + ./test-par ./par$E + +clean: + $(RM) par$E $(OBJS) $(JUNK) diff --git a/par/Par-1.53.0/reformat.c b/par/Par-1.53.0/reformat.c new file mode 100644 index 0000000..f3b89d5 --- /dev/null +++ b/par/Par-1.53.0/reformat.c @@ -0,0 +1,550 @@ +/* +reformat.c +last touched in Par 1.53.0 +last meaningful change in Par 1.53.0 +Copyright 1993, 2001, 2020 Adam M. Costello + +This is ANSI C code (C89). + +The issues regarding char and unsigned char are relevant to the use of +the ctype.h functions. See the comments near the beginning of par.c. 
+ +*/ + + +#include "reformat.h" /* Makes sure we're consistent with the prototype. */ + +#include "buffer.h" +#include "charset.h" +#include "errmsg.h" + +#include <ctype.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#undef NULL +#define NULL ((void *) 0) + +#ifdef DONTFREE +#define free(ptr) +#endif + + +typedef unsigned char wflag_t; + +typedef struct word { + const char *chrs; /* Pointer to the characters in the word */ + /* (NOT terminated by '\0'). */ + struct word *prev, /* Pointer to previous word. */ + *next, /* Pointer to next word. */ + /* Supposing this word were the first... */ + *nextline; /* Pointer to first word in next line. */ + int score, /* Value of the objective function. */ + length; /* Length of this word. */ + wflag_t flags; /* Notable properties of this word. */ +} word; + +/* The following may be bitwise-OR'd together */ +/* to set the flags field of a word: */ + +static const wflag_t + W_SHIFTED = 1, /* This word should have an extra space before */ + /* it unless it's the first word in the line. */ + W_CURIOUS = 2, /* This is a curious word (see par.doc). */ + W_CAPITAL = 4; /* This is a capitalized word (see par.doc). */ + +#define isshifted(w) ( (w)->flags & 1) +#define iscurious(w) (((w)->flags & 2) != 0) +#define iscapital(w) (((w)->flags & 4) != 0) + + +static int checkcapital(word *w) +/* Returns 1 if *w is capitalized according to the definition */ +/* in par.doc (assuming <cap> is 0), or 0 if not. */ +{ + const char *p, *end; + + for (p = w->chrs, end = p + w->length; + p < end && !isalnum(*(unsigned char *)p); + ++p); + return p < end && !islower(*(unsigned char *)p); +} + + +static int checkcurious(word *w, const charset *terminalchars) +/* Returns 1 if *w is curious according to */ +/* the definition in par.doc, or 0 if not. 
*/ +{ + const char *start, *p; + char ch; + + for (start = w->chrs, p = start + w->length; p > start; --p) { + ch = p[-1]; + if (isalnum(*(unsigned char *)&ch)) return 0; + if (csmember(ch,terminalchars)) break; + } + + if (p <= start + 1) return 0; + + --p; + do if (isalnum(*(unsigned char *)--p)) return 1; + while (p > start); + + return 0; +} + + +static int simplebreaks(word *head, word *tail, int L, int last) + +/* Chooses line breaks in a list of words which maximize the length of the */ +/* shortest line. L is the maximum line length. The last line counts as a */ +/* line only if last is non-zero. _head must point to a dummy word, and tail */ +/* must point to the last word, whose next field must be NULL. Returns the */ +/* length of the shortest line on success, -1 if there is a word of length */ +/* greater than L, or L if there are no lines. */ +{ + word *w1, *w2; + int linelen, score; + + if (!head->next) return L; + + for (w1 = tail, linelen = w1->length; + w1 != head && linelen <= L; + linelen += isshifted(w1), w1 = w1->prev, linelen += 1 + w1->length) { + w1->score = last ? linelen : L; + w1->nextline = NULL; + } + + for ( ; w1 != head; w1 = w1->prev) { + w1->score = -1; + for (linelen = w1->length, w2 = w1->next; + linelen <= L; + linelen += 1 + isshifted(w2) + w2->length, w2 = w2->next) { + score = w2->score; + if (linelen < score) score = linelen; + if (score >= w1->score) { + w1->nextline = w2; + w1->score = score; + } + } + } + + return head->next->score; +} + + +static void normalbreaks( + word *head, word *tail, int L, int fit, int last, errmsg_t errmsg +) +/* Chooses line breaks in a list of words according to the policy */ +/* in "par.doc" for <just> = 0 (L is <L>, fit is <fit>, and last is */ +/* <last>). head must point to a dummy word, and tail must point */ +/* to the last word, whose next field must be NULL. 
*/ +{ + word *w1, *w2; + int tryL, shortest, score, target, linelen, extra, minlen; + + *errmsg = '\0'; + if (!head->next) return; + + target = L; + +/* Determine minimum possible difference between */ +/* the lengths of the shortest and longest lines: */ + + if (fit) { + score = L + 1; + for (tryL = L; ; --tryL) { + shortest = simplebreaks(head,tail,tryL,last); + if (shortest < 0) break; + if (tryL - shortest < score) { + target = tryL; + score = target - shortest; + } + } + } + +/* Determine maximum possible length of the shortest line: */ + + shortest = simplebreaks(head,tail,target,last); + if (shortest < 0) { + sprintf(errmsg,impossibility,1); + return; + } + +/* Minimize the sum of the squares of the differences */ +/* between target and the lengths of the lines: */ + + w1 = tail; + do { + w1->score = -1; + for (linelen = w1->length, w2 = w1->next; + linelen <= target; + linelen += 1 + isshifted(w2) + w2->length, w2 = w2->next) { + extra = target - linelen; + minlen = shortest; + if (w2) + score = w2->score; + else { + score = 0; + if (!last) extra = minlen = 0; + } + if (linelen >= minlen && score >= 0) { + score += extra * extra; + if (w1->score < 0 || score <= w1->score) { + w1->nextline = w2; + w1->score = score; + } + } + if (!w2) break; + } + w1 = w1->prev; + } while (w1 != head); + + if (head->next->score < 0) + sprintf(errmsg,impossibility,2); +} + + +static void justbreaks( + word *head, word *tail, int L, int last, errmsg_t errmsg +) +/* Chooses line breaks in a list of words according to the */ +/* policy in "par.doc" for <just> = 1 (L is <L> and last is */ +/* <last>). head must point to a dummy word, and tail must */ +/* point to the last word, whose next field must be NULL. 
*/ +{ + word *w1, *w2; + int numgaps, extra, score, gap, maxgap, numbiggaps; + + *errmsg = '\0'; + if (!head->next) return; + +/* Determine the minimum possible largest inter-word gap: */ + + w1 = tail; + do { + w1->score = L; + for (numgaps = 0, extra = L - w1->length, w2 = w1->next; + extra >= 0; + ++numgaps, extra -= 1 + isshifted(w2) + w2->length, w2 = w2->next) { + gap = numgaps ? (extra + numgaps - 1) / numgaps : L; + if (w2) + score = w2->score; + else { + score = 0; + if (!last) gap = 0; + } + if (gap > score) score = gap; + if (score < w1->score) { + w1->nextline = w2; + w1->score = score; + } + if (!w2) break; + } + w1 = w1->prev; + } while (w1 != head); + + maxgap = head->next->score; + if (maxgap >= L) { + strcpy(errmsg, "Cannot justify.\n"); + return; + } + +/* Minimize the sum of the squares of the numbers */ +/* of extra spaces required in each inter-word gap: */ + + w1 = tail; + do { + w1->score = -1; + for (numgaps = 0, extra = L - w1->length, w2 = w1->next; + extra >= 0; + ++numgaps, extra -= 1 + isshifted(w2) + w2->length, w2 = w2->next) { + gap = numgaps ? (extra + numgaps - 1) / numgaps : L; + if (w2) + score = w2->score; + else { + if (!last) { + w1->nextline = NULL; + w1->score = 0; + break; + } + score = 0; + } + if (gap <= maxgap && score >= 0) { + numbiggaps = extra % numgaps; + score += (extra / numgaps) * (extra + numbiggaps) + numbiggaps; + /* The above may not look like the sum of the squares of the numbers */ + /* of extra spaces required in each inter-word gap, but trust me, it */ + /* is. It's easier to prove graphically than algebraicly. 
*/ + if (w1->score < 0 || score <= w1->score) { + w1->nextline = w2; + w1->score = score; + } + } + if (!w2) break; + } + w1 = w1->prev; + } while (w1 != head); + + if (head->next->score < 0) + sprintf(errmsg,impossibility,3); +} + + +char **reformat( + const char * const *inlines, const char * const *endline, int afp, int fs, + int hang, int prefix, int suffix, int width, int cap, int fit, int guess, + int just, int last, int Report, int touch, const charset *terminalchars, + errmsg_t errmsg +) +{ + int numin, affix, L, onfirstword = 1, linelen, numout, numgaps, extra, phase; + const char * const *line, **suffixes = NULL, **suf, *end, *p1, *p2; + char *q1, *q2, **outlines = NULL; + word dummy, *head, *tail, *w1, *w2; + buffer *pbuf = NULL; + +/* Initialization: */ + + *errmsg = '\0'; + dummy.next = dummy.prev = NULL; + dummy.flags = 0; + head = tail = &dummy; + numin = endline - inlines; + if (numin <= 0) { + sprintf(errmsg,impossibility,4); + goto rfcleanup; + } + numgaps = extra = 0; /* unnecessary, but quiets compiler warnings */ + +/* Allocate space for pointers to the suffixes: */ + + suffixes = malloc(numin * sizeof (const char *)); + if (!suffixes) { + strcpy(errmsg,outofmem); + goto rfcleanup; + } + +/* Set the pointers to the suffixes, and create the words: */ + + affix = prefix + suffix; + L = width - prefix - suffix; + + line = inlines, suf = suffixes; + do { + for (end = *line; *end; ++end); + if (end - *line < affix) { + sprintf(errmsg, + "Line %ld shorter than <prefix> + <suffix> = %d + %d = %d\n", + (long)(line - inlines + 1), prefix, suffix, affix); + goto rfcleanup; + } + end -= suffix; + *suf = end; + p1 = *line + prefix; + for (;;) { + while (p1 < end && *p1 == ' ') ++p1; + if (p1 == end) break; + p2 = p1; + if (onfirstword) { + p1 = *line + prefix; + onfirstword = 0; + } + while (p2 < end && *p2 != ' ') ++p2; + w1 = malloc(sizeof (word)); + if (!w1) { + strcpy(errmsg,outofmem); + goto rfcleanup; + } + w1->next = NULL; + w1->prev = tail; + tail 
= tail->next = w1; + w1->chrs = p1; + w1->length = p2 - p1; + w1->flags = 0; + p1 = p2; + } + ++line, ++suf; + } while (line < endline); + +/* If guess is 1, set flag values and merge words: */ + + if (guess) { + for (w1 = head, w2 = head->next; w2; w1 = w2, w2 = w2->next) { + if (checkcurious(w2,terminalchars)) w2->flags |= W_CURIOUS; + if (cap || checkcapital(w2)) { + w2->flags |= W_CAPITAL; + if (iscurious(w1)) { + if (w1->chrs[w1->length] && w1->chrs + w1->length + 1 == w2->chrs) { + w2->length += w1->length + 1; + w2->chrs = w1->chrs; + w2->prev = w1->prev; + w2->prev->next = w2; + if (iscapital(w1)) w2->flags |= W_CAPITAL; + else w2->flags &= ~W_CAPITAL; + if (isshifted(w1)) w2->flags |= W_SHIFTED; + else w2->flags &= ~W_SHIFTED; + free(w1); + } + else w2->flags |= W_SHIFTED; + } + } + } + tail = w1; + } + +/* Check for too-long words: */ + + if (Report) + for (w2 = head->next; w2; w2 = w2->next) { + if (w2->length > L) { + linelen = w2->length; + if (linelen > errmsg_size - 17) + linelen = errmsg_size - 17; + sprintf(errmsg, "Word too long: %.*s\n", linelen, w2->chrs); + goto rfcleanup; + } + } + else + for (w2 = head->next; w2; w2 = w2->next) + while (w2->length > L) { + w1 = malloc(sizeof (word)); + if (!w1) { + strcpy(errmsg,outofmem); + goto rfcleanup; + } + w1->next = w2; + w1->prev = w2->prev; + w1->prev->next = w1; + w2->prev = w1; + w1->chrs = w2->chrs; + w2->chrs += L; + w1->length = L; + w2->length -= L; + w1->flags = 0; + if (iscapital(w2)) { + w1->flags |= W_CAPITAL; + w2->flags &= ~W_CAPITAL; + } + if (isshifted(w2)) { + w1->flags |= W_SHIFTED; + w2->flags &= ~W_SHIFTED; + } + } + +/* Choose line breaks according to policy in "par.doc": */ + + if (just) justbreaks(head,tail,L,last,errmsg); + else normalbreaks(head,tail,L,fit,last,errmsg); + if (*errmsg) goto rfcleanup; + +/* Change L to the length of the longest line if required: */ + + if (!just && touch) { + L = 0; + w1 = head->next; + while (w1) { + for (linelen = w1->length, w2 = w1->next; + 
w2 != w1->nextline; + linelen += 1 + isshifted(w2) + w2->length, w2 = w2->next); + if (linelen > L) L = linelen; + w1 = w2; + } + } + +/* Construct the lines: */ + + pbuf = newbuffer(sizeof (char *), errmsg); + if (*errmsg) goto rfcleanup; + + numout = 0; + w1 = head->next; + while (numout < hang || w1) { + if (w1) + for (w2 = w1->next, numgaps = 0, extra = L - w1->length; + w2 != w1->nextline; + ++numgaps, extra -= 1 + isshifted(w2) + w2->length, w2 = w2->next); + linelen = suffix || (just && (w2 || last)) ? + L + affix : + w1 ? prefix + L - extra : prefix; + q1 = malloc((linelen + 1) * sizeof (char)); + if (!q1) { + strcpy(errmsg,outofmem); + goto rfcleanup; + } + additem(pbuf, &q1, errmsg); + if (*errmsg) goto rfcleanup; + ++numout; + q2 = q1 + prefix; + if (numout <= numin) memcpy(q1, inlines[numout - 1], prefix); + else if (numin > hang ) memcpy(q1, endline[-1], prefix); + else { + if (afp > prefix) afp = prefix; + memcpy(q1, endline[-1], afp); + q1 += afp; + while (q1 < q2) *q1++ = ' '; + } + q1 = q2; + if (w1) { + phase = numgaps / 2; + for (w2 = w1; ; ) { + memcpy(q1, w2->chrs, w2->length); + q1 += w2->length; + w2 = w2->next; + if (w2 == w1->nextline) break; + *q1++ = ' '; + if (just && (w1->nextline || last)) { + phase += extra; + while (phase >= numgaps) { + *q1++ = ' '; + phase -= numgaps; + } + } + if (isshifted(w2)) *q1++ = ' '; + } + } + q2 += linelen - affix; + while (q1 < q2) *q1++ = ' '; + q2 = q1 + suffix; + if (numout <= numin) memcpy(q1, suffixes[numout - 1], suffix); + else if (numin > hang ) memcpy(q1, suffixes[numin - 1], suffix); + else { + if (fs > suffix) fs = suffix; + memcpy(q1, suffixes[numin - 1], fs); + q1 += fs; + while(q1 < q2) *q1++ = ' '; + } + *q2 = '\0'; + if (w1) w1 = w1->nextline; + } + + q1 = NULL; + additem(pbuf, &q1, errmsg); + if (*errmsg) goto rfcleanup; + + outlines = copyitems(pbuf,errmsg); + +rfcleanup: + + if (suffixes) free(suffixes); + + while (tail != head) { + tail = tail->prev; + free(tail->next); + } + + if 
(pbuf) { + if (!outlines) + for (;;) { + outlines = nextitem(pbuf); + if (!outlines) break; + free(*outlines); + } + freebuffer(pbuf); + } + + return outlines; +} diff --git a/par/Par-1.53.0/reformat.h b/par/Par-1.53.0/reformat.h new file mode 100644 index 0000000..c56268f --- /dev/null +++ b/par/Par-1.53.0/reformat.h @@ -0,0 +1,31 @@ +/* +reformat.h +last touched in Par 1.53.0 +last meaningful change in Par 1.53.0 +Copyright 1993, 2020 Adam M. Costello + +This is ANSI C code (C89). + +*/ + + +#include "charset.h" +#include "errmsg.h" + + +char **reformat( + const char * const *inlines, const char * const *endline, int afp, int fs, + int hang, int prefix, int suffix, int width, int cap, int fit, int guess, + int just, int last, int Report, int touch, const charset *terminalchars, + errmsg_t errmsg +); + /* inlines is an array of pointers to input lines, up to but not */ + /* including endline. inlines and endline must not be equal. */ + /* terminalchars is the set of terminal characters as described */ + /* in "par.doc". The other parameters are variables described in */ + /* "par.doc". reformat(inlines, endline, afp, fs, hang, prefix, */ + /* suffix, width, cap, fit, guess, just, last, Report, touch, */ + /* terminalchars, errmsg) returns a NULL-terminated array of */ + /* pointers to output lines containing the reformatted paragraph, */ + /* according to the specification in "par.doc". None of the */ + /* integer parameters may be negative. Returns NULL on failure. */ diff --git a/par/Par-1.53.0/releasenotes b/par/Par-1.53.0/releasenotes new file mode 100644 index 0000000..c942a91 --- /dev/null +++ b/par/Par-1.53.0/releasenotes @@ -0,0 +1,279 @@ +releasenotes +last touched in Par 1.53.0 +last meaningful change in Par 1.53.0 +Copyright 1993, 1996, 2000, 2001, 2020 Adam M. Costello + + +Each entry below describes changes since the previous version. 
+ +Par 1.53.0 released 2020-Mar-14 + Fixed the following bugs: + An unintended bad interaction between <quote> and <repeat>. + The specification was inconsistent. Although it said that + the lines inserted by the <quote> feature were vacant, + the <repeat> feature could interpret the quote character + of inserted lines as a repeat character, in which case + the lines were not vacant (according to the definition), + and more quote/repeat characters would be inserted to + extend the line to the full width, which no one would ever + want. The definition of "bodiless line" is revised so that + lines inserted by the <quote> feature are excluded from + consideration by the <repeat> feature. + A printf format string mismatch (ptrdiff_t vs. %d), reported by + Quentin Barnes (qbarnes at gmail.com). + protoMakefile's clean target removed par rather than par$E, + reported by George V. Reilly (george at reilly.org). + Added the following features: + Locale support for single-byte character sets. + The previous version, 1.52, attempted to do this in one line + of code (setlocale()), but it's not that simple. Versions + before 1.52 gave no thought to character sets other than + US-ASCII and did not handle them entirely correctly. + Calling setlocale() in version 1.52 corrected some flaws but + not all, and created one new flaw. This version and the + previous two all have the same character handling in the + "C" locale and when processing US-ASCII text; differences + arise only when processing non-US-ASCII text in another + locale. In versions before 1.52 the implementation assumed + that "uppercase letter" means only the 26 characters A-Z + and that "lowercase letter" means only the 26 characters + a-z, and the specification assumed that every letter is + either upper case or lower case. These assumptions hold + for US-ASCII, but not for most other character sets; + therefore versions before 1.52 did not really support + non-US-ASCII text. 
Version 1.52, by calling setlocale(), + relaxed the three assumptions in the implementation but + not the assumption in the specification, and inadvertently + caused the implementation to deviate from the specification + by converting all space characters to spaces rather than + converting only the specified white characters, which is not + necessarily what you want (for example, you probably don't + want no-break space converted to space, but some locales + on some platforms have been known to classify no-break + space as white-space, like FreeBSD did in 2004). This + version fixes the specification by introducing the concept + of "neither-case letter" and redefining "alphanumeric + character" to include it, and fixes the implementation to + convert only the white characters to spaces. It also makes + the white characters configurable so that the version 1.52 + space-conversion behavior can be restored if desired (W=_S). + Note that Par still assumes constant display width per byte, + which is not true for UTF-8 nor ISO-2022-*, but is true for + most single-byte charsets and for EUC-* text without the + single-shift-in/out codes. + The W option, for configuring the set of white characters. + The Z option, for configuring the set of terminal characters. + The _@ escape sequence (neither-case letter) in charset syntax. + The _S escape sequence (any space) in charset syntax. + Added _@ and apostrophe to the set of body characters in PARINIT. + Add #includes for whatever interfaces are used rather than depending + on included files to #include other files. + In protoMakefile introduced CPPFLAGS and CFLAGS (default empty) into + the default definition of CC. + Added a test-par script and a test target in protoMakefile. + Fixed a misspelling (preceed => precede) in par.doc and par.1. + Changed the top-of-file comment convention so that every release + won't need to touch every file (this is the last time). + Updated the author contact information. 
+ Clarified the license and added the MIT License as an alternative. + Adopted a modern version number format, with minor and patch numbers + as dot-separated integers rather than adjacent single digits. + +Par 1.52 released 2001-Apr-29 + Fixed a portability problem regarding unsigned char versus char. + (This was a potential problem only on platforms where char is a + non-two's-complement signed type, which is very few, if any.) + Added a call to setlocale(), for 8-bit character support, requested by + Alex Postnikov (apost at math.berkeley.edu) and Oliver Fromme (olli + at secnetix.de). + Added some unnecessary parentheses, braces, and initialization, + to quiet the more aggressive compiler warnings. (This + was suggested back in 1993 by Thomas E. Dickey (dickey at + software.org) but I refused. I guess my attitude has changed.) + Changed the EXAMPLES section of par.1 to use the constant-width font + rather than constant-spaced Roman, suggested by Carl Lowenstein (cdl + at mpl.ucsd.edu). + Reorganized releasenotes to put the most recent version first, and + rearranged the email addresses to confuse address harvesters. + +Par 1.51 released 2000-Feb-24 + Updated the author contact information. + Relaxed the copyright restrictions. + Added comma to the recommended set of body characters in PARINIT. + +Par 1.50 released 1996-Feb-11 + Added the following features: + The T option, suggested by Jules Junker (jjunker at eos.hitc.com). + The b option. + Loosened the terms of the Copyright. + Changed protoMakefile: + Added the E and O macros, suggested by Peter A. Getterman + (pgutterman at worldbank.org). + Added sensible default values for all macros. + Added question-mark to the recommended set of body characters. + Restored documentation of the number-without-a-letter shortcut + command-line option, which got lost between versions 1.00 and 1.10. + Its absence was noted by David W. Sanderson (dws at ssec.wisc.edu). 
+ Added .exrc examples, suggested by Alex Matulich (matuli_a at + marlin.navsea.navy.mil), though I used my own .exrc. + Added the Apologies section to par.doc and par.1. + Made many other tweeks to the documentation. + Removed the useless synopsis from the runtime help message. + +Par 1.41 released 1993-Oct-31 + Moved the former Release Notes section of par.doc into its own file. + Fixed the following bugs: + A couple of inconsistencies of style in the code, pointed out by + Thomas E. Dickey (dickey at software.org). + If <quote> and <hang> were 1 and a one-line paragraph beginning with + quote characters was reformatted into a multi-line paragraph, + the new lines began with spaces instead of quote characters. + This was because the policy for copying prefixes used the + fallback prelen rather than the augmented version of it used for + computing the default value of <prefix>. Now both use the same + formula. Thanks go to Steven King (king at rtsg.mot.com) for + reporting the problem. + If the t option were given without a number, it was unset instead of + being set to 1. + Added the following feature: + The E option, suggested by Alex Matulich (alex at bilver.oau.org). + Altered the terminology: + Added the term "augmented fallback prelen". + Added to the Rights and Responsibilities section of par.doc a guideline + for handling version numbers in patched versions of Par. + Added a useful suggestion to protoMakefile. + +Par 1.40 released 1993-Oct-10 + Fixed the following bugs: + The phrase "containing at least two lines" was left out of the + definition of "vacant line" in par.doc and par.1, although + the code implemented the correct definition. (The phrase now + appears in the definition of "order <k> bodiless line".) + There was still a flaw in the design of the <quote> feature. 
If two + adjacent lines had quoteprefixes neither of which was a prefix + of the other, no line would be inserted between them, possibly + causing other inserted lines not to be vacant. Now it should + be true that every line inserted or truncated by the <quote> + feature will turn out to be vacant. + When <hang> began affecting the default value of <suffix> (as of Par + 1.20), it should also have begun affecting the choice of source + of characters for the suffixes of lines in OPs. + Added the following features: + The i option, suggested by Alan Barrett (barrett at ee.und.ac.za). + The e option, inspired by a suggestion from Tim Pointing (tim at + ben.dciem.dnd.ca). + The r option. + The p, s, and w options are now accepted without numbers. + par no longer gives up so easily when choosing default values for + <prefix> and <suffix> for an IP with less than <hang>+2 lines, + nor when choosing the source of characters for the prefix and + suffix of the <i>th output line when <i> > <n> and <n> <= + <hang>. These are incompatible changes, but I cannot imagine + anyone preferring the old behavior. + Altered the terminology: + Added the terms "bodiless line" (a generalization of "vacant line"), + "fallback prelen", and "fallback suflen". + Made miscellaneous changes to the documentation, including the addition + of the Quick Start section. + +Par 1.32 released 1993-Sep-13 + Fixed the following bugs: + par could crash when the + or - operator was used with the B, P, and + Q options. Thanks go to Andrew Cashin (splat at deakin.oz.au) + for reporting this. + If <quote> were 1 and two adjacent lines had different + quoteprefixes, one of which was a prefix of the other, and only + the line with the shorter quoteprefix contained a non-quote + character, then nothing would be altered. According to par.doc + 1.31, this was correct, but since neither line is vacant, I + consider this a bug in the design of the <quote> feature. 
Now + the longer quoteprefix will be truncated to match the shorter + one, and will therefore be vacant. Thanks go to Andrew Cashin + (splat at deakin.oz.au) for asking about this. + Made slight changes to the documentation. + +Par 1.31 released 1993-Sep-07 + The version number is 1.31 rather than 1.40 because all added features + are really just enhancements of existing features. + Fixed the following bug: + In par.doc, in the example of a paragraph produced by a greedy + algorithm, the word "establish" appeared twice in a row. Thanks + go to Daniel Kussendrager (daniel at astro.rug.nl) for first + pointing this out. (The example is now even better because the + paragraph looks even worse than before.) + Added the following features: + A usage message to accompany command line or environment variable + syntax errors, first suggested by Karl Stiefvater (qarl at + ecl.wustl.edu). + The help and c options. + The B, P, and Q options, which render PARBODY, PARPROTECT, and + PARQUOTE no longer necessary. They are retained, though, for + compatibility and convenience. + The _b, _q, and _Q escape sequences for charset syntax. + Added the term "charset syntax". + Isolated the character set code in charset.c and charset.h. + +Par 1.30 released 1993-Aug-18 + Since Par 1.20 was posted to comp.sources.misc, I have made only + backward-compatible changes in Par 1.30. + Fixed the following bugs: + One wrong word in par.c sometimes caused par to crash. Thanks go to + Contr Karl Vogel (vogelke at c-17igp.wpafb.af.mil) for sending + me an input file that caused a crash. + Too-long words were chopped up before the first word in a paragraph + was expanded to include initial spaces, allowing impossibility + #1 to occur. The order of the two operations has been reversed. + Thanks go to Andrew Cashin (splat at deakin.oz.au) for reporting + the error message. + Added the following features: + The g option, motivated by suggestions from several people. 
+ The q option, inspired by a suggestion from Andrew Cashin (splat at + deakin.oz.au). + The R option (my attempt to squash a bad idea from Par 1.00). + The PARQUOTE environment variable (comes with the q option). + The PARPROTECT environment variable, inspired by a suggestion from + Dennis Flaherty (dennisf at se01.elk.miles.com). + Altered the terminology: + Several terms have been added, and the meaning of some terms has + been slightly modified. This is a change in the language used to + describe par's behavior, not a change in par's actual behavior. + Added a clean target to protoMakefile, suggested by Hal Jespersen (hlj + at posix.com). + +Par 1.20 released 1993-Aug-10 + Since Par 1.10 was distributed to no one, I've made some more + incompatible changes in Par 1.20. + Added the following features: + The d option. + Paragraphs are now separated by vacant lines, not just blank lines. + <hang> now affects not only <prefix> but also <suffix>. + +Par 1.10 released 1993-Aug-02 + Fixed the following bugs: + In reformat.c I used sprintf() but forgot to #include <stdio.h>. + I forgot to verify that <width> > <prefix> + <suffix>. + The first word of a paragraph was expanded to include initial white + characters, not just spaces, contrary to par.doc. + Some invalid options were not complained about. + NUL characters in the input were not handled. + A pointer foul-up in freelines() in par.c could cause a crash. + Added the following features: + The f, j, and t options. + The PARBODY environment variable. + Multiple options may be concatenated into a single argument. + Removed the m option: + Its function is better performed by the f and t options. Normally + I would avoid making incompatible changes, unless I were doing a + complete overhaul of the whole program, in which case I'd make the + version number 2.00 to alert users to possible incompatibilities. 
+ However, in this particular instance I allowed an incompatibility in + a minor upgrade because version 1.00 was distributed to only four + people. + Changed the handling of white characters: + par now changes all of them (except newlines) to spaces as they are + read. This is another incompatible change, excused for the same + reason. + Made all error messages begin with "par error:". + +Par 1.00 released 1993-Jul-25 + The first release. diff --git a/par/Par-1.53.0/test-par b/par/Par-1.53.0/test-par new file mode 100755 index 0000000..d53a903 --- /dev/null +++ b/par/Par-1.53.0/test-par @@ -0,0 +1,734 @@ +: +# test-par +# last touched in Par 1.53.0 +# last meaningful change in Par 1.53.0 +# Copyright 2020 Adam M. Costello + +# This is POSIX shell code. + +if [ $# -ne 1 ]; then + echo 'need exactly one argument, the pathname for par' >&2 + exit 2 +fi + +par=$1 +unset PARBODY PARINIT PARPROTECT PARQUOTE +pass_count=0 +fail_count=0 + + +# The caller must set variables called 'input' and 'expected', may set +# a variable called 'locale' (which if nonempty will be exposed to +# par as environment variable LC_ALL), and may pass arguments for par +# on the command line. The input and expected text need not end with +# newlines, because test_par will add a newline to the input, and the +# shell backquotes that capture the output will strip the last newline. 
+test_par() { + output=` + if [ -n "$locale" ]; then + LC_ALL=$locale "$par" "$@" + else + "$par" "$@" + fi << EOF +$input +EOF +` + cmdline="${locale:+LC_ALL=$locale }$par $@" + if [ "$expected" = "$output" ]; then + pass_count=`expr $pass_count + 1` + echo "passed: $cmdline" + else + fail_count=`expr $fail_count + 1` + echo " +FAILED: $cmdline +input { +$input +} +expected { +$expected +} +output { +$output +} +" + fi +} + + +# From the Examples section of par.doc: + +input=`cat << 'EOF' + We the people of the United States, + in order to form a more perfect union, + establish justice, + insure domestic tranquility, + provide for the common defense, + promote the general welfare, + and secure the blessing of liberty + to ourselves and our posterity, + do ordain and establish the Constitution + of the United States of America. +EOF +` +args=39 +expected=`cat << 'EOF' + We the people of the United + States, in order to form a + more perfect union, establish + justice, insure domestic + tranquility, provide for the + common defense, promote the + general welfare, and secure + the blessing of liberty to + ourselves and our posterity, + do ordain and establish the + Constitution of the United + States of America. +EOF +` +test_par $args + +input=`cat << 'EOF' + /* We the people of the United States, */ + /* in order to form a more perfect union, */ + /* establish justice, */ + /* insure domestic tranquility, */ + /* provide for the common defense, */ + /* promote the general welfare, */ + /* and secure the blessing of liberty */ + /* to ourselves and our posterity, */ + /* do ordain and establish the Constitution */ + /* of the United States of America. 
*/ +EOF +` +args=59 +expected=`cat << 'EOF' + /* We the people of the United States, in */ + /* order to form a more perfect union, establish */ + /* justice, insure domestic tranquility, provide */ + /* for the common defense, promote the general */ + /* welfare, and secure the blessing of liberty */ + /* to ourselves and our posterity, do ordain */ + /* and establish the Constitution of the United */ + /* States of America. */ +EOF +` +test_par $args + +args=59f +expected=`cat << 'EOF' + /* We the people of the United States, */ + /* in order to form a more perfect union, */ + /* establish justice, insure domestic */ + /* tranquility, provide for the common */ + /* defense, promote the general welfare, */ + /* and secure the blessing of liberty to */ + /* ourselves and our posterity, do ordain */ + /* and establish the Constitution of the */ + /* United States of America. */ +EOF +` +test_par $args + +args=59l +expected=`cat << 'EOF' + /* We the people of the United States, in */ + /* order to form a more perfect union, establish */ + /* justice, insure domestic tranquility, */ + /* provide for the common defense, promote */ + /* the general welfare, and secure the */ + /* blessing of liberty to ourselves and our */ + /* posterity, do ordain and establish the */ + /* Constitution of the United States of America. */ +EOF +` +test_par $args + +args=59lf +expected=`cat << 'EOF' + /* We the people of the United States, */ + /* in order to form a more perfect union, */ + /* establish justice, insure domestic */ + /* tranquility, provide for the common */ + /* defense, promote the general welfare, */ + /* and secure the blessing of liberty */ + /* to ourselves and our posterity, do */ + /* ordain and establish the Constitution */ + /* of the United States of America. 
*/ +EOF +` +test_par $args + +args=59lft0 +expected=`cat << 'EOF' + /* We the people of the United States, */ + /* in order to form a more perfect union, */ + /* establish justice, insure domestic */ + /* tranquility, provide for the common */ + /* defense, promote the general welfare, */ + /* and secure the blessing of liberty */ + /* to ourselves and our posterity, do */ + /* ordain and establish the Constitution */ + /* of the United States of America. */ +EOF +` +test_par $args + +args=59j +expected=`cat << 'EOF' + /* We the people of the United States, in */ + /* order to form a more perfect union, establish */ + /* justice, insure domestic tranquility, provide */ + /* for the common defense, promote the general */ + /* welfare, and secure the blessing of liberty */ + /* to ourselves and our posterity, do ordain and */ + /* establish the Constitution of the United */ + /* States of America. */ +EOF +` +test_par $args + +args=59jl +expected=`cat << 'EOF' + /* We the people of the United States, */ + /* in order to form a more perfect */ + /* union, establish justice, insure domestic */ + /* tranquility, provide for the common defense, */ + /* promote the general welfare, and secure */ + /* the blessing of liberty to ourselves and */ + /* our posterity, do ordain and establish the */ + /* Constitution of the United States of America. */ +EOF +` +test_par $args + +input=`cat << 'EOF' + Preamble We the people of the United States, + to the US in order to form + Constitution a more perfect union, + establish justice, + insure domestic tranquility, + provide for the common defense, + promote the general welfare, + and secure the blessing of liberty + to ourselves and our posterity, + do ordain and establish + the Constitution + of the United States of America. 
+EOF +` +args=52h3 +expected=`cat << 'EOF' + Preamble We the people of the United + to the US States, in order to form a + Constitution more perfect union, establish + justice, insure domestic + tranquility, provide for the + common defense, promote the + general welfare, and secure + the blessing of liberty to + ourselves and our posterity, + do ordain and establish the + Constitution of the United + States of America. +EOF +` +test_par $args + +input=`cat << 'EOF' + 1 We the people of the United States, + 2 in order to form a more perfect union, + 3 establish justice, + 4 insure domestic tranquility, + 5 provide for the common defense, + 6 promote the general welfare, + 7 and secure the blessing of liberty + 8 to ourselves and our posterity, + 9 do ordain and establish the Constitution + 10 of the United States of America. +EOF +` +args=59p12l +expected=`cat << 'EOF' + 1 We the people of the United States, in order to + 2 form a more perfect union, establish justice, + 3 insure domestic tranquility, provide for the + 4 common defense, promote the general welfare, + 5 and secure the blessing of liberty to ourselves + 6 and our posterity, do ordain and establish the + 7 Constitution of the United States of America. +EOF +` +test_par $args + +input=`cat << 'EOF' + > > We the people + > > of the United States, + > > in order to form a more perfect union, + > > establish justice, + > > ensure domestic tranquility, + > > provide for the common defense, + > + > Promote the general welfare, + > and secure the blessing of liberty + > to ourselves and our posterity, + > do ordain and establish + > the Constitution of the United States of America. 
+EOF +` +args=52 +expected=`cat << 'EOF' + > > We the people of the United States, in + > > order to form a more perfect union, + > > establish justice, ensure domestic + > > tranquility, provide for the common + > > defense, + > + > Promote the general welfare, and secure + > the blessing of liberty to ourselves and + > our posterity, do ordain and establish + > the Constitution of the United States of + > America. +EOF +` +test_par $args + +input=`cat << 'EOF' + > We the people + > of the United States, + > in order to form a more perfect union, + > establish justice, + > ensure domestic tranquility, + > provide for the common defense, + > Promote the general welfare, + > and secure the blessing of liberty + > to ourselves and our posterity, + > do ordain and establish + > the Constitution of the United States of America. +EOF +` +args=52d +expected=`cat << 'EOF' + > We the people of the United States, + > in order to form a more perfect union, + > establish justice, ensure domestic + > tranquility, provide for the common + > defense, + > Promote the general welfare, and secure + > the blessing of liberty to ourselves and + > our posterity, do ordain and establish + > the Constitution of the United States of + > America. +EOF +` +test_par $args + +input=`cat << 'EOF' + # 1. We the people of the United States. + # 2. In order to form a more perfect union. + # 3. Establish justice, ensure domestic + # tranquility. + # 4. Provide for the common defense + # 5. Promote the general welfare. + # 6. And secure the blessing of liberty + # to ourselves and our posterity. + # 7. Do ordain and establish the Constitution. + # 8. Of the United States of America. +EOF +` +args=37p13dh +expected=`cat << 'EOF' + # 1. We the people of the + # United States. + # 2. In order to form a more + # perfect union. + # 3. Establish justice, + # ensure domestic + # tranquility. + # 4. Provide for the common + # defense + # 5. Promote the general + # welfare. + # 6. 
And secure the blessing + # of liberty to ourselves + # and our posterity. + # 7. Do ordain and establish + # the Constitution. + # 8. Of the United States of + # America. +EOF +` +test_par $args + +input=`cat << 'EOF' + /*****************************************/ + /* We the people of the United States, */ + /* in order to form a more perfect union, */ + /* establish justice, insure domestic */ + /* tranquility, */ + /* */ + /* */ + /* [ provide for the common defense, ] */ + /* [ promote the general welfare, ] */ + /* [ and secure the blessing of liberty ] */ + /* [ to ourselves and our posterity, ] */ + /* [ ] */ + /* */ + /* do ordain and establish the Constitution */ + /* of the United States of America. */ + /******************************************/ +EOF +` +args=42r +expected=`cat << 'EOF' + /********************************/ + /* We the people of the */ + /* United States, in order to */ + /* form a more perfect union, */ + /* establish justice, insure */ + /* domestic tranquility, */ + /* */ + /* */ + /* [ provide for the common ] */ + /* [ defense, promote the ] */ + /* [ general welfare, and ] */ + /* [ secure the blessing of ] */ + /* [ liberty to ourselves ] */ + /* [ and our posterity, ] */ + /* [ ] */ + /* */ + /* do ordain and establish the */ + /* Constitution of the United */ + /* States of America. */ + /********************************/ +EOF +` +test_par $args + +args=42re +expected=`cat << 'EOF' + /********************************/ + /* We the people of the */ + /* United States, in order to */ + /* form a more perfect union, */ + /* establish justice, insure */ + /* domestic tranquility, */ + /* */ + /* [ provide for the common ] */ + /* [ defense, promote the ] */ + /* [ general welfare, and ] */ + /* [ secure the blessing of ] */ + /* [ liberty to ourselves ] */ + /* [ and our posterity, ] */ + /* */ + /* do ordain and establish the */ + /* Constitution of the United */ + /* States of America. 
*/ + /********************************/ +EOF +` +test_par $args + +input=`cat << 'EOF' + Joe Public writes: + > Jane Doe writes: + > > + > > + > > I can't find the source for uncompress. + > Oh no, not again!!! + > + > + > Isn't there a FAQ for this? + > + > + That wasn't very helpful, Joe. Jane, + just make a link from uncompress to compress. +EOF +` +args=40q +expected=`cat << 'EOF' + Joe Public writes: + + > Jane Doe writes: + > + > + > > I can't find the source for + > > uncompress. + > + > Oh no, not again!!! + > + > + > Isn't there a FAQ for this? + > + + That wasn't very helpful, Joe. + Jane, just make a link from + uncompress to compress. +EOF +` +test_par $args + +args=40qe +expected=`cat << 'EOF' + Joe Public writes: + + > Jane Doe writes: + > + > > I can't find the source for + > > uncompress. + > + > Oh no, not again!!! + > + > Isn't there a FAQ for this? + + That wasn't very helpful, Joe. + Jane, just make a link from + uncompress to compress. +EOF +` +test_par $args + +args=40qi +expected=`cat << 'EOF' + Joe Public writes: + > Jane Doe writes: + > > + > > + > > I can't find the source for + > > uncompress. + > Oh no, not again!!! + > + > + > Isn't there a FAQ for this? + > + > + That wasn't very helpful, Joe. + Jane, just make a link from + uncompress to compress. +EOF +` +test_par $args + +args=40qie +expected=`cat << 'EOF' + Joe Public writes: + > Jane Doe writes: + > > I can't find the source for + > > uncompress. + > Oh no, not again!!! + > + > Isn't there a FAQ for this? + That wasn't very helpful, Joe. + Jane, just make a link from + uncompress to compress. +EOF +` +test_par $args + +input=`cat << 'EOF' + I sure hope there's still room + in Dr. Jones' section of archaeology. + I've heard he's the bestest. [sic] +EOF +` +args=50g +expected=`cat << 'EOF' + I sure hope there's still room in + Dr. Jones' section of archaeology. I've + heard he's the bestest. 
[sic] +EOF +` +test_par $args + +args=50gc +expected=`cat << 'EOF' + I sure hope there's still room in + Dr. Jones' section of archaeology. I've + heard he's the bestest. [sic] +EOF +` +test_par $args + +input=`cat << 'EOF' + John writes: + : Mary writes: + : + Anastasia writes: + : + > Hi all! + : + Hi Ana! + : Hi Ana & Mary! + Please unsubscribe me from alt.hello. +EOF +` +args='Q+:+ q' +expected=`cat << 'EOF' + John writes: + + : Mary writes: + : + : + Anastasia writes: + : + + : + > Hi all! + : + + : + Hi Ana! + : + : Hi Ana & Mary! + + Please unsubscribe me from alt.hello. +EOF +` +test_par $args + +input=`cat << 'EOF' + amc> The b option was added primarily to deal with + amc> this new style of quotation + amc> which became popular after Par 1.41 was released. + amc> + amc> Par still pays attention to body characters. + amc> Par should not mistake "Par" for part of the prefix. + amc> Par should not mistake "." for a suffix. +EOF +` +args='B=._A_a 50bg' +expected=`cat << 'EOF' + amc> The b option was added primarily to + amc> deal with this new style of quotation + amc> which became popular after Par 1.41 + amc> was released. + amc> + amc> Par still pays attention to body + amc> characters. Par should not mistake + amc> "Par" for part of the prefix. Par + amc> should not mistake "." for a suffix. 
+EOF +` +test_par $args + + +# Tests for new features in 1.53.0: + +tmpdir=/tmp/test-par-$$ +export LOCPATH=$tmpdir/locale +mkdir -p $LOCPATH/en_US +mkdir $LOCPATH/he_IL +localedef -f ISO-8859-1 -i en_US $LOCPATH/en_US +localedef -f ISO-8859-8 -i he_IL $LOCPATH/he_IL + +# ISO-8859-1 capital letters A and AE, small letters A and AE +input=`printf 'A\306a\346'` +# +args= +locale=C +expected=$input +test_par $args +locale=en_US +expected=$input +test_par $args +# +args=W=_A +locale=C +expected=`printf ' \306a\346'` +test_par $args +locale=en_US +expected=`printf ' a\346'` +test_par $args +# +args=W=_a +locale=C +expected=`printf 'A\306 \346'` +test_par $args +locale=en_US +expected=`printf 'A\306'` +test_par $args + +# ISO-8859-1 no-break spaces and vertical tabs +input=`printf 'X\240\240\240Y\v\v\vZ'` +# +args= +locale=C +expected=`printf 'X\240\240\240Y Z'` +test_par $args +locale=en_US +test_par $args +# +args=W=_S +locale=C +expected=`printf 'X\240\240\240Y Z'` +test_par $args +# +args=W= +locale=en_US +expected=$input +test_par $args +# +# The behavior of isspace() on non-ASCII characters (like no-break +# space) in locales other than C is not standardized, and has been +# observed to vary, so we won't test that. If you want to refer to +# no-break space, the surest way is with _x. +args=W+_xA0 +locale=en_US +expected='X Y Z' +test_par $args + +# ISO-8859-8 letter alef +input=`printf '.\n\340'` +# +args=P=_A_a +locale=he_IL +expected=`printf '. \340'` +test_par $args +# +args=P=_@ +expected=`printf '.\n\340'` +test_par $args + +unset LOCPATH locale + +input=`cat << 'EOF' +> one +>> two +>>> three +>>>> four +>>>>> five +EOF +` +args='Q=> qr' +expected=`cat << 'EOF' +> one +> +>> two +>> +>>> three +>>> +>>>> four +>>>> +>>>>> five +EOF +` +test_par $args + +input=`cat << 'EOF' +One. +Two: +Three. +EOF +` +args=g +expected=`cat << 'EOF' +One. Two: Three. +EOF +` +test_par $args +# +args='g Z-:' +expected=`cat << 'EOF' +One. Two: Three. 
+EOF +` +test_par $args + + +rm -rf $tmpdir +echo +echo "$pass_count passed" +echo "$fail_count failed" +[ 0 = $fail_count ] |