/*
 * extract [-a] [-m] [-c] [-s] [-g] [-b] [-dx] [-rx] [-ix] [-v argument] [-debug]
 *	[-w partitionfile] [-we] [-t] [-lc] [-lp] [-p pattern] ... [-f patfile] ... dfile ...
 *
 * extract is a program to select data records from 1 or more data files
 * and combine selected parts of records with other characters to generate
 * new records.  As such it can be used for purposes of information retrieval,
 * report generation, record rearrangement, and can be readily combined with
 * other programs such as sed, labels, wc, sort, uniq, pr, etc. to generate reports.
 *
 * by David E. Miran    Wisconsin State Hygiene Lab
 * 7/1/83
 *
 */

#include	<stdio.h>
#include	"vmyio.h"

/* -debug is to produce runtime trace of activity
 * -t is to produce trace of pattern analysis and listing of pattern tables
 */

/* table and buffer size limits */
#define		NREC	300	/* Maximum number of record selection patterns */
#define		NTEXT	1000	/* maximum number of field or text selection patterns */
#define		CSIZE	7000	/* size of character storage space for patterns which need it */
#define		MAXSEG	200	/* maximum number of segments */
#define		FNAME	20	/* max size of file names for -w mode names */
#define		MAXOPN	10	/* max number of simultaneous -w mode files open */
#define		MAXFLD	300	/* maximum number of fields in a record */
#define		MAXREC	2400	/* maximum size of a record */
/* field output modes, stored in tform.tnum for 'F' patterns (see fpat, build) */
#define		LN	0	/* copy field as is, no padding */
#define		LS	1	/* left justified, space filled */
#define		LZ	2	/* left justified, zero filled */
#define		RS	3	/* right justified, space filled */
#define		RZ	4	/* right justified, zero filled */
#define		DT	5	/* date written as mm/dd/yy */
/* side argument passed to adj() */
#define		LEFT	10
#define		RIGHT	20
/* field alignment left or right, 0 or space (Z/S) padding */

/*
 * One record selection rule, built by makr() and evaluated by select().
 */
struct	rform	{	/* record selection information */
		char	rcode;	/* rule type: L E X M S F G N V A O D B or U */
		char	rmode;	/* 'o' (or) / 'a' (and) from the R line; ' ' for code L */
		int	rfld;	/* field number tested; negative counts back from the last field */
		int	rseg;	/* segment the rule belongs to, -1 = global (all segments) */
		long	loc1;	/* M/S/F/G: cspace offset of pattern; N/V, D/B: low bound; A/O: cspace offset of low string */
		long 	loc2;	/* M/S/F/G: pattern length; N/V, D/B: high bound; A/O: cspace offset of high string */
		};

/*
 * One output generation step, built by makt() and executed by build().
 */
struct	tform	{	/* field/text specification information */
		char	tcode;	/* step type: C copy, T text, F field, N newline, E enumerate */
		char	tmode;	/* T: 'u' unconditional, 'c'/'x' conditional; F: 'c' conditional or ' ' */
		int	tnum;	/* T/E: cspace offset of text; F: output mode (LN..DT); N: column limit */
		int	tfld;	/* field number (F), or field tested by a conditional T */
		int	fsize;	/* T/E: length of text; F: output width */
		int	tseg;	/* segment this step belongs to */
		};

/* pattern tables filled while options/pattern lines are read */
struct	rform	rsel[NREC];	/* record selection */
struct	tform	tsel[NTEXT];	/* field/text selection */
long	ecnt[MAXSEG];		/* counts of selected records */
int	eploc[MAXSEG];		/* loc of text */	/* cspace offset of the 'e' text per segment, -1 = none */
int	vcnt	0;	/* count of internal variables */	/* one per -v option */
int	vloc[10];	/* loc of internal variables */	/* offsets into cspace */
int	vlen[10];	/* size of internal variables */
char	cspace[CSIZE];	/* space for patterns, fill text, etc. */	/* maxc is the next free offset */

/* -w mode (partition) variables */
/*
 * pt[n] describes the output file for segment n; entries are filled from
 * the partition file named by -w (one file name per line) and opened on
 * demand by xfile().
 */
struct	part {	/* partition file table */
		int	pfid;	/* open file fid, -1 = closed */
		int	*pbuf;	/* pointer to buffer if file open */	/* set to &w[slot] by xfile */
		int	sloc;	/* index into LRU allocation table */	/* slot in w[], pos[], cnt[] */
		char	fname[FNAME];	/* partition file name */
		} pt[MAXSEG];

struct	fbuf	w[MAXOPN];	/* buffers for -w mode output */
int	maxpart	0;	/* actual number of partition files */
int	curopn	0;	/* number currently open */
int	wopt	0;	/* set if -w option is selected */
int	weopt	0;	/* set if -we option is selected */
int	pos[MAXOPN];	/* segment number - part of LRU table */	/* -1 = slot free */
int	cnt[MAXOPN];	/* LRU counter */
/* LRU allocation scheme works as follows:
	If a slot is free allocate it.
	If no slot is free take the one with the highest count,
		write it out, close the file and then re-allocate it.
	Each time a file is written its count is set to 1 and all other
		counts on open files are incremented by one.
	The details of the scheme are in the internal routines xfile
		and update.
*/
/* NOTE(review): the global below is not referenced by any routine visible
 * in this file (all uses of fname are the struct part member) - it looks
 * like a stray copy of that member; confirm before removing.
 */
		char	fname[FNAME];	/* partition file name */

/* mflag: input format, 0 default, 1 = -m, 2 = -lc, 3 = -lp; tflag: -t trace;
 * curseg: segment being defined or processed; topseg: highest segment in use
 */
int	mflag, tflag, curseg, topseg;
int	debflg	0;	/* set for debugging mode */

char *progname;		/* argv[0], used in messages */
int	fid;		/* descriptor of the file currently being read */
struct	fbuf	f1,f2;	/* i/o buffers */	/* f1 = current input, f2 = standard output */
int maxr, maxt,maxc;	/* entries used in rsel, tsel and characters used in cspace */
char	iline[256];	/* current pattern line; also used as scratch space by select() */
char	*patfile;	/* name of the pattern (-f) or partition (-w) file being read */
int	bflag	0;	/* -b: print 'e' counts after the text instead of before it */
char	rbuf[MAXREC];	/* space for 1 full record */
int	rsize;		/* number of characters of the current record in rbuf */
int	nfld;		/* index of the last field in the current record (fields count from 0) */
int	fpos[MAXFLD];	/* pointers to start of each field */	/* offsets into rbuf */
int	flen[MAXFLD];	/* length of each field */
int	fptr, fcnt;	/* NOTE(review): not referenced by any routine visible in this file */

char	fldmrk	'\'';	/* field separator character (changed by -m, -lc, -lp, -ix) */
char	recmrk	'!';	/* record terminator character (changed by -m, -lc, -lp, -rx) */
int	curcol;		/* output column reached so far while building a record */
char	datsep	'/';	/* separator written between date parts by datout (-dx) */
int	glbflg	0;	/* set if next selection rule is global */
int	sel	0;	/* set if this record has been selected by any segments */

long datecv();
long atol();

main(argc, argv)
int argc;
char **argv;
{
register char c;
register int i;
int j;

	progname = *argv;
	maxr = maxt = maxc = 0;
	curseg = topseg = 0;
	mflag = tflag = 0;
	for (i=0; i<MAXSEG; i++) {
		eploc[i] = -1;
		ecnt[i] = 0L;
	}
	for (i=0; i<10; i++) vloc[i] = vlen[i] = 0;
	while (--argc) {
		argv++;
		if (**argv != '-') break;
		if (strcmp(*argv, "-a") == 0) {
			apat();
			continue;
		}
		if (strcmp(*argv, "-s") == 0) {
			spat();
			continue;
		}
		if (strcmp(*argv, "-m") == 0) {
			mflag = 1;
			fldmrk = ':';
			recmrk = '\n';
			continue;
		}
		if (strcmp(*argv, "-lc") == 0) {  /* standard -c format labels */
			mflag = 2;
			fldmrk = '\n';
			recmrk = '!';
			continue;
		}
		if (strcmp(*argv, "-lp") == 0) {  /* pdp-12 (p format) labels */
			mflag = 3;
			fldmrk = '\\';
			recmrk = '\n';
			continue;
		}
		if (strcmp(*argv, "-c") == 0) {
			cpat();
			continue;
		}
		if (strncmp(*argv, "-r", 2) == 0) {
			recmrk = (*argv)[2];
			continue;
		}
		if (strcmp(*argv,"-debug") == 0) {
			debflg = 1;
			continue;
		}
		if (strncmp(*argv, "-d", 2) == 0) {
			datsep = (*argv)[2];
			continue;
		}
		if (strncmp(*argv, "-i", 2) == 0) {
			fldmrk = (*argv)[2];
			continue;
		}
		if (strcmp(*argv, "-p") == 0) {
			argc--;
			argv++;
			i = 0;
			while (c = argv[0][i]) iline[i++] = c;
			iline[i++] = '\0';
			dopat(i);
			continue;
		}
		if (strcmp(*argv, "-f") == 0) {
			argc--;
			argv++;
			patfile = *argv;
			fid = open(patfile, 0);
			if (fid < 0) {
				fprintf(stderr,"%s:  cannot open pattern file %s\n",progname, patfile);
				exit(1);
				}
			mfinit(&f1, fid);
			while (i = getl()) dopat(i);
			close(fid);
			continue;
		}
		if (strcmp(*argv, "-w") == 0) {
			argc--;
			argv++;
			patfile = *argv;
			fid = open(patfile, 0);
			if (fid < 0) {
				fprintf(stderr,"%s:  cannot open partition file %s\n",progname, patfile);
				exit(1);
				}
			mfinit(&f1, fid);
			while (i = getl()) {
				if (i > FNAME) {
					fprintf(stderr,"%s: partition file name %s is too long - over %d characters\n",
						progname,iline,FNAME-1);
					exit(1);
				}
				strcpy(pt[maxpart].fname,iline);
				pt[maxpart].pfid = -1;
				maxpart++;
			}
			close(fid);
			wopt = 1;
			for (i=0; i<MAXOPN; i++) {
				pos[i] = -1;
				cnt[i] = 0;
			}
			curopn = 0;
			continue;
		}
		if (strcmp(*argv, "-we") == 0) {
			weopt = 1;
			continue;
		}
		if (strcmp(*argv, "-v") == 0) {
			argc--;
			argv++;
			vloc[vcnt] = maxc;
			strcpy(&cspace[maxc], *argv);
			i = strlen(*argv);
			vlen[vcnt] = i;
			maxc += i+1;
			vcnt++;
			continue;
		}
		if (strcmp(*argv, "-t") == 0) {
			tflag = 1;
			continue;
		}
		if (strcmp(*argv, "-g") == 0) {
			glbflg = 1;
			continue;
		}
		if (strcmp(*argv, "-b") == 0) {
			bflag = 1;
			continue;
		}
		fprintf(stderr, "%s:  invalid option %s\n",progname,*argv);
		exit(1);
	}

/* done processing options and reading patterns - process data files */

	if (!maxr) {
		fprintf(stderr,"no record selection specified\n");
		exit(1);
	}
	if (!maxt) {
		fprintf(stderr,"no output generation specified\n");
		exit(1);
	}
	mfinit(&f2,1);  /* std output */
	topseg = curseg;
	if (tflag) diagprnt();
	if (argc < 1) {
		fid = 0;  /* stdin */
		mfinit(&f1, fid);
		goto dofile;
	}
filoop:
	if ((fid = open(*argv, 0)) < 0) {
		fprintf(stderr, "%s:  cannot open file %s\n", progname, *argv);
		exit(1);
	}
	mfinit(&f1, fid);
dofile:
	extract();
	close(fid);
	argc--;
	argv++;
	if (argc > 0) goto filoop;
/* print out accumulated counts if any 'e' patterns were specified */
	for (i=0; i<= topseg; i++) {
		if (eploc[i] < 0) continue;
		if (bflag)
			sprintf(iline,"%s%8D\n",&cspace[eploc[i]],ecnt[i]);
		else
			sprintf(iline,"%8D%s\n",ecnt[i],&cspace[eploc[i]]);
		j = strlen(iline);
		if (weopt) { /* enumerate counts go into partition files */
			curseg = i;
			xputrec(iline,j,&f2);
		} else
			mputrec(iline,j,&f2);
	}
	mflush(&f2);
	if (wopt) {	/* flush buffers */
		for (i=0; i<MAXOPN; i++) {
			if (pos[i] < 0) continue;
			mfclose(&w[i]);
		}
	}
	exit(0);
}

/*
 * dopat - dispatch one pattern line held in iline.
 * i is the line length including the terminating NUL.  The first
 * character (either case) names the pattern type; lines starting with
 * '[' are comments.  Any other first character is reported by error().
 */

dopat(i)
int i;
{
register char key;
	if (tflag) printf("pattern: length=%d line=%s\n",i,iline);
	key = iline[0];
	if (key == '[')
		return;			/* comment */
	if (key == 'R' || key == 'r')
		rpat();			/* record selection */
	else if (key == 'A' || key == 'a')
		apat();			/* select all */
	else if (key == 'C' || key == 'c')
		cpat();			/* copy record */
	else if (key == 'T' || key == 't')
		tpat(i);		/* text insertion */
	else if (key == 'F' || key == 'f')
		fpat();			/* field output */
	else if (key == 'N' || key == 'n')
		npat();			/* conditional newline */
	else if (key == 'S' || key == 's')
		spat();			/* next segment */
	else if (key == 'E' || key == 'e')
		epat(i);		/* enumerate */
	else if (key == 'G' || key == 'g')
		glbflg = 1;		/* next rule is global */
	else
		error();
}

/* spat - start the next segment; fatal if MAXSEG would be exceeded */
spat()
{
	if (++curseg < MAXSEG)
		return;
	fprintf(stderr,"%s:  Too many segments - over %d\n",progname,MAXSEG);
	exit(1);
}
/* apat - the current segment selects every record (rule code L, no field) */
apat()
{
	makr('L', ' ', 0, 0L, 0L);
}
/* cpat - the current segment copies selected records unchanged (step code C) */
cpat()
{
	makt('C', ' ', 0, 0, 0);
}


/* rpat - analyze an R pattern line (record selection) */
/* following R pattern codes are used: A, B, D, E, F, G, M, N, O, S, U, V, X */
/*
 * Line layout as read here: iline[0] is 'R', iline[1] is the mode
 * ('o' = or, 'a' = and), iline[2] is the selection type (either case),
 * iline[3..] is the field number, followed for most types by ",arg" or
 * ",arg1,arg2".  Each line adds one entry to rsel through makr().
 * Malformed lines are reported by error(), which does not return.
 *
 * NOTE(review): characters are appended to cspace without checking maxc
 * against CSIZE.
 */

rpat()
{
register int i;
register char c;
int fld;
char mode;
long l1, l2;
int flg;

	flg = 0;	/* distinguishes the variants that share one parser below */
	mode = iline[1];
	if ((mode != 'o') && (mode != 'a')) error();
	c = iline[2];	/* selection type */
	switch (c) {
		case 'E':
		case 'e':	/* selection based on field not empty*/
			fld = atoi(&iline[3]);
			makr('E', mode, fld, 0L, 0L);
			break;
		case 'X':
		case 'x':	/* selection based on field empty*/
			fld = atoi(&iline[3]);
			makr('X', mode, fld, 0L, 0L);
			break;
		case 'G':	/* sliding match fails */
		case 'g':
			flg = 3;
			goto do_m;
		case 'F':	/* fail to exact match */
		case 'f':
			flg = 2;
			goto do_m;
		case 'S':	/* sliding exact match of partial field */
		case 's':
			flg = 1;
			/* falls through to the shared pattern parser */
		case 'M':	/* selection based on field exactly */
		case 'm':	/* matching a character pattern */
do_m:
			fld = atoi(&iline[3]);
			i = 3;
			/* skip the field number up to the first comma */
mlp:			if (iline[i] == ',') goto mlpd;
			if (iline[i] == '\0') error();
			i++;
			goto mlp;
mlpd:
			l1 = maxc;	/* pattern text starts here in cspace */
			l2 = 0;		/* pattern length */
			i++;
			/* copy the pattern up to the next comma or end of line */
mlp2:
			c = iline[i++];
			if (c == '\0') goto mdone;
			if (c == ',') goto mdone;
			cspace[maxc++] = c;
			l2++;
			goto mlp2;
mdone:
			cspace[maxc++] = '\0';
			switch (flg)  {
				case 0:
					makr('M', mode, fld, l1, l2);
					break;
				case 1:
					makr('S', mode, fld, l1, l2);
					break;
				case 2:
					makr('F', mode, fld, l1, l2);
					break;
				case 3:
					makr('G', mode, fld, l1, l2);
			}
			break;
		case 'V':	/* numeric outside range check */
		case 'v':
			flg = 1;
			/* falls through to the shared range parser */
		case 'N':	/* numeric range check of a field */
		case 'n':
			fld = atoi(&iline[3]);
			i = 3;
			/* first comma: start of the low bound */
nlp1:
			if (iline[i] == ',') goto nlp1d;
			if (iline[i] == '\0') error();
			i++;
			goto nlp1;
nlp1d:
			i++;
			l1 = atol(&iline[i]);
			/* second comma: start of the high bound */
nlp2:
			if (iline[i] == ',') goto nlp2d;
			if (iline[i] == '\0') error();
			i++;
			goto nlp2;
nlp2d:
			i++;
			l2 = atol(&iline[i]);
			switch (flg) {
				case 0:
					makr('N', mode, fld, l1, l2);
					break;
				case 1:
					makr('V', mode, fld, l1, l2);
			}
			break;
		case 'O':	/* outside lexicographic range */
		case 'o':
			flg = 1;
			/* falls through to the shared range parser */
		case 'A':	/* alphabetic (lexicographic) range check */
		case 'a':
			fld = atoi(&iline[3]);
			i = 3;
			l1 = maxc;	/* cspace offset of the low string */
alp1:
			if (iline[i] == ',') goto alp1d;
			if (iline[i] == '\0') error();
			i++;
			goto alp1;
alp1d:
			i++;
			/* low string: must be terminated by a comma */
alp2:
			c = iline[i++];
			if (c == ',') goto alp2d;
			if (c == '\0') error();
			cspace[maxc++] = c;
			goto alp2;
alp2d:
			cspace[maxc++] = '\0';
			l2 = maxc;	/* cspace offset of the high string */
			/* high string: ends at a comma or end of line */
alp3:
			c = iline[i++];
			if (c == ',') goto alp3d;
			if (c == '\0') goto alp3d;
			cspace[maxc++] = c;
			goto alp3;
alp3d:
			cspace[maxc++] = '\0';
			switch (flg) {
				case 0:
					makr('A', mode, fld, l1, l2);
					break;
				case 1:
					makr('O', mode, fld, l1, l2);
			}
			break;
		case 'B':	/* beyond (outside) date range */
		case 'b':
			flg = 1;
			/* falls through to the shared date parser */
		case 'D':	/* date range checking */
		case 'd':
			fld = atoi(&iline[3]);
			i = 3;
dlp1:
			if (iline[i] == ',') goto dlp1d;
			if (iline[i] == '\0') error();
			i++;
			goto dlp1;
dlp1d:
			i++;
			l1 = datecv(i, ',');	/* low date, text ends at the next comma */
			i++;
dlp2:
			if (iline[i] == ',') goto dlp2d;
			if (iline[i] == '\0') error();
			i++;
			goto dlp2;
dlp2d:
			i++;
			l2 = datecv(i, ',');	/* high date, text ends at a comma or end of line */
			if (flg == 1)
				makr('B', mode, fld, l1, l2);
			else
				makr('D', mode, fld, l1, l2);
			break;
		case 'U':	/* record not selected in previous segment */
		case 'u':
				makr('U',mode,0,0L,0L);
				break;
		default:
			error();
	}
}

/* tpat - analyze a t (text) pattern */

tpat(lsize)
int lsize;
{
register char c, brk;
register int i;
int st, size, fld;
	brk = iline[1];
/* tform - tfld is conditional field number, num is start pos, fsize is size of text */
	st = maxc;
	i = 2;
	size = 0;
	while ((c = iline[i++]) != brk) {
		cspace[maxc++] = c;
		size++;
		if (size > lsize) error();
	}
	cspace[maxc++] = '\0';
	if (iline[i] == '\0')  {  /* unconditional */
		makt('T','u',st,0,size);
		return;
	}
	c = iline[i];
	if ((c != 'c') && (c != 'x')) error();
	i++;
	fld = atoi(&iline[i]);
	makt('T',c,st,fld,size);
	return;
}

/* epat - analyze an e (enumerate - i.e. count) pattern */

epat(lsize)
int lsize;
{
register char c, brk;
register int i;
int st, size, fld;
	brk = iline[1];
	st = maxc;
	i = 2;
	size = 0;
	while ((c = iline[i++]) != brk) {
		cspace[maxc++] = c;
		size++;
		if (size > lsize) error();
	}
	cspace[maxc++] = '\0';
	eploc[curseg] = st;
	ecnt[curseg] = 0L;
	makt('E',' ',st, 0, size);
	return;
}

/* fpat - analyze a field selection pattern */

fpat()
{
register char c, mode;
register int i;
int fld, size, md;
char tr[3];
	mode = ' ';
	size = 0;
	md = LN;
	i = 1;
	if (iline[i] == 'c') {
		mode = 'c';
		i++;
	}
	fld = atoi(&iline[i]);
flp:
	if (iline[i] == ',') goto fm;
	if (iline[i] == '\0') goto fdone;
	i++;
	goto flp;
fm:
	i++;
	if ((iline[i] == ',') || (iline[i] == '\0')) goto fdone;
	tr[0] = iline[i];
	tr[1] = iline[i+1];
	tr[2] = '\0';
	if (strcmp(tr, "LN") == 0) { md = LN;  goto fm2;}
	if (strcmp(tr, "ln") == 0) { md = LN;  goto fm2;}
	if (strcmp(tr, "RS") == 0) { md = RS;  goto fm2;}
	if (strcmp(tr, "rs") == 0) { md = RS;  goto fm2;}
	if (strcmp(tr, "RZ") == 0) { md = RZ;  goto fm2;}
	if (strcmp(tr, "rz") == 0) { md = RZ;  goto fm2;}
	if (strcmp(tr, "LS") == 0) { md = LS;  goto fm2;}
	if (strcmp(tr, "ls") == 0) { md = LS;  goto fm2;}
	if (strcmp(tr, "LZ") == 0) { md = LZ;  goto fm2;}
	if (strcmp(tr, "lz") == 0) { md = LZ;  goto fm2;}
	if (strcmp(tr, "dt") == 0) { md = DT;  goto fm2;}
	if (strcmp(tr, "DT") == 0) { md = DT;  goto fm2;}
	error();
fm2:
	i += 2;
	if (iline[i] == '\0') goto fdone;
	i++;
	size = atoi(&iline[i]);
fdone:
	if ((size == 0) && (md != LN)) error();
	makt('F',mode,md,fld,size);
	return;
}

/*
 * npat - conditional newline
 * The number after the 'n' is the column limit stored in tnum; build()
 * emits a newline when the current output column exceeds it.
 */

npat()
{
	makt('N', ' ', atoi(&iline[1]), 0, 0);
}

/*
 * datecv - convert [m]mddyy date to a long integer
 *
 * The date text starts at iline[i] and ends at the first brk character
 * or NUL.  The result is day + month*100 + year*10000, taken from the
 * last six (or five) characters of the text so that dates sort by year,
 * then month, then day.
 *
 * Text shorter than five characters cannot hold a date; 0 is returned
 * for it instead of reading characters that lie before the text (and
 * possibly before the start of iline).
 */

long datecv(i, brk)
int i;
char brk;
{
long l1;
int start;
	start = i;
	while ((iline[i] != brk) && (iline[i] != '\0')) i++;
	if (i - start < 5) return(0L);	/* too short for mddyy */
/* computation for date is day + month*100 + year*10000 */
	l1 = (iline[i-1] - '0') * 10000L;  /* year least digit */
	l1 += (iline[i-2] -'0') * 100000L;  /* year most sig digit */
	l1 += (iline[i-3] - '0') ;  /* day least sig digit */
	l1 += (iline[i-4] - '0') * 10L;	/* day most sig digit */
	l1 += (iline[i-5] - '0') * 100L;  /* month least sig digit */
	if (i - start >= 6)	/* two digit month present */
		l1 += (iline[i-6] - '0') * 1000L;  /* month most sig digit */
	return(l1);
}

makr(c, m, f, la, lb)
char c, m;
int f;
long la, lb;
{
	if (maxr >= NREC) {
		fprintf(stderr,"%s: too many record selection patterns -(over %d)\n",progname,NREC);
		exit(1);
	}
	rsel[maxr].rcode = c;
	rsel[maxr].rmode = m;
	rsel[maxr].rfld = f;
	rsel[maxr].loc1 = la;
	rsel[maxr].loc2 = lb;
	if (glbflg != 0)
		rsel[maxr].rseg = -1;
	else
		rsel[maxr].rseg = curseg;
	maxr++;
	glbflg = 0;
}

/* makt - build a tform (record construction) entry */

makt(c,m,n,f,s)
char c,m;
int n,f,s;
{
	if (maxt >= NTEXT) {
		fprintf(stderr,"%s:  too many field/text specifications (over %d)\n",progname,NTEXT);
		exit(1);
	}
	tsel[maxt].tcode = c;
	tsel[maxt].tmode = m;
	tsel[maxt].tnum = n;
	tsel[maxt].tfld = f;
	tsel[maxt].fsize = s;
	tsel[maxt].tseg = curseg;
	maxt++;
}

/* error - report the pattern line in iline as invalid and exit with status 1 */
error()
{
	fprintf(stderr,"%s:  following pattern line is invalid.\n",progname);
	fprintf(stderr,"%s\n",iline);
	exit(1);
}

getl()
{
register int i;
register char c, b;
int j;
	i = 0;
	if (fid < 0) return(0);
lp:
	c = mget(&f1);
	if (f1.nrd < 1) return(0);
	if (c == '\n') {
		iline[i++] = '\0';
		return(i);
	}
	if (c == '$') { /* internal variable interpretation */
		b = mget(&f1);
		j = (b &077) - 060;
		strncpy(&iline[i],&cspace[vloc[j]], vlen[j]);
		i += vlen[j];
		goto lp;
	}
	if (c == '\\')
		c = mget(&f1);
	iline[i++] = c;
	goto lp;
}

/*
 * extract - process one file of data
 * Reads records with grec() until it reports end of input.  Each record
 * is offered to every segment in turn (0 through topseg); a segment whose
 * rules accept it gets its output built.  sel is cleared per record so
 * 'U' rules can tell whether an earlier segment took the record.
 */

extract()
{
	while (grec()) {
		if (debflg) printf("extract: size=%d, #fld=%d, record=\n     %s\n",rsize,nfld,rbuf);
		sel = 0;	/* record not yet selected */
		curseg = 0;
		while (curseg <= topseg) {
			if (select()) {
				if (debflg) printf("extract: selected for segment %d\n",curseg);
				build();
				if (debflg) printf("extract: built\n");
			}
			curseg++;
		}
	}
}

/* grec - read and analyze next record (ignoring del characters) */

grec()
{
register char c;
register int i, fi;
	fi = i = 0;
	flen[0] = 0;
glp:
	c = mget(&f1);
	if (f1.nrd < 1) return(0);
	if (c == 0177) goto glp;
	if (i >= 2400) {  /* record too large (or wrong type) */
		fprintf(stderr,"extract: record too long (or wrong mode?) - record starts\n%70.70s\n",rbuf);
		exit(1);
	}
	if (c == fldmrk) {
		fi++;
		rbuf[i++] = c;
		fpos[fi] = i;
		flen[fi] = 0;
		goto glp;
	}
	if (c == recmrk) {
		rbuf[i++] = c;
		if (recmrk != '\n')  rbuf[i++] = '\n';
		rsize = i;
		nfld = fi;
		if (mflag == 2) c = mget(&f1);
		return(1);
	}
	if (c == '\n') goto glp;
/* need code here to handle comments in label forms */
	rbuf[i++] = c;
	flen[fi]++;
	goto glp;
}

/* select - decide if a record is to be kept - return 1 for yes */

select()
{
int anyor, anyand, orcnt;
register int i, fi;
int j, k, l;
long tmp;

	orcnt = anyor = anyand = 0;
	for (i = 0; i<maxr; i++) {
		if ((rsel[i].rseg != curseg) && (rsel[i].rseg != -1)) continue;
		if (rsel[i].rcode == 'L') return(1);
		if (rsel[i].rmode == 'o') orcnt++;
		fi = rsel[i].rfld;
		if (fi < 0) fi = nfld + fi + 1;
		switch(rsel[i].rcode) {
			case 'E':  /* field existance */
				if (fi > nfld) goto fail;
				if (flen[fi] > 0) goto ckmode;
				goto fail;
			case 'M':  /* exact pattern match */
				if (fi > nfld) goto fail;
				if (flen[fi] != rsel[i].loc2) goto fail;
				if (strncmp(&rbuf[fpos[fi]], &cspace[rsel[i].loc1], flen[fi]) == 0) goto ckmode;
				goto fail;
			case 'F':  /* fail exact pattern match */
				if (fi > nfld) goto ckmode;
				if (flen[fi] != rsel[i].loc2) goto ckmode;
				if (strncmp(&rbuf[fpos[fi]], &cspace[rsel[i].loc1], flen[fi]) == 0) goto fail;
				goto ckmode;
			case 'S':  /* sliding pattern match */
				if (fi > nfld) goto fail;
				k = flen[fi];
				j = rsel[i].loc2;
				if (k < j) goto fail;
				strncpy(iline, &rbuf[fpos[fi]],k);
				iline[k] = '\0';
				for (l=0; l <= (k-j); l++) {
					if (strncmp(&iline[l], &cspace[rsel[i].loc1], j) == 0) goto ckmode;
				}
				goto fail;
			case 'G':  /* sliding pattern match fails */
				if (fi > nfld) goto ckmode;
				k = flen[fi];
				j = rsel[i].loc2;
				if (k < j) goto ckmode;
				strncpy(iline, &rbuf[fpos[fi]],k);
				iline[k] = '\0';
				for (l=0; l <= (k-j); l++) {
					if (strncmp(&iline[l], &cspace[rsel[i].loc1], j) == 0) goto fail;
				}
				goto ckmode;
			case 'N':
				if (fi > nfld) goto fail;
				tmp = atol(&rbuf[fpos[fi]]);
				if ((tmp < rsel[i].loc1) || (tmp > rsel[i].loc2)) goto fail;
				if (tmp == 0) {
					if (rbuf[fpos[fi]] != '0') goto fail;
				}
				goto ckmode;
			case 'V':
				if (fi > nfld) goto ckmode;
				tmp = atol(&rbuf[fpos[fi]]);
				if ((tmp < rsel[i].loc1) || (tmp > rsel[i].loc2)) goto ckmode;
				goto fail;
			case 'A':
				if (fi > nfld) goto fail;
				strncpy(iline,&rbuf[fpos[fi]],flen[fi]);
				iline[flen[fi]] = '\0';
				if (strcmp(iline,&cspace[rsel[i].loc1]) < 0) goto fail;
				if (strcmp(iline,&cspace[rsel[i].loc2]) > 0) goto fail;
				goto ckmode;
			case 'O':
				/* outside lexicographix range */
				if (fi > nfld) goto ckmode;
				strncpy(iline,&rbuf[fpos[fi]],flen[fi]);
				iline[flen[fi]] = '\0';
				if (strcmp(iline,&cspace[rsel[i].loc1]) < 0) goto ckmode;
				if (strcmp(iline,&cspace[rsel[i].loc2]) > 0) goto ckmode;
				goto fail;
			case 'D':
				/* within date range */
				if (fi > nfld) goto fail;
				iline[0] = fldmrk;
				strncpy(&iline[1],&rbuf[fpos[fi]],flen[fi]);
				iline[flen[fi]+1] = fldmrk;
				tmp = datecv(1, fldmrk);
				if (tmp < rsel[i].loc1) goto fail;
				if (tmp > rsel[i].loc2) goto fail;
				goto ckmode;
			case 'B':
				/* beyond (outside) date range */
				if (fi > nfld) goto ckmode;
				iline[0] = fldmrk;
				strncpy(&iline[1],&rbuf[fpos[fi]],flen[fi]);
				iline[flen[fi]+1] = fldmrk;
				tmp = datecv(1, fldmrk);
				if (tmp < rsel[i].loc1) goto ckmode;
				if (tmp > rsel[i].loc2) goto ckmode;
				goto fail;
			case 'X':
				if (fi > nfld) goto ckmode;
				if (flen[fi] == 0) goto ckmode;
				goto fail;
			case 'U':
				if (sel) goto fail;
				goto ckmode;
		}
ckmode:
		if (rsel[i].rmode == 'o') {
			anyor = 1;
			continue;
		}
		anyand = 1;
		continue;
fail:
		if (rsel[i].rmode == 'a') return(0);
	}
	if ((anyor == 0) && (anyand ==0)) return(0);
	if ((orcnt > 0) && (!anyor)) return(0);
	sel = 1;
	return(1);
}

/* build - compose the output record */

build()
{
register int i, fi;
int md, size;
char mode;
	curcol = 0;
	for (i = 0; i<maxt; i++) {
		if (tsel[i].tseg != curseg) continue;
		if (debflg) printf("build: i=%d,pattern is tcode=%d,tmode=%c,tfld=%d,num=%d,fsize=%d\n",
		i,tsel[i].tcode,tsel[i].tmode,tsel[i].tfld,tsel[i].tnum,tsel[i].fsize);
		switch(tsel[i].tcode) {
			case 'C':  /* copy mode */
				xputrec(rbuf,rsize,&f2);
				curcol = 0;
				break;
			case 'T':  /* text insertion */
				if (tsel[i].tmode == 'u') goto do_t;
				fi = tsel[i].tfld;
				if (fi < 0) fi = nfld + fi + 1;
				if (tsel[i].tmode == 'x') goto ck_no;
				if (fi > nfld) break;
				if (flen[fi] == 0) break;
				goto do_t;
ck_no:
				if (fi>nfld) goto do_t;
				if (flen[fi] != 0) break;
do_t:
				xputrec(&cspace[tsel[i].tnum], tsel[i].fsize, &f2);
				curcol += tsel[i].fsize;
				break;
			case 'N':  /* conditional newline insertion */
				if (curcol > tsel[i].tnum) {
					xput('\n',&f2);
					curcol = 0;
				}
				break;
			case 'E':  /* enumeration */
				ecnt[curseg]++;
				break;
			case 'F':  /* copy field contents */
				fi = tsel[i].tfld;
				if (fi < 0) fi = nfld + fi + 1;
				md = tsel[i].tnum;
				mode = tsel[i].tmode;
				size = tsel[i].fsize;
				if (mode == 'c') {
					if (fi > nfld) break;
					if (flen[fi] == 0) break;
				}
				if (fi > nfld) {
					flen[fi] = fpos[fi] = 0;
				}
				switch (md) {
					case LN:  /* no padding */
						xputrec(&rbuf[fpos[fi]],flen[fi], &f2);
						curcol += flen[fi];
						break;
					case LS:  /* ljsf */
						adj(' ',LEFT, size, &rbuf[fpos[fi]],flen[fi]);
						break;
					case LZ:  /* ljzf */
						adj('0',LEFT, size, &rbuf[fpos[fi]],flen[fi]);
						break;
					case RS:  /* rjsf */
						adj(' ',RIGHT, size, &rbuf[fpos[fi]],flen[fi]);
						break;
					case RZ:  /* rjzf */
						adj('0',RIGHT, size, &rbuf[fpos[fi]],flen[fi]);
						break;
					case DT:  /* date in form mm/dd/yy */
						if (flen[fi] == 0)
							adj(' ', RIGHT, size, &rbuf[fpos[fi]],flen[fi]);
						else
							datout(size,&rbuf[fpos[fi]],flen[fi]);
						break;
				}
		}
	}
	if (curcol > 0) xput('\n',&f2);
}

/* adj - generate output left or right justified, blank or zero filled */

adj(fill,side,size,string,stlen)
char fill,*string;
int side,size,stlen;
{
register int i;
int fs;
	fs = size - stlen;
	curcol += size;
	if (fs < 0) fs = 0;
	if (stlen >= size) goto filled;
	if (side == RIGHT) goto right;
	xputrec(string,stlen,&f2);
	for (i=0; i<fs; i++) xput(fill,&f2);
	return;
right:
	for (i=0; i<fs; i++) xput(fill,&f2);
filled:
	fs = stlen;
	if (fs > size) fs = size;
	xputrec(string, fs, &f2);
	return;
}

diagprnt()
{
register int i;
	for (i=0; i<maxr; i++)
		printf("rform= %c %c %5d %8D %8D%8d\n"
		,rsel[i].rcode,rsel[i].rmode,rsel[i].rfld,rsel[i].loc1,rsel[i].loc2,rsel[i].rseg);
	for (i=0; i<maxt; i++)
		printf("tform= %c %c %5d %5d %5d%8d\n"
		,tsel[i].tcode,tsel[i].tmode,tsel[i].tnum,tsel[i].tfld,tsel[i].fsize,tsel[i].tseg);
	write(1, cspace, maxc);
	write(1, "\n", 1);
}

/* datout - form a date into mm/dd/yy for output */

datout(size,string, stlen)
int size,stlen;
char *string;
{
register int i, j, k;
	i = 1;
	if (stlen == 5) {
		xput('0',&f2);
		i = 0;
		goto p2;
	}
	xput(string[0],&f2);
p2:
	xput(string[i++],&f2);
	for (j = 0; j<2; j++) {
		xput(datsep, &f2);
		for (k = 0; k<2; k++)
			xput(string[i++],&f2);
	}
}

/* following is the code to implement the partition option (-w).
 * xfile, which manages the buffer allocation is called by xput or xputrec
 * just before calling mput or mputrec.
 * This allows xfile to select the proper buffer for output.
 */

/*
 * xfile - select the proper output file and allocate a buffer if necessary
 *
 * Returns the buffer that output for the current segment must go to:
 * the standard output buffer f2 when -w is not in effect or the segment
 * has no partition file, otherwise the buffer of the segment's partition
 * file.  A partition file that is not open is opened for writing at its
 * end (or created), using a free slot of w[] or, when all MAXOPN slots
 * are busy, the slot with the highest LRU count after closing its file.
 * The LRU counters are refreshed on every call that returns a partition
 * buffer.  Failure to open or create the file is fatal.
 */

int *xfile()
{
register int i, j, k;
	i = j = 0;	/* only k is set on the free-slot path; keep the debug trace defined */
	if (!wopt) return(&f2); /* stdout */
	if (curseg >= maxpart) return(&f2);
	if (debflg) fprintf(stderr,"xfile: checking - curseg = %d\n",curseg);
	if (pt[curseg].pfid >= 0) {
		update();	/* update LRU table */
		return(pt[curseg].pbuf);
	}
	if (debflg) fprintf(stderr,"xfile: not open\n");
	if (curopn < MAXOPN) goto alloc;
/* we must select a file to close, after flushing its buffer.
 * then it can be allocated to this new use.
 */
	j = k = 0;
	for (i=0; i<MAXOPN; i++)
		if (cnt[i] > j) {
			j = cnt[i];
			k = i;		/* k is a less recently used (LRU) buffer */
		}
	mfclose(&w[k]);
	pt[pos[k]].pfid = -1;	/* the evicted segment's file is now closed */
	curopn--;
	if (debflg) fprintf(stderr,"xfile: closed k=%d to allow alloc\n",k);
	goto alloc2;
alloc:
	for (k=0; k<MAXOPN; k++)
		if (pos[k] == -1) break;	/* found empty slot */
alloc2:	/* allocate an available buffer */
	if (debflg) fprintf(stderr,"xfile: alloc2 - i,j,k= %d,  %d,  %d\n",i,j,k);
	pos[k] = curseg;
	if ((i = open(pt[curseg].fname,1)) < 0) { /* no such file */
		if ((i = creat(pt[curseg].fname,0664)) < 0) { /* and can't create */
			fprintf(stderr,"%s: Cannot open partitioned output file %s. Job killed.\n",progname,pt[curseg].fname);
			exit(1);
		}
	} else  /* file exists - seek to end of file */
		lseek(i, 0L, 2);
	pt[curseg].pfid = i;
	mfinit(&w[k],i);
	curopn++;
	if (debflg) fprintf(stderr,"xfile: allocated k=%d\n",k);
	pt[curseg].pbuf = &w[k];
	pt[curseg].sloc = k;
	update();
	return(pt[curseg].pbuf);
}
/*
 * update - update buffer usage counters
 * The slot of the current segment becomes the most recently used
 * (count 1); every other slot in use ages by one.
 */
update()
{
register slot, n;
	slot = pt[curseg].sloc;
	for (n = 0; n < MAXOPN; n++)
		if ((pos[n] >= 0) && (n != slot))
			cnt[n]++;
	cnt[slot] = 1;
}
/*
 * xput - write one character to the output chosen by xfile() for the
 * current segment.  The file argument is ignored.  In debug mode the
 * buffer is flushed after every call.
 */
xput(c,file)
char c;
struct fbuf *file;
{
struct fbuf *out;
	out = xfile();
	mput(c, out);
	if (debflg)
		mflush(out);
}
/*
 * xputrec - write the n characters at rec to the output chosen by
 * xfile() for the current segment.  The file argument is ignored.  In
 * debug mode the buffer is flushed after every call.
 */
xputrec(rec, n,file)
char *rec;
int	n;
struct fbuf *file;
{
struct fbuf *out;
	out = xfile();
	mputrec(rec, n, out);
	if (debflg)
		mflush(out);
}
