/*
	copyright (c) 1982 by Steve Passe, all rights reserved.

	this code may be used/distributed subject to the following conditions:

	1: non-comercial use (i.e. you don't sell anything using it).
	2: credits and copyright notice remain in all distributions.

	contact author about commercial rights
*/

/*
	btree.c - created 5/1/82
*/

/*
	The second attempt at a btree.  This tree will be what is
referred to as a 'b+' tree, i.e. all keys are kept in leaves even if
they also occur somewhere in a branch.  This fact in conjunction with
the linked list storage format of leaves will allow for fast
sequential searches.  Furthermore the index (b+tree) file is kept
separate from the actual data file.  Each key stored in the index has
one or more associated logical record pointers to be used by the
application program to access the actual data record.

*/

/*
	Features suggested by potential users:

1.  Multiple trees open at same time (preferably 16, memory is limit
    here).

2.  Multiple occurrences of same key in index (duplicate keys).

3.  Ability to search for first occurrence of a (possibly duplicated)
    key (last key added), or alternately to search for the last
    occurrence (first key added).  This implicitly says that a
    duplicate key will always be added to the head of the group of
    duplicate keys.

4.  Sequential scan capability in either last key to first key order
    (lifo), or first key to last key order (fifo).

5.  Multiple pointers associated with each key.

6.  Ability to construct a new btree from one or more existing btrees.

7.  Re-use of deleted nodes.

*/

/*
	functions:


bt_init(buffers, max_node)
unsigned buffers;
char max_node;
{
	setup btree access parameters. buffers specifies number
	of buffers open at once, max_node specifies size of
	largest node in (128 byte) sectors.

	returns OK or ERROR
}

bt_creat(bt_name, blocksize, nodesize, key_type, dup_flag)
char *bt_name;
char blocksize;
char nodesize;
char key_type;
char dup_flag;
{
	creat a new btree, defining the following:

	1. the name of the btree file
	2. the size of a physical sector on host disk (in 128 byte log. sectors)
	3. the size of a branch/leaf (in blocks, i.e. physical sectors)
	4: key type: string, int, unsigned, char (byte)
	5: usage of duplicate keys

	returns OK (null) if all goes well, ERROR otherwise.
}

struct bt_def *
bt_open(bt_name)
char *bt_name;
{
	open an existing btree, where bt_name points to a string containing
	a valid filename.

	returns the address of a struct of type bt_def which was read from the
	first sector of bt_name.
}

bt_close(btp)
struct bt_def *btp;
{
	close an open btree defined by the struct btp.

	returns OK, or ERROR if problems.
}

key_insert(btp, key, data_ptr)
struct bt_def *btp;
union _key key;
unsigned data_ptr;
{
	insert 'key', along with associated 'data_ptr' in btree defined by
	btp.

	returns OK, or:	
					ERROR (-1), misc. errors
					NO_DUPS (-2), duplicate key (and dups disabled)
}

key_delete(btp, key, data_ptr)
struct bt_def *btp;
union _key key;
unsigned data_ptr;
{
	remove 'key', along with associated 'data_ptr' from btree defined by
	btp.

	returns OK, or:
					ERROR (-1), misc. errors
					NOT_FOUND (-2), key is not present in the tree
					BAD_PTR (-3), data_ptr argument doesn't match the
									stored value
}

key_search(btp, key, key_order)
struct bt_def *btp;
union _key key;
char key_order;
{
	place data_ptr associated with 'key' into struct pointed to by
	btp.

	returns:
			ERROR (-1), for errors
			FOUND (-2), if key is present
			BELONGS (-3), if key is not present
}

unsigned
key_retreive(btp, key, key_order)
struct bt_def *btp;
union _key key;
char key_order;
{
	return data_ptr associated with 'key' in btree defined by btp.

	call key_search(), then...
	return NULL if not found, btp->_data_ptr if found, or ERROR if problems.
}

unsigned
key_next(btp)
struct bt_def *btp;
{
	return data_ptr associated with 'next key' in btree defined by btp.
	return NULL if not found, data_ptr if found, or ERROR if problems.
}

unsigned
key_last(btp)
struct bt_def *btp;
{
	return data_ptr associated with 'previous key' in btree defined by btp.
	return NULL if not found, data_ptr if found, or ERROR if problems.
}

unsigned
bt_entries(btp)
struct bt_def *btp;
{
	return number of entries (keys) presently in btree
}

*/

/*
	structure of each branch:

parent_ptr:left_ptr:right_ptr:end_offset:node_ptr:key1:node_ptr:..keyi:node_ptr

	Parent, left, & right_ptr are unsigneds and point to the parent node, left
(preceding) node, and right (following) node, respectively.

	The parent_ptr will be NULL if the branch is also the root node of the
tree, otherwise it will be a positive number that represents the logical branch
that points to this node.  Left_ptr points to the node immediately preceding
the present node at this level, while right_ptr points to the following node at
the present level.  The first node of any level will have a left_ptr of 0 (NULL)
while the last node will have a right_ptr that is NULL.

	End_offset is also an unsigned and points to the first unused byte of a
branch, relative to the first byte of first node_ptr.  To get the machine
address of this byte you would add the offset to the address of the first byte
of the first node_ptr.

	Node_ptr's are unsigneds and 'point' to a node at the next lower level of
the tree.  More specifically, they are a 'logical' node number which is
converted to a specific sector number based on a:size of branches and b:first
sector of root node.  These two facts are obtained from the structure 'bt_def'
which always occurs at beginning of a btree and which is pointed to by 'btp',
the return value of a 'bt_open()'.

	Key can be any of several formats (and varying lengths if a string)
depending on the declared key_type.

*/

/*
	structure of each leaf:

parent_ptr:left_ptr:right_ptr:end_offset:
		key1:key_count1:data_ptr1...data_ptr[key_count1]..
			..keyi:key_counti:data_ptri...data_ptr[key_counti]

	As with branches, parent, left, & right_ptr are unsigneds and point to the
parent node, left (preceding) node, and right (following) node, respectively.

	The parent_ptr will be a positive number that represents the logical branch
that points to this leaf.  Left_ptr points to the leaf immediately preceding
the present leaf, while right_ptr points to the following leaf. The first leaf
will have a left_ptr of 0 (NULL) while the last leaf will have a right_ptr that
is NULL.

	End_offset is also an unsigned and points to the first unused byte of a
leaf, relative to the first byte of first key.  To get the machine address of
this byte you would add the offset to the address of the first byte of the
first key.

	Key can be any of several formats (and varying lengths if a string)
depending on the declared key_type.

	Key_count reflects the number of key occurrences (when dups are allowed)
and thus specifies the number of data_ptrs that MUST follow it.

	Data_ptr is an unsigned and represents the logical record number of another
file that is related to the preceding key.

*/

/*
	Buffer strategy:

	A call to bt_init() must be made before any btree calls can be made.  This
function sets aside enough core storage for the buffers used by the tree.  It
then initializes a table of pointers to buffers (Buf_ptrs[]) to NULLs, the
state of a table with no active buffers.

	The function get_node() checks this table for an active buffer with the
node in question.  If buffered, it returns a pointer to this buffer
(specifically the first byte past header info), if not it scans Buf_ptrs[] for
an empty buffer.  If an empty buffer is found this buffer is filled with the
requested node and the pointer to same is placed at the top of Buf_ptrs[].  If
not found the least recently used buffer is found (via bottom pointer in
Buf_ptrs[]), updated to disk if necessary (flagged by bufrd_node._update_flag),
and used to load the requested node.


*/

/* defines */

#define MAX_BT_BUF 20					/* max buffers in use */
#define BT_BUFSPACE 10					/* max space to use, in k */
#define BUFRD_SIZE 5					/* size of struct bufrd_node		*/
										/* bds can't do sizeof until a copy */
										/* of struct is made				*/
										/* NOTE(review): struct bufrd_node	*/
										/* holds two chars, a char * and	*/
										/* two unsigneds, which looks like	*/
										/* more than 5 bytes -- confirm		*/
#define FIFO 'f'						/* key_order: first key added first */
#define LIFO 'l'						/* key_order: last key added first */

/*
	Return codes.  Several codes share a value, so a code only has meaning
	for the function that returned it: -2 is NO_DUPS from key_insert(),
	NOT_FOUND from key_delete() and FOUND from key_search(); -3 is BAD_PTR
	from key_delete() and BELONGS from key_search().
*/
#define NO_DUPS -2						/* flag dup key when not allowed */
#define NOT_FOUND -2					/* key not found for key_delete() */
#define BAD_PTR -3						/* mismatched ptrs on key_delete() */
#define FOUND -2						/* key found by key_search() */
#define BELONGS -3						/* key not present (by key_search()) */

/*
	structures:
*/

/*
	Descriptor of one btree.  bt_creat() writes it into the first sector of
	the index file and bt_open() reads that sector back into allocated
	storage.  _bt_fd, _@_node and _@_key are run-time values: bt_creat()
	stores NULL in them and bt_open() fills them in after reading.

	NOTE(review): '@' is not a legal identifier character in standard C;
	the _@_ names are kept because the rest of the file refers to them.
*/
struct bt_def {
char _key_type;						/* 'data type' of key: 'i','c','u','s' */
char _keysize;						/* size in bytes, or NULL if variable */
char _dup_flag;						/* true if duplicate keys allowed */
int (*_compare) ();					/* ptr to 'compare' funct, returns <,>,= */
unsigned _free_node;				/* ptr to first free node of linked list */
char _order;						/* order or 'depth' of tree */
unsigned _entries;					/* number of keys presently in tree */
char _blocksize;					/* size of physical sector in	*/
									/*	(logical 128 byte) sectors	*/
char _branchsize;					/* size of branch in sectors (128 byte) */
char _leafsize;						/* size of leaf in sectors (128 byte) */
int _bt_fd;							/* fd of file holding this tree */
unsigned _@_node;					/* currently addressed leaf (logical) */
unsigned _@_key;					/* address (machine) of current key */
};

/*
	Header kept for each node buffer; Buf_ptrs[] holds pointers to these.
*/
struct bufrd_node {
char _in_use;						/* this buffer currently in use */
char _update_flag;					/* buffer must be written to disk before reuse */
char *_buf_base;					/* presumably start of the buffered node -- TODO confirm */
unsigned _btp;						/* presumably the bt_def this node belongs to -- TODO confirm */
unsigned _node_num;					/* presumably logical number of buffered node -- TODO confirm */
};

/* unions */

/*
	A key value as passed to the key_...() functions.  bt_creat() accepts
	key types 's', 'i', 'u' and 'c'; presumably the member in use is the
	one matching the tree's _key_type -- TODO confirm.
*/
union _key {
char *_skey;						/* key is a string */
int _ikey;							/* key is an integer */
unsigned _ukey;						/* key is an unsigned */
char _ckey;							/* key is a char */
};

/* externals */

struct bufrd_node *Buf_ptrs[MAX_BT_BUF];	/* table of ptrs to buffers, cleared by bt_init() */
unsigned Bufspace;							/* address of buffer core from alloc(), NULL if none */

/*
	setup btree access parameters. buffers specifies number of buffers open at
once, max_secs specifies size of largest node in (128 byte) sectors.  enough
core to hold all buffers plus 5 bytes for each buffer's header info (struct
bufrd_node) is gotten from alloc().

	returns OK or ERROR
*/

bt_init(buffers, max_secs)
unsigned buffers;
char max_secs;
{
	if ((Bufspace = (((SECSIZ * max_secs) + BUFRD_SIZE) * buffers))
			> (BT_BUFSPACE * 1024)) {
		Bufspace = NULL;
		return ERROR;
	}												/* not enough room */
	if ((Bufspace = alloc(Bufspace)) == NULL) {
		return ERROR;								/* get core for buffers */
	}
	setmem(Buf_ptrs, (MAX_BT_BUF + 1) * 2, NULL);	/* clear buf ptrs */
	return OK;
}

/*
	returns OK (null) if all goes well, ERROR otherwise.
*/

bt_creat(bt_name, blocksize, nodesize, key_type, dup_flag)
char *bt_name;
char blocksize;
char nodesize;
char key_type;
char dup_flag;
{
	int fd;
	char header[128];
	struct bt_def *hdr_ptr;
	int intcmp(), bytcmp(), unscmp(), strcmp();

	if ((fd = creat(bt_name)) == ERROR) {			/* creat the file */
		return ERROR;
	}
/** build a header (struct of type bt_def) */
	setmem(header, 128, NULL);						/* clear first block */
	hdr_ptr = header;								/* make ptr to struct */
	hdr_ptr->_key_type = key_type;
	switch (key_type) {
	case 'i':
		hdr_ptr->_keysize = 2;
		hdr_ptr->_compare = intcmp;
		break;
	case 'c':
		hdr_ptr->_keysize = 1;
		hdr_ptr->_compare = bytcmp;
		break;
	case 'u':
		hdr_ptr->_keysize = 2;
		hdr_ptr->_compare = unscmp;
		break;
	case 's':
		hdr_ptr->_keysize = 0;
		hdr_ptr->_compare = strcmp;
		break;
	default:
		return ERROR;
	}
	hdr_ptr->_dup_flag = dup_flag;
/** need to fix following line to handle nodes smaller than blocksize */
	hdr_ptr->_free_node = (2 * blocksize);		/* block following rootblock */
	hdr_ptr->_order = 0;						/* empty tree */
	hdr_ptr->_entries = 0;						/* empty tree */
	hdr_ptr->_blocksize = blocksize;
	hdr_ptr->_branchsize = nodesize;
	hdr_ptr->_leafsize = nodesize;
	hdr_ptr->_bt_fd = NULL;						/* dynamic storage for fd */
	hdr_ptr->_@_node = NULL;					/* dynamic buffer */
	hdr_ptr->_@_key = NULL;						/* again, dynamic */
	if (write(fd, header, 1) != 1) {			/* install header */
		return ERROR;
	}
	setmem(header, 128, NULL);					/* fill remainder of 	*/
	for (x = blocksize; --x; ) {				/*	block with NULLs	*/
		if (write(fd, header, 1) != 1) {
			return ERROR;
		}
	}
/*? build empty root node */
	setmem(header, 128, NULL);					/* clear root */
	header[7] = 8;								/* point to first cover_ptr */
	if (write(fd, header, 1) != 1) {			/* install root */
		return ERROR;
	}
	setmem(header, 128, NULL);					/* fill remainder of 	*/
	for (x = blocksize; --x; ) {				/*	block with NULLs	*/
		if (write(fd, header, 1) != 1) {
			return ERROR;
		}
	}
	if (write(fd, header, 1) != 1) {		/* make an end link of freelist */
		return ERROR;						/*	(NULL or no ptr) does it	*/
	}
	return close(fd);							/* close file */
}

/*
	open an existing btree, where bt_name points to a string containing
	a valid filename.

	returns the address a struct of type bt_def which was read from first
	sector of bt_name.
*/

struct bt_def *
bt_open(bt_name)
char *bt_name;
{
	int fd;
	struct bt_def *btp;

	if ((fd = open(bt_name, 2)) == ERROR) {			/* open the file */
		return ERROR;
	}
	if (!(btp = alloc(128)) {			/* allocate 128 bytes for bt_def */
		return ERROR;
	}

	if (read(fd, btp, 1) <= 0) {	/* read first sec. into allocated space */
		return ERROR;
	}
	btp->_bt_fd = fd;							/* dynamic storage for fd */
	btp->_@_node = NULL;						/* make sure that	*/
	btp->_@_key = NULL;							/*	they're empty	*/
	return btp;
}

/*
	close the file of the open btree described by btp.

	Only the file descriptor is closed; modified buffers are not yet
	flushed (see TODO) and the bt_def storage is not released here.

	returns OK, or ERROR if the close fails.
*/

bt_close(btp)
struct bt_def *btp;
{
	int status;

/** TODO: flush any modified buffers belonging to btp */

	status = OK;
	if (close(btp->_bt_fd) == ERROR) {				/* close the file */
		status = ERROR;
	}
	return status;
}

/*
	insert 'key', along with associated 'data_ptr' in btree defined by
	btp.

	returns OK, or:	
					ERROR (-1), misc. errors
					NO_DUPS (-2), duplicate key (and dups disabled)
*/

key_insert(btp, key, data_ptr)
struct bt_def *btp;
union _key key;
unsigned data_ptr;
{
/** call key_search() to find its place */

	if ((key_search(btp, key, LIFO) == FOUND) && (!btp->_dup_flag)) {
		return NO_DUPS;			/* if found and dups illegal return NO_DUPS */
	}
/** split leaf if necessary */
/** add key and data_ptr to leaf */
/** update ptrs in parent if necessary

	++btp->_entries;							/* increment btp->_entries */
	return OK;
}

/*
	remove 'key', along with associated 'data_ptr' from btree defined by
	btp.

	returns OK, or:	
					ERROR (-1), misc. errors
					BAD_PTR (-2), internal data_ptr doesn't match arg value
*/

key_delete(btp, key, data_ptr)
struct bt_def *btp;
union _key key;
unsigned data_ptr;
{
	if (key_search(btp, key, LIFO) != FOUND) {	/* call key_search for key */
		return NOT_FOUND;					/* if not found return NOT_FOUND */
	}
/** if data_ptr and data_ptrs don't match return BAD_PTR */
	/** check arg with stored data_ptr */
	/** if not same check for a dup key */
		/** if dup found check data_ptr again */
	/** till no more dups */
	/** if not found return BAD_PTR */
/** remove key from leaf */
/** check need to merge leaves */
/** merge if needed */
/** fix pointers above if merged */
/** continue merge check */

	--btp->_entries;							/* decrement btp->_entries */
	return OK;
}

/*
	place data_ptr associated with 'key' into struct pointed to by
	btp.

	returns:
			ERROR (-1), for errors
			FOUND (-2), if key is present
			BELONGS (-3), if key is not present
*/

key_search(btp, key, key_order)
struct bt_def *btp;
union _key key;
char key_order;									/* 'l'ifo or 'f'ifo */
{
	char levels, *branch_buffer, *get_node();
	unsigned node, cover_ptr();

	for (node = 1, levels = btp->_order; levels--; ) { /* at each level... */
		if ((branch_buffer = get_node(btp, node)) == ERROR) {
													/* ...buffer a node... */
			return ERROR;
		}
		if ((node = cover_ptr(btp, key, branch_buffer)) == ERROR) {
							/* ...scan root/branch for cover key's ptr... */
			return ERROR;
		}

	}							/* ...and continue till proper leaf found */
	if ((branch_buffer = get_node(btp, node)) == ERROR) {
													/* buffer a leaf... */
		return ERROR;
	}
	return scan_key(btp, key, key_order, branch_buffer);/* scan leaf for key */
}

/*
	return data_ptr associated with 'key' in btree defined by btp.

	calls key_search(), then returns:
			the value of scan_ptr(btp), if the search reports FOUND
			NULL, if the search reports BELONGS (key not present)
			ERROR, for any other search result
*/

unsigned
key_retreive(btp, key, key_order)
struct bt_def *btp;
union _key key;
char key_order;
{
	switch (key_search(btp, key, key_order)) {
	case FOUND:
		return scan_ptr(btp);		/* get data_ptr from leaf and return it */
	case BELONGS:
		return NULL;							/* key is not in the tree */
	default:
		return ERROR;							/* no other choice */
	}
}

/*
	return data_ptr associated with 'next key' in btree defined by btp.
	return NULL if not found, data_ptr if found, or ERROR if problems.
*/

unsigned
key_next(btp)
struct bt_def *btp;
{
/** get key_node and key_position from struct btp */
/** update key_node and key_position in struct btp */
/** return data_ptr associated with the updated key */
	return data_ptr;
/** or return NULL if none exists */
}

/*
	return data_ptr associated with 'previous key' in btree defined by btp.
	return NULL if not found, data_ptr if found, or ERROR if problems.
*/

unsigned
key_last(btp)
struct bt_def *btp;
{
/** get key_node and key_position from struct btp */
/** update key_node and key_position in struct btp */
/** return data_ptr associated with the updated key */
	return data_ptr;
/** or return NULL if none exists */
}

/*
	place the requested logical node from the tree desrcibed by btp into
a buffer, place ptr to this buffer in Buf_ptrs[1] and return this ptr.
If the node is already buffered merely bubble its Buf_ptrs[] index upto
element 1.  This allows a least recently used algorithm to be used in the
buffering scheme.  The buffer slot is picked from the bottom buffer managed
by Buf_ptrs[].

*/

char *
get_node(btp, node)
struct bt_def *btp;
unsigned node;
{
}

/*
	scan the branch buffered in branch_buffer for key, returning its associated
cover pointer.

*/

unsigned
cover_ptr(btp, key, branch_buffer)
struct bt_def *btp;
union _key key;
char *branch_buffer;
{
}

/*
	scan the leaf in leaf_buffer for key, in key_order, setting _@_node
and _@_leaf in btp as appropriate
	
	returns:
			ERROR (-1), for errors
			FOUND (-2), if key is present
			BELONGS (-3), if key is not present
*/

scan_key(btp, key, key_order, leaf_buffer)
struct bt_def *btp;
union _key key;
char key_order;
char *leaf_buffer;
{
}

/*
	return the logical ptr immediately following the current key in current
leaf

*/

unsigned
scan_ptr(btp)
struct bt_def *btp;
{
}

/*
	report the entry (key) count recorded in btp->_entries
*/

unsigned
bt_entries(btp)
struct bt_def *btp;
{
	unsigned count;

	count = btp->_entries;
	return count;
}

/*
	report the number of levels (order) recorded in btp->_order
*/

unsigned
bt_order(btp)
struct bt_def *btp;
{
	unsigned levels;

	levels = btp->_order;
	return levels;
}

/*
	compare the two integers pointed to by int_1 and int_2

	returns -1 if *int_1 is smaller, 1 if it is larger, 0 if they are equal
*/

intcmp(int_1, int_2)
int *int_1, *int_2;
{
	if (*int_1 == *int_2) {
		return (0);
	}
	if (*int_1 < *int_2) {
		return (-1);
	}
	return (1);
}

/*
	compare the two bytes pointed to by byt_1 and byt_2

	returns -1 if *byt_1 is smaller, 1 if it is larger, 0 if they are equal

	NOTE(review): the type 'byte' is not declared in this file -- confirm
	where it comes from.
*/

bytcmp(byt_1, byt_2)
byte *byt_1, *byt_2;
{
	if (*byt_1 == *byt_2) {
		return (0);
	}
	if (*byt_1 < *byt_2) {
		return (-1);
	}
	return (1);
}

/*
	compare the two unsigneds pointed to by uns_1 and uns_2

	returns -1 if *uns_1 is smaller, 1 if it is larger, 0 if they are equal
*/

unscmp(uns_1, uns_2)
unsigned *uns_1, *uns_2;
{
	if (*uns_1 == *uns_2) {
		return (0);
	}
	if (*uns_1 < *uns_2) {
		return (-1);
	}
	return (1);
}
_key key;
char key_order;
char *leaf_buffer;
{
}

/*
	return the logical ptr immediately 