/*

Copyright 1998, Tim Kientzle.  All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:

1. Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.
3. The name of Tim Kientzle may not be used to endorse or promote
   products derived from this software without specific prior written
   permission.

THIS SOFTWARE IS PROVIDED BY THE AUTHOR OR AUTHORS ``AS IS'' AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR AUTHORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
SUCH DAMAGE.

*/

/**
 * <P>This is a Java implementation of the Berkeley DB database
 * library.  Since DB is a C library, and is built-in to many
 * Perl systems, this allows you to create database files using
 * C, C++, or Perl, and access them from Java.</P>
 *
 * <P>Limitations: This package only supports BTREE
 * database types.  It does not support writing to
 * DB files, nor does it support cursor objects.  I've
 * tried to design the classes to easily support writing and cursors
 * at a later date. If you extend this in any way, I'd like to
 * hear from you.</P>
 *
 * <P>There are three important classes:</P>
 * <DL>
 * <DT>DBBTree<DD> Maintains the actual file handle, and
 *    answers requests for pages.  Auxiliary classes are
 *    instantiated with a reference to this class so that they
 *    can access the file.
 * <DT>DBBTreePage<DD> Most database
 *    manipulations are handled here.  For example, to search, the
 *    top-level DBBTree object fetches
 *    the root page (always page 1) from the page file and asks it to
 *    search itself. It will go back to the page file to fetch additional
 *    pages as needed.
 * <DT>DBBTreeNode<DD>A fairly simple class that encapsulates
 *    the key/data pairs stored in pages.
 * </DL>
 */

//package com.ddj;

import java.lang.*;
import java.io.*;

public class DBBTree {
  /** Magic value stored in the first four bytes of a BTREE file. */
  private static final int BTREE_MAGIC = 0x053162;
  /** The only BTREE file version this reader accepts. */
  private static final int BTREE_VERSION = 3;
  /** Bytes read from page 0 before the real page size is known. */
  private static final int METADATA_READ_SIZE = 256;
  /** Number of cache slots set up by the constructor. */
  private static final int DEFAULT_CACHE_SIZE = 8;

  protected RandomAccessFile file;
  protected File fileName;
  private int pageSize;  // page size in bytes
  private boolean msbFirst; // True = MSB byte order, False = LSB byte order

  /**
   * Close the database file.  Closing an already-closed database
   * does nothing.  A failure to close is reported on System.err
   * rather than thrown.
   */
  public void close() {
    if (file == null) {
      return; // Already closed; nothing to release
    }
    try {
      file.close();
    } catch (IOException e) {
      System.err.println("File close on " + fileName + " failed: " + e);
    } finally {
      file = null;
    }
  }

  /**
   * Re-open the database file.  Note that the constructor opens
   * the database file and saves the filename.  Does nothing if the
   * file is already open.
   *
   * @throws IOException if the file cannot be opened for reading
   */
  public void open() throws IOException {
    if (file == null) {
      file = openForReading(fileName);
    }
  }

  /**
   * Open the given file read-only, converting permission problems
   * into IOExceptions.  The original exception is kept as the cause.
   */
  private static RandomAccessFile openForReading(File name)
      throws IOException {
    try {
      return new RandomAccessFile(name, "r");
    } catch (SecurityException e) {
      throw new IOException("Wrong permissions for " + name + ": " + e, e);
    } catch (IOException e) {
      throw new IOException("file open failed: " + e, e);
    }
  }

  /**
   * Open the database stored in the named file.
   *
   * @param name path of the database file
   * @throws IOException if the file cannot be opened or is not a
   *         version 3 BTREE file
   */
  public DBBTree(String name) throws IOException {
    this(new File(name));
  }

  /**
   * Open the database stored in the given file, read its metadata,
   * and pre-load the page cache.
   *
   * @param name the database file
   * @throws IOException if the file cannot be opened or is not a
   *         version 3 BTREE file
   */
  public DBBTree(java.io.File name) throws IOException {
    fileName = name;
    file = openForReading(name);

    // If anything below fails, no caller ever receives this object,
    // so nobody could close the file handle; release it here.
    boolean ready = false;
    try {
      readMetaData();
      setCacheSize(DEFAULT_CACHE_SIZE);  // Set the cache to 8 pages as default
      primeCache();                      // Pre-read the first pages into cache
      ready = true;
    } finally {
      if (!ready) {
        close();
      }
    }
  }

  /**
   * Read the metadata on page 0 and set the byte order and page size.
   * Only the first three values are read, but here's the complete
   * list of DB BTree metadata:
   *
   * <PRE>
   *  Size  Description
   *  4     Magic Value: 0x053162 (used to determine endianness)
   *  4     Version of BTREE file
   *  4     Page Size
   *  4     Page number of first free page
   *  4     Number of records in file
   *  4     Flags:
   *          0x0020 - Duplicate keys are not permitted
   *          0x0080 - R_RECNO: "record oriented tree" (??)
   * </PRE>
   *
   * Since writing isn't supported, free pages are ignored, and
   * the code assumes there are no duplicate keys.
   */
  private void readMetaData() throws IOException {
    pageSize = METADATA_READ_SIZE; // Big enough to get all of the metadata
    byte[] metaData = readRawPage(0); // Page 0 holds metadata

    // Read magic number and determine endianness of DB file
    msbFirst = true;  // Try MSB format first...
    int magic = bytesToInt(metaData, 0);
    if (magic != BTREE_MAGIC) { // Failed? Try LSB...
      msbFirst = false;
      magic = bytesToInt(metaData, 0);
      System.out.println("Opening LSB format database");
    } else {
      System.out.println("Opening MSB format database");
    }

    // Read version number and abort if not a DB file.
    int version = bytesToInt(metaData, 4);
    if ((magic != BTREE_MAGIC) || (version != BTREE_VERSION)) {
      throw new IOException("Not a DB file (magic: 0x"
                            + Integer.toHexString(magic)
                            + ", version: "
                            + Integer.toString(version) + ")");
    }

    // Set actual page size.  A non-positive size would make every
    // later page read fail in a confusing way, so reject it now.
    int declaredPageSize = bytesToInt(metaData, 8);
    if (declaredPageSize <= 0) {
      throw new IOException("Invalid page size in " + fileName + ": "
                            + declaredPageSize);
    }
    pageSize = declaredPageSize;
  }

  /**
   * Search for a key.  A 'key' here is a byte array.
   *
   * @param key the key to look up
   * @return the data associated with the key,
   *         or null if the key wasn't found
   * @throws IOException if a page cannot be read
   */
  public byte [] search(byte [] key) throws IOException {
    return readPage(1).search(key);    // Search always begins on page 1
  }

  /**
   * Read a raw page, return an array of bytes containing that page.
   * If the file ends before the page does, the remaining bytes of
   * the returned array are left as zero.
   *
   * @param pageno number of the page to read (page 0 is the metadata)
   * @return a new array of pageSize bytes
   * @throws IOException if the database is closed or the read fails
   */
  protected byte []  readRawPage(int pageno) throws IOException {
    if (file == null) {
      throw new IOException("readRawPage(" + pageno
                            + ") failed: database is closed");
    }
    try {
      byte[] data = new byte[pageSize];
      // Multiply as long: pageno*pageSize overflows int on large files
      file.seek((long) pageno * pageSize);
      // A single read() may return fewer bytes than requested,
      // so keep reading until the page is full or the file ends.
      int filled = 0;
      while (filled < data.length) {
        int count = file.read(data, filled, data.length - filled);
        if (count < 0) {
          break; // End of file
        }
        filled += count;
      }
      return data;
    } catch (IOException e) {
      throw new IOException("readRawPage(" + pageno + ") failed: " + e, e);
    }
  }

  /**
   * For efficiency, a certain fixed number of pages are kept
   * in memory so we don't have to go to disk as often.
   * The cache is kept in LRU (least-recently-used) order,
   * that is, whenever a page is accessed, it gets moved to
   * the front of the list, and pages are lost when they fall
   * off the end of the list.  A slot whose page number is 0 and
   * whose page is null is empty.
   */
  int cachePageNumbers[];
  byte cachePages[][];

  /**
   * Set the cache size.  By default, it's set to 8 pages.
   * Entries already in the cache are kept, up to the new size.
   * A size of 0 disables caching.
   *
   * @param size number of pages to keep in memory
   * @throws IllegalArgumentException if size is negative
   */
  public void setCacheSize(int size) {
    if (size < 0) {
      throw new IllegalArgumentException(
          "Cache size must not be negative: " + size);
    }

    int[] newCachePageNumbers = new int[size]; // New slots start at 0
    if (cachePageNumbers != null) {
      System.arraycopy(cachePageNumbers, 0, newCachePageNumbers, 0,
                       Math.min(size, cachePageNumbers.length));
    }
    cachePageNumbers = newCachePageNumbers;

    byte[][] newCachePages = new byte[size][];
    if (cachePages != null) {
      System.arraycopy(cachePages, 0, newCachePages, 0,
                       Math.min(size, cachePages.length));
    }
    cachePages = newCachePages;
  }

  /**
   * Fill the cache with the first pages in the database.
   * This can improve perceived performance noticeably.
   * In particular, this pulls in page 1, which is the
   * top page of the B-Tree, and is searched on every lookup.
   * Priming stops when the last cache slot is filled or when
   * the end of the file is reached, whichever comes first.
   *
   * @throws IOException if the database is closed or a read fails
   */
  public void primeCache() throws IOException {
    int lastSlot = cachePageNumbers.length - 1;
    if (lastSlot < 0) {
      return; // Caching is disabled
    }
    if (file == null) {
      throw new IOException("primeCache failed: database is closed");
    }
    long fileLength = file.length();
    int pageno = 1;
    // Stop at end of file so pages that don't exist are never cached
    while (cachePageNumbers[lastSlot] == 0
           && (long) pageno * pageSize < fileLength) {
      readPage(pageno);
      pageno++;
    }
  }

  /**
   * <P>Read a page and return a DBBTreePage object wrapping it.
   * The raw bytes are what is cached; a new DBBTreePage
   * is created on every call.</P>
   *
   * <P>Because only a few pages are cached, it's perfectly
   * reasonable to use a simple array and just move elements down
   * in the array to maintain LRU order.  For a large cache,
   * something more sophisticated would be appropriate.</P>
   *
   * @param pageno number of the page to fetch
   * @return a page object backed by the (possibly cached) raw bytes
   * @throws IOException if the page has to be read and the read fails
   */
  DBBTreePage readPage(int pageno) throws IOException {
    // Try to find page in cache, return it if found.  The null test
    // keeps an empty slot (page number 0) from matching page 0.
    for (int i = 0; i < cachePageNumbers.length; i++) {
      if (cachePageNumbers[i] == pageno && cachePages[i] != null) {
        byte[] page = cachePages[i];
        putAtFront(i, pageno, page); // Move this page to front of cache
        return new DBBTreePage(this, page);
      }
    }

    // Page wasn't in cache, read it from disk
    byte[] page = readRawPage(pageno);

    // Insert new page at front of cache; the last entry falls off.
    if (cachePageNumbers.length > 0) {
      putAtFront(cachePageNumbers.length - 1, pageno, page);
    }
    return new DBBTreePage(this, page);
  }

  /**
   * Shift the first 'count' cache entries down by one slot and
   * store the given page in slot 0.
   */
  private void putAtFront(int count, int pageno, byte[] page) {
    System.arraycopy(cachePageNumbers, 0, cachePageNumbers, 1, count);
    System.arraycopy(cachePages, 0, cachePages, 1, count);
    cachePageNumbers[0] = pageno;
    cachePages[0] = page;
  }

  /**
   * Convert four bytes from a byte array into an integer, using
   * LSB or MSB data order, as appropriate.
   *
   * @param barray Array of bytes
   * @param offset Offset in barray to read integer
   * @return integer formed from indicated bytes
   */
  int bytesToInt(byte barray[],int offset) {
    // Mask each byte so it is treated as unsigned
    int b0 = barray[offset    ] & 0xff;
    int b1 = barray[offset + 1] & 0xff;
    int b2 = barray[offset + 2] & 0xff;
    int b3 = barray[offset + 3] & 0xff;
    if (msbFirst) {
      return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
    } else {
      return (b3 << 24) | (b2 << 16) | (b1 << 8) | b0;
    }
  }

  /**
   * Convert two bytes from a byte array into a short, using
   * LSB or MSB data order, as appropriate.  The result is signed;
   * mask with 0xFFFF to recover an unsigned 16-bit value.
   *
   * @param barray Array of bytes
   * @param offset Offset in barray to read short
   * @return short formed from indicated bytes
   */
  short bytesToShort(byte barray[],int offset) {
    int b0 = barray[offset    ] & 0xff;
    int b1 = barray[offset + 1] & 0xff;
    if (msbFirst) {
      return (short) ((b0 << 8) | b1);
    } else {
      return (short) ((b1 << 8) | b0);
    }
  }
}

/****************************************************************************/

/**
 *  A single BTREE page read from the database.
 *
 *
 * <P>A Page in a BTREE file has the following layout:</P>
 * <PRE>
 * +--------+----------+------------+---------------------------+
 * | Header | Pointers | Free space | Nodes                     |
 * +--------+----------+------------+---------------------------+
 * </PRE>
 *
 * <P><B>Header:</B> A BTREE page has the following metadata:</P>
 * <PRE>
 *  Size  Description
 *  4     Number of this page
 *  4     Page number of preceding page
 *  4     Page number of following page
 *  4     Flags:
 *           0x01 - BTree internal page
 *           0x02 - BTree leaf page
 *           0x04 - Overflow page
 *           0x08 - Recno internal page
 *           0x10 - Recno leaf page
 *            0x20 - Never delete this chain of pages
 *  2     Lower bound of free space on page
 *  2     Upper bound of free space on page
 * </PRE>
 *
 * <P><B>Pointers:</B> A series of two-byte offsets indicating
 * the location of successive nodes.</P>
 *
 * <P><B>Nodes:</B> For internal pages, each node contains a key
 * and the page containing elements greater than or equal to that
 * key (but smaller than the next key).  For leaf pages,
 * each node has a key and a data item.  Ordinarily, the key and/or data
 * immediately follow the flags.  If the key or data is too big,
 * it's stored on a separate 'overflow' page (or pageS, if it's really
 * big!).  In that case, the node contains a 4-byte page number
 * and a 4-byte size instead of the corresponding key or data bytes.</P>
 *
 * <P>Overflow pages have the same header as normal pages, but
 * simply hold raw key/data bytes.  They use the 'next page' value
 * to chain successive pages of data together.</P>
 *
 * <P>Each node has the following format:</P>
 * <PRE>
 * Size  Description
 *  4    Key Size
 *  4    Page Number for internal nodes, Data Size for Leaf nodes
 *  1    Flags: 0x01 for overflow data, 0x02 for overflow key
 *  n    Key/Data
 * </PRE>
 */
class DBBTreePage {
  /** Size in bytes of the page header that precedes the pointers. */
  private static final int HEADER_SIZE = 20;
  /** Size in bytes of one entry in the pointer array. */
  private static final int POINTER_SIZE = 2;

  DBBTree dbFile;
  protected byte rawPage[];
  byte [] getRawPage() { return rawPage; }

  // Check certain flags (the flags word is at byte 12 of the header)
  public int getFlags() { return dbFile.bytesToInt(rawPage,12); }
  boolean isBTree() { return (getFlags() & 0x03) != 0; }
  boolean isLeaf() { return (getFlags() & 0x12) != 0; }
  boolean isOverflow() { return (getFlags() & 0x04) != 0; }

  /**
   * Lower bound of free space, as the signed 16-bit value stored
   * at byte 16 of the header.  Values above 32767 come back negative.
   */
  public short getLower() { return dbFile.bytesToShort(rawPage,16); }

  /** Lower bound of free space as an unsigned value (0..65535). */
  private int lowerBound() { return getLower() & 0xFFFF; }

  /** Number of node pointers on this page. */
  protected short lastOffset() {
    return (short)((lowerBound() - HEADER_SIZE) / POINTER_SIZE);
  }


  /**
   * Basically, this class just stores a reference to the
   * raw page bytes, and decodes them as necessary.
   *
   * @param file the database this page was read from
   * @param rawPageData the bytes of the page
   */
  public DBBTreePage(DBBTree file, byte[] rawPageData) {
    dbFile = file; // Handle on DBBTree object
    rawPage = rawPageData;
    if(!isBTree()) {
      // Deliberately not treated as an error: DBBTree.readPage wraps
      // every page it reads, including the pages DBBTree.primeCache
      // pre-loads, and nothing shown guarantees those are BTREE pages.
    }
  }

  /**
   * Return an object representing the indicated node on this page.
   *
   * @param nodeIndex zero-based position in this page's pointer array
   * @return the node that the pointer refers to
   * @throws IndexOutOfBoundsException if this is an overflow page or
   *         there is no pointer at that position
   */
  public DBBTreeNode getNodeBTree(int nodeIndex)
                throws IndexOutOfBoundsException {
    if(isOverflow()) { // Overflow pages don't have nodes...
      System.err.println
        ("*** Internal failure: request for offset on overflow page");
      throw new IndexOutOfBoundsException(
          "request for node " + nodeIndex + " on overflow page");
    }
    // 20-byte header + 2 bytes per pointer (long avoids int overflow)
    long offset = HEADER_SIZE + (long) POINTER_SIZE * nodeIndex;
    if(nodeIndex < 0 || offset >= lowerBound()) { // Check for nonsense...
      System.err.println
        ("*** Internal failure: request for node that doesn't exist");
      throw new IndexOutOfBoundsException(
          "no node " + nodeIndex + " on this page");
    }
    // Convert the two-byte pointer to an integer.  It is an unsigned
    // offset, so mask off the sign extension.
    int ptr = dbFile.bytesToShort(rawPage, (int) offset) & 0xFFFF;
    // Build and return a node object
    return new DBBTreeNode(this,ptr);
  }

  /**
   * <P>Locate the indicated key and return its data.
   * Note that this is implicitly recursive; to search for a key,
   * it typically has to fetch a new page and invoke this same
   * method on that page.</P>
   *
   * <P>The search is a simple binary search.  If this is an
   * internal page, it locates the key preceding (or equal to)
   * the search key, then fetches the corresponding page and recurses.
   * If this is a leaf page, it looks for an exact match.</P>
   *
   * @param key the key to look for
   * @return the data stored with the key, or null if the key is
   *         not present
   * @throws IOException if another page cannot be read, or this
   *         page holds no nodes and is not an empty leaf
   */
  public byte [] search(byte key[]) throws IOException {
    int nodeCount = lastOffset();
    if(nodeCount <= 0) {
      // A leaf whose pointer array is empty holds no keys, so
      // the key is simply not present.
      if(isLeaf() && lowerBound() == HEADER_SIZE) return null;
      throw new IOException("Corrupt BTREE page: no nodes to search"
                            + " (lower bound " + lowerBound() + ")");
    }

    // Binary search within page
    int nodeIndexFirst = 0, nodeIndexLast = nodeCount-1;
    while(nodeIndexLast > nodeIndexFirst) {
      int nodeIndex = (nodeIndexFirst+nodeIndexLast)/2;
      DBBTreeNode node = getNodeBTree(nodeIndex);

      int cmp = node.compareTo(key);
      if(cmp > 0) { // key we want is before this node
        // The -1 here prevents lock-up with a one-position window
        nodeIndexLast = nodeIndex - 1;
      } else if (cmp < 0) { // Key we want is after this node
        nodeIndexFirst = nodeIndex;
        // For leaf pages, make sure search converges
        if(isLeaf()) nodeIndexFirst++;
        // For internal pages, the search may stop with
        // two adjacent nodes (thanks to integer arithmetic).
        // Here, that case is checked for and resolved manually.
        else if (nodeIndexLast - nodeIndexFirst == 1) { // two-node window
          // Fetch second node (the first one is already in hand)
          DBBTreeNode node2 = getNodeBTree(nodeIndexLast);
          if(node2.compareTo(key) > 0) { // key is between these two nodes
            DBBTreePage page = dbFile.readPage(node.getPageNumber());
            return page.search(key);
          } else { // Key we want is at or after second node
            DBBTreePage page = dbFile.readPage(node2.getPageNumber());
            return page.search(key);
          }
        }
      } else { // Exact match
        if(isLeaf()) // If leaf, we're done
          return node.dataAsBytes();
        // if internal, we can go to the next page
        DBBTreePage page = dbFile.readPage(node.getPageNumber());
        return page.search(key);
      }
    } // end of binary search

    // Land here if binary search identified a single node
    DBBTreeNode node = getNodeBTree(nodeIndexFirst);

    // Leaf node? need an exact match to avoid failure
    if(isLeaf()) {
      if(node.compareTo(key) == 0) return node.dataAsBytes();
      else return null;
    } else { // This is an internal node, get next page and continue
      DBBTreePage page = dbFile.readPage(node.getPageNumber());
      return page.search(key);
    }
    // End of function not reached
  }

}

/****************************************************************************/

/**
 * For BTree pages, nodes are either key/pgno pairs (internal pages)
 * or key/data pairs (leaf pages).
 * Each form is stored the same, so I can take a shortcut here and just
 * use a single class for either one.
 */
class DBBTreeNode {
  /** Bytes before the key: key size (4), page number/data size (4), flags (1). */
  private static final int NODE_HEADER_SIZE = 9;
  /** Flag bit: the data lives on an overflow page. */
  private static final int FLAG_OVERFLOW_DATA = 0x01;
  /** Flag bit: the key lives on an overflow page. */
  private static final int FLAG_OVERFLOW_KEY = 0x02;

  // The file, the page and the offset are enough to find this node
  // again or to reach any other page of the database.
  private DBBTree dbFile;
  private DBBTreePage sourcePage;
  private int nodeOffset;

  private int keyLength;   // number of key bytes
  private int pageNumber;  // child page (internal) or data size (leaf)
  private byte flags;      // overflow flag bits
  private short unsignedKey[]; // key bytes as 0..255, filled on first compare

  /** Child page number; on a leaf page this value is the data size instead. */
  int getPageNumber() { return pageNumber; }

  boolean isOverflowKey() { return (flags & FLAG_OVERFLOW_KEY) != 0; }
  boolean isOverflowData() { return (flags & FLAG_OVERFLOW_DATA) != 0; }

  /**
   * Decode the fixed-size part of the node found at the given
   * offset of the given page.
   *
   * @param page page holding the node
   * @param offset position of the node within the page's raw bytes
   */
  public DBBTreeNode(DBBTreePage page,int offset) {
    byte bytes[] = page.getRawPage();
    DBBTree owner = page.dbFile;
    dbFile = owner;
    sourcePage = page;
    nodeOffset = offset;
    keyLength = owner.bytesToInt(bytes, offset);
    pageNumber = owner.bytesToInt(bytes, offset + 4);
    flags = bytes[offset + 8];
  }

  /**
   * Copy the key bytes out of the page as unsigned values, so that
   * repeated comparisons need no further conversion.
   */
  private void loadKey() {
    byte [] bytes = sourcePage.getRawPage();
    int start = nodeOffset + NODE_HEADER_SIZE;
    short [] copy = new short[keyLength];
    for(int i = 0; i < copy.length; i++) {
      copy[i] = (short)(bytes[start + i] & 0xff);
    }
    unsignedKey = copy;
  }

  /**
   * Compare this node's key with a, byte by byte, treating the
   * bytes as unsigned.  When one is a prefix of the other, the
   * shorter one sorts first.
   *
   * @param a the key to compare against
   * @return positive if this key sorts after a, zero if they are
   *         equal, negative if this key sorts before a
   */
  int compareTo(byte a[]) {
    if(unsignedKey == null) {
      loadKey();
    }
    int shared = Math.min(a.length, keyLength);
    for(int i = 0; i < shared; i++) {
      int wanted = a[i] & 0xff; // unsigned value of the byte
      int stored = unsignedKey[i];
      if(wanted != stored) {
        return (wanted < stored) ? +1 : -1;
      }
    }
    if(a.length > shared) return -1; // a has bytes left over
    if(keyLength > shared) return +1; // this key has bytes left over
    return 0;
  }

  /**
   * Return a copy of the key bytes.
   *
   * @return the key, or null (after a message on System.err)
   *         if the key is stored on an overflow page
   */
  public byte[] keyAsBytes() {
    if(isOverflowKey()) {
      System.err.println("*** Overflow keys not yet supported");
      return null;
    }
    byte [] copy = new byte[keyLength];
    System.arraycopy(sourcePage.getRawPage(), nodeOffset + NODE_HEADER_SIZE,
                     copy, 0, keyLength);
    return copy;
  }

  /**
   * Return a copy of the data bytes, which follow the key.
   *
   * @return the data, or null (after a message on System.err)
   *         if the data is stored on an overflow page
   */
  public byte[] dataAsBytes() {
    if(isOverflowData()) {
      System.err.println("*** Overflow data not yet supported");
      return null;
    }
    int dataStart = nodeOffset + NODE_HEADER_SIZE + keyLength;
    byte [] copy = new byte[pageNumber]; // pageNumber holds the data size here
    System.arraycopy(sourcePage.getRawPage(), dataStart,
                     copy, 0, pageNumber);
    return copy;
  }
}

