/*
    This source code was written by Jeffrey J. Peters (c) 1999.
    It is hereby released into the public domain, for use by anyone
    for anything.

*/

//---------------------------------------------------------------------------
#include <vcl.h>
#pragma hdrstop

#include <stdlib.h>
#include <string.h>

#include "PCRegExp.h"
#pragma package(smart_init)

//---------------------------------------------------------------------------
// Constructs the component: no pattern has been compiled yet, so the
// compiled-pattern handle and the offset array start out empty, and a
// fresh string list is created to receive captured sub-strings.
TPCRegExp::TPCRegExp(TComponent* Owner)
        : TComponent(Owner)
{
  // Nothing compiled so far.
  _PIndexArray = NULL;
  _CompiledPattern = NULL;

  // Storage for the captures produced by Execute().
  FCaptureList = new TStringList;
}
//---------------------------------------------------------------------------
// Releases everything the component owns: the capture string list, the
// compiled pattern and the offset array.
TPCRegExp::~TPCRegExp(void)
{
  if (FCaptureList)
  {
    delete FCaptureList;
    FCaptureList = NULL;
  }
  if (_CompiledPattern)
  {
    // The pattern block comes from pcre_compile() and is released with
    // free(), matching how Compile() discards a previous pattern.
    // NOTE(review): this presumes PCRE's allocator is the default
    // malloc/free pair -- confirm if pcre_malloc/pcre_free are overridden.
    free(_CompiledPattern);
    _CompiledPattern = NULL;
  }
  if (_PIndexArray)
  {
    // Allocated with new int[...] in Compile(), so it must be released
    // with the array form of delete.
    delete[] _PIndexArray;
    _PIndexArray = NULL;
  }
}
//---------------------------------------------------------------------------
// Translates a TPCREFlags set into the equivalent PCRE_* option bit mask.
// Each set member that is present contributes its PCRE_* constant; an
// empty set yields 0.
int TPCRegExp::MakeIntFromFlags(TPCREFlags &f)
{
  int bits = 0;

  bits |= f.Contains(pcfCASELESS)       ? PCRE_CASELESS       : 0;
  bits |= f.Contains(pcfMULTILINE)      ? PCRE_MULTILINE      : 0;
  bits |= f.Contains(pcfDOTALL)         ? PCRE_DOTALL         : 0;
  bits |= f.Contains(pcfEXTENDED)       ? PCRE_EXTENDED       : 0;
  bits |= f.Contains(pcfANCHORED)       ? PCRE_ANCHORED       : 0;
  bits |= f.Contains(pcfDOLLAR_ENDONLY) ? PCRE_DOLLAR_ENDONLY : 0;
  bits |= f.Contains(pcfEXTRA)          ? PCRE_EXTRA          : 0;
  bits |= f.Contains(pcfNOTBOL)         ? PCRE_NOTBOL         : 0;
  bits |= f.Contains(pcfNOTEOL)         ? PCRE_NOTEOL         : 0;
  bits |= f.Contains(pcfUNGREEDY)       ? PCRE_UNGREEDY       : 0;

  return bits;
}
//---------------------------------------------------------------------------
// Compiles FPatternString with the options in FCompileFlags and prepares
// the offset array that Execute() hands to pcre_exec().
//
// Parameters:
//   ErrorString - receives a description of the failure when false is
//                 returned; untouched on success.
//   ErrorOffset - passed straight to pcre_compile(), which stores the
//                 pattern offset of a compile error in it.
//
// Returns true when the pattern compiled and the offset array was
// allocated, false otherwise.  On any failure (including an exception from
// the array allocation) the previously compiled pattern and its offset
// array are left exactly as they were, so the object stays usable.
bool TPCRegExp::Compile(AnsiString &ErrorString, int &ErrorOffset)
{
  const char *er_str = NULL;

  // Attempt to compile the pattern for later use.
  pcre *p = pcre_compile(
                         FPatternString.c_str(),          // The regexp pattern string
                         MakeIntFromFlags(FCompileFlags), // The compile flags
                         &er_str,                         // holder of an error string
                         &ErrorOffset,                    // pattern string offset of error
                         NULL                             // Use the default character tables
                        );

  // An error was detected during the compile of the pattern.
  if (!p)
  {
    ErrorString = er_str ? er_str : "<Unknown Error>";
    // ErrorOffset was already set directly by pcre_compile()
    return false;
  }

  // Get the number of capturing subpatterns needed.  A negative value is
  // an error code and must not be used to size the array.
  const int total_brackets = pcre_info(p, NULL, NULL);
  if (total_brackets < 0)
  {
    free(p);
    ErrorString = "Internal TPCRegExp Error: pcre_info() failed";
    return false;
  }

  // Build the replacement offset array *before* discarding the old state,
  // so that a failed allocation can neither leak the new pattern nor leave
  // _PIndexArray pointing at freed memory.
  const int new_size = (total_brackets + 1) * 3;
  int *new_index = NULL;
  try
  {
    new_index = new int[new_size];
  }
  catch (...)
  {
    free(p);
    throw;
  }

  // Everything succeeded: release the old pattern and array, then commit.
  free(_CompiledPattern);
  _CompiledPattern = p;

  // The array was allocated with new[], so the array form of delete is
  // required.
  delete[] _PIndexArray;
  _PIndexArray = new_index;
  _IndexSize = new_size;

  return true;
}
//---------------------------------------------------------------------------
// Runs the previously compiled pattern against 'subject'.
//
// Parameters:
//   subject - the text to search; a NULL pointer simply yields false.
//   len     - number of characters of 'subject' to search.  Passing 0
//             means "use strlen(subject)", which lets callers either
//             search the whole string or only a leading part of it.
//
// Returns true when pcre_exec() reports a match (the whole match and the
// captures are then stored via FillUpStringList()), false when there is no
// match or no subject.  The capture list is cleared on every call that
// reaches pcre_exec().
//
// Throws a heap-allocated Exception (by pointer) when no pattern has been
// compiled or when pcre_exec() reports an internal error.
// NOTE(review): the exception is thrown as a pointer ("throw new"), so a
// handler written as catch (Exception &) will not see it -- confirm this
// is what callers expect.
bool TPCRegExp::Execute(const char *subject, int len)
{
  // A compiled pattern is a precondition.
  if (!_CompiledPattern)
    throw new Exception("Must Compile() the regexp before calling Execute()");

  // Nothing to search.
  if (!subject)
    return false;

  // A length of 0 selects the full, NUL-terminated subject.
  const int subject_len = (len == 0) ? (int)strlen(subject) : len;

  // Perform the search.
  const int rc = pcre_exec(
                           _CompiledPattern,                // The previously compiled pattern
                           NULL,                            // No pcre_extra data
                           subject,                         // The string being searched
                           subject_len,                     // Length of the subject string
                           MakeIntFromFlags(FExecuteFlags), // The execution flags
                           _PIndexArray,                    // Receives the offset pairs
                           _IndexSize                       // Number of ints in that array
                          );

  // Discard the captures from any earlier call.
  FCaptureList->Clear();

  // Positive result: a match was found; rc offset pairs were filled in.
  if (rc > 0)
  {
    FillUpStringList(subject, rc);
    return true;
  }

  // The search ran fine but simply found nothing.
  if (rc == PCRE_ERROR_NOMATCH)
    return false;

  // Anything else is an internal error; pick the matching message.
  const char *message;
  switch (rc)
  {
    case 0:
      message = "Internal TPCRegExp Error: _PindexArray not big enough";
      break;

    case PCRE_ERROR_NULL:
      message = "Internal TPCRegExp Error: NULL";
      break;

    case PCRE_ERROR_BADOPTION:
      message = "Internal TPCRegExp Error: Bad option flag";
      break;

    case PCRE_ERROR_BADMAGIC:
      message = "Internal TPCRegExp Error: Corrupted pattern data";
      break;

    case PCRE_ERROR_UNKNOWN_NODE:
      message = "Internal TPCRegExp Error: Corrupted pattern data (or PCRE bug)";
      break;

    case PCRE_ERROR_NOMEMORY:
      message = "Internal TPCRegExp Error: No Memory";
      break;

    default:
      message = "Unknown Internal TPCRegExp Error";
      break;
  }
  throw new Exception(message);
}
//---------------------------------------------------------------------------
// Extracts the sub-string subject[start, end) as an AnsiString.
//
// Parameters:
//   subject - the string the offsets refer to.
//   start   - offset of the first character of the sub-string.
//   end     - offset one past the last character of the sub-string.
//
// Returns:
//   ""           when the pair marks "no string" (either offset is -1, or
//                end is 0) or when the range is empty (start == end);
//   "<Bad Data>" when the arguments are unusable (NULL or empty subject,
//                negative start, end before start, or a range longer than
//                strlen(subject));
//   otherwise a copy of the requested characters.
AnsiString TPCRegExp::ConvertToString(const char *subject, int start, int end)
{
  // Some elements indicate no string by storing -1's in them.
  // And some by placing 0's there.
  if (start == -1 || end == -1 || end == 0)
  {
    return "";
  }

  const int len = end - start;

  // Check for bad values that could hurt us.  A negative start would index
  // in front of the subject, and a negative length is never valid.
  if (!subject || !subject[0] || start < 0 || len < 0 ||
      len > (int)strlen(subject))
  {
    return "<Bad Data>";
  }

  // A sub-pattern may legitimately match the empty string at a non-zero
  // offset (start == end); that is an empty capture, not bad data.
  if (len == 0)
  {
    return "";
  }

  // Copy the range into a NUL-terminated scratch buffer and convert it.
  char *p = new char[len + 1];
  strncpy(p, &subject[start], len);
  p[len] = 0;
  AnsiString ret = p;
  delete[] p;   // allocated with new[], so use the array form of delete
  return ret;
}
//---------------------------------------------------------------------------
// Converts the offset pairs stored in _PIndexArray into strings.
//
// Parameters:
//   subject - the string that was searched; the offsets index into it.
//   num     - the value returned by pcre_exec(): the number of offset
//             pairs that were written, counting the first pair, which
//             describes the entire match.  With no capturing sub-patterns
//             this is expected to be 1, meaning only the whole-match
//             string is available.
//
// Pair 0 is stored in FEntireMatch; pairs 1 .. num-1 are appended, in
// order, to FCaptureList.
void TPCRegExp::FillUpStringList(const char *subject, int num)
{
  const int *offsets = _PIndexArray;

  // Pair 0: the span of the complete match.
  FEntireMatch = ConvertToString(subject, offsets[0], offsets[1]);

  // Remaining pairs: one captured sub-string each.
  for (int pair = 1; pair < num; ++pair)
  {
    const int *span = offsets + (pair * 2);
    FCaptureList->Add(ConvertToString(subject, span[0], span[1]));
  }
}

//---------------------------------------------------------------------------
// ValidCtrCheck exists so that the compiler rejects the component if it
// still has pure virtual functions: the body instantiates TPCRegExp, which
// would not compile for an abstract class.
//
static inline void ValidCtrCheck(TPCRegExp *)
{
        // Only the ability to construct matters; the result is not used.
        (void) new TPCRegExp(NULL);
}
//---------------------------------------------------------------------------
namespace Pcregexp
{
        // Registers TPCRegExp under the "PCRE" page name.
        void __fastcall PACKAGE Register()
        {
                 // The classes to register; currently just the one component.
                 TComponentClass component_classes[] = { __classid(TPCRegExp) };

                 // The third argument is the index of the last array element
                 // (0 for a single entry), derived here from the array size.
                 const int last_index =
                     (int)(sizeof(component_classes) / sizeof(component_classes[0])) - 1;

                 RegisterComponents("PCRE", component_classes, last_index);
        }
}
//---------------------------------------------------------------------------
