// ---------------------------------------------------------------------------
// - Regex.cpp                                                               -
// - standard object library - regex class implementation                    -
// ---------------------------------------------------------------------------
// - This program is free software;  you can redistribute it  and/or  modify -
// - it provided that this copyright notice is kept intact.                  -
// -                                                                         -
// - This program  is  distributed in  the hope  that it will be useful, but -
// - without  any  warranty;  without  even   the   implied    warranty   of -
// - merchantability or fitness for a particular purpose.  In no event shall -
// - the copyright holder be liable for any  direct, indirect, incidental or -
// - special damages arising in any way out of the use of this software.     -
// ---------------------------------------------------------------------------
// - copyright (c) 1999-2001 amaury darsch                                   -
// ---------------------------------------------------------------------------

#include "Real.hpp"
#include "Regex.hpp"
#include "Vector.hpp"
#include "Boolean.hpp"
#include "Runnable.hpp"
#include "Exception.hpp"
#include "InputString.hpp"

namespace aleph {

  // the regex supported quarks
  // each quark is the interned value of a method name and is compared with
  // the quark argument received by Regex::apply to select the method
  static const long QUARK_GET      = String::intern ("get");
  static const long QUARK_MATCH    = String::intern ("match");
  static const long QUARK_LENGTH   = String::intern ("length");
  static const long QUARK_REPLACE  = String::intern ("replace");

  // the regex block node type
  // the type selects which member of the node data union is meaningful
  enum t_renode {
    RE_CHAR, // character node - matches one literal character
    RE_META, // meta character node - created from the $ escape
    RE_CSET, // character set node - created from a <...> set
    RE_BLOK, // sub block node - created from a [...] block
    RE_GMRK, // group mark - created from a ( character
    RE_GSET, // group accept - created from a ) character
    RE_OPRD  // operand node - holds both operands of an alternation
  };

  // the regex operator type
  // the operator selects how re_exec runs a node
  enum t_reoper {
    RE_NONE, // no operator - the node is executed exactly once
    RE_PLUS, // + operator
    RE_MULT, // * operator
    RE_ZONE, // ? operator
    RE_ALTN, // | operator
    RE_CTRL  // control node - used by the group mark and group accept nodes
  };

  // the regex node
  struct s_renode {
    // operator control
    t_reoper d_oper;
    // the node type
    t_renode d_type;
    // node data
    union {
      char      d_cval;
      bool*     p_cset;
      s_renode* p_nval;
    };
    // operand node
    s_renode* p_oprd;
    // next node in list
    s_renode* p_next;
    // protection flag
    bool d_prot;
    // create a new node by operator
    s_renode (t_renode type) {
      if (type != RE_CSET) {
	d_oper = RE_CTRL;
	d_type = type;
	d_cval = nilc;
	p_oprd = nilp;
	p_next = nilp;
	d_prot = false;
      } else {
	d_oper = RE_NONE;
	d_type = type;
	p_cset = new bool[256];
	p_oprd = nilp;
	p_next = nilp;
	d_prot = false;
	for (long i = 0; i < 256; i++) p_cset[i] = false;
      }
    }
    // create a new node by type and character
    s_renode (t_renode type, const char c) {
      d_oper = RE_NONE;
      d_type = type;
      d_cval = c;
      p_oprd = nilp;
      p_next = nilp;
      d_prot = false;
    }
    // create a new block node
    s_renode (s_renode* node) {
      d_oper = RE_NONE;
      d_type = RE_BLOK;
      p_nval = node;
      p_oprd = nilp;
      p_next = nilp;
      d_prot = false;
    }
    // delete this node
    ~s_renode (void) {
      if (d_type == RE_CSET) {
	delete [] p_cset;
	p_cset = nilp;
      }
      if ((d_oper == RE_ALTN) && (p_next != nilp)) p_next->d_prot = true;
      if ((d_type == RE_BLOK) || (d_type == RE_OPRD)) {
	if ((p_nval != nilp) && (p_nval->d_prot == false)) delete p_nval;
      }
      if (d_type == RE_OPRD) {
	if ((p_oprd != nilp) && (p_oprd->d_prot == false)) delete p_oprd;
      }
      if ((d_oper == RE_ALTN) && (p_next != nilp)) p_next->d_prot = false;
      if ((p_next != nilp) && (p_next->d_prot == false)) delete p_next;
    }

    // mark a node as an operator - move node data if alternate
    void mark (t_reoper oper) {
      // check for alternate
      if (oper == RE_ALTN) {
	if (d_oper == RE_ALTN)
	  throw Exception ("regex-error", "invalid dual | operator");
	// copy node
	s_renode* node = new s_renode (d_type);
	node->d_oper = d_oper;
	node->d_type = d_type;
	node->p_nval = p_nval;
	node->p_oprd = p_oprd;
	node->p_next = nilp;
	node->d_prot = d_prot;
	// re-adjust node
	d_oper = RE_ALTN;
	d_type = RE_OPRD;
	p_nval = node;
	p_oprd = nilp;
	p_next = nilp;
	return;
      }
      if (d_oper != RE_NONE) 
	throw Exception ("regex-error", "invalid operator position");
      d_oper = oper;
    }
  };

  // the regex structure
  struct s_regex {
    // the root node
    s_renode* p_root;
    // the last node
    s_renode* p_last;
    // the reference count
    long d_rcount;
    // initialize the regex structure
    s_regex (void) {
      p_root = nilp;
      p_last = nilp;
      d_rcount = 1;
    }
  };

  // read a character in the buffer - possibly escaped
  // a character that is not a backslash is returned as is. after a
  // backslash, 'n' gives a newline, 't' gives a tab and any other character
  // stands for itself. an exception is raised when the buffer ends right
  // after the backslash
  static char re_escape_char (InputString& buf) {
    // the plain character case
    const char first = buf.read ();
    if (first != '\\') return first;
    // the character is escaped
    const char esc = buf.read ();
    if (esc == eofc) throw Exception ("regex-error", "invalid eof character");
    if (esc == 'n') return '\n';
    if (esc == 't') return '\t';
    return esc;
  }

  // find the last node in a node chain
  // the chain is followed through the next pointers - nil is returned for
  // an empty chain
  static s_renode* re_find_last (s_renode* node) {
    s_renode* tail = node;
    while ((tail != nilp) && (tail->p_next != nilp)) tail = tail->p_next;
    return tail;
  }

  // append a node to a node chain
  // root and last are updated in place. when the last node is an
  // alternation without second operand, the node becomes that operand and
  // the last node is unchanged. when the alternation is complete, the node
  // is linked after both operand chains as well as after the alternation
  static void re_append_node (s_renode** root, s_renode** last, 
                              s_renode* node) {
    // first node of the chain
    if (*root == nilp) {
      *root = node;
      *last = node;
      return;
    }
    s_renode* tail = *last;
    if (tail->d_oper == RE_ALTN) {
      // the alternation is still waiting for its second operand
      if (tail->p_oprd == nilp) {
        tail->p_oprd = node;
        return;
      }
      // continue both alternates with the new node
      re_find_last (tail->p_nval)->p_next = node;
      re_find_last (tail->p_oprd)->p_next = node;
    }
    // link at the end and move the last node
    tail->p_next = node;
    *last = node;
  }

  // regex context structure
  struct s_rectx {
    // string data 
    String  d_str;
    // string length
    long  d_len;
    // start index
    long  d_sidx;
    // current index;
    long  d_cidx;
    // group mark
    long  d_gmrk;
    // group vector
    Vector* p_grpv;
    // create a new context
    s_rectx (const String& s, const long sidx, Vector* grpv) {
      d_str  = s;
      d_len  = s.length ();
      d_sidx = (sidx < d_len) ? sidx : d_len;
      d_cidx = d_sidx;
      d_gmrk = d_sidx;
      Object::iref (p_grpv = grpv);
    }
    // copy construct this context
    s_rectx (const s_rectx& ctx) {
      d_str  = ctx.d_str;
      d_len  = ctx.d_len;
      d_sidx = ctx.d_sidx;
      d_cidx = ctx.d_cidx;
      d_gmrk = ctx.d_gmrk;
      Object::iref (p_grpv = ctx.p_grpv);
    }
    // destroy this context
    ~s_rectx (void) {
      Object::dref (p_grpv);
    }
    // assign a context to this one
    s_rectx& operator = (const s_rectx& ctx) {
      d_str  = ctx.d_str;
      d_len  = ctx.d_len;
      d_sidx = ctx.d_sidx;
      d_cidx = ctx.d_cidx;
      d_gmrk = ctx.d_gmrk;
      Object::iref (ctx.p_grpv);
      Object::dref (p_grpv);
      p_grpv = ctx.p_grpv;
      return *this;
    }
    // read a character from this context
    char read (void) {
      if (d_cidx == d_len) return eofc;
      return d_str[d_cidx++];
    }
    // return true if we are at the end
    bool iseof (void) {
      return (d_cidx == d_len);
    }
    // mark the group start
    void gmrk (void) {
      d_gmrk = d_cidx;
    }
    // set a group result
    void gset (void) {
      long  len = d_cidx - d_gmrk;
      if (len <= 0) len = 0;
      char* buf = new char[len + 1];
      for (long i = 0; i < len; i++) buf[i] = d_str[i+d_gmrk];
      buf[len] = nilc;
      p_grpv->append (new String (buf));
      delete [] buf;
    }
    // pop a last entry in the group
    void gpop (void) {
      p_grpv->back ();
    }
    // return the string match
    String subs (void) {
      return d_str.substr (d_sidx, d_cidx);
    }
  };

  // check for a blank character (blank or tab)
  static inline bool re_check_blank (const char c) {
    return ((c == ' ') || (c == tabc));
  }
  // check for a newline, a carriage return or an end of file character
  static inline bool re_check_newln (const char c) {
    return ((c == eolc) || (c == eofc) || (c == crlc));
  }
  // check for an alpha numeric character (ascii letter or digit)
  static inline bool re_check_alpha (const char c) {
    const bool lower = (c >= 'a') && (c <= 'z');
    const bool upper = (c >= 'A') && (c <= 'Z');
    const bool digit = (c >= '0') && (c <= '9');
    return (lower || upper || digit);
  }
  // check for a decimal digit
  static inline bool re_check_digit (const char c) {
    return ((c >= '0') && (c <= '9'));
  }
  // check for a lower case ascii letter
  static inline bool re_check_lower (const char c) {
    return ((c >= 'a') && (c <= 'z'));
  }
  // check for an upper case ascii letter
  // both bounds must hold - testing them with an or accepts every character
  static inline bool re_check_upper (const char c) {
    if ((c >= 'A') && (c <= 'Z')) return true;
    return false;
  }
  // check for an ascii letter (lower or upper case)
  static inline bool re_check_letter (const char c) {
    const bool lower = (c >= 'a') && (c <= 'z');
    const bool upper = (c >= 'A') && (c <= 'Z');
    return (lower || upper);
  }

  // check for a hexadecimal character (0-9, a-f or A-F)
  static inline bool re_check_hexa (const char c) {
    const bool digit = (c >= '0') && (c <= '9');
    const bool lower = (c >= 'a') && (c <= 'f');
    const bool upper = (c >= 'A') && (c <= 'F');
    return (digit || lower || upper);
  }

  // check for an aleph constituent
  // a constituent is an ascii letter, a digit or one of the characters
  // . + - * / ! = > < ?
  static inline bool re_check_aleph (const char c) {
    // letters and digits
    if ((c >= 'a') && (c <= 'z')) return true;
    if ((c >= 'A') && (c <= 'Z')) return true;
    if ((c >= '0') && (c <= '9')) return true;
    // special constituents
    switch (c) {
    case '.':
    case '+':
    case '-':
    case '*':
    case '/':
    case '!':
    case '=':
    case '>':
    case '<':
    case '?':
      return true;
    default:
      return false;
    }
  }

  // check a meta character against a character
  // the lower case meta characters select a class: a (alpha numeric),
  // b (blank), d (digit), l (lower case), n (newline), s (letter),
  // u (upper case), x (hexadecimal) and w (aleph constituent). the upper
  // case form negates the class. any other meta character matches itself
  static bool re_check_meta (const char meta, const char c) {
    switch (meta) {
    case 'a': return  re_check_alpha  (c);
    case 'A': return !re_check_alpha  (c);
    case 'b': return  re_check_blank  (c);
    case 'B': return !re_check_blank  (c);
    case 'd': return  re_check_digit  (c);
    case 'D': return !re_check_digit  (c);
    case 'l': return  re_check_lower  (c);
    case 'L': return !re_check_lower  (c);
    case 'n': return  re_check_newln  (c);
    case 'N': return !re_check_newln  (c);
    case 's': return  re_check_letter (c);
    case 'S': return !re_check_letter (c);
    case 'u': return  re_check_upper  (c);
    case 'U': return !re_check_upper  (c);
    case 'x': return  re_check_hexa   (c);
    case 'X': return !re_check_hexa   (c);
    case 'w': return  re_check_aleph  (c);
    case 'W': return !re_check_aleph  (c);
    default:  break;
    }
    // not a class - the meta character stands for itself
    return (meta == c);
  }

  // check a character set against a character
  // the set is a 256 entries table indexed by the character value. the
  // character is converted to unsigned before indexing, since a plain char
  // may be signed and would give a negative index above 127
  // a nil set never matches
  static bool re_check_cset (const bool* cset, const char c) {
    if (!cset) return false;
    return cset[static_cast <unsigned char> (c)];
  }

  // forward declaration for node execution
  static bool re_exec      (s_renode* node, s_rectx& ctx);
  static bool re_exec_node (s_renode* node, s_rectx& ctx);
  static bool re_exec_loop (s_renode* node, s_rectx& ctx);
  static bool re_exec_plus (s_renode* node, s_rectx& ctx);
  static bool re_exec_mult (s_renode* node, s_rectx& ctx);
  static bool re_exec_zone (s_renode* node, s_rectx& ctx);
  static bool re_exec_altn (s_renode* node, s_rectx& ctx);
  static bool re_exec_ctrl (s_renode* node, s_rectx& ctx);

  // execute a particular node
  // the node is run once without looking at its operator. a character, a
  // meta character or a character set node reads one character from the
  // context, a block node runs its sub chain. a nil node succeeds and any
  // other node type raises an exception
  static bool re_exec_node (s_renode* node, s_rectx& ctx) {
    // without node - we succeed
    if (node == nilp) return true;
    // dispatch according to node type
    switch (node->d_type) {
    case RE_CHAR:
      return (ctx.read () == node->d_cval);
    case RE_META:
      return re_check_meta (node->d_cval, ctx.read ());
    case RE_CSET:
      return re_check_cset (node->p_cset, ctx.read ());
    case RE_BLOK:
      return re_exec (node->p_nval, ctx);
    default:
      break;
    }
    throw Exception ("regex-error", "internal exec node error");
  }

  // execute a node chain starting at a node
  // the node operator selects the execution: a node without operator is
  // run once and the chain continues with the next node, the other
  // operators are delegated to their own function. a nil node succeeds
  static bool re_exec (s_renode* node, s_rectx& ctx) {
    // without node - we succeed
    if (node == nilp) return true;
    // save the context for the fallback
    s_rectx bctx = ctx;
    // dispatch based on node operation
    switch (node->d_oper) {
    case RE_NONE:
      // restore the context when the node itself fails
      if (re_exec_node (node, ctx) == false) {
        ctx = bctx;
        return false;
      }
      return re_exec (node->p_next, ctx);
    case RE_PLUS:
      return re_exec_plus (node, ctx);
    case RE_MULT:
      return re_exec_mult (node, ctx);
    case RE_ZONE:
      return re_exec_zone (node, ctx);
    case RE_ALTN:
      return re_exec_altn (node, ctx);
    case RE_CTRL:
      return re_exec_ctrl (node, ctx);
    }
    return false;
  }

  // execute a loop with fall back
  // the node is run as many times as possible. when the node fails, or
  // when the rest of the loop fails, the context is restored and the chain
  // continues with the next node. the context is left untouched when the
  // result is false
  static bool re_exec_loop (s_renode* node, s_rectx& ctx) {
    // save the context and run the node once
    s_rectx bctx = ctx;
    bool status  = re_exec_node (node, ctx);
    // a success that did not consume anything would repeat forever: this
    // happens at the end of the string and also when the node matches an
    // empty string - in both cases the iteration is treated as a failure
    if (status == true) {
      if ((bctx.iseof () == true) || (ctx.d_cidx <= bctx.d_cidx)) 
        status = false;
    }
    // try one more iteration
    if (status == true) status = re_exec_loop (node, ctx);
    if (status == true) return true;
    // try to fall back on the next node from the saved context
    ctx = bctx;
    status = re_exec (node->p_next, ctx);
    if (status == false) ctx = bctx;
    return status;
  }

  // execute a node with a plus operator
  // the node must succeed once, then the loop with fall back is run. the
  // context is restored when the first execution fails
  static bool re_exec_plus (s_renode* node, s_rectx& ctx) {
    // save the context
    s_rectx bctx = ctx;
    // the mandatory execution followed by the loop
    if (re_exec_node (node, ctx) == true) return re_exec_loop (node, ctx);
    // the node did not match once
    ctx = bctx;
    return false;
  }

  // execute a node with a mult operator
  // zero or more executions - this is exactly the loop with fall back
  static bool re_exec_mult (s_renode* node, s_rectx& ctx) {
    const bool status = re_exec_loop (node, ctx);
    return status;
  }

  // execute a node with a zone operator
  // the chain is first tried with the node executed once. when the node or
  // the rest of the chain fails, the context is restored and the chain is
  // tried again without the node
  static bool re_exec_zone (s_renode* node, s_rectx& ctx) {
    // save the context
    s_rectx zctx = ctx;
    // try with one execution of the node
    if (re_exec_node (node, ctx) == true) {
      if (re_exec (node->p_next, ctx) == true) return true;
    }
    // fall back without the node
    ctx = zctx;
    return re_exec (node->p_next, ctx);
  }

  // execute an alternate node
  // the first operand chain is tried, then the second one from the saved
  // context. the context is restored when both operands fail
  static bool re_exec_altn (s_renode* node, s_rectx& ctx) {
    // save the context
    s_rectx bctx = ctx;
    // try with the first operand
    if (re_exec (node->p_nval, ctx) == true) return true;
    // try with the second operand
    ctx = bctx;
    if (re_exec (node->p_oprd, ctx) == true) return true;
    // nothing matched
    ctx = bctx;
    return false;
  }

  // execute a control node
  // a group mark records the current index as the group start. a group
  // accept records the group result and removes it again when the rest of
  // the chain fails. the context is restored on failure and any other node
  // type raises an exception
  static bool re_exec_ctrl (s_renode* node, s_rectx& ctx) {
    // save the context and reset status
    s_rectx bctx = ctx;
    bool status  = false;
    // dispatch from node type
    if (node->d_type == RE_GMRK) {
      ctx.gmrk ();
      status = re_exec (node->p_next, ctx);
    } else if (node->d_type == RE_GSET) {
      ctx.gset ();
      status = re_exec (node->p_next, ctx);
      if (status == false) ctx.gpop ();
    } else {
      throw Exception ("regex-error", "internal regex control node error");
    }
    if (status == false) ctx = bctx;
    return status;
  }

  // get or create the group vector
  // the object bound in the map is returned as a vector. a new vector is
  // created and bound when the map has no object yet. the result is nil
  // when the bound object is not a vector
  static Vector* re_get_grpv (Thrmap& gmap) {
    Object* vobj = gmap.get ();
    if (vobj == nilp) {
      vobj = new Vector;
      gmap.set (vobj);
    }
    return dynamic_cast <Vector*> (vobj);
  }

  // mark a character set from a meta character
  // every entry of the 256 entries set is overwritten with the result of
  // the meta character check for that character value
  static void re_mark_cset (bool* cset, const char c) {
    if (cset == nilp) return;
    long i = 0;
    while (i < 256) {
      cset[i] = re_check_meta (c, i);
      i++;
    }
  }

  // complement a character set
  // every entry of the 256 entries set is inverted in place
  static void re_complement_cset (bool* cset) {
    if (cset == nilp) return;
    bool* end = cset + 256;
    for (bool* flag = cset; flag != end; flag++) *flag = !(*flag);
  }

  // compile from a stream and return a root node
  // the stream is read until its end, or until the closing ] when bflag is
  // true, that is when compiling the content of a [...] block. the syntax
  // elements are: [...] sub block, ( and ) group mark and group accept,
  // $c meta character, "..." literal string with escapes, <...> character
  // set (complemented when it starts with ^) and the + * ? | operators
  // applied to the last node. any other character matches itself
  // the result is the root of the compiled chain or nil for an empty top
  // level expression. a regex-error exception is raised on a syntax error
  // and the nodes built so far are destroyed
  static s_renode* re_compile (InputString& is, const bool bflag) {
    // check for data - a block opened at the very end is never closed
    if (is.iseof () == true) {
      if (bflag == true)
        throw Exception ("regex-error", "unbalanced [] in expression");
      return nilp;
    }

    // initialize the root and last node
    s_renode* root = nilp;
    s_renode* last = nilp;
    s_renode* node = nilp;

    // counters for balancing checks
    long bcount = bflag ? 1 : 0;
    long gcount = 0;

    try {
      // loop in the expressions
      while (is.iseof () == false) {
        char c = is.read ();
        switch (c) {
        case eofc:
          if (bcount != 0) 
            throw Exception ("regex-error", "unbalanced [] in expression");
          if (gcount != 0) 
            throw Exception ("regex-error", "unbalanced () in expression");
          return root;
        case ']':
          bcount--;
          if (bcount != 0) 
            throw Exception ("regex-error", "unbalanced [] in expression");
          if (gcount != 0) 
            throw Exception ("regex-error", "unbalanced () in expression");
          return root;	
        case '[':
          node = new s_renode (re_compile (is, true));
          re_append_node (&root, &last, node);
          break;
        case '(':
          gcount++;
          node = new s_renode (RE_GMRK);
          re_append_node (&root, &last, node);
          break;
        case ')':
          gcount--;
          // a group cannot be closed before it has been opened
          if (gcount < 0) 
            throw Exception ("regex-error", "unbalanced () in expression");
          node = new s_renode (RE_GSET);
          re_append_node (&root, &last, node);
          break;
        case '$':
          c = is.read ();
          if (c == eofc) 
            throw Exception ("regex-error", "end of regex with $");
          node = new s_renode (RE_META, c);
          re_append_node (&root, &last, node);
          break;
        case '"':
          while (true) {
            // a string which is not closed would never end the loop
            if (is.iseof () == true)
              throw Exception ("regex-error", "end of regex with \"");
            if (is.get () == '"') break;
            char sc = re_escape_char (is);
            node = new s_renode (RE_CHAR, sc);
            re_append_node (&root, &last, node);
          }
          // consume last double quote
          is.read ();
          break;
        case '<':
          {
            // the complement flags are local to each character set
            bool fflag = false;
            bool ccset = false;
            node = new s_renode (RE_CSET);
            while (is.get () != '>') {
              char sc = is.read ();
              // only the first character can request the complement
              if (fflag == false) {
                fflag = true;
                if (sc == '^') {
                  ccset = true;
                  continue;
                }
              }
              // the set node is not yet in the chain - destroy it here
              if (sc == eofc) {
                delete node;
                throw Exception ("regex-error", "end of regex with <");
              }
              if (sc == '$') {
                sc = is.read ();
                if (sc == eofc) {
                  delete node;
                  throw Exception ("regex-error", "end of regex with $");
                }
                re_mark_cset (node->p_cset, sc);
              } else {
                // index with the unsigned value - a plain char may be
                // negative above 127
                node->p_cset[static_cast <unsigned char> (sc)] = true;
              } 
            }
            // consume last character - eventually complement and append
            is.read ();
            if (ccset == true) re_complement_cset (node->p_cset);
            re_append_node (&root, &last, node);
          }
          break;
        case '+':
          if (last == nilp) 
            throw Exception ("regex-error", "invalid + start");
          last->mark (RE_PLUS);
          break;
        case '*':
          if (last == nilp) 
            throw Exception ("regex-error", "invalid * start");
          last->mark (RE_MULT);
          break;
        case '?':
          if (last == nilp) 
            throw Exception ("regex-error", "invalid ? start");
          last->mark (RE_ZONE);
          break;
        case '|':
          if (last == nilp) 
            throw Exception ("regex-error", "invalid | start");
          last->mark (RE_ALTN);
          break;
        default:
          node = new s_renode (RE_CHAR, c);
          re_append_node (&root, &last, node);
          break;
        }
      }
      // check for balancing
      if (bcount != 0) 
        throw Exception ("regex-error", "unbalanced [] in expression");
      if (gcount != 0) 
        throw Exception ("regex-error", "unbalanced () in expression");
    } catch (...) {
      // the chain built so far has no other owner
      delete root;
      throw;
    }
    return root;
  }

  // create a null regex
  // the regex owns an empty compiled structure

  Regex::Regex (void) {
    s_regex* recni = new s_regex;
    p_recni = recni;
  }

  // create a regex from a string
  // the string is compiled at construction. the exception raised by a
  // compilation error is propagated to the caller

  Regex::Regex (const String& re) {
    p_recni = new s_regex;
    try {
      compile (re);
    } catch (...) {
      // the destructor is not called for an object whose constructor
      // failed - release the compiled structure here
      delete p_recni->p_root;
      delete p_recni;
      p_recni = nilp;
      throw;
    }
  }

  // copy construct this regex
  // the compiled structure is shared with the original regex and its
  // reference count is incremented
  
  Regex::Regex (const Regex& that) {
    that.rdlock ();
    // copy the regex string
    d_reval = that.d_reval;
    // share the compiled structure
    p_recni = that.p_recni;
    p_recni->d_rcount++;
    that.unlock ();
  }

  // destroy this regex
  // the compiled structure is destroyed with its last owner

  Regex::~Regex (void) {
    p_recni->d_rcount--;
    if (p_recni->d_rcount != 0) return;
    delete p_recni->p_root;
    delete p_recni;
  }

  // return the class name

  String Regex::repr (void) const {
    return String ("Regex");
  }

  // return a string representation of this regex
  // this is the string that was last compiled

  String Regex::tostring (void) const {
    String result = d_reval;
    return result;
  }

  // return a literal representation of this regex
  // the regex string is returned as is when it starts with a [ character,
  // otherwise it is enclosed between [ and ]

  String Regex::toliteral (void) const {
    rdlock ();
    String result;
    if (d_reval[0] == '[') {
      result = d_reval;
    } else {
      result = String ("[") + d_reval + "]";
    }
    unlock ();
    return result;
  }

  // clone this regex
  // the clone is created with the copy constructor

  Object* Regex::clone (void) const {
    Regex* result = new Regex (*this);
    return result;
  }

  // return the regex serial code

  t_byte Regex::serialid (void) const {
    const t_byte sid = SERIAL_REGX_ID;
    return sid;
  }

  // serialize this regex
  // only the regex string is written. the read lock is released even when
  // the write operation raises an exception, as done when deserializing

  void Regex::wrstream (Output& os) const {
    rdlock ();
    try {
      d_reval.wrstream (os);
      unlock ();
    } catch (...) {
      unlock ();
      throw;
    }
  }

  // deserialize this regex
  // the regex string is read from the stream and compiled. the write lock
  // is released before an exception is propagated

  void Regex::rdstream (Input& is) {
    wrlock ();
    try {
      String sval;
      sval.rdstream (is);
      compile (sval);
    } catch (...) {
      unlock ();
      throw;
    }
    unlock ();
  }

  // compile a string as a regex

  void Regex::compile (const String& re) {
    wrlock ();
    try {
      // clean the old compiled node info
      if (--p_recni->d_rcount == 0) {
	delete p_recni->p_root;
	delete p_recni;
	p_recni = new s_regex;
      }
      // create an input stream
      InputString is (re);
      // get the root and last node
      p_recni->p_root = re_compile   (is, false);
      p_recni->p_last = re_find_last (p_recni->p_root);
      // check for consistency
      if (is.iseof () == false) {
	delete p_recni->p_root;
	p_recni->p_root = nilp;
	p_recni->p_last = nilp;
	throw Exception ("regex-error", "regex syntax error", re);
      }
      // save the string regex
      d_reval = re;
      // unlock the regex
      unlock ();
    } catch (...) {
      delete p_recni->p_root;
      p_recni->p_root = nilp;
      p_recni->p_last = nilp;
      unlock ();
      throw;
    }
  }

  // match this regex against a string
  // the match starts at the beginning of the string and succeeds only if
  // the whole string has been consumed. the group vector is reset before
  // the match and also when an exception is raised

  bool Regex::operator == (const String& s) {
    // get the group vector and reset it
    Vector* grpv = re_get_grpv (d_gmap);
    if (grpv != nilp) grpv->reset ();
    // lock in read mode
    rdlock ();
    // create a regex context
    s_rectx ctx (s, 0, grpv);
    try {
      // run the regex then check that nothing is left in the string
      bool status = re_exec (p_recni->p_root, ctx);
      bool result = status && ctx.iseof ();
      unlock ();
      return result;
    } catch (...) {
      if (grpv != nilp) grpv->reset ();
      unlock ();
      throw;
    }
  }

  // match this regex partially against a string
  // the regex is tried at each start index of the string until it
  // succeeds. the group vector is reset before each attempt

  bool Regex::operator < (const String& s) {
    // get the group vector
    Vector* grpv = re_get_grpv (d_gmap);
    // lock in read mode
    rdlock ();
    const long len = s.length ();
    for (long sidx = 0; sidx < len; sidx++) {
      // reset the group vector
      if (grpv != nilp) grpv->reset ();
      // create a regex context
      s_rectx ctx (s, sidx, grpv);
      bool status = false;
      try {
        status = re_exec (p_recni->p_root, ctx);
      } catch (...) {
        if (grpv != nilp) grpv->reset ();
        unlock ();
        throw;
      }
      // stop at the first start index that matches
      if (status == true) {
        unlock ();
        return true;
      }
    }
    unlock ();
    return false;
  }

  // reverse matching the regex
  // this is the negation of the full match operator

  bool Regex::operator != (const String& s) {
    const bool status = (*this == s);
    return !status;
  }

  // match this regex partially and return the matching string
  // the regex is tried at each start index of the string. the result is
  // the sub string matched by the first successful attempt or an empty
  // string when nothing matches

  String Regex::match (const String& s) {
    String result = "";
    // get the group vector
    Vector* grpv = re_get_grpv (d_gmap);
    // lock in read mode
    rdlock ();
    const long len = s.length ();
    for (long sidx = 0; sidx < len; sidx++) {
      // reset the group vector
      if (grpv != nilp) grpv->reset ();
      // create a regex context
      s_rectx ctx (s, sidx, grpv);
      bool status = false;
      try {
        status = re_exec (p_recni->p_root, ctx);
        if (status == true) result = ctx.subs ();
      } catch (...) {
        if (grpv != nilp) grpv->reset ();
        unlock ();
        throw;
      }
      // stop at the first start index that matches
      if (status == true) break;
    }
    unlock ();
    return result;
  }

  // replace a match with another string
  // the string is scanned from left to right. each match of the regex is
  // replaced by val and the scan resumes after the match. a character at
  // which the regex does not match is copied as is. the result is the
  // string built that way

  String Regex::replace (const String& s, const String& val) {
    Buffer result;
    // get the group vector
    Vector* grpv = re_get_grpv (d_gmap);
    // lock in read mode
    rdlock ();
    long len = s.length ();
    for (long i = 0; i < len; i++) {
      // reset the group vector
      if (grpv != nilp) grpv->reset ();
      // create a regex context
      s_rectx ctx (s, i, grpv);
      try {
        if (re_exec (p_recni->p_root, ctx) == false) {
          result.add (s[i]);
          continue;
        }
        result.add (val);
        if (ctx.d_cidx > i) {
          // resume after the match - the loop adds the final increment
          i = ctx.d_cidx - 1;
        } else {
          // the match is empty: moving back to the match end would try
          // the same index forever - copy the character and move on
          result.add (s[i]);
        }
      } catch (...) {
        if (grpv != nilp) grpv->reset ();
        unlock ();
        throw;
      }
    }
    unlock ();
    return result.tostring ();
  }

  // get the length of the group vector
  // the length is 0 when there is no group vector

  long Regex::length (void) const {
    // get the group vector
    Vector* grpv = re_get_grpv (d_gmap);
    // get its length
    if (grpv == nilp) return 0;
    return grpv->length ();
  }

  // get an object at certain index
  // the object is taken from the group vector - nil is returned when
  // there is no group vector

  Object* Regex::getobj (const long index) const {
    // get the group vector
    Vector* grpv = re_get_grpv (d_gmap);
    if (grpv == nilp) return nilp;
    return grpv->get (index);
  }

  // get a string at certain index
  // the string is taken from the group vector - an exception is raised
  // when there is no group vector

  String Regex::getstr (const long index) const {
    // get the group vector
    Vector* grpv = re_get_grpv (d_gmap);
    if (grpv != nilp) return grpv->getstring (index);
    throw Exception ("regex-error", "out of bound group access");
  }

  // get an integer at certain index
  // the group string at the index is converted with an integer object - an
  // exception is raised when there is no group vector

  t_long Regex::getint (const long index) const {
    // get the group vector
    Vector* grpv = re_get_grpv (d_gmap);
    if (grpv != nilp) {
      Integer val (grpv->getstring (index));
      return val.tointeger ();
    }
    throw Exception ("regex-error", "out of bound group access");
  }

  // get a real at certain index
  // the group string at the index is converted with a real object - an
  // exception is raised when there is no group vector

  t_real Regex::getreal (const long index) const {
    // get the group vector
    Vector* grpv = re_get_grpv (d_gmap);
    if (grpv != nilp) {
      Real val (grpv->getstring (index));
      return val.toreal ();
    }
    throw Exception ("regex-error", "out of bound group access");
  }


  // create a new regex in a generic way
  // without argument a null regex is created, with one argument the
  // argument is the string to compile. an exception is raised with any
  // other number of arguments

  Object* Regex::mknew (Vector* argv) {
    long argc = (argv == nilp) ? 0 : argv->length ();
    switch (argc) {
    case 0:
      // the null regex
      return new Regex;
    case 1:
      {
        // the regex by string
        String str = argv->getstring (0);
        return new Regex (str);
      }
    default:
      break;
    }
    throw Exception ("argument-error", "too many argument with regex");
  }

  // operate this regex with another object
  // the supported operators are EQL (full match), NEQ (reverse of the full
  // match) and LTH (partial match). the operand must be a literal whose
  // string representation is matched. the result is a new boolean object
  // and an exception is raised for any other operator or operand

  Object* Regex::oper (Runnable* robj, t_oper type, Object* object) {
    // get a literal object
    Literal* lobj = dynamic_cast <Literal*> (object);
    if (lobj != nilp) {
      if (type == Object::EQL) 
        return new Boolean (*this == lobj->tostring ());
      if (type == Object::NEQ) 
        return new Boolean (*this != lobj->tostring ());
      if (type == Object::LTH) 
        return new Boolean (*this < lobj->tostring ());
    }
    throw Exception ("type-error", "invalid operand with regex",
                     Object::repr (object));
  }

  // apply this regex with a set of arguments and a quark
  // the methods handled here are length (no argument), get and match (one
  // argument) and replace (two arguments). the results of get, match and
  // replace are posted to the runnable before being returned. any other
  // combination is forwarded to the literal apply method

  Object* Regex::apply (Runnable* robj, Nameset* nset, const long quark,
                        Vector* argv) {
    // get the number of arguments
    long argc = (argv == nilp) ? 0 : argv->length ();

    // dispatch 0 argument
    if ((argc == 0) && (quark == QUARK_LENGTH)) {
      return new Integer (length ());
    }

    // dispatch one argument
    if ((argc == 1) && (quark == QUARK_GET)) {
      Object* result = getobj (argv->getint (0));
      robj->post (result);
      return result;
    }
    if ((argc == 1) && (quark == QUARK_MATCH)) {
      String* result = new String (match (argv->getstring (0)));
      robj->post (result);
      return result;
    }

    // dispatch two arguments
    if ((argc == 2) && (quark == QUARK_REPLACE)) {
      String s   = argv->getstring (0);
      String val = argv->getstring (1);
      String* result = new String (replace (s, val));
      robj->post (result);
      return result;
    }

    // call the literal method
    return Literal::apply (robj, nset, quark, argv);
  }
}
