/* ,file-id archive://[lord]/423/rx/dfa.c/1998-05-18
 */
/*	Copyright (C) 1997 Tom Lord
 * 
 * This program is provided to you under the terms of the Liberty Software
 * License.  You are NOT permitted to redistribute, modify, or use it
 * except in very specific ways described by that license.
 *
 * This software comes with NO WARRANTY.
 * 
 * You should have received a copy of the Liberty Software License
 * along with this software; see the file =LICENSE.  If not, write to
 * the Tom Lord, 1810 Francisco St. #2, Berkeley CA, 94703, USA.  
 */





#include "vu/bitset.h"
#include "vu/dstr.h"
#include "vu/hashtab.h"
#include "rexp.h"
#include "nfa.h"
#include "dfa.h"
#include "super.h"



/* To really understand the code in this file, it is necessary
 * to understand the code in  "super.h" and "super.c", upon which
 * this module heavily depends.
 */



/* rx_dfa
 *
 * Allocate a new DFA frame with xmalloc, pass the whole frame
 * through memset0, and return it.  rx_free_dfa is the matching
 * release routine.
 */
struct rx_dfa *
rx_dfa ()
{
  struct rx_dfa * fresh = (struct rx_dfa *)xmalloc (sizeof (*fresh));

  memset0 (fresh, sizeof (*fresh));
  return fresh;
}

/* rx_free_dfa
 *
 * Release a DFA frame.  When the frame is attached to an NFA (its
 * "rx" field is non-0), rx_free_dfa_storage is called first so that
 * any superstate the frame still holds is unlocked.  The frame
 * itself is then handed to xfree.
 */
void
rx_free_dfa (struct rx_dfa * dfa)
{
  if (dfa->rx != 0)
    {
      rx_free_dfa_storage (dfa);
    }

  xfree (dfa);
}


/* rx_init_dfa_from_rx
 *
 * Initialize a DFA from an NFA.
 *
 * "frame" is attached to "rx" with no current superstate and a
 * final tag of 0.  The previous contents of these fields are simply
 * overwritten; nothing is unlocked here.
 */

void
rx_init_dfa_from_rx (struct rx_dfa * frame, struct rx_nfa * rx)
{
  frame->final_tag = 0;
  frame->state = 0;
  frame->rx = rx;
}


/* rx_init_dfa_from_dfa
 *
 * Initialize a DFA by copying another DFA.
 *
 * "dest" receives the NFA, current superstate and final tag of
 * "src".  If that superstate is non-0 it is locked once more on
 * behalf of "dest".  The previous contents of "dest" are overwritten
 * without being unlocked.
 */
void
rx_init_dfa_from_dfa (struct rx_dfa * dest, struct rx_dfa * src)
{
  dest->final_tag = src->final_tag;
  dest->state = src->state;
  dest->rx = src->rx;

  if (!dest->state)
    return;

  rx_lock_superstate (dest->rx, dest->state);
}

/* rx_free_dfa_storage
 *
 * Drop the superstate held by "frame", if any: unlock it and reset
 * the frame's "state" and "final_tag" fields to 0.  A frame with no
 * current superstate is left untouched.  The frame itself is not
 * freed.
 */
void
rx_free_dfa_storage (struct rx_dfa * frame)
{
  if (!frame->state)
    return;

  rx_unlock_superstate (frame->rx, frame->state);
  frame->final_tag = 0;
  frame->state = 0;
}


/* rx_dfa_goto_start_superstate
 *
 * Return (or initialize) a DFA to its start state.
 *
 * The start superset is taken from frame->rx->start_set when that
 * is already filled in; otherwise it is built from the closure of
 * the NFA's start state and stored in frame->rx->start_set for
 * later calls.
 *
 * If the start superset already has a superstate whose rx_id matches
 * the NFA's, that superstate is reused (after being refreshed);
 * otherwise a superstate is constructed with rx_superstate.  Any
 * superstate previously held by "frame" is unlocked, and the new one
 * is locked and recorded in frame->state, with frame->final_tag set
 * from its state_label.
 *
 * If either construction step fails, panic is called and "frame"
 * is left holding whatever state it had on entry.
 */

void
rx_dfa_goto_start_superstate (struct rx_dfa * frame)
{
  struct rx_superset * contents;
  struct rx_superstate * built;
  int cached;

  contents = frame->rx->start_set;
  if (!contents)
    {
      struct rx_nfa_state_set * closure;

      closure = rx_state_closure (frame->rx, frame->rx->start_nfa_state);

      contents = rx_superstate_eclosure_union (frame->rx,
                                               rx_superset_cons (frame->rx, 0, 0),
                                               closure);
      if (!contents)
        {
          panic ("unable to build start superstate in rx_dfa_goto_start_superstate");
          return;
        }

      contents->starts_for = frame->rx;
      frame->rx->start_set = contents;
    }

  /* Is there a superstate for the start set that belongs to this NFA? */
  cached = (   contents->superstate
            && (contents->superstate->rx_id == frame->rx->rx_id));

  built = 0;
  if (!cached)
    {
      /* Keep the superset alive while a superstate is built from it. */
      rx_protect_superset (frame->rx, contents);
      built = rx_superstate (frame->rx, contents);
      rx_release_superset (frame->rx, contents);
      if (!built)
        {
          panic ("unable to construct superstate in rx_dfa_goto_start_superstate");
          return;
        }
    }

  /* Let go of the state the frame was in, if any. */
  if (frame->state)
    {
      rx_unlock_superstate (frame->rx, frame->state);
    }

  if (cached)
    frame->state = contents->superstate;
  else
    frame->state = built;

  frame->final_tag = frame->state->members->state_label;

  if (cached)
    {
      /* The cached superstate may be in a semifree state.
       * A locked superstate must never be semifree, so
       * refresh it before taking the lock.
       */
      rx_refresh_this_superstate (frame->state);
    }

  rx_lock_superstate (frame->rx, frame->state);
}



/* rx_dfa_fit_p
 *
 * Compare a DFA to string: is the entire string a member
 * of the language defined by the DFA?
 *
 * "burst" points to "len" characters.  frame->state must be non-0
 * on entry; it is dereferenced unconditionally, and is unlocked
 * before any character is read.
 *
 * Returns 1 if the string is accepted and 0 otherwise:
 *
 *   - With "len" equal to 0, the answer is whether the current
 *     superstate has a non-0 state_label; the frame keeps its state.
 *
 *   - If some character has no transition, frame->state and
 *     frame->final_tag are set to 0 and 0 is returned.
 *
 *   - Otherwise the frame is moved to (and locks) the superstate
 *     reached after the last character.  The result is 1 when the
 *     last instruction taken has a non-0 data_2, which is also
 *     stored in frame->final_tag.
 *
 * An unrecognized instruction yields the value returned by panic.
 */
int
rx_dfa_fit_p (struct rx_dfa * frame,
	  unsigned const char * burst,
	  int len)
{
  struct rx_inx * table;
  struct rx_inx * step;
  int accepted;

  if (len == 0)
    {
      frame->final_tag = frame->state->members->state_label;
      return (frame->state->members->state_label != 0);
    }

  table = frame->state->transitions;
  rx_unlock_superstate (frame->rx, frame->state);

  for (; len != 0; --len, ++burst)
    {
      struct rx_inx * next;

      step = table + *burst;
      next = (struct rx_inx *)step->data;

      while (!next)
        {
          struct rx_superstate * owner;
          long op;

          op = (long)step->inx;

          if (op == rx_backtrack)
            {
              /* RX_BACKTRACK means that we've reached the empty
               * superstate, indicating that match can't succeed
               * from this point.
               */
              frame->state = 0;
              frame->final_tag = 0;
              return 0;
            }

          /* No other instructions are possible here.
           */
          if (op != rx_cache_miss)
            return panic ("unrecognized instruction in rx_dfa_fit_p");

          /* Because the superstate NFA is lazily constructed,
           * and in fact may erode from underneath us, we sometimes
           * have to construct the next instruction the hard way.
           * This invokes one step in the lazy-conversion.
           *
           * The superstate owning "table" is found by backing up
           * from the table by the offset of the "transitions" member.
           */
          owner = ((struct rx_superstate *)
                   ((char *)table
                    - ((unsigned long)
                       ((struct rx_superstate *)0)->transitions)));

          step = rx_handle_cache_miss (frame->rx, owner, *burst, step->data_2);
          next = (struct rx_inx *)step->data;
        }

      table = next;
    }

  /* "table" is now the transition table of the superstate reached
   * after the last character; "step" is the instruction that led there.
   */
  frame->state = ((struct rx_superstate *)
                  ((char *)table
                   - ((unsigned long)
                      ((struct rx_superstate *)0)->transitions)));

  if (step->data_2)		/* indicates a final superstate */
    {
      frame->final_tag = (int)step->data_2;
      accepted = 1;
    }
  else
    {
      frame->final_tag = frame->state->members->state_label;
      accepted = 0;
    }

  rx_lock_superstate (frame->rx, frame->state);
  return accepted;
}



/* rx_dfa_advance
 *
 * Advance a DFA, reading characters from a string.
 *
 * Stop at the end of the string, returning 1, or when a
 * character is encountered for which no transition is defined,
 * returning 0.
 *
 * With "len" equal to 0 the frame is not touched.  Otherwise
 * frame->state must be non-0 on entry; it is unlocked before any
 * character is read.  On success the frame is moved to (and locks)
 * the superstate reached after the last character, and
 * frame->final_tag is set from that superstate's state_label.  On
 * failure frame->state and frame->final_tag are both set to 0.
 *
 * An unrecognized instruction yields the value returned by panic.
 */
int
rx_dfa_advance (struct rx_dfa * frame,
	    unsigned const char * burst,
	    int len)
{
  struct rx_inx * table;

  if (len == 0)
    return 1;

  table = frame->state->transitions;
  rx_unlock_superstate (frame->rx, frame->state);

  for (; len != 0; --len, ++burst)
    {
      struct rx_inx * step;
      struct rx_inx * next;

      step = table + *burst;
      next = (struct rx_inx *)step->data;

      while (!next)
        {
          struct rx_superstate * owner;
          long op;

          op = (long)step->inx;

          if (op == rx_backtrack)
            {
              /* RX_BACKTRACK means that we've reached the empty
               * superstate, indicating that match can't succeed
               * from this point.
               */
              frame->state = 0;
              frame->final_tag = 0;
              return 0;
            }

          /* No other instructions are legal here.
           */
          if (op != rx_cache_miss)
            return panic ("unrecognized instruction in rx_dfa_advance");

          /* Because the superstate NFA is lazily constructed,
           * and in fact may erode from underneath us, we sometimes
           * have to construct the next instruction the hard way.
           * This invokes one step in the lazy-conversion.
           *
           * The superstate owning "table" is found by backing up
           * from the table by the offset of the "transitions" member.
           */
          owner = ((struct rx_superstate *)
                   ((char *)table
                    - ((unsigned long)
                       ((struct rx_superstate *)0)->transitions)));

          step = rx_handle_cache_miss (frame->rx, owner, *burst, step->data_2);
          next = (struct rx_inx *)step->data;
        }

      table = next;
    }

  /* Every character was consumed: settle on the superstate that
   * owns the table we ended up in.
   */
  frame->state = ((struct rx_superstate *)
                  ((char *)table
                   - ((unsigned long)
                      ((struct rx_superstate *)0)->transitions)));
  frame->final_tag = frame->state->members->state_label;
  rx_lock_superstate (frame->rx, frame->state);
  return 1;
}


/* rx_dfa_advance_to_final
 *
 * Advance a DFA, reading characters from a string.
 *
 * Stop at the end of the string, when a superstate is encountered
 * with a non-0 label, or when a character is encountered for which
 * no transition is defined.
 *
 * Return the number of characters consumed from the string (NOT the
 * number left unread):
 *
 *   - 0 if "len" is 0;
 *   - "len" if every character was read;
 *   - the count up to and including the character whose transition
 *     reached a superstate with a non-0 label;
 *   - the count of characters preceding the one that has no
 *     transition.
 *
 * Set the "final_tag" field of "frame" to the state_label of the
 * last superstate encountered, and leave "frame->state" pointing at
 * that superstate.
 *
 * "frame->state" must be non-0 on entry (it is dereferenced
 * unconditionally).  The code treats it as locked: it is unlocked
 * when the first transition is taken, and each superstate moved to
 * is locked in turn, so the superstate left in "frame->state" is
 * still locked on return.
 *
 * When "len" is non-0, the label of the superstate the frame starts
 * in is not examined; only superstates reached by a transition are
 * checked.
 *
 * An unrecognized instruction yields the value returned by panic.
 */
int
rx_dfa_advance_to_final (struct rx_dfa * frame,
		     unsigned const char * burst,
		     int len)
{
  int initial_len;
  struct rx_inx * inx_table;
  struct rx_superstate * this_state;

  /* Nothing to read: just report the label of the current state. */
  if (!len)
    {
      frame->final_tag = frame->state->members->state_label;
      return 0;
    }

  inx_table = frame->state->transitions;

  /* Remembered so that the consumed count can be computed later. */
  initial_len = len;

  this_state = frame->state;

  /* "len" is decremented before the body runs, so inside the loop
   * (initial_len - len) counts the characters up to and including
   * the one at "burst".
   */
  while (len--)
    {
      struct rx_inx * inx;
      struct rx_inx * next_table;

      /* this_state holds the state for the position we're
       * leaving.  this_state is locked. 
       */
      inx = inx_table + *burst;
      next_table = (struct rx_inx *)inx->data;

      /* A 0 "data" field means there is no destination table yet;
       * the instruction code says why.
       */
      while (!next_table)
	{
	  struct rx_superstate * state;

	  /* Recover the superstate that owns inx_table by backing up
	   * from the table by the offset of the "transitions" member.
	   */
	  state = ((struct rx_superstate *)
		   ((char *)inx_table
		    - ((unsigned long)
		       ((struct rx_superstate *)0)->transitions)));

	  switch ((long)inx->inx)
	    {
	    case rx_backtrack:
	      /* RX_BACKTRACK means that we've reached the empty
	       * superstate, indicating that match can't succeed
	       * from this point.
	       *
	       * Return to the state for the position prior to what
	       * we failed at, and return that position.
	       *
	       * The "- 1" excludes the character that failed.
	       * this_state is still locked (top-of-loop invariant).
	       */
	      frame->state = this_state;
	      frame->final_tag = this_state->members->state_label;
	      return (initial_len - len) - 1;
	    
	    case rx_cache_miss:
	      /* Because the superstate NFA is lazily constructed,
	       * and in fact may erode from underneath us, we sometimes
	       * have to construct the next instruction the hard way.
	       * This invokes one step in the lazy-conversion.
	       *
	       * The instruction that comes back is examined by the
	       * next pass of the enclosing while loop.
	       */
	      inx = rx_handle_cache_miss
		(frame->rx, state, *burst, inx->data_2);

	      next_table = (struct rx_inx *)inx->data;
	      continue;
		

	      /* No other instructions are legal here.
	       */
	    default:
	      return panic ("unrecognized instruction in rx_dfa_advance_to_final");
	  }
	}

      /* Release the superstate for the preceding position: */
      rx_unlock_superstate (frame->rx, this_state);

      /* Compute the superstate for the new position: */
      inx_table = next_table;
      this_state = ((struct rx_superstate *)
		    ((char *)inx_table
		     - ((unsigned long)
			((struct rx_superstate *)0)->transitions)));

      /* Lock it (see top-of-loop invariant): */
      rx_lock_superstate (frame->rx, this_state);

      /* Check to see if we should stop: */
      if (this_state->members->state_label)
	{
	  /* The count returned includes the character just read. */
	  frame->state = this_state;
	  frame->final_tag = this_state->members->state_label;
	  return (initial_len - len);
	}

      ++burst;
    }

  /* Consumed all of the characters. */
  frame->state = this_state;
  frame->final_tag = this_state->members->state_label;

  /* state already locked (see top-of-loop invariant) */
  return initial_len;
}

