package xm::pfe::flattenwordrefs;
use strict;
use xm::o;
use xm::sub;
use xm::pfe::cutfcodedocs;

sub DESC
{"
  look for a wordset-table, kill the C-syntax around it, and
  flatten the information. That is, prepend the latest _INTO-spec
  onto a word-export to from the XREF we can later dbjoin, and
  add another id for the wordset. The whole thing shall look a
  bit more like text than C after it is run through.
"}

sub ARGS { return xm::o::args_stdin(@_,DESC); }

sub coded
{
    return "ordinary primitive"    if $_[0] eq "P4_FXCO" or "CO" eq $_[0];
    return "immediate primitive"   if $_[0] eq "P4_IXCO" or "CI" eq $_[0];
    return "compiling primitive"   if $_[0] eq "P4_SXCO" or "CS" eq $_[0];
    return "constructor primitive" if $_[0] eq "P4_XXCO" or "CX" eq $_[0];
    return "ordinary variable"     if $_[0] eq "P4_OVAR" or "OV" eq $_[0];
    return "immediate variable"    if $_[0] eq "P4_IVAR" or "IV" eq $_[0];
    return "ordinary valuevar"     if $_[0] eq "P4_OVAL" or "OL" eq $_[0];
    return "immediate valuevar"    if $_[0] eq "P4_IVAL" or "IL" eq $_[0];
    return "ordinary constant"     if $_[0] eq "P4_OCON" or "OC" eq $_[0];
    return "immediate constant"    if $_[0] eq "P4_ICON" or "IC" eq $_[0];
    return "threadstate variable"  if $_[0] eq "P4_DVAR" or "DV" eq $_[0];
    return "threadstate constant"  if $_[0] eq "P4_DCON" or "DC" eq $_[0];
    return "ordinary offsetval"    if $_[0] eq "P4_OFFS";
    return "ordinary vocabulary"   if $_[0] eq "P4_OVOC";
    return "immediate vocabulary"  if $_[0] eq "P4_IVOC";
    return "loading slot id"       if $_[0] eq "P4_SLOT";
    return "loading slot size"     if $_[0] eq "P4_SSIZ";
    return "loading into"          if $_[0] eq "P4_INTO";
    return "loading wordset"       if $_[0] eq "P4_LOAD";
    return "loader code $_[0]";
}


sub per_ITEMWORDREF
{
    my ($in0,$in,$in2,$name,$intoref) = @_; 

    my $typewordref = ""; my $typeword = "";
    $in =~ s{ (<TYPEWORDREF\b[^<>]*>) ((?:.(?!</TYPEWORDREF\b))*.) 
		  (</TYPEWORDREF\b[^<>]*>) }
    { $typewordref .= $1.$2.$3; $typeword = $2; "<:>" }gsex;

    my $cstrwordref = ""; my $cstrword = "";
    $in =~ s{ (<CSTRWORDREF\b[^<>]*>) ((?:.(?!</CSTRWORDREF\b))*.) 
		  (</CSTRWORDREF\b[^<>]*>) }
    { $cstrwordref .= $1.$2.$3; $cstrword = $2; "" }gsex;

    my $linkwordref = ""; 
    $in =~ s{ (<LINKWORDREF\b[^<>]*>) ((?:.(?!</LINKWORDREF\b))*.) 
		  (</LINKWORDREF\b[^<>]*>) }
    { $linkwordref .= $1.$2.$3; "" }gsex;

    my $attrwordref = ""; 
    $in =~ s{ ^ ((?:.(?!<:>))*.) <:> } 
    { $attrwordref .= $1; "" }gsex;

    $cstrword = xm::sub::C($cstrword);

    $typeword = coded($typeword);
    my $spc = " ";
    my $out = $in0.$attrwordref
	."<XREFWORDREF>".$$intoref.$cstrword."</XREFWORDREF>".$spc
	    ."<MAKEWORDREF>".$spc
		."<FROMWORDSET>".$name."</FROMWORDSET>".$spc
		    .$cstrwordref.$spc.$linkwordref.$spc
			."<TYPEWORDCODED>".$typeword."</TYPEWORDCODED>".$spc
			    .$typewordref."</MAKEWORDREF>".$in2;
    
    # print STDERR "[",length $out,"]";
    return $out if (length $typeword == 2);
    
    if ($typeword =~ /loading/)
    {
	$$intoref = $cstrword." ";
	$in0 =~ s{(<) \w+} {$1."CDOCWORDREF"}sex;
	$in2 =~ s{(</)\w+} {$1."CDOCWORDREF"}sex;
	return $in0.$cstrword.$in2;
    }
	
    return $out;
}

sub per_LISTWORDREF
{
    my ($in,$name) = @_; 

    # embedded cdocs are kept, but renamed to CDOCWORDREF

    $in =~ s{ <CDOC\b([^<>]*)> ((?:.(?!</CDOC\b))*.) 
		  </CDOC\b([^<>]*)> }
    { "<CDOCWORDREF".$1.">".xm::pfe::cutfcodedocs::format($2)
	  ."</CDOCWORDREF".$3.">" }gsex;
    
    # clean ITEMWORDREFs
    my $into = "";

    $in =~ s{ (<ITEMWORDREF\b[^<>]*>) ((?:.(?!</ITEMWORDREF\b))*.) 
		  (</ITEMWORDREF\b[^<>]*>) }
    { per_ITEMWORDREF($1,$2,$3,$name,\$into) }gsex;

    print STDERR $name," ";
    # print STDERR " <wordset:",$name," ", length $in, ">\n";
    return $in;
}

sub per_ITEMWORDSET
{
    my $in = shift;
    my $out = ""; 

    my $cdocwordset = "";
    $in =~ s{ (<CDOCWORDSET\b)([^<>]*>) ((?:.(?!</CDOCWORDSET\b))*.) 
		  (</CDOCWORDSET\b)([^<>]*>) }
    { $cdocwordset .= $1.$2.$3.$4.$5; "" }gsex;
    my $namewordset = ""; my $wordset;
    $in =~ s{ (<NAMEWORDSET\b[^<>]*>) ((?:.(?!</NAMEWORDSET\b))*.) 
		  (</NAMEWORDSET\b[^<>]*>) }
    { $namewordset .= $1.$2.$3; $wordset = $2; "" }gsex;
    my $cstrwordcnt = "";
    $in =~ s{ (<CSTRWORDCNT\b)([^<>]*>) ((?:.(?!</CSTRWORDCNT\b))*.) 
		  (</CSTRWORDCNT\b)([^<>]*>) } 
    { $cstrwordcnt .= "<TITLEWORDSET".$2.$3."</TITLEWORDSET".$5; "" }gsex;
    $in =~ s{ (<LISTWORDREF\b[^<>]*>) ((?:.(?!</LISTWORDREF\b))*.) 
		  (</LISTWORDREF\b[^<>]*>) } 
    {  $out .= $cstrwordcnt.$namewordset.$cdocwordset
	   .$1.per_LISTWORDREF($2,$wordset).$3; "" }gsex;

    return $out;
}

sub DO
{
    my $in = shift;
    my $out = "";
    my $comment;

    print STDERR "<wordsets> ";
    $in =~ s{ (<ITEMWORDSET\b[^<>]*>)  
		  ((?:.(?!</?ITEMWORDSET\b))*.) (</ITEMWORDSET\b[^<>]*>)
		  }
    { $1.per_ITEMWORDSET($2).$3 }gsex;
    print STDERR "</wordsets>\n";

    return $in;
}

1;

