/*
 *  SPL - The SPL Programming Language
 *  Copyright (C) 2004, 2005  Clifford Wolf <clifford@clifford.at>
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 *  optimizer.c: Peephole optimizer for the SPL assembler code
 */

#include <stdlib.h>
#include <assert.h>

#include "spl.h"
#include "compat.h"

int spl_dump_translatable_strings = 0;

struct data_entry {
	int data_len;
	char *data;
	char *newpos;
	struct data_entry *next;
};

struct insn {
	struct insn *next, *last, *argp;
	struct data_entry *de;
	int size, op, arg, addr;
	int istarget, reachable;
};

struct insn_list {
	struct insn *first;
	struct insn *last;
	int text_size;
};

int spl_optype(int opcode)
{
	for (int i=0; spl_ops[i].name; i++)
		if (opcode == spl_ops[i].op) return spl_ops[i].optype;
	return 0;
}

static struct insn_list *asm_to_insn(struct spl_asm *as, int growinsn)
{
	struct insn_list *r = malloc(sizeof(struct insn_list));
	struct insn **addrmap = calloc(as->text_size, sizeof(struct insn *));
	struct insn *l = 0, *ll = 0, *t = 0;

	for (int i=0; i<as->text_size; i+=t->size)
	{
		t = calloc(1, sizeof(struct insn));

		t->last = l;
		if (l) l->next = t;
		else r->first = t;

		addrmap[i] = t;
		t->op = as->text[i];
		t->addr = i;

		if (as->text[i] < 0x60) {
			t->size = 5 - (t->op & 3);
			t->op = t->op & ~3;
			t->arg = spl_bytes_to_int(t->size - 1, as->text+i+1);
			if (growinsn) t->size = 5;
		} else {
			t->size = 1;
			t->arg = 0;
		}
		l = t;
	}
	r->last = t;

	l=0;
	t=r->first;
	while (t) {
		if ( t->op < 0x20 ) {
			t->argp = addrmap[t->addr+t->arg+t->size];
			t->argp->istarget = 1;
		}
		if (l && spl_optype(l->op) == 'B')
			t->istarget = 1;
		if (ll && spl_optype(ll->op) == 'C')
			t->istarget = 1;
		ll = l; l = t;
		t = t->next;
	}

	free(addrmap);
	r->text_size = as->text_size;
	return r;
}

static void recalc_addr(struct insn_list *il)
{
	struct insn *t = il->first;
	int addr = 0;

	while (t) {
		t->addr = addr;
		addr += t->size;
		t = t->next;
	}
	il->text_size = addr;

	for (t = il->first; t; t = t->next)
		if ( t->argp )
			t->arg = t->argp->addr - (t->addr+t->size);
}

static void recalc_size(struct insn_list *il)
{
	struct insn *t = il->first;

	while (t) {
		if (t->op < 0x60) {
			t->size = 5;
			if (t->op < 0x20) {
				if ( t->arg == (int16_t)t->arg ) t->size = 3;
				if ( t->arg == (int8_t)t->arg  ) t->size = 2;
			} else {
				// add 30 bytes headroom for stupid peoples
				// post-optimization epilogue..
				int arg = (il->text_size - t->addr) + t->arg + 30;
				if ( arg == (int16_t)arg ) t->size = 3;
				if ( arg == (int8_t)arg  ) t->size = 2;
			}
		}
		t = t->next;
	}
}

static void insn_to_asm(struct spl_asm *as, struct insn_list *il)
{
	struct insn *t = il->first, *o;
	int new_text_size = 0;

	while (t) {
		new_text_size += t->size;
		t = t->next;
	}
	t = il->first;

	as->text_size = 0;
	as->text = realloc(as->text, new_text_size);
	free(il);

	while (t) {
		switch ( t->size ) {
			case 1:
				as->text[as->text_size] = t->op;
				break;
			case 2:
				as->text[as->text_size] = t->op | 3;
				spl_int_to_bytes(t->arg, 1, as->text+as->text_size+1);
				break;
			case 3:
				as->text[as->text_size] = t->op | 2;
				spl_int_to_bytes(t->arg, 2, as->text+as->text_size+1);
				break;
			case 4:
				as->text[as->text_size] = t->op | 1;
				spl_int_to_bytes(t->arg, 3, as->text+as->text_size+1);
				break;
			case 5:
				as->text[as->text_size] = t->op;
				spl_int_to_bytes(t->arg, 4, as->text+as->text_size+1);
				break;
		}
		as->text_size += t->size;

		t = (o=t)->next;
		free(o);
	}
}

static void optimize_remove_ops(struct insn_list *il, int op)
{
	for (struct insn *t = il->first, *l = 0; t; t = (l=t)->next)
		if ((!l || (l->op != SPL_OP_IF && l->op != SPL_OP_UNLESS)) && t->op == op) t->size = 0;
}

static void optimize_debug(struct insn_list *il)
{
	struct insn *t, *last_debug = 0;

	for (t = il->first; t; t = t->next)
		if (t->op == SPL_OP_DBGSYM) {
			last_debug = t;
			t = t->next;
			break;
		}

	for (; t; t = t->next) {
		if (t->op != SPL_OP_DBGSYM && t->istarget) {
			struct insn *i = calloc(1, sizeof(struct insn));

			i->last = t;
			i->next = t->next;
			t->next = i;

			if (i->next)
				i->next->last = i;
			else
				il->last = i;

			i->size = t->size;
			i->op   = t->op;
			i->arg  = t->arg;
			i->argp = t->argp;
			i->addr = t->addr;

			t->size = last_debug->size;
			t->op   = last_debug->op;
			t->arg  = last_debug->arg;
			t->argp = last_debug->argp;
			t->addr = last_debug->addr;

			i->istarget = 0;
		}
		if (t->op == SPL_OP_DBGSYM) {
			if (!t->istarget && t->arg == last_debug->arg) {
				t->op = SPL_OP_NOP;
				t->size = 0;
			} else
				last_debug = t;
		}
	}
}

static int optimize_pattern(struct insn_list *il)
{
	struct insn *t;
	int didit = 0;

	for (t=il->first; t; t = t->next)
	{
		if (!t->next || t->next->istarget) continue;

		if (t->op == SPL_OP_PUSHC && t->next->op == SPL_OP_PUSHAV) {
			t->op = SPL_OP_PUSHAC;
			t->next->op = SPL_OP_NOP;
			t->next->size = 0;
			didit = 1;
		}
		if (t->op == SPL_OP_PUSHC && t->next->op == SPL_OP_GETVAL) {
			t->op = SPL_OP_PUSH;
			t->next->op = SPL_OP_NOP;
			t->next->size = 0;
			didit = 1;
		}
		if (t->op == SPL_OP_PUSHC && t->next->op == SPL_OP_GETVAL) {
			t->op = SPL_OP_PUSH;
			t->next->op = SPL_OP_NOP;
			t->next->size = 0;
			didit = 1;
		}
		if (t->op == SPL_OP_PUSH && t->next->op == SPL_OP_PUSHAV) {
			t->op = SPL_OP_PUSHA;
			t->next->op = SPL_OP_NOP;
			t->next->size = 0;
			didit = 1;
		}
		if (t->op == SPL_OP_POPIC && t->next->op == SPL_OP_DROP) {
			t->op = SPL_OP_POPI;
			t->next->op = SPL_OP_NOP;
			t->next->size = 0;
			didit = 1;
		}
		if (t->op == SPL_OP_POPLC && t->next->op == SPL_OP_DROP) {
			t->op = SPL_OP_POPL;
			t->next->op = SPL_OP_NOP;
			t->next->size = 0;
			didit = 1;
		}
		if (t->op == SPL_OP_POPSC && t->next->op == SPL_OP_DROP) {
			t->op = SPL_OP_POPS;
			t->next->op = SPL_OP_NOP;
			t->next->size = 0;
			didit = 1;
		}
		if (t->op == SPL_OP_POPUC && t->next->op == SPL_OP_DROP) {
			t->op = SPL_OP_POPU;
			t->next->op = SPL_OP_NOP;
			t->next->size = 0;
			didit = 1;
		}
		if (t->op == SPL_OP_LNOT && t->next->op == SPL_OP_LNOT) {
			t->op = SPL_OP_NOP;
			t->size = 0;
			t->next->op = SPL_OP_NOP;
			t->next->size = 0;
			didit = 1;
		}
		if (t->op == SPL_OP_NEG && t->next->op == SPL_OP_NEG) {
			t->op = SPL_OP_NOP;
			t->size = 0;
			t->next->op = SPL_OP_NOP;
			t->next->size = 0;
			didit = 1;
		}
		if (t->op == SPL_OP_LNOT && t->next->op == SPL_OP_IF) {
			t->op = SPL_OP_NOP; t->size = 0;
			t->next->op = SPL_OP_UNLESS;
			didit = 1;
		}
		if (t->op == SPL_OP_LNOT && t->next->op == SPL_OP_UNLESS) {
			t->op = SPL_OP_NOP; t->size = 0;
			t->next->op = SPL_OP_IF;
			didit = 1;
		}
	}

	return didit;
}

static int optimize_statics_analyze(struct insn *t,
		struct spl_asm *as, int operands, int reduce)
{
	struct spl_asm *tas = spl_asm_create();

	for (int i=0; i<operands; i++) {
#if 0
		printf("> [%s] %s\n", spl_asm_op2txt(t->op), as->data + t->arg);
#endif
		spl_asm_add(tas, t->op, as->data + t->arg);
		if (reduce) {
			t->op = SPL_OP_NOP;
			t->arg = 0;
			t->size = 0;
			t = t->next;
		}
	}

	if (reduce)
		spl_asm_add(tas, t->op, 0);
	spl_asm_add(tas, SPL_OP_HALT, 0);

	struct spl_vm *vm = spl_vm_create();
	struct spl_task *task = spl_task_create(vm, 0);
	spl_task_setcode(task, spl_asm_dump(tas));
	spl_asm_destroy(tas);

	while (task->code) spl_exec(task);

	struct spl_node *value_node = spl_pop(task);
	char *value = spl_get_string(value_node);
	int returnval = spl_get_int(value_node);

	if (reduce) {
#if 0
		printf("< [%s] %s\n", spl_asm_op2txt(t->op), value);
#endif
		if (!strcmp(value, "0")) {
			t->op = SPL_OP_ZERO;
			t->arg = 0;
			t->size = 1;
		} else
		if (!strcmp(value, "1")) {
			t->op = SPL_OP_ONE;
			t->arg = 0;
			t->size = 1;
		} else {
			t->op = SPL_OP_PUSHC;
			t->arg = spl_asm_newdata(as, value);
			t->size = 5;
		}
	}

	spl_put(vm, value_node);
	spl_vm_destroy(vm);
	return returnval;
}

static int optimize_statics(struct insn_list *il, struct spl_asm *as)
{
	struct insn *t;
	int didit = 0;

	for (t=il->first; t; t = t->next) {
		if ( t->next && !t->next->istarget && spl_optype(t->op) == 'S') {
			char next_op_type = spl_optype(t->next->op);
			switch (next_op_type)
			{
			case '1':
				optimize_statics_analyze(t, as, 1, 1);
				didit = 1;
				break;
			case 'C':
				if ( t->next->next ) {
					int status = optimize_statics_analyze(t, as, 1, 0);
#if 0
		printf("* [%s] %d\n", spl_asm_op2txt(t->next->op), status);
#endif
					if ((t->next->op == SPL_OP_IF) == (status == 0)) {
						t->next->next->op   = SPL_OP_NOP;
						t->next->next->arg  = 0;
						t->next->next->size = 0;
					}
					t->op   = t->next->op   = SPL_OP_NOP;
					t->arg  = t->next->arg  = 0;
					t->size = t->next->size = 0;
					didit = 1;
				}
				break;
			case 'S':
				if ( t->next->next && !t->next->next->istarget &&
				     spl_optype(t->next->next->op) == '2') {
					optimize_statics_analyze(t, as, 2, 1);
					didit = 1;
				}
				break;
			default:
				if ( t->next->op == SPL_OP_DROP ) {
					t->op   = t->next->op   = SPL_OP_NOP;
					t->arg  = t->next->arg  = 0;
					t->size = t->next->size = 0;
					didit = 1;
				}
			}
		}
	}

	return didit;
}

static int data_list_compar(const void *_a, const void *_b)
{
	struct data_entry * const *a = _a, * const *b = _b;
	if ((*a)->data_len < (*b)->data_len) return +1;
	if ((*a)->data_len > (*b)->data_len) return -1;
	int rc = strcmp((*a)->data, (*b)->data);
	assert(rc != 0);
	return rc;
}

static int optimize_datasect(struct insn_list *il, struct spl_asm *as)
{
	struct data_entry *data_list = 0;
	int data_list_size = 0;
	struct insn *t, *u;
	int didit = 0;

	for (t=il->first; t; t = t->next) {
		if (t->op >= 0x20 && t->op < 0x60 && !t->de) {
			struct data_entry *nde = malloc(sizeof(struct data_entry));
			nde->data = strdup(as->data + t->arg);
			nde->data_len = strlen(nde->data);
			nde->next = data_list;
			data_list = t->de = nde;
			data_list_size++;

			for (u=t->next; u; u = u->next) {
				if (u->op >= 0x20 && u->op < 0x60 && !u->de &&
				    !strcmp(nde->data, as->data + u->arg))
						u->de = nde;
			}
		}
	}

	struct data_entry **data_list_map = malloc(sizeof(struct data_entry *)*data_list_size);
	struct data_entry *de = data_list;

	for (int i=0; i<data_list_size; i++) {
		data_list_map[i] = de;
		de = de->next;
	}

	qsort(data_list_map, data_list_size,
			sizeof(struct data_entry *),
			&data_list_compar);

	char *newdata_buffer = malloc(as->data_size);
	char *newdata = newdata_buffer+as->data_size;
	int newdata_size = 0;

	for (int i=0; i<data_list_size; i++) {
		de = data_list_map[i];

		de->newpos = my_memmem(newdata, newdata_size, de->data, de->data_len+1);
		if (!de->newpos) {
			newdata_size += de->data_len+1;
			newdata -= de->data_len+1;
			memcpy(newdata, de->data, de->data_len+1);
			de->newpos = newdata;
		}
	}

	if (newdata_size != as->data_size ||
	    memcmp(newdata, as->data, newdata_size))
	{
#if 0
		for (int i=0; i<as->data_size-1; i++)
			if (as->data[i] == 0) as->data[i] = '|';
		printf("<< %s\n", as->data);
#endif

		free(as->data);
		as->data = malloc(newdata_size);
		as->data_size = as->data_size_roof = newdata_size;
		memcpy(as->data, newdata, newdata_size);

		for (t=il->first; t; t = t->next) {
			if (t->op >= 0x20 && t->op < 0x60)
				t->arg = t->de->newpos - newdata;
		}

#if 0
		for (int i=0; i<newdata_size-1; i++)
			if (newdata[i] == 0) newdata[i] = '|';
		printf(">> %s\n", newdata);
#endif

		didit = 1;
	}

	free(newdata_buffer);
	for (int i=0; i<data_list_size; i++) {
		free(data_list_map[i]->data);
		free(data_list_map[i]);
	}
	free(data_list_map);

	return didit;
}

static int optimize_bogusblock_analyze(struct insn **t)
{
	int didit = 0;
	int found_local_var = 0;
	struct insn *begin = *t;

	*t=(*t)->next;
	while (*t && (*t)->op != SPL_OP_END) {
		switch ((*t)->op)
		{
		case SPL_OP_BEGIN:
			didit += optimize_bogusblock_analyze(t);
			break;
		case SPL_OP_CATCH:
		case SPL_OP_POPA:
		case SPL_OP_REMATCH:
		case SPL_OP_RESUBST:
		case SPL_OP_OBJECT:
		case SPL_OP_IMPORT:
		case SPL_OP_TRY:
		case SPL_OP_POPL:
		case SPL_OP_POPLC:
		case SPL_OP_POPS:
		case SPL_OP_POPSC:
		case SPL_OP_APOPA:
			found_local_var = 1;
		}
		if (*t) *t=(*t)->next;
	}

	if (!found_local_var && *t) {
		(*t)->op   = begin->op   = SPL_OP_NOP;
		(*t)->arg  = begin->arg  = 0;
		(*t)->size = begin->size = 0;
		didit = 1;
	}

	return didit;
}

static int optimize_bogusblock(struct insn_list *il)
{
	int didit = 0;
	struct insn *t = il->first;

	// This optimization may break hand-written spl assembler
	// programs. But the compiler generates only code that is
	// compatible with this here...

	while (t) {
		if (t->op == SPL_OP_BEGIN)
			didit += optimize_bogusblock_analyze(&t);
		if (t)
			t = t->next;
	}

	return didit > 0;
}

static void optimize_unreachable_analyze(struct insn *t)
{
	while (t && !t->reachable) {
		t->reachable = 1;

		switch (spl_optype(t->op))
		{
		case 'B':
			optimize_unreachable_analyze(t->next);
			t = t->argp;
			break;
		case 'J':
			t = t->argp;
			break;
		case 'C':
			if (t->next)
				optimize_unreachable_analyze(t->next->next);
			t = t->next;
			break;
		case 'E':
			t = 0;
			break;
		default:
			t = t->next;
		}
	}
}

static int optimize_unreachable(struct insn_list *il)
{
	int didit = 0;

	optimize_unreachable_analyze(il->first);

	for (struct insn *t=il->first; t; t = t->next) {
		if (!t->reachable && t->op != SPL_OP_BEGIN && t->op != SPL_OP_END) {
			t->op = SPL_OP_NOP;
			t->size = t->arg = 0;
			didit = 1;
		}
	}

	return didit;
}

static int optimize_oozechainops(struct insn_list *il)
{
	int didit = 0;

	for (struct insn *t=il->first; t && t->next &&
			t->next->next && t->next->next->next; t = t->next)
	{
		struct insn *t1 = t;
		struct insn *t2 = t->next;
		struct insn *t3 = t->next->next;
		struct insn *t4 = t->next->next->next;

		if (t2->istarget) continue;
		if (t3->istarget) continue;

		if (t2->op != t4->op) continue;

		if (spl_optype(t1->op) != 'S') continue;
		if (spl_optype(t3->op) != 'S') continue;

		if (t2->op == SPL_OP_IADD ||
		    t2->op == SPL_OP_IMUL ||
		    t2->op == SPL_OP_FADD ||
		    t2->op == SPL_OP_FMUL ||
		    t2->op == SPL_OP_CAT  ||
		    t2->op == SPL_OP_DOTCAT)
		{
			struct insn tmp;

			tmp.op   = t2->op;
			tmp.arg  = t2->arg;
			tmp.size = t2->size;

			t2->op   = t3->op;
			t2->arg  = t3->arg;
			t2->size = t3->size;

			t3->op   = tmp.op;
			t3->arg  = tmp.arg;
			t3->size = tmp.size;

			didit = 1;
		}
	}
	return didit;
}

static int optimize_complexnop(struct insn_list *il)
{
	int didit = 0;

	for (struct insn *t=il->first, *l = 0; t; t = (l=t)->next) {
		if ((t->op == SPL_OP_IF || t->op == SPL_OP_UNLESS) &&
		     t->next && t->next->op == SPL_OP_NOP) {
			t->op = SPL_OP_NOP;
			t->size = t->arg = 0;
			didit = 1;
		}
		if ((t->op == SPL_OP_JUMP || t->op == SPL_OP_GOTO) && t->arg == 0) {
			if (l && (l->op == SPL_OP_IF || l->op == SPL_OP_UNLESS))
				l->op = SPL_OP_DROP;
			t->op = SPL_OP_NOP;
			t->size = t->arg = 0;
			didit = 1;
		}
	}

	return didit;
}

static int optimize_uncopy(struct insn_list *il)
{
	int didit = 0;

	for (struct insn *t=il->first; t; t = t->next)
	{
		if (spl_optype(t->op) != 'S') continue;
		struct insn *u = t->next;

		while (u && u->op == SPL_OP_COPY) u = u->next;
		if (!u || u == t->next) continue;

		int ot1 = spl_optype(u->op);
		int ot2 = u->next ? spl_optype(u->next->op) : 0;

		if (ot1 == 'C' || ot1 == '1' || ot1 == '2') {
			u->last->op  = t->op;
			u->last->arg = t->arg;
			didit = 1;
		}

		if (ot1 == '2' && u->last->last != t) {
			u->last->last->op  = t->op;
			u->last->last->arg = t->arg;
			didit = 1;
		}

		if (ot1 == 'S' && ot2 == '2') {
			u->last->op  = t->op;
			u->last->arg = t->arg;
			didit = 1;
		}
	}

	return didit;
}

void spl_optimizer(struct spl_asm *as)
{
	int orig_text_size = as->text_size;
	struct insn_list *il = asm_to_insn(as, 1);
	int schedule_redo = 0;

	optimize_remove_ops(il, SPL_OP_NOP);
	optimize_remove_ops(il, SPL_OP_CHECKP);
	optimize_debug(il);

redo_optimize_loop1:
	schedule_redo += optimize_uncopy(il);
	schedule_redo += optimize_pattern(il);
	schedule_redo += optimize_statics(il, as);
	schedule_redo += optimize_oozechainops(il);
	schedule_redo += optimize_unreachable(il);
	schedule_redo += optimize_datasect(il, as);
	schedule_redo += optimize_bogusblock(il);
	schedule_redo += optimize_complexnop(il);

	if ( !schedule_redo && spl_dump_translatable_strings ) {
		printf("/* Dummy C file for xgettext */\n");
		for (struct insn *t=il->first; t && t->next; t = t->next)
		{
			if (t->op != SPL_OP_PUSHAC || t->next->op != SPL_OP_DCALL || strcmp(as->data + t->next->arg, "_"))
				continue;

			printf("gettext(\"");

			for (int i=0; (as->data + t->arg)[i]; i++) {
				if ((as->data + t->arg)[i] < 32)
					printf("\\%03o", (as->data + t->arg)[i]);
				else
					putchar((as->data + t->arg)[i]);
			}

			printf("\");\n");
		}
	}

	recalc_addr(il);
	insn_to_asm(as, il);

	if ( schedule_redo )
	{
		schedule_redo = 0;
		orig_text_size = as->text_size;
		il = asm_to_insn(as, 0);
		goto redo_optimize_loop1;
	}

redo_optimize_loop2:
	il = asm_to_insn(as, 0);
	orig_text_size = as->text_size;
	recalc_size(il);
	recalc_addr(il);
	insn_to_asm(as, il);

	if ( orig_text_size > as->text_size )
		goto redo_optimize_loop2;
}

