/*******************************************************************
*  MINARET (for edge-triggered circuits)
*
*  BY
*
*  NARESH MAHESHWARI AND SACHIN S. SAPATNEKAR
*
*  Copyright 1998 Iowa State University Research Foundation, Inc.
*  All Rights Reserved
*
*  Source Code for retiming edge-triggered circuits
*
*  ASTRA:   Refer to paper in DAC 95 by Deokar & Sapatnekar
*                       and IEEE Tran on CAD 10/1998
*  Minaret: Refer to paper in DAC 97 by Maheshwari & Sapatnekar
*                       and IEEE Tran on VLSI 10/1998
*
*  Contact Address: sachin@ece.umn.edu
*
*        Availiable on as is basis, with no support
*******************************************************************/
/*******************************
FILE wd.c

functions for generating the period constraints
shares some structures with heaps.c
******************************/
#include "include.h"
#include <string.h>


/*******************************
EXTERNAL VARS  from heap.c simplex.c 
******************************/
/* Descriptor of one heap in the external "heaps" array.
   NOTE(review): this is a local copy of a type owned by the heap code;
   presumably it has to stay identical to that definition -- confirm. */
typedef struct
{
  int size;       /* get_wd_row() treats size == 0 as "this heap is empty" */
  INT_FLT *set;   /* presumably the heap's element storage; never accessed directly in this file */
} HEAPTYPE;

extern int Max_no_heaps; /* not referenced by the code in this file */
extern int no_heaps; /* current no of heaps; get_wd_row() scans heaps[k] for k < no_heaps */
extern HEAPTYPE *heaps; /* heap array; get_wd_row() indexes it with the W value k it is currently draining */

/******* variables from simplex.c ***/
extern S_ARC *S_Arcs; /* arc array; get_wd_row() appends period-constraint arcs (from, to, cost) to it */
extern S_NODE *S_nodes; /* not referenced by the code in this file */
extern S_ARC *New_S_Arcs; /* receives the realloc() result until it is checked and copied into S_Arcs */

extern int Arc_Array_Size; /* number of S_ARC elements S_Arcs is currently allocated for */
extern int No_S_arcs; /* number of arcs stored so far, i.e. the next free index in S_Arcs */
extern int Min_L,Max_U; /* Min_L is subtracted from the source's upper bound to cap W in REDUCE mode; Max_U is not referenced in this file */

extern int dbwd; /* non-zero enables the debug printf()s of get_wd_row() (DEBUG builds only) */
extern int dbcon; /* not referenced by the code in this file */
extern int temp_counter; /* not referenced by the code in this file */

extern int Leak; /* non-zero makes get_wd_row() perform one deliberately unfreed allocation per call */

/*******************************
GLOBAL VARS
******************************/

/************* use a global a_row, b_row so that they do not
  have to be allocated every time (i.e., for every row).
a_row can be made local by passing it to add_cons()
The counters are global since wd is done for one node at a time
*******/
int *a_row;      /* W value per node id for the current source; maxid+1 entries, allocated in generate_period_cons() */
float *b_row;    /* D value per node id for the current source; same size and lifetime as a_row */

int kept_max;    /* reset to 0 at the start of every get_wd_row() call (marked STAT); not updated elsewhere in this file */
int tcount=0;    /* not referenced by the code in this file */
int Diff_w=0;    /* STATS builds: popped heap entries discarded because the heap index exceeded a_row[u] */
int Diff_d =0;   /* STATS builds: popped heap entries discarded because their value was below b_row[u] */
int Diff_t=0;    /* STATS builds: total number of heap entries popped */
int max_wd =0;   /* not referenced by the code in this file */
int block_size;  /* multiplies nogates when S_Arcs is grown; set to 1 by generate_period_cons() */
int no_realloc =0; /* number of times S_Arcs has been realloc()ed by get_wd_row() */
/****************************************
  wd_grow_arc_array(int w)

  Enlarges S_Arcs by block_size*nogates + 10 elements and counts the
  reallocation in no_realloc.  "w" is only used in the error message.
  Terminates the program with exit(-1) when realloc() fails.
  *****************************************/
static void wd_grow_arc_array(int w)
{
  Arc_Array_Size += block_size*nogates + 10;
  /*printf(" ARC ALLOC reallocing to new size %d \n",Arc_Array_Size);*/
  no_realloc++;
  /* keep the result in New_S_Arcs first so S_Arcs is not lost on failure */
  New_S_Arcs = realloc(S_Arcs,Arc_Array_Size*sizeof(S_ARC));
  if(New_S_Arcs == NULL)
    {
      printf("ERR OUT OF MEMORY in arcs for wd of gate %d \n",w);
      exit(-1);
    }
  S_Arcs = New_S_Arcs;
}

/****************************************
  get_wd_row(int w, float clock, int Reduce_Mode)

  "w" is the source vertex/gate
  "clock" is the clock period for which constraints are generated
  "Reduce_Mode" decides if the Minaret features (pruning with the
  node bounds) are used: they are applied when Reduce_Mode == REDUCE,
  or unconditionally when the file is compiled with MY_METHOD.

  Generates a row of W and D using Shenoy's (ICCAD 94) algorithm and
  adds the constraints directly to the simplex graph (S_Arcs) rather
  than to the C-graph.

  The arc cost stored is W(w,u) - 1, so DO NOT subtract 1 again when
  writing the LP or solving.

  Side effects: resets the heaps, overwrites a_row and b_row, appends
  to S_Arcs / increments No_S_arcs (growing S_Arcs when needed) and
  calls exit(-1) if S_Arcs cannot be grown.
  *****************************************/
void get_wd_row(int w, float clock, int Reduce_Mode)
{
  int k,u,v;
  int i,flag=0;
  int new_a;
  float  new_b;
  W *w1;
  INT_FLT max;
  int My_max_w;
  int Max_gen_w =0;


  /**** removing this memory leak slows down the code, so it is left here
    on purpose; the pointer is overwritten by the fanout loop below **/
  if(Leak)w1 = (W *)MALLOC(1,sizeof(W));


  /*************************************
    Reserve at least "nogates" + 10 free arc slots up front so that the
    array normally does not have to grow while arcs are being added.
    ****************************************/
  if(Arc_Array_Size - No_S_arcs  < nogates +10)
    wd_grow_arc_array(w);


  /* largest W worth exploring from this source */
  if(Reduce_Mode == REDUCE)My_max_w = nodes[w]->bounds[UPPER] - Min_L;
  else My_max_w = INF;

  /*** init a and b and reset heap*/
  reset_heaps();
  /******** faster replacement for
    for(i=0;i<maxid+1;i++){
    a_row[i]=INF;
    b_row[i] = 0;
    }
    *******************/
  /**** IMPORTANT may not port across all machines **/
  /* memset() fills bytes: with a 4-byte int every a_row entry becomes
     0x01010101 (16843009), which serves as the "infinite" W value;
     all-zero bytes are assumed to represent the float 0.0 */
  a_row = memset(a_row,1,(maxid +1)*sizeof(int));
  b_row = memset(b_row,0,(maxid +1)*sizeof(float));

  /* the source reaches itself with W = 0 and D = its own delay */
  a_row[w] = 0;
  b_row[w] = nodes[w]->maxdel;
  heap_insert(0,b_row[w],w);
  k = 0;

#ifdef CHECK
  if(Reduce_Mode == REDUCE)if(nodes[w]->IsFixed){
    printf(" ERR fixed source %d given in get_wd_row \n",w);
    exit(-1);
  }
#endif
  kept_max =0; /** STAT **/

  while(1)
    {
      /** inlined version of:  k = get_min_index();if(k < 0)break;*/
      if(heaps[k].size == 0)
        {
          /* advance to the next non-empty heap; the bound is tested
             first so that heaps[no_heaps] is never read */
          while((k < no_heaps)&&(heaps[k].size == 0))k++;

          /*** all heaps empty so done with while loop */
          if(k == no_heaps)break;
#ifdef MY_METHOD
          if(k > My_max_w)break;
#else
          if(Reduce_Mode == REDUCE)if(k > My_max_w)break;
#endif /**!!!!!!!! here is the exit *********/
        }


#ifdef STATS
      if(k > Max_gen_w)Max_gen_w = k;  /** STAT **/
      Diff_t++;
#endif
      /* take the entry with the largest value out of heap k
         (get_max() presumably removes it -- defined in the heap code) */
      max = get_max(k);
      u = max.id;

#ifdef DEBUG
      if(dbwd)printf("for w %d k %d id %d val %.2f u %d W %d D %f\n",w,k,max.id,max.value,u,a_row[u],b_row[u]);
#endif

      /* stale heap entry: a better (smaller W, or same W and larger D)
         pair has already been recorded for u, so ignore this one */
      if( (k > a_row[u]) || ( (a_row[u] == k)&&( max.value < b_row[u])))
        { /*fprintf(fperr,"WARNING Neglecting [%d]%s node has A %d  B %.2f but heap %d gave B %.2f\n",u,nodes[u]->name,a_row[u],b_row[u],k,max.value);*/

#ifdef STATS
          if(k > a_row[u])Diff_w++;
          else Diff_d++;
#endif
          continue;/*** IMPORTANT !!!!!!!!*********/

        }

      /***************
        if one latch is already guaranteed then there is no need to process
        the fanouts of a gate; one latch is not guaranteed if "u" is a
        constant node and D(w,u) < c
        (in this case only >= 0 is assured, not >= 1)
        ********/
#ifdef MY_METHOD
      if(a_row[u] - nodes[w]->bounds[UPPER] + nodes[u]->bounds[LOWER] >= 1)continue;
#else
      if(Reduce_Mode == REDUCE)
        if(a_row[u] - nodes[w]->bounds[UPPER] + nodes[u]->bounds[LOWER] >= 1)continue;

#endif

      /*******************************
        !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
        IMPORTANT the else implements Shenoy's algorithm,
        since if a period edge is added the fanouts are not processed.

        Just to improve the efficiency: do not go around a loop.
        "flag" prevents "w" from being expanded again as "u" after the
        first time.  To enable constraints from the host to be generated
        the "else if" is executed for HOST only the first time.  flag is 0
        when we start and, since the very first "u" is "w" itself, flag is
        then set to 1.
        **********************************/

      if(b_row[u] > clock)
        {
          /* A node can be popped (and emit an arc) more than once when its
             D value improves inside the same W heap, so the space reserved
             at the top of the function is not a hard upper bound: make sure
             there is a free slot before writing. */
          if(No_S_arcs >= Arc_Array_Size)wd_grow_arc_array(w);

          /*****************  add constraint from w to u; NO need to check if one latch is guaranteed (done above) ***/
          S_Arcs[No_S_arcs].from = nodes[w]->phase ;
          S_Arcs[No_S_arcs].to = nodes[u]->phase ;
          S_Arcs[No_S_arcs].cost = (a_row[u] -1);
          No_S_arcs++;

        }
      else if(((u != HOST)&&(u!=w))|| (!flag))
        {
          if(u == w){
            flag=1;
          }

          /* relax every fanout edge u -> v */
          for(w1=nodes[u]->fout;w1 != NULL;w1= w1->next)
            {
              v = w1->el;
              /*** why insert a constant node in the heap at all? because if D(ij) to a constant node < period, its fanouts have to be processed; see test1.bench  ***/
              /***** there is no need to put nodes in the heap which will be ignored when they come out;
                however this causes the number of constraints to change, reason unknown ****/

              new_a = a_row[u] + w1->wt;
              new_b = b_row[u] + nodes[v]->maxdel;
#ifdef MY_METHOD
              if(new_a - nodes[w]->bounds[UPPER] + nodes[v]->bounds[LOWER] >= 1){continue;}
#else
              if(Reduce_Mode == REDUCE)if(new_a - nodes[w]->bounds[UPPER] + nodes[v]->bounds[LOWER] >= 1){continue;}

#endif

#ifdef DEBUG
              if(dbwd)printf("u [%d]%s V [%d]%s new A %d B %.2f, old A %d B %.2f\n",u,nodes[u]->name,v,nodes[v]->name,new_a,new_b,a_row[v],b_row[v]);
#endif
              /* keep the pair with the smaller W, ties broken by larger D */
              if( (a_row[v] > new_a) || ( (a_row[v] == new_a)&&(b_row[v] < new_b)))
                {
                  heap_insert(new_a,new_b,v);
                  a_row[v] = new_a;
                  b_row[v] = new_b;
                }

            }
        }

    } /*** while(1)*/

#ifdef DEBUG
  /* disabled dump of the complete row for one hard-coded node */
  if(0)if(w == 161){
    fprintf(fperr," \n node [%d]%s\n",w,nodes[w]->name);
    for(i=0;i<nonodes+1;i++)/*if((nodes[i]->type > 2)&& (a_row[i] < nolatches))*/fprintf(fperr," [%d]%s W %d D %.2f  \n ",i,nodes[i]->name,a_row[i],b_row[i]);
  }
#endif

}

/*********************
generates all the period constraints

"clk" is the clock period handed to get_wd_row() for every gate
"Red_Mode" is handed to get_wd_row(); for REDUCE the bounds of HOST
           are additionally forced to 0
"Max_W" is forwarded to init_heaps()

IMPORTANT a_row and b_row are sized with maxid, the heaps are set up
with init_heaps(Max_W, nonodes+5), and one W/D row is generated for
each of gates[0..nogates-1].
a_row and b_row are released (and set to NULL) before returning.
**********************************/
void  generate_period_cons(float clk,int Red_Mode,int Max_W)
{
  int i;

  /**** for reduced mode host is  0 *****/
  if(Red_Mode == REDUCE){
    nodes[HOST]->bounds[LOWER] = 0;
    nodes[HOST]->bounds[UPPER] = 0;
  }

  /* growth factor for the arc array; the same value is used in every mode */
  block_size = 1;

  /*** need maxid (and not nonodes) as a gate id can be > nogates.
    Since get_wd_row resets the arrays anyway, no zeroing is needed here *********/
  a_row = (int *)MALLOC((maxid+1),sizeof(int));
  b_row = (float *)MALLOC((maxid+1),sizeof(float));

  /*************************
    initialize the heaps only once and just reset them every time
    (done inside get_wd_row) to avoid multiple memory allocations */
  init_heaps(Max_W,nonodes+5);

  /* one row of W/D, and its constraints, per gate */
  for(i=0;i<nogates;i++)
    get_wd_row(gates[i],clk,Red_Mode);

  /* the row buffers are globals: clear them so no dangling pointer is left */
  free(a_row);
  a_row = NULL;
  free(b_row);
  b_row = NULL;
  free_heaps();

#ifdef STATS
  {
    /* avoid dividing by zero when no heap entry was ever popped */
    float frac = (Diff_t != 0) ? (float)(Diff_w + Diff_d)/Diff_t : 0.0f;

    printf("!!!Diff W %d D %d Total %d Frac %.2f\n",Diff_w,Diff_d,Diff_t,frac);
    fprintf(fpres,"Diff W %d D %d Total %d Frac %.2f\n",Diff_w,Diff_d,Diff_t,frac);
  }
#endif

  return;
}

/**************** EOF ****************/
