//MinnSSTA Release 1.1
//(c) Copyright Hongliang Chang, Qunzeng Liu, Sachin. S. Sapatnekar

/////////////////////////////////////////////////////////////////////// 
// Timing.cpp: implementation of the CTiming class. 
// 
//////////////////////////////////////////////////////////////////////
#include <stdio.h>
#include <stdlib.h>
#include <iostream>
#include <fstream>
#include <string>
#include <math.h>
#include <time.h>

#ifdef WIN32
#include "../include/stdafx.h"
#endif
using namespace std;

#include "../include/define.h"

#include "../include/mecdf.h"
#include "../include/inddatacoef.h"
#include "../include/delaypc.h"
#include "../include/loadcap.h"
#include "../include/timingdata.h"
#include "../include/ckt.h"
#include "../include/lib.h"
#include "../include/gridpara.h"
#include "../include/grid.h"
#include "../include/corrmodel.h"
#include "../include/tree.h"
#include "../include/timing.h"
#include "../include/monte.h"
#include "../include/pca.h"
#include "../include/tree.h"
#include "../include/treelist.h"


class CCell; 
class CGrid;

extern void Delay(string name,float w,float l,float* toxp,float* toxn,float* nap,float* nan,float cload,
		  CTimingData inputSlope,int pinnum,float size,int option,
		  CTimingDataCoef& delay, CTimingDataCoef& outslp);
extern float GetGateInCap(float w,float l,float toxp,float toxn,string celltype,float size);

//extern float GetNInCap(float wn,float l,float tox);
//extern float GetPInCap(float wp,float l,float tox);

extern float CaldTrdVtp(float tox,float Na,float betap,float cload);
extern float CaldTfdVtn(float tox,float Na,float betap,float cload);
extern float CaldVtdTox(float Na);
extern float CaldVtdNa(float tox,float Na);

extern float CalCoefVtpTox(float tr, float tox);
extern float CalCoefVtnTox(float tf, float tox);

extern float Normal2(float,float,float);

extern void readPdfN01();
extern void readCdfN01();
extern float pdfnorm1v(float a, float mu, float sigma);
extern float norm1v(float a, float mu, float sigma);
extern float discnorm1v(float a, float b, float mu, float sigma);
extern doublereal norm2v(doublereal *d, doublereal *rho, integer *ndim );
extern doublereal discnorm2v(doublereal *d1,doublereal *d2,doublereal *rho);

extern float meansumxy(float x,float y,float sx,float sy,float r);
extern float sigmsumxy(float x,float y,float sx,float sy,float r);
extern float  meanxy(float x,float y,float sx,float sy,float r);
extern float sigmaxy(float x,float y,float varx,float vary,float r);
extern float meanxdivy(float mu1,float mu2,float s1,float s2,float r);
extern float sigmaxdivy(float mu1,float mu2,float s1,float s2,float r);
extern float covxy(float w1,float w2,float covw, float l1,float l2, float covl);


extern ofstream bugfile;
extern float RHO1;

extern string namelist[NUMCELL];
extern map<string,int> namelistmap;
extern struct sCoefWpn coefWpn[NUMCELL];
extern int gateSize[NUMCELL];

extern int ParaType[TOTNUMPARA];  //parameter type: 1:nominal 2:+3sig  3:-3sig

extern float cdf[LOOP];

// Table sized by PDISCSIZE; it is not referenced in the visible part of this
// file (name suggests a discretised N(0,1) distribution -- TODO confirm).
float distrN01[PDISCSIZE];

vector<DISTRIB> PinCapDistr;    //for PO pin capacitance only


// Running sums filled by CTiming::TestSlope() for instance "G8", port "in1"
// (sum and sum of squares of instdelay.Tr and inslope.Tf, plus their cross
// product). GridSigMu() turns them into mean/sigma/correlation under _DEBUG0.
float mutr=0,sigtr=0,muslope=0,sigslope=0, corr=0;


// Running sums filled by CTiming::Pert() for instance "G9", ports "in1" and
// "in2" (path delay through each input and their cross product).
// GridSigMu() normalises and prints them.
float muPath1=0,muPath2=0,sigPath1=0,sigPath2=0,covP12=0;

// Per-parameter mean and variance constants, one entry per correlated
// parameter (NUMPARA entries; the constants come from define.h).
float ParaMean[NUMPARA]={GATELEFF, GATEWIDn, MTLWID_1, MTLTHK_1, MTLHOX_1,MTLWID_2, MTLTHK_2, MTLHOX_2};
float ParaVar[NUMPARA]={VARL, VARW, VARMTLWID, VARMTLTHK,VARMTLHOX,VARMTLWID, VARMTLTHK,VARMTLHOX}; 

// Mean and variance constants of the INDNUMPARA parameters kept in the
// separate "Ind" tables (TOX and Na).
float IndParaMean[INDNUMPARA]={TOX, NaNmos};
float IndParaVar[INDNUMPARA]={VARTOX, VARNA};

/*
   Earlier layout, kept for reference: TOX and Na were part of the main
   parameter tables.
   float ParaMean[NUMPARA]={GATELEFF, GATEWIDn, TOX, NaNmos, MTLWID_1, MTLTHK_1, MTLHOX_1,MTLWID_2, MTLTHK_2, MTLHOX_2};
   float ParaVar[NUMPARA]={VARL, VARW, VARTOX, VARNA, VARMTLWID, VARMTLTHK,VARMTLHOX,VARMTLWID, VARMTLTHK,VARMTLHOX}; 
 */

// One spacing constant per metal layer (NUMLAYER entries).
float MtlSpace[NUMLAYER]={MTLSPACE_1, MTLSPACE_2};

// PERC* constants per parameter, in the same order as ParaMean
// (presumably the inter-die share of each parameter's variation -- TODO confirm).
float PercInter[NUMPARA]={PERCL,PERCW,PERCMTLWID_1,PERCMTLTHK_1,PERCMTLHOX_1,
				PERCMTLWID_2,PERCMTLTHK_2,PERCMTLHOX_2};
// Same as PercInter, for the parameters in IndParaMean.
float IndPercInter[INDNUMPARA]={PERCTOX,PERCNAN};

// Absolute value for float arguments: num is returned unchanged when it is
// greater than or equal to zero, otherwise its negation is returned.
float abs(float num)
{
    if (num >= 0)
        return num;
    return -num;
}

// Ordering predicate on DISTRIB::point: true when point1 lies strictly above
// point2, i.e. it orders entries by descending point value.
bool compDisPoint(const DISTRIB& point1, const DISTRIB& point2)
{
    return point2.point < point1.point;
}

// Writes a time-stamped marker to bugfile: a blank line, the caller's label
// `str`, then the current local time as formatted by asctime().
// Returns the time() value that was logged, converted to double.
double dumpTime(char* str)
{
    time_t now = time(NULL);
    struct tm* local = localtime(&now);

    bugfile << endl << str << endl;
    bugfile << " time: " << asctime(local) << endl << endl;

    return now;
}

//////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////

// Stores the circuit, the interconnect tree list and the correlation model
// this analyser works on. The pointers are only recorded here; nothing in
// this constructor allocates or copies the pointed-to objects.
CTiming::CTiming(CCkt *ckt, CTreeList* treelist, CCorrModel* corrModel)
{
    // Earlier versions also took the level count and grid dimensions and
    // allocated a CGrid array of gx*gy entries here.
    this->m_pCkt = ckt;
    this->m_pTreeList = treelist;
    this->m_pCorrModel = corrModel;
}

// Nothing to release here: the destructor body is intentionally empty.
CTiming::~CTiming()
{
}



//////////////////////////////////////////////////////////////////////
// Monte Carlo simulation
//////////////////////////////////////////////////////////////////////
void CTiming::RunMonte(int loop, int type/*=0*/)
{
    int i; 

    ofstream mtfile;
    mtfile.open("monte.rst",ios::out);

    //start time 0
    dumpTime("init monte starts");

    Init();

/*    float minD,maxD;
    int dsize;
    cout<<"Enter minD: ";
    scanf("%f",&minD);
    cout<<"Enter maxD: ";
    scanf("%f",&maxD);
    cout<<"Discrete size: ";
    scanf("%i",&dsize);
    float step=(maxD-minD)/(float)dsize;
*/
    dumpTime("monte before the decomposition matrix");  
//    InitMtDistr(minD,maxD,dsize,step);

    m_MonteMu=m_MonteSig=0;
    CMonte sample;       
    if (!type){                
	sample.Init(m_pCorrModel->m_iGridSizeX,m_pCorrModel->m_iGridSizeY,m_pCorrModel,NUMPARA);
	if (!sample.CalDecomp())                        
	    exit(1);        
    }

    //start time
    dumpTime("monte simulation starts");
    clock_t start=clock();

    for (i=0;i<loop;i++){
	if (!type){           		
	    sample.SamplePara();
	    MtInstIndPara();
	    MonteUdtPinCap(sample,type);		
	    m_pTreeList->CalRC(sample.m_pMatrix,1);
	    m_pTreeList->CalDelay();
	    //	    MonteUdtInstDelay(sample,type);
	}else{            
	    IidMtInstPara(sample);		
	    MonteUdtPinCap(sample,type);		
	    m_pTreeList->CalRC(NULL,2);
	    m_pTreeList->CalDelay();
	    //	    MonteUdtInstDelay(sample,type);
	}

	Pert(-1,type,&sample);

//	UdtMtDistr(minD,maxD,dsize,step);

	mtfile<<m_fMaxDelay<<endl;
	//mean
	m_MonteMu+=m_fMaxDelay;
	//variation
	m_MonteSig+=m_fMaxDelay*m_fMaxDelay;

	if (i%((int)(loop*0.1))==0)
	    cout<<i<<", ";


	ClearPara();

    }
    cout<<endl;

#ifdef _DEBUG  //::
    GridSigMu(loop);
#endif

    m_MonteMu=m_MonteMu/loop;
    m_MonteSig=sqrt(m_MonteSig/loop-m_MonteMu*m_MonteMu);

//    PrtMonteDistr(loop,dsize,minD,maxD);

    bugfile<<"Monte Carlo result:\n";

    bugfile<<"monte mu:"<<m_MonteMu<<" monte sigma:"<<m_MonteSig<<endl;


    //end time
    dumpTime("stat timing analysis ends");
    clock_t finish = clock();
    double duration = (double)(finish - start) / CLOCKS_PER_SEC;    
    bugfile<<"Monte time elapsed: "<<duration<<" seconds\n";

    mtfile.close();

}

// Prepares the Monte Carlo histogram m_MonteDistr with dsize empty bins.
// Bin k covers [minD+k*step, minD+k*step+step); its representative point is
// the midpoint of that interval and its count starts at 0.
// maxD is accepted for symmetry with the other histogram helpers but is not
// read here.
void CTiming::InitMtDistr(float& minD,float& maxD,int& dsize, float& step)
{
    m_MonteDistr.resize(dsize);

    int bin=0;
    while (bin<dsize){
        m_MonteDistr[bin].prob=0;
        m_MonteDistr[bin].intva=minD+bin*step;                      // lower edge
        m_MonteDistr[bin].intvb=m_MonteDistr[bin].intva+step;       // upper edge
        m_MonteDistr[bin].point=(m_MonteDistr[bin].intva+m_MonteDistr[bin].intvb)/2;
        ++bin;
    }
}

// Adds the current m_fMaxDelay to the Monte Carlo histogram: the delay is
// mapped to a bin index relative to minD with bin width `step`, indices
// below 0 go to the first bin, indices at or beyond dsize go to the last
// bin, and that bin's count is incremented. maxD is not read.
void CTiming::UdtMtDistr(float& minD,float& maxD,int& dsize,float& step)
{
    const int raw=(int)((m_fMaxDelay-minD)/step);
    const int bin=(raw<0) ? 0 : ((raw>=dsize) ? dsize-1 : raw);
    ++m_MonteDistr[bin].prob;
}

// Dumps the Monte Carlo histogram to bugfile as a bracketed "mtslot" table:
// one row per bin with lower edge, upper edge, normalised probability and
// running cumulative probability. The stored bin counts are divided by
// `loop` in place, so calling this twice rescales them again.
// The min/max/dsize values and a compslot(...) call line are appended after
// the table.
void CTiming::PrtMonteDistr(int& loop, int& dsize,float& minD,float& maxD)
{
    bugfile<<"\n========Pdf & Cdf:\n";
    bugfile<<"mtslot=[\n";

    float sumCdf=0;
    for (int bin=0;bin<dsize;++bin){
        m_MonteDistr[bin].prob=m_MonteDistr[bin].prob/loop;   // count -> probability
        sumCdf+=m_MonteDistr[bin].prob;

        bugfile<<m_MonteDistr[bin].intva<<" "
               <<m_MonteDistr[bin].intvb<<" "
               <<m_MonteDistr[bin].prob<<" "
               <<sumCdf<<endl;
    }
    bugfile<<"];\n\n";

    bugfile<<"min="<<minD<<";"<<endl;
    bugfile<<"max="<<maxD<<";"<<endl;
    bugfile<<"dsize="<<dsize<<";"<<endl;

    bugfile<<"compslot(mtslot,mu,sigma,min,max,dsize);\n\n";
}

// Debug probe for a single pin: only instance "G8", port "in1" is tracked.
// For that pin the rise delay (instdelay.Tr) and the falling input slope
// (inslope.Tf) are added to the file-scope running sums, together with
// their squares and their product, so that GridSigMu() can later derive
// mean, sigma and correlation.
void CTiming::TestSlope(string instname,string portname,CTimingData& instdelay,CTimingData& inslope)
{
    const bool tracked=(instname=="G8") && (portname=="in1");
    if (!tracked)
        return;

    mutr+=instdelay.Tr;
    sigtr+=instdelay.Tr*instdelay.Tr;

    muslope+=inslope.Tf;
    sigslope+=inslope.Tf*inslope.Tf;

    corr+=instdelay.Tr*inslope.Tf;
}


// Sanity check of the correlated sampler: draws `loop` samples and prints,
// for two user-selected grid indices, the sample mean and sigma of rows 0
// and 1 of the sample matrix plus the correlation between the two grids.
//
// The grid indices are read interactively from stdin. The function returns
// early (after printing a message) when `loop` is not positive or when an
// index cannot be read or is out of range; the process exits with status 1
// if the sampler decomposition fails.
//
// NOTE(review): row 0 is printed under "W:" and row 1 under "L:", while the
// rest of this file addresses rows through ENUMWGATE / ENUMLGATE -- confirm
// that the labels match the enum values.
void CTiming::TestMonte(int loop)
{
    Init();

    if (loop<=0){
        printf("TestMonte: loop must be positive\n");
        return;
    }

    // assumes one sample-matrix column per grid cell (gx*gy) -- TODO confirm
    // against CMonte::Init
    const int numGrid=m_pCorrModel->m_iGridSizeX*m_pCorrModel->m_iGridSizeY;
    int grid1=-1,grid2=-1;

    printf("grid to test..\n");
    printf("grid1: ");
    if (scanf("%i",&grid1)!=1 || grid1<0 || grid1>=numGrid){
        printf("TestMonte: invalid grid1 index\n");
        return;
    }
    printf("grid2: ");
    if (scanf("%i",&grid2)!=1 || grid2<0 || grid2>=numGrid){
        printf("TestMonte: invalid grid2 index\n");
        return;
    }

    float w1,w2;
    float muw1,muw2,sw1,sw2;
    float corrw;
    muw1=muw2=sw1=sw2=corrw=0;
    float l1,l2;
    float mul1,mul2,sl1,sl2;
    float corrl;
    mul1=mul2=sl1=sl2=corrl=0;

    CMonte sample;
    sample.Init(m_pCorrModel->m_iGridSizeX,m_pCorrModel->m_iGridSizeY,m_pCorrModel,NUMPARA);
    if (!sample.CalDecomp()){
        exit(1);
    }

    // accumulate sums, sums of squares and cross products
    for (int i=0;i<loop;i++){
        sample.SamplePara();
        w1=sample.m_pMatrix[0][grid1];
        w2=sample.m_pMatrix[0][grid2];

        muw1+=w1;	sw1+=w1*w1;
        muw2+=w2;	sw2+=w2*w2;
        corrw+=w1*w2;

        l1=sample.m_pMatrix[1][grid1];
        l2=sample.m_pMatrix[1][grid2];

        mul1+=l1;	sl1+=l1*l1;
        mul2+=l2;	sl2+=l2*l2;
        corrl+=l1*l2;
    }

    // sums -> mean, sigma, correlation coefficient
    muw1=muw1/loop;
    muw2=muw2/loop;
    sw1=sqrt(sw1/loop-muw1*muw1);
    sw2=sqrt(sw2/loop-muw2*muw2);
    corrw=(corrw/loop-muw1*muw2)/(sw1*sw2);
    cout<<"W:\n";
    cout<<muw1<<" "<<sw1<<endl;
    cout<<muw2<<" "<<sw2<<endl;
    cout<<corrw<<endl;

    mul1=mul1/loop;
    mul2=mul2/loop;
    sl1=sqrt(sl1/loop-mul1*mul1);
    sl2=sqrt(sl2/loop-mul2*mul2);
    corrl=(corrl/loop-mul1*mul2)/(sl1*sl2);
    cout<<"L:\n";
    cout<<mul1<<" "<<sl1<<endl;
    cout<<mul2<<" "<<sl2<<endl;
    cout<<corrl<<endl;
}

void CTiming::GridSigMu(int loop)
{
    list<CInst*>::iterator institer;
    for (institer=m_lInstSeq.begin();institer!=m_lInstSeq.end();++institer)
    {
	CInst *inst=(*institer);
	bugfile<<"gate:---------";
	bugfile<<inst->m_iLevel<<" "<<inst->m_InstName<<endl;

	map<string,CPort*>::iterator ptiter;        
	for (ptiter=inst->m_PortHash.begin();ptiter!=inst->m_PortHash.end();++ptiter)
	{
	    CPort *port=(*ptiter).second;
	    bugfile<<"  port:"<<port->m_PortName<<endl;
	    inst->m_MtGmu[port->m_PortName] = inst->m_MtGmu[port->m_PortName]/loop;
	    inst->m_MtGsigma[port->m_PortName] = sqrt(inst->m_MtGsigma[port->m_PortName]/loop - inst->m_MtGmu[port->m_PortName]*inst->m_MtGmu[port->m_PortName]);
	    bugfile<<" mu:"<<inst->m_MtGmu[port->m_PortName]<<" sig:"<<inst->m_MtGsigma[port->m_PortName]<<endl;
	    //bugfile<<" "<<inst->m_fMtGsigma/inst->m_fMtGmu<<endl;

	    inst->m_fInSlopeTrMu[port->m_PortName]=inst->m_fInSlopeTrMu[port->m_PortName]/loop;  
	    inst->m_fInSlopeTrSigma[port->m_PortName] = sqrt(inst->m_fInSlopeTrSigma[port->m_PortName]/loop - inst->m_fInSlopeTrMu[port->m_PortName]*inst->m_fInSlopeTrMu[port->m_PortName]);
	    inst->m_fInSlopeTfMu[port->m_PortName]=inst->m_fInSlopeTfMu[port->m_PortName]/loop;  
	    inst->m_fInSlopeTfSigma[port->m_PortName] = sqrt(inst->m_fInSlopeTfSigma[port->m_PortName]/loop - inst->m_fInSlopeTfMu[port->m_PortName]*inst->m_fInSlopeTfMu[port->m_PortName]);
	    bugfile<<" input slope:"<<endl;
	    bugfile<<" Tr:"<<" mu: "<<inst->m_fInSlopeTrMu[port->m_PortName]<<" sigma: "<<inst->m_fInSlopeTrSigma[port->m_PortName];
	    bugfile<<" Tf:"<<" mu: "<<inst->m_fInSlopeTfMu[port->m_PortName]<<" sigma: "<<inst->m_fInSlopeTfSigma[port->m_PortName]<<endl;

	}

	inst->m_fMtPmu = inst->m_fMtPmu/loop;
	inst->m_fMtPsigma = sqrt(inst->m_fMtPsigma/loop - inst->m_fMtPmu*inst->m_fMtPmu);
	inst->m_fMtPMaxmu = inst->m_fMtPMaxmu/loop;
	inst->m_fMtPMaxsigma = sqrt(inst->m_fMtPMaxsigma/loop - inst->m_fMtPMaxmu*inst->m_fMtPMaxmu);		
	inst->m_fGSlopeTrMu=inst->m_fGSlopeTrMu/loop;
	inst->m_fGSlopeTrSigma = sqrt(inst->m_fGSlopeTrSigma/loop - inst->m_fGSlopeTrMu*inst->m_fGSlopeTrMu);		
	inst->m_fGSlopeTfMu=inst->m_fGSlopeTfMu/loop;
	inst->m_fGSlopeTfSigma = sqrt(inst->m_fGSlopeTfSigma/loop - inst->m_fGSlopeTfMu*inst->m_fGSlopeTfMu);		


	bugfile<<"path:\n";
	bugfile<<" mu:"<<inst->m_fMtPmu<<" sig:"<<inst->m_fMtPsigma<<endl;
	//bugfile<<" "<<inst->m_fMtPsigma/inst->m_fMtPmu<<endl;
	bugfile<<"path max:\n";
	bugfile<<" mu:"<<inst->m_fMtPMaxmu<<" sig:"<<inst->m_fMtPMaxsigma<<endl;
	bugfile<<"slope max:\n";
	bugfile<<" Tr--mu:"<<inst->m_fGSlopeTrMu<<" sig:"<<inst->m_fGSlopeTrSigma;
	bugfile<<" Tf--mu:"<<inst->m_fGSlopeTfMu<<" sig:"<<inst->m_fGSlopeTfSigma<<endl;	
    }
#ifdef _DEBUG0
    mutr=mutr/loop;   sigtr=sqrt(sigtr/loop-mutr*mutr);
    muslope=muslope/loop;  sigslope=sqrt(sigslope/loop-muslope*muslope);
    corr=(corr/loop-mutr*muslope)/(sigtr*sigslope);
    bugfile<<"\n gate delay and its in slope relation:\n";
    bugfile<<"muTr: "<<mutr<<" sigTr: "<<sigtr<<endl;
    bugfile<<"muslope: "<<muslope<<" sigslope: "<<sigslope<<endl;
    bugfile<<"corr: "<<corr<<endl;
#endif
	muPath1/=loop; muPath2/=loop; 
	sigPath1=sqrt(sigPath1/loop-muPath1*muPath1);
	sigPath2=sqrt(sigPath2/loop-muPath2*muPath2);
	covP12=(covP12/loop-muPath1*muPath2)/(sigPath1*sigPath2);
	bugfile<<"G9"<<endl;
	bugfile<<muPath1<<" "<<sigPath1<<endl;
	bugfile<<muPath2<<" "<<sigPath2<<endl;
	bugfile<<covP12<<endl;
}

void CTiming::ClearPara()
{
    //insts
    map<string,CInst*>::iterator institer=m_pCkt->m_InstHash.begin();
    for (;institer!=m_pCkt->m_InstHash.end();++institer)
    {
	CInst *inst=(*institer).second;
	inst->m_iLastInstType=-1;
	inst->m_iNextInstType=-1;

	//ports
	map<string,CPort*>::iterator ptiter;        
	for (ptiter=inst->m_PortHash.begin();ptiter!=inst->m_PortHash.end();++ptiter)
	{
	    CPort *port=(*ptiter).second;
	    port->m_dLoad=0;
	    port->m_dPinCap=0;
	    port->m_bTruePath=true;
	    port->m_fPartPathDelay=-1;
	    port->m_pLastPartPath=NULL;
	}
    }

    //ckt
    m_pCkt->m_fMaxDelay=-1;

    //ctiming
    m_fMaxDelay=-1;


}

// Monte Carlo version of the pin-capacitance update.
//
// For every instance the gate width/length of the current sample are taken
// either from the correlated sample matrix at the instance's grid index
// (type == 0) or from the instance's own m_MtPara draw (type != 0); the
// oxide thickness comes from the per-pin m_MtIndParap / m_MtIndParan rows.
// Each input port then gets GetGateInCap(...) as its pin capacitance.
// Primary-output pins always receive the constant POPINCAP.
void CTiming::MonteUdtPinCap(CMonte& sample,int type)
{
    map<string,CInst*>::iterator gateIt;
    map<string,CPort*>::iterator pinIt;

    for (gateIt=m_pCkt->m_InstHash.begin();gateIt!=m_pCkt->m_InstHash.end();++gateIt){
        CInst* gate=gateIt->second;
        map<string,CPort*>& pins=gate->GetPortHash();

        // width and length of this sample
        const int grid=(int)gate->m_Grid.index;
        float w,l;
        if (type){
            w=gate->m_MtPara[ENUMWGATE];
            l=gate->m_MtPara[ENUMLGATE];
        }else{
            w=sample.m_pMatrix[ENUMWGATE][grid];
            l=sample.m_pMatrix[ENUMLGATE][grid];
        }

        // per-pin oxide thickness, indexed by (pin number - 1)
        float* toxp=gate->m_MtIndParap[ENUMTOXGATE];
        float* toxn=gate->m_MtIndParan[ENUMTOXGATE];

        string celltype=gate->m_CellType;
        CCell* cell=m_pCkt->m_plib->FindCell(celltype);
        for (pinIt=pins.begin();pinIt!=pins.end();++pinIt)
        {
            CPort* pin=pinIt->second;
            int pinnum=cell->FindPortNum(pin->m_PortName);
            if (pin->m_Direction!="input")
                continue;
            pin->SetPinCap(GetGateInCap(w,l,toxp[pinnum-1],toxn[pinnum-1],celltype,gate->m_fGateSize));
        }
    }

    // Primary outputs: fixed capacitance (sampling it was tried earlier and
    // is disabled).
    for (pinIt=m_pCkt->m_POHash.begin();pinIt!=m_pCkt->m_POHash.end();++pinIt)
    {
        pinIt->second->SetPinCap(POPINCAP);
    }
}

// Monte Carlo version of the instance delay update for ONE instance.
//
//   drvinst : instance whose pin-to-output delays are recomputed.
//   sample  : correlated sampler; its matrix is read only when type == 0.
//   type    : 0  -> gate W/L come from sample->m_pMatrix at the instance's
//                   grid index;
//             !0 -> gate W/L come from drvinst->m_MtPara.
//
// For every input port Delay() is evaluated with the load already stored
// on the instance's output port (m_dLoad, filled by the tree RC pass) and
// the result is stored through SetDelay() / SetOutSlope().
// The input slope handed to Delay() has Tr and Tf forced to 0.
//
// Instances without an "output" port are left untouched.
void CTiming::MonteUdtInstDelay(CInst* drvinst,CMonte* sample,int type)
{
    map<string,CPort*>& porthash=drvinst->GetPortHash();

    // locate the output port of the instance
    CPort* drvport=NULL;
    map<string,CPort*>::iterator ptiter;
    for (ptiter=porthash.begin();ptiter!=porthash.end();++ptiter)
    {
        CPort* port=(*ptiter).second;
        if(port->m_Direction=="output")
        {
            drvport=port;
            break;
        }
    }
    // Without an output port there is no load to read; previously an
    // uninitialised pointer was dereferenced here.
    if (!drvport)
        return;

    //drvport's load capacitance=(wire cap + sink cap) -- already calculated in tree
    float cload=drvport->m_dLoad;

    //calculate the instance delay, for all pairs:
    //from pinnum-input_port to output_port
    string celltype=drvinst->m_CellType;
    CCell* cell=m_pCkt->m_plib->FindCell(celltype);
    float w,l;

    int index=(int)drvinst->m_Grid.index;
    if (!type){
        w=sample->m_pMatrix[ENUMWGATE][index];
        l=sample->m_pMatrix[ENUMLGATE][index];
    }else{
        w=drvinst->m_MtPara[ENUMWGATE];
        l=drvinst->m_MtPara[ENUMLGATE];
    }
    // per-pin oxide thickness and doping samples
    float *toxp,*toxn,*nap,*nan;
    toxp=drvinst->m_MtIndParap[ENUMTOXGATE];
    toxn=drvinst->m_MtIndParan[ENUMTOXGATE];
    nap=drvinst->m_MtIndParap[ENUMNAGATE];
    nan=drvinst->m_MtIndParan[ENUMNAGATE];

    for (ptiter=porthash.begin();ptiter!=porthash.end();++ptiter)
    {
        CPort* port=(*ptiter).second;
        if(port->m_Direction=="input")
        {//input port of the instance
            int pinnum=cell->FindPortNum(port->m_PortName);
            CTimingDataCoef instDelay,outSlope;
            CTimingData inputSlope;
            // start from the output slope of the port driving this input...
            CTimingData& lastInstslope=port->m_Conn->m_ConnNet->m_pDrvPort->GetOutSlope();
            inputSlope=lastInstslope;
            CNet* lastNet=port->m_Conn->m_ConnNet;
            // wire delay to this pin; currently unused because adding it to
            // the slope is disabled (see the ## line below)
            float lastNetDelay=lastNet->GetDelay(drvinst->m_InstName+"-"+port->m_PortName);
            // ...then force Tr/Tf to zero
            inputSlope.Tf=inputSlope.Tr=0;
            //## 		inputSlope.Tf+=lastNetDelay;   inputSlope.Tr+=lastNetDelay;

            Delay(celltype,w,l,toxp,toxn,nap,nan,cload,inputSlope,pinnum,drvinst->m_fGateSize,0,instDelay,outSlope);
            drvinst->SetDelay(port->m_PortName,&instDelay);
            port->SetOutSlope(outSlope);
#ifdef _DEBUG0
            TestSlope(drvinst->m_InstName,port->m_PortName,instDelay,inputSlope);
#endif

#ifdef _DEBUG //::
            drvinst->MonteGateSigmu(instDelay.Delay,port->m_PortName);
            drvinst->MonteInSlopeSigmu(inputSlope,port->m_PortName);
#endif
        }
    }
}

// Draws the gate parameters of every instance independently: the mean and
// sigma of each of the GATENUMPARA parameters are read from the instance's
// grid in the correlation model and handed to sample.Sample0(), which fills
// the instance's m_MtPara. Afterwards MtInstIndPara() draws the parameters
// kept in the separate "Ind" tables.
void CTiming::IidMtInstPara(CMonte& sample)
{
    map<string,CInst*>::iterator gateIt=m_pCkt->m_InstHash.begin();
    while (gateIt!=m_pCkt->m_InstHash.end()){
        CInst* gate=gateIt->second;
        int grid=gate->m_Grid.index;

        float mu[GATENUMPARA],sig[GATENUMPARA];
        for (int p=0;p<GATENUMPARA;++p){
            mu[p]=m_pCorrModel->m_pGrid[grid].m_Para[p].m_fMean;
            sig[p]=m_pCorrModel->m_pGrid[grid].m_Para[p].m_fSigma;
        }
        sample.Sample0(gate->m_MtPara,sig,mu,GATENUMPARA);

        ++gateIt;
    }
    MtInstIndPara();
}

void CTiming::MtInstIndPara()
{	
    CMonte sample;
    int i,j;
    float intermu[INDNUMPARA],intersig[INDNUMPARA],interPara[INDNUMPARA];
    for (i=0;i<INDNUMPARA;i++){
	intermu[i]=0;
	intersig[i]=m_pCorrModel->m_fInterIndPSig[i];
    }
    sample.Sample0(interPara,intersig,intermu,INDNUMPARA);

    float mu[INDNUMPARA],sig[INDNUMPARA];
    for (i=0;i<INDNUMPARA;i++){
	mu[i]=m_pCorrModel->m_fIndPMean[i];
	sig[i]=m_pCorrModel->m_fIntraIndPSig[i];
    }
    map<string,CInst*>::iterator institer;	
    for (institer=m_pCkt->m_InstHash.begin();institer!=m_pCkt->m_InstHash.end();++institer){            
	CInst *drvinst=(*institer).second;
	int totpin=drvinst->GetInPinNum();	       
	for (i=0;i<INDGATENUMPARA;i++){
	    for (j=0;j<totpin;j++){
		sample.Sample0(&drvinst->m_MtIndParap[i][j],&sig[i],&mu[i],1);
		sample.Sample0(&drvinst->m_MtIndParan[i][j],&sig[i],&mu[i],1);
		drvinst->m_MtIndParap[i][j]+=interPara[i];
		drvinst->m_MtIndParan[i][j]+=interPara[i];
	    }
	} 	
    }
}


//////////////////////////////////////////////////////////////////////
//Traditional timing analysis
//////////////////////////////////////////////////////////////////////

// Conventional (corner based) PERT-like timing analysis.
//
// 1. All ParaType entries are set to 1 and one analysis gives the nominal
//    delay (m_fNomDelay).
// 2. Every combination of the TOTNUMPARA parameters is then enumerated,
//    each parameter taking ParaType 2 or 3 (bit j of the combination index
//    selects the value for parameter j), and the circuit delay is
//    recomputed for each combination.
//    Per the ParaType declaration: 1 = nominal, 2 = +3 sigma, 3 = -3 sigma.
// 3. Mean, max and min over all combinations are written to bugfile; sigma
//    is reported as (max - mean) / 3.
//
// The enumeration is exponential in TOTNUMPARA, which must be smaller than
// the number of value bits of int.
void CTiming::RunConvTiming()
{
    //levelize ckt, read cell locations etc.
    Init();

    int i,j;
    for (i=0;i<TOTNUMPARA;i++)
        ParaType[i]=1;
    ConvTiming(-1);
    m_fNomDelay=m_fMaxDelay;
    ClearPara();
    bugfile<<"nominal delay: "<<m_fNomDelay<<endl;

    float maxDelay,minDelay;
    maxDelay=minDelay=m_fNomDelay;

    // delay calculation by exhaustive parameter combinations
    float mean=0, sigma=0;

    dumpTime("conventional starts");
    clock_t start=clock();

    // Integer shifts are used for the 2^n arithmetic: pow() works in
    // floating point and truncating its result can yield a wrong bit mask.
    const int totcomb=1<<TOTNUMPARA;
    for (i=0;i<totcomb;i++){
        for (j=0;j<TOTNUMPARA;j++)
            ParaType[j]=((i>>j)&1)+2;
        ConvTiming(-1);
        bugfile<<m_fMaxDelay<<endl;
        mean+=m_fMaxDelay;
        if (maxDelay<m_fMaxDelay) maxDelay=m_fMaxDelay;
        if (minDelay>m_fMaxDelay) minDelay=m_fMaxDelay;
        ClearPara();
    }
    mean=mean/totcomb;
    // sigma is estimated from the spread, not from the sum of squares
    sigma=(maxDelay-mean)/3.0;

    dumpTime(" timing analysis ends");
    clock_t finish = clock();
    double duration = (double)(finish - start) / CLOCKS_PER_SEC;
    bugfile<<"time elapsed: "<<duration<<" seconds\n";

    bugfile<<"Structural worst case delay: "<<endl;
    bugfile<<"mean: "<<mean<<" sigma: "<<sigma<<endl;
    bugfile<<"max: "<<maxDelay<<"  min: "<<minDelay<<endl;
}


// One deterministic timing pass: PreTiming() refreshes pin capacitances and
// wire RC/delays, then Pert() propagates arrival times with Monte Carlo
// mode switched off (typeMonte = -1, no sampler).
// `type` is forwarded unchanged to both steps. Original note on its values:
// 0 nominal delay, 1 worst case delay, 2 best case delay; RunConvTiming()
// in this file always passes -1.
void CTiming::ConvTiming(int type)
{
    PreTiming(type);

    const int noMonte=-1;
    Pert(type,noMonte,NULL);
}

// Common set-up shared by all analysis entry points. Three steps, in order:
//   1. Levelize()                    - levelize the circuit graph;
//   2. m_pCorrModel->Init(m_pCkt)    - assign instances to grids, compute the
//                                      per-grid parameter means and set up
//                                      the correlation matrix;
//   3. m_pTreeList->SetWireGrid()    - invoked on the interconnect tree list.
// Reading cell locations from a file (ReadInstLoc) used to be part of this
// sequence and is currently disabled.
void CTiming::Init()
{
    Levelize();
    m_pCorrModel->Init(m_pCkt);
    m_pTreeList->SetWireGrid();
}


// Preparation for one deterministic timing pass. Init() (levelization,
// grid assignment) must already have been run by the caller.
//
//   1. UdtPinCap(type): input pin capacitance of every instance input port
//      and every primary-output pin.
//   2. m_pTreeList->CalRC(NULL,0,type) followed by CalDelay(): RC of all
//      net segments and pin-to-pin net delays; the load capacitance of each
//      net's driving port is updated as part of this.
//
// Instance delays are not computed here. Under _DEBUG0 the pin
// capacitances and instance delays are printed through the circuit.
void CTiming::PreTiming(int type)
{
    UdtPinCap(type);
#ifdef _DEBUG0
    m_pCkt->PrtPinCap();
#endif

    m_pTreeList->CalRC(NULL,0,type);
    m_pTreeList->CalDelay();

#ifdef _DEBUG0
    m_pCkt->PrtInstDelay();
#endif
}

// Computes the input pin capacitance for the input ports of all instances
// and assigns the constant POPINCAP to every primary-output pin.
//
// Gate width and length are the per-grid values selected by the current
// ParaType setting; the oxide thickness is the single value returned by
// GetIndPVal() and is therefore identical for the p and n side of every
// pin. It is passed to GetGateInCap() directly: the previous per-pin
// scratch arrays of MAXDIM entries held that same value in every slot, but
// could be overrun when an instance had more than MAXDIM inputs and were
// read uninitialised when a pin number fell outside the filled range.
//
// `type` is currently not used.
void CTiming::UdtPinCap(int type){

    map<string,CInst*>::iterator institer;
    map<string,CPort*>::iterator ptiter;

    institer=m_pCkt->m_InstHash.begin();
    for (;institer!=m_pCkt->m_InstHash.end();++institer){
        CInst *drvinst=(*institer).second;
        map<string,CPort*>& porthash=drvinst->GetPortHash();

        int drvloc=(int)drvinst->m_Grid.index;
        float w=m_pCorrModel->m_pGrid[drvloc].GetParaMean(ENUMWGATE,ParaType[ENUMWGATE]);
        float l=m_pCorrModel->m_pGrid[drvloc].GetParaMean(ENUMLGATE,ParaType[ENUMLGATE]);
        float tox=m_pCorrModel->GetIndPVal(ENUMTOXGATE,ParaType[ENUMTOXGATE]);

        string celltype=drvinst->m_CellType;
        for (ptiter=porthash.begin();ptiter!=porthash.end();++ptiter)
        {
            CPort* port=(*ptiter).second;
            if(port->m_Direction=="input"){
                // same oxide thickness for the p and the n device
                port->SetPinCap(GetGateInCap(w,l,tox,tox,celltype,drvinst->m_fGateSize));
            }
        }
    }

    //set pin capacitance for PO pins
    for (ptiter=m_pCkt->m_POHash.begin();ptiter!=m_pCkt->m_POHash.end();++ptiter)
    {
        CPort *port=(*ptiter).second;
        port->SetPinCap(POPINCAP);
    }
}

// Deterministic instance delay update for ONE instance.
//
//   drvinst : instance whose pin-to-output delays are recomputed.
//   type    : currently not used.
//
// Gate W/L are the per-grid values selected by ParaType; oxide thickness
// and doping are the single values returned by GetIndPVal(), replicated
// for every input pin. For every input port Delay() is evaluated with the
// load stored on the instance's output port (m_dLoad) and the result is
// stored through SetDelay() / SetOutSlope(). The input slope handed to
// Delay() has Tr and Tf forced to 0.
//
// Instances without an "output" port are left untouched.
void CTiming::UdtInstDelay(CInst* drvinst,int type)
{
    int i;

    // locate the output port of the instance
    CPort* drvport=NULL;
    map<string,CPort*>& porthash=drvinst->GetPortHash();
    map<string,CPort*>::iterator ptiter;
    for (ptiter=porthash.begin();ptiter!=porthash.end();++ptiter)
    {
        CPort* port=(*ptiter).second;
        if(port->m_Direction=="output")
        {
            drvport=port;
            break;
        }
    }
    // Without an output port there is no load to read; previously an
    // uninitialised pointer was dereferenced here.
    if (!drvport)
        return;

    float cload=drvport->m_dLoad;

    //calculate the instance delay, for all pairs:
    //from pinnum-input_port to output_port
    string celltype=drvinst->m_CellType;
    CCell* cell=m_pCkt->m_plib->FindCell(celltype);
    int totpin=drvinst->GetInPinNum();
    int index=(int)drvinst->m_Grid.index;
    float w,l;
    w=m_pCorrModel->m_pGrid[index].GetParaMean(ENUMWGATE,ParaType[ENUMWGATE]);
    l=m_pCorrModel->m_pGrid[index].GetParaMean(ENUMLGATE,ParaType[ENUMLGATE]);

    float tox=m_pCorrModel->GetIndPVal(ENUMTOXGATE,ParaType[ENUMTOXGATE]);
    float na=m_pCorrModel->GetIndPVal(ENUMNAGATE,ParaType[ENUMNAGATE]);
    float toxn[MAXDIM],toxp[MAXDIM],nan[MAXDIM],nap[MAXDIM];
    // never write past the MAXDIM-sized scratch arrays
    if (totpin>MAXDIM)
        totpin=MAXDIM;
    for (i=0;i<totpin;i++){
        toxn[i]=tox;	toxp[i]=tox;
        nan[i]=na;	nap[i]=na;
    }

    for (ptiter=porthash.begin();ptiter!=porthash.end();++ptiter)
    {
        CPort* port=(*ptiter).second;
        if(port->m_Direction=="input")
        {//input port of the instance
            int pinnum=cell->FindPortNum(port->m_PortName);

            CTimingDataCoef instDelay,outSlope;
            CTimingData inputSlope;
            // start from the output slope of the port driving this input...
            CTimingData& lastInstslope=port->m_Conn->m_ConnNet->m_pDrvPort->GetOutSlope();
            inputSlope=lastInstslope;
            CNet* lastNet=port->m_Conn->m_ConnNet;
            // wire delay to this pin; currently unused because adding it to
            // the slope is disabled (see the ## line below)
            float lastNetDelay=lastNet->GetDelay(drvinst->m_InstName+"-"+port->m_PortName);
            // ...then force Tr/Tf to zero
            inputSlope.Tf=inputSlope.Tr=0;
            //##			inputSlope.Tf+=lastNetDelay; inputSlope.Tr+=lastNetDelay;

            Delay(celltype,w,l,toxp,toxn,nap,nan,cload,inputSlope,pinnum,drvinst->m_fGateSize,0,instDelay,outSlope);
            drvinst->SetDelay(port->m_PortName,&instDelay);
            port->SetOutSlope(outSlope);
        }
    }
}


// Placeholder for an interconnect delay update. The body is empty; in this
// file net RC and delays are computed through m_pTreeList (CalRC/CalDelay).
void CTiming::UdtNetDelay()
{
}


// Propagates the arrival time of `net` to every port connected to it except
// the driver `drvport`: each sink receives (net arrival time + net delay to
// that sink) as its arrival time.
// The net delay is looked up by sink name, which is "<instance>-<port>" for
// a port that belongs to an instance and just the port name otherwise.
void CTiming::CalNetDepTime(CNet* net,CPort* drvport)
{
    const float netArrTime=net->GetArrivalTime();

    list<CConn*>::iterator connIt=net->m_NetConn.begin();
    for (;connIt!=net->m_NetConn.end();++connIt)
    {
        CConn* conn=*connIt;
        CPort* sink=conn->m_ConnPort;
        if (sink==drvport)
            continue;                      // the driver itself is not a sink

        string sinkName;
        if (conn->m_ConnInst)
            sinkName=conn->m_ConnInst->m_InstName+"-"+sink->m_PortName;
        else
            sinkName=sink->m_PortName;

        float netdelay=net->GetDelay(sinkName);
        float arrival=netArrTime+netdelay;
        sink->SetArrivalTime(arrival);

        // Propagating the driver slope (plus net delay) to the receiver was
        // tried here and is disabled.
    }
}


// Timing by PERT-like traversal of the levelized instance queue.
//
//   typePert  : forwarded to UdtInstDelay() in deterministic mode.
//   typeMonte : -1 selects deterministic mode; any other value selects
//               Monte Carlo mode and is forwarded to MonteUdtInstDelay().
//   sample    : sampler forwarded to MonteUdtInstDelay() (may be NULL in
//               deterministic mode).
//
// Steps:
//   1. PI and DffPI ports and the nets they drive get arrival time 0, and
//      the net delays are added to reach all fanout ports.
//   2. For every instance in m_lInstSeq the instance delays are updated,
//      the latest (arrival + pin-to-output delay) over all input ports
//      becomes the arrival time of the output port and of the net it
//      drives, and that time is propagated over the net. The input port on
//      the latest path is remembered in m_pCrtPort and its output slope is
//      copied to the output port.
//   3. m_fMaxDelay / m_MaxDelayPort / m_MaxDelayInst are set from the
//      latest arrival over all PO and DffPO ports.
//
// Every port that is not an "input" is treated as the instance's output.
// Instances without such a port are skipped, and the slope copy is skipped
// when no input port produced a path (previously both cases dereferenced
// an uninitialised or NULL pointer).
void CTiming::Pert(int typePert, int typeMonte, CMonte* sample){

    //for PI and DffPI ports,
    //set the PI/DffPI ports arrival time
    //set the arrival time of nets they drive (init)
    //add net delay into port arrival time to get arrival time of all fanout ports on the net
    map<string,CPort*>::iterator ptiter;
    ptiter=m_pCkt->m_PIHash.begin();
    for (;ptiter!=m_pCkt->m_PIHash.end();++ptiter)
    {
        CPort *port=(*ptiter).second;
        port->SetArrivalTime(0);
        CNet* net=port->m_Conn->m_ConnNet;
        net->SetArrivalTime(0);
        CalNetDepTime(net,port);
    }

    ptiter=m_pCkt->m_DffPIHash.begin();
    for (;ptiter!=m_pCkt->m_DffPIHash.end();++ptiter)
    {
        CPort *port=(*ptiter).second;
        port->SetArrivalTime(0);
        CNet* net=port->m_Conn->m_ConnNet;
        net->SetArrivalTime(0);
        CalNetDepTime(net,port);
    }

    //browse the inst Queue
    list<CInst*>::iterator institer;
    for (institer=m_lInstSeq.begin();institer!=m_lInstSeq.end();++institer)
    {
        CInst* drvinst=*institer;

        //update instance delay
        if (typeMonte==-1)
            UdtInstDelay(drvinst,typePert);
        else
            MonteUdtInstDelay(drvinst,sample,typeMonte);

        //browse the ports of the instance
        //using the arrival times for all input ports of the instance
        //adding the instance delay
        //to find path delay till the drvport of the instance (max operation)
        CPort* drvport=NULL;
        CNet* drvnet=NULL;
        float maxdelay=-1;
        float maxInstDelay=0;      // pin-to-output delay of the critical input
        CPort* maxport=NULL;
        map<string,CPort*>& porthash=drvinst->GetPortHash();
        float dPath1=0,dPath2=0;
        for (ptiter=porthash.begin();ptiter!=porthash.end();++ptiter)
        {
            CPort* port=(*ptiter).second;
            if(port->m_Direction=="input")
            {//input port of the instance

                float delay=port->GetArrivalTime();
                float instdelay=drvinst->GetDelay(port->m_PortName).Delay;
                delay+=instdelay;
                if (delay>maxdelay){
                    maxdelay=delay;
                    maxInstDelay=instdelay;
                    maxport=port;
                }

                // Debug probe: path delay statistics through the two inputs
                // of instance "G9" (reported by GridSigMu()).
                if (drvinst->m_InstName=="G9") {
                    if (port->m_PortName=="in1"){
                        muPath1+=delay; sigPath1+=delay*delay; dPath1=delay;
                    }
                    if (port->m_PortName=="in2"){
                        muPath2+=delay; sigPath2+=delay*delay; dPath2=delay;
                    }
                    if (dPath1 && dPath2)
                        covP12+=dPath1*dPath2;
                }

            }else{
                drvport=port;
                drvnet=drvport->m_Conn->m_ConnNet;
            }
        }

        // nothing to propagate for an instance without an output port
        if (!drvport || !drvnet)
            continue;

        //set departTime of the drvport
        //update the arrTime of the net driven by the instance
        drvport->SetArrivalTime(maxdelay);
        drvnet->SetArrivalTime(maxdelay);
        CalNetDepTime(drvnet,drvport);

#ifdef _DEBUG //::
        drvinst->MontePathSigmu(maxdelay);
        // arrival time at the critical input = path delay minus the delay
        // of that same input (not of whichever input was visited last)
        drvinst->MontePMaxSigmu(maxdelay-maxInstDelay);
#endif
        //port with max pathdelay --> output slope
        if (maxport)
            drvport->SetOutSlope(maxport->GetOutSlope());
        drvinst->m_pCrtPort=maxport;

    }//for loop of instance queue



    //POs, DffPO are not in the instance queue
    //find maximum arrTime at the pins of POs and DffPO
    m_fMaxDelay=-1;

    ptiter=m_pCkt->m_POHash.begin();
    for (;ptiter!=m_pCkt->m_POHash.end();++ptiter)
    {
        CPort *port=(*ptiter).second;
        float delay=port->GetArrivalTime();
        if (delay>m_fMaxDelay){
            m_fMaxDelay=delay;
            m_MaxDelayPort=port;
            if (port->m_Conn->m_ConnInst)
                m_MaxDelayInst=port->m_Conn->m_ConnInst;
            else
                m_MaxDelayInst=NULL;
        }
    }

    ptiter=m_pCkt->m_DffPOHash.begin();
    for (;ptiter!=m_pCkt->m_DffPOHash.end();++ptiter)
    {
        CPort *port=(*ptiter).second;
        float delay=port->GetArrivalTime();
        if (delay>m_fMaxDelay){
            m_fMaxDelay=delay;
            m_MaxDelayPort=port;
            if (port->m_Conn->m_ConnInst)
                m_MaxDelayInst=port->m_Conn->m_ConnInst;
            else
                m_MaxDelayInst=NULL;
        }
    }


}



void CTiming::Levelize()
{
    map<string,CPort*>::iterator ptiter;

    //remove PI/PO that has no connections --> should be put in CCkt
    for (ptiter=m_pCkt->m_PIHash.begin();ptiter!=m_pCkt->m_PIHash.end();ptiter++){
	if (!(*ptiter).second->m_Conn){
	    m_pCkt->m_PIHash.erase(ptiter++);
	    ptiter--;
	}
    }
    for (ptiter=m_pCkt->m_POHash.begin();ptiter!=m_pCkt->m_POHash.end();++ptiter){
	if (!(*ptiter).second->m_Conn){
	    m_pCkt->m_POHash.erase(ptiter++);
	    ptiter--;
	}
    }



    NETLIST netlist,*pnetlist;

    //Add first level nets;
    m_mLevelList.push_back(netlist);
    m_mLevelList.push_back(netlist);
    pnetlist=&m_mLevelList[1];

    //Clear Port hash
    ptiter=m_pCkt->m_PIHash.begin();

    //Insert level 1 to queue and level list
    for (;ptiter!=m_pCkt->m_PIHash.end();++ptiter)
    {
	CPort *port=(*ptiter).second;
	CNet* net=port->m_Conn->m_ConnNet;
	//Set Arribal Time
	net->SetArrivalTime(0);
	//PI level -> 1
	net->SetLevel(1);
	//Insert into net level list
	pnetlist->push_back(net);

#ifdef _DEBUG0
	bugfile << "NetLevel List " << 1 << "<-- " << net->m_NetName <<"\n";
#endif

	CInst* inst;
	list<CConn*>& netclist = net->GetNetConnList();
	list<CConn*>::iterator citer=netclist.begin();


	//Add instance to queue
	for (;citer!=netclist.end();++citer)
	{
	    CConn* conn=*citer;
	    //Active input pin by decreasing avtive counter by 1
	    // For PI, instance is NULL
	    if((conn->m_ConnPort->m_Direction=="input")
	       &&(conn->m_ConnInst!=NULL)
	       && !m_pCkt->FindDffPO(conn->m_ConnInst->m_InstName,conn->m_ConnPort->m_PortName))

		//Only fully activated instance can be added into queue
		if(!conn->m_ConnInst->DecUncalcPinCnt())
		{
		    conn->m_ConnInst->SetLevel(1);                       
		    m_lInstSeq.push_back(conn->m_ConnInst);
#ifdef _DEBUG0
		    bugfile << "Active Queue (PI) <-- " << conn->m_ConnInst->m_InstName <<"\n";
#endif
		}
	}

    }

    //Manipulate Dff
    //Clear Port hash
    ptiter=m_pCkt->m_DffPIHash.begin();

    //Insert level 1 to queue and level list
    for (;ptiter!=m_pCkt->m_DffPIHash.end();++ptiter)
    {
	CPort *port=(*ptiter).second;
	CNet* net=port->m_Conn->m_ConnNet;
	//Set Arrival Time
	net->SetArrivalTime(0);
	//PI level -> 1
	net->SetLevel(1);
	//Insert into net level list
	pnetlist->push_back(net);




#ifdef _DEBUG0
	bugfile << "NetLevel List " << 1 << "<-- " << net->m_NetName <<"\n";
#endif

	CInst* inst;
	list<CConn*>& netclist = net->GetNetConnList();
	list<CConn*>::iterator citer=netclist.begin();

	//Add instance to queue
	for (;citer!=netclist.end();++citer)
	{
	    CConn* conn=*citer;
	    //Active input pin by decreasing avtive counter by 1
	    // For PI, instance is NULL
	    if((conn->m_ConnPort->m_Direction=="input")
	       &&(conn->m_ConnInst!=NULL)
	       && !m_pCkt->FindDffPO(conn->m_ConnInst->m_InstName,conn->m_ConnPort->m_PortName))
		//Only fully activated instance can be added into queue
		if(!conn->m_ConnInst->DecUncalcPinCnt())
		{
		    conn->m_ConnInst->SetLevel(1);
		    m_lInstSeq.push_back(conn->m_ConnInst);
#ifdef _DEBUG0
		    bugfile << "Active Queue (DFF PI) <-- " << conn->m_ConnInst->m_InstName <<"\n";
#endif
		}
	}

    }


    //loop till the queue is empty
    list<CInst*>::iterator institer;
    institer=m_lInstSeq.begin();
    //Loop of queue
    for (;institer!=m_lInstSeq.end();++institer)
    {
	//browse the queue of instance
	CInst* inst=*institer;
	map<string,CPort*>& porthash=inst->GetPortHash();

	map<string,CPort*>::iterator ptiter;

	for (ptiter=porthash.begin();ptiter!=porthash.end();++ptiter)
	{
	    CPort* port=(*ptiter).second;
	    if(port->m_Direction=="output")
	    {
		//find the net that the instance drives
		CNet* net=port->m_Conn->m_ConnNet;
		list<CConn*>& connlist=net->GetNetConnList();
		int level=GetNetLevel(net);
		net->SetLevel(level);
		if(level!=INVALID)
		{
		    //Add first level list;
		    //if(m_mLevelList.find(level)==m_mLevelList.end())
		    //    m_mLevelList[level]= netlist;
		    if (m_mLevelList.size()<=level+1)
			m_mLevelList.push_back(netlist);
		    pnetlist=&m_mLevelList[level];
		    pnetlist->push_back(net);
#ifdef _DEBUG0
		    bugfile << "NetLevel List " << level << "<-- " << net->m_NetName <<"\n";
#endif

		}


		// add the recievers to the queue if applicable
		//driver: Inst, reciever: conn->m_ConnInst
		list<CConn*>::iterator conniter;
		conniter=connlist.begin();
		for (;conniter!=connlist.end();++conniter)
		{
		    CConn* conn=*conniter;
		    if(conn->m_ConnPort->m_Direction=="input"
		       && !m_pCkt->FindDffPO(conn->m_ConnInst->m_InstName,conn->m_ConnPort->m_PortName))
		    {
			if(!conn->m_ConnInst->DecUncalcPinCnt())
			{
			    conn->m_ConnInst->SetLevel(level);
			    m_lInstSeq.push_back(conn->m_ConnInst);
#ifdef _DEBUG0
			    bugfile << "Active Queue <-- " << conn->m_ConnInst->m_InstName <<"\n";
#endif


			}
		    }
		}
	    }
	}
	//remove from queue
	//       institer=m_lInstSeq.erase(institer);
    }//end loop of queue

}

// Level of an instance, derived from the nets on its input ports.
//
// Returns INVALID as soon as one input net reports INVALID from GetLevel(),
// and also when the instance has no input port at all. Otherwise the result
// is the largest input-net level found (this relies on INVALID comparing
// lower than every valid level).
int CTiming::GetInstLevel(CInst* inst)
{
    int maxLevel=INVALID;

    for (map<string,CPort*>::iterator it=inst->m_PortHash.begin();
	 it!=inst->m_PortHash.end();++it)
    {
	CPort* pin=it->second;

	//only input pins contribute to the level
	if (!(pin->m_Direction=="input"))
	    continue;

	int netLevel=pin->m_Conn->m_ConnNet->GetLevel();
	if (netLevel==INVALID)
	    return INVALID;	//a feeding net is not levelized yet

	if (netLevel>maxLevel)
	    maxLevel=netLevel;
    }

    return maxLevel;
}

// Level of a net: one more than the highest level among the instances
// that drive it.
//
// Connections whose port direction is "output" are treated as drivers;
// those without an owning instance are ignored. Returns INVALID when any
// driving instance has no valid level yet, or when no driving instance is
// found.
int CTiming::GetNetLevel(CNet* net)
{
    int drvLevel=INVALID;

    for (list<CConn*>::iterator it=net->m_NetConn.begin();
	 it!=net->m_NetConn.end();++it)
    {
	CConn* pin=*it;

	//look only at the instance pins that drive the net
	if (!(pin->m_ConnPort->m_Direction=="output"))
	    continue;
	if (pin->m_ConnInst==NULL)
	    continue;	//port without an instance: nothing to levelize

	int instLevel=GetInstLevel(pin->m_ConnInst);
	if (instLevel==INVALID)
	    return INVALID;

	if (instLevel>drvLevel)
	    drvLevel=instLevel;
    }

    if (drvLevel==INVALID)
	return INVALID;
    return drvLevel+1;
}

//*************************************************************/
// assign the delta values of W,L,Tox... for each instance	
// Wni=Wn(1+deltaW), deltaW = DeltaW_0 + DeltaW_x*x + DeltaW_y*y		
//*************************************************************/
/*   void CTiming::UdtDelta(){

     float xmid,ymid,xMax,yMax,xMin,yMin;
     float gridW,gridH;

     xMax = m_pCkt->m_UpRight.x;
     yMax = m_pCkt->m_UpRight.y;
     xMin = m_pCkt->m_LowLeft.x;
     yMin = m_pCkt->m_LowLeft.y;
     xmid=(xMax+xMin)/2;
     ymid=(yMax+yMin)/2;

     gridW=(xMax-xMin)/m_iGridSizeX;
     gridH=(yMax-yMin)/m_iGridSizeY;
     m_fGridW=gridW;
     m_fGridH=gridH;


     float padW=m_pCkt->m_plib->FindCell("pad")->m_Size.width;
     float padH=m_pCkt->m_plib->FindCell("pad")->m_Size.height;

//calculate each grid's center point location
int i,j;
for (i=0;i<m_iGridSizeX;i++)
for (j=0;j<m_iGridSizeY;j++){
m_Grid[i+j*m_iGridSizeX].m_Center.x=(i+0.5)*gridW;
m_Grid[i+j*m_iGridSizeX].m_Center.y=(j+0.5)*gridH;
m_Grid[i+j*m_iGridSizeX].m_LowLeft.x=i*gridW+((i==0)? -padW:0);
m_Grid[i+j*m_iGridSizeX].m_LowLeft.y=j*gridH+((j==0)? -padH:0);
m_Grid[i+j*m_iGridSizeX].m_UpRight.x=(i+1)*gridW+((i==m_iGridSizeX)? padW:0);
m_Grid[i+j*m_iGridSizeX].m_UpRight.y=(j+1)*gridH+((j==m_iGridSizeY)? padH:0);
}


//calculate DeltaW_x, DeltaW_y
//delatw=delta0+x*DeltaW_x+y*DeltaW_y, delta_x=delta_y, 
//
//Wn(x,y)=Wn+x*DeltaW_x+y*DeltaW_y+N(0,DeltaW_rand), 
//DeltaW_x=DeltaW_y,
//xMax*DeltaW_x+yMax*DeltaW_y=3*DeltaW_rand
//xMax*DeltaW_x+yMax*DeltaW_y+3*DeltaW_rand=VarW;


//m_fDeltaWrand=VARW/6;
//m_fDeltaWx=m_fDeltaWy=(VARW-3*m_fDeltaWrand)/(xMax+yMax);
//m_fDeltaLrand=VARL/6;
//m_fDeltaLx=m_fDeltaLy=(VARL-3*m_fDeltaLrand)/(xMax+yMax);

float rand[NUMPARA],deltax[NUMPARA],deltay[NUMPARA];
for (i=0;i<NUMPARA;i++)
rand[i]=ParaVar[i]*ParaMean[i]/6;

int size=m_iGridSizeX+m_iGridSizeY-2;
if (size){
for (i=0;i<NUMPARA;i++)
deltax[i]=deltay[i]=3*rand[i]/size;
}else{
for (i=0;i<NUMPARA;i++)
deltax[i]=deltay[i]=0;		
}

float sdivmu[NUMPARA];
for (i=0;i<NUMPARA;i++)
sdivmu[i]=rand[i]/(ParaMean[i]+3*rand[i]);  //at max cord, ratio of sigma/mu for paras: W,L...

cout<<"percW:"<<sdivmu[ENUMWGATE]<<" percL"<<sdivmu[ENUMLGATE]<<endl;



//assign mean value of parameters for each grid
for (i=0;i<m_iGridSizeX;i++)
for (j=0;j<m_iGridSizeY;j++)
{
    //float x=m_Grid[i+j*m_iGridSizeX].m_Center.x;
    //float y=m_Grid[i+j*m_iGridSizeX].m_Center.y;
    int x=i;
    int y=j;

    for (int p=0;p<NUMPARA;p++){

	//m_Grid[i+j*m_iGridSizeX].m_Para[p].m_fMean=ParaMean[p] + deltax[p]*x + deltay[p]*y;
	m_Grid[i+j*m_iGridSizeX].m_Para[p].m_fMean=ParaMean[p];

	if (!m_iNumLevel)	    
	    m_Grid[i+j*m_iGridSizeX].m_Para[p].m_fSigma=sdivmu[p] * m_Grid[i+j*m_iGridSizeX].m_Para[p].m_fMean;
	else
	    m_Grid[i+j*m_iGridSizeX].m_Para[p].m_fSigma=rand[p];

#ifdef _DEBUG0
	bugfile<<"Grid:"<<i<<","<<j<<" "<<"Loc:"<<x<<","<<y<<"\n";
	bugfile<<"      "<<m_Grid[i+j*m_iGridSizeX].m_Para[p].m_fMean<<" "<<rand[p];
#endif
    }

}


//update which grid each instance belongs to 
map<string,CInst*>::iterator iter;
iter=m_pCkt->m_InstHash.begin();

for (iter;iter!=m_pCkt->m_InstHash.end();++iter){
    CInst *inst;
    inst=(*iter).second;

    inst->m_Grid.x=floor((inst->m_Loc.x-xMin)/gridW);
    inst->m_Grid.y=floor((inst->m_Loc.y-yMin)/gridH);
#ifdef _DEBUG0
    bugfile<<"Inst:"<<inst->m_InstName<<" "<<inst->m_Loc.x<<","<<inst->m_Loc.y
	<<" "<<"grid"<<inst->m_Grid.x<<","<<inst->m_Grid.y<<"\n";	
#endif

}

}

void CTiming::PrtCorrMatrix()
{
    int i,j,k;
    COVMAX* pcov;


    for (k=0;k<NUMPARA;k++)
    {
	printf("cov for para %i\n",k);
	for (i=0;i<m_iGridSizeX*m_iGridSizeY;i++)
	{
	    for (j=0;j<m_iGridSizeX*m_iGridSizeY;j++)
	    {
		pcov=GetCorrMatrix(i,j);
		float val=0;
		if (pcov)
		    val=pcov->cov[k];
		printf("%.5f ",val);
	    }
	    printf("\n");
	}
    }

}

//assign covariance matrix for the grids
//a) cov(i,i) = sigma_i * sigma_i
//b) cov(i,j) = rho_ij * sigma_i * sigma_j
//		cov(i,j)=cov(j,i), thus always store to cov(i,j) if i<j;

//assign cov by TAU02 model
//assumption: m_iGridSizeX=m_iGridSizeY
//assume sigma in all grid are equal
void CTiming::UdtCorrMatrix2()
{
    int i,j,m,n,k,p;

    int level=m_iNumLevel;
    float* perc=new float[level];

    printf("input percentage for levels (%i numbers):",level);
    for (k=0;k<level;k++)
	scanf("%f",&perc[k]);
    //	perc[0]=0.5; perc[1]=0.5; perc[2]=0.5;


    m_fCovMatrix.resize(m_iGridSizeX*m_iGridSizeY);

    float rval[NUMPARA];	
    for (i=0;i<m_iGridSizeX*m_iGridSizeY;i++)
	for (j=i;j<m_iGridSizeX*m_iGridSizeY;j++)
	{	int xi=i%m_iGridSizeX; int yi=i/m_iGridSizeX;
	    int xj=j%m_iGridSizeX; int yj=j/m_iGridSizeX;

	    if (i==j){
		for (p=0;p<NUMPARA;p++)
		    rval[p]=m_Grid[i].m_Para[p].m_fSigma*m_Grid[i].m_Para[p].m_fSigma;
		WtCorrMatrix2(i,j,rval);
		continue;
	    }

	    for (p=0;p<NUMPARA;p++)	rval[p]=0;				
	    for (k=level-2;k>=0;k--)
	    {
		int slot=m_iGridSizeX/(int)pow(2,k);
		int gridxi=(int)(xi/slot);	int gridyi=(int)(yi/slot);
		int gridxj=(int)(xj/slot);	int gridyj=(int)(yj/slot);
		if ((gridxi==gridxj) && (gridyi==gridyj)){	
		    for (p=0;p<NUMPARA;p++){
			float sig=m_Grid[i].m_Para[p].m_fSigma;
			rval[p]+=perc[k]*sig*sig;
		    }
		}
	    }			
	    WtCorrMatrix2(i,j,rval);						
	}

    if (perc) delete[] perc;

}
void CTiming::WtCorrMatrix2(int index,int nbindex,float* r)
{
    COVMAX ele;
    ele.x=index;
    ele.y=nbindex;

    for (int p=0;p<NUMPARA;p++)
	ele.cov[p]=r[p];

    m_fCovMatrix[index].push_back(ele);
}

//assign cov only to the grid 4 neighbours
void CTiming::UdtCorrMatrix()
{
    m_fCovMatrix.resize(m_iGridSizeX*m_iGridSizeY);
    int i,j;
    for (i=0;i<m_iGridSizeX*m_iGridSizeY;i++)
    {
	int row=(int)i/m_iGridSizeX;
	int col=(int)i%m_iGridSizeX;
	//assume grid has correlation with only its 4 next-to neighbours
	//and cov(i,j)=cov(j,i), store only at i<j  (lower triangle)
	//			 cov(i,j)=rho_ij*sigma(i)*sigma(j)
	//also store cov(i,i)=sigma(i)*sigma(i)

	WtCorrMatrix(i,i,1);
	if (row<m_iGridSizeY-1){
	    int nbrow=row+1;
	    int nbcol=col;
	    int nbindex=nbrow*m_iGridSizeX+nbcol;
	    WtCorrMatrix(i,nbindex,RHO1);
	}
	if (col<m_iGridSizeX-1){
	    int nbrow=row;
	    int nbcol=col+1;
	    int nbindex=nbrow*m_iGridSizeX+nbcol;
	    WtCorrMatrix(i,nbindex,RHO1);
	}
    }
}

void CTiming::WtCorrMatrix(int index,int nbindex,float r)
{
    COVMAX ele;
    ele.x=index;
    ele.y=nbindex;

    float sig[NUMPARA],nbsig[NUMPARA];
    for (int p=0;p<NUMPARA;p++)
    {
	sig[p]=m_Grid[index].m_Para[p].m_fSigma;
	nbsig[p]=m_Grid[nbindex].m_Para[p].m_fSigma;

	ele.cov[p]=r*sig[p]*nbsig[p];
    }


    m_fCovMatrix[index].push_back(ele);
}


//get the covariance value between grid[x] and grid[y]
// npara: the ith parameter   0th: W, 1st: L
COVMAX* CTiming::GetCorrMatrix(int x,int y)
{
    COVMAX* ret=NULL;

    int i,j;
    if (x<y){
	i=x;
	j=y;
    }else{
	i=y;
	j=x;
    }
    list<COVMAX>::iterator iter;
    for (iter=m_fCovMatrix[i].begin();iter!=m_fCovMatrix[i].end();++iter){
	if (j==(*iter).y){
	    ret=&(*iter);
	    break;
	}
    }

    return ret;
}

void CTiming::CpCovMatrix(int npara, double* tgtArray)
{

    int i,j;
    COVMAX* pcov;

    int size=m_iGridSizeX*m_iGridSizeY;
    for (i=0;i<size;i++)
	for (j=0;j<size;j++)
	{
	    pcov=GetCorrMatrix(i,j);
	    float val=0;
	    if (pcov)	val=pcov->cov[npara];
	    tgtArray[i*size+j]=(double)val;
	    tgtArray[j*size+i]=(double)val;

	}

}

//Note: special copy cov matrix only for Tox and Vt
void CTiming::CpCovMatrix2(int npara1, int npara2, double* tgtArray)
{

    int i,j;
    COVMAX* pcov;

    //Tox: npara1
    int size=m_iGridSizeX*m_iGridSizeY;
    for (i=0;i<size;i++)
	for (j=0;j<=i;j++)
	{
	    pcov=GetCorrMatrix(i,j);
	    float val=0;
	    if (pcov)	val=pcov->cov[npara1];
	    tgtArray[i*size*2+j]=(double)val;
	    tgtArray[j*size*2+i]=(double)val;

	}
    //Vt: npara2
    for (i=size;i<size*2;i++)
	for (j=size;j<=i;j++)
	{
	    pcov=GetCorrMatrix(i,j);
	    float val=0;
	    if (pcov)	val=pcov->cov[npara2];
	    tgtArray[i*size*2+j]=(double)val;
	    tgtArray[j*size*2+i]=(double)val;

	}

    //cov between Tox and Vt
    float k;	//k=sqrt(2*E_si*q*Na*2*Phi_b)
    for (i=0;i<size;i++)
	for (j=size;j<size*2;j++)
	{	
	    float val=k*tgtArray[i*size*2+i];
	    tgtArray[i*size*2+j]=(double)val;
	    tgtArray[j*size*2+i]=(double)val;
	}

}

//calculate PCs for all parameters
//copy the returned PC to the grid structure
void CTiming::UdtGridPC()
{
    int i,j;

    int size=m_iGridSizeX*m_iGridSizeY;
    CPca pca(this,size);

    if (!pca.CallMtLab()){
	cout<<"can't start matlab\n";
	exit(1);
    }

    /////////////// W,L
    for (int p=0;p<NUMPARA;p++){
	CpCovMatrix(p,pca.m_fCovMatrix);
	if (!pca.Start()){
	    cout<<"error calculation of PCA\n";
	    exit(1);
	}
	int pcsize=pca.GetPCSize();
	for (i=0;i<size;i++){
	    m_Grid[i].m_Para[p].UdtCoefPC(pca.m_RetPC[i],pcsize);
	}

#ifdef _DEBUG0
	cout<<"{";
	for (i=0;i<size;i++){
	    cout<<"{";
	    for (j=0;j<size;j++){
		cout<<m_Grid[i].m_Para[p].m_vCoefPC[j]<<",";
	    }
	    cout<<"},\n";
	}
	cout<<"}\n";
#endif

    }

}
*/

//////////////////////////////////////////////////////////////////////
// Statistical Timing Analysis
//////////////////////////////////////////////////////////////////////

// Top-level driver of the statistical timing analysis.
//
// Loads the standard-normal CDF/PDF tables, initialises the analysis and
// the grid principal components, runs PreTiming/InitNoRmFPath and then
// StatPert(). The resulting circuit delay distribution and the elapsed CPU
// time (measured with clock(), starting after initialisation) are written
// to bugfile.
void CTiming::StatTiming()
{
    readCdfN01();
    readPdfN01();

    //start time 0
    dumpTime("Init stat timing analysis starts");	

    Init();
    m_pCorrModel->UdtGridPC();

    //start time
    dumpTime("stat timing analysis starts");
    clock_t start = clock();


    PreTiming(1);

    //alternatives kept from the original source:
    //    Pert(1,-1,NULL);
    //	  InitRmFPath();
    InitNoRmFPath();

#ifdef _DEBUG0
    PrtTPInstQ();
#endif



    CDelayPC* maxrst=StatPert();

    bugfile<<"CKT Stat Data:\n";
    //StatPert() is treated as nullable by the cleanup below, so it must
    //not be dereferenced unconditionally here
    if (maxrst)
	maxrst->PrtData();

    //the result is released here only when there is more than one true PO
    //NOTE(review): presumably with a single true PO the pointer refers to
    //data owned elsewhere - confirm against StatPert()
    if (m_lTruePO.size()>1 && maxrst) delete maxrst;

    //end time
    dumpTime("stat timing analysis ends");
    clock_t finish = clock();
    double duration = (double)(finish - start) / CLOCKS_PER_SEC;
    bugfile<<"Time Elapsed: "<<duration<<" seconds"<<endl;

}


// Accumulates a receiver's load into the per-grid capacitance map.
//
// gridCap  map from grid location to accumulated load
// recloc   grid location of the receiver (key into gridCap)
// recload  load contributed by the receiver
// port     receiver port; not used by this function
void CTiming::AddGridCapMap(GridCapMap& gridCap,int recloc,float recload,CPort* port)
{
    GridCapMap::iterator entry=gridCap.find(recloc);

    if (entry!=gridCap.end()){
	//location already present: add this receiver's load to it
	entry->second+=recload;
    }else{
	//first receiver seen at this location: start the entry with its load
	gridCap[recloc]=recload;
    }
}


// Collects, per grid location, the pin capacitance of every receiver on
// the net driven by `drvport`.
//
// A connection counts as a receiver when its port direction is "input" or
// when m_pCkt->FindPO() recognises its port name. Receivers that belong to
// an instance are filed under that instance's grid index; receivers with
// no instance are all filed under location -1.
//
// drvport  port driving the net to inspect
// gridCap  in/out map updated through AddGridCapMap()
void CTiming::UdtGridCapMap(CPort* drvport, GridCapMap& gridCap)
{
    CNet* drvnet=drvport->m_Conn->m_ConnNet;
    list<CConn*>& connlist=drvnet->GetNetConnList();

    list<CConn*>::iterator conniter;               
    for (conniter=connlist.begin();conniter!=connlist.end();++conniter)
    {
	CConn* conn=*conniter;
	if(conn->m_ConnPort->m_Direction=="input"  ||
	   m_pCkt->FindPO(conn->m_ConnPort->m_PortName))                   
	{//the receiver of the net

	    CInst* recinst=conn->m_ConnInst;
	    float recload=conn->m_ConnPort->GetPinCap();

	    //-1: receiver is a port without an instance, not in any grid
	    int recloc=-1;
	    if (recinst)
		recloc=(int)recinst->m_Grid.index;

	    //accumulate this receiver's load under its grid location
	    AddGridCapMap(gridCap,recloc,recload,conn->m_ConnPort);
	}
    }
}


// Stub: the whole body is commented out, so calling this function does
// nothing and `drvport` is unused.
// The disabled code below sketches a two-stage gate model (a NAND/NOR stage
// loaded by a NOT1 stage); it is kept for reference only and refers to
// helpers and signatures that are not visible in this file.
void CTiming::StatInstDelay2(CPort* drvport)
{
    /*	CInst* drvinst=drvport->m_Conn->m_ConnInst;	
	int drvloc=(int)drvinst->m_Grid.x+(int)drvinst->m_Grid.y*m_iGridSizeX;
	float drvw=m_Grid[drvloc].m_Para[ENUMWGATE].m_fMean;
	float drvl=m_Grid[drvloc].m_Para[ENUMLGATE].m_fMean;
	float drvtox=m_Grid[drvloc].m_Para[ENUMTOXGATE].m_fMean;
	float drvna=m_Grid[drvloc].m_Para[ENUMNAGATE].m_fMean;


	string cellName1=drvinst->m_CellType;
	cellName1="N"+cellName1;	//AND is NAND+NOT, OR is NOR+NOT   	
	float load1=GetGateInCap(drvw,drvl,drvtox,"NOT1");//load of NAND/NOR (input cap of gate NOT)
	CStatTimingData instdelay1=Delay(cellName1,drvw,drvl,drvtox,drvna,load1,0,1);	//cal delay of the first NAND/NOR

	CLoadDPara derv;	 
	float wn=coefWpn[namelistmap["NOT1"]].wn+(drvw-GATEWIDn);
	float wp=coefWpn[namelistmap["NOT1"]].wp+(drvw-GATEWIDn);
	derv.m_pDervPara[ENUMWGATE]=2*load1/(wn+wp); 
	derv.m_pDervPara[ENUMLGATE]=load1/drvl; 
	derv.m_pDervPara[ENUMTOXGATE]=-load1/drvtox;
	derv.m_pDervPara[ENUMNAGATE]=0;
	derv.m_pDervPara[ENUMWINT]=derv.m_pDervPara[ENUMTINT]=0;
	CGridLoadDerv gridCap1;
	gridCap1.InsertData(drvloc,derv);
	gridCap1.SetTotCap(load1);

	StatInstCoef(instdelay1,drvloc,cellName1,gridCap1,coeflist);


	CStatTimingData instdelay2=Delay("NOT1",drvw,drvl,drvtox,drvna,totLoad,0,1);
	CalGridCoef(instdelay2,drvloc,"NOT1",,drvport->m_mLoadDerv,coeflist);
     */
}

// Statistical pin-to-output delay of the gate that drives `drvport`.
//
// For every input port of the driving instance that lies on a true path
// (m_bTruePath), StatInstCoef() fills statInstTrTf[<port name>] with the
// rise/fall gate delay and port->m_StatSlope with the output slope. The
// instance's stored delay for that pin is then set to (Tr+Tf)/2.
//
// Author's note on the model (gate delay, with fanouts in more than one grid):
//   D0+D0/W*dW-D0/L*dL+sum{D0/TotC*totCi/Wi*dWi+D0/TotC*totCi/Li*dLi}
// = D0+D0/W*dW-D0/L*dL + D0/TotC * sum{totCi/Wi*dWi+totCi/Li*dLi}
//
// drvport       output port of the instance to evaluate
// statInstTrTf  in/out map keyed by input port name; an entry is created
//               (or overwritten) for every true-path input pin
void CTiming::StatInstDelay(CPort* drvport,map<string,CStatSlope> &statInstTrTf) 
{
    CInst* drvinst=drvport->m_Conn->m_ConnInst;	
    string cellType=drvinst->m_CellType;
    CCell* cell=m_pCkt->m_plib->FindCell(cellType);
    int drvloc=(int)drvinst->m_Grid.index;
    int totpin=drvinst->GetInPinNum();

    map<string,CPort*>& porthash=drvinst->GetPortHash(); 
    map<string,CPort*>::iterator ptiter;  
    for (ptiter=porthash.begin();ptiter!=porthash.end();++ptiter)
    {           
	CPort* port=(*ptiter).second;
	if(!(port->m_Direction=="input" && port->m_bTruePath))
	    continue;	//only input ports on the true path are evaluated

	int pinnum=cell->FindPortNum(port->m_PortName);
	CStatSlope& inputSlope=port->GetStatInSlope();

	//one lookup per pin; references into a std::map stay valid while
	//other entries are inserted
	CStatSlope& pinTrTf=statInstTrTf[port->m_PortName];

	//calculate instdelay (Tr/Tf into pinTrTf, output slope into port->m_StatSlope)
	StatInstCoef(drvloc,cellType,totpin,drvport->m_mLoadDerv,inputSlope,pinnum,pinTrTf,port->m_StatSlope,drvinst->m_fGateSize);	

#ifdef _DEBUG
	bugfile<<"-----------"<<"Inst: "<<drvinst->m_InstName<<endl;
	bugfile<<" input slope port "<<port->m_PortName<<":"<<endl;
	bugfile<<"Tr: ";
	port->GetStatInSlope().m_RiseTime.PrtData();
	bugfile<<"Tf: ";
	port->GetStatInSlope().m_FallTime.PrtData();
#endif

#ifdef _DEBUG0
	//the map is keyed by port name; the former integer index did not
	//compile once statInstTrTf became a map
	if (drvinst->m_InstName=="G8" && port->m_PortName=="in1"){
	    float corr=CDelayPC::CalCorr2v(pinTrTf.m_RiseTime,inputSlope.m_FallTime);
	    bugfile<<"\n gate delay and its in slope relation:\n";
	    bugfile<<"muTr: "<<pinTrTf.m_RiseTime.m_fMean;
	    bugfile<<" sigTr: "<<pinTrTf.m_RiseTime.m_fSigma<<endl;
	    bugfile<<"muslope: "<<inputSlope.m_FallTime.m_fMean;
	    bugfile<<" sigslope: "<<inputSlope.m_FallTime.m_fSigma<<endl;
	    bugfile<<"corr: "<<corr<<endl;
	}
#endif
	//update gate delay = (Tr+Tf)/2
	CDelayPC& statInstDelay=drvinst->GetStatDelay(port->m_PortName);
	statInstDelay.StatSumDelay(pinTrTf.m_RiseTime,pinTrTf.m_FallTime);
	statInstDelay*=0.5;
    } 

    //Disabled in the original source (marked "!!"): taking StatMax() over
    //the per-pin Tr and Tf lists and storing the result as the output
    //slope in drvport->m_StatSlope. The slope is instead written per
    //input pin by StatInstCoef() above.
}

// Finds the pin-to-output delay and output slope of one gate input,
// represented as coefficients of the principal components (PC).
//
// drvloc         index of the driver's grid in m_pCorrModel->m_pGrid
// celltype       cell name handed to Delay()
// totpin         number of entries initialised in the per-pin tox/Na arrays
// gridCap        load description: total cap (m_fTotCap), per-grid load
//                derivatives (m_mGridDerv), m_fDervInter and m_fDervRand
// inputSlope     input slope, taken by value; it is scaled below but the
//                code that would add it to the results is commented out
// pinnum         pin number handed to Delay()
// statInstdelay  out: rise/fall gate delay
// statSlp        out: rise/fall output slope
// size           gate size handed to Delay()
void CTiming::StatInstCoef(int drvloc,string celltype, int totpin,
			   CGridLoadDerv& gridCap,CStatSlope inputSlope,int pinnum,
			   CStatSlope& statInstdelay,CStatSlope& statSlp,float size)
{
    int i,j;
    //nominal W/L come from the driver's grid, tox/Na from the independent-parameter means
    float w=m_pCorrModel->m_pGrid[drvloc].m_Para[ENUMWGATE].m_fMean;
    float l=m_pCorrModel->m_pGrid[drvloc].m_Para[ENUMLGATE].m_fMean;
    float toxmu=m_pCorrModel->m_fIndPMean[ENUMTOXGATE];
    float namu=m_pCorrModel->m_fIndPMean[ENUMNAGATE];
    //NOTE(review): only the first totpin entries are initialised and there
    //is no check that totpin <= MAXDIM
    float toxn[MAXDIM],toxp[MAXDIM],nan[MAXDIM],nap[MAXDIM];
    for (i=0;i<totpin;i++){
	toxn[i]=toxmu;	toxp[i]=toxmu;
	nan[i]=namu;	nap[i]=namu;
    }

    //float coefTr[NUMPARA],coefTf[NUMPARA];
    CDelayCoef coefTr,coefTf, coefSlpTr,coefSlpTf;
    //	CCoefIndP coefTrInd,coefTfInd;
    CGridDelayCoef coeflistTr,coeflistTf,coeflistSlpTr,coeflistSlpTf;

    //Delay() is evaluated with a zero input slope; the variant using the
    //mean of inputSlope is the commented line marked "##"
    float cload=gridCap.m_fTotCap;
    CTimingData slope;
    slope.Tr=0; slope.Tf=0;
    //##   slope.Tr=inputSlope.m_RiseTime.m_fMean; slope.Tf=inputSlope.m_FallTime.m_fMean;
    CTimingDataCoef instDelay,outSlope;
    Delay(celltype,w,l,toxp,toxn,nap,nan,cload,slope,pinnum,size,1,instDelay,outSlope);




    //copy the gate-parameter coefficients returned by Delay()
    //all other coefTr.m_pCoef[i], coefTf.m_pCoef[i] default is 0
    for (i=0;i<GATENUMPARA;i++){
	coefTr.m_pCoef[i]=instDelay.CoefTr[i];	
	coefTf.m_pCoef[i]=instDelay.CoefTf[i];
	coefSlpTr.m_pCoef[i]=outSlope.CoefTr[i];	
	coefSlpTf.m_pCoef[i]=outSlope.CoefTf[i];
    }  

    //put the coefs to the list, filed under the driver's own grid
    coeflistTr.InsertCoef(drvloc,coefTr);
    coeflistTf.InsertCoef(drvloc,coefTf);
    coeflistSlpTr.InsertCoef(drvloc,coefSlpTr);
    coeflistSlpTf.InsertCoef(drvloc,coefSlpTf);
    //    coeflistTr.InsertCoefInd(coefTrInd);
    //    coeflistTf.InsertCoefInd(coefTfInd);

    //load contribution of every receiver grid: Rp/Rn times the load
    //derivative; the coef objects above are reused and their first
    //NUMPARA entries overwritten
    map<int,CLoadDPara>::iterator giter;
    for (giter=gridCap.m_mGridDerv.begin();giter!=gridCap.m_mGridDerv.end();giter++)
    {
	int recloc=giter->first;
	float* recloadderv=giter->second.m_pDervPara;

	for (i=0;i<NUMPARA;i++){		
	    coefTr.m_pCoef[i]=instDelay.Rp*recloadderv[i];	
	    coefTf.m_pCoef[i]=instDelay.Rn*recloadderv[i];
	    coefSlpTr.m_pCoef[i]=outSlope.Rp*recloadderv[i];
	    coefSlpTf.m_pCoef[i]=outSlope.Rn*recloadderv[i];
	}
	//put the coefs to the list
	coeflistTr.InsertCoef(recloc,coefTr);
	coeflistTf.InsertCoef(recloc,coefTf);
	coeflistSlpTr.InsertCoef(recloc,coefSlpTr);
	coeflistSlpTf.InsertCoef(recloc,coefSlpTf);
    }	

    ////
    //m_fDervInter: load part (Rp/Rn times gridCap.m_fDervInter) ...
    for (i=0;i<INDGATENUMPARA;i++){
	coeflistTr.m_fDervInter[i]=instDelay.Rp*gridCap.m_fDervInter[i];
	coeflistTf.m_fDervInter[i]=instDelay.Rn*gridCap.m_fDervInter[i];
	coeflistSlpTr.m_fDervInter[i]=outSlope.Rp*gridCap.m_fDervInter[i];
	coeflistSlpTf.m_fDervInter[i]=outSlope.Rn*gridCap.m_fDervInter[i];
    }
    //... plus the gate's own part: m_fInterIndPSig[i] times CalSum(i) of the
    //independent-parameter coefficients
    for (i=0;i<INDGATENUMPARA;i++){
	coeflistTr.m_fDervInter[i]+=m_pCorrModel->m_fInterIndPSig[i]*instDelay.CoefIndTr.CalSum(i);
	coeflistTf.m_fDervInter[i]+=m_pCorrModel->m_fInterIndPSig[i]*instDelay.CoefIndTf.CalSum(i);
	coeflistSlpTr.m_fDervInter[i]+=m_pCorrModel->m_fInterIndPSig[i]*outSlope.CoefIndTr.CalSum(i);
	coeflistSlpTf.m_fDervInter[i]+=m_pCorrModel->m_fInterIndPSig[i]*outSlope.CoefIndTf.CalSum(i);
    }

    coeflistTr.SetMean(instDelay.Tr);
    coeflistTf.SetMean(instDelay.Tf);
    coeflistSlpTr.SetMean(outSlope.Tr);
    coeflistSlpTf.SetMean(outSlope.Tf);

    //convert the grid coefs of the delay and slope (Tr, Tf) to their representation in PC coefs
    CStatTimingData stepInstdelay,stepSlp;
    stepInstdelay.m_RiseTime.GridCoef2PC(coeflistTr,m_pCorrModel->m_pGrid);  //-1, no cal mean, sigma
    stepInstdelay.m_FallTime.GridCoef2PC(coeflistTf,m_pCorrModel->m_pGrid); 
    stepSlp.m_RiseTime.GridCoef2PC(coeflistSlpTr,m_pCorrModel->m_pGrid);  //-1, no cal mean, sigma
    stepSlp.m_FallTime.GridCoef2PC(coeflistSlpTf,m_pCorrModel->m_pGrid);

    //instdelay is not done yet: add coef of stat input slope to statinst delay
    //since tr = tr_step + tin_f*(1+2p)/6,	tf = tf_step + tin_r*(1+2p)/6 ?? not right yet
    //##	
    //ScaleDelayPC(inputSlope.m_RiseTime,(1+2*instDelay.Vtn/VDD)/6.0);
    //ScaleDelayPC(inputSlope.m_FallTime,(1+2*instDelay.Vtp/VDD)/6.0);
    //the scaled inputSlope is read again only inside the _DEBUG0 block
    //below, because the StatSumDelay calls that would use it are commented out
    inputSlope.m_RiseTime*=(1+2*instDelay.Vtn/VDD)/6.0;
    inputSlope.m_FallTime*=(1+2*instDelay.Vtp/VDD)/6.0;

#ifdef _DEBUG0
    bugfile<<"@@\n";
    CDelayPC testinstD,testinslopeD,addD;
    testinstD.StatSumDelay(stepInstdelay.m_RiseTime,stepInstdelay.m_FallTime);
    ScaleDelayPC(testinstD,0.5);
    testinstD.PrtData();
    testinslopeD.StatSumDelay(inputSlope.m_FallTime,inputSlope.m_RiseTime);
    ScaleDelayPC(testinslopeD,0.5);
    testinslopeD.PrtData();
    addD.StatSumDelay(testinstD,testinslopeD);
    addD.PrtData();
    bugfile<<"@@\n";
#endif
    //   statInstdelay.m_RiseTime.StatSumDelay(stepInstdelay.m_RiseTime,inputSlope.m_FallTime);
    //   statInstdelay.m_FallTime.StatSumDelay(stepInstdelay.m_FallTime,inputSlope.m_RiseTime);
    //   statSlp.m_RiseTime.StatSumDelay(stepSlp.m_RiseTime,inputSlope.m_FallTime);
    //   statSlp.m_FallTime.StatSumDelay(stepSlp.m_FallTime,inputSlope.m_RiseTime);   
    //the results are the step-input values only
    statInstdelay.m_RiseTime=stepInstdelay.m_RiseTime;
    statInstdelay.m_FallTime=stepInstdelay.m_FallTime;
    statSlp.m_RiseTime=stepSlp.m_RiseTime;
    statSlp.m_FallTime=stepSlp.m_FallTime;

    ////
    //fresh independent-coefficient records for the four results
    //NOTE(review): whatever m_CoefInd held after the assignments above is
    //overwritten without being freed here - confirm who owns these objects
    statInstdelay.m_RiseTime.m_CoefInd=new CIndDataCoef;
    statInstdelay.m_FallTime.m_CoefInd=new CIndDataCoef;
    statSlp.m_RiseTime.m_CoefInd=new CIndDataCoef;
    statSlp.m_FallTime.m_CoefInd=new CIndDataCoef;
    statInstdelay.m_RiseTime.m_CoefInd->CoefLdRand=instDelay.Rp*gridCap.m_fDervRand;
    statInstdelay.m_FallTime.m_CoefInd->CoefLdRand=instDelay.Rn*gridCap.m_fDervRand;
    statSlp.m_RiseTime.m_CoefInd->CoefLdRand=outSlope.Rp*gridCap.m_fDervRand;
    statSlp.m_FallTime.m_CoefInd->CoefLdRand=outSlope.Rn*gridCap.m_fDervRand;


    //independent-parameter coefficients scaled by m_fIntraIndPSig:
    //rise results receive Coefp only, fall results receive Coefn only;
    //both loops are bounded by instDelay's dimensions, also for the slope
    for (i=0;i<INDGATENUMPARA;i++){
	for (j=0;j<instDelay.CoefIndTr.Dimp;j++){
	    statInstdelay.m_RiseTime.m_CoefInd->Coefp[i][j]=instDelay.CoefIndTr.Coefp[i][j]*m_pCorrModel->m_fIntraIndPSig[i];
	    statSlp.m_RiseTime.m_CoefInd->Coefp[i][j]=outSlope.CoefIndTr.Coefp[i][j]*m_pCorrModel->m_fIntraIndPSig[i];
	}
	for (j=0;j<instDelay.CoefIndTf.Dimn;j++){
	    statInstdelay.m_FallTime.m_CoefInd->Coefn[i][j]=instDelay.CoefIndTf.Coefn[i][j]*m_pCorrModel->m_fIntraIndPSig[i];
	    statSlp.m_FallTime.m_CoefInd->Coefn[i][j]=outSlope.CoefIndTf.Coefn[i][j]*m_pCorrModel->m_fIntraIndPSig[i];
	}
    }

    //means are set explicitly from the Delay() results
    statInstdelay.m_RiseTime.m_fMean=instDelay.Tr;
    statInstdelay.m_FallTime.m_fMean=instDelay.Tf;
    statSlp.m_RiseTime.m_fMean=outSlope.Tr;
    statSlp.m_FallTime.m_fMean=outSlope.Tf;

    statInstdelay.m_RiseTime.m_CoefInd->Dimp=instDelay.CoefIndTr.Dimp;
    statInstdelay.m_FallTime.m_CoefInd->Dimn=instDelay.CoefIndTf.Dimn;
    statSlp.m_RiseTime.m_CoefInd->Dimp=outSlope.CoefIndTr.Dimp;
    statSlp.m_FallTime.m_CoefInd->Dimn=outSlope.CoefIndTf.Dimn;

    //recompute sigma now that all coefficients are in place
    statInstdelay.m_RiseTime.CalSigma();
    statInstdelay.m_FallTime.CalSigma();
    statSlp.m_RiseTime.CalSigma();
    statSlp.m_FallTime.CalSigma();


}

/*
//convert delay in (grid,coef) pair to representation in PC coefs
void CTiming::GridCoef2PC(CGridDelayCoef& srcGridCoef,CDelayPC& tgtCoefPC)
{
tgtCoefPC.m_vCoefPC.erase(tgtCoefPC.m_vCoefPC.begin(),tgtCoefPC.m_vCoefPC.end()); //:)
tgtCoefPC.m_vCoefPC.resize(NUMPARA);	
map<int,CDelayCoef>::iterator liter;
for (liter=srcGridCoef.m_mGridCoef.begin();liter!=srcGridCoef.m_mGridCoef.end();liter++)
{
int gridloc=(*liter).first;
float* coef=(*liter).second.m_pCoef;
for (int i=0;i<NUMPARA;i++)
AddCoefPC(tgtCoefPC.m_vCoefPC[i],m_Grid[gridloc].m_Para[i].m_vCoefPC,coef[i]);
}

//update the mean and sigma 
tgtCoefPC.m_fMean=srcGridCoef.m_fMean;
tgtCoefPC.m_fSigma=CalSigma(tgtCoefPC.m_vCoefPC);
}
 */

/*
//make target(tgt)'s coefPCs += coef * source(src)'s coefPCs respectively
//return Sum(coefPC_i^2)
void CTiming::AddCoefPC(vector<float>& tgt, vector<float>& src, float coef)
{
int i;

//note: assuming whenever tgt resized, the size is the same
//		i.e. each grid W/L has the same number of PCs
if (tgt.empty()){
tgt.resize(src.size());
for (i=0;i<src.size();i++)
tgt[i]=0;
}

for (i=0;i<src.size();i++)
{	float pc=src[i];
float val=coef*src[i];
tgt[i]+=val;
}
}
 */

//delaycoef*=val
/*void CTiming::ScaleDelayPC(CDelayPC& delayPC,float coef)
  {		
  int i,j;
  for (i=0;i<delayPC.m_vCoefPC.size();i++)				
  for (j=0;j<delayPC.m_vCoefPC[i].size();j++)					
  delayPC.m_vCoefPC[i][j]*=coef;
  delayPC.m_fMean*=coef;
  delayPC.m_fSigma*=coef;

  }*/

/*
   float CTiming::CalSigma(vector<vector<float> >& delayPC)
   {
   float sumsig=0;
   int i,j;
   for (i=0;i<delayPC.size();i++)
   for (j=0;j<delayPC[i].size();j++){
   float val=delayPC[i][j];
   sumsig+=val*val;
   }

   sumsig=sqrt(sumsig);

   return sumsig;
   }
 */

/*
//x: N(u1,s1),	y:N(u2,s2),	R(x,y)=r,	t=max(x,y)
//
//(1)
//E(t)=u1*cdf(beta)+u2*cdf(-beta)+alpha*pdf(beta)
//Var(t)=(u1^2+s1^2)*cdf(beta)+(u2^2+s2^2)*cdf(-beta)+(u1+u2)*alpha*pdf(beta)-[E(t)]^2
//
//	alpha=sqrt(s1^2+s2^2-2*r*s1*s2)
//	beta=(u1-u2)/alpha
//	fx=pdf(x)=1/sqrt(2pi)*exp(-x^2/2)
//	Fx=cdf(x)=1/sqrt(2pi)*Integrate[exp(-y^2/2),{y,-Inf,x}]
//
//(2)
//if R(x,z)=r1,	R(y,z)=r2,	 z is normal distribution
//then	R(t,z)=[s1*r1*cdf(beta)+s2*r2*cdf(-beta)]/sqrt[var(t)]

//
void CTiming::StatMax2v(CDelayPC& x, CDelayPC& y, CDelayPC& t)
{
float mu1=x.m_fMean;	float s1=x.m_fSigma;
float mu2=y.m_fMean;	float s2=y.m_fSigma;

#ifdef _DEBUG0
float mm=x.CalSigma();
mm=y.CalSigma();
#endif

float min1=mu1-3*s1; float max1=mu1+3*s1;
float min2=mu1-3*s1; float max2=mu2+3*s2;
if (max1<min2){
t=y;
return;
}else if (max2<min1){
t=x;
return;
}

float rho=CDelayPC::CalCorr2v(x,y);

float alpha=s1*s1+s2*s2-2*rho*s1*s2;
alpha=(alpha<0)? 0:sqrt(alpha);

//if alpha is zero, 
// t->x, mut -> u1, sigmat -> s1 (if u1>u2). vice versa.
if (alpha<ZEROVAL) {
t = (mu1>=mu2)? x : y;
return;
}

float beta=(mu1-mu2)/alpha;		
float cdfp=norm1v(beta,0,1);	//cdf(beta)
float cdfn=norm1v(-beta,0,1);	//cdf(-beta)
float pdfp=pdfnorm1v(beta,0,1);	//pdf(beta)

//cal mu(t), sigma(t)
float mut=mu1*cdfp + mu2*cdfn + alpha*pdfp;
float vart=(mu1*mu1+s1*s1)*cdfp + (mu2*mu2+s2*s2)*cdfn + (mu1+mu2)*alpha*pdfp - mut*mut;
float sigmat=sqrt(vart);

t.m_fMean=mut;
t.m_fSigma=sigmat;

//cal all coefs: cov(t,wi) (and cov(t,li) etc)
// cov(t,wi)=corr(t,wi)*sigma(t)
// find corr(t,wi) based on corr(x,wi) & corr(y,wi)

//note: assuming the delay x and y sensitive to the same number of parameters
//		and have the same number of pcs on each parameter

t.m_vCoefPC.resize(x.m_vCoefPC.size());
int i,j;
for (i=0;i<x.m_vCoefPC.size();i++){
int sizex=x.m_vCoefPC[i].size();  int sizey=x.m_vCoefPC[i].size();
int newsize=sizex>sizey? sizex:sizey;
if (newsize>0){
    t.m_vCoefPC[i].resize(newsize);
    for (j=0;j<x.m_vCoefPC[i].size();j++)
    {
	float rho1=sizex>0? (x.m_vCoefPC[i][j]/x.m_fSigma):0;
	float rho2=sizey>0? (y.m_vCoefPC[i][j]/y.m_fSigma):0;
	float covtz=(s1*rho1*cdfp+s2*rho2*cdfn);

	t.m_vCoefPC[i][j]=covtz;
    }
}
}

//normalize the coeficients by sigma
float pcsig=t.CalSigma();
float delta=sigmat/pcsig;
for (i=0;i<t.m_vCoefPC.size();i++)
for (j=0;j<t.m_vCoefPC[i].size();j++)
t.m_vCoefPC[i][j]=t.m_vCoefPC[i][j]*delta;

#ifdef _DEBUG0
bugfile<<"max-- pcsig: "<<pcsig<<" calsig"<<sigmat<<endl;
bugfile<<"mu1: "<<mu1<<" sig1: "<<s1<<" mu2: "<<mu2<<" sig2: "<<s2;
bugfile<<" diff: "<<mu1-mu2<<endl;
#endif

}
*/

/*
//x=mux+k11*w1+... +
//y=muy+k21*w1+... +
//cov(x,y)=sum(k1i*k2i)  (i.e. k11*k21+k12*k22+...)
//corr(x,y)=cov(x,y)/(sig(x)*sig(y))
float CTiming::CalCorr2v(CDelayPC& x, CDelayPC& y)
{
//note: assuming the delay x and y sensitive to the same number of parameters
//		and have the same number of pcs on each parameter

float sum=0;
int i,j;
for (i=0;i<x.m_vCoefPC.size();i++){
int sizex=x.m_vCoefPC[i].size();  int sizey=y.m_vCoefPC[i].size();
if (sizex==0 || sizey==0)
continue;
for (j=0;j<x.m_vCoefPC[i].size();j++)
{	
sum+=x.m_vCoefPC[i][j]*y.m_vCoefPC[i][j];
}
}

sum=sum/(x.m_fSigma*y.m_fSigma);

if (sum>1) sum=1;	//can this bug show up?
else if (sum<0) sum=0;
return sum;
}
 */

//Statistical max over a set of delay distributions.
//
// distrList : pointers to the distributions to be maximised. The pointed-to
//             objects are never deleted here. When the list holds two or
//             more entries, the list itself is emptied as a side effect.
// returns   : a heap-allocated CDelayPC holding the result; the caller owns
//             it and must delete it. Returns NULL when distrList is empty
//             (previously this case dereferenced an empty list).
//
//The reduction is pairwise and order dependent (StatMax2v is applied to
//neighbours first, then to the partial results), so the pairing order below
//must not be changed.
CDelayPC* CTiming::StatMax(list<CDelayPC*>& distrList)
{
    //nothing to maximise
    if (distrList.empty())
	return NULL;

    //a single distribution is its own max: hand back a private copy
    if (distrList.size()==1){
	CDelayPC* single=new CDelayPC;
	*single=*distrList.front();
	return single;
    }

    //partial results; every pointer stored here is owned by this function
    list<CDelayPC*> partial;

    //first round: max of neighbouring inputs (inputs are NOT deleted)
    while (distrList.size()>1){
	CDelayPC* first=distrList.front();
	distrList.pop_front();
	CDelayPC* second=distrList.front();
	distrList.pop_front();

	CDelayPC* pairMax=new CDelayPC;
	pairMax->StatMax2v(*first,*second);
	partial.push_back(pairMax);
    }

    //odd number of inputs: fold the left-over input into the first partial
    //result and queue the merged result at the back
    if (!distrList.empty()){
	CDelayPC* leftover=distrList.front();
	CDelayPC* head=partial.front();

	CDelayPC* merged=new CDelayPC;
	merged->StatMax2v(*leftover,*head);

	distrList.pop_front();
	delete head;
	partial.pop_front();
	partial.push_back(merged);
    }

    //remaining rounds: keep combining partial results until one is left
    while (partial.size()>1){
	CDelayPC* first=partial.front();
	partial.pop_front();
	CDelayPC* second=partial.front();
	partial.pop_front();

	CDelayPC* pairMax=new CDelayPC;
	pairMax->StatMax2v(*first,*second);

	delete first;
	delete second;
	partial.push_back(pairMax);
    }

    return partial.front();
}

/*
//stat add two delay in the format of coefs
void CTiming::StatSumDelay(CDelayPC& x, CDelayPC& y, CDelayPC& rst)
{
//note: assuming the delay x and y sensitive to the same number of parameters
//		and have the same number of pcs on each parameter

if (x.m_vCoefPC.size()==0){
rst=y;
return;
}
if (y.m_vCoefPC.size()==0){
rst=x;
return;
}

rst.m_vCoefPC.resize(x.m_vCoefPC.size());
int i,j;
for (i=0;i<x.m_vCoefPC.size();i++){
int sizex=x.m_vCoefPC[i].size();  int sizey=y.m_vCoefPC[i].size();
int newsize=sizex>sizey? sizex:sizey;

if (newsize>0){
rst.m_vCoefPC[i].resize(newsize);
if (!sizex){
rst.m_vCoefPC[i]=y.m_vCoefPC[i];			
}
else if (!sizey){
rst.m_vCoefPC[i]=x.m_vCoefPC[i];				
}
else{
for (j=0;j<x.m_vCoefPC[i].size();j++)
{	float val=x.m_vCoefPC[i][j]+y.m_vCoefPC[i][j];
float a=x.m_vCoefPC[i][j];
float b=y.m_vCoefPC[i][j];
rst.m_vCoefPC[i][j]=val;					
}
}
}
}

float sumsig=0;
for (i=0;i<rst.m_vCoefPC.size();i++)
for (j=0;j<rst.m_vCoefPC[i].size();j++)
sumsig+=rst.m_vCoefPC[i][j]*rst.m_vCoefPC[i][j];	
rst.m_fSigma=sqrt(sumsig);
rst.m_fMean=x.m_fMean+y.m_fMean;
}
 */

//Statistical path delay up to the output port (drvport) of an instance.
//
//For every input port of the instance that lies on a true path, a candidate
//is formed as (path delay at that input) + (pin-to-pin delay of the gate).
//drvport->m_StatPathDelay receives the statistical max of the candidates.
//
// candPath : filled with one candidate per true-path input port when the
//            instance has more than one active input pin; not touched for
//            a single-input instance.

void CTiming::StatPathDelay(CPort* drvport,map<CPort*,CDelayPC> &candPath)
{
    CInst* gate=drvport->m_Conn->m_ConnInst;
    map<string,CPort*>& ports=gate->GetPortHash();
    map<string,CPort*>::iterator pit;

    int actPinCnt=gate->GetActPInCnt();
    if (actPinCnt<=1){
	//single-input gate: no max needed, the first true-path input found
	//determines the output path delay
	for (pit=ports.begin();pit!=ports.end();++pit){
	    CPort* inport=pit->second;
	    if (inport==drvport || !inport->m_bTruePath)
		continue;
	    drvport->m_StatPathDelay.StatSumDelay(inport->m_StatPathDelay,gate->GetStatDelay(inport->m_PortName));
	    break;
	}
	return;
    }

    //multi-input gate: build one candidate per true-path input and keep
    //pointers to them for the max operation
    list<CDelayPC*> candidates;
    for (pit=ports.begin();pit!=ports.end();++pit){
	CPort* inport=pit->second;
	if (inport==drvport || !inport->m_bTruePath)
	    continue;

	//candidate = input path delay + gate pin-to-pin delay
	CDelayPC& cand=candPath[inport];
	cand.StatSumDelay(inport->m_StatPathDelay,gate->GetStatDelay(inport->m_PortName));
#ifdef _DEBUG0
	if (gate->m_InstName=="G8" && inport->m_PortName=="in1"){
	    bugfile<<"@@\n";
	    gate->GetStatDelay(inport->m_PortName).PrtData();
	    inport->m_StatPathDelay.PrtData();
	    cand.PrtData();
	    bugfile<<"@@\n";
	}
#endif
	candidates.push_back(&cand);
    }

    //StatMax returns a heap object owned by us: copy it out, then free it
    CDelayPC* maxrst=StatMax(candidates);
    drvport->m_StatPathDelay=*maxrst;

#ifdef _DEBUG0
    bugfile<<"max op after add gatedelay:";
    bugfile<<" pc:"<<maxrst->CalSigma()<<" cal"<<maxrst->m_fSigma<<endl;
#endif

    delete maxrst;
}


//Statistical output slope at the output port (drvport) of an instance.
//
//With more than one active input pin the output slope is the probability
//weighted sum
//    sum_i  Prob[ path(xi) > max(all other candidate paths) ] * slope(xi)
//where the candidate paths come from candPath (see StatPathDelay). The input
//port with the largest probability is recorded in m_pStatCrtPort.
//With a single active input pin the slope of that input is copied.
//Finally the independent components of the slopes and of the path delay at
//drvport are merged (IndMg2Rand).
void CTiming::StatInstOutSlope(CPort* drvport,map<CPort*,CDelayPC>& candPath)
{
    //set once a zero-sigma, zero-mean tie has been awarded probability 1
    bool tieClaimed=false;
    CInst* gate=drvport->m_Conn->m_ConnInst;

    //start from empty rise/fall slopes with NUMPARA coefficient slots
    drvport->m_StatSlope.m_RiseTime.ClearCoefPC();
    drvport->m_StatSlope.m_FallTime.ClearCoefPC();
    drvport->m_StatSlope.m_RiseTime.m_vCoefPC.resize(NUMPARA);
    drvport->m_StatSlope.m_FallTime.m_vCoefPC.resize(NUMPARA);

    int actPinCnt=gate->GetActPInCnt();
    if (actPinCnt>1){
	float bestProb=-1;
	CPort* bestPort=NULL;
	map<CPort*,CDelayPC>::iterator self,other;
	for (self=candPath.begin();self!=candPath.end();++self){
	    CDelayPC& mine=self->second;

	    //max over every candidate path except this one
	    list<CDelayPC*> rivals;
	    for (other=candPath.begin();other!=candPath.end();++other){
		if (other!=self)
		    rivals.push_back(&(other->second));
	    }
	    CDelayPC* rivalMax=StatMax(rivals);

	    //distribution of (this path - max of the others)
	    float corr=CDelayPC::CalCorr2v(mine,*rivalMax);
	    float mean=mine.m_fMean - rivalMax->m_fMean;
	    float var=mine.m_fSigma * mine.m_fSigma + rivalMax->m_fSigma*rivalMax->m_fSigma
		-2 * mine.m_fSigma*rivalMax->m_fSigma*corr;
	    if (corr>1){
		//a correlation above 1 is treated as fatal
		cout<<"corr>1: "<<corr<<endl; exit(0);
	    }
	    //small negative variances are clamped to zero
	    float sigma=(var<0)? 0:sqrt(var);

	    delete rivalMax;

	    //Prob[ this path > max of the others ]
	    float prob;
	    if (sigma){
		prob=1-norm1v(0,mean,sigma);
	    }else if (mean>0 || (!mean && !tieClaimed)){
		//deterministic difference: this path wins outright, or it is
		//the first one seen in an exact tie
		prob=1;
		tieClaimed=true;
	    }else{
		prob=0;
	    }

	    //accumulate prob * slope of this input port
	    CPort* thisport=self->first;
	    CStatSlope& thisOutslp=thisport->GetStatSlope();
	    drvport->m_StatSlope.m_RiseTime.ProbAddData(thisOutslp.m_RiseTime,prob);
	    drvport->m_StatSlope.m_FallTime.ProbAddData(thisOutslp.m_FallTime,prob);

	    //remember the most probable (statistically critical) input
	    if (bestProb<prob){
		bestProb=prob;
		bestPort=thisport;
	    }
	}
	drvport->m_StatSlope.m_RiseTime.m_fSigma=drvport->m_StatSlope.m_RiseTime.CalSigma();
	drvport->m_StatSlope.m_FallTime.m_fSigma=drvport->m_StatSlope.m_FallTime.CalSigma();

	gate->m_pStatCrtPort=bestPort;

    }else{
	//only one active input pin: locate it and copy its slope
	//NOTE(review): if no port matches, thisport is left at the last port
	//visited (or uninitialised for an empty port map) - confirm callers
	//guarantee a match.
	CPort* thisport;
	map<string,CPort*>& ports=gate->GetPortHash();
	map<string,CPort*>::iterator pit=ports.begin();
	while (pit!=ports.end()){
	    thisport=pit->second;
	    if (thisport!=drvport && thisport->m_bTruePath)
		break;
	    ++pit;
	}
	drvport->m_StatSlope.m_RiseTime=thisport->m_StatSlope.m_RiseTime;
	drvport->m_StatSlope.m_FallTime=thisport->m_StatSlope.m_FallTime;
	gate->m_pStatCrtPort=thisport;
    }

    //merge random components
    drvport->m_StatSlope.m_RiseTime.IndMg2Rand();
    drvport->m_StatSlope.m_FallTime.IndMg2Rand();
    drvport->m_StatPathDelay.IndMg2Rand();
}


//Placeholder for the statistical net delay from drvport to each fan-out
//port of the net. The body is intentionally empty: the function performs
//no work and both arguments are ignored.
void CTiming::StatNetDelay(CNet* net,CPort* drvport)
{
    (void)net;
    (void)drvport;
}


//Propagate the statistical delay at drvport across drvnet.
//
//For every sink port of drvnet that lies on a true path:
//   sink path delay = drvport path delay + net delay to that sink
//and, when flag is set,
//   sink input slope (rise/fall) = drvport output slope + net delay.
//Unless _GSIZE is defined, the coefficient data that is no longer needed
//(net delays, load derivatives, and the per-port / per-instance PC
//coefficients of the driving instance) is cleared afterwards.
void CTiming::StatCalNetDepTime(CNet* drvnet, CPort* drvport,bool flag/*=true*/)
{
    list<CConn*>& conns=drvnet->GetNetConnList();
    for (list<CConn*>::iterator cit=conns.begin();cit!=conns.end();++cit)
    {
	CConn* conn=*cit;
	CPort* sink=conn->m_ConnPort;

	//only sinks on true paths receive data; the driver itself is skipped
	if (sink==drvport || !sink->m_bTruePath)
	    continue;

	//net delays are keyed "<inst>-<port>" for instance pins and by the
	//bare port name otherwise
	string sinkName;
	if (!conn->m_ConnInst)
	    sinkName=sink->m_PortName;
	else
	    sinkName=conn->m_ConnInst->m_InstName+"-"+sink->m_PortName;

	CDelayPC& netSinkDelay=drvnet->GetStatDelay(sinkName);

	sink->m_StatPathDelay.StatSumDelay(drvport->m_StatPathDelay,netSinkDelay);

	//input slope at the sink = output slope at the driver + net delay
	if (flag){
	    sink->m_StatInSlope.m_RiseTime.StatSumDelay(drvport->GetStatSlope().m_RiseTime,netSinkDelay);
	    sink->m_StatInSlope.m_FallTime.StatSumDelay(drvport->GetStatSlope().m_FallTime,netSinkDelay);
	}
    }

    //everything has been pushed to the fan-out ports, so the data held on
    //the net, the driving port and the driving instance can be released
#ifndef _GSIZE
    drvnet->ClearStatDelay();
    drvport->m_mLoadDerv.Clear();

    CInst* drvinst=drvport->m_Conn->m_ConnInst;
    if (!drvinst){
	//driver is not an instance pin (e.g. a primary input)
	drvport->m_StatPathDelay.ClearCoefPC();
	drvport->m_StatSlope.m_RiseTime.ClearCoefPC();
	drvport->m_StatSlope.m_FallTime.ClearCoefPC();
    }else{
	//coefficients stored on every port of the driving instance,
	//except DFF PO ports on a true path, which are kept
	map<string,CPort*>& ports=drvinst->GetPortHash();
	for (map<string,CPort*>::iterator pit=ports.begin();pit!=ports.end();++pit)
	{
	    CPort* port=pit->second;
	    if (m_pCkt->FindDffPO(drvinst->m_InstName,port->m_PortName) && port->m_bTruePath)
		continue;
	    port->m_StatPathDelay.ClearCoefPC();
	    port->m_StatSlope.m_RiseTime.ClearCoefPC();
	    port->m_StatSlope.m_FallTime.ClearCoefPC();
	    port->m_StatInSlope.m_RiseTime.ClearCoefPC();
	    port->m_StatInSlope.m_FallTime.ClearCoefPC();
	}

	//pin-to-pin delays of the instance
	map<string,CDelayPC>::iterator dit;
	for (dit=drvinst->m_StatDelayHash.begin();dit!=drvinst->m_StatDelayHash.end();++dit)
	    dit->second.ClearCoefPC();

	//rise/fall slopes of the instance
	map<string,CStatSlope>::iterator sit;
	for (sit=drvinst->m_StatTrTfHash.begin();sit!=drvinst->m_StatTrTfHash.end();++sit)
	{
	    sit->second.m_RiseTime.ClearCoefPC();
	    sit->second.m_FallTime.ClearCoefPC();
	}
    }
#endif
}




//Statistical max of the path delays at all PO/DffPO ports on true paths
//(m_lTruePO).
//
// returns : NULL when m_lTruePO is empty (previously this dereferenced the
//           begin() of an empty list);
//           a pointer to the port's own m_StatPathDelay when there is exactly
//           one true PO (NOT owned by the caller);
//           otherwise the heap object produced by StatMax.
//NOTE(review): ownership of the result differs between the one-PO and the
//multi-PO case; callers must not delete the result unconditionally.
CDelayPC* CTiming::StatCktMax()
{
    //no output on a true path: there is nothing to maximise
    if (m_lTruePO.empty())
	return NULL;

    //a single output: its path delay is the circuit delay
    if (m_lTruePO.size()==1)
	return &(m_lTruePO.front()->m_StatPathDelay);

    //several outputs: gather pointers to their path delays and take the max
    list<CDelayPC*> distrList;
    list<CPort*>::iterator lptiter;
    for (lptiter=m_lTruePO.begin();lptiter!=m_lTruePO.end();++lptiter)
	distrList.push_back(&((*lptiter)->m_StatPathDelay));

    return StatMax(distrList);
}



//Statistical PERT-like traversal of the circuit.
//
// 1. For every PI/DffPI on a true path: set arrival time and path delay to
//    zero, compute the RC data and delay of the net it drives, and propagate
//    to the fan-out ports of that net.
// 2. For every instance in m_lInstSeq (visited in list order): compute the
//    instance delay, the max path delay at its output, its output slope, and
//    propagate across the driven net.
// 3. Return the statistical max over the true PO/DffPO ports (StatCktMax).
//
//An instance without a port whose direction is "output" is reported on cerr
//and skipped; previously the output-port pointer was left uninitialised and
//then dereferenced.
CDelayPC* CTiming::StatPert()
{
    map<string,CPort*>::iterator ptiter;
    list<CPort*>::iterator lptiter;

    //browse the PI/DffPI on true paths only	
    for (lptiter=m_lTruePI.begin();lptiter!=m_lTruePI.end();++lptiter)
    {
	CPort *port=(*lptiter);
	port->SetArrivalTime(0);
	port->m_StatPathDelay.m_fMean=port->m_StatPathDelay.m_fSigma=0;
	CNet* net=port->m_Conn->m_ConnNet;
	net->SetArrivalTime(0);
	net->m_pTree->StatCalNodeRC(net->m_pTree->m_pSrcNode);
	net->m_pTree->StatCalDelay();
	StatCalNetDepTime(net,port);
    }


    //browse the inst Queue (instance on the true paths)
    list<CInst*>::iterator institer;   
    for (institer=m_lInstSeq.begin();institer!=m_lInstSeq.end();++institer)
    {
	CInst* drvinst=*institer;

	//find the output port and net driven by the instance 
	CPort *drvport=NULL;
	map<string,CPort*>& porthash=drvinst->GetPortHash();
	for (ptiter=porthash.begin();ptiter!=porthash.end();++ptiter)
	{           
	    CPort* port=(*ptiter).second;
	    if(port->m_Direction=="output")
	    {
		//the output port of the instance
		drvport=port; 
		break;
	    }
	}
	if (!drvport){
	    //nothing can be propagated from an instance that drives no net
	    cerr<<"StatPert: instance "<<drvinst->m_InstName<<" has no output port, skipped"<<endl;
	    continue;
	}
	CNet* drvnet=drvport->m_Conn->m_ConnNet;


	//stat find the tree delay and the derivative of load on the tree for later calculations
	drvnet->m_pTree->StatCalNodeRC(drvnet->m_pTree->m_pSrcNode);

	//stat find instdelay 
	map<string,CStatSlope>& statInstTrTf=drvinst->m_StatTrTfHash;
	StatInstDelay(drvport,statInstTrTf);
#ifdef _DEBUG  //::
	PrtStatInstD(drvport);
#endif

	//maximum operation on all paths arriving at drvport
	map<CPort*,CDelayPC> candPath;
	StatPathDelay(drvport,candPath);
#ifdef _DEBUG //::
	PrtStatPathD(drvport);
#endif

	//stat find output slope of the instance
	StatInstOutSlope(drvport,candPath);

	//using the delay of the net that this inst drives
	//to update the arrival time of the ports on the fanout of the net
	//to update the input slope of these ports
	drvnet->m_pTree->StatCalDelay();
	StatCalNetDepTime(drvnet,drvport);
#ifdef _DEBUG0
	PrtStatInstD(drvport);
#endif

    }//for loop of instance queue


    //find the max among PO,DFFPO
    return StatCktMax();
}

//Debug dump to bugfile: for the instance that owns drvport, print the
//statistical pin-to-pin delay associated with every port other than drvport.
//(Printing of the input and output slopes is disabled.)
void CTiming::PrtStatInstD(CPort* drvport)
{
    CInst* gate=drvport->m_Conn->m_ConnInst;
    bugfile<<"delay of inst: "<<gate->m_InstName<<endl;

    map<string,CPort*>& ports=gate->GetPortHash();
    for (map<string,CPort*>::iterator pit=ports.begin();pit!=ports.end();++pit)
    {
	CPort* inport=pit->second;
	if (inport==drvport)
	    continue;

	bugfile<<"port "<<inport->m_PortName<<endl;
	gate->GetStatDelay(inport->m_PortName).PrtData();
	//disabled: inport->m_GateDelay.PrtData();
	//disabled: inport->GetStatSlope().m_RiseTime.PrtData();  ("Tr: ")
	//disabled: inport->GetStatSlope().m_FallTime.PrtData();  ("Tf: ")
    }
    //disabled: drvport->m_StatSlope.m_RiseTime.PrtData();  ("  Tr: ")
    //disabled: drvport->m_StatSlope.m_FallTime.PrtData();  ("  Tf: ")
}

//Debug dump to bugfile: name of the instance owning drvport, the port name,
//and the statistical path delay accumulated at drvport.
void CTiming::PrtStatPathD(CPort* drvport)
{
    bugfile<<"path delay till inst: "<<drvport->m_Conn->m_ConnInst->m_InstName<<endl;
    bugfile<<"port "<<drvport->m_PortName<<endl;

    drvport->m_StatPathDelay.PrtData();
}


//Debug listing of the instance queue (m_lInstSeq): the queue size, then one
//line per instance with its name, m_iUnActPInCnt and m_iTPFoutNum.
//Compiled to an empty function unless _DEBUG is defined.
void CTiming::PrtTPInstQ()
{
#ifdef _DEBUG
    cout<<"size of the queue:"<<m_lInstSeq.size()<<endl;

    list<CInst*>::iterator qit=m_lInstSeq.begin();
    while (qit!=m_lInstSeq.end())
    {
	CInst* inst=*qit;
	cout<<inst->m_InstName<<" "<<inst->m_iUnActPInCnt<<" "<<inst->m_iTPFoutNum<<endl;
	++qit;
    }
#endif
}


//Count the true fan-outs of a net: the number of ports connected to drvnet,
//other than drvport itself, whose m_bTruePath flag is set.
int CTiming::CntTFout(CPort* drvport, CNet* drvnet)
{
    list<CConn*>& conns=drvnet->GetNetConnList();

    int nTrue=0;
    for (list<CConn*>::iterator cit=conns.begin();cit!=conns.end();++cit)
    {
	CPort* sink=(*cit)->m_ConnPort;
	if (sink==drvport)
	    continue;	//the driver is not a fan-out
	if (sink->m_bTruePath)
	    ++nTrue;
    }
    return nTrue;
}


//no removing non-critical paths, copy all PI,PO and inst to the list
void CTiming::InitNoRmFPath()
{
    map<string,CPort*>::iterator ptiter;	
    for (ptiter=m_pCkt->m_POHash.begin();ptiter!=m_pCkt->m_POHash.end();++ptiter)
    {
	CPort* port=(*ptiter).second;		
	AddTruePO(port);
    }
    for (ptiter=m_pCkt->m_DffPOHash.begin();ptiter!=m_pCkt->m_DffPOHash.end();++ptiter)
    {
	CPort* port=(*ptiter).second;
	AddTruePO(port);
    }
    for (ptiter=m_pCkt->m_PIHash.begin();ptiter!=m_pCkt->m_PIHash.end();++ptiter)
    {
	CPort* port=(*ptiter).second;
	AddTruePI(port);
    }
    for (ptiter=m_pCkt->m_DffPIHash.begin();ptiter!=m_pCkt->m_DffPIHash.end();++ptiter)
    {
	CPort* port=(*ptiter).second;		
	AddTruePI(port);
    }
    list<CInst*>::iterator institer;
    for (institer=m_lInstSeq.begin();institer!=m_lInstSeq.end();++institer)
    {
	CInst* drvinst=*institer;
	drvinst->m_iUnActPInCnt=drvinst->GetInPinNum();
    }

}

//Initialisation with removal of non-critical (false) paths.
//
// 1. For each instance in m_lInstSeq, RemoveFPath(option 2) clears
//    m_bTruePath on dominated input ports; m_iUnActPInCnt is set to the
//    number of input pins minus the number removed.
// 2. The same pruning is applied to the PO and DffPO ports (option 1), both
//    within each set and across the two sets.
// 3. The surviving PO/DffPO ports are registered with AddTruePO.
// 4. The instance queue is walked backwards: an instance whose output net
//    has no true fan-out is removed from the queue and its input ports are
//    marked false. m_iTPFoutNum records the true fan-out count.
// 5. PI/DffPI ports that still drive a true fan-out are registered with
//    AddTruePI.
//
//An empty instance queue is handled (the backward walk is skipped), and an
//instance without an "output" port is treated as having no true fan-out;
//both cases previously led to undefined behaviour.
void CTiming::InitRmFPath()
{

    //browse the inst Queue
    list<CInst*>::iterator institer;
    for (institer=m_lInstSeq.begin();institer!=m_lInstSeq.end();++institer)
    {
	CInst* drvinst=*institer;
	map<string,CPort*>& porthash=drvinst->GetPortHash();
	int fpin=RemoveFPath(&porthash, 2);
	drvinst->m_iUnActPInCnt=drvinst->GetInPinNum()-fpin;
    }

    RemoveFPath(&(m_pCkt->m_POHash),1);
    RemoveFPath(&(m_pCkt->m_DffPOHash),1);
    RemoveFPath2(&(m_pCkt->m_POHash),&(m_pCkt->m_DffPOHash),1);


    map<string,CPort*>::iterator ptiter;
    //update PO/DFFPO list to store true paths only
    for (ptiter=m_pCkt->m_POHash.begin();ptiter!=m_pCkt->m_POHash.end();++ptiter)
    {
	CPort* port=(*ptiter).second;
	if (port->m_bTruePath)
	    AddTruePO(port);
    }
    for (ptiter=m_pCkt->m_DffPOHash.begin();ptiter!=m_pCkt->m_DffPOHash.end();++ptiter)
    {
	CPort* port=(*ptiter).second;
	if (port->m_bTruePath)
	    AddTruePO(port);
    }

    //trace back to remove non-critical paths
    //(a while loop, so that an empty queue never decrements end())
    institer=m_lInstSeq.end();
    while (institer!=m_lInstSeq.begin()){
	--institer;
	CInst *drvinst=*institer;
	map<string,CPort*>& porthash=drvinst->GetPortHash();

	//the output port of the instance, if it has one
	CPort* drvport=NULL;
	for (ptiter=porthash.begin();ptiter!=porthash.end();++ptiter)
	{
	    CPort* port=(*ptiter).second;
	    if(port->m_Direction=="output")
	    {//the net that the instance drives
		drvport=port;
		break;
	    }
	}

	//number of true fan-outs on the driven net (none without an output)
	int flag=drvport? CntTFout(drvport,drvport->m_Conn->m_ConnNet):0;

	//if all fanout ports are false
	//	remove the instance from the queue 
	//	set all its input ports as false
	//	set its active-pin count to 0
	if (!flag){

	    for (ptiter=porthash.begin();ptiter!=porthash.end();++ptiter)
	    {
		CPort* port=(*ptiter).second;
		if(port->m_Direction=="input"){
		    port->m_bTruePath=false;
		}
	    }
	    drvinst->m_iUnActPInCnt=0;

	    //remove the inst from the queue; erase() returns the element
	    //after the erased one, so the next --institer lands on the
	    //predecessor (or the loop ends if the first element was erased)
	    institer=m_lInstSeq.erase(institer);

	}
	//the list stores pointers, so drvinst stays valid after erase()
	drvinst->m_iTPFoutNum=flag;
    }


    //update PI/DFFPI list to store true paths
    for (ptiter=m_pCkt->m_PIHash.begin();ptiter!=m_pCkt->m_PIHash.end();++ptiter)
    {
	CPort* port=(*ptiter).second;		
	int flag=CntTFout(port,port->m_Conn->m_ConnNet);
	if (flag)
	    AddTruePI(port);
    }
    for (ptiter=m_pCkt->m_DffPIHash.begin();ptiter!=m_pCkt->m_DffPIHash.end();++ptiter)
    {
	CPort* port=(*ptiter).second;		
	int flag=CntTFout(port,port->m_Conn->m_ConnNet);
	if (flag)
	    AddTruePI(port);
    }

}


//Mark dominated ports of one port map as false paths.
//
//Each true-path port gets a [lo,hi] delay range and is compared with every
//later true-path port in the map; a port whose hi does not exceed the other
//port's lo can never be critical and has m_bTruePath cleared.
//
// option 0 : range = first and last point of the port's m_vPathPdf
// option 1 : range = arrival time * (1 -/+ RNGMINMAX), lo clamped at 0
// option 2 : as option 1, but only ports with direction "input" take part
// any other option, or a non-input port under option 2: port is skipped
//
// returns  : number of ports whose m_bTruePath was cleared
int CTiming::RemoveFPath(map<string,CPort*> *porthash, 
			 int option/*=0*/)
{
    int removed=0;

    for (map<string,CPort*>::iterator outer=porthash->begin();outer!=porthash->end();++outer)
    {
	CPort* a=outer->second;
	if (!a->m_bTruePath)
	    continue;

	//delay range of the reference port
	float loA,hiA;
	if (option==0){
	    loA=a->m_vPathPdf[0].point;
	    hiA=a->m_vPathPdf[a->m_vPathPdf.size()-1].point;
	}else if ((option==1) || ((option==2)&&(a->m_Direction=="input"))){
	    float val=a->GetArrivalTime();
	    loA=val*(1-RNGMINMAX);
	    if (loA<0)  loA=0;
	    hiA=val*(1+RNGMINMAX);
	}else{
	    continue;
	}

	//compare against every port that follows in the map
	map<string,CPort*>::iterator inner=outer;
	for (++inner;inner!=porthash->end();++inner)
	{
	    CPort* b=inner->second;
	    if (!b->m_bTruePath)
		continue;

	    float loB,hiB;
	    if (option==0){
		loB=b->m_vPathPdf[0].point;
		hiB=b->m_vPathPdf[b->m_vPathPdf.size()-1].point;
	    }else if ((option==1) || ((option==2)&&(b->m_Direction=="input"))){
		float val=b->GetArrivalTime();
		loB=val*(1-RNGMINMAX);
		if (loB<0) loB=0;
		hiB=val*(1+RNGMINMAX);
	    }else{
		continue;
	    }

	    //two ports with zero upper bound are left alone
	    if (hiA==0 && hiB==0)
		continue;

	    if (hiA<=loB){
		//the reference port is dominated: drop it and stop comparing
		a->m_bTruePath=false;
		++removed;
		break;
	    }
	    if (hiB<=loA){
		//the other port is dominated: drop it and keep going
		b->m_bTruePath=false;
		++removed;
	    }
	}
    }

    return removed;
}


//Mark dominated ports across two port maps as false paths.
//
//Every true-path port of porthash is compared with every true-path port of
//porthash2 using a [lo,hi] delay range per port; the port whose hi does not
//exceed the other port's lo has m_bTruePath cleared. Unlike RemoveFPath, the
//comparison continues after the reference port itself has been cleared, and
//no count is returned.
//
// option 0 : range = first and last point of the port's m_vPathPdf
// option 1 : range = arrival time * (1 -/+ RNGMINMAX), lo clamped at 0
// option 2 : as option 1, but only ports with direction "input" take part
void CTiming::RemoveFPath2(map<string,CPort*> *porthash, map<string,CPort*> *porthash2,
			   int option/*=0*/)
{
    for (map<string,CPort*>::iterator outer=porthash->begin();outer!=porthash->end();++outer)
    {
	CPort* a=outer->second;
	if (!a->m_bTruePath)
	    continue;

	//delay range of the port taken from the first map
	float loA,hiA;
	if (option==0){
	    loA=a->m_vPathPdf[0].point;
	    hiA=a->m_vPathPdf[a->m_vPathPdf.size()-1].point;
	}else if ((option==1) || ((option==2)&&(a->m_Direction=="input"))){
	    float val=a->GetArrivalTime();
	    loA=val*(1-RNGMINMAX);
	    if (loA<0)  loA=0;
	    hiA=val*(1+RNGMINMAX);
	}else{
	    continue;
	}

	//compare against every port of the second map
	for (map<string,CPort*>::iterator inner=porthash2->begin();inner!=porthash2->end();++inner)
	{
	    CPort* b=inner->second;
	    if (!b->m_bTruePath)
		continue;

	    float loB,hiB;
	    if (option==0){
		loB=b->m_vPathPdf[0].point;
		hiB=b->m_vPathPdf[b->m_vPathPdf.size()-1].point;
	    }else if ((option==1) || ((option==2)&&(b->m_Direction=="input"))){
		float val=b->GetArrivalTime();
		loB=val*(1-RNGMINMAX);
		if (loB<0) loB=0;
		hiB=val*(1+RNGMINMAX);
	    }else{
		continue;
	    }

	    //two ports with zero upper bound are left alone
	    if (hiA==0 && hiB==0)
		continue;

	    if (hiA<=loB){
		a->m_bTruePath=false;
	    }else if (hiB<=loA){
		b->m_bTruePath=false;
	    }
	}
    }
}





//calculate the correlated probabiltiy of intervals from 2 normal random variables
float CTiming::CalProbNorm2(float mu_w,float sigma_w,float wa,float wb,
			    float mu_l,float sigma_l,float la,float lb,
			    float r)
{
    doublereal x[2],y[2],rho;
    x[0]=(doublereal)((wa-mu_w)/sigma_w);
    x[1]=(doublereal)((wb-mu_w)/sigma_w);
    y[0]=(doublereal)((la-mu_l)/sigma_l);
    y[1]=(doublereal)((lb-mu_l)/sigma_l);
    rho=(doublereal) r;
    float corrp=(float)discnorm2v(x,y,&rho);
    //bugfile<<"N[Integrate[f,{x,"<<x[0]<<","<<x[1]<<"},{y,"<<y[0]<<","<<y[1]<<"}]]  "<<corrp<<endl;

    return corrp;
}


/*int CTiming::GridIndex(Loc& gloc)
  {
  return (int)(gloc.x+gloc.y*m_pCorrModel->m_iGridSizeX);
  }*/


/////////////////////////////////////////////////////////
// write net connection and placement information to a file (cktname.net)
//		(.tech file will not be created here)
//
// format of cktname.net file:
// Circuit <cktname>  bounding box       lox loy hix hiy  
// #sink format: name x y RAT cell name
// NET new18
//   Source p3_1   13180.69    7411.08            cell C_0
//   Sink   p22_1   11403.11     800.00 4.1188e-10 cell C_1
// ENDNET
////////////////////////////////////////////////////////
/*void CTiming::WriteRouteFmt(string fname, string pathname, string cktname)
  {
  ReadInstLoc(fname);

//give locations to all ports (PI, PO and inst ports)
UdtPortLoc();

//write .net used for router
OutRouteNetFile(pathname, cktname);
}


void CTiming::OutRouteNetFile(string pathname,string cktname)
{
//  	string fname=pathname+"\\"+cktname+".net";
string fname=pathname+"/"+cktname+".net";
ofstream fmtfile;
fmtfile.open(fname.c_str(),ios::out);
fmtfile.setf(ios::fixed,ios::floatfield);
fmtfile.precision(2);


fmtfile<<"Circuit "<<cktname<<"  bounding box "<<m_pCkt->m_LowLeft<<" "<<m_pCkt->m_UpRight<<endl;
fmtfile<<"#sink format: name x y RAT cell name"<<endl;

map<string,CNet*>::iterator netiter;
for (netiter=m_pCkt->m_NetHash.begin();netiter!=m_pCkt->m_NetHash.end();++netiter)
{
CNet *net=(*netiter).second;

fmtfile<<"NET "<<net->m_NetName<<endl;

CInst* drvinst;
CPort* drvport;
list<CConn*>& connlist=net->GetNetConnList();
list<CConn*>::iterator conniter;
for (conniter=connlist.begin();conniter!=connlist.end();++conniter)
{
CConn* conn=*conniter;
CInst* inst=conn->m_ConnInst;
CPort* port=conn->m_ConnPort;
if (port->m_Direction=="output" || m_pCkt->FindPI(port->m_PortName)){
drvport=port;
drvinst=inst;
}
}
fmtfile<<"  Source ";
if (drvinst)
fmtfile<<drvinst->m_InstName+"_"+drvport->m_PortName<<"\t"<<drvport->m_Loc<<"    cell "<<drvinst->m_CellType<<endl;
else
fmtfile<<drvport->m_PortName<<"\t"<<drvport->m_Loc<<"    cell PAD"<<endl;


for (conniter=connlist.begin();conniter!=connlist.end();++conniter)
{
CConn* conn=*conniter;
CInst* inst=conn->m_ConnInst;
CPort* port=conn->m_ConnPort;
if (port!=drvport){					
fmtfile<<"  Sink   ";			
if (inst)
fmtfile<<inst->m_InstName+"_"+port->m_PortName<<"\t"<<port->m_Loc<<" -1 cell "<<inst->m_CellType<<endl;
else
fmtfile<<port->m_PortName<<"\t"<<port->m_Loc<<"	-1 cell PAD"<<endl;

}
}

fmtfile<<"ENDNET"<<endl;

}

}

//give locations to all ports (PI, PO and inst ports), according to inst locations
void CTiming::UdtPortLoc()
{
    //assign locations for all ports of instances
    map<string,CInst*>::iterator institer=m_pCkt->m_InstHash.begin();
    for (;institer!=m_pCkt->m_InstHash.end();++institer)
    {
	CInst *inst=(*institer).second;	
	Loc instLoc=inst->m_Loc;		

	int count=0;
	map<string,CPort*>::iterator ptiter;
	for (ptiter=inst->m_PortHash.begin();ptiter!=inst->m_PortHash.end();++ptiter)
	{
	    CPort *port=(*ptiter).second;
	    //port->m_Loc.x=instLoc.x;	port->m_Loc.y=instLoc.y-4+count;
	    port->m_Loc.x=instLoc.x;	port->m_Loc.y=instLoc.y;
	    count++;
	}

    }

}
*/

////////////////////////////////////////////////////////////
// Test entry point - for test purposes only.
// Only TestCut() is enabled; the other experiments are kept
// below as disabled calls for reference.
////////////////////////////////////////////////////////////
void CTiming::Test()
{
    //disabled: PreTiming(fname,1);
    //disabled: TestGetCorrPath();      - find correlated paths
    //disabled: TestPath();             - grids a path traverses
    //disabled: TestDiffGrid();         - interconnects jumping from one grid to another
    //disabled: TestFaninGrid(fname);   - multi-fanin gates with fanins in the same next-grid

    //check whether cutting small probabilities is still acceptable
    TestCut();
}


//Experiment: does dropping ("cutting") very small joint probabilities change
//the distribution of  w/l * (w' * l')  noticeably?
//
//Steps:
// - discretise N(0,1) into PDISCSIZE intervals over [-SPREAD,SPREAD]
// - build 2-D joint tables for r=0 (product) and r=0.5 (CalProbNorm2), each
//   with a "cut" copy where entries below SMALLPROB are zeroed and the rest
//   renormalised
// - build the 4-D product of the cut r=0.5 table, cut and renormalise again
// - read a [min,max] range from stdin, histogram the value
//   w[0][i]/l[0][j]*w[1][k]*l[1][m] into RSTDISCSIZE slots with and without
//   cutting, and print both histograms to stdout
//
//Invalid input (unreadable numbers or max<=min) aborts the test with a
//message instead of producing a zero/negative bin width.
//NOTE(review): all tables, including the PDISCSIZE^4 one, live on the stack.
void CTiming::TestCut() 
{
    int k,m,i,j;

    float distrN2[PDISCSIZE][PDISCSIZE];	//r=0
    float distrN2C[PDISCSIZE][PDISCSIZE];	//r=0.5
    float distrN2_c[PDISCSIZE][PDISCSIZE];	//r=0, small entries cut
    float distrN2C_c[PDISCSIZE][PDISCSIZE];	//r=0.5, small entries cut
    float distrCut[PDISCSIZE][PDISCSIZE][PDISCSIZE][PDISCSIZE];

    readCdfN01();

    //discretised N(0,1): probability of each of the PDISCSIZE intervals
    float r=0.5;
    float min=-SPREAD; 
    float step=2*SPREAD/PDISCSIZE;
    for (k=0;k<PDISCSIZE;k++)
    {
	float a=min+k*step;
	float b=a+step;
	float cdf=discnorm1v(a,b,0,1);
	distrN01[k]=cdf;
    }


    //2-D joint tables; sumN2/sumN2C accumulate the probability mass removed
#define SMALLPROB 1E-5
    float sumN2=0, sumN2C=0;
    for (k=0;k<PDISCSIZE;k++)
    {
	float a=min+k*step;
	float b=a+step;
	float p1=distrN01[k];
	for (m=0;m<PDISCSIZE;m++)
	{
	    float a2=min+m*step;
	    float b2=a2+step;
	    float p2=distrN01[m];
	    float prob=p1*p2;
	    distrN2[k][m]=prob;
	    if (distrN2[k][m]<SMALLPROB){
		sumN2+=distrN2[k][m];
		distrN2_c[k][m]=0;
	    }else{
		distrN2_c[k][m]=prob;
	    }
	    prob=CalProbNorm2(0,1,a,b,0,1,a2,b2,r);
	    distrN2C[k][m]=prob;
	    if (distrN2C[k][m]<SMALLPROB){
		sumN2C+=distrN2C[k][m];
		distrN2C_c[k][m]=0;
	    }else{
		distrN2C_c[k][m]=prob;
	    }
	}
    }


    //renormalise the cut tables so that they sum to one again
    for (k=0;k<PDISCSIZE;k++)
    {
	for (m=0;m<PDISCSIZE;m++)
	{
	    distrN2_c[k][m]=distrN2_c[k][m]/(1-sumN2);
	    distrN2C_c[k][m]=distrN2C_c[k][m]/(1-sumN2C);
	}
    }

    //4-D product of the cut r=0.5 table, cut once more and renormalised
    float sumCut=0;
    for (i=0;i<PDISCSIZE;i++)
	for (j=0;j<PDISCSIZE;j++)
	    for (k=0;k<PDISCSIZE;k++)
		for (m=0;m<PDISCSIZE;m++)
		{
		    float prob=distrN2C_c[i][j]*distrN2C_c[k][m];
		    if (prob<SMALLPROB){
			sumCut+=prob;
			distrCut[i][j][k][m]=0;
		    }else{
			distrCut[i][j][k][m]=prob;
		    }
		}
    for (i=0;i<PDISCSIZE;i++)
	for (j=0;j<PDISCSIZE;j++)
	    for (k=0;k<PDISCSIZE;k++)
		for (m=0;m<PDISCSIZE;m++)
		{
		    distrCut[i][j][k][m]=distrCut[i][j][k][m]/(1-sumCut);
		}


    //sample points of w and l for GNum grids: mean 1, 1.1, 1.2 with sigma
    //10% of the mean, starting at mean-3*sigma in steps of one sigma
#define GNum 3
    float w[GNum][PDISCSIZE],l[GNum][PDISCSIZE]; 
    float mu[GNum],sig[GNum];
    for (i=0;i<GNum;i++){
	mu[i]=1+i*0.1;
	sig[i]=0.1*mu[i];
    }
    for (i=0;i<GNum;i++){
	float gridMin=mu[i]-3*sig[i];
	float gridStep=sig[i];
	for (j=0;j<PDISCSIZE;j++)
	{
	    w[i][j]=gridMin+j*gridStep; 
	    l[i][j]=gridMin+j*gridStep;
	}
    }

    //range of the result histogram, read from stdin
    float minV,maxV,stepV;
    printf("min:");
    if (scanf("%f",&minV)!=1){
	cout<<"TestCut: cannot read min value\n";
	return;
    }
    printf("max:");
    if (scanf("%f",&maxV)!=1){
	cout<<"TestCut: cannot read max value\n";
	return;
    }
    if (maxV<=minV){
	cout<<"TestCut: max must be larger than min\n";
	return;
    }
    stepV=(maxV-minV)/RSTDISCSIZE;
    DISTRIB slot[RSTDISCSIZE],slot_c[RSTDISCSIZE];
    for (i=0;i<RSTDISCSIZE;i++)
    {
	slot[i].intva=slot_c[i].intva=minV+i*stepV;
	slot[i].intvb=slot_c[i].intvb=minV+(i+1)*stepV;
	slot[i].prob=slot_c[i].prob=0;
    }

    //histogram of the value with (slot_c) and without (slot) cutting;
    //values outside [minV,maxV] are clamped into the first/last slot
    for (i=0;i<PDISCSIZE;i++)
	for (j=0;j<PDISCSIZE;j++)
	    for (k=0;k<PDISCSIZE;k++)
		for (m=0;m<PDISCSIZE;m++)
		{
		    float val=w[0][i]/l[0][j]*w[1][k]*l[1][m];
		    float prob=distrN2C[i][k] * distrN2C[j][m];
		    float prob_c=distrCut[i][k][j][m];

		    int ind=(int)((val-minV)/stepV);
		    if (ind<0) ind=0;
		    else if (ind>=RSTDISCSIZE) ind=RSTDISCSIZE-1;
		    slot[ind].prob+=prob;
		    slot_c[ind].prob+=prob_c;

		}

    //the histograms have RSTDISCSIZE slots (not PDISCSIZE)
    cout<<"no cut\n";
    for (i=0;i<RSTDISCSIZE;i++)
    {
	cout<<slot[i].intva<<" "<<slot[i].intvb<<" "<<slot[i].prob<<endl;
    }

    cout<<"with cut\n";
    for (i=0;i<RSTDISCSIZE;i++)
    {
	cout<<slot_c[i].intva<<" "<<slot_c[i].intvb<<" "<<slot_c[i].prob<<endl;
    }

}



void CTiming::TestFaninGrid()
{      
    //    ReadInstLoc(fname);	
    m_pCorrModel->UdtDelta(m_pCkt);

    map<string,CInst*>::iterator institer;
    map<string,CPort*>::iterator ptiter;
    int totalcount=0,nextcount=0,multicount=0,max=-1;
    int count,flag;
    institer=m_pCkt->m_InstHash.begin();
    for (;institer!=m_pCkt->m_InstHash.end();++institer){
	CInst *inst=(*institer).second;
	float drvx=inst->m_Grid.x;
	float drvy=inst->m_Grid.y;
	map<string,CPort*>& porthash=inst->GetPortHash(); 
	count=0;
	flag=-1;
	float lastgridx=-1,lastgridy=-1;
	for (ptiter=porthash.begin();ptiter!=porthash.end();++ptiter)
	{               
	    CPort* port=(*ptiter).second;
	    if(port->m_Direction=="input")
	    {
		CConn* conn=port->m_Conn->m_ConnNet->GetNetDrv()->m_Conn;
		if (conn->m_ConnInst && !m_pCkt->FindDffPI(inst->m_InstName,port->m_PortName)){
		    float x=conn->m_ConnInst->m_Grid.x;
		    float y=conn->m_ConnInst->m_Grid.y;
		    if (lastgridx==-1){
			lastgridx=x;
			lastgridy=y;
			count++;
		    }else if ((lastgridx==x && abs(lastgridy-y)==1)||
			      (lastgridy==y && abs(lastgridx-x)==1)){
			if ((x!=drvx|| y!=drvy) && (lastgridx!=drvx|| lastgridy!=drvy)){
			    flag=2;
			}
			count++;	
		    }else if(lastgridx!=x || lastgridy!=y){
			flag=0;
			count++;
			break;
		    }else{
			flag=1;
			count++;
		    }
		}
	    }
	}
	max=max>count?max:count;
	if (count>1){
	    multicount++;
	    if (flag==1 &&  (drvx!=lastgridx || drvy!=lastgridy))  {
		totalcount++;
		cout<<"same grid instance:"<<inst->m_InstName<<endl;
	    }
	    if (flag==2){
		nextcount++;
		cout<<"next grid instance:"<<inst->m_InstName<<endl;
	    }
	}
    }

    cout<<"max fanout#:"<<max<<endl;
    cout<<"fanin in next grid:"<<nextcount<<endl;
    cout<<"fanin in same grid:"<<totalcount<<endl;
    cout<<"total instances with multi-fanin:"<<multicount<<endl;
}


// for test purpose
// how many driver-reciever pairs belongs to different grids?
void CTiming::TestDiffGrid(){

    int countDiffGrid=0,countAdjGrid=0,countSameGrid=0;

    list<CInst*>::iterator institer;
    institer=m_lInstSeq.begin();
    for (;institer!=m_lInstSeq.end();++institer)
    {
	//browse the queue of instance
	CInst* inst=*institer;
	float drvx=inst->m_Grid.x;	
	float drvy=inst->m_Grid.y;	

	map<string,CPort*>& porthash=inst->GetPortHash();
	map<string,CPort*>::iterator ptiter;       
	for (ptiter=porthash.begin();ptiter!=porthash.end();++ptiter)
	{
	    CPort* port=(*ptiter).second;
	    if(port->m_Direction=="output")
	    {
		// find the grid locations of all the recievers
		CNet* net=port->m_Conn->m_ConnNet;
		list<CConn*>& connlist=net->GetNetConnList();

		list<CConn*>::iterator conniter;
		conniter=connlist.begin();
		for (;conniter!=connlist.end();++conniter)
		{ 
		    CConn* conn=*conniter;
		    CInst* recInst=conn->m_ConnInst;
		    if ((!recInst) || recInst->m_CellType=="DFF1")
			continue;
		    if ((recInst->m_Grid.x!=drvx) || (recInst->m_Grid.y!=drvy)){
			countDiffGrid++;
			int dx=(int)recInst->m_Grid.x-(int)drvx;
			int dy=(int)recInst->m_Grid.y-(int)drvy;
			cout<<dx<<" "<<" "<<dy<<" "<<sqrt(dx*dx+dy*dy)<<endl;
			if (sqrt(dx*dx+dy*dy)==1)
			    countAdjGrid++;
		    }else{
			countSameGrid++;
		    }
		}
	    }
	}
    }

    cout << "drv-rec in same grid: "<< countSameGrid<<endl;
    cout << "drv-rec in diff grid: "<< countDiffGrid<<endl;
    cout << "drv-rec in diff adj grid: "<< countAdjGrid<<endl;
}


//for test purpose
// find for each PO/DffPO the grids it traverses
void CTiming::TestPath()
{
    //browse the inst Queue
    list<CInst*>::iterator institer;   
    for (institer=m_lInstSeq.begin();institer!=m_lInstSeq.end();++institer)
    {
	CInst* drvinst=*institer;
	map<string,CPort*>& porthash=drvinst->GetPortHash();

	int instloc=(int)drvinst->m_Grid.index;
	if (drvinst->m_iLevel==1)
	    drvinst->m_PathGrid[instloc]=1;	

	//browse the ports of the instance
	//find the net driven by the instance
	CNet* drvnet;        
	CPort* drvport;
	map<string,CPort*>::iterator ptiter;          
	for (ptiter=porthash.begin();ptiter!=porthash.end();++ptiter)
	{           
	    CPort* port=(*ptiter).second;
	    if(port->m_Direction=="output")
	    {//the net that the instance drives            
		drvnet=port->m_Conn->m_ConnNet;
		drvport=port; 
		break;
	    }
	}

	//add the instance's grid to its fanout instances
	list<CConn*>& connlist=drvnet->GetNetConnList();
	list<CConn*>::iterator conniter;               
	for (conniter=connlist.begin();conniter!=connlist.end();++conniter)
	{
	    CConn* conn=*conniter;
	    if((conn->m_ConnPort->m_Direction=="input")&&(conn->m_ConnInst!=NULL))
	    {   

		map<int,int>::iterator pgit;
		pgit=drvinst->m_PathGrid.begin();
		for (;pgit!=drvinst->m_PathGrid.end();++pgit)
		{
		    conn->m_ConnInst->m_PathGrid[(*pgit).first]+=(*pgit).second;
		}

		int loc=(int)conn->m_ConnInst->m_Grid.index;
		conn->m_ConnInst->m_PathGrid[loc]+=1;
		//print the path-grid at PO/DffPO
		if (m_pCkt->FindDffPO(conn->m_ConnInst->m_InstName,conn->m_ConnPort->m_PortName))
		{
		    bugfile<<"***DffPO:"<<conn->m_ConnInst->m_InstName<<endl;
		    map<int,int>::iterator pgit;
		    pgit=conn->m_ConnInst->m_PathGrid.begin();
		    for (;pgit!=conn->m_ConnInst->m_PathGrid.end();++pgit)
		    {   
			bugfile<<"Grid:"<< (*pgit).first;
			bugfile<<", num_of_Inst:"<< (*pgit).second<<endl;
		    }
		}
	    }

	    //print the path-grid at PO
	    if (m_pCkt->FindPO(conn->m_ConnPort->m_PortName))
	    {
		bugfile<<"***PO:"<<conn->m_ConnPort->m_PortName<<endl;
		map<int,int>::iterator pgit;
		pgit=drvinst->m_PathGrid.begin();
		for (;pgit!=drvinst->m_PathGrid.end();++pgit)
		{
		    bugfile<<"Grid:"<< (*pgit).first;
		    bugfile<<", num_of_Inst:"<< (*pgit).second<<endl;
		}
	    }	
	}

	drvinst->m_PathGrid.erase(drvinst->m_PathGrid.begin(),drvinst->m_PathGrid.end());   

    }

}

//for test purpose only
// for gates with multi-fanin, measure how much these fanins correlated to each other
void CTiming::TestGetCorrPath()
{
    CalTiming();
    FindCorrPath();
    PrtCorrPath();
}

void CTiming::CalTiming(){

    //for PI and DffPI ports, set the departure time
    //set the arrival time of nets they drive
    map<string,CPort*>::iterator ptiter;
    ptiter=m_pCkt->m_PIHash.begin();
    for (;ptiter!=m_pCkt->m_PIHash.end();++ptiter)
    {
	CPort *port=(*ptiter).second;
	port->SetArrivalTime(0);
	CNet* net=port->m_Conn->m_ConnNet;
	net->SetArrivalTime(0);
    }
    ptiter=m_pCkt->m_DffPIHash.begin();
    for (;ptiter!=m_pCkt->m_DffPIHash.end();++ptiter)
    {
	CPort *port=(*ptiter).second;
	port->SetArrivalTime(0);
	CNet* net=port->m_Conn->m_ConnNet;
	net->SetArrivalTime(0);
    }

    //browse the inst Queue
    list<CInst*>::iterator institer;   
    for (institer=m_lInstSeq.begin();institer!=m_lInstSeq.end();++institer)
    {
	CInst* drvinst=*institer;
	map<string,CPort*>& porthash=drvinst->GetPortHash();

	//browse the ports of the instance
	//find the net driven by the instance

	CNet* drvnet;        
	CPort* drvport;

	map<string,CPort*>::iterator ptiter;          
	for (ptiter=porthash.begin();ptiter!=porthash.end();++ptiter)
	{           
	    CPort* port=(*ptiter).second;
	    if(port->m_Direction=="output")
	    {//the net that the instance drives            
		drvnet=port->m_Conn->m_ConnNet;
		drvport=port; 
		break;
	    }
	}

	//calc the arrival time for all input ports of the instance             
	//find path delay till the drvport of the instance 
	float maxdelay=-1,maxInstdelay=-1;        
	CNet* maxinnet=NULL;
	CPort* maxinport=NULL;
	for (ptiter=porthash.begin();ptiter!=porthash.end();++ptiter)
	{               
	    //calc depTime for net conn to the input ports of the instance
	    //   =arrTime + netDelay 
	    //set arrival time for the input ports of the instance

	    CPort* port=(*ptiter).second;
	    if(port->m_Direction=="input")
	    {//input port of the instance

		CNet* net=port->m_Conn->m_ConnNet;                
		float netdelay=net->GetDelay(port->m_PortName);
		float delay=net->GetArrivalTime()+netdelay;
		port->SetArrivalTime(delay);

		float instdelay;
		instdelay=drvinst->GetDelay(port->m_PortName).Delay;
		delay+=instdelay;                    
		if (delay>maxdelay){
		    maxdelay=delay;
		    maxinnet=net;
		    maxInstdelay=instdelay;
		    maxinport=port;
		}                
	    }
	}


	//set departTime of the drvport
	//update the arrTime of the net driven by the instance
	drvport->SetArrivalTime(maxdelay);           
	drvnet->SetArrivalTime(maxdelay);


	//get last net on the longest path	
	CPort* lastport=maxinnet->GetNetDrv();
	//copy the last maxdelay port's m_CorrPath to drvport
	//and remove if ..(??)
	drvport->m_CorrPath.lastport=lastport;
	drvport->m_CorrPath.inport=maxinport;
	drvport->m_CorrPath.pathgrid=lastport->m_CorrPath.pathgrid;    
	//add this gate's (grid,delay) to m_CorrPath
	int x=(int)drvinst->m_Grid.x;
	int y=(int)drvinst->m_Grid.y;
	drvport->m_CorrPath.pathgrid[drvinst->m_Grid.index]+=maxInstdelay;
#ifdef _DEBUG0
	drvport->PrtPathgrid();
#endif  
    }//for loop of instance queue


    //POs, DffPO are not in the instance queue
    //cal arrTime for the pins of POs and DffPO
    m_fMaxDelay=-1;

    ptiter=m_pCkt->m_POHash.begin();
    for (;ptiter!=m_pCkt->m_POHash.end();++ptiter)
    {                
	CPort *port=(*ptiter).second;
	CNet* net=port->m_Conn->m_ConnNet;
	float netdelay=net->GetDelay(port->m_PortName);
	float delay=net->GetArrivalTime()+netdelay;
	port->SetArrivalTime(delay);
	if (delay>m_fMaxDelay){
	    m_fMaxDelay=delay;
	    m_MaxDelayPort=port;
	    if (port->m_Conn->m_ConnInst)
		m_MaxDelayInst=port->m_Conn->m_ConnInst;
	    else
		m_MaxDelayInst=NULL;
	}
	//get last net on the longest path
	CPort* lastport=net->GetNetDrv();
	//copy the last maxdelay port's m_CorrPath to drvport
	//and remove if ..(??)
	port->m_CorrPath.lastport=lastport;
	port->m_CorrPath.inport=port;
	port->m_CorrPath.pathgrid=lastport->m_CorrPath.pathgrid; 
#ifdef _DEBUG0
	port->PrtPathgrid();
#endif
    }

    ptiter=m_pCkt->m_DffPOHash.begin();
    for (;ptiter!=m_pCkt->m_DffPOHash.end();++ptiter)
    {                
	CPort *port=(*ptiter).second;
	CNet* net=port->m_Conn->m_ConnNet;
	float netdelay=net->GetDelay(port->m_PortName);
	float delay=net->GetArrivalTime()+netdelay;
	port->SetArrivalTime(delay);
	if (delay>m_fMaxDelay){
	    m_fMaxDelay=delay;
	    m_MaxDelayPort=port;
	    if (port->m_Conn->m_ConnInst)
		m_MaxDelayInst=port->m_Conn->m_ConnInst;
	    else
		m_MaxDelayInst=NULL;
	}
	//get last net on the longest path

	CPort* lastport=net->GetNetDrv();
	//copy the last maxdelay port's m_CorrPath to drvport
	//and remove if ..(??)
	port->m_CorrPath.lastport=lastport;
	port->m_CorrPath.inport=port;
	port->m_CorrPath.pathgrid=lastport->m_CorrPath.pathgrid; 
#ifdef _DEBUG0
	port->PrtPathgrid();
#endif
    }

}


//check each instance in the ckt
//see if ite fanin-paths are highly correlated,if it's multi-input ones
void CTiming::FindCorrPath()
{

    bugfile<<"Total PO:"<<m_pCkt->m_POHash.size()<<endl;
    bugfile<<"Total DffPO:"<<m_pCkt->m_DffPOHash.size()<<endl;

    float corr;
    int fancount=0;
    map<string,CInst*>::iterator institer;
    map<string,CPort*>::iterator ptiter,ptiter2;
    institer=m_pCkt->m_InstHash.begin();
    int instcount=0;
    for (;institer!=m_pCkt->m_InstHash.end();++institer){
	instcount++;
	CInst *inst;
	inst=(*institer).second;
	map<string,CPort*>& porthash=inst->GetPortHash(); 
	int count=0;
	for (ptiter=porthash.begin();ptiter!=porthash.end();++ptiter)
	{               
	    CPort* port=(*ptiter).second;
	    if(port->m_Direction=="input")
	    {
		count++;
		ptiter2=ptiter;
		ptiter2++;
		//ptiter2=porthash.begin();
		for (;ptiter2!=porthash.end();++ptiter2)        
		{               
		    CPort* port2=(*ptiter2).second;
		    if(port2->m_Direction=="input")
		    {
			corr=CalCorrPath(port,port2);
		    }
		} 
	    }
	}
	if (count>1)
	    fancount++;

    }//end of inst hash
    bugfile<<"Total multi-fanin instance:"<<fancount<<endl;
    bugfile<<"Total non-dff instance:"<<instcount<<endl;


    //do the same thing for all the POs/DffPOs
    int maxcount=0;
    ptiter=m_pCkt->m_POHash.begin();
    while (ptiter!=m_pCkt->m_DffPOHash.end())
    {
	int count=0;
	CPort* port=(*ptiter).second;   
	if (port->GetArrivalTime()>0.5*m_fMaxDelay)
	{
	    ptiter2=ptiter;
	    ++ptiter2;
	    if (ptiter2==m_pCkt->m_POHash.end())
		ptiter2=m_pCkt->m_DffPOHash.begin();
	    while (ptiter2!=m_pCkt->m_DffPOHash.end())
	    {
		CPort* port2=(*ptiter2).second;
		if (port->GetArrivalTime()>0.5*m_fMaxDelay)
		{  
		    corr=CalCorrPath(port,port2);
		    if (corr!=-1){
			count++;
		    }
		}
		++ptiter2;
		if (ptiter2==m_pCkt->m_POHash.end())
		    ptiter2=m_pCkt->m_DffPOHash.begin();
	    }
	}
	++ptiter;
	if (ptiter==m_pCkt->m_POHash.end())
	    ptiter=m_pCkt->m_DffPOHash.begin();

	maxcount=maxcount>count? maxcount:count;
    }

    bugfile<<"maxcorr dffPOs:"<<maxcount<<endl;
}

// Estimates how strongly the paths ending at port1 and port2 are correlated,
// based on the grid cells each path went through (m_CorrPath.pathgrid of
// the ports driving their nets, filled by CalTiming()).
//
// Each grid cell of a path is weighted by the share of the path delay spent
// in it; the weight is multiplied by the mean CorrDegree() of that cell
// against the cells of the other path and summed up, once from the point of
// view of path 1 (rst1) and once from path 2 (rst2).
//
// Returns min(rst1,rst2)/max(rst1,rst2) and appends the pair to m_Corrpath
// when the paths count as highly correlated. Returns -1, recording nothing,
// when
//   - the two arrival times differ by more than 20% of their sum,
//   - a path has no grid information or a non-positive arrival time,
//   - rst1 or rst2 is zero or below VALNOPCORR,
//   - the ratio is below RATENOPCORR.
float CTiming::CalCorrPath(CPort* port1,CPort* port2)
{
    const float pathdelay1=port1->GetArrivalTime();
    const float pathdelay2=port2->GetArrivalTime();

    // fabs, not abs: the operands are floats and must not be truncated by
    // the integer overload
    if (fabs(pathdelay1-pathdelay2)>(pathdelay1+pathdelay2)*0.2)
        return -1;

    CPort* drvport1=port1->m_Conn->m_ConnNet->GetNetDrv();    
    CPort* drvport2=port2->m_Conn->m_ConnNet->GetNetDrv(); 

    map<int,float>& grids1=drvport1->m_CorrPath.pathgrid;
    map<int,float>& grids2=drvport2->m_CorrPath.pathgrid;

    // Without grid entries on either side one of the two sums below is zero
    // (and the other one 0/0); without a positive path delay the delay
    // shares are undefined. Neither case can yield a meaningful ratio.
    if (grids1.empty() || grids2.empty())
        return -1;
    if (pathdelay1<=0 || pathdelay2<=0)
        return -1;

    float rst1=0,rst2=0;
    map<int,CorrHash> corr2Hash;    // per cell of path 2: share, summed degree, #terms

    map<int,float>::iterator iter1,iter2;
    for (iter1=grids1.begin();iter1!=grids1.end();++iter1)
    {
        const int grid1=(*iter1).first;                 //grid
        const float rate1=(*iter1).second/pathdelay1;   //share of path delay

        float sumdegree=0;
        int count=0;
        for (iter2=grids2.begin();iter2!=grids2.end();++iter2)
        {
            const int grid2=(*iter2).first;                 //grid
            const float rate2=(*iter2).second/pathdelay2;   //share of path delay
            const float degree=CorrDegree(grid1,grid2);

            CorrHash& entry=corr2Hash[grid2];
            entry.rate=rate2;
            entry.degree+=degree;
            entry.count++;

            sumdegree+=degree;
            count++;
        }
        rst1+=rate1*sumdegree/count;
    }

    map<int,CorrHash>::iterator corr2iter;
    for (corr2iter=corr2Hash.begin();corr2iter!=corr2Hash.end();++corr2iter)
    {
        const CorrHash& entry=(*corr2iter).second;
        rst2+=entry.rate*entry.degree/entry.count;
    }


    //if rst1 or rst2 is small, we can say, path1,2 corr small
    //else if the value rst1/rst2 close to 1, add to corrpaths
    if (rst1==0 || rst2==0)
        return -1;
    if (rst1<VALNOPCORR||rst2<VALNOPCORR)
        return -1;

    float corr;
    if (rst1<=rst2)
        corr=rst1/rst2;
    else
        corr=rst2/rst1;

    if (corr<RATENOPCORR)
        return -1;

    //add the path to port1,port2 to highly correlated paths
    CORRFANIN corrpath;
    corrpath.port1=port1;
    corrpath.port2=port2;
    corrpath.value=corr;
    m_Corrpath.push_back(corrpath);

    return corr;
}    

// Correlation degree assigned to a pair of grid cells given by their linear
// indices: 1 for the same cell, 0.75 for horizontally or vertically
// adjacent cells, 0 otherwise.
//
// Indices are decoded as index = x + y*m_iGridSizeX (the same mapping
// PrtCorrPath() uses), so that the last cell of one row and the first cell
// of the next row -- whose indices also differ by 1 -- are not mistaken for
// neighbours.
// NOTE(review): assumes non-negative grid indices -- confirm.
float CTiming::CorrDegree(int g1,int g2)
{
    if (g1==g2) 
        return 1;

    const int sizeX=m_pCorrModel->m_iGridSizeX;
    if (sizeX<=0)
    {
        // no usable row length: fall back to the plain index distance
        return (abs(g1-g2)==1) ? 0.75f : 0.0f;
    }

    const int dx=abs(g1%sizeX-g2%sizeX);
    const int dy=abs(g1/sizeX-g2/sizeX);
    if (dx+dy==1)
        return 0.75f;

    return 0;    
}

// Writes the correlated pairs collected in m_Corrpath to bugfile.
//
// For every pair the bounding box (in grid x/y) of the two ports' instances
// and of all grid cells on both paths is computed. Pairs whose bounding box
// is at most m_iNumLevel cells wide AND at most m_iNumLevel cells high are
// skipped; all others are counted and printed together with the grid
// indices both paths pass through. Two totals are written at the end.
void CTiming::PrtCorrPath()
{
    list<CORRFANIN>::iterator iter;
    int corrCount=0,corrNPtCount=0;

    iter=m_Corrpath.begin();
    for (;iter!=m_Corrpath.end();++iter)
    {

        CPort* port1=(*iter).port1;
        CPort* port2=(*iter).port2;

        CPort* drvport1=port1->m_Conn->m_ConnNet->GetNetDrv();      

        CPort* drvport2=port2->m_Conn->m_ConnNet->GetNetDrv(); 

        //grid x/y of the two instances; only meaningful (and only read)
        //when the corresponding port is attached to an instance
        int drvx1=0,drvy1=0,drvx2=0,drvy2=0;
        int x,y,minx,miny,maxx,maxy;
        minx=miny=32767;
        maxx=maxy=-1;
        if (port1->m_Conn->m_ConnInst)
        {
            drvx1=x=(int)port1->m_Conn->m_ConnInst->m_Grid.x;
            drvy1=y=(int)port1->m_Conn->m_ConnInst->m_Grid.y;

            minx=maxx=x;
            miny=maxy=y;
        }
        if (port2->m_Conn->m_ConnInst)
        {
            drvx2=x=(int)port2->m_Conn->m_ConnInst->m_Grid.x;
            drvy2=y=(int)port2->m_Conn->m_ConnInst->m_Grid.y;
            minx=minx<=x?minx:x;
            maxx=maxx>=x?maxx:x;
            miny=miny<=y?miny:y;
            maxy=maxy>=y?maxy:y;
        }


        //extend the bounding box by every grid cell on either path
        //(grid index = x + y*m_iGridSizeX)
        map<int,float>::iterator iter1;
        iter1=drvport1->m_CorrPath.pathgrid.begin();
        for (;iter1!=drvport1->m_CorrPath.pathgrid.end();++iter1)
        {
            x=(*iter1).first % m_pCorrModel->m_iGridSizeX;
            y=(int)(*iter1).first/m_pCorrModel->m_iGridSizeX;
            minx=minx<=x?minx:x;
            maxx=maxx>=x?maxx:x;
            miny=miny<=y?miny:y;
            maxy=maxy>=y?maxy:y;
        }
        iter1=drvport2->m_CorrPath.pathgrid.begin();
        for (;iter1!=drvport2->m_CorrPath.pathgrid.end();++iter1)
        {
            x=(*iter1).first % m_pCorrModel->m_iGridSizeX;
            y=(int)(*iter1).first/m_pCorrModel->m_iGridSizeX;
            minx=minx<=x?minx:x;
            maxx=maxx>=x?maxx:x;
            miny=miny<=y?miny:y;
            maxy=maxy>=y?maxy:y;
        }

        //skip pairs whose bounding box fits into m_iNumLevel cells in both
        //directions; the Y extent is maxy-miny+1
        if ((maxx-minx+1)<=m_pCorrModel->m_iNumLevel && (maxy-miny+1)<=m_pCorrModel->m_iNumLevel)
            continue;

        //pairs whose first port sits on an instance pin that is not a DffPO
        if (port1->m_Conn->m_ConnInst)
            if (!m_pCkt->FindDffPO(port1->m_Conn->m_ConnInst->m_InstName,port1->m_PortName))
                corrNPtCount++;  

        corrCount++;

        if (port1->m_Conn->m_ConnInst){
            bugfile<<port1->m_Conn->m_ConnInst->m_InstName;
            bugfile<<"--grid "<<drvx1+drvy1*m_pCorrModel->m_iGridSizeX;
        }
        bugfile<<"--"<<port1->m_PortName;
        bugfile<<";";
        if (port2->m_Conn->m_ConnInst){
            bugfile<<port2->m_Conn->m_ConnInst->m_InstName;
            bugfile<<"--grid "<<drvx2+drvy2*m_pCorrModel->m_iGridSizeX;
        }
        bugfile<<"--"<<port2->m_PortName;
        bugfile<<":"<<(*iter).value<<endl;

        //grid indices visited by each of the two paths
        bugfile<<"  grids passed:\n";
        bugfile<<"      --:\n";
        iter1=drvport1->m_CorrPath.pathgrid.begin();
        for (;iter1!=drvport1->m_CorrPath.pathgrid.end();++iter1)
        {
            bugfile<<"      "<<(*iter1).first;
        }
        bugfile<<"\n      --:\n";
        iter1=drvport2->m_CorrPath.pathgrid.begin();
        for (;iter1!=drvport2->m_CorrPath.pathgrid.end();++iter1)
        {
            bugfile<<"      "<<(*iter1).first;
        }
        bugfile<<endl;
    }
    bugfile<<"Total number of CorrPaths for fanin of normal insts:"<<corrNPtCount<<endl;
    bugfile<<"Total number of CorrPaths:"<<corrCount<<endl;
}

void CTiming::CmpActPin(int flag)
{
    list<CInst*>::iterator institer;
    for	(institer=m_lInstSeq.begin();institer!=m_lInstSeq.end();++institer)
    {
	CInst* drvinst=*institer;
	int	actPinCnt=drvinst->GetActPInCnt();
	int realpinnum=drvinst->GetInPinNum();
	int pinnum=0;
	map<string,CPort*>::iterator ptiter;  
	map<string,CPort*>& porthash=drvinst->GetPortHash(); 
	for (ptiter=porthash.begin();ptiter!=porthash.end();++ptiter)
	{           
	    CPort* port=(*ptiter).second;
	    if (port->m_Direction=="input" && port->m_bTruePath)
	    {                                                                                  
		pinnum++;
	    }
	}
	bugfile<<drvinst->m_InstName<<" "<<realpinnum<<" "<<actPinCnt<<" "<<pinnum;
	if (flag==0 && realpinnum!=pinnum)
	    bugfile <<"---pin wrong"<<realpinnum-pinnum<<endl;
	else if (flag==1 && actPinCnt!=pinnum)
	    bugfile <<"---not eql"<<actPinCnt-pinnum<<endl;
	else if (flag==1 && realpinnum!=pinnum)
	    bugfile<<"---path rm"<<realpinnum-pinnum<<endl;
	else
	    bugfile<<endl;

    }

}

////////////////////
//compute total area of circuit -- in terms of sum of transistor gate widths
//  num_NMOS*wn + num_PMOS*wp 
float CTiming::CalTotArea()
{
    float totArea=0;
    map<string,CInst*>::iterator iter;
    for (iter=m_pCkt->m_InstHash.begin();iter!=m_pCkt->m_InstHash.end();++iter){
	CInst *inst=(*iter).second;  
	float thisGateSize=CalInstArea(inst);
	totArea+=thisGateSize;
	// 		cout<<inst->m_InstName<<" "<<inst->m_fGateSize<<" "<<thisGateSize<<endl;
    }
    return totArea;
}

// Area of one instance in terms of transistor gate width:
//     gateSize[cell] * (wn + wp)
// where wn / wp are the cell's coefWpn coefficients scaled by the
// instance's gate size times `diff`, each shifted by the difference between
// the mean gate width of the instance's grid cell and GATEWIDn.
//   inst - instance to evaluate
//   diff - multiplier applied to the instance's m_fGateSize
// NOTE(review): the PMOS width is shifted by GATEWIDn as well -- confirm
// that this is intended.
float CTiming::CalInstArea(CInst* inst,float diff/*=1.0*/)
{
    const int gridIdx=inst->m_Grid.index;
    const float meanWidth=m_pCorrModel->m_pGrid[gridIdx].GetParaMean(ENUMWGATE);
    const float scaledSize=inst->m_fGateSize*diff;

    const float nWidth=coefWpn[namelistmap[inst->m_CellType]].wn*scaledSize+ (meanWidth-GATEWIDn);
    const float pWidth=coefWpn[namelistmap[inst->m_CellType]].wp*scaledSize+ (meanWidth-GATEWIDn);

    const float area=gateSize[namelistmap[inst->m_CellType]]*(nWidth+pWidth);
    return area;
}



