
#ifndef __MCHARIMAGE__
#define __MCHARIMAGE__

#include "MUtils.h"
#include <stdio.h>
#include <stdlib.h>
#include <iostream>
#include <fstream>
#include <assert.h>
#include <string.h> // memcpy, memset
#include <math.h> // sqrt

#include "MShortImage.h"
#include "MFloatImage.h"

class MShortImage;
class MFloatImage;
 
class MCharImage { // use mainly MMX instructions
  int x__, y__;
  unsigned char *yuv;
 public:
  MCharImage(int x_= 0, int y_= 0, int WithBlackInit= 0);
  ~MCharImage();

  inline void SetSize(int x_, int y_);

  const unsigned char* GetPtr() const {return yuv;}
  inline int x() const { return x__; }
  inline int y() const { return y__; }
  inline const unsigned char& y_(int xx, int yy) const {return yuv[xx+yy*x__];}
  inline unsigned char& y(int xx, int yy) {return yuv[xx+yy*x__];}
  inline unsigned char& uv(int xx, int yy) {return yuv[x__*y__+xx/2+(yy/2)*x__];}
  inline float InterpolationBilineaire(float xx,float yy);
  inline double InterpolationBilineaire(double xx,double yy);
  void NoColor();
  void FillBlack();
  void FillGray(unsigned char GrayLevel);  
  void FillBlackBorder(int BorderSize);
  friend ostream& operator<<(ostream& Out, const MCharImage& Image);

  // Drawing
  void ConvertFrom(const MFloatImage& Image, float Min_, float Max_);
  void ConvertFrom(const MShortImage& Image);
  //copie de l'image avec division par 2 pour assombrir l'image
  void CopyDiv2(const MCharImage& Data);
  void DrawLocalMaxima(const MCharImage& Data, int NbPoint, int* x_yMxStride);
  int Load(FILE* Flux); // Flux from mpeg2dec output
  int Load(const char* PpmOrPgm);
  const MCharImage& operator=(const MCharImage& Image); // full copy
  void PointMark(int NbPoint, int* PointIndices, int WithWhite1Black2= 3);

  void Put(const MCharImage& Data, int xx, int yy);
  void RotateLeft(const MCharImage& Data);
  void RotateRight(const MCharImage& Data);
  void Reduce2x2(const MCharImage& Data); // the truncated size is choosen here

  void ConvertTo16BitScreen(short* Color16Bit, int BytesPerLine) const;

  // Calculations
  void Smooth121(int IsHorizontal0Vertical1, const MCharImage& Data);
  void Gradient_101(int IsHorizontal0Vertical1,const MCharImage& Data);
  void AbsGradient_101(int isHorizontal0Vertical1,const MCharImage& Data);
  void Laplacian1_21_128(const MCharImage& Data); // DO X- & Y-SMOOTH121 BEFORE
  void AbsAndMultiplyLaplacian_128(int Coef);

  // ==>> NO EMMS, NO INDEX TESTS for the following methods :
  float Zncc5x5(int x0,int y0,int x1,int y1, const MCharImage& Data) const;
  void Zncc5x5(int x0,int y0, int x1,int y1, int dy,
	       const MCharImage& Data, int* TableOfdyScore) const;
  inline int Moravec5x5(int xx,int yy) const;
  inline void Mask5x5(int xx, int yy, const MCharImage& Data);

  //Sum of Absolute Difference
  inline short SAD5x5(int x0,int y0,int x1,int y1,const MCharImage& Data) const;
  //Methodes pour le precalcul de ZNCC11x11
  inline float ZNCC11x11Moyenne(int xx,int yy);
  inline void ZNCC11x11Num(float *ZNCCNum,int xx,int yy,float ZNCCMoyenne);

  //Mesure de confiance pour propagation des appariements
  inline short MesureConfiance(int xx,int yy);
  //Renvoie 255 si le pixel depasse le seuil de confiance (fournir aussi -Seuil pour eviter de le recalculer a chaque fois)
  //Renvoie 0 si inferieur au seuil
  inline unsigned char MesureConfianceSuperieur(int offset,short Seuil,short MoinsSeuil);
  //Calcul de l'image de confiance (0 si <Seuil, 255 si >Seuil)
  inline unsigned char MCharImage::CalcImgConfianceSuperieur(MCharImage& Source,short Seuil);

  //Egalisation d'histogramme pour les niveaux de gris dans [GrisMin..GrisMax]
  void EgaliseHistogramme(unsigned char GrisMin=0,unsigned char GrisMax=255);

};
void MCompare(const MCharImage& Image0, const MCharImage& Image1, int Border);

//-----------------------------------------------------------------------
// SetSize
//-----------------------------------------------------------------------
inline void MCharImage::SetSize(int x_, int y_) {
  assert((x_&7)== 0 && (y_&1)== 0);
  if (x_*y_> x__*y__) {
    if (x__)
      MAlign16ByteDelete((int*)yuv); // delete yuv;
    yuv= (unsigned char*)MAlign16ByteNew(((x_*(y_+1)*3)/2+16)/4);
  };
  x__= x_;
  y__= y_;
}

//-----------------------------------------------------------------------
// Interpolation bilineaire
//-----------------------------------------------------------------------
inline float MCharImage::InterpolationBilineaire(float xx,float yy)
{
	int xx0=int(xx);
	int yy0=int(yy);
	float u=xx-xx0;
	float v=yy-yy0;
	float val00,val01,val10,val11;
	
	val00=yuv[xx0+yy0*x__];
	if (yy0<y__) val01=yuv[xx0+(yy0+1)*x__];
	else val01=val00;
	if (xx0<x__) val10=yuv[xx0+1+yy0*x__];
	else val10=val00;
	if ((xx0<x__) && (yy0<y__)) val11=yuv[xx0+1+(yy0+1)*x__];
	else val11=val00;
	return ( (1-u)*(1-v)*val00 + u*(1-v)*val10 + (1-u)*v*val01 + u*v*val11 );	
}
inline double MCharImage::InterpolationBilineaire(double xx,double yy)
{
	int xx0=int(xx);
	int yy0=int(yy);
	double u=xx-xx0;
	double v=yy-yy0;
	double val00,val01,val10,val11;

	val00=yuv[xx0+yy0*x__];
	if (yy0<y__) val01=yuv[xx0+(yy0+1)*x__];
	else val01=val00;
	if (xx0<x__) val10=yuv[xx0+1+yy0*x__];
	else val10=val00;
	if ((xx0<x__) && (yy0<y__)) val11=yuv[xx0+1+(yy0+1)*x__];
	else val11=val00;
	return ( (1-u)*(1-v)*val00 + u*(1-v)*val10 + (1-u)*v*val01 + u*v*val11 );
}

// *********************** L1-moravec 5x5 using psadbw ************************
// le code est ici pour pouvoir declarer la fonction inline

#ifdef ARCH_X86
static inline void MmmxMoravec5x5_1(const unsigned char* In) {
  static mmx_t mmx_00fffff0 = {0x0000ffffffffff00ull};
  movq_m2r    (mmx_00fffff0,mm0); // mm0= 00 00 ff ff ff ff ff 00
  movq_m2r    (*In,mm1);          // mm1= y7 y6 y5 y4 y3 y2 y1 y0
  pxor_r2r    (mm4,mm4);          // mm4= 0 0 0 0
  pxor_r2r    (mm5,mm5);          // mm5= 0 0 0 0
  pxor_r2r    (mm6,mm6);          // mm6= 0 0 0 0
  pxor_r2r    (mm7,mm7);          // mm7= 0 0 0 0
}
static inline void MmmxMoravec5x5_2(const unsigned char* In) {
  // mm0= 00 00 ff ff ff ff ff 00
  // mm1= y7 y6 y5 y4 y3 y2 y1 y0
  // mm7= sum(dxy4) sum(dxy3) sum(dxy2) sum(dxy1)
  movq_r2r    (mm1,mm2);
  pand_r2r    (mm0,mm2);  // mm2= 00 00 y5 y4 y3 y2 y1 00

  movq_r2r    (mm1,mm3);
  psrlq_i2r   (8,mm3);
  pand_r2r    (mm0,mm3);  // mm3= 00 00 y6 y5 y4 y3 y2 00
  psadbw_r2r  (mm2,mm3);  // mm3= 0 0 0 |y2-y1|+|y3-y2|+|y4-y3|+|y5-y4|+|y6-y5|
  paddw_r2r   (mm3,mm4);

  movq_m2r    (*In,mm1);  // mm1= z7 z6 z5 z4 z3 z2 z1 z0

  movq_r2r    (mm1,mm3);
  pand_r2r    (mm0,mm3);  // mm3= 00 00 z5 z4 z3 z2 z1 00
  psadbw_r2r  (mm2,mm3);  // mm3= 0 0 0 |z5-y5|+|z4-y4|+|z3-y3|+|z2-y2|+|z1-y1|
  paddw_r2r   (mm3,mm5);

  movq_r2r    (mm1,mm3);
  psrlq_i2r   (8,mm3);
  pand_r2r    (mm0,mm3);  // mm3= 00 00 z6 z5 z4 z3 z2 00
  psadbw_r2r  (mm2,mm3);  // mm3= 0 0 0 |z6-y5|+|z5-y4|+|z4-y3|+|z3-y2|+|z2-y1|
  paddw_r2r   (mm3,mm6);

  movq_r2r    (mm1,mm3);
  psllq_i2r   (8,mm3);
  pand_r2r    (mm0,mm3);  // mm3= 00 00 z4 z3 z2 z1 z0 00
  psadbw_r2r  (mm2,mm3);  // mm3= 0 0 0 |z4-y5|+|z3-y4|+|z2-y3|+|z1-y2|+|z0-y1|
  paddw_r2r   (mm3,mm7);
}
static inline void MmmxMoravec5x5_3(unsigned int* Out) {
  pminsw_r2r  (mm6,mm7);
  pminsw_r2r  (mm4,mm5);
  pminsw_r2r  (mm5,mm7); // mm7=000min(sum(dy+1),sum(dx+1),sum(dx-1),sum(dy-1))
  pshufw_r2r  (mm7,mm7,0);

  movd_r2m    (mm7,*Out); // save two shorts with the same result !
}
static inline void MmmxMoravec5x5(const unsigned char* In, int xStride,
				  unsigned int* Out) {
  MmmxMoravec5x5_1(In);
  for (int i= 5; i; i--) {
    In+= xStride;
    MmmxMoravec5x5_2(In);
  }
  MmmxMoravec5x5_3(Out);
};
#else
static inline void MmmxMoravec5x5(const unsigned char* In, int xStride,
				  unsigned int* Out) {}
#endif

inline int MCharImage::Moravec5x5(int xx, int yy) const {
  xx-= 2;
  yy-= 2;
  if (WithMmxSseUse) {
    unsigned int Result;
    MmmxMoravec5x5(yuv+yy*x__+(xx-1), x__, &Result);
    return (0xffff&Result);
  } else {
    int xInc= 0, yIncxInc= 0, yIncxDec= 0, yInc= 0;
    for (int dy= 0; dy< 5; dy++) {
      unsigned char* Pte= yuv+(yy+dy)*x__+xx;
      for (int dx= 0; dx< 5; dx++) {
	int V= Pte[dx];
	xInc+= Abs(V-Pte[dx+1]);         yIncxInc+= Abs(V-Pte[dx+1+x__]);
	yIncxDec+= Abs(V-Pte[dx-1+x__]); yInc+=     Abs(V-Pte[dx+x__]);
      };
    };
    return Min(Min(xInc,yIncxInc), Min(yIncxDec, yInc));
  };
}


//-------------------------------------------------------------------------------------------------------
// ZNCC11x11Moyenne
//-------------------------------------------------------------------------------------------------------
#ifdef ARCH_X86
static inline void MmmxZNCC11x11Moyenne0(unsigned char* In)
{
	static mmx_t mmx_FFF0w={0x0000FFFFFFFFFFFFull};
	movq_m2r(mmx_FFF0w,mm6);
  pxor_r2r(mm7,mm7);	//mm7= 00 00 00 00 00 00 00 00

  //Les 8 premiers octets
	movq_m2r(*In,mm0);  //mm0= i7 i6 i5 i4 i3 i2 i1 i0
	movq_r2r(mm0,mm1);
	punpckhbw_r2r(mm7,mm1);	//mm1= 00i7 00i6 00i5 00i4
	punpcklbw_r2r(mm7,mm0); //mm0= 00i3 00i2 00i1 00i0
	paddusw_r2r(mm1,mm0);   //resultat partiel dans mm0
	//Les 3 suivants
	movq_m2r(*(In+8),mm3);	//mm3= j7 j6 j5 j4 j3 j2 j1 j0
	punpcklbw_r2r(mm7,mm3); //mm3= 00j7 00j6 00j5 00j4
	pand_r2r(mm6,mm3);			//mm3= 00j7 00j6 00j5 0000
	paddusw_r2r(mm3,mm0);		//resultat de la ligne dans mm0
}
static inline void MmmxZNCC11x11Moyenne1(unsigned char* In)
{
	//Les 8 premiers octets
	movq_m2r(*In,mm1);  //mm1= i7 i6 i5 i4 i3 i2 i1 i0
	movq_r2r(mm1,mm2);
	punpckhbw_r2r(mm7,mm2);	//mm2= 00i7 00i6 00i5 00i4
	punpcklbw_r2r(mm7,mm1); //mm1= 00i3 00i2 00i1 00i0
	paddusw_r2r(mm2,mm1);   
	paddusw_r2r(mm1,mm0);
	//Les 3 suivants
	movq_m2r(*(In+8),mm3);	//mm3= j7 j6 j5 j4 j3 j2 j1 j0
	punpcklbw_r2r(mm7,mm3); //mm3= 00j7 00j6 00j5 00j4
	pand_r2r(mm6,mm3);			//mm3= 00j7 00j6 00j5 0000
	paddusw_r2r(mm3,mm0);		//resultat de la ligne dans mm0
}
static inline void MmmxZNCC11x11Moyenne2(signed short* Res)
{
	movq_r2m(mm0,*Res);
}
static inline int MmmxZNCC11x11Moyenne(unsigned char* In,int xStride)
{
	//calcul de la somme des 8 premiers octets de chacune des 11 lignes
  MmmxZNCC11x11Moyenne0(In);
  In+=xStride;
  MmmxZNCC11x11Moyenne1(In);
  In+=xStride;
  MmmxZNCC11x11Moyenne1(In);
  In+=xStride;
  MmmxZNCC11x11Moyenne1(In);
  In+=xStride;
  MmmxZNCC11x11Moyenne1(In);
  In+=xStride;
  MmmxZNCC11x11Moyenne1(In);
  In+=xStride;
  MmmxZNCC11x11Moyenne1(In);
  In+=xStride;
  MmmxZNCC11x11Moyenne1(In);
  In+=xStride;
  MmmxZNCC11x11Moyenne1(In);
  In+=xStride;
  MmmxZNCC11x11Moyenne1(In);
  In+=xStride;
  MmmxZNCC11x11Moyenne1(In);
	//récupération du résultat
	signed short Res[4];
	MmmxZNCC11x11Moyenne2(Res);
	return int(Res[0]+Res[1]+Res[2]+Res[3]);
}
#else
static inline int MmmxZNCC11x11Moyenne(unsigned char* In,int xStride)
{return 0;}
#endif
inline float MCharImage::ZNCC11x11Moyenne(int xx,int yy)
{
	int Somme=0;

	int xmin=Max(0,xx-5);
	int xmax=Min(x__,xx+5);
	int ymin=Max(0,yy-5);
	int ymax=Min(y__,yy+5);
	int nb=(xmax-xmin+1)*(ymax-ymin+1);

	if ( (xx>=5) && ((xx+5)<x__) && (yy>=5) && ((yy+5)<y__) && MWithMmxSseUse())
	{
    Somme=MmmxZNCC11x11Moyenne(&(yuv[xmin+ymin*x__]),x__);
    Memms();
	}
	else
	{
		for (int j=ymin;j<=ymax;j++)
		{
			unsigned char* PteIn=&(yuv[xmin+j*x__]);
			for (int i=xmin;i<=xmax;i++)
			{
					Somme+=*PteIn;
					PteIn++;
			}
		}
	}
	return (float(Somme)/float(nb));
}

//-------------------------------------------------------------------------------------------------------
// ZNCC11x11Num
//-------------------------------------------------------------------------------------------------------
#ifdef ARCH_X86
static inline void MmmxZNCC11x11NumFloat0(float *ZNCCMoy4)
{
	pxor_r2r(xmm7,xmm7);
	movups_m2r(*ZNCCMoy4,xmm6);
}
static inline void MmmxZNCC11x11NumFloat1(unsigned char *In,float *Out)
{
	//a l'entree : xmm7=0, xmm6=ZNCCMoyenne (x4)
	movq_m2r(*In,xmm1);					//xmm1=00 00 00 00 00 00 00 00 i7 i6 i5 i4 i3 i2 i1 i0
	punpcklbw_r2r(xmm7,xmm1);	  //xmm1=00i7 00i6 00i5 00i4 00i3 00i2 00i1 00i0
	movaps_r2r(xmm1,xmm2);
	punpcklwd_r2r(xmm7,xmm1);		//xmm1=000000i7 000000i6 000000i5 000000i4
	punpckhwd_r2r(xmm7,xmm2);		//xmm2=000000i3 000000i2 000000i1 000000i0
	cvtdq2ps_r2r(xmm1,xmm1);    //conversion en float
	cvtdq2ps_r2r(xmm2,xmm2);    //conversion en float
	subps_r2r(xmm6,xmm1);
	movups_r2m(xmm1,*Out);
	subps_r2r(xmm6,xmm2);
	movups_r2m(xmm2,*(Out+4));
}
static inline void MmmxZNCC11x11NumFloat(unsigned char *In,float ZNCCMoyenne,float *ZNCCNum,int xStride)
{
	float ZNCCMoy4[4];
	ZNCCMoy4[0]=ZNCCMoyenne;ZNCCMoy4[1]=ZNCCMoyenne;ZNCCMoy4[2]=ZNCCMoyenne;ZNCCMoy4[3]=ZNCCMoyenne;
	MmmxZNCC11x11NumFloat0(ZNCCMoy4);

	float *PtOut=ZNCCNum;
	unsigned char *PtIn=In;
	for (int i=0;i<11;i++)
	{
		MmmxZNCC11x11NumFloat1(PtIn,PtOut);
		PtOut[8]=float(PtIn[8])-ZNCCMoyenne;
		PtOut[9]=float(PtIn[9])-ZNCCMoyenne;
		PtOut[10]=float(PtIn[10])-ZNCCMoyenne;

		PtIn+=xStride;
		PtOut+=11;
	}	
}
#else
static inline void MmmxZNCC11x11NumFloat(unsigned char *In,float ZNCCMoyenne,float *ZNCCNum)
{}
#endif

inline void MCharImage::ZNCC11x11Num(float *ZNCCNum,int xx,int yy,float ZNCCMoyenne)
{
	int xmin=Max(0,xx-5);
	int xmax=Min(x__,xx+5);
	int ymin=Max(0,yy-5);
	int ymax=Min(y__,yy+5);

	bool Bord=(xmin!=xx-5) || (xmax!=xx+5) || (ymin!=yy-5) || (ymax!=yy+5); //true si le voisinage 11x11 sort de l'image
	
  //Si certains points du voisinage 11x11 sont hors image on les met a zero
  if ((MWithMmxSseUse()==2) && (!Bord))
  {
		unsigned char* PteIn=&(yuv[xmin+ymin*x__]);
    MmmxZNCC11x11NumFloat(PteIn,ZNCCMoyenne,ZNCCNum,x__);

    /*//Verification
    float ZNCCNumVerif[121];
		for (int j=ymin;j<=ymax;j++)
		{
			float* PteOut=&(ZNCCNumVerif[(xmin-xx+5)+(j-yy+5)*11]);
			unsigned char* PteIn=&(yuv[xmin+j*x__]);
			for (int i=xmin;i<=xmax;i++)
			{
				*PteOut=float(*PteIn)-ZNCCMoyenne;
				PteIn++;
				PteOut++;
			}
		}
    for (int i=0;i<121;i++) if (ZNCCNum[i]!=ZNCCNumVerif[i]) cout << ZNCCNum[i] << " = " << ZNCCNumVerif[i] << "    " << endl;*/

	}
	else
	{
		if (Bord)
		{
			for (int i=0;i<121;i++) ZNCCNum[i]=0.0;
		}
	
		for (int j=ymin;j<=ymax;j++)
		{
			float* PteOut=&(ZNCCNum[(xmin-xx+5)+(j-yy+5)*11]);
			unsigned char* PteIn=&(yuv[xmin+j*x__]);
			for (int i=xmin;i<=xmax;i++)
			{
				*PteOut=float(*PteIn)-ZNCCMoyenne;
				PteIn++;
				PteOut++;
			}		
		}
	}	
}

//----------------------------------------------------------------------------------
//Mesure de confiance pour propagation des appariements
//----------------------------------------------------------------------------------
inline short MCharImage::MesureConfiance(int xx,int yy)
{
	unsigned char *Pt=&(yuv[xx+yy*x__]);
	short i=short(Pt[0]);
	short a1,a2,a3,a4;
	if (xx<x__-1) a1=i-short(Pt[1]);
	else a1=0;
	short a11=-a1;
	a1=Max(a1,a11);

	if (xx>0) a2=i-short(Pt[-1]);
	else a2=0;
	short a22=-a2;
	a2=Max(a2,a22);

	if (yy<y__-1) a3=i-short(Pt[x__]);
	else a3=0;
	short a33=-a3;
	a3=Max(a3,a33);

	if (yy>0) a4=i-short(Pt[-x__]);
	else a4=0;
	short a44=-a4;
	a4=Max(a4,a44);
	
	return Max(Max(a1,a2),Max(a3,a4));
}
inline unsigned char MCharImage::MesureConfianceSuperieur(int offset,short Seuil,short MoinsSeuil)
{
	unsigned char *Pt=&(yuv[offset]);
	short i=short(Pt[0]);
	short a1;

	a1=i-short(Pt[1]);
  if ((a1>=Seuil) || (a1<=MoinsSeuil)) return 255;

	a1=i-short(Pt[-1]);
  if ((a1>=Seuil) || (a1<=MoinsSeuil)) return 255;

	a1=i-short(Pt[x__]);
  if ((a1>=Seuil) || (a1<=MoinsSeuil)) return 255;

	a1=i-short(Pt[-x__]);
  if ((a1>=Seuil) || (a1<=MoinsSeuil)) return 255;

  return 0;
}
inline unsigned char MCharImage::CalcImgConfianceSuperieur(MCharImage& Source,short Seuil)
{
	SetSize(Source.x(),Source.y());
	short MoinsSeuilConfiance=-Seuil;
	for (int j=1;j<y__-1;j++)
  {
		int offset=j*x__+1;
		unsigned char* PteOut=&(y(1,j));
		for (int i=1;i<x__-1;i++)
		{
			(*PteOut)=Source.MesureConfianceSuperieur(offset,Seuil,MoinsSeuilConfiance);
			offset++;
			PteOut++;
		}
	}
}

//----------------------------------------------------------------------------------
 //Sum of Absolute Difference
//----------------------------------------------------------------------------------
inline short MCharImage::SAD5x5(int x0,int y0,int x1,int y1,const MCharImage& Data) const
{
	short Somme;
  /*if (MWithMmxSseUse())
  {
		
	}
	else*/
	{
		Somme=0;
		unsigned char* PteIn0=&(yuv[(x0-2)+(y0-2)*x__]);
		unsigned char* PteIn1=&(Data.yuv[(x1-2)+(y1-2)*x__]);
		for (int i=0;i<5;i++)
		{
      short diff=short(PteIn0[0])-short(PteIn1[0]);
      short diffm=-diff;
      Somme+=Max(diff,diffm);
      diff=short(PteIn0[1])-short(PteIn1[1]);
      diffm=-diff;
      Somme+=Max(diff,diffm);
      diff=short(PteIn0[2])-short(PteIn1[2]);
      diffm=-diff;
      Somme+=Max(diff,diffm);
      diff=short(PteIn0[3])-short(PteIn1[3]);
      diffm=-diff;
      Somme+=Max(diff,diffm);
      diff=short(PteIn0[4])-short(PteIn1[4]);
      diffm=-diff;
      Somme+=Max(diff,diffm);      
			PteIn0+=x__;
			PteIn1+=x__;
		}
	}
	return Somme;	  		
}


#endif //fin du fichier

