///////////////////////////////////////////////////////////////////////////////////
// File : CG_SOLVER_SSE.h
///////////////////////////////////////////////////////////////////////////////////
//
// Copyright 2006
// The University of North Carolina at Chapel Hill
// 
///////////////////////////////////////////////////////////////////////////////////
//
// Permission to use, copy, modify, distribute and sell this software and its 
// documentation for any purpose is hereby granted without fee, provided that 
// the above copyright notice appear in all copies and that both that copyright 
// notice and this permission notice appear in supporting documentation. 
// Binaries may be compiled with this software without any royalties or 
// restrictions. 
//
// The University of North Carolina at Chapel Hill makes no representations 
// about the suitability of this software for any purpose. It is provided 
// "as is" without express or implied warranty.
//

#ifndef CG_SOLVER_SSE_H
#define CG_SOLVER_SSE_H

#include "CG_SOLVER.h"
#include <xmmintrin.h>

////////////////////////////////////////////////////////////////////
/// \brief Conjugate gradient Poisson solver that exploits SSE.
///
/// Derives publicly from CG_SOLVER (declared in CG_SOLVER.h) and declares
/// its own virtual solve() and reallocate() -- presumably overriding the
/// base-class versions; confirm against CG_SOLVER.h.
///
/// Only declarations appear in this header; every method body is defined
/// elsewhere.
///
/// NOTE(review): none of the private *SSE helpers takes an element count,
/// so the vector length has to come from somewhere other than the
/// arguments (presumably solver state) -- confirm in the implementation.
////////////////////////////////////////////////////////////////////
class CG_SOLVER_SSE : public CG_SOLVER
{
public:
  //! constructor
  //!
  //! \param maxDepth   no default; meaning is not visible in this header
  //!                   (presumably handed on to CG_SOLVER -- confirm)
  //! \param iterations defaults to 10; presumably an iteration cap -- confirm
  //! \param digits     defaults to 1; presumably a precision target -- confirm
  CG_SOLVER_SSE(int maxDepth, int iterations = 10, int digits = 1);
  //! destructor
  ~CG_SOLVER_SSE();

  //! solve the Poisson problem using SSE
  //!
  //! \param cells list of CELL pointers; taken by value, so the list itself
  //!              (not the pointed-to cells) is copied on every call
  //! \return an int whose meaning is not visible in this header
  //!         (NOTE(review): document once confirmed in the implementation)
  virtual int solve(list<CELL*> cells);
  
private:
  //! reallocate the SSE-friendly scratch arrays
  virtual void reallocate();
  
  // SSE linear algebra operators
  //
  // All of these are declared inline but not defined in this header, so a
  // definition must be visible in every translation unit that calls them.
  // They are private, so callers are limited to this class's own members.
  //
  // The one-line notes below are inferred from the names only (BLAS-like
  // naming); operand roles, in/out direction and any alignment requirement
  // on the float arrays are assumptions to confirm against the definitions.

  //! presumably the dot product of x and y
  inline float dotSSE(float* x, float* y);
  //! presumably a scaled add in the "a*x plus y" family -- confirm destination
  inline void saxpySSE(float a, float* x, float* y);
  //! presumably a scaled add in the "a*y plus x" family -- confirm destination
  inline void saypxSSE(float a, float* x, float* y);
  //! presumably a maximum reduction over x -- confirm whether absolute values are used
  inline float maxSSE(float* x);
  //! presumably an element-wise add of x and y -- confirm destination
  inline void addSSE(float* x, float* y);
  //! presumably an element-wise multiply of x and y -- confirm destination
  inline void multiplySSE(float* x, float* y);
  //! three-array overload of multiplySSE -- confirm which array receives the result
  inline void multiplySSE(float* x, float* y, float* z);
  //! presumably a fused element-wise multiply and subtract over four arrays -- confirm operand roles
  inline void multiplySubtractSSE(float* w, float* x, float* y, float* z);
  //! presumably fills x with val
  inline void setSSE(float* x, float val);
  //! presumably clears x (e.g. to zero) -- confirm
  inline void wipeSSE(float* x);
  //! presumably copies one array to the other -- confirm direction
  inline void copySSE(float* x, float* y);
};

#endif
