diff --git a/src/CalculateE.c b/src/CalculateE.c
index f8c8009e..4c80fb59 100644
--- a/src/CalculateE.c
+++ b/src/CalculateE.c
@@ -5,7 +5,7 @@
  *        Routines for most scattering quantities are in crosssec.c. Also saves internal fields to
  *        file (optional).
  *
- * Copyright (C) 2006-2012 ADDA contributors
+ * Copyright (C) 2006-2013 ADDA contributors
  * This file is part of ADDA.
  *
  * ADDA is free software: you can redistribute it and/or modify it under the terms of the GNU
@@ -60,13 +60,6 @@ extern size_t TotalEFieldPlane;
 // used in iterative.c
 TIME_TYPE tstart_CE;
 
-// EXTERNAL FUNCTIONS
-
-// GenerateB.c
-void GenerateB(enum incpol which,doublecomplex *x);
-// iterative.c
-int IterativeSolver(enum iter method);
-
 // LOCAL VARIABLES
 
 #define MUEL_HEADER "s11 s12 s13 s14 s21 s22 s23 s24 s31 s32 s33 s34 s41 s42 s43 s44"
@@ -80,6 +73,13 @@ int IterativeSolver(enum iter method);
 #define ANGLE_FORMAT "%.2f"
 #define RMSE_FORMAT "%.3E"
 
+// EXTERNAL FUNCTIONS
+
+// GenerateB.c
+void GenerateB(enum incpol which,doublecomplex *x);
+// iterative.c
+int IterativeSolver(enum iter method);
+
 //============================================================
 
 static void ComputeMuellerMatrix(double matrix[4][4], const doublecomplex s1,const doublecomplex s2,
@@ -111,37 +111,6 @@ static void ComputeMuellerMatrix(double matrix[4][4], const doublecomplex s1,con
 
 //============================================================
 
-static void ATT_UNUSED ComputeMuellerMatrixNorm(double matrix[4][4],const doublecomplex s1,
-	const doublecomplex s2,const doublecomplex s3,const doublecomplex s4)
-/* computer mueller matrix from scattering matrix elements s1, s2, s3, s4, according to formula
- * 3.16 from Bohren and Huffman; normalize all elements to S11 (except itself)
- */
-{
-	matrix[0][0] = 0.5*(cMultConRe(s1,s1)+cMultConRe(s2,s2)+cMultConRe(s3,s3)+cMultConRe(s4,s4));
-	matrix[0][1] = 0.5*(cMultConRe(s2,s2)-cMultConRe(s1,s1)+cMultConRe(s4,s4)-cMultConRe(s3,s3))
-	             / matrix[0][0];
-	matrix[0][2] = (cMultConRe(s2,s3)+cMultConRe(s1,s4))/matrix[0][0];
-	matrix[0][3] = (cMultConIm(s2,s3)-cMultConIm(s1,s4))/matrix[0][0];
-
-	matrix[1][0] = 0.5*(cMultConRe(s2,s2)-cMultConRe(s1,s1)+cMultConRe(s3,s3)-cMultConRe(s4,s4))
-	             / matrix[0][0];
-	matrix[1][1] = 0.5*(cMultConRe(s2,s2)+cMultConRe(s1,s1)-cMultConRe(s3,s3)-cMultConRe(s4,s4))
-	             / matrix[0][0];
-	matrix[1][2] = (cMultConRe(s2,s3)-cMultConRe(s1,s4))/matrix[0][0];
-	matrix[1][3] = (cMultConIm(s2,s3)+cMultConIm(s1,s4))/matrix[0][0];
-
-	matrix[2][0] = (cMultConRe(s2,s4)+cMultConRe(s1,s3))/matrix[0][0];
-	matrix[2][1] = (cMultConRe(s2,s4)-cMultConRe(s1,s3))/matrix[0][0];
-	matrix[2][2] = (cMultConRe(s1,s2)+cMultConRe(s3,s4))/matrix[0][0];
-	matrix[2][3] = (cMultConIm(s2,s1)+cMultConIm(s4,s3))/matrix[0][0];
-
-	matrix[3][0] = (cMultConIm(s4,s2)+cMultConIm(s1,s3))/matrix[0][0];
-	matrix[3][1] = (cMultConIm(s4,s2)-cMultConIm(s1,s3))/matrix[0][0];
-	matrix[3][2] = (cMultConIm(s1,s2)-cMultConIm(s3,s4))/matrix[0][0];
-	matrix[3][3] = (cMultConRe(s1,s2)-cMultConRe(s3,s4))/matrix[0][0];
-}
-
-//==============================================================
 INLINE void InitMuellerIntegrFile(const int type,const char * restrict fname,FILE * restrict * file,
 	char * restrict buf,const size_t buf_size,double * restrict *mult)
 /* If 'phi_int_type' matches 'type', appropriate file (name given by 'fname') is created (with
diff --git a/src/Makefile b/src/Makefile
index eba25a5e..a11c5b13 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -79,7 +79,7 @@ ifneq ($(if $(MAKECMDGOALS),$(if $(filter $(NONTRIVIAL),$(MAKECMDGOALS)),1,),1),
 # below are appended to the list specified elsewhere.
 # Full list of possible options is the following:
 VALID_OPTS := DEBUG DEBUGFULL FFT_TEMPERTON PRECISE_TIMING NOT_USE_LOCK ONLY_LOCKFILE NO_FORTRAN \
-              NO_CPP OVERRIDE_STDC_TEST OCL_READ_SOURCE_RUNTIME CLFFT_APPLE SPARSE
+              NO_CPP OVERRIDE_STDC_TEST OCL_READ_SOURCE_RUNTIME CLFFT_APPLE SPARSE USE_SSE3
 
 # Debug mode. By default, release configuration is used (no debug, no warnings, maximum
 # optimization). DEBUG turns on producing debugging symbols (-g) and warnings and brings
@@ -224,22 +224,42 @@ else
   $(info Release mode)
   DBGLVL := 0
 endif
-ifneq ($(filter FFT_TEMPERTON,$(OPTIONS)),)
-  $(info Temperton FFT)
-  CDEFS += -DFFT_TEMPERTON
-  ifeq ($(filter NO_FORTRAN,$(OPTIONS)),)
-    FSOURCE += cfft99D.f
-  else
-    $(error Temperton FFT (FFT_TEMPERTON) is implemented in Fortran, hence incompatible with NO_FORTRAN)
+ifneq ($(filter SPARSE,$(OPTIONS)),)
+  $(info Sparse (non-FFT) mode)
+  ifneq ($(filter FFT_TEMPERTON,$(OPTIONS)),)
+    $(error SPARSE turns off all FFT-related code, so it is incompatible with FFT_TEMPERTON)
+  endif
+  ifneq ($(filter CLFFT_APPLE,$(OPTIONS)),)
+    $(error SPARSE turns off all FFT-related code, so it is incompatible with CLFFT_APPLE)
+  endif
+  ifneq ($(filter PRECISE_TIMING,$(OPTIONS)),)
+    $(error SPARSE is currently incompatible with PRECISE_TIMING)
   endif
+  
+  CDEFS += -DSPARSE
 else
-  $(info FFTW3)
-  LDLIBS += -lfftw3
-  ifdef FFTW3_INC_PATH
-    CFLAGS += -I$(FFTW3_INC_PATH)
+  CSOURCE += fft.c
+  ifneq ($(filter FFT_TEMPERTON,$(OPTIONS)),)
+    $(info Temperton FFT)
+    CDEFS += -DFFT_TEMPERTON
+    ifeq ($(filter NO_FORTRAN,$(OPTIONS)),)
+      FSOURCE += cfft99D.f
+    else
+      $(error Temperton FFT (FFT_TEMPERTON) is implemented in Fortran, hence incompatible with NO_FORTRAN)
+    endif
+  else
+    $(info FFTW3)
+    LDLIBS += -lfftw3
+    ifdef FFTW3_INC_PATH
+      CFLAGS += -I$(FFTW3_INC_PATH)
+    endif
+    ifdef FFTW3_LIB_PATH
+      LDFLAGS += -L$(FFTW3_LIB_PATH)
+    endif
   endif
-  ifdef FFTW3_LIB_PATH
-    LDFLAGS += -L$(FFTW3_LIB_PATH)
+  ifneq ($(filter CLFFT_APPLE,$(OPTIONS)),)
+    # Here only the info is printed, the main logic is in ocl/Makefile
+    $(info Apple clFFT routines)
   endif
 endif
 ifneq ($(filter PRECISE_TIMING,$(OPTIONS)),)
@@ -270,24 +290,15 @@ ifneq ($(filter OVERRIDE_STDC_TEST,$(OPTIONS)),)
   $(info Overriding test for C99 conformance)
   CDEFS += -DOVERRIDE_STDC_TEST
 endif
-
-ifneq ($(filter SPARSE,$(OPTIONS)),)
-  CDEFS += -DADDA_SPARSE
-else
-  CSOURCE += fft.c
-endif
 ifneq ($(filter USE_SSE3,$(OPTIONS)),)
   CDEFS += -DUSE_SSE3
   CFLAGS += -msse3
+  $(info Using SSE3 optimizations)
 endif
 ifneq ($(filter OCL_READ_SOURCE_RUNTIME,$(OPTIONS)),)
   # Here only the info is printed, the main logic is in ocl/Makefile
   $(info Read CL sources at runtime)
 endif
-ifneq ($(filter CLFFT_APPLE,$(OPTIONS)),)
-  # Here only the info is printed, the main logic is in ocl/Makefile
-  $(info Apple clFFT routines)
-endif
 # Process EXTRA_FLAGS
 ifneq ($(strip $(EXTRA_FLAGS)),)
   $(info Extra compiler options: '$(EXTRA_FLAGS)')
@@ -446,12 +457,12 @@ clean: cleanseq cleanmpi cleanocl
 # compilation and thus contain quite heavy processing.
 cleanseq:
 	@echo "Removing sequential compiled files"
-	cd $(SEQ) && rm -f *.o *.d *.d.* $(OPTSFILES) $(PROGSEQ) $(PROGSEQ).exe
+	cd $(SEQ) && rm -f *.o *.d $(OPTSFILES) $(PROGSEQ) $(PROGSEQ).exe
 
 cleanmpi:
 	@echo "Removing MPI compiled files"
-	cd $(MPI) && rm -f *.o *.d *.d.* $(OPTSFILES) $(PROGMPI) $(PROGMPI).exe
+	cd $(MPI) && rm -f *.o *.d $(OPTSFILES) $(PROGMPI) $(PROGMPI).exe
 
 cleanocl:
 	@echo "Removing OpenCL compiled files"
-	cd $(OCL) && rm -f *.o *.d *.d.* $(OPTSFILES) $(PROGOCL) $(PROGOCL).exe *.clstr
+	cd $(OCL) && rm -f *.o *.d $(OPTSFILES) $(PROGOCL) $(PROGOCL).exe *.clstr
diff --git a/src/calculator.c b/src/calculator.c
index fa29529c..c3a08ea3 100644
--- a/src/calculator.c
+++ b/src/calculator.c
@@ -3,7 +3,7 @@
  * Descr: all the initialization is done here before actually calculating internal fields; includes
  *        calculation of couple constants
  *
- * Copyright (C) 2006-2010,2012 ADDA contributors
+ * Copyright (C) 2006-2010,2013 ADDA contributors
  * This file is part of ADDA.
  *
  * ADDA is free software: you can redistribute it and/or modify it under the terms of the GNU
@@ -27,6 +27,7 @@
 #include "interaction.h"
 #include "io.h"
 #include "memory.h"
+#include "oclcore.h"
 #include "Romberg.h"
 #include "timing.h"
 #include "vars.h"
@@ -35,10 +36,6 @@
 #include <stdlib.h>
 #include <string.h>
 
-#ifdef OPENCL
-#	include "oclcore.h"
-#endif
-
 // SEMI-GLOBAL VARIABLES
 
 // defined and initialized in crosssec.c
@@ -64,26 +61,27 @@ double dtheta_deg,dtheta_rad; // delta theta in degrees and radians
 	// amplitude matrix for different values of alpha
 doublecomplex * restrict ampl_alphaX,* restrict ampl_alphaY;
 double * restrict muel_alpha; // mueller matrix for different values of alpha
-// used in fft.c
-  // tables of integrals
-double * restrict tab1,* restrict tab2,* restrict tab3,* restrict tab4,* restrict tab5,
-	* restrict tab6,* restrict tab7,* restrict tab8,* restrict tab9,* restrict tab10;
-/* it is preferable to declare the following as "* restrict * restrict", but it is hard to make it
- * generally compatible with Free_iMatrix function syntax. However, it is defined so in fft.c.
- */
-int ** restrict tab_index; // matrix for indexing of table arrays
+
 // used in crosssec.c
 double * restrict E2_alldir; // square of E, calculated for alldir
 double * restrict E2_alldir_buffer; // buffer to accumulate E2_alldir
 doublecomplex cc[MAX_NMAT][3]; // couple constants
 	// arrays of exponents along 3 axes (for calc_field)
+#ifndef SPARSE
 doublecomplex * restrict expsX,* restrict expsY,* restrict expsZ;
+#endif
+
 // used in iterative.c
 doublecomplex *rvec;                 // current residual
 doublecomplex * restrict Avecbuffer; // used to hold the result of matrix-vector products
 // auxiliary vectors, used in some iterative solvers (with more meaningful names)
 doublecomplex * restrict vec1,* restrict vec2,* restrict vec3;
 
+#ifdef SPARSE
+// used in matvec.c
+doublecomplex * restrict arg_full; // vector to hold argvec for all dipoles
+#endif
+
 // LOCAL VARIABLES
 
 static size_t block_theta; // size of one block of mueller matrix - 16*nTheta
@@ -95,9 +93,9 @@ static double * restrict out;
 // EXTERNAL FUNCTIONS
 
 // CalculateE.c
-extern int CalculateE(enum incpol which,enum Eftype type);
-extern void MuellerMatrix(void);
-extern void SaveMuellerAndCS(double * restrict in);
+int CalculateE(enum incpol which,enum Eftype type);
+void MuellerMatrix(void);
+void SaveMuellerAndCS(double * restrict in);
 
 //============================================================
 
@@ -247,91 +245,6 @@ static void InitCC(const enum incpol which)
 
 //============================================================
 
-static double * ATT_MALLOC ReadTableFile(const char * restrict sh_fname,const int size_multiplier)
-{
-	FILE * restrict ftab;
-	double * restrict tab_n;
-	int size;
-	char fname[MAX_FNAME];
-	int i;
-
-	size=TAB_SIZE*size_multiplier;
-	memory+=size*sizeof(double);
-	if (!prognosis) {
-		// allocate memory for tab_n
-		MALLOC_VECTOR(tab_n,double,size,ALL);
-		// open file
-		SnprintfErr(ALL_POS,fname,MAX_FNAME,TAB_PATH"%s",sh_fname);
-		ftab=FOpenErr(fname,"r",ALL_POS);
-		// scan file
-		for (i=0; i<size; i++) if (fscanf(ftab,"%lf\t",&(tab_n[i]))!=1)
-			LogError(ALL_POS,"Scan error in file '%s'. Probably file is too small",fname);
-		if (!feof(ftab))
-			LogWarning(EC_WARN,ONE_POS,"File '%s' is longer than specified size (%d)",fname,size);
-		// close file
-		FCloseErr(ftab,fname,ALL_POS);
-		return tab_n;
-	}
-	else return NULL;
-}
-
-//============================================================
-
-static void ReadTables(void)
-{
-	int i, j, ymax, Rm2, Rm2x;
-
-	TIME_TYPE tstart=GET_TIME();
-	tab1=ReadTableFile(TAB_FNAME(1),1);
-	tab2=ReadTableFile(TAB_FNAME(2),6);
-	tab3=ReadTableFile(TAB_FNAME(3),3);
-	tab4=ReadTableFile(TAB_FNAME(4),18);
-	tab5=ReadTableFile(TAB_FNAME(5),6);
-	tab6=ReadTableFile(TAB_FNAME(6),36);
-	tab7=ReadTableFile(TAB_FNAME(7),1);
-	tab8=ReadTableFile(TAB_FNAME(8),6);
-	tab9=ReadTableFile(TAB_FNAME(9),1);
-	tab10=ReadTableFile(TAB_FNAME(10),6);
-	Timing_FileIO += GET_TIME() - tstart;
-
-	if (!prognosis) {
-		// allocate memory for tab_index
-		MALLOC_IMATRIX(tab_index,1,TAB_RMAX,0,TAB_RMAX,ALL);
-		// fill tab_index
-		Rm2=TAB_RMAX*TAB_RMAX;
-		tab_index[1][0] = 0;
-		for (i=1; i<=TAB_RMAX; i++) {
-			Rm2x=Rm2-i*i;
-			ymax = MIN(i,(int)floor(sqrt(Rm2x)));
-			for (j=0; j<ymax; j++) {
-				tab_index[i][j+1] = tab_index[i][j] + MIN(j,(int)floor(sqrt(Rm2x-j*j)))+1;
-			}
-			if (i<TAB_RMAX) tab_index[i+1][0] = tab_index[i][ymax]
-			                                  + MIN(ymax,(int)floor(sqrt(Rm2x-ymax*ymax)))+1;
-		}
-	}
-	// PRINTZ("P[5,3]=%d (should be 41)\n",tab_index[5][3]);
-}
-
-//============================================================
-
-static void FreeTables(void)
-{
-	Free_iMatrix(tab_index,1,TAB_RMAX,0);
-	Free_general(tab1);
-	Free_general(tab2);
-	Free_general(tab3);
-	Free_general(tab4);
-	Free_general(tab5);
-	Free_general(tab6);
-	Free_general(tab7);
-	Free_general(tab8);
-	Free_general(tab9);
-	Free_general(tab10);
-}
-
-//============================================================
-
 static void calculate_one_orientation(double * restrict res)
 // performs calculation for one orientation; may do orientation averaging and put the result in res
 {
@@ -426,12 +339,12 @@ static void AllocateEverything(void)
 		MALLOC_VECTOR(Avecbuffer,complex,local_nRows,ALL);
 	}
 	memory+=5*tmp;
-#ifdef ADDA_SPARSE
+#ifdef SPARSE
 	if (!prognosis) {
 		MALLOC_VECTOR(arg_full,complex,3*nvoid_Ndip,ALL);
 	}
 	memory+=3*nvoid_Ndip*sizeof(*arg_full);
-#endif //ADDA_SPARSE
+#endif // !SPARSE
 	/* additional vectors for iterative methods. Potentially, this procedure can be fully automated
 	 * for any new iterative solver, based on the information contained in structure array 'params'
 	 * in file iterative.c. However, this requires different order of function calls to extract this
@@ -458,11 +371,11 @@ static void AllocateEverything(void)
 	 * non-zero, then change the above condition to allocate memory for these vectors. Variable
 	 * memory should be incremented to reflect the total allocated memory.
 	 */
-#ifndef ADDA_SPARSE
+#ifndef SPARSE
 	MALLOC_VECTOR(expsX,complex,boxX,ALL);
 	MALLOC_VECTOR(expsY,complex,boxY,ALL);
 	MALLOC_VECTOR(expsZ,complex,local_Nz_unif,ALL);
-#endif //ADDA_SPARSE
+#endif // !SPARSE
 	if (yzplane) {
 		tmp=2*(double)nTheta;
 		if (!prognosis) {
@@ -584,25 +497,16 @@ static void FreeEverything(void)
  * actually allocated.
  */
 {
-	if (IntRelation == G_SO || IntRelation == G_IGT_SO) FreeTables();
-#ifndef ADDA_SPARSE	
+	FreeInteraction();
+#ifndef SPARSE	
 	Free_FFT_Dmat();
 	Free_cVector(expsX);
 	Free_cVector(expsY);
 	Free_cVector(expsZ);
+	Free_general(position); // allocated in MakeParticle();
 #else	
-	Free_general(position_full);
-	Free_general(material_full);
-#ifdef PARALLEL
-	Free_general(arg_full);
-	Free_general(proc_mem_position);
-	Free_general(proc_mem_material);
-	Free_general(proc_mem_argvec);
-	Free_general(proc_disp_position);
-	Free_general(proc_disp_material);
-	Free_general(proc_disp_argvec);
-#endif //PARALLEL
-#endif //ADDA_SPARSE
+	Free_general(position_full); // allocated in MakeParticle();
+#endif // SPARSE
 
 	Free_cVector(xvec);
 	Free_cVector(rvec);
@@ -657,9 +561,8 @@ static void FreeEverything(void)
 		Free_general(Egrid_buffer);
 #endif
 	}
-	// these 3 were allocated in MakeParticle
+	// these 2 were allocated in MakeParticle
 	Free_general(DipoleCoord);
-	Free_general(position);
 	Free_general(material);
 
 	if (orient_avg) {
@@ -702,19 +605,14 @@ void Calculator (void)
 	}
 	else dtheta_deg=dtheta_rad=block_theta=0;
 	finish_avg=false;
-	// read tables if needed
-	if (IntRelation == G_SO || IntRelation == G_IGT_SO) ReadTables();
-
 	// Do preliminary setup for MatVec
 	InitInteraction();
-
-#ifndef ADDA_SPARSE
+#ifndef SPARSE
 	// initialize D matrix (for matrix-vector multiplication)
 	D("InitDmatrix started");
 	InitDmatrix();
 	D("InitDmatrix finished");
-#endif //ADDA_SPARSE
-
+#endif // !SPARSE
 	// allocate most (that is not already allocated; perform memory analysis
 	AllocateEverything();
 	// finish initialization
diff --git a/src/cmplx.h b/src/cmplx.h
index 90623ba8..78c491e2 100644
--- a/src/cmplx.h
+++ b/src/cmplx.h
@@ -882,6 +882,9 @@ INLINE double Rad2Deg(const double rad)
 }
 
 #ifdef USE_SSE3
+
+//============================================================
+
 static inline __m128d cmul(__m128d a,__m128d b)
 // complex multiplication
 {
@@ -894,12 +897,14 @@ static inline __m128d cmul(__m128d a,__m128d b)
 	return _mm_addsub_pd(t,b);
 }
 
+//============================================================
+
 static inline __m128d cadd(__m128d a,__m128d b)
 // complex addition
 {
 	return _mm_add_pd(a,b);
 }
 
-#endif //USE_SSE3
+#endif // USE_SSE3
 
 #endif // __cmplx_h
diff --git a/src/comm.c b/src/comm.c
index 19f16517..8b87b41a 100644
--- a/src/comm.c
+++ b/src/comm.c
@@ -3,7 +3,7 @@
  * Descr: incorporates all parallelization related code, so most of it is directly involved in or
  *        closely related to interprocess communication
  *
- * Copyright (C) 2006-2012 ADDA contributors
+ * Copyright (C) 2006-2013 ADDA contributors
  * This file is part of ADDA.
  *
  * ADDA is free software: you can redistribute it and/or modify it under the terms of the GNU
@@ -35,7 +35,7 @@
 #include <string.h>
 
 #ifdef ADDA_MPI
-MPI_Datatype mpi_dcomplex,mpi_double3,mpi_dcomplex3;  // combined datatypes
+MPI_Datatype mpi_dcomplex,mpi_int3,mpi_double3,mpi_dcomplex3;  // combined datatypes
 int *recvcounts,*displs; // arrays of size ringid required for AllGather operations
 bool displs_init=false;  // whether arrays above are initialized
 #endif
@@ -46,31 +46,25 @@ bool displs_init=false;  // whether arrays above are initialized
  */
 #define SYNCHRONIZE_TIMING
 
+#ifdef PARALLEL
+#ifndef SPARSE
+
 // SEMI-GLOBAL VARIABLES
 
 // defined and allocated in fft.c
 extern double * restrict BT_buffer, * restrict BT_rbuffer;
+
 // defined and initialized in timing.c
 extern TIME_TYPE Timing_InitDmComm;
 
 // LOCAL VARIABLES
 
-#ifdef PARALLEL
-
 static int Ntrans;         // number of transmissions; used in CalcPartner
-#ifndef ADDA_SPARSE
 static int * restrict gr_comm_size;  // sizes of transmissions for granule generator communications
 static int * restrict gr_comm_overl; // shows whether two sequential transmissions overlap
 static unsigned char * restrict gr_comm_ob; // buffer for overlaps
 static void * restrict gr_comm_buf;         // buffer for MPI transfers
-#else //ADDA_SPARSE
-int * proc_mem_position;
-int * proc_mem_material;
-int * proc_mem_argvec; 
-int * proc_disp_position;
-int * proc_disp_material;
-int * proc_disp_argvec; 
-#endif //ADDA_SPARSE
+#endif // !SPARSE
 
 
 // First several functions are defined only in parallel mode
@@ -91,7 +85,8 @@ static void RecoverCommandLine(int *argc_p,char ***argv_p)
 }
 //============================================================
 
-#ifndef ADDA_SPARSE
+#ifndef SPARSE
+
 INLINE size_t IndexBlock(const size_t x,const size_t y,const size_t z,const size_t lengthY)
 // index block; used in BlockTranspose
 {
@@ -119,7 +114,7 @@ INLINE int CalcPartner(const int tran)
 	}
 	return part;
 }
-#endif //ADDA_SPARSE
+#endif // !SPARSE
 
 //============================================================
 
@@ -181,6 +176,13 @@ static MPI_Datatype MPIVarType(var_type type,bool reduce,int *mult)
 		}
 		else res=mpi_dcomplex;
 	}
+	else if (type==int3_type) {
+		if (reduce) {
+			res=MPI_INT;
+			*mult=3;
+		}
+		else res=mpi_int3;
+	}
 	else if (type==double3_type) {
 		if (reduce) {
 			res=MPI_DOUBLE;
@@ -224,7 +226,9 @@ void InitDispls(void)
 //============================================================
 
 void AllGather(void * restrict x_from,void * restrict x_to,const var_type type,TIME_TYPE *timing)
-// Gather distributed arrays; works for all types;increments 'timing' (if not NULL) by the time used
+/* Gather distributed arrays; works for all types; x_from can be NULL then the data from x_to is used (in_place)
+ * increments 'timing' (if not NULL) by the time used
+ */
 {
 #ifdef ADDA_MPI
 	MPI_Datatype mes_type;
@@ -240,7 +244,8 @@ void AllGather(void * restrict x_from,void * restrict x_to,const var_type type,T
 	}
 	InitDispls(); // actually initialization is done only once
 	mes_type=MPIVarType(type,false,NULL);
-	MPI_Allgatherv(x_from,local_nvoid_Ndip,mes_type,x_to,recvcounts,displs,mes_type,MPI_COMM_WORLD);
+	if (x_from==NULL) MPI_Allgatherv(MPI_IN_PLACE,0,mes_type,x_to,recvcounts,displs,mes_type,MPI_COMM_WORLD);
+	else MPI_Allgatherv(x_from,local_nvoid_Ndip,mes_type,x_to,recvcounts,displs,mes_type,MPI_COMM_WORLD);
 	if (timing!=NULL) (*timing)+=GET_TIME()-tstart;
 #endif
 }
@@ -269,15 +274,19 @@ void InitComm(int *argc_p UOIP,char ***argv_p UOIP)
 	// initialize ringid and nprocs
 	MPI_Comm_rank(MPI_COMM_WORLD,&ringid);
 	MPI_Comm_size(MPI_COMM_WORLD,&nprocs);
+#ifndef SPARSE
 	// initialize Ntrans
 	if (IS_EVEN(nprocs)) Ntrans=nprocs-1;
 	else Ntrans=nprocs;
+#endif
 	// define a few derived datatypes
 	/* this is intimately tied to the type definition of doublecomplex; when switching to C99
 	 * complex datatypes, this should be replaced just by MPI_DOUBLE_COMPLEX.
 	 */
 	MPI_Type_contiguous(2,MPI_DOUBLE,&mpi_dcomplex);
 	MPI_Type_commit(&mpi_dcomplex);
+	MPI_Type_contiguous(3,MPI_INT,&mpi_int3);
+	MPI_Type_commit(&mpi_int3);
 	MPI_Type_contiguous(3,MPI_DOUBLE,&mpi_double3);
 	MPI_Type_commit(&mpi_double3);
 	MPI_Type_contiguous(3,mpi_dcomplex,&mpi_dcomplex3);
@@ -294,12 +303,12 @@ void InitComm(int *argc_p UOIP,char ***argv_p UOIP)
 	ringid=ADDA_ROOT;
 #endif
 
-#ifndef ADDA_SPARSE //CheckNprocs does not exist in sparse mode
+#ifndef SPARSE // CheckNprocs does not exist in sparse mode
 	/* check if weird number of processors is specified; called even in sequential mode to
 	 * initialize weird_nprocs
 	 */
 	CheckNprocs();
-#endif //ADDA_SPARSE
+#endif // !SPARSE
 }
 
 //============================================================
@@ -467,12 +476,10 @@ void MyInnerProduct(void * restrict data UOIP,const var_type type UOIP,size_t n_
 void ParSetup(void)
 // initialize common parameters; need to do in the beginning to enable call to MakeParticle
 {
-
-#ifndef ADDA_SPARSE //FFT mode initialization
-
-#ifdef PARALLEL
+#ifndef SPARSE //FFT mode initialization
+#	ifdef PARALLEL
 	int unitZ,unitX;
-#endif
+#	endif
 	// calculate size of 3D grid
 	gridX=fftFit(2*boxX,nprocs);
 	gridY=fftFit(2*boxY,1);
@@ -484,7 +491,7 @@ void ParSetup(void)
 	 * except for XY values, used in granule generator
 	 */
 	gridYZ=MultOverflow(gridY,gridZ,ALL_POS,"gridYZ");
-#ifdef PARALLEL
+#	ifdef PARALLEL
 	unitZ=smallZ/nprocs; // this should always be an exact division
 	local_z0=ringid*unitZ;
 	local_z1=(ringid+1)*unitZ;
@@ -493,13 +500,13 @@ void ParSetup(void)
 	unitX=gridX/nprocs;
 	local_x0=ringid*unitX;
 	local_x1=(ringid+1)*unitX;
-#else
+#	else
 	local_z0=0;
 	local_z1=smallZ;
 	local_z1_coer=boxZ;
 	local_x0=0;
 	local_x1=gridX;
-#endif
+#	endif
 	if (local_z1_coer<=local_z0) {
 		LogWarning(EC_INFO,ALL_POS,"No real dipoles are assigned");
 		local_z1_coer=local_z0;
@@ -509,29 +516,30 @@ void ParSetup(void)
 	boxXY=boxX*(size_t)boxY; // overflow check is covered by gridYZ above
 	local_Ndip=MultOverflow(boxXY,local_z1_coer-local_z0,ALL_POS,"local_Ndip");
 	D("%i :  %i %i %i %zu %zu \n",ringid,local_z0,local_z1_coer,local_z1,local_Ndip,local_Nx);
-	
-#else //sparse mode initialization (completely different from FFT mode)
-
-#ifdef PARALLEL
+#else // SPARSE
+	/* For sparse mode, nvoid_Ndip is defined in InitDipFile(), and here we define local_nvoid_d0 and local_nvoid_d1,
+	 * since they are required already in ReadDipFile()
+	 */
+	D("%zu",nvoid_Ndip);
+#	ifdef PARALLEL
 	double unit_f=1.0/nprocs;
 	//making sure that the first local_f0 and last local_f1 are 0.0 and 1.0
-	local_f0 = (ringid == 0) ? 0.0 : ringid*unit_f;
-	local_f1 = (ringid == nprocs-1) ? 1.0 : (ringid+1)*unit_f; 
-#else
-	local_f0 = 0.0;
-	local_f1 = 1.0;
-#endif //PARALLEL
-
-#endif //ADDA_SPARSE
+	double local_f0 = (ringid == 0) ? 0.0 : ringid*unit_f;
+	double local_f1 = (ringid == nprocs-1) ? 1.0 : (ringid+1)*unit_f;
+	local_nvoid_d0 = local_f0*nvoid_Ndip;
+	local_nvoid_d1 = local_f1*nvoid_Ndip;
+#	else
+	local_nvoid_d0=0;
+	local_nvoid_d1=nvoid_Ndip;
+#	endif // PARALLEL
+#endif
 }
 
 //============================================================
 
 void SetupLocalD(void)
-// initialize local_nvoid_d0 and local_nvoid_d1
+// initialize local_nvoid_d0 and local_nvoid_d1 in FFT mode. In sparse mode those are set earlier in ParSetup()
 {
-#ifndef ADDA_SPARSE //FFT mode
- 
 #ifdef ADDA_MPI
 	/* use of exclusive scan (MPI_Exscan) is logically more suitable, but it has special behavior
 	 * for the ringid=0. The latter would require special additional arrangements.
@@ -542,19 +550,11 @@ void SetupLocalD(void)
 	local_nvoid_d0=0;
 	local_nvoid_d1=local_nvoid_Ndip;
 #endif
-
-#else // sparse mode
-
-	D("%d", (int)nvoid_Ndip);
-	local_nvoid_d0 = local_f0*nvoid_Ndip;
-	local_nvoid_d1 = local_f1*nvoid_Ndip;
-
-#endif //ADDA_SPARSE
 }
 
 //============================================================
 
-#ifndef ADDA_SPARSE
+#ifndef SPARSE
 
 void BlockTranspose(doublecomplex * restrict X UOIP,TIME_TYPE *timing UOIP)
 /* do the data-transposition, i.e. exchange, between fftX and fftY&fftZ; specializes at Xmatrix;
@@ -866,70 +866,4 @@ bool ExchangePhaseShifts(doublecomplex * restrict bottom, doublecomplex * restri
 
 #endif // PARALLEL
 
-#endif //ADDA_SPARSE
-
-//============================================================
-
-#ifdef ADDA_SPARSE //functions exclusive to the sparse mode
-
-void InitArraySync() {
-#ifdef ADDA_MPI
-	MALLOC_VECTOR(proc_mem_position,int,nprocs,ALL);
-	MALLOC_VECTOR(proc_mem_material,int,nprocs,ALL);
-	MALLOC_VECTOR(proc_mem_argvec,int,nprocs,ALL);
-	MALLOC_VECTOR(proc_disp_position,int,nprocs,ALL);
-	MALLOC_VECTOR(proc_disp_material,int,nprocs,ALL);
-	MALLOC_VECTOR(proc_disp_argvec,int,nprocs,ALL);
-
-	proc_mem_position[ringid] = 3*local_nvoid_Ndip;
-	proc_mem_material[ringid] = local_nvoid_Ndip;
-	proc_mem_argvec[ringid] = 3*local_nvoid_Ndip*2; //*2 due to 2 doubles in complex
-	proc_disp_position[ringid] = 3*local_nvoid_d0;
-	proc_disp_material[ringid] = local_nvoid_d0;
-	proc_disp_argvec[ringid] = 3*local_nvoid_d0*2;
-	
-	MPI_Allgather(MPI_IN_PLACE,0,MPI_INT,proc_mem_position,1,MPI_INT,MPI_COMM_WORLD);
-	MPI_Allgather(MPI_IN_PLACE,0,MPI_INT,proc_mem_material,1,MPI_INT,MPI_COMM_WORLD);
-	MPI_Allgather(MPI_IN_PLACE,0,MPI_INT,proc_mem_argvec,1,MPI_INT,MPI_COMM_WORLD);
-	MPI_Allgather(MPI_IN_PLACE,0,MPI_INT,proc_disp_position,1,MPI_INT,MPI_COMM_WORLD);
-	MPI_Allgather(MPI_IN_PLACE,0,MPI_INT,proc_disp_material,1,MPI_INT,MPI_COMM_WORLD);
-	MPI_Allgather(MPI_IN_PLACE,0,MPI_INT,proc_disp_argvec,1,MPI_INT,MPI_COMM_WORLD);
-#endif
-}
-
-//============================================================
-
-void SyncPosition(int * restrict pos)
-{
-#ifdef ADDA_MPI
-	static const MPI_Datatype mes_type = MPI_INT;
-	
-	MPI_Allgatherv(MPI_IN_PLACE, 0, mes_type,pos, proc_mem_position,
-	               proc_disp_position, mes_type, MPI_COMM_WORLD);
-		
-#endif
-}
-
-//============================================================
-
-void SyncMaterial(unsigned char * restrict mat)
-{
-#ifdef ADDA_MPI	
-	static const MPI_Datatype mes_type = MPI_CHAR;
-	MPI_Allgatherv(MPI_IN_PLACE, 0, mes_type, mat, proc_mem_material,
-	               proc_disp_material, mes_type, MPI_COMM_WORLD);
-#endif
-}
-
-//============================================================
-
-void SyncArgvec()
-{
-#ifdef ADDA_MPI
-	static const MPI_Datatype mes_type = MPI_DOUBLE;
-	MPI_Allgatherv(MPI_IN_PLACE, 0, mes_type,(double *)arg_full, proc_mem_argvec,
-	               proc_disp_argvec, mes_type, MPI_COMM_WORLD);
-#endif
-}
-
-#endif //ADDA_SPARSE
+#endif // !SPARSE
diff --git a/src/comm.h b/src/comm.h
index c0ecb4dd..d0e0e8cd 100644
--- a/src/comm.h
+++ b/src/comm.h
@@ -2,7 +2,7 @@
  * $Date::                            $
  * Descr: definitions of communication global variables and routines
  *
- * Copyright (C) 2006-2012 ADDA contributors
+ * Copyright (C) 2006-2013 ADDA contributors
  * This file is part of ADDA.
  *
  * ADDA is free software: you can redistribute it and/or modify it under the terms of the GNU
@@ -31,27 +31,11 @@
 #define UOIP ATT_UNUSED
 #endif
 
-typedef enum {uchar_type,int_type,sizet_type,double_type,double3_type,cmplx_type,cmplx3_type} var_type;
-
-#ifdef PARALLEL
-#ifdef ADDA_SPARSE
-extern int * proc_mem_position;
-extern int * proc_mem_material;
-extern int * proc_mem_argvec; 
-extern int * proc_disp_position;
-extern int * proc_disp_material;
-extern int * proc_disp_argvec; 
-#endif //ADDA_SPARSE
-#endif //PARALLEL
+typedef enum {uchar_type,int_type,int3_type,sizet_type,double_type,double3_type,cmplx_type,cmplx3_type} var_type;
 
 void Stop(int) ATT_NORETURN;
 void Synchronize(void);
-#ifndef ADDA_SPARSE
-void BlockTranspose(doublecomplex * restrict X,TIME_TYPE *timing);
-void BlockTranspose_Dm(doublecomplex * restrict X,size_t lengthY,size_t lengthZ);
-#endif //ADDA_SPARSE
 double AccumulateMax(double data,double *max);
-
 void Accumulate(double * restrict data,size_t size,double * restrict buffer,TIME_TYPE *timing);
 void MyInnerProduct(void * restrict data,var_type type,size_t n_elem,TIME_TYPE *timing);
 void InitComm(int *argc_p,char ***argv_p);
@@ -59,21 +43,17 @@ void ParSetup(void);
 void SetupLocalD(void);
 void MyBcast(void * restrict data,var_type type,const size_t n_elem,TIME_TYPE *timing);
 void BcastOrient(int *i,int *j,int *k);
+
+#ifndef SPARSE
+void BlockTranspose(doublecomplex * restrict X,TIME_TYPE *timing);
+void BlockTranspose_Dm(doublecomplex * restrict X,size_t lengthY,size_t lengthZ);
 // used by granule generator
-#ifndef ADDA_SPARSE
 void SetGranulComm(double z0,double z1,double gdZ,int gZ,size_t gXY,size_t buf_size,int *lz0,
                    int *lz1,int sm_gr);
 void CollectDomainGranul(unsigned char * restrict dom,size_t gXY,int lz0,int locgZ,TIME_TYPE *timing);
 void FreeGranulComm(int sm_gr);
 void ExchangeFits(char * restrict data,const size_t n,TIME_TYPE *timing);
-#endif //ADDA_SPARSE
-
-#ifdef ADDA_SPARSE
-void InitArraySync(void);
-void SyncPosition(int * restrict pos);
-void SyncMaterial(unsigned char * restrict mat);
-void SyncArgvec(void);
-#endif //ADDA_SPARSE
+#endif // !SPARSE
 
 #ifdef PARALLEL
 // these functions are defined only in parallel mode
diff --git a/src/const.h b/src/const.h
index 06aad603..26e0b1e3 100644
--- a/src/const.h
+++ b/src/const.h
@@ -21,7 +21,7 @@
 #define __const_h
 
 // version number (string)
-#define ADDA_VERSION "1.2b1"
+#define ADDA_VERSION "1.2b2"
 
 /* ADDA uses certain C99 extensions, which are widely supported by GNU and Intel compilers. However,
  * they may be not completely supported by e.g. Microsoft Visual Studio compiler. Therefore, we
diff --git a/src/crosssec.c b/src/crosssec.c
index 3ebc6da7..df0ec61a 100644
--- a/src/crosssec.c
+++ b/src/crosssec.c
@@ -3,7 +3,7 @@
  * Descr: all the functions to calculate scattering quantities (except Mueller matrix); to read
  *        different parameters from files; and initialize orientation of the particle
  *
- * Copyright (C) 2006-2012 ADDA contributors
+ * Copyright (C) 2006-2013 ADDA contributors
  * This file is part of ADDA.
  *
  * ADDA is free software: you can redistribute it and/or modify it under the terms of the GNU
@@ -41,7 +41,9 @@
 // defined and initialized in calculator.c
 extern double * restrict E2_alldir,* restrict E2_alldir_buffer;
 extern const doublecomplex cc[][3];
+#ifndef SPARSE
 extern doublecomplex * restrict expsX,* restrict expsY,* restrict expsZ;
+#endif
 // defined and initialized in GenerateB.c
 extern const double beam_center_0[3];
 // defined and initialized in param.c
@@ -117,26 +119,6 @@ void InitRotation (void)
 
 //=====================================================================
 
-static int ATT_UNUSED ReadLine(FILE * restrict file, // opened file
-	const char * restrict fname,                     // ... its filename
-	char * restrict buf,const int buf_size)          // buffer for line and its size
-// reads the first uncommented line; returns 1 if EOF reached
-{
-	while (!feof(file)) {
-		fgets(buf,buf_size,file);
-		if (*buf!='#') { // if uncommented
-			if (strstr(buf,"\n")==NULL && !feof(file)) LogError(ONE_POS,
-				"Buffer overflow while reading '%s' (size of uncommented line > %d)",
-				fname,buf_size-1);
-			else return 0; // complete line is read
-		} // finish reading the commented line
-		else while (strstr(buf,"\n")==NULL  && !feof(file)) fgets(buf,buf_size,file);
-	}
-	return 1;
-}
-
-//=====================================================================
-
 static void ReadLineStart(FILE *  restrict file,                  // opened file
 	                      const char * restrict fname,            // ... its filename
                           char * restrict buf,const int buf_size, // buffer for line and its size
@@ -526,7 +508,7 @@ void CalcField (doublecomplex * restrict ebuff, // where to write calculated sca
 	double temp, na;
 	doublecomplex mult_mat[MAX_NMAT];
 	const bool scat_avg=true; // temporary fixed option for SO formulation
-#ifdef ADDA_SPARSE
+#ifdef SPARSE
 	doublecomplex expX, expY, expZ;
 #endif
 
@@ -546,11 +528,11 @@ void CalcField (doublecomplex * restrict ebuff, // where to write calculated sca
 	}
 	for(i=0;i<3;i++) sum[i][RE]=sum[i][IM]=0.0;
 	// prepare values of exponents, along each of the coordinates
-#ifndef ADDA_SPARSE	
+#ifndef SPARSE	
 	imExp_arr(-kd*n[0],boxX,expsX);
 	imExp_arr(-kd*n[1],boxY,expsY);
 	imExp_arr(-kd*n[2],local_Nz_unif,expsZ);
-#endif //ADDA_SPARSE
+#endif // !SPARSE
 	/* not to double the code in the source we use two temporary defines,since the following 'if'
 	 * cases differ only by one line of code; (taking 'if' inside the cycle will affect performance)
 	 */
@@ -562,7 +544,7 @@ void CalcField (doublecomplex * restrict ebuff, // where to write calculated sca
 	 * the grid.
 	 */
 	 
-#ifndef ADDA_SPARSE //FFT mode
+#ifndef SPARSE //FFT mode
 #define PART1\
 	iy1=iz1=UNDEF;\
 	for (j=0;j<local_nvoid_Ndip;++j) {\
@@ -578,7 +560,7 @@ void CalcField (doublecomplex * restrict ebuff, // where to write calculated sca
 			cMult(expsY[iy2],expsZ[iz2],tmp);\
 		}\
 		cMult(tmp,expsX[ix],a);
-#else //sparse mode
+#else // sparse mode
 #define PART1\
 	iy1=iz1=UNDEF;\
 	for (j=0;j<local_nvoid_Ndip;++j) {\
@@ -597,7 +579,7 @@ void CalcField (doublecomplex * restrict ebuff, // where to write calculated sca
 		}\
 		imExp(-kd*n[0]*ix,expX);\
 		cMult(tmp,expX,a);
-#endif //ADDA_SPARSE
+#endif // SPARSE
 #define PART2\
 	/* sum(P*exp(-ik*r.n)) */\
 		for(i=0;i<3;i++) {\
@@ -1068,13 +1050,18 @@ void Frp_mat(double Finc_tot[static restrict 3],double Fsca_tot[static restrict
 	}
 	// check if it can work at all; check is redundant for sequential mode
 	size_t nRows=MultOverflow(3,nvoid_Ndip,ONE_POS_FUNC);
-	#ifdef PARALLEL
+#ifdef PARALLEL
 	/* Because of the parallelization by row-block decomposition the distributed arrays involved
 	 * need to be gathered on each node a) DipoleCoord -> rdipT; b) pvec -> pT.
 	 * Actually this routine is usually called for two polarizations and rdipT does not change
 	 * between the calls. So one AllGather of rdipT can be removed. Number of memory allocations can
 	 * also be reduced. But this should be replaced by Fourier anyway.
 	 */
+	/* The following is somewhat redundant in sparse mode, since "full" (containing information about all dipoles)
+	 * vectors are already present in that mode. However, we do not optimize it now, since in standard mode radiation
+	 * forces should be computed by FFT anyway. Moreover, there are certain ideas to optimize sparse mode, so it will
+	 * not use full vectors - if done, this improvement can be also adjusted to the code below.
+	 */
 	// allocates a lot of additional memory
 	MALLOC_VECTOR(rdipT,double,nRows,ALL);
 	MALLOC_VECTOR(pT,complex,nRows,ALL);
diff --git a/src/fft.c b/src/fft.c
index 501e3764..46f84760 100644
--- a/src/fft.c
+++ b/src/fft.c
@@ -1,7 +1,6 @@
 /* File: fft.c
  * $Date::                            $
- * Descr: initialization of all FFT for matrix-vector products; and FFT procedures themselves
-
+ * Descr: initialization of all FFT for matrix-vector products; and FFT procedures themselves; not used in sparse mode
  *        TODO: A lot of indirect indexing used - way to optimize.
  *
  * Copyright (C) 2006-2013 ADDA contributors
@@ -26,7 +25,9 @@
 #include "debug.h"
 #include "function.h"
 #include "io.h"
+#include "interaction.h"
 #include "memory.h"
+#include "oclcore.h"
 #include "prec_time.h"
 #include "vars.h"
 // system headers
@@ -34,16 +35,13 @@
 #include <stdlib.h>
 #include <string.h>
 
-#ifdef OPENCL
-#	ifdef CLFFT_AMD
-#		include <clAmdFft.h> //external library from AMD
-#	elif defined(CLFFT_APPLE)
-#		include "cpp/clFFT.h" //nearly unmodified APPLE FFT header file
-#		ifdef NO_CPP
-#			error "Apple clFFT relies on C++ sources, hence is incompatible with NO_CPP option"
-#		endif
+#ifdef CLFFT_AMD
+#	include <clAmdFft.h> //external library from AMD
+#elif defined(CLFFT_APPLE)
+#	include "cpp/clFFT.h" //nearly unmodified APPLE FFT header file
+#	ifdef NO_CPP
+#		error "Apple clFFT relies on C++ sources, hence is incompatible with NO_CPP option"
 #	endif
-#	include "oclcore.h"
 #endif
 /* standard FFT routines (FFTW3 of FFT_TEMPERTON) are required even when OpenCL is used, since
  * they are used for Fourier transform of the D-matrix
@@ -70,17 +68,6 @@
 
 // SEMI-GLOBAL VARIABLES
 
-// defined ant initialized in calculator.c
-extern const double * restrict tab1,* restrict tab2,* restrict tab3,* restrict tab4,* restrict tab5,
-	* restrict tab6,* restrict tab7,* restrict tab8,* restrict tab9,* restrict tab10;
-extern const int * restrict * restrict tab_index;
-
-// defined and initialized in make_particle.c
-extern double gridspace;
-
-// defined and initialized in param.c
-extern double igt_lim, igt_eps;
-
 // defined and initialized in timing.c
 extern TIME_TYPE Timing_FFT_Init,Timing_Dm_Init;
 
@@ -132,17 +119,6 @@ void cfft99_(double * restrict data,double * restrict _work,const double * restr
 	const int *isign);
 #endif
 
-// EXTERNAL FUNCTIONS
-
-#ifndef NO_FORTRAN
-// fort/propaesplibreintadda.f
-void propaespacelibreintadda_(const double *Rij,const double *ka,const double *arretecube,
-	const double *relreq, double *result);
-#endif
-
-// sinint.c
-void cisi(double x,double *ci,double *si);
-
 //============================================================
 
 INLINE size_t IndexDmatrix(const size_t x,size_t y,size_t z)
@@ -373,7 +349,7 @@ void fftY(const int isign)
 	if (isign==FFT_FORWARD) fftw_execute(planYf);
 	else fftw_execute(planYb);
 #elif defined(FFT_TEMPERTON)
-	int nn=gridY,inc=1,jump=nn,lot=3*gridZ,j;
+	int nn=gridY,inc=1,jump=nn,lot=3*gridZ;
 
 	cfft99_((double *)(slices_tr),work,trigsY,ifaxY,&inc,&jump,&nn,&lot,&isign);
 #endif
@@ -740,532 +716,6 @@ static void fftInitAfterD(void)
 
 //============================================================
 
-INLINE bool TestTableSize(const double rn)
-// tests if rn fits into the table; if not, returns false and produces info message
-{
-	static bool warned=false;
-
-	if (rn>TAB_RMAX) {
-		if (!warned) {
-			warned=true;
-			LogWarning(EC_INFO,ONE_POS,"Not enough table size (available only up to R/d=%d), "
-				"so O(kd^2) accuracy of Green's function is not guaranteed",TAB_RMAX);
-		}
-		return false;
-	}
-	else return true;
-}
-
-//============================================================
-
-static void CalcInterTerm(const int i,const int j,const int k,doublecomplex * restrict result)
-/* calculates interaction term between two dipoles; given integer distance vector {i,j,k}
- * (in units of d). All six components of the symmetric matrix are computed at once.
- */
-{
-	double rr,qvec[3],q2[3],invr3,qavec[3],av[3];
-	double kr,kr2,kr3,kd2,q4,rn,rn2;
-	double temp,qmunu[6],qa,qamunu[6],invrn,invrn2,invrn3,invrn4;
-	double dmunu[6]; // KroneckerDelta[mu,nu] - can serve both as multiplier, and as bool
-	double kfr,ci,si,ci1,si1,ci2,si2,brd,cov,siv,g0,g2;
-	doublecomplex expval,br,br1,m,m2,Gf1,Gm0,Gm1,Gc1,Gc2;
-	int ind0,ind1,ind2,ind2m,ind3,ind4,indmunu,comp,mu,nu,mu1,nu1;
-	int sigV[3],ic,sig,ivec[3],ord[3],invord[3];
-	double t3q,t3a,t4q,t4a,t5tr,t5aa,t6tr,t6aa;
-	const bool inter_avg=true; // temporary fixed option for SO formulation
-
-// this is used for debugging, should be empty define, when not required
-#define PRINT_GVAL /*printf("%d,%d,%d: %g%+gi, %g%+gi, %g%+gi,\n%g%+gi, %g%+gi, %g%+gi\n",\
-	i,j,k,result[0][RE],result[0][IM],result[1][RE],result[1][IM],result[2][RE],result[2][IM],\
-	result[3][RE],result[3][IM],result[4][RE],result[4][IM],result[5][RE],result[5][IM]);*/
-
-	// self interaction; self term is computed in different subroutine
-	if (i==0 && j==0 && k==0) for (comp=0;comp<NDCOMP;comp++) {
-		result[comp][RE]=result[comp][IM]=0;
-		return;
-	}
-	//====== calculate some basic constants ======
-	qvec[0]=i; // qvec is normalized below (after IGT)
-	qvec[1]=j;
-	qvec[2]=k;
-	rn2=DotProd(qvec,qvec);
-	rn=sqrt(rn2); // normalized r
-#ifndef NO_FORTRAN
-	if (IntRelation==G_IGT && (igt_lim==UNDEF || rn<=igt_lim)) { // a special case
-		double rtemp[3];
-		vMultScal(gridspace,qvec,rtemp);
-		propaespacelibreintadda_(rtemp,&WaveNum,&gridspace,&igt_eps,(double *)result);
-		PRINT_GVAL;
-		return;
-	}
-#endif
-	// a common part of the code (up to FCD...), which effectively implements G_POINT_DIP
-	invrn=1/rn;
-	vMultScalSelf(invrn,qvec); // finalize qvec
-	rr=rn*gridspace;
-	invr3=1/(rr*rr*rr);
-	kr=WaveNum*rr;
-	kr2=kr*kr;
-	kfr=PI*rn; // k_F*r, for FCD
-	// cov=cos(kr); siv=sin(kr); expval=Exp(ikr)/r^3
-	imExp(kr,expval);
-	cov=expval[RE];
-	siv=expval[IM];
-	cMultReal(invr3,expval,expval);
-	//====== calculate Gp ========
-	for (mu=0,comp=0;mu<3;mu++) for (nu=mu;nu<3;nu++,comp++) {
-		dmunu[comp]= mu==nu ? 1 : 0;
-		qmunu[comp]=qvec[mu]*qvec[nu];
-		// br=delta[mu,nu]*(-1+ikr+kr^2)-qmunu*(-3+3ikr+kr^2)
-		br[RE]=(3-kr2)*qmunu[comp];
-		br[IM]=-3*kr*qmunu[comp];
-		if(dmunu[comp]) {
-			br[RE]+=kr2-1;
-			br[IM]+=kr;
-		}
-		// result=Gp=expval*br
-		cMult(br,expval,result[comp]);
-	}
-	//====== FCD (static and full) ========
-	/* speed of FCD can be improved by using faster version of sici routine, using predefined
-	 * tables, etc (e.g. as is done in GSL library). But currently extra time for this computation
-	 * is already smaller than one main iteration.
-	 */
-	if (IntRelation==G_FCD_ST) {
-		/* FCD is based on Gay-Balmaz P., Martin O.J.F. "A library for computing the filtered and
-		 * non-filtered 3D Green's tensor associated with infinite homogeneous space and surfaces",
-		 * Comp. Phys. Comm. 144:111-120 (2002), and Piller N.B. "Increasing the performance of the
-		 * coupled-dipole approximation: A spectral approach", IEEE Trans.Ant.Propag. 46(8):
-		 * 1126-1137. Here it differs by a factor of 4*pi*k^2.
-		 */
-		// result = Gp*[3*Si(k_F*r)+k_F*r*cos(k_F*r)-4*sin(k_F*r)]*2/(3*pi)
-		cisi(kfr,&ci,&si);
-		brd=TWO_OVER_PI*ONE_THIRD*(3*si+kfr*cos(kfr)-4*sin(kfr));
-		for (comp=0;comp<NDCOMP;comp++) cMultReal(brd,result[comp],result[comp]);
-	}
-	else if (IntRelation==G_FCD) {
-		// ci,si_1,2 = ci,si_+,- = Ci,Si((k_F +,- k)r)
-		cisi(kfr+kr,&ci1,&si1);
-		cisi(kfr-kr,&ci2,&si2);
-		// ci=ci1-ci2; si=pi-si1-si2
-		ci=ci1-ci2;
-		si=PI-si1-si2;
-		g0=INV_PI*(siv*ci+cov*si);
-		g2=INV_PI*(kr*(cov*ci-siv*si)+2*ONE_THIRD*(kfr*cos(kfr)-4*sin(kfr)))-g0;
-		temp=g0*kr2;
-		for (comp=0;comp<NDCOMP;comp++) {
-			// brd=(delta[mu,nu]*(-g0*kr^2-g2)+qmunu*(g0*kr^2+3g2))/r^3
-			brd=qmunu[comp]*(temp+3*g2);
-			if (dmunu[comp]) brd-=temp+g2;
-			brd*=invr3;
-			// result=Gp+brd
-			result[comp][RE]+=brd;
-		}
-	}
-	//======= second order corrections ========
-	else if (IntRelation==G_IGT_SO) {
-		/* There is still some space for speed optimization here (e.g. move mu,nu-independent
-		 * operations out of the cycles over components).
-		 */
-		kd2=kd*kd;
-		if (kr*rn < G_BOUND_CLOSE && TestTableSize(rn)) {
-			//====== G close for IGT =============
-			ivec[0]=i;
-			ivec[1]=j;
-			ivec[2]=k;
-			// transformation of negative coordinates
-			for (ic=0;ic<3;ic++) {
-				if (ivec[ic]<0) {
-					sigV[ic]=-1;
-					qvec[ic]*=-1;
-					ivec[ic]*=-1;
-				}
-				else sigV[ic]=1;
-			}
-			// transformation to case i>=j>=k>=0
-			// building of ord; ord[x] is x-th largest coordinate (0-th - the largest)
-			if (ivec[0]>=ivec[1]) { // i>=j
-				if (ivec[0]>=ivec[2]) { // i>=k
-					ord[0]=0;
-					if (ivec[1]>=ivec[2]) { // j>=k
-						ord[1]=1;
-						ord[2]=2;
-					}
-					else {
-						ord[1]=2;
-						ord[2]=1;
-					}
-				}
-				else {
-					ord[0]=2;
-					ord[1]=0;
-					ord[2]=1;
-				}
-			}
-			else {
-				if (ivec[0]>=ivec[2]) { // i>=k
-					ord[0]=1;
-					ord[1]=0;
-					ord[2]=2;
-				}
-				else {
-					ord[2]=0;
-					if (ivec[1]>=ivec[2]) { // j>=k
-						ord[0]=1;
-						ord[1]=2;
-					}
-					else {
-						ord[0]=2;
-						ord[1]=1;
-					}
-				}
-			}
-			// change parameters according to coordinate transforms
-			Permutate(qvec,ord);
-			Permutate_i(ivec,ord);
-			// compute inverse permutation
-			memcpy(invord,ord,3*sizeof(int));
-			Permutate_i(invord,ord);
-			if (invord[0]==0 && invord[1]==1 && invord[2]==2) memcpy(invord,ord,3*sizeof(int));
-			// temp = kr/24; and set some indices
-			temp=kr/24;
-			ind0=tab_index[ivec[0]][ivec[1]]+ivec[2];
-			ind1=3*ind0;
-			ind2m=6*ind0;
-			// cycle over tensor components
-			for (mu=0,comp=0;mu<3;mu++) for (nu=mu;nu<3;nu++,comp++) {
-				sig=sigV[mu]*sigV[nu]; // sign of some terms below
-				/* indexes for tables of different dimensions based on transformed indices mu and nu
-				 * '...munu' variables are invariant to permutations because both constituent
-				 * vectors and indices are permutated. So this variables can be used, as precomputed
-				 * above.
-				 */
-				mu1=invord[mu];
-				nu1=invord[nu];
-				/* indmunu is a number of component[mu,nu] in a symmetric matrix, but counted
-				 * differently than comp. This is {{0,1,3},{1,2,4},{3,4,5}}
-				 */
-				indmunu=mu1+nu1;
-				if (mu1==2 || nu1==2) indmunu++;
-				ind2=ind2m+indmunu;
-				ind3=3*ind2;
-				ind4=6*ind2;
-				// computing several quantities with table integrals
-				t3q=DotProd(qvec,tab3+ind1);
-				t4q=DotProd(qvec,tab4+ind3);
-				t5tr=TrSym(tab5+ind2m);
-				t6tr=TrSym(tab6+ind4);
-				//====== computing Gc0 =====
-				/* br = delta[mu,nu]*(-I7-I9/2-kr*(i+kr)/24+2*t3q+t5tr)
-				 *    - (-3I8[mu,nu]-3I10[mu,nu]/2-qmunu*kr*(i+kr)/24+2*t4q+t6tr)
-				 */
-				br[RE]=sig*(3*(tab10[ind2]/2+tab8[ind2])-2*t4q-t6tr)+temp*qmunu[comp]*kr;
-				br[IM]=3*temp*qmunu[comp];
-				if (dmunu[comp]) {
-					br[RE]+=2*t3q+t5tr-temp*kr-tab9[ind0]/2-tab7[ind0];
-					br[IM]-=temp;
-				}
-				// br*=kd^2
-				cMultReal(kd2,br,br);
-				// br+=I1*delta[mu,nu]*(-1+ikr+kr^2)-sig*I2[mu,nu]*(-3+3ikr+kr^2)
-				br[RE]+=sig*tab2[ind2]*(3-kr2);
-				br[IM]-=sig*tab2[ind2]*3*kr;
-				if (dmunu[comp]) {
-					br[RE]+=tab1[ind0]*(kr2-1);
-					br[IM]+=tab1[ind0]*kr;
-				}
-				// Gc0=expval*br
-				cMult(expval,br,result[comp]);
-			}
-		}
-		else {
-			//====== Gfar (and part of Gmedian) for IGT =======
-			// Gf0 = Gp*(1-kd^2/24)
-			for (comp=0;comp<NDCOMP;comp++) cMultReal(1-kd2/24,result[comp],result[comp]);
-			if (kr < G_BOUND_MEDIAN) {
-				//===== G median for IGT ========
-				vSquare(qvec,q2);
-				q4=DotProd(q2,q2);
-				invrn2=invrn*invrn;
-				invrn3=invrn2*invrn;
-				invrn4=invrn2*invrn2;
-				for (mu=0,comp=0;mu<3;mu++) for (nu=mu;nu<3;nu++,comp++) {
-					// Gm0=expval*br*temp; temp is set anew
-					temp=qmunu[comp]*(33*q4-7-12*(q2[mu]+q2[nu]));
-					if (mu == nu) temp+=(1-3*q4+4*q2[mu]);
-					temp*=7*invrn4/64;
-					br[RE]=-1;
-					br[IM]=kr;
-					cMultReal(temp,br,Gm0);
-					cMultSelf(Gm0,expval);
-					// result = Gf + Gm0 + [ Gm1 ]
-					cAdd(Gm0,result[comp],result[comp]);
-				}
-			}
-		}
-	}
-	else if (IntRelation==G_SO) {
-		/* There is still some space for speed optimization here (e.g. move mu,nu-independent
-		 * operations out of the cycles over components). But now extra time is equivalent to 2-3
-		 * main iterations. So first priority is to make something useful out of SO.
-		 */
-		// next line should never happen
-		if (anisotropy) LogError(ONE_POS,"Incompatibility error in CalcInterTerm");
-		kd2=kd*kd;
-		kr3=kr2*kr;
-		// only one refractive index can be used for FFT-compatible algorithm
-		cEqual(ref_index[0],m);
-		cSquare(m,m2);
-		if (!inter_avg) {
-			qa=DotProd(qvec,prop);
-			for (mu=0,comp=0;mu<3;mu++) for (nu=mu;nu<3;nu++,comp++) {
-				// qamunu=qvec[mu]*prop[nu] + qvec[nu]*prop[mu]
-				qamunu[comp]=qvec[mu]*prop[nu];
-				if (dmunu[comp]) qamunu[comp]*=2;
-				else qamunu[comp]+=qvec[nu]*prop[mu];
-			}
-		}
-		if (kr*rn < G_BOUND_CLOSE && TestTableSize(rn)) {
-			//====== G close =============
-			// av is copy of propagation vector
-			if (!inter_avg) memcpy(av,prop,3*sizeof(double));
-			ivec[0]=i;
-			ivec[1]=j;
-			ivec[2]=k;
-			// transformation of negative coordinates
-			for (ic=0;ic<3;ic++) {
-				if (ivec[ic]<0) {
-					sigV[ic]=-1;
-					av[ic]*=-1;
-					qvec[ic]*=-1;
-					ivec[ic]*=-1;
-				}
-				else sigV[ic]=1;
-			}
-			// transformation to case i>=j>=k>=0
-			// building of ord; ord[x] is x-th largest coordinate (0-th - the largest)
-			if (ivec[0]>=ivec[1]) { // i>=j
-				if (ivec[0]>=ivec[2]) { // i>=k
-					ord[0]=0;
-					if (ivec[1]>=ivec[2]) { // j>=k
-						ord[1]=1;
-						ord[2]=2;
-					}
-					else {
-						ord[1]=2;
-						ord[2]=1;
-					}
-				}
-				else {
-					ord[0]=2;
-					ord[1]=0;
-					ord[2]=1;
-				}
-			}
-			else {
-				if (ivec[0]>=ivec[2]) { // i>=k
-					ord[0]=1;
-					ord[1]=0;
-					ord[2]=2;
-				}
-				else {
-					ord[2]=0;
-					if (ivec[1]>=ivec[2]) { // j>=k
-						ord[0]=1;
-						ord[1]=2;
-					}
-					else {
-						ord[0]=2;
-						ord[1]=1;
-					}
-				}
-			}
-			// change parameters according to coordinate transforms
-			Permutate(qvec,ord);
-			if (!inter_avg) Permutate(av,ord);
-			Permutate_i(ivec,ord);
-			// compute inverse permutation
-			memcpy(invord,ord,3*sizeof(int));
-			Permutate_i(invord,ord);
-			if (invord[0]==0 && invord[1]==1 && invord[2]==2) memcpy(invord,ord,3*sizeof(int));
-			// temp = kr/24; and set some indices
-			temp=kr/24;
-			ind0=tab_index[ivec[0]][ivec[1]]+ivec[2];
-			ind1=3*ind0;
-			ind2m=6*ind0;			// cycle over tensor components
-			for (mu=0,comp=0;mu<3;mu++) for (nu=mu;nu<3;nu++,comp++) {
-				sig=sigV[mu]*sigV[nu]; // sign of some terms below
-				/* indexes for tables of different dimensions based on transformed indices mu and nu
-				 * '...munu' variables are invariant to permutations because both constituent
-				 * vectors and indices are permutated. So this variables can be used, as precomputed
-				 * above.
-				 */
-				mu1=invord[mu];
-				nu1=invord[nu];
-				/* indmunu is a number of component[mu,nu] in a symmetric matrix, but counted
-				 * differently than comp. This is {{0,1,3},{1,2,4},{3,4,5}}
-				 */
-				indmunu=mu1+nu1;
-				if (mu1==2 || nu1==2) indmunu++;
-				ind2=ind2m+indmunu;
-				ind3=3*ind2;
-				ind4=6*ind2;
-				// computing several quantities with table integrals
-				t3q=DotProd(qvec,tab3+ind1);
-				t4q=DotProd(qvec,tab4+ind3);
-				t5tr=TrSym(tab5+ind2m);
-				t6tr=TrSym(tab6+ind4);
-				if (inter_avg) {
-					// <a[mu]*a[nu]>=1/3*delta[mu,nu]
-					t5aa=ONE_THIRD*t5tr;
-					t6aa=ONE_THIRD*t6tr;
-				}
-				else {
-					t3a=DotProd(av,tab3+ind1);
-					t4a=DotProd(av,tab4+ind3);
-					t5aa=QuadForm(tab5+ind2m,av);
-					t6aa=QuadForm(tab6+ind4,av);
-				}
-				//====== computing Gc0 =====
-				/* br = delta[mu,nu]*(-I7-I9/2-kr*(i+kr)/24+2*t3q+t5tr)
-				 *    - (-3I8[mu,nu]-3I10[mu,nu]/2-qmunu*kr*(i+kr)/24+2*t4q+t6tr)
-				 */
-				br[RE]=sig*(3*(tab10[ind2]/2+tab8[ind2])-2*t4q-t6tr)+temp*qmunu[comp]*kr;
-				br[IM]=3*temp*qmunu[comp];
-				if (dmunu[comp]) {
-					br[RE]+=2*t3q+t5tr-temp*kr-tab9[ind0]/2-tab7[ind0];
-					br[IM]-=temp;
-				}
-				// br*=kd^2
-				cMultReal(kd2,br,br);
-				// br+=I1*delta[mu,nu]*(-1+ikr+kr^2)-sig*I2[mu,nu]*(-3+3ikr+kr^2)
-				br[RE]+=sig*tab2[ind2]*(3-kr2);
-				br[IM]-=sig*tab2[ind2]*3*kr;
-				if (dmunu[comp]) {
-					br[RE]+=tab1[ind0]*(kr2-1);
-					br[IM]+=tab1[ind0]*kr;
-				}
-				// Gc0=expval*br
-				cMult(expval,br,result[comp]);
-				//==== computing Gc1 ======
-				if (!inter_avg) {
-					// br=(kd*kr/24)*(qa*(delta[mu,nu]*(-2+ikr)-qmunu*(-6+ikr))-qamunu)
-					br[RE]=6*qmunu[comp];
-					br[IM]=-kr*qmunu[comp];
-					if (dmunu[comp]) {
-						br[RE]-=2;
-						br[IM]+=kr;
-					}
-					cMultReal(qa,br,br);
-					br[RE]-=qamunu[comp];
-					cMultReal(2*temp*kd,br,br);
-					//  br1=(d/r)*(delta[mu,nu]*t3h*(-1+ikr)-sig*t4h*(-3+3ikr))
-					br1[RE]=3*sig*t4a;
-					br1[IM]=-kr*br1[RE];
-					if (dmunu[comp]) {
-						br1[RE]-=t3a;
-						br1[IM]+=t3a*kr;
-					}
-					cMultReal(1/rn,br1,br1);
-					// Gc1=expval*i*m*kd*(br1+br)
-					cAdd(br,br1,Gc1);
-					cMultSelf(Gc1,m);
-					cMultReal(kd,Gc1,Gc1);
-					cMultSelf(Gc1,expval);
-					cMult_i(Gc1);
-				}
-				//==== computing Gc2 ======
-				// br=delta[mu,nu]*t5aa-3*sig*t6aa-(kr/12)*(delta[mu,nu]*(i+kr)-qmunu*(3i+kr))
-				br[RE]=-kr*qmunu[comp];
-				br[IM]=-3*qmunu[comp];
-				if (dmunu[comp]) {
-					br[RE]+=kr;
-					br[IM]+=1;
-				}
-				cMultReal(-2*temp,br,br);
-				br[RE]-=3*sig*t6aa;
-				if (dmunu[comp]) br[RE]+=t5aa;
-				// Gc2=expval*(kd^2/2)*m^2*br
-				cMult(m2,br,Gc2);
-				cMultReal(kd2/2,Gc2,Gc2);
-				cMultSelf(Gc2,expval);
-				// result = Gc0 + [ Gc1 ] + Gc2
-				if (!inter_avg) cAdd(Gc2,Gc1,Gc2);
-				cAdd(Gc2,result[comp],result[comp]);
-			}
-		}
-		else {
-			//====== Gfar (and part of Gmedian) =======
-			// temp=kd^2/24
-			temp=kd2/24;
-			// br=1-(1+m^2)*kd^2/24
-			br[RE]=1-(1+m2[RE])*temp;
-			br[IM]=-m2[IM]*temp;
-			// Gf0 + Gf2 = Gp*br
-			for (comp=0;comp<NDCOMP;comp++) cMultSelf(result[comp],br);
-			//==== compute and add Gf1 ===
-			if (!inter_avg) for (comp=0;comp<NDCOMP;comp++) {
-				/* br = {delta[mu,nu]*(3-3ikr-2kr^2+ikr^3)-qmunu*(15-15ikr-6kr^2+ikr^3)}*qa
-				 *    + qamunu*(3-3ikr-kr^2)
-				 */
-				br[RE]=(6*kr2-15)*qmunu[comp];
-				br[IM]=(15*kr-kr3)*qmunu[comp];
-				if(dmunu[comp]) {
-					br[RE]+=3-2*kr2;
-					br[IM]+=kr3-3*kr;
-				}
-				cMultReal(qa,br,br);
-				br[RE]+=(3-kr2)*qamunu[comp];
-				br[IM]-=3*kr*qamunu[comp];
-				// Gf1=expval*i*m*temp*br
-				cMult(m,br,Gf1);
-				cMultReal(temp*2/kr,Gf1,Gf1);
-				cMultSelf(Gf1,expval);
-				cMult_i(Gf1);
-				// result = Gf
-				cAdd(Gf1,result[comp],result[comp]);
-			}
-			if (kr < G_BOUND_MEDIAN) {
-				//===== G median ========
-				vSquare(qvec,q2);
-				q4=DotProd(q2,q2);
-				invrn2=invrn*invrn;
-				invrn3=invrn2*invrn;
-				invrn4=invrn2*invrn2;
-				for (mu=0,comp=0;mu<3;mu++) for (nu=mu;nu<3;nu++,comp++) {
-					// Gm0=expval*br*temp; temp is set anew
-					temp=qmunu[comp]*(33*q4-7-12*(q2[mu]+q2[nu]));
-					if (mu == nu) temp+=(1-3*q4+4*q2[mu]);
-					temp*=7*invrn4/64;
-					br[RE]=-1;
-					br[IM]=kr;
-					cMultReal(temp,br,Gm0);
-					cMultSelf(Gm0,expval);
-					if (!inter_avg) {
-						// Gm1=expval*i*m*temp; temp is set anew
-						vMult(qvec,prop,qavec);
-						temp = 3*qa*(dmunu[comp]-7*qmunu[comp]) + 6*dmunu[comp]*qvec[mu]*prop[mu]
-							 - 7*(dmunu[comp]-9*qmunu[comp])*DotProd(qavec,q2)
-							 + 3*(prop[mu]*qvec[nu]*(1-7*q2[mu])+prop[nu]*qvec[mu]*(1-7*q2[nu]));
-						temp*=kd*invrn3/48;
-						cMultReal(temp,m,Gm1);
-						cMult_i(Gm1);
-						cMultSelf(Gm1,expval);
-						// add Gm1 to Gm0
-						cAdd(Gm0,Gm1,Gm0);
-					}
-					// result = Gf + Gm0 + [ Gm1 ]
-					cAdd(Gm0,result[comp],result[comp]);
-				}
-			}
-		}
-	}
-	PRINT_GVAL;
-}
-#undef PRINT_GVAL
-
-//============================================================
-
 void InitDmatrix(void)
 /* Initializes the matrix D. D[i][j][k]=A[i1-i2][j1-j2][k1-k2]. Actually D=-FFT(G)/Ngrid.
  * Then -G.x=invFFT(D*FFT(x)) for practical implementation of FFT such that invFFT(FFT(x))=Ngrid*x.
@@ -1478,7 +928,7 @@ void InitDmatrix(void)
 	 * indexing) component-wise (in cycle over NDCOMP)
 	 */
 
-	/* fill Dmatrix with 0, this if to fill the possible gap between e.g. boxY and gridY/2;
+	/* fill Dmatrix with 0, this if to fill the possible gap between e.g. boxY and gridY/2; (and for R=0)
 	 * probably faster than using a lot of conditionals
 	 */
 	for (ind=0;ind<Dsize;ind++) Dmatrix[ind][RE]=Dmatrix[ind][IM]=0;
@@ -1489,7 +939,11 @@ void InitDmatrix(void)
 		else kcor=k;
 		for (j=jstart;j<boxY;j++) for (i=1-boxX;i<boxX;i++) {
 			index=NDCOMP*IndexD2matrix(i,j,k,nnn);
-			CalcInterTerm(i,j,kcor,Dmatrix+index);
+			/* The test for zero distance is somewhat non-optimal. However, other alternatives are not perfect either:
+			 * 1) complicate the loops to remove the zero element in the beginning (move tests to the upper level)
+			 * 2) call the function with zero - it will produce NaN. Then set this element to zero after the loop.
+			 */
+			if (i!=0 || j!=0 || kcor!=0) (*CalcInterTerm)(i,j,kcor,Dmatrix+index);
 		}
 	} // end of i,j,k loop
 	if (IFROOT) printf("Fourier transform of Dmatrix");
diff --git a/src/fft.h b/src/fft.h
index 9ce43825..3cd9804e 100644
--- a/src/fft.h
+++ b/src/fft.h
@@ -1,6 +1,6 @@
 /* File: fft.h
  * $Date::                            $
- * Descr: definitions of FFT parameters and routines
+ * Descr: definitions of FFT parameters and routines; void in sparse mode
  *
  * Copyright (C) 2006,2008,2010-2013 ADDA contributors
  * This file is part of ADDA.
@@ -16,11 +16,11 @@
  * You should have received a copy of the GNU General Public License along with ADDA. If not, see
  * <http://www.gnu.org/licenses/>.
  */
+#ifndef SPARSE
+
 #ifndef __fft_h
 #define __fft_h
 
-#ifndef ADDA_SPARSE //not needed in sparse mode
-
 #ifndef FFT_TEMPERTON
 #	define FFTW3 // FFTW3 is default
 #endif
@@ -45,6 +45,6 @@ void Free_FFT_Dmat(void);
 int fftFit(int size, int _div);
 void CheckNprocs(void);
 
-#endif // ADDA_SPARSE
-
 #endif // __fft_h
+
+#endif // !SPARSE
diff --git a/src/interaction.c b/src/interaction.c
index 4a8fd15a..bce978b2 100644
--- a/src/interaction.c
+++ b/src/interaction.c
@@ -1,227 +1,193 @@
 /* FILE : interaction.c
  * Descr: the functions used to calculate the interaction term
  *
- * Copyright (C) 2006-2012 ADDA contributors
+ * Copyright (C) 2011-2013 ADDA contributors
  * This file is part of ADDA.
  *
- * ADDA is free software: you can redistribute it and/or modify it under the terms of the GNU
- * General Public License as published by the Free Software Foundation, either version 3 of the
- * License, or (at your option) any later version.
+ * ADDA is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
  *
- * ADDA is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even
- * the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
- * Public License for more details.
+ * ADDA is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License along with ADDA. If not, see
  * <http://www.gnu.org/licenses/>.
  */
-
+#include "const.h" // keep this first
+#include "interaction.h" // corresponding headers
+// project headers
 #include "cmplx.h"
-#include "const.h"
-#include "interaction.h"
 #include "io.h"
-#include "types.h"
+#include "memory.h"
 #include "vars.h"
-#include "param.h"
 
-// defined and initialized in calculator.c
-extern const double * restrict tab1,* restrict tab2,* restrict tab3,* restrict tab4,* restrict tab5,
-	* restrict tab6,* restrict tab7,* restrict tab8,* restrict tab9,* restrict tab10;
-extern const int * restrict * restrict tab_index;
+// SEMI-GLOBAL VARIABLES
+
+// defined and initialized in make_particle.c
 extern double gridspace;
-extern double igt_lim, igt_eps;
 
-#ifndef NO_FORTRAN
-// fort/propaesplibreintadda.f
-void propaespacelibreintadda_(const double *Rij,const double *ka,const double *arretecube,
-	const double *relreq, double *result);
-#endif
+// defined and initialized in param.c
+extern double igt_lim,igt_eps;
+
+// LOCAL VARIABLES
 
-//This function pointer...
-void (*CalcInterTerm)(const int i,const int j,const int k,doublecomplex * restrict result);
-//...will point to one of the functions below
-void CalcInterTerm_opt(const int i,const int j,const int k,doublecomplex * restrict result);
-void CalcInterTerm_complete(const int i,const int j,const int k,doublecomplex * restrict result);
+// tables of integrals
+static double * restrict tab1,* restrict tab2,* restrict tab3,* restrict tab4,* restrict tab5,* restrict tab6,
+	* restrict tab7,* restrict tab8,* restrict tab9,* restrict tab10;
+/* it is preferable to declare the following as "* restrict * restrict", but it is hard to make it
+* generally compatible with Free_iMatrix function syntax.
+*/
+static int ** restrict tab_index; // matrix for indexing of table arrays
 
 #ifdef USE_SSE3
-__m128d c1, c2, c3, zo, inv_2pi, p360, prad_to_deg;
-__m128d exptbl[361];
+static __m128d c1, c2, c3, zo, inv_2pi, p360, prad_to_deg;
+static __m128d exptbl[361];
 #endif //USE_SSE3
 
-#define M_PI 3.14159265358979323846
+// EXTERNAL FUNCTIONS
 
-void InitInteraction(void) 
-/*
-	Initialize the interaction calculations
-*/
-{
-	if (IntRelation==G_POINT_DIP) {
-		CalcInterTerm = CalcInterTerm_opt;
-	} else {
-		CalcInterTerm = CalcInterTerm_complete;
-	}
-
-	#ifdef USE_SSE3
-	c1 = _mm_set_pd(1.34959795251974073996e-11,3.92582397764340914444e-14);
-	c2 = _mm_set_pd(-8.86096155697856783296e-7,-3.86632385155548605680e-9);
-	c3 = _mm_set_pd(1.74532925199432957214e-2,1.52308709893354299569e-4);
-	zo = _mm_set_pd(0.0,1.0);
-	inv_2pi = _mm_set_sd(1.0/(2*M_PI));
-	p360 = _mm_set_sd(360.0);
-	prad_to_deg = _mm_set_sd(180.0/M_PI);
-	
-	for (unsigned int i=0; i<=360; i++) {
-		double x = M_PI/180.0*(double)i; 
-		exptbl[i] = _mm_set_pd(sin(x),cos(x));
-	}
-	#endif
-}
+#ifndef NO_FORTRAN
+// fort/propaesplibreintadda.f
+void propaespacelibreintadda_(const double *Rij,const double *ka,const double *arretecube,const double *relreq,
+	double *result);
+#endif
+// sinint.c
+void cisi(double x,double *ci,double *si);
 
+// this is used for debugging, should be empty define, when not required
+#define PRINT_GVAL /*printf("%d,%d,%d: %g%+gi, %g%+gi, %g%+gi,\n%g%+gi, %g%+gi, %g%+gi\n",\
+	i,j,k,result[0][RE],result[0][IM],result[1][RE],result[1][IM],result[2][RE],result[2][IM],\
+	result[3][RE],result[3][IM],result[4][RE],result[4][IM],result[5][RE],result[5][IM]);*/
 
 #ifdef USE_SSE3
 
-static inline __m128d accImExp(double x, double c)
-/*
-   Accelerated sin-cos (or imaginary exp) routine for use in the calculation of the 
-   interaction tensor. Returns c*exp(ix) in the resultant vector. The code is adapted
-   from the CEPHES library. The idea is that we have precalculated exp(iy) for some
-   discrete values of y. Then we take the y that is nearest to our x and write
-   exp(ix)=exp(iy+(ix-iy))=exp(iy)exp(i(x-y)). We take exp(y) from the table and 
-   exp(i(x-y)) from the Taylor series. This converges very fast since |x-y| is small.
-*/
+//=====================================================================================================================
+
+static inline __m128d accImExp(double x,double c)
+/* Accelerated sin-cos (or imaginary exp) routine for use in the calculation of the interaction tensor. Returns
+ * c*exp(ix) in the resultant vector. The code is adapted from the CEPHES library. The idea is that we have
+ * precalculated exp(iy) for some discrete values of y. Then we take the y that is nearest to our x and write
+ * exp(ix)=exp(iy+(ix-iy))=exp(iy)exp(i(x-y)). We take exp(y) from the table and exp(i(x-y)) from the Taylor series.
+ * This converges very fast since |x-y| is small.
+ */
 {
 	__m128d px = _mm_set_sd(x);
 	__m128d ipx = _mm_mul_pd(px,inv_2pi);
-	int ix = _mm_cvttsd_si32(ipx); //truncation so the function only works for 0 <= x < 2*pi*2^-31
+	int ix = _mm_cvttsd_si32(ipx); // truncation so the function only works for 0 <= x < 2*pi*2^-31
 	ipx = _mm_cvtsi32_sd(ipx,ix);
 	ipx = _mm_mul_pd(p360,ipx);
 	px = _mm_mul_pd(prad_to_deg,px);
-	px = _mm_sub_pd(px,ipx); //this is x (deg) mod 360
-	
-	ix = _mm_cvtsd_si32(px); //the closest integer (rounded, not truncated)
-	__m128d scx = exptbl[ix]; //the tabulated value
-	
+	px = _mm_sub_pd(px,ipx); // this is x (deg) mod 360
+
+	ix = _mm_cvtsd_si32(px); // the closest integer (rounded, not truncated)
+	__m128d scx = exptbl[ix]; // the tabulated value
+
 	ipx = _mm_cvtsi32_sd(ipx,ix);
-	__m128d pz = _mm_sub_pd(ipx,px); //the residual -z=(ix-x)	
+	__m128d pz = _mm_sub_pd(ipx,px); // the residual -z=(ix-x)
 	__m128d py = _mm_mul_pd(pz,pz);	
-	__m128d yy = _mm_shuffle_pd(py,py,0); //now (y,y)		
+	__m128d yy = _mm_shuffle_pd(py,py,0); // now (y,y)
 	__m128d zy = _mm_shuffle_pd(yy,pz,1);	
-	
-	__m128d scz = _mm_mul_pd(c1,yy);	//taylor series approximation
+
+	__m128d scz = _mm_mul_pd(c1,yy);	// Taylor series approximation
 	scz = _mm_add_pd(c2,scz);
 	scz = _mm_mul_pd(yy,scz);
 	scz = _mm_add_pd(c3,scz);
 	scz = _mm_mul_pd(zy,scz);
 	scz = _mm_sub_pd(zo,scz);
-	scx = cmul(scz,scx); //multiply lookup and approximation
-  	return _mm_mul_pd(_mm_set1_pd(c),scx); 
+	scx = cmul(scz,scx); // multiply lookup and approximation
+	return _mm_mul_pd(_mm_set1_pd(c),scx);
 }
 
+//=====================================================================================================================
+
 void CalcInterTerm_opt(const int i,const int j,const int k,doublecomplex * restrict result)
-/* calculates interaction term between two dipoles; given integer distance vector {i,j,k}
- * (in units of d). All six components of the symmetric matrix are computed at once.
- * The elements in result are: [G11, G12, G13, G22, G23, G33]
+/* calculates interaction term between two dipoles; given integer distance vector {i,j,k} (in units of d). All six
+ * components of the symmetric matrix are computed at once. The elements in result are: [G11, G12, G13, G22, G23, G33]
+ * Only 'poi' interaction is implemented in this optimized routine
  */
 {	
-
-// this is used for debugging, should be empty define, when not required
-#define PRINT_GVAL /*printf("%d,%d,%d: %g%+gi, %g%+gi, %g%+gi,\n%g%+gi, %g%+gi, %g%+gi\n",\
-	i,j,k,result[0][RE],result[0][IM],result[1][RE],result[1][IM],result[2][RE],result[2][IM],\
-	result[3][RE],result[3][IM],result[4][RE],result[4][IM],result[5][RE],result[5][IM]);*/
-	
 	double qx = i;
 	double qy = j;
 	double qz = k;
-	
+
 	const double rn = sqrt(qx*qx + qy*qy + qz*qz);
-		
 	const double rr=rn*gridspace;
 	const double invr3=1.0/(rr*rr*rr);
 	const double kr=WaveNum*rr;	
 	const __m128d sc = accImExp(kr,invr3);
-	
 	const double invrn = 1.0/rn;
 	qx *= invrn; qy *= invrn; qz *= invrn;
 	const double qxx=qx*qx, qxy=qx*qy, qxz=qx*qz, qyy=qy*qy, qyz=qy*qz, qzz=qz*qz;
-	
 	const double kr2=kr*kr;
 	const double t1=(3-kr2), t2=-3*kr, t3=(kr2-1);
-	 
+
 	__m128d qff, im_re;
 #define INTERACT_MUL(I)\
-   im_re = cmul(sc,im_re);\
-   _mm_store_pd(result+I,im_re);\
-	
+	im_re = cmul(sc,im_re);\
+	_mm_store_pd(result[I],im_re);
+
 	const __m128d v1 = _mm_set_pd(kr,t3);
 	const __m128d v2 = _mm_set_pd(t2,t1);
-	
+
 	qff = _mm_set1_pd(qxx);
 	im_re = _mm_add_pd(v1,_mm_mul_pd(v2,qff));
 	INTERACT_MUL(0)
-	
+
 	qff = _mm_set1_pd(qxy);
 	im_re = _mm_mul_pd(v2,qff);
 	INTERACT_MUL(1)
-	
+
 	qff = _mm_set1_pd(qxz);
 	im_re = _mm_mul_pd(v2,qff);
 	INTERACT_MUL(2)
-	
+
 	qff = _mm_set1_pd(qyy);
 	im_re = _mm_add_pd(v1,_mm_mul_pd(v2,qff));
 	INTERACT_MUL(3)
-	
+
 	qff = _mm_set1_pd(qyz);
 	im_re = _mm_mul_pd(v2,qff);
 	INTERACT_MUL(4)
-	
+
 	qff = _mm_set1_pd(qzz);
 	im_re = _mm_add_pd(v1,_mm_mul_pd(v2,qff));
 	INTERACT_MUL(5)
-
 #undef INTERACT_MUL
-	
+
 	PRINT_GVAL;
 }
 
 #else //not using SSE3
 
+//=====================================================================================================================
+
 void CalcInterTerm_opt(const int i,const int j,const int k,doublecomplex * restrict result)
-/* calculates interaction term between two dipoles; given integer distance vector {i,j,k}
- * (in units of d). All six components of the symmetric matrix are computed at once.
- * The elements in result are: [G11, G12, G13, G22, G23, G33]
+/* calculates interaction term between two dipoles; given integer distance vector {i,j,k} (in units of d). All six
+ * components of the symmetric matrix are computed at once. The elements in result are: [G11, G12, G13, G22, G23, G33]
  */
 {	
 	double re,im;
-
-// this is used for debugging, should be empty define, when not required
-#define PRINT_GVAL /*printf("%d,%d,%d: %g%+gi, %g%+gi, %g%+gi,\n%g%+gi, %g%+gi, %g%+gi\n",\
-	i,j,k,result[0][RE],result[0][IM],result[1][RE],result[1][IM],result[2][RE],result[2][IM],\
-	result[3][RE],result[3][IM],result[4][RE],result[4][IM],result[5][RE],result[5][IM]);*/
-	
 	double qx = i;
 	double qy = j;
 	double qz = k;
-	
+
 	const double rn = sqrt(qx*qx + qy*qy + qz*qz);
 	const double invrn = 1.0/rn;
 	qx *= invrn; qy *= invrn; qz *= invrn;
 	const double qxx=qx*qx, qxy=qx*qy, qxz=qx*qz, qyy = qy*qy, qyz=qy*qz, qzz=qz*qz;	
-	
 	const double rr=rn*gridspace;
 	const double invr3=1.0/(rr*rr*rr);
 	const double kr=WaveNum*rr;
 	const double kr2=kr*kr;
-	
 	const double cov=cos(kr)*invr3;
 	const double siv=sin(kr)*invr3;
-	
+
 	const double t1=(3-kr2), t2=-3*kr, t3=(kr2-1);
-	
+
 #define INTERACT_MUL(I)\
-   result[I][RE] = re*cov - im*siv;\
+	result[I][RE] = re*cov - im*siv;\
 	result[I][IM] = re*siv + im*cov;
-	
+
 	re = t1*qxx + t3;
 	im = kr + t2*qxx;
 	INTERACT_MUL(0)	
@@ -240,18 +206,15 @@ void CalcInterTerm_opt(const int i,const int j,const int k,doublecomplex * restr
 	re = t1*qzz + t3;
 	im = kr + t2*qzz;
 	INTERACT_MUL(5)
-	
+
 #undef INTERACT_MUL	
-	
+
 	PRINT_GVAL;
 }
 
 #endif //USE_SSE3
 
-// sinint.c
-void cisi(double x,double *ci,double *si);
-
-//============================================================
+//=====================================================================================================================
 
 INLINE bool TestTableSize(const double rn)
 // tests if rn fits into the table; if not, returns false and produces info message
@@ -269,12 +232,11 @@ INLINE bool TestTableSize(const double rn)
 	else return true;
 }
 
-//============================================================
+//=====================================================================================================================
 
 void CalcInterTerm_complete(const int i,const int j,const int k,doublecomplex * restrict result)
-/* calculates interaction term between two dipoles; given integer distance vector {i,j,k}
- * (in units of d). All six components of the symmetric matrix are computed at once.
- * The elements in result are: [G11, G12, G13, G22, G23, G33]
+/* calculates interaction term between two dipoles; given integer distance vector {i,j,k} (in units of d). All six
+ * components of the symmetric matrix are computed at once. The elements in result are: [G11, G12, G13, G22, G23, G33]
  */
 {
 	static double rr,qvec[3],q2[3],invr3,qavec[3],av[3];
@@ -288,16 +250,6 @@ void CalcInterTerm_complete(const int i,const int j,const int k,doublecomplex *
 	static double t3q,t3a,t4q,t4a,t5tr,t5aa,t6tr,t6aa;
 	static const bool inter_avg=true; // temporary fixed option for SO formulation
 
-// this is used for debugging, should be empty define, when not required
-#define PRINT_GVAL /*printf("%d,%d,%d: %g%+gi, %g%+gi, %g%+gi,\n%g%+gi, %g%+gi, %g%+gi\n",\
-	i,j,k,result[0][RE],result[0][IM],result[1][RE],result[1][IM],result[2][RE],result[2][IM],\
-	result[3][RE],result[3][IM],result[4][RE],result[4][IM],result[5][RE],result[5][IM]);*/ 
-
-	// self interaction; self term is computed in different subroutine
-	if (!(i || j || k)) for (comp=0;comp<NDCOMP;comp++) {
-		result[comp][RE]=result[comp][IM]=0;
-		return;
-	}
 	//====== calculate some basic constants ======
 	qvec[0]=i; // qvec is normalized below (after IGT)
 	qvec[1]=j;
@@ -341,16 +293,14 @@ void CalcInterTerm_complete(const int i,const int j,const int k,doublecomplex *
 		cMult(br,expval,result[comp]);
 	}
 	//====== FCD (static and full) ========
-	/* speed of FCD can be improved by using faster version of sici routine, using predefined
-	 * tables, etc (e.g. as is done in GSL library). But currently extra time for this computation
-	 * is already smaller than one main iteration.
+	/* speed of FCD can be improved by using faster version of sici routine, using predefined tables, etc (e.g. as is
+	 * done in GSL library). But currently extra time for this computation is already smaller than one main iteration.
 	 */
 	if (IntRelation==G_FCD_ST) {
-		/* FCD is based on Gay-Balmaz P., Martin O.J.F. "A library for computing the filtered and
-		 * non-filtered 3D Green's tensor associated with infinite homogeneous space and surfaces",
-		 * Comp. Phys. Comm. 144:111-120 (2002), and Piller N.B. "Increasing the performance of the
-		 * coupled-dipole approximation: A spectral approach", IEEE Trans.Ant.Propag. 46(8):
-		 * 1126-1137. Here it differs by a factor of 4*pi*k^2.
+		/* FCD is based on Gay-Balmaz P., Martin O.J.F. "A library for computing the filtered and non-filtered 3D
+		 * Green's tensor associated with infinite homogeneous space and surfaces", Comp. Phys. Comm. 144:111-120
+		 * (2002), and Piller N.B. "Increasing the performance of the coupled-dipole approximation: A spectral
+		 * approach", IEEE Trans.Ant.Propag. 46(8): 1126-1137. Here it differs by a factor of 4*pi*k^2.
 		 */
 		// result = Gp*[3*Si(k_F*r)+k_F*r*cos(k_F*r)-4*sin(k_F*r)]*2/(3*pi)
 		cisi(kfr,&ci,&si);
@@ -378,8 +328,8 @@ void CalcInterTerm_complete(const int i,const int j,const int k,doublecomplex *
 	}
 	//======= second order corrections ========
 	else if (IntRelation==G_IGT_SO) {
-		/* There is still some space for speed optimization here (e.g. move mu,nu-independent
-		 * operations out of the cycles over components).
+		/* There is still some space for speed optimization here (e.g. move mu,nu-independent operations out of the
+		 * cycles over components).
 		 */
 		kd2=kd*kd;
 		if (kr*rn < G_BOUND_CLOSE && TestTableSize(rn)) {
@@ -449,15 +399,14 @@ void CalcInterTerm_complete(const int i,const int j,const int k,doublecomplex *
 			// cycle over tensor components
 			for (mu=0,comp=0;mu<3;mu++) for (nu=mu;nu<3;nu++,comp++) {
 				sig=sigV[mu]*sigV[nu]; // sign of some terms below
-				/* indexes for tables of different dimensions based on transformed indices mu and nu
-				 * '...munu' variables are invariant to permutations because both constituent
-				 * vectors and indices are permutated. So this variables can be used, as precomputed
-				 * above.
+				/* indexes for tables of different dimensions based on transformed indices mu and nu '...munu' variables
+				 * are invariant to permutations because both constituent vectors and indices are permutated. So this
+				 * variables can be used, as precomputed above.
 				 */
 				mu1=invord[mu];
 				nu1=invord[nu];
-				/* indmunu is a number of component[mu,nu] in a symmetric matrix, but counted
-				 * differently than comp. This is {{0,1,3},{1,2,4},{3,4,5}}
+				/* indmunu is a number of component[mu,nu] in a symmetric matrix, but counted differently than comp.
+				 * This is {{0,1,3},{1,2,4},{3,4,5}}
 				 */
 				indmunu=mu1+nu1;
 				if (mu1==2 || nu1==2) indmunu++;
@@ -519,9 +468,9 @@ void CalcInterTerm_complete(const int i,const int j,const int k,doublecomplex *
 		}
 	}
 	else if (IntRelation==G_SO) {
-		/* There is still some space for speed optimization here (e.g. move mu,nu-independent
-		 * operations out of the cycles over components). But now extra time is equivalent to 2-3
-		 * main iterations. So first priority is to make something useful out of SO.
+		/* There is still some space for speed optimization here (e.g. move mu,nu-independent operations out of the
+		 * cycles over components). But now extra time is equivalent to 2-3 main iterations. So first priority is to
+		 * make something useful out of SO.
 		 */
 		// next line should never happen
 		if (anisotropy) LogError(ONE_POS,"Incompatibility error in CalcInterTerm");
@@ -609,15 +558,14 @@ void CalcInterTerm_complete(const int i,const int j,const int k,doublecomplex *
 			ind2m=6*ind0;			// cycle over tensor components
 			for (mu=0,comp=0;mu<3;mu++) for (nu=mu;nu<3;nu++,comp++) {
 				sig=sigV[mu]*sigV[nu]; // sign of some terms below
-				/* indexes for tables of different dimensions based on transformed indices mu and nu
-				 * '...munu' variables are invariant to permutations because both constituent
-				 * vectors and indices are permutated. So this variables can be used, as precomputed
-				 * above.
+				/* indexes for tables of different dimensions based on transformed indices mu and nu '...munu' variables
+				 * are invariant to permutations because both constituent vectors and indices are permutated. So this
+				 * variables can be used, as precomputed above.
 				 */
 				mu1=invord[mu];
 				nu1=invord[nu];
-				/* indmunu is a number of component[mu,nu] in a symmetric matrix, but counted
-				 * differently than comp. This is {{0,1,3},{1,2,4},{3,4,5}}
+				/* indmunu is a number of component[mu,nu] in a symmetric matrix, but counted differently than comp.
+				 * This is {{0,1,3},{1,2,4},{3,4,5}}
 				 */
 				indmunu=mu1+nu1;
 				if (mu1==2 || nu1==2) indmunu++;
@@ -719,9 +667,7 @@ void CalcInterTerm_complete(const int i,const int j,const int k,doublecomplex *
 			for (comp=0;comp<NDCOMP;comp++) cMultSelf(result[comp],br);
 			//==== compute and add Gf1 ===
 			if (!inter_avg) for (comp=0;comp<NDCOMP;comp++) {
-				/* br = {delta[mu,nu]*(3-3ikr-2kr^2+ikr^3)-qmunu*(15-15ikr-6kr^2+ikr^3)}*qa
-				 *    + qamunu*(3-3ikr-kr^2)
-				 */
+				// br = {delta[mu,nu]*(3-3ikr-2kr^2+ikr^3)-qmunu*(15-15ikr-6kr^2+ikr^3)}*qa + qamunu*(3-3ikr-kr^2)
 				br[RE]=(6*kr2-15)*qmunu[comp];
 				br[IM]=(15*kr-kr3)*qmunu[comp];
 				if(dmunu[comp]) {
@@ -759,8 +705,8 @@ void CalcInterTerm_complete(const int i,const int j,const int k,doublecomplex *
 						// Gm1=expval*i*m*temp; temp is set anew
 						vMult(qvec,prop,qavec);
 						temp = 3*qa*(dmunu[comp]-7*qmunu[comp]) + 6*dmunu[comp]*qvec[mu]*prop[mu]
-							 - 7*(dmunu[comp]-9*qmunu[comp])*DotProd(qavec,q2)
-							 + 3*(prop[mu]*qvec[nu]*(1-7*q2[mu])+prop[nu]*qvec[mu]*(1-7*q2[nu]));
+						     - 7*(dmunu[comp]-9*qmunu[comp])*DotProd(qavec,q2)
+						     + 3*(prop[mu]*qvec[nu]*(1-7*q2[mu])+prop[nu]*qvec[mu]*(1-7*q2[nu]));
 						temp*=kd*invrn3/48;
 						cMultReal(temp,m,Gm1);
 						cMult_i(Gm1);
@@ -777,4 +723,122 @@ void CalcInterTerm_complete(const int i,const int j,const int k,doublecomplex *
 	PRINT_GVAL;
 }
 
-#undef PRINT_GVAL
+//=====================================================================================================================
+
+static double * ATT_MALLOC ReadTableFile(const char * restrict sh_fname,const int size_multiplier)
+{
+	FILE * restrict ftab;
+	double * restrict tab_n;
+	int size;
+	char fname[MAX_FNAME];
+	int i;
+
+	size=TAB_SIZE*size_multiplier;
+	memory+=size*sizeof(double);
+	if (!prognosis) {
+		// allocate memory for tab_n
+		MALLOC_VECTOR(tab_n,double,size,ALL);
+		// open file
+		SnprintfErr(ALL_POS,fname,MAX_FNAME,TAB_PATH"%s",sh_fname);
+		ftab=FOpenErr(fname,"r",ALL_POS);
+		// scan file
+		for (i=0; i<size; i++) if (fscanf(ftab,"%lf\t",&(tab_n[i]))!=1)
+			LogError(ALL_POS,"Scan error in file '%s'. Probably file is too small",fname);
+		if (!feof(ftab))
+			LogWarning(EC_WARN,ONE_POS,"File '%s' is longer than specified size (%d)",fname,size);
+		// close file
+		FCloseErr(ftab,fname,ALL_POS);
+		return tab_n;
+	}
+	else return NULL;
+}
+
+//=====================================================================================================================
+
+static void ReadTables(void)
+{
+	int i, j, ymax, Rm2, Rm2x;
+
+	TIME_TYPE tstart=GET_TIME();
+	tab1=ReadTableFile(TAB_FNAME(1),1);
+	tab2=ReadTableFile(TAB_FNAME(2),6);
+	tab3=ReadTableFile(TAB_FNAME(3),3);
+	tab4=ReadTableFile(TAB_FNAME(4),18);
+	tab5=ReadTableFile(TAB_FNAME(5),6);
+	tab6=ReadTableFile(TAB_FNAME(6),36);
+	tab7=ReadTableFile(TAB_FNAME(7),1);
+	tab8=ReadTableFile(TAB_FNAME(8),6);
+	tab9=ReadTableFile(TAB_FNAME(9),1);
+	tab10=ReadTableFile(TAB_FNAME(10),6);
+	Timing_FileIO += GET_TIME() - tstart;
+
+	if (!prognosis) {
+		// allocate memory for tab_index
+		MALLOC_IMATRIX(tab_index,1,TAB_RMAX,0,TAB_RMAX,ALL);
+		// fill tab_index
+		Rm2=TAB_RMAX*TAB_RMAX;
+		tab_index[1][0] = 0;
+		for (i=1; i<=TAB_RMAX; i++) {
+			Rm2x=Rm2-i*i;
+			ymax = MIN(i,(int)floor(sqrt(Rm2x)));
+			for (j=0; j<ymax; j++) {
+				tab_index[i][j+1] = tab_index[i][j] + MIN(j,(int)floor(sqrt(Rm2x-j*j)))+1;
+			}
+			if (i<TAB_RMAX) tab_index[i+1][0] = tab_index[i][ymax]
+			                                  + MIN(ymax,(int)floor(sqrt(Rm2x-ymax*ymax)))+1;
+		}
+	}
+	// PRINTZ("P[5,3]=%d (should be 41)\n",tab_index[5][3]);
+}
+
+//=====================================================================================================================
+
+static void FreeTables(void)
+{
+	Free_iMatrix(tab_index,1,TAB_RMAX,0);
+	Free_general(tab1);
+	Free_general(tab2);
+	Free_general(tab3);
+	Free_general(tab4);
+	Free_general(tab5);
+	Free_general(tab6);
+	Free_general(tab7);
+	Free_general(tab8);
+	Free_general(tab9);
+	Free_general(tab10);
+}
+
+//=====================================================================================================================
+
+void InitInteraction(void)
+// Initialize the interaction calculations
+{
+	if (IntRelation==G_POINT_DIP) {
+		CalcInterTerm = &CalcInterTerm_opt;
+#ifdef USE_SSE3
+		c1 = _mm_set_pd(1.34959795251974073996e-11,3.92582397764340914444e-14);
+		c2 = _mm_set_pd(-8.86096155697856783296e-7,-3.86632385155548605680e-9);
+		c3 = _mm_set_pd(1.74532925199432957214e-2,1.52308709893354299569e-4);
+		zo = _mm_set_pd(0.0,1.0);
+		inv_2pi = _mm_set_sd(1.0/(2*PI));
+		p360 = _mm_set_sd(360.0);
+		prad_to_deg = _mm_set_sd(180.0/PI);
+
+		for (unsigned int i=0; i<=360; i++) {
+			double x = (PI/180.0)*(double)i;
+			exptbl[i] = _mm_set_pd(sin(x),cos(x));
+		}
+#endif
+	}
+	else CalcInterTerm = &CalcInterTerm_complete;
+	// read tables if needed
+	if (IntRelation == G_SO || IntRelation == G_IGT_SO) ReadTables();
+}
+
+//=====================================================================================================================
+
+void FreeInteraction(void)
+// Free buffers used for interaction calculation
+{
+	if (IntRelation == G_SO || IntRelation == G_IGT_SO) FreeTables();
+}
diff --git a/src/interaction.h b/src/interaction.h
index b3a25bba..e4add71f 100644
--- a/src/interaction.h
+++ b/src/interaction.h
@@ -1,36 +1,25 @@
 /* FILE : interaction.h
  * Descr: the functions used to calculate the interaction term
  *
- * Copyright (C) 2006-2012 ADDA contributors
+ * Copyright (C) 2011-2013 ADDA contributors
  * This file is part of ADDA.
  *
- * ADDA is free software: you can redistribute it and/or modify it under the terms of the GNU
- * General Public License as published by the Free Software Foundation, either version 3 of the
- * License, or (at your option) any later version.
+ * ADDA is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
  *
- * ADDA is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even
- * the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
- * Public License for more details.
+ * ADDA is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License along with ADDA. If not, see
  * <http://www.gnu.org/licenses/>.
  */
-
 #ifndef __interaction_h
 #define __interaction_h
 
 #include "cmplx.h"
 
-extern double gridspace, WaveNum;
-
-#ifdef USE_SSE3
-extern __m128d c1, c2, c3, zo, inv_2pi, p360, prad_to_deg;
-extern __m128d exptbl[361];
-#endif //USE_SSE3
-
-extern void (*CalcInterTerm)(const int i,const int j,const int k,doublecomplex * restrict result);
-extern void InitInteraction(void);
-
-//============================================================
+void (*CalcInterTerm)(const int i,const int j,const int k,doublecomplex * restrict result);
+void InitInteraction(void);
+void FreeInteraction(void);
 
 #endif //__interaction_h
diff --git a/src/iterative.c b/src/iterative.c
index b65a400c..2bf3cd67 100644
--- a/src/iterative.c
+++ b/src/iterative.c
@@ -10,7 +10,7 @@
  *        (e.g. -int so), however they do it much slowly than usually. It is recommended then to use
  *        BiCGStab.
  *
- * Copyright (C) 2006-2012 ADDA contributors
+ * Copyright (C) 2006-2013 ADDA contributors
  * This file is part of ADDA.
  *
  * ADDA is free software: you can redistribute it and/or modify it under the terms of the GNU
@@ -49,7 +49,7 @@ extern const TIME_TYPE tstart_CE;
 extern doublecomplex *rvec; // can't be declared restrict due to SwapPointers
 extern doublecomplex * restrict vec1,* restrict vec2,* restrict vec3,* restrict Avecbuffer;
 // defined and initialized in fft.c
-#ifndef OPENCL
+#if !defined(OPENCL) && !defined(SPARSE)
 extern doublecomplex * restrict Xmatrix; // used as storage for arrays in WKB init field
 #endif
 // defined and initialized in param.c
@@ -366,7 +366,7 @@ ITER_FUNC(BiCG_CS)
 		scalars[0].ptr=&ro_old;
 		scalars[0].size=sizeof(doublecomplex);
 	}
-	else if (ph==PHASE_INIT); // no specific initialization required
+	else if (ph==PHASE_INIT) {} // no specific initialization required
 	// main iteration cycle
 	else if (ph==PHASE_ITER) {
 		// ro_k-1=r_k-1(*).r_k-1; check for ro_k-1!=0
@@ -384,7 +384,7 @@ ITER_FUNC(BiCG_CS)
 			nIncrem10_cmplx(pvec,rvec,beta,NULL,NULL);
 		}
 		// q_k=Avecbuffer=A.p_k
-		if (niter==1 && matvec_ready); // do nothing, Avecbuffer is ready to use
+		if (niter==1 && matvec_ready) {} // do nothing, Avecbuffer is ready to use
 		else MatVec(pvec,Avecbuffer,NULL,false,&Timing_OneIterComm);
 		// mu_k=p_k.q_k; check for mu_k!=0
 		nDotProd_conj(pvec,Avecbuffer,mu,&Timing_OneIterComm);
@@ -514,7 +514,7 @@ ITER_FUNC(CGNR)
 		scalars[0].ptr=&ro_old;
 		scalars[0].size=sizeof(double);
 	}
-	else if (ph==PHASE_INIT); // no specific initialization required
+	else if (ph==PHASE_INIT) {} // no specific initialization required
 	else if (ph==PHASE_ITER) {
 		// p_1=Ah.r_0 and ro_new=ro_0=|Ah.r_0|^2
 		// since first product is with Ah , matvec_ready can't be employed
@@ -1061,7 +1061,7 @@ static const char *CalcInitField(double zero_resid)
 		nSubtr(rvec,pvec,Avecbuffer,&inprodR,&Timing_InitIterComm);
 		descr="x_0 = E_inc\n";
 	}
-#ifndef ADDA_SPARSE	//currently no support for WKB in sparse mode
+#ifndef SPARSE	//currently no support for WKB in sparse mode
 	else if (InitField==IF_WKB) {
 		if (prop[2]!=1) LogError(ONE_POS,"WKB initial field currently works only with default "
 			"incident direction of the incoming wave (along z-axis)");
@@ -1160,7 +1160,7 @@ static const char *CalcInitField(double zero_resid)
 		nSubtr(rvec,pvec,Avecbuffer,&inprodR,&Timing_InitIterComm);
 		descr="x_0 = result of WKB\n";
 	} // redundant test
-#endif //ADDA_SPARSE	
+#endif // !SPARSE
 	else LogError(ONE_POS,"Unknown method to calculate initial field (%d)",(int)InitField);
 	return descr;
 }
diff --git a/src/make_particle.c b/src/make_particle.c
index 252fbb3e..907a4480 100644
--- a/src/make_particle.c
+++ b/src/make_particle.c
@@ -3,7 +3,7 @@
  * Descr: this module initializes the dipole set, either using predefined shapes or reading from a
  *        file; includes granule generator
  *
- * Copyright (C) 2006-2012 ADDA contributors
+ * Copyright (C) 2006-2013 ADDA contributors
  * This file is part of ADDA.
  *
  * ADDA is free software: you can redistribute it and/or modify it under the terms of the GNU
@@ -43,28 +43,33 @@
 // SEMI-GLOBAL VARIABLES
 
 // defined and initialized in param.c
-extern const int sh_Npars;
 extern const enum sh shape;
-extern const double sh_pars[];
 extern const enum sym sym_type;
 extern const double lambda;
 extern double sizeX,dpl,a_eq;
 extern const int jagged;
 extern const char *shape_fname;
 extern const char *shapename;
-extern const char *save_geom_fname;
 extern const bool volcor,save_geom;
 extern opt_index opt_sh;
+#ifndef SPARSE
+extern const int sh_Npars;
+extern const double sh_pars[];
+extern const char *save_geom_fname;
 extern const double gr_vf;
 extern double gr_d;
 extern const int gr_mat;
 extern enum shform sg_format;
 extern bool store_grans;
+#endif
 
 // defined and initialized in timing.c
-extern TIME_TYPE Timing_Particle,Timing_Granul,Timing_GranulComm;
+extern TIME_TYPE Timing_Particle;
+#ifndef SPARSE
+extern TIME_TYPE Timing_Granul,Timing_GranulComm;
+#endif
 
-// used in fft.c
+// used in interaction.c
 double gridspace; // interdipole distance (dipole size)
 
 // used in param.c
@@ -83,7 +88,9 @@ static const char geom_format_ext[]="%d %d %d %d\n";       // extended format of
  */
 static const char ddscat_format_read1[]="%*s %d %d %d %d %d %d\n";
 static const char ddscat_format_read2[]="%*s %d %d %d %d";
+#ifndef SPARSE
 static const char ddscat_format_write[]="%zu %d %d %d %d %d %d\n";
+#endif
 // ratio of scatterer volume to enclosing cube; used for dpl correction and initialization by a_eq
 static double volume_ratio;
 static double Ndip;             // total number of dipoles (in a circumscribing cube)
@@ -93,7 +100,7 @@ static FILE * restrict dipfile; // handle of dipole file
 static enum shform read_format; // format of dipole file, which is read
 static double cX,cY,cZ;         // center for DipoleCoord, it is sometimes used in PlaceGranules
 
-#ifndef ADDA_SPARSE
+#ifndef SPARSE
 
 // shape parameters
 static double coat_x,coat_y,coat_z,coat_r2;
@@ -133,11 +140,11 @@ struct segment * restrict contSeg;
 
 // temporary arrays before their real counterparts are allocated
 static unsigned char * restrict material_tmp;
-static double * restrict DipoleCoord_tmp;
 static unsigned short * restrict position_tmp;
-#endif //ADDA_SPARSE
 
-#ifndef ADDA_SPARSE //much of the functionality here is disabled in sparse mode
+#endif // !SPARSE
+
+#ifndef SPARSE //much of the functionality here is disabled in sparse mode
 
 // EXTERNAL FUNCTIONS
 
@@ -231,8 +238,6 @@ static void SaveGeometry(void)
 	Timing_FileIO+=GET_TIME()-tstart;
 }
 
-//===========================================================
-
 //==========================================================
 #define ALLOCATE_SEGMENTS(N) (struct segment *)voidVector((N)*sizeof(struct segment),ALL_POS,\
 	"contour segment");
@@ -1037,7 +1042,7 @@ static size_t PlaceGranules(void)
 
 //===========================================================
 
-#endif //ADDA_SPARSE
+#endif // !SPARSE
 
 static void InitDipFile(const char * restrict fname,int *bX,int *bY,int *bZ,int *Nm,
 	const char **rft)
@@ -1227,17 +1232,21 @@ static void InitDipFile(const char * restrict fname,int *bX,int *bY,int *bZ,int
 //===========================================================
 
 static void ReadDipFile(const char * restrict fname)
-/* read dipole file; no consistency checks are made since they are made in InitDipFile.
- * the file is opened in InitDipFile; this function only closes the file.
+/* read dipole file; no consistency checks are made since they are made in InitDipFile. The file is opened in
+ * InitDipFile; this function only closes the file.
+ *
+ * The operation is quite different in FFT and sparse modes. In FFT mode only material is set here, while position is
+ * set in the main loop over shapes in MakeParticle(). By contrast, in sparse mode position is set here as well (since
+ * the loop in MakeParticle() is skipped altogether.
  */
 {
 	int x,y,z,x0,y0,z0,mat,scanned;
 	size_t index=0;
 	char linebuf[BUF_LINE];	
-#ifndef ADDA_SPARSE
+#ifndef SPARSE
 	// to remove possible overflows
 	size_t boxX_l=(size_t)boxX;
-#endif //ADDA_SPARSE
+#endif // !SPARSE
 
 	TIME_TYPE tstart=GET_TIME();
 	
@@ -1262,26 +1271,25 @@ static void ReadDipFile(const char * restrict fname)
 			y0-=minY;
 			z0-=minZ;
 			// initialize box jagged*jagged*jagged instead of one dipole
-#ifndef ADDA_SPARSE
+#ifndef SPARSE
 			for (z=jagged*z0;z<jagged*(z0+1);z++) if (z>=local_z0 && z<local_z1_coer)
 				for (x=jagged*x0;x<jagged*(x0+1);x++) for (y=jagged*y0;y<jagged*(y0+1);y++) {
 					index=(z-local_z0)*boxXY+y*boxX_l+x;
 					material_tmp[index]=(unsigned char)(mat-1);
 			}
 #else
-			
 			for (z=0;z<jagged;z++) 
 			for (y=0;y<jagged;y++) 
 			for (x=0;x<jagged;x++) {
 				if ((index >= local_nvoid_d0) && (index < local_nvoid_d1)) {
-					material_full[index]=(unsigned char)(mat-1);
+					material[index-local_nvoid_d0]=(unsigned char)(mat-1);
 					position_full[3*index]=x0*jagged+x;
 					position_full[3*index+1]=y0*jagged+y;
 					position_full[3*index+2]=z0*jagged+z;
 				}
 				index++;
 			}
-#endif //ADDA_SPARSE
+#endif // SPARSE
 		}
 	}
 	
@@ -1333,7 +1341,7 @@ void InitShape(void)
 	double n_sizeX; // new value for size
 	double yx_ratio,zx_ratio;
 	double tmp1,tmp2;
-#ifndef ADDA_SPARSE
+#ifndef SPARSE
 	double tmp3;
 #endif
 	TIME_TYPE tstart;
@@ -1371,7 +1379,7 @@ void InitShape(void)
 	// initialization of global option index for error messages
 	opt=opt_sh;
 	// shape initialization
-#ifndef ADDA_SPARSE //shapes other than "read" are disabled in sparse mode
+#ifndef SPARSE //shapes other than "read" are disabled in sparse mode
 	if (shape==SH_AXISYMMETRIC) {
 		/* Axisymmetric homogeneous shape, defined by its contour in ro-z plane of the cylindrical
 		 * coordinate system. Its symmetry axis coincides with the z-axis, and the contour is read
@@ -1782,7 +1790,7 @@ void InitShape(void)
 		Nmat_need=2;
 	}	
 	else 
-#endif //ADDA_SPARSE	
+#endif // !SPARSE
 	if (shape==SH_READ) {
 		symX=symY=symZ=symR=false; // input file is assumed fully asymmetric
 		const char *rf_text;
@@ -1826,11 +1834,11 @@ void InitShape(void)
 	 *   define your own (with more informative names) in the beginning of this function.
 	 */
 	else 
-#ifndef ADDA_SPARSE 
+#ifndef SPARSE 
 		LogError(ONE_POS,"Unknown shape"); // this is mainly to remove 'uninitialized' warnings
 #else
 		LogError(ONE_POS,"Only shapes read from a file are currently supported in sparse mode"); 
-#endif //ADDA_SPARSE
+#endif // SPARSE
 
 	// check for redundancy of input data
 	if (dpl!=UNDEF) {
@@ -1848,7 +1856,7 @@ void InitShape(void)
 				"while shape '%s' sets both the particle size and the size of the grid",shapename);
 		}
 	}
-#ifndef ADDA_SPARSE //granulation disabled in sparse mode
+#ifndef SPARSE //granulation disabled in sparse mode
 	// initialize domain granulation
 	if (sh_granul) {
 		symX=symY=symZ=symR=false; // no symmetry with granules
@@ -1961,13 +1969,13 @@ void InitShape(void)
 			PrintError("Particle (boxY,Z={%d,%d}) does not fit into specified boxY,Z={%d,%d}",
 				n_boxY,n_boxZ,boxY,boxZ);
 	}
-#ifndef ADDA_SPARSE //this check is not needed in sparse mode
+#ifndef SPARSE //this check is not needed in sparse mode
 	// initialize number of dipoles; first check that it fits into size_t type
 	double tmp=((double)boxX)*((double)boxY)*((double)boxZ);
 	if (tmp > SIZE_MAX) LogError(ONE_POS,"Total number of dipoles in the circumscribing "
 		"box (%.0f) is larger than supported by size_t type on this system (%zu). If possible, "
 		"please recompile ADDA in 64-bit mode.",tmp,SIZE_MAX);
-#endif //ADDA_SPARSE
+#endif // !SPARSE
 	Ndip=boxX*boxY*boxZ;
 	// initialize maxiter; not very realistic
 	if (maxiter==UNDEF) maxiter=MIN(INT_MAX,3*Ndip);
@@ -1991,11 +1999,10 @@ void MakeParticle(void)
 // creates a particle; initializes all dipoles counts, dpl, gridspace
 {
 	
-	size_t index;
+	size_t index,dip,i3;
 	TIME_TYPE tstart;
-#ifndef ADDA_SPARSE
 	int i;
-	size_t dip;
+#ifndef SPARSE
 	double jcX,jcY,jcZ; // center for jagged
 	size_t local_nRows_tmp;
 	int j,k,ns;	
@@ -2007,7 +2014,7 @@ void MakeParticle(void)
 	int mat;
 	unsigned short us_tmp;
 	TIME_TYPE tgran;
-#endif //ADDA_SPARSE
+#endif // !SPARSE
 
 	/* TO ADD NEW SHAPE
 	 * Add here all intermediate variables, which are used only inside this function. You may as
@@ -2020,7 +2027,7 @@ void MakeParticle(void)
 	cY=(boxY-1)/2.0;
 	cZ=(boxZ-1)/2.0;
 		
-#ifndef ADDA_SPARSE //shapes other than "read" are disabled in sparse mode
+#ifndef SPARSE //shapes other than "read" are disabled in sparse mode
 	index=0;	
 	// assumed that box's are even
 	jcX=jcY=jcZ=jagged/2.0;
@@ -2031,7 +2038,6 @@ void MakeParticle(void)
 	 * they will be reallocated afterwards (when local_nRows is known).
 	 */
 	MALLOC_VECTOR(material_tmp,uchar,local_Ndip,ALL);
-	MALLOC_VECTOR(DipoleCoord_tmp,double,local_nRows_tmp,ALL);
 	MALLOC_VECTOR(position_tmp,ushort,local_nRows_tmp,ALL);
 
 	for(k=local_z0;k<local_z1_coer;k++)
@@ -2195,49 +2201,35 @@ void MakeParticle(void)
 				 * only in this part of the code), either use 'tmp1'-'tmp3' or define your own (with
 				 * more informative names) in the beginning of this function.
 				 */
-
 				position_tmp[3*index]=(unsigned short)i;
 				position_tmp[3*index+1]=(unsigned short)j;
 				position_tmp[3*index+2]=(unsigned short)k;
 				// afterwards multiplied by gridspace
-				DipoleCoord_tmp[3*index]=i-cX;
-				DipoleCoord_tmp[3*index+1]=j-cY;
-				DipoleCoord_tmp[3*index+2]=k-cZ;
 				material_tmp[index]=(unsigned char)mat;
 				index++;
 			} // End box loop
-#endif //ADDA_SPARSE
-
-#ifdef ADDA_SPARSE
-	MALLOC_VECTOR(material_full,uchar,nvoid_Ndip,ALL);
-	MALLOC_VECTOR(position_full,int,nvoid_Ndip*3,ALL);
-	memory+=(sizeof(char)+sizeof(int)*3)*nvoid_Ndip;
-
-	//for sparse mode, nvoid_Ndip is defined in InitDipFile
-	//so we actually run SetupLocalD before reading the file
-	//(which requires local_nvoid_d0 and local_nvoid_d1 to be defined)
-	SetupLocalD();
+#else // SPARSE
+	// local_nvoid_d0 and local_nvoid_d1 are set earlier in ParSetup()
 	local_nvoid_Ndip=local_nvoid_d1-local_nvoid_d0;
-	local_Ndip = local_nvoid_Ndip;
-#endif
-
+	local_Ndip=local_nvoid_Ndip;
+	MALLOC_VECTOR(material,uchar,local_nvoid_Ndip,ALL);
+	MALLOC_VECTOR(position_full,int,nvoid_Ndip*3,ALL);
+	memory+=3*sizeof(int)*nvoid_Ndip+sizeof(char)*local_nvoid_Ndip;
+#endif // SPARSE
 	if (shape==SH_READ) ReadDipFile(shape_fname);
 	// initialization of mat_count and dipoles counts
-	
-#ifndef ADDA_SPARSE
 	for(i=0;i<=Nmat;i++) mat_count[i]=0;
+#ifdef SPARSE
+	for(dip=0;dip<local_Ndip;dip++) mat_count[material[dip]]++;
+	MyInnerProduct(mat_count,sizet_type,Nmat+1,NULL);
+#else
 	for(dip=0;dip<local_Ndip;dip++) mat_count[material_tmp[dip]]++;
 	local_nvoid_Ndip=local_Ndip-mat_count[Nmat];
 	SetupLocalD();
 	MyInnerProduct(mat_count,sizet_type,Nmat+1,NULL);
-	if ((nvoid_Ndip=Ndip-mat_count[Nmat])==0)
-		LogError(ONE_POS,"All dipoles of the scatterer are void");
-#endif //ADDA_SPARSE
-
-
-	
-	if (nvoid_Ndip==0)
-		LogError(ONE_POS,"All dipoles of the scatterer are void");
+	nvoid_Ndip=Ndip-mat_count[Nmat];
+#endif // !SPARSE
+	if (nvoid_Ndip==0) LogError(ONE_POS,"All dipoles of the scatterer are void");
 	local_nRows=3*local_nvoid_Ndip;
 	// initialize dpl and gridspace
 	volcor_used=(volcor && (volume_ratio!=UNDEF));
@@ -2268,7 +2260,7 @@ void MakeParticle(void)
 	ka_eq = WaveNum*a_eq;
 	inv_G = 1/(PI*a_eq*a_eq);
 	
-#ifndef ADDA_SPARSE
+#ifndef SPARSE
 	// granulate one domain, if needed
 	if (sh_granul) {
 		tgran=GET_TIME();
@@ -2296,61 +2288,50 @@ void MakeParticle(void)
 		mat_count[gr_mat]-=mat_count[Nmat-1];
 		Timing_Granul=GET_TIME()-tgran;
 	}
-#endif //ADDA_SPARSE
-	
 	/* allocate main particle arrays, using precise local_nRows even when prognosis is used to enable
 	 * save_geom afterwards.
 	 */
-MALLOC_VECTOR(material,uchar,local_nvoid_Ndip,ALL);
-MALLOC_VECTOR(DipoleCoord,double,local_nRows,ALL);
-#ifndef ADDA_SPARSE	
+	MALLOC_VECTOR(material,uchar,local_nvoid_Ndip,ALL);
 	MALLOC_VECTOR(position,ushort,local_nRows,ALL);
-	memory+=(3*(sizeof(short int)+sizeof(double))+sizeof(char))*local_nvoid_Ndip;
-#else
-	MALLOC_VECTOR(position,int,local_nRows,ALL);
-	memory+=(3*(sizeof(int)+sizeof(double))+sizeof(char))*local_nvoid_Ndip;
-	
-	memcpy(material, &(material_full[local_nvoid_d0]), local_nvoid_Ndip*sizeof(*material_full));
-	memcpy(position, &(position_full[3*local_nvoid_d0]), local_nRows*sizeof(*position_full));      
-
-	for (index=0; index<local_nvoid_Ndip; index++) {
-		DipoleCoord[3*index] = (position[3*index] - cX) * gridspace;
-		DipoleCoord[3*index+1] = (position[3*index+1] - cY) * gridspace;
-		DipoleCoord[3*index+2] = (position[3*index+2] - cZ) * gridspace;
-	}
-#endif
-
-#ifndef ADDA_SPARSE	
+	memory+=(3*sizeof(short int)+sizeof(char))*local_nvoid_Ndip;
 	// copy nontrivial part of arrays
 	index=0;
 	for (dip=0;dip<local_Ndip;dip++) if (material_tmp[dip]<Nmat) {
 		material[index]=material_tmp[dip];
-		// DipoleCoord=gridspace*DipoleCoord_tmp
-		vMultScal(gridspace,DipoleCoord_tmp+3*dip,DipoleCoord+3*index);
 		memcpy(position+3*index,position_tmp+3*dip,3*sizeof(short int));
 		index++;
 	}
 
 	// free temporary memory
 	Free_general(material_tmp);
-	Free_general(DipoleCoord_tmp);
 	Free_general(position_tmp);
 	if (shape==SH_AXISYMMETRIC) {
 		for (ns=0;ns<contNseg;ns++) FreeContourSegment(contSeg+ns);
 		Free_general(contSegRoMin);
 		Free_general(contSegRoMax);
 	}
-#endif
+#else
+	position=position_full + 3*local_nvoid_d0;
+#endif // SPARSE
+	// initialize DipoleCoord
+	MALLOC_VECTOR(DipoleCoord,double,local_nRows,ALL);
+	memory+=3*sizeof(double)*local_nvoid_Ndip;
+	for (index=0; index<local_nvoid_Ndip; index++) {
+		i3=3*index;
+		DipoleCoord[i3] = (position[i3]-cX)*gridspace;
+		DipoleCoord[i3+1] = (position[i3+1]-cY)*gridspace;
+		DipoleCoord[i3+2] = (position[i3+2]-cZ)*gridspace;
+	}
 
 	// save geometry
 	if (save_geom)
-#ifndef ADDA_SPARSE 
+#ifndef SPARSE 
 		SaveGeometry();
 #else
-		LogWarning(EC_WARN,ONE_POS,"Saving the geometry is not supported in sparse mode");
-#endif //ADDA_SPARSE
+		LogError(ONE_POS,"Saving the geometry is not supported in sparse mode");
+#endif // SPARSE
 
-#ifndef ADDA_SPARSE
+#ifndef SPARSE
 	/* adjust z-axis of position vector, to speed-up matrix-vector multiplication a little bit;
 	 * after this point 'position(z)' is taken relative to the local_z0.
 	 */
@@ -2360,19 +2341,18 @@ MALLOC_VECTOR(DipoleCoord,double,local_nRows,ALL);
 	}
 	local_Nz_unif=position[3*local_nvoid_Ndip-1]+1;
 	local_z0_unif=local_z0; // TODO: should be changed afterwards
-#endif
+#endif // !SPARSE
 
 	box_origin_unif[0]=-gridspace*cX;
 	box_origin_unif[1]=-gridspace*cY;
-#ifndef ADDA_SPARSE
+#ifndef SPARSE
 	box_origin_unif[2]=gridspace*(local_z0_unif-cZ);
 #else
 	box_origin_unif[2]=-gridspace*cZ;
-	
-	InitArraySync();
-	SyncPosition(position_full);
-	SyncMaterial(material_full);
-#endif //ADDA_SPARSE
+#	ifdef PARALLEL
+	AllGather(NULL,position_full,int3_type,NULL);
+#	endif
+#endif // SPARSE
 	
 	Timing_Particle += GET_TIME() - tstart;
 }
diff --git a/src/matvec.c b/src/matvec.c
index 39e48dc6..b5820ccd 100644
--- a/src/matvec.c
+++ b/src/matvec.c
@@ -3,7 +3,7 @@
  * Descr: calculate local matrix vector product of decomposed interaction matrix with r_k or p_k,
  *        using a FFT based convolution algorithm
  *
- * Copyright (C) 2006-2012 ADDA contributors
+ * Copyright (C) 2006-2013 ADDA contributors
  * This file is part of ADDA.
  *
  * ADDA is free software: you can redistribute it and/or modify it under the terms of the GNU
@@ -25,32 +25,32 @@
 #include "fft.h"
 #include "function.h"
 #include "io.h"
-#include "sparse_ops.h"
 #include "interaction.h"
 #include "linalg.h"
+#include "oclcore.h"
 #include "prec_time.h"
+#include "sparse_ops.h"
 #include "vars.h"
 // system headers
 #include <stdio.h>
 #include <string.h>
 
-#ifdef OPENCL
-#	include "oclcore.h"
-#endif
-
 // SEMI-GLOBAL VARIABLES
 
+#ifdef SPARSE
+// defined and initialized in calculator.c
+extern doublecomplex * restrict arg_full;
+#elif !defined(OPENCL)
 // defined and initialized in fft.c
-#ifndef OPENCL
 extern const doublecomplex * restrict Dmatrix;
 extern doublecomplex * restrict Xmatrix,* restrict slices,* restrict slices_tr;
-#endif
 extern const size_t DsizeY,DsizeZ,DsizeYZ;
+#endif // !SPARSE && !OPENCL
 
 // defined and initialized in timing.c
 extern size_t TotalMatVec;
 
-#ifndef ADDA_SPARSE
+#ifndef SPARSE
 #ifndef OPENCL // the following inline functions are not used in OpenCL or sparse mode
 
 //============================================================
@@ -102,11 +102,11 @@ INLINE size_t IndexDmatrix_mv(size_t x,size_t y,size_t z,const bool transposed)
 	return(NDCOMP*(x*DsizeYZ+z*DsizeY+y));
 }
 #endif // OPENCL
-#endif // ADDA_SPARSE
+#endif // SPARSE
 
 //============================================================
 
-#ifndef ADDA_SPARSE
+#ifndef SPARSE
 void MatVec (doublecomplex * restrict argvec,    // the argument vector
              doublecomplex * restrict resultvec, // the result vector
              double *inprod,         // the resulting inner product
@@ -474,11 +474,12 @@ void MatVec (doublecomplex * restrict argvec,    // the argument vector
 	TotalMatVec++;
 }
 
-#else //ADDA_SPARSE is defined
+#else // SPARSE is defined
+
+//============================================================
 
-/* The sparse MatVec is implemented completely separately from the non-sparse version.
- * Although there is some code duplication, this probably makes the both versions easier
- * to maintain.
+/* The sparse MatVec is implemented completely separately from the non-sparse version. Although there is some code
+ * duplication, this probably makes the both versions easier to maintain.
 */
 void MatVec (doublecomplex * restrict argvec,    // the argument vector
              doublecomplex * restrict resultvec, // the result vector
@@ -487,52 +488,33 @@ void MatVec (doublecomplex * restrict argvec,    // the argument vector
              TIME_TYPE *comm_timing) // this variable is incremented by communication time
 {	
 	const bool ipr = (inprod != NULL);
-
-	if (her) {
-		for (size_t j=0; j<local_nRows; j++) {
-			argvec[j][IM] = -argvec[j][IM];		 
-		}
-	}
-	
-	for (size_t j=0; j<local_nvoid_Ndip; j++) {
-		CcMul(argvec,arg_full+3*local_nvoid_d0,j);
-	}
-	SyncArgvec();	
-	
-	for (size_t i=0; i<local_nvoid_Ndip; i++) {
-		const unsigned int i3 = 3*i;		
+	size_t i,j,i3;
+
+	if (her) for (j=0; j<local_nRows; j++) argvec[j][IM] = -argvec[j][IM];
+	for (j=0; j<local_nvoid_Ndip; j++) CcMul(argvec,arg_full+3*local_nvoid_d0,j);
+#	ifdef PARALLEL
+	AllGather(NULL,arg_full,cmplx3_type,comm_timing);
+#	endif
+	for (i=0; i<local_nvoid_Ndip; i++) {
+		i3 = 3*i;
 		resultvec[i3][RE]=resultvec[i3][IM]=0.0;
 		resultvec[i3+1][RE]=resultvec[i3+1][IM]=0.0;
 		resultvec[i3+2][RE]=resultvec[i3+2][IM]=0.0;
-		for (size_t j=0; j<local_nvoid_d0+i; j++) {			
-			AijProd(arg_full, resultvec, i, j);
-		}		
-		for (size_t j=local_nvoid_d0+i+1; j<nvoid_Ndip; j++) {
-			AijProd(arg_full, resultvec, i, j);
-		}
+		for (j=0; j<local_nvoid_d0+i; j++) AijProd(arg_full,resultvec,i,j);
+		for (j=local_nvoid_d0+i+1; j<nvoid_Ndip; j++) AijProd(arg_full,resultvec,i,j);
 	}		
-	
-	for (size_t i=0; i<local_nvoid_Ndip; i++) {	
-		DiagProd(argvec, resultvec, i);		 
+	for (i=0; i<local_nvoid_Ndip; i++) DiagProd(argvec,resultvec,i);
+	if (her) for (i=0; i<local_nRows; i++) {
+		resultvec[i][IM] = -resultvec[i][IM];
+		argvec[i][IM] = -argvec[i][IM];
 	}
-	
-	if (her) {
-		for (size_t i=0; i<local_nRows; i++) {
-			resultvec[i][IM] = -resultvec[i][IM];
-			argvec[i][IM] = -argvec[i][IM];		
-		}
-	}	
-		
 	if (ipr) {
 		*inprod = 0.0;
-		for (size_t i=0; i<local_nRows; i++) {
-			*inprod += resultvec[i][RE]*resultvec[i][RE] + resultvec[i][IM]*resultvec[i][IM];		 
-		}		
+		for (i=0; i<local_nRows; i++)
+			*inprod += resultvec[i][RE]*resultvec[i][RE] + resultvec[i][IM]*resultvec[i][IM];
 		MyInnerProduct(inprod,double_type,1,comm_timing);
 	}
-	
 	TotalMatVec++;
-	
 }
 
-#endif //ADDA_SPARSE
+#endif // SPARSE
diff --git a/src/ocl/Makefile b/src/ocl/Makefile
index e3de38e2..b136f447 100644
--- a/src/ocl/Makefile
+++ b/src/ocl/Makefile
@@ -35,27 +35,31 @@
 #  LDFLAGS += -Wl,--enable-stdcall-fixup
 #endif
 
-ifneq ($(filter CLFFT_APPLE,$(OPTIONS)),)
-  ifeq ($(filter NO_CPP,$(OPTIONS)),)
-    CPPSOURCE += fft_execute.cpp fft_setup.cpp fft_kernelstring.cpp
-    CFLAGS    += -DCLFFT_APPLE
-  else
-    $(error Apple clFFT (CLFFT_APPLE) is implemented in C++, hence incompatible with NO_CPP)
-  endif
-else # AMD clFFT flags
-  # Depending on a particular clAmdFft installation one may need to manually specify paths to its headers and libraries.
-  # Path should be absolute or relative to the location of this Makefile.
-  #CFLAGS  += -I"C:\Program Files (x86)\AMD\clAmdFft\include"
-  #LDFLAGS += -L"C:\Program Files (x86)\AMD\clAmdFft\lib64\import"
+ifneq ($(filter SPARSE,$(OPTIONS)),)
+  $(error OPENCL version of sparse mode is not yet available)
+else # the following is only for non-sparse mode
+  ifneq ($(filter CLFFT_APPLE,$(OPTIONS)),)
+    ifeq ($(filter NO_CPP,$(OPTIONS)),)
+      CPPSOURCE += fft_execute.cpp fft_setup.cpp fft_kernelstring.cpp
+      CFLAGS    += -DCLFFT_APPLE
+    else
+      $(error Apple clFFT (CLFFT_APPLE) is implemented in C++, hence incompatible with NO_CPP)
+    endif
+  else # AMD clFFT flags
+    # Depending on a particular clAmdFft installation one may need to manually specify paths to its headers and 
+    # libraries. Path should be absolute or relative to the location of this Makefile.
+    #CFLAGS  += -I"C:\Program Files (x86)\AMD\clAmdFft\include"
+    #LDFLAGS += -L"C:\Program Files (x86)\AMD\clAmdFft\lib64\import"
   
-  LDFLAGS += -lclAmdFft.Runtime
+    LDFLAGS += -lclAmdFft.Runtime
   
-  # Uncomment the following line if you experience problems during linking, like
-  # "...libclAmdFft.Runtime.so: undefined reference to `clRetainContext@OPENCL_1.0'"
-  # This is caused by Nvidia OpenCL libraries on Linux (hopefully it will disappear in next releases of this library)
-  # The drawback of this workaround is that a warning would appear at each execution of adda_ocl. If you don't like it,
-  # provide another libOpenCL.so (e.g. that from AMD) for linkig.
-  #LDFLAGS +=-Wl,--unresolved-symbols=ignore-in-shared-libs
+    # Uncomment the following line if you experience problems during linking, like
+    # "...libclAmdFft.Runtime.so: undefined reference to `clRetainContext@OPENCL_1.0'"
+    # This is caused by Nvidia OpenCL libraries on Linux (hopefully it will disappear in next releases of this library)
+    # The drawback of this workaround is that a warning would appear at each execution of adda_ocl. If you don't like 
+    # it, provide another libOpenCL.so (e.g. that from AMD) for linkig.
+    #LDFLAGS +=-Wl,--unresolved-symbols=ignore-in-shared-libs
+  endif
 endif
 
 #===================================================================================================
diff --git a/src/oclcore.c b/src/oclcore.c
index af7bdd14..c7f4a001 100644
--- a/src/oclcore.c
+++ b/src/oclcore.c
@@ -2,7 +2,7 @@
  * $Date::                            $
  * Descr: core parts of OpenCL code
  *
- * Copyright (C) 2010-2012 ADDA contributors
+ * Copyright (C) 2010-2013 ADDA contributors
  * This file is part of ADDA.
  *
  * ADDA is free software: you can redistribute it and/or modify it under the terms of the GNU
@@ -73,9 +73,8 @@ struct string {
 // The kernel source is either loaded from oclkernels.cl at runtime or included at compile time
 //#define OCL_READ_SOURCE_RUNTIME
 
-// For some reason Eclipse points out a syntax error in the following block. Just ignore it.
 #ifndef OCL_READ_SOURCE_RUNTIME
-	const char stringifiedSourceCL[]=
+	const char stringifiedSourceCL[]=""
 	// the following is a pure string generated automatically from oclkernels.cl at compile time
 #	include "ocl/oclkernels.clstr"
 	;
diff --git a/src/oclcore.h b/src/oclcore.h
index 6f06a8d6..3749d65a 100644
--- a/src/oclcore.h
+++ b/src/oclcore.h
@@ -1,8 +1,8 @@
 /* File: oclcore.h
  * $Date::                            $
- * Descr: all common OpenCL variables and functions
+ * Descr: all common OpenCL variables and functions; void in non-OpenCL mode
  *
- * Copyright (C) 2010-2012 ADDA contributors
+ * Copyright (C) 2010-2013 ADDA contributors
  * This file is part of ADDA.
  *
  * ADDA is free software: you can redistribute it and/or modify it under the terms of the GNU
@@ -16,6 +16,7 @@
  * You should have received a copy of the GNU General Public License along with ADDA. If not, see
  * <http://www.gnu.org/licenses/>.
  */
+#ifdef OPENCL
 
 #ifndef __oclcore_h
 #define __oclcore_h
@@ -73,3 +74,5 @@ INLINE void CheckCLErr(const cl_int err,ERR_LOC_DECL,const char * restrict msg)
 }
 
 #endif // __oclcore_h
+
+#endif // OPENCL
diff --git a/src/param.c b/src/param.c
index eaf59905..b0bd394f 100644
--- a/src/param.c
+++ b/src/param.c
@@ -26,6 +26,7 @@
 #include "fft.h"
 #include "function.h"
 #include "io.h"
+#include "oclcore.h"
 #include "os.h"
 #include "parbas.h"
 #include "vars.h"
@@ -38,11 +39,8 @@
 #include <string.h>
 #include <time.h>
 
-#ifdef OPENCL
-#	include "oclcore.h"
-#	ifdef CLFFT_AMD
-#		include <clAmdFft.h> // for version information
-#	endif
+#ifdef CLFFT_AMD
+#	include <clAmdFft.h> // for version information
 #endif
 
 // definitions for file locking
@@ -80,10 +78,10 @@ extern const char beam_descr[];
 // defined and initialized in make_particle.c
 extern const bool volcor_used;
 extern const char *sh_form_str1,*sh_form_str2;
-#ifndef ADDA_SPARSE
+#ifndef SPARSE
 extern const int gr_N;
 extern const double gr_vf_real;
-#endif //ADDA_SPARSE
+#endif //SPARSE
 extern const size_t mat_count[];
 
 // used in CalculateE.c
@@ -110,15 +108,16 @@ const char *scat_grid_parms;      // name of file with parameters of scattering
 double prop_0[3];                 // initial incident direction (in laboratory reference frame)
 double incPolX_0[3],incPolY_0[3]; // initial incident polarizations (in lab RF)
 enum scat ScatRelation;           // type of formulae for scattering quantities
-// used in fft.c
-double igt_lim; // limit (threshold) for integration in IGT
-double igt_eps; // relative error of integration in IGT
+
 // used in GenerateB.c
 int beam_Npars;
 double beam_pars[MAX_N_BEAM_PARMS]; // beam parameters
 opt_index opt_beam;                 // option index of beam option used
 const char *beam_fnameY;            // names of files, defining the beam (for two polarizations)
 const char *beam_fnameX;
+// used in interaction.c
+double igt_lim; // limit (threshold) for integration in IGT
+double igt_eps; // relative error of integration in IGT
 // used in io.c
 char logfname[MAX_FNAME]=""; // name of logfile
 // used in iterative.c
@@ -237,6 +236,7 @@ static const struct subopt_struct beam_opt[]={
 	{NULL,NULL,NULL,0,0}
 };
 static const struct subopt_struct shape_opt[]={
+#ifndef SPARSE
 	{"axisymmetric","<filename>","Axisymmetric homogeneous shape, defined by its contour in "
 		"ro-z plane of the cylindrical coordinate system. Its symmetry axis coincides with the "
 		"z-axis, and the contour is read from file.",FNAME_ARG,SH_AXISYMMETRIC},
@@ -279,10 +279,13 @@ static const struct subopt_struct shape_opt[]={
 		"b, and diameter at the position of the maximum width c. The surface is described by "
 		"ro^4+2S*ro^2*z^2+z^4+P*ro^2+Q*z^2+R=0, ro^2=x^2+y^2, P,Q,R,S are determined by the "
 		"described parameters.",3,SH_RBC},
+#endif // !SPARSE
 	{"read","<filename>","Read a particle geometry from file <filename>",FNAME_ARG,SH_READ},
+#ifndef SPARSE
 	{"sphere","","Homogeneous sphere",0,SH_SPHERE},
 	{"spherebox","<d_sph/Dx>","Sphere (diameter d_sph) in a cube (size Dx, first domain)",1,
 		SH_SPHEREBOX},
+#endif // !SPARSE
 	/* TO ADD NEW SHAPE
 	 * add a row to this list in alphabetical order. It contains:
 	 * shape name (used in command line), usage string, help string, possible number of float
@@ -337,7 +340,9 @@ PARSE_FUNC(eq_rad);
 #ifdef OPENCL
 PARSE_FUNC(gpu);
 #endif
+#ifndef SPARSE
 PARSE_FUNC(granul);
+#endif
 PARSE_FUNC(grid);
 PARSE_FUNC(h) ATT_NORETURN;
 PARSE_FUNC(init_field);
@@ -357,17 +362,23 @@ PARSE_FUNC(pol);
 PARSE_FUNC(prognosis);
 PARSE_FUNC(prop);
 PARSE_FUNC(recalc_resid);
+#ifndef SPARSE
 PARSE_FUNC(save_geom);
+#endif
 PARSE_FUNC(scat);
 PARSE_FUNC(scat_grid_inp);
 PARSE_FUNC(scat_matr);
+#ifndef SPARSE
 PARSE_FUNC(sg_format);
+#endif
 PARSE_FUNC(shape);
 PARSE_FUNC(size);
 PARSE_FUNC(store_beam);
 PARSE_FUNC(store_dip_pol);
 PARSE_FUNC(store_force);
+#ifndef SPARSE
 PARSE_FUNC(store_grans);
+#endif
 PARSE_FUNC(store_int_field);
 PARSE_FUNC(store_scat_grid);
 PARSE_FUNC(sym);
@@ -422,11 +433,13 @@ static struct opt_struct options[]={
 		"only for OpenCL version of ADDA, running on a system with several GPUs.\n"
 		"Default: 0",1,NULL},
 #endif
+#ifndef SPARSE
 	{PAR(granul),"<vol_frac> <diam> [<dom_number>]","Specifies that one particle domain should be "
 		"randomly filled with spherical granules with specified diameter <diam> and volume "
 		"fraction <vol_frac>. Domain number to fill is given by the last optional argument. "
 		"Algorithm may fail for volume fractions > 30-50%.\n"
 		"Default <dom_number>: 1",UNDEF,NULL},
+#endif // !SPARSE
 	{PAR(grid),"<nx> [<ny> <nz>]","Sets dimensions of the computation grid. Arguments should be "
 		"even integers. In most cases <ny> and <nz> can be omitted (they are automatically "
 		"determined by <nx> based on the proportions of the scatterer). This command line option "
@@ -440,10 +453,15 @@ static struct opt_struct options[]={
 		"preceding dash). For some options (e.g. '-beam' or '-shape') specific help on a "
 		"particular suboption <subopt> may be shown.\n"
 		"Example: shape coated",UNDEF,NULL},
-	{PAR(init_field),"{auto|zero|inc|wkb}","Sets prescription to calculate initial (starting) "
-		"field for the iterative solver. 'zero' is a zero vector, 'inc' - equal to the incident "
-		"field, 'wkb' - from Wentzel-Kramers-Brillouin approximation, 'auto' - automatically "
-		"choose from 'zero' and 'inc' based on the lower residual value.\n"
+	{PAR(init_field),"{auto|inc|wkb|zero}",
+		"Sets prescription to calculate initial (starting) field for the iterative solver.\n"
+		"'auto' - automatically choose from 'zero' and 'inc' based on the lower residual value.\n"
+		"'inc' - equal to the incident field,\n"
+		"'wkb' - from Wentzel-Kramers-Brillouin approximation,\n"
+#ifdef SPARSE
+		"!!! 'wkb' is not operational in sparse mode\n"
+#endif
+		"'zero' is a zero vector,\n"
 		"Default: auto",1,NULL},
 	{PAR(int),"{fcd|fcd_st|igt [<lim> [<prec>]]|igt_so|poi|so}",
 		"Sets prescription to calculate the interaction term.\n"
@@ -459,6 +477,9 @@ static struct opt_struct options[]={
 		"'igt_so' - approximate evaluation of IGT using second order of kd approximation.\n"
 		"'poi' - (the simplest) interaction between point dipoles.\n"
 		"'so' - under development and incompatible with '-anisotr'.\n"
+#ifdef SPARSE
+		"!!! All options except 'poi' incur a significant slowing down in sparse mode.\n"
+#endif
 		"Default: poi",UNDEF,NULL},
 	{PAR(iter),"{bicg|bicgstab|cgnr|csym|qmr|qmr2}","Sets the iterative solver.\n"
 		"Default: qmr",1,NULL},
@@ -529,12 +550,14 @@ static struct opt_struct options[]={
 		"Normalization (to the unity vector) is performed automatically.\n"
 		"Default: 0 0 1",3,NULL},
 	{PAR(recalc_resid),"","Recalculate residual at the end of iterative solver.",0,NULL},
+#ifndef SPARSE
 	{PAR(save_geom),"[<filename>]","Save dipole configuration to a file <filename> (a path "
 		"relative to the output directory). Can be used with '-prognosis'.\n"
 		"Default: <type>.geom \n"
 		"(<type> is a first argument to the '-shape' option; '_gran' is added if '-granul' option "
 		"is used; file extension can differ depending on argument of '-sg_format' option).",
 		UNDEF,NULL},
+#endif // !SPARSE
 	{PAR(scat),"{dr|fin|igt_so|so}","Sets prescription to calculate scattering quantities.\n"
 		"'dr' - (by Draine) standard formulation for point dipoles\n"
 		"'fin' - slightly different one, based on a radiative correction for a finite dipole.\n"
@@ -548,12 +571,14 @@ static struct opt_struct options[]={
 		"amplitude) should be saved to file. Amplitude matrix is never integrated (in combination "
 		"with '-orient avg' or '-phi_integr').\n"
 		"Default: muel",1,NULL},
+#ifndef SPARSE
 	{PAR(sg_format),"{text|text_ext|ddscat6|ddscat7}","Specifies format for saving geometry files. "
 		"First two are ADDA default formats for single- and multi-domain particles respectively. "
 		"'text' is automatically changed to 'text_ext' for multi-domain particles. Two DDSCAT "
 		"formats correspond to its shape options 'FRMFIL' (version 6) and 'FROM_FILE' (version 7) "
 		"and output of 'calltarget' utility.\n"
 		"Default: text",1,NULL},
+#endif // !SPARSE
 		/* TO ADD NEW FORMAT OF SHAPE FILE
 		 * Modify string constants after 'PAR(sg_format)': add new argument to list {...} and
 		 * add its description to the next string.
@@ -572,7 +597,9 @@ static struct opt_struct options[]={
 	{PAR(store_dip_pol),"","Save dipole polarizations to a file",0,NULL},
 	{PAR(store_force),"","Calculate the radiation force on each dipole. Implies '-Cpr'",
 		0,NULL},
+#ifndef SPARSE
 	{PAR(store_grans),"","Save granule coordinates (placed by '-granul' option) to a file",0,NULL},
+#endif
 	{PAR(store_int_field),"","Save internal fields to a file",0,NULL},
 	{PAR(store_scat_grid),"","Calculate Mueller matrix for a grid of scattering angles and save it "
 		"to a file.",0,NULL},
@@ -722,7 +749,7 @@ INLINE void TestNarg(const int Narg,const int need)
 			NargError(Narg,buf);
 		}
 	} // otherwise special cases are considered, encoded by negative values
-	else if (need==UNDEF); // do nothing
+	else if (need==UNDEF) {} // do nothing
 	else if (need==FNAME_ARG) {
 		if (Narg!=1) NargError(Narg,"1");
 	}
@@ -901,6 +928,7 @@ PARSE_FUNC(beam)
 	int i,j,need;
 	bool found;
 
+	if (Narg==0) PrintErrorHelp("Suboption (argument) is missing for '-beam' option");
 	Narg--;
 	found=false;
 	i=-1;
@@ -997,6 +1025,7 @@ PARSE_FUNC(gpu)
 	TestNonNegative_i(gpuInd,"GPU index");
 }
 #endif
+#ifndef SPARSE
 PARSE_FUNC(granul)
 {
 	if (Narg!=2 && Narg!=3) NargError(Narg,"2 or 3");
@@ -1012,6 +1041,7 @@ PARSE_FUNC(granul)
 	gr_mat--; // converted to usual indexing starting from 0
 	sh_granul=true;
 }
+#endif
 PARSE_FUNC(grid)
 {
 	if (Narg!=1 && Narg!=3) NargError(Narg,"1 or 3");
@@ -1059,6 +1089,10 @@ PARSE_FUNC(h)
 						while (options[i].sub[++j].name!=NULL)
 							printf("  %s %s\n",options[i].sub[j].name,options[i].sub[j].usage);
 						printf("Type '%s -h %s <subopt>' for details\n",exename,options[i].name);
+#ifdef SPARSE
+						if (strcmp(options[i].name,"shape")==0)
+							printf("!!! Most of the shape options are disabled in sparse mode\n");
+#endif
 					}
 				}
 				found=true;
@@ -1071,6 +1105,9 @@ PARSE_FUNC(h)
 			       "Available options:\n",exename,exeusage);
 			for (i=0;i<LENGTH(options);i++) printf("  -%s %s\n",options[i].name,options[i].usage);
 			printf("Type '%s -h <opt>' for details\n",exename);
+#ifdef SPARSE
+			printf("!!! A number of options are disabled in sparse mode\n");
+#endif
 		}
 	}
 	// exit
@@ -1081,19 +1118,18 @@ PARSE_FUNC(init_field)
 	if (strcmp(argv[1],"auto")==0) InitField=IF_AUTO;
 	else if (strcmp(argv[1],"zero")==0) InitField=IF_ZERO;
 	else if (strcmp(argv[1],"inc")==0) InitField=IF_INC;
-	else if (strcmp(argv[1],"wkb")==0) InitField=IF_WKB;
+	else if (strcmp(argv[1],"wkb")==0)
+#ifdef SPARSE
+		PrintErrorHelp("Initial field 'wkb' is not supported in sparse mode");
+#else
+		InitField=IF_WKB;
+#endif
 	else NotSupported("Initial field prescription",argv[1]);
 }
 PARSE_FUNC(int)
 {
 	double tmp;
 	
-#ifdef ADDA_SPARSE //only point dipoles in sparse mode
-	if (strcmp(argv[1],"poi")!=0) {
-		NotSupported("Interaction term prescription (sparse mode)",argv[1]);
-	}
-#endif
-
 	if (Narg<1 || Narg>3) NargError(Narg,"from 1 to 3");
 	if (strcmp(argv[1],"fcd")==0) IntRelation=G_FCD;
 	else if (strcmp(argv[1],"fcd_st")==0) IntRelation=G_FCD_ST;
@@ -1254,12 +1290,14 @@ PARSE_FUNC(recalc_resid)
 {
 	recalc_resid=true;
 }
+#ifndef SPARSE
 PARSE_FUNC(save_geom)
 {
 	if (Narg>1) NargError(Narg,"0 or 1");
 	save_geom=true;
 	if (Narg==1) save_geom_fname=ScanStrError(argv[1],MAX_FNAME);
 }
+#endif
 PARSE_FUNC(scat)
 {
 	if (strcmp(argv[1],"dr")==0) ScatRelation=SQ_DRAINE;
@@ -1286,6 +1324,7 @@ PARSE_FUNC(scat_matr)
 	else if (strcmp(argv[1],"none")==0) store_mueller=store_ampl=false;
 	else NotSupported("Scattering matrix specifier",argv[1]);
 }
+#ifndef SPARSE
 PARSE_FUNC(sg_format)
 {
 	if (strcmp(argv[1],"text")==0) sg_format=SF_TEXT;
@@ -1303,11 +1342,13 @@ PARSE_FUNC(sg_format)
 	 */
 	else NotSupported("Geometry format",argv[1]);
 }
+#endif
 PARSE_FUNC(shape)
 {
 	int i,j,need;
 	bool found;
 
+	if (Narg==0) PrintErrorHelp("Suboption (argument) is missing for '-shape' option");
 	Narg--;
 	found=false;
 	i=-1;
@@ -1360,10 +1401,12 @@ PARSE_FUNC(store_force)
 	store_force = true;
 	calc_mat_force = true;
 }
+#ifndef SPARSE
 PARSE_FUNC(store_grans)
 {
 	store_grans=true;
 }
+#endif
 PARSE_FUNC(store_int_field)
 {
 	store_int_field=true;
@@ -1521,6 +1564,12 @@ PARSE_FUNC(V)
 #endif
 #ifdef CLFFT_APPLE
 		"CLFFT_APPLE, "
+#endif
+#ifdef SPARSE
+		"SPARSE, "
+#endif
+#ifdef USE_SSE3
+		"USE_SSE3, "
 #endif
 		"";
 		printf("Extra build options: ");
@@ -1857,6 +1906,9 @@ void VariablesInterconnect(void)
 	if (sizeX!=UNDEF && a_eq!=UNDEF) PrintError("'-size' and '-eq_rad' can not be used together");
 	if (calc_mat_force && beamtype!=B_PLANE)
 		PrintError("Currently radiation forces can not be calculated for non-plane incident wave");
+#ifdef SPARSE
+	if (shape==SH_SPHERE) PrintError("Sparse mode requires shape to be read from file (-shape read ...)");
+#endif
 	// scale boxes by jagged; should be completely robust to overflows
 #define JAGGED_BOX(a) if (a!=UNDEF) { \
 	if ((BOX_MAX/(size_t)jagged)<(size_t)a) \
@@ -2030,12 +2082,12 @@ void PrintInfo(void)
 		fprintf(logfile,"lambda: "GFORM"\n",lambda);
 		fprintf(logfile,"shape: ");
 		fprintf(logfile,"%s"GFORM"%s\n",sh_form_str1,sizeX,sh_form_str2);
-#ifndef ADDA_SPARSE
+#ifndef SPARSE
 		if (sh_granul) fprintf(logfile,
 			"  domain %d is filled with %d granules of diameter "GFORMDEF"\n"
 			"    volume fraction: specified - "GFORMDEF", actual - "GFORMDEF"\n",
 			gr_mat+1,gr_N,gr_d,gr_vf,gr_vf_real);
-#endif //ADDA_SPARSE
+#endif // SPARSE
 		fprintf(logfile,"box dimensions: %ix%ix%i\n",boxX,boxY,boxZ);
 		if (anisotropy) {
 			fprintf(logfile,"refractive index (diagonal elements of the tensor):\n");
@@ -2151,8 +2203,10 @@ void PrintInfo(void)
 		fprintf(logfile,"FFTW3\n");
 #elif defined(FFT_TEMPERTON)
 		fprintf(logfile,"by C.Temperton\n");
+#elif defined(SPARSE)
+		fprintf(logfile,"none (sparse mode)\n");
 #endif
-#ifdef OPENCL
+#if defined(OPENCL) && !defined(SPARSE)
 		fprintf(logfile,"OpenCL FFT algorithm: ");
 #	ifdef CLFFT_AMD
 		fprintf(logfile,"by AMD\n");
diff --git a/src/sparse_ops.h b/src/sparse_ops.h
index 26de2469..f9624580 100644
--- a/src/sparse_ops.h
+++ b/src/sparse_ops.h
@@ -1,186 +1,176 @@
 /* File: sparse_ops.h
- * Descr: low-level routines for the sparse mode ADDA
+ * Descr: low-level routines for the sparse mode ADDA; void in non-sparse mode
  *
- * Copyright (C) 2006-2012 ADDA contributors
+ * Copyright (C) 2011-2013 ADDA contributors
  * This file is part of ADDA.
  *
- * ADDA is free software: you can redistribute it and/or modify it under the terms of the GNU
- * General Public License as published by the Free Software Foundation, either version 3 of the
- * License, or (at your option) any later version.
+ * ADDA is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
  *
- * ADDA is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even
- * the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
- * Public License for more details.
+ * ADDA is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License along with ADDA. If not, see
  * <http://www.gnu.org/licenses/>.
  */
+#ifdef SPARSE
 
-#ifndef __aijprod_h
-#define __aijprod_h
+#ifndef __sparse_ops_h
+#define __sparse_ops_h
 
 #include "cmplx.h"
 #include "interaction.h"
 #include "vars.h"
-   
-#ifdef ADDA_SPARSE
 
 #ifdef USE_SSE3
 
-inline static void CcMul(doublecomplex * restrict argvec_src, 
-                         doublecomplex * restrict argvec_dest, const int j)
-/*
-   Takes the j'th block in argvec_src, multiplies by cc_sqrt at that 
-   position and stores the result in argvec_dest.
-*/
+//=====================================================================================================================
+
+static inline void CcMul(doublecomplex * restrict argvec_src,doublecomplex * restrict argvec_dest, const size_t j)
+// Takes the j'th block in argvec_src, multiplies by cc_sqrt at that position and stores the result in argvec_dest.
 {
-   const int j3 = j*3;
-   *(__m128d *)&(argvec_dest[j3]) = cmul(*(__m128d *)&(argvec_src[j3]),
-                                         *(__m128d *)&(cc_sqrt[material[j]][0]));
-   *(__m128d *)&(argvec_dest[j3+1]) = cmul(*(__m128d *)&(argvec_src[j3+1]),
-                                           *(__m128d *)&(cc_sqrt[material[j]][1]));
-   *(__m128d *)&(argvec_dest[j3+2]) = cmul(*(__m128d *)&(argvec_src[j3+2]),
-                                           *(__m128d *)&(cc_sqrt[material[j]][2]));
+	const size_t j3=j*3;
+	*(__m128d *)&(argvec_dest[j3]) = cmul(*(__m128d *)&(argvec_src[j3]),
+		*(__m128d *)&(cc_sqrt[material[j]][0]));
+	*(__m128d *)&(argvec_dest[j3+1]) = cmul(*(__m128d *)&(argvec_src[j3+1]),
+		*(__m128d *)&(cc_sqrt[material[j]][1]));
+	*(__m128d *)&(argvec_dest[j3+2]) = cmul(*(__m128d *)&(argvec_src[j3+2]),
+		*(__m128d *)&(cc_sqrt[material[j]][2]));
 }
 
-inline static void AijProd(doublecomplex * restrict argvec, 
-                           doublecomplex * restrict resultvec,
-                           const int i, const int j)
-/*
-   Handles the multiplication of the i'th block of
-   argvec with the G_ij block of the G-matrix, and adds the result
-   to the j'th block of resultvec.
-*/
+//=====================================================================================================================
+
+static inline void AijProd(doublecomplex * restrict argvec,doublecomplex * restrict resultvec,const size_t i,
+	const size_t j)
+/* Handles the multiplication of the i'th block of argvec with the G_ij block of the G-matrix, and adds the result
+ * to the j'th block of resultvec.
+ */
 {	
-   doublecomplex iterm[6];
-	const unsigned int i3 = 3*i, j3 = 3*j;
-   
-   CalcInterTerm(position[i3]-position_full[j3], position[i3+1]-position_full[j3+1], 
-	              position[i3+2]-position_full[j3+2], iterm);
-
-   __m128d res, tmp;
- 
-   const __m128d argX = _mm_load_pd((double *)(argvec+j3));
-   const __m128d argY = _mm_load_pd((double *)(argvec+j3+1));
-   const __m128d argZ = _mm_load_pd((double *)(argvec+j3+2));
-   
-   res = cmul(argX, *(__m128d *)&(iterm[0]));
-   tmp = cmul(argY, *(__m128d *)&(iterm[1]));
-   res = cadd(tmp,res);
-   tmp = cmul(argZ, *(__m128d *)&(iterm[2]));
-   res = cadd(tmp,res);   
-   *(__m128d *)&(resultvec[i3]) = cadd(res, *(__m128d *)&(resultvec[i3]));
-   
-   res = cmul(argX, *(__m128d *)&(iterm[1]));
-   tmp = cmul(argY, *(__m128d *)&(iterm[3]));
-   res = cadd(tmp,res);
-   tmp = cmul(argZ, *(__m128d *)&(iterm[4]));
-   res = cadd(tmp,res);   
-   *(__m128d *)&(resultvec[i3+1]) = cadd(res, *(__m128d *)&(resultvec[i3+1]));
-   
-   res = cmul(argX, *(__m128d *)&(iterm[2]));
-   tmp = cmul(argY, *(__m128d *)&(iterm[4]));
-   res = cadd(tmp,res);
-   tmp = cmul(argZ, *(__m128d *)&(iterm[5]));
-   res = cadd(tmp,res);   
-   *(__m128d *)&(resultvec[i3+2]) = cadd(res, *(__m128d *)&(resultvec[i3+2]));
+	doublecomplex iterm[6];
+	const size_t i3 = 3*i, j3 = 3*j;
+
+	(*CalcInterTerm)(position[i3]-position_full[j3], position[i3+1]-position_full[j3+1],
+		position[i3+2]-position_full[j3+2], iterm);
+
+	__m128d res, tmp;
+
+	const __m128d argX = _mm_load_pd((double *)(argvec+j3));
+	const __m128d argY = _mm_load_pd((double *)(argvec+j3+1));
+	const __m128d argZ = _mm_load_pd((double *)(argvec+j3+2));
+
+	res = cmul(argX, *(__m128d *)&(iterm[0]));
+	tmp = cmul(argY, *(__m128d *)&(iterm[1]));
+	res = cadd(tmp,res);
+	tmp = cmul(argZ, *(__m128d *)&(iterm[2]));
+	res = cadd(tmp,res);
+	*(__m128d *)&(resultvec[i3]) = cadd(res, *(__m128d *)&(resultvec[i3]));
+
+	res = cmul(argX, *(__m128d *)&(iterm[1]));
+	tmp = cmul(argY, *(__m128d *)&(iterm[3]));
+	res = cadd(tmp,res);
+	tmp = cmul(argZ, *(__m128d *)&(iterm[4]));
+	res = cadd(tmp,res);
+	*(__m128d *)&(resultvec[i3+1]) = cadd(res, *(__m128d *)&(resultvec[i3+1]));
+
+	res = cmul(argX, *(__m128d *)&(iterm[2]));
+	tmp = cmul(argY, *(__m128d *)&(iterm[4]));
+	res = cadd(tmp,res);
+	tmp = cmul(argZ, *(__m128d *)&(iterm[5]));
+	res = cadd(tmp,res);
+	*(__m128d *)&(resultvec[i3+2]) = cadd(res, *(__m128d *)&(resultvec[i3+2]));
 }
 
-inline static void DiagProd(doublecomplex * restrict argvec, doublecomplex * restrict resultvec, 
-                            const int i) 
-/*
-   Multiplies the result in the i'th block of resultvec by cc_sqrt at that block,
-   subtracts the result from the i'th block of argvec, and stores the result in
-   the i'th block if resultvec. 
-*/
+//=====================================================================================================================
+
+static inline void DiagProd(doublecomplex * restrict argvec,doublecomplex * restrict resultvec,const size_t i)
+/* Multiplies the result in the i'th block of resultvec by cc_sqrt at that block, subtracts the result from the i'th
+ * block of argvec, and stores the result in the i'th block if resultvec.
+ */
 {
-   const int i3 = i*3; 
-   
-   const __m128d tmp1 = cmul(*(__m128d *)&(resultvec[i3]),*(__m128d *)&(cc_sqrt[material[i]][0]));
-   const __m128d tmp2 = cmul(*(__m128d *)&(resultvec[i3+1]),*(__m128d *)&(cc_sqrt[material[i]][1]));
-   const __m128d tmp3 = cmul(*(__m128d *)&(resultvec[i3+2]),*(__m128d *)&(cc_sqrt[material[i]][2]));
-   
-   *(__m128d *)&(resultvec[i3]) = _mm_sub_pd(*(__m128d *)&(argvec[i3]),tmp1);
-   *(__m128d *)&(resultvec[i3+1]) = _mm_sub_pd(*(__m128d *)&(argvec[i3+1]),tmp2);
-   *(__m128d *)&(resultvec[i3+2]) = _mm_sub_pd(*(__m128d *)&(argvec[i3+2]),tmp3);
+	const size_t i3 = i*3;
+
+	const __m128d tmp1 = cmul(*(__m128d *)&(resultvec[i3]),*(__m128d *)&(cc_sqrt[material[i]][0]));
+	const __m128d tmp2 = cmul(*(__m128d *)&(resultvec[i3+1]),*(__m128d *)&(cc_sqrt[material[i]][1]));
+	const __m128d tmp3 = cmul(*(__m128d *)&(resultvec[i3+2]),*(__m128d *)&(cc_sqrt[material[i]][2]));
+
+	*(__m128d *)&(resultvec[i3]) = _mm_sub_pd(*(__m128d *)&(argvec[i3]),tmp1);
+	*(__m128d *)&(resultvec[i3+1]) = _mm_sub_pd(*(__m128d *)&(argvec[i3+1]),tmp2);
+	*(__m128d *)&(resultvec[i3+2]) = _mm_sub_pd(*(__m128d *)&(argvec[i3+2]),tmp3);
 }
 
 #else //SSE3 not used
 
-inline static void CcMul(doublecomplex * restrict argvec_src, 
-                         doublecomplex * restrict argvec_dest, const int j)
-/*
-   Takes the j'th block in argvec_src, multiplies by cc_sqrt at that 
-   position and stores the result in argvec_dest.
-*/                         
+//=====================================================================================================================
+
+static inline void CcMul(doublecomplex * restrict argvec_src,doublecomplex * restrict argvec_dest, const size_t j)
+// Takes the j'th block in argvec_src, multiplies by cc_sqrt at that position and stores the result in argvec_dest.
 {
-   const int j3 = j*3;
-   cMult(argvec_src[j3],cc_sqrt[material_full[j]][0],argvec_dest[j3]);
-   cMult(argvec_src[j3+1],cc_sqrt[material_full[j]][1],argvec_dest[j3+1]);
-   cMult(argvec_src[j3+2],cc_sqrt[material_full[j]][2],argvec_dest[j3+2]);                                                               
+	const size_t j3 = j*3;
+	cMult(argvec_src[j3],cc_sqrt[material[j]][0],argvec_dest[j3]);
+	cMult(argvec_src[j3+1],cc_sqrt[material[j]][1],argvec_dest[j3+1]);
+	cMult(argvec_src[j3+2],cc_sqrt[material[j]][2],argvec_dest[j3+2]);
 }
 
-inline static void AijProd(doublecomplex * restrict argvec, 
-                           doublecomplex * restrict resultvec,
-                           const int i, const int j)
-/*
-   Handles the multiplication of the i'th block of
-   argvec with the G_ij block of the G-matrix, and adds the result
-   to the j'th block of resultvec.
-*/                           
-{	
-	static doublecomplex tmp1, resX, resY, resZ;
+//=====================================================================================================================
+
+static inline void AijProd(doublecomplex * restrict argvec,doublecomplex * restrict resultvec,const size_t i,
+	const size_t j)
+/* Handles the multiplication of the i'th block of argvec with the G_ij block of the G-matrix, and adds the result
+ * to the j'th block of resultvec.
+ */
+{
+	static doublecomplex tmp1,resX,resY,resZ;
 	static doublecomplex iterm[6];
-	const unsigned int i3 = 3*i, j3 = 3*j;
-	
-	//D("%d %d %d %d %d %d %d %d", i, j, position[3*i], position[3*i+1], position[3*i+2], position[3*j], position[3*j+1], position[3*j+2]);
-	CalcInterTerm(position[i3]-position_full[j3], position[i3+1]-position_full[j3+1], 
-	              position[i3+2]-position_full[j3+2], iterm);
-	
+	const size_t i3=3*i,j3=3*j;
+
+	//D("%d %d %d %d %d %d %d %d",i,j,position[3*i],position[3*i+1],position[3*i+2],
+	//	position[3*j],position[3*j+1],position[3*j+2]);
+	(*CalcInterTerm)(position[i3]-position_full[j3],position[i3+1]-position_full[j3+1],
+		position[i3+2]-position_full[j3+2],iterm);
+
 	cMult(argvec[j3],iterm[0],resX);
 	cMult(argvec[j3+1],iterm[1],tmp1);
 	cAdd(resX,tmp1,resX);
 	cMult(argvec[j3+2],iterm[2],tmp1);
 	cAdd(resX,tmp1,resX);
-	
+
 	cMult(argvec[j3],iterm[1],resY);
 	cMult(argvec[j3+1],iterm[3],tmp1);
 	cAdd(resY,tmp1,resY);
 	cMult(argvec[j3+2],iterm[4],tmp1);
 	cAdd(resY,tmp1,resY);
-	
+
 	cMult(argvec[j3],iterm[2],resZ);
 	cMult(argvec[j3+1],iterm[4],tmp1);
 	cAdd(resZ,tmp1,resZ);
 	cMult(argvec[j3+2],iterm[5],tmp1);
 	cAdd(resZ,tmp1,resZ);
-	
+
 	cAdd(resX,resultvec[i3],resultvec[i3]);
 	cAdd(resY,resultvec[i3+1],resultvec[i3+1]);
 	cAdd(resZ,resultvec[i3+2],resultvec[i3+2]);
 }
 
-inline static void DiagProd(doublecomplex * restrict argvec, doublecomplex * restrict resultvec, 
-                            const int i) 
-/*
-   Multiplies the result in the i'th block of resultvec by cc_sqrt at that block,
-   subtracts the result from the i'th block of argvec, and stores the result in
-   the i'th block if resultvec.
-*/
+//=====================================================================================================================
+
+static inline void DiagProd(doublecomplex * restrict argvec,doublecomplex * restrict resultvec,const size_t i)
+/* Multiplies the result in the i'th block of resultvec by cc_sqrt at that block, subtracts the result from the i'th
+ * block of argvec, and stores the result in the i'th block if resultvec.
+ */
 {
-   const int i3 = i*3;
-   static doublecomplex tmp1, tmp2, tmp3;
-   
-   cMult(resultvec[i3],cc_sqrt[material[i]][0],tmp1);
-   cMult(resultvec[i3+1],cc_sqrt[material[i]][1],tmp2);
-   cMult(resultvec[i3+2],cc_sqrt[material[i]][2],tmp3);
-   cSubtr(argvec[i3], tmp1, resultvec[i3]);
-   cSubtr(argvec[i3+1], tmp2, resultvec[i3+1]);
-   cSubtr(argvec[i3+2], tmp3, resultvec[i3+2]);
+	const size_t i3 = i*3;
+	static doublecomplex tmp1, tmp2, tmp3;
+
+	cMult(resultvec[i3],cc_sqrt[material[i]][0],tmp1);
+	cMult(resultvec[i3+1],cc_sqrt[material[i]][1],tmp2);
+	cMult(resultvec[i3+2],cc_sqrt[material[i]][2],tmp3);
+	cSubtr(argvec[i3], tmp1, resultvec[i3]);
+	cSubtr(argvec[i3+1], tmp2, resultvec[i3+1]);
+	cSubtr(argvec[i3+2], tmp3, resultvec[i3+2]);
 }
 
-#endif //USE_SSE3
+#endif // !USE_SSE3
 
-#endif //ADDA_SPARSE
+#endif // __sparse_ops_h
 
-#endif //__aijprod_h
+#endif // SPARSE
diff --git a/src/timing.c b/src/timing.c
index 6ff82f71..63722540 100644
--- a/src/timing.c
+++ b/src/timing.c
@@ -2,7 +2,7 @@
  * $Date::                            $
  * Descr: basic timing and statistics routines
  *
- * Copyright (C) 2006,2008-2012 ADDA contributors
+ * Copyright (C) 2006,2008-2013 ADDA contributors
  * This file is part of ADDA.
  *
  * ADDA is free software: you can redistribute it and/or modify it under the terms of the GNU
@@ -95,7 +95,7 @@ void InitTiming(void)
 	TotalIter=TotalMatVec=TotalEval=TotalEFieldPlane=0;
 	Timing_EField=Timing_FileIO=Timing_IntField=Timing_ScatQuan=Timing_Integration=0;
 	Timing_ScatQuanComm=Timing_InitDmComm=0;
-#ifdef ADDA_SPARSE
+#ifdef SPARSE
 	Timing_Dm_Init=Timing_Granul=Timing_FFT_Init=Timing_GranulComm=0;
 #endif	
 }
@@ -147,14 +147,16 @@ void FinalStatistics(void)
 				"    init OpenCL          "FFORMT"\n",TO_SEC(Timing_OCL_Init));
 
 #endif
+#ifndef SPARSE
 			fprintf(logfile,
 				"    init Dmatrix         "FFORMT"\n",TO_SEC(Timing_Dm_Init));
-#ifdef PARALLEL
+#	ifdef PARALLEL
 			fprintf(logfile,
 				"      communication:       "FFORMT"\n",TO_SEC(Timing_InitDmComm));
-#endif
+#	endif
 			fprintf(logfile,
 				"    FFT setup:           "FFORMT"\n",TO_SEC(Timing_FFT_Init));
+#endif // !SPARSE
 		}
 		fprintf(logfile,
 			"    make particle:       "FFORMT"\n",TO_SEC(Timing_Particle));
diff --git a/src/vars.c b/src/vars.c
index e027cd14..0ce81ddb 100644
--- a/src/vars.c
+++ b/src/vars.c
@@ -6,7 +6,7 @@
  *        source files are called 'semi-global' and not listed here. They are defined in one file
  *        and referenced with 'extern' in another one.
  *
- * Copyright (C) 2006-2012 ADDA contributors
+ * Copyright (C) 2006-2013 ADDA contributors
  * This file is part of ADDA.
  *
  * ADDA is free software: you can redistribute it and/or modify it under the terms of the GNU
@@ -135,7 +135,7 @@ TIME_TYPE Timing_EField,      // time for calculating scattered fields
           Timing_Integration, // time for all integrations (with precomputed values)
           tstart_main;        // starting time of the program (after MPI_Init in parallel)
           
-#ifndef ADDA_SPARSE //These variables are exclusive to the FFT mode
+#ifndef SPARSE //These variables are exclusive to the FFT mode
 
 unsigned short * restrict position; /* position of the dipoles; in the very end of make_particle()
                                      * z-components are adjusted to be relative to the local_z0
@@ -168,14 +168,9 @@ size_t local_x0,local_x1,local_Nx; /* starting, ending x for current processor a
 
 #else //These variables are exclusive to the sparse mode
 
-int * restrict position;   // no reason to restrict this to short in sparse mode
+int *position; // no reason to restrict this to short in sparse mode; actually it points to a part of position_full
 //in sparse mode, all coordinates must be available to each process
-int * restrict position_full;  
+int * restrict position_full;
 
-unsigned char * restrict material_full;
-doublecomplex * restrict arg_full;
-
-double local_f0, local_f1;
-
-#endif //ADDA_SPARSE
+#endif //SPARSE
 
diff --git a/src/vars.h b/src/vars.h
index 5ee8a82e..d6733419 100644
--- a/src/vars.h
+++ b/src/vars.h
@@ -6,7 +6,7 @@
  *        source files are called 'semi-global' and not listed here. They are defined in one file
  *        and referenced with 'extern' in another one.
  *
- * Copyright (C) 2006-2012 ADDA contributors
+ * Copyright (C) 2006-2013 ADDA contributors
  * This file is part of ADDA.
  *
  * ADDA is free software: you can redistribute it and/or modify it under the terms of the GNU
@@ -89,7 +89,7 @@ extern size_t local_Ndip,local_nvoid_Ndip,local_nRows,local_nvoid_d0,local_nvoid
 extern time_t wt_start,last_chp_wt;
 extern TIME_TYPE Timing_EField,Timing_FileIO,Timing_Integration,tstart_main;
 
-#ifndef ADDA_SPARSE //These variables are exclusive to the FFT mode
+#ifndef SPARSE //These variables are exclusive to the FFT mode
 
 extern unsigned short * restrict position;
 
@@ -105,14 +105,9 @@ extern size_t local_Nz,local_x0,local_x1,local_Nx;
 
 #else //These variables are exclusive to the sparse mode
 
-extern int * restrict position;
-extern double * restrict DipoleCoord_full;
+extern int *position;
 extern int * restrict position_full;
-extern unsigned char * restrict material_full;
-extern doublecomplex * restrict arg_full;
 
-extern double local_f0, local_f1;
-
-#endif //ADDA_SPARSE
+#endif //SPARSE
 
 #endif // __vars_h
diff --git a/tests/2exec/README b/tests/2exec/README
index 3525fc7d..5d002a5f 100644
--- a/tests/2exec/README
+++ b/tests/2exec/README
@@ -1,15 +1,13 @@
-Utils for automatic cross-testing different ADDA executables (by comparing the simulation results).
-The main part is a bash script 'comp2exec'. Under Windows one may use MSYS to run it. This script 
-contains comments in the head and throughout the file to explain its usage. In particular, one may
-specify the particular executables to cross test (a few typical pairs can be selected by command 
-line options to this script).
+Utils for automatic cross-testing different ADDA executables (by comparing the simulation results). The main part is a
+bash script 'comp2exec'. Under Windows one may use MSYS to run it. This script contains comments in the head and
+throughout the file to explain its usage. In particular, one may specify the particular executables to cross test (a few
+typical pairs can be selected by command line options to this script).
 
-The particular tests to perform are specified by file 'suite', by a list of command lines with 
-certain additions and abbreviations (see comments in the file). Currently, this file provides an
-extensive list of tests for testing future ADDA versions against a stable release. Some of this
-tests probably need to be removed for more specific tests, since they will produce errors. 
-A different file (instead of 'suite') can also be used, if specified by command line option to
+The particular tests to perform are specified by file 'suite', by a list of command lines with certain additions and
+abbreviations (see comments in the file). Currently, this file provides an extensive list of tests for testing future
+ADDA versions against a stable release. Some of this tests probably need to be removed for more specific tests, since
+they will produce errors. A different file (instead of 'suite') can also be used, if specified by command line option to
 'comp2exec'.
 
-Another important part is script 'diff_numeric.awk', which compares two files with given numerical
-tolerance. It may also need modification to fit a specific application.
\ No newline at end of file
+Another important part is script 'diff_numeric.awk', which compares two files with given numerical tolerance. It may
+also need modification to fit a specific application.
diff --git a/tests/2exec/comp2exec b/tests/2exec/comp2exec
index 8edfebb2..2e4cd985 100755
--- a/tests/2exec/comp2exec
+++ b/tests/2exec/comp2exec
@@ -1,16 +1,15 @@
 #!/bin/bash
-# First parameter is seq (default), mpi, mpi_seq, ocl, or ocl_seq. 
-# One-word mode compares corresponding current version with previous stable version (e.g. 1.0)
-# - some differences are always expected.
-# Two-word mode compares two current versions corresponding to different modes (e.g. mpi vs. seq)
-# - only minor differences are generally expected.
+# First parameter is seq (default), mpi, mpi_seq, ocl, or ocl_seq.
+# One-word mode compares corresponding current version with previous stable version (e.g. 1.0) - some differences are
+# always expected. Two-word mode compares two current versions corresponding to different modes (e.g. mpi vs. seq) -
+# only minor differences are generally expected.
 # Second (if given) specifies file with a test suite (default - "suite").
-# Third (if given) specifies bash pattern (best if quoted). Tests are started only from the command 
-# line that matches it. (convenient to restart the test suite from a break point)
+# Third (if given) specifies bash pattern (best if quoted). Tests are started only from the command line that matches
+# it. (convenient to restart the test suite from a break point)
 #
 # Look below for "!!!", which mark the places where adjustments are probably need to be done
 
-#---------------- Set parameters and define internal functions ---------------------------------
+#---------------- Set parameters and define internal functions ---------------------------------------------------------
 
 # Location of sample input files (not needed if all input files are already present)
 INPUTDIR="./../../input"
@@ -19,25 +18,53 @@ ADDASEQ="./../../src/seq/adda"
 ADDAMPI="./../../src/mpi/adda_mpi"
 ADDAOCL="./../../src/ocl/adda_ocl"
 # MPI command prefix
-MPIRUN="mpiexec -n 2"
+MPIRUN="mpiexec -n 4"
+
+# Set the following flag to ignore differences related to different FFT methods, such as FFT grid sizes and memory.
+# Also useful for comparison of sparse version with the standard one
+#FFTCOMP=1
+
+# SPARSE indicates that sparse mode is compared with sparse. SPARSE_STANDARD - that sparse is compared with standard
+# (automatically implies FFTCOMP above). Use at most one of them
+#SPARSE=1
+#SPARSE_STANDARD=1
+
+if [ -n "$SPARSE_STANDARD" ]; then
+  FFTCOMP=1
+fi
+
+if [ -n "$SPARSE" ]; then
+  DEFSUITE=suite_sparse
+  SEQREF="./adda_sparse" # !!! This should be adjusted
+  MPIREF="./adda_mpi_sparse" # !!! This should be adjusted
+  OCLREF="./adda_ocl_sparse" # !!! This should be adjusted
+else
+  if [ -n "$SPARSE_STANDARD" ]; then
+    DEFSUITE=suite_sparse
+  else
+    DEFSUITE=suite
+  fi
+  SEQREF="./adda_1.1" # !!! This should be adjusted
+  MPIREF="./adda_mpi_1.1.exe" # !!! This should be adjusted
+  OCLREF="./adda_ocl_1.1" # !!! This should be adjusted
+fi
 
 MODE=${1:-seq}
-DEFSUITE=suite
 SUITEFILE=${2:-$DEFSUITE}
 
 if [ $MODE == "seq" ]; then
-  EXECREF="./adda"  # !!! This should be adjusted
+  EXECREF=$SEQREF
   EXECTEST=$ADDASEQ
   IGERRREF=1
 elif [ $MODE == "mpi" ]; then
-  EXECREF="$MPIRUN ./adda_mpi" # !!! This should be adjusted
+  EXECREF="$MPIRUN $MPIREF"
   EXECTEST="$MPIRUN $ADDAMPI"
   IGERRREF=1
 elif [ $MODE == "mpi_seq" ]; then
   EXECREF=$ADDASEQ
   EXECTEST="$MPIRUN $ADDAMPI"
 elif [ $MODE == "ocl" ]; then
-  EXECREF="./adda_ocl_amd" # !!! This should be adjusted
+  EXECREF=$OCLREF
   EXECTEST=$ADDAOCL
   IGERRREF=1
 elif [ $MODE == "ocl_seq" ]; then
@@ -48,12 +75,8 @@ else
   exit 1
 fi
 
-# Set the following flag to ignore differences related to different FFT methods, such as FFT grid sizes and memory
-#FFTCOMP=1
-
-# Whether errors in running reference version of ADDA should be ignored. Useful for comparing with 
-# older versions, which lack all the tested functionality. This variable is set above for some 
-# modes.
+# Whether errors in running reference version of ADDA should be ignored. Useful for comparing with older versions, which
+# lack all the tested functionality. This variable is set above for some modes.
 # !!! It can also be explicitly set here.
 #IGERRREF=1
 
@@ -67,8 +90,8 @@ TMPREF=ref.tmp # temporary files for text processing
 TMPTEST=test.tmp
 
 function cleanfile {
-  # Processes file $1 and stores the result in $2. Basic step is removing lines matching regexp $3.
-  # Additional optional step is cutting the end of the file starting from line matching regexp $4.
+  # Processes file $1 and stores the result in $2. Basic step is removing lines matching regexp $3. Additional optional
+  # step is cutting the end of the file starting from line matching regexp $4.
   if [ -n "$4" ]; then
     awk "BEGIN{p=1} /$4/{p=0} p" $1 | grep -v -E -e "$3" > $2
   else
@@ -76,21 +99,20 @@ function cleanfile {
   fi
 }
 function numdiff {
-  # Performs comparison of two files, ignoring differences in numerical values smaller than 
-  # absolute and relative tolerances, specified by variables atol and rtol (-log of real value).
-  # The main logic is inside awk script 'diff_numeric'.
+  # Performs comparison of two files, ignoring differences in numerical values smaller than absolute and relative
+  # tolerances, specified by variables atol and rtol (-log of real value). The main logic is inside awk script
+  # 'diff_numeric'.
   diff $1 $2 | awk -f diff_numeric.awk -v abs_tol=1e-$atol -v rel_tol=1e-$rtol -  >&2
 }
 function numigndiff {
-  # Same as numdiff, but additionally ignores certain differences in input files - see description
-  # of function cleanfile.
+  # Same as numdiff, but additionally ignores certain differences in input files - see description of function cleanfile
   cleanfile $1 $TMPREF "$3" "$4"
   cleanfile $2 $TMPTEST "$3" "$4"
   diff $TMPREF $TMPTEST | awk -f diff_numeric.awk -v abs_tol=1e-$atol -v rel_tol=1e-$rtol -  >&2
 }
 function igndiff {
-  # Performs comparison of two files, ignoring differences in lines matching regexp $3 and all the
-  # differences after the line matching regexp $4.
+  # Performs comparison of two files, ignoring differences in lines matching regexp $3 and all the differences after the
+  # line matching regexp $4.
   cleanfile $1 $TMPREF "$3" "$4"
   cleanfile $2 $TMPTEST "$3" "$4"
   diff $TMPREF $TMPTEST >&2
@@ -102,10 +124,9 @@ function asmin {
   fi
 }
 function mycmp {
-  # Determines which differences are considered significant, depending on files. Use of $ at the
-  # end of ignore patterns seems to be not portable, since it does not work on Windows because of
-  # different EOL style. So we skip it for now.
-  
+  # Determines which differences are considered significant, depending on files. Use of $ at the end of ignore patterns
+  # seems to be not portable, since it does not work on Windows because of different EOL style. So we skip it for now.
+
   # First, set default numerical accuracy or adjust it based on cmdline
   if [[ "$cmdline" == -granul\ * ]]; then
     asmin atol 1
@@ -124,10 +145,15 @@ function mycmp {
     if [ $MODE == "mpi_seq" ]; then
       IGNORE="$IGNORE|^(M|Total m|Maximum m|Additional m)emory usage"
     elif [ $MODE == "ocl_seq" ]; then
-      IGNORE="$IGNORE|^Using OpenCL device|^Device memory|^Searching for OpenCL devices|^Initializing (clFFT|FFTW3)|^(M|Total m|OpenCL m)emory usage"  
+      # double definition to wrap line
+	  IGNORE="$IGNORE|^Using OpenCL device|^Device memory|^Searching for OpenCL devices|^Initializing (clFFT|FFTW3)"
+      IGNORE="$IGNORE|^(M|Total m|OpenCL m)emory usage"
     fi
     if [ -n "$FFTCOMP" ]; then
-      IGNORE="$IGNORE|^(M|Total m|OpenCL m)emory usage"
+      IGNORE="$IGNORE|^(M|Total m|OpenCL m|Maximum m)emory usage"
+    fi
+    if [ -n "$SPARSE_STANDARD" ]; then
+      IGNORE="$IGNORE|^Calculating Green's function|^Fourier transform of Dmatrix|^Initializing (clFFT|FFTW3)"
     fi
     if [[ $MODE == "mpi" || $MODE == "mpi_seq" ]]; then
       CUT="^Error posting writev, " # due to typical random errors of MPICH under Windows
@@ -148,10 +174,10 @@ function mycmp {
     if [ $MODE == "mpi_seq" ]; then
       IGNORE="$IGNORE|^The program was run on: |^(M|Total m|Maximum m|Additional m)emory usage"
     elif [ $MODE == "ocl_seq" ]; then
-      IGNORE="$IGNORE|^Using OpenCL device|^Device memory|^OpenCL FFT algorithm: by |^(M|Total m|OpenCL m)emory usage"  
+      IGNORE="$IGNORE|^Using OpenCL device|^Device memory|^OpenCL FFT algorithm: |^(M|Total m|OpenCL m)emory usage"
     fi
     if [ -n "$FFTCOMP" ]; then
-      IGNORE="$IGNORE|^(|OpenCL )FFT algorithm: by |The FFT grid is: [0-9]+x[0-9]+x[0-9]+|^(M|Total m|OpenCL m)emory usage"
+      IGNORE="$IGNORE|^(|OpenCL )FFT algorithm: |^The FFT grid is: |^(M|Total m|OpenCL m|Maximum m)emory usage"
     fi
     CUT="^Total wall time: "
     asmin rtol 4
@@ -163,15 +189,15 @@ function mycmp {
     asmin rtol 5
     numdiff $1 $2
   elif [[ "$base" == log_int_* || "$base" == "log_orient_avg" ]]; then
-    asmin atol 13
+    asmin atol 12
     numdiff $1 $2
   elif [[ "$base" == "granules" ]]; then #compare only some comments and total number of lines
     if [ `wc -l < $1` == `wc -l < $2` ]; then
       igndiff $1 $2 "^([^#]|#generated by ADDA v\.)"
-    else 
+    else
       echo "Different number of granules"
       return 1
-    fi  
+    fi
   elif [[ "$base" == *.geom || "$base" == *.dat ]]; then
     igndiff $1 $2 "generated by ADDA v\."
   else
@@ -183,17 +209,16 @@ function mydiff {
   if !(mycmp $1 $2); then
     echo "!!! Difference between files '$1' and '$2'" >&2
     # !!! This should be adjusted
-    # It is recommended to put here a GUI diff program, which allows quick estimate of the 
-    # importance of differences (e.g. when there are differences in minor digits of many numbers).
-    # However, most effort should be put in improving mycmp, so that calling the function below 
-    # will indicate significant error by itself. The following line can be commented out at all, 
-    # since function mycmp will produce (significant) diff of compared files to stderr.
+    # It is recommended to put here a GUI diff program, which allows quick estimate of the importance of differences
+    # (e.g. when there are differences in minor digits of many numbers). However, most effort should be put in improving
+    # mycmp, so that calling the function below will indicate significant error by itself. The following line can be
+    # also commented out, since function mycmp will produce (significant) diff of compared files to stderr.
     # Options include, for example: tortoisemerge, meld, vimdiff
     tortoisemerge $1 $2
   fi
 }
 
-#---------------- Prepare input files -----------------------------------------
+#---------------- Prepare input files ----------------------------------------------------------------------------------
 
 NEEDEDFILES="scat_params.dat avg_params.dat alldir_params.dat"
 NEEDEDDIRS="tables"
@@ -210,16 +235,16 @@ for dir in $NEEDEDDIRS; do
   fi
 done
 
-#---------------- Run comparison ----------------------------------------------
+#---------------- Run comparison ---------------------------------------------------------------------------------------
 
 # This in combination with stdin redirection to ADDA calls below redirects current stdin directly
 # to ADDA call. While the stdin of the code block (while...done) is kept intact. This is especially
-# relevant to mpi version, since then ADDA is executed with mpiexec, which creates forks and a big 
+# relevant to mpi version, since then ADDA is executed with mpiexec, which creates forks and a big
 # mess with stdin.
 exec 3<&0
 imax=-1
 # initialize skipping of lines up to pattern $3
-if [ -n "$3" ]; then 
+if [ -n "$3" ]; then
   skip=1
 else
   skip=0
@@ -249,7 +274,7 @@ while read -r cmpfiles cmdline; do
         echo -e "\nERROR while running \n$runref\nsee $SOREF" >&2
         exit 1
       fi
-    else 
+    else
       refok=1
     fi
     # test run
diff --git a/tests/2exec/diff_numeric.awk b/tests/2exec/diff_numeric.awk
index 9416a104..3c9f9b87 100644
--- a/tests/2exec/diff_numeric.awk
+++ b/tests/2exec/diff_numeric.awk
@@ -1,8 +1,6 @@
-# Processes the diff of two files and ignores the small errors in numbers -
-# either small absolute or relative errors - controlled by 'abs_tol' and 'rel_tol'.
-# These values can be either specified outside by e.g. '-v abs_tol=1e-10' or assume
-# default values specified below.
-# Numbers should be separated from other text by spaces, '=', ',', or brackets -
+# Processes the diff of two files and ignores the small errors in numbers - either small absolute or relative errors -
+# controlled by 'abs_tol' and 'rel_tol'. These values can be either specified outside by e.g. '-v abs_tol=1e-10' or
+# assume default values specified below. Numbers should be separated from other text by spaces, '=', ',', or brackets -
 # controlled by 'FS'. Output happens only if differences are found.
 
 BEGIN {
@@ -23,15 +21,6 @@ function relerr(a,b,  c) {
 	return (c==0) ? 0 : abs(a-b)/c
 }
 
-/^[0-9]/ {
-	if (i1>i2) {
-		printf "First file contains unmatched line:\n%s\n", ref[i2+1]
-		exit 3
-	}
-	i1=0
-	i2=0
-}
-
 /^< / {
 	i1++
 	ref[i1]=substr($0,3)
@@ -55,9 +44,26 @@ function relerr(a,b,  c) {
 				exit 2
 			}
 			else if (abs(p1[j]-p2[j])>abs_tol && relerr(p1[j],p2[j])>rel_tol) {
+				# wrapping the following line produces some errors on Windows, so keep it like this
 				printf "Significant difference between numbers: '%s' and '%s' (abs_tol='%s', rel_tol='%s')\n", p1[j], p2[j], abs_tol, rel_tol
 				exit 1
 			}
 		}
 	}
+}
+
+/^[0-9]/ {
+	if (i1>i2) {
+		printf "First file contains unmatched line:\n%s\n", ref[i2+1]
+		exit 3
+	}
+	i1=0
+	i2=0
+}
+
+END {
+	if (i1>i2) {
+		printf "First file contains unmatched line:\n%s\n", ref[i2+1]
+		exit 3
+	}
 }
\ No newline at end of file
diff --git a/tests/2exec/ellipsoid.geom b/tests/2exec/ellipsoid.geom
index d8d91072..98c0807b 100644
--- a/tests/2exec/ellipsoid.geom
+++ b/tests/2exec/ellipsoid.geom
@@ -1,283 +1,203 @@
-#generated by ADDA v.1.0a3
+#generated by ADDA v.1.1
 #shape: 'ellipsoid'
-#box size: 16x8x4
-6 1 0
-7 1 0
-8 1 0
-9 1 0
+#box size: 8x4x12
+3 1 0
+4 1 0
+3 2 0
 4 2 0
-5 2 0
-6 2 0
-7 2 0
-8 2 0
-9 2 0
-10 2 0
-11 2 0
-3 3 0
-4 3 0
-5 3 0
-6 3 0
-7 3 0
-8 3 0
-9 3 0
-10 3 0
-11 3 0
-12 3 0
-3 4 0
-4 4 0
-5 4 0
-6 4 0
-7 4 0
-8 4 0
-9 4 0
-10 4 0
-11 4 0
-12 4 0
-4 5 0
-5 5 0
-6 5 0
-7 5 0
-8 5 0
-9 5 0
-10 5 0
-11 5 0
-6 6 0
-7 6 0
-8 6 0
-9 6 0
-5 0 1
-6 0 1
-7 0 1
-8 0 1
-9 0 1
-10 0 1
 2 1 1
 3 1 1
 4 1 1
 5 1 1
-6 1 1
-7 1 1
-8 1 1
-9 1 1
-10 1 1
-11 1 1
-12 1 1
-13 1 1
-1 2 1
 2 2 1
 3 2 1
 4 2 1
 5 2 1
-6 2 1
-7 2 1
-8 2 1
-9 2 1
-10 2 1
-11 2 1
-12 2 1
-13 2 1
-14 2 1
-0 3 1
-1 3 1
-2 3 1
-3 3 1
-4 3 1
-5 3 1
-6 3 1
-7 3 1
-8 3 1
-9 3 1
-10 3 1
-11 3 1
-12 3 1
-13 3 1
-14 3 1
-15 3 1
-0 4 1
-1 4 1
-2 4 1
-3 4 1
-4 4 1
-5 4 1
-6 4 1
-7 4 1
-8 4 1
-9 4 1
-10 4 1
-11 4 1
-12 4 1
-13 4 1
-14 4 1
-15 4 1
-1 5 1
-2 5 1
-3 5 1
-4 5 1
-5 5 1
-6 5 1
-7 5 1
-8 5 1
-9 5 1
-10 5 1
-11 5 1
-12 5 1
-13 5 1
-14 5 1
-2 6 1
-3 6 1
-4 6 1
-5 6 1
-6 6 1
-7 6 1
-8 6 1
-9 6 1
-10 6 1
-11 6 1
-12 6 1
-13 6 1
-5 7 1
-6 7 1
-7 7 1
-8 7 1
-9 7 1
-10 7 1
-5 0 2
-6 0 2
-7 0 2
-8 0 2
-9 0 2
-10 0 2
+3 0 2
+4 0 2
+1 1 2
 2 1 2
 3 1 2
 4 1 2
 5 1 2
 6 1 2
-7 1 2
-8 1 2
-9 1 2
-10 1 2
-11 1 2
-12 1 2
-13 1 2
 1 2 2
 2 2 2
 3 2 2
 4 2 2
 5 2 2
 6 2 2
-7 2 2
-8 2 2
-9 2 2
-10 2 2
-11 2 2
-12 2 2
-13 2 2
-14 2 2
-0 3 2
-1 3 2
-2 3 2
 3 3 2
 4 3 2
-5 3 2
-6 3 2
-7 3 2
-8 3 2
-9 3 2
-10 3 2
-11 3 2
-12 3 2
-13 3 2
-14 3 2
-15 3 2
-0 4 2
-1 4 2
-2 4 2
-3 4 2
-4 4 2
-5 4 2
-6 4 2
-7 4 2
-8 4 2
-9 4 2
-10 4 2
-11 4 2
-12 4 2
-13 4 2
-14 4 2
-15 4 2
-1 5 2
-2 5 2
-3 5 2
-4 5 2
-5 5 2
-6 5 2
-7 5 2
-8 5 2
-9 5 2
-10 5 2
-11 5 2
-12 5 2
-13 5 2
-14 5 2
-2 6 2
-3 6 2
-4 6 2
-5 6 2
-6 6 2
-7 6 2
-8 6 2
-9 6 2
-10 6 2
-11 6 2
-12 6 2
-13 6 2
-5 7 2
-6 7 2
-7 7 2
-8 7 2
-9 7 2
-10 7 2
+2 0 3
+3 0 3
+4 0 3
+5 0 3
+1 1 3
+2 1 3
+3 1 3
+4 1 3
+5 1 3
 6 1 3
-7 1 3
-8 1 3
-9 1 3
+1 2 3
+2 2 3
+3 2 3
 4 2 3
 5 2 3
 6 2 3
-7 2 3
-8 2 3
-9 2 3
-10 2 3
-11 2 3
+2 3 3
 3 3 3
 4 3 3
 5 3 3
-6 3 3
-7 3 3
-8 3 3
-9 3 3
-10 3 3
-11 3 3
-12 3 3
-3 4 3
-4 4 3
-5 4 3
-6 4 3
-7 4 3
-8 4 3
-9 4 3
-10 4 3
-11 4 3
-12 4 3
-4 5 3
-5 5 3
-6 5 3
-7 5 3
-8 5 3
-9 5 3
-10 5 3
-11 5 3
-6 6 3
-7 6 3
-8 6 3
-9 6 3
+2 0 4
+3 0 4
+4 0 4
+5 0 4
+0 1 4
+1 1 4
+2 1 4
+3 1 4
+4 1 4
+5 1 4
+6 1 4
+7 1 4
+0 2 4
+1 2 4
+2 2 4
+3 2 4
+4 2 4
+5 2 4
+6 2 4
+7 2 4
+2 3 4
+3 3 4
+4 3 4
+5 3 4
+1 0 5
+2 0 5
+3 0 5
+4 0 5
+5 0 5
+6 0 5
+0 1 5
+1 1 5
+2 1 5
+3 1 5
+4 1 5
+5 1 5
+6 1 5
+7 1 5
+0 2 5
+1 2 5
+2 2 5
+3 2 5
+4 2 5
+5 2 5
+6 2 5
+7 2 5
+1 3 5
+2 3 5
+3 3 5
+4 3 5
+5 3 5
+6 3 5
+1 0 6
+2 0 6
+3 0 6
+4 0 6
+5 0 6
+6 0 6
+0 1 6
+1 1 6
+2 1 6
+3 1 6
+4 1 6
+5 1 6
+6 1 6
+7 1 6
+0 2 6
+1 2 6
+2 2 6
+3 2 6
+4 2 6
+5 2 6
+6 2 6
+7 2 6
+1 3 6
+2 3 6
+3 3 6
+4 3 6
+5 3 6
+6 3 6
+2 0 7
+3 0 7
+4 0 7
+5 0 7
+0 1 7
+1 1 7
+2 1 7
+3 1 7
+4 1 7
+5 1 7
+6 1 7
+7 1 7
+0 2 7
+1 2 7
+2 2 7
+3 2 7
+4 2 7
+5 2 7
+6 2 7
+7 2 7
+2 3 7
+3 3 7
+4 3 7
+5 3 7
+2 0 8
+3 0 8
+4 0 8
+5 0 8
+1 1 8
+2 1 8
+3 1 8
+4 1 8
+5 1 8
+6 1 8
+1 2 8
+2 2 8
+3 2 8
+4 2 8
+5 2 8
+6 2 8
+2 3 8
+3 3 8
+4 3 8
+5 3 8
+3 0 9
+4 0 9
+1 1 9
+2 1 9
+3 1 9
+4 1 9
+5 1 9
+6 1 9
+1 2 9
+2 2 9
+3 2 9
+4 2 9
+5 2 9
+6 2 9
+3 3 9
+4 3 9
+2 1 10
+3 1 10
+4 1 10
+5 1 10
+2 2 10
+3 2 10
+4 2 10
+5 2 10
+3 1 11
+4 1 11
+3 2 11
+4 2 11
diff --git a/tests/2exec/sphere.geom b/tests/2exec/sphere.geom
new file mode 100644
index 00000000..76c32bfb
--- /dev/null
+++ b/tests/2exec/sphere.geom
@@ -0,0 +1,283 @@
+#generated by ADDA v.1.0
+#shape: 'sphere'
+#box size: 8x8x8
+3 2 0
+4 2 0
+2 3 0
+3 3 0
+4 3 0
+5 3 0
+2 4 0
+3 4 0
+4 4 0
+5 4 0
+3 5 0
+4 5 0
+2 1 1
+3 1 1
+4 1 1
+5 1 1
+1 2 1
+2 2 1
+3 2 1
+4 2 1
+5 2 1
+6 2 1
+1 3 1
+2 3 1
+3 3 1
+4 3 1
+5 3 1
+6 3 1
+1 4 1
+2 4 1
+3 4 1
+4 4 1
+5 4 1
+6 4 1
+1 5 1
+2 5 1
+3 5 1
+4 5 1
+5 5 1
+6 5 1
+2 6 1
+3 6 1
+4 6 1
+5 6 1
+3 0 2
+4 0 2
+1 1 2
+2 1 2
+3 1 2
+4 1 2
+5 1 2
+6 1 2
+1 2 2
+2 2 2
+3 2 2
+4 2 2
+5 2 2
+6 2 2
+0 3 2
+1 3 2
+2 3 2
+3 3 2
+4 3 2
+5 3 2
+6 3 2
+7 3 2
+0 4 2
+1 4 2
+2 4 2
+3 4 2
+4 4 2
+5 4 2
+6 4 2
+7 4 2
+1 5 2
+2 5 2
+3 5 2
+4 5 2
+5 5 2
+6 5 2
+1 6 2
+2 6 2
+3 6 2
+4 6 2
+5 6 2
+6 6 2
+3 7 2
+4 7 2
+2 0 3
+3 0 3
+4 0 3
+5 0 3
+1 1 3
+2 1 3
+3 1 3
+4 1 3
+5 1 3
+6 1 3
+0 2 3
+1 2 3
+2 2 3
+3 2 3
+4 2 3
+5 2 3
+6 2 3
+7 2 3
+0 3 3
+1 3 3
+2 3 3
+3 3 3
+4 3 3
+5 3 3
+6 3 3
+7 3 3
+0 4 3
+1 4 3
+2 4 3
+3 4 3
+4 4 3
+5 4 3
+6 4 3
+7 4 3
+0 5 3
+1 5 3
+2 5 3
+3 5 3
+4 5 3
+5 5 3
+6 5 3
+7 5 3
+1 6 3
+2 6 3
+3 6 3
+4 6 3
+5 6 3
+6 6 3
+2 7 3
+3 7 3
+4 7 3
+5 7 3
+2 0 4
+3 0 4
+4 0 4
+5 0 4
+1 1 4
+2 1 4
+3 1 4
+4 1 4
+5 1 4
+6 1 4
+0 2 4
+1 2 4
+2 2 4
+3 2 4
+4 2 4
+5 2 4
+6 2 4
+7 2 4
+0 3 4
+1 3 4
+2 3 4
+3 3 4
+4 3 4
+5 3 4
+6 3 4
+7 3 4
+0 4 4
+1 4 4
+2 4 4
+3 4 4
+4 4 4
+5 4 4
+6 4 4
+7 4 4
+0 5 4
+1 5 4
+2 5 4
+3 5 4
+4 5 4
+5 5 4
+6 5 4
+7 5 4
+1 6 4
+2 6 4
+3 6 4
+4 6 4
+5 6 4
+6 6 4
+2 7 4
+3 7 4
+4 7 4
+5 7 4
+3 0 5
+4 0 5
+1 1 5
+2 1 5
+3 1 5
+4 1 5
+5 1 5
+6 1 5
+1 2 5
+2 2 5
+3 2 5
+4 2 5
+5 2 5
+6 2 5
+0 3 5
+1 3 5
+2 3 5
+3 3 5
+4 3 5
+5 3 5
+6 3 5
+7 3 5
+0 4 5
+1 4 5
+2 4 5
+3 4 5
+4 4 5
+5 4 5
+6 4 5
+7 4 5
+1 5 5
+2 5 5
+3 5 5
+4 5 5
+5 5 5
+6 5 5
+1 6 5
+2 6 5
+3 6 5
+4 6 5
+5 6 5
+6 6 5
+3 7 5
+4 7 5
+2 1 6
+3 1 6
+4 1 6
+5 1 6
+1 2 6
+2 2 6
+3 2 6
+4 2 6
+5 2 6
+6 2 6
+1 3 6
+2 3 6
+3 3 6
+4 3 6
+5 3 6
+6 3 6
+1 4 6
+2 4 6
+3 4 6
+4 4 6
+5 4 6
+6 4 6
+1 5 6
+2 5 6
+3 5 6
+4 5 6
+5 5 6
+6 5 6
+2 6 6
+3 6 6
+4 6 6
+5 6 6
+3 2 7
+4 2 7
+2 3 7
+3 3 7
+4 3 7
+5 3 7
+2 4 7
+3 4 7
+4 4 7
+5 4 7
+3 5 7
+4 5 7
diff --git a/tests/2exec/sphere4.geom b/tests/2exec/sphere4.geom
new file mode 100644
index 00000000..4aa65c6f
--- /dev/null
+++ b/tests/2exec/sphere4.geom
@@ -0,0 +1,35 @@
+#generated by ADDA v.1.0
+#shape: 'sphere'
+#box size: 4x4x4
+1 1 0
+2 1 0
+1 2 0
+2 2 0
+1 0 1
+2 0 1
+0 1 1
+1 1 1
+2 1 1
+3 1 1
+0 2 1
+1 2 1
+2 2 1
+3 2 1
+1 3 1
+2 3 1
+1 0 2
+2 0 2
+0 1 2
+1 1 2
+2 1 2
+3 1 2
+0 2 2
+1 2 2
+2 2 2
+3 2 2
+1 3 2
+2 3 2
+1 1 3
+2 1 3
+1 2 3
+2 2 3
diff --git a/tests/2exec/suite b/tests/2exec/suite
index e6d6a002..40c52b4d 100644
--- a/tests/2exec/suite
+++ b/tests/2exec/suite
@@ -1,7 +1,6 @@
-#------------------------- Variable definitons ---------------------------------
-# All variable names should be enclosed in ';' and start the line - its value
-# is assigned to the rest of the line. It can be defined through other variables
-# defined below. Each variable must be defined only once.
+#------------------------- Variable definitons -------------------------------------------------------------------------
+# All variable names should be enclosed in ';' and start the line - its value is assigned to the rest of the line. It
+# can be defined through other variables defined below. Each variable must be defined only once.
 
 # default for most tests
 ;mgn; ;m; ;g; ;n;
@@ -22,13 +21,13 @@
 ;n; -ntheta 5
 ;p; -prop 1 2 3
 ;se; -shape ellipsoid 0.5 1.5
-#----------------------------- List of tests -----------------------------------
-# The format is the following: '<list,of,files,to,compare> <cmdline>'
-# the first one is coma-separated list of files to compare or 'all' (which compares 
-# all produced files. <cmdline> is everything after the first space and it is passed
+#----------------------------- List of tests ---------------------------------------------------------------------------
+# The format is the following: '<list,of,files,to,compare> <cmdline>' the first one is coma-separated list of files to
+# compare or 'all' (which compares all produced files. <cmdline> is everything after the first space and it is passed
 # directly to ADDA.
 
 all
+
 # testing of different grids - relevant for FFT methods
 # to remove redundant warnings for ocl_seq, only (2,3,5) numbers are used
 all -grid 2 ;smn;
@@ -97,8 +96,8 @@ all -eps 10 ;mgn;
 all -h eq_rad
 all -eq_rad 1 ;mgn;
 
-# it is hard to make meaningful comparison of stdout and log for random placement of granules
-# however, optical properties are compared using rather large tolerances
+# It is hard to make meaningful comparison of stdout and log for random placement of granules. However, optical
+# properties are compared using rather large tolerances
 all -h granul
 CrossSec-Y,CrossSec-X,mueller -granul 0.2 0.5 2 -size 8 -shape coated 0.5 ;3m; ;n;
 CrossSec-Y,CrossSec-X,mueller -granul 0.2 2 2 -size 8 -shape coated 0.5 ;3m; ;n;
@@ -208,7 +207,7 @@ all -h scat_matr
 all -scat_matr muel ;mgn;
 all -scat_matr ampl ;mgn;
 all -scat_matr both ;mgn;
-all -scat_matr none ;m; ;g; 
+all -scat_matr none ;m; ;g;
 
 all -h shape
 all -h shape axisymmetric
@@ -253,7 +252,7 @@ all -shape sphere ;mgn;
 all -h shape spherebox
 all -shape spherebox 0.5 ;2mgn;
 
-all -h size 
+all -h size
 all -size 8 ;mgn;
 
 all -h store_beam
@@ -276,7 +275,7 @@ all -store_int_field ;se; ;mgn;
 all -h store_scat_grid
 all -store_scat_grid ;sep; ;mgn;
 
-all -h sym 
+all -h sym
 all -sym auto ;mgn;
 all -sym no ;mgn;
 all -sym enf ;mgn;
diff --git a/tests/2exec/suite_sparse b/tests/2exec/suite_sparse
new file mode 100644
index 00000000..4b67bed8
--- /dev/null
+++ b/tests/2exec/suite_sparse
@@ -0,0 +1,275 @@
+#------------------------- Variable definitons -------------------------------------------------------------------------
+# All variable names should be enclosed in ';' and start the line - its value is assigned to the rest of the line. It
+# can be defined through other variables defined below. Each variable must be defined only once.
+
+# default for most tests
+;mgn; ;m; ;g; ;n;
+# default for two-domain particles
+;2mgn; ;2m; ;g; ;n;
+# for large computations (e.g. orientation averaging)
+;mg4n; ;m; -shape read sphere4.geom -sym enf ;n;
+# default addition to make particle completely non-symmetric
+;sep; ;se; ;p;
+# default addition for non-spherical shapes
+;mn; ;m; ;n;
+
+# Elementary variables
+;m; -m 1.1 0.1
+;2m; -m 1.1 0.1 1.2 0.2
+;3m; -m 1.05 0.1 1.1 0.2 1.2 0.3
+;g; -shape read sphere.geom -sym enf
+;n; -ntheta 5
+;p; -prop 1 2 3
+;se; -shape read ellipsoid.geom
+
+#----------------------------- List of tests ---------------------------------------------------------------------------
+# The format is the following: '<list,of,files,to,compare> <cmdline>' the first one is coma-separated list of files to
+# compare or 'all' (which compares all produced files. <cmdline> is everything after the first space and it is passed
+# directly to ADDA.
+
+all ;g;
+
+all -h alldir_inp
+all -alldir_inp adp.dat -Csca ;mgn;
+
+all -h anisotr
+all -anisotr ;3m; ;g; ;n;
+
+all -h asym
+all -asym ;sep; ;mn;
+
+all -h beam
+all -h beam plane
+all -beam plane ;mgn;
+all -h beam lminus
+all -beam lminus 2 1 2 3 ;mgn;
+all -h beam davis3
+all -beam davis3 2 1 2 3 ;mgn;
+all -h beam barton5
+all -beam barton5 2 1 2 3 ;mgn;
+all -h beam read
+all -beam read IncBeam-Y IncBeam-X ;se; ;mn;
+
+all -h chpoint
+all -chpoint 1s -eps 3 ;mgn;
+all -h chp_type
+all -chp_type normal -chpoint 1s -eps 3 ;mgn;
+all -chp_dir chp_tmp -chp_type regular -chpoint 1s -eps 3 ;mgn;
+all -h chp_dir
+all -chp_dir chp_tmp -chp_type always -eps 3 ;mgn;
+all -h chp_load
+all -chp_dir chp_tmp -chp_load ;mgn;
+
+all -h Cpr
+all -Cpr ;sep; ;mn;
+
+all -h Csca
+all -Csca ;se; ;mn;
+
+all -h dir
+
+all -h dpl
+all -dpl 20 ;mgn;
+
+all -h eps
+all -eps 10 ;mgn;
+
+all -h eq_rad
+all -eq_rad 1 ;mgn;
+
+# It is hard to make meaningful comparison of stdout and log for random placement of granules. However, optical
+# properties are compared using rather large tolerances
+#all -h granul
+#CrossSec-Y,CrossSec-X,mueller -granul 0.2 0.5 2 -size 8 -shape coated 0.5 ;3m; ;n;
+#CrossSec-Y,CrossSec-X,mueller -granul 0.2 2 2 -size 8 -shape coated 0.5 ;3m; ;n;
+
+all -h grid
+#all -grid 4 6 8 ;m; ;n;
+
+all -h
+all -h h
+
+all -h init_field
+all -init_field auto ;mgn;
+all -init_field zero ;mgn;
+all -init_field inc ;mgn;
+#all -init_field wkb ;mgn;
+
+all -h int
+all -int fcd ;mgn;
+all -int fcd_st ;mgn;
+all -int igt ;mg4n;
+all -int igt 3 ;mg4n;
+all -int igt 3 0.01 ;mg4n;
+all -int igt_so ;mgn;
+all -int poi ;mgn;
+all -int so ;mgn;
+
+all -h iter
+all -iter bicg ;mgn;
+all -iter bicgstab ;mgn;
+all -iter cgnr ;mgn;
+all -iter csym ;mgn;
+all -iter qmr ;mgn;
+all -iter qmr2 ;mgn;
+
+all -h jagged
+all -jagged 2 ;mg4n;
+
+all -h lambda
+all -lambda 1 ;mgn;
+
+all -h m
+all -m 1.2 0.2 ;g; ;n;
+
+all -h maxiter
+all -maxiter 5 ;mgn;
+
+all -h no_reduced_fft
+all -no_reduced_fft ;mgn;
+
+all -h no_vol_cor
+all -no_vol_cor -size 3 ;mgn;
+
+all -h ntheta
+all -ntheta 10 ;m; ;g;
+
+all -h opt
+all -opt speed ;mgn;
+all -opt mem ;mgn;
+
+all -h orient
+all -orient 10 20 30 ;se; ;mn;
+all -orient avg ;se; ;mn;
+all -orient avg ap.dat ;se; ;mn;
+
+all -h phi_integr
+all -phi_integr 31 ;sep; ;mn;
+
+all -h pol
+all -pol cldr ;p; ;mgn;
+all -pol cm ;mgn;
+all -pol dgf ;mgn;
+all -pol fcd ;mgn;
+all -pol igt_so ;mgn;
+all -pol ldr ;p; ;mgn;
+all -pol ldr avgpol ;p; ;mgn;
+all -pol rrc ;mgn;
+all -pol so ;p; ;mgn;
+
+all -h prognosis
+all -prognosis ;g;
+
+all -h prop
+all -prop 5 1 3 ;se; ;mn;
+
+all -h recalc_resid
+all -recalc_resid ;mgn;
+
+#all -h save_geom
+#all -save_geom -shape read coated.geom -prognosis
+#all -h sg_format
+#all -save_geom ;se; -prognosis -sg_format text
+#all -save_geom ;se; -prognosis -sg_format text_ext
+#all -save_geom ;se; -prognosis -sg_format ddscat6
+#all -save_geom ;se; -prognosis -sg_format ddscat7
+
+all -h scat
+all -scat dr ;mgn;
+all -scat fin ;mgn;
+all -scat igt_so ;mgn;
+all -scat so ;mgn;
+
+all -h scat_grid_inp
+all -scat_grid_inp sp.dat ;mgn;
+
+all -h scat_matr
+all -scat_matr muel ;mgn;
+all -scat_matr ampl ;mgn;
+all -scat_matr both ;mgn;
+all -scat_matr none ;m; ;g; 
+
+all -h shape
+#all -h shape axisymmetric
+#all -shape axisymmetric 196.txt ;mgn;
+#all -h shape bicoated
+#all -shape bicoated 3 0.5 ;2mgn;
+#all -h shape biellipsoid
+#all -shape biellipsoid 0.5 1.5 0.75 0.5 1.5 ;2mgn;
+#all -h shape bisphere
+#all -shape bisphere 2 ;mgn;
+#all -h shape box
+#all -shape box ;mgn;
+#all -shape box 0.5 1.5 ;mgn;
+#all -h shape capsule
+#all -shape capsule 1.5 ;mgn;
+#all -h shape chebyshev
+#all -shape chebyshev 0.3 5 ;mgn;
+#all -h shape coated
+#all -shape coated 0.5 ;2mgn;
+#all -shape coated 0.4 0.1 0.15 0.2 ;2mgn;
+#all -h shape cylinder
+#all -shape cylinder 1 ;mgn;
+#all -h shape egg
+#all -shape egg 0.5 0.2 ;mgn;
+#all -h shape ellipsoid
+#all -shape ellipsoid 0.25 2 ;mgn;
+#all -h shape line
+#all -shape line -grid 16 ;m; ;n;
+#all -h shape plate
+#all -shape plate 0.5 ;mgn;
+#all -h shape prism
+#all -shape prism 5 1.5 ;mgn;
+#all -h shape rbc
+#all -shape rbc 0.3 0.1 0.3 ;mgn;
+all -h shape read
+all -shape read ellipsoid.geom ;m; ;n;
+all -shape read coated.geom ;2m; ;n;
+all -shape read ell_ddscat6.dat ;m; ;n;
+all -shape read ell_ddscat7.dat ;m; ;n;
+#all -h shape sphere
+#all -shape sphere ;mgn;
+#all -h shape spherebox
+#all -shape spherebox 0.5 ;2mgn;
+
+all -h size 
+all -size 8 ;mgn;
+
+all -h store_beam
+all -store_beam ;se; ;mn;
+
+all -h store_dip_pol
+all -store_dip_pol ;se; ;mn;
+
+all -h store_force
+all -store_force -Cpr ;sep; ;mn;
+all -store_force ;mgn;
+
+#all -h store_grans
+#granules -store_grans -granul 0.2 1 -size 4 ;2mgn;
+
+all -h store_int_field
+all -store_int_field ;se; ;mn;
+
+all -h store_scat_grid
+all -store_scat_grid ;sep; ;mn;
+
+all -h sym 
+all -sym auto ;mn; -shape read sphere.geom
+all -sym no ;mn; -shape read sphere.geom
+all -sym enf ;mn; -shape read sphere.geom
+all -sym auto ;sep; ;mn;
+all -sym no ;sep; ;mn;
+all -sym enf ;sep; ;mn;
+
+all -h test
+all -test ;mgn;
+
+all -h V
+all -V
+
+all -h vec
+all -vec ;sep; ;mn;
+
+all -h yz
+all -yz -store_scat_grid ;mgn;