diff --git a/.travis_scripts/sandbox.sh b/.travis_scripts/sandbox.sh
index e7269051..c6c128c6 100755
--- a/.travis_scripts/sandbox.sh
+++ b/.travis_scripts/sandbox.sh
@@ -10,4 +10,5 @@ VERBOSE=1 $make_j
 OMP_NUM_THREADS=4 ctest -R tgv_2d || cat Testing/Temporary/LastTest.log /
 OMP_NUM_THREADS=4 make -C convergence_2d_3d test || cat convergence_2d_3d/Testing/Temporary/LastTest.log /
 if [[ $TRAVIS_OS_NAME == 'linux' ]]; then OMP_NUM_THREADS=4 make -C convergence_spacetime test || cat convergence_spacetime/Testing/Temporary/LastTest.log /; fi
+OMP_NUM_THREADS=4 make -C bconds_div test || cat bconds_div/Testing/Temporary/LastTest.log /
 cd ../../..
diff --git a/libmpdata++/bcond/cyclic_1d.hpp b/libmpdata++/bcond/cyclic_1d.hpp
index 0be8a61a..d135e3c3 100644
--- a/libmpdata++/bcond/cyclic_1d.hpp
+++ b/libmpdata++/bcond/cyclic_1d.hpp
@@ -35,6 +35,11 @@ namespace libmpdataxx
       {
 	av[0](this->left_halo_vctr) = av[0](this->rght_intr_vctr);
       }
+      
+      void fill_halos_vctr_alng_cyclic(arrvec_t<arr_t> &av, const bool ad = false)
+      {
+        fill_halos_vctr_alng(av, ad);
+      }
     };
 
     template <typename real_t, int halo, bcond_e knd, drctn_e dir, int n_dims, int dim>
@@ -62,6 +67,10 @@ namespace libmpdataxx
 	av[0](this->rght_halo_vctr) = av[0](this->left_intr_vctr);
       }
       
+      void fill_halos_vctr_alng_cyclic(arrvec_t<arr_t> &av, const bool ad = false)
+      {
+        fill_halos_vctr_alng(av, ad);
+      }
     };
   } // namespace bcond
 } // namespace libmpdataxx
diff --git a/libmpdata++/bcond/cyclic_2d.hpp b/libmpdata++/bcond/cyclic_2d.hpp
index 01a0483b..38da2357 100644
--- a/libmpdata++/bcond/cyclic_2d.hpp
+++ b/libmpdata++/bcond/cyclic_2d.hpp
@@ -68,6 +68,16 @@ namespace libmpdataxx
       {
         fill_halos_sclr(a, j);
       }
+      
+      void fill_halos_vctr_alng_cyclic(arrvec_t<arr_t> &av, const rng_t &j, const bool ad = false)
+      {
+        fill_halos_vctr_alng(av, j, ad);
+      }
+
+      void fill_halos_vctr_nrml_cyclic(arr_t &a, const rng_t &j)
+      {
+        fill_halos_vctr_nrml(a, j);
+      }
     };
 
     template <typename real_t, int halo, bcond_e knd, drctn_e dir, int n_dims, int d>
@@ -127,6 +137,16 @@ namespace libmpdataxx
       {
         fill_halos_sclr(a, j);
       }
+      
+      void fill_halos_vctr_alng_cyclic(arrvec_t<arr_t> &av, const rng_t &j, const bool ad = false)
+      {
+        fill_halos_vctr_alng(av, j, ad);
+      }
+
+      void fill_halos_vctr_nrml_cyclic(arr_t &a, const rng_t &j)
+      {
+        fill_halos_vctr_nrml(a, j);
+      }
     };
   } // namespace bcond
 } // namespace libmpdataxx
diff --git a/libmpdata++/bcond/cyclic_3d.hpp b/libmpdata++/bcond/cyclic_3d.hpp
index 350321d4..95b75f69 100644
--- a/libmpdata++/bcond/cyclic_3d.hpp
+++ b/libmpdata++/bcond/cyclic_3d.hpp
@@ -68,6 +68,16 @@ namespace libmpdataxx
       {
         fill_halos_sclr(a, j, k);
       }
+      
+      void fill_halos_vctr_alng_cyclic(arrvec_t<arr_t> &av, const rng_t &j, const rng_t &k, const bool ad = false)
+      {
+        fill_halos_vctr_alng(av, j, k, ad);
+      }
+
+      void fill_halos_vctr_nrml_cyclic(arr_t &a, const rng_t &j, const rng_t &k)
+      {
+        fill_halos_vctr_nrml(a, j, k);
+      }
     };
 
     template <typename real_t, int halo, bcond_e knd, drctn_e dir, int n_dims, int d>
@@ -127,6 +137,16 @@ namespace libmpdataxx
       {
         fill_halos_sclr(a, j, k);
       }
+      
+      void fill_halos_vctr_alng_cyclic(arrvec_t<arr_t> &av, const rng_t &j, const rng_t &k, const bool ad = false)
+      {
+        fill_halos_vctr_alng(av, j, k, ad);
+      }
+
+      void fill_halos_vctr_nrml_cyclic(arr_t &a, const rng_t &j, const rng_t &k)
+      {
+        fill_halos_vctr_nrml(a, j, k);
+      }
     };
   } // namespace bcond
 } // namespace libmpdataxx
diff --git a/libmpdata++/bcond/detail/bcond_common.hpp b/libmpdata++/bcond/detail/bcond_common.hpp
index 89b0166c..306162f8 100644
--- a/libmpdata++/bcond/detail/bcond_common.hpp
+++ b/libmpdata++/bcond/detail/bcond_common.hpp
@@ -52,6 +52,9 @@ namespace libmpdataxx
 	{ 
 	  assert(false && "bcond::fill_halos_vctr() called!"); 
 	};
+	
+        virtual void fill_halos_vctr_alng_cyclic(arrvec_t<blitz::Array<real_t, 1>> &, const bool ad = false)
+	{};
 
 	// 2D
 	virtual void fill_halos_sclr(arr_2d_t &, const rng_t &, const bool deriv = false) 
@@ -98,6 +101,15 @@ namespace libmpdataxx
 	{
 	  assert(false && "bcond::fill_halos_vctr_nrml() called!");
 	};
+	
+        virtual void fill_halos_vctr_alng_cyclic(arrvec_t<blitz::Array<real_t, 2>> &, const rng_t &, const bool ad = false) 
+	{};
+
+	virtual void fill_halos_vctr_nrml_cyclic(blitz::Array<real_t, 2> &, const rng_t &) 
+	{};
+	
+        virtual void fill_halos_flux(arrvec_t<blitz::Array<real_t, 2>> &, const rng_t &) 
+	{};
 
 	// 3D
 	virtual void fill_halos_sclr(arr_3d_t &, const rng_t &, const rng_t &, const bool deriv = false) 
@@ -153,6 +165,15 @@ namespace libmpdataxx
 	{
 	  assert(false && "bcond::fill_halos_vctr_nrml() called!");
 	};
+	
+        virtual void fill_halos_vctr_alng_cyclic(arrvec_t<blitz::Array<real_t, 3>> &, const rng_t &, const rng_t &, const bool ad = false) 
+	{};
+
+	virtual void fill_halos_vctr_nrml_cyclic(blitz::Array<real_t, 3> &, const rng_t &, const rng_t &) 
+	{};
+	
+        virtual void fill_halos_flux(arrvec_t<blitz::Array<real_t, 3>> &, const rng_t &, const rng_t &) 
+	{};
 
 	protected:
 	  // sclr
diff --git a/libmpdata++/bcond/open_1d.hpp b/libmpdata++/bcond/open_1d.hpp
index d1def2b2..8db70c62 100644
--- a/libmpdata++/bcond/open_1d.hpp
+++ b/libmpdata++/bcond/open_1d.hpp
@@ -39,7 +39,7 @@ namespace libmpdataxx
 
       void fill_halos_vctr_alng(arrvec_t<arr_t> &av, const bool ad = false)
       {
-        for (int i = this->left_halo_vctr.first(); i <= this->left_halo_vctr.last(); ++i)
+        for (int i = this->left_halo_vctr.first(); i <= this->left_halo_vctr.last() - (ad ? 1 : 0); ++i)
 	  av[0](rng_t(i, i)) = av[0](this->left_intr_vctr.first());
       }
     };
@@ -72,7 +72,7 @@ namespace libmpdataxx
 
       void fill_halos_vctr_alng(arrvec_t<arr_t> &av, const bool ad = false)
       {
-        for (int i = this->rght_halo_vctr.first(); i <= this->rght_halo_vctr.last(); ++i)
+        for (int i = this->rght_halo_vctr.first() + (ad ? 1 : 0); i <= this->rght_halo_vctr.last(); ++i)
 	  av[0](rng_t(i, i)) = av[0](this->rght_intr_vctr.first());
       }
     };
diff --git a/libmpdata++/bcond/open_2d.hpp b/libmpdata++/bcond/open_2d.hpp
index b91a5688..3880041e 100644
--- a/libmpdata++/bcond/open_2d.hpp
+++ b/libmpdata++/bcond/open_2d.hpp
@@ -62,6 +62,12 @@ namespace libmpdataxx
       {
         using namespace idxperm;
         a(pi<d>(this->left_edge_sclr, j)) = sign * edge_velocity(pi<d>(0, j));
+        
+        if (halo > 1)
+        {
+          a(pi<d>(this->left_halo_sclr.last() - 1, j)) =   3 * a(pi<d>(this->left_edge_sclr,     j))
+                                                         - 2 * a(pi<d>(this->left_edge_sclr + 1, j));
+        }
       }
 
       void fill_halos_vctr_alng(arrvec_t<arr_t> &av, const rng_t &j, const bool ad = false)
@@ -77,7 +83,7 @@ namespace libmpdataxx
         }
        
 	// zero-divergence condition
-        for (int ii = this->left_halo_vctr.first(); ii <= this->left_halo_vctr.last(); ++ii)
+        for (int ii = this->left_halo_vctr.first(); ii <= this->left_halo_vctr.last() - (ad ? 1 : 0); ++ii)
         {
 	  av[d](pi<d>(ii, j)) = 
 	    av[d](pi<d>(i+h, j)) 
@@ -133,6 +139,12 @@ namespace libmpdataxx
         // equivalent to one-sided derivatives at the boundary
         a(pi<d>(this->rght_halo_sclr.first(), j)) = 2 * a(pi<d>(this->rght_edge_sclr,     j))
                                                       - a(pi<d>(this->rght_edge_sclr - 1, j));
+        if (halo > 1)
+        {
+          a(pi<d>(this->rght_halo_sclr.first() + 1, j)) =   3 * a(pi<d>(this->rght_edge_sclr,     j))
+                                                          - 2 * a(pi<d>(this->rght_edge_sclr - 1, j));
+        }
+
       }
       
       void save_edge_vel(const arr_t &a, const rng_t &j)
@@ -163,7 +175,7 @@ namespace libmpdataxx
         }
        
 	// zero-divergence condition
-        for (int ii = this->rght_halo_vctr.first(); ii <= this->rght_halo_vctr.last(); ++ii)
+        for (int ii = this->rght_halo_vctr.first() + (ad ? 1 : 0); ii <= this->rght_halo_vctr.last(); ++ii)
         {
 	  av[d](pi<d>(ii, j)) = 
 	    av[d](pi<d>(i-h, j)) + (
diff --git a/libmpdata++/bcond/open_3d.hpp b/libmpdata++/bcond/open_3d.hpp
index 056cd223..0e1600f4 100644
--- a/libmpdata++/bcond/open_3d.hpp
+++ b/libmpdata++/bcond/open_3d.hpp
@@ -47,6 +47,11 @@ namespace libmpdataxx
         // equivalent to one-sided derivatives at the boundary
         a(pi<d>(this->left_halo_sclr.last(), j, k)) = 2 * a(pi<d>(this->left_edge_sclr,     j, k))
                                                         - a(pi<d>(this->left_edge_sclr + 1, j, k));
+        if (halo > 1)
+        {
+          a(pi<d>(this->left_halo_sclr.last() - 1, j, k)) =   3 * a(pi<d>(this->left_edge_sclr,     j, k))
+                                                            - 2 * a(pi<d>(this->left_edge_sclr + 1, j, k));
+        }
       }
       
       void save_edge_vel(const arr_t &a, const rng_t &j, const rng_t &k)
@@ -93,7 +98,7 @@ namespace libmpdataxx
 	assert(std::isfinite(sum(av[d+2](pi<d>(i, j, k+h)))));
 
         // zero-divergence condition
-        for (int ii = this->left_halo_vctr.first(); ii <= this->left_halo_vctr.last(); ++ii)
+        for (int ii = this->left_halo_vctr.first(); ii <= this->left_halo_vctr.last() - (ad ? 1 : 0); ++ii)
         {
           av[d](pi<d>(ii, j, k)) = 
             av[d](pi<d>(i+h, j, k)) 
@@ -153,6 +158,12 @@ namespace libmpdataxx
         // equivalent to one-sided derivatives at the boundary
         a(pi<d>(this->rght_halo_sclr.first(), j, k)) = 2 * a(pi<d>(this->rght_edge_sclr,     j, k))
                                                          - a(pi<d>(this->rght_edge_sclr - 1, j, k));
+
+        if (halo > 1)
+        {
+          a(pi<d>(this->rght_halo_sclr.first() + 1, j, k)) =   3 * a(pi<d>(this->rght_edge_sclr,     j, k))
+                                                             - 2 * a(pi<d>(this->rght_edge_sclr - 1, j, k));
+        }
       }
       
       void save_edge_vel(const arr_t &a, const rng_t &j, const rng_t &k)
@@ -197,7 +208,7 @@ namespace libmpdataxx
 	assert(std::isfinite(sum(av[d+2](pi<d>(i, j, k-h)))));
 	assert(std::isfinite(sum(av[d+2](pi<d>(i, j, k+h)))));
 
-        for (int ii = this->rght_halo_vctr.first(); ii <= this->rght_halo_vctr.last(); ++ii)
+        for (int ii = this->rght_halo_vctr.first() + (ad ? 1 : 0); ii <= this->rght_halo_vctr.last(); ++ii)
         {
           av[d](pi<d>(ii, j, k)) = 
             av[d](pi<d>(i-h, j, k)) 
diff --git a/libmpdata++/bcond/rigid_2d.hpp b/libmpdata++/bcond/rigid_2d.hpp
index 6b0c0e40..fc29f70f 100644
--- a/libmpdata++/bcond/rigid_2d.hpp
+++ b/libmpdata++/bcond/rigid_2d.hpp
@@ -35,14 +35,17 @@ namespace libmpdataxx
           a(pi<d>(i, j)) = a(pi<d>(this->left_edge_sclr + n, j));
         }
       }
-      
+ 
       void fill_halos_pres(arr_t &a, const rng_t &j)
       {
         using namespace idxperm;
         // equivalent to one-sided derivatives at the boundary
-        a(pi<d>(this->left_halo_sclr.last(), j)) = 2 * a(pi<d>(this->left_edge_sclr,     j))
-                                                     - a(pi<d>(this->left_edge_sclr + 1, j));
-      }
+        for (int i = this->left_halo_sclr.first(), n = halo; i <= this->left_halo_sclr.last(); ++i, --n)
+        {
+          a(pi<d>(i, j)) = 2 * a(pi<d>(this->left_edge_sclr,     j))
+                             - a(pi<d>(this->left_edge_sclr + n, j));
+        }
+      }     
 
       void save_edge_vel(const arr_t &, const rng_t &) {}
      
@@ -55,10 +58,20 @@ namespace libmpdataxx
       void fill_halos_vctr_alng(arrvec_t<arr_t> &av, const rng_t &j, const bool ad = false)
       {
 	using namespace idxperm;
-        // zero velocity condition
-        for (int i = this->left_halo_vctr.first(), n = halo; i <= this->left_halo_vctr.last(); ++i, --n)
+        int i = this->left_halo_vctr.last();
+
+        if (!ad)
         {
-	  av[d](pi<d>(i, j)) = -av[d](pi<d>(this->left_edge_sclr + n - h, j));
+          // zero velocity condition
+          av[d](pi<d>(i, j)) = -av[d](pi<d>(this->left_edge_sclr + h, j));
+        }
+        if (halo > 1)
+        {
+          // extrapolation
+          av[d](pi<d>(i - 1, j)) = av[d](pi<d>(this->left_edge_sclr + 1 + h, j)) + 3 * (
+                                    av[d](pi<d>(this->left_edge_sclr - h, j))
+                                  - av[d](pi<d>(this->left_edge_sclr + h, j))
+                                  );
         }
       }
 
@@ -67,6 +80,12 @@ namespace libmpdataxx
         // note intentional sclr
         fill_halos_sclr(a, j);
       }
+      
+      void fill_halos_flux(arrvec_t<arr_t> &av, const rng_t &j)
+      {
+        using namespace idxperm;
+	av[d](pi<d>(this->left_halo_vctr.last(), j)) = -av[d](pi<d>(this->left_edge_sclr + h, j));
+      }
     };
 
     template <typename real_t, int halo, bcond_e knd, drctn_e dir, int n_dims, int d>
@@ -98,8 +117,11 @@ namespace libmpdataxx
       {
         using namespace idxperm;
         // equivalent to one-sided derivatives at the boundary
-        a(pi<d>(this->rght_halo_sclr.first(), j)) = 2 * a(pi<d>(this->rght_edge_sclr,     j))
-                                                      - a(pi<d>(this->rght_edge_sclr - 1, j));
+        for (int i = this->rght_halo_sclr.first(), n = 1; i <= this->rght_halo_sclr.last(); ++i, ++n)
+        {
+          a(pi<d>(i, j)) = 2 * a(pi<d>(this->rght_edge_sclr,     j))
+                             - a(pi<d>(this->rght_edge_sclr - n, j));
+        }
       }
       
       void save_edge_vel(const arr_t &, const rng_t &) {}
@@ -113,10 +135,20 @@ namespace libmpdataxx
       void fill_halos_vctr_alng(arrvec_t<arr_t> &av, const rng_t &j, const bool ad = false)
       {
 	using namespace idxperm;
-        // zero velocity condition
-        for (int i = this->rght_halo_vctr.first(), n = 1; i <= this->rght_halo_vctr.last(); ++i, ++n)
+        
+        int i = this->rght_halo_vctr.first();
+        if (!ad)
+        {
+          // zero velocity condition
+	  av[d](pi<d>(i, j)) = -av[d](pi<d>(this->rght_edge_sclr - h, j));
+        }
+        if (halo > 1)
         {
-	  av[d](pi<d>(i, j)) = -av[d](pi<d>(this->rght_edge_sclr - n + h, j));
+          // extrapolation
+          av[d](pi<d>(i + 1, j)) = av[d](pi<d>(this->rght_edge_sclr - 1 - h, j)) + 3 * (
+                                    av[d](pi<d>(this->rght_edge_sclr + h, j))
+                                  - av[d](pi<d>(this->rght_edge_sclr - h, j))
+                                  );
         }
       }
       
@@ -125,6 +157,13 @@ namespace libmpdataxx
         // note intentional sclr
         fill_halos_sclr(a, j);
       }
+      
+      void fill_halos_flux(arrvec_t<arr_t> &av, const rng_t &j)
+      {
+	using namespace idxperm;
+        // zero flux condition
+	av[d](pi<d>(this->rght_halo_vctr.first(), j)) = -av[d](pi<d>(this->rght_edge_sclr - h, j));
+      }
     };
   } // namespace bcond
 } // namespace libmpdataxx
diff --git a/libmpdata++/bcond/rigid_3d.hpp b/libmpdata++/bcond/rigid_3d.hpp
index 9ae57237..6808a950 100644
--- a/libmpdata++/bcond/rigid_3d.hpp
+++ b/libmpdata++/bcond/rigid_3d.hpp
@@ -40,8 +40,11 @@ namespace libmpdataxx
       {
         using namespace idxperm;
         // equivalent to one-sided derivatives at the boundary
-        a(pi<d>(this->left_halo_sclr.last(), j, k)) = 2 * a(pi<d>(this->left_edge_sclr,     j, k))
-                                                        - a(pi<d>(this->left_edge_sclr + 1, j, k));
+        for (int i = this->left_halo_sclr.first(), n = halo; i <= this->left_halo_sclr.last(); ++i, --n)
+        {
+          a(pi<d>(i, j, k)) = 2 * a(pi<d>(this->left_edge_sclr,     j, k))
+                                - a(pi<d>(this->left_edge_sclr + n, j, k));
+        }
       }
       
       void save_edge_vel(const arr_t &, const rng_t &, const rng_t &) {}
@@ -55,10 +58,20 @@ namespace libmpdataxx
       void fill_halos_vctr_alng(arrvec_t<arr_t> &av, const rng_t &j, const rng_t &k, const bool ad = false)
       {
 	using namespace idxperm;
-        // zero velocity condition
-        for (int i = this->left_halo_vctr.first(), n = halo; i <= this->left_halo_vctr.last(); ++i, --n)
+        int i = this->left_halo_vctr.last();
+
+        if (!ad)
+        {
+          // zero velocity condition
+          av[d](pi<d>(i, j, k)) = -av[d](pi<d>(this->left_edge_sclr + h, j, k));
+        }
+        if (halo > 1)
         {
-	  av[d](pi<d>(i, j, k)) = -av[d](pi<d>(this->left_edge_sclr + n - h, j, k));
+          // extrapolation
+          av[d](pi<d>(i - 1, j, k)) = av[d](pi<d>(this->left_edge_sclr + 1 + h, j, k)) + 3 * (
+                                       av[d](pi<d>(this->left_edge_sclr - h, j, k))
+                                     - av[d](pi<d>(this->left_edge_sclr + h, j, k))
+                                     );
         }
       }
 
@@ -67,6 +80,13 @@ namespace libmpdataxx
         // note intentional sclr
         fill_halos_sclr(a, j, k);
       }
+      
+      void fill_halos_flux(arrvec_t<arr_t> &av, const rng_t &j, const rng_t &k)
+      {
+	using namespace idxperm;
+        // zero flux condition
+	av[d](pi<d>(this->left_halo_vctr.last(), j, k)) = -av[d](pi<d>(this->left_edge_sclr + h, j, k));
+      }
     };
 
     template <typename real_t, int halo, bcond_e knd, drctn_e dir, int n_dims, int d>
@@ -96,13 +116,16 @@ namespace libmpdataxx
       
       
       void save_edge_vel(const arr_t &, const rng_t &, const rng_t &) {}
-
+      
       void fill_halos_pres(arr_t &a, const rng_t &j, const rng_t &k)
       {
         using namespace idxperm;
         // equivalent to one-sided derivatives at the boundary
-        a(pi<d>(this->rght_halo_sclr.first(), j, k)) = 2 * a(pi<d>(this->rght_edge_sclr,     j, k))
-                                                         - a(pi<d>(this->rght_edge_sclr - 1, j, k));
+        for (int i = this->rght_halo_sclr.first(), n = 1; i <= this->rght_halo_sclr.last(); ++i, ++n)
+        {
+          a(pi<d>(i, j, k)) = 2 * a(pi<d>(this->rght_edge_sclr,     j, k))
+                                - a(pi<d>(this->rght_edge_sclr - n, j, k));
+        }
       }
       
       void set_edge_pres(arr_t &a, const rng_t &j, const rng_t &k, int)
@@ -115,9 +138,20 @@ namespace libmpdataxx
       {
 	using namespace idxperm;
         // zero velocity condition
-        for (int i = this->rght_halo_vctr.first(), n = 1; i <= this->rght_halo_vctr.last(); ++i, ++n)
+
+        int i = this->rght_halo_vctr.first();
+        if (!ad)
+        {
+          // zero velocity condition
+	  av[d](pi<d>(i, j, k)) = -av[d](pi<d>(this->rght_edge_sclr - h, j, k));
+        }
+        if (halo > 1)
         {
-	  av[d](pi<d>(i, j, k)) = -av[d](pi<d>(this->rght_edge_sclr - n + h, j, k));
+          // extrapolation
+          av[d](pi<d>(i + 1, j, k)) = av[d](pi<d>(this->rght_edge_sclr - 1 - h, j, k)) + 3 * (
+                                       av[d](pi<d>(this->rght_edge_sclr + h, j, k))
+                                     - av[d](pi<d>(this->rght_edge_sclr - h, j, k))
+                                     );
         }
       }
       
@@ -126,6 +160,13 @@ namespace libmpdataxx
         // note intentional sclr
         fill_halos_sclr(a, j, k);
       }
+      
+      void fill_halos_flux(arrvec_t<arr_t> &av, const rng_t &j, const rng_t &k)
+      {
+	using namespace idxperm;
+        // zero flux condition
+	av[d](pi<d>(this->rght_halo_vctr.first(), j, k)) = -av[d](pi<d>(this->rght_edge_sclr - h, j, k));
+      }
     };
   } // namespace bcond
 } // namespace libmpdataxx
diff --git a/libmpdata++/formulae/mpdata/formulae_mpdata_common.hpp b/libmpdata++/formulae/mpdata/formulae_mpdata_common.hpp
index 63ebbc53..a20c9666 100644
--- a/libmpdata++/formulae/mpdata/formulae_mpdata_common.hpp
+++ b/libmpdata++/formulae/mpdata/formulae_mpdata_common.hpp
@@ -36,15 +36,17 @@ namespace libmpdataxx
       using std::abs;
 
       const int n_tlev = 2;
-
-      constexpr const int halo(const opts_t &opts) 
+  
+      template <class ct_params_t>
+      constexpr int halo()
       {
         return (
-          opts::isset(opts, opts::tot)        || // see psi 2-nd derivatives in eq. (36) in PKS & LGM 1998
-          opts::isset(opts, opts::dfl)        || // see +3/2 in eq. (30) in PKS & LGM 1998
-          opts::isset(opts, opts::div_2nd)    || 
-          opts::isset(opts, opts::div_3rd)    ||
-          opts::isset(opts, opts::div_3rd_dt)
+          opts::isset(ct_params_t::opts, opts::tot)        || // see psi 2-nd derivatives in eq. (36) in PKS & LGM 1998
+          opts::isset(ct_params_t::opts, opts::dfl)        || // see +3/2 in eq. (30) in PKS & LGM 1998
+          opts::isset(ct_params_t::opts, opts::div_2nd)    || 
+          opts::isset(ct_params_t::opts, opts::div_3rd)    ||
+          opts::isset(ct_params_t::opts, opts::div_3rd_dt) ||
+          ct_params_t::prs_order == 4
         ) ? 2 : 1; 
       }
 
diff --git a/libmpdata++/formulae/nabla_formulae.hpp b/libmpdata++/formulae/nabla_formulae.hpp
index b6a9f65f..60ad3806 100644
--- a/libmpdata++/formulae/nabla_formulae.hpp
+++ b/libmpdata++/formulae/nabla_formulae.hpp
@@ -19,40 +19,77 @@ namespace libmpdataxx
       using idxperm::pi;
       using arakawa_c::h;
 
-      template <class arg_t, typename real_t>
+      // 1D version
+      template <int ord = 2, class arg_t, typename real_t>
       inline auto grad(
 	const arg_t &x,
 	const rng_t &i,
-	const real_t dx
+	const real_t dx,
+        typename std::enable_if<ord == 2>::type* = 0
       ) return_macro(,
 	(
 	  x(i+1) - 
 	  x(i-1)
 	) / dx / 2.
       )
+      
+      // 4th order version of the above
+      template <int ord, class arg_t, typename real_t>
+      inline auto grad(
+	const arg_t &x,
+	const rng_t &i,
+	const real_t dx,
+        typename std::enable_if<ord == 4>::type* = 0
+      ) return_macro(,
+	(
+	  -   x(i+2) +
+	  8 * x(i+1) - 
+	  8 * x(i-1) +
+	      x(i-2)
+	) / (12 * dx)
+      )
 
       // 2D version
-      template <int d, class arg_t, typename real_t>
+      template <int d, int ord = 2, class arg_t, typename real_t>
       inline auto grad(
 	const arg_t &x,
 	const rng_t &i,
 	const rng_t &j,
-	const real_t dx
+	const real_t dx,
+        typename std::enable_if<ord == 2>::type* = 0
       ) return_macro(,
 	(
 	  x(pi<d>(i+1, j)) - 
 	  x(pi<d>(i-1, j))
 	) / dx / 2.
       )
+
+      // 4th order version of the above
+      template <int d, int ord, class arg_t, typename real_t>
+      inline auto grad(
+	const arg_t &x,
+	const rng_t &i,
+	const rng_t &j,
+	const real_t dx,
+        typename std::enable_if<ord == 4>::type* = 0
+      ) return_macro(,
+	(
+	  -   x(pi<d>(i+2, j)) + 
+	  8 * x(pi<d>(i+1, j)) - 
+	  8 * x(pi<d>(i-1, j)) +
+	      x(pi<d>(i-2, j))
+	) / (12 * dx)
+      )
       
       // 3D version
-      template <int d, class arg_t, typename real_t>
+      template <int d, int ord = 2, class arg_t, typename real_t>
       inline auto grad(
 	const arg_t &x,
 	const rng_t &i,
 	const rng_t &j,
 	const rng_t &k,
-	const real_t dx
+	const real_t dx,
+        typename std::enable_if<ord == 2>::type* = 0
       ) return_macro(,
 	(
 	  x(pi<d>(i+1, j, k)) - 
@@ -60,6 +97,26 @@ namespace libmpdataxx
 	) / dx / 2.
       )
       
+      // 4th order version of the above
+      template <int d, int ord, class arg_t, typename real_t>
+      inline auto grad(
+	const arg_t &x,
+	const rng_t &i,
+	const rng_t &j,
+	const rng_t &k,
+	const real_t dx,
+        typename std::enable_if<ord == 4>::type* = 0
+      ) return_macro(,
+	(
+	  -   x(pi<d>(i+2, j, k)) +
+	  8 * x(pi<d>(i+1, j, k)) - 
+	  8 * x(pi<d>(i-1, j, k)) +
+	      x(pi<d>(i-2, j, k))
+	) / (12 * dx)
+      )
+      
+      // compact gradients
+
       template <class arg_t, typename real_t>
       inline auto grad_cmpct(
 	const arg_t &x,
@@ -104,27 +161,27 @@ namespace libmpdataxx
       // helper function to calculate gradient components of a scalar field
       
       // 1D version
-      template <int nd, class arrvec_t, class arr_t, class ijk_t, class dijk_t>
+      template <int nd, int ord = 2, class arrvec_t, class arr_t, class ijk_t, class dijk_t>
       inline void calc_grad(arrvec_t v, arr_t a, ijk_t ijk, dijk_t dijk, typename std::enable_if<nd == 1>::type* = 0)
       {
-        v[0](ijk) = formulae::nabla::grad<0>(a, ijk[0], dijk[0]);
+        v[0](ijk) = grad<0, ord>(a, ijk[0], dijk[0]);
       }
 
       // 2D version
-      template <int nd, class arrvec_t, class arr_t, class ijk_t, class dijk_t>
+      template <int nd, int ord = 2, class arrvec_t, class arr_t, class ijk_t, class dijk_t>
       inline void calc_grad(arrvec_t v, arr_t a, ijk_t ijk, dijk_t dijk, typename std::enable_if<nd == 2>::type* = 0)
       {
-        v[0](ijk) = formulae::nabla::grad<0>(a, ijk[0], ijk[1], dijk[0]);
-        v[1](ijk) = formulae::nabla::grad<1>(a, ijk[1], ijk[0], dijk[1]);
+        v[0](ijk) = grad<0, ord>(a, ijk[0], ijk[1], dijk[0]);
+        v[1](ijk) = grad<1, ord>(a, ijk[1], ijk[0], dijk[1]);
       }
 
       // 3D version
-      template <int nd, class arrvec_t, class arr_t, class ijk_t, class dijk_t>
+      template <int nd, int ord = 2, class arrvec_t, class arr_t, class ijk_t, class dijk_t>
       inline void calc_grad(arrvec_t v, arr_t a, ijk_t ijk, dijk_t dijk, typename std::enable_if<nd == 3>::type* = 0)
       {
-        v[0](ijk) = formulae::nabla::grad<0>(a, ijk[0], ijk[1], ijk[2], dijk[0]);
-        v[1](ijk) = formulae::nabla::grad<1>(a, ijk[1], ijk[2], ijk[0], dijk[1]);
-        v[2](ijk) = formulae::nabla::grad<2>(a, ijk[2], ijk[0], ijk[1], dijk[2]);
+        v[0](ijk) = grad<0, ord>(a, ijk[0], ijk[1], ijk[2], dijk[0]);
+        v[1](ijk) = grad<1, ord>(a, ijk[1], ijk[2], ijk[0], dijk[1]);
+        v[2](ijk) = grad<2, ord>(a, ijk[2], ijk[0], ijk[1], dijk[2]);
       }
       
       // 2D version
@@ -147,31 +204,28 @@ namespace libmpdataxx
       // divergence
       
       // 2D version
-      template <int nd, class arrvec_t, class ijk_t, class dijk_t>
+      template <int nd, int ord = 2, class arrvec_t, class ijk_t, class dijk_t>
       inline auto div(
 	const arrvec_t &v, // vector field
 	const ijk_t &ijk,
 	const dijk_t dijk,
         typename std::enable_if<nd == 2>::type* = 0
       ) return_macro(,
-	(v[0](ijk[0]+1, ijk[1]) - v[0](ijk[0]-1, ijk[1])) / dijk[0] / 2.
-	+
-	(v[1](ijk[0], ijk[1]+1) - v[1](ijk[0], ijk[1]-1)) / dijk[1] / 2.
+          grad<0 BOOST_PP_COMMA() ord>(v[0], ijk[0], ijk[1], dijk[0])
+        + grad<1 BOOST_PP_COMMA() ord>(v[1], ijk[1], ijk[0], dijk[1])
       )
       
       // 3D version
-      template <int nd, class arrvec_t, class ijk_t, class dijk_t>
+      template <int nd, int ord = 2, class arrvec_t, class ijk_t, class dijk_t>
       inline auto div(
 	const arrvec_t &v, // vector field
 	const ijk_t &ijk,
 	const dijk_t dijk,
         typename std::enable_if<nd == 3>::type* = 0
       ) return_macro(,
-	(v[0](ijk[0]+1, ijk[1], ijk[2]) - v[0](ijk[0]-1, ijk[1], ijk[2])) / dijk[0] / 2.
-	+
-	(v[1](ijk[0], ijk[1]+1, ijk[2]) - v[1](ijk[0], ijk[1]-1, ijk[2])) / dijk[1] / 2.
-	+
-	(v[2](ijk[0], ijk[1], ijk[2]+1) - v[2](ijk[0], ijk[1], ijk[2]-1)) / dijk[2] / 2.
+          grad<0 BOOST_PP_COMMA() ord>(v[0], ijk[0], ijk[1], ijk[2], dijk[0])
+        + grad<1 BOOST_PP_COMMA() ord>(v[1], ijk[1], ijk[2], ijk[0], dijk[1])
+        + grad<2 BOOST_PP_COMMA() ord>(v[2], ijk[2], ijk[0], ijk[1], dijk[2])
       )
     } // namespace nabla_op
   } // namespace formulae
diff --git a/libmpdata++/opts.hpp b/libmpdata++/opts.hpp
index 758a304d..cb0ef5b1 100644
--- a/libmpdata++/opts.hpp
+++ b/libmpdata++/opts.hpp
@@ -81,6 +81,7 @@ namespace libmpdataxx
     static constexpr int hint_scale(const int &e) { return 0; } // base-2 logarithm
     enum { var_dt = false};
     enum { vip_vab = 0};
+    enum { prs_order = 2};
     enum { prs_k_iters = 4};
     enum { prs_khn = false}; // if true use Kahan summation in the pressure solver
     enum { sgs_scheme = 0}; // iles
diff --git a/libmpdata++/solvers/detail/boussinesq_expl.hpp b/libmpdata++/solvers/detail/boussinesq_expl.hpp
index 69ec37f2..a9d7a38f 100644
--- a/libmpdata++/solvers/detail/boussinesq_expl.hpp
+++ b/libmpdata++/solvers/detail/boussinesq_expl.hpp
@@ -50,20 +50,24 @@ namespace libmpdataxx
         // helpers for buoyancy forces
         template<class ijk_t>
         inline auto buoy_at_0(const ijk_t &ijk)
-        return_macro(,
-          this->g * (this->state(ix::tht)(ijk) - this->tht_e(ijk)) / this->Tht_ref
-        )
+        {
+          return return_helper<rng_t>(
+            this->g * (this->state(ix::tht)(ijk) - this->tht_e(ijk)) / this->Tht_ref
+          );
+        }
         
         template<class ijk_t>
         inline auto buoy_at_1(const ijk_t &ijk)
-        return_macro(,
-          this->g * (
-              (  this->state(ix::tht)(ijk)
-               + 0.5 * this->dt * this->hflux_frc(ijk) + 0.5 * this->dt * this->tht_abs(ijk) * this->tht_e(ijk))
-              / (1 + 0.5 * this->dt * this->tht_abs(ijk))
-              - this->tht_e(ijk)
-            ) / this->Tht_ref
-        )
+        {
+          return return_helper<rng_t>(
+            this->g * (
+                (  this->state(ix::tht)(ijk)
+                 + 0.5 * this->dt * this->hflux_frc(ijk) + 0.5 * this->dt * this->tht_abs(ijk) * this->tht_e(ijk))
+                / (1 + 0.5 * this->dt * this->tht_abs(ijk))
+                - this->tht_e(ijk)
+              ) / this->Tht_ref
+          );
+        }
 
         void calc_full_tht(typename parent_t::arr_t &full_tht) final
         {
diff --git a/libmpdata++/solvers/detail/mpdata_common.hpp b/libmpdata++/solvers/detail/mpdata_common.hpp
index a13691f2..b8778f96 100644
--- a/libmpdata++/solvers/detail/mpdata_common.hpp
+++ b/libmpdata++/solvers/detail/mpdata_common.hpp
@@ -16,13 +16,13 @@ namespace libmpdataxx
       class mpdata_common : public detail::solver<
         ct_params_t, 
         formulae::mpdata::n_tlev, 
-        detail::max(minhalo, formulae::mpdata::halo(ct_params_t::opts))
+        detail::max(minhalo, formulae::mpdata::halo<ct_params_t>())
       >
       {
         using parent_t = detail::solver<
           ct_params_t, 
           formulae::mpdata::n_tlev, 
-          detail::max(minhalo, formulae::mpdata::halo(ct_params_t::opts))
+          detail::max(minhalo, formulae::mpdata::halo<ct_params_t>())
         >;
 
 	using GC_t = arrvec_t<typename parent_t::arr_t>;
diff --git a/libmpdata++/solvers/detail/mpdata_osc_2d.hpp b/libmpdata++/solvers/detail/mpdata_osc_2d.hpp
index beaa2ada..d1da657d 100644
--- a/libmpdata++/solvers/detail/mpdata_osc_2d.hpp
+++ b/libmpdata++/solvers/detail/mpdata_osc_2d.hpp
@@ -147,7 +147,8 @@ namespace libmpdataxx
               this->flux_ptr = &this->GC(iter);
             }   
 
-            const auto &flx = (*(this->flux_ptr));
+            auto &flx = (*(this->flux_ptr));
+            this->xchng_flux(flx);
 
             // sanity check for input
             //assert(std::isfinite(sum(this->mem->psi[e][this->n[e]](this->ijk))));
diff --git a/libmpdata++/solvers/detail/mpdata_osc_3d.hpp b/libmpdata++/solvers/detail/mpdata_osc_3d.hpp
index 16e86bc1..e70df9a4 100644
--- a/libmpdata++/solvers/detail/mpdata_osc_3d.hpp
+++ b/libmpdata++/solvers/detail/mpdata_osc_3d.hpp
@@ -161,7 +161,8 @@ namespace libmpdataxx
               this->flux_ptr = &GC;
             }
             
-            const auto &flx = (*(this->flux_ptr));
+            auto &flx = (*(this->flux_ptr));
+            this->xchng_flux(flx);
 
             // sanity check for input
             assert(std::isfinite(sum(psi[n](ijk))));
diff --git a/libmpdata++/solvers/detail/mpdata_rhs_vip_common.hpp b/libmpdata++/solvers/detail/mpdata_rhs_vip_common.hpp
index 1389a784..079511ff 100644
--- a/libmpdata++/solvers/detail/mpdata_rhs_vip_common.hpp
+++ b/libmpdata++/solvers/detail/mpdata_rhs_vip_common.hpp
@@ -30,80 +30,118 @@ namespace libmpdataxx
 
     namespace detail
     {
-      // 
-      template <class ct_params_t > 
-      class mpdata_rhs_vip_common : public mpdata_rhs<ct_params_t>
+      // override default interpolation in ct_params for vip
+      template <class ct_params_t> 
+      struct ct_params_vip_default_t : ct_params_t
+      {
+        enum {sptl_intrp = ct_params_t::prs_order == 4 ? aver4 : aver2};
+      };
+
+      template <class ct_params_t> 
+      class mpdata_rhs_vip_common : public mpdata_rhs<ct_params_vip_default_t<ct_params_t>>
       {
         using ix = typename ct_params_t::ix;
 
 	protected:
 	
-        using parent_t = mpdata_rhs<ct_params_t>;
+        using parent_t = mpdata_rhs<ct_params_vip_default_t<ct_params_t>>;
 
 	// member fields
         std::array<int, parent_t::n_dims> vip_ixs;
 	arrvec_t<typename parent_t::arr_t> &stash, &vip_rhs;
         typename parent_t::real_t eps;
+        // to propagate the default override forward
+        static constexpr auto sptl_intrp = ct_params_vip_default_t<ct_params_t>::sptl_intrp;
 
-	virtual void fill_stash() = 0;
-	virtual void fill_stash_helper(const int d, const int e) final
+	virtual void fill_stash_helper(const int d) final
 	{
+          // for third-order mpdata saving to t_lev == -2 so that it becomes -1 at the next time step
+          int save_t_lev = parent_t::div3_mpdata ? -2 : -1;
 	  if (ix::vip_den == -1)
-	    this->stash[d](this->ijk) = this->state(e)(this->ijk);
+	    vip_state(save_t_lev, d)(this->ijk) = vips()[d](this->ijk);
 	  else if (eps == 0) // this is the default  
           {
             // for those simulations advecting momentum where the division by mass will not cause division by zero
             // (for shallow water simulations it means simulations with no collapsing/inflating shallow water layers)
-            this->stash[d](this->ijk) = this->state(e)(this->ijk) / this->state(ix::vip_den)(this->ijk);
+            vip_state(save_t_lev, d)(this->ijk) = vips()[d](this->ijk) / this->state(ix::vip_den)(this->ijk);
           }
 	  else
 	  {  
-	    this->stash[d](this->ijk) = where(
+	    vip_state(save_t_lev, d)(this->ijk) = where(
 	      // if
 	      this->state(ix::vip_den)(this->ijk) > eps,
 	      // then
-	      this->state(e)(this->ijk) / this->state(ix::vip_den)(this->ijk),
+	      vips()[d](this->ijk) / this->state(ix::vip_den)(this->ijk),
 	      // else
 	      0
 	    );
 	  }
 
-          assert(std::isfinite(sum(this->stash[d](this->ijk))));
+          assert(std::isfinite(sum(vip_state(save_t_lev, d)(this->ijk))));
 	}
+	
+        void fill_stash() 
+        {
+          for (int d = 0; d < ct_params_t::n_dims; ++d) fill_stash_helper(d);
+        }
+
+        typename parent_t::arr_t& vip_state(const int t_lev, const int d)
+        {
+          // t_lev ==  0 -> output for extrapolation/derivatives
+          // t_lev == -1 -> (n-1) state
+          // t_lev == -2 -> (n-2) state, only available with div3_mpdata
+          if (!ct_params_t::var_dt && !parent_t::div3_mpdata)
+          {
+            assert(t_lev == 0 || t_lev == -1);
+            // for dt constant in time we can
+            // use the same stash since we don't need the previous state any more
+            return stash[d];
+          }
+          else if (!parent_t::div3_mpdata)
+          {
+            // for dt variable in time, however, we have to perform multiple
+            // extrapolations per time step and we need to keep the previous state
+            assert(t_lev == 0 || t_lev == -1);
+            return stash[d - t_lev *  ct_params_t::n_dims];
+          }
+          else
+          {
+            // for the fully third-order mpdata we need to keep both the (n-1)
+            // and the (n-2) state and juggle them around to avoid array copying
+            assert(t_lev == 0 || t_lev == -1 || t_lev == -2);
+            return (t_lev == 0 ? stash[d] :
+                      this->timestep % 2 == 0 ? stash[d - t_lev * ct_params_t::n_dims] : 
+                        stash[d + (3 + t_lev) * ct_params_t::n_dims]
+                   );
+          }
+        }
 
 	void extrp(const int d, const int e) // extrapolate velocity field in time to t+1/2
 	{                 
 	  using namespace arakawa_c;
 
-          const auto beta = this->dt / (2 * this->prev_dt);
-
-          // for dt constant in time we can
-          // write the result to stash since we don't need the previous state any more
-          // for dt variable in time, however, we have to perform multiple
-          // extrapolations per time step and we need to keep the previous state
-          // hence the need for another output variable masqueraded as stash[d + offset]
-          const auto outd = d + (ct_params_t::var_dt ? ct_params_t::n_dims : 0);
+          const auto beta = this->prev_dt[0] > 0 ? this->dt / (2 * this->prev_dt[0]) : 0;
 
-          if (!ct_params_t::var_dt)
+          if (!ct_params_t::var_dt && !parent_t::div3_mpdata)
           {
-	    this->stash[outd](this->ijk) *= -beta;
+	    this->vip_state(0, d)(this->ijk) *= -beta;
           }
           else
           {
-	    this->stash[outd](this->ijk) = -beta * this->stash[d](this->ijk);
+	    this->vip_state(0, d)(this->ijk) = -beta * this->vip_state(-1, d)(this->ijk);
           }
 
 	  if (ix::vip_den == -1) 
-	    this->stash[outd](this->ijk) += (1 + beta) * this->state(e)(this->ijk);
+	    this->vip_state(0, d)(this->ijk) += (1 + beta) * this->state(e)(this->ijk);
 	  else if (eps == 0) //this is the default
           {             
             // for those simulations advecting momentum where the division by mass will not cause division by zero
             // (for shallow water simulations it means simulations with no collapsing/inflating shallow water layers)
-	    this->stash[outd](this->ijk) += (1 + beta) * (this->state(e)(this->ijk) / this->state(ix::vip_den)(this->ijk)); 
+	    this->vip_state(0, d)(this->ijk) += (1 + beta) * (this->state(e)(this->ijk) / this->state(ix::vip_den)(this->ijk)); 
           }
 	  else
 	  {
-	    this->stash[outd](this->ijk) += where(
+	    this->vip_state(0, d)(this->ijk) += where(
 	      // if
 	      this->state(ix::vip_den)(this->ijk) > eps,
 	      // then
@@ -113,7 +151,7 @@ namespace libmpdataxx
 	    );  
 	  }
 
-          assert(std::isfinite(sum(this->stash[outd](this->ijk))));
+          assert(std::isfinite(sum(this->vip_state(0, d)(this->ijk))));
 	}   
 
 	arrvec_t<typename parent_t::arr_t>& vips()
@@ -126,7 +164,7 @@ namespace libmpdataxx
         }
 
 	virtual void extrapolate_in_time() = 0;
-	virtual void interpolate_in_space() = 0;
+	virtual void interpolate_in_space(arrvec_t<typename parent_t::arr_t> &interpolated) = 0;
 
         virtual void vip_rhs_impl_init()
         {
@@ -208,13 +246,19 @@ namespace libmpdataxx
 	{
           // fill Courant numbers with zeros so that the divergence test does no harm
           if (this->rank == 0)
-            for (int d=0; d < parent_t::n_dims; ++d) this->mem->GC.at(d) = 0; 
+            for (int d = 0; d < parent_t::n_dims; ++d) this->mem->GC.at(d) = 0; 
+          
+          for (int d = 0; d < parent_t::n_dims; ++d)
+          {
+            // these arrays should be multiplied by zeros if used before
+            // they are available but to avoid problems with NaNs we fill them with zeros anyway
+            vip_state(-1, d)(this->ijk) = 0;
+            if (parent_t::div3_mpdata) vip_state(-2, d)(this->ijk) = 0;
+          }
           this->mem->barrier();
 
 	  parent_t::hook_ante_loop(nt);
 	  
-	  // to make extrapolation possible at the first time-step
-	  fill_stash();
           vip_rhs_impl_init();
 	}
 
@@ -224,13 +268,46 @@ namespace libmpdataxx
 	  extrapolate_in_time();
 
 	  //interpolate from velocity field to courant field (mpdata needs courant numbers from t+1/2)
-	  interpolate_in_space();
+	  interpolate_in_space(this->mem->GC);
 
           // TODO: why???
 	  this->mem->barrier();
 
           return true;
         }
+        
+        void calc_ndt_gc() final
+        {
+          if (parent_t::div3_mpdata)
+          {
+            auto ex = this->halo - 1;
+            if (this->prev_dt[0] > 0)
+            {
+              for (int d = 0; d < parent_t::n_dims; ++d)
+              {
+                vip_state(0, d)(this->ijk) = this->dt * (vips()[d](this->ijk) - vip_state(-1, d)(this->ijk)) / this->prev_dt[0];
+                this->xchng_pres(this->vip_state(0, d), this->ijk, ex);
+              }
+              interpolate_in_space(this->mem->ndt_GC);
+              this->mem->barrier();
+            }
+            
+            if (this->prev_dt[0] > 0 && this->prev_dt[1] > 0)
+            {
+              for (int d = 0; d < parent_t::n_dims; ++d)
+              {
+                vip_state(0, d)(this->ijk) = this->dt * this->dt * (
+                                                  this->prev_dt[1] * vips()[d](this->ijk)
+                                                - (this->prev_dt[1] + this->prev_dt[0]) * vip_state(-1, d)(this->ijk)
+                                                + this->prev_dt[0] * vip_state(-2, d)(this->ijk)
+                                                ) / (this->prev_dt[0] * this->prev_dt[1]);
+                this->xchng_pres(this->vip_state(0, d), this->ijk, ex);
+              }
+              interpolate_in_space(this->mem->ndtt_GC);
+              this->mem->barrier();
+            }
+          }
+        }
 
 	void hook_ante_step()
 	{ 
@@ -266,7 +343,8 @@ namespace libmpdataxx
           const int &n_iters
 	) {
 	  parent_t::alloc(mem, n_iters);
-	  parent_t::alloc_tmp_sclr(mem, __FILE__, (ct_params_t::var_dt ? 2 : 1) * parent_t::n_dims); // stash
+	  parent_t::alloc_tmp_sclr(mem, __FILE__,
+                                   (parent_t::div3_mpdata ? 3 : ct_params_t::var_dt ? 2 : 1) * parent_t::n_dims); // stash
 	  parent_t::alloc_tmp_sclr(mem, __FILE__, parent_t::n_dims); // vip_rhs
 	}
  
diff --git a/libmpdata++/solvers/detail/mpdata_rhs_vip_prs_common.hpp b/libmpdata++/solvers/detail/mpdata_rhs_vip_prs_common.hpp
index 36137a7f..63114812 100644
--- a/libmpdata++/solvers/detail/mpdata_rhs_vip_prs_common.hpp
+++ b/libmpdata++/solvers/detail/mpdata_rhs_vip_prs_common.hpp
@@ -55,7 +55,7 @@ namespace libmpdataxx
           bool simple // if true do not normalize gradients (simple laplacian)
         ) return_macro(
           this->xchng_pres(arr, ijk);
-          formulae::nabla::calc_grad<parent_t::n_dims>(lap_tmp, arr, ijk, dijk);
+          formulae::nabla::calc_grad<parent_t::n_dims BOOST_PP_COMMA() ct_params_t::prs_order>(lap_tmp, arr, ijk, dijk);
           if (err_init)
           {
             for (int d = 0; d < parent_t::n_dims; ++d)
@@ -71,13 +71,13 @@ namespace libmpdataxx
             }
           }
           if (!simple) this->normalize_vip(lap_tmp);
-          this->set_edges(lap_tmp, this->ijk, 0);
+          this->set_edges(lap_tmp, this->ijk, err_init ? -1 : 0);
           for (int d = 0; d < parent_t::n_dims; ++d)
           {
             this->xchng_pres(lap_tmp[d], ijk);
           }
           ,
-          formulae::nabla::div<parent_t::n_dims>(lap_tmp, ijk, dijk)
+          formulae::nabla::div<parent_t::n_dims BOOST_PP_COMMA() ct_params_t::prs_order>(lap_tmp, ijk, dijk)
           / formulae::G<ct_params_t::opts>(*this->mem->G, this->ijk)
         )
 
@@ -121,7 +121,7 @@ namespace libmpdataxx
 
 	  this->xchng_pres(this->Phi, this->ijk);
 
-          formulae::nabla::calc_grad<parent_t::n_dims>(tmp_uvw, Phi, this->ijk, this->dijk);
+          formulae::nabla::calc_grad<parent_t::n_dims, ct_params_t::prs_order>(tmp_uvw, Phi, this->ijk, this->dijk);
         }
 
 	void pressure_solver_apply()
@@ -144,7 +144,7 @@ namespace libmpdataxx
 	  pressure_solver_update(true);
 
           this->xchng_pres(this->Phi, this->ijk);
-          formulae::nabla::calc_grad<parent_t::n_dims>(tmp_uvw, Phi, this->ijk, this->dijk);
+          formulae::nabla::calc_grad<parent_t::n_dims, ct_params_t::prs_order>(tmp_uvw, Phi, this->ijk, this->dijk);
 	  pressure_solver_apply();
           this->set_edges(this->vips(), this->ijk, 1);
 	  
@@ -155,7 +155,7 @@ namespace libmpdataxx
  
           // allow pressure_solver_apply at the first time step
 	  this->xchng_pres(this->Phi, this->ijk);
-          formulae::nabla::calc_grad<parent_t::n_dims>(tmp_uvw, Phi, this->ijk, this->dijk);
+          formulae::nabla::calc_grad<parent_t::n_dims, ct_params_t::prs_order>(tmp_uvw, Phi, this->ijk, this->dijk);
           for (int d = 0; d < parent_t::n_dims; ++d)
           {
             this->vip_rhs[d](this->ijk) -= tmp_uvw[d](this->ijk);
diff --git a/libmpdata++/solvers/detail/solver_1d.hpp b/libmpdata++/solvers/detail/solver_1d.hpp
index fbe45ee0..6b417a1b 100644
--- a/libmpdata++/solvers/detail/solver_1d.hpp
+++ b/libmpdata++/solvers/detail/solver_1d.hpp
@@ -28,7 +28,9 @@ namespace libmpdataxx
 	protected:
 
 	const rng_t i; //TODO: to be removed
-        typename parent_t::arr_t &courant_field; // TODO: should be in solver common but cannot be allocated there ?
+
+        // generic field used for various statistics (currently Courant number and divergence)
+        typename parent_t::arr_t &stat_field; // TODO: should be in solver common but cannot be allocated there ?
 
         virtual void xchng_sclr(typename parent_t::arr_t &arr, const bool deriv = false) final // for a given array
         {
@@ -42,17 +44,34 @@ namespace libmpdataxx
           xchng_sclr(this->mem->psi[e][ this->n[e]]);
 	}
 
-        void xchng_vctr_alng(arrvec_t<typename parent_t::arr_t> &arrvec, const bool ad = false) final
+        void xchng_vctr_alng(arrvec_t<typename parent_t::arr_t> &arrvec,
+                             const bool ad = false,
+                             const bool cyclic = false,
+                             const int ex = 0
+        ) final
         {
           this->mem->barrier();
-          for (auto &bc : this->bcs[0]) bc->fill_halos_vctr_alng(arrvec, ad); 
+          if (!cyclic)
+          {
+            for (auto &bc : this->bcs[0]) bc->fill_halos_vctr_alng(arrvec, ad); 
+          }
+          else
+          {
+            for (auto &bc : this->bcs[0]) bc->fill_halos_vctr_alng_cyclic(arrvec, ad);
+          }
           this->mem->barrier();
         }
 
         typename parent_t::real_t courant_number(const arrvec_t<typename parent_t::arr_t> &arrvec) final
         {
-          courant_field(this->ijk) = 0.5 * (abs(arrvec[0](i+h) + arrvec[0](i-h)));
-          return this->mem->max(this->rank, courant_field(this->ijk));
+          stat_field(this->ijk) = 0.5 * (abs(arrvec[0](i+h) + arrvec[0](i-h)));
+          return this->mem->max(this->rank, stat_field(this->ijk));
+        }
+        
+        typename parent_t::real_t max_abs_vctr_div(const arrvec_t<typename parent_t::arr_t> &arrvec) final
+        {
+          stat_field(this->ijk) = abs((arrvec[0](i+h) - arrvec[0](i-h)));
+          return this->mem->max(this->rank, stat_field(this->ijk));
         }
 
         void scale_gc(const typename parent_t::real_t time,
@@ -94,7 +113,7 @@ namespace libmpdataxx
             idx_t<parent_t::n_dims>(args.i)
           ), 
           i(args.i),
-          courant_field(args.mem->tmp[__FILE__][0][0])
+          stat_field(args.mem->tmp[__FILE__][0][0])
 	{
           this->di = p.di;
           this->dijk = {p.di};
diff --git a/libmpdata++/solvers/detail/solver_2d.hpp b/libmpdata++/solvers/detail/solver_2d.hpp
index 5929c4fe..daf32784 100644
--- a/libmpdata++/solvers/detail/solver_2d.hpp
+++ b/libmpdata++/solvers/detail/solver_2d.hpp
@@ -29,7 +29,9 @@ namespace libmpdataxx
 	protected:
       
 	const rng_t i, j; // TODO: to be removed
-        typename parent_t::arr_t &courant_field; // TODO: should be in solver common but cannot be allocated there ?
+
+        // generic field used for various statistics (currently Courant number and divergence)
+        typename parent_t::arr_t &stat_field; // TODO: should be in solver common but cannot be allocated there ?
 
 	virtual void xchng_sclr(typename parent_t::arr_t &arr,
                         const idx_t<2> &range_ijk,
@@ -48,15 +50,35 @@ namespace libmpdataxx
           this->xchng_sclr(this->mem->psi[e][ this->n[e]], this->ijk, this->halo);
 	}
 
-        void xchng_vctr_alng(arrvec_t<typename parent_t::arr_t> &arrvec, const bool ad = false) final
+        void xchng_vctr_alng(arrvec_t<typename parent_t::arr_t> &arrvec,
+                             const bool ad = false,
+                             const bool cyclic = false,
+                             const int ex = 0
+        ) final
         {
           this->mem->barrier();
-          for (auto &bc : this->bcs[0]) bc->fill_halos_vctr_alng(arrvec, j, ad);
-          for (auto &bc : this->bcs[1]) bc->fill_halos_vctr_alng(arrvec, i, ad);
+          if (!cyclic)
+          {
+            for (auto &bc : this->bcs[0]) bc->fill_halos_vctr_alng(arrvec, j^ex, ad);
+            for (auto &bc : this->bcs[1]) bc->fill_halos_vctr_alng(arrvec, i^ex, ad);
+          }
+          else
+          {
+            for (auto &bc : this->bcs[0]) bc->fill_halos_vctr_alng_cyclic(arrvec, j^ex, ad);
+            for (auto &bc : this->bcs[1]) bc->fill_halos_vctr_alng_cyclic(arrvec, i^ex, ad);
+          }
           // TODO: open bc nust be last!!!
           this->mem->barrier();
         }
         
+        virtual void xchng_flux(arrvec_t<typename parent_t::arr_t> &arrvec) final
+        {
+          this->mem->barrier();
+          for (auto &bc : this->bcs[0]) bc->fill_halos_flux(arrvec, j);
+          for (auto &bc : this->bcs[1]) bc->fill_halos_flux(arrvec, i);
+          this->mem->barrier();
+        }
+        
         virtual void xchng_sgs_div(
           typename parent_t::arr_t &arr,
           const idx_t<2> &range_ijk
@@ -108,23 +130,33 @@ namespace libmpdataxx
         virtual void xchng_vctr_nrml(
           arrvec_t<typename parent_t::arr_t> &arrvec, 
           const idx_t<2> &range_ijk,
-          const int ext = 0
+          const int ext = 0,
+          const bool cyclic = false
         ) final
         {
           this->mem->barrier();
-          for (auto &bc : this->bcs[1]) bc->fill_halos_vctr_nrml(arrvec[0], range_ijk[0]^ext^h);
-          for (auto &bc : this->bcs[0]) bc->fill_halos_vctr_nrml(arrvec[1], range_ijk[1]^ext^h);
+          if (!cyclic)
+          {
+            for (auto &bc : this->bcs[1]) bc->fill_halos_vctr_nrml(arrvec[0], range_ijk[0]^ext^h);
+            for (auto &bc : this->bcs[0]) bc->fill_halos_vctr_nrml(arrvec[1], range_ijk[1]^ext^h);
+          }
+          else
+          {
+            for (auto &bc : this->bcs[1]) bc->fill_halos_vctr_nrml_cyclic(arrvec[0], range_ijk[0]^ext^h);
+            for (auto &bc : this->bcs[0]) bc->fill_halos_vctr_nrml_cyclic(arrvec[1], range_ijk[1]^ext^h);
+          }
           this->mem->barrier();
         }
 
         virtual void xchng_pres(
           typename parent_t::arr_t &arr,
-          const idx_t<2> &range_ijk
+          const idx_t<2> &range_ijk,
+          const int ex = 0
         ) final
         {
           this->mem->barrier();
-          for (auto &bc : this->bcs[0]) bc->fill_halos_pres(arr, range_ijk[1]);
-          for (auto &bc : this->bcs[1]) bc->fill_halos_pres(arr, range_ijk[0]);
+          for (auto &bc : this->bcs[0]) bc->fill_halos_pres(arr, range_ijk[1]^ex);
+          for (auto &bc : this->bcs[1]) bc->fill_halos_pres(arr, range_ijk[0]^ex);
           this->mem->barrier();
         }
 
@@ -159,17 +191,7 @@ namespace libmpdataxx
           // TODO: same in 1D
           if (!opts::isset(ct_params_t::opts, opts::dfl))
           {
-            typename ct_params_t::real_t max_abs_div = max(abs(
-              (
-                ( 
-                  this->mem->GC[0](i-h, j  ) - 
-                  this->mem->GC[0](i+h, j  )
-                ) + (
-                  this->mem->GC[1](i,   j-h) - 
-                  this->mem->GC[1](i,   j+h)
-                )
-              ) / formulae::G<ct_params_t::opts, 0>(*this->mem->G, i, j)
-	    ));
+            typename ct_params_t::real_t max_abs_div = max_abs_vctr_div(this->mem->GC);
 
 	    if (max_abs_div > this->max_abs_div_eps) 
 	      throw std::runtime_error("initial advector field is divergent");
@@ -178,11 +200,20 @@ namespace libmpdataxx
 
         typename parent_t::real_t courant_number(const arrvec_t<typename parent_t::arr_t> &arrvec) final
         {
-          courant_field(this->ijk) = 0.5 * (
-                                             abs(arrvec[0](i+h, j) + arrvec[0](i-h, j))
-                                           + abs(arrvec[1](i, j+h) + arrvec[1](i, j-h))
-                                           ) / formulae::G<ct_params_t::opts, 0>(*this->mem->G, i, j);
-          return this->mem->max(this->rank, courant_field(this->ijk));
+          stat_field(this->ijk) = 0.5 * (
+                                           abs(arrvec[0](i+h, j) + arrvec[0](i-h, j))
+                                         + abs(arrvec[1](i, j+h) + arrvec[1](i, j-h))
+                                        ) / formulae::G<ct_params_t::opts, 0>(*this->mem->G, i, j);
+          return this->mem->max(this->rank, stat_field(this->ijk));
+        }
+        
+        typename parent_t::real_t max_abs_vctr_div(const arrvec_t<typename parent_t::arr_t> &arrvec) final
+        {
+          stat_field(this->ijk) = abs(
+                                        (arrvec[0](i+h, j) - arrvec[0](i-h, j))
+                                      + (arrvec[1](i, j+h) - arrvec[1](i, j-h))
+                                     ) / formulae::G<ct_params_t::opts, 0>(*this->mem->G, i, j);
+          return this->mem->max(this->rank, stat_field(this->ijk));
         }
         
         void scale_gc(const typename parent_t::real_t time,
@@ -228,7 +259,7 @@ namespace libmpdataxx
           ),
 	  i(args.i), 
 	  j(args.j),
-          courant_field(args.mem->tmp[__FILE__][0][0])
+          stat_field(args.mem->tmp[__FILE__][0][0])
 	{
           this->di = p.di;
           this->dj = p.dj;
diff --git a/libmpdata++/solvers/detail/solver_3d.hpp b/libmpdata++/solvers/detail/solver_3d.hpp
index b76671b2..b2b8ac64 100644
--- a/libmpdata++/solvers/detail/solver_3d.hpp
+++ b/libmpdata++/solvers/detail/solver_3d.hpp
@@ -29,7 +29,9 @@ namespace libmpdataxx
 	protected:
 
 	const rng_t i, j, k; // TODO: we have ijk in solver_common - could it be removed?
-        typename parent_t::arr_t &courant_field; // TODO:/: should be in solver common but cannot be allocated there ?
+
+        // generic field used for various statistics (currently Courant number and divergence)
+        typename parent_t::arr_t &stat_field; // TODO:/: should be in solver common but cannot be allocated there ?
 
 	virtual void xchng_sclr(typename parent_t::arr_t &arr,
 	               const idx_t<3> &range_ijk,
@@ -48,15 +50,36 @@ namespace libmpdataxx
 	  this->xchng_sclr(this->mem->psi[e][ this->n[e]], this->ijk, this->halo);
 	}
 
-        void xchng_vctr_alng(arrvec_t<typename parent_t::arr_t> &arrvec, const bool ad = false) final
+        void xchng_vctr_alng(arrvec_t<typename parent_t::arr_t> &arrvec,
+                             const bool ad = false,
+                             const bool cyclic = false,
+                             const int ex = 0
+        ) final
         {
           this->mem->barrier();
-          for (auto &bc : this->bcs[0]) bc->fill_halos_vctr_alng(arrvec, j, k, ad); 
-          for (auto &bc : this->bcs[1]) bc->fill_halos_vctr_alng(arrvec, k, i, ad); 
-          for (auto &bc : this->bcs[2]) bc->fill_halos_vctr_alng(arrvec, i, j, ad);
+          if (!cyclic)
+          {
+            for (auto &bc : this->bcs[0]) bc->fill_halos_vctr_alng(arrvec, j^ex, k^ex, ad); 
+            for (auto &bc : this->bcs[1]) bc->fill_halos_vctr_alng(arrvec, k^ex, i^ex, ad); 
+            for (auto &bc : this->bcs[2]) bc->fill_halos_vctr_alng(arrvec, i^ex, j^ex, ad);
+          }
+          else
+          {
+            for (auto &bc : this->bcs[0]) bc->fill_halos_vctr_alng_cyclic(arrvec, j^ex, k^ex, ad); 
+            for (auto &bc : this->bcs[1]) bc->fill_halos_vctr_alng_cyclic(arrvec, k^ex, i^ex, ad); 
+            for (auto &bc : this->bcs[2]) bc->fill_halos_vctr_alng_cyclic(arrvec, i^ex, j^ex, ad);
+          }
           this->mem->barrier();
         }
         
+        virtual void xchng_flux(arrvec_t<typename parent_t::arr_t> &arrvec) final
+        {
+          this->mem->barrier();
+          for (auto &bc : this->bcs[0]) bc->fill_halos_flux(arrvec, j, k);
+          for (auto &bc : this->bcs[1]) bc->fill_halos_flux(arrvec, k, i);
+          for (auto &bc : this->bcs[2]) bc->fill_halos_flux(arrvec, i, j);
+        }
+        
         virtual void xchng_sgs_div(
 	  typename parent_t::arr_t &arr,
 	  const idx_t<3> &range_ijk
@@ -124,31 +147,47 @@ namespace libmpdataxx
 
         virtual void xchng_vctr_nrml(
           arrvec_t<typename parent_t::arr_t> &arrvec,
-	  const idx_t<3> &range_ijk,
-          const int ext = 0
+          const idx_t<3> &range_ijk,
+          const int ext = 0,
+          const bool cyclic = false
         ) final
         {
           this->mem->barrier();
-          for (auto &bc : this->bcs[1]) bc->fill_halos_vctr_nrml(arrvec[0], range_ijk[2]^ext^1, range_ijk[0]^ext^h);
-          for (auto &bc : this->bcs[2]) bc->fill_halos_vctr_nrml(arrvec[0], range_ijk[0]^ext^h, range_ijk[1]^ext^1);
-
-          for (auto &bc : this->bcs[0]) bc->fill_halos_vctr_nrml(arrvec[1], range_ijk[1]^ext^h, range_ijk[2]^ext^1);
-          for (auto &bc : this->bcs[2]) bc->fill_halos_vctr_nrml(arrvec[1], range_ijk[0]^ext^1, range_ijk[1]^ext^h);
-   
-          for (auto &bc : this->bcs[0]) bc->fill_halos_vctr_nrml(arrvec[2], range_ijk[1]^ext^1, range_ijk[2]^ext^h);
-          for (auto &bc : this->bcs[1]) bc->fill_halos_vctr_nrml(arrvec[2], range_ijk[2]^ext^h, range_ijk[0]^ext^1);
+          if (!cyclic)
+          {
+            for (auto &bc : this->bcs[1]) bc->fill_halos_vctr_nrml(arrvec[0], range_ijk[2]^ext^1, range_ijk[0]^ext^h);
+            for (auto &bc : this->bcs[2]) bc->fill_halos_vctr_nrml(arrvec[0], range_ijk[0]^ext^h, range_ijk[1]^ext^1);
+
+            for (auto &bc : this->bcs[0]) bc->fill_halos_vctr_nrml(arrvec[1], range_ijk[1]^ext^h, range_ijk[2]^ext^1);
+            for (auto &bc : this->bcs[2]) bc->fill_halos_vctr_nrml(arrvec[1], range_ijk[0]^ext^1, range_ijk[1]^ext^h);
+            
+            for (auto &bc : this->bcs[0]) bc->fill_halos_vctr_nrml(arrvec[2], range_ijk[1]^ext^1, range_ijk[2]^ext^h);
+            for (auto &bc : this->bcs[1]) bc->fill_halos_vctr_nrml(arrvec[2], range_ijk[2]^ext^h, range_ijk[0]^ext^1);
+          }
+          else
+          {
+            for (auto &bc : this->bcs[1]) bc->fill_halos_vctr_nrml_cyclic(arrvec[0], range_ijk[2]^ext^1, range_ijk[0]^ext^h);
+            for (auto &bc : this->bcs[2]) bc->fill_halos_vctr_nrml_cyclic(arrvec[0], range_ijk[0]^ext^h, range_ijk[1]^ext^1);
+
+            for (auto &bc : this->bcs[0]) bc->fill_halos_vctr_nrml_cyclic(arrvec[1], range_ijk[1]^ext^h, range_ijk[2]^ext^1);
+            for (auto &bc : this->bcs[2]) bc->fill_halos_vctr_nrml_cyclic(arrvec[1], range_ijk[0]^ext^1, range_ijk[1]^ext^h);
+            
+            for (auto &bc : this->bcs[0]) bc->fill_halos_vctr_nrml_cyclic(arrvec[2], range_ijk[1]^ext^1, range_ijk[2]^ext^h);
+            for (auto &bc : this->bcs[1]) bc->fill_halos_vctr_nrml_cyclic(arrvec[2], range_ijk[2]^ext^h, range_ijk[0]^ext^1);
+          }
           this->mem->barrier();
         }
 
         virtual void xchng_pres(
 	  typename parent_t::arr_t &arr,
-	  const idx_t<3> &range_ijk
+	  const idx_t<3> &range_ijk,
+          const int ex = 0
         ) final
         {
           this->mem->barrier();
-          for (auto &bc : this->bcs[0]) bc->fill_halos_pres(arr, range_ijk[1], range_ijk[2]);
-          for (auto &bc : this->bcs[1]) bc->fill_halos_pres(arr, range_ijk[2], range_ijk[0]);
-          for (auto &bc : this->bcs[2]) bc->fill_halos_pres(arr, range_ijk[0], range_ijk[1]);
+          for (auto &bc : this->bcs[0]) bc->fill_halos_pres(arr, range_ijk[1]^ex, range_ijk[2]^ex);
+          for (auto &bc : this->bcs[1]) bc->fill_halos_pres(arr, range_ijk[2]^ex, range_ijk[0]^ex);
+          for (auto &bc : this->bcs[2]) bc->fill_halos_pres(arr, range_ijk[0]^ex, range_ijk[1]^ex);
           this->mem->barrier();
         }
 
@@ -183,20 +222,8 @@ namespace libmpdataxx
           // TODO: same in 1D
           if (!opts::isset(ct_params_t::opts, opts::dfl))
           {
-            typename ct_params_t::real_t max_abs_div = max(abs(
-              (
-                ( 
-                  this->mem->GC[0](i-h, j, k) - 
-                  this->mem->GC[0](i+h, j, k)
-                ) + (
-                  this->mem->GC[1](i, j-h, k) - 
-                  this->mem->GC[1](i, j+h, k)
-                ) + (
-                  this->mem->GC[2](i, j, k-h) - 
-                  this->mem->GC[2](i, j, k+h)
-                )
-              ) / formulae::G<ct_params_t::opts, 0>(*this->mem->G, i, j, k)
-            ));
+            typename ct_params_t::real_t max_abs_div = max_abs_vctr_div(this->mem->GC);
+
 	    if (max_abs_div > this->max_abs_div_eps) 
 	      throw std::runtime_error("initial advector field is divergent");
           }
@@ -204,12 +231,23 @@ namespace libmpdataxx
 
         typename parent_t::real_t courant_number(const arrvec_t<typename parent_t::arr_t> &arrvec) final
         {
-          courant_field(this->ijk) =  0.5 * (
-                                               abs(arrvec[0](i+h, j, k) + arrvec[0](i-h, j, k))
-                                             + abs(arrvec[1](i, j+h, k) + arrvec[1](i, j-h, k))
-                                             + abs(arrvec[2](i, j, k+h) + arrvec[2](i, j, k-h))
-                                             ) / formulae::G<ct_params_t::opts, 0>(*this->mem->G, i, j, k);
-          return this->mem->max(this->rank, courant_field(this->ijk));
+          stat_field(this->ijk) =  0.5 * (
+                                            abs(arrvec[0](i+h, j, k) + arrvec[0](i-h, j, k))
+                                          + abs(arrvec[1](i, j+h, k) + arrvec[1](i, j-h, k))
+                                          + abs(arrvec[2](i, j, k+h) + arrvec[2](i, j, k-h))
+                                         ) / formulae::G<ct_params_t::opts, 0>(*this->mem->G, i, j, k);
+          return this->mem->max(this->rank, stat_field(this->ijk));
+        }
+        
+        typename parent_t::real_t max_abs_vctr_div(const arrvec_t<typename parent_t::arr_t> &arrvec) final
+        {
+          stat_field(this->ijk) =  abs(
+                                         (arrvec[0](i+h, j, k) - arrvec[0](i-h, j, k))
+                                       + (arrvec[1](i, j+h, k) - arrvec[1](i, j-h, k))
+                                       + (arrvec[2](i, j, k+h) - arrvec[2](i, j, k-h))
+                                      ) / formulae::G<ct_params_t::opts, 0>(*this->mem->G, i, j, k);
+
+          return this->mem->max(this->rank, stat_field(this->ijk));
         }
 
         void scale_gc(const typename parent_t::real_t time,
@@ -260,7 +298,7 @@ namespace libmpdataxx
 	  i(args.i), 
           j(args.j), 
           k(args.k),
-          courant_field(args.mem->tmp[__FILE__][0][0])
+          stat_field(args.mem->tmp[__FILE__][0][0])
 	{
           this->di = p.di;
           this->dj = p.dj;
diff --git a/libmpdata++/solvers/detail/solver_common.hpp b/libmpdata++/solvers/detail/solver_common.hpp
index c305aaa3..e9ffca10 100644
--- a/libmpdata++/solvers/detail/solver_common.hpp
+++ b/libmpdata++/solvers/detail/solver_common.hpp
@@ -50,14 +50,19 @@ namespace libmpdataxx
 
 	protected: 
         // TODO: output common doesnt know about ct_params_t
-        bool var_dt = ct_params_t::var_dt;
+        static constexpr bool var_dt = ct_params_t::var_dt;
+       
+        // for convenience
+        static constexpr bool div3_mpdata = opts::isset(ct_params_t::opts, opts::div_3rd)    ||
+                                            opts::isset(ct_params_t::opts, opts::div_3rd_dt)  ;
 
         std::array<std::array<bcp_t, 2>, n_dims> bcs;
 
         const int rank;
 
         // di, dj, dk declared here for output purposes
-        real_t prev_dt, dt, di, dj, dk, max_abs_div_eps, max_courant;
+        real_t dt, di, dj, dk, max_abs_div_eps, max_courant;
+        std::array<real_t, div3_mpdata ? 2 : 1> prev_dt;
         std::array<real_t, n_dims> dijk;
 
 	const idx_t<n_dims> ijk;
@@ -81,7 +86,7 @@ namespace libmpdataxx
 	virtual void xchng(int e) = 0;
         // TODO: implement flagging of valid/invalid halo for optimisations
 
-        virtual void xchng_vctr_alng(arrvec_t<arr_t>&, const bool ad = false) = 0;
+        virtual void xchng_vctr_alng(arrvec_t<arr_t>&, const bool ad = false, const bool cyclic = false, const int ex = 0) = 0;
 
         void set_bcs(const int &d, bcp_t &bcl, bcp_t &bcr)
         {
@@ -90,9 +95,11 @@ namespace libmpdataxx
         }
 
 	virtual real_t courant_number(const arrvec_t<arr_t>&) = 0;
+	virtual real_t max_abs_vctr_div(const arrvec_t<arr_t>&) = 0;
        
         // return false if advector does not change in time
         virtual bool calc_gc() {return false;}
+        virtual void calc_ndt_gc() {}
 
         virtual void scale_gc(const real_t time, const real_t cur_dt, const real_t old_dt) = 0;
 
@@ -137,8 +144,11 @@ namespace libmpdataxx
             real_t cfl = courant_number(mem->GC);
             if (cfl > 0)
             {
+              auto old_dt = dt; // a bit confusing, old_dt - dt before scaling,
+                                // prev_dt[] - dt(s) from previous time steps
+                                // TODO: reconsider
               dt *= max_courant / cfl;
-              scale_gc(time, dt, prev_dt);
+              scale_gc(time, dt, old_dt);
             }
           }
         }
@@ -161,7 +171,7 @@ namespace libmpdataxx
           const decltype(ijk) &ijk
         ) :
           rank(rank), 
-          prev_dt(p.dt),
+          prev_dt{},
           dt(p.dt),
           di(0),
           dj(0),
@@ -247,6 +257,10 @@ namespace libmpdataxx
               }
             }
 
+            // once we set the time step
+            // for third-order MPDATA we need to calculate time derivatives of the advector field
+            if (var_gc && div3_mpdata) calc_ndt_gc();
+            
             hook_ante_step();
 
 	    for (int e = 0; e < n_eqns; ++e) scale(e, ct_params_t::hint_scale(e));
@@ -263,7 +277,8 @@ namespace libmpdataxx
 
             timestep++;
             time = ct_params_t::var_dt ? time + dt : timestep * dt;
-            prev_dt = dt;
+            if (div3_mpdata) prev_dt[1] = prev_dt[0];
+            prev_dt[0] = dt;
             hook_post_step();
 
             if (time >= nt) additional_steps--;
diff --git a/libmpdata++/solvers/mpdata_rhs_vip.hpp b/libmpdata++/solvers/mpdata_rhs_vip.hpp
index 3c36de15..02fecfce 100644
--- a/libmpdata++/solvers/mpdata_rhs_vip.hpp
+++ b/libmpdata++/solvers/mpdata_rhs_vip.hpp
@@ -36,22 +36,15 @@ namespace libmpdataxx
       // member fields
       const rng_t im;
 
-      void fill_stash() final
-      {
-        this->fill_stash_helper(0, ix::vip_i);
-      }
-
-      void interpolate_in_space() final
+      void interpolate_in_space(arrvec_t<typename parent_t::arr_t> &interpolated) final
       {
         using namespace libmpdataxx::arakawa_c;
-        
-        const auto off = ct_params_t::var_dt ? ct_params_t::n_dims : 0;
 
 	if (!this->mem->G)
 	{
-	  this->mem->GC[0](im + h) = this->dt / this->di * .5 * (
-	    this->stash[0 + off](im    ) + 
-	    this->stash[0 + off](im + 1)
+	  interpolated(im + h) = this->dt / this->di * .5 * (
+	    this->vip_state(0, 0)(im    ) + 
+	    this->vip_state(0, 0)(im + 1)
 	  );
 	} 
 	else
@@ -62,9 +55,8 @@ namespace libmpdataxx
 
       void extrapolate_in_time() final
       {
-        const auto off = ct_params_t::var_dt ? ct_params_t::n_dims : 0;
 	this->extrp(0, ix::vip_i);     
-	this->xchng_sclr(this->stash[0 + off]);      // filling halos 
+	this->xchng_sclr(this->vip_state(0, 0));      // filling halos 
       }
 
       public:
@@ -100,15 +92,10 @@ namespace libmpdataxx
       // member fields
       const rng_t im, jm;
 
-      void fill_stash() final 
-      {
-        this->fill_stash_helper(0, ix::vip_i);
-        this->fill_stash_helper(1, ix::vip_j);
-      }
-
       template<int d, class arr_t> 
       void intrp(
-	const arr_t psi,
+        arrvec_t<typename parent_t::arr_t> &intrp_psi,
+	const arr_t &psi,
 	const rng_t &i, 
 	const rng_t &j, 
 	const typename ct_params_t::real_t &di 
@@ -116,46 +103,81 @@ namespace libmpdataxx
       {   
 	using idxperm::pi;
 	using namespace arakawa_c;
-  
-	if (!this->mem->G)
-	{
-	  this->mem->GC[d](pi<d>(i+h,j)) = this->dt / di * .5 * (
-	    psi(pi<d>(i,    j)) + 
-	    psi(pi<d>(i + 1,j))
-	  );
-	} 
-	else
-	{ 
-	  this->mem->GC[d](pi<d>(i+h,j)) = this->dt / di * .5 * (
-	    (*this->mem->G)(pi<d>(i,    j)) * psi(pi<d>(i,    j)) + 
-	    (*this->mem->G)(pi<d>(i + 1,j)) * psi(pi<d>(i + 1,j))
-	  );
-	}
+ 
+        if (parent_t::sptl_intrp == aver2)
+        {
+          if (!this->mem->G)
+          {
+            intrp_psi[d](pi<d>(i+h,j)) = this->dt / di * .5 * (
+              psi(pi<d>(i,    j)) + 
+              psi(pi<d>(i + 1,j))
+            );
+          } 
+          else
+          { 
+            intrp_psi[d](pi<d>(i+h,j)) = this->dt / di * .5 * (
+              (*this->mem->G)(pi<d>(i,    j)) * psi(pi<d>(i,    j)) + 
+              (*this->mem->G)(pi<d>(i + 1,j)) * psi(pi<d>(i + 1,j))
+            );
+          }
+        }
+        else if (parent_t::sptl_intrp == aver4)
+        {
+          if (!this->mem->G)
+          {
+            intrp_psi[d](pi<d>(i+h, j)) = this->dt /(12 * di) * (
+              7 *
+              (
+                psi(pi<d>(i  , j)) + 
+                psi(pi<d>(i+1, j))
+              )
+              -
+              (
+                psi(pi<d>(i-1, j)) + 
+                psi(pi<d>(i+2, j))
+              )
+            );
+          } 
+          else
+          { 
+            intrp_psi[d](pi<d>(i+h, j)) = this->dt /(12 * di) * (
+              7 *
+              (
+                (*this->mem->G)(pi<d>(i  , j)) * psi(pi<d>(i  , j)) + 
+                (*this->mem->G)(pi<d>(i+1, j)) * psi(pi<d>(i+1, j))
+              )
+              -
+              (
+                (*this->mem->G)(pi<d>(i-1, j)) * psi(pi<d>(i-1, j)) + 
+                (*this->mem->G)(pi<d>(i+2, j)) * psi(pi<d>(i+2, j))
+              )
+            );
+          }
+        }
       }  
 
-      void interpolate_in_space() final
+      void interpolate_in_space(arrvec_t<typename parent_t::arr_t> &interpolated) final
       {
         using namespace libmpdataxx::arakawa_c;
 
-        const auto off = ct_params_t::var_dt ? ct_params_t::n_dims : 0;
-
-	intrp<0>(this->stash[0 + off], im, this->j^this->halo, this->di);
-	intrp<1>(this->stash[1 + off], jm, this->i^this->halo, this->dj);
-        this->xchng_vctr_alng(this->mem->GC);
         auto ex = this->halo - 1;
-        this->xchng_vctr_nrml(this->mem->GC, this->ijk, ex);
+	intrp<0>(interpolated, this->vip_state(0, 0), im, this->j^ex, this->di);
+	intrp<1>(interpolated, this->vip_state(0, 1), jm, this->i^ex, this->dj);
+        this->xchng_vctr_alng(interpolated, /*ad*/ true, /*cyclic*/ true, ex);
+        this->xchng_vctr_nrml(interpolated, this->ijk, ex, /*cyclic*/ false);
       }
 
       void extrapolate_in_time() final
       {
         using namespace libmpdataxx::arakawa_c; 
 
-        const auto off = ct_params_t::var_dt ? ct_params_t::n_dims : 0;
-
-	this->extrp(0, ix::vip_i);     
-	this->xchng_sclr(this->stash[0 + off], this->ijk, this->halo);      // filling halos 
+        // using xchng_pres because bcs have to be consistent with those used in
+        // pressure solver to obtain non-divergent advector field
+        auto ex = this->halo - 1;
+	this->extrp(0, ix::vip_i);
+	this->xchng_pres(this->vip_state(0, 0), this->ijk, ex);
 	this->extrp(1, ix::vip_j);
-	this->xchng_sclr(this->stash[1 + off], this->ijk, this->halo);      // filling halos 
+	this->xchng_pres(this->vip_state(0, 1), this->ijk, ex);
       }
 
       public:
@@ -215,16 +237,10 @@ namespace libmpdataxx
       // member fields
       const rng_t im, jm, km;
 
-      void fill_stash() final 
-      {
-        this->fill_stash_helper(0, ix::vip_i);
-        this->fill_stash_helper(1, ix::vip_j);
-        this->fill_stash_helper(2, ix::vip_k);
-      }
-
       template<int d, class arr_t> 
       void intrp(
-	const arr_t psi,
+        arrvec_t<typename parent_t::arr_t> &intrp_psi,
+	const arr_t &psi,
 	const rng_t &i, 
 	const rng_t &j, 
 	const rng_t &k, 
@@ -234,48 +250,83 @@ namespace libmpdataxx
 	using idxperm::pi;
 	using namespace arakawa_c;
   
-	if (!this->mem->G)
-	{
-	  this->mem->GC[d](pi<d>(i+h, j, k)) = this->dt / di * .5 * (
-	    psi(pi<d>(i,     j, k)) + 
-	    psi(pi<d>(i + 1, j, k))
-	  );
-	} 
-	else
-	{ 
-	  this->mem->GC[d](pi<d>(i+h, j, k)) = this->dt / di * .5 * (
-	    (*this->mem->G)(pi<d>(i  , j, k)) * psi(pi<d>(i,   j, k)) + 
-	    (*this->mem->G)(pi<d>(i+1, j, k)) * psi(pi<d>(i+1, j, k))
-	  );
-	}
+        if (parent_t::sptl_intrp == aver2)
+        {
+          if (!this->mem->G)
+          {
+            intrp_psi[d](pi<d>(i+h, j, k)) = this->dt / di * .5 * (
+              psi(pi<d>(i,     j, k)) + 
+              psi(pi<d>(i + 1, j, k))
+            );
+          } 
+          else
+          { 
+            intrp_psi[d](pi<d>(i+h, j, k)) = this->dt / di * .5 * (
+              (*this->mem->G)(pi<d>(i  , j, k)) * psi(pi<d>(i,   j, k)) + 
+              (*this->mem->G)(pi<d>(i+1, j, k)) * psi(pi<d>(i+1, j, k))
+            );
+          }
+        }
+        else if (parent_t::sptl_intrp == aver4)
+        {
+          if (!this->mem->G)
+          {
+            intrp_psi[d](pi<d>(i+h, j, k)) = this->dt /(12 * di) * (
+              7 *
+              (
+                psi(pi<d>(i  , j, k)) + 
+                psi(pi<d>(i+1, j, k))
+              )
+              -
+              (
+                psi(pi<d>(i-1, j, k)) + 
+                psi(pi<d>(i+2, j, k))
+              )
+            );
+          } 
+          else
+          { 
+            intrp_psi[d](pi<d>(i+h, j, k)) = this->dt /(12 * di) * (
+              7 *
+              (
+                (*this->mem->G)(pi<d>(i  , j, k)) * psi(pi<d>(i  , j, k)) + 
+                (*this->mem->G)(pi<d>(i+1, j, k)) * psi(pi<d>(i+1, j, k))
+              )
+              -
+              (
+                (*this->mem->G)(pi<d>(i-1, j, k)) * psi(pi<d>(i-1, j, k)) + 
+                (*this->mem->G)(pi<d>(i+2, j, k)) * psi(pi<d>(i+2, j, k))
+              )
+            );
+          }
+        }
       }  
 
-      void interpolate_in_space() final
+      void interpolate_in_space(arrvec_t<typename parent_t::arr_t> &interpolated) final
       {
         using namespace libmpdataxx::arakawa_c;
         
-        const auto off = ct_params_t::var_dt ? ct_params_t::n_dims : 0;
-
-	intrp<0>(this->stash[0 + off], im, this->j^this->halo, this->k^this->halo, this->di);
-	intrp<1>(this->stash[1 + off], jm, this->k^this->halo, this->i^this->halo, this->dj);
-	intrp<2>(this->stash[2 + off], km, this->i^this->halo, this->j^this->halo, this->dk);
-        this->xchng_vctr_alng(this->mem->GC);
         auto ex = this->halo - 1;
-        this->xchng_vctr_nrml(this->mem->GC, this->ijk, ex);
+	intrp<0>(interpolated, this->vip_state(0, 0), im^ex, this->j^ex, this->k^ex, this->di);
+	intrp<1>(interpolated, this->vip_state(0, 1), jm^ex, this->k^ex, this->i^ex, this->dj);
+	intrp<2>(interpolated, this->vip_state(0, 2), km^ex, this->i^ex, this->j^ex, this->dk);
+        this->xchng_vctr_alng(interpolated, /*ad*/ true, /*cyclic*/ true);
+        this->xchng_vctr_nrml(interpolated, this->ijk, ex, /*cyclic*/ false);
       }
 
       void extrapolate_in_time() final
       {
         using namespace libmpdataxx::arakawa_c; 
 
-        const auto off = ct_params_t::var_dt ? ct_params_t::n_dims : 0;
-
+        // using xchng_pres because bcs have to be consistent with those used in
+        // pressure solver to obtain non-divergent advector field
+        auto ex = this->halo - 1;
 	this->extrp(0, ix::vip_i);     
-	this->xchng_sclr(this->stash[0 + off], this->ijk, this->halo);      // filling halos 
+	this->xchng_pres(this->vip_state(0, 0), this->ijk, ex);
 	this->extrp(1, ix::vip_j);
-	this->xchng_sclr(this->stash[1 + off], this->ijk, this->halo);      // filling halos 
+	this->xchng_pres(this->vip_state(0, 1), this->ijk, ex);
 	this->extrp(2, ix::vip_k);
-	this->xchng_sclr(this->stash[2 + off], this->ijk, this->halo);      // filling halos 
+	this->xchng_pres(this->vip_state(0, 2), this->ijk, ex);
       }
 
       public:
diff --git a/tests/mp3_paper_JCP/reversing_deform/panel_plot.py b/tests/mp3_paper_JCP/reversing_deform/panel_plot.py
index c1cde541..abe54b53 100644
--- a/tests/mp3_paper_JCP/reversing_deform/panel_plot.py
+++ b/tests/mp3_paper_JCP/reversing_deform/panel_plot.py
@@ -10,7 +10,7 @@ def panel_plot(plot_data, fname = 'panel.pdf'):
 
     levels = np.linspace(-0.05, 1.15, 25)
 
-    fig, axarr = plt.subplots(2, 2, figsize=(20, 14), sharex = 'col', sharey = 'row')
+    fig, axarr = plt.subplots(2, 2, figsize=(16, 10), sharex = 'col', sharey = 'row')
     field2pos = {'gh' :  (0, 0), 'cb' : (0, 1), 'ccb' : (1, 1), 'sc' : (1, 0) }
     field2title = {'gh' :  '(a) Gaussian hills',
                    'cb' : '(b) cosine bells',
@@ -27,11 +27,15 @@ def panel_plot(plot_data, fname = 'panel.pdf'):
         axarr[i].set_title(field2title[f], fontsize = 30, y = 1.02)
     
     for ax in axarr.flatten():
+        ax.set_xlim([0, nx - 1])
         ax.set_xticks([0, (nx - 1) / 4, (nx - 1) / 2, 3 * (nx - 1) / 4, nx - 1])
         ax.set_xticklabels([r'$0$', r'$\pi$/2', r'$\pi$', r'$3\pi/2$', r'$2\pi$'])
         
+        ax.set_ylim([0, ny - 1])
         ax.set_yticks([0, (ny+0.5) / 2, ny - 1])
         ax.set_yticklabels([r'$-\pi/2$', r'$0$', r'$\pi/2$'])
+        
+        ax.set_aspect(1.0, adjustable = 'box-forced')
         ax.grid(color = 'w', lw = 2, linestyle = ':')
 
     plt.subplots_adjust(bottom=0.2)
diff --git a/tests/paper_2015_GMD/0_basic_example/refdata/log-orig.gz b/tests/paper_2015_GMD/0_basic_example/refdata/log-orig.gz
index 91948e34..3a314d14 100644
Binary files a/tests/paper_2015_GMD/0_basic_example/refdata/log-orig.gz and b/tests/paper_2015_GMD/0_basic_example/refdata/log-orig.gz differ
diff --git a/tests/paper_2015_GMD/3_rotating_cone_2d/refdata/log-orig.gz b/tests/paper_2015_GMD/3_rotating_cone_2d/refdata/log-orig.gz
index 3a65f764..7355de36 100644
Binary files a/tests/paper_2015_GMD/3_rotating_cone_2d/refdata/log-orig.gz and b/tests/paper_2015_GMD/3_rotating_cone_2d/refdata/log-orig.gz differ
diff --git a/tests/paper_2015_GMD/3_rotating_cone_2d/refdata/stats_basic_.txt.gz b/tests/paper_2015_GMD/3_rotating_cone_2d/refdata/stats_basic_.txt.gz
index 16fd025e..9a784014 100644
Binary files a/tests/paper_2015_GMD/3_rotating_cone_2d/refdata/stats_basic_.txt.gz and b/tests/paper_2015_GMD/3_rotating_cone_2d/refdata/stats_basic_.txt.gz differ
diff --git a/tests/paper_2015_GMD/3_rotating_cone_2d/refdata/stats_fct_.txt.gz b/tests/paper_2015_GMD/3_rotating_cone_2d/refdata/stats_fct_.txt.gz
index 59d641e8..c8b03d6b 100644
Binary files a/tests/paper_2015_GMD/3_rotating_cone_2d/refdata/stats_fct_.txt.gz and b/tests/paper_2015_GMD/3_rotating_cone_2d/refdata/stats_fct_.txt.gz differ
diff --git a/tests/paper_2015_GMD/3_rotating_cone_2d/refdata/stats_iga_fct_.txt.gz b/tests/paper_2015_GMD/3_rotating_cone_2d/refdata/stats_iga_fct_.txt.gz
index 90ac9daa..8d5dfb61 100644
Binary files a/tests/paper_2015_GMD/3_rotating_cone_2d/refdata/stats_iga_fct_.txt.gz and b/tests/paper_2015_GMD/3_rotating_cone_2d/refdata/stats_iga_fct_.txt.gz differ
diff --git a/tests/paper_2015_GMD/3_rotating_cone_2d/refdata/stats_iga_tot_fct_.txt.gz b/tests/paper_2015_GMD/3_rotating_cone_2d/refdata/stats_iga_tot_fct_.txt.gz
index d404b2f8..ed1d4ba8 100644
Binary files a/tests/paper_2015_GMD/3_rotating_cone_2d/refdata/stats_iga_tot_fct_.txt.gz and b/tests/paper_2015_GMD/3_rotating_cone_2d/refdata/stats_iga_tot_fct_.txt.gz differ
diff --git a/tests/paper_2015_GMD/3_rotating_cone_2d/refdata/stats_iters3_tot_fct_.txt.gz b/tests/paper_2015_GMD/3_rotating_cone_2d/refdata/stats_iters3_tot_fct_.txt.gz
index 7651db87..9eb46148 100644
Binary files a/tests/paper_2015_GMD/3_rotating_cone_2d/refdata/stats_iters3_tot_fct_.txt.gz and b/tests/paper_2015_GMD/3_rotating_cone_2d/refdata/stats_iters3_tot_fct_.txt.gz differ
diff --git a/tests/paper_2015_GMD/4_revolving_sphere_3d/refdata/fct/const.h5 b/tests/paper_2015_GMD/4_revolving_sphere_3d/refdata/fct/const.h5
index ece8425a..3a498de9 100644
Binary files a/tests/paper_2015_GMD/4_revolving_sphere_3d/refdata/fct/const.h5 and b/tests/paper_2015_GMD/4_revolving_sphere_3d/refdata/fct/const.h5 differ
diff --git a/tests/paper_2015_GMD/4_revolving_sphere_3d/refdata/fct/timestep0000000000.h5 b/tests/paper_2015_GMD/4_revolving_sphere_3d/refdata/fct/timestep0000000000.h5
index 382d0a27..009bf4df 100644
Binary files a/tests/paper_2015_GMD/4_revolving_sphere_3d/refdata/fct/timestep0000000000.h5 and b/tests/paper_2015_GMD/4_revolving_sphere_3d/refdata/fct/timestep0000000000.h5 differ
diff --git a/tests/paper_2015_GMD/4_revolving_sphere_3d/refdata/fct/timestep0000000556.h5 b/tests/paper_2015_GMD/4_revolving_sphere_3d/refdata/fct/timestep0000000556.h5
index 23bc8a47..1ba279f2 100644
Binary files a/tests/paper_2015_GMD/4_revolving_sphere_3d/refdata/fct/timestep0000000556.h5 and b/tests/paper_2015_GMD/4_revolving_sphere_3d/refdata/fct/timestep0000000556.h5 differ
diff --git a/tests/paper_2015_GMD/4_revolving_sphere_3d/refdata/iga_fct/const.h5 b/tests/paper_2015_GMD/4_revolving_sphere_3d/refdata/iga_fct/const.h5
index dbc5d769..ba9abc34 100644
Binary files a/tests/paper_2015_GMD/4_revolving_sphere_3d/refdata/iga_fct/const.h5 and b/tests/paper_2015_GMD/4_revolving_sphere_3d/refdata/iga_fct/const.h5 differ
diff --git a/tests/paper_2015_GMD/4_revolving_sphere_3d/refdata/iga_fct/timestep0000000000.h5 b/tests/paper_2015_GMD/4_revolving_sphere_3d/refdata/iga_fct/timestep0000000000.h5
index 0938f260..4fe12fa3 100644
Binary files a/tests/paper_2015_GMD/4_revolving_sphere_3d/refdata/iga_fct/timestep0000000000.h5 and b/tests/paper_2015_GMD/4_revolving_sphere_3d/refdata/iga_fct/timestep0000000000.h5 differ
diff --git a/tests/paper_2015_GMD/4_revolving_sphere_3d/refdata/iga_fct/timestep0000000556.h5 b/tests/paper_2015_GMD/4_revolving_sphere_3d/refdata/iga_fct/timestep0000000556.h5
index 97ad70d5..8c80f7e7 100644
Binary files a/tests/paper_2015_GMD/4_revolving_sphere_3d/refdata/iga_fct/timestep0000000556.h5 and b/tests/paper_2015_GMD/4_revolving_sphere_3d/refdata/iga_fct/timestep0000000556.h5 differ
diff --git a/tests/paper_2015_GMD/4_revolving_sphere_3d/refdata/stats_fct.txt.gz b/tests/paper_2015_GMD/4_revolving_sphere_3d/refdata/stats_fct.txt.gz
index eeddf75f..c02030b5 100644
Binary files a/tests/paper_2015_GMD/4_revolving_sphere_3d/refdata/stats_fct.txt.gz and b/tests/paper_2015_GMD/4_revolving_sphere_3d/refdata/stats_fct.txt.gz differ
diff --git a/tests/paper_2015_GMD/4_revolving_sphere_3d/refdata/stats_iga_fct.txt.gz b/tests/paper_2015_GMD/4_revolving_sphere_3d/refdata/stats_iga_fct.txt.gz
index 1bff9971..0e7f57a8 100644
Binary files a/tests/paper_2015_GMD/4_revolving_sphere_3d/refdata/stats_iga_fct.txt.gz and b/tests/paper_2015_GMD/4_revolving_sphere_3d/refdata/stats_iga_fct.txt.gz differ
diff --git a/tests/sandbox/CMakeLists.txt b/tests/sandbox/CMakeLists.txt
index 762df2f4..1cb6a9d5 100644
--- a/tests/sandbox/CMakeLists.txt
+++ b/tests/sandbox/CMakeLists.txt
@@ -47,3 +47,4 @@ add_subdirectory(tgv)
 add_subdirectory(convergence_2d_3d)
 add_subdirectory(pbl)
 add_subdirectory(convergence_spacetime)
+add_subdirectory(bconds_div)
diff --git a/tests/sandbox/bconds_div/CMakeLists.txt b/tests/sandbox/bconds_div/CMakeLists.txt
new file mode 100644
index 00000000..08987e55
--- /dev/null
+++ b/tests/sandbox/bconds_div/CMakeLists.txt
@@ -0,0 +1,2 @@
+libmpdataxx_add_test(bconds_div_2d)
+libmpdataxx_add_test(bconds_div_3d)
diff --git a/tests/sandbox/bconds_div/bconds_div.hpp b/tests/sandbox/bconds_div/bconds_div.hpp
new file mode 100644
index 00000000..5cb60c96
--- /dev/null
+++ b/tests/sandbox/bconds_div/bconds_div.hpp
@@ -0,0 +1,45 @@
+#pragma once
+#include <libmpdata++/solvers/boussinesq.hpp>
+
+template <class ct_params_t>
+class bconds_div : public libmpdataxx::solvers::boussinesq<ct_params_t>
+{
+  using parent_t = libmpdataxx::solvers::boussinesq<ct_params_t>;
+
+  protected:
+  
+  const std::string error_str;
+
+  void hook_post_step() final
+  {
+    auto gc_div = this->max_abs_vctr_div(this->mem->GC);
+
+    if (gc_div > 2 * this->prs_tol)
+    {
+      if (this->rank == 0)
+      {
+        std::cout << "bconds: " << error_str
+                  << " gc_div: " << gc_div << std::endl;
+      }
+      this->mem->barrier();
+      throw std::runtime_error("");
+    }
+    
+    parent_t::hook_post_step();
+  }
+
+  public:
+
+  struct rt_params_t : parent_t::rt_params_t
+  {
+    std::string error_str;
+  };
+
+  bconds_div(
+    typename parent_t::ctor_args_t args,
+    const rt_params_t &p
+  ) :
+    parent_t(args, p),
+    error_str(p.error_str)
+  {}
+};
diff --git a/tests/sandbox/bconds_div/bconds_div_2d.cpp b/tests/sandbox/bconds_div/bconds_div_2d.cpp
new file mode 100644
index 00000000..2a29db53
--- /dev/null
+++ b/tests/sandbox/bconds_div/bconds_div_2d.cpp
@@ -0,0 +1,76 @@
+// test to check if we obtain non-divergent advector field with requested precision
+// using different sets of boundary conditions, setup based on the boussinesq test
+// from the paper suite
+#include <libmpdata++/solvers/boussinesq.hpp>
+#include <libmpdata++/concurr/threads.hpp>
+#include "bconds_div.hpp"
+
+using namespace libmpdataxx;
+
+template <bcond::bcond_e bcond_h, bcond::bcond_e bcond_v>
+void test(const std::string &error_str)
+{
+  struct ct_params_t : ct_params_default_t
+  {
+    using real_t = double;
+    enum { n_dims = 2 };
+    enum { n_eqns = 3 };
+    enum { rhs_scheme = solvers::trapez };
+    enum { prs_scheme = solvers::cr };
+    struct ix { enum {
+      u, w, tht, 
+      vip_i=u, vip_j=w, vip_den=-1
+    }; };
+  }; 
+  
+  using ix = typename ct_params_t::ix;
+  using real_t = typename ct_params_t::real_t;
+  
+  const int r0 = 250; 
+  const int nx = 201, ny = 201, nt = 10;
+  typename ct_params_t::real_t Tht_ref = 300;
+
+  using slv_t = bconds_div<ct_params_t>;
+
+  typename slv_t::rt_params_t p;
+  p.dt = 7.5;
+  p.di = p.dj = 10.; 
+  p.Tht_ref = Tht_ref; 
+  p.prs_tol = 1e-10;
+  p.grid_size = {nx, ny};
+  p.error_str = error_str;
+
+  libmpdataxx::concurr::threads<
+    slv_t, 
+    bcond_h, bcond_h,
+    bcond_v, bcond_v
+  > slv(p);
+
+  {
+    blitz::firstIndex i;
+    blitz::secondIndex j;
+    
+    slv.sclr_array("tht_e") = Tht_ref;
+    slv.advectee(ix::tht) = Tht_ref + where(
+      // if
+      pow(i * p.di - 4    * r0 , 2) + 
+      pow(j * p.dj - 1.04 * r0 , 2) <= pow(r0, 2), 
+      // then
+      .5, 
+      // else
+      0
+    );
+    slv.advectee(ix::u) = 0; 
+    slv.advectee(ix::w) = 0; 
+  }
+
+  slv.advance(nt);  
+}
+
+int main() 
+{
+  test<bcond::cyclic, bcond::cyclic>("cyclic_cyclic");
+  test<bcond::open  , bcond::cyclic>("open_cyclic");
+  test<bcond::open  , bcond::rigid >("open_rigid");
+  test<bcond::cyclic, bcond::rigid >("cyclic_rigid");
+};
diff --git a/tests/sandbox/bconds_div/bconds_div_3d.cpp b/tests/sandbox/bconds_div/bconds_div_3d.cpp
new file mode 100644
index 00000000..edbf4558
--- /dev/null
+++ b/tests/sandbox/bconds_div/bconds_div_3d.cpp
@@ -0,0 +1,79 @@
+// test to check if we obtain non-divergent advector field with requested precision
+// using different sets of boundary conditions, setup based on the boussinesq test
+// from the paper suite
+#include <libmpdata++/solvers/boussinesq.hpp>
+#include <libmpdata++/concurr/threads.hpp>
+#include "bconds_div.hpp"
+
+using namespace libmpdataxx;
+
+template <bcond::bcond_e bcond_x, bcond::bcond_e bcond_y, bcond::bcond_e bcond_z>
+void test(const std::string &error_str)
+{
+  struct ct_params_t : ct_params_default_t
+  {
+    using real_t = double;
+    enum { n_dims = 3 };
+    enum { n_eqns = 4 };
+    enum { rhs_scheme = solvers::trapez };
+    enum { prs_scheme = solvers::cr };
+    struct ix { enum {
+      u, v, w, tht, 
+      vip_i=u, vip_j=v, vip_k=w, vip_den=-1
+    }; };
+  }; 
+  
+  using ix = typename ct_params_t::ix;
+  using real_t = typename ct_params_t::real_t;
+  
+  const double r0 = 37.5; 
+  const int nx = 31, ny = 31, nz = 31, nt = 10;
+  typename ct_params_t::real_t Tht_ref = 300;
+
+  using slv_t = bconds_div<ct_params_t>;
+
+  typename slv_t::rt_params_t p;
+  p.dt = 15;
+  p.di = p.dj = p.dk = 10.; 
+  p.Tht_ref = Tht_ref; 
+  p.prs_tol = 1e-10;
+  p.grid_size = {nx, ny, nz};
+  p.error_str = error_str;
+
+  libmpdataxx::concurr::threads<
+    slv_t, 
+    bcond_x, bcond_x,
+    bcond_y, bcond_y,
+    bcond_z, bcond_z
+  > slv(p);
+
+  {
+    blitz::firstIndex i;
+    blitz::secondIndex j;
+    blitz::thirdIndex k;
+    
+    slv.sclr_array("tht_e") = Tht_ref;
+    slv.advectee(ix::tht) = Tht_ref + where(
+      // if
+      pow(i * p.di - 4    * r0 , 2) + 
+      pow(j * p.dj - 1.04 * r0 , 2) <= pow(r0, 2), 
+      // then
+      .5, 
+      // else
+      0
+    );
+    slv.advectee(ix::u) = 0; 
+    slv.advectee(ix::v) = 0; 
+    slv.advectee(ix::w) = 0; 
+  }
+
+  slv.advance(nt);  
+}
+
+int main() 
+{
+  test<bcond::cyclic, bcond::cyclic, bcond::cyclic>("cyclic_cyclic_cyclic");
+  test<bcond::cyclic, bcond::cyclic, bcond::rigid >("cyclic_cyclic_rigid" );
+  test<bcond::open  , bcond::open  , bcond::rigid >("open_open_rigid"     );
+  test<bcond::open  , bcond::rigid , bcond::cyclic>("open_rigid_cyclic"   );
+};