// SASAEVAL.CPP

// Copyright (C) 2000 Tommi Hassinen.

// This package is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.

// This package is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.

// You should have received a copy of the GNU General Public License
// along with this package; if not, write to the Free Software
// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA

/*################################################################################################*/

#include "sasaeval.h"

#include "engine.h"

#include "local_i18n.h"
#include "notice.h"

#include <vector>
#include <algorithm>
#include <sstream>
using namespace std;

/*################################################################################################*/

// the surface area code apparently contains some bugs, since it sometimes
// crashes. another possibility is that the surface area math contains some
// bad cases (like arcs/segments with zero length/area ???) which should be
// avoided somehow. either way, the numerical and analytical gradients of
// surface area seem to match.

// LOWLIMIT is a trick to prevent zero-divisions in the surface-area calculations.
// whenever a value that is about to be used as a divisor turns out to be smaller
// than LOWLIMIT, it is replaced by LOWLIMIT at as early a stage as possible, which
// keeps the effect on the results minimal...

#define LOWLIMIT 0.0000001	// 0.0000001 seems to work quite well...

/*################################################################################################*/

// constructs an evaluator bound to the engine tmpe.
// only the tables indexed by global atom index are allocated here; every atom starts
// out unregistered (negative radius, no local index). all tables indexed by local atom
// index stay NULL until RegisterAtomsFinished() creates them.

sasaeval::sasaeval(engine * tmpe)
{
	eng = tmpe;
	natm_GLOB = eng->GetAtomCount();
	
	// nothing on the "local" side exists yet...
	
	natm_loc = NOT_DEFINED;
	
	index_l2g = NULL;
	radius1 = NULL;
	radius2 = NULL;
	dist1 = NULL;
	dist2 = NULL;
	nl = NULL;
	sasa = NULL;
	d_sasa = NULL;
	
	// the "global" side: one radius and one global->local index per engine atom.
	
	radius_GLOB = new f64[natm_GLOB];
	index_GLOB_2_LOC = new i32u[natm_GLOB];
	
	i32u atom = 0;
	while (atom < natm_GLOB)
	{
		radius_GLOB[atom] = -1.0;		// negative radius means "not registered".
		index_GLOB_2_LOC[atom] = NOT_DEFINED;
		
		++atom;
	}
}

// releases every table owned by the evaluator.
// the tables created by RegisterAtomsFinished() are still NULL if that function was
// never called; delete[] on a NULL pointer does nothing, so those need no guards.

sasaeval::~sasaeval(void)
{
	delete[] radius_GLOB;
	radius_GLOB = NULL;
	
	delete[] index_GLOB_2_LOC;
	index_GLOB_2_LOC = NULL;
	
	delete[] index_l2g;
	index_l2g = NULL;
	
	delete[] radius1;
	radius1 = NULL;
	
	delete[] radius2;
	radius2 = NULL;
	
	delete[] dist1;
	dist1 = NULL;
	
	delete[] dist2;
	dist2 = NULL;
	
	if (nl != NULL)
	{
		// each neighbor-list record owns an index table that was allocated with
		// new[] in RegisterAtomsFinished(); these must be released one by one
		// before the array of records itself, otherwise they are leaked.
		// NOTE(review): this assumes that cg_nbt3_nl has no destructor of its own
		// that would already release the index table -- confirm against the header.
		
		for (i32s i = 0;i < natm_loc;i++)
		{
			delete[] nl[i].index;
			nl[i].index = NULL;
		}
		
		delete[] nl;
		nl = NULL;
	}
	
	delete[] sasa;
	sasa = NULL;
	
	delete[] d_sasa;
	d_sasa = NULL;
}

// registers the atom with global index atmi_GLOB using radius r.
// an out-of-range index or a radius below 0.001 is reported through assertion_failed().
// returns true when the radius was stored, and false (after printing a warning) when
// the atom already had a radius registered; in that case the old radius is kept.

bool sasaeval::RegisterAtom(i32u atmi_GLOB, double r)
{
	const bool index_overflow = (atmi_GLOB >= (i32u) natm_GLOB);
	if (index_overflow) assertion_failed(__FILE__, __LINE__, "atmi_GLOB overflow.");
	
	const bool radius_too_small = (r < 0.001);
	if (radius_too_small) assertion_failed(__FILE__, __LINE__, "bad radius.");
	
	f64 & stored_radius = radius_GLOB[atmi_GLOB];
	
	// a non-negative stored radius means that this atom has been seen before.
	const bool already_registered = (stored_radius >= 0.0);
	if (!already_registered)
	{
		stored_radius = r;
		return true;
	}
	
	cout << _("WARNING : sasaeval::RegisterAtom() : atom ") << atmi_GLOB << _(" is already registered!") << endl;
	return false;
}

// closes the registration phase and builds all tables that are indexed by local atom index.
// the registered atoms (those with a non-negative radius) get consecutive local indices in
// the order of their global indices. after this call natm_loc is non-negative, which is
// what HandleNL() and Evaluate() test for.

void sasaeval::RegisterAtomsFinished(void)
{
	natm_loc = 0;
	
	// pass 1 : count the registered atoms and fill the global->local index table.
	
	for (i32s i = 0;i < natm_GLOB;i++)
	{
		if (radius_GLOB[i] < 0.0)
		{
			index_GLOB_2_LOC[i] = NOT_DEFINED;	// should be already...
		}
		else
		{
			index_GLOB_2_LOC[i] = natm_loc;
			natm_loc++;
		}
	}
	
	index_l2g = new i32u[natm_loc];
	
	radius1 = new f64[natm_loc];
	radius2 = new f64[natm_loc];
	
	// pass 2 : fill the local->global index table, and store the radii (radius1)
	// and the squared radii (radius2) by local index.
	
	int localcounter = 0;
	for (i32s i = 0;i < natm_GLOB;i++)
	{
		if (radius_GLOB[i] < 0.0) continue;
		
		index_l2g[localcounter] = i;
		
		const f64 r = radius_GLOB[i];
		
		radius1[localcounter] = r;
		radius2[localcounter] = r * r;
		
		localcounter++;
	}
	
	// dist2 holds one distance for each unordered pair of local atoms, stored row by row
	// as a triangular table without the diagonal; dist1[a] is the offset of row a, so
	// that the pair (a, b) with a < b is found at dist2[dist1[a] + (b - a) - 1].
	
	dist1 = new i32s[natm_loc];
	dist2 = new f64[natm_loc * (natm_loc - 1) / 2];
	
	i32s n1 = 0; i32s n2 = 0;
	while (n2 < natm_loc)
	{
		dist1[n2++] = n1;
		n1 += natm_loc - n2;
	}
	
	nl = new cg_nbt3_nl[natm_loc];
	
	for (i32s i = 0;i < natm_loc;i++)
	{
		nl[i].index = new i32s[SIZE_NLI];
		
		// the lists must start out empty: HandleNL() increments index_count, and
		// without this it would read an uninitialized value whenever it is called
		// before the first Evaluate().
		
		nl[i].index_count = 0;
	}
	
	sasa = new f64[natm_loc];
	d_sasa = new f64[natm_loc * 3];
}

// stores the distance between two registered atoms, and adds the atoms to each other's
// neighbor lists if their spheres overlap (dist is less than the sum of the two radii).
//
// atmiA_GLOB, atmiB_GLOB : global indices of two different atoms, both registered.
// dist                   : the distance between the atoms.
//
// calling this before RegisterAtomsFinished(), passing bad or unregistered atoms, or
// filling up a neighbor list is reported through assertion_failed().

void sasaeval::HandleNL(i32u atmiA_GLOB, i32u atmiB_GLOB, f64 dist)
{
	if (natm_loc < 0)
	{
		assertion_failed(__FILE__, __LINE__, "atom registration is not finished!");
		return;		// the local tables do not exist; do not touch them.
	}
	
	bool bad_atoms = false;
	if (atmiA_GLOB >= (i32u) natm_GLOB) bad_atoms = true;
	if (atmiB_GLOB >= (i32u) natm_GLOB) bad_atoms = true;
	if (atmiA_GLOB == atmiB_GLOB) bad_atoms = true;
	
	if (bad_atoms)
	{
		ostringstream msg;
		msg << "bad atoms " << atmiA_GLOB << " " << atmiB_GLOB;
		assertion_failed(__FILE__, __LINE__, msg.str().c_str());
		return;
	}
	
	const i32u atmi[2] = { index_GLOB_2_LOC[atmiA_GLOB], index_GLOB_2_LOC[atmiB_GLOB] };
	
	// atoms that were never registered have no valid local index (their entry in the
	// global->local table is still NOT_DEFINED); using such a value as an index into
	// the local tables below would be an out-of-bounds access.
	
	if (atmi[0] >= (i32u) natm_loc || atmi[1] >= (i32u) natm_loc)
	{
		ostringstream msg;
		msg << "unregistered atoms " << atmiA_GLOB << " " << atmiB_GLOB;
		assertion_failed(__FILE__, __LINE__, msg.str().c_str());
		return;
	}
	
	// the distance table is triangular; address it using the smaller local index as
	// the row: atmi[first] is the smaller index and atmi[!first] is the larger one.
	
	const bool first = (atmi[0] > atmi[1]);
	dist2[dist1[atmi[first]] + (atmi[!first] - atmi[first]) - 1] = dist;
	
	if (dist < (radius1[atmi[0]] + radius1[atmi[1]]))
	{
		// the count is checked right after each insertion, so it always stays below
		// SIZE_NLI and the writes themselves never go past the end of the table.
		
		nl[atmi[0]].index[nl[atmi[0]].index_count++] = atmi[1];
		if (nl[atmi[0]].index_count >= SIZE_NLI)
		{
			assertion_failed(__FILE__, __LINE__, "SASA NL index table overflow!");
		}
		
		nl[atmi[1]].index[nl[atmi[1]].index_count++] = atmi[0];
		if (nl[atmi[1]].index_count >= SIZE_NLI)
		{
			assertion_failed(__FILE__, __LINE__, "SASA NL index table overflow!");
		}
	}
}

// evaluates the surface-area term for every registered atom.
//
// p1 : if p1 > 0, the gradient table d_sasa is computed in addition to the sasa table.
//
// input  : the pair distances and the neighbor lists collected by HandleNL() since the
//          previous call, and the current coordinates in eng->crd.
// output : sasa[i] for each local atom i; an atom without any neighbors gets 4*pi, so the
//          values are not scaled by the squared radius here. atoms that are found to be
//          fully buried keep the value 0.0. if p1 > 0, also d_sasa[i * 3 + xyz] is filled.
//
// the neighbor lists are emptied at the END of this function, after they have been used.
// (the lists used to be emptied at the beginning of this function, which meant that the
// lists collected by HandleNL() were thrown away before they were ever read, and every
// atom was treated as an isolated sphere.)

void sasaeval::Evaluate(i32s p1)
{
	if (natm_loc < 0)
	{
		assertion_failed(__FILE__, __LINE__, "atom registration is not finished!");
	}
	
	// clear the results of the previous evaluation.
	
	for (i32s n1 = 0;n1 < natm_loc;n1++)
	{
		sasa[n1] = 0.0;
		
		if (p1 > 0)
		{
			d_sasa[n1 * 3 + 0] = 0.0;
			d_sasa[n1 * 3 + 1] = 0.0;
			d_sasa[n1 * 3 + 2] = 0.0;
		}
	}
	
	cg_nbt3_nl * nlist = nl;
	
	for (i32s n1 = 0;n1 < natm_loc;n1++)
	{
		// collect the neighbors of atom n1 together with their distances, and sort them.
		// the distance lookup uses the same triangular addressing as HandleNL().
		
		cg_nbt3_nd ndt[SIZE_NLI];
		for (i32s n2 = 0;n2 < nlist[n1].index_count;n2++)
		{
			ndt[n2].index = nlist[n1].index[n2];
			i32s atmi[2] = { n1, ndt[n2].index }; bool first = (atmi[0] > atmi[1]);
			ndt[n2].distance = dist2[dist1[atmi[first]] + (atmi[!first] - atmi[first]) - 1];
		}
		
		sort(ndt, ndt + nlist[n1].index_count);
		i32s n_count = 0; i32s nt[SIZE_NT];
		
		// neighbor-list reduction... THIS WON'T WORK IF ANY OF THE BT1/NBT1-TERMS ARE LEFT OUT!!!
		
		// test this against a slow-but-simple implementation?!?!?!
		// seems to be OK because different layers give similar results...
		
		for (i32s n2 = 0;n2 < nlist[n1].index_count;n2++)
		{
			i32s ind1 = ndt[n2].index;
			f64 dij = ndt[n2].distance;
			
			bool flag = true;
			
			for (i32s n3 = n2 + 1;n3 < nlist[n1].index_count;n3++)
			{
				i32s ind2 = ndt[n3].index;
				
				i32s atmi[2] = { ind1, ind2 }; bool first = (atmi[0] > atmi[1]);
				f64 djk = dist2[dist1[atmi[first]] + (atmi[!first] - atmi[first]) - 1];
				
				if (djk > dij) continue;
				
				f64 dij2 = dij * dij; f64 djk2 = djk * djk;
				f64 dik = ndt[n3].distance; f64 dik2 = dik * dik;
				
				// here dij and dik both represent distances which should never be
				// very close to zero (if LJ-terms work as they should) -> no checking
				
				f64 ca = (radius2[n1] + dij2 - radius2[ind1]) / (2.0 * radius1[n1] * dij);
				f64 cb = (radius2[n1] + dik2 - radius2[ind2]) / (2.0 * radius1[n1] * dik);
				f64 cg = (dij2 + dik2 - djk2) / (2.0 * dij * dik);
				
				f64 sa2 = 1.0 - ca * ca;
				f64 sg2 = 1.0 - cg * cg;
				
				f64 dc = sa2 * sg2;
				if (dc < 0.0) dc = 0.0;		// domain check...
				
				if (cb < ca * cg - sqrt(dc))
				{
					flag = false;
					break;
				}
			}
			
			if (flag)
			{
				nt[n_count++] = ind1;
				if (n_count >= SIZE_NT)
				{
					assertion_failed(__FILE__, __LINE__, "NT overflow!");
				}
			}
		}
		
		i32s coi_count = 0; cg_nbt3_coi coit[SIZE_COI];
		
		// next we will create the coi-table...
		// there will be one record for each neighbor that survived the reduction above.
		
		for (i32s n2 = 0;n2 < n_count;n2++)
		{
			coit[coi_count].index = nt[n2]; coit[coi_count].flag = false;
			
			// t1a is the vector from atom n1 to the neighbor, and t1b is its squared length.
			
			f64 t1a[3]; f64 t1b = 0.0;
			for (i32s n3 = 0;n3 < 3;n3++)
			{
				i32s tmpi;
				
				tmpi = index_l2g[n1];
				f64 t9a = eng->crd[tmpi * 3 + n3];
				
				tmpi = index_l2g[coit[coi_count].index];
				f64 t9b = eng->crd[tmpi * 3 + n3];
				
				t1a[n3] = t9b - t9a;
				t1b += t1a[n3] * t1a[n3];
			}
			
			f64 t1c = sqrt(t1b);
			coit[coi_count].dist = t1c;
			
			// also t1c is a distance which should never be close to zero -> no checking
			
			f64 t2a[3];		// the unit vector from atom n1 to the neighbor.
			for (i32s n3 = 0;n3 < 3;n3++)
			{
				t2a[n3] = t1a[n3] / t1c;
				coit[coi_count].dv[n3] = t2a[n3];
			}
			
			coit[coi_count].g = (t1b + radius2[n1] - radius2[coit[coi_count].index]) / (2.0 * t1c);
			coit[coi_count].ct = coit[coi_count].g / radius1[n1];
			
			if (p1 > 0)
			{
				for (i32s n3 = 0;n3 < 3;n3++)
				{
					for (i32s n4 = 0;n4 < 3;n4++)
					{
						f64 t9a = t2a[n3] * t2a[n4]; f64 t9b;
						if (n3 != n4) t9b = -t9a; else t9b = 1.0 - t9a;
						coit[coi_count].ddv[n3][n4] = t9b / t1c;
					}
				}
				
				f64 t3a = (t1c - coit[coi_count].g) / t1c;
				coit[coi_count].dg[0] = t3a * coit[coi_count].dv[0];
				coit[coi_count].dg[1] = t3a * coit[coi_count].dv[1];
				coit[coi_count].dg[2] = t3a * coit[coi_count].dv[2];
				
				coit[coi_count].dct[0] = coit[coi_count].dg[0] / radius1[n1];
				coit[coi_count].dct[1] = coit[coi_count].dg[1] / radius1[n1];
				coit[coi_count].dct[2] = coit[coi_count].dg[2] / radius1[n1];
			}
			
			coit[coi_count++].ipd_count = 0;
			if (coi_count >= SIZE_COI)
			{
				assertion_failed(__FILE__, __LINE__, "COI overflow!");
			}
		}
		
		i32s ips_total_count = 0;
		i32s ips_count = 0; cg_nbt3_ips ipst[SIZE_IPS];
		
		// next we will create the ips-table...
		// all pairs of coi-records are tested here; for each pair that intersects, the two
		// intersection points are computed relative to the position of atom n1.
		
		for (i32s n2 = 0;n2 < coi_count - 1;n2++)
		{
			for (i32s n3 = n2 + 1;n3 < coi_count;n3++)
			{
				f64 t1a[3];
				t1a[0] = coit[n2].dv[0] * coit[n3].dv[0];
				t1a[1] = coit[n2].dv[1] * coit[n3].dv[1];
				t1a[2] = coit[n2].dv[2] * coit[n3].dv[2];
				
				f64 t1b = t1a[0] + t1a[1] + t1a[2];	// cos phi
				
				if (t1b < -1.0) t1b = -1.0;	// domain check...
				if (t1b > +1.0) t1b = +1.0;	// domain check...
				
				f64 t1c = 1.0 - t1b * t1b;		// sin^2 phi
				if (t1c < LOWLIMIT) t1c = LOWLIMIT;
				
				f64 t2a = (coit[n2].g - coit[n3].g * t1b) / t1c;	// tau_kj
				f64 t2b = (coit[n3].g - coit[n2].g * t1b) / t1c;	// tau_jk
				
				f64 t2c = radius2[n1] - coit[n2].g * t2a - coit[n3].g * t2b;	// gamma^2
				if (t2c < LOWLIMIT) continue;		// these will not intercept...
				
				ips_total_count++;
				coit[n2].flag = true;
				coit[n3].flag = true;
				
				f64 t3a[3];	// eta
				t3a[0] = coit[n2].dv[0] * t2a + coit[n3].dv[0] * t2b;
				t3a[1] = coit[n2].dv[1] * t2a + coit[n3].dv[1] * t2b;
				t3a[2] = coit[n2].dv[2] * t2a + coit[n3].dv[2] * t2b;
				
				f64 t1d = sqrt(t1c);	// sin phi
				
				f64 t3b[3];	// omega
				t3b[0] = (coit[n2].dv[1] * coit[n3].dv[2] - coit[n2].dv[2] * coit[n3].dv[1]) / t1d;
				t3b[1] = (coit[n2].dv[2] * coit[n3].dv[0] - coit[n2].dv[0] * coit[n3].dv[2]) / t1d;
				t3b[2] = (coit[n2].dv[0] * coit[n3].dv[1] - coit[n2].dv[1] * coit[n3].dv[0]) / t1d;
				
				f64 t2d = sqrt(t2c);	// gamma
				
				for (i32s n4 = 0;n4 < 3;n4++)
				{
					f64 t9a = t3b[n4] * t2d;
					ipst[ips_count].ipv[0][n4] = t3a[n4] - t9a;
					ipst[ips_count].ipv[1][n4] = t3a[n4] + t9a;
				}
				
				// skip those intersection points that fall inside any other sphere...
				// SKIP ALSO IF EQUAL DISTANCE??? i.e. compare using "<" or "<=" ???
				
				bool skip_both = false;
				bool skip[2] = { false, false };
				for (i32s n4 = 0;n4 < n_count;n4++)
				{
					i32s n5 = nt[n4];
					if (n5 == coit[n2].index || n5 == coit[n3].index) continue;
					
	f64 t9a[3];
	t9a[0] = (eng->crd[index_l2g[n1] * 3 + 0] + ipst[ips_count].ipv[0][0]) - eng->crd[index_l2g[n5] * 3 + 0];
	t9a[1] = (eng->crd[index_l2g[n1] * 3 + 1] + ipst[ips_count].ipv[0][1]) - eng->crd[index_l2g[n5] * 3 + 1];
	t9a[2] = (eng->crd[index_l2g[n1] * 3 + 2] + ipst[ips_count].ipv[0][2]) - eng->crd[index_l2g[n5] * 3 + 2];
	f64 t9b = t9a[0] * t9a[0] + t9a[1] * t9a[1] + t9a[2] * t9a[2];
	if (t9b < radius2[n5]) skip[0] = true;
	
	f64 t9c[3];
	t9c[0] = (eng->crd[index_l2g[n1] * 3 + 0] + ipst[ips_count].ipv[1][0]) - eng->crd[index_l2g[n5] * 3 + 0];
	t9c[1] = (eng->crd[index_l2g[n1] * 3 + 1] + ipst[ips_count].ipv[1][1]) - eng->crd[index_l2g[n5] * 3 + 1];
	t9c[2] = (eng->crd[index_l2g[n1] * 3 + 2] + ipst[ips_count].ipv[1][2]) - eng->crd[index_l2g[n5] * 3 + 2];
	f64 t9d = t9c[0] * t9c[0] + t9c[1] * t9c[1] + t9c[2] * t9c[2];
	if (t9d < radius2[n5]) skip[1] = true;
					
					skip_both = (skip[0] && skip[1]);
					if (skip_both) break;
				}
				
				// ips_count is not incremented when we leave here, so the record
				// that was just filled will be reused for the next pair.
				
				if (skip_both) continue;	// overwrite this one...
				
				ipst[ips_count].coi[0] = n2;
				ipst[ips_count].coi[1] = n3;
				
				if (!skip[0])
				{
					coit[n2].AddIPD(ipst[ips_count].ipv[0], ips_count);
					coit[n3].AddIPD(ipst[ips_count].ipv[0], ips_count | ORDER_FLAG);
				}
				
				if (!skip[1])
				{
					coit[n2].AddIPD(ipst[ips_count].ipv[1], ips_count | INDEX_FLAG | ORDER_FLAG);
					coit[n3].AddIPD(ipst[ips_count].ipv[1], ips_count | INDEX_FLAG);
				}
				
				if (p1 > 0)
				{
					f64 t1f[3];	// d(cos phi) / dXk
					t1f[0] = (coit[n3].dv[0] - t1b * coit[n2].dv[0]) / coit[n2].dist;
					t1f[1] = (coit[n3].dv[1] - t1b * coit[n2].dv[1]) / coit[n2].dist;
					t1f[2] = (coit[n3].dv[2] - t1b * coit[n2].dv[2]) / coit[n2].dist;
					
					f64 t1g[3];	// d(cos phi) / dXj
					t1g[0] = (coit[n2].dv[0] - t1b * coit[n3].dv[0]) / coit[n3].dist;
					t1g[1] = (coit[n2].dv[1] - t1b * coit[n3].dv[1]) / coit[n3].dist;
					t1g[2] = (coit[n2].dv[2] - t1b * coit[n3].dv[2]) / coit[n3].dist;
					
					f64 t2e[3];	// d(tau_kj) / dXk
					t2e[0] = (t1f[0] * (2.0 * t2a * t1b - coit[n3].g) + coit[n2].dg[0]) / t1c;
					t2e[1] = (t1f[1] * (2.0 * t2a * t1b - coit[n3].g) + coit[n2].dg[1]) / t1c;
					t2e[2] = (t1f[2] * (2.0 * t2a * t1b - coit[n3].g) + coit[n2].dg[2]) / t1c;
					
					f64 t2f[3];	// d(tau_kj) / dXj
					t2f[0] = (t1g[0] * (2.0 * t2a * t1b - coit[n3].g) - t1b * coit[n3].dg[0]) / t1c;
					t2f[1] = (t1g[1] * (2.0 * t2a * t1b - coit[n3].g) - t1b * coit[n3].dg[1]) / t1c;
					t2f[2] = (t1g[2] * (2.0 * t2a * t1b - coit[n3].g) - t1b * coit[n3].dg[2]) / t1c;
					
					f64 t2g[3];	// d(tau_jk) / dXk
					t2g[0] = (t1f[0] * (2.0 * t2b * t1b - coit[n2].g) - t1b * coit[n2].dg[0]) / t1c;
					t2g[1] = (t1f[1] * (2.0 * t2b * t1b - coit[n2].g) - t1b * coit[n2].dg[1]) / t1c;
					t2g[2] = (t1f[2] * (2.0 * t2b * t1b - coit[n2].g) - t1b * coit[n2].dg[2]) / t1c;
					
					f64 t2h[3];	// d(tau_jk) / dXj
					t2h[0] = (t1g[0] * (2.0 * t2b * t1b - coit[n2].g) + coit[n3].dg[0]) / t1c;
					t2h[1] = (t1g[1] * (2.0 * t2b * t1b - coit[n2].g) + coit[n3].dg[1]) / t1c;
					t2h[2] = (t1g[2] * (2.0 * t2b * t1b - coit[n2].g) + coit[n3].dg[2]) / t1c;
					
					f64 t3c[3][3];	// d(eta) / dXk
					f64 t3d[3][3];	// d(eta) / dXj
					
					for (i32s n4 = 0;n4 < 3;n4++)
					{
						for (i32s n5 = 0;n5 < 3;n5++)
						{
	f64 t9a = coit[n2].dv[n5]; f64 t9b = coit[n3].dv[n5];
	t3c[n4][n5] = t9a * t2e[n4] + t9b * t2g[n4] + t2a * coit[n2].ddv[n4][n5];
	t3d[n4][n5] = t9a * t2f[n4] + t9b * t2h[n4] + t2b * coit[n3].ddv[n4][n5];
						}
					}
					
					f64 t3e[3][3];	// d(omega) / dXk
					f64 t3f[3][3];	// d(omega) / dXj
					
					for (i32s n4 = 0;n4 < 3;n4++)
					{
						for (i32s n5 = 0;n5 < 3;n5++)
						{
							t3e[n4][n5] = t1b * t3b[n5] * t1f[n4] / t1c;
							t3f[n4][n5] = t1b * t3b[n5] * t1g[n4] / t1c;
						}
						
	t3e[n4][0] += (coit[n2].ddv[n4][1] * coit[n3].dv[2] - coit[n2].ddv[n4][2] * coit[n3].dv[1]) / t1d;
	t3e[n4][1] += (coit[n2].ddv[n4][2] * coit[n3].dv[0] - coit[n2].ddv[n4][0] * coit[n3].dv[2]) / t1d;
	t3e[n4][2] += (coit[n2].ddv[n4][0] * coit[n3].dv[1] - coit[n2].ddv[n4][1] * coit[n3].dv[0]) / t1d;
	
	t3f[n4][0] += (coit[n2].dv[1] * coit[n3].ddv[n4][2] - coit[n2].dv[2] * coit[n3].ddv[n4][1]) / t1d;
	t3f[n4][1] += (coit[n2].dv[2] * coit[n3].ddv[n4][0] - coit[n2].dv[0] * coit[n3].ddv[n4][2]) / t1d;
	t3f[n4][2] += (coit[n2].dv[0] * coit[n3].ddv[n4][1] - coit[n2].dv[1] * coit[n3].ddv[n4][0]) / t1d;
						
					}
					
	f64 t2i[3];	// d(gamma) / dXk
	t2i[0] = -(coit[n2].g * t2e[0] + t2a * coit[n2].dg[0] + coit[n3].g * t2g[0]) / (2.0 * t2d);
	t2i[1] = -(coit[n2].g * t2e[1] + t2a * coit[n2].dg[1] + coit[n3].g * t2g[1]) / (2.0 * t2d);
	t2i[2] = -(coit[n2].g * t2e[2] + t2a * coit[n2].dg[2] + coit[n3].g * t2g[2]) / (2.0 * t2d);
						
	f64 t2j[3];	// d(gamma) / dXj
	t2j[0] = -(coit[n2].g * t2f[0] + coit[n3].g * t2h[0] + t2b * coit[n3].dg[0]) / (2.0 * t2d);
	t2j[1] = -(coit[n2].g * t2f[1] + coit[n3].g * t2h[1] + t2b * coit[n3].dg[1]) / (2.0 * t2d);
	t2j[2] = -(coit[n2].g * t2f[2] + coit[n3].g * t2h[2] + t2b * coit[n3].dg[2]) / (2.0 * t2d);
					
					// the final result is derivatives for points dipv[2][2][3][3].
					// indexes are as follows: [point][atom][variable][xyz].
					
					for (i32s n4 = 0;n4 < 3;n4++)
					{
						for (i32s n5 = 0;n5 < 3;n5++)
						{
							ipst[ips_count].dipv[0][0][n4][n5] = t3c[n4][n5];
							ipst[ips_count].dipv[0][1][n4][n5] = t3d[n4][n5];
							ipst[ips_count].dipv[1][0][n4][n5] = t3c[n4][n5];
							ipst[ips_count].dipv[1][1][n4][n5] = t3d[n4][n5];
						}
						
						for (i32s n5 = 0;n5 < 3;n5++)
						{
							f64 t9a = t3b[n5] * t2i[n4] + t2d * t3e[n4][n5];
							f64 t9b = t3b[n5] * t2j[n4] + t2d * t3f[n4][n5];
							
							ipst[ips_count].dipv[0][0][n4][n5] -= t9a;
							ipst[ips_count].dipv[0][1][n4][n5] -= t9b;
							ipst[ips_count].dipv[1][0][n4][n5] += t9a;
							ipst[ips_count].dipv[1][1][n4][n5] += t9b;
						}
					}
				}
				
				ips_count++;
				if (ips_count >= SIZE_IPS)
				{
					assertion_failed(__FILE__, __LINE__, "IPS overflow!");
				}
			}
		}
		
		i32s arc_count = 0; cg_nbt3_arc arct[SIZE_ARC];
		
		// next we will create the arc-table...
		// for each coi-record the intersection point records are sorted, and an arc is
		// created for each record without ORDER_FLAG that is directly followed (cyclically)
		// by a record with ORDER_FLAG.
		
		for (i32s n2 = 0;n2 < coi_count;n2++)
		{
			f64 t1z = radius2[n1] - coit[n2].g * coit[n2].g;
			if (t1z < 0.0) t1z = 0.0;	// domain check...
			
			f64 t1a = sqrt(t1z);
			if (t1a < LOWLIMIT) t1a = LOWLIMIT;
			
			sort(coit[n2].ipdt, coit[n2].ipdt + coit[n2].ipd_count);
			
			for (i32s n3 = 0;n3 < coit[n2].ipd_count;n3++)
			{
				if (coit[n2].ipdt[n3].ipdata & ORDER_FLAG) continue;
				i32s n4 = n3 + 1; if (n4 == coit[n2].ipd_count) n4 = 0;
				if (!(coit[n2].ipdt[n4].ipdata & ORDER_FLAG)) continue;
				
				arct[arc_count].coi = n2; arct[arc_count].flag = false;
				
				arct[arc_count].ipdata[0] = (coit[n2].ipdt[n3].ipdata & ~ORDER_FLAG);
				arct[arc_count].ipdata[1] = (coit[n2].ipdt[n4].ipdata & ~ORDER_FLAG);
				
				// split the ipdata values into the ips-table index (i1a/i2a) and
				// the INDEX_FLAG bit (i1b/i2b) that selects one of the two points.
				
				i32s i1a = (arct[arc_count].ipdata[0] & FLAG_MASK);
				bool i1b = (arct[arc_count].ipdata[0] & INDEX_FLAG ? 1 : 0);
				
				i32s i2a = (arct[arc_count].ipdata[1] & FLAG_MASK);
				bool i2b = (arct[arc_count].ipdata[1] & INDEX_FLAG ? 1 : 0);
				
				arct[arc_count].index[0][0] = coit[ipst[i1a].coi[i1b]].index;
				arct[arc_count].index[0][1] = coit[ipst[i1a].coi[!i1b]].index;
				
				arct[arc_count].index[1][0] = coit[ipst[i2a].coi[!i2b]].index;
				arct[arc_count].index[1][1] = coit[ipst[i2a].coi[i2b]].index;
				
				// let's compute the tangent vectors...
				
				f64 * ref1 = ipst[i1a].ipv[i1b];
				arct[arc_count].tv[0][0] = (ref1[1] * coit[n2].dv[2] - ref1[2] * coit[n2].dv[1]) / t1a;
				arct[arc_count].tv[0][1] = (ref1[2] * coit[n2].dv[0] - ref1[0] * coit[n2].dv[2]) / t1a;
				arct[arc_count].tv[0][2] = (ref1[0] * coit[n2].dv[1] - ref1[1] * coit[n2].dv[0]) / t1a;
				
				f64 * ref2 = ipst[i2a].ipv[i2b];
				arct[arc_count].tv[1][0] = (ref2[1] * coit[n2].dv[2] - ref2[2] * coit[n2].dv[1]) / t1a;
				arct[arc_count].tv[1][1] = (ref2[2] * coit[n2].dv[0] - ref2[0] * coit[n2].dv[2]) / t1a;
				arct[arc_count].tv[1][2] = (ref2[0] * coit[n2].dv[1] - ref2[1] * coit[n2].dv[0]) / t1a;
				
				if (p1 > 0)
				{
					for (i32s n4 = 0;n4 < 3;n4++)
					{
						f64 t9a = coit[n2].g * coit[n2].dg[n4] / t1a;
						for (i32s n5 = 0;n5 < 3;n5++)
						{
							arct[arc_count].dtv[0][0][n4][n5] = t9a * arct[arc_count].tv[0][n5];
							arct[arc_count].dtv[1][0][n4][n5] = t9a * arct[arc_count].tv[1][n5];
						}
						
	f64 * ref1a = ipst[i1a].dipv[i1b][i1b][n4];	// d(P1) / dXk
	arct[arc_count].dtv[0][0][n4][0] += ref1a[1] * coit[n2].dv[2] - ref1a[2] * coit[n2].dv[1];
	arct[arc_count].dtv[0][0][n4][1] += ref1a[2] * coit[n2].dv[0] - ref1a[0] * coit[n2].dv[2];
	arct[arc_count].dtv[0][0][n4][2] += ref1a[0] * coit[n2].dv[1] - ref1a[1] * coit[n2].dv[0];
	
	f64 * ref1b = ipst[i2a].dipv[i2b][!i2b][n4];	// d(P2) / dXk
	arct[arc_count].dtv[1][0][n4][0] += ref1b[1] * coit[n2].dv[2] - ref1b[2] * coit[n2].dv[1];
	arct[arc_count].dtv[1][0][n4][1] += ref1b[2] * coit[n2].dv[0] - ref1b[0] * coit[n2].dv[2];
	arct[arc_count].dtv[1][0][n4][2] += ref1b[0] * coit[n2].dv[1] - ref1b[1] * coit[n2].dv[0];
	
	f64 * ref2a = ipst[i1a].ipv[i1b];
	arct[arc_count].dtv[0][0][n4][0] += ref2a[1] * coit[n2].ddv[n4][2] - ref2a[2] * coit[n2].ddv[n4][1];
	arct[arc_count].dtv[0][0][n4][1] += ref2a[2] * coit[n2].ddv[n4][0] - ref2a[0] * coit[n2].ddv[n4][2];
	arct[arc_count].dtv[0][0][n4][2] += ref2a[0] * coit[n2].ddv[n4][1] - ref2a[1] * coit[n2].ddv[n4][0];
	
	f64 * ref2b = ipst[i2a].ipv[i2b];
	arct[arc_count].dtv[1][0][n4][0] += ref2b[1] * coit[n2].ddv[n4][2] - ref2b[2] * coit[n2].ddv[n4][1];
	arct[arc_count].dtv[1][0][n4][1] += ref2b[2] * coit[n2].ddv[n4][0] - ref2b[0] * coit[n2].ddv[n4][2];
	arct[arc_count].dtv[1][0][n4][2] += ref2b[0] * coit[n2].ddv[n4][1] - ref2b[1] * coit[n2].ddv[n4][0];
						
						for (i32s n5 = 0;n5 < 3;n5++)
						{
							arct[arc_count].dtv[0][0][n4][n5] /= t1a;
							arct[arc_count].dtv[1][0][n4][n5] /= t1a;
						}
						
	f64 * ref3a = ipst[i1a].dipv[i1b][!i1b][n4];	// d(P1) / dXj
	arct[arc_count].dtv[0][1][n4][0] = (ref3a[1] * coit[n2].dv[2] - ref3a[2] * coit[n2].dv[1]) / t1a;
	arct[arc_count].dtv[0][1][n4][1] = (ref3a[2] * coit[n2].dv[0] - ref3a[0] * coit[n2].dv[2]) / t1a;
	arct[arc_count].dtv[0][1][n4][2] = (ref3a[0] * coit[n2].dv[1] - ref3a[1] * coit[n2].dv[0]) / t1a;
	
	f64 * ref3b = ipst[i2a].dipv[i2b][i2b][n4];	// d(P2) / dXj
	arct[arc_count].dtv[1][1][n4][0] = (ref3b[1] * coit[n2].dv[2] - ref3b[2] * coit[n2].dv[1]) / t1a;
	arct[arc_count].dtv[1][1][n4][1] = (ref3b[2] * coit[n2].dv[0] - ref3b[0] * coit[n2].dv[2]) / t1a;
	arct[arc_count].dtv[1][1][n4][2] = (ref3b[0] * coit[n2].dv[1] - ref3b[1] * coit[n2].dv[0]) / t1a;
					}
				}
				
				arc_count++;
				if (arc_count >= SIZE_ARC)
				{
					assertion_failed(__FILE__, __LINE__, "ARC overflow!");
				}
			}
		}
		
		// all cases will pass through this point!!!
		
		f64 area;
		if (!arc_count)
		{
			if (ips_total_count)
			{
				// save the solv-exp value here if needed!!!
				
				// sasa[n1] keeps the value 0.0 that was set at the beginning.
				
				continue;	// fully buried...
			}
			else area = 4.0 * M_PI;
		}
		else
		{
			area = 0.0;
			i32s arc_counter = 0;
			
			// each round of this loop follows one closed chain of arcs, starting from
			// the first arc that has not been used yet.
			
			do
			{
				i32s prev; i32s curr = 0;
				while (arct[curr].flag)
				{
					curr++;
					if (curr == arc_count)
					{
						cout << "area_panic: can't find the first arc!!!" << endl;
						goto area_panic;
					}
				}
				
				i32s first = curr;
				
				f64 sum1 = 0.0;
				f64 sum2 = 0.0;
				
				while (true)
				{
					i32s coi = arct[curr].coi;
					
	f64 t1a[3];
	t1a[0] = arct[curr].tv[1][1] * arct[curr].tv[0][2] - arct[curr].tv[1][2] * arct[curr].tv[0][1];
	t1a[1] = arct[curr].tv[1][2] * arct[curr].tv[0][0] - arct[curr].tv[1][0] * arct[curr].tv[0][2];
	t1a[2] = arct[curr].tv[1][0] * arct[curr].tv[0][1] - arct[curr].tv[1][1] * arct[curr].tv[0][0];
					
					f64 t1b[3];
					t1b[0] = coit[coi].dv[0] * t1a[0];
					t1b[1] = coit[coi].dv[1] * t1a[1];
					t1b[2] = coit[coi].dv[2] * t1a[2];
					
					f64 t1c = (t1b[0] + t1b[1] + t1b[2] < 0.0 ? -1.0 : +1.0);
					
					f64 t2a[3];
					t2a[0] = arct[curr].tv[0][0] * arct[curr].tv[1][0];
					t2a[1] = arct[curr].tv[0][1] * arct[curr].tv[1][1];
					t2a[2] = arct[curr].tv[0][2] * arct[curr].tv[1][2];
					
					f64 t2b = t2a[0] + t2a[1] + t2a[2];
					
					if (t2b < -1.0) t2b = -1.0;	// domain check...
					if (t2b > +1.0) t2b = +1.0;	// domain check...
					
					f64 t2c = (1.0 - t1c) * M_PI + t1c * acos(t2b);
					sum1 += t2c * coit[coi].ct;
					
					if (p1 > 0)
					{
						f64 t2x = fabs(sin(t2c));
						if (t2x < LOWLIMIT) t2x = LOWLIMIT;
						
						f64 t2y = -coit[coi].ct * t1c / t2x;
						
						// 1st are same points and 2nd are different ones...
						
						for (i32s n2 = 0;n2 < 3;n2++)
						{
				f64 t3a[3];
				t3a[0] = arct[curr].dtv[0][0][n2][0] * arct[curr].tv[1][0];
				t3a[1] = arct[curr].dtv[0][0][n2][1] * arct[curr].tv[1][1];
				t3a[2] = arct[curr].dtv[0][0][n2][2] * arct[curr].tv[1][2];
				f64 t3b = t3a[0] + t3a[1] + t3a[2];
				
				f64 t3c[3];
				t3c[0] = arct[curr].tv[0][0] * arct[curr].dtv[1][0][n2][0];
				t3c[1] = arct[curr].tv[0][1] * arct[curr].dtv[1][0][n2][1];
				t3c[2] = arct[curr].tv[0][2] * arct[curr].dtv[1][0][n2][2];
				f64 t3d = t3c[0] + t3c[1] + t3c[2];
				
				f64 t4a[3];
				t4a[0] = arct[curr].dtv[0][1][n2][0] * arct[curr].tv[1][0];
				t4a[1] = arct[curr].dtv[0][1][n2][1] * arct[curr].tv[1][1];
				t4a[2] = arct[curr].dtv[0][1][n2][2] * arct[curr].tv[1][2];
				f64 t4b = t4a[0] + t4a[1] + t4a[2];
				
				f64 t4c[3];
				t4c[0] = arct[curr].tv[0][0] * arct[curr].dtv[1][1][n2][0];
				t4c[1] = arct[curr].tv[0][1] * arct[curr].dtv[1][1][n2][1];
				t4c[2] = arct[curr].tv[0][2] * arct[curr].dtv[1][1][n2][2];
				f64 t4d = t4c[0] + t4c[1] + t4c[2];
				
				f64 t3e = t2y * (t3b + t3d) + t2c * coit[coi].dct[n2];
				f64 t5a = t2y * t4b; f64 t5b = t2y * t4d;
				
				d_sasa[arct[curr].index[0][0] * 3 + n2] += t3e;
				d_sasa[arct[curr].index[0][1] * 3 + n2] += t5a;
				d_sasa[arct[curr].index[1][1] * 3 + n2] += t5b;
				d_sasa[n1 * 3 + n2] -= t3e + t5a + t5b;
						}
					}
					
					// find the arc that starts at the point where this arc ends.
					
					prev = curr; curr = 0;
					i32u ipd = arct[prev].ipdata[1];
					while (true)
					{
						if (arct[curr].ipdata[0] != ipd) curr++;
						else break;
						
						if (curr == arc_count)
						{
							cout << "area_panic: incomplete set of arcs!!!" << endl;
							goto area_panic;
						}
					}
					
					arc_counter++;
					arct[curr].flag = true;
					
					f64 t2d[3];
					t2d[0] = arct[prev].tv[1][0] * arct[curr].tv[0][0];
					t2d[1] = arct[prev].tv[1][1] * arct[curr].tv[0][1];
					t2d[2] = arct[prev].tv[1][2] * arct[curr].tv[0][2];
					
					f64 t2e = t2d[0] + t2d[1] + t2d[2];
					
					if (t2e < -1.0) t2e = -1.0;	// domain check...
					if (t2e > +1.0) t2e = +1.0;	// domain check...
					
					f64 t2f = -acos(t2e); sum2 += t2f;
					
					if (p1 > 0)
					{
						f64 t2x = fabs(sin(t2f));
						if (t2x < LOWLIMIT) t2x = LOWLIMIT;
						
						f64 t2y = 1.0 / t2x;
						
						// prev_k = curr_j and prev_j = curr_k !!!
						
						for (i32s n2 = 0;n2 < 3;n2++)
						{
					f64 t3a[3];
					t3a[0] = arct[prev].dtv[1][0][n2][0] * arct[curr].tv[0][0];
					t3a[1] = arct[prev].dtv[1][0][n2][1] * arct[curr].tv[0][1];
					t3a[2] = arct[prev].dtv[1][0][n2][2] * arct[curr].tv[0][2];
					f64 t3b = t3a[0] + t3a[1] + t3a[2];
					
					f64 t3c[3];
					t3c[0] = arct[prev].tv[1][0] * arct[curr].dtv[0][1][n2][0];
					t3c[1] = arct[prev].tv[1][1] * arct[curr].dtv[0][1][n2][1];
					t3c[2] = arct[prev].tv[1][2] * arct[curr].dtv[0][1][n2][2];
					f64 t3d = t3c[0] + t3c[1] + t3c[2];
					
					f64 t4a[3];
					t4a[0] = arct[prev].dtv[1][1][n2][0] * arct[curr].tv[0][0];
					t4a[1] = arct[prev].dtv[1][1][n2][1] * arct[curr].tv[0][1];
					t4a[2] = arct[prev].dtv[1][1][n2][2] * arct[curr].tv[0][2];
					f64 t4b = t4a[0] + t4a[1] + t4a[2];
					
					f64 t4c[3];
					t4c[0] = arct[prev].tv[1][0] * arct[curr].dtv[0][0][n2][0];
					t4c[1] = arct[prev].tv[1][1] * arct[curr].dtv[0][0][n2][1];
					t4c[2] = arct[prev].tv[1][2] * arct[curr].dtv[0][0][n2][2];
					f64 t4d = t4c[0] + t4c[1] + t4c[2];
					
					f64 t3e = t2y * (t3b + t3d);
					f64 t4e = t2y * (t4b + t4d);
					
					d_sasa[arct[prev].index[1][0] * 3 + n2] += t3e;
					d_sasa[arct[prev].index[1][1] * 3 + n2] += t4e;
					d_sasa[n1 * 3 + n2] -= t3e + t4e;
						}
					}
					
					if (curr == first) break;
				}
				
				area += 2.0 * M_PI + sum1 + sum2;
			} while (arc_counter < arc_count);
			
			// when we have some problems somewhere above (for example, if we have
			// an incomplete set of arcs or no arcs at all; these things are possible
			// in rare special cases; for example we might have to reject some arcs
			// if they contained some singular intermediate values) we will truncate
			// the sum and jump right here.
			
			// in this case we will calculate incorrect value for the area, but the
			// good news is that the value and the gradient will still be consistent.
			
			// since these cases are very rare, this probably won't make big problems
			// in any applications...
			
			area_panic:	// we will jump here in all problematic cases...
			
			while (area > 4.0 * M_PI) area -= 4.0 * M_PI;
		}
		
		// finally here we will handle the single separate patches...
		// these are the coi-records that did not intersect with any other coi-record.
		
		for (i32s n2 = 0;n2 < coi_count;n2++)
		{
			if (coit[n2].flag) continue;
			
			f64 t1a = 2.0 * M_PI / radius1[n1];
			area -= t1a * (radius1[n1] - coit[n2].g);
			
			if (p1 > 0)
			{
				for (i32s n3 = 0;n3 < 3;n3++)
				{
					f64 t1b = t1a * coit[n2].dg[n3];
					
					d_sasa[coit[n2].index * 3 + n3] += t1b;
					d_sasa[n1 * 3 + n3] -= t1b;
				}
			}
		}
		
		sasa[n1] = area;
	}
	
	// the neighbor lists have now been used; empty them, so that the next round of
	// HandleNL() calls will start to fill them from the beginning. this is done in
	// a separate loop because the loop above is left early for fully buried atoms.
	
	for (i32s n1 = 0;n1 < natm_loc;n1++)
	{
		nl[n1].index_count = 0;
	}
}

/*################################################################################################*/

// eof

