/********************************************************************************************************
 * QRNA - Comparative analysis of biological sequences 
 *         with pair hidden Markov models, pair stochastic context-free
 *        grammars, and probabilistic evolutionary  models.
 *       
 * Version 2.0.0 (JUN 2003)
 *
 * Copyright (C) 2000-2003 Howard Hughes Medical Institute/Washington University School of Medicine
 * All Rights Reserved
 * 
 *     This source code is distributed under the terms of the
 *     GNU General Public License. See the files COPYING and LICENSE
 *     for details.
 ***********************************************************************************************************/

/* cfgio.c
 * I/O of SCFG model to/from disk files
 *
 * ER, Fri Jun 18 15:31:53 CDT 1999
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>

#include "funcs.h"
#include "globals.h"
#include "squid.h"
#include "structs.h"
#include "version.h"


/* The magic number is "yrn1" + 0x80808080 */
static int v10magic     = 0xf9f2eeb1;
static int v10swap      = 0xb1eef2f9;

static void byteswap(char *swap, int nbytes);

/* Function: ReadSCFG()
 * 
 * Purpose:  Read an SCFG from disk in binary format, using the layout
 *           produced by SaveSCFG(): one int magic number followed,
 *           for each of the NDPS states, by Ntrans[fs] doubles.
 *           If the magic number is found byte-reversed, every value
 *           read from the file is byte-swapped before it is returned.
 *
 * Args:     ofp     - open stream to read from (an input stream,
 *                     despite the name)
 *           ret_cfg - RETURN: the newly allocated SCFG; written only
 *                     on success
 *           
 * Return:   1 on success, 0 on failure (after a Warn() message).
 *           On success, *ret_cfg is freed by the caller with FreeSCFG().
 */
int 
ReadSCFG(FILE *ofp, double ***ret_cfg)
{
  int      fs, i;
  double **cfg;
  int      magic;
  int      do_byteswap = FALSE;

  /* Validate the header before allocating anything, so that a missing
   * or unrecognized magic number cannot leak the SCFG.
   */
  if (fread(&magic, sizeof(int), 1, ofp) < 1)
    { Warn("Failed to read magic number from SCFG save file"); return 0; }
  if (magic == v10swap) {
    do_byteswap = TRUE;
    byteswap((char *)&magic, sizeof(int));
  }
  if (magic != v10magic)
    { Warn("bad magic on that SCFG save file"); return 0; }

  cfg = AllocSCFG();

  /* Read every state. SaveSCFG() writes all NDPS of them and the
   * byte-swap pass below also covers all NDPS, so stopping one state
   * short would leave the last state unread.
   */
  for (fs = 0; fs < NDPS; fs++)
    if (fread(cfg[fs], sizeof(double), Ntrans[fs], ofp) < (size_t) Ntrans[fs])
      {
        /* NOTE(review): cfg is still leaked on this path. It should be
         * released with the SCFG destructor (FreeSCFG(), per the header
         * comment) once its prototype has been confirmed.
         */
        Warn("fread failed on SCFG save file");
        return 0;
      }

  if (do_byteswap) {
    for (fs = 0; fs < NDPS; fs++)
      for (i = 0; i < Ntrans[fs]; i++)
        byteswap((char *)&(cfg[fs][i]), sizeof(double));
  }

  *ret_cfg = cfg;

  return 1;
}

/* Function: SaveSCFG()
 * 
 * Purpose:  Dump an SCFG to an open stream in binary form: first the
 *           magic number (one int), then, for each of the NDPS states
 *           in order, that state's Ntrans[] doubles. Values are written
 *           exactly as they sit in memory; no byte-order conversion is
 *           done here.
 *
 * Args:     ofp - open stream to write to
 *           cfg - the SCFG to save
 *
 * Return:   1 on success, 0 as soon as any fwrite() comes up short.
 */
int
SaveSCFG(FILE *ofp, double **cfg)
{
  int dp = 0;

  if (fwrite(&v10magic, sizeof(int), 1, ofp) < 1)
    return 0;

  while (dp < NDPS)
    {
      if (fwrite(cfg[dp], sizeof(double), Ntrans[dp], ofp) < Ntrans[dp])
        return 0;
      dp++;
    }

  return 1;
}
 


/* Function: WriteRdbSCFG()
 * 
 * Purpose:  Write a SCFG to disk in RDB database format.
 *           Though the file has such long lines that it is
 *           essentially unreadable, the strategy is deliberate:
 *           as SCFGs change, this code remains completely general,
 *           and necessary tables can be generated by RDB commands.
 */
void
WriteRdbSCFG(FILE *ofp, double **cfg)
{
  int     i, j;
  int     idx, node;
  int     nparam;
  double  ntrans;
  double *pair;
  double  totpairs = 0.;

  pair = MallocOrDie(sizeof(double) * 16);

  /* Comment section of RDB database file
   */
  fprintf(ofp, "# SCFG output\n");
  fprintf(ofp, "#\n");

  /* And the rest is data.
   */
  for (i = 0; i < NDPS; i++)
    for (idx = 0; idx < Idx[i]; idx++) 
      {
	fprintf(ofp, "%s(%d)\n", stNAME[i], idx);
	ntrans = 0.0;
	nparam = 0;
	
	for (node = 0; node < NNODES; node++)
	  if (Connects(i,node)) {
	    fprintf(ofp, "node %s", dpcNAME(node));
	    for (j = idx*TransPerDp[i]; j < (idx+1)*TransPerDp[i]; j++) 
	      if (node == Ntype(i,j)) {
		if (nparam%4 == 0) fprintf(ofp, "\n");
		fprintf(ofp, "%f\t", cfg[i][j]);
		nparam += 1;
		ntrans += cfg[i][j];
	      }
	    fprintf(ofp, "\n");
	  }
	
	fprintf(ofp, "# param %s = %d \n", stNAME[i], nparam);
	fprintf(ofp, "# trans %s = %2.3f \n\n", stNAME[i], ntrans);
      }
  
  fprintf(ofp, "\n");
  fprintf(ofp, "** pairs output **\n");

  PairsSCFG(cfg, &pair);

  for (i = 0; i < 16; i++)
    totpairs += pair[i];

  fprintf(ofp, "Total # pairs = %f\n", totpairs);
  fprintf(ofp, "\n");
  for (i = 0; i < 16; i++)
    fprintf(ofp, "pairs (%d %d) = %f \t[%f]\n", i/4, i%4, pair[i], pair[i]/totpairs);
  }


/* Function: PairsSCFG()
 *
 * Date:     ER, Fri Jul  9 18:09:50 CDT 1999
 * 
 * Purpose:  Calculates pair frequencies from a SCFG (counts form).
 *           For each of the 4x4 combinations (a,b), the entry at
 *           idx(a,b) is the sum of
 *             cfg[W][idxP(a,b)] + cfg[WB][idxP(a,b)]
 *           plus, over all 4x4 combinations (c,d), the cfg[V] entries
 *           at idxS2S(a,b,c,d), idxS2B(a,b,c,d) and idxS2I(a,b,c,d).
 *
 * Args:     cfg      - the SCFG, in counts form
 *           ret_pair - RETURN: freshly allocated array of 16 doubles.
 *                      Whatever *ret_pair held before is overwritten,
 *                      not freed.
 *
 * Return:   (void)
 */
void
PairsSCFG (double **cfg, double **ret_pair)
{
  double *counts;
  int     cell;
  int     a, b, c, d;

  counts = (double *)MallocOrDie(sizeof(double) * 16);
  for (cell = 0; cell < 16; cell++)
    counts[cell] = 0.;

  /* walk the 4x4 table row by row */
  for (cell = 0; cell < 16; cell++)
    {
      a = cell / 4;
      b = cell % 4;

      counts[idx(a,b)] = cfg[W][idxP(a,b)] + cfg[WB][idxP(a,b)];

      for (c = 0; c < 4; c++)
        for (d = 0; d < 4; d++)
          counts[idx(a,b)] += cfg[V][idxS2S(a,b,c,d)] + cfg[V][idxS2B(a,b,c,d)]
            + cfg[V][idxS2I(a,b,c,d)];
    }

  *ret_pair = counts;
}

/* Function: Pairs5SCFG()
 *
 * Date:     ER, Fri Jul  9 18:09:50 CDT 1999
 * 
 * Purpose:  Calculates pair frequencies from a SCFG (counts form),
 *           stored in a 25-entry table addressed with idx5(). Only
 *           the entries idx5(a,b) with a,b in 0..3 are filled, using
 *           the same sum as PairsSCFG():
 *             cfg[W][idxP(a,b)] + cfg[WB][idxP(a,b)]
 *           plus, over all (c,d) in 0..3, the cfg[V] entries at
 *           idxS2S, idxS2B and idxS2I of (a,b,c,d).
 *           Every other entry (the gaps) stays at zero.
 *
 * Args:     cfg      - the SCFG, in counts form
 *           ret_pair - RETURN: freshly allocated array of 25 doubles.
 *                      Whatever *ret_pair held before is overwritten,
 *                      not freed.
 *
 * Return:   (void)
 */
void
Pairs5SCFG (double **cfg, double **ret_pair)
{
  double *counts;
  double *slot;
  int     n;
  int     a, b, c, d;

  counts = (double *) MallocOrDie(sizeof(double) * 25);

  n = 0;
  while (n < 25)
    counts[n++] = 0.;

  /* fill the counts, leave empty the gaps */ 
  for (a = 0; a < 4; a++)
    {
      for (b = 0; b < 4; b++)
        {
          slot  = &counts[idx5(a,b)];
          *slot = cfg[W][idxP(a,b)] + cfg[WB][idxP(a,b)];

          for (c = 0; c < 4; c++)
            {
              for (d = 0; d < 4; d++)
                *slot += cfg[V][idxS2S(a,b,c,d)] + cfg[V][idxS2B(a,b,c,d)]
                  + cfg[V][idxS2I(a,b,c,d)];
            }
        }
    }

  *ret_pair = counts;
}

/* Function: WriteRdbLogSCFG()
 * 
 * Purpose:  Write a log2-form SCFG to disk in RDB database format.
 */
void
WriteRdbLogSCFG(FILE *ofp, double **cfg)
{
  int i, j, node, idx;
  int nparam;
  
  /* Comment section of RDB database file
   */
  fprintf(ofp, "# SCFG output\n");
  fprintf(ofp, "#\n");
  
  /* And the rest is data.
   */
  for (i = 0; i < NDPS; i++)
    for (idx = 0; idx < Idx[i]; idx++) 
      {
	fprintf(ofp, "%s(%d)\n", stNAME[i], idx);
	nparam = 0;
	
	for (node = 0; node < NNODES; node++)
	  if (Connects(i,node)) {
	    fprintf(ofp, "node %s", dpcNAME(node));
	    for (j = idx*TransPerDp[i]; j < (idx+1)*TransPerDp[i]; j++) 
	      if (node == Ntype(i,j)) {
		if (nparam%4 == 0) fprintf(ofp, "\n");
		if (cfg[i][j] <= -BIGFLOAT)
		  fprintf(ofp, "-inf\t");
		else 
		  fprintf(ofp, "%9.3f\t", cfg[i][j]);
		nparam += 1;
	      }
	    fprintf(ofp, "\n");
	  }  
	fprintf(ofp, "# param %s = %d \n\n", stNAME[i], nparam);
	
      }
}

/* Function: WriteRdbSummary()
 * 
 * Purpose:  Write a more compact view of a counts-based SCFG in
 *           RDB database format. The summary is generated
 *           by summing over node types: for each state i, every
 *           transition count cfg[i][j] is added to the column of
 *           its node type Ntype(i,j). One row is printed per state,
 *           with NNODES tab-separated columns in "%6.0f" format.
 *
 * Args:     ofp - open stream to write to
 *           cfg - the SCFG, in counts form
 *
 * Return:   (void)
 */
void
WriteRdbSummary(FILE *ofp, double **cfg)
{
  int    i, j;
  /* Accumulate in double, like the counts themselves; a float table
   * can no longer represent every integer total beyond 2^24.
   */
  double sum[NDPS][NNODES];

  /* Calculate the by-node summary
   */
  for (i = 0; i < NDPS; i++)
    for (j = 0; j < NNODES; j++)
      sum[i][j] = 0.0;

  for (i = 0; i < NDPS; i++)
    for (j = 0; j < Ntrans[i]; j++)
      sum[i][Ntype(i,j)] += cfg[i][j];

  /* Comment section of RDB database file
   */
  fprintf(ofp, "# SCFG counts data, summarized by node type\n");
  fprintf(ofp, "#\n");

  /* And the rest is data.
   */
  for (i = 0; i < NDPS; i++)
    {
      /* NOTE(review): i is a state index here, yet the row label is
       * taken from dpcNAME(), which the other writers in this file use
       * for node indices (they label states with stNAME[i]). Confirm
       * which name table is intended.
       */
      fprintf(ofp, "%s\t", dpcNAME(i));
      for (j = 0; j < NNODES; j++)
        fprintf(ofp, "%6.0f\t", sum[i][j]);
      fputs("\n", ofp);
    }
}


/* Function: byteswap()
 * 
 * Purpose:  Reverse, in place, the order of the nbytes bytes that
 *           start at swap. This converts a value between big-endian
 *           and little-endian representation.
 *           For example:
 *               int foo = 0x12345678;
 *               byteswap((char *) &foo, sizeof(int));
 *               printf("%x\n", foo)
 *           gives 78563412.
 *
 *           Note: this is only a partial solution to the problem of
 *           binary file portability; the sizes of the stored types
 *           must still agree between writer and reader.
 *
 * Args:     swap   - first byte of the object to reverse
 *           nbytes - size of the object in bytes; values below 2
 *                    leave the data untouched
 *
 * Return:   (void)
 *
 * Date: Sun Feb 12 10:26:22 1995              
 */
static void
byteswap(char *swap, int nbytes)
{
  char *head;
  char *tail;
  char  held;

  if (nbytes < 2)
    return;

  /* walk inwards from both ends, exchanging as we go */
  head = swap;
  tail = swap + (nbytes - 1);
  while (head < tail)
    {
      held  = *head;
      *head = *tail;
      *tail = held;
      head++;
      tail--;
    }
}
