/* Copyright (c) 1991-2002 Doshita Lab. Speech Group, Kyoto University */
/* Copyright (c) 2000-2002 Speech and Acoustics Processing Lab., NAIST */
/*   All rights reserved   */

/* wav2mfcc.c --- call waveform to MFCC_E_D_Z converter */

/* $Id: wav2mfcc.c,v 1.5 2002/09/11 22:02:33 ri Exp $ */

#undef SSDEBUG

#include <julius.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>

/*
 * Convert speech[0..speechlen-1] into a newly allocated parameter
 * sequence by calling Wav2MFCC_E_D().
 *
 * Analysis conditions are taken from the global settings (smpPeriod,
 * fsize, fshift, delwin, hipass, lopass, c0_required, ssalpha, ssfloor)
 * and from the DEF_* defaults.  Each output vector holds
 * (DEF_MFCCDIM + 1) * 2 elements, and the header sample type is tagged
 * F_MFCC | F_DELTA | F_CEPNORM together with F_ZEROTH (when c0_required
 * is set) or F_ENERGY (otherwise).
 *
 * Side effects: the global noise spectrum ssbuf / sslen used for
 * spectral subtraction is loaded from ssload_filename the first time it
 * is needed, and is recomputed from the head of the input on every call
 * when sscalc is set.
 *
 * Returns the new parameter (to be released by the caller), or NULL
 * after printing a message when the input is shorter than one frame.
 */
HTK_Param *new_wav2mfcc(SP16 speech[], int speechlen)
{
  HTK_Param *param;
  Value para;			/* parameters for Wav2MFCC */
  int framenum;
  int i;
  int len;

  /* set parameters */
  para.smp_period = smpPeriod;
  para.framesize  = fsize;
  para.frameshift = fshift;
  para.preEmph    = DEF_PREENPH;
  para.mfcc_dim   = DEF_MFCCDIM;
  para.lifter     = DEF_CEPLIF;
  para.delWin     = delwin;
  para.silFloor   = DEF_SILFLOOR;
  para.hipass     = hipass;
  para.lopass     = lopass;
  para.c0         = c0_required;
  para.escale     = 1.0;	/* fixed to 1.0 instead of DEF_ESCALE */
  para.fbank_num  = DEF_FBANK;
  para.cmn        = TRUE;
  para.enormal    = FALSE;
  para.raw_e      = FALSE;
  /* for SS */
  para.ss_alpha   = ssalpha;
  para.ss_floor   = ssfloor;

  /* (base coefficients + 1 energy/c0 term), doubled for the deltas */
  para.vec_num = (para.mfcc_dim + 1) * 2;

  if (ssload_filename && ssbuf == NULL) {
    /* load noise spectrum for spectral subtraction from file (once) */
    if ((ssbuf = new_SS_load_from_file(ssload_filename, &sslen)) == NULL) {
      j_error("Error: failed to read \"%s\"\n", ssload_filename);
    }
  }

  if (sscalc) {
    /* compute noise spectrum from head silence for each input */
    len = sscalc_len * smpFreq / 1000;
    if (len > speechlen) len = speechlen;
#ifdef SSDEBUG
    printf("[%d]\n", len);
#endif
    /* Release the spectrum left over from the previous input (or from
       the file load above) before replacing it; otherwise one buffer is
       leaked per call.
       NOTE(review): assumes the new_SS_*() routines return heap memory
       that is owned solely through ssbuf -- confirm against ss.c. */
    free(ssbuf);
    ssbuf = NULL;
    ssbuf = new_SS_calculate(speech, len, para, &sslen);
  }
#ifdef SSDEBUG
  for(i=0;i<sslen;i++) {
    printf("%d: %f\n", i, ssbuf[i]);
  }
#endif

  /* calculate frame length from speech length, frame size and frame shift.
     The count is only computed when at least one whole frame fits: the
     division truncates toward zero, so for an input slightly shorter
     than one frame the quotient would be 0 and the "+ 1" would report
     one frame that does not actually exist in speech[]. */
  framenum = 0;
  if (speechlen >= para.framesize) {
    framenum = (int)((speechlen - para.framesize) / para.frameshift) + 1;
  }
  if (framenum < 1) {
    j_printerr("input too short, ignored\n");
    return NULL;
  }

  /* malloc new param: one vector of vec_num elements per frame */
  param = new_param();
  param->parvec = (VECT **)mymalloc(sizeof(VECT *) * framenum);
  for(i=0;i<framenum;i++) {
    param->parvec[i] = (VECT *)mymalloc(sizeof(VECT) * para.vec_num);
  }

  /* make the feature vectors from speech data */
  /* (bogus) needs conversion here if integerized */
  Wav2MFCC_E_D(speech, param->parvec, para, speechlen, ssbuf, sslen);

  /* set miscellaneous parameters */
  param->header.samplenum = framenum;
  param->header.wshift = para.smp_period * para.frameshift;
  param->header.sampsize = para.vec_num * sizeof(VECT); /* not compressed */
  if (c0_required) {
    param->header.samptype = F_MFCC | F_ZEROTH | F_DELTA | F_CEPNORM;
  } else {
    param->header.samptype = F_MFCC | F_ENERGY | F_DELTA | F_CEPNORM;
  }
  param->veclen = para.vec_num;
  param->samplenum = framenum;

  return param;
}

