// module FourierEH.c
// Reads and decodes a WAVE file, processes DFT algorithm for a limited
//     frequency range
// Portable between Windows and Linux (this is a standard C program)
// The basic ideas are from: 
//    https://electricdruid.net/fourier-analysis-for-non-mathematicians/
// Copyright Dr.E.Huckert 1-99, 11-2020
//
// versions:
// 17.04.2012 bug fixed (open()/read() not prepared for binary file)
//            bug fixed in readDataChunk()
// 23.12.2015 fix for multiple data chunks where other chunks then 'data'
//                     preceed the data chunks (ex.: 'LIST')
// 15.07.2020 more attributes from main chunk output
//            Option -v added, option -i added, usage() added
// 18.08.2020 readDataChunk(): additional logging output
// 26.08.2020 prepared for Linux/gcc (data types int32_t, int16_t)
//                   readSubChunk() completely redesigned
// 02.09.2020 Option -gGnuplot added  (output x+y values)
// 07.09.2020 Option -gKientzle added (outputs only Y values)
// 16.11.2020 Works now! Tested under Windows 7 with DMC and Linux/Ubuntu with gcc
// 17.11.2020 makeSine() optimized, gives factor 3-9 better performance

// compile:
// Windows: Digital Mars: dmc -mn -DWINDOWS FourierEH.c
//          Zeit: 717 ms (-o+speed), optimized 265 ms
//          GNU C:        gcc -DWINDOWS -o Fourier.exe Fourier.c
//          Zeit: 1544 ms (-O3), optimized 171 ms
// Linux (GNU C): gcc -o FourierEH FourierEH.c -lm

// General note for WAVE files:
// "The default byte ordering assumed for WAVE data files is little-endian. 
// Files written using the big-endian byte ordering scheme have the 
// identifier RIFX instead of RIFF."
// EH: even if the data are always little endian the machine on which this
//     program runs may be big endian! This must be taken into account!

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <math.h>

#ifdef WINDOWS
#include <windows.h>
#include <io.h>
#else
#include <unistd.h>
#define DWORD int32_t  // added EH 25.08.2020
#define WORD  int16_t  // added EH 25.08.2020
#endif

// file size is typically for 1 second of samples:
//   44 bytes header
//  +   sample rate * bytes_per_sample
// ex.: 88244 bytes for a mono file at 44100 samples/second = CD quality
typedef struct
{
  DWORD main_chunk;  // "RIFF"
  DWORD mlength;     // Lenth overall without the first 8 Bytes
  DWORD chunk_type;  // "WAVE"
} MAIN_CHUNK;        // 12 bytes

// contains the essential parameters...
typedef struct       // start byte #12
{
  DWORD sub_chunk;   // muss 'fmt ' sein, 4 bytes
  DWORD slength;     // #16, 4 bytes, normally value 16 for PCM
  WORD  format;      // #18, 4 bytes, for PCM = 1
  WORD  channels;    // #20  1=mono, 2=stereo
  DWORD sample_fq;   // #24  4 bytes: 44100
  DWORD  byteRate;    //      4 bytes
  WORD  blockAlign;   //     2 bytes
  WORD  bits_per_sample;  // 2 bytes
} SUB_CHUNK;        //       24 bytes total

typedef struct       // start at pos. (12 + 24)=36
{
  DWORD data_type;   // 'data' or 'LINK' or ...
  DWORD dlength;     // no. of bytes - not samples!
}  DATA_CHUNK;       // 8 bytes

// global variables
int      gVerbose     = 0;  // set with option -v  DWORD   
int      gGnuplot     = 0;  // output x+y values
int      gKientzle    = 0;  // output a length + y values only
DWORD    actLength    = 0L;
unsigned gNoFrame     = 0;
unsigned gNoSamples   = 0;
double   *pInpData    = NULL;
double   *pSineData   = NULL;
double   *pSineTable  = NULL;
SUB_CHUNK gFormatHeader;  

// ----------------------------------------------------------
// Simplified WAVE file handling
// read 12 bytes  main chunk header
// Returns: the number of bytes read
//          < 0 if error
int readMainChunk(int ifd)
{
  int nb;
  char buf[20];
  char *caddr;
  MAIN_CHUNK header;
  int ret = 0;
  //
  nb = read(ifd, &header, sizeof(MAIN_CHUNK));
  if (nb != sizeof(MAIN_CHUNK))
  {
    printf("ERROR: cannot read header of main chunk\n");
    return -1;
  }
  ret += nb;
  memcpy(buf, (char *)(&(header.main_chunk)), 4);
  buf[4] = 0;
  if (gVerbose)
  {
    printf("Kennung:     %s\n", buf);
    printf("Laenge:      %ld\n", (long)(header.mlength));
  }
  if (strncmp(buf, "RIFF", 4) != 0)
  {
     fprintf(stderr, "ERROR: wrong file tag (should be RIFF)\n");
     return -2; 
  }
  if (gVerbose)
    printf("length main chunk: %d\n",  ret);
  return ret;
}   // end readMainChunk()

// -------------------------------------------------------------
// Simplified WAVE file handling
// read 24 bytes  sub chunk header (format header)
// starts at byte #12
// Returns: the number of bytes read
//          < 0 if error
int readSubChunk(int ifd)
{
  int nb;
  char buf[20];
  char kennung[5];
  int noRead = 0;
  //
  if (gVerbose)
    printf("readSubChunk() start\n");
  //
  // Alternative simple method: gives good results
  // Read the chunk ID
  nb = read(ifd, buf, 4);
  if (nb <= 0)
    return -1;
  memcpy(&(gFormatHeader.sub_chunk), buf, 4);
  noRead += nb;
  if (strncmp((char *)&(gFormatHeader.sub_chunk), "fmt ", 4) != 0)
  {
    char auxBuf[5];
    memcpy(auxBuf, (char *)(&(gFormatHeader.sub_chunk)), 4);
    auxBuf[4] = 0;
    fprintf(stderr, "readSubChunk(): wrong sub ID: %s\n", auxBuf);
    return -2; 
  }
  memcpy(kennung, buf, 4);
  kennung[4] = 0;
  //
  // length of chunk (size)
  nb = read(ifd, buf, 4);
  if (nb <= 0)
    return -3;
  memcpy(&(gFormatHeader.slength), buf, 4);
  noRead += nb;
  //
  // audio format
  nb = read(ifd, buf, 2);
  if (nb <= 0)
    return -4;
  memcpy(&(gFormatHeader.format), buf, 2);
  noRead += nb;
  //
  // no. of channels
  nb = read(ifd, buf, 2);
  if (nb <= 0)
    return -5;
  memcpy(&(gFormatHeader.channels), buf, 2);
  noRead += nb;
  //
  // sample frequency (sample rate)
  nb = read(ifd, buf, 4);
  if (nb <= 0)
    return -6;
  memcpy(&(gFormatHeader.sample_fq), buf, 4);
  noRead += nb;
  //
  // bytes per sample (byte rate)
  nb = read(ifd, buf, 4);
  if (nb <= 0)
    return -7;
  memcpy(&(gFormatHeader.byteRate), buf, 4);
  noRead += nb;
  //
  // block align
  nb = read(ifd, buf, 2);
  if (nb <= 0)
    return -8;
  memcpy(&(gFormatHeader.blockAlign), buf, 2);
  noRead += nb;
  //
  // bits per sample
  nb = read(ifd, buf, 2);
  if (nb <= 0)
    return -0;
  memcpy(&(gFormatHeader.bits_per_sample), buf, 2);
  noRead += nb;
  //
  if (gVerbose)
  {
    printf("Sub-Kennung: %s\n",    kennung); // "fmt "
    printf("Sub-Laenge:  %u\n",   (unsigned)(gFormatHeader.slength));   // 4 Bytes
    printf("Format:      %04x\n", (short)(gFormatHeader.format));   // 2 Bytes
    printf("Kanaele:     %02x\n", (short)(gFormatHeader.channels)); // 2 Bytes
    printf("Sample-Freq: %ld\n",  (long)(gFormatHeader.sample_fq));   // 4 bytes
    printf("Bytes Rate:  %u\n",     (unsigned)gFormatHeader.byteRate);  // 4 bytes
    printf("Block Align: %02u\n", (short)gFormatHeader.blockAlign);   // 2 bytes
    printf("Bits/Sample: %u\n",   (unsigned)(gFormatHeader.bits_per_sample));   // 2 bytes
  }
  return noRead;;
}   // end readSubChunk()

// ---------------------------------------------------------------
// Simplified WAVE file handling
// Read 8 bytes  data chunk header + all data bytes
// The bytes are stored as doubles in the allocated array pInpData
// Returns: the number of bytes read
//          < 0 if error
long readDataChunk(int ifd, 
                   DATA_CHUNK *pHeader,
                   DWORD maxLength)  // max. no. of samples to be output
{
  int        nb, i, outputDone;
  long       ll;
  unsigned   dataIdx = 0;
  long       noRead = 0L;
  char       buf[512]; 
  short      shortSample;
  short      *shortAddr;
  double     doubleSample = 0.0;
  static unsigned frameCount = 0;
  //
  if (gVerbose)
  {
    printf("\nreadDataChunk() start maxLength=%ld\n", 
           (long)(maxLength));
  }
  // read 8 bytes = 2 DWORDs
  nb = read(ifd, pHeader, sizeof(DATA_CHUNK)); 
  if (nb <= 0)
  {
    return -1L;
  }
  noRead += nb;
  memcpy(buf, (char *)(&(pHeader->data_type)), 4);
  buf[4] = 0;
  frameCount++;
  if (gVerbose)
  {
    printf("readDataChunk() frame count=%u\n", frameCount);
    printf("readDataChunk() id: %s\n", buf);
    printf("readDataChunk() length:  %ld\n", (long)(pHeader->dlength));
  }
  if (strncmp(buf, "data", 4) != 0)
  {
     fprintf(stderr, "readDataChunk(): ERROR wrong data chunk ID\n");
     return -2L; 
  }
  //
  // allocate an array of double for the input data (sound samples)
  if (pInpData == NULL)
  {
    gNoSamples = (pHeader->dlength) / ((gFormatHeader.bits_per_sample/8));
    pInpData = (double *)malloc(gNoSamples * (sizeof(double)));
    if (gVerbose)
      printf("readDataChunk() noSamples=%u\n", gNoSamples);
  }
  if (pInpData == NULL)
  {
    fprintf(stderr, "readDataChunk(): ERROR cannot allocate sample memory\n");
    return -3L;
  }
  //    
  // Read here the data (input sound samples)
  // Store the data in array pInpData
  ll = (DWORD)(pHeader->dlength);
  if (gVerbose)
  {
    printf("readDataChunk() pheader->dlength=%ld\n", 
           (long)(pHeader->dlength));
  }
  //
  while (ll > 0L)
  {
    nb = sizeof(buf);
    if (ll < nb) 
      nb = ll;
    nb = read(ifd, buf, nb);
    if (nb <= 0) 
    {
      break;
    }
    ll     -= nb;
    noRead += nb;
    if (gVerbose)
      printf("readDataChunk() read=%d total=%ld\n", nb, noRead);
    //
    // output the data: one sample per line, use redirection in a file ("> file.txt")
    // This output can later be read by gGnuplot command: plot "file.txt" with lines
    for (i = 0; i < nb; i += ((gFormatHeader.bits_per_sample / 8)))
    {
      // here only for 16 Bit data
      shortAddr    = (short *)(&(buf[i]));
      shortSample  = *shortAddr;    // TODO: big/low endian!!!
      // convert the sample to a double value
      doubleSample = (double)shortSample;
      //
      // state the sample value in array pInpData
      pInpData[dataIdx] = doubleSample;
      dataIdx++;
      //
      if (maxLength > 0L)
      {
        actLength++;
        if (gVerbose > 1)
        {
          printf("readDataChunk() maxLength=%ld actLength=%ld\n", 
                  (long)(maxLength), (long)(actLength));
        }
        if (actLength >= maxLength)
          goto zurueck;
      }
    }   // end for i...
  }   // end while (ll > 0L)
  //
  zurueck:
  if (gVerbose)
  { 
    printf("readDataChunk() ret=%ld\n", noRead);
  }
  return noRead;
}   // end readDataChunk()

// --------------------------------------------------------------
// Simplified WAVE file handling
// Read a sequence of DATA_CHUNK frames
// The real data are in chunks of type 'data'
long readData(int   ifd,
              DWORD *readPos,  // will be changed!
              DWORD maxLength) // no.of bytes to read
{
  DWORD lnb = 0L;
  DWORD actNoBytes = 0L;
  long lRet = 0L;
  DATA_CHUNK dataHeader;
  //
  lseek(ifd, 36L, SEEK_SET);  // EH added 26.8.2020
  while (1)
  {
    lnb = readDataChunk(ifd, &dataHeader, maxLength);
    if (lnb <= 0L)
    {
      //printf("ERROR reading the data chunk\n");
      break;
    }
    *readPos += lnb;
    actNoBytes += lnb;
    gNoFrame++;
    if (gVerbose > 1)
    {
      printf("after frame=%u read pos=%ld 0x%lx\n", 
           gNoFrame, (long)(*readPos), (long)(*readPos));
      printf("\n");
    }
    if (maxLength > 0L)
    {
      if (actNoBytes >= maxLength)
        break;
    }
  }  // end while 1
  //
  return lRet;
}   // end readData()

// --------------------------------------------------------------
// Normalize the input data (Y values) to range -1 .. +1
// This is necessary as the Y sine values will also be in this range
int normalizeData()
{
  int ret = 0;
  double maxVal = 0.1;
  double minVal = -0.1;
  unsigned count = 0;
  //
  if (gVerbose)
    printf("normalizeData() gNoSamples=%u\n", gNoSamples);
  //
  // step 1: find the max. and min. values
  for (unsigned i = 0; i < gNoSamples; i++)
  {
    if (pInpData[i] > maxVal)  maxVal = pInpData[i];
    if (pInpData[i] <= minVal) minVal = pInpData[i];
  }
  if (maxVal == 0.0) maxVal = 0.1;  // avoid div. by 0
  if (minVal == 0.0) minVal = 0.1;  // avoid div. by 0
  if (gVerbose)
    printf("normalizeData() minVal=%lf maxVal=%lf\n",
            minVal, maxVal);
  if (minVal < 0.0)  minVal = abs(minVal);  // keep the sign in the results
  //
  // step 2: normalize for range -1 to +1
  for (unsigned i = 0; i < gNoSamples; i++)
  {
    if (pInpData[i] >= 0.0)
      pInpData[i] = pInpData[i] / maxVal;
    else
      pInpData[i] = pInpData[i] / minVal;
  }
  //
  return ret;
}   // end normalizeData()

// ----------------------------------------------------------
// Output a data pair (x,y) here to stdout
void outputSampleData(unsigned xValue,
                      double   doubleSample,
                      unsigned maxLength)
{
  static unsigned long noOutputs = 0L;
  int outputDone = 0;
  if (gGnuplot)
  {
    // output x and y value
    printf("%lf %lf\n", (double)xValue, doubleSample);
    outputDone = 1;
  }
  if (gKientzle)
  {
    // output the length at the beginning
    if (noOutputs++ == 0L)
      printf("%u\n", (unsigned)maxLength); 
    // output only the y value
    printf("%lf\n", doubleSample);
    outputDone = 1;
  }
  if (! outputDone)
    // output only the y value
    printf("%lf\n", doubleSample);
}   // end outputSampleData()

// ---------------------------------------------------------------
// Output all data pairs (x,y) to stdout
void outputData(unsigned maxLength)
{
  for (unsigned i=0; i < maxLength; i++)
  {
    outputSampleData(i, pInpData[i], maxLength);
  }
}   // end outputData()


// ----------------------------------------------------------
// Fill the array pSineData with sine values
// Note: the sine data allocated here must be freed externally!!!
int makeSine(unsigned maxX,
             unsigned noHertz)  
{
  int    ret  = 0;
  double step = 1.0;
  #define PI   ((double)3.1415926)
  double radianFactor = PI / 180.0;
  double sineValue = 0.0;
  unsigned n       = 0;
  double   doubleN = 0.0;
  //
  if (noHertz == 0)
    noHertz = 1;    // avoid division by zero
  if (gVerbose)
    printf("makeSine() maxX=%u noHertz=%u\n", 
            maxX, noHertz);
  //
  // allocate an array of double for the sine values
  if (pSineData == NULL)
  {
    pSineData = (double *)malloc(gNoSamples * sizeof(double));
    if (gVerbose)
      printf("makeSine() noSamples=%u\n", gNoSamples);
  }
  if (pSineData == NULL)
  {
    fprintf(stderr, "makeSine(): ERROR cannot allocate memory for sine data\n");
    ret = -1;
  }
  //
  step = (double)gNoSamples / (double)noHertz; // = no.samples for 360 degrees
  step = step / (double)360.0;                 // = no_samples for 1 degree
  if (gVerbose)
      printf("makeSine() step=%lf\n", step);
  if (step <= 0.0) step = 0.01;
  //
  // Not optimized: we could reuse the sine values in each for loop!!!
  n       = 0;
  doubleN = 0.0;
  int tableFilled = 0;
  if (pSineTable == NULL)
  {
    tableFilled = 0;
    pSineTable = (double *)malloc(360 * sizeof(double));
  }
  else
    tableFilled = 1;
  while (n < gNoSamples)
  {
    // 17.11.2020 optimized: uses a fixed sine value table
    // preset the sine table
    for (double sineArg = 0.0; sineArg < 360.0; sineArg += 1.0)
    {
      n = (unsigned)doubleN;
      if (n >= gNoSamples) 
        break;
      if (tableFilled == 0)
      {
        // preset the sine table
        sineValue = (double)(sin(sineArg * radianFactor));
        pSineTable[(unsigned)sineArg] = sineValue;
      }
      else
      {
        // get the value from the filled sine table
        sineValue = pSineTable[(unsigned)sineArg];
      }
      pSineData[n] = sineValue;
      doubleN += step;
    }   // end for ...
  }   // end while ...
  //
  zurueck:
  return ret;
}   // end makeSine()

// ----------------------------------------------------------
// Process the DFT (Discrete Fourier Transformation)
// Uses the samples in pInpData
// Uses the sine data in array pSineData (see proc. makeSine()).
// Multiplies the inp.values with the corresponding sine values.
double processDFT(unsigned maxX,
                  unsigned noHertz) 
{
  double ret = 0.0;
  double multVal = 0.0;
  double sumVal = 0.0;
  double *pResultData = NULL;
  static unsigned outCount = 0;
  //
  if (gVerbose)
    printf("processDFT() maxX=%u frequency=%u\n", 
            maxX, noHertz);
  //
  pResultData = (double *)malloc(maxX * sizeof(double));
  for (unsigned n=0; n < maxX; n++)
  {
    // multiply the normalized inp. value(sample) with the sine values
    /*
    if (outCount++ < 256)
      printf("n=%04u sample=%lf sine=%lf\n", n, pInpData[n], pSineData[n]);
    */
    multVal = pInpData[n] * pSineData[n];
    if (gVerbose > 1)
      printf("processDFT() sample=%u multVal=%lf\n", 
              n, multVal);
    pResultData[n] = multVal;
  }   // end for n ...
  //
  // Calculate the sum of all multipl.results for that frequency
  sumVal = 0.0;
  for (unsigned n=0; n < maxX; n++)
  {
    sumVal = sumVal + pResultData[n];
  }
  // Divide th result by the num. of samples to get an average value
  ret = sumVal / (double)maxX;
  //
  zurueck:
  if (gVerbose)
      printf("processDFT() sum=%lf\n", 
              ret);
  if (pResultData != NULL)
    free(pResultData);
  return ret;
}   // end processDFT()

// --------------------------------------------------------------
void usage()
{
  printf("Copyright Dr.E.Huckert 1999-2020 V1.1\n");
  printf("Usage: FourierEH [-v] [-length nnn ]\n");
  printf("       [-gnuplot] [-kientzle] -i filename\n");
}   // end usage()

// --------------------------------------------------------------
int main(int argc, char *argv[])
{
  int   ifd = -1;
  WORD  ret = 0;
  WORD  nb, n;
  char  buf[512];
  char  fileName[256] = "";
  DWORD readPos = 0L;
  DWORD maxLength = 1024L;
  unsigned long t1, t2;
  unsigned freqStart = 100;
  unsigned freqEnd   = 1000;
  unsigned freqStep  = 20;
  double   doubleRet = 0.0;
  double bestResult  = 0.0;
  unsigned bestFreq  = 0;
  //
  #ifdef WINDOWS
  t1 = GetCurrentTime();
  #endif
  //
  // Evaluate the command line
  for (n=1; n < argc; n++)
  {
    if (strcmp(argv[n], "-v") == 0)
      gVerbose = 1;
    if (strcmp(argv[n], "-i") == 0)
      strcpy(fileName, argv[n + 1]);
    if (strcmp(argv[n], "-length") == 0)  // 1024 per default
      maxLength = atol(argv[n + 1]);
    if (strcmp(argv[n], "-gnuplot") == 0)
      gGnuplot = 1;
    if (strcmp(argv[n], "-kientzle") == 0)
      gKientzle = 1;
  }
  if (strlen(fileName) == 0)
  {
    printf("ERROR: command line not correct (\"-i\" missing?)\n");
    usage();
    ret = -1;
    goto zurueck;
  }
  if ((maxLength > 4096L) || (maxLength <= 0L))
  {
    printf("WARNING: length restricte to 4096!\n");
    maxLength = 4096L;
  }
  #ifdef WINDOWS
  ifd = open(fileName, O_RDONLY + O_BINARY);
  #else
  ifd = open(fileName, O_RDONLY);  // Linux always binary!
  #endif
  if (ifd < 0)
  {
    ret = -2;
    printf("ERROR: Inp.wave file not found: %s\n", fileName);
    goto zurueck;
  }
  //
  // Read the first header: "RIFF", Laenge, "WAVE"
  nb = readMainChunk(ifd);
  if (nb < 0)
  {
    printf("ERROR: main chunk error\n");
    ret = -2;
    goto zurueck;
  }
  readPos += nb;
  if (gVerbose)
  {
    printf("after main chunk read pos=%ld 0x%lx\n", 
           (long)readPos, (long)readPos);
    printf("\n");
  }
  //
  // Read the second header (sub chunk, format chunk)
  // Contains the number of channels and the sample freq.
  lseek(ifd, 12L, SEEK_SET);  // EH added 26.8.2020
  nb = readSubChunk(ifd);
  if (nb < 0)
  {
    printf("ERROR: sub chunk\n");
    ret = -3;
    goto zurueck;
  }
  readPos += nb;
  if (gVerbose)
  {
     printf("data chunk: %d bytes read\n", nb);
     printf("after format chunk read pos=%ld 0x%lx\n", 
            (long)readPos, (long)readPos);
     printf("\n");
  }
  // 
  // Read all data (all data chunks)
  ret =  readData(ifd, &readPos, maxLength);
  if (ret < 0L)
  {
    goto zurueck;
  }
  //
  // Normalize the y values in range -1 to +1 as the
  // normal sine values are also in this range
  ret = normalizeData();
  //
  // Output the data in some practical formats like GnuPlot
  if (gGnuplot | gKientzle)
    outputData((unsigned)maxLength);
  //
  // We assume the Y values have been normalized in the range -1 to 1
  // Loop over a limited frequency range using a given step
  for (unsigned n=freqStart; n <= freqEnd; n += freqStep)
  {
    if (gVerbose)
      printf("main() freq=%u\n", n);
    // get the sine values for that frequency
    ret = makeSine((unsigned)maxLength, n);
    if (ret < 0)
      goto zurueck;
    //
    // Process the DFT procedure for that frequency
    doubleRet = processDFT((unsigned)maxLength, n);
    if (gVerbose)
      printf("result for freq.=%04u: %7.3lf\n", 
             n, doubleRet);
    //
    // keep the best absolute value
    if (doubleRet >= bestResult)
    {
      bestResult = doubleRet;
      bestFreq   = n;
    }
    //
    free(pSineData);
    pSineData = NULL;
  }   // end for n...
  //
  printf("\nbestResult=%lf best matching freq.=%u\n",
          bestResult, bestFreq);
  #ifdef WINDOWS
  t2 = GetCurrentTime();
  printf("time=%lu\n", t2 - t1);
  #endif
  ret = 0;  // OK
  //
  zurueck:
  if (gVerbose)
  {
    printf("last read pos=%ld\n", (long)readPos);
    printf("return code=%d\n", ret);
  }
  if (ifd >= 0)
    close(ifd);
  if (pSineData != NULL)
    free(pSineData);
  if (pInpData != NULL)
    free(pInpData);
  if (pSineTable != NULL)
    free(pSineTable);
  return ret;
}   // end main()
