/**
 * \file pappsomspp/processing/cbor/psm/psmfilereaderbase.cpp
 * \date 05/07/2025
 * \author Olivier Langella
 * \brief Base class to read CBOR PSM file
 */

/*******************************************************************************
 * Copyright (c) 2025 Olivier Langella <Olivier.Langella@universite-paris-saclay.fr>.
 *
 * This file is part of PAPPSOms-tools.
 *
 *     PAPPSOms-tools is free software: you can redistribute it and/or modify
 *     it under the terms of the GNU General Public License as published by
 *     the Free Software Foundation, either version 3 of the License, or
 *     (at your option) any later version.
 *
 *     PAPPSOms-tools is distributed in the hope that it will be useful,
 *     but WITHOUT ANY WARRANTY; without even the implied warranty of
 *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *     GNU General Public License for more details.
 *
 *     You should have received a copy of the GNU General Public License
 *     along with PAPPSOms-tools.  If not, see <http://www.gnu.org/licenses/>.
 *
 ******************************************************************************/

#include "psmfilereaderbase.h"
#include "pappsomspp/core/pappsoexception.h"
#include <QDebug>
#include "../../../peptide/peptideproformaparser.h"
#include <QCborArray>


namespace pappso
{
namespace cbor
{
namespace psm
{

/** \brief Default constructor.
 *
 * Only constructs the CborStreamReaderInterface base; no member is set up
 * here.
 */
PsmFileReaderBase::PsmFileReaderBase() : CborStreamReaderInterface()
{
}

/** \brief Destructor; this class has no cleanup of its own to perform.
 */
PsmFileReaderBase::~PsmFileReaderBase()
{
  // debugging aid, left disabled:
  // qWarning() << "~PsmFileReaderBase";
}


/** \brief Parse a PSM CBOR stream coming from a generic I/O device.
 *
 * The stream reader is prepared on \p cborp by initCborReader(). The content
 * is walked with readRoot() only when the reader is positioned on a map;
 * otherwise the function returns without reading anything.
 *
 * \param cborp device handed to initCborReader()
 * \param monitor forwarded to readRoot() and, from there, to every hook
 */
void
PsmFileReaderBase::readCbor(QIODevice *cborp, pappso::UiMonitorInterface &monitor)
{
  qDebug();
  initCborReader(cborp);
  qDebug();

  if(!mpa_cborReader->isMap())
    {
      // not a map: there is nothing this reader knows how to parse
      qDebug();
      return;
    }

  readRoot(monitor);
  qDebug();
}

/** \brief Parse a PSM CBOR stream coming from a QFile.
 *
 * Same sequence as the QIODevice overload: initCborReader() on \p cborp,
 * then readRoot() provided the reader is positioned on a map.
 *
 * \param cborp file handed to initCborReader()
 * \param monitor forwarded to readRoot() and, from there, to every hook
 */
void
PsmFileReaderBase::readCbor(QFile *cborp, pappso::UiMonitorInterface &monitor)
{
  qDebug();
  initCborReader(cborp);
  qDebug();

  const bool root_is_map = mpa_cborReader->isMap();
  if(root_is_map)
    {
      readRoot(monitor);
    }

  qDebug();
}


/** \brief Tell whether the current PSM references at least one decoy protein.
 *
 * Each accession of m_currentPsmProteinRefList is looked up in m_proteinMap;
 * a protein whose isTarget flag is false counts as a decoy.
 *
 * \return true as soon as one non-target protein is found, false otherwise
 *         (including when the list is empty)
 */
bool
PsmFileReaderBase::currentProteinRefListContainsDecoy() const
{
  bool decoy_found = false;
  for(const auto &protein_ref : m_currentPsmProteinRefList)
    {
      const bool is_target = m_proteinMap.getByAccession(protein_ref.accession).isTarget;
      if(!is_target)
        {
          decoy_found = true;
          break;
        }
    }
  return decoy_found;
}

/** \brief Tell whether the current PSM references at least one target protein.
 *
 * Each accession of m_currentPsmProteinRefList is looked up in m_proteinMap
 * and its isTarget flag is tested.
 *
 * \return true as soon as one target protein is found, false otherwise
 *         (including when the list is empty)
 */
bool
PsmFileReaderBase::currentProteinRefListContainsTarget() const
{
  bool target_found = false;
  for(const auto &protein_ref : m_currentPsmProteinRefList)
    {
      const bool is_target = m_proteinMap.getByAccession(protein_ref.accession).isTarget;
      if(is_target)
        {
          target_found = true;
          break;
        }
    }
  return target_found;
}


/** \brief Read every root map of the stream, firing the hooks along the way.
 *
 * Expected key order inside one root map:
 *  - "informations" (mandatory)
 *  - "log" (optional)
 *  - "parameter_map" (mandatory)
 *  - "target_fasta_files" (optional)
 *  - "decoy_fasta_files" (optional)
 *  - "protein_map" (optional)
 *  - "sample_list" (mandatory, array of samples)
 *
 * Once a root map has been left, reading starts over when the reader reports
 * no error and still has data, so several documents stored back to back are
 * all processed. This is done with a loop rather than by calling readRoot()
 * recursively, so the stack depth does not depend on the number of documents.
 *
 * \param monitor forwarded to every reading function and hook
 * \throws pappso::PappsoException when a mandatory element is not found at
 *         its expected position
 */
void
PsmFileReaderBase::readRoot(pappso::UiMonitorInterface &monitor)
{
  do
    {
      qDebug();
      mpa_cborReader->enterContainer();

      getExpectedString();
      if(m_expectedString != "informations")
        {
          throw pappso::PappsoException("ERROR: expecting informations element");
        }
      qDebug() << m_expectedString;
      readInformations(monitor);
      getExpectedString();

      qDebug() << m_expectedString;
      if(m_expectedString == "log")
        {
          qDebug() << m_expectedString;
          readLog(monitor);
          getExpectedString();
        }

      // fired whether or not a "log" element was present
      logReady(monitor);

      qDebug() << m_expectedString;

      if(m_expectedString != "parameter_map")
        {
          throw pappso::PappsoException("ERROR: expecting parameter_map element");
        }
      qDebug();
      readParameterMap(monitor);

      getExpectedString();
      // both lists are reset so that an absent element yields an empty list
      m_targetFastaFiles.clear();
      m_decoyFastaFiles.clear();
      if(m_expectedString == "target_fasta_files")
        {
          mpa_cborReader->readArray(m_targetFastaFiles);
          getExpectedString();
        }

      if(m_expectedString == "decoy_fasta_files")
        {
          mpa_cborReader->readArray(m_decoyFastaFiles);
          getExpectedString();
        }
      fastaFilesReady(monitor);

      if(m_expectedString == "protein_map")
        {
          readProteinMap(monitor);
          getExpectedString();
        }

      if(m_expectedString != "sample_list")
        {
          throw pappso::PappsoException(
            QObject::tr("ERROR: expecting sample_list element not %1").arg(m_expectedString));
        }

      sampleListStarted(monitor);
      mpa_cborReader->enterContainer(); // array
      while(!mpa_cborReader->lastError() && mpa_cborReader->hasNext())
        {
          readSample(monitor);
        }
      mpa_cborReader->leaveContainer(); // array
      sampleListFinished(monitor);

      mpa_cborReader->leaveContainer(); // whole file
    }
  while(!mpa_cborReader->lastError() && mpa_cborReader->hasNext());
}

/** \brief Read the "informations" header map and check that it describes PSM data.
 *
 * The map is decoded into m_cborInformations. Its "type" entry must be the
 * string "psm"; when it is, the informationsReady() hook is called.
 *
 * \param monitor forwarded to informationsReady()
 * \throws pappso::PappsoException when the map cannot be decoded or when its
 *         type is not "psm" (the message lists the type and all header keys)
 */
void
PsmFileReaderBase::readInformations(pappso::UiMonitorInterface &monitor [[maybe_unused]])
{
  if(!mpa_cborReader->readCborMap(m_cborInformations))
    {
      throw pappso::PappsoException("ERROR: PSM cbor header informations not well formed");
    }
  qDebug() << m_cborInformations.keys();

  const QString file_type = m_cborInformations.value("type").toString();
  if(file_type != "psm")
    {
      // collect the header keys to make the error message self-explanatory
      QStringList key_names;
      const auto header_keys = m_cborInformations.keys();
      for(const auto &header_key : header_keys)
        {
          key_names << header_key.toString();
        }
      throw pappso::PappsoException(
        QObject::tr("ERROR: this file does not contain PSM data but %1 and %2")
          .arg(file_type)
          .arg(key_names.join(" ")));
    }

  informationsReady(monitor);
}

/** \brief Read the "log" header array into m_cborLog.
 *
 * \param monitor unused here
 * \throws pappso::PappsoException when the array cannot be decoded
 */
void
PsmFileReaderBase::readLog(pappso::UiMonitorInterface &monitor [[maybe_unused]])
{
  const bool log_decoded = mpa_cborReader->readCborArray(m_cborLog);
  if(log_decoded)
    {
      return;
    }

  throw pappso::PappsoException("ERROR: PSM cbor header log not well formed");
}


/** \brief Read the "parameter_map" element into m_cborParameterMap.
 *
 * The member is emptied first, then filled from the stream; on success the
 * parameterMapReady() hook is called.
 *
 * \param monitor forwarded to parameterMapReady()
 * \throws pappso::PappsoException when the map cannot be decoded
 */
void
PsmFileReaderBase::readParameterMap(pappso::UiMonitorInterface &monitor [[maybe_unused]])
{
  m_cborParameterMap.clear();

  if(!mpa_cborReader->readCborMap(m_cborParameterMap))
    {
      throw pappso::PappsoException("ERROR: PSM cbor parameter_map not well formed");
    }

  parameterMapReady(monitor);
}

/** \brief Read the "protein_map" element and signal that it is available.
 *
 * Decoding is delegated to m_proteinMap.readMap() working on the shared
 * stream reader; the proteinMapReady() hook is called afterwards.
 *
 * \param monitor forwarded to proteinMapReady()
 */
void
PsmFileReaderBase::readProteinMap(pappso::UiMonitorInterface &monitor)
{
  m_proteinMap.readMap(*mpa_cborReader);
  proteinMapReady(monitor);
}


/** \brief Read one entry of a PSM "protein_list".
 *
 * The entry is a map holding, in this order, an "accession" string and a
 * "positions" array.
 *
 * \param is_ok receives the result of decoding the accession string; since a
 *        failed decoding throws, it is true whenever the function returns
 * \return the protein reference that was read
 * \throws pappso::PappsoException when a key is not the expected one or when
 *         the accession is not a string
 */
PsmFileReaderBase::PsmProteinRef
PsmFileReaderBase::readPsmProteinRef(bool &is_ok)
{
  PsmProteinRef protein_ref;
  protein_ref.accession = "";
  protein_ref.positions.clear();

  mpa_cborReader->enterContainer();

  // first key: accession
  getExpectedString();
  qDebug() << m_expectedString;
  if(m_expectedString != "accession")
    {
      throw pappso::PappsoException("ERROR: expecting accession element in PSM protein_list");
    }
  is_ok = mpa_cborReader->decodeString(protein_ref.accession);
  if(!is_ok)
    {
      throw pappso::PappsoException("ERROR: protein accession is not a string");
    }

  // second key: positions
  getExpectedString();
  qDebug() << m_expectedString;
  if(m_expectedString != "positions")
    {
      throw pappso::PappsoException(
        QString("ERROR: expecting positions element in PSM protein_list not %1")
          .arg(m_expectedString));
    }
  mpa_cborReader->readArray(protein_ref.positions);

  mpa_cborReader->leaveContainer();

  qDebug() << "end";
  return protein_ref;
}


/** \brief Read a file description: a map whose first key is "name".
 *
 * \param is_ok set to false on entry and to true once the name was decoded
 * \return the PsmFile carrying the decoded name
 * \throws pappso::PappsoException when the first key is not "name" or when
 *         its value is not a string
 */
PsmFile
PsmFileReaderBase::readPsmFile(bool &is_ok)
{
  is_ok = false;
  PsmFile psm_file;

  mpa_cborReader->enterContainer();
  getExpectedString();

  if(m_expectedString != "name")
    {
      throw pappso::PappsoException("ERROR: expecting name element in file");
    }
  if(!mpa_cborReader->decodeString(psm_file.name))
    {
      throw pappso::PappsoException("file name is not a string");
    }
  is_ok = true;

  mpa_cborReader->leaveContainer();
  return psm_file;
}


/** \brief Write one PsmFile as a CBOR map holding a single "name" entry.
 *
 * Produces the structure that readPsmFile() expects.
 *
 * \param writer CBOR writer receiving the map
 * \param psm_file file description whose name is written
 */
void
PsmFileReaderBase::writePsmFile(pappso::cbor::CborStreamWriter &writer, const PsmFile &psm_file)
{
  writer.startMap();
  // key, then value
  writer.append("name");
  writer.append(psm_file.name);
  writer.endMap();
}

/** \brief Write a list of PsmFile as a CBOR array of file maps.
 *
 * Each element is emitted with writePsmFile(), in the order of \p file_list.
 *
 * \param writer CBOR writer receiving the array
 * \param file_list file descriptions to write (may be empty)
 */
void
PsmFileReaderBase::writePsmFileList(pappso::cbor::CborStreamWriter &writer,
                                    const std::vector<PsmFile> &file_list)
{
  writer.startArray();
  for(auto it = file_list.begin(); it != file_list.end(); ++it)
    {
      writePsmFile(writer, *it);
    }
  writer.endArray();
}


/** \brief Read one element of "sample_list".
 *
 * Expected key order inside the sample map:
 *  - "name" (mandatory string), stored in m_currentSampleName
 *  - "identification_file_list" (optional array of file maps), stored in
 *    m_currentIdentificationFileList
 *  - "peaklist_file" (mandatory file map), stored in m_currentPeaklistFile
 *  - "scan_list" (mandatory array), each element handled by readScan()
 *
 * sampleStarted() is called once the peak list file is known, before the
 * scans are read; sampleFinished() is called after the sample map is left.
 *
 * Example of the leading part of a sample:
 * \code
 * "name": "tandem2017_nopatch_20120906_balliau_extract_1_A01_urnb-1",
 * "identification_file_list": [{ "name": "/home/.../extract_1_A01_urnb-1.xml" }],
 * "peaklist_file": { "name": "tandem2017_..._urnb-1.mzml" },
 * "scan_list": [
 * \endcode
 *
 * \param monitor forwarded to readScan() and to the hooks
 * \throws pappso::PappsoException when a mandatory element is not found at
 *         its expected position or when the name is not a string
 */
void
PsmFileReaderBase::readSample(pappso::UiMonitorInterface &monitor)
{
  qDebug();
  mpa_cborReader->enterContainer();
  getExpectedString();

  qDebug() << m_expectedString;
  if(m_expectedString == "name")
    {
      if(!mpa_cborReader->decodeString(m_currentSampleName))
        {
          throw pappso::PappsoException("sample name is not a string");
        }
    }
  else
    {
      // this is the sample level, not a file entry: say so in the message
      throw pappso::PappsoException("ERROR: expecting name element in sample");
    }

  getExpectedString();

  qDebug() << m_expectedString;
  m_currentIdentificationFileList.clear();

  // readPsmFile() either sets this flag to true or throws, so it is not
  // tested afterwards
  bool is_ok;
  if(m_expectedString == "identification_file_list")
    {
      mpa_cborReader->enterContainer();

      while(!mpa_cborReader->lastError() && mpa_cborReader->hasNext())
        {
          m_currentIdentificationFileList.push_back(readPsmFile(is_ok));
        }
      mpa_cborReader->leaveContainer();

      // advance only because the optional element was consumed
      getExpectedString();
    }

  if(m_expectedString == "peaklist_file")
    {
      m_currentPeaklistFile = readPsmFile(is_ok);
    }
  else
    {
      throw pappso::PappsoException("ERROR: expecting peaklist_file element in sample");
    }

  sampleStarted(monitor);
  getExpectedString();
  if(m_expectedString == "scan_list")
    {
      mpa_cborReader->enterContainer();

      while(!mpa_cborReader->lastError() && mpa_cborReader->hasNext())
        {
          readScan(monitor);
        }
      mpa_cborReader->leaveContainer();
    }
  else
    {
      throw pappso::PappsoException("ERROR: expecting scan_list element in sample");
    }
  mpa_cborReader->leaveContainer();

  sampleFinished(monitor);
}

/** \brief Read one element of "scan_list".
 *
 * Expected key order inside the scan map:
 *  - "id" (mandatory map), stored in m_cborScanId
 *  - "precursor" (optional map), stored in m_cborScanPrecursor
 *  - "ms2" (optional map), stored in m_cborScanMs2
 *  - "psm_list" (optional array), each element handled by readPsm()
 *
 * The three members are emptied for every scan, so an absent optional
 * element leaves its member empty. scanStarted() is called after "ms2" and
 * before "psm_list"; scanFinished() is called after the scan map is left.
 *
 * The next key is fetched only after an optional element has actually been
 * consumed. Fetching it unconditionally would discard the pending key
 * whenever "precursor" or "ms2" is absent.
 * NOTE(review): relies on getExpectedString() reading the next key into
 * m_expectedString; its implementation is not visible here.
 *
 * Example:
 * \code
 * "id": { "index": 1976 },
 * "precursor": { "z": 2, "mz": 1120.529471 },
 * "ms2": { "rt": 12648.87, "mz": [1,2,3,4], "intensity": [1,2,3,4] },
 * "psm_list": [
 * \endcode
 *
 * \param monitor forwarded to readPsm() and to the hooks
 * \throws pappso::PappsoException when "id" is missing or when one of the
 *         elements is not a CBOR map
 */
void
PsmFileReaderBase::readScan(pappso::UiMonitorInterface &monitor)
{
  qDebug();
  m_cborScanId.clear();
  mpa_cborReader->enterContainer();
  qDebug() << "scan begin";

  getExpectedString();
  qDebug() << m_expectedString;
  if(m_expectedString == "id")
    {
      if(!mpa_cborReader->readCborMap(m_cborScanId))
        {
          throw pappso::PappsoException(QObject::tr("id element in scan is not a cbor map"));
        }
    }
  else
    {
      throw pappso::PappsoException(
        QObject::tr("ERROR: expecting id element in scan not %1").arg(m_expectedString));
    }

  getExpectedString();
  m_cborScanPrecursor.clear();
  qDebug() << m_expectedString;
  if(m_expectedString == "precursor")
    {
      if(!mpa_cborReader->readCborMap(m_cborScanPrecursor))
        {
          throw pappso::PappsoException(QObject::tr("precursor element in scan is not a cbor map"));
        }
      // "precursor" was consumed: move on to the following key
      getExpectedString();
      qDebug() << m_expectedString;
    }

  m_cborScanMs2.clear();
  bool ms2_consumed = false;
  if(m_expectedString == "ms2")
    {
      if(!mpa_cborReader->readCborMap(m_cborScanMs2))
        {
          throw pappso::PappsoException(
            QObject::tr("ms2 element in scan is not a cbor map %1 %2:\n%3")
              .arg(m_currentSampleName)
              .arg(m_cborScanId.value("index").toInteger())
              .arg(mpa_cborReader->lastError().toString()));
        }
      ms2_consumed = true;
    }

  // the hook keeps firing before the next key is fetched
  scanStarted(monitor);

  if(ms2_consumed)
    {
      getExpectedString();
      qDebug() << m_expectedString;
    }

  if(m_expectedString == "psm_list")
    {
      mpa_cborReader->enterContainer();
      while(!mpa_cborReader->lastError() && mpa_cborReader->hasNext())
        {
          readPsm(monitor);
        }
      mpa_cborReader->leaveContainer();
    }

  mpa_cborReader->leaveContainer();
  qDebug() << "scan end";
  scanFinished(monitor);
  qDebug();
}

/** \brief Read one element of a scan "psm_list".
 *
 * Expected key order inside the PSM map:
 *  - "proforma" (mandatory string), stored in m_currentPsmProforma
 *  - "protein_list" (mandatory array), stored in m_currentPsmProteinRefList
 *  - "props" (optional map), stored in m_cborScanPsmProps
 *  - "eval" (mandatory map), stored in m_cborScanPsmEval
 *
 * psmReady() is called once the PSM map has been left.
 *
 * Example:
 * \code
 * "proforma": "AQEEM[+15.99491]AQVAK",
 * "protein_list": [ { "accession": "GRMZM2G083841_P01", "positions": [15,236] } ],
 * "props": { ... },
 * "eval": { ... }
 * \endcode
 *
 * \param monitor forwarded to psmReady()
 * \throws pappso::PappsoException when a mandatory element is not found at
 *         its expected position or when an element is malformed
 */
void
PsmFileReaderBase::readPsm(pappso::UiMonitorInterface &monitor [[maybe_unused]])
{
  mpa_cborReader->enterContainer();

  // --- proforma
  getExpectedString();
  if(m_expectedString != "proforma")
    {
      throw pappso::PappsoException("ERROR: expecting proforma element in psm-scan");
    }
  if(!mpa_cborReader->decodeString(m_currentPsmProforma))
    {
      throw pappso::PappsoException("ERROR: proforma element in psm-scan is not a string");
    }

  // --- protein_list
  getExpectedString();
  m_currentPsmProteinRefList.clear();
  qDebug() << m_expectedString;

  if(m_expectedString != "protein_list")
    {
      throw pappso::PappsoException("ERROR: expecting protein_list element in psm-scan");
    }

  mpa_cborReader->enterContainer(); // array
  while(!mpa_cborReader->lastError() && mpa_cborReader->hasNext())
    {
      bool ref_ok = false;
      m_currentPsmProteinRefList.push_back(readPsmProteinRef(ref_ok));
      if(!ref_ok)
        {
          qDebug();
          throw pappso::PappsoException(
            QObject::tr("ERROR: reading protein_list element in psm-scan"));
        }
    }
  mpa_cborReader->leaveContainer(); // array

  // --- props (optional) and eval
  m_cborScanPsmProps.clear();
  qDebug();
  m_cborScanPsmEval.clear();
  getExpectedString();
  qDebug() << m_expectedString;

  if(m_expectedString == "props")
    {
      if(!mpa_cborReader->readCborMap(m_cborScanPsmProps))
        {
          throw pappso::PappsoException("ERROR: props element in psm-scan is not well formed");
        }
      // something must follow "props"
      if(!getExpectedString())
        {
          throw pappso::PappsoException(
            QObject::tr("ERROR: expecting eval element in psm-scan %1").arg(m_currentPsmProforma));
        }
    }

  if(m_expectedString != "eval")
    {
      throw pappso::PappsoException(
        QObject::tr("ERROR: expecting eval element in psm-scan %1 not %2 in %3 %4 %5")
          .arg(m_currentPsmProforma)
          .arg(m_expectedString)
          .arg(__FILE__)
          .arg(__FUNCTION__)
          .arg(__LINE__));
    }
  if(!mpa_cborReader->readCborMap(m_cborScanPsmEval))
    {
      throw pappso::PappsoException("ERROR: eval element in psm-scan is not well formed");
    }

  qDebug() << m_expectedString;

  mpa_cborReader->leaveContainer();
  qDebug();
  psmReady(monitor);
}

/** \brief Hook called by readPsm() once a complete PSM has been read.
 *
 * This implementation does nothing; presumably meant to be overridden by
 * derived readers (the declaration is not visible in this file).
 *
 * \param monitor unused here
 */
void
PsmFileReaderBase::psmReady(pappso::UiMonitorInterface &monitor [[maybe_unused]])
{
  // PSM is ready, do what you want :)
}

/** \brief Hook called by readInformations() once the header map has been
 * read and its type checked to be "psm".
 *
 * This implementation does nothing.
 *
 * \param monitor unused here
 */
void
PsmFileReaderBase::informationsReady(pappso::UiMonitorInterface &monitor [[maybe_unused]])
{
}

/** \brief Hook called by readParameterMap() once m_cborParameterMap is filled.
 *
 * This implementation does nothing.
 *
 * \param monitor unused here
 */
void
PsmFileReaderBase::parameterMapReady(pappso::UiMonitorInterface &monitor [[maybe_unused]])
{
}

/** \brief Hook called by readRoot() after the header, whether or not a
 * "log" element was present.
 *
 * This implementation does nothing.
 *
 * \param monitor unused here
 */
void
PsmFileReaderBase::logReady(pappso::UiMonitorInterface &monitor [[maybe_unused]])
{
}


/** \brief Hook called by readRoot() once the target and decoy FASTA file
 * lists have been read (a list is empty when its element is absent).
 *
 * This implementation does nothing.
 *
 * \param monitor unused here
 */
void
PsmFileReaderBase::fastaFilesReady(pappso::UiMonitorInterface &monitor [[maybe_unused]])
{
}

/** \brief Hook called by readProteinMap() once m_proteinMap has been read.
 *
 * This implementation does nothing.
 *
 * \param monitor unused here
 */
void
PsmFileReaderBase::proteinMapReady(pappso::UiMonitorInterface &monitor [[maybe_unused]])
{
}

/** \brief Hook called by readSample() once the sample name and its files
 * are known, before the scan list is read.
 *
 * This implementation does nothing.
 *
 * \param monitor unused here
 */
void
PsmFileReaderBase::sampleStarted(pappso::UiMonitorInterface &monitor [[maybe_unused]])
{
}

/** \brief Hook called by readScan() after the scan id, precursor and ms2
 * elements have been handled, before the PSM list is read.
 *
 * This implementation does nothing.
 *
 * \param monitor unused here
 */
void
PsmFileReaderBase::scanStarted(pappso::UiMonitorInterface &monitor [[maybe_unused]])
{
}

/** \brief Hook called by readScan() after the scan map has been left.
 *
 * This implementation does nothing.
 *
 * \param monitor unused here
 */
void
PsmFileReaderBase::scanFinished(pappso::UiMonitorInterface &monitor [[maybe_unused]])
{
}

/** \brief Hook called by readSample() after the sample map has been left.
 *
 * This implementation does nothing.
 *
 * \param monitor unused here
 */
void
PsmFileReaderBase::sampleFinished(pappso::UiMonitorInterface &monitor [[maybe_unused]])
{
}

/** \brief Build the peptide of the current PSM from its ProForma string.
 *
 * \return the result of PeptideProFormaParser::parseString() applied to
 *         m_currentPsmProforma
 * \throws pappso::PappsoException when m_currentPsmProforma is empty
 */
pappso::PeptideSp
PsmFileReaderBase::getCurrentPsmPeptideSp() const
{
  if(m_currentPsmProforma.isEmpty())
    {
      throw pappso::PappsoException(QObject::tr("ERROR: m_currentPsmProforma is empty"));
    }

  pappso::PeptideSp parsed_peptide =
    pappso::PeptideProFormaParser::parseString(m_currentPsmProforma);
  return parsed_peptide;
}

/** \brief Build a qualified MS2 spectrum from the data of the current scan.
 *
 * Uses m_currentPeaklistFile and m_currentSampleName for the run identity,
 * m_cborScanId ("index", optional "native_id") for the spectrum identity,
 * m_cborScanMs2 ("mz", "intensity", optional "rt") for the peaks and
 * m_cborScanPrecursor (optional "z", "mz", "intensity") for the precursor.
 *
 * Key presence is tested with contains() and the two peak arrays are fetched
 * once, instead of rebuilding the key list for every test and looking the
 * intensity array up again for every peak.
 *
 * \return a shared pointer on the new qualified mass spectrum (MS level 2)
 * \throws pappso::PappsoException when the peak list file name, the scan id,
 *         the precursor or the ms2 data is empty, or when the "index", "mz"
 *         or "intensity" key is missing
 */
pappso::QualifiedMassSpectrumSPtr
PsmFileReaderBase::getCurrentQualifiedMassSpectrumSPtr() const
{
  if(m_currentPeaklistFile.name.isEmpty())
    {
      throw pappso::PappsoException(QObject::tr("ERROR: m_currentPeaklistFile is empty"));
    }
  if(m_cborScanId.isEmpty())
    {
      throw pappso::PappsoException(QObject::tr("ERROR: m_cborScanId is empty"));
    }
  if(m_cborScanPrecursor.isEmpty())
    {
      throw pappso::PappsoException(QObject::tr("ERROR: m_cborScanPrecursor is empty"));
    }

  if(!m_cborScanId.contains(QStringLiteral("index")))
    {
      throw pappso::PappsoException("There is no scan index");
    }

  if(m_cborScanMs2.isEmpty())
    {
      throw pappso::PappsoException(QObject::tr("ERROR: m_cborScanMs2 is empty"));
    }
  if(!m_cborScanMs2.contains(QStringLiteral("mz")))
    {
      throw pappso::PappsoException("There is no ms2 mz values");
    }
  if(!m_cborScanMs2.contains(QStringLiteral("intensity")))
    {
      throw pappso::PappsoException("There is no ms2 intensity values");
    }

  // spectrum identity: run (peak list file + sample name), then scan index
  pappso::MsRunId msrun_id(m_currentPeaklistFile.name);
  msrun_id.setSampleName(m_currentSampleName);
  pappso::MsRunIdCstSPtr msrun_id_sp = std::make_shared<const pappso::MsRunId>(msrun_id);
  pappso::MassSpectrumId ms_id(msrun_id_sp);
  ms_id.setSpectrumIndex(m_cborScanId.value("index").toInteger());

  // native_id
  if(m_cborScanId.contains(QStringLiteral("native_id")))
    {
      ms_id.setNativeId(m_cborScanId.value("native_id").toString());
    }

  // peaks: mz and intensity are parallel arrays; the mz array drives the
  // loop. NOTE(review): when the intensity array is shorter, the missing
  // values presumably read as 0 (out-of-range at() gives an undefined
  // value) - confirm whether that should be an error instead.
  const QCborArray mz_array        = m_cborScanMs2.value("mz").toArray();
  const QCborArray intensity_array = m_cborScanMs2.value("intensity").toArray();
  const qsizetype peak_count       = mz_array.size();

  std::vector<DataPoint> data_point_vector;
  data_point_vector.reserve(static_cast<std::size_t>(peak_count));
  for(qsizetype i = 0; i < peak_count; ++i)
    {
      data_point_vector.push_back(
        {mz_array.at(i).toDouble(), intensity_array.at(i).toDouble()});
    }

  MassSpectrum mass_spectrum(data_point_vector);
  pappso::PrecursorIonData precursor_ion_data;

  pappso::QualifiedMassSpectrum qualified_mass_spectrum(ms_id);
  qualified_mass_spectrum.setMassSpectrumSPtr(mass_spectrum.makeMassSpectrumSPtr());
  qualified_mass_spectrum.setMsLevel(2);

  // precursor fields are all optional; absent ones keep their default value
  if(m_cborScanPrecursor.contains(QStringLiteral("z")))
    {
      precursor_ion_data.charge = m_cborScanPrecursor.value("z").toInteger();
    }
  if(m_cborScanPrecursor.contains(QStringLiteral("mz")))
    {
      precursor_ion_data.mz = m_cborScanPrecursor.value("mz").toDouble();
    }
  if(m_cborScanPrecursor.contains(QStringLiteral("intensity")))
    {
      precursor_ion_data.intensity = m_cborScanPrecursor.value("intensity").toDouble();
    }
  qualified_mass_spectrum.appendPrecursorIonData(precursor_ion_data);

  if(m_cborScanMs2.contains(QStringLiteral("rt")))
    {
      qualified_mass_spectrum.setRtInSeconds(m_cborScanMs2.value("rt").toDouble());
    }

  return qualified_mass_spectrum.makeQualifiedMassSpectrumSPtr();
}

/** \brief Hook called by readRoot() when the "sample_list" element is found,
 * before its array is entered.
 *
 * This implementation does nothing.
 *
 * \param monitor unused here
 */
void
PsmFileReaderBase::sampleListStarted(pappso::UiMonitorInterface &monitor [[maybe_unused]])
{
}

/** \brief Hook called by readRoot() after the "sample_list" array has been
 * left.
 *
 * This implementation does nothing.
 *
 * \param monitor unused here
 */
void
PsmFileReaderBase::sampleListFinished(pappso::UiMonitorInterface &monitor [[maybe_unused]])
{
}


/** \brief Compute the precursor mass from its m/z and charge state.
 *
 * The result is mz_prec * charge - MHPLUS * charge. MHPLUS is defined
 * elsewhere; presumably the mass of a proton.
 *
 * \param mz_prec precursor m/z
 * \param charge precursor charge state
 * \return the computed mass
 */
double
PsmFileReaderBase::getPrecursorMass(double mz_prec, uint charge) const
{
  const double charge_as_double = static_cast<double>(charge);

  // same two-step evaluation as before: scale first, then subtract
  const double scaled_mz = mz_prec * charge_as_double;
  return scaled_mz - (MHPLUS * charge_as_double);
}


} // namespace psm
} // namespace cbor
} // namespace pappso
