/*
 * Written by Bastien Chevreux (BaCh)
 *
 * Copyright (C) 1997-2000 by the German Cancer Research Center (Deutsches
 *   Krebsforschungszentrum, DKFZ Heidelberg) and Bastien Chevreux
 * Copyright (C) 2000 and later by Bastien Chevreux
 *
 * All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the
 * Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
 *
 */


#include "mira/contig.H"
#include "util/progressindic.H"
#include "util/misc.H"

#ifdef MIRA_HAS_EDIT
#include "examine/scf_look.H"
#endif


using namespace std;


//#define CEBUGFLAG
#ifdef CEBUGFLAG
#define CEBUG(bla)   {cout << bla; cout.flush();}
#define CEBUGF(bla)  {cout << bla; cout.flush();}
#define CEBUGF2(bla)  {cout << bla; cout.flush();}
#else
#define CEBUG(bla)
#define CEBUGF(bla)
#define CEBUGF2(bla)
#endif





/*************************************************************************
 *
 * Gives back the "best" (= largest) range which should be free of
 *  misassemblies. Needs SRMc consensus tags to be set.
 *
 * 1. mark every consensus position covered by a SRMc tag
 * 2. collect all ranges free of SRMc as (start, end) pairs
 * 3. if more than one such range exists, return the largest one
 *
 * Returns (-1,-1) if the contig is empty, has no consensus tags, or if
 *  at most one SRMc free range was found ("Full contig OK").
 *
 *************************************************************************/

std::pair<int32,int32> Contig::findBestNonMisassembledRange()
{
  FUNCSTART("void Contig::findBestNonMisassembledRange()");

  pair<int32,int32> ret(-1,-1);

  // we might have been called with a contig which has been discarded (empty)
  //  -> bail out before posset(CON_counts.size(),0) creates a segfault
  // or has no consensus tags set, then we can return anyway
  if(CON_counts.empty()
     || CON_consensus_tags.empty()){
    return ret;
  }

  cout << "Looking for best SRM free range in contig " << endl;

  // just in case we have data not generated by MIRA with SRMc tags of length >1
  //  or with overlapping SRMc, do it the complicated way:
  //  posset[p]==1 <=> consensus position p is covered by at least one SRMc tag
  vector<uint8> posset(CON_counts.size(),0);
  for(auto & cte : CON_consensus_tags){
    BUGIFTHROW(cte.from<0 || cte.to>=CON_counts.size(),"Ooops for a tag?\n" << cte << "\ncte.from " << cte.from << "<0 || cte.to " << cte.to << " >=CON_counts.size() " << CON_counts.size());
    if(cte.identifier==CON_tagentry_idSRMc){
      for(auto pos=cte.from; pos<=cte.to; ++pos){
        posset[pos]=1;
      }
    }
  }

  // ranges is a flat list of pairs: ranges[2*i] is the start of the i-th
  //  SRMc free range, ranges[2*i+1] its end
  // In all scan loops below, the bounds check must come first: testing
  //  posset[pos] before pos<posset.size() reads past the end of the vector
  vector<int32> ranges;
  uint32 pos=0;
  while(pos<posset.size() && posset[pos]) ++pos;
  while(pos < posset.size()){
    ranges.push_back(pos);
    for(; pos<posset.size() && posset[pos]==0; ++pos) {}
    ranges.push_back(pos);
    // if we're at end of contig, then the range (being [ [) must end one after
    if(pos==posset.size()) ++ranges.back();

    for(; pos<posset.size() && posset[pos]; ++pos) {}
  }

  if(ranges.size()>2){
    int32 largest=0;
    uint32 lindex=0;
    uint32 rindex=0;
    // step by 2 so that only (start,end) pairs of SRMc free ranges are
    //  compared; stepping by 1 would also evaluate (end,next start), i.e.
    //  the SRMc tagged stretches between two free ranges
    for(uint32 ri=0; ri+1<ranges.size(); ri+=2){
      cout << "Range " << ranges[ri] << '\t' << ranges[ri+1] << endl;
      if(ranges[ri+1]-ranges[ri] > largest){
        lindex=ri;
        rindex=ri+1;
        largest=ranges[rindex]-ranges[lindex];
      }
    }
    cout << "Largest OK: " << ranges[lindex] << '\t' << ranges[rindex] << '\t' << getContigName() << endl;
    ret=make_pair(ranges[lindex],ranges[rindex]);
    //trimContigToRange(ranges[lindex],ranges[rindex]);
  }else{
    cout << "Full contig OK.\n";
  }
  return ret;
}



/*************************************************************************
 *
 * Trims the contig down to the range given: reads outside of the range
 *  are removed, then reads without overlap to other reads, then
 *  everything behind the first coverage hole. Finally, columns consisting
 *  only of gaps are deleted.
 *
 * Nothing is changed if a simulated run of tctr_helper1() reports that
 *  no read would stay.
 *
 * from[ to[
 *
 *************************************************************************/

//#define CEBUG(bla)   {cout << bla; cout.flush();}
void Contig::trimContigToRange(uint32 frompos, uint32 topos)
{
  FUNCSTART("void Contig::trimContigToRange(uint32 frompos, uint32 topos)");
  try{
    CEBUG("trimContigToRange clen: " << getContigLength() << endl);

    // dry run first: if not a single read fits into the range, leave the
    //  contig untouched
    if(!tctr_helper1(frompos,topos,true)) return;

    // now for real
    tctr_helper1(frompos,topos,false);

    // Removing reads may have resulted in holes in the contig (no
    //  coverage), this can happen when reads are of very different sizes
    //
    //  ----------------------------------------------------
    //       -----------------------------------------------
    //            -----------
    //                                 ---------------------
    //         ^
    //         cut here
    // leads to:  xxxxxxxxxxx0000000000xxxxxxxxxxxxxxxxxxxxx
    //
    // Note that the 0 coverage area might have a 0 length when reads
    //  are adjacent but not overlapping or overlapped by other reads
    //            -----------
    //                       ---------------------
    // Both reads are removed then

    // step 1: get rid of reads sitting alone (coverage 1 only)
    tctr_helper2();

    // step 2: holes may still be there
    //    -----------          ------------
    //        ------------       --------
    tctr_helper3();

    deleteStarOnlyColumns();
  }
  catch(Notify n) {
    n.handleError(THISFUNC);
  }
}
//#define CEBUG(bla)

/*************************************************************************
 *
 * Helper: counts (simulateonly==true) or removes (simulateonly==false)
 *  the reads lying outside of the range
 *
 * from[ to[
 *
 * A read is outside if it starts before frompos or if its start offset
 *  plus its clipped length is >= topos
 *
 * When removing, at least one read is kept in the contig
 *
 * Returns true if at least one read was found inside the range
 *
 *************************************************************************/

//#define CEBUG(bla)   {cout << bla; cout.flush();}
bool Contig::tctr_helper1(uint32 frompos, uint32 topos, bool simulateonly)
{
  FUNCSTART("bool Contig::tctr_helper1(uint32 frompos, uint32 topos, bool simulateonly)");

  CEBUG("tctr_helper1\n");
  if(!simulateonly)  CEBUG("Cutting down " << getContigName() << " down to " << frompos << " " << topos << endl);

  size_t removedcount=0;
  bool foundinside=false;

  // no increment in the loop header: removeRead() hands back the iterator
  //  to continue with
  for(auto readI=CON_reads.begin(); readI!=CON_reads.end(); ){
    const bool isinside=
      readI.getReadStartOffset() >= frompos
      && readI.getReadStartOffset() + readI->getLenClippedSeq() < topos;

    if(isinside){
      foundinside=true;
      ++readI;
      continue;
    }

    // from here on: read sticks out of the range
    if(simulateonly){
      // just count it
      ++removedcount;
      ++readI;
    }else if(CON_reads.size()>1){
      ++removedcount;
      // update of internal contig statistics
      --CON_readsperstrain[readI->getStrainID()];
      --CON_readsperreadgroup[readI->getReadGroupID().getLibId()];
      CEBUG("Removing " << readI->getName() << endl);
      readI=CON_reads.removeRead(readI);
    }else{
      // last read of the contig: stays in, is not counted as removed
      ++readI;
    }
  }

  BUGIFTHROW(CON_reads.begin() == CON_reads.end(),"Oooops, deleted everything?");

  // contig was changed: recompute counts and chomp both ends
  if(!simulateonly && removedcount){
    definalise();
    rebuildConCounts();
    chompFront(-1);
    chompBack(-1);
  }

  return foundinside;
}
//#define CEBUG(bla)


/*************************************************************************
 *
 * Removes all reads covering positions of the contig where coverage == 1
 *
 * but keep at least one read
 *
 *************************************************************************/

//#define CEBUG(bla)   {cout << bla; cout.flush();}
void Contig::tctr_helper2()
{
  CEBUG("tctr_helper2\n");

  if(CON_reads.size()<=1) return;

  uint32 numremoved=0;

  auto pcrI=CON_reads.begin();
  while(pcrI!=CON_reads.end() && CON_reads.size()>1){
    auto ccI=CON_counts.begin();
    ccI.advance(pcrI.getReadStartOffset());
    bool greaterone=false;
    for(auto x=0; x<pcrI->getLenClippedSeq(); ++x, ++ccI){
      CEBUG(pcrI->getName() << "\t" << pcrI.getReadStartOffset()+x << "\t" << *ccI << endl);
      if(ccI->total_cov>1) {
	greaterone=true;
	break;
      }
    }
    if(!greaterone){
      ++numremoved;
      // update of internal contig statistics
      --CON_readsperstrain[pcrI->getStrainID()];
      --CON_readsperreadgroup[pcrI->getReadGroupID().getLibId()];
      CEBUG("Removing 1cov " << pcrI->getName() << endl);
      pcrI=CON_reads.removeRead(pcrI);
    }else{
      ++pcrI;
    }
  }
  CEBUG("Done removing\n");
  if(numremoved){
    CEBUG("#1\n");
    definalise();
    CEBUG("#2\n");
    rebuildConCounts();
    CEBUG("#3\n");
    chompFront(-1);
    CEBUG("#4\n");
    chompBack(-1);
  }
}
//#define CEBUG(bla)


/*************************************************************************
 *
 * Cut the contig at the first coverage hole
 *
 *************************************************************************/

//#define CEBUG(bla)   {cout << bla; cout.flush();}
void Contig::tctr_helper3()
{
  CEBUG("tctr_helper3\n");

  if(CON_reads.size()<=1) return;

  auto ccI=CON_counts.begin();
  for(; ccI!=CON_counts.end(); ++ccI){
    if(ccI->total_cov==0) break;
  }
  if(ccI==CON_counts.end()) return;

  uint32 delpos=ccI-CON_counts.begin();
  uint32 numremoved=0;
  auto pcrI=CON_reads.begin();
  while(pcrI!=CON_reads.end()){
    if(pcrI.getReadStartOffset()>=delpos){
      ++numremoved;
      // update of internal contig statistics
      --CON_readsperstrain[pcrI->getStrainID()];
      --CON_readsperreadgroup[pcrI->getReadGroupID().getLibId()];
      CEBUG("Removing hole " << pcrI->getName() << endl);
      pcrI=CON_reads.removeRead(pcrI);
    }else{
      ++pcrI;
    }
  }
  if(numremoved){
    definalise();
    rebuildConCounts();
    chompFront(-1);
    chompBack(-1);
  }
}
//#define CEBUG(bla)



/*************************************************************************
 *
 * Checks a (newly entered at call time, but might be used for other means)
 *  read whether it has mismatches occurring at SRMB or WRMB position
 *
 * Gives back 'true' if a certain level of danger is attained
 *
 * Dangerlevel is currently 4. Each SRMB/SRMB or SRMB/WRMB mismatch counts 4,
 *  WRMB/WRMB mismatches count for 1 if both basequalities are below 35,
 *  or 2 if one of the qualities is >= 35. Or 1 if WRMB/WRMB and one of the
 *  bases is a gap.
 *
 * TODO: 07.09.2012
 *  Gahhhhh, extremely inefficient for extreme contigs (RNASeq, metagenome
 *  with outlier coverage) where contig build times of >= 30 minutes can
 *  be >= 80% due to this routine.
 *
 *************************************************************************/

//#define CEBUG(bla)   {cout << bla; cout.flush();}
//#define CEBUGF(bla)  {cout << bla; cout.flush();}
//#define CEBUGFLAG

//#define CEBUG(bla)   {if(dodebug) {cout << bla; cout.flush();}}

bool Contig::analyseRMBZones(PlacedContigReads::const_iterator pcrI)
{
  FUNCSTART("bool Contig::analyseRMBZones(PlacedContigReads::const_iterator pcrI)");

  // Overview:
  //  - returns false at once if the read has an ORPID of -1 or carries none
  //    of the tags listed in CON_baselock_ids
  //  - otherwise walks along the consensus positions covered by the read;
  //    at positions which are locked (baselock or snplock counter > 0) and
  //    where more than one base type is present, the read is compared to
  //    the other reads at that position
  //  - the per-position danger values are summed up in dangerlevel; once
  //    the sum reaches 4, the walk stops and true is returned

  //bool dodebug=pcrI->getName()=="...";

  CEBUG("Analysing pcrI: " << pcrI->getName()<<endl);

  //CEBUG(*this);

  // NOTE(review): presumably -1 marks reads which are not part of the read
  //  pool -- confirm against PlacedContigReads
  if(pcrI.getORPID()==-1) return false;

  BOUNDCHECK(pcrI.getReadStartOffset(), 0, static_cast<uint32>(CON_counts.size()));

  // NOTE(review): con_params is not used anywhere below in this function
  contig_parameters const & con_params= (*CON_miraparams)[0].getContigParams();

  // ok, we need to do this only if the read has baselock tags
  // *sigh* effect on time used to build large contigs is NIL!
  bool haslocktag=false;
  if(pcrI->getNumOfTags()){
    for(auto & blie : CON_baselock_ids){
      CEBUG("Checking " << multitag_t::getIdentifierStr(blie) << '\n');
      if(pcrI->hasTag(blie)) {
	haslocktag=true;
	break;
      }
    }
  }
  if(!haslocktag) return false;

  bool isdangerous=false;
  int32 dangerlevel=0;

  // ccI and actcontigpos both start at the first consensus position covered
  //  by the read and are advanced in lockstep with readpos by the loop below
  auto ccI=CON_counts.begin();
  advance(ccI, pcrI.getReadStartOffset());
  int32 actcontigpos=pcrI.getReadStartOffset();

  CEBUG("Need check\n");

  // taken out of loop to save constructor time
  vector<PlacedContigReads::const_iterator> overlappcrIs;
  auto chklen=pcrI->getLenClippedSeq();
  for(uint32 readpos=0; readpos<chklen; ++actcontigpos, ++ccI, ++readpos){
    CEBUG("acp: " << actcontigpos << "\trp: " << readpos << "\tisdangerous " << isdangerous << endl);
    CEBUG(*ccI << endl);
    // danger threshold was reached at an earlier position: stop walking
    if(isdangerous) break;
    if(ccI->baselock > 0 || ccI->snplock > 0){
      CEBUG("Base locked at pos " << readpos << endl);
      // count how many different base types are present in this column:
      //  A, C, G, T count if their counter is larger than the N counter,
      //  gaps (star) count if there is at least one
      uint32 numbaseset=0;
      if(ccI->A > ccI->N) numbaseset++;
      if(ccI->C > ccI->N) numbaseset++;
      if(ccI->G > ccI->N) numbaseset++;
      if(ccI->T > ccI->N) numbaseset++;
      if(ccI->star > 0) numbaseset++;
      CEBUG(numbaseset << " different bases set\n");
      // only columns with disagreeing reads need a closer look
      if(numbaseset>1){
	// NOTE: this declaration shadows the actcontigpos of the enclosing
	//  function scope; both hold read start offset + readpos
	int32 actcontigpos=pcrI.getReadStartOffset()+readpos;

	// 0 for nothing, 1 for weak,
	//  3 for strong.
	// Do not throw out immediately on a weak one (configurable)
	// NOTE(review): checkForLockedBase() in this file sets 4, not 3,
	//  for the strong case
	int32 ricbaselock;
	int32 ricsnplock;
	int32 ricpos=pcrI.contigPos2UnclippedReadPos(actcontigpos);
	if(pcrI.getReadDirection()>0){
	  checkForLockedBase(ricpos, pcrI, ricbaselock, ricsnplock);
	}else{
	  checkForLockedBaseComplement(ricpos, pcrI, ricbaselock, ricsnplock);
	}

	// base and quality of the analysed read at this contig position
	char ricbase=toupper(pcrI.getBase(actcontigpos));
	base_quality_t ricqual=pcrI.getQuality(actcontigpos);


	CEBUG("Ricbaselock: " << ricbaselock << endl);
	CEBUG("Ricsnplock: " << ricsnplock << endl);
	CEBUG("ricbase: " << ricbase << endl);

	// no need to check further if base is not locked
	//  == in dubio pro reo
	// -> markRepeats() will mark this newly added sequence
	//  if it is wrong
	if(ricbaselock==0 && ricsnplock==0) continue;

	// change 20.06.05: gap bases are now fully counted
	// we don't care if base is a N or X (in dubio pro reo)
	if(ricbase=='N' || ricbase=='X') continue;

	// if it is a WRMB (weak repeat), look at the qual: if it is
	//  >= 35, increase the danger up a notch
	if(ricbaselock==1 && ricqual >= 35){
	  ricbaselock++;
	}

	// fetch all reads present at this contig position (the vector is
	//  reused across positions, hence the clear())
	overlappcrIs.clear();
	getPCRIteratorsAtContigPosition(overlappcrIs, actcontigpos);

	// localdanger: highest lock value seen in a conflict with any other
	//  read at this position
	int32 localdanger=0;
	//for(uint32 actoid=0; actoid<overlappcrIs.size(); actoid++){
	for(auto & opcrIe : overlappcrIs){
	  // BaCh 07.09.12
	  // quick break for contigs with huge coverage
	  if(localdanger>=4) break;

	  //contigread_t & actread=*opcrIe;
	  // skip the analysed read itself and reads with an ORPID of -1
	  if(opcrIe.getORPID()==pcrI.getORPID()) continue;
	  if(opcrIe.getORPID()==-1) continue;

	  // ar == actread == *opcrIe

	  char arbase=toupper(opcrIe.getBase(actcontigpos));
	  CEBUG("arbase in " << opcrIe->getName()<<": " << arbase << endl);

	  // change 20.06.05: gap bases are now fully counted
	  // we don't care if base is a N or X (in dubio pro reo)
	  if(arbase=='N'|| arbase=='X') continue;

	  // bases should differ, really :-)
	  if(ricbase==arbase) continue;

	  // lock status of the other read at this position
	  int32 arbaselock;
	  int32 arsnplock;
	  int32 arpos=opcrIe.contigPos2UnclippedReadPos(actcontigpos);
	  if(opcrIe.getReadDirection()>0){
	    checkForLockedBase(arpos, opcrIe, arbaselock, arsnplock);
	  }else{
	    checkForLockedBaseComplement(arpos, opcrIe, arbaselock, arsnplock);
	  }

	  CEBUG("arbaselock: " << arbaselock << endl);
	  CEBUG("arsnplock: " << arsnplock << endl);
	  // no need to check further if base is not locked
	  //  == in dubio pro reo
	  // -> markRepeats() will mark this newly added sequence
	  //  if it is wrong
	  if(arbaselock==0 && arsnplock==0) continue;

	  base_quality_t arqual=opcrIe.getQuality(actcontigpos);

	  // if it is a WRMB (weak repeat), look at the qual: if it is
	  //  >= 35, increase the danger up a notch
	  if(arbaselock==1 && arqual >= 35){
	    arbaselock++;
	  }

	  // adjust the danger levels back a bit if both RMBs are weak
	  //  and one of the bases a gap
	  // NOTE(review): as written this is a no-op, the condition already
	  //  requires both values to be 1 before they are set to 1
	  if(ricbaselock==1 && arbaselock==1
	     && (ricbase=='*' || arbase=='*')){
	    ricbaselock=1;
	    arbaselock=1;
	  }

	  CEBUG("Performing checks");
	  // check for RMB danger in
	  if(arbaselock>0){
	    // here, we must differ between (ric)RMB/(ar)RMB
	    //  and (ric)SIO/(ar)RMB
	    CEBUG(" RMB");
	    if(ricbaselock>0){
	      // RMB/RMB case
	      // do the bases differ and are they recognised as locked
	      /// (RMB etc.)?  if yes -> danger
	      // if no: in dubio pro reo, the non-locked have not been recognised as dangerous
	      int32 maxlock= max(ricbaselock, arbaselock);
	      if(maxlock > localdanger) localdanger=maxlock;
	    }else{
	      // SIO/RMB case, here we must check that the strains are
	      //  the same for both before saying it's dangerous
	      if(pcrI->getStrainID() == opcrIe->getStrainID()){
		int32 maxlock= max(ricbaselock, arbaselock);
		if(maxlock > localdanger) localdanger=maxlock;
	      }
	    }
	  }

	  // check for SNP danger
	  if(arsnplock>0){
	    // TODO: also differ RMB/RMB and RMB/SIO ?
	    CEBUG(" SNP");
	    // here we must first look at the strain IDs, only if they are
	    //  the same we must react
	    if(pcrI->getStrainID() == opcrIe->getStrainID()){
	      // so, we have a SIOr tag in reads of the same strain, if
	      //  the bases differ -> treat it as RMB!
	      int32 maxlock= max(ricsnplock, arsnplock);
	      if(maxlock > localdanger) localdanger=maxlock;
	    }
	  }
	  CEBUG(".\n");
	}

	CEBUG("localdanger: " << localdanger << endl);

	// accumulate over all positions of the read; 4 is the threshold at
	//  which the read is declared dangerous
	dangerlevel+=localdanger;
	if(dangerlevel>=4) isdangerous=true;
	CEBUG("dangerlevel now: " << dangerlevel << endl);
      }
    }
  }

  CEBUG("isdangerous: " << isdangerous << endl);

  FUNCEND();

  return isdangerous;
}
//#define CEBUG(bla)
//#define CEBUGF(bla)





/*************************************************************************
 *
 * Checks whether the base at readpos in the UNCLIPPED read is covered by
 *  a lock tag
 *
 * baselock: 0 if no tag from CON_baselock_ids covers the position, 1 if
 *  one does, 4 if that tag is a SRMr or CRMr tag
 * snplock: 4 if a tag from CON_snplock_ids covers the position and
 *  con_assume_snp_insteadof_rmb is not set
 *
 * The search through the tags of the read ends at the first SRMr/CRMr
 *  tag or at the first SNP lock tag that counts; tags behind that one are
 *  not looked at anymore
 *
 * Throws if readpos is not within the unclipped read
 *
 *************************************************************************/
void Contig::checkForLockedBase(const uint32 readpos, PlacedContigReads::const_iterator pcrI, int32 & baselock, int32 & snplock)
{
  FUNCSTART("void Contig::checkForLockedBase(const uint32 readpos, PlacedContigReads::const_iterator pcrI, int32 & baselock, int32 & snplock)");

  CEBUG("Searching for lock tag in " << pcrI->getName() << " at pos " << readpos<< endl);
  //Read::setCoutType(Read::AS_TEXTCLIPS);
  CEBUG(*pcrI);

  // All tags must be looked at until a strong one is found, because a
  //  weak tag might be shadowed by a strong one further down the list
  // TODO: maybe make more "intelligent" by sorting all tags prior to assembly?
  //  maybe dangerous and error prone for little gain

  BUGIFTHROW(readpos >= pcrI->getLenSeq(), "readpos " << readpos << " >= " << pcrI->getLenSeq() << " not in read " << pcrI->getName() << " ???");

  baselock=0;
  snplock=0;

  for(auto & rtag : pcrI->getTags()){
    // tags not covering the position are of no interest
    if(readpos < rtag.from
       || readpos > rtag.to) continue;

    CEBUG("Pos matches, must check tag " << multitag_t::getIdentifierStr(rtag.identifier) << endl);

    // base lock tags: weak ones are remembered, strong ones end the search
    for(auto & lockid : CON_baselock_ids){
      CEBUG("Checking " << multitag_t::getIdentifierStr(lockid) << '\n');
      if(!(rtag.identifier==lockid)) continue;

      baselock=1;
      if(rtag.identifier==Read::REA_tagentry_idSRMr
         || rtag.identifier==Read::REA_tagentry_idCRMr) {
        baselock=4;
        FUNCEND();
        return;
      }
    }

    // SNP lock tags count only if SNPs are not assumed instead of RMBs
    if((*CON_miraparams)[0].getContigParams().con_assume_snp_insteadof_rmb) continue;

    for(auto & snpid : CON_snplock_ids){
      if(rtag.identifier==snpid){
        snplock=4;
        FUNCEND();
        return;
      }
    }
  }

  CEBUG("Will return baselock " << baselock << " snplock " << snplock << '\n');

  FUNCEND();
  return;
}


/*************************************************************************
 *
 * Same as checkForLockedBase(), but readpos is a position in the
 *  UNCLIPPED complement read: it is mirrored to (length of unclipped
 *  read - 1 - readpos) before checkForLockedBase() is called
 *
 *************************************************************************/
void Contig::checkForLockedBaseComplement(const uint32 readpos, PlacedContigReads::const_iterator pcrI, int32 & baselock, int32 & snplock)
{
  // translate the complement position into the forward position
  const auto forwardpos=pcrI->getLenSeq()-1-readpos;

  checkForLockedBase(forwardpos, pcrI, baselock, snplock);
}




/*************************************************************************
 *
 *
 *
 *
 *************************************************************************/

////++++////bool Contig::analyseDangerZones(const contigread_t & ric)
////++++////{
////++++////  FUNCSTART("bool Contig::analyseDangerZones(const contigread_t & ric)");
////++++////
////++++////  //  cout << "dandan";
////++++////
////++++////  contig_parameters const & con_params= (*CON_miraparams)[0].getContigParams();
////++++////
////++++////  bool isdangerous=false;
////++++////
////++++////  uint32 analysemode=con_params.con_danger_analyse_mode;
////++++////
////++++////  // if analyse mode = NONE then return
////++++////  if(analysemode==0) {
////++++////    return isdangerous;
////++++////  }
////++++////
////++++////  // if mode = Signal but the signal is not available, fall back to TEXTmode
////++++////  if(analysemode==2 && (const_cast<contigread_t &>(ric)).read.hasSCFData()==false) analysemode=1;
////++++////
////++++////  // if mode = Signal but the EDIT lib is not available, fall back to TEXTmode
////++++////#ifndef MIRA_HAS_EDIT
////++++////  analysemode=1;
////++++////#endif
////++++////
////++++////  uint32 checklen=0;
////++++////  uint32 totalchecklen=0;
////++++////  uint32 errorcount=0;
////++++////
////++++////  for(uint32 i=0; i<ric.read.getNumOfTags(); i++){
////++++////    multitag_t acttag=ric.read.getTag(i);
////++++////    //CEBUG("Tag #" << i << "\tfrom: " << acttag.from << "\tto: " << acttag.to << "\t" << acttag.identifier[0] << acttag.identifier[1] << acttag.identifier[2] << acttag.identifier[3] << endl);
////++++////    for(uint32 j=0; j<CON_danger_zones_ids.size(); j++){
////++++////      if(acttag.identifier==CON_danger_zones_ids[j]){
////++++////
////++++////#ifdef MIRA_HAS_EDIT
////++++////	// must make once this as thomas buffer might contain reads that do not match mine
////++++////	if(totalchecklen==0) ScfBuffer::discard();
////++++////#endif
////++++////
////++++////	// check for danger
////++++////	int32 tleftbound;
////++++////	int32 trightbound;
////++++////	int32 contigpos;
////++++////	vector<char>::const_iterator rI;
////++++////	vector<char>::const_iterator rIb;
////++++////
////++++////	// TODO: alles ungetestet!
////++++////	if(ric.direction>0){
////++++////	  tleftbound=acttag.from-ric.read.getLeftClipoff();
////++++////	  trightbound=acttag.to-ric.read.getLeftClipoff();
////++++////	  rI=ric.read.getClippedSeqIterator();
////++++////	}else{
////++++////	  tleftbound=ric.read.getRightClipoff()-acttag.to-1;
////++++////	  trightbound=ric.read.getRightClipoff()-acttag.from-1;
////++++////
////++++//////	    int32 tmpoffset=ric.read.getLenSeq()-ric.read.getRightClipoff();
////++++//////	    tleftbound=ric.read.getLenSeq()-acttag.to-tmpoffset;
////++++//////	    trightbound=ric.read.getLenSeq()-acttag.from-tmpoffset;
////++++////	  rI=ric.read.getClippedComplementSeqIterator();
////++++////	}
////++++////	rIb=rI;
////++++////	if(tleftbound<0) tleftbound=0;
////++++////	if(tleftbound>static_cast<int32>(ric.read.getLenClippedSeq())) tleftbound=ric.read.getLenClippedSeq();
////++++////	if(trightbound<0) trightbound=0;
////++++////	if(trightbound>static_cast<int32>(ric.read.getLenClippedSeq())) trightbound=ric.read.getLenClippedSeq();
////++++////
////++++////	contigpos=ric.offset+tleftbound;
////++++////	// sparen, wird durch die 4 ifs oben gemach:
////++++////	// BOUNDCHECK(tleftbound, 0, ric.read.getLenClippedSeq()+1);
////++++////	advance(rI, tleftbound);
////++++////
////++++////	checklen=trightbound-tleftbound;
////++++////	totalchecklen+=checklen;
////++++////
////++++////	for(int32 k=tleftbound; k<trightbound; k++,contigpos++, rI++){
////++++////#if 0
////++++////	  cout << CON_counts[contigpos].A;
////++++////	  cout << "\t" << CON_counts[contigpos].C;
////++++////	  cout << "\t" << CON_counts[contigpos].G;
////++++////	  cout << "\t" << CON_counts[contigpos].T;
////++++////	  cout << "\t:" << *rI << endl;
////++++////#endif
////++++////
////++++////	  if(contigpos>=static_cast<int32>(CON_counts.size())){
////++++////	    MIRANOTIFY(Notify::INTERNAL, "contigpos>=CON_counts.size()?");
////++++////	  }
////++++////
////++++////	  uint16 maximum= max(CON_counts[contigpos].A, max(CON_counts[contigpos].C, max(CON_counts[contigpos].G, CON_counts[contigpos].T)));
////++++////	  uint8 counts=0;
////++++////	  char base=0;
////++++////	  //CEBUG(CON_counts[i].A << "\t" << CON_counts[i].C << "\t" << CON_counts[i].G << "\t" << CON_counts[i].T << "\n");
////++++////	  if(CON_counts[contigpos].A==maximum){
////++++////	    counts++;
////++++////	    base='A';
////++++////	  }
////++++////	  if(CON_counts[contigpos].C==maximum){
////++++////	    counts++;
////++++////	    base='C';
////++++////	  }
////++++////	  if(CON_counts[contigpos].G==maximum){
////++++////	    counts++;
////++++////	    base='G';
////++++////	  }
////++++////	  if(CON_counts[contigpos].T==maximum){
////++++////	    counts++;
////++++////	    base='T';
////++++////	  }
////++++////	  if(counts>1) base='N';
////++++////
////++++//////#ifdef CEBUGFLAG
////++++//////	    if(base==0){
////++++//////	      cout << "Gnargl!" << endl;
////++++//////	      cout << *this;
////++++//////	      cout << "tleftbound: " << tleftbound<< endl;
////++++//////	      cout << "trightbound: " << trightbound<< endl;
////++++//////	      cout << "contigpos: " << contigpos<< endl;
////++++//////	      cout << "*rI: " << *rI << (uint16) *rI << endl;
////++++//////	    }
////++++//////#endif
////++++////
////++++////	  // Now, look if there is a problem
////++++////	  if(base!=toupper(*rI)){
////++++////	    CEBUG("bah");
////++++////	    // uh, yes, there is
////++++////
////++++////	    //	  Kaputt: was ist, wenn keine Datenfiles da?
////++++////
////++++////	    if(analysemode==1){
////++++////	      // Text mode analyse routines
////++++////	      if(*rI=='*'){
////++++////		if(CON_counts[contigpos].star<=1){
////++++////		  CEBUG("MMMStars: " << CON_counts[contigpos].star << endl);
////++++////		  CEBUG("MMMCover: " << CON_counts[contigpos].total_cov << endl);
////++++////		  // treat it as error only if there isn't another star
////++++////		  //  already in the contig.
////++++////		  errorcount++;
////++++////		}
////++++////		// This was wrong: N in consensus means probably error!
////++++////		//	      }else if(base!='N' && *rI!='N'){
////++++////	      }else if(!(toupper(*rI)=='N' || toupper(*rI)=='X')){
////++++////		errorcount++;
////++++////	      }
////++++////	    }else{
////++++////	      // Signal mode analyse routines.
////++++////	      int32 dt=666;
////++++////	      CEBUG("SSSignalanalysis: ");
////++++////	      if(base=='N' || base=='X'){
////++++////		if(CON_counts[contigpos].total_cov==2){
////++++////		  char alternatebase='*';
////++++////		  switch(toupper(*rI)){
////++++////		  case 'A':{
////++++////		    if(CON_counts[contigpos].C!=0) alternatebase='C';
////++++////		    if(CON_counts[contigpos].G!=0) alternatebase='G';
////++++////		    if(CON_counts[contigpos].T!=0) alternatebase='T';
////++++////		    break;
////++++////		  }
////++++////		  case 'C':
////++++////		    if(CON_counts[contigpos].A!=0) alternatebase='A';
////++++////		    if(CON_counts[contigpos].G!=0) alternatebase='G';
////++++////		    if(CON_counts[contigpos].T!=0) alternatebase='T';
////++++////		    break;
////++++////		  case 'G':
////++++////		    if(CON_counts[contigpos].A!=0) alternatebase='A';
////++++////		    if(CON_counts[contigpos].C!=0) alternatebase='C';
////++++////		    if(CON_counts[contigpos].T!=0) alternatebase='T';
////++++////		    break;
////++++////		  case 'T':
////++++////		    if(CON_counts[contigpos].A!=0) alternatebase='A';
////++++////		    if(CON_counts[contigpos].C!=0) alternatebase='C';
////++++////		    if(CON_counts[contigpos].G!=0) alternatebase='G';
////++++////		    break;
////++++////		  case 'N':
////++++////		  case 'X':
////++++////		  case '*':
////++++////		    if(CON_counts[contigpos].A!=0) alternatebase='A';
////++++////		    if(CON_counts[contigpos].C!=0) alternatebase='C';
////++++////		    if(CON_counts[contigpos].G!=0) alternatebase='G';
////++++////		    if(CON_counts[contigpos].T!=0) alternatebase='T';
////++++////		    break;
////++++////		  default : {}
////++++////		  }
////++++////		  // if alternatebase still == '*' then we have two reads with
////++++////		  //  a '*' each, do nothing, no error
////++++////		  if(alternatebase!='*'){
////++++////		    // N in cons with 2 reads: check if base could be alternate base:
////++++////#ifdef MIRA_HAS_EDIT
////++++////		    dt=deepThought(ric, rI-rIb, alternatebase);
////++++////#else
////++++////		    dt=-1;
////++++////#endif
////++++////		    if(dt<0){
////++++////		      errorcount++;
////++++////		    }
////++++////		  }
////++++////		}else{
////++++////		  // N in consensus with more than 2 reads means probably error!
////++++////		  errorcount++;
////++++////		}
////++++////	      }else if(!(toupper(*rI)=='N' || toupper(*rI)=='X')){
////++++////#ifdef MIRA_HAS_EDIT
////++++////		dt=deepThought(ric, rI-rIb, base);
////++++////#else
////++++////		dt=-1;
////++++////#endif
////++++////		if(dt<0){
////++++////		  errorcount++;
////++++////		}
////++++////	      }
////++++////#ifdef CEBUGFLAG
////++++////	      if(dt==666){
////++++////		cout << "TTSignal: not analysed" << endl;
////++++////	      }else if(dt <0){
////++++////		cout << "TTSignal: rejected" << endl;
////++++////	      }else if(dt==0){
////++++////		cout << "TTNo SCF Signal" << endl;
////++++////	      }else{
////++++////		cout << "TTSignal: accepted" << endl;
////++++////	      }
////++++////#endif
////++++////	    }
////++++////	  }
////++++////	}
////++++////      }
////++++////    }
////++++////  }
////++++////
////++++////
////++++////  // if more than x% errors in length of all danger zones -> reject it
////++++////  if(totalchecklen>0
////++++////     && static_cast<int32>((100*errorcount)/totalchecklen)>con_params.con_danger_max_error_rate){
////++++////    //  if(errorcount>0) {
////++++////    CEBUG("MMreject: " << errorcount << endl);
////++++////    isdangerous=true;
////++++////
////++++////
////++++////    CON_last_dangerous_overlaps.clear();
////++++////
////++++////    int32 i=0;
////++++////    vector<contigread_t>::const_iterator I= CON_reads.begin();
////++++////
////++++////    int32 rstart=ric.offset;
////++++////    int32 rend=rstart+ric.read.getLenClippedSeq();
////++++////
////++++////    while(I!=CON_reads.end()){
////++++////      if( ((I->offset >= rstart) && (I->offset < rend))
////++++////	  || ((I->offset+I->read.getLenClippedSeq() >= rstart)
////++++////	      && (I->offset+I->read.getLenClippedSeq() < rend))){
////++++////	CON_last_dangerous_overlaps.push_back(i);
////++++////      }
////++++////      I++;i++;
////++++////    }
////++++////  }
////++++////
////++++////  FUNCEND();
////++++////
////++++////  //  debugOff();
////++++////
////++++////  return isdangerous;
////++++////}


/*************************************************************************
 *
 *
 *
 *
 *************************************************************************/

//bool Contig::analyseAllZones(const contigread_t & ric)
//{
//  FUNCSTART("bool Contig::analyseAllZones(const contigread_t & ric)");
//
//  cout << "alai";
//
//  // must make this as thomas' buffer might contain reads that do not match mine
//#ifdef MIRA_HAS_EDIT
//  ScfBuffer::discard();
//#endif
//
//  contig_parameters const & con_params= (*CON_miraparams)[0].getContigParams();
//
//  bool isdangerous=false;
//
//  uint32 analysemode=con_params.con_danger_analyse_mode;
//
//  // if analyse mode = NONE then return
//  if(analysemode==0) return isdangerous;
//
//  // if mode = Signal but the signal is not available, fall back to TEXTmode
//  if(analysemode==2 && (const_cast<contigread_t &>(ric)).read.hasSCFData()==false) analysemode=1;
//
//  // if mode = Signal but the EDIT lib is not available, fall back to TEXTmode
//#ifndef MIRA_HAS_EDIT
//  analysemode=1;
//#endif
//
//
//  uint32 checklen=0;
//  uint32 totalchecklen=0;
//  uint32 errorcount=0;
//
//  int32 contigpos;
//  vector<char>::const_iterator rI;
//  vector<char>::const_iterator rIb;
//
//  // TODO: everything here is untested!
//  if(ric.direction>0){
//    rI=ric.read.getClippedSeqIterator();
//  }else{
//    rI=ric.read.getClippedComplementSeqIterator();
//  }
//  rIb=rI;
//
//  contigpos=ric.offset;
//
//  checklen=ric.read.getLenClippedSeq();
//  totalchecklen+=checklen;
//
//  for(uint32 k=0; k<checklen; k++,contigpos++, rI++){
//    if(contigpos>=static_cast<int32>(CON_counts.size())){
//      MIRANOTIFY(Notify::INTERNAL, "contigpos>=CON_counts.size()?");
//    }
//
//    uint16 maximum= max(CON_counts[contigpos].A, max(CON_counts[contigpos].C, max(CON_counts[contigpos].G, CON_counts[contigpos].T)));
//    uint8 counts=0;
//    char base='N';
//    //	  CEBUG(CON_counts[i].A << "\t" << CON_counts[i].C << "\t" << CON_counts[i].G << "\t" << CON_counts[i].T << "\n");
//    if(CON_counts[contigpos].A==maximum){
//      counts++;
//      base='A';
//    }
//    if(CON_counts[contigpos].C==maximum){
//      counts++;
//      base='C';
//    }
//    if(CON_counts[contigpos].G==maximum){
//      counts++;
//      base='G';
//    }
//    if(CON_counts[contigpos].T==maximum){
//      counts++;
//      base='T';
//    }
//
//    // Now, look if there is a problem
//    if(base!=toupper(*rI)){
//      CEBUG("bah");
//      // uh, yes, there is
//
//      //	  Broken: what happens if no data files are available?
//
//      if(analysemode==1){
//	// Text mode analyse routines
//	if(*rI=='*'){
//	  if(CON_counts[contigpos].star<=1){
//	    CEBUG("MMMStars: " << CON_counts[contigpos].star << endl);
//	    CEBUG("MMMCover: " << CON_counts[contigpos].total_cov << endl);
//	    // treat it as error only if there isn't another star
//	    //  already in the contig.
//	    errorcount++;
//	  }
//	  // This was wrong: N in consensus means probably error!
//	  //	      }else if(base!='N' && *rI!='N'){
//	}else if(!(toupper(*rI)=='N' || toupper(*rI)=='X')){
//	  errorcount++;
//	}
//      }else{
//	// Signal mode analyse routines.
//	int32 dt=666;
//	CEBUG("SSSignalanalysis: ");
//	if(base=='N' || base=='X'){
//	  if(CON_counts[contigpos].total_cov==2){
//	    char alternatebase='*';
//	    switch(toupper(*rI)){
//	    case 'A':{
//	      if(CON_counts[contigpos].C!=0) alternatebase='C';
//	      if(CON_counts[contigpos].G!=0) alternatebase='G';
//	      if(CON_counts[contigpos].T!=0) alternatebase='T';
//	      break;
//	    }
//	    case 'C': {
//	      if(CON_counts[contigpos].A!=0) alternatebase='A';
//	      if(CON_counts[contigpos].G!=0) alternatebase='G';
//	      if(CON_counts[contigpos].T!=0) alternatebase='T';
//	      break;
//	    }
//	    case 'G': {
//	      if(CON_counts[contigpos].A!=0) alternatebase='A';
//	      if(CON_counts[contigpos].C!=0) alternatebase='C';
//	      if(CON_counts[contigpos].T!=0) alternatebase='T';
//	      break;
//	    }
//	    case 'T': {
//	      if(CON_counts[contigpos].A!=0) alternatebase='A';
//	      if(CON_counts[contigpos].C!=0) alternatebase='C';
//	      if(CON_counts[contigpos].G!=0) alternatebase='G';
//	      break;
//	    }
//	    case 'N':
//	    case 'X':
//	    case '*': {
//	      if(CON_counts[contigpos].A!=0) alternatebase='A';
//	      if(CON_counts[contigpos].C!=0) alternatebase='C';
//	      if(CON_counts[contigpos].G!=0) alternatebase='G';
//	      if(CON_counts[contigpos].T!=0) alternatebase='T';
//	      break;
//	    }
//	    default : {}
//	    }
//	    // if alternatebase still == '*' then we have two reads with
//	    //  a '*' each, do nothing, no error
//	    if(alternatebase!='*'){
//	      // N in cons with 2 reads: check if base could be alternate base:
//#ifdef MIRA_HAS_EDIT
//	      dt=deepThought(ric, rI-rIb, alternatebase);
//#else
//	      dt=-1;
//#endif
//	      if(dt<0){
//		errorcount++;
//	      }
//	    }
//	  }else{
//	    // N in consensus with more than 2 reads means probably error!
//	    errorcount++;
//	  }
//	}else if(!(toupper(*rI)=='N' || toupper(*rI)=='X')){
//#ifdef MIRA_HAS_EDIT
//	  dt=deepThought(ric, rI-rIb, base);
//#else
//	  dt=-1;
//#endif
//	  if(dt<0){
//	    errorcount++;
//	  }
//	}
//#ifdef CEBUGFLAG
//	if(dt==666){
//	  cout << "TTSignal: not analysed" << endl;
//	}else if(dt <0){
//	  cout << "TTSignal: rejected" << endl;
//	}else if(dt==0){
//	  cout << "TTNo SCF Signal" << endl;
//	}else{
//	  cout << "TTSignal: accepted" << endl;
//	}
//#endif
//      }
//    }
//  }
//
//
//  // if more than x% errors in length of read, reject
//  //TODO: make this threshold a parameter
//  if(totalchecklen>0 && (100*errorcount)/totalchecklen>  3  ){
//    //  if(errorcount>0) {
//    CEBUG("MMreject: " << errorcount << endl);
//    isdangerous=true;
//
//
//    CON_last_dangerous_overlaps.clear();
//
//    int32 i=0;
//    vector<contigread_t>::const_iterator I= CON_reads.begin();
//
//    int32 rstart=ric.offset;
//    int32 rend=rstart+ric.read.getLenClippedSeq();
//
//    while(I!=CON_reads.end()){
//      if( ((I->offset >= rstart) && (I->offset < rend))
//	  || ((I->offset+I->read.getLenClippedSeq() >= rstart)
//	      && (I->offset+I->read.getLenClippedSeq() < rend))){
//	CON_last_dangerous_overlaps.push_back(i);
//      }
//      I++;i++;
//    }
//  }
//
//  FUNCEND();
//  return isdangerous;
//}





/*************************************************************************
 *
 *  -1 rejected, 0 SCF not available, 1 accepted
 *
 *
 *************************************************************************/

#ifdef MIRA_HAS_EDIT
// Asks the SCF signal analysis whether, at one position of a read, the
//  base 'newbase' is a plausible alternative to the base that was called.
//
//  aRead:   the read as placed in the contig; its direction selects whether
//           the forward or the complement sequence is looked at
//  offset:  position within the clipped sequence of the read, checked via
//           BOUNDCHECK against 0 and the length of the clipped sequence
//  newbase: the base hypothesis handed to findFaultClass() and evaluate()
//
// Returns   1 if evaluate() scores the hypothesis above 0.6 (accepted)
//          -1 if the score is not above 0.6 (rejected)
//           0 if the read does not use adjustments or if a Notify was
//             thrown while working with the SCF data
int32 Contig::deepThought(const contigread_t &aRead, int offset, char newbase)
{
  FUNCSTART("int32 Contig::deepThought(const contigread_t &aRead, int offset, char newbase)");
#if 1
//  cout << "Dubai\n";
//  cout << "Offset: " << offset << "\n";
//  cout << "Newbase: " << newbase << "\t(" << (uint32) newbase << ")\n";
//  cout << "ric.id: " << aRead.id << "\n";
//  cout << "ric.offset: " << aRead.offset << "\n";
//  cout << "ric.direction: " << aRead.direction << "\n";
//  aRead.read.setCoutType(Read::AS_TEXT);
//  cout << aRead.read;
//  aRead.read.setCoutType(Read::AS_CAF);

  BOUNDCHECK(offset, 0, static_cast<int>(aRead.read.getLenClippedSeq()));

  // without adjustments there is no mapping from base position to SCF position
  if(!aRead.read.usesAdjustments()) return 0;

  int32 returnit=0;

  vector<char>::const_iterator seqIt;
  int dbpos;
  int question;
  SCF_look *theScfLook;

  try{
    theScfLook = ScfBuffer::bufferRead(aRead.read, aRead.direction);

    // si2 is the base in question, si1 / si3 its neighbours in the
    //  orientation of the sequence that is iterated; 'N' if there is none
    char si1='N', si2='N', si3='N';
    if (aRead.direction > 0) {
      seqIt = aRead.read.getClippedSeqIterator() + offset;
      dbpos = offset + aRead.read.getLeftClipoff();
    } else {
      seqIt = aRead.read.getClippedComplementSeqIterator() + offset;
      dbpos = offset + aRead.read.getLenSeq() - aRead.read.getRightClipoff();
    }

    // In both directions dbpos is taken to be the index of *seqIt within
    //  the (unclipped) sequence being iterated: forward sequence for
    //  direction>0, complement sequence otherwise.
    // NOTE(review): this assumes getClippedComplementSeqIterator() starts at
    //  complement index getLenSeq()-getRightClipoff(), mirroring the forward
    //  case -- confirm against Read.
    // The guards therefore must be the same for both directions: the
    //  predecessor exists only if dbpos>0, the successor only if dbpos is
    //  not the last index. (The reverse branch used to have both guards
    //  swapped, reading outside the sequence at either end.)
    if(dbpos>0) si1=*(seqIt-1);
    if(dbpos < aRead.read.getLenSeq()-1) si3=*(seqIt+1);
    si2=*seqIt;

    //    cout << "seqIt(s): " << si1 << " " <<  si2 << " " <<  si3 << "\n";

    question = findFaultClass( si1, si2, si3, newbase);

    // the adjustments are indexed in forward orientation, so a complement
    //  index has to be mirrored first
    const vector <int32> & adjust=aRead.read.getAdjustments();
    int32 scfpos;
    if(aRead.direction<0){
      scfpos=adjust[aRead.read.getLenSeq()-1-dbpos];
    }else{
      scfpos=adjust[dbpos];
    }

    //    cout << "Start: Call: scfpos: " << scfpos << " dbpos: " << dbpos << endl;

    float result = evaluate(scfpos, dbpos, question,
                            newbase, theScfLook);

    //  cout << "Succeed" << endl;

    if(result>0.6) {
      returnit=1;
    }else{
      returnit=-1;
    }

    ScfBuffer::bufferDelete(theScfLook);

  }
  catch (const Notify &){
    // caught by const reference: no copy, no slicing of the exception object
    return 0; // SCF could not be loaded
  }

  FUNCEND();
  return returnit;
#else

  cout << "Internal error: if you see this, immediately contact the authors: a compile configuration error happened.\nContig::deepThought\nFatal warning: Signal libs not included." << endl;
  exit(1000);
  return 0;
#endif
}
#endif







/*************************************************************************
 *
 *
 *
 *
 *************************************************************************/

// Resets the list of allowed reference IDs: one entry per read of the
//  read pool, every entry set to "not allowed".
void Contig::allowedRefIDs_forbidAll()
{
  const auto poolsize=CON_readpool->size();
  CON_allowedrefids.assign(poolsize,false);
}

// Resets the list of allowed reference IDs: one entry per read of the
//  read pool, every entry set to "allowed".
void Contig::allowedRefIDs_allowAll()
{
  const auto poolsize=CON_readpool->size();
  CON_allowedrefids.assign(poolsize,true);
}

//#define CEBUG(bla)   {cout << bla; cout.flush();}
// Currently a disabled stub: the only live statements are FUNCSTART, a
//  BUGIFTHROW with a constant 'true' condition and FUNCEND. Judging from the
//  other uses of BUGIFTHROW in this file (guarding "should never be"
//  conditions), the function is expected to always bail out there with the
//  message "need redo ca1 for PlacedContigReads" -- TODO confirm against the
//  macro definition.
// None of the three parameters is used by live code.
//
// The former implementation is kept below, commented out with the
//  "////++++////" prefix. What it did, as far as the disabled code shows:
//  walk over all contig positions and set CON_allowedrefids[] to true for the
//  rails covering positions where the short read coverage (reads of
//  'seqtype' / 'strainid' plus backbone counts) is <= 'belowcoverage', or
//  where the strain is not present in the backbone strain mask.
void Contig::allowedRefIDs_allowSomeRailsOnShortReadCoverage(int32 belowcoverage, int32 strainid, uint8 seqtype)
{
  FUNCSTART("void Contig::allowedRefIDs_allowSomeRailsOnShortReadCoverage(int32 belowcoverage, int32 strainid, uint8 seqtype)");

  // function needs to be re-done, see header comment
  BUGIFTHROW(true,"need redo ca1 for PlacedContigReads");

////++++////  if(CON_allowedrefids.empty()) allowedRefIDs_forbidAll();
////++++////
////++++////  unsigned int sr_seqtypeoffset=0;
////++++////  if(seqtype == ReadGroupLib::SEQTYPE_SOLEXA){
////++++////    sr_seqtypeoffset=0;
////++++////  }else if(seqtype == ReadGroupLib::SEQTYPE_ABISOLID){
////++++////    sr_seqtypeoffset=1;
////++++////  }else{
////++++////    MIRANOTIFY(Notify::INTERNAL, "not called with seqtype SOLEXA or SOLiD?");
////++++////  }
////++++////
////++++////  // this rcci has only rails in it
////++++////  rcci_t rcci_rails;
////++++////  {
  // NOTE when re-enabling: the declaration on the next line lacks its
  //  terminating ';'
////++++////    vector<int32> allowedstrainids
////++++////    // empty would be all ids, pushing back an "impossible" strain id
////++++////    //  makes it equal to "no normal reads"
////++++////    allowedstrainids.push_back(10000);
////++++////    vector<uint8> allowedreadtypes;
////++++////    allowedreadtypes.push_back(100);
////++++////    readColContigIteratorInit(rcci_rails,
////++++////			      allowedstrainids,
////++++////			      allowedreadtypes,
////++++////			      true,           // take rails
////++++////			      false,          // no backbones
////++++////			      false);   // no reads without readpool-reads
////++++////  }
////++++////
////++++////  // this rcci has only the reads of seqtype/strainid in it
////++++////  rcci_t rcci_reads;
////++++////  {
////++++////    vector<int32> allowedstrainids;
////++++////    allowedstrainids.push_back(strainid);
////++++////    vector<uint8> allowedreadtypes;
////++++////    allowedreadtypes.push_back(seqtype);
////++++////    readColContigIteratorInit(rcci_reads,
////++++////			      allowedstrainids,
////++++////			      allowedreadtypes,
////++++////			      false,           // no rails
////++++////			      false,          // no backbones
////++++////			      false);   // no reads without readpool-reads
////++++////  }
////++++////
////++++////  uint8 strainmask=255;
////++++////
////++++////  CEBUG("Strainidtotake: " << strainid << '\n');
////++++////  if(strainid>=0) strainmask=getBBStrainMask(strainid);
////++++////
////++++////  auto ccI=CON_counts.begin();
////++++////  for(uint32 actcontigpos=0; actcontigpos<CON_counts.size() ;actcontigpos++, ccI++, readColContigIteratorAdvance(rcci_rails), readColContigIteratorAdvance(rcci_reads)){
////++++////
////++++////    CEBUG("acp: " << actcontigpos);
////++++////    CEBUG("\tstrains: " << static_cast<uint16>(ccI->bbstrains[sr_seqtypeoffset]));
////++++////    CEBUG("\tbbcount: " << ccI->bbcounts[sr_seqtypeoffset]);
////++++////    CEBUG("\treadcov: " << static_cast<uint16>(rcci_reads.read_ids_in_col.size()));
////++++////    CEBUG('\n');
  // NOTE when re-enabling: '==' binds tighter than '&', so the first
  //  condition below parses as  bbstrains & (strainmask == 0); it needs
  //  parentheses around the '&' expression to test the masked strains.
////++++////    if((ccI->bbstrains[sr_seqtypeoffset] & strainmask == 0)
////++++////       || rcci_reads.read_ids_in_col.size()+ ccI->bbcounts[sr_seqtypeoffset]<=belowcoverage){
////++++////      for(uint32 i=0;i<rcci_rails.read_ids_in_col.size(); i++){
////++++////	CEBUG("Allowing " << CON_readpool->getRead(rcci_rails.read_ids_in_col[i]).getName() << '\n');
////++++////	CON_allowedrefids[rcci_rails.read_ids_in_col[i]]=true;
////++++////      }
////++++////    }
////++++////  }
  // NOTE when re-enabling: the FUNCEND() on the next line sits in the middle
  //  of the disabled body and duplicates the live one at the end.
////++++////  FUNCEND();
////++++////
////++++////  uint32 numallowed=0;
////++++////  for(uint32 i=0; i<CON_allowedrefids.size(); i++){
////++++////    if(CON_allowedrefids[i]) numallowed++;
////++++////  }
////++++////  cout << "Allowed RefIDs: "  << numallowed << endl;

  FUNCEND();
}
//#define CEBUG(bla)






/*************************************************************************
 *
 * New repeat marker that works with different sequencing types and
 *  strains
 *
 * 24.06.2012 Had routine once dump
 *  Marking possibly misassembled repeats: tcmalloc: large alloc 0 bytes == (nil) @
 *  Ouch, out of memory detected.
 * on me. Almost impossible, additional checks inserted.
 *
 *************************************************************************/


//#define CEBUGF2(bla)  {cout << bla; cout.flush();}

// Walks over all consensus columns of the contig, looks for columns where
//  the reads disagree, sorts the bases of such a column into groups per
//  sequencing type and per strain, and hands columns with more than one
//  valid group to the evaluation / tagging helpers (nmpr_*).
//
//  repstats:       numSRMs, numWRMs and numSNPs are reset to 0 here, the
//                  struct is then passed on to the tagging helpers
//  readsmarkedsrm: cleared and resized to one 'false' flag per read of the
//                  read pool, then passed on to the tagging helpers
//
// Returns early (after resetting both output parameters) if the contig has
//  no columns. Internal inconsistencies are reported via BUGIFTHROW and
//  MIRANOTIFY.
void Contig::newMarkPossibleRepeats(repeatmarker_stats_t & repstats, vector<bool> & readsmarkedsrm)
{
  FUNCSTART("void Contig::newMarkPossibleRepeats(repeatmarker_stats_t & repstats, vector<bool> & readsmarkedsrm)");

  repstats.numSRMs=0;
  repstats.numWRMs=0;
  repstats.numSNPs=0;

  BUGIFTHROW(CON_readpool->size()==0,"CON_readpool->size()==0 ???");    // should never be, hunt for tcmalloc 0 alloc message
  readsmarkedsrm.clear();
  readsmarkedsrm.resize(CON_readpool->size(),false);

  // early return on empty contig
  if(CON_counts.size()==0) return;

  checkContig();

  // get highest ID of strain with reads as number of strains in contig
  // this is formally wrong as a contig may contain some strain IDs but not others
  //  e.g.  (1 0 0 0 25 0 0 0) contains two strains, but has the highest ID=4
  // but the routines below just need a correctly sized vector
  uint32 numstrains=0;
  for(uint32 nsi=0; nsi<CON_readsperstrain.size(); ++nsi){
    if(CON_readsperstrain[nsi]>0) numstrains=nsi;
  }
  // strain numbering starts at 0, so add 1
  ++numstrains;

  // one group per possible column content: the four bases and the gap
  static const string groupbases("ACGT*");

  nngroups_t emptygroup;
  emptygroup.base='!';
  emptygroup.valid=false;
  emptygroup.forwarddircounter=0;
  emptygroup.complementdircounter=0;
  emptygroup.groupquality=0;

  // groups per seqtype per strain, empty template
  // built once here and copied into groups_st_st for every analysed column
  vector<vector<vector <nngroups_t> > > emptygroups_st_st;

  BUGIFTHROW(ReadGroupLib::getNumSequencingTypes()==0,"ReadGroupLib::getNumSequencingTypes()==0 ???");    // should never be, hunt for tcmalloc 0 alloc message
  emptygroups_st_st.resize(ReadGroupLib::getNumSequencingTypes());
  for(uint32 seqtype=0; seqtype<ReadGroupLib::getNumSequencingTypes(); ++seqtype){
    BUGIFTHROW(numstrains==0,"numstrains==0 ???");    // should never be, hunt for tcmalloc 0 alloc message
    emptygroups_st_st[seqtype].resize(numstrains);
    for(uint32 strainid=0; strainid<numstrains; strainid++){
      for(uint32 actgroup=0; actgroup<groupbases.size(); ++actgroup){
        emptygroups_st_st[seqtype][strainid].push_back(emptygroup);
        emptygroups_st_st[seqtype][strainid].back().base=groupbases[actgroup];
      }
    }
  }

  // groups per seqtype per strain, the real thing
  vector<vector<vector <nngroups_t> > > groups_st_st;


  // template for the "possible repeat columns" the helpers create
  nnpos_rep_col_t emptyprc;
  emptyprc.urdids.clear();
  emptyprc.groupbases.clear();
  emptyprc.groupquals.clear();
  emptyprc.type=Read::REA_tagentry_idEmpty;
  emptyprc.contigpos=0;
  emptyprc.is_dangerous=false;
  emptyprc.tagged=false;



  // mask shadow built for the PSHP tag type, handed to the fill-in helpers
  vector<int8> maskshadow;
  vector<multitag_t::mte_id_t> masktagtypes;
  //masktagtypes.push_back(Read::REA_tagentry_idSOFApolyA_sequence);
  masktagtypes.push_back(Read::REA_defaulttag_PSHP.identifier);
  buildMaskShadow(maskshadow,masktagtypes,false);

  CEBUGF2("Start." << endl);

  // the ercci is for the analysis of the bases themselves
  //
  ercci_t ercci(this);
  ercci.init(false,        // don't take rails
             true,        // take backbone
             numstrains);

  // this rcci has only rails and backbones in it (to set tags also
  //  in them as they're not contained in the ercci)
  rcci_t rcci(this);
  {
    vector<int32> allowedstrainids;
    // empty would be all ids, pushing back an "impossible" strain id
    //  makes it equal to "no normal reads"
    allowedstrainids.push_back(1000000);
    vector<uint8> allowedreadtypes;
    allowedreadtypes.push_back(ReadGroupLib::SEQTYPE_454GS20);
    rcci.init(allowedstrainids,
              allowedreadtypes,
              true,           // take rails
              true,           // and backbones
              false);   // no reads without readpool-reads
  }


  ProgressIndicator<int32> P(0, CON_counts.size());
  auto ccI=CON_counts.cbegin();
  // ccI, ercci and rcci are advanced in lockstep with actcontigpos, also
  //  when a column is skipped via 'continue'
  for(uint32 actcontigpos=0; actcontigpos<CON_counts.size(); ++actcontigpos, ++ccI, ercci.advance(), rcci.advance()){
    P.progress(actcontigpos);

    CEBUGF2("acp: " << actcontigpos << endl);

    // 23.10.2007
    // this must be deleted for sequencing type dependent analysis
    // if(ccI->coverage < 2*con_params.con_minreadspergroup) continue;

    // check for disagreement in this column: skip it unless at least two
    //  of A, C, G, T and gap have a non-zero count
    if((ccI->A > 0)+(ccI->C > 0)+(ccI->G > 0)+(ccI->T > 0)+(ccI->star > 0) <= 1) continue;

    // ok, there are some disagreements
    CEBUGF2("Disagreement pos " << actcontigpos << ' ' << *ccI << endl);

    //{
    //  ostringstream ostr;
    //  ostr << static_cast<char>(basecounter[maxsize_i].base) << ": " << maxsize;
    //  ostr << " " << static_cast<char>(basecounter[runnerup_i].base) << ": " << runnerup;
    //  ostr << "  -  " << 100*runnerup/(maxsize+runnerup) << "%";
    //
    //  addTagToConsensus(actcontigpos, actcontigpos,'=',"T454",ostr.str().c_str());
    //}

    // calc some values (not needed now, but later perhaps)
    // at the moment they only feed the debug output
    {
      uint32 seqtypesaffected=0;
      for(uint32 seqtype=0; seqtype<ReadGroupLib::getNumSequencingTypes(); ++seqtype){
        uint32 strainsaffected=0;
        for(uint32 strainid=0; strainid<numstrains; ++strainid){
          if(ercci.getPCRIstst()[seqtype][strainid].size()) {
            CEBUGF2("Seqtype: " << seqtype << "\tStrainid: " << strainid);
            CEBUGF2("\tNum reads: " << ercci.getPCRIstst()[seqtype][strainid].size() << '\n');
            ++strainsaffected;
          }
        }
        if(strainsaffected) {
          ++seqtypesaffected;
          CEBUGF2("Strains affected in seqtype " << seqtype << ": " << strainsaffected << '\n');
        }
      }
      CEBUGF2("Seqtypes affected: " << seqtypesaffected << '\n');
    }

    // clear the groups
    groups_st_st=emptygroups_st_st;

    // put the bases of the different reads into groups
    nmpr_firstfillin(ercci, maskshadow, masktagtypes, groups_st_st);
    nmpr_rategroups(groups_st_st, ccI);

    // look how many different groups are set altogether
    // (a base counts once, no matter in how many seqtypes / strains its
    //  group is valid)
    uint32 numvalidgroups=0;
    {
      vector<bool> validgroupmask(groupbases.size(),false);
      for(uint32 seqtype=0; seqtype<ReadGroupLib::getNumSequencingTypes(); ++seqtype){
        for(uint32 strainid=0; strainid<numstrains; ++strainid){
          CEBUGF2("seqt: " << seqtype << "\tstrid: " << strainid << '\n');
          for(uint32 actgroupid=0; actgroupid<groupbases.size(); ++actgroupid){
            if(groups_st_st[seqtype][strainid][actgroupid].valid){
              CEBUGF2("Valid possible group " << actgroupid << '\n');
              CEBUGF2(groups_st_st[seqtype][strainid][actgroupid]);
              if(!validgroupmask[actgroupid]){
                validgroupmask[actgroupid]=true;
                ++numvalidgroups;
              }
            }
          }
        }
      }
    }

    if(numvalidgroups>1){
      CEBUGF2("Bingo! " << actcontigpos << '\n');

      nmpr_secondfillin(ercci, maskshadow, masktagtypes, groups_st_st);

      // num valid groups per sequencing type per strain
      vector<vector<uint32> > numvalids_st_st;
      numvalids_st_st.resize(ReadGroupLib::getNumSequencingTypes());
      for(uint32 seqtype=0; seqtype<ReadGroupLib::getNumSequencingTypes(); ++seqtype){
        numvalids_st_st[seqtype].resize(numstrains,0);
      }

      for(uint32 seqtype=0; seqtype<ReadGroupLib::getNumSequencingTypes(); ++seqtype){
        for(uint32 strainid=0; strainid<numstrains; ++strainid){
          CEBUGF2("seqt: " << seqtype << "\tstrid: " << strainid << '\n');
          for(auto & actgroup : groups_st_st[seqtype][strainid]){
            if(actgroup.valid){
              numvalids_st_st[seqtype][strainid]++;
              CEBUGF2("Valid updated group\n" << actgroup << '\n');
            }
          }
        }
      }

      // a sequencing type counts once as soon as one of its strains has a
      //  valid group
      uint32 numseqtypeswithvalids=0;
      for(uint32 seqtype=0; seqtype<ReadGroupLib::getNumSequencingTypes(); ++seqtype){
        for(uint32 strainid=0; strainid<numstrains; ++strainid){
          if(numvalids_st_st[seqtype][strainid]>0){
            ++numseqtypeswithvalids;
            break;
          }
        }
      }

      CEBUGF2("Found " << numseqtypeswithvalids << " sequencing types with valid groups.\n");

      if(numseqtypeswithvalids==0) {
        cout << "ehhhhhhh .... ?\n";
        MIRANOTIFY(Notify::INTERNAL, "no seq type with valid groups at this place? Impossible.");
      }else if(numseqtypeswithvalids==1){
        CEBUGF2("Just one sequencing type, good.\n");
        // could be multiple strains, but that's dealt with in
        //  nmpr_evaluateOneSeqType(), we just need to tag
        //  the prc we get back
        for(uint32 seqtype=0; seqtype<ReadGroupLib::getNumSequencingTypes(); ++seqtype){
          for(uint32 strainid=0; strainid<numstrains; ++strainid){
            if(numvalids_st_st[seqtype][strainid]==0) continue;

            vector<nnpos_rep_col_t> newprcs;
            // find out what it is
            nmpr_evaluateOneSeqType(seqtype,
                                    ercci,
                                    groups_st_st[seqtype],
                                    newprcs,
                                    emptyprc);
            // and tag column accordingly
            for(auto & newprc : newprcs){
              nmpr_tagColumn(newprc,
                             rcci,
                             readsmarkedsrm,
                             repstats);
            }

            // The evaluation above was given the groups of *all* strains
            //  of this sequencing type, so one evaluation + tagging round
            //  per sequencing type is enough. Without leaving the strain
            //  loop here, the identical work would be repeated (and the
            //  column tagged again) for every further strain that has
            //  valid groups.
            break;
          }
        }
      }else{
        nmpr_cautiousMultiSeqTypeTagging(ercci,
                                         rcci,
                                         groups_st_st,
                                         emptyprc,
                                         readsmarkedsrm,
                                         repstats);
      }
    }
  }

  P.finishAtOnce();


  FUNCEND();
  return;
}


/*************************************************************************
 *
 * Helper for new repeat marker
 *
 *************************************************************************/

// groups_st_st == groups per seqtype per strain
void Contig::nmpr_cautiousMultiSeqTypeTagging(const ercci_t & ercci, const rcci_t & rcci, const vector<vector<vector <nngroups_t> > > & groups_st_st, const nnpos_rep_col_t & emptyprc, vector<bool> & readsmarkedsrm, repeatmarker_stats_t & repstats)
{
  FUNCSTART("void Contig::nmpr_cautiousMultiSeqTypeTagging(const ercci_t & ercci, const vector<vector<vector <nngroups_t> > > & groups_st_st, const nnpos_rep_col_t & emptyprc, vector<bool> & readsmarkedsrm)");

  CEBUGF2("Multiple sequencing types, cautious tagging.\n");

  // new PRCs per sequencing type
  vector<vector<nnpos_rep_col_t> > newprcs_st(groups_st_st.size());
  //// num valid groups per seqtype
  //vector<uint32> numvalidgroups_st(groups_st_st.size())

  // num valid groups per seqtype
  vector<bool> seqtypewithvalidgroups(groups_st_st.size(),false);
  vector<bool> seqtypewithSRM(groups_st_st.size(),false);
  vector<bool> seqtypewithWRM(groups_st_st.size(),false);

#if CPP_READ_SEQTYPE_END != 8
#error "This code is made for 8 sequencing types, adapt!"
#endif

  bool hasvalidgapinsanger=false;
  bool hasvalidgapin454=false;        // unused yet
  bool hasvalidgapinion=false;        // unused yet
  bool hasSRM=false;

  // ok, first determine what we have per sequencing type (per strain)
  for(uint32 actseqtype=0; actseqtype < newprcs_st.size(); ++actseqtype){

    // TODO: remove if once solexa and abi are tested
    if(groups_st_st[actseqtype].size()){
      CEBUGF2("Seqt: " << actseqtype << '\n');

//      for(uint32 strainid=0; strainid<groups_st_st[actseqtype].size(); strainid++){
//	CEBUGF2("seqt: " << actseqtype << "\tstrid: " << strainid << '\n');
//	for(uint32 actgroup=0; actgroup<groups_st_st[actseqtype][strainid].size(); actgroup++){
//	  if(groups_st_st[actseqtype][strainid][actgroup].valid){
//	    CEBUGF2("nmpr_cau... checking group " << actgroup << '\n');
//	    CEBUGF2(groups_st_st[actseqtype][strainid][actgroup]);
//	  }
//	}
//      }

      nmpr_evaluateOneSeqType(actseqtype,
			      ercci,
			      groups_st_st[actseqtype],
			      newprcs_st[actseqtype],
			      emptyprc);
    }

    uint32 hasgapasvalidgroup=false;
    for(uint32 actprci=0; actprci<newprcs_st[actseqtype].size(); ++actprci){
      // no |= operator accepted by GCC 4 here

      if(newprcs_st[actseqtype][actprci].groupbases.size() > 0){
	if(actseqtype == ReadGroupLib::SEQTYPE_ABISOLID){
	  MIRANOTIFY(Notify::INTERNAL, "Type ABI SOLiD needs more support 10.");
	}
      }

      seqtypewithvalidgroups[actseqtype]= seqtypewithvalidgroups[actseqtype] | (newprcs_st[actseqtype][actprci].groupbases.size() > 0);
      seqtypewithSRM[actseqtype]= seqtypewithSRM[actseqtype] | (newprcs_st[actseqtype][actprci].type == Read::REA_tagentry_idSRMr);
      seqtypewithWRM[actseqtype]= seqtypewithWRM[actseqtype] | (newprcs_st[actseqtype][actprci].type == Read::REA_tagentry_idWRMr);
      hasSRM|=(newprcs_st[actseqtype][actprci].type == Read::REA_tagentry_idSRMr);

      for(uint32 actbasei=0; actbasei<newprcs_st[actseqtype][actprci].groupbases.size(); ++actbasei){
	if(newprcs_st[actseqtype][actprci].groupbases[actbasei] == '*') hasgapasvalidgroup=true;
      }
    }

    // TODO: PacBio LQ /HQ ???

    if(hasgapasvalidgroup){
      switch(actseqtype) {
      case ReadGroupLib::SEQTYPE_SANGER: {
	hasvalidgapinsanger=true;
	break;
      }
      case ReadGroupLib::SEQTYPE_454GS20:{
	hasvalidgapin454=true;
	break;
      }
      case ReadGroupLib::SEQTYPE_IONTORRENT:{
	hasvalidgapinion=true;
	break;
      }
      default : {
      }
      }
    }
  }

  CEBUGF2("hasvalidgapinsanger: " << hasvalidgapinsanger << "\thasvalidgapin454: " << hasvalidgapin454 << "\thasvalidgapinion: " << hasvalidgapinion);
  CEBUGF2("\thasSRM: " << hasSRM << '\n');

  if(hasvalidgapinsanger
     && (seqtypewithSRM[ReadGroupLib::SEQTYPE_SANGER]
	 || seqtypewithWRM[ReadGroupLib::SEQTYPE_SANGER])){
    // special case: tag only sanger
    for(uint32 ni=0; ni< newprcs_st[ReadGroupLib::SEQTYPE_SANGER].size(); ++ni){
      nmpr_tagColumn(newprcs_st[ReadGroupLib::SEQTYPE_SANGER][ni],
		     rcci,
		     readsmarkedsrm,
		     repstats);
    }
  } else if(hasSRM){
    // special case: tag all prcs with SRMs
    for(uint32 actseqtype=0; actseqtype < newprcs_st.size(); ++actseqtype){
      for(uint32 ni=0; ni< newprcs_st[actseqtype].size(); ++ni){
	if(newprcs_st[actseqtype][ni].type == Read::REA_tagentry_idSRMr) {
	  nmpr_tagColumn(newprcs_st[actseqtype][ni],
			 rcci,
			 readsmarkedsrm,
			 repstats);
	}
      }
    }
  } else {
    // tag all
    CEBUGF2("Tagging all\n");
    for(uint32 actseqtype=0; actseqtype < newprcs_st.size(); ++actseqtype){
      CEBUGF2("newprcs_st[actseqtype].size(): " << newprcs_st[actseqtype].size() << '\n');
      for(uint32 ni=0; ni< newprcs_st[actseqtype].size(); ++ni){
	nmpr_tagColumn(newprcs_st[actseqtype][ni],
		       rcci,
		       readsmarkedsrm,
		       repstats);
      }
    }
  }


  FUNCEND();
}



/*************************************************************************
 *
 *
 *
 *
 *
 *************************************************************************/

void Contig::nmpr_evaluateOneSeqType(const uint32 actseqtype, const ercci_t & ercci, const vector<vector <nngroups_t> > & groups_st, vector<nnpos_rep_col_t> & newprcvec, const nnpos_rep_col_t & emptyprc)
{
  FUNCSTART("void Contig::nmpr_evaluateOneSeqType(const ercci_t & ercci, vector<vector <nngroups_t> > & groups_st, nnpos_rep_col_t & newprc)");

  contig_parameters const & con_params= (*CON_miraparams)[actseqtype].getContigParams();

  newprcvec.clear();

  nnpos_rep_col_t templateprc=emptyprc;
  templateprc.contigpos=ercci.getContigPos();


  // count some numbers
  uint32 numstrainswithvalids=0;
  uint32 maxgroupsinonestrain=0;

  // also, if we are in Sanger sequencing type:
  //  if '*' is a valid group, see whether it is a WRM
  // a weak repeat marker is a gap that has as counterpart only
  //  one other valid base. Two valid bases and a gap are therefore
  //  an SRM
  bool hasweakgap=false;

  bool maybestrong=false;
  for(uint32 strainid=0; strainid<groups_st.size(); ++strainid){
    uint32 groupsinstrain=0;
    uint32 groupmaybestrong=0;
    for(auto & actgroup : groups_st[strainid]) {
      if(actgroup.valid){
	if(groupsinstrain==0) numstrainswithvalids++;
	++groupsinstrain;

	if(actseqtype == ReadGroupLib::SEQTYPE_SANGER
	   && actgroup.base=='*'
	   && groupsinstrain == 2){
	  hasweakgap=true;
	  if(actgroup.forwarddircounter + actgroup.complementdircounter >= 8
	     && actgroup.forwarddircounter >= 4
	     && actgroup.complementdircounter >= 4){
	    ++groupmaybestrong;
	  }
	}

	// Solexa also needs weak gaps at SRM positions
	// but ... if there are really too many gaps/non-gaps, this seems pretty strong
	// prepare to change that later
	if(actseqtype == ReadGroupLib::SEQTYPE_SOLEXA){
	  if(actgroup.forwarddircounter + actgroup.complementdircounter >= 10
	     && actgroup.forwarddircounter >= 5
	     && actgroup.complementdircounter >= 5){
	    ++groupmaybestrong;
	  }
	  if(actgroup.base=='*'
	     && groupsinstrain == 2){
	    hasweakgap=true;
	  }
	}
      }
    }
    if(groupmaybestrong>=2) maybestrong=true;
    maxgroupsinonestrain=max(maxgroupsinonestrain, groupsinstrain);
  }

  // It was a strong solexa gap/nogap SNP ... no weak tagging here!
  if(actseqtype == ReadGroupLib::SEQTYPE_SOLEXA && maybestrong){
    hasweakgap=false;
  }

  if(numstrainswithvalids){
    if(actseqtype == ReadGroupLib::SEQTYPE_ABISOLID){
      MIRANOTIFY(Notify::INTERNAL, "Type ABI SOLiD needs more support 11.");
    }
  }


  CEBUGF2("maxgroupsinonestrain: " << maxgroupsinonestrain << '\n');
  CEBUGF2("numstrainswithvalids: " << numstrainswithvalids << '\n');
  CEBUGF2("hasweakgap: " << hasweakgap << '\n');
  CEBUGF2("maybestrong: " << maybestrong << '\n');

  // if maxgroups in one strain >1, then we have a repeat that was
  //  misassembled. There also could be a SNP, but we need to get rid
  //  of the repeat first.

  if(maxgroupsinonestrain>1){
    // Repeat Marker Base, treat each strain by itself, i.e.,
    //  return one prc per strain which has >=2 groups
    // just check whether they should be treated as SNP
    if(con_params.con_assume_snp_insteadof_rmb) {
      if(numstrainswithvalids>1){
	templateprc.type=Read::REA_tagentry_idSIOr;
      }else{
	templateprc.type=Read::REA_tagentry_idSAOr;
      }
    }else{
      templateprc.type=Read::REA_tagentry_idSRMr;
      if(hasweakgap) templateprc.type=Read::REA_tagentry_idWRMr;
    }
    CEBUGF2("Determined type: " << multitag_t::getIdentifierStr(templateprc.type) << '\n');
    for(uint32 strainid=0; strainid<groups_st.size(); ++strainid){
      nnpos_rep_col_t tmpprc=templateprc;
      if(nmpr_appendPRCFieldsWithGroupsOfOneStrain(groups_st[strainid],
						   tmpprc) >1){
	newprcvec.push_back(tmpprc);
      }
    }
  }else if(maxgroupsinonestrain>0 && numstrainswithvalids>0){
    // SNP between strains, look at everything together
    //
    templateprc.type=Read::REA_tagentry_idSROr;
    CEBUGF2("Determined type: " << multitag_t::getIdentifierStr(templateprc.type) << '\n');
    nnpos_rep_col_t tmpprc=templateprc;
    for(uint32 strainid=0; strainid<groups_st.size(); strainid++){
      nmpr_appendPRCFieldsWithGroupsOfOneStrain(groups_st[strainid],
						tmpprc);
    }
    //if(tmpprc.groupbases.size()>1) newprcvec.push_back(tmpprc);
    newprcvec.push_back(tmpprc);
  }

  FUNCEND();
  return;
}




/*************************************************************************
 *
 * Helper for new repeat marker
 *
 *************************************************************************/

// groups == groups
// Appends the data of every valid group in 'groups' to 'newprc':
//  the group base goes to newprc.groupbases, the group quality to
//  newprc.groupquals and all read ids of the group to newprc.urdids.
// Groups whose 'valid' flag is not set are ignored.
// Returns the number of valid groups that were appended.
uint32 Contig::nmpr_appendPRCFieldsWithGroupsOfOneStrain(const vector <nngroups_t> & groups, nnpos_rep_col_t & newprc)
{
  FUNCSTART("void Contig::nmpr_appendPRCFieldsWithGroupsOfOneStrain(const vector <nngroups_t> & groups, nnpos_rep_col_t & newprc)");

  uint32 validcount=0;

  for(const auto & grp : groups){
    // invalid groups contribute nothing
    if(!grp.valid) continue;

    ++validcount;
    CEBUGF2("Valid group:\n" << grp);

    newprc.groupbases.push_back(grp.base);
    newprc.groupquals.push_back(grp.groupquality);

    // carry over the ids of all reads which belong to this group
    for(const auto & readid : grp.urdids){
      newprc.urdids.push_back(readid);
    }
  }

  CEBUGF2("Filled data for " << validcount << " valid groups.\n");

  FUNCEND();
  return validcount;
}




/*************************************************************************
 *
 * Helper for new repeat marker
 *
 * First fill in of groups (per sequencing type per strain). Depending
 *  on the sequencing type, different rules are applied to gaps next to
 *  the position and to the base qualities around it.
 *
 *************************************************************************/

//#define CEBUGF2(bla)  {cout << bla; cout.flush();}

// For the contig position of 'ercci', sorts the reads covering that position
//  into the base groups of groups_st_st[seqtype][strainid]: read id, base
//  quality and read direction are appended to the group whose base equals
//  the base of the read, the direction counters are updated and, at the end
//  of each strain, calcGroupQual() is called for every group.
//
// A read is NOT counted at this position if
//  - the position is nearer than con_endreadmarkexclusionarea to the left
//    or right clipoff of the read
//  - maskshadow is set at the contig position and the read carries one of
//    the tags in masktagtypes there
//  - a gap ('*') is directly beside the position and the sequencing type /
//    parameters say that this is not to be trusted, or a masked base (X) is
//    within the exclusion area around such a position
//  - the base or one of its direct neighbours has a quality below
//    con_minrmbneighbourqual (with a special rule for Solexa, see below)
//
// ercci         iterator-like object giving the contig position and the
//                reads per sequencing type and strain (getPCRIstst())
// maskshadow    per contig position flag: non-zero means mask tags must be
//                checked for the reads at this position
// masktagtypes  tag types which prohibit looking for repeats
// groups_st_st  [seqtype][strainid][group], filled by this function
void Contig::nmpr_firstfillin(const ercci_t & ercci, const vector<int8> & maskshadow, const vector<multitag_t::mte_id_t> & masktagtypes, vector<vector<vector <nngroups_t> > > & groups_st_st)
{
  FUNCSTART("void Contig::nmpr_firstfillin(const ercci_t & ercci, const vector<int8> & maskshadow)");

  auto & pcri_st_st=ercci.getPCRIstst();

  for(uint32 seqtype=0; seqtype<ReadGroupLib::getNumSequencingTypes(); ++seqtype){
    contig_parameters const & con_rt_params= (*CON_miraparams)[seqtype].getContigParams();

    int32 endreadmarkexclusionarea=con_rt_params.con_endreadmarkexclusionarea;
    BUGIFTHROW(endreadmarkexclusionarea<1, "endreadmarkexclusionarea may not be < 1");

    for(uint32 strainid=0; strainid < pcri_st_st[0].size(); ++strainid){
      for(const auto & tpcrI : pcri_st_st[seqtype][strainid]){
        char           base='?';
        base_quality_t qual=0;

        int32 readpos=tpcrI.contigPos2UnclippedReadPos(ercci.getContigPos());

        // reads that start or end in that region are dangerous as some
        //  spurious false bases might influence the result. Skip those.
        if(tpcrI.getReadDirection() > 0){
          if(readpos < tpcrI->getLeftClipoff()+endreadmarkexclusionarea
             || readpos > tpcrI->getRightClipoff()-endreadmarkexclusionarea) {
            CEBUGF2(tpcrI->getName()<< ": readposition too near to start or end of read.\n");
            continue;
          }
        }else{
          if(tpcrI->calcComplPos(readpos) < tpcrI->getLeftClipoff()+endreadmarkexclusionarea
             || tpcrI->calcComplPos(readpos) > tpcrI->getRightClipoff()-endreadmarkexclusionarea) {
            CEBUGF2(tpcrI->getName()<< ": readposition too near to start or end of read.\n");
            continue;
          }
        }

        CEBUGF2("Looking at " << ercci.getContigPos() << " " << readpos << " " << tpcrI->getName()<< " ");

        // check whether this read has a tag that prohibits searching for repeats here
        // (only PSHP for the time being)
        if(maskshadow[ercci.getContigPos()]) {
          // remember that the readpos computing routine doesn't take care
          //  of direction, so we have to complement that position in reverse cases
          int32 rawreadpos=readpos;
          if(tpcrI.getReadDirection() < 0){
            rawreadpos=tpcrI->calcComplPos(readpos);
          }

          CEBUGF2("MASKED: " << ercci.getContigPos() << endl);
          CEBUGF2(tpcrI->getName() << "\t" << rawreadpos << "\t");
          bool maskfound=false;
          for(uint32 mtti=0; mtti<masktagtypes.size(); ++mtti){
            if(tpcrI->hasTag(masktagtypes[mtti],rawreadpos)) {
              CEBUGF2("Masked with " << multitag_t::getIdentifierStr(masktagtypes[mtti]) << ", next read.\n");
              maskfound=true;
              break;
            }
          }
          if(maskfound) continue;
        }

        // ok, we know that we are at least con_endreadmarkexclusionarea away from
        //  the limits.
        // we now check if:
        //  1) a base left or right is a *
        //  2) there's a masked base (X) within the con_endreadmarkexclusionarea
        //      left and right
        // if yes: in dubio pro reo, do not mark and skip
        // Note: as the code stands, check 2) is only performed for positions
        //  which have a gap beside them and survived check 1)

        // untested, but should work

#if CPP_READ_SEQTYPE_END != 8
#error "This code is made for 8 sequencing types, adapt!"
#endif
        if(con_rt_params.con_endreadmarkexclusionarea >0
           && readpos > 0 && readpos+1 < tpcrI->getLenSeq() ) {
          // B / E: begin and end of the sequence in the direction the read
          //  has in the contig, S: the base at the current position
          vector<char>::const_iterator B,S,E;
          if(tpcrI.getReadDirection()>0){
            B=tpcrI->getSeqIteratorBegin();
            S=B;
            E=tpcrI->getSeqIteratorEnd();
          }else{
            B=tpcrI->getComplementSeqIteratorBegin();
            S=B;
            E=tpcrI->getComplementSeqIteratorEnd();
          }
          advance(S,readpos);

          // if we do not mark gap bases, check whether we have a gap
          //  beside the actual position. If yes, then stop looking at this
          //  read.
          bool stopsearch=false;
          if(*(S-1) == '*' || *(S+1)=='*') {
            switch(seqtype) {
            case ReadGroupLib::SEQTYPE_IONTORRENT:
            case ReadGroupLib::SEQTYPE_454GS20: {
            //case ReadGroupLib::SEQTYPE_454GS20CONS:
            //case ReadGroupLib::SEQTYPE_454GS20CONSSHRED:{
              stopsearch=true;
              break;
            }
            case ReadGroupLib::SEQTYPE_ABISOLID: {
              MIRANOTIFY(Notify::INTERNAL, "Type ABI SOLiD needs more support 12a.");
              break;
            }
            case ReadGroupLib::SEQTYPE_PACBIOHQ:
            case ReadGroupLib::SEQTYPE_PACBIOLQ: {
              // TODO: PacBio HQ / LQ

              // at the moment, deliberately fall through to Solexa / Sanger
            }
            case ReadGroupLib::SEQTYPE_TEXT:
            case ReadGroupLib::SEQTYPE_SOLEXA:
            case ReadGroupLib::SEQTYPE_SANGER: {
              // with SANGER and Solexa, check with parameters
              if(con_rt_params.con_also_mark_gap_bases==false
                 || (con_rt_params.con_also_mark_gap_bases==true
                     && con_rt_params.con_also_mark_gap_bases_evenmc==false)){
                stopsearch=true;
              }
              break;
            }
            default : {
              cout.flush();
              cerr << "Seqtype: " << seqtype << endl;
              MIRANOTIFY(Notify::INTERNAL, "Unknown seqtype?.");
            }
            }
            if(stopsearch) {
              CEBUGF2(tpcrI->getName()<< ": there's a * beside me, I won't count.\n");
              continue;
            }

            // now check whether we have masked bases (X) within reach
            // this would mean that we are still in the endread exclusion
            //  area (not best tactic, as Xed out bases may be within read,
            //  but best I can do for now)

            bool continuesearch=true;

            // towards the start of the sequence: I walks away from S (which
            //  stays on the current base) and never goes below B
            vector<char>::const_iterator I=S;
            for(int32 ermeai=0; ermeai<endreadmarkexclusionarea; ++ermeai){
              if(toupper(*I)=='X'){
                continuesearch=false;
                break;
              }
              if(I==B) break;
              --I;
            }

            // towards the end of the sequence: E is one past the last base
            //  and must not be dereferenced
            I=S;
            for(int32 ermeai=0; I!=E && ermeai<endreadmarkexclusionarea; ++ermeai,++I){
              if(toupper(*I)=='X'){
                continuesearch=false;
                break;
              }
            }

            if(!continuesearch) {
              CEBUGF2(" : there's a X somewhere beside me, I won't count.\n");
              continue;
            }
          }
        }

        // CEBUGF2(" nothing special found ");


        {
          // getting base and basequal
          // also checking whether a base left and right has a quality
          //  < (defined by seqtype)
          // If yes, the area is a bit too uncertain and we'll skip it.
          bool skipit=false;
          uint32 rocksolid=0;            // special for Solexa and perhaps SOLiD
          const static base_quality_t rsthreshold=37;  // threshold for rock solid qual

#if CPP_READ_SEQTYPE_END != 8
#error "This code is made for 8 sequencing types, adapt!"
#endif
          // initialised so that no path can ever read an indeterminate value
          base_quality_t minqual=0;
          switch(seqtype) {
          case ReadGroupLib::SEQTYPE_SANGER:
          case ReadGroupLib::SEQTYPE_454GS20:
          case ReadGroupLib::SEQTYPE_IONTORRENT:
          case ReadGroupLib::SEQTYPE_PACBIOHQ:
          case ReadGroupLib::SEQTYPE_PACBIOLQ:
          case ReadGroupLib::SEQTYPE_TEXT:
          case ReadGroupLib::SEQTYPE_SOLEXA: {
            minqual=con_rt_params.con_minrmbneighbourqual;
            break;
          }
          case ReadGroupLib::SEQTYPE_ABISOLID: {
            MIRANOTIFY(Notify::INTERNAL, "Type ABI SOLiD needs more support 14.");
            break;
          }
          default : {
            cout.flush();
            cerr << "Seqtype: " << seqtype << endl;
            MIRANOTIFY(Notify::INTERNAL, "Unknown seqtype? 14");
          }
          }

          // NOTE(review): in the neighbour branches below, the rock solid
          //  counter is increased depending on the quality of the current
          //  base (qual), not on the quality of the neighbour that was just
          //  looked at. Confirm whether this is intended before relying on
          //  the "two out of three" Solexa rule further down.
          if(tpcrI.getReadDirection()>0){
            CEBUGF2(" + ");
            base=toupper(tpcrI->getBaseInSequence(readpos));
            qual=tpcrI->getQualityInSequence(readpos);
            if(readpos-1<0){
              CEBUGF2(" <oob");
              skipit=true;
            }else if(tpcrI->getQualityInSequence(readpos-1) < minqual) {
              CEBUGF2(" <fail_minqual");
              skipit=true;
            }else if(qual>=rsthreshold){
              ++rocksolid;
            }
            if(readpos+1 >= tpcrI->getLenSeq()){
              CEBUGF2(" >oob");
              skipit=true;
            }else if(tpcrI->getQualityInSequence(readpos+1) < minqual) {
              CEBUGF2(" >fail_minqual");
              skipit=true;
            }else if(qual>=rsthreshold){
              ++rocksolid;
            }
          }else{
            CEBUGF2(" - ");
            base=toupper(tpcrI->getBaseInComplementSequence(readpos));
            qual=tpcrI->getQualityInComplementSequence(readpos);
            if(readpos-1<0){
              CEBUGF2(" <oob");
              skipit=true;
            }else if(tpcrI->getQualityInComplementSequence(readpos-1) < minqual) {
              skipit=true;
              CEBUGF2(" <fail_minqual");
            }else if(qual>=rsthreshold){
              ++rocksolid;
            }
            if(readpos+1 >= tpcrI->getLenSeq()){
              CEBUGF2(" >oob");
              skipit=true;
            }else if(tpcrI->getQualityInComplementSequence(readpos+1) < minqual) {
              CEBUGF2(" >fail_minqual");
              skipit=true;
            }else if(qual>=rsthreshold){
              ++rocksolid;
            }
          }
          if(!skipit){
            if(qual < minqual) {
              CEBUGF2(" qual below threshold " << base << " " << static_cast<uint16>(qual) << endl);
              skipit=true;
            }else if(qual>=rsthreshold){
              ++rocksolid;
            }
          }
          if(skipit) {
            // special rule for solexa: we'll also go with two out of the three bases,
            //  if they have a rock solid quality
            if(seqtype==ReadGroupLib::SEQTYPE_SOLEXA){
              if(rocksolid>=2) skipit=false;
            }
            if(skipit) {
              CEBUGF2(" skipit" << endl);
              continue;
            }
          }
        }

        CEBUGF2(base << " " << static_cast<uint16>(qual) << endl);

        // Quality of bases from railreads are set to 0 so as not
        //  to be counted twice (as bases are the same as in backbone)
        //
        if(tpcrI->isRail()) {
          qual=0;
        }

        // put the read into the group which has the same base (if any)
        for(auto & actgroup : groups_st_st[seqtype][strainid]) {
          if(actgroup.base==base) {
            actgroup.urdids.push_back(tpcrI.getURDID());
            actgroup.quals.push_back(qual);
            actgroup.directions.push_back(tpcrI.getReadDirection());

            // rails and backbones presently do not count for any direction,
            //  all other reads count for their own direction
            if(tpcrI->isRail() || tpcrI->isBackbone()) {
              // TODO: not sure whether this is really good! (May05)
              //actgroup.hasforwarddir=true;
              //actgroup.hascomplementdir=true;
            }else{
              if(tpcrI.getReadDirection()>0){
                ++actgroup.forwarddircounter;
              }else{
                ++actgroup.complementdircounter;
              }
            }
            break;
          }
        }
      }

      // compute the groups quality
      for(auto & actgroup : groups_st_st[seqtype][strainid]) {
        calcGroupQual(actgroup);
      }
    }
  }

  FUNCEND();
  return;
}
//#define CEBUGF2(bla)



/*************************************************************************
 *
 * Helper for new repeat marker
 *
 * After the first fill in and the rating of groups, refill the surviving
 *  valid groups. This time we'll take less stringent conditions, i.e.,
 *  we don't care about minimum quality and the read end exclusion area
 *  also is reduced to a minimum. We still care about mask tags though.
 *
 *************************************************************************/

// Second, less stringent fill in of the groups for the contig position
//  of 'ercci'.
//
// 1) The bases of all groups which are currently valid in any sequencing
//    type / strain are collected.
// 2) Every group having one of these bases is emptied (counters, read ids,
//    qualities, directions).
// 3) The reads at the position are looked at again and appended to the
//    group of their base, provided that this group is marked valid for
//    their own sequencing type and strain. Minimum qualities are not
//    checked anymore and the read end exclusion area is a small fixed one
//    (2; 5 if a valid gap group is involved; 0 for Solexa). Mask tags are
//    still honoured.
// 4) Groups with at least one read counted for a direction are set valid
//    and calcGroupQual() is called for every group.
//
// ercci         iterator-like object giving the contig position and the
//                reads per sequencing type and strain (getPCRIstst())
// maskshadow    per contig position flag: non-zero means mask tags must be
//                checked for the reads at this position
// masktagtypes  tag types which prohibit tagging repeats
// groups_st_st  [seqtype][strainid][group]; the code indexes the groups
//                as 0..4 for A, C, G, T, *
void Contig::nmpr_secondfillin(const ercci_t & ercci, const vector<int8> & maskshadow, const vector<multitag_t::mte_id_t> & masktagtypes, vector<vector<vector <nngroups_t> > > & groups_st_st)
{
  FUNCSTART("void Contig::nmpr_secondfillin(const ercci_t & ercci, const vector<int8> & maskshadow)");

  // true as soon as a valid gap group exists in any sequencing type / strain
  bool involvesagap=false;

  // consolidate valid groups regardless of seqtype or strain
  // lookup table indexed by base character; completely zeroed so that
  //  every entry has a defined value
  char validgroups[128]={};
  for(uint32 seqtype=0; seqtype < ReadGroupLib::getNumSequencingTypes(); ++seqtype){
    for(uint32 strainid=0; strainid < groups_st_st[0].size(); ++strainid){
      for(auto & actgroup : groups_st_st[seqtype][strainid]) {
        if(actgroup.valid==false) continue;
        validgroups[actgroup.base]=1;
        if(actgroup.base=='*') involvesagap=true;
      }
    }
  }

  auto & pcri_st_st=ercci.getPCRIstst();

  for(uint32 seqtype=0; seqtype < ReadGroupLib::getNumSequencingTypes(); ++seqtype){
    for(uint32 strainid=0; strainid < groups_st_st[0].size(); ++strainid){
      // empty all groups whose base is valid somewhere
      for(auto & actgroup : groups_st_st[seqtype][strainid]) {
        if(validgroups[actgroup.base]==0) continue;
        actgroup.forwarddircounter=0;
        actgroup.complementdircounter=0;
        actgroup.urdids.clear();
        actgroup.quals.clear();
        actgroup.directions.clear();
        CEBUGF2("group " << actgroup.base << " will be refilled.\n");
      }

      for(const auto & tpcrI : pcri_st_st[seqtype][strainid]){
        int32 readpos=tpcrI.contigPos2UnclippedReadPos(ercci.getContigPos());

        // get base and basequal
        char           base=toupper(tpcrI.getBase(ercci.getContigPos()));
        base_quality_t qual=tpcrI.getQuality(ercci.getContigPos());

        uint32 actgroupid;
        switch(base){
        case 'A': {
          actgroupid=0;
          break;
        }
        case 'C': {
          actgroupid=1;
          break;
        }
        case 'G': {
          actgroupid=2;
          break;
        }
        case 'T': {
          actgroupid=3;
          break;
        }
        case '*': {
          actgroupid=4;
          break;
        }
        default: {
          // not ACGT* ? don't tag anyway
          continue;
        }
        }

        auto & targetgroup=groups_st_st[seqtype][strainid][actgroupid];

        // if this base in this read is not in a valid group, continue
        if(targetgroup.valid==false) continue;

        // still care about a read end exclusion area,
        //  but this time a fixed one, not sequencing type dependent
        int32 endreadmarkexclusionarea=2;
        // due to *very* spurious alignment problems in rare cases,
        //  we set the exclusion area a bit wider
        // TODO: check if still needed with improved alignments???
        if(involvesagap) endreadmarkexclusionarea=5;

        // BaCh 07.07.2011
        // comeback of the sequencing type dependent area
        // as I do not have time atm to check the above TODO, I'll define
        //  that only for Solexas, the area is 0
        if(seqtype == ReadGroupLib::SEQTYPE_SOLEXA){
          endreadmarkexclusionarea=0;
        }

        if(tpcrI.getReadDirection()>0){
          if(readpos < tpcrI->getLeftClipoff()+endreadmarkexclusionarea
             || readpos > tpcrI->getRightClipoff()-endreadmarkexclusionarea) {
            CEBUGF2(tpcrI->getName()<< ": readposition too near to start or end of read.\n");
            continue;
          }
        }else{
          if(tpcrI->calcComplPos(readpos) < tpcrI->getLeftClipoff()+endreadmarkexclusionarea
             || tpcrI->calcComplPos(readpos) > tpcrI->getRightClipoff()-endreadmarkexclusionarea) {
            CEBUGF2(tpcrI->getName()<< ": readposition too near to start or end of read.\n");
            continue;
          }
        }

        CEBUGF2("Looking at " << tpcrI->getName()<< " ");

        // check whether this read has a tag that prohibits searching
        //  or tagging repeats here
        // (only PSHP for the time being)
        if(maskshadow[ercci.getContigPos()]) {
          // remember that the readpos computing routine doesn't take care
          //  of direction, so we have to complement that position in reverse cases
          int32 rawreadpos=readpos;
          if(tpcrI.getReadDirection()<0){
            rawreadpos=tpcrI->calcComplPos(readpos);
          }

          CEBUGF2("MASKED: " << ercci.getContigPos() << endl);
          CEBUGF2(tpcrI->getName() << "\t" << rawreadpos << "\t");
          bool maskfound=false;
          for(uint32 i=0; i<masktagtypes.size(); i++){
            if(tpcrI->hasTag(masktagtypes[i],rawreadpos)) {
              CEBUGF2("Masked with " << multitag_t::getIdentifierStr(masktagtypes[i]) << ", next read.\n");
              maskfound=true;
              break;
            }
          }
          if(maskfound) continue;
        }

        // ok, we know that we are at least endreadmarkexclusionarea
        //  away from the read ends and are in a valid group

        CEBUGF2(base << " " << static_cast<uint16>(qual) << "\trefill.\n");

        // Quality of bases from railreads are set to 0 so as not
        //  to be counted twice (as bases are the same as in backbone)
        //
        if(tpcrI->isRail()) {
          qual=0;
        }

        targetgroup.urdids.push_back(tpcrI.getURDID());
        targetgroup.quals.push_back(qual);
        targetgroup.directions.push_back(tpcrI.getReadDirection());

        // rails and backbones are not counted for any direction,
        //  all other reads count for their own direction
        if(tpcrI->isRail() || tpcrI->isBackbone()) {
        }else{
          if(tpcrI.getReadDirection() > 0){
            ++targetgroup.forwarddircounter;

            // TODO: test
            // if it is a merged short read, count it also as reverse
            if(tpcrI->isCoverageEquivalentRead()
               && (seqtype==ReadGroupLib::SEQTYPE_SOLEXA
                   || seqtype==ReadGroupLib::SEQTYPE_ABISOLID)){
              ++targetgroup.complementdircounter;
            }
          }else{
            ++targetgroup.complementdircounter;
          }
        }
      }

      // compute the groups quality and set valid flag
      for(auto & actgroup : groups_st_st[seqtype][strainid]) {
        if(actgroup.forwarddircounter
           || actgroup.complementdircounter){
          actgroup.valid=true;
        }
        calcGroupQual(actgroup);
      }
    }
  }

  FUNCEND();
  return;
}


/*************************************************************************
 *
 * Helper for new repeat marker
 *
 * After the first fill in of groups, this function looks whether the
 *  group satisfies the criteria to be valid (depending on sequencing
 *  type and parameters set)
 * Groups that do not satisfy the criteria are set to invalid
 *
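 * Criteria presently checked:
 * Sanger: minimum reads per group
 *         minimum group quality
 *         criteria for having a gap as valid group (as set in parameters)
 *         some magic to get rid of spurious mismatches
 * 454, IonTorrent, PacBio: minimum reads per group (as set in parameters)
 *      minimum group quality
 *      forward and reverse reads must be present in every group
 *      gap groups are presently never valid
 * Text: minimum reads per group
 *       criteria for having a gap as valid group (as set in parameters)
 * Solexa: minimum reads per group and minimum group quality (a matching
 *         backbone base can raise both); without a backbone match, a
 *         minimum number of reads is also needed in each direction
 * ABI SOLiD: not supported (MIRANOTIFY with Notify::INTERNAL)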
 *
 *************************************************************************/

// Rates the groups of every sequencing type and strain after the first
//  fill in: sets the 'valid' flag of each group in groups_st_st according
//  to sequencing type dependent criteria (number of reads, group quality,
//  presence of both directions, backbone match for Solexa) and afterwards
//  applies sequencing type dependent clean up rules (spurious single read
//  groups, gap groups).
//
// groups_st_st  [seqtype][strainid][group]; the last group of each strain
//                is treated as the gap group
// ccI           consensus counts of the contig position being looked at,
//                used for the backbone checks of Solexa groups
void Contig::nmpr_rategroups(vector<vector<vector <nngroups_t> > > & groups_st_st, cccontainer_t::const_iterator ccI)
{
  FUNCSTART("void Contig::nmpr_rategroups(vector<vector<vector <nngroups_t> > > & groups_st_st, cccontainer_t::const_iterator ccI)");

  for(uint32 seqtype=0; seqtype<ReadGroupLib::getNumSequencingTypes(); ++seqtype){
    contig_parameters const & con_rt_params= (*CON_miraparams)[seqtype].getContigParams();

    for(uint32 strainid=0; strainid<groups_st_st[0].size(); ++strainid){
      auto & straingroups=groups_st_st[seqtype][strainid];

      // size of the biggest / smallest group seen for this strain
      uint32 largestgroup=0;
      uint32 smallestgroup=10000000;

      //
      // Pass 1: every group on its own
      //
      for(auto & grp : straingroups) {
        CEBUGF2("Comp group: seqt(" << seqtype << ")\tstrain(" << strainid << ")\t" << static_cast<char>(grp.base) << " ");

        CEBUGF2(static_cast<uint16>(grp.groupquality));
        CEBUGF2('\t' << grp.urdids.size());

        // TODO: configure if check by character only or if qualities are also taken

        if(grp.urdids.empty()) {
          // no read at all: can never be valid
          CEBUGF2("empty\n");
          grp.valid=false;
        }else{
          // reads are present, so start with "valid" and let the
          //  sequencing type specific rules below revoke that
          grp.valid=true;

          const int32 numreads=static_cast<int32>(grp.urdids.size());
          const bool toofewreads=(numreads < con_rt_params.con_minreadspergroup);
          const bool qualtoolow=(grp.groupquality<con_rt_params.con_mingroupqualforrmbtagging);
          const bool onestrandmissing=(grp.forwarddircounter==0
                                       || grp.complementdircounter==0);

#if CPP_READ_SEQTYPE_END != 8
#error "This code is made for 8 sequencing types, adapt!"
#endif
          switch(seqtype) {
          case ReadGroupLib::SEQTYPE_SANGER: {
            // Sanger: minimum number of reads and minimum group quality
            //  as set in preferences
            if(toofewreads || qualtoolow){
              grp.valid=false;
            }
            break;
          }
          case ReadGroupLib::SEQTYPE_454GS20:
          case ReadGroupLib::SEQTYPE_IONTORRENT:
          case ReadGroupLib::SEQTYPE_PACBIOLQ:
          case ReadGroupLib::SEQTYPE_PACBIOHQ: {
            // TODO: PacBio LQ / HQ

            // 454, IonTorrent and PacBio share the same rule: forward AND
            //  reverse reads in the group, a minimum group quality and a
            //  minimum number of reads as set in preferences
            if(onestrandmissing || qualtoolow || toofewreads){
              grp.valid=false;
            }
            break;
          }
          case ReadGroupLib::SEQTYPE_TEXT: {
            // Text "reads": only a minimum number of reads per group
            if(toofewreads){
              grp.valid=false;
            }
            break;
          }
          case ReadGroupLib::SEQTYPE_SOLEXA: {
            // Solexa groups need a minimum quality and a minimum number
            //  of reads as set in preferences. A group agreeing with the
            //  backbone is valid with that; a group disagreeing with it
            //  additionally needs reads in both directions (to get rid of
            //  these pesky Solexa GGC-triggered errors)

            grp.valid=false;

            int32 numreadsatpos=static_cast<int32>(grp.urdids.size());
            base_quality_t gqual=grp.groupquality;

            CEBUGF2(" mrpg: " << con_rt_params.con_minreadspergroup);
            CEBUGF2(" nrat: " << numreadsatpos << " bbc: " << ccI->getBBChar() << ' ');

            const bool bbmatch=(ccI->getBBChar() == grp.base);
            if(bbmatch){
              CEBUGF2("bbmatch! ");
              if(ccI->bbcounts[0] & getBBStrainMask(strainid)){
                CEBUGF2("strainmatch! ");
                // let the backbone contribute to read count and quality
                numreadsatpos+=ccI->bbcounts[0];
                gqual=max(gqual,ccI->bbbestquals[0]);

                // TODO: change this to really reflect forward/backward
                uint32 tempqual=ccI->bbcounts[0]*5+gqual;
                gqual=(tempqual>90) ? static_cast<base_quality_t>(90)
                                    : static_cast<base_quality_t>(tempqual);
                CEBUGF("newnumreads: " << numreadsatpos << " newqual: " << static_cast<uint16>(gqual));
              }
            }

            if(numreadsatpos >= con_rt_params.con_minreadspergroup
               && gqual >=con_rt_params.con_mingroupqualforrmbtagging){
              if(bbmatch){
                grp.valid=true;
              }else{
                // at least 2 reads in every direction + 1 read per 12 reads
                //  of group size
                // this should prevent the worst false positives
                int minreadsperdir=1+numreadsatpos/12;
                if(grp.forwarddircounter>minreadsperdir
                   && grp.complementdircounter>minreadsperdir){
                  grp.valid=true;
                }
              }
            }
            break;
          }
          case ReadGroupLib::SEQTYPE_ABISOLID: {
            grp.valid=false;
            if(!grp.urdids.empty()) {
              MIRANOTIFY(Notify::INTERNAL, "Type ABI SOLiD needs more support 12b.");
            }
            break;
          }
          default : {
            MIRANOTIFY(Notify::INTERNAL, "Unknown seqtype? 12b.");
          }
          }
        }
        CEBUGF2("\tvalid1: " << grp.valid << '\n');

        const uint32 groupsize=static_cast<uint32>(grp.urdids.size());
        largestgroup=max(largestgroup,groupsize);
        smallestgroup=min(smallestgroup,groupsize);
      }

      //
      // Pass 2: try to get rid of spurious mismatches, looking at all
      //  groups of the strain together
      //

      // nothing to do if no group has any read
      if(largestgroup==0) continue;

      // Rule for a valid gap group (last group) which has more than
      //  'mindircount' reads in each direction:
      //  - if more than one group is valid, be conservative and first
      //    resolve the base groups: the gap group is invalidated (it will
      //    probably be dealt with in a later iteration)
      //  - else every group which is not double stranded is invalidated
      auto gapneedsbothstrands = [&straingroups](const int mindircount){
        auto & gapgroup=straingroups.back();
        if(gapgroup.valid!=true) return;
        if(!(gapgroup.forwarddircounter>mindircount)) return;
        if(!(gapgroup.complementdircounter>mindircount)) return;

        uint32 numvalids=0;
        for(const auto & grp : straingroups) {
          if(grp.valid) ++numvalids;
        }

        if(numvalids>1) {
          gapgroup.valid=false;
          return;
        }
        for(auto & grp : straingroups) {
          if(grp.forwarddircounter==0
             || grp.complementdircounter==0){
            grp.valid=false;
          }
        }
      };

#if CPP_READ_SEQTYPE_END != 8
#error "This code is made for 8 sequencing types, adapt!"
#endif

      switch(seqtype) {
      case ReadGroupLib::SEQTYPE_SANGER: {
        // new: only if groupquality is lower 40
        // TODO didum 40 config
        //  bad idea! single reads never are >35-37 in base qual!
        //  testing: lower 30 -> scrap

        // a lone read with quality < 30 standing against a group of 10
        //  or more reads is regarded as spurious
        if(con_rt_params.con_disregard_spurious_rmb_mismatches
           && smallestgroup==1
           && largestgroup>=10){
          for(auto & grp : straingroups) {
            if(grp.valid==true
               && grp.urdids.size()==1
               && grp.groupquality < 30){
              grp.valid=false;
            }
          }
        }

        // look if we need to have two strands when a gap group is present
        if(con_rt_params.con_also_mark_gap_bases_needbothstrands){
          gapneedsbothstrands(0);
        }
        break;
      }
      case ReadGroupLib::SEQTYPE_454GS20:
      case ReadGroupLib::SEQTYPE_IONTORRENT:
      case ReadGroupLib::SEQTYPE_PACBIOLQ:
      case ReadGroupLib::SEQTYPE_PACBIOHQ: {
        // TODO: PacBio LQ / HQ (no info atm, treated like the others)

        // with 454 / IonTorrent data, gap groups are presently not valid
        // that's too ... unsure.
        straingroups.back().valid=false;
        break;
      }
      case ReadGroupLib::SEQTYPE_TEXT: {
        // a bit like Sanger, but only working with fwd/rev
        // TODO: check a bit more if really a good idea

        // look if we need to have two strands when a gap group is present
        //  (here the gap group needs more than one read per direction)
        if(con_rt_params.con_also_mark_gap_bases_needbothstrands){
          gapneedsbothstrands(1);
        }
        break;
      }
      case ReadGroupLib::SEQTYPE_SOLEXA: {
        // Solexa ... hmmm, everything is valid atm

        // TODO: on higher coverages, perhaps need at least 3?
        //  especially if there's a gap?

        //throw Notify(Notify::INTERNAL, THISFUNC, "Type Solexa needs more support 12c.");
        break;
      }
      case ReadGroupLib::SEQTYPE_ABISOLID: {
        MIRANOTIFY(Notify::INTERNAL, "Type ABI SOLiD needs more support 12c.");
        break;
      }
      default : {
        MIRANOTIFY(Notify::INTERNAL, "Unknown seqtype? 12c.");
      }
      }
    }
  }

  FUNCEND();
}


/*************************************************************************
 *
 * Helper for new repeat marker
 *
 *************************************************************************/

void Contig::calcGroupQual(const nngroups_t & g)
{
  FUNCSTART("void Contig::calcGroupQual(const nngroups_t & g)");

  if(g.urdids.empty()){
    const_cast<nngroups_t &>(g).groupquality=0;
    return;
  }

  /* errorrate for this group is computed as follows:
     Best quality for a base in a direction makes basic rate = 100%
     add to this: 10% of next best base quality

     Same procedure for other direction, then add both qualities

     In general, the values are almost the same (mostly a tad higher) as
     with the more complicated (and time consuming) old variant.

     Cap at 90

     e.g.
     + A 30     -> 30       \
     + A 20     ->  2        \
     + A 20                  /+ = 32    \
     + A 20                 /            \
     .                                    > + = 60
     - A 26     -> 26     \              /
     - A 20     ->  2      >  + = 28    /
     - A 15               /
  */

  base_quality_t max1f=0;
  base_quality_t max2f=0;
  base_quality_t max1r=0;
  base_quality_t max2r=0;

  // find two highest values for each direction
  // using iterators in the loop below brings exactly nothing

  for(uint32 j=0; j<g.urdids.size(); ++j) {
    // testing for rails practically doubles the time needed in this loop, so let's go
    //  without. In practice, rails shouldn't have any quality at all anyway ... or a very low one
    //
    //if(CON_reads.getReadAtURDID(g.urdids[j]).isRail()) continue;

    if(g.directions[j]>0) {
      if(g.quals[j] >= max1f){
	max2f=max1f;
	max1f=g.quals[j];
      }else if(g.quals[j] > max2f){
	max2f=g.quals[j];
      }
    } else {
      if(g.quals[j] >= max1r){
	max2r=max1r;
	max1r=g.quals[j];
      }else if(g.quals[j] > max2r){
	max2r=g.quals[j];
      }
    }
  }

  // compute quality

  const_cast<nngroups_t &>(g).groupquality=max1f+max1r+(max2f+max2r)/10;
  if((g).groupquality>90) const_cast<nngroups_t &>(g).groupquality=90;

  return;
}








/*************************************************************************
 *
 * Tags the bases forming a valid group in a column, both in the reads
 *  and in the consensus. Does nothing if prc.tagged is already set.
 *
 * What is done:
 *  1. every read of the rcci column (getORPID() != -1) whose base in
 *     that column equals one of prc.groupbases gets its URDID appended
 *     to prc.urdids
 *  2. the counter of repstats matching prc.type (SRM, WRM or SNP) is
 *     incremented
 *  3. every read in prc.urdids (readpool id != -1) is flagged in
 *     readsmarkedsrm (indexed by readpool id) and gets tag(s) of type
 *     prc.type
 *  4. the consensus gets a tag at prc.contigpos which carries the
 *     group qualities of A, C, G, T and *. Its identifier is built from
 *     the first three characters of the read tag identifier plus 'c'
 *
 * Columns containing a gap group which are tagged as SRMr or WRMr are
 *  handled specially:
 *
 *  read1  ......xaGGGtx.....
 *  read2  ......xa*GGtx.....
 *
 *  Instead of tagging the column itself, tags are created at the
 *   endpoints of the base stretches in the reads.
 *  In the example above: tags in read1 "aG" and "Gt" and tags in read2
 *   "a*G" and "Gt"
 *  Both endpoints are tagged for every non-gap base of prc.groupbases.
 *
 * Failures while tagging a read are reported on stdout and otherwise
 *  ignored (best effort).
 *
 *************************************************************************/

//#define CEBUGF2(bla)  {cout << bla; cout.flush();}
//#define CEBUGF2(bla)


void Contig::nmpr_tagColumn(nnpos_rep_col_t & prc, const rcci_t & rcci, vector<bool> & readsmarkedsrm, repeatmarker_stats_t & repstats)
{
  FUNCSTART("void Contig::nmpr_tagColumn(nnpos_rep_col_t & prc, const rcci_t & rcci, vector<bool> & readsmarkedsrm, repeatmarker_stats_t & repstats)");

  CEBUGF2("See whether to tag " << prc);

  if(prc.tagged==false) {
    prc.tagged=true;
    CEBUGF2("Need a tag. " << multitag_t::getIdentifierStr(prc.type) << " adding tag to consensus at " << prc.contigpos << endl);

    // template for all read tags set below, only from/to change
    multitag_t tmpmt(Read::REA_defaulttag_UNSr);
    tmpmt.identifier=prc.type;

    // First checking whether there are reads in the rcci that have a base
    //  like one of groupbases on the prc.
    for(const auto & pcrI : rcci.getPCRIsInCol()){
      if(pcrI.getORPID()==-1) continue;
      char base=pcrI.getBase(rcci.getContigPos());

      for(uint32 gbi=0; gbi<prc.groupbases.size(); ++gbi){
        if(base==prc.groupbases[gbi]){
          prc.urdids.push_back(pcrI.getURDID());
        }
      }
    }

    // statistics
    if(prc.type == Read::REA_tagentry_idSRMr){
      repstats.numSRMs++;
    }else if(prc.type == Read::REA_tagentry_idWRMr){
      repstats.numWRMs++;
    }else if(prc.type == Read::REA_tagentry_idSIOr
             || prc.type == Read::REA_tagentry_idSAOr
             || prc.type == Read::REA_tagentry_idSROr){
      repstats.numSNPs++;
    }

    // group qualities per base for the consensus tag; bases without a
    //  group keep quality 0
    base_quality_t qualA=0;
    base_quality_t qualC=0;
    base_quality_t qualG=0;
    base_quality_t qualT=0;
    base_quality_t qualStar=0;

    bool hasgapinprc=false;
    for(uint32 gbi=0; gbi<prc.groupbases.size(); ++gbi){
      switch(prc.groupbases[gbi]) {
      case 'A': {
        qualA=prc.groupquals[gbi];
        break;
      }
      case 'C': {
        qualC=prc.groupquals[gbi];
        break;
      }
      case 'G': {
        qualG=prc.groupquals[gbi];
        break;
      }
      case 'T': {
        qualT=prc.groupquals[gbi];
        break;
      }
      case '*': {
        qualStar=prc.groupquals[gbi];
        hasgapinprc=true;
        break;
      }
      default: {}
      }
    }

    // for gapped columns: the non-gap group bases whose run ends get tagged
    vector<char> runbases;
    runbases.reserve(4);
    if(hasgapinprc) {
      for(uint32 gbi=0; gbi<prc.groupbases.size(); ++gbi){
        if(prc.groupbases[gbi]!='*') runbases.push_back(prc.groupbases[gbi]);
      }
    }

    bool shorttagcomments=(*CON_miraparams)[0].getContigParams().con_shorttagcomments;

    if(prc.type==Read::REA_tagentry_idSRMr){
      tmpmt.comment=Read::REA_tagentry_coSRMr;
    }else if(prc.type== Read::REA_tagentry_idWRMr) {
      tmpmt.comment=Read::REA_tagentry_coWRMr;
    }else if(prc.type==Read::REA_tagentry_idSROr) {
      tmpmt.comment=Read::REA_tagentry_coSROr;
    }else if(prc.type==Read::REA_tagentry_idSAOr) {
      tmpmt.comment=Read::REA_tagentry_coSAOr;
    }else if(prc.type==Read::REA_tagentry_idSIOr) {
      tmpmt.comment=Read::REA_tagentry_coSIOr;
    }else{
      tmpmt.comment=Read::REA_tagentry_coUnknown;
    }

    // with short tag comments, only the "unknown" comment is kept
    if(shorttagcomments && tmpmt.comment != Read::REA_tagentry_coUnknown){
      tmpmt.comment=Read::REA_tagentry_coEmpty;
    }

    for(auto & urdid : prc.urdids){
      const auto orpid=CON_reads.getReadORPIDAtURDID(urdid);
      if(orpid == -1) continue;
      auto pcrI =CON_reads.getIteratorOfReadpoolID(orpid);

      int32 rawreadpos=pcrI.contigPos2RawReadPos(prc.contigpos);

      CEBUGF2("\tfor read " << pcrI->getName() << " at " << rawreadpos << endl);

      readsmarkedsrm[orpid]=true;

      try {
        // Run-end tagging needs at least one non-gap group base. Without
        //  one there is no run to look for, so tag the column position.
        const bool tagrunends = hasgapinprc
          && !runbases.empty()
          && (prc.type==Read::REA_tagentry_idSRMr
              || prc.type==Read::REA_tagentry_idWRMr);

        if(tagrunends){
          // logic for the tagging of gapped groups
          // this is somewhat sick ...
          const int32 lastpos=static_cast<int32>(pcrI->getLenSeq())-1;
          const bool isreverse=(CON_reads.getReadDirectionAtURDID(urdid) < 0);

          // tags the end of the run found via getLowerBoundPosOfBaseRun():
          //  from one position before the bound (if there is one) up to
          //  the position given by getUpperNonGapPosOfReadPos()
          auto taglowerend = [&](const char searchbase){
            int32 arpl=pcrI->getLowerBoundPosOfBaseRun(rawreadpos,searchbase,true);
            if(arpl<0) arpl=0;
            int32 arpu=pcrI->getUpperNonGapPosOfReadPos(arpl);
            if(arpu>lastpos) arpu=lastpos;
            if(arpl>0) --arpl;
            tmpmt.from=arpl;
            tmpmt.to=arpu;
            const_cast<Read &>(*pcrI).addTagO(tmpmt);
          };

          // tags the end of the run found via getUpperBoundPosOfBaseRun():
          //  from the position given by getLowerNonGapPosOfReadPos() up to
          //  one position after the bound, clamped to the read
          auto tagupperend = [&](const char searchbase){
            int32 arpu=pcrI->getUpperBoundPosOfBaseRun(rawreadpos,searchbase,true);
            if(arpu>lastpos) arpu=lastpos;
            int32 arpl=pcrI->getLowerNonGapPosOfReadPos(arpu);
            if(arpl<0) arpl=0;
            ++arpu;
            if(arpu>lastpos) arpu=lastpos;
            tmpmt.from=arpl;
            tmpmt.to=arpu;
            const_cast<Read &>(*pcrI).addTagO(tmpmt);
          };

          for(const char runbase : runbases){
            // group bases are in contig direction, the raw read of a
            //  reverse read shows the complement
            char searchbase=runbase;
            if(isreverse) searchbase=dptools::getComplementBase(searchbase);

            // Front of the potential homopolymer first, then its back.
            // Both tags must be added here, per run base: deferring the
            //  second one until after the loop would lose it for every
            //  run base but the last.
            if(isreverse){
              tagupperend(searchbase);
              taglowerend(searchbase);
            }else{
              taglowerend(searchbase);
              tagupperend(searchbase);
            }
          }
        }else{
          tmpmt.from=rawreadpos;
          tmpmt.to=rawreadpos;
          const_cast<Read &>(*pcrI).addTagO(tmpmt);
        }
      }
      catch (...) {
        // best effort: a read which cannot be tagged must not stop the
        //  tagging of the remaining reads and of the consensus
        cout << "Tag failed?!?\n";
      }
    }

    // consensus tag: e.g. read tag "SRMr" -> consensus tag "SRMc"
    string contigtype=multitag_t::getIdentifierStr(prc.type).substr(0,3)+'c';
    addTagToConsensus(prc.contigpos,
                      prc.contigpos,
                      '=',
                      contigtype.c_str(),
                      "",
                      true,
                      true,
                      qualA, qualC, qualG, qualT, qualStar);
  }

  FUNCEND();
}




/*************************************************************************
 *
 * Compares the consensus sequences of the single strains column by
 *  column and sets tags in the consensus and in the reads.
 *
 * Per contig position:
 *  - if markIUPACs is set: a strain base which is neither 'X' nor a
 *    valid base/gap (dptools::isValidStarBase()) leads to an IUPc
 *    consensus tag (comment: strain name and base), unless an IUPc tag
 *    already covered that position when the function was entered
 *  - if markUNSs or markSNPs is set, the strains are compared pairwise
 *    ('?' and 'N' are not compared):
 *     * an 'X' in one of both strains counts as missing coverage
 *     * different bases count as SNP; the strength ("high", "medium",
 *       "weak") is derived from both strain qualities and
 *       con_mingroupqualforrmbtagging
 *  - SNP found: every read of the column gets a SROr tag (UNSr if
 *    markSNPs is not set or if one of the bases was not a valid
 *    base/gap), the consensus gets a SROc respectively UNSc tag if the
 *    position had no SROc/SIOc/SAOc or IUPc tag on entry
 *  - only missing coverage found: runs of consecutive positions are
 *    merged into one MCVc consensus tag
 *
 * Strains whose consensus consists only of 'X' and '*' are ignored.
 *
 * At the end, reduceConsensusTags() is used to remove consensus tags
 *  which share a position with a tag of higher priority.
 *
 * Progress messages are written to stdout.
 *
 *************************************************************************/

//#define CEBUG(bla)   {cout << bla; cout.flush(); }

void Contig::markFeaturesByConsensus(bool markSNPs, bool markUNSs, bool markIUPACs)
{
  // NOTE(review): the function name string below omits the markSNPs
  //  parameter
  FUNCSTART("void Contig::markFeaturesByConsensus(bool markUNSs, bool markIUPACs)")

  cout << "Searching for: ";
  if(markSNPs){
    cout << "SROs ";
  }
  if(markUNSs){
    cout << "UNSs ";
  }
  if(markIUPACs){
    cout << "IUPACs";
  }

  cout << ", preparing needed data: sorting tags ... "; cout.flush();

  const contig_parameters & con_params=(*CON_miraparams)[0].getContigParams();
  sortConsensusTags();
  cout << "done" << endl;

  //CEBUG("fetching overall consensus\n");
  //string consseq;
  //vector<base_quality_t> consqual;
  //getConsensus(consseq, consqual, false, 1,0);

  // one consensus sequence and one quality vector per strain
  vector<string> strainseq(ReadGroupLib::getNumOfStrains());
  vector<vector<base_quality_t> > strainqual(ReadGroupLib::getNumOfStrains());

  for(uint32 sid=0; sid<ReadGroupLib::getNumOfStrains(); ++sid){
    strainseq[sid].reserve(getContigLength());
    strainqual[sid].reserve(getContigLength());
  }

  calcConsensi();
  for(uint32 sid=0; sid<ReadGroupLib::getNumOfStrains(); ++sid){
    cout << "fetching consensus for strain" << sid << " ...";cout.flush();
    newConsensusGet(strainseq[sid], strainqual[sid], sid);
    //cout << "I'm back" << endl;

    // it's possible that the sequences we got are entirely made out
    //  of X and *... when they are never covered by the given strain
    // In this case, just set the whole sequence to "?"

    bool hasseq=false;
    for(uint32 acp=0; acp<strainseq[sid].size(); ++acp){
      if(strainseq[sid][acp]!='X' && strainseq[sid][acp]!='*' ) {
	hasseq=true;
	break;
      }
    }

    //cout << "hasseq: " << hasseq << endl;

    if(!hasseq){
      cout << "(strain " << sid << " empty.) ..."; cout.flush();
      for(uint32 acp=0; acp<strainseq[sid].size(); ++acp){
	strainseq[sid][acp]='?';
      }
    }
  }

  // per position lookup: is there already an IUPc tag, respectively
  //  one of SROc / SIOc / SAOc? Filled once from the tags present now,
  //  tags added further down are not reflected.
  vector<uint8> cacheIUPctag(getContigLength(),0);
  vector<uint8> cacheSROSIOSAOcctags(getContigLength(),0);

  for(const auto & ctag : CON_consensus_tags){
    if(ctag.identifier==Contig::CON_tagentry_idIUPc
       || ctag.identifier==Contig::CON_tagentry_idSROc
       || ctag.identifier==Contig::CON_tagentry_idSIOc
       || ctag.identifier==Contig::CON_tagentry_idSAOc){
      for(uint32 ctp=ctag.from; ctp<=ctag.to; ++ctp){
	if(ctag.identifier==Contig::CON_tagentry_idIUPc){
	  cacheIUPctag[ctp]=1;
	}
	if(ctag.identifier==Contig::CON_tagentry_idSROc
	   || ctag.identifier==Contig::CON_tagentry_idSIOc
	   || ctag.identifier==Contig::CON_tagentry_idSAOc){
	  cacheSROSIOSAOcctags[ctp]=1;
	}
      }
    }
  }

  cout << "done.\nStarting search:" << endl;

  // column iterator over all reads, advanced in step with acp below
  rcci_t rcci(this);
  {
    vector<int32> allowedstrainids;
    // empty is == all ids
    vector<uint8> allowedreadtypes;
    rcci.init(allowedstrainids,
	      allowedreadtypes,
	      true,           // take rails
	      true,           // take backbones
	      true);   /* and also reads without readpool-reads like
			  the merged solexa reads */
  }

  string strengthcomment;
  string straincomment;
  // currently open run of "missing coverage" positions, -1 == none
  int32 mcfrom=-1;
  int32 mcto=-1;

  multitag_t tmpmt(Read::REA_defaulttag_UNSr);

  for(uint32 acp=0; acp<strainseq[0].size(); rcci.advance(), ++acp){
    bool foundsomething=false;
    //bool foundsomethingweak=false;
    bool hasSNP=false;
    bool missingcov=false;
    bool makeitUNS=false;
    bool hadoutput=false;

    strengthcomment.clear();

    for(uint32 sid1=0; sid1<ReadGroupLib::getNumOfStrains(); ++sid1){
      char sid1b=toupper(strainseq[sid1][acp]);
      // '?' marks strains without any sequence in this contig
      if(sid1b=='?') continue;
      if(markIUPACs && sid1b!='X' && !dptools::isValidStarBase(sid1b)){

	if(!cacheIUPctag[acp]){
	  straincomment="Strain '"+ReadGroupLib::getStrainOfStrainID(sid1)+"': "+sid1b;
	  addTagToConsensus(rcci.getContigPos(),
			    rcci.getContigPos(),
			    '=',
			    multitag_t::getIdentifierStr(CON_tagentry_idIUPc).c_str(),
			    straincomment.c_str(),
			    false);
	  CEBUG("IUPAC Tag: " << straincomment << '\n');
	}
      }
      if(sid1b=='N' || !(markUNSs||markSNPs)) continue;
      // pairwise comparison with all strains of higher id
      // Note: the "break" statements below leave only this inner loop,
      //  the outer loop continues with the next sid1. strengthcomment
      //  therefore holds the strength of the last difference found.
      for(uint32 sid2=sid1+1; sid2<ReadGroupLib::getNumOfStrains(); ++sid2){
	char sid2b=toupper(strainseq[sid2][acp]);
	if(sid2b=='?' || sid2b=='N') continue;
	//CEBUG("acp: " << acp << "\tstrain[" << sid1 << "] <-> strain[" << sid2 << "]: ");
	//CEBUG(sid1b << " (" << static_cast<uint16>(strainqual[sid1][acp]) << ")\t");
	//CEBUG(sid2b << " (" << static_cast<uint16>(strainqual[sid2][acp]) << ")\n");
	if(sid1b=='X' || sid2b=='X'){
	  CEBUG("acp: " << acp << "\tmissing cov [" << sid1 << "] <-> strain[" << sid2 << "]: ");
	  CEBUG(sid1b << " (" << static_cast<uint16>(strainqual[sid1][acp]) << ")\t");
	  CEBUG(sid2b << " (" << static_cast<uint16>(strainqual[sid2][acp]) << ")");
	  hadoutput=true;

	  foundsomething=true;
	  missingcov=true;
	}else if(sid1b != sid2b) {
	  CEBUG("acp: " << acp << "\tmismatch strain[" << sid1 << "] <-> strain[" << sid2 << "]: ");
	  CEBUG(sid1b << " (" << static_cast<uint16>(strainqual[sid1][acp]) << ")\t");
	  CEBUG(sid2b << " (" << static_cast<uint16>(strainqual[sid2][acp]) << ")");
	  hadoutput=true;
	  hasSNP=true;

	  // if it's not a clean thing (ACGT or *),
	  //  it's probably a repeat where the
	  //  backbone does not have all the copies resolved.
	  if(!dptools::isValidStarBase(sid1b)
	     || !dptools::isValidStarBase(sid2b)) makeitUNS=true;

	  // both strains reach the full threshold: high
	  if(strainqual[sid1][acp]>=con_params.con_mingroupqualforrmbtagging
	     && strainqual[sid2][acp]>=con_params.con_mingroupqualforrmbtagging){
	    CEBUG("\tsomething");
	    foundsomething=true;
	    strengthcomment="high";
	    break;
	  }
	  // both strains reach half the threshold: medium
	  if(strainqual[sid1][acp]>=con_params.con_mingroupqualforrmbtagging/2
	     && strainqual[sid2][acp]>=con_params.con_mingroupqualforrmbtagging/2){
	    CEBUG("\tsomething medium");
	    foundsomething=true;
	    strengthcomment="medium";
	    break;
	  }
	  // TODO: testing
	  // Looks like a weak difference
	  CEBUG("\tsomething weak");
	  foundsomething=true;
	  strengthcomment="weak";
	  break;
	}
      }
    }

    if(foundsomething){
      // a SNP takes precedence over missing coverage at the same position
      if(hasSNP){
	//vector<consensustag_t>::const_iterator ctI=CON_consensus_tags.begin();

	bool hasconsSNPtag=false;
	bool hasconsUNStag=false;
	if(cacheSROSIOSAOcctags[acp]) hasconsSNPtag=true;
	// NOTE(review): this flag is filled from the IUPc cache, UNSc tags
	//  are not looked at -- confirm that this is intended
	if(cacheIUPctag[acp]) hasconsUNStag=true;

	if(!markSNPs){
	  CEBUG("forcing UNS");
	  makeitUNS=true;
	}
	// debug output only
	if(!hasconsSNPtag && !hasconsUNStag){
	  if(makeitUNS){
	    CEBUG("\ttag new UNS");
	  }else{
	    CEBUG("\ttag new SROc");
	  }
	}

	// tag all reads of this column, whatever base they have there
	//for(uint32 idsj=0; idsj < rcci.read_ids_in_col.size(); idsj++){
	for(auto & pcrI : rcci.getPCRIsInCol()){
	  //int32 actreadid=rcci.read_ids_in_col[idsj];
	  //contigread_t & ric =CON_reads[actreadid];

	  int32 rawreadpos=pcrI.contigPos2RawReadPos(rcci.getContigPos());

	  tmpmt.from=rawreadpos;
	  tmpmt.to=rawreadpos;
	  tmpmt.setCommentStr(strengthcomment);
	  if(makeitUNS){
	    //pcrI->addTagO(rawreadpos, rawreadpos, Read::REA_tagentry_idUNSr, strengthcomment);
	    tmpmt.identifier=Read::REA_defaulttag_UNSr.identifier;
	    const_cast<Read &>(*pcrI).addTagO(tmpmt);
	  }else{
	    //pcrI->addTag(rawreadpos, rawreadpos, Read::REA_tagentry_idSROr, strengthcomment);
	    tmpmt.identifier=Read::REA_defaulttag_SROr.identifier;
	    const_cast<Read &>(*pcrI).addTagO(tmpmt);
	  }
	}
	// tag the consensus only if the position was not tagged before
	if(!hasconsSNPtag && !hasconsUNStag){
	  if(makeitUNS){
	    addTagToConsensus(rcci.getContigPos(),
			      rcci.getContigPos(),
			      '=',
			      multitag_t::getIdentifierStr(CON_tagentry_idUNSc).c_str(),
			      strengthcomment.c_str(),
			      false);
	  }else{
	    addTagToConsensus(rcci.getContigPos(),
			      rcci.getContigPos(),
			      '=',
			      multitag_t::getIdentifierStr(CON_tagentry_idSROc).c_str(),
			      strengthcomment.c_str(),
			      true);
	  }
	}
      }else if(missingcov){
	// merge consecutive positions into one MCVc tag: open a new run,
	//  extend the current one, or write the current one out and start
	//  anew
	if(mcfrom<0){
	  mcfrom=static_cast<int32>(rcci.getContigPos());
	  mcto=mcfrom;
	}else if(static_cast<int32>(rcci.getContigPos()) == mcto+1){
	  mcto=static_cast<int32>(rcci.getContigPos());
	}else{
	  addTagToConsensus(mcfrom,
			    mcto,
			    '=',
			    multitag_t::getIdentifierStr(CON_tagentry_idMCVc).c_str(),
			    "",
			    false);
	  mcfrom=static_cast<int32>(rcci.getContigPos());
	  mcto=mcfrom;
	}
      }
    }
    if(hadoutput) CEBUG('\n');
  }
  // write out the last run of missing coverage, if any
  if(mcfrom>=0){
    addTagToConsensus(mcfrom,
		      mcto,
		      '=',
		      multitag_t::getIdentifierStr(CON_tagentry_idMCVc).c_str(),
		      "",
		      false);
  }

  cout << "done with search" << endl;

  // clean up positions carrying several consensus tags
  {
    vector<multitag_t::mte_id_t> master;
    vector<multitag_t::mte_id_t> unwanted;

    // 1. SNP and repeat marker tags win over IUPc, UNSc, STMS and STMU
    master.push_back(Contig::CON_tagentry_idSROc);
    master.push_back(Contig::CON_tagentry_idSIOc);
    master.push_back(Contig::CON_tagentry_idSAOc);
    master.push_back(Contig::CON_tagentry_idSRMc);
    master.push_back(Contig::CON_tagentry_idWRMc);
    unwanted.push_back(Contig::CON_tagentry_idIUPc);
    unwanted.push_back(Contig::CON_tagentry_idUNSc);
    unwanted.push_back(Contig::CON_tagentry_idSTMS);
    unwanted.push_back(Contig::CON_tagentry_idSTMU);
    reduceConsensusTags(master,unwanted);

    // 2. SNP tags win over repeat marker tags
    master.clear();
    unwanted.clear();
    master.push_back(Contig::CON_tagentry_idSROc);
    master.push_back(Contig::CON_tagentry_idSIOc);
    master.push_back(Contig::CON_tagentry_idSAOc);
    unwanted.push_back(Contig::CON_tagentry_idSRMc);
    unwanted.push_back(Contig::CON_tagentry_idWRMc);
    reduceConsensusTags(master,unwanted);

    // 3. WRMc wins over SRMc
    master.clear();
    unwanted.clear();
    master.push_back(Contig::CON_tagentry_idWRMc);
    unwanted.push_back(Contig::CON_tagentry_idSRMc);
    reduceConsensusTags(master,unwanted);
  }

  FUNCEND();

  return;
}

//#define CEBUG(bla)


/*************************************************************************
 *
 * If several consensus tags are at the same position, deletes the unwanted ones
 *
 *************************************************************************/

void Contig::reduceConsensusTags(const vector<multitag_t::mte_id_t> & mastertags, const vector<multitag_t::mte_id_t> & deletecandidates)
{
  sortConsensusTags();

  vector<vector<consensustag_t>::iterator> tagstodelete;

  vector<consensustag_t>::iterator ctI=CON_consensus_tags.begin();
  uint32 acttag=0;
  for(; ctI != CON_consensus_tags.end(); ){
    uint32 cpos=ctI->from;
    //cout << "looking: " << cpos << endl;
    vector<consensustag_t>::iterator rangeE=ctI;
    bool hasmaster=false;
    for(; rangeE != CON_consensus_tags.end() && rangeE->from == cpos; ++rangeE) {
      if(!hasmaster){
	for(uint32 mti=0; mti<mastertags.size(); ++mti){
	  if(rangeE->identifier == mastertags[mti]) {
	    hasmaster=true;
	    break;
	  }
	}
      }
    }

    //cout << "hasmaster: " << hasmaster << endl;
    if(hasmaster){
      vector<consensustag_t>::iterator rangeS=ctI;
      for(; rangeS != rangeE; ++rangeS, ++acttag) {
	for(uint32 dci=0; dci<deletecandidates.size(); dci++){
	  //cout << "delcan: " << deletecandidates[dci] << endl;
	  if(rangeS->identifier == deletecandidates[dci]) {
	    tagstodelete.push_back(rangeS);
	    //cout << "To delete: " << *rangeS << endl;
	    break;
	  }
	}
      }
    }
    ctI=rangeE;
  }
  while(!tagstodelete.empty()){
    CON_consensus_tags.erase(tagstodelete.back());
    tagstodelete.pop_back();
  }
}




/*************************************************************************
 *
 * Coverage analysis per sequencing type.
 *
 * covperst (indexed by sequencing type, may be empty) is taken by value:
 *  each entry is multiplied by -AS:ardct (as_ard_multicopythreshold) of
 *  its sequencing type and then serves as coverage threshold.
 *
 * 1) Only if covperst is not empty: for each sequencing type with a
 *    threshold > 0, searches stretches of the contig where the coverage
 *    of that type reaches the threshold.
 *     - a stretch becomes valid once as_ard_multicopyminlen positions
 *       at or above the threshold were counted; a position below the
 *       threshold before that point discards the stretch
 *     - a valid stretch tolerates positions below the threshold; it is
 *       closed once as_ard_multicopygrace consecutive such positions
 *       were seen (or at the end of the contig)
 *     - a closed stretch runs from its start to the last position at or
 *       above the threshold and is marked as multicopy area
 * 2) For each sequencing type and each contig position, every read of
 *    that type in the column (no rails, no backbones, no reads without
 *    readpool id):
 *     - gets maxcovperread[readpool id] raised to the coverage of its
 *       sequencing type at that position, if this is larger
 *     - gets multicopies[readpool id] set to 1 if the position lies in
 *       a multicopy area of its sequencing type
 *
 * maxcovperread must have the size of the readpool.
 *
 * returns: true if
 *  - covperst was empty and at least one maxcovperread value was raised
 *  - or at least one read lies on a multicopy position (also if that
 *    read was already flagged in multicopies)
 *
 *************************************************************************/

//#define CEBUG(bla)   {cout << bla; cout.flush(); }

bool Contig::analyseReadCoverage(vector<uint32> & maxcovperread, vector<uint8> & multicopies, vector<uint32> covperst)
{
  FUNCSTART("void Contig::analyseReadCoverage(vector<uint32> & maxcov)");

  bool somethingchanged=false;

  CEBUG("Additional coverage analysis.\n");

  BUGIFTHROW(maxcovperread.size() != CON_readpool->size(), "maxcovperread.size() != CON_readpool->size() ???");

  // per sequencing type: one flag per contig position, 1 == multicopy
  vector< vector<uint8> > newmulticopies_perst(ReadGroupLib::SEQTYPE_END);
  for(uint32 sti=0; sti<newmulticopies_perst.size(); ++sti){
    newmulticopies_perst[sti].assign(CON_counts.size(),0);
  }

  // turn the coverage values into detection thresholds
  {
    uint32 sti=0;
    for(auto & threshold : covperst){
      threshold=static_cast<uint32>(
        static_cast<double>(threshold)*((*CON_miraparams)[sti]).getAssemblyParams().as_ard_multicopythreshold
        );
      CEBUG("threshold(" << sti << "): " << threshold << endl);
      ++sti;
    }
  }

  if(!covperst.empty()){
    for(uint32 seqtype=0; seqtype<ReadGroupLib::SEQTYPE_END; ++seqtype){
      // state of the stretch currently looked at
      int32 runstart=-1;       // first position, -1 == no stretch
      int32 runend=-1;         // one behind the last position >= threshold
      int32 numabove=0;        // positions >= threshold seen in the stretch
      int32 numgrace=0;        // consecutive positions < threshold
      bool runisvalid=false;   // minimum length reached?

      auto forgetrun=[&](){
        runstart=-1;
        runend=-1;
        numabove=0;
        numgrace=0;
        runisvalid=false;
      };
      auto markrun=[&](){
        for(int32 cp=runstart; cp<runend; ++cp){
          CEBUG("new mc pos: " << cp << '\n');
          newmulticopies_perst[seqtype][cp]=1;
        }
      };

      uint32 contigpos=0;
      for(auto countI=CON_counts.begin(); contigpos<CON_counts.size(); ++countI, ++contigpos){
        CEBUG("acp: " << contigpos << "\tac: " << countI->seqtype_cov[seqtype] << '\n');

        if(covperst[seqtype]>0 && countI->seqtype_cov[seqtype] >= covperst[seqtype]) {
          if(runstart<0) {
            // open a new stretch
            CEBUG("\t#2\n");
            runstart=static_cast<int32>(contigpos);
            runend=runstart+1;
            numgrace=0;
            numabove=1;
          }else{
            // extend the open stretch up to here
            CEBUG("\t#1\n");
            runend=1+static_cast<int32>(contigpos);
            numgrace=0;
            numabove++;
            if(numabove>=((*CON_miraparams)[seqtype]).getAssemblyParams().as_ard_multicopyminlen) runisvalid=true;
          }
          continue;
        }

        // below threshold from here on
        if(!runisvalid){
          CEBUG("\t#6\n");
          forgetrun();
          continue;
        }

        CEBUG("\t#3\n");
        numgrace++;
        if(!(numgrace>=((*CON_miraparams)[seqtype]).getAssemblyParams().as_ard_multicopygrace)) continue;

        // grace used up: close the stretch
        CEBUG("\t#4\n");
        if(runend-runstart>=((*CON_miraparams)[seqtype]).getAssemblyParams().as_ard_multicopyminlen){
          CEBUG("\t#5\n");
          markrun();
        }
        forgetrun();
      }

      // a valid stretch may still be open at the end of the contig
      if(runisvalid
         && runend-runstart>=((*CON_miraparams)[seqtype]).getAssemblyParams().as_ard_multicopyminlen){
        CEBUG("\t#7\n");
        markrun();
      }
    }
  }


  // now go through the contig and update the values of all reads,
  //  one sequencing type after the other

  for(uint32 seqtype=0; seqtype<ReadGroupLib::SEQTYPE_END; ++seqtype){
    rcci_t rcci(this);
    {
      vector<int32> allowedstrainids; // empty == all
      vector<uint8> allowedreadtypes(1,seqtype); // only this type
      rcci.init(allowedstrainids,
                allowedreadtypes,
                false,           // no rails
                false,          // no backbones
                false);   // no reads without readpool-reads
    }

    uint32 contigpos=0;
    for(auto countI=CON_counts.begin(); contigpos<CON_counts.size(); ++countI, rcci.advance(), ++contigpos){
      for(auto & pcrI : rcci.getPCRIsInCol()){
        const auto orpid=pcrI.getORPID();
        if(orpid == -1) continue;

        if(countI->seqtype_cov[seqtype]>maxcovperread[orpid]){
          maxcovperread[orpid]=countI->seqtype_cov[seqtype];
          CEBUG(pcrI->getName() << " new maxcov: " << maxcovperread[orpid] << '\n');
          if(covperst.empty()){
            somethingchanged=true;
          }
        }

        if(pcrI->getSequencingType() != seqtype) continue;
        if(!newmulticopies_perst[seqtype][contigpos]) continue;

        if(!multicopies[orpid]) {
          CEBUG(pcrI->getName() << " new multicopy " << maxcovperread[orpid] << '\n');
        }
        multicopies[orpid]=1;
        somethingchanged=true;
      }
    }
  }

  CEBUG("Done (Additional coverage analysis).\n");

  FUNCEND();

  return somethingchanged;
}

//#define CEBUG(bla)



// Determines the run of "base" in "read" around "readpos".
//
// Return value:
//  - number of bases in the run (gaps are jumped over and not counted)
// Returned in reference variables:
//  - from, to: first and last read position of the stretch found,
//    i.e. the positions next to the first non-matching base on the left
//    respectively right side (gaps in between are part of the stretch)
//  - zeroqualcounts: number of bases of the run with a quality of 0
//
// The result is 0 (with from, to and zeroqualcounts also being 0):
//  - when the read has a different valid ACGT base at readpos
//  - when the run arrives at one of the ends of the read. If
//    useclipsasends is set, the left and right clipoffs of the read are
//    taken as ends, else the physical ends of the sequence.
//
// Bases of the read are compared in upper case against "base".

uint32 Contig::getBaseRunLength(const Read & read, const uint32 readpos, const char base, uint32 & from, uint32 & to, uint32 & zeroqualcounts, bool useclipsasends)
{
  FUNCSTART("uint32 Contig::getBaseRunLength(const Read & read, const uint32 readpos, const char base, uint32 & from, uint32 & to, uint32 & zeroqualcounts, bool useclipsasends)");

  // initialise return by reference variables
  zeroqualcounts=0;
  from=0;
  to=0;

  // Standing on another base? Then there is no run to measure.
  // Gaps and non-ACGT characters are let through.
  const char basehere=toupper(read.getBaseInSequence(readpos));
  if(basehere!=base && dptools::isValidACGTBase(basehere)){
    CEBUG("getBaseRunLength(): not on base\n");
    return 0;
  }

  // qualities up to this value count as "zero quality"
  // should be zero, 2 for testing!
  const uint8 zeroqual=0;

  // walk to the left, starting one position before readpos
  uint32 numleft=0;
  int32 scanpos=readpos-1;
  while(scanpos>=0){
    const auto actbase=read.getBaseInSequence(scanpos);
    if(actbase != '*'){
      if(toupper(actbase) != base) break;
      if(read.getQualityInSequence(scanpos)<=zeroqual) ++zeroqualcounts;
      ++numleft;
    }
    --scanpos;
  }

  // don't count when this run was on border of read
  if(useclipsasends ? (scanpos<=read.getLeftClipoff()) : (scanpos<0)){
    zeroqualcounts=0;
    CEBUG("getBaseRunLength(): left border\n");
    return 0;
  }
  from=scanpos+1;

  // walk to the right, starting on readpos itself
  uint32 numright=0;
  scanpos=readpos;
  while(static_cast<uint32>(scanpos)<read.getLenSeq()){
    const auto actbase=read.getBaseInSequence(scanpos);
    if(actbase != '*'){
      if(toupper(actbase) != base) break;
      if(read.getQualityInSequence(scanpos)<=zeroqual) ++zeroqualcounts;
      ++numright;
    }
    ++scanpos;
  }

  // don't count when this run was on border of read
  if(useclipsasends
     ? (static_cast<uint32>(scanpos)>=read.getRightClipoff())
     : (static_cast<uint32>(scanpos)>=read.getLenSeq())) {
    zeroqualcounts=0;
    from=0;
    CEBUG("getBaseRunLength(): right border\n");
    return 0;
  }
  to=scanpos-1;

  uint32 totalruncounter=numleft+numright;

  CEBUG("left run: " << numleft << endl);
  CEBUG("right run: " << numright << endl);
  CEBUG("total run: " << totalruncounter << endl);
  CEBUG("zeroqualcounts: " << zeroqualcounts << endl);

  FUNCEND();

  return totalruncounter;
}



/*************************************************************************
 *
 * Goes through the contig and searches whether there are "gap patterns"
 *  of at least mingaplen gaps (to account for high homopolymer
 *  variability in 454 data).
 *
 * When found, the area is marked with a "Fcon" tag in the consensus
 *  and the search continues after the tagged area.
 *
 * E.g.
 *       .........xxxxxx******xxxxx.............
 *       .........xxxxxx******xxxxx.............
 *       .........xxxxxxxxxxxxxxxxx.............
 *       .........xxxxxx******xxxxx.............
 *       .........xxxxxxxxxxxxxxxxx.............
 *       .........xxxxxxxxxxxxxxxxx.............
 *       .........xxxxxx******xxxxx.............
 *
 * Really tricky due to those damn homopolymers of 454 which can vary up to
 *  5 or more bases for longer runs.
 *
 * mingaplen       minimum length of a gap run (after correction) for a
 *                  read to count as "read with gaps"; 0 is treated as 1
 * mingroupsize    minimum number of reads with gaps needed to tag a
 *                  position, also used to pre-filter columns by their
 *                  counts; 0 is treated as 1
 * needbothstrands if true, reads with gaps must be present in forward
 *                  and in reverse direction
 *
 * Rails, backbones and reads without readpool id are not looked at.
 *
 * TODO??? Mark as SNP (different types) if the difference is separated by strain
 *
 *************************************************************************/


//#define CEBUG(bla)   {cout << bla; cout.flush(); }
//#define CEBUGF2(bla)   {cout << bla; cout.flush(); }


void Contig::largeGapRepeatMarker(uint32 mingaplen, uint32 mingroupsize, bool needbothstrands)
{
  FUNCSTART("void Contig::largeGapRepeatMarker(uint32 mingaplen, uint32 mingroupsize, bool needbothstrands)");

  if(mingroupsize==0) mingroupsize=1;
  if(mingaplen==0) mingaplen=1;

  rcci_t rcci(this);
  {
    vector<int32> allowedstrainids; // empty == all
    vector<uint8> allowedreadtypes; // empty == all
    rcci.init(allowedstrainids,
              allowedreadtypes,
              false,           // no rails
              false,          // no backbones
              false);   // no reads without readpool-reads
  }

  // gapruns[l]: number of reads with a (corrected) gap run of length l
  vector<uint32> gapruns;
  gapruns.reserve(1000);

  // check for homopolymer runs
  vector<char> checkbases;
  checkbases.push_back('A');
  checkbases.push_back('C');
  checkbases.push_back('G');
  checkbases.push_back('T');


  uint32 jumpover=0;  // once a gap area was found, loop through the for loop until after the gap area when >0
  auto ccI=CON_counts.begin();
  for(uint32 actcontigpos=0; actcontigpos<CON_counts.size(); ++actcontigpos, ++ccI, rcci.advance()){
    CEBUGF2("lgrm acp: " << actcontigpos << '\n');
    if(jumpover) {
      --jumpover;
      continue;
    }
    CEBUGF2(*ccI);

    // cheap pre-filter on the column counts: enough coverage, enough
    //  gaps and at least one base with enough counts beside the gaps
    if(ccI->total_cov < 2*mingroupsize) continue;
    if(ccI->star < 4*mingroupsize) continue;
    uint32 numvalidgroups=1;
    if(ccI->A >= 4*mingroupsize) ++numvalidgroups;
    if(ccI->C >= 4*mingroupsize) ++numvalidgroups;
    if(ccI->G >= 4*mingroupsize) ++numvalidgroups;
    if(ccI->T >= 4*mingroupsize) ++numvalidgroups;
    if(numvalidgroups<2) continue;

    CEBUG("lgrm check: " << actcontigpos << '\n');

    // first, find out how the most probable base counts of
    //  each base on both sides of the area
    // hpruncount[cbi][l]: number of reads where the run of
    //  checkbases[cbi] at this position has length l. All four vectors
    //  always have the same size.
    vector<vector<uint32> > hpruncount(checkbases.size());
    vector<uint32> mostfrequenthprun(checkbases.size(),0);
    vector<bool> ishomopolymer;
    for(auto & tpcrI : rcci.getPCRIsInCol()){
      if(tpcrI.getORPID() == -1
         || tpcrI->isBackbone()
         || tpcrI->isRail()) continue;

      int32 rawrpos=tpcrI.contigPos2RawReadPos(actcontigpos);
      char ricbase=tpcrI.getBase(actcontigpos);
      CEBUG("rn " << tpcrI->getName()<< "\t" << ricbase << '\n');

      for(uint32 cbi=0; cbi<checkbases.size(); cbi++){
        uint32 from,to,zeroqualcounts;
        // checkbases are in contig direction, the raw read of a reverse
        //  read shows the complement
        char checkbase=checkbases[cbi];
        if(tpcrI.getReadDirection() < 0) checkbase=dptools::getComplementBase(checkbase);
        uint32 runlen=getBaseRunLength(*tpcrI,
                                       rawrpos,
                                       checkbase,
                                       from,to,zeroqualcounts);
        CEBUGF2("cbi: " << cbi << "\t" << runlen << '\n');
        if(runlen>=hpruncount[cbi].size()) {
          CEBUGF2("Extend to " << runlen << '\n');
          for(uint32 i=0; i<checkbases.size(); i++) hpruncount[i].resize(runlen+1,0);
        }
        ++hpruncount[cbi][runlen];
        CEBUGF2("hpruncount[cbi][runlen]: " << hpruncount[cbi][runlen] << '\n');
        CEBUGF2("mostfrequenthprun[cbi]: " << mostfrequenthprun[cbi] << '\n');
        if(hpruncount[cbi][runlen]>hpruncount[cbi][mostfrequenthprun[cbi]]){
          mostfrequenthprun[cbi]=runlen;
        }
      }
    }

    // The column counts may come from reads the column iterator does not
    //  deliver. Without any read looked at, there are no run length
    //  statistics (size()-1 below would wrap around and index into empty
    //  vectors) and no read could qualify as "read with gaps" anyway.
    if(hpruncount[0].empty()) continue;

    for(uint32 cbi=0; cbi<checkbases.size(); ++cbi){
      CEBUG("mostfrequenthprun[" << cbi << "]: " << mostfrequenthprun[cbi]);
      CEBUG("\tfrequency: " << hpruncount[cbi][mostfrequenthprun[cbi]] << '\n');

      ishomopolymer.push_back(mostfrequenthprun[cbi]>=4);
    }

    // the gapcorrector reduces the number of "seen" gaps
    //
    // ..........AAAAAAA..........
    // ..........**AAAAA..........
    // ..........*AAAAAA..........
    // ..........*AAAAAA..........
    // ..........*AAAAAA..........
    //
    // starting from the longest run length seen and going downwards:
    //  increase the gapcorrector by one for each run length which, for
    //  one of the bases, is supported by less than 1/10 of the reads
    //  supporting the most frequent run length. Stop at the first run
    //  length where this is not the case.

    uint32 gapcorrector=0;
    for(uint32 hplen=hpruncount[0].size()-1; hplen>0; --hplen){
      bool docorrect=false;
      for(uint32 cbi=0; cbi<checkbases.size(); ++cbi){
        if(hpruncount[cbi][hplen]>0 && hpruncount[cbi][hplen]<hpruncount[cbi][mostfrequenthprun[cbi]]/10){
          CEBUG("Correcting because of " << hplen << ", " << cbi << '\t' << hpruncount[cbi][hplen] << '\n');
          docorrect=true;
          break;
        }
      }
      if(docorrect) {
        ++gapcorrector;
      }else{
        break;
      }
    }

    CEBUG("gapcorrector: " << gapcorrector << '\n');

    // find out how many reads have at least the minimum gap size
    // if there's a gap, the reads must also have the minimum homopolymer
    //  size which is the one of the most frequent from above

    uint32 readswithgaps=0;
    uint32 readswithnogaps=0;      // gap reads which were rejected
    uint32 mostfrequentgapsize=0;
    bool hasgapsforward=false;
    bool hasgapsreverse=false;
    gapruns.resize(1);
    gapruns[0]=0;

    for(auto & tpcrI : rcci.getPCRIsInCol()){
      if(tpcrI.getORPID() == -1) continue;

      int32 rawrpos=tpcrI.contigPos2RawReadPos(actcontigpos);
      char ricbase=tpcrI.getBase(actcontigpos);
      CEBUG("rn " << tpcrI->getName()<< "\t" << ricbase << '\n');

      if(ricbase=='*'){
        bool cantake=true;

        // if it's categorised as homopolymer and the length is lower
        //  than the most frequent length, don't take
        // if it's not categorised as homopolymer and the length is
        //  larger than the most frequent length +2, don't take
        for(uint32 cbi=0; cbi< checkbases.size(); ++cbi){
          char base =checkbases[cbi];
          if(tpcrI.getReadDirection() < 0) base=dptools::getComplementBase(base);
          uint32 from,to,zeroqualcounts;
          uint32 hprunlen=getBaseRunLength(*tpcrI,
                                           rawrpos,
                                           base,
                                           from,to,zeroqualcounts);
          CEBUG("hprunlen[" << cbi << "]: " << hprunlen << '\n');
          if(ishomopolymer[cbi]){
            if(hprunlen<mostfrequenthprun[cbi]) {
              cantake=false;
              CEBUG("Homopolymer to short\n");
            }
          }else{
            if(hprunlen>mostfrequenthprun[cbi] + 2) {
              cantake=false;
              CEBUG("Too many new homoplymer bases.\n");
            }
          }
        }
        if(cantake){
          // length of the gap run, reduced by the gapcorrector (but
          //  never below 0)
          uint32 gaprunlen=tpcrI->getLenOfGapRun(rawrpos);
          CEBUG("grl " << gaprunlen << '\n');
          if(gapcorrector>gaprunlen) {
            gaprunlen=0;
          }else{
            gaprunlen-=gapcorrector;
          }
          CEBUG("grlc " << gaprunlen << '\n');

          if(gaprunlen>=gapruns.size()) gapruns.resize(gaprunlen+1,0);
          ++gapruns[gaprunlen];
          if(gapruns[gaprunlen]>gapruns[mostfrequentgapsize]) mostfrequentgapsize=gaprunlen;

          if(gaprunlen>=mingaplen) {
            ++readswithgaps;
            if(tpcrI.getReadDirection() > 0) {
              hasgapsforward=true;
            }else{
              hasgapsreverse=true;
            }
          }
        }else{
          ++readswithnogaps;
        }
      }
    }


    // longest (corrected) gap run seen in this column
    uint32 maxgaprunlen=gapruns.size();
    if(maxgaprunlen>0) --maxgaprunlen;

    CEBUG("readsincol: " << rcci.getPCRIsInCol().size() << '\n');
    CEBUG("readswithgaps: " << readswithgaps << "\treadswithnogaps: " << readswithnogaps << '\n');
    CEBUG("hasgapsforward: " << hasgapsforward << "\thasgapsreverse: " << hasgapsreverse << '\n');
    CEBUG("maxgaprunlen: " << maxgaprunlen << '\n');
    CEBUG("most frequent gap size: " << mostfrequentgapsize << "\toccuring: " << gapruns[mostfrequentgapsize]<<'\n');

    if(readswithgaps<mingroupsize) continue;
    if(needbothstrands && !(hasgapsforward && hasgapsreverse)) continue;


    // so, basically this looks like a good place to mark

    CEBUG("Tagging: " << actcontigpos << " to " << actcontigpos+maxgaprunlen << '\n');

    addTagToConsensus(actcontigpos,actcontigpos+maxgaprunlen,
                      '=',"Fcon","",true);

    // continue the search behind the tagged area
    jumpover=maxgaprunlen;

  }

  FUNCEND();
}





/*************************************************************************
 *
 * Helper (currently only for codonSingleBaseRepeatMarker)
 *
 * First fill in of groups (per sequencing type per strain).
 *  Don't look at qualities
 *
 *************************************************************************/

//#define CEBUGF2(bla)  {cout << bla; cout.flush();}

void Contig::csbrm_fillin_groups_stst(const ercci_t & ercci, const vector<int8> & maskshadow, const vector<multitag_t::mte_id_t> & masktagtypes, vector<vector<vector <nngroups_t> > > & groups_st_st)
{
  FUNCSTART("void Contig::csbrm_fillin_groups_stst(const ercci_t & ercci, const vector<int8> & maskshadow, const vector<string> & masktagtypes, vector<vector<vector <nngroups_t> > > & groups_st_st)");

  for(uint32 seqtype=0; seqtype<ReadGroupLib::getNumSequencingTypes(); ++seqtype){
    contig_parameters const & con_rt_params= (*CON_miraparams)[seqtype].getContigParams();

    // TODO: check whether use the one from parameters???
    int32 endreadmarkexclusionarea=5;
    //switch(seqtype) {
    //case ReadGroupLib::SEQTYPE_SANGER:
    //case ReadGroupLib::SEQTYPE_454GS20:
    //case ReadGroupLib::SEQTYPE_SOLEXA:
    //case ReadGroupLib::SEQTYPE_ABISOLID: {
    //  endreadmarkexclusionarea=1;
    //  break;
    //}
    //default : {
    //  endreadmarkexclusionarea=1;
    //}
    //}

    BUGIFTHROW(endreadmarkexclusionarea<1, "endreadmarkexclusionarea may not be < 1");

    auto & pcri_st_st=ercci.getPCRIstst();
    for(uint32 strainid=0; strainid < pcri_st_st[0].size(); ++strainid){
      for(const auto & tpcrI : pcri_st_st[seqtype][strainid]){
	if(tpcrI->isRail()
	  || tpcrI->isBackbone()) {
	  continue;
	}

	int32 readpos=tpcrI.contigPos2UnclippedReadPos(ercci.getContigPos());

	// reads that start or end in that region are dangerous as some
	//  spurious false bases might influence the result. Skip those.
	if(tpcrI.getReadDirection() > 0){
	  if(readpos < tpcrI->getLeftClipoff()+endreadmarkexclusionarea
	     || readpos > tpcrI->getRightClipoff()-endreadmarkexclusionarea) {
	    CEBUGF2(tpcrI->getName()<< ": readposition too near to start or end of read.\n");
	    continue;
	  }
	}else{
	  if(tpcrI->calcComplPos(readpos) < tpcrI->getLeftClipoff()+endreadmarkexclusionarea
	     || tpcrI->calcComplPos(readpos) > tpcrI->getRightClipoff()-endreadmarkexclusionarea) {
	    CEBUGF2(tpcrI->getName()<< ": readposition too near to start or end of read.\n");
	    continue;
	  }
	}

	CEBUGF2("\nLooking at " << tpcrI->getName()<< " ");

	// check whether this read has a tag that prohibits searching for repeats here
	// (only PSHP for the time being)
	if(maskshadow[ercci.getContigPos()]) {
	  // remember that the readpos computing routine doesn't take care
	  //  of direction, so we have to complement that position in reverse cases
	  int32 rawreadpos=readpos;
	  if(tpcrI.getReadDirection() < 0){
	    rawreadpos=tpcrI->calcComplPos(readpos);
	  }

	  CEBUGF2("MASKED: " << ercci.getContigPos() << endl);
	  CEBUGF2(tpcrI->getName() << "\t" << rawreadpos << "\t");
	  bool maskfound=false;
	  for(uint32 i=0; i<masktagtypes.size(); i++){
	    if(tpcrI->hasTag(masktagtypes[i],rawreadpos)) {
	      CEBUGF2("Masked with " << multitag_t::getIdentifierStr(masktagtypes[i]) << ", next read.\n");
	      maskfound=true;
	      break;
	    }
	  }
	  if(maskfound) continue;
	}

	// ok, we know that we are at least con_endreadmarkexclusionarea away from

	char           base=tpcrI.getBase(ercci.getContigPos());
	base_quality_t qual=tpcrI.getQuality(ercci.getContigPos());

	CEBUGF2('\t' << tpcrI.getReadDirection() << '\t' << base << ' ' << static_cast<uint16>(qual));

	//for(uint32 actgroup=0; actgroup<groups_st_st[seqtype][strainid].size(); actgroup++) {
	for(auto & actgroup : groups_st_st[seqtype][strainid]){
	  if(actgroup.base==base) {
	    actgroup.urdids.push_back(tpcrI.getURDID());
	    actgroup.quals.push_back(qual);
	    actgroup.directions.push_back(tpcrI.getReadDirection());

	    if(tpcrI.getReadDirection() > 0){
	      ++actgroup.forwarddircounter;
	    }else{
	      ++actgroup.complementdircounter;
	    }
	    break;
	  }
	}
      }

      // TODO: I think I don't need that at the moment, check that when time
      for(auto & actgroup : groups_st_st[seqtype][strainid]){
	calcGroupQual(actgroup);
      }
    }
  }

  FUNCEND();
  return;
}



/*************************************************************************
 *
 * Helper for codonSingleBaseRepeatMarker
 *
 * For every read given via its URDID: looks at the raw read positions
 *  from 5 before up to (but not including) 5 after the read position
 *  which corresponds to actcontigpos. For each non-gap base found there,
 *  the base run it belongs to is determined and the bases in that run
 *  are counted (gap characters inside the run are not counted).
 *
 * Works on the unclipped sequence of the reads.
 *
 * Returns false as soon as one run has more than maxrunallowed bases,
 *  true if no such run was found in any of the reads.
 * URDIDs for which the contig has no read (ORPID == -1) are skipped.
 *
 *************************************************************************/

bool Contig::csbrm_checkReadsForHomopolymers(uint32 actcontigpos, uint32 maxrunallowed, const vector<int32> & checktheseurdids)
{
  FUNCSTART("bool Contig::csbrm_checkReadsForHomopolymers(uint32 actcontigpos, uint32 maxrunallowed, const vector<int32> & checktheseurdids)");

  for(const auto urdid : checktheseurdids){
    // same guard as codonSingleBaseRepeatMarker() uses before asking
    //  for the read iterator: no read for that URDID, nothing to check
    const auto orpid=CON_reads.getReadORPIDAtURDID(urdid);
    if(orpid==-1) continue;
    auto pcrI=CON_reads.getIteratorOfReadpoolID(orpid);

    CEBUGF2("\ncsbrm hp look " << pcrI->getName());

    int32 checkpos=pcrI.contigPos2RawReadPos(actcontigpos);
    int32 rightcheck=checkpos+5;
    checkpos-=5;

    // TODO: we work on unclipped sequence here because the
    //  cause for some problems may be in the clipped part
    //  Check whether this is really the right strategy (should be)
    for(; checkpos < rightcheck; checkpos++){
      // window may reach beyond either end of the read
      if(checkpos>=0 && checkpos < pcrI->getLenSeq()){
        char base=pcrI->getBaseInSequence(checkpos);
        if(base != '*'){
          // NOTE(review): the 'true' argument presumably lets the run
          //  extend across gap characters -- confirm in Read
          uint32 lowerposofrun=pcrI->getLowerBoundPosOfBaseRun(checkpos,
                                                               base,
                                                               true);
          // +1: make the upper bound exclusive for the loop below
          uint32 upperposofrun=1+pcrI->getUpperBoundPosOfBaseRun(checkpos,
                                                                 base,
                                                                 true);
          // now count how many bases there are really (without gaps)
          uint32 count=0;
          for(uint32 runpos=lowerposofrun; runpos<upperposofrun; ++runpos){
            if(pcrI->getBaseInSequence(runpos)==base) ++count;
          }
          CEBUGF2("\t" << base << " " << count);
          if(count > maxrunallowed){
            CEBUG("\tCritical len, that's bad, stopping check.\n");
            FUNCEND();
            return false;
          }
        }
      }
    }

  }

  FUNCEND();
  return true;
}



/*************************************************************************
 *
 * Looks at the 11 consensus columns centred on the given position
 *  (5 before, the position itself, 5 after) and checks that at most one
 *  of these columns has a gap, and that this column has a gap count
 *  of exactly one.
 *
 * Returns true if the block is clean in that sense.
 * Returns false if not, or if the block would reach the begin or run
 *  past the end of the contig.
 *
 *************************************************************************/
bool Contig::csbrm_checkForCleanBlock(cccontainer_t::iterator ccI)
{
  CEBUGF2("cfcb 1\n");

  // rewind to the left border of the block; hitting the contig begin
  //  on the way means there is no full block
  uint32 stepsback=0;
  while(stepsback<5){
    if(ccI==CON_counts.begin()) return false;
    --ccI;
    ++stepsback;
  }

  CEBUGF2("cfcb 2\n");

  // number of columns with a gap seen so far (never gets above 1)
  uint32 gapcolumns=0;
  for(uint32 column=0; column<11; ++column, ++ccI){
    CEBUGF2("i: " << column << "\t" << gapcolumns << '\n');
    if(ccI==CON_counts.end()) return false;

    // column without gaps: nothing to check
    if(!(ccI->star>0)) continue;

    // a second gapped column or more than one gap in this one: not clean
    if(gapcolumns) return false;
    if(ccI->star > 1) return false;
    ++gapcolumns;
  }

  CEBUGF2("true\n");
  return true;
}


//#define CEBUGF2(bla)


/*************************************************************************
 *
 * Some code taken from new mark possible repeats ... *sigh*
 *
 * Searches and marks places in the contig where new mark possible repeats
 *  may have missed repeats (especially in 454 data). Takes only Sanger,
 *  454, IonTorrent or Text sequences to do so, Solexa leads to too many
 *  false positives (SOLiD probably too) and the repeats in Solexa should
 *  have been caught by the normal repeat marker anyway.
 *
 * A place gets tagged if, for one sequencing type and one strain, at
 *  least two base groups each have
 *   - at least mingroupsize reads
 *   - reads in both + and - direction
 *   - either no homopolymer >5 in the vicinity
 *     or are in a clean area (no gaps) and have no homopolymer >3 in the
 *     vicinity
 *
 * If SNPs instead of repeat markers, set SROr/SAOr but not SIOr
 *
 * mingroupsize    minimum number of reads per group (0 is treated as 1)
 * repstats        numSRMs is incremented once per tagged contig position
 * readsmarkedsrm  indexed by ORPID, set to true for each read which got
 *                 a tag
 *
 * TODO: check what to do with PacBio
 *
 *************************************************************************/

//#define CEBUG(bla)  {cout << bla; cout.flush();}
//#define CEBUGF2(bla)  {cout << bla; cout.flush();}

void Contig::codonSingleBaseRepeatMarker(uint32 mingroupsize, repeatmarker_stats_t & repstats, vector<bool> & readsmarkedsrm)
{
  FUNCSTART("void Contig::codonSingleBaseRepeatMarker(uint32 mingroupsize, repeatmarker_stats_t & repstats, vector<bool> & readsmarkedsrm)");

  if(mingroupsize==0) mingroupsize=1;

  vector<int8> maskshadow;
  vector<multitag_t::mte_id_t> masktagtypes;
  //masktagtypes.push_back(Read::REA_tagentry_idSOFApolyA_sequence);
  masktagtypes.push_back(Read::REA_defaulttag_PSHP.identifier);
  buildMaskShadow(maskshadow,masktagtypes,false);

  // get highest ID of strain with reads as number of strains in contig
  // this is formally wrong as a contig may contain some strain IDs but not others
  //  e.g.  (1 0 0 0 25 0 0 0) contains two strains, but has the highest ID=4
  // but the routines below just need a correctly sized vector
  uint32 numstrains=0;
  for(uint32 nsi=0; nsi<CON_readsperstrain.size(); ++nsi){
    if(CON_readsperstrain[nsi]>0) numstrains=nsi;
  }
  // strain numbering starts at 0, so add 1
  ++numstrains;

  static const string groupbases("ACGT*");

  nngroups_t emptygroup;
  emptygroup.base='!';
  emptygroup.valid=false;
  emptygroup.forwarddircounter=0;
  emptygroup.complementdircounter=0;
  emptygroup.groupquality=0;

  // groups per seqtype per strain, empty template:
  //  one empty group for each base in groupbases
  vector<vector<vector <nngroups_t> > > emptygroups_st_st;

  emptygroups_st_st.resize(ReadGroupLib::getNumSequencingTypes());
  for(uint32 seqtype=0; seqtype<ReadGroupLib::getNumSequencingTypes(); seqtype++){
    emptygroups_st_st[seqtype].resize(numstrains);
    for(uint32 strainid=0; strainid<numstrains; ++strainid){
      for(uint32 actgroup=0; actgroup<groupbases.size(); ++actgroup){
        emptygroups_st_st[seqtype][strainid].push_back(emptygroup);
        emptygroups_st_st[seqtype][strainid].back().base=groupbases[actgroup];
      }
    }
  }

  // groups per seqtype per strain, the real thing
  vector<vector<vector <nngroups_t> > > groups_st_st;

  // which reads to mark
  vector<uint32> urdidstomark;
  urdidstomark.reserve(1024);

  // bool vector
  vector<uint8> strainswithgroups;
  strainswithgroups.reserve(numstrains);

  // NOTE(review): the second argument is commented "take backbone" but
  //  false is passed -- confirm against ercci_t::init() which one is meant
  ercci_t ercci(this);
  ercci.init(false,        // don't take rails
             false,        // take backbone
             numstrains);

  auto ccI=CON_counts.begin();

  multitag_t tmpmt;

  // ccI and ercci are advanced in lockstep with actcontigpos, also when
  //  a position is left early via continue
  for(uint32 actcontigpos=0; actcontigpos<CON_counts.size() ; ++actcontigpos, ++ccI, ercci.advance()){
    CEBUGF2("\ncsbrm acp: " << actcontigpos << '\n');
    CEBUGF2(*ccI);

    // cheap pre-filter on the column counts
    if(ccI->total_cov < 2*mingroupsize) continue;
    // more than 10% gaps?
    if(ccI->star * 10 > ccI->total_cov) continue;

    // at least two different bases must reach the count threshold
    uint32 numvalidgroups=0;
    if(ccI->A >= 4*mingroupsize) numvalidgroups++;
    if(ccI->C >= 4*mingroupsize) numvalidgroups++;
    if(ccI->G >= 4*mingroupsize) numvalidgroups++;
    if(ccI->T >= 4*mingroupsize) numvalidgroups++;
    if(numvalidgroups<2) continue;

    CEBUG("csbrm check: " << actcontigpos << '\n');

    // check the environment
    bool cleanblock=csbrm_checkForCleanBlock(ccI);

    CEBUG("Cleanblock: " << cleanblock << '\n');

    // clear the groups
    groups_st_st=emptygroups_st_st;

    // put the bases of the different reads into groups
    csbrm_fillin_groups_stst(ercci,
                             maskshadow,
                             masktagtypes,
                             groups_st_st);

    bool foundsomething=false;

#if CPP_READ_SEQTYPE_END != 8
#error "This code is made for 8 sequencing types, adapt!"
#endif

    // TODO: check what to do with PacBio

    urdidstomark.clear();
    strainswithgroups.clear();
    strainswithgroups.resize(numstrains,0);
    for(uint32 seqtype=0; seqtype<ReadGroupLib::getNumSequencingTypes(); seqtype++){
      // next sequencing type if sequencing type is not Sanger or 454
      //  or IonTorrent or Text
      if(seqtype != ReadGroupLib::SEQTYPE_SANGER
         && seqtype != ReadGroupLib::SEQTYPE_454GS20
         && seqtype != ReadGroupLib::SEQTYPE_IONTORRENT
         && seqtype != ReadGroupLib::SEQTYPE_TEXT) continue;
      for(uint32 strainid=0; strainid<numstrains; ++strainid){
        CEBUG("seqt: " << seqtype << "\tstrid: " << strainid << '\n');
        uint32 numgroups=0;
        vector<uint32> tmpurdids2mark;
        for(auto & actgroup : groups_st_st[seqtype][strainid]){
          // minimum size must be there
          if(actgroup.urdids.size() < mingroupsize) continue;

          // now: either the group has + and - direction and no homopolymer >5
          //  or it is in a clean block, has +/- reads and no homopolymer
          //  with more than 3 bases in surrounding
          //
          // NOTE(review): a read set failing the ">5" check has a run >5
          //  and therefore also fails the ">3" check, so the clean block
          //  alternative can never rescue a group. Kept as is (same calls
          //  in the same order), but the first alternative was probably
          //  meant to be different -- confirm intent.
          const bool bothdirections=(actgroup.forwarddircounter>0
                                     && actgroup.complementdircounter>0);
          if(bothdirections
             && (csbrm_checkReadsForHomopolymers(actcontigpos,
                                                 5,
                                                 actgroup.urdids)
                 || (cleanblock
                     && csbrm_checkReadsForHomopolymers(actcontigpos,
                                                        3,
                                                        actgroup.urdids)))){
            CEBUG("Possible group " << actgroup << '\n');
            ++numgroups;
            tmpurdids2mark.insert(
              tmpurdids2mark.end(),
              actgroup.urdids.begin(),
              actgroup.urdids.end());
          }
        }
        // only two or more differing groups within one strain of one
        //  sequencing type make the position interesting
        if(numgroups>=2) {
          foundsomething=true;
          strainswithgroups[strainid]=1;
          urdidstomark.insert(
            urdidstomark.end(),
            tmpurdids2mark.begin(),
            tmpurdids2mark.end());
        }
      }
    }

    if(!foundsomething){
      CEBUG("Nothing in group analysis, we'll stop here.\n");
      continue;
    }

    CEBUG("Found something in group analysis.\n");

    uint32 numstrainswithgroups=0;
    for(auto swge : strainswithgroups){
      if(swge) ++numstrainswithgroups;
    }
    BUGIFTHROW(numstrainswithgroups==0, "Ooooops?! numstrainswithgroups==0?");

    // choose the read tag: SNP mode uses SAOr if a single strain showed
    //  groups and SROr if several did; otherwise it's a repeat marker
    if((*CON_miraparams)[0].getContigParams().con_assume_snp_insteadof_rmb){
      if(numstrainswithgroups==1) {
        tmpmt=Read::REA_defaulttag_SAOr;
      }else{
        tmpmt=Read::REA_defaulttag_SROr;
      }
    }else{
      tmpmt=Read::REA_defaulttag_SRMr;
    }

    for(auto urdid : urdidstomark){
      auto orpid=CON_reads.getReadORPIDAtURDID(urdid);
      if(orpid==-1) continue;
      readsmarkedsrm[orpid]=true;
      auto pcrI=CON_reads.getIteratorOfReadpoolID(orpid);

      int32 rawreadpos=pcrI.contigPos2RawReadPos(actcontigpos);

      CEBUG("Mark read " << pcrI->getName() << " at " << rawreadpos << endl);

      //pcrI->addTag(rawreadpos, rawreadpos, tagtoset, Read::REA_tagentry_coSRMr);
      tmpmt.from=rawreadpos;
      tmpmt.to=rawreadpos;
      // the iterator only hands out const reads, the tag has to go in anyway
      const_cast<Read &>(*pcrI).addTagO(tmpmt);
    }

    repstats.numSRMs++;
    CEBUG("Mark consensus\n");
    addTagToConsensus(actcontigpos,actcontigpos,
                      '=',"Fcon","",true);
    addTagToConsensus(actcontigpos,actcontigpos,
                      '=',
                      multitag_t::getIdentifierStr(CON_tagentry_idSRMc).c_str(),"",true);
  }

  FUNCEND();
}



/*************************************************************************
 *
 * Stub: at the moment always returns true, both iterators are ignored.
 *
 * NOTE(review): judging from the name, this is presumably meant to tell
 *  whether the template partner (opcrI) of read pcrI is expected to be
 *  in this contig -- confirm with callers before relying on it.
 *
 *************************************************************************/

//#define CEBUG(bla)  {cout << bla; cout.flush();}
//#define CEBUGF2(bla)  {cout << bla; cout.flush();}

bool Contig::shouldHaveTPartnerInContig(PlacedContigReads::const_iterator pcrI, PlacedContigReads::const_iterator opcrI)
{
  FUNCSTART("bool Contig::shouldHaveTPartnerInContig(PlacedContigReads::const_iterator pcrI, PlacedContigReads::const_iterator opcrI)");

  FUNCEND();
  return true;
}
