libStatGen Software 1
Pileup< PILEUP_TYPE, FUNC_CLASS > Class Template Reference

Class to perform a pileup of all reads by position, assuming the reads are coordinate sorted. More...

#include <Pileup.h>

Collaboration diagram for Pileup< PILEUP_TYPE, FUNC_CLASS >:

Public Member Functions

 Pileup (const FUNC_CLASS &fp=FUNC_CLASS())
 Constructor using the default maximum number of bases a read spans. More...
 
 Pileup (int window, const FUNC_CLASS &fp=FUNC_CLASS())
 Constructor that sets the maximum number of bases a read spans. More...
 
 Pileup (const std::string &refSeqFileName, const FUNC_CLASS &fp=FUNC_CLASS())
 Perform pileup with a reference. More...
 
 Pileup (int window, const std::string &refSeqFileName, const FUNC_CLASS &fp=FUNC_CLASS())
 Perform pileup with a reference and a specified window size. More...
 
virtual ~Pileup ()
 Destructor. More...
 
virtual int processFile (const std::string &fileName, uint16_t excludeFlag=0x0704, uint16_t includeFlag=0)
 Performs a pileup on the specified file. More...
 
virtual void processAlignment (SamRecord &record)
 Add an alignment to the pileup. More...
 
virtual void processAlignmentRegion (SamRecord &record, int startPos, int endPos, PosList *excludeList=NULL)
 Add only positions that fall within the specified region of the alignment to the pileup and outside of the specified excluded positions. More...
 
void flushPileup ()
 Done processing, flush every position that is currently being stored in the pileup. More...
 

Protected Member Functions

void addAlignmentPosition (int refPosition, SamRecord &record)
 
virtual void flushPileup (int refID, int refPosition)
 
void flushPileup (int refPosition)
 
int pileupPosition (int refPosition)
 
virtual void resetElement (PILEUP_TYPE &element, int position)
 
virtual void addElement (PILEUP_TYPE &element, SamRecord &record)
 
virtual void analyzeElement (PILEUP_TYPE &element)
 
virtual void analyzeHead ()
 

Protected Attributes

FUNC_CLASS myAnalyzeFuncPtr
 
std::vector< PILEUP_TYPE > myElements
 
int pileupStart
 
int pileupHead
 
int pileupTail
 
int pileupWindow
 
int myCurrentRefID
 
GenomeSequencemyRefPtr
 

Detailed Description

template<class PILEUP_TYPE, class FUNC_CLASS = defaultPileup<PILEUP_TYPE>>
class Pileup< PILEUP_TYPE, FUNC_CLASS >

Class to perform a pileup of all reads by position, assuming the reads are coordinate sorted.

Definition at line 58 of file Pileup.h.

Constructor & Destructor Documentation

◆ Pileup() [1/4]

template<class PILEUP_TYPE , class FUNC_CLASS >
Pileup< PILEUP_TYPE, FUNC_CLASS >::Pileup ( const FUNC_CLASS &  fp = FUNC_CLASS())

Constructor using the default maximum number of bases a read spans.

Definition at line 152 of file Pileup.h.

153 : myAnalyzeFuncPtr(fp),
154 myElements(),
155 pileupStart(0),
156 pileupHead(0),
157 pileupTail(-1),
158 pileupWindow(PileupHelper::DEFAULT_WINDOW_SIZE),
159 myCurrentRefID(-2),
160 myRefPtr(NULL)
161{
162 // Not using pointers since this is templated.
163 myElements.resize(pileupWindow);
164}

◆ Pileup() [2/4]

template<class PILEUP_TYPE , class FUNC_CLASS >
Pileup< PILEUP_TYPE, FUNC_CLASS >::Pileup ( int  window,
const FUNC_CLASS &  fp = FUNC_CLASS() 
)

Constructor that sets the maximum number of bases a read spans.

This is the "window" the length of the buffer that holds the pileups for each position until the read start has moved past the position.

Definition at line 168 of file Pileup.h.

169 : myAnalyzeFuncPtr(fp),
170 myElements(),
171 pileupStart(0),
172 pileupHead(0),
173 pileupTail(-1),
174 pileupWindow(window),
175 myCurrentRefID(-2),
176 myRefPtr(NULL)
177{
178 // Not using pointers since this is templated.
179 myElements.resize(window);
180}

◆ Pileup() [3/4]

template<class PILEUP_TYPE , class FUNC_CLASS >
Pileup< PILEUP_TYPE, FUNC_CLASS >::Pileup ( const std::string &  refSeqFileName,
const FUNC_CLASS &  fp = FUNC_CLASS() 
)

Perform pileup with a reference.

Definition at line 184 of file Pileup.h.

185 : myAnalyzeFuncPtr(fp),
186 myElements(),
187 pileupStart(0),
188 pileupHead(0),
189 pileupTail(-1),
190 pileupWindow(PileupHelper::DEFAULT_WINDOW_SIZE),
191 myCurrentRefID(-2),
192 myRefPtr(NULL)
193{
194 myRefPtr = new GenomeSequence(refSeqFileName.c_str());
195
196 // Not using pointers since this is templated.
197 myElements.resize(pileupWindow);
198
199 PILEUP_TYPE::setReference(myRefPtr);
200}
Create/Access/Modify/Load Genome Sequences stored as binary mapped files.

◆ Pileup() [4/4]

template<class PILEUP_TYPE , class FUNC_CLASS >
Pileup< PILEUP_TYPE, FUNC_CLASS >::Pileup ( int  window,
const std::string &  refSeqFileName,
const FUNC_CLASS &  fp = FUNC_CLASS() 
)

Perform pileup with a reference and a specified window size.

Definition at line 204 of file Pileup.h.

205 : myAnalyzeFuncPtr(fp),
206 myElements(),
207 pileupStart(0),
208 pileupHead(0),
209 pileupTail(-1),
210 pileupWindow(window),
211 myCurrentRefID(-2),
212 myRefPtr(NULL)
213{
214 myRefPtr = new GenomeSequence(refSeqFileName.c_str());
215
216 // Not using pointers since this is templated.
217 myElements.resize(window);
218
219 PILEUP_TYPE::setReference(myRefPtr);
220}

◆ ~Pileup()

template<class PILEUP_TYPE , class FUNC_CLASS >
Pileup< PILEUP_TYPE, FUNC_CLASS >::~Pileup
virtual

Destructor.

Definition at line 224 of file Pileup.h.

225{
226 flushPileup();
227 if(myRefPtr != NULL)
228 {
229 delete myRefPtr;
230 myRefPtr = NULL;
231 }
232}
void flushPileup()
Done processing, flush every position that is currently being stored in the pileup.
Definition: Pileup.h:368

Member Function Documentation

◆ addAlignmentPosition()

template<class PILEUP_TYPE , class FUNC_CLASS >
void Pileup< PILEUP_TYPE, FUNC_CLASS >::addAlignmentPosition ( int  refPosition,
SamRecord record 
)
protected

Definition at line 384 of file Pileup.h.

386{
387 int offset = 0;
388 try{
389 offset = pileupPosition(refPosition);
390 }
391 catch(std::runtime_error& err)
392 {
393 const char* overflowErr = "Overflow on the pileup buffer:";
394 String errorMessage = err.what();
395 if(strncmp(err.what(), overflowErr, strlen(overflowErr)) == 0)
396 {
397
398 errorMessage += "\n\tPileup Buffer Overflow: recordName = ";
399 errorMessage += record.getReadName();
400 errorMessage += "; Cigar = ";
401 errorMessage += record.getCigar();
402 }
403 throw std::runtime_error(errorMessage.c_str());
404 }
405
406 if((offset < 0) || (offset >= pileupWindow))
407 {
408 std::cerr << "Pileup Buffer Overflow: position = " << refPosition
409 << "; refID = " << record.getReferenceID()
410 << "; recStartPos = " << record.get1BasedPosition()
411 << "; pileupStart = " << pileupStart
412 << "; pileupHead = " << pileupHead
413 << "; pileupTail = " << pileupTail;
414 }
415
416 addElement(myElements[offset], record);
417}
int32_t get1BasedPosition()
Get the 1-based(SAM) leftmost position (POS) of the record.
Definition: SamRecord.cpp:1312
int32_t getReferenceID()
Get the reference sequence id of the record (BAM format rid).
Definition: SamRecord.cpp:1305
const char * getCigar()
Returns the SAM formatted CIGAR string.
Definition: SamRecord.cpp:1555
const char * getReadName()
Returns the SAM formatted Read Name (QNAME).
Definition: SamRecord.cpp:1542

◆ addElement()

template<class PILEUP_TYPE , class FUNC_CLASS >
void Pileup< PILEUP_TYPE, FUNC_CLASS >::addElement ( PILEUP_TYPE &  element,
SamRecord record 
)
protectedvirtual

Definition at line 534 of file Pileup.h.

536{
537 element.addEntry(record);
538}

◆ analyzeElement()

template<class PILEUP_TYPE , class FUNC_CLASS >
void Pileup< PILEUP_TYPE, FUNC_CLASS >::analyzeElement ( PILEUP_TYPE &  element)
protectedvirtual

Definition at line 542 of file Pileup.h.

543{
544 myAnalyzeFuncPtr(element);
545}

◆ analyzeHead()

template<class PILEUP_TYPE , class FUNC_CLASS >
void Pileup< PILEUP_TYPE, FUNC_CLASS >::analyzeHead
protectedvirtual

Definition at line 549 of file Pileup.h.

550{
551 myAnalyzeFuncPtr(myElements[pileupHead - pileupStart]);
552}

◆ flushPileup() [1/3]

template<class PILEUP_TYPE , class FUNC_CLASS >
void Pileup< PILEUP_TYPE, FUNC_CLASS >::flushPileup

Done processing, flush every position that is currently being stored in the pileup.

Definition at line 368 of file Pileup.h.

369{
370 // while there are still entries between the head and tail, flush,
371 // but no need to flush if pileupTail == -1 because in that case
372 // no entries have been added
373 while ((pileupHead <= pileupTail) && (pileupTail != -1))
374 {
375 flushPileup(pileupHead+1);
376 }
377 pileupStart = pileupHead = 0;
378 pileupTail = -1;
379}

◆ flushPileup() [2/3]

template<class PILEUP_TYPE , class FUNC_CLASS >
void Pileup< PILEUP_TYPE, FUNC_CLASS >::flushPileup ( int  refID,
int  refPosition 
)
protectedvirtual

Definition at line 421 of file Pileup.h.

422{
423 // if new chromosome, flush the entire pileup.
424 if(refID != myCurrentRefID)
425 {
426 // New chromosome, flush everything.
427 flushPileup();
428 myCurrentRefID = refID;
429 }
430 else
431 {
432 // on the same chromosome, so flush just up to this new position.
433 flushPileup(position);
434 }
435}

◆ flushPileup() [3/3]

template<class PILEUP_TYPE , class FUNC_CLASS >
void Pileup< PILEUP_TYPE, FUNC_CLASS >::flushPileup ( int  refPosition)
protected

Definition at line 439 of file Pileup.h.

440{
441 // Flush up to this new position, but no reason to flush if
442 // pileupHead has not been set.
443 while((pileupHead < position) && (pileupHead <= pileupTail))
444 {
445 analyzeHead();
446
447 pileupHead++;
448
449 if(pileupHead - pileupStart >= pileupWindow)
450 pileupStart += pileupWindow;
451 }
452
453 if(pileupHead > pileupTail)
454 {
455 // All positions have been flushed, so reset pileup info
456 pileupHead = pileupStart = 0;
457 pileupTail = -1;
458 }
459}

◆ pileupPosition()

template<class PILEUP_TYPE , class FUNC_CLASS >
int Pileup< PILEUP_TYPE, FUNC_CLASS >::pileupPosition ( int  refPosition)
protected

Definition at line 466 of file Pileup.h.

467{
468 // Check to see if this is the first position (pileupTail == -1)
469 if(pileupTail == -1)
470 {
471 pileupStart = pileupHead = position;
472 // This is the first time this position is being used, so
473 // reset the element.
474 resetElement(myElements[0], position);
475 pileupTail = position;
476 return(0);
477 }
478
479
480 if((position < pileupHead) || (position > (pileupHead + pileupWindow)))
481 {
482 String errorMessage =
483 "Overflow on the pileup buffer: specifiedPosition = ";
484 errorMessage += position;
485 errorMessage += ", pileup buffer start position: ";
486 errorMessage += pileupHead;
487 errorMessage += ", pileup buffer end position: ";
488 errorMessage += pileupHead + pileupWindow;
489
490 throw std::runtime_error(errorMessage.c_str());
491 }
492
493 // int offset = position - pileupStart;
494 int offset = position - pileupStart;
495
496 if(offset >= pileupWindow)
497 {
498 offset -= pileupWindow;
499 }
500
501 // Check to see if position is past the end of the currently
502 // setup pileup positions.
503 while(position > pileupTail)
504 {
505 // Increment pileupTail to the next position since the current
506 // pileupTail is already in use.
507 ++pileupTail;
508
509 // Figure out the offset for this next position.
510 offset = pileupTail - pileupStart;
511 if(offset >= pileupWindow)
512 {
513 offset -= pileupWindow;
514 }
515
516 // This is the first time this position is being used, so
517 // reset the element.
518 resetElement(myElements[offset], pileupTail);
519 }
520
521 return(offset);
522}

◆ processAlignment()

template<class PILEUP_TYPE , class FUNC_CLASS >
void Pileup< PILEUP_TYPE, FUNC_CLASS >::processAlignment ( SamRecord record)
virtual

Add an alignment to the pileup.

Definition at line 296 of file Pileup.h.

297{
298 int refPosition = record.get0BasedPosition();
299 int refID = record.getReferenceID();
300
301 // Flush any elements from the pileup that are prior to this record
302 // since the file is sorted, we are done with those positions.
303 flushPileup(refID, refPosition);
304
305 // Loop through for each reference position covered by the record.
306 // It is up to the PILEUP_TYPE to handle insertions/deletions, etc
307 // that are related with the given reference position.
308 for(; refPosition <= record.get0BasedAlignmentEnd(); ++refPosition)
309 {
310 addAlignmentPosition(refPosition, record);
311 }
312}
int32_t get0BasedAlignmentEnd()
Returns the 0-based inclusive rightmost position of the clipped sequence.
Definition: SamRecord.cpp:1467
int32_t get0BasedPosition()
Get the 0-based(BAM) leftmost position of the record.
Definition: SamRecord.cpp:1319

References SamRecord::get0BasedAlignmentEnd(), SamRecord::get0BasedPosition(), and SamRecord::getReferenceID().

◆ processAlignmentRegion()

template<class PILEUP_TYPE , class FUNC_CLASS >
void Pileup< PILEUP_TYPE, FUNC_CLASS >::processAlignmentRegion ( SamRecord record,
int  startPos,
int  endPos,
PosList excludeList = NULL 
)
virtual

Add only positions that fall within the specified region of the alignment to the pileup and outside of the specified excluded positions.

Parameters
recordalignment to be added to the pileup.
startPos0-based start position of the bases that should be added to the pileup.
endPos0-based end position of the bases that should be added to the pileup (this position is not added). Set to -1 if there is no end position to the region.
excludeListlist of refID/positions to exclude from processing.

Definition at line 316 of file Pileup.h.

320{
321 int refPosition = record.get0BasedPosition();
322 int refID = record.getReferenceID();
323
324 // Flush any elements from the pileup that are prior to this record
325 // since the file is sorted, we are done with those positions.
326 flushPileup(refID, refPosition);
327
328 // Check if the region starts after this reference starts. If so,
329 // we only want to start adding at the region start position.
330 if(startPos > refPosition)
331 {
332 refPosition = startPos;
333 }
334
335 // Loop through for each reference position covered by the record.
336 // It is up to the PILEUP_TYPE to handle insertions/deletions, etc
337 // that are related with the given reference position.
338 for(; refPosition <= record.get0BasedAlignmentEnd(); ++refPosition)
339 {
340 // Check to see if we have gone past the end of the region, in which
341 // case we can stop processing this record. Check >= since the
342 // end position is not in the region.
343 if((endPos != -1) && (refPosition >= endPos))
344 {
345 break;
346 }
347
348 // Check to see if this position is in the exclude list.
349 bool addPos = true;
350 if(excludeList != NULL)
351 {
352 // There is an exclude list, so lookup the position.
353 if(excludeList->hasPosition(refID, refPosition))
354 {
355 // This position is in the exclude list, so don't add it.
356 addPos = false;
357 }
358 }
359 if(addPos)
360 {
361 addAlignmentPosition(refPosition, record);
362 }
363 }
364}
bool hasPosition(int refID, int refPosition)
Return whether or not this list contains the specified reference ID and position (negative values wil...
Definition: PosList.cpp:81

References SamRecord::get0BasedAlignmentEnd(), SamRecord::get0BasedPosition(), SamRecord::getReferenceID(), and PosList::hasPosition().

◆ processFile()

template<class PILEUP_TYPE , class FUNC_CLASS >
int Pileup< PILEUP_TYPE, FUNC_CLASS >::processFile ( const std::string &  fileName,
uint16_t  excludeFlag = 0x0704,
uint16_t  includeFlag = 0 
)
virtual

Performs a pileup on the specified file.

Parameters
excludeFlagif specified, if any bit set in the exclude flag is set in the record's flag, it will be dropped. Defaulted to exclude:
  • unmapped,
  • not primary alignment
  • failed platform/vendor quality check
  • PCR or optical duplicate
includeFlagif specified, every bit must be set in the record's flag for it to be included - defaulted to 0, no bits are required to be set.
Returns
0 for success and non-zero for failure.

Definition at line 235 of file Pileup.h.

238{
239 SamFile samIn;
240 SamFileHeader header;
241 SamRecord record;
242
243 if(myRefPtr != NULL)
244 {
245 samIn.SetReference(myRefPtr);
246 }
247
248 if(!samIn.OpenForRead(fileName.c_str()))
249 {
250 fprintf(stderr, "%s\n", samIn.GetStatusMessage());
251 return(samIn.GetStatus());
252 }
253
254 if(!samIn.ReadHeader(header))
255 {
256 fprintf(stderr, "%s\n", samIn.GetStatusMessage());
257 return(samIn.GetStatus());
258 }
259
260 // The file needs to be sorted by coordinate.
262
263 // Iterate over all records
264 while (samIn.ReadRecord(header, record))
265 {
266 uint16_t flag = record.getFlag();
267 if(flag & excludeFlag)
268 {
269 // This record has an excluded flag set,
270 // so continue to the next one.
271 continue;
272 }
273 if((flag & includeFlag) != includeFlag)
274 {
275 // This record does not have all required flags set,
276 // so continue to the next one.
277 continue;
278 }
279 processAlignment(record);
280 }
281
282 flushPileup();
283
284 int returnValue = 0;
286 {
287 // Failed to read a record.
288 fprintf(stderr, "%s\n", samIn.GetStatusMessage());
289 returnValue = samIn.GetStatus();
290 }
291 return(returnValue);
292}
virtual void processAlignment(SamRecord &record)
Add an alignment to the pileup.
Definition: Pileup.h:296
This class allows a user to get/set the fields in a SAM/BAM Header.
Definition: SamFileHeader.h:35
Allows the user to easily read/write a SAM/BAM file.
Definition: SamFile.h:36
bool ReadHeader(SamFileHeader &header)
Reads the header section from the file and stores it in the passed in header.
Definition: SamFile.cpp:450
@ COORDINATE
file is sorted by coordinate.
Definition: SamFile.h:49
bool ReadRecord(SamFileHeader &header, SamRecord &record)
Reads the next record from the file & stores it in the passed in record.
Definition: SamFile.cpp:514
void SetReference(GenomeSequence *reference)
Sets the reference to the specified genome sequence object.
Definition: SamFile.cpp:380
bool OpenForRead(const char *filename, SamFileHeader *header=NULL)
Open a sam/bam file for reading with the specified filename, determing the type of file and SAM/BAM b...
Definition: SamFile.cpp:93
SamStatus::Status GetStatus()
Get the Status of the last call that sets status.
Definition: SamFile.h:212
const char * GetStatusMessage()
Get the Status Message of the last call that sets status.
Definition: SamFile.h:218
void setSortedValidation(SortedType sortType)
Set the flag to validate that the file is sorted as it is read/written.
Definition: SamFile.cpp:682
Class providing an easy to use interface to get/set/operate on the fields in a SAM/BAM record.
Definition: SamRecord.h:52
uint16_t getFlag()
Get the flag (FLAG).
Definition: SamRecord.cpp:1384
@ NO_MORE_RECS
NO_MORE_RECS: failed to read a record since there are no more to read either in the file or section i...
Definition: StatGenStatus.h:36

References SamFile::COORDINATE, SamRecord::getFlag(), SamFile::GetStatus(), SamFile::GetStatusMessage(), StatGenStatus::NO_MORE_RECS, SamFile::OpenForRead(), SamFile::ReadHeader(), SamFile::ReadRecord(), SamFile::SetReference(), and SamFile::setSortedValidation().

◆ resetElement()

template<class PILEUP_TYPE , class FUNC_CLASS >
void Pileup< PILEUP_TYPE, FUNC_CLASS >::resetElement ( PILEUP_TYPE &  element,
int  position 
)
protectedvirtual

Definition at line 526 of file Pileup.h.

528{
529 element.reset(position);
530}

Member Data Documentation

◆ myAnalyzeFuncPtr

template<class PILEUP_TYPE , class FUNC_CLASS = defaultPileup<PILEUP_TYPE>>
FUNC_CLASS Pileup< PILEUP_TYPE, FUNC_CLASS >::myAnalyzeFuncPtr
protected

Definition at line 119 of file Pileup.h.

◆ myCurrentRefID

template<class PILEUP_TYPE , class FUNC_CLASS = defaultPileup<PILEUP_TYPE>>
int Pileup< PILEUP_TYPE, FUNC_CLASS >::myCurrentRefID
protected

Definition at line 145 of file Pileup.h.

◆ myElements

template<class PILEUP_TYPE , class FUNC_CLASS = defaultPileup<PILEUP_TYPE>>
std::vector<PILEUP_TYPE> Pileup< PILEUP_TYPE, FUNC_CLASS >::myElements
protected

Definition at line 138 of file Pileup.h.

◆ myRefPtr

template<class PILEUP_TYPE , class FUNC_CLASS = defaultPileup<PILEUP_TYPE>>
GenomeSequence* Pileup< PILEUP_TYPE, FUNC_CLASS >::myRefPtr
protected

Definition at line 147 of file Pileup.h.

◆ pileupHead

template<class PILEUP_TYPE , class FUNC_CLASS = defaultPileup<PILEUP_TYPE>>
int Pileup< PILEUP_TYPE, FUNC_CLASS >::pileupHead
protected

Definition at line 141 of file Pileup.h.

◆ pileupStart

template<class PILEUP_TYPE , class FUNC_CLASS = defaultPileup<PILEUP_TYPE>>
int Pileup< PILEUP_TYPE, FUNC_CLASS >::pileupStart
protected

Definition at line 140 of file Pileup.h.

◆ pileupTail

template<class PILEUP_TYPE , class FUNC_CLASS = defaultPileup<PILEUP_TYPE>>
int Pileup< PILEUP_TYPE, FUNC_CLASS >::pileupTail
protected

Definition at line 142 of file Pileup.h.

◆ pileupWindow

template<class PILEUP_TYPE , class FUNC_CLASS = defaultPileup<PILEUP_TYPE>>
int Pileup< PILEUP_TYPE, FUNC_CLASS >::pileupWindow
protected

Definition at line 143 of file Pileup.h.


The documentation for this class was generated from the following file: