libStatGen Software  1
SamFile.cpp
1 /*
2  * Copyright (C) 2010 Regents of the University of Michigan
3  *
4  * This program is free software: you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation, either version 3 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program. If not, see <http://www.gnu.org/licenses/>.
16  */
17 #include <stdexcept>
18 #include "SamFile.h"
19 #include "SamFileHeader.h"
20 #include "SamRecord.h"
21 #include "BamInterface.h"
22 #include "SamInterface.h"
23 #include "BgzfFileType.h"
24 
25 // Constructor, init variables.
27  : myStatus()
28 {
29  init();
30  resetFile();
31 }
32 
33 
34 // Constructor, init variables.
36  : myStatus(errorHandlingType)
37 {
38  init();
39  resetFile();
40 }
41 
42 
43 // Constructor, init variables and open the specified file based on the
44 // specified mode (READ/WRITE).
45 SamFile::SamFile(const char* filename, OpenType mode)
46  : myStatus()
47 {
48  init(filename, mode, NULL);
49 }
50 
51 
52 // Constructor, init variables and open the specified file based on the
53 // specified mode (READ/WRITE). Default is READ..
54 SamFile::SamFile(const char* filename, OpenType mode,
55  ErrorHandler::HandlingType errorHandlingType)
56  : myStatus(errorHandlingType)
57 {
58  init(filename, mode, NULL);
59 }
60 
61 
62 // Constructor, init variables and open the specified file based on the
63 // specified mode (READ/WRITE).
64 SamFile::SamFile(const char* filename, OpenType mode, SamFileHeader* header)
65  : myStatus()
66 {
67  init(filename, mode, header);
68 }
69 
70 
71 // Constructor, init variables and open the specified file based on the
72 // specified mode (READ/WRITE). Default is READ..
73 SamFile::SamFile(const char* filename, OpenType mode,
74  ErrorHandler::HandlingType errorHandlingType,
75  SamFileHeader* header)
76  : myStatus(errorHandlingType)
77 {
78  init(filename, mode, header);
79 }
80 
81 
83 {
84  resetFile();
85  if(myStatistics != NULL)
86  {
87  delete myStatistics;
88  }
89 }
90 
91 
92 // Open a sam/bam file for reading with the specified filename.
93 bool SamFile::OpenForRead(const char * filename, SamFileHeader* header)
94 {
95  // Reset for any previously operated on files.
96  resetFile();
97 
98  int lastchar = 0;
99 
100  while (filename[lastchar] != 0) lastchar++;
101 
102  // If at least one character, check for '-'.
103  if((lastchar >= 1) && (filename[0] == '-'))
104  {
105  // Read from stdin - determine type of file to read.
106  // Determine if compressed bam.
107  if(strcmp(filename, "-.bam") == 0)
108  {
109  // Compressed bam - open as bgzf.
110  // -.bam is the filename, read compressed bam from stdin
111  filename = "-";
112 
113  myFilePtr = new InputFile;
114  // support recover mode - this switches in a reader
115  // capable of recovering from bad BGZF compression blocks.
116  myFilePtr->setAttemptRecovery(myAttemptRecovery);
117  myFilePtr->openFile(filename, "rb", InputFile::BGZF);
118 
119  myInterfacePtr = new BamInterface;
120 
121  // Read the magic string.
122  char magic[4];
123  ifread(myFilePtr, magic, 4);
124  }
125  else if(strcmp(filename, "-.ubam") == 0)
126  {
127  // uncompressed BAM File.
128  // -.ubam is the filename, read uncompressed bam from stdin.
129  // uncompressed BAM is still compressed with BGZF, but using
130  // compression level 0, so still open as BGZF since it has a
131  // BGZF header.
132  filename = "-";
133 
134  // Uncompressed, so do not require the eof block.
135 #ifdef __ZLIB_AVAILABLE__
136  BgzfFileType::setRequireEofBlock(false);
137 #endif
138  myFilePtr = ifopen(filename, "rb", InputFile::BGZF);
139 
140  myInterfacePtr = new BamInterface;
141 
142  // Read the magic string.
143  char magic[4];
144  ifread(myFilePtr, magic, 4);
145  }
146  else if((strcmp(filename, "-") == 0) || (strcmp(filename, "-.sam") == 0))
147  {
148  // SAM File.
149  // read sam from stdin
150  filename = "-";
151  myFilePtr = ifopen(filename, "rb", InputFile::UNCOMPRESSED);
152  myInterfacePtr = new SamInterface;
153  }
154  else
155  {
156  std::string errorMessage = "Invalid SAM/BAM filename, ";
157  errorMessage += filename;
158  errorMessage += ". From stdin, can only be '-', '-.sam', '-.bam', or '-.ubam'";
159  myStatus.setStatus(SamStatus::FAIL_IO, errorMessage.c_str());
160  delete myFilePtr;
161  myFilePtr = NULL;
162  return(false);
163  }
164  }
165  else
166  {
167  // Not from stdin. Read the file to determine the type.
168 
169  myFilePtr = new InputFile;
170 
171  // support recovery mode - this conditionally enables a reader
172  // capable of recovering from bad BGZF compression blocks.
173  myFilePtr->setAttemptRecovery(myAttemptRecovery);
174  bool rc = myFilePtr->openFile(filename, "rb", InputFile::DEFAULT);
175 
176  if (rc == false)
177  {
178  std::string errorMessage = "Failed to Open ";
179  errorMessage += filename;
180  errorMessage += " for reading";
181  myStatus.setStatus(SamStatus::FAIL_IO, errorMessage.c_str());
182  delete myFilePtr;
183  myFilePtr = NULL;
184  return(false);
185  }
186 
187  char magic[4];
188  ifread(myFilePtr, magic, 4);
189 
190  if (magic[0] == 'B' && magic[1] == 'A' && magic[2] == 'M' &&
191  magic[3] == 1)
192  {
193  myInterfacePtr = new BamInterface;
194  // Set that it is a bam file open for reading. This is needed to
195  // determine if an index file can be used.
196  myIsBamOpenForRead = true;
197  }
198  else
199  {
200  // Not a bam, so rewind to the beginning of the file so it
201  // can be read.
202  ifrewind(myFilePtr);
203  myInterfacePtr = new SamInterface;
204  }
205  }
206 
207  // File is open for reading.
208  myIsOpenForRead = true;
209 
210  // Read the header if one was passed in.
211  if(header != NULL)
212  {
213  return(ReadHeader(*header));
214  }
215 
216  // Successfully opened the file.
218  return(true);
219 }
220 
221 
222 // Open a sam/bam file for writing with the specified filename.
223 bool SamFile::OpenForWrite(const char * filename, SamFileHeader* header)
224 {
225  // Reset for any previously operated on files.
226  resetFile();
227 
228  int lastchar = 0;
229  while (filename[lastchar] != 0) lastchar++;
230  if (lastchar >= 4 &&
231  filename[lastchar - 4] == 'u' &&
232  filename[lastchar - 3] == 'b' &&
233  filename[lastchar - 2] == 'a' &&
234  filename[lastchar - 1] == 'm')
235  {
236  // BAM File.
237  // if -.ubam is the filename, write uncompressed bam to stdout
238  if((lastchar == 6) && (filename[0] == '-') && (filename[1] == '.'))
239  {
240  filename = "-";
241  }
242 
243  myFilePtr = ifopen(filename, "wb0", InputFile::BGZF);
244 
245  myInterfacePtr = new BamInterface;
246  }
247  else if (lastchar >= 3 &&
248  filename[lastchar - 3] == 'b' &&
249  filename[lastchar - 2] == 'a' &&
250  filename[lastchar - 1] == 'm')
251  {
252  // BAM File.
253  // if -.bam is the filename, write compressed bam to stdout
254  if((lastchar == 5) && (filename[0] == '-') && (filename[1] == '.'))
255  {
256  filename = "-";
257  }
258  myFilePtr = ifopen(filename, "wb", InputFile::BGZF);
259 
260  myInterfacePtr = new BamInterface;
261  }
262  else
263  {
264  // SAM File
265  // if - (followed by anything is the filename,
266  // write uncompressed sam to stdout
267  if((lastchar >= 1) && (filename[0] == '-'))
268  {
269  filename = "-";
270  }
271  myFilePtr = ifopen(filename, "wb", InputFile::UNCOMPRESSED);
272 
273  myInterfacePtr = new SamInterface;
274  }
275 
276  if (myFilePtr == NULL)
277  {
278  std::string errorMessage = "Failed to Open ";
279  errorMessage += filename;
280  errorMessage += " for writing";
281  myStatus.setStatus(SamStatus::FAIL_IO, errorMessage.c_str());
282  return(false);
283  }
284 
285  myIsOpenForWrite = true;
286 
287  // Write the header if one was passed in.
288  if(header != NULL)
289  {
290  return(WriteHeader(*header));
291  }
292 
293  // Successfully opened the file.
295  return(true);
296 }
297 
298 
299 // Read BAM Index file.
300 bool SamFile::ReadBamIndex(const char* bamIndexFilename)
301 {
302  // Cleanup a previously setup index.
303  if(myBamIndex != NULL)
304  {
305  delete myBamIndex;
306  myBamIndex = NULL;
307  }
308 
309  // Create a new bam index.
310  myBamIndex = new BamIndex();
311  SamStatus::Status indexStat = myBamIndex->readIndex(bamIndexFilename);
312 
313  if(indexStat != SamStatus::SUCCESS)
314  {
315  std::string errorMessage = "Failed to read the bam Index file: ";
316  errorMessage += bamIndexFilename;
317  myStatus.setStatus(indexStat, errorMessage.c_str());
318  delete myBamIndex;
319  myBamIndex = NULL;
320  return(false);
321  }
323  return(true);
324 }
325 
326 
327 // Read BAM Index file.
329 {
330  if(myFilePtr == NULL)
331  {
332  // Can't read the bam index file because the BAM file has not yet been
333  // opened, so we don't know the base filename for the index file.
334  std::string errorMessage = "Failed to read the bam Index file -"
335  " the BAM file needs to be read first in order to determine"
336  " the index filename.";
337  myStatus.setStatus(SamStatus::FAIL_ORDER, errorMessage.c_str());
338  return(false);
339  }
340 
341  const char* bamBaseName = myFilePtr->getFileName();
342 
343  std::string indexName = bamBaseName;
344  indexName += ".bai";
345 
346  bool foundFile = true;
347  try
348  {
349  if(ReadBamIndex(indexName.c_str()) == false)
350  {
351  foundFile = false;
352  }
353  }
354  catch (std::exception&)
355  {
356  foundFile = false;
357  }
358 
359  // Check to see if the index file was found.
360  if(!foundFile)
361  {
362  // Not found - try without the bam extension.
363  // Locate the start of the bam extension
364  size_t startExt = indexName.find(".bam");
365  if(startExt == std::string::npos)
366  {
367  // Could not find the .bam extension, so just return false since the
368  // call to ReadBamIndex set the status.
369  return(false);
370  }
371  // Remove ".bam" and try reading the index again.
372  indexName.erase(startExt, 4);
373  return(ReadBamIndex(indexName.c_str()));
374  }
375  return(true);
376 }
377 
378 
379 // Sets the reference to the specified genome sequence object.
381 {
382  myRefPtr = reference;
383 }
384 
385 
386 // Set the type of sequence translation to use when reading the sequence.
388 {
389  myReadTranslation = translation;
390 }
391 
392 
393 // Set the type of sequence translation to use when writing the sequence.
395 {
396  myWriteTranslation = translation;
397 }
398 
399 // Close the file if there is one open.
401 {
402  // Resetting the file will close it if it is open, and
403  // will reset all other variables.
404  resetFile();
405 }
406 
407 
408 // Returns whether or not the file has been opened.
409 // return: int - true = open; false = not open.
411 {
412  if (myFilePtr != NULL)
413  {
414  // File Pointer is set, so return if it is open.
415  return(myFilePtr->isOpen());
416  }
417  // File pointer is not set, so return false, not open.
418  return false;
419 }
420 
421 
422 // Returns whether or not the end of the file has been reached.
423 // return: int - true = EOF; false = not eof.
425 {
426  if(myIsOpenForRead == false)
427  {
428  // Not open for read, return true.
429  return(true);
430  }
431  return(myInterfacePtr->isEOF(myFilePtr));
432 }
433 
434 
435 // Returns whether or not the file is a stream.
436 // return: bool - true = stream; false = not stream/not open.
438 {
439  if (myFilePtr != NULL)
440  {
441  // File Pointer is set, so return if it is a stream.
442  return((myFilePtr->getFileName())[0] == '-');
443  }
444  // File pointer is not set, so return false, not a stream.
445  return false;
446 }
447 
448 
449 // Read the header from the currently opened file.
451 {
453  if(myIsOpenForRead == false)
454  {
455  // File is not open for read
457  "Cannot read header since the file is not open for reading");
458  return(false);
459  }
460 
461  if(myHasHeader == true)
462  {
463  // The header has already been read.
465  "Cannot read header since it has already been read.");
466  return(false);
467  }
468 
469  if(myInterfacePtr->readHeader(myFilePtr, header, myStatus))
470  {
471  // The header has now been successfully read.
472  myHasHeader = true;
473  return(true);
474  }
475  return(false);
476 }
477 
478 
479 // Write the header to the currently opened file.
481 {
483  if(myIsOpenForWrite == false)
484  {
485  // File is not open for write
486  // -OR-
487  // The header has already been written.
489  "Cannot write header since the file is not open for writing");
490  return(false);
491  }
492 
493  if(myHasHeader == true)
494  {
495  // The header has already been written.
497  "Cannot write header since it has already been written");
498  return(false);
499  }
500 
501  if(myInterfacePtr->writeHeader(myFilePtr, header, myStatus))
502  {
503  // The header has now been successfully written.
504  myHasHeader = true;
505  return(true);
506  }
507 
508  // return the status.
509  return(false);
510 }
511 
512 
513 // Read a record from the currently opened file.
515  SamRecord& record)
516 {
518 
519  if(myIsOpenForRead == false)
520  {
521  // File is not open for read
523  "Cannot read record since the file is not open for reading");
524  throw(std::runtime_error("SOFTWARE BUG: trying to read a SAM/BAM record prior to opening the file."));
525  return(false);
526  }
527 
528  if(myHasHeader == false)
529  {
530  // The header has not yet been read.
531  // TODO - maybe just read the header.
533  "Cannot read record since the header has not been read.");
534  throw(std::runtime_error("SOFTWARE BUG: trying to read a SAM/BAM record prior to reading the header."));
535  return(false);
536  }
537 
538  // Check to see if a new region has been set. If so, determine the
539  // chunks for that region.
540  if(myNewSection)
541  {
542  if(!processNewSection(header))
543  {
544  // Failed processing a new section. Could be an
545  // order issue like the file not being open or the
546  // indexed file not having been read.
547  // processNewSection sets myStatus with the failure reason.
548  return(false);
549  }
550  }
551 
552  // Read until a record is not successfully read or there are no more
553  // requested records.
554  while(myStatus == SamStatus::SUCCESS)
555  {
556  record.setReference(myRefPtr);
557  record.setSequenceTranslation(myReadTranslation);
558 
559  // If reading by index, this method will setup to ensure it is in
560  // the correct position for the next record (if not already there).
561  // Sets myStatus if it could not move to a good section.
562  // Just returns true if it is not setup to read by index.
563  if(!ensureIndexedReadPosition())
564  {
565  // Either there are no more records in the section
566  // or it failed to move to the right section, so there
567  // is nothing more to read, stop looping.
568  break;
569  }
570 
571  // File is open for reading and the header has been read, so read the
572  // next record.
573  myInterfacePtr->readRecord(myFilePtr, header, record, myStatus);
575  {
576  // Failed to read the record, so break out of the loop.
577  break;
578  }
579 
580  // Successfully read a record, so check if we should filter it.
581  // First check if it is out of the section. Returns true
582  // if not reading by sections, returns false if the record
583  // is outside of the section. Sets status to NO_MORE_RECS if
584  // there is nothing left ot read in the section.
585  if(!checkRecordInSection(record))
586  {
587  // The record is not in the section.
588  // The while loop will detect if NO_MORE_RECS was set.
589  continue;
590  }
591 
592  // Check the flag for required/excluded flags.
593  uint16_t flag = record.getFlag();
594  if((flag & myRequiredFlags) != myRequiredFlags)
595  {
596  // The record does not conatain all required flags, so
597  // continue looking.
598  continue;
599  }
600  if((flag & myExcludedFlags) != 0)
601  {
602  // The record contains an excluded flag, so continue looking.
603  continue;
604  }
605 
606  //increment the record count.
607  myRecordCount++;
608 
609  if(myStatistics != NULL)
610  {
611  // Statistics should be updated.
612  myStatistics->updateStatistics(record);
613  }
614 
615  // Successfully read the record, so check the sort order.
616  if(!validateSortOrder(record, header))
617  {
618  // ValidateSortOrder sets the status on a failure.
619  return(false);
620  }
621  return(true);
622 
623  } // End while loop that checks if a desired record is found or failure.
624 
625  // Return true if a record was found.
626  return(myStatus == SamStatus::SUCCESS);
627 }
628 
629 
630 
631 // Write a record to the currently opened file.
633  SamRecord& record)
634 {
635  if(myIsOpenForWrite == false)
636  {
637  // File is not open for writing
639  "Cannot write record since the file is not open for writing");
640  return(false);
641  }
642 
643  if(myHasHeader == false)
644  {
645  // The header has not yet been written.
647  "Cannot write record since the header has not been written");
648  return(false);
649  }
650 
651  // Before trying to write the record, validate the sort order.
652  if(!validateSortOrder(record, header))
653  {
654  // Not sorted like it is supposed to be, do not write the record
656  "Cannot write the record since the file is not properly sorted.");
657  return(false);
658  }
659 
660  if(myRefPtr != NULL)
661  {
662  record.setReference(myRefPtr);
663  }
664 
665  // File is open for writing and the header has been written, so write the
666  // record.
667  myStatus = myInterfacePtr->writeRecord(myFilePtr, header, record,
668  myWriteTranslation);
669 
671  {
672  // A record was successfully written, so increment the record count.
673  myRecordCount++;
674  return(true);
675  }
676  return(false);
677 }
678 
679 
680 // Set the flag to validate that the file is sorted as it is read/written.
681 // Must be called after the file has been opened.
683 {
684  mySortedType = sortType;
685 }
686 
687 
688 // Return the number of records that have been read/written so far.
690 {
691  return(myRecordCount);
692 }
693 
694 
695 // Sets what part of the SamFile should be read.
696 bool SamFile::SetReadSection(int32_t refID)
697 {
698  // No start/end specified, so set back to default -1.
699  return(SetReadSection(refID, -1, -1));
700 }
701 
702 
703 
704 // Sets what part of the SamFile should be read.
705 bool SamFile::SetReadSection(const char* refName)
706 {
707  // No start/end specified, so set back to default -1.
708  return(SetReadSection(refName, -1, -1));
709 }
710 
711 
712 // Sets what part of the BAM file should be read.
713 bool SamFile::SetReadSection(int32_t refID, int32_t start, int32_t end,
714  bool overlap)
715 {
716  // If there is not a BAM file open for reading, return failure.
717  // Opening a new file clears the read section, so it must be
718  // set after the file is opened.
719  if(!myIsBamOpenForRead)
720  {
721  // There is not a BAM file open for reading.
723  "Cannot set section since there is no bam file open");
724  return(false);
725  }
726 
727  myNewSection = true;
728  myOverlapSection = overlap;
729  myStartPos = start;
730  myEndPos = end;
731  myRefID = refID;
732  myRefName.clear();
733  myChunksToRead.clear();
734  // Reset the end of the current chunk. We are resetting our read, so
735  // we no longer have a "current chunk" that we are reading.
736  myCurrentChunkEnd = 0;
738 
739  // Reset the sort order criteria since we moved around in the file.
740  myPrevCoord = -1;
741  myPrevRefID = 0;
742  myPrevReadName.Clear();
743 
744  return(true);
745 }
746 
747 
748 // Sets what part of the BAM file should be read.
749 bool SamFile::SetReadSection(const char* refName, int32_t start, int32_t end,
750  bool overlap)
751 {
752  // If there is not a BAM file open for reading, return failure.
753  // Opening a new file clears the read section, so it must be
754  // set after the file is opened.
755  if(!myIsBamOpenForRead)
756  {
757  // There is not a BAM file open for reading.
759  "Cannot set section since there is no bam file open");
760  return(false);
761  }
762 
763  myNewSection = true;
764  myOverlapSection = overlap;
765  myStartPos = start;
766  myEndPos = end;
767  if((strcmp(refName, "") == 0) || (strcmp(refName, "*") == 0))
768  {
769  // No Reference name specified, so read just the "-1" entries.
770  myRefID = BamIndex::REF_ID_UNMAPPED;
771  }
772  else
773  {
774  // save the reference name and revert the reference ID to unknown
775  // so it will be calculated later.
776  myRefName = refName;
777  myRefID = BamIndex::REF_ID_ALL;
778  }
779  myChunksToRead.clear();
780  // Reset the end of the current chunk. We are resetting our read, so
781  // we no longer have a "current chunk" that we are reading.
782  myCurrentChunkEnd = 0;
784 
785  // Reset the sort order criteria since we moved around in the file.
786  myPrevCoord = -1;
787  myPrevRefID = 0;
788  myPrevReadName.Clear();
789 
790  return(true);
791 }
792 
793 
794 void SamFile::SetReadFlags(uint16_t requiredFlags, uint16_t excludedFlags)
795 {
796  myRequiredFlags = requiredFlags;
797  myExcludedFlags = excludedFlags;
798 }
799 
800 
801 // Get the number of mapped reads in the specified reference id.
802 // Returns -1 for out of range refIDs.
804 {
805  // The bam index must have already been read.
806  if(myBamIndex == NULL)
807  {
809  "Cannot get num mapped reads from the index until it has been read.");
810  return(false);
811  }
812  return(myBamIndex->getNumMappedReads(refID));
813 }
814 
815 
816 // Get the number of unmapped reads in the specified reference id.
817 // Returns -1 for out of range refIDs.
819 {
820  // The bam index must have already been read.
821  if(myBamIndex == NULL)
822  {
824  "Cannot get num unmapped reads from the index until it has been read.");
825  return(false);
826  }
827  return(myBamIndex->getNumUnMappedReads(refID));
828 }
829 
830 
831 // Get the number of mapped reads in the specified reference id.
832 // Returns -1 for out of range references.
833 int32_t SamFile::getNumMappedReadsFromIndex(const char* refName,
834  SamFileHeader& header)
835 {
836  // The bam index must have already been read.
837  if(myBamIndex == NULL)
838  {
840  "Cannot get num mapped reads from the index until it has been read.");
841  return(false);
842  }
843  int32_t refID = BamIndex::REF_ID_UNMAPPED;
844  if((strcmp(refName, "") != 0) && (strcmp(refName, "*") != 0))
845  {
846  // Reference name specified, so read just the "-1" entries.
847  refID = header.getReferenceID(refName);
848  }
849  return(myBamIndex->getNumMappedReads(refID));
850 }
851 
852 
853 // Get the number of unmapped reads in the specified reference id.
854 // Returns -1 for out of range refIDs.
855 int32_t SamFile::getNumUnMappedReadsFromIndex(const char* refName,
856  SamFileHeader& header)
857 {
858  // The bam index must have already been read.
859  if(myBamIndex == NULL)
860  {
862  "Cannot get num unmapped reads from the index until it has been read.");
863  return(false);
864  }
865  int32_t refID = BamIndex::REF_ID_UNMAPPED;
866  if((strcmp(refName, "") != 0) && (strcmp(refName, "*") != 0))
867  {
868  // Reference name specified, so read just the "-1" entries.
869  refID = header.getReferenceID(refName);
870  }
871  return(myBamIndex->getNumUnMappedReads(refID));
872 }
873 
874 
875 // Returns the number of bases in the passed in read that overlap the
876 // region that is currently set.
877 uint32_t SamFile::GetNumOverlaps(SamRecord& samRecord)
878 {
879  if(myRefPtr != NULL)
880  {
881  samRecord.setReference(myRefPtr);
882  }
883  samRecord.setSequenceTranslation(myReadTranslation);
884 
885  // Get the overlaps in the sam record for the region currently set
886  // for this file.
887  return(samRecord.getNumOverlaps(myStartPos, myEndPos));
888 }
889 
890 
891 void SamFile::GenerateStatistics(bool genStats)
892 {
893  if(genStats)
894  {
895  if(myStatistics == NULL)
896  {
897  // Want to generate statistics, but do not yet have the
898  // structure for them, so create one.
899  myStatistics = new SamStatistics();
900  }
901  }
902  else
903  {
904  // Do not generate statistics, so if myStatistics is not NULL,
905  // delete it.
906  if(myStatistics != NULL)
907  {
908  delete myStatistics;
909  myStatistics = NULL;
910  }
911  }
912 
913 }
914 
915 
917 {
918  return(myBamIndex);
919 }
920 
921 
922 // initialize.
923 void SamFile::init()
924 {
925  myFilePtr = NULL;
926  myInterfacePtr = NULL;
927  myStatistics = NULL;
928  myBamIndex = NULL;
929  myRefPtr = NULL;
930  myReadTranslation = SamRecord::NONE;
931  myWriteTranslation = SamRecord::NONE;
932  myAttemptRecovery = false;
933  myRequiredFlags = 0;
934  myExcludedFlags = 0;
935 }
936 
937 
938 void SamFile::init(const char* filename, OpenType mode, SamFileHeader* header)
939 {
940  init();
941 
942  resetFile();
943 
944  bool openStatus = true;
945  if(mode == READ)
946  {
947  // open the file for read.
948  openStatus = OpenForRead(filename, header);
949  }
950  else
951  {
952  // open the file for write.
953  openStatus = OpenForWrite(filename, header);
954  }
955  if(!openStatus)
956  {
957  // Failed to open the file - print error and abort.
958  fprintf(stderr, "%s\n", GetStatusMessage());
959  std::cerr << "FAILURE - EXITING!!!" << std::endl;
960  exit(-1);
961  }
962 }
963 
964 
965 // Reset variables for each file.
967 {
968  // Close the file.
969  if (myFilePtr != NULL)
970  {
971  // If we already have an open file, close it.
972  ifclose(myFilePtr);
973  myFilePtr = NULL;
974  }
975  if(myInterfacePtr != NULL)
976  {
977  delete myInterfacePtr;
978  myInterfacePtr = NULL;
979  }
980 
981  myIsOpenForRead = false;
982  myIsOpenForWrite = false;
983  myHasHeader = false;
984  mySortedType = UNSORTED;
985  myPrevReadName.Clear();
986  myPrevCoord = -1;
987  myPrevRefID = 0;
988  myRecordCount = 0;
990 
991  // Reset indexed bam values.
992  myIsBamOpenForRead = false;
993  myRefID = BamIndex::REF_ID_ALL;
994  myStartPos = -1;
995  myEndPos = -1;
996  myNewSection = false;
997  myOverlapSection = true;
998  myCurrentChunkEnd = 0;
999  myChunksToRead.clear();
1000  if(myBamIndex != NULL)
1001  {
1002  delete myBamIndex;
1003  myBamIndex = NULL;
1004  }
1005 
1006  // If statistics are being generated, reset them.
1007  if(myStatistics != NULL)
1008  {
1009  myStatistics->reset();
1010  }
1011 
1012  myRefName.clear();
1013 }
1014 
1015 
1016 // Validate that the record is sorted compared to the previously read record
1017 // if there is one, according to the specified sort order.
1018 // If the sort order is UNSORTED, true is returned.
1020 {
1021  if(myRefPtr != NULL)
1022  {
1023  record.setReference(myRefPtr);
1024  }
1025  record.setSequenceTranslation(myReadTranslation);
1026 
1027  bool status = false;
1028  if(mySortedType == UNSORTED)
1029  {
1030  // Unsorted, so nothing to validate, just return true.
1031  status = true;
1032  }
1033  else
1034  {
1035  // Check to see if mySortedType is based on the header.
1036  if(mySortedType == FLAG)
1037  {
1038  // Determine the sorted type from what was read out of the header.
1039  mySortedType = getSortOrderFromHeader(header);
1040  }
1041 
1042  if(mySortedType == QUERY_NAME)
1043  {
1044  // Validate that it is sorted by query name.
1045  // Get the query name from the record.
1046  const char* readName = record.getReadName();
1047 
1048  // Check if it is sorted either in samtools way or picard's way.
1049  if((myPrevReadName.Compare(readName) > 0) &&
1050  (strcmp(myPrevReadName.c_str(), readName) > 0))
1051  {
1052  // return false.
1053  String errorMessage = "ERROR: File is not sorted by read name at record ";
1054  errorMessage += myRecordCount;
1055  errorMessage += "\n\tPrevious record was ";
1056  errorMessage += myPrevReadName;
1057  errorMessage += ", but this record is ";
1058  errorMessage += readName;
1060  errorMessage.c_str());
1061  status = false;
1062  }
1063  else
1064  {
1065  myPrevReadName = readName;
1066  status = true;
1067  }
1068  }
1069  else
1070  {
1071  // Validate that it is sorted by COORDINATES.
1072  // Get the leftmost coordinate and the reference index.
1073  int32_t refID = record.getReferenceID();
1074  int32_t coord = record.get0BasedPosition();
1075  // The unmapped reference id is at the end of a sorted file.
1076  if(refID == BamIndex::REF_ID_UNMAPPED)
1077  {
1078  // A new reference ID that is for the unmapped reads
1079  // is always valid.
1080  status = true;
1081  myPrevRefID = refID;
1082  myPrevCoord = coord;
1083  }
1084  else if(myPrevRefID == BamIndex::REF_ID_UNMAPPED)
1085  {
1086  // Previous reference ID was for unmapped reads, but the
1087  // current one is not, so this is not sorted.
1088  String errorMessage = "ERROR: File is not coordinate sorted at record ";
1089  errorMessage += myRecordCount;
1090  errorMessage += "\n\tPrevious record was unmapped, but this record is ";
1091  errorMessage += header.getReferenceLabel(refID) + ":" + coord;
1093  errorMessage.c_str());
1094  status = false;
1095  }
1096  else if(refID < myPrevRefID)
1097  {
1098  // Current reference id is less than the previous one,
1099  //meaning that it is not sorted.
1100  String errorMessage = "ERROR: File is not coordinate sorted at record ";
1101  errorMessage += myRecordCount;
1102  errorMessage += "\n\tPrevious record was ";
1103  errorMessage += header.getReferenceLabel(myPrevRefID) + ":" + myPrevCoord;
1104  errorMessage += ", but this record is ";
1105  errorMessage += header.getReferenceLabel(refID) + ":" + coord;
1107  errorMessage.c_str());
1108  status = false;
1109  }
1110  else
1111  {
1112  // The reference IDs are in the correct order.
1113  if(refID > myPrevRefID)
1114  {
1115  // New reference id, so set the previous coordinate to -1
1116  myPrevCoord = -1;
1117  }
1118 
1119  // Check the coordinates.
1120  if(coord < myPrevCoord)
1121  {
1122  // New Coord is less than the previous position.
1123  String errorMessage = "ERROR: File is not coordinate sorted at record ";
1124  errorMessage += myRecordCount;
1125  errorMessage += "\n\tPreviousRecord was ";
1126  errorMessage += header.getReferenceLabel(myPrevRefID) + ":" + myPrevCoord;
1127  errorMessage += ", but this record is ";
1128  errorMessage += header.getReferenceLabel(refID) + ":" + coord;
1130  errorMessage.c_str());
1131  status = false;
1132  }
1133  else
1134  {
1135  myPrevRefID = refID;
1136  myPrevCoord = coord;
1137  status = true;
1138  }
1139  }
1140  }
1141  }
1142 
1143  return(status);
1144 }
1145 
1146 
1147 SamFile::SortedType SamFile::getSortOrderFromHeader(SamFileHeader& header)
1148 {
1149  const char* tag = header.getSortOrder();
1150 
1151  // Default to unsorted since if it is not specified in the header
1152  // that is the value that should be used.
1153  SortedType headerSortOrder = UNSORTED;
1154  if(strcmp(tag, "queryname") == 0)
1155  {
1156  headerSortOrder = QUERY_NAME;
1157  }
1158  else if(strcmp(tag, "coordinate") == 0)
1159  {
1160  headerSortOrder = COORDINATE;
1161  }
1162  return(headerSortOrder);
1163 }
1164 
1165 
1166  bool SamFile::ensureIndexedReadPosition()
1167  {
1168  // If no sections are specified, return true.
1169  if(myRefID == BamIndex::REF_ID_ALL)
1170  {
1171  return(true);
1172  }
1173 
1174  // Check to see if we have more to read out of the current chunk.
1175  // By checking the current position in relation to the current
1176  // end chunk. If the current position is >= the end of the
1177  // current chunk, then we must see to the next chunk.
1178  uint64_t currentPos = iftell(myFilePtr);
1179  if(currentPos >= myCurrentChunkEnd)
1180  {
1181  // If there are no more chunks to process, return failure.
1182  if(myChunksToRead.empty())
1183  {
1185  return(false);
1186  }
1187 
1188  // There are more chunks left, so get the next chunk.
1189  Chunk newChunk = myChunksToRead.pop();
1190 
1191  // Move to the location of the new chunk if it is not adjacent
1192  // to the current chunk.
1193  if(newChunk.chunk_beg != currentPos)
1194  {
1195  // New chunk is not adjacent, so move to it.
1196  if(ifseek(myFilePtr, newChunk.chunk_beg, SEEK_SET) != true)
1197  {
1198  // seek failed, cannot retrieve next record, return failure.
1200  "Failed to seek to the next record");
1201  return(false);
1202  }
1203  }
1204  // Seek succeeded, set the end of the new chunk.
1205  myCurrentChunkEnd = newChunk.chunk_end;
1206  }
1207  return(true);
1208  }
1209 
1210 
1211 bool SamFile::checkRecordInSection(SamRecord& record)
1212 {
1213  bool recordFound = true;
1214  if(myRefID == BamIndex::REF_ID_ALL)
1215  {
1216  return(true);
1217  }
1218  // Check to see if it is in the correct reference/position.
1219  if(record.getReferenceID() != myRefID)
1220  {
1221  // Incorrect reference ID, return no more records.
1223  return(false);
1224  }
1225 
1226  // Found a record.
1227  recordFound = true;
1228 
1229  // If start/end position are set, verify that the alignment falls
1230  // within those.
1231  // If the alignment start is greater than the end of the region,
1232  // return NO_MORE_RECS.
1233  // Since myEndPos is Exclusive 0-based, anything >= myEndPos is outside
1234  // of the region.
1235  if((myEndPos != -1) && (record.get0BasedPosition() >= myEndPos))
1236  {
1238  return(false);
1239  }
1240 
1241  // We know the start is less than the end position, so the alignment
1242  // overlaps the region if the alignment end position is greater than the
1243  // start of the region.
1244  if((myStartPos != -1) && (record.get0BasedAlignmentEnd() < myStartPos))
1245  {
1246  // If it does not overlap the region, so go to the next
1247  // record...set recordFound back to false.
1248  recordFound = false;
1249  }
1250 
1251  if(!myOverlapSection)
1252  {
1253  // Needs to be fully contained. Not fully contained if
1254  // 1) the record start position is < the region start position.
1255  // or
1256  // 2) the end position is specified and the record end position
1257  // is greater than or equal to the region end position.
1258  // (equal to since the region is exclusive.
1259  if((record.get0BasedPosition() < myStartPos) ||
1260  ((myEndPos != -1) &&
1261  (record.get0BasedAlignmentEnd() >= myEndPos)))
1262  {
1263  // This record is not fully contained, so move on to the next
1264  // record.
1265  recordFound = false;
1266  }
1267  }
1268 
1269  return(recordFound);
1270 }
1271 
1272 
1273 bool SamFile::processNewSection(SamFileHeader &header)
1274 {
1275  myNewSection = false;
1276 
1277  // If there is no index file, return failure.
1278  if(myBamIndex == NULL)
1279  {
1280  // No bam index has been read.
1282  "Cannot read section since there is no index file open");
1283  throw(std::runtime_error("SOFTWARE BUG: trying to read a BAM record by section prior to opening the BAM Index file."));
1284  return(false);
1285  }
1286 
1287  // If there is not a BAM file open for reading, return failure.
1288  if(!myIsBamOpenForRead)
1289  {
1290  // There is not a BAM file open for reading.
1292  "Cannot read section since there is no bam file open");
1293  throw(std::runtime_error("SOFTWARE BUG: trying to read a BAM record by section without opening a BAM file."));
1294  return(false);
1295  }
1296 
1297  if(myHasHeader == false)
1298  {
1299  // The header has not yet been read.
1301  "Cannot read record since the header has not been read.");
1302  throw(std::runtime_error("SOFTWARE BUG: trying to read a BAM record by section prior to opening the header."));
1303  return(false);
1304  }
1305 
1306  // Indexed Bam open for read, so disable read buffering because iftell
1307  // will be used.
1308  // Needs to be done here after we already know that the header has been
1309  // read.
1310  myFilePtr->disableBuffering();
1311 
1312  myChunksToRead.clear();
1313  // Reset the end of the current chunk. We are resetting our read, so
1314  // we no longer have a "current chunk" that we are reading.
1315  myCurrentChunkEnd = 0;
1316 
1317  // Check to see if the read section was set based on the reference name
1318  // but not yet converted to reference id.
1319  if(!myRefName.empty())
1320  {
1321  myRefID = header.getReferenceID(myRefName.c_str());
1322  // Clear the myRefName length so this code is only executed once.
1323  myRefName.clear();
1324 
1325  // Check to see if a reference id was found.
1326  if(myRefID == SamReferenceInfo::NO_REF_ID)
1327  {
1329  return(false);
1330  }
1331  }
1332 
1333  // Get the chunks associated with this reference region.
1334  if(myBamIndex->getChunksForRegion(myRefID, myStartPos, myEndPos,
1335  myChunksToRead) == true)
1336  {
1338  }
1339  else
1340  {
1341  String errorMsg = "Failed to get the specified region, refID = ";
1342  errorMsg += myRefID;
1343  errorMsg += "; startPos = ";
1344  errorMsg += myStartPos;
1345  errorMsg += "; endPos = ";
1346  errorMsg += myEndPos;
1348  errorMsg);
1349  }
1350  return(true);
1351 }
1352 
1353 //
1354 // When the caller to SamFile::ReadRecord() catches an
1355 // exception, it may choose to call this method to resync
1356 // on the underlying binary stream.
1357 //
1358 // Arguments: a callback function that will requires length bytes
1359 // of data to validate a record header.
1360 //
1361 // The expected use case is to re-sync on the next probably valid
1362 // BAM record, so that we can resume reading even after detecting
1363 // a corrupted BAM file.
1364 //
1365 bool SamFile::attemptRecoverySync(bool (*checkSignature)(void *data) , int length)
1366 {
1367  if(myFilePtr==NULL) return false;
1368  // non-recovery aware objects will just return false:
1369  return myFilePtr->attemptRecoverySync(checkSignature, length);
1370 }
1371 
1372 // Default Constructor.
1374  : SamFile()
1375 {
1376 }
1377 
1378 
1379 // Constructor that opens the specified file for read.
1380 SamFileReader::SamFileReader(const char* filename)
1381  : SamFile(filename, READ)
1382 {
1383 }
1384 
1385 
1386 
1387 
1388 // Constructor that opens the specified file for read.
1389 SamFileReader::SamFileReader(const char* filename,
1390  ErrorHandler::HandlingType errorHandlingType)
1391  : SamFile(filename, READ, errorHandlingType)
1392 {
1393 }
1394 
1395 
1396 // Constructor that opens the specified file for read.
1397 SamFileReader::SamFileReader(const char* filename,
1398  SamFileHeader* header)
1399  : SamFile(filename, READ, header)
1400 {
1401 }
1402 
1403 
1404 // Constructor that opens the specified file for read.
1405 SamFileReader::SamFileReader(const char* filename,
1406  ErrorHandler::HandlingType errorHandlingType,
1407  SamFileHeader* header)
1408  : SamFile(filename, READ, errorHandlingType, header)
1409 {
1410 }
1411 
1412 
1413 SamFileReader::~SamFileReader()
1414 {
1415 }
1416 
1417 
1418 // Default Constructor.
1420  : SamFile()
1421 {
1422 }
1423 
1424 
1425 // Constructor that opens the specified file for write.
1426 SamFileWriter::SamFileWriter(const char* filename)
1427  : SamFile(filename, WRITE)
1428 {
1429 }
1430 
1431 
1432 // Constructor that opens the specified file for write.
1433 SamFileWriter::SamFileWriter(const char* filename,
1434  ErrorHandler::HandlingType errorHandlingType)
1435  : SamFile(filename, WRITE, errorHandlingType)
1436 {
1437 }
1438 
1439 
1440 // Constructor that opens the specified file for write.
1441 SamFileWriter::SamFileWriter(const char* filename,
1442  SamFileHeader* header)
1443  : SamFile(filename, WRITE, header)
1444 {
1445 }
1446 
1447 
1448 // Constructor that opens the specified file for write.
1449 SamFileWriter::SamFileWriter(const char* filename,
1450  ErrorHandler::HandlingType errorHandlingType,
1451  SamFileHeader* header)
1452  : SamFile(filename, WRITE, errorHandlingType, header)
1453 {
1454 }
1455 
1456 
1457 SamFileWriter::~SamFileWriter()
1458 {
1459 }
bool ifseek(IFILE file, int64_t offset, int origin)
Seek to the specified position (result from an iftell), but cannot be done for stdin/stdout.
Definition: InputFile.h:701
void ifrewind(IFILE file)
Reset to the beginning of the file (cannot be done for stdin/stdout).
Definition: InputFile.h:642
IFILE ifopen(const char *filename, const char *mode, InputFile::ifileCompression compressionMode=InputFile::DEFAULT)
Open a file with the specified name and mode, using a filename of "-" to indicate stdin/stdout.
Definition: InputFile.h:562
int64_t iftell(IFILE file)
Get current position in the file.
Definition: InputFile.h:682
int ifclose(IFILE &file)
Close the file.
Definition: InputFile.h:580
unsigned int ifread(IFILE file, void *buffer, unsigned int size)
Read up to size bytes from the file into the buffer.
Definition: InputFile.h:600
bool getChunksForRegion(int32_t refID, int32_t start, int32_t end, SortedChunkList &chunkList)
Get the list of chunks associated with this region.
Definition: BamIndex.cpp:218
int32_t getNumMappedReads(int32_t refID)
Get the number of mapped reads for this reference id.
Definition: BamIndex.cpp:355
SamStatus::Status readIndex(const char *filename)
Definition: BamIndex.cpp:45
static const int32_t REF_ID_ALL
The number used to indicate that all reference ids should be used.
Definition: BamIndex.h:89
int32_t getNumUnMappedReads(int32_t refID)
Get the number of unmapped reads for this reference id.
Definition: BamIndex.cpp:377
static const int32_t REF_ID_UNMAPPED
The number used for the reference id of unmapped reads.
Definition: BamIndex.h:86
HandlingType
This specifies how this class should respond to errors.
Definition: ErrorHandler.h:29
Create/Access/Modify/Load Genome Sequences stored as binary mapped files.
Class for easily reading/writing files without having to worry about file type (uncompressed,...
Definition: InputFile.h:37
void disableBuffering()
Disable read buffering.
Definition: InputFile.h:121
const char * getFileName() const
Get the filename that is currently opened.
Definition: InputFile.h:473
bool isOpen() const
Returns whether or not the file was successfully opened.
Definition: InputFile.h:423
void setAttemptRecovery(bool flag=false)
Enable (default) or disable recovery.
Definition: InputFile.h:485
@ BGZF
bgzf file.
Definition: InputFile.h:48
@ DEFAULT
Check the extension, if it is ".gz", treat as gzip, otherwise treat it as UNCOMPRESSED.
Definition: InputFile.h:45
@ UNCOMPRESSED
uncompressed file.
Definition: InputFile.h:46
This class allows a user to get/set the fields in a SAM/BAM Header.
Definition: SamFileHeader.h:35
const char * getSortOrder()
Return the Sort Order value that is set in the Header, returning "" if this field does not exist.
int getReferenceID(const String &referenceName, bool addID=false)
Get the reference ID for the specified reference name (chromosome).
const String & getReferenceLabel(int id) const
Return the reference name (chromosome) for the specified reference id.
SamFileReader()
Default Constructor.
Definition: SamFile.cpp:1373
SamFileWriter()
Default Constructor.
Definition: SamFile.cpp:1419
Allows the user to easily read/write a SAM/BAM file.
Definition: SamFile.h:36
bool ReadHeader(SamFileHeader &header)
Reads the header section from the file and stores it in the passed in header.
Definition: SamFile.cpp:450
bool IsOpen()
Returns whether or not the file has been opened successfully.
Definition: SamFile.cpp:410
int32_t getNumUnMappedReadsFromIndex(int32_t refID)
Get the number of unmapped reads in the specified reference id.
Definition: SamFile.cpp:818
void Close()
Close the file if there is one open.
Definition: SamFile.cpp:400
void resetFile()
Resets the file prepping for a new file.
Definition: SamFile.cpp:966
int32_t myPrevCoord
Previous values used for checking if the file is sorted.
Definition: SamFile.h:409
bool myIsOpenForRead
Flag to indicate if a file is open for reading.
Definition: SamFile.h:399
const char * GetStatusMessage()
Get the Status Message of the last call that sets status.
Definition: SamFile.h:218
bool ReadBamIndex()
Read the bam index file using the BAM filename as a base.
Definition: SamFile.cpp:328
bool SetReadSection(int32_t refID)
Sets which reference id (index into the BAM list of reference information) of the BAM file should be ...
Definition: SamFile.cpp:696
SortedType
Enum for indicating the type of sort expected in the file.
Definition: SamFile.h:46
@ UNSORTED
file is not sorted.
Definition: SamFile.h:47
@ FLAG
SO flag from the header indicates the sort type.
Definition: SamFile.h:48
@ QUERY_NAME
file is sorted by queryname.
Definition: SamFile.h:50
@ COORDINATE
file is sorted by coordinate.
Definition: SamFile.h:49
bool ReadRecord(SamFileHeader &header, SamRecord &record)
Reads the next record from the file & stores it in the passed in record.
Definition: SamFile.cpp:514
bool IsStream()
Returns whether or not the file has been opened for streaming input/output.
Definition: SamFile.cpp:437
void SetReadFlags(uint16_t requiredFlags, uint16_t excludedFlags)
Specify which reads should be returned by ReadRecord.
Definition: SamFile.cpp:794
void SetReference(GenomeSequence *reference)
Sets the reference to the specified genome sequence object.
Definition: SamFile.cpp:380
OpenType
Enum for indicating whether to open the file for read or write.
Definition: SamFile.h:39
@ READ
open for reading.
Definition: SamFile.h:40
void GenerateStatistics(bool genStats)
Whether or not statistics should be generated for this file.
Definition: SamFile.cpp:891
const BamIndex * GetBamIndex()
Return the bam index if one has been opened.
Definition: SamFile.cpp:916
uint32_t GetNumOverlaps(SamRecord &samRecord)
Returns the number of bases in the passed in read that overlap the region that is currently set.
Definition: SamFile.cpp:877
bool OpenForRead(const char *filename, SamFileHeader *header=NULL)
Open a sam/bam file for reading with the specified filename, determing the type of file and SAM/BAM b...
Definition: SamFile.cpp:93
bool IsEOF()
Returns whether or not the end of the file has been reached.
Definition: SamFile.cpp:424
int32_t getNumMappedReadsFromIndex(int32_t refID)
Get the number of mapped reads in the specified reference id.
Definition: SamFile.cpp:803
bool OpenForWrite(const char *filename, SamFileHeader *header=NULL)
Open a sam/bam file for writing with the specified filename, determining SAM/BAM from the extension (...
Definition: SamFile.cpp:223
bool WriteHeader(SamFileHeader &header)
Writes the specified header into the file.
Definition: SamFile.cpp:480
bool myIsOpenForWrite
Flag to indicate if a file is open for writing.
Definition: SamFile.h:401
bool myIsBamOpenForRead
Values for reading Sorted BAM files via the index.
Definition: SamFile.h:423
bool myHasHeader
Flag to indicate if a header has been read/written - required before being able to read/write a recor...
Definition: SamFile.h:404
virtual ~SamFile()
Destructor.
Definition: SamFile.cpp:82
SamFile()
Default Constructor, initializes the variables, but does not open any files.
Definition: SamFile.cpp:26
void SetWriteSequenceTranslation(SamRecord::SequenceTranslation translation)
Set the type of sequence translation to use when writing the sequence.
Definition: SamFile.cpp:394
void SetReadSequenceTranslation(SamRecord::SequenceTranslation translation)
Set the type of sequence translation to use when reading the sequence.
Definition: SamFile.cpp:387
bool validateSortOrder(SamRecord &record, SamFileHeader &header)
Validate that the record is sorted compared to the previously read record if there is one,...
Definition: SamFile.cpp:1019
bool WriteRecord(SamFileHeader &header, SamRecord &record)
Writes the specified record into the file.
Definition: SamFile.cpp:632
uint32_t myRecordCount
Keep a count of the number of records that have been read/written so far.
Definition: SamFile.h:414
SamStatus myStatus
The status of the last SamFile command.
Definition: SamFile.h:420
uint32_t GetCurrentRecordCount()
Return the number of records that have been read/written so far.
Definition: SamFile.cpp:689
void setSortedValidation(SortedType sortType)
Set the flag to validate that the file is sorted as it is read/written.
Definition: SamFile.cpp:682
SamStatistics * myStatistics
Pointer to the statistics for this file.
Definition: SamFile.h:417
Class providing an easy to use interface to get/set/operate on the fields in a SAM/BAM record.
Definition: SamRecord.h:52
SequenceTranslation
Enum containing the settings on how to translate the sequence if a reference is available.
Definition: SamRecord.h:57
@ NONE
Leave the sequence as is.
Definition: SamRecord.h:58
int32_t getReferenceID()
Get the reference sequence id of the record (BAM format rid).
Definition: SamRecord.cpp:1305
uint16_t getFlag()
Get the flag (FLAG).
Definition: SamRecord.cpp:1384
void setSequenceTranslation(SequenceTranslation translation)
Set the type of sequence translation to use when getting the sequence.
Definition: SamRecord.cpp:187
uint32_t getNumOverlaps(int32_t start, int32_t end)
Return the number of bases in this read that overlap the passed in region.
Definition: SamRecord.cpp:1853
void setReference(GenomeSequence *reference)
Set the reference to the specified genome sequence object.
Definition: SamRecord.cpp:178
int32_t get0BasedAlignmentEnd()
Returns the 0-based inclusive rightmost position of the clipped sequence.
Definition: SamRecord.cpp:1467
int32_t get0BasedPosition()
Get the 0-based(BAM) leftmost position of the record.
Definition: SamRecord.cpp:1319
const char * getReadName()
Returns the SAM formatted Read Name (QNAME).
Definition: SamRecord.cpp:1542
static const int NO_REF_ID
Constant for the value returned if a reference id does not exist for a queried reference name.
Status
Return value enum for StatGenFile methods.
Definition: StatGenStatus.h:32
@ NO_MORE_RECS
NO_MORE_RECS: failed to read a record since there are no more to read either in the file or section i...
Definition: StatGenStatus.h:36
@ SUCCESS
method completed successfully.
Definition: StatGenStatus.h:32
@ FAIL_IO
method failed due to an I/O issue.
Definition: StatGenStatus.h:37
@ INVALID_SORT
record is invalid due to it not being sorted.
Definition: StatGenStatus.h:43
@ FAIL_PARSE
failed to parse a record/header - invalid format.
Definition: StatGenStatus.h:42
@ FAIL_ORDER
FAIL_ORDER: method failed because it was called out of order, like trying to read a file without open...
Definition: StatGenStatus.h:41
void setStatus(Status newStatus, const char *newMessage)
Set the status with the specified status enum and message.