libStatGen Software  1
BamIndexTest.cpp
1 /*
2  * Copyright (C) 2010 Regents of the University of Michigan
3  *
4  * This program is free software: you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation, either version 3 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program. If not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #include "BamIndex.h"
19 #include "TestValidate.h"
20 #include "BamIndexTest.h"
21 
22 #include <assert.h>
23 
24 void testIndex(BamIndex& bamIndex)
25 {
26 #ifdef __ZLIB_AVAILABLE__
27  assert(bamIndex.getNumMappedReads(1) == 2);
28  assert(bamIndex.getNumUnMappedReads(1) == 0);
29  assert(bamIndex.getNumMappedReads(0) == 4);
30  assert(bamIndex.getNumUnMappedReads(0) == 1);
31  assert(bamIndex.getNumMappedReads(23) == -1);
32  assert(bamIndex.getNumUnMappedReads(23) == -1);
33  assert(bamIndex.getNumMappedReads(-1) == 0);
34  assert(bamIndex.getNumUnMappedReads(-1) == 2);
35  assert(bamIndex.getNumMappedReads(-2) == -1);
36  assert(bamIndex.getNumUnMappedReads(-2) == -1);
37  assert(bamIndex.getNumMappedReads(22) == 0);
38  assert(bamIndex.getNumUnMappedReads(22) == 0);
39 
40  // Get the chunks for reference id 1.
41  Chunk testChunk;
42  SortedChunkList chunkList;
43  assert(bamIndex.getChunksForRegion(1, -1, -1, chunkList) == true);
44  assert(!chunkList.empty());
45  testChunk = chunkList.pop();
46  assert(chunkList.empty());
47  assert(testChunk.chunk_beg == 0x4e7);
48  assert(testChunk.chunk_end == 0x599);
49 
50  // Get the chunks for reference id 0.
51  assert(bamIndex.getChunksForRegion(0, -1, -1, chunkList) == true);
52  assert(!chunkList.empty());
53  testChunk = chunkList.pop();
54  assert(chunkList.empty());
55  assert(testChunk.chunk_beg == 0x360);
56  assert(testChunk.chunk_end == 0x4e7);
57 
58 
59  // Get the chunks for reference id 2.
60  assert(bamIndex.getChunksForRegion(2, -1, -1, chunkList) == true);
61  assert(!chunkList.empty());
62  testChunk = chunkList.pop();
63  assert(chunkList.empty());
64  assert(testChunk.chunk_beg == 0x599);
65  assert(testChunk.chunk_end == 0x5ea);
66 
67  // Get the chunks for reference id 3.
68  // There isn't one for this ref id, but still successfully read the file,
69  // so it should return true, but the list should be empty.
70  assert(bamIndex.getChunksForRegion(3, -1, -1, chunkList) == true);
71  assert(chunkList.empty());
72 
73  // Test reading an indexed bam file.
74  SamFile inFile;
75  assert(inFile.OpenForRead("testFiles/sortedBam.bam"));
77  assert(inFile.ReadBamIndex("testFiles/sortedBam.bam.bai"));
78  SamFileHeader samHeader;
79  assert(inFile.ReadHeader(samHeader));
80  SamRecord samRecord;
81 
82  // Test getting num mapped/unmapped reads.
83  assert(inFile.getNumMappedReadsFromIndex(1) == 2);
84  assert(inFile.getNumUnMappedReadsFromIndex(1) == 0);
85  assert(inFile.getNumMappedReadsFromIndex(0) == 4);
86  assert(inFile.getNumUnMappedReadsFromIndex(0) == 1);
87  assert(inFile.getNumMappedReadsFromIndex(23) == -1);
88  assert(inFile.getNumUnMappedReadsFromIndex(23) == -1);
89  assert(inFile.getNumMappedReadsFromIndex(-1) == 0);
90  assert(inFile.getNumUnMappedReadsFromIndex(-1) == 2);
91  assert(inFile.getNumMappedReadsFromIndex(-2) == -1);
92  assert(inFile.getNumUnMappedReadsFromIndex(-2) == -1);
93  assert(inFile.getNumMappedReadsFromIndex(22) == 0);
94  assert(inFile.getNumUnMappedReadsFromIndex(22) == 0);
95 
96  assert(inFile.getNumMappedReadsFromIndex("2", samHeader) == 2);
97  assert(inFile.getNumUnMappedReadsFromIndex("2", samHeader) == 0);
98  assert(inFile.getNumMappedReadsFromIndex("1", samHeader) == 4);
99  assert(inFile.getNumUnMappedReadsFromIndex("1", samHeader) == 1);
100  assert(inFile.getNumMappedReadsFromIndex("22", samHeader) == 0);
101  assert(inFile.getNumUnMappedReadsFromIndex("22", samHeader) == 0);
102  assert(inFile.getNumMappedReadsFromIndex("", samHeader) == 0);
103  assert(inFile.getNumUnMappedReadsFromIndex("*", samHeader) == 2);
104  assert(inFile.getNumMappedReadsFromIndex("unknown", samHeader) == -1);
105  assert(inFile.getNumUnMappedReadsFromIndex("unknown", samHeader) == -1);
106  assert(inFile.getNumMappedReadsFromIndex("X", samHeader) == 0);
107  assert(inFile.getNumUnMappedReadsFromIndex("X", samHeader) == 0);
108 
109  // Set the read section saying the reads must be fully enclosed in the section.
110  assert(inFile.SetReadSection("1", 1010, 1011, false));
111  assert(inFile.ReadRecord(samHeader, samRecord) == false);
112 
113  assert(inFile.SetReadSection("1", 1011, 1012, false));
114  assert(inFile.ReadRecord(samHeader, samRecord));
115  validateRead2(samRecord);
116  assert(inFile.ReadRecord(samHeader, samRecord) == false);
117 
118  // Section -1 = Ref *: 2 records (8 & 10 from testSam.sam that is reflected
119  // in the validation.
120  assert(inFile.SetReadSection(-1));
121  assert(inFile.ReadRecord(samHeader, samRecord));
122  validateRead8(samRecord);
123  assert(inFile.ReadRecord(samHeader, samRecord));
124  validateRead10(samRecord);
125  assert(inFile.ReadRecord(samHeader, samRecord) == false);
126 
127  // Section 2 = Ref 3: 1 records (9 from testSam.sam that is reflected
128  // in the validation.
129  assert(inFile.SetReadSection(2));
130  assert(inFile.ReadRecord(samHeader, samRecord));
131  validateRead9(samRecord);
132  assert(inFile.ReadRecord(samHeader, samRecord) == false);
133 
134  // Section 0 = Ref 1: 5 records (3, 4, 1, 2, & 6 from testSam.sam that is
135  // reflected in the validation.
136  assert(inFile.SetReadSection(0));
137  assert(inFile.ReadRecord(samHeader, samRecord));
138  validateRead3(samRecord);
139  assert(inFile.ReadRecord(samHeader, samRecord));
140  validateRead4(samRecord);
141  assert(inFile.ReadRecord(samHeader, samRecord));
142  validateRead1(samRecord);
143  assert(inFile.ReadRecord(samHeader, samRecord));
144  validateRead2(samRecord);
145  assert(inFile.ReadRecord(samHeader, samRecord));
146  validateRead6(samRecord);
147  assert(inFile.ReadRecord(samHeader, samRecord) == false);
148 
149  // Section 1 = Ref 2: 2 records (5 & 7 from testSam.sam that is reflected
150  // in the validation.
151  assert(inFile.SetReadSection(1));
152  assert(inFile.ReadRecord(samHeader, samRecord));
153  validateRead5(samRecord);
154  assert(inFile.ReadRecord(samHeader, samRecord));
155  validateRead7(samRecord);
156  assert(inFile.ReadRecord(samHeader, samRecord) == false);
157 
158  // Section 3 to 22 (ref 4 - 23): 0 records.
159  for(int i = 3; i < 23; i++)
160  {
161  assert(inFile.SetReadSection(i));
162  assert(inFile.ReadRecord(samHeader, samRecord) == false);
163  }
164 
165 
166  // Set the read section.
167  assert(inFile.SetReadSection("1", 1010, 1012));
168  assert(inFile.ReadRecord(samHeader, samRecord));
169  validateRead1(samRecord);
170  assert(inFile.GetNumOverlaps(samRecord) == 2);
171  assert(samRecord.getNumOverlaps(1010, 1012) == 2);
172  assert(samRecord.getNumOverlaps(1010, 1020) == 5);
173  assert(samRecord.getNumOverlaps(1010, 1011) == 1);
174  assert(samRecord.getNumOverlaps(1011, 1012) == 1);
175  assert(inFile.ReadRecord(samHeader, samRecord));
176  validateRead2(samRecord);
177  assert(inFile.GetNumOverlaps(samRecord) == 0);
178  assert(samRecord.getNumOverlaps(1010, 1012) == 0);
179  assert(samRecord.getNumOverlaps(1010, 1020) == 0);
180  assert(samRecord.getNumOverlaps(1010, 1011) == 0);
181  assert(samRecord.getNumOverlaps(1011, 1012) == 0);
182  assert(inFile.ReadRecord(samHeader, samRecord) == false);
183 
184  assert(inFile.SetReadSection("1", 1010, 1020));
185  assert(inFile.ReadRecord(samHeader, samRecord));
186  validateRead1(samRecord);
187  assert(inFile.GetNumOverlaps(samRecord) == 5);
188  assert(samRecord.getNumOverlaps(1010, 1012) == 2);
189  assert(samRecord.getNumOverlaps(1010, 1020) == 5);
190  assert(samRecord.getNumOverlaps(1010, 1011) == 1);
191  assert(samRecord.getNumOverlaps(1011, 1012) == 1);
192  assert(inFile.ReadRecord(samHeader, samRecord));
193  validateRead2(samRecord);
194  assert(inFile.GetNumOverlaps(samRecord) == 0);
195  assert(samRecord.getNumOverlaps(1010, 1012) == 0);
196  assert(samRecord.getNumOverlaps(1010, 1020) == 0);
197  assert(samRecord.getNumOverlaps(1010, 1011) == 0);
198  assert(samRecord.getNumOverlaps(1011, 1012) == 0);
199  assert(inFile.ReadRecord(samHeader, samRecord) == false);
200 
201  assert(inFile.SetReadSection("1", 1010, 1011));
202  assert(inFile.ReadRecord(samHeader, samRecord));
203  validateRead1(samRecord);
204  assert(inFile.GetNumOverlaps(samRecord) == 1);
205  assert(samRecord.getNumOverlaps(1010, 1012) == 2);
206  assert(samRecord.getNumOverlaps(1010, 1020) == 5);
207  assert(samRecord.getNumOverlaps(1010, 1011) == 1);
208  assert(samRecord.getNumOverlaps(1011, 1012) == 1);
209  assert(inFile.ReadRecord(samHeader, samRecord) == false);
210 
211  assert(inFile.SetReadSection("1", 1011, 1012));
212  assert(inFile.ReadRecord(samHeader, samRecord));
213  validateRead1(samRecord);
214  assert(inFile.GetNumOverlaps(samRecord) == 1);
215  assert(samRecord.getNumOverlaps(1010, 1012) == 2);
216  assert(samRecord.getNumOverlaps(1010, 1020) == 5);
217  assert(samRecord.getNumOverlaps(1010, 1011) == 1);
218  assert(samRecord.getNumOverlaps(1011, 1012) == 1);
219  assert(inFile.ReadRecord(samHeader, samRecord));
220  validateRead2(samRecord);
221  assert(inFile.GetNumOverlaps(samRecord) == 0);
222  assert(samRecord.getNumOverlaps(1010, 1012) == 0);
223  assert(samRecord.getNumOverlaps(1010, 1020) == 0);
224  assert(samRecord.getNumOverlaps(1010, 1011) == 0);
225  assert(samRecord.getNumOverlaps(1011, 1012) == 0);
226  assert(inFile.ReadRecord(samHeader, samRecord) == false);
227 #endif
228 }
229 
230 
231 void testBamIndex()
232 {
233  // BAM indexes are compressed, so can't be tested without zlib.
234 #ifdef __ZLIB_AVAILABLE__
235  // Create a bam index.
236  BamIndex bamIndex;
237  bamIndex.readIndex("testFiles/sortedBam.bam.bai");
238  testIndex(bamIndex);
239 
240  BamIndexFileTest test1;
241  bool caughtException = false;
242  try
243  {
244  // Try reading the bam index without specifying a
245  // filename and before opening a bam file.
246  assert(test1.ReadBamIndex() == false);
247  }
248  catch (std::exception& e)
249  {
250  caughtException = true;
251  assert(strcmp(e.what(), "FAIL_ORDER: Failed to read the bam Index file - the BAM file needs to be read first in order to determine the index filename.") == 0);
252  }
253  // Should have failed and thrown an exception.
254  assert(caughtException);
255 
256  // Read the bam index with a specified name.
257  assert(test1.ReadBamIndex("testFiles/sortedBam.bam.bai"));
258  BamIndex* index = test1.getBamIndex();
259  assert(index != NULL);
260  testIndex(*index);
261 
262  // Open the bam file so the index can be opened.
263  assert(test1.OpenForRead("testFiles/sortedBam.bam"));
264  // Try reading the bam index without specifying a
265  // filename after opening a bam file.
266  assert(test1.ReadBamIndex() == true);
267  index = test1.getBamIndex();
268  assert(index != NULL);
269  testIndex(*index);
270 
271  // Open the bam file so the index can be opened.
272  // This time the index file does not have .bam in it.
273  assert(test1.OpenForRead("testFiles/sortedBam2.bam"));
274  // Try reading the bam index without specifying a
275  // filename after opening a bam file.
276  assert(test1.ReadBamIndex() == true);
277  index = test1.getBamIndex();
278  assert(index != NULL);
279  testIndex(*index);
280 #endif
281 }
282 
283 
int32_t getNumUnMappedReads(int32_t refID)
Get the number of unmapped reads for this reference id.
Definition: BamIndex.cpp:377
SamStatus::Status readIndex(const char *filename)
Definition: BamIndex.cpp:45
int32_t getNumMappedReads(int32_t refID)
Get the number of mapped reads for this reference id.
Definition: BamIndex.cpp:355
void setSortedValidation(SortedType sortType)
Set the flag to validate that the file is sorted as it is read/written.
Definition: SamFile.cpp:669
file is sorted by coordinate.
Definition: SamFile.h:49
Allows the user to easily read/write a SAM/BAM file.
Definition: SamFile.h:35
bool getChunksForRegion(int32_t refID, int32_t start, int32_t end, SortedChunkList &chunkList)
Get the list of chunks associated with this region.
Definition: BamIndex.cpp:218
This class allows a user to get/set the fields in a SAM/BAM Header.
Definition: SamFileHeader.h:34
Class providing an easy to use interface to get/set/operate on the fields in a SAM/BAM record...
Definition: SamRecord.h:51
bool OpenForRead(const char *filename, SamFileHeader *header=NULL)
Open a sam/bam file for reading with the specified filename, determing the type of file and SAM/BAM b...
Definition: SamFile.cpp:93
bool ReadBamIndex(const char *filename)
Read the specified bam index file.
Definition: SamFile.cpp:300