libStatGen Software  1
Tabix.h
1 /*
2  * Copyright (C) 2012-2013 Regents of the University of Michigan
3  *
4  * This program is free software: you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation, either version 3 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program. If not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #ifndef __TABIX_H__
19 #define __TABIX_H__
20 
21 #include <stdint.h>
22 #include <vector>
23 #include <map>
24 #include <stdlib.h>
25 
26 #include "IndexBase.h"
27 
28 #include "InputFile.h"
29 #include "StatGenStatus.h"
30 
31 class Tabix : public IndexBase // Index for a tabix file; resetIndex/readIndex/getStartPos/getRefName are defined in Tabix.cpp.
32 {
33 public:
34 
35  enum Format // File format codes. NOTE(review): presumably the values getFormat() reports - confirm in Tabix.cpp.
36  {
37  FORMAT_GENERIC = 0,
38  FORMAT_SAM = 1,
39  FORMAT_VCF = 2
40  };
41 
42  Tabix();
43  virtual ~Tabix();
44 
45  /// Reset the member data for a new index file.
46  void resetIndex();
47 
48  /// Read & parse the specified index file.
49  /// \param filename the tabix index file to be read.
50  /// \return the status of the read.
51  StatGenStatus::Status readIndex(const char* filename);
52 
53  /// Get the starting file offset to look for the specified start position.
54  /// For an entire reference ID, set start to -1.
55  /// To start at the beginning of the region, set start to 0/-1.
56  bool getStartPos(const char* refName, int32_t start,
57  uint64_t& fileStartPos) const; // NOTE(review): bool presumably reports whether fileStartPos was set - confirm in Tabix.cpp.
58 
59  /// Return the reference name at the specified index or
60  /// throws an exception if out of range.
61  const char* getRefName(unsigned int indexNum) const;
62 
63  /// Get the format of this tabix file (returns the stored myFormat.format value).
64  inline int32_t getFormat() const { return myFormat.format; }
65 
66 private:
67  struct TabixFormat // Format description fields; all stored as 32-bit signed integers.
68  {
69  int32_t format; // value returned by getFormat()
70  int32_t col_seq; // NOTE(review): presumably the column holding the sequence name - confirm against the tabix spec
71  int32_t col_beg; // NOTE(review): presumably the column holding the region start - confirm against the tabix spec
72  int32_t col_end; // NOTE(review): presumably the column holding the region end - confirm against the tabix spec
73  int32_t meta; // character that starts header lines
74  int32_t skip; // Number of lines to skip from putting into the index.
75  };
76 
77  TabixFormat myFormat;
78 
79  char* myChromNamesBuffer; // NOTE(review): raw pointer; allocation and cleanup are not visible in this header - confirm in Tabix.cpp.
80 
81  // Vector of pointers to the chromosome names (NOTE(review): presumably pointing into myChromNamesBuffer - confirm).
82  std::vector<const char*> myChromNamesVector;
83 };
84 
85 
86 #endif
Status
Return value enum for StatGenFile methods.
Definition: StatGenStatus.h:32
Tabix
Definition: Tabix.h:32
void resetIndex()
Reset the member data for a new index file.
Definition: Tabix.cpp:39
bool getStartPos(const char *refName, int32_t start, uint64_t &fileStartPos) const
Get the starting file offset to look for the specified start position.
Definition: Tabix.cpp:218
const char * getRefName(unsigned int indexNum) const
Return the reference name at the specified index or throws an exception if out of range.
Definition: Tabix.cpp:247
StatGenStatus::Status readIndex(const char *filename)
Read & parse the specified index file.
Definition: Tabix.cpp:52