libStatGen Software  1
CigarRoller.h
1 /*
2  * Copyright (C) 2010-2011 Regents of the University of Michigan
3  *
4  * This program is free software: you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation, either version 3 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program. If not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #if !defined(_CIGAR_ROLLER_H)
19 #define _CIGAR_ROLLER_H
20 
21 #include "Cigar.h"
22 
23 /// The purpose of this class is to provide accessors for setting, updating, and modifying the CIGAR object. It is a child class of Cigar.
24 
25 ///
26 /// Docs from Sam1.pdf:
27 ///
28 /// Clipped alignment. In Smith-Waterman alignment, a sequence may not be aligned from the first residue to the last one.
29 /// Subsequences at the ends may be clipped off. We introduce operation 'S' to describe (softly) clipped alignment. Here is
30 /// an example. Suppose the clipped alignment is:
31 /// REF: AGCTAGCATCGTGTCGCCCGTCTAGCATACGCATGATCGACTGTCAGCTAGTCAGACTAGTCGATCGATGTG
32 /// READ: gggGTGTAACC-GACTAGgggg
33 /// where on the read sequence, bases in uppercase are matches and bases in lowercase are clipped off. The CIGAR for
34 /// this alignment is: 3S8M1D6M4S.
35 ///
36 ///
37 /// If the mapping position of the query is not available, RNAME and
38 /// CIGAR are set as “*”
39 ///
40 /// A CIGAR string is comprised of a series of operation lengths plus the operations. The conventional CIGAR format allows
41 /// for three types of operations: M for match or mismatch, I for insertion and D for deletion. The extended CIGAR format
42 /// further allows four more operations, as is shown in the following table, to describe clipping, padding and splicing:
43 ///
44 /// op Description
45 /// -- -----------
46 /// M Match or mismatch
47 /// I Insertion to the reference
48 /// D Deletion from the reference
49 /// N Skipped region from the reference
50 /// S Soft clip on the read (clipped sequence present in <seq>)
51 /// H Hard clip on the read (clipped sequence NOT present in <seq>)
52 /// P Padding (silent deletion from the padded reference sequence)
53 ///
54 
55 
56 
57 ////////////////////////////////////////////////////////////////////////
58 ///
59 /// CigarRoller is an aid to correctly generating the CIGAR strings
60 /// necessary to represent how a read maps to the reference.
61 ///
62 /// It is called once a particular match candidate is being written
63 /// out, so it is far less performance sensitive than the Smith Waterman
64 /// code below.
65 ///
66 class CigarRoller : public Cigar
67 {
68 public:
69 
70  ////////////////////////////////////////////////////////////////////////
71  //
72  // Cigar Roller Class
73  //
74  /// Writes all of the cigar operations contained in this roller to the
75  /// passed in stream.
76  friend std::ostream &operator << (std::ostream &stream, const CigarRoller& roller);
77 
78  /// Default constructor initializes as a CIGAR with no operations.
80  {
81  clearQueryAndReferenceIndexes();
82  }
83 
84  /// Constructor that initializes the object with the specified cigarString.
85  CigarRoller(const char *cigarString)
86  {
87  Set(cigarString);
88  }
89 
90  /// Add the contents of the specified CigarRoller to this object.
92 
93  /// Append the specified operator to this object.
94  CigarRoller & operator += (const CigarOperator &rhs);
95 
96  /// Set this object to be equal to the specified CigarRoller.
98 
99  /// Append the specified operation with the specified count to this object.
100  void Add(Operation operation, int count);
101 
102  /// Append the specified operation with the specified count to this object.
103  void Add(char operation, int count);
104 
105  /// Append the specified cigarString to this object.
106  void Add(const char *cigarString);
107 
108  /// Append the specified Cigar object to this object.
109  void Add(CigarRoller &rhs)
110  {
111  (*this) += rhs;
112  }
113 
114  /// Remove the operation at the specified index.
115  /// \return true if successfully removed, false if not.
116  bool Remove(int index);
117 
118  /// Increments the count for the operation at the specified index
119  /// by the specified value, specify a negative value to decrement.
120  /// \return true if it is successfully incremented, false if not.
121  bool IncrementCount(int index, int increment);
122 
123  /// Updates the operation at the specified index to be the specified
124  /// operation and have the specified count.
125  /// \return true if it is successfully updated, false if not.
126  bool Update(int index, Operation op, int count);
127 
128  /// Sets this object to the specified cigarString.
129  void Set(const char *cigarString);
130 
131  /// Sets this object to the BAM formatted cigar found at the beginning
132  /// of the specified buffer which is bufferLen long.
133  void Set(const uint32_t* cigarBuffer, uint16_t bufferLen);
134 
135  //
136  // when we examine CIGAR strings, we need to know how
137  // many cumulative insert and delete positions there are
138  // so that we can adjust the read location appropriately.
139  //
140  // Here, we iterate over the vector of CIGAR operations,
141  // summaring the count for each insert or delete (insert
142  // increases the offset, delete decreases it).
143  //
144  // The use case for this is when we have a genome match
145  // position based on an index word other than the first one,
146  // and there is also a insert or delete between the beginning
147  // of the read and the index word. We can't simply report
148  // the match position without taking into account the indels,
149  // otherwise we'll be off by N where N is the sum of this
150  // indel count.
151  //
152  /// DEPRECATED - do not use, there are better ways to accomplish that by
153  /// using read lengths, reference lengths, span of the read, etc.
155 
156  /// Get the string reprentation of the Cigar operations in this object,
157  /// caller must delete the returned value.
158  const char *getString();
159 
160  /// Clear this object so that it has no Cigar Operations.
161  void clear();
162 
163 private:
164 };
165 
166 
167 inline std::ostream &operator << (std::ostream &stream, const CigarRoller& roller)
168 {
169  stream << roller.cigarOperations;
170  return stream;
171 }
172 
173 #endif
InputFile & operator<<(InputFile &stream, const std::string &str)
Write to a file using streaming.
Definition: InputFile.h:736
The purpose of this class is to provide accessors for setting, updating, modifying the CIGAR object....
Definition: CigarRoller.h:67
CigarRoller & operator+=(CigarRoller &rhs)
Add the contents of the specified CigarRoller to this object.
Definition: CigarRoller.cpp:29
bool Remove(int index)
Remove the operation at the specified index.
bool IncrementCount(int index, int increment)
Increments the count for the operation at the specified index by the specified value,...
void Add(Operation operation, int count)
Append the specified operation with the specified count to this object.
Definition: CigarRoller.cpp:77
CigarRoller()
Default constructor initializes as a CIGAR with no operations.
Definition: CigarRoller.h:79
CigarRoller(const char *cigarString)
Constructor that initializes the object with the specified cigarString.
Definition: CigarRoller.h:85
bool Update(int index, Operation op, int count)
Updates the operation at the specified index to be the specified operation and have the specified cou...
void clear()
Clear this object so that it has no Cigar Operations.
friend std::ostream & operator<<(std::ostream &stream, const CigarRoller &roller)
Writes all of the cigar operations contained in this roller to the passed in stream.
Definition: CigarRoller.h:167
void Add(CigarRoller &rhs)
Append the specified Cigar object to this object.
Definition: CigarRoller.h:109
const char * getString()
Get the string representation of the Cigar operations in this object, caller must delete the returned v...
void Set(const char *cigarString)
Sets this object to the specified cigarString.
CigarRoller & operator=(CigarRoller &rhs)
Set this object to be equal to the specified CigarRoller.
Definition: CigarRoller.cpp:66
int getMatchPositionOffset()
DEPRECATED - do not use, there are better ways to accomplish that by using read lengths,...
This class represents the CIGAR without any methods to set the cigar (see CigarRoller for that).
Definition: Cigar.h:84
Operation
Enum for the cigar operations.
Definition: Cigar.h:87