libfreecontact  1.0.21
freecontact.h
Go to the documentation of this file.
1 /* FreeContact - program to predict protein residue contacts from a sufficiently large multiple alignment
2 * Copyright (C) 2012-2013 Laszlo Kajan <lkajan@rostlab.org> Rost Lab, Technical University of Munich, Germany
3 *
4 * This program is free software: you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation, either version 3 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17 #ifndef LIBFREECONTACT_H
18 #define LIBFREECONTACT_H
19 
20 #include <errno.h>
21 #include <map>
22 #include <math.h>
23 #include <stdexcept>
24 #include <stdint.h>
25 #include <string.h>
26 #include <vector>
27 #ifdef __SSE2__
28 #include <x86intrin.h>
29 #endif // __SSE2__
30 
31 #define LIBFREEC_API __attribute__ ((visibility ("default")))
32 #define LIBFREEC_LOCAL __attribute__ ((visibility ("hidden")))
33 
34 namespace freecontact {
35 
36 // http://www.fao.org/docrep/004/Y2775E/y2775e0e.htm
37 // Use 20 for gap.
38 static const uint8_t aamap_gapidx = 20;
39 static const uint8_t aamap[] = {
40  // A B C D E F G H I J K L M N
41  21, 0, 3, 4, 3, 6, 13, 7, 8, 9, 21, 11, 10, 12, 2,
42  // O P Q R S T U V W X Y Z
43  21, 14, 5, 1, 15, 16, 21, 19, 17, 21, 18, 6
44 };
45 static const uint8_t mapaa[] = {
46  //0 1 2 3 4 5 6 7 8 9
47  'A', 'R', 'N', 'D', 'C', 'Q', 'E', 'G', 'H', 'I',
48  'L', 'K', 'M', 'F', 'P', 'S', 'T', 'W', 'Y', 'V',
49  '-', 'X'
50 };
51 
53 class LIBFREEC_API alilen_error : public std::runtime_error {
54  public:
55  alilen_error(const std::string& __arg) : std::runtime_error(__arg){}
56 };
57 
59 class LIBFREEC_API icme_timeout_error : public std::runtime_error {
60  public:
61  icme_timeout_error(const std::string& __arg) : std::runtime_error(__arg){}
62 };
63 
64 #ifndef __SSE2__
65 typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
66 #endif
67 
69 class LIBFREEC_API ali_t : public std::vector<__m128i>
70 {
71  private:
72  #pragma GCC visibility push(hidden)
73  #pragma GCC visibility pop
74  public:
75  uint32_t seqcnt;
76  uint16_t alilen; // aligned sequence length, max 65535, reasonable
77  uint16_t alilen16; // size in 16-bytes
78  uint16_t alilenpad; // 16 * alilen16
79 
80  ali_t( const uint16_t __alilen = 0 ) : std::vector<__m128i>(), seqcnt(0), alilen(__alilen), alilen16(( alilen-1 )/16 + 1), alilenpad( 16 * alilen16 )
81  {
82  reserve(1024*alilen16);
83  }
84  virtual ~ali_t(){}
85 
86  inline uint8_t& operator()(uint32_t __k, uint16_t __ai)
87  {
88  return reinterpret_cast<uint8_t*>(this->_M_impl._M_start)[ __k*alilenpad + __ai ];
89  }
90  inline const uint8_t& operator()(uint32_t __k, uint16_t __ai) const
91  {
92  return reinterpret_cast<uint8_t*>(this->_M_impl._M_start)[ __k*alilenpad + __ai ];
93  }
94 
96  static std::vector<uint8_t>
97  read_a_seq( const std::string& __l )
98  {
99  std::vector<uint8_t> ret; ret.reserve(__l.length());
100  for( std::string::const_iterator l_b = __l.begin(), l_e = __l.end(); l_b != l_e; ++l_b )
101  {
102  const std::string::value_type c = *l_b;
103  if( isalpha(c) )
104  ret.push_back( aamap[c & 0x1F] );
105  else if( c == '-' )
106  ret.push_back( 20 );
107  else break;
108  }
109  return ret;
110  }
111 
113  ali_t& push(const std::vector<uint8_t>& __al);// throw (alilen_error);
114 
116  inline ali_t& push(const std::string& __l) //throw (alilen_error)
117  {
118  return push(read_a_seq(__l));
119  }
120 };
121 
122 
126  uint16_t i, j;
128  float score;
129  contact_t( uint16_t __i = 0, uint16_t __j = 0, float __score = 0 ) : i(__i), j(__j), score(__score) {}
130 
131  inline bool operator<( const contact_t& __b ) const { return score < __b.score; } // to facilitate sorting by score
132  inline bool operator>( const contact_t& __b ) const { return score > __b.score; } // to facilitate sorting by score
133 };
134 
136 struct parset_t {
137  double clustpc, density, gapth; uint16_t mincontsep;
139  bool cov20, apply_gapth; double rho;
140 
141  parset_t( double __clustpc = 0, double __density = 0, double __gapth = 0, uint16_t __mincontsep = 0,
142  double __pseudocnt = 0, double __pscnt_weight = 0, bool __estimate_ivcov = 0, double __shrink_lambda = 0,
143  bool __cov20 = 0, bool __apply_gapth = 0, double __rho = 0
144  ) :
145  clustpc(__clustpc), density(__density), gapth(__gapth), mincontsep(__mincontsep),
146  pseudocnt(__pseudocnt), pscnt_weight(__pscnt_weight), estimate_ivcov(__estimate_ivcov), shrink_lambda(__shrink_lambda),
147  cov20(__cov20), apply_gapth(__apply_gapth), rho(__rho)
148  {}
149 };
150 
151 const parset_t // clust conts gapt minc pseud pscw estim shrin cov20 agapth rho
152  //ps_evfold(0.70, 0.0, 1.0, 1, 0.0, 0.5, false, 0.0, true, false, -1 ), // original Matlab that expects gap pre-filtering
153  ps_evfold( 0.70, 0.0, 0.9, 1, 0.0, 0.5, false, 0.0, true, true, -1 ),
154  ps_psicov( 0.62, 0.03, 0.9, 5, 1.0, 0.0, true, 0.1, false, false, -1 ), // as published
155  ps_psicov_sd( 0.62, 0.0, 0.9, 5, 1.0, 0.0, true, 0.1, false, false, 0.001 ); // PSICOV sensible default (sd) for most cases
156 
158 
161  public:
162  typedef std::map<std::string, std::vector<contact_t> > cont_res_t;
163  typedef double freq_t;
164  typedef float pairfreq_t;
165  typedef std::vector<freq_t> freq_vec_t;
166  typedef std::map<std::string, double> time_res_t;
167  public:
168  bool dbg;
169  predictor(bool __dbg = false) : dbg(__dbg) {}
170  virtual ~predictor(){}
171 
173 
181  void get_seq_weights(
182  freq_vec_t &__aliw, double &__wtot,
183  const ali_t& __ali, double __clustpc,
184  bool __veczw = true, int __num_threads = 0
185  );// throw (alilen_error, icme_timeout_error, std::range_error, std::runtime_error);
186 
188 
214  cont_res_t run(const ali_t& __ali, const freq_vec_t &__aliw, const double __wtot,
215  double __density, double __gapth, uint16_t __mincontsep,
216  double __pseudocnt, double __pscnt_weight, bool __estimate_ivcov, double __shrink_lambda,
217  bool __cov20, bool __apply_gapth, double __rho,
218  int __num_threads = 0, time_t __icme_timeout = 1800, time_res_t *__timing = NULL
219  );// throw (alilen_error, icme_timeout_error, std::range_error, std::runtime_error);
220 
222  cont_res_t run(const ali_t& __ali, double __clustpc,
223  double __density, double __gapth, uint16_t __mincontsep,
224  double __pseudocnt, double __pscnt_weight, bool __estimate_ivcov, double __shrink_lambda,
225  bool __cov20, bool __apply_gapth, double __rho,
226  bool __veczw = true, int __num_threads = 0, time_t __icme_timeout = 1800, time_res_t *__timing = NULL
227  );// throw (alilen_error, icme_timeout_error, std::range_error, std::runtime_error);
228 
230  inline
231  cont_res_t run(const ali_t& __ali,
232  const parset_t& __parset,
233  bool __veczw = true, int __num_threads = 0, time_t __icme_timeout = 1800, time_res_t *__timing = NULL
234  )// throw (alilen_error, icme_timeout_error, std::range_error, std::runtime_error)
235  {
236  return run(__ali, __parset.clustpc,
237  __parset.density, __parset.gapth, __parset.mincontsep,
238  __parset.pseudocnt, __parset.pscnt_weight, __parset.estimate_ivcov, __parset.shrink_lambda,
239  __parset.cov20, __parset.apply_gapth, __parset.rho,
240  __veczw, __num_threads, __icme_timeout, __timing);
241  }
242 };
243 
244 } // namespace freecontact
245 
246 #endif // LIBFREECONTACT_H
247 // vim:et:ts=4:ai:
Protein sequence alignment.
Definition: freecontact.h:70
const uint8_t & operator()(uint32_t __k, uint16_t __ai) const
Definition: freecontact.h:90
uint8_t & operator()(uint32_t __k, uint16_t __ai)
Definition: freecontact.h:86
virtual ~ali_t()
Definition: freecontact.h:84
static std::vector< uint8_t > read_a_seq(const std::string &__l)
Convert alignment string to amino acid codes with aamap.
Definition: freecontact.h:97
uint16_t alilenpad
Definition: freecontact.h:78
ali_t(const uint16_t __alilen=0)
Definition: freecontact.h:80
ali_t & push(const std::string &__l)
Push a sequence to the alignment.
Definition: freecontact.h:116
Alignment length exception.
Definition: freecontact.h:53
alilen_error(const std::string &__arg)
Definition: freecontact.h:55
Inverse covariance matrix estimation timeout exception.
Definition: freecontact.h:59
icme_timeout_error(const std::string &__arg)
Definition: freecontact.h:61
Protein residue contact predictor.
Definition: freecontact.h:160
std::map< std::string, std::vector< contact_t > > cont_res_t
Definition: freecontact.h:162
std::map< std::string, double > time_res_t
Definition: freecontact.h:166
cont_res_t run(const ali_t &__ali, const parset_t &__parset, bool __veczw=true, int __num_threads=0, time_t __icme_timeout=1800, time_res_t *__timing=NULL)
Predict residue contacts.
Definition: freecontact.h:231
predictor(bool __dbg=false)
Definition: freecontact.h:169
std::vector< freq_t > freq_vec_t
Definition: freecontact.h:165
#define LIBFREEC_API
Definition: freecontact.h:31
const parset_t ps_psicov_sd(0.62, 0.0, 0.9, 5, 1.0, 0.0, true, 0.1, false, false, 0.001)
static const uint8_t aamap_gapidx
Definition: freecontact.h:38
long long __m128i __attribute__((__vector_size__(16), __may_alias__))
Definition: freecontact.h:65
const parset_t ps_evfold(0.70, 0.0, 0.9, 1, 0.0, 0.5, false, 0.0, true, true, -1)
const parset_t ps_psicov(0.62, 0.03, 0.9, 5, 1.0, 0.0, true, 0.1, false, false, -1)
static const uint8_t aamap[]
Definition: freecontact.h:39
static const uint8_t mapaa[]
Definition: freecontact.h:45
Contact prediction.
Definition: freecontact.h:124
contact_t(uint16_t __i=0, uint16_t __j=0, float __score=0)
Definition: freecontact.h:129
bool operator<(const contact_t &__b) const
Definition: freecontact.h:131
float score
Contact score.
Definition: freecontact.h:128
uint16_t i
Residue number, 0-based(!).
Definition: freecontact.h:126
bool operator>(const contact_t &__b) const
Definition: freecontact.h:132
Parameter set structure for predictor::run().
Definition: freecontact.h:136
parset_t(double __clustpc=0, double __density=0, double __gapth=0, uint16_t __mincontsep=0, double __pseudocnt=0, double __pscnt_weight=0, bool __estimate_ivcov=0, double __shrink_lambda=0, bool __cov20=0, bool __apply_gapth=0, double __rho=0)
Definition: freecontact.h:141