libStatGen Software  1
PedigreeGlobals.cpp
1 /*
2  * Copyright (C) 2010 Regents of the University of Michigan
3  *
4  * This program is free software: you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation, either version 3 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program. If not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #include "PedigreeGlobals.h"
19 #include "Sort.h"
20 #include "Error.h"
21 
22 #include <math.h>
23 #include <string.h>
24 #include <ctype.h>
25 
26 int PedigreeGlobals::traitCount = 0;
27 int PedigreeGlobals::affectionCount = 0;
28 int PedigreeGlobals::covariateCount = 0;
29 int PedigreeGlobals::markerCount = 0;
30 int PedigreeGlobals::stringCount = 0;
31 
32 // If this value isn't set, all X chromosome data will be rejected
33 bool PedigreeGlobals::chromosomeX = false;
34 bool PedigreeGlobals::sexSpecificMap = false;
35 
36 StringArray PedigreeGlobals::traitNames;
37 StringArray PedigreeGlobals::markerNames;
38 StringArray PedigreeGlobals::covariateNames;
39 StringArray PedigreeGlobals::affectionNames;
40 StringArray PedigreeGlobals::stringNames;
41 StringIntHash PedigreeGlobals::markerLookup;
42 StringIntHash PedigreeGlobals::traitLookup;
43 StringIntHash PedigreeGlobals::affectionLookup;
44 StringIntHash PedigreeGlobals::covariateLookup;
45 StringIntHash PedigreeGlobals::stringLookup;
46 
47 int PedigreeGlobals::markerInfoCount = 0;
48 int PedigreeGlobals::markerInfoSize = 0;
49 
50 MarkerInfo ** PedigreeGlobals::markerInfo = NULL;
51 MarkerInfo ** PedigreeGlobals::markerInfoByInteger = NULL;
52 StringHash PedigreeGlobals::markerInfoByName;
53 
54 int MarkerInfo::count = 0;
55 
56 int MarkerInfo::ComparePosition(MarkerInfo ** left, MarkerInfo ** right)
57 {
58  if ((*left)->chromosome != (*right)->chromosome)
59  return (*left)->chromosome - (*right)->chromosome;
60 
61  double difference = (*left)->position - (*right)->position;
62 
63  if (difference > 0.0)
64  return 1;
65  else if (difference == 0.0)
66  return (*left)->serial - (*right)->serial;
67  else
68  return -1;
69 }
70 
71 String MarkerInfo::GetAlleleLabel(int allele)
72 {
73  if (alleleLabels.Length() > allele && alleleLabels[allele].Length())
74  return alleleLabels[allele];
75  else if (alleleLabels.Length() <= allele)
76  alleleLabels.Dimension(allele + 1);
77  return alleleLabels[allele] = allele;
78 }
79 
80 bool MarkerInfo::AdjustFrequencies()
81 {
82  if (freq.Length() <= 1)
83  {
84  freq.Clear();
85  return false;
86  }
87 
88  if (freq.Min() < 0.0)
89  error("Locus %s has negative allele frequencies\n", (const char *) name);
90 
91  double sum = freq.Sum();
92 
93  if (sum <= 0.0)
94  error("Locus %s frequencies sum to %f, which doesn't make sense\n",
95  (const char *) name, sum);
96 
97  if (sum != 1.0)
98  freq *= 1.0 / sum;
99 
100  if (fabs(sum - 1.0) > 1.2e-5)
101  {
102  printf("Locus %s frequencies sum to %f, adjusted to 1.0\n",
103  (const char *) name, sum);
104  return true;
105  }
106 
107  return false;
108 }
109 
110 void MarkerInfo::IndexAlleles()
111 {
112  if (alleleLabels.Length() >= 255)
113  error("Marker %s has more than 254 distinct alleles\n",
114  (const char *) name);
115 
116  alleleNumbers.Clear();
117 
118  for (int i = 1; i < alleleLabels.Length(); i++)
119  alleleNumbers.SetInteger(alleleLabels[i], i);
120 }
121 
122 int MarkerInfo::NewAllele(const String & label)
123 {
124  if (alleleLabels.Length() == 0)
125  alleleLabels.Push("");
126 
127  if (alleleLabels.Length() >= 255)
128  error("Marker %s has more than 254 distinct alleles\n",
129  (const char *) name);
130 
131  alleleNumbers.SetInteger(label, alleleLabels.Length());
132  alleleLabels.Push(label);
133 
134  return alleleLabels.Length() - 1;
135 }
136 
137 int PedigreeGlobals::GetTraitID(const char * name)
138 {
139  int idx = traitLookup.Integer(name);
140 
141  if (idx != -1) return idx;
142 
143  traitNames.Add(name);
144  traitLookup.SetInteger(name, traitCount);
145  return traitCount++;
146 }
147 
148 int PedigreeGlobals::GetAffectionID(const char * name)
149 {
150  int idx = affectionLookup.Integer(name);
151 
152  if (idx != -1) return idx;
153 
154  affectionNames.Add(name);
155  affectionLookup.SetInteger(name, affectionCount);
156  return affectionCount++;
157 }
158 
159 int PedigreeGlobals::GetCovariateID(const char * name)
160 {
161  int idx = covariateLookup.Integer(name);
162 
163  if (idx != -1) return idx;
164 
165  covariateNames.Add(name);
166  covariateLookup.SetInteger(name, covariateCount);
167  return covariateCount++;
168 }
169 
170 int PedigreeGlobals::GetStringID(const char * name)
171 {
172  int idx = stringLookup.Integer(name);
173 
174  if (idx != -1) return idx;
175 
176  stringNames.Add(name);
177  stringLookup.SetInteger(name, stringCount);
178  return stringCount++;
179 }
180 
181 int PedigreeGlobals::GetMarkerID(const char * name)
182 {
183  int idx = markerLookup.Integer(name);
184 
185  if (idx != -1) return idx;
186 
187  markerNames.Add(name);
188  markerLookup.SetInteger(name, markerCount);
189 
190  // Grow the marker info key ...
191  if (markerCount == 0)
192  {
193  markerInfoByInteger = new MarkerInfo * [16];
194 
195  for (int i = 0; i < 16; i++)
196  markerInfoByInteger[i] = NULL;
197  }
198  else if ((markerCount & (markerCount - 1)) == 0 && markerCount > 15)
199  {
200  MarkerInfo ** newKey = new MarkerInfo * [markerCount * 2];
201 
202  for (int i = 0; i < markerCount; i++)
203  newKey[i] = markerInfoByInteger[i];
204 
205  for (int i = markerCount; i < markerCount * 2; i++)
206  newKey[i] = NULL;
207 
208  delete [] markerInfoByInteger;
209 
210  markerInfoByInteger = newKey;
211  }
212 
213  return markerCount++;
214 }
215 
216 MarkerInfo * PedigreeGlobals::GetMarkerInfo(String & name)
217 {
218  MarkerInfo * info = (MarkerInfo *) markerInfoByName.Object(name);
219 
220  if (info != NULL) return info;
221 
222  info = new MarkerInfo(name);
223  markerInfoByName.Add(name, info);
224 
225  if (markerInfoCount >= markerInfoSize)
226  GrowMarkerInfo();
227 
228  markerInfo[markerInfoCount++] = info;
229 
230  int markerId = LookupMarker(name);
231  if (markerId >= 0) markerInfoByInteger[markerId] = info;
232 
233  return info;
234 }
235 
236 MarkerInfo * PedigreeGlobals::GetMarkerInfo(int markerId)
237 {
238  if (markerId >= markerCount)
239  error("Attempted to retrieve MarkerInfo using out-of-bounds index\n");
240 
241  if (markerInfoByInteger[markerId] != NULL)
242  return markerInfoByInteger[markerId];
243  else
244  return GetMarkerInfo(markerNames[markerId]);
245 }
246 
247 void PedigreeGlobals::GrowMarkerInfo()
248 {
249  int newSize = markerInfoSize ? 2 * markerInfoSize : 32;
250 
251  MarkerInfo ** newArray = new MarkerInfo * [newSize];
252 
253  if (markerInfoSize)
254  {
255  memcpy(newArray, markerInfo, sizeof(MarkerInfo *) * markerInfoSize);
256  delete [] markerInfo;
257  }
258 
259  markerInfo = newArray;
260  markerInfoSize = newSize;
261 }
262 
263 void PedigreeGlobals::FlagMissingMarkers(IntArray & missingMarkers)
264 {
265  int skipped_markers = 0;
266 
267  if (missingMarkers.Length())
268  {
269  StringArray names;
270 
271  printf("These markers couldn't be placed and won't be analysed:");
272 
273  for (int i = 0; i < missingMarkers.Length(); i++)
274  names.Push(GetMarkerInfo(missingMarkers[i])->name);
275  names.Sort();
276 
277  for (int i = 0, line = 80, lines = 0; i < missingMarkers.Length(); i++)
278  {
279  if (line + names[i].Length() + 1 > 79)
280  printf("\n "), line = 3, lines++;
281 
282  if (lines < 5)
283  {
284  printf("%s ", (const char *) names[i]);
285  line += names[i].Length() + 1;
286  }
287  else
288  skipped_markers++;
289  }
290 
291  if (skipped_markers)
292  printf("as well as %d other unlisted markers...", skipped_markers);
293 
294  printf("\n\n");
295  }
296 }
297 
298 void PedigreeGlobals::GetOrderedMarkers(IntArray & markers)
299 {
300  if (markers.Length() == 0)
301  {
302  markers.Dimension(markerCount);
303  markers.SetSequence(0, 1);
304  }
305 
306  MarkerInfo ** subset = new MarkerInfo * [markers.Length()];
307 
308  int count = 0;
309  IntArray missingMarkers;
310 
311  for (int i = 0; i < markers.Length(); i++)
312  {
313  MarkerInfo * info = GetMarkerInfo(markers[i]);
314 
315  if (info->chromosome != -1)
316  subset[count++] = info;
317  else
318  missingMarkers.Push(i);
319  }
320 
321  FlagMissingMarkers(missingMarkers);
322 
323  QuickSort(subset, count, sizeof(MarkerInfo *),
324  COMPAREFUNC MarkerInfo::ComparePosition);
325 
326  markers.Clear();
327  for (int i = 0; i < count; i++)
328  markers.Push(GetMarkerID(subset[i]->name));
329 }
330 
331 int PedigreeGlobals::SortMarkersInMapOrder(IntArray & markers, int chromosome)
332 {
333  if (markers.Length() == 0)
334  {
335  markers.Dimension(markerCount);
336  markers.SetSequence(0, 1);
337  }
338 
339  MarkerInfo ** subset = new MarkerInfo * [markers.Length()];
340 
341  int count = 0;
342  IntArray missingMarkers;
343 
344  for (int i = 0; i < markers.Length(); i++)
345  {
346  MarkerInfo * info = GetMarkerInfo(markers[i]);
347 
348  if (info->chromosome != -1)
349  subset[count++] = info;
350  else if (chromosome == -1)
351  missingMarkers.Push(i);
352  }
353 
354  if (chromosome == -1)
355  FlagMissingMarkers(missingMarkers);
356 
357  QuickSort(subset, count, sizeof(MarkerInfo *),
358  COMPAREFUNC MarkerInfo::ComparePosition);
359 
360  markers.Clear();
361 
362  int current_chromosome = -1, next_chromosome = 0;
363 
364  for (int i = 0; i < count; i++)
365  if (subset[i]->chromosome < chromosome)
366  continue;
367  else if (current_chromosome == -1 ||
368  subset[i]->chromosome == current_chromosome)
369  {
370  markers.Push(GetMarkerID(subset[i]->name));
371  current_chromosome = subset[i]->chromosome;
372  }
373  else if (!next_chromosome)
374  {
375  next_chromosome = subset[i]->chromosome;
376  break;
377  }
378 
379  delete [] subset;
380 
381  return next_chromosome;
382 }
383 
384 void PedigreeGlobals::VerifySexSpecificOrder()
385 {
386  if (markerCount <= 1)
387  return;
388 
389  MarkerInfo ** sortedMarkers = new MarkerInfo * [markerCount];
390 
391  for (int i = 0; i < markerCount; i++)
392  sortedMarkers[i] = GetMarkerInfo(i);
393 
394  QuickSort(sortedMarkers, markerCount, sizeof(MarkerInfo *),
395  COMPAREFUNC MarkerInfo::ComparePosition);
396 
397  double prev_female = sortedMarkers[0]->positionFemale;
398  double prev_male = sortedMarkers[0]->positionMale;
399  double curr_female, curr_male;
400 
401  int prev_chromosome = sortedMarkers[0]->chromosome;
402  int curr_chromosome;
403 
404  for (int i = 1; i < markerCount; i++)
405  {
406  curr_chromosome = sortedMarkers[i]->chromosome;
407  curr_female = sortedMarkers[i]->positionFemale;
408  curr_male = sortedMarkers[i]->positionMale;
409 
410  if (curr_chromosome == prev_chromosome &&
411  (curr_female < prev_female || curr_male < prev_male))
412  error("Sex-specific and sex-averaged maps are inconsistent.\n\n"
413  "In the sex-averaged map, marker %s (%.2f cM) follows marker %s (%.2f cM).\n"
414  "In the %smale map, marker %s (%.2f cM) PRECEDES marker %s (%.2f cM).\n",
415  (const char *) sortedMarkers[i]->name,
416  sortedMarkers[i]->position * 100,
417  (const char *) sortedMarkers[i-1]->name,
418  sortedMarkers[i-1]->position * 100,
419  curr_female < prev_female ? "fe" : "",
420  (const char *) sortedMarkers[i]->name,
421  (curr_female < prev_female ? curr_female : curr_male) * 100,
422  (const char *) sortedMarkers[i-1]->name,
423  (curr_female < prev_female ? prev_female : prev_male) * 100);
424 
425  prev_chromosome = curr_chromosome;
426  prev_female = curr_female;
427  prev_male = curr_male;
428  }
429 
430  delete [] sortedMarkers;
431 }
432 
433 void PedigreeGlobals::LoadAlleleFrequencies(const char * filename, bool required)
434 {
435  // This function is often called with an empty string, and not
436  // all implementations of the C library like that ...
437  if (filename[0] == 0)
438  {
439  if (required)
440  error("No name provided for required allele freuquency file\n");
441  else
442  return;
443  }
444 
445  // If we get here, the filename is not empty and things should
446  // work as planned
447  IFILE f = ifopen(filename, "rb");
448 
449  if (f == NULL)
450  {
451  if (required)
452  error("Failed to open required allele frequency file '%s'",
453  (const char *) filename);
454  else
455  return;
456  }
457 
458  LoadAlleleFrequencies(f);
459  ifclose(f);
460 }
461 
462 void PedigreeGlobals::LoadAlleleFrequencies(IFILE & input)
463 {
464  int done = 0;
465  String buffer;
466  StringArray tokens;
467  MarkerInfo *info = NULL;
468 
469  bool need_blank_line = false;
470  int allele_size, old_max, next_allele = 0; // Initialization avoids compiler warning
471 
472  while (!ifeof(input) && !done)
473  {
474  int i, j;
475 
476  buffer.ReadLine(input);
477 
478  tokens.Clear();
479  tokens.AddTokens(buffer, WHITESPACE);
480 
481  if (tokens.Length() < 1) continue;
482 
483  switch (toupper(tokens[0][0]))
484  {
485  case 'M' :
486  if (tokens.Length() == 1)
487  error("Unnamed marker in allele frequency file");
488  if (info != NULL)
489  need_blank_line |= info->AdjustFrequencies();
490  info = GetMarkerInfo(tokens[1]);
491  info->freq.Clear();
492  info->freq.Push(0.0);
493  next_allele = 1;
494  break;
495  case 'F' :
496  if (info != NULL)
497  for (i = 1; i < tokens.Length(); i++)
498  {
499  buffer = next_allele++;
500 
501  int allele = LoadAllele(info, buffer);
502 
503  if (allele >= info->freq.Length())
504  {
505  old_max = info->freq.Length();
506  info->freq.Dimension(allele + 1);
507  for (j = old_max; j < allele; j++)
508  info->freq[j] = 0.0;
509  }
510 
511  info->freq[allele] = tokens[i].AsDouble();
512  }
513  break;
514  case 'A' :
515  if (info == NULL) continue;
516 
517  if (tokens.Length() != 3)
518  error("Error reading named allele frequencies for locus %s\n"
519  "Lines with named alleles should have the format\n"
520  " A allele_label allele_frequency\n\n"
521  "But the following line was read:\n%s\n",
522  (const char *) info->name, (const char *) buffer);
523 
524  allele_size = LoadAllele(info, tokens[1]);
525  next_allele = atoi(tokens[1]) + 1;
526 
527  if (allele_size < 1)
528  error("Error reading named allele frequencies for locus %s\n"
529  "An invalid allele label was encountered\n",
530  (const char *) info->name);
531 
532  if (allele_size >= info->freq.Length())
533  {
534  old_max = info->freq.Length();
535  info->freq.Dimension(allele_size + 1);
536  for (i = old_max; i < allele_size; i++)
537  info->freq[i] = 0.0;
538  }
539 
540  info->freq[allele_size] = tokens[2];
541  break;
542  case 'E' :
543  done = 1;
544  break;
545  default :
546  error("Problem in allele frequency file.\n"
547  "Lines in this file should be of two types:\n"
548  " -- Marker name lines begin with an M\n"
549  " -- Frequency lines begin with an F\n\n"
550  "However the following line is different:\n%s\n",
551  (const char *) buffer);
552  }
553  }
554 
555  if (info != NULL)
556  need_blank_line |= info->AdjustFrequencies();
557 
558  if (need_blank_line) printf("\n");
559 }
560 
561 void PedigreeGlobals::LoadMarkerMap(const char * filename, bool filter)
562 {
563  IFILE f = ifopen(filename, "rb");
564  if (f == NULL) return;
565  LoadMarkerMap(f, filter);
566  ifclose(f);
567 }
568 
569 void PedigreeGlobals::LoadMarkerMap(IFILE & input, bool filter)
570 {
571  String buffer;
572  StringArray tokens;
573  bool first_pass = true;
574 
575  while (!ifeof(input))
576  {
577  buffer.ReadLine(input);
578 
579  tokens.Clear();
580  tokens.AddTokens(buffer, WHITESPACE);
581 
582  if (tokens.Length() < 1) continue;
583 
584  if (first_pass)
585  {
586  sexSpecificMap = (tokens.Length() == 5);
587 
588  // if (sexSpecificMap)
589  // printf("\n Found sex-specific map ...\n\n");
590 
591  first_pass = false;
592  }
593 
594  if (tokens.Length() != 3 && !sexSpecificMap)
595  error("Error reading map file\n"
596  "Each line in this file should include 3 fields:\n"
597  "CHROMOSOME, MARKER_NAME, and POSITION\n"
598  "However the following line has %d fields\n%s\n",
599  tokens.Length(), (const char *) buffer);
600 
601  if (tokens.Length() != 5 && sexSpecificMap)
602  error("Error reading map file\n"
603  "Each line in this file should include 5 fields:\n\n"
604  "CHROMOSOME, MARKER_NAME, SEX_AVERAGED_POS, FEMALE_POS AND MALE_POS\n\n"
605  "However the following line has %d fields\n%s\n",
606  tokens.Length(), (const char *) buffer);
607 
608  bool previous_state = String::caseSensitive;
609  String::caseSensitive = false;
610 
611  if ((tokens[0] == "CHR" || tokens[0] == "CHROMOSOME") &&
612  (tokens[1] == "MARKER" || tokens[1] == "MARKER_NAME" || tokens[1] == "MRK") &&
613  (tokens[2] == "KOSAMBI" || tokens[2] == "POS" || tokens[2] == "POSITION" ||
614  tokens[2] == "SEX_AVERAGED_POS" || tokens[2] == "CM" || tokens[2] == "HALDANE"))
615  continue;
616 
617  String::caseSensitive = previous_state;
618 
619  if (filter)
620  if (LookupMarker(tokens[1]) < 0)
621  continue;
622 
623  MarkerInfo * info = GetMarkerInfo(tokens[1]);
624 
625  int chr = (tokens[0][0] == 'x' || tokens[0][0] == 'X') ? 999 : (int) tokens[0];
626 
627  info->chromosome = chr;
628  info->position = (double) tokens[2] * 0.01;
629 
630  if (sexSpecificMap)
631  {
632  char * flag;
633 
634  double female = strtod(tokens[3], &flag);
635  if (*flag)
636  error("In the map file, the female cM position for marker\n"
637  "%s is %s. This is not a valid number.",
638  (const char *) tokens[1], (const char *) tokens[3]);
639 
640  double male = strtod(tokens[4], &flag);
641  if (*flag)
642  error("In the map file, the male cM position for marker\n"
643  "%s is %s. This is not a valid number.",
644  (const char *) tokens[1], (const char *) tokens[4]);
645 
646  info->positionFemale = (double) female * 0.01;
647  info->positionMale = (double) male * 0.01;
648  }
649  else
650  info->positionFemale = info->positionMale = info->position;
651  }
652 
653  if (sexSpecificMap) VerifySexSpecificOrder();
654 }
655 
656 void PedigreeGlobals::LoadBasepairMap(const char * filename)
657 {
658  IFILE f = ifopen(filename, "rb");
659  if (f == NULL)
660  error("The map file [%s] could not be opened\n\n"
661  "Please check that the filename is correct and that the file is\n"
662  "not being used by another program", filename);
663  LoadBasepairMap(f);
664  ifclose(f);
665 }
666 
667 void PedigreeGlobals::LoadBasepairMap(IFILE & input)
668 {
669  String buffer;
670  StringArray tokens;
671 
672  sexSpecificMap = false;
673 
674  while (!ifeof(input))
675  {
676  buffer.ReadLine(input);
677 
678  tokens.Clear();
679  tokens.AddTokens(buffer, WHITESPACE);
680 
681  if (tokens.Length() < 1) continue;
682 
683  if (tokens.Length() != 3)
684  error("Error reading map file\n"
685  "Each line in this file should include 3 fields:\n"
686  "CHROMOSOME, MARKER_NAME, and POSITION\n"
687  "However the following line has %d fields\n%s\n",
688  tokens.Length(), (const char *) buffer);
689 
690  bool previous_state = String::caseSensitive;
691  String::caseSensitive = false;
692 
693  if ((tokens[0] == "CHR" || tokens[0] == "CHROMOSOME") &&
694  (tokens[1] == "MARKER" || tokens[1] == "MARKER_NAME" || tokens[1] == "MRK") &&
695  (tokens[2] == "BASEPAIR" || tokens[2] == "POS" || tokens[2] == "POSITION"))
696  continue;
697 
698  String::caseSensitive = previous_state;
699 
700  MarkerInfo * info = GetMarkerInfo(tokens[1]);
701 
702  int chr = (tokens[0][0] == 'x' || tokens[0][0] == 'X') ? 999 : (int) tokens[0];
703 
704  info->chromosome = chr;
705  info->position = (double) tokens[2];
706  }
707 }
708 
709 int PedigreeGlobals::instanceCount = 0;
710 
711 PedigreeGlobals::~PedigreeGlobals()
712 {
713  if (--instanceCount == 0 && markerInfoSize)
714  {
715  for (int i = 0; i < markerInfoCount; i++)
716  delete markerInfo[i];
717  delete [] markerInfo;
718  delete [] markerInfoByInteger;
719  }
720 }
721 
722 void PedigreeGlobals::WriteMapFile(const char * filename)
723 {
724  if (!MarkerPositionsAvailable())
725  return;
726 
727  FILE * output = fopen(filename, "wt");
728 
729  if (output == NULL)
730  error("Creating map file \"%s\"", filename);
731 
732  WriteMapFile(output);
733  fclose(output);
734 }
735 
736 void PedigreeGlobals::WriteMapFile(FILE * output)
737 {
738  if (!sexSpecificMap)
739  fprintf(output, "CHR MARKER POS\n");
740  else
741  fprintf(output, "CHR MARKER POS POSF POSM\n");
742 
743  for (int i = 0; i < markerInfoCount; i++)
744  {
745  if (markerInfo[i]->chromosome != -1)
746  {
747  if (!sexSpecificMap)
748  fprintf(output, "%3d %-10s %g\n",
749  markerInfo[i]->chromosome,
750  (const char *) markerInfo[i]->name,
751  markerInfo[i]->position * 100.0);
752  else
753  fprintf(output, "%3d %-10s %g %g %g\n",
754  markerInfo[i]->chromosome,
755  (const char *) markerInfo[i]->name,
756  markerInfo[i]->position * 100.0,
757  markerInfo[i]->positionFemale * 100.0,
758  markerInfo[i]->positionMale * 100.0);
759  }
760  }
761 }
762 
763 void PedigreeGlobals::WriteFreqFile(const char * filename, bool old_format)
764 {
765  FILE * output = fopen(filename, "wt");
766 
767  if (output == NULL)
768  error("Creating allele frequency file \"%s\"", filename);
769 
770  WriteFreqFile(output, old_format);
771  fclose(output);
772 }
773 
774 void PedigreeGlobals::WriteFreqFile(FILE * output, bool old_format)
775 {
776  for (int i = 0; i < markerInfoCount; i++)
777  {
778  MarkerInfo * info = markerInfo[i];
779 
780  if (info->freq.Length() == 0) continue;
781 
782  fprintf(output, "M %s\n", (const char *) info->name);
783 
784  if (old_format && info->alleleLabels.Length() == 0)
785  for (int j = 1; j < info->freq.Length(); j++)
786  fprintf(output, "%s%.5f%s",
787  j % 7 == 1 ? "F " : "", info->freq[j],
788  j == info->freq.Length() - 1 ? "\n" : j % 7 == 0 ? "\n" : " ");
789  else
790  for (int j = 1; j < info->freq.Length(); j++)
791  if (info->freq[j] > 1e-7)
792  fprintf(output, "A %5s %.5f\n",
793  (const char *) info->GetAlleleLabel(j), info->freq[j]);
794  }
795 }
796 
797 bool PedigreeGlobals::MarkerPositionsAvailable()
798 {
799  for (int i = 0; i < markerInfoCount; i++)
800  if (markerInfo[i]->chromosome != -1)
801  return true;
802 
803  return false;
804 }
805 
806 bool PedigreeGlobals::AlleleFrequenciesAvailable()
807 {
808  for (int i = 0; i < markerInfoCount; i++)
809  if (markerInfo[i]->freq.Length() > 1)
810  return true;
811 
812  return false;
813 }
814 
815 int PedigreeGlobals::LoadAllele(int marker, String & token)
816 {
817  return LoadAllele(GetMarkerInfo(marker), token);
818 }
819 
820 int PedigreeGlobals::LoadAllele(MarkerInfo * info, String & token)
821 {
822  int allele = info->GetAlleleNumber(token);
823 
824  if (allele >= 0) return allele;
825 
826  static unsigned char lookup[128];
827  static bool init = false;
828 
829  if (!init)
830  {
831  init = true;
832 
833  for (int i = 0; i < 128; i++)
834  lookup[i] = 0;
835 
836  for (int i = '1'; i <= '9'; i++)
837  lookup[i] = 1;
838 
839  lookup[int('a')] = lookup[int('A')] = lookup[int('c')] = lookup[int('C')] = 2;
840  lookup[int('g')] = lookup[int('G')] = lookup[int('t')] = lookup[int('T')] = 2;
841  }
842 
843  int first = token[0];
844  bool goodstart = first > 0 && first < 128;
845 
846  if (token.Length() == 1 && goodstart && lookup[int(token[0])])
847  return info->NewAllele(token);
848 
849  if (!goodstart || lookup[int(token[0])] != 1)
850  return 0;
851 
852  int integer = token.AsInteger();
853  token = integer;
854 
855  allele = info->GetAlleleNumber(token);
856 
857  if (allele > 0)
858  return allele;
859 
860  if (integer <= 0) return 0;
861 
862  if (integer > 1000000)
863  {
864  static bool warn_user = true;
865 
866  if (warn_user)
867  {
868  printf("Some allele numbers for marker %s are > 1000000\n"
869  "All allele numbers >1000000 will be treated as missing\n\n",
870  (const char *) info->name);
871  warn_user = false;
872  }
873 
874  return 0;
875  }
876 
877  return info->NewAllele(token);
878 }
879 
880 std::ostream &operator << (std::ostream &stream, MarkerInfo &m)
881 {
882  stream << "MarkerInfo for marker " << m.name << std::endl;
883  stream << " located on chromsome " << m.chromosome << ":" << (int64_t)(100 * m.position) << std::endl;
884  stream << " allele count = " << m.freq.Length() << std::endl;
885  stream << " label count = " << m.alleleLabels.Length() << std::endl;
886  if (m.freq.Length() == m.alleleLabels.Length())
887  {
888  for (int i=0; i<m.freq.Length(); i++)
889  {
890  stream << " " << m.alleleLabels[i] << ":" << m.freq[i];
891  }
892  stream << std::endl;
893  }
894  else
895  {
896  stream << " output truncated - counts appear corrupt." << std::endl;
897  }
898  return stream;
899 }
900 
int ifeof(IFILE file)
Check to see if we have reached the EOF (returns 0 if not EOF).
Definition: InputFile.h:654
IFILE ifopen(const char *filename, const char *mode, InputFile::ifileCompression compressionMode=InputFile::DEFAULT)
Open a file with the specified name and mode, using a filename of "-" to indicate stdin/stdout.
Definition: InputFile.h:562
int ifclose(IFILE &file)
Close the file.
Definition: InputFile.h:580
InputFile & operator<<(InputFile &stream, const std::string &str)
Write to a file using streaming.
Definition: InputFile.h:736
Class for easily reading/writing files without having to worry about file type (uncompressed,...
Definition: InputFile.h:37