libStatGen Software  1
PedigreeDescription.cpp
1 /*
2  * Copyright (C) 2010 Regents of the University of Michigan
3  *
4  * This program is free software: you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation, either version 3 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program. If not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #include "PedigreeDescription.h"
19 #include "MapFunction.h"
20 #include "MathVector.h"
21 #include "Constant.h"
22 #include "FortranFormat.h"
23 #include "Error.h"
24 
25 #include <stdlib.h>
26 #include <ctype.h>
27 #include <string.h>
28 #include <math.h>
29 
30 PedigreeDescription::PedigreeDescription()
31 {
32  columnCount = 0;
33  mendelFormat = false;
34 }
35 
36 PedigreeDescription::~PedigreeDescription()
37 { };
38 
39 PedigreeDescription & PedigreeDescription::operator = (PedigreeDescription & rhs)
40 {
41  columnCount = rhs.columnCount;
42 
43  columns = rhs.columns;
44  columnHash = rhs.columnHash;
45 
46  return *this;
47 };
48 
49 void PedigreeDescription::Load(IFILE & input, bool warnIfLinkage)
50 {
51  // Check if we are dealing with a linkage format data file
52  String buffer;
53  StringArray tokens;
54 
55  mendelFormat = false;
56 
57  ReadLineHelper(input, buffer, tokens);
58  ifrewind(input);
59 
60  if (tokens.Length() == 4 && isdigit(tokens[0][0]))
61  {
62  if (warnIfLinkage) printf("Data file looks like a LINKAGE format file...\n\n");
63  LoadLinkageDataFile(input);
64  return;
65  }
66 
67  if (buffer.Length() > 18
68  && (buffer.SubStr(8,8).SlowCompare("AUTOSOME") == 0 ||
69  buffer.SubStr(8,8).SlowCompare("X-LINKED") == 0)
70  && (isdigit(buffer[16]) || isdigit(buffer[17]))
71  && (isdigit(buffer[18]) || isdigit(buffer[19]) ||
72  (buffer.Length() > 19 && isdigit(buffer[20]))))
73  {
74  printf("Data file looks like a MENDEL format file...\n"
75  " Activating EXPERIMENTAL support for this format\n\n");
76  LoadMendelDataFile(input);
77  return;
78  }
79 
80  // Reset things
81  ifrewind(input);
82  int done = 0;
83  int line = 0;
84 
85  columns.Clear();
86  columnHash.Clear();
87  columnCount = 0;
88 
89  while (!ifeof(input) && !done)
90  {
91  int i;
92 
93  buffer.ReadLine(input);
94  line++;
95 
96  tokens.Clear();
97  tokens.AddTokens(buffer, WHITESPACE);
98 
99  if (tokens.Length() < 1) continue;
100 
101  if (tokens.Length() == 1)
102  error("Problem reading data file:\n"
103  "Item #%d (of type %s) has no name.",
104  columnCount+1, (const char *) tokens[0]);
105 
106  switch (toupper(tokens[0][0]))
107  {
108  case 'A' :
109  columnHash.Push(GetAffectionID(tokens[1]));
110  columns.Push(pcAffection);
111  columnCount++;
112  break;
113  case 'M' :
114  columnHash.Push(GetMarkerID(tokens[1]));
115  columns.Push(pcMarker);
116  columnCount++;
117  break;
118  case 'T' :
119  columnHash.Push(GetTraitID(tokens[1]));
120  columns.Push(pcTrait);
121  columnCount++;
122  break;
123  case 'C' :
124  columnHash.Push(GetCovariateID(tokens[1]));
125  columns.Push(pcCovariate);
126  columnCount++;
127  break;
128  case '$' :
129  columnHash.Push(GetStringID(tokens[1]));
130  columns.Push(pcString);
131  columnCount++;
132  break;
133  case 'S' :
134  i = (int) tokens[0].SubStr(1);
135  i = i > 0 ? i : 1;
136  while (i--)
137  {
138  columns.Push(pcSkip);
139  columnHash.Push(0);
140  columnCount++;
141  }
142  break;
143  case 'Z' :
144  columnHash.Push(0);
145  columns.Push(pcZygosity);
146  columnCount++;
147  break;
148  case 'V' :
149  GetMarkerID(tokens[1]);
150  break;
151  case 'E' :
152  done = 1;
153  break;
154  case 'U' :
155  if (toupper(tokens[0][1]) == 'T' && toupper(tokens[0][2]) == 'C')
156  {
157  int c = GetCovariateID(tokens[1]);
158  int t = GetTraitID(tokens[1]);
159 
160  if (c >= 32767 || t >= 32767)
161  error("Internal error processing data file\n");
162 
163  columnHash.Push(t * 32768 + c);
164  columns.Push(pcUndocumentedTraitCovariate);
165  columnCount++;
166  break;
167  }
168  default :
169  error("Problem in data file (line %d):\n%s\n",
170  line, (const char *) buffer);
171  }
172  }
173 
174  columns.Push(pcEnd);
175  columnHash.Push(0);
176 };
177 
178 void PedigreeDescription::Load(const char * iFilename, bool warnIfLinkage)
179 {
180  IFILE f = ifopen(iFilename, "rb");
181 
182  if (f == NULL)
183  error(
184  "The datafile %s cannot be opened\n\n"
185  "Common causes for this problem are:\n"
186  " * You might not have used the correct options to specify input file names,\n"
187  " please check the program documentation for information on how to do this\n\n"
188  " * The file doesn't exist or the filename might have been misspelt\n\n"
189  " * The file exists but it is being used by another program which you will need\n"
190  " to close before continuing\n\n"
191  " * The file is larger than 2GB and you haven't compiled this application with\n"
192  " large file support.\n\n",
193  iFilename);
194 
195  Load(f, warnIfLinkage);
196  ifclose(f);
197 
198  filename = iFilename;
199 };
200 
201 void PedigreeDescription::LoadMap(const char * iFilename)
202 {
203  IFILE f = ifopen(iFilename, "rb");
204 
205  if (f == NULL)
206  error(
207  "The mapfile %s cannot be opened\n\n"
208  "Please check that the file exists and is not being used by another program\n"
209  "To find out how to set input filenames, check the documentation\n",
210  iFilename);
211 
212  LoadMap(f);
213  ifclose(f);
214 };
215 
216 void PedigreeDescription::LoadMap(IFILE & input)
217 {
218  columns.Clear();
219  columnHash.Clear();
220  columnCount = 0;
221 
222  int lastposition = 0;
223  String buffer;
224  StringArray tokens;
225 
226  buffer.ReadLine(input);
227  tokens.AddTokens(buffer, WHITESPACE);
228 
229  while (tokens.Length() == 0 && !ifeof(input))
230  {
231  buffer.ReadLine(input);
232  tokens.AddTokens(buffer, WHITESPACE);
233  }
234 
235  if (tokens.Length() != 3)
236  error("Error reading map file header, which has %d columns.\n"
237  "Three columns were expected, corresponding to\n"
238  "MARKER_ID, MARKER_NAME and BASE_PAIR_POSITION\n"
239  "The offending header is transcribed below:\n\n"
240  "%s", tokens.Length(), (const char *) buffer);
241  else
242  printf("Map file column labels\n"
243  " -- COLUMN 1, Expecting MARKER_ID, Read %s\n"
244  " -- COLUMN 2, Expecting MARKER_NAME, Read %s\n"
245  " -- COLUMN 3, Expection BASE_PAIR_POSITION, Read %s\n\n",
246  (const char *)(tokens[0]), (const char *)(tokens[1]),
247  (const char *)(tokens[2]));
248 
249  int line = 1;
250  while (!ifeof(input))
251  {
252  int serial;
253  long position;
254 
255  buffer.ReadLine(input);
256  line++;
257 
258  tokens.Clear();
259  tokens.AddTokens(buffer, WHITESPACE);
260 
261  if (tokens.Length() < 1) continue;
262  if (tokens.Length() != 3)
263  error("Each line in the map file should have 3 tokens, corresponding\n"
264  "to MARKER_ID, MARKER_NAME and BASE_PAIR_POSITION respectively\n"
265  "However, there are %d tokens in line %d, transcribed below:\n\n"
266  "%s", tokens.Length(), line, (const char *) buffer);
267 
268  serial = (int) tokens[0];
269  if (serial != columnCount + 1)
270  error("Reading Marker Index from Map File...\n"
271  "Markers should be indexed consecutively starting at 1\n"
272  "Marker %d does not fit this pattern\n", columnCount + 1);
273 
274  position = (int) tokens[2];
275  if (position < lastposition)
276  error("Reading Marker Position from Map File...\n"
277  "Marker position should be in base-pairs\n"
278  "and markers should be in map order\n");
279 
280  // TODO -- store marker locations somewhere!
281  lastposition = position;
282 
283  columnHash.Push(GetMarkerID(tokens[1]));
284  columns.Push(pcMarker);
285  columnCount++;
286 
287  GetMarkerInfo(tokens[1])->position = position * 1e-8;
288  }
289 
290  columns.Push(pcEnd);
291  columnHash.Push(0);
292 };
293 
294 int PedigreeDescription::CountTextColumns()
295 {
296  int count = 0;
297 
298  for (int i = 0; i < columnCount; i++, count++)
299  if (columns[i] == pcMarker)
300  count++;
301 
302  return count;
303 }
304 
305 void PedigreeDescription::LoadLinkageDataFile(const char * iFilename)
306 {
307  IFILE f = ifopen(iFilename, "rb");
308 
309  if (f == NULL)
310  error(
311  "The linkage format datafile %s cannot be opened\n\n"
312  "Please check that the file exists and is not being used by another program\n"
313  "To find out how to set input filenames, check the documentation\n",
314  iFilename);
315 
316  LoadLinkageDataFile(f);
317  ifclose(f);
318 
319  filename = iFilename;
320 };
321 
322 void PedigreeDescription::LoadLinkageDataFile(IFILE & input)
323 {
324  columns.Clear();
325  columnHash.Clear();
326  columnCount = 0;
327 
328  String buffer, label;
329  StringArray tokens;
330 
331  ReadLineHelper(input, buffer, tokens);
332 
333  if (tokens.Length() != 4 || tokens[2].AsInteger() != (int) chromosomeX ||
334  tokens[0].AsInteger() < 0)
335  error("Cannot handle first line of data file\n\n"
336  "Expecting four (4) numeric values, which correspond to:\n"
337  " num-loci -- number of loci in the pedigree\n"
338  " this value must be positive\n"
339  " risk-locus -- locus for which risks should be calculated\n"
340  " this value will be ignored\n"
341  " sex-link -- are the loci sex linked [0 - No, 1 - Yes]\n"
342  " %s\n"
343  " program -- which LINKAGE program do you want to use?\n"
344  " this value will also be ignored\n\n"
345  "The actual input read:\n%s\n",
346  chromosomeX ? "expecting X-linked data, so this value must be ONE (1)"
347  : "expecting autosomal data, so this must be ZERO (0)",
348  (const char *) buffer);
349 
350  int numloci = tokens[0];
351 
352  ReadLineHelper(input, buffer, tokens);
353 
354  if (tokens.Length() != 4 ||
355  tokens[0].AsInteger() != 0 ||
356  tokens[3].AsInteger() != 0)
357  error("Cannot handle second line of data file\n\n"
358  "Expecting four (4) numeric values, which correspond to:\n"
359  " mutation-model -- must be zero, corresponding to no mutation\n"
360  " male-mutation-rate -- ignored\n"
361  " female-mutation-rate -- ignored\n"
362  " linkage-disequilibrium -- must be zero, may be used in the future to\n"
363  " read haplotype frequencies\n\n"
364  "The actual input read:\n%s\n", (const char *) buffer);
365 
366  StringArray markerOrder;
367  int unknown = 0;
368 
369  ReadLineHelper(input, buffer, markerOrder);
370 
371  if (markerOrder.Length() > numloci)
372  error("The third line of the data file lists marker order\n\n"
373  "Although %d loci are defined [in the first line],\n"
374  "this line includes %d values:\n%s\n",
375  numloci, markerOrder.Length(), (const char *) buffer);
376 
377  IntArray locus;
378  bool need_blank_line = false;
379 
380  while (!ifeof(input) && numloci--)
381  {
382  if (ReadLineHelper(input, buffer, tokens) == 0)
383  error("Linkage data file ends unexpectedly");
384 
385  if (tokens.Length() < 2)
386  error("Incomplete locus information in data file\n"
387  "Information for each locus should include 2 or more fiels\n"
388  "The expected fields are:\n"
389  " field_type -- indicator of locus type (trait, marker,...)\n"
390  " alleles -- number of alleles\n"
391  " name -- locus name, preceded by hash (#) sign\n\n"
392  "The actual input read:\n%s\n", (const char *) buffer);
393 
394  int locus_type = (int) tokens[0];
395  int alleles = (int) tokens[1];
396 
397  String locus_name("LOCUS");
398  locus_name += ++unknown;
399 
400  if (tokens.Length() > 2 && tokens[2][0] == '#')
401  {
402  if (tokens[2][1] != 0)
403  locus_name = tokens[2].SubStr(1);
404  else if (tokens.Length() > 3)
405  locus_name = tokens[3];
406  }
407 
408  if ((locus_type == 4 && alleles == 0) ||
409  (locus_type == 4 && alleles == 1))
410  {
411  columnHash.Push(GetCovariateID(locus_name));
412  columns.Push(pcCovariate);
413  columnCount++;
414  continue;
415  }
416 
417  if (locus_type == 0 && alleles == 0)
418  {
419  columnHash.Push(GetTraitID(locus_name));
420  columns.Push(pcTrait);
421  columnCount++;
422  continue;
423  }
424 
425  if (ReadLineHelper(input, buffer, tokens) != alleles)
426  error("Expecting %d allele frequencies, but input has %d columns:\n"
427  "%s\n", alleles, tokens.Length(), (const char *) buffer);
428 
429  Vector frequencies(alleles + 1);
430 
431  frequencies[0] = 0.0;
432  for (int i = 1; i <= alleles; i++)
433  frequencies[i] = (double) tokens[i - 1];
434 
435  double sum = frequencies.Sum();
436 
437  if (sum <= 0.0)
438  error("Allele frequencies at %s sum to %f, which doesn't make sense\n",
439  (const char *) locus_name, sum);
440 
441  if (fabs(sum - 1.0) > 1.2e-5)
442  {
443  printf("Allele frequencies at %s sum to %f, adjusted to 1.0\n",
444  (const char *) locus_name, sum);
445  need_blank_line = true;
446  }
447 
448  if (sum != 1.0)
449  frequencies *= 1.0 / sum;
450 
451  switch (locus_type)
452  {
453  case 1 :
454  {
455  // Affection
456  columnHash.Push(GetAffectionID(locus_name));
457  columns.Push(pcAffection);
458  columnCount++;
459 
460  // Read number of liability classes
461  if (ReadLineHelper(input, buffer, tokens) == 0)
462  error("Linkage data file ends unexpectedly\n");
463 
464  // Skip liability class data
465  int classes = tokens[0];
466  if (classes > 1)
467  {
468  columnHash.Push(0);
469  columns.Push(pcSkip);
470  columnCount++;
471  }
472 
473  // Separate liability class rows for males and females for X-linked data
474  if (chromosomeX) classes *= 2;
475 
476  while (classes--)
477  if (ReadLineHelper(input, buffer, tokens) == 0)
478  error("Linkage data file ends unexpectedly\n");
479 
480  // Ignore map location for quantitative variables
481  locus.Push(-1);
482  }
483  break;
484  case 3 :
485  {
486  columnHash.Push(GetMarkerID(locus_name));
487  columns.Push(pcMarker);
488  columnCount++;
489 
490  // Store allele frequencies
491  MarkerInfo * info = GetMarkerInfo(locus_name);
492 
493  info->freq = frequencies;
494 
495  // Initialize allele labels
496  info->alleleLabels.Clear();
497  for (int i = 0; i < frequencies.Length(); i++)
498  info->alleleLabels.Push(label = i);
499  info->IndexAlleles();
500 
501  // Store marker id, so that we can track map location
502  locus.Push(GetMarkerID(locus_name));
503  }
504  break;
505  case 0 :
506  {
507  // Read number of quantitative variables
508  if (ReadLineHelper(input, buffer, tokens) == 0)
509  error("Linkage data file ends unexpectedly\n");
510 
511  // Add each quantitative variable to pedigree
512  // Discard information on means
513  for (int vars = tokens[0], i = 0; i < vars; i++)
514  {
515  if (ReadLineHelper(input, buffer, tokens) == 0)
516  error("Linkage data file ends unexpectedly\n");
517 
518  String trait_name(locus_name);
519 
520  if (i)
521  {
522  trait_name += ".";
523  trait_name += i + 1;
524  }
525 
526  columnHash.Push(GetTraitID(trait_name));
527  columns.Push(pcTrait);
528  columnCount++;
529  }
530 
531  // Skip var-covar matrix
532  if (ReadLineHelper(input, buffer, tokens) == 0)
533  error("Linkage data file ends unexpectedly\n");
534 
535  // Skip heterozygote scaling factor for var-covar matrix
536  if (ReadLineHelper(input, buffer, tokens) == 0)
537  error("Linkage data file ends unexpectedly\n");
538 
539  // Ignore map location for quantitative variables
540  locus.Push(-1);
541  }
542  break;
543  case 2 :
544  error("The data file includes binary factors\n"
545  "Regretably, loci of this type are not supported\n\n");
546  break;
547  default :
548  error("Unsupported locus type [%d] in data file", locus_type);
549  break;
550  }
551  }
552 
553  if (need_blank_line) printf("\n");
554 
555  columns.Push(pcEnd);
556  columnHash.Push(0);
557 
558  ReadLineHelper(input, buffer, tokens);
559  int sexDifference = tokens.Length() ? tokens[0].AsInteger() : -1;
560  if (tokens.Length() != 2 ||
561  (sexDifference != 0 && sexDifference != 2) ||
562  tokens[1].AsInteger() != 0)
563  error("Error retrieving recombination information\n\n"
564  "Expecting two (2) numeric values, which correspond to:\n"
565  " sex-difference -- must be zero (no difference) or two (sex specific recombination)\n"
566  " map-function -- must be zero, that is, no interference\n"
567  "The actual input read:\n%s\n", (const char *) buffer);
568 
569  Vector distances[2];
570  bool distance_in_centimorgans = false;
571 
572  for (int r = 0; r <= sexDifference; r += 2)
573  {
574  ReadLineHelper(input, buffer, tokens);
575  if (tokens.Length() != markerOrder.Length() - 1)
576  error("Error retrieving recombination information\n\n"
577  "Expecting %d recombination fractions (current map includes %d loci)\n"
578  "Instead the following line was input:\n%s\n",
579  markerOrder.Length() - 1, markerOrder.Length(), (const char *) buffer);
580 
581  distances[r >> 1].Dimension(tokens.Length());
582  for (int i = 0; i < tokens.Length(); i++)
583  distances[r >> 1][i] = (double) tokens[i];
584 
585  if (distances[r >> 1].Min() < 0.0)
586  error("Linkage datafile specifies negative recombination fractions");
587 
588  bool centimorgans = distances[r >> 1].Max() > 0.5;
589 
590  if (centimorgans && !distance_in_centimorgans)
591  printf(" Some recombination fractions in datafile are greater than 0.5,\n"
592  " so recombination fractions will be interpreted as cM distances\n\n");
593 
594  distance_in_centimorgans |= centimorgans;
595  }
596 
597  double position = 0.0, positionMale = 0.0;
598 
599  for (int i = 0, moving = false; i < markerOrder.Length(); i++)
600  {
601  int m = markerOrder[i].AsInteger() - 1;
602 
603  if (m < 0 || m >= locus.Length())
604  error("The marker order in the linkage datafile is invalid\n");
605 
606  m = locus[m];
607 
608  if (m != -1)
609  {
610  MarkerInfo * info = GetMarkerInfo(m);
611  info->chromosome = chromosomeX ? 9999 : 0;
612 
613  if (sexDifference == 2)
614  info->position = (position + positionMale) * 0.5,
615  info->positionFemale = position,
616  info->positionMale = positionMale;
617  else
618  info->position = info->positionMale = info->positionFemale = position;
619 
620  moving = true;
621  }
622 
623  if (i < markerOrder.Length() - 1 && moving)
624  position += distance_in_centimorgans ?
625  0.01 * distances[0][i] : RecombinationToDistance(distances[0][i]);
626 
627  if (sexDifference == 2 && i < markerOrder.Length() - 1 && moving)
628  positionMale += distance_in_centimorgans ?
629  0.01 * distances[1][i] : RecombinationToDistance(distances[1][i]);
630  }
631 }
632 
633 int PedigreeDescription::ReadLineHelper(IFILE & input,
634  String & buffer,
635  StringArray & tokens)
636 {
637  do
638  {
639  // Read Line
640  buffer.ReadLine(input);
641  buffer.Trim();
642 
643  // Strip comments marked with >>
644  int pos = buffer.FastFind(">>");
645  if (pos == -1) pos = buffer.FastFind("<<");
646  if (pos == -1) pos = buffer.Length() + 1;
647  if (buffer[0] == '#') pos = 0;
648 
649  // Find space/tab delimited tokens
650  tokens.Clear();
651  tokens.AddTokens(buffer.Left(pos - 1), WHITESPACE);
652 
653  }
654  while (tokens.Length() == 0 && !ifeof(input));
655 
656  return tokens.Length();
657 }
658 
659 void PedigreeDescription::LoadMendelDataFile(const char * iFilename)
660 {
661  IFILE f = ifopen(iFilename, "rb");
662 
663  if (f == NULL)
664  error(
665  "The MENDEL format datafile %s cannot be opened\n\n"
666  "Please check that the file exists and is not being used by another program\n"
667  "To find out how to set input filenames, check the documentation\n",
668  iFilename);
669 
670  LoadMendelDataFile(f);
671  ifclose(f);
672 };
673 
674 void PedigreeDescription::LoadMendelDataFile(IFILE & file)
675 {
676  // Processes mendel format file
677  mendelFormat = true;
678 
679  // Codominant markers are mapped to markers
680  // Non-codominant markers are mapped into multiple "affection status"
681  // (Y/N) variables
682  columns.Clear();
683  columnHash.Clear();
684  columnCount = 0;
685 
686  FortranFormat parser;
687 
688  // Variables for storing parsed input
689  String locusName;
690  String locusType;
691  String alleleLabel;
692  String alleleFreq;
693  String phenotype;
694  String genotype;
695  int phenoCount;
696  int alleleCount;
697 
698  while (!ifeof(file))
699  {
700  // Cycle through headers for each locus
701  parser.SetInputFile(file);
702  parser.SetFormat("(2A8,I2,I3)");
703 
704  // After retrieving locus name, check that we haven't tried to
705  // read past the end-of-file
706  parser.GetNextField(locusName);
707  parser.GetNextField(locusType);
708  alleleCount = parser.GetNextInteger();
709  phenoCount = parser.GetNextInteger();
710 
711  if (locusName.IsEmpty() && locusType.IsEmpty() && alleleCount == 0 &&
712  phenoCount == 0 && ifeof(file))
713  break;
714 
715  // Only recognize autosomal and x-linked loci
716  if (locusType.Compare("AUTOSOME") != 0 && locusType.Compare("X-LINKED"))
717  error("Unrecognized locus type '%s' in Mendel data file\n\n"
718  "Recognized locus types are \"AUTOSOME\" and \"X-LINKED\".",
719  (const char *) locusType);
720 
721  if (locusType.Compare("AUTOSOME") == 0 && chromosomeX)
722  error("The data file indicates that locus %s is AUTOSOMAL, but\n"
723  "X-LINKED loci were expected as input\n",
724  (const char *) locusName);
725 
726  if (locusType.Compare("X-LINKED") == 0 && !chromosomeX)
727  error("The data file indicates that locus %s is X-LINKED, but\n"
728  "AUTOSOMAL loci were expected as input\n",
729  (const char *) locusName);
730 
731  if (locusName.IsEmpty())
732  error("Blank locus name encountered in data file\n");
733 
734  if (phenoCount == 0)
735  {
736  // Co-dominant marker
737  columns.Push(pcMarker);
738  columnHash.Push(GetMarkerID(locusName));
739  columnCount++;
740 
741  // Update marker info with allele labels and frequencies
742  MarkerInfo * info = GetMarkerInfo(locusName);
743 
744  info->alleleLabels.Clear();
745  info->alleleLabels.Push("");
746  info->freq.Clear();
747 
748  parser.SetFormat("(2A8)");
749 
750  // Mendel allows allele names to be specified with frequencies
751  // left blank
752  for (int i = 0; i < alleleCount; i++)
753  {
754  parser.GetNextField(alleleLabel);
755  parser.GetNextField(alleleFreq);
756 
757  if (alleleLabel.IsEmpty())
758  error("Locus %s is missing allele label for allele #%d\n",
759  (const char *) locusName, i+1);
760 
761  info->alleleLabels.Push(alleleLabel);
762 
763  if (!alleleFreq.IsEmpty())
764  {
765  if (info->freq.Length() == 0)
766  info->freq.Push(0.0);
767 
768  info->freq.Push(alleleFreq.AsDouble());
769  }
770  }
771  info->IndexAlleles();
772 
773  if (info->alleleLabels.Length() != info->freq.Length() &&
774  info->freq.Length() != 0)
775  error("Locus %s is missing allele frequency information for %d alleles\n",
776  (const char *) locusName,
777  info->alleleLabels.Length() - info->freq.Length());
778  }
779  else
780  {
781  // Non-codominant marker, which we decompose into multiple traits...
782  parser.SetFormat("(2A8)");
783 
784  // First skip allele frequency information
785  for (int i = 0; i < alleleCount; i++)
786  {
787  parser.GetNextField(alleleLabel);
788  parser.GetNextField(alleleFreq);
789  }
790 
791  // Then read in each phenotype
792  for (int i = 0; i < alleleCount; i++)
793  {
794  parser.SetFormat("(A8,I3)");
795  parser.GetNextField(phenotype);
796  int genoCount = parser.GetNextInteger();
797 
798  parser.SetFormat("(A17)");
799  for (int j = 0; j < genoCount; j++)
800  parser.GetNextField(genotype);
801 
802  columns.Push(pcAffection);
803  columnHash.Push(GetAffectionID(locusName + "->" + phenotype));
804  columnCount++;
805  }
806  }
807  }
808 
809  columns.Push(pcEnd);
810  columnHash.Push(0);
811 }
812 
813 int PedigreeDescription::CountColumns(int type)
814 {
815  int count = 0;
816 
817  for (int i = 0; i < columns.Length(); i++)
818  if (columns[i] == type)
819  count++;
820 
821  return count;
822 }
823 
824 const char * PedigreeDescription::ColumnSummary(String & string)
825 {
826  string.Clear();
827  UpdateSummary(string, pcMarker, " markers [x2 cols]");
828  UpdateSummary(string, pcTrait, " traits");
829  UpdateSummary(string, pcAffection, " discrete traits");
830  UpdateSummary(string, pcCovariate, " covariates");
831  UpdateSummary(string, pcString, " strings");
832  UpdateSummary(string, pcZygosity, " zygosity");
833  UpdateSummary(string, pcSkip, " skipped");
834  return string;
835 }
836 
837 void PedigreeDescription::UpdateSummary(String & string, int type, const char * label)
838 {
839  int count = CountColumns(type);
840 
841  if (count)
842  {
843  if (string.Length())
844  string += ", ";
845  string += count;
846  string += label;
847  }
848 }
849 
850 
851 void PedigreeDescription::AddMarkerColumn(const char * markerName)
852 {
853  if (columns.Last() == pcEnd)
854  {
855  columns.Pop();
856  columnHash.Pop();
857  }
858 
859  columnHash.Push(GetMarkerID(markerName));
860  columns.Push(pcMarker);
861  columnCount++;
862 }
863 
864 void PedigreeDescription::AddCovariateColumn(const char * covariateName)
865 {
866  if (columns.Last() == pcEnd)
867  {
868  columns.Pop();
869  columnHash.Pop();
870  }
871 
872  columnHash.Push(GetCovariateID(covariateName));
873  columns.Push(pcCovariate);
874  columnCount++;
875 }
876 
877 void PedigreeDescription::AddTraitColumn(const char * traitName)
878 {
879  if (columns.Last() == pcEnd)
880  {
881  columns.Pop();
882  columnHash.Pop();
883  }
884 
885  columnHash.Push(GetCovariateID(traitName));
886  columns.Push(pcTrait);
887  columnCount++;
888 }
889 
890 void PedigreeDescription::AddAffectionColumn(const char * affectionName)
891 {
892  if (columns.Last() == pcEnd)
893  {
894  columns.Pop();
895  columnHash.Pop();
896  }
897 
898  columnHash.Push(GetAffectionID(affectionName));
899  columns.Push(pcAffection);
900  columnCount++;
901 }
902 
903 void PedigreeDescription::AddStringColumn(const char * stringName)
904 {
905  if (columns.Last() == pcEnd)
906  {
907  columns.Pop();
908  columnHash.Pop();
909  }
910 
911  columnHash.Push(GetStringID(stringName));
912  columns.Push(pcString);
913  columnCount++;
914 }
915 
916 void PedigreeDescription::AddZygosityColumn()
917 {
918  if (columns.Last() == pcEnd)
919  {
920  columns.Pop();
921  columnHash.Pop();
922  }
923 
924  columnHash.Push(0);
925  columns.Push(pcZygosity);
926  columnCount++;
927 }
928 
929 void PedigreeDescription::AddSkippedColumn()
930 {
931  if (columns.Last() == pcEnd)
932  {
933  columns.Pop();
934  columnHash.Pop();
935  }
936 
937  columnHash.Push(0);
938  columns.Push(pcSkip);
939  columnCount++;
940 }
941 
int ifeof(IFILE file)
Check to see if we have reached the EOF (returns 0 if not EOF).
Definition: InputFile.h:654
Class for easily reading/writing files without having to worry about file type (uncompressed, gzip, bgzf) when reading.
Definition: InputFile.h:36
IFILE ifopen(const char *filename, const char *mode, InputFile::ifileCompression compressionMode=InputFile::DEFAULT)
Open a file with the specified name and mode, using a filename of "-" to indicate stdin/stdout...
Definition: InputFile.h:562
void ifrewind(IFILE file)
Reset to the beginning of the file (cannot be done for stdin/stdout).
Definition: InputFile.h:642
int ifclose(IFILE &file)
Close the file.
Definition: InputFile.h:580