libStatGen Software 1
PedigreeTrim.cpp
1/*
2 * Copyright (C) 2010 Regents of the University of Michigan
3 *
4 * This program is free software: you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation, either version 3 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18#include "Pedigree.h"
19
20void Pedigree::ShowTrimHeader(bool & flag)
21{
22 if (flag)
23 {
24 printf("Trimming uninformative individuals...\n");
25 flag = false;
26 }
27}
28
29void Pedigree::Trim(bool quiet, int * informative)
30{
31 int newCount = 0;
32 Person ** newPersons = new Person * [count];
33
34 // This function applies the following filters to reduce complexity
35 // of pedigree
36 //
37 // RULE 1: Remove all pedigrees no genotype or phenotype data
38 // RULE 2: Remove leaf individuals with no data
39 // RULE 3: Remove founder couples with <2 offspring and no data
40
41 bool showHeader = true;
42 IntArray discardable, offspring, mates, haveData;
43
44 for (int f = 0; f < familyCount; f++)
45 {
46 Family * fam = families[f];
47
48 // Cache for storing indicators about whether each family member is
49 // informative
50 haveData.Dimension(fam->count);
51
52 // Check that some data is available in the family
53 int hasData = false;
54 for (int i = fam->first; i <= fam->last; i++)
55 if (informative == NULL)
56 hasData |= haveData[persons[i]->traverse] = persons[i]->haveData();
57 else
58 hasData |= haveData[persons[i]->traverse] = informative[i];
59
60 if (!hasData)
61 {
62 if (!quiet)
63 {
64 ShowTrimHeader(showHeader);
65 printf(" Removing family %s: No data\n", (const char *) fam->famid);
66 }
67
68 for (int i = fam->first; i <= fam->last; i++)
69 delete persons[i];
70
71 continue;
72 }
73
74 // Assume that we need everyone in the family
75 discardable.Dimension(fam->count);
76 discardable.Set(0);
77
78 bool trimming = true;
79
80 while (trimming)
81 {
82 trimming = false;
83
84 // Tally the number of offspring for each individual
85 offspring.Dimension(fam->count);
86 offspring.Zero();
87
88 // Tally the number of mates for each individual
89 mates.Dimension(fam->count);
90 mates.Set(-1);
91
92 // In the first round, we count the number of offspring
93 // for each individual in the current trimmed version of the
94 // pedigree
95 for (int i = fam->count - 1; i >= fam->founders; i--)
96 {
97 if (discardable[i]) continue;
98
99 Person & p = *(persons[fam->path[i]]);
100
101 if (discardable[p.father->traverse])
102 continue;
103
104 if (offspring[i] == 0 && !haveData[p.traverse])
105 {
106 trimming = true;
107 discardable[i] = true;
108 continue;
109 }
110
111 int father = p.father->traverse;
112 int mother = p.mother->traverse;
113
114 if (mates[father] == -1 && mates[mother] == -1)
115 {
116 mates[father] = mother,
117 mates[mother] = father;
118 }
119 else if (mates[father] != mother)
120 {
121 if (mates[father] >= 0)
122 mates[mates[father]] = -2;
123
124 if (mates[mother] >= 0)
125 mates[mates[mother]] = -2;
126
127 mates[mother] = -2;
128 mates[father] = -2;
129 }
130
131 offspring[father]++;
132 offspring[mother]++;
133 }
134
135 // In the second pass, we remove individuals with no
136 // data who are founders with a single offspring (and
137 // no multiple matings) or who have no descendants
138 for (int i = fam->count - 1; i >= 0; i--)
139 {
140 if (discardable[i]) continue;
141
142 Person & p = *(persons[fam->path[i]]);
143
144 if (p.isFounder() || discardable[p.father->traverse])
145 {
146 if (mates[i] == -2 ||
147 offspring[i] > 1 ||
148 (mates[i] >= fam->founders &&
149 !discardable[persons[fam->path[mates[i]]]->father->traverse]) ||
150 haveData[p.traverse] ||
151 (mates[i] != -1 && haveData[mates[i]]))
152 continue;
153
154 trimming = true;
155 discardable[i] = true;
156 continue;
157 }
158 }
159 }
160
161 for (int i = fam->count - 1; i >= 0; i--)
162 if (discardable[i])
163 {
164 if (!quiet)
165 {
166 ShowTrimHeader(showHeader);
167 printf(" Removing person %s->%s: No data\n",
168 (const char *) fam->famid,
169 (const char *) persons[fam->path[i]]->pid);
170 }
171 delete persons[fam->path[i]];
172 }
173 else
174 newPersons[newCount++] = persons[fam->path[i]];
175 }
176
177 if (!showHeader)
178 printf("\n");
179
180 delete [] persons;
181
182 persons = newPersons;
183 count = newCount;
184 Sort();
185}
186
187