libStatGen Software 1
glfHandler.h
1/*
2 * Copyright (C) 2010 Regents of the University of Michigan
3 *
4 * This program is free software: you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation, either version 3 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18#ifndef __GLF_HANDLER_H__
19#define __GLF_HANDLER_H__
20
21#include "InputFile.h"
22#include "StringBasics.h"
23
24#if defined(__APPLE__)
25// #pragma warn "Caution, glfHandler.h is non-portable"
26#else
27#pragma pack(push)
28#pragma pack(1)
29#endif
30
/**
 * Indel payload of a GLF record.
 *
 * An instance of this struct shares storage with the ten genotype
 * likelihoods of a glfEntry through the anonymous union declared there.
 * Under the 1-byte packing requested on non-Apple builds the members add up
 * to 10 bytes (assuming a 2-byte short), the same size as that array.
 */
struct glfIndel
{
    /** Likelihoods for the 1/1, 2/2 and 1/2 genotypes, in that order */
    unsigned char lk[3];

    /** Lengths of the two alleles */
    short length[2];

    /** Filler bytes; no other use is visible in this header */
    unsigned char padding[3];
};
41
43{
44 /** "XACMGRSVTWYHKDBN"[ref_base] gives the reference base */
45 unsigned char refBase:4, recordType:4;
46
47 /** offset of this record from the previous one, in bases */
48 unsigned int offset;
49
50 /** log10 minimum likelihood * 10 and the number of mapped reads */
51 unsigned depth:24, minLLK:8;
52
53 /** root mean squared maximum mapping quality for overlapping reads */
54 unsigned char mapQuality;
55
56 union
57 {
58 /** log10 likelihood ratio * 10 for genotypes AA, AC, AG, AT, CC, CG, CT, GG, GT, TT */
59 unsigned char lk[10];
60 glfIndel indel;
61 };
62
63 glfEntry & operator = (glfEntry & rhs);
64};
65
66#if defined(__APPLE__)
67// #pragma warn "Caution, glfHandler.h is non-portable"
68#else
69#pragma pack(pop)
70#endif
71
73{
74public:
75 // Global information about the current GLF file
76 bool isStub;
77 IFILE handle;
78 String header;
79
80 // Information about the current section
81 String label;
82 int sections;
83 int currentSection;
84 int maxPosition;
85
86 // Information on whether the end of the current section has been reached
87 bool endOfSection;
88
89 // Currently active GLF record
90 glfEntry data;
91 int position;
92 double likelihoods[10];
93 String indelSequence[2];
94
95 // Error message in case previous command fails
96 const char * errorMsg;
97
98 glfHandler();
100
101 bool Open(const String & filename);
102 void OpenStub();
103 bool Create(const String & filename);
104 bool isOpen();
105 void Close();
106 void Rewind();
107
108 bool NextSection();
109 bool NextEntry();
110 bool NextBaseEntry();
111
112 void BeginSection(const String & sectionLabel, int sectionLength);
113 void EndSection();
114
115 void WriteEntry(int outputPosition);
116
117 char GetReference(int position, char defaultBase);
118 int GetDepth(int position);
119 const double * GetLikelihoods(int position);
120 const unsigned char * GetLogLikelihoods(int position);
121 int GetMapQuality(int position);
122
123 static const double * GetDefaultLikelihoods()
124 {
125 return nullLikelihoods;
126 }
127 static const unsigned char * GetDefaultLogLikelihoods()
128 {
129 return nullLogLikelihoods;
130 }
131
132 static int GenotypeIndex(int base1, int base2)
133 {
134 return base1 < base2 ? (base1 - 1) *(10 - base1) / 2 + (base2 - base1) :
135 (base2 - 1) *(10 - base2) / 2 + (base1 - base2);
136 }
137
138private:
139 static char translateBase[16];
140 static char backTranslateBase[5];
141 static double nullLikelihoods[10];
142 static unsigned char nullLogLikelihoods[10];
143
144 bool ReadHeader();
145 void WriteHeader(const String & headerText = "");
146};
147
148#endif
149
Class for easily reading/writing files without having to worry about file type (uncompressed, gzip, bgzf) when reading.
Definition: InputFile.h:37
unsigned int offset
offset of this record from the previous one, in bases
Definition: glfHandler.h:48
unsigned char mapQuality
root mean squared maximum mapping quality for overlapping reads
Definition: glfHandler.h:54
unsigned depth
log10 minimum likelihood * 10 and the number of mapped reads
Definition: glfHandler.h:51
unsigned char lk[10]
log10 likelihood ratio * 10 for genotypes AA, AC, AG, AT, CC, CG, CT, GG, GT, TT
Definition: glfHandler.h:59
unsigned char refBase
"XACMGRSVTWYHKDBN"[ref_base] gives the reference base
Definition: glfHandler.h:45