libStatGen Software 1
GlfRecord.cpp
1/*
2 * Copyright (C) 2010 Regents of the University of Michigan
3 *
4 * This program is free software: you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation, either version 3 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18#include <stdlib.h>
19#include "GlfRecord.h"
20#include "GlfException.h"
21#include "StringBasics.h"
22
23std::string GlfRecord::REF_BASE_CHAR = "XACMGRSVTQYHKSVN";
24
26{
27 reset();
28}
29
30
32{
33 reset();
34}
35
36
37// Reset the record for a new entry, clearing out previous values.
39{
40 myRecTypeRefBase = 0;
41 myRec1Base.offset = 0;
42 myRec1Base.min_depth = 0;
43 myRec1Base.rmsMapQ = 0;
44 for(int i = 0; i < 10; i++)
45 {
46 myRec1Base.lk[i] = 0;
47 }
48
49 myRec2Base.offset = 0;
50 myRec2Base.min_depth = 0;
51 myRec2Base.rmsMapQ = 0;
52 myRec2Base.lkHom1 = 0;
53 myRec2Base.lkHom2 = 0;
54 myRec2Base.lkHet = 0;
55 myRec2Base.indelLen1 = 0;
56 myRec2Base.indelLen2 = 0;
57
58 myIndelSeq1.reset();
59 myIndelSeq2.reset();
60}
61
62
63// Read the record from the specified file. Assumes the file is in
64// the correct position for reading the record.
66{
67 // Read the record type and reference base.
68 int numRead = 0;
69 int byteLen = sizeof(uint8_t);
70 numRead = ifread(filePtr, &myRecTypeRefBase, byteLen);
71 if(numRead != byteLen)
72 {
73 String errorMsg = "Failed to read the record type & reference base (";
74 errorMsg += byteLen;
75 errorMsg += " bytes). Only read ";
76 errorMsg += numRead;
77 errorMsg += " bytes.";
78 std::string errorString = errorMsg.c_str();
79 throw(GlfException(GlfStatus::FAIL_IO, errorString));
80 return(false);
81 }
82
83 // TODO, split up by types of records...
84 switch(getRecordType())
85 {
86 case 0:
87 // Last record.
88 // Nothing more to read.
89 break;
90 case 1:
91 // Read type 1.
92 readType1(filePtr);
93 break;
94 case 2:
95 // Read type 2.
96 readType2(filePtr);
97 break;
98 default:
99 String errorMsg = "Failed to read the record: unknown type: ";
100 errorMsg += getRecordType();
101 std::string errorString = errorMsg.c_str();
102 throw(GlfException(GlfStatus::INVALID, errorString));
103 return(false);
104 break;
105 };
106
107 // Successfully read, return success.
108 return(true);
109}
110
111
112// Write the record to the specified file.
113bool GlfRecord::write(IFILE filePtr) const
114{
115 // TODO, split up by types of records...
116 switch(getRecordType())
117 {
118 case 0:
119 writeRtypeRef(filePtr);
120 break;
121 case 1:
122 // write type 1.
123 writeType1(filePtr);
124 break;
125 case 2:
126 // write type 2.
127 writeType2(filePtr);
128 break;
129 default:
130 // unknown type, return error.
131 String errorMsg = "Failed to write the record: unknown type: ";
132 errorMsg += getRecordType();
133 std::string errorString = errorMsg.c_str();
134 throw(GlfException(GlfStatus::INVALID, errorString));
135 return(false);
136 break;
137 };
138
139 return(true);
140}
141
142
144{
145 std::cout << "record_type: " << getRecordType()
146 << "; ref_base: " << getRefBase()
147 << "; ref_base_char: " << getRefBaseChar()
148 << "\n";
149
150 // TODO, split up by types of records...
151 switch(getRecordType())
152 {
153 case 0:
154 break;
155 case 1:
156 // print type 1.
157 std::cout << "\toffset: " << myRec1Base.offset
158 << "; min_lk: " << (myRec1Base.min_depth >> 24)
159 << "; read_depth: " << (myRec1Base.min_depth & 0xFFFFFF)
160 << "; rmsMapQ: " << (int)myRec1Base.rmsMapQ;
161 for(int i = 0; i < 10; ++i)
162 {
163 std::cout << "; lk[" << i << "]: " << (int)myRec1Base.lk[i];
164 }
165
166 std::cout << "\n";
167 break;
168 case 2:
169 // print type 2.
170 std::cout << "\toffset: " << myRec2Base.offset
171 << "; min_lk: " << (myRec2Base.min_depth >> 24)
172 << "; read_depth: " << (myRec2Base.min_depth & 0xFFFFF)
173 << "; rmsMapQ: " << (int)myRec2Base.rmsMapQ
174 << "; lkHom1: " << (int)myRec2Base.lkHom1
175 << "; lkHom2: " << (int)myRec2Base.lkHom2
176 << "; lkHet: " << (int)myRec2Base.lkHet
177 << "; indelLen1: " << myRec2Base.indelLen1
178 << "; indelLen2: " << myRec2Base.indelLen2
179 << "; myIndelSeq1: " << myIndelSeq1.c_str()
180 << "; myIndelSeq2: " << myIndelSeq2.c_str()
181 << "\n";
182 break;
183 default:
184 break;
185 };
186}
187
188bool GlfRecord::setRtypeRef(uint8_t rtypeRef)
189{
190 myRecTypeRefBase = rtypeRef;
191 return(true);
192}
193
194bool GlfRecord::setRecordType(uint8_t recType)
195{
196 myRecTypeRefBase =
197 (myRecTypeRefBase & REF_BASE_MASK) | (recType << REC_TYPE_SHIFT);
198 return(true);
199}
200
201bool GlfRecord::setRefBaseInt(uint8_t refBase)
202{
203 myRecTypeRefBase =
204 (myRecTypeRefBase & REC_TYPE_MASK) | (refBase & REF_BASE_MASK);
205 return(true);
206}
207
208// bool GlfRecord::setRefBaseChar(char refBase)
209// {
210
211// uint8_t refBaseInt = REF_BASE_CHAR_TO_INT[refBase];
212// return(setRefBaseInt(refBaseInt));
213// }
214
215bool GlfRecord::setOffset(uint32_t offset)
216{
217 myRec1Base.offset = offset;
218 myRec2Base.offset = offset;
219 return(true);
220}
221
222bool GlfRecord::setMinDepth(uint32_t minDepth)
223{
224 myRec1Base.min_depth = minDepth;
225 myRec2Base.min_depth = minDepth;
226 return(true);
227}
228
229bool GlfRecord::setMinLk(uint8_t minLk)
230{
231 setMinDepth((myRec1Base.min_depth & READ_DEPTH_MASK) |
232 (minLk << MIN_LK_SHIFT));
233 return(true);
234}
235
236bool GlfRecord::setReadDepth(uint32_t readDepth)
237{
238 setMinDepth((myRec1Base.min_depth & MIN_LK_MASK) |
239 (readDepth & READ_DEPTH_MASK));
240 return(true);
241}
242
243bool GlfRecord::setRmsMapQ(uint8_t rmsMapQ)
244{
245 myRec1Base.rmsMapQ = rmsMapQ;
246 myRec2Base.rmsMapQ = rmsMapQ;
247 return(true);
248}
249
250// Accessors to get the gneric values.
252{
253 int index = myRecTypeRefBase & REF_BASE_MASK;
254 if((index > REF_BASE_MAX) || (index < 0))
255 {
256 // TODO throw exception.
257 return('N');
258 }
259 return(REF_BASE_CHAR[index]);
260}
261
262
263uint32_t GlfRecord::getOffset() const
264{
265 if(getRecordType() == 1)
266 {
267 return(myRec1Base.offset);
268 }
269 else if(getRecordType() == 2)
270 {
271 return(myRec2Base.offset);
272 }
274 "Tried to call getOffset for Record not of type 1 or 2."));
275 return(0);
276}
277
279{
280 if(getRecordType() == 1)
281 {
282 return(myRec1Base.min_depth);
283 }
284 else if(getRecordType() == 2)
285 {
286 return(myRec2Base.min_depth);
287 }
289 "Tried to call getMinDepth for Record not of type 1 or 2."));
290 return(0);
291}
292
293uint8_t GlfRecord::getMinLk() const
294{
295 if(getRecordType() == 1)
296 {
297 return(myRec1Base.min_depth >> MIN_LK_SHIFT);
298 }
299 else if(getRecordType() == 2)
300 {
301 return(myRec2Base.min_depth >> MIN_LK_SHIFT);
302 }
304 "Tried to call getMinLk for Record not of type 1 or 2."));
305 return(0);
306}
307
309{
310 if(getRecordType() == 1)
311 {
312 return(myRec1Base.min_depth & READ_DEPTH_MASK);
313 }
314 else if(getRecordType() == 2)
315 {
316 return(myRec2Base.min_depth & READ_DEPTH_MASK);
317 }
319 "Tried to call getReadDepth for Record not of type 1 or 2."));
320 return(0);
321}
322
324{
325 if(getRecordType() == 1)
326 {
327 return(myRec1Base.rmsMapQ);
328 }
329 else if(getRecordType() == 2)
330 {
331 return(myRec2Base.rmsMapQ);
332 }
334 "Tried to call getRmsMapQ for Record not of type 1 or 2."));
335 return(0);
336}
337
338
339// Accessors for getting record type 1
340bool GlfRecord::setLk(int index, uint8_t value)
341{
342 if((index < 0) || (index >= NUM_REC1_LIKELIHOOD))
343 {
344 // Out of range.
346 "Trying to set Record Type 1 likelihood position< 0 or >= 10."));
347 return(false);
348 }
349
350 // In range.
351 myRec1Base.lk[index] = value;
352 return(true);
353}
354
355uint8_t GlfRecord::getLk(int index)
356{
357 if(getRecordType() != 1)
358 {
360 "Tried to call getLk for Record not of type 1."));
361 return(0);
362 }
363 if((index < 0) || (index >= NUM_REC1_LIKELIHOOD))
364 {
366 "Tried to call getLk for index < 0 or >= 10."));
367 return(0);
368 }
369 return(myRec1Base.lk[index]);
370}
371
372
373// Accessors for getting record type 2
374bool GlfRecord::setLkHom1(uint8_t lk)
375{
376 myRec2Base.lkHom1 = lk;
377 return(true);
378}
379
380bool GlfRecord::setLkHom2(uint8_t lk)
381{
382 myRec2Base.lkHom2 = lk;
383 return(true);
384}
385
386bool GlfRecord::setLkHet(uint8_t lk)
387{
388 myRec2Base.lkHet = lk;
389 return(true);
390}
391
392bool GlfRecord::setInsertionIndel1(const std::string& indelSeq)
393{
394 myRec2Base.indelLen1 = indelSeq.length();
395 myIndelSeq1 = indelSeq;
396 return(true);
397}
398
399bool GlfRecord::setDeletionIndel1(const std::string& indelSeq)
400{
401 myRec2Base.indelLen1 = 0 - (indelSeq.length());
402 myIndelSeq1 = indelSeq;
403 return(true);
404}
405
406bool GlfRecord::setInsertionIndel2(const std::string& indelSeq)
407{
408 myRec2Base.indelLen2 = indelSeq.length();
409 myIndelSeq2 = indelSeq;
410 return(true);
411}
412
413bool GlfRecord::setDeletionIndel2(const std::string& indelSeq)
414{
415 myRec2Base.indelLen2 = 0 - (indelSeq.length());
416 myIndelSeq2 = indelSeq;
417 return(true);
418}
419
421{
422 if(getRecordType() != 2)
423 {
425 "Tried to call getLkHom1 for Record not of type 2."));
426 return(0);
427 }
428 return(myRec2Base.lkHom1);
429}
430
432{
433 if(getRecordType() != 2)
434 {
436 "Tried to call getLkHom2 for Record not of type 2."));
437 return(0);
438 }
439 return(myRec2Base.lkHom2);
440}
441
443{
444 if(getRecordType() != 2)
445 {
447 "Tried to call getLkHet for Record not of type 2."));
448 return(0);
449 }
450 return(myRec2Base.lkHet);
451}
452
453int16_t GlfRecord::getIndel1(std::string& indelSeq)
454{
455 if(getRecordType() != 2)
456 {
458 "Tried to call getIndel1 for Record not of type 2."));
459 return(0);
460 }
461 indelSeq = myIndelSeq1.c_str();
462 return(myRec2Base.indelLen1);
463}
464
465int16_t GlfRecord::getIndel2(std::string& indelSeq)
466{
467 if(getRecordType() != 2)
468 {
470 "Tried to call getIndel2 for Record not of type 2."));
471 return(0);
472 }
473 indelSeq = myIndelSeq2.c_str();
474 return(myRec2Base.indelLen2);
475}
476
477
478void GlfRecord::readType1(IFILE filePtr)
479{
480 // Read record type 1 information.
481 int numRead = 0;
482 numRead = ifread(filePtr, &myRec1Base, REC1_BASE_SIZE);
483 if(numRead != REC1_BASE_SIZE)
484 {
485 String errorMsg = "Failed to read record of type 1 (";
486 errorMsg += REC1_BASE_SIZE;
487 errorMsg += " bytes). Only read ";
488 errorMsg += numRead;
489 errorMsg += " bytes.";
490 std::string errorString = errorMsg.c_str();
491 throw(GlfException(GlfStatus::FAIL_IO, errorString));
492 }
493
494 // Record type 1 is fixed size and has no additional variable length
495 // fields, so done reading.
496}
497
498
499void GlfRecord::readType2(IFILE filePtr)
500{
501 // Read record type 2 information.
502 int numRead = 0;
503 numRead = ifread(filePtr, &myRec2Base, REC2_BASE_SIZE);
504 if(numRead != REC2_BASE_SIZE)
505 {
506 String errorMsg = "Failed to read record of type 2 base info (";
507 errorMsg += REC2_BASE_SIZE;
508 errorMsg += " bytes). Only read ";
509 errorMsg += numRead;
510 errorMsg += " bytes.";
511 std::string errorString = errorMsg.c_str();
512 throw(GlfException(GlfStatus::FAIL_IO, errorString));
513 }
514
515 // Record type 2 has 2 additional variable length fields. Read those
516 // fields.
517 int16_t len = abs(myRec2Base.indelLen1);
518 numRead = myIndelSeq1.readFromFile(filePtr, len);
519 if(numRead != len)
520 {
521 String errorMsg = "Failed to read record of type 2, 1st indel sequence (";
522 errorMsg += len;
523 errorMsg += " bytes). Only read ";
524 errorMsg += numRead;
525 errorMsg += " bytes.";
526 std::string errorString = errorMsg.c_str();
527 throw(GlfException(GlfStatus::FAIL_IO, errorString));
528 }
529 len = abs(myRec2Base.indelLen2);
530 numRead = myIndelSeq2.readFromFile(filePtr, len);
531 if(numRead != len)
532 {
533 String errorMsg = "Failed to read record of type 2, 2nd indel sequence (";
534 errorMsg += len;
535 errorMsg += " bytes). Only read ";
536 errorMsg += numRead;
537 errorMsg += " bytes.";
538 std::string errorString = errorMsg.c_str();
539 throw(GlfException(GlfStatus::FAIL_IO, errorString));
540 }
541}
542
543
544void GlfRecord::writeRtypeRef(IFILE filePtr) const
545{
546 int byteLen = sizeof(myRecTypeRefBase);
547 int numWrite =
548 ifwrite(filePtr, &myRecTypeRefBase, byteLen);
549 if(numWrite != byteLen)
550 {
551 String errorMsg =
552 "Failed to write the length of the record type and reference base (";
553 errorMsg += byteLen;
554 errorMsg += " bytes). Only wrote ";
555 errorMsg += numWrite;
556 errorMsg += " bytes.";
557 std::string errorString = errorMsg.c_str();
558 throw(GlfException(GlfStatus::FAIL_IO, errorString));
559 }
560}
561
562
563void GlfRecord::writeType1(IFILE filePtr) const
564{
565 // Write the generic record field that all records have.
566 writeRtypeRef(filePtr);
567
568 // Record type 1 is fixed size and has no additional variable length
569 // fields, so just write the base info.
570 int numWrite = ifwrite(filePtr, &myRec1Base, REC1_BASE_SIZE);
571 if(numWrite != REC1_BASE_SIZE)
572 {
573 // failed to write.
574 String errorMsg = "Failed to write record of type 1 (";
575 errorMsg += REC1_BASE_SIZE;
576 errorMsg += " bytes). Only wrote ";
577 errorMsg += numWrite;
578 errorMsg += " bytes.";
579 std::string errorString = errorMsg.c_str();
580 throw(GlfException(GlfStatus::FAIL_IO, errorString));
581 }
582
583
584 // Done writing the record.
585}
586
587
588void GlfRecord::writeType2(IFILE filePtr) const
589{
590 // Write the generic record field that all records have.
591 writeRtypeRef(filePtr);
592
593 // Write the record type 2 base info.
594 int numWrite = ifwrite(filePtr, &myRec2Base, REC2_BASE_SIZE);
595 if(numWrite != REC2_BASE_SIZE)
596 {
597 // failed to write.
598 String errorMsg = "Failed to write record of type 2 base info (";
599 errorMsg += REC2_BASE_SIZE;
600 errorMsg += " bytes). Only wrote ";
601 errorMsg += numWrite;
602 errorMsg += " bytes.";
603 std::string errorString = errorMsg.c_str();
604 throw(GlfException(GlfStatus::FAIL_IO, errorString));
605 }
606
607 // Record type 2 has 2 additional variable length fields. Write those
608 // fields.
609 int len = myIndelSeq1.length();
610 numWrite = ifwrite(filePtr, myIndelSeq1.c_str(), len);
611 if(numWrite != len)
612 {
613 // failed to write.
614 String errorMsg = "Failed to write record of type 2, 1st indel sequence (";
615 errorMsg += len;
616 errorMsg += " bytes). Only wrote ";
617 errorMsg += numWrite;
618 errorMsg += " bytes.";
619 std::string errorString = errorMsg.c_str();
620 throw(GlfException(GlfStatus::FAIL_IO, errorString));
621 }
622 len = myIndelSeq2.length();
623 numWrite = ifwrite(filePtr, myIndelSeq2.c_str(), len);
624 if(numWrite != len)
625 {
626 // failed to write.
627 String errorMsg = "Failed to write record of type 2, 2nd indel sequence (";
628 errorMsg += len;
629 errorMsg += " bytes). Only wrote ";
630 errorMsg += numWrite;
631 errorMsg += " bytes.";
632 std::string errorString = errorMsg.c_str();
633 throw(GlfException(GlfStatus::FAIL_IO, errorString));
634 }
635
636 // Done writing the record.
637}
unsigned int ifread(IFILE file, void *buffer, unsigned int size)
Read up to size bytes from the file into the buffer.
Definition: InputFile.h:600
unsigned int ifwrite(IFILE file, const void *buffer, unsigned int size)
Write the specified number of bytes from the specified buffer into the file.
Definition: InputFile.h:669
GlfException objects should be thrown by functions that operate on Glf files for exceptions.
Definition: GlfException.h:28
bool setLk(int index, uint8_t value)
Set the likelihood for the specified genotype.
Definition: GlfRecord.cpp:340
void print() const
Print the reference section in a readable format.
Definition: GlfRecord.cpp:143
bool setDeletionIndel2(const std::string &indelSeq)
Set the sequence of the 2nd indel allele if the 2nd indel is an deletion.
Definition: GlfRecord.cpp:413
GlfRecord()
Constructor.
Definition: GlfRecord.cpp:25
int getRecordType() const
Return the record type.
Definition: GlfRecord.h:126
uint8_t getLkHet()
Return the likelihood of a heterozygote.
Definition: GlfRecord.cpp:442
bool write(IFILE filePtr) const
Write the record to the specified file.
Definition: GlfRecord.cpp:113
uint8_t getLkHom1()
Return the likelihood of the 1st homozygous indel allele.
Definition: GlfRecord.cpp:420
char getRefBaseChar() const
Return the reference base as a character.
Definition: GlfRecord.cpp:251
bool setLkHom1(uint8_t lk)
Set the likelihood of the first homozygous indel allele.
Definition: GlfRecord.cpp:374
~GlfRecord()
Destructor.
Definition: GlfRecord.cpp:31
int getRefBase() const
Return the reference base as an integer.
Definition: GlfRecord.h:134
bool setDeletionIndel1(const std::string &indelSeq)
Set the sequence of the first indel allele if the first indel is an deletion.
Definition: GlfRecord.cpp:399
bool setRmsMapQ(uint8_t rmsMapQ)
Set the RMS of mapping qualities of reads covering the site.
Definition: GlfRecord.cpp:243
uint8_t getRmsMapQ() const
Return the RMS of mapping qualities of reads covering the site.
Definition: GlfRecord.cpp:323
bool setInsertionIndel2(const std::string &indelSeq)
Set the sequence of the 2nd indel allele if the 2nd indel is an insertion.
Definition: GlfRecord.cpp:406
uint32_t getOffset() const
Return the offset from the precedent record.
Definition: GlfRecord.cpp:263
bool setRecordType(uint8_t recType)
Set the record type.
Definition: GlfRecord.cpp:194
uint32_t getMinDepth() const
Return the minimum likelihood and read depth.
Definition: GlfRecord.cpp:278
bool setRefBaseInt(uint8_t refBase)
Set the reference base from an integer value.
Definition: GlfRecord.cpp:201
bool setReadDepth(uint32_t readDepth)
Set the the read depth.
Definition: GlfRecord.cpp:236
bool setLkHet(uint8_t lk)
Set the likelihood of a heterozygote.
Definition: GlfRecord.cpp:386
uint8_t getMinLk() const
Return the minimum likelihood.
Definition: GlfRecord.cpp:293
bool setRtypeRef(uint8_t rtypeRef)
Set the record type and reference base.
Definition: GlfRecord.cpp:188
bool setLkHom2(uint8_t lk)
Set the likelihood of the 2nd homozygous indel allele.
Definition: GlfRecord.cpp:380
int16_t getIndel1(std::string &indelSeq)
Get the sequence and length (+:ins, -:del) of the 1st indel allele.
Definition: GlfRecord.cpp:453
int16_t getIndel2(std::string &indelSeq)
Get the sequence and length (+:ins, -:del) of the 2nd indel allele.
Definition: GlfRecord.cpp:465
bool read(IFILE filePtr)
Read the record from the specified file (file MUST be in the correct position for reading a record).
Definition: GlfRecord.cpp:65
uint32_t getReadDepth() const
Return the read depth.
Definition: GlfRecord.cpp:308
uint8_t getLk(int index)
Get the likelihood for the specified genotype index.
Definition: GlfRecord.cpp:355
bool setMinLk(uint8_t minLk)
Set the minimum likelihood.
Definition: GlfRecord.cpp:229
void reset()
Clear this record back to the default setting.
Definition: GlfRecord.cpp:38
uint8_t getLkHom2()
Return the likelihood of the 2nd homozygous indel allele.
Definition: GlfRecord.cpp:431
bool setOffset(uint32_t offset)
Set the offset from the precedent record.
Definition: GlfRecord.cpp:215
bool setInsertionIndel1(const std::string &indelSeq)
Set the sequence of the first indel allele if the first indel is an insertion.
Definition: GlfRecord.cpp:392
bool setMinDepth(uint32_t minDepth)
Set the minimum likelihood and the read depth.
Definition: GlfRecord.cpp:222
@ UNKNOWN
unknown result (default value should never be used)
Definition: GlfStatus.h:33
@ INVALID
invalid.
Definition: GlfStatus.h:39
@ FAIL_IO
method failed due to an I/O issue.
Definition: GlfStatus.h:34
Class for easily reading/writing files without having to worry about file type (uncompressed,...
Definition: InputFile.h:37