libStatGen Software 1
BamInterface.cpp
1/*
2 * Copyright (C) 2010 Regents of the University of Michigan
3 *
4 * This program is free software: you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation, either version 3 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18#include "BamInterface.h"
19#include "CharBuffer.h"
20
21BamInterface::BamInterface()
22{
23}
24
25
26BamInterface::~BamInterface()
27{
28}
29
30
31// Read a BAM file's header.
32bool BamInterface::readHeader(IFILE filePtr, SamFileHeader& header,
33 SamStatus& status)
34{
35 if(filePtr == NULL)
36 {
37 // File is not open, return false.
39 "Cannot read header since the file pointer is null");
40 return(false);
41 }
42 if(filePtr->isOpen() == false)
43 {
45 "Cannot read header since the file is not open");
46 return(false);
47 }
48
49 // Clear the passed in header.
50 header.resetHeader();
51
52 int32_t headerLength;
53 int readSize = ifread(filePtr, &headerLength, sizeof(headerLength));
54
55 if(readSize != sizeof(headerLength))
56 {
57 String errMsg = "Failed to read the BAM header length, read ";
58 errMsg += readSize;
59 errMsg += " bytes instead of ";
60 errMsg += (unsigned int)sizeof(headerLength);
61 status.setStatus(SamStatus::FAIL_IO, errMsg.c_str());
62 return(false);
63 }
64
65 String headerStr;
66 if(headerLength > 0)
67 {
68 // Read the header.
69 readSize =
70 ifread(filePtr, headerStr.LockBuffer(headerLength + 1), headerLength);
71 headerStr[headerLength] = 0;
72 headerStr.UnlockBuffer();
73 if(readSize != headerLength)
74 {
75 // Failed to read the header.
76 status.setStatus(SamStatus::FAIL_IO, "Failed to read the BAM header.");
77 return(false);
78 }
79 }
80
81 // Parse the header that was read.
82 if(!header.addHeader(headerStr))
83 {
84 // Status is set in the method on failure.
86 return(false);
87 }
88
89 int referenceCount;
90 // Read the number of references sequences.
91 ifread(filePtr, &referenceCount, sizeof(int));
92
93 // Get and clear the reference info so it can be set
94 // from the bam reference table.
95 SamReferenceInfo& refInfo =
96 header.getReferenceInfoForBamInterface();
97 refInfo.clear();
98
99 CharBuffer refName;
100
101 // Read each reference sequence
102 for (int i = 0; i < referenceCount; i++)
103 {
104 int nameLength;
105 int rc;
106 // Read the length of the reference name.
107 rc = ifread(filePtr, &nameLength, sizeof(int));
108 if(rc != sizeof(int))
109 {
111 "Failed to read the BAM reference dictionary.");
112 return(false);
113 }
114
115 // Read the name.
116 refName.readFromFile(filePtr, nameLength);
117
118 // Read the length of the reference sequence.
119 int32_t refLen;
120 rc = ifread(filePtr, &refLen, sizeof(int));
121
122 if(rc != sizeof(int)) {
124 "Failed to read the BAM reference dictionary.");
125 return(false);
126 }
127
128 refInfo.add(refName.c_str(), refLen);
129 }
130
131 // Successfully read the file.
132 return(true);
133}
134
135
136bool BamInterface::writeHeader(IFILE filePtr, SamFileHeader& header,
137 SamStatus& status)
138{
139 if((filePtr == NULL) || (filePtr->isOpen() == false))
140 {
141 // File is not open, return false.
143 "Cannot write header since the file pointer is null");
144 return(false);
145 }
146
147 char magic[4];
148 magic[0] = 'B';
149 magic[1] = 'A';
150 magic[2] = 'M';
151 magic[3] = 1;
152
153 // Write magic to the file.
154 ifwrite(filePtr, magic, 4);
155
156 ////////////////////////////////
157 // Write the header to the file.
158 ////////////////////////////////
159 // Construct a string containing the entire header.
160 std::string headerString = "";
161 header.getHeaderString(headerString);
162
163 int32_t headerLen = headerString.length();
164 int numWrite = 0;
165
166 // Write the header length.
167 numWrite = ifwrite(filePtr, &headerLen, sizeof(int32_t));
168 if(numWrite != sizeof(int32_t))
169 {
171 "Failed to write the BAM header length.");
172 return(false);
173 }
174
175 // Write the header to the file.
176 numWrite = ifwrite(filePtr, headerString.c_str(), headerLen);
177 if(numWrite != headerLen)
178 {
180 "Failed to write the BAM header.");
181 return(false);
182 }
183
184 ////////////////////////////////////////////////////////
185 // Write the Reference Information.
186 const SamReferenceInfo& refInfo = header.getReferenceInfo();
187
188 // Get the number of sequences.
189 int32_t numSeq = refInfo.getNumEntries();
190 ifwrite(filePtr, &numSeq, sizeof(int32_t));
191
192 // Write each reference sequence
193 for (int i = 0; i < numSeq; i++)
194 {
195 const char* refName = refInfo.getReferenceName(i);
196 // Add one for the null value.
197 int32_t nameLength = strlen(refName) + 1;
198 // Write the length of the reference name.
199 ifwrite(filePtr, &nameLength, sizeof(int32_t));
200
201 // Write the name.
202 ifwrite(filePtr, refName, nameLength);
203 // Write the length of the reference sequence.
204 int32_t refLen = refInfo.getReferenceLength(i);
205 ifwrite(filePtr, &refLen, sizeof(int32_t));
206 }
207
208 return(true);
209}
210
211
212void BamInterface::readRecord(IFILE filePtr, SamFileHeader& header,
213 SamRecord& record,
214 SamStatus& samStatus)
215{
216 // TODO - need to validate there are @SQ lines in both sam/bam - MAYBE!
217
218 // SetBufferFromFile will reset the record prior to reading a new one.
219 if(record.setBufferFromFile(filePtr, header) != SamStatus::SUCCESS)
220 {
221 // Failed, so add the error message.
222 samStatus.addError(record.getStatus());
223 }
224}
225
226SamStatus::Status BamInterface::writeRecord(IFILE filePtr,
227 SamFileHeader& header,
228 SamRecord& record,
230{
231 // Write the file, returning the status.
232 return(record.writeRecordBuffer(filePtr, translation));
233}
234
235
unsigned int ifread(IFILE file, void *buffer, unsigned int size)
Read up to size bytes from the file into the buffer.
Definition: InputFile.h:600
unsigned int ifwrite(IFILE file, const void *buffer, unsigned int size)
Write the specified number of bytes from the specified buffer into the file.
Definition: InputFile.h:669
Class for easily reading/writing files without having to worry about file type (uncompressed,...
Definition: InputFile.h:37
bool isOpen() const
Returns whether or not the file was successfully opened.
Definition: InputFile.h:423
This class allows a user to get/set the fields in a SAM/BAM Header.
Definition: SamFileHeader.h:35
const char * getErrorMessage()
Get the failure message if a method returned failure.
const SamReferenceInfo & getReferenceInfo() const
Get the Reference Information.
bool getHeaderString(std::string &header) const
Set the passed in string to the entire header string, clearing its current contents.
void resetHeader()
Initialize the header.
bool addHeader(const char *header)
Add a header that is already preformatted in a const char*.
Class providing an easy to use interface to get/set/operate on the fields in a SAM/BAM record.
Definition: SamRecord.h:52
SequenceTranslation
Enum containing the settings on how to translate the sequence if a reference is available.
Definition: SamRecord.h:57
SamStatus::Status writeRecordBuffer(IFILE filePtr)
Write the record as a BAM into the specified already opened file.
Definition: SamRecord.cpp:1237
SamStatus::Status setBufferFromFile(IFILE filePtr, SamFileHeader &header)
Read the BAM record from a file.
Definition: SamRecord.cpp:558
const SamStatus & getStatus()
Returns the status associated with the last method that sets the status.
Definition: SamRecord.cpp:2403
Class for tracking the reference information mapping between the reference ids and the reference name...
void clear()
Reset this reference info.
int32_t getNumEntries() const
Get the number of entries contained here.
const char * getReferenceName(int index) const
Return the reference name at the specified index, returning "" if the index is out of bounds.
void add(const char *referenceSequenceName, int32_t referenceSequenceLength)
Add reference sequence name and reference sequence length.
int32_t getReferenceLength(int index) const
Return the reference length at the specified index, returning 0 if the index is out of bounds.
This class is used to track the status results of some methods in the BAM classes.
Definition: StatGenStatus.h:27
Status
Return value enum for StatGenFile methods.
Definition: StatGenStatus.h:32
@ SUCCESS
method completed successfully.
Definition: StatGenStatus.h:32
@ FAIL_IO
method failed due to an I/O issue.
Definition: StatGenStatus.h:37
@ FAIL_PARSE
failed to parse a record/header - invalid format.
Definition: StatGenStatus.h:42
@ FAIL_ORDER
FAIL_ORDER: method failed because it was called out of order, like trying to read a file without open...
Definition: StatGenStatus.h:41
void setStatus(Status newStatus, const char *newMessage)
Set the status with the specified status enum and message.
void addError(Status newStatus, const char *newMessage)
Add the specified error message to the status message, setting the status to newStatus if the current...