21 zfits(
const std::string& fname,
const std::string& tableName=
"",
bool force=
false)
30 zfits(
const std::string& fname,
const std::string& fout,
const std::string& tableName,
bool force=
false)
53 const bool rawsum =
GetStr(
"RAWSUM") == std::to_string((
long long int)
fRawsum.
val());
101 clear(rdstate()|std::ios::badbit);
103 throw std::runtime_error(
"Only the FACT compression scheme is handled by this reader.");
105 gLog <<
___err___ <<
"ERROR - Only the FACT compression scheme is handled by this reader." << std::endl;
132 memcpy(dest, src, c.num*c.size);
150 std::vector<std::vector<std::pair<int64_t, int64_t>>>
fCatalog;
168 if (buffer_size % 4 != 0)
169 buffer_size += 4 - (buffer_size%4);
171 if (compressed_buffer_size % 4 != 0)
172 compressed_buffer_size += 4 - (compressed_buffer_size%4);
174 fBuffer.resize(buffer_size);
176 fTransposedBuffer.resize(buffer_size);
177 fCompressedBuffer.resize(compressed_buffer_size);
183 std::vector<char> readBuf(16);
184 fCatalog.resize(fNumTiles);
186 const streampos catalogStart = tellg();
194 read(readBuf.data(), 2*
sizeof(int64_t));
197 int64_t tempValues[2] = {0,0};
198 revcpy<8>(
reinterpret_cast<char*
>(tempValues), readBuf.data(), 2);
199 if (tempValues[0] < 0 || tempValues[1] < 0)
201 clear(rdstate()|std::ios::badbit);
203 throw std::runtime_error(
"Negative value in the catalog");
205 gLog <<
___err___ <<
"ERROR - negative value in the catalog" << std::endl;
210 fCatalog[
i].emplace_back(tempValues[0], tempValues[1]);
220 if (fNumRowsPerTile%fShrinkFactor)
222 clear(rdstate()|std::ios::badbit);
224 throw std::runtime_error(
"Rows per tile and shrink factor do not match");
226 gLog <<
___err___ <<
"ERROR - Rows per tile and shrink factor do not match" << std::endl;
235 fTileSize.resize(fNumTiles);
236 fTileOffsets.resize(fNumTiles);
242 fTileSize[
i] += fCatalog[
i][j].first;
243 fTileOffsets[
i].emplace_back(fCatalog[
i][j].
second - fCatalog[
i][0].
second);
247 if (!
fCopy.is_open())
255 std::vector<char> buf(catSize);
256 read(buf.data(), catSize);
258 fCopy.write(buf.data(), catSize);
260 clear(rdstate()|std::ios::badbit);
279 if (!fCatalogInitialized)
286 const int64_t requestedSuperTile = requestedTile /
fShrinkFactor;
289 const int64_t requestedSubTile = requestedTile %
fShrinkFactor;
293 const bool isFirstTile = fCurrentRow<0;
296 const bool isNextTile = requestedTile==currentTile+1 || isFirstTile;
298 fCurrentRow = rowNum;
301 if (requestedTile!=currentTile || isFirstTile)
304 const int64_t superTileStart = fCatalog[requestedSuperTile][0].second -
sizeof(
FITS::TileHeader);
306 std::vector<size_t> offsets = fTileOffsets[requestedSuperTile];
311 if (!isNextTile || isFirstTile)
314 seekg(fHeapOff+superTileStart);
317 for (uint32_t k=0; k<requestedSubTile; k++)
329 const int64_t subTileStart = tellg() -
fHeapOff;
334 char *destBuffer = fCompressedBuffer.data()+offset;
337 size_t currentTileSize = 0;
341 if (requestedSubTile>0)
361 offsets[
i+1] = offsets[
i];
374 read(destBuffer, currentTileSize);
382 memset(fCompressedBuffer.data(), 0, offset);
383 memset(destBuffer+currentTileSize, 0, fCompressedBuffer.size()-currentTileSize-offset);
388 if (isNextTile &&
fCopy.is_open() &&
fCopy.good())
390 fCopy.write(fCompressedBuffer.data()+offset, currentTileSize);
392 clear(rdstate()|std::ios::badbit);
396 clear(rdstate()|std::ios::badbit);
405 const char *src = fTransposedBuffer.data();
410 char *
buffer = fBuffer.data() + it->offset;
412 switch (fColumnOrdering[i])
418 memcpy(dest, src, it->bytes);
425 for (
char *elem=buffer; elem<buffer+it->bytes; elem+=it->size)
429 memcpy(dest, src, it->size);
436 clear(rdstate()|std::ios::badbit);
438 std::ostringstream
str;
439 str <<
"Unkown column ordering scheme found (i=" << i <<
", " << fColumnOrdering[
i] <<
")";
441 throw std::runtime_error(str.str());
459 uint32_t sizeOfElems)
461 memcpy(dest, src, numElems*sizeOfElems);
462 return numElems*sizeOfElems;
470 std::vector<uint16_t> uncompressed;
473 const uint32_t* compressedSizes =
reinterpret_cast<const uint32_t*
>(src);
474 src +=
sizeof(uint32_t)*numChunks;
477 uint32_t sizeWritten = 0;
478 for (uint32_t j=0;j<numChunks;j++)
480 Huffman::Decode(reinterpret_cast<const unsigned char*>(src), compressedSizes[j], uncompressed);
482 memcpy(dest, uncompressed.data(), uncompressed.size()*
sizeof(uint16_t));
484 sizeWritten += uncompressed.size()*
sizeof(uint16_t);
485 dest += uncompressed.size()*
sizeof(uint16_t);
486 src += compressedSizes[j];
496 for (uint32_t j=2;j<numElems;j++)
497 data[j] = data[j] + (data[j-1]+data[j-2])/2;
499 return numElems*
sizeof(uint16_t);
504 const uint32_t &thisRoundNumRows,
505 const uint32_t offset)
507 char *dest = fTransposedBuffer.data();
517 const int64_t compressedOffset = offsets[
i]+offset;
528 for (int32_t j=head->
numProcs-1;j >= 0; j--)
530 uint32_t sizeWritten=0;
539 sizeWritten =
UnApplySMOOTHING(reinterpret_cast<int16_t*>(dest), numRows*numCols);
547 clear(rdstate()|std::ios::badbit);
549 std::ostringstream
str;
550 str <<
"Unknown processing applied to data (col=" <<
i <<
", proc=" << j <<
"/" << (int)head->
numProcs;
552 throw std::runtime_error(str.str());
570 const streamoff whereAreWe = tellg();
579 std::vector<std::vector<std::pair<int64_t, int64_t> > > catalog;
584 streamoff offsetInHeap = 0;
594 if (memcmp(tileHead.
id,
"TILE", 4))
596 clear(rdstate()|std::ios::badbit);
601 catalog.emplace_back();
605 for (
size_t i=0;
i<numCols;
i++)
610 catalog.back().emplace_back(0,0);
621 catalog.back().emplace_back((int64_t)(columnHead.
size),offsetInHeap);
622 offsetInHeap += columnHead.
size;
623 seekg(fHeapOff+offsetInHeap);
638 clear(rdstate()|std::ios::badbit);
639 std::ostringstream
str;
640 str <<
"Heap data does not agree with header: " << numRows <<
" calculated vs " <<
fTable.
num_rows <<
" from header.";
642 throw std::runtime_error(str.str());
658 if (catalog.size() != fCatalog.size())
660 clear(rdstate()|std::ios::badbit);
662 throw std::runtime_error(
"Heap data does not agree with header.");
664 gLog <<
___err___ <<
"ERROR - Heap data does not agree with header." << std::endl;
669 for (uint32_t
i=0;
i<catalog.size();
i++)
670 for (uint32_t j=0;j<numCols;j++)
675 clear(rdstate()|std::ios::badbit);
677 throw std::runtime_error(
"Heap data does not agree with header.");
679 gLog <<
___err___ <<
"ERROR - Heap data does not agree with header." << std::endl;
size_t fNumRowsPerTile
Number of rows per compressed tile.
zfits(const std::string &fname, const std::string &tableName="", bool force=false)
std::vector< char > fTransposedBuffer
intermediate buffer to transpose the rows
size_t GetBytesPerRow() const
virtual void MoveColumnDataToUserSpace(char *dest, const char *src, const Table::Column &c)
bool HasKey(const std::string &key) const
std::string GetStr(const std::string &key) const
void MoveColumnDataToUserSpace(char *dest, const char *src, const Table::Column &c)
uint64_t GetUInt(const std::string &key) const
streamoff fHeapOff
offset from the beginning of the file of the binary data
uint32_t UncompressHUFFMAN16(char *dest, const char *src, uint32_t numChunks)
int64_t second
offset of this column in the tile, from the start of the heap area
uint32_t UncompressUNCOMPRESSED(char *dest, const char *src, uint32_t numElems, uint32_t sizeOfElems)
int64_t Decode(const uint8_t *bufin, size_t bufinlen, std::vector< uint16_t > &pbufout)
size_t GetNumRows() const
int64_t first
Size of this column in the tile.
SortedColumns sorted_cols
void InitCompressionReading()
BlockHeader(uint64_t s=0, char o=kOrderByRow, unsigned char n=1)
T Get(const std::string &key) const
std::vector< char > fColumnOrdering
ordering of the column's rows. Can change from tile to tile.
virtual bool SkipNextRow()
std::vector< char > fCompressedBuffer
compressed rows
int64_t GetInt(const std::string &key) const
std::vector< std::vector< std::pair< int64_t, int64_t > > > fCatalog
Catalog, i.e. the main table that points to the compressed data.
void open(const char *name)
streamoff GetHeapShift() const
std::vector< size_t > fTileSize
size in bytes of each compressed tile
bool UncompressBuffer(const std::vector< size_t > &offsets, const uint32_t &thisRoundNumRows, const uint32_t offset)
std::vector< std::vector< size_t > > fTileOffsets
offset from start of tile of a given compressed column
virtual void WriteRowToCopyFile(size_t row)
virtual bool IsFileOk() const
std::vector< char > fBufferRow
size_t fNumTiles
Total number of tiles.
void CheckIfFileIsConsistent(bool update_catalog=false)
virtual void StageRow(size_t row, char *dest)
Checksum fRawsum
Checksum of the uncompressed, raw data.
bool ReadBinaryRow(const size_t &rowNum, char *bufferToRead)
uint32_t UnApplySMOOTHING(int16_t *data, uint32_t numElems)
virtual bool IsFileOk() const
virtual void StageRow(size_t row, char *dest)
int64_t fCurrentRow
current row in memory signed because we need -1
streamoff fHeapFromDataStart
offset from the beginning of the data table
bool add(const char *buf, size_t len, bool big_endian=true)
size_t fShrinkFactor
shrink factor
std::vector< char > fBuffer
store the uncompressed rows
void Constructor(const std::string &fname, std::string fout="", const std::string &tableName="", bool force=false)
zfits(const std::string &fname, const std::string &fout, const std::string &tableName, bool force=false)