FACT++  1.0
int Compress ( const string &  ifile,
const string &  ofile 
)

Definition at line 66 of file zfits.cc.

References Huffman::Encode(), fits::GetColumns(), fits::GetNextRow(), fits::GetNumRows(), fits::GetRow(), if(), fits::SetPtrAddress(), start(), and Time::UnixTime().

Referenced by main().

67 {
68  // progress threshold: a status line is printed once the processed fraction of rows exceeds it (then it is raised by 0.01)
69  float frac = 0.01;
70 
71  // open the input FITS file
72  fits f(ifile);
73 
74  // open the output file
75  ofstream fout(ofile);
76 
77  // byte counters: size of the input data (tot) and size of what is written to the output (com)
78  uint64_t tot = 0;
79  uint64_t com = 0;
80 
81  // very simple timer: accumulates the time spent in Huffman::Encode (printed as "cpu" seconds)
82  double sec = 0;
83 
84  // Produce a lookup table with all information about the
85  // columns, ordered by their offset (the key of the map below)
86  const fits::Table::Columns &cols= f.GetColumns();
87 
88  struct col_t : fits::Table::Column
89  {
90  string name;
91  void *ptr;
92  };
93 
94 
95  map<size_t, col_t> columns;
96 
97  size_t row_tot = 0;
98  for (auto it=cols.begin(); it!=cols.end(); it++)
99  {
100  col_t c;
101 
102  c.offset = it->second.offset;
103  c.size = it->second.size;
104  c.num = it->second.num;
105  c.name = it->first;
106  c.ptr = f.SetPtrAddress(it->first);
107 
108  columns[c.offset] = c;
109 
110  row_tot += c.size*c.num;
111  }
112 
113  // copy the header from the input to the output file
114  // and prefix the output file as a compressed fits file
115  string header;
116  header.resize(f.tellg());
117 
118  f.seekg(0);
119  f.read((char*)header.c_str(), header.size());
120 
121  char m[2];
122  m[0] = 'z'+128;
123  m[1] = 'f'+128;
124 
125  const size_t hlen = 0;
126 
127  size_t hs = header.size();
128 
129  fout.write(m, 2); // magic number
130  fout.write((char*)&hlen, sizeof(size_t)); // length of possible header data (e.g. file version, compression algorithm); currently always 0
131  fout.write((char*)&hs, sizeof(size_t)); // size of FITS header
132  fout.write(header.c_str(), header.size()); // uncompressed FITS header
133 
134  tot += header.size();
135  com += header.size()+2+2*sizeof(size_t);
136 
137  cout << fixed;
138 
139  Time start;
140 
141  // loop over all rows
142  vector<char> cache(row_tot);
143  while (f.GetNextRow())
144  {
145  // write cursor into the cache, starting at the beginning for the data of each row
146  char *out = cache.data();
147 
148  // mask storing which columns have been compressed and which not (one bit per column)
149  vector<uint8_t> mask(cols.size()/8 + 1);
150 
151  // loop over all columns
152  uint32_t icol = 0;
153  for (auto it=columns.begin(); it!=columns.end(); it++, icol++)
154  {
155  // size of cell in bytes
156  const size_t len_col = it->second.size * it->second.num;
157 
158  // get pointer to data
159  int16_t *ptr = (int16_t*)it->second.ptr;
160 
161  // If the column is the data, preprocess the data (currently disabled)
162  /*
163  if (it->second.name=="Data")
164  {
165  int16_t *end = ptr+1440*300-4-(1440*300)%2;
166  int16_t *beg = ptr;
167 
168  while (end>=beg)
169  {
170  const int16_t avg = (end[0] + end[1])/2;
171  end[2] -= avg;
172  end[3] -= avg;
173  end -=2;
174  }
175  }*/
176 
177  // only try to compress cells larger than 32 bytes whose elements are 2 bytes wide
178  if (len_col>32 && it->second.size==2)
179  {
180  Time now;
181 
182  // perform 16bit Huffman (option for 8bit missing, skip 64bit)
183  // (what to do with floats?)
184  string buf;
185  /*int len =*/ Huffman::Encode(buf, (uint16_t*)ptr, len_col/2);
186 
187  sec += Time().UnixTime()-now.UnixTime();
188 
189  // check if data was really compressed (encoded output smaller than the raw cell)
190  if (buf.size()<len_col)
191  {
192  // copy compressed data into output cache
193  memcpy(out, buf.c_str(), buf.size());
194  out += buf.size();
195 
196  // update mask
197  const uint64_t bit = (icol%8);
198  mask[icol/8] |= (1<<bit);
199 
200  continue;
201  }
202  }
203 
204  // just copy the data if it has not been compressed
205  memcpy(out, (char*)ptr, len_col);
206  out += len_col;
207  }
208 
209  // calculate size of output buffer
210  const size_t sz = out-cache.data();
211 
212  // update counters
213  tot += row_tot;
214  com += sz + mask.size();
215 
216  // write the compression mask and the (partly) compressed data stream
217  fout.write((char*)mask.data(), mask.size());
218  fout.write(cache.data(), sz);
219 
220  //if (sz2<0 || memcmp(data, dest3.data(), 432000*2)!=0)
221  // cout << "grrrr" << endl;
222 
223  const float proc = float(f.GetRow())/f.GetNumRows();
224  if (proc>frac)
225  {
226  const double elep = Time().UnixTime()-start.UnixTime();
227  cout << "\r" << setprecision(0) << setw(3) << 100*proc << "% [" << setprecision(1) << setw(5) << 100.*com/tot << "%] cpu:" << sec << "s in:" << tot/1000000/elep << "MB/s" << flush;
228  frac += 0.01;
229  }
230  }
231 
232  const double elep = Time().UnixTime()-start.UnixTime();
233  cout << setprecision(0) << "\r100% [" << setprecision(1) << setw(5) << 100.*com/tot << "%] cpu:" << sec << "s in:" << tot/1000000/elep << "MB/s" << endl;
234 
235  return 0;
236 }
int start(int initState)
Definition: feeserver.c:1740
std::map< std::string, Column > Columns
Definition: fits.h:113
std::string name
Definition: fits.h:95
Adds some functionality to boost::posix_time::ptime for our needs.
Definition: Time.h:30
bool Encode(std::string &bufout, const uint16_t *bufin, size_t bufinlen)
Definition: huffman.h:385
Definition: fits.h:54
double UnixTime() const
Definition: Time.cc:195
if(extraDns) new Dns

+ Here is the call graph for this function:

+ Here is the caller graph for this function: