DOLFINx
DOLFINx C++ interface
HDF5Interface.h
1 // Copyright (C) 2012 Chris N. Richardson and Garth N. Wells
2 //
3 // This file is part of DOLFINx (https://www.fenicsproject.org)
4 //
5 // SPDX-License-Identifier: LGPL-3.0-or-later
6 
7 #pragma once
8 
9 #include <array>
10 #include <cassert>
11 #include <chrono>
12 #include <cstdint>
13 #include <dolfinx/common/log.h>
14 #include <filesystem>
15 #include <hdf5.h>
16 #include <mpi.h>
17 #include <string>
18 #include <vector>
19 
20 namespace dolfinx::io
21 {
22 
24 
26 {
27 #define HDF5_FAIL -1
28 public:
34  static hid_t open_file(MPI_Comm comm, const std::filesystem::path& filename,
35  const std::string& mode, const bool use_mpi_io);
36 
39  static void close_file(const hid_t handle);
40 
43  static void flush_file(const hid_t handle);
44 
48  static std::filesystem::path get_filename(hid_t handle);
49 
60  template <typename T>
61  static void write_dataset(const hid_t handle, const std::string& dataset_path,
62  const T* data,
63  const std::array<std::int64_t, 2>& range,
64  const std::vector<std::int64_t>& global_size,
65  bool use_mpi_io, bool use_chunking);
66 
75  template <typename T>
76  static std::vector<T> read_dataset(const hid_t handle,
77  const std::string& dataset_path,
78  const std::array<std::int64_t, 2>& range);
79 
84  static bool has_dataset(const hid_t handle, const std::string& dataset_path);
85 
90  static std::vector<std::int64_t>
91  get_dataset_shape(const hid_t handle, const std::string& dataset_path);
92 
99  static void set_mpi_atomicity(const hid_t handle, const bool atomic);
100 
105  static bool get_mpi_atomicity(const hid_t handle);
106 
107 private:
111  static void add_group(const hid_t handle, const std::string& dataset_path);
112 
113  // Return HDF5 data type
114  template <typename T>
115  static hid_t hdf5_type()
116  {
117  throw std::runtime_error("Cannot get HDF5 primitive data type. "
118  "No specialised function for this data type");
119  }
120 };
122 //---------------------------------------------------------------------------
123 template <>
124 inline hid_t HDF5Interface::hdf5_type<float>()
125 {
126  return H5T_NATIVE_FLOAT;
127 }
128 //---------------------------------------------------------------------------
129 template <>
130 inline hid_t HDF5Interface::hdf5_type<double>()
131 {
132  return H5T_NATIVE_DOUBLE;
133 }
134 //---------------------------------------------------------------------------
135 template <>
136 inline hid_t HDF5Interface::hdf5_type<int>()
137 {
138  return H5T_NATIVE_INT;
139 }
140 //---------------------------------------------------------------------------
141 template <>
142 inline hid_t HDF5Interface::hdf5_type<std::int64_t>()
143 {
144  return H5T_NATIVE_INT64;
145 }
146 //---------------------------------------------------------------------------
147 template <>
148 inline hid_t HDF5Interface::hdf5_type<std::size_t>()
149 {
150  if (sizeof(std::size_t) == sizeof(unsigned long))
151  return H5T_NATIVE_ULONG;
152  else if (sizeof(std::size_t) == sizeof(unsigned int))
153  return H5T_NATIVE_UINT;
154 
155  throw std::runtime_error("Cannot determine size of std::size_t. "
156  "std::size_t is not the same size as long or int");
157 }
158 //---------------------------------------------------------------------------
159 template <typename T>
160 inline void HDF5Interface::write_dataset(
161  const hid_t file_handle, const std::string& dataset_path, const T* data,
162  const std::array<std::int64_t, 2>& range,
163  const std::vector<int64_t>& global_size, bool use_mpi_io, bool use_chunking)
164 {
165  // Data rank
166  const std::size_t rank = global_size.size();
167  assert(rank != 0);
168 
169  if (rank > 2)
170  {
171  throw std::runtime_error("Cannot write dataset to HDF5 file"
172  "Only rank 1 and rank 2 dataset are supported");
173  }
174 
175  // Get HDF5 data type
176  const hid_t h5type = hdf5_type<T>();
177 
178  // Hyperslab selection parameters
179  std::vector<hsize_t> count(global_size.begin(), global_size.end());
180  count[0] = range[1] - range[0];
181 
182  // Data offsets
183  std::vector<hsize_t> offset(rank, 0);
184  offset[0] = range[0];
185 
186  // Dataset dimensions
187  const std::vector<hsize_t> dimsf(global_size.begin(), global_size.end());
188 
189  // Generic status report
190  herr_t status;
191 
192  // Create a global data space
193  const hid_t filespace0 = H5Screate_simple(rank, dimsf.data(), nullptr);
194  assert(filespace0 != HDF5_FAIL);
195 
196  // Set chunking parameters
197  hid_t chunking_properties;
198  if (use_chunking)
199  {
200  // Set chunk size and limit to 1kB min/1MB max
201  hsize_t chunk_size = dimsf[0] / 2;
202  if (chunk_size > 1048576)
203  chunk_size = 1048576;
204  if (chunk_size < 1024)
205  chunk_size = 1024;
206 
207  hsize_t chunk_dims[2] = {chunk_size, dimsf[1]};
208  chunking_properties = H5Pcreate(H5P_DATASET_CREATE);
209  H5Pset_chunk(chunking_properties, rank, chunk_dims);
210  }
211  else
212  chunking_properties = H5P_DEFAULT;
213 
214  // Check that group exists and recursively create if required
215  const std::string group_name(dataset_path, 0, dataset_path.rfind('/'));
216  add_group(file_handle, group_name);
217 
218  // Create global dataset (using dataset_path)
219  const hid_t dset_id
220  = H5Dcreate2(file_handle, dataset_path.c_str(), h5type, filespace0,
221  H5P_DEFAULT, chunking_properties, H5P_DEFAULT);
222  assert(dset_id != HDF5_FAIL);
223 
224  // Close global data space
225  status = H5Sclose(filespace0);
226  assert(status != HDF5_FAIL);
227 
228  // Create a local data space
229  const hid_t memspace = H5Screate_simple(rank, count.data(), nullptr);
230  assert(memspace != HDF5_FAIL);
231 
232  // Create a file dataspace within the global space - a hyperslab
233  const hid_t filespace1 = H5Dget_space(dset_id);
234  status = H5Sselect_hyperslab(filespace1, H5S_SELECT_SET, offset.data(),
235  nullptr, count.data(), nullptr);
236  assert(status != HDF5_FAIL);
237 
238  // Set parallel access
239  const hid_t plist_id = H5Pcreate(H5P_DATASET_XFER);
240  if (use_mpi_io)
241  {
242 #ifdef H5_HAVE_PARALLEL
243  status = H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE);
244  assert(status != HDF5_FAIL);
245 #else
246  throw std::runtime_error("HDF5 library has not been configured with MPI");
247 #endif
248  }
249 
250  // Write local dataset into selected hyperslab
251  status = H5Dwrite(dset_id, h5type, memspace, filespace1, plist_id, data);
252  assert(status != HDF5_FAIL);
253 
254  if (use_chunking)
255  {
256  // Close chunking properties
257  status = H5Pclose(chunking_properties);
258  assert(status != HDF5_FAIL);
259  }
260 
261  // Close dataset collectively
262  status = H5Dclose(dset_id);
263  assert(status != HDF5_FAIL);
264 
265  // Close hyperslab
266  status = H5Sclose(filespace1);
267  assert(status != HDF5_FAIL);
268 
269  // Close local dataset
270  status = H5Sclose(memspace);
271  assert(status != HDF5_FAIL);
272 
273  // Release file-access template
274  status = H5Pclose(plist_id);
275  assert(status != HDF5_FAIL);
276 }
277 //---------------------------------------------------------------------------
278 template <typename T>
279 inline std::vector<T>
280 HDF5Interface::read_dataset(const hid_t file_handle,
281  const std::string& dataset_path,
282  const std::array<std::int64_t, 2>& range)
283 {
284  auto timer_start = std::chrono::system_clock::now();
285 
286  // Open the dataset
287  const hid_t dset_id
288  = H5Dopen2(file_handle, dataset_path.c_str(), H5P_DEFAULT);
289  assert(dset_id != HDF5_FAIL);
290 
291  // Open dataspace
292  const hid_t dataspace = H5Dget_space(dset_id);
293  assert(dataspace != HDF5_FAIL);
294 
295  // Get rank of data set
296  const int rank = H5Sget_simple_extent_ndims(dataspace);
297  assert(rank >= 0);
298 
299  if (rank > 2)
300  LOG(WARNING) << "HDF5Interface::read_dataset untested for rank > 2.";
301 
302  // Allocate data for shape
303  std::vector<hsize_t> shape(rank);
304 
305  // Get size in each dimension
306  const int ndims = H5Sget_simple_extent_dims(dataspace, shape.data(), nullptr);
307  assert(ndims == rank);
308 
309  // Hyperslab selection
310  std::vector<hsize_t> offset(rank, 0);
311  std::vector<hsize_t> count = shape;
312  if (range[0] != -1 and range[1] != -1)
313  {
314  offset[0] = range[0];
315  count[0] = range[1] - range[0];
316  }
317  else
318  offset[0] = 0;
319 
320  // Select a block in the dataset beginning at offset[], with
321  // size=count[]
322  herr_t status = H5Sselect_hyperslab(dataspace, H5S_SELECT_SET, offset.data(),
323  nullptr, count.data(), nullptr);
324  assert(status != HDF5_FAIL);
325 
326  // Create a memory dataspace
327  const hid_t memspace = H5Screate_simple(rank, count.data(), nullptr);
328  assert(memspace != HDF5_FAIL);
329 
330  // Create local data to read into
331  std::size_t data_size = 1;
332  for (std::size_t i = 0; i < count.size(); ++i)
333  data_size *= count[i];
334  std::vector<T> data(data_size);
335 
336  // Read data on each process
337  const hid_t h5type = hdf5_type<T>();
338  status
339  = H5Dread(dset_id, h5type, memspace, dataspace, H5P_DEFAULT, data.data());
340  assert(status != HDF5_FAIL);
341 
342  // Close dataspace
343  status = H5Sclose(dataspace);
344  assert(status != HDF5_FAIL);
345 
346  // Close memspace
347  status = H5Sclose(memspace);
348  assert(status != HDF5_FAIL);
349 
350  // Close dataset
351  status = H5Dclose(dset_id);
352  assert(status != HDF5_FAIL);
353 
354  auto timer_end = std::chrono::system_clock::now();
355  std::chrono::duration<double> dt = (timer_end - timer_start);
356  double data_rate = data.size() * sizeof(T) / (1e6 * dt.count());
357 
358  LOG(INFO) << "HDF5 Read data rate: " << data_rate << "MB/s";
359 
360  return data;
361 }
362 //---------------------------------------------------------------------------
364 } // namespace dolfinx::io
This class provides an interface to some HDF5 functionality.
Definition: HDF5Interface.h:26
static std::vector< T > read_dataset(const hid_t handle, const std::string &dataset_path, const std::array< std::int64_t, 2 > &range)
Read data from a HDF5 dataset "dataset_path" as defined by range blocks on each process.
static bool has_dataset(const hid_t handle, const std::string &dataset_path)
Check for existence of dataset in HDF5 file.
Definition: HDF5Interface.cpp:149
static void set_mpi_atomicity(const hid_t handle, const bool atomic)
Set MPI atomicity. See https://support.hdfgroup.org/HDF5/doc/RM/RM_H5F.html#File-SetMpiAtomicity and ...
Definition: HDF5Interface.cpp:232
static void close_file(const hid_t handle)
Close HDF5 file.
Definition: HDF5Interface.cpp:120
static hid_t open_file(MPI_Comm comm, const std::filesystem::path &filename, const std::string &mode, const bool use_mpi_io)
Open HDF5 and return file descriptor.
Definition: HDF5Interface.cpp:58
static std::vector< std::int64_t > get_dataset_shape(const hid_t handle, const std::string &dataset_path)
Get dataset shape (size of each dimension)
Definition: HDF5Interface.cpp:199
static void flush_file(const hid_t handle)
Flush data to file to improve data integrity after interruption.
Definition: HDF5Interface.cpp:126
static std::filesystem::path get_filename(hid_t handle)
Get filename.
Definition: HDF5Interface.cpp:132
static bool get_mpi_atomicity(const hid_t handle)
Get MPI atomicity. See https://support.hdfgroup.org/HDF5/doc/RM/RM_H5F.html#File-GetMpiAtomicity and ...
Definition: HDF5Interface.cpp:240
static void write_dataset(const hid_t handle, const std::string &dataset_path, const T *data, const std::array< std::int64_t, 2 > &range, const std::vector< std::int64_t > &global_size, bool use_mpi_io, bool use_chunking)
Write data to existing HDF file as defined by range blocks on each process.
int rank(MPI_Comm comm)
Return process rank for the communicator.
Definition: MPI.cpp:74
Support for file IO.
Definition: cells.h:22