Sparse Virtual File System  0.4.0
A Sparse Virtual File System.
svf.h
Go to the documentation of this file.
1 
177 #ifndef CPPSVF_SVF_H
178 #define CPPSVF_SVF_H
179 
180 #include <string>
181 #include <vector>
182 #include <map>
183 #include <chrono>
184 #include <cassert>
185 
186 #ifdef SVF_THREAD_SAFE
187 
188 #include <mutex>
189 
190 #endif
191 
// SVF_ASSERT(x) is an internal consistency check that is only active when
// DEBUG is defined at compile time.
192 #ifdef DEBUG
// With DEBUG defined: forwards directly to the standard assert() macro.
194 #define SVF_ASSERT(x) assert(x)
195 #else
// Without DEBUG: expands to nothing, so the expression x is NOT evaluated.
197 #define SVF_ASSERT(x)
198 #endif
199 
200 namespace SVFS {
201 
202 #pragma mark - Exceptions
203 
204  namespace Exceptions {
205 
/// Exception specialisation for the SparseVirtualFile.
///
/// Base class of the SVF exceptions. It carries a human readable message
/// that is available through message() and also through the standard
/// std::exception::what() interface, so that handlers which only catch
/// `const std::exception &` still see the explanation.
class ExceptionSparseVirtualFile : public std::exception {
public:
    /// @param in_msg Description of the failure; copied into the exception.
    explicit ExceptionSparseVirtualFile(const std::string &in_msg) : msg(in_msg) {}

    /// @return The message supplied at construction.
    [[nodiscard]] const std::string &message() const { return msg; }

    /// @return The message supplied at construction as a C string.
    /// The pointer is valid for as long as this exception object is alive.
    [[nodiscard]] const char *what() const noexcept override { return msg.c_str(); }

protected:
    /// The explanatory message.
    std::string msg;
};
216 
219  public:
220  explicit ExceptionSparseVirtualFileWrite(const std::string &in_msg) : ExceptionSparseVirtualFile(in_msg) {}
221  };
222 
225  public:
227  in_msg) {}
228  };
229 
232  public:
233  explicit ExceptionSparseVirtualFileRead(const std::string &in_msg) : ExceptionSparseVirtualFile(in_msg) {}
234  };
235 
238  public:
239  explicit ExceptionSparseVirtualFileErase(const std::string &in_msg) : ExceptionSparseVirtualFile(in_msg) {}
240  };
241 
242  } // namespace Exceptions {
243 
244 #pragma mark - typedefs
245 
// Type used for a file position (the fpos arguments of the SVF API).
247  typedef size_t t_fpos;
// A single (file position, length) pair.
249  typedef std::pair<t_fpos, size_t> t_seek_read;
// An ordered list of (file position, length) pairs.
251  typedef std::vector<t_seek_read> t_seek_reads;
// Integer type of the per-block "touch" value that indicates the age of a block.
253  typedef uint32_t t_block_touch;
// Map with the touch value as the key and the file position as the value.
255  typedef std::map<t_block_touch, t_fpos> t_block_touches;
256 
257 #pragma mark - SVF configuration
258 
262  typedef struct SparseVirtualFileConfig {
268  bool overwrite_on_exit = false;
275  bool compare_for_diff = true;
277 
278 #pragma mark - The SVF class
279 
289  public:
297  explicit SparseVirtualFile(const std::string &id, double mod_time,
299  m_id(id),
300  m_file_mod_time(mod_time),
301  m_config(config),
302  m_bytes_total(0),
303  m_count_write(0),
304  m_count_read(0),
305  m_bytes_write(0),
306  m_bytes_read(0),
307  m_time_write(std::chrono::time_point<std::chrono::system_clock>::min()),
308  m_time_read(std::chrono::time_point<std::chrono::system_clock>::min()),
309  m_block_touch(0),
310  m_blocks_erased(0),
311  m_bytes_erased(0),
312  m_blocks_punted(0),
313  m_bytes_punted(0) {
314  }
315 
316  // ---- Read and write etc. ----
/// Do I have the data at the given file position and length?
318  [[nodiscard]] bool has(t_fpos fpos, size_t len) const noexcept;
319 
/// Write the data at the given file position.
320  void write(t_fpos fpos, const char *data, size_t len);
321 
/// Read data and write it to the buffer p provided by the caller.
/// Declared non-const (unlike has() and need()).
324  void read(t_fpos fpos, size_t len, char *p);
325 
/// Create a new fragmentation list of seek/read instructions.
/// greedy_length defaults to 0.
327  [[nodiscard]] t_seek_reads need(t_fpos fpos, size_t len, size_t greedy_length = 0) const noexcept;
/// Given many [(file position, lengths), ...] what data do I need that I don't yet have?
330  [[nodiscard]] t_seek_reads need_many(t_seek_reads &seek_reads, size_t greedy_length = 0) const noexcept;
331 
/// Executes the data deletion strategy.
333  void clear() noexcept;
334 
/// Remove a particular block.
/// NOTE(review): the meaning of the size_t return value is not visible in this header - confirm in svf.cpp.
338  size_t erase(t_fpos fpos);
339 
340  // ---- Meta information about the SVF ----
/// The existing blocks as a list of (file_position, size) pairs.
342  [[nodiscard]] t_seek_reads blocks() const noexcept;
343 
/// The length of the block at a specific file position.
/// Not declared noexcept, unlike most of the other queries here.
344  size_t block_size(t_fpos fpos) const;
345 
346  // Information about memory used:
/// size_of() gives a best guess of the total memory usage.
348  [[nodiscard]] size_t size_of() const noexcept;
349 
/// Gives the exact number of data bytes held (the value of m_bytes_total).
351  [[nodiscard]] size_t num_bytes() const noexcept { return m_bytes_total; };
352 
/// Number of blocks used (the number of entries in m_svf).
354  [[nodiscard]] size_t num_blocks() const noexcept { return m_svf.size(); }
355 
/// The position of the last byte.
357  [[nodiscard]] t_fpos last_file_position() const noexcept;
358 
/// Returns true if file_mod_time is exactly equal to the modification time
/// held by this SVF. This is an exact floating point comparison with ==,
/// there is no tolerance.
361  [[nodiscard]] bool file_mod_time_matches(const double &file_mod_time) const noexcept {
362  return file_mod_time == m_file_mod_time;
363  }
364 
365  // ---- Attribute access ----
/// The ID of the file.
367  [[nodiscard]] const std::string &id() const noexcept { return m_id; }
368 
/// The file modification time as a double representing UNIX seconds.
370  [[nodiscard]] double file_mod_time() const noexcept { return m_file_mod_time; }
371 
/// The configuration.
373  [[nodiscard]] const tSparseVirtualFileConfig &config() const noexcept { return m_config; }
374 
/// Count of write() operations.
376  [[nodiscard]] size_t count_write() const noexcept { return m_count_write; }
377 
/// Count of read() operations.
379  [[nodiscard]] size_t count_read() const noexcept { return m_count_read; }
380 
/// Count of total bytes written with write() operations.
382  [[nodiscard]] size_t bytes_write() const noexcept { return m_bytes_write; }
383 
/// Count of total bytes read with read() operations.
385  [[nodiscard]] size_t bytes_read() const noexcept { return m_bytes_read; }
386 
/// Returns the total count of blocks that have been erased either directly or by punting.
388  [[nodiscard]] size_t blocks_erased() const noexcept { return m_blocks_erased; }
/// Returns the total count of bytes that have been erased either directly or by punting.
390  [[nodiscard]] size_t bytes_erased() const noexcept { return m_bytes_erased; }
/// Returns the total count of blocks that have been erased by punting.
392  [[nodiscard]] size_t blocks_punted() const noexcept { return m_blocks_punted; }
/// Returns the total count of bytes that have been erased by punting.
394  [[nodiscard]] size_t bytes_punted() const noexcept { return m_bytes_punted; }
395 
/// Returns m_time_write, the last access real-time timestamp for a write.
399  [[nodiscard]] std::chrono::time_point<std::chrono::system_clock> time_write() const noexcept {
400  return m_time_write;
401  }
402 
/// Returns m_time_read, the last access real-time timestamp for a read.
406  [[nodiscard]] std::chrono::time_point<std::chrono::system_clock> time_read() const noexcept {
407  return m_time_read;
408  }
409 
/// Return the latest value of the monotonically increasing block_touch value.
411  [[nodiscard]] t_block_touch block_touch() const noexcept { return m_block_touch; }
/// Returns a std::map of latest touch value key and file position value.
412  [[nodiscard]] t_block_touches block_touches() const noexcept;
/// NOTE(review): no description is visible for lru_punt(); the name and the
/// blocks_punted()/bytes_punted() counters suggest it erases the oldest blocks
/// until the cache fits cache_size_upper_bound - confirm against svf.cpp.
413  size_t lru_punt(size_t cache_size_upper_bound);
414 
416  SparseVirtualFile(const SparseVirtualFile &rhs) = delete;
417 
419  SparseVirtualFile operator=(const SparseVirtualFile &rhs) = delete;
420 
421 #ifdef SVF_THREAD_SAFE
422 
425 
427 
428 #else
430  SparseVirtualFile(SparseVirtualFile &&other) = default;
432 #endif
433 
436 
437  private:
/// The SVF ID.
439  std::string m_id;
/// Total number of bytes in this SVF.
445  size_t m_bytes_total = 0;
/// Access statistics: count of write operations.
447  size_t m_count_write = 0;
/// Access statistics: count of read operations.
449  size_t m_count_read = 0;
/// Access statistics: value returned by bytes_write().
452  size_t m_bytes_write = 0;
/// Access statistics: value returned by bytes_read().
455  size_t m_bytes_read = 0;
/// Last access real-time timestamp for a write.
457  std::chrono::time_point<std::chrono::system_clock> m_time_write;
/// Last access real-time timestamp for a read.
459  std::chrono::time_point<std::chrono::system_clock> m_time_read;
/// Typedef for the data. This allows for extra per-block fields in the future.
461  typedef struct {
462  std::vector<char> data;
463  // Potentially more fields here such as time of access.
465  } t_val;
/// Typedef for the map of file blocks <file_position, data>.
467  typedef std::map<t_fpos, t_val> t_map;
// The mutex only exists when SVF_THREAD_SAFE is defined at compile time.
472 #ifdef SVF_THREAD_SAFE
/// Thread mutex. Declared mutable so that const member functions can use it.
474  mutable std::mutex m_mutex;
475 #endif
484  private:
/// Throws a ExceptionSparseVirtualFileDiff with an explanation of the data difference.
485  void _throw_diff(t_fpos fpos, const char *data, t_map::const_iterator iter, size_t index_iter) const;
486 
487  // Write data at file position without checks.
/// Write a brand new block into either an empty SVF or beyond the current blocks.
488  void _write_new_block(t_fpos fpos, const char *data, size_t len, t_map::const_iterator hint);
489 
/// Write a new block and append existing blocks to it.
490  void _write_new_append_old(t_fpos fpos, const char *data, size_t len, t_map::iterator iter);
491 
/// From file position, write the new_data to the block identified by base_block_iter.
492  void _write_append_new_to_old(t_fpos fpos, const char *new_data, size_t new_data_len,
493  t_map::iterator base_block_iter);
494 
495  // Does not use mutex or checks integrity
/// Returns the file position immediately after the last block.
496  [[nodiscard]] t_fpos _file_position_immediatly_after_end() const noexcept;
497 
/// Returns the file position immediately after the particular block.
498  [[nodiscard]] t_fpos _file_position_immediatly_after_block(t_map::const_iterator iter) const noexcept;
499 
/// Returns the maximal length to read given a greedy length.
500  [[nodiscard]] static size_t _amount_to_read(t_seek_read iter, size_t greedy_length) noexcept;
501 
/// May reduce the list of file position/lengths by coalescing them if possible,
/// up to a limit related to greedy_length.
502  [[nodiscard]] static t_seek_reads
503  _minimise_seek_reads(const t_seek_reads &seek_reads, size_t greedy_length) noexcept;
504 
/// NOTE(review): presumably the implementation of need() without taking the mutex - confirm in svf.cpp.
505  [[nodiscard]] t_seek_reads _need_no_lock(t_fpos fpos, size_t len, size_t greedy_length = 0) const noexcept;
/// Remove a particular block.
/// NOTE(review): presumably the implementation of erase() without taking the mutex - confirm in svf.cpp.
506  [[nodiscard]] size_t _erase_no_lock(t_fpos fpos);
/// Returns a std::map of latest touch value key and file position value.
507  [[nodiscard]] t_block_touches _block_touches_no_lock() const noexcept;
508 
525  };
526 
527  [[nodiscard]] ERROR_CONDITION integrity() const noexcept;
528  };
529 
530 } // namespace SVFS
531 
532 #endif //CPPSVF_SVF_H
Might be thrown during a write operation where the data differs.
Definition: svf.h:224
ExceptionSparseVirtualFileDiff(const std::string &in_msg)
Definition: svf.h:226
Might be thrown during an erase operation where the file position is not at the exact beginning of a b...
Definition: svf.h:237
ExceptionSparseVirtualFileErase(const std::string &in_msg)
Definition: svf.h:239
Exception specialisation for the SparseVirtualFile.
Definition: svf.h:207
ExceptionSparseVirtualFile(const std::string &in_msg)
Definition: svf.h:209
const std::string & message() const
Definition: svf.h:211
Might be thrown during a read operation which fails.
Definition: svf.h:231
ExceptionSparseVirtualFileRead(const std::string &in_msg)
Definition: svf.h:233
Might be thrown during a write operation which fails.
Definition: svf.h:218
ExceptionSparseVirtualFileWrite(const std::string &in_msg)
Definition: svf.h:220
Implementation of a Sparse Virtual File.
Definition: svf.h:288
size_t bytes_read() const noexcept
Count of total bytes read with read() operations.
Definition: svf.h:385
SparseVirtualFile & operator=(SparseVirtualFile &&rhs)=delete
ERROR_CONDITION
Check result of internal integrity.
Definition: svf.h:510
@ ERROR_BLOCKS_OVERLAP
Blocks overlap.
Definition: svf.h:518
@ ERROR_ADJACENT_BLOCKS
Blocks are adjacent and have not been coalesced.
Definition: svf.h:516
@ ERROR_NONE
No error.
Definition: svf.h:512
@ ERROR_BYTE_COUNT_MISMATCH
Mismatch in byte count where the count of the bytes in all the blocks does not match m_bytes_total.
Definition: svf.h:520
@ ERROR_DUPLICATE_BLOCK
Duplicate blocks of the same length and at the same file positions.
Definition: svf.h:522
@ ERROR_EMPTY_BLOCK
A block is empty.
Definition: svf.h:514
@ ERROR_DUPLICATE_BLOCK_TOUCH
Two or more blocks have the same block touch value.
Definition: svf.h:524
t_fpos _file_position_immediatly_after_end() const noexcept
Returns the file position immediately after the last block.
Definition: svf.cpp:1051
t_seek_reads need_many(t_seek_reads &seek_reads, size_t greedy_length=0) const noexcept
Given many [(file position, lengths), ...] what data do I need that I don't yet have?
Definition: svf.cpp:708
size_t m_bytes_punted
The count of bytes that have been erased by punting.
Definition: svf.h:483
static t_seek_reads _minimise_seek_reads(const t_seek_reads &seek_reads, size_t greedy_length) noexcept
May reduce the list of file position/lengths by coalescing them if possible up to a limit greedy_leng...
Definition: svf.cpp:748
std::chrono::time_point< std::chrono::system_clock > m_time_write
Last access real-time timestamp for a write.
Definition: svf.h:457
size_t size_of() const noexcept
size_of() gives best guess of total memory usage.
Definition: svf.cpp:823
size_t m_bytes_total
Total number of bytes in this SVF.
Definition: svf.h:445
size_t m_bytes_read
Definition: svf.h:455
size_t m_blocks_punted
The count of blocks that have been erased by punting.
Definition: svf.h:481
size_t lru_punt(size_t cache_size_upper_bound)
Definition: svf.cpp:1021
void _write_append_new_to_old(t_fpos fpos, const char *new_data, size_t new_data_len, t_map::iterator base_block_iter)
From file position, write the new_data to the block identified by base_block_iter....
Definition: svf.cpp:313
size_t blocks_punted() const noexcept
Returns the total count of blocks that have been erased by punting.
Definition: svf.h:392
tSparseVirtualFileConfig m_config
The SVF configuration.
Definition: svf.h:443
t_block_touches block_touches() const noexcept
Returns a std::map of latest touch value key and file position value.
Definition: svf.cpp:1005
size_t num_blocks() const noexcept
Number of blocks used.
Definition: svf.h:354
size_t m_bytes_write
Definition: svf.h:452
void _throw_diff(t_fpos fpos, const char *data, t_map::const_iterator iter, size_t index_iter) const
Throws a ExceptionSparseVirtualFileDiff with an explanation of the data difference.
Definition: svf.cpp:85
double file_mod_time() const noexcept
The file modification time as a double representing UNIX seconds.
Definition: svf.h:370
size_t bytes_erased() const noexcept
Returns the total count of bytes that have been erased either directly or by punting.
Definition: svf.h:390
const std::string & id() const noexcept
The ID of the file.
Definition: svf.h:367
size_t num_bytes() const noexcept
Gives exact number of data bytes held.
Definition: svf.h:351
size_t m_bytes_erased
The total count of bytes that have been erased either directly or by punting.
Definition: svf.h:479
std::chrono::time_point< std::chrono::system_clock > time_write() const noexcept
Definition: svf.h:399
size_t m_blocks_erased
The total count of blocks that have been erased either directly or by punting.
Definition: svf.h:477
SparseVirtualFile(const std::string &id, double mod_time, const tSparseVirtualFileConfig &config=tSparseVirtualFileConfig())
Create a Sparse Virtual File.
Definition: svf.h:297
size_t m_count_write
Access statistics: count of write operations.
Definition: svf.h:447
t_block_touch block_touch() const noexcept
Return the latest value of the monotonically increasing block_touch value.
Definition: svf.h:411
std::chrono::time_point< std::chrono::system_clock > m_time_read
Last access real-time timestamp for a read.
Definition: svf.h:459
void write(t_fpos fpos, const char *data, size_t len)
Write the data at the given file position.
Definition: svf.cpp:453
const tSparseVirtualFileConfig & config() const noexcept
The configuration.
Definition: svf.h:373
void clear() noexcept
Executes the data deletion strategy.
Definition: svf.cpp:849
t_fpos _file_position_immediatly_after_block(t_map::const_iterator iter) const noexcept
Returns the file position immediately after the particular block.
Definition: svf.cpp:1070
void _write_new_append_old(t_fpos fpos, const char *data, size_t len, t_map::iterator iter)
Write a new block and append existing blocks to it.
Definition: svf.cpp:179
size_t bytes_punted() const noexcept
Returns the total count of bytes that have been erased by punting.
Definition: svf.h:394
SparseVirtualFile operator=(const SparseVirtualFile &rhs)=delete
Eliminate copying.
t_block_touches _block_touches_no_lock() const noexcept
Returns a std::map of latest touch value key and file position value.
Definition: svf.cpp:987
static size_t _amount_to_read(t_seek_read iter, size_t greedy_length) noexcept
Returns the maximal length to read given a greedy length.
Definition: svf.cpp:734
std::chrono::time_point< std::chrono::system_clock > time_read() const noexcept
Definition: svf.h:406
size_t erase(t_fpos fpos)
Remove a particular block.
Definition: svf.cpp:905
size_t blocks_erased() const noexcept
Returns the total count of blocks that have been erased either directly or by punting.
Definition: svf.h:388
size_t count_read() const noexcept
Count of read() operations.
Definition: svf.h:379
bool has(t_fpos fpos, size_t len) const noexcept
Do I have the data at the given file position and length?
Definition: svf.cpp:56
size_t block_size(t_fpos fpos) const
The length of the block at a specific file position.
Definition: svf.cpp:798
size_t bytes_write() const noexcept
Count of total bytes written with write() operations.
Definition: svf.h:382
size_t _erase_no_lock(t_fpos fpos)
Remove a particular block.
Definition: svf.cpp:879
bool file_mod_time_matches(const double &file_mod_time) const noexcept
Definition: svf.h:361
void _write_new_block(t_fpos fpos, const char *data, size_t len, t_map::const_iterator hint)
Write a brand new block into either an empty SVF or beyond the current blocks.
Definition: svf.cpp:113
ERROR_CONDITION integrity() const noexcept
Internal integrity check.
Definition: svf.cpp:927
size_t count_write() const noexcept
Count of write() operations.
Definition: svf.h:376
std::mutex m_mutex
Thread mutex. This adds about 5-10% execution time compared with a single threaded version.
Definition: svf.h:474
t_map m_svf
The actual SVF.
Definition: svf.h:469
t_fpos last_file_position() const noexcept
The position of the last byte.
Definition: svf.cpp:972
void read(t_fpos fpos, size_t len, char *p)
Read data and write to the buffer provided by the caller. This is non-const as it updates the non-con...
Definition: svf.cpp:504
t_seek_reads _need_no_lock(t_fpos fpos, size_t len, size_t greedy_length=0) const noexcept
Definition: svf.cpp:590
size_t m_count_read
Access statistics: count of read operations.
Definition: svf.h:449
t_seek_reads need(t_fpos fpos, size_t len, size_t greedy_length=0) const noexcept
Create a new fragmentation list of seek/read instructions.
Definition: svf.cpp:582
double m_file_mod_time
The original file modification date as UNIX time. This is used for consistency checking.
Definition: svf.h:441
std::string m_id
The SVF ID.
Definition: svf.h:439
SparseVirtualFile(SparseVirtualFile &&other)=delete
Prohibit moving, the mutex has no move constructor.
~SparseVirtualFile()
Destruction just clears the internal map.
Definition: svf.h:435
t_seek_reads blocks() const noexcept
The existing blocks as a list of (file_position, size) pairs.
Definition: svf.cpp:777
t_block_touch m_block_touch
A monotonically increasing integer that indicates the age of a block, smaller is older.
Definition: svf.h:471
std::map< t_fpos, t_val > t_map
Typedef for the map of file blocks <file_position, data>.
Definition: svf.h:467
The namespace for all svfsc code.
Definition: svf.cpp:41
struct SVFS::SparseVirtualFileConfig tSparseVirtualFileConfig
Configuration for the Sparse Virtual File.
std::vector< t_seek_read > t_seek_reads
Definition: svf.h:251
std::map< t_block_touch, t_fpos > t_block_touches
Definition: svf.h:255
std::pair< t_fpos, size_t > t_seek_read
Definition: svf.h:249
size_t t_fpos
Definition: svf.h:247
uint32_t t_block_touch
Definition: svf.h:253
Typedef for the data. This allows for extra per-block fields in the future.
Definition: svf.h:461
std::vector< char > data
Definition: svf.h:462
t_block_touch block_touch
Definition: svf.h:464
Configuration for the Sparse Virtual File.
Definition: svf.h:262