Sparse Virtual File System  0.4.1
A Sparse Virtual File System.
svf.cpp
Go to the documentation of this file.
1 
32 #include <algorithm>
33 #include <cstring>
34 #include <iostream>
35 #include <iterator>
36 #include <sstream>
37 #include <set>
38 
39 #include "svf.h"
40 
41 namespace SVFS {
// Fill byte written over a block's data (via `data.assign(size, OVERWRITE_CHAR)` elsewhere in this file)
// before that data is discarded. Note that this is the ASCII digit '0', not the NUL character '\0'.
45  static const char OVERWRITE_CHAR = '0';
46 
56  bool SparseVirtualFile::has(t_fpos fpos, size_t len) const noexcept {
57  SVF_ASSERT(integrity() == ERROR_NONE);
58 #ifdef SVF_THREAD_SAFE
59  std::lock_guard<std::mutex> mutex(m_mutex);
60 #endif
61 
62  if (m_svf.empty()) {
63  return false;
64  }
65  t_map::const_iterator iter = m_svf.upper_bound(fpos);
66  if (iter != m_svf.begin()) {
67  --iter;
68  }
69  t_fpos fpos_end = _file_position_immediatly_after_block(iter);
70  if (fpos >= iter->first && (fpos + len) <= fpos_end) {
71  return true;
72  }
73  return false;
74  }
75 
// Reports a difference between incoming write data and the data already held in a block.
// When *data differs from the existing byte a message is built giving the file position, both characters
// and both ordinal values.
//
// fpos:       file position quoted in the message.
// data:       the new byte to compare; must not be null (asserted).
// iter:       the existing block; must not be m_svf.end() (asserted).
// index_iter: index into iter->second.data of the existing byte to compare with *data.
//
// NOTE(review): the statement that follows `std::string str = os.str();` is not visible in this listing
// (the numbering skips 97); presumably it throws an ExceptionSparseVirtualFileDiff built from `str` - confirm.
// NOTE(review): only one byte is compared here, yet the callers in this file call this after a memcmp() over a
// whole range reports a difference. If the first byte matches and a later one differs control reaches assert(0),
// which does nothing when asserts are compiled out - confirm whether that is intended.
84  void
85  SparseVirtualFile::_throw_diff(t_fpos fpos, const char *data, t_map::const_iterator iter, size_t index_iter) const {
86  assert(data);
87  assert(iter != m_svf.end());
88  assert(m_config.compare_for_diff);
89 
90  if (*data != iter->second.data[index_iter]) {
91  std::ostringstream os;
92  os << "SparseVirtualFile::write():";
93  os << " Difference at position " << fpos;
94  os << " '" << *(data) << "' != '" << iter->second.data[index_iter] << "'";
95  os << " Ordinal " << static_cast<int>(*data) << " != " << static_cast<int>(iter->second.data[index_iter]);
96  std::string str = os.str();
98  }
99  // Assert as this should never be called if there is _not_ a diff.
100  assert(0);
101  }
102 
// Creates a brand new block at fpos holding a copy of data[0, len) and inserts it into m_svf.
//
// fpos: file position of the new block; no block may already start there (asserted).
// data: source bytes, len of them are copied.
// hint: insertion hint handed to m_svf.insert().
//
// Side effects: the new block takes the current m_block_touch value (which is then incremented) and
// m_bytes_total grows by len.
// NOTE(review): the statement after the "Unable to insert" message is not visible in this listing (the numbering
// skips 131); presumably it throws with the text accumulated in `os` - confirm.
113  void SparseVirtualFile::_write_new_block(t_fpos fpos, const char *data, size_t len, t_map::const_iterator hint) {
114  assert(m_svf.count(fpos) == 0);
115 
116  t_val new_value;
117  new_value.data.reserve(len);
118  new_value.block_touch = m_block_touch++;
119 
120  // A simpler call than the loop but not necessarily faster. See git commit 2024-08-28
121  new_value.data.insert(new_value.data.end(), data, data + len);
122  m_bytes_total += len;
123 
124  auto size_before_insert = m_svf.size();
125  m_svf.insert(hint, {fpos, std::move(new_value)});
126  // Sanity check that we really have added a new block (rather than replacing one).
127  if (m_svf.size() != 1 + size_before_insert) {
128  std::ostringstream os;
129  os << "SparseVirtualFile::_write_new_block():";
130  os << " Unable to insert new block at " << fpos;
132  }
133  }
134 
// Writes new data that starts before an existing block and reaches it: builds one merged block that starts
// at fpos, made of the new data plus the content of each existing block it runs into, erases those existing
// blocks and finally inserts the merged block keyed on the original fpos.
//
// fpos: file position of the first new byte; must be before iter->first (asserted).
// data: the new bytes; must not be null (asserted).
// len:  number of new bytes; must be > 0 (asserted).
// iter: the first existing block at or after the new data; must not be m_svf.end() (asserted).
//
// m_bytes_total is incremented only for bytes copied into gaps, never for bytes that overlap existing data.
// NOTE(review): several lines of this function are not visible in this listing (the numbering skips 179, 206,
// 228, 235, 257 and 261), which is why some closing braces below have no visible opener. Presumably the hidden
// openers are configuration checks and the hidden line 257 throws - confirm against the real file.
178  void SparseVirtualFile::_write_new_append_old(t_fpos fpos, const char *data, size_t len, t_map::iterator iter) {
180  assert(data);
181  assert(len > 0);
182  assert(iter != m_svf.end());
183  assert(iter->first > fpos);
184 
185  size_t fpos_start = fpos;
186  size_t fpos_end = fpos + len;
187  t_val new_value;
188  new_value.block_touch = m_block_touch++;
189 
   // Each pass handles the gap before *iter and then *iter itself.
190  while (true) {
191  while (len && fpos < iter->first) {
192  // Copy new data 'X' up to start of iter.
193  // ^===========| |=====|
194  // %XXXX+++++++++++++XX++|
195  new_value.data.push_back(*data);
196  ++data;
197  ++fpos;
198  --len;
199  ++m_bytes_total;
200  }
201  size_t index_iter = 0;
   // delta is the number of new bytes that overlap *iter (zero when the new data stops at its start).
202  size_t delta = std::min(len, iter->second.data.size());
203  // Check overlapped data matches 'Y'
204  // ^===========| |=====|
205  // %++++YYYYYYYYYYYYY++++|
207  if (std::memcmp(iter->second.data.data(), data, delta) != 0) {
208  _throw_diff(fpos, data, iter, 0);
209  }
210  }
   // Append the overlapping new bytes; these are existing positions so m_bytes_total is left alone.
211  for (size_t i = 0; i < delta; ++i) {
212  new_value.data.push_back(*data);
213  ++data;
214  }
215  fpos += delta;
216  len -= delta;
217  index_iter += delta;
218  if (_file_position_immediatly_after_block(iter) > fpos_end) {
219  // Copy rest of iter 'Z'
220  assert(len == 0);
221  // ^=========ZZZ
222  // %+++++++++++++|
223  // So append up to the end of iter and (maybe) go round again.
224  while (index_iter < iter->second.data.size()) {
225  new_value.data.push_back(iter->second.data[index_iter]);
226  ++index_iter;
227  }
229  iter->second.data.assign(iter->second.data.size(), OVERWRITE_CHAR);
230  }
231  m_svf.erase(iter);
232  break;
233  }
234  // Remove copied and checked old block and move on.
236  iter->second.data.assign(iter->second.data.size(), OVERWRITE_CHAR);
237  }
   // erase() returns the iterator to the following block, which becomes the next one to examine.
238  iter = m_svf.erase(iter);
239  if (iter == m_svf.end() || iter->first > fpos + len) {
240  // Copy rest of new and break
241  while (len) {
242  new_value.data.push_back(*data);
243  ++data;
244  ++fpos;
245  --len;
246  ++m_bytes_total;
247  }
248  break;
249  }
250  }
251  auto size_before_insert = m_svf.size();
252  m_svf.insert({fpos_start, std::move(new_value)});
253  if (m_svf.size() != 1 + size_before_insert) {
254  std::ostringstream os;
255  os << "SparseVirtualFile::write():";
256  os << " Unable to insert new block at " << fpos_start;
258  }
259  assert(len == 0);
260  assert(fpos == fpos_end);
262  }
263 
// Writes new data that starts inside, or immediately after, an existing block: the part that overlaps the
// block is compared with what is already there, the rest is appended to the block, and any following blocks
// that the new data reaches are folded into the block and erased.
//
// fpos:            file position of the first new byte; must lie within base_block_iter's block or
//                  immediately after it (asserted).
// new_data:        the new bytes; must not be null (asserted).
// new_data_len:    number of new bytes; must be > 0 (asserted).
// base_block_iter: the block that is extended in place; must not be m_svf.end() (asserted).
//
// m_bytes_total is incremented only for bytes appended into gaps, never for bytes that overlap existing data.
// The base block's block_touch is refreshed at the end.
// NOTE(review): several lines of this function are not visible in this listing (the numbering skips 314, 332,
// 373, 397 and 407), which is why some closing braces below have no visible opener. Presumably the hidden
// openers are configuration checks - confirm against the real file.
312  void SparseVirtualFile::_write_append_new_to_old(t_fpos fpos, const char *new_data, size_t new_data_len,
313  t_map::iterator base_block_iter) {
315  assert(new_data);
316  assert(new_data_len > 0);
317  assert(base_block_iter != m_svf.end());
318  assert(fpos >= base_block_iter->first);
319  assert(fpos <= _file_position_immediatly_after_block(base_block_iter));
320 
321 #ifdef DEBUG
322  size_t fpos_end = fpos + new_data_len;
323 #endif
324  // Diff check against base_block_iter
325  // Do the check to end of new_data_len or end of base_block_iter which ever comes first.
326  // Do not increment m_bytes_total as this is existing new_data.
327  // Diff check against base_block_iter
328  size_t write_index_from_block_start = fpos - base_block_iter->first;
329  // Do the check to end of new_data_len or end of base_block_iter which ever comes first.
330  size_t len_check_or_copy = std::min(new_data_len,
331  base_block_iter->second.data.size() - write_index_from_block_start);
333  if (std::memcmp(base_block_iter->second.data.data() + write_index_from_block_start, new_data,
334  len_check_or_copy) != 0) {
335  _throw_diff(fpos, new_data, base_block_iter, write_index_from_block_start);
336  }
337  }
   // Step over the part of the new data that overlaps the base block; nothing is copied for it.
338  new_data += len_check_or_copy;
339  fpos += len_check_or_copy;
340  new_data_len -= len_check_or_copy;
341  t_map::iterator next_block_iter = std::next(base_block_iter);
342  while (new_data_len) {
343  if (next_block_iter == m_svf.end()) {
344  // Termination case, copy remainder
345  while (new_data_len) {
346  base_block_iter->second.data.push_back(*new_data);
347  ++new_data;
348  ++fpos;
349  --new_data_len;
350  m_bytes_total += 1;
351  }
352  break; // Done. Needed because we are going to do erase(next_block_iter) otherwise.
353  } else {
354  // Copy the new_data up to start of next_block_iter or, we have exhausted the new_data.
355  while (new_data_len && fpos < next_block_iter->first) {
356  base_block_iter->second.data.push_back(*new_data);
357  ++new_data;
358  ++fpos;
359  --new_data_len;
360  m_bytes_total += 1;
361  }
362  }
363  if (new_data_len == 0 and fpos < next_block_iter->first) {
364  // We have exhausted new data and not reached the next block so we are done
365  break;
366  }
367  // Still data to copy, and we are up against the next block which we will coalesce into the base_block_iter.
368  write_index_from_block_start = 0;
369  // Diff check, but also append to base_block_iter.
370  // Do not increment m_bytes_total as this is existing data.
371  len_check_or_copy = std::min(new_data_len, _file_position_immediatly_after_block(next_block_iter) - fpos);
372  if (len_check_or_copy) {
   // NOTE(review): the memcmp() compares against next_block_iter but _throw_diff() is handed base_block_iter
   // with index 0, so the byte it reports comes from the start of the base block - confirm which is intended.
374  if (std::memcmp(next_block_iter->second.data.data(), new_data, len_check_or_copy) != 0) {
375  _throw_diff(fpos, new_data, base_block_iter, 0);
376  }
377  }
378  // We could push_back either the new_data or the existing next block data. We choose the former.
379  for (size_t i = 0; i < len_check_or_copy; ++i) {
380  base_block_iter->second.data.push_back(*new_data);
381  ++new_data;
382  ++fpos;
383  --new_data_len;
384  ++write_index_from_block_start;
385  }
386  }
387  // Here either new_data is exhausted or next_block_iter is.
388  // If new_data is exhausted then copy remaining from next_block_iter to base_block_iter.
389  // Do not increment m_bytes_total as this is existing new_data.
390  if (new_data_len == 0) {
391  while (write_index_from_block_start < next_block_iter->second.data.size()) {
392  base_block_iter->second.data.push_back(next_block_iter->second.data[write_index_from_block_start]);
393  ++write_index_from_block_start;
394  }
395  }
396  // New data is not exhausted so erase next_block_iter as we have copied it and move on to the next block.
398  next_block_iter->second.data.assign(next_block_iter->second.data.size(), OVERWRITE_CHAR);
399  }
400  next_block_iter = m_svf.erase(next_block_iter);
401  }
402  base_block_iter->second.block_touch = m_block_touch++;
403  assert(new_data_len == 0);
404 #ifdef DEBUG
405  assert(fpos == fpos_end);
406 #endif
408  }
409 
452  void SparseVirtualFile::write(t_fpos fpos, const char *data, size_t len) {
454 #ifdef SVF_THREAD_SAFE
455  std::lock_guard<std::mutex> mutex(m_mutex);
456 #endif
457  // TODO: throw if !data, len == 0
458  if (m_svf.empty() || fpos > _file_position_immediatly_after_end()) {
459  // Simple insert of new data into empty map or a node beyond the end (common case).
460  _write_new_block(fpos, data, len, m_svf.begin());
461  } else {
462  t_map::iterator iter = m_svf.upper_bound(fpos);
463  if (iter != m_svf.begin()) {
464  --iter;
465  }
466  if (iter->first > fpos) {
467  // Insert new block, possibly coalescing existing blocks.
468  // New comes earlier so either create a new block or copy existing block on to it.
469  if (iter->first <= fpos + len) {
470  // Need to coalesce
471  _write_new_append_old(fpos, data, len, iter);
472  } else {
473  // The new block precedes the old one
474  _write_new_block(fpos, data, len, iter);
475  }
476  } else {
477  // Existing block.first is <= fpos
478  if (fpos > _file_position_immediatly_after_block(iter)) {
479  // No overlap so just write new block after the last one
480  _write_new_block(fpos, data, len, m_svf.end());
481  } else {
482  // Append new to existing block, possibly coalescing existing blocks.
483  _write_append_new_to_old(fpos, data, len, iter);
484  }
485  }
486  }
487  // Update internals.
488  // NOTE: m_block_touch is incremented in one of the three actual write methods.
489  m_count_write += 1;
490  m_bytes_write += len;
491  m_time_write = std::chrono::system_clock::now();
493  }
494 
// Copies len bytes starting at file position fpos into the caller's buffer p. The whole range must lie inside
// one existing block. On success the block's block_touch is refreshed and the read statistics
// (m_bytes_read, m_count_read, m_time_read) are updated, which is why this is not const.
//
// fpos: file position of the first byte wanted.
// len:  number of bytes wanted.
// p:    destination buffer; len bytes are written to it.
//
// NOTE(review): the statements that act on each error message below are not visible in this listing (the
// numbering skips 507, 510, 518, 534 and 540); presumably each one throws - confirm against the real file.
503  void SparseVirtualFile::read(t_fpos fpos, size_t len, char *p) {
504 #ifdef SVF_THREAD_SAFE
505  std::lock_guard<std::mutex> mutex(m_mutex);
506 #endif
508 
509  if (m_svf.empty()) {
511  "SparseVirtualFile::read(): Sparse virtual file is empty.");
512  }
   // lower_bound() gives the first block that starts at or after fpos.
513  t_map::iterator iter = m_svf.lower_bound(fpos);
514  if (iter == m_svf.begin() && iter->first != fpos) {
515  std::ostringstream os;
516  os << "SparseVirtualFile::read():";
517  os << " Requested file position " << fpos << " precedes first block at " << iter->first;
519  }
520  size_t offset_into_block = 0;
   // Unless a block starts exactly at fpos, the candidate is the preceding block.
521  if (iter == m_svf.end() || iter->first != fpos) {
522  --iter;
523  offset_into_block = fpos - iter->first;
524  }
525  if (offset_into_block + len > iter->second.data.size()) {
526  std::ostringstream os;
527  os << "SparseVirtualFile::read():";
528  os << " Requested position " << fpos << " length " << len;
529  os << " (end " << fpos + len << ")";
530  os << " overruns block that starts at " << iter->first << " has size " << iter->second.data.size();
531  os << " (end " << iter->first + iter->second.data.size() << ").";
532  os << " Offset into block is " << offset_into_block;
533  os << " overrun is " << offset_into_block + len - iter->second.data.size() << " bytes";
535  }
   // NOTE(review): memcpy() returns its destination argument, so this comparison can never be true.
536  if (memcpy(p, iter->second.data.data() + offset_into_block, len) != p) {
537  std::ostringstream os;
538  os << "SparseVirtualFile::read():";
539  os << " memcpy failed " << fpos << " length " << len;
541  }
542  // Adjust non-const members
543  iter->second.block_touch = m_block_touch++;
544  m_bytes_read += len;
545  m_count_read += 1;
546  m_time_read = std::chrono::system_clock::now();
547  }
548 
581  t_seek_reads SparseVirtualFile::need(t_fpos fpos, size_t len, size_t greedy_length) const noexcept {
582  SVF_ASSERT(integrity() == ERROR_NONE);
583 #ifdef SVF_THREAD_SAFE
584  std::lock_guard<std::mutex> mutex(m_mutex);
585 #endif
586  return _need_no_lock(fpos, len, greedy_length);
587  }
588 
589  t_seek_reads SparseVirtualFile::_need_no_lock(t_fpos fpos, size_t len, size_t greedy_length) const noexcept {
590  SVF_ASSERT(integrity() == ERROR_NONE);
591  if (m_svf.empty()) {
592  return {{fpos, greedy_length > len ? greedy_length : len}};
593  }
594  size_t original_len = len;
595  t_fpos fpos_to = fpos + len;
596  t_seek_reads ret;
597  t_map::const_iterator iter = m_svf.upper_bound(fpos);
598  if (iter == m_svf.begin()) {
599  if (fpos + len <= iter->first) {
600  // ^==|
601  // |+++|
602  ret.push_back({fpos, len});
603  fpos += len;
604  // Mark that we are done.
605  len = 0;
606  }
607  } else {
608  // Otherwise check the previous node with std::prev.
609  auto last_fpos = _file_position_immediatly_after_block(std::prev(iter));
610  if (fpos < last_fpos) {
611  // Example, change:
612  // |==| ^==|
613  // |++++++++++++|
614  // to:
615  // |+++++++++|
616  len -= std::min(len, last_fpos - fpos);
617  fpos = std::min(fpos_to, last_fpos);
618  }
619  }
620  // Now walk through the remaining nodes until we have exhausted the read.
621  while (len) {
622  if (iter == m_svf.end() || fpos + len <= iter->first) {
623  // Either:
624  // |==|
625  // |++++++++++++|
626  // or:
627  // |==|
628  // |+++++|
629  ret.emplace_back(fpos, len);
630  fpos += len;
631  len = 0;
632  break;
633  }
634  // We are in this state:
635  // ^=====|
636  // |++++++++++++++|
637  // or:
638  // ^=====|
639  // |++++++++|
640  if (fpos < iter->first) {
641  assert(len >= iter->first - fpos);
642  auto bytes_added = iter->first - fpos;
643  ret.emplace_back(fpos, bytes_added);
644  len -= bytes_added;
645  fpos += bytes_added;
646  }
647  // We are now in this state:
648  // ^=====|
649  // |+++++++|
650  // or:
651  // ^=====|
652  // |++++|
653  assert(fpos == iter->first);
654  if (fpos + len <= _file_position_immediatly_after_block(iter)) {
655  // ^======|
656  // |++++|
657  fpos += len;
658  len = 0;
659  break;
660  } else {
661  // ^======|
662  // |+++++++++|
663  fpos += iter->second.data.size();
664  len -= iter->second.data.size();
665  }
666  ++iter;
667  }
668  assert(fpos == fpos_to);
669  assert(len == 0);
670  if (greedy_length && greedy_length > original_len && !ret.empty()) {
671  ret = _minimise_seek_reads(ret, greedy_length);
672  }
673  return ret;
674  }
675 
707  t_seek_reads SparseVirtualFile::need_many(t_seek_reads &seek_reads, size_t greedy_length) const noexcept {
708  SVF_ASSERT(integrity() == ERROR_NONE);
709 #ifdef SVF_THREAD_SAFE
710  std::lock_guard<std::mutex> mutex(m_mutex);
711 #endif
712 
713  std::sort(seek_reads.begin(), seek_reads.end());
714  if (m_svf.empty()) {
715  return _minimise_seek_reads(seek_reads, greedy_length);
716  }
717  t_seek_reads ret;
718  for (const auto &iter_seek_read: seek_reads) {
719  for (const auto &iter_need: _need_no_lock(iter_seek_read.first, iter_seek_read.second, 0)) {
720  ret.emplace_back(iter_need);
721  }
722  }
723  return _minimise_seek_reads(ret, greedy_length);
724  }
725 
733  size_t SparseVirtualFile::_amount_to_read(t_seek_read iter, size_t greedy_length) noexcept {
734  return iter.second > greedy_length ? iter.second : greedy_length;
735  }
736 
// Coalesces a list of (file position, length) pairs. Walking the input in order, each pair is compared with
// the last pair emitted so far:
//  - it starts beyond the end of that pair: emitted as a new pair whose length is _amount_to_read(), that is
//    the larger of its own length and greedy_length;
//  - it starts within (or immediately after) that pair but ends beyond it: that pair is lengthened to reach
//    its end;
//  - otherwise it is already covered and is dropped.
// The comparisons only look at the last emitted pair, so the input is presumably expected to be in ascending
// file position order - confirm against the callers.
// NOTE(review): the return type line of this definition is not visible in this listing.
747  SparseVirtualFile::_minimise_seek_reads(const t_seek_reads &seek_reads, size_t greedy_length) noexcept {
748 
749  t_seek_reads new_seek_reads;
750  for (const t_seek_read &seek_read: seek_reads) {
751  if (new_seek_reads.empty()) {
752  // Add the first greedy block
753  new_seek_reads.emplace_back(seek_read.first, _amount_to_read(seek_read, greedy_length));
754  } else {
755  // Compare with last new block
756  auto last_iter_of_new = new_seek_reads.end();
757  --last_iter_of_new;
758  if (seek_read.first > last_iter_of_new->first + last_iter_of_new->second) {
759  // Add a new greedy block
760  new_seek_reads.emplace_back(seek_read.first, _amount_to_read(seek_read, greedy_length));
761  } else if (seek_read.first + seek_read.second > last_iter_of_new->first + last_iter_of_new->second) {
762  // Extend last block
763  last_iter_of_new->second +=
764  (seek_read.first + seek_read.second) - (last_iter_of_new->first + last_iter_of_new->second);
765  } // Otherwise do nothing, it is covered by greedy.
766  }
767  }
768  return new_seek_reads;
769  }
770 
// Body of blocks(): returns one (file position, size in bytes) pair per block, in m_svf iteration order.
// NOTE(review): the signature line of this definition is not visible in this listing.
778 #ifdef SVF_THREAD_SAFE
779  std::lock_guard<std::mutex> mutex(m_mutex);
780 #endif
781 
782  t_seek_reads ret;
783  for (const auto &iter: m_svf) {
784  ret.emplace_back(iter.first, iter.second.data.size());
785  }
786  return ret;
787  }
788 
// Body of block_size(): returns the size in bytes of the block that starts exactly at fpos.
// An empty SVF, or an fpos that is not the start of a block, is an error.
// NOTE(review): the signature line and the statements that act on the two error messages are not visible in
// this listing; presumably each error is thrown - confirm against the real file.
799 #ifdef SVF_THREAD_SAFE
800  std::lock_guard<std::mutex> mutex(m_mutex);
801 #endif
802 
803  if (m_svf.empty()) {
805  "SparseVirtualFile::block_size(): Sparse virtual file is empty.");
806  }
   // find() matches block start positions only, a position inside a block is not found.
807  t_map::const_iterator iter = m_svf.find(fpos);
808  if (iter == m_svf.end()) {
809  std::ostringstream os;
810  os << "SparseVirtualFile::block_size():";
811  os << " Requested file position " << fpos << " is not at the start of a block";
813  }
814  return iter->second.data.size();
815  }
816 
// Estimates the memory used by this object: sizeof(SparseVirtualFile) plus the length of m_id plus, for every
// block, the size of its key, the size of its value struct and the number of data bytes it holds.
// The data contribution uses size(), not capacity().
// NOTE(review): one line after the signature is not visible in this listing (the numbering skips 823).
822  size_t SparseVirtualFile::size_of() const noexcept {
824 #ifdef SVF_THREAD_SAFE
825  std::lock_guard<std::mutex> mutex(m_mutex);
826 #endif
827  size_t ret = sizeof(SparseVirtualFile);
828 
829  // Add heap referenced data sizes.
830  ret += m_id.size();
831  for (const auto &iter: m_svf) {
832  ret += sizeof(iter.first);
833  ret += sizeof(iter.second);
834  ret += iter.second.data.size();
835  }
836  return ret;
837  }
838 
// Removes every block and resets all byte/count statistics, both timestamps and the block touch counter.
// Block data may first be overwritten with OVERWRITE_CHAR.
// NOTE(review): some lines are not visible in this listing (the numbering skips 847, 852 and 870); the
// unmatched closing brace after the overwrite loop suggests the loop sits inside a hidden condition,
// presumably a configuration check - confirm against the real file.
846  void SparseVirtualFile::clear() noexcept {
848 #ifdef SVF_THREAD_SAFE
849  std::lock_guard<std::mutex> mutex(m_mutex);
850 #endif
851  // Maintain ID and constructor arguments.
853  for (auto &iter: m_svf) {
854  iter.second.data.assign(iter.second.data.size(), OVERWRITE_CHAR);
855  }
856  }
857  m_svf.clear();
858  m_bytes_total = 0;
859  m_count_write = 0;
860  m_count_read = 0;
861  m_bytes_write = 0;
862  m_bytes_read = 0;
   // The timestamps go back to the smallest representable time point.
863  m_time_write = std::chrono::time_point<std::chrono::system_clock>::min();
864  m_time_read = std::chrono::time_point<std::chrono::system_clock>::min();
865  m_block_touch = 0;
866  m_blocks_erased = 0;
867  m_bytes_erased = 0;
868  m_blocks_punted = 0;
869  m_bytes_punted = 0;
871  }
872 
883 
// Body of _erase_no_lock(): removes the block that starts exactly at fpos and returns the number of bytes it
// held. m_bytes_total is reduced by that amount, m_blocks_erased is incremented and m_bytes_erased is
// increased. No lock is taken in the visible code.
// NOTE(review): the signature line and the statement that acts on the error message are not visible in this
// listing; presumably the error is thrown - confirm against the real file.
884  auto iter = m_svf.find(fpos);
885  if (iter == m_svf.end()) {
886  std::ostringstream os;
887  os << "SparseVirtualFile::erase():";
888  os << " Non-existent file position " << fpos << " at start of block.";
890  }
891  size_t ret = iter->second.data.size();
892  m_bytes_total -= ret;
893  m_svf.erase(iter);
894  m_blocks_erased++;
895  m_bytes_erased += ret;
896  return ret;
897  }
898 
// Body of erase(): takes the lock (when SVF_THREAD_SAFE is defined) and delegates to _erase_no_lock(),
// returning whatever that returns.
// NOTE(review): the signature line of this definition is not visible in this listing.
909 #ifdef SVF_THREAD_SAFE
910  std::lock_guard<std::mutex> mutex(m_mutex);
911 #endif
912  return _erase_no_lock(fpos);
913  }
914 
// Internal consistency check. Walks the blocks in ascending file position order and reports the first
// problem found:
//  - a block with no data                                     -> ERROR_EMPTY_BLOCK
//  - same position and size as the previous block             -> ERROR_DUPLICATE_BLOCK
//  - starts exactly where the previous block ends             -> ERROR_ADJACENT_BLOCKS
//  - starts before the previous block ends                    -> ERROR_BLOCKS_OVERLAP
// and finally compares the sum of all block sizes with m_bytes_total. Returns ERROR_NONE if nothing is wrong.
// No lock is taken here.
//
// NOTE(review): the return type line and the statements numbered 953 and 961 are not visible in this listing;
// presumably they return ERROR_DUPLICATE_BLOCK_TOUCH and ERROR_BYTE_COUNT_MISMATCH respectively - confirm.
// NOTE(review): nothing in the visible code ever inserts into `block_touches`, so the find() below can never
// succeed and a duplicated block_touch value would go unreported - confirm whether an insert() is missing.
929  SparseVirtualFile::integrity() const noexcept {
930  t_fpos prev_fpos = 0;
931  size_t prev_size = 0;
932  t_map::const_iterator iter = m_svf.begin();
933  size_t byte_count = 0;
934  std::set<t_block_touch> block_touches;
935 
936  while (iter != m_svf.end()) {
937  if (iter->second.data.empty()) {
938  return ERROR_EMPTY_BLOCK;
939  }
   // The comparisons with the previous block do not apply to the first block.
940  if (iter != m_svf.begin()) {
941  if (prev_fpos == iter->first && prev_size == iter->second.data.size()) {
942  return ERROR_DUPLICATE_BLOCK;
943  }
944  if (prev_fpos + prev_size == iter->first) {
945  return ERROR_ADJACENT_BLOCKS;
946  }
947  if (prev_fpos + prev_size > iter->first) {
948  return ERROR_BLOCKS_OVERLAP;
949  }
950  }
951  if (block_touches.find(iter->second.block_touch) != block_touches.end()) {
952  // Duplicate block_touches value
954  }
955  prev_fpos = iter->first;
956  prev_size = iter->second.data.size();
957  byte_count += prev_size;
958  ++iter;
959  }
960  if (byte_count != m_bytes_total) {
962  }
963  return ERROR_NONE;
964  }
965 
// last_file_position(): returns a t_fpos; the lock is taken when SVF_THREAD_SAFE is defined.
// NOTE(review): the signature and the statement that computes the return value are not visible in this
// listing (the numbering skips 974, 975 and 979), so nothing more can be said about the result from here.
973 t_fpos
976 #ifdef SVF_THREAD_SAFE
977  std::lock_guard<std::mutex> mutex(m_mutex);
978 #endif
980  }
981 
// Body of _block_touches_no_lock(): builds a map from each block's block_touch value to the file position
// at which that block starts. Every block_touch is expected to be unique (asserted). No lock is taken in the
// visible code.
// NOTE(review): the signature line of this definition is not visible in this listing.
991  t_block_touches ret;
992  for (const auto &iter: m_svf) {
993  // The block_touch should not be in the return value, yet.
994  assert(ret.find(iter.second.block_touch) == ret.end());
995  ret[iter.second.block_touch] = iter.first;
996  }
997  return ret;
998  }
999 
// Locking wrapper: takes the lock (when SVF_THREAD_SAFE is defined) and returns the result of
// _block_touches_no_lock(), a map of block_touch value to block file position.
// NOTE(review): one line after the signature is not visible in this listing (the numbering skips 1008).
1007  [[nodiscard]] t_block_touches SparseVirtualFile::block_touches() const noexcept {
1009 #ifdef SVF_THREAD_SAFE
1010  std::lock_guard<std::mutex> mutex(m_mutex);
1011 #endif
1012  return _block_touches_no_lock();
1013  }
1014 
// Erases blocks, smallest block_touch value first, for as long as more than one block remains and
// m_bytes_total is at least cache_size_upper_bound. At least one block is therefore always kept.
//
// cache_size_upper_bound: erasing stops once m_bytes_total drops below this value.
// Returns the number of bytes erased by this call, which is also added to m_bytes_punted; m_blocks_punted is
// incremented once per erased block.
// NOTE(review): one line after the signature is not visible in this listing (the numbering skips 1024).
1023  size_t SparseVirtualFile::lru_punt(size_t cache_size_upper_bound) {
1025 #ifdef SVF_THREAD_SAFE
1026  std::lock_guard<std::mutex> mutex(m_mutex);
1027 #endif
1028  size_t ret = 0;
1029  if (m_svf.size() > 1 and m_bytes_total >= cache_size_upper_bound) {
   // A snapshot keyed on block_touch, so iterating it visits the blocks in ascending block_touch order.
1030  auto touch_fpos_map = _block_touches_no_lock();
1031  for (const auto &iter: touch_fpos_map) {
1032  if (m_svf.size() > 1 and m_bytes_total >= cache_size_upper_bound) {
1033  ret += _erase_no_lock(iter.second);
1034  m_blocks_punted++;
1035  } else {
1036  break;
1037  }
1038  }
1039  }
1040  m_bytes_punted += ret;
1041  return ret;
1042  }
1043 
// _file_position_immediatly_after_end(): returns 0 when there are no blocks, otherwise a value derived from
// the last block in m_svf.
// NOTE(review): the signature line and the return statement of the non-empty branch are not visible in this
// listing (the numbering skips 1053 and 1059); presumably the result is the file position immediately after
// the last block - confirm against the real file.
1052 t_fpos
1054  if (m_svf.empty()) {
1055  return 0;
1056  } else {
1057  auto iter = m_svf.end();
1058  --iter;
1060  }
1061  }
1062 
1071  t_fpos
1072  SparseVirtualFile::_file_position_immediatly_after_block(t_map::const_iterator iter) const noexcept {
1073  // NOTE: do not SVF_ASSERT(integrity() == ERROR_NONE); as integrity() calls this so infinite recursion.
1074  assert(iter != m_svf.end());
1075 
1076  auto ret = iter->first + iter->second.data.size();
1077  return ret;
1078  }
1079 
1080 } // namespace SVFS
Might be thrown during a write operation where the data differs.
Definition: svf.h:224
Might be thrown during an erase operation where the file position is not at the exact beginning of a b...
Definition: svf.h:237
Might be thrown during a write operation where the data differs.
Definition: svf.h:231
Might be thrown during a write operation which fails.
Definition: svf.h:218
ERROR_CONDITION
Check result of internal integrity.
Definition: svf.h:510
@ ERROR_BLOCKS_OVERLAP
Blocks overlap.
Definition: svf.h:518
@ ERROR_ADJACENT_BLOCKS
Blocks are adjacent and have not been coalesced.
Definition: svf.h:516
@ ERROR_NONE
No error.
Definition: svf.h:512
@ ERROR_BYTE_COUNT_MISMATCH
Mismatch in byte count where the count of the bytes in all the blocks does not match m_bytes_total.
Definition: svf.h:520
@ ERROR_DUPLICATE_BLOCK
Duplicate blocks of the same length and at the same file positions.
Definition: svf.h:522
@ ERROR_EMPTY_BLOCK
A block is empty.
Definition: svf.h:514
@ ERROR_DUPLICATE_BLOCK_TOUCH
Two or more blocks have the same block touch value.
Definition: svf.h:524
t_fpos _file_position_immediatly_after_end() const noexcept
Returns the file position immediately after the last block.
Definition: svf.cpp:1053
t_seek_reads need_many(t_seek_reads &seek_reads, size_t greedy_length=0) const noexcept
Given many [(file position, lengths), ...] what data do I need that I don't yet have?
Definition: svf.cpp:707
size_t m_bytes_punted
The count of bytes that have been erased by punting.
Definition: svf.h:483
static t_seek_reads _minimise_seek_reads(const t_seek_reads &seek_reads, size_t greedy_length) noexcept
May reduce the list of file position/lengths by coalescing them if possible up to a limit greedy_leng...
Definition: svf.cpp:747
std::chrono::time_point< std::chrono::system_clock > m_time_write
Last access real-time timestamp for a write.
Definition: svf.h:457
size_t size_of() const noexcept
size_of() gives best guess of total memory usage.
Definition: svf.cpp:822
size_t m_bytes_total
Total number of bytes in this SVF.
Definition: svf.h:445
size_t m_bytes_read
Definition: svf.h:455
size_t m_blocks_punted
The count of blocks that have been erased by punting.
Definition: svf.h:481
size_t lru_punt(size_t cache_size_upper_bound)
Definition: svf.cpp:1023
void _write_append_new_to_old(t_fpos fpos, const char *new_data, size_t new_data_len, t_map::iterator base_block_iter)
From file position, write the new_data to the block identified by base_block_iter....
Definition: svf.cpp:312
tSparseVirtualFileConfig m_config
The SVF configuration.
Definition: svf.h:443
t_block_touches block_touches() const noexcept
Returns a std::map of latest touch value key and file position value.
Definition: svf.cpp:1007
size_t m_bytes_write
Definition: svf.h:452
void _throw_diff(t_fpos fpos, const char *data, t_map::const_iterator iter, size_t index_iter) const
Throws an ExceptionSparseVirtualFileDiff with an explanation of the data difference.
Definition: svf.cpp:85
size_t m_bytes_erased
The total count of bytes that have been erased either directly or by punting.
Definition: svf.h:479
size_t m_blocks_erased
The total count of blocks that have been erased either directly or by punting.
Definition: svf.h:477
SparseVirtualFile(const std::string &id, double mod_time, const tSparseVirtualFileConfig &config=tSparseVirtualFileConfig())
Create a Sparse Virtual File.
Definition: svf.h:297
size_t m_count_write
Access statistics: count of write operations.
Definition: svf.h:447
std::chrono::time_point< std::chrono::system_clock > m_time_read
Last access real-time timestamp for a read.
Definition: svf.h:459
void write(t_fpos fpos, const char *data, size_t len)
Write the data at the given file position.
Definition: svf.cpp:452
void clear() noexcept
Executes the data deletion strategy.
Definition: svf.cpp:846
t_fpos _file_position_immediatly_after_block(t_map::const_iterator iter) const noexcept
Returns the file position immediately after the particular block.
Definition: svf.cpp:1072
void _write_new_append_old(t_fpos fpos, const char *data, size_t len, t_map::iterator iter)
Write a new block and append existing blocks to it.
Definition: svf.cpp:178
t_block_touches _block_touches_no_lock() const noexcept
Returns a std::map of latest touch value key and file position value.
Definition: svf.cpp:989
static size_t _amount_to_read(t_seek_read iter, size_t greedy_length) noexcept
Returns the maximal length to read given a greedy length.
Definition: svf.cpp:733
size_t erase(t_fpos fpos)
Remove a particular block.
Definition: svf.cpp:907
bool has(t_fpos fpos, size_t len) const noexcept
Do I have the data at the given file position and length?
Definition: svf.cpp:56
size_t block_size(t_fpos fpos) const
The length of the block at a specific file position.
Definition: svf.cpp:797
size_t _erase_no_lock(t_fpos fpos)
Remove a particular block.
Definition: svf.cpp:881
void _write_new_block(t_fpos fpos, const char *data, size_t len, t_map::const_iterator hint)
Write a brand new block into either an empty SVF or beyond the current blocks.
Definition: svf.cpp:113
ERROR_CONDITION integrity() const noexcept
Internal integrity check.
Definition: svf.cpp:929
std::mutex m_mutex
Thread mutex. This adds about 5-10% execution time compared with a single threaded version.
Definition: svf.h:474
t_map m_svf
The actual SVF.
Definition: svf.h:469
t_fpos last_file_position() const noexcept
The position of the last byte.
Definition: svf.cpp:974
void read(t_fpos fpos, size_t len, char *p)
Read data and write to the buffer provided by the caller. This is non-const as it updates the non-con...
Definition: svf.cpp:503
t_seek_reads _need_no_lock(t_fpos fpos, size_t len, size_t greedy_length=0) const noexcept
Definition: svf.cpp:589
size_t m_count_read
Access statistics: count of read operations.
Definition: svf.h:449
t_seek_reads need(t_fpos fpos, size_t len, size_t greedy_length=0) const noexcept
Create a new fragmentation list of seek/read instructions.
Definition: svf.cpp:581
std::string m_id
The SVF ID.
Definition: svf.h:439
t_seek_reads blocks() const noexcept
The existing blocks as a list of (file_position, size) pairs.
Definition: svf.cpp:776
t_block_touch m_block_touch
A monotonically increasing integer that indicates the age of a block, smaller is older.
Definition: svf.h:471
The namespace for all svfsc code.
Definition: svf.cpp:41
static const char OVERWRITE_CHAR
Used to overwrite the memory before discarding it (if required).
Definition: svf.cpp:45
std::vector< t_seek_read > t_seek_reads
Definition: svf.h:251
std::map< t_block_touch, t_fpos > t_block_touches
Definition: svf.h:255
std::pair< t_fpos, size_t > t_seek_read
Definition: svf.h:249
size_t t_fpos
Definition: svf.h:247
Typedef for the data. This allows for extra per-block fields in the future.
Definition: svf.h:461
std::vector< char > data
Definition: svf.h:462
t_block_touch block_touch
Definition: svf.h:464
#define SVF_ASSERT(x)
Definition: svf.h:194