58 #ifdef SVF_THREAD_SAFE
59 std::lock_guard<std::mutex> mutex(m_mutex);
65 t_map::const_iterator iter = m_svf.upper_bound(fpos);
66 if (iter != m_svf.begin()) {
69 t_fpos fpos_end = _file_position_immediatly_after_block(iter);
70 if (fpos >= iter->first && (fpos + len) <= fpos_end) {
87 assert(iter !=
m_svf.end());
90 if (*data != iter->second.data[index_iter]) {
91 std::ostringstream os;
92 os <<
"SparseVirtualFile::write():";
93 os <<
" Difference at position " << fpos;
94 os <<
" '" << *(data) <<
"' != '" << iter->second.data[index_iter] <<
"'";
95 os <<
" Ordinal " <<
static_cast<int>(*data) <<
" != " <<
static_cast<int>(iter->second.data[index_iter]);
96 std::string str = os.str();
114 assert(
m_svf.count(fpos) == 0);
117 new_value.
data.reserve(len);
121 new_value.
data.insert(new_value.
data.end(), data, data + len);
124 auto size_before_insert =
m_svf.size();
125 m_svf.insert(hint, {fpos, std::move(new_value)});
127 if (
m_svf.size() != 1 + size_before_insert) {
128 std::ostringstream os;
129 os <<
"SparseVirtualFile::_write_new_block():";
130 os <<
" Unable to insert new block at " << fpos;
182 assert(iter !=
m_svf.end());
183 assert(iter->first > fpos);
185 size_t fpos_start = fpos;
186 size_t fpos_end = fpos + len;
191 while (len && fpos < iter->first) {
195 new_value.
data.push_back(*data);
201 size_t index_iter = 0;
202 size_t delta = std::min(len, iter->second.data.size());
207 if (std::memcmp(iter->second.data.data(), data, delta) != 0) {
211 for (
size_t i = 0; i < delta; ++i) {
212 new_value.
data.push_back(*data);
224 while (index_iter < iter->second.data.size()) {
225 new_value.
data.push_back(iter->second.data[index_iter]);
229 iter->second.data.assign(iter->second.data.size(),
OVERWRITE_CHAR);
236 iter->second.data.assign(iter->second.data.size(),
OVERWRITE_CHAR);
238 iter =
m_svf.erase(iter);
239 if (iter ==
m_svf.end() || iter->first > fpos + len) {
242 new_value.
data.push_back(*data);
251 auto size_before_insert =
m_svf.size();
252 m_svf.insert({fpos_start, std::move(new_value)});
253 if (
m_svf.size() != 1 + size_before_insert) {
254 std::ostringstream os;
255 os <<
"SparseVirtualFile::write():";
256 os <<
" Unable to insert new block at " << fpos_start;
260 assert(fpos == fpos_end);
313 t_map::iterator base_block_iter) {
316 assert(new_data_len > 0);
317 assert(base_block_iter !=
m_svf.end());
318 assert(fpos >= base_block_iter->first);
322 size_t fpos_end = fpos + new_data_len;
328 size_t write_index_from_block_start = fpos - base_block_iter->first;
330 size_t len_check_or_copy = std::min(new_data_len,
331 base_block_iter->second.data.size() - write_index_from_block_start);
333 if (std::memcmp(base_block_iter->second.data.data() + write_index_from_block_start, new_data,
334 len_check_or_copy) != 0) {
335 _throw_diff(fpos, new_data, base_block_iter, write_index_from_block_start);
338 new_data += len_check_or_copy;
339 fpos += len_check_or_copy;
340 new_data_len -= len_check_or_copy;
341 t_map::iterator next_block_iter = std::next(base_block_iter);
342 while (new_data_len) {
343 if (next_block_iter ==
m_svf.end()) {
345 while (new_data_len) {
346 base_block_iter->second.data.push_back(*new_data);
355 while (new_data_len && fpos < next_block_iter->first) {
356 base_block_iter->second.data.push_back(*new_data);
363 if (new_data_len == 0 and fpos < next_block_iter->first) {
368 write_index_from_block_start = 0;
372 if (len_check_or_copy) {
374 if (std::memcmp(next_block_iter->second.data.data(), new_data, len_check_or_copy) != 0) {
379 for (
size_t i = 0; i < len_check_or_copy; ++i) {
380 base_block_iter->second.data.push_back(*new_data);
384 ++write_index_from_block_start;
390 if (new_data_len == 0) {
391 while (write_index_from_block_start < next_block_iter->second.data.size()) {
392 base_block_iter->second.data.push_back(next_block_iter->second.data[write_index_from_block_start]);
393 ++write_index_from_block_start;
398 next_block_iter->second.data.assign(next_block_iter->second.data.size(),
OVERWRITE_CHAR);
400 next_block_iter =
m_svf.erase(next_block_iter);
403 assert(new_data_len == 0);
405 assert(fpos == fpos_end);
454 #ifdef SVF_THREAD_SAFE
455 std::lock_guard<std::mutex> mutex(
m_mutex);
462 t_map::iterator iter =
m_svf.upper_bound(fpos);
463 if (iter !=
m_svf.begin()) {
466 if (iter->first > fpos) {
469 if (iter->first <= fpos + len) {
504 #ifdef SVF_THREAD_SAFE
505 std::lock_guard<std::mutex> mutex(
m_mutex);
511 "SparseVirtualFile::read(): Sparse virtual file is empty.");
513 t_map::iterator iter =
m_svf.lower_bound(fpos);
514 if (iter ==
m_svf.begin() && iter->first != fpos) {
515 std::ostringstream os;
516 os <<
"SparseVirtualFile::read():";
517 os <<
" Requested file position " << fpos <<
" precedes first block at " << iter->first;
520 size_t offset_into_block = 0;
521 if (iter ==
m_svf.end() || iter->first != fpos) {
523 offset_into_block = fpos - iter->first;
525 if (offset_into_block + len > iter->second.data.size()) {
526 std::ostringstream os;
527 os <<
"SparseVirtualFile::read():";
528 os <<
" Requested position " << fpos <<
" length " << len;
529 os <<
" (end " << fpos + len <<
")";
530 os <<
" overruns block that starts at " << iter->first <<
" has size " << iter->second.data.size();
531 os <<
" (end " << iter->first + iter->second.data.size() <<
").";
532 os <<
" Offset into block is " << offset_into_block;
533 os <<
" overrun is " << offset_into_block + len - iter->second.data.size() <<
" bytes";
536 if (memcpy(p, iter->second.data.data() + offset_into_block, len) != p) {
537 std::ostringstream os;
538 os <<
"SparseVirtualFile::read():";
539 os <<
" memcpy failed " << fpos <<
" length " << len;
583 #ifdef SVF_THREAD_SAFE
584 std::lock_guard<std::mutex> mutex(m_mutex);
586 return _need_no_lock(fpos, len, greedy_length);
592 return {{fpos, greedy_length > len ? greedy_length : len}};
594 size_t original_len = len;
595 t_fpos fpos_to = fpos + len;
597 t_map::const_iterator iter = m_svf.upper_bound(fpos);
598 if (iter == m_svf.begin()) {
599 if (fpos + len <= iter->first) {
602 ret.push_back({fpos, len});
609 auto last_fpos = _file_position_immediatly_after_block(std::prev(iter));
610 if (fpos < last_fpos) {
616 len -= std::min(len, last_fpos - fpos);
617 fpos = std::min(fpos_to, last_fpos);
622 if (iter == m_svf.end() || fpos + len <= iter->first) {
629 ret.emplace_back(fpos, len);
640 if (fpos < iter->first) {
641 assert(len >= iter->first - fpos);
642 auto bytes_added = iter->first - fpos;
643 ret.emplace_back(fpos, bytes_added);
653 assert(fpos == iter->first);
654 if (fpos + len <= _file_position_immediatly_after_block(iter)) {
663 fpos += iter->second.data.size();
664 len -= iter->second.data.size();
668 assert(fpos == fpos_to);
670 if (greedy_length && greedy_length > original_len && !ret.empty()) {
671 ret = _minimise_seek_reads(ret, greedy_length);
709 #ifdef SVF_THREAD_SAFE
710 std::lock_guard<std::mutex> mutex(m_mutex);
713 std::sort(seek_reads.begin(), seek_reads.end());
715 return _minimise_seek_reads(seek_reads, greedy_length);
718 for (
const auto &iter_seek_read: seek_reads) {
719 for (
const auto &iter_need: _need_no_lock(iter_seek_read.first, iter_seek_read.second, 0)) {
720 ret.emplace_back(iter_need);
723 return _minimise_seek_reads(ret, greedy_length);
734 return iter.second > greedy_length ? iter.second : greedy_length;
751 if (new_seek_reads.empty()) {
753 new_seek_reads.emplace_back(seek_read.first, _amount_to_read(seek_read, greedy_length));
756 auto last_iter_of_new = new_seek_reads.end();
758 if (seek_read.first > last_iter_of_new->first + last_iter_of_new->second) {
760 new_seek_reads.emplace_back(seek_read.first, _amount_to_read(seek_read, greedy_length));
761 }
else if (seek_read.first + seek_read.second > last_iter_of_new->first + last_iter_of_new->second) {
763 last_iter_of_new->second +=
764 (seek_read.first + seek_read.second) - (last_iter_of_new->first + last_iter_of_new->second);
768 return new_seek_reads;
778 #ifdef SVF_THREAD_SAFE
779 std::lock_guard<std::mutex> mutex(
m_mutex);
783 for (
const auto &iter:
m_svf) {
784 ret.emplace_back(iter.first, iter.second.data.size());
799 #ifdef SVF_THREAD_SAFE
800 std::lock_guard<std::mutex> mutex(
m_mutex);
805 "SparseVirtualFile::block_size(): Sparse virtual file is empty.");
807 t_map::const_iterator iter =
m_svf.find(fpos);
808 if (iter ==
m_svf.end()) {
809 std::ostringstream os;
810 os <<
"SparseVirtualFile::block_size():";
811 os <<
" Requested file position " << fpos <<
" is not at the start of a block";
814 return iter->second.data.size();
824 #ifdef SVF_THREAD_SAFE
825 std::lock_guard<std::mutex> mutex(
m_mutex);
831 for (
const auto &iter:
m_svf) {
832 ret +=
sizeof(iter.first);
833 ret +=
sizeof(iter.second);
834 ret += iter.second.data.size();
848 #ifdef SVF_THREAD_SAFE
849 std::lock_guard<std::mutex> mutex(
m_mutex);
853 for (
auto &iter:
m_svf) {
863 m_time_write = std::chrono::time_point<std::chrono::system_clock>::min();
864 m_time_read = std::chrono::time_point<std::chrono::system_clock>::min();
884 auto iter =
m_svf.find(fpos);
885 if (iter ==
m_svf.end()) {
886 std::ostringstream os;
887 os <<
"SparseVirtualFile::erase():";
888 os <<
" Non-existent file position " << fpos <<
" at start of block.";
891 size_t ret = iter->second.data.size();
909 #ifdef SVF_THREAD_SAFE
910 std::lock_guard<std::mutex> mutex(
m_mutex);
931 size_t prev_size = 0;
932 t_map::const_iterator iter =
m_svf.begin();
933 size_t byte_count = 0;
936 while (iter !=
m_svf.end()) {
937 if (iter->second.data.empty()) {
940 if (iter !=
m_svf.begin()) {
941 if (prev_fpos == iter->first && prev_size == iter->second.data.size()) {
944 if (prev_fpos + prev_size == iter->first) {
947 if (prev_fpos + prev_size > iter->first) {
955 prev_fpos = iter->first;
956 prev_size = iter->second.data.size();
957 byte_count += prev_size;
976 #ifdef SVF_THREAD_SAFE
977 std::lock_guard<std::mutex> mutex(
m_mutex);
992 for (
const auto &iter:
m_svf) {
994 assert(ret.find(iter.second.block_touch) == ret.end());
995 ret[iter.second.block_touch] = iter.first;
1009 #ifdef SVF_THREAD_SAFE
1010 std::lock_guard<std::mutex> mutex(
m_mutex);
1025 #ifdef SVF_THREAD_SAFE
1026 std::lock_guard<std::mutex> mutex(
m_mutex);
1031 for (
const auto &iter: touch_fpos_map) {
1054 if (
m_svf.empty()) {
1057 auto iter =
m_svf.end();
1074 assert(iter != m_svf.end());
1076 auto ret = iter->first + iter->second.data.size();
Might be thrown during a write operation where the data differs.
Might be thrown during an erase operation where the file position is not at the exact beginning of a b...
Might be thrown during a write operation where the data differs.
Might be thrown during a write operation which fails.
ERROR_CONDITION
Check result of internal integrity.
@ ERROR_BLOCKS_OVERLAP
Blocks overlap.
@ ERROR_ADJACENT_BLOCKS
Blocks are adjacent and have not been coalesced.
@ ERROR_BYTE_COUNT_MISMATCH
Mismatch in byte count where the count of the bytes in all the blocks does not match m_bytes_total.
@ ERROR_DUPLICATE_BLOCK
Duplicate blocks of the same length and at the same file positions.
@ ERROR_EMPTY_BLOCK
A block is empty.
@ ERROR_DUPLICATE_BLOCK_TOUCH
Two or more blocks have the same block touch value.
t_fpos _file_position_immediatly_after_end() const noexcept
Returns the file position immediately after the last block.
t_seek_reads need_many(t_seek_reads &seek_reads, size_t greedy_length=0) const noexcept
Given many [(file position, lengths), ...] what data do I need that I don't yet have?
size_t m_bytes_punted
The count of bytes that have been erased by punting.
static t_seek_reads _minimise_seek_reads(const t_seek_reads &seek_reads, size_t greedy_length) noexcept
May reduce the list of file position/lengths by coalescing them if possible up to a limit greedy_leng...
std::chrono::time_point< std::chrono::system_clock > m_time_write
Last access real-time timestamp for a write.
size_t size_of() const noexcept
size_of() gives best guess of total memory usage.
size_t m_bytes_total
Total number of bytes in this SVF.
size_t m_blocks_punted
The count of blocks that have been erased by punting.
size_t lru_punt(size_t cache_size_upper_bound)
void _write_append_new_to_old(t_fpos fpos, const char *new_data, size_t new_data_len, t_map::iterator base_block_iter)
From file position, write the new_data to the block identified by base_block_iter....
tSparseVirtualFileConfig m_config
The SVF configuration.
t_block_touches block_touches() const noexcept
Returns a std::map of latest touch value key and file position value.
void _throw_diff(t_fpos fpos, const char *data, t_map::const_iterator iter, size_t index_iter) const
Throws an ExceptionSparseVirtualFileDiff with an explanation of the data difference.
size_t m_bytes_erased
The total count of bytes that have been erased either directly or by punting.
size_t m_blocks_erased
The total count of blocks that have been erased either directly or by punting.
SparseVirtualFile(const std::string &id, double mod_time, const tSparseVirtualFileConfig &config=tSparseVirtualFileConfig())
Create a Sparse Virtual File.
size_t m_count_write
Access statistics: count of write operations.
std::chrono::time_point< std::chrono::system_clock > m_time_read
Last access real-time timestamp for a read.
void write(t_fpos fpos, const char *data, size_t len)
Write the data at the given file position.
void clear() noexcept
Executes the data deletion strategy.
t_fpos _file_position_immediatly_after_block(t_map::const_iterator iter) const noexcept
Returns the file position immediately after the particular block.
void _write_new_append_old(t_fpos fpos, const char *data, size_t len, t_map::iterator iter)
Write a new block and append existing blocks to it.
t_block_touches _block_touches_no_lock() const noexcept
Returns a std::map of latest touch value key and file position value.
static size_t _amount_to_read(t_seek_read iter, size_t greedy_length) noexcept
Returns the maximal length to read given a greedy length.
size_t erase(t_fpos fpos)
Remove a particular block.
bool has(t_fpos fpos, size_t len) const noexcept
Do I have the data at the given file position and length?
size_t block_size(t_fpos fpos) const
The length of the block at a specific file position.
size_t _erase_no_lock(t_fpos fpos)
Remove a particular block.
void _write_new_block(t_fpos fpos, const char *data, size_t len, t_map::const_iterator hint)
Write a brand new block into either an empty SVF or beyond the current blocks.
ERROR_CONDITION integrity() const noexcept
Internal integrity check.
std::mutex m_mutex
Thread mutex. This adds about 5-10% execution time compared with a single-threaded version.
t_map m_svf
The actual SVF.
t_fpos last_file_position() const noexcept
The position of the last byte.
void read(t_fpos fpos, size_t len, char *p)
Read data and write to the buffer provided by the caller. This is non-const as it updates the non-con...
t_seek_reads _need_no_lock(t_fpos fpos, size_t len, size_t greedy_length=0) const noexcept
size_t m_count_read
Access statistics: count of read operations.
t_seek_reads need(t_fpos fpos, size_t len, size_t greedy_length=0) const noexcept
Create a new fragmentation list of seek/read instructions.
std::string m_id
The SVF ID.
t_seek_reads blocks() const noexcept
The existing blocks as a list of (file_position, size) pairs.
t_block_touch m_block_touch
A monotonically increasing integer that indicates the age of a block, smaller is older.
The namespace for all svfsc code.
static const char OVERWRITE_CHAR
Used to overwrite the memory before discarding it (if required).
std::vector< t_seek_read > t_seek_reads
std::map< t_block_touch, t_fpos > t_block_touches
std::pair< t_fpos, size_t > t_seek_read
Typedef for the data. This allows for extra per-block fields in the future.
t_block_touch block_touch