58 #ifdef SVF_THREAD_SAFE
59 std::lock_guard<std::mutex> mutex(m_mutex);
65 t_map::const_iterator iter = m_svf.upper_bound(fpos);
66 if (iter != m_svf.begin()) {
69 t_fpos fpos_end = _file_position_immediatly_after_block(iter);
70 if (fpos >= iter->first && (fpos + len) <= fpos_end) {
87 assert(iter !=
m_svf.end());
90 if (*data != iter->second.data[index_iter]) {
91 std::ostringstream os;
92 os <<
"SparseVirtualFile::write():";
93 os <<
" Difference at position " << fpos;
94 os <<
" '" << *(data) <<
"' != '" << iter->second.data[index_iter] <<
"'";
95 os <<
" Ordinal " <<
static_cast<int>(*data) <<
" != " <<
static_cast<int>(iter->second.data[index_iter]);
96 std::string str = os.str();
114 assert(
m_svf.count(fpos) == 0);
117 new_value.
data.reserve(len);
120 new_value.
data.push_back(*data);
125 auto size_before_insert =
m_svf.size();
126 m_svf.insert(hint, {fpos, std::move(new_value)});
128 if (
m_svf.size() != 1 + size_before_insert) {
129 std::ostringstream os;
130 os <<
"SparseVirtualFile::_write_new_block():";
131 os <<
" Unable to insert new block at " << fpos;
183 assert(iter !=
m_svf.end());
184 assert(iter->first > fpos);
186 size_t fpos_start = fpos;
187 size_t fpos_end = fpos + len;
192 while (len && fpos < iter->first) {
196 new_value.
data.push_back(*data);
202 size_t index_iter = 0;
203 size_t delta = std::min(len, iter->second.data.size());
208 if (std::memcmp(iter->second.data.data(), data, delta) != 0) {
212 for (
size_t i = 0; i < delta; ++i) {
213 new_value.
data.push_back(*data);
225 while (index_iter < iter->second.data.size()) {
226 new_value.
data.push_back(iter->second.data[index_iter]);
230 iter->second.data.assign(iter->second.data.size(),
OVERWRITE_CHAR);
237 iter->second.data.assign(iter->second.data.size(),
OVERWRITE_CHAR);
239 iter =
m_svf.erase(iter);
240 if (iter ==
m_svf.end() || iter->first > fpos + len) {
243 new_value.
data.push_back(*data);
252 auto size_before_insert =
m_svf.size();
253 m_svf.insert({fpos_start, std::move(new_value)});
254 if (
m_svf.size() != 1 + size_before_insert) {
255 std::ostringstream os;
256 os <<
"SparseVirtualFile::write():";
257 os <<
" Unable to insert new block at " << fpos_start;
261 assert(fpos == fpos_end);
314 t_map::iterator base_block_iter) {
317 assert(new_data_len > 0);
318 assert(base_block_iter !=
m_svf.end());
319 assert(fpos >= base_block_iter->first);
323 size_t fpos_end = fpos + new_data_len;
329 size_t write_index_from_block_start = fpos - base_block_iter->first;
331 size_t len_check_or_copy = std::min(new_data_len,
332 base_block_iter->second.data.size() - write_index_from_block_start);
334 if (std::memcmp(base_block_iter->second.data.data() + write_index_from_block_start, new_data,
335 len_check_or_copy) != 0) {
336 _throw_diff(fpos, new_data, base_block_iter, write_index_from_block_start);
339 new_data += len_check_or_copy;
340 fpos += len_check_or_copy;
341 new_data_len -= len_check_or_copy;
342 t_map::iterator next_block_iter = std::next(base_block_iter);
343 while (new_data_len) {
344 if (next_block_iter ==
m_svf.end()) {
346 while (new_data_len) {
347 base_block_iter->second.data.push_back(*new_data);
356 while (new_data_len && fpos < next_block_iter->first) {
357 base_block_iter->second.data.push_back(*new_data);
364 if (new_data_len == 0 and fpos < next_block_iter->first) {
369 write_index_from_block_start = 0;
373 if (len_check_or_copy) {
375 if (std::memcmp(next_block_iter->second.data.data(), new_data, len_check_or_copy) != 0) {
380 for (
size_t i = 0; i < len_check_or_copy; ++i) {
381 base_block_iter->second.data.push_back(*new_data);
385 ++write_index_from_block_start;
391 if (new_data_len == 0) {
392 while (write_index_from_block_start < next_block_iter->second.data.size()) {
393 base_block_iter->second.data.push_back(next_block_iter->second.data[write_index_from_block_start]);
394 ++write_index_from_block_start;
399 next_block_iter->second.data.assign(next_block_iter->second.data.size(),
OVERWRITE_CHAR);
401 next_block_iter =
m_svf.erase(next_block_iter);
404 assert(new_data_len == 0);
406 assert(fpos == fpos_end);
455 #ifdef SVF_THREAD_SAFE
456 std::lock_guard<std::mutex> mutex(
m_mutex);
463 t_map::iterator iter =
m_svf.upper_bound(fpos);
464 if (iter !=
m_svf.begin()) {
467 if (iter->first > fpos) {
470 if (iter->first <= fpos + len) {
505 #ifdef SVF_THREAD_SAFE
506 std::lock_guard<std::mutex> mutex(
m_mutex);
512 "SparseVirtualFile::read(): Sparse virtual file is empty.");
514 t_map::iterator iter =
m_svf.lower_bound(fpos);
515 if (iter ==
m_svf.begin() && iter->first != fpos) {
516 std::ostringstream os;
517 os <<
"SparseVirtualFile::read():";
518 os <<
" Requested file position " << fpos <<
" precedes first block at " << iter->first;
521 size_t offset_into_block = 0;
522 if (iter ==
m_svf.end() || iter->first != fpos) {
524 offset_into_block = fpos - iter->first;
526 if (offset_into_block + len > iter->second.data.size()) {
527 std::ostringstream os;
528 os <<
"SparseVirtualFile::read():";
529 os <<
" Requested position " << fpos <<
" length " << len;
530 os <<
" (end " << fpos + len <<
")";
531 os <<
" overruns block that starts at " << iter->first <<
" has size " << iter->second.data.size();
532 os <<
" (end " << iter->first + iter->second.data.size() <<
").";
533 os <<
" Offset into block is " << offset_into_block;
534 os <<
" overrun is " << offset_into_block + len - iter->second.data.size() <<
" bytes";
537 if (memcpy(p, iter->second.data.data() + offset_into_block, len) != p) {
538 std::ostringstream os;
539 os <<
"SparseVirtualFile::read():";
540 os <<
" memcpy failed " << fpos <<
" length " << len;
584 #ifdef SVF_THREAD_SAFE
585 std::lock_guard<std::mutex> mutex(m_mutex);
587 return _need_no_lock(fpos, len, greedy_length);
593 return {{fpos, greedy_length > len ? greedy_length : len}};
595 size_t original_len = len;
596 t_fpos fpos_to = fpos + len;
598 t_map::const_iterator iter = m_svf.upper_bound(fpos);
599 if (iter == m_svf.begin()) {
600 if (fpos + len <= iter->first) {
603 ret.push_back({fpos, len});
610 auto last_fpos = _file_position_immediatly_after_block(std::prev(iter));
611 if (fpos < last_fpos) {
617 len -= std::min(len, last_fpos - fpos);
618 fpos = std::min(fpos_to, last_fpos);
623 if (iter == m_svf.end() || fpos + len <= iter->first) {
630 ret.emplace_back(fpos, len);
641 if (fpos < iter->first) {
642 assert(len >= iter->first - fpos);
643 auto bytes_added = iter->first - fpos;
644 ret.emplace_back(fpos, bytes_added);
654 assert(fpos == iter->first);
655 if (fpos + len <= _file_position_immediatly_after_block(iter)) {
664 fpos += iter->second.data.size();
665 len -= iter->second.data.size();
669 assert(fpos == fpos_to);
671 if (greedy_length && greedy_length > original_len && !ret.empty()) {
672 ret = _minimise_seek_reads(ret, greedy_length);
710 #ifdef SVF_THREAD_SAFE
711 std::lock_guard<std::mutex> mutex(m_mutex);
714 std::sort(seek_reads.begin(), seek_reads.end());
716 return _minimise_seek_reads(seek_reads, greedy_length);
719 for (
const auto &iter_seek_read: seek_reads) {
720 for (
const auto &iter_need: _need_no_lock(iter_seek_read.first, iter_seek_read.second, 0)) {
721 ret.emplace_back(iter_need);
724 return _minimise_seek_reads(ret, greedy_length);
735 return iter.second > greedy_length ? iter.second : greedy_length;
752 if (new_seek_reads.empty()) {
754 new_seek_reads.emplace_back(seek_read.first, _amount_to_read(seek_read, greedy_length));
757 auto last_iter_of_new = new_seek_reads.end();
759 if (seek_read.first > last_iter_of_new->first + last_iter_of_new->second) {
761 new_seek_reads.emplace_back(seek_read.first, _amount_to_read(seek_read, greedy_length));
762 }
else if (seek_read.first + seek_read.second > last_iter_of_new->first + last_iter_of_new->second) {
764 last_iter_of_new->second +=
765 (seek_read.first + seek_read.second) - (last_iter_of_new->first + last_iter_of_new->second);
769 return new_seek_reads;
779 #ifdef SVF_THREAD_SAFE
780 std::lock_guard<std::mutex> mutex(
m_mutex);
784 for (
const auto &iter:
m_svf) {
785 ret.emplace_back(iter.first, iter.second.data.size());
800 #ifdef SVF_THREAD_SAFE
801 std::lock_guard<std::mutex> mutex(
m_mutex);
806 "SparseVirtualFile::block_size(): Sparse virtual file is empty.");
808 t_map::const_iterator iter =
m_svf.find(fpos);
809 if (iter ==
m_svf.end()) {
810 std::ostringstream os;
811 os <<
"SparseVirtualFile::block_size():";
812 os <<
" Requested file position " << fpos <<
" is not at the start of a block";
815 return iter->second.data.size();
825 #ifdef SVF_THREAD_SAFE
826 std::lock_guard<std::mutex> mutex(
m_mutex);
832 for (
const auto &iter:
m_svf) {
833 ret +=
sizeof(iter.first);
834 ret +=
sizeof(iter.second);
835 ret += iter.second.data.size();
851 #ifdef SVF_THREAD_SAFE
852 std::lock_guard<std::mutex> mutex(
m_mutex);
858 for (
auto &iter:
m_svf) {
882 auto iter =
m_svf.find(fpos);
883 if (iter ==
m_svf.end()) {
884 std::ostringstream os;
885 os <<
"SparseVirtualFile::erase():";
886 os <<
" Non-existent file position " << fpos <<
" at start of block.";
889 size_t ret = iter->second.data.size();
907 #ifdef SVF_THREAD_SAFE
908 std::lock_guard<std::mutex> mutex(
m_mutex);
929 size_t prev_size = 0;
930 t_map::const_iterator iter =
m_svf.begin();
931 size_t byte_count = 0;
934 while (iter !=
m_svf.end()) {
935 if (iter->second.data.empty()) {
938 if (iter !=
m_svf.begin()) {
939 if (prev_fpos == iter->first && prev_size == iter->second.data.size()) {
942 if (prev_fpos + prev_size == iter->first) {
945 if (prev_fpos + prev_size > iter->first) {
953 prev_fpos = iter->first;
954 prev_size = iter->second.data.size();
955 byte_count += prev_size;
974 #ifdef SVF_THREAD_SAFE
975 std::lock_guard<std::mutex> mutex(
m_mutex);
990 for (
const auto &iter:
m_svf) {
992 assert(ret.find(iter.second.block_touch) == ret.end());
993 ret[iter.second.block_touch] = iter.first;
1007 #ifdef SVF_THREAD_SAFE
1008 std::lock_guard<std::mutex> mutex(
m_mutex);
1023 #ifdef SVF_THREAD_SAFE
1024 std::lock_guard<std::mutex> mutex(
m_mutex);
1029 for (
const auto &iter: touch_fpos_map) {
1052 if (
m_svf.empty()) {
1055 auto iter =
m_svf.end();
1072 assert(iter != m_svf.end());
1074 auto ret = iter->first + iter->second.data.size();
Might be thrown during a write operation where the data differs.
Might be thrown during an erase operation where the file position is not at the exact beginning of a b...
Might be thrown during a write operation where the data differs.
Might be thrown during a write operation which fails.
ERROR_CONDITION
Check result of internal integrity.
@ ERROR_BLOCKS_OVERLAP
Blocks overlap.
@ ERROR_ADJACENT_BLOCKS
Blocks are adjacent and have not been coalesced.
@ ERROR_BYTE_COUNT_MISMATCH
Mismatch in byte count where the count of the bytes in all the blocks does not match m_bytes_total.
@ ERROR_DUPLICATE_BLOCK
Duplicate blocks of the same length and at the same file positions.
@ ERROR_EMPTY_BLOCK
A block is empty.
@ ERROR_DUPLICATE_BLOCK_TOUCH
Two or more blocks have the same block touch value.
t_fpos _file_position_immediatly_after_end() const noexcept
Returns the file position immediately after the last block.
t_seek_reads need_many(t_seek_reads &seek_reads, size_t greedy_length=0) const noexcept
Given many [(file position, lengths), ...] what data do I need that I don't yet have?
size_t m_bytes_punted
The count of bytes that have been erased by punting.
static t_seek_reads _minimise_seek_reads(const t_seek_reads &seek_reads, size_t greedy_length) noexcept
May reduce the list of file position/lengths by coalescing them if possible up to a limit greedy_leng...
std::chrono::time_point< std::chrono::system_clock > m_time_write
Last access real-time timestamp for a write.
size_t size_of() const noexcept
size_of() gives best guess of total memory usage.
size_t m_bytes_total
Total number of bytes in this SVF.
size_t m_blocks_punted
The count of blocks that have been erased by punting.
size_t lru_punt(size_t cache_size_upper_bound)
void _write_append_new_to_old(t_fpos fpos, const char *new_data, size_t new_data_len, t_map::iterator base_block_iter)
From file position, write the new_data to the block identified by base_block_iter....
tSparseVirtualFileConfig m_config
The SVF configuration.
t_block_touches block_touches() const noexcept
Returns a std::map of latest touch value key and file position value.
void _throw_diff(t_fpos fpos, const char *data, t_map::const_iterator iter, size_t index_iter) const
Throws an ExceptionSparseVirtualFileDiff with an explanation of the data difference.
size_t m_bytes_erased
The total count of bytes that have been erased either directly or by punting.
size_t m_blocks_erased
The total count of blocks that have been erased either directly or by punting.
SparseVirtualFile(const std::string &id, double mod_time, const tSparseVirtualFileConfig &config=tSparseVirtualFileConfig())
Create a Sparse Virtual File.
size_t m_count_write
Access statistics: count of write operations.
std::chrono::time_point< std::chrono::system_clock > m_time_read
Last access real-time timestamp for a read.
void write(t_fpos fpos, const char *data, size_t len)
Write the data at the given file position.
void clear() noexcept
Executes the data deletion strategy.
t_fpos _file_position_immediatly_after_block(t_map::const_iterator iter) const noexcept
Returns the file position immediately after the particular block.
void _write_new_append_old(t_fpos fpos, const char *data, size_t len, t_map::iterator iter)
Write a new block and append existing blocks to it.
t_block_touches _block_touches_no_lock() const noexcept
Returns a std::map of latest touch value key and file position value.
static size_t _amount_to_read(t_seek_read iter, size_t greedy_length) noexcept
Returns the maximal length to read given a greedy length.
size_t erase(t_fpos fpos)
Remove a particular block.
bool has(t_fpos fpos, size_t len) const noexcept
Do I have the data at the given file position and length?
size_t block_size(t_fpos fpos) const
The length of the block at a specific file position.
size_t _erase_no_lock(t_fpos fpos)
Remove a particular block.
void _write_new_block(t_fpos fpos, const char *data, size_t len, t_map::const_iterator hint)
Write a brand new block into either an empty SVF or beyond the current blocks.
ERROR_CONDITION integrity() const noexcept
Internal integrity check.
std::mutex m_mutex
Thread mutex. This adds about 5-10% execution time compared with a single threaded version.
t_map m_svf
The actual SVF.
t_fpos last_file_position() const noexcept
The position of the last byte.
void read(t_fpos fpos, size_t len, char *p)
Read data and write to the buffer provided by the caller. This is non-const as it updates the non-con...
t_seek_reads _need_no_lock(t_fpos fpos, size_t len, size_t greedy_length=0) const noexcept
size_t m_count_read
Access statistics: count of read operations.
t_seek_reads need(t_fpos fpos, size_t len, size_t greedy_length=0) const noexcept
Create a new fragmentation list of seek/read instructions.
std::string m_id
The SVF ID.
t_seek_reads blocks() const noexcept
The existing blocks as a list of (file_position, size) pairs.
t_block_touch m_block_touch
A monotonically increasing integer that indicates the age of a block, smaller is older.
The namespace for all svfsc code.
static const char OVERWRITE_CHAR
Used to overwrite the memory before discarding it (if required).
std::vector< t_seek_read > t_seek_reads
std::map< t_block_touch, t_fpos > t_block_touches
std::pair< t_fpos, size_t > t_seek_read
Typedef for the data. This allows for extra per-block fields in the future.
t_block_touch block_touch