More efficiently schedule sync runs for files that need delayed sync runs

Signed-off-by: Claudio Cambra <claudio.cambra@gmail.com>
This commit is contained in:
Claudio Cambra 2022-09-23 02:33:15 +02:00 committed by Matthieu Gallien
parent fd332a52e2
commit bbbfe3f1c4
5 changed files with 229 additions and 12 deletions

View File

@ -1379,9 +1379,9 @@ bool SyncJournalDb::updateLocalMetadata(const QString &filename,
query->bindValue(3, modtime);
query->bindValue(4, size);
query->bindValue(5, lockInfo._locked ? 1 : 0);
query->bindValue(6, lockInfo._lockOwnerDisplayName);
query->bindValue(7, lockInfo._lockOwnerId);
query->bindValue(8, lockInfo._lockOwnerType);
query->bindValue(6, lockInfo._lockOwnerType);
query->bindValue(7, lockInfo._lockOwnerDisplayName);
query->bindValue(8, lockInfo._lockOwnerId);
query->bindValue(9, lockInfo._lockEditorApp);
query->bindValue(10, lockInfo._lockTime);
query->bindValue(11, lockInfo._lockTimeout);

View File

@ -398,11 +398,15 @@ void ProcessDirectoryJob::processFile(PathTuple path,
if(serverEntry.locked == SyncFileItem::LockStatus::LockedItem) {
const auto lockExpirationTime = serverEntry.lockTime + serverEntry.lockTimeout;
const auto timeRemaining = QDateTime::currentDateTime().secsTo(QDateTime::fromSecsSinceEpoch(lockExpirationTime));
const auto timerInterval = qMax(5LL, timeRemaining);
// Add on a second as a precaution, sometimes we catch the server before it has had a chance to update
const auto lockExpirationTimeout = qMax(5LL, timeRemaining + 1);
qCInfo(lcDisco) << "File:" << path._original << "is locked."
<< "Lock expires in:" << lockExpirationTimeout << "seconds."
<< "A sync run will be scheduled for around that time.";
qCInfo(lcDisco) << "Will re-check lock status for:" << path._original << "in:" << timerInterval << "seconds.";
_discoveryData->_anotherSyncNeeded = true;
_discoveryData->_scheduleSyncInSecs = timerInterval;
_discoveryData->_filesNeedingScheduledSync.insert(path._original, lockExpirationTimeout);
}
// VFS suffixed files on the server are ignored

View File

@ -283,7 +283,7 @@ public:
// output
QByteArray _dataFingerprint;
bool _anotherSyncNeeded = false;
int _scheduleSyncInSecs = -1;
QHash<QString, long long> _filesNeedingScheduledSync;
signals:
void fatalError(const QString &errorString);

View File

@ -463,7 +463,6 @@ void SyncEngine::startSync()
}
if (s_anySyncRunning || _syncRunning) {
ASSERT(false)
return;
}
@ -699,10 +698,8 @@ void SyncEngine::slotDiscoveryFinished()
restoreOldFiles(_syncItems);
}
if (_discoveryPhase->_anotherSyncNeeded && _discoveryPhase->_scheduleSyncInSecs > 0) {
QTimer::singleShot(_discoveryPhase->_scheduleSyncInSecs * 1000, this, [this]{
this->startSync();
});
if (_discoveryPhase->_anotherSyncNeeded && !_discoveryPhase->_filesNeedingScheduledSync.empty()) {
slotScheduleFilesDelayedSync();
} else if (_discoveryPhase->_anotherSyncNeeded && _anotherSyncNeeded == NoFollowUpSync) {
_anotherSyncNeeded = ImmediateFollowUp;
}
@ -1130,4 +1127,169 @@ void SyncEngine::slotInsufficientRemoteStorage()
emit syncError(msg, ErrorCategory::InsufficientRemoteStorage);
}
void SyncEngine::slotScheduleFilesDelayedSync()
{
if (!_discoveryPhase || _discoveryPhase->_filesNeedingScheduledSync.empty()) {
return;
}
// The latest sync of the interval bucket is the one that goes through and is used in the timer.
// By running the sync run as late as possible in the selected interval, we try to strike a
// balance between updating the needed file in a timely manner while also syncing late enough
// to cover all the files in the interval bucket.
static constexpr long long intervalSecs = 60;
const auto scheduledSyncBuckets = groupNeededScheduledSyncRuns(intervalSecs);
qCDebug(lcEngine) << "Active scheduled sync run timers:" << _scheduledSyncTimers.count();
for (const auto &[scheduledSyncTimerSecs, filesAffected] : scheduledSyncBuckets) {
const auto currentSecsSinceEpoch = QDateTime::currentSecsSinceEpoch();
const auto scheduledSyncTimerTime = QDateTime::fromSecsSinceEpoch(currentSecsSinceEpoch + scheduledSyncTimerSecs);
const auto scheduledSyncTimerMsecs = std::chrono::milliseconds(scheduledSyncTimerSecs * 1000);
// We want to make sure that this bucket won't schedule a sync near a pre-existing sync run,
// as we often get, for example, locked file notifications one by one as the user interacts
// through the web.
if (nearbyScheduledSyncTimerUsable(scheduledSyncTimerSecs, intervalSecs)) {
qCInfo(lcEngine) << "Already have a nearby scheduled sync run at:" << scheduledSyncTimerTime
<< "which will be used for files:" << filesAffected;
// We still want to remove these files as files scheduled for sync. We schedule another timer
// for the same time the nearby scheduled sync run completes, remocing these files from our
// hash of files already scheduled for later sync.
QTimer::singleShot(scheduledSyncTimerMsecs, this, [this, filesAffected = filesAffected] {
qCInfo(lcEngine) << "Following files:" << filesAffected
<< "were scheduled for a sync run that already had a nearby run scheduled.";
for (const auto &file : filesAffected) {
_filesScheduledForLaterSync.remove(file);
}
});
continue;
}
qCInfo(lcEngine) << "Will have a new sync run in" << scheduledSyncTimerSecs
<< "seconds, at" << scheduledSyncTimerTime
<< "for files:" << filesAffected;
QSharedPointer<QTimer> newTimer(new QTimer);
newTimer->setSingleShot(true);
// In C++17 structured bindings (used above) cannot be captured, so we need an init capture.
// This is because structured bindings are never names of variables, thus uncapturable.
// This has been changed in C++20 -- feel free to change once we use it
newTimer->callOnTimeout(this, [this, filesAffected = filesAffected] {
qCInfo(lcEngine) << "Rescanning now that delayed sync run is scheduled for:" << filesAffected;
for (const auto &file : filesAffected) {
this->_filesScheduledForLaterSync.remove(file);
}
this->startSync();
this->slotCleanupScheduledSyncTimers();
});
newTimer->start(scheduledSyncTimerMsecs);
_scheduledSyncTimers.append(newTimer);
}
}
QHash<long long, SyncEngine::ScheduledSyncBucket> SyncEngine::groupNeededScheduledSyncRuns(const int interval)
{
QHash<long long, ScheduledSyncBucket> intervalSyncBuckets;
for (auto it = _discoveryPhase->_filesNeedingScheduledSync.cbegin();
it != _discoveryPhase->_filesNeedingScheduledSync.cend();
++it) {
const auto file = it.key();
const auto syncScheduledSecs = it.value();
// We don't want to schedule syncs again for files we have already discovered needing a
// scheduled sync, unless the files have been re-locked or had their lock expire time
// extended
if (_filesScheduledForLaterSync.contains(file) &&
_filesScheduledForLaterSync.value(file) > syncScheduledSecs) {
continue;
}
_filesScheduledForLaterSync.insert(file, syncScheduledSecs);
// Both long long so division results in floor-ed result
const auto intervalBucketKey = syncScheduledSecs / interval;
if (!intervalSyncBuckets.contains(intervalBucketKey)) {
intervalSyncBuckets.insert(intervalBucketKey, {syncScheduledSecs, {file}});
continue;
}
auto bucketValue = intervalSyncBuckets.value(intervalBucketKey);
bucketValue.scheduledSyncTimerSecs = qMax(bucketValue.scheduledSyncTimerSecs, syncScheduledSecs);
bucketValue.files.append(file);
intervalSyncBuckets.insert(intervalBucketKey, bucketValue);
}
return intervalSyncBuckets;
}
bool SyncEngine::nearbyScheduledSyncTimerUsable(const long long scheduledSyncTimerSecs,
const long long intervalSecs) const
{
const auto scheduledSyncTimerMsecs = scheduledSyncTimerSecs * 1000;
const auto halfIntervalMsecs = (intervalSecs * 1000) / 2;
auto nearbyScheduledSync = false;
for (const auto &scheduledTimer : _scheduledSyncTimers) {
const auto timerRemainingMsecs = scheduledTimer->remainingTime();
const auto differenceMsecs = timerRemainingMsecs - scheduledSyncTimerMsecs;
nearbyScheduledSync = differenceMsecs > -halfIntervalMsecs && differenceMsecs < halfIntervalMsecs;
// Iterated timer is going to fire slightly before we need it to for the parameter timer,
// delay it.
if (differenceMsecs > -halfIntervalMsecs && differenceMsecs < 0) {
const auto scheduledSyncTimerTimeoutMsecs = std::chrono::milliseconds(scheduledSyncTimerMsecs);
nearbyScheduledSync = true;
scheduledTimer->start(scheduledSyncTimerTimeoutMsecs);
qCInfo(lcEngine) << "Delayed sync timer with remaining time" << timerRemainingMsecs / 1000
<< "by" << (differenceMsecs * -1) / 1000
<< "seconds due to nearby new sync run needed.";
}
if (nearbyScheduledSync) {
break;
}
}
return nearbyScheduledSync;
}
void SyncEngine::slotCleanupScheduledSyncTimers()
{
qCDebug(lcEngine) << "Beginning scheduled sync timer cleanup.";
auto it = _scheduledSyncTimers.begin();
while(it != _scheduledSyncTimers.end()) {
const auto &timerPtr = *it;
if(timerPtr.isNull() || !timerPtr->isActive()) {
qCDebug(lcEngine) << "Erasing an expired scheduled sync timer.";
it = _scheduledSyncTimers.erase(it);
} else {
++it;
}
}
}
} // namespace OCC

View File

@ -213,7 +213,35 @@ private slots:
void slotInsufficientLocalStorage();
void slotInsufficientRemoteStorage();
void slotScheduleFilesDelayedSync();
void slotCleanupScheduledSyncTimers();
private:
// Some files need a sync run to be executed at a specified time after
// their status is scheduled to change (e.g. lock status will expire in
// 20 minutes.)
//
// Rather than execute a sync run for each file that needs one, we want
// to schedule as few sync runs as possible, trying to have the state of
// these files updated in a timely manner without scheduling runs too
// frequently. We can therefore group files into a bucket.
//
// A bucket contains a group of files requiring a sync run in close
// proximity to each other, with an assigned sync timer interval that can
// be used to schedule a sync run which will update all the files in the
// bucket at the time their state is scheduled to change.
//
// In the pair, first is the actual time at which the bucket is going to
// have its sync scheduled. Second is the vector of all the (paths of)
// files that fall into this bucket.
//
// See SyncEngine::groupNeededScheduledSyncRuns and
// SyncEngine::slotScheduleFilesDelayedSync for usage.
struct ScheduledSyncBucket {
long long scheduledSyncTimerSecs;
QVector<QString> files;
};
bool checkErrorBlacklisting(SyncFileItem &item);
// Cleans up unnecessary downloadinfo entries in the journal as well
@ -232,6 +260,24 @@ private:
// cleanup and emit the finished signal
void finalize(bool success);
// Aggregate scheduled sync runs into interval buckets. Can be used to
// schedule a sync run per bucket instead of per file, reducing load.
//
// Bucket classification is done by simply dividing the seconds until
// scheduled sync time by the interval (note -- integer division!)
QHash<long long, ScheduledSyncBucket> groupNeededScheduledSyncRuns(const int interval);
// Checks if there is already a scheduled sync run timer active near the
// time provided as the parameter.
//
// If this timer will expire within the interval provided, the return is
// true.
//
// If this expiration occurs before the scheduled sync run provided as the
// parameter, it is rescheduled to expire at the time of the parameter.
bool nearbyScheduledSyncTimerUsable(const long long scheduledSyncTimerSecs,
const long long intervalSecs) const;
static bool s_anySyncRunning; //true when one sync is running somewhere (for debugging)
// Must only be acessed during update and reconcile
@ -303,6 +349,11 @@ private:
std::set<QString> _localDiscoveryPaths;
QStringList _leadingAndTrailingSpacesFilesAllowed;
// Hash of files we have scheduled for later sync runs, along with their lock expire times
// NOTE: not necessarily the time at which their sync run will take place
QHash<QString, long long> _filesScheduledForLaterSync;
QVector<QSharedPointer<QTimer>> _scheduledSyncTimers;
};
}