From bbbfe3f1c47ef562602dad09df9b3189de2180d3 Mon Sep 17 00:00:00 2001 From: Claudio Cambra Date: Fri, 23 Sep 2022 02:33:15 +0200 Subject: More efficiently schedule sync runs for files that need delayed sync runs Signed-off-by: Claudio Cambra --- src/common/syncjournaldb.cpp | 6 +- src/libsync/discovery.cpp | 10 ++- src/libsync/discoveryphase.h | 2 +- src/libsync/syncengine.cpp | 172 +++++++++++++++++++++++++++++++++++++++++-- src/libsync/syncengine.h | 51 +++++++++++++ 5 files changed, 229 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/src/common/syncjournaldb.cpp b/src/common/syncjournaldb.cpp index 94ec86899..ba4a84d4e 100644 --- a/src/common/syncjournaldb.cpp +++ b/src/common/syncjournaldb.cpp @@ -1379,9 +1379,9 @@ bool SyncJournalDb::updateLocalMetadata(const QString &filename, query->bindValue(3, modtime); query->bindValue(4, size); query->bindValue(5, lockInfo._locked ? 1 : 0); - query->bindValue(6, lockInfo._lockOwnerDisplayName); - query->bindValue(7, lockInfo._lockOwnerId); - query->bindValue(8, lockInfo._lockOwnerType); + query->bindValue(6, lockInfo._lockOwnerType); + query->bindValue(7, lockInfo._lockOwnerDisplayName); + query->bindValue(8, lockInfo._lockOwnerId); query->bindValue(9, lockInfo._lockEditorApp); query->bindValue(10, lockInfo._lockTime); query->bindValue(11, lockInfo._lockTimeout); diff --git a/src/libsync/discovery.cpp b/src/libsync/discovery.cpp index 5faaa11ad..00494ee52 100644 --- a/src/libsync/discovery.cpp +++ b/src/libsync/discovery.cpp @@ -398,11 +398,15 @@ void ProcessDirectoryJob::processFile(PathTuple path, if(serverEntry.locked == SyncFileItem::LockStatus::LockedItem) { const auto lockExpirationTime = serverEntry.lockTime + serverEntry.lockTimeout; const auto timeRemaining = QDateTime::currentDateTime().secsTo(QDateTime::fromSecsSinceEpoch(lockExpirationTime)); - const auto timerInterval = qMax(5LL, timeRemaining); + // Add on a second as a precaution, sometimes we catch the server before it has had a chance to update + const auto lockExpirationTimeout = qMax(5LL, timeRemaining + 1); + + qCInfo(lcDisco) << "File:" << path._original << "is locked." + << "Lock expires in:" << lockExpirationTimeout << "seconds." + << "A sync run will be scheduled for around that time."; - qCInfo(lcDisco) << "Will re-check lock status for:" << path._original << "in:" << timerInterval << "seconds."; _discoveryData->_anotherSyncNeeded = true; - _discoveryData->_scheduleSyncInSecs = timerInterval; + _discoveryData->_filesNeedingScheduledSync.insert(path._original, lockExpirationTimeout); } // VFS suffixed files on the server are ignored diff --git a/src/libsync/discoveryphase.h b/src/libsync/discoveryphase.h index f0597170e..21226b444 100644 --- a/src/libsync/discoveryphase.h +++ b/src/libsync/discoveryphase.h @@ -283,7 +283,7 @@ public: // output QByteArray _dataFingerprint; bool _anotherSyncNeeded = false; - int _scheduleSyncInSecs = -1; + QHash _filesNeedingScheduledSync; signals: void fatalError(const QString &errorString); diff --git a/src/libsync/syncengine.cpp b/src/libsync/syncengine.cpp index a36e7e33d..0b885e86f 100644 --- a/src/libsync/syncengine.cpp +++ b/src/libsync/syncengine.cpp @@ -463,7 +463,6 @@ void SyncEngine::startSync() } if (s_anySyncRunning || _syncRunning) { - ASSERT(false) return; } @@ -699,10 +698,8 @@ void SyncEngine::slotDiscoveryFinished() restoreOldFiles(_syncItems); } - if (_discoveryPhase->_anotherSyncNeeded && _discoveryPhase->_scheduleSyncInSecs > 0) { - QTimer::singleShot(_discoveryPhase->_scheduleSyncInSecs * 1000, this, [this]{ - this->startSync(); - }); + if (_discoveryPhase->_anotherSyncNeeded && !_discoveryPhase->_filesNeedingScheduledSync.empty()) { + slotScheduleFilesDelayedSync(); } else if (_discoveryPhase->_anotherSyncNeeded && _anotherSyncNeeded == NoFollowUpSync) { _anotherSyncNeeded = ImmediateFollowUp; } @@ -1130,4 +1127,169 @@ void SyncEngine::slotInsufficientRemoteStorage() emit syncError(msg, ErrorCategory::InsufficientRemoteStorage); } +void SyncEngine::slotScheduleFilesDelayedSync() +{ + if (!_discoveryPhase || _discoveryPhase->_filesNeedingScheduledSync.empty()) { + return; + } + + // The latest sync of the interval bucket is the one that goes through and is used in the timer. + // By running the sync run as late as possible in the selected interval, we try to strike a + // balance between updating the needed file in a timely manner while also syncing late enough + // to cover all the files in the interval bucket. + + static constexpr long long intervalSecs = 60; + const auto scheduledSyncBuckets = groupNeededScheduledSyncRuns(intervalSecs); + + qCDebug(lcEngine) << "Active scheduled sync run timers:" << _scheduledSyncTimers.count(); + + for (const auto &[scheduledSyncTimerSecs, filesAffected] : scheduledSyncBuckets) { + + const auto currentSecsSinceEpoch = QDateTime::currentSecsSinceEpoch(); + const auto scheduledSyncTimerTime = QDateTime::fromSecsSinceEpoch(currentSecsSinceEpoch + scheduledSyncTimerSecs); + const auto scheduledSyncTimerMsecs = std::chrono::milliseconds(scheduledSyncTimerSecs * 1000); + + // We want to make sure that this bucket won't schedule a sync near a pre-existing sync run, + // as we often get, for example, locked file notifications one by one as the user interacts + // through the web. + + if (nearbyScheduledSyncTimerUsable(scheduledSyncTimerSecs, intervalSecs)) { + qCInfo(lcEngine) << "Already have a nearby scheduled sync run at:" << scheduledSyncTimerTime + << "which will be used for files:" << filesAffected; + + // We still want to remove these files as files scheduled for sync. We schedule another timer + // for the same time the nearby scheduled sync run completes, remocing these files from our + // hash of files already scheduled for later sync. + + QTimer::singleShot(scheduledSyncTimerMsecs, this, [this, filesAffected = filesAffected] { + qCInfo(lcEngine) << "Following files:" << filesAffected + << "were scheduled for a sync run that already had a nearby run scheduled."; + + for (const auto &file : filesAffected) { + _filesScheduledForLaterSync.remove(file); + } + }); + + continue; + } + + qCInfo(lcEngine) << "Will have a new sync run in" << scheduledSyncTimerSecs + << "seconds, at" << scheduledSyncTimerTime + << "for files:" << filesAffected; + + QSharedPointer newTimer(new QTimer); + newTimer->setSingleShot(true); + + // In C++17 structured bindings (used above) cannot be captured, so we need an init capture. + // This is because structured bindings are never names of variables, thus uncapturable. + // This has been changed in C++20 -- feel free to change once we use it + + newTimer->callOnTimeout(this, [this, filesAffected = filesAffected] { + qCInfo(lcEngine) << "Rescanning now that delayed sync run is scheduled for:" << filesAffected; + + for (const auto &file : filesAffected) { + this->_filesScheduledForLaterSync.remove(file); + } + + this->startSync(); + this->slotCleanupScheduledSyncTimers(); + }); + + newTimer->start(scheduledSyncTimerMsecs); + _scheduledSyncTimers.append(newTimer); + } +} + +QHash SyncEngine::groupNeededScheduledSyncRuns(const int interval) +{ + QHash intervalSyncBuckets; + + for (auto it = _discoveryPhase->_filesNeedingScheduledSync.cbegin(); + it != _discoveryPhase->_filesNeedingScheduledSync.cend(); + ++it) { + + const auto file = it.key(); + const auto syncScheduledSecs = it.value(); + + // We don't want to schedule syncs again for files we have already discovered needing a + // scheduled sync, unless the files have been re-locked or had their lock expire time + // extended + + if (_filesScheduledForLaterSync.contains(file) && + _filesScheduledForLaterSync.value(file) > syncScheduledSecs) { + + continue; + } + + _filesScheduledForLaterSync.insert(file, syncScheduledSecs); + // Both long long so division results in floor-ed result + const auto intervalBucketKey = syncScheduledSecs / interval; + + if (!intervalSyncBuckets.contains(intervalBucketKey)) { + intervalSyncBuckets.insert(intervalBucketKey, {syncScheduledSecs, {file}}); + continue; + } + + auto bucketValue = intervalSyncBuckets.value(intervalBucketKey); + bucketValue.scheduledSyncTimerSecs = qMax(bucketValue.scheduledSyncTimerSecs, syncScheduledSecs); + bucketValue.files.append(file); + intervalSyncBuckets.insert(intervalBucketKey, bucketValue); + } + + return intervalSyncBuckets; +} + +bool SyncEngine::nearbyScheduledSyncTimerUsable(const long long scheduledSyncTimerSecs, + const long long intervalSecs) const +{ + const auto scheduledSyncTimerMsecs = scheduledSyncTimerSecs * 1000; + const auto halfIntervalMsecs = (intervalSecs * 1000) / 2; + auto nearbyScheduledSync = false; + + for (const auto &scheduledTimer : _scheduledSyncTimers) { + + const auto timerRemainingMsecs = scheduledTimer->remainingTime(); + const auto differenceMsecs = timerRemainingMsecs - scheduledSyncTimerMsecs; + + nearbyScheduledSync = differenceMsecs > -halfIntervalMsecs && differenceMsecs < halfIntervalMsecs; + + // Iterated timer is going to fire slightly before we need it to for the parameter timer, + // delay it. + if (differenceMsecs > -halfIntervalMsecs && differenceMsecs < 0) { + const auto scheduledSyncTimerTimeoutMsecs = std::chrono::milliseconds(scheduledSyncTimerMsecs); + + nearbyScheduledSync = true; + scheduledTimer->start(scheduledSyncTimerTimeoutMsecs); + + qCInfo(lcEngine) << "Delayed sync timer with remaining time" << timerRemainingMsecs / 1000 + << "by" << (differenceMsecs * -1) / 1000 + << "seconds due to nearby new sync run needed."; + } + + if (nearbyScheduledSync) { + break; + } + } + + return nearbyScheduledSync; +} + +void SyncEngine::slotCleanupScheduledSyncTimers() +{ + qCDebug(lcEngine) << "Beginning scheduled sync timer cleanup."; + + auto it = _scheduledSyncTimers.begin(); + + while(it != _scheduledSyncTimers.end()) { + const auto &timerPtr = *it; + + if(timerPtr.isNull() || !timerPtr->isActive()) { + qCDebug(lcEngine) << "Erasing an expired scheduled sync timer."; + it = _scheduledSyncTimers.erase(it); + } else { + ++it; + } + } +} + } // namespace OCC diff --git a/src/libsync/syncengine.h b/src/libsync/syncengine.h index 86f0b7731..e35087693 100644 --- a/src/libsync/syncengine.h +++ b/src/libsync/syncengine.h @@ -213,7 +213,35 @@ private slots: void slotInsufficientLocalStorage(); void slotInsufficientRemoteStorage(); + void slotScheduleFilesDelayedSync(); + void slotCleanupScheduledSyncTimers(); + private: + // Some files need a sync run to be executed at a specified time after + // their status is scheduled to change (e.g. lock status will expire in + // 20 minutes.) + // + // Rather than execute a sync run for each file that needs one, we want + // to schedule as few sync runs as possible, trying to have the state of + // these files updated in a timely manner without scheduling runs too + // frequently. We can therefore group files into a bucket. + // + // A bucket contains a group of files requiring a sync run in close + // proximity to each other, with an assigned sync timer interval that can + // be used to schedule a sync run which will update all the files in the + // bucket at the time their state is scheduled to change. + // + // In the pair, first is the actual time at which the bucket is going to + // have its sync scheduled. Second is the vector of all the (paths of) + // files that fall into this bucket. + // + // See SyncEngine::groupNeededScheduledSyncRuns and + // SyncEngine::slotScheduleFilesDelayedSync for usage. + struct ScheduledSyncBucket { + long long scheduledSyncTimerSecs; + QVector files; + }; + bool checkErrorBlacklisting(SyncFileItem &item); // Cleans up unnecessary downloadinfo entries in the journal as well @@ -232,6 +260,24 @@ private: // cleanup and emit the finished signal void finalize(bool success); + // Aggregate scheduled sync runs into interval buckets. Can be used to + // schedule a sync run per bucket instead of per file, reducing load. + // + // Bucket classification is done by simply dividing the seconds until + // scheduled sync time by the interval (note -- integer division!) + QHash groupNeededScheduledSyncRuns(const int interval); + + // Checks if there is already a scheduled sync run timer active near the + // time provided as the parameter. + // + // If this timer will expire within the interval provided, the return is + // true. + // + // If this expiration occurs before the scheduled sync run provided as the + // parameter, it is rescheduled to expire at the time of the parameter. + bool nearbyScheduledSyncTimerUsable(const long long scheduledSyncTimerSecs, + const long long intervalSecs) const; + static bool s_anySyncRunning; //true when one sync is running somewhere (for debugging) // Must only be acessed during update and reconcile @@ -303,6 +349,11 @@ private: std::set _localDiscoveryPaths; QStringList _leadingAndTrailingSpacesFilesAllowed; + + // Hash of files we have scheduled for later sync runs, along with their lock expire times + // NOTE: not necessarily the time at which their sync run will take place + QHash _filesScheduledForLaterSync; + QVector> _scheduledSyncTimers; }; } -- cgit v1.2.3