Nicole Mazzuca e79f0dc532 [vcpkg] Make Filesystem::remove_all faster #7570
I added benchmarks to measure how fast the parallel remove_all code was
-- it turns out, about 3x slower than stdfs::remove_all. Since this was
the case, I removed all of the parallelism and rewrote it serially, and
ended up about 30% faster than stdfs::remove_all (in addition to
supporting symlinks).

In addition, I made the following three orthogonal changes:
  - Simplified the work queue, basing it on Billy O'Neal's idea
  - Fixed warnings on older versions of compilers in tests, by splitting
    the pragmas out of pch.h.
  - Ran clang-format on some files

In fixing up remove_all, the following changes were made:
  - On Windows, regular symlinks and directory symlinks are distinct;
    as an example, to remove directory symlinks (and junctions, for that
    matter), one must use RemoveDirectory. Only on Windows, I added new
    `file_type` and `file_status` types, with `file_type` including a new
    `directory_symlink` enumerator, and `file_status` being exactly the
    same as the old one except using the new `file_type`. On Unix, I
    didn't make that change since they don't make a distinction.
  - I added new `symlink_status` and `status` functions which use the
    new `file_status` on Windows.
  - I made `Filesystem::exists` call `fs::exists(status(p))`, as opposed
    to the old version which called `stdfs::exists` directly.
  - Added benchmarks to `vcpkg-test/files.cpp`. They test the
    performance of `remove_all` on small directories (~20 files), with
    symlinks and without, and on large directories (~2000 files), with
    symlinks and without.
2019-08-07 16:51:12 -07:00

141 lines
4.4 KiB
C++

#pragma once
#include <vcpkg/base/checks.h>
#include <condition_variable>
#include <memory>
#include <mutex>
#include <thread>
#include <utility>
#include <vector>
namespace vcpkg
{
template<class Action>
struct WorkQueue
{
WorkQueue(LineInfo li) : m_line_info(li) {}
WorkQueue(const WorkQueue&) = delete;
~WorkQueue()
{
auto lck = std::unique_lock<std::mutex>(m_mutex, std::try_to_lock);
/*
if we don't own the lock, there isn't much we can do
it is likely a spurious failure
*/
if (lck && m_running_workers != 0)
{
Checks::exit_with_message(
m_line_info, "Internal error -- outstanding workers (%u) at destruct point", m_running_workers);
}
}
template<class F>
void run_and_join(unsigned num_threads, const F& tld_init) noexcept
{
if (m_actions.empty()) return;
std::vector<std::thread> threads;
threads.reserve(num_threads);
for (unsigned i = 0; i < num_threads; ++i)
{
threads.emplace_back(Worker<decltype(tld_init())>{this, tld_init()});
}
for (auto& thrd : threads)
{
thrd.join();
}
}
// useful in the case of errors
// doesn't stop any existing running tasks
// returns immediately, so that one can call this in a task
void cancel() const
{
{
auto lck = std::lock_guard<std::mutex>(m_mutex);
m_cancelled = true;
m_actions.clear();
}
m_cv.notify_all();
}
void enqueue_action(Action a) const
{
{
auto lck = std::lock_guard<std::mutex>(m_mutex);
if (m_cancelled) return;
m_actions.push_back(std::move(a));
}
m_cv.notify_one();
}
private:
template<class ThreadLocalData>
struct Worker
{
const WorkQueue* work_queue;
ThreadLocalData tld;
void operator()()
{
auto lck = std::unique_lock<std::mutex>(work_queue->m_mutex);
for (;;)
{
const auto& w = *work_queue;
work_queue->m_cv.wait(lck, [&w] {
if (w.m_cancelled)
return true;
else if (!w.m_actions.empty())
return true;
else if (w.m_running_workers == 0)
return true;
else
return false;
});
if (work_queue->m_cancelled || work_queue->m_actions.empty())
{
/*
if we've been cancelled, or if the work queue is empty
and there are no other workers, we want to return
immediately; we don't check for the latter condition
since if we're at this point, then either the queue
is not empty, or there are no other workers, or both.
We can't have an empty queue, and other workers, or
we would still be in the wait.
*/
break;
}
++work_queue->m_running_workers;
auto action = std::move(work_queue->m_actions.back());
work_queue->m_actions.pop_back();
lck.unlock();
work_queue->m_cv.notify_one();
std::move(action)(tld, *work_queue);
lck.lock();
const auto after = --work_queue->m_running_workers;
if (work_queue->m_actions.empty() && after == 0)
{
work_queue->m_cv.notify_all();
return;
}
}
}
};
mutable std::mutex m_mutex{};
// these are all under m_mutex
mutable bool m_cancelled = false;
mutable std::vector<Action> m_actions{};
mutable std::condition_variable m_cv{};
mutable unsigned long m_running_workers = 0;
LineInfo m_line_info;
};
}