From 5544436fd6654fcc6cd6b3a9c3f3b9b7e0b21865 Mon Sep 17 00:00:00 2001
From: "Niall Douglas (s [underscore] sourceforge {at} nedprod [dot] com)"
 <spamtrap@nedprod.com>
Date: Tue, 3 Aug 2021 18:23:27 +0100
Subject: Add utils::current_process_cpu_usage().

---
 include/llfio/revision.hpp                       |   6 +-
 include/llfio/v2.0/detail/impl/posix/utils.ipp   | 197 +++++++++++++++++++----
 include/llfio/v2.0/detail/impl/windows/utils.ipp |  36 ++++-
 include/llfio/v2.0/utils.hpp                     |  51 ++++++
 4 files changed, 255 insertions(+), 35 deletions(-)

(limited to 'include')

diff --git a/include/llfio/revision.hpp b/include/llfio/revision.hpp
index 424376d1..905c3a59 100644
--- a/include/llfio/revision.hpp
+++ b/include/llfio/revision.hpp
@@ -1,4 +1,4 @@
 // Note the second line of this file must ALWAYS be the git SHA, third line ALWAYS the git SHA update time
-#define LLFIO_PREVIOUS_COMMIT_REF    a93b8521afb5d858f423de7accdc6d9d4504e7cb
-#define LLFIO_PREVIOUS_COMMIT_DATE   "2021-05-11 18:16:38 +00:00"
-#define LLFIO_PREVIOUS_COMMIT_UNIQUE a93b8521
+#define LLFIO_PREVIOUS_COMMIT_REF    d867bd612097979ca40b07852abbf7332538ab98
+#define LLFIO_PREVIOUS_COMMIT_DATE   "2021-08-03 17:03:03 +00:00"
+#define LLFIO_PREVIOUS_COMMIT_UNIQUE d867bd61
diff --git a/include/llfio/v2.0/detail/impl/posix/utils.ipp b/include/llfio/v2.0/detail/impl/posix/utils.ipp
index e3b47484..1461ae8f 100644
--- a/include/llfio/v2.0/detail/impl/posix/utils.ipp
+++ b/include/llfio/v2.0/detail/impl/posix/utils.ipp
@@ -32,6 +32,9 @@ Distributed under the Boost Software License, Version 1.0.
 #include <unistd.h>  // for preadv
 #endif
 #ifdef __APPLE__
+#include <mach/mach_host.h>
+#include <mach/mach_time.h>
+#include <mach/processor_info.h>
 #include <mach/task.h>
 #include <mach/task_info.h>
 #endif
@@ -259,7 +262,7 @@ namespace utils
         buffer.resize(buffer.size() * 2);
       }
       const string_view totalview(buffer.data(), buffer.size());
-      //std::cerr << totalview << std::endl;
+      // std::cerr << totalview << std::endl;
       std::vector<string_view> anon_entries, non_anon_entries;
       anon_entries.reserve(32);
       non_anon_entries.reserve(32);
@@ -298,16 +301,17 @@ namespace utils
         // Is there " ac" after vmflagsidx?
         if(string_view::npos != item.find(" ac", vmflagsidx) && inode == 0)
         {
-          //std::cerr << "Adding anon entry at offset " << itemtopidx << std::endl;
+          // std::cerr << "Adding anon entry at offset " << itemtopidx << std::endl;
           anon_entries.push_back(item);
         }
         else
         {
-          //std::cerr << "Adding non-anon entry at offset " << itemtopidx << std::endl;
+          // std::cerr << "Adding non-anon entry at offset " << itemtopidx << std::endl;
           non_anon_entries.push_back(item);
         }
       }
-      auto parse = [](string_view item, string_view what) ->result<uint64_t> { auto idx = item.find(what);
+      auto parse = [](string_view item, string_view what) -> result<uint64_t> {
+        auto idx = item.find(what);
         if(string_view::npos == idx)
         {
           return (uint64_t) -1;
@@ -359,7 +363,7 @@ namespace utils
         return value;
       };
       process_memory_usage ret;
-      //std::cerr << "Anon entries:";
+      // std::cerr << "Anon entries:";
       for(auto &i : anon_entries)
       {
         OUTCOME_TRY(auto &&size, parse(i, "\nSize:"));
@@ -378,9 +382,9 @@ namespace utils
             ret.private_paged_in -= lazyfree;
           }
         }
-        //std::cerr << i << "\nSize = " << size << " Rss = " << rss << std::endl;
+        // std::cerr << i << "\nSize = " << size << " Rss = " << rss << std::endl;
       }
-      //std::cerr << "\n\nNon-anon entries:";
+      // std::cerr << "\n\nNon-anon entries:";
       for(auto &i : non_anon_entries)
       {
         OUTCOME_TRY(auto &&size, parse(i, "\nSize:"));
@@ -395,7 +399,7 @@ namespace utils
             ret.total_address_space_paged_in -= lazyfree;
           }
         }
-        //std::cerr << i << "\nSize = " << size << " Rss = " << rss << std::endl;
+        // std::cerr << i << "\nSize = " << size << " Rss = " << rss << std::endl;
       }
       return ret;
     }
@@ -404,32 +408,167 @@ namespace utils
       return error_from_exception();
     }
 #elif defined(__APPLE__)
-  kern_return_t error;
-  mach_msg_type_number_t outCount;
-  task_vm_info_data_t vmInfo;
-  //task_kernelmemory_info_data_t kmInfo;
+    kern_return_t error;
+    mach_msg_type_number_t outCount;
+    task_vm_info_data_t vmInfo;
+    // task_kernelmemory_info_data_t kmInfo;
 
-  outCount = TASK_VM_INFO_COUNT;
-  error = task_info(mach_task_self(), TASK_VM_INFO, (task_info_t)&vmInfo, &outCount);
-  if (error != KERN_SUCCESS) {
-    return errc::invalid_argument;
+    outCount = TASK_VM_INFO_COUNT;
+    error = task_info(mach_task_self(), TASK_VM_INFO, (task_info_t) &vmInfo, &outCount);
+    if(error != KERN_SUCCESS)
+    {
+      return errc::invalid_argument;
+    }
+    // outCount = TASK_KERNELMEMORY_INFO_COUNT;
+    // error = task_info(mach_task_self(), TASK_KERNELMEMORY_INFO, (task_info_t)&kmInfo, &outCount);
+    // if (error != KERN_SUCCESS) {
+    //  return errc::invalid_argument;
+    //}
+    // std::cout << vmInfo.virtual_size << "\n" << vmInfo.region_count << "\n" << vmInfo.resident_size << "\n" << vmInfo.device << "\n" << vmInfo.internal <<
+    // "\n" << vmInfo.external << "\n" << vmInfo.reusable << "\n" << vmInfo.purgeable_volatile_pmap<< "\n" << vmInfo.purgeable_volatile_resident << "\n" <<
+    // vmInfo.purgeable_volatile_virtual << "\n" << vmInfo.compressed << "\n" << vmInfo.phys_footprint << std::endl; std::cout << "\n" << kmInfo.total_palloc <<
+    // "\n" << kmInfo.total_pfree << "\n" << kmInfo.total_salloc << "\n" << kmInfo.total_sfree << std::endl;
+    process_memory_usage ret;
+    ret.total_address_space_in_use = vmInfo.virtual_size;
+    ret.total_address_space_paged_in = vmInfo.resident_size;
+    ret.private_committed = vmInfo.internal + vmInfo.compressed;
+    ret.private_paged_in = vmInfo.phys_footprint;
+    return ret;
+#else
+#error Unknown platform
+#endif
   }
-  //outCount = TASK_KERNELMEMORY_INFO_COUNT;
-  //error = task_info(mach_task_self(), TASK_KERNELMEMORY_INFO, (task_info_t)&kmInfo, &outCount);
-  //if (error != KERN_SUCCESS) {
-  //  return errc::invalid_argument;
-  //}
-  //std::cout << vmInfo.virtual_size << "\n" << vmInfo.region_count << "\n" << vmInfo.resident_size << "\n" << vmInfo.device << "\n" << vmInfo.internal << "\n" << vmInfo.external << "\n" << vmInfo.reusable << "\n" << vmInfo.purgeable_volatile_pmap<< "\n" << vmInfo.purgeable_volatile_resident << "\n" << vmInfo.purgeable_volatile_virtual << "\n" << vmInfo.compressed << "\n" << vmInfo.phys_footprint << std::endl;
-  //std::cout << "\n" << kmInfo.total_palloc << "\n" << kmInfo.total_pfree << "\n" << kmInfo.total_salloc << "\n" << kmInfo.total_sfree << std::endl;
-  process_memory_usage ret;
-  ret.total_address_space_in_use = vmInfo.virtual_size;
-  ret.total_address_space_paged_in = vmInfo.resident_size;
-  ret.private_committed = vmInfo.internal + vmInfo.compressed;
-  ret.private_paged_in = vmInfo.phys_footprint;
-  return ret;
+
+  // POSIX implementation: fills in system-wide and per-process CPU time counters, converted to nanoseconds.
+  result<process_cpu_usage> current_process_cpu_usage() noexcept
+  {
+    process_cpu_usage ret;
+    memset(&ret, 0, sizeof(ret));
+#ifdef __linux__
+    try
+    {
+      /* Both files count in clock ticks, so every value read below is multiplied by
+      1000000000ULL / sysconf(_SC_CLK_TCK) to yield nanoseconds.
+
+      /proc/self/stat: fields 14 and 15 are process user time and process kernel time.
+      Field 2 (the command name, in parentheses) may itself contain spaces, so parsing
+      begins after the last ')'.
+      /proc/stat: first line is "cpu <user> <user-nice> <kernel> <idle> ..."
+      */
+      std::vector<char> buffer1(65536), buffer2(65536);
+      auto fill_buffer = [](std::vector<char> &buffer, const char *path) -> result<void> {
+        for(;;)
+        {
+          int ih = ::open(path, O_RDONLY);
+          if(ih == -1)
+          {
+            return posix_error();
+          }
+          size_t totalbytesread = 0;
+          for(;;)
+          {
+            auto bytesread = ::read(ih, buffer.data() + totalbytesread, buffer.size() - totalbytesread);
+            if(bytesread < 0)
+            {
+              ::close(ih);
+              return posix_error();
+            }
+            if(bytesread == 0)
+            {
+              break;
+            }
+            totalbytesread += bytesread;
+          }
+          ::close(ih);
+          if(totalbytesread < buffer.size())
+          {
+            // Keep one NUL byte after the data: sscanf() below needs a terminated C string.
+            buffer[totalbytesread] = 0;
+            buffer.resize(totalbytesread + 1);
+            break;
+          }
+          buffer.resize(buffer.size() * 2);
+        }
+        return success();
+      };
+      static const uint64_t ts_multiplier = 1000000000ULL / sysconf(_SC_CLK_TCK);
+      OUTCOME_TRY(fill_buffer(buffer1, "/proc/self/stat"));
+      OUTCOME_TRY(fill_buffer(buffer2, "/proc/stat"));
+      const char *after_comm = strrchr(buffer1.data(), ')');
+      if(after_comm == nullptr || sscanf(after_comm + 1, " %*c %*d %*d %*d %*d %*d %*u %*u %*u %*u %*u %lu %lu", &ret.process_ns_in_user_mode,
+                                         &ret.process_ns_in_kernel_mode) < 2)
+      {
+        return errc::protocol_error;
+      }
+      uint64_t user_nice;
+      if(sscanf(buffer2.data(), "cpu %lu %lu %lu %lu", &ret.system_ns_in_user_mode, &user_nice, &ret.system_ns_in_kernel_mode, &ret.system_ns_in_idle_mode) <
+         4)
+      {
+        return errc::protocol_error;
+      }
+      ret.system_ns_in_user_mode += user_nice;
+      ret.process_ns_in_user_mode *= ts_multiplier;
+      ret.process_ns_in_kernel_mode *= ts_multiplier;
+      ret.system_ns_in_user_mode *= ts_multiplier;
+      ret.system_ns_in_kernel_mode *= ts_multiplier;
+      ret.system_ns_in_idle_mode *= ts_multiplier;
+      return ret;
+    }
+    catch(...)
+    {
+      return error_from_exception();
+    }
+#elif defined(__APPLE__)
+    kern_return_t error;
+    task_basic_info_64 processInfo1;
+    task_thread_times_info processInfo2;
+
+    mach_msg_type_number_t outCount = TASK_BASIC_INFO_64_COUNT;
+    error = task_info(mach_task_self(), TASK_BASIC_INFO_64, (task_info_t) &processInfo1, &outCount);
+    if(error != KERN_SUCCESS)
+    {
+      return errc::invalid_argument;
+    }
+    outCount = TASK_THREAD_TIMES_INFO_COUNT;
+    error = task_info(mach_task_self(), TASK_THREAD_TIMES_INFO, (task_info_t) &processInfo2, &outCount);
+    if(error != KERN_SUCCESS)
+    {
+      return errc::invalid_argument;
+    }
+    ret.process_ns_in_user_mode = (processInfo1.user_time.seconds + processInfo2.user_time.seconds) * 1000000000ULL +
+                                  (processInfo1.user_time.microseconds + processInfo2.user_time.microseconds) * 1000ULL;
+    ret.process_ns_in_kernel_mode = (processInfo1.system_time.seconds + processInfo2.system_time.seconds) * 1000000000ULL +
+                                    (processInfo1.system_time.microseconds + processInfo2.system_time.microseconds) * 1000ULL;
+
+    natural_t numCPU = 0;
+    processor_info_array_t cpuInfo;
+    mach_msg_type_number_t numCpuInfo;
+    error = host_processor_info(mach_host_self(), PROCESSOR_CPU_LOAD_INFO, &numCPU, &cpuInfo, &numCpuInfo);
+    if(error != KERN_SUCCESS)
+    {
+      return errc::invalid_argument;
+    }
+    for(natural_t n = 0; n < numCPU; n++)
+    {
+      ret.system_ns_in_user_mode += cpuInfo[CPU_STATE_MAX * n + CPU_STATE_USER] + cpuInfo[CPU_STATE_MAX * n + CPU_STATE_NICE];
+      ret.system_ns_in_kernel_mode += cpuInfo[CPU_STATE_MAX * n + CPU_STATE_SYSTEM];
+      ret.system_ns_in_idle_mode += cpuInfo[CPU_STATE_MAX * n + CPU_STATE_IDLE];
+    }
+    vm_deallocate(mach_task_self(), (vm_address_t) cpuInfo, sizeof(integer_t) * numCpuInfo);
+    // NOTE(review): this scales the summed CPU load ticks by the mach timebase ratio; confirm those ticks really are in mach absolute time units.
+    static const double ts_multiplier = [] {
+      mach_timebase_info_data_t timebase;
+      mach_timebase_info(&timebase);
+      return (double) timebase.numer / timebase.denom;
+    }();
+    ret.system_ns_in_user_mode = (uint64_t)(ts_multiplier * ret.system_ns_in_user_mode);
+    ret.system_ns_in_kernel_mode = (uint64_t)(ts_multiplier * ret.system_ns_in_kernel_mode);
+    ret.system_ns_in_idle_mode = (uint64_t)(ts_multiplier * ret.system_ns_in_idle_mode);
+    return ret;
 #else
 #error Unknown platform
 #endif
+    return ret;
   }
 
   namespace detail
diff --git a/include/llfio/v2.0/detail/impl/windows/utils.ipp b/include/llfio/v2.0/detail/impl/windows/utils.ipp
index 0735b4d2..d4516000 100644
--- a/include/llfio/v2.0/detail/impl/windows/utils.ipp
+++ b/include/llfio/v2.0/detail/impl/windows/utils.ipp
@@ -129,7 +129,8 @@ namespace utils
         tp.PrivilegeCount = 1;
         tp.Privileges[0].Luid = luid;
         tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
-        if(AdjustTokenPrivileges(processToken, FALSE, &tp, sizeof(TOKEN_PRIVILEGES), static_cast<PTOKEN_PRIVILEGES>(nullptr), static_cast<PDWORD>(nullptr)) == 0)
+        if(AdjustTokenPrivileges(processToken, FALSE, &tp, sizeof(TOKEN_PRIVILEGES), static_cast<PTOKEN_PRIVILEGES>(nullptr), static_cast<PDWORD>(nullptr)) ==
+           0)
         {
           return win32_error();
         }
@@ -184,7 +185,8 @@ namespace utils
         tp.PrivilegeCount = 1;
         tp.Privileges[0].Luid = luid;
         tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
-        if(AdjustTokenPrivileges(processToken, FALSE, &tp, sizeof(TOKEN_PRIVILEGES), static_cast<PTOKEN_PRIVILEGES>(nullptr), static_cast<PDWORD>(nullptr)) == 0)
+        if(AdjustTokenPrivileges(processToken, FALSE, &tp, sizeof(TOKEN_PRIVILEGES), static_cast<PTOKEN_PRIVILEGES>(nullptr), static_cast<PDWORD>(nullptr)) ==
+           0)
         {
           return win32_error();
         }
@@ -205,7 +207,8 @@ namespace utils
     return success();
   }
 
-  result<process_memory_usage> current_process_memory_usage() noexcept {
+  result<process_memory_usage> current_process_memory_usage() noexcept
+  {
     // Amazingly Win32 doesn't expose private working set, so to avoid having
     // to iterate all the pages in the process and calculate, use a hidden
     // NT kernel call
@@ -239,6 +242,33 @@ namespace utils
     return ret;
   }
 
+  result<process_cpu_usage> current_process_cpu_usage() noexcept
+  {
+    // Reads system-wide times from GetSystemTimes() and this process's times from GetProcessTimes(), all as nanoseconds.
+    // A FILETIME here is a count of 100ns intervals split over two 32-bit halves; fold it into nanoseconds.
+    const auto to_ns = [](const FILETIME &ft) noexcept -> uint64_t { return (((uint64_t) ft.dwHighDateTime << 32U) | ft.dwLowDateTime) * 100; };
+    process_cpu_usage ret;
+    memset(&ret, 0, sizeof(ret));
+    FILETIME SystemIdle, SystemKernel, SystemUser;
+    if(GetSystemTimes(&SystemIdle, &SystemKernel, &SystemUser) == 0)
+    {
+      return win32_error();
+    }
+    ret.system_ns_in_idle_mode = to_ns(SystemIdle);
+    // GetSystemTimes() reports kernel time inclusive of idle time, so take the idle time back out.
+    ret.system_ns_in_kernel_mode = to_ns(SystemKernel) - ret.system_ns_in_idle_mode;
+    ret.system_ns_in_user_mode = to_ns(SystemUser);
+    FILETIME CreationTime, ExitTime, ProcessKernel, ProcessUser;
+    if(GetProcessTimes(GetCurrentProcess(), &CreationTime, &ExitTime, &ProcessKernel, &ProcessUser) == 0)
+    {
+      return win32_error();
+    }
+    // Is it worth adjusting KernelTime and UserTime by QueryProcessCycleTime to make them TSC granularity?
+    ret.process_ns_in_kernel_mode = to_ns(ProcessKernel);
+    ret.process_ns_in_user_mode = to_ns(ProcessUser);
+    return ret;
+  }
+
   namespace detail
   {
     large_page_allocation allocate_large_pages(size_t bytes)
diff --git a/include/llfio/v2.0/utils.hpp b/include/llfio/v2.0/utils.hpp
index c50514f7..da524ac7 100644
--- a/include/llfio/v2.0/utils.hpp
+++ b/include/llfio/v2.0/utils.hpp
@@ -208,12 +208,63 @@ namespace utils
     //! The total anonymous memory currently paged into the process. Always `<= private_committed`. Also known as "active anonymous pages".
     size_t private_paged_in{0};
   };
+  static_assert(std::is_trivially_copyable<process_memory_usage>::value, "process_memory_usage is not trivially copyable!");
+
   /*! \brief Retrieve the current memory usage statistics for this process.
 
    \note Mac OS provides no way of reading how much memory a process has committed. We therefore supply as `private_committed` the same value as `private_paged_in`.
   */
   LLFIO_HEADERS_ONLY_FUNC_SPEC result<process_memory_usage> current_process_memory_usage() noexcept;
 
+  /*! \brief CPU usage statistics for the system as a whole and for a process.
+  */
+  struct process_cpu_usage
+  {
+    //! Nanoseconds spent in user mode, summed over all processes ever run.
+    uint64_t system_ns_in_user_mode;
+    //! Nanoseconds spent in kernel mode, summed over all processes ever run.
+    uint64_t system_ns_in_kernel_mode;
+    //! Nanoseconds spent in idle mode, summed over all processes ever run.
+    uint64_t system_ns_in_idle_mode;
+
+    //! Nanoseconds this process has spent in user mode.
+    uint64_t process_ns_in_user_mode;
+    //! Nanoseconds this process has spent in kernel mode.
+    uint64_t process_ns_in_kernel_mode;
+
+    //! Returns a copy of this (later) result with every counter of an earlier result subtracted.
+    process_cpu_usage operator-(const process_cpu_usage &earlier) const noexcept
+    {
+      process_cpu_usage delta(*this);
+      delta -= earlier;
+      return delta;
+    }
+    //! Subtracts every counter of an earlier result from this (later) result, in place.
+    process_cpu_usage &operator-=(const process_cpu_usage &earlier) noexcept
+    {
+      process_ns_in_kernel_mode -= earlier.process_ns_in_kernel_mode;
+      process_ns_in_user_mode -= earlier.process_ns_in_user_mode;
+      system_ns_in_idle_mode -= earlier.system_ns_in_idle_mode;
+      system_ns_in_kernel_mode -= earlier.system_ns_in_kernel_mode;
+      system_ns_in_user_mode -= earlier.system_ns_in_user_mode;
+      return *this;
+    }
+  };
+  static_assert(std::is_trivially_copyable<process_cpu_usage>::value, "process_cpu_usage is not trivially copyable!");
+
+  /*! \brief Retrieve the current CPU usage statistics for this system and this process. These
+  are unsigned counters which always increment, and so may eventually wrap.
+
+  The simplest way to use this API is to call it whilst also taking the current monotonic
+  clock/CPU TSC and then calculating the delta change over that period of time.
+  
+  \note The returned values may not be a snapshot accurate against one another as they
+  may get derived from multiple sources. Also, granularity is probably a lot coarser
+  than one nanosecond on most platforms, but may be CPU TSC based on others (you can test
+  it to be sure).
+  */
+  LLFIO_HEADERS_ONLY_FUNC_SPEC result<process_cpu_usage> current_process_cpu_usage() noexcept;
+
   namespace detail
   {
     struct large_page_allocation
-- 
cgit v1.2.3