feat: use tracing crate to measure cpu time
parent 80fe2c6d83
commit 0a5e25e4f0
8 changed files with 289 additions and 104 deletions
src/timings.rs (new file, 140 additions)
@@ -0,0 +1,140 @@
//! This module provides a crude system for timing how long various steps of the program take to
//! execute using the tracing framework. I seek absolution for suboptimal code. For example I want
//! to throw a span around all clones and see exactly how long I spend copying data. If I see that
//! I only spend a few milliseconds overall then that could be a price worth paying for development
//! velocity.
//!
//! This is quite a different use case than flame graphs.
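//!
//! A minimal sketch of the intended use (the span name here is made up): wrap a step in an
//! entered span guard, and the subscriber below accumulates how long the guard was held.
//!
//! ```ignore
//! let _guard = tracing::info_span!("clone parse tree").entered();
//! // ... the step being timed; its duration is recorded when the guard drops ...
//! ```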
use rustc_hash::FxHashMap;
use std::collections::hash_map::Entry;
use std::collections::HashMap;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::{Arc, RwLock};
use std::thread::{self, ThreadId};
use std::time::{Duration, Instant};
use tracing::info_span;
use tracing::span::{Attributes, Record};
use tracing::{Event, Id, Metadata, Subscriber};

//
// Each step will be described with a string of fewer than 100 chars. Steps will overlap.
//
// I am not tracking the relationship between the spans. I assume that, within each thread,
// if enter is called then exit will be called before enter is called again.
pub(crate) struct Timings {
    ids: AtomicUsize,
    started: RwLock<FxHashMap<(ThreadId, u64), Instant>>,
    // Having separate locks creates a risk of deadlocks when generating a report: what if
    // the reading thread takes one lock while a subscriber callback holds the other?
    // dump() sidesteps this by using try_read and giving up rather than blocking.
    summary: Arc<RwLock<FxHashMap<u64, Duration>>>,
    span_names: Arc<RwLock<FxHashMap<u64, String>>>,
}

pub(crate) struct ReportHandle {
    summary: Arc<RwLock<FxHashMap<u64, Duration>>>,
    span_names: Arc<RwLock<FxHashMap<u64, String>>>,
}

impl Timings {
    pub fn new() -> (Timings, ReportHandle) {
        let summary = Arc::new(RwLock::new(FxHashMap::default()));
        let span_names = Arc::new(RwLock::new(FxHashMap::default()));

        let report_handle = ReportHandle {
            summary: Arc::clone(&summary),
            span_names: Arc::clone(&span_names),
        };

        let subscriber = Timings {
            // Start at 1 because tracing span ids must be nonzero.
            ids: AtomicUsize::new(1),
            started: RwLock::new(FxHashMap::default()),
            summary,
            span_names,
        };

        (subscriber, report_handle)
    }
}

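// A wiring sketch (hypothetical startup code, not part of this file): hand the subscriber
// half to tracing when the program starts and keep the report half for the end of the run.
//
//     let (subscriber, report_handle) = Timings::new();
//     tracing::subscriber::set_global_default(subscriber).expect("set global subscriber");
//     // ... run the program, then call report_handle.dump() before exiting ...
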
impl ReportHandle {
    pub fn dump(&self) -> Option<Vec<(String, Duration)>> {
        // It might take a relatively long time to copy many megabytes of report data. I went
        // with this design because my target use case is batch programs that print a report
        // at the end of processing, and those should only have a few kilobytes of timing
        // information, not megabytes.
        let summary = self.summary.try_read().ok()?.clone();
        let span_names = self.span_names.try_read().ok()?.clone();

        let mut accumulator: HashMap<String, Duration> = HashMap::new();

        // Every span creation gets a fresh id, so fold the per-id durations together under
        // the span's name.
        for (id, duration) in summary.into_iter() {
            if let Some(name) = span_names.get(&id) {
                *accumulator.entry(name.clone()).or_default() += duration;
            }
        }

        Some(accumulator.into_iter().collect())
    }
}

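// A reporting sketch (`report_handle` is a hypothetical variable, not defined in this
// commit): sort the dump longest-first before printing, since map iteration order is
// arbitrary.
//
//     if let Some(mut rows) = report_handle.dump() {
//         rows.sort_by(|a, b| b.1.cmp(&a.1));
//         for (name, total) in rows {
//             println!("{total:?}\t{name}");
//         }
//     }
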
impl tracing::Subscriber for Timings {
    fn enabled(&self, _metadata: &Metadata<'_>) -> bool {
        true
    }

    fn new_span(&self, span: &Attributes<'_>) -> Id {
        let x = self.ids.fetch_add(1, Ordering::SeqCst);
        let id = Id::from_u64(x as u64);
        let name = span.metadata().name().to_owned();
        if let Ok(mut guard) = self.span_names.write() {
            guard.insert(x as u64, name);
        }

        id
    }

    fn record(&self, _span: &Id, _values: &Record<'_>) {}

    fn record_follows_from(&self, _span: &Id, _follows: &Id) {}

    fn event(&self, _event: &Event<'_>) {}

    fn enter(&self, span: &Id) {
        let mut lock = self.started.write().unwrap();
        match lock.entry((thread::current().id(), span.into_u64())) {
            Entry::Vacant(vacant) => {
                vacant.insert(Instant::now());
            }
            Entry::Occupied(_) => {
                // We already started this span in the current thread, so this is a logic
                // error. This subscriber only tracks how long each span executes, so keep
                // the original start time and carry on.
            }
        }
    }

    fn exit(&self, span: &Id) {
        let stopped = Instant::now();
        // I unwrap the locks because they should not be poisoned in the simple single
        // threaded code of starmelon.
        let key = (thread::current().id(), span.into_u64());
        if let Some(started) = self.started.write().unwrap().remove(&key) {
            let mut lock = self.summary.write().unwrap();
            match lock.entry(span.into_u64()) {
                Entry::Vacant(vacant) => {
                    vacant.insert(stopped - started);
                }
                Entry::Occupied(mut occupied) => {
                    *occupied.get_mut() += stopped - started;
                }
            }
        } else {
            eprintln!("failed to find span when exiting");
        }
    }
}
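
// A minimal test sketch (assumed usage, not in the original commit): install the subscriber
// for the duration of a closure, time one span, and read it back through the report handle.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn records_time_spent_in_a_span() {
        let (subscriber, report_handle) = Timings::new();
        tracing::subscriber::with_default(subscriber, || {
            // The guard enters the span; dropping it at the end of the closure exits it.
            let _guard = info_span!("sleepy step").entered();
            std::thread::sleep(Duration::from_millis(10));
        });

        let rows = report_handle.dump().expect("locks are uncontended in this test");
        assert_eq!(rows.len(), 1);
        assert_eq!(rows[0].0.as_str(), "sleepy step");
        assert!(rows[0].1 >= Duration::from_millis(10));
    }
}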