starmelon/src/timings.rs

//! This module provides a crude system for timing how long various steps of the program take to
//! execute using the tracing framework. I seek absolution for suboptimal code. For example I want
//! to throw a span around all clones and see exactly how long I spend copying data. If I see that
//! I only spend a few milliseconds overall then that could be a price worth paying for development
//! velocity.
//!
//! This is quite a different use case than flame graphs.
use rustc_hash::FxHashMap;
use std::collections::hash_map::Entry;
use std::collections::HashMap;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::{Arc, RwLock};
use std::thread::{self, ThreadId};
use std::time::{Duration, Instant};
use tracing::info_span;
use tracing::span::{Attributes, Record};
use tracing::{Event, Id, Metadata, Subscriber};

/// Subscriber state that accumulates how long each span has spent entered.
///
/// Each step is described by a short string (under 100 characters). Steps may overlap.
///
/// The relationship between spans (parent/child) is not tracked. The bookkeeping assumes that,
/// within a single thread, once `enter` is called for a span, `exit` is called for that same
/// span before `enter` is called for it again.
pub(crate) struct Timings {
    /// Counter from which span ids are handed out; `Timings::new` initialises it to 1.
    ids: AtomicUsize,
    /// Start instant of every span that has been entered but not yet exited, keyed by the
    /// entering thread and the span id.
    started: RwLock<FxHashMap<(ThreadId, u64), Instant>>,
    // `summary` and `span_names` sit behind separate locks, which in principle creates a risk
    // of deadlock when generating a report: a reader could hold one lock while a writer that
    // holds the other waits for it. `ReportHandle::dump` sidesteps this by only using
    // `try_read` and by releasing each lock before it touches the next one.
    /// Total time spent inside each span, keyed by span id. Shared with `ReportHandle`.
    summary: Arc<RwLock<FxHashMap<u64, Duration>>>,
    /// Name taken from the span's metadata, keyed by span id. Shared with `ReportHandle`.
    span_names: Arc<RwLock<FxHashMap<u64, String>>>,
}

/// Read-side handle onto the data collected by a [`Timings`] subscriber.
///
/// It is created together with the subscriber by `Timings::new` and holds clones of the same
/// `Arc`s, so it keeps working after the subscriber value itself has been moved elsewhere.
pub(crate) struct ReportHandle {
    /// Total time spent inside each span, keyed by span id.
    summary: Arc<RwLock<FxHashMap<u64, Duration>>>,
    /// Span name for each span id.
    span_names: Arc<RwLock<FxHashMap<u64, String>>>,
}

impl Timings {
    /// Creates an empty subscriber together with the handle used to read its results.
    ///
    /// Both returned values point at the same `summary` and `span_names` maps; the
    /// `started` map is private to the subscriber.
    pub fn new() -> (Timings, ReportHandle) {
        let timings = Timings {
            // Ids start at 1, presumably because `tracing::Id::from_u64` does not accept 0.
            ids: AtomicUsize::new(1),
            started: RwLock::new(FxHashMap::default()),
            summary: Arc::new(RwLock::new(FxHashMap::default())),
            span_names: Arc::new(RwLock::new(FxHashMap::default())),
        };

        // The handle only bumps the reference counts; no map data is copied here.
        let handle = ReportHandle {
            summary: Arc::clone(&timings.summary),
            span_names: Arc::clone(&timings.span_names),
        };

        (timings, handle)
    }
}

impl ReportHandle {
    /// Returns the total time recorded so far, grouped by span name.
    ///
    /// Durations of distinct span ids that share a name are added together. Span ids without
    /// a recorded name are left out. The order of the returned pairs is unspecified.
    ///
    /// Returns `None` if either map cannot be read-locked right away (`try_read` fails), so
    /// this call never waits on a lock.
    pub fn dump(&self) -> Option<Vec<(String, Duration)>> {
        // Snapshot each map and let its guard go immediately, so the two locks are never held
        // at the same time. Copying is acceptable because the intended use is a batch program
        // that prints one report at the end, with kilobytes rather than megabytes of timing
        // data.
        let durations_by_id = self.summary.try_read().ok()?.clone();
        let names_by_id = self.span_names.try_read().ok()?.clone();

        let mut totals: HashMap<String, Duration> = HashMap::new();

        let named_durations = durations_by_id
            .into_iter()
            .filter_map(|(span_id, elapsed)| names_by_id.get(&span_id).map(|name| (name, elapsed)));

        for (name, elapsed) in named_durations {
            *totals.entry(name.clone()).or_default() += elapsed;
        }

        Some(totals.into_iter().collect())
    }
}

impl tracing::Subscriber for Timings {
    /// Every span and event is reported as enabled; no filtering is done here.
    fn enabled(&self, _metadata: &Metadata<'_>) -> bool {
        true
    }

    /// Allocates the next span id and remembers the span's metadata name under it.
    fn new_span(&self, span: &Attributes<'_>) -> Id {
        let raw_id = self.ids.fetch_add(1, Ordering::SeqCst) as u64;
        let id = Id::from_u64(raw_id);
        let name = span.metadata().name().to_owned();
        // Best effort: if the lock is poisoned the span simply stays unnamed, and
        // `ReportHandle::dump` leaves unnamed spans out of the report.
        if let Ok(mut names) = self.span_names.write() {
            names.insert(raw_id, name);
        }

        id
    }

    /// Recorded field values are ignored; only durations are tracked.
    fn record(&self, _span: &Id, _values: &Record<'_>) {}

    /// Span relationships are not tracked.
    fn record_follows_from(&self, _span: &Id, _follows: &Id) {}

    /// Events are ignored.
    fn event(&self, _event: &Event<'_>) {}

    /// Records the instant at which the current thread entered `span`.
    ///
    /// # Panics
    ///
    /// Panics if the `started` lock is poisoned.
    fn enter(&self, span: &Id) {
        let key = (thread::current().id(), span.into_u64());
        let mut started = self.started.write().unwrap();
        match started.entry(key) {
            Entry::Vacant(slot) => {
                // The clock is read after the lock is acquired, so time spent waiting for
                // the lock is not attributed to the span.
                slot.insert(Instant::now());
            }
            Entry::Occupied(_) => {
                // This span was already entered on the current thread without a matching
                // exit, which is a logic error in the caller. The original start instant is
                // kept because this subscriber only measures how long each span executes.
            }
        }
    }

    /// Adds the time since the matching `enter` on this thread to the span's running total.
    ///
    /// If no matching `enter` was recorded, a diagnostic is written to stderr and nothing is
    /// added.
    ///
    /// # Panics
    ///
    /// Panics if the `started` or `summary` lock is poisoned. The write locks are unwrapped
    /// because they are not expected to be poisoned in the simple single-threaded code of
    /// starmelon.
    fn exit(&self, span: &Id) {
        // Read the clock first so that lock acquisition below is not counted.
        let stopped = Instant::now();
        let span_id = span.into_u64();
        let key = (thread::current().id(), span_id);

        // This is deliberately its own statement: the temporary write guard on `started` is
        // dropped at the end of it. Using the expression directly as an `if let` scrutinee
        // would keep `started` locked for the whole body, i.e. while `summary` is locked too.
        let entered_at = self.started.write().unwrap().remove(&key);

        match entered_at {
            Some(entered_at) => {
                // Saturating, so a clock anomaly yields zero instead of a panic.
                let elapsed = stopped.saturating_duration_since(entered_at);
                *self.summary.write().unwrap().entry(span_id).or_default() += elapsed;
            }
            None => {
                // Diagnostics go to stderr so they cannot mix with the program's own stdout.
                eprintln!("failed to find span when exiting");
            }
        }
    }
}
feat: use tracing crate to measure cpu time 2021-12-10 00:18:01 +00:00			`//! This module provides a crude system for timing how long various steps of the program take to`
			`//! execute using the tracing framework. I seek absolution for suboptimal code. For example I want`
			`//! to throw a span around all clones and see exactly how long I spend copying data. If I see that`
			`//! I only spend a few milliseconds overall then that could be a price worth paying for development`
			`//! velocity.`
			`//!`
			`//! This is quite a different use case then flame graphs.`
			`use rustc_hash::FxHashMap;`
			`use std::collections::hash_map::Entry;`
			`use std::collections::HashMap;`
			`use std::sync::atomic::{AtomicUsize, Ordering};`
			`use std::sync::{Arc, RwLock};`
			`use std::thread::{self, ThreadId};`
			`use std::time::{Duration, Instant};`
			`use tracing::info_span;`
			`use tracing::span::{Attributes, Record};`
			`use tracing::{Event, Id, Metadata, Subscriber};`

			`//`
			`// Each step will be described with a less than 100 char string. Steps will overlap.`
			`//`
			`// I am not tracking the relationship between`
			`// the spans. I assume that in each thread that if enter is called then exit will be called before`
			`// enter is called again.`
			`pub(crate) struct Timings {`
			`ids: AtomicUsize,`
			`started: RwLock<FxHashMap<(ThreadId, u64), Instant>>,`
			`// This setup of having separate locks creates a risk of deadlocks when generating a report.`
			`// What if the reading thread takes a lock`
			`summary: Arc<RwLock<FxHashMap<u64, Duration>>>,`
			`span_names: Arc<RwLock<FxHashMap<u64, String>>>,`
			`}`

			`pub(crate) struct ReportHandle {`
			`summary: Arc<RwLock<FxHashMap<u64, Duration>>>,`
			`span_names: Arc<RwLock<FxHashMap<u64, String>>>,`
			`}`

			`impl Timings {`
			`pub fn new() -> (Timings, ReportHandle) {`
			`let summary = Arc::new(RwLock::new(FxHashMap::default()));`
			`let span_names = Arc::new(RwLock::new(FxHashMap::default()));`

			`let report_handle = ReportHandle {`
			`summary: Arc::clone(&summary),`
			`span_names: Arc::clone(&span_names),`
			`};`

			`let subscriber = Timings {`
			`ids: AtomicUsize::new(1),`
			`started: RwLock::new(FxHashMap::default()),`
			`summary,`
			`span_names,`
			`};`

			`(subscriber, report_handle)`
			`}`
			`}`

			`impl ReportHandle {`
			`pub fn dump(&self) -> Option<Vec<(String, Duration)>> {`
			`// Now it might take a long time (relatively speaking) to copy many megabytes of report`
			`// data. I went with this design because my target use case is for batch programs that`
			`// print out a report at the end of processing. Also my target use case won't have`
			`// megabytes of timing information. It should only have a few kilobytes.`
			`let summary = self.summary.try_read().ok()?.clone();`
			`let span_names = self.span_names.try_read().ok()?.clone();`

			`let mut accumulator: HashMap<String, Duration> = HashMap::new();`

			`for (id, duration) in summary.into_iter() {`
			`if let Some(name) = span_names.get(&id) {`
			`let entry = accumulator`
			`.entry(name.clone())`
			`.or_insert_with(\|\| Default::default());`
			`*entry += duration;`
			`}`
			`}`

			`Some(accumulator.into_iter().collect())`
			`}`
			`}`

			`impl tracing::Subscriber for Timings {`
			`fn enabled(&self, metadata: &Metadata<'_>) -> bool {`
			`true`
			`}`

			`fn new_span(&self, span: &Attributes<'_>) -> Id {`
			`//println!("got new span {:?}", span);`
			`let x = self.ids.fetch_add(1, Ordering::SeqCst);`
			`let id = Id::from_u64(x as u64);`
			`let name = span.metadata().name().to_owned();`
			`if let Ok(mut guard) = self.span_names.write() {`
			`guard.insert(x as u64, name);`
			`}`

			`id`
			`}`

			`fn record(&self, _span: &Id, _values: &Record<'_>) {}`

			`fn record_follows_from(&self, _span: &Id, _follows: &Id) {}`

			`fn event(&self, _event: &Event<'_>) {}`

			`fn enter(&self, span: &Id) {`
			`let mut lock = self.started.write().unwrap();`
			`match lock.entry((thread::current().id(), span.into_u64())) {`
			`Entry::Vacant(vacant) => {`
			`vacant.insert(Instant::now());`
			`}`
			`_ => {`
			`// We already started this span in the current thread so this is a logic error.`
			`// This subscriber is only listening to how long each span is executing.`
			`()`
			`}`
			`}`
			`}`

			`fn exit(&self, span: &Id) {`
			`let stopped = Instant::now();`
			`// I unwrap the read lock because the mutex should not be poisoned in the simple single`
			`// threaded code of starmelon.`
			`let key = (thread::current().id(), span.into_u64());`
			`if let Some(started) = self.started.write().unwrap().remove(&key) {`
			`let mut lock = self.summary.write().unwrap();`
			`match lock.entry(span.into_u64()) {`
			`Entry::Vacant(vacant) => {`
			`vacant.insert(stopped - started);`
			`}`
			`Entry::Occupied(mut occupied) => {`
			`*occupied.get_mut() += stopped - started;`
			`}`
			`}`
			`} else {`
			`println!("failed to find span when exiting");`
			`}`
			`}`
			`}`