Dendro  5.01
Dendro means tree in Greek. The Dendro library is a large-scale (262K cores on ORNL's Titan) distributed-memory adaptive octree framework. The main goal of Dendro is to perform large-scale multiphysics simulations efficiently on modern supercomputers. Dendro consists of efficient parallel data structures and algorithms to perform variational (finite element) methods and finite difference methods on 2:1 balanced arbitrary adaptive octrees, which enables users to perform simulations ranging from black holes (binary black hole mergers) to blood flow in the human body, with applications ranging from relativity and astrophysics to biomedical engineering.
dollar.hpp
1 // Dollar is a generic instrumented CPU profiler (C++11), header-only and zlib/libpng licensed.
2 // Dollar outputs traces for chrome://tracing and also ASCII, CSV, TSV and Markdown text formats.
3 // - rlyeh ~~ listening to Team Ghost / High hopes.
4 
5 /* usage:
6 #include "dollar.hpp" // dollar is enabled by default. compile with -D$= to disable any profiling
7 int main() { $ // <-- put a dollar after every curly brace to determine the cpu cost of the scope
8  for( int x = 0; x < 10000000; ++x ) { $ // <-- functions or loops will apply too
9  // slow stuff...
10  }
11  std::ofstream file("chrome.json");
12  dollar::chrome(file); // write tracing results to a json file (for chrome://tracing embedded profiler)
13  dollar::text(std::cout); // report stats to std::cout in text format; see also csv(), tsv() and markdown()
14  dollar::clear(); // clear all scopes (like when entering a new frame)
15 }
16 */
17 
18 #pragma once
19 
20 #define DOLLAR_VERSION "1.2.0" /* (2016/10/03) Add chrome://tracing profiler support; Project renamed;
21 #define DOLLAR_VERSION "1.1.0" /* (2016/05/03) New tree view and CPU meters (ofxProfiler style); Smaller implementation;
22 #define DOLLAR_VERSION "1.0.1" // (2015/11/15) Fix win32 `max()` macro conflict
23 #define DOLLAR_VERSION "1.0.0" // (2015/08/02) Macro renamed
24 #define DOLLAR_VERSION "0.0.0" // (2015/03/13) Initial commit */
25 
26 #ifdef $
27 
28 #include <iostream>
29 
// No-op implementation of the public API, selected when the `$` macro is already
// defined by the build (e.g. compiled with -D$=). Every entry point keeps its
// signature so call sites compile unchanged, but nothing is recorded or written.
namespace dollar {

    inline void csv( std::ostream & ) {}
    inline void tsv( std::ostream & ) {}
    inline void text( std::ostream & ) {}
    inline void chrome( std::ostream & ) {}
    inline void markdown( std::ostream & ) {}
    inline void pause( bool ) {}
    // Must return a value: flowing off the end of a non-void function is undefined
    // behaviour. With profiling compiled out nothing can ever be paused.
    inline bool is_paused() { return false; }
    inline void clear() {}

}
42 
43 #else
44 #include <stdio.h>
45 
46 #include <algorithm>
47 #include <iostream>
48 #include <map>
49 #include <sstream>
50 #include <string>
51 #include <vector>
52 #include <thread>
53 
54 #ifdef DOLLAR_USE_OMP
55 # include <omp.h>
56 #else
57 # include <chrono>
58 #endif
59 
60 #ifdef _MSC_VER
61 #include <process.h>
62 #else
63 #include <unistd.h>
64 #endif
65 
66 #ifndef DOLLAR_MAX_TRACES
67 #define DOLLAR_MAX_TRACES 512
68 #endif
69 
70 #ifndef DOLLAR_CPUMETER_WIDTH
71 #define DOLLAR_CPUMETER_WIDTH 10
72 #endif
73 
// Stringification helpers: the extra level of indirection makes the argument
// expand first, so DOLLAR_TOSTRING(__LINE__) yields the line number as text.
#define DOLLAR_STRINGIFY(x) #x
#define DOLLAR_TOSTRING(x) DOLLAR_STRINGIFY(x)

// Token-pasting helpers: DOLLAR_UNIQUE(sym) appends the current line number to
// `sym`, giving each sampler variable a per-line name.
#define DOLLAR_GLUE(a,b) a##b
#define DOLLAR_JOIN(a,b) DOLLAR_GLUE(a,b)
#define DOLLAR_UNIQUE(sym) DOLLAR_JOIN(sym, __LINE__)

// DOLLAR(name): declares a scoped sampler with an explicit title.
#define DOLLAR(name) dollar::sampler DOLLAR_UNIQUE(dollar_sampler_)(name);

// $: declares a scoped sampler titled "<function> (<file>:<line>)". Only the
// macro that provides the function name differs between compilers.
#ifdef _MSC_VER
#define $ dollar::sampler DOLLAR_UNIQUE(dollar_sampler_)(std::string(__FUNCTION__) + " (" __FILE__ ":" DOLLAR_TOSTRING(__LINE__) ")" );
#else
#define $ dollar::sampler DOLLAR_UNIQUE(dollar_sampler_)(std::string(__PRETTY_FUNCTION__) + " (" __FILE__ ":" DOLLAR_TOSTRING(__LINE__) ")" );
#endif
87 
88 namespace dollar
89 {
// Returns a pointer to one function-local static instance of T; every call for
// the same T yields the same address. T must be default-constructible.
template < typename T >
inline T* singleton() {
    static T shared_instance;
    T *const address = &shared_instance;
    return address;
}
// Seconds elapsed since the first call to now(). The reference point is captured
// once, on first use. Without DOLLAR_USE_OMP the value is truncated to whole
// microseconds before being converted to seconds.
inline double now() {
#ifdef DOLLAR_USE_OMP
    static const double origin = omp_get_wtime();
    const double stamp = omp_get_wtime();
    return stamp - origin;
#else
    typedef std::chrono::steady_clock clock_type;
    static const clock_type::time_point origin = clock_type::now();
    const std::chrono::microseconds elapsed =
        std::chrono::duration_cast< std::chrono::microseconds >( clock_type::now() - origin );
    return elapsed.count() / 1000000.0;
#endif
}
// Splits `self` on any character contained in `delimiters`.
// Runs of delimiters are treated as one separator and empty tokens are never
// produced, so leading/trailing delimiters yield nothing. Returns the tokens in
// order of appearance (an empty vector when `self` holds no token at all).
inline std::vector< std::string > tokenize( const std::string &self, const std::string &delimiters ) {
    // Lookup table indexed by byte value: true for every delimiter character.
    bool is_delimiter[256] = {};
    for( const char d : delimiters ) {
        is_delimiter[ static_cast<unsigned char>( d ) ] = true;
    }

    std::vector< std::string > tokens;
    std::string pending;
    for( const char c : self ) {
        if( !is_delimiter[ static_cast<unsigned char>( c ) ] ) {
            pending.push_back( c );
            continue;
        }
        if( !pending.empty() ) {
            tokens.push_back( pending );
            pending.clear();
        }
    }
    if( !pending.empty() ) {
        tokens.push_back( pending );
    }
    return tokens;
}
// A named tree node carrying a non-owning pointer to a payload of type `info`.
// Children are stored by value in insertion order.
template<typename info>
struct Node {
    std::string name;
    info *value;
    std::vector<Node> children;

    Node( const std::string &name, info *value = 0 ) : name(name), value(value)
    {}

    // Prints this node and its subtree, one node per line. `leaf` selects the
    // "+-" connector (last sibling) over "|-" and decides whether the indent
    // passed to the children carries a vertical bar.
    void tree_printer( std::string indent, bool leaf, std::ostream &out ) const {
        out << indent << ( leaf ? "+-" : "|-" ) << name << std::endl;
        indent += ( leaf ? " " : "| " );

        const size_t count = children.size();
        for( size_t idx = 0; idx < count; ++idx ) {
            const bool is_last = ( idx + 1 == count );
            children[idx].tree_printer( indent, is_last, out );
        }
    }

    // Prints the whole tree rooted here; the root is drawn as a last sibling.
    void tree_printer( std::ostream &out = std::cout ) const {
        tree_printer( std::string(), true, out );
    }

    // Follows the path `names` downwards from this node, creating any missing
    // node on the way, and returns the node at the end of the path (this node
    // itself when `names` is empty).
    Node&tree_recreate_branch( const std::vector<std::string> &names ) {
        Node *cursor = this;
        for( const auto &wanted : names ) {
            std::vector<Node> &siblings = cursor->children;
            auto hit = std::find_if( siblings.begin(), siblings.end(),
                [&]( const Node &candidate ) { return candidate.name == wanted; } );
            if( hit != siblings.end() ) {
                cursor = &*hit;
            } else {
                siblings.push_back( Node(wanted) );
                cursor = &siblings.back();
            }
        }
        return *cursor;
    }

    // Depth-first traversal. Childless nodes are passed to `method`; any other
    // node is passed to `pre_children` before its children are visited and to
    // `post_chilren` afterwards.
    template<typename FN0, typename FN1, typename FN2>
    void tree_walker( const FN0 &method, const FN1 &pre_children, const FN2 &post_chilren ) const {
        if( children.empty() ) {
            method( *this );
            return;
        }
        pre_children( *this );
        for( const Node &child : children ) {
            child.tree_walker( method, pre_children, post_chilren );
        }
        post_chilren( *this );
    }
};
172  class profiler
173  {
174  std::vector<std::string> stack;
175  bool paused;
176 
177  public:
179  struct info {
180  bool paused = false;
181  unsigned hits = 0;
182  double current = 0, total = 0;
183 #ifdef _MSC_VER
184  int pid = 0;
185 #else
186  pid_t pid = 0;
187 #endif
188  std::thread::id tid;
189  std::string title;
190 
191  info()
192  {}
193 
194  info( const std::string &title ) : title(title)
195  {}
196 
197  inline friend
198  std::ostream &operator<<( std::ostream &os, const info &k ) {
199  os << "title:" << tokenize(k.title, ";").back() << std::endl;
200  os << "paused:" << k.paused << std::endl;
201  os << "hits:" << k.hits << std::endl;
202  os << "current:" << k.current << std::endl;
203  os << "total:" << k.total << std::endl;
204  os << "pid:" << k.pid << std::endl;
205  os << "tid:" << k.tid << std::endl;
206  return os;
207  }
208  };
209 
210  profiler() {
211  stack.reserve( DOLLAR_MAX_TRACES );
212  }
213 
214  info &in( const std::string &title ) {
215 #ifdef _MSC_VER
216  auto pid = _getpid();
217 #else
218  auto pid = getpid();
219 #endif
220  auto tid = std::this_thread::get_id();
221 
222  //std::stringstream header;
223  //header << pid << "/" << tid << "/" << title;
224  //stack.push_back( stack.empty() ? header.str() : stack.back() + ";" + header.str() );
225  stack.push_back( stack.empty() ? title : stack.back() + ";" + title );
226 
227  auto &id = stack.back();
228 
229  if( counters.find( id ) == counters.end() ) {
230  counters[ id ] = info ( stack.back() );
231  }
232 
233  auto &sample = counters[ id ];
234 
235  sample.hits ++;
236  sample.current = -dollar::now();
237 
238  sample.pid = pid;
239  sample.tid = tid;
240 
241  return sample;
242  }
243 
244  void out( info &sample ) {
245  sample.current += dollar::now();
246  sample.total += ( sample.paused ? 0.f : sample.current );
247  stack.pop_back();
248  }
249 
250  template<bool for_chrome>
251  void print( std::ostream &out, const char *tab = ",", const char *feed = "\r\n" ) const {
252  auto inital_matches = []( const std::string &text, const std::string &abc ) -> unsigned {
253  unsigned c = 0;
254  for( auto end = (std::min)(text.size(), abc.size()), it = end - end; it < end; ++it, ++c ) {
255  if( text[it] != abc[it] ) break;
256  }
257  return c;
258  };
259  auto starts_with = [&]( const std::string &text, const std::string &abc ) -> bool {
260  return inital_matches( text, abc ) == abc.size();
261  };
262 
263  // create a copy of the class to modify it, so this method is still const
264  auto copy = *this;
265 
266  // finish any active scope
267  while( !copy.stack.empty() ) {
268  auto &current = copy.counters[ stack.back() ];
269  copy.out( current );
270  }
271 
272  // update time hierarchically
273  {
274  // sorted tree
275  std::vector< std::pair<std::string, info *> > az_tree;
276 
277  for( auto &it : copy.counters ) {
278  auto &info = it.second;
279  az_tree.emplace_back( info.title, &info );
280  }
281 
282  std::sort( az_tree.begin(), az_tree.end() );
283  std::reverse( az_tree.begin(), az_tree.end() );
284 
285  // here's the magic
286  for( size_t i = 0; i < az_tree.size(); ++i ) {
287  for( size_t j = i + 1; j < az_tree.size(); ++j ) {
288  if( starts_with( az_tree[ i ].first, az_tree[ j ].first ) ) {
289  az_tree[ j ].second->total -= az_tree[ i ].second->total;
290  }
291  }
292  }
293  }
294 
295  // calculate total accumulated time
296  double total = 0;
297  for( auto &it : copy.counters ) {
298  total += it.second.total;
299  }
300 
301  std::vector<std::string> list;
302 
303  // string2tree {
304  static unsigned char pos = 0;
305  info dummy;
306  dummy.title = "/";
307 #ifdef _MSC_VER
308  dummy.pid = _getpid();
309 #else
310  dummy.pid = getpid();
311 #endif
312  dummy.tid = std::this_thread::get_id();
313  Node<info> root( std::string() + "\\|/-"[(++pos)%4], &dummy );
314  for( auto it = copy.counters.begin(), end = copy.counters.end(); it != end; ++it ) {
315  auto &info = it->second;
316  list.push_back( info.title );
317 
318  auto split = tokenize( info.title, ";" );
319 
320  auto &node = root.tree_recreate_branch( split );
321  node.value = &info;
322  }
323  std::stringstream ss;
324  root.tree_printer( ss );
325  list = tokenize( ss.str(), "\r\n" );
326  static size_t maxlen = 0;
327  for( auto &it : list ) {
328  maxlen = (std::max)(maxlen, it.size());
329  }
330  for( auto &it : list ) {
331  if( maxlen > it.size() ) it += std::string( maxlen - it.size(), ' ' );
332  else if( maxlen < it.size() ) it.resize( maxlen );
333  }
334  // }
335 
336  // prettify name/titles
337  size_t i = 0;
338  if( for_chrome ) {
339  for( auto &cp : copy.counters ) {
340  cp.second.title = tokenize( cp.second.title, ";" ).back();
341  for( auto &ch : cp.second.title ) {
342  if( ch == '\\' ) ch = '/';
343  }
344  }
345  } else {
346  size_t x = 0;
347  for( auto &cp : copy.counters ) {
348  cp.second.title = list[++x];
349  for( auto &ch : cp.second.title ) {
350  if( ch == '\\' ) ch = '/';
351  }
352  }
353  }
354 
355  if( !for_chrome ) {
356  std::string format, sep, graph, buffer(1024, '\0');
357  // pre-loop
358  for( auto &it : std::vector<std::string>{ "%4d.","%s","[%s]","%5.2f%% CPU","(%9.3fms)","%5d hits",feed } ) {
359  format += sep + it;
360  sep = tab;
361  }
362  // loop
363  for( auto &it : copy.counters ) {
364  auto &info = it.second;
365  double cpu = info.total * 100.0 / total;
366  int width(cpu*DOLLAR_CPUMETER_WIDTH/100);
367  graph = std::string( width, '=' ) + std::string( DOLLAR_CPUMETER_WIDTH - width, '.' );
368 #ifdef _MSC_VER
369  sprintf_s( &buffer[0], 1024,
370 #else
371  sprintf( &buffer[0],
372 #endif
373  format.c_str(), ++i, it.second.title.c_str(), graph.c_str(), cpu, (float)(info.total * 1000), info.hits );
374  out << &buffer[0];
375  }
376  } else {
377 
378  // setup
379  out << "[" << std::endl;
380 
381  // json array format
382  // [ref] https://docs.google.com/document/d/1CvAClvFfyA5R-PhYUmn5OOQtYMH4h6I0nSsKchNAySU/preview
383  // [ref] https://github.com/catapult-project/catapult/blob/master/tracing/tracing/base/color_scheme.html#L54
384 
385  auto get_color = []( float pct ) {
386  return pct <= 16 ? "good":
387  pct <= 33 ? "bad":
388  "terrible";
389  };
390 
391  double timestamp = 0;
392  root.tree_walker(
393  [&]( const Node<info> &node ) {
394  auto &info = *node.value;
395  double cpu = info.total * 100.0 / total;
396  out << "{\"name\": \"" << info.title << "\","
397  "\"cat\": \"" << "CPU,DOLLAR" << "\","
398  "\"ph\": \"" << 'X' << "\","
399  "\"pid\": " << info.pid << ","
400  "\"tid\": " << info.tid << ","
401  "\"ts\": " << (unsigned int)(timestamp * 1000 * 1000) << ","
402  "\"dur\": " << (unsigned int)(info.total * 1000 * 1000) << ","
403  "\"cname\": \"" << get_color(cpu) << "\"" "," <<
404  "\"args\": {}},\n";
405  timestamp += info.total;
406  },
407  [&]( const Node<info> &node ) {
408  auto &info = *node.value;
409  double cpu = info.total * 100.0 / total;
410  out << "{\"name\": \"" << info.title << "\","
411  "\"cat\": \"" << "CPU,DOLLAR" << "\","
412  "\"ph\": \"" << 'B' << "\","
413  "\"pid\": " << info.pid << ","
414  "\"tid\": " << info.tid << ","
415  "\"ts\": " << (unsigned int)(timestamp * 1000 * 1000) << ","
416  "\"args\": {}},\n";
417  timestamp += info.total;
418  },
419  [&]( const Node<info> &node ) {
420  auto &info = *node.value;
421  double cpu = info.total * 100.0 / total;
422  out << "{\"name\": \"" << info.title << "\","
423  "\"cat\": \"" << "CPU,DOLLAR" << "\","
424  "\"ph\": \"" << 'E' << "\","
425  "\"pid\": " << info.pid << ","
426  "\"tid\": " << info.tid << ","
427  "\"ts\": " << (unsigned int)((timestamp + info.total) * 1000 * 1000) << ","
428  "\"cname\": \"" << get_color(cpu) << "\"" "," <<
429  "\"args\": {}},\n";
430  timestamp += info.total;
431  } );
432  }
433  }
434 
435  void pause( bool paused_ ) {
436  paused = paused_;
437  }
438 
439  bool is_paused() const {
440  return paused;
441  }
442 
443  void clear() {
444  bool p = paused;
445  auto num_unfinished_scopes = stack.size();
446  *this = profiler();
447  stack.resize( num_unfinished_scopes );
448  paused = p;
449  }
450 
451  private: std::map< std::string, info > counters;
452  };
454  class sampler {
455  sampler();
456  sampler( const sampler & );
457  sampler& operator=( const sampler & );
458  profiler::info *handle;
459 
460  public: // public api
461 
462  explicit sampler( const std::string &title ) {
463  handle = &singleton<profiler>()->in( title );
464  }
465 
466  ~sampler() {
467  singleton<profiler>()->out( *handle );
468  }
469  };
470 
471  inline void csv( std::ostream &os ) {
472  singleton<profiler>()->print<0>(os, ",");
473  }
474 
475  inline void tsv( std::ostream &os ) {
476  singleton<profiler>()->print<0>(os, "\t");
477  }
478 
479  inline void markdown( std::ostream &os ) {
480  singleton<profiler>()->print<0>(os, "|");
481  }
482 
483  inline void text( std::ostream &os ) {
484  singleton<profiler>()->print<0>(os, " ");
485  }
486 
487  inline void chrome( std::ostream &os ) {
488  singleton<profiler>()->print<1>(os, "");
489  }
490 
491  inline void pause( bool paused ) {
492  singleton<profiler>()->pause( paused );
493  }
494 
495  inline bool is_paused() {
496  return singleton<profiler>()->is_paused();
497  }
498 
499  inline void clear() {
500  singleton<profiler>()->clear();
501  }
502 }
503 
504 #endif
505 
506 #ifdef DOLLAR_BUILD_DEMO
507 #include <iostream>
508 #include <fstream>
509 #include <chrono>
510 #include <thread>
511 
512 void x( int counter ) { $
513  while( counter-- > 0 ) { $
514  std::this_thread::sleep_for( std::chrono::microseconds( int(0.00125 * 1000000) ) );
515  }
516 }
517 void c( int counter ) { $
518  while( counter-- > 0 ) { $
519  std::this_thread::sleep_for( std::chrono::microseconds( int(0.00125 * 1000000) ) );
520  }
521 }
522 void y( int counter ) { $
523  while( counter-- > 0 ) { $
524  std::this_thread::sleep_for( std::chrono::microseconds( int(0.00125 * 1000000) ) );
525  if( counter % 2 ) c(counter); else x(counter);
526  }
527 }
528 void a( int counter ) { $
529  while( counter-- > 0 ) { $
530  std::this_thread::sleep_for( std::chrono::microseconds( int(0.00125 * 1000000) ) );
531  y(counter);
532  }
533 }
534 
535 int main() { $
536  a(10);
537 
538  // write tracing results to a json file (for chrome://tracing embedded profiler)
539  std::ofstream file("chrome.json");
540  dollar::chrome(file);
541 
542  // display ascii text results
543  dollar::text(std::cout);
544 
545  // clear next frame
546  dollar::clear();
547 }
548 #endif
Definition: dollar.hpp:87
Definition: dollar.hpp:178
Definition: dollar.hpp:453
Definition: dollar.hpp:171
Definition: dollar.hpp:117