16 #include "kmp_stats.h" 25 #define STRINGIZE2(x) #x 26 #define STRINGIZE(x) STRINGIZE2(x) 28 #define expandName(name, flags, ignore) {STRINGIZE(name), flags}, 29 statInfo timeStat::timerInfo[] = {
30 KMP_FOREACH_TIMER(expandName, 0){
"TIMER_LAST", 0}};
31 const statInfo counter::counterInfo[] = {
35 #define expandName(ignore1, ignore2, ignore3) {0.0, 0.0, 0.0}, 36 kmp_stats_output_module::rgb_color kmp_stats_output_module::timerColorInfo[] = {
37 KMP_FOREACH_TIMER(expandName, 0){0.0, 0.0, 0.0}};
40 const kmp_stats_output_module::rgb_color
41 kmp_stats_output_module::globalColorArray[] = {
65 static uint32_t statsPrinted = 0;
68 static kmp_stats_output_module *__kmp_stats_global_output = NULL;
72 void statistic::addSample(
double sample) {
73 double delta = sample - meanVal;
75 sampleCount = sampleCount + 1;
76 meanVal = meanVal + delta / sampleCount;
77 m2 = m2 + delta * (sample - meanVal);
79 minVal = std::min(minVal, sample);
80 maxVal = std::max(maxVal, sample);
83 statistic &statistic::operator+=(
const statistic &other) {
84 if (sampleCount == 0) {
89 uint64_t newSampleCount = sampleCount + other.sampleCount;
90 double dnsc = double(newSampleCount);
91 double dsc = double(sampleCount);
92 double dscBydnsc = dsc / dnsc;
93 double dosc = double(other.sampleCount);
94 double delta = other.meanVal - meanVal;
102 meanVal = meanVal * dscBydnsc + other.meanVal * (1 - dscBydnsc);
103 m2 = m2 + other.m2 + dscBydnsc * dosc * delta * delta;
104 minVal = std::min(minVal, other.minVal);
105 maxVal = std::max(maxVal, other.maxVal);
106 sampleCount = newSampleCount;
111 void statistic::scale(
double factor) {
112 minVal = minVal * factor;
113 maxVal = maxVal * factor;
114 meanVal = meanVal * factor;
115 m2 = m2 * factor * factor;
119 std::string statistic::format(
char unit,
bool total)
const {
120 std::string result = formatSI(sampleCount, 9,
' ');
122 if (sampleCount == 0) {
123 result = result + std::string(
", ") + formatSI(0.0, 9, unit);
124 result = result + std::string(
", ") + formatSI(0.0, 9, unit);
125 result = result + std::string(
", ") + formatSI(0.0, 9, unit);
127 result = result + std::string(
", ") + formatSI(0.0, 9, unit);
128 result = result + std::string(
", ") + formatSI(0.0, 9, unit);
130 result = result + std::string(
", ") + formatSI(minVal, 9, unit);
131 result = result + std::string(
", ") + formatSI(meanVal, 9, unit);
132 result = result + std::string(
", ") + formatSI(maxVal, 9, unit);
135 result + std::string(
", ") + formatSI(meanVal * sampleCount, 9, unit);
136 result = result + std::string(
", ") + formatSI(getSD(), 9, unit);
143 void explicitTimer::start(timer_e timerEnumValue) {
144 startTime = tsc_tick_count::now();
147 __kmp_stats_thread_ptr->incrementNestValue();
152 void explicitTimer::stop(timer_e timerEnumValue,
153 kmp_stats_list *stats_ptr ) {
154 if (startTime.getValue() == 0)
157 tsc_tick_count finishTime = tsc_tick_count::now();
160 stat->addSample(((finishTime - startTime) - totalPauseTime).ticks());
164 stats_ptr = __kmp_stats_thread_ptr;
165 stats_ptr->push_event(
166 startTime.getValue() - __kmp_stats_start_time.getValue(),
167 finishTime.getValue() - __kmp_stats_start_time.getValue(),
168 __kmp_stats_thread_ptr->getNestValue(), timerEnumValue);
169 stats_ptr->decrementNestValue();
179 partitionedTimers::partitionedTimers() { timer_stack.reserve(8); }
182 void partitionedTimers::add_timer(explicit_timer_e timer_index,
183 explicitTimer *timer_pointer) {
184 KMP_DEBUG_ASSERT((
int)timer_index < (
int)EXPLICIT_TIMER_LAST + 1);
185 timers[timer_index] = timer_pointer;
189 void partitionedTimers::init(timerPair init_timer_pair) {
190 KMP_DEBUG_ASSERT(this->timer_stack.size() == 0);
191 timer_stack.push_back(init_timer_pair);
192 timers[init_timer_pair.get_index()]->start(init_timer_pair.get_timer());
199 void partitionedTimers::push(timerPair timer_pair) {
204 KMP_DEBUG_ASSERT(this->timer_stack.size() > 0);
205 timerPair current_timer = timer_stack.back();
206 timer_stack.push_back(timer_pair);
207 if (current_timer != timer_pair) {
208 timers[current_timer.get_index()]->pause();
209 timers[timer_pair.get_index()]->start(timer_pair.get_timer());
214 void partitionedTimers::pop() {
219 KMP_DEBUG_ASSERT(this->timer_stack.size() > 1);
220 timerPair current_timer = timer_stack.back();
221 timer_stack.pop_back();
222 timerPair new_timer = timer_stack.back();
223 if (current_timer != new_timer) {
224 timers[current_timer.get_index()]->stop(current_timer.get_timer());
225 timers[new_timer.get_index()]->resume();
233 void partitionedTimers::windup() {
234 while (timer_stack.size() > 1) {
237 if (timer_stack.size() > 0) {
238 timerPair last_timer = timer_stack.back();
239 timer_stack.pop_back();
240 timers[last_timer.get_index()]->stop(last_timer.get_timer());
246 void kmp_stats_event_vector::deallocate() {
257 int compare_two_events(
const void *event1,
const void *event2) {
258 kmp_stats_event *ev1 = (kmp_stats_event *)event1;
259 kmp_stats_event *ev2 = (kmp_stats_event *)event2;
261 if (ev1->getStart() < ev2->getStart())
263 else if (ev1->getStart() > ev2->getStart())
269 void kmp_stats_event_vector::sort() {
270 qsort(events, internal_size,
sizeof(kmp_stats_event), compare_two_events);
276 kmp_stats_list *kmp_stats_list::push_back(
int gtid) {
277 kmp_stats_list *newnode =
278 (kmp_stats_list *)__kmp_allocate(
sizeof(kmp_stats_list));
281 new (newnode) kmp_stats_list();
282 newnode->setGtid(gtid);
283 newnode->prev = this->prev;
284 newnode->next =
this;
285 newnode->prev->next = newnode;
286 newnode->next->prev = newnode;
289 void kmp_stats_list::deallocate() {
290 kmp_stats_list *ptr = this->next;
291 kmp_stats_list *delptr = this->next;
292 while (ptr !=
this) {
296 delptr->_event_vector.deallocate();
297 delptr->~kmp_stats_list();
301 kmp_stats_list::iterator kmp_stats_list::begin() {
302 kmp_stats_list::iterator it;
306 kmp_stats_list::iterator kmp_stats_list::end() {
307 kmp_stats_list::iterator it;
311 int kmp_stats_list::size() {
313 kmp_stats_list::iterator it;
314 for (retval = 0, it = begin(); it != end(); it++, retval++) {
321 kmp_stats_list::iterator::iterator() : ptr(NULL) {}
322 kmp_stats_list::iterator::~iterator() {}
323 kmp_stats_list::iterator kmp_stats_list::iterator::operator++() {
324 this->ptr = this->ptr->next;
327 kmp_stats_list::iterator kmp_stats_list::iterator::operator++(
int dummy) {
328 this->ptr = this->ptr->next;
331 kmp_stats_list::iterator kmp_stats_list::iterator::operator--() {
332 this->ptr = this->ptr->prev;
335 kmp_stats_list::iterator kmp_stats_list::iterator::operator--(
int dummy) {
336 this->ptr = this->ptr->prev;
339 bool kmp_stats_list::iterator::operator!=(
const kmp_stats_list::iterator &rhs) {
340 return this->ptr != rhs.ptr;
342 bool kmp_stats_list::iterator::operator==(
const kmp_stats_list::iterator &rhs) {
343 return this->ptr == rhs.ptr;
345 kmp_stats_list *kmp_stats_list::iterator::operator*()
const {
351 const char *kmp_stats_output_module::eventsFileName = NULL;
352 const char *kmp_stats_output_module::plotFileName = NULL;
353 int kmp_stats_output_module::printPerThreadFlag = 0;
354 int kmp_stats_output_module::printPerThreadEventsFlag = 0;
358 void kmp_stats_output_module::init() {
359 char *statsFileName = getenv(
"KMP_STATS_FILE");
360 eventsFileName = getenv(
"KMP_STATS_EVENTS_FILE");
361 plotFileName = getenv(
"KMP_STATS_PLOT_FILE");
362 char *threadStats = getenv(
"KMP_STATS_THREADS");
363 char *threadEvents = getenv(
"KMP_STATS_EVENTS");
370 std::string baseFileName, pid, suffix;
371 std::stringstream ss;
372 outputFileName = std::string(statsFileName);
373 index = outputFileName.find_last_of(
'.');
374 if (index == std::string::npos) {
375 baseFileName = outputFileName;
377 baseFileName = outputFileName.substr(0, index);
378 suffix = outputFileName.substr(index);
382 outputFileName = baseFileName +
"-" + pid + suffix;
384 eventsFileName = eventsFileName ? eventsFileName :
"events.dat";
385 plotFileName = plotFileName ? plotFileName :
"events.plt";
389 printPerThreadFlag = __kmp_str_match_true(threadStats);
390 printPerThreadEventsFlag = __kmp_str_match_true(threadEvents);
392 if (printPerThreadEventsFlag) {
397 timeStat::clearEventFlags();
403 void kmp_stats_output_module::setupEventColors() {
405 int globalColorIndex = 0;
406 int numGlobalColors =
sizeof(globalColorArray) /
sizeof(rgb_color);
407 for (i = 0; i < TIMER_LAST; i++) {
409 timerColorInfo[i] = globalColorArray[globalColorIndex];
410 globalColorIndex = (globalColorIndex + 1) % numGlobalColors;
416 void kmp_stats_output_module::printTimerStats(FILE *statsOut,
417 statistic
const *theStats,
418 statistic
const *totalStats) {
419 fprintf(statsOut,
"Timer, SampleCount, Min, " 420 "Mean, Max, Total, SD\n");
421 for (timer_e s = timer_e(0); s < TIMER_LAST; s = timer_e(s + 1)) {
422 statistic
const *stat = &theStats[s];
425 fprintf(statsOut,
"%-28s, %s\n", timeStat::name(s),
426 stat->format(tag,
true).c_str());
429 for (timer_e s = timer_e(0); s < TIMER_LAST; s = timer_e(s + 1)) {
432 fprintf(statsOut,
"Total_%-22s, %s\n", timeStat::name(s),
433 totalStats[s].format(tag,
true).c_str());
437 void kmp_stats_output_module::printCounterStats(FILE *statsOut,
438 statistic
const *theStats) {
439 fprintf(statsOut,
"Counter, ThreadCount, Min, Mean, " 440 " Max, Total, SD\n");
441 for (
int s = 0; s < COUNTER_LAST; s++) {
442 statistic
const *stat = &theStats[s];
443 fprintf(statsOut,
"%-25s, %s\n", counter::name(counter_e(s)),
444 stat->format(
' ',
true).c_str());
448 void kmp_stats_output_module::printCounters(FILE *statsOut,
449 counter
const *theCounters) {
452 fprintf(statsOut,
"\nCounter, Count\n");
453 for (
int c = 0; c < COUNTER_LAST; c++) {
454 counter
const *stat = &theCounters[c];
455 fprintf(statsOut,
"%-25s, %s\n", counter::name(counter_e(c)),
456 formatSI(stat->getValue(), 9,
' ').c_str());
460 void kmp_stats_output_module::printEvents(FILE *eventsOut,
461 kmp_stats_event_vector *theEvents,
465 for (
int i = 0; i < theEvents->size(); i++) {
466 kmp_stats_event ev = theEvents->at(i);
467 rgb_color color = getEventColor(ev.getTimerName());
468 fprintf(eventsOut,
"%d %lu %lu %1.1f rgb(%1.1f,%1.1f,%1.1f) %s\n", gtid,
469 ev.getStart(), ev.getStop(), 1.2 - (ev.getNestLevel() * 0.2),
470 color.r, color.g, color.b, timeStat::name(ev.getTimerName()));
475 void kmp_stats_output_module::windupExplicitTimers() {
479 kmp_stats_list::iterator it;
480 for (it = __kmp_stats_list->begin(); it != __kmp_stats_list->end(); it++) {
481 kmp_stats_list *ptr = *it;
482 ptr->getPartitionedTimers()->windup();
483 for (
int timer = 0; timer < EXPLICIT_TIMER_LAST; timer++) {
484 ptr->getExplicitTimer(explicit_timer_e(timer))->stop((timer_e)timer, ptr);
489 void kmp_stats_output_module::printPloticusFile() {
491 int size = __kmp_stats_list->size();
492 FILE *plotOut = fopen(plotFileName,
"w+");
494 fprintf(plotOut,
"#proc page\n" 498 fprintf(plotOut,
"#proc getdata\n" 502 fprintf(plotOut,
"#proc areadef\n" 503 " title: OpenMP Sampling Timeline\n" 504 " titledetails: align=center size=16\n" 505 " rectangle: 1 1 13 9\n" 506 " xautorange: datafield=2,3\n" 507 " yautorange: -1 %d\n\n",
510 fprintf(plotOut,
"#proc xaxis\n" 512 " stubdetails: size=12\n" 513 " label: Time (ticks)\n" 514 " labeldetails: size=14\n\n");
516 fprintf(plotOut,
"#proc yaxis\n" 519 " stubdetails: size=12\n" 521 " labeldetails: size=14\n\n",
524 fprintf(plotOut,
"#proc bars\n" 525 " exactcolorfield: 5\n" 528 " segmentfields: 2 3\n" 529 " barwidthfield: 4\n\n");
532 for (i = 0; i < TIMER_LAST; i++) {
534 rgb_color c = getEventColor((timer_e)i);
535 fprintf(plotOut,
"#proc legendentry\n" 536 " sampletype: color\n" 538 " details: rgb(%1.1f,%1.1f,%1.1f)\n\n",
539 timeStat::name((timer_e)i), c.r, c.g, c.b);
543 fprintf(plotOut,
"#proc legend\n" 545 " location: max max\n\n");
555 void kmp_stats_output_module::printHeaderInfo(FILE *statsOut) {
556 std::time_t now = std::time(0);
560 std::strftime(&buffer[0],
sizeof(buffer),
"%c", std::localtime(&now));
561 fprintf(statsOut,
"# Time of run: %s\n", &buffer[0]);
562 if (gethostname(&hostName[0],
sizeof(hostName)) == 0)
563 fprintf(statsOut,
"# Hostname: %s\n", &hostName[0]);
564 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 565 fprintf(statsOut,
"# CPU: %s\n", &__kmp_cpuinfo.name[0]);
566 fprintf(statsOut,
"# Family: %d, Model: %d, Stepping: %d\n",
567 __kmp_cpuinfo.family, __kmp_cpuinfo.model, __kmp_cpuinfo.stepping);
568 if (__kmp_cpuinfo.frequency == 0)
569 fprintf(statsOut,
"# Nominal frequency: Unknown\n");
571 fprintf(statsOut,
"# Nominal frequency: %sz\n",
572 formatSI(
double(__kmp_cpuinfo.frequency), 9,
'H').c_str());
576 void kmp_stats_output_module::outputStats(
const char *heading) {
580 windupExplicitTimers();
582 statistic allStats[TIMER_LAST];
583 statistic totalStats[TIMER_LAST];
585 statistic allCounters[COUNTER_LAST];
588 !outputFileName.empty() ? fopen(outputFileName.c_str(),
"a+") : stderr;
593 if (eventPrintingEnabled()) {
594 eventsOut = fopen(eventsFileName,
"w+");
597 printHeaderInfo(statsOut);
598 fprintf(statsOut,
"%s\n", heading);
600 kmp_stats_list::iterator it;
601 for (it = __kmp_stats_list->begin(); it != __kmp_stats_list->end(); it++) {
602 int t = (*it)->getGtid();
604 if (printPerThreadFlag) {
605 fprintf(statsOut,
"Thread %d\n", t);
606 printTimerStats(statsOut, (*it)->getTimers(), 0);
607 printCounters(statsOut, (*it)->getCounters());
608 fprintf(statsOut,
"\n");
611 if (eventPrintingEnabled()) {
612 kmp_stats_event_vector events = (*it)->getEventVector();
613 printEvents(eventsOut, &events, t);
617 for (timer_e s = timer_e(0); s < TIMER_LAST; s = timer_e(s + 1)) {
619 if ((timeStat::masterOnly(s) && (t != 0)) ||
621 (timeStat::workerOnly(s) && (t == 0))
627 statistic *threadStat = (*it)->getTimer(s);
628 allStats[s] += *threadStat;
632 totalStats[s].addSample(threadStat->getTotal());
636 for (counter_e c = counter_e(0); c < COUNTER_LAST; c = counter_e(c + 1)) {
637 if (counter::masterOnly(c) && t != 0)
639 allCounters[c].addSample((*it)->getCounter(c)->getValue());
643 if (eventPrintingEnabled()) {
648 fprintf(statsOut,
"Aggregate for all threads\n");
649 printTimerStats(statsOut, &allStats[0], &totalStats[0]);
650 fprintf(statsOut,
"\n");
651 printCounterStats(statsOut, &allCounters[0]);
653 if (statsOut != stderr)
663 void __kmp_reset_stats() {
664 kmp_stats_list::iterator it;
665 for (it = __kmp_stats_list->begin(); it != __kmp_stats_list->end(); it++) {
666 timeStat *timers = (*it)->getTimers();
667 counter *counters = (*it)->getCounters();
668 explicitTimer *eTimers = (*it)->getExplicitTimers();
670 for (
int t = 0; t < TIMER_LAST; t++)
673 for (
int c = 0; c < COUNTER_LAST; c++)
676 for (
int t = 0; t < EXPLICIT_TIMER_LAST; t++)
680 (*it)->resetEventVector();
686 void __kmp_output_stats(
const char *heading) {
687 __kmp_stats_global_output->outputStats(heading);
691 void __kmp_accumulate_stats_at_exit(
void) {
693 if (KMP_XCHG_FIXED32(&statsPrinted, 1) != 0)
696 __kmp_output_stats(
"Statistics on exit");
699 void __kmp_stats_init(
void) {
700 __kmp_init_tas_lock(&__kmp_stats_lock);
701 __kmp_stats_start_time = tsc_tick_count::now();
702 __kmp_stats_global_output =
new kmp_stats_output_module();
703 __kmp_stats_list =
new kmp_stats_list();
706 void __kmp_stats_fini(
void) {
707 __kmp_accumulate_stats_at_exit();
708 __kmp_stats_list->deallocate();
709 delete __kmp_stats_global_output;
710 delete __kmp_stats_list;
do not show a TOTAL_aggregation for this statistic
statistic doesn't need units printed next to it in output
#define KMP_FOREACH_COUNTER(macro, arg)
Add new counters under KMP_FOREACH_COUNTER() macro in kmp_stats.h.