-
Notifications
You must be signed in to change notification settings - Fork 12
/
perfeval.c
221 lines (192 loc) · 7.63 KB
/
perfeval.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
#include <stdio.h>
#include <errno.h>
#include <string.h>
#include <stdlib.h>
#include <linux/hashtable.h>
#include <linux/thread_map.h>
#include <monitor.h>
/*
* Performance evaluation
*
* Sampling events has a performance cost. Events attached to a CPU increase that CPU's
* utilization. Events attached to a thread context increase that thread's utilization,
* and the calls to __perf_event_task_sched_in()/_out() increase it further.
*
* Event filtering adds to this cost: the more filter logic operations there are, the
* greater the increase.
*
* In general, the more events are sampled, the more the utilization increases. Therefore,
* performance evaluation counts the number of events sampled in the cpu/thread context.
* Above a certain limit, sampling is disabled.
*/
/* log2 of the bucket count: every perfeval hash table has (1 << PERFEVAL_HASHBITS) buckets. */
#define PERFEVAL_HASHBITS (6)
/*
 * One tracked cpu or thread id, linked into a perfeval hash table.
 * Nodes are created by perfeval_init() and looked up by cpu_tid.
 */
struct perfeval_node {
struct hlist_node node; // linkage into the hash bucket list
u32 cpu_tid; // cpu or tid;
u64 samples; // matching events counted by perfeval_sample(); reset to 0 by perfeval_evaluate()
};
/*
 * Store @key in @obj->cpu_tid and insert @obj at the head of the bucket of
 * @hashtable selected by hash_min(@key, PERFEVAL_HASHBITS).
 *
 * The body is wrapped in do { } while (0) so that both statements stay together
 * when the macro is used as the body of an unbraced if/else/loop.
 * @key is evaluated twice; pass an expression without side effects.
 */
#define perfeval_node_add(hashtable, obj, key) \
    do { \
        (obj)->cpu_tid = (key); \
        hlist_add_head(&(obj)->node, &(hashtable)[hash_min((key), PERFEVAL_HASHBITS)]); \
    } while (0)
/*
 * Look up @key in @hashtable. Expands to an hlist_for_each_entry() walk over the
 * bucket selected by hash_min(@key, PERFEVAL_HASHBITS), followed by an `if` that
 * compares each node's cpu_tid with @key. The statement written after the macro
 * therefore runs with @obj set to each matching node; callers in this file end
 * that statement with `break` to stop at the first match.
 *
 * The expansion ends in an open `if`, so do not follow the body with an `else`.
 * @key is evaluated more than once.
 */
#define perfeval_node_find(hashtable, obj, key) \
hlist_for_each_entry(obj, &hashtable[hash_min((key), PERFEVAL_HASHBITS)], node) \
if (obj->cpu_tid == (key))
/*
 * Visit every node in @hashtable. The outer loop steps @bkt over the bucket
 * indices 0 .. (1 << PERFEVAL_HASHBITS) - 1; the inner hlist_for_each_entry()
 * walks the nodes of bucket @bkt, binding each one to @obj.
 *
 * The `obj == NULL` test in the outer condition stops the bucket walk when the
 * inner walk was left with @obj still pointing at a node.
 * NOTE(review): this relies on hlist_for_each_entry() leaving @obj NULL once a
 * bucket is exhausted - confirm against the list header in use.
 */
#define perfeval_node_for_each(hashtable, obj, bkt) \
for ((bkt) = 0, obj = NULL; obj == NULL && (bkt) < (1 << PERFEVAL_HASHBITS); (bkt)++) \
hlist_for_each_entry(obj, &hashtable[bkt], node)
/*
 * Account one sampled event against the perfeval tables of @dev.
 *
 * Slot 0 of dev->perfeval tracks cpus and slot 1 tracks tids. For every slot
 * that has a hash table, sampled_events is incremented; if the event's cpu/tid
 * is one of the tracked keys, that node's sample count and matched_events are
 * incremented too, and nr_ins counts nodes that received their first sample
 * since the counters were last reset.
 *
 * @dev:      device whose perfeval state is updated.
 * @event:    the event; a PERF_RECORD_DEV record carries cpu/tid as struct
 *            fields, any other type is read from sample.array at mem_pos.
 * @instance: not used by this function.
 */
void perfeval_sample(struct prof_dev *dev, union perf_event *event, int instance)
{
    struct performance_evaluation *pe;
    struct perfeval_node *entry;
    u32 key[2] = {0};
    int ctx;

    // Fast path: perfeval is not enabled for either context.
    if (likely(!dev->perfeval[0].hashmap && !dev->perfeval[1].hashmap))
        return;

    if (event->header.type != PERF_RECORD_DEV) {
        void *raw = (void *)event->sample.array;

        // Only read the key for contexts that are actually being evaluated.
        for (ctx = 0; ctx < 2; ctx++) {
            pe = &dev->perfeval[ctx];
            if (pe->hashmap)
                key[ctx] = *(u32 *)(raw + pe->mem_pos);
        }
    } else {
        struct perf_record_dev *rec = (struct perf_record_dev *)event;

        key[0] = rec->cpu;
        key[1] = rec->tid;
    }

    // Count events that occurred in the specified cpu/tid context.
    for (ctx = 0; ctx < 2; ctx++) {
        pe = &dev->perfeval[ctx];
        if (!pe->hashmap)
            continue;

        pe->sampled_events++;
        perfeval_node_find(pe->hashmap, entry, key[ctx]) {
            // First sample on this node since the last reset: one more active instance.
            if (entry->samples++ == 0)
                pe->nr_ins++;
            pe->matched_events++;
            break;
        }
    }
}
/*
 * Report the perfeval counters accumulated since the previous call, enforce the
 * sampling limit, and reset the counters.
 *
 * For each context (0 = cpu, 1 = tid) that has a hash table and saw at least one
 * event, the sampled and matched counts are scaled by 1000 / env->interval and
 * printed. If env->sampling_limit is non-zero and the scaled matched count per
 * active instance exceeds it, the device is closed via prof_dev_close() and the
 * per-node sample counts are printed as well. All counters are then zeroed.
 *
 * @dev: device whose perfeval state is evaluated.
 */
void perfeval_evaluate(struct prof_dev *dev)
{
    const char *str[2] = {"cpu", "tid"};
    struct perfeval_node *node;
    int i, j;

    if (likely(!(dev->perfeval[0].hashmap || dev->perfeval[1].hashmap)))
        return;

    for (i = 0; i < 2; i ++) {
        struct performance_evaluation *perfeval = &dev->perfeval[i];

        if (perfeval->hashmap && perfeval->sampled_events) {
            // presumably env->interval is in milliseconds, making these per-second rates - TODO confirm
            long sampled_interval = perfeval->sampled_events * 1000 / dev->env->interval;
            long matched_interval = perfeval->matched_events * 1000 / dev->env->interval;
            bool disable = false;

            // Exceeding the limit, exit perf-prof.
            // nr_ins is 0 when events were sampled but none matched a tracked
            // cpu/tid; skip the check then to avoid dividing by zero (nothing
            // matched, so the limit cannot have been exceeded).
            if (dev->env->sampling_limit && perfeval->nr_ins &&
                matched_interval / perfeval->nr_ins > dev->env->sampling_limit) {
                disable = true;
                // NOTE(review): dev is still dereferenced below after this call;
                // confirm prof_dev_close() does not release dev or its perfeval tables.
                prof_dev_close(dev);
            }

            print_time(stdout);
            printf("%s: perfeval %s(%s): sampled %lu events matched %lu on %d instances%s\n", dev->prof->name,
                    i == 0 ? "cpus" : "pids", i == 0 ? dev->env->perfeval_cpus : dev->env->perfeval_pids,
                    sampled_interval, matched_interval, perfeval->nr_ins,
                    disable ? ", exceeds the limit, exit" : "");

            // Reset every node; when the limit was exceeded also show per-node counts.
            perfeval_node_for_each(perfeval->hashmap, node, j) {
                if (node->samples) {
                    if (disable) {
                        print_time(stdout);
                        printf("%s: perfeval: %s %u sampled %lu events\n", dev->prof->name, str[i],
                                node->cpu_tid, node->samples);
                    }
                    node->samples = 0;
                }
            }

            perfeval->nr_ins = 0;
            perfeval->matched_events = 0;
            perfeval->sampled_events = 0;
        }
    }
}
/*
 * Release the perfeval hash tables of @dev.
 *
 * Does nothing when perfeval is not configured (no interval, or neither
 * perfeval_cpus nor perfeval_pids set), mirroring the early return in
 * perfeval_init(). Each table pointer is reset to NULL after being freed so
 * that a repeated call is harmless and so that perfeval_sample() and
 * perfeval_evaluate(), which test the pointer, never touch freed memory.
 *
 * @dev: device whose perfeval tables are released.
 */
void perfeval_free(struct prof_dev *dev)
{
    struct env *env = dev->env;
    int i;

    if (!env->interval ||
        !(env->perfeval_cpus || env->perfeval_pids))
        return ;

    for (i = 0; i < 2; i++) {
        // free(NULL) is a no-op, so no guard is needed.
        free(dev->perfeval[i].hashmap);
        dev->perfeval[i].hashmap = NULL;
    }
}
/*
 * Set up performance evaluation for @dev.
 *
 * When env->interval is set and env->perfeval_cpus and/or env->perfeval_pids is
 * given, build one hash table per context: dev->perfeval[0] keyed by cpu and
 * dev->perfeval[1] keyed by tid. Each table is a single allocation holding the
 * (1 << PERFEVAL_HASHBITS) bucket heads followed by one perfeval_node per
 * cpu/thread in the parsed map. mem_pos records the byte offset at which
 * perfeval_sample() reads the cpu/tid from the sample.
 *
 * @dev: device to initialise.
 *
 * Returns 0 on success or when perfeval is not configured, -1 on failure
 * (missing cpu/tid position, incompatible forwarding source, or allocation /
 * parsing failure). On failure both table pointers are left NULL.
 */
int perfeval_init(struct prof_dev *dev)
{
    struct env *env = dev->env;
    struct prof_dev *source, *tmp;
    struct perf_cpu_map *cpus = NULL;
    struct perf_thread_map *threads = NULL;
    struct performance_evaluation *perfeval;
    struct hlist_head *hash;
    struct perfeval_node *node;
    int idx, cpu, thread;
    int tid_pos = dev->pos.tid_pos;
    int cpu_pos = dev->pos.cpu_pos;

    if (!env->interval ||
        !(env->perfeval_cpus || env->perfeval_pids))
        return 0;

    if (env->perfeval_cpus) {
        if (cpu_pos < 0)
            goto out_free;

        // Check the cpu_pos of the forwarding source device.
        // presumably the iterator holds a reference on source, hence the put
        // before leaving the loop early - verify against for_each_source_dev_get.
        for_each_source_dev_get(source, tmp, dev)
            if (source->pos.cpu_pos < 0 ||
                cpu_pos != sizeof(u32)+sizeof(u32)+sizeof(u64)+sizeof(u64) /* struct perf_record_dev cpu pos */) {
                prof_dev_put(source);
                goto out_free;
            }

        perfeval = &dev->perfeval[0];
        perfeval->mem_pos = cpu_pos;

        cpus = perf_cpu_map__new(env->perfeval_cpus);
        // Bucket heads first, then one node per cpu, in a single zeroed allocation.
        perfeval->hashmap = zalloc((sizeof(struct hlist_head) << PERFEVAL_HASHBITS) +
                                   sizeof(*node) * perf_cpu_map__nr(cpus));
        if (!cpus || !perfeval->hashmap)
            goto out_free;

        hash = perfeval->hashmap;
        node = (void *)(hash + (1 << PERFEVAL_HASHBITS));
        __hash_init(hash, 1 << PERFEVAL_HASHBITS);

        perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
            perfeval_node_add(hash, node, cpu);
            node ++;
        }
    }

    if (env->perfeval_pids) {
        if (tid_pos < 0)
            goto out_free;

        // Check the tid_pos of the forwarding source device.
        for_each_source_dev_get(source, tmp, dev)
            if (source->pos.tid_pos < 0 ||
                tid_pos != sizeof(u32) /* struct perf_record_dev tid pos */) {
                prof_dev_put(source);
                goto out_free;
            }

        perfeval = &dev->perfeval[1];
        perfeval->mem_pos = tid_pos + sizeof(u32); // tid

        threads = thread_map__new_str(env->perfeval_pids, NULL, 0, 0);
        // Bucket heads first, then one node per thread, in a single zeroed allocation.
        perfeval->hashmap = zalloc((sizeof(struct hlist_head) << PERFEVAL_HASHBITS) +
                                   sizeof(*node) * perf_thread_map__nr(threads));
        if (!threads || !perfeval->hashmap)
            goto out_free;

        hash = perfeval->hashmap;
        node = (void *)(hash + (1 << PERFEVAL_HASHBITS));
        __hash_init(hash, 1 << PERFEVAL_HASHBITS);

        perf_thread_map__for_each_thread(thread, idx, threads) {
            perfeval_node_add(hash, node, thread);
            node ++;
        }
    }

    // The maps were only needed to enumerate the keys.
    perf_cpu_map__put(cpus);
    perf_thread_map__put(threads);
    return 0;

out_free:
    fprintf(stderr, "perfeval init failed\n");
    perf_cpu_map__put(cpus);
    perf_thread_map__put(threads);
    perfeval_free(dev);
    // Make sure no freed table pointer stays reachable through dev after a
    // failed init: later hashmap checks or a second free must see NULL.
    dev->perfeval[0].hashmap = NULL;
    dev->perfeval[1].hashmap = NULL;
    return -1;
}