summaryrefslogblamecommitdiff
path: root/tools/perf/util/intel-pt.c
blob: 38942e1eac8f53678948d8ed16bea8b7844fb824 (plain) (tree)
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575




























































































                                                                            




                            


































                                           
                          
























































































































































































































































































































































































































































                                                                                  


















                                                                           



































































































                                                                         


                                                     




























































































































































































































































                                                                                           
                                                                      



                                        

                                                      





























































































































































































                                                                                                
                                                                
 
                                              


                                   
                         


























                                                                




                                                     
                                                             
                                               


















                                                                           

                                                                                 

                                                                            
                                                                             























































































































































































                                                                                         

                                                                        

                                   
                

                             
                         


                                           
                         











                                                                    
                                                                    















                                                                          

























                                                                                   


                                                                   








































                                                                                            


































                                                                                          
                                                    












                                                                    
                                                                               



















                                                                             


                                                                   





































































































































































































































































                                                                                                            











                                                            
                                                  













                                                                               


















































                                                                                  










                                                                                

































                                                                               



                                                                                

































































                                                                               
/*
 * intel_pt.c: Intel Processor Trace support
 * Copyright (c) 2013-2015, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 */

#include <stdio.h>
#include <stdbool.h>
#include <errno.h>
#include <linux/kernel.h>
#include <linux/types.h>

#include "../perf.h"
#include "session.h"
#include "machine.h"
#include "tool.h"
#include "event.h"
#include "evlist.h"
#include "evsel.h"
#include "map.h"
#include "color.h"
#include "util.h"
#include "thread.h"
#include "thread-stack.h"
#include "symbol.h"
#include "callchain.h"
#include "dso.h"
#include "debug.h"
#include "auxtrace.h"
#include "tsc.h"
#include "intel-pt.h"

#include "intel-pt-decoder/intel-pt-log.h"
#include "intel-pt-decoder/intel-pt-decoder.h"
#include "intel-pt-decoder/intel-pt-insn-decoder.h"
#include "intel-pt-decoder/intel-pt-pkt-decoder.h"

#define MAX_TIMESTAMP (~0ULL)

struct intel_pt {
	struct auxtrace auxtrace;
	struct auxtrace_queues queues;
	struct auxtrace_heap heap;
	u32 auxtrace_type;
	struct perf_session *session;
	struct machine *machine;
	struct perf_evsel *switch_evsel;
	struct thread *unknown_thread;
	bool timeless_decoding;
	bool sampling_mode;
	bool snapshot_mode;
	bool per_cpu_mmaps;
	bool have_tsc;
	bool data_queued;
	bool est_tsc;
	bool sync_switch;
	int have_sched_switch;
	u32 pmu_type;
	u64 kernel_start;
	u64 switch_ip;
	u64 ptss_ip;

	struct perf_tsc_conversion tc;
	bool cap_user_time_zero;

	struct itrace_synth_opts synth_opts;

	bool sample_instructions;
	u64 instructions_sample_type;
	u64 instructions_sample_period;
	u64 instructions_id;

	bool sample_branches;
	u32 branches_filter;
	u64 branches_sample_type;
	u64 branches_id;

	bool sample_transactions;
	u64 transactions_sample_type;
	u64 transactions_id;

	bool synth_needs_swap;

	u64 tsc_bit;
	u64 mtc_bit;
	u64 mtc_freq_bits;
	u32 tsc_ctc_ratio_n;
	u32 tsc_ctc_ratio_d;
	u64 cyc_bit;
	u64 noretcomp_bit;
	unsigned max_non_turbo_ratio;
};

enum switch_state {
	INTEL_PT_SS_NOT_TRACING,
	INTEL_PT_SS_UNKNOWN,
	INTEL_PT_SS_TRACING,
	INTEL_PT_SS_EXPECTING_SWITCH_EVENT,
	INTEL_PT_SS_EXPECTING_SWITCH_IP,
};

struct intel_pt_queue {
	struct intel_pt *pt;
	unsigned int queue_nr;
	struct auxtrace_buffer *buffer;
	void *decoder;
	const struct intel_pt_state *state;
	struct ip_callchain *chain;
	union perf_event *event_buf;
	bool on_heap;
	bool stop;
	bool step_through_buffers;
	bool use_buffer_pid_tid;
	pid_t pid, tid;
	int cpu;
	int switch_state;
	pid_t next_tid;
	struct thread *thread;
	bool exclude_kernel;
	bool have_sample;
	u64 time;
	u64 timestamp;
	u32 flags;
	u16 insn_len;
	u64 last_insn_cnt;
};

static void intel_pt_dump(struct intel_pt *pt __maybe_unused,
			  unsigned char *buf, size_t len)
{
	struct intel_pt_pkt packet;
	size_t pos = 0;
	int ret, pkt_len, i;
	char desc[INTEL_PT_PKT_DESC_MAX];
	const char *color = PERF_COLOR_BLUE;

	color_fprintf(stdout, color,
		      ". ... Intel Processor Trace data: size %zu bytes\n",
		      len);

	while (len) {
		ret = intel_pt_get_packet(buf, len, &packet);
		if (ret > 0)
			pkt_len = ret;
		else
			pkt_len = 1;
		printf(".");
		color_fprintf(stdout, color, "  %08x: ", pos);
		for (i = 0; i < pkt_len; i++)
			color_fprintf(stdout, color, " %02x", buf[i]);
		for (; i < 16; i++)
			color_fprintf(stdout, color, "   ");
		if (ret > 0) {
			ret = intel_pt_pkt_desc(&packet, desc,
						INTEL_PT_PKT_DESC_MAX);
			if (ret > 0)
				color_fprintf(stdout, color, " %s\n", desc);
		} else {
			color_fprintf(stdout, color, " Bad packet!\n");
		}
		pos += pkt_len;
		buf += pkt_len;
		len -= pkt_len;
	}
}

static void intel_pt_dump_event(struct intel_pt *pt, unsigned char *buf,
				size_t len)
{
	printf(".\n");
	intel_pt_dump(pt, buf, len);
}

static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer *a,
				   struct auxtrace_buffer *b)
{
	void *start;

	start = intel_pt_find_overlap(a->data, a->size, b->data, b->size,
				      pt->have_tsc);
	if (!start)
		return -EINVAL;
	b->use_size = b->data + b->size - start;
	b->use_data = start;
	return 0;
}

static void intel_pt_use_buffer_pid_tid(struct intel_pt_queue *ptq,
					struct auxtrace_queue *queue,
					struct auxtrace_buffer *buffer)
{
	if (queue->cpu == -1 && buffer->cpu != -1)
		ptq->cpu = buffer->cpu;

	ptq->pid = buffer->pid;
	ptq->tid = buffer->tid;

	intel_pt_log("queue %u cpu %d pid %d tid %d\n",
		     ptq->queue_nr, ptq->cpu, ptq->pid, ptq->tid);

	thread__zput(ptq->thread);

	if (ptq->tid != -1) {
		if (ptq->pid != -1)
			ptq->thread = machine__findnew_thread(ptq->pt->machine,
							      ptq->pid,
							      ptq->tid);
		else
			ptq->thread = machine__find_thread(ptq->pt->machine, -1,
							   ptq->tid);
	}
}

/* This function assumes data is processed sequentially only */
static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data)
{
	struct intel_pt_queue *ptq = data;
	struct auxtrace_buffer *buffer = ptq->buffer, *old_buffer = buffer;
	struct auxtrace_queue *queue;

	if (ptq->stop) {
		b->len = 0;
		return 0;
	}

	queue = &ptq->pt->queues.queue_array[ptq->queue_nr];

	buffer = auxtrace_buffer__next(queue, buffer);
	if (!buffer) {
		if (old_buffer)
			auxtrace_buffer__drop_data(old_buffer);
		b->len = 0;
		return 0;
	}

	ptq->buffer = buffer;

	if (!buffer->data) {
		int fd = perf_data_file__fd(ptq->pt->session->file);

		buffer->data = auxtrace_buffer__get_data(buffer, fd);
		if (!buffer->data)
			return -ENOMEM;
	}

	if (ptq->pt->snapshot_mode && !buffer->consecutive && old_buffer &&
	    intel_pt_do_fix_overlap(ptq->pt, old_buffer, buffer))
		return -ENOMEM;

	if (old_buffer)
		auxtrace_buffer__drop_data(old_buffer);

	if (buffer->use_data) {
		b->len = buffer->use_size;
		b->buf = buffer->use_data;
	} else {
		b->len = buffer->size;
		b->buf = buffer->data;
	}
	b->ref_timestamp = buffer->reference;

	if (!old_buffer || ptq->pt->sampling_mode || (ptq->pt->snapshot_mode &&
						      !buffer->consecutive)) {
		b->consecutive = false;
		b->trace_nr = buffer->buffer_nr + 1;
	} else {
		b->consecutive = true;
	}

	if (ptq->use_buffer_pid_tid && (ptq->pid != buffer->pid ||
					ptq->tid != buffer->tid))
		intel_pt_use_buffer_pid_tid(ptq, queue, buffer);

	if (ptq->step_through_buffers)
		ptq->stop = true;

	if (!b->len)
		return intel_pt_get_trace(b, data);

	return 0;
}

struct intel_pt_cache_entry {
	struct auxtrace_cache_entry	entry;
	u64				insn_cnt;
	u64				byte_cnt;
	enum intel_pt_insn_op		op;
	enum intel_pt_insn_branch	branch;
	int				length;
	int32_t				rel;
};

static int intel_pt_config_div(const char *var, const char *value, void *data)
{
	int *d = data;
	long val;

	if (!strcmp(var, "intel-pt.cache-divisor")) {
		val = strtol(value, NULL, 0);
		if (val > 0 && val <= INT_MAX)
			*d = val;
	}

	return 0;
}

static int intel_pt_cache_divisor(void)
{
	static int d;

	if (d)
		return d;

	perf_config(intel_pt_config_div, &d);

	if (!d)
		d = 64;

	return d;
}

static unsigned int intel_pt_cache_size(struct dso *dso,
					struct machine *machine)
{
	off_t size;

	size = dso__data_size(dso, machine);
	size /= intel_pt_cache_divisor();
	if (size < 1000)
		return 10;
	if (size > (1 << 21))
		return 21;
	return 32 - __builtin_clz(size);
}

static struct auxtrace_cache *intel_pt_cache(struct dso *dso,
					     struct machine *machine)
{
	struct auxtrace_cache *c;
	unsigned int bits;

	if (dso->auxtrace_cache)
		return dso->auxtrace_cache;

	bits = intel_pt_cache_size(dso, machine);

	/* Ignoring cache creation failure */
	c = auxtrace_cache__new(bits, sizeof(struct intel_pt_cache_entry), 200);

	dso->auxtrace_cache = c;

	return c;
}

static int intel_pt_cache_add(struct dso *dso, struct machine *machine,
			      u64 offset, u64 insn_cnt, u64 byte_cnt,
			      struct intel_pt_insn *intel_pt_insn)
{
	struct auxtrace_cache *c = intel_pt_cache(dso, machine);
	struct intel_pt_cache_entry *e;
	int err;

	if (!c)
		return -ENOMEM;

	e = auxtrace_cache__alloc_entry(c);
	if (!e)
		return -ENOMEM;

	e->insn_cnt = insn_cnt;
	e->byte_cnt = byte_cnt;
	e->op = intel_pt_insn->op;
	e->branch = intel_pt_insn->branch;
	e->length = intel_pt_insn->length;
	e->rel = intel_pt_insn->rel;

	err = auxtrace_cache__add(c, offset, &e->entry);
	if (err)
		auxtrace_cache__free_entry(c, e);

	return err;
}

static struct intel_pt_cache_entry *
intel_pt_cache_lookup(struct dso *dso, struct machine *machine, u64 offset)
{
	struct auxtrace_cache *c = intel_pt_cache(dso, machine);

	if (!c)
		return NULL;

	return auxtrace_cache__lookup(dso->auxtrace_cache, offset);
}

static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
				   uint64_t *insn_cnt_ptr, uint64_t *ip,
				   uint64_t to_ip, uint64_t max_insn_cnt,
				   void *data)
{
	struct intel_pt_queue *ptq = data;
	struct machine *machine = ptq->pt->machine;
	struct thread *thread;
	struct addr_location al;
	unsigned char buf[1024];
	size_t bufsz;
	ssize_t len;
	int x86_64;
	u8 cpumode;
	u64 offset, start_offset, start_ip;
	u64 insn_cnt = 0;
	bool one_map = true;

	if (to_ip && *ip == to_ip)
		goto out_no_cache;

	bufsz = intel_pt_insn_max_size();

	if (*ip >= ptq->pt->kernel_start)
		cpumode = PERF_RECORD_MISC_KERNEL;
	else
		cpumode = PERF_RECORD_MISC_USER;

	thread = ptq->thread;
	if (!thread) {
		if (cpumode != PERF_RECORD_MISC_KERNEL)
			return -EINVAL;
		thread = ptq->pt->unknown_thread;
	}

	while (1) {
		thread__find_addr_map(thread, cpumode, MAP__FUNCTION, *ip, &al);
		if (!al.map || !al.map->dso)
			return -EINVAL;

		if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR &&
		    dso__data_status_seen(al.map->dso,
					  DSO_DATA_STATUS_SEEN_ITRACE))
			return -ENOENT;

		offset = al.map->map_ip(al.map, *ip);

		if (!to_ip && one_map) {
			struct intel_pt_cache_entry *e;

			e = intel_pt_cache_lookup(al.map->dso, machine, offset);
			if (e &&
			    (!max_insn_cnt || e->insn_cnt <= max_insn_cnt)) {
				*insn_cnt_ptr = e->insn_cnt;
				*ip += e->byte_cnt;
				intel_pt_insn->op = e->op;
				intel_pt_insn->branch = e->branch;
				intel_pt_insn->length = e->length;
				intel_pt_insn->rel = e->rel;
				intel_pt_log_insn_no_data(intel_pt_insn, *ip);
				return 0;
			}
		}

		start_offset = offset;
		start_ip = *ip;

		/* Load maps to ensure dso->is_64_bit has been updated */
		map__load(al.map, machine->symbol_filter);

		x86_64 = al.map->dso->is_64_bit;

		while (1) {
			len = dso__data_read_offset(al.map->dso, machine,
						    offset, buf, bufsz);
			if (len <= 0)
				return -EINVAL;

			if (intel_pt_get_insn(buf, len, x86_64, intel_pt_insn))
				return -EINVAL;

			intel_pt_log_insn(intel_pt_insn, *ip);

			insn_cnt += 1;

			if (intel_pt_insn->branch != INTEL_PT_BR_NO_BRANCH)
				goto out;

			if (max_insn_cnt && insn_cnt >= max_insn_cnt)
				goto out_no_cache;

			*ip += intel_pt_insn->length;

			if (to_ip && *ip == to_ip)
				goto out_no_cache;

			if (*ip >= al.map->end)
				break;

			offset += intel_pt_insn->length;
		}
		one_map = false;
	}
out:
	*insn_cnt_ptr = insn_cnt;

	if (!one_map)
		goto out_no_cache;

	/*
	 * Didn't lookup in the 'to_ip' case, so do it now to prevent duplicate
	 * entries.
	 */
	if (to_ip) {
		struct intel_pt_cache_entry *e;

		e = intel_pt_cache_lookup(al.map->dso, machine, start_offset);
		if (e)
			return 0;
	}

	/* Ignore cache errors */
	intel_pt_cache_add(al.map->dso, machine, start_offset, insn_cnt,
			   *ip - start_ip, intel_pt_insn);

	return 0;

out_no_cache:
	*insn_cnt_ptr = insn_cnt;
	return 0;
}

static bool intel_pt_get_config(struct intel_pt *pt,
				struct perf_event_attr *attr, u64 *config)
{
	if (attr->type == pt->pmu_type) {
		if (config)
			*config = attr->config;
		return true;
	}

	return false;
}

static bool intel_pt_exclude_kernel(struct intel_pt *pt)
{
	struct perf_evsel *evsel;

	evlist__for_each(pt->session->evlist, evsel) {
		if (intel_pt_get_config(pt, &evsel->attr, NULL) &&
		    !evsel->attr.exclude_kernel)
			return false;
	}
	return true;
}

static bool intel_pt_return_compression(struct intel_pt *pt)
{
	struct perf_evsel *evsel;
	u64 config;

	if (!pt->noretcomp_bit)
		return true;

	evlist__for_each(pt->session->evlist, evsel) {
		if (intel_pt_get_config(pt, &evsel->attr, &config) &&
		    (config & pt->noretcomp_bit))
			return false;
	}
	return true;
}

static unsigned int intel_pt_mtc_period(struct intel_pt *pt)
{
	struct perf_evsel *evsel;
	unsigned int shift;
	u64 config;

	if (!pt->mtc_freq_bits)
		return 0;

	for (shift = 0, config = pt->mtc_freq_bits; !(config & 1); shift++)
		config >>= 1;

	evlist__for_each(pt->session->evlist, evsel) {
		if (intel_pt_get_config(pt, &evsel->attr, &config))
			return (config & pt->mtc_freq_bits) >> shift;
	}
	return 0;
}

static bool intel_pt_timeless_decoding(struct intel_pt *pt)
{
	struct perf_evsel *evsel;
	bool timeless_decoding = true;
	u64 config;

	if (!pt->tsc_bit || !pt->cap_user_time_zero)
		return true;

	evlist__for_each(pt->session->evlist, evsel) {
		if (!(evsel->attr.sample_type & PERF_SAMPLE_TIME))
			return true;
		if (intel_pt_get_config(pt, &evsel->attr, &config)) {
			if (config & pt->tsc_bit)
				timeless_decoding = false;
			else
				return true;
		}
	}
	return timeless_decoding;
}

static bool intel_pt_tracing_kernel(struct intel_pt *pt)
{
	struct perf_evsel *evsel;

	evlist__for_each(pt->session->evlist, evsel) {
		if (intel_pt_get_config(pt, &evsel->attr, NULL) &&
		    !evsel->attr.exclude_kernel)
			return true;
	}
	return false;
}

static bool intel_pt_have_tsc(struct intel_pt *pt)
{
	struct perf_evsel *evsel;
	bool have_tsc = false;
	u64 config;

	if (!pt->tsc_bit)
		return false;

	evlist__for_each(pt->session->evlist, evsel) {
		if (intel_pt_get_config(pt, &evsel->attr, &config)) {
			if (config & pt->tsc_bit)
				have_tsc = true;
			else
				return false;
		}
	}
	return have_tsc;
}

static u64 intel_pt_ns_to_ticks(const struct intel_pt *pt, u64 ns)
{
	u64 quot, rem;

	quot = ns / pt->tc.time_mult;
	rem  = ns % pt->tc.time_mult;
	return (quot << pt->tc.time_shift) + (rem << pt->tc.time_shift) /
		pt->tc.time_mult;
}

static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
						   unsigned int queue_nr)
{
	struct intel_pt_params params = { .get_trace = 0, };
	struct intel_pt_queue *ptq;

	ptq = zalloc(sizeof(struct intel_pt_queue));
	if (!ptq)
		return NULL;

	if (pt->synth_opts.callchain) {
		size_t sz = sizeof(struct ip_callchain);

		sz += pt->synth_opts.callchain_sz * sizeof(u64);
		ptq->chain = zalloc(sz);
		if (!ptq->chain)
			goto out_free;
	}

	ptq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
	if (!ptq->event_buf)
		goto out_free;

	ptq->pt = pt;
	ptq->queue_nr = queue_nr;
	ptq->exclude_kernel = intel_pt_exclude_kernel(pt);
	ptq->pid = -1;
	ptq->tid = -1;
	ptq->cpu = -1;
	ptq->next_tid = -1;

	params.get_trace = intel_pt_get_trace;
	params.walk_insn = intel_pt_walk_next_insn;
	params.data = ptq;
	params.return_compression = intel_pt_return_compression(pt);
	params.max_non_turbo_ratio = pt->max_non_turbo_ratio;
	params.mtc_period = intel_pt_mtc_period(pt);
	params.tsc_ctc_ratio_n = pt->tsc_ctc_ratio_n;
	params.tsc_ctc_ratio_d = pt->tsc_ctc_ratio_d;

	if (pt->synth_opts.instructions) {
		if (pt->synth_opts.period) {
			switch (pt->synth_opts.period_type) {
			case PERF_ITRACE_PERIOD_INSTRUCTIONS:
				params.period_type =
						INTEL_PT_PERIOD_INSTRUCTIONS;
				params.period = pt->synth_opts.period;
				break;
			case PERF_ITRACE_PERIOD_TICKS:
				params.period_type = INTEL_PT_PERIOD_TICKS;
				params.period = pt->synth_opts.period;
				break;
			case PERF_ITRACE_PERIOD_NANOSECS:
				params.period_type = INTEL_PT_PERIOD_TICKS;
				params.period = intel_pt_ns_to_ticks(pt,
							pt->synth_opts.period);
				break;
			default:
				break;
			}
		}

		if (!params.period) {
			params.period_type = INTEL_PT_PERIOD_INSTRUCTIONS;
			params.period = 1000;
		}
	}

	ptq->decoder = intel_pt_decoder_new(&params);
	if (!ptq->decoder)
		goto out_free;

	return ptq;

out_free:
	zfree(&ptq->event_buf);
	zfree(&ptq->chain);
	free(ptq);
	return NULL;
}

static void intel_pt_free_queue(void *priv)
{
	struct intel_pt_queue *ptq = priv;

	if (!ptq)
		return;
	thread__zput(ptq->thread);
	intel_pt_decoder_free(ptq->decoder);
	zfree(&ptq->event_buf);
	zfree(&ptq->chain);
	free(ptq);
}

static void intel_pt_set_pid_tid_cpu(struct intel_pt *pt,
				     struct auxtrace_queue *queue)
{
	struct intel_pt_queue *ptq = queue->priv;

	if (queue->tid == -1 || pt->have_sched_switch) {
		ptq->tid = machine__get_current_tid(pt->machine, ptq->cpu);
		thread__zput(ptq->thread);
	}

	if (!ptq->thread && ptq->tid != -1)
		ptq->thread = machine__find_thread(pt->machine, -1, ptq->tid);

	if (ptq->thread) {
		ptq->pid = ptq->thread->pid_;
		if (queue->cpu == -1)
			ptq->cpu = ptq->thread->cpu;
	}
}

static void intel_pt_sample_flags(struct intel_pt_queue *ptq)
{
	if (ptq->state->flags & INTEL_PT_ABORT_TX) {
		ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TX_ABORT;
	} else if (ptq->state->flags & INTEL_PT_ASYNC) {
		if (ptq->state->to_ip)
			ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL |
				     PERF_IP_FLAG_ASYNC |
				     PERF_IP_FLAG_INTERRUPT;
		else
			ptq->flags = PERF_IP_FLAG_BRANCH |
				     PERF_IP_FLAG_TRACE_END;
		ptq->insn_len = 0;
	} else {
		if (ptq->state->from_ip)
			ptq->flags = intel_pt_insn_type(ptq->state->insn_op);
		else
			ptq->flags = PERF_IP_FLAG_BRANCH |
				     PERF_IP_FLAG_TRACE_BEGIN;
		if (ptq->state->flags & INTEL_PT_IN_TX)
			ptq->flags |= PERF_IP_FLAG_IN_TX;
		ptq->insn_len = ptq->state->insn_len;
	}
}

static int intel_pt_setup_queue(struct intel_pt *pt,
				struct auxtrace_queue *queue,
				unsigned int queue_nr)
{
	struct intel_pt_queue *ptq = queue->priv;

	if (list_empty(&queue->head))
		return 0;

	if (!ptq) {
		ptq = intel_pt_alloc_queue(pt, queue_nr);
		if (!ptq)
			return -ENOMEM;
		queue->priv = ptq;

		if (queue->cpu != -1)
			ptq->cpu = queue->cpu;
		ptq->tid = queue->tid;

		if (pt->sampling_mode) {
			if (pt->timeless_decoding)
				ptq->step_through_buffers = true;
			if (pt->timeless_decoding || !pt->have_sched_switch)
				ptq->use_buffer_pid_tid = true;
		}
	}

	if (!ptq->on_heap &&
	    (!pt->sync_switch ||
	     ptq->switch_state != INTEL_PT_SS_EXPECTING_SWITCH_EVENT)) {
		const struct intel_pt_state *state;
		int ret;

		if (pt->timeless_decoding)
			return 0;

		intel_pt_log("queue %u getting timestamp\n", queue_nr);
		intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n",
			     queue_nr, ptq->cpu, ptq->pid, ptq->tid);
		while (1) {
			state = intel_pt_decode(ptq->decoder);
			if (state->err) {
				if (state->err == INTEL_PT_ERR_NODATA) {
					intel_pt_log("queue %u has no timestamp\n",
						     queue_nr);
					return 0;
				}
				continue;
			}
			if (state->timestamp)
				break;
		}

		ptq->timestamp = state->timestamp;
		intel_pt_log("queue %u timestamp 0x%" PRIx64 "\n",
			     queue_nr, ptq->timestamp);
		ptq->state = state;
		ptq->have_sample = true;
		intel_pt_sample_flags(ptq);
		ret = auxtrace_heap__add(&pt->heap, queue_nr, ptq->timestamp);
		if (ret)
			return ret;
		ptq->on_heap = true;
	}

	return 0;
}

static int intel_pt_setup_queues(struct intel_pt *pt)
{
	unsigned int i;
	int ret;

	for (i = 0; i < pt->queues.nr_queues; i++) {
		ret = intel_pt_setup_queue(pt, &pt->queues.queue_array[i], i);
		if (ret)
			return ret;
	}
	return 0;
}

static int intel_pt_inject_event(union perf_event *event,
				 struct perf_sample *sample, u64 type,
				 bool swapped)
{
	event->header.size = perf_event__sample_event_size(sample, type, 0);
	return perf_event__synthesize_sample(event, type, 0, sample, swapped);
}

static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
{
	int ret;
	struct intel_pt *pt = ptq->pt;
	union perf_event *event = ptq->event_buf;
	struct perf_sample sample = { .ip = 0, };

	event->sample.header.type = PERF_RECORD_SAMPLE;
	event->sample.header.misc = PERF_RECORD_MISC_USER;
	event->sample.header.size = sizeof(struct perf_event_header);

	if (!pt->timeless_decoding)
		sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc);

	sample.ip = ptq->state->from_ip;
	sample.pid = ptq->pid;
	sample.tid = ptq->tid;
	sample.addr = ptq->state->to_ip;
	sample.id = ptq->pt->branches_id;
	sample.stream_id = ptq->pt->branches_id;
	sample.period = 1;
	sample.cpu = ptq->cpu;
	sample.flags = ptq->flags;
	sample.insn_len = ptq->insn_len;

	if (pt->branches_filter && !(pt->branches_filter & ptq->flags))
		return 0;

	if (pt->synth_opts.inject) {
		ret = intel_pt_inject_event(event, &sample,
					    pt->branches_sample_type,
					    pt->synth_needs_swap);
		if (ret)
			return ret;
	}

	ret = perf_session__deliver_synth_event(pt->session, event, &sample);
	if (ret)
		pr_err("Intel Processor Trace: failed to deliver branch event, error %d\n",
		       ret);

	return ret;
}

static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
{
	int ret;
	struct intel_pt *pt = ptq->pt;
	union perf_event *event = ptq->event_buf;
	struct perf_sample sample = { .ip = 0, };

	event->sample.header.type = PERF_RECORD_SAMPLE;
	event->sample.header.misc = PERF_RECORD_MISC_USER;
	event->sample.header.size = sizeof(struct perf_event_header);

	if (!pt->timeless_decoding)
		sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc);

	sample.ip = ptq->state->from_ip;
	sample.pid = ptq->pid;
	sample.tid = ptq->tid;
	sample.addr = ptq->state->to_ip;
	sample.id = ptq->pt->instructions_id;
	sample.stream_id = ptq->pt->instructions_id;
	sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt;
	sample.cpu = ptq->cpu;
	sample.flags = ptq->flags;
	sample.insn_len = ptq->insn_len;

	ptq->last_insn_cnt = ptq->state->tot_insn_cnt;

	if (pt->synth_opts.callchain) {
		thread_stack__sample(ptq->thread, ptq->chain,
				     pt->synth_opts.callchain_sz, sample.ip);
		sample.callchain = ptq->chain;
	}

	if (pt->synth_opts.inject) {
		ret = intel_pt_inject_event(event, &sample,
					    pt->instructions_sample_type,
					    pt->synth_needs_swap);
		if (ret)
			return ret;
	}

	ret = perf_session__deliver_synth_event(pt->session, event, &sample);
	if (ret)
		pr_err("Intel Processor Trace: failed to deliver instruction event, error %d\n",
		       ret);

	return ret;
}

static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq)
{
	int ret;
	struct intel_pt *pt = ptq->pt;
	union perf_event *event = ptq->event_buf;
	struct perf_sample sample = { .ip = 0, };

	event->sample.header.type = PERF_RECORD_SAMPLE;
	event->sample.header.misc = PERF_RECORD_MISC_USER;
	event->sample.header.size = sizeof(struct perf_event_header);

	if (!pt->timeless_decoding)
		sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc);

	sample.ip = ptq->state->from_ip;
	sample.pid = ptq->pid;
	sample.tid = ptq->tid;
	sample.addr = ptq->state->to_ip;
	sample.id = ptq->pt->transactions_id;
	sample.stream_id = ptq->pt->transactions_id;
	sample.period = 1;
	sample.cpu = ptq->cpu;
	sample.flags = ptq->flags;
	sample.insn_len = ptq->insn_len;

	if (pt->synth_opts.callchain) {
		thread_stack__sample(ptq->thread, ptq->chain,
				     pt->synth_opts.callchain_sz, sample.ip);
		sample.callchain = ptq->chain;
	}

	if (pt->synth_opts.inject) {
		ret = intel_pt_inject_event(event, &sample,
					    pt->transactions_sample_type,
					    pt->synth_needs_swap);
		if (ret)
			return ret;
	}

	ret = perf_session__deliver_synth_event(pt->session, event, &sample);
	if (ret)
		pr_err("Intel Processor Trace: failed to deliver transaction event, error %d\n",
		       ret);

	return ret;
}

static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu,
				pid_t pid, pid_t tid, u64 ip)
{
	union perf_event event;
	char msg[MAX_AUXTRACE_ERROR_MSG];
	int err;

	intel_pt__strerror(code, msg, MAX_AUXTRACE_ERROR_MSG);

	auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
			     code, cpu, pid, tid, ip, msg);

	err = perf_session__deliver_synth_event(pt->session, &event, NULL);
	if (err)
		pr_err("Intel Processor Trace: failed to deliver error event, error %d\n",
		       err);

	return err;
}

static int intel_pt_next_tid(struct intel_pt *pt, struct intel_pt_queue *ptq)
{
	struct auxtrace_queue *queue;
	pid_t tid = ptq->next_tid;
	int err;

	if (tid == -1)
		return 0;

	intel_pt_log("switch: cpu %d tid %d\n", ptq->cpu, tid);

	err = machine__set_current_tid(pt->machine, ptq->cpu, -1, tid);

	queue = &pt->queues.queue_array[ptq->queue_nr];
	intel_pt_set_pid_tid_cpu(pt, queue);

	ptq->next_tid = -1;

	return err;
}

static inline bool intel_pt_is_switch_ip(struct intel_pt_queue *ptq, u64 ip)
{
	struct intel_pt *pt = ptq->pt;

	return ip == pt->switch_ip &&
	       (ptq->flags & PERF_IP_FLAG_BRANCH) &&
	       !(ptq->flags & (PERF_IP_FLAG_CONDITIONAL | PERF_IP_FLAG_ASYNC |
			       PERF_IP_FLAG_INTERRUPT | PERF_IP_FLAG_TX_ABORT));
}

static int intel_pt_sample(struct intel_pt_queue *ptq)
{
	const struct intel_pt_state *state = ptq->state;
	struct intel_pt *pt = ptq->pt;
	int err;

	if (!ptq->have_sample)
		return 0;

	ptq->have_sample = false;

	if (pt->sample_instructions &&
	    (state->type & INTEL_PT_INSTRUCTION)) {
		err = intel_pt_synth_instruction_sample(ptq);
		if (err)
			return err;
	}

	if (pt->sample_transactions &&
	    (state->type & INTEL_PT_TRANSACTION)) {
		err = intel_pt_synth_transaction_sample(ptq);
		if (err)
			return err;
	}

	if (!(state->type & INTEL_PT_BRANCH))
		return 0;

	if (pt->synth_opts.callchain)
		thread_stack__event(ptq->thread, ptq->flags, state->from_ip,
				    state->to_ip, ptq->insn_len,
				    state->trace_nr);
	else
		thread_stack__set_trace_nr(ptq->thread, state->trace_nr);

	if (pt->sample_branches) {
		err = intel_pt_synth_branch_sample(ptq);
		if (err)
			return err;
	}

	if (!pt->sync_switch)
		return 0;

	if (intel_pt_is_switch_ip(ptq, state->to_ip)) {
		switch (ptq->switch_state) {
		case INTEL_PT_SS_UNKNOWN:
		case INTEL_PT_SS_EXPECTING_SWITCH_IP:
			err = intel_pt_next_tid(pt, ptq);
			if (err)
				return err;
			ptq->switch_state = INTEL_PT_SS_TRACING;
			break;
		default:
			ptq->switch_state = INTEL_PT_SS_EXPECTING_SWITCH_EVENT;
			return 1;
		}
	} else if (!state->to_ip) {
		ptq->switch_state = INTEL_PT_SS_NOT_TRACING;
	} else if (ptq->switch_state == INTEL_PT_SS_NOT_TRACING) {
		ptq->switch_state = INTEL_PT_SS_UNKNOWN;
	} else if (ptq->switch_state == INTEL_PT_SS_UNKNOWN &&
		   state->to_ip == pt->ptss_ip &&
		   (ptq->flags & PERF_IP_FLAG_CALL)) {
		ptq->switch_state = INTEL_PT_SS_TRACING;
	}

	return 0;
}

static u64 intel_pt_switch_ip(struct intel_pt *pt, u64 *ptss_ip)
{
	struct machine *machine = pt->machine;
	struct map *map;
	struct symbol *sym, *start;
	u64 ip, switch_ip = 0;
	const char *ptss;

	if (ptss_ip)
		*ptss_ip = 0;

	map = machine__kernel_map(machine, MAP__FUNCTION);
	if (!map)
		return 0;

	if (map__load(map, machine->symbol_filter))
		return 0;

	start = dso__first_symbol(map->dso, MAP__FUNCTION);

	for (sym = start; sym; sym = dso__next_symbol(sym)) {
		if (sym->binding == STB_GLOBAL &&
		    !strcmp(sym->name, "__switch_to")) {
			ip = map->unmap_ip(map, sym->start);
			if (ip >= map->start && ip < map->end) {
				switch_ip = ip;
				break;
			}
		}
	}

	if (!switch_ip || !ptss_ip)
		return 0;

	if (pt->have_sched_switch == 1)
		ptss = "perf_trace_sched_switch";
	else
		ptss = "__perf_event_task_sched_out";

	for (sym = start; sym; sym = dso__next_symbol(sym)) {
		if (!strcmp(sym->name, ptss)) {
			ip = map->unmap_ip(map, sym->start);
			if (ip >= map->start && ip < map->end) {
				*ptss_ip = ip;
				break;
			}
		}
	}

	return switch_ip;
}

static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp)
{
	const struct intel_pt_state *state = ptq->state;
	struct intel_pt *pt = ptq->pt;
	int err;

	if (!pt->kernel_start) {
		pt->kernel_start = machine__kernel_start(pt->machine);
		if (pt->per_cpu_mmaps &&
		    (pt->have_sched_switch == 1 || pt->have_sched_switch == 3) &&
		    !pt->timeless_decoding && intel_pt_tracing_kernel(pt) &&
		    !pt->sampling_mode) {
			pt->switch_ip = intel_pt_switch_ip(pt, &pt->ptss_ip);
			if (pt->switch_ip) {
				intel_pt_log("switch_ip: %"PRIx64" ptss_ip: %"PRIx64"\n",
					     pt->switch_ip, pt->ptss_ip);
				pt->sync_switch = true;
			}
		}
	}

	intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n",
		     ptq->queue_nr, ptq->cpu, ptq->pid, ptq->tid);
	while (1) {
		err = intel_pt_sample(ptq);
		if (err)
			return err;

		state = intel_pt_decode(ptq->decoder);
		if (state->err) {
			if (state->err == INTEL_PT_ERR_NODATA)
				return 1;
			if (pt->sync_switch &&
			    state->from_ip >= pt->kernel_start) {
				pt->sync_switch = false;
				intel_pt_next_tid(pt, ptq);
			}
			if (pt->synth_opts.errors) {
				err = intel_pt_synth_error(pt, state->err,
							   ptq->cpu, ptq->pid,
							   ptq->tid,
							   state->from_ip);
				if (err)
					return err;
			}
			continue;
		}

		ptq->state = state;
		ptq->have_sample = true;
		intel_pt_sample_flags(ptq);

		/* Use estimated TSC upon return to user space */
		if (pt->est_tsc &&
		    (state->from_ip >= pt->kernel_start || !state->from_ip) &&
		    state->to_ip && state->to_ip < pt->kernel_start) {
			intel_pt_log("TSC %"PRIx64" est. TSC %"PRIx64"\n",
				     state->timestamp, state->est_timestamp);
			ptq->timestamp = state->est_timestamp;
		/* Use estimated TSC in unknown switch state */
		} else if (pt->sync_switch &&
			   ptq->switch_state == INTEL_PT_SS_UNKNOWN &&
			   intel_pt_is_switch_ip(ptq, state->to_ip) &&
			   ptq->next_tid == -1) {
			intel_pt_log("TSC %"PRIx64" est. TSC %"PRIx64"\n",
				     state->timestamp, state->est_timestamp);
			ptq->timestamp = state->est_timestamp;
		} else if (state->timestamp > ptq->timestamp) {
			ptq->timestamp = state->timestamp;
		}

		if (!pt->timeless_decoding && ptq->timestamp >= *timestamp) {
			*timestamp = ptq->timestamp;
			return 0;
		}
	}
	return 0;
}

static inline int intel_pt_update_queues(struct intel_pt *pt)
{
	if (pt->queues.new_data) {
		pt->queues.new_data = false;
		return intel_pt_setup_queues(pt);
	}
	return 0;
}

static int intel_pt_process_queues(struct intel_pt *pt, u64 timestamp)
{
	unsigned int queue_nr;
	u64 ts;
	int ret;

	while (1) {
		struct auxtrace_queue *queue;
		struct intel_pt_queue *ptq;

		if (!pt->heap.heap_cnt)
			return 0;

		if (pt->heap.heap_array[0].ordinal >= timestamp)
			return 0;

		queue_nr = pt->heap.heap_array[0].queue_nr;
		queue = &pt->queues.queue_array[queue_nr];
		ptq = queue->priv;

		intel_pt_log("queue %u processing 0x%" PRIx64 " to 0x%" PRIx64 "\n",
			     queue_nr, pt->heap.heap_array[0].ordinal,
			     timestamp);

		auxtrace_heap__pop(&pt->heap);

		if (pt->heap.heap_cnt) {
			ts = pt->heap.heap_array[0].ordinal + 1;
			if (ts > timestamp)
				ts = timestamp;
		} else {
			ts = timestamp;
		}

		intel_pt_set_pid_tid_cpu(pt, queue);

		ret = intel_pt_run_decoder(ptq, &ts);

		if (ret < 0) {
			auxtrace_heap__add(&pt->heap, queue_nr, ts);
			return ret;
		}

		if (!ret) {
			ret = auxtrace_heap__add(&pt->heap, queue_nr, ts);
			if (ret < 0)
				return ret;
		} else {
			ptq->on_heap = false;
		}
	}

	return 0;
}

static int intel_pt_process_timeless_queues(struct intel_pt *pt, pid_t tid,
					    u64 time_)
{
	struct auxtrace_queues *queues = &pt->queues;
	unsigned int i;
	u64 ts = 0;

	for (i = 0; i < queues->nr_queues; i++) {
		struct auxtrace_queue *queue = &pt->queues.queue_array[i];
		struct intel_pt_queue *ptq = queue->priv;

		if (ptq && (tid == -1 || ptq->tid == tid)) {
			ptq->time = time_;
			intel_pt_set_pid_tid_cpu(pt, queue);
			intel_pt_run_decoder(ptq, &ts);
		}
	}
	return 0;
}

static int intel_pt_lost(struct intel_pt *pt, struct perf_sample *sample)
{
	return intel_pt_synth_error(pt, INTEL_PT_ERR_LOST, sample->cpu,
				    sample->pid, sample->tid, 0);
}

static struct intel_pt_queue *intel_pt_cpu_to_ptq(struct intel_pt *pt, int cpu)
{
	unsigned i, j;

	if (cpu < 0 || !pt->queues.nr_queues)
		return NULL;

	if ((unsigned)cpu >= pt->queues.nr_queues)
		i = pt->queues.nr_queues - 1;
	else
		i = cpu;

	if (pt->queues.queue_array[i].cpu == cpu)
		return pt->queues.queue_array[i].priv;

	for (j = 0; i > 0; j++) {
		if (pt->queues.queue_array[--i].cpu == cpu)
			return pt->queues.queue_array[i].priv;
	}

	for (; j < pt->queues.nr_queues; j++) {
		if (pt->queues.queue_array[j].cpu == cpu)
			return pt->queues.queue_array[j].priv;
	}

	return NULL;
}

static int intel_pt_sync_switch(struct intel_pt *pt, int cpu, pid_t tid,
				u64 timestamp)
{
	struct intel_pt_queue *ptq;
	int err;

	if (!pt->sync_switch)
		return 1;

	ptq = intel_pt_cpu_to_ptq(pt, cpu);
	if (!ptq)
		return 1;

	switch (ptq->switch_state) {
	case INTEL_PT_SS_NOT_TRACING:
		ptq->next_tid = -1;
		break;
	case INTEL_PT_SS_UNKNOWN:
	case INTEL_PT_SS_TRACING:
		ptq->next_tid = tid;
		ptq->switch_state = INTEL_PT_SS_EXPECTING_SWITCH_IP;
		return 0;
	case INTEL_PT_SS_EXPECTING_SWITCH_EVENT:
		if (!ptq->on_heap) {
			ptq->timestamp = perf_time_to_tsc(timestamp,
							  &pt->tc);
			err = auxtrace_heap__add(&pt->heap, ptq->queue_nr,
						 ptq->timestamp);
			if (err)
				return err;
			ptq->on_heap = true;
		}
		ptq->switch_state = INTEL_PT_SS_TRACING;
		break;
	case INTEL_PT_SS_EXPECTING_SWITCH_IP:
		ptq->next_tid = tid;
		intel_pt_log("ERROR: cpu %d expecting switch ip\n", cpu);
		break;
	default:
		break;
	}

	return 1;
}

static int intel_pt_process_switch(struct intel_pt *pt,
				   struct perf_sample *sample)
{
	struct perf_evsel *evsel;
	pid_t tid;
	int cpu, ret;

	evsel = perf_evlist__id2evsel(pt->session->evlist, sample->id);
	if (evsel != pt->switch_evsel)
		return 0;

	tid = perf_evsel__intval(evsel, sample, "next_pid");
	cpu = sample->cpu;

	intel_pt_log("sched_switch: cpu %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
		     cpu, tid, sample->time, perf_time_to_tsc(sample->time,
		     &pt->tc));

	ret = intel_pt_sync_switch(pt, cpu, tid, sample->time);
	if (ret <= 0)
		return ret;

	return machine__set_current_tid(pt->machine, cpu, -1, tid);
}

static int intel_pt_context_switch(struct intel_pt *pt, union perf_event *event,
				   struct perf_sample *sample)
{
	bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
	pid_t pid, tid;
	int cpu, ret;

	cpu = sample->cpu;

	if (pt->have_sched_switch == 3) {
		if (!out)
			return 0;
		if (event->header.type != PERF_RECORD_SWITCH_CPU_WIDE) {
			pr_err("Expecting CPU-wide context switch event\n");
			return -EINVAL;
		}
		pid = event->context_switch.next_prev_pid;
		tid = event->context_switch.next_prev_tid;
	} else {
		if (out)
			return 0;
		pid = sample->pid;
		tid = sample->tid;
	}

	if (tid == -1) {
		pr_err("context_switch event has no tid\n");
		return -EINVAL;
	}

	intel_pt_log("context_switch: cpu %d pid %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
		     cpu, pid, tid, sample->time, perf_time_to_tsc(sample->time,
		     &pt->tc));

	ret = intel_pt_sync_switch(pt, cpu, tid, sample->time);
	if (ret <= 0)
		return ret;

	return machine__set_current_tid(pt->machine, cpu, pid, tid);
}

static int intel_pt_process_itrace_start(struct intel_pt *pt,
					 union perf_event *event,
					 struct perf_sample *sample)
{
	if (!pt->per_cpu_mmaps)
		return 0;

	intel_pt_log("itrace_start: cpu %d pid %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
		     sample->cpu, event->itrace_start.pid,
		     event->itrace_start.tid, sample->time,
		     perf_time_to_tsc(sample->time, &pt->tc));

	return machine__set_current_tid(pt->machine, sample->cpu,
					event->itrace_start.pid,
					event->itrace_start.tid);
}

static int intel_pt_process_event(struct perf_session *session,
				  union perf_event *event,
				  struct perf_sample *sample,
				  struct perf_tool *tool)
{
	struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
					   auxtrace);
	u64 timestamp;
	int err = 0;

	if (dump_trace)
		return 0;

	if (!tool->ordered_events) {
		pr_err("Intel Processor Trace requires ordered events\n");
		return -EINVAL;
	}

	if (sample->time && sample->time != (u64)-1)
		timestamp = perf_time_to_tsc(sample->time, &pt->tc);
	else
		timestamp = 0;

	if (timestamp || pt->timeless_decoding) {
		err = intel_pt_update_queues(pt);
		if (err)
			return err;
	}

	if (pt->timeless_decoding) {
		if (event->header.type == PERF_RECORD_EXIT) {
			err = intel_pt_process_timeless_queues(pt,
							       event->fork.tid,
							       sample->time);
		}
	} else if (timestamp) {
		err = intel_pt_process_queues(pt, timestamp);
	}
	if (err)
		return err;

	if (event->header.type == PERF_RECORD_AUX &&
	    (event->aux.flags & PERF_AUX_FLAG_TRUNCATED) &&
	    pt->synth_opts.errors) {
		err = intel_pt_lost(pt, sample);
		if (err)
			return err;
	}

	if (pt->switch_evsel && event->header.type == PERF_RECORD_SAMPLE)
		err = intel_pt_process_switch(pt, sample);
	else if (event->header.type == PERF_RECORD_ITRACE_START)
		err = intel_pt_process_itrace_start(pt, event, sample);
	else if (event->header.type == PERF_RECORD_SWITCH ||
		 event->header.type == PERF_RECORD_SWITCH_CPU_WIDE)
		err = intel_pt_context_switch(pt, event, sample);

	intel_pt_log("event %s (%u): cpu %d time %"PRIu64" tsc %#"PRIx64"\n",
		     perf_event__name(event->header.type), event->header.type,
		     sample->cpu, sample->time, timestamp);

	return err;
}

static int intel_pt_flush(struct perf_session *session, struct perf_tool *tool)
{
	struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
					   auxtrace);
	int ret;

	if (dump_trace)
		return 0;

	if (!tool->ordered_events)
		return -EINVAL;

	ret = intel_pt_update_queues(pt);
	if (ret < 0)
		return ret;

	if (pt->timeless_decoding)
		return intel_pt_process_timeless_queues(pt, -1,
							MAX_TIMESTAMP - 1);

	return intel_pt_process_queues(pt, MAX_TIMESTAMP);
}

static void intel_pt_free_events(struct perf_session *session)
{
	struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
					   auxtrace);
	struct auxtrace_queues *queues = &pt->queues;
	unsigned int i;

	for (i = 0; i < queues->nr_queues; i++) {
		intel_pt_free_queue(queues->queue_array[i].priv);
		queues->queue_array[i].priv = NULL;
	}
	intel_pt_log_disable();
	auxtrace_queues__free(queues);
}

static void intel_pt_free(struct perf_session *session)
{
	struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
					   auxtrace);

	auxtrace_heap__free(&pt->heap);
	intel_pt_free_events(session);
	session->auxtrace = NULL;
	thread__delete(pt->unknown_thread);
	free(pt);
}

static int intel_pt_process_auxtrace_event(struct perf_session *session,
					   union perf_event *event,
					   struct perf_tool *tool __maybe_unused)
{
	struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
					   auxtrace);

	if (pt->sampling_mode)
		return 0;

	if (!pt->data_queued) {
		struct auxtrace_buffer *buffer;
		off_t data_offset;
		int fd = perf_data_file__fd(session->file);
		int err;

		if (perf_data_file__is_pipe(session->file)) {
			data_offset = 0;
		} else {
			data_offset = lseek(fd, 0, SEEK_CUR);
			if (data_offset == -1)
				return -errno;
		}

		err = auxtrace_queues__add_event(&pt->queues, session, event,
						 data_offset, &buffer);
		if (err)
			return err;

		/* Dump here now we have copied a piped trace out of the pipe */
		if (dump_trace) {
			if (auxtrace_buffer__get_data(buffer, fd)) {
				intel_pt_dump_event(pt, buffer->data,
						    buffer->size);
				auxtrace_buffer__put_data(buffer);
			}
		}
	}

	return 0;
}

struct intel_pt_synth {
	struct perf_tool dummy_tool;
	struct perf_session *session;
};

static int intel_pt_event_synth(struct perf_tool *tool,
				union perf_event *event,
				struct perf_sample *sample __maybe_unused,
				struct machine *machine __maybe_unused)
{
	struct intel_pt_synth *intel_pt_synth =
			container_of(tool, struct intel_pt_synth, dummy_tool);

	return perf_session__deliver_synth_event(intel_pt_synth->session, event,
						 NULL);
}

static int intel_pt_synth_event(struct perf_session *session,
				struct perf_event_attr *attr, u64 id)
{
	struct intel_pt_synth intel_pt_synth;

	memset(&intel_pt_synth, 0, sizeof(struct intel_pt_synth));
	intel_pt_synth.session = session;

	return perf_event__synthesize_attr(&intel_pt_synth.dummy_tool, attr, 1,
					   &id, intel_pt_event_synth);
}

static int intel_pt_synth_events(struct intel_pt *pt,
				 struct perf_session *session)
{
	struct perf_evlist *evlist = session->evlist;
	struct perf_evsel *evsel;
	struct perf_event_attr attr;
	bool found = false;
	u64 id;
	int err;

	evlist__for_each(evlist, evsel) {
		if (evsel->attr.type == pt->pmu_type && evsel->ids) {
			found = true;
			break;
		}
	}

	if (!found) {
		pr_debug("There are no selected events with Intel Processor Trace data\n");
		return 0;
	}

	memset(&attr, 0, sizeof(struct perf_event_attr));
	attr.size = sizeof(struct perf_event_attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.sample_type = evsel->attr.sample_type & PERF_SAMPLE_MASK;
	attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
			    PERF_SAMPLE_PERIOD;
	if (pt->timeless_decoding)
		attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
	else
		attr.sample_type |= PERF_SAMPLE_TIME;
	if (!pt->per_cpu_mmaps)
		attr.sample_type &= ~(u64)PERF_SAMPLE_CPU;
	attr.exclude_user = evsel->attr.exclude_user;
	attr.exclude_kernel = evsel->attr.exclude_kernel;
	attr.exclude_hv = evsel->attr.exclude_hv;
	attr.exclude_host = evsel->attr.exclude_host;
	attr.exclude_guest = evsel->attr.exclude_guest;
	attr.sample_id_all = evsel->attr.sample_id_all;
	attr.read_format = evsel->attr.read_format;

	id = evsel->id[0] + 1000000000;
	if (!id)
		id = 1;

	if (pt->synth_opts.instructions) {
		attr.config = PERF_COUNT_HW_INSTRUCTIONS;
		if (pt->synth_opts.period_type == PERF_ITRACE_PERIOD_NANOSECS)
			attr.sample_period =
				intel_pt_ns_to_ticks(pt, pt->synth_opts.period);
		else
			attr.sample_period = pt->synth_opts.period;
		pt->instructions_sample_period = attr.sample_period;
		if (pt->synth_opts.callchain)
			attr.sample_type |= PERF_SAMPLE_CALLCHAIN;
		pr_debug("Synthesizing 'instructions' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
			 id, (u64)attr.sample_type);
		err = intel_pt_synth_event(session, &attr, id);
		if (err) {
			pr_err("%s: failed to synthesize 'instructions' event type\n",
			       __func__);
			return err;
		}
		pt->sample_instructions = true;
		pt->instructions_sample_type = attr.sample_type;
		pt->instructions_id = id;
		id += 1;
	}

	if (pt->synth_opts.transactions) {
		attr.config = PERF_COUNT_HW_INSTRUCTIONS;
		attr.sample_period = 1;
		if (pt->synth_opts.callchain)
			attr.sample_type |= PERF_SAMPLE_CALLCHAIN;
		pr_debug("Synthesizing 'transactions' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
			 id, (u64)attr.sample_type);
		err = intel_pt_synth_event(session, &attr, id);
		if (err) {
			pr_err("%s: failed to synthesize 'transactions' event type\n",
			       __func__);
			return err;
		}
		pt->sample_transactions = true;
		pt->transactions_id = id;
		id += 1;
		evlist__for_each(evlist, evsel) {
			if (evsel->id && evsel->id[0] == pt->transactions_id) {
				if (evsel->name)
					zfree(&evsel->name);
				evsel->name = strdup("transactions");
				break;
			}
		}
	}

	if (pt->synth_opts.branches) {
		attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
		attr.sample_period = 1;
		attr.sample_type |= PERF_SAMPLE_ADDR;
		attr.sample_type &= ~(u64)PERF_SAMPLE_CALLCHAIN;
		pr_debug("Synthesizing 'branches' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
			 id, (u64)attr.sample_type);
		err = intel_pt_synth_event(session, &attr, id);
		if (err) {
			pr_err("%s: failed to synthesize 'branches' event type\n",
			       __func__);
			return err;
		}
		pt->sample_branches = true;
		pt->branches_sample_type = attr.sample_type;
		pt->branches_id = id;
	}

	pt->synth_needs_swap = evsel->needs_swap;

	return 0;
}

static struct perf_evsel *intel_pt_find_sched_switch(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;

	evlist__for_each_reverse(evlist, evsel) {
		const char *name = perf_evsel__name(evsel);

		if (!strcmp(name, "sched:sched_switch"))
			return evsel;
	}

	return NULL;
}

static bool intel_pt_find_switch(struct perf_evlist *evlist)
{
	struct perf_evsel *evsel;

	evlist__for_each(evlist, evsel) {
		if (evsel->attr.context_switch)
			return true;
	}

	return false;
}

static const char * const intel_pt_info_fmts[] = {
	[INTEL_PT_PMU_TYPE]		= "  PMU Type            %"PRId64"\n",
	[INTEL_PT_TIME_SHIFT]		= "  Time Shift          %"PRIu64"\n",
	[INTEL_PT_TIME_MULT]		= "  Time Muliplier      %"PRIu64"\n",
	[INTEL_PT_TIME_ZERO]		= "  Time Zero           %"PRIu64"\n",
	[INTEL_PT_CAP_USER_TIME_ZERO]	= "  Cap Time Zero       %"PRId64"\n",
	[INTEL_PT_TSC_BIT]		= "  TSC bit             %#"PRIx64"\n",
	[INTEL_PT_NORETCOMP_BIT]	= "  NoRETComp bit       %#"PRIx64"\n",
	[INTEL_PT_HAVE_SCHED_SWITCH]	= "  Have sched_switch   %"PRId64"\n",
	[INTEL_PT_SNAPSHOT_MODE]	= "  Snapshot mode       %"PRId64"\n",
	[INTEL_PT_PER_CPU_MMAPS]	= "  Per-cpu maps        %"PRId64"\n",
	[INTEL_PT_MTC_BIT]		= "  MTC bit             %#"PRIx64"\n",
	[INTEL_PT_TSC_CTC_N]		= "  TSC:CTC numerator   %"PRIu64"\n",
	[INTEL_PT_TSC_CTC_D]		= "  TSC:CTC denominator %"PRIu64"\n",
	[INTEL_PT_CYC_BIT]		= "  CYC bit             %#"PRIx64"\n",
};

static void intel_pt_print_info(u64 *arr, int start, int finish)
{
	int i;

	if (!dump_trace)
		return;

	for (i = start; i <= finish; i++)
		fprintf(stdout, intel_pt_info_fmts[i], arr[i]);
}

int intel_pt_process_auxtrace_info(union perf_event *event,
				   struct perf_session *session)
{
	struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info;
	size_t min_sz = sizeof(u64) * INTEL_PT_PER_CPU_MMAPS;
	struct intel_pt *pt;
	int err;

	if (auxtrace_info->header.size < sizeof(struct auxtrace_info_event) +
					min_sz)
		return -EINVAL;

	pt = zalloc(sizeof(struct intel_pt));
	if (!pt)
		return -ENOMEM;

	err = auxtrace_queues__init(&pt->queues);
	if (err)
		goto err_free;

	intel_pt_log_set_name(INTEL_PT_PMU_NAME);

	pt->session = session;
	pt->machine = &session->machines.host; /* No kvm support */
	pt->auxtrace_type = auxtrace_info->type;
	pt->pmu_type = auxtrace_info->priv[INTEL_PT_PMU_TYPE];
	pt->tc.time_shift = auxtrace_info->priv[INTEL_PT_TIME_SHIFT];
	pt->tc.time_mult = auxtrace_info->priv[INTEL_PT_TIME_MULT];
	pt->tc.time_zero = auxtrace_info->priv[INTEL_PT_TIME_ZERO];
	pt->cap_user_time_zero = auxtrace_info->priv[INTEL_PT_CAP_USER_TIME_ZERO];
	pt->tsc_bit = auxtrace_info->priv[INTEL_PT_TSC_BIT];
	pt->noretcomp_bit = auxtrace_info->priv[INTEL_PT_NORETCOMP_BIT];
	pt->have_sched_switch = auxtrace_info->priv[INTEL_PT_HAVE_SCHED_SWITCH];
	pt->snapshot_mode = auxtrace_info->priv[INTEL_PT_SNAPSHOT_MODE];
	pt->per_cpu_mmaps = auxtrace_info->priv[INTEL_PT_PER_CPU_MMAPS];
	intel_pt_print_info(&auxtrace_info->priv[0], INTEL_PT_PMU_TYPE,
			    INTEL_PT_PER_CPU_MMAPS);

	if (auxtrace_info->header.size >= sizeof(struct auxtrace_info_event) +
					(sizeof(u64) * INTEL_PT_CYC_BIT)) {
		pt->mtc_bit = auxtrace_info->priv[INTEL_PT_MTC_BIT];
		pt->mtc_freq_bits = auxtrace_info->priv[INTEL_PT_MTC_FREQ_BITS];
		pt->tsc_ctc_ratio_n = auxtrace_info->priv[INTEL_PT_TSC_CTC_N];
		pt->tsc_ctc_ratio_d = auxtrace_info->priv[INTEL_PT_TSC_CTC_D];
		pt->cyc_bit = auxtrace_info->priv[INTEL_PT_CYC_BIT];
		intel_pt_print_info(&auxtrace_info->priv[0], INTEL_PT_MTC_BIT,
				    INTEL_PT_CYC_BIT);
	}

	pt->timeless_decoding = intel_pt_timeless_decoding(pt);
	pt->have_tsc = intel_pt_have_tsc(pt);
	pt->sampling_mode = false;
	pt->est_tsc = !pt->timeless_decoding;

	pt->unknown_thread = thread__new(999999999, 999999999);
	if (!pt->unknown_thread) {
		err = -ENOMEM;
		goto err_free_queues;
	}
	err = thread__set_comm(pt->unknown_thread, "unknown", 0);
	if (err)
		goto err_delete_thread;
	if (thread__init_map_groups(pt->unknown_thread, pt->machine)) {
		err = -ENOMEM;
		goto err_delete_thread;
	}

	pt->auxtrace.process_event = intel_pt_process_event;
	pt->auxtrace.process_auxtrace_event = intel_pt_process_auxtrace_event;
	pt->auxtrace.flush_events = intel_pt_flush;
	pt->auxtrace.free_events = intel_pt_free_events;
	pt->auxtrace.free = intel_pt_free;
	session->auxtrace = &pt->auxtrace;

	if (dump_trace)
		return 0;

	if (pt->have_sched_switch == 1) {
		pt->switch_evsel = intel_pt_find_sched_switch(session->evlist);
		if (!pt->switch_evsel) {
			pr_err("%s: missing sched_switch event\n", __func__);
			goto err_delete_thread;
		}
	} else if (pt->have_sched_switch == 2 &&
		   !intel_pt_find_switch(session->evlist)) {
		pr_err("%s: missing context_switch attribute flag\n", __func__);
		goto err_delete_thread;
	}

	if (session->itrace_synth_opts && session->itrace_synth_opts->set) {
		pt->synth_opts = *session->itrace_synth_opts;
	} else {
		itrace_synth_opts__set_default(&pt->synth_opts);
		if (use_browser != -1) {
			pt->synth_opts.branches = false;
			pt->synth_opts.callchain = true;
		}
	}

	if (pt->synth_opts.log)
		intel_pt_log_enable();

	/* Maximum non-turbo ratio is TSC freq / 100 MHz */
	if (pt->tc.time_mult) {
		u64 tsc_freq = intel_pt_ns_to_ticks(pt, 1000000000);

		pt->max_non_turbo_ratio = (tsc_freq + 50000000) / 100000000;
		intel_pt_log("TSC frequency %"PRIu64"\n", tsc_freq);
		intel_pt_log("Maximum non-turbo ratio %u\n",
			     pt->max_non_turbo_ratio);
	}

	if (pt->synth_opts.calls)
		pt->branches_filter |= PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC |
				       PERF_IP_FLAG_TRACE_END;
	if (pt->synth_opts.returns)
		pt->branches_filter |= PERF_IP_FLAG_RETURN |
				       PERF_IP_FLAG_TRACE_BEGIN;

	if (pt->synth_opts.callchain && !symbol_conf.use_callchain) {
		symbol_conf.use_callchain = true;
		if (callchain_register_param(&callchain_param) < 0) {
			symbol_conf.use_callchain = false;
			pt->synth_opts.callchain = false;
		}
	}

	err = intel_pt_synth_events(pt, session);
	if (err)
		goto err_delete_thread;

	err = auxtrace_queues__process_index(&pt->queues, session);
	if (err)
		goto err_delete_thread;

	if (pt->queues.populated)
		pt->data_queued = true;

	if (pt->timeless_decoding)
		pr_debug2("Intel PT decoding without timestamps\n");

	return 0;

err_delete_thread:
	thread__delete(pt->unknown_thread);
err_free_queues:
	intel_pt_log_disable();
	auxtrace_queues__free(&pt->queues);
	session->auxtrace = NULL;
err_free:
	free(pt);
	return err;
}