1167 lines
34 KiB
C
1167 lines
34 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* Copyright IBM Corp. 2018
|
|
* Auxtrace support for s390 CPU-Measurement Sampling Facility
|
|
*
|
|
* Author(s): Thomas Richter <tmricht@linux.ibm.com>
|
|
*
|
|
* Auxiliary traces are collected during 'perf record' using rbd000 event.
|
|
* Several PERF_RECORD_XXX are generated during recording:
|
|
*
|
|
* PERF_RECORD_AUX:
|
|
* Records that new data landed in the AUX buffer part.
|
|
* PERF_RECORD_AUXTRACE:
|
|
* Defines auxtrace data. Followed by the actual data. The contents of
|
|
* the auxtrace data is dependent on the event and the CPU.
|
|
* This record is generated by perf record command. For details
|
|
* see Documentation/perf.data-file-format.txt.
|
|
* PERF_RECORD_AUXTRACE_INFO:
|
|
* Defines a table of contains for PERF_RECORD_AUXTRACE records. This
|
|
* record is generated during 'perf record' command. Each record contains
|
|
* up to 256 entries describing offset and size of the AUXTRACE data in the
|
|
* perf.data file.
|
|
* PERF_RECORD_AUXTRACE_ERROR:
|
|
* Indicates an error during AUXTRACE collection such as buffer overflow.
|
|
* PERF_RECORD_FINISHED_ROUND:
|
|
* Perf events are not necessarily in time stamp order, as they can be
|
|
* collected in parallel on different CPUs. If the events should be
|
|
* processed in time order they need to be sorted first.
|
|
* Perf report guarantees that there is no reordering over a
|
|
* PERF_RECORD_FINISHED_ROUND boundary event. All perf records with a
|
|
* time stamp lower than this record are processed (and displayed) before
|
|
* the succeeding perf record are processed.
|
|
*
|
|
* These records are evaluated during perf report command.
|
|
*
|
|
* 1. PERF_RECORD_AUXTRACE_INFO is used to set up the infrastructure for
|
|
* auxiliary trace data processing. See s390_cpumsf_process_auxtrace_info()
|
|
* below.
|
|
* Auxiliary trace data is collected per CPU. To merge the data into the report
|
|
* an auxtrace_queue is created for each CPU. It is assumed that the auxtrace
|
|
* data is in ascending order.
|
|
*
|
|
* Each queue has a double linked list of auxtrace_buffers. This list contains
|
|
* the offset and size of a CPU's auxtrace data. During auxtrace processing
|
|
* the data portion is mmap()'ed.
|
|
*
|
|
* To sort the queues in chronological order, all queue access is controlled
|
|
* by the auxtrace_heap. This is basicly a stack, each stack element has two
|
|
* entries, the queue number and a time stamp. However the stack is sorted by
|
|
* the time stamps. The highest time stamp is at the bottom the lowest
|
|
* (nearest) time stamp is at the top. That sort order is maintained at all
|
|
* times!
|
|
*
|
|
* After the auxtrace infrastructure has been setup, the auxtrace queues are
|
|
* filled with data (offset/size pairs) and the auxtrace_heap is populated.
|
|
*
|
|
* 2. PERF_RECORD_XXX processing triggers access to the auxtrace_queues.
|
|
* Each record is handled by s390_cpumsf_process_event(). The time stamp of
|
|
* the perf record is compared with the time stamp located on the auxtrace_heap
|
|
* top element. If that time stamp is lower than the time stamp from the
|
|
* record sample, the auxtrace queues will be processed. As auxtrace queues
|
|
* control many auxtrace_buffers and each buffer can be quite large, the
|
|
* auxtrace buffer might be processed only partially. In this case the
|
|
* position in the auxtrace_buffer of that queue is remembered and the time
|
|
* stamp of the last processed entry of the auxtrace_buffer replaces the
|
|
* current auxtrace_heap top.
|
|
*
|
|
* 3. Auxtrace_queues might run of out data and are feeded by the
|
|
* PERF_RECORD_AUXTRACE handling, see s390_cpumsf_process_auxtrace_event().
|
|
*
|
|
* Event Generation
|
|
* Each sampling-data entry in the auxilary trace data generates a perf sample.
|
|
* This sample is filled
|
|
* with data from the auxtrace such as PID/TID, instruction address, CPU state,
|
|
* etc. This sample is processed with perf_session__deliver_synth_event() to
|
|
* be included into the GUI.
|
|
*
|
|
* 4. PERF_RECORD_FINISHED_ROUND event is used to process all the remaining
|
|
* auxiliary traces entries until the time stamp of this record is reached
|
|
* auxtrace_heap top. This is triggered by ordered_event->deliver().
|
|
*
|
|
*
|
|
* Perf event processing.
|
|
* Event processing of PERF_RECORD_XXX entries relies on time stamp entries.
|
|
* This is the function call sequence:
|
|
*
|
|
* __cmd_report()
|
|
* |
|
|
* perf_session__process_events()
|
|
* |
|
|
* __perf_session__process_events()
|
|
* |
|
|
* perf_session__process_event()
|
|
* | This functions splits the PERF_RECORD_XXX records.
|
|
* | - Those generated by perf record command (type number equal or higher
|
|
* | than PERF_RECORD_USER_TYPE_START) are handled by
|
|
* | perf_session__process_user_event(see below)
|
|
* | - Those generated by the kernel are handled by
|
|
* | perf_evlist__parse_sample_timestamp()
|
|
* |
|
|
* perf_evlist__parse_sample_timestamp()
|
|
* | Extract time stamp from sample data.
|
|
* |
|
|
* perf_session__queue_event()
|
|
* | If timestamp is positive the sample is entered into an ordered_event
|
|
* | list, sort order is the timestamp. The event processing is deferred until
|
|
* | later (see perf_session__process_user_event()).
|
|
* | Other timestamps (0 or -1) are handled immediately by
|
|
* | perf_session__deliver_event(). These are events generated at start up
|
|
* | of command perf record. They create PERF_RECORD_COMM and PERF_RECORD_MMAP*
|
|
* | records. They are needed to create a list of running processes and its
|
|
* | memory mappings and layout. They are needed at the beginning to enable
|
|
* | command perf report to create process trees and memory mappings.
|
|
* |
|
|
* perf_session__deliver_event()
|
|
* | Delivers a PERF_RECORD_XXX entry for handling.
|
|
* |
|
|
* auxtrace__process_event()
|
|
* | The timestamp of the PERF_RECORD_XXX entry is taken to correlate with
|
|
* | time stamps from the auxiliary trace buffers. This enables
|
|
* | synchronization between auxiliary trace data and the events on the
|
|
* | perf.data file.
|
|
* |
|
|
* machine__deliver_event()
|
|
* | Handles the PERF_RECORD_XXX event. This depends on the record type.
|
|
* It might update the process tree, update a process memory map or enter
|
|
* a sample with IP and call back chain data into GUI data pool.
|
|
*
|
|
*
|
|
* Deferred processing determined by perf_session__process_user_event() is
|
|
* finally processed when a PERF_RECORD_FINISHED_ROUND is encountered. These
|
|
* are generated during command perf record.
|
|
* The timestamp of PERF_RECORD_FINISHED_ROUND event is taken to process all
|
|
* PERF_RECORD_XXX entries stored in the ordered_event list. This list was
|
|
* built up while reading the perf.data file.
|
|
* Each event is now processed by calling perf_session__deliver_event().
|
|
* This enables time synchronization between the data in the perf.data file and
|
|
* the data in the auxiliary trace buffers.
|
|
*/
|
|
|
|
#include <endian.h>
|
|
#include <errno.h>
|
|
#include <byteswap.h>
|
|
#include <inttypes.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/types.h>
|
|
#include <linux/bitops.h>
|
|
#include <linux/log2.h>
|
|
#include <linux/zalloc.h>
|
|
|
|
#include <sys/stat.h>
|
|
#include <sys/types.h>
|
|
|
|
#include "color.h"
|
|
#include "evsel.h"
|
|
#include "evlist.h"
|
|
#include "machine.h"
|
|
#include "session.h"
|
|
#include "tool.h"
|
|
#include "debug.h"
|
|
#include "auxtrace.h"
|
|
#include "s390-cpumsf.h"
|
|
#include "s390-cpumsf-kernel.h"
|
|
#include "s390-cpumcf-kernel.h"
|
|
#include "config.h"
|
|
|
|
struct s390_cpumsf {
|
|
struct auxtrace auxtrace;
|
|
struct auxtrace_queues queues;
|
|
struct auxtrace_heap heap;
|
|
struct perf_session *session;
|
|
struct machine *machine;
|
|
u32 auxtrace_type;
|
|
u32 pmu_type;
|
|
u16 machine_type;
|
|
bool data_queued;
|
|
bool use_logfile;
|
|
char *logdir;
|
|
};
|
|
|
|
struct s390_cpumsf_queue {
|
|
struct s390_cpumsf *sf;
|
|
unsigned int queue_nr;
|
|
struct auxtrace_buffer *buffer;
|
|
int cpu;
|
|
FILE *logfile;
|
|
FILE *logfile_ctr;
|
|
};
|
|
|
|
/* Check if the raw data should be dumped to file. If this is the case and
|
|
* the file to dump to has not been opened for writing, do so.
|
|
*
|
|
* Return 0 on success and greater zero on error so processing continues.
|
|
*/
|
|
static int s390_cpumcf_dumpctr(struct s390_cpumsf *sf,
|
|
struct perf_sample *sample)
|
|
{
|
|
struct s390_cpumsf_queue *sfq;
|
|
struct auxtrace_queue *q;
|
|
int rc = 0;
|
|
|
|
if (!sf->use_logfile || sf->queues.nr_queues <= sample->cpu)
|
|
return rc;
|
|
|
|
q = &sf->queues.queue_array[sample->cpu];
|
|
sfq = q->priv;
|
|
if (!sfq) /* Queue not yet allocated */
|
|
return rc;
|
|
|
|
if (!sfq->logfile_ctr) {
|
|
char *name;
|
|
|
|
rc = (sf->logdir)
|
|
? asprintf(&name, "%s/aux.ctr.%02x",
|
|
sf->logdir, sample->cpu)
|
|
: asprintf(&name, "aux.ctr.%02x", sample->cpu);
|
|
if (rc > 0)
|
|
sfq->logfile_ctr = fopen(name, "w");
|
|
if (sfq->logfile_ctr == NULL) {
|
|
pr_err("Failed to open counter set log file %s, "
|
|
"continue...\n", name);
|
|
rc = 1;
|
|
}
|
|
free(name);
|
|
}
|
|
|
|
if (sfq->logfile_ctr) {
|
|
/* See comment above for -4 */
|
|
size_t n = fwrite(sample->raw_data, sample->raw_size - 4, 1,
|
|
sfq->logfile_ctr);
|
|
if (n != 1) {
|
|
pr_err("Failed to write counter set data\n");
|
|
rc = 1;
|
|
}
|
|
}
|
|
return rc;
|
|
}
|
|
|
|
/* Display s390 CPU measurement facility basic-sampling data entry
|
|
* Data written on s390 in big endian byte order and contains bit
|
|
* fields across byte boundaries.
|
|
*/
|
|
static bool s390_cpumsf_basic_show(const char *color, size_t pos,
|
|
struct hws_basic_entry *basicp)
|
|
{
|
|
struct hws_basic_entry *basic = basicp;
|
|
#if __BYTE_ORDER == __LITTLE_ENDIAN
|
|
struct hws_basic_entry local;
|
|
unsigned long long word = be64toh(*(unsigned long long *)basicp);
|
|
|
|
memset(&local, 0, sizeof(local));
|
|
local.def = be16toh(basicp->def);
|
|
local.prim_asn = word & 0xffff;
|
|
local.CL = word >> 30 & 0x3;
|
|
local.I = word >> 32 & 0x1;
|
|
local.AS = word >> 33 & 0x3;
|
|
local.P = word >> 35 & 0x1;
|
|
local.W = word >> 36 & 0x1;
|
|
local.T = word >> 37 & 0x1;
|
|
local.U = word >> 40 & 0xf;
|
|
local.ia = be64toh(basicp->ia);
|
|
local.gpp = be64toh(basicp->gpp);
|
|
local.hpp = be64toh(basicp->hpp);
|
|
basic = &local;
|
|
#endif
|
|
if (basic->def != 1) {
|
|
pr_err("Invalid AUX trace basic entry [%#08zx]\n", pos);
|
|
return false;
|
|
}
|
|
color_fprintf(stdout, color, " [%#08zx] Basic Def:%04x Inst:%#04x"
|
|
" %c%c%c%c AS:%d ASN:%#04x IA:%#018llx\n"
|
|
"\t\tCL:%d HPP:%#018llx GPP:%#018llx\n",
|
|
pos, basic->def, basic->U,
|
|
basic->T ? 'T' : ' ',
|
|
basic->W ? 'W' : ' ',
|
|
basic->P ? 'P' : ' ',
|
|
basic->I ? 'I' : ' ',
|
|
basic->AS, basic->prim_asn, basic->ia, basic->CL,
|
|
basic->hpp, basic->gpp);
|
|
return true;
|
|
}
|
|
|
|
/* Display s390 CPU measurement facility diagnostic-sampling data entry.
|
|
* Data written on s390 in big endian byte order and contains bit
|
|
* fields across byte boundaries.
|
|
*/
|
|
static bool s390_cpumsf_diag_show(const char *color, size_t pos,
|
|
struct hws_diag_entry *diagp)
|
|
{
|
|
struct hws_diag_entry *diag = diagp;
|
|
#if __BYTE_ORDER == __LITTLE_ENDIAN
|
|
struct hws_diag_entry local;
|
|
unsigned long long word = be64toh(*(unsigned long long *)diagp);
|
|
|
|
local.def = be16toh(diagp->def);
|
|
local.I = word >> 32 & 0x1;
|
|
diag = &local;
|
|
#endif
|
|
if (diag->def < S390_CPUMSF_DIAG_DEF_FIRST) {
|
|
pr_err("Invalid AUX trace diagnostic entry [%#08zx]\n", pos);
|
|
return false;
|
|
}
|
|
color_fprintf(stdout, color, " [%#08zx] Diag Def:%04x %c\n",
|
|
pos, diag->def, diag->I ? 'I' : ' ');
|
|
return true;
|
|
}
|
|
|
|
/* Return TOD timestamp contained in an trailer entry */
|
|
static unsigned long long trailer_timestamp(struct hws_trailer_entry *te,
|
|
int idx)
|
|
{
|
|
/* te->t set: TOD in STCKE format, bytes 8-15
|
|
* to->t not set: TOD in STCK format, bytes 0-7
|
|
*/
|
|
unsigned long long ts;
|
|
|
|
memcpy(&ts, &te->timestamp[idx], sizeof(ts));
|
|
return be64toh(ts);
|
|
}
|
|
|
|
/* Display s390 CPU measurement facility trailer entry */
|
|
static bool s390_cpumsf_trailer_show(const char *color, size_t pos,
|
|
struct hws_trailer_entry *te)
|
|
{
|
|
#if __BYTE_ORDER == __LITTLE_ENDIAN
|
|
struct hws_trailer_entry local;
|
|
const unsigned long long flags = be64toh(te->flags);
|
|
|
|
memset(&local, 0, sizeof(local));
|
|
local.f = flags >> 63 & 0x1;
|
|
local.a = flags >> 62 & 0x1;
|
|
local.t = flags >> 61 & 0x1;
|
|
local.bsdes = be16toh((flags >> 16 & 0xffff));
|
|
local.dsdes = be16toh((flags & 0xffff));
|
|
memcpy(&local.timestamp, te->timestamp, sizeof(te->timestamp));
|
|
local.overflow = be64toh(te->overflow);
|
|
local.clock_base = be64toh(te->progusage[0]) >> 63 & 1;
|
|
local.progusage2 = be64toh(te->progusage2);
|
|
te = &local;
|
|
#endif
|
|
if (te->bsdes != sizeof(struct hws_basic_entry)) {
|
|
pr_err("Invalid AUX trace trailer entry [%#08zx]\n", pos);
|
|
return false;
|
|
}
|
|
color_fprintf(stdout, color, " [%#08zx] Trailer %c%c%c bsdes:%d"
|
|
" dsdes:%d Overflow:%lld Time:%#llx\n"
|
|
"\t\tC:%d TOD:%#lx\n",
|
|
pos,
|
|
te->f ? 'F' : ' ',
|
|
te->a ? 'A' : ' ',
|
|
te->t ? 'T' : ' ',
|
|
te->bsdes, te->dsdes, te->overflow,
|
|
trailer_timestamp(te, te->clock_base),
|
|
te->clock_base, te->progusage2);
|
|
return true;
|
|
}
|
|
|
|
/* Test a sample data block. It must be 4KB or a multiple thereof in size and
|
|
* 4KB page aligned. Each sample data page has a trailer entry at the
|
|
* end which contains the sample entry data sizes.
|
|
*
|
|
* Return true if the sample data block passes the checks and set the
|
|
* basic set entry size and diagnostic set entry size.
|
|
*
|
|
* Return false on failure.
|
|
*
|
|
* Note: Old hardware does not set the basic or diagnostic entry sizes
|
|
* in the trailer entry. Use the type number instead.
|
|
*/
|
|
static bool s390_cpumsf_validate(int machine_type,
|
|
unsigned char *buf, size_t len,
|
|
unsigned short *bsdes,
|
|
unsigned short *dsdes)
|
|
{
|
|
struct hws_basic_entry *basic = (struct hws_basic_entry *)buf;
|
|
struct hws_trailer_entry *te;
|
|
|
|
*dsdes = *bsdes = 0;
|
|
if (len & (S390_CPUMSF_PAGESZ - 1)) /* Illegal size */
|
|
return false;
|
|
if (be16toh(basic->def) != 1) /* No basic set entry, must be first */
|
|
return false;
|
|
/* Check for trailer entry at end of SDB */
|
|
te = (struct hws_trailer_entry *)(buf + S390_CPUMSF_PAGESZ
|
|
- sizeof(*te));
|
|
*bsdes = be16toh(te->bsdes);
|
|
*dsdes = be16toh(te->dsdes);
|
|
if (!te->bsdes && !te->dsdes) {
|
|
/* Very old hardware, use CPUID */
|
|
switch (machine_type) {
|
|
case 2097:
|
|
case 2098:
|
|
*dsdes = 64;
|
|
*bsdes = 32;
|
|
break;
|
|
case 2817:
|
|
case 2818:
|
|
*dsdes = 74;
|
|
*bsdes = 32;
|
|
break;
|
|
case 2827:
|
|
case 2828:
|
|
*dsdes = 85;
|
|
*bsdes = 32;
|
|
break;
|
|
case 2964:
|
|
case 2965:
|
|
*dsdes = 112;
|
|
*bsdes = 32;
|
|
break;
|
|
default:
|
|
/* Illegal trailer entry */
|
|
return false;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
/* Return true if there is room for another entry */
|
|
static bool s390_cpumsf_reached_trailer(size_t entry_sz, size_t pos)
|
|
{
|
|
size_t payload = S390_CPUMSF_PAGESZ - sizeof(struct hws_trailer_entry);
|
|
|
|
if (payload - (pos & (S390_CPUMSF_PAGESZ - 1)) < entry_sz)
|
|
return false;
|
|
return true;
|
|
}
|
|
|
|
/* Dump an auxiliary buffer. These buffers are multiple of
|
|
* 4KB SDB pages.
|
|
*/
|
|
static void s390_cpumsf_dump(struct s390_cpumsf *sf,
|
|
unsigned char *buf, size_t len)
|
|
{
|
|
const char *color = PERF_COLOR_BLUE;
|
|
struct hws_basic_entry *basic;
|
|
struct hws_diag_entry *diag;
|
|
unsigned short bsdes, dsdes;
|
|
size_t pos = 0;
|
|
|
|
color_fprintf(stdout, color,
|
|
". ... s390 AUX data: size %zu bytes\n",
|
|
len);
|
|
|
|
if (!s390_cpumsf_validate(sf->machine_type, buf, len, &bsdes,
|
|
&dsdes)) {
|
|
pr_err("Invalid AUX trace data block size:%zu"
|
|
" (type:%d bsdes:%hd dsdes:%hd)\n",
|
|
len, sf->machine_type, bsdes, dsdes);
|
|
return;
|
|
}
|
|
|
|
/* s390 kernel always returns 4KB blocks fully occupied,
|
|
* no partially filled SDBs.
|
|
*/
|
|
while (pos < len) {
|
|
/* Handle Basic entry */
|
|
basic = (struct hws_basic_entry *)(buf + pos);
|
|
if (s390_cpumsf_basic_show(color, pos, basic))
|
|
pos += bsdes;
|
|
else
|
|
return;
|
|
|
|
/* Handle Diagnostic entry */
|
|
diag = (struct hws_diag_entry *)(buf + pos);
|
|
if (s390_cpumsf_diag_show(color, pos, diag))
|
|
pos += dsdes;
|
|
else
|
|
return;
|
|
|
|
/* Check for trailer entry */
|
|
if (!s390_cpumsf_reached_trailer(bsdes + dsdes, pos)) {
|
|
/* Show trailer entry */
|
|
struct hws_trailer_entry te;
|
|
|
|
pos = (pos + S390_CPUMSF_PAGESZ)
|
|
& ~(S390_CPUMSF_PAGESZ - 1);
|
|
pos -= sizeof(te);
|
|
memcpy(&te, buf + pos, sizeof(te));
|
|
/* Set descriptor sizes in case of old hardware
|
|
* where these values are not set.
|
|
*/
|
|
te.bsdes = bsdes;
|
|
te.dsdes = dsdes;
|
|
if (s390_cpumsf_trailer_show(color, pos, &te))
|
|
pos += sizeof(te);
|
|
else
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
|
|
static void s390_cpumsf_dump_event(struct s390_cpumsf *sf, unsigned char *buf,
|
|
size_t len)
|
|
{
|
|
printf(".\n");
|
|
s390_cpumsf_dump(sf, buf, len);
|
|
}
|
|
|
|
#define S390_LPP_PID_MASK 0xffffffff
|
|
|
|
static bool s390_cpumsf_make_event(size_t pos,
|
|
struct hws_basic_entry *basic,
|
|
struct s390_cpumsf_queue *sfq)
|
|
{
|
|
struct perf_sample sample = {
|
|
.ip = basic->ia,
|
|
.pid = basic->hpp & S390_LPP_PID_MASK,
|
|
.tid = basic->hpp & S390_LPP_PID_MASK,
|
|
.cpumode = PERF_RECORD_MISC_CPUMODE_UNKNOWN,
|
|
.cpu = sfq->cpu,
|
|
.period = 1
|
|
};
|
|
union perf_event event;
|
|
|
|
memset(&event, 0, sizeof(event));
|
|
if (basic->CL == 1) /* Native LPAR mode */
|
|
sample.cpumode = basic->P ? PERF_RECORD_MISC_USER
|
|
: PERF_RECORD_MISC_KERNEL;
|
|
else if (basic->CL == 2) /* Guest kernel/user space */
|
|
sample.cpumode = basic->P ? PERF_RECORD_MISC_GUEST_USER
|
|
: PERF_RECORD_MISC_GUEST_KERNEL;
|
|
else if (basic->gpp || basic->prim_asn != 0xffff)
|
|
/* Use heuristics on old hardware */
|
|
sample.cpumode = basic->P ? PERF_RECORD_MISC_GUEST_USER
|
|
: PERF_RECORD_MISC_GUEST_KERNEL;
|
|
else
|
|
sample.cpumode = basic->P ? PERF_RECORD_MISC_USER
|
|
: PERF_RECORD_MISC_KERNEL;
|
|
|
|
event.sample.header.type = PERF_RECORD_SAMPLE;
|
|
event.sample.header.misc = sample.cpumode;
|
|
event.sample.header.size = sizeof(struct perf_event_header);
|
|
|
|
pr_debug4("%s pos:%#zx ip:%#" PRIx64 " P:%d CL:%d pid:%d.%d cpumode:%d cpu:%d\n",
|
|
__func__, pos, sample.ip, basic->P, basic->CL, sample.pid,
|
|
sample.tid, sample.cpumode, sample.cpu);
|
|
if (perf_session__deliver_synth_event(sfq->sf->session, &event,
|
|
&sample)) {
|
|
pr_err("s390 Auxiliary Trace: failed to deliver event\n");
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
static unsigned long long get_trailer_time(const unsigned char *buf)
|
|
{
|
|
struct hws_trailer_entry *te;
|
|
unsigned long long aux_time, progusage2;
|
|
bool clock_base;
|
|
|
|
te = (struct hws_trailer_entry *)(buf + S390_CPUMSF_PAGESZ
|
|
- sizeof(*te));
|
|
|
|
#if __BYTE_ORDER == __LITTLE_ENDIAN
|
|
clock_base = be64toh(te->progusage[0]) >> 63 & 0x1;
|
|
progusage2 = be64toh(te->progusage[1]);
|
|
#else
|
|
clock_base = te->clock_base;
|
|
progusage2 = te->progusage2;
|
|
#endif
|
|
if (!clock_base) /* TOD_CLOCK_BASE value missing */
|
|
return 0;
|
|
|
|
/* Correct calculation to convert time stamp in trailer entry to
|
|
* nano seconds (taken from arch/s390 function tod_to_ns()).
|
|
* TOD_CLOCK_BASE is stored in trailer entry member progusage2.
|
|
*/
|
|
aux_time = trailer_timestamp(te, clock_base) - progusage2;
|
|
aux_time = (aux_time >> 9) * 125 + (((aux_time & 0x1ff) * 125) >> 9);
|
|
return aux_time;
|
|
}
|
|
|
|
/* Process the data samples of a single queue. The first parameter is a
|
|
* pointer to the queue, the second parameter is the time stamp. This
|
|
* is the time stamp:
|
|
* - of the event that triggered this processing.
|
|
* - or the time stamp when the last proccesing of this queue stopped.
|
|
* In this case it stopped at a 4KB page boundary and record the
|
|
* position on where to continue processing on the next invocation
|
|
* (see buffer->use_data and buffer->use_size).
|
|
*
|
|
* When this function returns the second parameter is updated to
|
|
* reflect the time stamp of the last processed auxiliary data entry
|
|
* (taken from the trailer entry of that page). The caller uses this
|
|
* returned time stamp to record the last processed entry in this
|
|
* queue.
|
|
*
|
|
* The function returns:
|
|
* 0: Processing successful. The second parameter returns the
|
|
* time stamp from the trailer entry until which position
|
|
* processing took place. Subsequent calls resume from this
|
|
* position.
|
|
* <0: An error occurred during processing. The second parameter
|
|
* returns the maximum time stamp.
|
|
* >0: Done on this queue. The second parameter returns the
|
|
* maximum time stamp.
|
|
*/
|
|
static int s390_cpumsf_samples(struct s390_cpumsf_queue *sfq, u64 *ts)
|
|
{
|
|
struct s390_cpumsf *sf = sfq->sf;
|
|
unsigned char *buf = sfq->buffer->use_data;
|
|
size_t len = sfq->buffer->use_size;
|
|
struct hws_basic_entry *basic;
|
|
unsigned short bsdes, dsdes;
|
|
size_t pos = 0;
|
|
int err = 1;
|
|
u64 aux_ts;
|
|
|
|
if (!s390_cpumsf_validate(sf->machine_type, buf, len, &bsdes,
|
|
&dsdes)) {
|
|
*ts = ~0ULL;
|
|
return -1;
|
|
}
|
|
|
|
/* Get trailer entry time stamp and check if entries in
|
|
* this auxiliary page are ready for processing. If the
|
|
* time stamp of the first entry is too high, whole buffer
|
|
* can be skipped. In this case return time stamp.
|
|
*/
|
|
aux_ts = get_trailer_time(buf);
|
|
if (!aux_ts) {
|
|
pr_err("[%#08" PRIx64 "] Invalid AUX trailer entry TOD clock base\n",
|
|
(s64)sfq->buffer->data_offset);
|
|
aux_ts = ~0ULL;
|
|
goto out;
|
|
}
|
|
if (aux_ts > *ts) {
|
|
*ts = aux_ts;
|
|
return 0;
|
|
}
|
|
|
|
while (pos < len) {
|
|
/* Handle Basic entry */
|
|
basic = (struct hws_basic_entry *)(buf + pos);
|
|
if (s390_cpumsf_make_event(pos, basic, sfq))
|
|
pos += bsdes;
|
|
else {
|
|
err = -EBADF;
|
|
goto out;
|
|
}
|
|
|
|
pos += dsdes; /* Skip diagnositic entry */
|
|
|
|
/* Check for trailer entry */
|
|
if (!s390_cpumsf_reached_trailer(bsdes + dsdes, pos)) {
|
|
pos = (pos + S390_CPUMSF_PAGESZ)
|
|
& ~(S390_CPUMSF_PAGESZ - 1);
|
|
/* Check existence of next page */
|
|
if (pos >= len)
|
|
break;
|
|
aux_ts = get_trailer_time(buf + pos);
|
|
if (!aux_ts) {
|
|
aux_ts = ~0ULL;
|
|
goto out;
|
|
}
|
|
if (aux_ts > *ts) {
|
|
*ts = aux_ts;
|
|
sfq->buffer->use_data += pos;
|
|
sfq->buffer->use_size -= pos;
|
|
return 0;
|
|
}
|
|
}
|
|
}
|
|
out:
|
|
*ts = aux_ts;
|
|
sfq->buffer->use_size = 0;
|
|
sfq->buffer->use_data = NULL;
|
|
return err; /* Buffer completely scanned or error */
|
|
}
|
|
|
|
/* Run the s390 auxiliary trace decoder.
|
|
* Select the queue buffer to operate on, the caller already selected
|
|
* the proper queue, depending on second parameter 'ts'.
|
|
* This is the time stamp until which the auxiliary entries should
|
|
* be processed. This value is updated by called functions and
|
|
* returned to the caller.
|
|
*
|
|
* Resume processing in the current buffer. If there is no buffer
|
|
* get a new buffer from the queue and setup start position for
|
|
* processing.
|
|
* When a buffer is completely processed remove it from the queue
|
|
* before returning.
|
|
*
|
|
* This function returns
|
|
* 1: When the queue is empty. Second parameter will be set to
|
|
* maximum time stamp.
|
|
* 0: Normal processing done.
|
|
* <0: Error during queue buffer setup. This causes the caller
|
|
* to stop processing completely.
|
|
*/
|
|
static int s390_cpumsf_run_decoder(struct s390_cpumsf_queue *sfq,
|
|
u64 *ts)
|
|
{
|
|
|
|
struct auxtrace_buffer *buffer;
|
|
struct auxtrace_queue *queue;
|
|
int err;
|
|
|
|
queue = &sfq->sf->queues.queue_array[sfq->queue_nr];
|
|
|
|
/* Get buffer and last position in buffer to resume
|
|
* decoding the auxiliary entries. One buffer might be large
|
|
* and decoding might stop in between. This depends on the time
|
|
* stamp of the trailer entry in each page of the auxiliary
|
|
* data and the time stamp of the event triggering the decoding.
|
|
*/
|
|
if (sfq->buffer == NULL) {
|
|
sfq->buffer = buffer = auxtrace_buffer__next(queue,
|
|
sfq->buffer);
|
|
if (!buffer) {
|
|
*ts = ~0ULL;
|
|
return 1; /* Processing done on this queue */
|
|
}
|
|
/* Start with a new buffer on this queue */
|
|
if (buffer->data) {
|
|
buffer->use_size = buffer->size;
|
|
buffer->use_data = buffer->data;
|
|
}
|
|
if (sfq->logfile) { /* Write into log file */
|
|
size_t rc = fwrite(buffer->data, buffer->size, 1,
|
|
sfq->logfile);
|
|
if (rc != 1)
|
|
pr_err("Failed to write auxiliary data\n");
|
|
}
|
|
} else
|
|
buffer = sfq->buffer;
|
|
|
|
if (!buffer->data) {
|
|
int fd = perf_data__fd(sfq->sf->session->data);
|
|
|
|
buffer->data = auxtrace_buffer__get_data(buffer, fd);
|
|
if (!buffer->data)
|
|
return -ENOMEM;
|
|
buffer->use_size = buffer->size;
|
|
buffer->use_data = buffer->data;
|
|
|
|
if (sfq->logfile) { /* Write into log file */
|
|
size_t rc = fwrite(buffer->data, buffer->size, 1,
|
|
sfq->logfile);
|
|
if (rc != 1)
|
|
pr_err("Failed to write auxiliary data\n");
|
|
}
|
|
}
|
|
pr_debug4("%s queue_nr:%d buffer:%" PRId64 " offset:%#" PRIx64 " size:%#zx rest:%#zx\n",
|
|
__func__, sfq->queue_nr, buffer->buffer_nr, buffer->offset,
|
|
buffer->size, buffer->use_size);
|
|
err = s390_cpumsf_samples(sfq, ts);
|
|
|
|
/* If non-zero, there is either an error (err < 0) or the buffer is
|
|
* completely done (err > 0). The error is unrecoverable, usually
|
|
* some descriptors could not be read successfully, so continue with
|
|
* the next buffer.
|
|
* In both cases the parameter 'ts' has been updated.
|
|
*/
|
|
if (err) {
|
|
sfq->buffer = NULL;
|
|
list_del_init(&buffer->list);
|
|
auxtrace_buffer__free(buffer);
|
|
if (err > 0) /* Buffer done, no error */
|
|
err = 0;
|
|
}
|
|
return err;
|
|
}
|
|
|
|
static struct s390_cpumsf_queue *
|
|
s390_cpumsf_alloc_queue(struct s390_cpumsf *sf, unsigned int queue_nr)
|
|
{
|
|
struct s390_cpumsf_queue *sfq;
|
|
|
|
sfq = zalloc(sizeof(struct s390_cpumsf_queue));
|
|
if (sfq == NULL)
|
|
return NULL;
|
|
|
|
sfq->sf = sf;
|
|
sfq->queue_nr = queue_nr;
|
|
sfq->cpu = -1;
|
|
if (sf->use_logfile) {
|
|
char *name;
|
|
int rc;
|
|
|
|
rc = (sf->logdir)
|
|
? asprintf(&name, "%s/aux.smp.%02x",
|
|
sf->logdir, queue_nr)
|
|
: asprintf(&name, "aux.smp.%02x", queue_nr);
|
|
if (rc > 0)
|
|
sfq->logfile = fopen(name, "w");
|
|
if (sfq->logfile == NULL) {
|
|
pr_err("Failed to open auxiliary log file %s,"
|
|
"continue...\n", name);
|
|
sf->use_logfile = false;
|
|
}
|
|
free(name);
|
|
}
|
|
return sfq;
|
|
}
|
|
|
|
static int s390_cpumsf_setup_queue(struct s390_cpumsf *sf,
|
|
struct auxtrace_queue *queue,
|
|
unsigned int queue_nr, u64 ts)
|
|
{
|
|
struct s390_cpumsf_queue *sfq = queue->priv;
|
|
|
|
if (list_empty(&queue->head))
|
|
return 0;
|
|
|
|
if (sfq == NULL) {
|
|
sfq = s390_cpumsf_alloc_queue(sf, queue_nr);
|
|
if (!sfq)
|
|
return -ENOMEM;
|
|
queue->priv = sfq;
|
|
|
|
if (queue->cpu != -1)
|
|
sfq->cpu = queue->cpu;
|
|
}
|
|
return auxtrace_heap__add(&sf->heap, queue_nr, ts);
|
|
}
|
|
|
|
static int s390_cpumsf_setup_queues(struct s390_cpumsf *sf, u64 ts)
|
|
{
|
|
unsigned int i;
|
|
int ret = 0;
|
|
|
|
for (i = 0; i < sf->queues.nr_queues; i++) {
|
|
ret = s390_cpumsf_setup_queue(sf, &sf->queues.queue_array[i],
|
|
i, ts);
|
|
if (ret)
|
|
break;
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
static int s390_cpumsf_update_queues(struct s390_cpumsf *sf, u64 ts)
|
|
{
|
|
if (!sf->queues.new_data)
|
|
return 0;
|
|
|
|
sf->queues.new_data = false;
|
|
return s390_cpumsf_setup_queues(sf, ts);
|
|
}
|
|
|
|
static int s390_cpumsf_process_queues(struct s390_cpumsf *sf, u64 timestamp)
|
|
{
|
|
unsigned int queue_nr;
|
|
u64 ts;
|
|
int ret;
|
|
|
|
while (1) {
|
|
struct auxtrace_queue *queue;
|
|
struct s390_cpumsf_queue *sfq;
|
|
|
|
if (!sf->heap.heap_cnt)
|
|
return 0;
|
|
|
|
if (sf->heap.heap_array[0].ordinal >= timestamp)
|
|
return 0;
|
|
|
|
queue_nr = sf->heap.heap_array[0].queue_nr;
|
|
queue = &sf->queues.queue_array[queue_nr];
|
|
sfq = queue->priv;
|
|
|
|
auxtrace_heap__pop(&sf->heap);
|
|
if (sf->heap.heap_cnt) {
|
|
ts = sf->heap.heap_array[0].ordinal + 1;
|
|
if (ts > timestamp)
|
|
ts = timestamp;
|
|
} else {
|
|
ts = timestamp;
|
|
}
|
|
|
|
ret = s390_cpumsf_run_decoder(sfq, &ts);
|
|
if (ret < 0) {
|
|
auxtrace_heap__add(&sf->heap, queue_nr, ts);
|
|
return ret;
|
|
}
|
|
if (!ret) {
|
|
ret = auxtrace_heap__add(&sf->heap, queue_nr, ts);
|
|
if (ret < 0)
|
|
return ret;
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
static int s390_cpumsf_synth_error(struct s390_cpumsf *sf, int code, int cpu,
|
|
pid_t pid, pid_t tid, u64 ip, u64 timestamp)
|
|
{
|
|
char msg[MAX_AUXTRACE_ERROR_MSG];
|
|
union perf_event event;
|
|
int err;
|
|
|
|
strncpy(msg, "Lost Auxiliary Trace Buffer", sizeof(msg) - 1);
|
|
auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
|
|
code, cpu, pid, tid, ip, msg, timestamp);
|
|
|
|
err = perf_session__deliver_synth_event(sf->session, &event, NULL);
|
|
if (err)
|
|
pr_err("s390 Auxiliary Trace: failed to deliver error event,"
|
|
"error %d\n", err);
|
|
return err;
|
|
}
|
|
|
|
static int s390_cpumsf_lost(struct s390_cpumsf *sf, struct perf_sample *sample)
|
|
{
|
|
return s390_cpumsf_synth_error(sf, 1, sample->cpu,
|
|
sample->pid, sample->tid, 0,
|
|
sample->time);
|
|
}
|
|
|
|
static int
|
|
s390_cpumsf_process_event(struct perf_session *session,
|
|
union perf_event *event,
|
|
struct perf_sample *sample,
|
|
struct perf_tool *tool)
|
|
{
|
|
struct s390_cpumsf *sf = container_of(session->auxtrace,
|
|
struct s390_cpumsf,
|
|
auxtrace);
|
|
u64 timestamp = sample->time;
|
|
struct evsel *ev_bc000;
|
|
|
|
int err = 0;
|
|
|
|
if (dump_trace)
|
|
return 0;
|
|
|
|
if (!tool->ordered_events) {
|
|
pr_err("s390 Auxiliary Trace requires ordered events\n");
|
|
return -EINVAL;
|
|
}
|
|
|
|
if (event->header.type == PERF_RECORD_SAMPLE &&
|
|
sample->raw_size) {
|
|
/* Handle event with raw data */
|
|
ev_bc000 = perf_evlist__event2evsel(session->evlist, event);
|
|
if (ev_bc000 &&
|
|
ev_bc000->core.attr.config == PERF_EVENT_CPUM_CF_DIAG)
|
|
err = s390_cpumcf_dumpctr(sf, sample);
|
|
return err;
|
|
}
|
|
|
|
if (event->header.type == PERF_RECORD_AUX &&
|
|
event->aux.flags & PERF_AUX_FLAG_TRUNCATED)
|
|
return s390_cpumsf_lost(sf, sample);
|
|
|
|
if (timestamp) {
|
|
err = s390_cpumsf_update_queues(sf, timestamp);
|
|
if (!err)
|
|
err = s390_cpumsf_process_queues(sf, timestamp);
|
|
}
|
|
return err;
|
|
}
|
|
|
|
struct s390_cpumsf_synth {
|
|
struct perf_tool cpumsf_tool;
|
|
struct perf_session *session;
|
|
};
|
|
|
|
static int
|
|
s390_cpumsf_process_auxtrace_event(struct perf_session *session,
|
|
union perf_event *event __maybe_unused,
|
|
struct perf_tool *tool __maybe_unused)
|
|
{
|
|
struct s390_cpumsf *sf = container_of(session->auxtrace,
|
|
struct s390_cpumsf,
|
|
auxtrace);
|
|
|
|
int fd = perf_data__fd(session->data);
|
|
struct auxtrace_buffer *buffer;
|
|
off_t data_offset;
|
|
int err;
|
|
|
|
if (sf->data_queued)
|
|
return 0;
|
|
|
|
if (perf_data__is_pipe(session->data)) {
|
|
data_offset = 0;
|
|
} else {
|
|
data_offset = lseek(fd, 0, SEEK_CUR);
|
|
if (data_offset == -1)
|
|
return -errno;
|
|
}
|
|
|
|
err = auxtrace_queues__add_event(&sf->queues, session, event,
|
|
data_offset, &buffer);
|
|
if (err)
|
|
return err;
|
|
|
|
/* Dump here after copying piped trace out of the pipe */
|
|
if (dump_trace) {
|
|
if (auxtrace_buffer__get_data(buffer, fd)) {
|
|
s390_cpumsf_dump_event(sf, buffer->data,
|
|
buffer->size);
|
|
auxtrace_buffer__put_data(buffer);
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
static void s390_cpumsf_free_events(struct perf_session *session __maybe_unused)
|
|
{
|
|
}
|
|
|
|
static int s390_cpumsf_flush(struct perf_session *session __maybe_unused,
|
|
struct perf_tool *tool __maybe_unused)
|
|
{
|
|
return 0;
|
|
}
|
|
|
|
static void s390_cpumsf_free_queues(struct perf_session *session)
|
|
{
|
|
struct s390_cpumsf *sf = container_of(session->auxtrace,
|
|
struct s390_cpumsf,
|
|
auxtrace);
|
|
struct auxtrace_queues *queues = &sf->queues;
|
|
unsigned int i;
|
|
|
|
for (i = 0; i < queues->nr_queues; i++) {
|
|
struct s390_cpumsf_queue *sfq = (struct s390_cpumsf_queue *)
|
|
queues->queue_array[i].priv;
|
|
|
|
if (sfq != NULL) {
|
|
if (sfq->logfile) {
|
|
fclose(sfq->logfile);
|
|
sfq->logfile = NULL;
|
|
}
|
|
if (sfq->logfile_ctr) {
|
|
fclose(sfq->logfile_ctr);
|
|
sfq->logfile_ctr = NULL;
|
|
}
|
|
}
|
|
zfree(&queues->queue_array[i].priv);
|
|
}
|
|
auxtrace_queues__free(queues);
|
|
}
|
|
|
|
static void s390_cpumsf_free(struct perf_session *session)
|
|
{
|
|
struct s390_cpumsf *sf = container_of(session->auxtrace,
|
|
struct s390_cpumsf,
|
|
auxtrace);
|
|
|
|
auxtrace_heap__free(&sf->heap);
|
|
s390_cpumsf_free_queues(session);
|
|
session->auxtrace = NULL;
|
|
zfree(&sf->logdir);
|
|
free(sf);
|
|
}
|
|
|
|
static int s390_cpumsf_get_type(const char *cpuid)
|
|
{
|
|
int ret, family = 0;
|
|
|
|
ret = sscanf(cpuid, "%*[^,],%u", &family);
|
|
return (ret == 1) ? family : 0;
|
|
}
|
|
|
|
/* Check itrace options set on perf report command.
|
|
* Return true, if none are set or all options specified can be
|
|
* handled on s390 (currently only option 'd' for logging.
|
|
* Return false otherwise.
|
|
*/
|
|
static bool check_auxtrace_itrace(struct itrace_synth_opts *itops)
|
|
{
|
|
bool ison = false;
|
|
|
|
if (!itops || !itops->set)
|
|
return true;
|
|
ison = itops->inject || itops->instructions || itops->branches ||
|
|
itops->transactions || itops->ptwrites ||
|
|
itops->pwr_events || itops->errors ||
|
|
itops->dont_decode || itops->calls || itops->returns ||
|
|
itops->callchain || itops->thread_stack ||
|
|
itops->last_branch;
|
|
if (!ison)
|
|
return true;
|
|
pr_err("Unsupported --itrace options specified\n");
|
|
return false;
|
|
}
|
|
|
|
/* Check for AUXTRACE dump directory if it is needed.
|
|
* On failure print an error message but continue.
|
|
* Return 0 on wrong keyword in config file and 1 otherwise.
|
|
*/
|
|
static int s390_cpumsf__config(const char *var, const char *value, void *cb)
|
|
{
|
|
struct s390_cpumsf *sf = cb;
|
|
struct stat stbuf;
|
|
int rc;
|
|
|
|
if (strcmp(var, "auxtrace.dumpdir"))
|
|
return 0;
|
|
sf->logdir = strdup(value);
|
|
if (sf->logdir == NULL) {
|
|
pr_err("Failed to find auxtrace log directory %s,"
|
|
" continue with current directory...\n", value);
|
|
return 1;
|
|
}
|
|
rc = stat(sf->logdir, &stbuf);
|
|
if (rc == -1 || !S_ISDIR(stbuf.st_mode)) {
|
|
pr_err("Missing auxtrace log directory %s,"
|
|
" continue with current directory...\n", value);
|
|
zfree(&sf->logdir);
|
|
}
|
|
return 1;
|
|
}
|
|
|
|
int s390_cpumsf_process_auxtrace_info(union perf_event *event,
|
|
struct perf_session *session)
|
|
{
|
|
struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
|
|
struct s390_cpumsf *sf;
|
|
int err;
|
|
|
|
if (auxtrace_info->header.size < sizeof(struct perf_record_auxtrace_info))
|
|
return -EINVAL;
|
|
|
|
sf = zalloc(sizeof(struct s390_cpumsf));
|
|
if (sf == NULL)
|
|
return -ENOMEM;
|
|
|
|
if (!check_auxtrace_itrace(session->itrace_synth_opts)) {
|
|
err = -EINVAL;
|
|
goto err_free;
|
|
}
|
|
sf->use_logfile = session->itrace_synth_opts->log;
|
|
if (sf->use_logfile)
|
|
perf_config(s390_cpumsf__config, sf);
|
|
|
|
err = auxtrace_queues__init(&sf->queues);
|
|
if (err)
|
|
goto err_free;
|
|
|
|
sf->session = session;
|
|
sf->machine = &session->machines.host; /* No kvm support */
|
|
sf->auxtrace_type = auxtrace_info->type;
|
|
sf->pmu_type = PERF_TYPE_RAW;
|
|
sf->machine_type = s390_cpumsf_get_type(session->evlist->env->cpuid);
|
|
|
|
sf->auxtrace.process_event = s390_cpumsf_process_event;
|
|
sf->auxtrace.process_auxtrace_event = s390_cpumsf_process_auxtrace_event;
|
|
sf->auxtrace.flush_events = s390_cpumsf_flush;
|
|
sf->auxtrace.free_events = s390_cpumsf_free_events;
|
|
sf->auxtrace.free = s390_cpumsf_free;
|
|
session->auxtrace = &sf->auxtrace;
|
|
|
|
if (dump_trace)
|
|
return 0;
|
|
|
|
err = auxtrace_queues__process_index(&sf->queues, session);
|
|
if (err)
|
|
goto err_free_queues;
|
|
|
|
if (sf->queues.populated)
|
|
sf->data_queued = true;
|
|
|
|
return 0;
|
|
|
|
err_free_queues:
|
|
auxtrace_queues__free(&sf->queues);
|
|
session->auxtrace = NULL;
|
|
err_free:
|
|
zfree(&sf->logdir);
|
|
free(sf);
|
|
return err;
|
|
}
|