perf trace: Deref sys_enter pointer args with contents from probe:vfs_getname
author Arnaldo Carvalho de Melo <acme@redhat.com>
Wed, 5 Aug 2015 01:30:09 +0000 (22:30 -0300)
committer Arnaldo Carvalho de Melo <acme@redhat.com>
Wed, 5 Aug 2015 13:52:45 +0000 (10:52 -0300)
To work like strace and dereference syscall pointer args we need to
insert probes (or tracepoints) right after we copy those bytes from
userspace.

Since we're formatting the syscall args at raw_syscalls:sys_enter time,
we need to have a formatter that just stores the position where, later,
when we get the probe:vfs_getname, we can insert the pointer contents.

Now, if a probe:vfs_getname with this format is in place:

 # perf probe -l
  probe:vfs_getname (on getname_flags:72@/home/git/linux/fs/namei.c with pathname)

That was, in this case, put in place with:

 # perf probe 'vfs_getname=getname_flags:72 pathname=filename:string'
 Added new event:
  probe:vfs_getname    (on getname_flags:72 with pathname=filename:string)

 You can now use it in all perf tools, such as:

perf record -e probe:vfs_getname -aR sleep 1
 #

Then 'perf trace' will notice that and do the pointer -> contents
expansion:

 # trace -e open touch /tmp/bla
  0.165 (0.010 ms): touch/17752 open(filename: /etc/ld.so.cache, flags: CLOEXEC) = 3
  0.195 (0.011 ms): touch/17752 open(filename: /lib64/libc.so.6, flags: CLOEXEC) = 3
  0.512 (0.012 ms): touch/17752 open(filename: /usr/lib/locale/locale-archive, flags: CLOEXEC) = 3
  0.582 (0.012 ms): touch/17752 open(filename: /tmp/bla, flags: CREAT|NOCTTY|NONBLOCK|WRONLY, mode: 438) = 3
 #

Roughly equivalent to strace's output:

 # strace -rT -e open touch /tmp/bla
  0.000000 open("/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3 <0.000039>
  0.000317 open("/lib64/libc.so.6", O_RDONLY|O_CLOEXEC) = 3 <0.000102>
  0.001461 open("/usr/lib/locale/locale-archive", O_RDONLY|O_CLOEXEC) = 3 <0.000072>
  0.000405 open("/tmp/bla", O_WRONLY|O_CREAT|O_NOCTTY|O_NONBLOCK, 0666) = 3 <0.000055>
  0.000641 +++ exited with 0 +++
 #

Now we need to look at all syscalls that have arguments marked as
pointers with some well known names ("filename", "pathname", etc)
and set their arg formatter to the one used for the "open" syscall in
this patch.

This implementation works for syscalls with just one string being copied
from userspace. To match syscalls with more than one string being
copied via the same probe/tracepoint (vfs_getname) we need to extend
the vfs_getname probe spec to include the pointer too, but there are
some problems with that in 'perf probe' or the kernel kprobes code that
need to be investigated before considering supporting multiple strings
per syscall.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Milian Wolff <mail@milianw.de>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-xvuwx6nuj8cf389kf9s2ue2s@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
tools/perf/builtin-trace.c

index 12d6fc0227b1aa019cf5f1e831f2fcd81352cce4..0255dd8926629dc42e4233db8950c0d51875f3b4 100644 (file)
@@ -744,6 +744,11 @@ static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
 
 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
 
+static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
+                                             struct syscall_arg *arg);
+
+#define SCA_FILENAME syscall_arg__scnprintf_filename
+
 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
                                               struct syscall_arg *arg)
 {
@@ -1088,7 +1093,8 @@ static struct syscall_fmt {
        { .name     = "newfstatat", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
        { .name     = "open",       .errmsg = true,
-         .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
+         .arg_scnprintf = { [0] = SCA_FILENAME,   /* filename */
+                            [1] = SCA_OPEN_FLAGS, /* flags */ }, },
        { .name     = "open_by_handle_at", .errmsg = true,
          .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
                             [2] = SCA_OPEN_FLAGS, /* flags */ }, },
@@ -1208,6 +1214,11 @@ static size_t fprintf_duration(unsigned long t, FILE *fp)
        return printed + fprintf(fp, "): ");
 }
 
+/**
+ * filename.ptr: The filename char pointer that will be vfs_getname'd
+ * filename.entry_str_pos: Where to insert the string translated from
+ *                         filename.ptr by the vfs_getname tracepoint/kprobe.
+ */
 struct thread_trace {
        u64               entry_time;
        u64               exit_time;
@@ -1216,6 +1227,10 @@ struct thread_trace {
        unsigned long     pfmaj, pfmin;
        char              *entry_str;
        double            runtime_ms;
+        struct {
+               unsigned long ptr;
+               int           entry_str_pos;
+       } filename;
        struct {
                int       max;
                char      **table;
@@ -1418,6 +1433,27 @@ static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
        return printed;
 }
 
+/*
+ * Remember, in the thread's private trace state, the pointer value of a
+ * filename argument together with the offset inside entry_str that 'bf'
+ * currently points at.
+ */
+static void thread__set_filename_pos(struct thread *thread, const char *bf,
+                                    unsigned long ptr)
+{
+       struct thread_trace *tt = thread__priv(thread);
+       const char *entry_start = tt->entry_str;
+
+       /* Offset of bf from the start of the entry string. */
+       tt->filename.entry_str_pos = bf - entry_start;
+       tt->filename.ptr = ptr;
+}
+
+/*
+ * Format a syscall argument that is a pointer to a filename.
+ *
+ * When arg->trace->vfs_getname is not set, the raw pointer value is
+ * printed in hex into bf and the scnprintf() result is returned.
+ *
+ * Otherwise nothing is written now: the pointer and the current position
+ * in the thread's entry string are recorded via thread__set_filename_pos()
+ * and 0 is returned.
+ */
+static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
+                                             struct syscall_arg *arg)
+{
+       unsigned long ptr = arg->val;
+
+       /* ptr is unsigned long, so the conversion needs the 'l' modifier. */
+       if (!arg->trace->vfs_getname)
+               return scnprintf(bf, size, "%#lx", ptr);
+
+       thread__set_filename_pos(arg->thread, bf, ptr);
+       return 0;
+}
+
 static bool trace__filter_duration(struct trace *trace, double t)
 {
        return t < (trace->duration_filter * NSEC_PER_MSEC);
@@ -1938,7 +1974,45 @@ static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
                              union perf_event *event __maybe_unused,
                              struct perf_sample *sample)
 {
+       struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
+       struct thread_trace *ttrace;
+       size_t filename_len, entry_str_len, to_move;
+       ssize_t remaining_space;
+       char *pos;
+       const char *filename;
+
        trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
+
+       if (!thread)
+               goto out;
+
+       ttrace = thread__priv(thread);
+       if (!ttrace)
+               goto out;
+
+       if (!ttrace->filename.ptr)
+               goto out;
+
+       entry_str_len = strlen(ttrace->entry_str);
+       remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
+       if (remaining_space <= 0)
+               goto out;
+
+       filename = trace->last_vfs_getname;
+       filename_len = strlen(filename);
+       if (filename_len > (size_t)remaining_space) {
+               filename += filename_len - remaining_space;
+               filename_len = remaining_space;
+       }
+
+       to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
+       pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
+       memmove(pos + filename_len, pos, to_move);
+       memcpy(pos, filename, filename_len);
+
+       ttrace->filename.ptr = 0;
+       ttrace->filename.entry_str_pos = 0;
+out:
        return 0;
 }