Skip to content

Commit 35a0fbd

Browse files
committed
Be more robust to seeing a poptrap without a matching pushtrap
Also add more verbose logging for decode errors. Signed-off-by: Tudor Brindus <[email protected]>
1 parent 492afe4 commit 35a0fbd

File tree

3 files changed

+43
-13
lines changed

3 files changed

+43
-13
lines changed

src/trace_writer.ml

Lines changed: 33 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -826,18 +826,31 @@ end = struct
826826
thread_info.callstack <- Callstack.create ~create_time:time;
827827
Callstack.push thread_info.callstack top
828828
| Poptrap ->
829-
(* We should only have the synthetic frame we created at this point. If we
830-
have more than that, we've gotten confused in our state tracking
831-
somewhere. *)
829+
(* Assuming we didn't drop anything, we should only have the synthetic
830+
frame we created at this point. If we have more than that, we either got
831+
confused in our state tracking somewhere, or more likely, IPT dropped
832+
some data. *)
832833
if Callstack.depth thread_info.callstack <> 1
833834
then
834-
eprint_s
835-
[%message
836-
"BUG: expected callstack to be depth 1 at the time we encountered a \
837-
poptrap, but it wasn't"
838-
~callstack:(thread_info.callstack : Callstack.t)];
839-
ignore (Callstack.pop thread_info.callstack : _);
840-
clear_trap_stack t thread_info ~time));
835+
eprintf
836+
"Warning: expected callstack depth to be the same going into a [try] \
837+
block as when leaving it, but it wasn't (off by %d). Did Intel \
838+
Processor Trace drop some data? Will attempt to recover.\n\
839+
%!"
840+
(Callstack.depth thread_info.callstack - 1)
841+
else (
842+
(* Only pop the exception callstack if we're at the same callstack
843+
depth as we were when we saw [Pushtrap]. This should let us recover
844+
from situations like:
845+
846+
- Pushtrap 1
847+
- Pushtrap 2
848+
- Poptrap 2
849+
- Poptrap 1
850+
851+
where "Pushtrap 2" gets dropped. *)
852+
ignore (Callstack.pop thread_info.callstack : _);
853+
clear_trap_stack t thread_info ~time)));
841854
last_known_instruction_pointer := Some dst.instruction_pointer
842855
;;
843856
end
@@ -939,6 +952,15 @@ let write_event_and_callstack
939952
event_and_callstack
940953
;;
941954

955+
let warn_decode_error ~instruction_pointer ~message =
956+
eprintf
957+
"Warning: perf reported an error decoding the trace: %s\n!"
958+
(match instruction_pointer with
959+
| None -> [%string "'%{message}'"]
960+
| Some instruction_pointer ->
961+
[%string "'%{message}' @ IP %{instruction_pointer#Int64.Hex}."])
962+
;;
963+
942964
(* Write perf_events into a file as a Fuschia trace (stack events). Events should be
943965
collected with --itrace=be or cre, and -F pid,tid,time,flags,addr,sym,symoff as per
944966
the constants defined above. *)
@@ -955,6 +977,7 @@ let write_event (T t) ?events_writer event =
955977
maybe_stop_filtered_region t ~should_write;
956978
match event with
957979
| Error { thread = _; instruction_pointer; message; time = _ } ->
980+
warn_decode_error ~instruction_pointer ~message;
958981
let name = sprintf !"[decode error: %s]" message in
959982
write_duration_instant t ~thread ~name ~time ~args:[];
960983
let is_kernel_address =

test/decode_errors.ml

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,9 @@ let%expect_test "decode error during memmove" =
2323
INPUT TRACE STREAM ENDED, any lines printed below this were deferred
2424
-> 21ns BEGIN itch_bbo::book::Book::add_order [inferred start time]
2525
-> 141ns END __memmove_avx_unaligned_erms
26-
-> 141ns END itch_bbo::book::Book::add_order |}]
26+
-> 141ns END itch_bbo::book::Book::add_order
27+
Warning: perf reported an error decoding the trace: 'Overflow packet' @ IP 0x7ffff7327730.
28+
! |}]
2729
;;
2830

2931
let%expect_test "decode error during rust B-tree rebalance" =
@@ -95,5 +97,8 @@ let%expect_test "decode error during rust B-tree rebalance" =
9597
-> 419ns END _int_free
9698
-> 419ns END merge_tracking_child_edge
9799
-> 420ns END remove_leaf_kv
98-
INPUT TRACE STREAM ENDED, any lines printed below this were deferred |}]
100+
INPUT TRACE STREAM ENDED, any lines printed below this were deferred
101+
Warning: perf reported an error decoding the trace: 'Overflow packet' @ IP 0x7ffff7327730.
102+
!Warning: perf reported an error decoding the trace: 'Overflow packet' @ IP 0x7ffff7327730.
103+
! |}]
99104
;;

test/page_fault.ml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -643,5 +643,7 @@ let%expect_test "A page fault during demo.c" =
643643
-> 224ns BEGIN _dl_catch_exception [inferred start time]
644644
-> 224ns BEGIN _fini [inferred start time]
645645
-> 1.373us END _fini
646-
-> 1.373us END _dl_catch_exception |}]
646+
-> 1.373us END _dl_catch_exception
647+
Warning: perf reported an error decoding the trace: 'Overflow packet (foo.so)' @ IP 0x7f5948676e18.
648+
! |}]
647649
;;

0 commit comments

Comments
 (0)