golang beyla采集trace程序原理源码解析
作者:a朋
beyla支持通过ebpf,无侵入的、自动采集应用程序的trace信息,本文以golang的nethttp为例,讲述beyla对trace的采集的实现原理,有需要的朋友可以借鉴参考下,希望能够有所帮助
一. 整体原理
trace采集时,监听了golang应用程序的net/http中的函数:
- net/http.serverHandler.ServeHTTP;
- net/http.(*Transport).roundTrip;
监听ServeHTTP时:
- 若request中没有trace信息,则生成traceparent,存入go_trace_map结构(key=goroutine地址,value=trace信息);
- 若request中有trace信息,则根据trace信息,重新生成span,存入go_trace_map结构;
监听roundTrip的调用:
- 首先,根据goroutine地址,读go_trace_map结构,得到trace信息;
- 然后,将当前请求的调用信息(http_func_invocation_t,其中的tp字段即trace信息),存入ongoing_http_client_requests结构(key=goroutine地址,value=http_func_invocation_t);
监听roundTrip的调用返回:
- 首先,根据goroutine地址,读ongoing_http_client_requests结构,得到trace信息;
- 然后,将当前调用的trace信息,转换为http_request_trace结构,保存到ringbuf中;
最终,用户态程序读取ringbuf中的http_request_trace,完成trace信息的采集。
二. 监听uprobe/ServeHTTP
处理流程:
- 首先,提取goroutine和request指针;
- 然后,通过server_trace_parent()函数,处理trace信息,存入go_trace_map结构;
- 最后,将数据存入ongoing_http_server_requests结构;
// beyla/bpf/go_nethttp.c SEC("uprobe/ServeHTTP") int uprobe_ServeHTTP(struct pt_regs *ctx) { void *goroutine_addr = GOROUTINE_PTR(ctx); void *req = GO_PARAM4(ctx); http_func_invocation_t invocation = { .start_monotime_ns = bpf_ktime_get_ns(), .req_ptr = (u64)req, .tp = {0} }; if (req) { // 处理trace信息,存入go_trace_map server_trace_parent(goroutine_addr, &invocation.tp, (void*)(req + req_header_ptr_pos)); } // write event if (bpf_map_update_elem(&ongoing_http_server_requests, &goroutine_addr, &invocation, BPF_ANY)) { bpf_dbg_printk("can't update map element"); } return 0; }
重点看一下server_trace_parent()函数:
首先,从req_header读取traceparent:
- 若读到了,则copy traceId,并将parentId设置为上游span的spanId;
- 否则,生成随机的trace_id,并将parentId置为0;
- 然后,使用urand,生成随机的spanId;
- 最后,将trace信息存入go_trace_map结构,key=goroutine地址,value=trace信息;
// bpf/go_common.h static __always_inline void server_trace_parent(void *goroutine_addr, tp_info_t *tp, void *req_header) { // May get overriden when decoding existing traceparent, but otherwise we set sample ON tp->flags = 1; // Get traceparent from the Request.Header void *traceparent_ptr = extract_traceparent_from_req_headers(req_header); if (traceparent_ptr != NULL) { // 读到了traceparent .... } else { // 未读到traceparent bpf_dbg_printk("No traceparent in headers, generating"); urand_bytes(tp->trace_id, TRACE_ID_SIZE_BYTES); // 生成随机的trace_id; *((u64 *)tp->parent_id) = 0; } urand_bytes(tp->span_id, SPAN_ID_SIZE_BYTES); bpf_map_update_elem(&go_trace_map, &goroutine_addr, tp, BPF_ANY); }
go_trace_map对象的定义:
struct { __uint(type, BPF_MAP_TYPE_LRU_HASH); __type(key, void *); // key: pointer to the goroutine __type(value, tp_info_t); // value: traceparent info __uint(max_entries, MAX_CONCURRENT_SHARED_REQUESTS); __uint(pinning, LIBBPF_PIN_BY_NAME); } go_trace_map SEC(".maps"); typedef struct tp_info { unsigned char trace_id[TRACE_ID_SIZE_BYTES]; unsigned char span_id[SPAN_ID_SIZE_BYTES]; unsigned char parent_id[SPAN_ID_SIZE_BYTES]; u64 ts; u8 flags; } tp_info_t;
三. 监听uprobe/roundTrip
roundTrip函数,在使用http client发起请求时,被调用。
处理流程:
- 首先,提取goroutine地址和request地址;
- 然后,根据goroutine_addr和request,查找trace信息;
- 最后,将trace信息写入ongoing_http_client_requests对象;
// beyla/bpf/go_nethttp.c SEC("uprobe/roundTrip") int uprobe_roundTrip(struct pt_regs *ctx) { roundTripStartHelper(ctx); return 0; } static __always_inline void roundTripStartHelper(struct pt_regs *ctx) { void *goroutine_addr = GOROUTINE_PTR(ctx); void *req = GO_PARAM2(ctx); http_func_invocation_t invocation = { .start_monotime_ns = bpf_ktime_get_ns(), .req_ptr = (u64)req, .tp = {0} }; // 根据request和goroutine_addr,查找trace信息 __attribute__((__unused__)) u8 existing_tp = client_trace_parent(goroutine_addr, &invocation.tp, (void*)(req + req_header_ptr_pos)); // 将trace信息写入ongoing_http_client_requests if (bpf_map_update_elem(&ongoing_http_client_requests, &goroutine_addr, &invocation, BPF_ANY)) { bpf_dbg_printk("can't update http client map element"); } }
重点看一下查找trace信息的client_trace_parent()函数:
首先,尝试从request的header中提取traceparent:
- 若找到了,则copy traceId,设置当前span.parentId=上游span的spanId;
然后,若header中没有找到trace信息,再使用goroutine及其parent_goroutine(find_parent_goroutine),去go_trace_map中找:
- 若找到了,则copy traceId,设置当前span.parentId=上游span的spanId;
// beyla/go_common.h static __always_inline u8 client_trace_parent(void *goroutine_addr, tp_info_t *tp_i, void *req_header) { u8 found_trace_id = 0; u8 trace_id_exists = 0; // May get overriden when decoding existing traceparent or finding a server span, but otherwise we set sample ON tp_i->flags = 1; // 首先尝试从request的header中提取traceparent if (req_header) { ... } // 然后再使用goroutine去go_trace_map中找 if (!found_trace_id) { tp_info_t *tp = 0; u64 parent_id = find_parent_goroutine(goroutine_addr); if (parent_id) {// we found a parent request tp = (tp_info_t *)bpf_map_lookup_elem(&go_trace_map, &parent_id); } if (tp) { // 找到了,copy traceId,当前span.parentId=上流span.spanId *((u64 *)tp_i->trace_id) = *((u64 *)tp->trace_id); *((u64 *)(tp_i->trace_id + 8)) = *((u64 *)(tp->trace_id + 8)); *((u64 *)tp_i->parent_id) = *((u64 *)tp->span_id); tp_i->flags = tp->flags; } ... // 生成当前span.spanId urand_bytes(tp_i->span_id, SPAN_ID_SIZE_BYTES); } return trace_id_exists; }
这里有个隐形的假设条件:
- 一个goroutine及其child goroutine仅处理一个http请求;
- nethttp的框架在设计时,就由一个goroutine去处理一个http请求,是符合这个假设的;
四. 监听uprobe/roundTrip_return
处理流程:
- 首先,使用goroutine_addr,从ongoing_http_client_requests中找trace信息;
然后,初始化http_request_trace:
- 从request中找method/host/url/content_length,赋值给http_request_trace;
- 将trace信息赋值到http_request_trace;
- 从response中找status,赋值给http_request_trace;
- 最后,将http_request_trace提交到ringbuf;
// beyla/bpf/go_nethttp.c SEC("uprobe/roundTrip_return") int uprobe_roundTripReturn(struct pt_regs *ctx) { void *goroutine_addr = GOROUTINE_PTR(ctx); // 使用goroutine_addr找ongoing_http_client_requests http_func_invocation_t *invocation = bpf_map_lookup_elem(&ongoing_http_client_requests, &goroutine_addr); bpf_map_delete_elem(&ongoing_http_client_requests, &goroutine_addr); http_request_trace *trace = bpf_ringbuf_reserve(&events, sizeof(http_request_trace), 0); // 初始化http_request_trace task_pid(&trace->pid); trace->type = EVENT_HTTP_CLIENT; trace->start_monotime_ns = invocation->start_monotime_ns; trace->go_start_monotime_ns = invocation->start_monotime_ns; trace->end_monotime_ns = bpf_ktime_get_ns(); void *req_ptr = (void *)invocation->req_ptr; void *resp_ptr = (void *)GO_PARAM1(ctx); // 从request中找method,赋值给trace->method if (!read_go_str("method", req_ptr, method_ptr_pos, &trace->method, sizeof(trace->method))) { ... } // 从request中找host,赋值给trace->host if (!read_go_str("host", req_ptr, host_ptr_pos, &trace->host, sizeof(trace->host))) { ... } // 从request中找url,赋值给trace->path void *url_ptr = 0; bpf_probe_read(&url_ptr, sizeof(url_ptr), (void *)(req_ptr + url_ptr_pos)); if (!url_ptr || !read_go_str("path", url_ptr, path_ptr_pos, &trace->path, sizeof(trace->path))) { ... } // 赋值trace信息 trace->tp = invocation->tp; // 从request中找content_length,赋值给trace->content_length bpf_probe_read(&trace->content_length, sizeof(trace->content_length), (void *)(req_ptr + content_length_ptr_pos)); // 从resp中找status,赋值给trace->status bpf_probe_read(&trace->status, sizeof(trace->status), (void *)(resp_ptr + status_code_ptr_pos)); // 提交trace到ringbuf bpf_ringbuf_submit(trace, get_flags()); return 0; }
参考:
1.https://github.com/grafana/beyla/issues/521
2.https://github.com/grafana/beyla/blob/main/docs/sources/distributed-traces.md
以上就是golang beyla采集trace程序原理源码解析的详细内容,更多关于golang beyla采集trace的资料请关注脚本之家其它相关文章!