1
0
Fork 0
mirror of https://github.com/ossrs/srs.git synced 2025-03-09 15:49:59 +00:00

APM: Support distributed tracing by Tencent Cloud APM. v5.0.63

This commit is contained in:
winlin 2022-08-24 11:04:39 +08:00
parent 736c661808
commit 3e2f8622f8
49 changed files with 4989 additions and 719 deletions

View file

@ -38,6 +38,7 @@ using namespace std;
#include <srs_protocol_utility.hpp>
#include <srs_protocol_json.hpp>
#include <srs_app_rtc_source.hpp>
#include <srs_app_tencentcloud.hpp>
// the timeout in srs_utime_t to wait encoder to republish
// if timeout, close the connection.
@ -100,6 +101,9 @@ SrsRtmpConn::SrsRtmpConn(SrsServer* svr, srs_netfd_t c, string cip, int cport)
ip = cip;
port = cport;
create_time = srsu2ms(srs_get_system_time());
span_main_ = _srs_apm->dummy();
span_connect_ = _srs_apm->dummy();
span_client_ = _srs_apm->dummy();
trd = new SrsSTCoroutine("rtmp", this, _srs_context->get_id());
kbps = new SrsNetworkKbps();
@ -145,6 +149,9 @@ SrsRtmpConn::~SrsRtmpConn()
srs_freep(rtmp);
srs_freep(refer);
srs_freep(security);
srs_freep(span_main_);
srs_freep(span_connect_);
srs_freep(span_client_);
}
std::string SrsRtmpConn::desc()
@ -152,13 +159,27 @@ std::string SrsRtmpConn::desc()
return "RtmpConn";
}
std::string srs_ipv4_string(uint32_t rip)
{
return srs_fmt("%d.%d.%d.%d", uint8_t(rip>>24), uint8_t(rip>>16), uint8_t(rip>>8), uint8_t(rip));
}
// TODO: return detail message when error for client.
srs_error_t SrsRtmpConn::do_cycle()
{
srs_error_t err = srs_success;
// We should keep the root span to alive util connection closed.
// Note that we use producer and consumer span because RTMP connection is long polling connection.
// Note that we also store this span in coroutine context, so that edge could load it.
srs_freep(span_main_);
span_main_ = _srs_apm->span("rtmp")->set_kind(SrsApmKindServer)->attr("cip", ip)
->attr("cid", _srs_context->get_id().c_str());
srs_trace("RTMP client ip=%s:%d, fd=%d", ip.c_str(), port, srs_netfd_fileno(stfd));
srs_trace("RTMP client ip=%s:%d, fd=%d, trace=%s, span=%s", ip.c_str(), port, srs_netfd_fileno(stfd),
span_main_->format_trace_id(), span_main_->format_span_id()
);
rtmp->set_recv_timeout(SRS_CONSTS_RTMP_TIMEOUT);
rtmp->set_send_timeout(SRS_CONSTS_RTMP_TIMEOUT);
@ -167,16 +188,23 @@ srs_error_t SrsRtmpConn::do_cycle()
}
uint32_t rip = rtmp->proxy_real_ip();
std::string rips = srs_ipv4_string(rip);
if (rip > 0) {
srs_trace("RTMP proxy real client ip=%d.%d.%d.%d",
uint8_t(rip>>24), uint8_t(rip>>16), uint8_t(rip>>8), uint8_t(rip));
srs_trace("RTMP proxy real client ip=%s", rips.c_str());
}
// Update the real IP of client, also set the HTTP fields.
span_main_->attr("rip", rip ? rips : ip)->attr("http.client_ip", rip ? rips : ip);
// The span for RTMP connecting to application.
srs_freep(span_connect_);
span_connect_ = _srs_apm->span("connect")->as_child(span_main_);
SrsRequest* req = info->req;
if ((err = rtmp->connect_app(req)) != srs_success) {
return srs_error_wrap(err, "rtmp connect tcUrl");
}
// set client ip to request.
req->ip = ip;
@ -210,6 +238,10 @@ srs_error_t SrsRtmpConn::do_cycle()
srs_trace("edge-srs ip=%s, version=%s, pid=%d, id=%d",
srs_server_ip.c_str(), srs_version.c_str(), srs_pid, srs_id);
}
// Load the span from the AMF0 object propagator.
// Note that we will update the trace id, so please make sure no spans are ended before this.
_srs_apm->extract(span_main_, req->args);
}
if ((err = service_cycle()) != srs_success) {
@ -381,6 +413,9 @@ srs_error_t SrsRtmpConn::service_cycle()
if ((err = rtmp->response_connect_app(req, local_ip.c_str())) != srs_success) {
return srs_error_wrap(err, "rtmp: response connect app");
}
// Must be a connecting application span.
span_connect_->end();
if ((err = rtmp->on_bw_done()) != srs_success) {
return srs_error_wrap(err, "rtmp: on bw down");
@ -439,9 +474,8 @@ srs_error_t SrsRtmpConn::service_cycle()
srs_error_t SrsRtmpConn::stream_service_cycle()
{
srs_error_t err = srs_success;
SrsRequest* req = info->req;
if ((err = rtmp->identify_client(info->res->stream_id, info->type, req->stream, req->duration)) != srs_success) {
return srs_error_wrap(err, "rtmp: identify client");
}
@ -458,12 +492,21 @@ srs_error_t SrsRtmpConn::stream_service_cycle()
req->strip();
srs_trace("client identified, type=%s, vhost=%s, app=%s, stream=%s, param=%s, duration=%dms",
srs_client_type_string(info->type).c_str(), req->vhost.c_str(), req->app.c_str(), req->stream.c_str(), req->param.c_str(), srsu2msi(req->duration));
// Start APM only when client is identified, because it might republish.
srs_freep(span_client_);
span_client_ = _srs_apm->span("client")->as_child(span_connect_)->attr("type", srs_client_type_string(info->type))
->attr("url", req->get_stream_url())->attr("http.url", req->get_stream_url());
// We store the span to coroutine context, for edge to load it.
_srs_apm->store(span_client_);
// discovery vhost, resolve the vhost from config
SrsConfDirective* parsed_vhost = _srs_config->get_vhost(req->vhost);
if (parsed_vhost) {
req->vhost = parsed_vhost->arg0();
}
span_client_->attr("vhost", req->vhost)->attr("http.host", req->host)->attr("http.server_name", req->vhost)
->attr("http.target", srs_fmt("/%s/%s", req->app.c_str(), req->stream.c_str()));
if (req->schema.empty() || req->vhost.empty() || req->port == 0 || req->app.empty()) {
return srs_error_new(ERROR_RTMP_REQ_TCURL, "discovery tcUrl failed, tcUrl=%s, schema=%s, vhost=%s, port=%d, app=%s",
@ -535,6 +578,11 @@ srs_error_t SrsRtmpConn::stream_service_cycle()
if ((err = http_hooks_on_play()) != srs_success) {
return srs_error_wrap(err, "rtmp: callback on play");
}
// Must be a client span.
span_client_->set_name("play")->end();
// We end the connection span because it's a producer and only trace the established.
span_main_->end();
err = playing(source);
http_hooks_on_stop();
@ -545,6 +593,11 @@ srs_error_t SrsRtmpConn::stream_service_cycle()
if ((err = rtmp->start_fmle_publish(info->res->stream_id)) != srs_success) {
return srs_error_wrap(err, "rtmp: start FMLE publish");
}
// Must be a client span.
span_client_->set_name("publish")->end();
// We end the connection span because it's a producer and only trace the established.
span_main_->end();
return publishing(source);
}
@ -552,6 +605,11 @@ srs_error_t SrsRtmpConn::stream_service_cycle()
if ((err = rtmp->start_haivision_publish(info->res->stream_id)) != srs_success) {
return srs_error_wrap(err, "rtmp: start HAIVISION publish");
}
// Must be a client span.
span_client_->set_name("publish")->end();
// We end the connection span because it's a producer and only trace the established.
span_main_->end();
return publishing(source);
}
@ -559,6 +617,11 @@ srs_error_t SrsRtmpConn::stream_service_cycle()
if ((err = rtmp->start_flash_publish(info->res->stream_id)) != srs_success) {
return srs_error_wrap(err, "rtmp: start FLASH publish");
}
// Must be a client span.
span_client_->set_name("publish")->end();
// We end the connection span because it's a producer and only trace the established.
span_main_->end();
return publishing(source);
}
@ -719,6 +782,10 @@ srs_error_t SrsRtmpConn::do_playing(SrsLiveSource* source, SrsLiveConsumer* cons
srs_trace("start play smi=%dms, mw_sleep=%d, mw_msgs=%d, realtime=%d, tcp_nodelay=%d",
srsu2msi(send_min_interval), srsu2msi(mw_sleep), mw_msgs, realtime, tcp_nodelay);
ISrsApmSpan* span = _srs_apm->span("play-cycle")->set_kind(SrsApmKindProducer)->as_child(span_client_)
->attr("realtime", srs_fmt("%d", realtime))->end();
SrsAutoFree(ISrsApmSpan, span);
while (true) {
// when source is set to expired, disconnect it.
@ -762,6 +829,11 @@ srs_error_t SrsRtmpConn::do_playing(SrsLiveSource* source, SrsLiveConsumer* cons
srs_trace("-> " SRS_CONSTS_LOG_PLAY " time=%d, msgs=%d, okbps=%d,%d,%d, ikbps=%d,%d,%d, mw=%d/%d",
(int)pprint->age(), count, kbps->get_send_kbps(), kbps->get_send_kbps_30s(), kbps->get_send_kbps_5m(),
kbps->get_recv_kbps(), kbps->get_recv_kbps_30s(), kbps->get_recv_kbps_5m(), srsu2msi(mw_sleep), mw_msgs);
// TODO: Do not use pithy print for frame span.
ISrsApmSpan* sample = _srs_apm->span("play-frame")->set_kind(SrsApmKindConsumer)->as_child(span)
->attr("msgs", srs_fmt("%d", count))->attr("kbps", srs_fmt("%d", kbps->get_send_kbps_30s()));
srs_freep(sample);
}
if (count <= 0) {
@ -887,6 +959,10 @@ srs_error_t SrsRtmpConn::do_publishing(SrsLiveSource* source, SrsPublishRecvThre
srs_utime_t mr_sleep = _srs_config->get_mr_sleep(req->vhost);
srs_trace("start publish mr=%d/%d, p1stpt=%d, pnt=%d, tcp_nodelay=%d", mr, srsu2msi(mr_sleep), srsu2msi(publish_1stpkt_timeout), srsu2msi(publish_normal_timeout), tcp_nodelay);
}
ISrsApmSpan* span = _srs_apm->span("publish-cycle")->set_kind(SrsApmKindProducer)->as_child(span_client_)
->attr("timeout", srs_fmt("%d", srsu2msi(publish_normal_timeout)))->end();
SrsAutoFree(ISrsApmSpan, span);
int64_t nb_msgs = 0;
uint64_t nb_frames = 0;
@ -935,6 +1011,12 @@ srs_error_t SrsRtmpConn::do_publishing(SrsLiveSource* source, SrsPublishRecvThre
(int)pprint->age(), kbps->get_send_kbps(), kbps->get_send_kbps_30s(), kbps->get_send_kbps_5m(),
kbps->get_recv_kbps(), kbps->get_recv_kbps_30s(), kbps->get_recv_kbps_5m(), mr, srsu2msi(mr_sleep),
srsu2msi(publish_1stpkt_timeout), srsu2msi(publish_normal_timeout));
// TODO: Do not use pithy print for frame span.
ISrsApmSpan* sample = _srs_apm->span("publish-frame")->set_kind(SrsApmKindConsumer)->as_child(span)
->attr("msgs", srs_fmt("%" PRId64, nb_frames))->attr("kbps", srs_fmt("%d", kbps->get_recv_kbps_30s()));
srs_freep(sample);
}
}
@ -1453,7 +1535,18 @@ srs_error_t SrsRtmpConn::start()
srs_error_t SrsRtmpConn::cycle()
{
srs_error_t err = do_cycle();
srs_error_t err = srs_success;
// Serve the client.
err = do_cycle();
// Final APM span, parent is the last span, not the root span. Note that only client or server kind will be filtered
// for error or exception report.
ISrsApmSpan* span_final = _srs_apm->span("final")->set_kind(SrsApmKindServer)->as_child(span_client_);
SrsAutoFree(ISrsApmSpan, span_final);
if (srs_error_code(err) != 0) {
span_final->record_error(err)->set_status(SrsApmStatusError, srs_fmt("fail code=%d", srs_error_code(err)));
}
// Update statistic when done.
SrsStatistic* stat = SrsStatistic::instance();