From fad6074a255b75889028bd76020a6df3119191ca Mon Sep 17 00:00:00 2001 From: winlin Date: Fri, 5 Dec 2014 11:24:05 +0800 Subject: [PATCH] set send socket buf. mw/mr sleep default to 350. --- README.md | 3 +- trunk/conf/full.conf | 9 ++-- trunk/src/app/srs_app_recv_thread.cpp | 5 ++- trunk/src/app/srs_app_rtmp_conn.cpp | 58 +++++++++++++++++++++++-- trunk/src/app/srs_app_rtmp_conn.hpp | 3 ++ trunk/src/app/srs_app_source.cpp | 2 +- trunk/src/core/srs_core.hpp | 2 +- trunk/src/core/srs_core_performance.hpp | 21 ++++++++- trunk/src/kernel/srs_kernel_consts.hpp | 26 +++-------- trunk/src/rtmp/srs_protocol_stack.cpp | 8 +++- 10 files changed, 101 insertions(+), 36 deletions(-) diff --git a/README.md b/README.md index 3fc55843c..9681f7150 100755 --- a/README.md +++ b/README.md @@ -726,6 +726,7 @@ The play benchmark by [st-load](https://github.com/winlinvip/st-load): * 2014-11-12, SRS 2.0.14, 3.5k(3500)clients, 95%CPU, 78MB. [commit](https://github.com/winlinvip/simple-rtmp-server/commit/8acd143a7a152885b815999162660fd4e7a3f247) * 2014-11-13, SRS 2.0.15, 6.0k(6000)clients, 82%CPU, 203MB. [commit](https://github.com/winlinvip/simple-rtmp-server/commit/cc6aca9ad55342a06440ce7f3b38453776b2b2d1) * 2014-11-22, SRS 2.0.30, 7.5k(7500)clients, 87%CPU, 320MB. [commit](https://github.com/winlinvip/simple-rtmp-server/commit/58136ec178e3d47db6c90a59875d7e40946936e5) +* 2014-12-05, SRS 2.0.55, 8.0k(8000)clients, 89%CPU, 360MB. (mw_sleep=1800)[commit](https://github.com/winlinvip/simple-rtmp-server/commit/58136ec178e3d47db6c90a59875d7e40946936e5) ### Publish benchmark @@ -739,7 +740,7 @@ The publish benchmark by [st-load](https://github.com/winlinvip/st-load): * 2014-12-04, SRS 2.0.49, 1.4k(1400) publishers, 68%CPU, 144MB. * 2014-12-04, SRS 2.0.49, 2.5k(2500) publishers, 95%CPU, 404MB. [commit](https://github.com/winlinvip/simple-rtmp-server/commit/29324fab469e0f7cef9ad04ffdbce832ac7dd9ff) * 2014-12-04, SRS 2.0.51, 2.5k(2500) publishers, 91%CPU, 259MB. 
[commit](https://github.com/winlinvip/simple-rtmp-server/commit/f57801eb46c16755b173984b915a4166922df6a6) -* 2014-12-04, SRS 2.0.52, 4.0k(4000) publishers, 80%CPU, 331MB. (mr_sleep=2000) [commit](https://github.com/winlinvip/simple-rtmp-server/commit/5589b13d2e216b91f97afb78ee0c011b2fccf7da) +* 2014-12-04, SRS 2.0.52, 4.0k(4000) publishers, 80%CPU, 331MB. (mr_sleep=2000)[commit](https://github.com/winlinvip/simple-rtmp-server/commit/5589b13d2e216b91f97afb78ee0c011b2fccf7da) ## Architecture diff --git a/trunk/conf/full.conf b/trunk/conf/full.conf index ddcbcb0ee..de0ca0498 100755 --- a/trunk/conf/full.conf +++ b/trunk/conf/full.conf @@ -158,14 +158,15 @@ vhost mrw.srs.com { # when there are 2500 publisher, the total memory of SRS atleast: # 183KB * 2500 = 446MB # the value recomment is [300, 2000] - # default: 500 - latency 500; + # default: 350 + latency 350; } # set the MW(merged-write) latency in ms. # SRS always set mw on, so we just set the latency value. # the latency of stream >= mw_latency + mr_latency - # default: 500 - mw_latency 500; + # the value recomment is [300, 1800] + # default: 350 + mw_latency 350; } # vhost for edge, edge and origin is the same vhost diff --git a/trunk/src/app/srs_app_recv_thread.cpp b/trunk/src/app/srs_app_recv_thread.cpp index e17f9737e..6ee404a02 100644 --- a/trunk/src/app/srs_app_recv_thread.cpp +++ b/trunk/src/app/srs_app_recv_thread.cpp @@ -421,13 +421,14 @@ int SrsPublishRecvThread::on_reload_vhost_mr(string vhost) void SrsPublishRecvThread::set_socket_buffer(int sleep_ms) { - // the underlayer api will set to SRS_MR_SOCKET_BUFFER bytes. + // the bytes: // 4KB=4096, 8KB=8192, 16KB=16384, 32KB=32768, 64KB=65536, // 128KB=131072, 256KB=262144, 512KB=524288 - // the buffer should set to SRS_MR_MAX_SLEEP_MS*kbps/8, + // the buffer should set to sleep*kbps/8, // for example, your system delivery stream in 1000kbps, // sleep 800ms for small bytes, the buffer should set to: // 800*1000/8=100000B(about 128KB). 
+ // other examples: // 2000*3000/8=750000B(about 732KB). // 2000*5000/8=1250000B(about 1220KB). int kbps = 5000; diff --git a/trunk/src/app/srs_app_rtmp_conn.cpp b/trunk/src/app/srs_app_rtmp_conn.cpp index ff1b8696b..aa5a86942 100644 --- a/trunk/src/app/srs_app_rtmp_conn.cpp +++ b/trunk/src/app/srs_app_rtmp_conn.cpp @@ -83,7 +83,9 @@ SrsRtmpConn::SrsRtmpConn(SrsServer* srs_server, st_netfd_t client_stfd) duration = 0; kbps = new SrsKbps(); kbps->set_io(skt, skt); + mw_sleep = SRS_PERF_MW_SLEEP; + mw_enabled = false; _srs_config->subscribe(this); } @@ -212,7 +214,10 @@ int SrsRtmpConn::on_reload_vhost_removed(string vhost) int SrsRtmpConn::on_reload_vhost_mw(string /*vhost*/) { - mw_sleep = _srs_config->get_mw_sleep_ms(req->vhost); + int sleep_ms = _srs_config->get_mw_sleep_ms(req->vhost); + + // when mw_sleep changed, resize the socket send buffer. + change_mw_sleep(sleep_ms); return ERROR_SUCCESS; } @@ -513,8 +518,7 @@ int SrsRtmpConn::playing(SrsSource* source) // use isolate thread to recv, // @see: https://github.com/winlinvip/simple-rtmp-server/issues/217 - SrsQueueRecvThread trd(rtmp, - SRS_CONSTS_RTMP_PULSE_TIMEOUT_US / 1000); + SrsQueueRecvThread trd(rtmp, SRS_PERF_MW_SLEEP); // start isolate recv thread. if ((ret = trd.start()) != ERROR_SUCCESS) { @@ -558,10 +562,15 @@ int SrsRtmpConn::do_playing(SrsSource* source, SrsQueueRecvThread* trd) // initialize other components SrsPithyPrint pithy_print(SRS_CONSTS_STAGE_PLAY_USER); - SrsMessageArray msgs(SYS_CONSTS_MAX_PLAY_SEND_MSGS); + SrsMessageArray msgs(SRS_PERF_MW_MSGS); bool user_specified_duration_to_stop = (req->duration > 0); int64_t starttime = -1; + // setup the mw config. + // when mw_sleep changed, resize the socket send buffer. + mw_enabled = true; + change_mw_sleep(_srs_config->get_mw_sleep_ms(req->vhost)); + while (true) { // to use isolate thread to recv, can improve about 33% performance. 
// @see: https://github.com/winlinvip/simple-rtmp-server/issues/196 @@ -602,6 +611,7 @@ int SrsRtmpConn::do_playing(SrsSource* source, SrsQueueRecvThread* trd) srs_verbose("sleep for no messages to send"); st_usleep(mw_sleep * 1000); } + srs_info("got %d msgs, mw=%d", count, mw_sleep); // reportable if (pithy_print.can_print()) { @@ -980,6 +990,46 @@ int SrsRtmpConn::process_play_control_msg(SrsConsumer* consumer, SrsMessage* msg return ret; } +void SrsRtmpConn::change_mw_sleep(int sleep_ms) +{ + if (!mw_enabled) { + return; + } + + // the bytes: + // 4KB=4096, 8KB=8192, 16KB=16384, 32KB=32768, 64KB=65536, + // 128KB=131072, 256KB=262144, 512KB=524288 + // the buffer should set to sleep*kbps/8, + // for example, your system delivery stream in 1000kbps, + // sleep 800ms for small bytes, the buffer should set to: + // 800*1000/8=100000B(about 128KB). + // other examples: + // 2000*3000/8=750000B(about 732KB). + // 2000*5000/8=1250000B(about 1220KB). + int kbps = 5000; + int socket_buffer_size = sleep_ms * kbps / 8; + + int fd = st_netfd_fileno(stfd); + int onb_sbuf = 0; + socklen_t sock_buf_size = sizeof(int); + getsockopt(fd, SOL_SOCKET, SO_SNDBUF, &onb_sbuf, &sock_buf_size); + + // socket send buffer, system will double it. 
+ int nb_sbuf = socket_buffer_size / 2; + + // apply the new socket send buffer size unconditionally, warn if it fails. + if (setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &nb_sbuf, sock_buf_size) < 0) { + srs_warn("set sock SO_SENDBUF=%d failed.", nb_sbuf); + } + getsockopt(fd, SOL_SOCKET, SO_SNDBUF, &nb_sbuf, &sock_buf_size); + + srs_trace("mw changed %d=>%d, max_msgs=%d, esbuf=%d, sbuf %d=>%d", + mw_sleep, sleep_ms, SRS_PERF_MW_MSGS, socket_buffer_size, + onb_sbuf, nb_sbuf); + + mw_sleep = sleep_ms; +} + int SrsRtmpConn::check_edge_token_traverse_auth() { int ret = ERROR_SUCCESS; diff --git a/trunk/src/app/srs_app_rtmp_conn.hpp b/trunk/src/app/srs_app_rtmp_conn.hpp index a5a8885a2..3ecaf82ca 100644 --- a/trunk/src/app/srs_app_rtmp_conn.hpp +++ b/trunk/src/app/srs_app_rtmp_conn.hpp @@ -73,6 +73,8 @@ private: SrsKbps* kbps; // the MR(merged-write) sleep time in ms. int mw_sleep; + // the MW(merged-write) is only enabled for play. + int mw_enabled; public: SrsRtmpConn(SrsServer* srs_server, st_netfd_t client_stfd); virtual ~SrsRtmpConn(); @@ -102,6 +104,7 @@ private: virtual int handle_publish_message(SrsSource* source, SrsMessage* msg, bool is_fmle, bool vhost_is_edge); virtual int process_publish_message(SrsSource* source, SrsMessage* msg, bool vhost_is_edge); virtual int process_play_control_msg(SrsConsumer* consumer, SrsMessage* msg); + virtual void change_mw_sleep(int sleep_ms); private: virtual int check_edge_token_traverse_auth(); virtual int connect_server(int origin_index, st_netfd_t* pstsock); diff --git a/trunk/src/app/srs_app_source.cpp b/trunk/src/app/srs_app_source.cpp index d03b6dc08..baa3e5019 100644 --- a/trunk/src/app/srs_app_source.cpp +++ b/trunk/src/app/srs_app_source.cpp @@ -216,7 +216,7 @@ int SrsMessageQueue::dump_packets(int max_count, SrsMessage** pmsgs, int& count) } else { // erase some vector elements may cause memory copy, // maybe can use more efficient vector.swap to avoid copy. 
- // @remark for the pmsgs is big enough, for instance, SYS_CONSTS_MAX_PLAY_SEND_MSGS 128, + // @remark for the pmsgs is big enough, for instance, SRS_PERF_MW_MSGS 128, // the rtmp play client will get 128msgs once, so this branch rarely execute. msgs.erase(msgs.begin(), msgs.begin() + count); } diff --git a/trunk/src/core/srs_core.hpp b/trunk/src/core/srs_core.hpp index 8e11138ae..13a24da78 100644 --- a/trunk/src/core/srs_core.hpp +++ b/trunk/src/core/srs_core.hpp @@ -31,7 +31,7 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. // current release version #define VERSION_MAJOR 2 #define VERSION_MINOR 0 -#define VERSION_REVISION 53 +#define VERSION_REVISION 55 // server info. #define RTMP_SIG_SRS_KEY "SRS" #define RTMP_SIG_SRS_ROLE "origin/edge server" diff --git a/trunk/src/core/srs_core_performance.hpp b/trunk/src/core/srs_core_performance.hpp index d2e148949..e54f4ef0f 100644 --- a/trunk/src/core/srs_core_performance.hpp +++ b/trunk/src/core/srs_core_performance.hpp @@ -60,7 +60,7 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define SRS_PERF_MERGED_READ // the default config of mr. #define SRS_PERF_MR_ENABLED false -#define SRS_PERF_MR_SLEEP 500 +#define SRS_PERF_MR_SLEEP 350 /** * the MW(merged-write) send cache time in ms. @@ -73,9 +73,26 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * @remark the socket send buffer default to 185KB, it large enough. * @see https://github.com/winlinvip/simple-rtmp-server/issues/194 * @see SrsConfig::get_mw_sleep_ms() +* @remark the mw sleep and msgs to send, maybe: +* mw_sleep msgs iovs +* 350 24/48 48/84 +* 500 24/48 48/84 +* 800 42/64 84/128 +* 1000 64/85 128/170 +* 1200 65/86 130/172 +* 1500 87/110 174/220 +* 1800 106/128 212/256 +* 2000 134/142 268/284 */ // the default config of mw. -#define SRS_PERF_MW_SLEEP 500 +#define SRS_PERF_MW_SLEEP 350 +/** +* how many msgs can be send entirely. 
+* for play clients to get msgs then totally send out. +* when the mw sleep is set to 1800, the msgs count is about 128. +* @remark, the recommended value is 156. +*/ +#define SRS_PERF_MW_MSGS 156 /** * how many chunk stream to cache, [0, N]. diff --git a/trunk/src/kernel/srs_kernel_consts.hpp b/trunk/src/kernel/srs_kernel_consts.hpp index d30fd07d1..5b5860c0c 100644 --- a/trunk/src/kernel/srs_kernel_consts.hpp +++ b/trunk/src/kernel/srs_kernel_consts.hpp @@ -98,42 +98,28 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. // always use fmt0 as cache. //#define SRS_CONSTS_RTMP_MAX_FMT3_HEADER_SIZE 5 -/** -* how many msgs can be send entirely. -* for play clients to get msgs then totally send out. -* for example, 25fps video, 40ms per video packet, -* while audio is 20ms per audio packet where 2/3 is audios, -* when SYS_CONSTS_MAX_PLAY_SEND_MSGS is 128, then -* we will send all 128*40ms/3=1706ms packets in a time, -* which should greater than the SRS_CONSTS_RTMP_PULSE_TIMEOUT_US -* (for example, 500ms), that is, we should: -* SYS_CONSTS_MAX_PLAY_SEND_MSGS * 40 / 3 >= SRS_CONSTS_RTMP_PULSE_TIMEOUT_US -* @remark, recomment to 128. -*/ -#define SYS_CONSTS_MAX_PLAY_SEND_MSGS 128 /** * for performance issue, * the iovs cache, @see https://github.com/winlinvip/simple-rtmp-server/issues/194 * iovs cache for multiple messages for each connections. -* each iovc is 16bytes, sizeof(iovec)=16, suppose the chunk size is 64k, -* each message send in a chunk which needs only 2 iovec, -* so the iovs max should be (SYS_CONSTS_MAX_PLAY_SEND_MSGS * 16 * 2) +* suppose the chunk size is 64k, each message is sent in one chunk which needs only 2 iovecs, +* so the iovs max should be (SRS_PERF_MW_MSGS * 2) * * @remark, SRS will realloc when the iovs not enough. 
*/ -#define SRS_CONSTS_IOVS_MAX (SYS_CONSTS_MAX_PLAY_SEND_MSGS * 32) +#define SRS_CONSTS_IOVS_MAX (SRS_PERF_MW_MSGS * 2) /** * for performance issue, * the c0c3 cache, @see https://github.com/winlinvip/simple-rtmp-server/issues/194 * c0c3 cache for multiple messages for each connections. * each c0 <= 16byes, suppose the chunk size is 64k, * each message send in a chunk which needs only a c0 header, -* so the c0c3 cache should be (SYS_CONSTS_MAX_PLAY_SEND_MSGS * 16) +* so the c0c3 cache should be (SRS_PERF_MW_MSGS * 16) * * @remark, SRS will try another loop when c0c3 cache dry, for we cannot realloc it. -* so we use larger c0c3 cache, that is (SYS_CONSTS_MAX_PLAY_SEND_MSGS * 32) +* so we use larger c0c3 cache, that is (SRS_PERF_MW_MSGS * 32) */ -#define SRS_CONSTS_C0C3_HEADERS_MAX (SYS_CONSTS_MAX_PLAY_SEND_MSGS * 32) +#define SRS_CONSTS_C0C3_HEADERS_MAX (SRS_PERF_MW_MSGS * 32) /////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////// diff --git a/trunk/src/rtmp/srs_protocol_stack.cpp b/trunk/src/rtmp/srs_protocol_stack.cpp index 058793c45..bc84046fe 100644 --- a/trunk/src/rtmp/srs_protocol_stack.cpp +++ b/trunk/src/rtmp/srs_protocol_stack.cpp @@ -645,7 +645,7 @@ int SrsProtocol::do_send_messages(SrsMessage** msgs, int nb_msgs) // we donot use the complex basic header, // ensure the basic header is 1bytes. if (msg->header.perfer_cid < 2) { - srs_warn("change the chunk_id=%d to default=%d", + srs_info("change the chunk_id=%d to default=%d", msg->header.perfer_cid, RTMP_CID_ProtocolControl); msg->header.perfer_cid = RTMP_CID_ProtocolControl; } @@ -682,6 +682,10 @@ int SrsProtocol::do_send_messages(SrsMessage** msgs, int nb_msgs) // for we donot know how many messges maybe to send entirely, // we just alloc the iovs, it's ok. 
if (iov_index >= nb_out_iovs - 2) { + srs_warn("resize iovs %d => %d, max_msgs=%d", + nb_out_iovs, nb_out_iovs + SRS_CONSTS_IOVS_MAX, + SRS_PERF_MW_MSGS); + nb_out_iovs += SRS_CONSTS_IOVS_MAX; int realloc_size = sizeof(iovec) * nb_out_iovs; out_iovs = (iovec*)realloc(out_iovs, realloc_size); @@ -732,6 +736,8 @@ int SrsProtocol::do_send_messages(SrsMessage** msgs, int nb_msgs) if (iov_index <= 0) { return ret; } + srs_info("mw %d msgs in %d iovs, max_msgs=%d, nb_out_iovs=%d", + nb_msgs, iov_index, SRS_PERF_MW_MSGS, nb_out_iovs); // send by writev // sendout header and payload by writev.