From be0aad892521a14f504e846db386c1eba95e04a8 Mon Sep 17 00:00:00 2001 From: Callum Date: Tue, 2 Apr 2024 15:37:44 +0100 Subject: [PATCH 1/4] document new integrated estimation flag usage --- .../load_balancer/doc/load_balancer_admin.xml | 129 ++++++++++++++---- 1 file changed, 106 insertions(+), 23 deletions(-) diff --git a/modules/load_balancer/doc/load_balancer_admin.xml b/modules/load_balancer/doc/load_balancer_admin.xml index 7b3bfb9ce2f..ac7ff55abf3 100644 --- a/modules/load_balancer/doc/load_balancer_admin.xml +++ b/modules/load_balancer/doc/load_balancer_admin.xml @@ -91,23 +91,26 @@ - Dialog - Dialog module + dialog - Dialog module + + - freeswitch. - only if - "fetch_freeswitch_stats" is enabled. + freeswitch - only if + "fetch_freeswitch_stats" is enabled + (required for integrated estimation mode) - dialog - TM module (only if probing is + tm - TM module (only if probing is enabled) clusterer - only if "cluster_id" - option is enabled. + option is enabled @@ -332,12 +335,7 @@ modparam("load_balancer", "lb_define_blacklist", "blist2= 2,10,6") using statistics pushed by the FreeSWITCH box. - The max value of a resource is updated every event_heartbeat_interval - seconds (see the "freeswitch" OpenSIPS module for more details - regarding this setting), as the stats arrive from FreeSWITCH. - - - Given the following format for FreeSWITCH heartbeat messages: + FreeSWITCH heartbeat messages provide the following statistics: { ... @@ -349,9 +347,28 @@ modparam("load_balancer", "lb_define_blacklist", "blist2= 2,10,6") ... } - , the load balancer uses the following formula in order to periodically - update its "max_load" values for each FreeSWITCH box (FreeSWITCH data - is highlighted in bold): + + + The current/maximum sessions and CPU idle data for each instance + are updated as the stats arrive from FreeSWITCH every + event_heartbeat_interval seconds (see the "freeswitch" OpenSIPS + module for more details regarding this setting). 
+ + + These are used according to the operational mode used in the + load balancing function calls. + + + + Relative mode + + + + The max load score for each instance is updated every + fetch_freeswitch_stats seconds. In relative mode, the load balancer + uses the following formula in order to periodically update its + "max_load" values for each FreeSWITCH box (FreeSWITCH data is + highlighted in bold): max_load = (Idle-CPU / 100) @@ -359,6 +376,54 @@ modparam("load_balancer", "lb_define_blacklist", "blist2= 2,10,6") (Session-Count - current_load)) + + + + Integrated estimation mode + + + + This mode is intended to be used in high throughput environments where + not all inbound and outbound sessions are tracked on the local + OpenSIPS instance. The heartbeat data is used as the primary source of + truth for server load. + + + In addition to the data collected in the most recent heartbeat the module + will count sessions allocated to each instance and use this data in + each subsequent calculation to track sessions and distribute the load. + Each fetch_freeswitch_stats interval the sessions since last heartbeat counters + are reset, as up-to-date load data has been provided. It is advisable to set + event_heartbeat_interval and fetch_freeswitch_stats low to improve session + data synchronisation. + + + In integrated estimation mode, the load balancer collects the session + data for each FreeSWITCH box every fetch_freeswitch_stats seconds. 
Rather than + maintaining a max load score this mode performs the following calculation at + the time a call is selecting a destination (FreeSWITCH data is highlighted in bold): + + + load_score = (100 - (100 * (Session-Count + + sessions_since_last_heartbeat) / Max-Sessions)) + * (Idle-CPU/100) + + + +Warning - heartbeat processing is asynchronous to this module + +Heartbeat data is collected in the freeswitch module upon arrival +from each FreeSWITCH instance as controlled by both the minimum interval setting +on the instance and the event_heartbeat_interval module setting. This module +will refresh its internal calculations at intervals defined by +fetch_freeswitch_stats. + +When using integrated estimation mode the sessions +since last heartbeat counter will be reset every fetch_freeswitch_stats +seconds. Keeping these values low and the same is advised for more accurate +load estimations according to your throughput requirements. + + Default value is 0 (disabled). @@ -405,7 +470,7 @@ modparam("load_balancer", "initial_freeswitch_load", 200) of the destinations and for controlling the pinging to destinations. - If clustering enbled, the module will automatically share changes + If clustering is enabled, the module will automatically share changes over the status of the destinations with the other OpenSIPS instances that are part of a cluster. Whenever such a status changes (following an MI command, a probing result, a script command), @@ -510,7 +575,7 @@ modparam("load_balancer", "cluster_sharing_tag", "vip") - n - Negative availability - use + n - Negative availability - use destinations with negative availability (exceeded capacity); do not ignore resources with negative availability, and thus able to select for load balancing destinations with exceeded @@ -519,13 +584,26 @@ modparam("load_balancer", "cluster_sharing_tag", "vip") important/high-priority calls. 
+ + i - Integrated estimation - + intended for use in deployments + where many separate SIP proxies are feeding calls into + a pool of FreeSWITCH servers. Load calculations are + performed using the most recent heartbeat data and a + counter of all sessions allocated since the last heartbeat. + Profile counting is unused in the calculation. The reported + CPU load value is used to reduce session load on systems + with high CPU utilisation. Mutually exclusive with flag "r". + + r - Relative value - the relative available load (how many percentages are free) is used in computing the load of each pear/resource; Without this flag, the Absolute value is assumed - the effective available load ( maximum_load - current_load) is used in - computing the load of each pear/resource. + computing the load of each pear/resource. Mutually exclusive + with flag "i". @@ -574,6 +652,11 @@ modparam("load_balancer", "cluster_sharing_tag", "vip") (requested resources do not exist) + + -5 (false) - mutually exclusive flags + "i" and "r" were both set + + This function can be used from REQUEST_ROUTE, BRANCH_ROUTE and @@ -583,7 +666,7 @@ modparam("load_balancer", "cluster_sharing_tag", "vip") <function>lb_start</function> usage ... -if (lb_start(1,"trascoding;conference")) { +if (lb_start(1,"transcoding;conference")) { # dst URI points to the new destination xlog("sending call to $du\n"); t_relay(); @@ -630,8 +713,8 @@ if (lb_start(1,"trascoding;conference")) { -2 (false) - no capacity available - (detinations are up and available, but they do not have any - availabe channels) + (destinations are up and available, but they do not have any + available channels) -3 (false) - no more destinations @@ -695,7 +778,7 @@ if (t_check_status("(408)|(5[0-9][0-9])")) { Function to stop and flush a current LB session. 
To be used in failure route, if you want to stop the current LB session (not to try - any other destinations from this session) and to start a completly new + any other destinations from this session) and to start a completely new one. @@ -882,7 +965,7 @@ if (lb_is_destination($si,$sp) ) { <function>lb_count_call</function> usage ... -# count as load also the calls orgininated by lb destinations +# count as load also the calls originated by lb destinations if (lb_is_destination($si,$sp) ) { # inbound call from destination lb_count_call($si,$sp,-1,"conference"); @@ -911,7 +994,7 @@ if (lb_is_destination($si,$sp) ) {
<function moreinfo="none">lb_reload</function> - Trigers the reload of the load balancing data from the DB. + Triggers the reload of the load balancing data from the DB. MI FIFO Command Format: From d15bc45f1bfe9f6d15e83e4375bc96c494816154 Mon Sep 17 00:00:00 2001 From: Callum Date: Tue, 2 Apr 2024 15:37:11 +0100 Subject: [PATCH 2/4] modify character choice for new flag --- modules/load_balancer/load_balancer.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/load_balancer/load_balancer.c b/modules/load_balancer/load_balancer.c index 3418c270866..94ea992bcd8 100644 --- a/modules/load_balancer/load_balancer.c +++ b/modules/load_balancer/load_balancer.c @@ -557,19 +557,19 @@ static int w_lb_start(struct sip_msg *req, int *grp_no, switch( *f ) { case 'r': if( flags & LB_FLAGS_PERCENT_WITH_CPU ) { - LM_ERR("flags c & r are mutually exclusive (r)\n"); + LM_ERR("flags i & r are mutually exclusive (r)\n"); return -5; } flags |= LB_FLAGS_RELATIVE; LM_DBG("using relative versus absolute estimation\n"); break; - case 'c': + case 'i': if( flags & LB_FLAGS_RELATIVE ) { - LM_ERR("flags c & r are mutually exclusive (c)\n"); + LM_ERR("flags i & r are mutually exclusive (i)\n"); return -5; } flags |= LB_FLAGS_PERCENT_WITH_CPU; - LM_DBG("using percentage of max sessions with CPU factor estimation \n"); + LM_DBG("using integrated estimation (percentage of max sessions with CPU factor estimation) \n"); break; case 'n': flags |= LB_FLAGS_NEGATIVE; From 2e68e64dcf0d8a12591daa78c9e273f997ec35e9 Mon Sep 17 00:00:00 2001 From: Callum Date: Tue, 2 Apr 2024 10:24:14 +0100 Subject: [PATCH 3/4] remove comment --- modules/load_balancer/load_balancer.c | 1 - 1 file changed, 1 deletion(-) diff --git a/modules/load_balancer/load_balancer.c b/modules/load_balancer/load_balancer.c index 94ea992bcd8..2033cd42146 100644 --- a/modules/load_balancer/load_balancer.c +++ b/modules/load_balancer/load_balancer.c @@ -810,7 +810,6 @@ static void lb_update_max_loads(unsigned 
int ticks, void *param) dst->rmap[ri].resource->profile, &dst->profile_id); old = dst->rmap[ri].max_load; - // if ( flags & LB_FLAGS_PERCENT_WITH_CPU ) { todo flags not avavilable here /* * In LB_FLAGS_PERCENT_WITH_CPU mode we capture the raw values and use these in each LB calculation. This * means we do not use profile counting in the load calculation. This is suitable for From ffe4d0ff48d3407581146d26b990a4b15bfbc272 Mon Sep 17 00:00:00 2001 From: Callum Date: Fri, 29 Mar 2024 08:16:15 +0000 Subject: [PATCH 4/4] :bug: fix print of str type --- modules/load_balancer/lb_data.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/load_balancer/lb_data.c b/modules/load_balancer/lb_data.c index 2db32f0e066..2c614cccca9 100644 --- a/modules/load_balancer/lb_data.c +++ b/modules/load_balancer/lb_data.c @@ -432,7 +432,7 @@ static int get_dst_load(struct lb_resource **res, unsigned int res_no, /* generate score based on the percentage of channels occupied, reduced by CPU idle factor */ if( dst->rmap[l].max_sessions ) { av = ( 100 - ( 100 * ( dst->rmap[l].current_sessions + dst->rmap[l].sessions_since_last_heartbeat ) / dst->rmap[l].max_sessions ) ) * dst->rmap[l].cpu_idle; - LM_DBG("destination %d <%s> availability score %d (sessions=%d since_last_hb=%d max_sess=%d cpu_idle=%.2f)", dst->id, dst->uri, av, dst->rmap[l].current_sessions, dst->rmap[l].sessions_since_last_heartbeat, dst->rmap[l].max_sessions, dst->rmap[l].cpu_idle); + LM_DBG("destination %d <%.*s> availability score %d (sessions=%d since_last_hb=%d max_sess=%d cpu_idle=%.2f)", dst->id, dst->uri.len, dst->uri.s, av, dst->rmap[l].current_sessions, dst->rmap[l].sessions_since_last_heartbeat, dst->rmap[l].max_sessions, dst->rmap[l].cpu_idle); } } else { av = dst->rmap[l].max_load - lb_dlg_binds.get_profile_size(res[k]->profile, &dst->profile_id);