diff --git a/scripts/route_check.py b/scripts/route_check.py index bf56b6d866..b3fae15212 100755 --- a/scripts/route_check.py +++ b/scripts/route_check.py @@ -46,6 +46,7 @@ import signal import traceback import subprocess +import concurrent.futures from ipaddress import ip_network from swsscommon import swsscommon @@ -338,10 +339,18 @@ def is_suppress_fib_pending_enabled(namespace): return state == 'enabled' -def get_frr_routes(namespace): +def fetch_routes(cmd): """ - Read routes from zebra through CLI command - :return frr routes dictionary + Fetch routes using the given command. + """ + output = subprocess.check_output(cmd, text=True) + return json.loads(output) + + +def get_frr_routes_parallel(namespace): + """ + Read routes from zebra through CLI command for IPv4 and IPv6 in parallel + :return combined IPv4 and IPv6 routes dictionary. """ if namespace == multi_asic.DEFAULT_NAMESPACE: v4_route_cmd = ['show', 'ip', 'route', 'json'] @@ -350,12 +359,18 @@ def get_frr_routes(namespace): v4_route_cmd = ['show', 'ip', 'route', '-n', namespace, 'json'] v6_route_cmd = ['show', 'ipv6', 'route', '-n', namespace, 'json'] - output = subprocess.check_output(v4_route_cmd, text=True) - routes = json.loads(output) - output = subprocess.check_output(v6_route_cmd, text=True) - routes.update(json.loads(output)) - print_message(syslog.LOG_DEBUG, "FRR Routes: namespace={}, routes={}".format(namespace, routes)) - return routes + with concurrent.futures.ThreadPoolExecutor() as executor: + future_v4 = executor.submit(fetch_routes, v4_route_cmd) + future_v6 = executor.submit(fetch_routes, v6_route_cmd) + + # Wait for both results to complete + v4_routes = future_v4.result() + v6_routes = future_v6.result() + + # Combine both IPv4 and IPv6 routes + v4_routes.update(v6_routes) + print_message(syslog.LOG_DEBUG, "FRR Routes: namespace={}, routes={}".format(namespace, v4_routes)) + return v4_routes def get_interfaces(namespace): @@ -556,7 +571,7 @@ def check_frr_pending_routes(namespace): retries = FRR_CHECK_RETRIES for i in range(retries): missed_rt = [] - frr_routes = get_frr_routes(namespace) + frr_routes = get_frr_routes_parallel(namespace) for _, entries in frr_routes.items(): for entry in entries: @@ -689,8 +704,9 @@ def _filter_out_neigh_route(routes, neighs): return rt_appl_miss, rt_asic_miss -def check_routes(namespace): +def check_routes_for_namespace(namespace): """ + Process a Single Namespace: The heart of this script which runs the checks. Read APPL-DB & ASIC-DB, the relevant tables for route checking. Checkout routes in ASIC-DB to match APPL-DB, discounting local & @@ -708,6 +724,82 @@ def check_routes(namespace): :return (0, None) on sucess, else (-1, results) where results holds the unjustifiable entries. """ + + results = {} + adds = [] + deletes = [] + intf_appl_miss = [] + rt_appl_miss = [] + rt_asic_miss = [] + rt_frr_miss = [] + + selector, subs, rt_asic = get_asicdb_routes(namespace) + + rt_appl = get_appdb_routes(namespace) + intf_appl = get_interfaces(namespace) + + # Diff APPL-DB routes & ASIC-DB routes + rt_appl_miss, rt_asic_miss = diff_sorted_lists(rt_appl, rt_asic) + + # Check missed ASIC routes against APPL-DB INTF_TABLE + _, rt_asic_miss = diff_sorted_lists(intf_appl, rt_asic_miss) + rt_asic_miss = filter_out_default_routes(rt_asic_miss) + rt_asic_miss = filter_out_vnet_routes(namespace, rt_asic_miss) + rt_asic_miss = filter_out_standalone_tunnel_routes(namespace, rt_asic_miss) + rt_asic_miss = filter_out_soc_ip_routes(namespace, rt_asic_miss) + + # Check APPL-DB INTF_TABLE with ASIC table route entries + intf_appl_miss, _ = diff_sorted_lists(intf_appl, rt_asic) + + if rt_appl_miss: + rt_appl_miss = filter_out_local_interfaces(namespace, rt_appl_miss) + + if rt_appl_miss: + rt_appl_miss = filter_out_voq_neigh_routes(namespace, rt_appl_miss) + + # NOTE: On dualtor environment, ignore any route miss for the + # neighbors learned from the vlan subnet. + if rt_appl_miss or rt_asic_miss: + rt_appl_miss, rt_asic_miss = filter_out_vlan_neigh_route_miss(namespace, rt_appl_miss, rt_asic_miss) + + if rt_appl_miss or rt_asic_miss: + # Look for subscribe updates for a second + adds, deletes = get_subscribe_updates(selector, subs) + + # Drop all those for which SET received + rt_appl_miss, _ = diff_sorted_lists(rt_appl_miss, adds) + + # Drop all those for which DEL received + rt_asic_miss, _ = diff_sorted_lists(rt_asic_miss, deletes) + + if rt_appl_miss: + results["missed_ROUTE_TABLE_routes"] = rt_appl_miss + + if intf_appl_miss: + results["missed_INTF_TABLE_entries"] = intf_appl_miss + + if rt_asic_miss: + results["Unaccounted_ROUTE_ENTRY_TABLE_entries"] = rt_asic_miss + + rt_frr_miss = check_frr_pending_routes(namespace) + + if rt_frr_miss: + results["missed_FRR_routes"] = rt_frr_miss + + if results: + if rt_frr_miss and not rt_appl_miss and not rt_asic_miss: + print_message(syslog.LOG_ERR, "Some routes are not set offloaded in FRR{} \ + but all routes in APPL_DB and ASIC_DB are in sync".format(namespace)) + if is_suppress_fib_pending_enabled(namespace): + mitigate_installed_not_offloaded_frr_routes(namespace, rt_frr_miss, rt_appl) + + return results, adds, deletes + + +def check_routes(namespace): + """ + Main function to parallelize route checks across all namespaces. + """ namespace_list = [] if namespace is not multi_asic.DEFAULT_NAMESPACE and namespace in multi_asic.get_namespace_list(): namespace_list.append(namespace) @@ -716,89 +808,32 @@ def check_routes(namespace): print_message(syslog.LOG_INFO, "Checking routes for namespaces: ", namespace_list) results = {} - adds = {} - deletes = {} - for namespace in namespace_list: - intf_appl_miss = [] - rt_appl_miss = [] - rt_asic_miss = [] - rt_frr_miss = [] - adds[namespace] = [] - deletes[namespace] = [] - - selector, subs, rt_asic = get_asicdb_routes(namespace) - - rt_appl = get_appdb_routes(namespace) - intf_appl = get_interfaces(namespace) - - # Diff APPL-DB routes & ASIC-DB routes - rt_appl_miss, rt_asic_miss = diff_sorted_lists(rt_appl, rt_asic) - - # Check missed ASIC routes against APPL-DB INTF_TABLE - _, rt_asic_miss = diff_sorted_lists(intf_appl, rt_asic_miss) - rt_asic_miss = filter_out_default_routes(rt_asic_miss) - rt_asic_miss = filter_out_vnet_routes(namespace, rt_asic_miss) - rt_asic_miss = filter_out_standalone_tunnel_routes(namespace, rt_asic_miss) - rt_asic_miss = filter_out_soc_ip_routes(namespace, rt_asic_miss) - - # Check APPL-DB INTF_TABLE with ASIC table route entries - intf_appl_miss, _ = diff_sorted_lists(intf_appl, rt_asic) - - if rt_appl_miss: - rt_appl_miss = filter_out_local_interfaces(namespace, rt_appl_miss) - - if rt_appl_miss: - rt_appl_miss = filter_out_voq_neigh_routes(namespace, rt_appl_miss) - - # NOTE: On dualtor environment, ignore any route miss for the - # neighbors learned from the vlan subnet. - if rt_appl_miss or rt_asic_miss: - rt_appl_miss, rt_asic_miss = filter_out_vlan_neigh_route_miss(namespace, rt_appl_miss, rt_asic_miss) - - if rt_appl_miss or rt_asic_miss: - # Look for subscribe updates for a second - adds[namespace], deletes[namespace] = get_subscribe_updates(selector, subs) - - # Drop all those for which SET received - rt_appl_miss, _ = diff_sorted_lists(rt_appl_miss, adds[namespace]) - - # Drop all those for which DEL received - rt_asic_miss, _ = diff_sorted_lists(rt_asic_miss, deletes[namespace]) - - if rt_appl_miss: - if namespace not in results: - results[namespace] = {} - results[namespace]["missed_ROUTE_TABLE_routes"] = rt_appl_miss - - if intf_appl_miss: - if namespace not in results: - results[namespace] = {} - results[namespace]["missed_INTF_TABLE_entries"] = intf_appl_miss - - if rt_asic_miss: - if namespace not in results: - results[namespace] = {} - results[namespace]["Unaccounted_ROUTE_ENTRY_TABLE_entries"] = rt_asic_miss - - rt_frr_miss = check_frr_pending_routes(namespace) - - if rt_frr_miss: - if namespace not in results: - results[namespace] = {} - results[namespace]["missed_FRR_routes"] = rt_frr_miss - - if results: - if rt_frr_miss and not rt_appl_miss and not rt_asic_miss: - print_message(syslog.LOG_ERR, "Some routes are not set offloaded in FRR{} \ - but all routes in APPL_DB and ASIC_DB are in sync".format(namespace)) - if is_suppress_fib_pending_enabled(namespace): - mitigate_installed_not_offloaded_frr_routes(namespace, rt_frr_miss, rt_appl) + all_adds = {} + all_deletes = {} + + # Use ThreadPoolExecutor to parallelize the check for each namespace + with concurrent.futures.ThreadPoolExecutor() as executor: + futures = {executor.submit(check_routes_for_namespace, ns): ns for ns in namespace_list} + + for future in concurrent.futures.as_completed(futures): + ns = futures[future] + all_adds[ns] = [] + all_deletes[ns] = [] + try: + result, adds, deletes = future.result() + if result: + results[ns] = result + all_adds[ns] = adds + all_deletes[ns] = deletes + except Exception as e: + print_message(syslog.LOG_ERR, "Error processing namespace {}: {}".format(ns, e)) + return -1, results if results: print_message(syslog.LOG_WARNING, "Failure results: {", json.dumps(results, indent=4), "}") print_message(syslog.LOG_WARNING, "Failed. Look at reported mismatches above") - print_message(syslog.LOG_WARNING, "add: ", json.dumps(adds, indent=4)) - print_message(syslog.LOG_WARNING, "del: ", json.dumps(deletes, indent=4)) + print_message(syslog.LOG_WARNING, "add: ", json.dumps(all_adds, indent=4)) + print_message(syslog.LOG_WARNING, "del: ", json.dumps(all_deletes, indent=4)) return -1, results else: print_message(syslog.LOG_INFO, "All good!")