From 8cce00bc9dddc3fc47d63625b0f512693c27ce2f Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Sat, 11 Nov 2023 18:34:27 +0100 Subject: [PATCH] qca-ssdk: fix unsupported scenario with PORT1 not declared in switch bmp Commit 947b44d9ae17 ("ipq807x: fix wrong define for LAN and WAN ess mask") started fixing wrong switch_lan_bmp that defined lan there weren't actually present. This displayed a fragility in the malibu phy init code in qca-ssdk. Add patch to fix this. Quoting the patch detailed description: I'm very confused by this and to me it's not clear the real usage of this logic. From what I can see the usage of this is EXTREMELY FRAGILE and results in dangerous results if the OEM (or anyone that by chance try to implement things in a logical manner) deviates from the default values from the "magical template". To be in more details. With QSDK 12.4, some tweaks were done to improve autoneg and now on every call of port status, the phydev is tried to add. This resulted in the call and log spam of an error with ports that are actually not present on the system with qsdk reporting phydev is NULL. This itself is not an error and printing the error is correct. What is actually an error from ages is setting generic bitmap reporting presence of port that are actually not present. This is very common on OEM where the switch_lan_bmp is always a variant of 0x1e (that on bitmap results in PORT1 PORT2 PORT3 PORT4 present) or 0x3e (PORT1 PORT2 PORT3 PORT4 PORT5). Reality is that many device are used as AP with one LAN port or one WAN port. (or even exotic configuration with PORT1 not present and PORT2 PORT3 PORT4 present (Xiaomi 3600) With this finding one can say... ok nice, then lets update the DT and set the correct bitmap... Again world is a bad place and reality is that this cause wonderful regression in some case of by extreme luck the first ever connected port working and the rest of the switch dead. The problem has been bisected to all the device that doesn't have the PORT1 declared in any of the bitmap. With this perfection in mind, on to the REAL problem. malibu_phy_hw_init FOR SOME REASON, set a global variable first_phy_addr to the first detected PHY addr that coincidentally is always PORT1. PORT1 addr is 0x0. The entire code in malibu_phy use this variable to derive the phy addrs in some function. Declaring a bitmap where the PORT1 is missing (or worse PORT4 the only one connected) result in first_phy_addr set to 1 or whatever phy addr is detected first setting wrong value all over the init stage. To fix this, just drop this variable and hardcode everything to assume the first phy adrr is ALWAYS 0 and remove calculation and use define for special case. With the following change normal switch traffic is restored and ports function is recovered. Fixes: #13945 Fixes: 947b44d9ae17 ("ipq807x: fix wrong define for LAN and WAN ess mask") Signed-off-by: Christian Marangi --- package/kernel/qca-ssdk/Makefile | 2 +- ...ibu-phy-drop-usage-of-first_phy_addr.patch | 264 ++++++++++++++++++ 2 files changed, 265 insertions(+), 1 deletion(-) create mode 100644 package/kernel/qca-ssdk/patches/100-malibu-phy-drop-usage-of-first_phy_addr.patch diff --git a/package/kernel/qca-ssdk/Makefile b/package/kernel/qca-ssdk/Makefile index f5d82605034..4107208c0eb 100644 --- a/package/kernel/qca-ssdk/Makefile +++ b/package/kernel/qca-ssdk/Makefile @@ -1,7 +1,7 @@ include $(TOPDIR)/rules.mk PKG_NAME:=qca-ssdk -PKG_RELEASE:=1 +PKG_RELEASE:=2 PKG_SOURCE_URL:=https://git.codelinaro.org/clo/qsdk/oss/lklm/qca-ssdk.git PKG_SOURCE_PROTO:=git diff --git a/package/kernel/qca-ssdk/patches/100-malibu-phy-drop-usage-of-first_phy_addr.patch b/package/kernel/qca-ssdk/patches/100-malibu-phy-drop-usage-of-first_phy_addr.patch new file mode 100644 index 00000000000..905f1cde011 --- /dev/null +++ b/package/kernel/qca-ssdk/patches/100-malibu-phy-drop-usage-of-first_phy_addr.patch @@ -0,0 +1,264 @@ +From 46ed8163ac0d9a11a629f1c446e8c5e711cf35d6 Mon Sep 17 00:00:00 2001 +From: Christian Marangi +Date: Sat, 11 Nov 2023 18:13:02 +0100 +Subject: [PATCH] malibu-phy: drop usage of first_phy_addr + +I'm very confused by this and to me it's not clear the real usage of +this logic. + +From what I can see the usage of this is EXTREMELY FRAGILE and results +in dangerous results if the OEM (or anyone that by chance try to +implement things in a logical manner) deviates from the default values +from the "magical template". + +To be in more details. With QSDK 12.4, some tweaks were done to improve +autoneg and now on every call of port status, the phydev is tried to +add. This resulted in the call and log spam of an error with ports that +are actually not present on the system with qsdk reporting phydev is +NULL. This itself is not an error and printing the error is correct. + +What is actually an error from ages is setting generic bitmap reporting +presence of port that are actually not present. This is very common on +OEM where the switch_lan_bmp is always a variant of 0x1e (that on bitmap +results in PORT1 PORT2 PORT3 PORT4 present) or 0x3e (PORT1 PORT2 PORT3 +PORT4 PORT5). Reality is that many device are used as AP with one LAN +port or one WAN port. (or even exotic configuration with PORT1 not +present and PORT2 PORT3 PORT4 present (Xiaomi 3600) + +With this finding one can say... ok nice, then lets update the DT and +set the correct bitmap... + +Again world is a bad place and reality is that this cause wonderful +regression in some case of by extreme luck the first ever connected +port working and the rest of the switch dead. + +The problem has been bisected to all the device that doesn't have the +PORT1 declared in any of the bitmap. + +With this perfection in mind, on to the REAL problem. + +malibu_phy_hw_init FOR SOME REASON, set a global variable first_phy_addr +to the first detected PHY addr that coincidentally is always PORT1. +PORT1 addr is 0x0. The entire code in malibu_phy use this variable to +derive the phy addrs in some function. + +Declaring a bitmap where the PORT1 is missing (or worse PORT4 the only +one connected) result in first_phy_addr set to 1 or whatever phy addr is +detected first setting wrong value all over the init stage. + +To fix this, just drop this variable and hardcode everything to assume +the first phy adrr is ALWAYS 0 and remove calculation and use define for +special case. + +With the following change normal switch traffic is restored and ports +function is recovered. + +Signed-off-by: Christian Marangi +--- + src/hsl/phy/malibu_phy.c | 63 +++++++++++++++++----------------------- + 1 file changed, 26 insertions(+), 37 deletions(-) + +--- a/src/hsl/phy/malibu_phy.c ++++ b/src/hsl/phy/malibu_phy.c +@@ -26,8 +26,9 @@ + #include "qcaphy_common.h" + #include "ssdk_plat.h" + +-static a_uint32_t first_phy_addr = MAX_PHY_ADDR; + static a_uint32_t combo_phy_addr = MAX_PHY_ADDR; ++#define PORT4_PHY_ID 0x4 ++#define PORT5_PHY_ID 0x5 + #define COMBO_PHY_ID combo_phy_addr + + /****************************************************************************** +@@ -1250,10 +1251,10 @@ sw_error_t + malibu_phy_serdes_reset(a_uint32_t dev_id) + { + +- hsl_phy_mii_reg_write(dev_id, first_phy_addr + MALIBU_PHY_PSGMII_ADDR_INC, ++ hsl_phy_mii_reg_write(dev_id, MALIBU_PHY_PSGMII_ADDR_INC, + MALIBU_MODE_RESET_REG, MALIBU_MODE_CHANAGE_RESET); + mdelay(100); +- hsl_phy_mii_reg_write(dev_id, first_phy_addr + MALIBU_PHY_PSGMII_ADDR_INC, ++ hsl_phy_mii_reg_write(dev_id, MALIBU_PHY_PSGMII_ADDR_INC, + MALIBU_MODE_RESET_REG, MALIBU_MODE_RESET_DEFAULT_VALUE); + + return SW_OK; +@@ -1271,8 +1272,7 @@ malibu_phy_interface_set_mode(a_uint32_t + a_uint16_t phy_data = 0; + static fal_port_interface_mode_t phy_mode = PORT_INTERFACE_MODE_MAX; + +- if ((phy_addr < first_phy_addr) || +- (phy_addr > (first_phy_addr + MALIBU_PHY_MAX_ADDR_INC))) ++ if (phy_addr > MALIBU_PHY_MAX_ADDR_INC) + return SW_NOT_SUPPORTED; + /*if interface_mode have been configured, then no need to configure again*/ + if(phy_mode == interface_mode) +@@ -1295,20 +1295,19 @@ malibu_phy_interface_set_mode(a_uint32_t + return SW_BAD_PARAM; + } + +- hsl_phy_modify_mii(dev_id, +- first_phy_addr + MALIBU_PHY_MAX_ADDR_INC, MALIBU_PHY_CHIP_CONFIG, ++ hsl_phy_modify_mii(dev_id, MALIBU_PHY_MAX_ADDR_INC, MALIBU_PHY_CHIP_CONFIG, + BITS(0, 4), phy_data); + + /* reset operation */ + malibu_phy_serdes_reset(dev_id); + + if (interface_mode == PHY_PSGMII_FIBER) { +- hsl_phy_mii_reg_write(dev_id, first_phy_addr + MALIBU_PHY_MAX_ADDR_INC, ++ hsl_phy_mii_reg_write(dev_id, MALIBU_PHY_MAX_ADDR_INC, + MALIBU_PHY_CHIP_CONFIG, MALIBU_MODECTRL_DFLT); +- hsl_phy_mii_reg_write(dev_id, first_phy_addr + MALIBU_PHY_MAX_ADDR_INC, ++ hsl_phy_mii_reg_write(dev_id, MALIBU_PHY_MAX_ADDR_INC, + MALIBU_PHY_CONTROL, MALIBU_MIICTRL_DFLT); + hsl_phy_phydev_autoneg_update(dev_id, +- first_phy_addr + MALIBU_PHY_MAX_ADDR_INC, A_FALSE, 0); ++ MALIBU_PHY_MAX_ADDR_INC, A_FALSE, 0); + } + phy_mode = interface_mode; + SSDK_DEBUG("malibu phy is configured as phy_mode:0x%x\n", phy_mode); +@@ -1329,13 +1328,12 @@ malibu_phy_interface_get_mode(a_uint32_t + a_uint16_t phy_data; + a_uint16_t copper_mode; + +- if ((phy_addr < first_phy_addr) || +- (phy_addr > (first_phy_addr + MALIBU_PHY_MAX_ADDR_INC))) { ++ if (phy_addr > MALIBU_PHY_MAX_ADDR_INC) { + return SW_NOT_SUPPORTED; + } + + phy_data = hsl_phy_mii_reg_read(dev_id, +- first_phy_addr + MALIBU_PHY_MAX_ADDR_INC, MALIBU_PHY_CHIP_CONFIG); ++ MALIBU_PHY_MAX_ADDR_INC, MALIBU_PHY_CHIP_CONFIG); + copper_mode = ((phy_data & MALIBU_PHY_COPPER_MODE) >> 0xf); + phy_data &= 0x000f; + +@@ -1344,13 +1342,13 @@ malibu_phy_interface_get_mode(a_uint32_t + *interface_mode = PHY_PSGMII_BASET; + break; + case MALIBU_PHY_PSGMII_BX1000: +- if (phy_addr == first_phy_addr + MALIBU_PHY_MAX_ADDR_INC) ++ if (phy_addr == MALIBU_PHY_MAX_ADDR_INC) + *interface_mode = PHY_PSGMII_BX1000; + else + *interface_mode = PHY_PSGMII_BASET; + break; + case MALIBU_PHY_PSGMII_FX100: +- if (phy_addr == first_phy_addr + MALIBU_PHY_MAX_ADDR_INC) ++ if (phy_addr == MALIBU_PHY_MAX_ADDR_INC) + *interface_mode = PHY_PSGMII_FX100; + else + *interface_mode = PHY_PSGMII_BASET; +@@ -1359,14 +1357,14 @@ malibu_phy_interface_get_mode(a_uint32_t + if (copper_mode) { + *interface_mode = PHY_PSGMII_BASET; + } else { +- if (phy_addr == first_phy_addr + MALIBU_PHY_MAX_ADDR_INC) ++ if (phy_addr == MALIBU_PHY_MAX_ADDR_INC) + *interface_mode = PHY_PSGMII_FIBER; + else + *interface_mode = PHY_PSGMII_BASET; + } + break; + case MALIBU_PHY_SGMII_BASET: +- if (phy_addr == first_phy_addr + MALIBU_PHY_MAX_ADDR_INC) ++ if (phy_addr == MALIBU_PHY_MAX_ADDR_INC) + *interface_mode = PHY_SGMII_BASET; + else + *interface_mode = PORT_QSGMII; +@@ -1392,13 +1390,12 @@ malibu_phy_interface_get_mode_status(a_u + a_uint16_t phy_data, phy_mode, phy_mode_status; + a_uint16_t copper_mode; + +- if ((phy_addr < first_phy_addr) || +- (phy_addr > (first_phy_addr + MALIBU_PHY_MAX_ADDR_INC))) { ++ if (phy_addr > MALIBU_PHY_MAX_ADDR_INC) { + return SW_NOT_SUPPORTED; + } + + phy_data = hsl_phy_mii_reg_read(dev_id, +- first_phy_addr + MALIBU_PHY_MAX_ADDR_INC, MALIBU_PHY_CHIP_CONFIG); ++ MALIBU_PHY_MAX_ADDR_INC, MALIBU_PHY_CHIP_CONFIG); + copper_mode = ((phy_data & MALIBU_PHY_COPPER_MODE) >> 0xf); + phy_mode = phy_data & 0x000f; + phy_mode_status = (phy_data & 0x00f0) >> 0x4; +@@ -1407,7 +1404,7 @@ malibu_phy_interface_get_mode_status(a_u + if (copper_mode) { + *interface_mode_status = PHY_PSGMII_BASET; + } else { +- if (phy_addr == first_phy_addr + MALIBU_PHY_MAX_ADDR_INC) ++ if (phy_addr == MALIBU_PHY_MAX_ADDR_INC) + *interface_mode_status = PHY_PSGMII_FIBER; + else + *interface_mode_status = PHY_PSGMII_BASET; +@@ -1418,19 +1415,19 @@ malibu_phy_interface_get_mode_status(a_u + *interface_mode_status = PHY_PSGMII_BASET; + break; + case MALIBU_PHY_PSGMII_BX1000: +- if (phy_addr == first_phy_addr + MALIBU_PHY_MAX_ADDR_INC) ++ if (phy_addr == MALIBU_PHY_MAX_ADDR_INC) + *interface_mode_status = PHY_PSGMII_BX1000; + else + *interface_mode_status = PHY_PSGMII_BASET; + break; + case MALIBU_PHY_PSGMII_FX100: +- if (phy_addr == first_phy_addr + MALIBU_PHY_MAX_ADDR_INC) ++ if (phy_addr == MALIBU_PHY_MAX_ADDR_INC) + *interface_mode_status = PHY_PSGMII_FX100; + else + *interface_mode_status = PHY_PSGMII_BASET; + break; + case MALIBU_PHY_SGMII_BASET: +- if (phy_addr == first_phy_addr + MALIBU_PHY_MAX_ADDR_INC) ++ if (phy_addr == MALIBU_PHY_MAX_ADDR_INC) + *interface_mode_status = PHY_SGMII_BASET; + else + *interface_mode_status = PORT_QSGMII; +@@ -1795,10 +1792,6 @@ malibu_phy_hw_init(a_uint32_t dev_id, a_ + { + phy_cnt ++; + phy_addr = qca_ssdk_port_to_phy_addr(dev_id, port_id); +- if (phy_addr < first_phy_addr) +- { +- first_phy_addr = phy_addr; +- } + /*enable phy power saving function by default */ + malibu_phy_set_8023az(dev_id, phy_addr, A_TRUE); + malibu_phy_set_powersave(dev_id, phy_addr, A_TRUE); +@@ -1824,29 +1817,25 @@ malibu_phy_hw_init(a_uint32_t dev_id, a_ + MALIBU_EXTENDED_NEXT_PAGE_EN, 0); + } + } +- /* qca 8072 two ports phy chip's firstly address to init phy chip */ +- if ((phy_cnt == QCA8072_PHY_NUM) && (first_phy_addr >= 0x3)) { +- first_phy_addr = first_phy_addr - 0x3; +- } + + /*workaround to enable AZ transmitting ability*/ +- hsl_phy_mmd_reg_write(dev_id, first_phy_addr + 5, A_FALSE, MALIBU_PHY_MMD1_NUM, ++ hsl_phy_mmd_reg_write(dev_id, PORT5_PHY_ID, A_FALSE, MALIBU_PHY_MMD1_NUM, + MALIBU_PSGMII_MODE_CTRL, MALIBU_PHY_PSGMII_MODE_CTRL_ADJUST_VALUE); + + /* adjust psgmii serdes tx amp */ +- hsl_phy_mii_reg_write(dev_id, first_phy_addr + 5, ++ hsl_phy_mii_reg_write(dev_id, PORT5_PHY_ID, + MALIBU_PSGMII_TX_DRIVER_1_CTRL, MALIBU_PHY_PSGMII_REDUCE_SERDES_TX_AMP); + + /* to avoid psgmii module goes into hibernation, work with psgmii self test*/ +- hsl_phy_modify_mmd(dev_id, first_phy_addr + 4, A_FALSE, MALIBU_PHY_MMD3_NUM, ++ hsl_phy_modify_mmd(dev_id, PORT4_PHY_ID, A_FALSE, MALIBU_PHY_MMD3_NUM, + MALIBU_PHY_MMD3_ADDR_REMOTE_LOOPBACK_CTRL, BIT(1), 0); + + mode = ssdk_dt_global_get_mac_mode(dev_id, 0); + if (mode == PORT_WRAPPER_PSGMII_FIBER) +- malibu_phy_interface_set_mode(dev_id, first_phy_addr, PHY_PSGMII_FIBER); ++ malibu_phy_interface_set_mode(dev_id, 0x0, PHY_PSGMII_FIBER); + + /*init combo phy address*/ +- combo_phy_addr = first_phy_addr+4; ++ combo_phy_addr = PORT4_PHY_ID; + + return SW_OK; + } -- 2.30.2