Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 55e8ddec authored by Wim Van Sebroeck's avatar Wim Van Sebroeck
Browse files

[WATCHDOG] iTCO_wdt: Fix ICH7+ reboot issue.



Bugzilla: 9868 & 10195.
There seems to be a bug into the SMM code that handles TCO Timeout SMI.
Andriy Gapon found that the code on his DG33TL system does the following:
> The handler is quite simple - it tests value in TCO1_CNT against 0x800, i.e.
> checks TCO_TMR_HLT. If the bit is set the handler goes into an infinite loop,
> apparently to allow the second timeout and reboot. Otherwise it simply clears
> TIMEOUT bit in TCO1_STS and that's it.
> So the logic seems to be reversed, because it is hard to see how TIMEOUT can
> get set to 1 and SMI generated when TCO_TMR_HLT is set (other than a
> transitional effect).

The only trick we have is to bypass the SMM code by turning of the generation
of the SMI#. The trick can only be enabled by setting the vendorsupport module
parameter to 911. This trick doesn't work well on laptop's.

Note: this is a dirty hack. Please handle with care. The only real fix is that
the bug in the SMM bios code get's fixed.

Signed-off-by: default avatarWim Van Sebroeck <wim@iguana.be>
parent de8cd9a3
Loading
Loading
Loading
Loading
+76 −6
Original line number Original line Diff line number Diff line
@@ -19,7 +19,7 @@


/* Module and version information */
/* Module and version information */
#define DRV_NAME	"iTCO_vendor_support"
#define DRV_NAME	"iTCO_vendor_support"
#define DRV_VERSION	"1.03"
#define DRV_VERSION	"1.04"
#define PFX		DRV_NAME ": "
#define PFX		DRV_NAME ": "


/* Includes */
/* Includes */
@@ -44,11 +44,14 @@
#define SUPERMICRO_OLD_BOARD	1
#define SUPERMICRO_OLD_BOARD	1
/* SuperMicro Pentium 4 / Xeon 4 / EMT64T Era Systems */
/* SuperMicro Pentium 4 / Xeon 4 / EMT64T Era Systems */
#define SUPERMICRO_NEW_BOARD	2
#define SUPERMICRO_NEW_BOARD	2
/* Broken BIOS */
#define BROKEN_BIOS		911


static int vendorsupport;
static int vendorsupport;
module_param(vendorsupport, int, 0);
module_param(vendorsupport, int, 0);
MODULE_PARM_DESC(vendorsupport, "iTCO vendor specific support mode, default="
MODULE_PARM_DESC(vendorsupport, "iTCO vendor specific support mode, default="
			"0 (none), 1=SuperMicro Pent3, 2=SuperMicro Pent4+");
			"0 (none), 1=SuperMicro Pent3, 2=SuperMicro Pent4+, "
							"911=Broken SMI BIOS");


/*
/*
 *	Vendor Specific Support
 *	Vendor Specific Support
@@ -242,6 +245,59 @@ static void supermicro_new_pre_set_heartbeat(unsigned int heartbeat)
	supermicro_new_lock_watchdog();
	supermicro_new_lock_watchdog();
}
}


/*
 *	Vendor Support: 911
 *	Board: Some Intel ICHx based motherboards
 *	iTCO chipset: ICH7+
 *
 *	Some Intel motherboards have a broken BIOS implementation: i.e.
 *	the SMI handler clear's the TIMEOUT bit in the TC01_STS register
 *	and does not reload the time. Thus the TCO watchdog does not reboot
 *	the system.
 *
 *	These are the conclusions of Andriy Gapon <avg@icyb.net.ua> after
 *	debugging: the SMI handler is quite simple - it tests value in
 *	TCO1_CNT against 0x800, i.e. checks TCO_TMR_HLT. If the bit is set
 *	the handler goes into an infinite loop, apparently to allow the
 *	second timeout and reboot. Otherwise it simply clears TIMEOUT bit
 *	in TCO1_STS and that's it.
 *	So the logic seems to be reversed, because it is hard to see how
 *	TIMEOUT can get set to 1 and SMI generated when TCO_TMR_HLT is set
 *	(other than a transitional effect).
 *
 *	The only fix found to get the motherboard(s) to reboot is to put
 *	the glb_smi_en bit to 0. This is a dirty hack that bypasses the
 *	broken code by disabling Global SMI.
 *
 *	WARNING: globally disabling SMI could possibly lead to dramatic
 *	problems, especially on laptops! I.e. various ACPI things where
 *	SMI is used for communication between OS and firmware.
 *
 *	Don't use this fix if you don't need to!!!
 */

static void broken_bios_start(unsigned long acpibase)
{
	unsigned long val32;

	val32 = inl(SMI_EN);
	/* Bit 13: TCO_EN     -> 0 = Disables TCO logic generating an SMI#
	   Bit  0: GBL_SMI_EN -> 0 = No SMI# will be generated by ICH. */
	val32 &= 0xffffdffe;
	outl(val32, SMI_EN);
}

static void broken_bios_stop(unsigned long acpibase)
{
	unsigned long val32;

	val32 = inl(SMI_EN);
	/* Bit 13: TCO_EN     -> 1 = Enables TCO logic generating an SMI#
	   Bit  0: GBL_SMI_EN -> 1 = Turn global SMI on again. */
	val32 |= 0x00002001;
	outl(val32, SMI_EN);
}

/*
/*
 *	Generic Support Functions
 *	Generic Support Functions
 */
 */
@@ -249,19 +305,33 @@ static void supermicro_new_pre_set_heartbeat(unsigned int heartbeat)
void iTCO_vendor_pre_start(unsigned long acpibase,
void iTCO_vendor_pre_start(unsigned long acpibase,
			   unsigned int heartbeat)
			   unsigned int heartbeat)
{
{
	if (vendorsupport == SUPERMICRO_OLD_BOARD)
	switch (vendorsupport) {
	case SUPERMICRO_OLD_BOARD:
		supermicro_old_pre_start(acpibase);
		supermicro_old_pre_start(acpibase);
	else if (vendorsupport == SUPERMICRO_NEW_BOARD)
		break;
	case SUPERMICRO_NEW_BOARD:
		supermicro_new_pre_start(heartbeat);
		supermicro_new_pre_start(heartbeat);
		break;
	case BROKEN_BIOS:
		broken_bios_start(acpibase);
		break;
	}
}
}
EXPORT_SYMBOL(iTCO_vendor_pre_start);
EXPORT_SYMBOL(iTCO_vendor_pre_start);


void iTCO_vendor_pre_stop(unsigned long acpibase)
void iTCO_vendor_pre_stop(unsigned long acpibase)
{
{
	if (vendorsupport == SUPERMICRO_OLD_BOARD)
	switch (vendorsupport) {
	case SUPERMICRO_OLD_BOARD:
		supermicro_old_pre_stop(acpibase);
		supermicro_old_pre_stop(acpibase);
	else if (vendorsupport == SUPERMICRO_NEW_BOARD)
		break;
	case SUPERMICRO_NEW_BOARD:
		supermicro_new_pre_stop();
		supermicro_new_pre_stop();
		break;
	case BROKEN_BIOS:
		broken_bios_stop(acpibase);
		break;
	}
}
}
EXPORT_SYMBOL(iTCO_vendor_pre_stop);
EXPORT_SYMBOL(iTCO_vendor_pre_stop);