mirror of
https://github.com/projectacrn/acrn-hypervisor.git
synced 2025-08-06 10:44:41 +00:00
tools:acrn-crashlog: Detect and classify the crash in ACRN and kernel
Since ACRN is able to reboot and the reboot reason is available in SOS, acrnprobe can detect crashes of the ACRN hypervisor and of the SOS kernel. List of added crash types: 1. ACRNCRASH - crashed in the hypervisor; this detection depends on files in /tmp/acrnlog_last (provided by acrnlog). 2. IPANIC - crashed in the SOS kernel; this detection depends on pstore. 3. SWWDT_IPANIC - crashed in the SOS kernel and the reboot reason is wdt. 4. HWWDT_UNHANDLE - only the reboot reason (global) is recognized; there are no further clues as to whether it was a SOS kernel crash or a hypervisor crash. 5. SWWDT_UNHANDLE - only the reboot reason (wdt) is recognized; there are no further clues as to whether it was a SOS kernel crash or a hypervisor crash. 6. UNKNOWN - only the reboot reason (warm) is recognized; there are no further clues as to whether it was a SOS kernel crash or a hypervisor crash. Signed-off-by: Liu, Xinwu <xinwu.liu@intel.com> Acked-by: Chen Gang <gang.c.chen@intel.com>
This commit is contained in:
parent
a5853d6d8a
commit
b30ba3db15
@ -20,6 +20,8 @@
|
||||
#include "fsutils.h"
|
||||
#include "strutils.h"
|
||||
#include "channels.h"
|
||||
#include "startupreason.h"
|
||||
#include "probeutils.h"
|
||||
#include "log_sys.h"
|
||||
|
||||
#define POLLING_TIMER_SIG 0xCEAC
|
||||
@ -105,11 +107,13 @@ static void channel_oneshot(struct channel_t *cnl)
|
||||
|
||||
LOGD("initializing channel %s ...\n", cname);
|
||||
|
||||
if (!is_boot_id_changed())
|
||||
return;
|
||||
|
||||
e = create_event(REBOOT, cname, NULL, 0, NULL);
|
||||
if (e)
|
||||
event_enqueue(e);
|
||||
|
||||
|
||||
for_each_crash(id, crash, conf) {
|
||||
if (!crash || !is_root_crash(crash))
|
||||
continue;
|
||||
@ -117,13 +121,26 @@ static void channel_oneshot(struct channel_t *cnl)
|
||||
if (strcmp(crash->channel, cname))
|
||||
continue;
|
||||
|
||||
if (crash->trigger &&
|
||||
!strcmp("file", crash->trigger->type) &&
|
||||
file_exists(crash->trigger->path)) {
|
||||
e = create_event(CRASH, cname, (void *)crash,
|
||||
0, crash->trigger->path);
|
||||
if (e)
|
||||
event_enqueue(e);
|
||||
if (!crash->trigger)
|
||||
continue;
|
||||
|
||||
if (!strcmp("file", crash->trigger->type)) {
|
||||
if (file_exists(crash->trigger->path)) {
|
||||
e = create_event(CRASH, cname, (void *)crash,
|
||||
0, crash->trigger->path);
|
||||
if (e)
|
||||
event_enqueue(e);
|
||||
}
|
||||
} else if (!strcmp("rebootreason", crash->trigger->type)) {
|
||||
char rreason[REBOOT_REASON_SIZE];
|
||||
|
||||
read_startupreason(rreason, sizeof(rreason));
|
||||
if (!strcmp(rreason, crash->content[0])) {
|
||||
e = create_event(CRASH, cname, (void *)crash,
|
||||
0, crash->trigger->path);
|
||||
if (e)
|
||||
event_enqueue(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -40,5 +40,6 @@ void generate_crashfile(char *dir, char *event, char *hashkey,
|
||||
char *type, char *data0,
|
||||
char *data1, char *data2);
|
||||
char *generate_log_dir(enum e_dir_mode mode, char *hashkey);
|
||||
int is_boot_id_changed(void);
|
||||
|
||||
#endif
|
||||
|
@ -38,14 +38,21 @@
|
||||
#define STATS_CURRENT_LOG "currentstatslog"
|
||||
#define VM_CURRENT_LOG "currentvmlog"
|
||||
|
||||
#define BOOTID_NODE "/proc/sys/kernel/random/boot_id"
|
||||
#define BOOTID_LOG "currentbootid"
|
||||
|
||||
unsigned long long get_uptime(void)
|
||||
{
|
||||
static long long time_ns = -1;
|
||||
struct timespec ts;
|
||||
long long time_ns;
|
||||
struct timespec ts;
|
||||
int res;
|
||||
|
||||
clock_gettime(CLOCK_BOOTTIME, &ts);
|
||||
time_ns = (long long)ts.tv_sec * 1000000000LL +
|
||||
(long long)ts.tv_nsec;
|
||||
res = clock_gettime(CLOCK_BOOTTIME, &ts);
|
||||
if (res == -1)
|
||||
return res;
|
||||
|
||||
time_ns = (long long)ts.tv_sec * 1000000000LL +
|
||||
(long long)ts.tv_nsec;
|
||||
|
||||
return time_ns;
|
||||
}
|
||||
@ -56,6 +63,8 @@ int get_uptime_string(char *newuptime, int *hours)
|
||||
int seconds, minutes;
|
||||
|
||||
tm = get_uptime();
|
||||
if (tm == -1)
|
||||
return -1;
|
||||
|
||||
/* seconds */
|
||||
*hours = (int)(tm / 1000000000LL);
|
||||
@ -444,3 +453,41 @@ char *generate_log_dir(enum e_dir_mode mode, char *hashkey)
|
||||
|
||||
return strdup(path);
|
||||
}
|
||||
|
||||
int is_boot_id_changed(void)
|
||||
{
|
||||
void *boot_id;
|
||||
void *logged_boot_id;
|
||||
char logged_boot_id_path[PATH_MAX];
|
||||
unsigned long size;
|
||||
struct sender_t *crashlog;
|
||||
int res;
|
||||
int result = 1; /* returns changed by default */
|
||||
|
||||
crashlog = get_sender_by_name("crashlog");
|
||||
if (!crashlog)
|
||||
return result;
|
||||
|
||||
res = read_file(BOOTID_NODE, &size, &boot_id);
|
||||
if (res == -1)
|
||||
return result;
|
||||
|
||||
snprintf(logged_boot_id_path, sizeof(logged_boot_id_path), "%s/%s",
|
||||
crashlog->outdir, BOOTID_LOG);
|
||||
if (file_exists(logged_boot_id_path)) {
|
||||
res = read_file(logged_boot_id_path, &size, &logged_boot_id);
|
||||
if (res == -1)
|
||||
goto out;
|
||||
|
||||
if (!strcmp((char *)logged_boot_id, (char *)boot_id))
|
||||
result = 0;
|
||||
|
||||
free(logged_boot_id);
|
||||
}
|
||||
|
||||
if (result)
|
||||
overwrite_file(logged_boot_id_path, boot_id);
|
||||
out:
|
||||
free(boot_id);
|
||||
return result;
|
||||
}
|
||||
|
@ -2,6 +2,7 @@
|
||||
Description=ACRN crashlog probe
|
||||
Requires=telemd.socket
|
||||
Requires=usercrash_s
|
||||
After=acrnlog.service
|
||||
After=usercrash.service
|
||||
After=prepare.service
|
||||
|
||||
|
@ -27,8 +27,8 @@
|
||||
<triggers>
|
||||
<trigger id="1" enable="true">
|
||||
<name>t_pstore</name>
|
||||
<type>file</type>
|
||||
<path>/sys/fs/pstore/console-ramoops</path>
|
||||
<type>node</type>
|
||||
<path>/sys/fs/pstore/console-ramoops-0</path>
|
||||
</trigger>
|
||||
<trigger id="2" enable="true">
|
||||
<name>t_boot</name>
|
||||
@ -40,6 +40,15 @@
|
||||
<type>dir</type>
|
||||
<path>/var/log/usercrashes</path>
|
||||
</trigger>
|
||||
<trigger id="4" enable="true">
|
||||
<name>t_rebootreason</name>
|
||||
<type>rebootreason</type>
|
||||
</trigger>
|
||||
<trigger id="5" enable="true">
|
||||
<name>t_acrnlog_last</name>
|
||||
<type>file</type>
|
||||
<path>/tmp/acrnlog/acrnlog_last.[*]</path>
|
||||
</trigger>
|
||||
</triggers>
|
||||
|
||||
<vms enable="true">
|
||||
@ -57,8 +66,8 @@
|
||||
<logs>
|
||||
<log id="1" enable="true">
|
||||
<name>pstore</name>
|
||||
<type>file</type>
|
||||
<path>/sys/fs/pstore/console-ramoops</path>
|
||||
<type>node</type>
|
||||
<path>/sys/fs/pstore/console-ramoops-0</path>
|
||||
</log>
|
||||
<log id='2' enable='true'>
|
||||
<name>kmsg</name>
|
||||
@ -90,34 +99,66 @@
|
||||
|
||||
<crashes>
|
||||
<crash id='1' inherit='0' enable='true'>
|
||||
<name>UNKNOWN</name>
|
||||
<trigger>t_rebootreason</trigger>
|
||||
<channel>oneshot</channel>
|
||||
<content id='1'>WARM</content>
|
||||
<log id='1'>pstore</log>
|
||||
<log id='2'>acrnlog_last</log>
|
||||
</crash>
|
||||
<crash id='2' inherit='0' enable='true'>
|
||||
<name>SWWDT_UNHANDLE</name>
|
||||
<trigger>t_rebootreason</trigger>
|
||||
<channel>oneshot</channel>
|
||||
<content id='1'>WATCHDOG</content>
|
||||
<log id='1'>pstore</log>
|
||||
<log id='2'>acrnlog_last</log>
|
||||
</crash>
|
||||
<crash id='3' inherit='0' enable='true'>
|
||||
<name>HWWDT_UNHANDLE</name>
|
||||
<trigger>t_rebootreason</trigger>
|
||||
<channel>oneshot</channel>
|
||||
<content id='1'>GLOBAL</content>
|
||||
<log id='1'>pstore</log>
|
||||
<log id='2'>acrnlog_last</log>
|
||||
</crash>
|
||||
<crash id='4' inherit='1' enable='true'>
|
||||
<name>ACRNCRASH</name>
|
||||
<trigger>t_acrnlog_last</trigger>
|
||||
<content id='1'>= Unhandled exception:</content>
|
||||
</crash>
|
||||
<crash id='5' inherit='1' enable='true'>
|
||||
<name>IPANIC</name>
|
||||
<trigger>t_pstore</trigger>
|
||||
<channel>oneshot</channel>
|
||||
<log id='1'>pstore</log>
|
||||
<content id='1'> </content>
|
||||
<mightcontent expression='1' id='1'>Kernel panic - not syncing:</mightcontent>
|
||||
<mightcontent expression='1' id='2'>BUG: unable to handle kernel</mightcontent>
|
||||
<data id='1'>kernel BUG at</data>
|
||||
<data id='2'>EIP is at</data>
|
||||
<data id='3'>Comm:</data>
|
||||
</crash>
|
||||
<crash id='2' inherit='1' enable='true'>
|
||||
<name>IPANIC_SWWDT</name>
|
||||
<content id='1'>BUG: soft lockup - CPU#</content>
|
||||
<crash id='6' inherit='2' enable='true'>
|
||||
<name>ACRNCRASH</name>
|
||||
<trigger>t_acrnlog_last</trigger>
|
||||
<content id='1'>= Unhandled exception:</content>
|
||||
</crash>
|
||||
<crash id='3' inherit='2' enable='true'>
|
||||
<name>IPANIC_SWWDT_FAKE</name>
|
||||
<mightcontent expression='1' id='1'>EIP: panic_dbg_set</mightcontent>
|
||||
<mightcontent expression='1' id='2'>RIP: panic_dbg_set</mightcontent>
|
||||
<crash id='7' inherit='2' enable='true'>
|
||||
<name>SWWDT_IPANIC</name>
|
||||
<trigger>t_pstore</trigger>
|
||||
<content id='1'> </content>
|
||||
<mightcontent expression='1' id='1'>Kernel panic - not syncing:</mightcontent>
|
||||
<mightcontent expression='1' id='2'>BUG: unable to handle kernel</mightcontent>
|
||||
<data id='1'>kernel BUG at</data>
|
||||
<data id='2'>EIP is at</data>
|
||||
<data id='3'>Comm:</data>
|
||||
</crash>
|
||||
<crash id='4' inherit='0' enable='true'>
|
||||
<crash id='8' inherit='0' enable='true'>
|
||||
<name>USERCRASH</name>
|
||||
<trigger>t_usercrash</trigger>
|
||||
<channel>inotify</channel>
|
||||
<log id='1'>kmsg</log>
|
||||
<log id='2'>syslog</log>
|
||||
</crash>
|
||||
<crash id='5' inherit='1' enable='true'>
|
||||
<name>IPANIC_HWWDT</name>
|
||||
<content id='1'>Watchdog detected hard LOCKUP on cpu</content>
|
||||
</crash>
|
||||
</crashes>
|
||||
|
||||
<infos>
|
||||
@ -132,6 +173,4 @@
|
||||
</info>
|
||||
</infos>
|
||||
|
||||
|
||||
|
||||
</conf>
|
||||
|
Loading…
Reference in New Issue
Block a user