tools:acrn-crashlog: Detect and classify the crash in ACRN and kernel

Since ACRN is able to reboot and the reboot reason is available in SOS,
acrnprobe can detect crashes of the ACRN hypervisor and the SOS kernel.

List of added crash types:

1. ACRNCRASH            - crashed in hypervisor; this detection depends on
                          files in /tmp/acrnlog_last (provided by acrnlog).
2. IPANIC               - crashed in SOS kernel, this detection depends on
                          pstore.
3. SWWDT_IPANIC         - crashed in SOS kernel and reboot reason is wdt.
4. HWWDT_UNHANDLE       - only the reboot reason (global) is recognized; there
                          are no further clues as to whether it is a SOS
                          kernel crash or a hypervisor crash.
5. SWWDT_UNHANDLE       - only the reboot reason (wdt) is recognized; there
                          are no further clues as to whether it is a SOS
                          kernel crash or a hypervisor crash.
6. UNKNOWN              - only the reboot reason (warm) is recognized; there
                          are no further clues as to whether it is a SOS
                          kernel crash or a hypervisor crash.

Signed-off-by: Liu, Xinwu <xinwu.liu@intel.com>
Acked-by: Chen Gang <gang.c.chen@intel.com>
This commit is contained in:
Liu, Xinwu 2018-07-03 10:51:45 +08:00 committed by Jack Ren
parent a5853d6d8a
commit b30ba3db15
5 changed files with 138 additions and 33 deletions

View File

@ -20,6 +20,8 @@
#include "fsutils.h"
#include "strutils.h"
#include "channels.h"
#include "startupreason.h"
#include "probeutils.h"
#include "log_sys.h"
#define POLLING_TIMER_SIG 0xCEAC
@ -105,11 +107,13 @@ static void channel_oneshot(struct channel_t *cnl)
LOGD("initializing channel %s ...\n", cname);
if (!is_boot_id_changed())
return;
e = create_event(REBOOT, cname, NULL, 0, NULL);
if (e)
event_enqueue(e);
for_each_crash(id, crash, conf) {
if (!crash || !is_root_crash(crash))
continue;
@ -117,13 +121,26 @@ static void channel_oneshot(struct channel_t *cnl)
if (strcmp(crash->channel, cname))
continue;
if (crash->trigger &&
!strcmp("file", crash->trigger->type) &&
file_exists(crash->trigger->path)) {
e = create_event(CRASH, cname, (void *)crash,
0, crash->trigger->path);
if (e)
event_enqueue(e);
if (!crash->trigger)
continue;
if (!strcmp("file", crash->trigger->type)) {
if (file_exists(crash->trigger->path)) {
e = create_event(CRASH, cname, (void *)crash,
0, crash->trigger->path);
if (e)
event_enqueue(e);
}
} else if (!strcmp("rebootreason", crash->trigger->type)) {
char rreason[REBOOT_REASON_SIZE];
read_startupreason(rreason, sizeof(rreason));
if (!strcmp(rreason, crash->content[0])) {
e = create_event(CRASH, cname, (void *)crash,
0, crash->trigger->path);
if (e)
event_enqueue(e);
}
}
}

View File

@ -40,5 +40,6 @@ void generate_crashfile(char *dir, char *event, char *hashkey,
char *type, char *data0,
char *data1, char *data2);
char *generate_log_dir(enum e_dir_mode mode, char *hashkey);
int is_boot_id_changed(void);
#endif

View File

@ -38,14 +38,21 @@
#define STATS_CURRENT_LOG "currentstatslog"
#define VM_CURRENT_LOG "currentvmlog"
#define BOOTID_NODE "/proc/sys/kernel/random/boot_id"
#define BOOTID_LOG "currentbootid"
/**
 * Get the current CLOCK_BOOTTIME reading in nanoseconds.
 *
 * @return The clock value converted to nanoseconds, or
 *         (unsigned long long)-1 if clock_gettime() fails. Callers
 *         detect the failure by comparing the result against -1.
 */
unsigned long long get_uptime(void)
{
	struct timespec ts;

	/* Query the clock once and never use ts if the call failed. */
	if (clock_gettime(CLOCK_BOOTTIME, &ts) == -1)
		return (unsigned long long)-1;

	return (unsigned long long)ts.tv_sec * 1000000000ULL +
	       (unsigned long long)ts.tv_nsec;
}
@ -56,6 +63,8 @@ int get_uptime_string(char *newuptime, int *hours)
int seconds, minutes;
tm = get_uptime();
if (tm == -1)
return -1;
/* seconds */
*hours = (int)(tm / 1000000000LL);
@ -444,3 +453,41 @@ char *generate_log_dir(enum e_dir_mode mode, char *hashkey)
return strdup(path);
}
/**
 * Check whether the kernel boot id differs from the one recorded on the
 * previous run, and record the current one if it does.
 *
 * The current id is read from BOOTID_NODE and compared against the file
 * BOOTID_LOG inside the "crashlog" sender's output directory.
 *
 * @return 0 if the recorded boot id matches the current one; 1 otherwise.
 *         1 is also returned when the "crashlog" sender is missing, the
 *         current boot id cannot be read, or the log path does not fit
 *         in PATH_MAX.
 */
int is_boot_id_changed(void)
{
	void *boot_id = NULL;
	void *logged_boot_id = NULL;
	char logged_boot_id_path[PATH_MAX];
	unsigned long size;
	struct sender_t *crashlog;
	int len;
	int result = 1; /* returns changed by default */

	crashlog = get_sender_by_name("crashlog");
	if (!crashlog)
		return result;

	if (read_file(BOOTID_NODE, &size, &boot_id) == -1)
		return result;

	len = snprintf(logged_boot_id_path, sizeof(logged_boot_id_path),
		       "%s/%s", crashlog->outdir, BOOTID_LOG);
	/* A truncated path would point at the wrong file: do not touch it. */
	if (len < 0 || len >= (int)sizeof(logged_boot_id_path))
		goto out;

	/*
	 * An existing but unreadable log is treated like a missing one, so
	 * that it gets rewritten below instead of being left stale.
	 */
	if (file_exists(logged_boot_id_path) &&
	    read_file(logged_boot_id_path, &size, &logged_boot_id) != -1) {
		if (!strcmp((char *)logged_boot_id, (char *)boot_id))
			result = 0;
		free(logged_boot_id);
	}

	/* Record the current id so the next call in this boot returns 0. */
	if (result)
		overwrite_file(logged_boot_id_path, boot_id);

out:
	free(boot_id);
	return result;
}

View File

@ -2,6 +2,7 @@
Description=ACRN crashlog probe
Requires=telemd.socket
Requires=usercrash_s
After=acrnlog.service
After=usercrash.service
After=prepare.service

View File

@ -27,8 +27,8 @@
<triggers>
<trigger id="1" enable="true">
<name>t_pstore</name>
<type>file</type>
<path>/sys/fs/pstore/console-ramoops</path>
<type>node</type>
<path>/sys/fs/pstore/console-ramoops-0</path>
</trigger>
<trigger id="2" enable="true">
<name>t_boot</name>
@ -40,6 +40,15 @@
<type>dir</type>
<path>/var/log/usercrashes</path>
</trigger>
<trigger id="4" enable="true">
<name>t_rebootreason</name>
<type>rebootreason</type>
</trigger>
<trigger id="5" enable="true">
<name>t_acrnlog_last</name>
<type>file</type>
<path>/tmp/acrnlog/acrnlog_last.[*]</path>
</trigger>
</triggers>
<vms enable="true">
@ -57,8 +66,8 @@
<logs>
<log id="1" enable="true">
<name>pstore</name>
<type>file</type>
<path>/sys/fs/pstore/console-ramoops</path>
<type>node</type>
<path>/sys/fs/pstore/console-ramoops-0</path>
</log>
<log id='2' enable='true'>
<name>kmsg</name>
@ -90,34 +99,66 @@
<crashes>
<crash id='1' inherit='0' enable='true'>
<name>UNKNOWN</name>
<trigger>t_rebootreason</trigger>
<channel>oneshot</channel>
<content id='1'>WARM</content>
<log id='1'>pstore</log>
<log id='2'>acrnlog_last</log>
</crash>
<crash id='2' inherit='0' enable='true'>
<name>SWWDT_UNHANDLE</name>
<trigger>t_rebootreason</trigger>
<channel>oneshot</channel>
<content id='1'>WATCHDOG</content>
<log id='1'>pstore</log>
<log id='2'>acrnlog_last</log>
</crash>
<crash id='3' inherit='0' enable='true'>
<name>HWWDT_UNHANDLE</name>
<trigger>t_rebootreason</trigger>
<channel>oneshot</channel>
<content id='1'>GLOBAL</content>
<log id='1'>pstore</log>
<log id='2'>acrnlog_last</log>
</crash>
<crash id='4' inherit='1' enable='true'>
<name>ACRNCRASH</name>
<trigger>t_acrnlog_last</trigger>
<content id='1'>= Unhandled exception:</content>
</crash>
<crash id='5' inherit='1' enable='true'>
<name>IPANIC</name>
<trigger>t_pstore</trigger>
<channel>oneshot</channel>
<log id='1'>pstore</log>
<content id='1'> </content>
<mightcontent expression='1' id='1'>Kernel panic - not syncing:</mightcontent>
<mightcontent expression='1' id='2'>BUG: unable to handle kernel</mightcontent>
<data id='1'>kernel BUG at</data>
<data id='2'>EIP is at</data>
<data id='3'>Comm:</data>
</crash>
<crash id='2' inherit='1' enable='true'>
<name>IPANIC_SWWDT</name>
<content id='1'>BUG: soft lockup - CPU#</content>
<crash id='6' inherit='2' enable='true'>
<name>ACRNCRASH</name>
<trigger>t_acrnlog_last</trigger>
<content id='1'>= Unhandled exception:</content>
</crash>
<crash id='3' inherit='2' enable='true'>
<name>IPANIC_SWWDT_FAKE</name>
<mightcontent expression='1' id='1'>EIP: panic_dbg_set</mightcontent>
<mightcontent expression='1' id='2'>RIP: panic_dbg_set</mightcontent>
<crash id='7' inherit='2' enable='true'>
<name>SWWDT_IPANIC</name>
<trigger>t_pstore</trigger>
<content id='1'> </content>
<mightcontent expression='1' id='1'>Kernel panic - not syncing:</mightcontent>
<mightcontent expression='1' id='2'>BUG: unable to handle kernel</mightcontent>
<data id='1'>kernel BUG at</data>
<data id='2'>EIP is at</data>
<data id='3'>Comm:</data>
</crash>
<crash id='4' inherit='0' enable='true'>
<crash id='8' inherit='0' enable='true'>
<name>USERCRASH</name>
<trigger>t_usercrash</trigger>
<channel>inotify</channel>
<log id='1'>kmsg</log>
<log id='2'>syslog</log>
</crash>
<crash id='5' inherit='1' enable='true'>
<name>IPANIC_HWWDT</name>
<content id='1'>Watchdog detected hard LOCKUP on cpu</content>
</crash>
</crashes>
<infos>
@ -132,6 +173,4 @@
</info>
</infos>
</conf>