From b30ba3db15a49eba3c4f70380a0f184456b1e12b Mon Sep 17 00:00:00 2001 From: "Liu, Xinwu" Date: Tue, 3 Jul 2018 10:51:45 +0800 Subject: [PATCH] tools:acrn-crashlog: Detect and classify the crash in ACRN and kernel Since ACRN has the capability to reboot and the reboot reason is available in SOS, acrnprobe can detect crashes of ACRN and the SOS kernel. List of added crash types: 1. ACRNCRASH - crashed in the hypervisor; this detection depends on files in /tmp/acrnlog_last (provided by acrnlog). 2. IPANIC - crashed in the SOS kernel; this detection depends on pstore. 3. SWWDT_IPANIC - crashed in the SOS kernel and the reboot reason is wdt. 4. HWWDT_UNHANDLE - only the reboot reason (global) is recognized; there are no further clues as to whether it is a SOS kernel crash or a hypervisor crash. 5. SWWDT_UNHANDLE - only the reboot reason (wdt) is recognized; there are no further clues as to whether it is a SOS kernel crash or a hypervisor crash. 6. UNKNOWN - only the reboot reason (warm) is recognized; there are no further clues as to whether it is a SOS kernel crash or a hypervisor crash. 
Signed-off-by: Liu, Xinwu Acked-by: Chen Gang --- tools/acrn-crashlog/acrnprobe/channels.c | 33 ++++++-- .../acrnprobe/include/probeutils.h | 1 + tools/acrn-crashlog/acrnprobe/probeutils.c | 57 +++++++++++-- tools/acrn-crashlog/data/acrnprobe.service | 1 + tools/acrn-crashlog/data/acrnprobe.xml | 79 ++++++++++++++----- 5 files changed, 138 insertions(+), 33 deletions(-) diff --git a/tools/acrn-crashlog/acrnprobe/channels.c b/tools/acrn-crashlog/acrnprobe/channels.c index 872507ff1..8599d04f1 100644 --- a/tools/acrn-crashlog/acrnprobe/channels.c +++ b/tools/acrn-crashlog/acrnprobe/channels.c @@ -20,6 +20,8 @@ #include "fsutils.h" #include "strutils.h" #include "channels.h" +#include "startupreason.h" +#include "probeutils.h" #include "log_sys.h" #define POLLING_TIMER_SIG 0xCEAC @@ -105,11 +107,13 @@ static void channel_oneshot(struct channel_t *cnl) LOGD("initializing channel %s ...\n", cname); + if (!is_boot_id_changed()) + return; + e = create_event(REBOOT, cname, NULL, 0, NULL); if (e) event_enqueue(e); - for_each_crash(id, crash, conf) { if (!crash || !is_root_crash(crash)) continue; @@ -117,13 +121,26 @@ static void channel_oneshot(struct channel_t *cnl) if (strcmp(crash->channel, cname)) continue; - if (crash->trigger && - !strcmp("file", crash->trigger->type) && - file_exists(crash->trigger->path)) { - e = create_event(CRASH, cname, (void *)crash, - 0, crash->trigger->path); - if (e) - event_enqueue(e); + if (!crash->trigger) + continue; + + if (!strcmp("file", crash->trigger->type)) { + if (file_exists(crash->trigger->path)) { + e = create_event(CRASH, cname, (void *)crash, + 0, crash->trigger->path); + if (e) + event_enqueue(e); + } + } else if (!strcmp("rebootreason", crash->trigger->type)) { + char rreason[REBOOT_REASON_SIZE]; + + read_startupreason(rreason, sizeof(rreason)); + if (!strcmp(rreason, crash->content[0])) { + e = create_event(CRASH, cname, (void *)crash, + 0, crash->trigger->path); + if (e) + event_enqueue(e); + } } } diff --git 
a/tools/acrn-crashlog/acrnprobe/include/probeutils.h b/tools/acrn-crashlog/acrnprobe/include/probeutils.h index 1ee423757..f8d89681f 100644 --- a/tools/acrn-crashlog/acrnprobe/include/probeutils.h +++ b/tools/acrn-crashlog/acrnprobe/include/probeutils.h @@ -40,5 +40,6 @@ void generate_crashfile(char *dir, char *event, char *hashkey, char *type, char *data0, char *data1, char *data2); char *generate_log_dir(enum e_dir_mode mode, char *hashkey); +int is_boot_id_changed(void); #endif diff --git a/tools/acrn-crashlog/acrnprobe/probeutils.c b/tools/acrn-crashlog/acrnprobe/probeutils.c index 41dccc742..5710973f4 100644 --- a/tools/acrn-crashlog/acrnprobe/probeutils.c +++ b/tools/acrn-crashlog/acrnprobe/probeutils.c @@ -38,14 +38,21 @@ #define STATS_CURRENT_LOG "currentstatslog" #define VM_CURRENT_LOG "currentvmlog" +#define BOOTID_NODE "/proc/sys/kernel/random/boot_id" +#define BOOTID_LOG "currentbootid" + unsigned long long get_uptime(void) { - static long long time_ns = -1; - struct timespec ts; + long long time_ns; + struct timespec ts; + int res; - clock_gettime(CLOCK_BOOTTIME, &ts); - time_ns = (long long)ts.tv_sec * 1000000000LL + - (long long)ts.tv_nsec; + res = clock_gettime(CLOCK_BOOTTIME, &ts); + if (res == -1) + return res; + + time_ns = (long long)ts.tv_sec * 1000000000LL + + (long long)ts.tv_nsec; return time_ns; } @@ -56,6 +63,8 @@ int get_uptime_string(char *newuptime, int *hours) int seconds, minutes; tm = get_uptime(); + if (tm == -1) + return -1; /* seconds */ *hours = (int)(tm / 1000000000LL); @@ -444,3 +453,41 @@ char *generate_log_dir(enum e_dir_mode mode, char *hashkey) return strdup(path); } + +int is_boot_id_changed(void) +{ + void *boot_id; + void *logged_boot_id; + char logged_boot_id_path[PATH_MAX]; + unsigned long size; + struct sender_t *crashlog; + int res; + int result = 1; /* returns changed by default */ + + crashlog = get_sender_by_name("crashlog"); + if (!crashlog) + return result; + + res = read_file(BOOTID_NODE, &size, &boot_id); + if 
(res == -1) + return result; + + snprintf(logged_boot_id_path, sizeof(logged_boot_id_path), "%s/%s", + crashlog->outdir, BOOTID_LOG); + if (file_exists(logged_boot_id_path)) { + res = read_file(logged_boot_id_path, &size, &logged_boot_id); + if (res == -1) + goto out; + + if (!strcmp((char *)logged_boot_id, (char *)boot_id)) + result = 0; + + free(logged_boot_id); + } + + if (result) + overwrite_file(logged_boot_id_path, boot_id); +out: + free(boot_id); + return result; +} diff --git a/tools/acrn-crashlog/data/acrnprobe.service b/tools/acrn-crashlog/data/acrnprobe.service index 611d3b410..257a51e02 100644 --- a/tools/acrn-crashlog/data/acrnprobe.service +++ b/tools/acrn-crashlog/data/acrnprobe.service @@ -2,6 +2,7 @@ Description=ACRN crashlog probe Requires=telemd.socket Requires=usercrash_s +After=acrnlog.service After=usercrash.service After=prepare.service diff --git a/tools/acrn-crashlog/data/acrnprobe.xml b/tools/acrn-crashlog/data/acrnprobe.xml index a2e51c02b..7f0532534 100644 --- a/tools/acrn-crashlog/data/acrnprobe.xml +++ b/tools/acrn-crashlog/data/acrnprobe.xml @@ -27,8 +27,8 @@ t_pstore - file - /sys/fs/pstore/console-ramoops + node + /sys/fs/pstore/console-ramoops-0 t_boot @@ -40,6 +40,15 @@ dir /var/log/usercrashes + + t_rebootreason + rebootreason + + + t_acrnlog_last + file + /tmp/acrnlog/acrnlog_last.[*] + @@ -57,8 +66,8 @@ pstore - file - /sys/fs/pstore/console-ramoops + node + /sys/fs/pstore/console-ramoops-0 kmsg @@ -90,34 +99,66 @@ + UNKNOWN + t_rebootreason + oneshot + WARM + pstore + acrnlog_last + + + SWWDT_UNHANDLE + t_rebootreason + oneshot + WATCHDOG + pstore + acrnlog_last + + + HWWDT_UNHANDLE + t_rebootreason + oneshot + GLOBAL + pstore + acrnlog_last + + + ACRNCRASH + t_acrnlog_last + = Unhandled exception: + + IPANIC t_pstore - oneshot - pstore + + Kernel panic - not syncing: + BUG: unable to handle kernel kernel BUG at EIP is at Comm: - - IPANIC_SWWDT - BUG: soft lockup - CPU# + + ACRNCRASH + t_acrnlog_last + = Unhandled exception: - 
- IPANIC_SWWDT_FAKE - EIP: panic_dbg_set - RIP: panic_dbg_set + + SWWDT_IPANIC + t_pstore + + Kernel panic - not syncing: + BUG: unable to handle kernel + kernel BUG at + EIP is at + Comm: - + USERCRASH t_usercrash inotify kmsg syslog - - IPANIC_HWWDT - Watchdog detected hard LOCKUP on cpu - @@ -132,6 +173,4 @@ - -