when graphing CPU as stacked area chart, which parts add up to total ?
— Kyle Hailey (@kylelf_) 2019年12月20日
guest
irq
nice
steal
Stacking System and Users and probably wait are clear, but is nice part of user? Looks like guest is part of nice. Is irq part of sys? @OracleSK @oradiag @BertrandDrouvot @fritshoogland
Answer: user + nice + sys + irq + softirq + steal
- User space: user + nice
- Kernel space: sys + irq + softirq
- Idle: idle + iowait
- Steal: Stolen by other guest VMs or Hypervisor.
The field itself means the time the VM CPU has to wait for other VMs (virtual machines) finishing their turn (slice), or for a task of the hypervisor itself. ... It’s the time the hypervisor scheduled something else to run instead of something within your VM. This might be time for another VM, or for the Hypervisor host itself. If no time were stolen, this time would be used to run your CPU workload or your idle thread.
linux - iostat - What does the 'steal' field mean? - Unix & Linux Stack Exchange
- Guest: the cpu time spent in a virtual machine. "guest" time is double-counted in "user" time.
/* * Account guest cpu time to a process. * @p: the process that the cpu time gets accounted to * @cputime: the cpu time spent in virtual machine since the last update */ void account_guest_time(struct task_struct *p, u64 cputime) { u64 *cpustat = kcpustat_this_cpu->cpustat; /* Add guest time to process. */ p->utime += cputime; account_group_user_time(p, cputime); p->gtime += cputime; /* Add guest time to cpustat. */ if (task_nice(p) > 0) { cpustat[CPUTIME_NICE] += cputime; <-- double-counted cpustat[CPUTIME_GUEST_NICE] += cputime; <-- } else { cpustat[CPUTIME_USER] += cputime; <-- double-counted cpustat[CPUTIME_GUEST] += cputime; <-- } }
Is nice part of user?: Yes
vmstat
- vmstat reads /proc/stat to get cpu time
$ strace -e open vmstat 1 1 open("/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3 open("/lib64/libproc-3.2.8.so", O_RDONLY|O_CLOEXEC) = 3 open("/lib64/libc.so.6", O_RDONLY|O_CLOEXEC) = 3 open("/proc/self/auxv", O_RDONLY) = 3 open("/sys/devices/system/cpu/online", O_RDONLY|O_CLOEXEC) = 3 procs -----------memory---------- ---swap-- -----io---- --system-- -----cpu----- r b swpd free buff cache si so bi bo in cs us sy id wa st open("/proc/meminfo", O_RDONLY) = 3 open("/proc/stat", O_RDONLY) = 4 <--- vmstat reads /proc/stat to get cpu time open("/proc/vmstat", O_RDONLY) = 5 0 0 0 238889648 134392 765980 0 0 76 4 1 1 0 0 99 0 0 +++ exited with 0 +++
static void new_format(void) { ... getstat(cpu_use, cpu_nic, cpu_sys, cpu_idl, cpu_iow, cpu_xxx, cpu_yyy, cpu_zzz, pgpgin, pgpgout, pswpin, pswpout, intr, ctxt, &running, &blocked, &dummy_1, &dummy_2); ... duse = *cpu_use + *cpu_nic; <--- user + nice in /proc/stat dsys = *cpu_sys + *cpu_xxx + *cpu_yyy; <--- system + irq + softirq in /proc/stat didl = *cpu_idl; diow = *cpu_iow; dstl = *cpu_zzz;
void getstat(jiff *restrict cuse, jiff *restrict cice, jiff *restrict csys, jiff *restrict cide, jiff *restrict ciow, jiff *restrict cxxx, jiff *restrict cyyy, jiff *restrict czzz, unsigned long *restrict pin, unsigned long *restrict pout, unsigned long *restrict s_in, unsigned long *restrict sout, unsigned *restrict intr, unsigned *restrict ctxt, unsigned int *restrict running, unsigned int *restrict blocked, unsigned int *restrict btime, unsigned int *restrict processes) { static int fd; unsigned long long llbuf = 0; int need_vmstat_file = 0; int need_proc_scan = 0; const char* b; memset(buff, '\0', BUFFSIZE); /* ensure null termination in buffer */ if(fd){ lseek(fd, 0L, SEEK_SET); }else{ fd = open("/proc/stat", O_RDONLY, 0); <--- vmstat reads /proc/stat to get cpu time if(fd == -1) crash("/proc/stat"); } read(fd,buff,BUFFSIZE-1); *intr = 0; *ciow = 0; /* not separated out until the 2.5.41 kernel */ *cxxx = 0; /* not separated out until the 2.6.0-test4 kernel */ <--- irq in /proc/stat *cyyy = 0; /* not separated out until the 2.6.0-test4 kernel */ <--- softirq in /proc/stat *czzz = 0; /* not separated out until the 2.6.11 kernel */ <---steal in /proc/stat b = strstr(buff, "cpu "); if(b) sscanf(b, "cpu %llu %llu %llu %llu %llu %llu %llu %llu", cuse, cice, csys, cide, ciow, cxxx, cyyy, czzz);
/proc/stat
kernel/system statistics. Varies with architecture. Common
entries include:
cpu 10132153 290696 3084719 46828483 16683 0 25195 0 175628 0
cpu0 1393280 32966 572056 13343292 6130 0 17875 0 23933 0
The amount of time, measured in units of USER_HZ
(1/100ths of a second on most architectures, use
sysconf(_SC_CLK_TCK) to obtain the right value), that
the system ("cpu" line) or the specific CPU ("cpuN"
line) spent in various states:
user (1) Time spent in user mode.
nice (2) Time spent in user mode with low priority
(nice).
system (3) Time spent in system mode.
idle (4) Time spent in the idle task. This value
should be USER_HZ times the second entry in the
/proc/uptime pseudo-file.
iowait (since Linux 2.5.41)
(5) Time waiting for I/O to complete. This
value is not reliable, for the following rea‐
sons:
1. The CPU will not wait for I/O to complete;
iowait is the time that a task is waiting for
I/O to complete. When a CPU goes into idle
state for outstanding task I/O, another task
will be scheduled on this CPU.
2. On a multi-core CPU, the task waiting for I/O
to complete is not running on any CPU, so the
iowait of each CPU is difficult to calculate.
3. The value in this field may decrease in cer‐
tain conditions.
irq (since Linux 2.6.0) <--- cxxx in /proc/sysinfo.c#getstat
(6) Time servicing interrupts.
softirq (since Linux 2.6.0) <--- cyyy in /proc/sysinfo.c#getstat
(7) Time servicing softirqs.
steal (since Linux 2.6.11) <--- czzz in /proc/sysinfo.c#getstat
(8) Stolen time, which is the time spent in
other operating systems when running in a virtu‐
alized environment
guest (since Linux 2.6.24)
(9) Time spent running a virtual CPU for guest
operating systems under the control of the Linux
kernel.
guest_nice (since Linux 2.6.33)
(10) Time spent running a niced guest (virtual
CPU for guest operating systems under the con‐
trol of the Linux kernel).
mpstat
- mpstat reads /proc/stat to get cpu time
$ strace -e open mpstat -P ALL 1 1 open("/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3 open("/lib64/libc.so.6", O_RDONLY|O_CLOEXEC) = 3 open("/proc/interrupts", O_RDONLY) = 3 open("/etc/localtime", O_RDONLY|O_CLOEXEC) = 3 Linux 4.14.146-93.123.amzn1.x86_64 (ip-172-31-10-8) 12/21/19 _x86_64_ (32 CPU) open("/proc/uptime", O_RDONLY) = 3 open("/proc/stat", O_RDONLY) = 3 <--- mpstat reads /proc/stat to get cpu time --- SIGALRM {si_signo=SIGALRM, si_code=SI_KERNEL, si_value={int=194, ptr=0xc2}} --- open("/proc/uptime", O_RDONLY) = 3 open("/proc/stat", O_RDONLY) = 3 17:49:18 CPU %usr %nice %sys %iowait %irq %soft %steal %guest %idle 17:49:19 all 0.03 0.00 0.03 0.00 0.00 0.00 0.00 0.00 99.94 17:49:19 0 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 100.00 17:49:19 1 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 100.00 17:49:19 2 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 100.00
- sysstat/mpstat.c at 9439fa7da6115bb034f42f19192744a9f7c12aa3 · sysstat/sysstat · GitHub
- mpstat.c#main
int main(int argc, char **argv) ... /* Get system name, release number and hostname */ __uname(&header); print_gal_header(&(mp_tstamp[0]), header.sysname, header.release, header.nodename, header.machine, get_cpu_nr(~0, FALSE), DISPLAY_JSON_OUTPUT(flags)); /* Main loop */ rw_mpstat_loop(dis_hdr, rows); <--- Read stats and display them.
- sysstat/mpstat.c at 9439fa7da6115bb034f42f19192744a9f7c12aa3 · sysstat/sysstat · GitHub
- mpstat.c#rw_mpstat_loop
/* *************************************************************************** * Main loop: Read stats from the relevant sources, and display them. * * IN: * @dis_hdr Set to TRUE if the header line must always be printed. * @rows Number of rows of screen. *************************************************************************** */ void rw_mpstat_loop(int dis_hdr, int rows) { struct stats_cpu *scc; int i; int curr = 1, dis = 1; unsigned long lines = rows; /* Dont buffer data if redirected to a pipe */ setbuf(stdout, NULL); /* Read system uptime and CPU stats */ read_uptime(&(uptime_cs[0])); read_stat_cpu(st_cpu[0], cpu_nr + 1); <--- Read cpu stats. /* * Calculate global CPU stats as the sum of individual ones. * Done only on SMP machines. On UP machines, we keep the values * read from /proc/stat for global CPU stats. */ if (cpu_nr > 1) { memset(st_cpu[0], 0, STATS_CPU_SIZE); for (i = 1; i <= cpu_nr; i++) { scc = st_cpu[0] + i; st_cpu[0]->cpu_user += scc->cpu_user; st_cpu[0]->cpu_nice += scc->cpu_nice; st_cpu[0]->cpu_sys += scc->cpu_sys; st_cpu[0]->cpu_idle += scc->cpu_idle; st_cpu[0]->cpu_iowait += scc->cpu_iowait; st_cpu[0]->cpu_hardirq += scc->cpu_hardirq; st_cpu[0]->cpu_steal += scc->cpu_steal; st_cpu[0]->cpu_softirq += scc->cpu_softirq; st_cpu[0]->cpu_guest += scc->cpu_guest; st_cpu[0]->cpu_guest_nice += scc->cpu_guest_nice; } }
- sysstat/rd_stats.c at 9439fa7da6115bb034f42f19192744a9f7c12aa3 · sysstat/sysstat · GitHub
- rd_stats.c#read_stat_cpu
/* *************************************************************************** * Read CPU statistics. * Remember that this function is used by several sysstat commands! * * IN: * @st_cpu Buffer where structures containing stats will be saved. * @nr_alloc Total number of structures allocated. Value is >= 1. * * OUT: * @st_cpu Buffer with statistics. * * RETURNS: * Highest CPU number(*) for which statistics have been read. * 1 means CPU "all", 2 means CPU 0, 3 means CPU 1, etc. * Or -1 if the buffer was too small and needs to be reallocated. * * (*)This doesn't account for all processors in the machine in the case * where some CPU are offline and located at the end of the list. *************************************************************************** */ __nr_t read_stat_cpu(struct stats_cpu *st_cpu, __nr_t nr_alloc) { FILE *fp; struct stats_cpu *st_cpu_i; struct stats_cpu sc; char line[8192]; int proc_nr; __nr_t cpu_read = 0; if ((fp = fopen(STAT, "r")) == NULL) { <--- mpstat reads /proc/stat to get cpu time fprintf(stderr, _("Cannot open %s: %s\n"), STAT, strerror(errno)); exit(2); }
/* Files */ #define STAT PRE "/proc/stat" <--- /proc/stat #define UPTIME PRE "/proc/uptime" #define DISKSTATS PRE "/proc/diskstats" #define INTERRUPTS PRE "/proc/interrupts" #define MEMINFO PRE "/proc/meminfo"
/proc/stat
/proc/stat
kernel/system statistics. Varies with architecture. Common
entries include:
cpu 10132153 290696 3084719 46828483 16683 0 25195 0 175628 0
cpu0 1393280 32966 572056 13343292 6130 0 17875 0 23933 0
The amount of time, measured in units of USER_HZ
(1/100ths of a second on most architectures, use
sysconf(_SC_CLK_TCK) to obtain the right value), that
the system ("cpu" line) or the specific CPU ("cpuN"
line) spent in various states:
user (1) Time spent in user mode.
nice (2) Time spent in user mode with low priority
(nice).
system (3) Time spent in system mode.
idle (4) Time spent in the idle task. This value
should be USER_HZ times the second entry in the
/proc/uptime pseudo-file.
iowait (since Linux 2.5.41)
(5) Time waiting for I/O to complete. This
value is not reliable, for the following rea‐
sons:
1. The CPU will not wait for I/O to complete;
iowait is the time that a task is waiting for
I/O to complete. When a CPU goes into idle
state for outstanding task I/O, another task
will be scheduled on this CPU.
2. On a multi-core CPU, the task waiting for I/O
to complete is not running on any CPU, so the
iowait of each CPU is difficult to calculate.
3. The value in this field may decrease in cer‐
tain conditions.
irq (since Linux 2.6.0)
(6) Time servicing interrupts.
softirq (since Linux 2.6.0)
(7) Time servicing softirqs.
steal (since Linux 2.6.11)
(8) Stolen time, which is the time spent in
other operating systems when running in a virtu‐
alized environment
guest (since Linux 2.6.24)
(9) Time spent running a virtual CPU for guest
operating systems under the control of the Linux
kernel.
guest_nice (since Linux 2.6.33)
(10) Time spent running a niced guest (virtual
CPU for guest operating systems under the con‐
trol of the Linux kernel).
static int show_stat(struct seq_file *p, void *v) { int i, j; u64 user, nice, system, idle, iowait, irq, softirq, steal; u64 guest, guest_nice; u64 sum = 0; u64 sum_softirq = 0; unsigned int per_softirq_sums[NR_SOFTIRQS] = {0}; struct timespec64 boottime; user = nice = system = idle = iowait = irq = softirq = steal = 0; guest = guest_nice = 0; getboottime64(&boottime); for_each_possible_cpu(i) { user += kcpustat_cpu(i).cpustat[CPUTIME_USER]; nice += kcpustat_cpu(i).cpustat[CPUTIME_NICE]; system += kcpustat_cpu(i).cpustat[CPUTIME_SYSTEM]; idle += get_idle_time(i); iowait += get_iowait_time(i); irq += kcpustat_cpu(i).cpustat[CPUTIME_IRQ]; softirq += kcpustat_cpu(i).cpustat[CPUTIME_SOFTIRQ]; steal += kcpustat_cpu(i).cpustat[CPUTIME_STEAL]; guest += kcpustat_cpu(i).cpustat[CPUTIME_GUEST]; guest_nice += kcpustat_cpu(i).cpustat[CPUTIME_GUEST_NICE]; sum += kstat_cpu_irqs_sum(i); sum += arch_irq_stat_cpu(i); for (j = 0; j < NR_SOFTIRQS; j++) { unsigned int softirq_stat = kstat_softirqs_cpu(j, i); per_softirq_sums[j] += softirq_stat; sum_softirq += softirq_stat; } } sum += arch_irq_stat(); seq_put_decimal_ull(p, "cpu ", nsec_to_clock_t(user)); seq_put_decimal_ull(p, " ", nsec_to_clock_t(nice)); seq_put_decimal_ull(p, " ", nsec_to_clock_t(system)); seq_put_decimal_ull(p, " ", nsec_to_clock_t(idle)); seq_put_decimal_ull(p, " ", nsec_to_clock_t(iowait)); seq_put_decimal_ull(p, " ", nsec_to_clock_t(irq)); seq_put_decimal_ull(p, " ", nsec_to_clock_t(softirq)); seq_put_decimal_ull(p, " ", nsec_to_clock_t(steal)); seq_put_decimal_ull(p, " ", nsec_to_clock_t(guest)); seq_put_decimal_ull(p, " ", nsec_to_clock_t(guest_nice)); seq_putc(p, '\n'); for_each_online_cpu(i) { /* Copy values here to work around gcc-2.95.3, gcc-2.96 */ user = kcpustat_cpu(i).cpustat[CPUTIME_USER]; nice = kcpustat_cpu(i).cpustat[CPUTIME_NICE]; system = kcpustat_cpu(i).cpustat[CPUTIME_SYSTEM]; idle = get_idle_time(i); iowait = get_iowait_time(i); irq = kcpustat_cpu(i).cpustat[CPUTIME_IRQ]; softirq = 
kcpustat_cpu(i).cpustat[CPUTIME_SOFTIRQ]; steal = kcpustat_cpu(i).cpustat[CPUTIME_STEAL]; guest = kcpustat_cpu(i).cpustat[CPUTIME_GUEST]; guest_nice = kcpustat_cpu(i).cpustat[CPUTIME_GUEST_NICE]; seq_printf(p, "cpu%d", i); seq_put_decimal_ull(p, " ", nsec_to_clock_t(user)); seq_put_decimal_ull(p, " ", nsec_to_clock_t(nice)); seq_put_decimal_ull(p, " ", nsec_to_clock_t(system)); seq_put_decimal_ull(p, " ", nsec_to_clock_t(idle)); seq_put_decimal_ull(p, " ", nsec_to_clock_t(iowait)); seq_put_decimal_ull(p, " ", nsec_to_clock_t(irq)); seq_put_decimal_ull(p, " ", nsec_to_clock_t(softirq)); seq_put_decimal_ull(p, " ", nsec_to_clock_t(steal)); seq_put_decimal_ull(p, " ", nsec_to_clock_t(guest)); seq_put_decimal_ull(p, " ", nsec_to_clock_t(guest_nice)); seq_putc(p, '\n'); } seq_put_decimal_ull(p, "intr ", (unsigned long long)sum); /* sum again ? it could be updated? */ for_each_irq_nr(j) seq_put_decimal_ull(p, " ", kstat_irqs_usr(j)); seq_printf(p, "\nctxt %llu\n" "btime %llu\n" "processes %lu\n" "procs_running %lu\n" "procs_blocked %lu\n", nr_context_switches(), (unsigned long long)boottime.tv_sec, total_forks, nr_running(), nr_iowait()); seq_put_decimal_ull(p, "softirq ", (unsigned long long)sum_softirq); for (i = 0; i < NR_SOFTIRQS; i++) seq_put_decimal_ull(p, " ", per_softirq_sums[i]); seq_putc(p, '\n'); return 0; }
/* * Account user cpu time to a process. * @p: the process that the cpu time gets accounted to * @cputime: the cpu time spent in user space since the last update */ void account_user_time(struct task_struct *p, u64 cputime) { int index; /* Add user time to process. */ p->utime += cputime; account_group_user_time(p, cputime); index = (task_nice(p) > 0) ? CPUTIME_NICE : CPUTIME_USER; /* Add user time to cpustat. */ task_group_account_field(p, index, cputime); /* Account for user time used */ acct_account_cputime(p); } ... /* * Account guest cpu time to a process. * @p: the process that the cpu time gets accounted to * @cputime: the cpu time spent in virtual machine since the last update */ void account_guest_time(struct task_struct *p, u64 cputime) { u64 *cpustat = kcpustat_this_cpu->cpustat; /* Add guest time to process. */ p->utime += cputime; account_group_user_time(p, cputime); p->gtime += cputime; /* Add guest time to cpustat. */ if (task_nice(p) > 0) { cpustat[CPUTIME_NICE] += cputime; cpustat[CPUTIME_GUEST_NICE] += cputime; } else { cpustat[CPUTIME_USER] += cputime; cpustat[CPUTIME_GUEST] += cputime; } } ... /* * Account system cpu time to a process. * @p: the process that the cpu time gets accounted to * @hardirq_offset: the offset to subtract from hardirq_count() * @cputime: the cpu time spent in kernel space since the last update */ void account_system_time(struct task_struct *p, int hardirq_offset, u64 cputime) { int index; if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) { account_guest_time(p, cputime); return; } if (hardirq_count() - hardirq_offset) index = CPUTIME_IRQ; else if (in_serving_softirq()) index = CPUTIME_SOFTIRQ; else index = CPUTIME_SYSTEM; account_system_index_time(p, cputime, index); } /* * Account for idle time. 
* @cputime: the cpu time spent in idle wait */ void account_idle_time(u64 cputime) { u64 *cpustat = kcpustat_this_cpu->cpustat; struct rq *rq = this_rq(); if (atomic_read(&rq->nr_iowait) > 0) cpustat[CPUTIME_IOWAIT] += cputime; else cpustat[CPUTIME_IDLE] += cputime; }
Is irq part of sys?: Yes
/* * Account system cpu time to a process. * @p: the process that the cpu time gets accounted to * @hardirq_offset: the offset to subtract from hardirq_count() * @cputime: the cpu time spent in kernel space since the last update */ void account_system_time(struct task_struct *p, int hardirq_offset, u64 cputime) { int index; if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) { account_guest_time(p, cputime); return; } if (hardirq_count() - hardirq_offset) index = CPUTIME_IRQ; else if (in_serving_softirq()) index = CPUTIME_SOFTIRQ; else index = CPUTIME_SYSTEM; account_system_index_time(p, cputime, index); }