From 105c8b5baf97c2a58e10b2664d73144b7dcecf41 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B1=E5=85=AB?= Date: Sat, 1 Aug 2020 21:24:56 +0800 Subject: [PATCH] [fix] fix the bug of cpu metric --- .../powerjob/common/model/SystemMetrics.java | 35 ++++++++---- .../service/ha/ClusterStatusHolder.java | 9 ++- .../server/web/response/WorkerStatusVO.java | 28 +++++----- .../worker/common/OmsWorkerVersion.java | 10 +++- .../worker/common/utils/SystemInfoUtils.java | 48 +++++++++++----- .../powerjob/function/MonitorTest.java | 55 +++++++++++++++++++ 6 files changed, 140 insertions(+), 45 deletions(-) create mode 100644 powerjob-worker/src/test/java/com/github/kfcfans/powerjob/function/MonitorTest.java diff --git a/powerjob-common/src/main/java/com/github/kfcfans/powerjob/common/model/SystemMetrics.java b/powerjob-common/src/main/java/com/github/kfcfans/powerjob/common/model/SystemMetrics.java index 9e36fb94..4f13accb 100644 --- a/powerjob-common/src/main/java/com/github/kfcfans/powerjob/common/model/SystemMetrics.java +++ b/powerjob-common/src/main/java/com/github/kfcfans/powerjob/common/model/SystemMetrics.java @@ -14,7 +14,7 @@ public class SystemMetrics implements OmsSerializable, Comparable // CPU核心数量 private int cpuProcessors; - // CPU负载(需要处以核心数) + // CPU负载(负载 和 使用率 是两个完全不同的概念,Java 无法获取 CPU 使用率,只能获取负载) private double cpuLoad; // 内存(单位 GB) @@ -38,7 +38,7 @@ public class SystemMetrics implements OmsSerializable, Comparable } /** - * 计算得分情况,内存 then CPU then 磁盘(磁盘必须有1G以上的剩余空间) + * 计算得分情况,内存 & CPU (磁盘不参与计算) * @return 得分情况 */ public int calculateScore() { @@ -47,13 +47,16 @@ public class SystemMetrics implements OmsSerializable, Comparable return score; } - double availableCPUCores = cpuProcessors * cpuLoad; - double availableMemory = jvmMaxMemory - jvmUsedMemory; + // 对于 TaskTracker 来说,内存是任务顺利完成的关键,因此内存 2 块钱 1GB + double memScore = (jvmMaxMemory - jvmUsedMemory) * 2; + // CPU 剩余负载,1 块钱 1 斤 + double cpuScore = cpuProcessors - cpuLoad; + // Indian Windows 无法获取 CpuLoad,为 -1,固定为 1 + if (cpuScore > cpuProcessors) { + cpuScore = 1; + } - // Windows下无法获取CPU可用核心数,值固定为-1 - cpuLoad = Math.max(0, cpuLoad); - - return (int) (availableMemory * 2 + availableCPUCores); + return (int) (memScore + cpuScore); } /** @@ -65,9 +68,17 @@ public class SystemMetrics implements OmsSerializable, Comparable */ public boolean available(double minCPUCores, double minMemorySpace, double minDiskSpace) { - double currentCpuCores = Math.max(cpuLoad * cpuProcessors, 0); - double currentMemory = jvmMaxMemory - jvmUsedMemory; - double currentDisk = diskTotal - diskUsed; - return currentCpuCores >= minCPUCores && currentMemory >= minMemorySpace && currentDisk >= minDiskSpace; + double availableMemory = jvmMaxMemory - jvmUsedMemory; + double availableDisk = diskTotal - diskUsed; + + if (availableMemory < minMemorySpace || availableDisk < minDiskSpace) { + return false; + } + + // cpuLoad 为负数代表无法获取,不判断。等于 0 为最理想情况,CPU 空载,不需要判断 + if (cpuLoad <= 0 || minCPUCores <= 0) { + return true; + } + return minCPUCores < (cpuProcessors - cpuLoad); } } diff --git a/powerjob-server/src/main/java/com/github/kfcfans/powerjob/server/service/ha/ClusterStatusHolder.java b/powerjob-server/src/main/java/com/github/kfcfans/powerjob/server/service/ha/ClusterStatusHolder.java index abaabc61..9bfea3f6 100644 --- a/powerjob-server/src/main/java/com/github/kfcfans/powerjob/server/service/ha/ClusterStatusHolder.java +++ b/powerjob-server/src/main/java/com/github/kfcfans/powerjob/server/service/ha/ClusterStatusHolder.java @@ -22,13 +22,13 @@ import java.util.Map; public class ClusterStatusHolder { // 集群所属的应用名称 - private String appName; + private final String appName; // 集群中所有机器的健康状态 - private Map address2Metrics; + private final Map address2Metrics; // 集群中所有机器的容器部署状态 containerId -> (workerAddress -> containerInfo) private Map> containerId2Infos; // 集群中所有机器的最后心跳时间 - private Map address2ActiveTime; + private final Map address2ActiveTime; private static final long WORKER_TIMEOUT_MS = 60000; @@ -78,11 +78,14 @@ public class ClusterStatusHolder { address2Metrics.forEach((address, metrics) -> { if (timeout(address)) { + log.info("[ClusterStatusHolder] worker(address={},metrics={}) was filtered because of timeout, last active time is {}.", address, metrics, address2ActiveTime.get(address)); return; } // 判断指标 if (metrics.available(minCPUCores, minMemorySpace, minDiskSpace)) { workers.add(address); + }else { + log.info("[ClusterStatusHolder] worker(address={},metrics={}) was filtered by config(minCPUCores={},minMemory={},minDiskSpace={})", address, metrics, minCPUCores, minMemorySpace, minDiskSpace); } }); diff --git a/powerjob-server/src/main/java/com/github/kfcfans/powerjob/server/web/response/WorkerStatusVO.java b/powerjob-server/src/main/java/com/github/kfcfans/powerjob/server/web/response/WorkerStatusVO.java index 258bdc2d..5b98c3a1 100644 --- a/powerjob-server/src/main/java/com/github/kfcfans/powerjob/server/web/response/WorkerStatusVO.java +++ b/powerjob-server/src/main/java/com/github/kfcfans/powerjob/server/web/response/WorkerStatusVO.java @@ -25,37 +25,35 @@ public class WorkerStatusVO { private int status; // 12.3%(4 cores) - private static final String CPU_FORMAT = "%s%%(%d cores)"; + private static final String CPU_FORMAT = "%s / %s cores"; // 27.7%(2.9/8.0 GB) - private static final String OTHER_FORMAT = "%s%%(%s/%s GB)"; + private static final String OTHER_FORMAT = "%s%%(%s / %s GB)"; private static final DecimalFormat df = new DecimalFormat("#.#"); - private static final double threshold = 0.8; + private static final double THRESHOLD = 0.8; public WorkerStatusVO(String address, SystemMetrics systemMetrics) { + this.status = 1; this.address = address; - - String cpuL = df.format(systemMetrics.getCpuLoad() * 100); - this.cpuLoad = String.format(CPU_FORMAT, cpuL, systemMetrics.getCpuProcessors()); - + this.cpuLoad = String.format(CPU_FORMAT, df.format(systemMetrics.getCpuLoad()), systemMetrics.getCpuProcessors()); + if (systemMetrics.getCpuLoad() > systemMetrics.getCpuProcessors() * THRESHOLD) { + this.status ++; + } String menL = df.format(systemMetrics.getJvmMemoryUsage() * 100); String menUsed = df.format(systemMetrics.getJvmUsedMemory()); String menMax = df.format(systemMetrics.getJvmMaxMemory()); this.memoryLoad = String.format(OTHER_FORMAT, menL, menUsed, menMax); + if (systemMetrics.getJvmMemoryUsage() > THRESHOLD) { + this.status ++; + } String diskL = df.format(systemMetrics.getDiskUsage() * 100); String diskUsed = df.format(systemMetrics.getDiskUsed()); String diskMax = df.format(systemMetrics.getDiskTotal()); this.diskLoad = String.format(OTHER_FORMAT, diskL, diskUsed, diskMax); - - - if (systemMetrics.getCpuLoad() < threshold && systemMetrics.getDiskUsage() < threshold && systemMetrics.getJvmMemoryUsage() < threshold) { - status = 1; - }else if (systemMetrics.getCpuLoad() > threshold && systemMetrics.getDiskUsage() > threshold && systemMetrics.getJvmMemoryUsage() > threshold) { - status = 3; - }else { - status = 2; + if (systemMetrics.getDiskUsage() > THRESHOLD) { + this.status ++; } } } diff --git a/powerjob-worker/src/main/java/com/github/kfcfans/powerjob/worker/common/OmsWorkerVersion.java b/powerjob-worker/src/main/java/com/github/kfcfans/powerjob/worker/common/OmsWorkerVersion.java index ce324db4..122fff8c 100644 --- a/powerjob-worker/src/main/java/com/github/kfcfans/powerjob/worker/common/OmsWorkerVersion.java +++ b/powerjob-worker/src/main/java/com/github/kfcfans/powerjob/worker/common/OmsWorkerVersion.java @@ -1,5 +1,7 @@ package com.github.kfcfans.powerjob.worker.common; +import org.apache.commons.lang3.StringUtils; + import java.io.File; import java.io.IOException; import java.net.JarURLConnection; @@ -17,6 +19,8 @@ import java.util.jar.JarFile; */ public final class OmsWorkerVersion { + private static String CACHE = null; + /** * Return the full version string of the present OhMyScheduler-Worker codebase, or {@code null} * if it cannot be determined. @@ -24,7 +28,11 @@ public final class OmsWorkerVersion { * @see Package#getImplementationVersion() */ public static String getVersion() { - return determineSpringBootVersion(); + if (StringUtils.isNotEmpty(CACHE)) { + return CACHE; + } + CACHE = determineSpringBootVersion(); + return CACHE; } private static String determineSpringBootVersion() { diff --git a/powerjob-worker/src/main/java/com/github/kfcfans/powerjob/worker/common/utils/SystemInfoUtils.java b/powerjob-worker/src/main/java/com/github/kfcfans/powerjob/worker/common/utils/SystemInfoUtils.java index cb6d1a58..17643613 100644 --- a/powerjob-worker/src/main/java/com/github/kfcfans/powerjob/worker/common/utils/SystemInfoUtils.java +++ b/powerjob-worker/src/main/java/com/github/kfcfans/powerjob/worker/common/utils/SystemInfoUtils.java @@ -5,6 +5,8 @@ import com.github.kfcfans.powerjob.common.model.SystemMetrics; import java.io.File; import java.lang.management.ManagementFactory; import java.lang.management.OperatingSystemMXBean; +import java.math.RoundingMode; +import java.text.NumberFormat; /** * 系统信息工具,用于采集监控指标 @@ -14,26 +16,45 @@ import java.lang.management.OperatingSystemMXBean; */ public class SystemInfoUtils { + private static final NumberFormat NF = NumberFormat.getNumberInstance(); + static { + NF.setMaximumFractionDigits(4); + NF.setMinimumFractionDigits(4); + NF.setRoundingMode(RoundingMode.HALF_UP); + } + // JMX bean can be accessed externally and is meant for management tools like hyperic ( or even nagios ) - It would delegate to Runtime anyway. private static final Runtime runtime = Runtime.getRuntime(); - private static OperatingSystemMXBean osMXBean = ManagementFactory.getOperatingSystemMXBean(); + private static final OperatingSystemMXBean osMXBean = ManagementFactory.getOperatingSystemMXBean(); public static SystemMetrics getSystemMetrics() { SystemMetrics metrics = new SystemMetrics(); - // CPU 信息 - metrics.setCpuProcessors(osMXBean.getAvailableProcessors()); - metrics.setCpuLoad(osMXBean.getSystemLoadAverage() / osMXBean.getAvailableProcessors()); + fillCPUInfo(metrics); + fillMemoryInfo(metrics); + fillDiskInfo(metrics); + // 在Worker完成分数计算,减小Server压力 + metrics.calculateScore(); + return metrics; + } + + private static void fillCPUInfo(SystemMetrics metrics) { + metrics.setCpuProcessors(osMXBean.getAvailableProcessors()); + metrics.setCpuLoad(miniDouble(osMXBean.getSystemLoadAverage())); + } + + private static void fillMemoryInfo(SystemMetrics metrics) { // JVM内存信息(maxMemory指JVM能从操作系统获取的最大内存,即-Xmx参数设置的值,totalMemory指JVM当前持久的总内存) metrics.setJvmMaxMemory(bytes2GB(runtime.maxMemory())); // 已使用内存:当前申请总量 - 当前空余量 metrics.setJvmUsedMemory(bytes2GB(runtime.totalMemory() - runtime.freeMemory())); - // 百分比,直接 * 100 - metrics.setJvmMemoryUsage(1.0 * metrics.getJvmUsedMemory() / runtime.maxMemory()); + // 已用内存比例 + metrics.setJvmMemoryUsage(miniDouble(metrics.getJvmUsedMemory() / runtime.maxMemory())); + } - // 磁盘信息 + private static void fillDiskInfo(SystemMetrics metrics) { long free = 0; long total = 0; File[] roots = File.listRoots(); @@ -44,16 +65,15 @@ public class SystemInfoUtils { metrics.setDiskUsed(bytes2GB(total - free)); metrics.setDiskTotal(bytes2GB(total)); - metrics.setDiskUsage(metrics.getDiskUsed() / metrics.getDiskTotal() * 1.0); - - // 在Worker完成分数计算,减小Server压力 - metrics.calculateScore(); - return metrics; + metrics.setDiskUsage(miniDouble(metrics.getDiskUsed() / metrics.getDiskTotal())); } - private static double bytes2GB(long bytes) { - return bytes / 1024.0 / 1024 / 1024; + return miniDouble(bytes / 1024.0 / 1024 / 1024); + } + + private static double miniDouble(double origin) { + return Double.parseDouble(NF.format(origin)); } } diff --git a/powerjob-worker/src/test/java/com/github/kfcfans/powerjob/function/MonitorTest.java b/powerjob-worker/src/test/java/com/github/kfcfans/powerjob/function/MonitorTest.java new file mode 100644 index 00000000..d27390e2 --- /dev/null +++ b/powerjob-worker/src/test/java/com/github/kfcfans/powerjob/function/MonitorTest.java @@ -0,0 +1,55 @@ +package com.github.kfcfans.powerjob.function; + +import com.github.kfcfans.powerjob.common.model.SystemMetrics; +import com.github.kfcfans.powerjob.common.utils.JsonUtils; +import com.github.kfcfans.powerjob.worker.common.utils.SystemInfoUtils; +import com.google.common.base.Stopwatch; +import org.junit.jupiter.api.Test; + +import java.lang.management.ManagementFactory; +import java.lang.management.MemoryMXBean; +import java.lang.management.OperatingSystemMXBean; + +/** + * 测试监控指标 + * + * @author tjq + * @since 2020/8/1 + */ +public class MonitorTest { + + private static final OperatingSystemMXBean osMXBean = ManagementFactory.getOperatingSystemMXBean(); + private static final Runtime runtime = Runtime.getRuntime(); + + @Test + public void testGetSystemLoadAverage() { + for (int i = 0; i < 10000; i++) { + double average = osMXBean.getSystemLoadAverage(); + System.out.println(average); + System.out.println(average / osMXBean.getAvailableProcessors()); + try { + Thread.sleep(1000); + }catch (Exception ignore) { + } + } + } + + @Test + public void testListDisk() { + Stopwatch sw = Stopwatch.createStarted(); + SystemMetrics systemMetrics = SystemInfoUtils.getSystemMetrics(); + System.out.println(JsonUtils.toJSONString(systemMetrics)); + System.out.println(sw.stop()); + Stopwatch sw2 = Stopwatch.createStarted(); + System.out.println(systemMetrics.calculateScore()); + System.out.println(sw2.stop()); + } + + @Test + public void testMemory() { + System.out.println("- used:" + (runtime.totalMemory() - runtime.freeMemory())); + MemoryMXBean memoryMXBean = ManagementFactory.getMemoryMXBean(); + System.out.println("heap used: " + memoryMXBean.getHeapMemoryUsage()); + System.out.println("noheap used: " + memoryMXBean.getNonHeapMemoryUsage()); + } +}