[fix] fix the bug of cpu metric

This commit is contained in:
朱八 2020-08-01 21:24:56 +08:00
parent 3b267a38c1
commit 105c8b5baf
6 changed files with 140 additions and 45 deletions

View File

@ -14,7 +14,7 @@ public class SystemMetrics implements OmsSerializable, Comparable<SystemMetrics>
// CPU核心数量 // CPU核心数量
private int cpuProcessors; private int cpuProcessors;
// CPU负载需要处以核心数 // CPU负载负载 使用率 是两个完全不同的概念Java 无法获取 CPU 使用率只能获取负载
private double cpuLoad; private double cpuLoad;
// 内存单位 GB // 内存单位 GB
@ -38,7 +38,7 @@ public class SystemMetrics implements OmsSerializable, Comparable<SystemMetrics>
} }
/** /**
* 计算得分情况内存 then CPU then 磁盘磁盘必须有1G以上的剩余空间 * 计算得分情况内存 & CPU (磁盘不参与计算)
* @return 得分情况 * @return 得分情况
*/ */
public int calculateScore() { public int calculateScore() {
@ -47,13 +47,16 @@ public class SystemMetrics implements OmsSerializable, Comparable<SystemMetrics>
return score; return score;
} }
double availableCPUCores = cpuProcessors * cpuLoad; // 对于 TaskTracker 来说内存是任务顺利完成的关键因此内存 2 块钱 1GB
double availableMemory = jvmMaxMemory - jvmUsedMemory; double memScore = (jvmMaxMemory - jvmUsedMemory) * 2;
// CPU 剩余负载1 块钱 1
double cpuScore = cpuProcessors - cpuLoad;
// Indian Windows 无法获取 CpuLoad -1固定为 1
if (cpuScore > cpuProcessors) {
cpuScore = 1;
}
// Windows下无法获取CPU可用核心数值固定为-1 return (int) (memScore + cpuScore);
cpuLoad = Math.max(0, cpuLoad);
return (int) (availableMemory * 2 + availableCPUCores);
} }
/** /**
@ -65,9 +68,17 @@ public class SystemMetrics implements OmsSerializable, Comparable<SystemMetrics>
*/ */
public boolean available(double minCPUCores, double minMemorySpace, double minDiskSpace) { public boolean available(double minCPUCores, double minMemorySpace, double minDiskSpace) {
double currentCpuCores = Math.max(cpuLoad * cpuProcessors, 0); double availableMemory = jvmMaxMemory - jvmUsedMemory;
double currentMemory = jvmMaxMemory - jvmUsedMemory; double availableDisk = diskTotal - diskUsed;
double currentDisk = diskTotal - diskUsed;
return currentCpuCores >= minCPUCores && currentMemory >= minMemorySpace && currentDisk >= minDiskSpace; if (availableMemory < minMemorySpace || availableDisk < minDiskSpace) {
return false;
}
// cpuLoad 为负数代表无法获取不判断等于 0 为最理想情况CPU 空载不需要判断
if (cpuLoad <= 0 || minCPUCores <= 0) {
return true;
}
return minCPUCores < (cpuProcessors - cpuLoad);
} }
} }

View File

@ -22,13 +22,13 @@ import java.util.Map;
public class ClusterStatusHolder { public class ClusterStatusHolder {
// 集群所属的应用名称 // 集群所属的应用名称
private String appName; private final String appName;
// 集群中所有机器的健康状态 // 集群中所有机器的健康状态
private Map<String, SystemMetrics> address2Metrics; private final Map<String, SystemMetrics> address2Metrics;
// 集群中所有机器的容器部署状态 containerId -> (workerAddress -> containerInfo) // 集群中所有机器的容器部署状态 containerId -> (workerAddress -> containerInfo)
private Map<Long, Map<String, DeployedContainerInfo>> containerId2Infos; private Map<Long, Map<String, DeployedContainerInfo>> containerId2Infos;
// 集群中所有机器的最后心跳时间 // 集群中所有机器的最后心跳时间
private Map<String, Long> address2ActiveTime; private final Map<String, Long> address2ActiveTime;
private static final long WORKER_TIMEOUT_MS = 60000; private static final long WORKER_TIMEOUT_MS = 60000;
@ -78,11 +78,14 @@ public class ClusterStatusHolder {
address2Metrics.forEach((address, metrics) -> { address2Metrics.forEach((address, metrics) -> {
if (timeout(address)) { if (timeout(address)) {
log.info("[ClusterStatusHolder] worker(address={},metrics={}) was filtered because of timeout, last active time is {}.", address, metrics, address2ActiveTime.get(address));
return; return;
} }
// 判断指标 // 判断指标
if (metrics.available(minCPUCores, minMemorySpace, minDiskSpace)) { if (metrics.available(minCPUCores, minMemorySpace, minDiskSpace)) {
workers.add(address); workers.add(address);
}else {
log.info("[ClusterStatusHolder] worker(address={},metrics={}) was filtered by config(minCPUCores={},minMemory={},minDiskSpace={})", address, metrics, minCPUCores, minMemorySpace, minDiskSpace);
} }
}); });

View File

@ -25,37 +25,35 @@ public class WorkerStatusVO {
private int status; private int status;
// 12.3%(4 cores) // 12.3%(4 cores)
private static final String CPU_FORMAT = "%s%%(%d cores)"; private static final String CPU_FORMAT = "%s / %s cores";
// 27.7%(2.9/8.0 GB) // 27.7%(2.9/8.0 GB)
private static final String OTHER_FORMAT = "%s%%%s/%s GB"; private static final String OTHER_FORMAT = "%s%%%s / %s GB";
private static final DecimalFormat df = new DecimalFormat("#.#"); private static final DecimalFormat df = new DecimalFormat("#.#");
private static final double threshold = 0.8; private static final double THRESHOLD = 0.8;
public WorkerStatusVO(String address, SystemMetrics systemMetrics) { public WorkerStatusVO(String address, SystemMetrics systemMetrics) {
this.status = 1;
this.address = address; this.address = address;
this.cpuLoad = String.format(CPU_FORMAT, df.format(systemMetrics.getCpuLoad()), systemMetrics.getCpuProcessors());
String cpuL = df.format(systemMetrics.getCpuLoad() * 100); if (systemMetrics.getCpuLoad() > systemMetrics.getCpuProcessors() * THRESHOLD) {
this.cpuLoad = String.format(CPU_FORMAT, cpuL, systemMetrics.getCpuProcessors()); this.status ++;
}
String menL = df.format(systemMetrics.getJvmMemoryUsage() * 100); String menL = df.format(systemMetrics.getJvmMemoryUsage() * 100);
String menUsed = df.format(systemMetrics.getJvmUsedMemory()); String menUsed = df.format(systemMetrics.getJvmUsedMemory());
String menMax = df.format(systemMetrics.getJvmMaxMemory()); String menMax = df.format(systemMetrics.getJvmMaxMemory());
this.memoryLoad = String.format(OTHER_FORMAT, menL, menUsed, menMax); this.memoryLoad = String.format(OTHER_FORMAT, menL, menUsed, menMax);
if (systemMetrics.getJvmMemoryUsage() > THRESHOLD) {
this.status ++;
}
String diskL = df.format(systemMetrics.getDiskUsage() * 100); String diskL = df.format(systemMetrics.getDiskUsage() * 100);
String diskUsed = df.format(systemMetrics.getDiskUsed()); String diskUsed = df.format(systemMetrics.getDiskUsed());
String diskMax = df.format(systemMetrics.getDiskTotal()); String diskMax = df.format(systemMetrics.getDiskTotal());
this.diskLoad = String.format(OTHER_FORMAT, diskL, diskUsed, diskMax); this.diskLoad = String.format(OTHER_FORMAT, diskL, diskUsed, diskMax);
if (systemMetrics.getDiskUsage() > THRESHOLD) {
this.status ++;
if (systemMetrics.getCpuLoad() < threshold && systemMetrics.getDiskUsage() < threshold && systemMetrics.getJvmMemoryUsage() < threshold) {
status = 1;
}else if (systemMetrics.getCpuLoad() > threshold && systemMetrics.getDiskUsage() > threshold && systemMetrics.getJvmMemoryUsage() > threshold) {
status = 3;
}else {
status = 2;
} }
} }
} }

View File

@ -1,5 +1,7 @@
package com.github.kfcfans.powerjob.worker.common; package com.github.kfcfans.powerjob.worker.common;
import org.apache.commons.lang3.StringUtils;
import java.io.File; import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.net.JarURLConnection; import java.net.JarURLConnection;
@ -17,6 +19,8 @@ import java.util.jar.JarFile;
*/ */
public final class OmsWorkerVersion { public final class OmsWorkerVersion {
private static String CACHE = null;
/** /**
* Return the full version string of the present OhMyScheduler-Worker codebase, or {@code null} * Return the full version string of the present OhMyScheduler-Worker codebase, or {@code null}
* if it cannot be determined. * if it cannot be determined.
@ -24,7 +28,11 @@ public final class OmsWorkerVersion {
* @see Package#getImplementationVersion() * @see Package#getImplementationVersion()
*/ */
public static String getVersion() { public static String getVersion() {
return determineSpringBootVersion(); if (StringUtils.isNotEmpty(CACHE)) {
return CACHE;
}
CACHE = determineSpringBootVersion();
return CACHE;
} }
private static String determineSpringBootVersion() { private static String determineSpringBootVersion() {

View File

@ -5,6 +5,8 @@ import com.github.kfcfans.powerjob.common.model.SystemMetrics;
import java.io.File; import java.io.File;
import java.lang.management.ManagementFactory; import java.lang.management.ManagementFactory;
import java.lang.management.OperatingSystemMXBean; import java.lang.management.OperatingSystemMXBean;
import java.math.RoundingMode;
import java.text.NumberFormat;
/** /**
* 系统信息工具用于采集监控指标 * 系统信息工具用于采集监控指标
@ -14,26 +16,45 @@ import java.lang.management.OperatingSystemMXBean;
*/ */
public class SystemInfoUtils { public class SystemInfoUtils {
private static final NumberFormat NF = NumberFormat.getNumberInstance();
static {
NF.setMaximumFractionDigits(4);
NF.setMinimumFractionDigits(4);
NF.setRoundingMode(RoundingMode.HALF_UP);
}
// JMX bean can be accessed externally and is meant for management tools like hyperic ( or even nagios ) - It would delegate to Runtime anyway. // JMX bean can be accessed externally and is meant for management tools like hyperic ( or even nagios ) - It would delegate to Runtime anyway.
private static final Runtime runtime = Runtime.getRuntime(); private static final Runtime runtime = Runtime.getRuntime();
private static OperatingSystemMXBean osMXBean = ManagementFactory.getOperatingSystemMXBean(); private static final OperatingSystemMXBean osMXBean = ManagementFactory.getOperatingSystemMXBean();
public static SystemMetrics getSystemMetrics() { public static SystemMetrics getSystemMetrics() {
SystemMetrics metrics = new SystemMetrics(); SystemMetrics metrics = new SystemMetrics();
// CPU 信息 fillCPUInfo(metrics);
metrics.setCpuProcessors(osMXBean.getAvailableProcessors()); fillMemoryInfo(metrics);
metrics.setCpuLoad(osMXBean.getSystemLoadAverage() / osMXBean.getAvailableProcessors()); fillDiskInfo(metrics);
// 在Worker完成分数计算减小Server压力
metrics.calculateScore();
return metrics;
}
private static void fillCPUInfo(SystemMetrics metrics) {
metrics.setCpuProcessors(osMXBean.getAvailableProcessors());
metrics.setCpuLoad(miniDouble(osMXBean.getSystemLoadAverage()));
}
private static void fillMemoryInfo(SystemMetrics metrics) {
// JVM内存信息(maxMemory指JVM能从操作系统获取的最大内存-Xmx参数设置的值totalMemory指JVM当前持久的总内存) // JVM内存信息(maxMemory指JVM能从操作系统获取的最大内存-Xmx参数设置的值totalMemory指JVM当前持久的总内存)
metrics.setJvmMaxMemory(bytes2GB(runtime.maxMemory())); metrics.setJvmMaxMemory(bytes2GB(runtime.maxMemory()));
// 已使用内存当前申请总量 - 当前空余量 // 已使用内存当前申请总量 - 当前空余量
metrics.setJvmUsedMemory(bytes2GB(runtime.totalMemory() - runtime.freeMemory())); metrics.setJvmUsedMemory(bytes2GB(runtime.totalMemory() - runtime.freeMemory()));
// 百分比直接 * 100 // 已用内存比例
metrics.setJvmMemoryUsage(1.0 * metrics.getJvmUsedMemory() / runtime.maxMemory()); metrics.setJvmMemoryUsage(miniDouble(metrics.getJvmUsedMemory() / runtime.maxMemory()));
}
// 磁盘信息 private static void fillDiskInfo(SystemMetrics metrics) {
long free = 0; long free = 0;
long total = 0; long total = 0;
File[] roots = File.listRoots(); File[] roots = File.listRoots();
@ -44,16 +65,15 @@ public class SystemInfoUtils {
metrics.setDiskUsed(bytes2GB(total - free)); metrics.setDiskUsed(bytes2GB(total - free));
metrics.setDiskTotal(bytes2GB(total)); metrics.setDiskTotal(bytes2GB(total));
metrics.setDiskUsage(metrics.getDiskUsed() / metrics.getDiskTotal() * 1.0); metrics.setDiskUsage(miniDouble(metrics.getDiskUsed() / metrics.getDiskTotal()));
// 在Worker完成分数计算减小Server压力
metrics.calculateScore();
return metrics;
} }
private static double bytes2GB(long bytes) { private static double bytes2GB(long bytes) {
return bytes / 1024.0 / 1024 / 1024; return miniDouble(bytes / 1024.0 / 1024 / 1024);
}
private static double miniDouble(double origin) {
return Double.parseDouble(NF.format(origin));
} }
} }

View File

@ -0,0 +1,55 @@
package com.github.kfcfans.powerjob.function;
import com.github.kfcfans.powerjob.common.model.SystemMetrics;
import com.github.kfcfans.powerjob.common.utils.JsonUtils;
import com.github.kfcfans.powerjob.worker.common.utils.SystemInfoUtils;
import com.google.common.base.Stopwatch;
import org.junit.jupiter.api.Test;
import java.lang.management.ManagementFactory;
import java.lang.management.MemoryMXBean;
import java.lang.management.OperatingSystemMXBean;
/**
* 测试监控指标
*
* @author tjq
* @since 2020/8/1
*/
public class MonitorTest {
private static final OperatingSystemMXBean osMXBean = ManagementFactory.getOperatingSystemMXBean();
private static final Runtime runtime = Runtime.getRuntime();
@Test
public void testGetSystemLoadAverage() {
for (int i = 0; i < 10000; i++) {
double average = osMXBean.getSystemLoadAverage();
System.out.println(average);
System.out.println(average / osMXBean.getAvailableProcessors());
try {
Thread.sleep(1000);
}catch (Exception ignore) {
}
}
}
@Test
public void testListDisk() {
Stopwatch sw = Stopwatch.createStarted();
SystemMetrics systemMetrics = SystemInfoUtils.getSystemMetrics();
System.out.println(JsonUtils.toJSONString(systemMetrics));
System.out.println(sw.stop());
Stopwatch sw2 = Stopwatch.createStarted();
System.out.println(systemMetrics.calculateScore());
System.out.println(sw2.stop());
}
@Test
public void testMemory() {
System.out.println("- used:" + (runtime.totalMemory() - runtime.freeMemory()));
MemoryMXBean memoryMXBean = ManagementFactory.getMemoryMXBean();
System.out.println("heap used: " + memoryMXBean.getHeapMemoryUsage());
System.out.println("noheap used: " + memoryMXBean.getNonHeapMemoryUsage());
}
}