mirror of
https://github.com/PowerJob/PowerJob.git
synced 2025-07-17 00:00:04 +08:00
[fix] fix the bug of cpu metric
This commit is contained in:
parent
3b267a38c1
commit
105c8b5baf
@ -14,7 +14,7 @@ public class SystemMetrics implements OmsSerializable, Comparable<SystemMetrics>
|
||||
|
||||
// CPU核心数量
|
||||
private int cpuProcessors;
|
||||
// CPU负载(需要处以核心数)
|
||||
// CPU负载(负载 和 使用率 是两个完全不同的概念,Java 无法获取 CPU 使用率,只能获取负载)
|
||||
private double cpuLoad;
|
||||
|
||||
// 内存(单位 GB)
|
||||
@ -38,7 +38,7 @@ public class SystemMetrics implements OmsSerializable, Comparable<SystemMetrics>
|
||||
}
|
||||
|
||||
/**
|
||||
* 计算得分情况,内存 then CPU then 磁盘(磁盘必须有1G以上的剩余空间)
|
||||
* 计算得分情况,内存 & CPU (磁盘不参与计算)
|
||||
* @return 得分情况
|
||||
*/
|
||||
public int calculateScore() {
|
||||
@ -47,13 +47,16 @@ public class SystemMetrics implements OmsSerializable, Comparable<SystemMetrics>
|
||||
return score;
|
||||
}
|
||||
|
||||
double availableCPUCores = cpuProcessors * cpuLoad;
|
||||
double availableMemory = jvmMaxMemory - jvmUsedMemory;
|
||||
// 对于 TaskTracker 来说,内存是任务顺利完成的关键,因此内存 2 块钱 1GB
|
||||
double memScore = (jvmMaxMemory - jvmUsedMemory) * 2;
|
||||
// CPU 剩余负载,1 块钱 1 斤
|
||||
double cpuScore = cpuProcessors - cpuLoad;
|
||||
// Indian Windows 无法获取 CpuLoad,为 -1,固定为 1
|
||||
if (cpuScore > cpuProcessors) {
|
||||
cpuScore = 1;
|
||||
}
|
||||
|
||||
// Windows下无法获取CPU可用核心数,值固定为-1
|
||||
cpuLoad = Math.max(0, cpuLoad);
|
||||
|
||||
return (int) (availableMemory * 2 + availableCPUCores);
|
||||
return (int) (memScore + cpuScore);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -65,9 +68,17 @@ public class SystemMetrics implements OmsSerializable, Comparable<SystemMetrics>
|
||||
*/
|
||||
public boolean available(double minCPUCores, double minMemorySpace, double minDiskSpace) {
|
||||
|
||||
double currentCpuCores = Math.max(cpuLoad * cpuProcessors, 0);
|
||||
double currentMemory = jvmMaxMemory - jvmUsedMemory;
|
||||
double currentDisk = diskTotal - diskUsed;
|
||||
return currentCpuCores >= minCPUCores && currentMemory >= minMemorySpace && currentDisk >= minDiskSpace;
|
||||
double availableMemory = jvmMaxMemory - jvmUsedMemory;
|
||||
double availableDisk = diskTotal - diskUsed;
|
||||
|
||||
if (availableMemory < minMemorySpace || availableDisk < minDiskSpace) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// cpuLoad 为负数代表无法获取,不判断。等于 0 为最理想情况,CPU 空载,不需要判断
|
||||
if (cpuLoad <= 0 || minCPUCores <= 0) {
|
||||
return true;
|
||||
}
|
||||
return minCPUCores < (cpuProcessors - cpuLoad);
|
||||
}
|
||||
}
|
||||
|
@ -22,13 +22,13 @@ import java.util.Map;
|
||||
public class ClusterStatusHolder {
|
||||
|
||||
// 集群所属的应用名称
|
||||
private String appName;
|
||||
private final String appName;
|
||||
// 集群中所有机器的健康状态
|
||||
private Map<String, SystemMetrics> address2Metrics;
|
||||
private final Map<String, SystemMetrics> address2Metrics;
|
||||
// 集群中所有机器的容器部署状态 containerId -> (workerAddress -> containerInfo)
|
||||
private Map<Long, Map<String, DeployedContainerInfo>> containerId2Infos;
|
||||
// 集群中所有机器的最后心跳时间
|
||||
private Map<String, Long> address2ActiveTime;
|
||||
private final Map<String, Long> address2ActiveTime;
|
||||
|
||||
private static final long WORKER_TIMEOUT_MS = 60000;
|
||||
|
||||
@ -78,11 +78,14 @@ public class ClusterStatusHolder {
|
||||
address2Metrics.forEach((address, metrics) -> {
|
||||
|
||||
if (timeout(address)) {
|
||||
log.info("[ClusterStatusHolder] worker(address={},metrics={}) was filtered because of timeout, last active time is {}.", address, metrics, address2ActiveTime.get(address));
|
||||
return;
|
||||
}
|
||||
// 判断指标
|
||||
if (metrics.available(minCPUCores, minMemorySpace, minDiskSpace)) {
|
||||
workers.add(address);
|
||||
}else {
|
||||
log.info("[ClusterStatusHolder] worker(address={},metrics={}) was filtered by config(minCPUCores={},minMemory={},minDiskSpace={})", address, metrics, minCPUCores, minMemorySpace, minDiskSpace);
|
||||
}
|
||||
});
|
||||
|
||||
|
@ -25,37 +25,35 @@ public class WorkerStatusVO {
|
||||
private int status;
|
||||
|
||||
// 12.3%(4 cores)
|
||||
private static final String CPU_FORMAT = "%s%%(%d cores)";
|
||||
private static final String CPU_FORMAT = "%s / %s cores";
|
||||
// 27.7%(2.9/8.0 GB)
|
||||
private static final String OTHER_FORMAT = "%s%%(%s/%s GB)";
|
||||
private static final String OTHER_FORMAT = "%s%%(%s / %s GB)";
|
||||
private static final DecimalFormat df = new DecimalFormat("#.#");
|
||||
|
||||
private static final double threshold = 0.8;
|
||||
private static final double THRESHOLD = 0.8;
|
||||
|
||||
public WorkerStatusVO(String address, SystemMetrics systemMetrics) {
|
||||
this.status = 1;
|
||||
this.address = address;
|
||||
|
||||
String cpuL = df.format(systemMetrics.getCpuLoad() * 100);
|
||||
this.cpuLoad = String.format(CPU_FORMAT, cpuL, systemMetrics.getCpuProcessors());
|
||||
|
||||
this.cpuLoad = String.format(CPU_FORMAT, df.format(systemMetrics.getCpuLoad()), systemMetrics.getCpuProcessors());
|
||||
if (systemMetrics.getCpuLoad() > systemMetrics.getCpuProcessors() * THRESHOLD) {
|
||||
this.status ++;
|
||||
}
|
||||
|
||||
String menL = df.format(systemMetrics.getJvmMemoryUsage() * 100);
|
||||
String menUsed = df.format(systemMetrics.getJvmUsedMemory());
|
||||
String menMax = df.format(systemMetrics.getJvmMaxMemory());
|
||||
this.memoryLoad = String.format(OTHER_FORMAT, menL, menUsed, menMax);
|
||||
if (systemMetrics.getJvmMemoryUsage() > THRESHOLD) {
|
||||
this.status ++;
|
||||
}
|
||||
|
||||
String diskL = df.format(systemMetrics.getDiskUsage() * 100);
|
||||
String diskUsed = df.format(systemMetrics.getDiskUsed());
|
||||
String diskMax = df.format(systemMetrics.getDiskTotal());
|
||||
this.diskLoad = String.format(OTHER_FORMAT, diskL, diskUsed, diskMax);
|
||||
|
||||
|
||||
if (systemMetrics.getCpuLoad() < threshold && systemMetrics.getDiskUsage() < threshold && systemMetrics.getJvmMemoryUsage() < threshold) {
|
||||
status = 1;
|
||||
}else if (systemMetrics.getCpuLoad() > threshold && systemMetrics.getDiskUsage() > threshold && systemMetrics.getJvmMemoryUsage() > threshold) {
|
||||
status = 3;
|
||||
}else {
|
||||
status = 2;
|
||||
if (systemMetrics.getDiskUsage() > THRESHOLD) {
|
||||
this.status ++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1,5 +1,7 @@
|
||||
package com.github.kfcfans.powerjob.worker.common;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.net.JarURLConnection;
|
||||
@ -17,6 +19,8 @@ import java.util.jar.JarFile;
|
||||
*/
|
||||
public final class OmsWorkerVersion {
|
||||
|
||||
private static String CACHE = null;
|
||||
|
||||
/**
|
||||
* Return the full version string of the present OhMyScheduler-Worker codebase, or {@code null}
|
||||
* if it cannot be determined.
|
||||
@ -24,7 +28,11 @@ public final class OmsWorkerVersion {
|
||||
* @see Package#getImplementationVersion()
|
||||
*/
|
||||
public static String getVersion() {
|
||||
return determineSpringBootVersion();
|
||||
if (StringUtils.isNotEmpty(CACHE)) {
|
||||
return CACHE;
|
||||
}
|
||||
CACHE = determineSpringBootVersion();
|
||||
return CACHE;
|
||||
}
|
||||
|
||||
private static String determineSpringBootVersion() {
|
||||
|
@ -5,6 +5,8 @@ import com.github.kfcfans.powerjob.common.model.SystemMetrics;
|
||||
import java.io.File;
|
||||
import java.lang.management.ManagementFactory;
|
||||
import java.lang.management.OperatingSystemMXBean;
|
||||
import java.math.RoundingMode;
|
||||
import java.text.NumberFormat;
|
||||
|
||||
/**
|
||||
* 系统信息工具,用于采集监控指标
|
||||
@ -14,26 +16,45 @@ import java.lang.management.OperatingSystemMXBean;
|
||||
*/
|
||||
public class SystemInfoUtils {
|
||||
|
||||
private static final NumberFormat NF = NumberFormat.getNumberInstance();
|
||||
static {
|
||||
NF.setMaximumFractionDigits(4);
|
||||
NF.setMinimumFractionDigits(4);
|
||||
NF.setRoundingMode(RoundingMode.HALF_UP);
|
||||
}
|
||||
|
||||
// JMX bean can be accessed externally and is meant for management tools like hyperic ( or even nagios ) - It would delegate to Runtime anyway.
|
||||
private static final Runtime runtime = Runtime.getRuntime();
|
||||
private static OperatingSystemMXBean osMXBean = ManagementFactory.getOperatingSystemMXBean();
|
||||
private static final OperatingSystemMXBean osMXBean = ManagementFactory.getOperatingSystemMXBean();
|
||||
|
||||
public static SystemMetrics getSystemMetrics() {
|
||||
|
||||
SystemMetrics metrics = new SystemMetrics();
|
||||
|
||||
// CPU 信息
|
||||
metrics.setCpuProcessors(osMXBean.getAvailableProcessors());
|
||||
metrics.setCpuLoad(osMXBean.getSystemLoadAverage() / osMXBean.getAvailableProcessors());
|
||||
fillCPUInfo(metrics);
|
||||
fillMemoryInfo(metrics);
|
||||
fillDiskInfo(metrics);
|
||||
|
||||
// 在Worker完成分数计算,减小Server压力
|
||||
metrics.calculateScore();
|
||||
return metrics;
|
||||
}
|
||||
|
||||
private static void fillCPUInfo(SystemMetrics metrics) {
|
||||
metrics.setCpuProcessors(osMXBean.getAvailableProcessors());
|
||||
metrics.setCpuLoad(miniDouble(osMXBean.getSystemLoadAverage()));
|
||||
}
|
||||
|
||||
private static void fillMemoryInfo(SystemMetrics metrics) {
|
||||
// JVM内存信息(maxMemory指JVM能从操作系统获取的最大内存,即-Xmx参数设置的值,totalMemory指JVM当前持久的总内存)
|
||||
metrics.setJvmMaxMemory(bytes2GB(runtime.maxMemory()));
|
||||
// 已使用内存:当前申请总量 - 当前空余量
|
||||
metrics.setJvmUsedMemory(bytes2GB(runtime.totalMemory() - runtime.freeMemory()));
|
||||
// 百分比,直接 * 100
|
||||
metrics.setJvmMemoryUsage(1.0 * metrics.getJvmUsedMemory() / runtime.maxMemory());
|
||||
// 已用内存比例
|
||||
metrics.setJvmMemoryUsage(miniDouble(metrics.getJvmUsedMemory() / runtime.maxMemory()));
|
||||
}
|
||||
|
||||
// 磁盘信息
|
||||
private static void fillDiskInfo(SystemMetrics metrics) {
|
||||
long free = 0;
|
||||
long total = 0;
|
||||
File[] roots = File.listRoots();
|
||||
@ -44,16 +65,15 @@ public class SystemInfoUtils {
|
||||
|
||||
metrics.setDiskUsed(bytes2GB(total - free));
|
||||
metrics.setDiskTotal(bytes2GB(total));
|
||||
metrics.setDiskUsage(metrics.getDiskUsed() / metrics.getDiskTotal() * 1.0);
|
||||
|
||||
// 在Worker完成分数计算,减小Server压力
|
||||
metrics.calculateScore();
|
||||
return metrics;
|
||||
metrics.setDiskUsage(miniDouble(metrics.getDiskUsed() / metrics.getDiskTotal()));
|
||||
}
|
||||
|
||||
|
||||
private static double bytes2GB(long bytes) {
|
||||
return bytes / 1024.0 / 1024 / 1024;
|
||||
return miniDouble(bytes / 1024.0 / 1024 / 1024);
|
||||
}
|
||||
|
||||
private static double miniDouble(double origin) {
|
||||
return Double.parseDouble(NF.format(origin));
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -0,0 +1,55 @@
|
||||
package com.github.kfcfans.powerjob.function;
|
||||
|
||||
import com.github.kfcfans.powerjob.common.model.SystemMetrics;
|
||||
import com.github.kfcfans.powerjob.common.utils.JsonUtils;
|
||||
import com.github.kfcfans.powerjob.worker.common.utils.SystemInfoUtils;
|
||||
import com.google.common.base.Stopwatch;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.lang.management.ManagementFactory;
|
||||
import java.lang.management.MemoryMXBean;
|
||||
import java.lang.management.OperatingSystemMXBean;
|
||||
|
||||
/**
|
||||
* 测试监控指标
|
||||
*
|
||||
* @author tjq
|
||||
* @since 2020/8/1
|
||||
*/
|
||||
public class MonitorTest {
|
||||
|
||||
private static final OperatingSystemMXBean osMXBean = ManagementFactory.getOperatingSystemMXBean();
|
||||
private static final Runtime runtime = Runtime.getRuntime();
|
||||
|
||||
@Test
|
||||
public void testGetSystemLoadAverage() {
|
||||
for (int i = 0; i < 10000; i++) {
|
||||
double average = osMXBean.getSystemLoadAverage();
|
||||
System.out.println(average);
|
||||
System.out.println(average / osMXBean.getAvailableProcessors());
|
||||
try {
|
||||
Thread.sleep(1000);
|
||||
}catch (Exception ignore) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testListDisk() {
|
||||
Stopwatch sw = Stopwatch.createStarted();
|
||||
SystemMetrics systemMetrics = SystemInfoUtils.getSystemMetrics();
|
||||
System.out.println(JsonUtils.toJSONString(systemMetrics));
|
||||
System.out.println(sw.stop());
|
||||
Stopwatch sw2 = Stopwatch.createStarted();
|
||||
System.out.println(systemMetrics.calculateScore());
|
||||
System.out.println(sw2.stop());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMemory() {
|
||||
System.out.println("- used:" + (runtime.totalMemory() - runtime.freeMemory()));
|
||||
MemoryMXBean memoryMXBean = ManagementFactory.getMemoryMXBean();
|
||||
System.out.println("heap used: " + memoryMXBean.getHeapMemoryUsage());
|
||||
System.out.println("noheap used: " + memoryMXBean.getNonHeapMemoryUsage());
|
||||
}
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user