mirror of
https://github.com/PowerJob/PowerJob.git
synced 2025-07-17 00:00:04 +08:00
[fix] fix the bug of cpu metric
This commit is contained in:
parent
3b267a38c1
commit
105c8b5baf
@ -14,7 +14,7 @@ public class SystemMetrics implements OmsSerializable, Comparable<SystemMetrics>
|
|||||||
|
|
||||||
// CPU核心数量
|
// CPU核心数量
|
||||||
private int cpuProcessors;
|
private int cpuProcessors;
|
||||||
// CPU负载(需要处以核心数)
|
// CPU负载(负载 和 使用率 是两个完全不同的概念,Java 无法获取 CPU 使用率,只能获取负载)
|
||||||
private double cpuLoad;
|
private double cpuLoad;
|
||||||
|
|
||||||
// 内存(单位 GB)
|
// 内存(单位 GB)
|
||||||
@ -38,7 +38,7 @@ public class SystemMetrics implements OmsSerializable, Comparable<SystemMetrics>
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 计算得分情况,内存 then CPU then 磁盘(磁盘必须有1G以上的剩余空间)
|
* 计算得分情况,内存 & CPU (磁盘不参与计算)
|
||||||
* @return 得分情况
|
* @return 得分情况
|
||||||
*/
|
*/
|
||||||
public int calculateScore() {
|
public int calculateScore() {
|
||||||
@ -47,13 +47,16 @@ public class SystemMetrics implements OmsSerializable, Comparable<SystemMetrics>
|
|||||||
return score;
|
return score;
|
||||||
}
|
}
|
||||||
|
|
||||||
double availableCPUCores = cpuProcessors * cpuLoad;
|
// 对于 TaskTracker 来说,内存是任务顺利完成的关键,因此内存 2 块钱 1GB
|
||||||
double availableMemory = jvmMaxMemory - jvmUsedMemory;
|
double memScore = (jvmMaxMemory - jvmUsedMemory) * 2;
|
||||||
|
// CPU 剩余负载,1 块钱 1 斤
|
||||||
|
double cpuScore = cpuProcessors - cpuLoad;
|
||||||
|
// Indian Windows 无法获取 CpuLoad,为 -1,固定为 1
|
||||||
|
if (cpuScore > cpuProcessors) {
|
||||||
|
cpuScore = 1;
|
||||||
|
}
|
||||||
|
|
||||||
// Windows下无法获取CPU可用核心数,值固定为-1
|
return (int) (memScore + cpuScore);
|
||||||
cpuLoad = Math.max(0, cpuLoad);
|
|
||||||
|
|
||||||
return (int) (availableMemory * 2 + availableCPUCores);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -65,9 +68,17 @@ public class SystemMetrics implements OmsSerializable, Comparable<SystemMetrics>
|
|||||||
*/
|
*/
|
||||||
public boolean available(double minCPUCores, double minMemorySpace, double minDiskSpace) {
|
public boolean available(double minCPUCores, double minMemorySpace, double minDiskSpace) {
|
||||||
|
|
||||||
double currentCpuCores = Math.max(cpuLoad * cpuProcessors, 0);
|
double availableMemory = jvmMaxMemory - jvmUsedMemory;
|
||||||
double currentMemory = jvmMaxMemory - jvmUsedMemory;
|
double availableDisk = diskTotal - diskUsed;
|
||||||
double currentDisk = diskTotal - diskUsed;
|
|
||||||
return currentCpuCores >= minCPUCores && currentMemory >= minMemorySpace && currentDisk >= minDiskSpace;
|
if (availableMemory < minMemorySpace || availableDisk < minDiskSpace) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// cpuLoad 为负数代表无法获取,不判断。等于 0 为最理想情况,CPU 空载,不需要判断
|
||||||
|
if (cpuLoad <= 0 || minCPUCores <= 0) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return minCPUCores < (cpuProcessors - cpuLoad);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -22,13 +22,13 @@ import java.util.Map;
|
|||||||
public class ClusterStatusHolder {
|
public class ClusterStatusHolder {
|
||||||
|
|
||||||
// 集群所属的应用名称
|
// 集群所属的应用名称
|
||||||
private String appName;
|
private final String appName;
|
||||||
// 集群中所有机器的健康状态
|
// 集群中所有机器的健康状态
|
||||||
private Map<String, SystemMetrics> address2Metrics;
|
private final Map<String, SystemMetrics> address2Metrics;
|
||||||
// 集群中所有机器的容器部署状态 containerId -> (workerAddress -> containerInfo)
|
// 集群中所有机器的容器部署状态 containerId -> (workerAddress -> containerInfo)
|
||||||
private Map<Long, Map<String, DeployedContainerInfo>> containerId2Infos;
|
private Map<Long, Map<String, DeployedContainerInfo>> containerId2Infos;
|
||||||
// 集群中所有机器的最后心跳时间
|
// 集群中所有机器的最后心跳时间
|
||||||
private Map<String, Long> address2ActiveTime;
|
private final Map<String, Long> address2ActiveTime;
|
||||||
|
|
||||||
private static final long WORKER_TIMEOUT_MS = 60000;
|
private static final long WORKER_TIMEOUT_MS = 60000;
|
||||||
|
|
||||||
@ -78,11 +78,14 @@ public class ClusterStatusHolder {
|
|||||||
address2Metrics.forEach((address, metrics) -> {
|
address2Metrics.forEach((address, metrics) -> {
|
||||||
|
|
||||||
if (timeout(address)) {
|
if (timeout(address)) {
|
||||||
|
log.info("[ClusterStatusHolder] worker(address={},metrics={}) was filtered because of timeout, last active time is {}.", address, metrics, address2ActiveTime.get(address));
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
// 判断指标
|
// 判断指标
|
||||||
if (metrics.available(minCPUCores, minMemorySpace, minDiskSpace)) {
|
if (metrics.available(minCPUCores, minMemorySpace, minDiskSpace)) {
|
||||||
workers.add(address);
|
workers.add(address);
|
||||||
|
}else {
|
||||||
|
log.info("[ClusterStatusHolder] worker(address={},metrics={}) was filtered by config(minCPUCores={},minMemory={},minDiskSpace={})", address, metrics, minCPUCores, minMemorySpace, minDiskSpace);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -25,37 +25,35 @@ public class WorkerStatusVO {
|
|||||||
private int status;
|
private int status;
|
||||||
|
|
||||||
// 12.3%(4 cores)
|
// 12.3%(4 cores)
|
||||||
private static final String CPU_FORMAT = "%s%%(%d cores)";
|
private static final String CPU_FORMAT = "%s / %s cores";
|
||||||
// 27.7%(2.9/8.0 GB)
|
// 27.7%(2.9/8.0 GB)
|
||||||
private static final String OTHER_FORMAT = "%s%%(%s/%s GB)";
|
private static final String OTHER_FORMAT = "%s%%(%s / %s GB)";
|
||||||
private static final DecimalFormat df = new DecimalFormat("#.#");
|
private static final DecimalFormat df = new DecimalFormat("#.#");
|
||||||
|
|
||||||
private static final double threshold = 0.8;
|
private static final double THRESHOLD = 0.8;
|
||||||
|
|
||||||
public WorkerStatusVO(String address, SystemMetrics systemMetrics) {
|
public WorkerStatusVO(String address, SystemMetrics systemMetrics) {
|
||||||
|
this.status = 1;
|
||||||
this.address = address;
|
this.address = address;
|
||||||
|
this.cpuLoad = String.format(CPU_FORMAT, df.format(systemMetrics.getCpuLoad()), systemMetrics.getCpuProcessors());
|
||||||
String cpuL = df.format(systemMetrics.getCpuLoad() * 100);
|
if (systemMetrics.getCpuLoad() > systemMetrics.getCpuProcessors() * THRESHOLD) {
|
||||||
this.cpuLoad = String.format(CPU_FORMAT, cpuL, systemMetrics.getCpuProcessors());
|
this.status ++;
|
||||||
|
}
|
||||||
|
|
||||||
String menL = df.format(systemMetrics.getJvmMemoryUsage() * 100);
|
String menL = df.format(systemMetrics.getJvmMemoryUsage() * 100);
|
||||||
String menUsed = df.format(systemMetrics.getJvmUsedMemory());
|
String menUsed = df.format(systemMetrics.getJvmUsedMemory());
|
||||||
String menMax = df.format(systemMetrics.getJvmMaxMemory());
|
String menMax = df.format(systemMetrics.getJvmMaxMemory());
|
||||||
this.memoryLoad = String.format(OTHER_FORMAT, menL, menUsed, menMax);
|
this.memoryLoad = String.format(OTHER_FORMAT, menL, menUsed, menMax);
|
||||||
|
if (systemMetrics.getJvmMemoryUsage() > THRESHOLD) {
|
||||||
|
this.status ++;
|
||||||
|
}
|
||||||
|
|
||||||
String diskL = df.format(systemMetrics.getDiskUsage() * 100);
|
String diskL = df.format(systemMetrics.getDiskUsage() * 100);
|
||||||
String diskUsed = df.format(systemMetrics.getDiskUsed());
|
String diskUsed = df.format(systemMetrics.getDiskUsed());
|
||||||
String diskMax = df.format(systemMetrics.getDiskTotal());
|
String diskMax = df.format(systemMetrics.getDiskTotal());
|
||||||
this.diskLoad = String.format(OTHER_FORMAT, diskL, diskUsed, diskMax);
|
this.diskLoad = String.format(OTHER_FORMAT, diskL, diskUsed, diskMax);
|
||||||
|
if (systemMetrics.getDiskUsage() > THRESHOLD) {
|
||||||
|
this.status ++;
|
||||||
if (systemMetrics.getCpuLoad() < threshold && systemMetrics.getDiskUsage() < threshold && systemMetrics.getJvmMemoryUsage() < threshold) {
|
|
||||||
status = 1;
|
|
||||||
}else if (systemMetrics.getCpuLoad() > threshold && systemMetrics.getDiskUsage() > threshold && systemMetrics.getJvmMemoryUsage() > threshold) {
|
|
||||||
status = 3;
|
|
||||||
}else {
|
|
||||||
status = 2;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,5 +1,7 @@
|
|||||||
package com.github.kfcfans.powerjob.worker.common;
|
package com.github.kfcfans.powerjob.worker.common;
|
||||||
|
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.net.JarURLConnection;
|
import java.net.JarURLConnection;
|
||||||
@ -17,6 +19,8 @@ import java.util.jar.JarFile;
|
|||||||
*/
|
*/
|
||||||
public final class OmsWorkerVersion {
|
public final class OmsWorkerVersion {
|
||||||
|
|
||||||
|
private static String CACHE = null;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Return the full version string of the present OhMyScheduler-Worker codebase, or {@code null}
|
* Return the full version string of the present OhMyScheduler-Worker codebase, or {@code null}
|
||||||
* if it cannot be determined.
|
* if it cannot be determined.
|
||||||
@ -24,7 +28,11 @@ public final class OmsWorkerVersion {
|
|||||||
* @see Package#getImplementationVersion()
|
* @see Package#getImplementationVersion()
|
||||||
*/
|
*/
|
||||||
public static String getVersion() {
|
public static String getVersion() {
|
||||||
return determineSpringBootVersion();
|
if (StringUtils.isNotEmpty(CACHE)) {
|
||||||
|
return CACHE;
|
||||||
|
}
|
||||||
|
CACHE = determineSpringBootVersion();
|
||||||
|
return CACHE;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static String determineSpringBootVersion() {
|
private static String determineSpringBootVersion() {
|
||||||
|
@ -5,6 +5,8 @@ import com.github.kfcfans.powerjob.common.model.SystemMetrics;
|
|||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.lang.management.ManagementFactory;
|
import java.lang.management.ManagementFactory;
|
||||||
import java.lang.management.OperatingSystemMXBean;
|
import java.lang.management.OperatingSystemMXBean;
|
||||||
|
import java.math.RoundingMode;
|
||||||
|
import java.text.NumberFormat;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 系统信息工具,用于采集监控指标
|
* 系统信息工具,用于采集监控指标
|
||||||
@ -14,26 +16,45 @@ import java.lang.management.OperatingSystemMXBean;
|
|||||||
*/
|
*/
|
||||||
public class SystemInfoUtils {
|
public class SystemInfoUtils {
|
||||||
|
|
||||||
|
private static final NumberFormat NF = NumberFormat.getNumberInstance();
|
||||||
|
static {
|
||||||
|
NF.setMaximumFractionDigits(4);
|
||||||
|
NF.setMinimumFractionDigits(4);
|
||||||
|
NF.setRoundingMode(RoundingMode.HALF_UP);
|
||||||
|
}
|
||||||
|
|
||||||
// JMX bean can be accessed externally and is meant for management tools like hyperic ( or even nagios ) - It would delegate to Runtime anyway.
|
// JMX bean can be accessed externally and is meant for management tools like hyperic ( or even nagios ) - It would delegate to Runtime anyway.
|
||||||
private static final Runtime runtime = Runtime.getRuntime();
|
private static final Runtime runtime = Runtime.getRuntime();
|
||||||
private static OperatingSystemMXBean osMXBean = ManagementFactory.getOperatingSystemMXBean();
|
private static final OperatingSystemMXBean osMXBean = ManagementFactory.getOperatingSystemMXBean();
|
||||||
|
|
||||||
public static SystemMetrics getSystemMetrics() {
|
public static SystemMetrics getSystemMetrics() {
|
||||||
|
|
||||||
SystemMetrics metrics = new SystemMetrics();
|
SystemMetrics metrics = new SystemMetrics();
|
||||||
|
|
||||||
// CPU 信息
|
fillCPUInfo(metrics);
|
||||||
metrics.setCpuProcessors(osMXBean.getAvailableProcessors());
|
fillMemoryInfo(metrics);
|
||||||
metrics.setCpuLoad(osMXBean.getSystemLoadAverage() / osMXBean.getAvailableProcessors());
|
fillDiskInfo(metrics);
|
||||||
|
|
||||||
|
// 在Worker完成分数计算,减小Server压力
|
||||||
|
metrics.calculateScore();
|
||||||
|
return metrics;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void fillCPUInfo(SystemMetrics metrics) {
|
||||||
|
metrics.setCpuProcessors(osMXBean.getAvailableProcessors());
|
||||||
|
metrics.setCpuLoad(miniDouble(osMXBean.getSystemLoadAverage()));
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void fillMemoryInfo(SystemMetrics metrics) {
|
||||||
// JVM内存信息(maxMemory指JVM能从操作系统获取的最大内存,即-Xmx参数设置的值,totalMemory指JVM当前持久的总内存)
|
// JVM内存信息(maxMemory指JVM能从操作系统获取的最大内存,即-Xmx参数设置的值,totalMemory指JVM当前持久的总内存)
|
||||||
metrics.setJvmMaxMemory(bytes2GB(runtime.maxMemory()));
|
metrics.setJvmMaxMemory(bytes2GB(runtime.maxMemory()));
|
||||||
// 已使用内存:当前申请总量 - 当前空余量
|
// 已使用内存:当前申请总量 - 当前空余量
|
||||||
metrics.setJvmUsedMemory(bytes2GB(runtime.totalMemory() - runtime.freeMemory()));
|
metrics.setJvmUsedMemory(bytes2GB(runtime.totalMemory() - runtime.freeMemory()));
|
||||||
// 百分比,直接 * 100
|
// 已用内存比例
|
||||||
metrics.setJvmMemoryUsage(1.0 * metrics.getJvmUsedMemory() / runtime.maxMemory());
|
metrics.setJvmMemoryUsage(miniDouble(metrics.getJvmUsedMemory() / runtime.maxMemory()));
|
||||||
|
}
|
||||||
|
|
||||||
// 磁盘信息
|
private static void fillDiskInfo(SystemMetrics metrics) {
|
||||||
long free = 0;
|
long free = 0;
|
||||||
long total = 0;
|
long total = 0;
|
||||||
File[] roots = File.listRoots();
|
File[] roots = File.listRoots();
|
||||||
@ -44,16 +65,15 @@ public class SystemInfoUtils {
|
|||||||
|
|
||||||
metrics.setDiskUsed(bytes2GB(total - free));
|
metrics.setDiskUsed(bytes2GB(total - free));
|
||||||
metrics.setDiskTotal(bytes2GB(total));
|
metrics.setDiskTotal(bytes2GB(total));
|
||||||
metrics.setDiskUsage(metrics.getDiskUsed() / metrics.getDiskTotal() * 1.0);
|
metrics.setDiskUsage(miniDouble(metrics.getDiskUsed() / metrics.getDiskTotal()));
|
||||||
|
|
||||||
// 在Worker完成分数计算,减小Server压力
|
|
||||||
metrics.calculateScore();
|
|
||||||
return metrics;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private static double bytes2GB(long bytes) {
|
private static double bytes2GB(long bytes) {
|
||||||
return bytes / 1024.0 / 1024 / 1024;
|
return miniDouble(bytes / 1024.0 / 1024 / 1024);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static double miniDouble(double origin) {
|
||||||
|
return Double.parseDouble(NF.format(origin));
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,55 @@
|
|||||||
|
package com.github.kfcfans.powerjob.function;
|
||||||
|
|
||||||
|
import com.github.kfcfans.powerjob.common.model.SystemMetrics;
|
||||||
|
import com.github.kfcfans.powerjob.common.utils.JsonUtils;
|
||||||
|
import com.github.kfcfans.powerjob.worker.common.utils.SystemInfoUtils;
|
||||||
|
import com.google.common.base.Stopwatch;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
import java.lang.management.ManagementFactory;
|
||||||
|
import java.lang.management.MemoryMXBean;
|
||||||
|
import java.lang.management.OperatingSystemMXBean;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 测试监控指标
|
||||||
|
*
|
||||||
|
* @author tjq
|
||||||
|
* @since 2020/8/1
|
||||||
|
*/
|
||||||
|
public class MonitorTest {
|
||||||
|
|
||||||
|
private static final OperatingSystemMXBean osMXBean = ManagementFactory.getOperatingSystemMXBean();
|
||||||
|
private static final Runtime runtime = Runtime.getRuntime();
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testGetSystemLoadAverage() {
|
||||||
|
for (int i = 0; i < 10000; i++) {
|
||||||
|
double average = osMXBean.getSystemLoadAverage();
|
||||||
|
System.out.println(average);
|
||||||
|
System.out.println(average / osMXBean.getAvailableProcessors());
|
||||||
|
try {
|
||||||
|
Thread.sleep(1000);
|
||||||
|
}catch (Exception ignore) {
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testListDisk() {
|
||||||
|
Stopwatch sw = Stopwatch.createStarted();
|
||||||
|
SystemMetrics systemMetrics = SystemInfoUtils.getSystemMetrics();
|
||||||
|
System.out.println(JsonUtils.toJSONString(systemMetrics));
|
||||||
|
System.out.println(sw.stop());
|
||||||
|
Stopwatch sw2 = Stopwatch.createStarted();
|
||||||
|
System.out.println(systemMetrics.calculateScore());
|
||||||
|
System.out.println(sw2.stop());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testMemory() {
|
||||||
|
System.out.println("- used:" + (runtime.totalMemory() - runtime.freeMemory()));
|
||||||
|
MemoryMXBean memoryMXBean = ManagementFactory.getMemoryMXBean();
|
||||||
|
System.out.println("heap used: " + memoryMXBean.getHeapMemoryUsage());
|
||||||
|
System.out.println("noheap used: " + memoryMXBean.getNonHeapMemoryUsage());
|
||||||
|
}
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user