mirror of
https://github.com/PowerJob/PowerJob.git
synced 2025-07-17 00:00:04 +08:00
fix: split brain #102
This commit is contained in:
parent
4be5b01ba1
commit
5a50d933a8
@ -12,6 +12,7 @@ import com.github.kfcfans.powerjob.server.service.lock.LockService;
|
||||
import com.google.common.collect.Sets;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import javax.annotation.Resource;
|
||||
@ -20,6 +21,7 @@ import java.util.Date;
|
||||
import java.util.Optional;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.CompletionStage;
|
||||
import java.util.concurrent.ThreadLocalRandom;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
/**
|
||||
@ -37,17 +39,25 @@ public class ServerSelectService {
|
||||
@Resource
|
||||
private AppInfoRepository appInfoRepository;
|
||||
|
||||
@Value("${oms.accurate.select.server.percentage}")
|
||||
private int accurateSelectServerPercentage;
|
||||
|
||||
private static final int RETRY_TIMES = 10;
|
||||
private static final long PING_TIMEOUT_MS = 1000;
|
||||
private static final String SERVER_ELECT_LOCK = "server_elect_%d";
|
||||
|
||||
/**
|
||||
* 获取某个应用对应的Server
|
||||
*
|
||||
* @param appId 应用ID
|
||||
* @return 当前可用的Server
|
||||
*/
|
||||
public String getServer(Long appId) {
|
||||
|
||||
public String getServer(Long appId, String currentServer) {
|
||||
if (!accurate()) {
|
||||
// 如果是本机,就不需要查数据库那么复杂的操作了,直接返回成功
|
||||
if (OhMyServer.getActorSystemAddress().equals(currentServer)) {
|
||||
return currentServer;
|
||||
}
|
||||
}
|
||||
return getServer0(appId);
|
||||
}
|
||||
|
||||
private String getServer0(Long appId) {
|
||||
|
||||
Set<String> downServerCache = Sets.newHashSet();
|
||||
|
||||
@ -95,7 +105,7 @@ public class ServerSelectService {
|
||||
lockService.unlock(lockName);
|
||||
}
|
||||
}
|
||||
throw new RuntimeException("server elect failed for app " + appId);
|
||||
throw new PowerJobException("server elect failed for app " + appId);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -113,6 +123,10 @@ public class ServerSelectService {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (OhMyServer.getActorSystemAddress().equals(serverAddress)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
Ping ping = new Ping();
|
||||
ping.setCurrentTime(System.currentTimeMillis());
|
||||
|
||||
@ -128,4 +142,8 @@ public class ServerSelectService {
|
||||
downServerCache.add(serverAddress);
|
||||
return false;
|
||||
}
|
||||
|
||||
private boolean accurate() {
|
||||
return ThreadLocalRandom.current().nextInt(100) < accurateSelectServerPercentage;
|
||||
}
|
||||
}
|
||||
|
@ -2,26 +2,21 @@ package com.github.kfcfans.powerjob.server.web.controller;
|
||||
|
||||
import com.alibaba.fastjson.JSON;
|
||||
import com.alibaba.fastjson.JSONObject;
|
||||
import com.github.kfcfans.powerjob.common.PowerJobException;
|
||||
import com.github.kfcfans.powerjob.common.response.ResultDTO;
|
||||
import com.github.kfcfans.powerjob.common.utils.CommonUtils;
|
||||
import com.github.kfcfans.powerjob.common.utils.NetUtils;
|
||||
import com.github.kfcfans.powerjob.server.akka.OhMyServer;
|
||||
import com.github.kfcfans.powerjob.server.persistence.core.model.AppInfoDO;
|
||||
import com.github.kfcfans.powerjob.server.persistence.core.model.JobInfoDO;
|
||||
import com.github.kfcfans.powerjob.server.persistence.core.repository.AppInfoRepository;
|
||||
import com.github.kfcfans.powerjob.server.persistence.core.repository.JobInfoRepository;
|
||||
import com.github.kfcfans.powerjob.server.service.ha.ClusterStatusHolder;
|
||||
import com.github.kfcfans.powerjob.server.service.ha.ServerSelectService;
|
||||
import com.github.kfcfans.powerjob.server.service.ha.WorkerManagerService;
|
||||
import com.taobao.api.internal.cluster.ClusterManager;
|
||||
import org.springframework.web.bind.annotation.GetMapping;
|
||||
import org.springframework.web.bind.annotation.RequestMapping;
|
||||
import org.springframework.web.bind.annotation.RequestParam;
|
||||
import org.springframework.web.bind.annotation.RestController;
|
||||
|
||||
import javax.annotation.Resource;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
import java.util.TimeZone;
|
||||
|
||||
@ -52,13 +47,7 @@ public class ServerController {
|
||||
|
||||
@GetMapping("/acquire")
|
||||
public ResultDTO<String> acquireServer(Long appId, String currentServer) {
|
||||
|
||||
// 如果是本机,就不需要查数据库那么复杂的操作了,直接返回成功
|
||||
if (OhMyServer.getActorSystemAddress().equals(currentServer)) {
|
||||
return ResultDTO.success(currentServer);
|
||||
}
|
||||
String server = serverSelectService.getServer(appId);
|
||||
return ResultDTO.success(server);
|
||||
return ResultDTO.success(serverSelectService.getServer(appId, currentServer));
|
||||
}
|
||||
|
||||
@GetMapping("/hello")
|
||||
|
@ -33,3 +33,6 @@ oms.container.retention.remote=-1
|
||||
|
||||
####### 缓存配置 #######
|
||||
oms.instance.metadata.cache.size=1024
|
||||
|
||||
####### 精确获取 server 的百分比,0~100,100代表每次 worker 获取 server 都会进行完整的探活流程,不存在脑裂问题,但有性能开销 #######
|
||||
oms.accurate.select.server.percentage = 50
|
@ -33,3 +33,6 @@ oms.container.retention.remote=-1
|
||||
|
||||
####### 缓存配置 #######
|
||||
oms.instance.metadata.cache.size=1024
|
||||
|
||||
####### 精确获取 server 的百分比,0~100,100代表每次 worker 获取 server 都会进行完整的探活流程,不存在脑裂问题,但有性能开销 #######
|
||||
oms.accurate.select.server.percentage = 50
|
@ -33,3 +33,6 @@ oms.container.retention.remote=-1
|
||||
|
||||
####### 缓存配置 #######
|
||||
oms.instance.metadata.cache.size=2048
|
||||
|
||||
####### 精确获取 server 的百分比,0~100,100代表每次 worker 获取 server 都会进行完整的探活流程,不存在脑裂问题,但有性能开销 #######
|
||||
oms.accurate.select.server.percentage = 50
|
Loading…
x
Reference in New Issue
Block a user