mirror of
https://github.com/PowerJob/PowerJob.git
synced 2025-07-17 00:00:04 +08:00
fix: split brain #102
This commit is contained in:
parent
4be5b01ba1
commit
5a50d933a8
@ -12,6 +12,7 @@ import com.github.kfcfans.powerjob.server.service.lock.LockService;
|
|||||||
import com.google.common.collect.Sets;
|
import com.google.common.collect.Sets;
|
||||||
import lombok.extern.slf4j.Slf4j;
|
import lombok.extern.slf4j.Slf4j;
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
import org.springframework.beans.factory.annotation.Value;
|
||||||
import org.springframework.stereotype.Service;
|
import org.springframework.stereotype.Service;
|
||||||
|
|
||||||
import javax.annotation.Resource;
|
import javax.annotation.Resource;
|
||||||
@ -20,6 +21,7 @@ import java.util.Date;
|
|||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.concurrent.CompletionStage;
|
import java.util.concurrent.CompletionStage;
|
||||||
|
import java.util.concurrent.ThreadLocalRandom;
|
||||||
import java.util.concurrent.TimeUnit;
|
import java.util.concurrent.TimeUnit;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -37,17 +39,25 @@ public class ServerSelectService {
|
|||||||
@Resource
|
@Resource
|
||||||
private AppInfoRepository appInfoRepository;
|
private AppInfoRepository appInfoRepository;
|
||||||
|
|
||||||
|
@Value("${oms.accurate.select.server.percentage}")
|
||||||
|
private int accurateSelectServerPercentage;
|
||||||
|
|
||||||
private static final int RETRY_TIMES = 10;
|
private static final int RETRY_TIMES = 10;
|
||||||
private static final long PING_TIMEOUT_MS = 1000;
|
private static final long PING_TIMEOUT_MS = 1000;
|
||||||
private static final String SERVER_ELECT_LOCK = "server_elect_%d";
|
private static final String SERVER_ELECT_LOCK = "server_elect_%d";
|
||||||
|
|
||||||
/**
|
|
||||||
* 获取某个应用对应的Server
|
public String getServer(Long appId, String currentServer) {
|
||||||
*
|
if (!accurate()) {
|
||||||
* @param appId 应用ID
|
// 如果是本机,就不需要查数据库那么复杂的操作了,直接返回成功
|
||||||
* @return 当前可用的Server
|
if (OhMyServer.getActorSystemAddress().equals(currentServer)) {
|
||||||
*/
|
return currentServer;
|
||||||
public String getServer(Long appId) {
|
}
|
||||||
|
}
|
||||||
|
return getServer0(appId);
|
||||||
|
}
|
||||||
|
|
||||||
|
private String getServer0(Long appId) {
|
||||||
|
|
||||||
Set<String> downServerCache = Sets.newHashSet();
|
Set<String> downServerCache = Sets.newHashSet();
|
||||||
|
|
||||||
@ -95,7 +105,7 @@ public class ServerSelectService {
|
|||||||
lockService.unlock(lockName);
|
lockService.unlock(lockName);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
throw new RuntimeException("server elect failed for app " + appId);
|
throw new PowerJobException("server elect failed for app " + appId);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -113,6 +123,10 @@ public class ServerSelectService {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (OhMyServer.getActorSystemAddress().equals(serverAddress)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
Ping ping = new Ping();
|
Ping ping = new Ping();
|
||||||
ping.setCurrentTime(System.currentTimeMillis());
|
ping.setCurrentTime(System.currentTimeMillis());
|
||||||
|
|
||||||
@ -128,4 +142,8 @@ public class ServerSelectService {
|
|||||||
downServerCache.add(serverAddress);
|
downServerCache.add(serverAddress);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private boolean accurate() {
|
||||||
|
return ThreadLocalRandom.current().nextInt(100) < accurateSelectServerPercentage;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -2,26 +2,21 @@ package com.github.kfcfans.powerjob.server.web.controller;
|
|||||||
|
|
||||||
import com.alibaba.fastjson.JSON;
|
import com.alibaba.fastjson.JSON;
|
||||||
import com.alibaba.fastjson.JSONObject;
|
import com.alibaba.fastjson.JSONObject;
|
||||||
import com.github.kfcfans.powerjob.common.PowerJobException;
|
|
||||||
import com.github.kfcfans.powerjob.common.response.ResultDTO;
|
import com.github.kfcfans.powerjob.common.response.ResultDTO;
|
||||||
import com.github.kfcfans.powerjob.common.utils.CommonUtils;
|
import com.github.kfcfans.powerjob.common.utils.CommonUtils;
|
||||||
import com.github.kfcfans.powerjob.common.utils.NetUtils;
|
import com.github.kfcfans.powerjob.common.utils.NetUtils;
|
||||||
import com.github.kfcfans.powerjob.server.akka.OhMyServer;
|
import com.github.kfcfans.powerjob.server.akka.OhMyServer;
|
||||||
import com.github.kfcfans.powerjob.server.persistence.core.model.AppInfoDO;
|
import com.github.kfcfans.powerjob.server.persistence.core.model.AppInfoDO;
|
||||||
import com.github.kfcfans.powerjob.server.persistence.core.model.JobInfoDO;
|
|
||||||
import com.github.kfcfans.powerjob.server.persistence.core.repository.AppInfoRepository;
|
import com.github.kfcfans.powerjob.server.persistence.core.repository.AppInfoRepository;
|
||||||
import com.github.kfcfans.powerjob.server.persistence.core.repository.JobInfoRepository;
|
import com.github.kfcfans.powerjob.server.persistence.core.repository.JobInfoRepository;
|
||||||
import com.github.kfcfans.powerjob.server.service.ha.ClusterStatusHolder;
|
|
||||||
import com.github.kfcfans.powerjob.server.service.ha.ServerSelectService;
|
import com.github.kfcfans.powerjob.server.service.ha.ServerSelectService;
|
||||||
import com.github.kfcfans.powerjob.server.service.ha.WorkerManagerService;
|
import com.github.kfcfans.powerjob.server.service.ha.WorkerManagerService;
|
||||||
import com.taobao.api.internal.cluster.ClusterManager;
|
|
||||||
import org.springframework.web.bind.annotation.GetMapping;
|
import org.springframework.web.bind.annotation.GetMapping;
|
||||||
import org.springframework.web.bind.annotation.RequestMapping;
|
import org.springframework.web.bind.annotation.RequestMapping;
|
||||||
import org.springframework.web.bind.annotation.RequestParam;
|
import org.springframework.web.bind.annotation.RequestParam;
|
||||||
import org.springframework.web.bind.annotation.RestController;
|
import org.springframework.web.bind.annotation.RestController;
|
||||||
|
|
||||||
import javax.annotation.Resource;
|
import javax.annotation.Resource;
|
||||||
import java.util.List;
|
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
import java.util.TimeZone;
|
import java.util.TimeZone;
|
||||||
|
|
||||||
@ -52,13 +47,7 @@ public class ServerController {
|
|||||||
|
|
||||||
@GetMapping("/acquire")
|
@GetMapping("/acquire")
|
||||||
public ResultDTO<String> acquireServer(Long appId, String currentServer) {
|
public ResultDTO<String> acquireServer(Long appId, String currentServer) {
|
||||||
|
return ResultDTO.success(serverSelectService.getServer(appId, currentServer));
|
||||||
// 如果是本机,就不需要查数据库那么复杂的操作了,直接返回成功
|
|
||||||
if (OhMyServer.getActorSystemAddress().equals(currentServer)) {
|
|
||||||
return ResultDTO.success(currentServer);
|
|
||||||
}
|
|
||||||
String server = serverSelectService.getServer(appId);
|
|
||||||
return ResultDTO.success(server);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@GetMapping("/hello")
|
@GetMapping("/hello")
|
||||||
|
@ -32,4 +32,7 @@ oms.container.retention.local=1
|
|||||||
oms.container.retention.remote=-1
|
oms.container.retention.remote=-1
|
||||||
|
|
||||||
####### 缓存配置 #######
|
####### 缓存配置 #######
|
||||||
oms.instance.metadata.cache.size=1024
|
oms.instance.metadata.cache.size=1024
|
||||||
|
|
||||||
|
####### 精确获取 server 的百分比,0~100,100代表每次 worker 获取 server 都会进行完整的探活流程,不存在脑裂问题,但有性能开销 #######
|
||||||
|
oms.accurate.select.server.percentage = 50
|
@ -32,4 +32,7 @@ oms.container.retention.local=3
|
|||||||
oms.container.retention.remote=-1
|
oms.container.retention.remote=-1
|
||||||
|
|
||||||
####### 缓存配置 #######
|
####### 缓存配置 #######
|
||||||
oms.instance.metadata.cache.size=1024
|
oms.instance.metadata.cache.size=1024
|
||||||
|
|
||||||
|
####### 精确获取 server 的百分比,0~100,100代表每次 worker 获取 server 都会进行完整的探活流程,不存在脑裂问题,但有性能开销 #######
|
||||||
|
oms.accurate.select.server.percentage = 50
|
@ -32,4 +32,7 @@ oms.container.retention.local=7
|
|||||||
oms.container.retention.remote=-1
|
oms.container.retention.remote=-1
|
||||||
|
|
||||||
####### 缓存配置 #######
|
####### 缓存配置 #######
|
||||||
oms.instance.metadata.cache.size=2048
|
oms.instance.metadata.cache.size=2048
|
||||||
|
|
||||||
|
####### 精确获取 server 的百分比,0~100,100代表每次 worker 获取 server 都会进行完整的探活流程,不存在脑裂问题,但有性能开销 #######
|
||||||
|
oms.accurate.select.server.percentage = 50
|
Loading…
x
Reference in New Issue
Block a user